library(phyloseq)
library(tidyverse)
When you finish running the picrust2 pipeline you’ll get an output
file called path_abun_unstrat_descrip.tsv. take this file and unzip it
to get path_abun_unstrat_descrip.csv. This is the file you’ll want to
use for the stats.
It looks like this!
EC_table <- read.csv("C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\ULTRA\\picrust\\pathways_out\\path_abun_unstrat_descrip.tsv\\path_abun_unstrat_descrip.csv")
EC_table
We’ll say the taxa table is the pathway and description columns, and
everything else is your OTU table
EC_taxa <- select(EC_table, c("pathway", "description")) #move columns to taxa table
EC_table <- select(EC_table, -c("pathway", "description")) #remove the sample columns from your OTU table
HEY remember when we removed all the spaces from the names to run
picrust? well lets put those back! Picrust also seemed to eat all the
dashes and replace them with periods so we’ll change that back too
spacemore <- function(x) {colnames(x) <- gsub("_", " ", colnames(x));x} #making some quick gsup functions because that the way random internet answer I found did it
dashmore <- function(x) {colnames(x) <- gsub("[.]", "-", colnames(x));x}
EC_table <- spacemore(EC_table)
EC_table <- dashmore(EC_table)
#then this is normal phyloseq stuff you know!
EC_taxa <- as.matrix(EC_taxa)
EC_table <- as.matrix(EC_table)
#check
class(EC_taxa)
[1] "matrix" "array"
class(EC_table)
[1] "matrix" "array"
rownames(EC_table) <- paste0("OTU", 1:nrow(EC_table))
rownames(EC_taxa) <- paste0("OTU", 1:nrow(EC_taxa))
OTU_EC = otu_table(EC_table, taxa_are_rows = TRUE)
TAX_EC = tax_table(EC_taxa)
physeq_EC = phyloseq(OTU_EC, TAX_EC)
Then we can add the sample data and save the file
Key <- read_csv("C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\ULTRA\\They call me... data\\Angus-16S\\ultra to categories key.csv")
Key <- data.frame(Key[,-1], row.names=Key$Name)
sampledata = sample_data(Key)
physeq_EC = merge_phyloseq(physeq_EC, sampledata)
#saveRDS(physeq_EC, file = "C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\ULTRA\\physeq_EC.rds")
#physeq_EC <- readRDS(file = "C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\ULTRA\\physeq_EC.rds")
And done! feel free to look at physeq_EC with the relative abundance
graphs or beta diversity, but be warned. Some programs get ANGRY that
this is data isnt count data (because it has decimals in it)
(Specifically divnet - alpha diversity). Luckily there is a simple
solution! just times everything by a large number!
This is as simple as…
EC_table <-10000000*EC_table
#run this command before you convert EC_table to a matrix
LS0tDQp0aXRsZTogIk1vdmluZyBQSUNSVVN0MiBkYXRhIGludG8gYSBwaHlsb3NlcSBvYmplY3QiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkocGh5bG9zZXEpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmBgYA0KDQoNCg0KV2hlbiB5b3UgZmluaXNoIHJ1bm5pbmcgdGhlIHBpY3J1c3QyIHBpcGVsaW5lIHlvdSdsbCBnZXQgYW4gb3V0cHV0IGZpbGUgY2FsbGVkIHBhdGhfYWJ1bl91bnN0cmF0X2Rlc2NyaXAudHN2LiB0YWtlIHRoaXMgZmlsZSBhbmQgdW56aXAgaXQgdG8gZ2V0IHBhdGhfYWJ1bl91bnN0cmF0X2Rlc2NyaXAuY3N2LiBUaGlzIGlzIHRoZSBmaWxlIHlvdSdsbCB3YW50IHRvIHVzZSBmb3IgdGhlIHN0YXRzLiANCg0KSXQgbG9va3MgbGlrZSB0aGlzIQ0KYGBge3J9DQpFQ190YWJsZSA8LSByZWFkLmNzdigiQzpcXFVzZXJzXFxhbmd1c1xcT25lRHJpdmUgLSBVTkJDXFxBbmd1cyBCYWxsXFxMYWIgd29ya1xcQmlvaW5mb3JtYXRpY3NcXFVMVFJBXFxwaWNydXN0XFxwYXRod2F5c19vdXRcXHBhdGhfYWJ1bl91bnN0cmF0X2Rlc2NyaXAudHN2XFxwYXRoX2FidW5fdW5zdHJhdF9kZXNjcmlwLmNzdiIpDQpFQ190YWJsZQ0KYGBgDQogDQoNCg0KDQpXZSdsbCBzYXkgdGhlIHRheGEgdGFibGUgaXMgdGhlIHBhdGh3YXkgYW5kIGRlc2NyaXB0aW9uIGNvbHVtbnMsIGFuZCBldmVyeXRoaW5nIGVsc2UgaXMgeW91ciBPVFUgdGFibGUNCmBgYHtyfQ0KDQoNCkVDX3RheGEgPC0gc2VsZWN0KEVDX3RhYmxlLCBjKCJwYXRod2F5IiwgImRlc2NyaXB0aW9uIikpICNtb3ZlIGNvbHVtbnMgdG8gdGF4YSB0YWJsZQ0KDQoNCkVDX3RhYmxlIDwtIHNlbGVjdChFQ190YWJsZSwgLWMoInBhdGh3YXkiLCAiZGVzY3JpcHRpb24iKSkgI3JlbW92ZSB0aGUgc2FtcGxlIGNvbHVtbnMgZnJvbSB5b3VyIE9UVSB0YWJsZQ0KDQpgYGANCg0KSEVZIHJlbWVtYmVyIHdoZW4gd2UgcmVtb3ZlZCBhbGwgdGhlIHNwYWNlcyBmcm9tIHRoZSBuYW1lcyB0byBydW4gcGljcnVzdD8gd2VsbCBsZXRzIHB1dCB0aG9zZSBiYWNrISBQaWNydXN0IGFsc28gc2VlbWVkIHRvIGVhdCBhbGwgdGhlIGRhc2hlcyBhbmQgcmVwbGFjZSB0aGVtIHdpdGggcGVyaW9kcyBzbyB3ZSdsbCBjaGFuZ2UgdGhhdCBiYWNrIHRvbw0KDQpgYGB7cn0NCnNwYWNlbW9yZSA8LSBmdW5jdGlvbih4KSB7Y29sbmFtZXMoeCkgPC0gZ3N1YigiXyIsICIgIiwgY29sbmFtZXMoeCkpO3h9ICNtYWtpbmcgc29tZSBxdWljayBnc3VwIGZ1bmN0aW9ucyBiZWNhdXNlIHRoYXQgdGhlIHdheSByYW5kb20gaW50ZXJuZXQgYW5zd2VyIEkgZm91bmQgZGlkIGl0DQpkYXNobW9yZSA8LSBmdW5jdGlvbih4KSB7Y29sbmFtZXMoeCkgPC0gZ3N1YigiWy5dIiwgIi0iLCBjb2xuYW1lcyh4KSk7eH0NCg0KDQpFQ190YWJsZSA8LSBzcGFjZW1vcmUoRUNfdGFibGUpDQpFQ190YWJsZSA8LSBkYXNobW9yZShFQ190YWJsZSkNCmBgYA0KDQoNCg0KDQoNCg0KYGBge3J9DQojdGhlbiB0aGlzIGlzIG5vcm1hbCBwaHlsb3NlcSBzdHVmZiB5b3Uga25vdyEgDQpFQ190YXhhIDwtIGFzLm1hdHJpeChFQ190YXhhKQ0KRUNfdGFibGUgPC0gYXMubWF0cml4KEVDX3RhYmxlKQ0KDQojY2hlY2sNCmNsYXNzKEVDX3RheGEpDQpjbGFzcyhFQ190YWJsZSkgDQoNCg0Kcm93bmFtZXMoRUNfdGFibGUpIDwtIHBhc3RlMCgiT1RVIiwgMTpucm93KEVDX3RhYmxlKSkNCnJvd25hbWVzKEVDX3RheGEpIDwtIHBhc3RlMCgiT1RVIiwgMTpucm93KEVDX3RheGEpKQ0KDQpPVFVfRUMgPSBvdHVfdGFibGUoRUNfdGFibGUsIHRheGFfYXJlX3Jvd3MgPSBUUlVFKQ0KVEFYX0VDID0gdGF4X3RhYmxlKEVDX3RheGEpDQpwaHlzZXFfRUMgPSBwaHlsb3NlcShPVFVfRUMsIFRBWF9FQykNCg0KDQpgYGANCg0KDQoNClRoZW4gd2UgY2FuIGFkZCB0aGUgc2FtcGxlIGRhdGEgYW5kIHNhdmUgdGhlIGZpbGUNCg0KYGBge3J9DQoNCktleSA8LSByZWFkX2NzdigiQzpcXFVzZXJzXFxhbmd1c1xcT25lRHJpdmUgLSBVTkJDXFxBbmd1cyBCYWxsXFxMYWIgd29ya1xcVUxUUkFcXFRoZXkgY2FsbCBtZS4uLiBkYXRhXFxBbmd1cy0xNlNcXHVsdHJhIHRvIGNhdGVnb3JpZXMga2V5LmNzdiIpDQpLZXkgPC0gZGF0YS5mcmFtZShLZXlbLC0xXSwgcm93Lm5hbWVzPUtleSROYW1lKQ0Kc2FtcGxlZGF0YSA9IHNhbXBsZV9kYXRhKEtleSkNCg0KcGh5c2VxX0VDID0gbWVyZ2VfcGh5bG9zZXEocGh5c2VxX0VDLCBzYW1wbGVkYXRhKQ0KDQojc2F2ZVJEUyhwaHlzZXFfRUMsIGZpbGUgPSAiQzpcXFVzZXJzXFxhbmd1c1xcT25lRHJpdmUgLSBVTkJDXFxBbmd1cyBCYWxsXFxMYWIgd29ya1xcQmlvaW5mb3JtYXRpY3NcXFVMVFJBXFxwaHlzZXFfRUMucmRzIikNCg0KI3BoeXNlcV9FQyA8LSByZWFkUkRTKGZpbGUgPSAiQzpcXFVzZXJzXFxhbmd1c1xcT25lRHJpdmUgLSBVTkJDXFxBbmd1cyBCYWxsXFxMYWIgd29ya1xcQmlvaW5mb3JtYXRpY3NcXFVMVFJBXFxwaHlzZXFfRUMucmRzIikNCmBgYA0KIA0KQW5kIGRvbmUhIGZlZWwgZnJlZSB0byBsb29rIGF0IHBoeXNlcV9FQyB3aXRoIHRoZSByZWxhdGl2ZSBhYnVuZGFuY2UgZ3JhcGhzIG9yIGJldGEgZGl2ZXJzaXR5LCBidXQgYmUgd2FybmVkLiBTb21lIHByb2dyYW1zIGdldCBBTkdSWSB0aGF0IHRoaXMgaXMgZGF0YSBpc250IGNvdW50IGRhdGEgKGJlY2F1c2UgaXQgaGFzIGRlY2ltYWxzIGluIGl0KSAoU3BlY2lmaWNhbGx5IGRpdm5ldCAtIGFscGhhIGRpdmVyc2l0eSkuIEx1Y2tpbHkgdGhlcmUgaXMgYSBzaW1wbGUgc29sdXRpb24hIGp1c3QgdGltZXMgZXZlcnl0aGluZyBieSBhIGxhcmdlIG51bWJlciENCg0KDQpUaGlzIGlzIGFzIHNpbXBsZSBhcy4uLg0KYGBge3J9DQpFQ190YWJsZSA8LTEwMDAwMDAwKkVDX3RhYmxlDQojcnVuIHRoaXMgY29tbWFuZCBiZWZvcmUgeW91IGNvbnZlcnQgRUNfdGFibGUgdG8gYSBtYXRyaXgNCg0KDQpgYGANCg0K