library(phyloseq)
library(tidyverse)

When you finish running the picrust2 pipeline you’ll get an output file called path_abun_unstrat_descrip.tsv. take this file and unzip it to get path_abun_unstrat_descrip.csv. This is the file you’ll want to use for the stats.

It looks like this!

EC_table <- read.csv("C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\ULTRA\\picrust\\pathways_out\\path_abun_unstrat_descrip.tsv\\path_abun_unstrat_descrip.csv")
EC_table

We’ll say the taxa table is the pathway and description columns, and everything else is your OTU table



EC_taxa <- select(EC_table, c("pathway", "description")) #move columns to taxa table


EC_table <- select(EC_table, -c("pathway", "description")) #remove the sample columns from your OTU table

HEY remember when we removed all the spaces from the names to run picrust? well lets put those back! Picrust also seemed to eat all the dashes and replace them with periods so we’ll change that back too

spacemore <- function(x) {colnames(x) <- gsub("_", " ", colnames(x));x} #making some quick gsup functions because that the way random internet answer I found did it
dashmore <- function(x) {colnames(x) <- gsub("[.]", "-", colnames(x));x}


EC_table <- spacemore(EC_table)
EC_table <- dashmore(EC_table)
#then this is normal phyloseq stuff you know! 
EC_taxa <- as.matrix(EC_taxa)
EC_table <- as.matrix(EC_table)

#check
class(EC_taxa)
[1] "matrix" "array" 
class(EC_table) 
[1] "matrix" "array" 
rownames(EC_table) <- paste0("OTU", 1:nrow(EC_table))
rownames(EC_taxa) <- paste0("OTU", 1:nrow(EC_taxa))

OTU_EC = otu_table(EC_table, taxa_are_rows = TRUE)
TAX_EC = tax_table(EC_taxa)
physeq_EC = phyloseq(OTU_EC, TAX_EC)

Then we can add the sample data and save the file


Key <- read_csv("C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\ULTRA\\They call me... data\\Angus-16S\\ultra to categories key.csv")
Key <- data.frame(Key[,-1], row.names=Key$Name)
sampledata = sample_data(Key)

physeq_EC = merge_phyloseq(physeq_EC, sampledata)

#saveRDS(physeq_EC, file = "C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\ULTRA\\physeq_EC.rds")

#physeq_EC <- readRDS(file = "C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\ULTRA\\physeq_EC.rds")

And done! feel free to look at physeq_EC with the relative abundance graphs or beta diversity, but be warned. Some programs get ANGRY that this is data isnt count data (because it has decimals in it) (Specifically divnet - alpha diversity). Luckily there is a simple solution! just times everything by a large number!

This is as simple as…

EC_table <-10000000*EC_table
#run this command before you convert EC_table to a matrix
LS0tDQp0aXRsZTogIk1vdmluZyBQSUNSVVN0MiBkYXRhIGludG8gYSBwaHlsb3NlcSBvYmplY3QiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkocGh5bG9zZXEpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmBgYA0KDQoNCg0KV2hlbiB5b3UgZmluaXNoIHJ1bm5pbmcgdGhlIHBpY3J1c3QyIHBpcGVsaW5lIHlvdSdsbCBnZXQgYW4gb3V0cHV0IGZpbGUgY2FsbGVkIHBhdGhfYWJ1bl91bnN0cmF0X2Rlc2NyaXAudHN2LiB0YWtlIHRoaXMgZmlsZSBhbmQgdW56aXAgaXQgdG8gZ2V0IHBhdGhfYWJ1bl91bnN0cmF0X2Rlc2NyaXAuY3N2LiBUaGlzIGlzIHRoZSBmaWxlIHlvdSdsbCB3YW50IHRvIHVzZSBmb3IgdGhlIHN0YXRzLiANCg0KSXQgbG9va3MgbGlrZSB0aGlzIQ0KYGBge3J9DQpFQ190YWJsZSA8LSByZWFkLmNzdigiQzpcXFVzZXJzXFxhbmd1c1xcT25lRHJpdmUgLSBVTkJDXFxBbmd1cyBCYWxsXFxMYWIgd29ya1xcQmlvaW5mb3JtYXRpY3NcXFVMVFJBXFxwaWNydXN0XFxwYXRod2F5c19vdXRcXHBhdGhfYWJ1bl91bnN0cmF0X2Rlc2NyaXAudHN2XFxwYXRoX2FidW5fdW5zdHJhdF9kZXNjcmlwLmNzdiIpDQpFQ190YWJsZQ0KYGBgDQogDQoNCg0KDQpXZSdsbCBzYXkgdGhlIHRheGEgdGFibGUgaXMgdGhlIHBhdGh3YXkgYW5kIGRlc2NyaXB0aW9uIGNvbHVtbnMsIGFuZCBldmVyeXRoaW5nIGVsc2UgaXMgeW91ciBPVFUgdGFibGUNCmBgYHtyfQ0KDQoNCkVDX3RheGEgPC0gc2VsZWN0KEVDX3RhYmxlLCBjKCJwYXRod2F5IiwgImRlc2NyaXB0aW9uIikpICNtb3ZlIGNvbHVtbnMgdG8gdGF4YSB0YWJsZQ0KDQoNCkVDX3RhYmxlIDwtIHNlbGVjdChFQ190YWJsZSwgLWMoInBhdGh3YXkiLCAiZGVzY3JpcHRpb24iKSkgI3JlbW92ZSB0aGUgc2FtcGxlIGNvbHVtbnMgZnJvbSB5b3VyIE9UVSB0YWJsZQ0KDQpgYGANCg0KSEVZIHJlbWVtYmVyIHdoZW4gd2UgcmVtb3ZlZCBhbGwgdGhlIHNwYWNlcyBmcm9tIHRoZSBuYW1lcyB0byBydW4gcGljcnVzdD8gd2VsbCBsZXRzIHB1dCB0aG9zZSBiYWNrISBQaWNydXN0IGFsc28gc2VlbWVkIHRvIGVhdCBhbGwgdGhlIGRhc2hlcyBhbmQgcmVwbGFjZSB0aGVtIHdpdGggcGVyaW9kcyBzbyB3ZSdsbCBjaGFuZ2UgdGhhdCBiYWNrIHRvbw0KDQpgYGB7cn0NCnNwYWNlbW9yZSA8LSBmdW5jdGlvbih4KSB7Y29sbmFtZXMoeCkgPC0gZ3N1YigiXyIsICIgIiwgY29sbmFtZXMoeCkpO3h9ICNtYWtpbmcgc29tZSBxdWljayBnc3VwIGZ1bmN0aW9ucyBiZWNhdXNlIHRoYXQgdGhlIHdheSByYW5kb20gaW50ZXJuZXQgYW5zd2VyIEkgZm91bmQgZGlkIGl0DQpkYXNobW9yZSA8LSBmdW5jdGlvbih4KSB7Y29sbmFtZXMoeCkgPC0gZ3N1YigiWy5dIiwgIi0iLCBjb2xuYW1lcyh4KSk7eH0NCg0KDQpFQ190YWJsZSA8LSBzcGFjZW1vcmUoRUNfdGFibGUpDQpFQ190YWJsZSA8LSBkYXNobW9yZShFQ190YWJsZSkNCmBgYA0KDQoNCg0KDQoNCg0KYGBge3J9DQojdGhlbiB0aGlzIGlzIG5vcm1hbCBwaHlsb3NlcSBzdHVmZiB5b3Uga25vdyEgDQpFQ190YXhhIDwtIGFzLm1hdHJpeChFQ190YXhhKQ0KRUNfdGFibGUgPC0gYXMubWF0cml4KEVDX3RhYmxlKQ0KDQojY2hlY2sNCmNsYXNzKEVDX3RheGEpDQpjbGFzcyhFQ190YWJsZSkgDQoNCg0Kcm93bmFtZXMoRUNfdGFibGUpIDwtIHBhc3RlMCgiT1RVIiwgMTpucm93KEVDX3RhYmxlKSkNCnJvd25hbWVzKEVDX3RheGEpIDwtIHBhc3RlMCgiT1RVIiwgMTpucm93KEVDX3RheGEpKQ0KDQpPVFVfRUMgPSBvdHVfdGFibGUoRUNfdGFibGUsIHRheGFfYXJlX3Jvd3MgPSBUUlVFKQ0KVEFYX0VDID0gdGF4X3RhYmxlKEVDX3RheGEpDQpwaHlzZXFfRUMgPSBwaHlsb3NlcShPVFVfRUMsIFRBWF9FQykNCg0KDQpgYGANCg0KDQoNClRoZW4gd2UgY2FuIGFkZCB0aGUgc2FtcGxlIGRhdGEgYW5kIHNhdmUgdGhlIGZpbGUNCg0KYGBge3J9DQoNCktleSA8LSByZWFkX2NzdigiQzpcXFVzZXJzXFxhbmd1c1xcT25lRHJpdmUgLSBVTkJDXFxBbmd1cyBCYWxsXFxMYWIgd29ya1xcVUxUUkFcXFRoZXkgY2FsbCBtZS4uLiBkYXRhXFxBbmd1cy0xNlNcXHVsdHJhIHRvIGNhdGVnb3JpZXMga2V5LmNzdiIpDQpLZXkgPC0gZGF0YS5mcmFtZShLZXlbLC0xXSwgcm93Lm5hbWVzPUtleSROYW1lKQ0Kc2FtcGxlZGF0YSA9IHNhbXBsZV9kYXRhKEtleSkNCg0KcGh5c2VxX0VDID0gbWVyZ2VfcGh5bG9zZXEocGh5c2VxX0VDLCBzYW1wbGVkYXRhKQ0KDQojc2F2ZVJEUyhwaHlzZXFfRUMsIGZpbGUgPSAiQzpcXFVzZXJzXFxhbmd1c1xcT25lRHJpdmUgLSBVTkJDXFxBbmd1cyBCYWxsXFxMYWIgd29ya1xcQmlvaW5mb3JtYXRpY3NcXFVMVFJBXFxwaHlzZXFfRUMucmRzIikNCg0KI3BoeXNlcV9FQyA8LSByZWFkUkRTKGZpbGUgPSAiQzpcXFVzZXJzXFxhbmd1c1xcT25lRHJpdmUgLSBVTkJDXFxBbmd1cyBCYWxsXFxMYWIgd29ya1xcQmlvaW5mb3JtYXRpY3NcXFVMVFJBXFxwaHlzZXFfRUMucmRzIikNCmBgYA0KIA0KQW5kIGRvbmUhIGZlZWwgZnJlZSB0byBsb29rIGF0IHBoeXNlcV9FQyB3aXRoIHRoZSByZWxhdGl2ZSBhYnVuZGFuY2UgZ3JhcGhzIG9yIGJldGEgZGl2ZXJzaXR5LCBidXQgYmUgd2FybmVkLiBTb21lIHByb2dyYW1zIGdldCBBTkdSWSB0aGF0IHRoaXMgaXMgZGF0YSBpc250IGNvdW50IGRhdGEgKGJlY2F1c2UgaXQgaGFzIGRlY2ltYWxzIGluIGl0KSAoU3BlY2lmaWNhbGx5IGRpdm5ldCAtIGFscGhhIGRpdmVyc2l0eSkuIEx1Y2tpbHkgdGhlcmUgaXMgYSBzaW1wbGUgc29sdXRpb24hIGp1c3QgdGltZXMgZXZlcnl0aGluZyBieSBhIGxhcmdlIG51bWJlciENCg0KDQpUaGlzIGlzIGFzIHNpbXBsZSBhcy4uLg0KYGBge3J9DQpFQ190YWJsZSA8LTEwMDAwMDAwKkVDX3RhYmxlDQojcnVuIHRoaXMgY29tbWFuZCBiZWZvcmUgeW91IGNvbnZlcnQgRUNfdGFibGUgdG8gYSBtYXRyaXgNCg0KDQpgYGANCg0K