Welcome to the FUNGuild tutorial :)
Here are some references: https://rdrr.io/github/brendanf/FUNGuildR/f/README.Rmd
https://github.com/UMNFuN/FUNGuild/blob/master/FUNGuild_Manual.pdf
First, install FUNGuildR (the R interface to FUNGuild)
# Install the required packages only when they are missing, so that re-running
# the notebook does not reinstall them every time.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
if (!requireNamespace("FUNGuildR", quietly = TRUE)) {
  devtools::install_github("brendanf/FUNGuildR")
}
load the kitchen sinks…
library(phyloseq)
library(ggplot2)
library(janitor)
library(dplyr)
library(tidyverse)
library(ggpubr)
library(ape)
library(FUNGuildR)
library(splitstackshape)
Then load the FUNGuild database. You’ll want to do this every time, as
the database is always being updated! (This takes a hot second.)
# Fetch the FUNGuild database into `fung`; it is passed to funguild_assign()
# further down as the `db` argument.
fung <- get_funguild_db()
Then let’s read in our data. We also need to rename one of the columns for
FUNGuild to work. It’s just a quirk of the program: FUNGuild wants the
taxonomy column to be called Taxonomy instead of tax.vector, which is what
DADA2 spits out.
# Read the DADA2 export (sample count columns plus a tax.vector column).
# NOTE: this absolute path is specific to the author's machine; point it at
# your own CSV before running.
fung_data <- read_csv("C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\Kenzies Data\\ITS-dada2_nochim_tax.csv")
New names:Rows: 49428 Columns: 91── Column specification ───────────────────────────────────────────────────────────
Delimiter: ","
chr (2): ...1, tax.vector
dbl (89): CHB1P1, CHB1P10, CHB1P11, CHB1P12, CHB1P13, CHB1P14, CHB1P15, CHB1P2,...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# FUNGuild expects the taxonomy column to be named "Taxonomy", so rename the
# tax.vector column accordingly.
fung_data <- fung_data %>%
  rename(Taxonomy = tax.vector)
Then we can assign the taxonomy to the data
# Annotate fung_data against the database loaded into `fung`. The result keeps
# the input columns and adds the FUNGuild annotation columns after Taxonomy
# (inspect names(fung_guild) to see exactly which ones).
fung_guild <- funguild_assign(fung_data, db = fung)
Now it’s time for the FUNGuild output to become a phyloseq object. See “How to
make a phyloseq object” first; I’ll only be explaining the differences
here.
# Build the count table in one pass:
#   1. drop the Taxonomy string,
#   2. drop the first column (the unnamed `...1` column holding the sV value),
#   3. drop the extraneous samples, i.e. every column from CHB1P1 to FNP9B3.
MetaG_1 <- fung_data %>%
  select(-("Taxonomy")) %>%
  select(-(1)) %>%
  select(-("CHB1P1":"FNP9B3"))
We’ve stolen the taxonomy column out of the fung_data table, then
we’ve split that single column into one column per taxonomic rank.
# Keep only the Taxonomy column (drop every sample column and the first,
# unnamed column), then split it on ":" into Taxonomy_1, Taxonomy_2, ...
Tax_1 <- fung_data %>%
  select(-("CHB1P1":"S4P9PO")) %>%
  select(-(1)) %>%
  cSplit(splitCols = "Taxonomy", sep = ":")
Warning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUE
# Give the split taxonomy columns their rank names, highest rank first.
Tax_1 <- Tax_1 %>%
  rename(
    Kingdom = Taxonomy_1,
    Phylum = Taxonomy_2,
    Class = Taxonomy_3,
    Order = Taxonomy_4,
    Family = Taxonomy_5,
    Genus = Taxonomy_6,
    Species = Taxonomy_7
  )
Then we take the FUNGuild information out of the funguild table
# Keep only the FUNGuild annotation columns: drop everything from CHB1P1
# through Taxonomy, then drop the remaining first column.
fung_calls <- fung_guild %>%
  select(-("CHB1P1":"Taxonomy")) %>%
  select(-(1))
Now we concatenate the table
# Append the FUNGuild annotation columns to the right of the rank columns;
# both tables come from the same input, so rows are paired by position.
Tax_1 <- cbind(Tax_1, fung_calls)
Then everything else is the same as a regular phyloseq object
# Convert both tables to matrices first, and only then label the rows.
# Assigning row names before the conversion is unreliable: setting row names
# on a tibble (MetaG_1) is deprecated, and Tax_1 comes from cSplit(), which
# returns a data.table whose as.matrix() does not carry row names over by
# default. Naming the rows of the finished matrices always sticks.
Tax_1 <- as.matrix(Tax_1)
MetaG_1 <- as.matrix(MetaG_1)

# Call the rows OTU1, OTU2, ...; seq_len() also behaves for a 0-row table,
# where 1:nrow() would wrongly yield c(1, 0).
rownames(MetaG_1) <- paste0("OTU", seq_len(nrow(MetaG_1)))
rownames(Tax_1) <- paste0("OTU", seq_len(nrow(Tax_1)))
# Sanity check: both objects should now be matrices, as shown in the output.
class(Tax_1)
[1] "matrix" "array"
class(MetaG_1)
[1] "matrix" "array"
# Wrap the count matrix (taxa in rows) and the taxonomy matrix in their
# phyloseq classes.
OTU_1 <- otu_table(MetaG_1, taxa_are_rows = TRUE)
TAX_1 <- tax_table(Tax_1)
# Bundle the two components into a single phyloseq object.
physeq <- phyloseq(OTU_1, TAX_1)
#You have to be absolutely sure you want to run this next command; if you do, see phyloseq objects
#physeq <- tax_glom(physeq, taxrank = rank_names(physeq)[5], NArm = FALSE)
# Import the sample metadata (presumably one row per sample, keyed by Name).
# NOTE: this absolute path is specific to the author's machine; point it at
# your own key file before running.
Key <- read_csv("C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\Kenzies Data\\KeyDiv.csv")
Rows: 61 Columns: 5── Column specification ───────────────────────────────────────────────────────────
Delimiter: ","
chr (5): Name, Location, Layer, Treatment, Site
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Turn the Name column into row names (dropping it as a regular column, which
# is the first one), so the metadata rows are keyed by sample name.
Key <- data.frame(Key[, -1], row.names = Key[["Name"]])
sampledata <- sample_data(Key)

# Attach the sample metadata to the OTU/taxonomy object.
physeq_Key <- merge_phyloseq(physeq, sampledata)
Did something break? Try rerunning `rownames(Tax_1) <- paste0("OTU", 1:nrow(Tax_1))`;
sometimes it’s on the fritz.
LS0tDQp0aXRsZTogIkZVTkd1aWxkIG5vdGVib29rIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCldlbGNvbWUgdG8gdGhlIEZVTkd1aWxkIHR1dG9yaWFsIDopDQoNCkhlcmUgYXJlIHNvbWUgcmVmZXJlbmNlczoNCmh0dHBzOi8vcmRyci5pby9naXRodWIvYnJlbmRhbmYvRlVOR3VpbGRSL2YvUkVBRE1FLlJtZA0KaHR0cHM6Ly9naXRodWIuY29tL1VNTkZ1Ti9GVU5HdWlsZC9ibG9iL21hc3Rlci9GVU5HdWlsZF9NYW51YWwucGRmDQoNCg0KDQoNCkZpcnN0IGluc3RhbGwgRlVOR3VpbGQNCmBgYHtyfQ0KaW5zdGFsbC5wYWNrYWdlcygiZGV2dG9vbHMiKQ0KZGV2dG9vbHM6Omluc3RhbGxfZ2l0aHViKCJicmVuZGFuZi9GVU5HdWlsZFIiKQ0KYGBgDQoNCg0KbG9hZCB0aGUga2l0Y2hlbiBzaW5rcy4uLg0KYGBge3J9DQpsaWJyYXJ5KHBoeWxvc2VxKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShqYW5pdG9yKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShnZ3B1YnIpDQpsaWJyYXJ5KGFwZSkNCmxpYnJhcnkoRlVOR3VpbGRSKQ0KbGlicmFyeShzcGxpdHN0YWNrc2hhcGUpDQpgYGANCg0KDQoNClRoZW4gaW5wdXQgdGhlIEZVTkd1aWxkIGRhdGFiYXNlDQpZb3UnbGwgd2FudCB0byBkbyB0aGlzIGV2ZXJ5IHRpbWUgYXMgdGhlIGRhdGFiYXNlIGlzIGFsd2F5cyBiZWluZyB1cGRhdGVkISAoVGhpcyB0YWtlcyBhIGhvdCBzZWNvbmQpDQpgYGB7cn0NCmZ1bmcgPC0gZ2V0X2Z1bmd1aWxkX2RiKCkNCmBgYA0KDQpUaGVuIGxldHMgaW5wdXQgb3VyIGRhdGEsIFdlIGFsc28gbmVlZCB0byByZW5hbWUgb25lIG9mIHRoZSByb3dzIGZvciBGVU5HdWlsZCB0byB3b3JrLCBJdCdzIGp1c3QgYSBxdWlyayBvZiB0aGUgcHJvZ3JhbSwgRlVOR3VpbGQgd2FudHMgdGhlIHRheG9ub215IHRvIGJlIGNhbGxlZCBUYXhvbm9teSBpbnN0ZWFkIG9mIHRheC52ZWN0b3IsIHRoYXQgREFEQTIgc3BpdHMgb3V0DQpgYGB7cn0NCmZ1bmdfZGF0YSA8LSByZWFkX2NzdigiQzpcXFVzZXJzXFxhbmd1c1xcT25lRHJpdmUgLSBVTkJDXFxBbmd1cyBCYWxsXFxMYWIgd29ya1xcQmlvaW5mb3JtYXRpY3NcXEtlbnppZXMgRGF0YVxcSVRTLWRhZGEyX25vY2hpbV90YXguY3N2IikNCg0KZnVuZ19kYXRhIDwtIHJlbmFtZShmdW5nX2RhdGEsIFRheG9ub215ID0gdGF4LnZlY3RvcikNCmBgYA0KVGhlbiB3ZSBjYW4gYXNzaWduIHRoZSB0YXhvbm9teSB0byB0aGUgZGF0YQ0KDQpgYGB7cn0NCmZ1bmdfZ3VpbGQgPC0gZnVuZ3VpbGRfYXNzaWduKGZ1bmdfZGF0YSwgZGIgPSBmdW5nKQ0KYGBgDQoNCg0KTm93IGl0J3MgdGltZSBmb3IgRlVOR3VpbGQgdG8gYmVjb21lIGEgcGh5bG9zZXEgb2JqZWN0LiBTZWUgSG93IHRvIG1ha2UgYSBwaHlsb3NlcSBvYmplY3QgZmlyc3QsIEknbGwgb25seSBiZSBleHBsYWluaW5nIHRoZSBkaWZmZXJlbmNlcyBoZXJlDQoNCmBgYHtyfQ0KI3JlbW92ZSB0aGUgc1YgdmFsdWUNCk1ldGFHXzEgPC0gc2Vs
ZWN0KGZ1bmdfZGF0YSwgLSgiVGF4b25vbXkiKSkNCk1ldGFHXzEgPC0gc2VsZWN0KE1ldGFHXzEsIC0oMSkpDQojUmVtb3ZlIGV4dHJhbmVvdXMgc2FtcGxlcw0KTWV0YUdfMSA8LSBzZWxlY3QoTWV0YUdfMSwgLSgiQ0hCMVAxIjoiRk5QOUIzIikpDQpgYGANCg0KDQpXZSd2ZSBzdG9sZW4gdGhlIHRoZSB0YXhvbm9teSB0YWJsZSBvdXQgb2YgdGhlIGZ1bmdfZGF0YSB0YWJsZSwgdGhlbiB3ZSd2ZSBzcGxpdCB1cCB0aGUgc2luZ2xlIGNvbHVtbiBpbnRvIGVhY2ggb25lIHdpdGggcmVzcGVjdCB0byB0YXhvbg0KYGBge3J9DQpUYXhfMSA8LSBzZWxlY3QoZnVuZ19kYXRhLCAtKCJDSEIxUDEiOiJTNFA5UE8iKSkNClRheF8xIDwtIHNlbGVjdChUYXhfMSwgLSgxKSkNClRheF8xIDwtIGNTcGxpdChUYXhfMSwgIlRheG9ub215IiwgIjoiKQ0KVGF4XzEgPC0gcmVuYW1lKFRheF8xLA0KIEtpbmdkb20gPSBUYXhvbm9teV8xLA0KIFBoeWx1bSA9IFRheG9ub215XzIsDQogQ2xhc3MgPSBUYXhvbm9teV8zLA0KIE9yZGVyID0gVGF4b25vbXlfNCwNCiBGYW1pbHkgPSBUYXhvbm9teV81LA0KIEdlbnVzID0gVGF4b25vbXlfNiwNCiBTcGVjaWVzID0gVGF4b25vbXlfNywNCiAgDQogICkNCmBgYA0KDQpUaGVuIHdlIHRha2UgdGhlIEZVTkd1aWxkIGluZm9ybWF0aW9uIG91dCBvZiB0aGUgZnVuZ3VpbGQgdGFibGUNCg0KYGBge3J9DQpmdW5nX2NhbGxzIDwtIHNlbGVjdChmdW5nX2d1aWxkLCAtKCJDSEIxUDEiOiJUYXhvbm9teSIpKQ0KZnVuZ19jYWxscyA8LSBzZWxlY3QoZnVuZ19jYWxscywgLSgxKSkNCmBgYA0KDQpOb3cgd2UgY29uY2F0ZW5hdGUgdGhlIHRhYmxlDQoNCmBgYHtyfQ0KVGF4XzEgPC0gY2JpbmQoVGF4XzEsZnVuZ19jYWxscykNCmBgYA0KDQpUaGVuIGV2ZXJ5dGhpbmcgZWxzZSBpcyB0aGUgc2FtZSBhcyBhIHJlZ3VsYXIgcGh5bG9zZXEgb2JqZWN0DQoNCmBgYHtyfQ0KI2NhbGwgcm93IHZhbHVlcyBPVFUNCnJvd25hbWVzKE1ldGFHXzEpIDwtIHBhc3RlMCgiT1RVIiwgMTpucm93KE1ldGFHXzEpKQ0KDQpyb3duYW1lcyhUYXhfMSkgPC0gcGFzdGUwKCJPVFUiLCAxOm5yb3coVGF4XzEpKQ0KDQoNCg0KI25vdyByZWFsIHN0dWZmDQojY29udmVydCB0aGVzZSBiYWQgYm95cyB0byBtYXRyaXhlcw0KVGF4XzEgPC0gYXMubWF0cml4KFRheF8xKQ0KTWV0YUdfMSA8LSBhcy5tYXRyaXgoTWV0YUdfMSkNCg0KDQoNCiNjaGVjaw0KY2xhc3MoVGF4XzEpDQpjbGFzcyhNZXRhR18xKQ0KDQoNCg0KDQpPVFVfMSA9IG90dV90YWJsZShNZXRhR18xLCB0YXhhX2FyZV9yb3dzID0gVFJVRSkNClRBWF8xID0gdGF4X3RhYmxlKFRheF8xKQ0KDQoNCiNjb21iaW5lIHRoYXQgZGF0YQ0KcGh5c2VxID0gcGh5bG9zZXEoT1RVXzEsIFRBWF8xKQ0KDQoNCg0KI1lvdSBoYXZlIHRvIGJlIGFic29sdXRlbHkgc3VyZSB5b3Ugd2FudCB0byBydW4gdGhpcyBuZXh0IGNvbW1hbmQgaWYgeW91IGRvLCBzZWUgcGh5bG9zZXEgb2JqZWN0cw0KI3BoeXNlcSA8LSB0
YXhfZ2xvbShwaHlzZXEsIHRheHJhbmsgPSByYW5rX25hbWVzKHBoeXNlcSlbNV0sIE5Bcm0gPSBGQUxTRSkNCg0KDQojaW1wb3J0IHNhbXBsZSBkYXRhDQpLZXkgPC0gcmVhZF9jc3YoIkM6XFxVc2Vyc1xcYW5ndXNcXE9uZURyaXZlIC0gVU5CQ1xcQW5ndXMgQmFsbFxcTGFiIHdvcmtcXEJpb2luZm9ybWF0aWNzXFxLZW56aWVzIERhdGFcXEtleURpdi5jc3YiKQ0KDQpLZXkgPC0gZGF0YS5mcmFtZShLZXlbLC0xXSwgcm93Lm5hbWVzPUtleSROYW1lKQ0Kc2FtcGxlZGF0YSA9IHNhbXBsZV9kYXRhKEtleSkNCg0KcGh5c2VxX0tleSA9IG1lcmdlX3BoeWxvc2VxKHBoeXNlcSwgc2FtcGxlZGF0YSkNCg0KYGBgDQoNCkRpZCBzb21ldGhpbmcgYnJlYWs/IHRyeSByZXJ1bmluZyAicm93bmFtZXMoVGF4XzEpIDwtIHBhc3RlMCgiT1RVIiwgMTpucm93KFRheF8xKSkiLCBzb21ldGltZXMgaXRzIG9uIHRoZSBmcml0eg0KDQo=