By Angus Ball
Is FUNGuild not FUN enough for you? Well, FungalTraits is FUN raised
to the power of FUN (literally, it is: https://github.com/traitecoevo/fungaltraits).
FUNGuild is technically a bigger database (by 9%), but it includes
entries that are only “possible”, so I suppose FungalTraits is better
since all of its info is exact, but whatever. The main problem with
FungalTraits is that it is meant for an Excel user (and thus doesn’t
have nice commands to just do everything for me), and obviously we are
in R, so let’s go.
Load the packages
library(phyloseq)
library(dplyr)
library(tidyverse)
library(splitstackshape)
Download the database from https://docs.google.com/spreadsheets/d/1cxImJWMYVTr6uIQXcTLwK1YNNzQvKJJifzzNpKCM6O0/edit?usp=sharing
You want the locked spreadsheet V1.2. I don’t know how to check whether
there is a newer version; V1.2 was published in 2020/2021, so keep that
in mind. This doesn’t seem to be implemented as nicely as FUNGuild (and
the GitHub repository is confusing as all hell too…).
Either way:
#FungalTraits database (genus-level trait table), read with base read.csv
fung <- read.csv(
  file = "C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\Lisas data\\FungalTraits 1.2_ver_16Dec_2020 - V.1.2.csv"
)
#sample information: this one csv holds both the taxonomy strings and the
#OTU table
OTU_TAX <- read_csv(
  file = "C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\Kenzies Data\\ITS-dada2_nochim_tax.csv"
)
#the key describing each sample
Key <- read_csv(
  file = "C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\Kenzies Data\\KeyDiv.csv"
)
This is what OTU_TAX, Key and fung look like:
#print each imported table to eyeball its columns before reshaping anything
#the FungalTraits database
fung
#the combined taxonomy + OTU table
OTU_TAX
#the sample key
Key
Then convert your data into OTU and tax tables (Just like in the
phyloseq tutorial)
#the taxonomy lives in the single "tax.vector" column; keep only that column
taxa <- OTU_TAX %>% select("tax.vector")
#the otu table is everything else: drop the taxonomy column, then the first
#column (the sV value), then the samples that are extraneous for this data set
otu <- OTU_TAX %>%
  select(-"tax.vector") %>%
  select(-1) %>%
  select(-("CHB1P1":"FNP9B3"))

#split the taxonomy string on ":" into one column per piece
taxa <- cSplit(taxa, "tax.vector", ":")
Warning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUE
#the split left columns tax.vector_1 ... tax.vector_7; give each one the name
#of the rank it holds (new name = "old column")
taxa <- dplyr::rename(
  taxa,
  all_of(c(
    Kingdom = "tax.vector_1",
    Phylum  = "tax.vector_2",
    Class   = "tax.vector_3",
    Order   = "tax.vector_4",
    Family  = "tax.vector_5",
    Genus   = "tax.vector_6",
    Species = "tax.vector_7"
  ))
)
Then let’s assign the FungalTraits annotations…
#FungalTraits has its information on the genus level, and the taxa table
#already has the genus level separated out, so the traits can be attached with
#a single merge on the genus name.
taxa.df <- as.data.frame(taxa) #converts taxa to a plain data frame

#taxa labels genera as "g__genusname" while fung$GENUS holds just "genusname",
#so strip the prefix into a new GENUS column that serves as the merge key
taxa.df$GENUS <- gsub("g__", "", taxa.df$Genus)

#merge() re-sorts the rows, but the row order has to keep matching the otu
#table, so record every row's current position first
taxa.df$ASV <- seq_len(nrow(taxa.df))

#keep every row of taxa whether or not its genus exists in the database
#(all.x), and drop database genera that never occur in the samples (all.y)
taxa_fung <- merge(taxa.df,
                   fung,
                   by = "GENUS",
                   all.x = TRUE,
                   all.y = FALSE)

#a genus that appears more than once in the database would duplicate taxa rows
#here, and the taxonomy would then silently stop lining up with the otu table
if (nrow(taxa_fung) != nrow(taxa.df)) {
  stop("merge changed the number of taxa rows (", nrow(taxa.df), " -> ",
       nrow(taxa_fung), "); check fung$GENUS for duplicated genus names",
       call. = FALSE)
}

#drop the merge key and the database's copies of ranks we already have
taxa_fung <- taxa_fung %>%
  dplyr::select(c(-"GENUS", -"Phylum.y", -"Family.y", -"Class.y", -"Order.y"))

#give our own rank columns their plain names back; dplyr:: is spelled out (as
#for the tax.vector rename earlier) so that a rename() exported by another
#attached package cannot be picked up instead
taxa_fung <- dplyr::rename(taxa_fung,
                           Order = Order.x,
                           Phylum = Phylum.x,
                           Class = Class.x,
                           Family = Family.x)

#put the rows back into their original order so they match the otu table
taxa_fung <- taxa_fung[order(taxa_fung$ASV), ]

#the helper column has done its job
taxa_fung <- dplyr::select(taxa_fung, -"ASV")
And done! Now just use taxa_fung as your taxa table and finish making
your phyloseq object!
#call row values OTU1, OTU2, ...
#otu is still a tibble at this point and assigning row names to a tibble is
#deprecated, so convert it to a matrix before naming the rows
otu <- as.matrix(otu)
rownames(otu) <- paste0("OTU", seq_len(nrow(otu)))
Warning: Setting row names on a tibble is deprecated.
#turn the annotated taxonomy into a matrix and give its rows the same
#OTU1, OTU2, ... labels as the otu table (seq_len also copes with zero rows)
taxa_fung <- as.matrix(taxa_fung)
rownames(taxa_fung) <- paste0("OTU", seq_len(nrow(taxa_fung)))
#now real stuff
#the otu table has to be a matrix as well
otu <- as.matrix(otu)
#check that the conversion worked
class(taxa_fung)
[1] "matrix" "array"
#same check for the otu table
class(otu)
[1] "matrix" "array"
#wrap the two matrices in their phyloseq classes; the rows of otu are the taxa
OTU <- otu_table(otu, taxa_are_rows = TRUE)
TAX <- tax_table(taxa_fung)
#combine counts and taxonomy into one phyloseq object
physeq <- phyloseq(OTU, TAX)
#optional agglomeration step, left commented out on purpose: be absolutely
#sure you want it before running it, see phyloseq objects
#physeq <- tax_glom(physeq, taxrank = rank_names(physeq)[5], NArm = FALSE)
#import sample data (the key is read from disk again here)
Key <- read_csv(
  "C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\Kenzies Data\\KeyDiv.csv"
)
Rows: 61 Columns: 6── Column specification ──────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): Name, Location, Layer, Treatment, Site, FullName
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#move the Name column into the row names and drop it from the columns
#(this relies on Name being the first column of the key)
Key <- data.frame(Key[, -1], row.names = Key$Name)
#convert the key to phyloseq sample data and add it to the phyloseq object
sampledata <- sample_data(Key)
physeq_Key <- merge_phyloseq(physeq, sampledata)
Don’t forget to import your sequence information into your phyloseq
object too!!
#read the table of sequences; the sequence strings are taken from its Seq column
#NOTE(review): this path points at a 16S folder while the OTU table above is
#ITS - confirm this is the sequence file that belongs to this data set
Seq_1 <- read_csv("C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\ULTRA\\They call me... data\\Angus-16S\\16S-sv_seqs.csv")
head(Seq_1)
sequences <- Biostrings::DNAStringSet(Seq_1$Seq)
#the taxa names are assigned to the sequences purely by position, so the two
#must at least have the same length; this assumes the rows of the sequence
#file are in the same order as the OTU table - TODO confirm
if (length(sequences) != length(taxa_names(physeq_Key))) {
  stop("sequence file has ", length(sequences), " sequences but the phyloseq ",
       "object has ", length(taxa_names(physeq_Key)), " taxa",
       call. = FALSE)
}
names(sequences) <- taxa_names(physeq_Key)
physeq_Key <- merge_phyloseq(physeq_Key, sequences)
LS0tDQp0aXRsZTogImFzc2lnbmluZyB0YXhvbm9teSB0aHJvdWdoIGZ1bmdhbHRyYWl0cyINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQpCeSBBbmd1cyBCYWxsDQoNCklzIGZ1bmd1aWxkIG5vdCBGVU4gZW5vdWdoIGZvciB5b3U/PyBXZWxsIGZ1bmdhbHRyYWl0cyBpcyBGVU4gcmFpc2VkIHRvIHRoZSBwb3dlciBvZiBGVU4gKGxpdGVyYWxseSBpdCBpczogaHR0cHM6Ly9naXRodWIuY29tL3RyYWl0ZWNvZXZvL2Z1bmdhbHRyYWl0cykuIA0KDQpGVU5HdWlsZCBpcyB0ZWNobmljYWxseSBhIGJpZ2dlciBkYXRhYmFzZSAoYnkgOSUpLCBidXQgaW5jbHVkZXMgcG9zc2libGUgaW5mb3JtYXRpb24gc28gSSBzdXBwb3NlIEZ1bmdhbHRyYWl0cyBpcyBiZXR0ZXIgc2luY2UgYWxsIG9mIGl0cyBpbmZvIGlzIGV4YWN0aW5nLCBidXQgd2hhdGV2ZXIuIHRoZSBtYWluIHByb2JsZW0gd2l0aCBpdCBpcyBGdW5nYWx0cmFpdHMgaXMgbWVhbnQgZm9yIGFuIGV4Y2VsIHVzZXIgKGFuZCB0aHVzIGRvZXNuJ3QgaGF2ZSBuaWNlIGNvbW1hbmRzIHRvIGp1c3QgZG8gZXZlcnl0aGluZyBmb3IgbWUpLCBhbmQgb2J2aW91c2x5IHdlIGFyZSBpbiBSIHNvIGxldHMgZ29vb28NCg0KDQpMb2FkIHRoZSBwYWNrYWdlcw0KYGBge3J9DQpsaWJyYXJ5KHBoeWxvc2VxKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShzcGxpdHN0YWNrc2hhcGUpDQpsaWJyYXJ5KEJpb3N0cmluZ3MpDQpgYGANCg0KDQoNCg0KZG93bmxvYWQgdGhlIGRhdGFiYXNlLi4uIGZyb20gaHR0cHM6Ly9kb2NzLmdvb2dsZS5jb20vc3ByZWFkc2hlZXRzL2QvMWN4SW1KV01ZVlRyNnVJUVhjVEx3SzFZTk56UXZLSkppZnp6TnBLQ002TzAvZWRpdD91c3A9c2hhcmluZw0KeW91IHdhbnQgdGhlIGxvY2tlZCBzcHJlYWRzaGVldCBWMS4yLCBJRGsgaG93IHRvIGNoZWNrIGlmIHRoZXJlcyBhIG5ldyB2ZXJzaW9uLCBWMS4yIHdhcyBwdWJsaXNoZWQgaW4gMjAyMC8yMDIxIHNvIGtlZXAgdGhhdCBpbiBtaW5kLiBUaGlzIGRpZG4ndCBzZWVtIHRvIGJlIGltcGxlbWVudGVkIGFzIGNvb2wgYXMgZnVuZ3VpbGQgKGFzIHRoZSBnaXRodWIgaXMgY29uZnVzaW5nIGFzIGFsbCBoZWxsIHRvby4uLikNCg0KZWl0aGVyd2F5IA0KDQoNCmBgYHtyfQ0KI2ltcG9ydCB5b3VyIGRhdGFiYXNlDQpmdW5nIDwtIHJlYWQuY3N2KCJDOlxcVXNlcnNcXGFuZ3VzXFxPbmVEcml2ZSAtIFVOQkNcXEFuZ3VzIEJhbGxcXExhYiB3b3JrXFxCaW9pbmZvcm1hdGljc1xcTGlzYXMgZGF0YVxcRnVuZ2FsVHJhaXRzIDEuMl92ZXJfMTZEZWNfMjAyMCAtIFYuMS4yLmNzdiIpDQoNCg0KI3RoZW4geW91ciBzYW1wbGUgaW5mb3JtYXRpb24NCk9UVV9UQVggPC0gcmVhZF9jc3YoIkM6XFxVc2Vyc1xcYW5ndXNcXE9uZURyaXZlIC0gVU5CQ1xcQW5ndXMgQmFsbFxcTGFiIHdvcmtcXEJpb2luZm9ybWF0aWNzXFxLZW56aWVzIERhdGFcXElUUy1kYWRhMl9ub2NoaW1fdGF4LmNzdiIpDQojdGhpcyBjc3Yg
ZmlsZSBjb250YWlucyBib3RoIHRoZSB0YXhvbm9teSBhbmQgdGhlIE9UVSB0YWJsZQ0KDQojWW91ciBrZXkNCktleSA8LSByZWFkX2NzdigiQzpcXFVzZXJzXFxhbmd1c1xcT25lRHJpdmUgLSBVTkJDXFxBbmd1cyBCYWxsXFxMYWIgd29ya1xcQmlvaW5mb3JtYXRpY3NcXEtlbnppZXMgRGF0YVxcS2V5RGl2LmNzdiIpDQoNCmBgYA0KDQpUaGlzIGlzIHdoYXQgT1RVX1RBWCwgS2V5IGFuZCBGdW5nIGxvb2sgbGlrZQ0KYGBge3J9DQpmdW5nDQpgYGANCg0KYGBge3J9DQpPVFVfVEFYDQpgYGANCg0KYGBge3J9DQpLZXkNCmBgYA0KDQpUaGVuIGNvbnZlcnQgeW91ciBkYXRhIGludG8gT1RVIGFuZCB0YXggdGFibGVzIChKdXN0IGxpa2UgaW4gdGhlIHBoeWxvc2VxIHR1dG9yaWFsKQ0KDQpgYGB7cn0NCnRheGEgPC0gc2VsZWN0KE9UVV9UQVgsICgidGF4LnZlY3RvciIpKQ0KI3JlbW92ZSB0aGUgc1YgdmFsdWUNCm90dSA8LSBzZWxlY3QoT1RVX1RBWCwgLSgidGF4LnZlY3RvciIpKQ0Kb3R1IDwtIHNlbGVjdChvdHUsIC0oMSkpDQojUmVtb3ZlIGV4dHJhbmVvdXMgc2FtcGxlcyBmb3IgdGhpcyBkYXRhIHNldA0Kb3R1IDwtIHNlbGVjdChvdHUsIC0oIkNIQjFQMSI6IkZOUDlCMyIpKQ0KDQp0YXhhIDwtIGNTcGxpdCh0YXhhLCAidGF4LnZlY3RvciIsICI6Iikjc3BsaXR0aW5nIHVwIHRheGEgaW50byBtdWx0aXBsZSBjb2x1bW5zIGJhc2VkIG9uIGNsYXNzDQojcmVuYW1pbmcgdGhvc2UgY29sdW1ucw0KdGF4YSA8LSBkcGx5cjo6cmVuYW1lKHRheGEsDQogS2luZ2RvbSA9ICB0YXgudmVjdG9yXzEsDQogUGh5bHVtID0gdGF4LnZlY3Rvcl8yLA0KIENsYXNzID0gdGF4LnZlY3Rvcl8zLA0KIE9yZGVyID0gdGF4LnZlY3Rvcl80LA0KIEZhbWlseSA9IHRheC52ZWN0b3JfNSwNCiBHZW51cyA9IHRheC52ZWN0b3JfNiwNCiBTcGVjaWVzID0gdGF4LnZlY3Rvcl83LA0KICANCiAgKQ0KYGBgDQoNCg0KDQoNClRoZW4gbGV0cyBhc3NpZ24gdGhlIGZ1bmdhbHRyYWl0cy4uLg0KYGBge3J9DQojRnVuZ2FsdHJhaXRzIGhhcyBpbmZvcm1hdGlvbiBvbiB0aGUgZ2VudXMgbGV2ZWwsIGx1Y2t5IGZvciB1cywgdGhlIHRheGEgdGFibGUgYWxyZWFkeSBoYXMgdGhlIGdlbnVzIGxldmVsIHNlcGVyYXRlZCEgc28gYSBzaW1wbGUgY29tcGFyaXNpb24gZnVuY3Rpb24gd2lsbCBtZXJnZSB0aGVzZSB0YWJsZXMgdG8gb3VyIGxpa2luZy4NCnRheGEuZGYgPC0gYXMuZGF0YS5mcmFtZSh0YXhhKSAjY29udmVydHMgdGF4YSB0byBkZg0KDQojYmVjYXVzZSB0YXhhIGhhcyBnZW51cyBsYWJlbGVkIGFzIGdfX2dlbnVzbmFtZSwgYW5kIGZ1bmcgaGFzIEdFTlVTIGxhYmVsbGVkIGFzIGdlbnVzbmFtZSwgd2UgbmVlZCB0byBkbyBhIHF1aWNrIGZpeCBvbiBvdXIgdGF4YSBkZg0KdGF4YS5kZiRHRU5VUyA8LSBnc3ViKCJnX18iLCAiIiwgdGF4YS5kZiRHZW51cykNCg0KI3NpbmNlIG9yZGVyIG9mIHRoZSBvdHUgdGFibGUgaXMgdmVyeSBpbXBvcnRhbnQsIHdlJ3JlIGp1c3Qg
Z29ubmEgY3JlYXRlIGEgY29sdW1uIHRoYXQganVzdCBvcmRlcnMgYWxsIHRoZSBzcGVjaWVzIGluIHRoZSBvcmRlciB0aGV5IGFyZSBpbiBub3cgKHRoaXMgaXMgaW1wb3J0YW50IGJjIG1lcmdlIGJlbG93IHdpbGwgbWVzcyB1cCBvdXIgb3JkZXIpDQp0YXhhLmRmJEFTViA8LSBzZXEuaW50KG5yb3codGF4YS5kZikpDQoNCg0KI1dlJ2xsIG1lcmdlIHRoZSB0d28gdGFibGVzIGtlZXBpbmcgYWxsIHRoZSByb3dzIGZyb20gb3VyIG90dSB0YWJsZSwgYnV0IG5vdCBhbGwgdGhlIHJvd3MgZnJvbSB0aGUgZGF0YWJhc2UNCnRheGFfZnVuZyA8LSBtZXJnZSh0YXhhLmRmLCANCiAgICAgICAgICAgICAgICAgICBmdW5nLCANCiAgICAgICAgICAgICAgICAgICBieS54ID0gIkdFTlVTIiwgDQogICAgICAgICAgICAgICAgICAgYnkueSA9ICJHRU5VUyIsIA0KICAgICAgICAgICAgICAgICAgIGFsbC54ID0gVFJVRSwgI3dlIHdhbnQgYWxsIHRoZSByb3dzIGluIHRheGEgdG8gYmUgY29uc2VydmVkLCByZWdhcmRsZXNzIG9mIGlmIHRoZXkgZXhpc3QgaW4gdGhlIGZ1Z2FsdHJhaXRzIGRhdGFiYXNlDQogICAgICAgICAgICAgICAgICAgYWxsLnkgPSBGQUxTRSkgI3dlIGRvbnQgY2FyZSBhYm91dCByb3dzIGluIHRoZSBkYXRhYmFzZSB0aGF0IGRvbid0IGV4aXN0IHdpdGhpbiBvdXIgc2FtcGxlDQoNCiN0aGVuIGxldHMgY2xlYW4gdXAgdGF4YV9mdW5nIGJ5IHJlbW92aW5nIGNvbHVtbnMgdGhhdCBjb250YWluIHNpbWlsYXIgaW5mb3JtYXRpb24NCnRheGFfZnVuZzwtIHRheGFfZnVuZyAlPiUgc2VsZWN0KGMoLSJHRU5VUyIsLSJQaHlsdW0ueSIsLSJGYW1pbHkueSIsLSJDbGFzcy55IiwgLSJPcmRlci55IikpDQojcmVuYW1pbmcgY29sdW1ucyBmb3IgY2xhcml0eQ0KdGF4YV9mdW5nIDwtIHJlbmFtZSh0YXhhX2Z1bmcsIA0KICAgICAgICAgT3JkZXIgPSBPcmRlci54LA0KICAgICAgICAgUGh5bHVtID0gUGh5bHVtLngsDQogICAgICAgICBDbGFzcyA9IENsYXNzLngsDQogICAgICAgICBGYW1pbHkgPSBGYW1pbHkueCkNCg0KI2lmIHlvdSBub3RpY2VkIHRoZSBtZXJnZSBjb21tYW5kIGNoYW5nZWQgdGhlIG9yZGVyIG9mIHRoZSB0YXhhISBvaCBubyEgdGhleSBuZWVkIHRvIGJlIGluIHRoZSByaWdodCBvcmRlciB0byBtYXRjaCB0byB0aGUgb3R1IHRhYmxlIGx1Y2tpbHkgd2UgbWFkZSBhIGNvbHVtbiBvZiB0aGUgcmlnaHQgb3JkZXIgc28gd2UgY2FuIHJlb3JkZXIgYnkgdGhhdA0KdGF4YV9mdW5nIDwtIHRheGFfZnVuZ1tvcmRlcih0YXhhX2Z1bmckQVNWKSxdDQoNCiN0aGVuIHJlbW92ZSBpdA0KdGF4YV9mdW5nIDwtIHNlbGVjdCh0YXhhX2Z1bmcsIC0iQVNWIikNCmBgYA0KDQoNCkFuZCBkb25lISBub3cganVzdCB1c2UgdGF4YV9mdW5nIGFzIHlvdXIgdGF4YSB0YWJsZSBhbmQgZmluaXNoIG1ha2luZyB5b3VyIHBoeWxvc2VxIG9iamVjdCENCg0KYGBge3J9DQojY2FsbCByb3cgdmFsdWVzIE9UVQ0Kcm93bmFtZXMob3R1KSA8LSBwYXN0ZTAo
Ik9UVSIsIDE6bnJvdyhvdHUpKQ0KDQoNCnRheGFfZnVuZyA8LSBhcy5tYXRyaXgodGF4YV9mdW5nKQ0Kcm93bmFtZXModGF4YV9mdW5nKSA8LSBwYXN0ZTAoIk9UVSIsIDE6bnJvdyh0YXhhX2Z1bmcpKQ0KDQoNCg0KI25vdyByZWFsIHN0dWZmDQojY29udmVydCB0aGVzZSBiYWQgYm95cyB0byBtYXRyaXhlcw0KDQpvdHUgPC0gYXMubWF0cml4KG90dSkNCg0KDQoNCiNjaGVjaw0KY2xhc3ModGF4YV9mdW5nKQ0KY2xhc3Mob3R1KQ0KDQoNCg0KDQpPVFUgPSBvdHVfdGFibGUob3R1LCB0YXhhX2FyZV9yb3dzID0gVFJVRSkNClRBWCA9IHRheF90YWJsZSh0YXhhX2Z1bmcpDQoNCg0KI2NvbWJpbmUgdGhhdCBkYXRhDQpwaHlzZXEgPSBwaHlsb3NlcShPVFUsIFRBWCkNCg0KDQoNCiNZb3UgaGF2ZSB0byBiZSBhYnNvbHV0ZWx5IHN1cmUgeW91IHdhbnQgdG8gcnVuIHRoaXMgbmV4dCBjb21tYW5kIGlmIHlvdSBkbywgc2VlIHBoeWxvc2VxIG9iamVjdHMNCiNwaHlzZXEgPC0gdGF4X2dsb20ocGh5c2VxLCB0YXhyYW5rID0gcmFua19uYW1lcyhwaHlzZXEpWzVdLCBOQXJtID0gRkFMU0UpDQoNCg0KI2ltcG9ydCBzYW1wbGUgZGF0YQ0KS2V5IDwtIHJlYWRfY3N2KCJDOlxcVXNlcnNcXGFuZ3VzXFxPbmVEcml2ZSAtIFVOQkNcXEFuZ3VzIEJhbGxcXExhYiB3b3JrXFxCaW9pbmZvcm1hdGljc1xcS2VuemllcyBEYXRhXFxLZXlEaXYuY3N2IikNCg0KS2V5IDwtIGRhdGEuZnJhbWUoS2V5WywtMV0sIHJvdy5uYW1lcz1LZXkkTmFtZSkNCnNhbXBsZWRhdGEgPSBzYW1wbGVfZGF0YShLZXkpDQoNCnBoeXNlcV9LZXkgPSBtZXJnZV9waHlsb3NlcShwaHlzZXEsIHNhbXBsZWRhdGEpDQpgYGANCg0KRG9uJ3QgZm9yZ2V0IHRvIGltcG9ydCB5b3VyIHNlcXVlbmNlIGluZm9ybWF0aW9uIGludG8geW91ciBwaHlsb3NlcSBvYmplY3QgdG9vISENCg0KYGBge3J9DQpTZXFfMSA8LSByZWFkX2NzdigiQzpcXFVzZXJzXFxhbmd1c1xcT25lRHJpdmUgLSBVTkJDXFxBbmd1cyBCYWxsXFxMYWIgd29ya1xcVUxUUkFcXFRoZXkgY2FsbCBtZS4uLiBkYXRhXFxBbmd1cy0xNlNcXDE2Uy1zdl9zZXFzLmNzdiIpDQpgYGANCg0KYGBge3J9DQpoZWFkKFNlcV8xKQ0KYGBgDQoNCmBgYHtyfQ0Kc2VxdWVuY2VzIDwtIEJpb3N0cmluZ3M6OkROQVN0cmluZ1NldChTZXFfMSRTZXEpDQpuYW1lcyhzZXF1ZW5jZXMpIDwtIHRheGFfbmFtZXMocGh5c2VxX0tleSkNCnBoeXNlcV9LZXkgPC0gbWVyZ2VfcGh5bG9zZXEocGh5c2VxX0tleSwgc2VxdWVuY2VzKQ0KYGBgDQoNCg==