By Angus Ball

Is funguild not FUN enough for you?? Well fungaltraits is FUN raised to the power of FUN (literally it is: https://github.com/traitecoevo/fungaltraits).

FUNGuild is technically a bigger database (by 9%), but it includes assignments that are only rated as “possible”, so I suppose FungalTraits is better since all of its info is exact. But whatever. The main problem is that FungalTraits is meant for an Excel user (and thus doesn’t have nice commands to just do everything for me), and obviously we are in R, so let’s goooo

Load the packages

library(phyloseq)
library(dplyr)
library(tidyverse)
library(splitstackshape)

Download the database from https://docs.google.com/spreadsheets/d/1cxImJWMYVTr6uIQXcTLwK1YNNzQvKJJifzzNpKCM6O0/edit?usp=sharing — you want the locked spreadsheet, V1.2. I don’t know how to check whether there is a newer version; V1.2 was published in 2020/2021, so keep that in mind. This doesn’t seem to be implemented as nicely as FUNGuild (and the GitHub repo is confusing as all hell too…)

Either way:

# FungalTraits database export (read with base R's read.csv)
fung <- read.csv(
  file = "C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\Lisas data\\FungalTraits 1.2_ver_16Dec_2020 - V.1.2.csv"
)


# Sample information: a single csv holding both the taxonomy and the OTU table
OTU_TAX <- read_csv(
  file = "C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\Kenzies Data\\ITS-dada2_nochim_tax.csv"
)

# The key
Key <- read_csv(
  file = "C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\Kenzies Data\\KeyDiv.csv"
)

This is what OTU_TAX, Key and fung look like

# Auto-print each of the three loaded objects to inspect them.
fung
OTU_TAX
Key

Then convert your data into OTU and tax tables (Just like in the phyloseq tutorial)

# Taxonomy: keep only the combined taxonomy string ...
taxa <- OTU_TAX %>%
  select("tax.vector")

# Counts: everything except the taxonomy string, minus the first remaining
# column (the sV value) and the samples that do not belong to this data set.
otu <- OTU_TAX %>%
  select(-"tax.vector") %>%
  select(-1) %>%
  select(-("CHB1P1":"FNP9B3"))

# ... then split that string on ":" into one column per taxonomic rank
taxa <- cSplit(taxa, splitCols = "tax.vector", sep = ":")
Warning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUEWarning: 'as.is' should be specified by the caller; using TRUE
# Give the split columns their rank names (new name = old name).
rank_lookup <- c(
  Kingdom = "tax.vector_1",
  Phylum = "tax.vector_2",
  Class = "tax.vector_3",
  Order = "tax.vector_4",
  Family = "tax.vector_5",
  Genus = "tax.vector_6",
  Species = "tax.vector_7"
)
taxa <- dplyr::rename(taxa, dplyr::all_of(rank_lookup))

Then let’s assign the FungalTraits…

# FungalTraits holds its information at the genus level. The taxa table
# already has genus separated into its own column, so a join on genus attaches
# the traits to every row of the taxa table.
taxa.df <- as.data.frame(taxa) # plain data.frame for base::merge()

# taxa labels genera as "g__genusname" while fung$GENUS holds the bare
# "genusname", so strip the prefix into a matching join key.
taxa.df$GENUS <- gsub("g__", "", taxa.df$Genus)

# The row order must keep matching the OTU table, and merge() re-sorts rows by
# the key. Record the current row position so the order can be restored.
taxa.df$ASV <- seq_len(nrow(taxa.df))

# Left join: keep every row of the taxa table, whether or not its genus exists
# in the FungalTraits database; drop database rows not present in the samples.
taxa_fung <- merge(
  taxa.df,
  fung,
  by = "GENUS",
  all.x = TRUE,
  all.y = FALSE
)

# A genus listed more than once in fung would duplicate taxa rows here, and
# the taxonomy would silently stop lining up with the OTU table. Fail loudly.
if (nrow(taxa_fung) != nrow(taxa.df)) {
  stop(
    "merge() returned ", nrow(taxa_fung), " rows for ", nrow(taxa.df),
    " taxa; check fung$GENUS for duplicated genera.",
    call. = FALSE
  )
}

# Drop the join key and the database's copies of the rank columns, then strip
# the ".x" suffix merge() added to our own rank columns. The dplyr verbs are
# namespaced so another attached package cannot mask them.
taxa_fung <- taxa_fung %>%
  dplyr::select(
    -dplyr::all_of(c("GENUS", "Phylum.y", "Family.y", "Class.y", "Order.y"))
  ) %>%
  dplyr::rename(
    Order = Order.x,
    Phylum = Phylum.x,
    Class = Class.x,
    Family = Family.x
  )

# Restore the original row order so rows match the OTU table again ...
taxa_fung <- taxa_fung[order(taxa_fung$ASV), ]

# ... and remove the helper column.
taxa_fung <- dplyr::select(taxa_fung, -"ASV")

And done! Now just use taxa_fung as your taxa table and finish making your phyloseq object!

# phyloseq needs plain matrices whose row names identify the taxa. Convert
# first and name the rows afterwards: otu is a tibble, and setting row names
# on a tibble is deprecated.
otu <- as.matrix(otu)
taxa_fung <- as.matrix(taxa_fung)

# Both tables get the same positional labels (OTU1, OTU2, ...), so they must
# have the same number of rows for the labels to refer to the same taxa.
stopifnot(nrow(otu) == nrow(taxa_fung))

rownames(otu) <- paste0("OTU", seq_len(nrow(otu)))
rownames(taxa_fung) <- paste0("OTU", seq_len(nrow(taxa_fung)))



#check: both objects should now be matrices
class(taxa_fung)
[1] "matrix" "array" 
class(otu)
[1] "matrix" "array" 
# Wrap the count matrix (taxa in rows) and the taxonomy matrix in their
# phyloseq classes
OTU <- otu_table(object = otu, taxa_are_rows = TRUE)
TAX <- tax_table(object = taxa_fung)


# Bundle both components into one phyloseq object
physeq <- phyloseq(OTU, TAX)



#Be absolutely sure you want to run this next command; if you do, see "phyloseq objects"
#physeq <- tax_glom(physeq, taxrank = rank_names(physeq)[5], NArm = FALSE)


# Import the sample data (the key)
Key <- read_csv(
  file = "C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\Bioinformatics\\Kenzies Data\\KeyDiv.csv"
)
Rows: 61 Columns: 6── Column specification ──────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): Name, Location, Layer, Treatment, Site, FullName
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Move the Name column into the row names and keep the remaining columns as
# sample variables. Name is dropped by name rather than by position, so this
# still works if Name is not the first column of the key.
Key <- data.frame(Key[, names(Key) != "Name"], row.names = Key$Name)
sampledata <- sample_data(Key)

# Add the sample data to the OTU/taxonomy object.
physeq_Key <- merge_phyloseq(physeq, sampledata)

Don’t forget to import your sequence information into your phyloseq object too!!

# NOTE(review): this path points at a 16S file ("Angus-16S\\16S-sv_seqs.csv"),
# while the OTU table in this notebook was read from an ITS file. Confirm this
# is the sequence file that belongs to this data set.
Seq_1 <- read_csv("C:\\Users\\angus\\OneDrive - UNBC\\Angus Ball\\Lab work\\ULTRA\\They call me... data\\Angus-16S\\16S-sv_seqs.csv")
head(Seq_1)
sequences <- Biostrings::DNAStringSet(Seq_1$Seq)

# Taxa names are assigned to the sequences by position, so there must be
# exactly one sequence per taxon. Stop instead of mislabelling sequences.
# NOTE(review): this also assumes the sequences are in the same order as the
# rows of the OTU table -- confirm against how the file was exported.
stopifnot(length(sequences) == length(taxa_names(physeq_Key)))
names(sequences) <- taxa_names(physeq_Key)
physeq_Key <- merge_phyloseq(physeq_Key, sequences)
LS0tDQp0aXRsZTogImFzc2lnbmluZyB0YXhvbm9teSB0aHJvdWdoIGZ1bmdhbHRyYWl0cyINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQpCeSBBbmd1cyBCYWxsDQoNCklzIGZ1bmd1aWxkIG5vdCBGVU4gZW5vdWdoIGZvciB5b3U/PyBXZWxsIGZ1bmdhbHRyYWl0cyBpcyBGVU4gcmFpc2VkIHRvIHRoZSBwb3dlciBvZiBGVU4gKGxpdGVyYWxseSBpdCBpczogaHR0cHM6Ly9naXRodWIuY29tL3RyYWl0ZWNvZXZvL2Z1bmdhbHRyYWl0cykuIA0KDQpGVU5HdWlsZCBpcyB0ZWNobmljYWxseSBhIGJpZ2dlciBkYXRhYmFzZSAoYnkgOSUpLCBidXQgaW5jbHVkZXMgcG9zc2libGUgaW5mb3JtYXRpb24gc28gSSBzdXBwb3NlIEZ1bmdhbHRyYWl0cyBpcyBiZXR0ZXIgc2luY2UgYWxsIG9mIGl0cyBpbmZvIGlzIGV4YWN0aW5nLCBidXQgd2hhdGV2ZXIuIHRoZSBtYWluIHByb2JsZW0gd2l0aCBpdCBpcyBGdW5nYWx0cmFpdHMgaXMgbWVhbnQgZm9yIGFuIGV4Y2VsIHVzZXIgKGFuZCB0aHVzIGRvZXNuJ3QgaGF2ZSBuaWNlIGNvbW1hbmRzIHRvIGp1c3QgZG8gZXZlcnl0aGluZyBmb3IgbWUpLCBhbmQgb2J2aW91c2x5IHdlIGFyZSBpbiBSIHNvIGxldHMgZ29vb28NCg0KDQpMb2FkIHRoZSBwYWNrYWdlcw0KYGBge3J9DQpsaWJyYXJ5KHBoeWxvc2VxKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShzcGxpdHN0YWNrc2hhcGUpDQpsaWJyYXJ5KEJpb3N0cmluZ3MpDQpgYGANCg0KDQoNCg0KZG93bmxvYWQgdGhlIGRhdGFiYXNlLi4uIGZyb20gaHR0cHM6Ly9kb2NzLmdvb2dsZS5jb20vc3ByZWFkc2hlZXRzL2QvMWN4SW1KV01ZVlRyNnVJUVhjVEx3SzFZTk56UXZLSkppZnp6TnBLQ002TzAvZWRpdD91c3A9c2hhcmluZw0KeW91IHdhbnQgdGhlIGxvY2tlZCBzcHJlYWRzaGVldCBWMS4yLCBJRGsgaG93IHRvIGNoZWNrIGlmIHRoZXJlcyBhIG5ldyB2ZXJzaW9uLCBWMS4yIHdhcyBwdWJsaXNoZWQgaW4gMjAyMC8yMDIxIHNvIGtlZXAgdGhhdCBpbiBtaW5kLiBUaGlzIGRpZG4ndCBzZWVtIHRvIGJlIGltcGxlbWVudGVkIGFzIGNvb2wgYXMgZnVuZ3VpbGQgKGFzIHRoZSBnaXRodWIgaXMgY29uZnVzaW5nIGFzIGFsbCBoZWxsIHRvby4uLikNCg0KZWl0aGVyd2F5IA0KDQoNCmBgYHtyfQ0KI2ltcG9ydCB5b3VyIGRhdGFiYXNlDQpmdW5nIDwtIHJlYWQuY3N2KCJDOlxcVXNlcnNcXGFuZ3VzXFxPbmVEcml2ZSAtIFVOQkNcXEFuZ3VzIEJhbGxcXExhYiB3b3JrXFxCaW9pbmZvcm1hdGljc1xcTGlzYXMgZGF0YVxcRnVuZ2FsVHJhaXRzIDEuMl92ZXJfMTZEZWNfMjAyMCAtIFYuMS4yLmNzdiIpDQoNCg0KI3RoZW4geW91ciBzYW1wbGUgaW5mb3JtYXRpb24NCk9UVV9UQVggPC0gcmVhZF9jc3YoIkM6XFxVc2Vyc1xcYW5ndXNcXE9uZURyaXZlIC0gVU5CQ1xcQW5ndXMgQmFsbFxcTGFiIHdvcmtcXEJpb2luZm9ybWF0aWNzXFxLZW56aWVzIERhdGFcXElUUy1kYWRhMl9ub2NoaW1fdGF4LmNzdiIpDQojdGhpcyBjc3Yg
ZmlsZSBjb250YWlucyBib3RoIHRoZSB0YXhvbm9teSBhbmQgdGhlIE9UVSB0YWJsZQ0KDQojWW91ciBrZXkNCktleSA8LSByZWFkX2NzdigiQzpcXFVzZXJzXFxhbmd1c1xcT25lRHJpdmUgLSBVTkJDXFxBbmd1cyBCYWxsXFxMYWIgd29ya1xcQmlvaW5mb3JtYXRpY3NcXEtlbnppZXMgRGF0YVxcS2V5RGl2LmNzdiIpDQoNCmBgYA0KDQpUaGlzIGlzIHdoYXQgT1RVX1RBWCwgS2V5IGFuZCBGdW5nIGxvb2sgbGlrZQ0KYGBge3J9DQpmdW5nDQpgYGANCg0KYGBge3J9DQpPVFVfVEFYDQpgYGANCg0KYGBge3J9DQpLZXkNCmBgYA0KDQpUaGVuIGNvbnZlcnQgeW91ciBkYXRhIGludG8gT1RVIGFuZCB0YXggdGFibGVzIChKdXN0IGxpa2UgaW4gdGhlIHBoeWxvc2VxIHR1dG9yaWFsKQ0KDQpgYGB7cn0NCnRheGEgPC0gc2VsZWN0KE9UVV9UQVgsICgidGF4LnZlY3RvciIpKQ0KI3JlbW92ZSB0aGUgc1YgdmFsdWUNCm90dSA8LSBzZWxlY3QoT1RVX1RBWCwgLSgidGF4LnZlY3RvciIpKQ0Kb3R1IDwtIHNlbGVjdChvdHUsIC0oMSkpDQojUmVtb3ZlIGV4dHJhbmVvdXMgc2FtcGxlcyBmb3IgdGhpcyBkYXRhIHNldA0Kb3R1IDwtIHNlbGVjdChvdHUsIC0oIkNIQjFQMSI6IkZOUDlCMyIpKQ0KDQp0YXhhIDwtIGNTcGxpdCh0YXhhLCAidGF4LnZlY3RvciIsICI6Iikjc3BsaXR0aW5nIHVwIHRheGEgaW50byBtdWx0aXBsZSBjb2x1bW5zIGJhc2VkIG9uIGNsYXNzDQojcmVuYW1pbmcgdGhvc2UgY29sdW1ucw0KdGF4YSA8LSBkcGx5cjo6cmVuYW1lKHRheGEsDQogS2luZ2RvbSA9ICB0YXgudmVjdG9yXzEsDQogUGh5bHVtID0gdGF4LnZlY3Rvcl8yLA0KIENsYXNzID0gdGF4LnZlY3Rvcl8zLA0KIE9yZGVyID0gdGF4LnZlY3Rvcl80LA0KIEZhbWlseSA9IHRheC52ZWN0b3JfNSwNCiBHZW51cyA9IHRheC52ZWN0b3JfNiwNCiBTcGVjaWVzID0gdGF4LnZlY3Rvcl83LA0KICANCiAgKQ0KYGBgDQoNCg0KDQoNClRoZW4gbGV0cyBhc3NpZ24gdGhlIGZ1bmdhbHRyYWl0cy4uLg0KYGBge3J9DQojRnVuZ2FsdHJhaXRzIGhhcyBpbmZvcm1hdGlvbiBvbiB0aGUgZ2VudXMgbGV2ZWwsIGx1Y2t5IGZvciB1cywgdGhlIHRheGEgdGFibGUgYWxyZWFkeSBoYXMgdGhlIGdlbnVzIGxldmVsIHNlcGVyYXRlZCEgc28gYSBzaW1wbGUgY29tcGFyaXNpb24gZnVuY3Rpb24gd2lsbCBtZXJnZSB0aGVzZSB0YWJsZXMgdG8gb3VyIGxpa2luZy4NCnRheGEuZGYgPC0gYXMuZGF0YS5mcmFtZSh0YXhhKSAjY29udmVydHMgdGF4YSB0byBkZg0KDQojYmVjYXVzZSB0YXhhIGhhcyBnZW51cyBsYWJlbGVkIGFzIGdfX2dlbnVzbmFtZSwgYW5kIGZ1bmcgaGFzIEdFTlVTIGxhYmVsbGVkIGFzIGdlbnVzbmFtZSwgd2UgbmVlZCB0byBkbyBhIHF1aWNrIGZpeCBvbiBvdXIgdGF4YSBkZg0KdGF4YS5kZiRHRU5VUyA8LSBnc3ViKCJnX18iLCAiIiwgdGF4YS5kZiRHZW51cykNCg0KI3NpbmNlIG9yZGVyIG9mIHRoZSBvdHUgdGFibGUgaXMgdmVyeSBpbXBvcnRhbnQsIHdlJ3JlIGp1c3Qg
Z29ubmEgY3JlYXRlIGEgY29sdW1uIHRoYXQganVzdCBvcmRlcnMgYWxsIHRoZSBzcGVjaWVzIGluIHRoZSBvcmRlciB0aGV5IGFyZSBpbiBub3cgKHRoaXMgaXMgaW1wb3J0YW50IGJjIG1lcmdlIGJlbG93IHdpbGwgbWVzcyB1cCBvdXIgb3JkZXIpDQp0YXhhLmRmJEFTViA8LSBzZXEuaW50KG5yb3codGF4YS5kZikpDQoNCg0KI1dlJ2xsIG1lcmdlIHRoZSB0d28gdGFibGVzIGtlZXBpbmcgYWxsIHRoZSByb3dzIGZyb20gb3VyIG90dSB0YWJsZSwgYnV0IG5vdCBhbGwgdGhlIHJvd3MgZnJvbSB0aGUgZGF0YWJhc2UNCnRheGFfZnVuZyA8LSBtZXJnZSh0YXhhLmRmLCANCiAgICAgICAgICAgICAgICAgICBmdW5nLCANCiAgICAgICAgICAgICAgICAgICBieS54ID0gIkdFTlVTIiwgDQogICAgICAgICAgICAgICAgICAgYnkueSA9ICJHRU5VUyIsIA0KICAgICAgICAgICAgICAgICAgIGFsbC54ID0gVFJVRSwgI3dlIHdhbnQgYWxsIHRoZSByb3dzIGluIHRheGEgdG8gYmUgY29uc2VydmVkLCByZWdhcmRsZXNzIG9mIGlmIHRoZXkgZXhpc3QgaW4gdGhlIGZ1Z2FsdHJhaXRzIGRhdGFiYXNlDQogICAgICAgICAgICAgICAgICAgYWxsLnkgPSBGQUxTRSkgI3dlIGRvbnQgY2FyZSBhYm91dCByb3dzIGluIHRoZSBkYXRhYmFzZSB0aGF0IGRvbid0IGV4aXN0IHdpdGhpbiBvdXIgc2FtcGxlDQoNCiN0aGVuIGxldHMgY2xlYW4gdXAgdGF4YV9mdW5nIGJ5IHJlbW92aW5nIGNvbHVtbnMgdGhhdCBjb250YWluIHNpbWlsYXIgaW5mb3JtYXRpb24NCnRheGFfZnVuZzwtIHRheGFfZnVuZyAlPiUgc2VsZWN0KGMoLSJHRU5VUyIsLSJQaHlsdW0ueSIsLSJGYW1pbHkueSIsLSJDbGFzcy55IiwgLSJPcmRlci55IikpDQojcmVuYW1pbmcgY29sdW1ucyBmb3IgY2xhcml0eQ0KdGF4YV9mdW5nIDwtIHJlbmFtZSh0YXhhX2Z1bmcsIA0KICAgICAgICAgT3JkZXIgPSBPcmRlci54LA0KICAgICAgICAgUGh5bHVtID0gUGh5bHVtLngsDQogICAgICAgICBDbGFzcyA9IENsYXNzLngsDQogICAgICAgICBGYW1pbHkgPSBGYW1pbHkueCkNCg0KI2lmIHlvdSBub3RpY2VkIHRoZSBtZXJnZSBjb21tYW5kIGNoYW5nZWQgdGhlIG9yZGVyIG9mIHRoZSB0YXhhISBvaCBubyEgdGhleSBuZWVkIHRvIGJlIGluIHRoZSByaWdodCBvcmRlciB0byBtYXRjaCB0byB0aGUgb3R1IHRhYmxlIGx1Y2tpbHkgd2UgbWFkZSBhIGNvbHVtbiBvZiB0aGUgcmlnaHQgb3JkZXIgc28gd2UgY2FuIHJlb3JkZXIgYnkgdGhhdA0KdGF4YV9mdW5nIDwtIHRheGFfZnVuZ1tvcmRlcih0YXhhX2Z1bmckQVNWKSxdDQoNCiN0aGVuIHJlbW92ZSBpdA0KdGF4YV9mdW5nIDwtIHNlbGVjdCh0YXhhX2Z1bmcsIC0iQVNWIikNCmBgYA0KDQoNCkFuZCBkb25lISBub3cganVzdCB1c2UgdGF4YV9mdW5nIGFzIHlvdXIgdGF4YSB0YWJsZSBhbmQgZmluaXNoIG1ha2luZyB5b3VyIHBoeWxvc2VxIG9iamVjdCENCg0KYGBge3J9DQojY2FsbCByb3cgdmFsdWVzIE9UVQ0Kcm93bmFtZXMob3R1KSA8LSBwYXN0ZTAo
Ik9UVSIsIDE6bnJvdyhvdHUpKQ0KDQoNCnRheGFfZnVuZyA8LSBhcy5tYXRyaXgodGF4YV9mdW5nKQ0Kcm93bmFtZXModGF4YV9mdW5nKSA8LSBwYXN0ZTAoIk9UVSIsIDE6bnJvdyh0YXhhX2Z1bmcpKQ0KDQoNCg0KI25vdyByZWFsIHN0dWZmDQojY29udmVydCB0aGVzZSBiYWQgYm95cyB0byBtYXRyaXhlcw0KDQpvdHUgPC0gYXMubWF0cml4KG90dSkNCg0KDQoNCiNjaGVjaw0KY2xhc3ModGF4YV9mdW5nKQ0KY2xhc3Mob3R1KQ0KDQoNCg0KDQpPVFUgPSBvdHVfdGFibGUob3R1LCB0YXhhX2FyZV9yb3dzID0gVFJVRSkNClRBWCA9IHRheF90YWJsZSh0YXhhX2Z1bmcpDQoNCg0KI2NvbWJpbmUgdGhhdCBkYXRhDQpwaHlzZXEgPSBwaHlsb3NlcShPVFUsIFRBWCkNCg0KDQoNCiNZb3UgaGF2ZSB0byBiZSBhYnNvbHV0ZWx5IHN1cmUgeW91IHdhbnQgdG8gcnVuIHRoaXMgbmV4dCBjb21tYW5kIGlmIHlvdSBkbywgc2VlIHBoeWxvc2VxIG9iamVjdHMNCiNwaHlzZXEgPC0gdGF4X2dsb20ocGh5c2VxLCB0YXhyYW5rID0gcmFua19uYW1lcyhwaHlzZXEpWzVdLCBOQXJtID0gRkFMU0UpDQoNCg0KI2ltcG9ydCBzYW1wbGUgZGF0YQ0KS2V5IDwtIHJlYWRfY3N2KCJDOlxcVXNlcnNcXGFuZ3VzXFxPbmVEcml2ZSAtIFVOQkNcXEFuZ3VzIEJhbGxcXExhYiB3b3JrXFxCaW9pbmZvcm1hdGljc1xcS2VuemllcyBEYXRhXFxLZXlEaXYuY3N2IikNCg0KS2V5IDwtIGRhdGEuZnJhbWUoS2V5WywtMV0sIHJvdy5uYW1lcz1LZXkkTmFtZSkNCnNhbXBsZWRhdGEgPSBzYW1wbGVfZGF0YShLZXkpDQoNCnBoeXNlcV9LZXkgPSBtZXJnZV9waHlsb3NlcShwaHlzZXEsIHNhbXBsZWRhdGEpDQpgYGANCg0KRG9uJ3QgZm9yZ2V0IHRvIGltcG9ydCB5b3VyIHNlcXVlbmNlIGluZm9ybWF0aW9uIGludG8geW91ciBwaHlsb3NlcSBvYmplY3QgdG9vISENCg0KYGBge3J9DQpTZXFfMSA8LSByZWFkX2NzdigiQzpcXFVzZXJzXFxhbmd1c1xcT25lRHJpdmUgLSBVTkJDXFxBbmd1cyBCYWxsXFxMYWIgd29ya1xcVUxUUkFcXFRoZXkgY2FsbCBtZS4uLiBkYXRhXFxBbmd1cy0xNlNcXDE2Uy1zdl9zZXFzLmNzdiIpDQpgYGANCg0KYGBge3J9DQpoZWFkKFNlcV8xKQ0KYGBgDQoNCmBgYHtyfQ0Kc2VxdWVuY2VzIDwtIEJpb3N0cmluZ3M6OkROQVN0cmluZ1NldChTZXFfMSRTZXEpDQpuYW1lcyhzZXF1ZW5jZXMpIDwtIHRheGFfbmFtZXMocGh5c2VxX0tleSkNCnBoeXNlcV9LZXkgPC0gbWVyZ2VfcGh5bG9zZXEocGh5c2VxX0tleSwgc2VxdWVuY2VzKQ0KYGBgDQoNCg==