1. Import libraries

# Import libraries
library("phyloseq")
library("ape")
library("ggplot2")
theme_set(theme_bw()) # set black-white theme for all plots

2. Create phyloseq object

# Load OTUs
in_otu = as.matrix(
  read.table('../dadaist_0.9.0/MicrobiomeAnalyst/table.csv', 
             header=TRUE, 
             sep=",", 
             row.names = 1, 
             comment.char="")
  )

# Load taxonomy
in_tax = read.table('../dadaist_0.9.0/MicrobiomeAnalyst/taxonomy.csv',
  header=TRUE, sep=",",
  row.names = 1,
  fill=TRUE,
  na.strings=c("","NA","k__", "d__","p__","c__","o__","f__","g__","s__"),
  comment.char="")
in_tax$sequenceID<-row.names(in_tax)
in_tax <- as.matrix(in_tax)

# Load tree if available
in_tree <- read.tree('../dadaist_0.9.0/MicrobiomeAnalyst/rep-seqs.tree')

# Import metadata
metaIn = read.table('../dadaist_0.9.0/metadata.tsv', 
                    header=TRUE, 
                    sep="\t", 
                    comment.char="") 
rownames(metaIn) <- metaIn[,1]
colnames(metaIn)[1] <- 'sampleID' 
in_metad = sample_data(metaIn)

# Generate phyloseq object
my_OTU = otu_table(in_otu, taxa_are_rows = TRUE)
my_TAX = tax_table(in_tax)
my_physeq = phyloseq(
  my_OTU, 
  my_TAX, 
  in_metad, 
  in_tree)

# Print phyloseq object
my_physeq
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 218 taxa and 20 samples ]
## sample_data() Sample Data:       [ 20 samples by 2 sample variables ]
## tax_table()   Taxonomy Table:    [ 218 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 218 tips and 216 internal nodes ]
# Compare phyloseq object to automatically created one from dadaist
my_physeq_auto<-readRDS('../dadaist_0.9.0/R/phyloseq.rds')
print(my_physeq_auto)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 218 taxa and 20 samples ]
## sample_data() Sample Data:       [ 20 samples by 2 sample variables ]
## tax_table()   Taxonomy Table:    [ 218 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 218 tips and 216 internal nodes ]

3. Create bar charts

# Family colored but OTU level separation
my_physeq_P <- tax_glom(my_physeq,taxrank = "Phylum")
my_barPhyl <- plot_bar(my_physeq_P, fill="Phylum") + 
  scale_fill_discrete(na.value="grey90") + 
  ggtitle("Relative abundances at Phylum level") # color by Phylum

my_physeq_C <- tax_glom(my_physeq,taxrank = "Class")
my_barClass <- plot_bar(my_physeq_C, fill="Class") + 
  scale_fill_discrete(na.value="grey90") + 
  ggtitle("Relative abundances at Class level") # color by Class

# Show bar plots
my_barPhyl

my_barClass

4. Create bubble plots

## Create bubble plots

# Class level
my_physeq_Class = tax_glom(my_physeq, "Class")
my_barClass <- plot_bar(my_physeq_Class) 
my_barClass$layers <- my_barClass$layers[-1]
maxC<-max(otu_table(my_physeq_Class))
my_bubbleClass <- my_barClass + 
    geom_point(aes_string(x="Sample", 
                          y="Class", 
                          size="Abundance", 
                          color = "Sample" ), 
               alpha = 0.7 ) +
    scale_size_continuous(limits = c(0.001,maxC)) +
    xlab("Sample") +
    ylab("Class") +
    ggtitle("Relative abundances at Class level") + 
    labs(caption = "Abundances below 0.001 were considered absent") 

# Order level
my_physeq_Order = tax_glom(my_physeq, "Order")
my_barOrder <- plot_bar(my_physeq_Order) 
my_barOrder$layers <- my_barOrder$layers[-1]
maxC<-max(otu_table(my_physeq_Order))
my_bubbleOrder <- my_barOrder + 
      geom_point(aes_string(x="Sample", 
                            y="Order", 
                            size="Abundance", 
                            color = "Sample" ), 
                 alpha = 0.7 ) +
      scale_size_continuous(limits = c(0.001,maxC)) +
      xlab("Sample") +
      ylab("Order") +
      ggtitle("Relative abundances at Order level") + 
      labs(caption = "Abundances below 0.001 were considered absent") 
# Family level
my_physeq_Fam = tax_glom(my_physeq, "Family")
my_barFam <- plot_bar(my_physeq_Fam) 
my_barFam$layers <- my_barFam$layers[-1]
maxC<-max(otu_table(my_physeq_Fam))
my_bubbleFam <- my_barFam + 
    geom_point(aes_string(x="Sample", 
                          y="Family", 
                          size="Abundance", 
                          color = "Sample" ), 
               alpha = 0.7) +
    scale_size_continuous(limits = c(0.001,maxC)) +
    xlab("Sample") +
    ylab("Family") +
    ggtitle("Relative abundances at Family level") + 
    labs(caption = "Abundances below 0.001 were considered absent")

# Genus level
my_physeq_Gen = tax_glom(my_physeq, "Genus")
my_barGen <- plot_bar(my_physeq_Gen) 
my_barGen$layers <- my_barGen$layers[-1]
maxC<-max(otu_table(my_physeq_Gen))
my_bubbleGen <- my_barGen + 
    geom_point(aes_string(x="Sample", 
                          y="Genus", 
                          size="Abundance", 
                          color = "Sample" ), 
               alpha = 0.7) +
    scale_size_continuous(limits = c(0.001,maxC)) +
    xlab("Sample") +
    ylab("Genus") +
    ggtitle("Relative abundances at Genus level") + 
    labs(caption = "Abundances below 0.001 were considered absent") 
my_bubbleClass + theme(legend.position = "none")
## Warning: Removed 117 rows containing missing values (geom_point).

my_bubbleOrder + theme(legend.position = "none")
## Warning: Removed 298 rows containing missing values (geom_point).

my_bubbleFam + theme(legend.position = "none")
## Warning: Removed 421 rows containing missing values (geom_point).

my_bubbleGen + theme(legend.position = "none")
## Warning: Removed 598 rows containing missing values (geom_point).