Skip to contents

This article covers taxonomic aggregation, sample aggregation, and long-table export for plotting or modeling.

library(microbiomedataset)
# Load the `global_patterns` example dataset that ships with the package.
data("global_patterns", package = "microbiomedataset")

# `object` is the name every snippet below operates on.
object <- global_patterns

Aggregate taxa to a taxonomy rank

Use summarise_taxa() or agglomerate_taxa() to collapse features to a chosen taxonomic rank, such as Genus or Phylum.

# Aggregate features to the Genus rank.
# NOTE(review): what = "sum_intensity" presumably sums the abundance of all
# features assigned to the same genus -- confirm in ?summarise_taxa.
genus_object <-
  summarise_taxa(
    object,
    taxonomic_rank = "Genus",
    what = "sum_intensity"
  )

# The aggregated expression matrix has 983 rows and 26 columns.
dim(genus_object@expression_data)
#> [1] 983  26
# In the rows shown, the new variable_id is identical to the Genus label.
head(genus_object@variable_info[, c("variable_id", "Genus")])
#>                variable_id                    Genus
#> 1               Sulfolobus               Sulfolobus
#> 2              Cenarchaeum              Cenarchaeum
#> 3           Nitrosopumilus           Nitrosopumilus
#> 4 CandidatusNitrososphaera CandidatusNitrososphaera
#> 5            Natronococcus            Natronococcus
#> 6            Natronorubrum            Natronorubrum
# Same kind of aggregation at the Phylum rank, this time via agglomerate_taxa().
# NOTE(review): this article does not show how agglomerate_taxa() differs from
# summarise_taxa(); check the function documentation before choosing one.
phylum_object <-
  agglomerate_taxa(
    object,
    taxonomic_rank = "Phylum",
    what = "sum_intensity"
  )

# 66 rows remain; the column count (26) matches the Genus-level result above.
dim(phylum_object@expression_data)
#> [1] 66 26

Aggregate samples by a sample metadata column

# Aggregate samples by the "SampleType" column of the sample metadata.
# NOTE(review): what = "mean_intensity" presumably averages abundance across
# the samples in each group -- confirm in ?summarise_samples_by_group.
sampletype_object <-
  summarise_samples_by_group(
    object,
    group_by = "SampleType",
    what = "mean_intensity"
  )

# In the rows shown, the new sample_id is identical to the SampleType label.
head(sampletype_object@sample_info[, c("sample_id", "SampleType")])
#>            sample_id         SampleType
#> 1               Soil               Soil
#> 2              Feces              Feces
#> 3               Skin               Skin
#> 4             Tongue             Tongue
#> 5         Freshwater         Freshwater
#> 6 Freshwater (creek) Freshwater (creek)
# The expression matrix now has 9 columns and 19216 rows.
dim(sampletype_object@expression_data)
#> [1] 19216     9

Export abundance as tidy long tables

Feature-level export:

# Long-format export at feature level; the three columns selected below pair
# each sample_id and variable_id with an abundance value.
# NOTE(review): relative = TRUE presumably rescales counts to relative
# abundance -- confirm the scale (proportion vs. percent) in ?melt_features.
feature_table <- melt_features(object, relative = TRUE)
head(feature_table[, c("sample_id", "variable_id", "abundance")])
#>   sample_id variable_id abundance
#> 1       CL3      549322         0
#> 2       CC1      549322         0
#> 3       SV1      549322         0
#> 4   M31Fcsw      549322         0
#> 5   M11Fcsw      549322         0
#> 6   M31Plmr      549322         0

Taxonomy-level export:

# Long-format export aggregated to the Phylum rank.
taxa_table <- melt_taxa(
  object,
  taxonomic_rank = "Phylum",
  relative = TRUE
)

# NOTE(review): with relative = TRUE the printed abundances exceed 1
# (e.g. 1.25 for Acidobacteria, 2.26 for Actinobacteria), so the values do not
# look like 0-1 proportions -- possibly percentages; confirm in ?melt_taxa.
head(taxa_table[, c("sample_id", "Phylum", "abundance")])
#>   sample_id          Phylum    abundance
#> 1    AQC1cm        ABY1_OD1 0.0005138174
#> 2    AQC1cm             AC1 0.0021409059
#> 3    AQC1cm             AD3 0.0002569087
#> 4    AQC1cm   Acidobacteria 1.2521730194
#> 5    AQC1cm  Actinobacteria 2.2601114984
#> 6    AQC1cm Armatimonadetes 0.0690228049

If you need the older phyloseq::psmelt()-style naming, the package also provides psmelt_microbiome_dataset() and psmelt_taxa() as explicit compatibility names:

# Feature-level long table under the psmelt-style name; [, 1:4] keeps only the
# first four columns (variable_id, sample_id, abundance, Primer).
head(psmelt_microbiome_dataset(object, relative = TRUE)[, 1:4])
#>   variable_id sample_id abundance  Primer
#> 1      549322       CL3         0 ILBC_01
#> 2      549322       CC1         0 ILBC_02
#> 3      549322       SV1         0 ILBC_03
#> 4      549322   M31Fcsw         0 ILBC_04
#> 5      549322   M11Fcsw         0 ILBC_05
#> 6      549322   M31Plmr         0 ILBC_07
# Genus-level long table under the psmelt-style name; the first four columns
# here are sample_id, Genus, abundance, Primer.
head(psmelt_taxa(object, taxonomic_rank = "Genus", relative = TRUE)[, 1:4])
#>   sample_id         Genus    abundance  Primer
#> 1    AQC1cm          4-29 0.5340725502 ILBC_16
#> 2    AQC1cm      4041AA30 0.0000000000 ILBC_16
#> 3    AQC1cm           A17 0.8746237084 ILBC_16
#> 4    AQC1cm   Abiotrophia 0.0000000000 ILBC_16
#> 5    AQC1cm Acaryochloris 0.0052303079 ILBC_16
#> 6    AQC1cm   Acetivibrio 0.0005811453 ILBC_16