vignettes/merge.Rmd
merge.Rmd
Merging OTU or sample indices based on variables in the data can be a useful means of reducing noise or excess features in an analysis or graphic.
library(microbiomedataset)
library(tidyverse)
# Load the `global_patterns` example dataset into the workspace and print
# its summary.
data(list = "global_patterns")
print(global_patterns)
#> --------------------
#> microbiomedataset version: 0.99.1
#> --------------------
#> 1.expression_data:[ 19216 x 26 data.frame]
#> 2.sample_info:[ 26 x 8 data.frame]
#> 3.variable_info:[ 19216 x 8 data.frame]
#> 4.sample_info_note:[ 8 x 2 data.frame]
#> 5.variable_info_note:[ 8 x 2 data.frame]
#> --------------------
#> Processing information (extract_process_info())
#> create_microbiome_dataset ----------
#> Package Function.used Time
#> 1 microbiomedataset create_microbiome_dataset() 2022-07-10 10:56:13
Remove empty taxa (those whose summed intensity is zero) and add a `human` flag to the sample information:
# Drop taxa that carry no signal, one step at a time:
#   1. request a `sum_intensity` summary for every variable (taxon);
#   2. activate `variable_info` so the dplyr verb below operates on it
#      (presumably how the package routes dplyr verbs -- confirm in its docs);
#   3. keep only the variables whose `sum_intensity` is strictly positive.
global_patterns2 <- mutate2variable(global_patterns, what = "sum_intensity")
global_patterns2 <- activate_microbiome_dataset(
  global_patterns2,
  what = "variable_info"
)
global_patterns2 <- dplyr::filter(global_patterns2, sum_intensity > 0)
# Sample types that are treated as human-associated.
humantypes <- c("Feces", "Mock", "Skin", "Tongue")

# Activate `sample_info`, then add a logical `human` column that is TRUE
# when the sample's `SampleType` is one of `humantypes`.
global_patterns2 <- activate_microbiome_dataset(
  global_patterns2,
  what = "sample_info"
)
global_patterns2 <- dplyr::mutate(
  global_patterns2,
  human = SampleType %in% humantypes
)
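Now on to the merging examples. First, samples are merged by `SampleType`; afterwards, a set of variables (taxa) is merged into a single variable.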
# Merge samples that share a `SampleType` into one sample per type, then show
# the sample information of the merged dataset.
# The `.` placeholder keeps the dataset bound to the `object` argument by name.
merged_global_patterns2 <-
  global_patterns2 %>%
  microbiomedataset::summarise_samples(
    object = .,
    group_by = "SampleType"
  )
merged_global_patterns2 %>%
  extract_sample_info()
#> sample_id Primer Final_Barcode Barcode_truncated_plus_T
#> 1 Soil ILBC_01 AACGCA TGCGTT
#> 2 Feces ILBC_04 AAGAGA TCTCTT
#> 3 Skin ILBC_07 AATCGT ACGATT
#> 4 Tongue ILBC_10 ACACGA TCGTGT
#> 5 Freshwater ILBC_13 ACACTG CAGTGT
#> 6 Freshwater (creek) ILBC_16 ACAGCA TGCTGT
#> 7 Ocean ILBC_19 ACAGTT AACTGT
#> 8 Sediment (estuary) ILBC_22 ACATGT ACATGT
#> 9 Mock ILBC_27 ACCGCA TGCGGT
#> Barcode_full_length SampleType
#> 1 CTAGCGTGCGT Soil
#> 2 TCGACATCTCT Feces
#> 3 CGAGTCACGAT Skin
#> 4 TGTGGCTCGTG Tongue
#> 5 CATGAACAGTG Freshwater
#> 6 GACCACTGCTG Freshwater (creek)
#> 7 TCGCGCAACTG Ocean
#> 8 CACGTGACATG Sediment (estuary)
#> 9 TGACTCTGCGG Mock
#> Description class human
#> 1 Calhoun South Carolina Pine soil, pH 4.9 Subject FALSE
#> 2 M3, Day 1, fecal swab, whole body study Subject TRUE
#> 3 M3, Day 1, right palm, whole body study Subject TRUE
#> 4 M3, Day 1, tongue, whole body study Subject TRUE
#> 5 Lake Mendota Minnesota, 24 meter epilimnion Subject FALSE
#> 6 Allequash Creek, 0-1cm depth Subject FALSE
#> 7 Newport Pier, CA surface water, Time 1 Subject FALSE
#> 8 Tijuana River Reserve, depth 1 Subject FALSE
#> 9 Even1 Subject TRUE
# Merge the first five variables (taxa) into a single variable.
# `remain_variable_info_index = 1` presumably selects which of the merged
# variables supplies the retained `variable_info` row -- confirm in the
# package documentation.
merged_variables <-
  global_patterns2 %>%
  microbiomedataset::summarize_variables(
    object = .,
    variable_index = seq_len(5),
    remain_variable_info_index = 1
  )

# Dimensions after merging.
merged_variables %>%
  dim()
#> variables samples
#> 18984 26
# Dimensions of the unmerged dataset, for comparison with `merged_variables`.
dim(global_patterns2)
#> variables samples
#> 18988 26
# Print the R version, platform, and package versions used for this run.
sessionInfo()
#> R version 4.2.1 (2022-06-23)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur ... 10.16
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] forcats_0.5.1.9000 stringr_1.4.0 purrr_0.3.4
#> [4] readr_2.1.2 tidyr_1.2.0 tibble_3.1.7
#> [7] ggplot2_3.3.6 tidyverse_1.3.1 dplyr_1.0.9
#> [10] microbiomedataset_0.99.7
#>
#> loaded via a namespace (and not attached):
#> [1] utf8_1.2.2 tidyselect_1.1.2
#> [3] htmlwidgets_1.5.4 grid_4.2.1
#> [5] BiocParallel_1.30.3 munsell_0.5.0
#> [7] codetools_0.2-18 ragg_1.2.2
#> [9] preprocessCore_1.58.0 withr_2.5.0
#> [11] colorspace_2.0-3 Biobase_2.56.0
#> [13] phyloseq_1.40.0 knitr_1.39
#> [15] rstudioapi_0.13 stats4_4.2.1
#> [17] mzID_1.34.0 MatrixGenerics_1.8.1
#> [19] GenomeInfoDbData_1.2.8 polyclip_1.10-0
#> [21] farver_2.1.1 rhdf5_2.40.0
#> [23] rprojroot_2.0.3 vctrs_0.4.1
#> [25] generics_0.1.3 xfun_0.31
#> [27] R6_2.5.1 doParallel_1.0.17
#> [29] GenomeInfoDb_1.32.2 clue_0.3-61
#> [31] graphlayouts_0.8.0 MsCoreUtils_1.8.0
#> [33] bitops_1.0-7 rhdf5filters_1.8.0
#> [35] cachem_1.0.6 gridGraphics_0.5-1
#> [37] DelayedArray_0.22.0 assertthat_0.2.1
#> [39] scales_1.2.0 ggraph_2.0.5
#> [41] gtable_0.3.0 affy_1.74.0
#> [43] tidygraph_1.2.1 rlang_1.0.3
#> [45] systemfonts_1.0.4 mzR_2.30.0
#> [47] GlobalOptions_0.1.2 splines_4.2.1
#> [49] Rdisop_1.56.0 lazyeval_0.2.2
#> [51] impute_1.70.0 broom_1.0.0
#> [53] modelr_0.1.8 BiocManager_1.30.18
#> [55] yaml_2.3.5 reshape2_1.4.4
#> [57] backports_1.4.1 tools_4.2.1
#> [59] ggplotify_0.1.0 affyio_1.66.0
#> [61] ellipsis_0.3.2 jquerylib_0.1.4
#> [63] biomformat_1.24.0 RColorBrewer_1.1-3
#> [65] BiocGenerics_0.42.0 MSnbase_2.22.0
#> [67] Rcpp_1.0.8.3 plyr_1.8.7
#> [69] zlibbioc_1.42.0 RCurl_1.98-1.7
#> [71] pbapply_1.5-0 GetoptLong_1.0.5
#> [73] viridis_0.6.2 S4Vectors_0.34.0
#> [75] zoo_1.8-10 SummarizedExperiment_1.26.1
#> [77] haven_2.5.0 ggrepel_0.9.1
#> [79] cluster_2.1.3 fs_1.5.2
#> [81] magrittr_2.0.3 masstools_0.99.13
#> [83] data.table_1.14.2 openxlsx_4.2.5
#> [85] circlize_0.4.15 reprex_2.0.1
#> [87] pcaMethods_1.88.0 ProtGenerics_1.28.0
#> [89] matrixStats_0.62.0 hms_1.1.1
#> [91] evaluate_0.15 XML_3.99-0.10
#> [93] readxl_1.4.0 IRanges_2.30.0
#> [95] gridExtra_2.3 shape_1.4.6
#> [97] compiler_4.2.1 ncdf4_1.19
#> [99] crayon_1.5.1 htmltools_0.5.2
#> [101] mgcv_1.8-40 tzdb_0.3.0
#> [103] lubridate_1.8.0 DBI_1.1.3
#> [105] tweenr_1.0.2 dbplyr_2.2.1
#> [107] ComplexHeatmap_2.12.0 MASS_7.3-57
#> [109] Matrix_1.4-1 ade4_1.7-19
#> [111] permute_0.9-7 cli_3.3.0
#> [113] vsn_3.64.0 parallel_4.2.1
#> [115] igraph_1.3.2 GenomicRanges_1.48.0
#> [117] pkgconfig_2.0.3 pkgdown_2.0.5
#> [119] plotly_4.10.0 xml2_1.3.3
#> [121] MALDIquant_1.21 foreach_1.5.2
#> [123] bslib_0.3.1 multtest_2.52.0
#> [125] XVector_0.36.0 massdataset_1.0.5
#> [127] rvest_1.0.2 yulab.utils_0.0.5
#> [129] digest_0.6.29 vegan_2.6-2
#> [131] Biostrings_2.64.0 rmarkdown_2.14
#> [133] cellranger_1.1.0 tidytree_0.3.9
#> [135] rjson_0.2.21 lifecycle_1.0.1
#> [137] nlme_3.1-158 jsonlite_1.8.0
#> [139] Rhdf5lib_1.18.2 desc_1.4.1
#> [141] viridisLite_0.4.0 limma_3.52.2
#> [143] fansi_1.0.3 pillar_1.7.0
#> [145] ggsci_2.9 lattice_0.20-45
#> [147] fastmap_1.1.0 httr_1.4.3
#> [149] survival_3.3-1 glue_1.6.2
#> [151] zip_2.2.0 png_0.1-7
#> [153] iterators_1.0.14 ggforce_0.3.3
#> [155] stringi_1.7.6 sass_0.4.1
#> [157] textshaping_0.3.6 memoise_2.0.1
#> [159] ape_5.6-2