For now this function only works with SummarizedExperiments
with Ensembl gene or transcript IDs. See the example
of usage in the tximeta vignette. To obtain
multiple matching IDs for each row of the SummarizedExperiment,
set multiVals="list". See select for documentation
on the use of multiVals.
addIds(se, column, fromDb = FALSE, gene = FALSE, ...)
the SummarizedExperiment
the name of the new ID to add (a column
of the org package
database or of the TxDb/EnsDb if fromDb=TRUE)
logical, whether to use the TxDb/EnsDb that is associated
with se. Default is FALSE, and an org package is used.
Currently only implemented for transcript level (gene=FALSE).
Column names can be viewed with columns(retrieveDb(se)).
logical, whether to map by genes or transcripts (default is FALSE).
If rows are genes, and easily detected as such (ENSG or ENSMUSG), it will
automatically switch to TRUE. If rows are transcripts and gene=TRUE,
then it will try to use a gene_id
column to map IDs to column.
arguments passed to mapIds
a SummarizedExperiment
example(tximeta)
#>
#> tximet> # point to a Salmon quantification file:
#> tximet> dir <- system.file("extdata/salmon_dm", package="tximportData")
#>
#> tximet> files <- file.path(dir, "SRR1197474", "quant.sf")
#>
#> tximet> coldata <- data.frame(files, names="SRR1197474", condition="A", stringsAsFactors=FALSE)
#>
#> tximet> # normally we would just run the following which would download the appropriate metadata
#> tximet> # se <- tximeta(coldata)
#> tximet>
#> tximet> # for this example, we instead point to a local path where the GTF can be found
#> tximet> # by making a linkedTxome:
#> tximet> indexDir <- file.path(dir, "Dm.BDGP6.22.98_salmon-0.14.1")
#>
#> tximet> fastaFTP <- c("ftp://ftp.ensembl.org/pub/release-98/fasta/drosophila_melanogaster/cdna/Drosophila_melanogaster.BDGP6.22.cdna.all.fa.gz",
#> tximet+ "ftp://ftp.ensembl.org/pub/release-98/fasta/drosophila_melanogaster/ncrna/Drosophila_melanogaster.BDGP6.22.ncrna.fa.gz")
#>
#> tximet> gtfPath <- file.path(dir, "Drosophila_melanogaster.BDGP6.22.98.gtf.gz")
#>
#> tximet> makeLinkedTxome(indexDir=indexDir, source="LocalEnsembl", organism="Drosophila melanogaster",
#> tximet+ release="98", genome="BDGP6.22", fasta=fastaFTP, gtf=gtfPath, write=FALSE)
#> saving linkedTxome in bfc (first time)
#>
#> tximet> se <- tximeta(coldata)
#> importing quantifications
#> reading in files with read.delim (install 'readr' package for speed up)
#> 1
#>
#> found matching linked transcriptome:
#> [ LocalEnsembl - Drosophila melanogaster - release 98 ]
#> building TxDb with 'txdbmaker' package
#> Import genomic features from the file as a GRanges object ...
#> OK
#> Prepare the 'metadata' data frame ...
#> OK
#> Make the TxDb object ...
#> OK
#> generating transcript ranges
#> Warning:
#>
#> Warning: the annotation is missing some transcripts that were quantified.
#> 5 out of 33706 txps were missing from GTF/GFF but were in the indexed FASTA.
#> (This occurs sometimes with Ensembl txps on haplotype chromosomes.)
#> In order to build a ranged SummarizedExperiment, these txps were removed.
#> To keep these txps, and to skip adding ranges, use skipMeta=TRUE
#>
#> Example missing txps: [FBtr0307759, FBtr0084079, FBtr0084080, ...]
#>
#> tximet> # to clear the entire linkedTxome table
#> tximet> # (don't run unless you want to clear this table!)
#> tximet> # bfcloc <- getTximetaBFC()
#> tximet> # bfc <- BiocFileCache(bfcloc)
#> tximet> # bfcremove(bfc, bfcquery(bfc, "linkedTxomeTbl")$rid)
#> tximet>
#> tximet>
#> tximet>
#> tximet>
library(org.Dm.eg.db)
#> Loading required package: AnnotationDbi
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
#> as.data.frame, basename, cbind, colnames, dirname, do.call,
#> duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
#> lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#> pmin.int, rank, rbind, rownames, sapply, setdiff, table, tapply,
#> union, unique, unsplit, which.max, which.min
#> Loading required package: Biobase
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#> Loading required package: IRanges
#> Loading required package: S4Vectors
#>
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:utils':
#>
#> findMatches
#> The following objects are masked from 'package:base':
#>
#> I, expand.grid, unname
#>
se <- addIds(se, "REFSEQ", gene=FALSE)
#> mapping to new IDs using org.Dm.eg.db
#> if all matching IDs are desired, and '1:many mappings' are reported,
#> set multiVals='list' to obtain all the matching IDs
#> 'select()' returned 1:many mapping between keys and columns