Match genes in a list-like object to a vector of gene symbols

matchGenes(list, object, ...)

# S4 method for GmtList,character
matchGenes(list, object)

# S4 method for GmtList,matrix
matchGenes(list, object)

# S4 method for GmtList,eSet
matchGenes(list, object, col = "GeneSymbol")

# S4 method for character,character
matchGenes(list, object)

# S4 method for character,matrix
matchGenes(list, object)

# S4 method for character,eSet
matchGenes(list, object)

# S4 method for character,DGEList
matchGenes(list, object, col = "GeneSymbol")

# S4 method for GmtList,DGEList
matchGenes(list, object, col = "GeneSymbol")

# S4 method for SignedGenesets,character
matchGenes(list, object)

# S4 method for SignedGenesets,matrix
matchGenes(list, object)

# S4 method for SignedGenesets,eSet
matchGenes(list, object, col = "GeneSymbol")

# S4 method for SignedGenesets,DGEList
matchGenes(list, object, col = "GeneSymbol")

Arguments

list

A GmtList, list, character or SignedGenesets object

object

Gene symbols to be matched; they can come from a vector of character strings, the row names of a matrix, a column in the fData of an eSet object, or a column in the genes of a DGEList object.

...

Additional arguments passed on to the methods, such as col.

col

Column name of fData in an eSet object, or of genes in a DGEList object, specifying where gene symbols are stored. The default value is "GeneSymbol".

Value

An IndexList object, which is essentially a list of the same length as the input (of length 1 if a character vector is used as input), containing the matching indices.

Examples

## test GmtList, character
testGenes <- sprintf("gene%d", 1:10)
testGeneSets <- GmtList(list(gs1=c("gene1", "gene2"), gs2=c("gene9", "gene10"), gs3=c("gene100")))
matchGenes(testGeneSets, testGenes)
#> A list of 3 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   gs1 (n=2): 1,2
#>   gs2 (n=2): 9,10
#>   gs3 (n=0): NA

## test GmtList, matrix
testGenes <- sprintf("gene%d", 1:10)
testGeneSets <- GmtList(list(gs1=c("gene1", "gene2"), gs2=c("gene9", "gene10"), gs3=c("gene100")))
testGeneExprs <- matrix(rnorm(100), nrow=10, dimnames=list(testGenes, sprintf("sample%d", 1:10)))
matchGenes(testGeneSets, testGeneExprs)
#> A list of 3 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   gs1 (n=2): 1,2
#>   gs2 (n=2): 9,10
#>   gs3 (n=0): NA

## test GmtList, eSet
testGenes <- sprintf("gene%d", 1:10)
testGeneSets <- GmtList(list(gs1=c("gene1", "gene2"), gs2=c("gene9", "gene10"), gs3=c("gene100")))
testGeneExprs <- matrix(rnorm(100), nrow=10, dimnames=list(testGenes, sprintf("sample%d", 1:10)))
testFeat <- data.frame(GeneSymbol=rownames(testGeneExprs), row.names=testGenes)
testPheno <- data.frame(SampleId=colnames(testGeneExprs), row.names=colnames(testGeneExprs))
testEset <- ExpressionSet(assayData=testGeneExprs,
    featureData=AnnotatedDataFrame(testFeat),
    phenoData=AnnotatedDataFrame(testPheno))
matchGenes(testGeneSets, testEset)
#> A list of 3 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   gs1 (n=2): 1,2
#>   gs2 (n=2): 9,10
#>   gs3 (n=0): NA
## force using row names
matchGenes(testGeneSets, testEset, col=NULL)
#> A list of 3 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   gs1 (n=2): 1,2
#>   gs2 (n=2): 9,10
#>   gs3 (n=0): NA

 ## test GmtList, DGEList
 if(requireNamespace("edgeR")) {
    mat <- matrix(rnbinom(100, mu=5, size=2), ncol=10)
    rownames(mat) <- sprintf("gene%d", 1:nrow(mat))
    y <- edgeR::DGEList(counts=mat, group=rep(1:2, each=5))

    ## if genes are not set, row names of the count matrix will be used for lookup
    myGeneSet <- GmtList(list(gs1=rownames(mat)[1:2], gs2=rownames(mat)[9:10], gs3="gene100"))
    matchGenes(myGeneSet, y)

    matchGenes(c("gene1", "gene2"), y)
    ## alternatively, use 'col' parameter to specify the column in 'genes'
    y2 <- edgeR::DGEList(counts=mat,
      group=rep(1:2, each=5),
      genes=data.frame(GeneIdentifier=rownames(mat), row.names=rownames(mat)))
    matchGenes(myGeneSet, y2, col="GeneIdentifier")
 }
#> A list of 3 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   gs1 (n=2): 1,2
#>   gs2 (n=2): 9,10
#>   gs3 (n=0): NA

## test character, character
matchGenes(c("gene1", "gene2"), testGenes)
#> A list of 1 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   TempGeneSet (n=2): 1,2

## test character, matrix
matchGenes(c("gene1", "gene2"), testGeneExprs)
#> A list of 1 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   TempGeneSet (n=2): 1,2

## test character, eSet
matchGenes(c("gene1", "gene2"), testEset)
#> A list of 1 indices with offset=1
#> Options: NA removed: TRUE; duplicates removed: TRUE
#>   TempGeneSet (n=2): 1,2