我正在尝试对
GRanges
对象执行 reduce(合并重叠的区间)
library(GenomicRanges)
# Example data: three ranges on chr1. The last two share the same
# coordinates and strand, so they are the ones reduce() will merge.
gr <- GRanges(
  seqnames = rep("chr1", 3),
  ranges = IRanges(start = c(11, 101, 101), end = c(20, 200, 200)),
  strand = c("+", "-", "-"),
  name = c("a1", "a2", "a2"),
  score = c(1, 2, 3)
)
gr
seqnames ranges strand | name score
<Rle> <IRanges> <Rle> | <character> <numeric>
[1] chr1 11-20 + | a1 1
[2] chr1 101-200 - | a2 2
[3] chr1 101-200 - | a2 3
# Merge overlapping ranges; the result printed below carries no
# metadata columns. The call is namespace-qualified so it is explicit
# which reduce() is meant.
GenomicRanges::reduce(gr)
seqnames ranges strand
<Rle> <IRanges> <Rle>
[1] chr1 11-20 +
[2] chr1 101-200 -
但我想保留各区间对应的元数据,期望得到类似下面的结果:
seqnames ranges strand score
<Rle> <IRanges> <Rle>
[1] chr1 11-20 + 1
[2] chr1 101-200 - 2,3
设置
with.revmap = TRUE
,然后根据 revmap 中记录的原始区间索引,把原始对象的元数据列(mcols)折叠后映射到合并后的区间上:
# Reduce while recording, for every merged range, the indices of the
# input ranges that were folded into it (the `revmap` metadata column).
grr <- reduce(gr, with.revmap = TRUE)

# Collapse one metadata column of the original object into a single
# comma-separated string per reduced range.
#   values: a vector parallel to the original ranges
#   groups: the revmap (one index vector per reduced range)
# Returns a character vector parallel to `groups`.
collapse_mcol <- function(values, groups) {
  # extractList() splits `values` by the index groups in one vectorised
  # step; unstrsplit() then pastes each group together. This avoids
  # building and rbind-ing one single-row data.frame per range.
  unstrsplit(extractList(as.character(values), groups), sep = ",")
}

revmap <- mcols(grr)$revmap

# Replace the metadata (dropping `revmap`) with the collapsed columns.
# Built this way, `name` and `score` exist even when `grr` is empty.
mcols(grr) <- DataFrame(
  name = collapse_mcol(mcols(gr)$name, revmap),
  score = collapse_mcol(mcols(gr)$score, revmap)
)
grr
#GRanges object with 2 ranges and 2 metadata columns:
# seqnames ranges strand | name score
# <Rle> <IRanges> <Rle> | <character> <character>
# [1] chr1 11-20 + | a1 1
# [2] chr1 101-200 - | a2,a2 2,3
# -------
# seqinfo: 1 sequence from an unspecified genome; no seqlengths