library(GenomicRanges)
library("Gviz")
library("DESeq2")
Create a GRanges object
(A1 <- GRanges(seqnames = Rle("ch1"),
ranges = IRanges(start = c(10, 22, 40),
width = 5),
strand = "+"))
## GRanges object with 3 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] ch1 [10, 14] +
## [2] ch1 [22, 26] +
## [3] ch1 [40, 44] +
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
Plot the ranges
options(ucscChromosomeNames=FALSE)
geTrack <- GenomeAxisTrack()
deTrack <- AnnotationTrack(range = A1)
plotTracks(c(geTrack,deTrack),from=1, to=50)
Show all methods for the GRanges class
methods(class="GRanges")
## [1] aggregate anyNA <=
## [4] < == >=
## [7] > != append
## [10] as.character as.complex as.data.frame
## [13] as.env as.factor as.integer
## [16] as.list as.logical as.matrix
## [19] as.numeric as.raw by
## [22] c coerce coerce<-
## [25] countOverlaps coverage disjoin
## [28] disjointBins distance distanceToNearest
## [31] duplicated elementMetadata<- elementMetadata
## [34] end<- end eval
## [37] expand.grid expand extractROWS
## [40] findOverlaps flank follow
## [43] gaps [ $<-
## [46] $ [<- granges
## [49] head high2low %in%
## [52] intersect isDisjoint is.unsorted
## [55] length lengths match
## [58] mcols<- mcols metadata<-
## [61] metadata mstack names<-
## [64] names narrow nearest
## [67] NROW Ops order
## [70] overlapsAny parallelSlotNames pcompare
## [73] pgap pintersect precede
## [76] promoters psetdiff punion
## [79] range ranges<- ranges
## [82] rank reduce relistToClass
## [85] relist rename rep.int
## [88] replaceROWS rep resize
## [91] restrict rev ROWNAMES
## [94] rowRanges<- score<- score
## [97] selfmatch seqinfo<- seqinfo
## [100] seqlevelsInUse seqnames<- seqnames
## [103] setdiff shiftApply shift
## [106] showAsCell show sort
## [109] split split<- start<-
## [112] start strand<- strand
## [115] subsetByOverlaps subset table
## [118] tail tapply tile
## [121] trim union unique
## [124] update updateObject values<-
## [127] values width<- width
## [130] window<- window with
## [133] xtabs xtfrm
## see '?methods' for accessing help and source code
shift(x): moves left/right
promoters(x): returns promoter ranges extending around the transcription start site (TSS) defined as start(x)
reduce(x): orders the ranges in x from left to right, then merges the overlapping or adjacent ones
disjoin(x): breaks into discrete ranges based on original starts/ends
For example:
length(A1)
## [1] 3
width(A1)
## [1] 5 5 5
seqnames(A1)
## factor-Rle of length 3 with 1 run
## Lengths: 3
## Values : ch1
## Levels(1): ch1
ranges(A1)
## IRanges object with 3 ranges and 0 metadata columns:
## start end width
## <integer> <integer> <integer>
## [1] 10 14 5
## [2] 22 26 5
## [3] 40 44 5
promoters(A1,upstream=10, downstream=15)
## GRanges object with 3 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] ch1 [ 0, 24] +
## [2] ch1 [12, 36] +
## [3] ch1 [30, 54] +
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
Add metdata to the GRanges object
names(A1) <- LETTERS[1:3]
genome(A1) <- "hg19"
mcols(A1)$bioreplicate <- "I"
A1
## GRanges object with 3 ranges and 1 metadata column:
## seqnames ranges strand | bioreplicate
## <Rle> <IRanges> <Rle> | <character>
## A ch1 [10, 14] + | I
## B ch1 [22, 26] + | I
## C ch1 [40, 44] + | I
## -------
## seqinfo: 1 sequence from hg19 genome; no seqlengths
Coerce the GRange into a dataframe object
as.data.frame(A1)
## seqnames start end width strand bioreplicate
## A ch1 10 14 5 + I
## B ch1 22 26 5 + I
## C ch1 40 44 5 + I
Create two more objects
A2 <- GRanges(seqnames = Rle("ch1"),
ranges = IRanges(start = c(8, 22, 29),
width = 5),
strand = "+")
names(A2) <- LETTERS[1:3]
genome(A2) <- "hg19"
mcols(A2)$bioreplicate <- "II"
A3 <- GRanges(seqnames = Rle("ch1"),
ranges = IRanges(start = c(6, 15, 36),
width = 5),
strand = "+")
names(A3) <- LETTERS[1:3]
genome(A3) <- "hg19"
mcols(A3)$bioreplicate <- "III"
A2
## GRanges object with 3 ranges and 1 metadata column:
## seqnames ranges strand | bioreplicate
## <Rle> <IRanges> <Rle> | <character>
## A ch1 [ 8, 12] + | II
## B ch1 [22, 26] + | II
## C ch1 [29, 33] + | II
## -------
## seqinfo: 1 sequence from hg19 genome; no seqlengths
A3
## GRanges object with 3 ranges and 1 metadata column:
## seqnames ranges strand | bioreplicate
## <Rle> <IRanges> <Rle> | <character>
## A ch1 [ 6, 10] + | III
## B ch1 [15, 19] + | III
## C ch1 [36, 40] + | III
## -------
## seqinfo: 1 sequence from hg19 genome; no seqlengths
Combine the GRange objects
(A <- c(A1, A2, A3))
## GRanges object with 9 ranges and 1 metadata column:
## seqnames ranges strand | bioreplicate
## <Rle> <IRanges> <Rle> | <character>
## A ch1 [10, 14] + | I
## B ch1 [22, 26] + | I
## C ch1 [40, 44] + | I
## A ch1 [ 8, 12] + | II
## B ch1 [22, 26] + | II
## C ch1 [29, 33] + | II
## A ch1 [ 6, 10] + | III
## B ch1 [15, 19] + | III
## C ch1 [36, 40] + | III
## -------
## seqinfo: 1 sequence from hg19 genome; no seqlengths
Merge
(A_merged <- reduce(A))
## GRanges object with 4 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] ch1 [ 6, 19] +
## [2] ch1 [22, 26] +
## [3] ch1 [29, 33] +
## [4] ch1 [36, 44] +
## -------
## seqinfo: 1 sequence from hg19 genome; no seqlengths
Plot it
deTrack <- AnnotationTrack(range = A, group = A$bioreplicate)
mrgTrack <- AnnotationTrack(range = A_merged, name = "Merged")
plotTracks(c(geTrack,deTrack, mrgTrack),groupAnnotation = "group",from=1, to=50)
To find ranges that repeat we can count the number of overlapping regions and remove merged segments that have a certain number of counts (e.g. 1)
(cnts <- countOverlaps(A_merged,A))
## [1] 4 2 1 2
A_merged_consensus <- A_merged[-which(cnts==1),]
mrg_con_Track <- AnnotationTrack(range = A_merged_consensus,name = "Merged Consensus")
plotTracks(c(geTrack, deTrack, mrgTrack, mrg_con_Track),groupAnnotation = "group",from=1, to=50)
The GRangesList class is a container for a list of GRanges objects that have the same metadata column variables. First we create another GRanges object.
(B <- GRanges(seqnames = Rle("ch1"),
ranges = IRanges(start = c(1,3,10,13,24,30),
width = 5),
strand = "+"))
## GRanges object with 6 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] ch1 [ 1, 5] +
## [2] ch1 [ 3, 7] +
## [3] ch1 [10, 14] +
## [4] ch1 [13, 17] +
## [5] ch1 [24, 28] +
## [6] ch1 [30, 34] +
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
names(B) <- rep(LETTERS[1:3],2)
genome(B) <- "hg19"
mcols(B)$bioreplicate <- rep(c("I","II"),c(3,3))
# combine
AB <- GRangesList(A, B)
names(AB) <- c("A", "B")
(AB)
## GRangesList object of length 2:
## $A
## GRanges object with 9 ranges and 1 metadata column:
## seqnames ranges strand | bioreplicate
## <Rle> <IRanges> <Rle> | <character>
## A ch1 [10, 14] + | I
## B ch1 [22, 26] + | I
## C ch1 [40, 44] + | I
## A ch1 [ 8, 12] + | II
## B ch1 [22, 26] + | II
## C ch1 [29, 33] + | II
## A ch1 [ 6, 10] + | III
## B ch1 [15, 19] + | III
## C ch1 [36, 40] + | III
##
## $B
## GRanges object with 6 ranges and 1 metadata column:
## seqnames ranges strand | bioreplicate
## A ch1 [ 1, 5] + | I
## B ch1 [ 3, 7] + | I
## C ch1 [10, 14] + | I
## A ch1 [13, 17] + | II
## B ch1 [24, 28] + | II
## C ch1 [30, 34] + | II
##
## -------
## seqinfo: 1 sequence from hg19 genome; no seqlengths
we can reduce each object independently
(AB_merged <- reduce(AB))
## GRangesList object of length 2:
## $A
## GRanges object with 4 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] ch1 [ 6, 19] +
## [2] ch1 [22, 26] +
## [3] ch1 [29, 33] +
## [4] ch1 [36, 44] +
##
## $B
## GRanges object with 4 ranges and 0 metadata columns:
## seqnames ranges strand
## [1] ch1 [ 1, 7] +
## [2] ch1 [10, 17] +
## [3] ch1 [24, 28] +
## [4] ch1 [30, 34] +
##
## -------
## seqinfo: 1 sequence from hg19 genome; no seqlengths
The GRangesList class is a List object (not list) of GenomicRanges
(ABg <- GenomicRangesList(A, B))
## GenomicRangesList of length 2
We can also reduce the GRanges objects of all conditions into one GRange
(AB_combined <- unlist(GenomicRangesList(A, B)))
## GRanges object with 15 ranges and 1 metadata column:
## seqnames ranges strand | bioreplicate
## <Rle> <IRanges> <Rle> | <character>
## A ch1 [10, 14] + | I
## B ch1 [22, 26] + | I
## C ch1 [40, 44] + | I
## A ch1 [ 8, 12] + | II
## B ch1 [22, 26] + | II
## . ... ... ... . ...
## B ch1 [ 3, 7] + | I
## C ch1 [10, 14] + | I
## A ch1 [13, 17] + | II
## B ch1 [24, 28] + | II
## C ch1 [30, 34] + | II
## -------
## seqinfo: 1 sequence from hg19 genome; no seqlengths
(AB_combined_merged <- reduce(AB_combined ))
## GRanges object with 3 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] ch1 [ 1, 19] +
## [2] ch1 [22, 34] +
## [3] ch1 [36, 44] +
## -------
## seqinfo: 1 sequence from hg19 genome; no seqlengths
RangedSummarizedExperiment is a array-like container in which rows represent features of interest (e.g. genes) and columns represent samples.
First we list the accessor functions for the class
methods(class="RangedSummarizedExperiment")
## [1] aggregate anyNA <=
## [4] < == >=
## [7] > != append
## [10] as.character as.complex as.data.frame
## [13] as.env as.integer as.list
## [16] as.logical as.matrix as.numeric
## [19] as.raw assayNames<- assayNames
## [22] assays<- assays assay<-
## [25] assay by cbind
## [28] coerce<- coerce colData<-
## [31] colData Compare countOverlaps
## [34] coverage dimnames<- dimnames
## [37] dim disjointBins distance
## [40] distanceToNearest duplicated elementMetadata<-
## [43] elementMetadata end<- end
## [46] eval expand.grid expand
## [49] exptData<- exptData extractROWS
## [52] findOverlaps flank follow
## [55] granges head high2low
## [58] %in% isDisjoint is.unsorted
## [61] length lengths match
## [64] mcols<- mcols metadata<-
## [67] metadata mstack names<-
## [70] names narrow nearest
## [73] NROW order overlapsAny
## [76] parallelSlotNames pcompare precede
## [79] promoters ranges<- ranges
## [82] rank rbind relist
## [85] rename rep.int replaceROWS
## [88] rep resize restrict
## [91] rev rowData<- rowData
## [94] ROWNAMES rowRanges rowRanges<-
## [97] seqinfo<- seqinfo seqlevelsInUse
## [100] seqnames shiftApply shift
## [103] showAsCell show sort
## [106] split split<- start<-
## [109] start strand<- strand
## [112] subsetByOverlaps subset [
## [115] [<- [[<- [[
## [118] $<- $ table
## [121] tail tapply trim
## [124] unique values<- values
## [127] width<- width window<-
## [130] window with xtabs
## [133] xtfrm
## see '?methods' for accessing help and source code
We can construct a RangedSummarizedExperiment using the class constructor SummarizedExperiment(assays, rowRanges, colData) or it can be generated by other functions such as the summarizeOverlaps function that extends findOverlaps by providing options to resolve reads that overlap multiple features. For example given a list of bam files, we can geenerate a RangedSummarizedExperiment as such
se <- summarizeOverlaps(
features = A_merged_consensus,
reads = bam_files,
mode = "Union",
singleEnd = FALSE,
ignore.strand = TRUE,
fragments = TRUE,
inter.feature = FALSE
)
assays(se)[[1]]
colData(se)
colnames(se)
DESeqDataSet is a subclass of RangedSummarizedExperiment that serves as a container for differential expression analysis results. The DESeqDataSet(se, design) class constructor takes non-negative integer values in the counts se matrix and a formula to specifies the design of the experiment.
First we list the accessor functions for the class
methods(class="DESeqDataSet")
## [1] aggregate anyNA <=
## [4] < == >=
## [7] > != append
## [10] as.character as.complex as.data.frame
## [13] as.env as.integer as.list
## [16] as.logical as.matrix as.numeric
## [19] as.raw assayNames<- assayNames
## [22] assays<- assays assay<-
## [25] assay by cbind
## [28] coef coerce<- coerce
## [31] colData<- colData Compare
## [34] countOverlaps counts<- counts
## [37] coverage design<- design
## [40] dimnames<- dimnames dim
## [43] disjointBins dispersionFunction<- dispersionFunction
## [46] dispersions dispersions<- distance
## [49] distanceToNearest duplicated elementMetadata<-
## [52] elementMetadata end<- end
## [55] estimateDispersions estimateSizeFactors eval
## [58] expand.grid expand exptData<-
## [61] exptData extractROWS findOverlaps
## [64] flank follow granges
## [67] head high2low %in%
## [70] isDisjoint is.unsorted length
## [73] lengths match mcols<-
## [76] mcols metadata<- metadata
## [79] mstack names<- names
## [82] narrow nearest normalizationFactors<-
## [85] normalizationFactors NROW order
## [88] overlapsAny parallelSlotNames pcompare
## [91] plotDispEsts plotMA precede
## [94] promoters ranges<- ranges
## [97] rank rbind relist
## [100] rename rep.int replaceROWS
## [103] rep resize restrict
## [106] rev rowData<- rowData
## [109] ROWNAMES rowRanges rowRanges<-
## [112] seqinfo<- seqinfo seqlevelsInUse
## [115] seqnames shiftApply shift
## [118] showAsCell show sizeFactors
## [121] sizeFactors<- sort split
## [124] split<- start<- start
## [127] strand<- strand subsetByOverlaps
## [130] subset [ [<-
## [133] [[<- [[ $<-
## [136] $ table tail
## [139] tapply trim unique
## [142] values<- values width<-
## [145] width window<- window
## [148] with xtabs xtfrm
## see '?methods' for accessing help and source code
Examples of accessor functions for the class
DESeq_dataset <- DESeqDataSet(se, design = ~ biosample)
DESeq_dataset <-DESeq_dataset[rowSums(counts(DESeq_dataset)) > 1, ]
rld <- rlog(DESeq_dataset, blind=FALSE)
#fragments per kilobase per million mapped fragments
data_rpkm <-fpkm(rld)
sampleDists <- dist( t( assay(rld) ) )
ddsc <- DESeq(DESeq_dataset)
res <- results(dds)
plotMA(res, ylim=c(-10,10))
To conduct differential analysis
ddsc <- DESeq(DESeq_dataset)
#DESeqResults object
res <- results(dds)
plotMA(res, ylim=c(-10,10))
summary(res)