Bioconductor Classes Quick Overview

The GRanges Class

library(GenomicRanges)
library("Gviz")
library("DESeq2")

Create a GRanges object

(A1 <- GRanges(seqnames = Rle("ch1"),
              ranges   = IRanges(start = c(10, 22, 40),
                                 width = 5),
              strand   = "+"))
## GRanges object with 3 ranges and 0 metadata columns:
##       seqnames    ranges strand
##          <Rle> <IRanges>  <Rle>
##   [1]      ch1  [10, 14]      +
##   [2]      ch1  [22, 26]      +
##   [3]      ch1  [40, 44]      +
##   -------
##   seqinfo: 1 sequence from an unspecified genome; no seqlengths

Plot the ranges

options(ucscChromosomeNames=FALSE)
geTrack <- GenomeAxisTrack()
deTrack <- AnnotationTrack(range =  A1)
plotTracks(c(geTrack,deTrack),from=1, to=50)

Show all methods for the GRanges class

methods(class="GRanges")
##   [1] aggregate         anyNA             <=               
##   [4] <                 ==                >=               
##   [7] >                 !=                append           
##  [10] as.character      as.complex        as.data.frame    
##  [13] as.env            as.factor         as.integer       
##  [16] as.list           as.logical        as.matrix        
##  [19] as.numeric        as.raw            by               
##  [22] c                 coerce            coerce<-         
##  [25] countOverlaps     coverage          disjoin          
##  [28] disjointBins      distance          distanceToNearest
##  [31] duplicated        elementMetadata<- elementMetadata  
##  [34] end<-             end               eval             
##  [37] expand.grid       expand            extractROWS      
##  [40] findOverlaps      flank             follow           
##  [43] gaps              [                 $<-              
##  [46] $                 [<-               granges          
##  [49] head              high2low          %in%             
##  [52] intersect         isDisjoint        is.unsorted      
##  [55] length            lengths           match            
##  [58] mcols<-           mcols             metadata<-       
##  [61] metadata          mstack            names<-          
##  [64] names             narrow            nearest          
##  [67] NROW              Ops               order            
##  [70] overlapsAny       parallelSlotNames pcompare         
##  [73] pgap              pintersect        precede          
##  [76] promoters         psetdiff          punion           
##  [79] range             ranges<-          ranges           
##  [82] rank              reduce            relistToClass    
##  [85] relist            rename            rep.int          
##  [88] replaceROWS       rep               resize           
##  [91] restrict          rev               ROWNAMES         
##  [94] rowRanges<-       score<-           score            
##  [97] selfmatch         seqinfo<-         seqinfo          
## [100] seqlevelsInUse    seqnames<-        seqnames         
## [103] setdiff           shiftApply        shift            
## [106] showAsCell        show              sort             
## [109] split             split<-           start<-          
## [112] start             strand<-          strand           
## [115] subsetByOverlaps  subset            table            
## [118] tail              tapply            tile             
## [121] trim              union             unique           
## [124] update            updateObject      values<-         
## [127] values            width<-           width            
## [130] window<-          window            with             
## [133] xtabs             xtfrm            
## see '?methods' for accessing help and source code

Intra-range methods (transform ranges independently)

shift(x): moves left/right

promoters(x): returns promoter ranges extending around the transcription start site (TSS) defined as start(x)

Inter-range methods (transform all the ranges together)

reduce(x): orders the ranges in x from left to right, then merges the overlapping or adjacent ones

disjoin(x): breaks into discrete ranges based on original starts/ends

For example:

 length(A1)
## [1] 3
 width(A1)
## [1] 5 5 5
 seqnames(A1)
## factor-Rle of length 3 with 1 run
##   Lengths:   3
##   Values : ch1
## Levels(1): ch1
 ranges(A1)
## IRanges object with 3 ranges and 0 metadata columns:
##           start       end     width
##       <integer> <integer> <integer>
##   [1]        10        14         5
##   [2]        22        26         5
##   [3]        40        44         5
 promoters(A1,upstream=10, downstream=15)
## GRanges object with 3 ranges and 0 metadata columns:
##       seqnames    ranges strand
##          <Rle> <IRanges>  <Rle>
##   [1]      ch1  [ 0, 24]      +
##   [2]      ch1  [12, 36]      +
##   [3]      ch1  [30, 54]      +
##   -------
##   seqinfo: 1 sequence from an unspecified genome; no seqlengths

Add metdata to the GRanges object

 names(A1) <- LETTERS[1:3]
 genome(A1) <- "hg19"
 mcols(A1)$bioreplicate <-  "I"
 A1
## GRanges object with 3 ranges and 1 metadata column:
##     seqnames    ranges strand | bioreplicate
##        <Rle> <IRanges>  <Rle> |  <character>
##   A      ch1  [10, 14]      + |            I
##   B      ch1  [22, 26]      + |            I
##   C      ch1  [40, 44]      + |            I
##   -------
##   seqinfo: 1 sequence from hg19 genome; no seqlengths

Coerce the GRange into a dataframe object

as.data.frame(A1)
##   seqnames start end width strand bioreplicate
## A      ch1    10  14     5      +            I
## B      ch1    22  26     5      +            I
## C      ch1    40  44     5      +            I

Multiple GRange Objects

Create two more objects

A2 <- GRanges(seqnames = Rle("ch1"),
              ranges   = IRanges(start = c(8, 22, 29),
                                 width = 5),
              strand   = "+")

 names(A2) <- LETTERS[1:3]
 genome(A2) <- "hg19"
 mcols(A2)$bioreplicate <-  "II"
 
A3 <- GRanges(seqnames = Rle("ch1"),
              ranges   = IRanges(start = c(6, 15, 36),
                                 width = 5),
              strand   = "+")

 names(A3) <- LETTERS[1:3]
 genome(A3) <- "hg19"
 mcols(A3)$bioreplicate <-  "III"
 
 A2
## GRanges object with 3 ranges and 1 metadata column:
##     seqnames    ranges strand | bioreplicate
##        <Rle> <IRanges>  <Rle> |  <character>
##   A      ch1  [ 8, 12]      + |           II
##   B      ch1  [22, 26]      + |           II
##   C      ch1  [29, 33]      + |           II
##   -------
##   seqinfo: 1 sequence from hg19 genome; no seqlengths
 A3
## GRanges object with 3 ranges and 1 metadata column:
##     seqnames    ranges strand | bioreplicate
##        <Rle> <IRanges>  <Rle> |  <character>
##   A      ch1  [ 6, 10]      + |          III
##   B      ch1  [15, 19]      + |          III
##   C      ch1  [36, 40]      + |          III
##   -------
##   seqinfo: 1 sequence from hg19 genome; no seqlengths

Combine the GRange objects

(A <-  c(A1, A2, A3))
## GRanges object with 9 ranges and 1 metadata column:
##     seqnames    ranges strand | bioreplicate
##        <Rle> <IRanges>  <Rle> |  <character>
##   A      ch1  [10, 14]      + |            I
##   B      ch1  [22, 26]      + |            I
##   C      ch1  [40, 44]      + |            I
##   A      ch1  [ 8, 12]      + |           II
##   B      ch1  [22, 26]      + |           II
##   C      ch1  [29, 33]      + |           II
##   A      ch1  [ 6, 10]      + |          III
##   B      ch1  [15, 19]      + |          III
##   C      ch1  [36, 40]      + |          III
##   -------
##   seqinfo: 1 sequence from hg19 genome; no seqlengths

Merge

(A_merged <- reduce(A))
## GRanges object with 4 ranges and 0 metadata columns:
##       seqnames    ranges strand
##          <Rle> <IRanges>  <Rle>
##   [1]      ch1  [ 6, 19]      +
##   [2]      ch1  [22, 26]      +
##   [3]      ch1  [29, 33]      +
##   [4]      ch1  [36, 44]      +
##   -------
##   seqinfo: 1 sequence from hg19 genome; no seqlengths

Plot it

deTrack <- AnnotationTrack(range =  A, group = A$bioreplicate)
mrgTrack <- AnnotationTrack(range =  A_merged, name = "Merged")
plotTracks(c(geTrack,deTrack, mrgTrack),groupAnnotation = "group",from=1, to=50)

To find ranges that repeat we can count the number of overlapping regions and remove merged segments that have a certain number of counts (e.g. 1)

(cnts <- countOverlaps(A_merged,A))
## [1] 4 2 1 2
A_merged_consensus <- A_merged[-which(cnts==1),]
mrg_con_Track <- AnnotationTrack(range =  A_merged_consensus,name = "Merged Consensus")
plotTracks(c(geTrack, deTrack, mrgTrack, mrg_con_Track),groupAnnotation = "group",from=1, to=50)

GRangesList Class

The GRangesList class is a container for a list of GRanges objects that have the same metadata column variables. First we create another GRanges object.

(B <- GRanges(seqnames = Rle("ch1"),
              ranges   = IRanges(start = c(1,3,10,13,24,30),
                                 width = 5),
              strand   = "+"))
## GRanges object with 6 ranges and 0 metadata columns:
##       seqnames    ranges strand
##          <Rle> <IRanges>  <Rle>
##   [1]      ch1  [ 1,  5]      +
##   [2]      ch1  [ 3,  7]      +
##   [3]      ch1  [10, 14]      +
##   [4]      ch1  [13, 17]      +
##   [5]      ch1  [24, 28]      +
##   [6]      ch1  [30, 34]      +
##   -------
##   seqinfo: 1 sequence from an unspecified genome; no seqlengths
 names(B) <- rep(LETTERS[1:3],2)
 genome(B) <- "hg19"
 mcols(B)$bioreplicate <-  rep(c("I","II"),c(3,3))
# combine
AB <- GRangesList(A, B)
names(AB) <- c("A", "B")
(AB)
## GRangesList object of length 2:
## $A 
## GRanges object with 9 ranges and 1 metadata column:
##     seqnames    ranges strand | bioreplicate
##        <Rle> <IRanges>  <Rle> |  <character>
##   A      ch1  [10, 14]      + |            I
##   B      ch1  [22, 26]      + |            I
##   C      ch1  [40, 44]      + |            I
##   A      ch1  [ 8, 12]      + |           II
##   B      ch1  [22, 26]      + |           II
##   C      ch1  [29, 33]      + |           II
##   A      ch1  [ 6, 10]      + |          III
##   B      ch1  [15, 19]      + |          III
##   C      ch1  [36, 40]      + |          III
## 
## $B 
## GRanges object with 6 ranges and 1 metadata column:
##     seqnames   ranges strand | bioreplicate
##   A      ch1 [ 1,  5]      + |            I
##   B      ch1 [ 3,  7]      + |            I
##   C      ch1 [10, 14]      + |            I
##   A      ch1 [13, 17]      + |           II
##   B      ch1 [24, 28]      + |           II
##   C      ch1 [30, 34]      + |           II
## 
## -------
## seqinfo: 1 sequence from hg19 genome; no seqlengths

we can reduce each object independently

(AB_merged <- reduce(AB))
## GRangesList object of length 2:
## $A 
## GRanges object with 4 ranges and 0 metadata columns:
##       seqnames    ranges strand
##          <Rle> <IRanges>  <Rle>
##   [1]      ch1  [ 6, 19]      +
##   [2]      ch1  [22, 26]      +
##   [3]      ch1  [29, 33]      +
##   [4]      ch1  [36, 44]      +
## 
## $B 
## GRanges object with 4 ranges and 0 metadata columns:
##       seqnames   ranges strand
##   [1]      ch1 [ 1,  7]      +
##   [2]      ch1 [10, 17]      +
##   [3]      ch1 [24, 28]      +
##   [4]      ch1 [30, 34]      +
## 
## -------
## seqinfo: 1 sequence from hg19 genome; no seqlengths

The GRangesList class is a List object (not list) of GenomicRanges

(ABg <- GenomicRangesList(A, B))
## GenomicRangesList of length 2

We can also reduce the GRanges objects of all conditions into one GRange

(AB_combined <- unlist(GenomicRangesList(A, B)))
## GRanges object with 15 ranges and 1 metadata column:
##     seqnames    ranges strand | bioreplicate
##        <Rle> <IRanges>  <Rle> |  <character>
##   A      ch1  [10, 14]      + |            I
##   B      ch1  [22, 26]      + |            I
##   C      ch1  [40, 44]      + |            I
##   A      ch1  [ 8, 12]      + |           II
##   B      ch1  [22, 26]      + |           II
##   .      ...       ...    ... .          ...
##   B      ch1  [ 3,  7]      + |            I
##   C      ch1  [10, 14]      + |            I
##   A      ch1  [13, 17]      + |           II
##   B      ch1  [24, 28]      + |           II
##   C      ch1  [30, 34]      + |           II
##   -------
##   seqinfo: 1 sequence from hg19 genome; no seqlengths
(AB_combined_merged <- reduce(AB_combined ))
## GRanges object with 3 ranges and 0 metadata columns:
##       seqnames    ranges strand
##          <Rle> <IRanges>  <Rle>
##   [1]      ch1  [ 1, 19]      +
##   [2]      ch1  [22, 34]      +
##   [3]      ch1  [36, 44]      +
##   -------
##   seqinfo: 1 sequence from hg19 genome; no seqlengths

RangedSummarizedExperiment

RangedSummarizedExperiment is a array-like container in which rows represent features of interest (e.g. genes) and columns represent samples.

First we list the accessor functions for the class

methods(class="RangedSummarizedExperiment")
##   [1] aggregate         anyNA             <=               
##   [4] <                 ==                >=               
##   [7] >                 !=                append           
##  [10] as.character      as.complex        as.data.frame    
##  [13] as.env            as.integer        as.list          
##  [16] as.logical        as.matrix         as.numeric       
##  [19] as.raw            assayNames<-      assayNames       
##  [22] assays<-          assays            assay<-          
##  [25] assay             by                cbind            
##  [28] coerce<-          coerce            colData<-        
##  [31] colData           Compare           countOverlaps    
##  [34] coverage          dimnames<-        dimnames         
##  [37] dim               disjointBins      distance         
##  [40] distanceToNearest duplicated        elementMetadata<-
##  [43] elementMetadata   end<-             end              
##  [46] eval              expand.grid       expand           
##  [49] exptData<-        exptData          extractROWS      
##  [52] findOverlaps      flank             follow           
##  [55] granges           head              high2low         
##  [58] %in%              isDisjoint        is.unsorted      
##  [61] length            lengths           match            
##  [64] mcols<-           mcols             metadata<-       
##  [67] metadata          mstack            names<-          
##  [70] names             narrow            nearest          
##  [73] NROW              order             overlapsAny      
##  [76] parallelSlotNames pcompare          precede          
##  [79] promoters         ranges<-          ranges           
##  [82] rank              rbind             relist           
##  [85] rename            rep.int           replaceROWS      
##  [88] rep               resize            restrict         
##  [91] rev               rowData<-         rowData          
##  [94] ROWNAMES          rowRanges         rowRanges<-      
##  [97] seqinfo<-         seqinfo           seqlevelsInUse   
## [100] seqnames          shiftApply        shift            
## [103] showAsCell        show              sort             
## [106] split             split<-           start<-          
## [109] start             strand<-          strand           
## [112] subsetByOverlaps  subset            [                
## [115] [<-               [[<-              [[               
## [118] $<-               $                 table            
## [121] tail              tapply            trim             
## [124] unique            values<-          values           
## [127] width<-           width             window<-         
## [130] window            with              xtabs            
## [133] xtfrm            
## see '?methods' for accessing help and source code

We can construct a RangedSummarizedExperiment using the class constructor SummarizedExperiment(assays, rowRanges, colData) or it can be generated by other functions such as the summarizeOverlaps function that extends findOverlaps by providing options to resolve reads that overlap multiple features. For example given a list of bam files, we can geenerate a RangedSummarizedExperiment as such

  se <- summarizeOverlaps(
    features = A_merged_consensus,
    reads = bam_files,
    mode = "Union",
    singleEnd = FALSE,
    ignore.strand = TRUE,
    fragments = TRUE,
    inter.feature = FALSE
  )

assays(se)[[1]]
colData(se)
colnames(se)

DESeqDataSet Class

DESeqDataSet is a subclass of RangedSummarizedExperiment that serves as a container for differential expression analysis results. The DESeqDataSet(se, design) class constructor takes non-negative integer values in the counts se matrix and a formula to specifies the design of the experiment.

First we list the accessor functions for the class

methods(class="DESeqDataSet")
##   [1] aggregate              anyNA                  <=                    
##   [4] <                      ==                     >=                    
##   [7] >                      !=                     append                
##  [10] as.character           as.complex             as.data.frame         
##  [13] as.env                 as.integer             as.list               
##  [16] as.logical             as.matrix              as.numeric            
##  [19] as.raw                 assayNames<-           assayNames            
##  [22] assays<-               assays                 assay<-               
##  [25] assay                  by                     cbind                 
##  [28] coef                   coerce<-               coerce                
##  [31] colData<-              colData                Compare               
##  [34] countOverlaps          counts<-               counts                
##  [37] coverage               design<-               design                
##  [40] dimnames<-             dimnames               dim                   
##  [43] disjointBins           dispersionFunction<-   dispersionFunction    
##  [46] dispersions            dispersions<-          distance              
##  [49] distanceToNearest      duplicated             elementMetadata<-     
##  [52] elementMetadata        end<-                  end                   
##  [55] estimateDispersions    estimateSizeFactors    eval                  
##  [58] expand.grid            expand                 exptData<-            
##  [61] exptData               extractROWS            findOverlaps          
##  [64] flank                  follow                 granges               
##  [67] head                   high2low               %in%                  
##  [70] isDisjoint             is.unsorted            length                
##  [73] lengths                match                  mcols<-               
##  [76] mcols                  metadata<-             metadata              
##  [79] mstack                 names<-                names                 
##  [82] narrow                 nearest                normalizationFactors<-
##  [85] normalizationFactors   NROW                   order                 
##  [88] overlapsAny            parallelSlotNames      pcompare              
##  [91] plotDispEsts           plotMA                 precede               
##  [94] promoters              ranges<-               ranges                
##  [97] rank                   rbind                  relist                
## [100] rename                 rep.int                replaceROWS           
## [103] rep                    resize                 restrict              
## [106] rev                    rowData<-              rowData               
## [109] ROWNAMES               rowRanges              rowRanges<-           
## [112] seqinfo<-              seqinfo                seqlevelsInUse        
## [115] seqnames               shiftApply             shift                 
## [118] showAsCell             show                   sizeFactors           
## [121] sizeFactors<-          sort                   split                 
## [124] split<-                start<-                start                 
## [127] strand<-               strand                 subsetByOverlaps      
## [130] subset                 [                      [<-                   
## [133] [[<-                   [[                     $<-                   
## [136] $                      table                  tail                  
## [139] tapply                 trim                   unique                
## [142] values<-               values                 width<-               
## [145] width                  window<-               window                
## [148] with                   xtabs                  xtfrm                 
## see '?methods' for accessing help and source code

Examples of accessor functions for the class

  DESeq_dataset <- DESeqDataSet(se, design = ~ biosample)
  DESeq_dataset <-DESeq_dataset[rowSums(counts(DESeq_dataset)) > 1, ]
  rld <- rlog(DESeq_dataset, blind=FALSE)
  #fragments per kilobase per million mapped fragments
  data_rpkm <-fpkm(rld) 
  sampleDists <- dist( t( assay(rld) ) )
  ddsc  <- DESeq(DESeq_dataset)
  res <- results(dds)
  plotMA(res, ylim=c(-10,10))

To conduct differential analysis

  ddsc  <- DESeq(DESeq_dataset)
 #DESeqResults object
  res <- results(dds)
  plotMA(res, ylim=c(-10,10))
  summary(res)