Skip to contents

The HiCExperiment class describes Hi-C contact files imported into R, either through the HiCExperiment constructor function or using the import method implemented by the HiCExperiment package.

Usage

HiCExperiment(
  file,
  resolution = NULL,
  focus = NULL,
  metadata = list(),
  topologicalFeatures = S4Vectors::SimpleList(compartments = GenomicRanges::GRanges(),
    borders = GenomicRanges::GRanges(), loops =
    InteractionSet::GInteractions(GenomicRanges::GRanges(), GenomicRanges::GRanges()),
    viewpoints = GenomicRanges::GRanges()),
  pairsFile = NULL,
  bed = NULL
)

makeHiCExperimentFromGInteractions(gi)

# S4 method for HiCExperiment
resolutions(x)

# S4 method for HiCExperiment
resolution(x)

# S4 method for HiCExperiment
focus(x)

# S4 method for HiCExperiment,character
focus(x) <- value

# S4 method for HiCExperiment,numeric
zoom(x, resolution)

# S4 method for HiCExperiment,character
refocus(x, focus)

# S4 method for HiCExperiment,missing
scores(x)

# S4 method for HiCExperiment,character
scores(x, name)

# S4 method for HiCExperiment,numeric
scores(x, name)

# S4 method for HiCExperiment,character,numeric
scores(x, name) <- value

# S4 method for HiCExperiment,missing
topologicalFeatures(x)

# S4 method for HiCExperiment,character
topologicalFeatures(x, name)

# S4 method for HiCExperiment,numeric
topologicalFeatures(x, name)

# S4 method for HiCExperiment,character,GRangesOrGInteractions
topologicalFeatures(x, name) <- value

# S4 method for HiCExperiment
pairsFile(x)

# S4 method for HiCExperiment,character
pairsFile(x) <- value

# S4 method for HiCExperiment,list
metadata(x) <- value

# S4 method for HiCExperiment,numeric
subsetByOverlaps(x, ranges)

# S4 method for HiCExperiment,logical
subsetByOverlaps(x, ranges)

# S4 method for HiCExperiment,GRanges
subsetByOverlaps(x, ranges, type = c("within", "any"))

# S4 method for HiCExperiment,GInteractions
subsetByOverlaps(x, ranges)

# S4 method for HiCExperiment,Pairs
subsetByOverlaps(x, ranges)

# S4 method for HiCExperiment,numeric,ANY,ANY
[(x, i)

# S4 method for HiCExperiment,GRanges,ANY,ANY
[(x, i)

# S4 method for HiCExperiment,logical,ANY,ANY
[(x, i)

# S4 method for HiCExperiment,GInteractions,ANY,ANY
[(x, i)

# S4 method for HiCExperiment,Pairs,ANY,ANY
[(x, i)

# S4 method for HiCExperiment,character,ANY,ANY
[(x, i)

# S4 method for HiCExperiment
fileName(object)

# S4 method for HiCExperiment
interactions(x, fillout.regions = FALSE)

# S4 method for HiCExperiment,GInteractions
interactions(x) <- value

# S4 method for HiCExperiment
length(x)

# S4 method for HiCExperiment
$(x, name) <- value

# S4 method for HiCExperiment
$(x, name)

# S4 method for HiCExperiment
seqinfo(x)

# S4 method for HiCExperiment
bins(x)

# S4 method for HiCExperiment
anchors(x)

# S4 method for HiCExperiment
regions(x)

# S4 method for HiCExperiment
cis(x)

# S4 method for HiCExperiment
trans(x)

Arguments

file

CoolFile or plain path to a Hi-C contact file

resolution

Resolution to use with the Hi-C contact file

focus

Chromosome coordinates for which interaction counts are extracted from the Hi-C contact file, provided as a character string (e.g. "II:4001-5000"). If not provided, the entire Hi-C contact file will be imported.

metadata

list of metadata

topologicalFeatures

topologicalFeatures provided as a named SimpleList

pairsFile

Path to an associated .pairs file (optional)

bed

Path to regions file generated by HiC-Pro (optional)

gi

GInteractions object

x

A HiCExperiment object.

value

Value to add to topologicalFeatures, scores, pairsFile or metadata slots.

name

Name of the element to access in topologicalFeatures or scores SimpleLists.

type

One of "within" or "any", specifying how interactions are subset by overlap with a provided GRanges.

i, ranges

A GRanges, GInteractions or Pairs object, a numeric or logical vector, or (for `[` only) coordinates given as a character string, used to subset a HiCExperiment.

object

A HiCExperiment object.

fillout.regions

Whether to add missing regions to the GInteractions' regions.

Value

An HiCExperiment object.

Slots

fileName

Path of Hi-C contact file

focus

Chromosome coordinates for which interaction counts are extracted from the Hi-C contact file.

resolutions

Resolutions available in the Hi-C contact file.

resolution

Current resolution

interactions

Genomic Interactions extracted from the Hi-C contact file

scores

Available interaction scores.

topologicalFeatures

Topological features associated with the dataset, e.g. loops (\<GInteractions\>), borders (\<GRanges\>), viewpoints (\<GRanges\>), etc.

pairsFile

Path to the .pairs file associated with the Hi-C contact file

metadata

metadata associated with the Hi-C contact file.

Examples

#####################################################################
## Create a HiCExperiment object from a disk-stored contact matrix ##
#####################################################################

mcool_file <- HiContactsData::HiContactsData("yeast_wt", "mcool")
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
pairs_file <- HiContactsData::HiContactsData("yeast_wt", "pairs.gz")
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
contacts <- HiCExperiment(
    file = mcool_file, 
    resolution = 8000L, 
    pairsFile = pairs_file
)
contacts
#> `HiCExperiment` object with 8,757,906 contacts over 1,517 regions 
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/13fc768c4160_7752" 
#> focus: "whole genome" 
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 8000 
#> interactions: 801962 
#> scores(2): count balanced 
#> topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0) 
#> pairsFile: /github/home/.cache/R/ExperimentHub/13fc530cc1f5_7753 
#> metadata(0):

#####################################################################
## ----- Manually create a HiCExperiment from GInteractions ------ ##
#####################################################################

gis <- interactions(contacts)[1:1000]
contacts2 <- makeHiCExperimentFromGInteractions(gis)
contacts2
#> `HiCExperiment` object with 6,670 contacts over 1,517 regions 
#> -------
#> fileName: N/A 
#> focus: N/A 
#> resolutions(1): 8000
#> active resolution: 8000 
#> interactions: 1000 
#> scores(2): count balanced 
#> topologicalFeatures: () 
#> pairsFile: N/A 
#> metadata(0):

#####################################################################
## -------- Slots present in an HiCExperiment object ------------- ##
#####################################################################

fileName(contacts)
#> [1] "/github/home/.cache/R/ExperimentHub/13fc768c4160_7752"
focus(contacts)
#> NULL
resolutions(contacts)
#> [1]  1000  2000  4000  8000 16000
resolution(contacts)
#> [1] 8000
interactions(contacts)
#> GInteractions object with 801962 interactions and 4 metadata columns:
#>            seqnames1       ranges1     seqnames2       ranges2 |   bin_id1
#>                <Rle>     <IRanges>         <Rle>     <IRanges> | <numeric>
#>        [1]         I        1-8000 ---         I        1-8000 |         0
#>        [2]         I        1-8000 ---         I    8001-16000 |         0
#>        [3]         I        1-8000 ---         I   16001-24000 |         0
#>        [4]         I        1-8000 ---         I   24001-32000 |         0
#>        [5]         I        1-8000 ---         I   32001-40000 |         0
#>        ...       ...           ... ...       ...           ... .       ...
#>   [801958]       XVI 920001-928000 ---       XVI 928001-936000 |      1513
#>   [801959]       XVI 920001-928000 ---       XVI 936001-944000 |      1513
#>   [801960]       XVI 928001-936000 ---       XVI 928001-936000 |      1514
#>   [801961]       XVI 928001-936000 ---       XVI 936001-944000 |      1514
#>   [801962]       XVI 936001-944000 ---       XVI 936001-944000 |      1515
#>              bin_id2     count  balanced
#>            <numeric> <numeric> <numeric>
#>        [1]         0       705  0.582493
#>        [2]         1      1260  0.935951
#>        [3]         2       557  0.294491
#>        [4]         3       274  0.174475
#>        [5]         4       291  0.138932
#>        ...       ...       ...       ...
#>   [801958]      1514       893  0.515549
#>   [801959]      1515       524  0.275326
#>   [801960]      1514      1317  0.728102
#>   [801961]      1515      1582  0.795995
#>   [801962]      1515      1409  0.645227
#>   -------
#>   regions: 1517 ranges and 4 metadata columns
#>   seqinfo: 16 sequences from an unspecified genome
scores(contacts)
#> List of length 2
#> names(2): count balanced
topologicalFeatures(contacts)
#> List of length 4
#> names(4): compartments borders loops viewpoints
pairsFile(contacts)
#>                                                  EH7703 
#> "/github/home/.cache/R/ExperimentHub/13fc530cc1f5_7753" 

#####################################################################
## ---------------------- Slot getters --------------------------- ##
#####################################################################

scores(contacts, 1) |> head()
#> [1]  705 1260  557  274  291  214
scores(contacts, 'balanced') |> head()
#> [1] 0.58249310 0.93595103 0.29449150 0.17447479 0.13893217 0.09970617
topologicalFeatures(contacts, 1)
#> GRanges object with 0 ranges and 0 metadata columns:
#>    seqnames    ranges strand
#>       <Rle> <IRanges>  <Rle>
#>   -------
#>   seqinfo: no sequences

#####################################################################
## ---------------------- Slot setters --------------------------- ##
#####################################################################

scores(contacts, 'random') <- runif(length(contacts))
topologicalFeatures(contacts, 'loops') <- InteractionSet::GInteractions(
  GenomicRanges::GRanges('II:15324'), 
  GenomicRanges::GRanges('II:24310')
)
pairsFile(contacts) <- HiContactsData('yeast_wt', 'pairs.gz')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache

#####################################################################
## ------------------ Subsetting functions ----------------------- ##
#####################################################################

contacts[1:100]
#> `HiCExperiment` object with 4,140 contacts over 100 regions 
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/13fc768c4160_7752" 
#> focus: "whole genome" 
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 8000 
#> interactions: 100 
#> scores(3): count balanced random 
#> topologicalFeatures: compartments(0) borders(0) loops(1) viewpoints(0) 
#> pairsFile: /github/home/.cache/R/ExperimentHub/13fc530cc1f5_7753 
#> metadata(0):
contacts['II']
#> `HiCExperiment` object with 471,364 contacts over 102 regions 
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/13fc768c4160_7752" 
#> focus: "II" 
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 8000 
#> interactions: 4693 
#> scores(3): count balanced random 
#> topologicalFeatures: compartments(0) borders(0) loops(1) viewpoints(0) 
#> pairsFile: /github/home/.cache/R/ExperimentHub/13fc530cc1f5_7753 
#> metadata(0):
contacts[c('II', 'III')]
#> `HiCExperiment` object with 632,446 contacts over 142 regions 
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/13fc768c4160_7752" 
#> focus: "II, III" 
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 8000 
#> interactions: 8502 
#> scores(3): count balanced random 
#> topologicalFeatures: compartments(0) borders(0) loops(1) viewpoints(0) 
#> pairsFile: /github/home/.cache/R/ExperimentHub/13fc530cc1f5_7753 
#> metadata(0):
contacts['II|III']
#> `HiCExperiment` object with 9,092 contacts over 142 regions 
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/13fc768c4160_7752" 
#> focus: "II:1-813184|III:1-316620" 
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 8000 
#> interactions: 3000 
#> scores(3): count balanced random 
#> topologicalFeatures: compartments(0) borders(0) loops(1) viewpoints(0) 
#> pairsFile: /github/home/.cache/R/ExperimentHub/13fc530cc1f5_7753 
#> metadata(0):
contacts['II:10001-30000|III:50001-90000']
#> `HiCExperiment` object with 11 contacts over 5 regions 
#> -------
#> fileName: "/github/home/.cache/R/ExperimentHub/13fc768c4160_7752" 
#> focus: "II:10001-30000|III:50001-90000" 
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 8000 
#> interactions: 4 
#> scores(3): count balanced random 
#> topologicalFeatures: compartments(0) borders(0) loops(1) viewpoints(0) 
#> pairsFile: /github/home/.cache/R/ExperimentHub/13fc530cc1f5_7753 
#> metadata(0):

#####################################################################
## --------------------- Utils functions ------------------------- ##
#####################################################################
## Adapted from other packages

seqinfo(contacts)
#> Seqinfo object with 16 sequences from an unspecified genome:
#>   seqnames seqlengths isCircular genome
#>   I            230218       <NA>   <NA>
#>   II           813184       <NA>   <NA>
#>   III          316620       <NA>   <NA>
#>   IV          1531933       <NA>   <NA>
#>   V            576874       <NA>   <NA>
#>   ...             ...        ...    ...
#>   XII         1078177       <NA>   <NA>
#>   XIII         924431       <NA>   <NA>
#>   XIV          784333       <NA>   <NA>
#>   XV          1091291       <NA>   <NA>
#>   XVI          948066       <NA>   <NA>
bins(contacts)
#> GRanges object with 1517 ranges and 2 metadata columns:
#>                     seqnames        ranges strand |    bin_id    weight
#>                        <Rle>     <IRanges>  <Rle> | <numeric> <numeric>
#>            I_1_8000        I        1-8000      * |         0 0.0287442
#>        I_8001_16000        I    8001-16000      * |         1 0.0258423
#>       I_16001_24000        I   16001-24000      * |         2 0.0183936
#>       I_24001_32000        I   24001-32000      * |         3 0.0221529
#>       I_32001_40000        I   32001-40000      * |         4 0.0166096
#>                 ...      ...           ...    ... .       ...       ...
#>   XVI_912001_920000      XVI 912001-920000      * |      1512 0.0122756
#>   XVI_920001_928000      XVI 920001-928000      * |      1513 0.0245536
#>   XVI_928001_936000      XVI 928001-936000      * |      1514 0.0235127
#>   XVI_936001_944000      XVI 936001-944000      * |      1515 0.0213994
#>   XVI_944001_948066      XVI 944001-948066      * |      1516       NaN
#>   -------
#>   seqinfo: 16 sequences from an unspecified genome
anchors(contacts)
#> $first
#> GRanges object with 801962 ranges and 4 metadata columns:
#>            seqnames        ranges strand |    bin_id    weight   chr    center
#>               <Rle>     <IRanges>  <Rle> | <numeric> <numeric> <Rle> <integer>
#>        [1]        I        1-8000      * |         0 0.0287442     I      4000
#>        [2]        I        1-8000      * |         0 0.0287442     I      4000
#>        [3]        I        1-8000      * |         0 0.0287442     I      4000
#>        [4]        I        1-8000      * |         0 0.0287442     I      4000
#>        [5]        I        1-8000      * |         0 0.0287442     I      4000
#>        ...      ...           ...    ... .       ...       ...   ...       ...
#>   [801958]      XVI 920001-928000      * |      1513 0.0245536   XVI    924000
#>   [801959]      XVI 920001-928000      * |      1513 0.0245536   XVI    924000
#>   [801960]      XVI 928001-936000      * |      1514 0.0235127   XVI    932000
#>   [801961]      XVI 928001-936000      * |      1514 0.0235127   XVI    932000
#>   [801962]      XVI 936001-944000      * |      1515 0.0213994   XVI    940000
#>   -------
#>   seqinfo: 16 sequences from an unspecified genome
#> 
#> $second
#> GRanges object with 801962 ranges and 4 metadata columns:
#>            seqnames        ranges strand |    bin_id    weight   chr    center
#>               <Rle>     <IRanges>  <Rle> | <numeric> <numeric> <Rle> <integer>
#>        [1]        I        1-8000      * |         0 0.0287442     I      4000
#>        [2]        I    8001-16000      * |         1 0.0258423     I     12000
#>        [3]        I   16001-24000      * |         2 0.0183936     I     20000
#>        [4]        I   24001-32000      * |         3 0.0221529     I     28000
#>        [5]        I   32001-40000      * |         4 0.0166096     I     36000
#>        ...      ...           ...    ... .       ...       ...   ...       ...
#>   [801958]      XVI 928001-936000      * |      1514 0.0235127   XVI    932000
#>   [801959]      XVI 936001-944000      * |      1515 0.0213994   XVI    940000
#>   [801960]      XVI 928001-936000      * |      1514 0.0235127   XVI    932000
#>   [801961]      XVI 936001-944000      * |      1515 0.0213994   XVI    940000
#>   [801962]      XVI 936001-944000      * |      1515 0.0213994   XVI    940000
#>   -------
#>   seqinfo: 16 sequences from an unspecified genome
#> 
regions(contacts)
#> GRanges object with 1517 ranges and 4 metadata columns:
#>                     seqnames        ranges strand |    bin_id    weight   chr
#>                        <Rle>     <IRanges>  <Rle> | <numeric> <numeric> <Rle>
#>            I_1_8000        I        1-8000      * |         0 0.0287442     I
#>        I_8001_16000        I    8001-16000      * |         1 0.0258423     I
#>       I_16001_24000        I   16001-24000      * |         2 0.0183936     I
#>       I_24001_32000        I   24001-32000      * |         3 0.0221529     I
#>       I_32001_40000        I   32001-40000      * |         4 0.0166096     I
#>                 ...      ...           ...    ... .       ...       ...   ...
#>   XVI_912001_920000      XVI 912001-920000      * |      1512 0.0122756   XVI
#>   XVI_920001_928000      XVI 920001-928000      * |      1513 0.0245536   XVI
#>   XVI_928001_936000      XVI 928001-936000      * |      1514 0.0235127   XVI
#>   XVI_936001_944000      XVI 936001-944000      * |      1515 0.0213994   XVI
#>   XVI_944001_948066      XVI 944001-948066      * |      1516       NaN   XVI
#>                        center
#>                     <integer>
#>            I_1_8000      4000
#>        I_8001_16000     12000
#>       I_16001_24000     20000
#>       I_24001_32000     28000
#>       I_32001_40000     36000
#>                 ...       ...
#>   XVI_912001_920000    916000
#>   XVI_920001_928000    924000
#>   XVI_928001_936000    932000
#>   XVI_936001_944000    940000
#>   XVI_944001_948066    946033
#>   -------
#>   seqinfo: 16 sequences from an unspecified genome

#####################################################################
## ------------- Coercing HiCExperiment objects ------------------ ##
#####################################################################

as(contacts, 'GInteractions')
#> GInteractions object with 801962 interactions and 5 metadata columns:
#>            seqnames1       ranges1     seqnames2       ranges2 |   bin_id1
#>                <Rle>     <IRanges>         <Rle>     <IRanges> | <numeric>
#>        [1]         I        1-8000 ---         I        1-8000 |         0
#>        [2]         I        1-8000 ---         I    8001-16000 |         0
#>        [3]         I        1-8000 ---         I   16001-24000 |         0
#>        [4]         I        1-8000 ---         I   24001-32000 |         0
#>        [5]         I        1-8000 ---         I   32001-40000 |         0
#>        ...       ...           ... ...       ...           ... .       ...
#>   [801958]       XVI 920001-928000 ---       XVI 928001-936000 |      1513
#>   [801959]       XVI 920001-928000 ---       XVI 936001-944000 |      1513
#>   [801960]       XVI 928001-936000 ---       XVI 928001-936000 |      1514
#>   [801961]       XVI 928001-936000 ---       XVI 936001-944000 |      1514
#>   [801962]       XVI 936001-944000 ---       XVI 936001-944000 |      1515
#>              bin_id2     count  balanced     random
#>            <numeric> <numeric> <numeric>  <numeric>
#>        [1]         0       705  0.582493 0.08075014
#>        [2]         1      1260  0.935951 0.83433304
#>        [3]         2       557  0.294491 0.60076089
#>        [4]         3       274  0.174475 0.15720844
#>        [5]         4       291  0.138932 0.00739944
#>        ...       ...       ...       ...        ...
#>   [801958]      1514       893  0.515549  0.0686281
#>   [801959]      1515       524  0.275326  0.1704898
#>   [801960]      1514      1317  0.728102  0.2895182
#>   [801961]      1515      1582  0.795995  0.1125209
#>   [801962]      1515      1409  0.645227  0.7087667
#>   -------
#>   regions: 1517 ranges and 4 metadata columns
#>   seqinfo: 16 sequences from an unspecified genome
as(contacts, 'ContactMatrix')
#> class: ContactMatrix 
#> dim: 1517 1517 
#> type: dgCMatrix 
#> rownames: NULL
#> colnames: NULL
#> metadata(0):
#> regions: 1517
as(contacts, 'matrix')[seq_len(10), seq_len(10)]
#>             [,1]       [,2]       [,3]       [,4]       [,5]       [,6]
#>  [1,] 0.58249310 0.93595103 0.29449150 0.17447479 0.13893217 0.09970617
#>  [2,] 0.93595103 0.58167668 0.47153090 0.22269611 0.16139073 0.12650163
#>  [3,] 0.29449150 0.47153090 0.17119221 0.22492467 0.14572837 0.09093352
#>  [4,] 0.17447479 0.22269611 0.22492467 0.12661416 0.31091881 0.19533840
#>  [5,] 0.13893217 0.16139073 0.14572837 0.31091881 0.40002396 0.40626151
#>  [6,] 0.09970617 0.12650163 0.09093352 0.19533840 0.40626151 0.28033628
#>  [7,] 0.07443284 0.09628981 0.08017355 0.14042680 0.17496056 0.28222645
#>  [8,] 0.05510079 0.07961469 0.05603711 0.07924403 0.12479940 0.17533293
#>  [9,] 0.04456457 0.05371419 0.04230561 0.06416197 0.08546010 0.10963404
#> [10,] 0.01947702 0.02495274 0.01869518 0.02514303 0.03882842 0.05464144
#>             [,7]       [,8]       [,9]      [,10]
#>  [1,] 0.07443284 0.05510079 0.04456457 0.01947702
#>  [2,] 0.09628981 0.07961469 0.05371419 0.02495274
#>  [3,] 0.08017355 0.05603711 0.04230561 0.01869518
#>  [4,] 0.14042680 0.07924403 0.06416197 0.02514303
#>  [5,] 0.17496056 0.12479940 0.08546010 0.03882842
#>  [6,] 0.28222645 0.17533293 0.10963404 0.05464144
#>  [7,] 0.25591092 0.37561957 0.17347572 0.09070876
#>  [8,] 0.37561957 0.47046086 0.37791267 0.14931650
#>  [9,] 0.17347572 0.37791267 0.35093060 0.28081603
#> [10,] 0.09070876 0.14931650 0.28081603 0.33402283
as(contacts, 'data.frame')[seq_len(10), seq_len(10)]
#>    seqnames1 start1 end1 width1 strand1 bin_id1    weight1 center1 seqnames2
#> 1          I      1 8000   8000       *       0 0.02874424    4000         I
#> 2          I      1 8000   8000       *       0 0.02874424    4000         I
#> 3          I      1 8000   8000       *       0 0.02874424    4000         I
#> 4          I      1 8000   8000       *       0 0.02874424    4000         I
#> 5          I      1 8000   8000       *       0 0.02874424    4000         I
#> 6          I      1 8000   8000       *       0 0.02874424    4000         I
#> 7          I      1 8000   8000       *       0 0.02874424    4000         I
#> 8          I      1 8000   8000       *       0 0.02874424    4000         I
#> 9          I      1 8000   8000       *       0 0.02874424    4000         I
#> 10         I      1 8000   8000       *       0 0.02874424    4000         I
#>    start2
#> 1       1
#> 2    8001
#> 3   16001
#> 4   24001
#> 5   32001
#> 6   40001
#> 7   48001
#> 8   56001
#> 9   64001
#> 10  72001