Using the cets package

Welcome to the CETS package. We load the cets package, the brain reference profile data package and an example dilution dataset package:

library(cets)
## Warning: found methods to import for function 'as.list' but not the
## generic itself
library(cetsBrain)  # neuron/glia reference samples
library(cetsDilution)  # a neuron/glia dilution dataset

Example: Estimate brain sample cell type composition using the included neuron/glia reference dataset

Load calibration data set

The calibration dataset included in the cetsBrain package contains FACS-sorted neuron and glia samples from a cohort of depression patients and control subjects. The package contains two objects: A data matrix (brain) of sample methylation profiles for the top 10,000 most differentially methylated probes between neurons and glia:

# Attach the calibration objects (`brain`, `pdBrain`) shipped with cetsBrain
data("cetsBrain")
dim(brain)
## [1] 10000   146
# Peek at the first three probes across the first four samples
brain[seq_len(3), seq_len(4)]
##            X1_5175.G.P1A1._7766130090_R01C01
## cg02689072                            0.9271
## cg12093060                            0.8890
## cg05940691                            0.9427
##            X7_5175.N.P1A7._7766130090_R01C02
## cg02689072                            0.1191
## cg12093060                            0.1356
## cg05940691                            0.1758
##            X2_813.N.P1A2._7766130090_R02C01
## cg02689072                           0.1259
## cg12093060                           0.1268
## cg05940691                           0.1476
##            X8_1740.N.P1A8._7766130090_R02C02
## cg02689072                            0.1372
## cg12093060                            0.1110
## cg05940691                            0.1693

and a sample annotation data frame (pdBrain):

# First six rows of the sample annotation table
head(pdBrain, n = 6L)
##   ID1 celltype       diag    sex ethnicity age batch row array PMI
## 1   1        G Depression   Male Caucasian  47     1   A     1  22
## 2   2        N Depression   Male Caucasian  47     1   A     1  22
## 3   3        N    Control Female Caucasian  30     1   A     1  14
## 4   4        N    Control Female   African  13     1   A     1  17
## 5   5        N Depression Female   African  14     1   A     1  15
## 6   6        G Depression Female   African  14     1   A     1  15

Load the dilution test data set

# Attach the dilution series (`dilution`) shipped with cetsDilution
data("cetsDilution")
head(dilution, n = 6L)
##                 N0   N0.1   N0.2   N0.3   N0.4   N0.5   N0.6   N0.7   N0.8
## cg02689072 0.91645 0.7957 0.6680 0.5949 0.4569 0.3589 0.2922 0.2977 0.2412
## cg12093060 0.89246 0.8253 0.7311 0.6529 0.5625 0.5005 0.4316 0.3671 0.2971
## cg05940691 0.92573 0.8875 0.8296 0.7292 0.6874 0.5643 0.4983 0.3979 0.3876
## cg05403655 0.89744 0.8078 0.7690 0.6591 0.4956 0.4696 0.3461 0.3021 0.2259
## cg05699921 0.09288 0.1547 0.2049 0.2391 0.3280 0.3879 0.4580 0.4619 0.5457
## cg00968638 0.06451 0.1717 0.2725 0.3276 0.4564 0.5057 0.6222 0.6430 0.6719
##              N0.9     N1
## cg02689072 0.1946 0.1223
## cg12093060 0.2055 0.1368
## cg05940691 0.2318 0.2122
## cg05403655 0.1681 0.1181
## cg05699921 0.6115 0.7424
## cg00968638 0.7717 0.8294

The dilution data frame column headings indicate the fraction of neurons in each sample (e.g. N0.3 is a mix containing 30% neurons). There are 11 dilution samples, ranging from 0 to 100% neurons.

Create the neuron and glia reference profiles

# One logical sample mask per cell type; the list names become the
# column names of the reference profile
modelIdx <- with(pdBrain, list(
  neuron = celltype == "N",
  glia = celltype == "G"
))
refProfile <- getReference(brain, modelIdx)
head(refProfile, n = 6L)
##            neuron    glia
## cg02689072 0.1256 0.93511
## cg12093060 0.1426 0.88546
## cg05940691 0.1740 0.92097
## cg05403655 0.1380 0.91072
## cg05699921 0.7126 0.08176
## cg00968638 0.8165 0.07839

getReference returns a 2-column matrix, representing reference profiles for the two cell types.

Estimate the neuronal proportion

The estProportion function returns, for each sample, an estimate of the proportion (a value between 0 and 1) of the cell type given in the first column of its profile argument (neurons in this case).

# Estimated neuronal fraction for every dilution sample, shown to 2 decimals
prop <- estProportion(dilution, profile = refProfile)
round(prop, digits = 2)
##   N0 N0.1 N0.2 N0.3 N0.4 N0.5 N0.6 N0.7 N0.8 N0.9   N1 
## 0.00 0.05 0.14 0.23 0.33 0.44 0.54 0.65 0.78 0.90 1.00

To evaluate performance we can plot the predicted neuronal fraction against the known fraction expected from the dilution mix:

# Compare the CETS estimates with the known composition of each dilution.
# The axes and title are labelled in percent, so both fractions (0-1) are
# scaled by 100 before plotting.
expected_pct <- 100 * seq(0, 1, 0.1)  # 11 dilutions: 0%, 10%, ..., 100% neurons
estimated_pct <- 100 * prop
old_par <- par(mar = c(4, 5, 3, 1))  # wider left margin for the two-line y label
plot(expected_pct, estimated_pct,
  xlab = "Dilution\n% Neurons", ylab = "CETS estimate\n% Neurons",
  main = "Predicted vs Expected % Neurons"
)
abline(0, 1, col = "grey")  # identity line: perfect agreement
par(old_par)  # restore the previous graphical parameters

plot of chunk unnamed-chunk-7

Creating custom cell type reference profiles

We can specify alternate reference methylation profiles by calling getReference with the appropriate input parameters. For example:

# Each example builds a named list of logical sample masks (one per cell
# type) and passes it to getReference(); the list names label the resulting
# reference columns. Every example overwrites `idx` and `refProfile`.

# Cell-type masks shared by all examples
is_neuron <- pdBrain$celltype == "N"
is_glia <- pdBrain$celltype == "G"

# Example 1: Use only the control samples
keep <- pdBrain$diag == "Control"
idx <- list(controlNeuron = is_neuron & keep, controlGlia = is_glia & keep)
refProfile <- getReference(brain, idx)

# Example 2: Caucasian samples only
keep <- pdBrain$ethnicity == "Caucasian"
idx <- list(neuron = is_neuron & keep, glia = is_glia & keep)
refProfile <- getReference(brain, idx)

# Example 3: African American samples only.
# The annotation printed by head(pdBrain) codes this group as "African";
# comparing against "African American" would select no samples.
keep <- pdBrain$ethnicity == "African"
idx <- list(neuron = is_neuron & keep, glia = is_glia & keep)
refProfile <- getReference(brain, idx)

# Example 4: Females only
keep <- pdBrain$sex == "Female"
idx <- list(neuron = is_neuron & keep, glia = is_glia & keep)
refProfile <- getReference(brain, idx)

# Example 5: Specify an age range (strictly between 1 and 20)
keep <- pdBrain$age > 1 & pdBrain$age < 20
idx <- list(neuron = is_neuron & keep, glia = is_glia & keep)
refProfile <- getReference(brain, idx)

Session Information

sessionInfo()
## R version 2.15.1 (2012-06-22)
## Platform: x86_64-unknown-linux-gnu (64-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C         LC_TIME=C           
##  [4] LC_COLLATE=C         LC_MONETARY=C        LC_MESSAGES=C       
##  [7] LC_PAPER=C           LC_NAME=C            LC_ADDRESS=C        
## [10] LC_TELEPHONE=C       LC_MEASUREMENT=C     LC_IDENTIFICATION=C 
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] cetsDilution_0.99.0 cetsBrain_0.99.1    cets_0.99.0        
## [4] knitr_1.1           BiocInstaller_1.8.3
## 
## loaded via a namespace (and not attached):
##  [1] AnnotationDbi_1.20.3 Biobase_2.18.0       BiocGenerics_0.4.0  
##  [4] DBI_0.2-5            IRanges_1.16.6       RSQLite_0.11.2      
##  [7] XML_3.95-0.1         annotate_1.36.0      digest_0.6.3        
## [10] evaluate_0.4.3       formatR_0.7          genefilter_1.40.0   
## [13] parallel_2.15.1      splines_2.15.1       stats4_2.15.1       
## [16] stringr_0.6.2        survival_2.37-2      tools_2.15.1        
## [19] xtable_1.7-0