Full Script
This script describes a generic pipeline that generates a beta matrix from IDAT files (produced by Illumina DNA methylation arrays) and their associated sample sheet.
library(ChAMP)
# ---- User settings ----
# Folder holding the IDAT files and the sample sheet
idat_folder <- "./Idats/"
# Folder the script switches into (via setwd) before running the pipeline
working_dir <- "./Working_Dir"
# Array platform: can be either "EPIC" or "450K"
array_type <- "450K"
# Number of cores used for a few functions
core_num <- 4
# Name of the sample-sheet column containing the group label (0 - 1 or 2 levels)
group_column <- "Sample_Group"
# LOAD
#?champ.load
# Resolve both folders to absolute paths BEFORE changing directory. They are
# written relative to the directory the script is launched from, so after
# setwd() a relative "./Idats/" would be searched for inside the working
# directory instead of next to it.
idat_folder <- normalizePath(idat_folder, mustWork = TRUE)
# Create the output folder when it does not exist yet, then make it absolute so
# that any later setwd(working_dir) is a harmless no-op rather than an error.
dir.create(working_dir, showWarnings = FALSE, recursive = TRUE)
working_dir <- normalizePath(working_dir, mustWork = TRUE)
setwd(working_dir)

# Import the IDAT files and apply the probe/sample filters selected below.
myLoad <- champ.load(
  directory = idat_folder,
  method = "ChAMP",
  methValue = "B",
  autoimpute = FALSE,
  filterDetP = TRUE,
  ProbeCutoff = 0,
  SampleCutoff = 0.1,
  detPcut = 0.01,
  filterBeads = TRUE,
  beadCutoff = 0.05,
  filterNoCG = TRUE,
  filterSNPs = FALSE,
  population = NULL,
  filterMultiHit = TRUE,
  filterXY = FALSE,
  force = FALSE,
  arraytype = array_type
)
#?champ.QC
# QC plots (MDS, density, dendrogram) on the beta values returned by champ.load,
# labelled by the group column of the sample sheet.
# No setwd() here: the working directory was already set before loading, and
# repeating setwd() with the relative path would look for a nested
# "Working_Dir" inside the current one and abort the script.
champ.QC(
  beta = myLoad$beta,
  pheno = myLoad$pd[, group_column],
  mdsPlot = TRUE,
  densityPlot = TRUE,
  dendrogram = TRUE,
  PDFplot = TRUE,
  Rplot = TRUE,
  Feature.sel = "None",
  resultsDir = "./CHAMP_QCimages/"
)
#?champ.norm
# Normalise the loaded beta values with BMIQ (good enough for most cases).
# NOTE(review): rgSet/mset are forwarded as-is from myLoad; they are presumably
# only needed by non-BMIQ methods - confirm against the ChAMP documentation.
myNorm <- champ.norm(
  method = "BMIQ",
  arraytype = array_type,
  cores = core_num,
  beta = myLoad$beta,
  rgSet = myLoad$rgSet,
  mset = myLoad$mset,
  plotBMIQ = FALSE,
  resultsDir = "./CHAMP_Normalization/"
)
#?champ.SVD
# SVD analysis of the normalised betas against the sample-sheet columns,
# run before any batch correction; plots go to ./CHAMP_SVDimages/.
champ.SVD(
  beta = myNorm,
  pd = myLoad$pd,
  rgSet = NULL,
  RGEffect = FALSE,
  Rplot = TRUE,
  PDFplot = TRUE,
  resultsDir = "./CHAMP_SVDimages/"
)
#?champ.runCombat
# Batch correction of the normalised betas, with the group column passed as
# the variable name and "Slide"/"Array" as the batches.
# NOTE(review): "Slide"/"Array" are presumably the names ChAMP gives to the
# Sentrix_ID / Sentrix_Position sample-sheet columns - confirm.
myCombat <- champ.runCombat(
  beta = myNorm,
  pd = myLoad$pd,
  batchname = c("Slide", "Array"),
  variablename = group_column,
  logitTrans = TRUE
)
# Repeat the SVD analysis on the ComBat-corrected betas; the plots are written
# to a separate folder so they can be compared with the pre-ComBat ones.
champ.SVD(
  beta = myCombat,
  pd = myLoad$pd,
  rgSet = NULL,
  RGEffect = FALSE,
  Rplot = TRUE,
  PDFplot = TRUE,
  resultsDir = "./CHAMP_SVDimages_after_Combat/"
)
# From here on, myNorm holds the ComBat-corrected matrix.
myNorm <- myCombat

# Same QC plots as on the raw data, now on the normalised and batch-corrected
# betas, written to their own results folder.
champ.QC(
  beta = myNorm,
  pheno = myLoad$pd[, group_column],
  Feature.sel = "None",
  mdsPlot = TRUE,
  densityPlot = TRUE,
  dendrogram = TRUE,
  Rplot = TRUE,
  PDFplot = TRUE,
  resultsDir = "./CHAMP_QCimages_after_Norm_and_Combat/"
)
# EXPORT
# Save the final beta matrix twice in the current working directory:
# as CSV and as an RDS file.
write.csv(x = myNorm, file = "beta_matrix.csv")
saveRDS(object = myNorm, file = "beta_matrix.RDS")
Working Directory Organisation
.
├── ChAMP_Pipeline.R #SCRIPT
├── Idats #Idat files and Sample Sheet
│ ├── 7497398081_R01C01_Grn.idat
│ ├── 7497398081_R01C01_Red.idat
│ ├── 7497398081_R01C02_Grn.idat
│ ├── 7497398081_R01C02_Red.idat
│ └── sample_sheet.csv
└── Working_Dir #Output Folder
Example Sample Sheet
Sample_Name | Sample_Plate | Sample_Group | Sentrix_ID | Sentrix_Position | Covariate_1 | Covariate_2 |
---|---|---|---|---|---|---|
N_22990 | Plate_1 | NORMAL | 203952880139 | R01C01 | 0.488809972 | BIG |
D_46292 | Plate_1 | DISEASE | 203952880139 | R02C01 | 0.899841645 | BIG |
N_37025 | Plate_1 | NORMAL | 203952880139 | R03C01 | 0.412311254 | MEDIUM |
D_37596 | Plate_1 | DISEASE | 203952880139 | R04C01 | 0.63172733 | SMALL |
N_24795 | Plate_1 | NORMAL | 203952880139 | R05C01 | 0.246258701 | SMALL |
D_41714 | Plate_1 | DISEASE | 203952880139 | R06C01 | 0.435192689 | MEDIUM |
N_29307 | Plate_2 | NORMAL | 203952880139 | R07C01 | 0.94585593 | MEDIUM |
D_40017 | Plate_2 | DISEASE | 203952880139 | R08C01 | 0.712593684 | MEDIUM |
N_22106 | Plate_2 | NORMAL | 203952880141 | R01C01 | 0.571489252 | SMALL |
D_20343 | Plate_2 | DISEASE | 203952880141 | R02C01 | 0.286686925 | BIG |
N_45830 | Plate_2 | NORMAL | 203952880141 | R03C01 | 0.991324803 | BIG |
D_46246 | Plate_2 | DISEASE | 203952880141 | R04C01 | 0.687774355 | BIG |
D_20872 | Plate_2 | DISEASE | 203952880141 | R05C01 | 0.896529054 | SMALL |
N_28410 | Plate_2 | NORMAL | 203952880141 | R06C01 | 0.31366623 | BIG |