Full Script

This describes a generic pipeline that generates a beta matrix from IDAT files (produced by Illumina DNA methylation arrays) and the associated sample sheet.

library(ChAMP)

# ---- Configuration ----
# Both paths are anchored to the directory the script is launched from (the
# project root that contains `Idats/` and `Working_Dir/`) and stored as
# absolute paths. The script later calls setwd(working_dir); with relative
# paths, "./Idats/" would then be looked up inside the working directory and a
# repeated setwd("./Working_Dir") would fail.
project_dir <- getwd()

# Folder holding the IDAT files and the sample sheet.
idat_folder <- file.path(project_dir, "Idats")

# Output folder; created here if missing so that setwd(working_dir) cannot fail.
working_dir <- file.path(project_dir, "Working_Dir")
dir.create(working_dir, showWarnings = FALSE, recursive = TRUE)

array_type <- "450K" # CAN BE EITHER "EPIC" OR "450K"

core_num <- 4 # Number of Cores used for a few functions

group_column <- "Sample_Group" # NAME OF COLUMN FROM SAMPLE SHEET CONTAINING THE GROUP LABEL (0 - 1 or 2 levels)

# LOAD

#?champ.load

# Resolve the IDAT folder to an absolute path BEFORE changing directory:
# once setwd(working_dir) has run, a relative `idat_folder` would be looked up
# inside the working directory instead of next to it. mustWork = TRUE stops
# the script immediately with a clear error if the folder is missing.
idat_dir <- normalizePath(idat_folder, mustWork = TRUE)

# All result folders below are written relative to the working directory.
setwd(working_dir)

# Import the IDAT files and sample sheet and apply the probe/sample filters
# selected below (see ?champ.load for the meaning of each option).
myLoad <- champ.load(directory = idat_dir,
                     method = "ChAMP",
                     methValue = "B",
                     autoimpute = FALSE,
                     # Detection p-value filtering
                     filterDetP = TRUE,
                     ProbeCutoff = 0,
                     SampleCutoff = 0.1,
                     detPcut = 0.01,
                     # Bead count filtering
                     filterBeads = TRUE,
                     beadCutoff = 0.05,
                     # Probe-level filters
                     filterNoCG = TRUE,
                     filterSNPs = FALSE,
                     population = NULL,
                     filterMultiHit = TRUE,
                     filterXY = FALSE,
                     force = FALSE,
                     arraytype = array_type)
#?champ.QC

# QC plots on the loaded (not yet normalised) beta values, with samples
# labelled by the group column of the sample sheet.
# No setwd() here: the working directory was already set before loading, and
# calling setwd() again with a relative `working_dir` would try to enter
# a nested "Working_Dir/Working_Dir" and fail.
champ.QC(beta = myLoad$beta,
         pheno = myLoad$pd[, group_column],
         mdsPlot = TRUE,
         densityPlot = TRUE,
         dendrogram = TRUE,
         PDFplot = TRUE,
         Rplot = TRUE,
         Feature.sel = "None",
         resultsDir = "./CHAMP_QCimages/")

#?champ.norm
# Normalise the loaded beta values with the BMIQ method
# (good enough for most cases); the result is kept in `myNorm`.
myNorm <- champ.norm(
  beta = myLoad$beta,
  rgSet = myLoad$rgSet,
  mset = myLoad$mset,
  method = "BMIQ",
  plotBMIQ = FALSE,
  arraytype = array_type,
  cores = core_num,
  resultsDir = "./CHAMP_Normalization/"
)
#?champ.SVD
# Run champ.SVD on the normalised beta values together with the sample sheet
# data; output goes to ./CHAMP_SVDimages/.
champ.SVD(
  beta = myNorm,
  pd = myLoad$pd,
  rgSet = NULL,
  RGEffect = FALSE,
  Rplot = TRUE,
  PDFplot = TRUE,
  resultsDir = "./CHAMP_SVDimages/"
)

#?champ.runCombat
# Batch correction: `group_column` is passed as the variable name, and the
# "Slide" and "Array" columns of the sample sheet data as the batch names.
myCombat <- champ.runCombat(
  beta = myNorm,
  pd = myLoad$pd,
  batchname = c("Slide", "Array"),
  variablename = group_column,
  logitTrans = TRUE
)

# Same champ.SVD call as before, now on the ComBat-corrected beta values;
# output goes to a separate folder so both runs can be compared.
champ.SVD(
  beta = myCombat,
  pd = myLoad$pd,
  rgSet = NULL,
  RGEffect = FALSE,
  Rplot = TRUE,
  PDFplot = TRUE,
  resultsDir = "./CHAMP_SVDimages_after_Combat/"
)

# From here on `myNorm` holds the ComBat-corrected values.
myNorm <- myCombat

# QC plots on the normalised and ComBat-corrected beta values, with samples
# labelled by the group column of the sample sheet.
champ.QC(
  beta = myNorm,
  pheno = myLoad$pd[, group_column],
  Feature.sel = "None",
  mdsPlot = TRUE,
  densityPlot = TRUE,
  dendrogram = TRUE,
  Rplot = TRUE,
  PDFplot = TRUE,
  resultsDir = "./CHAMP_QCimages_after_Norm_and_Combat/"
)
# EXPORT
# Write the final beta values to the current working directory,
# once as CSV and once as an RDS file.

saveRDS(object = myNorm, file = "beta_matrix.RDS")
write.csv(x = myNorm, file = "beta_matrix.csv")

Working Directory Organisation

.
├── ChAMP_Pipeline.R #SCRIPT
├── Idats #Idat files and Sample Sheet
│   ├── 7497398081_R01C01_Grn.idat
│   ├── 7497398081_R01C01_Red.idat
│   ├── 7497398081_R01C02_Grn.idat
│   ├── 7497398081_R01C02_Red.idat
│   └── sample_sheet.csv
└── Working_Dir #Output Folder

Example Sample Sheet

Sample_Name Sample_Plate Sample_Group Sentrix_ID Sentrix_Position Covariate_1 Covariate_2
N_22990 Plate_1 NORMAL 203952880139 R01C01 0.488809972 BIG
D_46292 Plate_1 DISEASE 203952880139 R02C01 0.899841645 BIG
N_37025 Plate_1 NORMAL 203952880139 R03C01 0.412311254 MEDIUM
D_37596 Plate_1 DISEASE 203952880139 R04C01 0.63172733 SMALL
N_24795 Plate_1 NORMAL 203952880139 R05C01 0.246258701 SMALL
D_41714 Plate_1 DISEASE 203952880139 R06C01 0.435192689 MEDIUM
N_29307 Plate_2 NORMAL 203952880139 R07C01 0.94585593 MEDIUM
D_40017 Plate_2 DISEASE 203952880139 R08C01 0.712593684 MEDIUM
N_22106 Plate_2 NORMAL 203952880141 R01C01 0.571489252 SMALL
D_20343 Plate_2 DISEASE 203952880141 R02C01 0.286686925 BIG
N_45830 Plate_2 NORMAL 203952880141 R03C01 0.991324803 BIG
D_46246 Plate_2 DISEASE 203952880141 R04C01 0.687774355 BIG
D_20872 Plate_2 DISEASE 203952880141 R05C01 0.896529054 SMALL
N_28410 Plate_2 NORMAL 203952880141 R06C01 0.31366623 BIG