ChAMP Methylation Pipeline

Full Script

This describes a generic pipeline that generates a beta matrix from IDAT files (produced by Illumina DNA methylation arrays) and the associated sample sheet.

library(ChAMP)

# ---- Configuration ----
# Both paths are resolved to absolute paths here, relative to the directory
# the script is launched from. The script later calls setwd(working_dir);
# with relative paths, "./Idats/" would then be looked up *inside* the working
# directory, and any repeated setwd(working_dir) would fail.

# Folder holding the idat files and the sample sheet. It must already exist
# (mustWork = TRUE stops here with a clear error otherwise). The trailing
# slash of the original setting is kept.
idat_folder <- paste0(
  normalizePath("./Idats/", winslash = "/", mustWork = TRUE),
  "/"
)

# Output folder; created on first run if it does not exist yet.
working_dir <- "./Working_Dir"
dir.create(working_dir, showWarnings = FALSE, recursive = TRUE)
working_dir <- normalizePath(working_dir, winslash = "/", mustWork = TRUE)

array_type <- "450K" # CAN BE EITHER "EPIC" OR "450K"

core_num <- 4 # Number of cores, passed as `cores` to champ.norm

# NAME OF THE SAMPLE SHEET COLUMN CONTAINING THE GROUP LABEL
# (the example sample sheet uses two levels: NORMAL and DISEASE)
group_column <- "Sample_Group"

# LOAD

#?champ.load

# From here on, relative output paths are resolved against working_dir.
# NOTE(review): if idat_folder is a relative path, it is also resolved against
# working_dir after this call, not against the launch directory.
setwd(working_dir)

# Read the idat files and sample sheet found in idat_folder and apply the
# probe/sample filters selected below. See ?champ.load for the exact meaning
# of each cutoff.
myLoad <- champ.load(
  directory      = idat_folder,
  method         = "ChAMP",
  methValue      = "B",
  autoimpute     = FALSE,
  # detection p-value filtering
  filterDetP     = TRUE,
  ProbeCutoff    = 0,
  SampleCutoff   = 0.1,
  detPcut        = 0.01,
  # bead-count filtering
  filterBeads    = TRUE,
  beadCutoff     = 0.05,
  # probe-annotation based filters (SNP and XY filters are switched off)
  filterNoCG     = TRUE,
  filterSNPs     = FALSE,
  population     = NULL,
  filterMultiHit = TRUE,
  filterXY       = FALSE,
  force          = FALSE,
  arraytype      = array_type
)
#?champ.QC

# QC plots (MDS, density, dendrogram) on the loaded, not yet normalised beta
# values, using the sample sheet column named by group_column as phenotype.
#
# No setwd() here: the working directory was already set to working_dir before
# loading. Repeating setwd() with a relative working_dir would try to enter
# "<working_dir>/Working_Dir" and abort the script.
champ.QC(beta = myLoad$beta,
         pheno = myLoad$pd[, group_column],
         mdsPlot = TRUE,
         densityPlot = TRUE,
         dendrogram = TRUE,
         PDFplot = TRUE,
         Rplot = TRUE,
         Feature.sel = "None",
         resultsDir = "./CHAMP_QCimages/") # relative to the working directory

#?champ.norm
# Normalise the loaded beta values with BMIQ (good enough for most cases).
# rgSet and mset are passed through from myLoad as-is.
# NOTE(review): they may be NULL when loading with method = "ChAMP" - confirm.
myNorm <- champ.norm(
  beta       = myLoad$beta,
  rgSet      = myLoad$rgSet,
  mset       = myLoad$mset,
  resultsDir = "./CHAMP_Normalization/",
  method     = "BMIQ",
  plotBMIQ   = FALSE,
  arraytype  = array_type,
  cores      = core_num
)
#?champ.SVD
# SVD analysis of the normalised beta values against the sample sheet
# variables in myLoad$pd. No rgSet is supplied and RGEffect is off.
champ.SVD(
  beta       = myNorm,
  rgSet      = NULL,
  pd         = myLoad$pd,
  RGEffect   = FALSE,
  PDFplot    = TRUE,
  Rplot      = TRUE,
  resultsDir = "./CHAMP_SVDimages/"
)

#?champ.runCombat
# Batch correction of the normalised beta values: "Slide" and "Array" are
# treated as batch variables, and group_column is the variable of interest.
myCombat <- champ.runCombat(
  beta         = myNorm,
  pd           = myLoad$pd,
  variablename = group_column,
  batchname    = c("Slide", "Array"),
  logitTrans   = TRUE
)

# Same SVD analysis as before, now on the batch-corrected values, written to
# a separate results folder so the two runs can be compared.
champ.SVD(
  beta       = myCombat,
  rgSet      = NULL,
  pd         = myLoad$pd,
  RGEffect   = FALSE,
  PDFplot    = TRUE,
  Rplot      = TRUE,
  resultsDir = "./CHAMP_SVDimages_after_Combat/"
)

# From here on myNorm holds the batch-corrected matrix
# (the BMIQ-only result is overwritten).

myNorm <- myCombat

# QC plots on the normalised and batch-corrected values, with the same
# settings as the first QC run but a separate results folder.
champ.QC(
  beta        = myNorm,
  pheno       = myLoad$pd[, group_column],
  mdsPlot     = TRUE,
  densityPlot = TRUE,
  dendrogram  = TRUE,
  PDFplot     = TRUE,
  Rplot       = TRUE,
  Feature.sel = "None",
  resultsDir  = "./CHAMP_QCimages_after_Norm_and_Combat/"
)
# EXPORT

# Write the final beta matrix to the current working directory, once as CSV
# and once as an RDS file.
write.csv(x = myNorm, file = "beta_matrix.csv")
saveRDS(object = myNorm, file = "beta_matrix.RDS")

Working Directory Organisation

.
├── ChAMP_Pipeline.R #SCRIPT
├── Idats #Idat files and Sample Sheet
│   ├── 7497398081_R01C01_Grn.idat
│   ├── 7497398081_R01C01_Red.idat
│   ├── 7497398081_R01C02_Grn.idat
│   ├── 7497398081_R01C02_Red.idat
│   └── sample_sheet.csv
└── Working_Dir #Output Folder

Example Sample Sheet

Sample_Name	Sample_Plate	Sample_Group	Sentrix_ID	Sentrix_Position	Covariate_1	Covariate_2
N_22990	Plate_1	NORMAL	203952880139	R01C01	0.488809972	BIG
D_46292	Plate_1	DISEASE	203952880139	R02C01	0.899841645	BIG
N_37025	Plate_1	NORMAL	203952880139	R03C01	0.412311254	MEDIUM
D_37596	Plate_1	DISEASE	203952880139	R04C01	0.63172733	SMALL
N_24795	Plate_1	NORMAL	203952880139	R05C01	0.246258701	SMALL
D_41714	Plate_1	DISEASE	203952880139	R06C01	0.435192689	MEDIUM
N_29307	Plate_2	NORMAL	203952880139	R07C01	0.94585593	MEDIUM
D_40017	Plate_2	DISEASE	203952880139	R08C01	0.712593684	MEDIUM
N_22106	Plate_2	NORMAL	203952880141	R01C01	0.571489252	SMALL
D_20343	Plate_2	DISEASE	203952880141	R02C01	0.286686925	BIG
N_45830	Plate_2	NORMAL	203952880141	R03C01	0.991324803	BIG
D_46246	Plate_2	DISEASE	203952880141	R04C01	0.687774355	BIG
D_20872	Plate_2	DISEASE	203952880141	R05C01	0.896529054	SMALL
N_28410	Plate_2	NORMAL	203952880141	R06C01	0.31366623	BIG