#APPENDIX 2 #ANNOTATED R CODE FOR ESPRESSO POWER CALCULATION #FOR A BINARY OUTCOME (CASE-CONTROL STUDY) #STATE WHICH ROW OF THE INPUT CONTROL FILE TO START READING AT #THIS PARAMETER HAS NO EFFECT WHEN PARAMETERS ARE SPECIFIED DIRECTLY VIA THE KEYBOARD start.at.scenario<-1 #PRINT TRACER CODE EVERY Nth ITERATION #THIS ENSURES THAT YOU CAN SEE IF THE PROGRAM GRINDS TO A HALT FOR SOME REASON (IT SHOULDN'T) trace.interval<-10 #LOCATION OF INPUT CONTROL FILE infile<-"c:/pb51/ESPRESSO/WEBSITE.FILES/ESPRESSO.CC.batch.example.03Aug2008.control.csv" #LOCATION OF OUTPUT FILE outfile<-"c:/pb51/ESPRESSO/WEBSITE.FILES/ESPRESSO.CC.batch.example.03Aug2008.output.csv" #SET UP INITIAL PARAMETERS NOT BEING READ IN FROM CONTROL FILE #CREATE UP TO 20M SUBJECTS IN BLOCKS OF 20K UNTIL REQUIRED NUMBER OF #CASES AND CONTROLS IS ACHIEVED. IN GENERAL THE ONLY PROBLEM IN ACHIEVING THE #REQUIRED NUMBER OF CASES WILL OCCUR IF THE DISEASE PREVALENCE IS VERY LOW allowed.sample.size<-20000000 block.size<-20000 #READ IN CONTROL PARAMETERS FROM INPUT CONTROL FILE – EACH ROW IS ONE SCENARIO s.parameters<-scan(infile,what=list( scenario.id=0,random.seed=0,numsims=0,numcases=0,numcontrols=0,is.interaction=0,is.add.gene=0,disease.prev=0,MAF=0,env.prev=0,or.geno=0,or.env=0,or.int=0, RR.5.95=0,pval=0,power=0, sensitivity.geno=0,specificity.geno=0, sensitivity.env=0,specificity.env=0, sensitivity.pheno=0,specificity.pheno=0 ),skip=start.at.scenario,sep=';') #TOTAL NUMBER OF SCENARIOS numscenarios<-length(s.parameters$scenario.id) #DECLARE MATRIX TO SAVE FINAL OUTPUT output.matrix.save<-NULL #MAIN SCENARIO LEVEL LOOP – DEALS WITH ONE SCENARIO AT A TIME #(j FROM 1 TO TOTAL NUMBER OF SCENARIOS) for(j in 1:numscenarios) {#SCE1LOOP #SET RANDOM NUMBER SEED. #RANDOM NUMBER GENERATOR STARTS WITH SEED SET AS SPECIFIED (#INPUT PARAMETER 2) seed.val<-s.parameters$random.seed[j] set.seed(s.parameters$random.seed[j]) #PARAMETER VALUES FOR Jth SCENARIO #GENERAL PARAMETERS scenario.id<-s.parameters$scenario.id[j] #Input parameter 1 seed.val<-s.parameters$random.seed[j] #Input parameter 2 numsims<-s.parameters$numsims[j] #Input parameter 3 numcases<-s.parameters$numcases[j] #Input parameter 4 numcontrols<-s.parameters$numcontrols[j] #Input parameter 5 #MODEL STRUCTURE is.interaction<-s.parameters$is.interaction[j] #Input parameter 6 is.add.gene<-s.parameters$is.add.gene[j] #Input parameter 7 is.ME<-0 if(is.interaction==0)is.ME<-1 is.binary<-0 if(is.add.gene==0)is.binary<-1 #MODELLING PARAMETERS disease.prev<-s.parameters$disease.prev[j] #Input parameter 8 MAF<-s.parameters$MAF[j] #Input parameter 9 env.prev<-s.parameters$env.prev[j] #Input parameter 10 or.geno<-s.parameters$or.geno[j] #Input parameter 11 or.env<-s.parameters$or.env[j] #Input parameter 12 or.int<-s.parameters$or.int[j] #Input parameter 13 RR.5.95<-s.parameters$RR.5.95[j] #Input parameter 14 pval<-s.parameters$pval[j] #Input parameter 15 power<-s.parameters$power[j] #Input parameter 16 #CONVERT BASELINE ODDS RATIO FROM 5th TO 95th PERCENTILES INTO THE CORRESPONDING VARIANCE FOR A #NORMALLY DISTRIBUTED RANDOM EFFECT sigma2.subject.effect<-(log(RR.5.95)/(2*qnorm(0.95)))^2 #ASSESSMENT MISCLASSIFICATION PARAMETERS genotype.misclassification.rate.1.to.0<-(1-s.parameters$sensitivity.geno[j]) #Input parameter 17 genotype.misclassification.rate.0.to.1<-(1-s.parameters$specificity.geno[j]) #Input parameter 18 environment.misclassification.rate.1.to.0<-(1-s.parameters$sensitivity.env[j]) #Input parameter 19 environment.misclassification.rate.0.to.1<-(1-s.parameters$specificity.env[j]) #Input parameter 20 disease.misclassification.rate.1.to.0<-(1-s.parameters$sensitivity.pheno[j]) #Input parameter 21 disease.misclassification.rate.0.to.1<-(1-s.parameters$specificity.pheno[j]) #Input parameter 22 sensitivity.geno<-(s.parameters$sensitivity.geno[j]) specificity.geno<-(s.parameters$specificity.geno[j]) sensitivity.env<-(s.parameters$sensitivity.env[j]) specificity.env<-(s.parameters$specificity.env[j]) sensitivity.pheno<-(s.parameters$sensitivity.pheno[j]) specificity.pheno<-(s.parameters$specificity.pheno[j]) #CORRECTION TERMS FOR MEAN CENTRING mean.geno.add.gene<-(2*MAF*(1-MAF)+2*(MAF^2)) #Target value of mean given simulated MAF mean.geno.binary<-(2*MAF*(1-MAF)+(MAF^2)) #Target value of mean given simulated MAF mean.env<-env.prev #LEVEL OF ASCERTAINMENT proportion.for.cases<-rep(NA,numsims) proportion.for.controls<-rep(NA,numsims) #TRANSFORM INPUT REGRESSION COEFFICIENTS TO LOG-ODDS SCALE alpha<-log(disease.prev/(1-disease.prev)) beta.geno<-log(or.geno) beta.env<-log(or.env) beta.int<-log(or.int) #SET UP EMPTY VECTORS FOR RESULTS OF #THE ANALYSES OF EACH SIMULATION IN THE SCENARIO #GENOTYPE beta.geno.results<-rep(NA,numsims) se.geno.results<-rep(NA,numsims) z.geno.results<-rep(NA,numsims) #ENVIRONMENT beta.env.results<-rep(NA,numsims) se.env.results<-rep(NA,numsims) z.env.results<-rep(NA,numsims) #INTERACTION beta.int.results<-rep(NA,numsims) se.int.results<-rep(NA,numsims) z.int.results<-rep(NA,numsims) #MEASURES OF OBSERVED ASSESSMENT ERROR prop.analysed.cases.true.cases<-rep(NA,numsims) prop.analysed.controls.true.controls<-rep(NA,numsims) sensitivity.disease<-rep(NA,numsims) specificity.non.disease<-rep(NA,numsims) sens.allele.A<-rep(NA,numsims) spec.allele.A<-rep(NA,numsims) R2.allele.A<-rep(NA,numsims) sens.allele.B<-rep(NA,numsims) spec.allele.B<-rep(NA,numsims) R2.allele.B<-rep(NA,numsims) sens.env<-rep(NA,numsims) spec.env<-rep(NA,numsims) R2.env<-rep(NA,numsims) #TRACER TO DETECT EXCEEDING MAX ALLOWABLE SAMPLE SIZE scenario.exceeded.sample.size<-0 #SIMULATION LEVEL LOOP – GENERATES AND ANALYSES DATASETS #ONE AT A TIME (s FROM 1 TO TOTAL NUMBER OF SIMULATIONS) for(s in 1:numsims) {#SIM1LOOP #SET UP ZEROED COUNT VECTORS TO DETERMINE #WHEN ENOUGH CASES AND CONTROLS HAVE BEEN GENERATED complete<-0 complete.absolute<-0 cases.complete<-0 controls.complete<-0 block<-0 #CREATE, CENTRE AND ROUND AN ADDITIVE GENETIC GENOTYPE COVARIATE WITH #APPROPRIATE MINOR ALLELE FREQUENCY (MAF) AND CREATE A FACTOR EQUIVALENT allele.A<-rbinom(block.size,1,MAF) allele.A.f<-(allele.A>0)*1 allele.B<-rbinom(block.size,1,MAF) allele.B.f<-(allele.B>0)*1 #ACTUAL GENO IS SUM OF ALLELES IN ADDITIVE.GENETIC MODEL #AND IS 1 IF SUM OF ALLELES IS 1 OR GREATER IN THE BINARY MODEL geno<-allele.A+allele.B if(is.add.gene==1) #ADDITIVE GENETIC MODEL {#IF20LOOP geno.U<-geno geno.U<-geno.U-mean.geno.add.gene }#IF20LOOP if(is.binary==1) #BINARY GENETIC MODEL {#IF21LOOP geno.U<-geno>0 geno.U<-geno.U-mean.geno.binary }#IF21LOOP #CREATE BINARY ENVIRONMENTAL COVARIATE WITH #APPROPRIATE PREVALENCE FOR AT RISK LEVEL env.U<-rbinom(block.size,1,env.prev) env.U<-env.U-mean.env #CREATE PRODUCT INTERACTION TERM int.U<-geno.U*env.U #CREATE NORMALLY DISTRIBUTED RANDOM EFFECTS VECTOR #WITH APPROPRIATE VARIANCE ON SCALE OF LOG-ODDS subject.effect<-rnorm(block.size,0,sqrt(sigma2.subject.effect)) #GENERATE SIMULATED OUTCOMES UNDER MODEL #MAIN EFFECTS ONLY if(is.ME==1) {#IF2LOOP lp<-alpha+beta.geno*geno.U+beta.env*env.U+subject.effect }#IF2LOOP #MODEL WITH INTERACTION if(is.interaction==1) {#IF3LOOP lp<-alpha+beta.geno*geno.U+beta.env*env.U+beta.int*int.U+subject.effect }#IF3LOOP mu<-exp(lp)/(1 + exp(lp)) pheno<-rbinom(block.size,1,mu) #STORE THESE ‘TRUE’ OUTCOMES (IE TRUE DISEASE-NON_DISEASE STATUS) pheno.original<-pheno #RANDOMLY MISSCLASSIFY TRUE DISEASE-NON_DISEASE STATUS #USING SPECIFIED MISCLASSIFICATION RATES #GENERATE RANDOM VECTORS STATING “IF TRULY DISEASED MISCLASSIFY AS CONTROL” AND #”IF NON-DISEASED MISCLASSIFY AS CASE”. OBEY THE RELEVANT COMMAND IN #Rth SUBJECT IF ELEMENT R OF THE CORRESPONDING MISCLASSIFICATION VECTOR TAKES VALUE 1 #OTHERWISE DO NOT OBEY COMMAND disease.missed<-rbinom(block.size,1,disease.misclassification.rate.1.to.0) non.disease.missed<-rbinom(block.size,1,disease.misclassification.rate.0.to.1) pheno.U<- ((pheno.original==0) * (disease.missed==0) * (non.disease.missed==0))*pheno.original+ ((pheno.original==1) * (disease.missed==0) * (non.disease.missed==0))*pheno.original+ ((pheno.original==0) * (disease.missed==0) * (non.disease.missed==1))*(1-pheno.original)+ ((pheno.original==1) * (disease.missed==0) * (non.disease.missed==1))*pheno.original+ ((pheno.original==0) * (disease.missed==1) * (non.disease.missed==0))*pheno.original+ ((pheno.original==1) * (disease.missed==1) * (non.disease.missed==0))*(1-pheno.original)+ ((pheno.original==0) * (disease.missed==1) * (non.disease.missed==1))*(1-pheno.original)+ ((pheno.original==1) * (disease.missed==1) * (non.disease.missed==1))*(1-pheno.original) #CREATE OUTPUT MATRIX WITH ONE ROW FOR EACH SUBJECT SIMULATED sim.matrix<-cbind(1:block.size,pheno.original,pheno.U,geno.U,allele.A,allele.B,env.U,disease.missed,non.disease.missed) #SELECT OUT CASES sim.matrix.cases<-sim.matrix[pheno.U==1,] #SELECT OUT CONTROLS sim.matrix.controls<-sim.matrix[pheno.U==0,] #COUNT CASES AND CONTROLS cases.simulated<-dim(sim.matrix.cases)[1] controls.simulated<-dim(sim.matrix.controls)[1] #TEST IF THERE ARE AT LEAST ENOUGH CASES ALREADY SIMULATED #IF THERE ARE, DEFINE THE CASE ELEMENT OF THE DATA MATRIX if(cases.simulated>=numcases) {#IF4LOOP sim.matrix.cases<-sim.matrix.cases[1:numcases,] cases.complete<-1 }#IF4LOOP #TEST IF THERE ARE AT LEAST ENOUGH CONTROLS ALREADY SIMULATED #IF THERE ARE, DEFINE THE CONTROL ELEMENT OF THE DATA MATRIX if(controls.simulated>=numcontrols) {#IF5LOOP sim.matrix.controls<-sim.matrix.controls[1:numcontrols,] controls.complete<-1 }#IF5LOOP #HAVE WE NOW GENERATED THE REQUIRED NUMBER OF CASES AND CONTROLS? complete<-cases.complete*controls.complete #IF NOT, GENERATE ANOTHER BLOCK OF 20,000 while(complete==0 && complete.absolute==0) {#WHI1LOOP #START OF EXTRA BLOCK LOOP #CREATE LOCATION IDs FOR THE EXTRA BLOCK block<-block+1 start<-(block*block.size)+1 end<-(block+1)*block.size #CREATE, CENTRE AND ROUND AN ADDITIVE GENETIC GENOTYPE COVARIATE WITH #APPROPRIATE MINOR ALLELE FREQUENCY (MAF) AND CREATE A FACTOR EQUIVALENT allele.A<-rbinom(block.size,1,MAF) allele.A.f<-(allele.A>0)*1 allele.B<-rbinom(block.size,1,MAF) allele.B.f<-(allele.B>0)*1 #ACTUAL GENO IS SUM OF ALLELES IN ADDITIVE.GENETIC MODEL #AND IS 1 IF SUM OF ALLELES IS 1 OR GREATER IN THE BINARY MODEL geno<-allele.A+allele.B if(is.add.gene==1) {#IF22LOOP geno.U<-geno geno.U<-geno.U-mean.geno.add.gene }#IF22LOOP if(is.binary==1) {#IF23LOOP geno.U<-geno>0 geno.U<-geno.U-mean.geno.binary }#IF23LOOP #CREATE BINARY ENVIRONMENTAL COVARIATE WITH #APPROPRIATE PREVALENCE FOR AT RISK LEVEL env.U<-rbinom(block.size,1,env.prev) env.U<-env.U-mean.env #CREATE PRODUCT INTERACTION TERM int.U<-geno.U*env.U #CREATE NORMALLY DISTRIBUTED RANDOM EFFECT VECTOR #WITH APPROPRIATE VARIANCE ON SCALE OF LOG-ODDS subject.effect<-rnorm(block.size,0,sqrt(sigma2.subject.effect)) #GENERATE SIMULATED OUTCOMES UNDER MODEL #MAIN EFFECTS ONLY if(is.ME==1) {#IF6LOOP lp<-alpha+beta.geno*geno.U+beta.env*env.U+subject.effect }#IF6LOOP #MODEL WITH INTERACTION if(is.interaction==1) {#IF7LOOP lp<-alpha+beta.geno*geno.U+beta.env*env.U+beta.int*int.U+subject.effect }#IF7LOOP mu<-exp(lp)/(1 + exp(lp)) pheno<-rbinom(block.size,1,mu) #STORE THESE ‘TRUE’ OUTCOMES (IE TRUE DISEASE-NON_DISEASE STATUS) pheno.original<-pheno #RANDOMLY MISSCLASSIFY TRUE DISEASE-NON_DISEASE STATUS #USING SPECIFIED MISCLASSIFICATION RATES #GENERATE RANDOM VECTORS STATING “IF TRULY DISEASED MISCLASSIFY AS CONTROL” AND #”IF NON-DISEASED MISCLASSIFY AS CASE”. OBEY THE RELEVANT COMMAND IN #Rth SUBJECT IF ELEMENT R OF THE CORRESPONDING MISCLASSIFICATION VECTOR TAKES VALUE 1 #OTHERWISE DO NOT OBEY COMMAND disease.missed<-rbinom(block.size,1,disease.misclassification.rate.1.to.0) non.disease.missed<-rbinom(block.size,1,disease.misclassification.rate.0.to.1) pheno.U<- ((pheno.original==0) * (disease.missed==0) * (non.disease.missed==0))*pheno.original+ ((pheno.original==1) * (disease.missed==0) * (non.disease.missed==0))*pheno.original+ ((pheno.original==0) * (disease.missed==0) * (non.disease.missed==1))*(1-pheno.original)+ ((pheno.original==1) * (disease.missed==0) * (non.disease.missed==1))*pheno.original+ ((pheno.original==0) * (disease.missed==1) * (non.disease.missed==0))*pheno.original+ ((pheno.original==1) * (disease.missed==1) * (non.disease.missed==0))*(1-pheno.original)+ ((pheno.original==0) * (disease.missed==1) * (non.disease.missed==1))*(1-pheno.original)+ ((pheno.original==1) * (disease.missed==1) * (non.disease.missed==1))*(1-pheno.original) #CREATE OUTPUT MATRIX WITH ONE ROW FOR EACH SUBJECT SIMULATED IN THIS BLOCK sim.matrix.temp<-cbind(start:end,pheno.original,pheno.U,geno.U,allele.A,allele.B,env.U,disease.missed,non.disease.missed) #DO WE NEED ANY CASES FROM THE CURRENT BLOCK? if(cases.complete==0) {#IF8LOOP #JOIN CASES IN THIS BLOCK WITH CASES CREATED EARLIER sim.matrix.cases<-rbind(sim.matrix.cases,sim.matrix.temp[pheno.U==1,]) #HOW MANY CASES DO WE NOW HAVE IN TOTAL? cases.simulated<-dim(sim.matrix.cases)[1] #TEST IF THERE ARE AT LEAST ENOUGH CASES ALREADY SIMULATED #IF THERE ARE, DEFINE THE CASE ELEMENT OF THE DATA MATRIX if(cases.simulated>=numcases) {#IF9LOOP sim.matrix.cases<-sim.matrix.cases[1:numcases,] cases.complete<-1 proportion.for.cases[s]<-cases.simulated/end }#IF9LOOP }#IF8LOOP #DO WE NEED ANY CONTROLS FROM THE CURRENT BLOCK? if(controls.complete==0) {#IF10LOOP #JOIN CONTROLS IN THIS BLOCK WITH CONTROLS CREATED EARLIER sim.matrix.controls<-rbind(sim.matrix.controls,sim.matrix.temp[pheno.U==0,]) #HOW MANY CONTROLS DO WE NOW HAVE IN TOTAL? controls.simulated<-dim(sim.matrix.controls)[1] #TEST IF THERE ARE AT LEAST ENOUGH CONTROLS ALREADY SIMULATED #IF THERE ARE, DEFINE THE CONTROL ELEMENT OF THE DATA MATRIX if(controls.simulated>=numcontrols) {#IF11LOOP sim.matrix.controls<-sim.matrix.controls[1:numcontrols,] controls.complete<-1 proportion.for.controls[s]<-controls.simulated/end }#IF11LOOP }#IF10LOOP #ARE THERE ENOUGH CASES AND CONTROLS? complete<-cases.complete*controls.complete #HAVE WE EXCEEDED THE TOTAL SAMPLE SIZE ALLOWED? complete.absolute<-(((block+1)*block.size)>=allowed.sample.size) if(complete.absolute==1)scenario.exceeded.sample.size<-1 }#WHI1LOOP #END OF EXTRA BLOCK LOOP #STACK FINAL DATA MATRIX WITH CASES FIRST sim.matrix<-rbind(sim.matrix.cases,sim.matrix.controls) #SAVE A TEMPORARY COPY OF DATA MATRIX AS SIMULATED sim.matrix.save<-sim.matrix #WHAT PROPORTION OF APPARENT CASES/CONTROLS ARE TRULY DISEASED/NON-DISEASED? #TABULATE REAL DISEASE STATUS (COLUMN 2 OF MATRIX) #AGAINST CASE-CONTROL STATUS (COLUMN 3) cc.table.matrix<-as.matrix(table(sim.matrix[,2],sim.matrix[,3])) dimnames(cc.table.matrix)<-list(c("True.Control","True.Case"), c("Analysed.Control","Analysed.Case")) prop.analysed.cases.true.cases[s]<- cc.table.matrix[2,2]/(cc.table.matrix[1,2]+cc.table.matrix[2,2]) prop.analysed.controls.true.controls[s]<- cc.table.matrix[1,1]/(cc.table.matrix[1,1]+cc.table.matrix[2,1]) sensitivity.disease[s]<- cc.table.matrix[2,2]/(cc.table.matrix[2,1]+cc.table.matrix[2,2]) specificity.non.disease[s]<- cc.table.matrix[1,1]/(cc.table.matrix[1,1]+cc.table.matrix[1,2]) #STRIP OUT DATA ON TRUE CASE-CONTROL STATUS #TO MAKE ABSOLUTELY SURE WE ARE NOT MAKING USE OF IT #WRITE OUT AS A MATRIX AND A DATA.FRAME #NOTE CHANGE NAME OF FACTORS FROM .f TO .b IN DATA.FRAME #EG TO env.b FROM env.f sim.matrix<-sim.matrix[,c(1,3:7)] numsubs<-dim(sim.matrix)[1] dimnames(sim.matrix)<-list(NULL,c("id","cc.U","geno.U","allele.A","allele.B","env.U")) sim.df<-data.frame(sim.matrix) #ADD APPROPRIATE ALLOCATION ERRORS TO PRODUCE #’OBSERVED’ GENOTYPES. (THIS IS EQUIVALENT TO CASE-CONTROL MISCLASSIFICATION [ABOVE]) #ADD GENOTYPIC AND ENVIRONMENTAL ERROR genotyp<-sim.df$geno.U genotyp.orig<-sim.df$geno.U allele.A<-sim.df$allele.A allele.B<-sim.df$allele.B allele.A.orig<-allele.A allele.B.orig<-allele.B genotyp.orig<-allele.A.orig+allele.B.orig allele.A.1.missed<-rbinom(numsubs,1,genotype.misclassification.rate.1.to.0) allele.A.0.missed<-rbinom(numsubs,1,genotype.misclassification.rate.0.to.1) allele.B.1.missed<-rbinom(numsubs,1,genotype.misclassification.rate.1.to.0) allele.B.0.missed<-rbinom(numsubs,1,genotype.misclassification.rate.0.to.1) allele.A.new<- ((allele.A==0) * (allele.A.1.missed==0) * (allele.A.0.missed==0) * allele.A)+ ((allele.A==1) * (allele.A.1.missed==0) * (allele.A.0.missed==0) * allele.A)+ ((allele.A==0) * (allele.A.1.missed==0) * (allele.A.0.missed==1) * (1-allele.A))+ ((allele.A==1) * (allele.A.1.missed==0) * (allele.A.0.missed==1) * allele.A)+ ((allele.A==0) * (allele.A.1.missed==1) * (allele.A.0.missed==0) * allele.A)+ ((allele.A==1) * (allele.A.1.missed==1) * (allele.A.0.missed==0) * (1-allele.A))+ ((allele.A==0) * (allele.A.1.missed==1) * (allele.A.0.missed==1) * (1-allele.A))+ ((allele.A==1) * (allele.A.1.missed==1) * (allele.A.0.missed==1) * (1-allele.A)) allele.B.new<- ((allele.B==0) * (allele.B.1.missed==0) * (allele.B.0.missed==0) * allele.B)+ ((allele.B==1) * (allele.B.1.missed==0) * (allele.B.0.missed==0) * allele.B)+ ((allele.B==0) * (allele.B.1.missed==0) * (allele.B.0.missed==1) * (1-allele.B))+ ((allele.B==1) * (allele.B.1.missed==0) * (allele.B.0.missed==1) * allele.B)+ ((allele.B==0) * (allele.B.1.missed==1) * (allele.B.0.missed==0) * allele.B)+ ((allele.B==1) * (allele.B.1.missed==1) * (allele.B.0.missed==0) * (1-allele.B))+ ((allele.B==0) * (allele.B.1.missed==1) * (allele.B.0.missed==1) * (1-allele.B))+ ((allele.B==1) * (allele.B.1.missed==1) * (allele.B.0.missed==1) * (1-allele.B)) genotyp.new<-allele.A.new+allele.B.new if(is.add.gene==1) {#IF24LOOP genotyp.U<-genotyp.new genotyp.U<-genotyp.U-mean.geno.add.gene }#IF24LOOP if(is.binary==1) {#IF25LOOP genotyp.U<-genotyp.new>0 genotyp.U<-genotyp.U-mean.geno.binary }#IF25LOOP sim.df$geno.U<-genotyp.U genotyp.true<-genotyp.orig genotyp.obs<-genotyp.new allele.A.true<-allele.A.orig allele.A.obs<-allele.A.new allele.B.true<-allele.B.orig allele.B.obs<-allele.B.new tab.true.obs<-table(allele.A.true,allele.A.obs) sens.allele.A[s]<-tab.true.obs[2,2]/(tab.true.obs[2,1]+tab.true.obs[2,2]) spec.allele.A[s]<-tab.true.obs[1,1]/(tab.true.obs[1,1]+tab.true.obs[1,2]) R2.allele.A[s]<-(cor.test(allele.A.true,allele.A.obs))$estimate^2 tab.allele.A<-tab.true.obs tab.true.obs<-table(allele.B.true,allele.B.obs) sens.allele.B[s]<-tab.true.obs[2,2]/(tab.true.obs[2,1]+tab.true.obs[2,2]) spec.allele.B[s]<-tab.true.obs[1,1]/(tab.true.obs[1,1]+tab.true.obs[1,2]) R2.allele.B[s]<-(cor.test(allele.B.true,allele.B.obs))$estimate^2 tab.allele.B<-tab.true.obs #ADD APPROPRIATE ALLOCATION ERRORS TO PRODUCE #’OBSERVED’ ENVIRONMENT. (THIS IS EQUIVALENT TO CASE-CONTROL MISCLASSIFICATION [ABOVE] #BUT OCCURS AFTER THE SUBJECTS HAVE BEEN SAMPLED INTO THE STUDY) environ.1.missed<-rbinom(numsubs,1,environment.misclassification.rate.1.to.0) environ.0.missed<-rbinom(numsubs,1,environment.misclassification.rate.0.to.1) environ<-sim.df$env.U environ.test<-(environ>0)*1 environ.orig<-sim.df$env.U environ<- ((environ.test==0) * (environ.1.missed==0) * (environ.0.missed==0) * environ.test)+ ((environ.test==1) * (environ.1.missed==0) * (environ.0.missed==0) * environ.test)+ ((environ.test==0) * (environ.1.missed==0) * (environ.0.missed==1) * (1-environ.test))+ ((environ.test==1) * (environ.1.missed==0) * (environ.0.missed==1) * environ.test)+ ((environ.test==0) * (environ.1.missed==1) * (environ.0.missed==0) * environ.test)+ ((environ.test==1) * (environ.1.missed==1) * (environ.0.missed==0) * (1-environ.test))+ ((environ.test==0) * (environ.1.missed==1) * (environ.0.missed==1) * (1-environ.test))+ ((environ.test==1) * (environ.1.missed==1) * (environ.0.missed==1) * (1-environ.test)) environ.new<-environ sim.df$env.U<-environ.new-mean.env #CALCULATE EMPIRICAL MISCLASSIFICATION RATES affected.true.env<-environ.orig affected.obs.env <-environ.new true.1<-table(affected.true.env)[2] true.0<-table(affected.true.env)[1] tab.true.obs<-table(affected.true.env,affected.obs.env) R2.env[s]<-(cor.test(affected.true.env,affected.obs.env))$estimate^2 sens.env[s]<-tab.true.obs[2,2]/true.1 spec.env[s]<-tab.true.obs[1,1]/true.0 tab.environ<-tab.true.obs sim.df$int.U<-sim.df$geno.U*sim.df$env.U #MAIN EFFECTS MODEL if(is.ME==1) {#ME1LOOP #FIT CONVENTIONAL UNCONDITIONAL LOGISTIC REGRESSION MODEL #NB NO RANDOM EFFECT IN THE ANALYSIS MODEL (IE ANALYSED AS IT #WOULD BE IN REALITY IN A REAL DATASET) mod.glm<-glm(cc.U~1+geno.U+env.U,family=binomial,data=sim.df) mod.sum<-summary(mod.glm) #EXTRACT COEFFICIENTS, STANDARD ERRORS, AND CALCULATE Z.SCORES FROM MODEL #GENOTYPE beta.geno.results[s]<-mod.sum$coefficients[2,1] se.geno.results[s]<-mod.sum$coefficients[2,2] z.geno.results[s]<-mod.sum$coefficients[2,3] #ENVIRONMENT beta.env.results[s]<-mod.sum$coefficients[3,1] se.env.results[s]<-mod.sum$coefficients[3,2] z.env.results[s]<-mod.sum$coefficients[3,3] #DUMMY TERMS FOR INTERACTION beta.int.results[s]<-NA se.int.results[s]<-NA z.int.results[s]<-NA }#ME1LOOP #MODEL WITH INTERACTION if(is.interaction==1) {#INT1LOOP #FIT CONVENTIONAL UNCONDITIONAL LOGISTIC REGRESSION MODEL #NB NO RANDOM EFFECT IN THE ANALYSIS MODEL (IE ANALYSED AS IT #WOULD BE IN REALITY IN A REAL DATASET) mod.glm<-glm(cc.U~1+geno.U+env.U+int.U,family=binomial,data=sim.df) mod.sum<-summary(mod.glm) #EXTRACT COEFFICIENTS, STANDARD ERRORS, AND CALCULATE Z.SCORES FROM MODEL #GENOTYPE beta.geno.results[s]<-mod.sum$coefficients[2,1] se.geno.results[s]<-mod.sum$coefficients[2,2] z.geno.results[s]<-mod.sum$coefficients[2,3] #ENVIRONMENT beta.env.results[s]<-mod.sum$coefficients[3,1] se.env.results[s]<-mod.sum$coefficients[3,2] z.env.results[s]<-mod.sum$coefficients[3,3] #INTERACTION beta.int.results[s]<-mod.sum$coefficients[4,1] se.int.results[s]<-mod.sum$coefficients[4,2] z.int.results[s]<-mod.sum$coefficients[4,3] }#INT1LOOP #PRINT TRACER AFTER EVERY Nth DATASET CREATED if(s %% trace.interval ==0)cat("\n",s,"of",numsims,"completed in scenario",scenario.id) }#SIM1LOOP #SUMMARISE RESULTS ACROSS ALL SIMULATIONS #CASE-CONTROL MISCLASSIFICATION mean.prop.analysed.cases.true.cases<-mean(prop.analysed.cases.true.cases) mean.prop.analysed.controls.true.controls<-mean(prop.analysed.controls.true.controls) mean.sensitivity.disease<- mean(sensitivity.disease) mean.specificity.non.disease<- mean(specificity.non.disease) mean.sens.allele.A<-mean(sens.allele.A) mean.spec.allele.A<-mean(spec.allele.A) mean.sens.allele.B<-mean(sens.allele.B) mean.spec.allele.B<-mean(spec.allele.B) mean.sens.env<-mean(sens.env) mean.spec.env<-mean(spec.env) mean.R2.allele.A<-mean(R2.allele.A) mean.R2.allele.B<-mean(R2.allele.B) mean.R2.env<-mean(R2.env) #SUMMARISE PRIMARY PARAMETER ESTIMATES #COEFFICIENTS ON LOG-ODDS SCALE mean.beta.geno<-round(mean(beta.geno.results),3) mean.se.geno<-round(sqrt(mean(se.geno.results^2)),3) mean.z.geno<-mean(z.geno.results) mean.model.z.geno<-mean.beta.geno/mean.se.geno mean.beta.env<-round(mean(beta.env.results),3) mean.se.env<-round(sqrt(mean(se.env.results^2)),3) mean.z.env<-mean(z.env.results) mean.model.z.env<-mean.beta.env/mean.se.env mean.beta.int<-NA mean.se.int<-NA mean.z.int<-NA mean.model.z.int<-NA #MEAN ODDS RATIOS est.OR.geno<-exp(mean.beta.geno) est.OR.env<-exp(mean.beta.env) est.OR.int<-NA if(is.interaction==1) {#INT2LOOP mean.beta.int<-round(mean(beta.int.results),3) mean.se.int<-round(sqrt(mean(se.int.results^2)),3) mean.z.int<-mean(z.int.results) mean.model.z.int<-mean.beta.int/mean.se.int est.OR.int<-exp(mean.beta.int) }#INT2LOOP #POWER CALCULATIONS #CALCULATE Z STATISTIC THRESHOLD FOR DESIRED P-VALUE AND POWER z.pval<-qnorm(1-pval/2) z.power.required<-qnorm(power)+z.pval #ESTIMATE HOW MUCH THE SIMULATED STUDY SIZE NEEDS TO BE INFLATED #IN ORDER TO OBTAIN A POWER OF 80% #GENOTYPE #CALCULATE EMPIRICAL POWER OF MODEL FOR SIMULATED SAMPLE SIZE #BASED ON mean.model.z.geno model.power.geno<-pnorm(mean.model.z.geno-z.pval) #ESTIMATE REQUIRED SAMPLE SIZE INFLATION OR SHRINKAGE. #RATIO OF Z STATISTIC REQUIRED FOR DESIRED POWER TO MEAN MODEL Z STATISTIC OBTAINED #INDICATES RELATIVE CHANGE REQUIRED IN STANDARD ERROR. THIS CORRESPONDS TO RELATIVE #CHANGE ON SCALE OF SQUARE ROOT OF SAMPLE SIZE. #RATIO OF SAMPLE SIZE IS THEREFORE THIS RATIO SQUARED sample.size.inflation.geno<-(z.power.required/mean.model.z.geno)^2 #MULTIPLY THE ACTUAL NUMBER OF CASES AND CONTROLS BY THIS #SQUARED RATIO TO GET THE REQUIRED NUMBER OF CASES AND CONTROLS #FOR THE DESIRED POWER numcases.required.geno<-round(numcases*sample.size.inflation.geno,0) numcontrols.required.geno<-round(numcontrols*sample.size.inflation.geno,0) #ENVIRONMENTAL DETERMINANT AS FOR GENOTYPE model.power.env<-pnorm(mean.model.z.env-z.pval) sample.size.inflation.env<-(z.power.required/mean.model.z.env)^2 numcases.required.env<-round(numcases*sample.size.inflation.env,0) numcontrols.required.env<-round(numcontrols*sample.size.inflation.env,0) model.power.int<-NA sample.size.inflation.int<-NA numcases.required.int<-NA numcontrols.required.int<-NA if(is.interaction==1) {#INT3LOOP #INTERACTION - IF PRESENT - AS FOR GENOTYPE AND ENVIRONMENT model.power.int<-pnorm(mean.model.z.int-z.pval) sample.size.inflation.int<-(z.power.required/mean.model.z.int)^2 numcases.required.int<-round(numcases*sample.size.inflation.int,0) numcontrols.required.int<-round(numcontrols*sample.size.inflation.int,0) }#INT3LOOP #CALCULATE EMPIRICAL POWER IE SIMPLY THE PROPORTION OF SIMULATIONS IN WHICH #THE Z STATISTIC FOR THE PARAMETER OF INTEREST EXCEEDS THE Z STATISTIC #FOR THE DESIRED LEVEL OF STATISTICAL SIGNIFICANCE empirical.power.geno<-round(mean(z.geno.results>z.pval),3) empirical.power.env<-round(mean(z.env.results>z.pval),3) empirical.power.int<-NA if(is.interaction==1) {#INT4LOOP #INTERACTION AS FOR GENOTYPE AND ENVIRONMENT empirical.power.int<-round(mean(z.int.results>z.pval),3) }#INT4LOOP #CREATE OUTPUT MATRIX CONTAINING ALL KEY PARAMETERS FOR THE CURRENT SCENARIO #INPUT CONTROL PARAMETERS AND OUTPUT RESULTS PARAMETERS current.output.matrix<-cbind( scenario.id, #INPUT PARAMETERS seed.val,numsims,numcases,numcontrols,is.interaction,is.add.gene, disease.prev,MAF,env.prev,or.geno,or.env,or.int,RR.5.95,pval,power, sensitivity.geno,specificity.geno,sensitivity.env,specificity.env, sensitivity.pheno,specificity.pheno, scenario.id, #CRITICAL OUTPUT PARAMETERS numcases.required.geno,numcontrols.required.geno, numcases.required.env,numcontrols.required.env, numcases.required.int,numcontrols.required.int, model.power.geno,model.power.env,model.power.int, empirical.power.geno,empirical.power.env,empirical.power.int, scenario.id, #OTHER OUTPUT PARAMETERS est.OR.geno,est.OR.env,est.OR.int,mean.se.geno,mean.se.env,mean.se.int, mean.sens.allele.A,mean.spec.allele.A,mean.sens.allele.B,mean.spec.allele.B, mean.R2.allele.A,mean.R2.allele.B, mean.sens.env,mean.spec.env, mean.sensitivity.disease,mean.specificity.non.disease, scenario.id) dimnames(current.output.matrix)<-list(NULL,c("scenario.id","seed.val","numsims","numcases","numcontrols", "is.interaction","is.add.gene", "disease.prev","MAF","env.prev","or.geno","or.env","or.int", "RR.5.95","pval","power", "sensitivity.geno","specificity.geno","sensitivity.env","specificity.env", "sensitivity.pheno","specificity.pheno", "scenario.id", "numcases.required.geno","numcontrols.required.geno", "numcases.required.env","numcontrols.required.env", "numcases.required.int","numcontrols.required.int", "model.power.geno","model.power.env","model.power.int", "empirical.power.geno","empirical.power.env","empirical.power.int", "scenario.id", "est.OR.geno","est.OR.env","est.OR.int", "mean.se.geno","mean.se.env","mean.se.int", "mean.sens.allele.A","mean.spec.allele.A", "mean.sens.allele.B","mean.spec.allele.B", "mean.R2.allele.A","mean.R2.allele.B", "mean.sens.env","mean.spec.env", "mean.sensitivity.disease","mean.specificity.non.disease", "scenario.id")) varnames<- c( "scenario.id","seed.val","numsims","numcases","numcontrols", "is.interaction","is.add.gene", "disease.prev","MAF","env.prev","or.geno","or.env","or.int", "RR.5.95","pval","power", "sensitivity.geno","specificity.geno","sensitivity.env","specificity.env", "sensitivity.pheno","specificity.pheno", "scenario.id", "numcases.required.geno","numcontrols.required.geno", "numcases.required.env","numcontrols.required.env", "numcases.required.int","numcontrols.required.int", "model.power.geno","model.power.env","model.power.int", "empirical.power.geno","empirical.power.env","empirical.power.int", "scenario.id", "est.OR.geno","est.OR.env","est.OR.int", "mean.se.geno","mean.se.env","mean.se.int", "mean.sens.allele.A","mean.spec.allele.A", "mean.sens.allele.B","mean.spec.allele.B", "mean.R2.allele.A","mean.R2.allele.B", "mean.sens.env","mean.spec.env", "mean.sensitivity.disease","mean.specificity.non.disease", "scenario.id") current.input.matrix<-t(matrix(current.output.matrix[1,1:23])) current.critical.output.matrix<-t(matrix(current.output.matrix[1,23:35])) current.critical.print.matrix<-t(matrix(current.output.matrix[1,c(1:7,24:35)])) dimnames(current.input.matrix)<-list(NULL,c( "scenario.id","seed.val","numsims","numcases","numcontrols", "is.interaction","is.add.gene", "disease.prev","MAF","env.prev","or.geno","or.env","or.int", "RR.5.95","pval","power", "sensitivity.geno","specificity.geno","sensitivity.env","specificity.env", "sensitivity.pheno","specificity.pheno", "scenario.id")) dimnames(current.critical.output.matrix)<- list(NULL,c( "scenario.id", "numcases.required.geno","numcontrols.required.geno", "numcases.required.env","numcontrols.required.env", "numcases.required.int","numcontrols.required.int", "model.power.geno","model.power.env","model.power.int", "empirical.power.geno","empirical.power.env","empirical.power.int")) dimnames(current.critical.print.matrix)<-list(NULL,c( "scenario.id","seed.val","numsims","numcases","numcontrols", "is.interaction","is.add.gene", "numcases.required.geno","numcontrols.required.geno", "numcases.required.env","numcontrols.required.env", "numcases.required.int","numcontrols.required.int", "model.power.geno","model.power.env","model.power.int", "empirical.power.geno","empirical.power.env","empirical.power.int")) cat("\n\n\n\n####################\nSUMMARY for Scenario",current.critical.print.matrix[1,1],"\n####################\n") cat("\nPOWER AND SAMPLE SIZE FOR MODELS WITH MAIN EFFECTS AND AN INTERACTION\n") print(current.critical.print.matrix) #APPEND ALL RESULTS TO AN OUTPUT TEXT FILE if(scenario.exceeded.sample.size==1) {write("NEXT RECORD EXCEEDED MAXIMUM SAMPLE SIZE",outfile,append=TRUE)} if(scenario.exceeded.sample.size==1){print("THIS RECORD EXCEEDED MAXIMUM SAMPLE SIZE")} if(j==1) {#IF13LOOP if(start.at.scenario==1) {#IF14LOOP write(t(varnames),outfile,dim(current.output.matrix)[2],append=TRUE,sep=';') }#IF14LOOP write(t(current.output.matrix),outfile,dim(current.output.matrix)[2],append=TRUE,sep=';') }#IF13LOOP if(j>1) {#IF15LOOP write(t(current.output.matrix),outfile,dim(current.output.matrix)[2],append=TRUE,sep=';') }#IF15LOOP output.matrix.save<-rbind(output.matrix.save,current.output.matrix) }#SCE1LOOP output.df.save<-data.frame(output.matrix.save) #END OF SCENARIO LEVEL LOOP