# QG14 Computer lab 8 Solution # Author: Jin Hyun Ju # 2014.10.27 setwd("~/Dropbox/Quantitative_Genomics_2014/Computer_lab_8/") library(MASS) library(ggplot2) logistic.IRLS<- function(Xa,Xd,Y =Y, beta.initial.vec = c(0,0,0), d.stop.th = 1e-6, it.max = 100) { #Create the X matrix X.mx <- cbind(rep(1,dim(Y)[1]), Xa, Xd) N <- dim(X.mx)[1] #check this matrix: #initialize the beta parameter vector at t=0 beta.t <- beta.initial.vec # initialize deviance at d[t] dt<-0 #initialize gamma # K is the part which goes into the exponent K<-X.mx %*% beta.t gamma_inv<-exp(K)/(1+exp(K)) for(i in 1:it.max) { dpt1<-dt #store previous deviance # create empty matrix W W<-matrix(0,N,N) # Fill in the diagonal of W with appropriate values for(j in 1:N){ W[j,j] <- gamma_inv[j] * (1-gamma_inv[j]) } beta.t <- beta.t + ginv(t(X.mx)%*%W%*%X.mx)%*%t(X.mx)%*%(Y-gamma_inv) #update gamma since it's a function of beta K <- X.mx %*% beta.t gamma_inv <- exp(K)/(1+exp(K)) #calculate new deviance dt <- 2*( sum(Y[Y==1]*log(Y[Y==1]/gamma_inv[Y==1])) + sum((1-Y[Y==0])*log((1-Y[Y==0])/(1-gamma_inv[Y==0]))) ) absD <- abs(dt - dpt1) if(absD < d.stop.th) { # cat("Convergence at iteration:", i, "at threshold:", d.stop.th, "\n") logl<-sum(Y*log(gamma_inv)+(1-Y)*log(1-gamma_inv)) return(list(beta.t,logl)) } } # cat("Convergence not reached after iteration:", i, "at threshold:", d.stop.th, "\n") return(list(beta.t= c(NA,NA,NA),logl=NA)) } Y <- read.table("./QG14_Lab8_phenotypes.tsv", header = F,stringsAsFactors = F) geno <- read.table("./QG14_Lab8_genotypes.tsv", header = T) Y <- as.matrix(Y) colnames(Y) <- NULL Xa <- as.matrix(geno) Xd <- 1 - 2*abs(Xa) beta<-NULL logl_HA<-NULL for(j in 1:dim(Xa)[2]){ myList<-logistic.IRLS(Xa[,j],Xd[,j],Y=Y) beta<-cbind(beta,myList[[1]]) logl_HA<-c(logl_HA,myList[[2]]) # cat("Locus ",j,"'s beta values: ",myList[[1]],"\n") } # Log likelihood for null hypothesis logl_H0 <- logistic.IRLS(Y=Y, Xa= NULL, Xd=NULL, beta.initial.vec = c(0))[[2]] LRT<-2*logl_HA-2*logl_H0 #likelihood ratio test statistic pval <- pchisq(LRT, 2, lower.tail = F) plot(-log10(pval), main = "P-values / Bonferroni cut-off") abline(-log10(0.05/300),0,col="red") # Checking the effect i <- 180 plot.df <- data.frame( "Phenotype" = Y, "Xa" = Xa[,i], "Xd" = Xd[,i]) jitter.plot1 <- ggplot(plot.df, aes(x = Xa, y = Phenotype))+ geom_point(position = position_jitter(w = 0.1, h = 0.1)) + ggtitle("Xa vs Phenotype") jitter.plot2 <- ggplot(plot.df, aes(x = Xd, y = Phenotype))+ geom_point(position = position_jitter(w = 0.1, h = 0.1)) + ggtitle("Xd vs Phenotype") print(jitter.plot1) print(jitter.plot2)