Quantitative Genomics and Genetics

HW5 Key (R codes)

– 10 November 2014

Problem 2

    1. Simulate Genotypes
# Load the necessary packages

library(MASS)      # for ginv()

library(ggplot2)   # for ggplot()



set.seed(1500)

N <- 100



nucleotides <- c("A1","A2")



sim.genotypes <- matrix(NA, nrow = N, ncol = 2)

MAF <- 0.3

for( i in 1:N){

  sim.genotypes[i,] <- sample(nucleotides, 2, replace = TRUE, prob = c(MAF,1-MAF))

}



minor.allele <- "A1"



temp <- 1 * (sim.genotypes == minor.allele)

Xa.mx <- as.matrix(temp[,1] + temp[,2] - 1 )



Xd.mx <- 1 - (2 * abs(Xa.mx))
    1. Simulate two phenotypes
beta1 <- c(1,0,0)

beta2 <- c(1,1,-1)

  

X.mx <- cbind(1,Xa.mx,Xd.mx)



k.null <- X.mx%*%beta1

k.alt <- X.mx%*%beta2



expected.y1 <- exp(k.null) / (1+ exp(k.null))

expected.y2 <- exp(k.alt) / (1+ exp(k.alt))



error1 <- rbinom(n = N,size=1,prob = expected.y1) - expected.y1

error2 <- rbinom(n = N,size=1,prob = expected.y2) - expected.y2



y1 <- expected.y1+error1 

y2 <- expected.y2+error2
    1. Plots
plot.df <- data.frame( "Y1" = y1, "Y2"= y2, "Xa" = Xa.mx, "Xd" = Xd.mx)



jitter.plot1 <- ggplot(plot.df, aes(x = Xa, y = Y1))+ geom_point(position = position_jitter(w = 0.1, h = 0.1)) + ggtitle("Xa vs Phenotype 1")



jitter.plot2 <- ggplot(plot.df, aes(x = Xd, y = Y1))+ geom_point(position = position_jitter(w = 0.1, h = 0.1)) + ggtitle("Xd vs Phenotype 1")



jitter.plot3 <- ggplot(plot.df, aes(x = Xa, y = Y2))+ geom_point(position = position_jitter(w = 0.1, h = 0.1)) + ggtitle("Xa vs Phenotype 2")



jitter.plot4 <- ggplot(plot.df, aes(x = Xd, y = Y2))+ geom_point(position = position_jitter(w = 0.1, h = 0.1)) + ggtitle("Xd vs Phenotype 1")



print(jitter.plot1)

print(jitter.plot2)

print(jitter.plot3)

print(jitter.plot4)

    1. MLE with IRLS (Alternative)
logistic.IRLS<- function(Xa,Xd,Y = Y, beta.initial.vec = c(0,0,0), d.stop.th = 1e-6, it.max = 100) {



  #Create the X matrix

  X.mx <- cbind(rep(1,dim(Y)[1]), Xa, Xd)

  Y <- as.matrix(Y)

  

  N <- dim(X.mx)[1]

    #check this matrix:

    #initialize the beta parameter vector at t=0

    beta.t <- beta.initial.vec

  

  # initialize deviance at d[t]

    dt<-0

    

    #initialize gamma

  # K is the part which goes into the exponent

    K<-X.mx %*% beta.t

    gamma_inv<-exp(K)/(1+exp(K))

    

    for(i in 1:it.max) {

        dpt1<-dt #store previous deviance

        

    # create empty matrix W

        W<-matrix(0,N,N)

    

    # Fill in the diagonal of W with appropriate values

        for(j in 1:N){

            W[j,j] <- gamma_inv[j] * (1-gamma_inv[j])

        }

    

    

        beta.t <- beta.t + ginv(t(X.mx)%*%W%*%X.mx)%*%t(X.mx)%*%(Y-gamma_inv)

        

        #update gamma since it's a function of beta

        K <- X.mx %*% beta.t

        gamma_inv <- exp(K)/(1+exp(K))



        #calculate new deviance

        dt <- 2*( sum(Y[Y==1]*log(Y[Y==1]/gamma_inv[Y==1])) + sum((1-Y[Y==0])*log((1-Y[Y==0])/(1-gamma_inv[Y==0]))) )

        

        absD <- abs(dt - dpt1)

        

        if(absD < d.stop.th) {

            cat("Convergence at iteration:", i, "at threshold:", d.stop.th, "\n")

            logl<-sum(Y*log(gamma_inv)+(1-Y)*log(1-gamma_inv))

            return(list(beta.t,logl))

        }   

    }

    cat("Convergence not reached after iteration:", i, "at threshold:", d.stop.th, "\n")

    return(list(beta.t= c(NA,NA,NA),logl=NA))

}





y1.IRLS.result <- logistic.IRLS(Xa = Xa.mx,Xd = Xd.mx, Y=y1)
Convergence at iteration: 4 at threshold: 1e-06 
y2.IRLS.result <- logistic.IRLS(Xa = Xa.mx,Xd = Xd.mx, Y=y2)
Convergence at iteration: 5 at threshold: 1e-06 
beta1 - y1.IRLS.result[[1]]
           [,1]

[1,] -0.1350392

[2,] -0.0942003

[3,]  0.2748379
beta2 - y2.IRLS.result[[1]]
           [,1]

[1,] 0.44685524

[2,] 0.30014131

[3,] 0.04958164
    1. MLE with IRLS (Null)
y1.null.result <- logistic.IRLS(Xa = NULL,Xd = NULL, Y=y1, beta.initial.vec = c(0))
Convergence at iteration: 4 at threshold: 1e-06 
y2.null.result <- logistic.IRLS(Xa = NULL,Xd = NULL, Y=y2, beta.initial.vec = c(0))
Convergence at iteration: 3 at threshold: 1e-06 
beta1[1] - y1.null.result[[1]]
           [,1]

[1,] -0.1526795
beta2[1] - y2.null.result[[1]]
          [,1]

[1,] 0.5526878
    1. LRT calculations
log0.y1 <- y1.null.result[[2]]

log0.y2 <- y2.null.result[[2]]



LRT.y1 <- 2*(y1.IRLS.result[[2]]-log0.y1)

LRT.y2 <- 2*(y2.IRLS.result[[2]]-log0.y2)



LRT.y1
[1] 1.08897
LRT.y2
[1] 15.48612
    1. P-value Calculations
pval.y1 <- pchisq(LRT.y1, 2, lower.tail = FALSE)

pval.y2 <- pchisq(LRT.y2, 2, lower.tail = FALSE)



pval.y1
[1] 0.5801405
pval.y2
[1] 0.0004337419