# Quantitative Genomics and Genetics 2016

## Homework 5 R-code Key

â€“ 11 March 2016

â€“ Author: Jin Hyun Ju (jj328@cornell.edu)

### Problem 2

#### a) Simulating genotype data

genotype_simulator <- function(n_sample,
n_geno){

geno_mx <- matrix(NA, nrow = n_sample, ncol = n_geno)

for (i in 1:n_geno){

geno_mx[,i] <- sample(c("AA", "AB", "BB"), n_sample, prob = c(1/4, 1/2, 1/4), replace = TRUE)

}

return(geno_mx)

}

geno_mx <- genotype_simulator(200, 250)

#### b) Converting simulated genotypes to Xa and Xd

conversion_vec <- c("AA" = -1, "AB" = 0, "BB" = 1)

xa_matrix <- apply(geno_mx, 2, function(x) conversion_vec[x])

xd_matrix <- 1 - 2*(abs(xa_matrix))

#### c) Simulating phenotype

beta_mu <- 1

beta_a_25 <- -0.2
beta_d_25 <- 0
beta_a_100 <- 0
beta_d_100 <- -0.2
beta_a_175 <- 0.2
beta_d_175 <- 0.2
beta_a_225 <- 0
beta_d_225 <- 0

error_sd <- 1

pheno <- beta_mu + xa_matrix[,25] * beta_a_25 + xd_matrix[,25] * beta_d_25 + xa_matrix[,100] * beta_a_100 + xd_matrix[,100] * beta_d_100 + xa_matrix[,175] * beta_a_175 + xd_matrix[,175] * beta_d_175 + xa_matrix[,225] * beta_a_225 + xd_matrix[,225] * beta_d_225 + rnorm(nrow(xa_matrix), 0, error_sd)

#### d) Plotting a histogram of the simulated phenotype

hist(pheno, main = "Simulated phenotype", ylab = "count", xlab = "pheotype")

#### e) X-Y plots for genotypes and phenotypes

par(mfrow = c(4, 2))
geno_idx <- c(25, 100, 175, 225)

for (i in geno_idx){

plot(xa_matrix[,i], pheno, xlab = paste0("Genotype (Xa) ",i), ylab = "phenotype")
plot(xd_matrix[,i], pheno, xlab = paste0("Genotype (Xd) ",i), ylab = "phenotype")
}

#### f) MLE(beta) calculations

library(MASS)

MLE_calculator <- function(pheno_input, xa_input, xd_input){
n_samples <- length(xa_input)

X_mx <- cbind(1,xa_input,xd_input)

MLE_beta <- ginv(t(X_mx) %*% X_mx) %*% t(X_mx) %*% pheno_input

return(MLE_beta)
}

mle_matrix <- matrix(NA, nrow = ncol(xa_matrix), ncol = 3)

colnames(mle_matrix) <- c("beta_mu", "beta_a", "beta_d")

for (i in 1:ncol(xa_matrix)){
mle_matrix[i,] <- MLE_calculator(pheno, xa_matrix[,i], xd_matrix[,i])

}

par(mfrow = c(1,3))

for( j in 1:ncol(mle_matrix)){

hist(mle_matrix[,j], main = colnames(mle_matrix)[j], xlab = "")

}

#### f) P-value calculations

fstat_calculator <- function(pheno_input, xa_input, xd_input){
n_samples <- length(xa_input)

X_mx <- cbind(1,xa_input,xd_input)

MLE_beta <- ginv(t(X_mx) %*% X_mx) %*% t(X_mx) %*% pheno_input
y_hat <- X_mx %*% MLE_beta

SSM <- sum((y_hat - mean(pheno_input))^2)
SSE <- sum((pheno_input - y_hat)^2)

df_M <- 2
df_E <- n_samples - 3

MSM <- SSM / df_M
MSE <- SSE / df_E

Fstatistic <- MSM / MSE

return(Fstatistic)
}

f_stat <- c()

for( i in 1:ncol(xa_matrix)){

f_stat[i] <- fstat_calculator(pheno, xa_matrix[,i], xd_matrix[,i])
}

#### h) P-value calculations

pvals <- pf(f_stat, 2, 197, lower.tail = FALSE)

plot(-log10(pvals), main = "Manhattan Plot")

abline(v = 25, col = 'red')
abline(v = 100, col = 'red')
abline(v = 175, col = 'red')
abline(v = 225, col = 'red')

#### i) Identifying significant hits

which(pvals < 0.05)
 [1]   3  36  40  51  54  63  66 100 105 112 134 143 175 200 210 238

#### j) Bonferroni corrected p-values

which(pvals < (0.05 / 250))
integer(0)