--- author: "Jinhyun Ju" output: html_document --- Quantitative Genomics and Genetics 2016 ====== Homework 5 R-code Key ------ -- 11 March 2016 -- Author: Jin Hyun Ju (jj328@cornell.edu) ### Problem 2 #### a) Simulating genotype data ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 8, fig.align='center'} genotype_simulator <- function(n_sample, n_geno){ geno_mx <- matrix(NA, nrow = n_sample, ncol = n_geno) for (i in 1:n_geno){ geno_mx[,i] <- sample(c("AA", "AB", "BB"), n_sample, prob = c(1/4, 1/2, 1/4), replace = TRUE) } return(geno_mx) } geno_mx <- genotype_simulator(200, 250) ``` #### b) Converting simulated genotypes to Xa and Xd ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 8, fig.align='center'} conversion_vec <- c("AA" = -1, "AB" = 0, "BB" = 1) xa_matrix <- apply(geno_mx, 2, function(x) conversion_vec[x]) xd_matrix <- 1 - 2*(abs(xa_matrix)) ``` #### c) Simulating phenotype ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 8, fig.align='center'} beta_mu <- 1 beta_a_25 <- -0.2 beta_d_25 <- 0 beta_a_100 <- 0 beta_d_100 <- -0.2 beta_a_175 <- 0.2 beta_d_175 <- 0.2 beta_a_225 <- 0 beta_d_225 <- 0 error_sd <- 1 pheno <- beta_mu + xa_matrix[,25] * beta_a_25 + xd_matrix[,25] * beta_d_25 + xa_matrix[,100] * beta_a_100 + xd_matrix[,100] * beta_d_100 + xa_matrix[,175] * beta_a_175 + xd_matrix[,175] * beta_d_175 + xa_matrix[,225] * beta_a_225 + xd_matrix[,225] * beta_d_225 + rnorm(nrow(xa_matrix), 0, error_sd) ``` #### d) Plotting a histogram of the simulated phenotype ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 8, fig.align='center'} hist(pheno, main = "Simulated phenotype", ylab = "count", xlab = "pheotype") ``` #### e) X-Y plots for genotypes and phenotypes ```{r, comment = NA, fig.show = 'hold', fig.height = 12, fig.width = 6, fig.align='center'} par(mfrow = c(4, 2)) geno_idx <- c(25, 100, 175, 225) for (i in geno_idx){ plot(xa_matrix[,i], pheno, xlab = paste0("Genotype (Xa) ",i), ylab = "phenotype") plot(xd_matrix[,i], pheno, xlab = paste0("Genotype (Xd) ",i), ylab = "phenotype") } ``` #### f) MLE(beta) calculations ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 12, fig.align='center'} library(MASS) MLE_calculator <- function(pheno_input, xa_input, xd_input){ n_samples <- length(xa_input) X_mx <- cbind(1,xa_input,xd_input) MLE_beta <- ginv(t(X_mx) %*% X_mx) %*% t(X_mx) %*% pheno_input return(MLE_beta) } mle_matrix <- matrix(NA, nrow = ncol(xa_matrix), ncol = 3) colnames(mle_matrix) <- c("beta_mu", "beta_a", "beta_d") for (i in 1:ncol(xa_matrix)){ mle_matrix[i,] <- MLE_calculator(pheno, xa_matrix[,i], xd_matrix[,i]) } par(mfrow = c(1,3)) for( j in 1:ncol(mle_matrix)){ hist(mle_matrix[,j], main = colnames(mle_matrix)[j], xlab = "") } ``` #### f) P-value calculations ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 12, fig.align='center'} fstat_calculator <- function(pheno_input, xa_input, xd_input){ n_samples <- length(xa_input) X_mx <- cbind(1,xa_input,xd_input) MLE_beta <- ginv(t(X_mx) %*% X_mx) %*% t(X_mx) %*% pheno_input y_hat <- X_mx %*% MLE_beta SSM <- sum((y_hat - mean(pheno_input))^2) SSE <- sum((pheno_input - y_hat)^2) df_M <- 2 df_E <- n_samples - 3 MSM <- SSM / df_M MSE <- SSE / df_E Fstatistic <- MSM / MSE return(Fstatistic) } f_stat <- c() for( i in 1:ncol(xa_matrix)){ f_stat[i] <- fstat_calculator(pheno, xa_matrix[,i], xd_matrix[,i]) } ``` #### h) P-value calculations ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 12, fig.align='center'} pvals <- pf(f_stat, 2, 197, lower.tail = FALSE) plot(-log10(pvals), main = "Manhattan Plot") abline(v = 25, col = 'red') abline(v = 100, col = 'red') abline(v = 175, col = 'red') abline(v = 225, col = 'red') ``` #### i) Identifying significant hits ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 12, fig.align='center'} which(pvals < 0.05) ``` #### j) Bonferroni corrected p-values ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 12, fig.align='center'} which(pvals < (0.05 / 250)) ```