Quantitative Genomics and Genetics 2016

Final R-code Key

â€“ 23 May 2016

â€“ Author: Jin Hyun Ju (jj328@cornell.edu)

Problem 1

a)

``````pheno_norm <- read.csv("QG16_Final_Y_normal.csv", row.names =1)

hist(pheno_norm\$normal_pheno, main = "histogram of normal phenotype")``````

c)

``````pheno_cat <- read.csv("QG16_Final_Y_categorical.csv", row.names =1)

hist(pheno_cat\$binary_pheno, main = "histogram of categorical phenotype")``````

Problem 2

a)

``````genotypes <- read.csv("QG16_Final_Genotypes.csv", row.names =1)

maf_calc <- function(single_geno){
n_geno <- length(single_geno)
allele_count <- sum(single_geno)
allele_freq <- allele_count / (2*n_geno)
if(allele_freq > 0.5){
allele_freq <- 1- allele_freq
}
return(allele_freq)
}

maf_vec = apply(genotypes,2, maf_calc)

hist(maf_vec, main = "MAF of all genotypes")``````

b)

``````geno_pca <- prcomp(genotypes)

plot(geno_pca\$x[,1], geno_pca\$x[,2], main = "Genotype PC projections", xlab = "PC1", ylab = "PC2")``````

Problem 3

a)

``````xa_matrix <- genotypes - 1
xd_matrix <- 1 - 2*abs(xa_matrix)

library(MASS)

pval_calculator <- function(pheno_input, xa_input, xd_input){
n_samples <- length(xa_input)

X_mx <- cbind(1,xa_input,xd_input)

MLE_beta <- ginv(t(X_mx) %*% X_mx) %*% t(X_mx) %*% pheno_input
y_hat <- X_mx %*% MLE_beta

SSM <- sum((y_hat - mean(pheno_input))^2)
SSE <- sum((pheno_input - y_hat)^2)

df_M <- 2
df_E <- n_samples - 3

MSM <- SSM / df_M
MSE <- SSE / df_E

Fstatistic <- MSM / MSE

# to check if it is correct
pval <- pf(Fstatistic, df_M, df_E,lower.tail = FALSE)

return(pval)
}

n_geno <- ncol(xa_matrix)

pval_vec <-vector(length =n_geno)

for (i in 1 : n_geno){

#cat("Testing Gentoype = ", i, "\n")
pval_vec[i] <- pval_calculator(pheno_norm\$normal_pheno, xa_input = xa_matrix[,i], xd_input = xd_matrix[,i])

}``````

b)

``plot(1:n_geno, -log10(pval_vec),main = "Problem 3 manhattan plot") ``

c)

``````sorted_pvals <- sort(pval_vec, decreasing = FALSE)
sorted_expected <- sort(runif(n_geno), decreasing = FALSE)

plot(-log10(sorted_expected), -log10(sorted_pvals), main = "Problem 3 QQplot")
abline(a = 0, b = 1, col = "red")``````