--- author: "Jinhyun Ju" output: html_document --- Quantitative Genomics and Genetics 2016 ====== Homework 3 R-code Key ------ -- 25 February 2016 -- Author: Jin Hyun Ju (jj328@cornell.edu) ### Problem 2 #### b) Function that simulates M different iid samples of size n ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 8, fig.align='center'} binom_sim <- function(M, n, p){ mean_vector <- c() for (i in 1:M){ sim_sample <- rbinom(n, 1, p) mean_vector[i] <- mean(sim_sample) } return(mean_vector) } data1 <- binom_sim(1000, 10, 0.3) data2 <- binom_sim(1000, 1000, 0.3) par(mfrow = c(1,2)) hist(data1, main = "M = 1000, n = 10, p = 0.3") hist(data2, main = "M = 1000, n = 1000, p = 0.3") ``` #### c) ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 8, fig.align='center'} c_data1 <- rbinom(1000, 10, 0.3) c_data2 <- rbinom(1000, 1000, 0.3) par(mfrow = c(1,2)) hist(c_data1/10, main = "M = 1000, n = 10, p = 0.3") hist(c_data2/1000, main = "M = 1000, n = 1000, p = 0.3") ``` #### d) ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 4, fig.align='center'} x <- c(1,0,1,0,0,0,0,1,0,1) likelihood_function <- function(p,x){ temp = 1 for ( i in x){ temp = temp * (p ^ i) * ((1 - p) ^ ( 1- i)) } return(temp) } plot(function(a) likelihood_function(a,x = x), main = "likelihood plot") cat("Likelihood at p = 0.3 is ", likelihood_function(0.3,x),"\n") cat("Maximum Likelihood occurs at p = 0.4 with the value of ", likelihood_function(0.4,x),"\n") ``` #### e) ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 4, fig.align='center'} plot(function(a) log(likelihood_function(a,x = x)), main = "log-likelihood plot") cat("Maximum log-likelihood occurs at p = 0.4 with the value of ", log(likelihood_function(0.4,x)),"\n") ``` #### g) ```{r, comment = NA, fig.show = 'hold', fig.height = 8, fig.width = 8} normal_sim <- function(M, n, pop_mean, pop_sd){ mean_vector <- c() var_vector <- c() for (i in 1:M){ sim_sample <- rnorm(n, mean = pop_mean, sd = pop_sd) mean_vector[i] <- mean(sim_sample) var_vector[i] <- var(sim_sample) } return(list("mean" = mean_vector, "var" = var_vector)) } normal_data1 <- normal_sim(M = 1000, n = 10, pop_mean = 1.6, pop_sd = 1) normal_data2 <- normal_sim(M = 1000, n = 1000, pop_mean = 1.6, pop_sd = 1) par(mfrow = c(2,2)) hist(normal_data1\$mean, main = "Mean / M = 1000, n = 10, mean = 1.6, sd = 1") hist(normal_data1\$var, main = "Var / M = 1000, n = 10, mean = 1.6, sd = 1") hist(normal_data2\$mean, main = "Mean / M = 1000, n = 10, mean = 1.6, sd = 1") hist(normal_data2\$var, main = "Var / M = 1000, n = 10, mean = 1.6, sd = 1") ``` #### h) ```{r, comment = NA, fig.show = 'hold', fig.height = 8, fig.width = 8} normal_matrix1 <- matrix(rnorm(1000 * 10, mean = 1.6, sd = 1), nrow = 1000, ncol = 10) normal_matrix2 <- matrix(rnorm(1000 * 10, mean = 1.6, sd = 1), nrow = 1000, ncol = 1000) par(mfrow = c(2,2)) mean_matrix1 <- rowMeans(normal_matrix1) mean_matrix2 <- rowMeans(normal_matrix2) var_matrix1 <- apply(normal_matrix1, 1, var) var_matrix2 <- apply(normal_matrix2, 1, var) hist(mean_matrix1, main = "Mean / M = 1000, n = 10, mean = 1.6, sd = 1") hist(var_matrix1, main = "Var / M = 1000, n = 10, mean = 1.6, sd = 1") hist(mean_matrix2, main = "Mean / M = 1000, n = 10, mean = 1.6, sd = 1") hist(var_matrix2, main = "Var / M = 1000, n = 10, mean = 1.6, sd = 1") ### Another accepted answer norm_sample1 = rnorm(1000, 1.6, sd = sqrt(1/10)) hist(norm_sample1, freq = TRUE, xlab = "Sample Mean", ylab = "Frequency", main = "Distribution on Mean with n = 10") norm_sample2 = rnorm(1000, 1.6, sd = sqrt(1/1000)) hist(norm_sample2, freq = TRUE, xlab = "Sample Mean", ylab = "Frequency", main = "Distribution on Mean with n = 1000") ``` #### i) ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 8} x <- c(2.22, 0.98, 2.63, 3.33, 1.86, 3.25, 2.25, 2.92, 1.78, 1.01) likelihood_normal <- function(input_mean, input_sd, x){ temp = 1 for ( i in x){ temp = temp * (1/ sqrt( 2 * pi * input_sd^2)) * exp( - ((i - input_mean) ^ 2) / (2 * input_sd^2)) } return(temp) } mean_interval <- seq(0, 3.2, by = 0.1) var_interval <- seq(0,3,by = 0.1) par(mfrow = c(1,2)) plot(mean_interval, likelihood_normal(mean_interval, input_sd = 1, x), main = "Normal likelihood plot (mean)", type = 'l') plot(var_interval, likelihood_normal(input_mean = 1.6, input_sd = sqrt(var_interval), x), main = "Normal likelihood plot (var)", type = 'l') cat("Likelihood at mu = 1.6, var = 1 is ", likelihood_normal(input_mean = 1.6, input_sd = sqrt(1), x),"\n") mu_like <- likelihood_normal(mean_interval, input_sd = 1, mean_interval) sigma_like <- likelihood_normal(input_mean = 1.6, input_sd = sqrt(var_interval), var_interval) cat("Maximum likelihood for mu is at = ", mean_interval[which(mu_like == max(mu_like, na.rm = TRUE))], "\n") cat("Maximum likelihood for mu is at = ", var_interval[which(sigma_like == max(sigma_like, na.rm = TRUE))], "\n") ``` #### j) ```{r, comment = NA, fig.show = 'hold', fig.height = 4, fig.width = 8} par(mfrow = c(1,2)) plot(mean_interval, log(likelihood_normal(mean_interval, input_sd = 1, x)), main = "Normal log-likelihood plot (mean)", type = 'l') plot(var_interval, log(likelihood_normal(input_mean = 1.6, input_sd = sqrt(var_interval), x)), main = "Normal log-likelihood plot (var)", type = 'l') ```