Quantitative Genomics and Genetics 2016

Homework 3 R-code Key

– 25 February 2016

– Author: Jin Hyun Ju (jj328@cornell.edu)

Problem 2

b)

Function that simulates M different iid samples of size n

binom_sim <- function(M, n, p){
    mean_vector <- c()
    
    for (i in 1:M){
        sim_sample <- rbinom(n, 1, p)
        mean_vector[i] <- mean(sim_sample)
    }
    
    return(mean_vector)
  
}

data1 <- binom_sim(1000, 10, 0.3)

data2 <- binom_sim(1000, 1000, 0.3)

par(mfrow = c(1,2))

hist(data1, main = "M = 1000, n = 10, p = 0.3")

hist(data2, main = "M = 1000, n = 1000, p = 0.3")

c)

c_data1 <- rbinom(1000, 10, 0.3)

c_data2 <- rbinom(1000, 1000, 0.3)

par(mfrow = c(1,2))

hist(c_data1/10, main = "M = 1000, n = 10, p = 0.3")

hist(c_data2/1000, main = "M = 1000, n = 1000, p = 0.3")

d)

x <- c(1,0,1,0,0,0,0,1,0,1)
likelihood_function <- function(p,x){
      temp = 1
      for ( i in x){
          temp = temp * (p ^ i) * ((1 - p) ^ ( 1- i)) 
      }
      return(temp)
}

plot(function(a) likelihood_function(a,x = x), main = "likelihood plot")

cat("Likelihood at p = 0.3 is ", likelihood_function(0.3,x),"\n")
Likelihood at p = 0.3 is  0.0009529569 
cat("Maximum Likelihood occurs at p = 0.4 with the value of ", likelihood_function(0.4,x),"\n")
Maximum Likelihood occurs at p = 0.4 with the value of  0.001194394 

e)

plot(function(a) log(likelihood_function(a,x = x)), main = "log-likelihood plot")

cat("Maximum log-likelihood occurs at p = 0.4 with the value of ", log(likelihood_function(0.4,x)),"\n")
Maximum log-likelihood occurs at p = 0.4 with the value of  -6.730117 

g)

normal_sim <- function(M, n, pop_mean, pop_sd){
    mean_vector <- c()
    var_vector <- c()
    
    for (i in 1:M){
        sim_sample <- rnorm(n, mean = pop_mean, sd = pop_sd)
        mean_vector[i] <- mean(sim_sample)
        var_vector[i] <- var(sim_sample)
    }
    
    return(list("mean" = mean_vector, "var" = var_vector))
  
}

normal_data1 <- normal_sim(M = 1000, n = 10, pop_mean = 1.6, pop_sd = 1)

normal_data2 <- normal_sim(M = 1000, n = 1000, pop_mean = 1.6, pop_sd = 1)

par(mfrow = c(2,2))

hist(normal_data1$mean, main = "Mean / M = 1000, n = 10, mean = 1.6, sd = 1")
hist(normal_data1$var, main = "Var / M = 1000, n = 10, mean = 1.6, sd = 1")

hist(normal_data2$mean, main = "Mean / M = 1000, n = 10, mean = 1.6, sd = 1")
hist(normal_data2$var, main = "Var / M = 1000, n = 10, mean = 1.6, sd = 1")

h)

normal_matrix1 <- matrix(rnorm(1000 * 10, mean = 1.6, sd = 1), nrow = 1000, ncol = 10)

normal_matrix2 <- matrix(rnorm(1000 * 10, mean = 1.6, sd = 1), nrow = 1000, ncol = 1000)

par(mfrow = c(2,2))

mean_matrix1 <- rowMeans(normal_matrix1)
mean_matrix2 <- rowMeans(normal_matrix2)

var_matrix1 <- apply(normal_matrix1, 1, var)
var_matrix2 <- apply(normal_matrix2, 1, var)

hist(mean_matrix1, main = "Mean / M = 1000, n = 10, mean = 1.6, sd = 1")
hist(var_matrix1, main = "Var / M = 1000, n = 10, mean = 1.6, sd = 1")

hist(mean_matrix2, main = "Mean / M = 1000, n = 10, mean = 1.6, sd = 1")
hist(var_matrix2, main = "Var / M = 1000, n = 10, mean = 1.6, sd = 1")



### Another accepted answer 

norm_sample1 = rnorm(1000, 1.6, sd = sqrt(1/10))

hist(norm_sample1, freq = TRUE, xlab = "Sample Mean", ylab = "Frequency", 
     main = "Distribution on Mean with n = 10")

norm_sample2 = rnorm(1000, 1.6, sd = sqrt(1/1000))

hist(norm_sample2, freq = TRUE, xlab = "Sample Mean", ylab = "Frequency", 
     main = "Distribution on Mean with n = 1000")

i)

x <- c(2.22, 0.98, 2.63, 3.33, 1.86, 3.25, 2.25, 2.92, 1.78, 1.01)

likelihood_normal <- function(input_mean, input_sd, x){
      temp = 1
      for ( i in x){
          temp = temp * (1/ sqrt( 2 * pi * input_sd^2)) * exp( - ((i - input_mean) ^ 2) / (2 * input_sd^2))
      }
      return(temp)
}

mean_interval <- seq(0, 3.2, by = 0.1)
var_interval <- seq(0,3,by = 0.1)

par(mfrow = c(1,2))

plot(mean_interval, likelihood_normal(mean_interval, input_sd = 1, x), main = "Normal likelihood plot (mean)", type = 'l')

plot(var_interval, likelihood_normal(input_mean = 1.6, input_sd = sqrt(var_interval), x), main = "Normal likelihood plot (var)", type = 'l')

cat("Likelihood at mu = 1.6, var = 1 is ", likelihood_normal(input_mean = 1.6, input_sd = sqrt(1), x),"\n")
Likelihood at mu = 1.6, var = 1 is  6.35766e-07 
mu_like <- likelihood_normal(mean_interval, input_sd = 1, mean_interval)

sigma_like <- likelihood_normal(input_mean = 1.6, input_sd = sqrt(var_interval), var_interval)



cat("Maximum likelihood for mu is at = ", mean_interval[which(mu_like == max(mu_like, na.rm = TRUE))], "\n")
Maximum likelihood for mu is at =  1.6 
cat("Maximum likelihood for mu is at = ", var_interval[which(sigma_like == max(sigma_like, na.rm = TRUE))], "\n")
Maximum likelihood for mu is at =  0.8 

j)

par(mfrow = c(1,2))
plot(mean_interval, log(likelihood_normal(mean_interval, input_sd = 1, x)), main = "Normal log-likelihood plot (mean)", type = 'l')

plot(var_interval, log(likelihood_normal(input_mean = 1.6, input_sd = sqrt(var_interval), x)), main = "Normal log-likelihood plot (var)", type = 'l')