### Problem 2

• 2b
# Part 2b: simulate the test statistic when the data really come from the
# null model. Each replicate draws n.obs values from N(mean = 0, sd = sqrt(2)).
n.sims <- 100
n.obs <- 100

# Preallocate the result rather than growing an empty vector in the loop.
lrt <- numeric(n.sims)

for (i in seq_len(n.sims)) {
  sample.norm <- rnorm(n.obs, mean = 0, sd = sqrt(2))

  sample.mean <- mean(sample.norm)

  # var() uses n - 1 as the denominator; rescale by (n - 1) / n to get the
  # divide-by-n variance estimate. The factor is derived from n.obs so it
  # cannot drift away from the actual sample size.
  sample.var <- var(sample.norm) * (n.obs - 1) / n.obs

  # Sum of squares about 0 minus sum of squares about the sample mean
  # (algebraically n.obs * sample.mean^2), scaled by the variance estimate.
  lrt[i] <- (sum(sample.norm^2) - sum((sample.norm - sample.mean)^2)) /
    sample.var
}
• 2b. + c. Histogram + curve part
# Draw the histogram on the density scale (probability = TRUE) so that its
# heights are comparable with a density curve, then overlay the chi-squared
# density with 1 degree of freedom on the same axes (add = TRUE).
hist(lrt, probability = TRUE)

curve(
  dchisq(x, df = 1),
  from = 0,
  to = 20,
  add = TRUE
)

• 2d. P-value should be larger than threshold unless you are extremely unlucky. It is expected that we do not reject the null in this case since we generated the sample from the “true” distribution with the parameters from the null hypothesis.
# Part 2d: one sample generated under the null (mean 0, sd sqrt(2)), its test
# statistic, and the p-value from the chi-squared distribution with 1 df.
sample.norm <- rnorm(100, mean = 0, sd = sqrt(2))
n.obs <- length(sample.norm)

sample.mean <- mean(sample.norm)

# var() uses n - 1 as the denominator; rescale to the divide-by-n estimate,
# taking n from the sample itself instead of hard-coding 99 / 100.
sample.var <- var(sample.norm) * (n.obs - 1) / n.obs

# Sum of squares about 0 minus sum of squares about the sample mean,
# scaled by the variance estimate.
lrt2 <- (sum(sample.norm^2) - sum((sample.norm - sample.mean)^2)) / sample.var

# Upper-tail probability computed directly; this avoids the loss of precision
# that 1 - pchisq(...) suffers when the p-value is very small.
pchisq(lrt2, df = 1, lower.tail = FALSE)
[1] 0.7873
• 2e. The histogram is closer to the chi-squared distribution curve since we have a bigger sample size, which results in better parameter estimates.
# Part 2e: same simulation as 2b, but with 10000 observations per replicate.
n.sims <- 100
n.obs <- 10000

# Preallocate the result rather than growing an empty vector in the loop.
lrt.2e <- numeric(n.sims)

for (i in seq_len(n.sims)) {
  sample.norm <- rnorm(n.obs, mean = 0, sd = sqrt(2))

  sample.mean <- mean(sample.norm)

  # var() uses n - 1 as the denominator. The rescaling factor must follow the
  # sample size: with n.obs = 10000 it is 9999 / 10000, not the 99 / 100 that
  # only applies to samples of size 100.
  sample.var <- var(sample.norm) * (n.obs - 1) / n.obs

  # Sum of squares about 0 minus sum of squares about the sample mean,
  # scaled by the variance estimate.
  lrt.2e[i] <- (sum(sample.norm^2) - sum((sample.norm - sample.mean)^2)) /
    sample.var
}

# Density-scaled histogram with the chi-squared(1) density overlaid.
hist(lrt.2e, probability = TRUE)

curve(dchisq(x, df = 1), from = 0, to = 20, add = TRUE)

• 2f.
# Part 2f: simulate the test statistic when the data are generated with
# mean 0.5 (sd sqrt(2)) instead of the null mean 0.
n.sims <- 100
n.obs <- 100

# Preallocate the result rather than growing an empty vector in the loop.
lrt3 <- numeric(n.sims)

for (i in seq_len(n.sims)) {
  sample.norm <- rnorm(n.obs, mean = 0.5, sd = sqrt(2))

  sample.mean <- mean(sample.norm)

  # var() uses n - 1 as the denominator; rescale by (n - 1) / n, with the
  # factor derived from n.obs so it always matches the sample size.
  sample.var <- var(sample.norm) * (n.obs - 1) / n.obs

  # Sum of squares about 0 minus sum of squares about the sample mean,
  # scaled by the variance estimate.
  lrt3[i] <- (sum(sample.norm^2) - sum((sample.norm - sample.mean)^2)) /
    sample.var
}

# Density-scaled histogram with the chi-squared(1) density overlaid.
hist(lrt3, probability = TRUE)

curve(dchisq(x, df = 1), from = 0, to = 20, add = TRUE)

• 2g. No. We generated the samples from a distribution whose parameters differ from those of the null hypothesis, so the histogram is shifted to the right.

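• 2h. The p-value should be smaller than the threshold (again, unless you are extremely unlucky). We reject the null, and this is expected since we generated the sample from a distribution that differs from the null. Note that the single run printed below gave p = 0.1635, which would not reject at, e.g., a 5% level; that run is one of the unlucky draws, and rerunning the code will usually give a much smaller p-value.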
# Part 2h: one sample generated with mean 0.5 (sd sqrt(2)), its test
# statistic, and the p-value from the chi-squared distribution with 1 df.
# NOTE: this reuses the names from part 2d, so lrt2 is overwritten here.
sample.norm <- rnorm(100, mean = 0.5, sd = sqrt(2))
n.obs <- length(sample.norm)

sample.mean <- mean(sample.norm)

# var() uses n - 1 as the denominator; rescale to the divide-by-n estimate,
# taking n from the sample itself instead of hard-coding 99 / 100.
sample.var <- var(sample.norm) * (n.obs - 1) / n.obs

# Sum of squares about 0 minus sum of squares about the sample mean,
# scaled by the variance estimate.
lrt2 <- (sum(sample.norm^2) - sum((sample.norm - sample.mean)^2)) / sample.var

# Upper-tail probability computed directly; small p-values are expected here,
# which is where 1 - pchisq(...) loses precision.
pchisq(lrt2, df = 1, lower.tail = FALSE)
[1] 0.1635
• 2i.
# Part 2i: simulate the test statistic when the data are generated with
# mean 1 (sd sqrt(2)).
# NOTE: this reuses the name lrt3, so the results from part 2f are overwritten.
n.sims <- 100
n.obs <- 100

# Preallocate the result rather than growing an empty vector in the loop.
lrt3 <- numeric(n.sims)

for (i in seq_len(n.sims)) {
  sample.norm <- rnorm(n.obs, mean = 1, sd = sqrt(2))

  sample.mean <- mean(sample.norm)

  # var() uses n - 1 as the denominator; rescale by (n - 1) / n, with the
  # factor derived from n.obs so it always matches the sample size.
  sample.var <- var(sample.norm) * (n.obs - 1) / n.obs

  # Sum of squares about 0 minus sum of squares about the sample mean,
  # scaled by the variance estimate.
  lrt3[i] <- (sum(sample.norm^2) - sum((sample.norm - sample.mean)^2)) /
    sample.var
}

# Density-scaled histogram with the chi-squared(1) density overlaid.
hist(lrt3, probability = TRUE)

curve(dchisq(x, df = 1), from = 0, to = 20, add = TRUE)

• 2j. The mean that was used to generate the samples ($$\mu = 1$$) is further away from the null hypothesis ($$\mu = 0$$), so the distribution is shifted further to the right.