#### Quant 1 #### Week 5 Code: Simulations require(foreign) dail2002 <- read.dta("../02 Univariate/dail2002.dta") ## illustrate sampling error spending <- na.omit(dail2002$spend_total) draws.spending <- NULL for (i in 1:100) { draws.spending[i] <- mean(sample(spending, 10)) } hist(draws.spending, breaks=40, freq=FALSE, xlim=c(5000,25000), xlab="Spending", main="100 Draws of 10 observations each") lines(density(draws.spending), col="red") abline(v=mean(spending), lty="dashed", col="blue") draws.spending <- NULL for (i in 1:1000) { draws.spending[i] <- mean(sample(spending, 10)) } hist(draws.spending, breaks=40, freq=FALSE,xlim=c(5000,25000), xlab="Spending", main="1,000 Draws of 10 observations each") lines(density(draws.spending), col="red") abline(v=mean(spending), lty="dashed", col="blue") draws.spending <- NULL for (i in 1:30) { draws.spending[i] <- mean(sample(spending, 100)) } hist(draws.spending, freq=FALSE, xlim=c(5000,25000), breaks=10, xlab="Spending", main="30 Draws of 100 observations each") lines(density(draws.spending), col="red") abline(v=mean(spending), lty="dashed", col="blue") ## illustrate bootstrap sampling # using sample to generate a permutation of the sequence 1:10 sample(10) # bootstrap sample from the same sequence sample(10, replace=T) # boostrap sample from the same sequence with probabilities that # favor the numbers 1-5 prob1 <- c(rep(.15, 5), rep(.05, 5)) prob1 sample(10, replace=T, prob=prob1) ## bootstrap sampling example to estimate std error of the median # using a for loop bs <- NULL for (i in 1:100) { bs[i] <- median(sample(spending, replace=TRUE)) } quantile(bs, c(.025, .5, .975)) median(spending) # using lapply and sapply resamples <- lapply(1:100, function(i) sample(spending, replace=TRUE)) bs <- sapply(resamples, median) quantile(bs, c(.025, .5, .975)) # using a function b.median <- function(data, n) { resamples <- lapply(1:n, function(i) sample(data, replace=T)) sapply(resamples, median) } quantile(b.median(spending, 10), c(.025, .5, .975)) quantile(b.median(spending, 100), c(.025, .5, .975)) quantile(b.median(spending, 300), c(.025, .5, .975)) # using R's boot library library(boot) samplemedian <- function(x, d) return(median(x[d])) quantile(boot(spending, samplemedian, R=10)$t, c(.025, .5, .975)) quantile(boot(spending, samplemedian, R=100)$t, c(.025, .5, .975)) quantile(boot(spending, samplemedian, R=400)$t, c(.025, .5, .975)) # illustrate the Central Limit Theorem plot(0, 0, yaxt="n", bty="n", type="n", xlim=c(0,30000), ylim=c(0,.001), ylab="Density", xlab="Sample mean of Total Spending") for (sample.size in c(10,20,60,100,150)) { mean.sample <- NULL for (i in 1:100) { mean.sample[i] <- mean(sample(dail2002$spend_total, sample.size)) } lines(density(mean.sample, na.rm=TRUE), lwd=2) }