## Week 2: Inference ## Code from class # load in the Benoit and Marsh 2008 dataset library(foreign) dail <- read.dta("dail2002.dta") ## test that joint effect of all indep variables is zero m1 <- lm(votes1st ~ spend_total*incumb, data=dail) summary(m1) SST <- sum((m1$model[,1] - mean(m1$model[,1]))^2) (SSE <- sum(m1$residuals^2)) deviance(m1) # same thing as SSE k <- 3 # same as m1$rank-1 (df <- m1$df.residual) (F <- ((SST-SSE)/k) / (SSE/df)) 1 - pf(F, k, df) # p-value for the F test ## test significance of just 1 predictor (the interactive term) # constrained model lacks the intercept m1c <- lm(votes1st ~ spend_total + incumb, data=dail) SSEc <- deviance(m1c) # the SSE (F2 <- (SSEc - SSE) / (SSE / df)) 1 - pf(F2, 1, df) sqrt(F2) # will be the same as the t-test for this coefficient summary(m1)$coeff anova(m1c,m1) # a much easier way to compare 2 models # an additional example m2c <- lm(votes1st ~ incumb*spend_total + minister + votes1997, data=dail) summary(m2c) anova(m2c,m2) ## confidence intervals for beta summary(m1) # compute the critical value acording to d.f. (tcritical <- qt(.975, m1$df.residual)) # compute the confidence interval the hard way c(4493.325-tcritical*478.81, 4493.325+tcritical*478.81) # compute the CI the easy way in R confint(m1, "incumb") ## permutations test: F-test for all variables m3 <- lm(spend_total ~ gender + electorate, data=dail) summary(m3) fmodel <- summary(m3)$fstat[1] fstats <- numeric(4000) for (i in 1:4000) { temp <- lm(sample(spend_total) ~ gender + electorate, data=dail) fstats[i] <- summary(temp)$fstat[1] } length(fstats[fstats>fmodel])/4000 plot(density(fstats), main="", xlab="F-value from permutations") abline(v=fmodel, col="red") ## permutations test: t-test for electorate variable summary(m3)$coeff (tmodel <- summary(m3)$coeff[3,3]) tstats <- numeric(6000) for (i in 1:6000) { temp <- lm(spend_total ~ gender + sample(electorate), data=dail) tstats[i] <- summary(temp)$coeff[3,3] } mean(abs(tstats)>tmodel) plot(density(tstats), main="", xlab="t-value from permutations") abline(v=c(tmodel,-tmodel), col="red") ## Bootstrap the median "by hand" # get rid of the missings, they make median impossible spend <- dail$spend_total[complete.cases(dail$spend_total)] bsresult <- numeric(1000) for (i in 1:1000) { bsresult[i] <- median(sample(spend, replace=T)) } median(spend) # see what non-bootstrapped median is mean(bsresult) # mean of bootstrapped medians sd(bsresult) # this is the std. error of BSed medians quantile(bsresult,c(.025,.5,.975)) # 95% confidence interval hist(bsresult, breaks=100, xlab="Bootstrapped Medians of spend_total", main="") abline(v=median(spend), col="red") ## Bootstrap the median using boot library library(boot) # here is a function for the median boot.median <- function(data, index) { data <- data[index,] median(data$spend_total[complete.cases(data$spend_total)]) } (median.bs <- boot(dail, boot.median, 1000)) plot(median.bs) ## Bootstrap regression coefficients # this function will return the coefficients, sent to boot() boot.lm <- function(data, index) { data <- data[index,] # select obs. in bootstrap sample coef(lm(votes1st ~ spend_total*incumb, data=data)) # return coeffs } (m1.bs2 <- boot(dail, boot.lm, 2000)) # do 2000 bootstrap samples plot(m1.bs2, index=3) # plot incumbency coeff estimates