### Week 5: Problems with Predictors
### Code from class
###
### Interactive teaching script. It expects "dail2002.dta" (and, optionally,
### "rae2008politics.csv") in the working directory, and the `foreign` and
### `simex` packages to be installed.

## area under the curve ----

x <- seq(-4, 4, 0.1)  # numeric sequence every .1 from -4 to 4
plot(x, dnorm(x), type = "l")  # plot the (standard) normal density over x

pnorm(1.96)  # area below 1.96 (one-tailed), about 0.975
2 * pnorm(-abs(1.959964))  # area below -1.96 plus above 1.96 (two-tailed)

# quantile functions: two-tailed 5% critical values of t
qt(1 - .05 / 2, df = 462)
qt(1 - .05 / 2, df = 30)

rbinom(20, 2, .5)  # count heads from 20 tosses of 2 coins w/Pr(heads)=.5

## illustrate Q-Q plots ----

# Use the local copy of the data when it exists; otherwise read it from the
# course web site. (Originally both reads ran back to back, so the second one
# overwrote the first and failed outright when no local copy was present.)
rae_file <- "rae2008politics.csv"
rae_url <- "http://www.kenbenoit.net/courses/iqrm/rae2008politics.csv"
rae <- if (file.exists(rae_file)) read.csv(rae_file) else read.csv(rae_url)

staff.sorted <- sort(rae$staff)  # sort the sample data (sort() drops NAs)
# sample quantiles and matching theoretical normal quantiles, both evaluated
# at the same plotting positions
y <- quantile(staff.sorted, ppoints(staff.sorted))
x <- qnorm(ppoints(staff.sorted))

# Side-by-side panels for the comparison only; par() returns the previous
# settings, which are restored afterwards so later plots use the full device.
old_par <- par(mfrow = c(1, 2))
plot(y ~ x, main = "\"By hand\" Q-Q Norm")
qqnorm(staff.sorted, main = "Built-in Q-Q Norm")
par(old_par)

## Illustrate problem of random error in X ----

library(foreign)  # provides read.dta(); library() errors if it is missing
d <- read.dta("dail2002.dta")

# model without error
m1 <- lm(votes1st ~ spend_total, data = d)

# model with error: add N(0, 5000^2) noise to the spending variable.
# Fixing the seed makes the simulated error, and everything estimated from
# it below, reproducible from run to run.
set.seed(123)
d$spenderror <- d$spend_total + rnorm(length(d$spend_total), 0, 5000)
m1e <- lm(votes1st ~ spenderror, data = d)
coef(m1)
coef(m1e)

# plot the difference in slopes
plot(votes1st ~ spend_total, data = d, xlab = "Spending", ylab = "Votes")
abline(m1)
points(votes1st ~ spenderror, data = d, pch = "+", col = "red")
abline(m1e, col = "red")

# show difference in SIMEX
library(simex)  # must first install from CRAN
# the model is refitted with x = TRUE before being handed to simex()
m1e2 <- lm(votes1st ~ spenderror, data = d, x = TRUE)
m1.simex <- simex(
  model = m1e2,
  SIMEXvariable = "spenderror",
  measurement.error = 5000  # same value as the sd of the noise added above
)
coef(m1.simex)
coef(m1)
coef(m1e)
plot(m1.simex)

## standardized coefficients ----

# remove rows with a missing value in either model variable
dc <- d[complete.cases(d$votes1st, d$spend_total), ]
# scale() returns a one-column matrix; [, 1] extracts it as a plain vector
m1.std <- lm(scale(dc$votes1st)[, 1] ~ scale(dc$spend_total)[, 1])
coef(m1)
coef(m1.std)
# same standardized slope computed by hand: b * sd(x) / sd(y)
coef(m1)["spend_total"] * sd(dc$spend_total) / sd(dc$votes1st)