### Class examples from Week 8: OLS ### Ken Benoit Nov 2009 #### Correlation example x <- c(12,10,6,16,8,9,12,11) y <- c(12,8,12,11,10,8,16,15) cor(x,y) # compute the empirical t-value (t.calc <- (.24*sqrt(8-2) / sqrt(1-.24^2))) # compute the critical t-value (t.crit <- qt(1-.05/2, 6)) # compare t.calc > t.crit # using R's built-in correlation significance test cor.test(x,y) #### Prior convictions and sentencing example par(mar=c(4,4,1,1)) x <- c(0,3,1,0,6,5,3,4,10,8) y <- c(12,13,15,19,26,27,29,31,40,48) plot(x, y, xlab="Number of prior convictions (X)", ylab="Sentence length (Y)", pch=19) abline(h=c(10,20,30,40), col="grey70") #### Regression formulas in R x <- c(0,3,1,0,6,5,3,4,10,8) y <- c(12,13,15,19,26,27,29,31,40,48) (data <- data.frame(x, y, xdev=(x-mean(x)), ydev=(y-mean(y)), xdevydev=((x-mean(x))*(y-mean(y))), xdev2=(x-mean(x))^2, ydev2=(y-mean(y))^2)) (SP <- sum(data$xdevydev)) (SSx <- sum(data$xdev2)) (SSy <- sum(data$ydev2)) (b1 <- SP / SSx) (b0 <- mean(y) - b1*mean(x)) #### Plot of regression example par(mar=c(4,4,1,1)) plot(x, y, xlab="Number of prior convictions (X)", ylab="Sentence length (Y)", pch=19) abline(lm(y~x)) text(8,45,"Regression line") text(8,43.5,"Yhat = 14 + 3X") abline(v=mean(x), h=mean(y), lty="dashed") points(0,b0,pch=0) points(mean(x),mean(y),pch=0) text(.8,13,"Y intercept") #### R^2 example x <- c(49,50,53,55,60,55,60,50) y <- c(81,88,87,99,91,89,95,90) N <- length(x) (SSx <- sum(x^2) - N*mean(x)^2) (SSy <- sum(y^2) - N*mean(y)^2) (SP <- sum(x*y) - N*mean(x)*mean(y)) (b <- SP/SSx) (a <- mean(y) - b*mean(x)) (r <- SP / sqrt(SSx*SSy)) (r2 <- r^2) #### plot the regression model plot(x, y, pch=19) abline(lm(y~x)) #### regression models in R regmodel <- lm(y ~ x) summary(regmodel) coef(summary(regmodel)) #### Illustration using Anscombe dataset data(anscombe) attach(anscombe) round(coef(lm(y1~x1)), 2) # same b0, b1 round(coef(lm(y2~x2)), 2) round(coef(lm(y3~x3)), 2) round(coef(lm(y4~x4)), 2) round(summary(lm(y1~x1))$r.squared, 2) # same R^2 round(summary(lm(y2~x2))$r.squared, 2) round(summary(lm(y3~x3))$r.squared, 2) round(summary(lm(y4~x4))$r.squared, 2) # plot the four x-y pairs par(mfrow=c(2,2), mar=c(4, 4, 1, 1)+0.1) # 4 plots in one graphic window plot(x1,y1) abline(lm(y1~x1), col="red", lty="dashed") plot(x2,y2) abline(lm(y2~x2), col="red", lty="dashed") plot(x3,y3) abline(lm(y3~x3), col="red", lty="dashed") plot(x4,y4) abline(lm(y4~x4), col="red", lty="dashed") detach(anscombe) #### Multiple regression example require(foreign) dail <- read.dta("dailcorrected.dta") summary(lm(votes1st ~ spend_total + incumb + electorate + minister, data=dail))