library(MASS) data(whiteside) head(whiteside) ?whiteside plot(whiteside$Temp, whiteside$Gas, col = whiteside$Insul) # fit model y_i = beta_1 * temp + gamma_{Insul} + epsilon_i # same slope different intercepts - make sense? # what would the design matrix be? X <- matrix(c(whiteside$Temp, as.numeric(whiteside$Insul == "Before"), as.numeric(whiteside$Insul == "After")), ncol = 3) colnames(X) <- c("Temp", "Insul_before", "Insul_after") Y <- whiteside$Gas # the R lm command fit <- lm(Gas ~ Temp + Insul - 1, data = whiteside) # Insul is a factor variable - R automatically sets up the dummy variables model.matrix(fit) # an easier way to get X and a check # you can make a variable a factor by using the function factor # i.e. group <- rep(1:4, 5) group.factor <- factor(group) # Get parameter estimates solve(t(X) %*% X) %*% t(X) %*% Y # or coef(fit) coefs <- coef(fit) plot(whiteside$Temp, whiteside$Gas, col = whiteside$Insul) abline(coefs[2], coefs[1], col = "black") abline(coefs[3], coefs[1], col = "red") # Variance of estimates n <- nrow(whiteside) p <- 3 sigma.sq <- 1/(n-p) * sum(residuals(fit)^2) var_est <- solve(t(X) %*% X) * sigma.sq var_est vcov(fit) # Standard errors of estimates sqrt(diag(var_est)) summary(fit)$coefficients[,2] # just pull out from summary(fit) ses <- sqrt(diag(var_est)) # 95\% CI's # upper coefs - qt(0.975, df = n - p) * ses # lower coefs + qt(0.975, df = n - p) * ses #or confint(fit) # Get 95% prediction interval for Gas Consumption at 0C after Insulation xplus <- c(0, 0, 1) #prediction pred <- xplus %*% solve(t(X) %*% X) %*% t(X) %*% Y se_xplus <- sqrt(sigma.sq * (t(xplus) %*% solve(t(X) %*% X) %*% xplus + 1)) pred + c(-1, 1) * qt(0.975, df = n-p) * se_xplus # I predict the gas comsumption for a week with avg. temp of 0 degrees after insulation installation to be between 4.24 and 5.73 with 95% confidence. predict(fit, newdata = data.frame(Temp = 0, Insul = "After"), interval = "prediction") # the level argument adjusts the confidence level see ?predict.lm # Compare to single intercept model # y_i = beta_1 * temp + beta_0 + epsilon_i X_res <- matrix(c(rep(1, n), whiteside$Temp), ncol = 2) fit_res <- lm(Gas ~ Temp, data = whiteside) # coefs solve(t(X_res) %*% X_res) %*% t(X_res) %*% Y # or coef(fit_res) # SS SS_res <- sum(residuals(fit_res)^2) SS_unres <- sum(residuals(fit)^2) # Test parameters using likelihood ratio - approximately chisquare n * (log(SS_res/SS_unres)) # or 2 * (logLik(fit) - logLik(fit_res)) # Test parameters using F-test - exactly F ((SS_res - SS_unres)/(3-2)) / (SS_unres / (n-3)) # or anova(fit_res, fit) # Other models # different slopes and intercepts lm(Gas ~ Insul/Temp, data = whiteside) # different slopes same intercept - must be an easier way? lm(Gas ~ I(as.numeric(Insul == "Before")*Temp) + I(as.numeric(Insul == "After")*Temp), data = whiteside) lm(Gas ~ Temp + Insul * Temp, data = whiteside)