library(SMPracticals) #If this fails see below data(births) births #have a look names(births) #checking variable names nrow(births) #n ## statistics ## mean(births$time) sd(births$time) min(births$time) max(births$time) median(births$time) quantile(births$time, 0.5) quantile(births$time, c(0.25, 0.5, 0.75)) mean(births$time, trim = 0.2) mean(births$time, trim = 0.4) mad(births$time) IQR(births$time) range(births$time) # skewness - in some other packages or do it yourself 1/nrow(births) * sum((births$time - mean(births$time))^3) / (1/(nrow(births) - 1) * sum((births$time - mean(births$time))^2)) ^ (3/2) ## graphs ## plot(births$day, births$time) boxplot(births$time) boxplot(births$time ~ births$day) hist(births$time) # a smooth alternative plot(density(births$time)) # both on same plot plot(density(births$time)) hist(births$time, add= TRUE, prob = TRUE) plot(ecdf(births$time)) #### Figure 2.1 in book #### par(mfrow = c(2, 2)) hist(births$time, xlab = "Hours", prob = TRUE, col = "grey40", border = "white", main = "") rug(births$time) plot(ecdf(births$time), verticals = TRUE, pch = "", xlab = "Hours", ylab = "Empirical distribution function", main = "") boxplot(births$time ~ births$day, xlab = "Day", ylab = "Hours", col = "grey80", pch = "_") # not quite possible with only 7 days of data medians <- aggregate(births$time, list(births$day), median)$x means <- aggregate(births$time, list(births$day), mean)$x plot(means, medians, xlab = "Daily average", ylab = "Daily median", pch = 19, xlim = c(4, 12), ylim = c(4, 12)) abline(0, 1, lty = "dashed") #### Saving a plot #### # windows: right click and "save as" # mac: select graphics window File -> Save As # or on the command line surround the above with pdf(...) and dev.off() # For example pdf(file = "fig2-1.pdf", width = 8, height = 8) par(mfrow = c(2, 2)) hist(births$time, xlab = "Hours", prob = TRUE, col = "grey40", border = "white", main = "") rug(births$time) plot(ecdf(births$time), verticals = TRUE, pch = "", xlab = "Hours", ylab = "Empirical distribution function", main = "") boxplot(births$time ~ births$day, xlab = "Day", ylab = "Hours", col = "grey80", pch = "_") # not quite possible with only 7 days of data medians <- aggregate(births$time, list(births$day), median)$x means <- aggregate(births$time, list(births$day), mean)$x plot(means, medians, xlab = "Daily average", ylab = "Daily median", pch = 19, xlim = c(4, 12), ylim = c(4, 12)) abline(0, 1, lty = "dashed") dev.off()