############################################################################
# Open Online R Stream (https://www.wvbauer.com/doku.php/live_streams)
#
# By: Wolfgang Viechtbauer (https://www.wvbauer.com)
# Date: 2023-04-06
#
# Topic(s):
# - An Introduction to R
# https://cran.r-project.org/doc/manuals/r-release/R-intro.html
# - Section(s): Appendix A
#
# last updated: 2023-04-07
############################################################################
# A few general notes about this manual: Based on my recollection of reading
# this manual 20+ years ago, there are parts that are overly technical and/or
# that illustrate ways of using R that I would not recommend (an example we
# will see below is the use of attach(), which I think is bad practice).
# Therefore, I will adjust the code from the manual for didactic purposes as
# deemed necessary.
### Appendix A: A sample session
# start R (or RStudio or whatever IDE / programming editor you are using)
# open the general HTML help pages (this launches a browser window)
help.start()
# generate 50 random values from a standard normal distribution and put them
# into an object called 'x'
x <- rnorm(50)
# x is a 'numeric vector'; typing an object's name prints its contents
x
# again generate 50 random values from a standard normal distribution and put
# them into 'y' (note: because 'x' is a vector and not a single number,
# rnorm() uses length(x) to determine how many values to generate here;
# rnorm(50) or rnorm(length(x)) would express the same thing more clearly)
y <- rnorm(x)
# scatterplot of x versus y
plot(x, y)
# list the objects in the 'workspace' (also called the 'global environment')
ls()
# remove objects 'x' and 'y' from the workspace
rm(x, y)
# again list the objects in the workspace
ls()
# character(0) means 'a character vector with no elements' (i.e., the
# workspace is now empty)
# create a vector of the integers from 1 to 20 (the ':' operator)
x <- 1:20
x
# take the square root of each element in 'x' (sqrt() is vectorized), divide
# this by 2, and add 1, and put the resulting vector of numbers into 'w'
w <- 1 + sqrt(x)/2
# create a 'data frame' with three variables: 'x' contains the values from
# the 'x' vector; 'y' is x plus 20 standard normal draws, each multiplied by
# the corresponding element of w (so the error variance increases with x,
# i.e., the data are heteroscedastic by construction); 'w' contains the 'w'
# values
dummy <- data.frame(x = x, y = x + rnorm(x)*w, w = w)
dummy
# remove objects 'x' and 'w' from the workspace (their values live on as
# columns inside the 'dummy' data frame)
rm(x, w)
# fit a simple regression model where 'y' is the outcome variable and 'x' is
# the predictor (these variables can be found in the 'dummy' data frame),
# store the results in 'fm', and then use the summary() function on this
# object to inspect the results
fm <- lm(y ~ x, data=dummy)
summary(fm)
# if we just want the coefficients, we can use coef()
coef(fm)
# fit a simple regression model with 1/w^2 as weights (so this does 'weighted
# least squares regression'); note: the lm() argument is called 'weights' --
# writing 'weight=1/w^2' also works, but only via R's partial argument
# matching, which is best avoided; 'w' is found inside the 'dummy' data
# frame, since lm() evaluates 'weights' in the 'data' argument first
fm1 <- lm(y ~ x, data=dummy, weights=1/w^2)
summary(fm1)
# if we want to access variable 'y', this will not work (it throws an error,
# since there is no object called 'y' in the workspace)
y
# variable 'y' is part of the 'dummy' data frame; if we want to access these
# values, we can use the following 'dollar' notation
dummy$y
# make the variables inside of the 'dummy' data frame visible by 'attaching'
# the data frame to the 'search path' (shown here because the manual uses
# it, but see the note further below -- this is bad practice)
attach(dummy)
# and now this will work
y
# fit a nonparametric local regression function (a 'lowess' smoother); the
# returned object contains the sorted x values and the corresponding fitted
# values
lrf <- lowess(x, y)
# scatterplot of x versus y
plot(x, y)
# add the fitted line from the local regression model (here x is already
# sorted, so x and lrf$y line up; in general, use lrf$x with lrf$y)
lines(x, lrf$y)
# add a line to the plot with an intercept of 0 and a slope of 1 (lty=3 means
# that a dotted line should be drawn)
abline(0, 1, lty=3)
# add the regression line from the simple regression model we fitted above
abline(coef(fm), col="blue")
# add the regression line from the weighted regression model
abline(coef(fm1), col="red")
# detach the dummy data frame from the search path (always undo an attach())
detach(dummy)
# general note: using attach() is *highly* inadvisable (it can easily lead to
# confusion as to where R is finding variables/objects); instead, either make
# use of the 'data' argument as we did above with lm() (if the function allows
# this) or explicitly specify from which data frame you are taking a certain
# variable from
# unfortunately, the lowess() function does not have a 'data' argument that
# can be used as we did with lm(), so the following line throws an error
# (it is shown here on purpose to illustrate this)
lrf <- lowess(x, y, data=dummy)
# so instead, we can use the dollar notation to specify which variables to use
lrf <- lowess(dummy$x, dummy$y)
# instead of using attach() or repeatedly using 'dummy$...', we can also use
# the with() function, which evaluates the expression with the variables in
# 'dummy' visible (a scoped, safer alternative to attach())
lrf <- with(dummy, lowess(x, y))
# create a scatterplot of the fitted values versus the residuals from the
# simple regression model we fitted above (useful for spotting non-linearity
# and heteroscedasticity, i.e., non-constant error variance)
plot(fitted(fm), resid(fm), xlab="Fitted values",
ylab="Residuals", main="Residuals vs Fitted")
# create a Q-Q plot of the residuals (to check for normality of the residuals)
# https://en.wikipedia.org/wiki/Q-Q_plot
qqnorm(resid(fm), main="Residuals Rankit Plot")
# remove all objects in the workspace (fine when resetting an interactive
# tutorial session like this one, but avoid rm(list=ls()) inside scripts)
rm(list=ls())
# figure out where the 'morley.tab' file is located on your computer (it
# ships with R as part of the 'datasets' package)
filepath <- system.file("data", "morley.tab", package="datasets")
filepath
# read in the Michelson data as a data frame and look at it
mm <- read.table(filepath)
mm
# there are five experiments (column Expt) and each has 20 runs (column Run)
# and (column Speed) is the recorded speed of light (given as km/sec, with
# 299000 subtracted)
# scatterplot of the measurements of the speed of light for the different
# experiments (Expt is still numeric here, so plot() draws a scatterplot)
with(mm, plot(Expt, Speed, main="Speed of Light Data", xlab="Experiment No."))
# change Expt and Run into factors (categorical variables)
mm$Expt <- factor(mm$Expt)
mm$Run <- factor(mm$Run)
# if the variable on the x-axis is a factor, then the plot() function
# automatically draws boxplots for the variable on the y-axis
with(mm, plot(Expt, Speed, main="Speed of Light Data", xlab="Experiment No."))
# so depending on the type of variables used as input, plot() actually creates
# a different type of visualization of the data
# fit a two-way ANOVA model (with 'main effects' only)
fm <- aov(Speed ~ Run + Expt, data=mm)
summary(fm)
# fit the model after removing 'Run' (update() refits 'fm' with the modified
# formula; '.' stands for 'whatever was there before')
fm0 <- update(fm, . ~ . - Run)
# do a full versus reduced model comparison (an F-test of whether 'Run'
# contributes beyond 'Expt')
anova(fm0, fm)
# create a sequence of numbers between -pi and pi of length 50, put these
# values into 'x' and then make a copy of this vector and call this 'y'
x <- seq(-pi, pi, len=50)
y <- x
# for every combination of x and y, compute cos(y)/(1 + x^2) and store the
# resulting values in a matrix (outer() applies the function to all pairs)
f <- outer(x, y, function(x,y) cos(y)/(1 + x^2))
# note that this creates a 50x50 matrix
dim(f)
# create a contour plot
contour(x, y, f)
# create a contour plot with more detail (more contour lines)
contour(x, y, f, nlevels=15)
# create a corresponding 3-d perspective plot (theta/phi control the viewing
# angles)
persp(x, y, f, theta=45, phi=40, col="lightgray")
# the 'asymmetric part' of f: subtract the transpose and halve, then store
# the resulting matrix in fa
fa <- (f-t(f))/2
# create a contour plot of the fa matrix
contour(x, y, fa, nlevels=15)
# create a corresponding 3-d perspective plot
persp(x, y, fa, theta=45, phi=40, col="lightgray")
# filled contour plots of f and fa
filled.contour(x, y, f)
filled.contour(x, y, fa)
# clean up
rm(list=ls())
# R can do complex arithmetic, but who the hell cares about this if one is
# starting out with R?!? so we will skip this part ...
# quit R
q()
# if asked about 'Saving the workspace' when quitting R, always answer No; in
# general, it is best never to make use of this functionality as it can lead
# to a cluttered workspace and confusion (we can look at ways of saving
# objects or workspace states manually later on)
############################################################################