############################################################################

# Open Online R Stream (https://www.wvbauer.com/doku.php/live_streams)
#
# By:   Wolfgang Viechtbauer (https://www.wvbauer.com)
# Date: 2023-04-27
#
# Topic(s):
# - An Introduction to R
#   https://cran.r-project.org/doc/manuals/r-release/R-intro.html
# - Section(s): 1.1 - 2.8
#
# last updated: 2024-02-23

############################################################################

# As noted already in the previous session (on 2023-04-06) where we started
# going through this manual, I might adjust the code at times to make it more
# accessible, potentially leave things out when I think they are unnecessary
# or confusing, and add additional explanations when this can be useful.

############################################################################

### 1: Introduction and preliminaries

############################################################################

## 1.2: Related software and documentation

# just as a note: while the books mentioned here are definitely interesting, I
# would not recommend them to those starting out with R

## 1.5: Using R interactively

# this section is focused on using R under a Unix/Linux operating system,
# which probably isn't relevant to many people; however, it indirectly touches
# on an important concept, namely that of the 'working directory'; when you
# start R, a particular directory (folder) on your computer is set as the
# working directory (which becomes relevant when reading data from an external
# file)

# determine what the current working directory is
getwd()

# one can change this with setwd(), but this can be tedious; when using
# RStudio, one can easily set the working directory to the location of this
# script via the 'Session' menu, 'Set Working Directory', and then selecting
# 'To Source File Location'; one can also configure RStudio (and other
# software for interacting with R) such that a script (like the present one)
# is
# automatically opened in RStudio when (double)clicking on the file and
# then the working folder is also automatically set to the location of the
# script

## 1.6: An introductory session

# we went through this introductory session during the stream on 2023/04/06

## 1.7: Getting help with functions and features

# open up the help file for the mean() function (both forms are equivalent)
help(mean)
?mean

# at times, one has to enclose the argument in quotes
help("[")

# open the general help page
help.start()

# do a search for a term (searches among the installed packages)
??"principal components analysis"

## 1.8: R commands, case sensitivity, etc.

# R is case sensitive, so these are two different objects
x <- 1
X <- 2
x
X

# the same applies to commands, so while there is a mean() function, trying to
# use Mean() or MEAN() would result in an error

# essentially, you should start object and variable names with a letter and
# then after that you can use letters, numbers, ., and _

# avoid special characters specific to certain languages like German Umlauts
# (i.e., stick to abc...z, 012...9, ., and _)

## 1.11: Data permanency and removing objects

# I would generally recommend not to make use of the option to save the
# 'workspace' when quitting R (this has the potential to lead to a cluttered
# workspace and confusion); if one needs to save things, one can do this
# manually with appropriate commands (like write.table(), save(), etc.
# which we might get to later)

# in RStudio, go to the 'Tools' menu, then 'Global Options', and uncheck (if
# it is checked) 'Restore .RData into workspace at startup' and set 'Save
# workspace to .RData on exit' to 'Never'

############################################################################

### 2: Simple manipulations; numbers and vectors

## 2.1: Vectors and assignment

# combine with c() the numbers into a numeric vector and assign this to 'x'
x <- c(10.4, 5.6, 3.1, 6.4, 21.7)
x

# in R, there is no special object type for a single number (a 'scalar') so a
# single number is just a vector of length 1
5

# using the assign() function for making the same assignment as above
assign("x", c(10.4, 5.6, 3.1, 6.4, 21.7))

# this might be useful when you want to assign something to an object, but the
# object name itself is a variable
var <- "age"
var
assign(var, c(19, 34, 22, 58))
age

# using the following would just overwrite the existing 'var' object and put
# the numeric vector into 'var'
var <- c(19, 34, 22, 58)
var

# can reverse the arrow in the assignment operation (uncommon to use this)
c(10.4, 5.6, 3.1, 6.4, 21.7) -> x

# line-by-line style syntax
x <- c(10.4, 5.6, 3.1, 6.4, 21.7)
y <- sqrt(x)
my <- mean(y)
my

# nested style syntax
my <- mean(sqrt(c(10.4, 5.6, 3.1, 6.4, 21.7)))
my

# pipe style syntax (the native pipe |> requires R >= 4.1)
c(10.4, 5.6, 3.1, 6.4, 21.7) |> sqrt() |> mean() -> my
my

# this also works because <- happens last
my <- c(10.4, 5.6, 3.1, 6.4, 21.7) |> sqrt() |> mean()
my

# a vector can also be one of the inputs to c()
y <- c(x, 0, x)
y

## 2.2: Vector arithmetic

# this illustrates 'recycling' behavior
v <- 2 * x + y + 1
v

# an unusual example, but since y is of length 11 and x is of length 5, x
# needs to be repeated twice and the first number from x then gets added as
# the last element for this operation to work (but we do get a warning message
# that this is a bit funky)

# max(), min(), and range() functions
min(x)
max(x)
range(x)

# length() to get the number of elements in
# a vector
length(x)

# take the sum of the elements in x
sum(x)

# since sum() and length() (and the other functions above) return vectors, we
# can then easily do further things with them
sum(x) / length(x)

# of course we can use mean(x) directly, but this illustrates the idea that
# the results returned from functions can be used for further steps

# sample variance of the elements in x
var(x)

# we could do the calculations manually with this
sum((x - mean(x))^2) / (length(x) - 1)

# just to make this very clear, we can break down the steps as follows
mean(x)
x - mean(x)
(x - mean(x))^2
sum((x - mean(x))^2)
length(x)
length(x) - 1
sum((x - mean(x))^2) / (length(x) - 1)

# sort the elements in x
sort(x)

# but again, note that x in itself is unchanged (unless of course we 'back
# assign' what sort(x) returns with x <- sort(x))

# illustrate the order() function
x
order(x)

# so the 3rd element from x is the lowest number, the 2nd element is the next
# highest number, the 4th element comes next, and so on

# illustrate pmin() and pmax() (parallel minimum/maximum)
x
pmax(x, 6)
pmin(x, 6)

## 2.3: Generating regular sequences

# create a sequence of numbers from 1 to 30
1:30

# : has high precedence, so need to use parentheses as needed
n <- 10
1:n-1
1:(n-1)

# backwards sequence
30:1

# seq() is a more flexible way for creating sequences, but these two are the
# same
seq(2,10)
2:10

# look at the help file for the seq() function
help(seq)

# we see that the first two arguments are called 'from' and 'to', so we can
# also use 'named arguments' and then the order does not matter
seq(from=1, to=30)
seq(to=30, from=1)

# illustrate the 'by' argument of seq()
seq(-5, 5, by=.2)

# illustrate the 'length.out' argument of seq()
seq(1, 50, length.out=8)
seq(1, 52, length.out=8)

# illustrate the 'along.with' argument of seq() (abbreviating this to 'along'
# also works, but only because of partial argument matching, so we spell out
# the full argument name here)
x
seq(along.with=x)

# repeat the 'x' vector 5 times
rep(x, times=5)

# repeat each element of 'x' 5 times
rep(x, each=5)

## 2.4: Logical vectors

x
temp <- x > 13
temp

# check for exact
# equality
x == 5.6

# illustrate | (or)
large <- x > 13
small <- x < 5
large
small
large | small

# illustrate !
large
!large

# can use logical vectors also in arithmetic in which case FALSE is treated as
# a 0 and TRUE is treated as a 1
large
large * 5

## 2.5: Missing values

# cannot do this to indicate that the value for the third element is missing
# (both of these lines intentionally raise an error)
c(4,2,,6)
c(4,2, ,6)

# use NA instead
c(4,2,NA,6)

# any operation on an NA becomes an NA
z <- c(4,2,NA,6)
z
z * 5

# this might also apply to statistical functions we use
mean(z)

# check the help file for the mean() function
help(mean)

# set the 'na.rm' argument to TRUE to remove NAs before computing the mean
mean(z, na.rm=TRUE)

# illustrate the is.na() function
z <- c(1:3,NA)
z
ind <- is.na(z)
ind

# this does not check if elements are NA, but makes a comparison of each
# element with NA, whose result is undecidable and hence NA
z == NA

# NaN = not a number
0/0
Inf - Inf

## 2.6: Character vectors

name <- c("Bob", "Sue", NA, "Joe")
name

# paste two strings together
paste("Bob", "Johnson")

# paste two vectors together
year <- c(1984, 1988, 1975, 1997)
paste(name, year)

# illustrate recycling of one vector and the 'sep' argument
labs <- paste(c("X","Y"), 1:10, sep="")
labs

# illustrate the 'collapse' argument
paste(name, collapse=", ")
paste(name, year, collapse=", ")

## 2.7: Index vectors; selecting and modifying subsets of a data set

x <- c(2,5,NA,7,6,3,NA,5,7,4)
x

# using a logical vector for selecting elements
!is.na(x)
y <- x[!is.na(x)]
y

# can read this as: give me from x the elements that are not missing

# a more complex example
y <- x[!is.na(x) & x > 5]
y

# can read this as: give me from x the elements that are not missing AND that
# are greater than 5

# this can be useful for example to select the data for a particular subgroup;
# say the group variable is coded as 1, 2, 3; then we could do this
grp <- c(1,3,2,2,1,3,2,3,1,2)
grp == 1
x[grp == 1]

# using an index vector for selecting elements
x[4]        # give me the 4th element
x[c(4,6)]   # give me elements 4 and 6
x[2:5]      # give me elements 2 through 5
x[c(4,6,4)] # can also repeat an index value

# this can be useful for example when creating a figure (e.g., scatterplot)
# and we want to use a different color for different groups; in essence, we
# need to have a color for each person, which we could get as follows
grp
c("blue", "red", "green")[grp]

# using negative indices for excluding elements
x
x[-c(2,4)] # remove elements 2 and 4
x[-(1:5)]  # remove elements 1 through 5

# create a named vector
fruit <- c("orange" = 5, "banana" = 10, "apple" = 1, "peach" = 20)
fruit

# can also leave off the quotes
fruit <- c(orange = 5, banana = 10, apple = 1, peach = 20)
fruit

# another way to create a named vector is to add names to an unnamed vector
fruit <- c(5, 10, 1, 20)
fruit
names(fruit) <- c("orange", "banana", "apple", "peach")
fruit

# can then use the names to select elements from the vector
fruit[c("apple","orange")]

# this can be useful when the group variable is a character variable
grp <- c("trt", "ctrl", "ctrl", "trt", "ctrl", "trt", "ctrl", "trt")
c(trt="blue", ctrl="red")[grp]

# replace the missing values in x with a 0
x
x[is.na(x)] <- 0
x

############################################################################