############################################################################ # Open Online R Stream (https://www.wvbauer.com/doku.php/live_streams) # # By: Wolfgang Viechtbauer (https://www.wvbauer.com) # Date: 2023-11-09 # # Topic(s): # - Regression and Other Stories (https://avehtari.github.io/ROS-Examples/) # - Section(s): 4.4 - 4.5 # # last updated: 2023-11-22 ############################################################################ ### 4.4: Statistical significance, hypothesis testing, and statistical errors ## Statistical significance # say we flip a coin 20 times and it is a fair coin (so 50% chance for heads # and 50% chance for tails); then we can construct the sampling distribution # of the proportion of heads observed in the 20 flips through simulation set.seed(1234) props <- rbinom(10000000, size=20, prob=0.5) / 20 # frequency table of the observed proportions tab <- table(props) tab # examine the sampling distribution of the proportions plot(tab, type="h", lwd=3, bty="l", xlab="Proportion", ylab="Frequency", main="Sampling Distribution of the Proportion") # convert the frequency table of the observed proportions into the # corresponding probabilities of observing the proportions tab <- tab / 10000000 tab # sidenote: we don't actually need to simulate proportions to construct the # sampling distribution in this example, since we know based on statistical # theory that the probabilities of observing these different proportions can # be computed based on a binomial distribution round(dbinom(0:20, size=20, prob=0.5), 6) # but we will stick to the simulated values, since this illustrates the # general principles without requiring additional statistical theory (and the # difference between the simulated and actual probabilities is negligible) # extract from 'tab' the actually observed proportions (as a numeric vector) props <- as.numeric(names(tab)) props # compute the probability of observing a proportion of 0.7 tab[props == 0.7] # compute the probability of observing a proportion of 0.7 or higher sum(tab[props >= 0.7]) # compute the probability of observing a proportion of 0.75 or higher sum(tab[props >= 0.75]) # compute the probability of observing a proportion of 0.25 sum(tab[props == 0.25]) # compute the probability of observing a proportion of 0.25 or lower sum(tab[props <= 0.25]) # now imagine you do the experiment (flipping the coin 20 times and observing # the proportion of heads) once and you get the following result heads <- c(T, F, F, F, F, T, F, F, T, T, F, F, T, F, T, T, F, F, T, F) mean(heads) # is this outcome unusual if the coin is fair? # compute the probability of observing this result under the sampling # distribution of a proportion for a fair coin sum(tab[props == mean(heads)]) # compute the probability of observing this result or an even more extreme # deviation from 0.5 under the sampling distribution of a proportion for a # fair coin sum(tab[props <= mean(heads)]) # this is not an unusual event to happen if the coin is really fair # the probability computed above is the (one-sided) p-value of our observed # result for testing the null hypothesis H0: the coin is fair # but say we had observed the following result heads <- c(T, F, F, F, F, T, F, F, T, T, F, F, F, F, F, T, F, F, F, F) mean(heads) # the probability of observing this result or an even more extreme one is very # small (i.e., the p-value is very small) sum(tab[props <= mean(heads)]) # this may make us question whether the coin is really fair; conventionally, # we are going to reject the null hypothesis if the p-value is .05 or smaller # compute the standard error of a proportion based on a sample size of 20 if # the true proportion is 0.5 (see section 4.2) se <- sqrt(0.5 * 0.5 / 20) se # compute the test statistic (how far away is the observed result from the # value under the null hypothesis, relative to the standard error of the # statistic); if this is large (say, +-2), then again we are going to reject # the null hypothesis z <- (mean(heads) - 0.5) / se z # where does the +-2 come from? under a normal sampling distribution, the # probability of observing a statistic that is 2 or more standard errors to # the right of the center is about 2.5% (strictly, it is 1.96 SEs); so, the # probability of either observing a test statistic of +2 (or more positive) or # -2 (or more negative) is 5% (due to the symmetry of a normal distribution); # in a two-sided test, we don't care if the deviation is to the left or right # of the center, so then we use the rule that the one-sided p-value must be # 0.025 or smaller (or twice the one-sided p-value must be 0.05 or smaller) # the sampling distribution of the proportion we saw above is not normal (it # cannot really be, since it is a discrete distribution, while a normal # distribution is continuous), but we can still use a normal distribution as # an approximation; compute the probability of observing the test statistic we # have observed or a more extreme one under a standard normal distribution pnorm(z) # this is not the same as what we computed earlier (0.0207125), since the # normal approximation is not exact (it works better when we have a larger # sample size, that is, we flip the coin more often) # to get the p-value for our two-sided test, we have to multiple this # probability by 2 (to get the two-sided p-value) 2 * pnorm(z) # or more generally, since z might be negative or positive, we take the # absolute value of the test statistic and then compute twice the probability # of observing this result or an even more positive one 2 * pnorm(abs(z), lower.tail=FALSE) ############################################################################ ## Hypothesis testing for simple comparisons # simulate data for a study as described in this section, where we have 100 # people in each group, assuming that the true means are the same in the two # groups (i.e., the null hypothesis is true that the treatment does not affect # the mean cholesterol level of those in the treatment group) set.seed(1234) meandiff <- replicate(100000, { x.t <- rnorm(100, mean=225, sd=10) x.c <- rnorm(100, mean=225, sd=10) mean(x.t) - mean(x.c) }) # sidenote: could speed this up by directly simulating the means, but the way # above more directly corresponds to how the data would look like if such a # study is run # examine the sampling distribution of the mean difference hist(meandiff, breaks=50, xlab="Mean Difference", bty="l", main="Sampling Distribution of the Mean Difference") # now imagine we actually run the study and get these results x.t <- round(rnorm(100, mean=222, sd=10)) x.c <- round(rnorm(100, mean=225, sd=10)) mean(x.t) - mean(x.c) # add the observed mean difference as a vertical line to the histogram abline(v = mean(x.t) - mean(x.c), lwd=3) # compute the probability of the observed mean difference or a more extreme # one under the sampling distribution mean(meandiff <= mean(x.t) - mean(x.c)) # twice this probability is the two-sided p-value 2 * (mean(meandiff <= mean(x.t) - mean(x.c))) # compute the test statistic (note: the null hypothesis here says that the # true mean difference is 0, so let's add this part also to the numerator) teststat <- ((mean(x.t) - mean(x.c)) - 0) / sd(meandiff) teststat # compute the two-sided p-value using the test statistic 2 * pnorm(abs(teststat), lower.tail=FALSE) # these are not exactly the same because we only simulated 100000 values under # the sampling distribution; also, strictly speaking, we should use a # t-distribution to compute the p-value based on the test statistic 2 * pt(abs(teststat), df=100+100-2, lower.tail=FALSE) # in practice, we do not have sd(meandiff), but we can estimate the standard # error using the observed data se <- sqrt(sd(x.t)^2 / 100 + sd(x.c)^2 / 100) se # so we then compute the test statistic using the estimated standard error and # the corresponding two-sided p-value teststat <- ((mean(x.t) - mean(x.c)) - 0) / se 2 * pt(abs(teststat), df=100+100-2, lower.tail=FALSE) # since the two-sided p-value is below .05, we reject the null hypothesis and # conclude that the treatment does affect the mean cholesterol value of the # treatment group ############################################################################ ### 4.5: Problems with the concept of statistical significance # not really anything to put into code from this section ############################################################################