# R session # Thinking like a computer # One of the upshots of R is that we can ``routinize'' programs that do # the same thing over and over. This requires a bare minimum of computer # ``programmer'' knowledge. Really, though, I just want you to see what # it looks like to have some ``code'' # Let's simulate the Central Limit Theorem! Remember, an infinite numer # of sample means will be normally distributed. # This means we want to take a lot (``infinite'') number of samples. # If we want to do the same thing a lot of times, the answer is a # ``for'' loop. A for loop says, ``for'' a certain number of times, # do the same thing. # Define the population population <- rnorm(100000, 5, 5) # population <- rpois(100000, 5) # Look at the population plot(population) hist(population) # Fun math time: can you guess the 68th, 95th, and 99th percentiles? # use the quantile() function c(mean(population) - sd(population) - sd(population) - sd(population), mean(population) - sd(population) - sd(population), mean(population) - sd(population), mean(population)) quantile(population, c(0.005, 0.025, 0.16, 0.50)) # CLT: we want to sample, take a mean, and plot the means # First: a vector to fill with the means (let's do 5,000 of 1,000 observations) sample.means <- rep(NA, 5000) # Syntax: for(i in start:num of times) {stuff to do} for(i in 1:length(sample.means)) { obs <- sample(population, 1000) sample.means[i] <- mean(obs) } # Standard error of the mean? se.mean <- sd(obs)/sqrt(5000) sample.means hist(sample.means) quantile(sample.means, c(0.025, 0.16, 0.50)) c(mean(obs) - se.mean - se.mean, mean(obs) - se.mean, mean(obs)) # Next: if statements. If statements are logical operators: work off of # ``Boolean'' logic (yes or no). Syntax: you write (ifelse(test), do if true, # do if false) male <- round(runif(1000, 0, 1)) height <- round(rnorm(1000, 64, 4)) + round(male*rnorm(1000, 6, 2)) state <- c(rep("Alaska", 0.25*length(height)), rep("Alabama", 0.25*length(height)), rep("Texas", 0.25*length(height)), rep("Georgia", 0.25*length(height))) plot(male, height) # Example: code variables male.height <- ifelse(male == 1, height, 0) # Imagine a world where you have negative ``missing'' values male.recode <- male male.recode[c(990, 999)] <- -9 male.recode male.recoded <- ifelse(male.recode < 0, NA, male.recode) male.recoded # Can you put them together? Of course! real.tall <- rep(0, length(male)) for(i in 1:length(male)) { real.tall[i] <- ifelse(male[i] == 1 & height[i] > mean(male.height) + sd(male.height), 1, 0) } cbind(real.tall, height) table(state, real.tall) # Nested for loops! Sure! # What if we wanted a variable for the average height in the state? avg.state.height <- rep(NA, length(male)) for(i in 1:length(male)) { #For our observations for(j in 1:length(unique(state))) {# For our states ifelse(state[i] == unique(state)[j], avg.state.height[i] <- mean(subset(height, state == unique(state)[j])), avg.state.height[i] <- avg.state.height[i]) } } mean(subset(height, state == "Alabama")) mean(subset(height, state == "Georgia")) mean(subset(height, state == "Texas")) mean(subset(height, state == "Alaska")) table(avg.state.height) cbind(avg.state.height, state)