#
# Unit 8 - Data frames
#
# Producing a sample of observations in a data frame
#
# This script reads employees.csv into a data frame, shows that row
# indexing with [] accepts any order and repeated indices, and then
# builds a random sample of rows (with replacement) in two phases:
# first a vector of sampled row indices, then [] with that vector.
#

# Loading data
# NOTE(review): setwd() changes the working directory for the whole
# session and the path is machine-specific - adjust it to the folder
# that holds employees.csv.
setwd("c:/R")
employees <- read.csv("employees.csv")
print(employees)

# We have seen this:
print(employees[c(3, 5, 8), ])

# The order does not matter
print(employees[c(8, 3, 5), ])

# Also, repetition is permitted.
print(employees[c(8, 8, 1, 5, 1), ])

# What is this, in essence?
# (Row indices with repetitions - the same shape of index vector that
# sampling with replacement produces. Presumably employees has at
# least 10 rows; rows selected by an index beyond nrow come back as NA.)
print(employees[c(10, 9, 8, 7, 7, 5, 4, 4, 2, 1), ])

# How would we produce a sample of observations
# from a given data frame?

# Phase I  - produce a sample of indices
#            of size equal to the number of rows
# Phase II - use [] with the sample of indices

# Phase I
# Producing a sample of indices
# The long way: draw one index per iteration.
print(employees)
n_rows <- nrow(employees)
# Preallocate instead of growing the vector with c() on every pass.
result <- integer(n_rows)
# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:0 would yield c(1, 0) and run the loop twice with a bogus index 0.
for (iteration in seq_len(n_rows)) {
  result[iteration] <- sample(seq_len(n_rows), size = 1)
}
print(result)

# The short way: draw all indices in a single call.
# replace = TRUE lets the same row index be drawn more than once.
result <- sample(seq_len(n_rows), size = n_rows, replace = TRUE)
print(result)

# sample() works on any vector: three draws, with replacement, from 5..10.
mySample <- sample(5:10, size = 3, replace = TRUE)
print(mySample)

# Phase II
# Index the data frame with the sampled row indices.
print(employees[result, ])
# Selecting the single column SALARY returns its values as a vector
# rather than as a one-column data frame.
print(employees[result, c("SALARY")])