#
# Unit 8 - Data frames
#
# Iterative production of a data frame
#
# BACK TO REAL PROGRAMMING; SPECIFICALLY: LOOPS.
# TAKE YOUR TIME, THIS IS IMPORTANT!

# Loading data
setwd("c:/R")
employees <- read.csv("employees.csv")
print(employees)

# One preliminary question
# Question
# What does this function do:
myMean <- function(vec) {
  mySum <- 0
  for (val in vec) {
    mySum <- mySum + val
  }
  return(mySum / length(vec))
}
# Answer: myMean adds up the elements of vec one by one and divides the
# total by the number of elements, i.e. it returns the arithmetic mean.
# An empty vec yields NaN (0 / 0), and any NA in vec makes the result NA.
print(myMean(employees$SALARY))

# Problem:
# Build a new data frame in the following steps:
# 1
# Produce 10 samples of observations with replacement
# on the basis of the data frame employees.
# 2
# For each sample:
# 2.1
# calculate the average
# of columns EDUC and SALARY.
# 2.2
# produce a vector containing the
# results of these two calculations
# 2.3
# Add the vector to the end of the
# constructed data frame.

# Producing a new empty data frame
# 1 - using data.frame
newDF <- data.frame()
print(newDF)

# 2 - with matrix()
# We will begin with this.
newDF <- data.frame(matrix(NA, ncol = 2, nrow = 3))
print(newDF)
print(class(newDF))
print(class(matrix(NA, ncol = 2, nrow = 3)))

# Solving the problem
# Preallocate one row per sample and one column per computed mean.
newDF <- data.frame(matrix(NA, ncol = 2, nrow = 10))
print(newDF)

# Problem:
# Build a new data frame in the following steps:
# 1
# Produce 10 samples of observations with replacement
# on the basis of the data frame employees.
for (iteration in 1:10) {
  # Producing sample of observations: draw as many row indices as there
  # are rows, with replacement, so rows may repeat within a sample.
  inds <- sample(
    seq_len(nrow(employees)),
    size = nrow(employees),
    replace = TRUE
  )
  mySample <- employees[inds, ]
  # means of EDUC and SALARY
  meanEDUC <- myMean(mySample$EDUC)
  meanSALARY <- myMean(mySample$SALARY)
  # producing a vector with both means
  results <- c(meanEDUC, meanSALARY)
  # Fill the preallocated row that belongs to this iteration.
  newDF[iteration, ] <- results
}
print(newDF)

# Replacing / adding a whole row of employees.
# The row is built with list(), not c(): c() would coerce every value to
# character, and assigning that vector would turn the numeric columns of
# employees into character columns, breaking later numeric computations
# such as myMean(employees$SALARY).
employees[2, ] <- list(3547, "Yasmin", "Lee", 2, 13, 15555)
print(employees)
employees[11, ] <- list(3547, "Yasmin", "Lee", 2, 13, 15555)
print(employees)

# How would we do the above using the
# first method for producing an empty data frame?
# Method 1: start from an empty data frame and append one row of results
# per iteration with rbind().
newDF <- data.frame()
print(newDF)

for (iteration in seq_len(10)) {
  # Producing sample of observations (row indices drawn with replacement)
  inds <- sample(
    seq_len(nrow(employees)),
    size = nrow(employees),
    replace = TRUE
  )
  mySample <- employees[inds, ]

  # means of EDUC and SALARY
  meanEDUC <- myMean(mySample$EDUC)
  meanSALARY <- myMean(mySample$SALARY)

  # producing a vector with both means
  results <- c(meanEDUC, meanSALARY)

  # Append the vector as a new last row of the data frame.
  newDF <- rbind(newDF, results)
}
print(data.frame(newDF))

# rbind() on two plain vectors builds a matrix with one row per vector;
# data.frame() then converts that matrix into a data frame.
vec1 <- c(3, 4, 5)
vec2 <- c(7, 8, 9)
myDF <- data.frame(rbind(vec1, vec2))
print(myDF)

# Appending a further vector as a row to an existing data frame.
vec3 <- c(8, 8, 9)
myDF <- rbind(myDF, vec3)
print(myDF)

# The same appending, but starting from an empty data frame
# (compare the printed column names with those of myDF above).
myDF2 <- data.frame()
myDF2 <- rbind(myDF2, vec3)
myDF2 <- rbind(myDF2, vec3)
print(myDF2)