#
# Unit 8 - Data frames
#
# Scanning a data frame
#
# Walks through three ways of scanning a data frame: iterating over the
# columns directly, iterating over column indices, and iterating over row
# indices. Each "Problem" states an exercise; the matching "Solution" follows.

# Loading data ----
# Notice this .csv includes NAs
# NOTE(review): hard-coded, machine-specific working directory. It is kept
# because the relative CSV path below depends on it; prefer starting R in the
# data directory (or passing a full path to read.csv) instead.
setwd("c:/R")
employees <- read.csv("employeesNA.csv")
print(employees)

# Problem 1 ----
# Set all salaries as 20000.
employees$SALARY <- 20000
# Rows whose GENDER is NA show up here as all-NA rows, because NA == 2 is NA.
print(employees[employees$GENDER == 2, ])
print(employees)

# Problem 2 ----
# Produce a vector including all
# names of types of columns in
# the data frame employees.
print(class(employees$ID))
print(class(employees$FNAME))

# Scanning columns directly ----
# What about this?
for (col in employees) {
  print(col)
}

# Solution to Problem 2
types <- c()
for (col in employees) {
  types <- c(types, class(col))
}
print(types)

# Problem 3 ----
# Produce a vector including all indices of
# columns of type integer.
#
# Exercise skeleton: `??` marks the blank to fill in. It is kept as a comment
# because `??` is not a valid expression here, so the script could not be
# parsed with it live. Iterating over the columns themselves gives no access
# to the column index, which is what the next section addresses.
indices <- c()
# for (col in employees) {
#   if (class(col) == "integer") {
#     indices <- c(indices, ??)
#   }
# }
print(types)

# Scanning column indices ----
# Reminder: data frame's
# dimensions and indices
print(employees)
print(ncol(employees))

# What does this code produce?
# seq_len(n) equals 1:n for n >= 1 but is empty (not c(1, 0)) when n is 0.
inds <- seq_len(ncol(employees))
print(inds)

# What does this code print?
for (ind in seq_len(ncol(employees))) {
  print(ind)
}

# and this?
for (ind in seq_len(ncol(employees))) {
  print(employees[, ind])
}

# and this?
for (ind in seq_len(ncol(employees))) {
  print(employees[, ind])
}

# Solution to Problem 3
indices <- c()
for (ind in seq_len(ncol(employees))) {
  # inherits() stays a single TRUE/FALSE even when a column carries several
  # classes, where `class(x) == "integer"` would hand `if` a longer vector.
  if (inherits(employees[[ind]], "integer")) {
    indices <- c(indices, ind)
  }
}
print(indices)
print(employees)
# SALARY was overwritten with the double 20000 in Problem 1.
print(class(employees$SALARY))

# Problem 4 ----
# Produce a vector including
# all last names of persons
# about whom at least one detail
# is missing.
# Reload the file so the SALARY overwrite from Problem 1 is undone.
employees <- read.csv("employeesNA.csv")
print(employees)

# Scanning row indices ----
# What does this code produce?
inds <- seq_len(nrow(employees))
print(inds)

# And this?
for (ind in seq_len(nrow(employees))) {
  print(ind)
}

# What does this code print?
for (ind in seq_len(nrow(employees))) {
  print(employees[ind, ])
}

# And this?
for (i in seq_len(nrow(employees))) {
  print(employees[i, ])
}

# Solution to problem 4.
# Scan the rows of `employees` and record the index of every row that has at
# least one missing (NA) value.
#
# Result: `indices` holds those row numbers in ascending order, or stays NULL
# when no row has a missing value.
#
# NOTE(review): Problem 4 asks for the persons' last names, but this code
# stops at row indices. The last-name column is not visible in this part of
# the file, so the final lookup (indexing that column with `indices`) still
# has to be added - confirm the column name against the CSV header.
indices <- c()
# seq_len() gives an empty sequence for a 0-row data frame, whereas
# 1:nrow(employees) would visit 1 and 0 and report rows that do not exist.
for (ind in seq_len(nrow(employees))) {
  # drop = FALSE keeps the row as a one-row data frame even when the data
  # frame has a single column.
  df <- employees[ind, , drop = FALSE]
  # TRUE as soon as any cell of this row is NA.
  found <- anyNA(df)
  if (found) {
    indices <- c(indices, ind)
  }
}
print(indices)
print(employees)