#
# Unit 8 - Data frames
#
# Dimensions and "Slicing"
#
# This part of the script loads employees.csv into a data frame, inspects
# its dimensions, and then demonstrates slicing a single cell and a single
# column.
#

# Loading data ----
# NOTE(review): the working directory is a hard-coded Windows-style path;
# change it to the folder that holds employees.csv before running.
setwd("c:/R")
employees <- read.csv("employees.csv")
print(employees)

# Dimensions ----
# dim() returns a vector: c(number of rows, number of columns).
print(dim(employees))
empDim <- dim(employees)
print(empDim[2]) # second element = number of columns

print(nrow(employees))
print(ncol(employees))

# works, yet intended for vectors
# (for a data frame, length() returns the number of columns)
print(length(employees))

# "Slicing" ----
# Our goal - "slicing" various
# shapes of slices
# Use this as a reference.

# Slicing
# Not all possible patterns.

# (1) Slicing a cell
# Back to the [ ] operator

# index, index
# to the left of the comma - row index
# to its right - column index
cell <- employees[10, 3]
print(cell)
print(class(cell))

cell <- employees[9, 4]
print(cell)
print(class(cell))

# index, name
# to the left of the comma - row index (label)
# to its right - column name
print(employees)
cell <- employees[10, "SALARY"]
print(cell)

# (2) Slicing a column
# Several methods.

# The $ operator
salary <- employees$SALARY
print(salary)
print(class(salary))

# Also possible
# (no comma: the result is still a data frame with one column)
salary <- employees[c("SALARY")]
print(salary)
# Notice this:
print(class(salary))

# If you would like the column as
# a vector
# (with a comma: a single selected column is dropped to a vector)
salary <- employees[, c("SALARY")]
print(salary)
# Notice this:
print(class(salary))

# Note that these are also possible
# Yet for the sake of consistency we
# will keep using the c()
salary <- employees["SALARY"]
print(salary)
salary <- employees[, "SALARY"]
print(salary)

# And yet we can also use an index
# Yet this is not always practical.
salary <- employees[c(6)]
print(salary)
salary <- employees[, c(6)]
print(salary)

# What does this code print?
# Single index without a comma: selects column 6 and keeps the
# data frame structure.
print(employees[6])

# (3) Slicing consecutive columns
partialDF <- employees[, 2:4]
print(partialDF)

# (4) Slicing non-consecutive columns
partialDF <- employees[, c(1, 4)]
print(partialDF)

partialDF <- employees[, c(1:2, 4)]
print(partialDF)

partialDF <- employees[, c("ID", "SALARY")]
print(partialDF)

# The order of the names sets the order of the columns in the result.
partialDF <- employees[, c("SALARY", "ID")]
print(partialDF)

# with dplyr's select()
# dplyr is attached here because select() is first needed in this section.
library(dplyr)

partialDF <- select(.data = employees, SALARY, ID)
print(partialDF)

# Column names may also be given as strings.
partialDF <- select(.data = employees, "ID", "SALARY")
print(partialDF)

# Same selection as the first select() call above.
partialDF <- select(.data = employees, SALARY, ID)
print(partialDF)

# name:name selects a range of adjacent columns.
partialDF <- select(.data = employees, GENDER:SALARY)
print(partialDF)

partialDF <- select(.data = employees, ID, EDUC:SALARY)
print(partialDF)

# (5) Slicing a row
# Row index to the left of the comma, nothing to its right = all columns.
oneRow <- employees[2, ]
print(oneRow)
print(class(oneRow))

# (6) Slicing consecutive rows
# Here: only using indices.
partialDF <- employees[3:8, ]
print(partialDF)
print(class(partialDF))

# from the head downwards
partialDF <- head(employees, n = 3)
print(partialDF)

# from the tail upwards
partialDF <- tail(employees, n = 4)
print(partialDF)

partialDF <- tail(employees, n = 15)
print(partialDF)

partialDF <- head(employees, n = 15)
print(partialDF)

# (7) Slicing nonconsecutive rows
# Note the row indices.
partialDF <- employees[c(1:5, 8), ]
print(partialDF)

# Forthcoming -
# Slicing nonconsecutive rows
# according to a logical condition
# namely: filtering.