ch_20_solutions

Prerequisites:

library(readxl)
library(tidyverse)
library(writexl)
library(openxlsx)
library(googlesheets4)

Note

File path argument will depend on where the file is saved to your specific system.

20.2.9 Exercises:

To clean the data I used the methods discussed in section 20.2.3:

read_xlsx(
  'Excel Sheets/survey.xlsx', 
  col_types = c('text', 'text'),
  na = c('N/A', '')
) |> 
  mutate(
    n_pets = if_else(n_pets == "two", "2", n_pets),
    n_pets = parse_number(n_pets)
  )
## # A tibble: 6 × 2
##   survey_id n_pets
##   <chr>      <dbl>
## 1 1.0            0
## 2 2.0            1
## 3 3.0           NA
## 4 4.0            2
## 5 5.0            2
## 6 6.0           NA

fill() function is originally discussed in section 18.2.1.

read_xlsx('Excel Sheets/roster.xlsx') |> 
  fill(group, subgroup)
## # A tibble: 12 × 3
##    group subgroup    id
##    <dbl> <chr>    <dbl>
##  1     1 A            1
##  2     1 A            2
##  3     1 A            3
##  4     1 B            4
##  5     1 B            5
##  6     1 B            6
##  7     1 B            7
##  8     2 A            8
##  9     2 A            9
## 10     2 B           10
## 11     2 B           11
## 12     2 B           12

I will admit there is probably a more concise way to do this.

#a.
sales <- read_xlsx('Excel Sheets/sales.xlsx', col_names = c('id', 'n'), skip = 4)
sales
## # A tibble: 9 × 2
##   id      n    
##   <chr>   <chr>
## 1 Brand 1 n    
## 2 1234.0  8.0  
## 3 8721.0  2.0  
## 4 1822.0  3.0  
## 5 Brand 2 n    
## 6 3333.0  1.0  
## 7 2156.0  3.0  
## 8 3987.0  6.0  
## 9 3216.0  5.0

#b.
brand_1_fixed <- sales |> 
  slice_head(n = 4) |> 
  janitor::row_to_names(row_number = 1) |>
  pivot_longer(cols = `Brand 1`, names_to = 'Brand', values_to = 'id')

brand_2_fixed <- sales |>
  slice_tail(n = 5) |>
  janitor::row_to_names(row_number = 1) |>
  pivot_longer(cols = `Brand 2`, names_to = 'Brand', values_to = 'id')

bind_rows(brand_1_fixed, brand_2_fixed) |> 
  mutate(
    id = as.numeric(id),
    n = as.numeric(n)
  )
## # A tibble: 7 × 3
##       n Brand      id
##   <dbl> <chr>   <dbl>
## 1     8 Brand 1  1234
## 2     2 Brand 1  8721
## 3     3 Brand 1  1822
## 4     1 Brand 2  3333
## 5     3 Brand 2  2156
## 6     6 Brand 2  3987
## 7     5 Brand 2  3216

You must supply your own file path to write.xlsx.

bake_sale <- tibble(
  item     = factor(c("brownie", "cupcake", "cookie")),
  quantity = c(10, 5, 8)
)

write.xlsx(bake_sale, "file_path")

I used another janitor function, row_to_names, in my solution for #3 of this section.

students <- read_xlsx('Excel Sheets/students.xlsx')

students |> 
  janitor::clean_names()
## # A tibble: 6 × 5
##   student_id full_name        favourite_food     meal_plan           age  
##        <dbl> <chr>            <chr>              <chr>               <chr>
## 1          1 Sunil Huffmann   Strawberry yoghurt Lunch only          4.0  
## 2          2 Barclay Lynn     French fries       Lunch only          5.0  
## 3          3 Jayendra Lyne    N/A                Breakfast and lunch 7.0  
## 4          4 Leon Rossini     Anchovies          Lunch only          <NA> 
## 5          5 Chidiegwu Dunkel Pizza              Breakfast and lunch five 
## 6          6 Güvenç Attila    Ice cream          Lunch only          6.0

It returns an error that it cannot open the file. XLS is a completely different format at its core so the functions cannot be used interchangeably.

20.3.6 Exercises:

Only different I saw was that the AGE column was interpreted as a list when read from the Google Sheet. This is an easy fix using the col_types argument.

gs4_deauth()

student_excel <- read_xlsx('Excel Sheets/students.xlsx')

students_sheet_id <- '1V1nPp1tzOuutXFLb3G9Eyxi3qxeEhnOXUzL5_BcCQ0w'
student_sheet <- read_sheet(students_sheet_id)
## ✔ Reading from "students".
## ✔ Range 'Sheet1'.
student_excel
## # A tibble: 6 × 5
##   `Student ID` `Full Name`      favourite.food     mealPlan            AGE  
##          <dbl> <chr>            <chr>              <chr>               <chr>
## 1            1 Sunil Huffmann   Strawberry yoghurt Lunch only          4.0  
## 2            2 Barclay Lynn     French fries       Lunch only          5.0  
## 3            3 Jayendra Lyne    N/A                Breakfast and lunch 7.0  
## 4            4 Leon Rossini     Anchovies          Lunch only          <NA> 
## 5            5 Chidiegwu Dunkel Pizza              Breakfast and lunch five 
## 6            6 Güvenç Attila    Ice cream          Lunch only          6.0

student_sheet
## # A tibble: 6 × 5
##   `Student ID` `Full Name`      favourite.food     mealPlan            AGE      
##          <dbl> <chr>            <chr>              <chr>               <list>   
## 1            1 Sunil Huffmann   Strawberry yoghurt Lunch only          <dbl [1]>
## 2            2 Barclay Lynn     French fries       Lunch only          <dbl [1]>
## 3            3 Jayendra Lyne    N/A                Breakfast and lunch <dbl [1]>
## 4            4 Leon Rossini     Anchovies          Lunch only          <NULL>   
## 5            5 Chidiegwu Dunkel Pizza              Breakfast and lunch <chr [1]>
## 6            6 Güvenç Attila    Ice cream          Lunch only          <dbl [1]>

Similar to Exercise #1 from section 20.2.9, I read in as a character so I can change the ‘two’ value to a numeric 2.

survey_sheet_id <- '1yc5gL-a2OOBr8M7B3IsDNX5uR17vBHOyWZq6xSTG2G8'
survey_sheet <- read_sheet(survey_sheet_id, col_types = 'cc', na = c('N/A', ''))
## ✔ Reading from "survey".
## ✔ Range 'Sheet1'.
survey_sheet |> 
  mutate(
    n_pets = if_else(n_pets == "two", "2", n_pets),
    n_pets = parse_number(n_pets)
  )
## # A tibble: 6 × 2
##   survey_id n_pets
##   <chr>      <dbl>
## 1 1              0
## 2 2              1
## 3 3             NA
## 4 4              2
## 5 5              2
## 6 6             NA

I used the fill function like I did in exercise #2 of section 20.2.9.

roster_sheet_id <- '1LgZ0Bkg9d_NK8uTdP2uHXm07kAlwx8-Ictf8NocebIE'
roster_sheet <- read_sheet(roster_sheet_id) |> 
  fill(group, subgroup)
## ✔ Reading from "roster".
## ✔ Range 'Sheet1'.

roster_sheet
## # A tibble: 12 × 3
##    group subgroup    id
##    <dbl> <chr>    <dbl>
##  1     1 A            1
##  2     1 A            2
##  3     1 A            3
##  4     1 B            4
##  5     1 B            5
##  6     1 B            6
##  7     1 B            7
##  8     2 A            8
##  9     2 A            9
## 10     2 B           10
## 11     2 B           11
## 12     2 B           12