ch_20_solutions

Prerequisites:


library(readxl)
library(tidyverse)
library(writexl)
library(openxlsx)
library(googlesheets4)
Note

File path argument will depend on where the file is saved to your specific system.

20.2.9 Exercises:

  1. To clean the data I used the methods discussed in section 20.2.3:

    read_xlsx(
      'Excel Sheets/survey.xlsx', 
      col_types = c('text', 'text'),
      na = c('N/A', '')
    ) |> 
      mutate(
        n_pets = if_else(n_pets == "two", "2", n_pets),
        n_pets = parse_number(n_pets)
      )
    ## # A tibble: 6 × 2
    ##   survey_id n_pets
    ##   <chr>      <dbl>
    ## 1 1.0            0
    ## 2 2.0            1
    ## 3 3.0           NA
    ## 4 4.0            2
    ## 5 5.0            2
    ## 6 6.0           NA
  2. fill() function is originally discussed in section 18.2.1.

    read_xlsx('Excel Sheets/roster.xlsx') |> 
      fill(group, subgroup)
    ## # A tibble: 12 × 3
    ##    group subgroup    id
    ##    <dbl> <chr>    <dbl>
    ##  1     1 A            1
    ##  2     1 A            2
    ##  3     1 A            3
    ##  4     1 B            4
    ##  5     1 B            5
    ##  6     1 B            6
    ##  7     1 B            7
    ##  8     2 A            8
    ##  9     2 A            9
    ## 10     2 B           10
    ## 11     2 B           11
    ## 12     2 B           12
  3. I will admit there is probably a more concise way to do this.

    #a.
    sales <- read_xlsx('Excel Sheets/sales.xlsx', col_names = c('id', 'n'), skip = 4)
    sales
    ## # A tibble: 9 × 2
    ##   id      n    
    ##   <chr>   <chr>
    ## 1 Brand 1 n    
    ## 2 1234.0  8.0  
    ## 3 8721.0  2.0  
    ## 4 1822.0  3.0  
    ## 5 Brand 2 n    
    ## 6 3333.0  1.0  
    ## 7 2156.0  3.0  
    ## 8 3987.0  6.0  
    ## 9 3216.0  5.0
    
    #b.
    brand_1_fixed <- sales |> 
      slice_head(n = 4) |> 
      janitor::row_to_names(row_number = 1) |>
      pivot_longer(cols = `Brand 1`, names_to = 'Brand', values_to = 'id')
    
    brand_2_fixed <- sales |>
      slice_tail(n = 5) |>
      janitor::row_to_names(row_number = 1) |>
      pivot_longer(cols = `Brand 2`, names_to = 'Brand', values_to = 'id')
    
    bind_rows(brand_1_fixed, brand_2_fixed) |> 
      mutate(
        id = as.numeric(id),
        n = as.numeric(n)
      )
    ## # A tibble: 7 × 3
    ##       n Brand      id
    ##   <dbl> <chr>   <dbl>
    ## 1     8 Brand 1  1234
    ## 2     2 Brand 1  8721
    ## 3     3 Brand 1  1822
    ## 4     1 Brand 2  3333
    ## 5     3 Brand 2  2156
    ## 6     6 Brand 2  3987
    ## 7     5 Brand 2  3216
  4. You must supply your own file path to write.xlsx.

    bake_sale <- tibble(
      item     = factor(c("brownie", "cupcake", "cookie")),
      quantity = c(10, 5, 8)
    )
    
    write.xlsx(bake_sale, "file_path")
  5. I used another janitor function, row_to_names, in my solution for #3 of this section.

    students <- read_xlsx('Excel Sheets/students.xlsx')
    
    students |> 
      janitor::clean_names()
    ## # A tibble: 6 × 5
    ##   student_id full_name        favourite_food     meal_plan           age  
    ##        <dbl> <chr>            <chr>              <chr>               <chr>
    ## 1          1 Sunil Huffmann   Strawberry yoghurt Lunch only          4.0  
    ## 2          2 Barclay Lynn     French fries       Lunch only          5.0  
    ## 3          3 Jayendra Lyne    N/A                Breakfast and lunch 7.0  
    ## 4          4 Leon Rossini     Anchovies          Lunch only          <NA> 
    ## 5          5 Chidiegwu Dunkel Pizza              Breakfast and lunch five 
    ## 6          6 Güvenç Attila    Ice cream          Lunch only          6.0
  6. It returns an error that it cannot open the file. XLS is a completely different format at its core so the functions cannot be used interchangeably.

20.3.6 Exercises:

  1. Only different I saw was that the AGE column was interpreted as a list when read from the Google Sheet. This is an easy fix using the col_types argument.

    gs4_deauth()
    student_excel <- read_xlsx('Excel Sheets/students.xlsx')
    
    students_sheet_id <- '1V1nPp1tzOuutXFLb3G9Eyxi3qxeEhnOXUzL5_BcCQ0w'
    student_sheet <- read_sheet(students_sheet_id)
    ## ✔ Reading from "students".
    ## ✔ Range 'Sheet1'.
    student_excel
    ## # A tibble: 6 × 5
    ##   `Student ID` `Full Name`      favourite.food     mealPlan            AGE  
    ##          <dbl> <chr>            <chr>              <chr>               <chr>
    ## 1            1 Sunil Huffmann   Strawberry yoghurt Lunch only          4.0  
    ## 2            2 Barclay Lynn     French fries       Lunch only          5.0  
    ## 3            3 Jayendra Lyne    N/A                Breakfast and lunch 7.0  
    ## 4            4 Leon Rossini     Anchovies          Lunch only          <NA> 
    ## 5            5 Chidiegwu Dunkel Pizza              Breakfast and lunch five 
    ## 6            6 Güvenç Attila    Ice cream          Lunch only          6.0
    
    student_sheet
    ## # A tibble: 6 × 5
    ##   `Student ID` `Full Name`      favourite.food     mealPlan            AGE      
    ##          <dbl> <chr>            <chr>              <chr>               <list>   
    ## 1            1 Sunil Huffmann   Strawberry yoghurt Lunch only          <dbl [1]>
    ## 2            2 Barclay Lynn     French fries       Lunch only          <dbl [1]>
    ## 3            3 Jayendra Lyne    N/A                Breakfast and lunch <dbl [1]>
    ## 4            4 Leon Rossini     Anchovies          Lunch only          <NULL>   
    ## 5            5 Chidiegwu Dunkel Pizza              Breakfast and lunch <chr [1]>
    ## 6            6 Güvenç Attila    Ice cream          Lunch only          <dbl [1]>
  2. Similar to Exercise #1 from section 20.2.9, I read in as a character so I can change the ‘two’ value to a numeric 2.

    survey_sheet_id <- '1yc5gL-a2OOBr8M7B3IsDNX5uR17vBHOyWZq6xSTG2G8'
    survey_sheet <- read_sheet(survey_sheet_id, col_types = 'cc', na = c('N/A', ''))
    ## ✔ Reading from "survey".
    ## ✔ Range 'Sheet1'.
    survey_sheet |> 
      mutate(
        n_pets = if_else(n_pets == "two", "2", n_pets),
        n_pets = parse_number(n_pets)
      )
    ## # A tibble: 6 × 2
    ##   survey_id n_pets
    ##   <chr>      <dbl>
    ## 1 1              0
    ## 2 2              1
    ## 3 3             NA
    ## 4 4              2
    ## 5 5              2
    ## 6 6             NA
  3. I used the fill function like I did in exercise #2 of section 20.2.9.

    roster_sheet_id <- '1LgZ0Bkg9d_NK8uTdP2uHXm07kAlwx8-Ictf8NocebIE'
    roster_sheet <- read_sheet(roster_sheet_id) |> 
      fill(group, subgroup)
    ## ✔ Reading from "roster".
    ## ✔ Range 'Sheet1'.
    
    roster_sheet
    ## # A tibble: 12 × 3
    ##    group subgroup    id
    ##    <dbl> <chr>    <dbl>
    ##  1     1 A            1
    ##  2     1 A            2
    ##  3     1 A            3
    ##  4     1 B            4
    ##  5     1 B            5
    ##  6     1 B            6
    ##  7     1 B            7
    ##  8     2 A            8
    ##  9     2 A            9
    ## 10     2 B           10
    ## 11     2 B           11
    ## 12     2 B           12