Case-Crossover Example • appc

library(appc)
library(dplyr, warn.conflicts = FALSE)

This example details how to use the appc package to add air pollution exposure estimates for exact locations and dates defined by geocoded coordinates and a case date. For this example workflow, we will simulate 20 random locations in Wayne County, Michigan and case dates in 2022, but in actuality this can be any set of geocoded lat and lon columns with corresponding dates.

d <-
  tigris::counties("MI", year = 2021, progress_bar = FALSE) |>
  suppressWarnings() |>
  filter(GEOID == 26163) |>
  sf::st_sample(20) |>
  sf::st_coordinates() |>
  tibble::as_tibble() |>
  rename(lat = Y, lon = X) |>
  mutate(case_date = sample(seq(as.Date("2022-01-01"), as.Date("2022-12-31"), by = 1), size = 20)) |>
  mutate(id = 1:20) |>
  relocate(id)

d
#> # A tibble: 20 × 4
#>       id   lon   lat case_date 
#>    <int> <dbl> <dbl> <date>    
#>  1     1 -83.5  42.3 2022-10-31
#>  2     2 -82.9  42.4 2022-11-27
#>  3     3 -83.4  42.1 2022-10-18
#>  4     4 -83.2  42.2 2022-05-10
#>  5     5 -83.2  42.3 2022-12-09
#>  6     6 -83.3  42.3 2022-05-11
#>  7     7 -82.9  42.4 2022-10-21
#>  8     8 -83.3  42.2 2022-08-22
#>  9     9 -83.2  42.2 2022-03-06
#> 10    10 -83.4  42.2 2022-08-19
#> 11    11 -83.2  42.1 2022-05-30
#> 12    12 -83.5  42.3 2022-05-27
#> 13    13 -83.2  42.2 2022-09-15
#> 14    14 -83.3  42.1 2022-11-16
#> 15    15 -83.4  42.2 2022-02-09
#> 16    16 -83.5  42.3 2022-07-12
#> 17    17 -83.5  42.4 2022-06-05
#> 18    18 -83.4  42.3 2022-01-12
#> 19    19 -83.3  42.3 2022-10-24
#> 20    20 -83.2  42.2 2022-02-11

For this example, we want to estimate the fine particulate matter on the days of the case, as well as control dates. Here, we define these control dates using time stratification on year, month, and day-of-week, and use the purrr package to create a list-col of dates for each location in our example data:

dates_seq <- seq(min(d$case_date) - 31, max(d$case_date) + 31, by = 1)

make_control_dates <- function(case_date) {
  dates_seq[lubridate::year(dates_seq) == lubridate::year(case_date) & 
              lubridate::month(dates_seq) == lubridate::month(case_date) & 
              lubridate::wday(dates_seq) == lubridate::wday(case_date) &
              dates_seq != case_date]
}

d <- d |>
  rowwise() |>
  mutate(dates = purrr::map(case_date, make_control_dates)) |>
  ungroup()

d
#> # A tibble: 20 × 5
#>       id   lon   lat case_date  dates     
#>    <int> <dbl> <dbl> <date>     <list>    
#>  1     1 -83.5  42.3 2022-10-31 <date [4]>
#>  2     2 -82.9  42.4 2022-11-27 <date [3]>
#>  3     3 -83.4  42.1 2022-10-18 <date [3]>
#>  4     4 -83.2  42.2 2022-05-10 <date [4]>
#>  5     5 -83.2  42.3 2022-12-09 <date [4]>
#>  6     6 -83.3  42.3 2022-05-11 <date [3]>
#>  7     7 -82.9  42.4 2022-10-21 <date [3]>
#>  8     8 -83.3  42.2 2022-08-22 <date [4]>
#>  9     9 -83.2  42.2 2022-03-06 <date [3]>
#> 10    10 -83.4  42.2 2022-08-19 <date [3]>
#> 11    11 -83.2  42.1 2022-05-30 <date [4]>
#> 12    12 -83.5  42.3 2022-05-27 <date [3]>
#> 13    13 -83.2  42.2 2022-09-15 <date [4]>
#> 14    14 -83.3  42.1 2022-11-16 <date [4]>
#> 15    15 -83.4  42.2 2022-02-09 <date [3]>
#> 16    16 -83.5  42.3 2022-07-12 <date [3]>
#> 17    17 -83.5  42.4 2022-06-05 <date [3]>
#> 18    18 -83.4  42.3 2022-01-12 <date [3]>
#> 19    19 -83.3  42.3 2022-10-24 <date [4]>
#> 20    20 -83.2  42.2 2022-02-11 <date [3]>

Next, we will use the lon and lat columns to create the s2 geohash:

d <- d |> dplyr::mutate(s2 = s2::as_s2_cell(s2::s2_geog_point(lon, lat)))

d
#> # A tibble: 20 × 6
#>       id   lon   lat case_date  dates      s2              
#>    <int> <dbl> <dbl> <date>     <list>     <s2cell>        
#>  1     1 -83.5  42.3 2022-10-31 <date [4]> 883b539fde6f468f
#>  2     2 -82.9  42.4 2022-11-27 <date [3]> 882529be4afb184b
#>  3     3 -83.4  42.1 2022-10-18 <date [3]> 883b5cadcae041d3
#>  4     4 -83.2  42.2 2022-05-10 <date [4]> 883b36dae69ea671
#>  5     5 -83.2  42.3 2022-12-09 <date [4]> 883b339ae99f458f
#>  6     6 -83.3  42.3 2022-05-11 <date [3]> 883b4b9750a4f19d
#>  7     7 -82.9  42.4 2022-10-21 <date [3]> 8824d53e3f2addd3
#>  8     8 -83.3  42.2 2022-08-22 <date [4]> 883b462c96efefa9
#>  9     9 -83.2  42.2 2022-03-06 <date [3]> 883b37c7f61125bf
#> 10    10 -83.4  42.2 2022-08-19 <date [3]> 883b44e161d94b5d
#> 11    11 -83.2  42.1 2022-05-30 <date [4]> 883b3f38bc72bc79
#> 12    12 -83.5  42.3 2022-05-27 <date [3]> 883b5129692a69f7
#> 13    13 -83.2  42.2 2022-09-15 <date [4]> 883b362d7f446e2d
#> 14    14 -83.3  42.1 2022-11-16 <date [4]> 883b41493a95f44f
#> 15    15 -83.4  42.2 2022-02-09 <date [3]> 883b451132452249
#> 16    16 -83.5  42.3 2022-07-12 <date [3]> 883b56ca718e3a5b
#> 17    17 -83.5  42.4 2022-06-05 <date [3]> 8824abeaf2f4640b
#> 18    18 -83.4  42.3 2022-01-12 <date [3]> 883b4ef3cd3ded53
#> 19    19 -83.3  42.3 2022-10-24 <date [4]> 883b496b4a4bf401
#> 20    20 -83.2  42.2 2022-02-11 <date [3]> 883b3733a728db21

Then we can directly use the s2 and dates columns to add temperature and humidity using the get_narr_data() function and PM2.5 using the predict_pm25() function:

d <- d |> dplyr::mutate(temperature = get_narr_data(s2, dates, "air.2m"), 
                        humidity = get_narr_data(s2, dates, "rhum.2m"))

d <- d |> dplyr::mutate(pm25 = predict_pm25(s2, dates))
#> ℹ (down)loading random forest model
#> ✔ (down)loading random forest model [18.7s]
#> 
#> ℹ checking that s2 are within the contiguous US
#> ✔ checking that s2 are within the contiguous US [72ms]
#> 
#> ℹ adding coordinates
#> ✔ adding coordinates [39ms]
#> 
#> ℹ adding elevation
#> ✔ adding elevation [407ms]
#> 
#> ℹ adding HMS smoke data
#> ✔ adding HMS smoke data [1.1s]
#> 
#> ℹ adding NARR
#> ✔ adding NARR [452ms]
#> 
#> ℹ adding gridMET
#> ✔ adding gridMET [395ms]
#> 
#> ℹ adding MERRA
#> ✔ adding MERRA [1.1s]
#> 
#> ℹ adding time components
#> ✔ adding time components [28ms]
#> 

d
#> # A tibble: 20 × 9
#>       id   lon   lat case_date  dates      s2      temperature humidity pm25    
#>    <int> <dbl> <dbl> <date>     <list>     <s2cel> <named lis> <named > <list>  
#>  1     1 -83.5  42.3 2022-10-31 <date [4]> 883b53… <dbl [4]>   <dbl>    <tibble>
#>  2     2 -82.9  42.4 2022-11-27 <date [3]> 882529… <dbl [3]>   <dbl>    <tibble>
#>  3     3 -83.4  42.1 2022-10-18 <date [3]> 883b5c… <dbl [3]>   <dbl>    <tibble>
#>  4     4 -83.2  42.2 2022-05-10 <date [4]> 883b36… <dbl [4]>   <dbl>    <tibble>
#>  5     5 -83.2  42.3 2022-12-09 <date [4]> 883b33… <dbl [4]>   <dbl>    <tibble>
#>  6     6 -83.3  42.3 2022-05-11 <date [3]> 883b4b… <dbl [3]>   <dbl>    <tibble>
#>  7     7 -82.9  42.4 2022-10-21 <date [3]> 8824d5… <dbl [3]>   <dbl>    <tibble>
#>  8     8 -83.3  42.2 2022-08-22 <date [4]> 883b46… <dbl [4]>   <dbl>    <tibble>
#>  9     9 -83.2  42.2 2022-03-06 <date [3]> 883b37… <dbl [3]>   <dbl>    <tibble>
#> 10    10 -83.4  42.2 2022-08-19 <date [3]> 883b44… <dbl [3]>   <dbl>    <tibble>
#> 11    11 -83.2  42.1 2022-05-30 <date [4]> 883b3f… <dbl [4]>   <dbl>    <tibble>
#> 12    12 -83.5  42.3 2022-05-27 <date [3]> 883b51… <dbl [3]>   <dbl>    <tibble>
#> 13    13 -83.2  42.2 2022-09-15 <date [4]> 883b36… <dbl [4]>   <dbl>    <tibble>
#> 14    14 -83.3  42.1 2022-11-16 <date [4]> 883b41… <dbl [4]>   <dbl>    <tibble>
#> 15    15 -83.4  42.2 2022-02-09 <date [3]> 883b45… <dbl [3]>   <dbl>    <tibble>
#> 16    16 -83.5  42.3 2022-07-12 <date [3]> 883b56… <dbl [3]>   <dbl>    <tibble>
#> 17    17 -83.5  42.4 2022-06-05 <date [3]> 8824ab… <dbl [3]>   <dbl>    <tibble>
#> 18    18 -83.4  42.3 2022-01-12 <date [3]> 883b4e… <dbl [3]>   <dbl>    <tibble>
#> 19    19 -83.3  42.3 2022-10-24 <date [4]> 883b49… <dbl [4]>   <dbl>    <tibble>
#> 20    20 -83.2  42.2 2022-02-11 <date [3]> 883b37… <dbl [3]>   <dbl>    <tibble>