Case-Crossover Example
Source:vignettes/articles/case-crossover-example.Rmd
case-crossover-example.Rmd
This example details how to use the appc package to add air pollution
exposure estimates for exact locations and dates defined by geocoded
coordinates and a case date. For this example workflow, we will simulate
20 random locations in Wayne County, Michigan and case dates in 2022,
but in actuality this can be any set of geocoded lat
and
lon
columns with corresponding dates.
d <-
tigris::counties("MI", year = 2021, progress_bar = FALSE) |>
suppressWarnings() |>
filter(GEOID == 26163) |>
sf::st_sample(20) |>
sf::st_coordinates() |>
tibble::as_tibble() |>
rename(lat = Y, lon = X) |>
mutate(case_date = sample(seq(as.Date("2022-01-01"), as.Date("2022-12-31"), by = 1), size = 20)) |>
mutate(id = 1:20) |>
relocate(id)
d
#> # A tibble: 20 × 4
#> id lon lat case_date
#> <int> <dbl> <dbl> <date>
#> 1 1 -83.5 42.3 2022-10-31
#> 2 2 -82.9 42.4 2022-11-27
#> 3 3 -83.4 42.1 2022-10-18
#> 4 4 -83.2 42.2 2022-05-10
#> 5 5 -83.2 42.3 2022-12-09
#> 6 6 -83.3 42.3 2022-05-11
#> 7 7 -82.9 42.4 2022-10-21
#> 8 8 -83.3 42.2 2022-08-22
#> 9 9 -83.2 42.2 2022-03-06
#> 10 10 -83.4 42.2 2022-08-19
#> 11 11 -83.2 42.1 2022-05-30
#> 12 12 -83.5 42.3 2022-05-27
#> 13 13 -83.2 42.2 2022-09-15
#> 14 14 -83.3 42.1 2022-11-16
#> 15 15 -83.4 42.2 2022-02-09
#> 16 16 -83.5 42.3 2022-07-12
#> 17 17 -83.5 42.4 2022-06-05
#> 18 18 -83.4 42.3 2022-01-12
#> 19 19 -83.3 42.3 2022-10-24
#> 20 20 -83.2 42.2 2022-02-11
For this example, we want to estimate the fine particulate matter on
the days of the case, as well as control dates. Here, we define these
control dates using time stratification on year, month, and day-of-week,
and use the purrr
package to create a list-col of dates for
each location in our example data:
dates_seq <- seq(min(d$case_date) - 31, max(d$case_date) + 31, by = 1)
make_control_dates <- function(case_date) {
dates_seq[lubridate::year(dates_seq) == lubridate::year(case_date) &
lubridate::month(dates_seq) == lubridate::month(case_date) &
lubridate::wday(dates_seq) == lubridate::wday(case_date) &
dates_seq != case_date]
}
d <- d |>
rowwise() |>
mutate(dates = purrr::map(case_date, make_control_dates)) |>
ungroup()
d
#> # A tibble: 20 × 5
#> id lon lat case_date dates
#> <int> <dbl> <dbl> <date> <list>
#> 1 1 -83.5 42.3 2022-10-31 <date [4]>
#> 2 2 -82.9 42.4 2022-11-27 <date [3]>
#> 3 3 -83.4 42.1 2022-10-18 <date [3]>
#> 4 4 -83.2 42.2 2022-05-10 <date [4]>
#> 5 5 -83.2 42.3 2022-12-09 <date [4]>
#> 6 6 -83.3 42.3 2022-05-11 <date [3]>
#> 7 7 -82.9 42.4 2022-10-21 <date [3]>
#> 8 8 -83.3 42.2 2022-08-22 <date [4]>
#> 9 9 -83.2 42.2 2022-03-06 <date [3]>
#> 10 10 -83.4 42.2 2022-08-19 <date [3]>
#> 11 11 -83.2 42.1 2022-05-30 <date [4]>
#> 12 12 -83.5 42.3 2022-05-27 <date [3]>
#> 13 13 -83.2 42.2 2022-09-15 <date [4]>
#> 14 14 -83.3 42.1 2022-11-16 <date [4]>
#> 15 15 -83.4 42.2 2022-02-09 <date [3]>
#> 16 16 -83.5 42.3 2022-07-12 <date [3]>
#> 17 17 -83.5 42.4 2022-06-05 <date [3]>
#> 18 18 -83.4 42.3 2022-01-12 <date [3]>
#> 19 19 -83.3 42.3 2022-10-24 <date [4]>
#> 20 20 -83.2 42.2 2022-02-11 <date [3]>
Next, we will use the lon
and lat
columns
to create the s2 geohash:
d <- d |> dplyr::mutate(s2 = s2::as_s2_cell(s2::s2_geog_point(lon, lat)))
d
#> # A tibble: 20 × 6
#> id lon lat case_date dates s2
#> <int> <dbl> <dbl> <date> <list> <s2cell>
#> 1 1 -83.5 42.3 2022-10-31 <date [4]> 883b539fde6f468f
#> 2 2 -82.9 42.4 2022-11-27 <date [3]> 882529be4afb184b
#> 3 3 -83.4 42.1 2022-10-18 <date [3]> 883b5cadcae041d3
#> 4 4 -83.2 42.2 2022-05-10 <date [4]> 883b36dae69ea671
#> 5 5 -83.2 42.3 2022-12-09 <date [4]> 883b339ae99f458f
#> 6 6 -83.3 42.3 2022-05-11 <date [3]> 883b4b9750a4f19d
#> 7 7 -82.9 42.4 2022-10-21 <date [3]> 8824d53e3f2addd3
#> 8 8 -83.3 42.2 2022-08-22 <date [4]> 883b462c96efefa9
#> 9 9 -83.2 42.2 2022-03-06 <date [3]> 883b37c7f61125bf
#> 10 10 -83.4 42.2 2022-08-19 <date [3]> 883b44e161d94b5d
#> 11 11 -83.2 42.1 2022-05-30 <date [4]> 883b3f38bc72bc79
#> 12 12 -83.5 42.3 2022-05-27 <date [3]> 883b5129692a69f7
#> 13 13 -83.2 42.2 2022-09-15 <date [4]> 883b362d7f446e2d
#> 14 14 -83.3 42.1 2022-11-16 <date [4]> 883b41493a95f44f
#> 15 15 -83.4 42.2 2022-02-09 <date [3]> 883b451132452249
#> 16 16 -83.5 42.3 2022-07-12 <date [3]> 883b56ca718e3a5b
#> 17 17 -83.5 42.4 2022-06-05 <date [3]> 8824abeaf2f4640b
#> 18 18 -83.4 42.3 2022-01-12 <date [3]> 883b4ef3cd3ded53
#> 19 19 -83.3 42.3 2022-10-24 <date [4]> 883b496b4a4bf401
#> 20 20 -83.2 42.2 2022-02-11 <date [3]> 883b3733a728db21
Then we can directly use the s2
and dates
columns to add temperature and humidity using the
get_narr_data()
function and PM2.5 using the
predict_pm25()
function:
d <- d |> dplyr::mutate(temperature = get_narr_data(s2, dates, "air.2m"),
humidity = get_narr_data(s2, dates, "rhum.2m"))
d <- d |> dplyr::mutate(pm25 = predict_pm25(s2, dates))
#> ℹ (down)loading random forest model
#> ✔ (down)loading random forest model [16.3s]
#>
#> ℹ checking that s2 are within the contiguous US
#> ✔ checking that s2 are within the contiguous US [64ms]
#>
#> ℹ adding coordinates
#> ✔ adding coordinates [37ms]
#>
#> ℹ adding elevation
#> ✔ adding elevation [401ms]
#>
#> ℹ adding HMS smoke data
#> ✔ adding HMS smoke data [1.3s]
#>
#> ℹ adding NARR
#> ✔ adding NARR [456ms]
#>
#> ℹ adding gridMET
#> ✔ adding gridMET [428ms]
#>
#> ℹ adding MERRA
#> ✔ adding MERRA [9.5s]
#>
#> ℹ adding time components
#> ✔ adding time components [39ms]
#>
d
#> # A tibble: 20 × 9
#> id lon lat case_date dates s2 temperature humidity pm25
#> <int> <dbl> <dbl> <date> <list> <s2cel> <named lis> <named > <list>
#> 1 1 -83.5 42.3 2022-10-31 <date [4]> 883b53… <dbl [4]> <dbl> <tibble>
#> 2 2 -82.9 42.4 2022-11-27 <date [3]> 882529… <dbl [3]> <dbl> <tibble>
#> 3 3 -83.4 42.1 2022-10-18 <date [3]> 883b5c… <dbl [3]> <dbl> <tibble>
#> 4 4 -83.2 42.2 2022-05-10 <date [4]> 883b36… <dbl [4]> <dbl> <tibble>
#> 5 5 -83.2 42.3 2022-12-09 <date [4]> 883b33… <dbl [4]> <dbl> <tibble>
#> 6 6 -83.3 42.3 2022-05-11 <date [3]> 883b4b… <dbl [3]> <dbl> <tibble>
#> 7 7 -82.9 42.4 2022-10-21 <date [3]> 8824d5… <dbl [3]> <dbl> <tibble>
#> 8 8 -83.3 42.2 2022-08-22 <date [4]> 883b46… <dbl [4]> <dbl> <tibble>
#> 9 9 -83.2 42.2 2022-03-06 <date [3]> 883b37… <dbl [3]> <dbl> <tibble>
#> 10 10 -83.4 42.2 2022-08-19 <date [3]> 883b44… <dbl [3]> <dbl> <tibble>
#> 11 11 -83.2 42.1 2022-05-30 <date [4]> 883b3f… <dbl [4]> <dbl> <tibble>
#> 12 12 -83.5 42.3 2022-05-27 <date [3]> 883b51… <dbl [3]> <dbl> <tibble>
#> 13 13 -83.2 42.2 2022-09-15 <date [4]> 883b36… <dbl [4]> <dbl> <tibble>
#> 14 14 -83.3 42.1 2022-11-16 <date [4]> 883b41… <dbl [4]> <dbl> <tibble>
#> 15 15 -83.4 42.2 2022-02-09 <date [3]> 883b45… <dbl [3]> <dbl> <tibble>
#> 16 16 -83.5 42.3 2022-07-12 <date [3]> 883b56… <dbl [3]> <dbl> <tibble>
#> 17 17 -83.5 42.4 2022-06-05 <date [3]> 8824ab… <dbl [3]> <dbl> <tibble>
#> 18 18 -83.4 42.3 2022-01-12 <date [3]> 883b4e… <dbl [3]> <dbl> <tibble>
#> 19 19 -83.3 42.3 2022-10-24 <date [4]> 883b49… <dbl [4]> <dbl> <tibble>
#> 20 20 -83.2 42.2 2022-02-11 <date [3]> 883b37… <dbl [3]> <dbl> <tibble>