Skip to contents
library(appc)
library(dplyr, warn.conflicts = FALSE)

This example details how to use the appc package to add air pollution exposure estimates for exact locations and time periods defined by geocoded coordinates and a “key” date. For this example workflow, we will simulate 20 random locations in Wayne County, Michigan and dates of birth during 2023, but in actuality this can be any set of geocoded lat and lon columns with corresponding dates.

# Wayne County, MI boundary (GEOID 26163); tigris emits harmless CRS warnings
wayne_county <-
  tigris::counties("MI", year = 2021, progress_bar = FALSE) |>
  suppressWarnings() |>
  filter(GEOID == 26163)

# simulate 20 random point locations within the county, each with a
# randomly sampled 2023 date of birth
d <-
  wayne_county |>
  sf::st_sample(20) |>
  sf::st_coordinates() |>
  tibble::as_tibble() |>
  rename(lon = X, lat = Y) |>
  mutate(dob = sample(seq(as.Date("2023-01-01"), as.Date("2023-12-31"), by = 1), size = 20))
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE

d
#> # A tibble: 20 × 3
#>      lon   lat dob       
#>    <dbl> <dbl> <date>    
#>  1 -83.2  42.1 2023-11-03
#>  2 -83.5  42.2 2023-11-18
#>  3 -83.5  42.4 2023-01-31
#>  4 -83.0  42.4 2023-09-16
#>  5 -83.3  42.3 2023-04-14
#>  6 -83.4  42.2 2023-08-08
#>  7 -83.3  42.4 2023-10-29
#>  8 -83.2  42.3 2023-11-29
#>  9 -83.0  42.4 2023-02-14
#> 10 -83.5  42.1 2023-04-06
#> 11 -83.3  42.2 2023-06-07
#> 12 -83.3  42.3 2023-05-31
#> 13 -83.3  42.2 2023-07-25
#> 14 -83.5  42.3 2023-06-21
#> 15 -83.5  42.3 2023-10-07
#> 16 -83.5  42.4 2023-03-30
#> 17 -83.3  42.3 2023-04-15
#> 18 -82.8  42.4 2023-09-08
#> 19 -83.5  42.4 2023-12-13
#> 20 -82.9  42.4 2023-09-29

For this example, we want to estimate the average fine particulate matter from 90 days prior to birth until the date of birth. We define these dates and create a list-col of dates for each location in our example data:

# define a 91-day exposure window (90 days before birth through the dob)
# and store the daily date sequence for each location as a list-col
d <- d |>
  mutate(
    start_date = dob - 90,
    end_date = dob,
    dates = purrr::map2(start_date, end_date, \(from, to) seq(from, to, by = 1))
  )

d
#> # A tibble: 20 × 6
#>      lon   lat dob        start_date end_date   dates      
#>    <dbl> <dbl> <date>     <date>     <date>     <list>     
#>  1 -83.2  42.1 2023-11-03 2023-08-05 2023-11-03 <date [91]>
#>  2 -83.5  42.2 2023-11-18 2023-08-20 2023-11-18 <date [91]>
#>  3 -83.5  42.4 2023-01-31 2022-11-02 2023-01-31 <date [91]>
#>  4 -83.0  42.4 2023-09-16 2023-06-18 2023-09-16 <date [91]>
#>  5 -83.3  42.3 2023-04-14 2023-01-14 2023-04-14 <date [91]>
#>  6 -83.4  42.2 2023-08-08 2023-05-10 2023-08-08 <date [91]>
#>  7 -83.3  42.4 2023-10-29 2023-07-31 2023-10-29 <date [91]>
#>  8 -83.2  42.3 2023-11-29 2023-08-31 2023-11-29 <date [91]>
#>  9 -83.0  42.4 2023-02-14 2022-11-16 2023-02-14 <date [91]>
#> 10 -83.5  42.1 2023-04-06 2023-01-06 2023-04-06 <date [91]>
#> 11 -83.3  42.2 2023-06-07 2023-03-09 2023-06-07 <date [91]>
#> 12 -83.3  42.3 2023-05-31 2023-03-02 2023-05-31 <date [91]>
#> 13 -83.3  42.2 2023-07-25 2023-04-26 2023-07-25 <date [91]>
#> 14 -83.5  42.3 2023-06-21 2023-03-23 2023-06-21 <date [91]>
#> 15 -83.5  42.3 2023-10-07 2023-07-09 2023-10-07 <date [91]>
#> 16 -83.5  42.4 2023-03-30 2022-12-30 2023-03-30 <date [91]>
#> 17 -83.3  42.3 2023-04-15 2023-01-15 2023-04-15 <date [91]>
#> 18 -82.8  42.4 2023-09-08 2023-06-10 2023-09-08 <date [91]>
#> 19 -83.5  42.4 2023-12-13 2023-09-14 2023-12-13 <date [91]>
#> 20 -82.9  42.4 2023-09-29 2023-07-01 2023-09-29 <date [91]>

Next, we will use the lon and lat columns to create the s2 geohash:

# convert each lon/lat pair to an s2 cell identifier (the location key
# expected by appc's prediction functions)
d <- d |>
  mutate(s2 = s2::as_s2_cell(s2::s2_geog_point(lon, lat)))

d
#> # A tibble: 20 × 7
#>      lon   lat dob        start_date end_date   dates       s2              
#>    <dbl> <dbl> <date>     <date>     <date>     <list>      <s2cell>        
#>  1 -83.2  42.1 2023-11-03 2023-08-05 2023-11-03 <date [91]> 883b3c20d498eb6b
#>  2 -83.5  42.2 2023-11-18 2023-08-20 2023-11-18 <date [91]> 883b575421954c91
#>  3 -83.5  42.4 2023-01-31 2022-11-02 2023-01-31 <date [91]> 883b53552921a289
#>  4 -83.0  42.4 2023-09-16 2023-06-18 2023-09-16 <date [91]> 8824d22a9dc1178b
#>  5 -83.3  42.3 2023-04-14 2023-01-14 2023-04-14 <date [91]> 883b4eb74c6b5863
#>  6 -83.4  42.2 2023-08-08 2023-05-10 2023-08-08 <date [91]> 883b5aaedc21a45b
#>  7 -83.3  42.4 2023-10-29 2023-07-31 2023-10-29 <date [91]> 8824b3f189d3fbed
#>  8 -83.2  42.3 2023-11-29 2023-08-31 2023-11-29 <date [91]> 883b36aa7efc8e6f
#>  9 -83.0  42.4 2023-02-14 2022-11-16 2023-02-14 <date [91]> 8824d317f9c8e54b
#> 10 -83.5  42.1 2023-04-06 2023-01-06 2023-04-06 <date [91]> 883b5e80424dee2d
#> 11 -83.3  42.2 2023-06-07 2023-03-09 2023-06-07 <date [91]> 883b49b745981995
#> 12 -83.3  42.3 2023-05-31 2023-03-02 2023-05-31 <date [91]> 883b4b0a27f3a163
#> 13 -83.3  42.2 2023-07-25 2023-04-26 2023-07-25 <date [91]> 883b46273bd938e5
#> 14 -83.5  42.3 2023-06-21 2023-03-23 2023-06-21 <date [91]> 883b5127334b7879
#> 15 -83.5  42.3 2023-10-07 2023-07-09 2023-10-07 <date [91]> 883b5104c3f94e6f
#> 16 -83.5  42.4 2023-03-30 2022-12-30 2023-03-30 <date [91]> 8824acf4e7e28e7b
#> 17 -83.3  42.3 2023-04-15 2023-01-15 2023-04-15 <date [91]> 883b4bdcbfeae21f
#> 18 -82.8  42.4 2023-09-08 2023-06-10 2023-09-08 <date [91]> 88252b9017d1e721
#> 19 -83.5  42.4 2023-12-13 2023-09-14 2023-12-13 <date [91]> 883b532e340688b9
#> 20 -82.9  42.4 2023-09-29 2023-07-01 2023-09-29 <date [91]> 8825297c379a80a5

Directly use the s2 and dates columns to call the predict_pm25() function:

# predict daily PM2.5 for every location-date combination; returns a
# list-col of one tibble per location
d <- mutate(d, pm25 = predict_pm25(s2, dates))
#>  (down)loading random forest model
#>  (down)loading random forest model [14.3s]
#> 
#>  checking that s2 locations are within the contiguous united states
#>  checking that s2 locations are within the contiguous united states [166ms]
#> 
#>  adding coordinates
#>  adding coordinates [33ms]
#> 
#>  adding elevation
#>  adding elevation [3.8s]
#> 
#>  adding HMS smoke data
#>  adding HMS smoke data [14.7s]
#> 
#>  adding NARR
#>  adding NARR [12.7s]
#> 
#>  adding MERRA
#>  adding MERRA [2s]
#> 
#>  adding time components
#>  adding time components [35ms]
#> 

d
#> # A tibble: 20 × 8
#>      lon   lat dob        start_date end_date   dates       s2          pm25    
#>    <dbl> <dbl> <date>     <date>     <date>     <list>      <s2cell>    <list>  
#>  1 -83.2  42.1 2023-11-03 2023-08-05 2023-11-03 <date [91]> 883b3c20d4… <tibble>
#>  2 -83.5  42.2 2023-11-18 2023-08-20 2023-11-18 <date [91]> 883b575421… <tibble>
#>  3 -83.5  42.4 2023-01-31 2022-11-02 2023-01-31 <date [91]> 883b535529… <tibble>
#>  4 -83.0  42.4 2023-09-16 2023-06-18 2023-09-16 <date [91]> 8824d22a9d… <tibble>
#>  5 -83.3  42.3 2023-04-14 2023-01-14 2023-04-14 <date [91]> 883b4eb74c… <tibble>
#>  6 -83.4  42.2 2023-08-08 2023-05-10 2023-08-08 <date [91]> 883b5aaedc… <tibble>
#>  7 -83.3  42.4 2023-10-29 2023-07-31 2023-10-29 <date [91]> 8824b3f189… <tibble>
#>  8 -83.2  42.3 2023-11-29 2023-08-31 2023-11-29 <date [91]> 883b36aa7e… <tibble>
#>  9 -83.0  42.4 2023-02-14 2022-11-16 2023-02-14 <date [91]> 8824d317f9… <tibble>
#> 10 -83.5  42.1 2023-04-06 2023-01-06 2023-04-06 <date [91]> 883b5e8042… <tibble>
#> 11 -83.3  42.2 2023-06-07 2023-03-09 2023-06-07 <date [91]> 883b49b745… <tibble>
#> 12 -83.3  42.3 2023-05-31 2023-03-02 2023-05-31 <date [91]> 883b4b0a27… <tibble>
#> 13 -83.3  42.2 2023-07-25 2023-04-26 2023-07-25 <date [91]> 883b46273b… <tibble>
#> 14 -83.5  42.3 2023-06-21 2023-03-23 2023-06-21 <date [91]> 883b512733… <tibble>
#> 15 -83.5  42.3 2023-10-07 2023-07-09 2023-10-07 <date [91]> 883b5104c3… <tibble>
#> 16 -83.5  42.4 2023-03-30 2022-12-30 2023-03-30 <date [91]> 8824acf4e7… <tibble>
#> 17 -83.3  42.3 2023-04-15 2023-01-15 2023-04-15 <date [91]> 883b4bdcbf… <tibble>
#> 18 -82.8  42.4 2023-09-08 2023-06-10 2023-09-08 <date [91]> 88252b9017… <tibble>
#> 19 -83.5  42.4 2023-12-13 2023-09-14 2023-12-13 <date [91]> 883b532e34… <tibble>
#> 20 -82.9  42.4 2023-09-29 2023-07-01 2023-09-29 <date [91]> 8825297c37… <tibble>

With daily exposures, we could average fine particulate matter throughout the study period:

# average the daily PM2.5 predictions over each location's exposure window
d |>
  mutate(mean_pm25 = purrr::map_dbl(pm25, \(daily) mean(daily$pm25)))
#> # A tibble: 20 × 9
#>      lon   lat dob        start_date end_date   dates  s2     pm25     mean_pm25
#>    <dbl> <dbl> <date>     <date>     <date>     <list> <s2ce> <list>       <dbl>
#>  1 -83.2  42.1 2023-11-03 2023-08-05 2023-11-03 <date> 883b3… <tibble>      8.07
#>  2 -83.5  42.2 2023-11-18 2023-08-20 2023-11-18 <date> 883b5… <tibble>      7.86
#>  3 -83.5  42.4 2023-01-31 2022-11-02 2023-01-31 <date> 883b5… <tibble>      9.92
#>  4 -83.0  42.4 2023-09-16 2023-06-18 2023-09-16 <date> 8824d… <tibble>     12.4 
#>  5 -83.3  42.3 2023-04-14 2023-01-14 2023-04-14 <date> 883b4… <tibble>      7.96
#>  6 -83.4  42.2 2023-08-08 2023-05-10 2023-08-08 <date> 883b5… <tibble>     15.2 
#>  7 -83.3  42.4 2023-10-29 2023-07-31 2023-10-29 <date> 8824b… <tibble>      8.38
#>  8 -83.2  42.3 2023-11-29 2023-08-31 2023-11-29 <date> 883b3… <tibble>      7.69
#>  9 -83.0  42.4 2023-02-14 2022-11-16 2023-02-14 <date> 8824d… <tibble>      9.87
#> 10 -83.5  42.1 2023-04-06 2023-01-06 2023-04-06 <date> 883b5… <tibble>      8.55
#> 11 -83.3  42.2 2023-06-07 2023-03-09 2023-06-07 <date> 883b4… <tibble>      9.28
#> 12 -83.3  42.3 2023-05-31 2023-03-02 2023-05-31 <date> 883b4… <tibble>      7.67
#> 13 -83.3  42.2 2023-07-25 2023-04-26 2023-07-25 <date> 883b4… <tibble>     14.1 
#> 14 -83.5  42.3 2023-06-21 2023-03-23 2023-06-21 <date> 883b5… <tibble>     10.3 
#> 15 -83.5  42.3 2023-10-07 2023-07-09 2023-10-07 <date> 883b5… <tibble>     10.2 
#> 16 -83.5  42.4 2023-03-30 2022-12-30 2023-03-30 <date> 8824a… <tibble>      8.68
#> 17 -83.3  42.3 2023-04-15 2023-01-15 2023-04-15 <date> 883b4… <tibble>      8.10
#> 18 -82.8  42.4 2023-09-08 2023-06-10 2023-09-08 <date> 88252… <tibble>     13.2 
#> 19 -83.5  42.4 2023-12-13 2023-09-14 2023-12-13 <date> 883b5… <tibble>      7.90
#> 20 -82.9  42.4 2023-09-29 2023-07-01 2023-09-29 <date> 88252… <tibble>     10.6