Skip to contents

With rsinaica, we can plot long-term pollution trends for different cities. First, we load the required packages.

## Auto-install required R packages
packs <- c("dplyr", "ggplot2", "lubridate", "scales", "rsinaica")
load_pkg <- function(pkg) {
  suppressWarnings(require(pkg, character.only = TRUE))
}
success <- vapply(packs, load_pkg, logical(1))
missing <- packs[!success]
if (length(missing)) {
  install.packages(missing)
  vapply(missing, load_pkg, logical(1))
}

We create a function to download data for a single pollutant from all stations within a network.

get_network_data <- function(network_name, pollutant, remove_extremes = TRUE) {
  ## Download data for all Aguascalientes stations for a single month
  get_month <- function(start_date, end_date, net){
    bind_rows(
      lapply(
        stations_sinaica$station_id[stations_sinaica$network_name %in% net],
        sinaica_station_data, pollutant, start_date, end_date, "Crude",
        remove_extremes = remove_extremes)
    )
  }
  ## Download data for 2017, by month
  df <- bind_rows(
    mapply(get_month,
           seq(as.Date("1997-01-01"), as.Date("2017-12-01"), by = "year"),
           seq(as.Date("1998-01-01"), as.Date("2018-01-01"), by = "year") - 1,
           network_name, 
           SIMPLIFY = FALSE)
  )
  
  df$datetime <-  with_tz(as.POSIXct(paste0(df$date, " ", df$hour, ":00"), 
                                     tz = "Etc/GMT+6"),
                          tz = "America/Mexico_City")
  df
}

Now we are ready to plot data for different cities. When requesting ozone values with the sinaica_station_data function, there is an option to automatically clean the data and set values above 0.2 ppm to NA. Since Guadalajara tends to have high ozone levels, it is advisable to disable this functionality.

get_network_data("Guadalajara", "O3", remove_extremes = FALSE) %>%
  ## remove ozone values above 0.3 as Guadalajara often reports elevated levels
  filter(value < .3 & value >= 0) %>%
  ggplot(aes(as.Date(date), value)) +
  geom_point(alpha = .01, size = .3) +
  geom_smooth(method = 'gam', formula = y ~ s(x)) + 
  ggtitle("Daily ozone values reported by all Guadalajara stations") +
  xlab("date") +
  ylab("ppm") +
  coord_cartesian(ylim = c(0, 0.3)) +
  theme_bw()

get_network_data("Toluca", "PM2.5", remove_extremes = FALSE) %>%
  filter(value <= 750) %>%
  ggplot(aes(as.Date(date), value)) +
  geom_point(alpha = .01, size = .3) +
  geom_smooth(method = 'gam', formula = y ~ s(x)) + 
  ggtitle(expression(paste("Daily ", PM[2.5], 
                           " values reported by all Toluca stations"))) +
  xlab("date") +
  ylab(expression(paste(mu,"g/", m^3))) +
  coord_cartesian(ylim = c(0, 210)) +
  theme_bw()

get_network_data("Pachuca", "PM2.5", remove_extremes = FALSE) %>%
  filter(value <= 600) %>%
  ggplot(aes(as.Date(date), value)) +
  geom_point(alpha = .01, size = .3) +
  geom_smooth(method = 'gam', formula = y ~ s(x)) + 
  ggtitle(expression(paste("Daily ", PM[2.5], 
                           " values reported by all Pachuca stations"))) +
  xlab("date") +
  ylab(expression(paste(mu,"g/", m^3))) +
  coord_cartesian(ylim = c(0, 150)) +
  theme_bw()

get_network_data("Monterrey", "PM10", remove_extremes = FALSE) %>%
  filter(value <= 1000) %>%
  ggplot(aes(as.Date(date), value)) +
  geom_point(alpha = .01, size = .3) +
  geom_smooth(method = 'gam', formula = y ~ s(x)) + 
  ggtitle(expression(paste("Daily ", PM[10], 
                           " values reported by all Monterrey stations"))) +
  xlab("date") +
  ylab(expression(paste(mu,"g/", m^3))) +
  coord_cartesian(ylim=c(0, 500)) +
  theme_bw()