COVID and Air Pollution

COVID and Air Pollution in Mexico

We can use rsinaica to find out what happened to air pollutions levels in several Mexican cities after people started taking social distancing measures due to the novel coronavirus

First, we load the packages necessary for the analysis

## Auto-install required R packages
packs <- c("dplyr", "ggplot2", "gghighlight", "lubridate", "anomalize",
           "aire.zmvm", "tidyr", "zoo", "plotly", "rsinaica", "ggseas")
success <- suppressWarnings(sapply(packs, require, character.only = TRUE))
if (length(names(success)[!success])) {
  install.packages(names(success)[!success])
  sapply(names(success)[!success], require, character.only = TRUE)
}

Download PM10 data

Then we download the data for the whole 2020 and 2019 using the sinaica_param_data function. Since the maximum data range we can download is 1 month, we have to use a little mapply magic to download the entire year.

# Download all PM10 pollution data in 2017
pm10_2020 <- bind_rows(
  mapply(sinaica_param_data,
         "PM10",
         seq(as.Date("2020-01-01"), as.Date("2020-06-01"), by = "month"),
         seq(as.Date("2020-02-01"), as.Date("2020-07-01"), by = "month") - 1,
         remove_extremes = FALSE,
         SIMPLIFY = FALSE)
)
pm10_2019 <- bind_rows(
  mapply(sinaica_param_data,
         "PM10",
         seq(as.Date("2019-01-01"), as.Date("2019-06-01"), by = "month"),
         seq(as.Date("2019-02-01"), as.Date("2019-07-01"), by = "month") - 1,
         remove_extremes = FALSE,
         SIMPLIFY = FALSE)
)

This is what the data looks like:

knitr::kable(head(pm10_2020))

id	station_id	station_name	station_code	network_name	network_code	network_id	date	hour	parameter	value_original	flag_original	valid_original	value_actual	valid_actual	date_validated	unit	value
101PM1020010100	101	Atemajac	ATM	Guadalajara	GDL	63	2020-01-01	0	PM10	58.326	128	1	58.326	1	NA	µg/m³	58.326
101PM1020010101	101	Atemajac	ATM	Guadalajara	GDL	63	2020-01-01	1	PM10	104.09	128	1	104.09	1	NA	µg/m³	104.090
101PM1020010102	101	Atemajac	ATM	Guadalajara	GDL	63	2020-01-01	2	PM10	101.73	128	1	101.73	1	NA	µg/m³	101.730
101PM1020010103	101	Atemajac	ATM	Guadalajara	GDL	63	2020-01-01	3	PM10	90.637	128	1	90.637	1	NA	µg/m³	90.637
101PM1020010104	101	Atemajac	ATM	Guadalajara	GDL	63	2020-01-01	4	PM10	82.158	128	1	82.158	1	NA	µg/m³	82.158
101PM1020010105	101	Atemajac	ATM	Guadalajara	GDL	63	2020-01-01	5	PM10	95.17	128	1	95.17	1	NA	µg/m³	95.170

The data is hourly, but we can average it to the daily levels and filter it to only include major cities. Obviously a more formal analysis would make sure that no new stations were added during the periods we are comparing and would filter the measurement errors that are sometimes returned by the sensors.

# Only include stations that reported for more than 80% of days
df_filtered <- bind_rows(pm10_2019, pm10_2020) %>%
  filter(!is.na(value)) %>%
  add_count(station_id, network_name)%>%
  filter(!(value > .0955 & hour %in% c(0:11, 21:23))) %>%
  filter(value < 300) %>%
  filter(n > 180*.8)  %>%
  select(-n)

df_ave <- df_filtered %>%
  group_by(date, network_name) %>%
  summarise(ave = mean(value, na.rm = TRUE), .groups = 'drop') %>%
  add_count(network_name) %>%
  arrange(network_name) %>%
  mutate(year = year(date)) %>%
  mutate(doy = yday(date)) %>%
  filter(network_name %in% c("Valle de México", "Monterrey", "Toluca",
                             "Guadalajara", "Pachuca", "Puebla"))

ggplot(df_ave, aes(doy, ave, group = year, color = as.factor(year))) +
  geom_line(size = .1, alpha = .7) +
  scale_color_brewer("year", type='qual', palette = "Set1") +
  facet_wrap(~ network_name, ncol = 3) +
  ggtitle(expression(paste("Average daily ", PM[10], " concentration and 30 day moving average, by network (2019 and 2020)"))) +
  labs(subtitle = "Community mobility in Mexico started dropping around March 15 (day 74 highlighted by a vertical black line)") +
  xlab("day of year") +
  ylab(expression(paste(PM[10], " ", mu,"g/", m^3))) +
  theme_bw() +
  stat_rollapplyr(width = 30, align = "right") +
  geom_vline(xintercept = 74) + #March 15 is day number 74
  theme(axis.text.x=element_text(angle=60,hjust=1))

## Warning: Removed 58 row(s) containing missing values (geom_path).

PM2.5

# Download all PM10 pollution data in 2017
pm25_2020 <- bind_rows(
  mapply(sinaica_param_data,
         "PM2.5",
         seq(as.Date("2020-01-01"), as.Date("2020-06-01"), by = "month"),
         seq(as.Date("2020-02-01"), as.Date("2020-07-01"), by = "month") - 1,
         remove_extremes = FALSE,
         SIMPLIFY = FALSE)
)
pm25_2019 <- bind_rows(
  mapply(sinaica_param_data,
         "PM2.5",
         seq(as.Date("2019-01-01"), as.Date("2019-06-01"), by = "month"),
         seq(as.Date("2019-02-01"), as.Date("2019-07-01"), by = "month") - 1,
         remove_extremes = FALSE,
         SIMPLIFY = FALSE)
)

# Only include stations that reported for more than 80% of days
df_filtered <- bind_rows(pm25_2019, pm25_2020) %>%
  filter(!is.na(value)) %>%
  add_count(station_id, network_name)%>%
  filter(!(value > .0955 & hour %in% c(0:11, 21:23))) %>%
  filter(n > 180*.8)  %>%
  select(-n)

df_ave <- df_filtered %>%
  group_by(date, network_name) %>%
  summarise(ave = mean(value, na.rm = TRUE), .groups = 'drop') %>%
  add_count(network_name) %>%
  arrange(network_name) %>%
  mutate(year = year(date)) %>%
  mutate(doy = yday(date)) %>%
  filter(network_name %in% c("Valle de México", "Monterrey", "Toluca",
                             "Guadalajara", "Pachuca", "Puebla"))

ggplot(df_ave, aes(doy, ave, group = year, color = as.factor(year))) +
  geom_line(size = .1, alpha = .7) +
  scale_color_brewer("year", type='qual', palette = "Set1") +
  facet_wrap(~ network_name, ncol = 3) +
  ggtitle(expression(paste("Average daily ", PM[2.5], " concentration and 30 day moving average, by network (2019 and 2020)"))) +
  labs(subtitle = "Community mobility in Mexico started dropping around March 15 (day 74 highlighted by a vertical black line)") +
  xlab("day of year") +
  ylab(expression(paste("daily maximum 24 average of ", PM[2.5],
                        " (", mu,"g/", m^3, ")"))) +
  theme_bw() +
  stat_rollapplyr(width = 30, align = "right") +
  geom_vline(xintercept = 74) + #March 15 is day number 74
  theme(axis.text.x=element_text(angle=60,hjust=1))

## Warning: Removed 58 row(s) containing missing values (geom_path).

O3

# Download all PM10 pollution data in 2017
o3_2020 <- bind_rows(
  mapply(sinaica_param_data,
         "O3",
         seq(as.Date("2020-01-01"), as.Date("2020-06-01"), by = "month"),
         seq(as.Date("2020-02-01"), as.Date("2020-07-01"), by = "month") - 1,
         remove_extremes = FALSE,
         SIMPLIFY = FALSE)
)
o3_2019 <- bind_rows(
  mapply(sinaica_param_data,
         "O3",
         seq(as.Date("2019-01-01"), as.Date("2019-06-01"), by = "month"),
         seq(as.Date("2019-02-01"), as.Date("2019-07-01"), by = "month") - 1,
         remove_extremes = FALSE,
         SIMPLIFY = FALSE)
)

# Only include stations that reported for more than 80% of days
df_filtered <- bind_rows(o3_2019, o3_2020) %>%
  filter(!is.na(value)) %>%
  add_count(station_id, network_name)%>%
  filter(!(value > .0955 & hour %in% c(0:11, 21:23))) %>%
  filter(value < .3) %>%
  filter(n > 180*.8)  %>%
  select(-n)

df_ave <- df_filtered %>%
  group_by(date, network_name) %>%
  summarise(ave = mean(value, na.rm = TRUE), .groups = 'drop') %>%
  add_count(network_name) %>%
  arrange(network_name) %>%
  mutate(year = year(date)) %>%
  mutate(doy = yday(date)) %>%
  filter(network_name %in% c("Valle de México", "Monterrey", "Toluca",
                             "Guadalajara", "Pachuca", "Puebla"))

ggplot(df_ave, aes(doy, ave, group = year, color = as.factor(year))) +
  geom_line(size = .1, alpha = .7) +
  scale_color_brewer("year", type='qual', palette = "Set1") +
  facet_wrap(~ network_name, ncol = 3) +
  ggtitle("Average daily ozone concentration and 30 day moving average, by network (2019 and 2020)") +
  labs(subtitle = "Community mobility in Mexico started dropping around March 15 (day 74 highlighted by a vertical black line)") +
  xlab("day of year") +
  ylab(expression(paste(O[3], " ppm"))) +
  theme_bw() +
  stat_rollapplyr(width = 30, align = "right") +
  geom_vline(xintercept = 74) + #March 15 is day number 74
  theme(axis.text.x=element_text(angle=60,hjust=1))

## Warning: Removed 58 row(s) containing missing values (geom_path).

NO2

# Download all PM10 pollution data in 2017
no2_2020 <- bind_rows(
  mapply(sinaica_param_data,
         "NO2",
         seq(as.Date("2020-01-01"), as.Date("2020-06-01"), by = "month"),
         seq(as.Date("2020-02-01"), as.Date("2020-07-01"), by = "month") - 1,
         remove_extremes = FALSE,
         SIMPLIFY = FALSE)
)
no2_2019 <- bind_rows(
  mapply(sinaica_param_data,
         "NO2",
         seq(as.Date("2019-01-01"), as.Date("2019-06-01"), by = "month"),
         seq(as.Date("2019-02-01"), as.Date("2019-07-01"), by = "month") - 1,
         remove_extremes = FALSE,
         SIMPLIFY = FALSE)
)

# Only include stations that reported for more than 80% of days
df_filtered <- bind_rows(no2_2019, no2_2020) %>%
  filter(!is.na(value)) %>%
  add_count(station_id, network_name)%>%
  filter(!(value > .0955 & hour %in% c(0:11, 21:23))) %>%
  filter(value < .3) %>%
  filter(n > 180*.8)  %>%
  select(-n)

df_ave <- df_filtered %>%
  group_by(date, network_name) %>%
  summarise(ave = mean(value, na.rm = TRUE), .groups = 'drop') %>%
  add_count(network_name) %>%
  arrange(network_name) %>%
  mutate(year = year(date)) %>%
  mutate(doy = yday(date)) %>%
  filter(network_name %in% c("Valle de México", "Monterrey", "Toluca",
                             "Guadalajara", "Pachuca", "Puebla"))

ggplot(df_ave, aes(doy, ave, group = year, color = as.factor(year))) +
  geom_line(size = .1, alpha = .7) +
  scale_color_brewer("year", type='qual', palette = "Set1") +
  facet_wrap(~ network_name, ncol = 3) +
  ggtitle(expression(paste("Average daily ", NO[2], " concentration and 30 day moving average, by network (2019 and 2020)"))) +
  labs(subtitle = "Community mobility in Mexico started dropping around March 15 (day 74 highlighted by a vertical black line)") +
  xlab("day of year") +
  ylab(expression(paste(NO[2], " ppm"))) +
  theme_bw() +
  stat_rollapplyr(width = 30, align = "right") +
  geom_vline(xintercept = 74) + #March 15 is day number 74
  theme(axis.text.x=element_text(angle=60,hjust=1))

## Warning: Removed 58 row(s) containing missing values (geom_path).

Diego Valle-Jones

8/1/2020