Geography 176A

Question 1: Find counties with more than 100 new cases per 100,000 residents of the past 14 days

Find the 5 worst cumulative counties, 5 counties with most new case, list of safe counties, and a text report

#state of intrest
state.interest= "California"

newCases<- covid %>% 
  filter(state== state.interest) %>% 
  group_by(county, date) %>% 
  summarise(totalCases=sum(cases))  %>% 
  mutate(NewCases= totalCases- lag(totalCases)) %>% 
  ungroup()

create 2 tables: 5 counties with most cumulative cases and 5 counties with most new cases

#table: 5 most cumulative cases
  
mostCasesTbl<- newCases %>%
  filter(date==max(date)) %>% 
  select(county,totalCases) %>%  
  slice_max(totalCases,n=5)  
  
  
#make table nicer
knitr::kable(mostCasesTbl, caption= "Total Cases by County", col.names = c("County", "Total Cases"), 
format.args = list(big.mark = ","))

Total Cases by County
County	Total Cases
Los Angeles	253,985
Riverside	55,073
Orange	52,121
San Bernardino	50,699
San Diego	42,742

# table: 5 counties with most new cases

mostNewCases<- newCases %>% 
  filter(date==max(date)) %>%  
  select(county, NewCases) %>% 
  slice_max(NewCases, n=5)


#make table nicer
knitr::kable(mostNewCases, caption= "New Cases by County", col.names = c("County", "New Cases"), 
format.args = list(big.mark = ","))

New Cases by County
County	New Cases
Los Angeles	809
San Diego	265
Orange	185
Fresno	159
San Bernardino	156

#use stateCases data set that is filtered to state.interest 
stCases<- covid %>% 
  filter(state == state.interest) %>% 
  rename(FIPStxt = fips)


stateCasesPopJoin<-left_join(stCases,popEst,by = "FIPStxt")

Create 2 tables: 5 counties with most cases per capita, 5 counties with most new cases per capita.

#Table1: most cumlative cases per capita

mostCasesPerCapita<- stateCasesPopJoin %>% 
  group_by(county, POP_ESTIMATE_2019,date) %>%  
  summarise(totalCases=sum(cases)) %>% 
  mutate(NewCases= totalCases- lag(totalCases)) %>% 
  ungroup() %>% 
  filter(date== max(date)) %>% 
  mutate(casesPerCapita= totalCases/POP_ESTIMATE_2019) %>% 
  select(county,casesPerCapita) %>% 
  slice_max(casesPerCapita, n=5)


#make table nicer

knitr::kable(mostCasesPerCapita, caption= "Top 5 Highest Covid Cases by County Per Capitia", col.names = c("County", "Cases Per Capita"))

Top 5 Highest Covid Cases by County Per Capitia
County	Cases Per Capita
Imperial	0.0622134
Kings	0.0464038
Kern	0.0341423
Tulare	0.0324199
Merced	0.0307584

#table 2: most current cases per capita
newCasesPerCapita<- stateCasesPopJoin %>% 
  group_by(county, POP_ESTIMATE_2019, date) %>% 
  summarise(totalCases=sum(cases)) %>% 
  mutate(NewCases= totalCases- lag(totalCases)) %>% 
  ungroup() %>% 
  filter(date==max(date)) %>% 
  mutate(newCasesPerCap = NewCases/POP_ESTIMATE_2019) %>% 
  select(county, newCasesPerCap) %>% 
  slice_max(newCasesPerCap, n=5)


#make table nicer

knitr::kable(newCasesPerCapita, caption= "Top 5 Highest New Covid Cases by County Per Capitia", col.names = c("County", "New Cases Per Capita"))

Top 5 Highest New Covid Cases by County Per Capitia
County	New Cases Per Capita
Kings	0.0002615
San Benito	0.0002388
Monterey	0.0002027
Lake	0.0001708
Fresno	0.0001591

# 14 day data
time.length = 13

twoWeek<- stateCasesPopJoin %>% 
  filter(date >= max(date)-time.length) %>% 
  group_by(county,POP_ESTIMATE_2019,date) %>% 
  summarise(totalcase= sum(cases)) %>% 
  mutate(lagCases=totalcase-lag(totalcase,time.length)) %>% 
  ungroup() %>% 
  mutate(CasesPer = 100000*(lagCases/POP_ESTIMATE_2019)) %>% 
  filter(CasesPer>100) %>% 
  select(county,CasesPer) %>% 
  arrange(-CasesPer)

 
  knitr::kable(twoWeek, caption= "CA Counties With More Than 100 New Cases In The Past 14 days, per 100,000 Residents", col.names = c("County", "New Cases Per 100,000 Residents"))

CA Counties With More Than 100 New Cases In The Past 14 days, per 100,000 Residents
County	New Cases Per 100,000 Residents
Kings	494.9653
Imperial	306.2660
Glenn	281.7596
Butte	275.1088
Madera	233.2721
Monterey	233.1470
Stanislaus	229.9059
San Benito	224.4937
Tulare	222.0101
Fresno	211.4901
Colusa	204.2048
San Joaquin	193.0071
Merced	183.3045
Sonoma	182.2647
Yuba	180.5054
Sutter	159.8416
Kern	156.7426
San Bernardino	140.2239
Sacramento	137.3660
Calaveras	135.0615
Modoc	124.4203
Contra Costa	122.4940
San Diego	122.0670
Los Angeles	121.6941
San Mateo	118.4493
Ventura	116.3112
Marin	115.9080
Yolo	114.7392
Santa Clara	114.1166
Orange	112.8258
Santa Barbara	111.7584
Santa Cruz	109.4384
Alameda	107.9381

#find total cases
mostCasesTbl1<- newCases %>%
  filter(date==max(date)) %>% 
  select(county,totalCases) %>% 
  summarise(stateTotal=sum(totalCases))

#find total new, last 14 days
twoWeek1<- stateCasesPopJoin %>% 
  filter(date >= max(date)-time.length) %>% 
  group_by(county,POP_ESTIMATE_2019,date) %>% 
  summarise(totalcase= sum(cases)) %>% 
  mutate(lagCases=totalcase-lag(totalcase,time.length)) %>% 
  ungroup() %>% 
  select(county,lagCases) %>% 
  filter(lagCases>=0) %>% 
  summarise(totNewCase= sum(lagCases))

#find the safe counties
twoWeek2<- stateCasesPopJoin %>% 
  filter(date >= max(date)-time.length) %>% 
  group_by(county,POP_ESTIMATE_2019,date) %>% 
  summarise(totalcase= sum(cases)) %>% 
  mutate(lagCases=totalcase-lag(totalcase,time.length)) %>% 
  ungroup() %>% 
  mutate(CasesPer = 100000*(lagCases/POP_ESTIMATE_2019)) %>% 
  filter(CasesPer<100) %>% 
  select(county,CasesPer) %>% 
  arrange(CasesPer)


knitr::kable(twoWeek2, caption= "Safe California Counties", col.names = c("County", "New Cases Per 100,000 People"))

Safe California Counties
County	New Cases Per 100,000 People
Alpine	0.00000
Sierra	0.00000
Mono	6.92329
Mariposa	11.62588
Plumas	15.95151
Trinity	16.28002
Del Norte	21.57342
Shasta	23.32297
El Dorado	34.74329
Nevada	36.08842
Lassen	39.25032
Siskiyou	41.34224
Humboldt	43.52381
Tuolumne	56.90370
Solano	68.80483
Placer	78.57826
Tehama	79.89675
San Luis Obispo	85.12562
Riverside	87.59197
Napa	90.74805
Lake	91.63483
San Francisco	91.65684
Inyo	94.24026
Amador	95.59267
Mendocino	97.98384

# 10:Results) Total number of cases in California is 621,981 and the total number of new cases in the past 14 days is 112,474. The total number of safe counties, (<100 new cases per 100,000 ppl) is 13 counties in California.

Question 2:

In this question, we are going to look at the story of 4 states and the impact scale can have on data interprtation. The states include: New York, California, Louisiana, and Florida.

#create state level daily new and rolling 7 day average 
states.of.interest <-  c("New York", "California", "Louisiana", "Florida")
stateLvl<- covid %>% 
 filter(state %in% states.of.interest) %>% 
 group_by(state,date) %>% 
  summarise(cases=sum(cases))  %>% 
 mutate(DailyNewCases = c(NA, diff(cases)),
         roll7=rollmean(DailyNewCases,7,fill = NA, align = "right")) %>% 
  pivot_longer(cols = c(DailyNewCases,roll7),names_to = "type",values_to="values")

#create ggplot of daily new cases and 7 day average
plot01Lab2<- ggplot(data = stateLvl, aes(x=date, y=values))+
  geom_line(aes(col = state))+
  labs(x="Date", y="Case Count",
       title= "New Daily COVID-19 Case Count and 7 Day Rolling Average",
       caption = "Data From NY Times")+
  facet_grid(type~state, scales = "free_y")+
  theme_bw()+
  ggsave(plot = last_plot(), file= "../img/COVIDPlot.png")
plot01Lab2

#NY Times style
stateLvls<- covid %>% 
 filter(state %in% states.of.interest) %>% 
 group_by(state,date) %>% 
  summarise(cases=sum(cases))  %>% 
 mutate(DailyNewCases = c(NA, diff(cases)),
         roll7=rollmean(DailyNewCases,7,fill = NA, align = "right")) 

#NY Times style ggplot of new daily cases
nyTimes01<-  ggplot(data=stateLvls, aes(x = date)) +  
  geom_col(aes(y = DailyNewCases), col = NA, fill = "#F5B8B5") +  
  geom_line(aes(y = roll7), col = "darkred", size = 1) +  
  theme_bw()+
  labs(title = paste("New Daily COVID-19 Case Count and 7 Day Rolling Average", state.interest), x="Date", y= "Case Count") +  
  facet_wrap(~state)+
  ggsave(plot = last_plot(), file= "../img/NYTIIMESCOVIDPlot.png")
nyTimes01

#use region data to get common key
region<- data.frame(State = state.abb, state = state.name)

#statelvl1 has roll7, daily new cases, and total cases
stateLv1<-  covid %>% 
 filter(state %in% states.of.interest) %>% 
 group_by(state,date) %>% 
  summarise(cases=sum(cases))  %>% 
 mutate(DailyNewCases = c(NA, diff(cases)),
         roll7=rollmean(DailyNewCases,7,fill = NA, align = "right")) %>% 
  ungroup() 
 

  
#table join region(aka state abbr) to stateLvl1
stateLvl2<-left_join(stateLv1,region,by = "state") 


#clean up population data to match at the stateLvl2, trick with divide by 2 to negate pop double counts
popEst1<- popEst %>%   
  group_by(State) %>% 
  summarise(totalPop<- sum(POP_ESTIMATE_2019)/2)


#table join the state_abb covid data to usda population data, now has correct state pop associated with covid cases
#table also has new cases/pop and rolling7 per capita
#recreating facet_grid to show total population 
stateLvl3<-left_join(stateLvl2,popEst1,by = "State") %>% 
  rename(state_pop = "totalPop <- sum(POP_ESTIMATE_2019)/2") %>% 
  mutate(NewCasesPerCap= DailyNewCases/state_pop) %>% 
  mutate(roll7PerCap=roll7/state_pop) %>% 
  pivot_longer(cols = c(NewCasesPerCap,roll7PerCap),names_to ="type",values_to ="values")

#make adjusted for population plot
stateLvl3plot<- ggplot(data = stateLvl3, aes(x=date,y=values))+
  geom_line(aes(color=state))+
  scale_color_manual(values= c("California" = "red", "Florida" = "green", 
      "Louisiana" = "blue", "New York"= "Cyan3")) + 
  labs(x="Date", y="Case Count",
       title= "New Daily COVID-19 Case Count and 7 Day Rolling Average",subtitle = "Adjusted Per Capita",
       caption = "Data From NY Times")+
  facet_grid(type~state, scales = "free_y")+
  theme_bw()+
  ggsave(plot = last_plot(), file= "../img/COVIDPlotAdjust.png")

stateLvl3plot

#NY Times style
stateLvls3<-left_join(stateLvl2,popEst1,by = "State") %>% 
  rename(state_pop = "totalPop <- sum(POP_ESTIMATE_2019)/2") %>% 
  mutate(NewCasesPerCap= DailyNewCases/state_pop) %>% 
  mutate(roll7PerCap=roll7/state_pop) 

#NY Times style ggplot per capita
nyTimes02<-  ggplot(data=stateLvls3, aes(x = date)) +  
  geom_col(aes(y = NewCasesPerCap), col = NA, fill = "#F5B8B5") +  
  geom_line(aes(y = roll7PerCap), col = "darkred", size = 1) +  
  theme_bw() +  
  labs(title = paste("New Daily COVID-19 Case Count and 7 Day Rolling Average"),subtitle = "Adjusted Per Capita", x= "Date", y= "Case Count") + 
  facet_wrap(~state)+
  ggsave(plot = last_plot(), file= "../img/NYTIMEScovidPerCap.png")
nyTimes02

Results: Scaling COVID-19 cases based on the respective state population makes the relative amount of cases appear to be less severe in some states and more severe in others. This occurs because we are comparing only raw case data values. California, a highly populated state is likely to have more cases than Lousiana, a less populated state. When state cases are adjusted for population, using cases per capita the relative proportion of people within each state contracting COVID-19 becomes more clear.

Question 3: Exploring spatial data

#read in county spatial data
counties = read_csv("../data/county-centroids.csv")

#Join to COVID-19 Data
#get daily cumulative cases
#find weighted mean
covidSpatial<-inner_join(covid,counties,by = "fips") %>%  
  group_by(date) %>% 
  summarise(wmX= sum(LON*cases)/sum(cases),wmY= sum(LAT*cases)/sum(cases),cases=sum(cases)) %>% 
  mutate(month=format(date,"%m"))


#Create weighted mean plot
weightedMeanPlot<- ggplot(data = covidSpatial)+
  borders("state",fill="light gray", colour = "light blue")+
  scale_color_viridis_d()+
  geom_point(aes(x=wmX, y=wmY, color= month))+
  theme_linedraw()+
  labs(color= "month",
       size = "Cases (1,000,000)",
       x="Longitude",
       y="Latitude",
       title = "Weighted Mean Center of COVID-19 Outbreak in the US",
       caption = "NY Times Data")+
      ggsave(plot = last_plot(), file= "../img/COVIDweightedMean.png")

weightedMeanPlot

Results: The movement of the weighted mean center of COVID-19 cases in the United States moves west/southwest over the progression of the year 2020. This is inline with case reportings as the virus hit the east coast, NYC in particular, very hard in the beginning then spread west to almost all heavily populated cities.

Geography 176A

Lab 02: COVID-19 Pandemic

Jacob Braslaw

8/17/2020

Question 1: Find counties with more than 100 new cases per 100,000 residents of the past 14 days

Find the 5 worst cumulative counties, 5 counties with most new case, list of safe counties, and a text report

create 2 tables: 5 counties with most cumulative cases and 5 counties with most new cases

Create 2 tables: 5 counties with most cases per capita, 5 counties with most new cases per capita.

Question 2:

In this question, we are going to look at the story of 4 states and the impact scale can have on data interprtation. The states include: New York, California, Louisiana, and Florida.

Question 3: Exploring spatial data