# create and clean COVID-19 data
# for Young Scholars intern projects July 2021
# Attribute to: "COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University"
# or "JHU CSSE COVID-19 Data" for short, and the URL: https://github.com/CSSEGISandData/COVID-19.
# Kaitlin Maciejewski

library(tidyverse)

# Download one JHU CSSE US time-series table and reshape it to long form.
#
# Arguments:
#   url       - address of the wide CSV (one column per day) to download.
#   value_col - name (string) of the column that will hold the daily values.
#
# Returns the table in long form, one row per original row per day, with:
#   * every non-date column of the CSV (names cleaned by janitor),
#   * month, day, year - character pieces of the original date column name,
#   * <value_col>      - the value that was stored under that date column,
#   * yr20             - the year with a "20" prefix, as character,
#   * date             - the same day as a Date.
read_jhu_us_series <- function(url, value_col) {
  read.csv(url) %>%
    janitor::clean_names() %>%
    # After read.csv() + clean_names() the date columns all start with "x";
    # names_prefix strips that leading "x" so the remaining text
    # (month_day_year) lands directly in a single "date" column.
    pivot_longer(
      cols = starts_with("x"),
      names_to = "date",
      names_prefix = "x",
      values_to = value_col
    ) %>%
    # Split "month_day_year" into its three parts (kept as character).
    separate(
      col = date,
      into = c("month", "day", "year"),
      sep = "_"
    ) %>%
    mutate(
      # The source uses two-digit years; expand them to 20xx.
      yr20 = paste0("20", year),
      # Reassemble the parts into a proper Date column.
      date = as.Date(ISOdate(year = yr20, month = month, day = day))
    )
}

# Cumulative confirmed cases.
us_covid_confirmed <- read_jhu_us_series(
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv",
  value_col = "confirmed"
)

# Cumulative deaths: same layout, values go to "deaths" instead.
us_covid_deaths <- read_jhu_us_series(
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv",
  value_col = "deaths"
)

# Join confirmed and death counts into one table, one row per location per day.
# The keys are spelled out instead of letting left_join() fall back to every
# shared column: matching on all shared columns also compares the numeric
# coordinate columns and the redundant month/day/year/yr20 pieces, so any
# tiny difference between the two files would silently turn into NA deaths.
# Only the columns that exist solely in the deaths table are carried over
# (next to the two keys), so no duplicated .x/.y columns appear and the
# output keeps the same columns as before.
# NOTE(review): assumes `uid` identifies exactly one location in each JHU
# table, i.e. (uid, date) is unique - confirm against the source data.
us_covid_joined <- left_join(
  us_covid_confirmed,
  us_covid_deaths %>%
    select(uid, date, !any_of(names(us_covid_confirmed))),
  by = c("uid", "date")
)

# Save the joined table as a CSV. The path is built by here::here() with ".."
# as its first component, i.e. one level above the directory here() resolves.
# write.csv() is left at its defaults, so row names are written as the first
# (unnamed) column of the file.
write.csv(
  us_covid_joined,
  here::here("..", "us_covid_joined_07062021.csv")
)