knitr::opts_chunk$set(echo = T, warning = F, message = F, eval = F)
options(tigris_use_cache = TRUE)
library(tidyverse)
library(sf)
library(tigris)
library(censusapi)
library(mapview)
library(leaflet)
library(mapboxapi)
library(readr)
library(jsonlite)

1 Vehicle Emission

1.1 Commute data from LODES in 94108

# zctas <- zctas() #Download a Zip Code Tabulation Area (ZCTA) shapefile into R
# 
# downtown Palo Alto residential area
# zip <- zctas %>% 
#   filter(GEOID10 == "94108") #single line information of 94108
# 
# blocks <- blocks("CA") #all block information in CA
# 
# zip_blocks <- blocks %>% 
#   st_centroid() %>% 
#   .[zip, ] #all blocks in zip=94108
# saveRDS(zip, "zip_info_94108.rds")
# saveRDS(zip_blocks, "zip_block_info_94108.rds")
zip <- read_rds("zip_info_94108.rds")
zip_blocks <- read_rds("zip_block_info_94108.rds")
zip_cbgs <- 
  block_groups(state = "CA", county = "San Francisco") %>% 
  st_centroid() %>%
  .[zip,] %>% 
  st_drop_geometry() %>% 
  left_join(block_groups(state = "CA", county = "San Francisco") %>% select(GEOID)) %>% 
  st_as_sf()
# full_ct_od <- 2013:2019 %>% 
#   map_dfr(function(year){
#     
#     print(year)
#     
#     temp <- read_csv(paste0("/Volumes/GoogleDrive/Shared drives/SFBI/Data Library/LODES/ca_od_main_JT01_", year, ".csv.gz")) %>% 
#       filter(
#         h_geocode %in% zip_blocks$GEOID10 |
#           w_geocode %in% zip_blocks$GEOID10
#       ) %>% 
#       mutate(year = year)
#     
#     saveRDS(temp, paste0("94108_od_", year, ".rds"))
#     
#     return(temp)
#     
#   }) #5 years data
# 
# saveRDS(full_ct_od, "94108_od_full.rds")
full_ct_od <- readRDS("94108_od_full.rds")
# full_ct_od_cross <- full_ct_od %>% 
#   select(-createdate) %>% 
#   filter(!(
#     h_geocode %in% zip_blocks$GEOID10 &
#       w_geocode %in% zip_blocks$GEOID10
#   )) %>% #not include people who travel within their zip code
#   mutate(
#     direction = ifelse(
#       h_geocode %in% zip_blocks$GEOID10,
#       "outbound",
#       "inbound"
#     )
#   ) #commute data where people travel out or into 94108

1.2 routing

# full_ct_od_routing <- full_ct_od_cross %>% 
#   mutate(
#     origin = ifelse(
#       direction == "inbound",
#       h_geocode,
#       w_geocode
#     ),
#     cbg = origin %>% substr(1,12),
#     tract = origin %>% substr(1,11)
#   ) %>% 
#   filter(!duplicated(tract))
#Question: why are we filtering out the duplicated??
#Answer: to do routing only once for the same origin
# ca_tracts <- tracts("CA") #census acs5/1 only to tract levels
#only decennial data offers blockgroup levels
# 
# #routing origins (all inbound/outbound locations)
od_origin <-
  full_ct_od_routing %>%
  select(tract) %>%
  left_join(ca_tracts %>% select(tract = GEOID)) %>%
  st_as_sf() %>%
  st_centroid() %>%
  st_coordinates() #add geometry information into the od_origins for later routing

view(head(full_ct_od_routing)) #contains destination(outside of 94108) tract ids
view(head(od_origin)) #centroid coordinate(X,Y) for  

# 
# # nrow(od_origin) #7044
# 
# #routing destination (94108 only)
# od_destination <-
#   zip %>% 
#   st_centroid() %>% 
#   st_coordinates()

#routing using mapbox
# od_route <- 
#   1:ceiling(nrow(od_origin)/1000) %>% 
#   map_dfr(function(y){
#     
#     print(y)
#     
#     temp <- 
#       (y * 1000 - 999) : pmin(y * 1000, nrow(od_origin)) %>% 
#       map_dfr(function(x){
#         tryCatch(
#           mb_directions(
#             origin = od_origin[x, ],
#             destination = od_destination,
#             profile = "driving-traffic"
#           ) %>% 
#             mutate(id = x),
#           error = function(e){
#             data.frame(id = x)
#           }
#         )
#       }) %>% 
#       st_as_sf()
#       
#       saveRDS(temp, paste0("94108_od_routing_",y,".rds"))
#       
#       return(temp)
#   })
# saveRDS(od_route, "94108_od_routing_full.rds")

view(head(od_route))
# od_route <-readRDS("94108_od_routing_full.rds")
nrow(full_ct_od_routing)

od_route %>%
  filter(is.na(duration)) #2 rows
full_od_routed <- full_ct_od_routing %>%
  cbind(od_route)

view(head(full_od_routed))
view(head(full_ct_od_cross))
full_od_routed %>%
  filter(is.na(duration)) #2 rows
names(full_od_routed)

sum(is.na(full_od_final$duration))

view(head(full_od_final))

full_od_final <- full_ct_od_cross %>%
  mutate(
    origin = ifelse(
      direction == "inbound",
      h_geocode,
      w_geocode
    ),
    tract = substr(origin, 1, 11)
  ) %>%
  left_join(
    full_od_routed %>%
      select(tract, duration, distance)
  ) %>%
  mutate(
    visits = S000 * 261 #TODO: the number of visits per year for commute patterns
  )
saveRDS(full_od_final,"94108_commute_od_routed_2013_2019.rds")
full_od_final <- read_rds("94108_commute_od_routed_2013_2019.rds")

1.3 combine population data from acs5

#convert distance and visits in full_od_final into VMT
#visits are "per job" so combine with census data to calculate "per vehicle"
Sys.setenv(CENSUS_KEY="c8aa67e4086b4b5ce3a8717f59faa9a28f611dab")

# ca_cbgs_pop <-
#   counties("CA", cb = T, progress_bar = F) %>%
#   pull(COUNTYFP) %>% 
#   map_dfr(function(x){
#     getCensus(
#       name = "acs/acs5",
#       vintage = 2019,
#       region = "block group:*",
#       regionin = paste0("state:06+county:", x),
#       vars = "B01001_001E"
#     )
#   }) %>% 
#   transmute(
#     census_block_group =
#       paste0(state,county,tract,block_group),
#     pop = B01001_001E
#   )
# #names(ca_cbgs_pop) #"census_block_group" "pop" 
# 
# test <-
#   2013:2019 %>%
#   map_dfr(function(year){
#     counties("CA", cb = T, progress_bar = F) %>%
#     pull(COUNTYFP) %>% 
#     map_dfr(function(x){
#       getCensus(
#         name = "acs/acs5",
#         vintage = year,
#         region = "block group:*",
#         regionin = paste0("state:06+county:", x),
#         vars = "B01001_001E"
#       )
#     }) %>% 
#     transmute(
#       census_block_group =
#         paste0(state,county,tract,block_group),
#       pop = B01001_001E,
#       year = year
#     )
#   })
# view(head(ca_cbgs_pop))
#   
# saveRDS(test, "ca_pop_2013_2019.rds")
ca_cbgs_pop <- readRDS("ca_pop_2013_2019.rds")

# origin_cbgs_pop <-
#   full_od_final %>%
#   select(-starts_with("S")) %>%
#   mutate(
#     h_cbgs = substr(h_geocode, 1, 12)
#   ) %>%
#   left_join(
#     ca_cbgs_pop,
#     by = c("h_cbgs" = "census_block_group", "year" = "year")
#   )
# 
# saveRDS(origin_cbgs_pop, "origin_cbgs_pop_94108.rds")

origin_cbgs_pop <- read_rds("origin_cbgs_pop_94108.rds")

There are three census block groups that have no population documented, excluded from this analysis. They are 060379304011, 060378002043, 060378002043. ##TODO: find out where are these places and why they have no routing as well

sum(is.na(origin_cbgs_pop$pop)) #3
sum(is.na(origin_cbgs_pop$duration)) #4

origin_cbgs_pop %>% filter(is.na(pop))
#060379304011
#060378002043
#060378002043

1.4 travel mode

acs_vars_2019_5yr <- read_rds("acs_vars_2019_5yr.rds")

# travel_time_mode <-
#   counties("CA", cb = T, progress_bar = F) %>%
#   pull(COUNTYFP) %>% 
#   map_dfr(function(x){
#     getCensus(
#       name = "acs/acs5",
#       vintage = 2019,
#       region = "block group:*",
#       regionin = paste0("state:06+county:", x),
#       vars = "group(B08134)" #Mode to work by travel time to work
#     )
#   }) %>% 
#   mutate(
#     cbg =
#       paste0(state,county,tract,block_group)
#   ) %>%
#   filter(cbg %in% origin_cbgs_pop$h_cbgs) %>% 
#   select(!c(GEO_ID,state,county,tract,block_group,NAME) & !ends_with(c("EA","MA","M"))) %>%
#   pivot_longer(
#     ends_with("E"),
#     names_to = "variable",
#     values_to = "estimate"
#   ) %>%
#   left_join(
#     acs_vars_2019_5yr %>% 
#       select(name, label), 
#     by = c("variable" = "name")
#   ) %>% 
#   select(-variable) %>% 
#   separate(
#     label,
#     into = c(NA, NA, "total", "mode", "carpool", "time"),
#     sep = "!!"
#   ) %>% 
#   mutate(
#     mode = case_when(
#       total %in% c(
#         "Less than 10 minutes",
#         "10 to 14 minutes",
#         "15 to 19 minutes",
#         "20 to 24 minutes",
#         "25 to 29 minutes",
#         "30 to 34 minutes",
#         "35 to 44 minutes",
#         "45 to 59 minutes",
#         "60 or more minutes"
#       ) ~ "Total",
#       mode == "Drove alone:" ~ mode,
#       carpool %in% c(
#         "In 2-person carpool:",
#         "In 3-or-more-person carpool:"
#       ) ~ carpool
#     ),
#     time = case_when(
#       mode == "Total" ~ total,
#       mode == "Drove alone:" ~ carpool,
#       mode == carpool ~ time
#     )
#   ) %>% 
#   filter(!is.na(time)) %>% 
#   select(-total, -carpool) %>% 
#   pivot_wider(
#     names_from = mode,
#     values_from = estimate
#   ) %>% 
#   mutate(
#     perc_veh1 = `Drove alone:`/Total,
#     perc_veh2 = `In 2-person carpool:`/Total,
#     perc_veh3 = `In 3-or-more-person carpool:`/Total
#   )
# 
# saveRDS(travel_time_mode, "94108_travel_time_mode.rds")

travel_time_mode <- read_rds("94108_travel_time_mode.rds")
ct_trips <-
  origin_cbgs_pop %>% 
  mutate(
    time = case_when(
      duration < 10 ~ "Less than 10 minutes",
      duration < 15 ~ "10 to 14 minutes",
      duration < 20 ~ "15 to 19 minutes",
      duration < 25 ~ "20 to 24 minutes",
      duration < 30 ~ "25 to 29 minutes",
      duration < 35 ~ "30 to 34 minutes",
      duration < 45 ~ "35 to 44 minutes",
      duration < 60 ~ "45 to 59 minutes",
      TRUE ~ "60 or more minutes"
    )
  ) %>% 
  left_join(
    travel_time_mode %>% 
      select(
        cbg,
        time,
        perc_veh1,
        perc_veh2,
        perc_veh3
      ),
    by = c("h_cbgs" = "cbg", "time")
  ) %>% 
  mutate(
    vehicles = 
      visits * perc_veh1 + 
      visits * perc_veh2 / 2 +
      visits * perc_veh3 / 3,
    vmt = vehicles * distance * 2
  )
saveRDS(ct_trips, "94108_trips.rds")
ct_trips <- read_rds("94108_trips.rds")

ct_visits <- ct_trips %>%
  group_by(tract, year) %>%
  summarize(
    visits = sum(visits),
    duration = mean(duration),
    distance = mean(distance),
    vmt = sum(vmt)
  ) %>%
  st_drop_geometry() %>% 
  left_join(tracts(state = "CA") %>% select(tract = GEOID)) %>% 
  st_as_sf()

saveRDS(ct_visits, "94108_trips_vmt_by_year_tract.rds")
ct_trips <- read_rds("94108_trips.rds")
ct_visits <- read_rds("94108_trips_vmt_by_year_tract.rds")
#TODO: fix this chunk
visits_pal <- colorNumeric(
  palette = "Reds",
  domain = ct_visits %>% 
    filter(year == 2019) %>%
    arrange(desc(visits)) %>% 
    pull(visits) 
    # %>% .[-c(1:6)]
)

#TODO: this plot is way too slow 
leaflet() %>% 
  addMapboxTiles(
    style_id = "light-v9",
    username = "mapbox"
  ) %>% 
  addPolygons(
    data = zip_cbgs %>% st_transform(4326),
    fill = F
  ) %>% 
  addPolygons(
    data = ct_visits %>% st_transform(4326),
    fillColor = ~visits_pal(visits),
    color = "red",
    weight = 1,
    fillOpacity = 0.75,
    label = ~visits
  ) %>% 
  addLegend(
    data = ct_visits,
    pal = visits_pal,
    values = ct_visits %>% st_transform(4326) %>% 
      filter(year == 2019) %>%
      arrange(desc(visits)) %>% 
      pull(visits), 
    # %>% .[-c(1:6)],
    title = "Commute visits to 94108"
  )

##convert VMTs to GHG emission

emfac <- 
  read_csv("EMFAC2021-EI-202xClass-SanFranciscoBayArea-2021-Summer.csv", skip = 8) %>% 
  transmute(
    Category = `Vehicle Category`,
    Fuel_Type = Fuel,
    Percent_Trips = Trips/sum(Trips),
    Percent_Miles = `Total VMT`/sum(`Total VMT`),
    `MTCO2_Running_Exhaust` = CO2_RUNEX/`Total VMT`,
    `MTCO2_Start_Exhaust` = CO2_STREX/Trips
  )
ct_trips_ghg_by_year <-
  2013:2019 %>%
  map_dfr(function(y){
    emfac %>% 
  mutate(
    trips = Percent_Trips * sum(ct_trips %>% filter(year == y) %>% .$visits, na.rm = T),
    vmt = Percent_Miles * sum(ct_trips %>% filter(year == y) %>% .$vmt, na.rm = T),
    ghg = vmt*MTCO2_Running_Exhaust + trips*MTCO2_Start_Exhaust*2,
    year = y
  )
  })

ct_trips_ghg_annual <-
  ct_trips_ghg_by_year %>%
  group_by(year) %>%
  summarize(trips = sum(trips),
            vmt = sum(vmt),
            ghg = sum(ghg))

saveRDS(ct_trips_ghg_annual, "94108_vmt_ghg_total.rds")

2 Building Emission

#Benchmarking Greenhouse Gas Emissions for Delivered Electricity 
#UNIT: (Pounds of CO2 per MWh)

pge_elec_emissions_factor <-
  data.frame(
    year = c(2013:2019),
    factor = c(427,435,405,294,210,206,2.68)
  )

pge_elec_emissions_factor %>% 
  ggplot() +
  geom_line(
    aes(
      x = year,
      y = factor
    )
  ) +
  labs(
    x = "Year",
    y = "Pounds of CO2 per MHh",
    title = "PG&E electricity emissions rate"
  )
#assume it's constant over years
#UNIT: (pounds of CO2 per million BTU)
pge_gas_emissions_factor <- 117
pge_data <- 
  2013:2019 %>% 
  map_dfr(function(yr){
    
    factor <- 
      pge_elec_emissions_factor %>% 
      filter(year == yr) %>% 
      pull(factor)
    
    1:4 %>% 
      map_dfr(function(quarter){
        
        c("Electric","Gas") %>% 
          map_dfr(function(type){
            
            filename <- 
              paste0(
                "/Volumes/GoogleDrive/Shared drives/SFBI/Data Library/PG&E/PGE_",
                yr,
                "_Q",
                quarter,
                "_",
                type,
                "UsageByZip.csv"
              )
            
            temp <- read_csv(filename)
            
            if(yr == 2017 & quarter == 4) {
              temp <- 
                temp %>% 
                filter(MONTH != 9)
            }
            
            temp <-
              temp %>% 
              rename_all(toupper) %>% 
              mutate(
                TOTALKBTU = ifelse(
                  substr(CUSTOMERCLASS,1,1) == "E",
                  TOTALKWH * 3.412,
                  TOTALTHM * 99.976
                ),
                TOTALTCO2E = ifelse(
                  substr(CUSTOMERCLASS,1,1) == "E",
                  TOTALKWH/1000 * factor * 0.000453592,
                  TOTALTHM * 0.00531
                )
              ) %>% 
              select(
                ZIPCODE,
                YEAR,
                MONTH,
                CUSTOMERCLASS,
                TOTALKBTU,
                TOTALTCO2E,
                TOTALCUSTOMERS
              )
            
          })
        
      })
    
  })

saveRDS(pge_data, "pge_data_2013_2019.rds")
pge_data <- read_rds("pge_data_2013_2019.rds")

bldg_pge_by_year <- pge_data %>% 
  filter(ZIPCODE == 94108) %>% 
  filter(CUSTOMERCLASS %in% c(
    "Elec- Commercial",
    "Elec- Residential",
    "Gas- Commercial",
    "Gas- Residential"
  )) %>% 
  mutate(
    ENERGYTYPE = substr(CUSTOMERCLASS,1,1),
    BUILDINGTYPE = case_when(
      ENERGYTYPE == "E" ~ substr(CUSTOMERCLASS,7,7),
      ENERGYTYPE == "G" ~ substr(CUSTOMERCLASS,6,6)
    )
  ) %>% 
  group_by(ZIPCODE, ENERGYTYPE, BUILDINGTYPE, YEAR) %>%
  summarize(
    TOTALKBTU = sum(TOTALKBTU, na.rm=T),
    TOTALTCO2E = sum(TOTALTCO2E, na.rm=T),
    TOTALCUSTOMERS = mean(TOTALCUSTOMERS, na.rm=T)
  ) 
# %>%
#   group_by(ENERGYTYPE, BUILDINGTYPE, YEAR) %>% #the second group by doesnt make any sense because we are just dealing with one ZIP code here, but it doesnt hurt to keep here
#   summarize(across(
#     c(TOTALKBTU,TOTALTCO2E, TOTALCUSTOMERS),
#     ~sum(.,na.rm=T)
#   ))

saveRDS(bldg_pge_by_year, "94108_bldg_pge_2013_2019.rds")
bldg_pge_by_year <- read_rds("94108_bldg_pge_2013_2019.rds")
#TODO
ggplot(
  bldg_pge_by_year, 
  aes(
    x = as.factor(YEAR), 
    y = TOTALKBTU/1000000
  )
) + 
  geom_bar(stat = "identity", aes(fill = ENERGYTYPE), position = "dodge") + 
  labs(x = "Year", y = "GBTU", title = "94108 Annual Energy Usage, 2013 to 2019") + 
  scale_fill_discrete(name="Energy Type",labels = c("Electricity","Gas"))

2.1 building energy consumption by resident

#get population data in ZIP=94108 2013-2019
#DATA: acs5
# names(ca_cbgs_pop) #pop data by year, cbgs levels

resident_pop_by_year <- 
  ca_cbgs_pop %>%
  filter(
    census_block_group 
    %in% substr(zip_blocks$GEOID10,1,12)) %>%
  group_by(YEAR = year) %>%
  summarize(
    resident_pop = sum(pop)
  )
saveRDS(resident_pop_by_year, "94108_resident_pop_by_year.rds")

2.2 building energy consumption by job

#get job counts data from LODES WAC in ZIP=94108 2013-2019
t <- read_csv("/Volumes/GoogleDrive/My Drive/Data Library/LODES/ca_wac_S000_JT00_2013.csv")

job_cnts <- 2013:2019 %>% 
  map_dfr(function(year){

    print(year)

    temp <- read_csv(paste0("/Volumes/GoogleDrive/My Drive/Data Library/LODES/ca_wac_S000_JT00_", year, ".csv")) %>%
      filter(w_geocode %in% zip_blocks$GEOID10) %>%
      mutate(
        year = year,
        job_cnts = C000
      ) %>%
      select(
        w_geocode,
        year,
        job_cnts
      )

    saveRDS(temp, paste0("94108_wac_", year, ".rds"))

    return(temp)

  }) %>%
  group_by(YEAR = year) %>%
  summarize(
    job_cnts=sum(job_cnts)
  )

saveRDS(job_cnts, "94108_job_total_by_year.rds")
job_cnts <- read_rds("94108_job_total_by_year.rds")
res_pop <- read_rds("94108_resident_pop_by_year.rds")
names(job_cnts)

bldg_energy <- bldg_pge_by_year %>%
  ungroup() %>%
  left_join(job_cnts) %>%
  left_join(res_pop) %>%
  select(-ZIPCODE) %>%
  mutate(
    intensity = case_when(
      BUILDINGTYPE == "C" ~ TOTALKBTU/job_cnts,
      BUILDINGTYPE == "R" ~ TOTALKBTU/resident_pop
    )
  )

saveRDS(bldg_energy, "94108_bldg_energy_by_year.rds")
bldg_energy <- read_rds("94108_bldg_energy_by_year.rds")
bldg_energy %>% 
  ggplot(
    aes(
      x = YEAR,
      y = intensity
    )
  ) + 
  geom_line(
    aes(
      color = BUILDINGTYPE,
      linetype = ENERGYTYPE
    ),
    size = 1
  ) +
  labs(x = "Year", y = "kBTUs per resident or job", title = "ZIPCODE 94108 Annual Building Energy Use Intensity, 2013 to 2019", color ="Use Type", linetype = "Energy Type") +
  scale_linetype_manual(values = c("solid","dotted"), labels = c("Electricity","Gas"))
cdd <- read_csv("CDD_SF_2013_2019.csv")
hdd <- read_csv("HDD_SF_2013_2019.csv")

HDD_CDD <- data.frame(
  YEAR = c(2013:2019),
  CDD = c(9,10,17,7,5,27,2),
  HDD = c(91,79,80,100,106,80,103)
)

bldg_energy_normalized <- bldg_energy %>%
  left_join(HDD_CDD) %>%
  mutate(
    intensity_normalized = case_when(
      ENERGYTYPE == "E" ~ intensity/CDD,
      ENERGYTYPE == "G" ~ intensity/HDD
    )
  )

saveRDS(bldg_energy_normalized, "94108_bldg_energy_normalized.rds")