# Water-year average storage plotted against the storage observed at the
# start of each water year, for the current HUC2.
storage_graph <- ggplot() +
  geom_line(storage_year_graphing,
            mapping = aes(x = all_years, y = value,
                          color = "Average Annual Storage")) +
  geom_line(oct_matrix,
            mapping = aes(x = all_years, y = start_value,
                          color = "Starting Water Year Storage")) +
  scale_x_continuous(limits = c(1980, 2019)) +
  scale_color_manual(name = "Storage Type",
                     values = c("#DD1C77", "#2B8CBE")) +
  labs(x = "Year", y = "Storage (MCM)", title = graph_title2) +
  theme_classic()
#### Calculate Normalized Values ####
# Fraction of the ResOpsUS-observed capacity that is filled each year.
normalized_matrix$normalized <-
  normalized_matrix$value / normalized_matrix$ResOpsUS
##### Plot Final Normalized Values
graph_title3 <- paste0(
  "HUC", huc2,
  " Fraction of Storage Filled in Dataset (Observed Historical Storage Over Storage Capacity) "
)
norm_storage_graph <- ggplot() +
  geom_line(normalized_matrix,
            mapping = aes(x = all_years, y = normalized)) +
  scale_x_continuous(limits = c(1950, 2020)) +
  #geom_line(drought_indices, mapping = aes(x = all_years, y = SPI, color = "red"))+
  labs(x = "Year", y = "Fraction Filled", title = graph_title3) +
  theme_classic() +
  theme(legend.position = "none") # turns legend off
###### Pull out SPI and SPEI by HUC2 #####
# Column 1 of the SPI table is the year; column (huc2 + 1) is this HUC2's index.
drought_index <- huc2+1
SPI_huc <- SPI[,c(1,drought_index)]
colnames(SPI_huc) <- c("all_years", "SPI")
# NOTE(review): this pulls from the SPI table but labels the column "SPEI".
# If a separate SPEI table exists, this is almost certainly a copy-paste bug
# (SPEI_huc <- SPEI[...]) — confirm before fixing.
SPEI_huc <- SPI[,c(1,drought_index)]
colnames(SPEI_huc) <- c("all_years", "SPEI")
###### Add in Drought Periods to See Trends ######
# Outer-join both indices by year, then attach them to the normalized storage.
drought_indices <- merge(SPI_huc, SPEI_huc, by = "all_years", all = TRUE)
normalized_final <- merge(normalized_matrix, drought_indices, by = "all_years", all = TRUE)
###### MAKE DAILY AVERAGES CSV ###########
# Row-wise total storage across all dams. The "* NA^!rowSums(!is.na(...))"
# factor turns rows that are entirely NA into NA instead of 0
# (NA^TRUE == NA, NA^FALSE == 1).
final_daily <-rowSums(interpolated[,2:ncol(interpolated)], na.rm=TRUE) *NA^!rowSums(!is.na(interpolated[,2:ncol(interpolated)]))
# NOTE(review): cbind() of a Date column with a numeric vector yields a
# matrix, coercing the dates — verify final_daily_matrix[,1] still
# round-trips through as.Date() below as intended.
final_daily_matrix <- cbind(all_huc2$date, final_daily)
colnames(final_daily_matrix) <- c("date", "storage")
all_years <- year(as.Date(final_daily_matrix[,1]))
months<- month(final_daily_matrix[,1])
final_daily_matrix <- cbind(final_daily_matrix, all_years, months)
# "YYYY-M" key used later for monthly aggregation.
year_mon <- paste0(final_daily_matrix[,3], "-", final_daily_matrix[,4])
final_daily_matrix <- cbind(final_daily_matrix, year_mon)
final_daily_matrix <- as.data.frame(final_daily_matrix, stringsAsFactors = F)
final_daily_matrix$storage <- as.numeric(final_daily_matrix$storage)
# get storage capacity chart in the right area
# Daily total reported capacity across dams; a total of exactly 0 means no
# dam reported any capacity that day, so treat it as missing.
row_sum_stor_cap <- rowSums(stor_capacity_daily[,2:ncol(stor_capacity_daily)],na.rm= T)
# Fixed: was `== "0"`, a numeric-to-character comparison that only worked by
# implicit coercion; compare numerically.
row_sum_stor_cap[row_sum_stor_cap == 0] <- NA
capacity_matrix <- data.frame("date" =stor_capacity_daily$date, "ResOpsUS" =row_sum_stor_cap)
# Inner join on date, then the fraction of capacity filled each day.
final_daily_print <- merge(final_daily_matrix, capacity_matrix, by = "date")
final_daily_print$fraction<- as.numeric(final_daily_print$storage)/as.numeric(final_daily_print$ResOpsUS)
### Create Monthly Dataset ######
#final_daily_matrix[,1]<- as.Date(final_daily_matrix[,1])
# Average the daily storage and capacity within each year-month key.
month_data2 <- as.data.frame(
  final_daily_print %>%
    group_by(year_mon) %>%
    summarise(storage = mean(as.numeric(storage), na.rm = T),
              ResOpsUS = mean(as.numeric(ResOpsUS)))
)
# Recover the calendar year from the "YYYY-M" key, then normalize by capacity.
month_data2$all_years <- year(as.Date(paste0(month_data2$year_mon, "-01")))
month_data2$normalized <- month_data2$storage / month_data2$ResOpsUS
### EXTRA STUFF #####
# Scatter of normalized storage vs SPEI, labelled by year, with a 1:1
# reference line; then report the SPI correlation.
extra_graph <- ggplot(normalized_final, mapping = aes(x = SPEI, y = normalized)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, color = "brown", linetype = "dashed") +
  scale_x_continuous(limits = c(1950, 2020)) +
  geom_text(aes(label = all_years), check_overlap = T) +
  labs(x = "SPEI", y = "Storage (MCM)", title = "Storage to Drought Metric") +
  theme_classic() +
  theme(legend.position = "none") # turns legend off
cor(normalized_final$SPI, normalized_final$normalized, use = "complete.obs")
###### Make Panel Plot #####
# 2x2 panel: capacity, fraction covered, fraction filled, water-year storage.
# grid.arrange() draws the panel and returns the arranged grob for ggsave().
total_plot <- grid.arrange(storage_cap_graph, fraction_filled_graph, norm_storage_graph, storage_graph,   nrow = 2, ncol =2)
# NOTE(review): hard-coded setwd() to home-directory paths makes this script
# non-portable; prefer building output paths with file.path().
setwd("~/Desktop/Paper_2/HUC2 Water Year and Monthly Averages /graphs")
file_name_all <- paste0("HUC", huc2, "storage_plots_start_end_resopsus.jpeg")
ggsave(file = file_name_all, total_plot)
setwd("~/Desktop/Paper_2/HUC2 Water Year and Monthly Averages /final_data")
# Write the yearly, monthly, and daily outputs for this HUC2.
data_file <- paste0("HUC", huc2, "averages.csv")
write.csv(normalized_final, file = data_file, row.names = F )
data_file2 <- paste0("HUC", huc2, "monthly_averages.csv")
write.csv(month_data2, file = data_file2, row.names = F )
data_file3 <- paste0("HUC", huc2, "daily_averages.csv")
write.csv(final_daily_print, file = data_file3, row.names = FALSE)
}#for (l in 1:length(huc2_list))
# --- Interactive debugging leftovers -----------------------------------------
# Look for rows where the date column holds the literal string "date"
# (stray header rows picked up while concatenating files).
test <-  which(all_huc2$date == "date")
which(all_huc2$date == "date")
all_huc2$date
# NOTE(review): `m` is not defined in this chunk — this only works after the
# enclosing loop over huc2_list has set it.
huc2 <- huc2_list[m] # pull out the huc2 number
print(huc2)
# GRanD dams and ResOpsUS reservoir attributes belonging to this HUC2.
all_dams <- HUC2 %>% filter(join_HUC2 ==huc2)
filtered_res_att <- huc2_res %>% filter(join_HUC2_db == huc2)# filter out all the dams that are in this huc2
dam_ids <- filtered_res_att$DAM_ID # all the dam ID's
###### Combine all storage records into one file (column names = dam name, row names = date) #####
# For each dam in this HUC2: read its ResOpsUS time series, clamp storage to
# the reported maximum capacity, drop negative storage, and outer-join the
# (date, storage) pair into one wide data frame `all_huc2`.
setwd("~/Desktop/ResOpsUS/time_series_all") # loop-invariant; hoisted out of the loop
for (l in seq_along(dam_ids)) { # loop through the dam_ids to pull all the data together
  dam <- dam_ids[l]
  file_name <- paste0("ResOpsUS_", dam, ".csv")
  res_data <- read.csv(file = file_name, stringsAsFactors = FALSE)
  print(dam_ids[l])
  # Only clean records with a usable storage column. Note sum(..., na.rm = TRUE)
  # is never NA, so this guard is effectively always TRUE unless the column is
  # absent (sum(NULL) == 0).
  if (!is.na(sum(res_data$storage, na.rm = TRUE))) {
    # Replace values above the maximum capacity with the capacity itself.
    # NOTE(review): column 10 of filtered_res_att is assumed to be maximum
    # storage capacity — confirm against the attribute table.
    max_cap_index <- which(dam_ids[l] == filtered_res_att$DAM_ID)
    max_cap <- filtered_res_att[max_cap_index, 10]
    res_data$storage[res_data$storage > max_cap] <- max_cap
    # Negative storage is physically impossible; treat as missing.
    res_data$storage[res_data$storage < 0] <- NA
  }
  colnames(res_data) <- c("date", paste0(dam_ids[l], "_value"), "inflow", "outflow", "elevation", "evaporation")
  res_data$date <- as.Date(res_data$date)
  res_data[, 2] <- as.numeric(res_data[, 2])
  if (l == 1) {
    all_huc2 <- res_data[, 1:2]
  } else {
    # Outer join so no dates are dropped when records don't align.
    all_huc2 <- merge(all_huc2, res_data[, 1:2], by = "date", all = TRUE)
  }
} # for (l in seq_along(dam_ids))
# Blank any remaining negative values after the merges.
# NOTE(review): this comparison touches every column, including the Date
# column — dates before 1970-01-01 are numerically negative and would be
# blanked; confirm the record never starts that early.
all_huc2[all_huc2 < 0] <- NA
#### Get Data Ready to Graph Storage Cap over time ######
# One row per year from the earliest dam construction year through 2021.
all_years1 <- unique(sort(all_dams$YEAR_CREATED))
all_years <- seq(all_years1[1], 2021, by =1)
graphing_panel1 <- data.frame(all_years)
graphing_panel1$storage_cap_data <- NA   # filled later from ResOpsUS daily capacity
graphing_panel1$storage_cap_GRanD <- NA  # filled later from GRanD capacities
graphing_color <- mycolors1[huc2]
###### Make Storage Cap Not NA for Dams We Have Data For (IE Fill in Gaps in GRanD) #######
# Fill gaps in GRanD's storage capacity (column 13 of all_dams): when the
# capacity is missing or zero but we have observed storage for the dam, use
# the maximum observed storage; otherwise fall back to normal storage (col 12).
for (p in seq_len(nrow(all_dams))) {
  dam_id <- as.numeric(all_dams[p, 1])
  # NOTE(review): grepl() does substring matching, so dam 123 would also
  # match column "1234_value" — consider an exact match on the column name.
  col_index <- which(grepl(dam_id, colnames(all_huc2)))
  # Scalar condition: use short-circuiting || (was vectorized | with == TRUE).
  if ((is.na(all_dams[p, 13]) || all_dams[p, 13] == 0) && length(col_index) > 0) {
    max_stor <- max(all_huc2[, col_index], na.rm = TRUE)
    # max() of an all-NA column returns -Inf (with a warning); fall back to
    # normal storage so there isn't a gap.
    if (max_stor < 0) {
      all_dams[p, 13] <- all_dams[p, 12]
    } else {
      all_dams[p, 13] <- max_stor
    }
  }
  # Still missing/zero (no observed data either): use normal storage.
  if (is.na(all_dams[p, 13]) || all_dams[p, 13] == 0) {
    all_dams[p, 13] <- all_dams[p, 12]
  }
}
print("Finished Storage Capacity")
View(all_huc2)
# --- Interactive debugging: manually stepping through loop iterations --------
# These lines replay the dam-read loop body by hand for l = 1 and l = 5,
# inspecting intermediate results with head(). They mutate `all_huc2`, so
# they should not be run as part of the full pipeline.
l
l=1
dam <- dam_ids[l]
file_name <- paste0("ResOpsUS_",dam, ".csv")
setwd("~/Desktop/ResOpsUS/time_series_all")
res_data <- read.csv(file = file_name, stringsAsFactors = FALSE)
print(dam_ids[l])
head(res_data)
#print(l)
# replace maximum values with maximum storage capacity
max_cap_index <- which(dam_ids[l] == filtered_res_att$DAM_ID)
max_cap <- filtered_res_att[max_cap_index, 10]
res_data$storage[res_data$storage > max_cap] <- max_cap
# replace negative values with NA
res_data$storage[res_data$storage < 0] <- NA
colnames(res_data) <- c("date", paste0(dam_ids[l], "_value"), "inflow", "outflow", "elevation", "evaporation")
res_data$date <- as.Date(res_data$date)
res_data[,2] <- as.numeric(res_data[,2])
all_huc2 <- res_data[,1:2]
head(all_huc2)
# Second manual iteration (l = 5), this time with the NA guard and merge.
l=5
dam <- dam_ids[l]
file_name <- paste0("ResOpsUS_",dam, ".csv")
setwd("~/Desktop/ResOpsUS/time_series_all")
res_data <- read.csv(file = file_name, stringsAsFactors = FALSE)
print(dam_ids[l])
#print(head(res_data$date))
if (is.na(sum(res_data$storage, na.rm =T)) == FALSE){
#print(l)
# replace maximum values with maximum storage capacity
max_cap_index <- which(dam_ids[l] == filtered_res_att$DAM_ID)
max_cap <- filtered_res_att[max_cap_index, 10]
res_data$storage[res_data$storage > max_cap] <- max_cap
# replace negative values with NA
res_data$storage[res_data$storage < 0] <- NA
}
colnames(res_data) <- c("date", paste0(dam_ids[l], "_value"), "inflow", "outflow", "elevation", "evaporation")
res_data$date <- as.Date(res_data$date)
res_data[,2] <- as.numeric(res_data[,2])
all_huc2 <- merge(all_huc2, res_data[,1:2], by = "date", all= T)
head(all_huc2)
# Duplicate of the dam-read loop above (kept for the interactive workflow):
# read each dam's ResOpsUS series, clamp to max capacity, drop negatives,
# and outer-join into the wide `all_huc2` frame.
setwd("~/Desktop/ResOpsUS/time_series_all") # loop-invariant; hoisted out of the loop
for (l in seq_along(dam_ids)) { # loop through the dam_ids to pull all the data together
  dam <- dam_ids[l]
  file_name <- paste0("ResOpsUS_", dam, ".csv")
  res_data <- read.csv(file = file_name, stringsAsFactors = FALSE)
  print(dam_ids[l])
  # sum(..., na.rm = TRUE) is never NA, so this guard is effectively always
  # TRUE unless the storage column is absent (sum(NULL) == 0).
  if (!is.na(sum(res_data$storage, na.rm = TRUE))) {
    # Replace values above the maximum capacity with the capacity itself
    # (column 10 of filtered_res_att — assumed to be max capacity).
    max_cap_index <- which(dam_ids[l] == filtered_res_att$DAM_ID)
    max_cap <- filtered_res_att[max_cap_index, 10]
    res_data$storage[res_data$storage > max_cap] <- max_cap
    # Negative storage is physically impossible; treat as missing.
    res_data$storage[res_data$storage < 0] <- NA
  }
  colnames(res_data) <- c("date", paste0(dam_ids[l], "_value"), "inflow", "outflow", "elevation", "evaporation")
  res_data$date <- as.Date(res_data$date)
  res_data[, 2] <- as.numeric(res_data[, 2])
  if (l == 1) {
    all_huc2 <- res_data[, 1:2]
  } else {
    all_huc2 <- merge(all_huc2, res_data[, 1:2], by = "date", all = TRUE)
  }
} # for (l in seq_along(dam_ids))
all_huc2[all_huc2 < 0]       # inspect negatives before blanking them
all_huc2[all_huc2 < 0] <- NA
# NOTE(review): class() of a Date column is "Date" (capital D), so this
# comparison is always FALSE and `test` is always integer(0). If the intent
# was to find literal "date" header rows, compare the values instead
# (all_huc2$date == "date") as done earlier in the script.
test <-  which(class(all_huc2$date) == "date")
test
# Year axis: earliest construction year through 2021.
all_years1 <- unique(sort(all_dams$YEAR_CREATED))
all_years <- seq(all_years1[1], 2021, by =1)
graphing_panel1 <- data.frame(all_years)
graphing_panel1$storage_cap_data <- NA   # filled later from ResOpsUS daily capacity
graphing_panel1$storage_cap_GRanD <- NA  # filled later from GRanD capacities
graphing_color <- mycolors1[huc2]
###### Make Storage Cap Not NA for Dams We Have Data For (IE Fill in Gaps in GRanD) #######
# Duplicate of the capacity gap-fill loop above: when GRanD capacity (col 13)
# is missing/zero but observed storage exists, use the observed maximum;
# otherwise fall back to normal storage (col 12).
for (p in seq_len(nrow(all_dams))) {
  dam_id <- as.numeric(all_dams[p, 1])
  # NOTE(review): grepl() does substring matching — dam 123 also matches
  # "1234_value"; consider an exact column-name match.
  col_index <- which(grepl(dam_id, colnames(all_huc2)))
  # Scalar condition: short-circuiting || (was vectorized | with == TRUE).
  if ((is.na(all_dams[p, 13]) || all_dams[p, 13] == 0) && length(col_index) > 0) {
    max_stor <- max(all_huc2[, col_index], na.rm = TRUE)
    # max() of an all-NA column is -Inf; fall back to normal storage.
    if (max_stor < 0) {
      all_dams[p, 13] <- all_dams[p, 12]
    } else {
      all_dams[p, 13] <- max_stor
    }
  }
  # Still missing/zero (no observed data either): use normal storage.
  if (is.na(all_dams[p, 13]) || all_dams[p, 13] == 0) {
    all_dams[p, 13] <- all_dams[p, 12]
  }
}
# NOTE(review): class(all_huc2$date) is "Date", never "date", so `test` is
# always empty and the blanking below never runs. If the goal is to blank
# stray header rows, the check should be on the *values*
# (which(all_huc2$date == "date")). Confirm intent before changing: matching
# on class would instead blank row 1 of real data.
test <-  which(class(all_huc2$date) == "date")
if (length(test) >0){
all_huc2[test,] = NA
}
###### Interpolate data #######
# Linearly interpolate interior gaps in every "*_value" storage column using
# zoo::na.approx; leading/trailing NAs are preserved (na.rm = FALSE).
interpolated <- all_huc2 %>%
  #group_by(date) %>% # use this is you have multiple entries in one data frame (ie multiple hucs)
  # mutate_at() is superseded; across() is the current dplyr idiom.
  mutate(across(matches("_value"), ~ na.approx(.x, na.rm = FALSE))) %>%
  as.data.frame() # make the output a data frame
###### Pull out all start Dates for the Dams#####
# For each dam: walk forward to the first non-NA storage value (start year)
# and backward from the end to the last non-NA value (end year).
start_date_matrix<- as.data.frame(dam_ids)
start_date_matrix$start <- NA
start_date_matrix$end_date <- NA
for (j in 1:length(dam_ids)){
dam_id <- dam_ids[j] # pull out first dam_id
start_year <- filtered_res_att[j,11] # pull out the built year
# NOTE(review): grepl() is a substring match; dam 123 would also match
# column "1234_value" — confirm dam IDs cannot be prefixes of each other.
column_index <- which(grepl(dam_id, colnames(all_huc2)) == TRUE)
# determines the first instance of number in dataset
counter =1
# Walk forward until a non-NA value; the counter <= nrow guard stops the
# walk if the whole column is NA (indexing past the end also yields NA).
while (is.na(all_huc2[counter,column_index]) == TRUE && counter <= nrow(all_huc2)){
counter = counter +1 # counter will tell us what the row number is
}
date_start <- (all_huc2[counter, 1])
start_date_matrix[j,2] <- year(date_start)
inverse_counter =nrow(all_huc2)
#finds the end dates for the data
while (is.na(all_huc2[inverse_counter,column_index]) == TRUE && inverse_counter >0){
inverse_counter = inverse_counter -1 # counter will tell us what the row number is
}
# find ending date
date_end <- (all_huc2[inverse_counter, 1])
# NOTE(review): single-row indexing returns length 1 (possibly NA), so this
# length check rarely triggers; an is.na(date_end) check may be the intent.
if (length(date_end) ==0){
start_date_matrix[j,3] <- NA
}else{
start_date_matrix[j,3] <- year(date_end) # add end date to the dates matrix
}
# don't want this-- should be okay with our analysis
# if(is.na(date_start)== TRUE ){
#   percent_covered <- NA
# }else{
#   total_number_records <- length(seq.Date(as.Date(date_start), as.Date(date_end), by = "day"))
#   non_na_records <- total_number_records - sum(is.na(all_huc2[counter:inverse_counter,column_index])) # sum gives the total number of NA
#   percentage_covered <- non_na_records/total_number_records*100
# }
#
# if (percentage_covered <50 && is.na(percentage_covered) == FALSE){ # if less than 50% record will be made NA and not interpolated
#   all_huc2[,column_index] <- NA
# }#(percentage_covered <50)
}#  for (j in 1:length(dam_ids))
colnames(start_date_matrix) <- c("DAM_ID", "start", "end_date")
# Attach the (gap-filled) GRanD storage capacity, column 13 of all_dams.
start_date_matrix <- merge(start_date_matrix, all_dams[,c(1,13)], by ="DAM_ID")
# Years with any observed data: first observed start year through 2021.
years_using1 <- unique(sort(start_date_matrix$start))
years_using <-seq(years_using1[1],2021, by =1)
###### Fix Dates so that the Start Date in Data is NEVER Before the Start Date in GRanD ######
# If observed storage for a dam begins before GRanD's build year (column 29
# of all_dams), trust the data and move the build year earlier.
for (p in seq_along(start_date_matrix$DAM_ID)) {
  dam_id <- start_date_matrix[p, 1]
  itemp <- which(dam_id == all_dams$DAM_ID)
  year_GRanD <- all_dams[itemp, 29]
  year_data <- start_date_matrix[p, 2]
  # No storage data: nothing to compare, keep GRanD's build date.
  if (is.na(year_data)) {
    next
  }
  # Check the NA case first so the comparison never evaluates against NA
  # (was a vectorized | relying on NA | TRUE == TRUE).
  if (is.na(year_GRanD) || year_data < year_GRanD) {
    all_dams[itemp, 29] <- year_data
  }
}
print("Finished Dates")
#### Loop through and calculate the Total Storage cap for each dam in the HUC2 in GRanD  #####
# Builds a cumulative GRanD capacity series in graphing_panel1 column 3:
# each year adds the capacity of dams built that year to the running total;
# years with no new dams carry the previous total forward.
for (k in 1:length(all_years)){
year_1 <- all_years[k]
stor_cap_index <- which( as.numeric(all_dams$YEAR_CREATED)== year_1 )
previous <- k-1
if(length(stor_cap_index) ==0){ # if no year exists please use the previous storage capacity
#print(k)
# NOTE(review): at k == 1 this would read row 0 (empty). It is safe only
# because all_years starts at the earliest YEAR_CREATED, so k == 1 always
# has at least one matching dam — confirm that invariant holds.
storage_cap = graphing_panel1[previous,3]
}
values <- c()
if (length(stor_cap_index) ==1){ # if this is equal to one then the storage cap is just that number
storage_cap <- as.numeric(all_dams[stor_cap_index,13])
}
if(length(stor_cap_index)>1){ # if you have more than one matching year in this huc6 , loop through to calculate
for (p in 1:length(stor_cap_index)){
index <- stor_cap_index[p]
storage_cap <- as.numeric(all_dams[index,13])
values <- append(values,storage_cap)
}#  for (p in 1:length(stor_cap_index))
storage_cap <- sum(values, na.rm =TRUE)
}#if (length(stor_cap_index) ==1)
# sum the averages as we move forward
if (k ==1){
graphing_panel1[k,3] <- storage_cap
}#(k ==1){
# New dams this year: add their capacity to the running total.
if (k >1 && length(stor_cap_index) >0){
graphing_panel1[k,3] <- sum(storage_cap, as.numeric(graphing_panel1[previous,3]), na.rm = TRUE)
}
# No new dams: carry the previous total forward unchanged.
if (k>1 && length(stor_cap_index) == 0){
graphing_panel1[k,3] <- storage_cap
}
}
print("Storage Capacity Graph Done")
# Daily storage-capacity table mirroring all_huc2: stor_capacity_daily[o, l]
# holds the dam's GRanD capacity wherever an interpolated storage value
# exists, and NA elsewhere.
stor_capacity_daily <- matrix(data = NA, nrow = nrow(all_huc2), ncol = ncol(all_huc2))
stor_capacity_daily <- as.data.frame(stor_capacity_daily)
colnames(stor_capacity_daily) <- c("date", dam_ids)
stor_capacity_daily[, 1] <- all_huc2[, 1]
col_name_storcap <- colnames(stor_capacity_daily)
for (l in 2:ncol(stor_capacity_daily)) { # one column per dam
  # The dam's capacity is loop-invariant per column: look it up once instead
  # of re-running which() for every row (the original was O(rows) lookups).
  dam <- col_name_storcap[l]
  dam_index <- which(as.numeric(dam) == all_dams$DAM_ID)
  stor_cap <- all_dams[dam_index, 13] # pull out storage capacity
  # Vectorized replacement for the original per-row loop: fill capacity
  # wherever interpolated storage exists.
  stor_capacity_daily[!is.na(interpolated[, l]), l] <- stor_cap
}
##### Loop through and Calculate the Storage Capacity in ResOpsUS ####
# Blank any stray header rows whose date is the literal string "date".
# NOTE(review): if the date column is of class Date this comparison may
# never match (or warn) — confirm against the actual column type.
itemp = which(stor_capacity_daily$date == "date")
# Fixed: was `length(itemp >0)` — the parenthesis was misplaced; it only
# worked because length(itemp > 0) == length(itemp).
if (length(itemp) > 0) {
  stor_capacity_daily[itemp, ] <- NA
}
# For each year with observed data: average each dam's daily capacity over
# the year, sum across dams, and store the total in graphing_panel1 col 2.
for (k in seq_along(years_using)) {
  yr <- years_using[k] # renamed from `year`, which shadowed lubridate::year()
  stor_cap_daily_filter <- stor_capacity_daily %>% filter(year(date) == yr)
  stor_cap_year_col <- colMeans(stor_cap_daily_filter[, 2:ncol(stor_cap_daily_filter)], na.rm = TRUE)
  storage_capacity <- sum(stor_cap_year_col, na.rm = TRUE)
  # find year index for graphing_panel1
  year_index <- which(graphing_panel1$all_years == yr)
  graphing_panel1[year_index, 2] <- storage_capacity # plug storage capacity into the data frame
} #(k in seq_along(years_using))
#####Graph all the storage capacity over time#######
# Convert the year axis to Dates, compute ResOpsUS/GRanD coverage, reshape to
# long form, and plot both capacity series distinguished by line type.
graphing_panel1$all_years <- as.Date(paste0(graphing_panel1$all_years, "-01-01"))
colnames(graphing_panel1) <- c("all_years", "ResOpsUS", "GRanD")
graphing_panel1$fraction <- graphing_panel1$ResOpsUS / graphing_panel1$GRanD
graphing_panel1_reshape <- melt(graphing_panel1[, c(1:3)], id.vars = "all_years")
graph_title1 <- paste0("HUC", huc2, " Total Storage Capacity")
capacity_colors <- c(mycolors1, "black", "grey")
storage_cap_graph <- ggplot(graphing_panel1_reshape,
                            mapping = aes(x = all_years, y = value, group = variable)) +
  geom_line(aes(linetype = variable)) +
  scale_x_date(limits = c(as.Date("1960-01-01"), as.Date("2020-01-01"))) +
  scale_linetype_manual("Data Source", values = c(1, 2)) +
  labs(x = "Year", y = "Storage Capacity (MCM)", title = graph_title1) +
  theme_classic()
print("Made Storage Capacity Graph")
###### Calculate the Water Year Averages ######
# Water year Y runs Oct 1 of Y-1 through Sep 30 of Y. For each water year:
# average each dam's daily storage, then sum the per-dam averages.
year_list <- unique(year(interpolated$date))
storage_year <- matrix(data = NA, ncol = 2, nrow = length(year_list))
storage_year[, 2] <- year_list
# Aggregate Data by Water Year. Start at 2: the first calendar year has no
# Oct 1 of the previous year in the record.
for (o in 2:length(year_list)) {
  current_year <- year_list[o]
  past_year <- current_year - 1
  start_index <- which(year(interpolated$date) == past_year & month(interpolated$date) == 10 & day(interpolated$date) == 1)
  if (length(start_index) == 0) { # water-year start missing from the record
    next
  } # (a second, identical check in the original was redundant and removed)
  last_index <- which(year(interpolated$date) == current_year & month(interpolated$date) == 9 & day(interpolated$date) == 30)
  if (length(last_index) == 0) { # water-year end missing from the record
    next
  }
  water_year_matrix <- interpolated[start_index:last_index, ]
  water_year_matrix$date <- NA # exclude the date column from colMeans
  stor_averages <- colMeans(water_year_matrix, na.rm = TRUE)
  average <- sum(stor_averages, na.rm = TRUE)
  storage_year[o, 1] <- average
}
print("Made Water Year Averages")
storage_year_graphing <- as.data.frame(storage_year, stringsAsFactors = FALSE)
colnames(storage_year_graphing) <- c("value", "all_years")
##### Normalizing Storage Over Time And Graphing #########
# Back from Dates to integer years, then outer-join yearly storage to the
# capacity panel and plot the fraction of capacity covered.
graphing_panel1$all_years <- year(graphing_panel1$all_years)
normalized_matrix <- merge(storage_year_graphing, graphing_panel1,
                           by = "all_years", all = TRUE)
graph_titleff <- paste0("HUC", huc2, " Fraction of Storage Capacity Covered")
fraction_filled_graph <- ggplot() +
  geom_line(graphing_panel1, mapping = aes(x = all_years, y = fraction)) +
  scale_x_continuous(limits = c(1950, 2020)) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(x = "Year", y = "Fraction Covered", title = graph_titleff) +
  theme_classic()
####### Pull Out Storage At Start of Water Year #########
# Total storage across all dams on Oct 1 (the start of each water year).
oct_matrix <- matrix(data = NA, ncol = 2, nrow = length(year_list))
oct_matrix[, 2] <- year_list
# Aggregate Data by Water Year
for (q in seq_along(year_list)) {
  current_year <- year_list[q]
  start_index <- which(year(interpolated$date) == current_year & month(interpolated$date) == 10 & day(interpolated$date) == 1)
  if (length(start_index) == 0) { # no Oct 1 in the record for this year
    next
  } # (a second, identical check in the original was redundant and removed)
  oct_all <- interpolated[start_index, ]
  oct_all$date <- NA # exclude the date column from the row sum
  stor_average <- rowSums(oct_all, na.rm = TRUE)
  oct_matrix[q, 1] <- stor_average
}
oct_matrix <- as.data.frame(oct_matrix, stringsAsFactors = FALSE)
colnames(oct_matrix) <- c("start_value", "all_years")
##### Graphing Water Year Averages and Storage At Start of Water Year ######
graph_title2 <- paste0("HUC",huc2, "  Water Year Averages")
# Water-year average storage plotted against the storage observed at the
# start of each water year, for the current HUC2.
storage_graph <- ggplot() +
  geom_line(storage_year_graphing,
            mapping = aes(x = all_years, y = value,
                          color = "Average Annual Storage")) +
  geom_line(oct_matrix,
            mapping = aes(x = all_years, y = start_value,
                          color = "Starting Water Year Storage")) +
  scale_x_continuous(limits = c(1980, 2019)) +
  scale_color_manual(name = "Storage Type",
                     values = c("#DD1C77", "#2B8CBE")) +
  labs(x = "Year", y = "Storage (MCM)", title = graph_title2) +
  theme_classic()
#### Calculate Normalized Values ####
# Fraction of the ResOpsUS-observed capacity that is filled each year.
normalized_matrix$normalized <-
  normalized_matrix$value / normalized_matrix$ResOpsUS
graph_title3 <- paste0(
  "HUC", huc2,
  " Fraction of Storage Filled in Dataset (Observed Historical Storage Over Storage Capacity) "
)
norm_storage_graph <- ggplot() +
  geom_line(normalized_matrix,
            mapping = aes(x = all_years, y = normalized)) +
  scale_x_continuous(limits = c(1950, 2020)) +
  #geom_line(drought_indices, mapping = aes(x = all_years, y = SPI, color = "red"))+
  labs(x = "Year", y = "Fraction Filled", title = graph_title3) +
  theme_classic() +
  theme(legend.position = "none") # turns legend off
###### Pull out SPI and SPEI by HUC2 #####
# Column 1 of the SPI table is the year; column (huc2 + 1) is this HUC2's index.
drought_index <- huc2+1
SPI_huc <- SPI[,c(1,drought_index)]
colnames(SPI_huc) <- c("all_years", "SPI")
# NOTE(review): this pulls from the SPI table but labels the column "SPEI".
# If a separate SPEI table exists, this is almost certainly a copy-paste bug
# (SPEI_huc <- SPEI[...]) — confirm before fixing.
SPEI_huc <- SPI[,c(1,drought_index)]
colnames(SPEI_huc) <- c("all_years", "SPEI")
###### Add in Drought Periods to See Trends ######
# Outer-join both indices by year, then attach them to the normalized storage.
drought_indices <- merge(SPI_huc, SPEI_huc, by = "all_years", all = TRUE)
normalized_final <- merge(normalized_matrix, drought_indices, by = "all_years", all = TRUE)
###### MAKE DAILY AVERAGES CSV ###########
# Row-wise total storage across all dams. The "* NA^!rowSums(!is.na(...))"
# factor turns rows that are entirely NA into NA instead of 0
# (NA^TRUE == NA, NA^FALSE == 1).
final_daily <-rowSums(interpolated[,2:ncol(interpolated)], na.rm=TRUE) *NA^!rowSums(!is.na(interpolated[,2:ncol(interpolated)]))
# NOTE(review): cbind() of a Date column with a numeric vector yields a
# matrix, coercing the dates — verify final_daily_matrix[,1] still
# round-trips through as.Date() below as intended.
final_daily_matrix <- cbind(all_huc2$date, final_daily)
colnames(final_daily_matrix) <- c("date", "storage")
all_years <- year(as.Date(final_daily_matrix[,1]))
months<- month(final_daily_matrix[,1])
final_daily_matrix <- cbind(final_daily_matrix, all_years, months)
# "YYYY-M" key used for monthly aggregation below.
year_mon <- paste0(final_daily_matrix[,3], "-", final_daily_matrix[,4])
final_daily_matrix <- cbind(final_daily_matrix, year_mon)
final_daily_matrix <- as.data.frame(final_daily_matrix, stringsAsFactors = F)
final_daily_matrix$storage <- as.numeric(final_daily_matrix$storage)
# get storage capacity chart in the right area
# Daily total reported capacity across dams; a total of exactly 0 means no
# dam reported any capacity that day, so treat it as missing.
row_sum_stor_cap <- rowSums(stor_capacity_daily[,2:ncol(stor_capacity_daily)],na.rm= T)
# Fixed: was `== "0"`, a numeric-to-character comparison that only worked by
# implicit coercion; compare numerically.
row_sum_stor_cap[row_sum_stor_cap == 0] <- NA
capacity_matrix <- data.frame("date" =stor_capacity_daily$date, "ResOpsUS" =row_sum_stor_cap)
# Inner join on date, then the fraction of capacity filled each day.
final_daily_print <- merge(final_daily_matrix, capacity_matrix, by = "date")
final_daily_print$fraction<- as.numeric(final_daily_print$storage)/as.numeric(final_daily_print$ResOpsUS)
# Average the daily storage and capacity within each year-month key.
month_data2 <- as.data.frame(
  final_daily_print %>%
    group_by(year_mon) %>%
    summarise(storage = mean(as.numeric(storage), na.rm = T),
              ResOpsUS = mean(as.numeric(ResOpsUS)))
)
# Recover the calendar year from the "YYYY-M" key, then normalize by capacity.
month_data2$all_years <- year(as.Date(paste0(month_data2$year_mon, "-01")))
month_data2$normalized <- month_data2$storage / month_data2$ResOpsUS
# Scatter of normalized storage vs SPEI, labelled by year, with a 1:1
# reference line; then report the SPI correlation.
extra_graph <- ggplot(normalized_final, mapping = aes(x = SPEI, y = normalized)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, color = "brown", linetype = "dashed") +
  scale_x_continuous(limits = c(1950, 2020)) +
  geom_text(aes(label = all_years), check_overlap = T) +
  labs(x = "SPEI", y = "Storage (MCM)", title = "Storage to Drought Metric") +
  theme_classic() +
  theme(legend.position = "none") # turns legend off
cor(normalized_final$SPI, normalized_final$normalized, use = "complete.obs")
###### Make Panel Plot #####
# 2x2 panel of the four summary graphs for this HUC2; grid.arrange() draws
# the panel and returns the arranged grob.
total_plot <- grid.arrange(storage_cap_graph, fraction_filled_graph, norm_storage_graph, storage_graph,   nrow = 2, ncol =2)