#same work as 25strikezone-nostance.R, but using both 24 and 25 seasons to draw next to each other
#no other differences

#load packages
library(tidyr)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggridges) #theme package
library(mgcv) #run bam model
library(lubridate)
library(av)
library(gifski)
library(gganimate)



#load the raw pitch-level tables for both seasons
fulls25 <- read.csv('data/statcast2025.csv')
fulls24 <- read.csv('data/statcast2024.csv')



#shared prep for one season of raw pitches
#  pitches: raw pitch table for a single season
#  season:  season label, stored as a factor column called year
#keeps only pitches the umpire had to judge (ball / called strike), since those
#are the only rows that show how the zone is being called, then keeps just the
#columns the models and plots need and drops rows missing any of them
#NOTE(review): plate_x is mirrored for left-handed batters (stand == 'L'), so
#the horizontal axis is presumably batter-relative rather than a true umpire
#view - confirm this is intended, since the plot titles say "Umpire's Perspective"
prep_called_pitches <- function(pitches, season) {
  pitches |>
    filter(description %in% c("ball", "called_strike")) |>
    mutate(
      is_strike = ifelse(description == "called_strike", 1, 0),
      game_month = month(game_date),
      plate_x = ifelse(stand == 'L', -plate_x, plate_x)
    ) |>
    select(game_month, plate_x, plate_z, stand, is_strike) |>
    drop_na() |>
    mutate(year = as.factor(season)) #constant year column so plots can facet by season
}

s25 <- prep_called_pitches(fulls25, 2025)
s24 <- prep_called_pitches(fulls24, 2024)


#one table holding both seasons
#lets the visuals facet by year to put 24 and 25 next to each other
both <- rbind(s24, s25)


#height of the "set" strike zone used as a reference box in the visuals
#only sz_top and sz_bot are kept from each raw table, and rows missing either are dropped
fulls25 <- drop_na(select(fulls25, sz_top, sz_bot))
fulls24 <- drop_na(select(fulls24, sz_top, sz_bot))

#stack both seasons so the averages are taken over every remaining row
strikezone <- rbind(fulls25, fulls24)

#average top and bottom of the zone across both seasons
top <- mean(strikezone[["sz_top"]])
bot <- mean(strikezone[["sz_bot"]])



#pitch count and called-strike rate for every month / batter side / season
#printed in full (up to 36 rows) to check whether any month is too thin to model
print(
  summarise(
    group_by(both, game_month, stand, year),
    pitch_count = n(),
    strike_rate = mean(is_strike),
    .groups = "drop"
  ),
  n = 36
)

#months 10 and 11 have far fewer pitches than the rest - not enough for a BAM model
both <- filter(both, !(game_month %in% c(10, 11)))


#############################################
#running model and predictions


#one row per season and month, with that month's pitches nested in `data`
#and a bam() model fit to them stored in `model`
#the model is a binomial fit of is_strike on a tensor-product smooth of pitch location
monthly_modeling <- both |>
  group_by(year, game_month) |>  #need a separate model for each year and month
  nest() |>
  ungroup() |>  #drop the grouping so later steps work on a plain tibble
  mutate(model = map(data, function(month_data) {
    bam(
      is_strike ~ te(plate_x, plate_z),
      data = month_data,
      family = binomial,
      discrete = TRUE
    )
  }))


#create a grid that the predictions will be made on
#50x50 grid of locations around the strike zone
grid <- expand.grid(
  plate_x = seq(-1.5, 1.5, length.out = 50),
  plate_z = seq(1, 4, length.out = 50)
)

#coordinate-only copy used as newdata, so the predictors stay fixed while
#prediction columns are added to grid
grid_coords <- grid[c("plate_x", "plate_z")]


#add one prediction column per fitted model
#seq_len() keeps the loop from running when there are no models
for (i in seq_len(nrow(monthly_modeling))) {
  month_val <- monthly_modeling$game_month[i]
  year_val  <- monthly_modeling$year[i]
  current_model <- monthly_modeling$model[[i]]

  #column name is pred_<month>_<year>, ex: pred_4_2025
  col_name <- paste0("pred_", month_val, "_", year_val)

  #predicted probability of a called strike at every grid location
  grid[[col_name]] <- predict(
    current_model,
    newdata = grid_coords,
    type = "response"
  )
}


#reshape the grid to long form: one row per location, month and year
#the pred_<month>_<year> column names are split into a month and a year column
grid_long <- pivot_longer(
  grid,
  cols = starts_with("pred_"),
  names_prefix = "pred_",
  names_sep = "_",
  names_to = c("month", "year"),
  values_to = "probability"
)

#month number becomes a month name ordered by calendar, year becomes a factor
grid_long <- mutate(
  grid_long,
  month = factor(month.name[as.numeric(month)], levels = month.name),
  year = as.factor(as.numeric(year))
)



###########################################
#VISUALS
###########################################
#polygon layer in the shape of home plate, to be added to a ggplot
#takes no arguments and returns the geom_polygon layer
geom_plate <- function() {
  #five corners of the plate outline
  plate_outline <- data.frame(
    x = c(-.7083, .7083, .7083, 0, -.7083),
    y = c(0, 0, -.25, -.5, -.25)
  )

  geom_polygon(
    data = plate_outline,
    mapping = aes(x = x, y = y),
    fill = "white",
    color = "gray60",
    linewidth = 1.25
  )
}



#visual comparing 2024 and 2025
#one contour per month at the 50% called-strike probability, one panel per season
y <- ggplot(grid_long, aes(plate_x, plate_z, z = probability, color = month)) +
  geom_contour(breaks = 0.5, linewidth = 1.1) +  #line where the probability of a strike is 50%
  #"set" strike zone drawn with annotate() so it is a single rectangle per panel;
  #geom_rect() with constant coordinates would redraw it once for every grid row
  annotate("rect", xmin = -0.7083, xmax = 0.7083, ymin = bot, ymax = top,
           fill = NA, color = "black") +
  scale_color_brewer(palette = "Blues") +
  labs(title = "'Actual' Strike Zone each Month in 2024 and 2025 from Umpire's Perspective",
       subtitle = "Compared to 'Set' Strike Zone",
       x = "Horizontal Location of Pitch",
       y = "Vertical Location of Pitch",
       color = "Month"
  ) +
  facet_wrap(~year) + #separate visuals for 24 and 25 seasons
  coord_fixed() +
  theme_minimal() +
  #tidy up the visual: centered titles, extra margins, space between panels
  theme(plot.title = element_text(hjust = 0.5, vjust = 0.5, size = 14),
        plot.subtitle = element_text(hjust = 0.5, vjust = 0.5, size = 12,
                                     margin = margin(b=20, unit="pt")),
        axis.title.x = element_text(margin = margin(t=15, unit="pt")),
        axis.title.y = element_text(margin = margin(r=15, unit="pt")),
        strip.text = element_text(size = 10, color = 'black'),
        panel.spacing = unit(2, "lines"))

#view static visual
y





#visual broken up by year AND stand
#grid_long has no stand column (the monthly models pool both batter sides), so
#faceting it by stand fails when the plot is drawn
#this section fits one model per year, month and batter side and plots those instead

#same 50x50 grid of locations used for the pooled predictions
stance_grid <- expand.grid(
  plate_x = seq(-1.5, 1.5, length.out = 50),
  plate_z = seq(1, 4, length.out = 50)
)

#one row per grid location for every year / stand / month combination
grid_stand <- both |>
  group_by(year, stand, game_month) |>
  nest() |>
  ungroup() |>
  mutate(preds = map(data, function(stance_data) {
    fit <- bam(
      is_strike ~ te(plate_x, plate_z),
      data = stance_data,
      family = binomial,
      discrete = TRUE
    )
    #predicted probability of a called strike at every grid location
    mutate(
      stance_grid,
      probability = as.numeric(predict(fit, newdata = stance_grid, type = "response"))
    )
  })) |>
  select(year, stand, game_month, preds) |>
  unnest(preds) |>
  mutate(month = factor(month.name[game_month], levels = month.name))

#NOTE(review): plate_x in `both` is mirrored for left-handed batters, so the L
#and R panels presumably share a batter-relative horizontal axis - confirm intended
p <- ggplot(grid_stand, aes(plate_x, plate_z, z = probability, color = month)) +
  geom_contour(breaks = 0.5, linewidth = 1.1) +  #line where the probability of a strike is 50%
  #"set" strike zone drawn once per panel rather than once per grid row
  annotate("rect", xmin = -0.7083, xmax = 0.7083, ymin = bot, ymax = top,
           fill = NA, color = "black") +
  scale_color_brewer(palette = "Blues") +
  labs(title = "'Actual' Strike Zone each Month in 2024 and 2025",
       subtitle = "Compared to 'Set' Strike Zone for Left vs Right-Handed Batters",
       x = "Horizontal Location of Pitch",
       y = "Vertical Location of Pitch",
       color = "Month"
  ) +
  facet_wrap(year~stand) + #separate visuals for left and right handed batters and year
  coord_fixed() +
  theme_minimal() +
  #centered titles, extra margins, space between panels
  theme(plot.title = element_text(hjust = 0.5, vjust = 0.5, size = 14),
        plot.subtitle = element_text(hjust = 0.5, vjust = 0.5, size = 12,
                                     margin = margin(b=20, unit="pt")),
        axis.title.x = element_text(margin = margin(t=15, unit="pt")),
        axis.title.y = element_text(margin = margin(r=15, unit="pt")),
        strip.text = element_text(size = 10, color = 'black'),
        panel.spacing = unit(2, "lines"))

#view static visual
p




#24vs25 gif
#animated version of the year comparison: months appear one at a time and
#each earlier month stays on the plot
library(av)
library(gifski)
library(gganimate)

#order the months are stepped through in the animation
month_order <- c("March", "April", "May", "June", "July", "August", "September")

#one probability per year / month / grid location, with month re-levelled to month_order
grid_cleaned <- grid_long |>
  group_by(year, month, plate_x, plate_z) |>
  summarise(probability = mean(probability, na.rm = TRUE), .groups = "drop") |>
  mutate(month = factor(month, levels = month_order))


gif25 <- ggplot(
  grid_cleaned,
  aes(plate_x, plate_z, z = probability, color = month, group = month)
) +
  #"set" strike zone
  geom_rect(
    xmin = -0.7083, xmax = 0.7083, ymin = bot, ymax = top,
    fill = NA, color = "black", alpha = 0.3, inherit.aes = FALSE
  ) +
  #line where the probability of a strike is 50%
  geom_contour(breaks = 0.5, linewidth = 1.1) +
  scale_color_brewer(palette = "Blues") +
  facet_wrap(~year) +
  coord_fixed() +
  theme_minimal() +
  labs(
    title = "'Actual' Strike Zone each Month in 2024 vs 2025 from Umpire's Perspective",
    subtitle = "Month: {closest_state}",
    x = "Horizontal Location of Pitch",
    y = "Vertical Location of Pitch",
    color = "Month"
  ) +
  transition_states(month, transition_length = 2, state_length = 1) +
  #keeps the earlier months on the plot
  shadow_mark(past = TRUE, future = FALSE) +
  #centered titles, extra margins, space between panels
  theme(
    plot.title = element_text(hjust = 0.5, vjust = 0.5, size = 14),
    plot.subtitle = element_text(
      hjust = 0.5, vjust = 0.5, size = 12,
      margin = margin(b = 20, unit = "pt")
    ),
    axis.title.x = element_text(margin = margin(t = 15, unit = "pt")),
    axis.title.y = element_text(margin = margin(r = 15, unit = "pt")),
    strip.text = element_text(size = 10, color = 'black'),
    panel.spacing = unit(2, "lines")
  )


#render the animation stored in gif25 and write it to disk
final_gif <- animate(
  gif25,
  nframes = 80,
  fps = 10,
  width = 800,
  height = 600,
  renderer = gifski_renderer()
)

anim_save("changing24and25-umpirepov.gif", final_gif)
