#finding the actual probability where umpires call a strike
#visualizing the 50% boundary of called strikes

#load packages
library(tidyr)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggridges) #theme package
library(mgcv) #run bam model
library(lubridate)

# Read the full 2025 season pitch data from disk.
full <- read.csv('data/statcast2025.csv')


# Season-average top and bottom of the strike zone, used to draw the "set"
# zone in the visuals. Rows missing either sz_top or sz_bot are removed before
# averaging. (An alternative is the generalized fixed zone: 1.5 bottom and
# 3.5 top.)
year_metrics <- full |>
  drop_na(sz_top, sz_bot) |>
  select(sz_top, sz_bot)

top2025 <- mean(year_metrics[["sz_top"]])
bot2025 <- mean(year_metrics[["sz_bot"]])

# Build the plate outline as a ggplot2 layer for use in visuals.
# Takes no arguments. Returns a geom_polygon() layer (white fill, gray60
# outline) drawn from five fixed corner points, ready to be added to a plot
# with `+`.
geom_plate <- function() {
  plate_outline <- data.frame(
    x = c(-.7083, .7083, .7083, 0, -.7083),
    y = c(0, 0, -.25, -.5, -.25)
  )

  geom_polygon(
    data = plate_outline,
    mapping = aes(x = x, y = y),
    fill = "white",
    color = "gray60",
    linewidth = 1.25
  )
}



# Slim modelling dataset: one row per pitch the umpire had to call.
#  - keep only "ball" and "called_strike" descriptions
#  - is_strike is 1 for a called strike and 0 for a ball
#  - plate_x is mirrored for left-handed batters (stand == "L")
#  - rows with any missing value in the kept columns are dropped
#  - months 10 and 11 are removed last: there is not enough data in those
#    months to run the full BAM model
year_25 <- full |>
  filter(description %in% c("ball", "called_strike")) |>
  mutate(
    is_strike = as.numeric(description == "called_strike"),
    game_month = month(game_date),
    plate_x = ifelse(stand == "L", -plate_x, plate_x)
  ) |>
  select(game_month, plate_x, plate_z, stand, is_strike) |>
  drop_na() |>
  filter(!(game_month %in% c(10, 11)))




# Fit one bam() model per month.
# The pitches are grouped by the month the game occurred and nested, so each
# row of the result holds one month's data in `data` and its fitted model in
# `model`.
monthly_modeling <- year_25 |>
  group_by(game_month) |>
  nest() |>
  mutate(
    model = map(
      data,
      \(month_pitches) bam(
        # te() lets the two coordinates be on different scales
        is_strike ~ te(plate_x, plate_z),
        data = month_pitches,
        family = binomial,
        discrete = TRUE
      )
    )
  )


# Prediction grid: every combination of 50 evenly spaced plate_x values
# (-1.5 to 1.5) and 50 evenly spaced plate_z values (1 to 4), i.e. a 50x50
# grid of coordinates around the strike zone.
grid25 <- local({
  x_coords <- seq(from = -1.5, to = 1.5, length.out = 50)
  z_coords <- seq(from = 1, to = 4, length.out = 50)
  expand.grid(plate_x = x_coords, plate_z = z_coords)
})


# Add one column of predicted strike probabilities to grid25 for each month.
# seq_len() makes the loop a no-op when monthly_modeling has no rows; the
# previous 1:nrow() form would iterate over c(1, 0) and fail when pulling a
# model that does not exist.
for (i in seq_len(nrow(monthly_modeling))) {
  month_val <- monthly_modeling$game_month[i] # month this model was fit on
  current_model <- monthly_modeling$model[[i]] # fitted bam() model for that month

  # column name, e.g. month_4
  col_name <- paste0("month_", month_val)

  # Predict at every grid coordinate. Only the two coordinate columns are
  # passed, so columns appended for earlier months are not carried into
  # predict().
  grid25[[col_name]] <- predict(
    current_model,
    newdata = grid25[c("plate_x", "plate_z")],
    type = "response" # probability scale (0-1)
  )
}


# Reshape the grid to long form for plotting: one row per grid coordinate and
# month, with the predicted value in `probability`.
# The month number is taken from the "month_<n>" column names, converted to
# the month name, and stored as a factor ordered by calendar month so the
# visuals keep the months in order.
grid25_long <- grid25 |>
  pivot_longer(
    cols = starts_with("month_"),
    names_to = "month",
    names_prefix = "month_",
    values_to = "probability"
  ) |>
  mutate(month = factor(month.name[as.numeric(month)], levels = month.name))



# Static visual: the 50% called-strike boundary for each month, drawn on top
# of each other and colored by month (sequential "Blues" palette), with the
# "set" strike zone outlined in black.
# The data is not split by batter handedness here: grid25_long has no `stand`
# column, so there is nothing to facet on.
p <- ggplot(grid25_long, aes(plate_x, plate_z, z = probability, color = month)) +
  # line where the predicted probability of a called strike is 50%
  geom_contour(breaks = 0.5, linewidth = 1.1) +
  # "set" strike zone. annotate() draws the rectangle once; geom_rect() with
  # constant aesthetics would inherit the plot data and redraw the same
  # rectangle for every grid row.
  annotate("rect", xmin = -0.7083, xmax = 0.7083, ymin = bot2025, ymax = top2025,
           fill = NA, color = "black", alpha = 0.3) +
  scale_color_brewer(palette = "Blues") +
  labs(title = "'Actual' Strike Zone each Month in 2025",
       subtitle = "Compared to 'Set' Strike Zone from Umpire's Perspective",
       x = "Horizontal Location of Pitch",
       y = "Vertical Location of Pitch",
       color = "Month"
  ) +
  coord_fixed() +
  theme_minimal()


# Center the titles and add spacing so the labels stay readable.
# strip.text and panel.spacing only take effect if the plot is faceted.
p <- p + theme(plot.title = element_text(hjust = 0.5, vjust = 0.5, size = 14),
               plot.subtitle = element_text(hjust = 0.5, vjust = 0.5, size = 12,
                                            margin = margin(b = 20, unit = "pt")),
               axis.title.x = element_text(margin = margin(t = 15, unit = "pt")),
               axis.title.y = element_text(margin = margin(r = 15, unit = "pt")),
               strip.text = element_text(size = 10, color = 'black'),
               panel.spacing = unit(2, "lines"))

# view static visual
p





#create a gif of the strike zone each month with the actual zone being placed on top of the others
library(av)
library(gifski)
library(gganimate)

# Make sure month is a factor ordered by calendar month.
grid25_long$month <- factor(grid25_long$month, levels = month.name)

# Order in which the months appear in the animation (March through September).
month_order <- month.name[3:9]

# One probability per month and grid coordinate (averaged if there are
# duplicates), with month re-leveled to the animation order.
grid_cleaned <- grid25_long |>
  group_by(month, plate_x, plate_z) |>
  summarise(probability = mean(probability, na.rm = TRUE), .groups = "drop") |>
  mutate(month = factor(month, levels = month_order))


# Animated version of the monthly 50% called-strike boundary.
# Each state of the animation is one month; shadow_mark() keeps the months
# already shown on the plot while later months are added.
gifs25 <-
  ggplot(grid_cleaned, aes(plate_x, plate_z, z = probability, color = month, group = month)) +
  # "set" strike zone. annotate() draws the rectangle once; geom_rect() with
  # constant aesthetics would inherit the plot data and redraw the same
  # rectangle for every grid row.
  annotate("rect", xmin = -0.7083, xmax = 0.7083, ymin = bot2025, ymax = top2025,
           fill = NA, color = "black", alpha = 0.3) +
  # line where the predicted probability of a called strike is 50%
  geom_contour(aes(group = month), breaks = 0.5, linewidth = 1.1) +
  scale_color_brewer(palette = "Blues") +
  coord_fixed() +
  theme_minimal() +
  labs(title = "'Actual' Strike Zone each Month in 2025 from Umpire's Perspective",
       subtitle = "Month: {closest_state}",
       x = "Horizontal Location of Pitch",
       y = "Vertical Location of Pitch",
       color = "Month") +
  transition_states(month, transition_length = 2, state_length = 1) +
  shadow_mark(past = TRUE, future = FALSE) # keeps the earlier months on the plot

# Center the titles and add spacing so the labels stay readable.
# strip.text and panel.spacing only take effect if the plot is faceted.
gifs25 <- gifs25 + theme(plot.title = element_text(hjust = 0.5, vjust = 0.5, size = 14),
                         plot.subtitle = element_text(hjust = 0.5, vjust = 0.5, size = 12,
                                                      margin = margin(b = 20, unit = "pt")),
                         axis.title.x = element_text(margin = margin(t = 15, unit = "pt")),
                         axis.title.y = element_text(margin = margin(r = 15, unit = "pt")),
                         strip.text = element_text(size = 10, color = 'black'),
                         panel.spacing = unit(2, "lines"))



# Render the animated plot with the gifski renderer:
# 80 frames at 10 frames per second, 800 x 600.
final_gifs25 <- animate(
  gifs25,
  nframes = 80,
  fps = 10,
  width = 800,
  height = 600,
  renderer = gifski_renderer()
)

# Save the rendered strike zone gif.
anim_save(filename = "25strikezone-nostand.gif", animation = final_gifs25)






