#creating one full AB simulation

#building off of simulation_updates.R to include all new changes
#adding new necessary functions to create a full AB simulation
#making something dynamic and easy to work with for UI implementation

#full R working version that is converted to python

############################################################################################
#libraries
library(tidyr)
library(tidyverse)
library(ggplot2)
library(ggridges)
library(dplyr)
library(nnet)  


############################################################################################
#(1) import the clean dataset with all pitcher options

pitcher_data <- read.csv(file = "clean_pitcherdata.csv")

############################################################################################
#(2) store user selected variables
#hard-coded for now; these become dropdowns on the website
selected_pitcher <- 434378      #pitcher id
selected_batter_stand <- "L"    #batter stance chosen by the user
selected_pitch <- "Changeup"    #pitch type chosen by the user


#keep only the rows thrown by the selected pitcher to the selected batter stance
p_data <- dplyr::filter(
  pitcher_data,
  pitcher == selected_pitcher & stand == selected_batter_stand
)


#dataset to model for the selected pitch
#narrow the pitcher/stance rows down to the selected pitch type
sub_data <- dplyr::filter(p_data, pitch_name == selected_pitch)
  
#empty list to store the swing-decision coefficients
#(stays empty when there are not enough rows to fit the model)
swing_dec_params <- list()

#logit regression to find prob of a swing
#based on the pitch's distance from the center and number of balls and strikes in the count
if (nrow(sub_data) > 50) {

  model <- glm(is_swing ~ dist_x + dist_z + balls + strikes,
               data = sub_data, family = binomial)

  #strip the coefficient names: a named intercept makes every downstream
  #probability carry the name "(Intercept)", which then ends up as the row
  #names of the simulation data frame
  swing_coefs <- unname(coef(model))

  #glm reports NA for aliased predictors (e.g. a count column that never varies
  #in sub_data); use 0 for those, the same way safe_get() does for the contact model
  swing_coefs[is.na(swing_coefs)] <- 0

  swing_dec_params <- list(
    intercept = swing_coefs[1], #heart of the plate
    b_x = swing_coefs[2], #horizontal decision
    b_z = swing_coefs[3], #vertical decision
    b_b = swing_coefs[4], #ball coefficient
    b_s = swing_coefs[5] #strike coefficient
  )
} else {
  #make the "model not fit" case visible instead of failing later in the simulation
  warning("Only ", nrow(sub_data), " rows for the selected pitch; ",
          "swing model was not fit and swing_dec_params is empty.",
          call. = FALSE)
}


#creating another empty list to store the coefficients for the type of contact that could result from a swing
#(stays empty when the model below is not fit)
contact_params <- list()

#data for selected pitch, only where the batter swung
sub_data <- p_data |> filter(pitch_name == selected_pitch, is_swing == 1)

#enough rows and at least two distinct outcomes to model
if (nrow(sub_data) > 25 && length(unique(sub_data$simple_result)) > 1) {
  sub_data$simple_result <- as.factor(sub_data$simple_result)

  #make sure that swinging_strike (whiff) is the baseline category
  if ("swinging_strike" %in% levels(sub_data$simple_result)) {
    sub_data$simple_result <- relevel(sub_data$simple_result, ref = "swinging_strike")
  }

  #same predictors as swing - but now being used to predict whiff, foul, hit
  multi_model <- nnet::multinom(simple_result ~ dist_x + dist_z + balls + strikes,
                                data = sub_data, trace = FALSE)

  coeffs <- coef(multi_model)

  #with only two outcome levels coef() returns a plain named vector for the single
  #non-baseline level; reshape it into a 1-row matrix so lookups work the same way.
  #the row name is the non-reference factor level: the previous "every outcome
  #that is not swinging_strike" rule produced two names for one row (and an error)
  #whenever swinging_strike was not among the observed outcomes
  if (!is.matrix(coeffs)) {
    coeffs <- matrix(coeffs, nrow = 1, ncol = length(coeffs),
                     dimnames = list(levels(sub_data$simple_result)[-1],
                                     names(coeffs)))
  }

  #make sure coeff has a value, otherwise return 0
  #NOTE(review): an outcome that never appears in sub_data gets all-zero
  #coefficients here, which the simulation turns into a non-zero probability
  #for that outcome - confirm this fallback is intended
  safe_get <- function(res_row, col_name) {
    if (res_row %in% rownames(coeffs) && col_name %in% colnames(coeffs)) {
      val <- coeffs[res_row, col_name]
      return(if (is.na(val)) 0 else val)
    }
    0
  }

  #get coefficients and store
  contact_params <- list(
    foul_int = safe_get("foul", "(Intercept)"),
    foul_b_x = safe_get("foul", "dist_x"),
    foul_b_z = safe_get("foul", "dist_z"),
    foul_b_b = safe_get("foul", "balls"),
    foul_b_s = safe_get("foul", "strikes"),
    inplay_int = safe_get("hit_into_play", "(Intercept)"),
    inplay_b_x = safe_get("hit_into_play", "dist_x"),
    inplay_b_z = safe_get("hit_into_play", "dist_z"),
    inplay_b_b = safe_get("hit_into_play", "balls"),
    inplay_b_s = safe_get("hit_into_play", "strikes")
  )
}


############################################################################################
#(3) create simulation based on inputted selection

#simulation updates
#function 1: pitch location L

#draws one pitch location around the intended target (mu_x, mu_z), using the
#pitcher's observed spread for the pitch type (sigma_x, sigma_z) as the sd of
#two independent normal draws
#a draw is only accepted when it falls inside the 1-sd ellipse around the target
#(the same area drawn around the pitch in the html), otherwise it is redrawn
#returns list(x = plate_x coord, z = plate_z coord)
AB_find_location <- function(mu_x, sigma_x, mu_z, sigma_z) {
  inside_ellipse <- FALSE

  #rejection sampling: keep drawing until the candidate is valid
  while (!inside_ellipse) {
    cand_x <- rnorm(1, mean = mu_x, sd = sigma_x)
    cand_z <- rnorm(1, mean = mu_z, sd = sigma_z)

    #squared distance from the target measured in sd's along each axis
    x_term <- (cand_x - mu_x)^2 / sigma_x^2
    z_term <- (cand_z - mu_z)^2 / sigma_z^2

    inside_ellipse <- (x_term + z_term) <= 1
  }

  list(x = cand_x, z = cand_z)
}



#function 2: batter decision B
#2) B(L, P) ~ swing or not
#takes in the simulated location (L_x, L_z), number of balls (B) and strikes (S)
#in the count, and the swing-decision coefficients for the selected pitch
#(swing_dec_params: intercept, b_x, b_z, b_b, b_s)
#returns list(decision = 1/0 swing flag, prob = probability of a swing)
#stops with a clear error when the coefficients are missing (model was not fit)
test_AB_batter <- function(L_x, L_z, B, S, swing_dec_params) {
  required <- c("intercept", "b_x", "b_z", "b_b", "b_s")
  missing_params <- setdiff(required, names(swing_dec_params))
  if (length(missing_params) > 0) {
    stop("swing_dec_params is missing: ",
         paste(missing_params, collapse = ", "),
         " (was the swing model fit?)", call. = FALSE)
  }
  params <- swing_dec_params

  #using coefficients from pitch modeling and the simulated location and current count
  #NOTE(review): the model is fit on dist_x / dist_z but is fed L_x and (L_z - 2.5)
  #here - confirm those columns are defined the same way in the cleaned data
  logit <- params[["intercept"]] +
    (params[["b_x"]] * L_x) +
    (params[["b_z"]] * (L_z - 2.5)) +
    (params[["b_b"]] * B) +
    (params[["b_s"]] * S)

  #probability the batter swings; unname() keeps coefficient names such as
  #"(Intercept)" from leaking into the result
  prob_swing <- unname(1 / (1 + exp(-logit)))

  #one uniform draw per probability: swing (1) when the draw falls below it
  decision <- as.numeric(runif(length(prob_swing)) < prob_swing)

  #return 1/0 decision to swing and the probability associated
  list(decision = decision, prob = prob_swing)
}




#function 3: swing result
#takes the simulated location (L_x, L_z), current count (B balls, S strikes), and
#the contact coefficients for the selected pitch (contact_params: foul_* and inplay_*)
#returns list(outcome = "Miss" / "Foul" / "In Play",
#             probs = c(p_whiff, p_foul, p_inplay))
#stops with a clear error when the coefficients are missing (model was not fit)
ttest_AB_swing <- function(L_x, L_z, B, S, contact_params) {
  required <- c("foul_int", "foul_b_x", "foul_b_z", "foul_b_b", "foul_b_s",
                "inplay_int", "inplay_b_x", "inplay_b_z", "inplay_b_b", "inplay_b_s")
  missing_params <- setdiff(required, names(contact_params))
  if (length(missing_params) > 0) {
    stop("contact_params is missing: ",
         paste(missing_params, collapse = ", "),
         " (was the contact model fit?)", call. = FALSE)
  }
  params <- contact_params

  #calculate multinomial predictors
  #baseline is whiff, so its linear predictor is fixed at 0
  u_whiff <- 0
  u_foul <- params[["foul_int"]] +
    (params[["foul_b_x"]] * L_x) +
    (params[["foul_b_z"]] * (L_z - 2.5)) +
    (params[["foul_b_b"]] * B) +
    (params[["foul_b_s"]] * S)
  u_inplay <- params[["inplay_int"]] +
    (params[["inplay_b_x"]] * L_x) +
    (params[["inplay_b_z"]] * (L_z - 2.5)) +
    (params[["inplay_b_b"]] * B) +
    (params[["inplay_b_s"]] * S)

  #softmax over the three predictors - probabilities add up to 1
  #subtracting the max first keeps exp() from overflowing to Inf (and the
  #probabilities from turning into NaN) when a predictor is very large
  utilities <- unname(c(u_whiff, u_foul, u_inplay))
  weights <- exp(utilities - max(utilities))
  probs <- weights / sum(weights)

  #draw the single final outcome, weighted by the probabilities
  outcome <- sample(c("Miss", "Foul", "In Play"), size = 1, prob = probs)

  list(outcome = outcome, probs = probs)
}


#function 4: umpire decision
#if the batter did NOT swing: run this function
#takes the pitch location (L_x, L_z; vectors are allowed) and the top / bottom of
#the strike zone (sz_top, sz_bot)
#returns 1 (called strike) or 0 (ball) for each location
AB_umpire <- function(L_x, L_z, sz_top = 3.5, sz_bot = 1.5) {
  plate_limit <- 0.7083 #left and right edges of the plate, measured from its center

  # calculate how far the pitch is inside of the strike zone
  # negative values signal that the pitch is outside of the set strike zone
  #horizontal distance from the nearest side edge
  dist_x <- plate_limit - abs(L_x)

  #vertical distance from the nearest top/bottom edge
  #the center is derived from sz_top / sz_bot (2.5 with the defaults) so that a
  #non-default zone is measured around its own middle
  sz_center <- (sz_top + sz_bot) / 2
  half_height <- (sz_top - sz_bot) / 2
  dist_z <- half_height - abs(L_z - sz_center)

  # the probability is determined by whether the x or z coord. is closer to the edge of the strike zone
  min_inside_dist <- pmin(dist_x, dist_z)

  #k = steepness, logistic growth rate
  #value was determined through testing and viewing pitch results
  #slightly negative drives the probability to 0, slightly positive drives probability to 1
  #this reflects an accurate umpire, probabilities around 50/50 when pitch is on/close to edge
  k <- 30
  logit <- k * min_inside_dist

  #get actual probability between 0-1 for whether strike or ball
  prob_strike <- 1 / (1 + exp(-logit))

  #one uniform draw per pitch location
  #if the draw is less than prob_strike, then umpire called strike (1)
  ifelse(runif(length(L_x)) < prob_strike, 1, 0)
}


#simulates n_pitches independent pitches and stores the details of each one
#  n_pitches        number of pitches to simulate
#  mu_x, mu_z       intended target of the pitch
#  sigma_x, sigma_z spread of the pitch around the target
#  P                not used by the simulation; kept (now optional) so existing calls still work
#  B, S             balls and strikes in the count
#  swing_dec_params coefficients passed to test_AB_batter
#  contact_params   coefficients passed to ttest_AB_swing
#returns a data frame with one row per pitch:
#  x, z (location), outcome, true_call (umpire's call on the location),
#  p_swing, and p_whiff / p_foul / p_inplay (NA when the batter did not swing)
AB_run_sim <- function(n_pitches, mu_x, mu_z, sigma_x, sigma_z, P = NULL, B, S,
                       swing_dec_params, contact_params) {
  #preallocate one vector per column instead of row-binding n one-row data frames
  x <- numeric(n_pitches)
  z <- numeric(n_pitches)
  outcome <- character(n_pitches)
  true_call <- character(n_pitches)
  p_swing <- numeric(n_pitches)
  p_whiff <- rep(NA_real_, n_pitches)
  p_foul <- rep(NA_real_, n_pitches)
  p_inplay <- rep(NA_real_, n_pitches)

  for (i in seq_len(n_pitches)) {
    loc <- AB_find_location(mu_x, sigma_x, mu_z, sigma_z)

    #the umpire's call is drawn once per pitch; a taken pitch reuses it below so
    #outcome and true_call can never disagree for the same pitch
    ump_call <- AB_umpire(loc$x, loc$z, sz_top = 3.5, sz_bot = 1.5)

    swing_decision <- test_AB_batter(loc$x, loc$z, B, S, swing_dec_params)

    if (swing_decision$decision == 1) {
      swing <- ttest_AB_swing(loc$x, loc$z, B, S, contact_params)
      outcome[i] <- swing$outcome
      p_whiff[i] <- swing$probs[1]
      p_foul[i] <- swing$probs[2]
      p_inplay[i] <- swing$probs[3]
    } else {
      #no swing: contact probabilities stay NA and the umpire decides
      outcome[i] <- if (ump_call == 1) "Called Strike" else "Ball"
    }

    x[i] <- loc$x
    z[i] <- loc$z
    true_call[i] <- if (ump_call == 1) "Strike" else "Ball"
    p_swing[i] <- swing_decision$prob
  }

  #an n_pitches of 0 gives an empty data frame with the same columns
  data.frame(
    x = x,
    z = z,
    outcome = outcome,
    true_call = true_call,
    p_swing = p_swing,
    p_whiff = p_whiff,
    p_foul = p_foul,
    p_inplay = p_inplay,
    stringsAsFactors = FALSE
  )
}


#run the batch simulation for one target / count and look at the results
#target location of the pitch
mu_x <- -0.35
mu_z <- 2.25
#spread around the target
sigma_x <- 0.3
sigma_z <- 0.3
#count going into the pitch
balls <- 0
strikes <- 0

final_results <- AB_run_sim(
  1000,
  mu_x = mu_x,
  mu_z = mu_z,
  sigma_x = sigma_x,
  sigma_z = sigma_z,
  B = balls,
  S = strikes,
  swing_dec_params = swing_dec_params,
  contact_params = contact_params
)

view(final_results)


 
#running full AB sim
############################################################################################
#(4) make the full AB simulation
#this replicates how the simulation will run once the user can throw pitch by pitch
#throws the same pitch until the ab is over
#just testing logic - not necessary to run

#initial game state
balls <- 0
strikes <- 0
ab_over <- FALSE
ab_history <- data.frame()
pitch_count <- 0
ab_result <- NA_character_

#NOTE(review): changing selected_pitch here does not refit swing_dec_params or
#contact_params - the loop below still uses the coefficients fit earlier in the file
selected_pitch <- "fastball"

#one row per pitch, combined into ab_history after the at bat ends
ab_pitch_log <- list()

while (!ab_over) {
  pitch_count <- pitch_count + 1

  #simulate many pitches at the current count to estimate the outcome probabilities
  sim_output <- AB_run_sim(10000, mu_x = 0.618, mu_z = 1.94, sigma_x = 0.6, sigma_z = 0.6,
                           B = balls, S = strikes,
                           swing_dec_params = swing_dec_params,
                           contact_params = contact_params)

  #share of each outcome across the simulated pitches
  #(sim_output is a data frame of pitches, so the probabilities have to come from
  #its outcome column - its column names are not outcomes)
  outcome_probs <- prop.table(table(sim_output$outcome))

  #result of this pitch, drawn with those probabilities
  pitch_result <- sample(names(outcome_probs), size = 1,
                         prob = as.numeric(outcome_probs))

  #update the count
  if (pitch_result == "Ball") {
    balls <- balls + 1
  } else if (pitch_result == "Called Strike" || pitch_result == "Miss") {
    strikes <- strikes + 1
  } else if (pitch_result == "Foul" && strikes < 2) {
    #a foul only adds a strike before two strikes
    strikes <- strikes + 1
  }

  #determine result or if ab ends
  if (balls >= 4) {
    ab_result <- "Walk"
    ab_over <- TRUE
  } else if (strikes >= 3) {
    ab_result <- "Strikeout"
    ab_over <- TRUE
  } else if (pitch_result == "In Play") {
    ab_result <- "Hit Into Play"
    ab_over <- TRUE
  }

  ab_pitch_log[[pitch_count]] <- data.frame(
    pitch = pitch_count,
    result = pitch_result,
    balls = balls,
    strikes = strikes,
    stringsAsFactors = FALSE
  )

  #show progression of the at bat
  print(paste("Pitch", pitch_count, ":", pitch_result, "| Count:", balls, "-", strikes))
}

#pitch-by-pitch record of the at bat
ab_history <- do.call(rbind, ab_pitch_log)

print(paste("Final PA Outcome:", ab_result))



############################################################################################

#load the options file to test with and open it in the viewer
sim_options <- read.csv(file = "dashboard/pitch & coords/pitchnames_and_estsd.csv")
view(sim_options)


#test for visual - same selections as above:
#selected_pitcher <- 434378
#selected_batter_stand <- "L"
#selected_pitch <- "Changeup"

#now testing visuals with new dataframe!!!!!

library(ggforce)
#heatmap of where sim chose selected pitches in AB
ggplot(final_results, aes(x = x, y = z)) + #locations sim chose
  #after_stat(level) replaces the deprecated ..level.. notation and matches the
  #after_stat() calls used in the bar charts
  stat_density_2d(aes(fill = after_stat(level)), geom = "polygon", alpha = 0.4) + #heat map
  geom_point(size = 0.5, alpha = 0.2, color = "white") +
  annotate("point", x = mu_x, y = mu_z, color = "red", size = 4, shape = 13) + #target location
  #the strike zone is a fixed shape, so draw it once with annotate() instead of
  #mapping constants in aes(), which draws one rectangle per row of data
  annotate("rect", xmin = -0.7083, xmax = 0.7083, ymin = 1.5, ymax = 3.5,
           fill = NA, color = "black", linetype = "dashed") +
  coord_fixed() +
  scale_fill_viridis_c() + #color scheme
  labs(title = "Selected Simulated Locations",
       #built from the data and the target so the text follows the inputs
       subtitle = sprintf(
         "All %s Pitch Locations Centered Around Chosen Location (%s, %s)",
         format(nrow(final_results), big.mark = ","), mu_x, mu_z
       ),
       x = "Horizontal Location (plate_x)", y = "Vertical Location (plate_z)") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5),
        axis.text.x = element_text(margin = margin(t = 20)),
        axis.text.y = element_text(margin = margin(t = 20)))



library(scales)
#bar chart of final outcome
#share of every possible outcome across all simulated pitches
ggplot(data = final_results, mapping = aes(x = outcome, fill = outcome)) +
  geom_bar(mapping = aes(y = after_stat(count / sum(count)))) +
  scale_y_continuous(labels = percent_format()) +
  ggtitle("Probability of Pitch Outcome",
          subtitle = "from 1,000 simulated pitches based on selections") +
  xlab("Possible Outcomes") +
  ylab("Percent Likelihood of Outcome") +
  theme_minimal()

#only the pitches where the batter swung - type of contact
ggplot(
  data = dplyr::filter(final_results, outcome %in% c("Miss", "Foul", "In Play")),
  mapping = aes(x = outcome, fill = outcome)
) +
  geom_bar(mapping = aes(y = after_stat(count / sum(count)))) +
  scale_y_continuous(labels = percent_format()) +
  ggtitle("Probability of Pitch Outcome",
          subtitle = "from 1,000 simulated pitches based on selections") +
  xlab("Possible Outcomes") +
  ylab("Percent Likelihood of Outcome") +
  theme_minimal()

#share of strike / ball calls the umpire would make on each location
ggplot(data = final_results, mapping = aes(x = true_call, fill = true_call)) +
  geom_bar(mapping = aes(y = after_stat(count / sum(count)))) +
  scale_y_continuous(labels = percent_format()) +
  theme_minimal()


#display the top 10 rows of data the simulation stores
library(kableExtra)

#head() returns only 6 rows unless n is given, so ask for 10 explicitly
DT::datatable(head(final_results, n = 10),
              caption = htmltools::tags$caption(
                'Sample Simulation Data',
                style = 'font-size:24px;font-weight:bold;'
              ))


