# ---------------------
#
# Mark the plays that signal the end of a drive
#
# Currently, the data has no easy way to determine when a
# drive has ended, whether the drive ends on a punt, turnover,
# or score.
#
# This R script reads in the SORTED .csv file, checks whether the
# offensive and defensive teams switch on the next row, whether the half
# or game ends, whether there was a turnover on the play, or whether a
# conversion attempt is followed by the same offense, and then builds a
# logical DriveEnd column from the result. This would be used to help
# with marking how many points a drive resulted in.
#
# NOTE: every lead() comparison below looks at the next ROW, so the input
# must already be sorted in play order.
#
# ---------------------

# Libraries to Use
library(tidyverse)

# Folder the files are pulled from and written to. Paths are built with
# file.path() rather than setwd() so the script does not change the
# session's working directory.
DataDir <- "C:/Users/Ryant/Downloads/Capstone/Cleaned Data Files"

# Read in the data
Plays <- read_csv(file.path(DataDir, "pbpSorted2021-2024.csv"))

# Set aside the rows that aren't a play. They can never end a drive, so
# DriveEnd is FALSE. The column is placed right after the 10th column so
# the layout matches the play rows when both sets are stacked later.
NonPlays <- Plays %>%
  filter(
    is.na(OffenseTeam) | is.na(DefenseTeam) |
      (is.na(PreviousPlayID) & is.na(NextPlayID))
  ) %>%
  mutate(DriveEnd = FALSE) %>%
  relocate(DriveEnd, .after = 10)

# Grab only the rows of actual plays (the exact complement of NonPlays)
OnlyPlays <- Plays %>%
  filter(
    !is.na(OffenseTeam) & !is.na(DefenseTeam) &
      (!is.na(PreviousPlayID) | !is.na(NextPlayID))
  )

# Evaluate every drive-ending case in one pass. mutate() evaluates its
# arguments in order, so Case5 can refer to Case2. Each case_when() ends
# in a catch-all, which also turns NA comparisons (e.g. lead() on the
# last row, or a missing Description) into a definite TRUE/FALSE.
EndDrive <- OnlyPlays %>%
  mutate(
    # Case 1: The "Standard" Case
    # The offense & defense differ from the next row. The last row has no
    # next row, so the comparison is NA and it falls through to TRUE.
    Case1 = case_when(
      OffenseTeam == lead(OffenseTeam) &
        DefenseTeam == lead(DefenseTeam) ~ FALSE,
      TRUE ~ TRUE
    ),

    # Case 2: End of Half/Game
    # Quarter == 2 and the next row's Quarter == 3, or
    # GameId differs from the next row's GameId
    Case2 = case_when(
      Quarter == 2 & lead(Quarter) == 3 ~ TRUE,
      GameId != lead(GameId) ~ TRUE,
      TRUE ~ FALSE
    ),

    # Case 3: Pick 6/Fumble Recovery for a TD
    # Check if IsFumble or IsInterception == 1
    Case3 = case_when(
      # If there is an interception,
      # that is not listed as "No Play",
      # doesn't have an accepted penalty,
      # and doesn't have replay assist reverse the play,
      # then it is counted as the end of the drive
      IsInterception == 1 &
        IsNoPlay == 0 &
        IsPenaltyAccepted == 0 &
        !str_detect(Description, "PLAY WAS REVERSED") ~ TRUE,
      # If there is a fumble,
      # that is not listed as "No Play",
      # doesn't have an accepted penalty,
      # isn't a botched snap,
      # isn't fumbled out of bounds,
      # isn't recovered by the same player on offense,
      # and doesn't have replay assist reverse the play,
      # then it is counted as the end of the drive
      IsFumble == 1 &
        IsNoPlay == 0 &
        IsPenaltyAccepted == 0 &
        !str_detect(Description, "ABORTED") &
        !str_detect(Description, "OUT OF BOUNDS") &
        !str_detect(Description, "AND RECOVERS") &
        !str_detect(Description, "REVERSED") ~ TRUE,
      TRUE ~ FALSE
    ),

    # Case 4: Multiple Turnovers resulting in original offense maintaining
    # possession
    # Covered by previous cases:
    # Case 3 == TRUE (Fumble/Interception) & Case 1 == FALSE (Same team
    # following play)

    # Case 5: Successful Onside Kicks
    # An extra point or two-point conversion whose next row still has the
    # same offense and defense, and that was not the last play of the
    # half/game (Case2). Case 1 cannot catch these because the teams do
    # not change between the two rows.
    Case5 = case_when(
      (str_detect(PlayTypeUpdate, "EXTRA POINT") |
        str_detect(PlayTypeUpdate, "TWO-POINT CONVERSION")) &
        OffenseTeam == lead(OffenseTeam) &
        DefenseTeam == lead(DefenseTeam) &
        !Case2 ~ TRUE,
      TRUE ~ FALSE
    ),

    # Combine the Cases. All four flags are non-NA logicals, so a plain
    # OR is enough.
    DriveEnd = Case1 | Case2 | Case3 | Case5
  ) %>%
  # Remove the case checks and put DriveEnd right after the 10th column.
  # Selecting by name keeps this correct however many columns the input
  # file has.
  select(-c(Case1, Case2, Case3, Case5)) %>%
  relocate(DriveEnd, .after = 10)

# Get all the data back together
PlaysFinal <- bind_rows(EndDrive, NonPlays) %>%
  arrange(GameId, PlayID)

# Save the CSV
write_csv(PlaysFinal, file.path(DataDir, "pbpDriveEnd2021-2024.csv"))

print("Done!")