# ---------------------
#
# Other miscellaneous fixes after the original cleaning process
#
# Currently, the data has no way to determine how many points are
# scored at the end of a drive.
#
# This R script will read in the DRIVEPOINTS .csv file, and will:
#   1. Rename GameId to GameID
#   2. Remove the variables that are not needed.
#   3. Make all Boolean variables TRUE/FALSE
#   4. Remove rows that aren't needed
#   5. Remove values that aren't needed
#      (ex. removing '2-pt successful' if not a 2-pt conversion)
#   6. Fix the instances of 'LA' occurring in the data
#      (change them to the proper label of 'LAR')
# Once complete, it will write the data back to a clean .csv file
#
# ---------------------

# Libraries to use
library(tidyverse)

# Set the working directory to match where the files are pulled from.
# NOTE(review): machine-specific absolute path; both the read below and the
# final write at the end of the script resolve relative to this directory.
setwd("C:/Users/Ryant/Downloads/Capstone/Cleaned Data Files")

# Read in the data
Plays <- read_csv("pbpDrivePoints2021-2024.csv")

#################

# 1. Rename GameId to GameID
Plays <- rename(Plays, GameID = GameId)

# 2. Remove the variables that are not needed
Plays <- select(
  Plays,
  -c(
    DriveEnd,
    SeriesFirstDown,
    NextScore,
    TeamWin,
    IsRush,
    IsPass,
    Challenger,
    IsMeasurement,
    YardLineFixed,
    YardLineDirection
  )
)

# 3. Make all Boolean variables TRUE/FALSE
# Convert every indicator column to a logical in a single pass.
# A value of exactly 0 becomes FALSE; anything else becomes TRUE.
# NOTE(review): a missing (NA) indicator therefore also becomes TRUE, so the
# resulting columns never contain NA. Confirm that is the intended reading of
# a missing flag.
Plays <- Plays %>%
  mutate(across(
    c(
      IsIncomplete,
      IsTouchdown,
      IsSack,
      IsChallenge,
      IsChallengeReversed,
      IsInterception,
      IsFumble,
      IsPenalty,
      IsTwoPointConversion,
      IsTwoPointConversionSuccessful,
      IsPenaltyAccepted,
      IsNoPlay
    ),
    function(flag) is.na(flag) | flag != 0
  ))

# 4. Remove rows that aren't needed
# A row is a "non-play" when it has no offense or no defense, or when it has
# neither a previous nor a next play, AND it is not a penalty (penalty rows
# are always kept).
# The rows are split with one logical mask instead of an anti_join() without
# `by`, which would match on every shared column and print a join message.
# is.na() never returns NA and IsPenalty is NA-free after the conversion
# above, so the mask is always TRUE or FALSE.
is_non_play <- with(
  Plays,
  (
    is.na(OffenseTeam) |
      is.na(DefenseTeam) |
      (is.na(PreviousPlayID) & is.na(NextPlayID))
  ) & !IsPenalty
)

# Keep the removed rows available for inspection, then drop them from Plays
NonPlays <- Plays[is_non_play, ]
Plays <- Plays[!is_non_play, ]

# 5. Remove values that aren't needed
# Blank out values that only make sense for a particular kind of play:
#   - IsTwoPointConversionSuccessful: only on a 2-point conversion
#   - IsIncomplete / IsInterception:  only on a play whose PlayTypeUpdate
#                                     is 'PASS'
#   - IsChallengeReversed:            only on a challenged play
#   - IsPenaltyAccepted / PenaltyYards: only on a penalty
#
# The values are replaced in place rather than by splitting the table with
# filter() and re-stacking it. filter() drops rows whose condition is NA, so
# a split on `PlayTypeUpdate == 'PASS'` / `!= 'PASS'` loses every row with a
# missing PlayTypeUpdate. Here such rows are kept and treated as "not a pass"
# (`%in%` never returns NA). No row is ever removed in this step.
Plays <- Plays %>%
  mutate(
    IsTwoPointConversionSuccessful = replace(
      IsTwoPointConversionSuccessful, !IsTwoPointConversion, NA
    ),
    IsIncomplete = replace(
      IsIncomplete, !(PlayTypeUpdate %in% "PASS"), NA
    ),
    IsInterception = replace(
      IsInterception, !(PlayTypeUpdate %in% "PASS"), NA
    ),
    IsChallengeReversed = replace(
      IsChallengeReversed, !IsChallenge, NA
    ),
    IsPenaltyAccepted = replace(
      IsPenaltyAccepted, !IsPenalty, NA
    ),
    PenaltyYards = replace(
      PenaltyYards, !IsPenalty, NA
    )
  ) %>%
  # The output is ordered by game and play.
  # NOTE(review): rows sharing the same GameID and PlayID keep their input
  # order here; verify nothing downstream depends on a different tie order.
  arrange(GameID, PlayID)

# 6. Fix the instances of 'LA' occurring in the data
#    (change them to the proper label of 'LAR').
# Missing team values stay missing: the comparison yields NA, which falls
# through to the default branch and returns the original (NA) value.
Plays <- Plays %>%
  mutate(across(
    c(OffenseTeam, DefenseTeam, PenaltyTeam),
    function(team) {
      case_when(
        team == "LA" ~ "LAR",
        TRUE ~ team
      )
    }
  ))

#################

# Save the CSV
write_csv(Plays, "pbpFinal2021-2024.csv")

print("Done!")