import pandas as pd
import numpy as np
# Load the 2016-17 shot log. The bare expressions below (head/shape/info)
# only render output when run interactively, e.g. as notebook cells.
Shotlog = pd.read_csv("../../Data/Week 6/Shotlog_16_17.csv")
Shotlog.head()
Shotlog.shape
Shotlog.info()

# Dummy for the current shot: 1 when the outcome is "SCORED", 0 otherwise.
Shotlog['current_shot_hit'] = np.where(
    Shotlog['current_shot_outcome'] == "SCORED",
    1,
    0,
)
Shotlog.head()

import datetime as dt

# Convert 'date' to datetimes and 'time' to timedeltas. The '00:' prefix is
# prepended before parsing -- presumably because the raw values are "mm:ss"
# strings that need an hours field; confirm against the CSV.
Shotlog['date'] = pd.to_datetime(Shotlog['date'])
Shotlog['time'] = pd.to_timedelta('00:' + Shotlog['time'])
Shotlog['time'].describe()

# Outcome of the same player's previous shot within the same game date.
# Rows are ordered by (quarter, time) before shifting inside each
# (shoot_player, date) group; the result is aligned back onto Shotlog by index.
# NOTE(review): this treats ascending 'time' as chronological order -- confirm
# that 'time' is elapsed time within the quarter rather than a countdown clock.
Shotlog['lag_shot_hit'] = (
    Shotlog.sort_values(['quarter', 'time'])
    .groupby(['shoot_player', 'date'])['current_shot_hit']
    .shift(1)
)
Shotlog.head()

# Sorted view for inspection only: the result is not assigned, so Shotlog
# itself keeps its current row order.
Shotlog.sort_values(['shoot_player', 'date', 'quarter', 'time'])
Notice that for the first shot of a game by a given player, the lagged outcome variable will have a missing value.
Since the "current_shot_hit" variable is a dummy variable (=1 if hit, =0 if miss), its average for a given player indicates that player's success rate over the season.
# Per-player mean of the hit dummy, returned as a two-column frame
# (shoot_player, current_shot_hit).
Player_Stats = (
    Shotlog.groupby(['shoot_player'])['current_shot_hit']
    .mean()
    .reset_index()
)
Player_Stats.head()

# Rename the averaged column so it does not collide with the per-shot
# 'current_shot_hit' column when merged back onto the shot log.
Player_Stats = Player_Stats.rename(columns={'current_shot_hit': 'average_hit'})
Shotlog = Shotlog.merge(Player_Stats, on=['shoot_player'])
Shotlog.head()

# Number of shot-log rows per player.
Player_Shots = (
    Shotlog.groupby(['shoot_player'])
    .size()
    .reset_index(name='shot_count')
)
# Preview the players with the most shots (displayed only, not stored).
Player_Shots.sort_values('shot_count', ascending=False).head()
We should also note that players take a different number of shots in each individual game. We will need to treat the data differently for a player who had only two shots in a game compared to one who attempted 30 in a game.
# Number of shot-log rows per player per game date.
Player_Game = (
    Shotlog.groupby(['shoot_player', 'date'])
    .size()
    .reset_index(name='shot_per_game')
)
Player_Game.head()

# Attach the season shot count and the per-game shot count to every shot row.
Shotlog = (
    Shotlog.merge(Player_Shots, on=['shoot_player'])
    .merge(Player_Game, on=['shoot_player', 'date'])
)
display(Shotlog)
# Sorted view for inspection only: the result is not assigned.
Shotlog.sort_values(['shoot_player', 'date', 'quarter', 'time'])

# Store 'points' and 'quarter' with object dtype.
# NOTE(review): presumably so later steps treat them as categories rather
# than numbers -- confirm against the downstream analysis.
Shotlog['points'] = Shotlog['points'].astype(object)
Shotlog['quarter'] = Shotlog['quarter'].astype(object)

# Keep only the shots that have a lagged outcome.
Shotlog = Shotlog[Shotlog["lag_shot_hit"].notna()]
Shotlog.shape

# Write the prepared tables to CSV without the index column.
Shotlog.to_csv("../../Data/Week 6/Shotlog1.csv", index=False)
Player_Stats.to_csv("../../Data/Week 6/Player_Stats1.csv", index=False)
Player_Shots.to_csv("../../Data/Week 6/Player_Shots1.csv", index=False)
Player_Game.to_csv("../../Data/Week 6/Player_Game1.csv", index=False)