{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import sqlite3\n", "import pandas as pd\n", "import csv\n", "\n", "pd.set_option('max_columns', 180)\n", "pd.set_option('max_rows', 200000)\n", "pd.set_option('max_colwidth', 5000)" ] }, { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Getting to Know the Data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(171907, 161)\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datenumber_of_gameday_of_weekv_namev_leaguev_game_numberh_nameh_leagueh_game_numberv_scoreh_scorelength_outsday_nightcompletionforefeitprotestpark_idattendancelength_minutesv_line_scoreh_line_scorev_at_batsv_hitsv_doublesv_triplesv_homerunsv_rbiv_sacrifice_hitsv_sacrifice_fliesv_hit_by_pitchv_walksv_intentional_walksv_strikeoutsv_stolen_basesv_caught_stealingv_grounded_into_doublev_first_catcher_interferencev_left_on_basev_pitchers_usedv_individual_earned_runsv_team_earned_runsv_wild_pitchesv_balksv_putoutsv_assistsv_errorsv_passed_ballsv_double_playsv_triple_playsh_at_batsh_hitsh_doublesh_triplesh_homerunsh_rbih_sacrifice_hitsh_sacrifice_fliesh_hit_by_pitchh_walksh_intentional_walksh_strikeoutsh_stolen_basesh_caught_stealingh_grounded_into_doubleh_first_catcher_interferenceh_left_on_baseh_pitchers_usedh_individual_earned_runsh_team_earned_runsh_wild_pitchesh_balksh_putoutsh_assistsh_errorsh_passed_ballsh_double_playsh_triple_playshp_umpire_idhp_umpire_name1b_umpire_id1b_umpire_name2b_umpire_id2b_umpire_name3b_umpire_id3b_umpire_namelf_umpire_idlf_umpire_namerf_umpire_idrf_umpire_namev_manager_idv_manager_nameh_manager_idh_manager_namewinning_pitcher_idwinning_pitcher_namelosing_pitcher_idlosing_pitcher_namesaving_pitcher_idsaving_pitcher_namewinning_rbi_batter_idwinning_rbi_batter_id_namev_starting_pitcher_idv_starting_pitcher_nameh_starting_pitcher_idh_starting_pitcher_namev_player_1_idv_player_1_namev_player_1_def_posv_player_2_idv_player_2_namev_player_2_def_posv_player_3_idv_player_3_namev_player_3_def_posv_player_4_idv_player_4_namev_player_4_def_posv_player_5_idv_player_5_namev_player_5_def_posv_player_6_idv_player_6_namev_player_6_def_posv_player_7_idv_player_7_namev_player_7_def_posv_player_8_idv_player_8_namev_player_8_def_posv_player_9_idv_player_9_namev_player_9_def_posh_player_1_idh_player_1_nameh_player_1_def_posh_player_2_idh_player_2_nameh_player_2_def_posh_player_3_idh_player_3_nameh_player_3_def_posh_player_4_idh_player_4_nameh_player_4_def_posh_player_5_idh_player_5_nameh_player_5_def_posh_player_6_idh_player_6_nameh_player_6_def_posh_player_7_idh_player_7_nameh_player_7_def_posh_player_8_idh_player_8_nameh_player_8_def_posh_player_9_idh_player_9_nameh_player_9_def_posadditional_infoacquisition_info
0187105040ThuCL1NaN1FW1NaN10254.0DNaNNaNNaNFOR01200.0120.000000000001001000030.04.01.00.00.00.00.00.00.01.0NaN6.01.0NaN-1.0NaN4.01.01.01.00.00.027.09.00.03.00.00.031.04.01.00.00.02.00.00.00.01.0NaN0.00.0NaN-1.0NaN3.01.00.00.00.00.027.03.03.01.01.00.0boakj901John BoakeNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNpaboc101Charlie Paborlennb101Bill Lennonmathb101Bobby Mathewsprata101Al PrattNaNNaNNaNNaNprata101Al Prattmathb101Bobby Mathewswhitd102Deacon White2.0kimbg101Gene Kimball4.0paboc101Charlie Pabor7.0allia101Art Allison8.0white104Elmer White9.0prata101Al Pratt1.0sutte101Ezra Sutton5.0carlj102Jim Carleton3.0bassj101John Bass6.0selmf101Frank Sellman5.0mathb101Bobby Mathews1.0foraj101Jim Foran3.0goldw101Wally Goldsmith6.0lennb101Bill Lennon2.0caret101Tom Carey4.0mince101Ed Mincher7.0mcdej101James McDermott8.0kellb105Bill Kelly9.0NaNY
1187105050FriBS1NaN1WS3NaN1201854.0DNaNNaNNaNWAS015000.0145.010700043564011303041.013.01.02.00.013.00.00.00.018.0NaN5.03.0NaN-1.0NaN12.01.06.06.01.00.027.013.010.01.02.00.049.014.02.00.00.011.00.00.00.010.0NaN2.01.0NaN-1.0NaN14.01.07.07.00.00.027.020.010.02.03.00.0dobsh901Henry DobsonNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNwrigh101Harry Wrightyounn801Nick Youngspala101Al Spaldingbraia102Asa BrainardNaNNaNNaNNaNspala101Al Spaldingbraia102Asa Brainardwrigg101George Wright6.0barnr102Ross Barnes4.0birdd102Dave Birdsall9.0mcvec101Cal McVey2.0wrigh101Harry Wright8.0goulc101Charlie Gould3.0schah101Harry Schafer5.0conef101Fred Cone7.0spala101Al Spalding1.0watef102Fred Waterman5.0forcd101Davy Force6.0mille105Everett Mills3.0allid101Doug Allison2.0hallg101George Hall7.0leona101Andy Leonard4.0braia102Asa Brainard1.0burrh101Henry Burroughs9.0berth101Henry Berthrong8.0HTBFY
2187105060SatCL1NaN2RC1NaN112454.0DNaNNaNNaNRCK011000.0140.061002000301002010049.011.01.01.00.08.00.00.00.00.0NaN1.00.0NaN-1.0NaN10.01.00.00.02.00.027.012.08.05.00.00.036.07.02.01.00.02.00.00.00.00.0NaN3.05.0NaN-1.0NaN5.01.03.03.01.00.027.012.013.03.00.00.0mawnj901J.H. MannyNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNpaboc101Charlie Paborhasts101Scott Hastingsprata101Al Prattfishc102Cherokee FisherNaNNaNNaNNaNprata101Al Prattfishc102Cherokee Fisherwhitd102Deacon White2.0kimbg101Gene Kimball4.0paboc101Charlie Pabor7.0allia101Art Allison8.0white104Elmer White9.0prata101Al Pratt1.0sutte101Ezra Sutton5.0carlj102Jim Carleton3.0bassj101John Bass6.0mackd101Denny Mack3.0addyb101Bob Addy4.0fishc102Cherokee Fisher1.0hasts101Scott Hastings8.0ham-r101Ralph Ham5.0ansoc101Cap Anson2.0sagep101Pony Sager6.0birdg101George Bird7.0stirg101Gat Stires9.0NaNY
3187105080MonCL1NaN3CH1NaN1121454.0DNaNNaNNaNCHI015000.0150.010140311107700000046.015.02.01.02.010.00.00.00.00.0NaN1.00.0NaN-1.0NaN7.01.06.06.00.00.027.015.011.06.00.00.043.011.02.00.00.08.00.00.00.04.0NaN2.01.0NaN-1.0NaN6.01.04.04.00.00.027.014.07.02.00.00.0willg901Gardner WillardNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNpaboc101Charlie Paborwoodj106Jimmy Woodzettg101George Zettleinprata101Al PrattNaNNaNNaNNaNprata101Al Prattzettg101George Zettleinwhitd102Deacon White2.0kimbg101Gene Kimball4.0paboc101Charlie Pabor7.0allia101Art Allison8.0white104Elmer White9.0prata101Al Pratt1.0sutte101Ezra Sutton5.0carlj102Jim Carleton3.0bassj101John Bass6.0mcatb101Bub McAtee3.0kingm101Marshall King8.0hodec101Charlie Hodes2.0woodj106Jimmy Wood4.0simmj101Joe Simmons9.0folet101Tom Foley7.0duffe101Ed Duffy6.0pinke101Ed Pinkham5.0zettg101George Zettlein1.0NaNY
4187105090TueBS1NaN2TRONaN19554.0DNaNNaNNaNTRO013250.0145.000000223210100300046.017.04.01.00.06.00.00.00.02.0NaN0.01.0NaN-1.0NaN12.01.02.02.00.00.027.012.05.00.01.00.036.09.00.00.00.02.00.00.00.03.0NaN0.02.0NaN-1.0NaN7.01.03.03.01.00.027.011.07.03.00.00.0leroi901Isaac LeroyNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNwrigh101Harry Wrightpikel101Lip Pikespala101Al Spaldingmcmuj101John McMullinNaNNaNNaNNaNspala101Al Spaldingmcmuj101John McMullinwrigg101George Wright6.0barnr102Ross Barnes4.0birdd102Dave Birdsall9.0mcvec101Cal McVey2.0wrigh101Harry Wright8.0goulc101Charlie Gould3.0schah101Harry Schafer5.0conef101Fred Cone7.0spala101Al Spalding1.0flync101Clipper Flynn9.0mcgem101Mike McGeary2.0yorkt101Tom York8.0mcmuj101John McMullin1.0kings101Steve King7.0beave101Edward Beavens4.0bells101Steve Bellan5.0pikel101Lip Pike3.0cravb101Bill Craver6.0HTBFY
\n", "
" ], "text/plain": [ " date number_of_game day_of_week v_name v_league v_game_number h_name \\\n", "0 18710504 0 Thu CL1 NaN 1 FW1 \n", "1 18710505 0 Fri BS1 NaN 1 WS3 \n", "2 18710506 0 Sat CL1 NaN 2 RC1 \n", "3 18710508 0 Mon CL1 NaN 3 CH1 \n", "4 18710509 0 Tue BS1 NaN 2 TRO \n", "\n", " h_league h_game_number v_score h_score length_outs day_night completion \\\n", "0 NaN 1 0 2 54.0 D NaN \n", "1 NaN 1 20 18 54.0 D NaN \n", "2 NaN 1 12 4 54.0 D NaN \n", "3 NaN 1 12 14 54.0 D NaN \n", "4 NaN 1 9 5 54.0 D NaN \n", "\n", " forefeit protest park_id attendance length_minutes v_line_score \\\n", "0 NaN NaN FOR01 200.0 120.0 000000000 \n", "1 NaN NaN WAS01 5000.0 145.0 107000435 \n", "2 NaN NaN RCK01 1000.0 140.0 610020003 \n", "3 NaN NaN CHI01 5000.0 150.0 101403111 \n", "4 NaN NaN TRO01 3250.0 145.0 000002232 \n", "\n", " h_line_score v_at_bats v_hits v_doubles v_triples v_homeruns v_rbi \\\n", "0 010010000 30.0 4.0 1.0 0.0 0.0 0.0 \n", "1 640113030 41.0 13.0 1.0 2.0 0.0 13.0 \n", "2 010020100 49.0 11.0 1.0 1.0 0.0 8.0 \n", "3 077000000 46.0 15.0 2.0 1.0 2.0 10.0 \n", "4 101003000 46.0 17.0 4.0 1.0 0.0 6.0 \n", "\n", " v_sacrifice_hits v_sacrifice_flies v_hit_by_pitch v_walks \\\n", "0 0.0 0.0 0.0 1.0 \n", "1 0.0 0.0 0.0 18.0 \n", "2 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 2.0 \n", "\n", " v_intentional_walks v_strikeouts v_stolen_bases v_caught_stealing \\\n", "0 NaN 6.0 1.0 NaN \n", "1 NaN 5.0 3.0 NaN \n", "2 NaN 1.0 0.0 NaN \n", "3 NaN 1.0 0.0 NaN \n", "4 NaN 0.0 1.0 NaN \n", "\n", " v_grounded_into_double v_first_catcher_interference v_left_on_base \\\n", "0 -1.0 NaN 4.0 \n", "1 -1.0 NaN 12.0 \n", "2 -1.0 NaN 10.0 \n", "3 -1.0 NaN 7.0 \n", "4 -1.0 NaN 12.0 \n", "\n", " v_pitchers_used v_individual_earned_runs v_team_earned_runs \\\n", "0 1.0 1.0 1.0 \n", "1 1.0 6.0 6.0 \n", "2 1.0 0.0 0.0 \n", "3 1.0 6.0 6.0 \n", "4 1.0 2.0 2.0 \n", "\n", " v_wild_pitches v_balks v_putouts v_assists v_errors v_passed_balls \\\n", "0 0.0 0.0 27.0 9.0 0.0 3.0 \n", "1 1.0 0.0 27.0 13.0 10.0 1.0 \n", "2 2.0 0.0 27.0 12.0 8.0 5.0 \n", "3 0.0 0.0 27.0 15.0 11.0 6.0 \n", "4 0.0 0.0 27.0 12.0 5.0 0.0 \n", "\n", " v_double_plays v_triple_plays h_at_bats h_hits h_doubles h_triples \\\n", "0 0.0 0.0 31.0 4.0 1.0 0.0 \n", "1 2.0 0.0 49.0 14.0 2.0 0.0 \n", "2 0.0 0.0 36.0 7.0 2.0 1.0 \n", "3 0.0 0.0 43.0 11.0 2.0 0.0 \n", "4 1.0 0.0 36.0 9.0 0.0 0.0 \n", "\n", " h_homeruns h_rbi h_sacrifice_hits h_sacrifice_flies h_hit_by_pitch \\\n", "0 0.0 2.0 0.0 0.0 0.0 \n", "1 0.0 11.0 0.0 0.0 0.0 \n", "2 0.0 2.0 0.0 0.0 0.0 \n", "3 0.0 8.0 0.0 0.0 0.0 \n", "4 0.0 2.0 0.0 0.0 0.0 \n", "\n", " h_walks h_intentional_walks h_strikeouts h_stolen_bases \\\n", "0 1.0 NaN 0.0 0.0 \n", "1 10.0 NaN 2.0 1.0 \n", "2 0.0 NaN 3.0 5.0 \n", "3 4.0 NaN 2.0 1.0 \n", "4 3.0 NaN 0.0 2.0 \n", "\n", " h_caught_stealing h_grounded_into_double h_first_catcher_interference \\\n", "0 NaN -1.0 NaN \n", "1 NaN -1.0 NaN \n", "2 NaN -1.0 NaN \n", "3 NaN -1.0 NaN \n", "4 NaN -1.0 NaN \n", "\n", " h_left_on_base h_pitchers_used h_individual_earned_runs \\\n", "0 3.0 1.0 0.0 \n", "1 14.0 1.0 7.0 \n", "2 5.0 1.0 3.0 \n", "3 6.0 1.0 4.0 \n", "4 7.0 1.0 3.0 \n", "\n", " h_team_earned_runs h_wild_pitches h_balks h_putouts h_assists \\\n", "0 0.0 0.0 0.0 27.0 3.0 \n", "1 7.0 0.0 0.0 27.0 20.0 \n", "2 3.0 1.0 0.0 27.0 12.0 \n", "3 4.0 0.0 0.0 27.0 14.0 \n", "4 3.0 1.0 0.0 27.0 11.0 \n", "\n", " h_errors h_passed_balls h_double_plays h_triple_plays hp_umpire_id \\\n", "0 3.0 1.0 1.0 0.0 boakj901 \n", "1 10.0 2.0 3.0 0.0 dobsh901 \n", "2 13.0 3.0 0.0 0.0 mawnj901 \n", "3 7.0 2.0 0.0 0.0 willg901 \n", "4 7.0 3.0 0.0 0.0 leroi901 \n", "\n", " hp_umpire_name 1b_umpire_id 1b_umpire_name 2b_umpire_id 2b_umpire_name \\\n", "0 John Boake NaN NaN NaN NaN \n", "1 Henry Dobson NaN NaN NaN NaN \n", "2 J.H. Manny NaN NaN NaN NaN \n", "3 Gardner Willard NaN NaN NaN NaN \n", "4 Isaac Leroy NaN NaN NaN NaN \n", "\n", " 3b_umpire_id 3b_umpire_name lf_umpire_id lf_umpire_name rf_umpire_id \\\n", "0 NaN NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN \n", "\n", " rf_umpire_name v_manager_id v_manager_name h_manager_id h_manager_name \\\n", "0 NaN paboc101 Charlie Pabor lennb101 Bill Lennon \n", "1 NaN wrigh101 Harry Wright younn801 Nick Young \n", "2 NaN paboc101 Charlie Pabor hasts101 Scott Hastings \n", "3 NaN paboc101 Charlie Pabor woodj106 Jimmy Wood \n", "4 NaN wrigh101 Harry Wright pikel101 Lip Pike \n", "\n", " winning_pitcher_id winning_pitcher_name losing_pitcher_id \\\n", "0 mathb101 Bobby Mathews prata101 \n", "1 spala101 Al Spalding braia102 \n", "2 prata101 Al Pratt fishc102 \n", "3 zettg101 George Zettlein prata101 \n", "4 spala101 Al Spalding mcmuj101 \n", "\n", " losing_pitcher_name saving_pitcher_id saving_pitcher_name \\\n", "0 Al Pratt NaN NaN \n", "1 Asa Brainard NaN NaN \n", "2 Cherokee Fisher NaN NaN \n", "3 Al Pratt NaN NaN \n", "4 John McMullin NaN NaN \n", "\n", " winning_rbi_batter_id winning_rbi_batter_id_name v_starting_pitcher_id \\\n", "0 NaN NaN prata101 \n", "1 NaN NaN spala101 \n", "2 NaN NaN prata101 \n", "3 NaN NaN prata101 \n", "4 NaN NaN spala101 \n", "\n", " v_starting_pitcher_name h_starting_pitcher_id h_starting_pitcher_name \\\n", "0 Al Pratt mathb101 Bobby Mathews \n", "1 Al Spalding braia102 Asa Brainard \n", "2 Al Pratt fishc102 Cherokee Fisher \n", "3 Al Pratt zettg101 George Zettlein \n", "4 Al Spalding mcmuj101 John McMullin \n", "\n", " v_player_1_id v_player_1_name v_player_1_def_pos v_player_2_id \\\n", "0 whitd102 Deacon White 2.0 kimbg101 \n", "1 wrigg101 George Wright 6.0 barnr102 \n", "2 whitd102 Deacon White 2.0 kimbg101 \n", "3 whitd102 Deacon White 2.0 kimbg101 \n", "4 wrigg101 George Wright 6.0 barnr102 \n", "\n", " v_player_2_name v_player_2_def_pos v_player_3_id v_player_3_name \\\n", "0 Gene Kimball 4.0 paboc101 Charlie Pabor \n", "1 Ross Barnes 4.0 birdd102 Dave Birdsall \n", "2 Gene Kimball 4.0 paboc101 Charlie Pabor \n", "3 Gene Kimball 4.0 paboc101 Charlie Pabor \n", "4 Ross Barnes 4.0 birdd102 Dave Birdsall \n", "\n", " v_player_3_def_pos v_player_4_id v_player_4_name v_player_4_def_pos \\\n", "0 7.0 allia101 Art Allison 8.0 \n", "1 9.0 mcvec101 Cal McVey 2.0 \n", "2 7.0 allia101 Art Allison 8.0 \n", "3 7.0 allia101 Art Allison 8.0 \n", "4 9.0 mcvec101 Cal McVey 2.0 \n", "\n", " v_player_5_id v_player_5_name v_player_5_def_pos v_player_6_id \\\n", "0 white104 Elmer White 9.0 prata101 \n", "1 wrigh101 Harry Wright 8.0 goulc101 \n", "2 white104 Elmer White 9.0 prata101 \n", "3 white104 Elmer White 9.0 prata101 \n", "4 wrigh101 Harry Wright 8.0 goulc101 \n", "\n", " v_player_6_name v_player_6_def_pos v_player_7_id v_player_7_name \\\n", "0 Al Pratt 1.0 sutte101 Ezra Sutton \n", "1 Charlie Gould 3.0 schah101 Harry Schafer \n", "2 Al Pratt 1.0 sutte101 Ezra Sutton \n", "3 Al Pratt 1.0 sutte101 Ezra Sutton \n", "4 Charlie Gould 3.0 schah101 Harry Schafer \n", "\n", " v_player_7_def_pos v_player_8_id v_player_8_name v_player_8_def_pos \\\n", "0 5.0 carlj102 Jim Carleton 3.0 \n", "1 5.0 conef101 Fred Cone 7.0 \n", "2 5.0 carlj102 Jim Carleton 3.0 \n", "3 5.0 carlj102 Jim Carleton 3.0 \n", "4 5.0 conef101 Fred Cone 7.0 \n", "\n", " v_player_9_id v_player_9_name v_player_9_def_pos h_player_1_id \\\n", "0 bassj101 John Bass 6.0 selmf101 \n", "1 spala101 Al Spalding 1.0 watef102 \n", "2 bassj101 John Bass 6.0 mackd101 \n", "3 bassj101 John Bass 6.0 mcatb101 \n", "4 spala101 Al Spalding 1.0 flync101 \n", "\n", " h_player_1_name h_player_1_def_pos h_player_2_id h_player_2_name \\\n", "0 Frank Sellman 5.0 mathb101 Bobby Mathews \n", "1 Fred Waterman 5.0 forcd101 Davy Force \n", "2 Denny Mack 3.0 addyb101 Bob Addy \n", "3 Bub McAtee 3.0 kingm101 Marshall King \n", "4 Clipper Flynn 9.0 mcgem101 Mike McGeary \n", "\n", " h_player_2_def_pos h_player_3_id h_player_3_name h_player_3_def_pos \\\n", "0 1.0 foraj101 Jim Foran 3.0 \n", "1 6.0 mille105 Everett Mills 3.0 \n", "2 4.0 fishc102 Cherokee Fisher 1.0 \n", "3 8.0 hodec101 Charlie Hodes 2.0 \n", "4 2.0 yorkt101 Tom York 8.0 \n", "\n", " h_player_4_id h_player_4_name h_player_4_def_pos h_player_5_id \\\n", "0 goldw101 Wally Goldsmith 6.0 lennb101 \n", "1 allid101 Doug Allison 2.0 hallg101 \n", "2 hasts101 Scott Hastings 8.0 ham-r101 \n", "3 woodj106 Jimmy Wood 4.0 simmj101 \n", "4 mcmuj101 John McMullin 1.0 kings101 \n", "\n", " h_player_5_name h_player_5_def_pos h_player_6_id h_player_6_name \\\n", "0 Bill Lennon 2.0 caret101 Tom Carey \n", "1 George Hall 7.0 leona101 Andy Leonard \n", "2 Ralph Ham 5.0 ansoc101 Cap Anson \n", "3 Joe Simmons 9.0 folet101 Tom Foley \n", "4 Steve King 7.0 beave101 Edward Beavens \n", "\n", " h_player_6_def_pos h_player_7_id h_player_7_name h_player_7_def_pos \\\n", "0 4.0 mince101 Ed Mincher 7.0 \n", "1 4.0 braia102 Asa Brainard 1.0 \n", "2 2.0 sagep101 Pony Sager 6.0 \n", "3 7.0 duffe101 Ed Duffy 6.0 \n", "4 4.0 bells101 Steve Bellan 5.0 \n", "\n", " h_player_8_id h_player_8_name h_player_8_def_pos h_player_9_id \\\n", "0 mcdej101 James McDermott 8.0 kellb105 \n", "1 burrh101 Henry Burroughs 9.0 berth101 \n", "2 birdg101 George Bird 7.0 stirg101 \n", "3 pinke101 Ed Pinkham 5.0 zettg101 \n", "4 pikel101 Lip Pike 3.0 cravb101 \n", "\n", " h_player_9_name h_player_9_def_pos additional_info acquisition_info \n", "0 Bill Kelly 9.0 NaN Y \n", "1 Henry Berthrong 8.0 HTBF Y \n", "2 Gat Stires 9.0 NaN Y \n", "3 George Zettlein 1.0 NaN Y \n", "4 Bill Craver 6.0 HTBF Y " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "log = pd.read_csv(\"game_log.csv\",low_memory=False)\n", "print(log.shape)\n", "log.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "hidden": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datenumber_of_gameday_of_weekv_namev_leaguev_game_numberh_nameh_leagueh_game_numberv_scoreh_scorelength_outsday_nightcompletionforefeitprotestpark_idattendancelength_minutesv_line_scoreh_line_scorev_at_batsv_hitsv_doublesv_triplesv_homerunsv_rbiv_sacrifice_hitsv_sacrifice_fliesv_hit_by_pitchv_walksv_intentional_walksv_strikeoutsv_stolen_basesv_caught_stealingv_grounded_into_doublev_first_catcher_interferencev_left_on_basev_pitchers_usedv_individual_earned_runsv_team_earned_runsv_wild_pitchesv_balksv_putoutsv_assistsv_errorsv_passed_ballsv_double_playsv_triple_playsh_at_batsh_hitsh_doublesh_triplesh_homerunsh_rbih_sacrifice_hitsh_sacrifice_fliesh_hit_by_pitchh_walksh_intentional_walksh_strikeoutsh_stolen_basesh_caught_stealingh_grounded_into_doubleh_first_catcher_interferenceh_left_on_baseh_pitchers_usedh_individual_earned_runsh_team_earned_runsh_wild_pitchesh_balksh_putoutsh_assistsh_errorsh_passed_ballsh_double_playsh_triple_playshp_umpire_idhp_umpire_name1b_umpire_id1b_umpire_name2b_umpire_id2b_umpire_name3b_umpire_id3b_umpire_namelf_umpire_idlf_umpire_namerf_umpire_idrf_umpire_namev_manager_idv_manager_nameh_manager_idh_manager_namewinning_pitcher_idwinning_pitcher_namelosing_pitcher_idlosing_pitcher_namesaving_pitcher_idsaving_pitcher_namewinning_rbi_batter_idwinning_rbi_batter_id_namev_starting_pitcher_idv_starting_pitcher_nameh_starting_pitcher_idh_starting_pitcher_namev_player_1_idv_player_1_namev_player_1_def_posv_player_2_idv_player_2_namev_player_2_def_posv_player_3_idv_player_3_namev_player_3_def_posv_player_4_idv_player_4_namev_player_4_def_posv_player_5_idv_player_5_namev_player_5_def_posv_player_6_idv_player_6_namev_player_6_def_posv_player_7_idv_player_7_namev_player_7_def_posv_player_8_idv_player_8_namev_player_8_def_posv_player_9_idv_player_9_namev_player_9_def_posh_player_1_idh_player_1_nameh_player_1_def_posh_player_2_idh_player_2_nameh_player_2_def_posh_player_3_idh_player_3_nameh_player_3_def_posh_player_4_idh_player_4_nameh_player_4_def_posh_player_5_idh_player_5_nameh_player_5_def_posh_player_6_idh_player_6_nameh_player_6_def_posh_player_7_idh_player_7_nameh_player_7_def_posh_player_8_idh_player_8_nameh_player_8_def_posh_player_9_idh_player_9_nameh_player_9_def_posadditional_infoacquisition_info
171902201610020SunMILNL162COLNL1626460.0DNaNNaNNaNDEN0227762.0203.00200000202110010001039.010.04.01.02.06.00.00.01.04.00.012.02.01.00.00.08.07.04.04.01.00.030.012.01.00.00.00.041.013.04.00.01.04.01.00.01.03.00.011.00.01.00.00.012.05.06.06.00.00.030.013.00.00.00.00.0barrs901Scott Barrywoodt901Tom Woodringrandt901Tony Randazzoortir901Roberto OrtizNaNNaNNaNNaNcounc001Craig Counsellweisw001Walt Weissthort001Tyler Thornburgrusic001Chris Rusinknebc001Corey Knebelsusaa001Andrew Susaccravt001Tyler Cravymarqg001German Marquezvillj001Jonathan Villar5.0genns001Scooter Gennett4.0cartc002Chris Carter3.0santd002Domingo Santana9.0pereh001Hernan Perez8.0arcio002Orlando Arcia6.0susaa001Andrew Susac2.0elmoj001Jake Elmore7.0cravt001Tyler Cravy1.0blacc001Charlie Blackmon8.0dahld001David Dahl7.0arenn001Nolan Arenado5.0gonzc001Carlos Gonzalez9.0murpt002Tom Murphy2.0pattj005Jordan Patterson3.0valap001Pat Valaika4.0adamc001Cristhian Adames6.0marqg001German Marquez1.0NaNY
171903201610020SunNYNNL162PHINL1622551.0DNaNNaNNaNPHI1336935.0159.000000110000100031x33.08.03.00.00.02.00.00.00.02.00.09.01.01.01.00.06.06.03.03.00.00.024.012.03.01.02.00.033.010.01.00.00.03.00.01.00.02.00.03.00.00.02.00.07.05.02.02.00.00.027.07.00.00.01.00.0barkl901Lance Barksdaleherna901Angel Hernandezbarrt901Ted Barrettlittw901Will LittleNaNNaNNaNNaNcollt801Terry Collinsmackp101Pete Mackaninmurrc002Colton Murraygoede001Erik Goeddelnerih001Hector Nerishernc005Cesar Hernandezynoag001Gabriel Ynoaeickj001Jerad Eickhoffgranc001Curtis Granderson8.0cabra002Asdrubal Cabrera6.0brucj001Jay Bruce9.0dudal001Lucas Duda3.0johnk003Kelly Johnson4.0confm001Michael Conforto7.0campe001Eric Campbell5.0plawk001Kevin Plawecki2.0ynoag001Gabriel Ynoa1.0hernc005Cesar Hernandez4.0parej002Jimmy Paredes7.0herro001Odubel Herrera8.0franm004Maikel Franco5.0howar001Ryan Howard3.0ruppc001Cameron Rupp2.0blana001Andres Blanco6.0altha001Aaron Altherr9.0eickj001Jerad Eickhoff1.0NaNY
171904201610020SunLANNL162SFNNL1621751.0DNaNNaNNaNSFO0341445.0184.000010000023000002x30.04.00.00.00.01.00.00.00.02.00.07.00.00.01.00.04.07.07.07.00.00.024.05.01.00.00.00.039.016.03.01.00.07.00.00.00.04.01.011.02.01.00.00.012.02.01.01.00.00.027.07.00.00.01.00.0knigb901Brian Knightwestj901Joe Westfleta901Andy Fletcherdanlk901Kerwin DanleyNaNNaNNaNNaNrobed001Dave Robertsbochb002Bruce Bochymoorm003Matt Mooremaedk001Kenta MaedaNaNNaNposeb001Buster Poseymaedk001Kenta Maedamoorm003Matt Moorekendh001Howie Kendrick7.0turnj001Justin Turner5.0seagc001Corey Seager6.0puigy001Yasiel Puig9.0gonza003Adrian Gonzalez3.0grany001Yasmani Grandal2.0pedej001Joc Pederson8.0utlec001Chase Utley4.0maedk001Kenta Maeda1.0spand001Denard Span8.0beltb001Brandon Belt3.0poseb001Buster Posey2.0pench001Hunter Pence9.0crawb001Brandon Crawford6.0pagaa001Angel Pagan7.0panij002Joe Panik4.0gillc001Conor Gillaspie5.0moorm003Matt Moore1.0NaNY
171905201610020SunPITNL162SLNNL16241051.0DNaNNaNNaNSTL1044615.0192.000002020000100360x35.09.00.00.01.04.00.00.00.04.00.011.00.01.00.00.08.06.08.08.00.00.024.02.02.00.00.00.036.012.02.00.01.010.00.02.00.04.00.05.00.00.00.00.08.03.04.04.00.00.027.07.00.00.01.00.0cuzzp901Phil Cuzziticht901Todd Tichenorvanol901Larry Vanovermarqa901Alfonso MarquezNaNNaNNaNNaNhurdc001Clint Hurdlemathm001Mike Mathenybroxj001Jonathan Broxtonnicaj001Juan NicasioNaNNaNpiscs001Stephen Piscottyvoger001Ryan Vogelsongwaina001Adam Wainwrightjasoj001John Jaso3.0polag001Gregory Polanco9.0mccua001Andrew McCutchen8.0kangj001Jung Ho Kang5.0joycm001Matt Joyce7.0hansa001Alen Hanson4.0fryee001Eric Fryer2.0florp001Pedro Florimon6.0voger001Ryan Vogelsong1.0carpm002Matt Carpenter3.0diaza003Aledmys Diaz6.0moliy001Yadier Molina2.0piscs001Stephen Piscotty9.0peraj001Jhonny Peralta5.0mossb001Brandon Moss7.0gyorj001Jedd Gyorko4.0gricr001Randal Grichuk8.0waina001Adam Wainwright1.0NaNY
171906201610020SunMIANL161WASNL16271051.0DNaNNaNNaNWAS1128730.0216.000023002003023002x38.014.01.01.02.07.01.00.00.03.02.010.01.01.01.00.08.07.010.010.01.00.024.011.00.00.01.00.030.010.02.00.01.010.01.01.01.08.00.03.02.00.01.00.07.06.07.07.01.00.027.011.00.00.01.00.0tumpj901John Tumpaneporta901Alan Porteronorb901Brian O'Norakellj901Jeff KelloggNaNNaNNaNNaNmattd001Don Mattinglybaked002Dusty Bakerschem001Max Scherzerbrica001Austin Bricemelam001Mark Melancondifow001Wilmer Difokoeht001Tom Koehlerschem001Max Scherzergordd002Dee Gordon4.0telit001Tomas Telis2.0pradm001Martin Prado5.0yelic001Christian Yelich8.0bourj002Justin Bour3.0scrux001Xavier Scruggs7.0hoodd001Destin Hood9.0hecha001Adeiny Hechavarria6.0koeht001Tom Koehler1.0turnt001Trea Turner8.0reveb001Ben Revere7.0harpb003Bryce Harper9.0zimmr001Ryan Zimmerman3.0drews001Stephen Drew5.0difow001Wilmer Difo4.0espid001Danny Espinosa6.0lobaj001Jose Lobaton2.0schem001Max Scherzer1.0NaNY
\n", "
" ], "text/plain": [ " date number_of_game day_of_week v_name v_league v_game_number \\\n", "171902 20161002 0 Sun MIL NL 162 \n", "171903 20161002 0 Sun NYN NL 162 \n", "171904 20161002 0 Sun LAN NL 162 \n", "171905 20161002 0 Sun PIT NL 162 \n", "171906 20161002 0 Sun MIA NL 161 \n", "\n", " h_name h_league h_game_number v_score h_score length_outs \\\n", "171902 COL NL 162 6 4 60.0 \n", "171903 PHI NL 162 2 5 51.0 \n", "171904 SFN NL 162 1 7 51.0 \n", "171905 SLN NL 162 4 10 51.0 \n", "171906 WAS NL 162 7 10 51.0 \n", "\n", " day_night completion forefeit protest park_id attendance \\\n", "171902 D NaN NaN NaN DEN02 27762.0 \n", "171903 D NaN NaN NaN PHI13 36935.0 \n", "171904 D NaN NaN NaN SFO03 41445.0 \n", "171905 D NaN NaN NaN STL10 44615.0 \n", "171906 D NaN NaN NaN WAS11 28730.0 \n", "\n", " length_minutes v_line_score h_line_score v_at_bats v_hits \\\n", "171902 203.0 0200000202 1100100010 39.0 10.0 \n", "171903 159.0 000001100 00100031x 33.0 8.0 \n", "171904 184.0 000100000 23000002x 30.0 4.0 \n", "171905 192.0 000020200 00100360x 35.0 9.0 \n", "171906 216.0 000230020 03023002x 38.0 14.0 \n", "\n", " v_doubles v_triples v_homeruns v_rbi v_sacrifice_hits \\\n", "171902 4.0 1.0 2.0 6.0 0.0 \n", "171903 3.0 0.0 0.0 2.0 0.0 \n", "171904 0.0 0.0 0.0 1.0 0.0 \n", "171905 0.0 0.0 1.0 4.0 0.0 \n", "171906 1.0 1.0 2.0 7.0 1.0 \n", "\n", " v_sacrifice_flies v_hit_by_pitch v_walks v_intentional_walks \\\n", "171902 0.0 1.0 4.0 0.0 \n", "171903 0.0 0.0 2.0 0.0 \n", "171904 0.0 0.0 2.0 0.0 \n", "171905 0.0 0.0 4.0 0.0 \n", "171906 0.0 0.0 3.0 2.0 \n", "\n", " v_strikeouts v_stolen_bases v_caught_stealing \\\n", "171902 12.0 2.0 1.0 \n", "171903 9.0 1.0 1.0 \n", "171904 7.0 0.0 0.0 \n", "171905 11.0 0.0 1.0 \n", "171906 10.0 1.0 1.0 \n", "\n", " v_grounded_into_double v_first_catcher_interference v_left_on_base \\\n", "171902 0.0 0.0 8.0 \n", "171903 1.0 0.0 6.0 \n", "171904 1.0 0.0 4.0 \n", "171905 0.0 0.0 8.0 \n", "171906 1.0 0.0 8.0 \n", "\n", " v_pitchers_used v_individual_earned_runs v_team_earned_runs \\\n", "171902 7.0 4.0 4.0 \n", "171903 6.0 3.0 3.0 \n", "171904 7.0 7.0 7.0 \n", "171905 6.0 8.0 8.0 \n", "171906 7.0 10.0 10.0 \n", "\n", " v_wild_pitches v_balks v_putouts v_assists v_errors \\\n", "171902 1.0 0.0 30.0 12.0 1.0 \n", "171903 0.0 0.0 24.0 12.0 3.0 \n", "171904 0.0 0.0 24.0 5.0 1.0 \n", "171905 0.0 0.0 24.0 2.0 2.0 \n", "171906 1.0 0.0 24.0 11.0 0.0 \n", "\n", " v_passed_balls v_double_plays v_triple_plays h_at_bats h_hits \\\n", "171902 0.0 0.0 0.0 41.0 13.0 \n", "171903 1.0 2.0 0.0 33.0 10.0 \n", "171904 0.0 0.0 0.0 39.0 16.0 \n", "171905 0.0 0.0 0.0 36.0 12.0 \n", "171906 0.0 1.0 0.0 30.0 10.0 \n", "\n", " h_doubles h_triples h_homeruns h_rbi h_sacrifice_hits \\\n", "171902 4.0 0.0 1.0 4.0 1.0 \n", "171903 1.0 0.0 0.0 3.0 0.0 \n", "171904 3.0 1.0 0.0 7.0 0.0 \n", "171905 2.0 0.0 1.0 10.0 0.0 \n", "171906 2.0 0.0 1.0 10.0 1.0 \n", "\n", " h_sacrifice_flies h_hit_by_pitch h_walks h_intentional_walks \\\n", "171902 0.0 1.0 3.0 0.0 \n", "171903 1.0 0.0 2.0 0.0 \n", "171904 0.0 0.0 4.0 1.0 \n", "171905 2.0 0.0 4.0 0.0 \n", "171906 1.0 1.0 8.0 0.0 \n", "\n", " h_strikeouts h_stolen_bases h_caught_stealing \\\n", "171902 11.0 0.0 1.0 \n", "171903 3.0 0.0 0.0 \n", "171904 11.0 2.0 1.0 \n", "171905 5.0 0.0 0.0 \n", "171906 3.0 2.0 0.0 \n", "\n", " h_grounded_into_double h_first_catcher_interference h_left_on_base \\\n", "171902 0.0 0.0 12.0 \n", "171903 2.0 0.0 7.0 \n", "171904 0.0 0.0 12.0 \n", "171905 0.0 0.0 8.0 \n", "171906 1.0 0.0 7.0 \n", "\n", " h_pitchers_used h_individual_earned_runs h_team_earned_runs \\\n", "171902 5.0 6.0 6.0 \n", "171903 5.0 2.0 2.0 \n", "171904 2.0 1.0 1.0 \n", "171905 3.0 4.0 4.0 \n", "171906 6.0 7.0 7.0 \n", "\n", " h_wild_pitches h_balks h_putouts h_assists h_errors \\\n", "171902 0.0 0.0 30.0 13.0 0.0 \n", "171903 0.0 0.0 27.0 7.0 0.0 \n", "171904 0.0 0.0 27.0 7.0 0.0 \n", "171905 0.0 0.0 27.0 7.0 0.0 \n", "171906 1.0 0.0 27.0 11.0 0.0 \n", "\n", " h_passed_balls h_double_plays h_triple_plays hp_umpire_id \\\n", "171902 0.0 0.0 0.0 barrs901 \n", "171903 0.0 1.0 0.0 barkl901 \n", "171904 0.0 1.0 0.0 knigb901 \n", "171905 0.0 1.0 0.0 cuzzp901 \n", "171906 0.0 1.0 0.0 tumpj901 \n", "\n", " hp_umpire_name 1b_umpire_id 1b_umpire_name 2b_umpire_id \\\n", "171902 Scott Barry woodt901 Tom Woodring randt901 \n", "171903 Lance Barksdale herna901 Angel Hernandez barrt901 \n", "171904 Brian Knight westj901 Joe West fleta901 \n", "171905 Phil Cuzzi ticht901 Todd Tichenor vanol901 \n", "171906 John Tumpane porta901 Alan Porter onorb901 \n", "\n", " 2b_umpire_name 3b_umpire_id 3b_umpire_name lf_umpire_id \\\n", "171902 Tony Randazzo ortir901 Roberto Ortiz NaN \n", "171903 Ted Barrett littw901 Will Little NaN \n", "171904 Andy Fletcher danlk901 Kerwin Danley NaN \n", "171905 Larry Vanover marqa901 Alfonso Marquez NaN \n", "171906 Brian O'Nora kellj901 Jeff Kellogg NaN \n", "\n", " lf_umpire_name rf_umpire_id rf_umpire_name v_manager_id \\\n", "171902 NaN NaN NaN counc001 \n", "171903 NaN NaN NaN collt801 \n", "171904 NaN NaN NaN robed001 \n", "171905 NaN NaN NaN hurdc001 \n", "171906 NaN NaN NaN mattd001 \n", "\n", " v_manager_name h_manager_id h_manager_name winning_pitcher_id \\\n", "171902 Craig Counsell weisw001 Walt Weiss thort001 \n", "171903 Terry Collins mackp101 Pete Mackanin murrc002 \n", "171904 Dave Roberts bochb002 Bruce Bochy moorm003 \n", "171905 Clint Hurdle mathm001 Mike Matheny broxj001 \n", "171906 Don Mattingly baked002 Dusty Baker schem001 \n", "\n", " winning_pitcher_name losing_pitcher_id losing_pitcher_name \\\n", "171902 Tyler Thornburg rusic001 Chris Rusin \n", "171903 Colton Murray goede001 Erik Goeddel \n", "171904 Matt Moore maedk001 Kenta Maeda \n", "171905 Jonathan Broxton nicaj001 Juan Nicasio \n", "171906 Max Scherzer brica001 Austin Brice \n", "\n", " saving_pitcher_id saving_pitcher_name winning_rbi_batter_id \\\n", "171902 knebc001 Corey Knebel susaa001 \n", "171903 nerih001 Hector Neris hernc005 \n", "171904 NaN NaN poseb001 \n", "171905 NaN NaN piscs001 \n", "171906 melam001 Mark Melancon difow001 \n", "\n", " winning_rbi_batter_id_name v_starting_pitcher_id \\\n", "171902 Andrew Susac cravt001 \n", "171903 Cesar Hernandez ynoag001 \n", "171904 Buster Posey maedk001 \n", "171905 Stephen Piscotty voger001 \n", "171906 Wilmer Difo koeht001 \n", "\n", " v_starting_pitcher_name h_starting_pitcher_id h_starting_pitcher_name \\\n", "171902 Tyler Cravy marqg001 German Marquez \n", "171903 Gabriel Ynoa eickj001 Jerad Eickhoff \n", "171904 Kenta Maeda moorm003 Matt Moore \n", "171905 Ryan Vogelsong waina001 Adam Wainwright \n", "171906 Tom Koehler schem001 Max Scherzer \n", "\n", " v_player_1_id v_player_1_name v_player_1_def_pos v_player_2_id \\\n", "171902 villj001 Jonathan Villar 5.0 genns001 \n", "171903 granc001 Curtis Granderson 8.0 cabra002 \n", "171904 kendh001 Howie Kendrick 7.0 turnj001 \n", "171905 jasoj001 John Jaso 3.0 polag001 \n", "171906 gordd002 Dee Gordon 4.0 telit001 \n", "\n", " v_player_2_name v_player_2_def_pos v_player_3_id v_player_3_name \\\n", "171902 Scooter Gennett 4.0 cartc002 Chris Carter \n", "171903 Asdrubal Cabrera 6.0 brucj001 Jay Bruce \n", "171904 Justin Turner 5.0 seagc001 Corey Seager \n", "171905 Gregory Polanco 9.0 mccua001 Andrew McCutchen \n", "171906 Tomas Telis 2.0 pradm001 Martin Prado \n", "\n", " v_player_3_def_pos v_player_4_id v_player_4_name \\\n", "171902 3.0 santd002 Domingo Santana \n", "171903 9.0 dudal001 Lucas Duda \n", "171904 6.0 puigy001 Yasiel Puig \n", "171905 8.0 kangj001 Jung Ho Kang \n", "171906 5.0 yelic001 Christian Yelich \n", "\n", " v_player_4_def_pos v_player_5_id v_player_5_name v_player_5_def_pos \\\n", "171902 9.0 pereh001 Hernan Perez 8.0 \n", "171903 3.0 johnk003 Kelly Johnson 4.0 \n", "171904 9.0 gonza003 Adrian Gonzalez 3.0 \n", "171905 5.0 joycm001 Matt Joyce 7.0 \n", "171906 8.0 bourj002 Justin Bour 3.0 \n", "\n", " v_player_6_id v_player_6_name v_player_6_def_pos v_player_7_id \\\n", "171902 arcio002 Orlando Arcia 6.0 susaa001 \n", "171903 confm001 Michael Conforto 7.0 campe001 \n", "171904 grany001 Yasmani Grandal 2.0 pedej001 \n", "171905 hansa001 Alen Hanson 4.0 fryee001 \n", "171906 scrux001 Xavier Scruggs 7.0 hoodd001 \n", "\n", " v_player_7_name v_player_7_def_pos v_player_8_id v_player_8_name \\\n", "171902 Andrew Susac 2.0 elmoj001 Jake Elmore \n", "171903 Eric Campbell 5.0 plawk001 Kevin Plawecki \n", "171904 Joc Pederson 8.0 utlec001 Chase Utley \n", "171905 Eric Fryer 2.0 florp001 Pedro Florimon \n", "171906 Destin Hood 9.0 hecha001 Adeiny Hechavarria \n", "\n", " v_player_8_def_pos v_player_9_id v_player_9_name v_player_9_def_pos \\\n", "171902 7.0 cravt001 Tyler Cravy 1.0 \n", "171903 2.0 ynoag001 Gabriel Ynoa 1.0 \n", "171904 4.0 maedk001 Kenta Maeda 1.0 \n", "171905 6.0 voger001 Ryan Vogelsong 1.0 \n", "171906 6.0 koeht001 Tom Koehler 1.0 \n", "\n", " h_player_1_id h_player_1_name h_player_1_def_pos h_player_2_id \\\n", "171902 blacc001 Charlie Blackmon 8.0 dahld001 \n", "171903 hernc005 Cesar Hernandez 4.0 parej002 \n", "171904 spand001 Denard Span 8.0 beltb001 \n", "171905 carpm002 Matt Carpenter 3.0 diaza003 \n", "171906 turnt001 Trea Turner 8.0 reveb001 \n", "\n", " h_player_2_name h_player_2_def_pos h_player_3_id h_player_3_name \\\n", "171902 David Dahl 7.0 arenn001 Nolan Arenado \n", "171903 Jimmy Paredes 7.0 herro001 Odubel Herrera \n", "171904 Brandon Belt 3.0 poseb001 Buster Posey \n", "171905 Aledmys Diaz 6.0 moliy001 Yadier Molina \n", "171906 Ben Revere 7.0 harpb003 Bryce Harper \n", "\n", " h_player_3_def_pos h_player_4_id h_player_4_name \\\n", "171902 5.0 gonzc001 Carlos Gonzalez \n", "171903 8.0 franm004 Maikel Franco \n", "171904 2.0 pench001 Hunter Pence \n", "171905 2.0 piscs001 Stephen Piscotty \n", "171906 9.0 zimmr001 Ryan Zimmerman \n", "\n", " h_player_4_def_pos h_player_5_id h_player_5_name \\\n", "171902 9.0 murpt002 Tom Murphy \n", "171903 5.0 howar001 Ryan Howard \n", "171904 9.0 crawb001 Brandon Crawford \n", "171905 9.0 peraj001 Jhonny Peralta \n", "171906 3.0 drews001 Stephen Drew \n", "\n", " h_player_5_def_pos h_player_6_id h_player_6_name \\\n", "171902 2.0 pattj005 Jordan Patterson \n", "171903 3.0 ruppc001 Cameron Rupp \n", "171904 6.0 pagaa001 Angel Pagan \n", "171905 5.0 mossb001 Brandon Moss \n", "171906 5.0 difow001 Wilmer Difo \n", "\n", " h_player_6_def_pos h_player_7_id h_player_7_name h_player_7_def_pos \\\n", "171902 3.0 valap001 Pat Valaika 4.0 \n", "171903 2.0 blana001 Andres Blanco 6.0 \n", "171904 7.0 panij002 Joe Panik 4.0 \n", "171905 7.0 gyorj001 Jedd Gyorko 4.0 \n", "171906 4.0 espid001 Danny Espinosa 6.0 \n", "\n", " h_player_8_id h_player_8_name h_player_8_def_pos h_player_9_id \\\n", "171902 adamc001 Cristhian Adames 6.0 marqg001 \n", "171903 altha001 Aaron Altherr 9.0 eickj001 \n", "171904 gillc001 Conor Gillaspie 5.0 moorm003 \n", "171905 gricr001 Randal Grichuk 8.0 waina001 \n", "171906 lobaj001 Jose Lobaton 2.0 schem001 \n", "\n", " h_player_9_name h_player_9_def_pos additional_info acquisition_info \n", "171902 German Marquez 1.0 NaN Y \n", "171903 Jerad Eickhoff 1.0 NaN Y \n", "171904 Matt Moore 1.0 NaN Y \n", "171905 Adam Wainwright 1.0 NaN Y \n", "171906 Max Scherzer 1.0 NaN Y " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "log.tail()" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "It looks like the game log has a record of over 170,000 games. It looks like these games are chronologically ordered and occur between 1871 and 2016.\n", "\n", "For each game we have:\n", "\n", "- general information on the game\n", "- team level stats for each team\n", "- a list of players from each team, numbered, with their defensive positions\n", "- the umpires that officiated the game\n", "- some 'awards', like winning and losing pitcher\n", "\n", "We have a `game_log_fields.txt` file that tell us that the player number corresponds with the order in which they batted.\n", "\n", "It's worth noting that there is no natural primary key column for this table." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(20494, 7)\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idlastfirstplayer_debutmgr_debutcoach_debutump_debut
0aardd001AardsmaDavid04/06/2004NaNNaNNaN
1aaroh101AaronHank04/13/1954NaNNaNNaN
2aarot101AaronTommie04/10/1962NaN04/06/1979NaN
3aased001AaseDon07/26/1977NaNNaNNaN
4abada001AbadAndy09/10/2001NaNNaNNaN
\n", "
" ], "text/plain": [ " id last first player_debut mgr_debut coach_debut ump_debut\n", "0 aardd001 Aardsma David 04/06/2004 NaN NaN NaN\n", "1 aaroh101 Aaron Hank 04/13/1954 NaN NaN NaN\n", "2 aarot101 Aaron Tommie 04/10/1962 NaN 04/06/1979 NaN\n", "3 aased001 Aase Don 07/26/1977 NaN NaN NaN\n", "4 abada001 Abad Andy 09/10/2001 NaN NaN NaN" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "person = pd.read_csv('person_codes.csv')\n", "print(person.shape)\n", "person.head()" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "This seems to be a list of people with IDs. The IDs look like they match up with those used in the game log. There are debut dates, for players, managers, coaches and umpires. We can see that some people might have been one or more of these roles.\n", "\n", "It also looks like coaches and managers are two different things in baseball. After some research, managers are what would be called a 'coach' or 'head coach' in other sports, and coaches are more specialized, like base coaches. It also seems like coaches aren't recorded in the game log." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(252, 9)\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
park_idnameakacitystatestartendleaguenotes
0ALB01Riverside ParkNaNAlbanyNY09/11/188005/30/1882NLTRN:9/11/80;6/15&9/10/1881;5/16-5/18&5/30/1882
1ALT01Columbia ParkNaNAltoonaPA04/30/188405/31/1884UANaN
2ANA01Angel Stadium of AnaheimEdison Field; Anaheim StadiumAnaheimCA04/19/1966NaNALNaN
3ARL01Arlington StadiumNaNArlingtonTX04/21/197210/03/1993ALNaN
4ARL02Rangers Ballpark in ArlingtonThe Ballpark in Arlington; Ameriquest FlArlingtonTX04/11/1994NaNALNaN
\n", "
" ], "text/plain": [ " park_id name \\\n", "0 ALB01 Riverside Park \n", "1 ALT01 Columbia Park \n", "2 ANA01 Angel Stadium of Anaheim \n", "3 ARL01 Arlington Stadium \n", "4 ARL02 Rangers Ballpark in Arlington \n", "\n", " aka city state start \\\n", "0 NaN Albany NY 09/11/1880 \n", "1 NaN Altoona PA 04/30/1884 \n", "2 Edison Field; Anaheim Stadium Anaheim CA 04/19/1966 \n", "3 NaN Arlington TX 04/21/1972 \n", "4 The Ballpark in Arlington; Ameriquest Fl Arlington TX 04/11/1994 \n", "\n", " end league notes \n", "0 05/30/1882 NL TRN:9/11/80;6/15&9/10/1881;5/16-5/18&5/30/1882 \n", "1 05/31/1884 UA NaN \n", "2 NaN AL NaN \n", "3 10/03/1993 AL NaN \n", "4 NaN AL NaN " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "park = pd.read_csv('park_codes.csv')\n", "print(park.shape)\n", "park.head()" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "This seems to be a list of all baseball parks. There are IDs which seem to match with the game log, as well as names, nicknames, city and league." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(150, 8)\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
team_idleaguestartendcitynicknamefranch_idseq
0ALTUA18841884AltoonaMountain CitiesALT1
1ARINL19980ArizonaDiamondbacksARI1
2BFNNL18791885BuffaloBisonsBFN1
3BFPPL18901890BuffaloBisonsBFP1
4BL1NaN18721874BaltimoreCanariesBL11
\n", "
" ], "text/plain": [ " team_id league start end city nickname franch_id seq\n", "0 ALT UA 1884 1884 Altoona Mountain Cities ALT 1\n", "1 ARI NL 1998 0 Arizona Diamondbacks ARI 1\n", "2 BFN NL 1879 1885 Buffalo Bisons BFN 1\n", "3 BFP PL 1890 1890 Buffalo Bisons BFP 1\n", "4 BL1 NaN 1872 1874 Baltimore Canaries BL1 1" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "team = pd.read_csv('team_codes.csv')\n", "print(team.shape)\n", "team.head()" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "This seems to be a list of all teams, with team_ids which seem to match the game log. Interestingly, there is a `franch_id`, let's take a look at this:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "hidden": true }, "outputs": [ { "data": { "text/plain": [ "BS1 4\n", "TRN 3\n", "LAA 3\n", "SE1 3\n", "BR3 3\n", "Name: franch_id, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "team[\"franch_id\"].value_counts().head()" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "We might have `franch_id` occurring a few times for some teams, let's look at the first one in more detail." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "hidden": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
team_idleaguestartendcitynicknamefranch_idseq
21BS1NaN18711875BostonBravesBS11
22BSNNL18761952BostonBravesBS12
23MLNNL19531965MilwaukeeBravesBS13
24ATLNL19660AtlantaBravesBS14
\n", "
" ], "text/plain": [ " team_id league start end city nickname franch_id seq\n", "21 BS1 NaN 1871 1875 Boston Braves BS1 1\n", "22 BSN NL 1876 1952 Boston Braves BS1 2\n", "23 MLN NL 1953 1965 Milwaukee Braves BS1 3\n", "24 ATL NL 1966 0 Atlanta Braves BS1 4" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "team[team[\"franch_id\"] == 'BS1']" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "It appears that teams move between leagues and cities. The team_id changes when this happens, `franch_id` (which is probably 'Franchise') helps us tie all of this together." ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "**Defensive Positions**\n", "\n", "In the game log, each player has a defensive position listed, which seems to be a number between 1-10. Doing some research around this, I found [this article](http://probaseballinsider.com/baseball-instruction/baseball-basics/baseball-basics-positions/) which gives us a list of names for each numbered position:\n", "\n", "1. Pitcher\n", "2. Catcher\n", "3. 1st Base\n", "4. 2nd Base\n", "5. 3rd Base\n", "6. Shortstop\n", "7. Left Field\n", "8. Center Field\n", "9. Right Field\n", "\n", "The 10th position isn't included, it may be a way of describing a designated hitter that does not field. I can find a retrosheet page that indicates that position `0` is used for this, but we don't have any position 0 in our data. I have chosen to make this an 'Unknown Position' so I'm not including data based on a hunch.\n", "\n", "**Leagues**\n", "\n", "Wikipedia tells us there are currently two leagues - the American (AL) and National (NL). Let's start by finding out what leagues are listed in the main game log:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "hidden": true }, "outputs": [ { "data": { "text/plain": [ "NL 88867\n", "AL 74712\n", "AA 5039\n", "FL 1243\n", "PL 532\n", "UA 428\n", "Name: h_league, dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "log[\"h_league\"].value_counts()" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "It looks like most of our games fall into the two current leagues, but that there are four other leagues. Let's write a quick function to get some info on the years of these leagues:" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "nan went from nan to nan\n", "NL went from 18760422 to 20161002\n", "AA went from 18820502 to 18911006\n", "UA went from 18840417 to 18841019\n", "PL went from 18900419 to 18901004\n", "AL went from 19010424 to 20161002\n", "FL went from 19140413 to 19151003\n" ] } ], "source": [ "def league_info(league):\n", " league_games = log[log[\"h_league\"] == league]\n", " earliest = league_games[\"date\"].min()\n", " latest = league_games[\"date\"].max()\n", " print(\"{} went from {} to {}\".format(league,earliest,latest))\n", "\n", "for league in log[\"h_league\"].unique():\n", " league_info(league)" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "Now we have some years which will help us do some research. After some googling we come up with:\n", "\n", "- `NL`: National League\n", "- `AL`: American League\n", "- `AA`: [American Association](https://en.wikipedia.org/wiki/American_Association_%2819th_century%29)\n", "- `FL`: [Federal League](https://en.wikipedia.org/wiki/Federal_League)\n", "- `PL`: [Players League](https://en.wikipedia.org/wiki/Players%27_League)\n", "- `UA`: [Union Association](https://en.wikipedia.org/wiki/Union_Association)\n", "\n", "It also looks like we have about 1000 games where the home team doesn't have a value for league." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Importing Data into SQLite" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# These helper functions will be useful as we work\n", "# with the SQLite database from python\n", "\n", "DB = \"mlb.db\"\n", "\n", "def run_query(q):\n", " with sqlite3.connect(DB) as conn:\n", " return pd.read_sql(q,conn)\n", "\n", "def run_command(c):\n", " with sqlite3.connect(DB) as conn:\n", " conn.execute('PRAGMA foreign_keys = ON;')\n", " conn.isolation_level = None\n", " conn.execute(c)\n", "\n", "def show_tables():\n", " q = '''\n", " SELECT\n", " name,\n", " type\n", " FROM sqlite_master\n", " WHERE type IN (\"table\",\"view\");\n", " '''\n", " return run_query(q)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "tables = {\n", " \"game_log\": log,\n", " \"person_codes\": person,\n", " \"team_codes\": team,\n", " \"park_codes\": park\n", "}\n", "\n", "with sqlite3.connect(DB) as conn: \n", " for name, data in tables.items():\n", " conn.execute(\"DROP TABLE IF EXISTS {};\".format(name))\n", " data.to_sql(name,conn,index=False)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nametype
0game_logtable
1park_codestable
2team_codestable
3person_codestable
\n", "
" ], "text/plain": [ " name type\n", "0 game_log table\n", "1 park_codes table\n", "2 team_codes table\n", "3 person_codes table" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "show_tables()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
game_iddateh_namenumber_of_game
018710504FW1018710504FW10
118710505WS3018710505WS30
218710506RC1018710506RC10
318710508CH1018710508CH10
418710509TRO018710509TRO0
\n", "
" ], "text/plain": [ " game_id date h_name number_of_game\n", "0 18710504FW10 18710504 FW1 0\n", "1 18710505WS30 18710505 WS3 0\n", "2 18710506RC10 18710506 RC1 0\n", "3 18710508CH10 18710508 CH1 0\n", "4 18710509TRO0 18710509 TRO 0" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c1 = \"\"\"\n", "ALTER TABLE game_log\n", "ADD COLUMN game_id TEXT;\n", "\"\"\"\n", "\n", "# try/except loop since ALTER TABLE\n", "# doesn't support IF NOT EXISTS\n", "try:\n", " run_command(c1)\n", "except:\n", " pass\n", "\n", "c2 = \"\"\"\n", "UPDATE game_log\n", "SET game_id = date || h_name || number_of_game\n", "/* WHERE prevents this if it has already been done */\n", "WHERE game_id IS NULL; \n", "\"\"\"\n", "\n", "run_command(c2)\n", "\n", "q = \"\"\"\n", "SELECT\n", " game_id,\n", " date,\n", " h_name,\n", " number_of_game\n", "FROM game_log\n", "LIMIT 5;\n", "\"\"\"\n", "\n", "run_query(q)" ] }, { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Looking for Normalization Opportunities" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "The following are opportunities for normalization of our data:\n", "\n", "- In `person_codes`, all the debut dates will be able to be reproduced using game log data.\n", "- In `team_codes`, the start, end and sequence columns will be able to be reproduced using game log data.\n", "- In `park_codes`, the start and end years will be able to be reproduced using game log data. While technically the state is an attribute of the city, we might not want to have a an incomplete city/state table so we will leave this in.\n", "- There are lots of places in `game` log where we have a player ID followed by the players name. We will be able to remove this and use the name data in `person_codes`\n", "- In `game_log`, all offensive and defensive stats are repeated for the home team and the visiting team. We could break these out and have a table that lists each game twice, one for each team, and cut out this column repetition.\n", "- Similarly, in `game_log`, we have a listing for 9 players on each team with their positions - we can remove these and have one table that tracks player appearances and their positions.\n", "- We can do a similar thing with the umpires from `game_log`, instead of listing all four positions as columns, we can put the umpires either in their own table or make one table for players, umpires and managers.\n", "- We have several awards in `game_log` like winning pitcher and losing pitcher. We can either break these out into their own table, have a table for awards, or combine the awards in with general appearances like the players and umpires." ] }, { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Planning a Normalized Schema\n" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "The following schema was planned using [DbDesigner.net](https://dbdesigner.net/):\n", "\n", "![schema](images/schema-screenshot.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Creating Tables Without Foreign Keys" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
person_idfirst_namelast_name
0aardd001DavidAardsma
1aaroh101HankAaron
2aarot101TommieAaron
3aased001DonAase
4abada001AndyAbad
\n", "
" ], "text/plain": [ " person_id first_name last_name\n", "0 aardd001 David Aardsma\n", "1 aaroh101 Hank Aaron\n", "2 aarot101 Tommie Aaron\n", "3 aased001 Don Aase\n", "4 abada001 Andy Abad" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c1 = \"\"\"\n", "CREATE TABLE IF NOT EXISTS person (\n", " person_id TEXT PRIMARY KEY,\n", " first_name TEXT,\n", " last_name TEXT\n", ");\n", "\"\"\"\n", "\n", "c2 = \"\"\"\n", "INSERT OR IGNORE INTO person\n", "SELECT\n", " id,\n", " first,\n", " last\n", "FROM person_codes;\n", "\"\"\"\n", "\n", "q = \"\"\"\n", "SELECT * FROM person\n", "LIMIT 5;\n", "\"\"\"\n", "\n", "run_command(c1)\n", "run_command(c2)\n", "run_query(q)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
park_idnamenicknamecitystatenotes
0ALB01Riverside ParkNoneAlbanyNYTRN:9/11/80;6/15&9/10/1881;5/16-5/18&5/30/1882
1ALT01Columbia ParkNoneAltoonaPANone
2ANA01Angel Stadium of AnaheimEdison Field; Anaheim StadiumAnaheimCANone
3ARL01Arlington StadiumNoneArlingtonTXNone
4ARL02Rangers Ballpark in ArlingtonThe Ballpark in Arlington; Ameriquest FlArlingtonTXNone
\n", "
" ], "text/plain": [ " park_id name \\\n", "0 ALB01 Riverside Park \n", "1 ALT01 Columbia Park \n", "2 ANA01 Angel Stadium of Anaheim \n", "3 ARL01 Arlington Stadium \n", "4 ARL02 Rangers Ballpark in Arlington \n", "\n", " nickname city state \\\n", "0 None Albany NY \n", "1 None Altoona PA \n", "2 Edison Field; Anaheim Stadium Anaheim CA \n", "3 None Arlington TX \n", "4 The Ballpark in Arlington; Ameriquest Fl Arlington TX \n", "\n", " notes \n", "0 TRN:9/11/80;6/15&9/10/1881;5/16-5/18&5/30/1882 \n", "1 None \n", "2 None \n", "3 None \n", "4 None " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c1 = \"\"\"\n", "CREATE TABLE IF NOT EXISTS park (\n", " park_id TEXT PRIMARY KEY,\n", " name TEXT,\n", " nickname TEXT,\n", " city TEXT,\n", " state TEXT,\n", " notes TEXT\n", ");\n", "\"\"\"\n", "\n", "c2 = \"\"\"\n", "INSERT OR IGNORE INTO park\n", "SELECT\n", " park_id,\n", " name,\n", " aka,\n", " city,\n", " state,\n", " notes\n", "FROM park_codes;\n", "\"\"\"\n", "\n", "q = \"\"\"\n", "SELECT * FROM park\n", "LIMIT 5;\n", "\"\"\"\n", "\n", "run_command(c1)\n", "run_command(c2)\n", "run_query(q)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
league_idname
0NLNational League
1ALAmerican League
2AAAmerican Association
3FLFederal League
4PLPlayers League
5UAUnion Association
\n", "
" ], "text/plain": [ " league_id name\n", "0 NL National League\n", "1 AL American League\n", "2 AA American Association\n", "3 FL Federal League\n", "4 PL Players League\n", "5 UA Union Association" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c1 = \"\"\"\n", "CREATE TABLE IF NOT EXISTS league (\n", " league_id TEXT PRIMARY KEY,\n", " name TEXT\n", ");\n", "\"\"\"\n", "\n", "c2 = \"\"\"\n", "INSERT OR IGNORE INTO league\n", "VALUES\n", " (\"NL\", \"National League\"),\n", " (\"AL\", \"American League\"),\n", " (\"AA\", \"American Association\"),\n", " (\"FL\", \"Federal League\"),\n", " (\"PL\", \"Players League\"),\n", " (\"UA\", \"Union Association\")\n", ";\n", "\"\"\"\n", "\n", "q = \"\"\"\n", "SELECT * FROM league\n", "\"\"\"\n", "\n", "run_command(c1)\n", "run_command(c2)\n", "run_query(q)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
appearance_type_idnamecategory
0O1Batter 1offense
1O2Batter 2offense
2O3Batter 3offense
3O4Batter 4offense
4O5Batter 5offense
5O6Batter 6offense
6O7Batter 7offense
7O8Batter 8offense
8O9Batter 9offense
9D1Pitcherdefense
10D2Catcherdefense
11D31st Basedefense
12D42nd Basedefense
13D53rd Basedefense
14D6Shortstopdefense
15D7Left Fielddefense
16D8Center Fielddefense
17D9Right Fielddefense
18D10Unknown Positiondefense
19UHPHome Plateumpire
20U1BFirst Baseumpire
21U2BSecond Baseumpire
22U3BThird Baseumpire
23ULFLeft Fieldumpire
24URFRight Fieldumpire
25MMManagermanager
26AWPWinning Pitcheraward
27ALPLosing Pitcheraward
28ASPSaving Pitcheraward
29AWBWinning RBI Batteraward
30PSPStarting Pitcherpitcher
\n", "
" ], "text/plain": [ " appearance_type_id name category\n", "0 O1 Batter 1 offense\n", "1 O2 Batter 2 offense\n", "2 O3 Batter 3 offense\n", "3 O4 Batter 4 offense\n", "4 O5 Batter 5 offense\n", "5 O6 Batter 6 offense\n", "6 O7 Batter 7 offense\n", "7 O8 Batter 8 offense\n", "8 O9 Batter 9 offense\n", "9 D1 Pitcher defense\n", "10 D2 Catcher defense\n", "11 D3 1st Base defense\n", "12 D4 2nd Base defense\n", "13 D5 3rd Base defense\n", "14 D6 Shortstop defense\n", "15 D7 Left Field defense\n", "16 D8 Center Field defense\n", "17 D9 Right Field defense\n", "18 D10 Unknown Position defense\n", "19 UHP Home Plate umpire\n", "20 U1B First Base umpire\n", "21 U2B Second Base umpire\n", "22 U3B Third Base umpire\n", "23 ULF Left Field umpire\n", "24 URF Right Field umpire\n", "25 MM Manager manager\n", "26 AWP Winning Pitcher award\n", "27 ALP Losing Pitcher award\n", "28 ASP Saving Pitcher award\n", "29 AWB Winning RBI Batter award\n", "30 PSP Starting Pitcher pitcher" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c1 = \"DROP TABLE IF EXISTS appearance_type;\"\n", "\n", "run_command(c1)\n", "\n", "c2 = \"\"\"\n", "CREATE TABLE appearance_type (\n", " appearance_type_id TEXT PRIMARY KEY,\n", " name TEXT,\n", " category TEXT\n", ");\n", "\"\"\"\n", "run_command(c2)\n", "\n", "appearance_type = pd.read_csv('appearance_type.csv')\n", "\n", "with sqlite3.connect('mlb.db') as conn:\n", " appearance_type.to_sql('appearance_type',\n", " conn,\n", " index=False,\n", " if_exists='append')\n", "\n", "q = \"\"\"\n", "SELECT * FROM appearance_type;\n", "\"\"\"\n", "\n", "run_query(q)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Adding The Team and Game Tables" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
team_idleague_idcitynicknamefranch_id
0ALTUAAltoonaMountain CitiesALT
1ARINLArizonaDiamondbacksARI
2BFNNLBuffaloBisonsBFN
3BFPPLBuffaloBisonsBFP
4BL1NoneBaltimoreCanariesBL1
\n", "
" ], "text/plain": [ " team_id league_id city nickname franch_id\n", "0 ALT UA Altoona Mountain Cities ALT\n", "1 ARI NL Arizona Diamondbacks ARI\n", "2 BFN NL Buffalo Bisons BFN\n", "3 BFP PL Buffalo Bisons BFP\n", "4 BL1 None Baltimore Canaries BL1" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c1 = \"\"\"\n", "CREATE TABLE IF NOT EXISTS team (\n", " team_id TEXT PRIMARY KEY,\n", " league_id TEXT,\n", " city TEXT,\n", " nickname TEXT,\n", " franch_id TEXT,\n", " FOREIGN KEY (league_id) REFERENCES league(league_id)\n", ");\n", "\"\"\"\n", "\n", "c2 = \"\"\"\n", "INSERT OR IGNORE INTO team\n", "SELECT\n", " team_id,\n", " league,\n", " city,\n", " nickname,\n", " franch_id\n", "FROM team_codes;\n", "\"\"\"\n", "\n", "q = \"\"\"\n", "SELECT * FROM team\n", "LIMIT 5;\n", "\"\"\"\n", "\n", "run_command(c1)\n", "run_command(c2)\n", "run_query(q)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
game_iddatenumber_of_gamepark_idlength_outsdaycompletionforefeitprotestattendancelegnth_minutesadditional_infoacquisition_info
018710504FW10187105040FOR01541NoneNoneNone200120NoneY
118710505WS30187105050WAS01541NoneNoneNone5000145HTBFY
218710506RC10187105060RCK01541NoneNoneNone1000140NoneY
318710508CH10187105080CHI01541NoneNoneNone5000150NoneY
418710509TRO0187105090TRO01541NoneNoneNone3250145HTBFY
\n", "
" ], "text/plain": [ " game_id date number_of_game park_id length_outs day \\\n", "0 18710504FW10 18710504 0 FOR01 54 1 \n", "1 18710505WS30 18710505 0 WAS01 54 1 \n", "2 18710506RC10 18710506 0 RCK01 54 1 \n", "3 18710508CH10 18710508 0 CHI01 54 1 \n", "4 18710509TRO0 18710509 0 TRO01 54 1 \n", "\n", " completion forefeit protest attendance legnth_minutes additional_info \\\n", "0 None None None 200 120 None \n", "1 None None None 5000 145 HTBF \n", "2 None None None 1000 140 None \n", "3 None None None 5000 150 None \n", "4 None None None 3250 145 HTBF \n", "\n", " acquisition_info \n", "0 Y \n", "1 Y \n", "2 Y \n", "3 Y \n", "4 Y " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c1 = \"\"\"\n", "CREATE TABLE IF NOT EXISTS game (\n", " game_id TEXT PRIMARY KEY,\n", " date TEXT,\n", " number_of_game INTEGER,\n", " park_id TEXT,\n", " length_outs INTEGER,\n", " day BOOLEAN,\n", " completion TEXT,\n", " forefeit TEXT,\n", " protest TEXT,\n", " attendance INTEGER,\n", " legnth_minutes INTEGER,\n", " additional_info TEXT,\n", " acquisition_info TEXT,\n", " FOREIGN KEY (park_id) REFERENCES park(park_id)\n", ");\n", "\"\"\"\n", "\n", "c2 = \"\"\"\n", "INSERT OR IGNORE INTO game\n", "SELECT\n", " game_id,\n", " date,\n", " number_of_game,\n", " park_id,\n", " length_outs,\n", " CASE\n", " WHEN day_night = \"D\" THEN 1\n", " WHEN day_night = \"N\" THEN 0\n", " ELSE NULL\n", " END\n", " AS day,\n", " completion,\n", " forefeit,\n", " protest,\n", " attendance,\n", " length_minutes,\n", " additional_info,\n", " acquisition_info\n", "FROM game_log;\n", "\"\"\"\n", "\n", "q = \"\"\"\n", "SELECT * FROM game\n", "LIMIT 5;\n", "\"\"\"\n", "\n", "run_command(c1)\n", "run_command(c2)\n", "run_query(q)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Adding the Team Appearance Table" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
team_idgame_idhomeleague_idscoreline_scoreat_batshitsdoublestripleshomerunsrbisacrifice_hitssacrifice_flieshit_by_pitchwalksintentional_walksstrikeoutsstolen_basescaught_stealinggrounded_into_doublefirst_catcher_interferenceleft_on_basepitchers_usedindividual_earned_runsteam_earned_runswild_pitchesbalksputoutsassistserrorspassed_ballsdouble_playstriple_plays
0CL118710504FW100None000000000030410000001NaN61NaN-1NaN4111002790300
1FW118710504FW101None201001000031410020001NaN00NaN-1NaN3100002733110
2MIA20161002WAS00NL70002300203814112710032.01011.010.08710101024110010
3WAS20161002WAS01NL1003023002x30102011011180.0320.010.076771027110010
\n", "
" ], "text/plain": [ " team_id game_id home league_id score line_score at_bats hits \\\n", "0 CL1 18710504FW10 0 None 0 000000000 30 4 \n", "1 FW1 18710504FW10 1 None 2 010010000 31 4 \n", "2 MIA 20161002WAS0 0 NL 7 000230020 38 14 \n", "3 WAS 20161002WAS0 1 NL 10 03023002x 30 10 \n", "\n", " doubles triples homeruns rbi sacrifice_hits sacrifice_flies \\\n", "0 1 0 0 0 0 0 \n", "1 1 0 0 2 0 0 \n", "2 1 1 2 7 1 0 \n", "3 2 0 1 10 1 1 \n", "\n", " hit_by_pitch walks intentional_walks strikeouts stolen_bases \\\n", "0 0 1 NaN 6 1 \n", "1 0 1 NaN 0 0 \n", "2 0 3 2.0 10 1 \n", "3 1 8 0.0 3 2 \n", "\n", " caught_stealing grounded_into_double first_catcher_interference \\\n", "0 NaN -1 NaN \n", "1 NaN -1 NaN \n", "2 1.0 1 0.0 \n", "3 0.0 1 0.0 \n", "\n", " left_on_base pitchers_used individual_earned_runs team_earned_runs \\\n", "0 4 1 1 1 \n", "1 3 1 0 0 \n", "2 8 7 10 10 \n", "3 7 6 7 7 \n", "\n", " wild_pitches balks putouts assists errors passed_balls double_plays \\\n", "0 0 0 27 9 0 3 0 \n", "1 0 0 27 3 3 1 1 \n", "2 1 0 24 11 0 0 1 \n", "3 1 0 27 11 0 0 1 \n", "\n", " triple_plays \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c1 = \"\"\"\n", "CREATE TABLE IF NOT EXISTS team_appearance (\n", " team_id TEXT,\n", " game_id TEXT,\n", " home BOOLEAN,\n", " league_id TEXT,\n", " score INTEGER,\n", " line_score TEXT,\n", " at_bats INTEGER,\n", " hits INTEGER,\n", " doubles INTEGER,\n", " triples INTEGER,\n", " homeruns INTEGER,\n", " rbi INTEGER,\n", " sacrifice_hits INTEGER,\n", " sacrifice_flies INTEGER,\n", " hit_by_pitch INTEGER,\n", " walks INTEGER,\n", " intentional_walks INTEGER,\n", " strikeouts INTEGER,\n", " stolen_bases INTEGER,\n", " caught_stealing INTEGER,\n", " grounded_into_double INTEGER,\n", " first_catcher_interference INTEGER,\n", " left_on_base INTEGER,\n", " pitchers_used INTEGER,\n", " individual_earned_runs INTEGER,\n", " team_earned_runs INTEGER,\n", " wild_pitches INTEGER,\n", " balks INTEGER,\n", " putouts INTEGER,\n", " assists INTEGER,\n", " errors INTEGER,\n", " passed_balls INTEGER,\n", " double_plays INTEGER,\n", " triple_plays INTEGER,\n", " PRIMARY KEY (team_id, game_id),\n", " FOREIGN KEY (team_id) REFERENCES team(team_id),\n", " FOREIGN KEY (game_id) REFERENCES game(game_id),\n", " FOREIGN KEY (team_id) REFERENCES team(team_id)\n", ");\n", "\"\"\"\n", "\n", "run_command(c1)\n", "\n", "c2 = \"\"\"\n", "INSERT OR IGNORE INTO team_appearance\n", " SELECT\n", " h_name,\n", " game_id,\n", " 1 AS home,\n", " h_league,\n", " h_score,\n", " h_line_score,\n", " h_at_bats,\n", " h_hits,\n", " h_doubles,\n", " h_triples,\n", " h_homeruns,\n", " h_rbi,\n", " h_sacrifice_hits,\n", " h_sacrifice_flies,\n", " h_hit_by_pitch,\n", " h_walks,\n", " h_intentional_walks,\n", " h_strikeouts,\n", " h_stolen_bases,\n", " h_caught_stealing,\n", " h_grounded_into_double,\n", " h_first_catcher_interference,\n", " h_left_on_base,\n", " h_pitchers_used,\n", " h_individual_earned_runs,\n", " h_team_earned_runs,\n", " h_wild_pitches,\n", " h_balks,\n", " h_putouts,\n", " h_assists,\n", " h_errors,\n", " h_passed_balls,\n", " h_double_plays,\n", " h_triple_plays\n", " FROM game_log\n", "\n", "UNION\n", "\n", " SELECT \n", " v_name,\n", " game_id,\n", " 0 AS home,\n", " v_league,\n", " v_score,\n", " v_line_score,\n", " v_at_bats,\n", " v_hits,\n", " v_doubles,\n", " v_triples,\n", " v_homeruns,\n", " v_rbi,\n", " v_sacrifice_hits,\n", " v_sacrifice_flies,\n", " v_hit_by_pitch,\n", " v_walks,\n", " v_intentional_walks,\n", " v_strikeouts,\n", " v_stolen_bases,\n", " v_caught_stealing,\n", " v_grounded_into_double,\n", " v_first_catcher_interference,\n", " v_left_on_base,\n", " v_pitchers_used,\n", " v_individual_earned_runs,\n", " v_team_earned_runs,\n", " v_wild_pitches,\n", " v_balks,\n", " v_putouts,\n", " v_assists,\n", " v_errors,\n", " v_passed_balls,\n", " v_double_plays,\n", " v_triple_plays\n", " from game_log;\n", "\"\"\"\n", "\n", "run_command(c2)\n", "\n", "q = \"\"\"\n", "SELECT * FROM team_appearance\n", "WHERE game_id = (\n", " SELECT MIN(game_id) from game\n", " )\n", " OR game_id = (\n", " SELECT MAX(game_id) from game\n", " )\n", "ORDER By game_id, home;\n", "\"\"\"\n", "\n", "run_query(q)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Adding the Person Appearance Table" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": true }, "outputs": [], "source": [ "c0 = \"DROP TABLE IF EXISTS person_appearance\"\n", "\n", "run_command(c0)\n", "\n", "c1 = \"\"\"\n", "CREATE TABLE person_appearance (\n", " appearance_id INTEGER PRIMARY KEY,\n", " person_id TEXT,\n", " team_id TEXT,\n", " game_id TEXT,\n", " appearance_type_id,\n", " FOREIGN KEY (person_id) REFERENCES person(person_id),\n", " FOREIGN KEY (team_id) REFERENCES team(team_id),\n", " FOREIGN KEY (game_id) REFERENCES game(game_id),\n", " FOREIGN KEY (appearance_type_id) REFERENCES appearance_type(appearance_type_id)\n", ");\n", "\"\"\"\n", "\n", "c2 = \"\"\"\n", "INSERT OR IGNORE INTO person_appearance (\n", " game_id,\n", " team_id,\n", " person_id,\n", " appearance_type_id\n", ") \n", " SELECT\n", " game_id,\n", " NULL,\n", " hp_umpire_id,\n", " \"UHP\"\n", " FROM game_log\n", " WHERE hp_umpire_id IS NOT NULL \n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " NULL,\n", " [1b_umpire_id],\n", " \"U1B\"\n", " FROM game_log\n", " WHERE \"1b_umpire_id\" IS NOT NULL\n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " NULL,\n", " [2b_umpire_id],\n", " \"U2B\"\n", " FROM game_log\n", " WHERE [2b_umpire_id] IS NOT NULL\n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " NULL,\n", " [3b_umpire_id],\n", " \"U3B\"\n", " FROM game_log\n", " WHERE [3b_umpire_id] IS NOT NULL\n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " NULL,\n", " lf_umpire_id,\n", " \"ULF\"\n", " FROM game_log\n", " WHERE lf_umpire_id IS NOT NULL\n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " NULL,\n", " rf_umpire_id,\n", " \"URF\"\n", " FROM game_log\n", " WHERE rf_umpire_id IS NOT NULL\n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " v_name,\n", " v_manager_id,\n", " \"MM\"\n", " FROM game_log\n", " WHERE v_manager_id IS NOT NULL\n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " h_name,\n", " h_manager_id,\n", " \"MM\"\n", " FROM game_log\n", " WHERE h_manager_id IS NOT NULL\n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " CASE\n", " WHEN h_score > v_score THEN h_name\n", " ELSE v_name\n", " END,\n", " winning_pitcher_id,\n", " \"AWP\"\n", " FROM game_log\n", " WHERE winning_pitcher_id IS NOT NULL\n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " CASE\n", " WHEN h_score < v_score THEN h_name\n", " ELSE v_name\n", " END,\n", " losing_pitcher_id,\n", " \"ALP\"\n", " FROM game_log\n", " WHERE losing_pitcher_id IS NOT NULL\n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " CASE\n", " WHEN h_score > v_score THEN h_name\n", " ELSE v_name\n", " END,\n", " saving_pitcher_id,\n", " \"ASP\"\n", " FROM game_log\n", " WHERE saving_pitcher_id IS NOT NULL\n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " CASE\n", " WHEN h_score > v_score THEN h_name\n", " ELSE v_name\n", " END,\n", " winning_rbi_batter_id,\n", " \"AWB\"\n", " FROM game_log\n", " WHERE winning_rbi_batter_id IS NOT NULL\n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " v_name,\n", " v_starting_pitcher_id,\n", " \"PSP\"\n", " FROM game_log\n", " WHERE v_starting_pitcher_id IS NOT NULL\n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " h_name,\n", " h_starting_pitcher_id,\n", " \"PSP\"\n", " FROM game_log\n", " WHERE h_starting_pitcher_id IS NOT NULL;\n", "\"\"\"\n", "\n", "template = \"\"\"\n", "INSERT INTO person_appearance (\n", " game_id,\n", " team_id,\n", " person_id,\n", " appearance_type_id\n", ") \n", " SELECT\n", " game_id,\n", " {hv}_name,\n", " {hv}_player_{num}_id,\n", " \"O{num}\"\n", " FROM game_log\n", " WHERE {hv}_player_{num}_id IS NOT NULL\n", "\n", "UNION\n", "\n", " SELECT\n", " game_id,\n", " {hv}_name,\n", " {hv}_player_{num}_id,\n", " \"D\" || CAST({hv}_player_{num}_def_pos AS INT)\n", " FROM game_log\n", " WHERE {hv}_player_{num}_id IS NOT NULL;\n", "\"\"\"\n", "\n", "run_command(c1)\n", "run_command(c2)\n", "\n", "for hv in [\"h\",\"v\"]:\n", " for num in range(1,10):\n", " query_vars = {\n", " \"hv\": hv,\n", " \"num\": num\n", " }\n", " run_command(template.format(**query_vars))" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " games_game\n", "0 171907\n", " games_person_appearance\n", "0 171907\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
appearance_idperson_idteam_idgame_idappearance_type_idnamecategory
01646109porta901None20161002WAS0U1BFirst Baseumpire
11646108onorb901None20161002WAS0U2BSecond Baseumpire
21646107kellj901None20161002WAS0U3BThird Baseumpire
31646110tumpj901None20161002WAS0UHPHome Plateumpire
41646111brica001MIA20161002WAS0ALPLosing Pitcheraward
56716279koeht001MIA20161002WAS0D1Pitcherdefense
64744553telit001MIA20161002WAS0D2Catcherdefense
75589581bourj002MIA20161002WAS0D31st Basedefense
84462877gordd002MIA20161002WAS0D42nd Basedefense
95026229pradm001MIA20161002WAS0D53rd Basedefense
106434609hecha001MIA20161002WAS0D6Shortstopdefense
115871257scrux001MIA20161002WAS0D7Left Fielddefense
125307905yelic001MIA20161002WAS0D8Center Fielddefense
136152933hoodd001MIA20161002WAS0D9Right Fielddefense
141646113mattd001MIA20161002WAS0MMManagermanager
154462878gordd002MIA20161002WAS0O1Batter 1offense
164744554telit001MIA20161002WAS0O2Batter 2offense
175026230pradm001MIA20161002WAS0O3Batter 3offense
185307906yelic001MIA20161002WAS0O4Batter 4offense
195589582bourj002MIA20161002WAS0O5Batter 5offense
205871258scrux001MIA20161002WAS0O6Batter 6offense
216152934hoodd001MIA20161002WAS0O7Batter 7offense
226434610hecha001MIA20161002WAS0O8Batter 8offense
236716280koeht001MIA20161002WAS0O9Batter 9offense
241646112koeht001MIA20161002WAS0PSPStarting Pitcherpitcher
251646116melam001WAS20161002WAS0ASPSaving Pitcheraward
261646115difow001WAS20161002WAS0AWBWinning RBI Batteraward
271646117schem001WAS20161002WAS0AWPWinning Pitcheraward
284181201schem001WAS20161002WAS0D1Pitcherdefense
293899525lobaj001WAS20161002WAS0D2Catcherdefense
302772821zimmr001WAS20161002WAS0D31st Basedefense
313336173difow001WAS20161002WAS0D42nd Basedefense
323054497drews001WAS20161002WAS0D53rd Basedefense
333617849espid001WAS20161002WAS0D6Shortstopdefense
342209469reveb001WAS20161002WAS0D7Left Fielddefense
351927793turnt001WAS20161002WAS0D8Center Fielddefense
362491145harpb003WAS20161002WAS0D9Right Fielddefense
371646114baked002WAS20161002WAS0MMManagermanager
381927794turnt001WAS20161002WAS0O1Batter 1offense
392209470reveb001WAS20161002WAS0O2Batter 2offense
402491146harpb003WAS20161002WAS0O3Batter 3offense
412772822zimmr001WAS20161002WAS0O4Batter 4offense
423054498drews001WAS20161002WAS0O5Batter 5offense
433336174difow001WAS20161002WAS0O6Batter 6offense
443617850espid001WAS20161002WAS0O7Batter 7offense
453899526lobaj001WAS20161002WAS0O8Batter 8offense
464181202schem001WAS20161002WAS0O9Batter 9offense
471646118schem001WAS20161002WAS0PSPStarting Pitcherpitcher
\n", "
" ], "text/plain": [ " appearance_id person_id team_id game_id appearance_type_id \\\n", "0 1646109 porta901 None 20161002WAS0 U1B \n", "1 1646108 onorb901 None 20161002WAS0 U2B \n", "2 1646107 kellj901 None 20161002WAS0 U3B \n", "3 1646110 tumpj901 None 20161002WAS0 UHP \n", "4 1646111 brica001 MIA 20161002WAS0 ALP \n", "5 6716279 koeht001 MIA 20161002WAS0 D1 \n", "6 4744553 telit001 MIA 20161002WAS0 D2 \n", "7 5589581 bourj002 MIA 20161002WAS0 D3 \n", "8 4462877 gordd002 MIA 20161002WAS0 D4 \n", "9 5026229 pradm001 MIA 20161002WAS0 D5 \n", "10 6434609 hecha001 MIA 20161002WAS0 D6 \n", "11 5871257 scrux001 MIA 20161002WAS0 D7 \n", "12 5307905 yelic001 MIA 20161002WAS0 D8 \n", "13 6152933 hoodd001 MIA 20161002WAS0 D9 \n", "14 1646113 mattd001 MIA 20161002WAS0 MM \n", "15 4462878 gordd002 MIA 20161002WAS0 O1 \n", "16 4744554 telit001 MIA 20161002WAS0 O2 \n", "17 5026230 pradm001 MIA 20161002WAS0 O3 \n", "18 5307906 yelic001 MIA 20161002WAS0 O4 \n", "19 5589582 bourj002 MIA 20161002WAS0 O5 \n", "20 5871258 scrux001 MIA 20161002WAS0 O6 \n", "21 6152934 hoodd001 MIA 20161002WAS0 O7 \n", "22 6434610 hecha001 MIA 20161002WAS0 O8 \n", "23 6716280 koeht001 MIA 20161002WAS0 O9 \n", "24 1646112 koeht001 MIA 20161002WAS0 PSP \n", "25 1646116 melam001 WAS 20161002WAS0 ASP \n", "26 1646115 difow001 WAS 20161002WAS0 AWB \n", "27 1646117 schem001 WAS 20161002WAS0 AWP \n", "28 4181201 schem001 WAS 20161002WAS0 D1 \n", "29 3899525 lobaj001 WAS 20161002WAS0 D2 \n", "30 2772821 zimmr001 WAS 20161002WAS0 D3 \n", "31 3336173 difow001 WAS 20161002WAS0 D4 \n", "32 3054497 drews001 WAS 20161002WAS0 D5 \n", "33 3617849 espid001 WAS 20161002WAS0 D6 \n", "34 2209469 reveb001 WAS 20161002WAS0 D7 \n", "35 1927793 turnt001 WAS 20161002WAS0 D8 \n", "36 2491145 harpb003 WAS 20161002WAS0 D9 \n", "37 1646114 baked002 WAS 20161002WAS0 MM \n", "38 1927794 turnt001 WAS 20161002WAS0 O1 \n", "39 2209470 reveb001 WAS 20161002WAS0 O2 \n", "40 2491146 harpb003 WAS 20161002WAS0 O3 \n", "41 2772822 zimmr001 WAS 20161002WAS0 O4 \n", "42 3054498 drews001 WAS 20161002WAS0 O5 \n", "43 3336174 difow001 WAS 20161002WAS0 O6 \n", "44 3617850 espid001 WAS 20161002WAS0 O7 \n", "45 3899526 lobaj001 WAS 20161002WAS0 O8 \n", "46 4181202 schem001 WAS 20161002WAS0 O9 \n", "47 1646118 schem001 WAS 20161002WAS0 PSP \n", "\n", " name category \n", "0 First Base umpire \n", "1 Second Base umpire \n", "2 Third Base umpire \n", "3 Home Plate umpire \n", "4 Losing Pitcher award \n", "5 Pitcher defense \n", "6 Catcher defense \n", "7 1st Base defense \n", "8 2nd Base defense \n", "9 3rd Base defense \n", "10 Shortstop defense \n", "11 Left Field defense \n", "12 Center Field defense \n", "13 Right Field defense \n", "14 Manager manager \n", "15 Batter 1 offense \n", "16 Batter 2 offense \n", "17 Batter 3 offense \n", "18 Batter 4 offense \n", "19 Batter 5 offense \n", "20 Batter 6 offense \n", "21 Batter 7 offense \n", "22 Batter 8 offense \n", "23 Batter 9 offense \n", "24 Starting Pitcher pitcher \n", "25 Saving Pitcher award \n", "26 Winning RBI Batter award \n", "27 Winning Pitcher award \n", "28 Pitcher defense \n", "29 Catcher defense \n", "30 1st Base defense \n", "31 2nd Base defense \n", "32 3rd Base defense \n", "33 Shortstop defense \n", "34 Left Field defense \n", "35 Center Field defense \n", "36 Right Field defense \n", "37 Manager manager \n", "38 Batter 1 offense \n", "39 Batter 2 offense \n", "40 Batter 3 offense \n", "41 Batter 4 offense \n", "42 Batter 5 offense \n", "43 Batter 6 offense \n", "44 Batter 7 offense \n", "45 Batter 8 offense \n", "46 Batter 9 offense \n", "47 Starting Pitcher pitcher " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(run_query(\"SELECT COUNT(DISTINCT game_id) games_game FROM game\"))\n", "print(run_query(\"SELECT COUNT(DISTINCT game_id) games_person_appearance FROM person_appearance\"))\n", "\n", "q = \"\"\"\n", "SELECT\n", " pa.*,\n", " at.name,\n", " at.category\n", "FROM person_appearance pa\n", "INNER JOIN appearance_type at on at.appearance_type_id = pa.appearance_type_id\n", "WHERE PA.game_id = (\n", " SELECT max(game_id)\n", " FROM person_appearance\n", " )\n", "ORDER BY team_id, appearance_type_id\n", "\"\"\"\n", "\n", "run_query(q)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Removing the Original Tables" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nametype
0game_logtable
1park_codestable
2team_codestable
3person_codestable
4persontable
5parktable
6leaguetable
7appearance_typetable
8teamtable
9gametable
10team_appearancetable
11person_appearancetable
\n", "
" ], "text/plain": [ " name type\n", "0 game_log table\n", "1 park_codes table\n", "2 team_codes table\n", "3 person_codes table\n", "4 person table\n", "5 park table\n", "6 league table\n", "7 appearance_type table\n", "8 team table\n", "9 game table\n", "10 team_appearance table\n", "11 person_appearance table" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "show_tables()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nametype
0persontable
1parktable
2leaguetable
3appearance_typetable
4teamtable
5gametable
6team_appearancetable
7person_appearancetable
\n", "
" ], "text/plain": [ " name type\n", "0 person table\n", "1 park table\n", "2 league table\n", "3 appearance_type table\n", "4 team table\n", "5 game table\n", "6 team_appearance table\n", "7 person_appearance table" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tables = [\n", " \"game_log\",\n", " \"park_codes\",\n", " \"team_codes\",\n", " \"person_codes\"\n", "]\n", "\n", "for t in tables:\n", " c = '''\n", " DROP TABLE {}\n", " '''.format(t)\n", " \n", " run_command(c)\n", "\n", "show_tables()" ] } ], "metadata": { "kernelspec": { "display_name": "dscontent", "language": "python", "name": "dscontent" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.4" }, "notify_time": "5" }, "nbformat": 4, "nbformat_minor": 2 }