Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added get_roster function that takes in team abbreviation and year to… #276

Open
wants to merge 2 commits into
base: v4
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 32 additions & 4 deletions basketball_reference_web_scraper/client.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import requests

from basketball_reference_web_scraper.errors import InvalidSeason, InvalidDate, InvalidPlayerAndSeason
from basketball_reference_web_scraper.errors import InvalidSeason, InvalidDate, InvalidPlayerAndSeason, \
InvalidTeamSeason
from basketball_reference_web_scraper.http_service import HTTPService
from basketball_reference_web_scraper.output.columns import BOX_SCORE_COLUMN_NAMES, SCHEDULE_COLUMN_NAMES, \
PLAYER_SEASON_TOTALS_COLUMN_NAMES, \
PLAYER_ADVANCED_SEASON_TOTALS_COLUMN_NAMES, TEAM_BOX_SCORES_COLUMN_NAMES, PLAY_BY_PLAY_COLUMN_NAMES, \
PLAYER_SEASON_BOX_SCORE_COLUMN_NAMES, SEARCH_RESULTS_COLUMN_NAMES, STANDINGS_COLUMNS_NAMES
PLAYER_SEASON_BOX_SCORE_COLUMN_NAMES, SEARCH_RESULTS_COLUMN_NAMES, STANDINGS_COLUMNS_NAMES, ROSTER_COLUMN_NAMES
from basketball_reference_web_scraper.output.fields import format_value, BasketballReferenceJSONEncoder
from basketball_reference_web_scraper.output.service import OutputService
from basketball_reference_web_scraper.output.writers import CSVWriter, JSONWriter, FileOptions, OutputOptions, \
SearchCSVWriter
from basketball_reference_web_scraper.parser_service import ParserService


from basketball_reference_web_scraper.data import TEAM_TO_TEAM_ABBREVIATION
def standings(season_end_year, output_type=None, output_file_path=None, output_write_option=None,
json_options=None):
try:
Expand Down Expand Up @@ -213,6 +213,32 @@ def team_box_scores(day, month, year, output_type=None, output_file_path=None, o
return output_service.output(data=values, options=options)


def roster(team, season_end_year, output_type=None, output_file_path=None, output_write_option=None, json_options=None):
    """Fetch the roster for ``team`` in the season ending in ``season_end_year`` and output it.

    A ``team`` value longer than three characters is upper-cased and looked up in
    ``TEAM_TO_TEAM_ABBREVIATION``; any other value is passed through unchanged.

    Raises ``InvalidTeamSeason`` when the request comes back as 404; every other
    ``requests`` HTTP error is re-raised as-is.

    The fetched rows are handed to ``OutputService.output`` together with the
    output options built from the remaining arguments (``ROSTER_COLUMN_NAMES``
    supplies the CSV column names), and that call's result is returned.
    """
    try:
        service = HTTPService(parser=ParserService())
        if len(team) > 3:
            # Longer values are treated as a key of the abbreviation mapping.
            team = TEAM_TO_TEAM_ABBREVIATION[team.upper()]
        values = service.get_team_roster(team=team, season_end_year=season_end_year)
    except requests.exceptions.HTTPError as http_error:
        if http_error.response.status_code != requests.codes.not_found:
            raise http_error
        raise InvalidTeamSeason(team=team, year=season_end_year)

    file_options = FileOptions.of(path=output_file_path, mode=output_write_option)
    output_options = OutputOptions.of(
        file_options=file_options,
        output_type=output_type,
        json_options=json_options,
        csv_options={"column_names": ROSTER_COLUMN_NAMES},
    )

    json_writer = JSONWriter(value_formatter=BasketballReferenceJSONEncoder)
    csv_writer = CSVWriter(value_formatter=format_value)
    writer_service = OutputService(json_writer=json_writer, csv_writer=csv_writer)
    return writer_service.output(data=values, options=output_options)


def play_by_play(home_team, day, month, year, output_type=None, output_file_path=None, output_write_option=None,
json_options=None):
try:
Expand Down Expand Up @@ -250,3 +276,5 @@ def search(term, output_type=None, output_file_path=None, output_write_option=No
csv_writer=SearchCSVWriter(value_formatter=format_value)
)
return output_service.output(data=values, options=options)


5 changes: 5 additions & 0 deletions basketball_reference_web_scraper/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,8 @@ def __init__(self, player_identifier, season_end_year):
message = "Player with identifier \"{player_identifier}\" in season ending in {season_end_year} is invalid" \
.format(player_identifier=player_identifier, season_end_year=season_end_year)
super().__init__(message)

class InvalidTeamSeason(Exception):
    """Error for a team and year combination that is invalid."""

    def __init__(self, team, year):
        # The formatted text becomes the exception's only positional argument.
        super().__init__(
            'Team "{team}" in {year} is invalid'.format(team=team, year=year)
        )
61 changes: 61 additions & 0 deletions basketball_reference_web_scraper/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,67 @@ def game_url_paths(self):
return [game_link.attrib['href'] for game_link in game_links]


class TeamSeasonPage:
def __init__(self, html):
self.html = html

@property
def roster_query(self):
return '//table[@id="roster"]'

@property
def rows_query(self):
return '//table[@id="roster"]//tbody//tr'

@property
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: let's add a blank line between lines 879 and 880.

def team_roster_table(self):
return self.html.xpath(self.roster_query)

@property
def rows(self):
return [
RosterRow(html=row_html)
for row_html in self.html.xpath(self.rows_query)
]


class RosterRow(PlayerIdentificationRow):
    """A single roster table row.

    Each property returns the text content of the first ``td`` cell matching
    its ``data-stat`` attribute, or an empty string when the row has no such
    cell.
    """

    def __init__(self, html):
        super().__init__(html=html)

    def _first_cell_text(self, query):
        """Return the text of the first node matched by ``query``, or ''."""
        matches = self.html.xpath(query)
        if not matches:
            return ''
        return matches[0].text_content()

    @property
    def number(self):
        return self._first_cell_text('.//td[@data-stat="number"]')

    @property
    def position_abbreviations(self):
        return self._first_cell_text('.//td[@data-stat="pos"]')

    @property
    def height(self):
        return self._first_cell_text('.//td[@data-stat="height"]')

    @property
    def weight(self):
        return self._first_cell_text('.//td[@data-stat="weight"]')


class SchedulePage:
def __init__(self, html):
self.html = html
Expand Down
17 changes: 16 additions & 1 deletion basketball_reference_web_scraper/http_service.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from datetime import datetime, timezone

import requests
from lxml import html

from basketball_reference_web_scraper.data import TEAM_TO_TEAM_ABBREVIATION, TeamTotal, PlayerData
from basketball_reference_web_scraper.errors import InvalidDate, InvalidPlayerAndSeason
from basketball_reference_web_scraper.html import DailyLeadersPage, PlayerSeasonBoxScoresPage, PlayerSeasonTotalTable, \
PlayerAdvancedSeasonTotalsTable, PlayByPlayPage, SchedulePage, BoxScoresPage, DailyBoxScoresPage, SearchPage, \
PlayerPage, StandingsPage
PlayerPage, StandingsPage, TeamSeasonPage


class HTTPService:
Expand Down Expand Up @@ -194,6 +196,17 @@ def team_box_scores(self, day, month, year):
for box_score in self.team_box_score(game_url_path=game_url_path)
]

def get_team_roster(self, team, season_end_year):
    """Download a team's season page and return its roster rows.

    The page URL is ``{BASE_URL}/teams/{team}/{season_end_year}.html``.
    ``raise_for_status`` is called on the response, so an error status
    surfaces as an exception before any parsing happens.

    Returns a list with one dict per roster row, each holding the row's
    ``slug`` and ``name``.
    """
    roster_url = "{BASE_URL}/teams/{team}/{season_end_year}.html".format(
        BASE_URL=HTTPService.BASE_URL,
        team=team,
        season_end_year=season_end_year,
    )

    response = requests.get(url=roster_url)
    response.raise_for_status()

    team_page = TeamSeasonPage(html=html.fromstring(response.content))
    players = []
    for roster_row in team_page.rows:
        players.append({'slug': roster_row.slug, 'name': roster_row.name})
    return players


def search(self, term):
response = requests.get(
url="{BASE_URL}/search/search.fcgi".format(BASE_URL=HTTPService.BASE_URL),
Expand Down Expand Up @@ -240,3 +253,5 @@ def search(self, term):
return {
"players": player_results
}


5 changes: 5 additions & 0 deletions basketball_reference_web_scraper/output/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,8 @@
"division",
"conference",
]

# Column names for roster output, in order.
ROSTER_COLUMN_NAMES = [
"slug",
"name",
]
4 changes: 3 additions & 1 deletion bin/normalizer
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/Users/jaebradley/projects/basketball_reference_web_scraper/bin/python3
#!/bin/sh
'''exec' "/Users/paramgattupalli/Documents/Fall 2024/CEN 3031/basketball_reference_web_scraper/bin/python" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
Expand Down
4 changes: 3 additions & 1 deletion bin/pip
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/Users/jaebradley/projects/basketball_reference_web_scraper/bin/python3
#!/bin/sh
'''exec' "/Users/paramgattupalli/Documents/Fall 2024/CEN 3031/basketball_reference_web_scraper/bin/python" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
Expand Down
4 changes: 3 additions & 1 deletion bin/pip3
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/Users/jaebradley/projects/basketball_reference_web_scraper/bin/python3
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@PGatts these bin files should never have been committed - I removed them in 81dd1e0

Rebasing / merging the latest changes in v4 should resolve the merge conflicts caused by the bin directory.

#!/bin/sh
'''exec' "/Users/paramgattupalli/Documents/Fall 2024/CEN 3031/basketball_reference_web_scraper/bin/python" "$0" "$@"
' '''
# -*- coding: utf-8 -*-
import re
import sys
Expand Down