# --- basketball_reference_web_scraper/errors.py ---
class InvalidTeamSeason(Exception):
    """Signal that a team / season combination could not be resolved to a roster.

    :param team: the team value that was requested.
    :param year: the year in which the requested season ends.
    """

    def __init__(self, team, year):
        message = "Team \"{team}\" in {year} is invalid".format(team=team, year=year)
        super().__init__(message)


# --- basketball_reference_web_scraper/client.py ---
def _resolve_team_abbreviation(team, season_end_year):
    """Translate ``team`` into the abbreviation placed in the roster URL.

    A string of at most three characters is returned untouched. Any other
    value is looked up in ``TEAM_TO_TEAM_ABBREVIATION`` (strings are
    upper-cased first).

    :raises InvalidTeamSeason: when the value matches no entry of the mapping.
    """
    if isinstance(team, str):
        if len(team) <= 3:
            return team
        wanted = team.upper()
    else:
        # Non-string values (for example a key object of the mapping itself)
        # are looked up as-is instead of failing on ``len()``.
        wanted = team

    try:
        return TEAM_TO_TEAM_ABBREVIATION[wanted]
    except KeyError:
        pass

    # NOTE(review): the key type of TEAM_TO_TEAM_ABBREVIATION is not visible
    # from this module. This fallback covers a mapping keyed by enum members
    # whose ``value`` is the upper-case team name - confirm against data.py.
    for known_team, abbreviation in TEAM_TO_TEAM_ABBREVIATION.items():
        if getattr(known_team, "value", known_team) == wanted:
            return abbreviation

    raise InvalidTeamSeason(team=team, year=season_end_year)


def roster(team, season_end_year, output_type=None, output_file_path=None, output_write_option=None, json_options=None):
    """Fetch a team's roster for one season and hand it to the output service.

    :param team: a team abbreviation (string of at most three characters), or
        a value that can be found in ``TEAM_TO_TEAM_ABBREVIATION``.
    :param season_end_year: year in which the season ends; used in the page URL.
    :param output_type: forwarded to ``OutputOptions.of``.
    :param output_file_path: forwarded to ``FileOptions.of`` as ``path``.
    :param output_write_option: forwarded to ``FileOptions.of`` as ``mode``.
    :param json_options: forwarded to ``OutputOptions.of``.
    :return: whatever ``OutputService.output`` returns for the roster rows.
    :raises InvalidTeamSeason: when the team is unknown or the roster page
        request answers with HTTP 404.
    :raises requests.exceptions.HTTPError: for any other HTTP error status.
    """
    # Resolve the team outside the HTTP ``try`` so that an unknown name is
    # reported as InvalidTeamSeason rather than escaping as a bare KeyError.
    team_abbreviation = _resolve_team_abbreviation(team=team, season_end_year=season_end_year)

    try:
        http_service = HTTPService(parser=ParserService())
        values = http_service.get_team_roster(team=team_abbreviation, season_end_year=season_end_year)
    except requests.exceptions.HTTPError as http_error:
        if http_error.response.status_code == requests.codes.not_found:
            raise InvalidTeamSeason(team=team_abbreviation, year=season_end_year)
        # Bare ``raise`` re-raises the active exception with its traceback intact.
        raise

    options = OutputOptions.of(
        file_options=FileOptions.of(path=output_file_path, mode=output_write_option),
        output_type=output_type,
        json_options=json_options,
        csv_options={"column_names": ROSTER_COLUMN_NAMES}
    )

    output_service = OutputService(
        json_writer=JSONWriter(value_formatter=BasketballReferenceJSONEncoder),
        csv_writer=CSVWriter(value_formatter=format_value)
    )
    return output_service.output(data=values, options=options)
# --- basketball_reference_web_scraper/html.py ---
class TeamSeasonPage:
    """Wrap the parsed HTML of a team season page and expose its roster table."""

    def __init__(self, html):
        # ``html`` must offer an ``xpath`` method; it is queried by the properties below.
        self.html = html

    @property
    def roster_query(self):
        """XPath selecting the table whose id is ``roster``."""
        return '//table[@id="roster"]'

    @property
    def rows_query(self):
        """XPath selecting the ``tr`` elements inside the roster table body."""
        return '//table[@id="roster"]//tbody//tr'

    @property
    def team_roster_table(self):
        """Result of running ``roster_query`` against the page."""
        return self.html.xpath(self.roster_query)

    @property
    def rows(self):
        """One ``RosterRow`` per element matched by ``rows_query``."""
        return [
            RosterRow(html=row_html)
            for row_html in self.html.xpath(self.rows_query)
        ]


class RosterRow(PlayerIdentificationRow):
    """A single row of the roster table.

    The ``data-stat`` attribute of each ``td`` identifies the column that the
    properties below read.
    """

    def __init__(self, html):
        super().__init__(html=html)

    def _roster_cell_text(self, data_stat):
        """Return the text of the first ``td`` with the given ``data-stat``, or ``''``."""
        matching_cells = self.html.xpath('.//td[@data-stat="{data_stat}"]'.format(data_stat=data_stat))
        if matching_cells:
            return matching_cells[0].text_content()

        # Rows without the requested cell yield an empty string rather than None.
        return ''

    @property
    def number(self):
        """Text of the ``number`` cell, or ``''`` when the cell is absent."""
        return self._roster_cell_text("number")

    @property
    def position_abbreviations(self):
        """Text of the ``pos`` cell, or ``''`` when the cell is absent."""
        return self._roster_cell_text("pos")

    @property
    def height(self):
        """Text of the ``height`` cell, or ``''`` when the cell is absent."""
        return self._roster_cell_text("height")

    @property
    def weight(self):
        """Text of the ``weight`` cell, or ``''`` when the cell is absent."""
        return self._roster_cell_text("weight")
# --- basketball_reference_web_scraper/http_service.py (method of HTTPService) ---
def get_team_roster(self, team, season_end_year):
    """Download a team season page and list the players found in its roster table.

    :param team: team abbreviation inserted into the page URL.
    :param season_end_year: season end year inserted into the page URL.
    :return: a list of dicts, each holding the ``slug`` and ``name`` of one roster row.
    :raises requests.exceptions.HTTPError: when the response carries an error
        status (via ``raise_for_status``).
    """
    roster_url = "{BASE_URL}/teams/{team}/{season_end_year}.html".format(
        BASE_URL=HTTPService.BASE_URL,
        team=team,
        season_end_year=season_end_year,
    )

    response = requests.get(url=roster_url)
    response.raise_for_status()

    parsed_document = html.fromstring(response.content)
    season_page = TeamSeasonPage(html=parsed_document)

    players = []
    for roster_row in season_page.rows:
        players.append({'slug': roster_row.slug, 'name': roster_row.name})
    return players


# --- basketball_reference_web_scraper/output/columns.py ---
# Column order used when a roster is written as CSV; matches the keys emitted
# by get_team_roster.
ROSTER_COLUMN_NAMES = ["slug", "name"]
"/Users/paramgattupalli/Documents/Fall 2024/CEN 3031/basketball_reference_web_scraper/bin/python" "$0" "$@" +' ''' # -*- coding: utf-8 -*- import re import sys diff --git a/bin/pip3 b/bin/pip3 index eadc7df2..502f5b4b 100755 --- a/bin/pip3 +++ b/bin/pip3 @@ -1,4 +1,6 @@ -#!/Users/jaebradley/projects/basketball_reference_web_scraper/bin/python3 +#!/bin/sh +'''exec' "/Users/paramgattupalli/Documents/Fall 2024/CEN 3031/basketball_reference_web_scraper/bin/python" "$0" "$@" +' ''' # -*- coding: utf-8 -*- import re import sys