
Merge pull request #583 from biglocalnews/ut-201
Try to fix UT-201 #201 on Windows
stucka authored Oct 19, 2023
2 parents 67980f1 + b03051c commit 665f549
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions warn/scrapers/ut.py
@@ -1,10 +1,12 @@
+import csv
 import logging
 from pathlib import Path
 
 from bs4 import BeautifulSoup
 
 from .. import utils
-from ..cache import Cache
+
+# from ..cache import Cache
 
 __authors__ = ["zstumgoren", "Dilcia19"]
 __tags__ = ["html"]
@@ -30,13 +32,14 @@ def scrape(
     Returns: the Path where the file is written
     """
     # Open the cache
-    cache = Cache(cache_dir)
+    # cache = Cache(cache_dir)
 
     # Get the HTML
     url = "https://jobs.utah.gov/employer/business/warnnotices.html"
     r = utils.get_url(url)
     html = r.text
-    cache.write("ut/source.html", html)
+    # No need to cache this, and cache.write doesn't explicitly use utf-8, which crashes Windows.
+    # cache.write("ut/source.html", html)
 
     # Parse table
     soup = BeautifulSoup(html, "html.parser")
@@ -49,7 +52,12 @@ def scrape(
 
     # Write out
     data_path = data_dir / "ut.csv"
-    utils.write_rows_to_csv(data_path, row_list)
+    utils.create_directory(data_path, is_file=True)
+    logger.debug(f"Writing {len(row_list)} rows to {data_path}")
+    # The utils.write_rows_to_csv function doesn't explicitly use utf-8, so it crashes Windows.
+    with open(data_path, mode="w", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f)
+        writer.writerows(row_list)
 
     # Return the path to the CSV
     return data_path
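
A note on the Windows failure the commit message and the new inline comments describe: unless an encoding is passed (or UTF-8 mode is enabled), Python's open() in text mode falls back to the locale's preferred encoding, which on Windows is commonly cp1252 rather than UTF-8, so writing scraped text containing characters outside that code page raises UnicodeEncodeError. Below is a minimal sketch of the failure and of the explicit-encoding fix used above; the sample rows, the "Łódź" string, and the output filename are illustrative only and are not taken from the scraper.

import csv
import locale

# Often "cp1252" on Windows; usually "utf-8" on Linux and macOS.
print(locale.getpreferredencoding(False))

# Hypothetical rows; "Łódź" falls outside cp1252 and would trigger the crash.
rows = [["company", "location"], ["Example Co.", "Łódź"]]

# Without encoding=, open() uses the locale code page, so this can raise
# UnicodeEncodeError on Windows:
# with open("example.csv", "w", newline="") as f:
#     csv.writer(f).writerows(rows)

# Naming utf-8 explicitly, as the commit does, writes the same rows on any platform.
with open("example.csv", mode="w", newline="", encoding="utf-8") as f:
    csv.writer(f).writerows(rows)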

