# Scrape #254
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow that runs the scraper on demand and on a daily schedule.
name: "Scrape"

on:
  # Allow manual runs from the Actions tab / API.
  workflow_dispatch:
  schedule:
    # Once a day at 18:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 18 * * *"
jobs:
  # Runs the numbered pipeline scripts in order, commits the refreshed
  # `data` and `pdfs` directories back to the repository, and uploads
  # `data/combined` as the GitHub Pages artifact.
  scrape:
    name: Scrape
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'
          cache: 'pip'

      - name: Install Python dependencies
        run: pip install -r requirements.txt

      - name: Refresh inspection list
        run: python scripts/01-refresh-inspection-list.py

      - name: Download inspection PDFs
        run: python scripts/02-download-inspection-pdfs.py

      - name: Parse inspection PDFs
        run: python scripts/03-parse-inspection-pdfs.py

      - name: Upload inspection PDFs to DocumentCloud
        run: python scripts/04-upload-inspection-pdfs.py
        env:
          # Quoted so the ID is unambiguously a string, not a YAML integer.
          DOCUMENTCLOUD_PROJECT_ID: "211004"
          DOCUMENTCLOUD_USER: ${{ secrets.DOCUMENTCLOUD_USER }}
          DOCUMENTCLOUD_PASSWORD: ${{ secrets.DOCUMENTCLOUD_PASSWORD }}

      - name: Combine data obtained in the previous steps
        run: python scripts/05-combine-inspection-data.py

      - name: Update RSS
        run: python scripts/06-update-rss.py

      - name: Config git
        # NOTE(review): the address appeared as the redaction placeholder
        # "[email protected]" in the extracted file; restored here to the
        # conventional GitHub Actions noreply address -- confirm against
        # the repository's real workflow.
        run: |
          git config --global user.email "actions@users.noreply.github.com"
          git config --global user.name "Automated"

      - name: Commit changes
        # Commit only when something is staged; `git diff --cached --quiet`
        # exits 0 on an empty index, which skips the commit.
        run: |
          git add data pdfs
          git diff --cached --quiet || git commit -m "Fetch newly available reports"

      - name: Push changes
        run: git push

      - name: Upload artifact
        uses: actions/upload-pages-artifact@v1
        with:
          path: data/combined

  # Publishes the Pages artifact produced by `scrape` to GitHub Pages.
  deploy:
    name: Deploy
    runs-on: ubuntu-latest
    needs: scrape
    # Token scopes requested for the Pages deployment step.
    permissions:
      pages: write
      id-token: write
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - id: deployment
        name: Deploy data to GitHub Pages
        # NOTE(review): the version ref appeared as "[email protected]" in
        # the extracted file (an obfuscated `deploy-pages@<version>`);
        # pinned to the v1 major tag, which matches the
        # upload-pages-artifact@v1 step above -- confirm the exact version.
        uses: actions/deploy-pages@v1

  # Copies the combined data to biglocalnews.org after a successful scrape.
  mirror:
    name: Mirror
    runs-on: ubuntu-latest
    needs: scrape
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Fetch latest commit
        # Presumably picks up the commit the `scrape` job just pushed.
        run: git pull origin main

      - name: Upload inspections CSV to biglocalnews.org
        uses: biglocalnews/upload-files@v2
        with:
          api-key: ${{ secrets.BLN_API_KEY }}
          project-id: ${{ secrets.BLN_PROJECT_ID }}
          path: ./data/combined/