Skip to content

Commit

Permalink
Add support for Scrapy>=2.9, Python3.7+ (#13)
Browse files Browse the repository at this point in the history
* Add support for Scrapy 2.9, Python 3.7+
* Add tox.ini
* Update test matrix in test workflow
* Loosen test requirements for different Python versions
  • Loading branch information
leewesleyv authored Oct 22, 2024
1 parent ff7ea2a commit 57fbc92
Show file tree
Hide file tree
Showing 12 changed files with 97 additions and 27 deletions.
63 changes: 47 additions & 16 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,33 +15,64 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: "3.8"

- name: Install Dependencies
run: pip install -r requirements-tests.txt

- name: Ruff
run: ruff check scrapy_webarchive tests

- name: Mypy
run: mypy scrapy_webarchive

test:
name: unittests
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
toxenv:
- py37-scrapy29
- py312-scrapy29
- py38-scrapy210
- py312-scrapy210
- py38-scrapy211
- py312-scrapy211
- py312-scrapymaster
include:
- toxenv: py37-scrapy29
python-version: 3.7
- toxenv: py312-scrapy29
python-version: '3.12'

- toxenv: py38-scrapy210
python-version: 3.8
- toxenv: py312-scrapy210
python-version: '3.12'

- toxenv: py38-scrapy211
python-version: 3.8
- toxenv: py312-scrapy211
python-version: '3.12'

- toxenv: py312-scrapymaster
python-version: '3.12'

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
- uses: actions/checkout@v2

- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install Dependencies
run: pip install -r requirements-tests.txt
- name: Test
run: coverage run -m pytest tests
- run: coverage report
- run: coverage html --title "Coverage for ${{ github.sha }}"
- name: Store coverage HTML
uses: actions/upload-artifact@v4
with:
name: coverage-data-${{ github.job }}-${{ strategy.job-index }}
path: .coverage.*
include-hidden-files: true

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install tox
- name: Run tox
run: |
tox
env:
TOXENV: ${{ matrix.toxenv }}
continue-on-error: ${{ contains(matrix.toxenv, '-scrapymaster') }}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
name = "scrapy-webarchive"
version = "0.0.1.dev2"
dependencies = [
"Scrapy==2.11.2",
"Scrapy>=2.9,<2.12",
"warcio==1.7.4",
"smart-open==7.0.4",
"warc-knot==0.2.5",
Expand Down
7 changes: 4 additions & 3 deletions requirements-tests.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
-e .
pytest>=8.3,<8.4
pytest>=7.4,<8.4
freezegun==1.5.1
mypy==1.11.2
mypy>=1.4,<1.12
ruff==0.6.8
pyfakefs==5.6.0
pyfakefs==5.6.0
tox
4 changes: 3 additions & 1 deletion scrapy_webarchive/cdxj.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from __future__ import annotations

import json
import re
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, List

from cdxj_indexer.main import CDXJIndexer
from typing_extensions import TYPE_CHECKING, List

if TYPE_CHECKING:
from scrapy_webarchive.wacz import WaczFile
Expand Down
1 change: 1 addition & 0 deletions scrapy_webarchive/downloadermiddlewares.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import annotations

from scrapy.exceptions import IgnoreRequest
from scrapy.http.request import Request
Expand Down
5 changes: 3 additions & 2 deletions scrapy_webarchive/extensions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from datetime import datetime
from io import BytesIO
from typing import Any, Dict, Protocol, Type, Union, cast

from scrapy import Spider, signals
from scrapy.crawler import Crawler
Expand All @@ -11,7 +12,7 @@
from scrapy.pipelines.media import MediaPipeline
from scrapy.settings import Settings
from twisted.internet.defer import Deferred
from typing_extensions import Self
from typing_extensions import Any, Dict, Protocol, Self, Type, Union, cast

from scrapy_webarchive.utils import get_scheme_from_uri, get_warc_date
from scrapy_webarchive.wacz import WaczFileCreator
Expand Down
5 changes: 3 additions & 2 deletions scrapy_webarchive/spidermiddlewares.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from __future__ import annotations

import re
from typing import Union
from urllib.parse import urlparse

from scrapy import Request, Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.settings import Settings
from scrapy.statscollectors import StatsCollector
from typing_extensions import Iterable, Self
from typing_extensions import Iterable, Self, Union

from scrapy_webarchive.exceptions import WaczMiddlewareException
from scrapy_webarchive.wacz import MultiWaczFile, WaczFile, open_wacz_file
Expand Down
2 changes: 2 additions & 0 deletions scrapy_webarchive/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from datetime import datetime, timezone
from pathlib import Path
from urllib.parse import urlparse, urlunparse
Expand Down
4 changes: 3 additions & 1 deletion scrapy_webarchive/wacz.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from __future__ import annotations

import gzip
import io
import os
import zipfile
from collections import defaultdict
from functools import partial
from typing import IO, TYPE_CHECKING, Dict, Generator, List, Union

from scrapy.settings import Settings
from smart_open import open as smart_open
from typing_extensions import IO, TYPE_CHECKING, Dict, Generator, List, Union
from warc.warc import WARCRecord

from scrapy_webarchive.cdxj import CdxjRecord, write_cdxj_index
Expand Down
2 changes: 2 additions & 0 deletions scrapy_webarchive/warc.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import socket
import uuid
from io import BytesIO
Expand Down
2 changes: 1 addition & 1 deletion tests/test_wacz.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_create_wacz(self, fs, wacz_file_creator):

# Retrieve the zip buffer from the call args
call_args = wacz_file_creator.store.persist_file.call_args
zip_buffer = call_args.kwargs['buf']
zip_buffer = call_args[1]['buf']

# Verify that the WACZ zip content is correct
zip_file = zipfile.ZipFile(zip_buffer)
Expand Down
27 changes: 27 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# tox configuration: runs the test suite in tests/ once per Python / Scrapy
# combination listed in envlist.
[tox]
# No sdist is built by tox. The project itself is installed in editable mode
# by the "-e ." line of requirements-tests.txt, which install_command below
# always passes to pip.
skipsdist = True
# NOTE(review): tox documents usedevelop as a per-environment ([testenv])
# option; confirm whether it has any effect in this section.
usedevelop = True

envlist =
    py{37,38,39,310,311,312}-scrapy29,
    py{38,39,310,311,312}-scrapy{210,211},
    py{39,310,311,312}-scrapymaster,

[testenv]
# {opts} and {packages} must be part of the command: they are the
# placeholders tox fills with the entries from "deps". Without them the
# factor-specific Scrapy pin below is never handed to pip and every
# environment ends up testing whatever Scrapy version the requirements file
# resolves to on its own.
install_command = python -m pip install -r requirements-tests.txt {opts} {packages}
# pytest is installed into the environment by requirements-tests.txt, so it is
# invoked directly instead of being allow-listed as an external command.
commands =
    pytest tests/
basepython =
    py37: python3.7
    py38: python3.8
    py39: python3.9
    py310: python3.10
    py311: python3.11
    py312: python3.12

# One Scrapy pin per "scrapyNN" factor; "scrapymaster" tracks the upstream
# development branch.
deps =
    scrapy29: Scrapy~=2.9.0
    scrapy210: Scrapy~=2.10.0
    scrapy211: Scrapy~=2.11.0
    scrapymaster: git+https://github.com/scrapy/scrapy.git@master#egg=Scrapy

0 comments on commit 57fbc92

Please sign in to comment.