Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include py.typed file & fix and/or add missing/wrong type hints in stub files #50

Merged
merged 8 commits into from
Aug 13, 2024
13 changes: 13 additions & 0 deletions fastwarc/fastwarc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,16 @@
from .stream_io import FileStream, GZipStream, LZ4Stream
from .stream_io import FastWARCError, StreamError
from .warc import ArchiveIterator, WarcRecord, WarcRecordType

# Re-exported at package level for legacy compatibility; new code should
# prefer explicit imports from the submodules.
__all__ = [
    # from .stream_io
    "FileStream",
    "GZipStream",
    "LZ4Stream",
    "FastWARCError",
    "StreamError",
    # from .warc
    "ArchiveIterator",
    "WarcRecord",
    "WarcRecordType",
]
phoerious marked this conversation as resolved.
Show resolved Hide resolved
Empty file added fastwarc/fastwarc/py.typed
Empty file.
31 changes: 24 additions & 7 deletions fastwarc/fastwarc/stream_io.pyi
Original file line number Diff line number Diff line change
@@ -1,18 +1,34 @@
from typing import ContextManager, IO
from types import TracebackType
from typing import ContextManager, Optional, Type, Union, BinaryIO, Protocol

# Structural (duck-typed) description of a binary file-like object: any
# object providing these six methods with compatible signatures matches,
# without having to inherit from anything.
class _GenericIOStream(Protocol):
    def write(self, data: bytes) -> int: ...
    def flush(self) -> None: ...
    def read(self, size: int) -> bytes: ...
    def seek(self, offset: int) -> int: ...
    def close(self) -> None: ...
    def tell(self) -> int: ...


# Base stream interface. It is a context manager whose ``__enter__`` returns
# the stream itself.
# The self-references are quoted so the declaration is valid both as a stub
# and when evaluated as regular Python (the name is not bound yet while the
# class statement is being executed).
class IOStream(ContextManager["IOStream"]):
    def read(self, size: int) -> bytes: ...
    def write(self, data: bytes) -> int: ...
    def close(self) -> None: ...
    def flush(self) -> None: ...
    def seek(self, offset: int) -> None: ...
    def tell(self) -> int: ...
    def __enter__(self) -> "IOStream": ...
    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc: Optional[BaseException],
        traceback: Optional[TracebackType],
    ) -> None: ...


class BufferedReader:
def __init__(
self, stream: IOStream, buf_size: int = 8192, negotiate_stream: bool = True
self, stream: Union[IOStream, BinaryIO, _GenericIOStream], buf_size: int = 65536, negotiate_stream: bool = True
) -> None: ...
def close(self) -> None: ...
def consume(self, size: int = -1) -> int: ...
Expand All @@ -22,6 +38,7 @@ class BufferedReader:


# IOStream that can be constructed from an optional initial ``bytes`` value
# and hands its contents back via ``getvalue``.
class BytesIOStream(IOStream):
    def __init__(self, initial_data: Optional[bytes] = None) -> None: ...
    def getvalue(self) -> bytes: ...


Expand All @@ -36,28 +53,28 @@ class CompressingStream(IOStream):

# CompressingStream wrapping ``raw_stream``, which may be an IOStream, a
# standard binary file object, or any object matching _GenericIOStream.
class BrotliStream(CompressingStream):
    def __init__(
        self,
        raw_stream: Union[IOStream, BinaryIO, _GenericIOStream],
        quality: int = 11,
        lgwin: int = 22,
        lgblock: int = 0,
    ) -> None: ...


# CompressingStream wrapping ``raw_stream``, which may be an IOStream, a
# standard binary file object, or any object matching _GenericIOStream.
class GZipStream(CompressingStream):
    def __init__(
        self,
        raw_stream: Union[IOStream, BinaryIO, _GenericIOStream],
        compression_level: int = 9,
        zlib: bool = False,
    ) -> None: ...


# CompressingStream wrapping ``raw_stream``, which may be an IOStream, a
# standard binary file object, or any object matching _GenericIOStream.
class LZ4Stream(CompressingStream):
    def __init__(
        self,
        raw_stream: Union[IOStream, BinaryIO, _GenericIOStream],
        compression_level: int = 12,
        favor_dec_speed: bool = True,
    ) -> None: ...
    def prepopulate(self, initial_data: bytes) -> None: ...


# IOStream built around ``py_stream``; the wrapped object only has to match
# the _GenericIOStream protocol rather than be a ``typing.IO`` instance.
class PythonIOStreamAdapter(IOStream):
    def __init__(self, py_stream: _GenericIOStream) -> None: ...


class FastWARCError(Exception):
Expand Down
18 changes: 9 additions & 9 deletions fastwarc/fastwarc/tools.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from enum import IntFlag
from typing import Union, Type, Iterator, Tuple
from typing import Union, Iterator, Tuple, Protocol

from .stream_io import IOStream
from .stream_io import IOStream, _GenericIOStream
from .warc import WarcRecord


Expand All @@ -16,33 +16,33 @@ def detect_compression_algorithm(file: str) -> CompressionAlg: ...


# ``file`` is a path string or a stream *instance* (not a stream class), and
# the result is likewise an IOStream instance.
def wrap_warc_stream(
    file: Union[str, IOStream, _GenericIOStream],
    mode: str,
    comp_alg: CompressionAlg = CompressionAlg.auto,
    **comp_args
) -> IOStream: ...


# Input and output are each a path string or a stream *instance*; the
# iterator yields ``(WarcRecord, int)`` pairs.
def recompress_warc_interactive(
    warc_in: Union[str, IOStream, _GenericIOStream],
    warc_out: Union[str, IOStream, _GenericIOStream],
    comp_alg_in: CompressionAlg = CompressionAlg.auto,
    comp_alg_out: CompressionAlg = CompressionAlg.auto,
    **comp_args
) -> Iterator[Tuple[WarcRecord, int]]: ...


# Input and output are each a path string or a stream *instance*.
# NOTE(review): the declared return type is identical to that of
# recompress_warc_interactive; confirm against the implementation that this
# function really returns an iterator.
def recompress_warc(
    warc_in: Union[str, IOStream, _GenericIOStream],
    warc_out: Union[str, IOStream, _GenericIOStream],
    comp_alg_in: CompressionAlg = CompressionAlg.auto,
    comp_alg_out: CompressionAlg = CompressionAlg.auto,
    **comp_args
) -> Iterator[Tuple[WarcRecord, int]]: ...


# ``warc_in`` is a path string or an IOStream *instance*.
# NOTE(review): unlike wrap_warc_stream and the recompress_* functions, the
# input type here does not include _GenericIOStream; confirm whether that
# is intentional.
def verify_digests(
    warc_in: Union[str, IOStream],
    verify_payloads: bool = False,
    comp_alg: CompressionAlg = CompressionAlg.auto,
) -> bool: ...
48 changes: 37 additions & 11 deletions fastwarc/fastwarc/warc.pyi
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
from datetime import datetime
from typing import (
Union,
Optional,
Iterator,
Dict,
Tuple,
MutableMapping,
Literal,
Callable,
Iterable,
ValuesView,
KeysView,
Type,
BinaryIO,
)
from enum import IntFlag

from .stream_io import BufferedReader, IOStream
from .stream_io import BufferedReader, IOStream, _GenericIOStream


class WarcRecordType(IntFlag):
Expand All @@ -29,24 +31,38 @@ class WarcRecordType(IntFlag):
no_type = 0


# Module-level aliases for the WarcRecordType members, so they can be named
# without the enum prefix. Signatures in this stub use them as parameter
# defaults (``record_type=no_type``, ``record_types=any_type``).
warcinfo = WarcRecordType.warcinfo
response = WarcRecordType.response
resource = WarcRecordType.resource
request = WarcRecordType.request
metadata = WarcRecordType.metadata
revisit = WarcRecordType.revisit
conversion = WarcRecordType.conversion
continuation = WarcRecordType.continuation
unknown = WarcRecordType.unknown
no_type = WarcRecordType.no_type
any_type = WarcRecordType.any_type


# Header collection with a mapping-like interface.
# The mapping-style methods are declared explicitly instead of inheriting
# from MutableMapping[str, str]: iterating a MutableMapping yields keys,
# whereas ``__iter__`` here is declared to yield (name, value) tuples.
class WarcHeaderMap:
    reason_phrase: Optional[str]
    status_code: Optional[str]
    status_line: str

    def append(self, key: str, value: str) -> None: ...
    def asdict(self) -> Dict[str, str]: ...
    # A tuple of (name, value) pairs, not a single pair.
    def astuples(self) -> Tuple[Tuple[str, str], ...]: ...
    def clear(self) -> None: ...
    def get(self, key: str, default: Optional[str] = None) -> Optional[str]: ...
    def items(self) -> Iterator[Tuple[str, str]]: ...
    def keys(self) -> KeysView[str]: ...
    def values(self) -> ValuesView[str]: ...
    def write(self, stream: IOStream) -> None: ...
    def __getitem__(self, item: str) -> str: ...
    def __iter__(self) -> Iterator[Tuple[str, str]]: ...
    def __len__(self) -> int: ...
    def __setitem__(self, key: str, value: str) -> None: ...
    def __contains__(self, item: str) -> bool: ...


class WarcRecord:
Expand All @@ -59,32 +75,42 @@ class WarcRecord:
is_http_parsed: bool
http_headers: Optional[WarcHeaderMap]
http_content_type: Optional[str]
http_content_type: Optional[str]
http_charset: Optional[str]
http_date: Optional[datetime]
http_last_modified: Optional[datetime]
content_length: int
reader: BufferedReader
stream_pos: int

def init_headers(
self, content_length: int = 0, record_type=no_type, record_urn=None
): ...
self, content_length: int = 0, record_type: WarcRecordType = no_type, record_urn: Optional[bytes] = None
) -> None: ...
def freeze(self) -> bool: ...
def set_bytes_content(self, content: bytes) -> None: ...
def parse_http(self, strict_mode=True, auto_decode: str = "none") -> None: ...
def parse_http(self, strict_mode: bool = True, auto_decode: str = "none") -> None: ...
def verify_block_digest(self, consume: bool = False) -> bool: ...
def verify_payload_digest(self, consume: bool = False) -> bool: ...
def write(
self,
stream: Union[IOStream, BinaryIO, _GenericIOStream],
checksum_data: bool = False,
payload_digest: Optional[bytes] = None,
chunk_size: int = 16384
) -> int: ...



# Iterable (and iterator) of WarcRecord objects read from ``stream``.
# ``stream`` is a stream *instance*: an IOStream, a standard binary file
# object, or any object matching _GenericIOStream.
class ArchiveIterator(Iterable[WarcRecord]):
    def __init__(
        self,
        stream: Union[IOStream, BinaryIO, _GenericIOStream],
        record_types: WarcRecordType = any_type,
        parse_http: bool = True,
        min_content_length: int = -1,
        max_content_length: int = -1,
        func_filter: Optional[Callable[[WarcRecord], bool]] = None,
        verify_digests: bool = False,
        strict_mode: bool = True,
        auto_decode: Literal["none", "content", "transfer", "all"] = "none",
    ) -> None: ...
    def __iter__(self) -> Iterator[WarcRecord]: ...
    def __next__(self) -> WarcRecord: ...
Loading