diff --git a/poetry.lock b/poetry.lock index fb4f903fbe..1395ef766e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -354,10 +354,10 @@ python-versions = "*" develop = false [package.dependencies] -aiohttp = {version = "*", optional = true, markers = "extra == \"streaming\""} +aiohttp = "*" dill = "*" -fsspec = ">=2021.05.0" -huggingface_hub = "<0.1.0" +fsspec = {version = ">=2021.05.0", extras = ["http"]} +huggingface_hub = ">=0.0.14,<0.1.0" multiprocess = "*" numpy = ">=1.17" packaging = "*" @@ -373,8 +373,7 @@ tensorflow = ["tensorflow (>=2.2.0)"] tensorflow_gpu = ["tensorflow-gpu (>=2.2.0)"] torch = ["torch"] s3 = ["fsspec", "boto3 (==1.16.43)", "botocore (==1.19.52)", "s3fs"] -streaming = ["aiohttp"] -tests = ["absl-py", "pytest", "pytest-xdist", "aiohttp", "apache-beam (>=2.26.0)", "elasticsearch", "aiobotocore (==1.2.2)", "boto3 (==1.16.43)", "botocore (==1.19.52)", "faiss-cpu", "fsspec", "moto[s3,server] (==2.0.4)", "rarfile (>=4.0)", "s3fs", "tensorflow (>=2.3)", "torch", "transformers", "bs4", "conllu", "langdetect", "lxml", "mwparserfromhell", "nltk", "openpyxl", "py7zr", "tldextract", "zstandard", "bert_score (>=0.3.6)", "rouge-score", "sacrebleu", "scipy", "seqeval", "scikit-learn", "jiwer", "sentencepiece", "toml (>=0.10.1)", "requests_file (>=1.5.1)", "tldextract (>=3.1.0)", "texttable (>=1.6.3)", "Werkzeug (>=1.0.1)", "six (>=1.15.0,<1.16.0)"] +tests = ["absl-py", "pytest", "pytest-xdist", "apache-beam (>=2.26.0)", "elasticsearch", "aiobotocore (==1.2.2)", "boto3 (==1.16.43)", "botocore (==1.19.52)", "faiss-cpu", "fsspec", "moto[s3,server] (==2.0.4)", "rarfile (>=4.0)", "s3fs", "tensorflow (>=2.3)", "torch", "transformers", "bs4", "conllu", "langdetect", "lxml", "mwparserfromhell", "nltk", "openpyxl", "py7zr", "tldextract", "zstandard", "bert_score (>=0.3.6)", "rouge-score", "sacrebleu", "scipy", "seqeval", "scikit-learn", "jiwer", "sentencepiece", "toml (>=0.10.1)", "requests_file (>=1.5.1)", "tldextract (>=3.1.0)", "texttable (>=1.6.3)", "Werkzeug (>=1.0.1)", "six (>=1.15.0,<1.16.0)"] quality = ["black (==21.4b0)", "flake8 (==3.7.9)", "isort", "pyyaml (>=5.3.1)"] benchmarks = ["numpy (==1.18.5)", "tensorflow (==2.3.0)", "torch (==1.6.0)", "transformers (==3.0.2)"] docs = ["docutils (==0.16.0)", "recommonmark", "sphinx (==3.1.2)", "sphinx-markdown-tables", "sphinx-rtd-theme (==0.4.3)", "sphinxext-opengraph (==0.4.1)", "sphinx-copybutton", "fsspec", "s3fs"] @@ -382,8 +381,8 @@ docs = ["docutils (==0.16.0)", "recommonmark", "sphinx (==3.1.2)", "sphinx-markd [package.source] type = "git" url = "https://github.com/huggingface/datasets.git" -reference = "b9fb8b2567aecfb14ad0bc31b59329f573eb35df" -resolved_reference = "b9fb8b2567aecfb14ad0bc31b59329f573eb35df" +reference = "6f7bca7a8c5518fd8f1aa6bea2542573646dd9f5" +resolved_reference = "6f7bca7a8c5518fd8f1aa6bea2542573646dd9f5" [[package]] name = "dill" @@ -479,6 +478,10 @@ category = "main" optional = false python-versions = ">=3.6" +[package.dependencies] +aiohttp = {version = "*", optional = true, markers = "extra == \"http\""} +requests = {version = "*", optional = true, markers = "extra == \"http\""} + [package.extras] abfs = ["adlfs"] adl = ["adlfs"] @@ -647,7 +650,7 @@ pyparsing = ">=2.4.2,<3" [[package]] name = "huggingface-hub" -version = "0.0.12" +version = "0.0.16" description = "Client library to download and publish models on the huggingface.co hub" category = "main" optional = false @@ -1514,15 +1517,16 @@ telegram = ["requests"] [[package]] name = "transformers" -version = "4.9.2" +version = "4.10.0.dev0" description = "State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch" category = "main" optional = false python-versions = ">=3.6.0" +develop = false [package.dependencies] filelock = "*" -huggingface-hub = "0.0.12" +huggingface-hub = ">=0.0.12" numpy = ">=1.17" packaging = "*" pyyaml = ">=5.1" @@ -1536,9 +1540,9 @@ tqdm = ">=4.27" all = ["tensorflow (>=2.3)", "onnxconverter-common", "keras2onnx", "torch (>=1.0)", "jax (>=0.2.8)", "jaxlib (>=0.1.65)", "flax (>=0.3.4)", "optax (>=0.0.8)", "sentencepiece (==0.1.91)", "protobuf", "tokenizers (>=0.10.1,<0.11)", "soundfile", "torchaudio", "pillow", "optuna", "ray", "timm", "codecarbon (==1.2.0)"] codecarbon = ["codecarbon (==1.2.0)"] deepspeed = ["deepspeed (>=0.4.3)"] -dev = ["tensorflow (>=2.3)", "onnxconverter-common", "keras2onnx", "torch (>=1.0)", "jax (>=0.2.8)", "jaxlib (>=0.1.65)", "flax (>=0.3.4)", "optax (>=0.0.8)", "sentencepiece (==0.1.91)", "protobuf", "tokenizers (>=0.10.1,<0.11)", "soundfile", "torchaudio", "pillow", "optuna", "ray", "timm", "codecarbon (==1.2.0)", "pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (==21.4b0)", "sacrebleu (>=1.4.12)", "rouge-score", "nltk", "gitpython", "faiss-cpu", "cookiecutter (==1.7.2)", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "unidic-lite (>=1.0.7)", "unidic (>=1.0.2)", "docutils (==0.16.0)", "recommonmark", "sphinx (==3.2.1)", "sphinx-markdown-tables", "sphinx-rtd-theme (==0.4.3)", "sphinx-copybutton", "sphinxext-opengraph (==0.4.1)", "scikit-learn"] -docs = ["tensorflow (>=2.3)", "onnxconverter-common", "keras2onnx", "torch (>=1.0)", "jax (>=0.2.8)", "jaxlib (>=0.1.65)", "flax (>=0.3.4)", "optax (>=0.0.8)", "sentencepiece (==0.1.91)", "protobuf", "tokenizers (>=0.10.1,<0.11)", "soundfile", "torchaudio", "pillow", "optuna", "ray", "timm", "codecarbon (==1.2.0)", "docutils (==0.16.0)", "recommonmark", "sphinx (==3.2.1)", "sphinx-markdown-tables", "sphinx-rtd-theme (==0.4.3)", "sphinx-copybutton", "sphinxext-opengraph (==0.4.1)"] -docs_specific = ["docutils (==0.16.0)", "recommonmark", "sphinx (==3.2.1)", "sphinx-markdown-tables", "sphinx-rtd-theme (==0.4.3)", "sphinx-copybutton", "sphinxext-opengraph (==0.4.1)"] +dev = ["tensorflow (>=2.3)", "onnxconverter-common", "keras2onnx", "torch (>=1.0)", "jax (>=0.2.8)", "jaxlib (>=0.1.65)", "flax (>=0.3.4)", "optax (>=0.0.8)", "sentencepiece (==0.1.91)", "protobuf", "tokenizers (>=0.10.1,<0.11)", "soundfile", "torchaudio", "pillow", "optuna", "ray", "timm", "codecarbon (==1.2.0)", "pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (==21.4b0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "faiss-cpu", "cookiecutter (==1.7.2)", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "unidic-lite (>=1.0.7)", "unidic (>=1.0.2)", "docutils (==0.16.0)", "recommonmark", "sphinx (==3.2.1)", "sphinx-markdown-tables", "sphinx-rtd-theme (==0.4.3)", "sphinx-copybutton", "sphinxext-opengraph (==0.4.1)", "sphinx-intl", "scikit-learn"] +docs = ["tensorflow (>=2.3)", "onnxconverter-common", "keras2onnx", "torch (>=1.0)", "jax (>=0.2.8)", "jaxlib (>=0.1.65)", "flax (>=0.3.4)", "optax (>=0.0.8)", "sentencepiece (==0.1.91)", "protobuf", "tokenizers (>=0.10.1,<0.11)", "soundfile", "torchaudio", "pillow", "optuna", "ray", "timm", "codecarbon (==1.2.0)", "docutils (==0.16.0)", "recommonmark", "sphinx (==3.2.1)", "sphinx-markdown-tables", "sphinx-rtd-theme (==0.4.3)", "sphinx-copybutton", "sphinxext-opengraph (==0.4.1)", "sphinx-intl"] +docs_specific = ["docutils (==0.16.0)", "recommonmark", "sphinx (==3.2.1)", "sphinx-markdown-tables", "sphinx-rtd-theme (==0.4.3)", "sphinx-copybutton", "sphinxext-opengraph (==0.4.1)", "sphinx-intl"] fairscale = ["fairscale (>0.3)"] flax = ["jax (>=0.2.8)", "jaxlib (>=0.1.65)", "flax (>=0.3.4)", "optax (>=0.0.8)"] integrations = ["optuna", "ray"] @@ -1555,15 +1559,21 @@ sentencepiece = ["sentencepiece (==0.1.91)", "protobuf"] serving = ["pydantic", "uvicorn", "fastapi", "starlette"] sklearn = ["scikit-learn"] speech = ["soundfile", "torchaudio"] -testing = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (==21.4b0)", "sacrebleu (>=1.4.12)", "rouge-score", "nltk", "gitpython", "faiss-cpu", "cookiecutter (==1.7.2)"] +testing = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (==21.4b0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "faiss-cpu", "cookiecutter (==1.7.2)"] tf = ["tensorflow (>=2.3)", "onnxconverter-common", "keras2onnx"] tf-cpu = ["tensorflow-cpu (>=2.3)", "onnxconverter-common", "keras2onnx"] timm = ["timm"] tokenizers = ["tokenizers (>=0.10.1,<0.11)"] torch = ["torch (>=1.0)"] -torchhub = ["filelock", "huggingface-hub (==0.0.12)", "importlib-metadata", "numpy (>=1.17)", "packaging", "protobuf", "regex (!=2019.12.17)", "requests", "sacremoses", "sentencepiece (==0.1.91)", "torch (>=1.0)", "tokenizers (>=0.10.1,<0.11)", "tqdm (>=4.27)"] +torchhub = ["filelock", "huggingface-hub (>=0.0.12)", "importlib-metadata", "numpy (>=1.17)", "packaging", "protobuf", "regex (!=2019.12.17)", "requests", "sacremoses", "sentencepiece (==0.1.91)", "torch (>=1.0)", "tokenizers (>=0.10.1,<0.11)", "tqdm (>=4.27)"] vision = ["pillow"] +[package.source] +type = "git" +url = "https://github.com/huggingface/transformers.git" +reference = "d50649531f4ad6baf7e600099a511b044d5da748" +resolved_reference = "d50649531f4ad6baf7e600099a511b044d5da748" + [[package]] name = "trec-car-tools" version = "2.5.4" @@ -1707,7 +1717,7 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "12d520fc7236ca5cb1f7b9dc2b6a23e350b3b9148a76283c9683e9694e713e77" +content-hash = "39e2be2b3d55378e61b18051eb6c4dd064fca4836400a625039920c3e30af96d" [metadata.files] absl-py = [ @@ -2187,8 +2197,8 @@ httplib2 = [ {file = "httplib2-0.19.1.tar.gz", hash = "sha256:0b12617eeca7433d4c396a100eaecfa4b08ee99aa881e6df6e257a7aad5d533d"}, ] huggingface-hub = [ - {file = "huggingface_hub-0.0.12-py3-none-any.whl", hash = "sha256:5c82ff96897a72e1ed48a94c1796686f120dea05888200522f3994f130c12e6a"}, - {file = "huggingface_hub-0.0.12.tar.gz", hash = "sha256:661b17fab0c475276fd71603ee7e16c3b3d1d6e812e1b29f40144f64d361e59d"}, + {file = "huggingface_hub-0.0.16-py3-none-any.whl", hash = "sha256:c8170998f1ac43ec9253f5fd321213aeee54a9b938c9ce5e696a06274710b67c"}, + {file = "huggingface_hub-0.0.16.tar.gz", hash = "sha256:6742126aef62244db6dc4cd0d4d9f13eef6a9b125c972f92111fb285c1100ae6"}, ] idna = [ {file = "idna-3.2-py3-none-any.whl", hash = "sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a"}, @@ -3014,10 +3024,7 @@ tqdm = [ {file = "tqdm-4.62.2-py2.py3-none-any.whl", hash = "sha256:80aead664e6c1672c4ae20dc50e1cdc5e20eeff9b14aa23ecd426375b28be588"}, {file = "tqdm-4.62.2.tar.gz", hash = "sha256:a4d6d112e507ef98513ac119ead1159d286deab17dffedd96921412c2d236ff5"}, ] -transformers = [ - {file = "transformers-4.9.2-py3-none-any.whl", hash = "sha256:ed64723efb6f67a77a60559bbb318009fdd94045724cd474a949f62c5c0b185e"}, - {file = "transformers-4.9.2.tar.gz", hash = "sha256:ad79b0e4a9c9bfedbaaf5a36e8266bea69ab0437187f83ede602ef222d26d9d7"}, -] +transformers = [] trec-car-tools = [] typer = [ {file = "typer-0.3.2-py3-none-any.whl", hash = "sha256:ba58b920ce851b12a2d790143009fa00ac1d05b3ff3257061ff69dbdfc3d161b"}, diff --git a/pyproject.toml b/pyproject.toml index c37f3ded92..77e01e01f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "datasets-preview-backend" -version = "0.4.0" +version = "0.4.1" description = "API to extract rows of 🤗 datasets" authors = ["Sylvain Lesage "] @@ -19,13 +19,13 @@ nlp = "^0.4.0" openpyxl = "^3.0.7" py7zr = "^0.16.1" tensorflow = "^2.5.0" -transformers = "^4.9.1" wget = "^3.2" kenlm = {url = "https://github.com/kpu/kenlm/archive/master.zip"} nltk = "^3.6.2" aiohttp = "^3.7.4" -datasets = {extras = ["streaming"], git = "https://github.com/huggingface/datasets.git", rev = "b9fb8b2567aecfb14ad0bc31b59329f573eb35df"} typer = "^0.3.2" +transformers = {git = "https://github.com/huggingface/transformers.git", rev = "d50649531f4ad6baf7e600099a511b044d5da748"} +datasets = {extras = ["streaming"], git = "https://github.com/huggingface/datasets.git", rev = "6f7bca7a8c5518fd8f1aa6bea2542573646dd9f5"} [tool.poetry.dev-dependencies] bandit = "^1.7.0"