From e1f99eb201d7fa14f169a127c462536aa01cca78 Mon Sep 17 00:00:00 2001
From: Mirko Bunse
Date: Mon, 24 Jun 2024 11:56:41 +0200
Subject: [PATCH 01/16] Documentation contains the README's quickstart
 instructions and doc-related CI is prepared

---
 .github/workflows/ci.yml                      |   36 +-
 README.md                                     |    2 +-
 docs/.gitignore                               |    2 +
 docs/build/html/_sources/index.rst.txt        |   58 -
 docs/build/html/_sources/modules.rst.txt      |    7 -
 .../_sources/quapy.classification.rst.txt     |   45 -
 docs/build/html/_sources/quapy.data.rst.txt   |   46 -
 docs/build/html/_sources/quapy.method.rst.txt |   69 -
 docs/build/html/_sources/quapy.rst.txt        |   80 -
 docs/build/html/_static/basic.css             |  900 ----
 docs/build/html/_static/doctools.js           |  156 -
 .../html/_static/documentation_options.js     |   14 -
 docs/build/html/_static/file.png              |  Bin 286 -> 0 bytes
 docs/build/html/_static/jquery.js             |    2 -
 docs/build/html/_static/language_data.js      |  199 -
 docs/build/html/_static/minus.png             |  Bin 90 -> 0 bytes
 docs/build/html/_static/plus.png              |  Bin 90 -> 0 bytes
 docs/build/html/_static/pygments.css          |   74 -
 docs/build/html/_static/searchtools.js        |  566 ---
 docs/build/html/genindex.html                 | 1541 -------
 docs/build/html/index.html                    |  826 ----
 docs/build/html/modules.html                  |  335 --
 docs/build/html/objects.inv                   |  Bin 4073 -> 0 bytes
 docs/build/html/py-modindex.html              |  261 --
 docs/build/html/quapy.classification.html     |  967 -----
 docs/build/html/quapy.data.html               | 1222 ------
 docs/build/html/quapy.html                    | 3127 --------------
 docs/build/html/quapy.method.html             | 3644 -----------------
 docs/build/html/search.html                   |  131 -
 docs/build/html/searchindex.js                |    1 -
 SoBigData.png => docs/source/SoBigData.png    |  Bin
 docs/source/index.rst                         |   97 +-
 32 files changed, 115 insertions(+), 14293 deletions(-)
 create mode 100644 docs/.gitignore
 delete mode 100644 docs/build/html/_sources/index.rst.txt
 delete mode 100644 docs/build/html/_sources/modules.rst.txt
 delete mode 100644 docs/build/html/_sources/quapy.classification.rst.txt
 delete mode 100644 docs/build/html/_sources/quapy.data.rst.txt
 delete mode 100644 docs/build/html/_sources/quapy.method.rst.txt
 delete mode 100644 docs/build/html/_sources/quapy.rst.txt
 delete mode 100644 docs/build/html/_static/basic.css
 delete mode 100644 docs/build/html/_static/doctools.js
 delete mode 100644 docs/build/html/_static/documentation_options.js
 delete mode 100644 docs/build/html/_static/file.png
 delete mode 100644 docs/build/html/_static/jquery.js
 delete mode 100644 docs/build/html/_static/language_data.js
 delete mode 100644 docs/build/html/_static/minus.png
 delete mode 100644 docs/build/html/_static/plus.png
 delete mode 100644 docs/build/html/_static/pygments.css
 delete mode 100644 docs/build/html/_static/searchtools.js
 delete mode 100644 docs/build/html/genindex.html
 delete mode 100644 docs/build/html/index.html
 delete mode 100644 docs/build/html/modules.html
 delete mode 100644 docs/build/html/objects.inv
 delete mode 100644 docs/build/html/py-modindex.html
 delete mode 100644 docs/build/html/quapy.classification.html
 delete mode 100644 docs/build/html/quapy.data.html
 delete mode 100644 docs/build/html/quapy.html
 delete mode 100644 docs/build/html/quapy.method.html
 delete mode 100644 docs/build/html/search.html
 delete mode 100644 docs/build/html/searchindex.js
 rename SoBigData.png => docs/source/SoBigData.png (100%)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 85d0dd1..fcf306e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,7 +4,7 @@ on:
   pull_request:
   push:
     branches:
-      - main
+      - master
       - devel
 
 jobs:
@@ -31,3 +31,37 @@ jobs:
           python -m pip install -e .[bayes,composable,tests]
       - name: Test with unittest
         run: python -m unittest
+
+  # build and push documentation to gh-pages (only if pushed to the master branch)
+  docs:
+    name: Documentation
+    runs-on: ubuntu-latest
+    if: github.ref == 'refs/heads/master'
+    steps:
+    - uses: actions/checkout@v1
+    - name: Build documentation
+      uses: ammaraskar/sphinx-action@master
+      with:
+        pre-build-command: |
+          python -m pip install --upgrade pip setuptools wheel
+          python -m pip install -e .[docs]
+          mkdir -p docs/source/wiki/wiki_examples/selected_plots
+          cp docs/source/wiki_editable/wiki_examples/selected_plots/* docs/source/wiki/wiki_examples/selected_plots/
+          find docs/source/wiki_editable -name '*.md' -exec sh -c 'pandoc -f markdown -t rst "$$1" -o "docs/source/wiki/$$(basename "$$1" .md).rst"' _ {} \;
+          sphinx-apidoc --force --output-dir docs/source quapy
+        docs-folder: "docs/"
+    - name: Publish documentation
+      run: |
+        git clone ${{ github.server_url }}/${{ github.repository }}.git --branch gh-pages --single-branch __gh-pages/
+        cp -r docs/build/html/* __gh-pages/
+        cd __gh-pages/
+        git config --local user.email "action@github.com"
+        git config --local user.name "GitHub Action"
+        git add .
+        git commit -am "Documentation based on ${{ github.sha }}" || true
+    - name: Push changes
+      uses: ad-m/github-push-action@master
+      with:
+        branch: gh-pages
+        directory: __gh-pages/
+        github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/README.md b/README.md
index f83c344..5e1dffa 100644
--- a/README.md
+++ b/README.md
@@ -116,4 +116,4 @@ are provided:
 
 ## Acknowledgments:
 
-![SoBigData++](SoBigData.png)
+![SoBigData++](docs/source/SoBigData.png)
diff --git a/docs/.gitignore b/docs/.gitignore
new file mode 100644
index 0000000..4b7ed84
--- /dev/null
+++ b/docs/.gitignore
@@ -0,0 +1,2 @@
+build/
+source/wiki/
diff --git a/docs/build/html/_sources/index.rst.txt b/docs/build/html/_sources/index.rst.txt
deleted file mode 100644
index a4150cd..0000000
--- a/docs/build/html/_sources/index.rst.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-.. QuaPy: A Python-based open-source framework for quantification documentation master file, created by
-   sphinx-quickstart on Wed Feb 7 16:26:46 2024.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
-
-Welcome to QuaPy's documentation!
-==========================================================================================
-
-QuaPy is a Python-based open-source framework for quantification.
-
-This document contains the API of the modules included in QuaPy.
-
-Installation
-------------
-
-`pip install quapy`
-
-GitHub
-------------
-
-QuaPy is hosted in GitHub at `https://github.com/HLT-ISTI/QuaPy <https://github.com/HLT-ISTI/QuaPy>`_
-
-
-Wiki Documents
-------------
-
-In this section you can find useful information concerning different aspects of QuaPy, with examples:
-
-.. toctree::
-   :maxdepth: 1
-
-   wiki/Datasets
-   wiki/Evaluation
-   wiki/ExplicitLossMinimization
-   wiki/Methods
-   wiki/Model-Selection
-   wiki/Plotting
-   wiki/Protocols
-
-
-.. toctree::
-   :maxdepth: 2
-   :caption: Contents:
-
-Contents
---------
-
-.. toctree::
-
-   modules
-
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
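Note: the quickstart instructions that the commit subject refers to are the ones in the README, which this patch series copies into docs/source/index.rst. For reference, the README's example looks roughly like the sketch below (based on the QuaPy 0.1.x API: fetch_reviews, the ACC quantifier, and qp.error.mae; treat the exact snippet as illustrative rather than the verbatim text of the new docs):

    import quapy as qp
    from quapy.method.aggregative import ACC
    from sklearn.linear_model import LogisticRegression

    # load a sentiment dataset and train a quantifier (Adjusted Classify & Count)
    dataset = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5)
    model = ACC(LogisticRegression())
    model.fit(dataset.training)

    # estimate the class prevalence values of the test set and measure the error
    estim_prevalence = model.quantify(dataset.test.instances)
    true_prevalence = dataset.test.prevalence()
    error = qp.error.mae(true_prevalence, estim_prevalence)
    print(f'MAE = {error:.4f}')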
diff --git a/docs/build/html/_sources/modules.rst.txt b/docs/build/html/_sources/modules.rst.txt
deleted file mode 100644
index 5d84a54..0000000
--- a/docs/build/html/_sources/modules.rst.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-quapy
-=====
-
-.. toctree::
-   :maxdepth: 4
-
-   quapy
diff --git a/docs/build/html/_sources/quapy.classification.rst.txt b/docs/build/html/_sources/quapy.classification.rst.txt
deleted file mode 100644
index cfc7d9b..0000000
--- a/docs/build/html/_sources/quapy.classification.rst.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-quapy.classification package
-============================
-
-Submodules
-----------
-
-quapy.classification.calibration module
----------------------------------------
-
-.. automodule:: quapy.classification.calibration
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.classification.methods module
------------------------------------
-
-.. automodule:: quapy.classification.methods
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.classification.neural module
-----------------------------------
-
-.. automodule:: quapy.classification.neural
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.classification.svmperf module
------------------------------------
-
-.. automodule:: quapy.classification.svmperf
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-Module contents
----------------
-
-.. automodule:: quapy.classification
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/build/html/_sources/quapy.data.rst.txt b/docs/build/html/_sources/quapy.data.rst.txt
deleted file mode 100644
index cadace6..0000000
--- a/docs/build/html/_sources/quapy.data.rst.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-quapy.data package
-==================
-
-Submodules
-----------
-
-quapy.data.base module
-----------------------
-
-.. automodule:: quapy.data.base
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.data.datasets module
---------------------------
-
-.. automodule:: quapy.data.datasets
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-
-quapy.data.preprocessing module
--------------------------------
-
-.. automodule:: quapy.data.preprocessing
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.data.reader module
-------------------------
-
-.. automodule:: quapy.data.reader
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-Module contents
----------------
-
-.. automodule:: quapy.data
-   :members:
-   :undoc-members:
-   :show-inheritance:
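The quapy.data API documented by the file above centers on LabelledCollection (quapy.data.base) plus the dataset loaders and preprocessing helpers. A minimal sketch, assuming the fetch_reviews loader and the text2tfidf preprocessing function behave as documented in these modules:

    import quapy as qp
    from quapy.data.preprocessing import text2tfidf

    # load raw text reviews; each split is a quapy.data.base.LabelledCollection
    dataset = qp.datasets.fetch_reviews('kindle')
    dataset = text2tfidf(dataset, min_df=5)

    print(dataset.training.prevalence())  # class prevalence of the training set
    print(dataset.test.counts())          # per-class counts in the test set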
diff --git a/docs/build/html/_sources/quapy.method.rst.txt b/docs/build/html/_sources/quapy.method.rst.txt
deleted file mode 100644
index 31a357a..0000000
--- a/docs/build/html/_sources/quapy.method.rst.txt
+++ /dev/null
@@ -1,69 +0,0 @@
-quapy.method package
-====================
-
-Submodules
-----------
-
-quapy.method.aggregative module
--------------------------------
-
-.. automodule:: quapy.method.aggregative
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-.. automodule:: quapy.method._kdey
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-.. automodule:: quapy.method._neural
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-.. automodule:: quapy.method._threshold_optim
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-
-quapy.method.base module
-------------------------
-
-.. automodule:: quapy.method.base
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.method.meta module
-------------------------
-
-.. automodule:: quapy.method.meta
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.method.non\_aggregative module
-------------------------------------
-
-.. automodule:: quapy.method.non_aggregative
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.method.composable module
-------------------------
-
-.. automodule:: quapy.method.composable
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-Module contents
----------------
-
-.. automodule:: quapy.method
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/build/html/_sources/quapy.rst.txt b/docs/build/html/_sources/quapy.rst.txt
deleted file mode 100644
index af2708b..0000000
--- a/docs/build/html/_sources/quapy.rst.txt
+++ /dev/null
@@ -1,80 +0,0 @@
-quapy package
-=============
-
-Subpackages
------------
-
-.. toctree::
-   :maxdepth: 4
-
-   quapy.classification
-   quapy.data
-   quapy.method
-
-
-Submodules
-----------
-
-quapy.error module
-------------------
-
-.. automodule:: quapy.error
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.evaluation module
------------------------
-
-.. automodule:: quapy.evaluation
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.functional module
------------------------
-
-.. automodule:: quapy.functional
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.model\_selection module
------------------------------
-
-.. automodule:: quapy.model_selection
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.plot module
------------------
-
-.. automodule:: quapy.plot
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.protocol module
----------------------
-
-.. automodule:: quapy.protocol
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-quapy.util module
------------------
-
-.. automodule:: quapy.util
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-Module contents
----------------
-
-.. automodule:: quapy
-   :members:
-   :undoc-members:
-   :show-inheritance:
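The top-level modules listed in quapy.rst.txt (quapy.error, quapy.evaluation, quapy.protocol, quapy.model_selection, ...) compose as follows; a sketch assuming the APP protocol and qp.evaluation.evaluate carry the signatures documented in this QuaPy 0.1.x API:

    import quapy as qp
    from quapy.method.aggregative import PACC
    from quapy.protocol import APP
    from sklearn.linear_model import LogisticRegression

    qp.environ['SAMPLE_SIZE'] = 100  # sample size drawn by the evaluation protocol

    dataset = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5)
    model = PACC(LogisticRegression())
    model.fit(dataset.training)

    # evaluate across many artificial prevalence values (quapy.protocol.APP)
    # using mean absolute error from quapy.error
    mae = qp.evaluation.evaluate(model, protocol=APP(dataset.test), error_metric='mae')
    print(f'MAE = {mae:.4f}')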
diff --git a/docs/build/html/_static/basic.css b/docs/build/html/_static/basic.css
deleted file mode 100644
index 4e9a9f1..0000000
--- a/docs/build/html/_static/basic.css
+++ /dev/null
@@ -1,900 +0,0 @@
[900 lines of the Sphinx-generated stylesheet basic.css omitted]
diff --git a/docs/build/html/_static/doctools.js b/docs/build/html/_static/doctools.js
deleted file mode 100644
index 527b876..0000000
--- a/docs/build/html/_static/doctools.js
+++ /dev/null
@@ -1,156 +0,0 @@
[156 lines of the Sphinx-generated doctools.js omitted]
diff --git a/docs/build/html/_static/documentation_options.js b/docs/build/html/_static/documentation_options.js
deleted file mode 100644
index 17fd07e..0000000
--- a/docs/build/html/_static/documentation_options.js
+++ /dev/null
@@ -1,14 +0,0 @@
-var DOCUMENTATION_OPTIONS = {
-    URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
-    VERSION: '0.1.9',
-    LANGUAGE: 'en',
-    COLLAPSE_INDEX: false,
-    BUILDER: 'html',
-    FILE_SUFFIX: '.html',
-    LINK_SUFFIX: '.html',
-    HAS_SOURCE: true,
-    SOURCELINK_SUFFIX: '.txt',
-    NAVIGATION_WITH_KEYS: false,
-    SHOW_SEARCH_SUMMARY: true,
-    ENABLE_SEARCH_SHORTCUTS: true,
-};
\ No newline at end of file
diff --git a/docs/build/html/_static/file.png b/docs/build/html/_static/file.png
deleted file mode 100644
index a858a410e4faa62ce324d814e4b816fff83a6fb3..0000000000000000000000000000000000000000
GIT binary patch
[base85 binary patch data omitted]
diff --git a/docs/build/html/_static/jquery.js b/docs/build/html/_static/jquery.js
deleted file mode 100644
index c4c6022..0000000
--- a/docs/build/html/_static/jquery.js
+++ /dev/null
@@ -1,2 +0,0 @@
-/*! jQuery v3.6.0 | (c) OpenJS Foundation and other contributors | jquery.org/license */
[the second line, the minified jQuery 3.6.0 source, omitted]
diff --git a/docs/build/html/_static/language_data.js b/docs/build/html/_static/language_data.js
deleted file mode 100644
--- a/docs/build/html/_static/language_data.js
+++ /dev/null
@@ -1,199 +0,0 @@
[199 lines of the Sphinx-generated language_data.js (stopword list and Porter stemmer) omitted]
diff --git a/docs/build/html/_static/minus.png b/docs/build/html/_static/minus.png
deleted file mode 100644
index d96755fdaf8bb2214971e0db9c1fd3077d7c419d..0000000000000000000000000000000000000000
GIT binary patch
[base85 binary patch data omitted]
diff --git a/docs/build/html/_static/plus.png b/docs/build/html/_static/plus.png
deleted file mode 100644
index 7107cec93a979b9a5f64843235a16651d563ce2d..0000000000000000000000000000000000000000
GIT binary patch
[base85 binary patch data omitted]
diff --git a/docs/build/html/_static/pygments.css b/docs/build/html/_static/pygments.css
deleted file mode 100644
index 08bec68..0000000
--- a/docs/build/html/_static/pygments.css
+++ /dev/null
@@ -1,74 +0,0 @@
[74 lines of the Pygments-generated highlighting stylesheet omitted]
diff --git a/docs/build/html/_static/searchtools.js b/docs/build/html/_static/searchtools.js
deleted file mode 100644
index e89e34d..0000000
--- a/docs/build/html/_static/searchtools.js
+++ /dev/null
@@ -1,566 +0,0 @@
[566 lines of the Sphinx-generated searchtools.js omitted]
- */ -if (typeof splitQuery === "undefined") { - var splitQuery = (query) => query - .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) - .filter(term => term) // remove remaining empty strings -} - -/** - * Search Module - */ -const Search = { - _index: null, - _queued_query: null, - _pulse_status: -1, - - htmlToText: (htmlString) => { - const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); - htmlElement.querySelectorAll(".headerlink").forEach((el) => { el.remove() }); - const docContent = htmlElement.querySelector('[role="main"]'); - if (docContent !== undefined) return docContent.textContent; - console.warn( - "Content block not found. Sphinx search tries to obtain it via '[role=main]'. Could you check your theme or template." - ); - return ""; - }, - - init: () => { - const query = new URLSearchParams(window.location.search).get("q"); - document - .querySelectorAll('input[name="q"]') - .forEach((el) => (el.value = query)); - if (query) Search.performSearch(query); - }, - - loadIndex: (url) => - (document.body.appendChild(document.createElement("script")).src = url), - - setIndex: (index) => { - Search._index = index; - if (Search._queued_query !== null) { - const query = Search._queued_query; - Search._queued_query = null; - Search.query(query); - } - }, - - hasIndex: () => Search._index !== null, - - deferQuery: (query) => (Search._queued_query = query), - - stopPulse: () => (Search._pulse_status = -1), - - startPulse: () => { - if (Search._pulse_status >= 0) return; - - const pulse = () => { - Search._pulse_status = (Search._pulse_status + 1) % 4; - Search.dots.innerText = ".".repeat(Search._pulse_status); - if (Search._pulse_status >= 0) window.setTimeout(pulse, 500); - }; - pulse(); - }, - - /** - * perform a search for something (or wait until index is loaded) - */ - performSearch: (query) => { - // create the required interface elements - const searchText = document.createElement("h2"); - searchText.textContent = _("Searching"); - const searchSummary = document.createElement("p"); - searchSummary.classList.add("search-summary"); - searchSummary.innerText = ""; - const searchList = document.createElement("ul"); - searchList.classList.add("search"); - - const out = document.getElementById("search-results"); - Search.title = out.appendChild(searchText); - Search.dots = Search.title.appendChild(document.createElement("span")); - Search.status = out.appendChild(searchSummary); - Search.output = out.appendChild(searchList); - - const searchProgress = document.getElementById("search-progress"); - // Some themes don't use the search progress node - if (searchProgress) { - searchProgress.innerText = _("Preparing search..."); - } - Search.startPulse(); - - // index already loaded, the browser was quick! 
- if (Search.hasIndex()) Search.query(query); - else Search.deferQuery(query); - }, - - /** - * execute search (requires search index to be loaded) - */ - query: (query) => { - const filenames = Search._index.filenames; - const docNames = Search._index.docnames; - const titles = Search._index.titles; - const allTitles = Search._index.alltitles; - const indexEntries = Search._index.indexentries; - - // stem the search terms and add them to the correct list - const stemmer = new Stemmer(); - const searchTerms = new Set(); - const excludedTerms = new Set(); - const highlightTerms = new Set(); - const objectTerms = new Set(splitQuery(query.toLowerCase().trim())); - splitQuery(query.trim()).forEach((queryTerm) => { - const queryTermLower = queryTerm.toLowerCase(); - - // maybe skip this "word" - // stopwords array is from language_data.js - if ( - stopwords.indexOf(queryTermLower) !== -1 || - queryTerm.match(/^\d+$/) - ) - return; - - // stem the word - let word = stemmer.stemWord(queryTermLower); - // select the correct list - if (word[0] === "-") excludedTerms.add(word.substr(1)); - else { - searchTerms.add(word); - highlightTerms.add(queryTermLower); - } - }); - - if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js - localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" ")) - } - - // console.debug("SEARCH: searching for:"); - // console.info("required: ", [...searchTerms]); - // console.info("excluded: ", [...excludedTerms]); - - // array of [docname, title, anchor, descr, score, filename] - let results = []; - _removeChildren(document.getElementById("search-progress")); - - const queryLower = query.toLowerCase(); - for (const [title, foundTitles] of Object.entries(allTitles)) { - if (title.toLowerCase().includes(queryLower) && (queryLower.length >= title.length/2)) { - for (const [file, id] of foundTitles) { - let score = Math.round(100 * queryLower.length / title.length) - results.push([ - docNames[file], - titles[file] !== title ? `${titles[file]} > ${title}` : title, - id !== null ? "#" + id : "", - null, - score, - filenames[file], - ]); - } - } - } - - // search for explicit entries in index directives - for (const [entry, foundEntries] of Object.entries(indexEntries)) { - if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { - for (const [file, id] of foundEntries) { - let score = Math.round(100 * queryLower.length / entry.length) - results.push([ - docNames[file], - titles[file], - id ? "#" + id : "", - null, - score, - filenames[file], - ]); - } - } - } - - // lookup as object - objectTerms.forEach((term) => - results.push(...Search.performObjectSearch(term, objectTerms)) - ); - - // lookup as search terms in fulltext - results.push(...Search.performTermsSearch(searchTerms, excludedTerms)); - - // let the scorer override scores with a custom scoring function - if (Scorer.score) results.forEach((item) => (item[4] = Scorer.score(item))); - - // now sort the results by score (in opposite order of appearance, since the - // display function below uses pop() to retrieve items) and then - // alphabetically - results.sort((a, b) => { - const leftScore = a[4]; - const rightScore = b[4]; - if (leftScore === rightScore) { - // same score: sort alphabetically - const leftTitle = a[1].toLowerCase(); - const rightTitle = b[1].toLowerCase(); - if (leftTitle === rightTitle) return 0; - return leftTitle > rightTitle ? -1 : 1; // inverted is intentional - } - return leftScore > rightScore ? 
1 : -1; - }); - - // remove duplicate search results - // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept - let seen = new Set(); - results = results.reverse().reduce((acc, result) => { - let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(','); - if (!seen.has(resultStr)) { - acc.push(result); - seen.add(resultStr); - } - return acc; - }, []); - - results = results.reverse(); - - // for debugging - //Search.lastresults = results.slice(); // a copy - // console.info("search results:", Search.lastresults); - - // print the results - _displayNextItem(results, results.length, searchTerms); - }, - - /** - * search for object names - */ - performObjectSearch: (object, objectTerms) => { - const filenames = Search._index.filenames; - const docNames = Search._index.docnames; - const objects = Search._index.objects; - const objNames = Search._index.objnames; - const titles = Search._index.titles; - - const results = []; - - const objectSearchCallback = (prefix, match) => { - const name = match[4] - const fullname = (prefix ? prefix + "." : "") + name; - const fullnameLower = fullname.toLowerCase(); - if (fullnameLower.indexOf(object) < 0) return; - - let score = 0; - const parts = fullnameLower.split("."); - - // check for different match types: exact matches of full name or - // "last name" (i.e. last dotted part) - if (fullnameLower === object || parts.slice(-1)[0] === object) - score += Scorer.objNameMatch; - else if (parts.slice(-1)[0].indexOf(object) > -1) - score += Scorer.objPartialMatch; // matches in last name - - const objName = objNames[match[1]][2]; - const title = titles[match[0]]; - - // If more than one term searched for, we require other words to be - // found in the name/title/description - const otherTerms = new Set(objectTerms); - otherTerms.delete(object); - if (otherTerms.size > 0) { - const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase(); - if ( - [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0) - ) - return; - } - - let anchor = match[3]; - if (anchor === "") anchor = fullname; - else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname; - - const descr = objName + _(", in ") + title; - - // add custom score for some objects according to scorer - if (Scorer.objPrio.hasOwnProperty(match[2])) - score += Scorer.objPrio[match[2]]; - else score += Scorer.objPrioDefault; - - results.push([ - docNames[match[0]], - fullname, - "#" + anchor, - descr, - score, - filenames[match[0]], - ]); - }; - Object.keys(objects).forEach((prefix) => - objects[prefix].forEach((array) => - objectSearchCallback(prefix, array) - ) - ); - return results; - }, - - /** - * search for full-text terms in the index - */ - performTermsSearch: (searchTerms, excludedTerms) => { - // prepare search - const terms = Search._index.terms; - const titleTerms = Search._index.titleterms; - const filenames = Search._index.filenames; - const docNames = Search._index.docnames; - const titles = Search._index.titles; - - const scoreMap = new Map(); - const fileMap = new Map(); - - // perform the search on the required terms - searchTerms.forEach((word) => { - const files = []; - const arr = [ - { files: terms[word], score: Scorer.term }, - { files: titleTerms[word], score: Scorer.title }, - ]; - // add support for partial matches - if (word.length > 2) { - const escapedWord = _escapeRegExp(word); - Object.keys(terms).forEach((term) => { - if (term.match(escapedWord) && !terms[word]) - arr.push({ 
files: terms[term], score: Scorer.partialTerm }); - }); - Object.keys(titleTerms).forEach((term) => { - if (term.match(escapedWord) && !titleTerms[word]) - arr.push({ files: titleTerms[word], score: Scorer.partialTitle }); - }); - } - - // no match but word was a required one - if (arr.every((record) => record.files === undefined)) return; - - // found search word in contents - arr.forEach((record) => { - if (record.files === undefined) return; - - let recordFiles = record.files; - if (recordFiles.length === undefined) recordFiles = [recordFiles]; - files.push(...recordFiles); - - // set score for the word in each file - recordFiles.forEach((file) => { - if (!scoreMap.has(file)) scoreMap.set(file, {}); - scoreMap.get(file)[word] = record.score; - }); - }); - - // create the mapping - files.forEach((file) => { - if (fileMap.has(file) && fileMap.get(file).indexOf(word) === -1) - fileMap.get(file).push(word); - else fileMap.set(file, [word]); - }); - }); - - // now check if the files don't contain excluded terms - const results = []; - for (const [file, wordList] of fileMap) { - // check if all requirements are matched - - // as search terms with length < 3 are discarded - const filteredTermCount = [...searchTerms].filter( - (term) => term.length > 2 - ).length; - if ( - wordList.length !== searchTerms.size && - wordList.length !== filteredTermCount - ) - continue; - - // ensure that none of the excluded terms is in the search result - if ( - [...excludedTerms].some( - (term) => - terms[term] === file || - titleTerms[term] === file || - (terms[term] || []).includes(file) || - (titleTerms[term] || []).includes(file) - ) - ) - break; - - // select one (max) score for the file. - const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w])); - // add result to the result list - results.push([ - docNames[file], - titles[file], - "", - null, - score, - filenames[file], - ]); - } - return results; - }, - - /** - * helper function to return a node containing the - * search summary for a given text. keywords is a list - * of stemmed words. - */ - makeSearchSummary: (htmlText, keywords) => { - const text = Search.htmlToText(htmlText); - if (text === "") return null; - - const textLower = text.toLowerCase(); - const actualStartPosition = [...keywords] - .map((k) => textLower.indexOf(k.toLowerCase())) - .filter((i) => i > -1) - .slice(-1)[0]; - const startWithContext = Math.max(actualStartPosition - 120, 0); - - const top = startWithContext === 0 ? "" : "..."; - const tail = startWithContext + 240 < text.length ? "..." : ""; - - let summary = document.createElement("p"); - summary.classList.add("context"); - summary.textContent = top + text.substr(startWithContext, 240).trim() + tail; - - return summary; - }, -}; - -_ready(Search.init); diff --git a/docs/build/html/genindex.html b/docs/build/html/genindex.html deleted file mode 100644 index 22b7911..0000000 --- a/docs/build/html/genindex.html +++ /dev/null @@ -1,1541 +0,0 @@ - - - - - - Index — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation - - - - - - - - - - - - - - - - - - -
[genindex.html body removed: the auto-generated alphabetical index (entries A–Y) over the quapy modules, plus per-page navigation and footer; the same modules are listed in py-modindex.html below]

diff --git a/docs/build/html/index.html b/docs/build/html/index.html
deleted file mode 100644
index 74c9597..0000000
--- a/docs/build/html/index.html
+++ /dev/null
@@ -1,826 +0,0 @@
Welcome to QuaPy’s documentation! — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation
- - -
- -
-
-
- -
-
-
-
- -
-

Welcome to QuaPy’s documentation!

-

QuaPy is a Python-based open-source framework for quantification.

-

This document describes the API of the modules included in QuaPy.

-
-

Installation

-

pip install quapy

-
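A minimal usage sketch follows (assuming the quickstart API also shown in the README: qp.datasets.fetch_reviews, the ACC aggregative quantifier, and quantify; the dataset name is illustrative):

>>> import quapy as qp
>>> from sklearn.linear_model import LogisticRegression
>>> dataset = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5)
>>> model = qp.method.aggregative.ACC(LogisticRegression())
>>> model.fit(dataset.training)
>>> estim_prevalence = model.quantify(dataset.test.instances)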
-
-

GitHub

-

QuaPy is hosted in GitHub at https://github.com/HLT-ISTI/QuaPy

-
-
-

Wiki Documents

-

In this section you can find useful information concerning different aspects of QuaPy, with examples:

- -
-
-
-
-

Contents

-
- -
-
-
-
-

Indices and tables

- -
- - -
-
- -
-
-
-
diff --git a/docs/build/html/modules.html b/docs/build/html/modules.html
deleted file mode 100644
index d4d4abb..0000000
--- a/docs/build/html/modules.html
+++ /dev/null
@@ -1,335 +0,0 @@
quapy — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation
- - -
- -
-
-
- -
-
-
-
- -
-

quapy

[modules.html body removed: a bare table of contents pointing into the quapy package, plus per-page navigation and footer]

diff --git a/docs/build/html/objects.inv b/docs/build/html/objects.inv
deleted file mode 100644
index 8c25dac03b4ab147fd0dcd7009cd2810192cf066..0000000000000000000000000000000000000000
GIT binary patch
[4073 bytes of Sphinx object-inventory data removed]

diff --git a/docs/build/html/py-modindex.html b/docs/build/html/py-modindex.html
deleted file mode 100644
index 0dc841b..0000000
--- a/docs/build/html/py-modindex.html
+++ /dev/null
@@ -1,261 +0,0 @@
Python Module Index — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation
- - -
- -
-
-
-
    -
  • - -
  • -
  • -
-
-
-
-
- - -

Python Module Index

- -
- q -
 
- q
- quapy -
    - quapy.classification -
    - quapy.classification.calibration -
    - quapy.classification.methods -
    - quapy.classification.neural -
    - quapy.classification.svmperf -
    - quapy.data -
    - quapy.data.base -
    - quapy.data.datasets -
    - quapy.data.preprocessing -
    - quapy.data.reader -
    - quapy.error -
    - quapy.evaluation -
    - quapy.functional -
    - quapy.method -
    - quapy.method._kdey -
    - quapy.method._neural -
    - quapy.method._threshold_optim -
    - quapy.method.aggregative -
    - quapy.method.base -
    - quapy.method.composable -
    - quapy.method.meta -
    - quapy.method.non_aggregative -
    - quapy.model_selection -
    - quapy.plot -
    - quapy.protocol -
    - quapy.util -
- - -
-
-
- -
- -
-

[per-page footer removed]

diff --git a/docs/build/html/quapy.classification.html b/docs/build/html/quapy.classification.html
deleted file mode 100644
index 0f36950..0000000
--- a/docs/build/html/quapy.classification.html
+++ /dev/null
@@ -1,967 +0,0 @@
quapy.classification package — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation
- - -
- -
-
-
- -
-
-
-
- -
-

quapy.classification package

-
-

Submodules

-
-
-

quapy.classification.calibration module

-
-
-class quapy.classification.calibration.BCTSCalibration(classifier, val_split=5, n_jobs=None, verbose=False)[source]
-

Bases: RecalibratedProbabilisticClassifierBase

-

Applies the Bias-Corrected Temperature Scaling (BCTS) calibration method from abstention.calibration, as defined in -Alexandari et al. paper:

-
-
Parameters:
-
    -
  • classifier – a scikit-learn probabilistic classifier

  • -
  • val_split – indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained on the whole training set afterwards. Default value is 5.

  • -
  • n_jobs – indicate the number of parallel workers (only when val_split is an integer)

  • -
  • verbose – whether or not to display information in the standard output

  • -
-
-
-
- -
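A short usage sketch (Xtr, ytr, and Xte are placeholders for any training/test data compatible with the wrapped classifier):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.classification.calibration import BCTSCalibration
>>> calibrated = BCTSCalibration(LogisticRegression(), val_split=5)
>>> calibrated.fit(Xtr, ytr)
>>> posteriors = calibrated.predict_proba(Xte)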
-
-class quapy.classification.calibration.NBVSCalibration(classifier, val_split=5, n_jobs=None, verbose=False)[source]
-

Bases: RecalibratedProbabilisticClassifierBase

-

Applies the No-Bias Vector Scaling (NBVS) calibration method from abstention.calibration, as defined in -Alexandari et al. paper:

-
-
Parameters:
-
    -
  • classifier – a scikit-learn probabilistic classifier

  • -
  • val_split – indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained on the whole training set afterwards. Default value is 5.

  • -
  • n_jobs – indicate the number of parallel workers (only when val_split is an integer)

  • -
  • verbose – whether or not to display information in the standard output

  • -
-
-
-
- -
-
-class quapy.classification.calibration.RecalibratedProbabilisticClassifier[source]
-

Bases: object

-

Abstract class for (re)calibration method from abstention.calibration, as defined in -Alexandari, A., Kundaje, A., & Shrikumar, A. (2020, November). Maximum likelihood with bias-corrected calibration -is hard-to-beat at label shift adaptation. In International Conference on Machine Learning (pp. 222-232). PMLR.:

-
- -
-
-class quapy.classification.calibration.RecalibratedProbabilisticClassifierBase(classifier, calibrator, val_split=5, n_jobs=None, verbose=False)[source]
-

Bases: BaseEstimator, RecalibratedProbabilisticClassifier

-

Applies a (re)calibration method from abstention.calibration, as defined in -Alexandari et al. paper.

-
-
Parameters:
-
    -
  • classifier – a scikit-learn probabilistic classifier

  • -
  • calibrator – the calibration object (an instance of abstention.calibration.CalibratorFactory)

  • -
  • val_split – indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained on the whole training set afterwards. Default value is 5.

  • -
  • n_jobs – indicate the number of parallel workers (only when val_split is an integer); default=None

  • -
  • verbose – whether or not to display information in the standard output

  • -
-
-
-
-
-property classes_
-

Returns the classes on which the classifier has been trained on

-
-
Returns:
-

array-like of shape (n_classes)

-
-
-
- -
-
-fit(X, y)[source]
-

Fits the calibration for the probabilistic classifier.

-
-
Parameters:
-
    -
  • X – array-like of shape (n_samples, n_features) with the data instances

  • -
  • y – array-like of shape (n_samples,) with the class labels

  • -
-
-
Returns:
-

self

-
-
-
- -
-
-fit_cv(X, y)[source]
-

Fits the calibration in a cross-validation manner, i.e., it generates posterior probabilities for all -training instances via cross-validation, and then retrains the classifier on all training instances. -The posterior probabilities thus generated are used for calibrating the outputs of the classifier.

-
-
Parameters:
-
    -
  • X – array-like of shape (n_samples, n_features) with the data instances

  • -
  • y – array-like of shape (n_samples,) with the class labels

  • -
-
-
Returns:
-

self

-
-
-
- -
-
-fit_tr_val(X, y)[source]
-

Fits the calibration in a train/val-split manner, i.e., it partitions the training instances into a training and a validation set, and then uses the training samples to learn a classifier which is then used to generate posterior probabilities for the held-out validation data. These posteriors are used to calibrate the classifier. The classifier is not retrained on the whole dataset.

-
-
Parameters:
-
    -
  • X – array-like of shape (n_samples, n_features) with the data instances

  • -
  • y – array-like of shape (n_samples,) with the class labels

  • -
-
-
Returns:
-

self

-
-
-
- -
-
-predict(X)[source]
-

Predicts class labels for the data instances in X

-
-
Parameters:
-

X – array-like of shape (n_samples, n_features) with the data instances

-
-
Returns:
-

array-like of shape (n_samples,) with the class label predictions

-
-
-
- -
-
-predict_proba(X)[source]
-

Generates posterior probabilities for the data instances in X

-
-
Parameters:
-

X – array-like of shape (n_samples, n_features) with the data instances

-
-
Returns:
-

array-like of shape (n_samples, n_classes) with posterior probabilities

-
-
-
- -
- -
-
-class quapy.classification.calibration.TSCalibration(classifier, val_split=5, n_jobs=None, verbose=False)[source]
-

Bases: RecalibratedProbabilisticClassifierBase

-

Applies the Temperature Scaling (TS) calibration method from abstention.calibration, as defined in -Alexandari et al. paper:

-
-
Parameters:
-
    -
  • classifier – a scikit-learn probabilistic classifier

  • -
  • val_split – indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained on the whole training set afterwards. Default value is 5.

  • -
  • n_jobs – indicate the number of parallel workers (only when val_split is an integer)

  • -
  • verbose – whether or not to display information in the standard output

  • -
-
-
-
- -
-
-class quapy.classification.calibration.VSCalibration(classifier, val_split=5, n_jobs=None, verbose=False)[source]
-

Bases: RecalibratedProbabilisticClassifierBase

-

Applies the Vector Scaling (VS) calibration method from abstention.calibration, as defined in -Alexandari et al. paper:

-
-
Parameters:
-
    -
  • classifier – a scikit-learn probabilistic classifier

  • -
  • val_split – indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained on the whole training set afterwards. Default value is 5.

  • -
  • n_jobs – indicate the number of parallel workers (only when val_split is an integer)

  • -
  • verbose – whether or not to display information in the standard output

  • -
-
-
-
- -
-
-

quapy.classification.methods module

-
-
-class quapy.classification.methods.LowRankLogisticRegression(n_components=100, **kwargs)[source]
-

Bases: BaseEstimator

-

An example of a classification method (i.e., an object that implements fit, predict, and predict_proba) -that also generates embedded inputs (i.e., that implements transform), as those required for -quapy.method.neural.QuaNet. This is a mock method to allow for easily instantiating -quapy.method.neural.QuaNet on array-like real-valued instances. -The transformation consists of applying sklearn.decomposition.TruncatedSVD -while classification is performed using sklearn.linear_model.LogisticRegression on the low-rank space.

-
-
Parameters:
-
    -
  • n_components – the number of principal components to retain

  • -
  • kwargs – parameters for the -Logistic Regression classifier

  • -
-
-
-
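A usage sketch (Xtr, ytr, and Xte are placeholders; extra keyword arguments such as C are forwarded to the underlying Logistic Regression):

>>> from quapy.classification.methods import LowRankLogisticRegression
>>> cls = LowRankLogisticRegression(n_components=64, C=1.0)
>>> cls.fit(Xtr, ytr)                # fits TruncatedSVD, then LogisticRegression
>>> Z = cls.transform(Xte)           # embedded instances with 64 dimensions
>>> posteriors = cls.predict_proba(Xte)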
-
-fit(X, y)[source]
-

Fit the model according to the given training data. The fit consists of -fitting TruncatedSVD and then LogisticRegression on the low-rank representation.

-
-
Parameters:
-
    -
  • X – array-like of shape (n_samples, n_features) with the instances

  • -
  • y – array-like of shape (n_samples, n_classes) with the class labels

  • -
-
-
Returns:
-

self

-
-
-
- -
-
-get_params()[source]
-

Get hyper-parameters for this estimator.

-
-
Returns:
-

a dictionary with parameter names mapped to their values

-
-
-
- -
-
-predict(X)[source]
-

Predicts labels for the instances X embedded into the low-rank space.

-
-
Parameters:
-

X – array-like of shape (n_samples, n_features) instances to classify

-
-
Returns:
-

a numpy array of length n containing the label predictions, where n is the number of -instances in X

-
-
-
- -
-
-predict_proba(X)[source]
-

Predicts posterior probabilities for the instances X embedded into the low-rank space.

-
-
Parameters:
-

X – array-like of shape (n_samples, n_features) instances to classify

-
-
Returns:
-

array-like of shape (n_samples, n_classes) with the posterior probabilities

-
-
-
- -
-
-set_params(**params)[source]
-

Set the parameters of this estimator.

-
-
Parameters:
-

parameters – a **kwargs dictionary with the estimator parameters for Logistic Regression and optionally also n_components for TruncatedSVD

-
-
-
- -
-
-transform(X)[source]
-

Returns the low-rank approximation of X with n_components dimensions, or X unaltered if -n_components >= X.shape[1].

-
-
Parameters:
-

X – array-like of shape (n_samples, n_features) instances to embed

-
-
Returns:
-

array-like of shape (n_samples, n_components) with the embedded instances

-
-
-
- -
- -
-
-

quapy.classification.neural module

-
-
-class quapy.classification.neural.CNNnet(vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, kernel_heights=[3, 5, 7], stride=1, padding=0, drop_p=0.5)[source]
-

Bases: TextClassifierNet

-

An implementation of quapy.classification.neural.TextClassifierNet based on -Convolutional Neural Networks.

-
-
Parameters:
-
    -
  • vocabulary_size – the size of the vocabulary

  • -
  • n_classes – number of target classes

  • -
  • embedding_size – the dimensionality of the word embeddings space (default 100)

  • -
  • hidden_size – the dimensionality of the hidden space (default 256)

  • -
  • repr_size – the dimensionality of the document embeddings space (default 100)

  • -
  • kernel_heights – list of kernel lengths (default [3,5,7]), i.e., the number of -consecutive tokens that each kernel covers

  • -
  • stride – convolutional stride (default 1)

  • -
  • padding – convolutional padding (default 0)

  • -
  • drop_p – drop probability for dropout (default 0.5)

  • -
-
-
-
-
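For instance, a network for a binary task over a vocabulary of 5000 indexed tokens (all values are illustrative) could be instantiated as:

>>> from quapy.classification.neural import CNNnet
>>> net = CNNnet(vocabulary_size=5000, n_classes=2, kernel_heights=[3, 5, 7])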
-document_embedding(input)[source]
-

Embeds documents (i.e., performs the forward pass up to the -next-to-last layer).

-
-
Parameters:
-

input – a batch of instances, typically generated by a torch’s DataLoader -instance (see quapy.classification.neural.TorchDataset)

-
-
Returns:
-

a torch tensor of shape (n_samples, n_dimensions), where -n_samples is the number of documents, and n_dimensions is the -dimensionality of the embedding

-
-
-
- -
-
-get_params()[source]
-

Get hyper-parameters for this estimator

-
-
Returns:
-

a dictionary with parameter names mapped to their values

-
-
-
- -
-
-training: bool
-
- -
-
-property vocabulary_size
-

Return the size of the vocabulary

-
-
Returns:
-

integer

-
-
-
- -
- -
-
-class quapy.classification.neural.LSTMnet(vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, lstm_class_nlayers=1, drop_p=0.5)[source]
-

Bases: TextClassifierNet

-

An implementation of quapy.classification.neural.TextClassifierNet based on -Long Short Term Memory networks.

-
-
Parameters:
-
    -
  • vocabulary_size – the size of the vocabulary

  • -
  • n_classes – number of target classes

  • -
  • embedding_size – the dimensionality of the word embeddings space (default 100)

  • -
  • hidden_size – the dimensionality of the hidden space (default 256)

  • -
  • repr_size – the dimensionality of the document embeddings space (default 100)

  • -
  • lstm_class_nlayers – number of LSTM layers (default 1)

  • -
  • drop_p – drop probability for dropout (default 0.5)

  • -
-
-
-
-
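Analogously to CNNnet, an LSTM-based network could be instantiated as (illustrative values):

>>> from quapy.classification.neural import LSTMnet
>>> net = LSTMnet(vocabulary_size=5000, n_classes=2, lstm_class_nlayers=2)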
-document_embedding(x)[source]
-

Embeds documents (i.e., performs the forward pass up to the -next-to-last layer).

-
-
Parameters:
-

x – a batch of instances, typically generated by a torch’s DataLoader -instance (see quapy.classification.neural.TorchDataset)

-
-
Returns:
-

a torch tensor of shape (n_samples, n_dimensions), where -n_samples is the number of documents, and n_dimensions is the -dimensionality of the embedding

-
-
-
- -
-
-get_params()[source]
-

Get hyper-parameters for this estimator

-
-
Returns:
-

a dictionary with parameter names mapped to their values

-
-
-
- -
-
-training: bool
-
- -
-
-property vocabulary_size
-

Return the size of the vocabulary

-
-
Returns:
-

integer

-
-
-
- -
- -
-
-class quapy.classification.neural.NeuralClassifierTrainer(net: TextClassifierNet, lr=0.001, weight_decay=0, patience=10, epochs=200, batch_size=64, batch_size_test=512, padding_length=300, device='cuda', checkpointpath='../checkpoint/classifier_net.dat')[source]
-

Bases: object

-

Trains a neural network for text classification.

-
-
Parameters:
-
    -
  • net – an instance of TextClassifierNet implementing the forward pass

  • -
  • lr – learning rate (default 1e-3)

  • -
  • weight_decay – weight decay (default 0)

  • -
  • patience – number of epochs that do not show any improvement in validation -to wait before applying early stop (default 10)

  • -
  • epochs – maximum number of training epochs (default 200)

  • -
  • batch_size – batch size for training (default 64)

  • -
  • batch_size_test – batch size for test (default 512)

  • -
  • padding_length – maximum number of tokens to consider in a document (default 300)

  • -
  • device – specify ‘cpu’ or ‘cuda’ (default) for enabling gpu

  • -
  • checkpointpath – where to store the parameters of the best model found so far -according to the evaluation in the held-out validation split (default ‘../checkpoint/classifier_net.dat’)

  • -
-
-
-
-
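A training sketch (train_tokens and test_tokens stand for lists of lists of indexed tokens, and train_labels for the corresponding labels; these names are placeholders):

>>> from quapy.classification.neural import CNNnet, NeuralClassifierTrainer
>>> net = CNNnet(vocabulary_size=5000, n_classes=2)
>>> trainer = NeuralClassifierTrainer(net, lr=1e-3, device='cuda')
>>> trainer.fit(train_tokens, train_labels, val_split=0.3)
>>> predictions = trainer.predict(test_tokens)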
-property device
-

Gets the device in which the network is allocated

-
-
Returns:
-

device

-
-
-
- -
-
-fit(instances, labels, val_split=0.3)[source]
-

Fits the model according to the given training data.

-
-
Parameters:
-
    -
  • instances – list of lists of indexed tokens

  • -
  • labels – array-like of shape (n_samples, n_classes) with the class labels

  • -
  • val_split – proportion of training documents to be taken as the validation set (default 0.3)

  • -
-
-
Returns:
-

-
-
-
- -
-
-get_params()[source]
-

Get hyper-parameters for this estimator

-
-
Returns:
-

a dictionary with parameter names mapped to their values

-
-
-
- -
-
-predict(instances)[source]
-

Predicts labels for the instances

-
-
Parameters:
-

instances – list of lists of indexed tokens

-
-
Returns:
-

a numpy array of length n containing the label predictions, where n is the number of -instances in X

-
-
-
- -
-
-predict_proba(instances)[source]
-

Predicts posterior probabilities for the instances

-
-
Parameters:
-

X – array-like of shape (n_samples, n_features) instances to classify

-
-
Returns:
-

array-like of shape (n_samples, n_classes) with the posterior probabilities

-
-
-
- -
-
-reset_net_params(vocab_size, n_classes)[source]
-

Reinitialize the network parameters

-
-
Parameters:
-
    -
  • vocab_size – the size of the vocabulary

  • -
  • n_classes – the number of target classes

  • -
-
-
-
- -
-
-set_params(**params)[source]
-

Set the parameters of this trainer and of the learner it is training. In the current version, parameter names for the trainer and for the learner must be disjoint.

-
-
Parameters:
-

params – a **kwargs dictionary with the parameters

-
-
-
- -
-
-transform(instances)[source]
-

Returns the embeddings of the instances

-
-
Parameters:
-

instances – list of lists of indexed tokens

-
-
Returns:
-

array-like of shape (n_samples, embed_size) with the embedded instances, -where embed_size is defined by the classification network

-
-
-
- -
- -
-
-class quapy.classification.neural.TextClassifierNet(*args, **kwargs)[source]
-

Bases: Module

-

Abstract Text classifier (torch.nn.Module)

-
-
-dimensions()[source]
-

Gets the number of dimensions of the embedding space

-
-
Returns:
-

integer

-
-
-
- -
-
-abstract document_embedding(x)[source]
-

Embeds documents (i.e., performs the forward pass up to the -next-to-last layer).

-
-
Parameters:
-

x – a batch of instances, typically generated by a torch’s DataLoader -instance (see quapy.classification.neural.TorchDataset)

-
-
Returns:
-

a torch tensor of shape (n_samples, n_dimensions), where -n_samples is the number of documents, and n_dimensions is the -dimensionality of the embedding

-
-
-
- -
-
-forward(x)[source]
-

Performs the forward pass.

-
-
Parameters:
-

x – a batch of instances, typically generated by a torch’s DataLoader -instance (see quapy.classification.neural.TorchDataset)

-
-
Returns:
-

a tensor of shape (n_instances, n_classes) with the decision scores -for each of the instances and classes

-
-
-
- -
-
-abstract get_params()[source]
-

Get hyper-parameters for this estimator

-
-
Returns:
-

a dictionary with parameter names mapped to their values

-
-
-
- -
-
-predict_proba(x)[source]
-

Predicts posterior probabilities for the instances in x

-
-
Parameters:
-

x – a torch tensor of indexed tokens with shape (n_instances, pad_length) -where n_instances is the number of instances in the batch, and pad_length -is length of the pad in the batch

-
-
Returns:
-

array-like of shape (n_samples, n_classes) with the posterior probabilities

-
-
-
- -
-
-training: bool
-
- -
-
-property vocabulary_size
-

Return the size of the vocabulary

-
-
Returns:
-

integer

-
-
-
- -
-
-xavier_uniform()[source]
-

Performs Xavier initialization of the network parameters

-
- -
- -
-
-class quapy.classification.neural.TorchDataset(instances, labels=None)[source]
-

Bases: Dataset

-

Transforms labelled instances into a Torch’s torch.utils.data.DataLoader object

-
-
Parameters:
-
    -
  • instances – list of lists of indexed tokens

  • -
  • labels – array-like of shape (n_samples, n_classes) with the class labels

  • -
-
-
-
-
-asDataloader(batch_size, shuffle, pad_length, device)[source]
-

Converts the labelled collection into a Torch DataLoader with dynamic padding for -the batch

-
-
Parameters:
-
    -
  • batch_size – batch size

  • -
  • shuffle – whether or not to shuffle instances

  • -
  • pad_length – the maximum length for the list of tokens (dynamic padding is applied, meaning that if the longest document in the batch is shorter than pad_length, then the batch is padded up to its length, and not to pad_length)

  • -
  • device – whether to allocate tensors in cpu or in cuda

  • -
-
-
Returns:
-

a torch.utils.data.DataLoader object

-
-
-
- -
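A sketch of the conversion (train_tokens and train_labels as in the trainer example above; values are illustrative):

>>> from quapy.classification.neural import TorchDataset
>>> dataset = TorchDataset(train_tokens, train_labels)
>>> loader = dataset.asDataloader(batch_size=64, shuffle=True, pad_length=300, device='cuda')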
- -
-
-

quapy.classification.svmperf module

-
-
-class quapy.classification.svmperf.SVMperf(svmperf_base, C=0.01, verbose=False, loss='01', host_folder=None)[source]
-

Bases: BaseEstimator, ClassifierMixin

-

A wrapper for the SVM-perf package by Thorsten Joachims. -When using losses for quantification, the source code has to be patched. See -the installation documentation -for further details.

-

References

- -
-
Parameters:
-
    -
  • svmperf_base – path to directory containing the binary files svm_perf_learn and svm_perf_classify

  • -
  • C – trade-off between training error and margin (default 0.01)

  • -
  • verbose – set to True to print svm-perf std outputs

  • -
  • loss – the loss to optimize for. Available losses are “01”, “f1”, “kld”, “nkld”, “q”, “qacc”, “qf1”, “qgm”, “mae”, “mrae”.

  • -
  • host_folder – directory where to store the trained model; set to None (default) for using a tmp directory (temporary directories are automatically deleted)

  • -
-
-
-
-
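A usage sketch (the svmperf_base path is illustrative and must point to a directory with the patched svm_perf_learn and svm_perf_classify binaries; Xtr, ytr, and Xte are placeholders):

>>> from quapy.classification.svmperf import SVMperf
>>> svm = SVMperf(svmperf_base='./svm_perf_quantification', C=0.01, loss='kld')
>>> svm.fit(Xtr, ytr)
>>> scores = svm.decision_function(Xte)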
-decision_function(X, y=None)[source]
-

Evaluate the decision function for the samples in X.

-
-
Parameters:
-
    -
  • X – array-like of shape (n_samples, n_features) containing the instances to classify

  • -
  • y – unused

  • -
-
-
Returns:
-

array-like of shape (n_samples,) containing the decision scores of the instances

-
-
-
- -
-
-fit(X, y)[source]
-

Trains the SVM for the multivariate performance loss

-
-
Parameters:
-
    -
  • X – training instances

  • -
  • y – a binary vector of labels

  • -
-
-
Returns:
-

self

-
-
-
- -
-
-predict(X)[source]
-

Predicts labels for the instances X

-
-
Parameters:
-

X – array-like of shape (n_samples, n_features) instances to classify

-
-
Returns:
-

a numpy array of length n containing the label predictions, where n is the number of -instances in X

-
-
-
- -
-
-valid_losses = {'01': 0, 'f1': 1, 'kld': 12, 'mae': 26, 'mrae': 27, 'nkld': 13, 'q': 22, 'qacc': 23, 'qf1': 24, 'qgm': 25}
-
- -
- -
-
-

Module contents

-
-
- - -
-
- -
-
-
-
diff --git a/docs/build/html/quapy.data.html b/docs/build/html/quapy.data.html
deleted file mode 100644
index 1d745a2..0000000
--- a/docs/build/html/quapy.data.html
+++ /dev/null
@@ -1,1222 +0,0 @@
quapy.data package — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation
- - -
- -
-
-
- -
-
-
-
- -
-

quapy.data package

-
-

Submodules

-
-
-

quapy.data.base module

-
-
-class quapy.data.base.Dataset(training: LabelledCollection, test: LabelledCollection, vocabulary: Optional[dict] = None, name='')[source]
-

Bases: object

-

Abstraction of training and test LabelledCollection objects.

-
-
Parameters:
-
    -
  • training – a LabelledCollection instance

  • -
  • test – a LabelledCollection instance

  • -
  • vocabulary – if indicated, is a dictionary of the terms used in this textual dataset

  • -
  • name – a string representing the name of the dataset

  • -
-
-
-
-
-classmethod SplitStratified(collection: LabelledCollection, train_size=0.6)[source]
-

Generates a Dataset from a stratified split of a LabelledCollection instance. -See LabelledCollection.split_stratified()

-
-
Parameters:
-
    -
  • collectionLabelledCollection

  • -
  • train_size – the proportion of training documents (the rest conforms the test split)

  • -
-
-
Returns:
-

an instance of Dataset

-
-
-
- -
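For example (instances and labels are placeholders for any labelled data):

>>> from quapy.data.base import Dataset, LabelledCollection
>>> collection = LabelledCollection(instances, labels)
>>> data = Dataset.SplitStratified(collection, train_size=0.6)
>>> training, test = data.train_test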
-
-property binary
-

Returns True if the training collection is labelled according to two classes

-
-
Returns:
-

boolean

-
-
-
- -
-
-property classes_
-

The classes according to which the training collection is labelled

-
-
Returns:
-

The classes according to which the training collection is labelled

-
-
-
- -
-
-classmethod kFCV(data: LabelledCollection, nfolds=5, nrepeats=1, random_state=0)[source]
-

Generator of stratified folds to be used in k-fold cross validation. This function is only a wrapper around -LabelledCollection.kFCV() that returns Dataset instances made of training and test folds.

-
-
Parameters:
-
    -
  • nfolds – integer (default 5), the number of folds to generate

  • -
  • nrepeats – integer (default 1), the number of rounds of k-fold cross validation to run

  • -
  • random_state – integer (default 0), guarantees that the folds generated are reproducible

  • -
-
-
Returns:
-

yields nfolds * nrepeats folds for k-fold cross validation as instances of Dataset

-
-
-
- -
-
-classmethod load(train_path, test_path, loader_func: callable, classes=None, **loader_kwargs)[source]
-

Loads a training and a test labelled set of data and converts it into a Dataset instance. The function in charge of reading the instances must be specified. This function can be a custom one, or any of the reading functions defined in quapy.data.reader module.

-
-
Parameters:
-
    -
  • train_path – string, the path to the file containing the training instances

  • -
  • test_path – string, the path to the file containing the test instances

  • -
  • loader_func – a custom function that implements the data loader and returns a tuple with instances and -labels

  • -
  • classes – array-like, the classes according to which the instances are labelled

  • -
  • loader_kwargs – any argument that the loader_func function needs in order to read the instances. -See LabelledCollection.load() for further details.

  • -
-
-
Returns:
-

a Dataset object

-
-
-
- -
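For example, assuming the from_text reader from quapy.data.reader and illustrative file paths:

>>> from quapy.data.base import Dataset
>>> from quapy.data.reader import from_text
>>> data = Dataset.load('reviews/train.txt', 'reviews/test.txt', loader_func=from_text)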
-
-property n_classes
-

The number of classes according to which the training collection is labelled

-
-
Returns:
-

integer

-
-
-
- -
-
-reduce(n_train=100, n_test=100, random_state=None)[source]
-

Reduce the number of instances in place for quick experiments. Preserves the prevalence of each set.

-
-
Parameters:
-
    -
  • n_train – number of training documents to keep (default 100)

  • -
  • n_test – number of test documents to keep (default 100)

  • -
-
-
Returns:
-

self

-
-
-
- -
-
-stats(show=True)[source]
-

Returns (and eventually prints) a dictionary with some stats of this dataset. E.g.,:

-
>>> data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
>>> data.stats()
Dataset=kindle #tr-instances=3821, #te-instances=21591, type=<class 'scipy.sparse.csr.csr_matrix'>, #features=4403, #classes=[0 1], tr-prevs=[0.081, 0.919], te-prevs=[0.063, 0.937]
-
-
-
-
Parameters:
-

show – if set to True (default), prints the stats in standard output

-
-
Returns:
-

a dictionary containing some stats of this collection for the training and test collections. The keys -are train and test, and point to dedicated dictionaries of stats, for each collection, with keys -#instances (the number of instances), type (the type representing the instances), -#features (the number of features, if the instances are in array-like format), #classes (the classes of -the collection), prevs (the prevalence values for each class)

-
-
-
- -
-
-property train_test
-

Alias to self.training and self.test

-
-
Returns:
-

the training and test collections

-
-

-
-
-
- -
-
-property vocabulary_size
-

If the dataset is textual, and the vocabulary was indicated, returns the size of the vocabulary

-
-
Returns:
-

integer

-
-
-
- -
- -
-
-class quapy.data.base.LabelledCollection(instances, labels, classes=None)[source]
-

Bases: object

-

A LabelledCollection is a set of objects, each with a label attached. This class implements several sampling routines and other utilities.

-
-
Parameters:
-
    -
  • instances – array-like (np.ndarray, list, or csr_matrix are supported)

  • -
  • labels – array-like with the same length of instances

  • -
  • classes – optional, list of classes from which labels are taken. If not specified, the classes are inferred -from the labels. The classes must be indicated in cases in which some of the labels might have no examples -(i.e., a prevalence of 0)

  • -
-
-
-
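A small sketch (the instances and labels are illustrative):

>>> from quapy.data.base import LabelledCollection
>>> lc = LabelledCollection(['good stuff', 'bad service', 'a must buy'], labels=[1, 0, 1])
>>> lc.classes_        # inferred from the labels: [0, 1]
>>> lc.prevalence()    # relative class frequencies: [1/3, 2/3]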
-
property X

An alias to self.instances

Returns:

self.instances

property Xp

Gets the instances and the true prevalence. This is useful when implementing evaluation protocols from a LabelledCollection object.

Returns:

a tuple (instances, prevalence) from this collection

property Xy

Gets the instances and labels. This is useful when working with sklearn estimators, e.g.:

>>> svm = LinearSVC().fit(*my_collection.Xy)

Returns:

a tuple (instances, labels) from this collection

property binary

Returns True if the number of classes is 2

Returns:

boolean

counts()[source]

Returns the number of instances for each of the classes in the codeframe.

Returns:

a np.ndarray of shape (n_classes) with the number of instances of each class, in the same order as listed by self.classes_

classmethod join(*args: Iterable[LabelledCollection])[source]

Returns a new LabelledCollection as the union of the collections given in input.

Parameters:

args – instances of LabelledCollection

Returns:

a LabelledCollection representing the union of the given collections

kFCV(nfolds=5, nrepeats=1, random_state=None)[source]

Generator of stratified folds to be used in k-fold cross validation.

Parameters:

  • nfolds – integer (default 5), the number of folds to generate

  • nrepeats – integer (default 1), the number of rounds of k-fold cross validation to run

  • random_state – integer (default None), guarantees that the folds generated are reproducible

Returns:

yields nfolds * nrepeats folds for k-fold cross validation
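A usage sketch, assuming data is a LabelledCollection and that each fold is yielded as a (training, test) pair of LabelledCollection objects:

>>> for train, test in data.kFCV(nfolds=5, nrepeats=1, random_state=0):
>>>     print(train.prevalence(), test.prevalence())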
classmethod load(path: str, loader_func: callable, classes=None, **loader_kwargs)[source]

Loads a labelled set of data and converts it into a LabelledCollection instance. The function in charge of reading the instances must be specified. This function can be a custom one, or any of the reading functions defined in the quapy.data.reader module.

Parameters:

  • path – string, the path to the file containing the labelled instances

  • loader_func – a custom function that implements the data loader and returns a tuple with instances and labels

  • classes – array-like, the classes according to which the instances are labelled

  • loader_kwargs – any argument that the loader_func function needs in order to read the instances, i.e., these arguments are used to call loader_func(path, **loader_kwargs)

Returns:

a LabelledCollection object
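For instance, a sketch using the from_text reader from quapy.data.reader (the file path is hypothetical):

>>> from quapy.data.reader import from_text
>>> data = LabelledCollection.load('./reviews.txt', loader_func=from_text)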
property n_classes

The number of classes

Returns:

integer

property p

An alias to self.prevalence()

Returns:

self.prevalence()

prevalence()[source]

Returns the prevalence, or relative frequency, of the classes in the codeframe.

Returns:

a np.ndarray of shape (n_classes) with the relative frequencies of each class, in the same order as listed by self.classes_

sampling(size, *prevs, shuffle=True, random_state=None)[source]

Returns a random sample (an instance of LabelledCollection) of desired size and desired prevalence values. For each class, the sampling is drawn with replacement.

Parameters:

  • size – integer, the requested size

  • prevs – the prevalence for each class; the prevalence value for the last class can be left empty since it is constrained. E.g., for binary collections, only the prevalence p for the first class (as listed in self.classes_) can be specified, while the other class takes prevalence value 1-p

  • shuffle – if set to True (default), shuffles the index before returning it

  • random_state – seed for reproducing sampling

Returns:

an instance of LabelledCollection with length == size and prevalence close to prevs (or prevalence == prevs if the exact prevalence values can be met as proportions of instances)
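A sketch for a binary collection data, drawing 100 instances at prevalence 0.3 for the first class:

>>> sample = data.sampling(100, 0.3, random_state=0)
>>> sample.prevalence()  # approximately array([0.3, 0.7])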
sampling_from_index(index)[source]

Returns an instance of LabelledCollection whose elements are sampled from this collection using the index.

Parameters:

index – np.ndarray

Returns:

an instance of LabelledCollection

sampling_index(size, *prevs, shuffle=True, random_state=None)[source]

Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the prevalence values are not specified, then returns the index of a uniform sampling. For each class, the sampling is drawn with replacement.

Parameters:

  • size – integer, the requested size

  • prevs – the prevalence for each class; the prevalence value for the last class can be left empty since it is constrained. E.g., for binary collections, only the prevalence p for the first class (as listed in self.classes_) can be specified, while the other class takes prevalence value 1-p

  • shuffle – if set to True (default), shuffles the index before returning it

  • random_state – seed for reproducing sampling

Returns:

a np.ndarray of shape (size) with the indexes

split_random(train_prop=0.6, random_state=None)[source]

Returns two instances of LabelledCollection split randomly from this collection, at the desired proportion.

Parameters:

  • train_prop – the proportion of elements to include in the left-most returned collection (typically used as the training collection). The rest of the elements are included in the right-most returned collection (typically used as a test collection).

  • random_state – if specified, guarantees reproducibility of the split.

Returns:

two instances of LabelledCollection, the first one with train_prop elements, and the second one with 1-train_prop elements

split_stratified(train_prop=0.6, random_state=None)[source]

Returns two instances of LabelledCollection split with stratification from this collection, at the desired proportion.

Parameters:

  • train_prop – the proportion of elements to include in the left-most returned collection (typically used as the training collection). The rest of the elements are included in the right-most returned collection (typically used as a test collection).

  • random_state – if specified, guarantees reproducibility of the split.

Returns:

two instances of LabelledCollection, the first one with train_prop elements, and the second one with 1-train_prop elements
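A usage sketch; stratification preserves the class prevalence on both sides of the split:

>>> train, test = data.split_stratified(train_prop=0.7, random_state=0)
>>> train.prevalence()  # approximately equal to test.prevalence()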
stats(show=True)[source]

Returns (and optionally prints) a dictionary with some stats of this collection. E.g.,:

>>> data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
>>> data.training.stats()
>>> #instances=3821, type=<class 'scipy.sparse.csr.csr_matrix'>, #features=4403, #classes=[0 1], prevs=[0.081, 0.919]

Parameters:

show – if set to True (default), prints the stats in standard output

Returns:

a dictionary containing some stats of this collection. Keys include #instances (the number of instances), type (the type representing the instances), #features (the number of features, if the instances are in array-like format), #classes (the classes of the collection), prevs (the prevalence values for each class)

uniform_sampling(size, random_state=None)[source]

Returns a uniform sample (an instance of LabelledCollection) of desired size. The sampling is drawn with replacement.

Parameters:

  • size – integer, the requested size

  • random_state – if specified, guarantees reproducibility of the sampling.

Returns:

an instance of LabelledCollection with length == size

uniform_sampling_index(size, random_state=None)[source]

Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn with replacement.

Parameters:

  • size – integer, the size of the uniform sample

  • random_state – if specified, guarantees reproducibility of the sampling.

Returns:

a np.ndarray of shape (size) with the indexes

property y

An alias to self.labels

Returns:

self.labels
quapy.data.datasets module

quapy.data.datasets.fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=None)[source]

Loads the IFCB dataset for quantification from Zenodo (for more information on this dataset, please follow the zenodo link). This dataset is based on the data available publicly at the WHOI-Plankton repo, and already comes with processed features. The scripts used for the processing are available at P. González’s repo.

The datasets are downloaded only once, and stored for fast reuse.

Parameters:

  • single_sample_train – a boolean. If true, the train dataset is returned as a quapy.data.base.LabelledCollection (all examples together). If false, a generator of training samples is returned, in which each example has an individual label.

  • for_model_selection – if True, returns a split of 30% of the training set (86 out of 286 samples) to be used for model selection; if False, returns the full training set as training set and the test set as the test set

  • data_home – specify the quapy home directory where collections will be dumped (leave empty to use the default ~/quapy_data/ directory)

Returns:

a tuple (train, test_gen) where train is an instance of quapy.data.base.LabelledCollection (if single_sample_train is true) or of quapy.data._ifcb.IFCBTrainSamplesFromDir (a sampling protocol that returns a series of samples labelled example by example). test_gen is an instance of quapy.data._ifcb.IFCBTestSamples, i.e., a sampling protocol that returns a series of samples labelled by prevalence.
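A usage sketch with the default arguments (so that train is a LabelledCollection):

>>> import quapy as qp
>>> train, test_gen = qp.datasets.fetch_IFCB()
>>> train.stats()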
quapy.data.datasets.fetch_UCIBinaryDataset(dataset_name, data_home=None, test_split=0.3, verbose=False) → Dataset[source]

Loads a UCI dataset as an instance of quapy.data.base.Dataset, as used in Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017). Using ensembles for problems with characterizable changes in data distribution: A case study on quantification. Information Fusion, 34, 87-100. and Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019). Dynamic ensemble selection for quantification tasks. Information Fusion, 45, 1-15. The datasets do not come with a predefined train-test split (see fetch_UCIBinaryLabelledCollection() for further information on how to use these collections), and so a train-test split is generated at the desired proportion. The list of valid dataset names can be accessed in quapy.data.datasets.UCI_DATASETS

Parameters:

  • dataset_name – a dataset name

  • data_home – specify the quapy home directory where collections will be dumped (leave empty to use the default ~/quapy_data/ directory)

  • test_split – proportion of documents to be included in the test set. The rest constitutes the training set

  • verbose – set to True (default is False) to get information (from the UCI ML repository) about the datasets

Returns:

a quapy.data.base.Dataset instance

quapy.data.datasets.fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, verbose=False) → LabelledCollection[source]

Loads a UCI collection as an instance of quapy.data.base.LabelledCollection, as used in Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017). Using ensembles for problems with characterizable changes in data distribution: A case study on quantification. Information Fusion, 34, 87-100. and Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019). Dynamic ensemble selection for quantification tasks. Information Fusion, 45, 1-15. The datasets do not come with a predefined train-test split, and so Pérez-Gállego et al. adopted a 5FCVx2 evaluation protocol, meaning that each collection was used to generate two rounds (hence the x2) of 5-fold cross validation. This can be reproduced by using quapy.data.base.Dataset.kFCV(), e.g.:

>>> import quapy as qp
>>> from quapy.data.base import Dataset
>>> collection = qp.datasets.fetch_UCIBinaryLabelledCollection("yeast")
>>> for data in Dataset.kFCV(collection, nfolds=5, nrepeats=2):
>>>     ...

The list of valid dataset names can be accessed in quapy.data.datasets.UCI_DATASETS

Parameters:

  • dataset_name – a dataset name

  • data_home – specify the quapy home directory where collections will be dumped (leave empty to use the default ~/quapy_data/ directory)

  • verbose – set to True (default is False) to get information (from the UCI ML repository) about the datasets

Returns:

a quapy.data.base.LabelledCollection instance
quapy.data.datasets.fetch_UCIMulticlassDataset(dataset_name, data_home=None, min_test_split=0.3, max_train_instances=25000, min_class_support=100, verbose=False) → Dataset[source]

Loads a UCI multiclass dataset as an instance of quapy.data.base.Dataset.

The list of available datasets is taken from https://archive.ics.uci.edu/, following these criteria:

  • It has more than 1000 instances

  • It is suited for classification

  • It has more than two classes

  • It is available for Python import (requires the ucimlrepo package)

>>> import quapy as qp
>>> dataset = qp.datasets.fetch_UCIMulticlassDataset("dry-bean")
>>> train, test = dataset.train_test

The list of valid dataset names can be accessed in quapy.data.datasets.UCI_MULTICLASS_DATASETS

The datasets are downloaded only once and pickled into disk, saving time for consecutive calls.

Parameters:

  • dataset_name – a dataset name

  • data_home – specify the quapy home directory where collections will be dumped (leave empty to use the default ~/quapy_data/ directory)

  • min_test_split – minimum proportion of instances to be included in the test set. This value is interpreted as a minimum proportion, meaning that the real proportion could be higher in case the training proportion (the 1-min_test_split fraction of the instances) surpasses max_train_instances. In such a case, only max_train_instances are taken for training, and the rest (irrespective of min_test_split) is taken for test.

  • max_train_instances – maximum number of instances to keep for training (defaults to 25000)

  • min_class_support – minimum number of instances per class. Classes with fewer instances are discarded (default is 100)

  • verbose – set to True (default is False) to get information (stats) about the dataset

Returns:

a quapy.data.base.Dataset instance

quapy.data.datasets.fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_class_support=100, verbose=False) → LabelledCollection[source]

Loads a UCI multiclass collection as an instance of quapy.data.base.LabelledCollection.

The list of available datasets is taken from https://archive.ics.uci.edu/, following these criteria:

  • It has more than 1000 instances

  • It is suited for classification

  • It has more than two classes

  • It is available for Python import (requires the ucimlrepo package)

>>> import quapy as qp
>>> collection = qp.datasets.fetch_UCIMulticlassLabelledCollection("dry-bean")
>>> X, y = collection.Xy

The list of valid dataset names can be accessed in quapy.data.datasets.UCI_MULTICLASS_DATASETS

The datasets are downloaded only once and pickled into disk, saving time for consecutive calls.

Parameters:

  • dataset_name – a dataset name

  • data_home – specify the quapy home directory where the dataset will be dumped (leave empty to use the default ~/quapy_data/ directory)

  • min_class_support – minimum number of instances per class. Classes with fewer instances are discarded (default is 100)

  • verbose – set to True (default is False) to get information (stats) about the dataset

Returns:

a quapy.data.base.LabelledCollection instance
quapy.data.datasets.fetch_lequa2022(task, data_home=None)[source]

Loads the official datasets provided for the LeQua competition. In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide raw documents instead. Tasks T1A and T2A are binary sentiment quantification problems, while T1B and T2B are multiclass quantification problems consisting of estimating the class prevalence values of 28 different merchandise products. We refer to Esuli, A., Moreo, A., Sebastiani, F., & Sperduti, G. (2022). A Detailed Overview of LeQua@CLEF 2022: Learning to Quantify. for a detailed description of the tasks and datasets.

The datasets are downloaded only once, and stored for fast reuse.

See 4.lequa2022_experiments.py provided in the example folder, which can serve as a guide on how to use these datasets.

Parameters:

  • task – a string representing the task name; valid ones are T1A, T1B, T2A, and T2B

  • data_home – specify the quapy home directory where collections will be dumped (leave empty to use the default ~/quapy_data/ directory)

Returns:

a tuple (train, val_gen, test_gen) where train is an instance of quapy.data.base.LabelledCollection, and val_gen and test_gen are instances of quapy.data._lequa2022.SamplesFromDir, a subclass of quapy.protocol.AbstractProtocol, that returns a series of samples stored in a directory and labelled by prevalence.
quapy.data.datasets.fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle=False) → Dataset[source]

Loads a Reviews dataset as a Dataset instance, as used in Esuli, A., Moreo, A., and Sebastiani, F. “A recurrent neural network for sentiment quantification.” Proceedings of the 27th ACM International Conference on Information and Knowledge Management. 2018. The list of valid dataset names can be accessed in quapy.data.datasets.REVIEWS_SENTIMENT_DATASETS

Parameters:

  • dataset_name – the name of the dataset: valid ones are ‘hp’, ‘kindle’, ‘imdb’

  • tfidf – set to True to transform the raw documents into tfidf weighted matrices

  • min_df – minimum number of documents that should contain a term in order for the term to be kept (ignored if tfidf==False)

  • data_home – specify the quapy home directory where collections will be dumped (leave empty to use the default ~/quapy_data/ directory)

  • pickle – set to True to pickle the Dataset object the first time it is generated, in order to allow for faster subsequent invocations

Returns:

a quapy.data.base.Dataset instance
quapy.data.datasets.fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_home=None, pickle=False) → Dataset[source]

Loads a Twitter dataset as a quapy.data.base.Dataset instance, as used in: Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis. Social Network Analysis and Mining 6(19), 1–22 (2016). Note that the datasets ‘semeval13’, ‘semeval14’, and ‘semeval15’ share the same training set. The list of valid dataset names corresponding to training sets can be accessed in quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN, while the test sets can be accessed in quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TEST

Parameters:

  • dataset_name – the name of the dataset: valid ones are ‘gasp’, ‘hcr’, ‘omd’, ‘sanders’, ‘semeval13’, ‘semeval14’, ‘semeval15’, ‘semeval16’, ‘sst’, ‘wa’, ‘wb’

  • for_model_selection – if True, then returns the train split as the training set and the devel split as the test set; if False, then returns the train+devel split as the training set and the test set as the test set

  • min_df – minimum number of documents that should contain a term in order for the term to be kept

  • data_home – specify the quapy home directory where collections will be dumped (leave empty to use the default ~/quapy_data/ directory)

  • pickle – set to True to pickle the Dataset object the first time it is generated, in order to allow for faster subsequent invocations

Returns:

a quapy.data.base.Dataset instance
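A usage sketch, mirroring the reviews example above:

>>> import quapy as qp
>>> data = qp.datasets.fetch_twitter('hcr', min_df=5, pickle=True)
>>> data.training.stats()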
quapy.data.datasets.warn(*args, **kwargs)[source]

quapy.data.preprocessing module

class quapy.data.preprocessing.IndexTransformer(**kwargs)[source]

Bases: object

This class implements a sklearn-style transformer that indexes text as numerical ids for the tokens it contains, as generated by sklearn’s CountVectorizer.

Parameters:

kwargs – keyworded arguments from CountVectorizer
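A minimal usage sketch, assuming the transformer is fit directly on a list of raw strings:

>>> from quapy.data.preprocessing import IndexTransformer
>>> docs = ['a first document', 'a second document']
>>> indexer = IndexTransformer(min_df=1)
>>> indexed = indexer.fit_transform(docs)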
add_word(word, id=None, nogaps=True)[source]

Adds a new token (regardless of whether it has been found in the text or not), with a dedicated id. Useful to define special tokens for codifying unknown words, or padding tokens.

Parameters:

  • word – string, surface form of the token

  • id – integer, numerical value to assign to the token (leave as None for indicating the next valid id, default)

  • nogaps – if set to True (default), asserts that the indicated id leads to no numerical gaps with the ids stored so far

Returns:

integer, the numerical id for the new token

fit(X)[source]

Fits the transformer, i.e., decides on the vocabulary, given a list of strings.

Parameters:

X – a list of strings

Returns:

self

fit_transform(X, n_jobs=None)[source]

Fits the transformer on X and transforms it.

Parameters:

  • X – a list of strings

  • n_jobs – the number of parallel workers to carry out this task

Returns:

a np.ndarray of numerical ids

transform(X, n_jobs=None)[source]

Transforms the strings in X as lists of numerical ids

Parameters:

  • X – a list of strings

  • n_jobs – the number of parallel workers to carry out this task

Returns:

a np.ndarray of numerical ids

vocabulary_size()[source]

Gets the length of the vocabulary according to which the document tokens have been indexed

Returns:

integer

quapy.data.preprocessing.index(dataset: Dataset, min_df=5, inplace=False, **kwargs)[source]

Indexes the tokens of a textual quapy.data.base.Dataset of string documents. To index a document means to replace each different token by a unique numerical index. Rare words (i.e., words occurring less than min_df times) are replaced by a special token UNK

Parameters:

  • dataset – a quapy.data.base.Dataset object where the instances of training and test documents are lists of str

  • min_df – minimum number of occurrences below which the term is replaced by a UNK index

  • inplace – whether or not to apply the transformation inplace (True), or to a new copy (False, default)

  • kwargs – the rest of the parameters of the transformation (as for sklearn’s CountVectorizer)

Returns:

a new quapy.data.base.Dataset (if inplace=False) or a reference to the current quapy.data.base.Dataset (if inplace=True) consisting of lists of integer values representing indices.
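A usage sketch on the reviews data:

>>> import quapy as qp
>>> from quapy.data.preprocessing import index
>>> dataset = qp.datasets.fetch_reviews('kindle')
>>> dataset = index(dataset, min_df=5)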
quapy.data.preprocessing.reduce_columns(dataset: Dataset, min_df=5, inplace=False)[source]

Reduces the dimensionality of the instances, represented as a csr_matrix (or any subtype of scipy.sparse.spmatrix), of training and test documents by removing the columns of words which are not present in at least min_df instances in the training set

Parameters:

  • dataset – a quapy.data.base.Dataset in which instances are represented in sparse format (any subtype of scipy.sparse.spmatrix)

  • min_df – integer, minimum number of instances below which the columns are removed

  • inplace – whether or not to apply the transformation inplace (True), or to a new copy (False, default)

Returns:

a new quapy.data.base.Dataset (if inplace=False) or a reference to the current quapy.data.base.Dataset (if inplace=True) where the dimensions corresponding to infrequent terms in the training set have been removed

quapy.data.preprocessing.standardize(dataset: Dataset, inplace=False)[source]

Standardizes the real-valued columns of a quapy.data.base.Dataset. Standardization, aka z-scoring, of a variable X comes down to subtracting the average and normalizing by the standard deviation.

Parameters:

  • dataset – a quapy.data.base.Dataset object

  • inplace – whether or not to apply the transformation inplace (True), or to a new copy (False, default)

Returns:

an instance of quapy.data.base.Dataset
quapy.data.preprocessing.text2tfidf(dataset: Dataset, min_df=3, sublinear_tf=True, inplace=False, **kwargs)[source]

Transforms a quapy.data.base.Dataset of textual instances into a quapy.data.base.Dataset of tfidf weighted sparse vectors

Parameters:

  • dataset – a quapy.data.base.Dataset where the instances of the training and test collections are lists of str

  • min_df – minimum number of occurrences for a word to be considered as part of the vocabulary (default 3)

  • sublinear_tf – whether or not to apply the log scaling to the tf counters (default True)

  • inplace – whether or not to apply the transformation inplace (True), or to a new copy (False, default)

  • kwargs – the rest of the parameters of the transformation (as for sklearn’s TfidfVectorizer)

Returns:

a new quapy.data.base.Dataset in csr_matrix format (if inplace=False) or a reference to the current Dataset (if inplace=True) where the instances are stored in a csr_matrix of real-valued tfidf scores
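A usage sketch (equivalent in spirit to passing tfidf=True to fetch_reviews):

>>> import quapy as qp
>>> from quapy.data.preprocessing import text2tfidf
>>> dataset = qp.datasets.fetch_reviews('kindle')
>>> dataset = text2tfidf(dataset, min_df=5)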

quapy.data.reader module

quapy.data.reader.binarize(y, pos_class)[source]

Binarizes a categorical array-like collection of labels towards the positive class pos_class. E.g.,:

>>> binarize([1, 2, 3, 1, 1, 0], pos_class=2)
>>> array([0, 1, 0, 0, 0, 0])

Parameters:

  • y – array-like of labels

  • pos_class – integer, the positive class

Returns:

a binary np.ndarray, in which values 1 correspond to positions in which y had pos_class labels, and 0 otherwise

quapy.data.reader.from_csv(path, encoding='utf-8')[source]

Reads a csv file in which columns are separated by ‘,’. File format: <label>,<feat1>,<feat2>,…,<featn>

Parameters:

  • path – path to the csv file

  • encoding – the text encoding used to open the file

Returns:

a np.ndarray for the labels and a ndarray (float) for the covariates

quapy.data.reader.from_sparse(path)[source]

Reads a labelled collection of real-valued instances expressed in sparse format. File format: <-1 or 0 or 1> [col(int):val(float)]…

Parameters:

path – path to the labelled collection

Returns:

a csr_matrix containing the instances (rows), and a ndarray containing the labels

quapy.data.reader.from_text(path, encoding='utf-8', verbose=1, class2int=True)[source]

Reads a labelled collection of documents. File format: <0 or 1> <document>

Parameters:

  • path – path to the labelled collection

  • encoding – the text encoding used to open the file

  • verbose – if >0 (default) shows some progress information in standard output

Returns:

a list of sentences, and a list of labels
quapy.data.reader.reindex_labels(y)[source]

Re-indexes a list of labels as a list of indexes, and returns the classnames corresponding to the indexes. E.g.:

>>> reindex_labels(['B', 'B', 'A', 'C'])
>>> (array([1, 1, 0, 2]), array(['A', 'B', 'C'], dtype='<U1'))

Parameters:

y – the list or array of original labels

Returns:

a ndarray (int) of class indexes, and a ndarray of classnames corresponding to the indexes.

Module contents

\ No newline at end of file
diff --git a/docs/build/html/quapy.html b/docs/build/html/quapy.html
deleted file mode 100644
index 5108263..0000000
--- a/docs/build/html/quapy.html
+++ /dev/null
@@ -1,3127 +0,0 @@

quapy package


Subpackages


Submodules


quapy.error module

Implementation of error measures used for quantification

quapy.error.absolute_error(prevs, prevs_hat)

Computes the absolute error between the two prevalence vectors. Absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(AE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}|\hat{p}(y)-p(y)|\), where \(\mathcal{Y}\) are the classes of interest.

Parameters:

  • prevs – array-like of shape (n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_classes,) with the predicted prevalence values

Returns:

absolute error

quapy.error.acc_error(y_true, y_pred)

Computes the error in terms of 1-accuracy. The accuracy is computed as \(\frac{tp+tn}{tp+fp+fn+tn}\), with tp, fp, fn, and tn standing for true positives, false positives, false negatives, and true negatives, respectively

Parameters:

  • y_true – array-like of true labels

  • y_pred – array-like of predicted labels

Returns:

1-accuracy

quapy.error.acce(y_true, y_pred)[source]

Computes the error in terms of 1-accuracy. The accuracy is computed as \(\frac{tp+tn}{tp+fp+fn+tn}\), with tp, fp, fn, and tn standing for true positives, false positives, false negatives, and true negatives, respectively

Parameters:

  • y_true – array-like of true labels

  • y_pred – array-like of predicted labels

Returns:

1-accuracy

quapy.error.ae(prevs, prevs_hat)[source]

Computes the absolute error between the two prevalence vectors. Absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(AE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}|\hat{p}(y)-p(y)|\), where \(\mathcal{Y}\) are the classes of interest.

Parameters:

  • prevs – array-like of shape (n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_classes,) with the predicted prevalence values

Returns:

absolute error
quapy.error.f1_error(y_true, y_pred)

F1 error: simply computes the error in terms of macro \(F_1\), i.e., \(1-F_1^M\), where \(F_1\) is the harmonic mean of precision and recall, defined as \(\frac{2tp}{2tp+fp+fn}\), with tp, fp, and fn standing for true positives, false positives, and false negatives, respectively. Macro averaging means the \(F_1\) is computed for each category independently, and then averaged.

Parameters:

  • y_true – array-like of true labels

  • y_pred – array-like of predicted labels

Returns:

\(1-F_1^M\)

quapy.error.f1e(y_true, y_pred)[source]

F1 error: simply computes the error in terms of macro \(F_1\), i.e., \(1-F_1^M\), where \(F_1\) is the harmonic mean of precision and recall, defined as \(\frac{2tp}{2tp+fp+fn}\), with tp, fp, and fn standing for true positives, false positives, and false negatives, respectively. Macro averaging means the \(F_1\) is computed for each category independently, and then averaged.

Parameters:

  • y_true – array-like of true labels

  • y_pred – array-like of predicted labels

Returns:

\(1-F_1^M\)

quapy.error.from_name(err_name)[source]

Gets an error function from its name. E.g., from_name(“mae”) will return the function quapy.error.mae()

Parameters:

err_name – string, the error name

Returns:

a callable implementing the requested error
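A usage sketch:

>>> import numpy as np
>>> import quapy as qp
>>> error_fn = qp.error.from_name('mae')
>>> error_fn(np.asarray([[0.5, 0.5]]), np.asarray([[0.4, 0.6]]))  # 0.1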
quapy.error.kld(prevs, prevs_hat, eps=None)[source]

Computes the Kullback-Leibler divergence between the two prevalence distributions. Kullback-Leibler divergence between two prevalence distributions \(p\) and \(\hat{p}\) is computed as \(KLD(p,\hat{p})=D_{KL}(p||\hat{p})=\sum_{y\in \mathcal{Y}} p(y)\log\frac{p(y)}{\hat{p}(y)}\), where \(\mathcal{Y}\) are the classes of interest. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

Parameters:

  • prevs – array-like of shape (n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_classes,) with the predicted prevalence values

  • eps – smoothing factor. KLD is not defined in cases in which the distributions contain zeros; eps is typically set to be \(\frac{1}{2T}\), with \(T\) the sample size. If eps=None, the sample size will be taken from the environment variable SAMPLE_SIZE (which has thus to be set beforehand).

Returns:

Kullback-Leibler divergence between the two distributions

quapy.error.mae(prevs, prevs_hat)[source]

Computes the mean absolute error (see quapy.error.ae()) across the sample pairs.

Parameters:

  • prevs – array-like of shape (n_samples, n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_samples, n_classes,) with the predicted prevalence values

Returns:

mean absolute error

quapy.error.mean_absolute_error(prevs, prevs_hat)

Computes the mean absolute error (see quapy.error.ae()) across the sample pairs.

Parameters:

  • prevs – array-like of shape (n_samples, n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_samples, n_classes,) with the predicted prevalence values

Returns:

mean absolute error

quapy.error.mean_normalized_absolute_error(prevs, prevs_hat)

Computes the mean normalized absolute error (see quapy.error.nae()) across the sample pairs.

Parameters:

  • prevs – array-like of shape (n_samples, n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_samples, n_classes,) with the predicted prevalence values

Returns:

mean normalized absolute error

quapy.error.mean_normalized_relative_absolute_error(prevs, prevs_hat, eps=None)

Computes the mean normalized relative absolute error (see quapy.error.nrae()) across the sample pairs. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

Parameters:

  • prevs – array-like of shape (n_samples, n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_samples, n_classes,) with the predicted prevalence values

  • eps – smoothing factor. mnrae is not defined in cases in which the true distribution contains zeros; eps is typically set to be \(\frac{1}{2T}\), with \(T\) the sample size. If eps=None, the sample size will be taken from the environment variable SAMPLE_SIZE (which has thus to be set beforehand).

Returns:

mean normalized relative absolute error

quapy.error.mean_relative_absolute_error(prevs, prevs_hat, eps=None)

Computes the mean relative absolute error (see quapy.error.rae()) across the sample pairs. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

Parameters:

  • prevs – array-like of shape (n_samples, n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_samples, n_classes,) with the predicted prevalence values

  • eps – smoothing factor. mrae is not defined in cases in which the true distribution contains zeros; eps is typically set to be \(\frac{1}{2T}\), with \(T\) the sample size. If eps=None, the sample size will be taken from the environment variable SAMPLE_SIZE (which has thus to be set beforehand).

Returns:

mean relative absolute error
quapy.error.mkld(prevs, prevs_hat, eps=None)[source]

Computes the mean Kullback-Leibler divergence (see quapy.error.kld()) across the sample pairs. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

Parameters:

  • prevs – array-like of shape (n_samples, n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_samples, n_classes,) with the predicted prevalence values

  • eps – smoothing factor. KLD is not defined in cases in which the distributions contain zeros; eps is typically set to be \(\frac{1}{2T}\), with \(T\) the sample size. If eps=None, the sample size will be taken from the environment variable SAMPLE_SIZE (which has thus to be set beforehand).

Returns:

mean Kullback-Leibler divergence

quapy.error.mnae(prevs, prevs_hat)[source]

Computes the mean normalized absolute error (see quapy.error.nae()) across the sample pairs.

Parameters:

  • prevs – array-like of shape (n_samples, n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_samples, n_classes,) with the predicted prevalence values

Returns:

mean normalized absolute error

quapy.error.mnkld(prevs, prevs_hat, eps=None)[source]

Computes the mean Normalized Kullback-Leibler divergence (see quapy.error.nkld()) across the sample pairs. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

Parameters:

  • prevs – array-like of shape (n_samples, n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_samples, n_classes,) with the predicted prevalence values

  • eps – smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; eps is typically set to be \(\frac{1}{2T}\), with \(T\) the sample size. If eps=None, the sample size will be taken from the environment variable SAMPLE_SIZE (which has thus to be set beforehand).

Returns:

mean Normalized Kullback-Leibler divergence

quapy.error.mnrae(prevs, prevs_hat, eps=None)[source]

Computes the mean normalized relative absolute error (see quapy.error.nrae()) across the sample pairs. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

Parameters:

  • prevs – array-like of shape (n_samples, n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_samples, n_classes,) with the predicted prevalence values

  • eps – smoothing factor. mnrae is not defined in cases in which the true distribution contains zeros; eps is typically set to be \(\frac{1}{2T}\), with \(T\) the sample size. If eps=None, the sample size will be taken from the environment variable SAMPLE_SIZE (which has thus to be set beforehand).

Returns:

mean normalized relative absolute error

quapy.error.mrae(prevs, prevs_hat, eps=None)[source]

Computes the mean relative absolute error (see quapy.error.rae()) across the sample pairs. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

Parameters:

  • prevs – array-like of shape (n_samples, n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_samples, n_classes,) with the predicted prevalence values

  • eps – smoothing factor. mrae is not defined in cases in which the true distribution contains zeros; eps is typically set to be \(\frac{1}{2T}\), with \(T\) the sample size. If eps=None, the sample size will be taken from the environment variable SAMPLE_SIZE (which has thus to be set beforehand).

Returns:

mean relative absolute error
quapy.error.mse(prevs, prevs_hat)[source]

Computes the mean squared error (see quapy.error.se()) across the sample pairs.

Parameters:

  • prevs – array-like of shape (n_samples, n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_samples, n_classes,) with the predicted prevalence values

Returns:

mean squared error

quapy.error.nae(prevs, prevs_hat)[source]

Computes the normalized absolute error between the two prevalence vectors. Normalized absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(NAE(p,\hat{p})=\frac{AE(p,\hat{p})}{z_{AE}}\), where \(z_{AE}=\frac{2(1-\min_{y\in \mathcal{Y}} p(y))}{|\mathcal{Y}|}\), and \(\mathcal{Y}\) are the classes of interest.

Parameters:

  • prevs – array-like of shape (n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_classes,) with the predicted prevalence values

Returns:

normalized absolute error
quapy.error.nkld(prevs, prevs_hat, eps=None)[source]

Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions. Normalized Kullback-Leibler divergence between two prevalence distributions \(p\) and \(\hat{p}\) is computed as \(NKLD(p,\hat{p}) = 2\frac{e^{KLD(p,\hat{p})}}{e^{KLD(p,\hat{p})}+1}-1\), where \(\mathcal{Y}\) are the classes of interest. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

Parameters:

  • prevs – array-like of shape (n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_classes,) with the predicted prevalence values

  • eps – smoothing factor. NKLD is not defined in cases in which the distributions contain zeros; eps is typically set to be \(\frac{1}{2T}\), with \(T\) the sample size. If eps=None, the sample size will be taken from the environment variable SAMPLE_SIZE (which has thus to be set beforehand).

Returns:

Normalized Kullback-Leibler divergence between the two distributions
quapy.error.normalized_absolute_error(prevs, prevs_hat)

Computes the normalized absolute error between the two prevalence vectors. Normalized absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(NAE(p,\hat{p})=\frac{AE(p,\hat{p})}{z_{AE}}\), where \(z_{AE}=\frac{2(1-\min_{y\in \mathcal{Y}} p(y))}{|\mathcal{Y}|}\), and \(\mathcal{Y}\) are the classes of interest.

Parameters:

  • prevs – array-like of shape (n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_classes,) with the predicted prevalence values

Returns:

normalized absolute error

quapy.error.normalized_relative_absolute_error(prevs, prevs_hat, eps=None)

Computes the normalized relative absolute error between the two prevalence vectors. The normalized relative absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(NRAE(p,\hat{p})= \frac{RAE(p,\hat{p})}{z_{RAE}}\), where \(z_{RAE} = \frac{|\mathcal{Y}|-1+\frac{1-\min_{y\in \mathcal{Y}} p(y)}{\min_{y\in \mathcal{Y}} p(y)}}{|\mathcal{Y}|}\) and \(\mathcal{Y}\) are the classes of interest. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

Parameters:

  • prevs – array-like of shape (n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_classes,) with the predicted prevalence values

  • eps – smoothing factor. nrae is not defined in cases in which the true distribution contains zeros; eps is typically set to be \(\frac{1}{2T}\), with \(T\) the sample size. If eps=None, the sample size will be taken from the environment variable SAMPLE_SIZE (which has thus to be set beforehand).

Returns:

normalized relative absolute error

quapy.error.nrae(prevs, prevs_hat, eps=None)[source]

Computes the normalized relative absolute error between the two prevalence vectors. The normalized relative absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(NRAE(p,\hat{p})= \frac{RAE(p,\hat{p})}{z_{RAE}}\), where \(z_{RAE} = \frac{|\mathcal{Y}|-1+\frac{1-\min_{y\in \mathcal{Y}} p(y)}{\min_{y\in \mathcal{Y}} p(y)}}{|\mathcal{Y}|}\) and \(\mathcal{Y}\) are the classes of interest. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

Parameters:

  • prevs – array-like of shape (n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_classes,) with the predicted prevalence values

  • eps – smoothing factor. nrae is not defined in cases in which the true distribution contains zeros; eps is typically set to be \(\frac{1}{2T}\), with \(T\) the sample size. If eps=None, the sample size will be taken from the environment variable SAMPLE_SIZE (which has thus to be set beforehand).

Returns:

normalized relative absolute error
quapy.error.rae(prevs, prevs_hat, eps=None)[source]

Computes the relative absolute error between the two prevalence vectors. The relative absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(RAE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\), where \(\mathcal{Y}\) are the classes of interest. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

Parameters:

  • prevs – array-like of shape (n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_classes,) with the predicted prevalence values

  • eps – smoothing factor. rae is not defined in cases in which the true distribution contains zeros; eps is typically set to be \(\frac{1}{2T}\), with \(T\) the sample size. If eps=None, the sample size will be taken from the environment variable SAMPLE_SIZE (which has thus to be set beforehand).

Returns:

relative absolute error

quapy.error.relative_absolute_error(prevs, prevs_hat, eps=None)

Computes the relative absolute error between the two prevalence vectors. The relative absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(RAE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}\frac{|\hat{p}(y)-p(y)|}{p(y)}\), where \(\mathcal{Y}\) are the classes of interest. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

Parameters:

  • prevs – array-like of shape (n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_classes,) with the predicted prevalence values

  • eps – smoothing factor. rae is not defined in cases in which the true distribution contains zeros; eps is typically set to be \(\frac{1}{2T}\), with \(T\) the sample size. If eps=None, the sample size will be taken from the environment variable SAMPLE_SIZE (which has thus to be set beforehand).

Returns:

relative absolute error
quapy.error.se(prevs, prevs_hat)[source]

Computes the squared error between the two prevalence vectors. Squared error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(SE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}(\hat{p}(y)-p(y))^2\), where \(\mathcal{Y}\) are the classes of interest.

Parameters:

  • prevs – array-like of shape (n_classes,) with the true prevalence values

  • prevs_hat – array-like of shape (n_classes,) with the predicted prevalence values

Returns:

squared error
quapy.error.smooth(prevs, eps)[source]

Smooths a prevalence distribution with \(\epsilon\) (eps) as: \(\underline{p}(y)=\frac{\epsilon+p(y)}{\epsilon|\mathcal{Y}|+\displaystyle\sum_{y\in \mathcal{Y}}p(y)}\)

Parameters:

  • prevs – array-like of shape (n_classes,) with the true prevalence values

  • eps – smoothing factor

Returns:

array-like of shape (n_classes,) with the smoothed distribution

quapy.evaluation module

quapy.evaluation.evaluate(model: BaseQuantifier, protocol: AbstractProtocol, error_metric: Union[str, Callable], aggr_speedup: Union[str, bool] = 'auto', verbose=False)[source]

Evaluates a quantification model according to a specific sample generation protocol and in terms of one evaluation metric (error).

Parameters:

  • model – a quantifier, instance of quapy.method.base.BaseQuantifier

  • protocol – quapy.protocol.AbstractProtocol; if this object is also an instance of quapy.protocol.OnLabelledCollectionProtocol, then the aggregation speed-up can be run. This is the protocol in charge of generating the samples on which the model is evaluated.

  • error_metric – a string representing the name of an error function in qp.error (e.g., ‘mae’), or a callable function implementing the error function itself.

  • aggr_speedup – whether or not to apply the speed-up. Set to “force” for applying it even if the number of instances in the original collection on which the protocol acts is larger than the number of instances in the samples to be generated. Set to True or “auto” (default) for letting QuaPy decide whether it is convenient or not. Set to False to deactivate.

  • verbose – boolean, whether or not to display information in stdout

Returns:

if the error metric is not averaged (e.g., ‘ae’, ‘rae’), returns an array of shape (n_samples,) with the error scores for each sample; if the error metric is averaged (e.g., ‘mae’, ‘mrae’) then returns a single float
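A usage sketch, assuming model is a trained quantifier and test is a LabelledCollection, evaluated under the artificial-prevalence protocol:

>>> import quapy as qp
>>> from quapy.protocol import APP
>>> qp.environ['SAMPLE_SIZE'] = 100
>>> mae = qp.evaluation.evaluate(model, protocol=APP(test), error_metric='mae')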
quapy.evaluation.evaluate_on_samples(model: BaseQuantifier, samples: Iterable[LabelledCollection], error_metric: Union[str, Callable], verbose=False)[source]

Evaluates a quantification model on a given set of samples and in terms of one evaluation metric (error).

Parameters:

  • model – a quantifier, instance of quapy.method.base.BaseQuantifier

  • samples – a list of samples on which the quantifier is to be evaluated

  • error_metric – a string representing the name of an error function in qp.error (e.g., ‘mae’), or a callable function implementing the error function itself.

  • verbose – boolean, whether or not to display information in stdout

Returns:

if the error metric is not averaged (e.g., ‘ae’, ‘rae’), returns an array of shape (n_samples,) with the error scores for each sample; if the error metric is averaged (e.g., ‘mae’, ‘mrae’) then returns a single float

quapy.evaluation.evaluation_report(model: BaseQuantifier, protocol: AbstractProtocol, error_metrics: Iterable[Union[str, Callable]] = 'mae', aggr_speedup: Union[str, bool] = 'auto', verbose=False)[source]

Generates a report (a pandas DataFrame) containing information of the evaluation of the model according to a specific protocol and in terms of one or more evaluation metrics (errors).

Parameters:

  • model – a quantifier, instance of quapy.method.base.BaseQuantifier

  • protocol – quapy.protocol.AbstractProtocol; if this object is also an instance of quapy.protocol.OnLabelledCollectionProtocol, then the aggregation speed-up can be run. This is the protocol in charge of generating the samples on which the model is evaluated.

  • error_metrics – a string, or list of strings, representing the name(s) of an error function in qp.error (e.g., ‘mae’, the default value), or a callable function, or a list of callable functions, implementing the error function itself.

  • aggr_speedup – whether or not to apply the speed-up. Set to “force” for applying it even if the number of instances in the original collection on which the protocol acts is larger than the number of instances in the samples to be generated. Set to True or “auto” (default) for letting QuaPy decide whether it is convenient or not. Set to False to deactivate.

  • verbose – boolean, whether or not to display information in stdout

Returns:

a pandas DataFrame containing the columns ‘true-prev’ (the true prevalence of each sample), ‘estim-prev’ (the prevalence estimated by the model for each sample), and as many columns as error metrics have been indicated, each displaying the score in terms of that metric for every sample.

quapy.evaluation.prediction(model: BaseQuantifier, protocol: AbstractProtocol, aggr_speedup: Union[str, bool] = 'auto', verbose=False)[source]

Uses a quantification model to generate predictions for the samples generated via a specific protocol. This function is central to all evaluation processes, and is endowed with an optimization to speed up the prediction of protocols that generate samples from a large collection. The optimization applies to aggregative quantifiers only, and to OnLabelledCollectionProtocol protocols, and comes down to generating the classification predictions once and for all, and then generating samples over the classification predictions (instead of over the raw instances), so that the classifier prediction is never called again. This behaviour is obtained by setting aggr_speedup to ‘auto’ or True, and is only carried out if the overall process is convenient in terms of computations (e.g., if the number of classification predictions needed for the original collection exceeds the number of classification predictions needed for all samples, then the optimization is not undertaken).

Parameters:

  • model – a quantifier, instance of quapy.method.base.BaseQuantifier

  • protocol – quapy.protocol.AbstractProtocol; if this object is also an instance of quapy.protocol.OnLabelledCollectionProtocol, then the aggregation speed-up can be run. This is the protocol in charge of generating the samples for which the model has to issue class prevalence predictions.

  • aggr_speedup – whether or not to apply the speed-up. Set to “force” for applying it even if the number of instances in the original collection on which the protocol acts is larger than the number of instances in the samples to be generated. Set to True or “auto” (default) for letting QuaPy decide whether it is convenient or not. Set to False to deactivate.

  • verbose – boolean, whether or not to display information in stdout

Returns:

a tuple (true_prevs, estim_prevs) in which each element in the tuple is an array of shape (n_samples, n_classes) containing the true, or predicted, prevalence values for each sample

quapy.functional module

quapy.functional.HellingerDistance(P: ndarray, Q: ndarray) → float[source]

Computes the Hellinger Distance (HD) between (discretized) distributions P and Q. The HD for two discrete distributions of k bins is defined as:

\[HD(P,Q) = \frac{ 1 }{ \sqrt{ 2 } } \sqrt{ \sum_{i=1}^k ( \sqrt{p_i} - \sqrt{q_i} )^2 }\]

Parameters:

  • P – real-valued array-like of shape (k,) representing a discrete distribution

  • Q – real-valued array-like of shape (k,) representing a discrete distribution

Returns:

float
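A quick numeric check of the definition, with two hand-picked binary distributions:

>>> import numpy as np
>>> from quapy.functional import HellingerDistance
>>> HellingerDistance(np.asarray([0.5, 0.5]), np.asarray([0.9, 0.1]))  # approximately 0.325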
quapy.functional.TopsoeDistance(P: ndarray, Q: ndarray, epsilon: float = 1e-20)[source]

Topsoe distance between two (discretized) distributions P and Q. The Topsoe distance for two discrete distributions of k bins is defined as:

\[Topsoe(P,Q) = \sum_{i=1}^k \left( p_i \log\left(\frac{ 2 p_i + \epsilon }{ p_i+q_i+\epsilon }\right) + q_i \log\left(\frac{ 2 q_i + \epsilon }{ p_i+q_i+\epsilon }\right) \right)\]

Parameters:

  • P – real-valued array-like of shape (k,) representing a discrete distribution

  • Q – real-valued array-like of shape (k,) representing a discrete distribution

Returns:

float
quapy.functional.argmin_prevalence(loss: Callable, n_classes: int, method: Literal['optim_minimize', 'linear_search', 'ternary_search'] = 'optim_minimize')[source]

Searches for the prevalence vector that minimizes a loss function.

Parameters:

  • loss – callable, the function to minimize

  • n_classes – int, number of classes

  • method – string indicating the search strategy. Possible values are: ‘optim_minimize’ (uses scipy.optimize), ‘linear_search’ (carries out a linear search for binary problems in the space [0, 0.01, 0.02, …, 1]), and ‘ternary_search’ (not yet implemented)

Returns:

np.ndarray, a prevalence vector
quapy.functional.as_binary_prevalence(positive_prevalence: Union[float, ArrayLike], clip_if_necessary: bool = False) → ndarray[source]

Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two values representing a binary distribution.

Parameters:

  • positive_prevalence – float or array-like of floats with the prevalence for the positive class

  • clip_if_necessary (bool) – if True, clips the value in [0,1] in order to guarantee the resulting distribution is valid. If False, it then checks that the value is in the valid range, and raises an error if not.

Returns:

np.ndarray of shape (2,)
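For example, a sketch assuming the negative class occupies the first position:

>>> from quapy.functional import as_binary_prevalence
>>> as_binary_prevalence(0.2)  # array([0.8, 0.2])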
-quapy.functional.check_prevalence_vector(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], raise_exception: bool = False, tolerance: float = 1e-08, aggr=True)[source]
-

Checks that prevalences is a valid prevalence vector, i.e., it contains values in [0,1] and -the values sum up to 1. In other words, verifies that the prevalences vectors lies in the -probability simplex.

-
-
Parameters:
-
    -
  • prevalences (ArrayLike) – the prevalence vector, or vectors, to check

  • -
  • raise_exception (bool) – whether to raise an exception if the vector (or any of the vectors) does -not lie in the simplex (default False)

  • -
  • tolerance (float) – error tolerance for the check sum(prevalences) - 1 = 0

  • -
  • aggr (bool) – if True (default) returns one single bool (True if all prevalence vectors are valid, -False otherwise), if False returns an array of bool, one for each prevalence vector

  • -
-
-
Returns:
-

a single bool True if prevalences is a vector of prevalence values that lies on the simplex, -or False otherwise; alternatively, if prevalences is a matrix of shape (num_vectors, n_classes,) -then it returns one such bool for each prevalence vector

-
-
-
- -
quapy.functional.clip(prevalences: ArrayLike) → ndarray

Clips the values in [0,1] and then applies L1 normalization.

Parameters:
  • prevalences – array-like of shape (n_classes,) or of shape (n_samples, n_classes,) with prevalence values

Returns:
  np.ndarray representing a valid distribution
quapy.functional.condsoftmax(prevalences: ArrayLike) → ndarray

Applies the softmax function only to vectors that do not represent valid distributions.

Parameters:
  • prevalences – array-like of shape (n_classes,) or of shape (n_samples, n_classes,) with prevalence values

Returns:
  np.ndarray representing a valid distribution
quapy.functional.counts_from_labels(labels: ArrayLike, classes: ArrayLike) → ndarray

Computes the raw count values from a vector of labels.

Parameters:
  • labels – array-like of shape (n_instances,) with the label for each instance
  • classes – the class labels. This is needed in order to correctly compute the prevalence vector even when some classes have no examples.

Returns:
  ndarray of shape (len(classes),) with the raw counts for each class, in the same order as they appear in classes
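For example (note how the class 'c', absent from the labels, still gets a zero count):

>>> import quapy.functional as F
>>> F.counts_from_labels(['a', 'b', 'a'], classes=['a', 'b', 'c'])  # -> [2, 1, 0]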
quapy.functional.get_divergence(divergence: Union[str, Callable])

Guarantees that the divergence received as argument is a function: if the argument is already a callable, it is returned as-is; if it is instead a string, the corresponding divergence function is instantiated from its name.

Parameters:
  • divergence – callable or string indicating the name of the divergence function

Returns:
  callable
quapy.functional.get_nprevpoints_approximation(combinations_budget: int, n_classes: int, n_repeats: int = 1) → int

Searches for the largest number of (equidistant) prevalence points to define for each of the n_classes classes so that the number of valid prevalence values generated as combinations of prevalence points (points in a n_classes-dimensional simplex) does not exceed combinations_budget.

Parameters:
  • combinations_budget (int) – maximum number of combinations allowed
  • n_classes (int) – number of classes
  • n_repeats (int) – number of repetitions for each prevalence combination

Returns:
  the largest number of prevalence points that generates fewer than combinations_budget valid prevalences
quapy.functional.l1_norm(prevalences: ArrayLike) → ndarray

Applies L1 normalization to prevalences so that it becomes a valid prevalence vector. Zero vectors are mapped onto the uniform distribution. Raises an exception if the resulting vectors are not valid distributions; this may happen when the original prevalence vectors contain negative values. Use the clip normalization function instead to avoid this possibility.

Parameters:
  • prevalences – array-like of shape (n_classes,) or of shape (n_samples, n_classes,) with prevalence values

Returns:
  np.ndarray representing a valid distribution
quapy.functional.linear_search(loss: Callable, n_classes: int)

Performs a linear search for the best prevalence value in binary problems. The search is carried out by exploring the range [0,1], stepping by 0.01. This search is inefficient, and is added only for completeness (some of the early methods in the quantification literature used it, e.g., HDy). A more powerful alternative is optim_minimize.

Parameters:
  • loss – (callable) the function to minimize
  • n_classes – (int) the number of classes, i.e., the dimensionality of the prevalence vector

Returns:
  (ndarray) the best prevalence vector found
quapy.functional.normalize_prevalence(prevalences: ArrayLike, method='l1')

Normalizes a vector or matrix of prevalence values. The normalization consists of applying L1 normalization in cases in which the prevalence values are not all zeros, and of converting the prevalence values into 1/n_classes in cases in which all values are zero.

Parameters:
  • prevalences – array-like of shape (n_classes,) or of shape (n_samples, n_classes,) with prevalence values
  • method (str) – indicates the normalization method to employ; options are:
    • l1: applies L1 normalization (default); a 0 vector is mapped onto the uniform prevalence
    • clip: clips values in [0,1] and then rescales so that the L1 norm is 1
    • mapsimplex: projects vectors onto the probability simplex. This implementation relies on Mathieu Blondel's projection_simplex_sort
    • softmax: applies softmax to all vectors
    • condsoftmax: applies softmax only to invalid prevalence vectors

Returns:
  a normalized vector or matrix of prevalence values
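A quick illustration of the default L1 behaviour:

>>> import quapy.functional as F
>>> F.normalize_prevalence([1, 1, 2], method='l1')  # -> [0.25, 0.25, 0.5]
>>> F.normalize_prevalence([0, 0, 0], method='l1')  # all-zero vectors map to the uniform prevalence [1/3, 1/3, 1/3]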
quapy.functional.num_prevalence_combinations(n_prevpoints: int, n_classes: int, n_repeats: int = 1) → int

Computes the number of valid prevalence combinations in the n_classes-dimensional simplex if n_prevpoints equally distant prevalence values are generated and n_repeats repetitions are requested. The computation comes down to calculating:

\[\binom{N+C-1}{C-1} \times r\]

where N is n_prevpoints-1, i.e., the number of probability mass blocks to allocate, C is the number of classes, and r is n_repeats. This solution comes from the Stars and Bars problem.

Parameters:
  • n_classes (int) – number of classes
  • n_prevpoints (int) – number of prevalence points.
  • n_repeats (int) – number of repetitions for each prevalence combination

Returns:
  The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the number of possible combinations is 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]
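The closed form is easy to check directly; a small sketch of the same computation:

>>> from math import comb
>>> def n_combinations(n_prevpoints, n_classes, n_repeats=1):
>>>     N, C = n_prevpoints - 1, n_classes  # N mass blocks distributed over C classes
>>>     return comb(N + C - 1, C - 1) * n_repeats
>>> n_combinations(5, 2)  # -> 5, the five binary vectors enumerated above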
quapy.functional.optim_minimize(loss: Callable, n_classes: int)

Searches for the optimal prevalence values, i.e., an n_classes-dimensional vector of the (n_classes-1)-simplex that yields the smallest loss. This optimization is carried out by means of a constrained search using scipy's SLSQP routine.

Parameters:
  • loss – (callable) the function to minimize
  • n_classes – (int) the number of classes, i.e., the dimensionality of the prevalence vector

Returns:
  (ndarray) the best prevalence vector found
quapy.functional.prevalence_from_labels(labels: ArrayLike, classes: ArrayLike)

Computes the prevalence values from a vector of labels.

Parameters:
  • labels – array-like of shape (n_instances,) with the label for each instance
  • classes – the class labels. This is needed in order to correctly compute the prevalence vector even when some classes have no examples.

Returns:
  ndarray of shape (len(classes),) with the class proportions for each class, in the same order as they appear in classes
quapy.functional.prevalence_from_probabilities(posteriors: ArrayLike, binarize: bool = False)

Returns a vector of prevalence values from a matrix of posterior probabilities.

Parameters:
  • posteriors – array-like of shape (n_instances, n_classes,) with posterior probabilities for each class
  • binarize – set to True (default is False) for computing the prevalence values on crisp decisions (i.e., converting the vectors of posterior probabilities into class indices, by taking the argmax).

Returns:
  array of shape (n_classes,) containing the prevalence values
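For example:

>>> import numpy as np
>>> import quapy.functional as F
>>> posteriors = np.asarray([[0.9, 0.1], [0.6, 0.4], [0.2, 0.8]])
>>> F.prevalence_from_probabilities(posteriors)                 # expected posteriors: approx. [0.567, 0.433]
>>> F.prevalence_from_probabilities(posteriors, binarize=True)  # argmax then count: [2/3, 1/3]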
quapy.functional.prevalence_linspace(grid_points: int = 21, repeats: int = 1, smooth_limits_epsilon: float = 0.01) → ndarray

Produces an array of uniformly separated values of prevalence. By default, produces an array of 21 prevalence values, with step 0.05 and with the limits smoothed, i.e.: [0.01, 0.05, 0.10, 0.15, ..., 0.90, 0.95, 0.99]

Parameters:
  • grid_points – the number of prevalence values to sample from the [0,1] interval (default 21)
  • repeats – number of times each prevalence is to be repeated (defaults to 1)
  • smooth_limits_epsilon – the quantity to add and subtract to the limits 0 and 1

Returns:
  an array of uniformly separated prevalence values
quapy.functional.projection_simplex_sort(unnormalized_arr: ArrayLike) → ndarray

Projects a point onto the probability simplex.

The code is adapted from Mathieu Blondel's BSD-licensed implementation (see function projection_simplex_sort in their repo), which accompanies the paper:

  Mathieu Blondel, Akinori Fujino, and Naonori Ueda. Large-scale Multiclass Support Vector Machine Training via Euclidean Projection onto the Simplex, ICPR 2014.

Parameters:
  • unnormalized_arr – point in n-dimensional space, shape (n,)

Returns:
  projection of unnormalized_arr onto the (n-1)-dimensional probability simplex, shape (n,)
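The sort-based algorithm itself is short; a NumPy sketch of the standard procedure (for illustration, not the library's exact code):

>>> import numpy as np
>>> def project_simplex_sort(v):
>>>     u = np.sort(v)[::-1]              # sort descending
>>>     css = np.cumsum(u) - 1            # cumulative sums, shifted by the target mass 1
>>>     ind = np.arange(1, len(v) + 1)
>>>     cond = u - css / ind > 0
>>>     rho = ind[cond][-1]               # last index where the condition holds
>>>     theta = css[cond][-1] / rho       # threshold to subtract
>>>     return np.maximum(v - theta, 0)
>>> project_simplex_sort(np.asarray([0.5, 0.9]))  # -> [0.3, 0.7], which sums to 1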
quapy.functional.softmax(prevalences: ArrayLike) → ndarray

Applies the softmax function to all vectors, even if the original vectors were valid distributions. If you want to leave valid vectors untouched, use condsoftmax instead.

Parameters:
  • prevalences – array-like of shape (n_classes,) or of shape (n_samples, n_classes,) with prevalence values

Returns:
  np.ndarray representing a valid distribution
quapy.functional.solve_adjustment(class_conditional_rates: ndarray, unadjusted_counts: ndarray, method: Literal['inversion', 'invariant-ratio'], solver: Literal['exact', 'minimize', 'exact-raise', 'exact-cc']) → ndarray

Function that tries to solve for \(p\) the equation \(q = M p\), where \(q\) is the vector of unadjusted counts (as estimated, e.g., via classify and count) with \(q_i\) an estimate of \(P(\hat{Y}=y_i)\), and where \(M\) is the matrix of class-conditional rates with \(M_{ij}\) an estimate of \(P(\hat{Y}=y_i|Y=y_j)\).

Parameters:
  • class_conditional_rates – array of shape (n_classes, n_classes,) with entry (i,j) being the estimate of \(P(\hat{Y}=y_i|Y=y_j)\), that is, the probability that an instance that belongs to class \(y_j\) ends up being classified as belonging to class \(y_i\)
  • unadjusted_counts – array of shape (n_classes,) containing the unadjusted prevalence values (e.g., as estimated by CC or PCC)
  • method (str) – indicates the adjustment method to be used. Valid options are:
    • inversion: tries to solve the equation \(q = M p\) as \(p = M^{-1} q\), where \(M^{-1}\) is the matrix inversion of \(M\). This inversion may not exist in degenerated cases.
    • invariant-ratio: invariant ratio estimator of Vaz et al. 2018, which replaces the last equation in \(M\) with the normalization condition (i.e., that the sum of all prevalence values must equal 1).
  • solver (str) – the method to use for solving the system of linear equations. Valid options are:
    • exact-raise: tries to solve the system using matrix inversion. Raises an error if the matrix has rank strictly lower than n_classes.
    • exact-cc: if the matrix is not full rank, returns \(q\) (i.e., the unadjusted counts) as the estimates
    • exact: deprecated, defaults to 'exact-cc' (will be removed in future versions)
    • minimize: minimizes a loss, so the solution always exists
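A small numeric illustration of the 'inversion' method on a noiseless binary example (the values are chosen by hand):

>>> import numpy as np
>>> M = np.asarray([[0.8, 0.2],   # M[i,j] = P(Yhat=y_i | Y=y_j)
>>>                 [0.2, 0.8]])
>>> p_true = np.asarray([0.3, 0.7])
>>> q = M @ p_true                # the unadjusted (classify & count) estimate: [0.38, 0.62]
>>> np.linalg.solve(M, q)         # inverting the system recovers [0.3, 0.7] (up to float precision)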
quapy.functional.solve_adjustment_binary(prevalence_estim: ArrayLike, tpr: float, fpr: float, clip: bool = True)

Implements the adjustment of ACC and PACC for the binary case. The adjustment for a prevalence estimate of the positive class p comes down to computing:

\[ACC(p) = \frac{ p - fpr }{ tpr - fpr }\]

Parameters:
  • prevalence_estim (float) – the estimated value for the positive class (p in the formula)
  • tpr (float) – the true positive rate of the classifier
  • fpr (float) – the false positive rate of the classifier
  • clip (bool) – set to True (default) to clip values that might exceed the range [0,1]

Returns:
  float, the adjusted count
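Continuing the binary example above: with tpr=0.8 and fpr=0.2, an unadjusted positive-class estimate of 0.38 is corrected as (0.38 - 0.2) / (0.8 - 0.2) = 0.3, recovering the true positive prevalence.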
quapy.functional.strprev(prevalences: ArrayLike, prec: int = 3) → str

Returns a string representation for a prevalence vector. E.g.,

>>> strprev([1/3, 2/3], prec=2)
'[0.33, 0.67]'

Parameters:
  • prevalences – array-like of prevalence values
  • prec – int, indicates the float precision (number of decimal values to print)

Returns:
  string
quapy.functional.uniform_prevalence(n_classes)

Returns a vector representing the uniform distribution for n_classes

Parameters:
  • n_classes – number of classes

Returns:
  np.ndarray with all values 1/n_classes
quapy.functional.uniform_prevalence_sampling(n_classes: int, size: int = 1) → ndarray

Implements the Kraemer algorithm for sampling uniformly at random from the unit simplex. This implementation is adapted from this post: https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex

Parameters:
  • n_classes – integer, number of classes (dimensionality of the simplex)
  • size – number of samples to return

Returns:
  np.ndarray of shape (size, n_classes,) if size>1, or of shape (n_classes,) otherwise
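The idea behind the Kraemer algorithm is simple: draw n_classes-1 uniform cut points in [0,1], sort them, and take the gaps between consecutive cut points (padded with 0 and 1) as the prevalence values. A NumPy sketch, for illustration:

>>> import numpy as np
>>> def kraemer_sample(n_classes, size=1, seed=None):
>>>     rng = np.random.default_rng(seed)
>>>     cuts = np.sort(rng.uniform(size=(size, n_classes - 1)), axis=1)
>>>     bounded = np.hstack([np.zeros((size, 1)), cuts, np.ones((size, 1))])
>>>     return np.diff(bounded, axis=1)      # each row sums to 1, uniformly on the simplex
>>> kraemer_sample(3, size=2).sum(axis=1)    # -> array([1., 1.])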
quapy.functional.uniform_simplex_sampling(n_classes: int, size: int = 1) → ndarray

Implements the Kraemer algorithm for sampling uniformly at random from the unit simplex. This implementation is adapted from this post: https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex

Parameters:
  • n_classes – integer, number of classes (dimensionality of the simplex)
  • size – number of samples to return

Returns:
  np.ndarray of shape (size, n_classes,) if size>1, or of shape (n_classes,) otherwise

quapy.model_selection module

class quapy.model_selection.ConfigStatus(params, status, msg='')

Bases: object

failed()

success()
class quapy.model_selection.GridSearchQ(model: quapy.method.base.BaseQuantifier, param_grid: dict, protocol: quapy.protocol.AbstractProtocol, error: Union[Callable, str] = <function mae>, refit=True, timeout=-1, n_jobs=None, raise_errors=False, verbose=False)

Bases: BaseQuantifier

Grid Search optimization targeting a quantification-oriented metric.

Optimizes the hyperparameters of a quantification method, based on an evaluation method and on an evaluation protocol for quantification.

Parameters:
  • model (BaseQuantifier) – the quantifier to optimize
  • param_grid – a dictionary with keys the parameter names and values the list of values to explore
  • protocol – a sample generation protocol, an instance of quapy.protocol.AbstractProtocol
  • error – an error function (callable) or a string indicating the name of an error function (valid ones are those in quapy.error.QUANTIFICATION_ERROR)
  • refit – whether to refit the model on the whole labelled collection (training+validation) with the best chosen hyperparameter combination. Ignored if protocol='gen'
  • timeout – establishes a timer (in seconds) for each of the hyperparameter configurations being tested. Whenever a run takes longer than this timer, that configuration will be ignored. If all configurations end up being ignored, a TimeoutError exception is raised. If -1 (default), then no time bound is set.
  • raise_errors – boolean; if True, raises an exception when a parameter combination yields any error; if otherwise False (default), then the combination is marked with an error status, but the process goes on. However, if no configuration yields a valid model, then a ValueError exception will be raised.
  • verbose – set to True to get information through the stdout
best_model()

Returns the best model found after calling the fit() method, i.e., the one trained on the combination of hyper-parameters that minimized the error function.

Returns:
  a trained quantifier
fit(training: LabelledCollection)

Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing the error metric.

Parameters:
  • training – the training set on which to optimize the hyperparameters

Returns:
  self
get_params(deep=True)

Returns the dictionary of hyper-parameters to explore (param_grid)

Parameters:
  • deep – Unused

Returns:
  the dictionary param_grid
quantify(instances)

Estimate class prevalence values using the best model found after calling the fit() method.

Parameters:
  • instances – sample containing the instances

Returns:
  a ndarray of shape (n_classes) with class prevalence estimates as according to the best model found by the model selection process.
set_params(**parameters)

Sets the hyper-parameters to explore.

Parameters:
  • parameters – a dictionary with keys the parameter names and values the list of values to explore
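Putting the pieces together, a minimal usage sketch (the synthetic data, classifier, and grid below are all illustrative assumptions):

>>> import numpy as np
>>> import quapy as qp
>>> from quapy.method.aggregative import PACC
>>> from sklearn.linear_model import LogisticRegression
>>>
>>> X = np.random.rand(1000, 2)
>>> y = (X[:, 0] + 0.3 * np.random.randn(1000) > 0.5).astype(int)
>>> train, val = qp.data.LabelledCollection(X, y).split_stratified(train_prop=0.6)
>>>
>>> grid = qp.model_selection.GridSearchQ(
>>>     model=PACC(LogisticRegression()),
>>>     param_grid={'classifier__C': [0.1, 1.0, 10.0]},  # explores the classifier's regularization
>>>     protocol=qp.protocol.APP(val, sample_size=100),
>>>     error='mae',
>>> ).fit(train)
>>> print(grid.best_params_, grid.best_score_)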
class quapy.model_selection.Status(value)

Bases: Enum

An enumeration.

ERROR = 4

INVALID = 3

SUCCESS = 1

TIMEOUT = 2
quapy.model_selection.cross_val_predict(quantifier: BaseQuantifier, data: LabelledCollection, nfolds=3, random_state=0)

Akin to scikit-learn's cross_val_predict, but for quantification.

Parameters:
  • quantifier – a quantifier issuing class prevalence values
  • data – a labelled collection
  • nfolds – number of folds for k-fold cross validation generation
  • random_state – random seed for reproducibility

Returns:
  a vector of class prevalence values
quapy.model_selection.expand_grid(param_grid: dict)

Expands a param_grid dictionary as a list of configurations. Example:

>>> combinations = expand_grid({'A': [1, 10, 100], 'B': [True, False]})
>>> print(combinations)
[{'A': 1, 'B': True}, {'A': 1, 'B': False}, {'A': 10, 'B': True}, {'A': 10, 'B': False}, {'A': 100, 'B': True}, {'A': 100, 'B': False}]

Parameters:
  • param_grid – dictionary with keys representing hyper-parameter names, and values representing the range to explore for that hyper-parameter

Returns:
  a list of configurations, i.e., combinations of hyper-parameter assignments in the grid.
quapy.model_selection.group_params(param_grid: dict)

Partitions a param_grid dictionary as two lists of configurations, one for the classifier-specific hyper-parameters, and another for the quantifier-specific hyper-parameters.

Parameters:
  • param_grid – dictionary with keys representing hyper-parameter names, and values representing the range to explore for that hyper-parameter

Returns:
  two expanded grids of configurations, one for the classifier, another for the quantifier

quapy.plot module

quapy.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=None, nbins=5, colormap=<matplotlib.colors.ListedColormap object>, vertical_xticks=False, legend=True, savepath=None)

Box-plots displaying the local bias (i.e., signed error computed as the estimated value minus the true value) for different bins of (true) prevalence of the positive class, for each quantification method.

Parameters:
  • method_names – array-like with the method names for each experiment
  • true_prevs – array-like with the true prevalence values (each being a ndarray with n_classes components) for each experiment
  • estim_prevs – array-like with the estimated prevalence values (each being a ndarray with n_classes components) for each experiment
  • pos_class – index of the positive class
  • title – the title to be displayed in the plot
  • nbins – number of bins
  • colormap – the matplotlib colormap to use (default cm.tab10)
  • vertical_xticks – whether or not to add secondary grid (default is False)
  • legend – whether or not to display the legend (default is True)
  • savepath – path where to save the plot. If not indicated (as default), the plot is shown.
quapy.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None)

Box-plots displaying the global bias (i.e., signed error computed as the estimated value minus the true value) for each quantification method with respect to a given positive class.

Parameters:
  • method_names – array-like with the method names for each experiment
  • true_prevs – array-like with the true prevalence values (each being a ndarray with n_classes components) for each experiment
  • estim_prevs – array-like with the estimated prevalence values (each being a ndarray with n_classes components) for each experiment
  • pos_class – index of the positive class
  • title – the title to be displayed in the plot
  • savepath – path where to save the plot. If not indicated (as default), the plot is shown.
quapy.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, show_std=True, legend=True, train_prev=None, savepath=None, method_order=None)

The diagonal plot displays the predicted prevalence values (along the y-axis) as a function of the true prevalence values (along the x-axis). The optimal quantifier is described by the diagonal (0,0)-(1,1) of the plot (hence the name). It is convenient for binary quantification problems, though it can be used for multiclass problems by indicating which class is to be taken as the positive class. (For multiclass quantification problems, other plots like error_by_drift() might be preferable, though.)

Parameters:
  • method_names – array-like with the method names for each experiment
  • true_prevs – array-like with the true prevalence values (each being a ndarray with n_classes components) for each experiment
  • estim_prevs – array-like with the estimated prevalence values (each being a ndarray with n_classes components) for each experiment
  • pos_class – index of the positive class
  • title – the title to be displayed in the plot
  • show_std – whether or not to show standard deviations (represented by color bands). This might be inconvenient for cases in which many methods are compared, or when the standard deviations are high (default True)
  • legend – whether or not to display the legend (default True)
  • train_prev – if indicated (default is None), the training prevalence (for the positive class) is highlighted in the plot. This is convenient when all the experiments have been conducted on the same dataset.
  • savepath – path where to save the plot. If not indicated (as default), the plot is shown.
  • method_order – if indicated (default is None), imposes the order in which the methods are processed (i.e., listed in the legend and associated with matplotlib colors).
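An end-to-end sketch producing a diagonal plot for one method (the synthetic data and all names below are illustrative assumptions):

>>> import numpy as np
>>> import quapy as qp
>>> from quapy.method.aggregative import PACC
>>> from sklearn.linear_model import LogisticRegression
>>>
>>> X = np.random.rand(2000, 2)
>>> y = (X[:, 0] + 0.3 * np.random.randn(2000) > 0.5).astype(int)
>>> train, test = qp.data.LabelledCollection(X, y).split_stratified(train_prop=0.6)
>>>
>>> pacc = PACC(LogisticRegression()).fit(train)
>>> true_prevs, estim_prevs = qp.evaluation.prediction(pacc, qp.protocol.APP(test, sample_size=100))
>>> qp.plot.binary_diagonal(['PACC'], [true_prevs], [estim_prevs], savepath='diagonal.png')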
quapy.plot.brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, binning='isomerous', x_error='ae', y_error='ae', ttest_alpha=0.005, tail_density_threshold=0.005, method_order=None, savepath=None)

Displays (only) the top performing methods for different regions of the train-test shift in the form of a broken bar chart, in which each method has bars only for those regions in which either one of the following conditions holds: (i) it is the best method (on average) for the bin, or (ii) it is not statistically significantly different (on average) according to a two-sided t-test on independent samples at confidence ttest_alpha. The binning can be made "isometric" (same size), or "isomerous" (same number of experiments – default). A second plot is displayed on top, showing the distribution of experiments for each bin (when binning="isometric") or the percentile points of the distribution (when binning="isomerous").

Parameters:
  • method_names – array-like with the method names for each experiment
  • true_prevs – array-like with the true prevalence values (each being a ndarray with n_classes components) for each experiment
  • estim_prevs – array-like with the estimated prevalence values (each being a ndarray with n_classes components) for each experiment
  • tr_prevs – training prevalence of each experiment
  • n_bins – number of bins in which the y-axis is to be divided (default is 20)
  • binning – type of binning, either "isomerous" (default) or "isometric"
  • x_error – a string representing the name of an error function (as defined in quapy.error) to be used for measuring the amount of train-test shift (default is "ae")
  • y_error – a string representing the name of an error function (as defined in quapy.error) to be used for measuring the amount of error in the prevalence estimations (default is "ae")
  • ttest_alpha – the confidence interval above which a p-value (two-sided t-test on independent samples) is to be considered as an indicator that the two means are not statistically significantly different. Default is 0.005, meaning that a p-value > 0.005 indicates the two methods involved are to be considered similar
  • tail_density_threshold – sets a threshold on the density of experiments (over the total number of experiments) below which a bin in the tail (i.e., the right-most ones) will be discarded. This is in order to avoid some bins being shown for train-test outliers.
  • method_order – if indicated (default is None), imposes the order in which the methods are processed (i.e., listed in the legend and associated with matplotlib colors).
  • savepath – path where to save the plot. If not indicated (as default), the plot is shown.
quapy.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, error_name='ae', show_std=False, show_density=True, show_legend=True, logscale=False, title='Quantification error as a function of distribution shift', vlines=None, method_order=None, savepath=None)

Plots the error (along the y-axis, as measured in terms of error_name) as a function of the train-test shift (along the x-axis, as measured in terms of quapy.error.ae()). This plot is useful especially for multiclass problems, in which "diagonal plots" may be cumbersome, and in order to gain understanding about how methods fare in different regions of the prior probability shift spectrum (e.g., in the low-shift regime vs. in the high-shift regime).

Parameters:
  • method_names – array-like with the method names for each experiment
  • true_prevs – array-like with the true prevalence values (each being a ndarray with n_classes components) for each experiment
  • estim_prevs – array-like with the estimated prevalence values (each being a ndarray with n_classes components) for each experiment
  • tr_prevs – training prevalence of each experiment
  • n_bins – number of bins in which the shift axis is to be divided (default is 20)
  • error_name – a string representing the name of an error function (as defined in quapy.error, default is "ae")
  • show_std – whether or not to show standard deviations as color bands (default is False)
  • show_density – whether or not to display the distribution of experiments for each bin (default is True)
  • show_legend – whether or not to display the legend of the chart (default is True)
  • logscale – whether or not to log-scale the error measure (default is False)
  • title – title of the plot (default is "Quantification error as a function of distribution shift")
  • vlines – array-like list of values (default is None). If indicated, highlights some regions of the space using vertical dotted lines.
  • method_order – if indicated (default is None), imposes the order in which the methods are processed (i.e., listed in the legend and associated with matplotlib colors).
  • savepath – path where to save the plot. If not indicated (as default), the plot is shown.

quapy.protocol module

class quapy.protocol.APP(data: LabelledCollection, sample_size=None, n_prevalences=21, repeats=10, smooth_limits_epsilon=0, random_state=0, sanity_check=10000, return_type='sample_prev')

Bases: AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol

Implementation of the artificial prevalence protocol (APP). The APP consists of exploring a grid of prevalence values containing n_prevalences points (e.g., [0, 0.05, 0.1, 0.15, ..., 1], if n_prevalences=21), and generating all valid combinations of prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ..., [1, 0, 0] prevalence values of size sample_size will be yielded). The number of samples for each valid combination of prevalence values is indicated by repeats.

Parameters:
  • data – a LabelledCollection from which the samples will be drawn
  • sample_size – integer, number of instances in each sample; if None (default), then it is taken from qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.
  • n_prevalences – the number of equidistant prevalence points to extract from the [0,1] interval for the grid (default is 21)
  • repeats – number of copies for each valid prevalence vector (default is 10)
  • smooth_limits_epsilon – the quantity to add and subtract to the limits 0 and 1
  • random_state – allows replicating samples across runs (default 0, meaning that the sequence of samples will be the same every time the protocol is called)
  • sanity_check – int, raises an exception warning the user that the number of examples to be generated exceeds this number; set to None to skip this check
  • return_type – set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or to "labelled_collection" to get instead instances of LabelledCollection
prevalence_grid()

Generates vectors of prevalence values from an exhaustive grid of prevalence values. The number of prevalence values explored for each dimension depends on n_prevalences, so that, if, for example, n_prevalences=11, then the prevalence values of the grid are taken from [0, 0.1, 0.2, ..., 0.9, 1]. Only valid prevalence distributions are returned, i.e., vectors of prevalence values that sum up to 1. For each valid vector of prevalence values, repeat copies are returned. The vector of prevalence values can be implicit (by setting return_constrained_dim=False), meaning that the last dimension (which is constrained to 1 - sum of the rest) is not returned (note that, quite obviously, in this case the vector does not sum up to 1). Note that this method is deterministic, i.e., there is no random sampling anywhere.

Returns:
  a np.ndarray of shape (n, dimensions) if return_constrained_dim=True, or of shape (n, dimensions-1) if return_constrained_dim=False, where n is the number of valid combinations found in the grid multiplied by repeat
sample(index)

Realizes the sample given the index of the instances.

Parameters:
  • index – indexes of the instances to select

Returns:
  an instance of qp.data.LabelledCollection
samples_parameters()

Return all the necessary parameters to replicate the samples according to the APP protocol.

Returns:
  a list of indexes that realize the APP sampling
total()

Returns the number of samples that will be generated

Returns:
  int
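A minimal sketch of drawing samples with APP (synthetic data; the callable iteration style is an assumption of this sketch):

>>> import numpy as np
>>> import quapy as qp
>>> from quapy.protocol import APP
>>>
>>> X = np.random.rand(500, 2)
>>> y = (X[:, 0] > 0.5).astype(int)
>>> data = qp.data.LabelledCollection(X, y)
>>>
>>> prot = APP(data, sample_size=50, n_prevalences=11, repeats=1)
>>> prot.total()  # -> 11 valid prevalence vectors for 2 classes
>>> for instances, prev in prot():
>>>     pass      # each iteration yields a sample and its true prevalence vector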
class quapy.protocol.AbstractProtocol

Bases: object

Abstract parent class for sample generation protocols.

total()

Indicates the total number of samples that the protocol generates.

Returns:
  The number of samples to generate if known, or None otherwise.
class quapy.protocol.AbstractStochasticSeededProtocol(random_state=0)

Bases: AbstractProtocol

An AbstractStochasticSeededProtocol is a protocol that generates, via any random procedure (e.g., via random sampling), sequences of quapy.data.base.LabelledCollection samples. The protocol abstraction enforces the object to be instantiated using a seed, so that the sequence can be fully replicated. In order to make this functionality possible, the classes extending this abstraction need to implement only two functions: samples_parameters(), which generates all the parameters needed for extracting the samples, and sample(), which, given some parameters as input, deterministically generates a sample.

Parameters:
  • random_state – the seed that allows replicating any sequence of samples. Default is 0, meaning that the sequence will be consistent every time the protocol is called.
collator(sample, *args)

The collator prepares the sample to accommodate the desired output format before returning the output. This collator simply returns the sample as it is. Classes inheriting from this abstract class can implement their custom collators.

Parameters:
  • sample – the sample to be returned
  • args – additional arguments

Returns:
  the sample adhering to a desired output format (in this case, the sample is returned as it is)
property random_state
abstract sample(params)

Extract one sample determined by the given parameters

Parameters:
  • params – all the necessary parameters to generate a sample

Returns:
  one sample (the same sample has to be generated for the same parameters)
abstract samples_parameters()

This function has to return all the necessary parameters to replicate the samples

Returns:
  a list of parameters, each of which serves to deterministically generate a sample
quapy.protocol.ArtificialPrevalenceProtocol

alias of APP
class quapy.protocol.DomainMixer(domainA: LabelledCollection, domainB: LabelledCollection, sample_size, repeats=1, prevalence=None, mixture_points=11, random_state=0, return_type='sample_prev')

Bases: AbstractStochasticSeededProtocol

Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence.

Parameters:
  • domainA – one domain, an object of qp.data.LabelledCollection
  • domainB – another domain, an object of qp.data.LabelledCollection
  • sample_size – integer, the number of instances in each sample; if None (default), then it is taken from qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.
  • repeats – int, number of samples to draw for every mixture rate
  • prevalence – the prevalence to preserve along the mixtures. If specified, should be an array containing one prevalence value (positive float) for each class and summing up to one. If not specified, the prevalence will be taken from domain A (default).
  • mixture_points – an integer indicating the number of points to take from a linear scale (e.g., 21 will generate the mixture points [1, 0.95, 0.9, ..., 0]), or the array of mixture values itself
  • random_state – allows replicating samples across runs (default 0, meaning that the sequence of samples will be the same every time the protocol is called)
sample(indexes)

Realizes the sample given a pair of indexes of the instances from A and B.

Parameters:
  • indexes – indexes of the instances to select from A and B

Returns:
  an instance of qp.data.LabelledCollection
samples_parameters()

Return all the necessary parameters to replicate the samples according to this protocol.

Returns:
  a list of zipped indexes (from A and B) that realize the sampling
total()

Returns the number of samples that will be generated (equal to "repeats * mixture_points")

Returns:
  int
class quapy.protocol.IterateProtocol(samples: [LabelledCollection])

Bases: AbstractProtocol

A very simple protocol that simply iterates over a list of previously generated samples

Parameters:
  • samples – a list of quapy.data.base.LabelledCollection
total()

Returns the number of samples in this protocol

Returns:
  int
class quapy.protocol.NPP(data: LabelledCollection, sample_size=None, repeats=100, random_state=0, return_type='sample_prev')

Bases: AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol

A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural prevalence of the collection.

Parameters:
  • data – a LabelledCollection from which the samples will be drawn
  • sample_size – integer, the number of instances in each sample; if None (default), then it is taken from qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.
  • repeats – the number of samples to generate. Default is 100.
  • random_state – allows replicating samples across runs (default 0, meaning that the sequence of samples will be the same every time the protocol is called)
  • return_type – set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or to "labelled_collection" to get instead instances of LabelledCollection
sample(index)

Realizes the sample given the index of the instances.

Parameters:
  • index – indexes of the instances to select

Returns:
  an instance of qp.data.LabelledCollection
samples_parameters()

Return all the necessary parameters to replicate the samples according to the NPP protocol.

Returns:
  a list of indexes that realize the NPP sampling
total()

Returns the number of samples that will be generated (equal to "repeats")

Returns:
  int
quapy.protocol.NaturalPrevalenceProtocol

alias of NPP
class quapy.protocol.OnLabelledCollectionProtocol

Bases: object

Protocols that generate samples from a qp.data.LabelledCollection object.

RETURN_TYPES = ['sample_prev', 'labelled_collection', 'index']
classmethod get_collator(return_type='sample_prev')

Returns a collator function, i.e., a function that prepares the yielded data

Parameters:
  • return_type – either 'sample_prev' (default) if the collator is requested to yield tuples of (sample, prevalence), or 'labelled_collection' when it is requested to yield instances of qp.data.LabelledCollection

Returns:
  the collator function (a callable function that takes as input an instance of qp.data.LabelledCollection)
get_labelled_collection()

Returns the labelled collection on which this protocol acts.

Returns:
  an object of type qp.data.LabelledCollection
on_preclassified_instances(pre_classifications, in_place=False)

Returns a copy of this protocol that acts on a modified version of the original qp.data.LabelledCollection, in which the original instances have been replaced with the outputs of a classifier for each instance. (This is convenient for speeding up the evaluation procedures for many samples, by pre-classifying the instances in advance.)

Parameters:
  • pre_classifications – the predictions issued by a classifier, typically an array-like with shape (n_instances,) when the classifier is a hard one, or with shape (n_instances, n_classes) when the classifier is a probabilistic one.
  • in_place – whether or not to apply the modification in-place or in a new copy (default).

Returns:
  a copy of this protocol
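A sketch of the speed-up this enables (all names below are illustrative assumptions): the classifier is fitted once, and its posteriors replace the raw instances so they need not be re-classified for every sample.

>>> import numpy as np
>>> import quapy as qp
>>> from quapy.protocol import APP
>>> from sklearn.linear_model import LogisticRegression
>>>
>>> X = np.random.rand(500, 2)
>>> y = (X[:, 0] > 0.5).astype(int)
>>> pool = qp.data.LabelledCollection(X, y)
>>> clf = LogisticRegression().fit(X, y)
>>>
>>> prot = APP(pool, sample_size=50, n_prevalences=11)
>>> # classify the pool once; every sample drawn afterwards contains predictions, not raw instances
>>> fast_prot = prot.on_preclassified_instances(clf.predict_proba(pool.instances), in_place=False)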
class quapy.protocol.UPP(data: LabelledCollection, sample_size=None, repeats=100, random_state=0, return_type='sample_prev')

Bases: AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol

A variant of APP that, instead of using a grid of equidistant prevalence values, relies on the Kraemer algorithm for sampling the unit (k-1)-simplex uniformly at random, with k the number of classes. This protocol covers the entire range of prevalence values in a statistical sense, i.e., unlike APP there is no guarantee that it is covered precisely equally for all classes, but it is preferred in cases in which the number of possible combinations of the grid values of APP makes this endeavour intractable.

Parameters:
  • data – a LabelledCollection from which the samples will be drawn
  • sample_size – integer, the number of instances in each sample; if None (default), then it is taken from qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.
  • repeats – the number of samples to generate. Default is 100.
  • random_state – allows replicating samples across runs (default 0, meaning that the sequence of samples will be the same every time the protocol is called)
  • return_type – set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or to "labelled_collection" to get instead instances of LabelledCollection
sample(index)

Realizes the sample given the index of the instances.

Parameters:
  • index – indexes of the instances to select

Returns:
  an instance of qp.data.LabelledCollection
samples_parameters()

Return all the necessary parameters to replicate the samples according to the UPP protocol.

Returns:
  a list of indexes that realize the UPP sampling
total()

Returns the number of samples that will be generated (equal to "repeats")

Returns:
  int
quapy.protocol.UniformPrevalenceProtocol

alias of UPP

quapy.util module

class quapy.util.EarlyStop(patience, lower_is_better=True)

Bases: object

A class implementing the early-stopping condition typically used for training neural networks.

>>> earlystop = EarlyStop(patience=2, lower_is_better=True)
>>> earlystop(0.9, epoch=0)
>>> earlystop(0.7, epoch=1)
>>> earlystop.IMPROVED  # is True
>>> earlystop(1.0, epoch=2)
>>> earlystop.STOP  # is False (patience=1)
>>> earlystop(1.0, epoch=3)
>>> earlystop.STOP  # is True (patience=0)
>>> earlystop.best_epoch  # is 1
>>> earlystop.best_score  # is 0.7

Parameters:
  • patience – the number of (consecutive) times that a monitored evaluation metric (typically obtained on a held-out validation split) can be found to be worse than the best one obtained so far, before flagging the stopping condition. An instance of this class is callable, and is to be used as in the example above.
  • lower_is_better – if True (default), the metric is to be minimized.

Variables:
  • best_score – keeps track of the best value seen so far
  • best_epoch – keeps track of the epoch in which the best score was set
  • STOP – flag (boolean) indicating the stopping condition
  • IMPROVED – flag (boolean) indicating whether there was an improvement in the last call
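A typical training-loop sketch (the mock losses below stand in for real per-epoch validation scores):

>>> from quapy.util import EarlyStop
>>> losses = [0.9, 0.7, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8]  # mock validation losses
>>> earlystop = EarlyStop(patience=5, lower_is_better=True)
>>> for epoch, val_loss in enumerate(losses):
>>>     earlystop(val_loss, epoch)
>>>     if earlystop.STOP:
>>>         break  # restore the checkpoint saved at earlystop.best_epoch
>>> earlystop.best_epoch, earlystop.best_score  # -> (1, 0.7)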
quapy.util.create_if_not_exist(path)

An alias to os.makedirs(path, exist_ok=True) that also returns the path. This is useful in cases like, e.g.:

>>> path = create_if_not_exist(os.path.join(dir, subdir, anotherdir))

Parameters:
  • path – path to create

Returns:
  the path itself
quapy.util.create_parent_dir(path)

Creates the parent dir (if any) of a given path, if it does not exist. E.g., for ./path/to/file.txt, the path ./path/to is created.

Parameters:
  • path – the path
quapy.util.download_file(url, archive_filename)

Downloads a file from a url

Parameters:
  • url – the url
  • archive_filename – destination filename
quapy.util.download_file_if_not_exists(url, archive_filename)

Downloads a file (using download_file()) if it does not already exist.

Parameters:
  • url – the url
  • archive_filename – destination filename
quapy.util.get_quapy_home()

Gets the home directory of QuaPy, i.e., the directory where QuaPy saves permanent data, such as downloaded datasets. This directory is ~/quapy_data

Returns:
  a string representing the path
quapy.util.load_report(path, as_dict=False)
quapy.util.map_parallel(func, args, n_jobs)

Applies func to n_jobs slices of args. E.g., if args is an array of 99 items and n_jobs=2, then func is applied in two parallel processes to args[0:50] and to args[50:99]. func is a function that already works with a list of arguments.

Parameters:
  • func – function to be parallelized
  • args – array-like of arguments to be passed to the function in different parallel calls
  • n_jobs – the number of workers
quapy.util.parallel(func, args, n_jobs, seed=None, asarray=True, backend='loky')

A wrapper of multiprocessing:

>>> Parallel(n_jobs=n_jobs)(
>>>      delayed(func)(args_i) for args_i in args
>>> )

that takes the quapy.environ variable as input silently. Seeds the child processes to ensure reproducibility when n_jobs>1.

Parameters:
  • func – callable
  • args – args of func
  • seed – the numeric seed
  • asarray – set to True to return a np.ndarray instead of a list
  • backend – indicates the backend used for handling parallel works
  • open_args – if True, then the delayed function is called on *args_i, instead of on args_i
quapy.util.parallel_unpack(func, args, n_jobs, seed=None, asarray=True, backend='loky')

A wrapper of multiprocessing:

>>> Parallel(n_jobs=n_jobs)(
>>>      delayed(func)(*args_i) for args_i in args
>>> )

that takes the quapy.environ variable as input silently. Seeds the child processes to ensure reproducibility when n_jobs>1.

Parameters:
  • func – callable
  • args – args of func
  • seed – the numeric seed
  • asarray – set to True to return a np.ndarray instead of a list
  • backend – indicates the backend used for handling parallel works
quapy.util.pickled_resource(pickle_path: str, generation_func: callable, *args)

Allows for fast reuse of resources that are generated only once by calling generation_func(*args). The next times this function is invoked, it loads the pickled resource. Example:

>>> def some_array(n):  # a mock resource created with one parameter (`n`)
>>>     return np.random.rand(n)
>>> pickled_resource('./my_array.pkl', some_array, 10)  # the resource does not exist: it is created by calling some_array(10)
>>> pickled_resource('./my_array.pkl', some_array, 10)  # the resource exists; it is loaded from './my_array.pkl'

Parameters:
  • pickle_path – the path where to save (the first time) and load (the next times) the resource
  • generation_func – the function that generates the resource, in case it does not exist in pickle_path
  • args – any arg that generation_func uses for generating the resource

Returns:
  the resource
quapy.util.save_text_file(path, text)

Saves a text file to disk, given its full path, and creates the parent directory if missing.

Parameters:
  • path – path where to save the file.
  • text – text to save.
quapy.util.temp_seed(random_state)

Can be used in a "with" context to set a temporary seed without modifying the outer numpy random state. E.g.:

>>> with temp_seed(random_seed):
>>>  pass # do any computation depending on np.random functionality

Parameters:
  • random_state – the seed to set within the "with" context
quapy.util.timeout(seconds)

Opens a context that will raise an exception if not closed after a given number of seconds

>>> def func(start_msg, end_msg):
>>>     print(start_msg)
>>>     sleep(2)
>>>     print(end_msg)
>>>
>>> with timeout(1):
>>>     func('begin function', 'end function')
begin function
TimeoutError

Parameters:
  • seconds – number of seconds, set to <=0 to ignore the timer
Module contents

QuaPy module for quantification

quapy.method package


Submodules


quapy.method.aggregative module

class quapy.method.aggregative.ACC(classifier: Optional[BaseEstimator] = None, val_split=5, solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize', method: Literal['inversion', 'invariant-ratio'] = 'inversion', norm: Literal['clip', 'mapsimplex', 'condsoftmax'] = 'clip', n_jobs=None)

Bases: AggregativeCrispQuantifier

Adjusted Classify & Count, the "adjusted" variant of CC, that corrects the predictions of CC according to the misclassification rates.

Parameters:
  • classifier – a sklearn's Estimator that generates a classifier
  • val_split – specifies the data used for generating classifier predictions. This specification can be made as float in (0, 1), indicating the proportion of stratified held-out validation set to be extracted from the training set; or as an integer (default 5), indicating that the predictions are to be generated in a k-fold cross-validation manner (with this integer indicating the value for k); or as a collection defining the specific set of data to use for validation. Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated.
  • method (str) – adjustment method to be used:
    • 'inversion': matrix inversion method based on the matrix equality \(P(C)=P(C|Y)P(Y)\), which tries to invert the \(P(C|Y)\) matrix.
    • 'invariant-ratio': invariant ratio estimator of Vaz et al. 2018, which replaces the last equation with the normalization condition.
  • solver (str) – indicates the method to use for solving the system of linear equations. Valid options are 'exact-raise', 'exact-cc', 'exact' (deprecated), and 'minimize' (see quapy.functional.solve_adjustment() for their meaning).
  • norm (str) – the method to use for normalization:
    • clip, the values are clipped to the range [0,1] and then L1-normalized.
    • mapsimplex projects vectors onto the probability simplex. This implementation relies on Mathieu Blondel's projection_simplex_sort
    • condsoftmax, applies a softmax normalization only to prevalence vectors that lie outside the simplex
  • n_jobs – number of parallel workers
METHODS = ['inversion', 'invariant-ratio']

NORMALIZATIONS = ['clip', 'mapsimplex', 'condsoftmax', None]

SOLVERS = ['exact', 'minimize', 'exact-raise', 'exact-cc']
aggregate(classif_predictions)

Implements the aggregation of label predictions.

Parameters:
  • classif_predictions – np.ndarray of label predictions

Returns:
  np.ndarray of shape (n_classes,) with class prevalence estimates.
aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)

Estimates the misclassification rates.

Parameters:
  • classif_predictions (LabelledCollection) – the classification predictions from which the misclassification rates are estimated
  • data (LabelledCollection) – the training data
classmethod getPteCondEstim(classes, y, y_)

Estimate the matrix with entry (i,j) being the estimate of \(P(\hat{y}_i|y_j)\), that is, the probability that a document that belongs to \(y_j\) ends up being classified as belonging to \(y_i\).

Parameters:
  • classes – array-like with the class names
  • y – array-like with the true labels
  • y_ – array-like with the estimated labels

Returns:
  np.ndarray
classmethod newInvariantRatioEstimation(classifier: BaseEstimator, val_split=5, n_jobs=None)

Constructs a quantifier that implements the Invariant Ratio Estimator of Vaz et al. 2018. This amounts to setting method to 'invariant-ratio' and clipping to 'project'.

Parameters:
  • classifier – a sklearn's Estimator that generates a classifier
  • val_split – specifies the data used for generating classifier predictions. This specification can be made as float in (0, 1), indicating the proportion of stratified held-out validation set to be extracted from the training set; or as an integer (default 5), indicating that the predictions are to be generated in a k-fold cross-validation manner (with this integer indicating the value for k); or as a collection defining the specific set of data to use for validation. Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated.
  • n_jobs – number of parallel workers

Returns:
  an instance of ACC configured so that it implements the Invariant Ratio Estimator
-
-quapy.method.aggregative.AdjustedClassifyAndCount
-

alias of ACC

-
- -
-
-class quapy.method.aggregative.AggregativeCrispQuantifier[source]
-

Bases: AggregativeQuantifier, ABC

-

Abstract class for quantification methods that base their estimations on the aggregation of crisp decisions as returned by a hard classifier. Aggregative crisp quantifiers thus extend Aggregative Quantifiers by implementing specifications about crisp predictions.

-
- -
-
-class quapy.method.aggregative.AggregativeMedianEstimator(base_quantifier: AggregativeQuantifier, param_grid: dict, random_state=None, n_jobs=None)[source]
-

Bases: BinaryQuantifier

-

This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the estimations returned by differently (hyper)parameterized base quantifiers. The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions, i.e., in cases of binary quantification.

-
-
Parameters:
-
    -
  • base_quantifier – the base, binary quantifier

  • -
  • random_state – a seed to be set before fitting any base quantifier (default None)

  • -
  • param_grid – the grid or parameters towards which the median will be computed

  • -
  • n_jobs – number of parallel workers

  • -
-
-
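For instance (a sketch; binary_train and test are assumed to be available LabelledCollection objects, and the param_grid values are arbitrary):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import AggregativeMedianEstimator, PACC
>>> med = AggregativeMedianEstimator(
...     base_quantifier=PACC(LogisticRegression()),
...     param_grid={'classifier__C': [0.1, 1, 10]})
>>> med.fit(binary_train)
>>> prev = med.quantify(test.instances)   # median of the three PACC estimates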
-
-
-fit(training: LabelledCollection, **kwargs)[source]
-

Trains a quantifier.

-
-
Parameters:
-

training – a quapy.data.base.LabelledCollection consisting of the training data

-
-
Returns:
-

self

-
-
-
- -
-
-get_params(deep=True)[source]
-

Get parameters for this estimator.

-
-
Parameters:
-

deep (bool, default=True) – If True, will return the parameters for this estimator and contained subobjects that are estimators.

-
-
Returns:
-

params – Parameter names mapped to their values.

-
-
Return type:
-

dict

-
-
-
- -
-
-quantify(instances)[source]
-

Generate class prevalence estimates for the sample’s instances

-
-
Parameters:
-

instances – array-like

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-set_params(**params)[source]
-

Set the parameters of this estimator.

-

The method works on simple estimators as well as on nested objects (such as Pipeline). The latter have parameters of the form <component>__<parameter> so that it’s possible to update each component of a nested object.

-
-
Parameters:
-

**params (dict) – Estimator parameters.

-
-
Returns:
-

self – Estimator instance.

-
-
Return type:
-

estimator instance

-
-
-
- -
- -
-
-class quapy.method.aggregative.AggregativeQuantifier[source]
-

Bases: BaseQuantifier, ABC

-

Abstract class for quantification methods that base their estimations on the aggregation of classification results. Aggregative quantifiers implement a pipeline that consists of generating classification predictions and aggregating them. For this reason, the training phase is implemented by classifier_fit_predict() followed by aggregation_fit(), while the testing phase is implemented by classify() followed by aggregate(). Subclasses of this abstract class must provide implementations for these methods. Aggregative quantifiers also maintain a classifier attribute.

-

The method fit() comes with a default implementation based on classifier_fit_predict() and aggregation_fit().

-

The method quantify() comes with a default implementation based on classify() and aggregate().
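The following sketch illustrates this two-phase pipeline end to end (the dataset name and loader arguments follow the QuaPy quickstart; treat them as assumptions):

>>> import quapy as qp
>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import PACC
>>>
>>> data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
>>> quantifier = PACC(LogisticRegression())
>>> quantifier.fit(data.training)                     # classifier fit + aggregation fit
>>> prev = quantifier.quantify(data.test.instances)   # classify, then aggregate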

-
-
-abstract aggregate(classif_predictions: ndarray)[source]
-

Implements the aggregation of label predictions.

-
-
Parameters:
-

classif_predictions – np.ndarray of label predictions

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-abstract aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Trains the aggregation function.

-
-
Parameters:
-
-
-
-
- -
-
-property classes_
-

Class labels, in the same order in which class prevalence values are to be computed. This default implementation actually returns the class labels of the learner.

-
-
Returns:
-

array-like

-
-
-
- -
-
-property classifier
-

Gives access to the classifier

-
-
Returns:
-

the classifier (typically an sklearn’s Estimator)

-
-
-
- -
-
-classifier_fit_predict(data: LabelledCollection, fit_classifier=True, predict_on=None)[source]
-

Trains the classifier if requested (fit_classifier=True) and generates the necessary predictions to train the aggregation function.

-
-
Parameters:
-
    -
  • data – a quapy.data.base.LabelledCollection consisting of the training data

  • -
  • fit_classifier – whether to train the learner (default is True). Set to False if the learner has been trained outside the quantifier.

  • -
  • predict_on – specifies the set on which predictions need to be issued. This parameter can be specified as None (default) to indicate no prediction is needed; a float in (0, 1) to indicate the proportion of instances to be used for predictions (the remainder is used for training); an integer >1 to indicate that the predictions must be generated via k-fold cross-validation, using this integer as k; or the data sample itself on which to generate the predictions.

  • -
-
-
-
- -
-
-classify(instances)[source]
-

Provides the label predictions for the given instances. The predictions should respect the format expected by aggregate(), e.g., posterior probabilities for probabilistic quantifiers, or crisp predictions for non-probabilistic quantifiers. The default one is “decision_function”.

-
-
Parameters:
-

instances – array-like of shape (n_instances, n_features,)

-
-
Returns:
-

np.ndarray of shape (n_instances,) with label predictions

-
-
-
- -
-
-fit(data: LabelledCollection, fit_classifier=True, val_split=None)[source]
-

Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.

-
-
Parameters:
-
    -
  • data – a quapy.data.base.LabelledCollection consisting of the training data

  • -
  • fit_classifier – whether to train the learner (default is True). Set to False if the learner has been trained outside the quantifier.

  • -
  • val_split – specifies the data used for generating classifier predictions. This specification can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to be extracted from the training set; or as an integer (default 5), indicating that the predictions are to be generated in a k-fold cross-validation manner (with this integer indicating the value for k); or as a collection defining the specific set of data to use for validation. Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated.

  • -
-
-
Returns:
-

self

-
-
-
- -
-
-quantify(instances)[source]
-

Generate class prevalence estimates for the sample’s instances by aggregating the label predictions generated by the classifier.

-
-
Parameters:
-

instances – array-like

-
-
Returns:
-

np.ndarray of shape (n_classes) with class prevalence estimates.

-
-
-
- -
-
-property val_split
-
- -
-
-val_split_ = None
-
- -
- -
-
-class quapy.method.aggregative.AggregativeSoftQuantifier[source]
-

Bases: AggregativeQuantifier, ABC

-

Abstract class for quantification methods that base their estimations on the aggregation of posterior probabilities as returned by a probabilistic classifier. Aggregative soft quantifiers thus extend Aggregative Quantifiers by implementing specifications about soft predictions.

-
- -
-
-class quapy.method.aggregative.BayesianCC(classifier: Optional[BaseEstimator] = None, val_split: float = 0.75, num_warmup: int = 500, num_samples: int = 1000, mcmc_seed: int = 0)[source]
-

Bases: AggregativeCrispQuantifier

-

Bayesian quantification method, which is a variant of ACC that calculates the posterior probability distribution over the prevalence vectors, rather than providing a point estimate obtained by matrix inversion.

-

Can be used to diagnose degeneracy in the predictions, visible when the confusion matrix has a high condition number, or to quantify uncertainty around the point estimate.

-

This method relies on extra dependencies, which have to be installed via: $ pip install quapy[bayes]

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a classifier

  • -
  • val_split – a float in (0, 1) indicating the proportion of the training data to be used, as a stratified held-out validation set, for generating classifier predictions.

  • -
  • num_warmup – number of warmup iterations for the MCMC sampler (default 500)

  • -
  • num_samples – number of samples to draw from the posterior (default 1000)

  • -
  • mcmc_seed – random seed for the MCMC sampler (default 0)

  • -
-
-
-
-
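A usage sketch (assuming the extra dependencies above are installed and that train and test are available data objects):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import BayesianCC
>>> quant = BayesianCC(LogisticRegression(), val_split=0.75)
>>> quant.fit(train)
>>> point_estimate = quant.quantify(test.instances)
>>> samples = quant.get_prevalence_samples()   # posterior draws over prevalence vectors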
-aggregate(classif_predictions)[source]
-

Implements the aggregation of label predictions.

-
-
Parameters:
-

classif_predictions – np.ndarray of label predictions

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Estimates the misclassification rates.

-
-
Parameters:
-
-
-
-
- -
-
-get_conditional_probability_samples()[source]
-
- -
-
-get_prevalence_samples()[source]
-
- -
-
-sample_from_posterior(classif_predictions)[source]
-
- -
- -
-
-class quapy.method.aggregative.BinaryAggregativeQuantifier[source]
-

Bases: AggregativeQuantifier, BinaryQuantifier

-
-
-fit(data: LabelledCollection, fit_classifier=True, val_split=None)[source]
-

Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.

-
-
Parameters:
-
    -
  • data – a quapy.data.base.LabelledCollection consisting of the training data

  • -
  • fit_classifier – whether to train the learner (default is True). Set to False if the learner has been trained outside the quantifier.

  • -
  • val_split – specifies the data used for generating classifier predictions. This specification can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to be extracted from the training set; or as an integer (default 5), indicating that the predictions are to be generated in a k-fold cross-validation manner (with this integer indicating the value for k); or as a collection defining the specific set of data to use for validation. Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated.

  • -
-
-
Returns:
-

self

-
-
-
- -
-
-property neg_label
-
- -
-
-property pos_label
-
- -
- -
-
-class quapy.method.aggregative.CC(classifier: Optional[BaseEstimator] = None)[source]
-

Bases: AggregativeCrispQuantifier

-

The most basic quantification method: one that simply classifies all instances and counts how many have been attributed to each of the classes in order to compute class prevalence estimates.

-
-
Parameters:
-

classifier – a sklearn’s Estimator that generates a classifier

-
-
-
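Conceptually, the counting step amounts to nothing more than the following sketch (plain NumPy, not the library code):

>>> import numpy as np
>>> y_pred = np.array([0, 1, 1, 2, 1])                    # crisp predictions
>>> prev = np.bincount(y_pred, minlength=3) / len(y_pred)
>>> prev
array([0.2, 0.6, 0.2])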
-
-aggregate(classif_predictions: ndarray)[source]
-

Computes class prevalence estimates by counting the prevalence of each of the predicted labels.

-
-
Parameters:
-

classif_predictions – array-like with label predictions

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Nothing to do here!

-
-
Parameters:
-
    -
  • classif_predictions – not used

  • -
  • data – not used

  • -
-
-
-
- -
- -
-
-quapy.method.aggregative.ClassifyAndCount
-

alias of CC

-
- -
-
-class quapy.method.aggregative.DMy(classifier: Optional[BaseEstimator] = None, val_split=5, nbins=8, divergence: Union[str, Callable] = 'HD', cdf=False, search='optim_minimize', n_jobs=None)[source]
-

Bases: AggregativeSoftQuantifier

-

Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of posterior probabilities. This implementation takes the number of bins, the divergence, and the possibility to work on CDFs as hyperparameters.

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a probabilistic classifier

  • -
  • val_split – indicates the proportion of data to be used as a stratified held-out validation set to model the validation distribution. This parameter can be indicated as a real value (between 0 and 1), representing a proportion of validation data; as an integer, indicating that the validation distribution should be estimated via k-fold cross validation (this integer stands for the number of folds k, default 5); or as a quapy.data.base.LabelledCollection (the split itself).

  • -
  • nbins – number of bins used to discretize the distributions (default 8)

  • -
  • divergence – a string representing a divergence measure (currently, “HD” and “topsoe” are implemented) or a callable function taking two ndarrays of the same dimension as input (default “HD”, meaning Hellinger Distance)

  • -
  • cdf – whether to use CDF instead of PDF (default False)

  • -
  • n_jobs – number of parallel workers (default None)

  • -
-
-
-
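A configuration sketch combining the hyperparameters named above (train and test are assumed to be available):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import DMy
>>> dm = DMy(LogisticRegression(), val_split=5, nbins=8, divergence='HD', cdf=False)
>>> dm.fit(train)
>>> prev = dm.quantify(test.instances)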
-
-aggregate(posteriors: ndarray)[source]
-

Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution (the mixture) that best matches the test distribution, in terms of the divergence measure of choice. In the multiclass case, with n the number of classes, the test and mixture distributions contain n channels (proper distributions of binned posterior probabilities), on which the divergence is computed independently. The matching is computed as an average of the divergence across all channels.

-
-
Parameters:
-

posteriors – posterior probabilities of the instances in the sample

-
-
Returns:
-

a vector of class prevalence estimates

-
-
-
- -
-
-aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Trains the aggregation function of a distribution matching method. This comes down to generating the validation distributions out of the training data. The validation distributions have shape (n, ch, nbins), with n the number of classes, ch the number of channels, and nbins the number of bins. In particular, let V be the validation distributions; then di=V[i] are the distributions obtained from training data labelled with class i, while dij = di[j] is the discrete distribution of posterior probabilities P(Y=j|X=x) for training data labelled with class i, and dij[k] is the fraction of instances with a value in the k-th bin.
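To make the (n, ch, nbins) layout concrete, the following sketch (plain NumPy, not the library’s internal code) builds the channel distributions for one class from toy posterior probabilities:

>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> post_i = rng.random((100, 3))             # posteriors of class-i training data (toy)
>>> post_i /= post_i.sum(axis=1, keepdims=True)
>>> nbins = 8
>>> d_i = np.stack([np.histogram(post_i[:, j], bins=nbins, range=(0, 1))[0] / len(post_i)
...                 for j in range(3)])       # shape (ch=3, nbins); each row sums to 1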

-
-
Parameters:
-
-
-
-
- -
- -
-
-quapy.method.aggregative.DistributionMatchingY
-

alias of DMy

-
- -
-
-class quapy.method.aggregative.DyS(classifier: Optional[BaseEstimator] = None, val_split=5, n_bins=8, divergence: Union[str, Callable] = 'HD', tol=1e-05, n_jobs=None)[source]
-

Bases: AggregativeSoftQuantifier, BinaryAggregativeQuantifier

-

DyS framework (DyS). DyS is a generalization of the HDy method that uses a ternary search to find the prevalence that minimizes the distance between distributions. Details of the ternary search were taken from <https://dl.acm.org/doi/pdf/10.1145/3219819.3220059>.

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a binary classifier

  • -
  • val_split – a float in range (0,1) indicating the proportion of data to be used as a stratified held-out validation distribution, or a quapy.data.base.LabelledCollection (the split itself), or an integer indicating the number of folds (default 5).

  • -
  • n_bins – an int with the number of bins to use to compute the histograms.

  • -
  • divergence – a str indicating the name of the divergence (currently supported ones are “HD” and “topsoe”), or a callable function that computes the divergence between two distributions (two equally sized arrays).

  • -
  • tol – a float with the tolerance for the ternary search algorithm.

  • -
  • n_jobs – number of parallel workers.

  • -
-
-
-
-
-aggregate(classif_posteriors)[source]
-

Implements the aggregation of label predictions.

-
-
Parameters:
-

classif_predictions – np.ndarray of label predictions

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Trains the aggregation function of DyS.

-
-
Parameters:
-
-
-
-
- -
- -
-
-class quapy.method.aggregative.EMQ(classifier: Optional[BaseEstimator] = None, val_split=None, exact_train_prev=True, recalib=None, n_jobs=None)[source]
-

Bases: AggregativeSoftQuantifier

-

Expectation Maximization for Quantification (EMQ), aka Saerens-Latinne-Decaestecker (SLD) algorithm. EMQ consists of using the well-known Expectation Maximization algorithm to iteratively update the posterior probabilities generated by a probabilistic classifier and the class prevalence estimates obtained via maximum-likelihood estimation, in a mutually recursive way, until convergence.

-

This implementation also gives access to the heuristics proposed in the Alexandari et al. paper. These heuristics consist of using, as the training prevalence, an estimate of it obtained via k-fold cross validation (instead of the true training prevalence), and of recalibrating the posterior probabilities of the classifier.

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a classifier

  • -
  • val_split – specifies the data used for generating classifier predictions. This specification can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to be extracted from the training set; or as an integer, indicating that the predictions are to be generated in a k-fold cross-validation manner (with this integer indicating the value for k, default 5); or as a collection defining the specific set of data to use for validation. Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated. This hyperparameter is only meant to be used when the heuristics are to be applied, i.e., if a recalibration is required. The default value is None (meaning the recalibration is not required). In case this hyperparameter is set to a value other than None, but the recalibration is not required (recalib=None), a warning message will be raised.

  • -
  • exact_train_prev – set to True (default) for using the true training prevalence as the initial observation; set to False for computing the training prevalence as an estimate of it, i.e., as the expected value of the posterior probabilities of the training instances.

  • -
  • recalib – a string indicating the method of recalibration. Available choices include “nbvs” (No-Bias Vector Scaling), “bcts” (Bias-Corrected Temperature Scaling), “ts” (Temperature Scaling), and “vs” (Vector Scaling). Default is None (no recalibration).

  • -
  • n_jobs – number of parallel workers. Only used for recalibrating the classifier if val_split is set to an integer k (the number of folds).

  • -
-
-
-
-
-classmethod EM(tr_prev, posterior_probabilities, epsilon=0.0001)[source]
-

Computes the Expectation Maximization routine.

-
-
Parameters:
-
    -
  • tr_prev – array-like, the training prevalence

  • -
  • posterior_probabilities – np.ndarray of shape (n_instances, n_classes,) with the posterior probabilities

  • -
  • epsilon – float, the threshold difference between two consecutive iterations below which the loop is stopped

  • -
-
-
Returns:
-

a tuple with the estimated prevalence values (shape (n_classes,)) and the corrected posterior probabilities (shape (n_instances, n_classes,))
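A toy invocation of this routine (all values are arbitrary):

>>> import numpy as np
>>> from quapy.method.aggregative import EMQ
>>> tr_prev = np.array([0.5, 0.5])
>>> posteriors = np.array([[0.8, 0.2], [0.3, 0.7], [0.6, 0.4]])
>>> prev, corrected = EMQ.EM(tr_prev, posteriors)   # estimated prevalence + corrected posteriors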

-
-
-
- -
-
-classmethod EMQ_BCTS(classifier: BaseEstimator, n_jobs=None)[source]
-

Constructs an instance of EMQ using the best configuration found in the Alexandari et al. paper, i.e., one that relies on Bias-Corrected Temperature Scaling (BCTS) as a recalibration function, and that uses an estimate of the training prevalence instead of the true training prevalence.

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a classifier

  • -
  • n_jobs – number of parallel workers.

  • -
-
-
Returns:
-

An instance of EMQ with BCTS
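For comparison, a sketch constructing both the plain SLD quantifier and the BCTS-recalibrated variant:

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import EMQ
>>> sld = EMQ(LogisticRegression())                 # no recalibration
>>> sld_bcts = EMQ.EMQ_BCTS(LogisticRegression())   # Alexandari et al. heuristics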

-
-
-
- -
-
-EPSILON = 0.0001
-
- -
-
-MAX_ITER = 1000
-
- -
-
-aggregate(classif_posteriors, epsilon=0.0001)[source]
-

Implements the aggregation of label predictions.

-
-
Parameters:
-

classif_predictions – np.ndarray of label predictions

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Trains the aggregation function of EMQ. This comes down to recalibrating the posterior probabilities if requested.

-
-
Parameters:
-
-
-
-
- -
-
-classify(instances)[source]
-

Provides the posterior probabilities for the given instances. If the classifier was required to be recalibrated, then these posteriors are recalibrated accordingly.

-
-
Parameters:
-

instances – array-like of shape (n_instances, n_dimensions,)

-
-
Returns:
-

np.ndarray of shape (n_instances, n_classes,) with posterior probabilities

-
-
-
- -
-
-predict_proba(instances, epsilon=0.0001)[source]
-

Returns the posterior probabilities updated by the EM algorithm.

-
-
Parameters:
-
    -
  • instances – np.ndarray of shape (n_instances, n_dimensions)

  • -
  • epsilon – error tolerance

  • -
-
-
Returns:
-

np.ndarray of shape (n_instances, n_classes)

-
-
-
- -
- -
-
-quapy.method.aggregative.ExpectationMaximizationQuantifier
-

alias of EMQ

-
- -
-
-class quapy.method.aggregative.HDy(classifier: Optional[BaseEstimator] = None, val_split=5)[source]
-

Bases: AggregativeSoftQuantifier, BinaryAggregativeQuantifier

-

Hellinger Distance y (HDy). HDy is a probabilistic method for training binary quantifiers that models quantification as the problem of minimizing the divergence (in terms of the Hellinger Distance) between two distributions of posterior probabilities returned by the classifier. One of the distributions is generated from the unlabelled examples and the other is generated from a validation set. This latter distribution is defined as a mixture of the class-conditional distributions of the posterior probabilities returned for the positive and negative validation examples, respectively. The parameters of the mixture thus represent the estimates of the class prevalence values.

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a binary classifier

  • -
  • val_split – a float in range (0,1) indicating the proportion of data to be used as a stratified held-out validation distribution, or a quapy.data.base.LabelledCollection (the split itself), or an integer indicating the number of folds (default 5).

  • -
-
-
-
-
-aggregate(classif_posteriors)[source]
-

Implements the aggregation of label predictions.

-
-
Parameters:
-

classif_predictions – np.ndarray of label predictions

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Trains the aggregation function of HDy.

-
-
Parameters:
-
-
-
-
- -
- -
-
-quapy.method.aggregative.HellingerDistanceY
-

alias of HDy

-
- -
-
-class quapy.method.aggregative.OneVsAllAggregative(binary_quantifier, n_jobs=None, parallel_backend='multiprocessing')[source]
-

Bases: OneVsAllGeneric, AggregativeQuantifier

-

Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary quantifier for each class, and then L1-normalizes the outputs so that the class prevalences sum up to 1. This variant was used, along with the EMQ quantifier, in Gao and Sebastiani, 2016.

-
-
Parameters:
-
    -
  • binary_quantifier – a quantifier (binary) that will be employed to work on a multiclass model in a one-vs-all manner

  • -
  • n_jobs – number of parallel workers

  • -
  • parallel_backend – the parallel backend for joblib (default “multiprocessing”); this is helpful for some quantifiers (e.g., ELM-based ones) that cannot be run with multiprocessing, since the temp dir they create during fit is removed and no longer available at predict time.

  • -
-
-
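A sketch of wrapping a binary quantifier for a multiclass problem (multiclass_train and test are assumed data objects):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import OneVsAllAggregative, HDy
>>> ova = OneVsAllAggregative(HDy(LogisticRegression()), n_jobs=-1)
>>> ova.fit(multiclass_train)              # one binary HDy per class
>>> prev = ova.quantify(test.instances)    # L1-normalized to sum to 1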
-
-
-aggregate(classif_predictions)[source]
-

Implements the aggregation of label predictions.

-
-
Parameters:
-

classif_predictions – np.ndarray of label predictions

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-classify(instances)[source]
-

If the base quantifier is not probabilistic, returns a matrix of shape (n,m,) with n the number of instances and m the number of classes. The entry (i,j) is a binary value indicating whether instance i belongs to class j. The binary classifications are independent of each other, meaning that an instance can end up being attributed to 0, 1, or more classes. If the base quantifier is probabilistic, returns a matrix of shape (n,m,2) with n the number of instances and m the number of classes. The entry (i,j,1) (resp. (i,j,0)) is a value in [0,1] indicating the posterior probability that instance i belongs (resp. does not belong) to class j. The posterior probabilities are independent of each other, meaning that, in general, they do not sum up to one.

-
-
Parameters:
-

instances – array-like

-
-
Returns:
-

np.ndarray

-
-
-
- -
- -
-
-class quapy.method.aggregative.PACC(classifier: Optional[BaseEstimator] = None, val_split=5, solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize', method: Literal['inversion', 'invariant-ratio'] = 'inversion', norm: Literal['clip', 'mapsimplex', 'condsoftmax'] = 'clip', n_jobs=None)[source]
-

Bases: AggregativeSoftQuantifier

-

Probabilistic Adjusted Classify & Count, the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier.

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a classifier

  • -
  • val_split – specifies the data used for generating classifier predictions. This specification can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to be extracted from the training set; or as an integer (default 5), indicating that the predictions are to be generated in a k-fold cross-validation manner (with this integer indicating the value for k). Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated.

  • -
  • method (str) –

    adjustment method to be used:

    -
      -
    • ’inversion’: matrix inversion method based on the matrix equality \(P(C)=P(C|Y)P(Y)\), which tries to invert the \(P(C|Y)\) matrix.

    • -
    • ’invariant-ratio’: invariant ratio estimator of Vaz et al., which replaces the last equation with the normalization condition.

    • -
    -

  • -
  • solver (str) –

    the method to use for solving the system of linear equations. Valid options are ’exact’, ’minimize’, ’exact-raise’, and ’exact-cc’ (as in ACC.SOLVERS).

  • -
  • norm (str) –

    the method to use for normalization.

    -
      -
    • clip, the values are clipped to the range [0,1] and then L1-normalized.

    • -
    • mapsimplex projects vectors onto the probability simplex. This implementation relies on Mathieu Blondel’s projection_simplex_sort

    • -
    • condsoftmax, applies a softmax normalization only to prevalence vectors that lie outside the simplex

    • -
    -

  • -
  • n_jobs – number of parallel workers

  • -
-
-
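A configuration sketch combining the options above (train and test are assumed to be available):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import PACC
>>> pacc = PACC(LogisticRegression(), val_split=5, solver='minimize',
...             method='inversion', norm='clip')
>>> pacc.fit(train)
>>> prev = pacc.quantify(test.instances)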
-
-
-aggregate(classif_posteriors)[source]
-

Implements the aggregation of label predictions.

-
-
Parameters:
-

classif_predictions – np.ndarray of label predictions

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Estimates the misclassification rates

-
-
Parameters:
-
-
-
-
- -
-
-classmethod getPteCondEstim(classes, y, y_)[source]
-
- -
- -
-
-class quapy.method.aggregative.PCC(classifier: Optional[BaseEstimator] = None)[source]
-

Bases: AggregativeSoftQuantifier

-

Probabilistic Classify & Count, the probabilistic variant of CC that relies on the posterior probabilities returned by a probabilistic classifier.

-
-
Parameters:
-

classifier – a sklearn’s Estimator that generates a classifier

-
-
-
-
-aggregate(classif_posteriors)[source]
-

Implements the aggregation of label predictions.

-
-
Parameters:
-

classif_predictions – np.ndarray of label predictions

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Nothing to do here!

-
-
Parameters:
-
    -
  • classif_predictions – not used

  • -
  • data – not used

  • -
-
-
-
- -
- -
-
-quapy.method.aggregative.ProbabilisticAdjustedClassifyAndCount
-

alias of PACC

-
- -
-
-quapy.method.aggregative.ProbabilisticClassifyAndCount
-

alias of PCC

-
- -
-
-quapy.method.aggregative.SLD
-

alias of EMQ

-
- -
-
-class quapy.method.aggregative.SMM(classifier: Optional[BaseEstimator] = None, val_split=5)[source]
-

Bases: AggregativeSoftQuantifier, BinaryAggregativeQuantifier

-

SMM method (SMM). SMM is a simplification of matching distribution methods where the representation of the examples is created using the mean instead of a histogram (conceptually equivalent to PACC).

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a binary classifier.

  • -
  • val_split – a float in range (0,1) indicating the proportion of data to be used as a stratified held-out validation distribution, or a quapy.data.base.LabelledCollection (the split itself), or an integer indicating the number of folds (default 5).

  • -
-
-
-
-
-aggregate(classif_posteriors)[source]
-

Implements the aggregation of label predictions.

-
-
Parameters:
-

classif_predictions – np.ndarray of label predictions

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Trains the aggregation function of SMM.

-
-
Parameters:
-
-
-
-
- -
- -
-
-quapy.method.aggregative.newELM(svmperf_base=None, loss='01', C=1)[source]
-

Explicit Loss Minimization (ELM) quantifiers. Quantifiers based on ELM represent a family of methods based on structured output learning; these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss measure. This implementation relies on Joachims’ SVM perf structured output learning algorithm, which has to be installed and patched for the purpose (see this script). This function is equivalent to:

-
>>> CC(SVMperf(svmperf_base, loss, C))
-
-
-
-
Parameters:
-
    -
  • svmperf_base – path to the folder containing the binary files of SVM perf; if set to None (default), this path will be obtained from qp.environ[‘SVMPERF_HOME’]

  • -
  • loss – the loss to optimize (see quapy.classification.svmperf.SVMperf.valid_losses)

  • -
  • C – trade-off between training error and margin (default 1)

  • -
-
-
Returns:
-

returns an instance of CC set to work with SVMperf (with loss and C set properly) as the -underlying classifier
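A usage sketch (the SVMPERF_HOME path is an assumption about where the patched binaries were installed; train and test are assumed data objects):

>>> import quapy as qp
>>> from quapy.method.aggregative import newELM
>>> qp.environ['SVMPERF_HOME'] = './svm_perf_quantification'
>>> svm_01 = newELM(loss='01', C=1)
>>> svm_01.fit(train)
>>> prev = svm_01.quantify(test.instances)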

-
-
-
- -
-
-quapy.method.aggregative.newSVMAE(svmperf_base=None, C=1)[source]
-

SVM(AE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Absolute Error as first used by Moreo and Sebastiani, 2021. Equivalent to:

-
>>> CC(SVMperf(svmperf_base, loss='mae', C=C))
-
-
-

Quantifiers based on ELM represent a family of methods based on structured output learning; these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss measure. This implementation relies on Joachims’ SVM perf structured output learning algorithm, which has to be installed and patched for the purpose (see this script). This function is a wrapper around CC(SVMperf(svmperf_base, loss, C)).

-
-
Parameters:
-
    -
  • svmperf_base – path to the folder containing the binary files of SVM perf; if set to None (default), this path will be obtained from qp.environ[‘SVMPERF_HOME’]

  • -
  • C – trade-off between training error and margin (default 1)

  • -
-
-
Returns:
-

returns an instance of CC set to work with SVMperf (with loss and C set properly) as the -underlying classifier

-
-
-
- -
-
-quapy.method.aggregative.newSVMKLD(svmperf_base=None, C=1)[source]
-

SVM(KLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence normalized via the logistic function, as proposed by Esuli et al. 2015. Equivalent to:

-
>>> CC(SVMperf(svmperf_base, loss='nkld', C=C))
-
-
-

Quantifiers based on ELM represent a family of methods based on structured output learning; these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss measure. This implementation relies on Joachims’ SVM perf structured output learning algorithm, which has to be installed and patched for the purpose (see this script). This function is a wrapper around CC(SVMperf(svmperf_base, loss, C)).

-
-
Parameters:
-
    -
  • svmperf_base – path to the folder containing the binary files of SVM perf; if set to None (default), this path will be obtained from qp.environ[‘SVMPERF_HOME’]

  • -
  • C – trade-off between training error and margin (default 1)

  • -
-
-
Returns:
-

returns an instance of CC set to work with SVMperf (with loss and C set properly) as the -underlying classifier

-
-
-
- -
-
-quapy.method.aggregative.newSVMQ(svmperf_base=None, C=1)[source]
-

SVM(Q) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Q loss, which combines a classification-oriented loss and a quantification-oriented loss, as proposed by Barranquero et al. 2015. Equivalent to:

-
>>> CC(SVMperf(svmperf_base, loss='q', C=C))
-
-
-

Quantifiers based on ELM represent a family of methods based on structured output learning; these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss measure. This implementation relies on Joachims’ SVM perf structured output learning algorithm, which has to be installed and patched for the purpose (see this script). This function is a wrapper around CC(SVMperf(svmperf_base, loss, C)).

-
-
Parameters:
-
    -
  • svmperf_base – path to the folder containing the binary files of SVM perf; if set to None (default), this path will be obtained from qp.environ[‘SVMPERF_HOME’]

  • -
  • C – trade-off between training error and margin (default 1)

  • -
-
-
Returns:
-

returns an instance of CC set to work with SVMperf (with loss and C set properly) as the -underlying classifier

-
-
-
- -
-
-quapy.method.aggregative.newSVMRAE(svmperf_base=None, C=1)[source]
-

SVM(RAE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Relative Absolute Error as first used by Moreo and Sebastiani, 2021. Equivalent to:

-
>>> CC(SVMperf(svmperf_base, loss='mrae', C=C))
-
-
-

Quantifiers based on ELM represent a family of methods based on structured output learning; these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss measure. This implementation relies on Joachims’ SVM perf structured output learning algorithm, which has to be installed and patched for the purpose (see this script). This function is a wrapper around CC(SVMperf(svmperf_base, loss, C)).

-
-
Parameters:
-
    -
  • svmperf_base – path to the folder containing the binary files of SVM perf; if set to None (default), this path will be obtained from qp.environ[‘SVMPERF_HOME’]

  • -
  • C – trade-off between training error and margin (default 1)

  • -
-
-
Returns:
-

returns an instance of CC set to work with SVMperf (with loss and C set properly) as the -underlying classifier

-
-
-
- -
-
-class quapy.method._kdey.KDEBase[source]
-

Bases: object

-

Common ancestor for KDE-based methods. Implements some common routines.

-
-
-BANDWIDTH_METHOD = ['scott', 'silverman']
-
- -
-
-get_kde_function(X, bandwidth)[source]
-

Wraps the KDE function from scikit-learn.

-
-
Parameters:
-
    -
  • X – data for which the density function is to be estimated

  • -
  • bandwidth – the bandwidth of the kernel

  • -
-
-
Returns:
-

a scikit-learn’s KernelDensity object

-
-
-
- -
-
-get_mixture_components(X, y, classes, bandwidth)[source]
-

Returns an array containing the mixture components, i.e., the KDE functions for each class.

-
-
Parameters:
-
    -
  • X – the data containing the covariates

  • -
  • y – the class labels

  • -
  • classes – array-like with the class labels

  • -
  • bandwidth – float, the bandwidth of the kernel

  • -
-
-
Returns:
-

a list of KernelDensity objects, each fitted with the corresponding class-specific covariates

-
-
-
- -
-
-pdf(kde, X)[source]
-

Wraps the density evaluation of scikit-learn’s KDE. Scikit-learn returns log-scores (s), so this function returns \(e^{s}\)

-
-
Parameters:
-
    -
  • kde – a previously fit KDE function

  • -
  • X – the data for which the density is to be estimated

  • -
-
-
Returns:
-

np.ndarray with the densities
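The log-score-to-density conversion is essentially the following sketch (plain scikit-learn; X_val and X_test are assumed arrays):

>>> import numpy as np
>>> from sklearn.neighbors import KernelDensity
>>> kde = KernelDensity(bandwidth=0.1).fit(X_val)
>>> densities = np.exp(kde.score_samples(X_test))   # exp of the returned log-scores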

-
-
-
- -
- -
-
-class quapy.method._kdey.KDEyCS(classifier: Optional[BaseEstimator] = None, val_split=5, bandwidth=0.1)[source]
-

Bases: AggregativeSoftQuantifier

-

Kernel Density Estimation model for quantification (KDEy) relying on the Cauchy-Schwarz divergence (CS) as the divergence measure to be minimized. This method was first proposed in the paper Kernel Density Estimation for Multiclass Quantification, in which the authors proposed a Monte Carlo approach for minimizing the divergence.

-

The distribution matching optimization problem comes down to solving:

-

\(\hat{\alpha} = \arg\min_{\alpha\in\Delta^{n-1}} \mathcal{D}(\boldsymbol{p}_{\alpha}||q_{\widetilde{U}})\)

-

where \(p_{\alpha}\) is the mixture of class-specific KDEs with mixture parameter (hence class prevalence) \(\alpha\) defined by

-

\(\boldsymbol{p}_{\alpha}(\widetilde{x}) = \sum_{i=1}^n \alpha_i p_{\widetilde{L}_i}(\widetilde{x})\)

-

where \(p_X(\boldsymbol{x}) = \frac{1}{|X|} \sum_{x_i\in X} K\left(\frac{x-x_i}{h}\right)\) is the KDE function that uses the datapoints in X as the kernel centers.

-

In KDEy-CS, the divergence is taken to be the Cauchy-Schwarz divergence given by:

-

\(\mathcal{D}_{\mathrm{CS}}(p||q)=-\log\left(\frac{\int p(x)q(x)dx}{\sqrt{\int p(x)^2dx \int q(x)^2dx}}\right)\)

-

The authors showed that this distribution matching admits a closed-form solution

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a binary classifier.

  • -
  • val_split – specifies the data used for generating classifier predictions. This specification can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to be extracted from the training set; or as an integer (default 5), indicating that the predictions are to be generated in a k-fold cross-validation manner (with this integer indicating the value for k); or as a collection defining the specific set of data to use for validation. Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated.

  • -
  • bandwidth – float, the bandwidth of the Kernel

  • -
-
-
-
-
-aggregate(posteriors: ndarray)[source]
-

Implements the aggregation of label predictions.

-
-
Parameters:
-

classif_predictions – np.ndarray of label predictions

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Trains the aggregation function.

-
-
Parameters:
-
-
-
-
- -
-
-gram_matrix_mix_sum(X, Y=None)[source]
-
- -
- -
-
-class quapy.method._kdey.KDEyHD(classifier: Optional[BaseEstimator] = None, val_split=5, divergence: str = 'HD', bandwidth=0.1, random_state=None, montecarlo_trials=10000)[source]
-

Bases: AggregativeSoftQuantifier, KDEBase

-

Kernel Density Estimation model for quantification (KDEy) relying on the squared Hellinger Distance (HD) as the divergence measure to be minimized. This method was first proposed in the paper Kernel Density Estimation for Multiclass Quantification, in which the authors proposed a Monte Carlo approach for minimizing the divergence.

-

The distribution matching optimization problem comes down to solving:

-

\(\hat{\alpha} = \arg\min_{\alpha\in\Delta^{n-1}} \mathcal{D}(\boldsymbol{p}_{\alpha}||q_{\widetilde{U}})\)

-

where \(p_{\alpha}\) is the mixture of class-specific KDEs with mixture parameter (hence class prevalence) \(\alpha\) defined by

-

\(\boldsymbol{p}_{\alpha}(\widetilde{x}) = \sum_{i=1}^n \alpha_i p_{\widetilde{L}_i}(\widetilde{x})\)

-

where \(p_X(\boldsymbol{x}) = \frac{1}{|X|} \sum_{x_i\in X} K\left(\frac{x-x_i}{h}\right)\) is the KDE function that uses the datapoints in X as the kernel centers.

-

In KDEy-HD, the divergence is taken to be the squared Hellinger Distance, an f-divergence with corresponding f-generator function given by:

-

\(f(u)=(\sqrt{u}-1)^2\)

-

The authors proposed a Monte Carlo solution that relies on importance sampling:

-

\(\hat{D}_f(p||q)= \frac{1}{t} \sum_{i=1}^t f\left(\frac{p(x_i)}{q(x_i)}\right) \frac{q(x_i)}{r(x_i)}\)

-

where the datapoints (trials) \(x_1,\ldots,x_t\sim_{\mathrm{iid}} r\) with \(r\) the uniform distribution.

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a binary classifier.

  • -
  • val_split – specifies the data used for generating classifier predictions. This specification can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to be extracted from the training set; or as an integer (default 5), indicating that the predictions are to be generated in a k-fold cross-validation manner (with this integer indicating the value for k); or as a collection defining the specific set of data to use for validation. Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated.

  • -
  • bandwidth – float, the bandwidth of the Kernel

  • -
  • random_state – a seed to be set before fitting any base quantifier (default None)

  • -
  • montecarlo_trials – number of Monte Carlo trials (default 10000)

  • -
-
-
-
-
-aggregate(posteriors: ndarray)[source]
-

Implements the aggregation of label predictions.

-
-
Parameters:
-

classif_predictions – np.ndarray of label predictions

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Trains the aggregation function.

-
-
Parameters:
-
-
-
-
- -
- -
-
-class quapy.method._kdey.KDEyML(classifier: Optional[BaseEstimator] = None, val_split=5, bandwidth=0.1, random_state=None)[source]
-

Bases: AggregativeSoftQuantifier, KDEBase

-

Kernel Density Estimation model for quantification (KDEy) relying on the Kullback-Leibler divergence (KLD) as the divergence measure to be minimized. This method was first proposed in the paper Kernel Density Estimation for Multiclass Quantification, in which the authors show that minimizing the distribution matching criterion for KLD is akin to performing maximum likelihood (ML).

-

The distribution matching optimization problem comes down to solving:

-

\(\hat{\alpha} = \arg\min_{\alpha\in\Delta^{n-1}} \mathcal{D}(\boldsymbol{p}_{\alpha}||q_{\widetilde{U}})\)

-

where \(p_{\alpha}\) is the mixture of class-specific KDEs with mixture parameter (hence class prevalence) \(\alpha\) defined by

-

\(\boldsymbol{p}_{\alpha}(\widetilde{x}) = \sum_{i=1}^n \alpha_i p_{\widetilde{L}_i}(\widetilde{x})\)

-

where \(p_X(\boldsymbol{x}) = \frac{1}{|X|} \sum_{x_i\in X} K\left(\frac{x-x_i}{h}\right)\) is the KDE function that uses the datapoints in X as the kernel centers.

-

In KDEy-ML, the divergence is taken to be the Kullback-Leibler Divergence. This is equivalent to solving: \(\hat{\alpha} = \arg\min_{\alpha\in\Delta^{n-1}} -\mathbb{E}_{q_{\widetilde{U}}} \left[ \log \boldsymbol{p}_{\alpha}(\widetilde{x}) \right]\)

-

which corresponds to the maximum likelihood estimate.
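A usage sketch (train and test assumed; the module path follows the class name given above):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method._kdey import KDEyML
>>> kdey = KDEyML(LogisticRegression(), val_split=5, bandwidth=0.1, random_state=0)
>>> kdey.fit(train)
>>> prev = kdey.quantify(test.instances)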

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a binary classifier.

  • -
  • val_split – specifies the data used for generating classifier predictions. This specification can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to be extracted from the training set; or as an integer (default 5), indicating that the predictions are to be generated in a k-fold cross-validation manner (with this integer indicating the value for k); or as a collection defining the specific set of data to use for validation. Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated.

  • -
  • bandwidth – float, the bandwidth of the Kernel

  • -
  • random_state – a seed to be set before fitting any base quantifier (default None)

  • -
-
-
-
-
-aggregate(posteriors: ndarray)[source]
-

Searches for the mixture model parameter (the sought prevalence values) that maximizes the likelihood of the data (i.e., that minimizes the negative log-likelihood).

-
-
Parameters:
-

posteriors – instances in the sample converted into posterior probabilities

-
-
Returns:
-

a vector of class prevalence estimates

-
-
-
- -
-
-aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Trains the aggregation function.

-
-
Parameters:
-
-
-
-
- -
- -
-
-class quapy.method._neural.QuaNetModule(doc_embedding_size, n_classes, stats_size, lstm_hidden_size=64, lstm_nlayers=1, ff_layers=[1024, 512], bidirectional=True, qdrop_p=0.5, order_by=0)[source]
-

Bases: Module

-

Implements the QuaNet forward pass. See QuaNetTrainer for training QuaNet.

-
-
Parameters:
-
    -
  • doc_embedding_size – integer, the dimensionality of the document embeddings

  • -
  • n_classes – integer, number of classes

  • -
  • stats_size – integer, number of statistics estimated by simple quantification methods

  • -
  • lstm_hidden_size – integer, hidden dimensionality of the LSTM cell

  • -
  • lstm_nlayers – integer, number of LSTM layers

  • -
  • ff_layers – list of integers, dimensions of the densely-connected FF layers on top of the quantification embedding

  • -
  • bidirectional – boolean, whether or not to use bidirectional LSTM

  • -
  • qdrop_p – float, dropout probability

  • -
  • order_by – integer, class for which the document embeddings are to be sorted

  • -
-
-
-
-
-property device
-
- -
-
-forward(doc_embeddings, doc_posteriors, statistics)[source]
-

Defines the computation performed at every call.

-

Should be overridden by all subclasses.

-
-

Note

-

Although the recipe for the forward pass needs to be defined within this function, one should call the Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.

-
-
- -
-
-training: bool
-
- -
- -
-
-class quapy.method._neural.QuaNetTrainer(classifier, sample_size=None, n_epochs=100, tr_iter_per_poch=500, va_iter_per_poch=100, lr=0.001, lstm_hidden_size=64, lstm_nlayers=1, ff_layers=[1024, 512], bidirectional=True, qdrop_p=0.5, patience=10, checkpointdir='../checkpoint', checkpointname=None, device='cuda')[source]
-

Bases: BaseQuantifier

-

Implementation of QuaNet, a neural network for quantification. This implementation uses PyTorch and can take advantage of a GPU for speeding up the training phase.

-

Example:

-
>>> import quapy as qp
>>> from quapy.method.meta import QuaNet
>>> from quapy.classification.neural import NeuralClassifierTrainer, CNNnet
>>>
>>> # use samples of 100 elements
>>> qp.environ['SAMPLE_SIZE'] = 100
>>>
>>> # load the kindle dataset as text, and convert words to numerical indexes
>>> dataset = qp.datasets.fetch_reviews('kindle', pickle=True)
>>> qp.data.preprocessing.index(dataset, min_df=5, inplace=True)
>>>
>>> # the text classifier is a CNN trained by NeuralClassifierTrainer
>>> cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)
>>> classifier = NeuralClassifierTrainer(cnn, device='cuda')
>>>
>>> # train QuaNet (QuaNet is an alias to QuaNetTrainer)
>>> model = QuaNet(classifier, qp.environ['SAMPLE_SIZE'], device='cuda')
>>> model.fit(dataset.training)
>>> estim_prevalence = model.quantify(dataset.test.instances)
-
-
-
Parameters:
-
    -
  • classifier – an object implementing fit (i.e., that can be trained on labelled data), predict_proba (i.e., that can generate posterior probabilities of unlabelled examples) and transform (i.e., that can generate embedded representations of the unlabelled instances).

  • -
  • sample_size – integer, the sample size; default is None, meaning that the sample size should be taken from qp.environ[“SAMPLE_SIZE”]

  • -
  • n_epochs – integer, maximum number of training epochs

  • -
  • tr_iter_per_poch – integer, number of training iterations before considering an epoch complete

  • -
  • va_iter_per_poch – integer, number of validation iterations to perform after each epoch

  • -
  • lr – float, the learning rate

  • -
  • lstm_hidden_size – integer, hidden dimensionality of the LSTM cells

  • -
  • lstm_nlayers – integer, number of LSTM layers

  • -
  • ff_layers – list of integers, dimensions of the densely-connected FF layers on top of the quantification embedding

  • -
  • bidirectional – boolean, indicates whether the LSTM is bidirectional or not

  • -
  • qdrop_p – float, dropout probability

  • -
  • patience – integer, number of epochs showing no improvement in the validation set before stopping the training phase (early stopping)

  • -
  • checkpointdir – string, a path where to store models’ checkpoints

  • -
  • checkpointname – string (optional), the name of the model’s checkpoint

  • -
  • device – string, indicating “cpu” or “cuda”

  • -
-
-
-
-
-property classes_
-
- -
-
-clean_checkpoint()[source]
-

Removes the checkpoint

-
- -
-
-clean_checkpoint_dir()[source]
-

Removes anything contained in the checkpoint directory

-
- -
-
-fit(data: LabelledCollection, fit_classifier=True)[source]
-

Trains QuaNet.

-
-
Parameters:
-
    -
  • data – the training data on which to train QuaNet. If fit_classifier=True, the data will be split in 40/40/20 for training the classifier, training QuaNet, and validating QuaNet, respectively. If fit_classifier=False, the data will be split in 66/34 for training QuaNet and validating it, respectively.

  • -
  • fit_classifier – if True, trains the classifier on a split containing 40% of the data

  • -
-
-
Returns:
-

self

-
-
-
- -
-
-get_params(deep=True)[source]
-

Get parameters for this estimator.

-
-
Parameters:
-

deep (bool, default=True) – If True, will return the parameters for this estimator and contained subobjects that are estimators.

-
-
Returns:
-

params – Parameter names mapped to their values.

-
-
Return type:
-

dict

-
-
-
- -
-
-quantify(instances)[source]
-

Generate class prevalence estimates for the sample’s instances

-
-
Parameters:
-

instances – array-like

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-set_params(**parameters)[source]
-

Set the parameters of this estimator.

-

The method works on simple estimators as well as on nested objects (such as Pipeline). The latter have parameters of the form <component>__<parameter> so that it’s possible to update each component of a nested object.

-
-
Parameters:
-

**params (dict) – Estimator parameters.

-
-
Returns:
-

self – Estimator instance.

-
-
Return type:
-

estimator instance

-
-
-
- -
- -
-
-quapy.method._neural.mae_loss(output, target)[source]
-

Torch-like wrapper for the Mean Absolute Error

-
-
Parameters:
-
    -
  • output – predictions

  • -
  • target – ground truth values

  • -
-
-
Returns:
-

mean absolute error loss
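Presumably along the lines of the following sketch (a plausible implementation, not necessarily the exact source):

>>> import torch
>>> def mae_loss(output, target):
...     # mean of elementwise absolute differences between predictions and ground truth
...     return torch.mean(torch.abs(output - target))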

-
-
-
- -
-
-class quapy.method._threshold_optim.MAX(classifier: Optional[BaseEstimator] = None, val_split=5)[source]
-

Bases: ThresholdOptimization

-

Threshold Optimization variant for ACC as proposed by Forman 2006 and Forman 2008 that looks for the threshold that maximizes tpr-fpr. The goal is to bring improved stability to the denominator of the adjustment.

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a classifier

  • -
  • val_split – indicates the proportion of data to be used as a stratified held-out validation set in which the misclassification rates are to be estimated. This parameter can be indicated as a real value (between 0 and 1), representing a proportion of validation data; as an integer, indicating that the misclassification rates should be estimated via k-fold cross validation (this integer stands for the number of folds k, default 5); or as a quapy.data.base.LabelledCollection (the split itself).

  • -
-
-
-
-
-condition(tpr, fpr) float[source]
-

Implements the criterion according to which the threshold should be selected. This function should return the (float) score to be minimized.

-
-
Parameters:
-
    -
  • tpr – float, true positive rate

  • -
  • fpr – float, false positive rate

  • -
-
-
Returns:
-

float, a score for the given tpr and fpr
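Since the framework minimizes the returned score, maximizing tpr-fpr can be expressed as returning its negation; a sketch (not necessarily the exact source):

>>> def condition(self, tpr, fpr):
...     return fpr - tpr   # minimizing fpr - tpr maximizes tpr - fpr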

-
-
-
- -
- -
-
-class quapy.method._threshold_optim.MS(classifier: Optional[BaseEstimator] = None, val_split=5)[source]
-

Bases: ThresholdOptimization

-

Median Sweep. Threshold Optimization variant for ACC as proposed by Forman 2006 and Forman 2008 that generates class prevalence estimates for all decision thresholds and returns the median of them all. The goal is to bring improved stability to the denominator of the adjustment.

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a classifier

  • -
  • val_split – indicates the proportion of data to be used as a stratified held-out validation set in which the misclassification rates are to be estimated. This parameter can be indicated as a real value (between 0 and 1), representing a proportion of validation data; as an integer, indicating that the misclassification rates should be estimated via k-fold cross validation (this integer stands for the number of folds k, default 5); or as a quapy.data.base.LabelledCollection (the split itself).

  • -
-
-
-
-
-aggregate(classif_predictions: ndarray)[source]
-

Implements the aggregation of label predictions.

-
-
Parameters:
-

classif_predictions – np.ndarray of label predictions

-
-
Returns:
-

np.ndarray of shape (n_classes,) with class prevalence estimates.

-
-
-
- -
-
-aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
-

Trains the aggregation function.

-
-
Parameters:
-
-
-
-
- -
-
-condition(tpr, fpr) float[source]
-

Implements the criterion according to which the threshold should be selected. This function should return the (float) score to be minimized.

-
-
Parameters:
-
    -
  • tpr – float, true positive rate

  • -
  • fpr – float, false positive rate

  • -
-
-
Returns:
-

float, a score for the given tpr and fpr

-
-
-
- -
- -
-
-class quapy.method._threshold_optim.MS2(classifier: Optional[BaseEstimator] = None, val_split=5)[source]
-

Bases: MS

-

Median Sweep 2. Threshold Optimization variant for ACC as proposed by Forman 2006 and Forman 2008 that generates class prevalence estimates for all decision thresholds and returns the median of those cases in which tpr-fpr > 0.25. The goal is to bring improved stability to the denominator of the adjustment.

-
-
Parameters:
-
    -
  • classifier – a sklearn’s Estimator that generates a classifier

  • -
  • val_split – indicates the proportion of data to be used as a stratified held-out validation set in which the misclassification rates are to be estimated. This parameter can be indicated as a real value (between 0 and 1), representing a proportion of validation data; as an integer, indicating that the misclassification rates should be estimated via k-fold cross validation (this integer stands for the number of folds k, default 5); or as a quapy.data.base.LabelledCollection (the split itself).

  • -
-
-
-
-
-discard(tpr, fpr) bool[source]
-

Indicates whether a combination of tpr and fpr should be discarded

-
-
Parameters:
-
    -
  • tpr – float, true positive rate

  • -
  • fpr – float, false positive rate

  • -
-
-
Returns:
-

true if the combination is to be discarded, false otherwise

-
-
-
- -
- -
-
class quapy.method._threshold_optim.T50(classifier: Optional[BaseEstimator] = None, val_split=5) [source]

Bases: ThresholdOptimization

Threshold Optimization variant for ACC as proposed by Forman 2006 and Forman 2008 that looks for the threshold that makes tpr closest to 0.5. The goal is to bring improved stability to the denominator of the adjustment.

Parameters:
  • classifier – a sklearn Estimator that generates a classifier
  • val_split – indicates the data to be used as a stratified held-out validation set on which the misclassification rates are estimated. This parameter can be a real value in (0, 1), representing a proportion of validation data; an integer, indicating that the misclassification rates should be estimated via k-fold cross-validation (the integer stands for the number of folds k; default 5); or a quapy.data.base.LabelledCollection (the split itself).

condition(tpr, fpr) → float [source]

Implements the criterion according to which the threshold should be selected. This function should return the (float) score to be minimized.

Parameters:
  • tpr – float, true positive rate
  • fpr – float, false positive rate

Returns:
  float, a score for the given tpr and fpr
class quapy.method._threshold_optim.ThresholdOptimization(classifier: Optional[BaseEstimator] = None, val_split=None, n_jobs=None) [source]

Bases: BinaryAggregativeQuantifier

Abstract class of Threshold Optimization variants for ACC as proposed by Forman 2006 and Forman 2008. The goal is to bring improved stability to the denominator of the adjustment. The different variants are based on different heuristics for choosing a decision threshold that would allow for more true positives and many more false positives, on the grounds that this would deliver larger denominators.

Parameters:
  • classifier – a sklearn Estimator that generates a classifier
  • val_split – indicates the data to be used as a stratified held-out validation set on which the misclassification rates are estimated. This parameter can be a real value in (0, 1), representing a proportion of validation data; an integer, indicating that the misclassification rates should be estimated via k-fold cross-validation (the integer stands for the number of folds k; default 5); or a quapy.data.base.LabelledCollection (the split itself).
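Since the concrete variants (T50, X, MS, MS2) differ only in their condition() and, optionally, discard() implementations, a custom criterion can be expressed as a small subclass. A minimal sketch (the class name and criterion are illustrative, not part of QuaPy):

>>> from quapy.method._threshold_optim import ThresholdOptimization
>>>
>>> class MaxTPR(ThresholdOptimization):
>>>     """Hypothetical variant: select the threshold that maximizes tpr."""
>>>     def condition(self, tpr, fpr) -> float:
>>>         return 1 - tpr  # the score is smallest where tpr is largest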
aggregate(classif_predictions: ndarray) [source]

Implements the aggregation of label predictions.

Parameters:
  classif_predictions – np.ndarray of label predictions

Returns:
  np.ndarray of shape (n_classes,) with class prevalence estimates.

aggregate_with_threshold(classif_predictions, tprs, fprs, thresholds) [source]

aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection) [source]

Trains the aggregation function.

Parameters:
  • classif_predictions – a quapy.data.base.LabelledCollection containing, as instances, the label predictions issued by the classifier and, as labels, the true labels
  • data – a quapy.data.base.LabelledCollection consisting of the training data

abstract condition(tpr, fpr) → float [source]

Implements the criterion according to which the threshold should be selected. This function should return the (float) score to be minimized.

Parameters:
  • tpr – float, true positive rate
  • fpr – float, false positive rate

Returns:
  float, a score for the given tpr and fpr

discard(tpr, fpr) → bool [source]

Indicates whether a combination of tpr and fpr should be discarded.

Parameters:
  • tpr – float, true positive rate
  • fpr – float, false positive rate

Returns:
  True if the combination is to be discarded, False otherwise
class quapy.method._threshold_optim.X(classifier: Optional[BaseEstimator] = None, val_split=5) [source]

Bases: ThresholdOptimization

Threshold Optimization variant for ACC as proposed by Forman 2006 and Forman 2008 that looks for the threshold that yields tpr = 1 - fpr. The goal is to bring improved stability to the denominator of the adjustment.

Parameters:
  • classifier – a sklearn Estimator that generates a classifier
  • val_split – indicates the data to be used as a stratified held-out validation set on which the misclassification rates are estimated. This parameter can be a real value in (0, 1), representing a proportion of validation data; an integer, indicating that the misclassification rates should be estimated via k-fold cross-validation (the integer stands for the number of folds k; default 5); or a quapy.data.base.LabelledCollection (the split itself).

condition(tpr, fpr) → float [source]

Implements the criterion according to which the threshold should be selected. This function should return the (float) score to be minimized.

Parameters:
  • tpr – float, true positive rate
  • fpr – float, false positive rate

Returns:
  float, a score for the given tpr and fpr

quapy.method.base module

class quapy.method.base.BaseQuantifier [source]

Bases: BaseEstimator

Abstract Quantifier. A quantifier is defined as an object of a class that implements the method fit() on quapy.data.base.LabelledCollection, the method quantify(), and the set_params() and get_params() methods for model selection (see quapy.model_selection.GridSearchQ()).
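As a concrete illustration of this contract, the following minimal sketch implements a (deliberately naive) quantifier that always predicts the uniform distribution; the class name is illustrative and not part of QuaPy:

>>> import numpy as np
>>> from quapy.data.base import LabelledCollection
>>> from quapy.method.base import BaseQuantifier
>>>
>>> class UniformQuantifier(BaseQuantifier):
>>>     def fit(self, data: LabelledCollection):
>>>         self.n_classes = data.n_classes  # remember the number of classes
>>>         return self
>>>     def quantify(self, instances):
>>>         # ignore the instances; return the uniform prevalence vector
>>>         return np.full(self.n_classes, 1 / self.n_classes)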
abstract fit(data: LabelledCollection) [source]

Trains a quantifier.

Parameters:
  data – a quapy.data.base.LabelledCollection consisting of the training data

Returns:
  self

abstract quantify(instances) [source]

Generate class prevalence estimates for the sample's instances.

Parameters:
  instances – array-like

Returns:
  np.ndarray of shape (n_classes,) with class prevalence estimates.
class quapy.method.base.BinaryQuantifier [source]

Bases: BaseQuantifier

Abstract class of binary quantifiers, i.e., quantifiers estimating class prevalence values for only two classes (typically, to be interpreted as one class and its complement).

class quapy.method.base.OneVsAll [source]

Bases: object

class quapy.method.base.OneVsAllGeneric(binary_quantifier: BaseQuantifier, n_jobs=None) [source]

Bases: OneVsAll, BaseQuantifier

Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary quantifier for each class, and then l1-normalizes the outputs so that the class prevalence values sum up to 1.
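Example (a usage sketch, wrapping the binary MS quantifier documented above around a multiclass dataset; the fetch_twitter loader and the 'hcr' dataset name follow quapy.data.datasets):

>>> import quapy as qp
>>> from quapy.method.base import newOneVsAll
>>> from quapy.method._threshold_optim import MS
>>> from sklearn.linear_model import LogisticRegression
>>> dataset = qp.datasets.fetch_twitter('hcr', pickle=True)
>>> model = newOneVsAll(MS(LogisticRegression()))
>>> model.fit(dataset.training)
>>> estim_prevalence = model.quantify(dataset.test.instances)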
property classes_

fit(data: LabelledCollection, fit_classifier=True) [source]

Trains a quantifier.

Parameters:
  data – a quapy.data.base.LabelledCollection consisting of the training data

Returns:
  self

quantify(instances) [source]

Generate class prevalence estimates for the sample's instances.

Parameters:
  instances – array-like

Returns:
  np.ndarray of shape (n_classes,) with class prevalence estimates.

quapy.method.base.newOneVsAll(binary_quantifier: BaseQuantifier, n_jobs=None) [source]

quapy.method.meta module

quapy.method.meta.EACC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs) [source]

Implements an ensemble of quapy.method.aggregative.ACC quantifiers, as used by Pérez-Gállego et al., 2019.

Equivalent to:

>>> ensembleFactory(classifier, ACC, param_grid, optim, param_mod_sel, **kwargs)

See ensembleFactory() for further details.

Parameters:
  • classifier – a sklearn Estimator that generates a classifier
  • param_grid – a dictionary with the grid of parameters to optimize for
  • optim – a valid quantification or classification error, or a string name of it
  • param_mod_sel – a dictionary containing any keyword argument to pass to quapy.model_selection.GridSearchQ
  • kwargs – kwargs for the class Ensemble

Returns:
  an instance of Ensemble

quapy.method.meta.ECC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs) [source]

Implements an ensemble of quapy.method.aggregative.CC quantifiers, as used by Pérez-Gállego et al., 2019.

Equivalent to:

>>> ensembleFactory(classifier, CC, param_grid, optim, param_mod_sel, **kwargs)

See ensembleFactory() for further details.

Parameters:
  • classifier – a sklearn Estimator that generates a classifier
  • param_grid – a dictionary with the grid of parameters to optimize for
  • optim – a valid quantification or classification error, or a string name of it
  • param_mod_sel – a dictionary containing any keyword argument to pass to quapy.model_selection.GridSearchQ
  • kwargs – kwargs for the class Ensemble

Returns:
  an instance of Ensemble

quapy.method.meta.EEMQ(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs) [source]

Implements an ensemble of quapy.method.aggregative.EMQ quantifiers.

Equivalent to:

>>> ensembleFactory(classifier, EMQ, param_grid, optim, param_mod_sel, **kwargs)

See ensembleFactory() for further details.

Parameters:
  • classifier – a sklearn Estimator that generates a classifier
  • param_grid – a dictionary with the grid of parameters to optimize for
  • optim – a valid quantification or classification error, or a string name of it
  • param_mod_sel – a dictionary containing any keyword argument to pass to quapy.model_selection.GridSearchQ
  • kwargs – kwargs for the class Ensemble

Returns:
  an instance of Ensemble

quapy.method.meta.EHDy(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs) [source]

Implements an ensemble of quapy.method.aggregative.HDy quantifiers, as used by Pérez-Gállego et al., 2019.

Equivalent to:

>>> ensembleFactory(classifier, HDy, param_grid, optim, param_mod_sel, **kwargs)

See ensembleFactory() for further details.

Parameters:
  • classifier – a sklearn Estimator that generates a classifier
  • param_grid – a dictionary with the grid of parameters to optimize for
  • optim – a valid quantification or classification error, or a string name of it
  • param_mod_sel – a dictionary containing any keyword argument to pass to quapy.model_selection.GridSearchQ
  • kwargs – kwargs for the class Ensemble

Returns:
  an instance of Ensemble

quapy.method.meta.EPACC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs) [source]

Implements an ensemble of quapy.method.aggregative.PACC quantifiers.

Equivalent to:

>>> ensembleFactory(classifier, PACC, param_grid, optim, param_mod_sel, **kwargs)

See ensembleFactory() for further details.

Parameters:
  • classifier – a sklearn Estimator that generates a classifier
  • param_grid – a dictionary with the grid of parameters to optimize for
  • optim – a valid quantification or classification error, or a string name of it
  • param_mod_sel – a dictionary containing any keyword argument to pass to quapy.model_selection.GridSearchQ
  • kwargs – kwargs for the class Ensemble

Returns:
  an instance of Ensemble
class quapy.method.meta.Ensemble(quantifier: BaseQuantifier, size=50, red_size=25, min_pos=5, policy='ave', max_sample_size=None, val_split: Optional[Union[LabelledCollection, float]] = None, n_jobs=None, verbose=False) [source]

Bases: BaseQuantifier

Implementation of the Ensemble methods for quantification described by Pérez-Gállego et al., 2017 and Pérez-Gállego et al., 2019. The policies implemented include:

  • Average (policy='ave'): computes class prevalence estimates as the average of the estimates returned by the base quantifiers.
  • Training Prevalence (policy='ptr'): applies a dynamic selection to the ensemble's members by retaining only those members such that the class prevalence values in the samples they use as training set are closest to preliminary class prevalence estimates computed as the average of the estimates of all the members. The final estimate is recomputed by considering only the selected members.
  • Distribution Similarity (policy='ds'): performs a dynamic selection of base members by retaining the members trained on samples whose distribution of posterior probabilities is closest, in terms of the Hellinger Distance, to the distribution of posterior probabilities in the test sample.
  • Accuracy (policy='<valid error name>'): performs a static selection of the ensemble members by retaining those that minimize a quantification error measure, which is passed as an argument.

Example:

>>> model = Ensemble(quantifier=ACC(LogisticRegression()), size=30, policy='ave', n_jobs=-1)

Parameters:
  • quantifier – base quantification member of the ensemble
  • size – number of members
  • red_size – number of members to retain after selection (depending on the policy)
  • min_pos – minimum number of positive instances to consider a sample as valid
  • policy – the selection policy; available policies include: ave (default), ptr, ds, and accuracy (which is instantiated via a valid error name, e.g., mae)
  • max_sample_size – maximum number of instances to consider in the samples (set to None to indicate no limit; default)
  • val_split – a float in range (0, 1) indicating the proportion of data to be used as a stratified held-out validation split, or a quapy.data.base.LabelledCollection (the split itself)
  • n_jobs – number of parallel workers (default 1)
  • verbose – set to True (default is False) to get some information in standard output

VALID_POLICIES = {'ave', 'ds', 'mae', 'mkld', 'mnae', 'mnkld', 'mnrae', 'mrae', 'mse', 'ptr'}
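A fuller end-to-end sketch, using the reviews loader that appears elsewhere in this documentation:

>>> import quapy as qp
>>> from quapy.method.aggregative import ACC
>>> from quapy.method.meta import Ensemble
>>> from sklearn.linear_model import LogisticRegression
>>> train, test = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5).train_test
>>> model = Ensemble(quantifier=ACC(LogisticRegression()), size=30, policy='ave', n_jobs=-1)
>>> model.fit(train)
>>> estim_prevalence = model.quantify(test.instances)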
property aggregative

Indicates that the quantifier is not aggregative.

Returns:
  False

fit(data: LabelledCollection, val_split: Optional[Union[LabelledCollection, float]] = None) [source]

Trains a quantifier.

Parameters:
  data – a quapy.data.base.LabelledCollection consisting of the training data

Returns:
  self

get_params(deep=True) [source]

This function should not be used within quapy.model_selection.GridSearchQ (it is here only for compatibility with the abstract class). Instead, use Ensemble(GridSearchQ(q), ...), with q a Quantifier (recommended), or Ensemble(Q(GridSearchCV(l))) with Q a quantifier class that has a classifier l optimized for classification (not recommended).

Parameters:
  deep – for compatibility with scikit-learn

Returns:
  raises an Exception

property probabilistic

Indicates that the quantifier is not probabilistic.

Returns:
  False

quantify(instances) [source]

Generate class prevalence estimates for the sample's instances.

Parameters:
  instances – array-like

Returns:
  np.ndarray of shape (n_classes,) with class prevalence estimates.

set_params(**parameters) [source]

This function should not be used within quapy.model_selection.GridSearchQ (it is here only for compatibility with the abstract class). Instead, use Ensemble(GridSearchQ(q), ...), with q a Quantifier (recommended), or Ensemble(Q(GridSearchCV(l))) with Q a quantifier class that has a classifier l optimized for classification (not recommended).

Parameters:
  parameters – dictionary

Returns:
  raises an Exception
class quapy.method.meta.MedianEstimator(base_quantifier: BinaryQuantifier, param_grid: dict, random_state=None, n_jobs=None) [source]

Bases: BinaryQuantifier

This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the estimates returned by differently (hyper)parameterized base quantifiers. The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions, i.e., in cases of binary quantification.

Parameters:
  • base_quantifier – the base, binary quantifier
  • random_state – a seed to be set before fitting any base quantifier (default None)
  • param_grid – the grid of parameters over which the median will be computed
  • n_jobs – number of parallel workers
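A usage sketch, assuming PACC as the base binary quantifier and a grid over the regularization strength of its logistic regression classifier (the 'classifier__C' key follows the nested-parameter convention used for aggregative quantifiers in this documentation):

>>> import numpy as np
>>> import quapy as qp
>>> from quapy.method.aggregative import PACC
>>> from quapy.method.meta import MedianEstimator
>>> from sklearn.linear_model import LogisticRegression
>>> train, test = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5).train_test
>>> model = MedianEstimator(
>>>     base_quantifier=PACC(LogisticRegression()),
>>>     param_grid={'classifier__C': np.logspace(-2, 2, 5)},
>>>     n_jobs=-1)
>>> model.fit(train)
>>> estim_prevalence = model.quantify(test.instances)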
fit(training: LabelledCollection) [source]

Trains a quantifier.

Parameters:
  training – a quapy.data.base.LabelledCollection consisting of the training data

Returns:
  self

get_params(deep=True) [source]

Get parameters for this estimator.

Parameters:
  deep (bool, default=True) – If True, will return the parameters for this estimator and contained subobjects that are estimators.

Returns:
  params – Parameter names mapped to their values.

Return type:
  dict

quantify(instances) [source]

Generate class prevalence estimates for the sample's instances.

Parameters:
  instances – array-like

Returns:
  np.ndarray of shape (n_classes,) with class prevalence estimates.

set_params(**params) [source]

Set the parameters of this estimator.

The method works on simple estimators as well as on nested objects (such as Pipeline). The latter have parameters of the form <component>__<parameter> so that it's possible to update each component of a nested object.

Parameters:
  **params (dict) – Estimator parameters.

Returns:
  self – Estimator instance.

Return type:
  estimator instance
class quapy.method.meta.MedianEstimator2(base_quantifier: BinaryQuantifier, param_grid: dict, random_state=None, n_jobs=None) [source]

Bases: BinaryQuantifier

This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the estimates returned by differently (hyper)parameterized base quantifiers. The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions, i.e., in cases of binary quantification.

Parameters:
  • base_quantifier – the base, binary quantifier
  • random_state – a seed to be set before fitting any base quantifier (default None)
  • param_grid – the grid of parameters over which the median will be computed
  • n_jobs – number of parallel workers
fit(training: LabelledCollection) [source]

Trains a quantifier.

Parameters:
  training – a quapy.data.base.LabelledCollection consisting of the training data

Returns:
  self

get_params(deep=True) [source]

Get parameters for this estimator.

Parameters:
  deep (bool, default=True) – If True, will return the parameters for this estimator and contained subobjects that are estimators.

Returns:
  params – Parameter names mapped to their values.

Return type:
  dict

quantify(instances) [source]

Generate class prevalence estimates for the sample's instances.

Parameters:
  instances – array-like

Returns:
  np.ndarray of shape (n_classes,) with class prevalence estimates.

set_params(**params) [source]

Set the parameters of this estimator.

The method works on simple estimators as well as on nested objects (such as Pipeline). The latter have parameters of the form <component>__<parameter> so that it's possible to update each component of a nested object.

Parameters:
  **params (dict) – Estimator parameters.

Returns:
  self – Estimator instance.

Return type:
  estimator instance
quapy.method.meta.ensembleFactory(classifier, base_quantifier_class, param_grid=None, optim=None, param_model_sel: Optional[dict] = None, **kwargs) [source]

Ensemble factory. Provides a unified interface for instantiating ensembles that can be optimized (via model selection for quantification) for a given evaluation metric using quapy.model_selection.GridSearchQ. If the evaluation metric is classification-oriented (instead of quantification-oriented), then the optimization will be carried out via sklearn's GridSearchCV.

Example: instantiate an Ensemble based on quapy.method.aggregative.PACC in which the base members are optimized for quapy.error.mae() via quapy.model_selection.GridSearchQ. The ensemble follows the policy Accuracy based on quapy.error.mae() (the same measure being optimized), meaning that a static selection of members of the ensemble is made based on their performance in terms of this error.

>>> param_grid = {
>>>     'C': np.logspace(-3,3,7),
>>>     'class_weight': ['balanced', None]
>>> }
>>> param_mod_sel = {
>>>     'sample_size': 500,
>>>     'protocol': 'app'
>>> }
>>> common={
>>>     'max_sample_size': 1000,
>>>     'n_jobs': -1,
>>>     'param_grid': param_grid,
>>>     'param_mod_sel': param_mod_sel,
>>> }
>>>
>>> ensembleFactory(LogisticRegression(), PACC, optim='mae', policy='mae', **common)

Parameters:
  • classifier – a sklearn Estimator that generates a classifier
  • base_quantifier_class – a class of quantifiers
  • param_grid – a dictionary with the grid of parameters to optimize for
  • optim – a valid quantification or classification error, or a string name of it
  • param_model_sel – a dictionary containing any keyword argument to pass to quapy.model_selection.GridSearchQ
  • kwargs – kwargs for the class Ensemble

Returns:
  an instance of Ensemble

quapy.method.meta.get_probability_distribution(posterior_probabilities, bins=8) [source]

Gets a histogram out of the posterior probabilities (only for the binary case).

Parameters:
  • posterior_probabilities – array-like of shape (n_instances, 2,)
  • bins – integer

Returns:
  np.ndarray with the relative frequencies for each bin (for the positive class only)
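For instance, the following sketch bins the positive-class posteriors of four instances into 8 bins:

>>> import numpy as np
>>> from quapy.method.meta import get_probability_distribution
>>> posteriors = np.asarray([[0.9, 0.1], [0.4, 0.6], [0.2, 0.8], [0.3, 0.7]])
>>> get_probability_distribution(posteriors, bins=8)  # histogram over the positive-class scores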

quapy.method.non_aggregative module

class quapy.method.non_aggregative.DMx(nbins=8, divergence: Union[str, Callable] = 'HD', cdf=False, search='optim_minimize', n_jobs=None) [source]

Bases: BaseQuantifier

Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of covariates. This implementation takes the number of bins, the divergence, and the possibility to work on CDFs as hyperparameters.

Parameters:
  • nbins – number of bins used to discretize the distributions (default 8)
  • divergence – a string representing a divergence measure (currently, "HD" and "topsoe" are implemented) or a callable function taking two ndarrays of the same dimension as input (default "HD", meaning Hellinger Distance)
  • cdf – whether to use CDF instead of PDF (default False)
  • n_jobs – number of parallel workers (default None)
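A usage sketch on numeric covariates (the UCI loader and dataset name follow quapy.data.datasets):

>>> import quapy as qp
>>> from quapy.method.non_aggregative import DMx
>>> train, test = qp.datasets.fetch_ucibinarydataset('haberman').train_test
>>> model = DMx(nbins=8, divergence='HD')
>>> model.fit(train)
>>> estim_prevalence = model.quantify(test.instances)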
classmethod HDx(n_jobs=None) [source]

Hellinger Distance x (HDx). HDx is a method for training binary quantifiers that models quantification as the problem of minimizing the average divergence (in terms of the Hellinger Distance) across the feature-specific normalized histograms of two representations, one for the unlabelled examples, and another generated from the training examples as a mixture model of the class-specific representations. The parameters of the mixture thus represent the estimates of the class prevalence values.

The method computes all matchings for nbins in [10, 20, ..., 110] and reports the mean of the median. The best prevalence is searched via linear search, from 0 to 1 stepping by 0.01.

Parameters:
  n_jobs – number of parallel workers

Returns:
  an instance of this class set up to mimic the performance of the HDx as originally proposed by González-Castro, Alaiz-Rodríguez, Alegre (2013)

fit(data: LabelledCollection) [source]

Generates the validation distributions out of the training data (covariates). The validation distributions have shape (n, nfeats, nbins), with n the number of classes, nfeats the number of features, and nbins the number of bins. In particular, let V be the validation distributions; then di=V[i] are the distributions obtained from training data labelled with class i, dij=di[j] is the discrete distribution for feature j in training data labelled with class i, and dij[k] is the fraction of instances with a value in the k-th bin.

Parameters:
  data – the training set

quantify(instances) [source]

Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution (the mixture) that best matches the test distribution, in terms of the divergence measure of choice. The matching is computed as the average dissimilarity (in terms of the dissimilarity measure of choice) between all feature-specific discrete distributions.

Parameters:
  instances – instances in the sample

Returns:
  a vector of class prevalence estimates
quapy.method.non_aggregative.DistributionMatchingX

alias of DMx

class quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation [source]

Bases: BaseQuantifier

The Maximum Likelihood Prevalence Estimation (MLPE) method is a lazy method that assumes there is no prior probability shift between training and test instances (put another way, that the i.i.d. assumption holds). The estimation of class prevalence values for any test sample is always (i.e., irrespective of the test sample itself) the class prevalence seen during training. This method is considered to be a lower-bound quantifier that any quantification method should beat.
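A minimal sketch using MLPE as a baseline (the reviews loader follows quapy.data.datasets):

>>> import quapy as qp
>>> from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
>>> train, test = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5).train_test
>>> baseline = MaximumLikelihoodPrevalenceEstimation().fit(train)
>>> baseline.quantify(test.instances)  # the training prevalence, regardless of the test sample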
fit(data: LabelledCollection) [source]

Computes the training prevalence and stores it.

Parameters:
  data – the training sample

Returns:
  self

quantify(instances) [source]

Ignores the input instances and returns, as the class prevalence estimates, the training prevalence.

Parameters:
  instances – array-like (ignored)

Returns:
  the class prevalence seen during training
class quapy.method.non_aggregative.ReadMe(bootstrap_trials=100, bootstrap_range=100, bagging_trials=100, bagging_range=25, **vectorizer_kwargs) [source]

Bases: BaseQuantifier

Implementation of the ReadMe method (Hopkins & King, 2010).
fit(data: LabelledCollection) [source]

Trains a quantifier.

Parameters:
  data – a quapy.data.base.LabelledCollection consisting of the training data

Returns:
  self

quantify(instances) [source]

Generate class prevalence estimates for the sample's instances.

Parameters:
  instances – array-like

Returns:
  np.ndarray of shape (n_classes,) with class prevalence estimates.

std_constrained_linear_ls(X, class_cond_X: dict) [source]

quapy.method.composable module

This module allows the composition of quantification methods from loss functions and feature transformations. This functionality is realized through an integration of the qunfold package: https://github.com/mirkobunse/qunfold.

class quapy.method.composable.BlobelLoss [source]

Bases: FunctionLoss

The loss function of RUN (Blobel, 1985).

This loss function models a likelihood function under the assumption of independent Poisson-distributed elements of q with Poisson rates M*p.
class quapy.method.composable.CVClassifier(estimator, n_estimators=5, random_state=None) [source]

Bases: BaseEstimator, ClassifierMixin

An ensemble of classifiers that are trained from cross-validation folds.

All objects of this type have a fixed attribute oob_score = True and, when trained, a fitted attribute self.oob_decision_function_, just like scikit-learn bagging classifiers.

Parameters:
  • estimator – A classifier that implements the API of scikit-learn.
  • n_estimators (optional) – The number of stratified cross-validation folds. Defaults to 5.
  • random_state (optional) – The random state for stratification. Defaults to None.

Examples

Here, we create an instance of ACC that trains a logistic regression classifier with 10 cross-validation folds.

>>> ACC(CVClassifier(LogisticRegression(), 10))

fit(X, y) [source]

predict(X) [source]

predict_proba(X) [source]
class quapy.method.composable.ClassTransformer(classifier, is_probabilistic=False, fit_classifier=True) [source]

Bases: AbstractTransformer

A classification-based feature transformation.

This transformation can either be probabilistic (using the posterior predictions of a classifier) or crisp (using the class predictions of a classifier). It is used in ACC, PACC, CC, PCC, and SLD.

Parameters:
  • classifier – A classifier that implements the API of scikit-learn.
  • is_probabilistic (optional) – Whether probabilistic or crisp predictions of the classifier are used to transform the data. Defaults to False.
  • fit_classifier (optional) – Whether to fit the classifier when this quantifier is fitted. Defaults to True.
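For instance, combining this transformer (in its probabilistic form) with the LeastSquaresLoss documented below yields a PACC-like composed quantifier; a sketch under that assumption (CVClassifier provides the required out-of-bag posteriors):

>>> from quapy.method.composable import (
>>>     ComposableQuantifier,
>>>     LeastSquaresLoss,
>>>     ClassTransformer,
>>>     CVClassifier,
>>> )
>>> from sklearn.linear_model import LogisticRegression
>>> pacc_like = ComposableQuantifier(
>>>     LeastSquaresLoss(),
>>>     ClassTransformer(CVClassifier(LogisticRegression()), is_probabilistic=True)
>>> )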
fit_transform(X, y, average=True, n_classes=None) [source]

This abstract method has to fit the transformer and to return the transformation of the input data.

Note

Implementations of this abstract method should check the sanity of labels by calling _check_y(y, n_classes) and they must set the property self.p_trn = class_prevalences(y, n_classes).

Parameters:
  • X – The feature matrix to which this transformer will be fitted.
  • y – The labels to which this transformer will be fitted.
  • average (optional) – Whether to return a transfer matrix M or a transformation (f(X), y). Defaults to True.
  • n_classes (optional) – The number of expected classes. Defaults to None.

Returns:
  A transfer matrix M if average==True or a transformation (f(X), y) if average==False.

transform(X, average=True) [source]

This abstract method has to transform the data X.

Parameters:
  • X – The feature matrix that will be transformed.
  • average (optional) – Whether to return a vector q or a transformation f(X). Defaults to True.

Returns:
  A vector q = f(X).mean(axis=0) if average==True or a transformation f(X) if average==False.
class quapy.method.composable.CombinedLoss(*losses, weights=None) [source]

Bases: AbstractLoss

The weighted sum of multiple losses.

Parameters:
  • *losses – An arbitrary number of losses to be added together.
  • weights (optional) – An array of weights by which the losses are scaled.
quapy.method.composable.ComposableQuantifier(loss, transformer, **kwargs) [source]

A generic quantification / unfolding method that solves a linear system of equations.

This class represents any quantifier that can be described in terms of a loss function, a feature transformation, and a regularization term. In this implementation, the loss is minimized through unconstrained second-order minimization. Valid probability estimates are ensured through a soft-max trick by Bunse (2022).

Parameters:
  • loss – An instance of a loss class from quapy.method.composable.
  • transformer – An instance of a transformer class from quapy.method.composable.
  • solver (optional) – The method argument in scipy.optimize.minimize. Defaults to "trust-ncg".
  • solver_options (optional) – The options argument in scipy.optimize.minimize. Defaults to {"gtol": 1e-8, "maxiter": 1000}.
  • seed (optional) – A random number generator seed from which a numpy RandomState is created. Defaults to None.

Examples

Here, we create the ordinal variant of ACC (Bunse et al., 2023). This variant consists of the original feature transformation of ACC and of the original loss of ACC, the latter of which is regularized towards smooth solutions.

>>> from qunfold.method.composable import (
>>>     ComposableQuantifier,
>>>     TikhonovRegularized,
>>>     LeastSquaresLoss,
>>>     ClassTransformer,
>>> )
>>> from sklearn.ensemble import RandomForestClassifier
>>> o_acc = ComposableQuantifier(
>>>     TikhonovRegularized(LeastSquaresLoss(), 0.01),
>>>     ClassTransformer(RandomForestClassifier(oob_score=True))
>>> )

Here, we perform hyper-parameter optimization with the ordinal ACC.

>>> quapy.model_selection.GridSearchQ(
>>>     model = o_acc,
>>>     param_grid = { # try both splitting criteria
>>>         "transformer__classifier__estimator__criterion": ["gini", "entropy"],
>>>     },
>>>     # ...
>>> )

To use a classifier that does not provide the oob_score argument, such as logistic regression, you have to configure a cross-validation of this classifier. Here, we employ 10 cross-validation folds; 5 folds are the default.

>>> from qunfold.method.composable import CVClassifier
>>> from sklearn.linear_model import LogisticRegression
>>> acc_lr = ComposableQuantifier(
>>>     LeastSquaresLoss(),
>>>     ClassTransformer(CVClassifier(LogisticRegression(), 10))
>>> )
class quapy.method.composable.DistanceTransformer(metric='euclidean', preprocessor=None) [source]

Bases: AbstractTransformer

A distance-based feature transformation, as it is used in EDx and EDy.

Parameters:
  • metric (optional) – The metric with which the distance between data items is measured. Can take any value that is accepted by scipy.spatial.distance.cdist. Defaults to "euclidean".
  • preprocessor (optional) – Another AbstractTransformer that is called before this transformer. Defaults to None.
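Since both this transformer and the EnergyLoss documented below are attributed to EDx, an EDx-like method can presumably be composed from the two; a sketch under that assumption:

>>> from quapy.method.composable import (
>>>     ComposableQuantifier,
>>>     EnergyLoss,
>>>     DistanceTransformer,
>>> )
>>> edx_like = ComposableQuantifier(EnergyLoss(), DistanceTransformer())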
fit_transform(X, y, average=True, n_classes=None) [source]

This abstract method has to fit the transformer and to return the transformation of the input data.

Note

Implementations of this abstract method should check the sanity of labels by calling _check_y(y, n_classes) and they must set the property self.p_trn = class_prevalences(y, n_classes).

Parameters:
  • X – The feature matrix to which this transformer will be fitted.
  • y – The labels to which this transformer will be fitted.
  • average (optional) – Whether to return a transfer matrix M or a transformation (f(X), y). Defaults to True.
  • n_classes (optional) – The number of expected classes. Defaults to None.

Returns:
  A transfer matrix M if average==True or a transformation (f(X), y) if average==False.

transform(X, average=True) [source]

This abstract method has to transform the data X.

Parameters:
  • X – The feature matrix that will be transformed.
  • average (optional) – Whether to return a vector q or a transformation f(X). Defaults to True.

Returns:
  A vector q = f(X).mean(axis=0) if average==True or a transformation f(X) if average==False.
class quapy.method.composable.EnergyKernelTransformer(preprocessor=None) [source]

Bases: AbstractTransformer

A kernel-based feature transformation, as it is used in KMM, that uses the energy kernel:

k(x_1, x_2) = ||x_1|| + ||x_2|| - ||x_1 - x_2||

Note

The methods of this transformer do not support setting average=False.

Parameters:
  preprocessor (optional) – Another AbstractTransformer that is called before this transformer. Defaults to None.

fit_transform(X, y, average=True, n_classes=None) [source]

This abstract method has to fit the transformer and to return the transformation of the input data.

Note

Implementations of this abstract method should check the sanity of labels by calling _check_y(y, n_classes) and they must set the property self.p_trn = class_prevalences(y, n_classes).

Parameters:
  • X – The feature matrix to which this transformer will be fitted.
  • y – The labels to which this transformer will be fitted.
  • average (optional) – Whether to return a transfer matrix M or a transformation (f(X), y). Defaults to True.
  • n_classes (optional) – The number of expected classes. Defaults to None.

Returns:
  A transfer matrix M if average==True or a transformation (f(X), y) if average==False.

transform(X, average=True) [source]

This abstract method has to transform the data X.

Parameters:
  • X – The feature matrix that will be transformed.
  • average (optional) – Whether to return a vector q or a transformation f(X). Defaults to True.

Returns:
  A vector q = f(X).mean(axis=0) if average==True or a transformation f(X) if average==False.

class quapy.method.composable.EnergyLoss [source]

Bases: FunctionLoss

The loss function of EDx (Kawakubo et al., 2016) and EDy (Castaño et al., 2022).

This loss function represents the Energy Distance between two samples.
class quapy.method.composable.GaussianKernelTransformer(sigma=1, preprocessor=None) [source]

Bases: AbstractTransformer

A kernel-based feature transformation, as it is used in KMM, that uses the gaussian kernel:

k(x, y) = exp(-||x - y||^2 / (2 * sigma^2))

Parameters:
  • sigma (optional) – A smoothing parameter of the kernel function. Defaults to 1.
  • preprocessor (optional) – Another AbstractTransformer that is called before this transformer. Defaults to None.

fit_transform(X, y, average=True, n_classes=None) [source]

This abstract method has to fit the transformer and to return the transformation of the input data.

Note

Implementations of this abstract method should check the sanity of labels by calling _check_y(y, n_classes) and they must set the property self.p_trn = class_prevalences(y, n_classes).

Parameters:
  • X – The feature matrix to which this transformer will be fitted.
  • y – The labels to which this transformer will be fitted.
  • average (optional) – Whether to return a transfer matrix M or a transformation (f(X), y). Defaults to True.
  • n_classes (optional) – The number of expected classes. Defaults to None.

Returns:
  A transfer matrix M if average==True or a transformation (f(X), y) if average==False.

transform(X, average=True) [source]

This abstract method has to transform the data X.

Parameters:
  • X – The feature matrix that will be transformed.
  • average (optional) – Whether to return a vector q or a transformation f(X). Defaults to True.

Returns:
  A vector q = f(X).mean(axis=0) if average==True or a transformation f(X) if average==False.
class quapy.method.composable.GaussianRFFKernelTransformer(sigma=1, n_rff=1000, preprocessor=None, seed=None) [source]

Bases: AbstractTransformer

An efficient approximation of the GaussianKernelTransformer, as it is used in KMM, using random Fourier features.

Parameters:
  • sigma (optional) – A smoothing parameter of the kernel function. Defaults to 1.
  • n_rff (optional) – The number of random Fourier features. Defaults to 1000.
  • preprocessor (optional) – Another AbstractTransformer that is called before this transformer. Defaults to None.
  • seed (optional) – Controls the randomness of the random Fourier features. Defaults to None.

fit_transform(X, y, average=True, n_classes=None) [source]

This abstract method has to fit the transformer and to return the transformation of the input data.

Note

Implementations of this abstract method should check the sanity of labels by calling _check_y(y, n_classes) and they must set the property self.p_trn = class_prevalences(y, n_classes).

Parameters:
  • X – The feature matrix to which this transformer will be fitted.
  • y – The labels to which this transformer will be fitted.
  • average (optional) – Whether to return a transfer matrix M or a transformation (f(X), y). Defaults to True.
  • n_classes (optional) – The number of expected classes. Defaults to None.

Returns:
  A transfer matrix M if average==True or a transformation (f(X), y) if average==False.

transform(X, average=True) [source]

This abstract method has to transform the data X.

Parameters:
  • X – The feature matrix that will be transformed.
  • average (optional) – Whether to return a vector q or a transformation f(X). Defaults to True.

Returns:
  A vector q = f(X).mean(axis=0) if average==True or a transformation f(X) if average==False.

class quapy.method.composable.HellingerSurrogateLoss [source]

Bases: FunctionLoss

The loss function of HDx and HDy (González-Castro et al., 2013).

This loss function computes the average of the squared Hellinger distances between feature-wise (or class-wise) histograms. Note that the original HDx and HDy by González-Castro et al. (2013) do not use the squared but the regular Hellinger distance. Their approach is problematic because the regular distance is not always twice differentiable and, hence, complicates numerical optimizations.
class quapy.method.composable.HistogramTransformer(n_bins, preprocessor=None, unit_scale=True) [source]

Bases: AbstractTransformer

A histogram-based feature transformation, as it is used in HDx and HDy.

Parameters:
  • n_bins – The number of bins in each feature.
  • preprocessor (optional) – Another AbstractTransformer that is called before this transformer. Defaults to None.
  • unit_scale (optional) – Whether or not to scale each output to a sum of one. A value of False indicates that the sum of each output is the number of features. Defaults to True.
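Combining this transformer with the HellingerSurrogateLoss above suggests an HDx-like composition; a sketch under that assumption:

>>> from quapy.method.composable import (
>>>     ComposableQuantifier,
>>>     HellingerSurrogateLoss,
>>>     HistogramTransformer,
>>> )
>>> hdx_like = ComposableQuantifier(
>>>     HellingerSurrogateLoss(),
>>>     HistogramTransformer(n_bins=10)
>>> )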
fit_transform(X, y, average=True, n_classes=None) [source]

This abstract method has to fit the transformer and to return the transformation of the input data.

Note

Implementations of this abstract method should check the sanity of labels by calling _check_y(y, n_classes) and they must set the property self.p_trn = class_prevalences(y, n_classes).

Parameters:
  • X – The feature matrix to which this transformer will be fitted.
  • y – The labels to which this transformer will be fitted.
  • average (optional) – Whether to return a transfer matrix M or a transformation (f(X), y). Defaults to True.
  • n_classes (optional) – The number of expected classes. Defaults to None.

Returns:
  A transfer matrix M if average==True or a transformation (f(X), y) if average==False.

transform(X, average=True) [source]

This abstract method has to transform the data X.

Parameters:
  • X – The feature matrix that will be transformed.
  • average (optional) – Whether to return a vector q or a transformation f(X). Defaults to True.

Returns:
  A vector q = f(X).mean(axis=0) if average==True or a transformation f(X) if average==False.
class quapy.method.composable.KernelTransformer(kernel) [source]

Bases: AbstractTransformer

A general kernel-based feature transformation, as it is used in KMM. If you intend to use a Gaussian kernel or energy kernel, prefer their dedicated and more efficient implementations over this class.

Note

The methods of this transformer do not support setting average=False.

Parameters:
  kernel – A callable that will be used as the kernel. Must follow the signature (X[y==i], X[y==j]) -> scalar.

fit_transform(X, y, average=True, n_classes=None) [source]

This abstract method has to fit the transformer and to return the transformation of the input data.

Note

Implementations of this abstract method should check the sanity of labels by calling _check_y(y, n_classes) and they must set the property self.p_trn = class_prevalences(y, n_classes).

Parameters:
  • X – The feature matrix to which this transformer will be fitted.
  • y – The labels to which this transformer will be fitted.
  • average (optional) – Whether to return a transfer matrix M or a transformation (f(X), y). Defaults to True.
  • n_classes (optional) – The number of expected classes. Defaults to None.

Returns:
  A transfer matrix M if average==True or a transformation (f(X), y) if average==False.

transform(X, average=True) [source]

This abstract method has to transform the data X.

Parameters:
  • X – The feature matrix that will be transformed.
  • average (optional) – Whether to return a vector q or a transformation f(X). Defaults to True.

Returns:
  A vector q = f(X).mean(axis=0) if average==True or a transformation f(X) if average==False.
class quapy.method.composable.LaplacianKernelTransformer(sigma=1) [source]

Bases: KernelTransformer

A kernel-based feature transformation, as it is used in KMM, that uses the laplacian kernel.

Parameters:
  sigma (optional) – A smoothing parameter of the kernel function. Defaults to 1.

property kernel

class quapy.method.composable.LeastSquaresLoss [source]

Bases: FunctionLoss

The loss function of ACC (Forman, 2008), PACC (Bella et al., 2019), and ReadMe (Hopkins & King, 2010).

This loss function computes the sum of squares of element-wise errors between q and M*p.

class quapy.method.composable.TikhonovRegularization [source]

Bases: AbstractLoss

Tikhonov regularization, as proposed by Blobel (1985).

This regularization promotes smooth solutions. This behavior is often required in ordinal quantification and in unfolding problems.
quapy.method.composable.TikhonovRegularized(loss, tau=0.0) [source]

Add TikhonovRegularization (Blobel, 1985) to any loss.

Calling this function is equivalent to calling

>>> CombinedLoss(loss, TikhonovRegularization(), weights=[1, tau])

Parameters:
  • loss – An instance from qunfold.losses.
  • tau (optional) – The regularization strength. Defaults to 0.

Returns:
  An instance of CombinedLoss.

Examples

The regularized loss of RUN (Blobel, 1985) is:

>>> TikhonovRegularized(BlobelLoss(), tau)

Module contents

diff --git a/docs/build/html/search.html b/docs/build/html/search.html
deleted file mode 100644
index 519edd4..0000000
--- a/docs/build/html/search.html
+++ /dev/null
@@ -1,131 +0,0 @@
[131 lines of generated Sphinx search-page HTML deleted]

diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js
deleted file mode 100644
index 2cf11c1..0000000
--- a/docs/build/html/searchindex.js
+++ /dev/null
@@ -1 +0,0 @@
[1 line of generated Sphinx search index (Search.setIndex) deleted]
5, 10], "n_instanc": [2, 3, 5], "correctli": 2, "when": [2, 3, 4, 5, 6, 7, 10, 12, 13], "some": [2, 4, 5, 6, 7, 10, 12, 13], "exampl": [0, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13], "len": 2, "occurr": 4, "receiv": [2, 6, 10, 12], "argument": [2, 4, 5, 6, 7, 10, 12, 13], "That": [2, 7, 11], "alreadi": [2, 4, 7, 10], "tri": [2, 5], "instanti": [2, 3, 5, 6, 7, 10, 11, 13], "correspond": [2, 4, 5, 6, 7, 12], "combinations_budget": 2, "n_repeat": [2, 13], "largest": 2, "equidist": 2, "point": [2, 4, 5, 10, 13], "combin": [2, 5, 10, 11, 13], "dimension": [2, 3, 4, 5], "do": [2, 3, 4, 5, 6, 7, 10, 11], "integ": [2, 3, 4, 5, 10], "maximum": [2, 3, 4, 5, 6, 10, 13], "allow": [2, 3, 4, 5, 6, 8, 10, 12, 13], "repetit": 2, "less": [2, 4, 5], "best": [2, 3, 5, 11], "explor": [2, 10, 11], "step": [2, 5, 10, 12], "ineffici": 2, "ad": [2, 5, 13], "complet": [2, 5, 10, 12], "earli": [2, 3, 5], "literatur": [2, 6, 7, 11, 13], "A": [2, 3, 4, 5, 6, 7, 10], "most": [2, 4, 5, 6, 7, 10, 12, 13], "power": 2, "altern": [2, 5, 7, 11], "found": [2, 3, 4, 5, 6, 10], "unnormalized_arr": 2, "code": [2, 3, 6, 7, 8, 10, 11, 12, 13], "adapt": [2, 3], "mathieu": [2, 5, 10], "blondel": [2, 5, 10], "bsd": 2, "licens": 2, "accompani": 2, "paper": [2, 3, 5, 6, 10], "akinori": 2, "fujino": 2, "naonori": 2, "ueda": 2, "scale": [2, 3, 5, 10], "multiclass": [2, 4, 5, 10, 13], "support": [2, 4, 5, 10], "machin": [2, 3, 7, 10, 11], "euclidean": [2, 5], "icpr": 2, "2014": 2, "url": 2, "n": [2, 3, 5, 6, 13], "v": [2, 3, 5, 10], "matrix": [2, 5, 10, 12], "consist": [2, 3, 4, 5, 6, 11, 12, 13], "l1": [2, 5, 10], "convert": [2, 3, 4, 5, 10], "n_prevpoint": [2, 13], "equal": [2, 5, 13], "distant": [2, 13], "calcul": [2, 5], "binom": 2, "c": [2, 3, 4, 5, 10, 11], "time": [2, 4, 5, 6, 7, 10, 13], "r": [2, 4, 5, 6, 10], "mass": 2, "block": [2, 6], "alloc": [2, 3], "solut": [2, 5, 10], "star": 2, "bar": 2, "For": [2, 4, 5, 6, 7, 11, 12, 13], "5": [2, 3, 4, 5, 6, 7, 10, 11, 12, 13], "25": [2, 3, 5, 10, 12], "75": [2, 5, 11, 12, 13], "50": [2, 5, 6, 12], "yield": [2, 4, 5, 12, 13], "smallest": 2, "lost": 2, "constrain": [2, 4, 12, 13], "slsqp": 2, "routin": [2, 4, 5, 10], "posterior": [2, 3, 5, 10], "crisp": [2, 5, 7, 10], "decis": [2, 3, 5], "take": [2, 4, 5, 6, 10, 12, 13], "argmax": 2, "grid_point": 2, "21": [2, 10, 12, 13], "repeat": [2, 12, 13], "smooth_limits_epsilon": 2, "produc": [2, 6, 7, 12, 13], "uniformli": [2, 13], "separ": [2, 4], "By": [2, 10], "05": [2, 5, 12], "limit": [2, 5, 12, 13], "10": [2, 3, 5, 6, 11, 12, 13], "15": [2, 4, 10], "90": [2, 12], "95": 2, "99": 2, "interv": [2, 12, 13], "quantiti": [2, 10], "add": [2, 4, 5, 10, 11], "subtract": [2, 4, 6], "p_c_cond_i": [], "p_c": 5, "invers": [2, 5, 10], "invari": [2, 5, 10], "ratio": [2, 5, 10], "exact": [2, 4, 5, 6, 10, 13], "solv": [2, 5, 10, 11], "equat": [2, 5, 10], "misclassif": 5, "entri": [2, 5, 6, 7], "being": [2, 5, 7, 11, 13], "belong": [2, 5, 10, 11], "end": [2, 5, 11], "option": [2, 4, 5, 6, 10, 12], "mai": [2, 10], "exist": [2, 10, 13], "degener": [2, 10], "vaz": [2, 5, 10], "et": [2, 3, 4, 5, 6, 8, 10], "al": [2, 3, 4, 5, 6, 8, 10], "replac": [2, 4, 5, 6, 10], "last": [2, 3, 4, 5, 10, 12, 13], "system": [2, 5, 7, 10, 11], "rank": [2, 3, 5, 10], "strictli": [2, 5], "full": [2, 4, 5, 13], "deprec": [2, 5, 13], "alwai": [2, 5, 10, 12], "prec": [2, 6], "3": [2, 3, 4, 5, 6, 7, 10, 11, 12, 13], "represent": [2, 3, 5, 6, 10], "33": [2, 6, 12], "67": [2, 12], "kraemer": [2, 13], "algorithm": [2, 5, 6, 13], "random": [2, 4, 5, 10, 12, 13], "unit": [2, 5, 6], "post": 2, 
"stackexchang": 2, "question": 2, "3227": 2, "uniform": [2, 4, 5], "_": [2, 4, 5, 6, 12], "param": [2, 3, 5, 11], "msg": 2, "param_grid": [2, 5, 11, 13], "dict": [2, 4, 5], "type": [2, 4, 5, 6, 10], "refit": [2, 11], "n_job": [2, 3, 4, 5, 10, 11, 13], "raise_error": 2, "grid": [2, 5, 11, 13], "target": [2, 3, 5, 10, 12], "orient": [2, 5, 8, 10], "hyperparamet": [2, 5, 10, 13], "dictionari": [2, 3, 4, 5], "kei": [2, 4], "ones": [2, 4, 5, 7, 12], "those": [2, 3, 5, 7, 10, 11, 12], "quantification_error": 2, "whole": [2, 3, 6, 10, 11], "chosen": [2, 7, 11], "ignor": [2, 4, 5, 6], "gen": 2, "establish": 2, "timer": 2, "second": [2, 4, 5, 6, 7, 10, 12], "configur": [2, 5, 11], "test": [2, 3, 4, 5, 6, 7, 10, 11, 12, 13], "whenev": [2, 12], "longer": [2, 5], "timeouterror": 2, "except": [2, 5, 10], "bound": [2, 5], "ani": [2, 3, 4, 5, 6, 10, 11, 12, 13], "mark": [2, 11], "goe": [2, 11], "howev": [2, 6, 11, 12], "valueerror": 2, "through": [2, 5, 10], "after": [2, 5, 10], "hyper": [2, 3, 5, 11], "learn": [2, 3, 4, 5, 7, 10, 11], "select": [0, 2, 4, 5, 6, 7, 9, 10, 13], "self": [2, 3, 4, 5, 10, 11], "deep": [2, 5, 10], "unus": [2, 3, 10], "contanin": 2, "enum": 2, "enumer": 2, "4": [2, 4, 6, 7, 10, 11, 12, 13], "nfold": [2, 4, 6], "akin": [2, 5], "scikit": [2, 3, 4, 5, 10, 11], "fold": [2, 4, 5, 10], "cross": [2, 3, 4, 5, 10], "seed": [2, 4, 5, 13], "reproduc": [2, 4], "expand": 2, "100": [2, 3, 4, 5, 6, 7, 10, 11, 12, 13], "b": [2, 4, 5, 6, 10], "print": [2, 3, 4, 6, 7, 10, 11, 13], "assign": [2, 4, 10], "partit": [2, 3, 6], "anoth": [2, 5, 6, 7, 12, 13], "que": 2, "method_nam": [2, 5, 12], "pos_class": [2, 4], "titl": 2, "nbin": [2, 5, 11, 12], "colormap": 2, "matplotlib": 2, "color": [2, 12], "listedcolormap": 2, "vertical_xtick": 2, "legend": 2, "savepath": [2, 12], "box": [2, 10, 12], "local": 2, "bia": [2, 3, 5, 10], "sign": 2, "minu": 2, "differ": [0, 2, 4, 5, 6, 10, 11, 12, 13], "classs": 2, "experi": [2, 4, 6, 10, 12], "compon": [2, 3, 5], "cm": 2, "tab10": 2, "secondari": 2, "path": [2, 3, 4, 5, 6, 10, 12], "save": [2, 4, 10, 12], "shown": [2, 12], "global": 2, "show_std": [2, 12], "train_prev": [2, 12], "method_ord": 2, "diagon": 2, "along": [2, 5, 6, 7, 10], "axi": [2, 5, 12], "describ": [2, 5, 10], "henc": [2, 4, 5], "It": [2, 4, 5, 6, 7, 10, 11, 12], "though": [2, 10], "other": [2, 4, 5, 6, 7, 10, 12], "prefer": [2, 5, 7, 13], "deviat": [2, 4, 6, 12, 13], "band": [2, 12], "inconveni": 2, "compar": [2, 12], "high": [2, 5, 12], "leyend": 2, "hightlight": 2, "conduct": [2, 6], "same": [2, 4, 5, 6, 10, 11, 12, 13], "impos": [2, 11], "associ": 2, "tr_prev": [2, 5, 12], "n_bin": [2, 5, 12], "isomer": 2, "x_error": 2, "y_error": 2, "ttest_alpha": 2, "005": 2, "tail_density_threshold": 2, "top": [2, 5, 10], "region": 2, "shift": [2, 3, 5, 7, 10, 11, 13], "form": [2, 4, 5, 6, 10], "broken": [2, 12], "chart": 2, "either": [2, 5, 7, 10], "follow": [2, 4, 5, 6, 7, 10, 11, 12, 13], "hold": [2, 5], "ii": 2, "statist": [2, 5, 6, 13], "significantli": 2, "side": 2, "confid": [2, 10], "made": [2, 4, 5, 6, 8], "isometr": [2, 12], "percentil": 2, "divid": 2, "amount": [2, 5, 7, 10, 13], "abov": [2, 6, 8, 10, 12], "consid": [2, 3, 4, 5, 10, 12, 13], "involv": [2, 12], "similar": [2, 5], "threshold": [2, 5], "densiti": [2, 5], "below": [2, 4, 6, 10, 12], "tail": 2, "avoid": [2, 10], "outlier": 2, "error_nam": [2, 12], "show_dens": 2, "show_legend": 2, "logscal": 2, "vline": 2, "especi": 2, "cumberson": 2, "gain": 2, "understand": 2, "about": [2, 4, 5, 6, 10, 12], "how": [2, 4, 5, 6, 10, 11, 12], "fare": 
2, "prior": [2, 5, 7, 10, 11, 12, 13], "spectrum": [2, 12, 13], "low": [2, 3, 11, 12], "regim": 2, "highlight": 2, "vertic": 2, "dot": [2, 12], "line": [2, 10], "n_preval": [2, 13], "sanity_check": 2, "10000": [2, 5], "sample_prev": [2, 13], "artifici": [2, 10, 11, 12], "drawn": [2, 4, 6, 13], "extract": [2, 4, 5], "copi": [2, 4], "replic": [2, 13], "sequenc": [2, 13], "user": [2, 6, 12, 13], "skip": 2, "labelled_collect": [2, 13], "exhaust": 2, "depend": [2, 5, 6, 10, 11, 12], "11": [2, 6, 13], "9": [2, 6, 12], "implicit": 2, "return_constrained_dim": 2, "rest": [2, 3, 4], "note": [2, 4, 5, 7, 10, 12, 13], "quit": 2, "obvious": 2, "doe": [2, 5, 6, 8, 10], "determinist": 2, "anywher": 2, "multipli": 2, "realiz": [2, 5], "necessari": [2, 5], "abstract": [2, 3, 4, 5, 10], "parent": 2, "known": [2, 5, 6, 10, 11], "procedur": [2, 7, 10], "enforc": 2, "fulli": [2, 6], "In": [0, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13], "make": [2, 5, 6, 8, 10], "extend": [2, 5, 8, 10], "input": [2, 3, 4, 5, 10, 12], "arg": [2, 3, 4, 5], "prepar": 2, "accommod": [2, 6], "desir": [2, 4, 6, 7, 10, 13], "output": [2, 3, 4, 5, 6, 7, 10, 11, 13], "format": [2, 4, 5, 6, 12], "befor": [2, 3, 4, 5], "inherit": [2, 10, 13], "custom": [2, 4, 10, 13], "addit": [2, 10], "adher": 2, "properti": [2, 3, 4, 5, 10], "determin": [2, 11, 12, 13], "serv": [2, 4, 10], "alia": [2, 4, 5, 10], "domaina": 2, "domainb": 2, "mixture_point": 2, "mixtur": [2, 5, 10], "domain": 2, "control": [2, 5, 13], "preserv": [2, 4, 12], "draw": [2, 5], "specifi": [2, 3, 4, 5, 6, 10, 12, 13], "should": [2, 3, 4, 5, 6, 10, 11, 12], "zip": [2, 6, 12], "veri": [2, 6, 10, 12], "simpl": [2, 5, 6, 10, 12], "previous": [2, 5], "natur": 2, "therefor": 2, "approxim": [2, 3, 5, 10, 12], "classmethod": [2, 4, 5, 6], "pre_classif": 2, "in_plac": 2, "modifi": [2, 10], "version": [2, 3, 8], "pre": [2, 6, 10], "advanc": [2, 6, 11, 13], "hard": [2, 3, 5, 10], "modif": 2, "place": [2, 4], "new": [2, 4, 6, 13], "variant": [2, 5, 12], "reli": [2, 5, 10, 13], "cover": [2, 3, 13], "entir": [2, 6, 7, 10, 11, 12, 13], "sens": 2, "unlik": [2, 7], "endeavour": 2, "intract": 2, "patienc": [2, 3, 5], "lower_is_bett": 2, "stop": [2, 3, 5], "network": [2, 3, 4, 5, 6], "epoch": [2, 3, 5], "7": [2, 3, 5, 6, 10, 11, 12, 13], "improv": [2, 3, 5, 10], "best_epoch": 2, "best_scor": 2, "consecut": [2, 3, 4, 5], "monitor": 2, "obtaind": 2, "held": [2, 3, 5, 10, 11], "split": [2, 3, 4, 5, 6, 10, 12], "wors": [2, 12], "far": [2, 3, 4], "flag": 2, "keep": [2, 4], "track": 2, "seen": [2, 5, 12], "wa": [2, 4, 5, 6, 10, 12, 13], "o": [2, 6], "makedir": 2, "exist_ok": 2, "dir": [2, 5], "subdir": 2, "anotherdir": 2, "creat": [2, 5, 6], "file": [2, 3, 4, 5, 6, 12], "txt": 2, "archive_filenam": 2, "download": [2, 4, 6, 8, 10], "destin": 2, "filenam": 2, "dowload": 2, "home": [2, 4], "directori": [2, 3, 4, 5, 8], "perman": 2, "quapy_data": [2, 6], "func": 2, "slice": 2, "item": [2, 5], "work": [2, 4, 5, 10, 12, 13], "pass": [2, 3, 5, 6, 7, 12], "worker": [2, 3, 4, 5], "asarrai": [2, 7], "backend": [2, 5], "loki": [2, 5], "wrapper": [2, 3, 4, 5], "multiprocess": [2, 5], "delai": 2, "args_i": 2, "silent": [2, 5], "child": 2, "ensur": [2, 5], "numer": [2, 4, 5, 6, 7, 10], "handl": [2, 6], "open_arg": 2, "pickle_path": 2, "generation_func": 2, "fast": [2, 4, 6], "reus": [2, 4, 6, 10], "resourc": 2, "next": [2, 3, 4, 11], "invok": [2, 4, 6, 10, 13], "pickl": [2, 4, 5, 10], "def": [2, 6, 7, 10, 12], "some_arrai": 2, "mock": [2, 3], "rand": 2, "my_arrai": 2, "pkl": 2, "first": [2, 4, 5, 6, 7, 8, 10, 12], 
"text": [2, 3, 4, 5, 6, 10], "disk": [2, 4], "miss": 2, "context": 2, "tempor": [2, 3], "without": [2, 10], "outer": 2, "numpi": [2, 3, 5, 11, 13], "current": [2, 3, 4, 5, 10], "state": [2, 5], "random_se": 2, "within": [2, 5], "launch": 2, "close": [2, 4, 5, 10, 13], "start_msg": 2, "end_msg": 2, "sleep": 2, "begin": 2, "correct": [3, 5, 10], "temperatur": [3, 5, 10], "bct": [3, 5], "abstent": 3, "alexandari": [3, 5, 10], "stratifi": [3, 4, 5, 6, 10], "retrain": [3, 11], "afterward": [3, 5], "No": [3, 5], "nbv": [3, 5], "re": [3, 4, 10, 11], "kundaj": 3, "shrikumar": 3, "2020": [3, 10], "novemb": 3, "likelihood": [3, 5, 10], "beat": [3, 5], "intern": [3, 4, 5, 6, 7, 10], "confer": [3, 4, 6, 10], "pp": [3, 6, 10, 11], "222": [3, 6], "232": 3, "pmlr": 3, "baseestim": [3, 5, 10], "calibratorfactori": 3, "n_featur": [3, 5], "manner": [3, 5, 6], "val": [3, 4, 6, 13], "These": [3, 5, 6, 7, 8, 10, 13], "n_compon": 3, "kwarg": [3, 4, 5], "embed": [3, 5, 10], "requir": [3, 4, 5, 6, 7, 8, 10, 13], "quanet": [3, 5], "easili": [3, 6, 12], "sklearn": [3, 4, 5, 10, 11, 12, 13], "decomposit": 3, "truncatedsvd": 3, "while": [3, 4, 5, 6, 10, 12], "linear_model": [3, 5, 10, 11, 13], "logisticregress": [3, 5, 10, 11, 13], "princip": 3, "retain": [3, 5, 6, 10], "logist": [3, 5, 10], "regress": [3, 5], "map": [2, 3, 5, 7], "length": [3, 4], "eventu": [3, 4], "unalt": 3, "emb": 3, "embedding_s": 3, "hidden_s": 3, "256": [3, 6], "repr_siz": 3, "kernel_height": 3, "stride": 3, "pad": [3, 4], "drop_p": 3, "convolut": 3, "vocabulari": [3, 4], "word": [2, 3, 4, 5, 7, 10], "hidden": [3, 5, 12], "document": [3, 4, 5, 6, 10, 12], "kernel": [0, 2, 3, 5], "token": [3, 4, 6], "drop": 3, "dropout": [3, 5], "layer": [3, 5, 10], "batch": 3, "torch": [3, 5, 10], "dataload": 3, "tensor": 3, "n_dimens": [3, 5], "lstm_class_nlay": 3, "long": [3, 11], "short": 3, "memori": 3, "lstm": [3, 5, 10], "net": 3, "lr": [3, 5, 10], "001": [3, 5, 11], "weight_decai": 3, "200": [3, 6, 7], "batch_siz": 3, "64": [3, 5, 11], "batch_size_test": 3, "512": [3, 5, 6], "padding_length": 3, "300": [3, 6, 7], "cuda": [3, 5, 10], "checkpointpath": 3, "checkpoint": [3, 5], "classifier_net": 3, "dat": [3, 6], "weight": [3, 4, 5], "decai": 3, "wait": 3, "cpu": [3, 5], "enabl": 3, "gpu": [3, 5], "store": [3, 4, 5, 6], "vocab_s": 3, "reiniti": 3, "trainer": 3, "learner": [3, 5, 10, 11], "disjoint": 3, "embed_s": 3, "nn": 3, "pad_length": 3, "xavier": 3, "initi": [3, 5, 6], "shuffl": [3, 4], "dynam": [3, 4, 5, 10], "longest": 3, "shorter": 3, "svmperf_bas": [3, 5], "host_fold": 3, "classifiermixin": [3, 5], "svm": [3, 4, 5, 8, 10, 12], "perf": [3, 5, 8], "thorsten": 3, "joachim": [3, 5, 10], "patch": [3, 5, 8, 10], "instal": [3, 5, 6, 10], "further": [3, 4, 5, 6, 10], "detail": [3, 4, 5, 6, 7, 8, 10], "refer": [3, 4, 6], "esuli": [3, 4, 5, 6, 8, 10], "2015": [3, 5, 6, 8, 10], "barranquero": [3, 5, 8, 10], "svm_perf_learn": 3, "svm_perf_classifi": 3, "trade": [3, 5], "off": [3, 5], "margin": [3, 5], "std": 3, "avail": [3, 4, 5, 6, 7, 8, 10, 12], "qacc": 3, "qf1": 3, "qgm": 3, "tmp": 3, "automat": [3, 6], "delet": 3, "multivari": [3, 10], "12": 3, "26": [3, 6], "27": [3, 10], "13": [3, 6], "22": [3, 4, 6, 10], "23": [3, 10], "24": [3, 6, 7], "textual": [4, 6], "train_siz": 4, "6": [4, 6, 7, 10, 12], "conform": 4, "nrepeat": [4, 6], "around": [4, 5, 10, 13], "round": 4, "train_path": [4, 6], "test_path": [4, 6], "loader_func": [4, 6], "loader_kwarg": 4, "read": 4, "must": [2, 4, 5, 10], "loader": [4, 6], "n_train": 4, "n_test": 4, "quick": 4, "kindl": [4, 5, 
6, 10, 12], "tfidf": [4, 6, 11, 12, 13], "min_df": [4, 5, 10, 11, 12, 13], "tr": 4, "3821": [4, 6], "te": 4, "21591": [4, 6], "spars": [4, 6], "csr": 4, "csr_matrix": 4, "featur": [4, 5, 6], "4403": 4, "081": [4, 6], "919": [4, 6], "063": [4, 6], "937": [4, 6], "dedic": [4, 5, 6, 7], "attach": 4, "them": [4, 5, 6, 10], "sever": [4, 6, 8], "infer": [4, 6, 10], "linearsvc": [4, 10, 12], "my_collect": 4, "codefram": 4, "both": [4, 5, 12], "frequenc": [4, 5, 6], "actual": 5, "lead": 4, "empti": 4, "sinc": [4, 5, 6, 7, 10, 11, 12, 13], "met": 4, "whose": [4, 5], "train_prop": [4, 11, 13], "randomli": [4, 6], "stratif": [4, 5], "greater": 6, "single_sample_train": [4, 6], "for_model_select": [4, 6], "data_hom": 4, "ifcb": 4, "zenodo": [4, 6], "pleas": [4, 10], "link": 4, "publicli": [4, 6], "whoi": [4, 6], "plankton": 4, "repo": [2, 4, 6], "script": [4, 5, 6, 8, 10], "gonz\u00e1lez": [4, 5, 6, 10], "basic": [5, 12], "precomput": [6, 7], "togeth": [4, 5], "individu": [4, 10], "30": [4, 5, 6, 10, 11, 13], "86": [4, 6], "286": 4, "dump": 4, "leav": [2, 4], "quay_data": 4, "test_gen": [4, 6], "_ifcb": 4, "ifcbtrainsamplesfromdir": 4, "seri": [4, 6, 11], "ifcbtestsampl": 4, "dataset_nam": 4, "test_split": [4, 6], "uci": 4, "p\u00e9rez": [4, 5, 6, 10], "g\u00e1llego": [4, 5, 6, 10], "quevedo": [4, 6, 10], "j": [2, 4, 5, 6, 10], "del": [4, 6, 10], "coz": [4, 6, 10], "2017": [4, 5, 6, 10], "characteriz": [4, 6, 10], "chang": [4, 6, 7, 10], "studi": [4, 6, 10], "fusion": [4, 6, 10], "34": [4, 5, 6, 10], "87": [4, 6, 10], "castano": [4, 10], "2019": [4, 5, 10], "task": [4, 6, 10, 11], "45": [4, 10, 12], "predefin": 4, "fetch_ucilabelledcollect": 4, "access": [4, 5, 6, 10], "uci_dataset": 4, "ml": [4, 5, 10], "repositori": [4, 6], "adopt": [4, 10, 11], "5fcvx2": 4, "x2": 4, "import": [4, 5, 6, 10, 11, 12, 13], "yeast": [4, 6], "archiv": 4, "ic": 4, "edu": 4, "criteria": [4, 5, 11], "1000": [4, 5, 6, 11], "suit": 4, "ucimlrepo": 4, "dry": [4, 6], "bean": [4, 6], "uci_multiclass_dataset": 4, "offici": 4, "provid": [4, 5, 6, 10, 12, 13], "lequa": [4, 13], "competit": [4, 6], "brief": [4, 6, 7], "t1a": [4, 6], "t1b": [4, 6], "t2a": [4, 6], "t2b": [4, 6], "sentiment": [4, 10], "28": [4, 6, 7, 10], "merchandis": [4, 6], "product": [4, 6, 10], "we": [4, 5, 6, 7, 9, 10, 11, 12, 13], "moreo": [4, 5, 6, 10, 11], "sebastiani": [4, 5, 6, 10, 11], "f": [4, 5, 6, 7, 10, 11, 12, 13], "sperduti": [4, 6], "2022": [4, 5, 10, 13], "overview": [4, 6], "clef": [4, 6], "descript": [4, 6], "lequa2022_experi": [4, 6], "py": [4, 6, 10, 13], "folder": [4, 5, 6, 11, 13], "guid": 4, "val_gen": [4, 6], "_lequa2022": 4, "samplesfromdir": 4, "subclass": [4, 5], "review": [4, 12], "recurr": [4, 6, 10], "proceed": [4, 5, 6, 10], "27th": [4, 6, 10], "acm": [4, 5, 6, 10], "knowledg": [4, 6, 10], "manag": [4, 6, 10], "2018": [2, 4, 5, 6, 10], "reviews_sentiment_dataset": [4, 6], "hp": [4, 6, 10], "imdb": [4, 6, 11, 12, 13], "matric": [4, 6, 12], "minimun": 4, "kept": 4, "faster": [4, 6], "subsequ": 4, "twitter": 4, "gao": [4, 5, 6, 10], "w": [4, 6, 10], "tweet": [4, 6, 10], "analysi": [4, 6, 10], "social": [4, 6, 10], "mining6": 4, "19": [4, 10], "2016": [4, 5, 10], "semeval13": [4, 6], "semeval14": [4, 6], "semeval15": [4, 6], "share": [4, 6], "twitter_sentiment_datasets_train": [4, 6], "twitter_sentiment_datasets_test": [4, 6], "gasp": [4, 6], "hcr": [4, 6, 10], "omd": [4, 6], "sander": 4, "semeval16": 4, "sst": [4, 6], "wb": [4, 6], "devel": 4, "style": 4, "id": [4, 6, 10], "would": [4, 5, 6, 7, 10, 12], "countvector": 4, "keyword": [4, 
5], "nogap": 4, "regardless": 4, "special": [4, 6, 12], "codifi": 4, "unknown": 4, "surfac": 4, "assert": 4, "gap": 4, "preced": 4, "inplac": [4, 5, 10], "To": [4, 5, 12], "uniqu": 4, "rare": 4, "occur": [4, 12], "unk": 4, "minimum": [4, 5, 6], "org": [4, 5, 6], "stabl": 4, "feature_extract": 4, "html": 4, "subtyp": 4, "spmatrix": 4, "remov": [2, 4, 5], "present": [4, 6, 10], "least": [4, 6], "infrequ": 4, "aka": [4, 5], "z": [4, 6], "sublinear_tf": 4, "part": [4, 10], "scall": 4, "tf": [4, 6], "counter": 4, "tfidfvector": 4, "categor": [4, 10], "toward": [4, 5, 12], "whcih": 4, "had": 4, "encod": 4, "utf": 4, "8": [4, 5, 6, 10, 11, 12], "csv": 4, "feat1": 4, "feat2": 4, "featn": 4, "covari": [4, 5], "express": 4, "col": [4, 6], "row": [4, 7], "class2int": 4, "collet": 4, "fomart": 4, "progress": 4, "sentenc": 4, "classnam": 4, "u1": 4, "springer": [], "articl": [10, 11], "1007": [], "s10618": [], "008": [], "0097": [], "invert": [5, 10], "l2": [5, 10], "norm": [2, 5, 10], "ax": 5, "better": [5, 10], "consult": [5, 6, 10, 13], "buns": [5, 10], "On": [5, 10], "multi": [5, 10], "extens": [5, 6, 8, 10, 12], "2nd": [5, 6, 10], "workshop": [5, 10], "applic": [5, 10], "lq": [5, 10], "ecml": [5, 10], "pkdd": [5, 10], "grenobl": [5, 10], "franc": [5, 10], "classif_predict": [5, 10], "y_": 5, "construct": [5, 10], "jmlr": [], "v20": [], "18": [], "456": [], "abc": 5, "base_quantifi": 5, "median": [5, 10], "parameter": 5, "parllel": 5, "subobject": 5, "well": [5, 6, 10, 12], "nest": 5, "pipelin": 5, "latter": [5, 6, 10], "__": [5, 10], "updat": 5, "reason": [5, 10, 12, 13], "phase": [5, 10], "classification_fit": 5, "maintain": [5, 10], "attribut": 5, "give": [5, 13], "fit_classifi": [5, 10], "predict_on": [5, 10], "outsid": [5, 10], "remaind": 5, "expect": [5, 13], "non": [5, 10], "soft": [5, 7, 10], "num_warmup": 5, "500": [5, 6, 7, 12], "num_sampl": 5, "mcmc_seed": 5, "bayesian": [5, 10], "rather": [5, 10, 11], "diagnos": 5, "degeneraci": 5, "visibl": 5, "confus": 5, "uncertainti": [5, 10], "extra": 5, "bay": [5, 10], "warmup": 5, "mcmc": 5, "sampler": 5, "One": [5, 6, 7, 10, 13], "noth": 5, "here": [5, 7], "cdf": [5, 10], "match": [5, 10], "helling": 5, "sought": [5, 10], "choic": [5, 7, 11], "channel": 5, "proper": [5, 10], "ch": 5, "particular": [5, 6, 10], "di": 5, "dij": 5, "fraction": [5, 10], "th": 5, "tol": 5, "find": [0, 5, 6, 10, 11], "got": [5, 11], "dl": 5, "doi": 5, "1145": 5, "3219819": 5, "3220059": 5, "histogram": 5, "toler": [2, 5], "classif_posterior": 5, "exact_train_prev": [5, 10], "recalib": [5, 10], "maxim": 5, "saeren": [5, 10], "latinn": [5, 10], "decaesteck": [5, 10], "mutual": 5, "recurs": 5, "wai": [5, 10, 13], "until": 5, "converg": 5, "heurist": [5, 7], "propos": [5, 8, 10, 13], "recalibr": 5, "meant": [5, 10], "messag": 5, "observ": [5, 7, 10], "posterior_prob": 5, "0001": 5, "reach": 5, "loop": 5, "ir": 5, "accordingli": [5, 12], "unlabel": 5, "binary_quantifi": 5, "parallel_backend": 5, "prevel": 5, "emploi": [2, 5], "joblib": 5, "help": [5, 7, 12], "elm": [5, 8, 10], "cannot": 5, "temp": 5, "dure": [5, 7, 12], "resp": 5, "simplif": 5, "conceptu": [5, 11], "equival": [5, 6], "explicit": [0, 5], "famili": [5, 10], "structur": [5, 10], "purpos": [5, 6, 13], "svmperf_hom": [5, 10], "properli": [5, 10], "underli": [5, 10, 11], "2021": [5, 10, 11], "_kdei": 5, "common": [5, 10], "ancestor": 5, "kde": [5, 10], "scott": 5, "silverman": 5, "bandwidth": [5, 10], "wrap": 5, "kerneldens": 5, "evalut": 5, "kdei": 5, "cauchi": [5, 10], "schwarz": [5, 10], "author": 5, "mont": 
[5, 10], "carlo": [5, 10], "approach": [5, 10, 13], "alpha": 5, "delta": 5, "d": [5, 10], "boldsymbol": 5, "q_": 5, "widetild": 5, "u": [5, 7], "p_": 5, "alpha_i": 5, "l": 5, "_i": 5, "p_x": 5, "x_i": 5, "h": 5, "datapoint": 5, "center": [5, 12], "mathrm": 5, "dx": 5, "2dx": 5, "admit": 5, "montecarlo_tri": 5, "disntac": 5, "_f": 5, "trial": 5, "x_1": 5, "ldot": 5, "x_t": 5, "sim_": 5, "iid": [5, 7, 12], "criterion": 5, "mathbb": 5, "_neural": 5, "doc_embedding_s": 5, "stats_siz": 5, "lstm_hidden_s": 5, "lstm_nlayer": 5, "ff_layer": 5, "1024": 5, "bidirect": 5, "qdrop_p": 5, "order_bi": 5, "cell": 5, "dens": [5, 6], "connect": 5, "ff": 5, "sort": 5, "doc_embed": 5, "doc_posterior": 5, "overridden": [5, 10], "although": [5, 10, 11, 12, 13], "recip": 5, "former": [5, 6], "care": 5, "regist": 5, "hook": 5, "n_epoch": 5, "tr_iter_per_poch": 5, "va_iter_per_poch": 5, "checkpointdir": 5, "checkpointnam": 5, "pytorch": 5, "advantag": [5, 10, 13], "cnn": [5, 10], "estim_preval": [5, 10], "anyth": [5, 10], "40": [5, 6, 10], "66": [5, 13], "ground": 5, "truth": 5, "_threshold_optim": 5, "forman": [5, 10, 13], "2006": [5, 10], "2008": [5, 10], "look": [5, 6, 12], "goal": 5, "bring": 5, "stabil": [5, 7], "denomin": 5, "sweep": [5, 10], "closest": 5, "choos": [5, 10], "deliv": [5, 10, 11], "interpret": [4, 5, 6, 12], "complement": 5, "param_mod_sel": 5, "param_model_sel": 5, "red_siz": [5, 10], "min_po": 5, "polici": [5, 10], "av": [5, 10], "max_sample_s": 5, "ptr": [5, 10], "member": [5, 10], "preliminari": 5, "final": [5, 7, 10, 12], "recomput": 5, "static": [5, 10], "compat": [5, 10], "recommend": [5, 7, 12], "gridsearchcv": [5, 11], "base_quantifier_class": 5, "factori": 5, "unifi": [5, 6], "interfac": [5, 6, 7], "logspac": [5, 11, 13], "class_weight": [5, 11, 12], "balanc": [5, 6, 11, 12], "110": 5, "setup": 5, "mimick": 5, "castro": [5, 10], "alaiz": [5, 10], "rodr\u00edguez": 5, "alegr": [5, 10], "2013": [5, 10], "nfeat": 5, "dissimilar": 5, "mlpe": 5, "lazi": 5, "assum": [5, 7, 10], "put": 5, "assumpion": 5, "irrespect": [4, 5, 7, 12], "lower": [2, 5, 12], "estimant": 5, "bootstrap_tri": 5, "bootstrap_rang": 5, "bagging_tri": 5, "bagging_rang": 5, "vectorizer_kwarg": 5, "class_cond_x": 5, "hat_yi": 5, "yj": 5, "yi": 5, "projection_simplex_sort": [0, 1, 2, 5, 10], "ip_if_necessari": [], "appear": [2, 12], "decim": 2, "formula": 2, "condsoftmax": [0, 1, 2, 5, 10], "l1_norm": [0, 1, 2], "softmax": [0, 1, 2, 5, 10], "solve_adjustment_binari": [0, 1, 2], "aggr": 2, "verifi": 2, "li": [2, 10], "arraylik": 2, "lie": [2, 5], "num_vector": 2, "becom": [2, 13], "happen": [2, 12], "rescal": 2, "mapsimplex": [2, 5, 10], "were": [2, 6], "you": [0, 2, 5, 6, 10], "want": [2, 10], "untouch": 2, "class_conditional_r": 2, "unadjusted_count": 2, "unadjust": [2, 10, 12], "y_i": 2, "m_": 2, "ij": 2, "y_j": 2, "futur": [2, 10], "compos": [0, 1, 2], "uniform_preval": [0, 1, 2], "blobelloss": [0, 2, 5], "cvclassifi": [0, 2, 5], "classtransform": [0, 2, 5], "combinedloss": [0, 2, 5], "composablequantifi": [0, 2, 5], "distancetransform": [0, 2, 5], "energykerneltransform": [0, 2, 5], "energyloss": [0, 2, 5], "gaussiankerneltransform": [0, 2, 5], "gaussianrffkerneltransform": [0, 2, 5], "hellingersurrogateloss": [0, 2, 5], "histogramtransform": [0, 2, 5], "kerneltransform": [0, 2, 5], "laplaciankerneltransform": [0, 2, 5], "leastsquaresloss": [0, 2, 5], "tikhonovregular": [0, 2, 5], "composit": 5, "integr": 5, "qunfold": 5, "github": [5, 6, 10], "mirkobuns": 5, "functionloss": 5, "blobel": 5, "1985": 5, "under": [5, 7, 
10, 13], "assumpt": [5, 7, 12], "poisson": 5, "n_estim": 5, "fix": [5, 13], "oob_scor": 5, "oob_decision_function_": 5, "just": [5, 10, 13], "bag": 5, "is_probabilist": 5, "abstracttransform": 5, "saniti": 5, "_check_i": 5, "p_trn": 5, "class_preval": 5, "transfer": 5, "abstractloss": 5, "multipl": 5, "arbitrari": 5, "unfold": 5, "regular": 5, "unconstrain": 5, "trick": 5, "trust": 5, "ncg": 5, "solver_opt": 5, "gtol": 5, "maxit": 5, "randomst": 5, "ordin": 5, "2023": [5, 10], "randomforestclassifi": 5, "o_acc": 5, "try": [5, 11], "transformer__classifier__estimator__criterion": 5, "gini": 5, "entropi": 5, "acc_lr": 5, "preprocessor": 5, "edx": 5, "edi": 5, "accept": [5, 10], "spatial": 5, "cdist": 5, "kmm": 5, "energi": 5, "x_2": 5, "kawakubo": 5, "casta\u00f1o": 5, "sigma": 5, "gaussian": 5, "exp": 5, "2\u03c3": 5, "n_rff": 5, "effici": [5, 10], "fourier": 5, "wise": 5, "Their": 5, "problemat": 5, "becaus": 5, "twice": 5, "differenti": 5, "complic": 5, "unit_scal": 5, "intend": 5, "signatur": 5, "scalar": 5, "laplacian": 5, "bella": 5, "hopkin": 5, "king": 5, "2010": 5, "tikhonov": 5, "promot": 5, "behavior": 5, "often": [5, 7], "tau": 5, "strength": 5, "quapi": [6, 7, 8, 9, 10, 11, 12, 13], "anyon": 6, "roughli": 6, "plai": 6, "role": 6, "1st": 6, "neutral": 6, "3rd": 6, "digit": 6, "17": [6, 10], "tackl": [6, 10], "wiki": [6, 7, 9, 10], "three": [6, 10, 12], "harri": 6, "potter": 6, "movi": 6, "fetch": 6, "octob": [6, 10], "1775": [6, 10], "1778": [6, 10], "fhe": 6, "summar": 6, "9533": 6, "18399": 6, "018": 6, "982": 6, "065": 6, "935": 6, "25000": [4, 6], "idf": 6, "exemplifi": 6, "august": 6, "ieee": 6, "mine": [6, 10], "asonam": 6, "97": 6, "104": [6, 7], "semev": 6, "ase": 6, "typ": 6, "ga": 6, "8788": 6, "3765": 6, "694582": 6, "421": 6, "496": 6, "082": 6, "407": 6, "507": 6, "086": 6, "spa": 6, "rse": 6, "1594": 6, "798": 6, "222046": 6, "546": 6, "211": 6, "243": 6, "640": 6, "167": 6, "193": 6, "1839": 6, "787": 6, "199151": 6, "463": 6, "271": 6, "266": 6, "437": 6, "283": 6, "280": 6, "san": 6, "der": 6, "2155": 6, "923": 6, "229399": 6, "161": 6, "691": 6, "148": 6, "164": [6, 10], "688": 6, "sem": 6, "eva": 6, "l13": 6, "11338": 6, "3813": 6, "1215742": 6, "159": 6, "470": 6, "372": 6, "158": 6, "430": 6, "412": 6, "l14": 6, "1853": 6, "109": 6, "361": 6, "530": 6, "l15": 6, "2390": 6, "153": 6, "413": 6, "434": 6, "l16": 6, "8000": 6, "2000": 6, "889504": 6, "157": 6, "351": 6, "492": 6, "163": 6, "341": 6, "497": 6, "2971": 6, "1271": 6, "376132": 6, "261": 6, "452": 6, "288": 6, "207": 6, "481": 6, "312": 6, "2184": 6, "936": 6, "248563": 6, "305": 6, "414": 6, "281": 6, "282": 6, "446": 6, "272": [6, 7], "4259": 6, "1823": 6, "404333": 6, "270": 6, "392": 6, "337": 6, "274": 6, "335": 6, "32": [6, 11], "exactli": 6, "coincid": 6, "unabl": 6, "diabet": 6, "phonem": 6, "70": 6, "submit": 6, "practic": 6, "could": [4, 6, 7, 10, 11, 12, 13], "2x5fcv": 6, "acut": 6, "120": 6, "508": 6, "583": 6, "417": 6, "625": 6, "539": 6, "461": 6, "922": 6, "078": 6, "breast": 6, "cancer": 6, "683": 6, "350": 6, "650": 6, "cmc": 6, "1473": 6, "573": 6, "427": 6, "774": 6, "226": 6, "653": 6, "347": 6, "ctg": 6, "2126": 6, "778": 6, "861": 6, "139": 6, "917": 6, "083": 6, "german": 6, "700": 6, "haberman": [6, 10], "306": 6, "735": 6, "265": 6, "ionospher": 6, "641": 6, "359": 6, "iri": 6, "150": 6, "667": 6, "333": 6, "mammograph": 6, "830": 6, "514": 6, "486": 6, "pageblock": 6, "5473": 6, "979": 6, "021": 6, "semeion": 6, "1593": 6, "901": 6, "099": 6, "sonar": 6, "208": 6, "60": 6, 
"534": 6, "466": 6, "spambas": 6, "4601": 6, "57": 6, "606": 6, "394": 6, "spectf": 6, "267": 6, "44": 6, "794": 6, "206": 6, "tictacto": 6, "958": 6, "transfus": 6, "748": 6, "762": 6, "238": 6, "wdbc": 6, "569": 6, "627": 6, "373": 6, "wine": 6, "178": 6, "669": 6, "331": 6, "601": 6, "399": 6, "730": 6, "red": 6, "1599": 6, "465": 6, "535": 6, "white": 6, "4898": 6, "665": 6, "1484": 6, "711": 6, "289": 6, "action": 6, "moment": [6, 7, 10], "autom": 6, "cardiotocographi": 6, "excel": 6, "xlrd": 6, "unix": 6, "compress": 6, "directli": [6, 10], "doabl": 6, "gzip": 6, "uncompress": 6, "softwar": 6, "manual": 6, "good": [6, 11, 12], "idea": [6, 13], "easi": 6, "extrem": 6, "difficult": [6, 12], "tend": [6, 12], "magnitud": 6, "disproportion": 6, "impact": 6, "There": [6, 10], "your": [6, 10], "own": [6, 11], "min_test_split": [4, 6], "retriev": [6, 11], "platform": 6, "filter": 6, "000": 6, "9527": 6, "4084": 6, "qualiti": 6, "3428": 6, "1470": 6, "academ": 6, "3096": 6, "1328": 6, "3933": 6, "1687": 6, "letter": 6, "14000": 6, "6000": 6, "val_gener": 6, "test_gener": 6, "doc": 6, "5000": [6, 7, 12, 13], "250": 6, "20000": 6, "speci": 6, "water": 6, "record": 6, "10036244": 6, "hsosik": 6, "pglez82": 6, "ifcb_zenodo": 6, "678": 6, "584474": 6, "246916": 6, "2626429": 6, "per": [4, 6], "2922": 6, "2871": 6, "3873": 6, "min": 6, "59": 6, "6645": 6, "7375": 6, "9112": 6, "020": 6, "978": 6, "addition": 6, "deal": 6, "cl": 6, "my_data": 6, "my_custom_load": 6, "rb": 6, "fin": 6, "varianc": [6, 12], "_dataset_": [], "_labelledcollection_": [], "php": [], "_verbos": [], "true_": [], "_quapy_data_": [], "_cardiotocographi": [], "set_": [], "_xlrd_": [], "_page": [], "_acut": [], "a_": [], "b_": [], "_balanc": [], "2_": [], "arxiv": 10, "ab": [], "2401": 10, "00490": 10, "ceur": [], "vol": [], "3180": [], "146": 10, "_loader_func_": [], "_qp": [], "preprocessing_": [], "_text2tfidf_": [], "_reduce_columns_": [], "_standardize_": [], "_index_": [], "appeal": 7, "tool": 7, "scenario": [7, 10, 11, 12], "particularli": 7, "aris": 7, "belief": 7, "predictor": 7, "govern": 7, "explain": [7, 12], "popular": [7, 13], "_mae_": [], "_mrae_": [], "among": [7, 10], "_rae_": [], "_acce_": [], "_f1e_": [], "_mkld_": [], "_mnkld_": [], "third": [7, 12], "tradition": 7, "past": 7, "_sample_size_": [], "omit": 7, "thereaft": 7, "3f": 7, "600": 7, "914": 7, "error_funct": 7, "_evalu": [], "protocol_": [], "_sampl": [], "procotol_": [], "character": [7, 10], "wide": [7, 12, 13], "vari": [7, 10, 13], "_shift_": [], "md": 8, "_prot_": [], "our": [7, 13], "_quantifier_": [], "evaluatio": 7, "prot": 7, "4f": [7, 13], "_report_": [], "account": 7, "rise": [7, 13], "straightforward": 7, "visual": [7, 12], "pd": 7, "set_opt": 7, "expand_frame_repr": 7, "308": 7, "692": 7, "314": 7, "686": 7, "005649": 7, "013182": 7, "000074": 7, "896": 7, "909": 7, "091": 7, "013145": 7, "069323": 7, "000985": 7, "848": 7, "152": 7, "809": 7, "191": 7, "039063": 7, "149806": 7, "005175": 7, "016": 7, "984": 7, "033": 7, "967": 7, "017236": 7, "487529": 7, "005298": 7, "728": 7, "751": 7, "249": 7, "022769": 7, "057146": 7, "001350": 7, "4995": 7, "72": 7, "698": 7, "302": 7, "021752": 7, "053631": 7, "001133": 7, "4996": 7, "868": 7, "132": 7, "888": 7, "112": 7, "020490": 7, "088230": 7, "001985": 7, "4997": 7, "292": 7, "708": 7, "298": 7, "702": 7, "006149": 7, "014788": 7, "000090": 7, "4998": 7, "76": 7, "220": 7, "780": 7, "019950": 7, "054309": 7, "001127": 7, "4999": 7, "948": 7, "052": 7, "965": 7, "035": 7, "016941": 7, 
"165776": 7, "003538": 7, "023588": 7, "108779": 7, "003631": 7, "float64": 7, "finish": [7, 11], "exit": 7, "_aggregativequantifier_": [], "_onlabelledcollectionprotocol_": [], "smaller": 7, "1m": 7, "postpon": [7, 10], "_aggr_speedup": [], "convers": 7, "false_": [], "_model": [], "selection_": [], "execut": 7, "lot": 7, "procotol": 7, "ext": 8, "compil": [8, 10], "prepare_svmperf": [8, 10], "sh": [8, 10], "job": 8, "svm_perf_quantif": [8, 10], "welcom": 9, "illustr": [9, 10, 11, 12], "main": [9, 12, 13], "concept": 9, "behind": 9, "group": 10, "plan": 10, "fact": [10, 12], "shoud": 10, "abstractmethod": 10, "familiar": 10, "inspir": 10, "why": 10, "respond": 10, "simplifi": 10, "selector": 10, "_check_init_paramet": 10, "what": 10, "remain": 10, "ahead": 10, "overrid": 10, "overriden": 10, "quickli": 10, "inconsist": 10, "__init__": 10, "break": 10, "similarli": 10, "extern": 10, "fit_learn": 10, "pattern": 10, "rout": 10, "clone": 10, "huge": 10, "concern": [0, 10], "leverag": 10, "customarili": [10, 11], "done": [10, 11], "four": 10, "simplest": 10, "equip": [10, 12], "constructor": 10, "prevail": 10, "seem": 10, "calibratedclassifiercv": 10, "base_estim": 10, "cv": [10, 11], "As": [10, 11], "calibratedclassifi": 10, "lastli": 10, "everyth": 10, "said": 10, "aboud": 10, "now": [10, 12, 13], "mp": 10, "missclassif": 10, "attempt": 10, "minv": 10, "remark": 10, "izbicki": 10, "stern": 10, "journal": 10, "research": 10, "solvabl": 10, "gist": 10, "mblondel": 10, "6f3b7aaad90606b98f71": 10, "introduc": 10, "ziegler": 10, "czy\u017c": 10, "black": 10, "latent": 10, "weak": 10, "plug": 10, "markov": 10, "chain": 10, "suitabl": 10, "nearli": 10, "slower": 10, "bayesian_quantif": 10, "2002": 10, "priori": 10, "14": 10, "41": 10, "almost": 10, "effect": 10, "depart": 10, "suggest": 10, "etc": 10, "rodr\u0131": 10, "guez": 10, "scienc": 10, "218": 10, "allia": 10, "previou": 10, "firat": 10, "mutliclasshdi": 10, "maletzk": 10, "hassan": 10, "thank": 10, "pablo": 10, "contribut": 10, "achiev": [10, 11, 12, 13], "d\u00edez": 10, "reliabl": 10, "recognit": 10, "48": 10, "591": 10, "604": 10, "newsvmnkld": 10, "transact": 10, "discoveri": 10, "experiment": 10, "plo": 10, "ONE": 10, "nowadai": 10, "behav": [10, 12, 13], "Then": 10, "thing": [10, 13], "mycustomloss": 10, "oper": 10, "trivial": 10, "svmq": 10, "know": [10, 11], "explicit_loss_minim": 10, "one_vs_al": 10, "preprint": 10, "devis": 10, "too": 10, "variat": 10, "nor": 10, "inde": [10, 13], "regressor": 10, "processor": 10, "offer": 10, "supervis": 11, "strongli": [11, 12], "wherebi": 11, "pick": 11, "assess": 11, "aim": [11, 12], "appropri": 11, "design": 11, "regard": 11, "section": [0, 11, 13], "argu": 11, "alejandro": 11, "fabrizio": 11, "ecir": 11, "91": 11, "varieti": 11, "exhibit": [11, 12, 13], "degre": 11, "distributionmatch": 11, "confront": [11, 13], "against": [11, 12], "devot": 11, "prefix": 11, "classifier__": 11, "classifier__c": [11, 13], "16": 11, "best_params_": 11, "best_model_": 11, "mae_scor": 11, "5f": 11, "start": 11, "hyperparam": 11, "04021": 11, "took": 11, "1356": 11, "04286": 11, "2139": 11, "04888": 11, "2491": 11, "05163": 11, "5372": 11, "02445": 11, "9056": 11, "02234": 11, "3114": 11, "develop": 11, "03102": 11, "computation": 11, "costli": 11, "theoret": 11, "suboptim": 11, "opt": 11, "flaw": 11, "hand": 11, "surrog": 11, "analys": 12, "outcom": 12, "variou": 12, "merg": 12, "55": 12, "showcas": 12, "gen_data": 12, "base_classifi": 12, "append": 12, "insight": 12, "view": 12, "unfortun": 12, "bin_diag": 
12, "png": 12, "cyan": 12, "bias": 12, "evinc": 12, "bin_bia": 12, "unbias": 12, "overestim": 12, "rewrit": 12, "method_data": 12, "training_preval": 12, "linspac": 12, "training_s": 12, "suffic": 12, "train_sampl": 12, "clearli": 12, "subinterv": 12, "interestingli": 12, "enough": 12, "seemingli": 12, "tendenc": 12, "underestim": 12, "beyond": 12, "curios": 12, "diag": 12, "pretti": 12, "discuss": 12, "analyz": 12, "harder": 12, "err_drift": 12, "higher": [4, 12, 13], "clear": 12, "lowest": 12, "rememb": 12, "solid": 12, "comparison": [12, 13], "detriment": 12, "hide": 12, "v0": 13, "robustli": 13, "presenc": 13, "stochast": 13, "fair": 13, "commonli": 13, "radom_st": 13, "technic": 13, "explan": 13, "custom_protocol": 13, "subject": 13, "2005": 13, "1771": 13, "signific": 13, "budg": 13, "budget": 13, "4960": 13, "usag": 13, "equial": 13, "val_app": 13, "cumbersom": 13, "increas": 13, "rapidli": 13, "impract": 13, "legitim": 13, "drawback": 13, "elect": 13, "burden": 13, "incur": 13, "in_protocol": 13, "sometim": 13, "due": 13, "capabl": 13, "texto": [], "aspect": 0, "explicitlossminim": [], "load_report": [0, 1, 2], "as_dict": 2, "max_train_inst": 4, "min_class_support": 4, "surpass": 4, "istanc": 4, "fewer": 4, "deafult": 4}, "objects": {"": [[2, 0, 0, "-", "quapy"]], "quapy": [[3, 0, 0, "-", "classification"], [4, 0, 0, "-", "data"], [2, 0, 0, "-", "error"], [2, 0, 0, "-", "evaluation"], [2, 0, 0, "-", "functional"], [5, 0, 0, "-", "method"], [2, 0, 0, "-", "model_selection"], [2, 0, 0, "-", "plot"], [2, 0, 0, "-", "protocol"], [2, 0, 0, "-", "util"]], "quapy.classification": [[3, 0, 0, "-", "calibration"], [3, 0, 0, "-", "methods"], [3, 0, 0, "-", "neural"], [3, 0, 0, "-", "svmperf"]], "quapy.classification.calibration": [[3, 1, 1, "", "BCTSCalibration"], [3, 1, 1, "", "NBVSCalibration"], [3, 1, 1, "", "RecalibratedProbabilisticClassifier"], [3, 1, 1, "", "RecalibratedProbabilisticClassifierBase"], [3, 1, 1, "", "TSCalibration"], [3, 1, 1, "", "VSCalibration"]], "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase": [[3, 2, 1, "", "classes_"], [3, 3, 1, "", "fit"], [3, 3, 1, "", "fit_cv"], [3, 3, 1, "", "fit_tr_val"], [3, 3, 1, "", "predict"], [3, 3, 1, "", "predict_proba"]], "quapy.classification.methods": [[3, 1, 1, "", "LowRankLogisticRegression"]], "quapy.classification.methods.LowRankLogisticRegression": [[3, 3, 1, "", "fit"], [3, 3, 1, "", "get_params"], [3, 3, 1, "", "predict"], [3, 3, 1, "", "predict_proba"], [3, 3, 1, "", "set_params"], [3, 3, 1, "", "transform"]], "quapy.classification.neural": [[3, 1, 1, "", "CNNnet"], [3, 1, 1, "", "LSTMnet"], [3, 1, 1, "", "NeuralClassifierTrainer"], [3, 1, 1, "", "TextClassifierNet"], [3, 1, 1, "", "TorchDataset"]], "quapy.classification.neural.CNNnet": [[3, 3, 1, "", "document_embedding"], [3, 3, 1, "", "get_params"], [3, 4, 1, "", "training"], [3, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.LSTMnet": [[3, 3, 1, "", "document_embedding"], [3, 3, 1, "", "get_params"], [3, 4, 1, "", "training"], [3, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.NeuralClassifierTrainer": [[3, 2, 1, "", "device"], [3, 3, 1, "", "fit"], [3, 3, 1, "", "get_params"], [3, 3, 1, "", "predict"], [3, 3, 1, "", "predict_proba"], [3, 3, 1, "", "reset_net_params"], [3, 3, 1, "", "set_params"], [3, 3, 1, "", "transform"]], "quapy.classification.neural.TextClassifierNet": [[3, 3, 1, "", "dimensions"], [3, 3, 1, "", "document_embedding"], [3, 3, 1, "", "forward"], [3, 3, 1, "", "get_params"], [3, 3, 1, "", 
"predict_proba"], [3, 4, 1, "", "training"], [3, 2, 1, "", "vocabulary_size"], [3, 3, 1, "", "xavier_uniform"]], "quapy.classification.neural.TorchDataset": [[3, 3, 1, "", "asDataloader"]], "quapy.classification.svmperf": [[3, 1, 1, "", "SVMperf"]], "quapy.classification.svmperf.SVMperf": [[3, 3, 1, "", "decision_function"], [3, 3, 1, "", "fit"], [3, 3, 1, "", "predict"], [3, 4, 1, "", "valid_losses"]], "quapy.data": [[4, 0, 0, "-", "base"], [4, 0, 0, "-", "datasets"], [4, 0, 0, "-", "preprocessing"], [4, 0, 0, "-", "reader"]], "quapy.data.base": [[4, 1, 1, "", "Dataset"], [4, 1, 1, "", "LabelledCollection"]], "quapy.data.base.Dataset": [[4, 3, 1, "", "SplitStratified"], [4, 2, 1, "", "binary"], [4, 2, 1, "", "classes_"], [4, 3, 1, "", "kFCV"], [4, 3, 1, "", "load"], [4, 2, 1, "", "n_classes"], [4, 3, 1, "", "reduce"], [4, 3, 1, "", "stats"], [4, 2, 1, "", "train_test"], [4, 2, 1, "", "vocabulary_size"]], "quapy.data.base.LabelledCollection": [[4, 2, 1, "", "X"], [4, 2, 1, "", "Xp"], [4, 2, 1, "", "Xy"], [4, 2, 1, "", "binary"], [4, 3, 1, "", "counts"], [4, 3, 1, "", "join"], [4, 3, 1, "", "kFCV"], [4, 3, 1, "", "load"], [4, 2, 1, "", "n_classes"], [4, 2, 1, "", "p"], [4, 3, 1, "", "prevalence"], [4, 3, 1, "", "sampling"], [4, 3, 1, "", "sampling_from_index"], [4, 3, 1, "", "sampling_index"], [4, 3, 1, "", "split_random"], [4, 3, 1, "", "split_stratified"], [4, 3, 1, "", "stats"], [4, 3, 1, "", "uniform_sampling"], [4, 3, 1, "", "uniform_sampling_index"], [4, 2, 1, "", "y"]], "quapy.data.datasets": [[4, 5, 1, "", "fetch_IFCB"], [4, 5, 1, "", "fetch_UCIBinaryDataset"], [4, 5, 1, "", "fetch_UCIBinaryLabelledCollection"], [4, 5, 1, "", "fetch_UCIMulticlassDataset"], [4, 5, 1, "", "fetch_UCIMulticlassLabelledCollection"], [4, 5, 1, "", "fetch_lequa2022"], [4, 5, 1, "", "fetch_reviews"], [4, 5, 1, "", "fetch_twitter"], [4, 5, 1, "", "warn"]], "quapy.data.preprocessing": [[4, 1, 1, "", "IndexTransformer"], [4, 5, 1, "", "index"], [4, 5, 1, "", "reduce_columns"], [4, 5, 1, "", "standardize"], [4, 5, 1, "", "text2tfidf"]], "quapy.data.preprocessing.IndexTransformer": [[4, 3, 1, "", "add_word"], [4, 3, 1, "", "fit"], [4, 3, 1, "", "fit_transform"], [4, 3, 1, "", "transform"], [4, 3, 1, "", "vocabulary_size"]], "quapy.data.reader": [[4, 5, 1, "", "binarize"], [4, 5, 1, "", "from_csv"], [4, 5, 1, "", "from_sparse"], [4, 5, 1, "", "from_text"], [4, 5, 1, "", "reindex_labels"]], "quapy.error": [[2, 5, 1, "", "absolute_error"], [2, 5, 1, "", "acc_error"], [2, 5, 1, "", "acce"], [2, 5, 1, "", "ae"], [2, 5, 1, "", "f1_error"], [2, 5, 1, "", "f1e"], [2, 5, 1, "", "from_name"], [2, 5, 1, "", "kld"], [2, 5, 1, "", "mae"], [2, 5, 1, "", "mean_absolute_error"], [2, 5, 1, "", "mean_normalized_absolute_error"], [2, 5, 1, "", "mean_normalized_relative_absolute_error"], [2, 5, 1, "", "mean_relative_absolute_error"], [2, 5, 1, "", "mkld"], [2, 5, 1, "", "mnae"], [2, 5, 1, "", "mnkld"], [2, 5, 1, "", "mnrae"], [2, 5, 1, "", "mrae"], [2, 5, 1, "", "mse"], [2, 5, 1, "", "nae"], [2, 5, 1, "", "nkld"], [2, 5, 1, "", "normalized_absolute_error"], [2, 5, 1, "", "normalized_relative_absolute_error"], [2, 5, 1, "", "nrae"], [2, 5, 1, "", "rae"], [2, 5, 1, "", "relative_absolute_error"], [2, 5, 1, "", "se"], [2, 5, 1, "", "smooth"]], "quapy.evaluation": [[2, 5, 1, "", "evaluate"], [2, 5, 1, "", "evaluate_on_samples"], [2, 5, 1, "", "evaluation_report"], [2, 5, 1, "", "prediction"]], "quapy.functional": [[2, 5, 1, "", "HellingerDistance"], [2, 5, 1, "", "TopsoeDistance"], [2, 5, 1, "", "argmin_prevalence"], [2, 5, 1, "", 
"as_binary_prevalence"], [2, 5, 1, "", "check_prevalence_vector"], [2, 5, 1, "", "clip"], [2, 5, 1, "", "condsoftmax"], [2, 5, 1, "", "counts_from_labels"], [2, 5, 1, "", "get_divergence"], [2, 5, 1, "", "get_nprevpoints_approximation"], [2, 5, 1, "", "l1_norm"], [2, 5, 1, "", "linear_search"], [2, 5, 1, "", "normalize_prevalence"], [2, 5, 1, "", "num_prevalence_combinations"], [2, 5, 1, "", "optim_minimize"], [2, 5, 1, "", "prevalence_from_labels"], [2, 5, 1, "", "prevalence_from_probabilities"], [2, 5, 1, "", "prevalence_linspace"], [2, 5, 1, "", "projection_simplex_sort"], [2, 5, 1, "", "softmax"], [2, 5, 1, "", "solve_adjustment"], [2, 5, 1, "", "solve_adjustment_binary"], [2, 5, 1, "", "strprev"], [2, 5, 1, "", "ternary_search"], [2, 5, 1, "", "uniform_prevalence"], [2, 5, 1, "", "uniform_prevalence_sampling"], [2, 5, 1, "", "uniform_simplex_sampling"]], "quapy.method": [[5, 0, 0, "-", "_kdey"], [5, 0, 0, "-", "_neural"], [5, 0, 0, "-", "_threshold_optim"], [5, 0, 0, "-", "aggregative"], [5, 0, 0, "-", "base"], [5, 0, 0, "-", "composable"], [5, 0, 0, "-", "meta"], [5, 0, 0, "-", "non_aggregative"]], "quapy.method._kdey": [[5, 1, 1, "", "KDEBase"], [5, 1, 1, "", "KDEyCS"], [5, 1, 1, "", "KDEyHD"], [5, 1, 1, "", "KDEyML"]], "quapy.method._kdey.KDEBase": [[5, 4, 1, "", "BANDWIDTH_METHOD"], [5, 3, 1, "", "get_kde_function"], [5, 3, 1, "", "get_mixture_components"], [5, 3, 1, "", "pdf"]], "quapy.method._kdey.KDEyCS": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "gram_matrix_mix_sum"]], "quapy.method._kdey.KDEyHD": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method._kdey.KDEyML": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method._neural": [[5, 1, 1, "", "QuaNetModule"], [5, 1, 1, "", "QuaNetTrainer"], [5, 5, 1, "", "mae_loss"]], "quapy.method._neural.QuaNetModule": [[5, 2, 1, "", "device"], [5, 3, 1, "", "forward"], [5, 4, 1, "", "training"]], "quapy.method._neural.QuaNetTrainer": [[5, 2, 1, "", "classes_"], [5, 3, 1, "", "clean_checkpoint"], [5, 3, 1, "", "clean_checkpoint_dir"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method._threshold_optim": [[5, 1, 1, "", "MAX"], [5, 1, 1, "", "MS"], [5, 1, 1, "", "MS2"], [5, 1, 1, "", "T50"], [5, 1, 1, "", "ThresholdOptimization"], [5, 1, 1, "", "X"]], "quapy.method._threshold_optim.MAX": [[5, 3, 1, "", "condition"]], "quapy.method._threshold_optim.MS": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "condition"]], "quapy.method._threshold_optim.MS2": [[5, 3, 1, "", "discard"]], "quapy.method._threshold_optim.T50": [[5, 3, 1, "", "condition"]], "quapy.method._threshold_optim.ThresholdOptimization": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregate_with_threshold"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "condition"], [5, 3, 1, "", "discard"]], "quapy.method._threshold_optim.X": [[5, 3, 1, "", "condition"]], "quapy.method.aggregative": [[5, 1, 1, "", "ACC"], [5, 4, 1, "", "AdjustedClassifyAndCount"], [5, 1, 1, "", "AggregativeCrispQuantifier"], [5, 1, 1, "", "AggregativeMedianEstimator"], [5, 1, 1, "", "AggregativeQuantifier"], [5, 1, 1, "", "AggregativeSoftQuantifier"], [5, 1, 1, "", "BayesianCC"], [5, 1, 1, "", "BinaryAggregativeQuantifier"], [5, 1, 1, "", "CC"], [5, 4, 1, "", "ClassifyAndCount"], [5, 1, 1, "", "DMy"], [5, 4, 1, "", "DistributionMatchingY"], [5, 1, 1, "", "DyS"], [5, 1, 1, "", "EMQ"], [5, 4, 1, "", 
"ExpectationMaximizationQuantifier"], [5, 1, 1, "", "HDy"], [5, 4, 1, "", "HellingerDistanceY"], [5, 1, 1, "", "OneVsAllAggregative"], [5, 1, 1, "", "PACC"], [5, 1, 1, "", "PCC"], [5, 4, 1, "", "ProbabilisticAdjustedClassifyAndCount"], [5, 4, 1, "", "ProbabilisticClassifyAndCount"], [5, 4, 1, "", "SLD"], [5, 1, 1, "", "SMM"], [5, 5, 1, "", "newELM"], [5, 5, 1, "", "newSVMAE"], [5, 5, 1, "", "newSVMKLD"], [5, 5, 1, "", "newSVMQ"], [5, 5, 1, "", "newSVMRAE"]], "quapy.method.aggregative.ACC": [[5, 4, 1, "", "METHODS"], [5, 4, 1, "", "NORMALIZATIONS"], [5, 4, 1, "", "SOLVERS"], [5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "getPteCondEstim"], [5, 3, 1, "", "newInvariantRatioEstimation"]], "quapy.method.aggregative.AggregativeMedianEstimator": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method.aggregative.AggregativeQuantifier": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 2, 1, "", "classes_"], [5, 2, 1, "", "classifier"], [5, 3, 1, "", "classifier_fit_predict"], [5, 3, 1, "", "classify"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"], [5, 2, 1, "", "val_split"], [5, 4, 1, "", "val_split_"]], "quapy.method.aggregative.BayesianCC": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "get_conditional_probability_samples"], [5, 3, 1, "", "get_prevalence_samples"], [5, 3, 1, "", "sample_from_posterior"]], "quapy.method.aggregative.BinaryAggregativeQuantifier": [[5, 3, 1, "", "fit"], [5, 2, 1, "", "neg_label"], [5, 2, 1, "", "pos_label"]], "quapy.method.aggregative.CC": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.DMy": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.DyS": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.EMQ": [[5, 3, 1, "", "EM"], [5, 3, 1, "", "EMQ_BCTS"], [5, 4, 1, "", "EPSILON"], [5, 4, 1, "", "MAX_ITER"], [5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "classify"], [5, 3, 1, "", "predict_proba"]], "quapy.method.aggregative.HDy": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.OneVsAllAggregative": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "classify"]], "quapy.method.aggregative.PACC": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "getPteCondEstim"]], "quapy.method.aggregative.PCC": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.SMM": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.base": [[5, 1, 1, "", "BaseQuantifier"], [5, 1, 1, "", "BinaryQuantifier"], [5, 1, 1, "", "OneVsAll"], [5, 1, 1, "", "OneVsAllGeneric"], [5, 5, 1, "", "newOneVsAll"]], "quapy.method.base.BaseQuantifier": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"]], "quapy.method.base.OneVsAllGeneric": [[5, 2, 1, "", "classes_"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"]], "quapy.method.composable": [[5, 1, 1, "", "BlobelLoss"], [5, 1, 1, "", "CVClassifier"], [5, 1, 1, "", "ClassTransformer"], [5, 1, 1, "", "CombinedLoss"], [5, 5, 1, "", "ComposableQuantifier"], [5, 1, 1, "", "DistanceTransformer"], [5, 1, 1, "", "EnergyKernelTransformer"], [5, 1, 1, "", "EnergyLoss"], [5, 1, 1, "", "GaussianKernelTransformer"], [5, 1, 1, "", "GaussianRFFKernelTransformer"], [5, 1, 1, "", "HellingerSurrogateLoss"], [5, 1, 1, "", "HistogramTransformer"], [5, 1, 1, "", 
"KernelTransformer"], [5, 1, 1, "", "LaplacianKernelTransformer"], [5, 1, 1, "", "LeastSquaresLoss"], [5, 1, 1, "", "TikhonovRegularization"], [5, 5, 1, "", "TikhonovRegularized"]], "quapy.method.composable.CVClassifier": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "predict"], [5, 3, 1, "", "predict_proba"]], "quapy.method.composable.ClassTransformer": [[5, 3, 1, "", "fit_transform"], [5, 3, 1, "", "transform"]], "quapy.method.composable.DistanceTransformer": [[5, 3, 1, "", "fit_transform"], [5, 3, 1, "", "transform"]], "quapy.method.composable.EnergyKernelTransformer": [[5, 3, 1, "", "fit_transform"], [5, 3, 1, "", "transform"]], "quapy.method.composable.GaussianKernelTransformer": [[5, 3, 1, "", "fit_transform"], [5, 3, 1, "", "transform"]], "quapy.method.composable.GaussianRFFKernelTransformer": [[5, 3, 1, "", "fit_transform"], [5, 3, 1, "", "transform"]], "quapy.method.composable.HistogramTransformer": [[5, 3, 1, "", "fit_transform"], [5, 3, 1, "", "transform"]], "quapy.method.composable.KernelTransformer": [[5, 3, 1, "", "fit_transform"], [5, 3, 1, "", "transform"]], "quapy.method.composable.LaplacianKernelTransformer": [[5, 2, 1, "", "kernel"]], "quapy.method.meta": [[5, 5, 1, "", "EACC"], [5, 5, 1, "", "ECC"], [5, 5, 1, "", "EEMQ"], [5, 5, 1, "", "EHDy"], [5, 5, 1, "", "EPACC"], [5, 1, 1, "", "Ensemble"], [5, 1, 1, "", "MedianEstimator"], [5, 1, 1, "", "MedianEstimator2"], [5, 5, 1, "", "ensembleFactory"], [5, 5, 1, "", "get_probability_distribution"]], "quapy.method.meta.Ensemble": [[5, 4, 1, "", "VALID_POLICIES"], [5, 2, 1, "", "aggregative"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 2, 1, "", "probabilistic"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method.meta.MedianEstimator": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method.meta.MedianEstimator2": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method.non_aggregative": [[5, 1, 1, "", "DMx"], [5, 4, 1, "", "DistributionMatchingX"], [5, 1, 1, "", "MaximumLikelihoodPrevalenceEstimation"], [5, 1, 1, "", "ReadMe"]], "quapy.method.non_aggregative.DMx": [[5, 3, 1, "", "HDx"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"]], "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"]], "quapy.method.non_aggregative.ReadMe": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "std_constrained_linear_ls"]], "quapy.model_selection": [[2, 1, 1, "", "ConfigStatus"], [2, 1, 1, "", "GridSearchQ"], [2, 1, 1, "", "Status"], [2, 5, 1, "", "cross_val_predict"], [2, 5, 1, "", "expand_grid"], [2, 5, 1, "", "group_params"]], "quapy.model_selection.ConfigStatus": [[2, 3, 1, "", "failed"], [2, 3, 1, "", "success"]], "quapy.model_selection.GridSearchQ": [[2, 3, 1, "", "best_model"], [2, 3, 1, "", "fit"], [2, 3, 1, "", "get_params"], [2, 3, 1, "", "quantify"], [2, 3, 1, "", "set_params"]], "quapy.model_selection.Status": [[2, 4, 1, "", "ERROR"], [2, 4, 1, "", "INVALID"], [2, 4, 1, "", "SUCCESS"], [2, 4, 1, "", "TIMEOUT"]], "quapy.plot": [[2, 5, 1, "", "binary_bias_bins"], [2, 5, 1, "", "binary_bias_global"], [2, 5, 1, "", "binary_diagonal"], [2, 5, 1, "", "brokenbar_supremacy_by_drift"], [2, 5, 1, "", "error_by_drift"]], "quapy.protocol": [[2, 1, 1, "", "APP"], [2, 1, 1, "", "AbstractProtocol"], [2, 1, 1, "", "AbstractStochasticSeededProtocol"], [2, 4, 1, "", "ArtificialPrevalenceProtocol"], [2, 1, 1, "", 
"DomainMixer"], [2, 1, 1, "", "IterateProtocol"], [2, 1, 1, "", "NPP"], [2, 4, 1, "", "NaturalPrevalenceProtocol"], [2, 1, 1, "", "OnLabelledCollectionProtocol"], [2, 1, 1, "", "UPP"], [2, 4, 1, "", "UniformPrevalenceProtocol"]], "quapy.protocol.APP": [[2, 3, 1, "", "prevalence_grid"], [2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"], [2, 3, 1, "", "total"]], "quapy.protocol.AbstractProtocol": [[2, 3, 1, "", "total"]], "quapy.protocol.AbstractStochasticSeededProtocol": [[2, 3, 1, "", "collator"], [2, 2, 1, "", "random_state"], [2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"]], "quapy.protocol.DomainMixer": [[2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"], [2, 3, 1, "", "total"]], "quapy.protocol.IterateProtocol": [[2, 3, 1, "", "total"]], "quapy.protocol.NPP": [[2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"], [2, 3, 1, "", "total"]], "quapy.protocol.OnLabelledCollectionProtocol": [[2, 4, 1, "", "RETURN_TYPES"], [2, 3, 1, "", "get_collator"], [2, 3, 1, "", "get_labelled_collection"], [2, 3, 1, "", "on_preclassified_instances"]], "quapy.protocol.UPP": [[2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"], [2, 3, 1, "", "total"]], "quapy.util": [[2, 1, 1, "", "EarlyStop"], [2, 5, 1, "", "create_if_not_exist"], [2, 5, 1, "", "create_parent_dir"], [2, 5, 1, "", "download_file"], [2, 5, 1, "", "download_file_if_not_exists"], [2, 5, 1, "", "get_quapy_home"], [2, 5, 1, "", "load_report"], [2, 5, 1, "", "map_parallel"], [2, 5, 1, "", "parallel"], [2, 5, 1, "", "parallel_unpack"], [2, 5, 1, "", "pickled_resource"], [2, 5, 1, "", "save_text_file"], [2, 5, 1, "", "temp_seed"], [2, 5, 1, "", "timeout"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:property", "3": "py:method", "4": "py:attribute", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "property", "Python property"], "3": ["py", "method", "Python method"], "4": ["py", "attribute", "Python attribute"], "5": ["py", "function", "Python function"]}, "titleterms": {"welcom": 0, "quapi": [0, 1, 2, 3, 4, 5], "": 0, "document": 0, "instal": 0, "github": 0, "content": [0, 2, 3, 4, 5], "indic": 0, "tabl": 0, "packag": [2, 3, 4, 5], "subpackag": 2, "submodul": [2, 3, 4, 5], "error": [2, 7, 12], "modul": [2, 3, 4, 5], "evalu": [2, 7], "function": 2, "model_select": 2, "plot": [2, 12], "protocol": [2, 7, 13], "util": 2, "classif": [3, 11], "calibr": 3, "method": [3, 5, 10], "neural": [3, 10], "svmperf": 3, "data": [4, 6], "base": [4, 5], "dataset": [4, 6], "preprocess": 4, "reader": 4, "aggreg": [5, 10], "meta": [5, 10], "non_aggreg": 5, "compos": 5, "guidelin": [], "review": 6, "twitter": 6, "sentiment": 6, "uci": 6, "machin": 6, "learn": 6, "binari": 6, "issu": 6, "multiclass": 6, "lequa": 6, "2022": 6, "ifcb": 6, "plankton": 6, "ad": 6, "custom": 6, "process": 6, "wiki": 0, "measur": 7, "explicit": [8, 10], "loss": [8, 10, 11], "minim": [8, 10], "quantif": [10, 11, 12], "The": 10, "classifi": 10, "count": 10, "variant": 10, "bayesiancc": 10, "new": 10, "v0": 10, "1": 10, "9": 10, "expect": 10, "maxim": 10, "emq": 10, "helling": 10, "distanc": 10, "y": 10, "hdy": 10, "threshold": 10, "optim": 10, "kernel": 10, "densiti": 10, "estim": 10, "kdei": 10, "model": [10, 11], "ensembl": 10, "quanet": 10, "network": 10, "select": 11, "target": 11, "orient": 11, "diagon": 12, "bia": 12, "drift": 12, "artifici": 13, "preval": 13, "sampl": 13, "from": 13, "unit": 13, "simplex": 13, "uniform": 13, "upp": 13, "natur": 13, 
"other": 13}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Welcome to QuaPy\u2019s documentation!": [[0, "welcome-to-quapy-s-documentation"]], "Installation": [[0, "installation"]], "GitHub": [[0, "github"]], "Wiki Documents": [[0, "wiki-documents"]], "Contents": [[0, "contents"]], "Indices and tables": [[0, "indices-and-tables"]], "quapy": [[1, "quapy"]], "quapy package": [[2, "quapy-package"]], "Subpackages": [[2, "subpackages"]], "Submodules": [[2, "submodules"], [3, "submodules"], [4, "submodules"], [5, "submodules"]], "quapy.error module": [[2, "module-quapy.error"]], "quapy.evaluation module": [[2, "module-quapy.evaluation"]], "quapy.functional module": [[2, "module-quapy.functional"]], "quapy.model_selection module": [[2, "module-quapy.model_selection"]], "quapy.plot module": [[2, "module-quapy.plot"]], "quapy.protocol module": [[2, "module-quapy.protocol"]], "quapy.util module": [[2, "module-quapy.util"]], "Module contents": [[2, "module-quapy"], [3, "module-quapy.classification"], [4, "module-quapy.data"], [5, "module-quapy.method"]], "quapy.classification package": [[3, "quapy-classification-package"]], "quapy.classification.calibration module": [[3, "module-quapy.classification.calibration"]], "quapy.classification.methods module": [[3, "module-quapy.classification.methods"]], "quapy.classification.neural module": [[3, "module-quapy.classification.neural"]], "quapy.classification.svmperf module": [[3, "module-quapy.classification.svmperf"]], "quapy.data package": [[4, "quapy-data-package"]], "quapy.data.base module": [[4, "module-quapy.data.base"]], "quapy.data.datasets module": [[4, "module-quapy.data.datasets"]], "quapy.data.preprocessing module": [[4, "module-quapy.data.preprocessing"]], "quapy.data.reader module": [[4, "module-quapy.data.reader"]], "quapy.method package": [[5, "quapy-method-package"]], "quapy.method.aggregative module": [[5, "module-quapy.method.aggregative"]], "quapy.method.base module": [[5, "module-quapy.method.base"]], "quapy.method.meta module": [[5, "module-quapy.method.meta"]], "quapy.method.non_aggregative module": [[5, "module-quapy.method.non_aggregative"]], "quapy.method.composable module": [[5, "quapy-method-composable-module"]], "Datasets": [[6, "datasets"]], "Reviews Datasets": [[6, "reviews-datasets"]], "Twitter Sentiment Datasets": [[6, "twitter-sentiment-datasets"]], "UCI Machine Learning": [[6, "uci-machine-learning"]], "Binary datasets": [[6, "binary-datasets"]], "Issues:": [[6, "issues"]], "Multiclass datasets": [[6, "multiclass-datasets"]], "LeQua 2022 Datasets": [[6, "lequa-2022-datasets"]], "IFCB Plankton dataset": [[6, "ifcb-plankton-dataset"]], "Adding Custom Datasets": [[6, "adding-custom-datasets"]], "Data Processing": [[6, "data-processing"]], "Evaluation": [[7, "evaluation"]], "Error Measures": [[7, "error-measures"]], "Evaluation Protocols": [[7, "evaluation-protocols"]], "Explicit Loss Minimization": [[8, "explicit-loss-minimization"], [10, "explicit-loss-minimization"]], "Quantification Methods": [[10, "quantification-methods"]], "Aggregative Methods": [[10, "aggregative-methods"]], "The Classify & Count variants": [[10, "the-classify-count-variants"]], "BayesianCC (New in v0.1.9!)": [[10, "bayesiancc-new-in-v0-1-9"]], "Expectation 
Maximization (EMQ)": [[10, "expectation-maximization-emq"]], "Hellinger Distance y (HDy)": [[10, "hellinger-distance-y-hdy"]], "Threshold Optimization methods": [[10, "threshold-optimization-methods"]], "Kernel Density Estimation methods (KDEy)": [[10, "kernel-density-estimation-methods-kdey"]], "Meta Models": [[10, "meta-models"]], "Ensembles": [[10, "ensembles"]], "The QuaNet neural network": [[10, "the-quanet-neural-network"]], "Model Selection": [[11, "model-selection"]], "Targeting a Quantification-oriented loss": [[11, "targeting-a-quantification-oriented-loss"]], "Targeting a Classification-oriented loss": [[11, "targeting-a-classification-oriented-loss"]], "Plotting": [[12, "plotting"]], "Diagonal Plot": [[12, "diagonal-plot"]], "Quantification bias": [[12, "quantification-bias"]], "Error by Drift": [[12, "error-by-drift"]], "Protocols": [[13, "protocols"]], "Artificial-Prevalence Protocol": [[13, "artificial-prevalence-protocol"]], "Sampling from the unit-simplex, the Uniform-Prevalence Protocol (UPP)": [[13, "sampling-from-the-unit-simplex-the-uniform-prevalence-protocol-upp"]], "Natural-Prevalence Protocol": [[13, "natural-prevalence-protocol"]], "Other protocols": [[13, "other-protocols"]]}, "indexentries": {"app (class in quapy.protocol)": [[2, "quapy.protocol.APP"]], "abstractprotocol (class in quapy.protocol)": [[2, "quapy.protocol.AbstractProtocol"]], "abstractstochasticseededprotocol (class in quapy.protocol)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol"]], "artificialprevalenceprotocol (in module quapy.protocol)": [[2, "quapy.protocol.ArtificialPrevalenceProtocol"]], "configstatus (class in quapy.model_selection)": [[2, "quapy.model_selection.ConfigStatus"]], "domainmixer (class in quapy.protocol)": [[2, "quapy.protocol.DomainMixer"]], "error (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.ERROR"]], "earlystop (class in quapy.util)": [[2, "quapy.util.EarlyStop"]], "gridsearchq (class in quapy.model_selection)": [[2, "quapy.model_selection.GridSearchQ"]], "hellingerdistance() (in module quapy.functional)": [[2, "quapy.functional.HellingerDistance"]], "invalid (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.INVALID"]], "iterateprotocol (class in quapy.protocol)": [[2, "quapy.protocol.IterateProtocol"]], "npp (class in quapy.protocol)": [[2, "quapy.protocol.NPP"]], "naturalprevalenceprotocol (in module quapy.protocol)": [[2, "quapy.protocol.NaturalPrevalenceProtocol"]], "onlabelledcollectionprotocol (class in quapy.protocol)": [[2, "quapy.protocol.OnLabelledCollectionProtocol"]], "return_types (quapy.protocol.onlabelledcollectionprotocol attribute)": [[2, "quapy.protocol.OnLabelledCollectionProtocol.RETURN_TYPES"]], "success (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.SUCCESS"]], "status (class in quapy.model_selection)": [[2, "quapy.model_selection.Status"]], "timeout (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.TIMEOUT"]], "topsoedistance() (in module quapy.functional)": [[2, "quapy.functional.TopsoeDistance"]], "upp (class in quapy.protocol)": [[2, "quapy.protocol.UPP"]], "uniformprevalenceprotocol (in module quapy.protocol)": [[2, "quapy.protocol.UniformPrevalenceProtocol"]], "absolute_error() (in module quapy.error)": [[2, "quapy.error.absolute_error"]], "acc_error() (in module quapy.error)": [[2, "quapy.error.acc_error"]], "acce() (in module quapy.error)": [[2, "quapy.error.acce"]], "ae() (in module quapy.error)": [[2, 
"quapy.error.ae"]], "argmin_prevalence() (in module quapy.functional)": [[2, "quapy.functional.argmin_prevalence"]], "as_binary_prevalence() (in module quapy.functional)": [[2, "quapy.functional.as_binary_prevalence"]], "best_model() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.best_model"]], "binary_bias_bins() (in module quapy.plot)": [[2, "quapy.plot.binary_bias_bins"]], "binary_bias_global() (in module quapy.plot)": [[2, "quapy.plot.binary_bias_global"]], "binary_diagonal() (in module quapy.plot)": [[2, "quapy.plot.binary_diagonal"]], "brokenbar_supremacy_by_drift() (in module quapy.plot)": [[2, "quapy.plot.brokenbar_supremacy_by_drift"]], "check_prevalence_vector() (in module quapy.functional)": [[2, "quapy.functional.check_prevalence_vector"]], "clip() (in module quapy.functional)": [[2, "quapy.functional.clip"]], "collator() (quapy.protocol.abstractstochasticseededprotocol method)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.collator"]], "condsoftmax() (in module quapy.functional)": [[2, "quapy.functional.condsoftmax"]], "counts_from_labels() (in module quapy.functional)": [[2, "quapy.functional.counts_from_labels"]], "create_if_not_exist() (in module quapy.util)": [[2, "quapy.util.create_if_not_exist"]], "create_parent_dir() (in module quapy.util)": [[2, "quapy.util.create_parent_dir"]], "cross_val_predict() (in module quapy.model_selection)": [[2, "quapy.model_selection.cross_val_predict"]], "download_file() (in module quapy.util)": [[2, "quapy.util.download_file"]], "download_file_if_not_exists() (in module quapy.util)": [[2, "quapy.util.download_file_if_not_exists"]], "error_by_drift() (in module quapy.plot)": [[2, "quapy.plot.error_by_drift"]], "evaluate() (in module quapy.evaluation)": [[2, "quapy.evaluation.evaluate"]], "evaluate_on_samples() (in module quapy.evaluation)": [[2, "quapy.evaluation.evaluate_on_samples"]], "evaluation_report() (in module quapy.evaluation)": [[2, "quapy.evaluation.evaluation_report"]], "expand_grid() (in module quapy.model_selection)": [[2, "quapy.model_selection.expand_grid"]], "f1_error() (in module quapy.error)": [[2, "quapy.error.f1_error"]], "f1e() (in module quapy.error)": [[2, "quapy.error.f1e"]], "failed() (quapy.model_selection.configstatus method)": [[2, "quapy.model_selection.ConfigStatus.failed"]], "fit() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.fit"]], "from_name() (in module quapy.error)": [[2, "quapy.error.from_name"]], "get_collator() (quapy.protocol.onlabelledcollectionprotocol class method)": [[2, "quapy.protocol.OnLabelledCollectionProtocol.get_collator"]], "get_divergence() (in module quapy.functional)": [[2, "quapy.functional.get_divergence"]], "get_labelled_collection() (quapy.protocol.onlabelledcollectionprotocol method)": [[2, "quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection"]], "get_nprevpoints_approximation() (in module quapy.functional)": [[2, "quapy.functional.get_nprevpoints_approximation"]], "get_params() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.get_params"]], "get_quapy_home() (in module quapy.util)": [[2, "quapy.util.get_quapy_home"]], "group_params() (in module quapy.model_selection)": [[2, "quapy.model_selection.group_params"]], "kld() (in module quapy.error)": [[2, "quapy.error.kld"]], "l1_norm() (in module quapy.functional)": [[2, "quapy.functional.l1_norm"]], "linear_search() (in module quapy.functional)": [[2, "quapy.functional.linear_search"]], 
"load_report() (in module quapy.util)": [[2, "quapy.util.load_report"]], "mae() (in module quapy.error)": [[2, "quapy.error.mae"]], "map_parallel() (in module quapy.util)": [[2, "quapy.util.map_parallel"]], "mean_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_absolute_error"]], "mean_normalized_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_normalized_absolute_error"]], "mean_normalized_relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_normalized_relative_absolute_error"]], "mean_relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_relative_absolute_error"]], "mkld() (in module quapy.error)": [[2, "quapy.error.mkld"]], "mnae() (in module quapy.error)": [[2, "quapy.error.mnae"]], "mnkld() (in module quapy.error)": [[2, "quapy.error.mnkld"]], "mnrae() (in module quapy.error)": [[2, "quapy.error.mnrae"]], "module": [[2, "module-quapy"], [2, "module-quapy.error"], [2, "module-quapy.evaluation"], [2, "module-quapy.functional"], [2, "module-quapy.model_selection"], [2, "module-quapy.plot"], [2, "module-quapy.protocol"], [2, "module-quapy.util"], [3, "module-quapy.classification"], [3, "module-quapy.classification.calibration"], [3, "module-quapy.classification.methods"], [3, "module-quapy.classification.neural"], [3, "module-quapy.classification.svmperf"], [4, "module-quapy.data"], [4, "module-quapy.data.base"], [4, "module-quapy.data.datasets"], [4, "module-quapy.data.preprocessing"], [4, "module-quapy.data.reader"], [5, "module-quapy.method"], [5, "module-quapy.method._kdey"], [5, "module-quapy.method._neural"], [5, "module-quapy.method._threshold_optim"], [5, "module-quapy.method.aggregative"], [5, "module-quapy.method.base"], [5, "module-quapy.method.composable"], [5, "module-quapy.method.meta"], [5, "module-quapy.method.non_aggregative"]], "mrae() (in module quapy.error)": [[2, "quapy.error.mrae"]], "mse() (in module quapy.error)": [[2, "quapy.error.mse"]], "nae() (in module quapy.error)": [[2, "quapy.error.nae"]], "nkld() (in module quapy.error)": [[2, "quapy.error.nkld"]], "normalize_prevalence() (in module quapy.functional)": [[2, "quapy.functional.normalize_prevalence"]], "normalized_absolute_error() (in module quapy.error)": [[2, "quapy.error.normalized_absolute_error"]], "normalized_relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.normalized_relative_absolute_error"]], "nrae() (in module quapy.error)": [[2, "quapy.error.nrae"]], "num_prevalence_combinations() (in module quapy.functional)": [[2, "quapy.functional.num_prevalence_combinations"]], "on_preclassified_instances() (quapy.protocol.onlabelledcollectionprotocol method)": [[2, "quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances"]], "optim_minimize() (in module quapy.functional)": [[2, "quapy.functional.optim_minimize"]], "parallel() (in module quapy.util)": [[2, "quapy.util.parallel"]], "parallel_unpack() (in module quapy.util)": [[2, "quapy.util.parallel_unpack"]], "pickled_resource() (in module quapy.util)": [[2, "quapy.util.pickled_resource"]], "prediction() (in module quapy.evaluation)": [[2, "quapy.evaluation.prediction"]], "prevalence_from_labels() (in module quapy.functional)": [[2, "quapy.functional.prevalence_from_labels"]], "prevalence_from_probabilities() (in module quapy.functional)": [[2, "quapy.functional.prevalence_from_probabilities"]], "prevalence_grid() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.prevalence_grid"]], "prevalence_linspace() (in module quapy.functional)": [[2, 
"quapy.functional.prevalence_linspace"]], "projection_simplex_sort() (in module quapy.functional)": [[2, "quapy.functional.projection_simplex_sort"]], "quantify() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.quantify"]], "quapy": [[2, "module-quapy"]], "quapy.error": [[2, "module-quapy.error"]], "quapy.evaluation": [[2, "module-quapy.evaluation"]], "quapy.functional": [[2, "module-quapy.functional"]], "quapy.model_selection": [[2, "module-quapy.model_selection"]], "quapy.plot": [[2, "module-quapy.plot"]], "quapy.protocol": [[2, "module-quapy.protocol"]], "quapy.util": [[2, "module-quapy.util"]], "rae() (in module quapy.error)": [[2, "quapy.error.rae"]], "random_state (quapy.protocol.abstractstochasticseededprotocol property)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.random_state"]], "relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.relative_absolute_error"]], "sample() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.sample"]], "sample() (quapy.protocol.abstractstochasticseededprotocol method)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.sample"]], "sample() (quapy.protocol.domainmixer method)": [[2, "quapy.protocol.DomainMixer.sample"]], "sample() (quapy.protocol.npp method)": [[2, "quapy.protocol.NPP.sample"]], "sample() (quapy.protocol.upp method)": [[2, "quapy.protocol.UPP.sample"]], "samples_parameters() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.samples_parameters"]], "samples_parameters() (quapy.protocol.abstractstochasticseededprotocol method)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters"]], "samples_parameters() (quapy.protocol.domainmixer method)": [[2, "quapy.protocol.DomainMixer.samples_parameters"]], "samples_parameters() (quapy.protocol.npp method)": [[2, "quapy.protocol.NPP.samples_parameters"]], "samples_parameters() (quapy.protocol.upp method)": [[2, "quapy.protocol.UPP.samples_parameters"]], "save_text_file() (in module quapy.util)": [[2, "quapy.util.save_text_file"]], "se() (in module quapy.error)": [[2, "quapy.error.se"]], "set_params() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.set_params"]], "smooth() (in module quapy.error)": [[2, "quapy.error.smooth"]], "softmax() (in module quapy.functional)": [[2, "quapy.functional.softmax"]], "solve_adjustment() (in module quapy.functional)": [[2, "quapy.functional.solve_adjustment"]], "solve_adjustment_binary() (in module quapy.functional)": [[2, "quapy.functional.solve_adjustment_binary"]], "strprev() (in module quapy.functional)": [[2, "quapy.functional.strprev"]], "success() (quapy.model_selection.configstatus method)": [[2, "quapy.model_selection.ConfigStatus.success"]], "temp_seed() (in module quapy.util)": [[2, "quapy.util.temp_seed"]], "ternary_search() (in module quapy.functional)": [[2, "quapy.functional.ternary_search"]], "timeout() (in module quapy.util)": [[2, "quapy.util.timeout"]], "total() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.total"]], "total() (quapy.protocol.abstractprotocol method)": [[2, "quapy.protocol.AbstractProtocol.total"]], "total() (quapy.protocol.domainmixer method)": [[2, "quapy.protocol.DomainMixer.total"]], "total() (quapy.protocol.iterateprotocol method)": [[2, "quapy.protocol.IterateProtocol.total"]], "total() (quapy.protocol.npp method)": [[2, "quapy.protocol.NPP.total"]], "total() (quapy.protocol.upp method)": [[2, "quapy.protocol.UPP.total"]], "uniform_prevalence() (in module quapy.functional)": [[2, 
"quapy.functional.uniform_prevalence"]], "uniform_prevalence_sampling() (in module quapy.functional)": [[2, "quapy.functional.uniform_prevalence_sampling"]], "uniform_simplex_sampling() (in module quapy.functional)": [[2, "quapy.functional.uniform_simplex_sampling"]], "bctscalibration (class in quapy.classification.calibration)": [[3, "quapy.classification.calibration.BCTSCalibration"]], "cnnnet (class in quapy.classification.neural)": [[3, "quapy.classification.neural.CNNnet"]], "lstmnet (class in quapy.classification.neural)": [[3, "quapy.classification.neural.LSTMnet"]], "lowranklogisticregression (class in quapy.classification.methods)": [[3, "quapy.classification.methods.LowRankLogisticRegression"]], "nbvscalibration (class in quapy.classification.calibration)": [[3, "quapy.classification.calibration.NBVSCalibration"]], "neuralclassifiertrainer (class in quapy.classification.neural)": [[3, "quapy.classification.neural.NeuralClassifierTrainer"]], "recalibratedprobabilisticclassifier (class in quapy.classification.calibration)": [[3, "quapy.classification.calibration.RecalibratedProbabilisticClassifier"]], "recalibratedprobabilisticclassifierbase (class in quapy.classification.calibration)": [[3, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase"]], "svmperf (class in quapy.classification.svmperf)": [[3, "quapy.classification.svmperf.SVMperf"]], "tscalibration (class in quapy.classification.calibration)": [[3, "quapy.classification.calibration.TSCalibration"]], "textclassifiernet (class in quapy.classification.neural)": [[3, "quapy.classification.neural.TextClassifierNet"]], "torchdataset (class in quapy.classification.neural)": [[3, "quapy.classification.neural.TorchDataset"]], "vscalibration (class in quapy.classification.calibration)": [[3, "quapy.classification.calibration.VSCalibration"]], "asdataloader() (quapy.classification.neural.torchdataset method)": [[3, "quapy.classification.neural.TorchDataset.asDataloader"]], "classes_ (quapy.classification.calibration.recalibratedprobabilisticclassifierbase property)": [[3, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.classes_"]], "decision_function() (quapy.classification.svmperf.svmperf method)": [[3, "quapy.classification.svmperf.SVMperf.decision_function"]], "device (quapy.classification.neural.neuralclassifiertrainer property)": [[3, "quapy.classification.neural.NeuralClassifierTrainer.device"]], "dimensions() (quapy.classification.neural.textclassifiernet method)": [[3, "quapy.classification.neural.TextClassifierNet.dimensions"]], "document_embedding() (quapy.classification.neural.cnnnet method)": [[3, "quapy.classification.neural.CNNnet.document_embedding"]], "document_embedding() (quapy.classification.neural.lstmnet method)": [[3, "quapy.classification.neural.LSTMnet.document_embedding"]], "document_embedding() (quapy.classification.neural.textclassifiernet method)": [[3, "quapy.classification.neural.TextClassifierNet.document_embedding"]], "fit() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[3, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit"]], "fit() (quapy.classification.methods.lowranklogisticregression method)": [[3, "quapy.classification.methods.LowRankLogisticRegression.fit"]], "fit() (quapy.classification.neural.neuralclassifiertrainer method)": [[3, "quapy.classification.neural.NeuralClassifierTrainer.fit"]], "fit() (quapy.classification.svmperf.svmperf method)": [[3, 
"quapy.classification.svmperf.SVMperf.fit"]], "fit_cv() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[3, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_cv"]], "fit_tr_val() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[3, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_tr_val"]], "forward() (quapy.classification.neural.textclassifiernet method)": [[3, "quapy.classification.neural.TextClassifierNet.forward"]], "get_params() (quapy.classification.methods.lowranklogisticregression method)": [[3, "quapy.classification.methods.LowRankLogisticRegression.get_params"]], "get_params() (quapy.classification.neural.cnnnet method)": [[3, "quapy.classification.neural.CNNnet.get_params"]], "get_params() (quapy.classification.neural.lstmnet method)": [[3, "quapy.classification.neural.LSTMnet.get_params"]], "get_params() (quapy.classification.neural.neuralclassifiertrainer method)": [[3, "quapy.classification.neural.NeuralClassifierTrainer.get_params"]], "get_params() (quapy.classification.neural.textclassifiernet method)": [[3, "quapy.classification.neural.TextClassifierNet.get_params"]], "predict() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[3, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict"]], "predict() (quapy.classification.methods.lowranklogisticregression method)": [[3, "quapy.classification.methods.LowRankLogisticRegression.predict"]], "predict() (quapy.classification.neural.neuralclassifiertrainer method)": [[3, "quapy.classification.neural.NeuralClassifierTrainer.predict"]], "predict() (quapy.classification.svmperf.svmperf method)": [[3, "quapy.classification.svmperf.SVMperf.predict"]], "predict_proba() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[3, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict_proba"]], "predict_proba() (quapy.classification.methods.lowranklogisticregression method)": [[3, "quapy.classification.methods.LowRankLogisticRegression.predict_proba"]], "predict_proba() (quapy.classification.neural.neuralclassifiertrainer method)": [[3, "quapy.classification.neural.NeuralClassifierTrainer.predict_proba"]], "predict_proba() (quapy.classification.neural.textclassifiernet method)": [[3, "quapy.classification.neural.TextClassifierNet.predict_proba"]], "quapy.classification": [[3, "module-quapy.classification"]], "quapy.classification.calibration": [[3, "module-quapy.classification.calibration"]], "quapy.classification.methods": [[3, "module-quapy.classification.methods"]], "quapy.classification.neural": [[3, "module-quapy.classification.neural"]], "quapy.classification.svmperf": [[3, "module-quapy.classification.svmperf"]], "reset_net_params() (quapy.classification.neural.neuralclassifiertrainer method)": [[3, "quapy.classification.neural.NeuralClassifierTrainer.reset_net_params"]], "set_params() (quapy.classification.methods.lowranklogisticregression method)": [[3, "quapy.classification.methods.LowRankLogisticRegression.set_params"]], "set_params() (quapy.classification.neural.neuralclassifiertrainer method)": [[3, "quapy.classification.neural.NeuralClassifierTrainer.set_params"]], "training (quapy.classification.neural.cnnnet attribute)": [[3, "quapy.classification.neural.CNNnet.training"]], "training (quapy.classification.neural.lstmnet attribute)": [[3, 
"quapy.classification.neural.LSTMnet.training"]], "training (quapy.classification.neural.textclassifiernet attribute)": [[3, "quapy.classification.neural.TextClassifierNet.training"]], "transform() (quapy.classification.methods.lowranklogisticregression method)": [[3, "quapy.classification.methods.LowRankLogisticRegression.transform"]], "transform() (quapy.classification.neural.neuralclassifiertrainer method)": [[3, "quapy.classification.neural.NeuralClassifierTrainer.transform"]], "valid_losses (quapy.classification.svmperf.svmperf attribute)": [[3, "quapy.classification.svmperf.SVMperf.valid_losses"]], "vocabulary_size (quapy.classification.neural.cnnnet property)": [[3, "quapy.classification.neural.CNNnet.vocabulary_size"]], "vocabulary_size (quapy.classification.neural.lstmnet property)": [[3, "quapy.classification.neural.LSTMnet.vocabulary_size"]], "vocabulary_size (quapy.classification.neural.textclassifiernet property)": [[3, "quapy.classification.neural.TextClassifierNet.vocabulary_size"]], "xavier_uniform() (quapy.classification.neural.textclassifiernet method)": [[3, "quapy.classification.neural.TextClassifierNet.xavier_uniform"]], "dataset (class in quapy.data.base)": [[4, "quapy.data.base.Dataset"]], "indextransformer (class in quapy.data.preprocessing)": [[4, "quapy.data.preprocessing.IndexTransformer"]], "labelledcollection (class in quapy.data.base)": [[4, "quapy.data.base.LabelledCollection"]], "splitstratified() (quapy.data.base.dataset class method)": [[4, "quapy.data.base.Dataset.SplitStratified"]], "x (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.X"]], "xp (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.Xp"]], "xy (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.Xy"]], "add_word() (quapy.data.preprocessing.indextransformer method)": [[4, "quapy.data.preprocessing.IndexTransformer.add_word"]], "binarize() (in module quapy.data.reader)": [[4, "quapy.data.reader.binarize"]], "binary (quapy.data.base.dataset property)": [[4, "quapy.data.base.Dataset.binary"]], "binary (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.binary"]], "classes_ (quapy.data.base.dataset property)": [[4, "quapy.data.base.Dataset.classes_"]], "counts() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.counts"]], "fetch_ifcb() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_IFCB"]], "fetch_ucibinarydataset() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_UCIBinaryDataset"]], "fetch_ucibinarylabelledcollection() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_UCIBinaryLabelledCollection"]], "fetch_ucimulticlassdataset() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_UCIMulticlassDataset"]], "fetch_ucimulticlasslabelledcollection() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_UCIMulticlassLabelledCollection"]], "fetch_lequa2022() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_lequa2022"]], "fetch_reviews() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_reviews"]], "fetch_twitter() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_twitter"]], "fit() (quapy.data.preprocessing.indextransformer method)": [[4, "quapy.data.preprocessing.IndexTransformer.fit"]], "fit_transform() (quapy.data.preprocessing.indextransformer method)": [[4, 
"quapy.data.preprocessing.IndexTransformer.fit_transform"]], "from_csv() (in module quapy.data.reader)": [[4, "quapy.data.reader.from_csv"]], "from_sparse() (in module quapy.data.reader)": [[4, "quapy.data.reader.from_sparse"]], "from_text() (in module quapy.data.reader)": [[4, "quapy.data.reader.from_text"]], "index() (in module quapy.data.preprocessing)": [[4, "quapy.data.preprocessing.index"]], "join() (quapy.data.base.labelledcollection class method)": [[4, "quapy.data.base.LabelledCollection.join"]], "kfcv() (quapy.data.base.dataset class method)": [[4, "quapy.data.base.Dataset.kFCV"]], "kfcv() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.kFCV"]], "load() (quapy.data.base.dataset class method)": [[4, "quapy.data.base.Dataset.load"]], "load() (quapy.data.base.labelledcollection class method)": [[4, "quapy.data.base.LabelledCollection.load"]], "n_classes (quapy.data.base.dataset property)": [[4, "quapy.data.base.Dataset.n_classes"]], "n_classes (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.n_classes"]], "p (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.p"]], "prevalence() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.prevalence"]], "quapy.data": [[4, "module-quapy.data"]], "quapy.data.base": [[4, "module-quapy.data.base"]], "quapy.data.datasets": [[4, "module-quapy.data.datasets"]], "quapy.data.preprocessing": [[4, "module-quapy.data.preprocessing"]], "quapy.data.reader": [[4, "module-quapy.data.reader"]], "reduce() (quapy.data.base.dataset method)": [[4, "quapy.data.base.Dataset.reduce"]], "reduce_columns() (in module quapy.data.preprocessing)": [[4, "quapy.data.preprocessing.reduce_columns"]], "reindex_labels() (in module quapy.data.reader)": [[4, "quapy.data.reader.reindex_labels"]], "sampling() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.sampling"]], "sampling_from_index() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.sampling_from_index"]], "sampling_index() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.sampling_index"]], "split_random() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.split_random"]], "split_stratified() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.split_stratified"]], "standardize() (in module quapy.data.preprocessing)": [[4, "quapy.data.preprocessing.standardize"]], "stats() (quapy.data.base.dataset method)": [[4, "quapy.data.base.Dataset.stats"]], "stats() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.stats"]], "text2tfidf() (in module quapy.data.preprocessing)": [[4, "quapy.data.preprocessing.text2tfidf"]], "train_test (quapy.data.base.dataset property)": [[4, "quapy.data.base.Dataset.train_test"]], "transform() (quapy.data.preprocessing.indextransformer method)": [[4, "quapy.data.preprocessing.IndexTransformer.transform"]], "uniform_sampling() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.uniform_sampling"]], "uniform_sampling_index() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.uniform_sampling_index"]], "vocabulary_size (quapy.data.base.dataset property)": [[4, "quapy.data.base.Dataset.vocabulary_size"]], "vocabulary_size() (quapy.data.preprocessing.indextransformer 
method)": [[4, "quapy.data.preprocessing.IndexTransformer.vocabulary_size"]], "warn() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.warn"]], "y (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.y"]], "acc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.ACC"]], "adjustedclassifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.AdjustedClassifyAndCount"]], "aggregativecrispquantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeCrispQuantifier"]], "aggregativemedianestimator (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator"]], "aggregativequantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeQuantifier"]], "aggregativesoftquantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeSoftQuantifier"]], "bandwidth_method (quapy.method._kdey.kdebase attribute)": [[5, "quapy.method._kdey.KDEBase.BANDWIDTH_METHOD"]], "basequantifier (class in quapy.method.base)": [[5, "quapy.method.base.BaseQuantifier"]], "bayesiancc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.BayesianCC"]], "binaryaggregativequantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier"]], "binaryquantifier (class in quapy.method.base)": [[5, "quapy.method.base.BinaryQuantifier"]], "blobelloss (class in quapy.method.composable)": [[5, "quapy.method.composable.BlobelLoss"]], "cc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.CC"]], "cvclassifier (class in quapy.method.composable)": [[5, "quapy.method.composable.CVClassifier"]], "classtransformer (class in quapy.method.composable)": [[5, "quapy.method.composable.ClassTransformer"]], "classifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ClassifyAndCount"]], "combinedloss (class in quapy.method.composable)": [[5, "quapy.method.composable.CombinedLoss"]], "composablequantifier() (in module quapy.method.composable)": [[5, "quapy.method.composable.ComposableQuantifier"]], "dmx (class in quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.DMx"]], "dmy (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.DMy"]], "distancetransformer (class in quapy.method.composable)": [[5, "quapy.method.composable.DistanceTransformer"]], "distributionmatchingx (in module quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.DistributionMatchingX"]], "distributionmatchingy (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.DistributionMatchingY"]], "dys (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.DyS"]], "eacc() (in module quapy.method.meta)": [[5, "quapy.method.meta.EACC"]], "ecc() (in module quapy.method.meta)": [[5, "quapy.method.meta.ECC"]], "eemq() (in module quapy.method.meta)": [[5, "quapy.method.meta.EEMQ"]], "ehdy() (in module quapy.method.meta)": [[5, "quapy.method.meta.EHDy"]], "em() (quapy.method.aggregative.emq class method)": [[5, "quapy.method.aggregative.EMQ.EM"]], "emq (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.EMQ"]], "emq_bcts() (quapy.method.aggregative.emq class method)": [[5, "quapy.method.aggregative.EMQ.EMQ_BCTS"]], "epacc() (in module quapy.method.meta)": [[5, "quapy.method.meta.EPACC"]], "epsilon (quapy.method.aggregative.emq attribute)": [[5, "quapy.method.aggregative.EMQ.EPSILON"]], 
"energykerneltransformer (class in quapy.method.composable)": [[5, "quapy.method.composable.EnergyKernelTransformer"]], "energyloss (class in quapy.method.composable)": [[5, "quapy.method.composable.EnergyLoss"]], "ensemble (class in quapy.method.meta)": [[5, "quapy.method.meta.Ensemble"]], "expectationmaximizationquantifier (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ExpectationMaximizationQuantifier"]], "gaussiankerneltransformer (class in quapy.method.composable)": [[5, "quapy.method.composable.GaussianKernelTransformer"]], "gaussianrffkerneltransformer (class in quapy.method.composable)": [[5, "quapy.method.composable.GaussianRFFKernelTransformer"]], "hdx() (quapy.method.non_aggregative.dmx class method)": [[5, "quapy.method.non_aggregative.DMx.HDx"]], "hdy (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.HDy"]], "hellingerdistancey (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.HellingerDistanceY"]], "hellingersurrogateloss (class in quapy.method.composable)": [[5, "quapy.method.composable.HellingerSurrogateLoss"]], "histogramtransformer (class in quapy.method.composable)": [[5, "quapy.method.composable.HistogramTransformer"]], "kdebase (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEBase"]], "kdeycs (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEyCS"]], "kdeyhd (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEyHD"]], "kdeyml (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEyML"]], "kerneltransformer (class in quapy.method.composable)": [[5, "quapy.method.composable.KernelTransformer"]], "laplaciankerneltransformer (class in quapy.method.composable)": [[5, "quapy.method.composable.LaplacianKernelTransformer"]], "leastsquaresloss (class in quapy.method.composable)": [[5, "quapy.method.composable.LeastSquaresLoss"]], "max (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.MAX"]], "max_iter (quapy.method.aggregative.emq attribute)": [[5, "quapy.method.aggregative.EMQ.MAX_ITER"]], "methods (quapy.method.aggregative.acc attribute)": [[5, "quapy.method.aggregative.ACC.METHODS"]], "ms (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.MS"]], "ms2 (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.MS2"]], "maximumlikelihoodprevalenceestimation (class in quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation"]], "medianestimator (class in quapy.method.meta)": [[5, "quapy.method.meta.MedianEstimator"]], "medianestimator2 (class in quapy.method.meta)": [[5, "quapy.method.meta.MedianEstimator2"]], "normalizations (quapy.method.aggregative.acc attribute)": [[5, "quapy.method.aggregative.ACC.NORMALIZATIONS"]], "onevsall (class in quapy.method.base)": [[5, "quapy.method.base.OneVsAll"]], "onevsallaggregative (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.OneVsAllAggregative"]], "onevsallgeneric (class in quapy.method.base)": [[5, "quapy.method.base.OneVsAllGeneric"]], "pacc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.PACC"]], "pcc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.PCC"]], "probabilisticadjustedclassifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ProbabilisticAdjustedClassifyAndCount"]], "probabilisticclassifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ProbabilisticClassifyAndCount"]], "quanetmodule (class 
in quapy.method._neural)": [[5, "quapy.method._neural.QuaNetModule"]], "quanettrainer (class in quapy.method._neural)": [[5, "quapy.method._neural.QuaNetTrainer"]], "readme (class in quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.ReadMe"]], "sld (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.SLD"]], "smm (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.SMM"]], "solvers (quapy.method.aggregative.acc attribute)": [[5, "quapy.method.aggregative.ACC.SOLVERS"]], "t50 (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.T50"]], "thresholdoptimization (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.ThresholdOptimization"]], "tikhonovregularization (class in quapy.method.composable)": [[5, "quapy.method.composable.TikhonovRegularization"]], "tikhonovregularized() (in module quapy.method.composable)": [[5, "quapy.method.composable.TikhonovRegularized"]], "valid_policies (quapy.method.meta.ensemble attribute)": [[5, "quapy.method.meta.Ensemble.VALID_POLICIES"]], "x (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.X"]], "aggregate() (quapy.method._kdey.kdeycs method)": [[5, "quapy.method._kdey.KDEyCS.aggregate"]], "aggregate() (quapy.method._kdey.kdeyhd method)": [[5, "quapy.method._kdey.KDEyHD.aggregate"]], "aggregate() (quapy.method._kdey.kdeyml method)": [[5, "quapy.method._kdey.KDEyML.aggregate"]], "aggregate() (quapy.method._threshold_optim.ms method)": [[5, "quapy.method._threshold_optim.MS.aggregate"]], "aggregate() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.aggregate"]], "aggregate() (quapy.method.aggregative.acc method)": [[5, "quapy.method.aggregative.ACC.aggregate"]], "aggregate() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.aggregate"]], "aggregate() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.aggregate"]], "aggregate() (quapy.method.aggregative.cc method)": [[5, "quapy.method.aggregative.CC.aggregate"]], "aggregate() (quapy.method.aggregative.dmy method)": [[5, "quapy.method.aggregative.DMy.aggregate"]], "aggregate() (quapy.method.aggregative.dys method)": [[5, "quapy.method.aggregative.DyS.aggregate"]], "aggregate() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.aggregate"]], "aggregate() (quapy.method.aggregative.hdy method)": [[5, "quapy.method.aggregative.HDy.aggregate"]], "aggregate() (quapy.method.aggregative.onevsallaggregative method)": [[5, "quapy.method.aggregative.OneVsAllAggregative.aggregate"]], "aggregate() (quapy.method.aggregative.pacc method)": [[5, "quapy.method.aggregative.PACC.aggregate"]], "aggregate() (quapy.method.aggregative.pcc method)": [[5, "quapy.method.aggregative.PCC.aggregate"]], "aggregate() (quapy.method.aggregative.smm method)": [[5, "quapy.method.aggregative.SMM.aggregate"]], "aggregate_with_threshold() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.aggregate_with_threshold"]], "aggregation_fit() (quapy.method._kdey.kdeycs method)": [[5, "quapy.method._kdey.KDEyCS.aggregation_fit"]], "aggregation_fit() (quapy.method._kdey.kdeyhd method)": [[5, "quapy.method._kdey.KDEyHD.aggregation_fit"]], "aggregation_fit() (quapy.method._kdey.kdeyml method)": [[5, "quapy.method._kdey.KDEyML.aggregation_fit"]], "aggregation_fit() 
(quapy.method._threshold_optim.ms method)": [[5, "quapy.method._threshold_optim.MS.aggregation_fit"]], "aggregation_fit() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.acc method)": [[5, "quapy.method.aggregative.ACC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.cc method)": [[5, "quapy.method.aggregative.CC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.dmy method)": [[5, "quapy.method.aggregative.DMy.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.dys method)": [[5, "quapy.method.aggregative.DyS.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.hdy method)": [[5, "quapy.method.aggregative.HDy.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.pacc method)": [[5, "quapy.method.aggregative.PACC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.pcc method)": [[5, "quapy.method.aggregative.PCC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.smm method)": [[5, "quapy.method.aggregative.SMM.aggregation_fit"]], "aggregative (quapy.method.meta.ensemble property)": [[5, "quapy.method.meta.Ensemble.aggregative"]], "classes_ (quapy.method._neural.quanettrainer property)": [[5, "quapy.method._neural.QuaNetTrainer.classes_"]], "classes_ (quapy.method.aggregative.aggregativequantifier property)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classes_"]], "classes_ (quapy.method.base.onevsallgeneric property)": [[5, "quapy.method.base.OneVsAllGeneric.classes_"]], "classifier (quapy.method.aggregative.aggregativequantifier property)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classifier"]], "classifier_fit_predict() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classifier_fit_predict"]], "classify() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classify"]], "classify() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.classify"]], "classify() (quapy.method.aggregative.onevsallaggregative method)": [[5, "quapy.method.aggregative.OneVsAllAggregative.classify"]], "clean_checkpoint() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.clean_checkpoint"]], "clean_checkpoint_dir() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.clean_checkpoint_dir"]], "condition() (quapy.method._threshold_optim.max method)": [[5, "quapy.method._threshold_optim.MAX.condition"]], "condition() (quapy.method._threshold_optim.ms method)": [[5, "quapy.method._threshold_optim.MS.condition"]], "condition() (quapy.method._threshold_optim.t50 method)": [[5, "quapy.method._threshold_optim.T50.condition"]], "condition() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.condition"]], "condition() (quapy.method._threshold_optim.x method)": [[5, "quapy.method._threshold_optim.X.condition"]], 
"device (quapy.method._neural.quanetmodule property)": [[5, "quapy.method._neural.QuaNetModule.device"]], "discard() (quapy.method._threshold_optim.ms2 method)": [[5, "quapy.method._threshold_optim.MS2.discard"]], "discard() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.discard"]], "ensemblefactory() (in module quapy.method.meta)": [[5, "quapy.method.meta.ensembleFactory"]], "fit() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.fit"]], "fit() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.fit"]], "fit() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.fit"]], "fit() (quapy.method.aggregative.binaryaggregativequantifier method)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier.fit"]], "fit() (quapy.method.base.basequantifier method)": [[5, "quapy.method.base.BaseQuantifier.fit"]], "fit() (quapy.method.base.onevsallgeneric method)": [[5, "quapy.method.base.OneVsAllGeneric.fit"]], "fit() (quapy.method.composable.cvclassifier method)": [[5, "quapy.method.composable.CVClassifier.fit"]], "fit() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.fit"]], "fit() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.fit"]], "fit() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.fit"]], "fit() (quapy.method.non_aggregative.dmx method)": [[5, "quapy.method.non_aggregative.DMx.fit"]], "fit() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[5, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.fit"]], "fit() (quapy.method.non_aggregative.readme method)": [[5, "quapy.method.non_aggregative.ReadMe.fit"]], "fit_transform() (quapy.method.composable.classtransformer method)": [[5, "quapy.method.composable.ClassTransformer.fit_transform"]], "fit_transform() (quapy.method.composable.distancetransformer method)": [[5, "quapy.method.composable.DistanceTransformer.fit_transform"]], "fit_transform() (quapy.method.composable.energykerneltransformer method)": [[5, "quapy.method.composable.EnergyKernelTransformer.fit_transform"]], "fit_transform() (quapy.method.composable.gaussiankerneltransformer method)": [[5, "quapy.method.composable.GaussianKernelTransformer.fit_transform"]], "fit_transform() (quapy.method.composable.gaussianrffkerneltransformer method)": [[5, "quapy.method.composable.GaussianRFFKernelTransformer.fit_transform"]], "fit_transform() (quapy.method.composable.histogramtransformer method)": [[5, "quapy.method.composable.HistogramTransformer.fit_transform"]], "fit_transform() (quapy.method.composable.kerneltransformer method)": [[5, "quapy.method.composable.KernelTransformer.fit_transform"]], "forward() (quapy.method._neural.quanetmodule method)": [[5, "quapy.method._neural.QuaNetModule.forward"]], "getptecondestim() (quapy.method.aggregative.acc class method)": [[5, "quapy.method.aggregative.ACC.getPteCondEstim"]], "getptecondestim() (quapy.method.aggregative.pacc class method)": [[5, "quapy.method.aggregative.PACC.getPteCondEstim"]], "get_conditional_probability_samples() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.get_conditional_probability_samples"]], "get_kde_function() (quapy.method._kdey.kdebase method)": [[5, "quapy.method._kdey.KDEBase.get_kde_function"]], 
"get_mixture_components() (quapy.method._kdey.kdebase method)": [[5, "quapy.method._kdey.KDEBase.get_mixture_components"]], "get_params() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.get_params"]], "get_params() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.get_params"]], "get_params() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.get_params"]], "get_params() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.get_params"]], "get_params() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.get_params"]], "get_prevalence_samples() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.get_prevalence_samples"]], "get_probability_distribution() (in module quapy.method.meta)": [[5, "quapy.method.meta.get_probability_distribution"]], "gram_matrix_mix_sum() (quapy.method._kdey.kdeycs method)": [[5, "quapy.method._kdey.KDEyCS.gram_matrix_mix_sum"]], "kernel (quapy.method.composable.laplaciankerneltransformer property)": [[5, "quapy.method.composable.LaplacianKernelTransformer.kernel"]], "mae_loss() (in module quapy.method._neural)": [[5, "quapy.method._neural.mae_loss"]], "neg_label (quapy.method.aggregative.binaryaggregativequantifier property)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier.neg_label"]], "newelm() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newELM"]], "newinvariantratioestimation() (quapy.method.aggregative.acc class method)": [[5, "quapy.method.aggregative.ACC.newInvariantRatioEstimation"]], "newonevsall() (in module quapy.method.base)": [[5, "quapy.method.base.newOneVsAll"]], "newsvmae() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMAE"]], "newsvmkld() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMKLD"]], "newsvmq() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMQ"]], "newsvmrae() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMRAE"]], "pdf() (quapy.method._kdey.kdebase method)": [[5, "quapy.method._kdey.KDEBase.pdf"]], "pos_label (quapy.method.aggregative.binaryaggregativequantifier property)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier.pos_label"]], "predict() (quapy.method.composable.cvclassifier method)": [[5, "quapy.method.composable.CVClassifier.predict"]], "predict_proba() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.predict_proba"]], "predict_proba() (quapy.method.composable.cvclassifier method)": [[5, "quapy.method.composable.CVClassifier.predict_proba"]], "probabilistic (quapy.method.meta.ensemble property)": [[5, "quapy.method.meta.Ensemble.probabilistic"]], "quantify() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.quantify"]], "quantify() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.quantify"]], "quantify() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.quantify"]], "quantify() (quapy.method.base.basequantifier method)": [[5, "quapy.method.base.BaseQuantifier.quantify"]], "quantify() (quapy.method.base.onevsallgeneric method)": [[5, "quapy.method.base.OneVsAllGeneric.quantify"]], "quantify() (quapy.method.meta.ensemble method)": [[5, 
"quapy.method.meta.Ensemble.quantify"]], "quantify() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.quantify"]], "quantify() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.quantify"]], "quantify() (quapy.method.non_aggregative.dmx method)": [[5, "quapy.method.non_aggregative.DMx.quantify"]], "quantify() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[5, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.quantify"]], "quantify() (quapy.method.non_aggregative.readme method)": [[5, "quapy.method.non_aggregative.ReadMe.quantify"]], "quapy.method": [[5, "module-quapy.method"]], "quapy.method._kdey": [[5, "module-quapy.method._kdey"]], "quapy.method._neural": [[5, "module-quapy.method._neural"]], "quapy.method._threshold_optim": [[5, "module-quapy.method._threshold_optim"]], "quapy.method.aggregative": [[5, "module-quapy.method.aggregative"]], "quapy.method.base": [[5, "module-quapy.method.base"]], "quapy.method.composable": [[5, "module-quapy.method.composable"]], "quapy.method.meta": [[5, "module-quapy.method.meta"]], "quapy.method.non_aggregative": [[5, "module-quapy.method.non_aggregative"]], "sample_from_posterior() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.sample_from_posterior"]], "set_params() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.set_params"]], "set_params() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.set_params"]], "set_params() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.set_params"]], "set_params() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.set_params"]], "set_params() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.set_params"]], "std_constrained_linear_ls() (quapy.method.non_aggregative.readme method)": [[5, "quapy.method.non_aggregative.ReadMe.std_constrained_linear_ls"]], "training (quapy.method._neural.quanetmodule attribute)": [[5, "quapy.method._neural.QuaNetModule.training"]], "transform() (quapy.method.composable.classtransformer method)": [[5, "quapy.method.composable.ClassTransformer.transform"]], "transform() (quapy.method.composable.distancetransformer method)": [[5, "quapy.method.composable.DistanceTransformer.transform"]], "transform() (quapy.method.composable.energykerneltransformer method)": [[5, "quapy.method.composable.EnergyKernelTransformer.transform"]], "transform() (quapy.method.composable.gaussiankerneltransformer method)": [[5, "quapy.method.composable.GaussianKernelTransformer.transform"]], "transform() (quapy.method.composable.gaussianrffkerneltransformer method)": [[5, "quapy.method.composable.GaussianRFFKernelTransformer.transform"]], "transform() (quapy.method.composable.histogramtransformer method)": [[5, "quapy.method.composable.HistogramTransformer.transform"]], "transform() (quapy.method.composable.kerneltransformer method)": [[5, "quapy.method.composable.KernelTransformer.transform"]], "val_split (quapy.method.aggregative.aggregativequantifier property)": [[5, "quapy.method.aggregative.AggregativeQuantifier.val_split"]], "val_split_ (quapy.method.aggregative.aggregativequantifier attribute)": [[5, "quapy.method.aggregative.AggregativeQuantifier.val_split_"]]}}) \ No newline at end of file diff --git a/SoBigData.png b/docs/source/SoBigData.png 
similarity index 100% rename from SoBigData.png rename to docs/source/SoBigData.png diff --git a/docs/source/index.rst b/docs/source/index.rst index a4150cd..d2918cf 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,28 +3,69 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to QuaPy's documentation! +.. toctree:: + :hidden: + + self + +Quickstart ========================================================================================== -QuaPy is a Python-based open-source framework for quantification. +QuaPy is an open source framework for quantification (a.k.a. supervised prevalence estimation, or learning to quantify) written in Python. -This document contains the API of the modules included in QuaPy. +QuaPy is based on the concept of "data sample", and provides implementations of the most important aspects of the quantification workflow, such as (baseline and advanced) quantification methods, quantification-oriented model selection mechanisms, evaluation measures, and evaluation protocols used for evaluating quantification methods. QuaPy also makes available commonly used datasets, and offers visualization tools for facilitating the analysis and interpretation of the experimental results. + +QuaPy is hosted on GitHub at `https://github.com/HLT-ISTI/QuaPy <https://github.com/HLT-ISTI/QuaPy>`_ Installation ------------ -`pip install quapy` +.. code-block:: none + + pip install quapy -GitHub +Citing QuaPy ------------ -QuaPy is hosted in GitHub at `https://github.com/HLT-ISTI/QuaPy <https://github.com/HLT-ISTI/QuaPy>`_ +If you find QuaPy useful (and we hope you will), please consider citing the original paper in your research. +.. code-block:: none -Wiki Documents ------------- + @inproceedings{moreo2021quapy, + title={QuaPy: a python-based framework for quantification}, + author={Moreo, Alejandro and Esuli, Andrea and Sebastiani, Fabrizio}, + booktitle={Proceedings of the 30th ACM International Conference on Information \& Knowledge Management}, + pages={4534--4543}, + year={2021} + } + +Usage +----- + +The following script fetches a dataset of tweets, trains, applies, and evaluates a quantifier based on the *Adjusted Classify & Count* quantification method, using, as the evaluation measure, the *Mean Absolute Error* (MAE) between the predicted and the true class prevalence values of the test set:: + + import quapy as qp + from sklearn.linear_model import LogisticRegression + + dataset = qp.datasets.fetch_twitter('semeval16') + + # create an "Adjusted Classify & Count" quantifier + model = qp.method.aggregative.ACC(LogisticRegression()) + model.fit(dataset.training) -In this section you can find useful information concerning different aspects of QuaPy, with examples: + estim_prevalence = model.quantify(dataset.test.instances) + true_prevalence = dataset.test.prevalence() + + error = qp.error.mae(true_prevalence, estim_prevalence) + + print(f'Mean Absolute Error (MAE)={error:.3f}') + +Quantification is useful in scenarios characterized by prior probability shift. In other words, we would have little interest in estimating the class prevalence values of the test set if we could rely on the IID assumption, as this prevalence would be roughly equivalent to the class prevalence of the training set. For this reason, any quantification model should be tested across many samples, even ones characterized by class prevalence values different or very different from those found in the training set. QuaPy implements sampling procedures and evaluation protocols that automate this workflow.
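To make this workflow concrete, here is a minimal sketch of a protocol-based evaluation that continues the quickstart script above; the names `quapy.protocol.APP` and `qp.evaluation.evaluate` are assumed from the QuaPy API around v0.1.8 and may differ in other versions:

```python
import quapy as qp
from quapy.protocol import APP

qp.environ['SAMPLE_SIZE'] = 100  # number of instances per generated test sample

# `model` and `dataset` come from the quickstart script above; APP draws
# many test samples spanning a grid of artificial class prevalence values
protocol = APP(dataset.test)
mean_mae = qp.evaluation.evaluate(model, protocol=protocol, error_metric='mae')
print(f'MAE averaged across APP samples = {mean_mae:.3f}')
```

Averaging the error over protocol-generated samples, rather than over a single test split, is what makes the evaluation meaningful under prior probability shift.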
See the `Manuals`_ for detailed examples. + +Manuals +------- + +The following manuals illustrate several aspects of QuaPy through examples: .. toctree:: :maxdepth: 1 @@ -37,22 +78,38 @@ In this section you can find useful information concerning different aspects of wiki/Plotting wiki/Protocols - .. toctree:: - :maxdepth: 2 - :caption: Contents: + :hidden: - -Contents + List of Modules + +Features -------- -.. toctree:: +* Implementation of many popular quantification methods (Classify-&-Count and its variants, Expectation Maximization, quantification methods based on structured output learning, HDy, QuaNet, quantification ensembles, among others). +* Versatile functionality for performing evaluation based on sampling generation protocols (e.g., APP, NPP, etc.). +* Implementation of most commonly used evaluation metrics (e.g., AE, RAE, NAE, NRAE, SE, KLD, NKLD, etc.). +* Datasets frequently used in quantification (textual and numeric), including: + + * 32 UCI Machine Learning binary datasets. + * 5 UCI Machine Learning multiclass datasets (new in v0.1.8!). + * 11 Twitter quantification-by-sentiment datasets. + * 3 product reviews quantification-by-sentiment datasets. + * 4 tasks from LeQua competition (new in v0.1.7!). + * IFCB dataset of plankton water samples (new in v0.1.8!). - modules +* Native support for binary and single-label multiclass quantification scenarios. +* Model selection functionality that minimizes quantification-oriented loss functions. +* Visualization tools for analysing the experimental results. + +Contributing +------------ +In case you want to contribute improvements to quapy, please generate a pull request to the "devel" branch. -Indices and tables -================== +Acknowledgments +--------------- -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` +..
image:: SoBigData.png + :width: 250px + :alt: SoBigData++ From 21a466adf134696452b6b407180ec0c4034cc330 Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 24 Jun 2024 14:06:03 +0200 Subject: [PATCH 02/16] Use MyST instead of pandoc with a Makefile --- .github/workflows/ci.yml | 4 ---- docs/.gitignore | 1 - docs/Makefile | 15 +-------------- docs/source/.gitignore | 1 + docs/source/conf.py | 5 ++++- docs/source/{wiki_editable => wiki}/Datasets.md | 0 .../source/{wiki_editable => wiki}/Evaluation.md | 0 .../ExplicitLossMinimization.md | 0 docs/source/{wiki_editable => wiki}/Methods.md | 0 .../{wiki_editable => wiki}/Model-Selection.md | 0 docs/source/{wiki_editable => wiki}/Plotting.md | 0 docs/source/{wiki_editable => wiki}/Protocols.md | 0 .../wiki_examples/selected_plots/bin_bias.png | Bin .../selected_plots/bin_bias_bin_cc.png | Bin .../wiki_examples/selected_plots/bin_bias_cc.png | Bin .../wiki_examples/selected_plots/bin_diag.png | Bin .../wiki_examples/selected_plots/bin_diag_cc.png | Bin .../wiki_examples/selected_plots/err_drift.png | Bin setup.py | 2 +- 19 files changed, 7 insertions(+), 21 deletions(-) create mode 100644 docs/source/.gitignore rename docs/source/{wiki_editable => wiki}/Datasets.md (100%) rename docs/source/{wiki_editable => wiki}/Evaluation.md (100%) rename docs/source/{wiki_editable => wiki}/ExplicitLossMinimization.md (100%) rename docs/source/{wiki_editable => wiki}/Methods.md (100%) rename docs/source/{wiki_editable => wiki}/Model-Selection.md (100%) rename docs/source/{wiki_editable => wiki}/Plotting.md (100%) rename docs/source/{wiki_editable => wiki}/Protocols.md (100%) rename docs/source/{wiki_editable => wiki}/wiki_examples/selected_plots/bin_bias.png (100%) rename docs/source/{wiki_editable => wiki}/wiki_examples/selected_plots/bin_bias_bin_cc.png (100%) rename docs/source/{wiki_editable => wiki}/wiki_examples/selected_plots/bin_bias_cc.png (100%) rename docs/source/{wiki_editable => wiki}/wiki_examples/selected_plots/bin_diag.png (100%) rename docs/source/{wiki_editable => wiki}/wiki_examples/selected_plots/bin_diag_cc.png (100%) rename docs/source/{wiki_editable => wiki}/wiki_examples/selected_plots/err_drift.png (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fcf306e..7fcecff 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,10 +45,6 @@ jobs: pre-build-command: | python -m pip install --upgrade pip setuptools wheel python -m pip install -e .[docs] - mkdir -p docs/source/wiki/wiki_examples/selected_plots - cp docs/source/wiki_editable/wiki_examples/selected_plots/* docs/source/wiki/wiki_examples/selected_plots/ - find docs/source/wiki_editable -name '*.md' -exec sh -c 'pandoc -f markdown -t rst "$$1" -o "docs/source/wiki/$$(basename "$$1" .md).rst"' _ {} \; - sphinx-apidoc --force --output-dir docs/source quapy docs-folder: "docs/" - name: Publish documentation run: | diff --git a/docs/.gitignore b/docs/.gitignore index 4b7ed84..567609b 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,2 +1 @@ build/ -source/wiki/ diff --git a/docs/Makefile b/docs/Makefile index bb42adc..d0c3cbf 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -14,20 +14,7 @@ help: .PHONY: help Makefile -# Convert Markdown files to reStructuredText before building HTML -markdown_to_rst: - @echo "Converting Markdown files to reStructuredText" - @mkdir -p $(SOURCEDIR)/wiki/wiki_examples/selected_plots - @cp $(SOURCEDIR)/wiki_editable/wiki_examples/selected_plots/* $(SOURCEDIR)/wiki/wiki_examples/selected_plots/ - @find 
$(SOURCEDIR)/wiki_editable -name '*.md' -exec sh -c 'pandoc -f markdown -t rst "$$1" -o "$(SOURCEDIR)/wiki/$$(basename "$$1" .md).rst"' _ {} \; - @echo "Conversion complete." - # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -html: markdown_to_rst +%: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -# # Catch-all target: route all unknown targets to Sphinx using the new -# # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -# %: Makefile -# @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/source/.gitignore b/docs/source/.gitignore new file mode 100644 index 0000000..dab1498 --- /dev/null +++ b/docs/source/.gitignore @@ -0,0 +1 @@ +!*.png \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 01c305d..9d86c63 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -35,9 +35,12 @@ 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.viewcode', - 'sphinx.ext.napoleon' + 'sphinx.ext.napoleon', + 'myst_parser', ] +source_suffix = ['.rst', '.md'] + templates_path = ['_templates'] # List of patterns, relative to source directory, that match files and diff --git a/docs/source/wiki_editable/Datasets.md b/docs/source/wiki/Datasets.md similarity index 100% rename from docs/source/wiki_editable/Datasets.md rename to docs/source/wiki/Datasets.md diff --git a/docs/source/wiki_editable/Evaluation.md b/docs/source/wiki/Evaluation.md similarity index 100% rename from docs/source/wiki_editable/Evaluation.md rename to docs/source/wiki/Evaluation.md diff --git a/docs/source/wiki_editable/ExplicitLossMinimization.md b/docs/source/wiki/ExplicitLossMinimization.md similarity index 100% rename from docs/source/wiki_editable/ExplicitLossMinimization.md rename to docs/source/wiki/ExplicitLossMinimization.md diff --git a/docs/source/wiki_editable/Methods.md b/docs/source/wiki/Methods.md similarity index 100% rename from docs/source/wiki_editable/Methods.md rename to docs/source/wiki/Methods.md diff --git a/docs/source/wiki_editable/Model-Selection.md b/docs/source/wiki/Model-Selection.md similarity index 100% rename from docs/source/wiki_editable/Model-Selection.md rename to docs/source/wiki/Model-Selection.md diff --git a/docs/source/wiki_editable/Plotting.md b/docs/source/wiki/Plotting.md similarity index 100% rename from docs/source/wiki_editable/Plotting.md rename to docs/source/wiki/Plotting.md diff --git a/docs/source/wiki_editable/Protocols.md b/docs/source/wiki/Protocols.md similarity index 100% rename from docs/source/wiki_editable/Protocols.md rename to docs/source/wiki/Protocols.md diff --git a/docs/source/wiki_editable/wiki_examples/selected_plots/bin_bias.png b/docs/source/wiki/wiki_examples/selected_plots/bin_bias.png similarity index 100% rename from docs/source/wiki_editable/wiki_examples/selected_plots/bin_bias.png rename to docs/source/wiki/wiki_examples/selected_plots/bin_bias.png diff --git a/docs/source/wiki_editable/wiki_examples/selected_plots/bin_bias_bin_cc.png b/docs/source/wiki/wiki_examples/selected_plots/bin_bias_bin_cc.png similarity index 100% rename from docs/source/wiki_editable/wiki_examples/selected_plots/bin_bias_bin_cc.png rename to docs/source/wiki/wiki_examples/selected_plots/bin_bias_bin_cc.png diff --git a/docs/source/wiki_editable/wiki_examples/selected_plots/bin_bias_cc.png b/docs/source/wiki/wiki_examples/selected_plots/bin_bias_cc.png similarity 
index 100% rename from docs/source/wiki_editable/wiki_examples/selected_plots/bin_bias_cc.png rename to docs/source/wiki/wiki_examples/selected_plots/bin_bias_cc.png diff --git a/docs/source/wiki_editable/wiki_examples/selected_plots/bin_diag.png b/docs/source/wiki/wiki_examples/selected_plots/bin_diag.png similarity index 100% rename from docs/source/wiki_editable/wiki_examples/selected_plots/bin_diag.png rename to docs/source/wiki/wiki_examples/selected_plots/bin_diag.png diff --git a/docs/source/wiki_editable/wiki_examples/selected_plots/bin_diag_cc.png b/docs/source/wiki/wiki_examples/selected_plots/bin_diag_cc.png similarity index 100% rename from docs/source/wiki_editable/wiki_examples/selected_plots/bin_diag_cc.png rename to docs/source/wiki/wiki_examples/selected_plots/bin_diag_cc.png diff --git a/docs/source/wiki_editable/wiki_examples/selected_plots/err_drift.png b/docs/source/wiki/wiki_examples/selected_plots/err_drift.png similarity index 100% rename from docs/source/wiki_editable/wiki_examples/selected_plots/err_drift.png rename to docs/source/wiki/wiki_examples/selected_plots/err_drift.png diff --git a/setup.py b/setup.py index d0f299a..5c0dd2d 100644 --- a/setup.py +++ b/setup.py @@ -127,7 +127,7 @@ def get_version(rel_path): 'bayes': ['jax', 'jaxlib', 'numpyro'], 'composable': ['qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.3'], 'tests': ['certifi'], - 'docs' : ['sphinx-rtd-theme'], + 'docs' : ['sphinx-rtd-theme', 'myst-parser', 'qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.3'], }, # If there are data files included in your packages that need to be From 6ea15c30b8928c3d72b18028426c9c4bd7f0bd7c Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 24 Jun 2024 14:19:13 +0200 Subject: [PATCH 03/16] TO REVERT: build gh-pages even on pushes to devel --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7fcecff..0cd88ba 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,6 @@ jobs: docs: name: Documentation runs-on: ubuntu-latest - if: github.ref == 'refs/heads/master' steps: - uses: actions/checkout@v1 - name: Build documentation From fedf9b492bd814c8f426076d0bd5ab1a52f5ca1c Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 24 Jun 2024 14:24:15 +0200 Subject: [PATCH 04/16] Fix the documentation build step of the CI --- .github/workflows/ci.yml | 3 ++- setup.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0cd88ba..33d3da0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,8 +42,9 @@ jobs: uses: ammaraskar/sphinx-action@master with: pre-build-command: | + apt-get update -y && apt-get install -y git python -m pip install --upgrade pip setuptools wheel - python -m pip install -e .[docs] + python -m pip install -e .[composable,docs] docs-folder: "docs/" - name: Publish documentation run: | diff --git a/setup.py b/setup.py index 5c0dd2d..23aa3ca 100644 --- a/setup.py +++ b/setup.py @@ -127,7 +127,7 @@ def get_version(rel_path): 'bayes': ['jax', 'jaxlib', 'numpyro'], 'composable': ['qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.3'], 'tests': ['certifi'], - 'docs' : ['sphinx-rtd-theme', 'myst-parser', 'qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.3'], + 'docs' : ['sphinx-rtd-theme', 'myst-parser'], }, # If there are data files included in your packages that need to be From 04e78054455e00b42395f999e507da55995328ec Mon Sep 
17 00:00:00 2001 From: Mirko Bunse Date: Mon, 24 Jun 2024 14:48:45 +0200 Subject: [PATCH 05/16] Try without the composable module --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 33d3da0..44e9e6d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,9 +42,9 @@ jobs: uses: ammaraskar/sphinx-action@master with: pre-build-command: | - apt-get update -y && apt-get install -y git + apt-get update -y && apt-get install -y git && git --version python -m pip install --upgrade pip setuptools wheel - python -m pip install -e .[composable,docs] + python -m pip install -e .[docs] docs-folder: "docs/" - name: Publish documentation run: | From 02365e4beea0a3cb2520b693464eae1b7ea40911 Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 24 Jun 2024 14:51:38 +0200 Subject: [PATCH 06/16] Use --allow-releaseinfo-change in apt-get update --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 44e9e6d..2f5598f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,7 +42,7 @@ jobs: uses: ammaraskar/sphinx-action@master with: pre-build-command: | - apt-get update -y && apt-get install -y git && git --version + apt-get --allow-releaseinfo-change update -y && apt-get install -y git && git --version python -m pip install --upgrade pip setuptools wheel python -m pip install -e .[docs] docs-folder: "docs/" - name: Publish documentation run: | From 4e0e747d472b2886c95b4402bf13592401d00216 Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 24 Jun 2024 14:53:24 +0200 Subject: [PATCH 07/16] Amendment to the last commit --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f5598f..fc7c805 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,7 +44,7 @@ jobs: pre-build-command: | apt-get --allow-releaseinfo-change update -y && apt-get install -y git && git --version python -m pip install --upgrade pip setuptools wheel - python -m pip install -e .[docs] + python -m pip install -e .[composable,docs] docs-folder: "docs/" - name: Publish documentation run: | From b1414b2a043706cb85589e8ea7a5b8ca95672189 Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 24 Jun 2024 15:05:49 +0200 Subject: [PATCH 08/16] Revert "TO REVERT: build gh-pages even on pushes to devel" This reverts commit 6ea15c30b8928c3d72b18028426c9c4bd7f0bd7c.
--- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fc7c805..b1e275c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,6 +36,7 @@ jobs: docs: name: Documentation runs-on: ubuntu-latest + if: github.ref == 'refs/heads/master' steps: - uses: actions/checkout@v1 - name: Build documentation From 8e9e7fa199b7120879828f0274f49a5dd20b2434 Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 1 Jul 2024 16:16:45 +0200 Subject: [PATCH 09/16] Move docs/source/wiki/ to docs/source/manuals/ --- docs/source/index.rst | 18 +++++++++--------- .../{wiki/Datasets.md => manuals/datasets.md} | 5 ++--- .../Evaluation.md => manuals/evaluation.md} | 0 .../explicit-loss-minimization.md} | 0 .../{wiki/Methods.md => manuals/methods.md} | 7 +++---- .../model-selection.md} | 4 ++-- .../plots}/bin_bias.png | Bin .../plots}/bin_bias_bin_cc.png | Bin .../plots}/bin_bias_cc.png | Bin .../plots}/bin_diag.png | Bin .../plots}/bin_diag_cc.png | Bin .../plots}/err_drift.png | Bin .../{wiki/Plotting.md => manuals/plotting.md} | 14 +++++++------- .../Protocols.md => manuals/protocols.md} | 0 14 files changed, 23 insertions(+), 25 deletions(-) rename docs/source/{wiki/Datasets.md => manuals/datasets.md} (99%) rename docs/source/{wiki/Evaluation.md => manuals/evaluation.md} (100%) rename docs/source/{wiki/ExplicitLossMinimization.md => manuals/explicit-loss-minimization.md} (100%) rename docs/source/{wiki/Methods.md => manuals/methods.md} (98%) rename docs/source/{wiki/Model-Selection.md => manuals/model-selection.md} (97%) rename docs/source/{wiki/wiki_examples/selected_plots => manuals/plots}/bin_bias.png (100%) rename docs/source/{wiki/wiki_examples/selected_plots => manuals/plots}/bin_bias_bin_cc.png (100%) rename docs/source/{wiki/wiki_examples/selected_plots => manuals/plots}/bin_bias_cc.png (100%) rename docs/source/{wiki/wiki_examples/selected_plots => manuals/plots}/bin_diag.png (100%) rename docs/source/{wiki/wiki_examples/selected_plots => manuals/plots}/bin_diag_cc.png (100%) rename docs/source/{wiki/wiki_examples/selected_plots => manuals/plots}/err_drift.png (100%) rename docs/source/{wiki/Plotting.md => manuals/plotting.md} (95%) rename docs/source/{wiki/Protocols.md => manuals/protocols.md} (100%) diff --git a/docs/source/index.rst b/docs/source/index.rst index d2918cf..7c7916c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -68,15 +68,15 @@ Manuals The following manuals illustrate several aspects of QuaPy through examples: .. toctree:: - :maxdepth: 1 - - wiki/Datasets - wiki/Evaluation - wiki/ExplicitLossMinimization - wiki/Methods - wiki/Model-Selection - wiki/Plotting - wiki/Protocols + :maxdepth: 2 + + manuals/datasets + manuals/evaluation + manuals/explicit-loss-minimization + manuals/methods + manuals/model-selection + manuals/plotting + manuals/protocols .. toctree:: :hidden: diff --git a/docs/source/wiki/Datasets.md b/docs/source/manuals/datasets.md similarity index 99% rename from docs/source/wiki/Datasets.md rename to docs/source/manuals/datasets.md index 904fe53..cc972cd 100644 --- a/docs/source/wiki/Datasets.md +++ b/docs/source/manuals/datasets.md @@ -67,9 +67,8 @@ for method in methods: ``` However, generating samples for evaluation purposes is tackled in QuaPy -by means of the evaluation protocols (see the dedicated entries in the Wiki -for [evaluation](https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation) and -[protocols](https://github.com/HLT-ISTI/QuaPy/wiki/Protocols)). 
+by means of the evaluation protocols (see the dedicated entries in the manuals +for [evaluation](./evaluation) and [protocols](./protocols)). ## Reviews Datasets diff --git a/docs/source/wiki/Evaluation.md b/docs/source/manuals/evaluation.md similarity index 100% rename from docs/source/wiki/Evaluation.md rename to docs/source/manuals/evaluation.md diff --git a/docs/source/wiki/ExplicitLossMinimization.md b/docs/source/manuals/explicit-loss-minimization.md similarity index 100% rename from docs/source/wiki/ExplicitLossMinimization.md rename to docs/source/manuals/explicit-loss-minimization.md diff --git a/docs/source/wiki/Methods.md b/docs/source/manuals/methods.md similarity index 98% rename from docs/source/wiki/Methods.md rename to docs/source/manuals/methods.md index 760df16..03c5c2a 100644 --- a/docs/source/wiki/Methods.md +++ b/docs/source/manuals/methods.md @@ -29,7 +29,7 @@ instance in a sample-- while in quantification the output for a sample is one single array of class prevalences). Quantifiers also extend from scikit-learn's `BaseEstimator`, in order to simplify the use of `set_params` and `get_params` used in -[model selector](https://github.com/HLT-ISTI/QuaPy/wiki/Model-Selection). +[model selection](./model-selection). ## Aggregative Methods @@ -96,7 +96,7 @@ classifier, and then _clones_ these classifiers and explores the combinations of hyperparameters that are specific to the quantifier (this can result in huge time savings). Concerning the inference phase, this two-step process allows the evaluation of many -standard protocols (e.g., the [artificial sampling protocol](https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation)) to be +standard protocols (e.g., the [artificial sampling protocol](./evaluation)) to be carried out very efficiently. The reason is that the entire set can be pre-classified once, and the quantification estimations for different samples can directly reuse these predictions, without having to classify each element every time. @@ -484,8 +484,7 @@ the performance estimated for each member of the ensemble in terms of that evalu When using any of the above options, it is important to set the `red_size` parameter, which informs of the number of members to retain. -Please, check the [model selection](https://github.com/HLT-ISTI/QuaPy/wiki/Model-Selection) -wiki if you want to optimize the hyperparameters of ensemble for classification or quantification. +Please check the [model selection manual](./model-selection) if you want to optimize the hyperparameters of the ensemble for classification or quantification. ### The QuaNet neural network diff --git a/docs/source/wiki/Model-Selection.md b/docs/source/manuals/model-selection.md similarity index 97% rename from docs/source/wiki/Model-Selection.md rename to docs/source/manuals/model-selection.md index 9dd5bab..097f902 100644 --- a/docs/source/wiki/Model-Selection.md +++ b/docs/source/manuals/model-selection.md @@ -33,11 +33,11 @@ of scenarios exhibiting different degrees of prior probability shift. The class _qp.model_selection.GridSearchQ_ implements a grid-search exploration over the space of -hyper-parameter combinations that [evaluates](https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation) +hyper-parameter combinations that [evaluates](./evaluation) each combination of hyper-parameters by means of a given quantification-oriented error metric (e.g., any of the error functions implemented in _qp.error_) and according to a -[sampling generation protocol](https://github.com/HLT-ISTI/QuaPy/wiki/Protocols).
+[sampling generation protocol](./protocols). The following is an example (also included in the examples folder) of model selection for quantification: diff --git a/docs/source/wiki/wiki_examples/selected_plots/bin_bias.png b/docs/source/manuals/plots/bin_bias.png similarity index 100% rename from docs/source/wiki/wiki_examples/selected_plots/bin_bias.png rename to docs/source/manuals/plots/bin_bias.png diff --git a/docs/source/wiki/wiki_examples/selected_plots/bin_bias_bin_cc.png b/docs/source/manuals/plots/bin_bias_bin_cc.png similarity index 100% rename from docs/source/wiki/wiki_examples/selected_plots/bin_bias_bin_cc.png rename to docs/source/manuals/plots/bin_bias_bin_cc.png diff --git a/docs/source/wiki/wiki_examples/selected_plots/bin_bias_cc.png b/docs/source/manuals/plots/bin_bias_cc.png similarity index 100% rename from docs/source/wiki/wiki_examples/selected_plots/bin_bias_cc.png rename to docs/source/manuals/plots/bin_bias_cc.png diff --git a/docs/source/wiki/wiki_examples/selected_plots/bin_diag.png b/docs/source/manuals/plots/bin_diag.png similarity index 100% rename from docs/source/wiki/wiki_examples/selected_plots/bin_diag.png rename to docs/source/manuals/plots/bin_diag.png diff --git a/docs/source/wiki/wiki_examples/selected_plots/bin_diag_cc.png b/docs/source/manuals/plots/bin_diag_cc.png similarity index 100% rename from docs/source/wiki/wiki_examples/selected_plots/bin_diag_cc.png rename to docs/source/manuals/plots/bin_diag_cc.png diff --git a/docs/source/wiki/wiki_examples/selected_plots/err_drift.png b/docs/source/manuals/plots/err_drift.png similarity index 100% rename from docs/source/wiki/wiki_examples/selected_plots/err_drift.png rename to docs/source/manuals/plots/err_drift.png diff --git a/docs/source/wiki/Plotting.md b/docs/source/manuals/plotting.md similarity index 95% rename from docs/source/wiki/Plotting.md rename to docs/source/manuals/plotting.md index 99f3f7e..ec080da 100644 --- a/docs/source/wiki/Plotting.md +++ b/docs/source/manuals/plotting.md @@ -43,7 +43,7 @@ quantification methods across different scenarios showcasing the accuracy of the quantifier in predicting class prevalences for a wide range of prior distributions. This can easily be achieved by means of the -[artificial sampling protocol](https://github.com/HLT-ISTI/QuaPy/wiki/Protocols) +[artificial sampling protocol](./protocols) that is implemented in QuaPy. The following code shows how to perform one simple experiment @@ -113,7 +113,7 @@ are '.png' or '.pdf'). If this path is not provided, then the plot will be shown but not saved. The resulting plot should look like: -![diagonal plot on Kindle](./wiki_examples/selected_plots/bin_diag.png) +![diagonal plot on Kindle](./plots/bin_diag.png) Note that in this case, we are also indicating the training prevalence, which is plotted on the diagonal as a cyan dot.
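For orientation, a diagonal plot like the one referenced here can be produced along the following lines; this is a sketch distilled from the plotting manual being moved in this patch, where `method_names`, `true_prevs`, and `estim_prevs` are assumed to have been collected by evaluating each method over protocol-generated samples, and `train` is the training set:

```python
import quapy as qp

# parallel lists: one entry per method, each holding the true and the
# estimated prevalence vectors gathered over the generated test samples
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs,
                        train_prev=train.prevalence(),  # the cyan dot on the diagonal
                        savepath='./plots/bin_diag.png')
```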
@@ -138,7 +138,7 @@ qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, savepath='./pl and should look like: -![bias plot on Kindle](./wiki_examples/selected_plots/bin_bias.png) +![bias plot on Kindle](./plots/bin_bias.png) The box plots show some interesting facts: * all methods are biased towards the training prevalence but specially @@ -181,7 +181,7 @@ def gen_data(): and the plot should now look like: -![bias plot on IMDb](./wiki_examples/selected_plots/bin_bias_cc.png) +![bias plot on IMDb](./plots/bin_bias_cc.png) which clearly shows a negative bias for CC variants trained on data containing more negatives (i.e., < 50%) and positive biases @@ -195,7 +195,7 @@ To this aim, an argument _nbins_ is passed which indicates how many isometric subintervals to take. For example the following plot is produced for _nbins=3_: -![bias plot on IMDb](./wiki_examples/selected_plots/bin_bias_bin_cc.png) +![bias plot on IMDb](./plots/bin_bias_bin_cc.png) Interestingly enough, the seemingly unbiased estimator (CC at 50%) happens to display a positive bias (or a tendency to overestimate) in cases of low prevalence @@ -205,7 +205,7 @@ and a negative bias (or a tendency to underestimate) in cases of high prevalence Out of curiosity, the diagonal plot for this experiment looks like: -![diag plot on IMDb](./wiki_examples/selected_plots/bin_diag_cc.png) +![diag plot on IMDb](./plots/bin_diag_cc.png) showing pretty clearly the dependency of CC on the prior probabilities of the labeled set it was trained on. @@ -234,7 +234,7 @@ qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, error_name='ae', n_bins=10, savepath='./plots/err_drift.png') ``` -![diag plot on IMDb](./wiki_examples/selected_plots/err_drift.png) +![diag plot on IMDb](./plots/err_drift.png) Note that all methods work reasonably well in cases of low prevalence drift (i.e., any CC-variant is a good quantifier whenever the IID diff --git a/docs/source/wiki/Protocols.md b/docs/source/manuals/protocols.md similarity index 100% rename from docs/source/wiki/Protocols.md rename to docs/source/manuals/protocols.md From d2209afab5da3738f157b84ce3832fa1e41707e9 Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 1 Jul 2024 16:37:28 +0200 Subject: [PATCH 10/16] Manuals and API sections --- docs/source/index.rst | 12 +++--------- docs/source/manuals.rst | 14 ++++++++++++++ docs/source/modules.rst | 7 ------- 3 files changed, 17 insertions(+), 16 deletions(-) create mode 100644 docs/source/manuals.rst delete mode 100644 docs/source/modules.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 7c7916c..34c7944 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -68,20 +68,14 @@ Manuals The following manuals illustrate several aspects of QuaPy through examples: .. toctree:: - :maxdepth: 2 + :maxdepth: 3 - manuals/datasets - manuals/evaluation - manuals/explicit-loss-minimization - manuals/methods - manuals/model-selection - manuals/plotting - manuals/protocols + manuals .. toctree:: :hidden: - List of Modules + API Features -------- diff --git a/docs/source/manuals.rst b/docs/source/manuals.rst new file mode 100644 index 0000000..a426786 --- /dev/null +++ b/docs/source/manuals.rst @@ -0,0 +1,14 @@ +Manuals +======= + +.. 
toctree:: + :maxdepth: 2 + :numbered: + + manuals/datasets + manuals/evaluation + manuals/explicit-loss-minimization + manuals/methods + manuals/model-selection + manuals/plotting + manuals/protocols diff --git a/docs/source/modules.rst b/docs/source/modules.rst deleted file mode 100644 index 5d84a54..0000000 --- a/docs/source/modules.rst +++ /dev/null @@ -1,7 +0,0 @@ -quapy -===== - -.. toctree:: - :maxdepth: 4 - - quapy From c668d0b3d80c896f97aeb4ea2a03ff201e29cfdb Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 1 Jul 2024 17:06:35 +0200 Subject: [PATCH 11/16] Translate index.rst to index.md --- docs/source/index.md | 100 ++++++++++++++++++++++++++++++++++++++ docs/source/index.rst | 109 ------------------------------------------ 2 files changed, 100 insertions(+), 109 deletions(-) create mode 100644 docs/source/index.md delete mode 100644 docs/source/index.rst diff --git a/docs/source/index.md b/docs/source/index.md new file mode 100644 index 0000000..accb758 --- /dev/null +++ b/docs/source/index.md @@ -0,0 +1,100 @@ +```{toctree} +:hidden: + +self +``` + +# Quickstart + +QuaPy is an open source framework for quantification (a.k.a. supervised prevalence estimation, or learning to quantify) written in Python. + +QuaPy is based on the concept of "data sample", and provides implementations of the most important aspects of the quantification workflow, such as (baseline and advanced) quantification methods, quantification-oriented model selection mechanisms, evaluation measures, and evaluation protocols used for evaluating quantification methods. QuaPy also makes available commonly used datasets, and offers visualization tools for facilitating the analysis and interpretation of the experimental results. + +QuaPy is hosted on GitHub at [https://github.com/HLT-ISTI/QuaPy](https://github.com/HLT-ISTI/QuaPy). + +## Installation + +```sh +pip install quapy +``` + +## Usage + +The following script fetches a dataset of tweets, trains, applies, and evaluates a quantifier based on the *Adjusted Classify & Count* quantification method, using, as the evaluation measure, the *Mean Absolute Error* (MAE) between the predicted and the true class prevalence values of the test set: + +```python +import quapy as qp +from sklearn.linear_model import LogisticRegression + +dataset = qp.datasets.fetch_twitter('semeval16') + +# create an "Adjusted Classify & Count" quantifier +model = qp.method.aggregative.ACC(LogisticRegression()) +model.fit(dataset.training) + +estim_prevalence = model.quantify(dataset.test.instances) +true_prevalence = dataset.test.prevalence() + +error = qp.error.mae(true_prevalence, estim_prevalence) + +print(f'Mean Absolute Error (MAE)={error:.3f}') +``` + +Quantification is useful in scenarios characterized by prior probability shift. In other words, we would have little interest in estimating the class prevalence values of the test set if we could rely on the IID assumption, as this prevalence would be roughly equivalent to the class prevalence of the training set. For this reason, any quantification model should be tested across many samples, even ones characterized by class prevalence values different or very different from those found in the training set. QuaPy implements sampling procedures and evaluation protocols that automate this workflow. See the [](./manuals) for detailed examples.
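For intuition, the `qp.error.mae` call in the script above boils down to the mean absolute difference between the true and the estimated prevalence vectors; a plain NumPy rendition with made-up numbers:

```python
import numpy as np

true_prev = np.array([0.30, 0.50, 0.20])     # illustrative values only
estim_prev = np.array([0.35, 0.45, 0.20])
mae = np.abs(true_prev - estim_prev).mean()  # (0.05 + 0.05 + 0.00) / 3 ≈ 0.033
print(f'MAE = {mae:.3f}')
```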
+ +## Manuals + +The following manuals illustrate several aspects of QuaPy through examples: + +```{toctree} +:maxdepth: 3 + +manuals +``` + +```{toctree} +:hidden: + +API +``` + +## Features + +- Implementation of many popular quantification methods (Classify-&-Count and its variants, Expectation Maximization, quantification methods based on structured output learning, HDy, QuaNet, quantification ensembles, among others). +- Versatile functionality for performing evaluation based on sampling generation protocols (e.g., APP, NPP, etc.). +- Implementation of most commonly used evaluation metrics (e.g., AE, RAE, NAE, NRAE, SE, KLD, NKLD, etc.). +- Datasets frequently used in quantification (textual and numeric), including: + - 32 UCI Machine Learning binary datasets. + - 5 UCI Machine Learning multiclass datasets (new in v0.1.8!). + - 11 Twitter quantification-by-sentiment datasets. + - 3 product reviews quantification-by-sentiment datasets. + - 4 tasks from LeQua competition (new in v0.1.7!). + - IFCB dataset of plankton water samples (new in v0.1.8!). +- Native support for binary and single-label multiclass quantification scenarios. +- Model selection functionality that minimizes quantification-oriented loss functions. +- Visualization tools for analysing the experimental results. + +## Citing QuaPy + +If you find QuaPy useful (and we hope you will), please consider citing the original paper in your research. + +```bibtex +@inproceedings{moreo2021quapy, + title={QuaPy: a python-based framework for quantification}, + author={Moreo, Alejandro and Esuli, Andrea and Sebastiani, Fabrizio}, + booktitle={Proceedings of the 30th ACM International Conference on Information \& Knowledge Management}, + pages={4534--4543}, + year={2021} +} +``` + +## Contributing + +In case you want to contribute improvements to quapy, please generate a pull request to the "devel" branch. + +## Acknowledgments + +```{image} SoBigData.png +:width: 250px +:alt: SoBigData++ +``` diff --git a/docs/source/index.rst b/docs/source/index.rst deleted file mode 100644 index 34c7944..0000000 --- a/docs/source/index.rst +++ /dev/null @@ -1,109 +0,0 @@ -.. QuaPy: A Python-based open-source framework for quantification documentation master file, created by - sphinx-quickstart on Wed Feb 7 16:26:46 2024. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -.. toctree:: - :hidden: - - self - -Quickstart -========================================================================================== - -QuaPy is an open source framework for quantification (a.k.a. supervised prevalence estimation, or learning to quantify) written in Python. - -QuaPy is based on the concept of "data sample", and provides implementations of the most important aspects of the quantification workflow, such as (baseline and advanced) quantification methods, quantification-oriented model selection mechanisms, evaluation measures, and evaluation protocols used for evaluating quantification methods. QuaPy also makes available commonly used datasets, and offers visualization tools for facilitating the analysis and interpretation of the experimental results. - -QuaPy is hosted on GitHub at `https://github.com/HLT-ISTI/QuaPy <https://github.com/HLT-ISTI/QuaPy>`_ - -Installation ------------- - -.. code-block:: none - - pip install quapy - -Citing QuaPy ------------- - -If you find QuaPy useful (and we hope you will), please consider citing the original paper in your research. - -..
code-block:: none - - @inproceedings{moreo2021quapy, - title={QuaPy: a python-based framework for quantification}, - author={Moreo, Alejandro and Esuli, Andrea and Sebastiani, Fabrizio}, - booktitle={Proceedings of the 30th ACM International Conference on Information \& Knowledge Management}, - pages={4534--4543}, - year={2021} - } - -Usage ------ - -The following script fetches a dataset of tweets, trains, applies, and evaluates a quantifier based on the *Adjusted Classify & Count* quantification method, using, as the evaluation measure, the *Mean Absolute Error* (MAE) between the predicted and the true class prevalence values of the test set:: - - import quapy as qp - from sklearn.linear_model import LogisticRegression - - dataset = qp.datasets.fetch_twitter('semeval16') - - # create an "Adjusted Classify & Count" quantifier - model = qp.method.aggregative.ACC(LogisticRegression()) - model.fit(dataset.training) - - estim_prevalence = model.quantify(dataset.test.instances) - true_prevalence = dataset.test.prevalence() - - error = qp.error.mae(true_prevalence, estim_prevalence) - - print(f'Mean Absolute Error (MAE)={error:.3f}') - -Quantification is useful in scenarios characterized by prior probability shift. In other words, we would have little interest in estimating the class prevalence values of the test set if we could rely on the IID assumption, as this prevalence would be roughly equivalent to the class prevalence of the training set. For this reason, any quantification model should be tested across many samples, even ones characterized by class prevalence values different or very different from those found in the training set. QuaPy implements sampling procedures and evaluation protocols that automate this workflow. See the `Manuals`_ for detailed examples. - -Manuals ------- - -The following manuals illustrate several aspects of QuaPy through examples: - -.. toctree:: - :maxdepth: 3 - - manuals - -.. toctree:: - :hidden: - - API - -Features --------- - -* Implementation of many popular quantification methods (Classify-&-Count and its variants, Expectation Maximization, quantification methods based on structured output learning, HDy, QuaNet, quantification ensembles, among others). -* Versatile functionality for performing evaluation based on sampling generation protocols (e.g., APP, NPP, etc.). -* Implementation of most commonly used evaluation metrics (e.g., AE, RAE, NAE, NRAE, SE, KLD, NKLD, etc.). -* Datasets frequently used in quantification (textual and numeric), including: - - * 32 UCI Machine Learning binary datasets. - * 5 UCI Machine Learning multiclass datasets (new in v0.1.8!). - * 11 Twitter quantification-by-sentiment datasets. - * 3 product reviews quantification-by-sentiment datasets. - * 4 tasks from LeQua competition (new in v0.1.7!). - * IFCB dataset of plankton water samples (new in v0.1.8!). - -* Native support for binary and single-label multiclass quantification scenarios. -* Model selection functionality that minimizes quantification-oriented loss functions. -* Visualization tools for analysing the experimental results. - -Contributing ------------- - -In case you want to contribute improvements to quapy, please generate a pull request to the "devel" branch. - -Acknowledgments ---------------- - -..
image:: SoBigData.png - :width: 250px - :alt: SoBigData++ From 415c92f803d2acb1b36520b0d271b11e663ec936 Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 1 Jul 2024 17:07:01 +0200 Subject: [PATCH 12/16] Fix cross-references within the documentation --- docs/source/manuals/evaluation.md | 4 ++-- docs/source/manuals/explicit-loss-minimization.md | 8 ++++---- docs/source/manuals/methods.md | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/source/manuals/evaluation.md b/docs/source/manuals/evaluation.md index d9c1b79..e5404a3 100644 --- a/docs/source/manuals/evaluation.md +++ b/docs/source/manuals/evaluation.md @@ -72,8 +72,8 @@ one specific _sample generation protocol_ to generate many samples, typically characterized by widely varying amounts of _shift_ with respect to the original distribution, that are then used to evaluate the performance of a (trained) quantifier. -These protocols are explained in more detail in a dedicated [entry -in the wiki](Protocols.md). For the moment being, let us assume we already have +These protocols are explained in more detail in a dedicated [manual](./protocols.md). +For the time being, let us assume we already have chosen and instantiated one specific such protocol, that we here simply call _prot_. Let us also assume our model is called _quantifier_ and that our evaluation measure of choice is diff --git a/docs/source/manuals/explicit-loss-minimization.md b/docs/source/manuals/explicit-loss-minimization.md index 23a07ea..f80c434 100644 --- a/docs/source/manuals/explicit-loss-minimization.md +++ b/docs/source/manuals/explicit-loss-minimization.md @@ -5,14 +5,14 @@ SVM(Q), SVM(KLD), SVM(NKLD), SVM(AE), or SVM(RAE). These methods require first downloading the [svmperf](http://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html) package, apply the patch -[svm-perf-quantification-ext.patch](./svm-perf-quantification-ext.patch), and compile the sources. -The script [prepare_svmperf.sh](prepare_svmperf.sh) does all the job. Simply run: +[svm-perf-quantification-ext.patch](https://github.com/HLT-ISTI/QuaPy/blob/master/svm-perf-quantification-ext.patch), and compile the sources. +The script [prepare_svmperf.sh](https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh) does all the work. Simply run: ``` ./prepare_svmperf.sh ``` -The resulting directory [svm_perf_quantification](./svm_perf_quantification) contains the +The resulting directory `svm_perf_quantification/` contains the patched version of _svmperf_ with quantification-oriented losses. The [svm-perf-quantification-ext.patch](https://github.com/HLT-ISTI/QuaPy/blob/master/svm-perf-quantification-ext.patch) is an extension of the patch made available by @@ -22,5 +22,5 @@ the _Q_ measure as proposed by [Barranquero et al. 2015](https://www.sciencedire and for the _KLD_ and _NKLD_ measures as proposed by [Esuli et al. 2015](https://dl.acm.org/doi/abs/10.1145/2700406?casa_token=8D2fHsGCVn0AAAAA:ZfThYOvrzWxMGfZYlQW_y8Cagg-o_l6X_PcF09mdETQ4Tu7jK98mxFbGSXp9ZSO14JkUIYuDGFG0). This patch extends the above one by also allowing SVMperf to optimize for _AE_ and _RAE_. -See [Methods.md](Methods.md) for more details and code examples. +See the [](./methods) manual for more details and code examples.
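In code, using one of these ELM-based methods looks roughly as follows; this is a minimal sketch that assumes the `newSVMQ` helper from `quapy.method.aggregative` and a `svm_perf_quantification/` directory previously compiled by `prepare_svmperf.sh`:

```python
import quapy as qp
from quapy.method.aggregative import newSVMQ

# point QuaPy to the patched and compiled SVMperf binaries
qp.environ['SVMPERF_HOME'] = './svm_perf_quantification'

dataset = qp.datasets.fetch_reviews('kindle', tfidf=True)  # a binary dataset
model = newSVMQ()  # SVMperf-based quantifier optimizing the Q measure
model.fit(dataset.training)
estim_prevalence = model.quantify(dataset.test.instances)
```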
diff --git a/docs/source/manuals/methods.md b/docs/source/manuals/methods.md index 03c5c2a..9536820 100644 --- a/docs/source/manuals/methods.md +++ b/docs/source/manuals/methods.md @@ -414,8 +414,8 @@ model.fit(dataset.training) estim_prevalence = model.quantify(dataset.test.instances) ``` -Check the examples _[explicit_loss_minimization.py](..%2Fexamples%2Fexplicit_loss_minimization.py)_ -and [one_vs_all.py](..%2Fexamples%2Fone_vs_all.py) for more details. +Check the examples on [explicit_loss_minimization](https://github.com/HLT-ISTI/QuaPy/blob/devel/examples/5.explicit_loss_minimization.py) +and on [one versus all quantification](https://github.com/HLT-ISTI/QuaPy/blob/devel/examples/10.one_vs_all.py) for more details. ### Kernel Density Estimation methods (KDEy) From b8b3cf540e52eb1e83bcfa10f242d38aa757833b Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 1 Jul 2024 17:48:23 +0200 Subject: [PATCH 13/16] Correct all remaining warnings during the build of the docs --- .github/workflows/ci.yml | 2 +- docs/source/conf.py | 7 ++++++- docs/source/quapy.method.rst | 2 +- setup.py | 1 + 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b1e275c..83662d9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,7 +45,7 @@ jobs: pre-build-command: | apt-get --allow-releaseinfo-change update -y && apt-get install -y git && git --version python -m pip install --upgrade pip setuptools wheel - python -m pip install -e .[composable,docs] + python -m pip install -e .[composable,neural,docs] docs-folder: "docs/" - name: Publish documentation run: | diff --git a/docs/source/conf.py b/docs/source/conf.py index 9d86c63..702463c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -36,6 +36,7 @@ 'sphinx.ext.autosummary', 'sphinx.ext.viewcode', 'sphinx.ext.napoleon', + 'sphinx.ext.intersphinx', 'myst_parser', ] @@ -55,6 +56,10 @@ html_theme = 'sphinx_rtd_theme' # html_theme = 'furo' # need to be installed: pip install furo (not working...) -html_static_path = ['_static'] +# html_static_path = ['_static'] +# intersphinx configuration +intersphinx_mapping = { + "sklearn": ("https://scikit-learn.org/stable/", None), +} diff --git a/docs/source/quapy.method.rst b/docs/source/quapy.method.rst index 31a357a..ac0dfc8 100644 --- a/docs/source/quapy.method.rst +++ b/docs/source/quapy.method.rst @@ -53,7 +53,7 @@ quapy.method.non\_aggregative module :show-inheritance: quapy.method.composable module ------------------------- +------------------------------ .. 
automodule:: quapy.method.composable :members: diff --git a/setup.py b/setup.py index 23aa3ca..aa699e4 100644 --- a/setup.py +++ b/setup.py @@ -126,6 +126,7 @@ def get_version(rel_path): extras_require={ # Optional 'bayes': ['jax', 'jaxlib', 'numpyro'], 'composable': ['qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.3'], + 'neural': ['torch'], 'tests': ['certifi'], 'docs' : ['sphinx-rtd-theme', 'myst-parser'], }, From c99c9903a33df71544bdc94f848894cea78a1006 Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 24 Jun 2024 14:19:13 +0200 Subject: [PATCH 14/16] TO REVERT: build gh-pages even on pushes to devel --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 83662d9..fb0647b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,6 @@ jobs: docs: name: Documentation runs-on: ubuntu-latest - if: github.ref == 'refs/heads/master' steps: - uses: actions/checkout@v1 - name: Build documentation From 7f05f8dd41dbab01f09c480a9128b7adc1b2e2ca Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 1 Jul 2024 18:10:29 +0200 Subject: [PATCH 15/16] Fix the autodoc of the composable module --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fb0647b..fb5e8c7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,7 +43,7 @@ jobs: with: pre-build-command: | apt-get --allow-releaseinfo-change update -y && apt-get install -y git && git --version - python -m pip install --upgrade pip setuptools wheel + python -m pip install --upgrade pip setuptools wheel "jax[cpu]" python -m pip install -e .[composable,neural,docs] docs-folder: "docs/" - name: Publish documentation From 1730d5a1a966398485e142b59efe733a3432051c Mon Sep 17 00:00:00 2001 From: Mirko Bunse Date: Mon, 1 Jul 2024 18:17:58 +0200 Subject: [PATCH 16/16] Revert "TO REVERT: build gh-pages even on pushes to devel" This reverts commit c99c9903a33df71544bdc94f848894cea78a1006. --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fb5e8c7..030b152 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,6 +36,7 @@ jobs: docs: name: Documentation runs-on: ubuntu-latest + if: github.ref == 'refs/heads/master' steps: - uses: actions/checkout@v1 - name: Build documentation