diff --git a/.flake8 b/.flake8 deleted file mode 100644 index f6a4abc..0000000 --- a/.flake8 +++ /dev/null @@ -1,11 +0,0 @@ -[flake8] -count = true -docstring-convention = google -exclude = "".git,__pycache__,docs,build,dist,.venv,.github" -filename = "*py" -extend-ignore = E4,E501 -max-complexity = 10 -max-line-length = 120 -show-source = true -statistics = true -tee = true diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index 1438709..9708e6e 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -63,21 +63,16 @@ jobs: poetry-version: ${{ vars.POETRY_VERSION }} - name: Install dependencies run: poetry install --with dev - - name: Run black - run: poetry run black . --check - - name: Run isort - run: poetry run isort . --check-only - - name: Run flake8 - run: poetry run flake8 src --output-file=flake8_report.txt - - name: Run pylint - run: poetry run pylint src --recursive=y + - name: Ruff format check + run: poetry run ruff format --check src + - name: Ruff lint check + run: poetry run ruff check src - name: Run bandit run: poetry run bandit -r src - name: Run Safety CLI to check for vulnerabilities - uses: pyupio/safety-action@v1 + uses: pyupio/safety-action@v1.0.1 with: api-key: ${{ secrets.SAFETY_API_KEY }} - continue-on-error: true # Do not fail this action if vulnerabilities are found args: "--short-report --policy-file=.safety-policy.yml" - name: Run mypy run: poetry run mypy src diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e4f891c..f920161 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,16 +10,6 @@ repos: - id: trailing-whitespace args: [--markdown-linebreak-ext=md] - id: check-merge-conflict - - repo: https://github.com/pycqa/isort - rev: 5.12.0 - hooks: - - id: isort - name: isort (python) - args: ["--profile", "black", "--filter-files", "--line-length", "120"] - - repo: https://github.com/psf/black - rev: 23.10.0 - hooks: - - id: black - repo: https://github.com/python-jsonschema/check-jsonschema rev: 0.27.0 hooks: @@ -34,3 +24,18 @@ repos: - id: python-safety-dependencies-check files: pyproject.toml args: ["--short-report", "--policy-file=.safety-policy-v2.yml"] + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.8.1 + hooks: + # Run the linter. + - id: ruff + args: [--fix] + # Run the formatter. + - id: ruff-format + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.13.0 # Use the sha / tag you want to point at + hooks: + - id: mypy + args: [--strict, --ignore-missing-imports] + files: ^src/ diff --git a/.pylintrc b/.pylintrc deleted file mode 100644 index 9b33a96..0000000 --- a/.pylintrc +++ /dev/null @@ -1,399 +0,0 @@ -# This Pylint rcfile contains a best-effort configuration to uphold the -# best-practices and style described in the Google Python style guide: -# https://google.github.io/styleguide/pyguide.html -# -# Its canonical open-source location is: -# https://google.github.io/styleguide/pylintrc - -[MASTER] - -# Files or directories to be skipped. They should be base names, not paths. -ignore=third_party - -# Files or directories matching the regex patterns are skipped. The regex -# matches against base names, not paths. -ignore-patterns=test_.* - -# Pickle collected data for later comparisons. -persistent=no - -# List of plugins (as comma separated values of python modules names) to load, -# usually to register additional checkers. -load-plugins= - -# Use multiple processes to speed up Pylint. -jobs=4 - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED -confidence= - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -#enable= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once).You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use"--disable=all --enable=classes -# --disable=W" -disable=abstract-method, - apply-builtin, - backtick, - bad-option-value, - basestring-builtin, - buffer-builtin, - c-extension-no-member, - cmp-builtin, - cmp-method, - coerce-builtin, - coerce-method, - delslice-method, - div-method, - execfile-builtin, - file-builtin, - filter-builtin-not-iterating, - fixme, - getslice-method, - hex-method, - idiv-method, - input-builtin, - intern-builtin, - invalid-str-codec, - locally-disabled, - long-builtin, - long-suffix, - map-builtin-not-iterating, - no-absolute-import, - no-else-break, - no-else-continue, - no-else-raise, - no-else-return, - no-init, # added - no-member, - no-name-in-module, - no-self-use, - nonzero-method, - oct-method, - old-division, - old-ne-operator, - old-octal-literal, - old-raise-syntax, - parameter-unpacking, - raising-string, - range-builtin-not-iterating, - raw_input-builtin, - rdiv-method, - reduce-builtin, - relative-import, - reload-builtin, - round-builtin, - setslice-method, - signature-differs, - standarderror-builtin, - suppressed-message, - sys-max-int, - too-few-public-methods, - trailing-whitespace, - unichr-builtin, - unicode-builtin, - unnecessary-pass, - unpacking-in-except, - using-cmp-argument, - wrong-import-order, - xrange-builtin, - zip-builtin-not-iterating - - -[REPORTS] - -# Set the output format. Available formats are text, parseable, colorized, msvs -# (visual studio) and html. You can also give a reporter class, eg -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Tells whether to display a full report or only the messages -reports=no - -# Python expression which should return a note less than 10 (10 is the highest -# note). You have access to the variables errors warning, statement which -# respectively contain the number of errors / warnings messages and the total -# number of statements analyzed. This is used by the global evaluation report -# (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details -#msg-template= - - -[BASIC] - -# Good variable names which should always be accepted, separated by a comma -good-names=main,_ - -# Bad variable names which should always be refused, separated by a comma -bad-names= - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Include a hint for the correct naming format with invalid-name -include-naming-hint=no - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -property-classes=abc.abstractproperty,cached_property.cached_property,cached_property.threaded_cached_property,cached_property.cached_property_with_ttl,cached_property.threaded_cached_property_with_ttl - -# Regular expression matching correct function names -function-rgx=^(?:(?PsetUp|tearDown|setUpModule|tearDownModule)|(?P_?[A-Z][a-zA-Z0-9]*)|(?P_?[a-z][a-z0-9_]*))$ - -# Regular expression matching correct variable names -variable-rgx=^[a-z][a-z0-9_]*$ - -# Regular expression matching correct constant names -const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ - -# Regular expression matching correct attribute names -attr-rgx=^_{0,2}[a-z][a-z0-9_]*$ - -# Regular expression matching correct argument names -argument-rgx=^[a-z][a-z0-9_]*$ - -# Regular expression matching correct class attribute names -class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ - -# Regular expression matching correct inline iteration names -inlinevar-rgx=^[a-z][a-z0-9_]*$ - -# Regular expression matching correct class names -class-rgx=^_?[A-Z][a-zA-Z0-9]*$ - -# Regular expression matching correct module names -module-rgx=^(_?[a-z][a-z0-9_]*|__init__)$ - -# Regular expression matching correct method names -method-rgx=(?x)^(?:(?P_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P_{0,2}[a-z][a-z0-9_]*))$ - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=(__.*__|main|test.*|.*test|.*Test|^_.*)$ - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=10 - - -[TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=yes - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis. It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules= - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - - -[FORMAT] - -# Maximum number of characters on a single line. -max-line-length=120 - -# TODO(https://github.com/PyCQA/pylint/issues/3352): Direct pylint to exempt -# lines made too long by directives to pytype. - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=(?x)( - ^\s*(\#\ )??$| - ^\s*(from\s+\S+\s+)?import\s+.+$) - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=yes - -# Maximum number of lines in a module -max-module-lines=99999 - -# String used as indentation unit. The internal Google style guide mandates 2 -# spaces. Google's externaly-published style guide says 4, consistent with -# PEP 8. Here, we use 2 spaces, for conformity with many open-sourced Google -# projects (like TensorFlow). -indent-string=' ' - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=TODO - - -[STRING] - -# This flag controls whether inconsistent-quotes generates a warning when the -# character used as a quote delimiter is used inconsistently within a module. -check-quote-consistency=yes - - -[VARIABLES] - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# A regular expression matching the name of dummy variables (i.e. expectedly -# not used). -dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_) - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid to define new builtins when possible. -additional-builtins= - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_,_cb - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functools - - -[LOGGING] - -# Logging modules to check that the string format arguments are in logging -# function parameter format -logging-modules=logging,absl.logging,tensorflow.io.logging - - -[SIMILARITIES] - -# Minimum lines number of a similarity. -min-similarity-lines=4 - -# Ignore comments when computing similarities. -ignore-comments=yes - -# Ignore docstrings when computing similarities. -ignore-docstrings=yes - -# Ignore imports when computing similarities. -ignore-imports=no - - -[SPELLING] - -# Spelling dictionary name. Available dictionaries: none. To make it working -# install python-enchant package. -spelling-dict= - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains private dictionary; one word per line. -spelling-private-dict-file= - -# Tells whether to store unknown words to indicated private dictionary in -# --spelling-private-dict-file option instead of raising a message. -spelling-store-unknown-words=no - - -[IMPORTS] - -# Deprecated modules which should not be used, separated by a comma -deprecated-modules=regsub, - TERMIOS, - Bastion, - rexec, - sets - -# Create a graph of every (i.e. internal and external) dependencies in the -# given file (report RP0402 must not be disabled) -import-graph= - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled) -ext-import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled) -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant, absl - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__, - __new__, - setUp - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict, - _fields, - _replace, - _source, - _make - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls, - class_ - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=mcs - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "Exception" -overgeneral-exceptions=StandardError, - Exception, - BaseException diff --git a/nb/01_table.ipynb b/nb/01_table.ipynb index 716bc5f..f40e203 100644 --- a/nb/01_table.ipynb +++ b/nb/01_table.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "165da3a1", "metadata": { "scrolled": true @@ -11,9 +11,9 @@ "source": [ "import logging\n", "\n", - "logging.basicConfig(level=logging.INFO)\n", + "from pystatis import Table\n", "\n", - "from pystatis import Table" + "logging.basicConfig(level=logging.INFO)" ] }, { @@ -23,7 +23,7 @@ "source": [ "# The `Table` Class\n", "\n", - "The `Table` class in `pystatis` is the main interface for users to interact with the different databases and download the data/tables in form of `pandas` `DataFrames`." + "The `Table` class in `pystatis` is the main interface for users to interact with the different databases and download the data/tables in form of `pandas` `DataFrames`.\n" ] }, { @@ -31,12 +31,12 @@ "id": "07f3dee4", "metadata": {}, "source": [ - "To use the class, you have to pass only a single parameter: the `name` of the table you want to download." + "To use the class, you have to pass only a single parameter: the `name` of the table you want to download.\n" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "5d25c79a", "metadata": {}, "outputs": [], @@ -49,7 +49,7 @@ "id": "8ca8127a", "metadata": {}, "source": [ - "## Downloading data" + "## Downloading data\n" ] }, { @@ -57,25 +57,17 @@ "id": "3d841f94", "metadata": {}, "source": [ - "However, creating a new `Table` instance does not automatically retrieve the data from the database (or cache). Instead, you have to call another method: `get_data()`. The reason for this decision was to give you full control over the download process and avoid unnecessary downloads of big tables unless you are certain you want to start the download." + "However, creating a new `Table` instance does not automatically retrieve the data from the database (or cache). Instead, you have to call another method: `get_data()`. The reason for this decision was to give you full control over the download process and avoid unnecessary downloads of big tables unless you are certain you want to start the download.\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "632fc783", "metadata": { "scrolled": false }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:pystatis.http_helper:Code 0: erfolgreich\n" - ] - } - ], + "outputs": [], "source": [ "t.get_data()" ] @@ -85,26 +77,15 @@ "id": "2370bd5e", "metadata": {}, "source": [ - "You can access the name of a table via the `.name` attribute." + "You can access the name of a table via the `.name` attribute.\n" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "f5f1aded", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'81000-0001'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "t.name" ] @@ -114,64 +95,15 @@ "id": "4e050eed", "metadata": {}, "source": [ - "After a successful download (or cache retrieval), you can always access the raw data, that is the original response from the web API as a string, via the `.raw_data` attribute." + "After a successful download (or cache retrieval), you can always access the raw data, that is the original response from the web API as a string, via the `.raw_data` attribute.\n" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "8fede338", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Statistik_Code;Statistik_Label;Zeit_Code;Zeit_Label;Zeit;1_Merkmal_Code;1_Merkmal_Label;1_Auspraegung_Code;1_Auspraegung_Label;2_Merkmal_Code;2_Merkmal_Label;2_Auspraegung_Code;2_Auspraegung_Label;BWS001__Bruttowertschoepfung__jew._ME;STR006__Guetersteuern_abzuegl._Guetersubventionen__jew._ME;STR020_______Guetersteuern__jew._ME;SUB003_______Guetersubventionen__jew._ME;VGR014__Bruttoinlandsprodukt__jew._ME;BIP005__nachr.:_Bruttoinlandsprodukt_(Veraenderung_in_%)__Prozent;BIP004__nachr.:_Bruttoinlandsprodukt_je_Einwohner__jew._ME\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2014;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRJPM;in jeweiligen Preisen (Mrd. EUR);2635,393;292,037;298,774;6,737;2927,430;4,1;36149,000\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2014;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPKM;preisbereinigt, Kettenindex (2015=100);98,810;96,150;96,250;100,970;98,530;2,2;99,380\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2014;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVK;preisbereinigt, verkettete Volumenang. (Mrd. EUR);2689,628;-;-;-;2981,695;2,2;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2014;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVU;preisbereinigt, unverkettete Volumenang.(Mrd. EUR);2584,829;-;-;-;2873,722;-;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2015;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRJPM;in jeweiligen Preisen (Mrd. EUR);2722,020;304,160;310,942;6,782;3026,180;3,4;37046,000\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2015;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPKM;preisbereinigt, Kettenindex (2015=100);100,000;100,000;100,000;100,000;100,000;1,5;100,000\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2015;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVK;preisbereinigt, verkettete Volumenang. (Mrd. EUR);2722,020;-;-;-;3026,180;1,5;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2015;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVU;preisbereinigt, unverkettete Volumenang.(Mrd. EUR);2667,231;-;-;-;2970,965;-;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2016;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRJPM;in jeweiligen Preisen (Mrd. EUR);2822,443;312,297;319,143;6,846;3134,740;3,6;38067,000\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2016;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPKM;preisbereinigt, Kettenindex (2015=100);102,250;102,030;101,990;100,240;102,230;2,2;101,410\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2016;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVK;preisbereinigt, verkettete Volumenang. (Mrd. EUR);2783,265;-;-;-;3093,664;2,2;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2016;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVU;preisbereinigt, unverkettete Volumenang.(Mrd. EUR);2783,371;-;-;-;3093,710;-;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2017;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRJPM;in jeweiligen Preisen (Mrd. EUR);2944,074;323,086;329,847;6,761;3267,160;4,2;39527,000\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2017;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPKM;preisbereinigt, Kettenindex (2015=100);105,110;103,760;103,700;101,290;104,970;2,7;103,740\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2017;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVK;preisbereinigt, verkettete Volumenang. (Mrd. EUR);2861,115;-;-;-;3176,581;2,7;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2017;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVU;preisbereinigt, unverkettete Volumenang.(Mrd. EUR);2901,242;-;-;-;3218,826;-;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2018;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRJPM;in jeweiligen Preisen (Mrd. EUR);3032,736;332,714;339,600;6,886;3365,450;3,0;40594,000\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2018;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPKM;preisbereinigt, Kettenindex (2015=100);106,100;105,160;105,070;101,020;106,000;1,0;104,440\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2018;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVK;preisbereinigt, verkettete Volumenang. (Mrd. EUR);2888,063;-;-;-;3207,751;1,0;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2018;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVU;preisbereinigt, unverkettete Volumenang.(Mrd. EUR);2971,786;-;-;-;3299,232;-;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2019;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRJPM;in jeweiligen Preisen (Mrd. EUR);3130,567;343,543;350,942;7,399;3474,110;3,2;41810,000\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2019;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPKM;preisbereinigt, Kettenindex (2015=100);107,000;108,390;108,240;101,420;107,140;1,1;105,330\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2019;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVK;preisbereinigt, verkettete Volumenang. (Mrd. EUR);2912,561;-;-;-;3242,249;1,1;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2019;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVU;preisbereinigt, unverkettete Volumenang.(Mrd. EUR);3058,504;-;-;-;3401,440;-;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2020;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRJPM;in jeweiligen Preisen (Mrd. EUR);3086,377;317,353;325,967;8,614;3403,730;-2,0;40929,000\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2020;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPKM;preisbereinigt, Kettenindex (2015=100);102,670;106,400;105,560;68,580;103,040;-3,8;101,220\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2020;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVK;preisbereinigt, verkettete Volumenang. (Mrd. EUR);2794,698;-;-;-;3118,176;-3,8;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2020;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVU;preisbereinigt, unverkettete Volumenang.(Mrd. EUR);3003,872;-;-;-;3341,107;-;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2021;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRJPM;in jeweiligen Preisen (Mrd. EUR);3276,377;341,073;365,141;24,068;3617,450;6,3;43481,000\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2021;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPKM;preisbereinigt, Kettenindex (2015=100);106,060;108,380;107,530;70,100;106,300;3,2;104,370\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2021;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVK;preisbereinigt, verkettete Volumenang. (Mrd. EUR);2886,974;-;-;-;3216,829;3,2;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2021;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVU;preisbereinigt, unverkettete Volumenang.(Mrd. EUR);3188,250;-;-;-;3511,508;-;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2022;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRJPM;in jeweiligen Preisen (Mrd. EUR);3509,628;367,182;390,670;23,488;3876,810;7,2;46264,000\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2022;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPKM;preisbereinigt, Kettenindex (2015=100);107,860;111,470;108,370;50,060;108,220;1,8;105,500\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2022;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVK;preisbereinigt, verkettete Volumenang. (Mrd. EUR);2935,971;-;-;-;3274,932;1,8;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2022;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVU;preisbereinigt, unverkettete Volumenang.(Mrd. EUR);3331,942;-;-;-;3682,747;-;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2023;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRJPM;in jeweiligen Preisen (Mrd. EUR);3767,909;354,301;390,660;36,359;4122,210;6,3;48775,000\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2023;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPKM;preisbereinigt, Kettenindex (2015=100);107,930;108,460;104,270;39,670;108,010;-0,2;104,400\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2023;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVK;preisbereinigt, verkettete Volumenang. (Mrd. EUR);2937,876;-;-;-;3268,577;-0,2;-\n", - "81000;Volkswirtschaftliche Gesamtrechnungen des Bundes;JAHR;Jahr;2023;DINSG;Deutschland insgesamt;DG;Deutschland;VGRPB5;Preisbasis;VGRPVU;preisbereinigt, unverkettete Volumenang.(Mrd. EUR);3511,999;-;-;-;3869,273;-;-\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "print(t.raw_data)" ] @@ -181,132 +113,15 @@ "id": "5ae90416", "metadata": {}, "source": [ - "More likely, you are interested in the `pandas` `DataFrame`, which is accessible via the `.data` attribute." + "More likely, you are interested in the `pandas` `DataFrame`, which is accessible via the `.data` attribute.\n" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "874bbbb9", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
JahrDeutschland insgesamtPreisbasisBruttowertschoepfung__jew._MEGuetersteuern_abzuegl._Guetersubventionen__jew._MEGuetersteuern__jew._MEGuetersubventionen__jew._MEBruttoinlandsprodukt__jew._MEnachr.:_Bruttoinlandsprodukt_(Veraenderung_in_%)__Prozentnachr.:_Bruttoinlandsprodukt_je_Einwohner__jew._ME
02014Deutschlandin jeweiligen Preisen (Mrd. EUR)2635.393292.037298.7746.7372927.4304.136149.00
12014Deutschlandpreisbereinigt, Kettenindex (2015=100)98.81096.15096.250100.97098.5302.299.38
22014Deutschlandpreisbereinigt, verkettete Volumenang. (Mrd. EUR)2689.628NaNNaNNaN2981.6952.2NaN
32014Deutschlandpreisbereinigt, unverkettete Volumenang.(Mrd. EUR)2584.829NaNNaNNaN2873.722NaNNaN
42015Deutschlandin jeweiligen Preisen (Mrd. EUR)2722.020304.160310.9426.7823026.1803.437046.00
\n", - "
" - ], - "text/plain": [ - " Jahr Deutschland insgesamt Preisbasis Bruttowertschoepfung__jew._ME Guetersteuern_abzuegl._Guetersubventionen__jew._ME Guetersteuern__jew._ME Guetersubventionen__jew._ME Bruttoinlandsprodukt__jew._ME nachr.:_Bruttoinlandsprodukt_(Veraenderung_in_%)__Prozent nachr.:_Bruttoinlandsprodukt_je_Einwohner__jew._ME\n", - "0 2014 Deutschland in jeweiligen Preisen (Mrd. EUR) 2635.393 292.037 298.774 6.737 2927.430 4.1 36149.00\n", - "1 2014 Deutschland preisbereinigt, Kettenindex (2015=100) 98.810 96.150 96.250 100.970 98.530 2.2 99.38\n", - "2 2014 Deutschland preisbereinigt, verkettete Volumenang. (Mrd. EUR) 2689.628 NaN NaN NaN 2981.695 2.2 NaN\n", - "3 2014 Deutschland preisbereinigt, unverkettete Volumenang.(Mrd. EUR) 2584.829 NaN NaN NaN 2873.722 NaN NaN\n", - "4 2015 Deutschland in jeweiligen Preisen (Mrd. EUR) 2722.020 304.160 310.942 6.782 3026.180 3.4 37046.00" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "t.data.head()" ] @@ -316,120 +131,15 @@ "id": "677d68b7", "metadata": {}, "source": [ - "Finally, you can also access the metadata for this table via the `.metadata` attribute." + "Finally, you can also access the metadata for this table via the `.metadata` attribute.\n" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "5f3672e9", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'Copyright': '© Statistisches Bundesamt (Destatis), 2024',\n", - " 'Ident': {'Method': 'table', 'Service': 'metadata'},\n", - " 'Object': {'Code': '81000-0001',\n", - " 'Content': 'VGR des Bundes - Bruttowertschöpfung, '\n", - " 'Bruttoinlandsprodukt\\n'\n", - " '(nominal/preisbereinigt): Deutschland, Jahre',\n", - " 'Structure': {'Columns': [{'Code': 'JAHR',\n", - " 'Content': 'Jahr',\n", - " 'Selected': '10',\n", - " 'Structure': None,\n", - " 'Type': 'Merkmal',\n", - " 'Updated': 'see parent',\n", - " 'Values': '10'}],\n", - " 'Head': {'Code': '81000',\n", - " 'Content': 'Volkswirtschaftliche '\n", - " 'Gesamtrechnungen des Bundes',\n", - " 'Selected': None,\n", - " 'Structure': [{'Code': 'DINSG',\n", - " 'Content': 'Deutschland '\n", - " 'insgesamt',\n", - " 'Selected': '1',\n", - " 'Structure': None,\n", - " 'Type': 'Merkmal',\n", - " 'Updated': 'see parent',\n", - " 'Values': '1'}],\n", - " 'Type': 'Statistik',\n", - " 'Updated': 'see parent',\n", - " 'Values': None},\n", - " 'Rows': [{'Code': 'BWS001',\n", - " 'Content': 'Bruttowertschöpfung',\n", - " 'Selected': None,\n", - " 'Structure': None,\n", - " 'Type': 'Merkmal',\n", - " 'Updated': 'see parent',\n", - " 'Values': None},\n", - " {'Code': 'STR006',\n", - " 'Content': 'Gütersteuern abzügl. '\n", - " 'Gütersubventionen',\n", - " 'Selected': None,\n", - " 'Structure': None,\n", - " 'Type': 'Merkmal',\n", - " 'Updated': 'see parent',\n", - " 'Values': None},\n", - " {'Code': 'STR020',\n", - " 'Content': 'Gütersteuern',\n", - " 'Selected': None,\n", - " 'Structure': None,\n", - " 'Type': 'Merkmal',\n", - " 'Updated': 'see parent',\n", - " 'Values': None},\n", - " {'Code': 'SUB003',\n", - " 'Content': 'Gütersubventionen',\n", - " 'Selected': None,\n", - " 'Structure': None,\n", - " 'Type': 'Merkmal',\n", - " 'Updated': 'see parent',\n", - " 'Values': None},\n", - " {'Code': 'VGR014',\n", - " 'Content': 'Bruttoinlandsprodukt',\n", - " 'Selected': None,\n", - " 'Structure': None,\n", - " 'Type': 'Merkmal',\n", - " 'Updated': 'see parent',\n", - " 'Values': None},\n", - " {'Code': 'BIP005',\n", - " 'Content': 'Bruttoinlandsprodukt '\n", - " '(Veränderung in %)',\n", - " 'Selected': None,\n", - " 'Structure': None,\n", - " 'Type': 'Merkmal',\n", - " 'Updated': 'see parent',\n", - " 'Values': None},\n", - " {'Code': 'BIP004',\n", - " 'Content': 'Bruttoinlandsprodukt je '\n", - " 'Einwohner',\n", - " 'Selected': None,\n", - " 'Structure': None,\n", - " 'Type': 'Merkmal',\n", - " 'Updated': 'see parent',\n", - " 'Values': None}],\n", - " 'Subheading': {'Code': 'VGRPB5',\n", - " 'Content': 'Preisbasis (jeweilige '\n", - " 'Preise / preisbereinigt)',\n", - " 'Selected': None,\n", - " 'Structure': None,\n", - " 'Type': 'Merkmal',\n", - " 'Updated': 'see parent',\n", - " 'Values': None},\n", - " 'Subtitel': None},\n", - " 'Time': {'From': '1991', 'To': '2023'},\n", - " 'Updated': '28.12.2022 17:19:48h',\n", - " 'Valid': 'false'},\n", - " 'Parameter': {'area': 'Alle',\n", - " 'language': 'de',\n", - " 'name': '81000-0001',\n", - " 'password': '********************',\n", - " 'username': '********************'},\n", - " 'Status': {'Code': 0, 'Content': 'erfolgreich', 'Type': 'Information'}}\n" - ] - } - ], + "outputs": [], "source": [ "from pprint import pprint\n", "\n", @@ -441,7 +151,7 @@ "id": "953d7cb2", "metadata": {}, "source": [ - "## How `pystatis` prepares the data for you" + "## How `pystatis` prepares the data for you\n" ] }, { @@ -449,9 +159,9 @@ "id": "22b075b6", "metadata": {}, "source": [ - "As you can notice from a comparison between the `.raw_data` and `.data` formats, `pystatis` is doing a lot behind the scenes to provide you with a format that is hopefully the most useful for you. You will see and learn that there are a few parameters that you can use to actually change this behavior and adjust the table to your needs. \n", + "As you can notice from a comparison between the `.raw_data` and `.data` formats, `pystatis` is doing a lot behind the scenes to provide you with a format that is hopefully the most useful for you. You will see and learn that there are a few parameters that you can use to actually change this behavior and adjust the table to your needs.\n", "\n", - "But first we would like to explain to you how `pystatis` is preparing the data by default so you have a better understanding of the underlying process." + "But first we would like to explain to you how `pystatis` is preparing the data by default so you have a better understanding of the underlying process.\n" ] }, { @@ -460,13 +170,14 @@ "metadata": {}, "source": [ "When we look at the header of the raw data, we can notice a few things:\n", + "\n", "- Many columns always come in a pair of `*_Code` and `*_Label` columns. Both contain the same information, only provided differently.\n", "- There are columns that don't have a direct use as they contain information not needed in the table, like the `Statistik_Code` and `Statistik_Label` columns at the beginning. You already know the statistic from the name of the table and this information is the same for each and every row anyway.\n", "- There is always a time dimension, broken down into three different columns `Zeit_Code`, `Zeit_Label` and `Zeit` (or `time_*` in English).\n", "- The other dimensions are called variables (German \"Merkmale\") and they always come in groups of four columns: `N_Merkmal_Code`, `N_Merkmal_Label`, `N_Auspraegung_Code`, and `N_Auspraegung_Label` (English: variable code and label and variable value code and label).\n", - "- The actual measurements or values are at the end of the table after the variables and each measurement has one column. The name of this column follows the format `__