diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..fc00cec --- /dev/null +++ b/.dockerignore @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: 2022 Albert Meroño, Rinke Hoekstra, Carlos Martínez +# +# SPDX-License-Identifier: MIT + +*~ +\#*\# +bin/ +!bin/grlc-server +build/ +config.ini +db-cache.json +db.json +DispatchSparqlQuery.ipynb +docker-compose.yml +Dockerfile2 +*/.DS_Store +.eggs/ +FileLoaders.ipynb +Get_Parameters.ipynb +GetYamlDecorators.ipynb +.git/ +.*!.gitignore +grlc.egg-info/ +.idea +include/ +ink_ext_XXXXXX_img0.png +.ipynb_checkpoints/ +lib/ +local/ +*.log +node_modules +pip-selfcheck.json +Process_sparql_query_text.ipynb +*.pyc +.pytest_cache +.Python +ReleaseProcedure.md +response_1700168662326.html +Rewrite_Query.ipynb +share/ +src/config.ini +src/FileLoaderTesting.ipynb +ssl-certificates/ +!static/swagger-ui/dist/lib +*.swp +testQueries/ +TODOs.md +TwitterAPIKeys.md +venv3.10/ +venv3.11/ +venv3.8/ +venv3.9/ +venv-dev/ +venv-tests.sh +.vscode/ diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..ff833a1 --- /dev/null +++ b/.flake8 @@ -0,0 +1,5 @@ +[flake8] +per-file-ignores = + src/__init__.py:F401 + src/prov.py:E203 + tests/test_grlc.py:F401 diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000..08c3025 --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,23 @@ +name: Lint + +on: + push: + paths: + - 'src/*.py' + - 'tests/*.py' +jobs: + linter: + runs-on: ubuntu-latest + name: Lint + steps: + - name: Check out source repository + uses: actions/checkout@v3 + - name: Set up Python environment + uses: actions/setup-python@v4 + with: + python-version: "3.11" + - name: flake8 Lint + uses: py-actions/flake8@v2 + with: + max-line-length: "127" + path: "src" diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml index ec6f923..2d6696e 100644 --- a/.github/workflows/publish_pypi.yml +++ b/.github/workflows/publish_pypi.yml @@ -12,7 +12,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v1 with: - python-version: 3.7 + python-version: 3.11 - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 26cd1fe..214625a 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -6,7 +6,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8, 3.9, 3.10.x, 3.11] steps: - uses: actions/checkout@master - name: Set up Python ${{ matrix.python-version }} diff --git a/.gitignore b/.gitignore index c639a0b..ac09f82 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,4 @@ TODOs.md TwitterAPIKeys.md config.ini ink_ext_XXXXXX_img0.png +build/ diff --git a/CITATION.cff b/CITATION.cff index 0f383a0..c7d6c84 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -16,7 +16,7 @@ authors: given-names: Carlos orcid: "https://orcid.org/0000-0001-5565-7577" cff-version: "1.0.3" -date-released: 2023-07-30 +date-released: 2024-02-17 doi: 10.5281/zenodo.1064391 license: MIT message: "If you use this software, please cite it as below." 
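Aside (not part of the patch): the `version` bump in the next hunk is what actually drives the package version — `setup.py`, reformatted later in this diff, scans `CITATION.cff` for a `version:` line. A minimal sketch of that lookup, using a hypothetical helper name `read_version`:

```python
# Sketch of how setup.py (later in this diff) derives the package
# version from CITATION.cff; read_version is a hypothetical helper.
def read_version(cff_path="CITATION.cff"):
    version = None
    with open(cff_path, "r") as cff:
        for line in cff:
            # Note: 'version:' also matches the 'cff-version:' line;
            # because the loop keeps the *last* match and 'version:'
            # appears later in the file, the real version wins.
            if "version:" in line:
                version = line.replace("version:", "").strip().strip('"')
    return version

print(read_version())  # e.g. '1.3.9' after this patch
```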
@@ -29,4 +29,4 @@ keywords:
 - "linked-data"
 - "semantic-web"
 - "linked-data-api"
-version: "1.3.8"
+version: "1.3.9"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 051b4b4..ad50a68 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -58,6 +58,7 @@ services:
       - USERMAP_GID=1000
       - USERMAP_UID=1000
       - GRLC_GITHUB_ACCESS_TOKEN=xxx
+      - GRLC_GITLAB_ACCESS_TOKEN=yyy
       - GRLC_SERVER_NAME=grlc.io
 ```
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index acce9f4..6fa9929 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -23,3 +23,4 @@ This is a list of all people who have contributed to grlc. Big thanks to everyon
 [GenEars](https://github.com/GenEars)
 [nichtich](https://github.com/nichtich)
 [jblom](https://github.com/jblom)
+[abelsiqueira](https://github.com/abelsiqueira)
diff --git a/Dockerfile b/Dockerfile
index ed5ef98..fef7d87 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,15 +2,17 @@
 #
 # SPDX-License-Identifier: MIT
 
-FROM python:3.8.16
+FROM python:3.11-slim
 MAINTAINER albert.merono@vu.nl
 
 # Default values for env variables
 ARG GRLC_GITHUB_ACCESS_TOKEN=
+ARG GRLC_GITLAB_ACCESS_TOKEN=
 ARG GRLC_SERVER_NAME=grlc.io
 ARG GRLC_SPARQL_ENDPOINT=http://dbpedia.org/sparql
 
 ENV GRLC_GITHUB_ACCESS_TOKEN=$GRLC_GITHUB_ACCESS_TOKEN \
+    GRLC_GITLAB_ACCESS_TOKEN=$GRLC_GITLAB_ACCESS_TOKEN \
     GRLC_SERVER_NAME=$GRLC_SERVER_NAME \
     GRLC_SPARQL_ENDPOINT=$GRLC_SPARQL_ENDPOINT
diff --git a/README.md b/README.md
index dda99f8..82fbc96 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ If you use grlc in your work, please cite it as:
 ```
 
 ## What is grlc?
-grlc is a lightweight server that takes SPARQL queries (stored in a GitHub repository, in your local filesystem, or listed in a URL), and translates them to Linked Data Web APIs. This enables universal access to Linked Data. Users are not required to know SPARQL to query their data, but instead can access a web API.
+grlc is a lightweight server that takes SPARQL queries (stored in a GitHub or GitLab repository, in your local filesystem, or listed in a URL), and translates them to Linked Data Web APIs. This enables universal access to Linked Data. Users are not required to know SPARQL to query their data, but instead can access a web API.
 
 ## Quick tutorial
 For a quick usage tutorial check out our wiki [walkthrough](https://github.com/CLARIAH/grlc/wiki/Quick-tutorial) and [list of features](https://github.com/CLARIAH/grlc/wiki/Features).
@@ -43,7 +43,7 @@ Your queries can add API parameters to each operation by using the [parameter ma
 Your queries can include special [decorators](#decorator-syntax) to add extra functionality to your API.
 
 ### Query location
-grlc can load your query collection from different locations: from a GitHub repository (`api-git`), from local storage (`api-local`), and from a specification file (`api-url`). Each type of location has specific features and is accessible via different paths. However all location types produce the same beautiful APIs.
+grlc can load your query collection from different locations: from a GitHub repository (`api-git`), from a GitLab repository (`api-gitlab`), from local storage (`api-local`), and from a specification file (`api-url`). Each type of location has specific features and is accessible via different paths. However, all location types produce the same beautiful APIs.
 
 #### From a GitHub repository
 > API path:
 `http://grlc-server/api-git/<user>/<repo>`
@@ -58,6 +58,19 @@ grlc can make use of git's version control mechanism to generate an API based on
 grlc can also use a subdirectory inside your Github repo.
 This can be done by including a subdirectory in the URL path (`http://grlc-server/api-git/<user>/<repo>/subdir/<subdir>`).
 
+#### From a GitLab repository
+> API path:
+`http://grlc-server/api-gitlab/<user>/<repo>`
+
+grlc can build an API from any GitLab repository, specified by the GitLab user name of the owner (`<user>`) and repository name (`<repo>`).
+
+For example, assuming your queries are stored in a GitLab repo: `https://gitlab.com/c-martinez/grlc-queries`, point your browser to the following location:
+`http://grlc.io/api-gitlab/c-martinez/grlc-queries/`
+
+grlc can make use of git's version control mechanism to generate an API based on a specific version of queries in the repository. This can be done by including the name of a branch in the URL path (`http://grlc-server/api-gitlab/<user>/<repo>/branch/<branch>`), for example: `http://grlc.io/api-gitlab/c-martinez/grlc-queries/branch/master`
+
+grlc can also use a subdirectory inside your GitLab repo. This can be done by including a subdirectory in the URL path (`http://grlc-server/api-gitlab/<user>/<repo>/subdir/<subdir>`), for example: `http://grlc-server/api-gitlab/c-martinez/grlc-queries/subdir/subdir`.
+
 #### From local storage
 > API path:
 `http://grlc-server/api-local/`
@@ -251,10 +264,21 @@ Syntax:
 
 Example [query](https://github.com/CLARIAH/grlc-queries/blob/master/transform.rq) and the equivalent [API operation](http://grlc.io/api-git/CLARIAH/grlc-queries/#/default/get_transform).
 
+### `endpoint-method`
+Allows the query to be sent from the grlc server to the SPARQL endpoint using either the `GET` or `POST` HTTP method (default: `POST`).
+
+Syntax:
+```
+#+ endpoint-method: GET
+```
+
+Example [query](https://github.com/CLARIAH/grlc-queries/blob/master/endpoint-method.rq) and the equivalent [API operation](http://grlc.io/api-git/CLARIAH/grlc-queries/#/default/get_endpoint_method).
+
 ### Example APIs
 Check these out:
 
 - http://grlc.io/api-git/CLARIAH/grlc-queries
+- http://grlc.io/api-gitlab/c-martinez/grlc-queries
 - http://grlc.io/api-url?specUrl=https://raw.githubusercontent.com/CLARIAH/grlc-queries/master/urls.yml
 - http://grlc.io/api-git/CLARIAH/wp4-queries-hisco
 - http://grlc.io/api-git/albertmeronyo/lodapi
@@ -282,9 +306,9 @@ To run grlc via [docker](https://www.docker.com/), you'll need a working install
 docker run -it --rm -p 8088:80 clariah/grlc
 ```
 
-The docker image allows you to setup several environment variable such as `GRLC_SERVER_NAME` `GRLC_GITHUB_ACCESS_TOKEN` and `GRLC_SPARQL_ENDPOINT`:
+The docker image allows you to set up several environment variables, such as `GRLC_SERVER_NAME`, `GRLC_GITHUB_ACCESS_TOKEN`, `GRLC_GITLAB_ACCESS_TOKEN` and `GRLC_SPARQL_ENDPOINT`:
 ```bash
-docker run -it --rm -p 8088:80 -e GRLC_SERVER_NAME=grlc.io -e GRLC_GITHUB_ACCESS_TOKEN=xxx -e GRLC_SPARQL_ENDPOINT=http://dbpedia.org/sparql -e DEBUG=true clariah/grlc
+docker run -it --rm -p 8088:80 -e GRLC_SERVER_NAME=grlc.io -e GRLC_GITHUB_ACCESS_TOKEN=xxx -e GRLC_GITLAB_ACCESS_TOKEN=yyy -e GRLC_SPARQL_ENDPOINT=http://dbpedia.org/sparql -e DEBUG=true clariah/grlc
 ```
 
 ### Pip
@@ -346,19 +370,21 @@ You can use grlc as a library directly from your own python script. See the [usa
 
 Regardless of how you are running your grlc server, you will need to configure it using the `config.ini` file. Have a look at the [example config file](./config.default.ini) to see how this file is structured. The configuration file contains the following variables:
 
- - `github_access_token` [access token](#github-access-token) to communicate with Github API.
+ - `github_access_token` [access token](#git-access-token) to communicate with the GitHub API.
+ - `gitlab_access_token` [access token](#git-access-token) to communicate with the GitLab API.
 - `local_sparql_dir` local storage directory where [local queries](#from-local-storage) are located.
 - `server_name` name of the server (e.g. grlc.io)
 - `sparql_endpoint` default SPARQL endpoint
 - `user` and `password` SPARQL endpoint default authentication (if required, specify `'none'` if not required)
 - `debug` enable debug level logging.
+ - `gitlab_url` base URL of your GitLab instance.
 
-##### GitHub access token
-In order for grlc to communicate with GitHub, you'll need to tell grlc what your access token is:
+##### Git access token
+In order for grlc to communicate with GitHub and/or GitLab, you'll need to tell grlc what your access token is:
 
-1. Get a GitHub personal access token. In your GitHub's profile page, go to _Settings_, then _Developer settings_, _Personal access tokens_, and _Generate new token_
-2. You'll get an access token string, copy it and save it somewhere safe (GitHub won't let you see it again!)
-3. Edit your `config.ini` or `docker-compose.yml` as value of the environment variable `GRLC_GITHUB_ACCESS_TOKEN`.
+1. Get a [GitHub personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/about-authentication-to-github#authenticating-to-the-api-with-a-personal-access-token) or a [GitLab personal access token](https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html#create-a-personal-access-token).
+2. You'll get an access token string; copy it and save it somewhere safe.
+3. Add the token to your `config.ini` (as `github_access_token` or `gitlab_access_token`, respectively) and/or to your `docker-compose.yml` (as the `GRLC_GITHUB_ACCESS_TOKEN` or `GRLC_GITLAB_ACCESS_TOKEN` environment variable).
 
 # Contribute!
 grlc needs **you** to continue bringing Semantic Web content to developers, applications and users.
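Aside (not part of the patch): once a server is configured with the settings above, a quick smoke test of the GitLab-backed example API from this README can be run from Python. The host, repo, operation name (`defaults`) and `genre` parameter follow the README and notebook examples in this changeset; adjust them for your own deployment.

```python
import requests

# Hedged example: call an operation of the API grlc builds from the
# example GitLab repo (gitlab.com/c-martinez/grlc-queries); assumes
# the public grlc.io instance is reachable.
url = "http://grlc.io/api-gitlab/c-martinez/grlc-queries/defaults"
params = {"genre": "http://dbpedia.org/resource/Rock_music"}
headers = {"Accept": "text/csv"}

resp = requests.get(url, params=params, headers=headers)
resp.raise_for_status()
print(resp.text.splitlines()[:5])  # header row + first result rows
```

The `Accept: text/csv` header asks grlc to return the SPARQL results as CSV, which is the same content type the notebook later in this diff requests.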
No matter if you are just a curious user, a developer, or a researcher; there are many ways in which you can contribute: diff --git a/bin/grlc-server b/bin/grlc-server index 944d3b3..c9a329c 100755 --- a/bin/grlc-server +++ b/bin/grlc-server @@ -25,7 +25,7 @@ def runViaWaitress(port=8088): def runViaGunicorn(port=8088): from gunicorn.app.base import BaseApplication - from gunicorn.six import iteritems + class StandaloneApplication(BaseApplication): def __init__(self, app, options=None): self.options = options or {} @@ -33,9 +33,9 @@ def runViaGunicorn(port=8088): super(StandaloneApplication, self).__init__() def load_config(self): - config = dict([(key, value) for key, value in iteritems(self.options) + config = dict([(key, value) for key, value in self.options.items() if key in self.cfg.settings and value is not None]) - for key, value in iteritems(config): + for key, value in config.items(): self.cfg.set(key.lower(), value) def load(self): diff --git a/config.default.ini b/config.default.ini index e8f5e6d..f272b09 100644 --- a/config.default.ini +++ b/config.default.ini @@ -4,6 +4,7 @@ [auth] github_access_token = xxx +gitlab_access_token = yyy [local] local_sparql_dir = /home/grlc/queries/ @@ -12,9 +13,12 @@ local_sparql_dir = /home/grlc/queries/ # Default endpoint, if none specified elsewhere sparql_endpoint = http://dbpedia.org/sparql server_name = grlc.io + # endpoint default authentication user = none password = none +# sparql_access_token = SPARQL endpoint HTTP authorization token + # Logging level debug = True diff --git a/doc/notebooks/GrlcFromNotebook.ipynb b/doc/notebooks/GrlcFromNotebook.ipynb index 37ca539..b482c17 100644 --- a/doc/notebooks/GrlcFromNotebook.ipynb +++ b/doc/notebooks/GrlcFromNotebook.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -34,13 +34,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "user = 'CLARIAH'\n", "repo = 'grlc-queries'\n", - "spec, warning = swagger.build_spec(user, repo)" + "spec, warning = swagger.build_spec(user, repo, git_type=grlc.static.TYPE_GITHUB)" ] }, { @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -60,11 +60,11 @@ "output_type": "stream", "text": [ "{\n", - " \"call_name\": \"/description\",\n", + " \"call_name\": \"/defaults\",\n", " \"method\": \"get\",\n", " \"tags\": [],\n", " \"summary\": \"\",\n", - " \"description\": \"Extended description of my query/operation. 
This can be a more extensive than the summary and in can span multiple lines.\\nDescriptions support (to some extent) [CommonMark](https://commonmark.org/help/).\",\n", + " \"description\": \"\",\n", " \"params\": [\n", " {\n", " \"name\": \"endpoint\",\n", @@ -72,11 +72,20 @@ " \"in\": \"query\",\n", " \"description\": \"Alternative endpoint for SPARQL query\",\n", " \"default\": \"https://dbpedia.org/sparql\"\n", + " },\n", + " {\n", + " \"name\": \"genre\",\n", + " \"type\": \"string\",\n", + " \"required\": true,\n", + " \"in\": \"query\",\n", + " \"description\": \"A value of type string (iri) that will substitute ?_genre_iri in the original query\",\n", + " \"format\": \"iri\",\n", + " \"default\": \"http://dbpedia.org/resource/Rock_music\"\n", " }\n", " ],\n", " \"item_properties\": null,\n", - " \"query\": \"\\nPREFIX dbo: \\nPREFIX schema: \\nPREFIX rdf: \\n\\nSELECT ?band ?album ?genre WHERE {\\n ?band rdf:type dbo:Band .\\n ?album rdf:type schema:MusicAlbum .\\n ?band dbo:genre ?genre .\\n ?album dbo:artist ?band .\\n} LIMIT 100\\n\",\n", - " \"original_query\": \"#+ description: Extended description of my query/operation.\\n#+ This can be a more extensive than the summary and in can span\\n#+ multiple lines.\\n#+\\n#+ Descriptions support (to some extent) [CommonMark](https://commonmark.org/help/).\\n\\nPREFIX dbo: \\nPREFIX schema: \\nPREFIX rdf: \\n\\nSELECT ?band ?album ?genre WHERE {\\n ?band rdf:type dbo:Band .\\n ?album rdf:type schema:MusicAlbum .\\n ?band dbo:genre ?genre .\\n ?album dbo:artist ?band .\\n} LIMIT 100\\n\"\n", + " \"query\": \"\\nPREFIX dbo: \\nPREFIX dbp: \\nPREFIX schema: \\nPREFIX rdf: \\n\\nSELECT ?band ?album WHERE {\\n ?band rdf:type dbo:Band .\\n ?album rdf:type schema:MusicAlbum .\\n ?band dbo:genre ?_genre_iri .\\n ?album dbp:artist ?band .\\n} LIMIT 100\\n\",\n", + " \"original_query\": \"#+ defaults:\\n#+ - genre: http://dbpedia.org/resource/Rock_music\\n\\nPREFIX dbo: \\nPREFIX dbp: \\nPREFIX schema: \\nPREFIX rdf: \\n\\nSELECT ?band ?album WHERE {\\n ?band rdf:type dbo:Band .\\n ?album rdf:type schema:MusicAlbum .\\n ?band dbo:genre ?_genre_iri .\\n ?album dbp:artist ?band .\\n} LIMIT 100\\n\"\n", "}\n" ] } @@ -103,14 +112,15 @@ "text": [ "\n", "PREFIX dbo: \n", + "PREFIX dbp: \n", "PREFIX schema: \n", "PREFIX rdf: \n", "\n", - "SELECT ?band ?album ?genre WHERE {\n", + "SELECT ?band ?album WHERE {\n", " ?band rdf:type dbo:Band .\n", " ?album rdf:type schema:MusicAlbum .\n", - " ?band dbo:genre ?genre .\n", - " ?album dbo:artist ?band .\n", + " ?band dbo:genre ?_genre_iri .\n", + " ?album dbp:artist ?band .\n", "} LIMIT 100\n", "\n" ] @@ -136,7 +146,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/description\n" + "/defaults\n" ] } ], @@ -157,21 +167,12 @@ "cell_type": "code", "execution_count": 6, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requesting query at https://raw.githubusercontent.com/CLARIAH/grlc-queries/master/description.rq\n", - "Requesting query at https://raw.githubusercontent.com/CLARIAH/grlc-queries/master/endpoint.txt\n" - ] - } - ], + "outputs": [], "source": [ "query_name = 'description'\n", "acceptHeader = 'text/csv'\n", "\n", - "data, code, headers = utils.dispatch_query(user, repo, query_name, acceptHeader=acceptHeader)" + "data, code, headers = utils.dispatch_query(user, repo, query_name, git_type=grlc.static.TYPE_GITHUB, acceptHeader=acceptHeader)" ] }, { @@ -215,63 +216,63 @@ " \n", " \n", " 0\n", - " http://dbpedia.org/resource/Asia_(band)\n", - " 
http://dbpedia.org/resource/Axioms_(album)\n", - " http://dbpedia.org/resource/Art_rock\n", + " http://dbpedia.org/resource/Cactus_(American_b...\n", + " http://dbpedia.org/resource/Cactus_(Cactus_album)\n", + " http://dbpedia.org/resource/Blues_rock\n", " \n", " \n", " 1\n", - " http://dbpedia.org/resource/Asia_(band)\n", - " http://dbpedia.org/resource/Axioms_(album)\n", - " http://dbpedia.org/resource/Progressive_rock\n", + " http://dbpedia.org/resource/Cactus_(American_b...\n", + " http://dbpedia.org/resource/Cactus_(Cactus_album)\n", + " http://dbpedia.org/resource/Hard_rock\n", " \n", " \n", " 2\n", - " http://dbpedia.org/resource/Asia_(band)\n", - " http://dbpedia.org/resource/Axioms_(album)\n", - " http://dbpedia.org/resource/Album-oriented_rock\n", + " http://dbpedia.org/resource/Cactus_(American_b...\n", + " http://dbpedia.org/resource/Cactus_(Cactus_album)\n", + " http://dbpedia.org/resource/Heavy_metal_music\n", " \n", " \n", " 3\n", - " http://dbpedia.org/resource/Asia_(band)\n", - " http://dbpedia.org/resource/Axioms_(album)\n", - " http://dbpedia.org/resource/Arena_rock\n", + " http://dbpedia.org/resource/Cactus_(American_b...\n", + " http://dbpedia.org/resource/Cactus_V\n", + " http://dbpedia.org/resource/Blues_rock\n", " \n", " \n", " 4\n", - " http://dbpedia.org/resource/Bauhaus_(band)\n", - " http://dbpedia.org/resource/Swing_the_Heartach...\n", - " http://dbpedia.org/resource/Gothic_rock\n", + " http://dbpedia.org/resource/Cactus_(American_b...\n", + " http://dbpedia.org/resource/Cactus_V\n", + " http://dbpedia.org/resource/Hard_rock\n", " \n", " \n", " 5\n", - " http://dbpedia.org/resource/Bauhaus_(band)\n", - " http://dbpedia.org/resource/Swing_the_Heartach...\n", - " http://dbpedia.org/resource/Post-punk\n", + " http://dbpedia.org/resource/Cactus_(American_b...\n", + " http://dbpedia.org/resource/Cactus_V\n", + " http://dbpedia.org/resource/Heavy_metal_music\n", " \n", " \n", " 6\n", - " http://dbpedia.org/resource/Catatonia_(band)\n", - " http://dbpedia.org/resource/Paper_Scissors_Sto...\n", - " http://dbpedia.org/resource/Alternative_rock\n", + " http://dbpedia.org/resource/Cadet_(band)\n", + " http://dbpedia.org/resource/Cadet_(album)\n", + " http://dbpedia.org/resource/Christian_alternat...\n", " \n", " \n", " 7\n", - " http://dbpedia.org/resource/Catatonia_(band)\n", - " http://dbpedia.org/resource/Paper_Scissors_Sto...\n", - " http://dbpedia.org/resource/Indie_rock\n", + " http://dbpedia.org/resource/Cadet_(band)\n", + " http://dbpedia.org/resource/Cadet_(album)\n", + " http://dbpedia.org/resource/Garage_rock\n", " \n", " \n", " 8\n", - " http://dbpedia.org/resource/Siouxsie_and_the_B...\n", - " http://dbpedia.org/resource/Tinderbox_(Siouxsi...\n", - " http://dbpedia.org/resource/Gothic_rock\n", + " http://dbpedia.org/resource/Cadet_(band)\n", + " http://dbpedia.org/resource/Cadet_(album)\n", + " http://dbpedia.org/resource/Rockabilly\n", " \n", " \n", " 9\n", - " http://dbpedia.org/resource/Siouxsie_and_the_B...\n", - " http://dbpedia.org/resource/Tinderbox_(Siouxsi...\n", - " http://dbpedia.org/resource/New_wave_music\n", + " http://dbpedia.org/resource/Café_Tacuba\n", + " http://dbpedia.org/resource/Café_Tacuba_(album)\n", + " http://dbpedia.org/resource/Latin_rock\n", " \n", " \n", "\n", @@ -279,40 +280,40 @@ ], "text/plain": [ " band \\\n", - "0 http://dbpedia.org/resource/Asia_(band) \n", - "1 http://dbpedia.org/resource/Asia_(band) \n", - "2 http://dbpedia.org/resource/Asia_(band) \n", - "3 http://dbpedia.org/resource/Asia_(band) \n", - "4 
http://dbpedia.org/resource/Bauhaus_(band) \n", - "5 http://dbpedia.org/resource/Bauhaus_(band) \n", - "6 http://dbpedia.org/resource/Catatonia_(band) \n", - "7 http://dbpedia.org/resource/Catatonia_(band) \n", - "8 http://dbpedia.org/resource/Siouxsie_and_the_B... \n", - "9 http://dbpedia.org/resource/Siouxsie_and_the_B... \n", + "0 http://dbpedia.org/resource/Cactus_(American_b... \n", + "1 http://dbpedia.org/resource/Cactus_(American_b... \n", + "2 http://dbpedia.org/resource/Cactus_(American_b... \n", + "3 http://dbpedia.org/resource/Cactus_(American_b... \n", + "4 http://dbpedia.org/resource/Cactus_(American_b... \n", + "5 http://dbpedia.org/resource/Cactus_(American_b... \n", + "6 http://dbpedia.org/resource/Cadet_(band) \n", + "7 http://dbpedia.org/resource/Cadet_(band) \n", + "8 http://dbpedia.org/resource/Cadet_(band) \n", + "9 http://dbpedia.org/resource/Café_Tacuba \n", "\n", " album \\\n", - "0 http://dbpedia.org/resource/Axioms_(album) \n", - "1 http://dbpedia.org/resource/Axioms_(album) \n", - "2 http://dbpedia.org/resource/Axioms_(album) \n", - "3 http://dbpedia.org/resource/Axioms_(album) \n", - "4 http://dbpedia.org/resource/Swing_the_Heartach... \n", - "5 http://dbpedia.org/resource/Swing_the_Heartach... \n", - "6 http://dbpedia.org/resource/Paper_Scissors_Sto... \n", - "7 http://dbpedia.org/resource/Paper_Scissors_Sto... \n", - "8 http://dbpedia.org/resource/Tinderbox_(Siouxsi... \n", - "9 http://dbpedia.org/resource/Tinderbox_(Siouxsi... \n", + "0 http://dbpedia.org/resource/Cactus_(Cactus_album) \n", + "1 http://dbpedia.org/resource/Cactus_(Cactus_album) \n", + "2 http://dbpedia.org/resource/Cactus_(Cactus_album) \n", + "3 http://dbpedia.org/resource/Cactus_V \n", + "4 http://dbpedia.org/resource/Cactus_V \n", + "5 http://dbpedia.org/resource/Cactus_V \n", + "6 http://dbpedia.org/resource/Cadet_(album) \n", + "7 http://dbpedia.org/resource/Cadet_(album) \n", + "8 http://dbpedia.org/resource/Cadet_(album) \n", + "9 http://dbpedia.org/resource/Café_Tacuba_(album) \n", "\n", - " genre \n", - "0 http://dbpedia.org/resource/Art_rock \n", - "1 http://dbpedia.org/resource/Progressive_rock \n", - "2 http://dbpedia.org/resource/Album-oriented_rock \n", - "3 http://dbpedia.org/resource/Arena_rock \n", - "4 http://dbpedia.org/resource/Gothic_rock \n", - "5 http://dbpedia.org/resource/Post-punk \n", - "6 http://dbpedia.org/resource/Alternative_rock \n", - "7 http://dbpedia.org/resource/Indie_rock \n", - "8 http://dbpedia.org/resource/Gothic_rock \n", - "9 http://dbpedia.org/resource/New_wave_music " + " genre \n", + "0 http://dbpedia.org/resource/Blues_rock \n", + "1 http://dbpedia.org/resource/Hard_rock \n", + "2 http://dbpedia.org/resource/Heavy_metal_music \n", + "3 http://dbpedia.org/resource/Blues_rock \n", + "4 http://dbpedia.org/resource/Hard_rock \n", + "5 http://dbpedia.org/resource/Heavy_metal_music \n", + "6 http://dbpedia.org/resource/Christian_alternat... 
\n", + "7 http://dbpedia.org/resource/Garage_rock \n", + "8 http://dbpedia.org/resource/Rockabilly \n", + "9 http://dbpedia.org/resource/Latin_rock " ] }, "execution_count": 7, @@ -387,63 +388,63 @@ " \n", " \n", " 0\n", - " http://dbpedia.org/resource/Asia_(band)\n", - " http://dbpedia.org/resource/Axioms_(album)\n", - " http://dbpedia.org/resource/Art_rock\n", + " http://dbpedia.org/resource/Cactus_(American_b...\n", + " http://dbpedia.org/resource/Cactus_(Cactus_album)\n", + " http://dbpedia.org/resource/Blues_rock\n", " \n", " \n", " 1\n", - " http://dbpedia.org/resource/Asia_(band)\n", - " http://dbpedia.org/resource/Axioms_(album)\n", - " http://dbpedia.org/resource/Progressive_rock\n", + " http://dbpedia.org/resource/Cactus_(American_b...\n", + " http://dbpedia.org/resource/Cactus_(Cactus_album)\n", + " http://dbpedia.org/resource/Hard_rock\n", " \n", " \n", " 2\n", - " http://dbpedia.org/resource/Asia_(band)\n", - " http://dbpedia.org/resource/Axioms_(album)\n", - " http://dbpedia.org/resource/Album-oriented_rock\n", + " http://dbpedia.org/resource/Cactus_(American_b...\n", + " http://dbpedia.org/resource/Cactus_(Cactus_album)\n", + " http://dbpedia.org/resource/Heavy_metal_music\n", " \n", " \n", " 3\n", - " http://dbpedia.org/resource/Asia_(band)\n", - " http://dbpedia.org/resource/Axioms_(album)\n", - " http://dbpedia.org/resource/Arena_rock\n", + " http://dbpedia.org/resource/Cactus_(American_b...\n", + " http://dbpedia.org/resource/Cactus_V\n", + " http://dbpedia.org/resource/Blues_rock\n", " \n", " \n", " 4\n", - " http://dbpedia.org/resource/Bauhaus_(band)\n", - " http://dbpedia.org/resource/Swing_the_Heartach...\n", - " http://dbpedia.org/resource/Gothic_rock\n", + " http://dbpedia.org/resource/Cactus_(American_b...\n", + " http://dbpedia.org/resource/Cactus_V\n", + " http://dbpedia.org/resource/Hard_rock\n", " \n", " \n", " 5\n", - " http://dbpedia.org/resource/Bauhaus_(band)\n", - " http://dbpedia.org/resource/Swing_the_Heartach...\n", - " http://dbpedia.org/resource/Post-punk\n", + " http://dbpedia.org/resource/Cactus_(American_b...\n", + " http://dbpedia.org/resource/Cactus_V\n", + " http://dbpedia.org/resource/Heavy_metal_music\n", " \n", " \n", " 6\n", - " http://dbpedia.org/resource/Catatonia_(band)\n", - " http://dbpedia.org/resource/Paper_Scissors_Sto...\n", - " http://dbpedia.org/resource/Alternative_rock\n", + " http://dbpedia.org/resource/Cadet_(band)\n", + " http://dbpedia.org/resource/Cadet_(album)\n", + " http://dbpedia.org/resource/Christian_alternat...\n", " \n", " \n", " 7\n", - " http://dbpedia.org/resource/Catatonia_(band)\n", - " http://dbpedia.org/resource/Paper_Scissors_Sto...\n", - " http://dbpedia.org/resource/Indie_rock\n", + " http://dbpedia.org/resource/Cadet_(band)\n", + " http://dbpedia.org/resource/Cadet_(album)\n", + " http://dbpedia.org/resource/Garage_rock\n", " \n", " \n", " 8\n", - " http://dbpedia.org/resource/Siouxsie_and_the_B...\n", - " http://dbpedia.org/resource/Tinderbox_(Siouxsi...\n", - " http://dbpedia.org/resource/Gothic_rock\n", + " http://dbpedia.org/resource/Cadet_(band)\n", + " http://dbpedia.org/resource/Cadet_(album)\n", + " http://dbpedia.org/resource/Rockabilly\n", " \n", " \n", " 9\n", - " http://dbpedia.org/resource/Siouxsie_and_the_B...\n", - " http://dbpedia.org/resource/Tinderbox_(Siouxsi...\n", - " http://dbpedia.org/resource/New_wave_music\n", + " http://dbpedia.org/resource/Café_Tacuba\n", + " http://dbpedia.org/resource/Café_Tacuba_(album)\n", + " http://dbpedia.org/resource/Latin_rock\n", " \n", " \n", "\n", 
@@ -451,40 +452,40 @@ ], "text/plain": [ " band \\\n", - "0 http://dbpedia.org/resource/Asia_(band) \n", - "1 http://dbpedia.org/resource/Asia_(band) \n", - "2 http://dbpedia.org/resource/Asia_(band) \n", - "3 http://dbpedia.org/resource/Asia_(band) \n", - "4 http://dbpedia.org/resource/Bauhaus_(band) \n", - "5 http://dbpedia.org/resource/Bauhaus_(band) \n", - "6 http://dbpedia.org/resource/Catatonia_(band) \n", - "7 http://dbpedia.org/resource/Catatonia_(band) \n", - "8 http://dbpedia.org/resource/Siouxsie_and_the_B... \n", - "9 http://dbpedia.org/resource/Siouxsie_and_the_B... \n", + "0 http://dbpedia.org/resource/Cactus_(American_b... \n", + "1 http://dbpedia.org/resource/Cactus_(American_b... \n", + "2 http://dbpedia.org/resource/Cactus_(American_b... \n", + "3 http://dbpedia.org/resource/Cactus_(American_b... \n", + "4 http://dbpedia.org/resource/Cactus_(American_b... \n", + "5 http://dbpedia.org/resource/Cactus_(American_b... \n", + "6 http://dbpedia.org/resource/Cadet_(band) \n", + "7 http://dbpedia.org/resource/Cadet_(band) \n", + "8 http://dbpedia.org/resource/Cadet_(band) \n", + "9 http://dbpedia.org/resource/Café_Tacuba \n", "\n", " album \\\n", - "0 http://dbpedia.org/resource/Axioms_(album) \n", - "1 http://dbpedia.org/resource/Axioms_(album) \n", - "2 http://dbpedia.org/resource/Axioms_(album) \n", - "3 http://dbpedia.org/resource/Axioms_(album) \n", - "4 http://dbpedia.org/resource/Swing_the_Heartach... \n", - "5 http://dbpedia.org/resource/Swing_the_Heartach... \n", - "6 http://dbpedia.org/resource/Paper_Scissors_Sto... \n", - "7 http://dbpedia.org/resource/Paper_Scissors_Sto... \n", - "8 http://dbpedia.org/resource/Tinderbox_(Siouxsi... \n", - "9 http://dbpedia.org/resource/Tinderbox_(Siouxsi... \n", + "0 http://dbpedia.org/resource/Cactus_(Cactus_album) \n", + "1 http://dbpedia.org/resource/Cactus_(Cactus_album) \n", + "2 http://dbpedia.org/resource/Cactus_(Cactus_album) \n", + "3 http://dbpedia.org/resource/Cactus_V \n", + "4 http://dbpedia.org/resource/Cactus_V \n", + "5 http://dbpedia.org/resource/Cactus_V \n", + "6 http://dbpedia.org/resource/Cadet_(album) \n", + "7 http://dbpedia.org/resource/Cadet_(album) \n", + "8 http://dbpedia.org/resource/Cadet_(album) \n", + "9 http://dbpedia.org/resource/Café_Tacuba_(album) \n", "\n", - " genre \n", - "0 http://dbpedia.org/resource/Art_rock \n", - "1 http://dbpedia.org/resource/Progressive_rock \n", - "2 http://dbpedia.org/resource/Album-oriented_rock \n", - "3 http://dbpedia.org/resource/Arena_rock \n", - "4 http://dbpedia.org/resource/Gothic_rock \n", - "5 http://dbpedia.org/resource/Post-punk \n", - "6 http://dbpedia.org/resource/Alternative_rock \n", - "7 http://dbpedia.org/resource/Indie_rock \n", - "8 http://dbpedia.org/resource/Gothic_rock \n", - "9 http://dbpedia.org/resource/New_wave_music " + " genre \n", + "0 http://dbpedia.org/resource/Blues_rock \n", + "1 http://dbpedia.org/resource/Hard_rock \n", + "2 http://dbpedia.org/resource/Heavy_metal_music \n", + "3 http://dbpedia.org/resource/Blues_rock \n", + "4 http://dbpedia.org/resource/Hard_rock \n", + "5 http://dbpedia.org/resource/Heavy_metal_music \n", + "6 http://dbpedia.org/resource/Christian_alternat... 
\n", + "7 http://dbpedia.org/resource/Garage_rock \n", + "8 http://dbpedia.org/resource/Rockabilly \n", + "9 http://dbpedia.org/resource/Latin_rock " ] }, "execution_count": 10, @@ -496,11 +497,18 @@ "data_requests = pd.read_csv(StringIO(resp.text))\n", "data_requests.head(10)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -514,9 +522,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.9.18" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/docker-assets/entrypoint.sh b/docker-assets/entrypoint.sh index 2c9a82f..298e01a 100644 --- a/docker-assets/entrypoint.sh +++ b/docker-assets/entrypoint.sh @@ -22,9 +22,10 @@ case ${1} in case ${1} in app:start) cd ${GRLC_INSTALL_DIR} - # put github's access_token in place + # put github and gitlab access_tokens in place cp config.default.ini config.ini sed -i "s/xxx/${GRLC_GITHUB_ACCESS_TOKEN}/" config.ini + sed -i "s/yyy/${GRLC_GITLAB_ACCESS_TOKEN}/" config.ini # configure grlc server name sed -i "s/grlc.io/${GRLC_SERVER_NAME}/" config.ini # configure default sparql endpoint diff --git a/requirements-test.txt b/requirements-test.txt index 773c229..2206ccf 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,3 +1,3 @@ -mock==2.0.0 -pytest==5.2.1 -flake8==3.9.2 +mock==5.1.0 +pytest==7.4.4 +flake8==6.1.0 diff --git a/requirements.txt b/requirements.txt index f30b45f..0a90996 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,28 +1,11 @@ -docopt==0.6.2 -docutils==0.17.1 -Flask==1.0.2 -Flask-Cors==3.0.9 -gevent==1.4.0; python_version <= '3.8' -gevent==22.10.2; python_version > '3.8' -greenlet==0.4.15; python_version <= '3.8' -greenlet==2.0.0; python_version > '3.8' -html5lib==1.0.1 -isodate==0.6.1 -itsdangerous==2.0.1 -MarkupSafe==0.23 -pyaml==18.11.0 -pyparsing==2.0.7 -python-gitlab==2.10.1 -PyYAML==5.4 -rdflib==5.0.0 -rdflib-jsonld==0.6.2 +docopt-ng==0.9.0 +Flask==3.0.0 +Flask-Cors==4.0.0 +pyaml==23.12.0 +python-gitlab==4.3.0 +rdflib==7.0.0 requests==2.31.0 -six==1.12.0 -simplejson==3.16.0 -setuptools>=38.6.0 -SPARQLTransformer==2.1.1 -SPARQLWrapper==1.8.2 -werkzeug==0.16.0 -PyGithub==1.57 -gunicorn==19.6.0; sys_platform!="win32" +SPARQLTransformer==2.3.0 +PyGithub==2.1.1 +gunicorn==21.2.0; sys_platform!="win32" waitress>=1.4.2; sys_platform=="win32" diff --git a/setup.py b/setup.py index 1beb748..4c833e1 100644 --- a/setup.py +++ b/setup.py @@ -10,54 +10,54 @@ import os from setuptools import setup -grlc_base = 'src' -grlc_base_dir = os.path.join(grlc_base, '') +grlc_base = "src" +grlc_base_dir = os.path.join(grlc_base, "") grlc_data = [] -for root,dirs,files in os.walk(grlc_base): +for root, dirs, files in os.walk(grlc_base): if root != grlc_base: - root_dir = root.replace(grlc_base_dir, '') - data_files = os.path.join(root_dir, '*') + root_dir = root.replace(grlc_base_dir, "") + data_files = os.path.join(root_dir, "*") grlc_data.append(data_files) # To update the package version number, edit CITATION.cff -with open('CITATION.cff', 'r') as cff: +with open("CITATION.cff", "r") as cff: for line in cff: - if 'version:' in line: - version = line.replace('version:', '').strip().strip('"') + if "version:" in line: + version = line.replace("version:", "").strip().strip('"') -with codecs.open('requirements.txt', mode='r') as 
f: +with codecs.open("requirements.txt", mode="r") as f: install_requires = f.read().splitlines() -with codecs.open('requirements-test.txt', mode='r') as f: +with codecs.open("requirements-test.txt", mode="r") as f: tests_require = f.read().splitlines() -with codecs.open('README.md', mode='r', encoding='utf-8') as f: +with codecs.open("README.md", mode="r", encoding="utf-8") as f: long_description = f.read() setup( name="grlc", - description='grlc, the git repository linked data API constructor', + description="grlc, the git repository linked data API constructor", long_description=long_description, - long_description_content_type='text/markdown', + long_description_content_type="text/markdown", license="Copyright 2017 Albert Meroño", - author='Albert Meroño', - author_email='albert.merono@vu.nl', - url='https://github.com/CLARIAH/grlc', + author="Albert Meroño", + author_email="albert.merono@vu.nl", + url="https://github.com/CLARIAH/grlc", version=version, - py_modules=['grlc'], - packages=['grlc'], - package_dir = {'grlc': grlc_base}, - scripts=['bin/grlc-server'], + py_modules=["grlc"], + packages=["grlc"], + package_dir={"grlc": grlc_base}, + scripts=["bin/grlc-server"], install_requires=install_requires, setup_requires=[ # dependency for `python setup.py test` - 'pytest-runner', + "pytest-runner", # dependencies for `python setup.py build_sphinx` # 'sphinx', # 'recommonmark' ], tests_require=tests_require, - package_data = { 'grlc': grlc_data }, + package_data={"grlc": grlc_data}, include_package_data=True, - data_files=[('citation/grlc', ['CITATION.cff'])], + data_files=[("citation/grlc", ["CITATION.cff"])], ) diff --git a/src/__version__.py b/src/__version__.py index e141ecf..12a059e 100644 --- a/src/__version__.py +++ b/src/__version__.py @@ -7,7 +7,7 @@ import yaml # To update the package version number, edit CITATION.cff -citationfile = os.path.join(sys.exec_prefix, 'citation/grlc', 'CITATION.cff') -with open(citationfile, 'r') as f: +citationfile = os.path.join(sys.exec_prefix, "citation/grlc", "CITATION.cff") +with open(citationfile, "r") as f: data = yaml.safe_load(f) - __version__ = data['version'] + __version__ = data["version"] diff --git a/src/fileLoaders.py b/src/fileLoaders.py index a78e17a..366ab7c 100644 --- a/src/fileLoaders.py +++ b/src/fileLoaders.py @@ -10,14 +10,13 @@ import gitlab import requests import yaml -import urllib.parse import base64 -import os from os import path from glob import glob -from github import Github +from github import Github, Auth from github.GithubObject import NotSet from github.GithubException import BadCredentialsException +from gitlab.exceptions import GitlabAuthenticationError from configparser import ConfigParser from urllib.parse import urljoin @@ -27,6 +26,7 @@ class BaseLoader: """Base class for File Loaders""" + def getTextForName(self, query_name): """Return the query text and query type for the given query name. Note that file extention is not part of the query name. 
For example, @@ -34,28 +34,27 @@ def getTextForName(self, query_name): from the loader's source (assuming such file exists).""" # The URIs of all candidates candidateNames = [ - query_name + '.rq', - query_name + '.sparql', - query_name + '.tpf', - query_name + '.json' - ] - candidates = [ - (name, guessQueryType(name)) for name in candidateNames + query_name + ".rq", + query_name + ".sparql", + query_name + ".tpf", + query_name + ".json", ] + candidates = [(name, guessQueryType(name)) for name in candidateNames] for queryFullName, queryType in candidates: queryText = self._getText(queryFullName) if queryText: - if (queryType == qType['JSON']): + if queryType == qType["JSON"]: queryText = json.loads(queryText) - if 'proto' not in queryText and '@graph' not in queryText: + if "proto" not in queryText and "@graph" not in queryText: continue return queryText, queryType # No query found... - return '', None + return "", None def _getText(self, queryFullName): - """To be implemented by sub-classes""" + """To be implemented by sub-classes. + Returns None if the file does not exist.""" raise NotImplementedError("Subclasses must override _getText()!") def fetchFiles(self): @@ -81,52 +80,57 @@ def __init__(self, user, repo, subdir=None, sha=None, prov=None): self.subdir = (subdir + "/") if subdir else "" self.sha = sha if sha else NotSet self.prov = prov - gh = Github(static.ACCESS_TOKEN) + gh = Github(auth=Auth.Token(static.GITHUB_ACCESS_TOKEN)) try: - self.gh_repo = gh.get_repo(user + '/' + repo, lazy=False) + self.gh_repo = gh.get_repo(user + "/" + repo, lazy=False) except BadCredentialsException: - raise Exception('BadCredentials: have you set up github_access_token on config.ini ?') + raise Exception( + "BadCredentials: have you set up github_access_token on config.ini ?" + ) except Exception: - raise Exception('Repo not found: ' + user + '/' + repo) + raise Exception("Repo not found: " + user + "/" + repo) def fetchFiles(self): """Returns a list of file items contained on the github repo.""" - contents = self.gh_repo.get_contents(self.subdir.strip('/'), ref=self.sha) + contents = self.gh_repo.get_contents(self.subdir.strip("/"), ref=self.sha) files = [] for content_file in contents: - if content_file.type == 'file': - files.append({ - 'download_url': content_file.download_url, - 'name': content_file.name, - 'decoded_content': content_file.decoded_content - }) + if content_file.type == "file": + files.append( + { + "download_url": content_file.download_url, + "name": content_file.name, + "decoded_content": content_file.decoded_content, + } + ) return files def getRawRepoUri(self): """Returns the root url of the github repo.""" # TODO: replace by gh_repo.html_url ? 
- raw_repo_uri = static.GITHUB_RAW_BASE_URL + self.user + '/' + self.repo + raw_repo_uri = static.GITHUB_RAW_BASE_URL + self.user + "/" + self.repo if self.sha is NotSet: - raw_repo_uri += '/master/' + raw_repo_uri += "/master/" else: - raw_repo_uri += '/{}/'.format(self.sha) + raw_repo_uri += "/{}/".format(self.sha) return raw_repo_uri def getTextFor(self, fileItem): """Returns the contents of the given file item on the github repo.""" - raw_query_uri = fileItem['download_url'] + raw_query_uri = fileItem["download_url"] # Add query URI as used entity by the logged activity if self.prov is not None: self.prov.add_used_entity(raw_query_uri) - return str(fileItem['decoded_content'], 'utf-8') + return str(fileItem["decoded_content"], "utf-8") def _getText(self, query_name): - """Return the content of the specified file contained in the github repo.""" + """Return the content of the specified file contained in the github repo. + Returns None if the file does not exist.""" try: c = self.gh_repo.get_contents(self.subdir + query_name) - return str(c.decoded_content, 'utf-8') - except: + return str(c.decoded_content, "utf-8") + except Exception: return None def getRepoTitle(self): @@ -155,13 +159,13 @@ def getRepoURI(self): def getEndpointText(self): """Return content of endpoint file (endpoint.txt)""" - return self._getText('endpoint.txt') + return self._getText("endpoint.txt") def getLicenceURL(self): """Returns the URL of the license file in this repository if one exists.""" for f in self.fetchFiles(): - if f['name'].lower() == 'license' or f['name'].lower() == 'licence': - return f['download_url'] + if f["name"].lower() == "license" or f["name"].lower() == "licence": + return f["download_url"] return None def getRepoDescription(self): @@ -170,8 +174,7 @@ def getRepoDescription(self): class GitlabLoader(BaseLoader): - - def __init__(self, user, repo, subdir=None, sha=None, prov=None, branch='main'): + def __init__(self, user, repo, subdir=None, sha=None, prov=None, branch=None): """Create a new GithubLoader. # TODO: Update to GITLAB ! @@ -189,69 +192,82 @@ def __init__(self, user, repo, subdir=None, sha=None, prov=None, branch='main'): self.sha = sha if sha else None self.prov = prov gl = gitlab.Gitlab( - url=static.GITLAB_URL, - private_token=static.ACCESS_TOKEN + url=static.GITLAB_URL, private_token=static.GITLAB_ACCESS_TOKEN ) try: - self.gl_repo = gl.projects.get(user + '/' + repo) - except BadCredentialsException: - raise Exception('BadCredentials: have you set up github_access_token on config.ini ?') + self.gl_repo = gl.projects.get(user + "/" + repo) + if not self.branch: # Use default branch if not specified + self.branch = self.gl_repo.default_branch + except GitlabAuthenticationError: + raise Exception( + "GitlabAuthenticationError: have you set up gitlab_access_token on config.ini ?" 
+ ) except Exception: - raise Exception('Repo not found: ' + user + '/' + repo) + raise Exception("Repo not found: " + user + "/" + repo) def fetchFiles(self): """Returns a list of file items contained on the github repo.""" - gitlab_files = self.gl_repo.repository_tree(path=self.subdir.strip('/'), ref=self.branch, all=True) + gitlab_files = self.gl_repo.repository_tree( + path=self.subdir.strip("/"), ref=self.branch, all=True + ) files = [] - for gitlab_file in gitlab_files: - if gitlab_file['type'] == 'blob': - name = gitlab_file['name'] - files.append({ - 'download_url': path.join(self.getRawRepoUri(), self.subdir, name), - 'name': name, - 'decoded_content': str.encode(self._getText(gitlab_file['name'])) - }) + for gitlab_file in gitlab_files: + if gitlab_file["type"] == "blob": + name = gitlab_file["name"] + files.append( + { + "download_url": path.join( + self.getRawRepoUri(), self.subdir, name + ), + "name": name, + "decoded_content": str.encode( + self._getText(gitlab_file["name"]) + ), + } + ) return files def getRawRepoUri(self): - """Returns the root url of the github repo.""" - # TODO: replace by gh_repo.html_url ? - return path.join(static.GITLAB_URL, self.user, self.repo, '-', 'raw', self.branch) + """Returns the root url of the gitlab repo.""" + return path.join( + static.GITLAB_URL, self.user, self.repo, "-", "raw", self.branch + ) def getTextFor(self, fileItem): - """Returns the contents of the given file item on the github repo.""" - raw_query_uri = fileItem['download_url'] + """Returns the contents of the given file item on the gitlab repo.""" + raw_query_uri = fileItem["download_url"] # Add query URI as used entity by the logged activity if self.prov is not None: self.prov.add_used_entity(raw_query_uri) - return str(fileItem['decoded_content'], 'utf-8') + return str(fileItem["decoded_content"], "utf-8") def _getText(self, query_name): - """Return the content of the specified file contained in the github repo.""" + """Return the content of the specified file contained in the gitlab repo. 
+ Returns None if the file does not exist.""" try: file_path = path.join(self.subdir, query_name) f = self.gl_repo.files.get(file_path=file_path, ref=self.branch) file_content = base64.b64decode(f.content).decode("utf-8") - return file_content.replace('\\n', '\n').replace('\\t', '\t') - except: + return file_content.replace("\\n", "\n").replace("\\t", "\t") + except Exception: return None - + def getRepoTitle(self): - """Return the title of the github repo.""" + """Return the title of the gitlab repo.""" return self.gl_repo.name def getContactName(self): """Return the name of the owner of the gitlab repo.""" - return self.gl_repo.namespace['name'] + return self.gl_repo.namespace["name"] def getContactUrl(self): """Return the home page of the owner of the gitlab repo.""" - return self.gl_repo.namespace['web_url'] + return self.gl_repo.namespace["web_url"] def getCommitList(self): """Return a list of commits on the gitlab repo.""" - return [ c.id for c in self.gl_repo.commits.list() ] + return [c.id for c in self.gl_repo.commits.list()] def getFullName(self): """Return the full name of the gitlab repo (user/repo).""" @@ -263,13 +279,13 @@ def getRepoURI(self): def getEndpointText(self): """Return content of endpoint file (endpoint.txt)""" - return self._getText('endpoint.txt') + return self._getText("endpoint.txt") def getLicenceURL(self): """Returns the URL of the license file in this repository if one exists.""" for f in self.fetchFiles(): - if f['name'].lower() == 'license' or f['name'].lower() == 'licence': - return f['download_url'] + if f["name"].lower() == "license" or f["name"].lower() == "licence": + return f["download_url"] return None def getRepoDescription(self): @@ -277,7 +293,6 @@ def getRepoDescription(self): return self.gl_repo.description - class LocalLoader(BaseLoader): """Local file system file loader. 
Retrieves information to construct a grlc specification from a local folder.""" @@ -290,51 +305,49 @@ def __init__(self, baseDir=static.LOCAL_SPARQL_DIR): self.baseDir = baseDir config_fallbacks = { - 'repo_title': 'local', - 'api_description': 'API generated from local files', - 'contact_name': '', - 'contact_url': '', - 'licence_url': '' + "repo_title": "local", + "api_description": "API generated from local files", + "contact_name": "", + "contact_url": "", + "licence_url": "", } config = ConfigParser(config_fallbacks) - config.add_section('repo_info') - config_filename = path.join(baseDir, 'local-api-config.ini') + config.add_section("repo_info") + config_filename = path.join(baseDir, "local-api-config.ini") config.read(config_filename) - self.repo_title = config.get('repo_info', 'repo_title') - self.api_description = config.get('repo_info', 'api_description') - self.contact_name = config.get('repo_info', 'contact_name') - self.contact_url = config.get('repo_info', 'contact_url') - self.licence_url = config.get('repo_info', 'licence_url') + self.repo_title = config.get("repo_info", "repo_title") + self.api_description = config.get("repo_info", "api_description") + self.contact_name = config.get("repo_info", "contact_name") + self.contact_url = config.get("repo_info", "contact_url") + self.licence_url = config.get("repo_info", "licence_url") def fetchFiles(self): """Returns a list of file items contained on the local repo.""" - files = glob(path.join(self.baseDir, '*')) + files = glob(path.join(self.baseDir, "*")) filesDef = [] - baseDirSlash = path.join(self.baseDir, '') + baseDirSlash = path.join(self.baseDir, "") for f in files: - relative = f.replace(baseDirSlash, '') - filesDef.append({ - 'download_url': relative, - 'name': relative - }) + relative = f.replace(baseDirSlash, "") + filesDef.append({"download_url": relative, "name": relative}) return filesDef def getRawRepoUri(self): """Returns the root url of the local repo.""" # Maybe return something like 'file:///path/to/local/queries' ? - return '' + return "" def getTextFor(self, fileItem): """Returns the contents of the given file item on the local repo.""" - return self._getText(fileItem['download_url']) + return self._getText(fileItem["download_url"]) def _getText(self, filename): - """Return the content of the specified file contained in the local repo.""" + """Return the content of the specified file contained in the local repo. 
+        Returns None if the file does not exist."""
         targetFile = path.join(self.baseDir, filename)
         if path.exists(targetFile):
-            with open(targetFile, 'r') as f:
+            with open(targetFile, "r") as f:
                 lines = f.readlines()
-                text = ''.join(lines)
+                text = "".join(lines)
             return text
         else:
             return None
@@ -353,19 +366,19 @@ def getContactUrl(self):
 
     def getCommitList(self):
         """Return a list of commits (always a single commit) on the local repo."""
-        return ['local']
+        return ["local"]
 
     def getFullName(self):
         """Return the user/repo equivalent for the local repo."""
-        return 'local/'
+        return "local/"
 
     def getRepoURI(self):
         """Return the full URI of the local repo."""
-        return 'local-file-system'
+        return "local-file-system"
 
     def getEndpointText(self):
         """Return content of endpoint file (endpoint.txt)"""
-        return self._getText('endpoint.txt')
+        return self._getText("endpoint.txt")
 
     def getLicenceURL(self):
         return self.licence_url
@@ -384,96 +397,96 @@ def __init__(self, spec_url):
 
         Keyword arguments:
         spec_url -- URL where the specification YAML file is located."""
-        headers = {'Accept' : 'text/yaml'}
+        headers = {"Accept": "text/yaml"}
         resp = requests.get(spec_url, headers=headers)
         if resp.status_code == 200:
-            self.spec = yaml.load(resp.text)
-            self.spec['url'] = spec_url
-            self.spec['files'] = {}
+            self.spec = yaml.safe_load(resp.text)
+            self.spec["url"] = spec_url
+            self.spec["files"] = {}
 
-            for query in self.spec['queries']:
+            for query in self.spec["queries"]:
                 queryName, queryUrl = self.extractQueryInfo(query)
 
-                item = {
-                    'name': queryName,
-                    'download_url': queryUrl
-                }
-                self.spec['files'][queryName] = item
-            del self.spec['queries']
+                item = {"name": queryName, "download_url": queryUrl}
+                self.spec["files"][queryName] = item
+            del self.spec["queries"]
         else:
             raise Exception(resp.text)
 
     def extractQueryInfo(self, query):
-        """Extract query name and URL from specification. These could
-        either be explicitly declared (values in a dict) or need to be
-        infered from the URL (which itself could be explicilty declared or
+        """Extract query name and URL from specification. These could
+        either be explicitly declared (values in a dict) or need to be
+        inferred from the URL (which itself could be explicitly declared or
         be the only element of query."""
-        queryUrl = query['url'] if type(query) is dict else query
+        queryUrl = query["url"] if type(query) is dict else query
 
-        if type(query) is dict and 'name' in query:
-            queryName = query['name']
+        if type(query) is dict and "name" in query:
+            queryName = query["name"]
         else:
             queryNameExt = path.basename(queryUrl)
-            queryName = path.splitext(queryNameExt)[0]  # Remove extention
+            queryName = path.splitext(queryNameExt)[0]  # Remove extension
         return queryName, queryUrl
 
     def fetchFiles(self):
         """Returns a list of file items contained on specification."""
-        files = [
-            v for k,v in self.spec['files'].items()
-        ]
+        files = [v for k, v in self.spec["files"].items()]
         return files
 
     def getRawRepoUri(self):
         """Returns the root url of the specification."""
-        return self.spec['url']
+        return self.spec["url"]
 
     def getTextFor(self, fileItem):
         """Returns the contents of the given file item on the specification."""
-        # TODO: tiene sentido esto? O es un hack horrible ?
-        nameExt = path.basename(fileItem['download_url'])
-        return self._getText(fileItem['name'])
+        return self._getText(fileItem["name"])
 
     def getTextForName(self, query_name):
         """Return the query text and query type for the given query name.
Specific implementation for URLLoader.""" try: queryText = self._getText(query_name) - queryType = guessQueryType(self.spec['files'][query_name]['download_url']) + queryType = guessQueryType(self.spec["files"][query_name]["download_url"]) return queryText, queryType - except Exception as e: + except Exception: # No query found... - return '', None + return "", None def _getText(self, itemName): - """Return the content of the specified item in the specification.""" - if itemName in self.spec['files']: - headers = {'Accept' : 'text/plain'} - itemUrl = self.spec['files'][itemName]['download_url'] - itemUrl = urljoin(self.spec['url'], itemUrl) # Join with base URL if relative URL + """Return the content of the specified item in the specification. + Returns None if the file does not exist.""" + if itemName in self.spec["files"]: + headers = {"Accept": "text/plain"} + itemUrl = self.spec["files"][itemName]["download_url"] + itemUrl = urljoin( + self.spec["url"], itemUrl + ) # Join with base URL if relative URL resp = requests.get(itemUrl, headers=headers) if resp.status_code == 200: return resp.text else: - raise Exception('HTTP status {} encountered while loading {}'.format(resp.status_code, itemUrl)) + raise Exception( + "HTTP status {} encountered while loading {}".format( + resp.status_code, itemUrl + ) + ) else: return None def getRepoTitle(self): """Return the title contained on the specification.""" - return self.spec['title'] + return self.spec["title"] def getContactName(self): """Return the name of the contact person for the specification.""" - return self.spec['contact']['name'] if self.spec['contact']['name'] else '' + return self.spec["contact"]["name"] if self.spec["contact"]["name"] else "" def getContactUrl(self): """Return the home page defined in the specification.""" - return self.spec['contact']['url'] if self.spec['contact']['url'] else '' + return self.spec["contact"]["url"] if self.spec["contact"]["url"] else "" def getCommitList(self): """Return a list of commits (always a single commit) for the specification.""" - return ['param'] + return ["param"] def getFullName(self): """Return the user/repo equivalent for the specification.""" @@ -485,16 +498,15 @@ def getRepoURI(self): def getLicenceURL(self): """Returns the URL of the license file in the specification.""" - return self.spec['licence'] if 'licence' in self.spec else None + return self.spec["licence"] if "licence" in self.spec else None def getEndpointText(self): """Return content of endpoint file (endpoint.txt)""" - return "" #TODO: add endpoint to spec file definition + return "" # TODO: add endpoint to spec file definition def getRepoDescription(self): """Return the description of the repository""" - if 'description' in self.spec: - return self.spec['description'] + if "description" in self.spec: + return self.spec["description"] else: - return 'API definition loaded from ' + self.getRawRepoUri() - + return "API definition loaded from " + self.getRawRepoUri() diff --git a/src/glogging.py b/src/glogging.py index 48a537d..2e8a743 100644 --- a/src/glogging.py +++ b/src/glogging.py @@ -6,6 +6,7 @@ import grlc.static as static + def getGrlcLogger(name): """Construct a logger for grlc with the logging level specified on `config.ini`.""" glogger = logging.getLogger(name) diff --git a/src/gquery.py b/src/gquery.py index 07ea67b..e3b9d5f 100644 --- a/src/gquery.py +++ b/src/gquery.py @@ -16,16 +16,17 @@ import traceback import re import requests +import SPARQLTransformer # grlc modules import grlc.static as static import 
diff --git a/src/glogging.py b/src/glogging.py
index 48a537d..2e8a743 100644
--- a/src/glogging.py
+++ b/src/glogging.py
@@ -6,6 +6,7 @@
 import grlc.static as static

+
 def getGrlcLogger(name):
     """Construct a logger for grlc with the logging level specified on `config.ini`."""
     glogger = logging.getLogger(name)
diff --git a/src/gquery.py b/src/gquery.py
index 07ea67b..e3b9d5f 100644
--- a/src/gquery.py
+++ b/src/gquery.py
@@ -16,16 +16,17 @@
 import traceback
 import re
 import requests
+import SPARQLTransformer

 # grlc modules
 import grlc.static as static
 import grlc.glogging as glogging

+
 glogger = glogging.getGrlcLogger(__name__)

-XSD_PREFIX = 'PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>'
+XSD_PREFIX = "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>"

-import SPARQLTransformer

 def guess_endpoint_uri(rq, loader):
     """
@@ -36,18 +37,18 @@ def guess_endpoint_uri(rq, loader):
     Otherwise assigns a default one
     """
     auth = (static.DEFAULT_ENDPOINT_USER, static.DEFAULT_ENDPOINT_PASSWORD)
-    if auth == ('none', 'none'):
+    if auth == ("none", "none"):
         auth = None

     if has_request_context() and "endpoint" in request.args:
-        endpoint = request.args['endpoint']
+        endpoint = request.args["endpoint"]
         glogger.debug("Endpoint provided in request: " + endpoint)
         return endpoint, auth

     # Decorator
     try:
         decorators = get_yaml_decorators(rq)
-        endpoint = decorators['endpoint']
+        endpoint = decorators["endpoint"]
         auth = None
         glogger.debug("Decorator guessed endpoint: " + endpoint)
     except (TypeError, KeyError):
@@ -58,11 +59,11 @@ def guess_endpoint_uri(rq, loader):
         auth = None
         glogger.debug("File guessed endpoint: " + endpoint)
     # TODO: except all is really ugly
-    except:
+    except Exception:
         # Default
         endpoint = static.DEFAULT_ENDPOINT
         auth = (static.DEFAULT_ENDPOINT_USER, static.DEFAULT_ENDPOINT_PASSWORD)
-        if auth == ('none', 'none'):
+        if auth == ("none", "none"):
             auth = None
         glogger.info("No endpoint specified, using default ({})".format(endpoint))

@@ -99,88 +100,69 @@ def count_query_results(query, endpoint):

 def _getDictWithKey(key, dict_list):
-    """ Returns the first dictionary in dict_list which contains the given key"""
+    """Returns the first dictionary in dict_list which contains the given key"""
     for d in dict_list:
         if key in d:
             return d
     return None
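The `guess_endpoint_uri` hunks above keep the original resolution order: an `endpoint` request argument wins, then the `#+ endpoint:` decorator, then the repository's endpoint file via the loader, and finally `static.DEFAULT_ENDPOINT`. A small sketch of the decorator level (hypothetical query text; assumes grlc is importable):

```python
from grlc.gquery import guess_endpoint_uri

rq = """\
#+ endpoint: http://dbpedia.org/sparql
SELECT * WHERE { ?s ?p ?o } LIMIT 10
"""

# Outside a Flask request context there is no ?endpoint argument, so the
# decorator is found first and the loader is never consulted.
endpoint, auth = guess_endpoint_uri(rq, loader=None)
assert endpoint == "http://dbpedia.org/sparql"
```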
+ re2 = "(?P[_]{1,2})" # ...followed by one (for required vars) or two (for optional vars) '_' + re3 = "(?P[a-zA-Z0-9]+)" # ...then the name of the var + re4 = "([_](?P(iri)|(number)|(literal)|(integer)))?" # ...optionally with a type (iri, number, literal, integer) + re5 = "([_](?P[a-zA-Z0-9]+)[_](?P[a-zA-Z0-9]+))?" # ... OR a user defined type, with a prefix + re6 = "([_](?P[a-zA-Z0-9]+))?" # ...OR a language + variable_matcher = re.compile(re1 + re2 + re3 + re4 + re5 + re6) parameters = {} - for v in variables: - if internal_matcher.match(v): - continue - - match = variable_matcher.match(v) - # TODO: currently only one parameter per triple pattern is supported - if match: - vname = match.group('name') - vrequired = True if match.group('required') == '_' else False - vtype = 'string' - # All these can be None - vcodes = get_enumeration(rq, vname, endpoint, query_metadata, auth) - vdefault = get_defaults(rq, vname, query_metadata) - vlang = None - vdatatype = None - vformat = None - - mtype = match.group('type') - muserdefined = match.group('userdefined') - - if mtype in ['number', 'literal', 'string']: - vtype = mtype - elif mtype in ['iri']: # TODO: proper form validation of input parameter uris - vtype = 'string' - vformat = 'iri' - elif mtype: - vtype = 'string' - - if mtype in static.XSD_DATATYPES: - vdatatype = 'xsd:{}'.format(mtype) - elif len(mtype) == 2: - vlang = mtype - elif muserdefined: - vdatatype = '{}:{}'.format(mtype, muserdefined) - - parameters[vname] = { - 'original': '?{}'.format(v), - 'required': vrequired, - 'name': vname, - 'type': vtype - } - - # Possibly None parameter attributes - if vcodes is not None: - parameters[vname]['enum'] = sorted(vcodes) - if vlang is not None: - parameters[vname]['lang'] = vlang - if vdatatype is not None: - parameters[vname]['datatype'] = vdatatype - if vformat is not None: - parameters[vname]['format'] = vformat - if vdefault is not None: - parameters[vname]['default'] = vdefault - - glogger.debug('Finished parsing the following parameters: {}'.format(parameters)) - + for match in variable_matcher.finditer(query): + p = {} + vname = match.group("name") + + p["original"] = match.group(0) + p["required"] = len(match.group("required")) == 1 + p["name"] = vname + + mtype = match.group("type") + if mtype in ["number", "literal", "string"]: + p["type"] = mtype + elif mtype in ["iri"]: + p["type"] = "string" + p["format"] = "iri" + else: + p["type"] = "string" + if mtype in static.XSD_DATATYPES: + p["datatype"] = "xsd:{}".format(mtype) + elif match.group("prefix") and match.group("userdefined"): + p["datatype"] = "{}:{}".format( + match.group("prefix"), match.group("userdefined") + ) + + vcodes = get_enumeration(query, vname, endpoint, query_metadata, auth) + if vcodes is not None: + p["enum"] = sorted(vcodes) + vdefault = get_defaults(query, vname, query_metadata) + if vdefault is not None: + p["default"] = vdefault + + if match.group("lang") is not None: + p["lang"] = match.group("lang") + + parameters[vname] = p + + glogger.debug("Finished parsing the following parameters: {}".format(parameters)) return parameters @@ -189,9 +171,9 @@ def get_defaults(rq, v, metadata): Returns the default value for a parameter or None """ glogger.debug("Metadata with defaults: {}".format(metadata)) - if 'defaults' not in metadata: + if "defaults" not in metadata: return None - defaultsDict = _getDictWithKey(v, metadata['defaults']) + defaultsDict = _getDictWithKey(v, metadata["defaults"]) if defaultsDict: return defaultsDict[v] return None @@ -203,12 
@@ -203,12 +185,12 @@ def get_enumeration(rq, v, endpoint, metadata={}, auth=None):
     """
     # glogger.debug("Metadata before processing enums: {}".format(metadata))
     # We only fire the enum filling queries if indicated by the query metadata
-    if 'enumerate' not in metadata:
+    if "enumerate" not in metadata:
         return None
-    enumDict = _getDictWithKey(v, metadata['enumerate'])
+    enumDict = _getDictWithKey(v, metadata["enumerate"])
     if enumDict:
         return enumDict[v]
-    if v in metadata['enumerate']:
+    if v in metadata["enumerate"]:
         return get_enumeration_sparql(rq, v, endpoint, auth)
     return None

@@ -217,37 +199,55 @@ def get_enumeration_sparql(rq, v, endpoint, auth=None):
     """
     Returns a list of enumerated values for variable 'v' in query 'rq'
     """
-    glogger.debug('Retrieving enumeration for variable {}'.format(v))
+    glogger.debug("Retrieving enumeration for variable {}".format(v))
     vcodes = []
-    # tpattern_matcher = re.compile(".*(FROM\s+)?(?P<gnames>.*)\s+WHERE.*[\.\{][\n\t\s]*(?P<tpattern>.*\?" + re.escape(v) + ".*?\.).*", flags=re.DOTALL)
-    # tpattern_matcher = re.compile(".*?((FROM\s*)(?P<gnames>(\<.*\>)+))?\s*WHERE\s*\{(?P<tpattern>.*)\}.*", flags=re.DOTALL)  # WHERE is optional too!!
-    tpattern_matcher = re.compile(".*?(FROM\s*(?P<gnames>\<.*\>+))?\s*(WHERE\s*)?\{(?P<tpattern>.*)\}.*",
-                                  flags=re.DOTALL)
+    tpattern_matcher = re.compile(
+        r".*?(FROM\s*(?P<gnames>\<.*\>+))?\s*(WHERE\s*)?\{(?P<tpattern>.*)\}.*",
+        flags=re.DOTALL,
+    )
     glogger.debug(rq)
     tp_match = tpattern_matcher.match(rq)
     if tp_match:
-        vtpattern = tp_match.group('tpattern')
-        gnames = tp_match.group('gnames')
+        vtpattern = tp_match.group("tpattern")
+        gnames = tp_match.group("gnames")
         glogger.debug("Detected graph names: {}".format(gnames))
         glogger.debug("Detected BGP: {}".format(vtpattern))
         glogger.debug("Matched triple pattern with parameter")
         if gnames:
-            codes_subquery = re.sub("SELECT.*\{.*\}.*",
-                                    "SELECT DISTINCT ?" + v + " FROM " + gnames + " WHERE { " + vtpattern + " }", rq,
-                                    flags=re.DOTALL)
+            codes_subquery = re.sub(
+                r"SELECT.*\{.*\}.*",
+                r"SELECT DISTINCT ?"
+                + v
+                + r" FROM "
+                + gnames
+                + r" WHERE { "
+                + vtpattern
+                + r" }",
+                rq,
+                flags=re.DOTALL,
+            )
         else:
-            codes_subquery = re.sub("SELECT.*\{.*\}.*",
-                                    "SELECT DISTINCT ?" + v + " WHERE { " + vtpattern + " }", rq,
-                                    flags=re.DOTALL)
+ v + r" WHERE { " + vtpattern + " }", + rq, + flags=re.DOTALL, + ) glogger.debug("Codes subquery: {}".format(codes_subquery)) glogger.debug(endpoint) - codes_json = requests.get(endpoint, params={'query': codes_subquery}, - headers={'Accept': static.mimetypes['json'], - 'Authorization': 'token {}'.format(static.ACCESS_TOKEN)}, auth=auth).json() - for code in codes_json['results']['bindings']: + codes_json = requests.get( + endpoint, + params={"query": codes_subquery}, + headers={ + "Accept": static.mimetypes["json"], + "Authorization": "token {}".format(static.SPARQL_ACCESS_TOKEN), + }, + auth=auth, + ).json() + for code in codes_json["results"]["bindings"]: vcodes.append(list(code.values())[0]["value"]) else: glogger.debug("No match between variable name and query.") @@ -263,32 +263,36 @@ def get_yaml_decorators(rq): if not rq: return None - yaml_string = '' - query_string = '' + yaml_string = "" + query_string = "" if isinstance(rq, dict): # json query (sparql transformer) - if 'grlc' in rq: - yaml_string = rq['grlc'] + if "grlc" in rq: + yaml_string = rq["grlc"] query_string = rq + query_metadata = yaml_string + else: # classic query - yaml_string = "\n".join([row.lstrip('#+') for row in rq.split('\n') if row.startswith('#+')]) - query_string = "\n".join([row for row in rq.split('\n') if not row.startswith('#+')]) + yaml_string = "\n".join( + [row.lstrip("#+") for row in rq.split("\n") if row.startswith("#+")] + ) + query_string = "\n".join( + [row for row in rq.split("\n") if not row.startswith("#+")] + ) - query_metadata = None - if type(yaml_string) == dict: - query_metadata = yaml_string - elif type(yaml_string) == str: try: # Invalid YAMLs will produce empty metadata query_metadata = yaml.safe_load(yaml_string) - except (yaml.parser.ParserError, yaml.scanner.ScannerError) as e: + except (yaml.parser.ParserError, yaml.scanner.ScannerError): try: query_metadata = json.loads(yaml_string) except json.JSONDecodeError: - glogger.warning("Query decorators could not be parsed; check your YAML syntax") + glogger.warning( + "Query decorators could not be parsed; check your YAML syntax" + ) # If there is no YAML string if query_metadata is None: query_metadata = {} - query_metadata['query'] = query_string + query_metadata["query"] = query_string # glogger.debug("Parsed query decorators: {}".format(query_metadata)) @@ -297,8 +301,12 @@ def get_yaml_decorators(rq): def enable_custom_function_prefix(rq, prefix): """Add SPARQL prefixe header if the prefix is used in the given query.""" - if ' %s:' % prefix in rq or '(%s:' % prefix in rq and not 'PREFIX %s:' % prefix in rq: - rq = 'PREFIX %s: <:%s>\n' % (prefix, prefix) + rq + if ( + " %s:" % prefix in rq + or "(%s:" % prefix in rq + and not "PREFIX %s:" % prefix in rq + ): + rq = "PREFIX %s: <:%s>\n" % (prefix, prefix) + rq return rq @@ -308,38 +316,43 @@ def get_metadata(rq, endpoint): 'exp' is one of: 'endpoint', 'tags', 'summary', 'request', 'pagination', 'enumerate' """ query_metadata = get_yaml_decorators(rq) - query_metadata['type'] = 'UNKNOWN' - query_metadata['original_query'] = rq + query_metadata["type"] = "UNKNOWN" + query_metadata["original_query"] = rq if isinstance(rq, dict): # json query (sparql transformer) rq, proto, opt = SPARQLTransformer.pre_process(rq) rq = rq.strip() - query_metadata['proto'] = proto - query_metadata['opt'] = opt - query_metadata['query'] = rq + query_metadata["proto"] = proto + query_metadata["opt"] = opt + query_metadata["query"] = rq - rq = enable_custom_function_prefix(rq, 'bif') - rq = 
@@ -297,8 +301,12 @@

 def enable_custom_function_prefix(rq, prefix):
     """Add SPARQL prefix header if the prefix is used in the given query."""
-    if ' %s:' % prefix in rq or '(%s:' % prefix in rq and not 'PREFIX %s:' % prefix in rq:
-        rq = 'PREFIX %s: <:%s>\n' % (prefix, prefix) + rq
+    if (
+        " %s:" % prefix in rq
+        or "(%s:" % prefix in rq
+        and not "PREFIX %s:" % prefix in rq
+    ):
+        rq = "PREFIX %s: <:%s>\n" % (prefix, prefix) + rq
     return rq

@@ -308,38 +316,43 @@ def get_metadata(rq, endpoint):
     'exp' is one of: 'endpoint', 'tags', 'summary', 'request', 'pagination', 'enumerate'
     """
     query_metadata = get_yaml_decorators(rq)
-    query_metadata['type'] = 'UNKNOWN'
-    query_metadata['original_query'] = rq
+    query_metadata["type"] = "UNKNOWN"
+    query_metadata["original_query"] = rq

     if isinstance(rq, dict):  # json query (sparql transformer)
         rq, proto, opt = SPARQLTransformer.pre_process(rq)
         rq = rq.strip()
-        query_metadata['proto'] = proto
-        query_metadata['opt'] = opt
-        query_metadata['query'] = rq
+        query_metadata["proto"] = proto
+        query_metadata["opt"] = opt
+        query_metadata["query"] = rq

-    rq = enable_custom_function_prefix(rq, 'bif')
-    rq = enable_custom_function_prefix(rq, 'sql')
+    rq = enable_custom_function_prefix(rq, "bif")
+    rq = enable_custom_function_prefix(rq, "sql")

     try:
         # THE PARSING
         # select, describe, construct, ask
         parsed_query = translateQuery(Query.parseString(rq, parseAll=True))
-        query_metadata['type'] = parsed_query.algebra.name
-        if query_metadata['type'] == 'SelectQuery':
+        query_metadata["type"] = parsed_query.algebra.name
+        if query_metadata["type"] == "SelectQuery":
             # Projection variables
-            query_metadata['variables'] = parsed_query.algebra['PV']
+            query_metadata["variables"] = parsed_query.algebra["PV"]
             # Parameters
-            query_metadata['parameters'] = get_parameters(rq, parsed_query.algebra['_vars'], endpoint, query_metadata)
-        elif query_metadata['type'] == 'ConstructQuery':
+            query_metadata["parameters"] = get_parameters(rq, endpoint, query_metadata)
+        elif query_metadata["type"] == "ConstructQuery":
             # Parameters
-            query_metadata['parameters'] = get_parameters(rq, parsed_query.algebra['_vars'], endpoint, query_metadata)
+            query_metadata["parameters"] = get_parameters(rq, endpoint, query_metadata)
         else:
             glogger.warning(
-                "Query type {} is currently unsupported and no metadata was parsed!".format(query_metadata['type']))
+                "Query type {} is currently unsupported and no metadata was parsed!".format(
+                    query_metadata["type"]
+                )
+            )
     except ParseException as pe:
         glogger.warning(pe)
-        glogger.warning("Could not parse regular SELECT, CONSTRUCT, DESCRIBE or ASK query")
+        glogger.warning(
+            "Could not parse regular SELECT, CONSTRUCT, DESCRIBE or ASK query"
+        )
         # glogger.warning(traceback.print_exc())

         # insert queries won't parse, so we regex
@@ -353,24 +366,39 @@
         glogger.debug("Trying to parse UPDATE query")
         parsed_query = UpdateUnit.parseString(rq, parseAll=True)
         glogger.debug(parsed_query)
-        query_metadata['type'] = parsed_query[0]['request'][0].name
-        if query_metadata['type'] == 'InsertData':
-            query_metadata['parameters'] = {
-                'g': {'datatype': None, 'enum': [], 'lang': None, 'name': 'g', 'original': '?_g_iri',
-                      'required': True, 'type': 'iri'},
-                'data': {'datatype': None, 'enum': [], 'lang': None, 'name': 'data', 'original': '?_data',
-                         'required': True, 'type': 'literal'}}
-
-        glogger.debug("Update query parsed with {}".format(query_metadata['type']))
+        query_metadata["type"] = parsed_query[0]["request"][0].name
+        if query_metadata["type"] == "InsertData":
+            query_metadata["parameters"] = {
+                "g": {
+                    "datatype": None,
+                    "enum": [],
+                    "lang": None,
+                    "name": "g",
+                    "original": "?_g_iri",
+                    "required": True,
+                    "type": "iri",
+                },
+                "data": {
+                    "datatype": None,
+                    "enum": [],
+                    "lang": None,
+                    "name": "data",
+                    "original": "?_data",
+                    "required": True,
+                    "type": "literal",
+                },
+            }
+
+        glogger.debug("Update query parsed with {}".format(query_metadata["type"]))
         # if query_metadata['type'] == 'InsertData':
         #     query_metadata['variables'] = parsed_query.algebra['PV']

     except Exception as e:
         glogger.error("Could not parse query")
-        glogger.error(query_metadata['query'])
+        glogger.error(query_metadata["query"])
         glogger.error(traceback.print_exc())
-        raise Exception('could not parse query: {}'.format(str(e)))
+        raise Exception("could not parse query: {}".format(str(e)))

-    glogger.debug("Finished parsing query of type {}".format(query_metadata['type']))
+    glogger.debug("Finished parsing query of type {}".format(query_metadata["type"]))
     glogger.debug("All parsed query metadata (from decorators and content): ")
     glogger.debug(pformat(query_metadata, indent=32))
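`get_metadata` ties the pieces together: decorators first, then the rdflib parse that fills `type`, `variables`, and (through the new `get_parameters` signature) `parameters`. A sketch for a plain SELECT (hypothetical query; a full IRI is used so rdflib can parse it without prefix declarations):

```python
from grlc.gquery import get_metadata

rq = "SELECT ?band WHERE { ?band <http://dbpedia.org/ontology/genre> ?_genre_iri }"
meta = get_metadata(rq, endpoint="http://dbpedia.org/sparql")
# meta["type"]       == "SelectQuery"
# meta["variables"]  -> the projection variables (?band)
# meta["parameters"] -> {"genre": {..., "type": "string", "format": "iri"}}
```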
@@ -380,18 +408,23 @@ def get_metadata(rq, endpoint):

 def paginate_query(query, results_per_page, get_args):
     """Modify the given query so that it can be paginated. The paginated
     query will display a maximum of `results_per_page` results."""
-    page = get_args.get('page', 1)
+    page = get_args.get("page", 1)

-    glogger.debug("Paginating query for page {}, {} results per page".format(page, results_per_page))
+    glogger.debug(
+        "Paginating query for page {}, {} results per page".format(
+            page, results_per_page
+        )
+    )

     # If contains LIMIT or OFFSET, remove them
     glogger.debug("Original query: " + query)
-    no_limit_query = re.sub("((LIMIT|OFFSET)\s+[0-9]+)*", "", query)
+    no_limit_query = re.sub(r"((LIMIT|OFFSET)\s+[0-9]+)*", "", query)
     glogger.debug("No limit query: " + no_limit_query)

     # Append LIMIT results_per_page OFFSET (page-1)*results_per_page
-    paginated_query = no_limit_query + " LIMIT {} OFFSET {}".format(results_per_page,
-                                                                    (int(page) - 1) * results_per_page)
+    paginated_query = no_limit_query + " LIMIT {} OFFSET {}".format(
+        results_per_page, (int(page) - 1) * results_per_page
+    )
     glogger.debug("Paginated query: " + paginated_query)

     return paginated_query
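`paginate_query` normalizes any existing LIMIT/OFFSET away and appends its own window. For instance:

```python
from grlc.gquery import paginate_query

query = "SELECT * WHERE { ?s ?p ?o } LIMIT 500"
paginated = paginate_query(query, 100, {"page": 3})
# The original LIMIT 500 is stripped, then the window for page 3 is
# appended: "... LIMIT 100 OFFSET 200"
```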
@@ -401,66 +434,83 @@ def rewrite_query(query, parameters, get_args):
     """Rewrite query to replace query parameters for given values."""
     glogger.debug("Query parameters")
     glogger.debug(parameters)

-    requireXSD = False
-    required_params = {}
-    for k, v in parameters.items():
-        if parameters[k]['required']:
-            required_params[k] = v
-    requiredParams = set(required_params.keys())
+    # Check that all required parameters are present
+    requiredParams = set(
+        k for k, v in parameters.items() if v["required"]
+    )  # Set of required parameters
     providedParams = set(get_args.keys())
-    glogger.debug("Required parameters: {} Request args: {}".format(requiredParams, providedParams))
-    assert requiredParams.issubset(providedParams), 'Provided parameters do not cover the required parameters!'
+    glogger.debug(
+        "Required parameters: {} Request args: {}".format(
+            requiredParams, providedParams
+        )
+    )
+    assert requiredParams.issubset(
+        providedParams
+    ), "Provided parameters do not cover the required parameters!"
+
+    if isinstance(query, dict):  # json query (sparql transformer)
+        query = rewrite_query_json(query, parameters, get_args)
+    else:
+        query = rewrite_query_standard(query, parameters, get_args)
+
+    glogger.debug("Query rewritten as: " + query)
+
+    return query

-    for pname, p in list(parameters.items()):
+
+def rewrite_query_json(query, parameters, get_args):
+    for pname, p in parameters.items():
         # Get the parameter value from the GET request
         v = get_args.get(pname, None)
         # If the parameter has a value
         if not v:
             continue

-        if isinstance(query, dict):  # json query (sparql transformer)
-            if '$values' not in query:
-                query['$values'] = {}
-            values = query['$values']
+        if "$values" not in query:
+            query["$values"] = {}
+        values = query["$values"]

-            if not p['original'] in values:
-                values[p['original']] = v
-            elif isinstance(values[p['original']], list):
-                values[p['original']].append(v)
-            else:
-                values[p['original']] = [values[p['original']], v]
+        if not p["original"] in values:
+            values[p["original"]] = v
+        elif isinstance(values[p["original"]], list):
+            values[p["original"]].append(v)
+        else:
+            values[p["original"]] = [values[p["original"]], v]

+    rq, proto, opt = SPARQLTransformer.pre_process(query)
+    query = rq.strip()
+    return query

+
+def rewrite_query_standard(query, parameters, get_args):
+    requireXSD = False
+    for pname, p in parameters.items():
+        # Get the parameter value from the GET request
+        v = get_args.get(pname, None)
+        # If the parameter has a value
+        if not v:
             continue

-        # IRI
-        if p['type'] == 'iri':  # TODO: never reached anymore, since iris are now type=string with format=iri
-            query = query.replace(p['original'], "{}{}{}".format('<', v, '>'))
-        # A number (without a datatype)
-        elif p['type'] == 'number':
-            query = query.replace(p['original'], v)
-        # Literals
-        elif p['type'] == 'literal' or p['type'] == 'string':
+        # Number (without a datatype)
+        if p["type"] == "number":
+            query = query.replace(p["original"], v)
+        # Literal
+        elif p["type"] == "literal" or p["type"] == "string":
             # If it's a iri
-            if 'format' in p and p['format'] == 'iri':
-                query = query.replace(p['original'], "{}{}{}".format('<', v, '>'))
+            if "format" in p and p["format"] == "iri":
+                query = query.replace(p["original"], "{}{}{}".format("<", v, ">"))
             # If there is a language tag
-            if 'lang' in p and p['lang']:
-                query = query.replace(p['original'], "\"{}\"@{}".format(v, p['lang']))
-            elif 'datatype' in p and p['datatype']:
-                query = query.replace(p['original'], "\"{}\"^^{}".format(v, p['datatype']))
-                if 'xsd' in p['datatype']:
+            if "lang" in p and p["lang"]:
+                query = query.replace(p["original"], '"{}"@{}'.format(v, p["lang"]))
+            elif "datatype" in p and p["datatype"]:
+                query = query.replace(
+                    p["original"], '"{}"^^{}'.format(v, p["datatype"])
+                )
+                if "xsd" in p["datatype"]:
                     requireXSD = True
             else:
-                query = query.replace(p['original'], "\"{}\"".format(v))
-
-    if isinstance(query, dict):  # json query (sparql transformer)
-        rq, proto, opt = SPARQLTransformer.pre_process(query)
-        query = rq.strip()
-
+                query = query.replace(p["original"], '"{}"'.format(v))

     if requireXSD and XSD_PREFIX not in query:
-        query = query.replace('SELECT', XSD_PREFIX + '\n\nSELECT')
-
-    glogger.debug("Query rewritten as: " + query)
-
+        query = query.replace("SELECT", XSD_PREFIX + "\n\nSELECT")
     return query
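`rewrite_query_standard` substitutes each `?_var` placeholder according to the parameter's type/format/lang/datatype. A sketch with the iri format (the parameter dict is shaped as `get_parameters` produces it):

```python
from grlc.gquery import rewrite_query

parameters = {
    "genre": {
        "original": "?_genre_iri",
        "name": "genre",
        "required": True,
        "type": "string",
        "format": "iri",
    }
}
query = "SELECT ?band WHERE { ?band dbo:genre ?_genre_iri }"
rewritten = rewrite_query(
    query, parameters, {"genre": "http://dbpedia.org/resource/Hard_rock"}
)
# -> SELECT ?band WHERE { ?band dbo:genre <http://dbpedia.org/resource/Hard_rock> }
```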
diff --git a/src/pagination.py b/src/pagination.py
index e0cf6fa..bee8903 100644
--- a/src/pagination.py
+++ b/src/pagination.py
@@ -4,21 +4,27 @@
 from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode, ParseResult

+
 def getSwaggerPaginationDef(resultsPerPage):
     """Build swagger spec section for pagination"""
     return {
-        "name": "page",
-        "type": "int",
-        "in": "query",
-        "description": "The page number for this paginated query ({} results per page)".format(resultsPerPage)
+        "name": "page",
+        "type": "int",
+        "in": "query",
+        "description": "The page number for this paginated query ({} results per page)".format(
+            resultsPerPage
+        ),
     }

+
 def buildPaginationHeader(resultCount, resultsPerPage, pageArg, url):
     """Build link header for result pagination"""
     lastPage = resultCount / resultsPerPage

     url_parts = urlparse(url)
-    query = dict(parse_qsl(url_parts.query))  # Use dict parse_qsl instead of parse_qs to ensure 'page' is unique
+    query = dict(
+        parse_qsl(url_parts.query)
+    )  # Use dict parse_qsl instead of parse_qs to ensure 'page' is unique

     first_url = _buildNewUrlWithPage(url_parts, query, page=1)
     last_url = _buildNewUrlWithPage(url_parts, query, page=lastPage)
@@ -35,12 +41,21 @@ def buildPaginationHeader(resultCount, resultsPerPage, pageArg, url):
     if page == lastPage:
         headerLink = "<{}>; rel=prev, <{}>; rel=first".format(prev_url, first_url)
     else:
-        headerLink = "<{}>; rel=next, <{}>; rel=prev, <{}>; rel=first, <{}>; rel=last".format(next_url, prev_url, first_url, last_url)
+        headerLink = "<{}>; rel=next, <{}>; rel=prev, <{}>; rel=first, <{}>; rel=last".format(
+            next_url, prev_url, first_url, last_url
+        )
     return headerLink

+
 def _buildNewUrlWithPage(url_parts, query, page):
-    query['page'] = page
+    query["page"] = page
     new_query = urlencode(query)
-    newParsedUrl = ParseResult(scheme=url_parts.scheme, netloc=url_parts.netloc, path=url_parts.path,
-                               params=url_parts.params, query=new_query, fragment=url_parts.fragment)
+    newParsedUrl = ParseResult(
+        scheme=url_parts.scheme,
+        netloc=url_parts.netloc,
+        path=url_parts.path,
+        params=url_parts.params,
+        query=new_query,
+        fragment=url_parts.fragment,
+    )
     return urlunparse(newParsedUrl)
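`buildPaginationHeader` emits an RFC 5988 style `Link` header. A sketch (hypothetical URL; `pageArg` is assumed to carry the current page number from the request, and note `lastPage` is computed with true division, so counts that do not divide evenly yield a fractional last page):

```python
from grlc.pagination import buildPaginationHeader

header = buildPaginationHeader(
    resultCount=1000,
    resultsPerPage=100,
    pageArg="3",
    url="http://grlc.io/api-git/CLARIAH/wp4-queries/bands?page=3",
)
# Roughly: <...?page=4>; rel=next, <...?page=2>; rel=prev,
#          <...?page=1>; rel=first, <...?page=10>; rel=last
```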
diff --git a/src/prov.py b/src/prov.py
index d54c0b1..0f5cf36 100644
--- a/src/prov.py
+++ b/src/prov.py
@@ -9,7 +9,6 @@
 from rdflib import Graph, URIRef, Namespace, RDF, Literal
 from datetime import datetime
 from subprocess import check_output
-from six import PY3

 # grlc modules
 import grlc.static as static
@@ -18,7 +17,7 @@
 glogger = glogging.getGrlcLogger(__name__)


-class grlcPROV():
+class grlcPROV:
     """Record provenance of the grlc specification constructed."""

     def __init__(self, user, repo):
@@ -33,10 +32,12 @@ def __init__(self, user, repo):
         self.prov_g = Graph()
         prov_uri = URIRef("http://www.w3.org/ns/prov#")
         self.prov = Namespace(prov_uri)
-        self.prov_g.bind('prov', self.prov)
+        self.prov_g.bind("prov", self.prov)

         self.agent = URIRef("http://{}".format(static.SERVER_NAME))
-        self.entity_d = URIRef("http://{}/api/{}/{}/spec".format(static.SERVER_NAME, self.user, self.repo))
+        self.entity_d = URIRef(
+            "http://{}/api/{}/{}/spec".format(static.SERVER_NAME, self.user, self.repo)
+        )
         self.activity = URIRef(self.entity_d + "-activity")

         self.init_prov_graph()
@@ -49,15 +50,19 @@ def init_prov_graph(self):
         try:
             # Use git2prov to get prov on the repo
             repo_prov = check_output(
-                ['node_modules/git2prov/bin/git2prov', 'https://github.com/{}/{}/'.format(self.user, self.repo),
-                 'PROV-O']).decode("utf-8")
-            repo_prov = repo_prov[repo_prov.find('@'):]
+                [
+                    "node_modules/git2prov/bin/git2prov",
+                    "https://github.com/{}/{}/".format(self.user, self.repo),
+                    "PROV-O",
+                ]
+            ).decode("utf-8")
+            repo_prov = repo_prov[repo_prov.find("@") :]
             # glogger.debug('Git2PROV output: {}'.format(repo_prov))
-            glogger.debug('Ingesting Git2PROV output into RDF graph')
-            with open('temp.prov.ttl', 'w') as temp_prov:
+            glogger.debug("Ingesting Git2PROV output into RDF graph")
+            with open("temp.prov.ttl", "w") as temp_prov:
                 temp_prov.write(repo_prov)
-            self.prov_g.parse('temp.prov.ttl', format='turtle')
+            self.prov_g.parse("temp.prov.ttl", format="turtle")

         except Exception as e:
             glogger.error(e)
             glogger.error("Couldn't parse Git2PROV graph, continuing without repo PROV")
@@ -74,7 +79,9 @@ def init_prov_graph(self):

         # activity
         self.prov_g.add((self.activity, self.prov.wasAssociatedWith, self.agent))
-        self.prov_g.add((self.activity, self.prov.startedAtTime, Literal(datetime.now())))
+        self.prov_g.add(
+            (self.activity, self.prov.startedAtTime, Literal(datetime.now()))
+        )
         # later: activity used entity_o_1 ... entity_o_n
         # later: activity endedAtTime (when we know the end time)
@@ -99,13 +106,10 @@ def log_prov_graph(self):
         Log provenance graph so far
         """
         glogger.debug("Spec generation provenance graph:")
-        glogger.debug(self.prov_g.serialize(format='turtle'))
+        glogger.debug(self.prov_g.serialize(format="turtle"))

     def serialize(self, format):
         """
         Serialize provenance graph in the specified format
         """
-        if PY3:
-            return self.prov_g.serialize(format=format).decode('utf-8')
-        else:
-            return self.prov_g.serialize(format=format)
+        return self.prov_g.serialize(format=format)
diff --git a/src/queryTypes.py b/src/queryTypes.py
index c9f534c..d8168e0 100644
--- a/src/queryTypes.py
+++ b/src/queryTypes.py
@@ -4,21 +4,18 @@
 """Definition of grlc query types."""

-qType = {
-    'SPARQL': 'sparql',
-    'TPF': 'tpf',
-    'JSON': 'json'
-}
+qType = {"SPARQL": "sparql", "TPF": "tpf", "JSON": "json"}

+
 def guessQueryType(queryUrl):
     queryUrl = queryUrl.lower()
-    if queryUrl.endswith('.rq'):
-        return qType['SPARQL']
-    elif queryUrl.endswith('.sparql'):
-        return qType['SPARQL']
-    elif queryUrl.endswith('.tpf'):
-        return qType['TPF']
-    elif queryUrl.endswith('.json'):
-        return qType['JSON']
+    if queryUrl.endswith(".rq"):
+        return qType["SPARQL"]
+    elif queryUrl.endswith(".sparql"):
+        return qType["SPARQL"]
+    elif queryUrl.endswith(".tpf"):
+        return qType["TPF"]
+    elif queryUrl.endswith(".json"):
+        return qType["JSON"]
     else:
-        raise Exception('Unknown query type: ' + queryUrl)
+        raise Exception("Unknown query type: " + queryUrl)
diff --git a/src/server.py b/src/server.py
index a05d8a1..bdfa534 100644
--- a/src/server.py
+++ b/src/server.py
@@ -16,237 +16,415 @@
 glogger = glogging.getGrlcLogger(__name__)

-### The Flask app ###
+# The Flask app
 app = Flask(__name__)
 CORS(app)

-### Helper functions ###
+
+# Helper functions
 def relative_path():
     """Generate relative path for the current route. This is used to build relative paths when rendering templates."""
     path = request.path
-    path = '.' + '/..' * (path.count('/') - 1)
+    path = "." + "/.." * (path.count("/") - 1)
     return path
* (path.count("/") - 1) return path + def api_docs_template(): """Generate Grlc API page.""" - return render_template('api-docs.html', relative_path=relative_path()) - -def swagger_spec(user, repo, subdir=None, spec_url=None, sha=None, content=None, git_type=None, branch='main'): - """ Generate swagger specification """ - glogger.info("-----> Generating swagger spec for /{}/{} ({}), subdir {}, params {}, on commit {}".format(user, repo, git_type, subdir, spec_url, sha)) - - swag = utils.build_swagger_spec(user, repo, subdir, spec_url, sha, static.SERVER_NAME, git_type, branch) + return render_template("api-docs.html", relative_path=relative_path()) + + +def swagger_spec( + user, + repo, + subdir=None, + spec_url=None, + sha=None, + content=None, + git_type=None, + branch=None, +): + """Generate swagger specification""" + glogger.info( + "-----> Generating swagger spec for /{}/{} ({}), subdir {}, params {}, on commit {}".format( + user, repo, git_type, subdir, spec_url, sha + ) + ) + + swag = utils.build_swagger_spec( + user, repo, subdir, spec_url, sha, static.SERVER_NAME, git_type, branch + ) resp_spec = make_response(jsonify(swag)) - resp_spec.headers['Content-Type'] = 'application/json' + resp_spec.headers["Content-Type"] = "application/json" - resp_spec.headers['Cache-Control'] = static.CACHE_CONTROL_POLICY # Caching JSON specs for 15 minutes + resp_spec.headers[ + "Cache-Control" + ] = static.CACHE_CONTROL_POLICY # Caching JSON specs for 15 minutes - glogger.info("-----> API spec generation for /{}/{}, subdir {}, params {}, on commit {} complete".format(user, repo, subdir, spec_url, sha)) + glogger.info( + "-----> API spec generation for /{}/{}, subdir {}, params {}, on commit {} complete".format( + user, repo, subdir, spec_url, sha + ) + ) return resp_spec -def query(user, repo, query_name, subdir=None, spec_url=None, sha=None, content=None, git_type=None, branch='main'): + +def query( + user, + repo, + query_name, + subdir=None, + spec_url=None, + sha=None, + content=None, + git_type=None, + branch=None, +): """Execute SPARQL query for a specific grlc-generated API endpoint""" - glogger.info("-----> Executing call name at /{}/{} ({})/{}/{} on commit {}".format(user, repo, git_type, subdir, query_name, sha)) + glogger.info( + "-----> Executing call name at /{}/{} ({})/{}/{} on commit {}".format( + user, repo, git_type, subdir, query_name, sha + ) + ) glogger.debug("Request accept header: " + request.headers["Accept"]) requestArgs = request.args - acceptHeader = request.headers['Accept'] + acceptHeader = request.headers["Accept"] requestUrl = request.url formData = request.form method = request.method - query_response, status, headers = utils.dispatch_query(user, repo, query_name, subdir, spec_url, - sha=sha, content=content, requestArgs=requestArgs, - acceptHeader=acceptHeader, requestUrl=requestUrl, - formData=formData, method=method, git_type=git_type, - branch=branch) - if isinstance(query_response, list): + query_response, status, headers = utils.dispatch_query( + user, + repo, + query_name, + subdir, + spec_url, + sha=sha, + content=content, + requestArgs=requestArgs, + acceptHeader=acceptHeader, + requestUrl=requestUrl, + formData=formData, + method=method, + git_type=git_type, + branch=branch, + ) + if isinstance(query_response, list) or isinstance(query_response, dict): query_response = jsonify(query_response) return make_response(query_response, status, headers) -### Server routes ### -@app.route('/') + +# Server routes +@app.route("/") def grlc(): """Grlc landing page.""" - 
-    resp = make_response(render_template('index.html'))
+    resp = make_response(render_template("index.html"))
     return resp

-#############################
-### Routes for local APIs ###
-#############################
+
+# Routes for local APIs
+
 # Spec generation, front-end
-@app.route('/api-local', methods=['GET'], strict_slashes=False)
-@app.route('/api/local/local', methods=['GET'], strict_slashes=False)  # backward compatibility route
+@app.route("/api-local", methods=["GET"], strict_slashes=False)
+@app.route(
+    "/api/local/local", methods=["GET"], strict_slashes=False
+)  # backward compatibility route
 def api_docs_local():
     """Grlc API page for local routes."""
     return api_docs_template()

+
 # Spec generation, JSON
-@app.route('/api-local/swagger', methods=['GET'])
-@app.route('/api/local/local/swagger', methods=['GET'], strict_slashes=False)  # backward compatibility route
+@app.route("/api-local/swagger", methods=["GET"])
+@app.route(
+    "/api/local/local/swagger", methods=["GET"], strict_slashes=False
+)  # backward compatibility route
 def swagger_spec_local():
     """Swagger spec for local routes."""
     return swagger_spec(user=None, repo=None, sha=None, content=None)

+
 # Callname execution
-@app.route('/api-local/<query_name>', methods=['GET', 'POST'])
-@app.route('/api-local/<query_name>.<content>', methods=['GET', 'POST'])
-@app.route('/api/local/local/<query_name>', methods=['GET', 'POST'], strict_slashes=False)  # backward compatibility route
-@app.route('/api/local/local/<query_name>.<content>', methods=['GET', 'POST'], strict_slashes=False)  # backward compatibility route
+@app.route("/api-local/<query_name>", methods=["GET", "POST"])
+@app.route("/api-local/<query_name>.<content>", methods=["GET", "POST"])
+@app.route(
+    "/api/local/local/<query_name>", methods=["GET", "POST"], strict_slashes=False
+)  # backward compatibility route
+@app.route(
+    "/api/local/local/<query_name>.<content>",
+    methods=["GET", "POST"],
+    strict_slashes=False,
+)  # backward compatibility route
 def query_local(query_name, content=None):
     """SPARQL query execution for local routes."""
     return query(user=None, repo=None, query_name=query_name, content=content)

-################################
-### Routes for URL HTTP APIs ###
-################################
+
+# Routes for URL HTTP APIs
+
 # Spec generation, front-end
-@app.route('/api-url', methods=['POST', 'GET'], strict_slashes=False)
+@app.route("/api-url", methods=["POST", "GET"], strict_slashes=False)
 def api_docs_param():
     """Grlc API page for specifications loaded via http."""
     # Get queries provided by params
-    spec_url = request.args['specUrl']
+    spec_url = request.args["specUrl"]
     glogger.info("Spec URL: {}".format(spec_url))
     return api_docs_template()

+
 # Spec generation, JSON
-@app.route('/api-url/swagger', methods=['GET'])
+@app.route("/api-url/swagger", methods=["GET"])
 def swagger_spec_param():
     """Swagger spec for specifications loaded via http."""
-    spec_url = request.args['specUrl']
+    spec_url = request.args["specUrl"]
     glogger.info("Spec URL: {}".format(spec_url))
     return swagger_spec(user=None, repo=None, spec_url=spec_url)

+
 # Callname execution
-@app.route('/api-url/<query_name>', methods=['GET', 'POST'])
-@app.route('/api-url/<query_name>.<content>', methods=['GET', 'POST'])
+@app.route("/api-url/<query_name>", methods=["GET", "POST"])
+@app.route("/api-url/<query_name>.<content>", methods=["GET", "POST"])
 def query_param(query_name, content=None):
     """SPARQL query execution for specifications loaded via http."""
-    spec_url = request.args['specUrl']
+    spec_url = request.args["specUrl"]
     glogger.debug("Spec URL: {}".format(spec_url))
-    return query(user=None, repo=None, query_name=query_name, spec_url=spec_url, content=content)
+    return query(
+        user=None, repo=None, query_name=query_name, spec_url=spec_url, content=content
+    )

+
+# Routes for GitHub APIs

-##############################
-### Routes for GitHub APIs ###
-##############################
-
 # Spec generation, front-end
-@app.route('/api-git/<user>/<repo>', strict_slashes=False)
-@app.route('/api-git/<user>/<repo>/subdir/<subdir>', strict_slashes=False)
-@app.route('/api-git/<user>/<repo>/api-docs')
-@app.route('/api-git/<user>/<repo>/commit/<sha>')
-@app.route('/api-git/<user>/<repo>/commit/<sha>/api-docs')
-@app.route('/api-git/<user>/<repo>/subdir/<subdir>/commit/<sha>')
-@app.route('/api-git/<user>/<repo>/subdir/<subdir>/commit/<sha>/api-docs')
-@app.route('/api/<user>/<repo>', strict_slashes=False)  # backward compatibility route
-@app.route('/api/<user>/<repo>/<subdir>', strict_slashes=False)  # backward compatibility route
-@app.route('/api/<user>/<repo>/api-docs')  # backward compatibility route
-@app.route('/api/<user>/<repo>/commit/<sha>')  # backward compatibility route
-@app.route('/api/<user>/<repo>/commit/<sha>/api-docs')  # backward compatibility route
-@app.route('/api/<user>/<repo>/<subdir>/commit/<sha>')  # backward compatibility route
-@app.route('/api/<user>/<repo>/<subdir>/commit/<sha>/api-docs')  # backward compatibility route
+@app.route("/api-git/<user>/<repo>", strict_slashes=False)
+@app.route("/api-git/<user>/<repo>/subdir/<subdir>", strict_slashes=False)
+@app.route("/api-git/<user>/<repo>/api-docs")
+@app.route("/api-git/<user>/<repo>/commit/<sha>")
+@app.route("/api-git/<user>/<repo>/commit/<sha>/api-docs")
+@app.route("/api-git/<user>/<repo>/subdir/<subdir>/commit/<sha>")
+@app.route("/api-git/<user>/<repo>/subdir/<subdir>/commit/<sha>/api-docs")
+@app.route("/api/<user>/<repo>", strict_slashes=False)  # backward compatibility route
+@app.route(
+    "/api/<user>/<repo>/<subdir>", strict_slashes=False
+)  # backward compatibility route
+@app.route("/api/<user>/<repo>/api-docs")  # backward compatibility route
+@app.route("/api/<user>/<repo>/commit/<sha>")  # backward compatibility route
+@app.route("/api/<user>/<repo>/commit/<sha>/api-docs")  # backward compatibility route
+@app.route(
+    "/api/<user>/<repo>/<subdir>/commit/<sha>"
+)  # backward compatibility route
+@app.route(
+    "/api/<user>/<repo>/<subdir>/commit/<sha>/api-docs"
+)  # backward compatibility route
 def api_docs_git(user, repo, subdir=None, sha=None):
     """Grlc API page for specifications loaded from a Github repo."""
     return api_docs_template()

+
 # Spec generation, JSON
-@app.route('/api-git/<user>/<repo>/swagger', methods=['GET'])
-@app.route('/api-git/<user>/<repo>/subdir/<subdir>/swagger', methods=['GET'])
-@app.route('/api-git/<user>/<repo>/commit/<sha>/swagger')
-@app.route('/api-git/<user>/<repo>/subdir/<subdir>/commit/<sha>/swagger')
-@app.route('/api-git/<user>/<repo>/<subdir>/commit/<sha>/swagger')
-@app.route('/api/<user>/<repo>/swagger', methods=['GET'])  # backward compatibility route
-@app.route('/api/<user>/<repo>/<subdir>/swagger', methods=['GET'])  # backward compatibility route
-@app.route('/api/<user>/<repo>/commit/<sha>/swagger')  # backward compatibility route
-@app.route('/api/<user>/<repo>/<subdir>/commit/<sha>/swagger')  # backward compatibility route
-@app.route('/api-git/<user>/<repo>/<subdir>/swagger', methods=['GET'])  # backward compatibility route
-@app.route('/api-git/<user>/<repo>/<subdir>/commit/<sha>/swagger')  # backward compatibility route
+@app.route("/api-git/<user>/<repo>/swagger", methods=["GET"])
+@app.route("/api-git/<user>/<repo>/subdir/<subdir>/swagger", methods=["GET"])
+@app.route("/api-git/<user>/<repo>/commit/<sha>/swagger")
+@app.route("/api-git/<user>/<repo>/subdir/<subdir>/commit/<sha>/swagger")
+@app.route("/api-git/<user>/<repo>/<subdir>/commit/<sha>/swagger")
+@app.route(
+    "/api/<user>/<repo>/swagger", methods=["GET"]
+)  # backward compatibility route
+@app.route(
+    "/api/<user>/<repo>/<subdir>/swagger", methods=["GET"]
+)  # backward compatibility route
+@app.route("/api/<user>/<repo>/commit/<sha>/swagger")  # backward compatibility route
+@app.route(
+    "/api/<user>/<repo>/<subdir>/commit/<sha>/swagger"
+)  # backward compatibility route
+@app.route(
+    "/api-git/<user>/<repo>/<subdir>/swagger", methods=["GET"]
+)  # backward compatibility route
+@app.route(
+    "/api-git/<user>/<repo>/<subdir>/commit/<sha>/swagger"
+)  # backward compatibility route
 def swagger_spec_git(user, repo, subdir=None, sha=None):
     """Swagger spec for specifications loaded from a Github repo."""
-    return swagger_spec(user, repo, subdir=subdir, spec_url=None, sha=sha, content=None, git_type=static.TYPE_GITHUB)
+    return swagger_spec(
+        user,
+        repo,
+        subdir=subdir,
+        spec_url=None,
+        sha=sha,
+        content=None,
+        git_type=static.TYPE_GITHUB,
+    )

+
 # Callname execution
-@app.route('/api-git/<user>/<repo>/<query_name>', methods=['GET', 'POST'])
-@app.route('/api-git/<user>/<repo>/subdir/<subdir>/<query_name>', methods=['GET', 'POST'])
-@app.route('/api-git/<user>/<repo>/<query_name>.<content>', methods=['GET', 'POST'])
-@app.route('/api-git/<user>/<repo>/subdir/<subdir>/<query_name>.<content>', methods=['GET', 'POST'])
-@app.route('/api-git/<user>/<repo>/commit/<sha>/<query_name>', methods=['GET', 'POST'])
-@app.route('/api-git/<user>/<repo>/subdir/<subdir>/commit/<sha>/<query_name>', methods=['GET', 'POST'])
-@app.route('/api-git/<user>/<repo>/commit/<sha>/<query_name>.<content>', methods=['GET', 'POST'])
-@app.route('/api-git/<user>/<repo>/subdir/<subdir>/commit/<sha>/<query_name>.<content>', methods=['GET', 'POST'])
-@app.route('/api/<user>/<repo>/<query_name>', methods=['GET', 'POST'])  # backward compatibility route
-@app.route('/api/<user>/<repo>/<subdir>/<query_name>', methods=['GET', 'POST'])  # backward compatibility route
-@app.route('/api/<user>/<repo>/<query_name>.<content>', methods=['GET', 'POST'])  # backward compatibility route
-@app.route('/api/<user>/<repo>/<subdir>/<query_name>.<content>', methods=['GET', 'POST'])  # backward compatibility route
-@app.route('/api/<user>/<repo>/commit/<sha>/<query_name>', methods=['GET', 'POST'])  # backward compatibility route
-@app.route('/api/<user>/<repo>/<subdir>/commit/<sha>/<query_name>', methods=['GET', 'POST'])  # backward compatibility route
-@app.route('/api/<user>/<repo>/commit/<sha>/<query_name>.<content>', methods=['GET', 'POST'])  # backward compatibility route
-@app.route('/api/<user>/<repo>/<subdir>/commit/<sha>/<query_name>.<content>', methods=['GET', 'POST'])  # backward compatibility route
+@app.route("/api-git/<user>/<repo>/<query_name>", methods=["GET", "POST"])
+@app.route(
+    "/api-git/<user>/<repo>/subdir/<subdir>/<query_name>", methods=["GET", "POST"]
+)
+@app.route("/api-git/<user>/<repo>/<query_name>.<content>", methods=["GET", "POST"])
+@app.route(
+    "/api-git/<user>/<repo>/subdir/<subdir>/<query_name>.<content>",
+    methods=["GET", "POST"],
+)
+@app.route("/api-git/<user>/<repo>/commit/<sha>/<query_name>", methods=["GET", "POST"])
+@app.route(
+    "/api-git/<user>/<repo>/subdir/<subdir>/commit/<sha>/<query_name>",
+    methods=["GET", "POST"],
+)
+@app.route(
+    "/api-git/<user>/<repo>/commit/<sha>/<query_name>.<content>",
+    methods=["GET", "POST"],
+)
+@app.route(
+    "/api-git/<user>/<repo>/subdir/<subdir>/commit/<sha>/<query_name>.<content>",
+    methods=["GET", "POST"],
+)
+@app.route(
+    "/api/<user>/<repo>/<query_name>", methods=["GET", "POST"]
+)  # backward compatibility route
+@app.route(
+    "/api/<user>/<repo>/<subdir>/<query_name>", methods=["GET", "POST"]
+)  # backward compatibility route
+@app.route(
+    "/api/<user>/<repo>/<query_name>.<content>", methods=["GET", "POST"]
+)  # backward compatibility route
+@app.route(
+    "/api/<user>/<repo>/<subdir>/<query_name>.<content>", methods=["GET", "POST"]
+)  # backward compatibility route
+@app.route(
+    "/api/<user>/<repo>/commit/<sha>/<query_name>", methods=["GET", "POST"]
+)  # backward compatibility route
+@app.route(
+    "/api/<user>/<repo>/<subdir>/commit/<sha>/<query_name>",
+    methods=["GET", "POST"],
+)  # backward compatibility route
+@app.route(
+    "/api/<user>/<repo>/commit/<sha>/<query_name>.<content>", methods=["GET", "POST"]
+)  # backward compatibility route
+@app.route(
+    "/api/<user>/<repo>/<subdir>/commit/<sha>/<query_name>.<content>",
+    methods=["GET", "POST"],
+)  # backward compatibility route
 def query_git(user, repo, query_name, subdir=None, sha=None, content=None):
     """SPARQL query execution for specifications loaded from a Github repo."""
-    return query(user, repo, query_name, subdir=subdir, sha=sha, content=content, git_type=static.TYPE_GITHUB)
+    return query(
+        user,
+        repo,
+        query_name,
+        subdir=subdir,
+        sha=sha,
+        content=content,
+        git_type=static.TYPE_GITHUB,
+    )
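The GitHub route table above maps spec pages to `/api-git/<user>/<repo>`, JSON specs to `.../swagger`, and call names to `.../<query_name>`. A client-side sketch (hypothetical local server, repo, and query name):

```python
import requests

base = "http://localhost:8088/api-git/CLARIAH/wp4-queries"

swagger = requests.get(base + "/swagger").json()  # generated JSON spec
rows = requests.get(
    base + "/bands_per_genre",
    params={"genre": "http://dbpedia.org/resource/Hard_rock"},
    headers={"Accept": "text/csv"},
)
```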

+# Routes for GitLab APIs

-##############################
-### Routes for GitLab APIs ###
-##############################
-
 # Spec generation, front-end
-@app.route('/api-gitlab/<user>/<repo>', strict_slashes=False)
-@app.route('/api-gitlab/<user>/<repo>/branch/<branch>', strict_slashes=False)
-@app.route('/api-gitlab/<user>/<repo>/subdir/<subdir>', strict_slashes=False)
-@app.route('/api-gitlab/<user>/<repo>/branch/<branch>/subdir/<subdir>', strict_slashes=False)
-@app.route('/api-gitlab/<user>/<repo>/api-docs')
-@app.route('/api-gitlab/<user>/<repo>/commit/<sha>')
-@app.route('/api-gitlab/<user>/<repo>/commit/<sha>/api-docs')
-@app.route('/api-gitlab/<user>/<repo>/subdir/<subdir>/commit/<sha>')
-@app.route('/api-gitlab/<user>/<repo>/subdir/<subdir>/commit/<sha>/api-docs')
-def api_docs_gitlab(user, repo, subdir=None, sha=None, branch='main'):
+@app.route("/api-gitlab/<user>/<repo>", strict_slashes=False)
+@app.route("/api-gitlab/<user>/<repo>/branch/<branch>", strict_slashes=False)
+@app.route("/api-gitlab/<user>/<repo>/subdir/<subdir>", strict_slashes=False)
+@app.route(
+    "/api-gitlab/<user>/<repo>/branch/<branch>/subdir/<subdir>",
+    strict_slashes=False,
+)
+@app.route("/api-gitlab/<user>/<repo>/api-docs")
+@app.route("/api-gitlab/<user>/<repo>/commit/<sha>")
+@app.route("/api-gitlab/<user>/<repo>/commit/<sha>/api-docs")
+@app.route("/api-gitlab/<user>/<repo>/subdir/<subdir>/commit/<sha>")
+@app.route("/api-gitlab/<user>/<repo>/subdir/<subdir>/commit/<sha>/api-docs")
+def api_docs_gitlab(user, repo, subdir=None, sha=None, branch=None):
     """Grlc API page for specifications loaded from a GitLab repo."""
     glogger.debug("Entry in function: __main__.api_docs_gitlab")
     return api_docs_template()

+
 # Spec generation, JSON
-@app.route('/api-gitlab/<user>/<repo>/swagger', methods=['GET'])
-@app.route('/api-gitlab/<user>/<repo>/branch/<branch>/swagger', methods=['GET'])
-@app.route('/api-gitlab/<user>/<repo>/subdir/<subdir>/swagger', methods=['GET'])
-@app.route('/api-gitlab/<user>/<repo>/branch/<branch>/subdir/<subdir>/swagger', methods=['GET'])
-@app.route('/api-gitlab/<user>/<repo>/commit/<sha>/swagger')
-@app.route('/api-gitlab/<user>/<repo>/subdir/<subdir>/commit/<sha>/swagger')
-@app.route('/api-gitlab/<user>/<repo>/<subdir>/commit/<sha>/swagger')
-def swagger_spec_gitlab(user, repo, subdir=None, sha=None, branch='main'):
+@app.route("/api-gitlab/<user>/<repo>/swagger", methods=["GET"])
+@app.route("/api-gitlab/<user>/<repo>/branch/<branch>/swagger", methods=["GET"])
+@app.route("/api-gitlab/<user>/<repo>/subdir/<subdir>/swagger", methods=["GET"])
+@app.route(
+    "/api-gitlab/<user>/<repo>/branch/<branch>/subdir/<subdir>/swagger",
+    methods=["GET"],
+)
+@app.route("/api-gitlab/<user>/<repo>/commit/<sha>/swagger")
+@app.route("/api-gitlab/<user>/<repo>/subdir/<subdir>/commit/<sha>/swagger")
+@app.route("/api-gitlab/<user>/<repo>/<subdir>/commit/<sha>/swagger")
+def swagger_spec_gitlab(user, repo, subdir=None, sha=None, branch=None):
     """Swagger spec for specifications loaded from a GitLab repo."""
     glogger.debug("Entry in function: __main__.swagger_spec_gitlab")
-    return swagger_spec(user, repo, subdir=subdir, spec_url=None, sha=sha, content=None, git_type=static.TYPE_GITLAB, branch=branch)
+    return swagger_spec(
+        user,
+        repo,
+        subdir=subdir,
+        spec_url=None,
+        sha=sha,
+        content=None,
+        git_type=static.TYPE_GITLAB,
+        branch=branch,
+    )

+
 # Callname execution
-@app.route('/api-gitlab/<user>/<repo>/query/<query_name>', methods=['GET', 'POST'])
-@app.route('/api-gitlab/<user>/<repo>/query/branch/<branch>/<query_name>', methods=['GET','POST'])
-@app.route('/api-gitlab/<user>/<repo>/query/subdir/<subdir>/<query_name>', methods=['GET', 'POST'])
-@app.route('/api-gitlab/<user>/<repo>/query/branch/<branch>/subdir/<subdir>/<query_name>', methods=['GET','POST'])
-@app.route('/api-gitlab/<user>/<repo>/query/<query_name>.<content>', methods=['GET', 'POST'])
-@app.route('/api-gitlab/<user>/<repo>/query/subdir/<subdir>/<query_name>.<content>', methods=['GET', 'POST'])
-@app.route('/api-gitlab/<user>/<repo>/query/commit/<sha>/<query_name>', methods=['GET', 'POST'])
-@app.route('/api-gitlab/<user>/<repo>/query/subdir/<subdir>/commit/<sha>/<query_name>', methods=['GET', 'POST'])
-@app.route('/api-gitlab/<user>/<repo>/query/commit/<sha>/<query_name>.<content>', methods=['GET', 'POST'])
-@app.route('/api-gitlab/<user>/<repo>/query/subdir/<subdir>/commit/<sha>/<query_name>.<content>', methods=['GET', 'POST'])
-def query_gitlab(user, repo, query_name, subdir=None, sha=None, content=None, branch='main'):
+@app.route("/api-gitlab/<user>/<repo>/query/<query_name>", methods=["GET", "POST"])
+@app.route(
+    "/api-gitlab/<user>/<repo>/query/branch/<branch>/<query_name>",
+    methods=["GET", "POST"],
+)
+@app.route(
+    "/api-gitlab/<user>/<repo>/query/subdir/<subdir>/<query_name>",
+    methods=["GET", "POST"],
+)
+@app.route(
+    "/api-gitlab/<user>/<repo>/query/branch/<branch>/subdir/<subdir>/<query_name>",
+    methods=["GET", "POST"],
+)
+@app.route(
+    "/api-gitlab/<user>/<repo>/query/<query_name>.<content>", methods=["GET", "POST"]
+)
+@app.route(
+    "/api-gitlab/<user>/<repo>/query/subdir/<subdir>/<query_name>.<content>",
+    methods=["GET", "POST"],
+)
+@app.route( + "/api-gitlab///query/commit//", methods=["GET", "POST"] +) +@app.route( + "/api-gitlab///query/subdir//commit//", + methods=["GET", "POST"], +) +@app.route( + "/api-gitlab///query/commit//.", + methods=["GET", "POST"], +) +@app.route( + "/api-gitlab///query/subdir//commit//.", + methods=["GET", "POST"], +) +def query_gitlab( + user, repo, query_name, subdir=None, sha=None, content=None, branch=None +): """SPARQL query execution for specifications loaded from a Github repo.""" glogger.debug("Entry in function: __main__.query_gitlab") - return query(user, repo, query_name, subdir=subdir, sha=sha, content=content, git_type=static.TYPE_GITLAB, branch=branch) - - + return query( + user, + repo, + query_name, + subdir=subdir, + sha=sha, + content=content, + git_type=static.TYPE_GITLAB, + branch=branch, + ) # Main thread -if __name__ == '__main__': +if __name__ == "__main__": app.run(host=static.DEFAULT_HOST, port=static.DEFAULT_PORT, debug=True) diff --git a/src/sparql.py b/src/sparql.py index 2725f66..2fd5f20 100644 --- a/src/sparql.py +++ b/src/sparql.py @@ -13,18 +13,14 @@ # Default value is JSON SUPPORTED_MIME_FORMATS = defaultdict( - lambda: JSON, { - 'text/csv': CSV, - 'application/json': JSON - } + lambda: JSON, {"text/csv": CSV, "application/json": JSON} ) -MIME_FORMAT = { - format: mime for mime, format in SUPPORTED_MIME_FORMATS.items() -} +MIME_FORMAT = {format: mime for mime, format in SUPPORTED_MIME_FORMATS.items()} + def getResponseText(endpoint, query, requestedMimeType): - """Returns the result and mimetype of executing the given query against + """Returns the result and mimetype of executing the given query against the given endpoint. Keyword arguments: @@ -40,15 +36,18 @@ def getResponseText(endpoint, query, requestedMimeType): client = SPARQLWrapper(endpoint) client.setQuery(query) client.setReturnFormat(retFormat) - client.setCredentials(static.DEFAULT_ENDPOINT_USER, static.DEFAULT_ENDPOINT_PASSWORD) + client.setCredentials( + static.DEFAULT_ENDPOINT_USER, static.DEFAULT_ENDPOINT_PASSWORD + ) result = client.queryAndConvert() - if retFormat==JSON: + if retFormat == JSON: result = jsonify(result) return result, MIME_FORMAT[retFormat] + def _mimeTypeToSparqlFormat(mimeType): - if ';' in mimeType: - mimeType = mimeType.split(';')[0].strip() + if ";" in mimeType: + mimeType = mimeType.split(";")[0].strip() return SUPPORTED_MIME_FORMATS[mimeType] diff --git a/src/static.py b/src/static.py index fb8b10f..90bfdbd 100644 --- a/src/static.py +++ b/src/static.py @@ -12,19 +12,62 @@ DEFAULT_PORT = 8088 # XSD datatypes for parsing queries with parameters -XSD_DATATYPES = ["decimal", "float", "double", "integer", "positiveInteger", "negativeInteger", "nonPositiveInteger", "nonNegativeInteger", "long", "int", "short", "byte", "unsignedLong", "unsignedInt", "unsignedShort", "unsignedByte", "dateTime", "date", "gYearMonth", "gYear", "duration", "gMonthDay", "gDay", "gMonth", "string", "normalizedString", "token", "language", "NMTOKEN", "NMTOKENS", "Name", "NCName", "ID", "IDREFS", "ENTITY", "ENTITIES", "QName", "boolean", "hexBinary", "base64Binary", "anyURI", "notation"] +XSD_DATATYPES = [ + "decimal", + "float", + "double", + "integer", + "positiveInteger", + "negativeInteger", + "nonPositiveInteger", + "nonNegativeInteger", + "long", + "int", + "short", + "byte", + "unsignedLong", + "unsignedInt", + "unsignedShort", + "unsignedByte", + "dateTime", + "date", + "gYearMonth", + "gYear", + "duration", + "gMonthDay", + "gDay", + "gMonth", + "string", + "normalizedString", + 
"token", + "language", + "NMTOKEN", + "NMTOKENS", + "Name", + "NCName", + "ID", + "IDREFS", + "ENTITY", + "ENTITIES", + "QName", + "boolean", + "hexBinary", + "base64Binary", + "anyURI", + "notation", +] # MIME types for content negotiation mimetypes = { - 'csv' : 'text/csv; q=1.0, */*; q=0.1', - 'json' : 'application/json; q=1.0, application/sparql-results+json; q=0.8, */*; q=0.1', - 'html' : 'text/html; q=1.0, */*; q=0.1', - 'ttl' : 'text/turtle' + "csv": "text/csv; q=1.0, */*; q=0.1", + "json": "application/json; q=1.0, application/sparql-results+json; q=0.8, */*; q=0.1", + "html": "text/html; q=1.0, */*; q=0.1", + "ttl": "text/turtle", } # GitHub base URLS -GITHUB_RAW_BASE_URL = 'https://raw.githubusercontent.com/' -GITHUB_API_BASE_URL = 'https://api.github.com/repos/' +GITHUB_RAW_BASE_URL = "https://raw.githubusercontent.com/" +GITHUB_API_BASE_URL = "https://api.github.com/repos/" # Git types TYPE_GITHUB = "github" @@ -33,46 +76,51 @@ # Cache control # CACHE_CONTROL_POLICY = 'public, max-age=60' # With the new hash retrieveal and redirect caching becomes obsolete -CACHE_CONTROL_POLICY = 'no-cache' +CACHE_CONTROL_POLICY = "no-cache" # Setting headers to use access_token for the GitHub API config_fallbacks = { - 'github_access_token': '', - 'sparql_endpoint': '', - 'user': '', - 'password': '', - 'server_name': '', - 'local_sparql_dir': '', - 'debug': 'False', - 'gitlab_url': 'https://gitlab' + "github_access_token": "", + "gitlab_access_token": "", + "sparql_access_token": "", + "sparql_endpoint": "", + "user": "", + "password": "", + "server_name": "", + "local_sparql_dir": "", + "debug": "False", + "gitlab_url": "https://gitlab", } config = ConfigParser(config_fallbacks) -config.add_section('auth') -config.add_section('defaults') -config.add_section('local') -config.add_section('api_gitlab') -config_filename = os.path.join(os.getcwd(), 'config.ini') -print('Reading config file: ', config_filename) +config.add_section("auth") +config.add_section("defaults") +config.add_section("local") +config.add_section("api_gitlab") + +config_filename = os.path.join(os.getcwd(), "config.ini") +print("Reading config file: ", config_filename) config.read(config_filename) -ACCESS_TOKEN = config.get('auth', 'github_access_token') +GITHUB_ACCESS_TOKEN = config.get("auth", "github_access_token") +GITLAB_ACCESS_TOKEN = config.get("auth", "gitlab_access_token") +SPARQL_ACCESS_TOKEN = config.get("auth", "sparql_access_token") # Default endpoint, if none specified elsewhere -DEFAULT_ENDPOINT = config.get('defaults', 'sparql_endpoint') -DEFAULT_ENDPOINT_USER = config.get('defaults', 'user') -DEFAULT_ENDPOINT_PASSWORD = config.get('defaults', 'password') +DEFAULT_ENDPOINT = config.get("defaults", "sparql_endpoint") +DEFAULT_ENDPOINT_USER = config.get("defaults", "user") +DEFAULT_ENDPOINT_PASSWORD = config.get("defaults", "password") # Local folder where queries are loaded from -LOCAL_SPARQL_DIR = config.get('local', 'local_sparql_dir') +LOCAL_SPARQL_DIR = config.get("local", "local_sparql_dir") # api_gitlab -GITLAB_URL = config.get('api_gitlab', 'gitlab_url') +GITLAB_URL = config.get("api_gitlab", "gitlab_url") # server name, used by the Flask app and in the swagger spec -SERVER_NAME = config.get('defaults', 'server_name') +SERVER_NAME = config.get("defaults", "server_name") # Logging format (prettier than the ugly standard in Flask) -LOG_FORMAT = '%(asctime)-15s [%(levelname)s] (%(module)s.%(funcName)s) %(message)s' -LOG_DEBUG_MODE = config.getboolean('defaults', 'debug') +LOG_FORMAT = "%(asctime)-15s 
+LOG_FORMAT = "%(asctime)-15s [%(levelname)s] (%(module)s.%(funcName)s) %(message)s"
+LOG_DEBUG_MODE = config.getboolean("defaults", "debug")

 # Pattern for INSERT query call names
 INSERT_PATTERN = "INSERT DATA { GRAPH ?_g_iri {?_data}}"
item['description'] - description += '\n\n```\n{}\n```'.format(query) - description += '\n\nSPARQL transformation:\n```json\n{}```'.format( - item['transform']) if 'transform' in item else '' + description = item["description"] + description += "\n\n```\n{}\n```".format(query) + description += ( + "\n\nSPARQL transformation:\n```json\n{}```".format(item["transform"]) + if "transform" in item + else "" + ) item_path = { - item['method']: { - 'tags': item['tags'], - 'summary': item['summary'], - 'description': description, - 'produces': ['text/csv', 'application/json', 'text/html'], - 'parameters': item['params'] if 'params' in item else None, - 'responses': { - '200': { - 'description': 'Query response', - 'schema': { - 'type': 'array', - 'items': { - 'type': 'object', - 'properties': item['item_properties'] if 'item_properties' in item else None + item["method"]: { + "tags": item["tags"], + "summary": item["summary"], + "description": description, + "produces": ["text/csv", "application/json", "text/html"], + "parameters": item["params"] if "params" in item else None, + "responses": { + "200": { + "description": "Query response", + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": item["item_properties"] + if "item_properties" in item + else None, }, - } + }, }, - 'default': { - 'description': 'Unexpected error', - 'schema': { - '$ref': '#/definitions/Message' - } - } - } + "default": { + "description": "Unexpected error", + "schema": {"$ref": "#/definitions/Message"}, + }, + }, } } return item_path -def build_spec(user, repo, subdir=None, query_url=None, sha=None, prov=None, extraMetadata=[], git_type=None, branch='main'): +def build_spec( + user, + repo, + subdir=None, + query_url=None, + sha=None, + prov=None, + extraMetadata=[], + git_type=None, + branch=None, +): """Build grlc specification for the given github user / repo.""" - loader = grlc.utils.getLoader(user, repo, subdir, query_url, sha=sha, prov=prov, git_type=git_type, branch=branch) + loader = grlc.utils.getLoader( + user, + repo, + subdir, + query_url, + sha=sha, + prov=prov, + git_type=git_type, + branch=branch, + ) files = loader.fetchFiles() raw_repo_uri = loader.getRawRepoUri() @@ -139,10 +157,12 @@ def build_spec(user, repo, subdir=None, query_url=None, sha=None, prov=None, ext allowed_ext = ["rq", "sparql", "json", "tpf"] for c in files: - glogger.debug('>>>>>>>>>>>>>>>>>>>>>>>>>c_name: {}'.format(c['name'])) - extension = c['name'].split('.')[-1] - if extension in allowed_ext or query_url: # parameter provided queries may not have extension - call_name = c['name'].split('.')[0] + glogger.debug(">>>>>>>>>>>>>>>>>>>>>>>>>c_name: {}".format(c["name"])) + extension = c["name"].split(".")[-1] + if ( + extension in allowed_ext or query_url + ): # parameter provided queries may not have extension + call_name = c["name"].split(".")[0] # Retrieve extra metadata from the query decorators query_text = loader.getTextFor(c) @@ -152,28 +172,44 @@ def build_spec(user, repo, subdir=None, query_url=None, sha=None, prov=None, ext query_text = json.loads(query_text) if extension in ["rq", "sparql", "json"] or query_url: - glogger.debug("===================================================================") - glogger.debug("Processing SPARQL query: {}".format(c['name'])) - glogger.debug("===================================================================") + glogger.debug( + "===================================================================" + ) + glogger.debug("Processing SPARQL query: 
{}".format(c["name"])) + glogger.debug( + "===================================================================" + ) try: - item = process_sparql_query_text(query_text, loader, call_name, extraMetadata) + item = process_sparql_query_text( + query_text, loader, call_name, extraMetadata + ) except Exception as e: warnings.append(str(e)) elif "tpf" == extension: - glogger.debug("===================================================================") - glogger.debug("Processing TPF query: {}".format(c['name'])) - glogger.debug("===================================================================") - item = process_tpf_query_text(query_text, raw_repo_uri, call_name, extraMetadata) + glogger.debug( + "===================================================================" + ) + glogger.debug("Processing TPF query: {}".format(c["name"])) + glogger.debug( + "===================================================================" + ) + item = process_tpf_query_text( + query_text, raw_repo_uri, call_name, extraMetadata + ) # TODO: raise exceptions in process_tpf_query_text else: - glogger.info("Ignoring unsupported source call name: {}".format(c['name'])) + glogger.info( + "Ignoring unsupported source call name: {}".format(c["name"]) + ) if item: items.append(item) # Add a warning if no license is found if loader.getLicenceURL() is None: - warnings.append("Queries behind this API do not have a license. You may not be allowed to use them.") + warnings.append( + "Queries behind this API do not have a license. You may not be allowed to use them." + ) return items, warnings @@ -182,23 +218,25 @@ def process_tpf_query_text(query_text, raw_repo_uri, call_name, extraMetadata): """Generates a swagger specification item based on the given TPF query file.""" query_metadata = gquery.get_yaml_decorators(query_text) - tags = query_metadata['tags'] if 'tags' in query_metadata else [] - glogger.debug("Read query tags: " + ', '.join(tags)) + tags = query_metadata["tags"] if "tags" in query_metadata else [] + glogger.debug("Read query tags: " + ", ".join(tags)) - summary = query_metadata['summary'] if 'summary' in query_metadata else "" + summary = query_metadata["summary"] if "summary" in query_metadata else "" glogger.debug("Read query summary: " + summary) - description = query_metadata['description'] if 'description' in query_metadata else "" + description = ( + query_metadata["description"] if "description" in query_metadata else "" + ) glogger.debug("Read query description: " + description) - method = query_metadata['method'].lower() if 'method' in query_metadata else "get" - if method not in ['get', 'post', 'head', 'put', 'delete', 'options', 'connect']: + method = query_metadata["method"].lower() if "method" in query_metadata else "get" + if method not in ["get", "post", "head", "put", "delete", "options", "connect"]: method = "get" - pagination = query_metadata['pagination'] if 'pagination' in query_metadata else "" + pagination = query_metadata["pagination"] if "pagination" in query_metadata else "" glogger.debug("Read query pagination: " + str(pagination)) - endpoint = query_metadata['endpoint'] if 'endpoint' in query_metadata else "" + endpoint = query_metadata["endpoint"] if "endpoint" in query_metadata else "" glogger.debug("Read query endpoint: " + endpoint) # If this query allows pagination, add page number as parameter @@ -206,7 +244,16 @@ def process_tpf_query_text(query_text, raw_repo_uri, call_name, extraMetadata): if pagination: params.append(pageUtils.getSwaggerPaginationDef(pagination)) - item = 
packItem('/' + call_name, method, tags, summary, description, params, query_metadata, extraMetadata) + item = packItem( + "/" + call_name, + method, + tags, + summary, + description, + params, + query_metadata, + extraMetadata, + ) return item @@ -220,136 +267,154 @@ def process_sparql_query_text(query_text, loader, call_name, extraMetadata): try: query_metadata = gquery.get_metadata(query_text, endpoint) except Exception as e: - raise Exception('Could not parse query {}: {}'.format(call_name, str(e))) - - tags = query_metadata['tags'] if 'tags' in query_metadata else [] - - summary = query_metadata['summary'] if 'summary' in query_metadata else "" - - description = query_metadata['description'] if 'description' in query_metadata else "" + raise Exception("Could not parse query {}: {}".format(call_name, str(e))) - method = query_metadata['method'].lower() if 'method' in query_metadata else "" - if method not in ['get', 'post', 'head', 'put', 'delete', 'options', 'connect']: - method = "" - - pagination = query_metadata['pagination'] if 'pagination' in query_metadata else "" - - endpoint_in_url = query_metadata['endpoint_in_url'] if 'endpoint_in_url' in query_metadata else True + tags, summary, description, method, pagination, endpoint_in_url = unpack_metadata( + query_metadata + ) # Processing of the parameters params = [] - # PV properties - item_properties = {} - # If this query allows pagination, add page number as parameter if pagination: params.append(pageUtils.getSwaggerPaginationDef(pagination)) - if query_metadata['type'] in ['SelectQuery', 'ConstructQuery', 'InsertData']: - # TODO: do something intelligent with the parameters! - # As per #3, prefetching IRIs via SPARQL and filling enum - parameters = query_metadata['parameters'] - - for _, p in list(parameters.items()): - param = {} - param['name'] = p['name'] - param['type'] = p['type'] - param['required'] = p['required'] - param['in'] = "query" - param['description'] = "A value of type {} that will substitute {} in the original query".format( - p['type'], p['original']) - if 'lang' in p: - param['description'] = "A value of type {}@{} that will substitute {} in the original query".format( - p['type'], p['lang'], p['original']) - if 'format' in p: - param['format'] = p['format'] - param['description'] = "A value of type {} ({}) that will substitute {} in the original query".format( - p['type'], p['format'], p['original']) - if 'enum' in p: - param['enum'] = p['enum'] - if 'default' in p: - param['default'] = p['default'] - - params.append(param) - if endpoint_in_url: - endpoint_param = {} - endpoint_param['name'] = "endpoint" - endpoint_param['type'] = "string" - endpoint_param['in'] = "query" - endpoint_param['description'] = "Alternative endpoint for SPARQL query" - endpoint_param['default'] = endpoint - params.append(endpoint_param) + params.append(pack_endpoint(endpoint)) # If this is a URL generated spec we need to force API calls with the specUrl parameter set if type(loader) is URLLoader: - specUrl_param = {} - specUrl_param['name'] = "specUrl" - specUrl_param['type'] = "string" - specUrl_param['in'] = "query" - specUrl_param['description'] = "URL of the API specification" - specUrl_param['default'] = loader.getRawRepoUri() - params.append(specUrl_param) - - if query_metadata['type'] == 'SelectQuery': - # Fill in the spec for SELECT - if not method: - method = 'get' - for pv in query_metadata['variables']: - item_properties[pv] = { - "name": pv, - "type": "object", - "required": ["type", "value"], - "properties": { - 
"type": { - "type": "string" - }, - "value": { - "type": "string" - }, - "xml:lang": { - "type": "string" - }, - "datatype": { - "type": "string" - } - } - } - - elif query_metadata['type'] == 'ConstructQuery': - if not method: - method = 'get' - elif query_metadata['type'] == 'InsertData' or query_metadata['type'] == 'Modify': # UPDATE queries should map here - if not method: - method = 'post' - elif query_metadata['type'] == 'UNKNOWN': - glogger.warning("grlc could not parse this query; assuming a plain, non-parametric SELECT in the API spec") - if not method: - method = 'get' + params.append(pack_specURL(loader)) + + # ONLY SELECT CONSTRUTCT AND INSERT CURRENTLY SUPPORTED! + if query_metadata["type"] in ["SelectQuery", "ConstructQuery", "InsertData"]: + for _, p in query_metadata["parameters"].items(): + params.append(build_parameter(p)) + elif query_metadata["type"] == "UNKNOWN": + glogger.warning( + "grlc could not parse this query; assuming a plain, non-parametric SELECT in the API spec" + ) else: # TODO: process all other kinds of queries - glogger.debug('Could not parse query {}: Query of type {} is currently unsupported'.format(call_name, query_metadata['type'])) - raise Exception('Could not parse query {}: Query of type {} is currently unsupported'.format(call_name, query_metadata['type'])) + glogger.debug( + "Could not parse query {}: Query of type {} is currently unsupported".format( + call_name, query_metadata["type"] + ) + ) + raise Exception( + "Could not parse query {}: Query of type {} is currently unsupported".format( + call_name, query_metadata["type"] + ) + ) # Finally: main structure of the callname spec - item = packItem('/' + call_name, method, tags, summary, description, params, query_metadata, extraMetadata) + item = packItem( + "/" + call_name, + method, + tags, + summary, + description, + params, + query_metadata, + extraMetadata, + ) return item -def packItem(call_name, method, tags, summary, description, params, query_metadata, extraMetadata): +def unpack_metadata(query_metadata): + tags = query_metadata["tags"] if "tags" in query_metadata else [] + + summary = query_metadata["summary"] if "summary" in query_metadata else "" + + description = ( + query_metadata["description"] if "description" in query_metadata else "" + ) + + method = query_metadata["method"].lower() if "method" in query_metadata else "" + if method not in ["get", "post", "head", "put", "delete", "options", "connect"]: + if query_metadata["type"] == "InsertData": + method = "post" + else: + method = "get" + + pagination = query_metadata["pagination"] if "pagination" in query_metadata else "" + + endpoint_in_url = ( + query_metadata["endpoint_in_url"] + if "endpoint_in_url" in query_metadata + else True + ) + return tags, summary, description, method, pagination, endpoint_in_url + + +def build_parameter(p): + param = {} + param["name"] = p["name"] + param["type"] = p["type"] + param["required"] = p["required"] + param["in"] = "query" + # TODO: can we simplify the description + param[ + "description" + ] = "A value of type {} that will substitute {} in the original query".format( + p["type"], p["original"] + ) + if "lang" in p: + param[ + "description" + ] = "A value of type {}@{} that will substitute {} in the original query".format( + p["type"], p["lang"], p["original"] + ) + if "format" in p: + param["format"] = p["format"] + param[ + "description" + ] = "A value of type {} ({}) that will substitute {} in the original query".format( + p["type"], p["format"], p["original"] + ) + if "enum" in p: 
+ param["enum"] = p["enum"] + if "default" in p: + param["default"] = p["default"] + return param + + +def pack_endpoint(endpoint): + endpoint_param = {} + endpoint_param["name"] = "endpoint" + endpoint_param["type"] = "string" + endpoint_param["in"] = "query" + endpoint_param["description"] = "Alternative endpoint for SPARQL query" + endpoint_param["default"] = endpoint + return endpoint_param + + +def pack_specURL(loader): + specUrl_param = {} + specUrl_param["name"] = "specUrl" + specUrl_param["type"] = "string" + specUrl_param["in"] = "query" + specUrl_param["description"] = "URL of the API specification" + specUrl_param["default"] = loader.getRawRepoUri() + return specUrl_param + + +def packItem( + call_name, method, tags, summary, description, params, query_metadata, extraMetadata +): """Generate a swagger specification item using all the given parameters.""" item = { - 'call_name': call_name, - 'method': method, - 'tags': tags, - 'summary': summary, - 'description': description, - 'params': params, - 'item_properties': None, - 'query': query_metadata['query'], - 'original_query': query_metadata.get('original_query', query_metadata['query']) + "call_name": call_name, + "method": method, + "tags": tags, + "summary": summary, + "description": description, + "params": params, + "item_properties": None, + "query": query_metadata["query"], + "original_query": query_metadata.get("original_query", query_metadata["query"]), } for extraField in extraMetadata: @@ -358,5 +423,6 @@ def packItem(call_name, method, tags, summary, description, params, query_metada return item + def get_warning_div(warn): return '

{}
'.format(warn) diff --git a/src/util.py b/src/util.py deleted file mode 100644 index e78ee28..0000000 --- a/src/util.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env python - -# SPDX-FileCopyrightText: 2022 Albert Meroño, Rinke Hoekstra, Carlos Martínez -# -# SPDX-License-Identifier: MIT - -# util.py: grlc utility functions - -import datetime - -date_handler = lambda obj: ( - obj.isoformat() - if isinstance(obj, datetime.datetime) - or isinstance(obj, datetime.date) - else None -) diff --git a/src/utils.py b/src/utils.py index 37317ea..0d3ed2b 100644 --- a/src/utils.py +++ b/src/utils.py @@ -23,12 +23,22 @@ glogger = glogging.getGrlcLogger(__name__) -def getLoader(user, repo, subdir=None, spec_url=None, sha=None, prov=None, git_type=None, branch='main'): + +def getLoader( + user, + repo, + subdir=None, + spec_url=None, + sha=None, + prov=None, + git_type=None, + branch=None, +): """Build a fileLoader (LocalLoader, GithubLoader, URLLoader) for the given parameters.""" if user is None and repo is None and not spec_url: loader = LocalLoader() elif spec_url: - loader = URLLoader(spec_url) + loader = URLLoader(spec_url) else: if git_type == static.TYPE_GITHUB: glogger.debug("Building GithubLoader....") @@ -41,15 +51,19 @@ def getLoader(user, repo, subdir=None, spec_url=None, sha=None, prov=None, git_t def build_spec(user, repo, subdir=None, sha=None, prov=None, extraMetadata=[]): """Build grlc specification for the given github user / repo. - + Deprecated.""" - glogger.warning("grlc.utils.build_spec is deprecated and will " \ - "be removed in the future. Use grlc.swagger.build_spec instead.") + glogger.warning( + "grlc.utils.build_spec is deprecated and will " + "be removed in the future. Use grlc.swagger.build_spec instead." + ) items, _ = swagger.build_spec(user, repo, subdir, sha, prov, extraMetadata) return items -def build_swagger_spec(user, repo, subdir, spec_url, sha, serverName, git_type, branch='main'): +def build_swagger_spec( + user, repo, subdir, spec_url, sha, serverName, git_type, branch=None +): """Build grlc specification for the given github user / repo in swagger format.""" if user and repo: # Init provenance recording @@ -58,76 +72,257 @@ def build_swagger_spec(user, repo, subdir, spec_url, sha, serverName, git_type, prov_g = None swag = swagger.get_blank_spec() - swag['host'] = serverName + swag["host"] = serverName try: loader = getLoader(user, repo, subdir, spec_url, sha, prov_g, git_type, branch) except Exception as e: # If repo does not exits - swag['info'] = { - 'title': 'ERROR!', - 'description': str(e) - } - swag['paths'] = {} + swag["info"] = {"title": "ERROR!", "description": str(e)} + swag["paths"] = {} return swag - prev_commit, next_commit, info, basePath = \ - swagger.get_repo_info(loader, sha, prov_g) - swag['prev_commit'] = prev_commit - swag['next_commit'] = next_commit - swag['info'] = info - swag['basePath'] = basePath + prev_commit, next_commit, info, basePath = swagger.get_repo_info( + loader, sha, prov_g + ) + swag["prev_commit"] = prev_commit + swag["next_commit"] = next_commit + swag["info"] = info + swag["basePath"] = basePath # TODO: can we pass loader to build_spec ? --> Ideally yes! 
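# Editor's note on the TODO above: build_swagger_spec() constructs a loader
# via getLoader(), and swagger.build_spec() below then rebuilds an identical
# loader from the same arguments, so every spec request fetches the repository
# twice. A minimal sketch of the refactor the TODO suggests (hypothetical, not
# part of this changeset): let build_spec accept an optional pre-built loader.
import grlc.utils


def build_spec_sketch(user, repo, subdir=None, query_url=None, sha=None,
                      prov=None, extraMetadata=[], git_type=None, branch=None,
                      loader=None):
    """Hypothetical variant of swagger.build_spec resolving the TODO."""
    if loader is None:  # fall back to the current behaviour when none is given
        loader = grlc.utils.getLoader(user, repo, subdir, query_url, sha=sha,
                                      prov=prov, git_type=git_type,
                                      branch=branch)
    files = loader.fetchFiles()
    ...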
- spec, warnings = swagger.build_spec(user, repo, subdir, spec_url, sha, prov_g, [], git_type, branch) + spec, warnings = swagger.build_spec( + user, repo, subdir, spec_url, sha, prov_g, [], git_type, branch + ) # Use items to build API paths for item in spec: - swag['paths'][item['call_name']] = swagger.get_path_for_item(item) + swag["paths"][item["call_name"]] = swagger.get_path_for_item(item) - # TODO: Add bootstrap style to top level HTML + # TODO: Add bootstrap style to top level HTML # Without a better place to display warnings, we can make them part of the description. - if 'description' not in swag['info'] or swag['info']['description'] is None: - swag['info']['description'] = '' + if "description" not in swag["info"] or swag["info"]["description"] is None: + swag["info"]["description"] = "" for warn in warnings: - swag['info']['description'] += swagger.get_warning_div(warn) + swag["info"]["description"] += swagger.get_warning_div(warn) if prov_g: prov_g.end_prov_graph() - swag['prov'] = prov_g.serialize(format='turtle') + swag["prov"] = prov_g.serialize(format="turtle") return swag -def dispatch_query(user, repo, query_name, subdir=None, spec_url=None, sha=None, - content=None, requestArgs={}, acceptHeader='application/json', - requestUrl='http://', formData={}, method="POST", git_type=None, branch='main'): +def dispatch_query( + user, + repo, + query_name, + subdir=None, + spec_url=None, + sha=None, + content=None, + requestArgs={}, + acceptHeader="application/json", + requestUrl="http://", + formData={}, + method="POST", + git_type=None, + branch=None, +): """Executes the specified SPARQL or TPF query.""" - loader = getLoader(user, repo, subdir, spec_url, sha=sha, prov=None, git_type=git_type, branch=branch) + loader = getLoader( + user, + repo, + subdir, + spec_url, + sha=sha, + prov=None, + git_type=git_type, + branch=branch, + ) query, q_type = loader.getTextForName(query_name) # Call name implemented with SPARQL query - if q_type == qType['SPARQL'] or q_type == qType['JSON']: - resp, status, headers = dispatchSPARQLQuery(query, loader, requestArgs, acceptHeader, content, formData, - requestUrl, method) - - if acceptHeader == 'application/json': + if q_type == qType["SPARQL"] or q_type == qType["JSON"]: + resp, status, headers = dispatchSPARQLQuery( + query, + loader, + requestArgs, + acceptHeader, + content, + formData, + requestUrl, + method, + ) + + if acceptHeader == "application/json": # TODO: transform JSON result if suitable pass return resp, status, headers # Call name implemented with TPF query - elif q_type == qType['TPF']: + elif q_type == qType["TPF"]: resp, status, headers = dispatchTPFQuery(query, loader, acceptHeader, content) return resp, status, headers else: - return "Couldn't find a SPARQL, RDF dump, or TPF query with the requested name", 404, {} + return ( + "Couldn't find a SPARQL, RDF dump, or TPF query with the requested name", + 404, + {}, + ) + + +def _dispatchQueryDump( + raw_sparql_query, endpoint, mime_type, rewritten_query, acceptHeader, content +): + glogger.debug( + "Detected {} MIME type, proceeding with locally loading remote dump".format( + mime_type + ) + ) + + g = Graph() + try: + g.parse(endpoint, format=mime_type) + glogger.debug( + "Local RDF graph loaded successfully with {} triples".format(len(g)) + ) + except Exception as e: + glogger.error(e) + + results = g.query(rewritten_query, result="sparql") + + # Prepare return format as requested + if "application/json" in acceptHeader or ( + content and "application/json" in 
static.mimetypes[content] + ): + resp = results.serialize(format="json") + code = 200 + glogger.debug( + "Results of SPARQL query against locally loaded dump: {}".format(resp) + ) + elif "text/csv" in acceptHeader or ( + content and "text/csv" in static.mimetypes[content] + ): + resp = results.serialize(format="csv") + code = 200 + glogger.debug( + "Results of SPARQL query against locally loaded dump: {}".format(resp) + ) + else: + resp = "Unacceptable requested format" + code = 415 + headers = {} + glogger.debug("Finished processing query against RDF dump, end of use case") + del g + return resp, code, headers + + +def _dispatchQueryInsert( + method, rewritten_query, formData, acceptHeader, endpoint, auth, headers +): + glogger.debug("Processing INSERT query") + if method != "POST": + glogger.debug("INSERT queries must use POST method") + return {"error": "INSERT queries must use POST method"}, 400, headers + + # Rewrite INSERT + rewritten_query = rewritten_query.replace("?_g_iri", "{}".format(formData.get("g"))) + rewritten_query = rewritten_query.replace("

", formData.get("data")) + glogger.debug("INSERT query rewritten as {}".format(rewritten_query)) + + # Prepare HTTP POST request + reqHeaders = { + "Accept": acceptHeader, + "Content-Type": "application/sparql-update", + } + response = requests.post( + endpoint, data=rewritten_query, headers=reqHeaders, auth=auth + ) + glogger.debug("Response header from endpoint: " + response.headers["Content-Type"]) + + # Response headers + resp = response.text + code = 200 + headers["Content-Type"] = response.headers["Content-Type"] + + return resp, code, headers -def dispatchSPARQLQuery(raw_sparql_query, loader, requestArgs, acceptHeader, content, - formData, requestUrl, method="GET"): +def _dispatchQuerySelect( + acceptHeader, content, rewritten_query, endpoint, auth, headers, endpoint_method +): + reqHeaders = {"Accept": acceptHeader, "Content-Type": "application/sparql-query"} + if content: + reqHeaders = { + "Accept": static.mimetypes[content], + "Content-Type": "application/sparql-query", + } + + glogger.debug("Sending HTTP request to SPARQL endpoint") + glogger.debug("... w/params: {}".format(rewritten_query)) + glogger.debug("... w/headers: {}".format(reqHeaders)) + glogger.debug("... w/auth: {}".format(auth)) + glogger.debug("... via: {}".format(endpoint_method)) + + try: + if endpoint_method == "GET": + data = {"query": rewritten_query} + response = requests.get( + endpoint, params=data, headers=reqHeaders, auth=auth + ) + else: + response = requests.post( + endpoint, data=rewritten_query, headers=reqHeaders, auth=auth + ) + # Response headers + resp = response.text + code = 200 + glogger.debug( + "Response header from endpoint: " + response.headers["Content-Type"] + ) + except Exception as e: + # Error contacting SPARQL endpoint + glogger.debug("Exception encountered while connecting to SPARQL endpoint") + return {"error": str(e)}, 400, headers + + glogger.debug("Got HTTP response from to SPARQL endpoint: {}".format(resp)) + headers["Content-Type"] = response.headers["Content-Type"] + + return resp, code, headers + + +def _dispatchTransformerPostprocess(query_metadata, resp): + if "proto" in query_metadata: + resp = SPARQLTransformer.post_process( + json.loads(resp), query_metadata["proto"], query_metadata["opt"] + ) + else: # case ("transform" in query_metadata and acceptHeader == "application/json") + if "@graph" in query_metadata["transform"]: # SPARQLTransformer for JSON-LD + graph = query_metadata["transform"]["@graph"] + proto = graph[0] if isinstance(graph, list) else graph + rq = query_metadata["transform"] + else: # SPARQLTransformer for standard JSON + proto = query_metadata["transform"] + rq = {"proto": proto} + + _, _, opt = SPARQLTransformer.pre_process(rq) + resp = SPARQLTransformer.post_process(json.loads(resp), proto, opt) + return resp + + +def dispatchSPARQLQuery( + raw_sparql_query, + loader, + requestArgs, + acceptHeader, + content, + formData, + requestUrl, + method="GET", +): """Executes the specified SPARQL query.""" endpoint, auth = gquery.guess_endpoint_uri(raw_sparql_query, loader) - if endpoint == '': - return 'No SPARQL endpoint indicated', 407, {} + if endpoint == "": + return "No SPARQL endpoint indicated", 407, {} glogger.debug("=====================================================") glogger.debug("Sending query to SPARQL endpoint: {}".format(endpoint)) @@ -137,112 +332,84 @@ def dispatchSPARQLQuery(raw_sparql_query, loader, requestArgs, acceptHeader, con query_metadata = gquery.get_metadata(raw_sparql_query, endpoint) except Exception as e: # extracting 
metadata - return { 'error': str(e) }, 400, {} - - acceptHeader = 'application/json' if isinstance(raw_sparql_query, dict) else acceptHeader - pagination = query_metadata['pagination'] if 'pagination' in query_metadata else "" - - rewritten_query = query_metadata['query'] + return {"error": str(e)}, 400, {} + + acceptHeader = ( + "application/json" if isinstance(raw_sparql_query, dict) else acceptHeader + ) + pagination = query_metadata["pagination"] if "pagination" in query_metadata else "" + endpoint_method = ( + query_metadata["endpoint-method"] + if "endpoint-method" in query_metadata + else "POST" + ) + rewritten_query = query_metadata["query"] # Rewrite query using parameter values - if query_metadata['type'] == 'SelectQuery' or query_metadata['type'] == 'ConstructQuery': - rewritten_query = gquery.rewrite_query(query_metadata['original_query'], query_metadata['parameters'], requestArgs) + if ( + query_metadata["type"] == "SelectQuery" + or query_metadata["type"] == "ConstructQuery" + ): + rewritten_query = gquery.rewrite_query( + query_metadata["original_query"], query_metadata["parameters"], requestArgs + ) # Rewrite query using pagination - if query_metadata['type'] == 'SelectQuery' and 'pagination' in query_metadata: - rewritten_query = gquery.paginate_query(rewritten_query, query_metadata['pagination'], requestArgs) + if query_metadata["type"] == "SelectQuery" and "pagination" in query_metadata: + rewritten_query = gquery.paginate_query( + rewritten_query, query_metadata["pagination"], requestArgs + ) resp = None + code = 0 headers = {} # If we have a mime field, we load the remote dump and query it locally - if 'mime' in query_metadata and query_metadata['mime']: - glogger.debug( - "Detected {} MIME type, proceeding with locally loading remote dump".format(query_metadata['mime'])) - g = Graph() - try: - query_metadata = gquery.get_metadata(raw_sparql_query, endpoint) - g.parse(endpoint, format=query_metadata['mime']) - glogger.debug("Local RDF graph loaded successfully with {} triples".format(len(g))) - except Exception as e: - glogger.error(e) - results = g.query(rewritten_query, result='sparql') - # Prepare return format as requested - resp_string = "" - if 'application/json' in acceptHeader or (content and 'application/json' in static.mimetypes[content]): - resp_string = results.serialize(format='json') - glogger.debug("Results of SPARQL query against locally loaded dump: {}".format(resp_string)) - elif 'text/csv' in acceptHeader or (content and 'text/csv' in static.mimetypes[content]): - resp_string = results.serialize(format='csv') - glogger.debug("Results of SPARQL query against locally loaded dump: {}".format(resp_string)) - else: - return 'Unacceptable requested format', 415, {} - glogger.debug("Finished processing query against RDF dump, end of use case") - del g + if "mime" in query_metadata and query_metadata["mime"]: + resp, code, headers = _dispatchQueryDump( + raw_sparql_query, + endpoint, + query_metadata["mime"], + rewritten_query, + acceptHeader, + content, + ) # Check for INSERT/POST - elif query_metadata['type'] == 'InsertData': - glogger.debug("Processing INSERT query") - if method != 'POST': - glogger.debug('INSERT queries must use POST method') - return { 'error': 'INSERT queries must use POST method' }, 400, headers - - # Rewrite INSERT - rewritten_query = rewritten_query.replace("?_g_iri", "{}".format(formData.get('g'))) - rewritten_query = rewritten_query.replace("

", formData.get('data')) - glogger.debug("INSERT query rewritten as {}".format(rewritten_query)) - - # Prepare HTTP POST request - reqHeaders = {'Accept': acceptHeader, 'Content-Type': 'application/sparql-update'} - response = requests.post(endpoint, data=rewritten_query, headers=reqHeaders, auth=auth) - glogger.debug('Response header from endpoint: ' + response.headers['Content-Type']) - - # Response headers - resp = response.text - headers['Content-Type'] = response.headers['Content-Type'] + elif query_metadata["type"] == "InsertData": + resp, code, headers = _dispatchQueryInsert( + method, rewritten_query, formData, acceptHeader, endpoint, auth, headers + ) # If there's no mime type, the endpoint is an actual SPARQL endpoint else: - reqHeaders = {'Accept': acceptHeader} - if content: - reqHeaders = {'Accept': static.mimetypes[content]} - data = {'query': rewritten_query} - - glogger.debug('Sending HTTP request to SPARQL endpoint with params: {}'.format(data)) - glogger.debug('Sending HTTP request to SPARQL endpoint with headers: {}'.format(reqHeaders)) - glogger.debug('Sending HTTP request to SPARQL endpoint with auth: {}'.format(auth)) - try: - response = requests.get(endpoint, params=data, headers=reqHeaders, auth=auth) - except Exception as e: - # Error contacting SPARQL endpoint - glogger.debug('Exception encountered while connecting to SPARQL endpoint') - return { 'error': str(e) }, 400, headers - glogger.debug('Response header from endpoint: ' + response.headers['Content-Type']) - - # Response headers - resp = response.text - - glogger.debug('Got HTTP response from to SPARQL endpoint: {}'.format(resp)) - headers['Content-Type'] = response.headers['Content-Type'] + resp, code, headers = _dispatchQuerySelect( + acceptHeader, + content, + rewritten_query, + endpoint, + auth, + headers, + endpoint_method, + ) # If the query is paginated, set link HTTP headers if pagination: # Get number of total results count = gquery.count_query_results(rewritten_query, endpoint) - pageArg = requestArgs.get('page', None) - headerLink = pageUtils.buildPaginationHeader(count, pagination, pageArg, requestUrl) - headers['Link'] = headerLink + pageArg = requestArgs.get("page", None) + headerLink = pageUtils.buildPaginationHeader( + count, pagination, pageArg, requestUrl + ) + headers["Link"] = headerLink - if 'proto' in query_metadata: # sparql transformer - resp = SPARQLTransformer.post_process(json.loads(resp), query_metadata['proto'], query_metadata['opt']) + if "proto" in query_metadata or ( + "transform" in query_metadata and acceptHeader == "application/json" + ): + resp = _dispatchTransformerPostprocess(query_metadata, resp) - if 'transform' in query_metadata and acceptHeader == 'application/json': # sparql transformer - rq = { 'proto': query_metadata['transform'] } - _, _, opt = SPARQLTransformer.pre_process(rq) - resp = SPARQLTransformer.post_process(json.loads(resp), query_metadata['transform'], opt) - - headers['Server'] = 'grlc/' + grlc_version - return resp, 200, headers + headers["Server"] = "grlc/" + grlc_version + return resp, code, headers def dispatchTPFQuery(raw_tpf_query, loader, acceptHeader, content): @@ -255,21 +422,27 @@ def dispatchTPFQuery(raw_tpf_query, loader, acceptHeader, content): # TODO: pagination for TPF # Preapre HTTP request - reqHeaders = {'Accept': acceptHeader, 'Authorization': 'token {}'.format(static.ACCESS_TOKEN)} + reqHeaders = { + "Accept": acceptHeader, + "Authorization": "token {}".format(static.SPARQL_ACCESS_TOKEN), + } if content: - reqHeaders = {'Accept': 
static.mimetypes[content], 'Authorization': 'token {}'.format(static.ACCESS_TOKEN)} - tpf_list = re.split('\n|=', raw_tpf_query) - subject = tpf_list[tpf_list.index('subject') + 1] - predicate = tpf_list[tpf_list.index('predicate') + 1] - object = tpf_list[tpf_list.index('object') + 1] - data = {'subject': subject, 'predicate': predicate, 'object': object} + reqHeaders = { + "Accept": static.mimetypes[content], + "Authorization": "token {}".format(static.SPARQL_ACCESS_TOKEN), + } + tpf_list = re.split("\n|=", raw_tpf_query) + subject = tpf_list[tpf_list.index("subject") + 1] + predicate = tpf_list[tpf_list.index("predicate") + 1] + object = tpf_list[tpf_list.index("object") + 1] + data = {"subject": subject, "predicate": predicate, "object": object} response = requests.get(endpoint, params=data, headers=reqHeaders, auth=auth) - glogger.debug('Response header from endpoint: ' + response.headers['Content-Type']) + glogger.debug("Response header from endpoint: " + response.headers["Content-Type"]) # Response headers resp = response.text headers = {} - headers['Content-Type'] = response.headers['Content-Type'] - headers['Server'] = 'grlc/' + grlc_version + headers["Content-Type"] = response.headers["Content-Type"] + headers["Server"] = "grlc/" + grlc_version return resp, 200, headers diff --git a/tests/mock_data.py b/tests/mock_data.py index 28f241d..f42c7e5 100644 --- a/tests/mock_data.py +++ b/tests/mock_data.py @@ -8,74 +8,127 @@ from collections import namedtuple from grlc.fileLoaders import LocalLoader +from grlc import static -base_url = path.join('tests', 'repo') -def buildEntry(entryName): - entryName = entryName.replace(base_url, '') +import base64 + +static.GITHUB_ACCESS_TOKEN = ( + "fake-token" # Manually overwrite access token to avoid empty token +) + +base_url = path.join("tests", "repo") + + +def buildGHEntry(entryName): + entryName = entryName.replace(base_url, "") # Named tuple containing properties of mocked github ContentFile - MockGithubContentFile = namedtuple('MockGithubContentFile', 'download_url name path type decoded_content') + MockGithubContentFile = namedtuple( + "MockGithubContentFile", "download_url name path type decoded_content" + ) return MockGithubContentFile( - download_url = entryName, - name = entryName, - path = entryName, - type = u'file', - decoded_content = 'FAKE FILE CONTENT'.encode() # Because Github ContentFile object contains bytes. + download_url=entryName, + name=entryName, + path=entryName, + type="file", + decoded_content="FAKE FILE CONTENT".encode(), # Because Github ContentFile object contains bytes. 
) -mock_files = [ buildEntry(f) for f in glob(path.join(base_url, '*')) ] + + +def buildGLEntry(entryName): + entryName = entryName.replace(base_url, "") + + return {"type": "blob", "name": entryName} + + +mock_gh_files = [buildGHEntry(f) for f in glob(path.join(base_url, "*"))] +mock_gl_files = [buildGLEntry(f) for f in glob(path.join(base_url, "*"))] + class MockGithubRepo: def get_contents(self, filename, ref=None): if filename == "": - return mock_files + return mock_gh_files else: - for f in mock_files: - if filename in f.name: # filenames contain extra / + for f in mock_gh_files: + if filename in f.name: # filenames contain extra / return f return None -class MockGitlabRepo: - pass +class MockGitlabModule: + def __init__(self) -> None: + gl_repo = Mock() + + gl_repo.repository_tree = Mock(return_value=mock_gl_files) + gl_repo.files.get.side_effect = self.gl_files_content + gl_repo.default_branch = "main" + self.projects = Mock() + self.projects.get.return_value = gl_repo + + def gl_files_content(self, file_path, ref): + """Returns none if the file is not in the known repo""" + for glf in mock_gl_files: + if file_path in glf["name"]: # filenames contain extra / + f = Mock() + f_content = "The text of a file" + f.content = base64.b64encode(f_content.encode("utf-8")) + return f + return None def mock_requestsUrl(url, headers={}, params={}): - url = url.replace('http://example.org/', 'tests/repo/') - f = open(url, 'r') + url = url.replace("http://example.org/", "tests/repo/") + f = open(url, "r") lines = f.readlines() - text = ''.join(lines) + text = "".join(lines) return_value = Mock(status_code=200) return_value.text = text return return_value + mock_simpleSparqlResponse = { - "head": { "link": [], "vars": ["p", "o"] }, + "head": {"link": [], "vars": ["p", "o"]}, "results": { "bindings": [ - { "p": { "type": "string", "value": "p1" } , "o": { "type": "string", "value": "o1" }}, - { "p": { "type": "string", "value": "p2" } , "o": { "type": "string", "value": "o2" }}, - { "p": { "type": "string", "value": "p3" } , "o": { "type": "string", "value": "o3" }}, - { "p": { "type": "string", "value": "p4" } , "o": { "type": "string", "value": "o4" }}, - { "p": { "type": "string", "value": "p5" } , "o": { "type": "string", "value": "o5" }} + { + "p": {"type": "string", "value": "p1"}, + "o": {"type": "string", "value": "o1"}, + }, + { + "p": {"type": "string", "value": "p2"}, + "o": {"type": "string", "value": "o2"}, + }, + { + "p": {"type": "string", "value": "p3"}, + "o": {"type": "string", "value": "o3"}, + }, + { + "p": {"type": "string", "value": "p4"}, + "o": {"type": "string", "value": "o4"}, + }, + { + "p": {"type": "string", "value": "p5"}, + "o": {"type": "string", "value": "o5"}, + }, ] - } + }, } + def mock_process_sparql_query_text(query_text, raw_repo_uri, call_name, extraMetadata): - mockItem = { - "status": "This is a mock item", - "call_name": call_name - } + mockItem = {"status": "This is a mock item", "call_name": call_name} return mockItem + filesInRepo = [ { - u'name': u'fakeFile1.rq', - u'download_url': u'https://example.org/path/to/fakeFile.rq', - u'decoded_content': 'CONTENT ?'.encode() # Because Github ContentFile object contains bytes. + "name": "fakeFile1.rq", + "download_url": "https://example.org/path/to/fakeFile.rq", + "decoded_content": "CONTENT ?".encode(), # Because Github ContentFile object contains bytes. 
} ] -mockLoader = LocalLoader(base_url) \ No newline at end of file +mockLoader = LocalLoader(base_url) diff --git a/tests/repo/test-endpoint-get.rq b/tests/repo/test-endpoint-get.rq new file mode 100644 index 0000000..1a9416b --- /dev/null +++ b/tests/repo/test-endpoint-get.rq @@ -0,0 +1,12 @@ +#+ summary: Sample query for testing SPARQL endpoint method +#+ endpoint: "http://test-endpoint/transform/sparql/" +#+ transform: { +#+ "key": "?p", +#+ "value": "?o", +#+ "$anchor": "key" +#+ } +#+ endpoint-method: GET + +select ?p ?o where { + ?_id_iri ?p ?o +} LIMIT 5 diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index 126608f..2e31ab1 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -7,191 +7,210 @@ from tests.mock_data import mockLoader, mock_requestsUrl from grlc.server import app -@pytest.fixture(scope='class') + +@pytest.fixture(scope="class") def client(request): - '''Build http client''' + """Build http client""" with app.test_client() as client: yield client + class TestGrlcHome: - '''Test all grlc server endpoints.''' + """Test all grlc server endpoints.""" def test_home(self, client): """Testing get from grlc home page""" - rv = client.get('/') + rv = client.get("/") assert rv.status_code == 200 - assert 'text/html' in rv.content_type - body = str(object=rv.data, encoding=rv.charset, errors='strict') - assert 'grlc' in body - assert 'grlc generates RESTful APIs using SPARQL queries stored in GitHub repositories' in body + assert "text/html" in rv.content_type + body = str(object=rv.data, errors="strict") + assert "grlc" in body + assert ( + "grlc generates RESTful APIs using SPARQL queries stored in GitHub repositories" + in body + ) + class TestGrlcFrontEnd: - '''Test all grlc api front end generation (swagger html page).''' + """Test all grlc api front end generation (swagger html page).""" def validate(self, response): assert response.status_code == 200 - assert 'text/html' in response.content_type - body = str(object=response.data, encoding=response.charset, errors='strict') + assert "text/html" in response.content_type + body = str(object=response.data, errors="strict") assert '

' in body def test_repo(self, client): """...""" - rv = client.get('/api-git/testuser/testrepo') + rv = client.get("/api-git/testuser/testrepo") self.validate(rv) def test_subdir(self, client): """...""" - rv = client.get('/api-git/testuser/testrepo/subdir/') + rv = client.get("/api-git/testuser/testrepo/subdir/") self.validate(rv) def test_commit(self, client): """...""" - rv = client.get('/api-git/testuser/testrepo/commit/') + rv = client.get("/api-git/testuser/testrepo/commit/") self.validate(rv) def test_subdir_commit(self, client): """...""" - rv = client.get('/api-git/testuser/testrepo/subdir//commit/') + rv = client.get("/api-git/testuser/testrepo/subdir//commit/") self.validate(rv) def test_local(self, client): """...""" - rv = client.get('/api-local/') + rv = client.get("/api-local/") self.validate(rv) def test_url(self, client): """...""" - rv = client.get('/api-url/?specUrl=') + rv = client.get("/api-url/?specUrl=") self.validate(rv) + class TestGrlcSpec: - '''Test all grlc api spec generation.''' + """Test all grlc api spec generation.""" def validate(self, response): assert response.status_code == 200 - assert 'application/json' in response.content_type + assert "application/json" in response.content_type spec = response.json - assert spec['swagger'] == '2.0' - assert 'paths' in spec - assert spec['info']['title'] != 'ERROR!' + assert spec["swagger"] == "2.0" + assert "paths" in spec + assert spec["info"]["title"] != "ERROR!" - @patch('grlc.utils.getLoader') + @patch("grlc.utils.getLoader") def test_repo(self, mock_loader, client): """...""" mock_loader.return_value = mockLoader - rv = client.get('/api-git/testuser/testrepo/swagger') + rv = client.get("/api-git/testuser/testrepo/swagger") self.validate(rv) - @patch('grlc.utils.getLoader') + @patch("grlc.utils.getLoader") def test_subdir(self, mock_loader, client): """...""" mock_loader.return_value = mockLoader - rv = client.get('/api-git/testuser/testrepo/subdir/testsubdir/swagger') + rv = client.get("/api-git/testuser/testrepo/subdir/testsubdir/swagger") self.validate(rv) - @patch('grlc.utils.getLoader') + @patch("grlc.utils.getLoader") def test_commit(self, mock_loader, client): """...""" mock_loader.return_value = mockLoader - rv = client.get('/api-git/testuser/testrepo/commit/local/swagger') + rv = client.get("/api-git/testuser/testrepo/commit/local/swagger") self.validate(rv) - @patch('grlc.utils.getLoader') + @patch("grlc.utils.getLoader") def test_subdir_commit(self, mock_loader, client): """...""" mock_loader.return_value = mockLoader - rv = client.get('/api-git/testuser/testrepo/subdir/testsubdir/commit/local/swagger') + rv = client.get( + "/api-git/testuser/testrepo/subdir/testsubdir/commit/local/swagger" + ) self.validate(rv) def test_local(self, client): """...""" - rv = client.get('/api-local/swagger') + rv = client.get("/api-local/swagger") self.validate(rv) - @patch('requests.get', side_effect=mock_requestsUrl) + @patch("requests.get", side_effect=mock_requestsUrl) def test_url(self, mock_get, client): """...""" - rv = client.get('/api-url/swagger?specUrl=http://example.org/url.yml') + rv = client.get("/api-url/swagger?specUrl=http://example.org/url.yml") self.validate(rv) + class TestGrlcExec: - '''Test all grlc api execution endpoints.''' + """Test all grlc api execution endpoints.""" @classmethod def setup_class(self): - query_response = [{ "result": "mock" }] + query_response = [{"result": "mock"}] status = 200 - headers = { 'Content-Type': 'application/json' } + headers = {"Content-Type": 
"application/json"} self.mock_response = query_response, status, headers def validate(self, response): assert response.status_code == 200 - assert 'application/json' in response.content_type + assert "application/json" in response.content_type assert len(response.json) > 0 - assert 'result' in response.json[0] - assert response.json[0]['result'] == 'mock' + assert "result" in response.json[0] + assert response.json[0]["result"] == "mock" - @patch('grlc.utils.getLoader') - @patch('grlc.utils.dispatch_query') + @patch("grlc.utils.getLoader") + @patch("grlc.utils.dispatch_query") def test_repo(self, mock_dispatch, mock_loader, client): """...""" mock_dispatch.return_value = self.mock_response - rv = client.get('/api-git/testuser/testrepo/query_name', - headers={'Accept': 'application/json'}) + rv = client.get( + "/api-git/testuser/testrepo/query_name", + headers={"Accept": "application/json"}, + ) self.validate(rv) - @patch('grlc.utils.getLoader') - @patch('grlc.utils.dispatch_query') + @patch("grlc.utils.getLoader") + @patch("grlc.utils.dispatch_query") def test_subdir(self, mock_dispatch, mock_loader, client): """...""" mock_dispatch.return_value = self.mock_response # Check types of data passed to make_response. - # If jsonify(dict) fixes the issue, patch make_response to jsonify(query_response) before + # If jsonify(dict) fixes the issue, patch make_response to jsonify(query_response) before # returning data to rv. - rv = client.get('/api-git/testuser/testrepo/subdir/testsubdir/query_name', - headers={'accept': 'application/json'}) + rv = client.get( + "/api-git/testuser/testrepo/subdir/testsubdir/query_name", + headers={"accept": "application/json"}, + ) self.validate(rv) - @patch('grlc.utils.getLoader') - @patch('grlc.utils.dispatch_query') + @patch("grlc.utils.getLoader") + @patch("grlc.utils.dispatch_query") def test_commit(self, mock_dispatch, mock_loader, client): """...""" mock_dispatch.return_value = self.mock_response - rv = client.get('/api-git/testuser/testrepo/commit/local/query_name', - headers={'accept': 'application/json'}) + rv = client.get( + "/api-git/testuser/testrepo/commit/local/query_name", + headers={"accept": "application/json"}, + ) self.validate(rv) - @patch('grlc.utils.getLoader') - @patch('grlc.utils.dispatch_query') + @patch("grlc.utils.getLoader") + @patch("grlc.utils.dispatch_query") def test_subdir_commit(self, mock_dispatch, mock_loader, client): """...""" mock_dispatch.return_value = self.mock_response - rv = client.get('/api-git/testuser/testrepo/subdir/testsubdir/commit/local/query_name', - headers={'accept': 'application/json'}) + rv = client.get( + "/api-git/testuser/testrepo/subdir/testsubdir/commit/local/query_name", + headers={"accept": "application/json"}, + ) self.validate(rv) - @patch('grlc.utils.dispatch_query') + @patch("grlc.utils.dispatch_query") def test_local(self, mock_dispatch, client): """...""" mock_dispatch.return_value = self.mock_response - rv = client.get('/api-local/query_name', - headers={'accept': 'application/json'}) + rv = client.get("/api-local/query_name", headers={"accept": "application/json"}) self.validate(rv) - @patch('requests.get', side_effect=mock_requestsUrl) - @patch('grlc.utils.dispatch_query') + @patch("requests.get", side_effect=mock_requestsUrl) + @patch("grlc.utils.dispatch_query") def test_url(self, mock_dispatch, mock_get, client): """...""" mock_dispatch.return_value = self.mock_response - rv = client.get('/api-url/?specUrl=http://example.org/url.yml', - headers={'accept': 'application/json'}) + rv = 
client.get( + "/api-url/?specUrl=http://example.org/url.yml", + headers={"accept": "application/json"}, + ) self.validate(rv) diff --git a/tests/test_gquery.py b/tests/test_gquery.py index 49f86fb..d22a9fd 100644 --- a/tests/test_gquery.py +++ b/tests/test_gquery.py @@ -18,182 +18,198 @@ class TestGQuery(unittest.TestCase): @classmethod def setUpClass(self): self.loader = mockLoader - self.app = Flask('unittests') + self.app = Flask("unittests") def test_guess_endpoint(self): - with self.app.test_request_context('/?endpoint=http://url-endpoint/from-url/sparql'): - endpoint, _ = gquery.guess_endpoint_uri('', self.loader) - self.assertIn('from-url', endpoint, - 'Should match endpoint given in url') - - with self.app.test_request_context('/'): - endpoint, _ = gquery.guess_endpoint_uri('', self.loader) - self.assertIn('from-file', endpoint, - 'Should match endpoint in endpoint.txt') + with self.app.test_request_context( + "/?endpoint=http://url-endpoint/from-url/sparql" + ): + endpoint, _ = gquery.guess_endpoint_uri("", self.loader) + self.assertIn("from-url", endpoint, "Should match endpoint given in url") + + with self.app.test_request_context("/"): + endpoint, _ = gquery.guess_endpoint_uri("", self.loader) + self.assertIn( + "from-file", endpoint, "Should match endpoint in endpoint.txt" + ) - rq, _ = self.loader.getTextForName('test-rq') + rq, _ = self.loader.getTextForName("test-rq") endpoint, _ = gquery.guess_endpoint_uri(rq, self.loader) - self.assertIn('from-decorator', endpoint, - 'Should match endpoint in test-rq.rq') + self.assertIn( + "from-decorator", endpoint, "Should match endpoint in test-rq.rq" + ) def test_get_parameters(self): - rq, _ = self.loader.getTextForName('test-rq') + rq, _ = self.loader.getTextForName("test-rq") + + params = gquery.get_parameters(rq, "", {}) - params = gquery.get_parameters(rq, '', '', {}) + self.assertGreaterEqual(len(params), 7, "Should find some parameters") for paramName, param in params.items(): - self.assertIn('name', param, 'Should have a name') - self.assertIn('type', param, 'Should have a type') - self.assertIn('required', param, 'Should have a required') - - orig = param['original'] - if '_iri' in orig: - self.assertEqual(param['type'], 'string', 'Should be type string') - self.assertEqual(param['format'], 'iri', 'Should be format iri') - if '_number' in orig: - self.assertEqual(param['type'], 'number', - 'Should be type number') - if '_literal' in orig: - self.assertEqual(param['type'], 'literal', - 'Should be type literal') - if '_en' in orig: - self.assertEqual(param['type'], 'literal', - 'Should be type literal') - self.assertEqual(param['lang'], 'en', 'Should be en language') - if '_integer' in orig: + self.assertIn("name", param, "Should have a name") + self.assertIn("type", param, "Should have a type") + self.assertIn("required", param, "Should have a required") + + orig = param["original"] + if "_iri" in orig: + self.assertEqual(param["type"], "string", "Should be type string") + self.assertEqual(param["format"], "iri", "Should be format iri") + if "_number" in orig: + self.assertEqual(param["type"], "number", "Should be type number") + if "_literal" in orig: + self.assertEqual(param["type"], "literal", "Should be type literal") + if "_en" in orig: + self.assertEqual(param["type"], "string", "Should be type literal") + self.assertEqual(param["lang"], "en", "Should be en language") + if "_integer" in orig: self.assertEqual( - param['datatype'], 'xsd:integer', 'Should be type xsd:integer') - if '_xsd_date' in orig: - 
self.assertEqual(param['datatype'], - 'xsd:date', 'Should be type xsd:date') - - @patch('requests.get') + param["datatype"], "xsd:integer", "Should be type xsd:integer" + ) + if "_xsd_date" in orig: + self.assertEqual( + param["datatype"], "xsd:date", "Should be type xsd:date" + ) + + self.assertEqual(params["o1"]["type"], "string", "o1 should be a string") + self.assertEqual(params["o2"]["format"], "iri", "o2 should be format iri") + self.assertEqual(params["o3"]["type"], "number", "o3 should be a number") + self.assertEqual(params["o4"]["type"], "literal", "o4 should be a literal") + self.assertEqual(params["o5"]["lang"], "en", "o5 should be a English") + self.assertEqual( + params["o6"]["datatype"], "xsd:integer", "o6 should be a integer" + ) + self.assertEqual(params["o7"]["datatype"], "xsd:date", "o7 should be a date") + + @patch("requests.get") def test_get_enumeration(self, mock_get): mock_get.return_value = Mock(ok=True) mock_get.return_value.json.return_value = { - 'results': { - 'bindings': [ - {'o1': {'value': 'v1'}}, - {'o1': {'value': 'v2'}} - ] - } + "results": {"bindings": [{"o1": {"value": "v1"}}, {"o1": {"value": "v2"}}]} } - rq, _ = self.loader.getTextForName('test-rq') - metadata = {'enumerate': 'o1'} - enumeration = gquery.get_enumeration(rq, 'o1', 'http://mock-endpoint/sparql', metadata) - self.assertIsInstance(enumeration, list, 'Should return a list of values') - self.assertEqual(len(enumeration), 2, 'Should have two elements') + rq, _ = self.loader.getTextForName("test-rq") + metadata = {"enumerate": "o1"} + enumeration = gquery.get_enumeration( + rq, "o1", "http://mock-endpoint/sparql", metadata + ) + self.assertIsInstance(enumeration, list, "Should return a list of values") + self.assertEqual(len(enumeration), 2, "Should have two elements") def test_get_static_enumeration(self): - rq, _ = self.loader.getTextForName('test-enum') + rq, _ = self.loader.getTextForName("test-enum") metadata = gquery.get_yaml_decorators(rq) - self.assertIn('enumerate', metadata, 'Should contain enumerate') + self.assertIn("enumerate", metadata, "Should contain enumerate") - enumeration = gquery.get_enumeration(rq, 'o', 'http://mock-endpoint/sparql', metadata) - self.assertIsInstance(enumeration, list, 'Should return a list of values') - self.assertEqual(len(enumeration), 2, 'Should have two elements') + enumeration = gquery.get_enumeration( + rq, "o", "http://mock-endpoint/sparql", metadata + ) + self.assertIsInstance(enumeration, list, "Should return a list of values") + self.assertEqual(len(enumeration), 2, "Should have two elements") def test_get_yaml_decorators(self): - rq, _ = self.loader.getTextForName('test-sparql') + rq, _ = self.loader.getTextForName("test-sparql") decorators = gquery.get_yaml_decorators(rq) # Query always exist -- the rest must be present on the file. 
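# Editor's note: the decorator keys asserted below come from "#+ "-prefixed
# YAML comments at the top of the query file (see tests/repo/test-endpoint-get.rq
# elsewhere in this changeset). A minimal sketch of the behaviour under test,
# with hypothetical fixture content (the real tests/repo/test-sparql.rq may
# differ):
import grlc.gquery as gquery

rq = "\n".join([
    "#+ summary: Sample query",  # -> decorators["summary"], a string
    "#+ pagination: 100",        # -> decorators["pagination"], an int
    "#+ enumerate:",             # -> decorators["enumerate"], a list
    "#+   - o",
    "SELECT ?p ?o WHERE { ?s ?p ?o }",
])
decorators = gquery.get_yaml_decorators(rq)
# decorators["query"] always holds the bare SPARQL text; the other keys are
# present only when the corresponding decorator appears in the file.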
- self.assertIn('query', decorators, 'Should have a query field') - self.assertIn('summary', decorators, 'Should have a summary field') - self.assertIn('pagination', decorators, - 'Should have a pagination field') - self.assertIn('enumerate', decorators, 'Should have a enumerate field') + self.assertIn("query", decorators, "Should have a query field") + self.assertIn("summary", decorators, "Should have a summary field") + self.assertIn("pagination", decorators, "Should have a pagination field") + self.assertIn("enumerate", decorators, "Should have a enumerate field") self.assertIsInstance( - decorators['summary'], six.string_types, 'Summary should be text') + decorators["summary"], six.string_types, "Summary should be text" + ) self.assertIsInstance( - decorators['pagination'], int, 'Pagination should be numeric') + decorators["pagination"], int, "Pagination should be numeric" + ) self.assertIsInstance( - decorators['enumerate'], list, 'Enumerate should be a list') + decorators["enumerate"], list, "Enumerate should be a list" + ) def test_get_json_decorators(self): - rq, _ = self.loader.getTextForName('test-sparql-jsonconf') + rq, _ = self.loader.getTextForName("test-sparql-jsonconf") decorators = gquery.get_yaml_decorators(rq) # Query always exist -- the rest must be present on the file. - self.assertIn('query', decorators, 'Should have a query field') - self.assertIn('summary', decorators, 'Should have a summary field') - self.assertIn('pagination', decorators, - 'Should have a pagination field') - self.assertIn('enumerate', decorators, 'Should have a enumerate field') + self.assertIn("query", decorators, "Should have a query field") + self.assertIn("summary", decorators, "Should have a summary field") + self.assertIn("pagination", decorators, "Should have a pagination field") + self.assertIn("enumerate", decorators, "Should have a enumerate field") self.assertIsInstance( - decorators['summary'], six.string_types, 'Summary should be text') + decorators["summary"], six.string_types, "Summary should be text" + ) self.assertIsInstance( - decorators['pagination'], int, 'Pagination should be numeric') + decorators["pagination"], int, "Pagination should be numeric" + ) self.assertIsInstance( - decorators['enumerate'], list, 'Enumerate should be a list') + decorators["enumerate"], list, "Enumerate should be a list" + ) def test_get_metadata(self): - rq, _ = self.loader.getTextForName('test-sparql') + rq, _ = self.loader.getTextForName("test-sparql") - metadata = gquery.get_metadata(rq, '') - self.assertIn('type', metadata, 'Should have a type field') - self.assertIn('variables', metadata, 'Should have a variables field') - self.assertEqual(metadata['type'], 'SelectQuery', - 'Should be type SelectQuery') + metadata = gquery.get_metadata(rq, "") + self.assertIn("type", metadata, "Should have a type field") + self.assertIn("variables", metadata, "Should have a variables field") + self.assertEqual(metadata["type"], "SelectQuery", "Should be type SelectQuery") self.assertIsInstance( - metadata['variables'], list, 'Should be a list of variables') - for var in metadata['variables']: - self.assertIsInstance(var, rdflib.term.Variable, - 'Should be of type Variable') + metadata["variables"], list, "Should be a list of variables" + ) + for var in metadata["variables"]: + self.assertIsInstance( + var, rdflib.term.Variable, "Should be of type Variable" + ) def test_paginate_query(self): - rq, _ = self.loader.getTextForName('test-sparql') + rq, _ = self.loader.getTextForName("test-sparql") rq_pag = 
+        self.assertNotIn("LIMIT", rq, "Original query should not contain LIMIT keyword")
+        self.assertIn("LIMIT", rq_pag, "Paginated query should contain LIMIT keyword")
         self.assertNotIn(
-            'LIMIT', rq, 'Original query should not contain LIMIT keyword')
-        self.assertIn('LIMIT', rq_pag,
-                      'Paginated query should contain LIMIT keyword')
-        self.assertNotIn(
-            'OFFSET', rq, 'Original query should not contain OFFSET keyword')
-        self.assertIn('OFFSET', rq_pag,
-                      'Paginated query should contain OFFSET keyword')
+            "OFFSET", rq, "Original query should not contain OFFSET keyword"
+        )
+        self.assertIn("OFFSET", rq_pag, "Paginated query should contain OFFSET keyword")

     @staticmethod
     def build_get_parameter(origName, rwName):
         """Builds parameter description in the format returned by gquery.get_parameters"""
         return {
-            'original': '?_{}'.format(origName),
-            'name': rwName,
-            'required': False,
-            'enum': [],
-            'type': 'literal',
-            'datatype': 'xsd:string',
-            'lang': 'en',
-            'format': None
+            "original": "?_{}".format(origName),
+            "name": rwName,
+            "required": False,
+            "enum": [],
+            "type": "literal",
+            "datatype": "xsd:string",
+            "lang": "en",
+            "format": None,
         }

     def test_rewrite_query(self):
-        rq, _ = self.loader.getTextForName('test-rq')
+        rq, _ = self.loader.getTextForName("test-rq")

         # Parameters on the format returned by gquery.get_parameters
         parameters = {
-            'o1': self.build_get_parameter('o1', 'x1'),
-            'o2': self.build_get_parameter('o2', 'x2'),
-            'o3': self.build_get_parameter('o3', 'x3'),
-            'o4': self.build_get_parameter('o4', 'x4'),
-            'o5': self.build_get_parameter('o5', 'x5'),
-            'o6': self.build_get_parameter('o6', 'x6'),
-            'o7': self.build_get_parameter('o7', 'x7')
+            "o1": self.build_get_parameter("o1", "x1"),
+            "o2": self.build_get_parameter("o2", "x2"),
+            "o3": self.build_get_parameter("o3", "x3"),
+            "o4": self.build_get_parameter("o4", "x4"),
+            "o5": self.build_get_parameter("o5", "x5"),
+            "o6": self.build_get_parameter("o6", "x6"),
+            "o7": self.build_get_parameter("o7", "x7"),
         }
         args = {
-            'o1': 'x1',
-            'o2': 'x2',
-            'o3': 'x3',
-            'o4': 'x4',
-            'o5': 'x5',
-            'o6': 'x6',
-            'o7': 'x7'
+            "o1": "x1",
+            "o2": "x2",
+            "o3": "x3",
+            "o4": "x4",
+            "o5": "x5",
+            "o6": "x6",
+            "o7": "x7",
         }
         # Rewritten query will probably be incorrect because parameters are not
         # carefully constructed, but that is not the scope of this test
@@ -201,13 +217,24 @@ def test_rewrite_query(self):
         for pName, pValue in parameters.items():
             self.assertIn(
-                pName, rq, 'Original query should contain original parameter name')
+                pName, rq, "Original query should contain original parameter name"
+            )
             self.assertNotIn(
-                pName, rq_rw, 'Rewritten query should not contain original parameter name')
+                pName,
+                rq_rw,
+                "Rewritten query should not contain original parameter name",
+            )
             self.assertNotIn(
-                pValue['name'], rq, 'Original query should not contain replacement parameter value')
+                pValue["name"],
+                rq,
+                "Original query should not contain replacement parameter value",
+            )
             self.assertIn(
-                pValue['name'], rq_rw, 'Rewritten query should contain replacement parameter value')
+                pValue["name"],
+                rq_rw,
+                "Rewritten query should contain replacement parameter value",
+            )
+

-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
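For context on what the new o1-o7 assertions in tests/test_gquery.py above exercise: grlc derives the type of each generated API parameter from the name suffix of a `?_`-prefixed SPARQL variable (the test checks for substrings such as "_xsd_date" in the original name). A minimal sketch of the convention; the query text below is illustrative and not the actual tests/repo/test-rq fixture:

    # Illustrative SPARQL text: each ?_ placeholder becomes an API parameter
    # whose type is derived from its name suffix.
    rq = """
    SELECT * WHERE {
      ?s ?p1 ?_o5_en .        # literal carrying language tag "en"
      ?s ?p2 ?_o6_integer .   # literal typed xsd:integer
      ?s ?p3 ?_o7_xsd_date .  # literal typed xsd:date (prefixed-datatype form)
    }
    """

    # gquery.get_parameters yields one description per placeholder, shaped like
    # the build_get_parameter helper above; for ?_o6_integer, roughly:
    # {"name": "o6", "type": "literal", "datatype": "xsd:integer", ...}
    # (field values here are illustrative, not copied from the fixture).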
diff --git a/tests/test_grlc.py b/tests/test_grlc.py
index 5a9fb61..3c2dae9 100644
--- a/tests/test_grlc.py
+++ b/tests/test_grlc.py
@@ -7,30 +7,38 @@
 from tests.mock_data import mock_process_sparql_query_text, filesInRepo

+
 class TestGrlc(unittest.TestCase):
-    '''Test grlc has been installed'''
+    """Test grlc has been installed"""
+
     def test_grlc(self):
         import grlc


 class TestGrlcLib(unittest.TestCase):
-    '''Test grlc can be used as a library'''
-    @patch('github.Github.get_repo')  # Corresponding patch object: mockGithubRepo
-    @patch('grlc.utils.GithubLoader.fetchFiles')  # Corresponding patch object: mockLoaderFiles
-    @patch('grlc.swagger.process_sparql_query_text', side_effect=mock_process_sparql_query_text)
+    """Test grlc can be used as a library"""
+
+    @patch("github.Github.get_repo")  # Corresponding patch object: mockGithubRepo
+    @patch(
+        "grlc.utils.GithubLoader.fetchFiles"
+    )  # Corresponding patch object: mockLoaderFiles
+    @patch(
+        "grlc.swagger.process_sparql_query_text",
+        side_effect=mock_process_sparql_query_text,
+    )
     def test_build_spec(self, mockQueryText, mockLoaderFiles, mockGithubRepo):
         mockLoaderFiles.return_value = filesInRepo
         mockGithubRepo.return_value = []
-        '''Using grlc as a library'''
+        """Using grlc as a library"""
         import grlc.swagger as swagger

-        user = 'testuser'
-        repo = 'testrepo'
+        user = "testuser"
+        repo = "testrepo"
         spec, warning = swagger.build_spec(user=user, repo=repo, git_type="github")
         self.assertEqual(len(spec), len(filesInRepo))

-if __name__ == '__main__':
+
+if __name__ == "__main__":
     unittest.main()
-
\ No newline at end of file
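The test_build_spec case above doubles as a recipe for using grlc as a library. A minimal sketch without the mocks; the user/repo values are placeholders for any public GitHub repository holding query files:

    import grlc.swagger as swagger

    # Build the Swagger/OpenAPI operations for every query in the repository;
    # build_spec returns the spec items plus any warnings collected on the way.
    spec, warnings = swagger.build_spec(user="someuser", repo="somerepo",
                                        git_type="github")
    for operation in spec:
        print(operation)  # one entry per query file found in the repository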
("test-rq", qType["SPARQL"]), + ("test-sparql", qType["SPARQL"]), + ("test-tpf", qType["TPF"]), ] for name, expectedType in testableNames: text, actualType = self.loader.getTextForName(name) - self.assertEqual(expectedType, actualType, "Query type should match %s != %s" % (expectedType, actualType)) + self.assertEqual( + expectedType, + actualType, + "Query type should match %s != %s" % (expectedType, actualType), + ) def test_getEndpointText(self): endpoint = self.loader.getEndpointText() @@ -80,28 +90,30 @@ def test_getEndpointText(self): class TestGitlabLoader(unittest.TestCase): @classmethod - # TODO: patch gitlab object? - # TODO: Enable tests (remove x from 'xtest' names) - # @patch('???', return_value=MockGitlabRepo()) + @patch("grlc.fileLoaders.gitlab.Gitlab", return_value=MockGitlabModule()) def setUpClass(self, mocked_repo): - self.user = 'fakeuser' - self.repo = 'fakerepo' - self.loader = GitlabLoader(self.user, self.repo, subdir=None, sha=None, prov=None) + self.user = "fakeuser" + self.repo = "fakerepo" + self.loader = GitlabLoader( + self.user, self.repo, subdir=None, sha=None, prov=None + ) - def xtest_fetchFiles(self): + def test_fetchFiles(self): files = self.loader.fetchFiles() # Should return a list of file items self.assertIsInstance(files, list, "Should return a list of file items") - # Should have N files (where N=9) - self.assertEqual(len(files), 9, "Should return correct number of files") + # Should have N files (where N=10) + self.assertEqual(len(files), 10, "Should return correct number of files") # File items should have a download_url for fItem in files: - self.assertIn('download_url', fItem, "File items should have a download_url") + self.assertIn( + "download_url", fItem, "File items should have a download_url" + ) - def xtest_getRawRepoUri(self): + def test_getRawRepoUri(self): repoUri = self.loader.getRawRepoUri() # Should be a string @@ -111,7 +123,7 @@ def xtest_getRawRepoUri(self): self.assertIn(self.user, repoUri, "Should contain user") self.assertIn(self.repo, repoUri, "Should contain repo") - def xtest_getTextFor(self): + def test_getTextFor(self): files = self.loader.fetchFiles() # the contents of each file @@ -125,20 +137,26 @@ def xtest_getTextFor(self): self.assertGreater(len(text), 0, "Should be non-empty") # Should raise exception for invalid file items - with self.assertRaises(Exception, msg="Should raise exception for invalid file items"): + with self.assertRaises( + Exception, msg="Should raise exception for invalid file items" + ): text = self.loader.getTextFor({}) - def xtest_getTextForName(self): + def test_getTextForName(self): testableNames = [ - ('test-rq', qType['SPARQL']), - ('test-sparql', qType['SPARQL']), - ('test-tpf', qType['TPF']) + ("test-rq", qType["SPARQL"]), + ("test-sparql", qType["SPARQL"]), + ("test-tpf", qType["TPF"]), ] for name, expectedType in testableNames: text, actualType = self.loader.getTextForName(name) - self.assertEqual(expectedType, actualType, "Query type should match %s != %s" % (expectedType, actualType)) + self.assertEqual( + expectedType, + actualType, + "Query type should match %s != %s" % (expectedType, actualType), + ) - def xtest_getEndpointText(self): + def test_getEndpointText(self): endpoint = self.loader.getEndpointText() # Should be some text @@ -148,7 +166,7 @@ def xtest_getEndpointText(self): class TestLocalLoader(unittest.TestCase): @classmethod def setUpClass(self): - self.loader = LocalLoader(path.join('tests', 'repo')) + self.loader = LocalLoader(path.join("tests", "repo")) def 
     def test_fetchFiles(self):
         files = self.loader.fetchFiles()

@@ -156,12 +174,14 @@
         # Should return a list of file items
         self.assertIsInstance(files, list, "Should return a list of file items")

-        # Should have N files (where N=9)
-        self.assertEqual(len(files), 9, "Should return correct number of files")
+        # Should have N files (where N=10)
+        self.assertEqual(len(files), 10, "Should return correct number of files")

         # File items should have a download_url
         for fItem in files:
-            self.assertIn('download_url', fItem, "File items should have a download_url")
+            self.assertIn(
+                "download_url", fItem, "File items should have a download_url"
+            )

     def test_getRawRepoUri(self):
         repoUri = self.loader.getRawRepoUri()

@@ -186,18 +206,24 @@ def test_getTextFor(self):
         self.assertGreater(len(text), 0, "Should be non-empty")

         # Should raise exception for invalid file items
-        with self.assertRaises(Exception, msg="Should raise exception for invalid file items"):
+        with self.assertRaises(
+            Exception, msg="Should raise exception for invalid file items"
+        ):
             text = self.loader.getTextFor({})

     def test_getTextForName(self):
         testableNames = [
-            ('test-rq', qType['SPARQL']),
-            ('test-sparql', qType['SPARQL']),
-            ('test-tpf', qType['TPF'])
+            ("test-rq", qType["SPARQL"]),
+            ("test-sparql", qType["SPARQL"]),
+            ("test-tpf", qType["TPF"]),
         ]
         for name, expectedType in testableNames:
             text, actualType = self.loader.getTextForName(name)
-            self.assertEqual(expectedType, actualType, "Query type should match %s != %s" % (expectedType, actualType))
+            self.assertEqual(
+                expectedType,
+                actualType,
+                "Query type should match %s != %s" % (expectedType, actualType),
+            )

     def test_getEndpointText(self):
         endpoint = self.loader.getEndpointText()

@@ -209,7 +235,9 @@
 class TestURLLoader(unittest.TestCase):
     @classmethod
     def setUp(self):
-        self.patcher = patch('grlc.fileLoaders.requests.get', side_effect=mock_requestsUrl)
+        self.patcher = patch(
+            "grlc.fileLoaders.requests.get", side_effect=mock_requestsUrl
+        )
         self.patcher.start()

@@ -217,9 +245,9 @@ def tearDown(self):
         self.patcher.stop()

     @classmethod
-    @patch('requests.get', side_effect=mock_requestsUrl)
+    @patch("requests.get", side_effect=mock_requestsUrl)
     def setUpClass(self, x):
-        self.specURL = 'http://example.org/url.yml'
+        self.specURL = "http://example.org/url.yml"
         self.loader = URLLoader(self.specURL)

     def test_fetchFiles(self):

@@ -233,7 +261,9 @@
         # File items should have a download_url
         for fItem in files:
-            self.assertIn('download_url', fItem, "File items should have a download_url")
+            self.assertIn(
+                "download_url", fItem, "File items should have a download_url"
+            )

     def test_getTextFor(self):
         files = self.loader.fetchFiles()

@@ -249,7 +279,9 @@
         self.assertGreater(len(text), 0, "Should be non-empty")

         # Should raise exception for invalid file items
-        with self.assertRaises(Exception, msg="Should raise exception for invalid file items"):
+        with self.assertRaises(
+            Exception, msg="Should raise exception for invalid file items"
+        ):
             text = self.loader.getTextFor({})

     def test_getRawRepoUri(self):

@@ -259,17 +291,23 @@
         self.assertIsInstance(repoUri, six.string_types, "Should be a string")

         # Should be the same one we used to create the repo
-        self.assertIn(self.specURL, repoUri, "Should be the same URL it was initialized with")
+        self.assertIn(
+            self.specURL, repoUri, "Should be the same URL it was initialized with"
+        )
     def test_getTextForName(self):
         testableNames = [
-            ('test-rq', qType['SPARQL']),
-            ('test-sparql', qType['SPARQL']),
-            ('test-tpf', qType['TPF'])
+            ("test-rq", qType["SPARQL"]),
+            ("test-sparql", qType["SPARQL"]),
+            ("test-tpf", qType["TPF"]),
         ]
         for name, expectedType in testableNames:
             text, actualType = self.loader.getTextForName(name)
-            self.assertEqual(expectedType, actualType, "Query type should match %s != %s" % (expectedType, actualType))
+            self.assertEqual(
+                expectedType,
+                actualType,
+                "Query type should match %s != %s" % (expectedType, actualType),
+            )

     def test_getEndpointText(self):
         endpoint = self.loader.getEndpointText()

@@ -278,5 +316,5 @@
         self.assertIsInstance(endpoint, six.string_types, "Should be some text")


-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_swagger.py b/tests/test_swagger.py
index 501b13e..9bea877 100644
--- a/tests/test_swagger.py
+++ b/tests/test_swagger.py
@@ -7,26 +7,30 @@
 import unittest
 from mock import patch

-import grlc.utils  # BUG: grlc.swagger will not import without this import first
 from grlc.swagger import build_spec
 from tests.mock_data import mock_process_sparql_query_text, filesInRepo


 class TestSwagger(unittest.TestCase):
-    @patch('github.Github.get_repo')  # Corresponding patch object: mockGithubRepo
-    @patch('grlc.utils.GithubLoader.fetchFiles')  # Corresponding patch object: mockLoaderFiles
-    @patch('grlc.swagger.process_sparql_query_text', side_effect=mock_process_sparql_query_text)
+    @patch("github.Github.get_repo")  # Corresponding patch object: mockGithubRepo
+    @patch(
+        "grlc.utils.GithubLoader.fetchFiles"
+    )  # Corresponding patch object: mockLoaderFiles
+    @patch(
+        "grlc.swagger.process_sparql_query_text",
+        side_effect=mock_process_sparql_query_text,
+    )
     def test_github(self, mockQueryText, mockLoaderFiles, mockGithubRepo):
         mockLoaderFiles.return_value = filesInRepo
         mockGithubRepo.return_value = []

-        user = 'testuser'
-        repo = 'testrepo'
+        user = "testuser"
+        repo = "testrepo"
         spec, warnings = build_spec(user, repo, git_type="github")
         self.assertEqual(len(spec), len(filesInRepo))


-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
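One pattern worth calling out in the tests/test_loaders.py changes above: the formerly disabled xtest_ Gitlab cases now run because the mock targets the name GitlabLoader actually looks up. A condensed sketch of the test's own setup (MockGitlabModule is the stub from tests.mock_data; the loader arguments mirror the test's fake values):

    from unittest.mock import patch
    from grlc.fileLoaders import GitlabLoader
    from tests.mock_data import MockGitlabModule

    # fileLoaders imports the python-gitlab module and instantiates
    # gitlab.Gitlab(...) when the loader is constructed, so the patch targets
    # that lookup path and only needs to be active at construction time.
    with patch("grlc.fileLoaders.gitlab.Gitlab", return_value=MockGitlabModule()):
        loader = GitlabLoader("fakeuser", "fakerepo", subdir=None, sha=None, prov=None)

    # The loader keeps the mocked client it was built with, so later calls
    # are served entirely by the mock and never touch the network.
    files = loader.fetchFiles()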
"value": "xsd:boolean", + }, + }, + ] + }, } - mock_get.return_value = Mock(ok=True) - mock_get.return_value.headers = {'Content-Type': 'application/json'} - mock_get.return_value.text = json.dumps(mock_json) + mock_post.return_value = Mock(ok=True) + mock_post.return_value.headers = {"Content-Type": "application/json"} + mock_post.return_value.text = json.dumps(mock_json) - rq, _ = self.loader.getTextForName('test-json') + rq, _ = self.loader.getTextForName("test-json") - self.assertIn('proto', rq) + self.assertIn("proto", rq) - resp, status, headers = utils.dispatchSPARQLQuery(rq, self.loader, content=None, requestArgs={}, - acceptHeader='application/json', - requestUrl='http://mock-endpoint/sparql', formData={}) + resp, status, headers = utils.dispatchSPARQLQuery( + rq, + self.loader, + content=None, + requestArgs={}, + acceptHeader="application/json", + requestUrl="http://mock-endpoint/sparql", + formData={}, + ) self.assertEqual(status, 200) self.assertIsInstance(resp, list) - self.assertIn('http', resp[0]['id']) + self.assertIn("http", resp[0]["id"]) def validateTestResponse(self, resp): - self.assertIsInstance(resp, list, 'Response should be a list') - self.assertEqual(len(resp), 5, 'Response should have 5 entries') + self.assertIsInstance(resp, list, "Response should be a list") + self.assertEqual(len(resp), 5, "Response should have 5 entries") for item in resp: - self.assertTrue('key' in item, 'Response items should contain a key') - self.assertTrue('value' in item, 'Response items should contain a value') - keys = [ item['key'] for item in resp ] - values = [ item['value'] for item in resp ] - - self.assertTrue(all(k in keys for k in ['p1', 'p2', 'p3', 'p4', 'p5']), 'Response should contain all known keys') - self.assertTrue(all(v in values for v in ['o1', 'o2', 'o3', 'o4', 'o5']), 'Response should contain all known values') - + self.assertTrue("key" in item, "Response items should contain a key") + self.assertTrue("value" in item, "Response items should contain a value") + keys = [item["key"] for item in resp] + values = [item["value"] for item in resp] + + self.assertTrue( + all(k in keys for k in ["p1", "p2", "p3", "p4", "p5"]), + "Response should contain all known keys", + ) + self.assertTrue( + all(v in values for v in ["o1", "o2", "o3", "o4", "o5"]), + "Response should contain all known values", + ) def setMockGetResponse(self): return_value = Mock(ok=True) - return_value.headers = {'Content-Type': 'application/json'} + return_value.headers = {"Content-Type": "application/json"} return_value.text = json.dumps(mock_simpleSparqlResponse) return return_value - - @patch('requests.get') - def test_dispatch_SPARQL_query(self, mock_get): + @patch("requests.post") + def test_dispatch_SPARQL_query(self, mock_post): + mock_post.return_value = self.setMockGetResponse() + + rq, _ = self.loader.getTextForName("test-projection") + resp, status, headers = utils.dispatchSPARQLQuery( + rq, + self.loader, + content=None, + requestArgs={"id": "http://dbpedia.org/resource/Frida_Kahlo"}, + acceptHeader="application/json", + requestUrl="http://mock-endpoint/sparql", + formData={}, + ) + self.validateTestResponse(resp) + self.assertTrue( + mock_post.called, "Should communicate with SPARQL endpoint via POST" + ) + + @patch("requests.get") + def test_dispatch_SPARQL_query_get(self, mock_get): + """Test that communication with SPARQL endpoint goes via GET method + When the endpoint-method decorator is present and set to GET.""" mock_get.return_value = self.setMockGetResponse() - rq, _ = 
-        rq, _ = self.loader.getTextForName('test-projection')
-        resp, status, headers = utils.dispatchSPARQLQuery(rq, self.loader, content=None, requestArgs={'id': 'http://dbpedia.org/resource/Frida_Kahlo'},
-                                                          acceptHeader='application/json',
-                                                          requestUrl='http://mock-endpoint/sparql', formData={})
+        rq, _ = self.loader.getTextForName("test-endpoint-get")
+        resp, status, headers = utils.dispatchSPARQLQuery(
+            rq,
+            self.loader,
+            content=None,
+            requestArgs={"id": "http://dbpedia.org/resource/Frida_Kahlo"},
+            acceptHeader="application/json",
+            requestUrl="http://mock-endpoint/sparql",
+            formData={},
+        )
         self.validateTestResponse(resp)
-
-
-    @patch('grlc.utils.getLoader')
-    @patch('requests.get')
-    def test_dispatch_query(self, mock_get, mock_loader):
-        mock_get.return_value = self.setMockGetResponse()
+        self.assertTrue(
+            mock_get.called, "Should communicate with SPARQL endpoint via GET"
+        )
+
+    @patch("grlc.utils.getLoader")
+    @patch("requests.post")
+    def test_dispatch_query(self, mock_post, mock_loader):
+        mock_post.return_value = self.setMockGetResponse()
         mock_loader.return_value = self.loader
-        resp, status, headers = utils.dispatch_query(None, None, 'test-projection', requestArgs={'id': 'http://dbpedia.org/resource/Frida_Kahlo'})
+        resp, status, headers = utils.dispatch_query(
+            None,
+            None,
+            "test-projection",
+            requestArgs={"id": "http://dbpedia.org/resource/Frida_Kahlo"},
+        )
         self.validateTestResponse(resp)
         self.assertNotEqual(status, 404)
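A closing note on the new test_dispatch_SPARQL_query_get case above: it keys off grlc's endpoint-method decorator, which flips dispatchSPARQLQuery from its default POST to GET. The actual test-endpoint-get fixture is not shown in this diff; under grlc's #+ decorator convention a comparable query file could start roughly like the hypothetical stand-in below (endpoint URL and query are illustrative):

    # Hypothetical stand-in for a fixture such as tests/repo/test-endpoint-get.rq;
    # without the endpoint-method decorator, dispatchSPARQLQuery defaults to POST.
    query_with_get = """\
    #+ endpoint: http://mock-endpoint/sparql
    #+ endpoint-method: GET

    SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10
    """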