Patch summary: route filter/sort/list parameter values through a new
_quote_oa_value helper (URL-encodes strings, lowercases booleans), build
URLs with f-strings / str.format, and update the URL expectations in tests.

diff --git a/pyalex/api.py b/pyalex/api.py
index 360fc98..2cb3149 100644
--- a/pyalex/api.py
+++ b/pyalex/api.py
@@ -23,7 +23,7 @@ def __setattr__(self, key, value):
 config = AlexConfig(
     email=None,
     api_key=None,
-    user_agent="pyalex/" + __version__,
+    user_agent=f"pyalex/{__version__}",
     openalex_url="https://api.openalex.org",
     max_retries=0,
     retry_backoff_factor=0.1,
@@ -31,12 +31,28 @@ def __setattr__(self, key, value):
 )
 
 
+def _quote_oa_value(v):
+    """Prepare a value for the OpenAlex API.
+
+    Applies URL encoding to strings and converts booleans to lowercase strings.
+    """
+
+    # workaround for bug https://groups.google.com/u/1/g/openalex-users/c/t46RWnzZaXc
+    if isinstance(v, bool):
+        return str(v).lower()
+
+    if isinstance(v, str):
+        return quote_plus(v)
+
+    return v
+
+
 def _flatten_kv(d, prefix=""):
     if isinstance(d, dict):
         t = []
         for k, v in d.items():
             if isinstance(v, list):
-                t.extend([f"{prefix}.{k}:{i}" for i in v])
+                t.extend([f"{prefix}.{k}:{_quote_oa_value(i)}" for i in v])
             else:
                 new_prefix = f"{prefix}.{k}" if prefix else f"{k}"
                 x = _flatten_kv(v, prefix=new_prefix)
@@ -44,10 +60,7 @@ def _flatten_kv(d, prefix=""):
 
         return ",".join(t)
     else:
-        # workaround for bug https://groups.google.com/u/1/g/openalex-users/c/t46RWnzZaXc
-        d = str(d).lower() if isinstance(d, bool) else d
-
-        return f"{prefix}:{d}"
+        return f"{prefix}:{_quote_oa_value(d)}"
 
 
 def _params_merge(params, add_params):
@@ -199,10 +212,11 @@ def _get_multi_items(self, record_list):
 
     def _full_collection_name(self):
         if self.params is not None and "q" in self.params.keys():
-            base_url = config.openalex_url + "/autocomplete/"
-            return base_url + self.__class__.__name__.lower()
+            return (
+                f"{config.openalex_url}/autocomplete/{self.__class__.__name__.lower()}"
+            )
         else:
-            return config.openalex_url + "/" + self.__class__.__name__.lower()
+            return f"{config.openalex_url}/{self.__class__.__name__.lower()}"
 
     def __getattr__(self, key):
         if key == "groupby":
@@ -223,7 +237,7 @@ def __getitem__(self, record_id):
             return self._get_multi_items(record_id)
 
         return self._get_from_url(
-            self._full_collection_name() + "/" + record_id, return_meta=False
+            f"{self._full_collection_name()}/{record_id}", return_meta=False
         )
 
     @property
@@ -236,15 +250,14 @@ def url(self):
             if v is None:
                 pass
             elif isinstance(v, list):
-                v_quote = [quote_plus(q) for q in v]
-                l_params.append(k + "=" + ",".join(v_quote))
+                l_params.append("{}={}".format(k, ",".join(map(_quote_oa_value, v))))
             elif k in ["filter", "sort"]:
-                l_params.append(k + "=" + quote_plus(_flatten_kv(v)))
+                l_params.append(f"{k}={_flatten_kv(v)}")
             else:
-                l_params.append(k + "=" + quote_plus(str(v)))
+                l_params.append(f"{k}={_quote_oa_value(v)}")
 
         if l_params:
-            return self._full_collection_name() + "?" + "&".join(l_params)
+            return "{}?{}".format(self._full_collection_name(), "&".join(l_params))
 
         return self._full_collection_name()
 
@@ -464,7 +477,7 @@ class autocompletes(BaseOpenAlex):
 
     def __getitem__(self, key):
         return self._get_from_url(
-            config.openalex_url + "/autocomplete" + "?q=" + key, return_meta=False
+            f"{config.openalex_url}/autocomplete?q={quote_plus(key)}", return_meta=False
         )
 
 
diff --git a/tests/test_pyalex.py b/tests/test_pyalex.py
index a6d6076..83e5f6a 100644
--- a/tests/test_pyalex.py
+++ b/tests/test_pyalex.py
@@ -139,7 +139,7 @@ def test_works_multifilter():
 
 
 def test_works_url():
-    url = "https://api.openalex.org/works?filter=publication_year%3A2020%2Cis_oa%3Atrue"
+    url = "https://api.openalex.org/works?filter=publication_year:2020,is_oa:true"
 
     assert url == Works().filter(publication_year=2020, is_oa=True).url
     assert url == Works().filter(publication_year=2020).filter(is_oa=True).url
@@ -258,7 +258,7 @@ def test_random_publishers():
 
 def test_and_operator():
     # https://github.com/J535D165/pyalex/issues/11
-    url = "https://api.openalex.org/works?filter=institutions.country_code%3Atw%2Cinstitutions.country_code%3Ahk%2Cinstitutions.country_code%3Aus%2Cpublication_year%3A2022"
+    url = "https://api.openalex.org/works?filter=institutions.country_code:tw,institutions.country_code:hk,institutions.country_code:us,publication_year:2022"
 
     assert (
         url
@@ -288,12 +288,12 @@ def test_and_operator():
 
 
 def test_sample():
-    url = "https://api.openalex.org/works?filter=publication_year%3A2020%2Cis_oa%3Atrue&sample=50"
+    url = "https://api.openalex.org/works?filter=publication_year:2020,is_oa:true&sample=50"
     assert url == Works().filter(publication_year=2020, is_oa=True).sample(50).url
 
 
 def test_sample_seed():
-    url = "https://api.openalex.org/works?filter=publication_year%3A2020%2Cis_oa%3Atrue&sample=50&seed=535"  # noqa
+    url = "https://api.openalex.org/works?filter=publication_year:2020,is_oa:true&sample=50&seed=535"  # noqa
     assert (
         url
         == Works().filter(publication_year=2020, is_oa=True).sample(50, seed=535).url
@@ -332,3 +332,22 @@ def test_autocomplete():
 
 def test_filter_urlencoding():
     assert Works().filter(doi="10.1207/s15327809jls0703&4_2").count() == 1
+    assert (
+        Works()["https://doi.org/10.1207/s15327809jls0703&4_2"]["id"]
+        == "https://openalex.org/W4238483711"
+    )
+
+
+@pytest.mark.skip("This test is not working due to inconsistencies in the API.")
+def test_urlencoding_list():
+    assert (
+        Works()
+        .filter(
+            doi=[
+                "https://doi.org/10.1207/s15327809jls0703&4_2",
+                "https://doi.org/10.1001/jama.264.8.944b",
+            ]
+        )
+        .count()
+        == 2
+    )