Skip to content

Commit

Permalink
Improve encoding and stability (#43)
Browse files Browse the repository at this point in the history
Extends PR #42
  • Loading branch information
J535D165 authored Jul 1, 2024
1 parent 18a8113 commit 6036385
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 20 deletions.
45 changes: 29 additions & 16 deletions pyalex/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,31 +23,44 @@ def __setattr__(self, key, value):
# Module-wide settings object consulted when building request URLs.
config = AlexConfig(
    email=None,
    api_key=None,
    # Must be an f-string: without the prefix the literal text
    # "pyalex/{__version__}" would be used as the user agent.
    user_agent=f"pyalex/{__version__}",
    openalex_url="https://api.openalex.org",
    max_retries=0,
    retry_backoff_factor=0.1,
    retry_http_codes=[429, 500, 503],
)


def _quote_oa_value(v):
"""Prepare a value for the OpenAlex API.
Applies URL encoding to strings and converts booleans to lowercase strings.
"""

# workaround for bug https://groups.google.com/u/1/g/openalex-users/c/t46RWnzZaXc
if isinstance(v, bool):
return str(v).lower()

if isinstance(v, str):
return quote_plus(v)

return v


def _flatten_kv(d, prefix=""):
    """Flatten a (possibly nested) mapping into ``key:value`` pairs.

    Nested keys are joined with ``.``, every leaf is rendered as
    ``key:value`` with the value passed through ``_quote_oa_value``, and the
    pairs are joined with ``,``. A list value yields one pair per element.

    ``d`` is the mapping (or, on recursion, a leaf value) to flatten and
    ``prefix`` is the dotted key accumulated so far. Returns the flattened
    string.
    """
    if not isinstance(d, dict):
        return f"{prefix}:{_quote_oa_value(d)}"

    pairs = []
    for k, v in d.items():
        # Build the dotted key once for both branches. The list branch used
        # to emit f"{prefix}.{k}" unconditionally, so a top-level list such
        # as {"doi": [...]} produced ".doi:..." with a stray leading dot.
        key = f"{prefix}.{k}" if prefix else f"{k}"
        if isinstance(v, list):
            pairs.extend(f"{key}:{_quote_oa_value(i)}" for i in v)
        else:
            pairs.append(_flatten_kv(v, prefix=key))

    return ",".join(pairs)


def _params_merge(params, add_params):
Expand Down Expand Up @@ -199,10 +212,11 @@ def _get_multi_items(self, record_list):

def _full_collection_name(self):
    """Return the endpoint URL for this collection.

    The collection segment is the lowercased class name. When the stored
    params contain a ``q`` entry, the ``/autocomplete/`` endpoint is used
    instead of the plain collection endpoint.
    """
    collection = self.__class__.__name__.lower()
    wants_autocomplete = self.params is not None and "q" in self.params.keys()

    if wants_autocomplete:
        return f"{config.openalex_url}/autocomplete/{collection}"
    return f"{config.openalex_url}/{collection}"

def __getattr__(self, key):
if key == "groupby":
Expand All @@ -223,7 +237,7 @@ def __getitem__(self, record_id):
return self._get_multi_items(record_id)

return self._get_from_url(
self._full_collection_name() + "/" + record_id, return_meta=False
f"{self._full_collection_name()}/{record_id}", return_meta=False
)

@property
Expand All @@ -236,15 +250,14 @@ def url(self):
if v is None:
pass
elif isinstance(v, list):
v_quote = [quote_plus(q) for q in v]
l_params.append(k + "=" + ",".join(v_quote))
l_params.append("{}={}".format(k, ",".join(map(_quote_oa_value, v))))
elif k in ["filter", "sort"]:
l_params.append(k + "=" + quote_plus(_flatten_kv(v)))
l_params.append(f"{k}={_flatten_kv(v)}")
else:
l_params.append(k + "=" + quote_plus(str(v)))
l_params.append(f"{k}={_quote_oa_value(v)}")

if l_params:
return self._full_collection_name() + "?" + "&".join(l_params)
return "{}?{}".format(self._full_collection_name(), "&".join(l_params))

return self._full_collection_name()

Expand Down Expand Up @@ -464,7 +477,7 @@ class autocompletes(BaseOpenAlex):

def __getitem__(self, key):
    """Query the autocomplete endpoint with ``key`` as the ``q`` parameter.

    ``key`` is URL-encoded before it is placed in the query string, so
    characters such as ``&``, ``#`` or spaces cannot truncate or alter the
    query. The request is made with ``return_meta=False``.
    """
    return self._get_from_url(
        f"{config.openalex_url}/autocomplete?q={quote_plus(str(key))}",
        return_meta=False,
    )


Expand Down
27 changes: 23 additions & 4 deletions tests/test_pyalex.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def test_works_multifilter():


def test_works_url():
url = "https://api.openalex.org/works?filter=publication_year%3A2020%2Cis_oa%3Atrue"
url = "https://api.openalex.org/works?filter=publication_year:2020,is_oa:true"

assert url == Works().filter(publication_year=2020, is_oa=True).url
assert url == Works().filter(publication_year=2020).filter(is_oa=True).url
Expand Down Expand Up @@ -258,7 +258,7 @@ def test_random_publishers():

def test_and_operator():
# https://github.com/J535D165/pyalex/issues/11
url = "https://api.openalex.org/works?filter=institutions.country_code%3Atw%2Cinstitutions.country_code%3Ahk%2Cinstitutions.country_code%3Aus%2Cpublication_year%3A2022"
url = "https://api.openalex.org/works?filter=institutions.country_code:tw,institutions.country_code:hk,institutions.country_code:us,publication_year:2022"

assert (
url
Expand Down Expand Up @@ -288,12 +288,12 @@ def test_and_operator():


def test_sample():
    """The sample size is appended after the unencoded filter expression."""
    sampled = Works().filter(publication_year=2020, is_oa=True).sample(50)
    expected = "https://api.openalex.org/works?filter=publication_year:2020,is_oa:true&sample=50"

    assert expected == sampled.url


def test_sample_seed():
url = "https://api.openalex.org/works?filter=publication_year%3A2020%2Cis_oa%3Atrue&sample=50&seed=535" # noqa
url = "https://api.openalex.org/works?filter=publication_year:2020,is_oa:true&sample=50&seed=535" # noqa
assert (
url
== Works().filter(publication_year=2020, is_oa=True).sample(50, seed=535).url
Expand Down Expand Up @@ -332,3 +332,22 @@ def test_autocomplete():

def test_filter_urlencoding():
    """A DOI containing ``&`` works both as a filter value and as a record id."""
    n_matches = Works().filter(doi="10.1207/s15327809jls0703&4_2").count()
    assert n_matches == 1

    work = Works()["https://doi.org/10.1207/s15327809jls0703&4_2"]
    assert work["id"] == "https://openalex.org/W4238483711"


@pytest.mark.skip("This test is not working due to inconsistencies in the API.")
def test_urlencoding_list():
    """Filtering on a list of two DOIs (one with ``&``) should count two works."""
    dois = [
        "https://doi.org/10.1207/s15327809jls0703&4_2",
        "https://doi.org/10.1001/jama.264.8.944b",
    ]

    n_matches = Works().filter(doi=dois).count()
    assert n_matches == 2

0 comments on commit 6036385

Please sign in to comment.