Skip to content
This repository has been archived by the owner on Sep 6, 2024. It is now read-only.

Commit

Permalink
fixed cleanup being done on a per page basis instead of once for whole hentai ID list
Browse files Browse the repository at this point in the history
  • Loading branch information
9FS committed Sep 26, 2023
1 parent b5dbbde commit 48db0c5
Showing 1 changed file with 15 additions and 12 deletions.
27 changes: 15 additions & 12 deletions src/get_hentai_ID_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ def get_hentai_ID_list(cookies: dict[str, str], headers: dict[str, str], dbx: dr
hentai_ID_list=_get_hentai_ID_list_from_tag_search(cookies, headers, dropbox_config["tag"]) # get hentai ID list by searching by tag, list[str] -> list[int], clean up data

logging.info("Saving hentai ID list in \"downloadme.txt\"...") # save as backup in case something crashes, normal nHentai to PDF downloader could pick up if needed
with open("downloadme.txt", "wt") as h_ID_list_file:
h_ID_list_file.write("\n".join([str(hentai_ID) for hentai_ID in hentai_ID_list]))
with open("downloadme.txt", "wt") as hentai_ID_list_file:
hentai_ID_list_file.write("\n".join([str(hentai_ID) for hentai_ID in hentai_ID_list]))
logging.info("\rSaved hentai ID list in \"downloadme.txt\".")

if len(hentai_ID_list)==0: # if file or user input empty: retry
Expand Down Expand Up @@ -70,8 +70,9 @@ def _get_hentai_ID_list_from_tag_search(cookies: dict[str, str], headers: dict[s
- hentai_ID_list_str: list of hentai ID to download
"""

hentai_ID_list: list[int]=[] # list of hentai ID to download
hentai_ID_new: list[int]
hentai_ID_list: list[int] # list of hentai ID found by searching by tag, cleaned up
hentai_ID_list_str: list[str]=[] # list of hentai ID found by searching by tag
hentai_ID_new: list[str]
NHENTAI_SEARCH_API_URL: str="https://nhentai.net/api/galleries/search"
page_no_current: int=1
page_no_max: int # number of pages a nhentai search by tag would return
Expand All @@ -86,11 +87,13 @@ def _get_hentai_ID_list_from_tag_search(cookies: dict[str, str], headers: dict[s
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as thread_manager:
search_requests=[requests.Request("GET", NHENTAI_SEARCH_API_URL, cookies=cookies, headers=headers, params={"query": tag, "sort": "popular", "page": page_no}).prepare() for page_no in range(1, page_no_max+1)] # prepare beforehand to generate full URL from params
for hentai_ID_new in thread_manager.map(_search_hentai_ID_by_tag, search_requests): # search by tag on all pages
hentai_ID_list+=hentai_ID_new
hentai_ID_list_str+=hentai_ID_new
logging.info(f"\rDownloaded hentai ID from \"{search_requests[page_no_current-1].url}\", page {KFSfstr.notation_abs(page_no_current, 0, round_static=True)}/{KFSfstr.notation_abs(page_no_max, 0, round_static=True)}.")
logging.debug(hentai_ID_new)
logging.debug("")
page_no_current+=1

hentai_ID_list=_convert_hentai_ID_list_str_to_hentai_ID_list_int(hentai_ID_list_str) # list[str] -> list[int], clean up data

return hentai_ID_list

Expand Down Expand Up @@ -142,21 +145,21 @@ def _get_page_no_max_by_tag(search_request: requests.PreparedRequest) -> int:
return page_no_max


def _search_hentai_ID_by_tag(search_request: requests.PreparedRequest) -> list[int]:
def _search_hentai_ID_by_tag(search_request: requests.PreparedRequest) -> list[str]:
"""
Searches for nhentai ID by tag on page page_no.
Arguments:
- search_request: prepared request to nhentai search API
Returns:
- hentai_ID_list: hentai ID found by searching by tag on page page_no
- hentai_ID_list_str: hentai ID found by searching by tag on page page_no
Raises:
- requests.HTTPError: Downloading tag search from \"{NHENTAI_SEARCH_API_URL}\" with params={"query": tag, "sort": "popular", "page": PAGE_NO,} failed multiple times.
"""

hentai_ID_list: list[int]=[] # list of hentai ID found by searching by tag on page page_no
hentai_ID_list_str: list[str]=[] # list of hentai ID found by searching by tag on page page_no
search: dict
search_page: requests.Response

Expand All @@ -175,8 +178,8 @@ def _search_hentai_ID_by_tag(search_request: requests.PreparedRequest) -> list[i
logging.critical(f"Downloading tag search from \"{search_request.url}\" resulted in status code {search_page.status_code}. Have you set \"cookies.json\" and \"headers.json\" correctly?")
raise requests.HTTPError(f"Error in {_search_hentai_ID_by_tag.__name__}{inspect.signature(_search_hentai_ID_by_tag)}: Downloading tag search from \"{search_request.url}\" resulted in status code {search_page.status_code}. Have you set \"cookies.json\" and \"headers.json\" correctly?")
if search_page.status_code==404: # if status code 404 (not found): nhenati API is sus and randomly does not have some search result pages
hentai_ID_list=[] # just return empty list
return hentai_ID_list
hentai_ID_list_str=[] # just return empty list
return hentai_ID_list_str
if search_page.ok==False:
time.sleep(1)
if attempt_no<3: # try 3 times
Expand All @@ -188,9 +191,9 @@ def _search_hentai_ID_by_tag(search_request: requests.PreparedRequest) -> list[i
break


hentai_ID_list=_convert_hentai_ID_list_str_to_hentai_ID_list_int([str(hentai["id"]) for hentai in search["result"]]) # parse all hentai ID, list[str] -> list[int], clean up data
hentai_ID_list_str=[str(hentai["id"]) for hentai in search["result"]] # parse all hentai ID

return hentai_ID_list
return hentai_ID_list_str


def _convert_hentai_ID_list_str_to_hentai_ID_list_int(hentai_ID_list_str: list[str]) -> list[int]:
Expand Down

0 comments on commit 48db0c5

Please sign in to comment.