Skip to content

Commit

Permalink
add logs
Browse files Browse the repository at this point in the history
  • Loading branch information
kfstorm committed Feb 18, 2024
1 parent 9c381ba commit 39eefc0
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@

DOUBAN_COLLECTION_API_PREFIX = "https://m.douban.com/rexxar/api/v2/subject_collection"

# Configure the root logger once at import time: every record is rendered as
# "<timestamp> - <level name> - <message>", and records below INFO are dropped.
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
)


def get_http_client():
client = httpx.AsyncClient()
Expand Down Expand Up @@ -45,6 +49,7 @@ async def get_collection_items(client: httpx.AsyncClient, collection_id: str):
if items is not None:
return items

logging.info(f"Fetching collection items for {collection_id}...")
collection_info = await get_json(
client, f"{DOUBAN_COLLECTION_API_PREFIX}/{collection_id}"
)
Expand All @@ -59,6 +64,8 @@ async def get_collection_items(client: httpx.AsyncClient, collection_id: str):
)
items.extend(response["subject_collection_items"])
start += count
await asyncio.sleep(random.uniform(0.0, 0.1))
logging.info(f"Fetched {len(items)} items for {collection_id}.")

cache.set(collection_id, items, expire=3600)
return items
Expand All @@ -82,7 +89,7 @@ async def collection(collection_id: str):


async def convert_item(client: httpx.AsyncClient, item):
imdb_id = await get_imdb_id_from_douban_id(client, item["id"])
imdb_id = await get_imdb_id_from_douban_id(client, item["title"], item["id"])
return {
"douban_id": item["id"],
"douban_url": item["url"],
Expand All @@ -96,21 +103,25 @@ async def convert_item(client: httpx.AsyncClient, item):
IMDB_ID_PATTERN = re.compile(r"IMDb:.*?(\btt\d+\b)")


async def get_imdb_id_from_douban_id(
    client: httpx.AsyncClient, title: str, douban_id: str
):
    """Return the IMDb ID for a Douban subject, consulting IMDB_CACHE first.

    On a cache miss the subject page
    ``https://movie.douban.com/subject/<douban_id>/`` is fetched and its text
    is searched with ``IMDB_ID_PATTERN``; the captured ID is stored in the
    cache under ``douban_id`` before being returned.

    Args:
        client: HTTP client forwarded to ``get_response``.
        title: Subject title; used only to make the log messages readable.
        douban_id: Douban subject ID; also the cache key.

    Returns:
        The IMDb ID captured by ``IMDB_ID_PATTERN`` (a ``tt``-prefixed number).

    Raises:
        ValueError: If the fetched page contains no match for the pattern.
    """
    cache = IMDB_CACHE
    imdb_id = cache.get(douban_id)
    if imdb_id is not None:
        return imdb_id

    # Random pause of up to one second before every uncached page fetch.
    # NOTE(review): presumably this spreads out requests to Douban - confirm.
    await asyncio.sleep(random.uniform(0.0, 1.0))

    logging.info(f"Fetching IMDb ID for {title} (douban ID: {douban_id})...")
    response = await get_response(
        client, f"https://movie.douban.com/subject/{douban_id}/"
    )
    match = IMDB_ID_PATTERN.search(response.text)
    if not match:
        # Failures are not cached, so a later call will fetch the page again.
        raise ValueError(f"IMDb ID not found for douban ID: {douban_id}")
    imdb_id = match.group(1)
    logging.info(f"IMDb ID for {title} (douban ID: {douban_id}) is {imdb_id}.")

    cache.set(douban_id, imdb_id)
    return imdb_id

0 comments on commit 39eefc0

Please sign in to comment.