add doulist support

kfstorm · Feb 23, 2024 · 9965393 · 9965393
1 parent 7366944
commit 9965393
Show file tree

Hide file tree

Showing 7 changed files with 88 additions and 50 deletions.
diff --git a/README.md b/README.md
@@ -1,26 +1,34 @@
-# Doudarr: 将豆瓣电影榜单转换为Radarr列表
+# Doudarr: 将豆瓣榜单/片单/豆列转换为Radarr列表
 
 ## 介绍
 
-Doudarr是一个将豆瓣电影榜单转换为Radarr列表的工具。它可以将任意豆瓣电影榜单中的电影列表转换为Radarr列表，从而实现自动监控豆瓣电影榜单中的电影，并自动下载。
+Doudarr是一个将豆瓣榜单/片单/豆列（以下统称`豆瓣列表`）转换为Radarr列表的工具。它可以将任意豆瓣列表中的电影列表转换为Radarr列表，从而实现自动监控豆瓣列表中的电影，并自动下载。
 
 ![Cover](res/cover.png)
 
 ## 使用
 
-* 使用Docker部署Doudarr:
+1. 使用Docker部署Doudarr: `docker run -d --name doudarr -p 8000:8000 -v /path/to/cache:/app/cache kfstorm/doudarr:latest`
 
-```bash
-docker run -d --name doudarr -p 8000:8000 -v /path/to/cache:/app/cache kfstorm/doudarr:latest
-```
+2. 访问[http://localhost:8000/collection/movie_weekly_best](http://localhost:8000/collection/movie_weekly_best)，测试是否能够获取到该豆瓣列表中的电影列表。（对应的豆瓣网页为[https://m.douban.com/subject_collection/movie_weekly_best](https://m.douban.com/subject_collection/movie_weekly_best)。）
 
-* 访问[http://localhost:8000/collection/movie_weekly_best](http://localhost:8000/collection/movie_weekly_best)，测试是否能够获取到豆瓣电影榜单中的电影列表。(对应的豆瓣网页为[https://m.douban.com/subject_collection/movie_weekly_best](https://m.douban.com/subject_collection/movie_weekly_best)。)
-* 进入Radarr，在`设置 -> 列表`中新增一个列表，选择`Advanced List`中的`StevenLu Custom`，设置好参数后保存。一些常用参数：
-  * 名称: 可以和豆瓣榜单的名字一样，方便记忆。
-  * 启用自动添加：打开后会自动添加榜单里的电影到库中。（建议打开）
-  * 添加时搜索：打开后在添加电影到库中时会自动开始搜索下载。（建议打开）
-  * URL: URL的格式为`http://<Doudarr服务地址>/collection/<榜单ID>`。请根据需要修改Doudarr服务的地址以及榜单的ID，例如：`http://localhost:8000/collection/movie_weekly_best`。
-* 片刻后，应该能看到Radarr自动添加了榜单中的电影。也可以在Radarr的`电影 -> 发现`中查看。（右上角`选项`里取消勾选`包含Radarr推荐`，右上角`过滤`里选择`全部`。）
+3. 进入Radarr，在`设置 -> 列表`中新增一个列表，选择`Advanced List`中的`StevenLu Custom`，设置好参数后保存。一些常用参数：
+
+* 名称：可以和豆瓣列表的名字一样，方便记忆。
+* 启用自动添加：打开后会自动添加豆瓣列表里的电影到库中。（建议打开）
+* 添加时搜索：打开后在添加电影到库中时会自动开始搜索下载。（建议打开）
+* URL：填写完整的Doudarr链接。（请参考[豆瓣列表链接与Doudarr链接的映射关系](#豆瓣列表链接与doudarr链接的映射关系)）
+
+4. 片刻后，应该能看到Radarr自动添加了豆瓣列表中的电影。也可以在Radarr的`电影 -> 发现`中查看。（右上角`选项`里取消勾选`包含Radarr推荐`，右上角`过滤`里选择`全部`。）
+
+## 豆瓣列表链接与Doudarr链接的映射关系
+
+豆瓣列表的链接有两种格式，请根据情况选择对应的Doudarr链接。
+
+| 豆瓣列表链接格式 | Doudarr链接格式 | Doudarr链接示例 |
+| --- | --- | --- |
+| `https://m.douban.com/subject_collection/<豆瓣列表ID>` | `http://<Doudarr服务地址>/collection/<豆瓣列表ID>` | `http://localhost:8000/collection/movie_weekly_best` |
+| `https://www.douban.com/doulist/<豆瓣列表ID>/` | `http://<Doudarr服务地址>/doulist/<豆瓣列表ID>` | `http://localhost:8000/doulist/43556565` |
 
 ## 注意事项
 
@@ -29,11 +37,11 @@ docker run -d --name doudarr -p 8000:8000 -v /path/to/cache:/app/cache kfstorm/d
 
 ## 项目特色
 
-* 支持任意豆瓣电影榜单。
+* 支持任意豆瓣列表。
 * 使用IMDb ID作为电影的唯一标识，不会因为电影名字相近而导致添加错误的电影。
 
 ## FAQ
 
-* 如何获取榜单ID？
+* 如何找到喜欢的豆瓣列表？
 
-在豆瓣手机App中，选择`书影音 -> 电影 -> 豆瓣榜单`，可以浏览所有的电影榜单。选择一个想要监控的榜单，点击进入，然后打开分享菜单，选择`复制链接`，即可获得榜单的URL。榜单URL的格式为`https://m.douban.com/subject_collection/<榜单ID>`，请注意剔除`?`后面的部分（包括`?`）。
+在豆瓣手机App中，依次选择`书影音 -> 电影 -> 豆瓣榜单`，可以浏览所有的榜单/片单（也叫豆列）。选择一个想要监控的豆瓣列表，点击进入，然后打开分享菜单，选择`复制链接`，即可获得该豆瓣列表的URL。如果URL中包含`?`，请剔除`?`及其后面的部分，再参考[豆瓣列表链接与Doudarr链接的映射关系](#豆瓣列表链接与doudarr链接的映射关系)将其转换为Doudarr链接。
diff --git a/bootstrap.py b/bootstrap.py
@@ -4,6 +4,7 @@
 from collection import CollectionApi
 from imdb import ImdbApi
 from config import app_config
+from utils import get_douban_id
 
 
 COMMON_COLLECTIONS = [
@@ -33,19 +34,18 @@ async def bootstrap(collection_api: CollectionApi, imdb_api: ImdbApi):
             visited_collections.add(collection_id)
 
             try:
-                collection_info = await collection_api.get_collection_info(
-                    collection_id
-                )
-                for related_collection in collection_info["related_charts"]["items"]:
+                info = await collection_api.get_info(collection_id)
+                for related_collection in info["related_charts"]["items"]:
                     related_collection_id = related_collection["id"]
                     if related_collection_id not in visited_collections:
                         all_collections.append(related_collection_id)
 
-                items = await collection_api.get_collection_items(collection_id)
+                items = await collection_api.get_items(collection_id)
                 # Keep only movies
                 items = [item for item in items if item["type"] == "movie"]
                 for item in items:
-                    await imdb_api.get_imdb_id(item)
+                    douban_id = get_douban_id(item)
+                    await imdb_api.get_imdb_id(douban_id, item)
             except Exception as e:
                 logging.error(f"Failed to fetch collection {collection_id}: {e}")
 

diff --git a/collection.py b/collection.py
@@ -8,46 +8,57 @@
 from config import app_config
 
 
-class CollectionApi:
-    def __init__(self):
+class BaseApi:
+    def __init__(self, sub_path: str, cache_name: str, items_key: str):
         self.client = httpx.AsyncClient(
             **get_http_client_args(),
-            base_url="https://m.douban.com/rexxar/api/v2/subject_collection",
+            base_url=f"https://m.douban.com/rexxar/api/v2/{sub_path}",
         )
         del self.client.headers["user-agent"]
-        self.client.headers["Referer"] = "https://m.douban.com/subject_collection"
-        self.cache = Cache(os.path.join(app_config.cache_base_dir, "collection"))
+        self.client.headers["Referer"] = f"https://m.douban.com/{sub_path}"
+        self.cache = Cache(os.path.join(app_config.cache_base_dir, cache_name))
+        self.items_key = items_key
 
     def __exit__(self, exc_type, exc_value, traceback):
         self.client.close()
         self.cache.close()
 
-    async def get_collection_info(self, collection_id: str):
-        return await get_json(self.client, f"/{collection_id}")
+    async def get_info(self, id: str):
+        return await get_json(self.client, f"/{id}")
 
-    async def get_collection_items(self, collection_id: str):
-        items = self.cache.get(collection_id)
+    async def get_items(self, id: str):
+        items = self.cache.get(id)
         if items is not None:
             return items
 
-        logging.info(f"Fetching collection items for {collection_id} ...")
+        logging.info(f"Fetching items for {id} ...")
         total = None
         items = []
         start = 0
         count = 50
         while total is None or start < total:
             response = await get_json(
                 self.client,
-                f"/{collection_id}/items?start={start}&count={count}",
+                f"/{id}/items?start={start}&count={count}",
             )
             if total is None:
                 total = response["total"]
-            items.extend(response["subject_collection_items"])
+            items.extend(response[self.items_key])
             start += count
             await asyncio.sleep(
-                random.uniform(0, app_config.collection_request_delay_max)
+                random.uniform(0, app_config.douban_api_request_delay_max)
             )
-        logging.info(f"Fetched {len(items)} items for {collection_id}.")
+        logging.info(f"Fetched {len(items)} items for {id}.")
 
-        self.cache.set(collection_id, items, expire=app_config.collection_cache_ttl)
+        self.cache.set(id, items, expire=app_config.collection_cache_ttl)
         return items
+
+
+class CollectionApi(BaseApi):
+    def __init__(self):
+        super().__init__("subject_collection", "collection", "subject_collection_items")
+
+
+class DoulistApi(BaseApi):
+    def __init__(self):
+        super().__init__("doulist", "doulist", "items")
diff --git a/config.py b/config.py
@@ -8,7 +8,7 @@ class ImdbApiType(str, Enum):
 
 class AppConfig(BaseSettings):
     cache_base_dir: str = "cache"
-    collection_request_delay_max: float = 1
+    douban_api_request_delay_max: float = 1
     collection_cache_ttl: float = 3600
     imdb_request_delay_max: float = 30
     imdb_api_type: ImdbApiType = ImdbApiType.DOUBAN_HTML

diff --git a/imdb.py b/imdb.py
@@ -19,15 +19,14 @@ def __exit__(self, exc_type, exc_value, traceback):
         self.cache.close()
 
     @abstractmethod
-    async def fetch_imdb_id(self, douban_item: Any):
+    async def fetch_imdb_id(self, douban_id: str, douban_item: Any):
         pass
 
-    async def get_imdb_id(self, douban_item: Any):
-        douban_id = douban_item["id"]
+    async def get_imdb_id(self, douban_id: str, douban_item: Any):
         imdb_id = self.cache.get(douban_id, default="not_cached")
         if imdb_id != "not_cached":
             return imdb_id
-        imdb_id = await self.fetch_imdb_id(douban_item)
+        imdb_id = await self.fetch_imdb_id(douban_id, douban_item)
         if not imdb_id:
             expire = app_config.imdb_cache_ttl_id_not_found
         else:
@@ -50,9 +49,8 @@ def __exit__(self, exc_type, exc_value, traceback):
         self.client.close()
         super().__exit__(exc_type, exc_value, traceback)
 
-    async def fetch_imdb_id(self, douban_item: Any):
+    async def fetch_imdb_id(self, douban_id: str, douban_item: Any):
         title = douban_item["title"]
-        douban_id = douban_item["id"]
 
         await asyncio.sleep(random.uniform(0.0, app_config.imdb_request_delay_max))
 

diff --git a/main.py b/main.py
@@ -5,8 +5,9 @@
 import fastapi
 from bootstrap import bootstrap
 
-from collection import CollectionApi
+from collection import CollectionApi, DoulistApi
 from imdb import get_imdb_api
+from utils import get_douban_id
 
 
 logging.basicConfig(
@@ -15,6 +16,7 @@
 
 app = FastAPI()
 collection_api = CollectionApi()
+doulist_api = DoulistApi()
 imdb_api = get_imdb_api()
 
 asyncio.create_task(bootstrap(collection_api, imdb_api))
@@ -26,9 +28,20 @@ async def internal_exception_handler(request: fastapi.Request, exc: Exception):
     return fastapi.responses.PlainTextResponse(status_code=500, content=content)
 
 
-@app.get("/collection/{collection_id}")
-async def collection(collection_id: str):
-    items = await collection_api.get_collection_items(collection_id)
+@app.get("/collection/{id}")
+async def collection(id: str):
+    items = await collection_api.get_items(id)
+    # Keep only movies
+    items = [item for item in items if item["type"] == "movie"]
+    items = [await convert_item(item) for item in items]
+    # Keep only items with IMDb ID
+    items = [item for item in items if item["imdb_id"]]
+    return items
+
+
+@app.get("/doulist/{id}")
+async def doulist(id: str):
+    items = await doulist_api.get_items(id)
     # Keep only movies
     items = [item for item in items if item["type"] == "movie"]
     items = [await convert_item(item) for item in items]
@@ -38,9 +51,10 @@ async def collection(collection_id: str):
 
 
 async def convert_item(item):
-    imdb_id = await imdb_api.get_imdb_id(item)
+    douban_id = get_douban_id(item)
+    imdb_id = await imdb_api.get_imdb_id(douban_id, item)
     return {
-        "douban_id": item["id"],
+        "douban_id": douban_id,
         "title": item["title"],
         "imdb_id": imdb_id,
     }
diff --git a/utils.py b/utils.py
@@ -1,3 +1,4 @@
+from urllib.parse import urlparse
 import httpx
 import logging
 from config import app_config
@@ -28,3 +29,9 @@ def get_http_client_args():
         }
     else:
         return {}
+
+
+def get_douban_id(item):
+    parsed_url = urlparse(item["url"])
+    douban_id = [_ for _ in parsed_url.path.split("/") if _][-1]
+    return douban_id