From f207d5998559873b15b39c5835a1654d536408ff Mon Sep 17 00:00:00 2001 From: pidoubleyou Date: Wed, 17 Jul 2024 22:20:58 +0200 Subject: [PATCH 1/4] more deep search topics --- .../mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java index 73cefe03e..022bb4032 100644 --- a/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java +++ b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java @@ -2,6 +2,7 @@ import com.google.gson.reflect.TypeToken; import de.mediathekview.mlib.Config; +import de.mediathekview.mlib.tool.Log; import jakarta.ws.rs.client.WebTarget; import mServer.crawler.sender.MediathekReader; import mServer.crawler.sender.ard.ArdFilmInfoDto; @@ -84,6 +85,10 @@ public class ArdTopicPageTask extends ArdTaskBase TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3dkci5kZS9vbmUvcm90ZXJvc2Vu"); // Heimatflimmern TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3dkci5kZS9oZWltYXRmbGltbWVybg"); + // Euro 2024 + TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3Nwb3J0c2NoYXUuZGUvc3BvcnRzY2hhdS1ldXJvLTIwMjQ"); + // ard debüt + TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2Rhc2Vyc3RlLmRlL2FyZGRlYnVldA"); } public ArdTopicPageTask(MediathekReader aCrawler, @@ -147,6 +152,7 @@ private ConcurrentLinkedQueue createSubPageUrls( private int getMaximumSubpages(String id) { if (TOPICS_LOAD_ALL_PAGES.contains(id)) { + Log.sysLog("ARD search all: " + id); return 999; } return 0; From 865857878102e2f4027d954681f6a4f2723724f8 Mon Sep 17 00:00:00 2001 From: pidoubleyou Date: Wed, 17 Jul 2024 23:22:28 +0200 Subject: [PATCH 2/4] more deep search topics --- .../crawler/sender/ard/tasks/ArdTopicPageTask.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java index 022bb4032..de281434d 100644 --- a/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java +++ b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java @@ -83,12 +83,12 @@ public class ArdTopicPageTask extends ArdTaskBase TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2JyLmRlL2Jyb2FkY2FzdFNlcmllcy9icm9hZGNhc3RTZXJpZXM6L2JyZGUvZmVybnNlaGVuL2JheWVyaXNjaGVzLWZlcm5zZWhlbi9zZW5kdW5nZW4vZGFob2FtLWlzLWRhaG9hbQ"); // Rote Rosen TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3dkci5kZS9vbmUvcm90ZXJvc2Vu"); - // Heimatflimmern - TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3dkci5kZS9oZWltYXRmbGltbWVybg"); + // Sturm der Liebe + TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2Rhc2Vyc3RlLmRlL3N0dXJtIGRlciBsaWViZQ"); + // in aller freundschaft -die jungen ärzte + TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL21kci5kZS9zZW5kZXJlaWhlbi9zdGFmZmVsc2VyaWUtaW4tYWxsZXItZnJldW5kc2NoYWZ0LWRpZS1qdW5nZW4tYWVyenRl"); // Euro 2024 TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3Nwb3J0c2NoYXUuZGUvc3BvcnRzY2hhdS1ldXJvLTIwMjQ"); - // ard debüt - TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2Rhc2Vyc3RlLmRlL2FyZGRlYnVldA"); } public ArdTopicPageTask(MediathekReader aCrawler, From c091f4069d58bbae0a9b6db81b0d14dd67590f83 Mon Sep 17 00:00:00 2001 From: pidoubleyou Date: Wed, 17 Jul 2024 23:24:27 +0200 Subject: [PATCH 3/4] workaround: add missing topic ids for ard series --- .../crawler/sender/ard/ArdCrawler.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java b/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java index dc45ca013..84797fd73 100644 --- a/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java +++ b/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java @@ -25,6 +25,17 @@ public class ArdCrawler extends MediathekCrawler { private static final DateTimeFormatter DAY_PAGE_DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd"); + public static final String[] MISSING_TOPIC_IDS = new String[]{ + // Dahoam is dahoam + "Y3JpZDovL2JyLmRlL2Jyb2FkY2FzdFNlcmllcy9icm9hZGNhc3RTZXJpZXM6L2JyZGUvZmVybnNlaGVuL2JheWVyaXNjaGVzLWZlcm5zZWhlbi9zZW5kdW5nZW4vZGFob2FtLWlzLWRhaG9hbQ", + // Rote Rosen + "Y3JpZDovL3dkci5kZS9vbmUvcm90ZXJvc2Vu", + // Sturm der Liebe + "Y3JpZDovL2Rhc2Vyc3RlLmRlL3N0dXJtIGRlciBsaWViZQ", + // in aller freundschaft -die jungen ärzte + "Y3JpZDovL21kci5kZS9zZW5kZXJlaWhlbi9zdGFmZmVsc2VyaWUtaW4tYWxsZXItZnJldW5kc2NoYWZ0LWRpZS1qdW5nZW4tYWVyenRl" + }; + public ArdCrawler(FilmeSuchen ssearch, int startPrio) { super(ssearch, SENDERNAME, 0, 1, startPrio); } @@ -127,6 +138,8 @@ private Set getTopicsEntries() throws ExecutionException, Interr } Log.sysLog("ard mediathek topics: " + topics.size()); + addAdditionalTopics(topics); + Log.sysLog("ard mediathek topics with additional: " + topics.size()); ConcurrentLinkedQueue topicUrls = new ConcurrentLinkedQueue<>(topics); final ArdTopicPageTask topicTask = new ArdTopicPageTask(this, topicUrls); @@ -135,6 +148,13 @@ private Set getTopicsEntries() throws ExecutionException, Interr return filmInfos; } + // temporary workaround for missing topics + private void addAdditionalTopics(Set topics) { + for (String topicId : MISSING_TOPIC_IDS) { + topics.add(new CrawlerUrlDTO(String.format(ArdConstants.TOPICS_URL, topicId, ArdConstants.TOPIC_PAGE_SIZE))); + } + } + private Set getTopicEntriesBySender(final String sender) throws ExecutionException, InterruptedException { ArdTopicsTask topicsTask = new ArdTopicsTask(this, sender, createTopicsOverviewUrl(sender)); From c00d6e3eec63950707d44bf3c72c7cf417df4987 Mon Sep 17 00:00:00 2001 From: pidoubleyou Date: Wed, 7 Aug 2024 22:16:32 +0200 Subject: [PATCH 4/4] add more deep search topics --- .../mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java index de281434d..5fbc46a39 100644 --- a/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java +++ b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java @@ -89,6 +89,10 @@ public class ArdTopicPageTask extends ArdTaskBase TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL21kci5kZS9zZW5kZXJlaWhlbi9zdGFmZmVsc2VyaWUtaW4tYWxsZXItZnJldW5kc2NoYWZ0LWRpZS1qdW5nZW4tYWVyenRl"); // Euro 2024 TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3Nwb3J0c2NoYXUuZGUvc3BvcnRzY2hhdS1ldXJvLTIwMjQ"); + // Olympia 2024 + TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3Nwb3J0c2NoYXUuZGUvc3BvcnRzY2hhdS1vbHltcGlh"); + // um Himmels willen + TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3dkci5kZS9VbSBIaW1tZWxzIFdpbGxlbiBPTkU"); } public ArdTopicPageTask(MediathekReader aCrawler,