diff --git a/src/main/java/mServer/crawler/sender/arte/ArteConstants.java b/src/main/java/mServer/crawler/sender/arte/ArteConstants.java
index 3b14febf..ec265b4a 100644
--- a/src/main/java/mServer/crawler/sender/arte/ArteConstants.java
+++ b/src/main/java/mServer/crawler/sender/arte/ArteConstants.java
@@ -1,8 +1,7 @@
 package mServer.crawler.sender.arte;
 
 public class ArteConstants {
-  public static final String VIDEOS_URL ="https://api.arte.tv/api/opa/v3/videos?limit=100&page=%s&sort=-broadcastBegin&language=%s&kind=SHOW,MANUAL_CLIP,BONUS";
-  public static final String VIDEOS_URL_ALT ="https://api.arte.tv/api/opa/v3/videos?limit=100&page=%s&sort=broadcastBegin&language=%s&kind=SHOW,MANUAL_CLIP,BONUS";
+  public static final String VIDEOS_URL ="https://api.arte.tv/api/opa/v3/videos?limit=100&page=%s&sort=-videoRightsBegin&language=%s&kind=SHOW,MANUAL_CLIP,BONUS";
   public static final String VIDEO_URL ="https://www.arte.tv/hbbtvv2/services/web/index.php/OPA/v3/streams/%s/%s/%s"; //PROGRAMID/KIND/LANG
   public static final String API_TOKEN = "Bearer Nzc1Yjc1ZjJkYjk1NWFhN2I2MWEwMmRlMzAzNjI5NmU3NWU3ODg4ODJjOWMxNTMxYzEzZGRjYjg2ZGE4MmIwOA";
   public static final int MAX_POSSIBLE_SUBPAGES = 100;
diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCrawler.java b/src/main/java/mServer/crawler/sender/arte/ArteCrawler.java
index d72b2dbf..df164f1d 100644
--- a/src/main/java/mServer/crawler/sender/arte/ArteCrawler.java
+++ b/src/main/java/mServer/crawler/sender/arte/ArteCrawler.java
@@ -74,8 +74,6 @@ private ConcurrentLinkedQueue createVideosQueue(String language) {
     final ConcurrentLinkedQueue root = new ConcurrentLinkedQueue<>();
     String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, language);
     root.add(new TopicUrlDTO("all videos sorted up", rootUrl));
-    String rootUrl2 = String.format(ArteConstants.VIDEOS_URL_ALT, 1, language);
-    root.add(new TopicUrlDTO("all videos sorted down", rootUrl2));
     return root;
   }
 
@@ -83,9 +81,9 @@ private int getMaxPagesForOverview(String lang) {
     final int maxAvailablePages = getNumberOfAvailablePages(lang);
     final int configuredMaxPages = getMaximumSubpages();
     if (configuredMaxPages > maxAvailablePages) {
-      return Math.min(ArteConstants.MAX_POSSIBLE_SUBPAGES, maxAvailablePages / 2);
+      return Math.min(ArteConstants.MAX_POSSIBLE_SUBPAGES, maxAvailablePages);
     } else {
-      return Math.min(ArteConstants.MAX_POSSIBLE_SUBPAGES, configuredMaxPages / 2);
+      return Math.min(ArteConstants.MAX_POSSIBLE_SUBPAGES, configuredMaxPages);
     }
   }
 
diff --git a/src/main/java/mServer/crawler/sender/arte/tasks/ArteRateLimitedJsonRestTask.java b/src/main/java/mServer/crawler/sender/arte/tasks/ArteRateLimitedJsonRestTask.java
index 17364289..d8b19996 100644
--- a/src/main/java/mServer/crawler/sender/arte/tasks/ArteRateLimitedJsonRestTask.java
+++ b/src/main/java/mServer/crawler/sender/arte/tasks/ArteRateLimitedJsonRestTask.java
@@ -1,7 +1,14 @@
 package mServer.crawler.sender.arte.tasks;
 
 import com.google.common.util.concurrent.RateLimiter;
+import com.google.gson.Gson;
+import de.mediathekview.mlib.Config;
+import de.mediathekview.mlib.tool.Log;
+import jakarta.ws.rs.client.Invocation.Builder;
 import jakarta.ws.rs.client.WebTarget;
+import jakarta.ws.rs.core.Response;
+import mServer.crawler.FilmeSuchen;
+import mServer.crawler.RunSender;
 import mServer.crawler.sender.MediathekReader;
 import mServer.crawler.sender.base.AbstractJsonRestTask;
 import mServer.crawler.sender.base.CrawlerUrlDTO;
@@ -9,11 +16,12 @@
 
 import java.util.Optional;
 import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.TimeUnit;
 
 public abstract class ArteRateLimitedJsonRestTask extends AbstractJsonRestTask {
   private static final long serialVersionUID = 1L;
   private static final RateLimiter rateLimiter = RateLimiter.create(MserverDaten.getArteRateLimit());
-  private static final RateLimiter opaApirateLimiter = RateLimiter.create(1.0);
+  private static final RateLimiter opaApirateLimiter = RateLimiter.create(0.3);
 
   protected ArteRateLimitedJsonRestTask(MediathekReader aCrawler, ConcurrentLinkedQueue urlToCrawlDTOs, Optional authKey) {
     super(aCrawler, urlToCrawlDTOs, authKey);
@@ -21,11 +29,65 @@ protected ArteRateLimitedJsonRestTask(MediathekReader aCrawler, ConcurrentLinked
 
+  /**
+   * Sends the request and processes the response, retrying when the server answers 429.
+   *
+   * <p>A 200 response is parsed with Gson and passed to postProcessing. A 429 response is
+   * retried up to three times, each after a fixed 60 second pause. Any other status (or a 429
+   * once the retries are used up) is counted as an error and passed to handleHttpError. The
+   * loop also ends as soon as Config.getStop() reports true or the waiting thread is
+   * interrupted.
+   *
+   * @param aDTO the URL DTO being crawled
+   * @param aTarget the REST target to request
+   */
   @Override
   protected void processRestTarget(final D aDTO, final WebTarget aTarget) {
-    if (aTarget.getUri().toString().contains("api.arte.tv/api/opa/")) {
-      opaApirateLimiter.acquire();
-    } else {
-      rateLimiter.acquire();
+    int retryCount = 0;
+    int maxRetries = 3;
+    boolean stop = false;
+
+    while (!stop && !Config.getStop()) {
+      // Apply rate limiting before each request (including retries)
+      if (aTarget.getUri().toString().contains("api.arte.tv/api/opa/")) {
+        opaApirateLimiter.acquire();
+      } else {
+        rateLimiter.acquire();
+      }
+
+      Builder request = aTarget.request();
+      final Optional authKey = getAuthKey();
+      if (authKey.isPresent()) {
+        request = request.header(HEADER_AUTHORIZATION, authKey.get());
+      }
+
+      try (Response response = createResponse(request, aDTO)) {
+        traceRequest(response.getLength());
+
+        if (response.getStatus() == 200) {
+          gsonBuilder.registerTypeAdapter(getType(), getParser(aDTO));
+          final Gson gson = gsonBuilder.create();
+          final String jsonOutput = response.readEntity(String.class);
+          final R responseObj = gson.fromJson(jsonOutput, getType());
+          postProcessing(responseObj, aDTO);
+          stop = true;
+        } else if (response.getStatus() == 429 && retryCount < maxRetries) {
+          // Rate limited with retries left: log the server's hint and wait before the next attempt.
+          String retryAfter = response.getHeaderString("Retry-After");
+          Log.sysLog("429: " + aDTO.getUrl() + " - retry after: " + retryAfter);
+          retryCount++;
+          try {
+            TimeUnit.MILLISECONDS.sleep(60000);
+          } catch (InterruptedException e) {
+            // Keep the interrupt visible to the caller and give up instead of retrying.
+            Thread.currentThread().interrupt();
+            stop = true;
+          }
+        } else {
+          FilmeSuchen.listeSenderLaufen.inc(crawler.getRunIdentifier(), RunSender.Count.FEHLER);
+          FilmeSuchen.listeSenderLaufen.inc(crawler.getRunIdentifier(), RunSender.Count.FEHLVERSUCHE);
+          handleHttpError(aDTO, aTarget.getUri(), response);
+          stop = true;
+        }
+      }
     }
-    super.processRestTarget(aDTO, aTarget);
   }
 }
\ No newline at end of file