/*
 * Decompiled with CFR 0.152.
 */
package eu.openaire.publications_retriever.util.url;

import com.google.common.collect.HashMultimap;
import crawlercommons.filters.basic.BasicURLNormalizer;
import eu.openaire.publications_retriever.PublicationsRetriever;
import eu.openaire.publications_retriever.exceptions.ConnTimeoutException;
import eu.openaire.publications_retriever.exceptions.DomainBlockedException;
import eu.openaire.publications_retriever.exceptions.DomainWithUnsupportedHEADmethodException;
import eu.openaire.publications_retriever.models.IdUrlMimeTypeTriple;
import eu.openaire.publications_retriever.util.args.ArgsUtils;
import eu.openaire.publications_retriever.util.file.FileUtils;
import eu.openaire.publications_retriever.util.http.ConnSupportUtils;
import eu.openaire.publications_retriever.util.http.HttpConnUtils;
import eu.openaire.publications_retriever.util.url.GenericUtils;
import eu.openaire.publications_retriever.util.url.UrlTypeChecker;
import eu.openaire.publications_retriever.util.url.UrlUtils;
import java.lang.runtime.SwitchBootstraps;
import java.net.CookieStore;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import org.apache.commons.lang3.Strings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LoaderAndChecker {
    private static final Logger logger = LoggerFactory.getLogger(LoaderAndChecker.class);

    /** Selects the loading mode used by the constructor: id-url pairs when {@code true}, plain urls otherwise. */
    public static boolean useIdUrlPairs = true;

    /** Regex alternation of keywords hinting that a url points to a document or a download. */
    public static final String docAndDownloadKeywords = "(?:pdf|download|/doc|document|(?:/|[?]|&)file|/(?:fulltext|texteint[\u00e9e]gral)|attachment|/paper|view(?:file|doc)|/get|cgi/viewcontent.cgi\\?|t[\u00e9e]l[\u00e9e]charger|descargar)";

    /** Matches a url having at least one character followed by one of the {@link #docAndDownloadKeywords}. */
    // Built from the constant, so that the keyword list is maintained in a single place.
    public static final Pattern DOC_URL_FILTER = Pattern.compile(".+" + docAndDownloadKeywords + ".*");

    /**
     * Regex alternation of file-extensions / format-names regarded as dataset formats.
     * Fixes relative to the previous value: the stray closing bracket after the "grb/grib" alternative was removed
     * (it was treated as a literal character, so those extensions never matched) and the "textFile" alternative
     * now also accepts the lower-case spelling, since the filters in this class are applied on lower-cased urls.
     */
    public static final String dataset_formats = "(?:xls[xbm]?|xlt[x]?|[ct]sv|tab|(?:(?:geo)?j|b)son|(?:x|k|g|nmr|sb|wiley|y[a]?)ml|xsd|o[dt]s|ddi|rdf|[g]?zip|zipx|[rt]ar|[7x]z|[t]?gz|[gb]z[\\d]*|smi[l]?|por|ascii|dta|sav|dat|txt|ti[f]{1,2}|tfw|dwg|nt|fits|feather|svg|sas7b(?:dat|ve)|spss|sas|stata|(?:my|postgre)?sql(?:ite)?|bigquery|sh[px]|sb[xn]|prj|dbf|(?:m|acc)db|mif|mat|pcd|bt|n[sc]?[\\d]*|h[\\d]+|hdf[\\d]*|trs|opj|jcamp|fcs|fas(?:ta)?|keys|values|las|rdata|parquet|avro|sql|dcm|gr[i]?b|rds|[p]?cap|dmp|vcf|cbor|biosample|hic|warc|ig[e]?s|sla|dxf|pdb|[sc]df|cif|f(?:ast)?[qa]|apng|sra|vtp|gltf|[sm]tl|ply|abc|md|rtf|ttl|shp|shx|exr|cdf|glb|mtl|kmz|text[fF]ile)";

    /**
     * Matches a url which either contains a "dataset/" or "datasets/" path-segment, or ends (optionally followed by a
     * query-string) with one of the {@link #dataset_formats}, introduced by a dot or by "format=".
     */
    public static final Pattern DATASET_URL_FILTER = Pattern.compile(".+(?:dataset[s]?/.*|(?:\\.|format=)" + dataset_formats + "(?:\\?.+)?$)");

    /** Marker looked for in a RuntimeException's message by {@link #handleException}, to skip writing a second output record. */
    public static final String alreadyLoggedMessage = "__LOGGED__";

    public static final BasicURLNormalizer basicURLNormalizer = BasicURLNormalizer.newBuilder().build();

    /** Incremented by the id-url loading methods, for every loaded batch. */
    public static int numOfIDs = 0;

    /** Incremented for urls discarded at loading time (normalization problems and, in plain-url mode, failed url-checks). */
    public static AtomicInteger connProblematicUrls = new AtomicInteger(0);

    /** Incremented in plain-url mode, for every url found to be a duplicate by {@link #handleUrlChecks}. */
    public static AtomicInteger inputDuplicatesNum = new AtomicInteger(0);

    /** Incremented for every ID for which none of its urls was acceptable. */
    public static AtomicInteger numOfIDsWithoutAcceptableSourceUrl = new AtomicInteger(0);

    /** Incremented for every additional url of an ID, which is tried after the selected one failed. */
    public static AtomicInteger loadingRetries = new AtomicInteger(0);

    /** Updated by {@link #executeTasksAndHandleResults}, when a batch has failed tasks. */
    public static AtomicInteger totalNumFailedTasks = new AtomicInteger(0);

    /** Exception-messages matching this pattern mark the url as invalid, in {@link #getWasValidAndCouldRetry}. */
    public static final Pattern INVALID_URL_HTTP_STATUS = Pattern.compile(".*HTTP 4(?:00|04|10|14|22) Client Error.*");

    /** Exception-messages matching this pattern mark the url as retry-able. It is set by {@link #setCouldRetryRegex}. */
    public static Pattern COULD_RETRY_HTTP_STATUS = null;

    /** Urls (lower-cased) matching this pattern are always marked as retry-able, as long as they were valid. */
    public static Pattern COULD_RETRY_URLS = Pattern.compile("^https?://[^/]*(?:sciencedirect|elsevier).com[^/]*/.*$");

    /**
     * Prepares the "could-retry" regex and then runs the loading method selected by {@link #useIdUrlPairs}.
     * Any data still waiting in {@code FileUtils.dataForOutput} is written to the output, even when the loading failed.
     *
     * @throws RuntimeException wrapping any exception thrown by the selected loading method
     */
    public LoaderAndChecker() throws RuntimeException {
        setCouldRetryRegex();
        try {
            if (!useIdUrlPairs) {
                loadAndCheckUrls();
            } else {
                loadAndCheckIdUrlPairs();
            }
        } catch (Exception loadingFailure) {
            logger.error("", loadingFailure);
            throw new RuntimeException(loadingFailure);
        } finally {
            // Flush whatever the last (possibly interrupted) batch produced.
            boolean hasPendingOutput = !FileUtils.dataForOutput.isEmpty();
            if (hasPendingOutput) {
                logger.debug("Writing last data points to the outputFile.");
                FileUtils.writeResultsToFile();
            }
        }
    }

    /**
     * Loads the plain urls in batches, creates one task per url and executes every batch through
     * {@link #executeTasksAndHandleResults}.
     * Each task runs the url-checks, normalizes the url, handles an already-crossed url and otherwise connects to it.
     * The task's result is {@code false} when the url was discarded or the connection failed.
     *
     * @throws RuntimeException declared for the callers; this method does not throw one itself
     */
    public static void loadAndCheckUrls() throws RuntimeException {
        Collection<String> loadedUrlGroup;
        boolean isFirstRun = true;
        int batchCount = 0;
        CookieStore cookieStore = HttpConnUtils.cookieManager.getCookieStore();
        List<Callable<Boolean>> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize);
        while (!isFinishedLoading((loadedUrlGroup = FileUtils.getNextUrlBatchTest()).isEmpty(), isFirstRun)) {
            isFirstRun = false;
            // The counter advances for every batch (previously only when an inputFile-path was given)
            // and the logged number is the same one used in the "batch_N" warning for failed tasks.
            ++batchCount;
            String progress = "";
            if (ArgsUtils.inputFileFullPath != null) {
                // The progress refers to the batches completed before this one.
                progress = " | progress: " + PublicationsRetriever.df.format((double) (batchCount - 1) * FileUtils.jsonBatchSize * 100.0 / (double) FileUtils.numOfLines) + "%";
            }
            logger.info("Batch counter: " + batchCount + progress + " | every batch contains at most " + FileUtils.jsonBatchSize + " id-url pairs.");
            for (String retrievedUrl : loadedUrlGroup) {
                callableTasks.add(() -> {
                    String retrievedUrlToCheck = handleUrlChecks("null", retrievedUrl);
                    if (retrievedUrlToCheck == null) {
                        return false;   // The url was discarded (and logged) by the checks.
                    }
                    String urlToCheck = basicURLNormalizer.filter(retrievedUrlToCheck);
                    if (urlToCheck == null) {
                        logger.warn("Could not normalize url: " + retrievedUrlToCheck);
                        UrlUtils.addOutputData("null", retrievedUrlToCheck, "null", "unreachable", "Discarded at loading time, due to normalization's problems.", "null", null, true, "true", "false", "false", "false", "false", null, "null", "null");
                        connProblematicUrls.incrementAndGet();
                        return false;
                    }
                    // An url which already gave a result is not connected again.
                    IdUrlMimeTypeTriple originalIdUrlMimeTypeTriple = UrlUtils.resultUrlsWithIDs.get(retrievedUrl);
                    if (originalIdUrlMimeTypeTriple != null) {
                        ConnSupportUtils.handleReCrossedTargetUrl("null", retrievedUrl, retrievedUrl, retrievedUrl, originalIdUrlMimeTypeTriple, true);
                        return true;
                    }
                    String lowerCaseRetrievedUrl = retrievedUrlToCheck.toLowerCase();
                    boolean isPossibleDocOrDatasetUrl = (ArgsUtils.retrieveDocuments && DOC_URL_FILTER.matcher(lowerCaseRetrievedUrl).matches())
                            || (ArgsUtils.retrieveDatasets && DATASET_URL_FILTER.matcher(lowerCaseRetrievedUrl).matches());
                    try {
                        HttpConnUtils.connectAndCheckMimeType("null", retrievedUrlToCheck, urlToCheck, urlToCheck, null, true, isPossibleDocOrDatasetUrl);
                    } catch (Exception e) {
                        handleException("null", urlToCheck, e);
                        return false;
                    }
                    return true;
                });
            }
            executeTasksAndHandleResults(callableTasks, batchCount, cookieStore);
        }
    }

    /**
     * Loads the id-url pairs in batches, creates one task per ID and executes every batch through
     * {@link #executeTasksAndHandleResults}.
     * For each ID, the task picks a single url to connect to, with this priority: a possible doc-or-dataset url,
     * a url containing "/handle/", a url not containing "doi.org" (only recorded while no "/handle/" url was seen),
     * any other url. If the connection of the selected url fails, the remaining urls of the ID are tried.
     * The urls which were not used are logged as skipped.
     *
     * @throws RuntimeException declared for the callers; this method does not throw one itself
     */
    public static void loadAndCheckIdUrlPairs() throws RuntimeException {
        HashMultimap<String, String> loadedIdUrlPairs;
        boolean isFirstRun = true;
        int batchCount = 0;
        CookieStore cookieStore = HttpConnUtils.cookieManager.getCookieStore();
        List<Callable<Boolean>> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize);
        while (!isFinishedLoading((loadedIdUrlPairs = FileUtils.getNextIdUrlPairBatchFromJson()).isEmpty(), isFirstRun)) {
            isFirstRun = false;
            // The counter advances for every batch (previously only when an inputFile-path was given)
            // and the logged number is the same one used in the "batch_N" warning for failed tasks.
            ++batchCount;
            String progress = "";
            if (ArgsUtils.inputFileFullPath != null) {
                // The progress refers to the batches completed before this one.
                progress = " | progress: " + PublicationsRetriever.df.format((double) (batchCount - 1) * FileUtils.jsonBatchSize * 100.0 / (double) FileUtils.numOfLines) + "%";
            }
            logger.info("Batch counter: " + batchCount + progress + " | every batch contains at most " + FileUtils.jsonBatchSize + " id-url pairs.");

            Set<String> ids = loadedIdUrlPairs.keySet();
            numOfIDs += ids.size();
            // The loop-variable above is re-assigned, so the lambdas need an effectively-final reference.
            final HashMultimap<String, String> finalLoadedIdUrlPairs = loadedIdUrlPairs;
            for (String retrievedId : ids) {
                callableTasks.add(() -> {
                    boolean goToNextId = false;
                    String possibleDocOrDatasetUrl = null;
                    String bestNonDocNonDatasetUrl = null;
                    String nonDoiUrl = null;
                    String neutralUrl = null;
                    Set<String> retrievedUrlsOfCurrentId = finalLoadedIdUrlPairs.get(retrievedId);
                    boolean isSingleIdUrlPair = retrievedUrlsOfCurrentId.size() == 1;
                    // Holds the urls of this ID for which an output record was already written.
                    HashSet<String> loggedUrlsOfCurrentId = new HashSet<>();

                    for (String originalUrl : retrievedUrlsOfCurrentId) {
                        String retrievedUrl = handleUrlChecks(retrievedId, originalUrl);
                        if (retrievedUrl == null) {   // Discarded (and logged) by the checks.
                            if (!isSingleIdUrlPair) {
                                loggedUrlsOfCurrentId.add(originalUrl);
                            }
                            continue;
                        }
                        IdUrlMimeTypeTriple originalIdUrlMimeTypeTriple = UrlUtils.resultUrlsWithIDs.get(retrievedUrl);
                        if (originalIdUrlMimeTypeTriple != null) {
                            // This url already gave a result, so no connection is needed for this ID.
                            ConnSupportUtils.handleReCrossedTargetUrl(retrievedId, retrievedUrl, retrievedUrl, retrievedUrl, originalIdUrlMimeTypeTriple, true);
                            if (!isSingleIdUrlPair) {
                                loggedUrlsOfCurrentId.add(retrievedUrl);
                            }
                            goToNextId = true;
                            break;
                        }
                        String lowerCaseRetrievedUrl = retrievedUrl.toLowerCase();
                        if ((ArgsUtils.retrieveDocuments && DOC_URL_FILTER.matcher(lowerCaseRetrievedUrl).matches())
                                || (ArgsUtils.retrieveDatasets && DATASET_URL_FILTER.matcher(lowerCaseRetrievedUrl).matches())) {
                            possibleDocOrDatasetUrl = retrievedUrl;
                            break;   // The best candidate was found.
                        }
                        if (retrievedUrl.contains("/handle/")) {
                            bestNonDocNonDatasetUrl = retrievedUrl;
                        } else if (bestNonDocNonDatasetUrl == null && !retrievedUrl.contains("doi.org")) {
                            nonDoiUrl = retrievedUrl;
                        } else {
                            neutralUrl = retrievedUrl;
                        }
                    }

                    if (goToNextId) {
                        if (!isSingleIdUrlPair) {
                            handleLogOfRemainingUrls(retrievedId, retrievedUrlsOfCurrentId, loggedUrlsOfCurrentId);
                        }
                        return false;
                    }

                    String sourceUrl;
                    boolean isPossibleDocOrDatasetUrl = false;
                    if (possibleDocOrDatasetUrl != null) {
                        sourceUrl = possibleDocOrDatasetUrl;
                        isPossibleDocOrDatasetUrl = true;
                    } else if (bestNonDocNonDatasetUrl != null) {
                        sourceUrl = bestNonDocNonDatasetUrl;
                    } else if (nonDoiUrl != null) {
                        sourceUrl = nonDoiUrl;
                    } else if (neutralUrl != null) {
                        sourceUrl = neutralUrl;
                    } else {
                        logger.debug("No acceptable sourceUrl was found for ID: \"" + retrievedId + "\".");
                        numOfIDsWithoutAcceptableSourceUrl.incrementAndGet();
                        return false;
                    }

                    String urlToCheck = basicURLNormalizer.filter(sourceUrl);
                    if (urlToCheck == null) {
                        logger.warn("Could not normalize url: " + sourceUrl);
                        UrlUtils.addOutputData(retrievedId, sourceUrl, "null", "unreachable", "Discarded at loading time, due to normalization's problems.", "null", null, true, "true", "false", "false", "false", "false", null, "null", "null");
                        connProblematicUrls.incrementAndGet();
                        if (!isSingleIdUrlPair) {
                            loggedUrlsOfCurrentId.add(sourceUrl);
                            checkRemainingUrls(retrievedId, retrievedUrlsOfCurrentId, loggedUrlsOfCurrentId, isSingleIdUrlPair);
                            handleLogOfRemainingUrls(retrievedId, retrievedUrlsOfCurrentId, loggedUrlsOfCurrentId);
                        }
                        return false;
                    }

                    boolean wasSuccessful = true;
                    try {
                        HttpConnUtils.connectAndCheckMimeType(retrievedId, sourceUrl, urlToCheck, urlToCheck, null, true, isPossibleDocOrDatasetUrl);
                        if (!isSingleIdUrlPair) {
                            loggedUrlsOfCurrentId.add(urlToCheck);
                        }
                    } catch (Exception e) {
                        if (handleException(retrievedId, urlToCheck, e)) {
                            return false;   // The problem was already logged elsewhere.
                        }
                        wasSuccessful = false;
                        if (!isSingleIdUrlPair) {
                            loggedUrlsOfCurrentId.add(urlToCheck);
                            // The result of the retries is kept (previously it was overwritten with "false").
                            wasSuccessful = checkRemainingUrls(retrievedId, retrievedUrlsOfCurrentId, loggedUrlsOfCurrentId, isSingleIdUrlPair);
                        }
                    }
                    if (!isSingleIdUrlPair) {
                        handleLogOfRemainingUrls(retrievedId, retrievedUrlsOfCurrentId, loggedUrlsOfCurrentId);
                    }
                    return wasSuccessful;
                });
            }
            executeTasksAndHandleResults(callableTasks, batchCount, cookieStore);
        }
    }

    /**
     * Loads the id-url pairs in batches, creates one task per (id, url) entry and executes every batch through
     * {@link #executeTasksAndHandleResults}.
     * Each task runs the url-checks, normalizes the url, handles an already-crossed url and otherwise connects to it.
     * Note that {@link #numOfIDs} is increased by the number of entries of each batch.
     *
     * @throws RuntimeException declared for the callers; this method does not throw one itself
     */
    public static void loadAndCheckEachIdUrlPairInEntries() throws RuntimeException {
        HashMultimap<String, String> loadedIdUrlPairs;
        boolean isFirstRun = true;
        int batchCount = 0;
        CookieStore cookieStore = HttpConnUtils.cookieManager.getCookieStore();
        List<Callable<Boolean>> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize);
        while (!isFinishedLoading((loadedIdUrlPairs = FileUtils.getNextIdUrlPairBatchFromJson()).isEmpty(), isFirstRun)) {
            isFirstRun = false;
            // The counter advances for every batch (previously only when an inputFile-path was given)
            // and the logged number is the same one used in the "batch_N" warning for failed tasks.
            ++batchCount;
            String progress = "";
            if (ArgsUtils.inputFileFullPath != null) {
                // The progress refers to the batches completed before this one.
                progress = " | progress: " + PublicationsRetriever.df.format((double) (batchCount - 1) * FileUtils.jsonBatchSize * 100.0 / (double) FileUtils.numOfLines) + "%";
            }
            logger.info("Batch counter: " + batchCount + progress + " | every batch contains at most " + FileUtils.jsonBatchSize + " id-url pairs.");

            Set<Map.Entry<String, String>> pairs = loadedIdUrlPairs.entries();
            numOfIDs += pairs.size();
            for (Map.Entry<String, String> pair : pairs) {
                callableTasks.add(() -> {
                    String retrievedId = pair.getKey();
                    String retrievedUrl = handleUrlChecks(retrievedId, pair.getValue());
                    if (retrievedUrl == null) {
                        return false;   // The url was discarded (and logged) by the checks.
                    }
                    String sourceUrl = retrievedUrl;
                    String urlToCheck = basicURLNormalizer.filter(sourceUrl);
                    if (urlToCheck == null) {
                        logger.warn("Could not normalize url: " + sourceUrl);
                        UrlUtils.addOutputData(retrievedId, sourceUrl, "null", "unreachable", "Discarded at loading time, due to normalization's problems.", "null", null, true, "true", "false", "false", "false", "false", null, "null", "null");
                        connProblematicUrls.incrementAndGet();
                        return false;
                    }
                    // An url which already gave a result is not connected again.
                    IdUrlMimeTypeTriple originalIdUrlMimeTypeTriple = UrlUtils.resultUrlsWithIDs.get(retrievedUrl);
                    if (originalIdUrlMimeTypeTriple != null) {
                        ConnSupportUtils.handleReCrossedTargetUrl(retrievedId, retrievedUrl, retrievedUrl, retrievedUrl, originalIdUrlMimeTypeTriple, true);
                        return true;
                    }
                    String lowerCaseRetrievedUrl = retrievedUrl.toLowerCase();
                    boolean isPossibleDocOrDatasetUrl = (ArgsUtils.retrieveDocuments && DOC_URL_FILTER.matcher(lowerCaseRetrievedUrl).matches())
                            || (ArgsUtils.retrieveDatasets && DATASET_URL_FILTER.matcher(lowerCaseRetrievedUrl).matches());
                    try {
                        HttpConnUtils.connectAndCheckMimeType(retrievedId, sourceUrl, urlToCheck, urlToCheck, null, true, isPossibleDocOrDatasetUrl);
                    } catch (Exception e) {
                        handleException(retrievedId, urlToCheck, e);
                        return false;
                    }
                    return true;
                });
            }
            executeTasksAndHandleResults(callableTasks, batchCount, cookieStore);
        }
    }

    /**
     * Loads the id-url pairs in batches, creates one task per ID and executes every batch through
     * {@link #executeTasksAndHandleResults}. Each task checks every url of its ID, one after the other.
     * A failed connection no longer aborts the task: the remaining urls of the ID are still checked, so that each of
     * them gets its own output record. The task's result is {@code false} if at least one connection failed.
     * Note that {@link #numOfIDs} is increased by the number of urls of each ID.
     *
     * @throws RuntimeException declared for the callers; this method does not throw one itself
     */
    public static void loadAndCheckEachIdUrlPair() throws RuntimeException {
        HashMultimap<String, String> loadedIdUrlPairs;
        boolean isFirstRun = true;
        int batchCount = 0;
        CookieStore cookieStore = HttpConnUtils.cookieManager.getCookieStore();
        List<Callable<Boolean>> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize);
        while (!isFinishedLoading((loadedIdUrlPairs = FileUtils.getNextIdUrlPairBatchFromJson()).isEmpty(), isFirstRun)) {
            isFirstRun = false;
            // The counter advances for every batch (previously only when an inputFile-path was given)
            // and the logged number is the same one used in the "batch_N" warning for failed tasks.
            ++batchCount;
            String progress = "";
            if (ArgsUtils.inputFileFullPath != null) {
                // The progress refers to the batches completed before this one.
                progress = " | progress: " + PublicationsRetriever.df.format((double) (batchCount - 1) * FileUtils.jsonBatchSize * 100.0 / (double) FileUtils.numOfLines) + "%";
            }
            logger.info("Batch counter: " + batchCount + progress + " | every batch contains at most " + FileUtils.jsonBatchSize + " id-url pairs.");

            for (String retrievedId : loadedIdUrlPairs.keySet()) {
                Set<String> retrievedUrlsOfCurrentId = loadedIdUrlPairs.get(retrievedId);
                numOfIDs += retrievedUrlsOfCurrentId.size();
                callableTasks.add(() -> {
                    boolean allConnectionsSucceeded = true;
                    for (String originalUrl : retrievedUrlsOfCurrentId) {
                        String retrievedUrl = handleUrlChecks(retrievedId, originalUrl);
                        if (retrievedUrl == null) {
                            continue;   // The url was discarded (and logged) by the checks.
                        }
                        String sourceUrl = retrievedUrl;
                        String urlToCheck = basicURLNormalizer.filter(sourceUrl);
                        if (urlToCheck == null) {
                            logger.warn("Could not normalize url: " + sourceUrl);
                            UrlUtils.addOutputData(retrievedId, sourceUrl, "null", "unreachable", "Discarded at loading time, due to normalization's problems.", "null", null, true, "true", "false", "false", "false", "false", null, "null", "null");
                            connProblematicUrls.incrementAndGet();
                            continue;
                        }
                        // An url which already gave a result is not connected again.
                        IdUrlMimeTypeTriple originalIdUrlMimeTypeTriple = UrlUtils.resultUrlsWithIDs.get(retrievedUrl);
                        if (originalIdUrlMimeTypeTriple != null) {
                            ConnSupportUtils.handleReCrossedTargetUrl(retrievedId, retrievedUrl, retrievedUrl, retrievedUrl, originalIdUrlMimeTypeTriple, true);
                            continue;
                        }
                        String lowerCaseRetrievedUrl = retrievedUrl.toLowerCase();
                        boolean isPossibleDocOrDatasetUrl = (ArgsUtils.retrieveDocuments && DOC_URL_FILTER.matcher(lowerCaseRetrievedUrl).matches())
                                || (ArgsUtils.retrieveDatasets && DATASET_URL_FILTER.matcher(lowerCaseRetrievedUrl).matches());
                        try {
                            HttpConnUtils.connectAndCheckMimeType(retrievedId, sourceUrl, urlToCheck, urlToCheck, null, true, isPossibleDocOrDatasetUrl);
                        } catch (Exception e) {
                            handleException(retrievedId, urlToCheck, e);
                            // Remember the failure, but go on with the rest of the urls of this ID.
                            allConnectionsSucceeded = false;
                        }
                    }
                    return allConnectionsSucceeded;
                });
            }
            executeTasksAndHandleResults(callableTasks, batchCount, cookieStore);
        }
    }

    /**
     * Executes the tasks of the current batch, waits for them and then cleans up: the task-list is cleared,
     * all the cookies are removed and the gathered results are written to the output.
     * If the execution itself failed (result {@code -1}), the results are written and the program exits with code 99.
     *
     * @param callableTasks the tasks of the current batch; the list is cleared before returning
     * @param batchCount    the number of the current batch, used in the warning about failed tasks
     * @param cookieStore   the cookie-store to be emptied after the batch
     */
    public static void executeTasksAndHandleResults(List<Callable<Boolean>> callableTasks, int batchCount, CookieStore cookieStore) {
        int numFailedTasks = invokeAllTasksAndWait(callableTasks);
        if (numFailedTasks == -1) {
            FileUtils.writeResultsToFile();
            System.err.println("Invoking and/or executing the callableTasks failed with the exception written in the log files!");
            System.exit(99);
        } else if (numFailedTasks > 0) {
            logger.warn(numFailedTasks + " tasks failed in batch_" + batchCount);
            // Add the number of the failed tasks, not just "1" per batch, as the counter holds the total of failed tasks.
            totalNumFailedTasks.addAndGet(numFailedTasks);
        }
        callableTasks.clear();   // The same list is re-used by the callers for the next batch.
        logger.debug("The number of cookies is: " + cookieStore.getCookies().size());
        boolean cookiesDeleted = cookieStore.removeAll();
        logger.debug(cookiesDeleted ? "The cookies where removed!" : "No cookies where removed!");
        FileUtils.writeResultsToFile();
    }

    /**
     * Submits all the given tasks to {@code PublicationsRetriever.executor} and waits for each one of them.
     * The boolean results of the tasks are not used; only the tasks which ended abnormally are counted.
     *
     * @param callableTasks the tasks to execute
     * @return the number of tasks which threw an exception, were cancelled or were interrupted;
     *         {@code -1} if an unexpected exception occurred while invoking the tasks or handling their results
     */
    public static int invokeAllTasksAndWait(List<Callable<Boolean>> callableTasks) {
        int failedTasksCounter = 0;
        try {
            List<Future<Boolean>> taskFutures = PublicationsRetriever.executor.invokeAll(callableTasks);
            final int totalFutures = taskFutures.size();
            int taskIndex = 0;
            while (taskIndex < totalFutures) {
                try {
                    taskFutures.get(taskIndex).get();   // Only waits for completion; the value itself is ignored.
                } catch (ExecutionException ee) {
                    String stackTraceMessage = GenericUtils.getSelectedStackTraceForCausedException(ee, "Task_" + taskIndex + " failed with: ", null, 15);
                    logger.error(stackTraceMessage);
                    System.err.println(stackTraceMessage);
                    failedTasksCounter++;
                } catch (CancellationException ce) {
                    logger.error("Task_" + taskIndex + " was cancelled: " + ce.getMessage());
                    failedTasksCounter++;
                } catch (InterruptedException ie) {
                    logger.error("Task_" + taskIndex + " was interrupted: " + ie.getMessage());
                    failedTasksCounter++;
                } catch (IndexOutOfBoundsException ioobe) {
                    // Not counted as a failed task.
                    logger.error("IOOBE for task_" + taskIndex + " in the futures-list! " + ioobe.getMessage());
                }
                taskIndex++;
            }
        } catch (InterruptedException ie) {
            // NOTE(review): the interrupt-status is not restored here, so the following batches proceed normally - confirm this is intended.
            logger.warn("The main thread was interrupted when waiting for the current batch's worker-tasks to finish: " + ie.getMessage());
        } catch (Exception e) {
            logger.error("", e);
            return -1;
        }
        return failedTasksCounter;
    }

    /**
     * Tries the urls of the given ID which have no output record yet, until one of them is connected successfully.
     * A url which cannot be normalized is now logged as discarded and skipped; previously a {@code null} url was
     * passed to the connection method and then written to the output.
     *
     * @param retrievedId           the ID owning the urls
     * @param retrievedUrlsOfThisId all the urls of the ID
     * @param loggedUrlsOfThisId    the urls of the ID which already have an output record; it gets updated here
     * @param isSingleIdUrlPair     when {@code true}, the logged-urls set is not updated
     * @return {@code true} if a url was connected successfully, {@code false} if none succeeded or an
     *         already-logged failure was reported by {@link #handleException}
     */
    private static boolean checkRemainingUrls(String retrievedId, Set<String> retrievedUrlsOfThisId, HashSet<String> loggedUrlsOfThisId, boolean isSingleIdUrlPair) {
        for (String remainingUrl : retrievedUrlsOfThisId) {
            if (loggedUrlsOfThisId.contains(remainingUrl)) {
                continue;
            }
            String urlToCheck = basicURLNormalizer.filter(remainingUrl);
            if (urlToCheck == null) {
                // Same handling as for the selected url, when its normalization fails.
                logger.warn("Could not normalize url: " + remainingUrl);
                UrlUtils.addOutputData(retrievedId, remainingUrl, "null", "unreachable", "Discarded at loading time, due to normalization's problems.", "null", null, true, "true", "false", "false", "false", "false", null, "null", "null");
                connProblematicUrls.incrementAndGet();
                if (!isSingleIdUrlPair) {
                    loggedUrlsOfThisId.add(remainingUrl);
                }
                continue;
            }
            if (loggedUrlsOfThisId.contains(urlToCheck)) {
                continue;   // It was logged in its normalized form.
            }
            loadingRetries.incrementAndGet();
            try {
                HttpConnUtils.connectAndCheckMimeType(retrievedId, urlToCheck, urlToCheck, urlToCheck, null, true, false);
                if (!isSingleIdUrlPair) {
                    loggedUrlsOfThisId.add(urlToCheck);
                }
                return true;
            } catch (Exception e) {
                if (handleException(retrievedId, urlToCheck, e)) {
                    return false;   // The problem was already logged elsewhere.
                }
                if (!isSingleIdUrlPair) {
                    loggedUrlsOfThisId.add(urlToCheck);
                }
            }
        }
        return false;
    }

    /**
     * Writes an "unreachable" output record for a url whose connection failed, unless the failure is a
     * RuntimeException whose message contains the {@link #alreadyLoggedMessage}.
     *
     * @param retrievedId the ID of the url
     * @param urlToCheck  the url which failed
     * @param e           the exception thrown during the connection
     * @return {@code true} if the exception was marked as already logged (no record is written), {@code false} otherwise
     */
    public static boolean handleException(String retrievedId, String urlToCheck, Exception e) {
        if (e instanceof RuntimeException) {
            String exceptionMessage = e.getMessage();
            if (exceptionMessage != null && exceptionMessage.contains(alreadyLoggedMessage)) {
                return true;
            }
        }
        // The list holds, in this order: wasUrlValid, couldRetry, error-message-suffix.
        List<String> verdict = getWasValidAndCouldRetry(e, urlToCheck);
        String validity = verdict.get(0);
        String retryFlag = verdict.get(1);
        String comment = "Discarded at loading time, as " + verdict.get(2);
        UrlUtils.addOutputData(retrievedId, urlToCheck, "null", "unreachable", comment, "null", null, true, "true", validity, "false", "false", retryFlag, null, "null", "null");
        return false;
    }

    /**
     * Runs the loading-time checks on the given url: domain retrieval, blacklisted domain, blacklisted url-path,
     * unacceptable page-url and duplicate. A discarded url gets an output record for the first four cases and the
     * duplicate case. The related counters are increased only in plain-url mode ({@link #useIdUrlPairs} is false).
     * An accepted url may be returned modified: temporal identifiers are removed, the "ir.lib.u-ryukyu.ac.jp"
     * handle-prefix is replaced and html-escaped ampersands are un-escaped.
     *
     * @param urlId        the ID of the url
     * @param retrievedUrl the url to check
     * @return the (possibly modified) url, or {@code null} if the url was discarded
     */
    public static String handleUrlChecks(String urlId, String retrievedUrl) {
        String urlDomain = UrlUtils.getDomainStr(retrievedUrl, null);
        if (urlDomain == null) {
            UrlUtils.addOutputData(urlId, retrievedUrl, "null", "unreachable", "Discarded in 'LoaderAndChecker.handleUrlChecks()' method, after the occurrence of a domain-retrieval error.", "null", null, true, "true", "false", "false", "false", "false", null, "null", "null");
            if (!useIdUrlPairs) {
                connProblematicUrls.incrementAndGet();
            }
            return null;
        }
        if (HttpConnUtils.blacklistedDomains.contains(urlDomain)) {
            logger.debug("Avoid connecting to blacklisted domain: \"" + urlDomain + "\" with url: " + retrievedUrl);
            UrlUtils.addOutputData(urlId, retrievedUrl, "null", "unreachable", "Discarded in 'LoaderAndChecker.handleUrlChecks()' method, as its domain was found blacklisted.", "null", null, true, "true", "true", "false", "false", "false", null, "null", "null");
            if (!useIdUrlPairs) {
                connProblematicUrls.incrementAndGet();
            }
            return null;
        }
        if (ConnSupportUtils.checkIfPathIs403BlackListed(retrievedUrl, urlDomain)) {
            logger.debug("Preventing reaching 403ErrorCode with url: \"" + retrievedUrl + "\"!");
            UrlUtils.addOutputData(urlId, retrievedUrl, "null", "unreachable", "Discarded in 'LoaderAndChecker.handleUrlChecks()' as it had a blacklisted urlPath.", "null", null, true, "true", "true", "false", "false", "false", null, "null", "null");
            if (!useIdUrlPairs) {
                connProblematicUrls.incrementAndGet();
            }
            return null;
        }
        String lowerCaseUrl = retrievedUrl.toLowerCase();
        // No output record is written here for this case; presumably the checker handles it itself - TODO confirm.
        if (UrlTypeChecker.shouldNotAcceptPageUrl(urlId, retrievedUrl, retrievedUrl, lowerCaseUrl, true)) {
            return null;
        }
        if (lowerCaseUrl.contains("token") || lowerCaseUrl.contains("jsessionid")) {
            retrievedUrl = UrlUtils.removeTemporalIdentifier(retrievedUrl);
        }
        if (UrlUtils.duplicateUrls.contains(retrievedUrl)) {
            logger.debug("Skipping non-DocOrDataset-url: \"" + retrievedUrl + "\", at loading, as it has already been checked.");
            UrlUtils.addOutputData(urlId, retrievedUrl, "null", "duplicate", "Discarded in 'LoaderAndChecker.handleUrlChecks()', as it's a duplicate.", "null", null, false, "true", "null", "null", "null", "true", null, "null", "null");
            if (!useIdUrlPairs) {
                inputDuplicatesNum.incrementAndGet();
            }
            return null;
        }
        if (retrievedUrl.contains("ir.lib.u-ryukyu.ac.jp") && retrievedUrl.contains("/handle/123456789/")) {
            logger.debug("We will handle the weird case of \"" + retrievedUrl + "\".");
            return Strings.CS.replace(retrievedUrl, "/123456789/", "/20.500.12000/", -1);
        }
        // Un-escape the html-escaped ampersands. The whole "&amp;" entity is replaced first, because replacing
        // just the "amp;" part with "&" was turning every "&amp;" into a double ampersand ("&&").
        retrievedUrl = Strings.CS.replace(retrievedUrl, "&amp;", "&", -1);
        // Keep the previous handling for any "amp;" which is not preceded by an ampersand.
        retrievedUrl = Strings.CS.replace(retrievedUrl, "amp;", "&", -1);
        return retrievedUrl;
    }

    /**
     * Decides whether the loading-loop should stop, based on whether the last loaded batch was empty.
     * If the very first batch is empty, an error is reported, the executor is shut down and the program exits with code 100.
     *
     * @param isEmptyOfData whether the last loaded batch had no data
     * @param isFirstRun    whether this is the first batch
     * @return {@code true} if a non-first batch was empty, {@code false} otherwise
     */
    public static boolean isFinishedLoading(boolean isEmptyOfData, boolean isFirstRun) {
        if (!isEmptyOfData) {
            return false;   // There is data to process.
        }
        if (!isFirstRun) {
            logger.debug("Done processing " + FileUtils.getCurrentlyLoadedUrls() + " urls from the inputFile.");
            return true;
        }
        // Nothing was loaded at all.
        String errorMessage = "Could not retrieve any urls from the inputFile! Exiting..";
        System.err.println(errorMessage);
        logger.error(errorMessage);
        PublicationsRetriever.executor.shutdownNow();
        System.exit(100);
        return false;
    }

    /**
     * Writes a "skipped" output record for every url of the given ID which has no output record yet.
     * A url is regarded as already logged if either its original or its normalized form is in the logged-urls set,
     * as is done in {@link #checkRemainingUrls}. Previously only the normalized form was looked up, so a url which
     * was logged in its original form could get a second record.
     *
     * @param retrievedId           the ID owning the urls
     * @param retrievedUrlsOfThisId all the urls of the ID
     * @param loggedUrlsOfThisId    the urls of the ID which already have an output record
     */
    private static void handleLogOfRemainingUrls(String retrievedId, Set<String> retrievedUrlsOfThisId, HashSet<String> loggedUrlsOfThisId) {
        for (String originalUrl : retrievedUrlsOfThisId) {
            String normalizedUrl = basicURLNormalizer.filter(originalUrl);
            // When the normalization fails, the original url is the one to be reported.
            String urlToReport = (normalizedUrl != null) ? normalizedUrl : originalUrl;
            if (loggedUrlsOfThisId.contains(originalUrl) || loggedUrlsOfThisId.contains(urlToReport)) {
                continue;
            }
            UrlUtils.addOutputData(retrievedId, urlToReport, "null", "unreachable", "Skipped in LoaderAndChecker, as a better url was selected for id: " + retrievedId, "null", null, true, "false", "null", "null", "null", "true", null, "null", "null");
        }
    }

    /**
     * Builds and compiles the {@link #COULD_RETRY_HTTP_STATUS} pattern.
     * The client-error part is fixed, while the server-error part depends on
     * {@code ConnSupportUtils.shouldBlockMost5XXDomains}: "503" when it is set, a "not preceded by 511" check otherwise.
     */
    public static void setCouldRetryRegex() {
        final String serverStatusPart;
        final String debugLog;
        if (!ConnSupportUtils.shouldBlockMost5XXDomains) {
            serverStatusPart = "(?<!511)";
            debugLog = "Going to avoid to block most of the 5XX domains, except from the 511-domains, which will be blocked.";
        } else {
            serverStatusPart = "503";
            debugLog = "Going to block most of the 5XX domains, except from the 503-domains.";
        }
        String couldRetryRegex = new StringBuilder(".*(?:HTTP 4(?:0[38]|2[569]) Client|")
                .append(serverStatusPart)
                .append(" Server) Error.*")
                .toString();
        logger.debug(debugLog + " The \"couldRetryRegex\" is: " + couldRetryRegex);
        COULD_RETRY_HTTP_STATUS = Pattern.compile(couldRetryRegex);
    }

    /**
     * Classifies a connection failure, based on the type of the exception and, for runtime exceptions, on its message.
     * The decompiler-generated type-switch bootstrap call was replaced by an equivalent {@code instanceof} chain,
     * which checks the types in the same order.
     *
     * @param e   the exception thrown during the connection
     * @param url the url which failed; may be {@code null}
     * @return a list with exactly three elements, in this order: wasUrlValid ("true"/"false"),
     *         couldRetry ("true"/"false") and the error-message suffix
     */
    public static List<String> getWasValidAndCouldRetry(Exception e, String url) {
        String wasUrlValid = "true";
        String couldRetry = "false";
        String errorMsg;
        if (e instanceof RuntimeException) {
            String message = e.getMessage();
            if (message == null) {
                errorMsg = "there is an unspecified runtime error.";
            } else {
                if (COULD_RETRY_HTTP_STATUS == null) {
                    // This static method may be reached before the constructor has prepared the pattern.
                    setCouldRetryRegex();
                }
                if (INVALID_URL_HTTP_STATUS.matcher(message).matches()) {
                    wasUrlValid = "false";
                    errorMsg = "the url is invalid and lead to http-client-error.";
                } else if (COULD_RETRY_HTTP_STATUS.matcher(message).matches()) {
                    couldRetry = "true";
                    errorMsg = "the url had a non-fatal http-error.";
                } else {
                    errorMsg = "there is a serious unspecified error.";
                }
            }
        } else if (e instanceof ConnTimeoutException) {
            couldRetry = "true";
            errorMsg = "the url had a connection-timeout.";
        } else if (e instanceof DomainWithUnsupportedHEADmethodException) {
            couldRetry = "true";
            errorMsg = "the url does not support HEAD method for checking most of the internal links.";
        } else if (e instanceof DomainBlockedException) {
            couldRetry = "true";
            errorMsg = "the url had its initial or redirected domain blocked.";
        } else {
            errorMsg = "there is a serious unspecified error.";
        }
        // The urls of some specific domains are always worth a retry, as long as they were valid.
        if (wasUrlValid.equals("true") && url != null && COULD_RETRY_URLS.matcher(url.toLowerCase()).matches()) {
            couldRetry = "true";
        }
        List<String> list = new ArrayList<>(3);
        list.add(wasUrlValid);
        list.add(couldRetry);
        list.add(errorMsg);
        return list;
    }
}

