/*
 * Decompiled with CFR 0.152.
 */
package eu.openaire.publications_retriever.util.url;

import eu.openaire.publications_retriever.models.IdUrlMimeTypeTriple;
import eu.openaire.publications_retriever.util.args.ArgsUtils;
import eu.openaire.publications_retriever.util.file.FileUtils;
import eu.openaire.publications_retriever.util.http.ConnSupportUtils;
import eu.openaire.publications_retriever.util.url.DataForOutput;
import java.util.Collections;
import java.util.Locale;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.Strings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helpers for dissecting urls (domain, path, docID), stripping volatile parts from them
 * (temporal identifiers, anchors) and recording per-url output data and statistics.
 *
 * <p>All shared state lives in concurrent collections / atomics declared on this class.
 */
public class UrlUtils {
    private static final Logger logger = LoggerFactory.getLogger(UrlUtils.class);

    /**
     * Case-insensitive url splitter. Group 1: the url from the scheme up to and including the last
     * '/' (the "path"). Group 2: the domain, without a leading "www"-like prefix and without the
     * port. Group 3: the last path segment before any ';' or '?' (the "docID").
     */
    public static final Pattern URL_TRIPLE = Pattern.compile("^(https?://(?:ww(?:w|\\d)(?:(?:\\w+)?\\.)?)?([\\w.-]+)(?:[:\\d]+)?(?:.*/)?)(?:([^/^;?]*)(?:[;?][^/^=]*(?:=.*)?)?)?$", Pattern.CASE_INSENSITIVE);

    /**
     * Case-insensitive matcher for urls carrying a "...token=" or "jsessionid=" parameter.
     * Group 1: the url before that parameter. Group 2 (optional): whatever follows the parameter's
     * value, starting with '?' or '&amp;'.
     */
    public static final Pattern TEMPORAL_IDENTIFIER_FILTER = Pattern.compile("^(https?://.+)(?:(?:(?:\\?|&|;|%3b)(?:.*token|jsessionid)(?:=|%3d))[^?&]+)([?&].+)?$", Pattern.CASE_INSENSITIVE);

    /** Group 1: the url before a '#' anchor. A '#' immediately followed by '/' is not treated as an anchor. */
    public static final Pattern ANCHOR_FILTER = Pattern.compile("(.+)(#(?!/).+)");

    public static AtomicInteger sumOfDocUrlsFound = new AtomicInteger(0);
    public static final Set<String> duplicateUrls = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
    public static final ConcurrentHashMap<String, IdUrlMimeTypeTriple> resultUrlsWithIDs = new ConcurrentHashMap<String, IdUrlMimeTypeTriple>();
    public static final ConcurrentHashMap<String, Integer> domainsAndNumHits = new ConcurrentHashMap<String, Integer>();
    public static final String duplicateUrlIndicator = "duplicate";
    public static final String unreachableDocOrDatasetUrlIndicator = "unreachable";

    /**
     * Group 1: the last two or three dot-separated labels of a domain. The dots are escaped so that
     * they only match a literal '.', not an arbitrary character.
     */
    public static final Pattern TOP_THREE_LEVEL_DOMAIN_FILTER = Pattern.compile("[\\w.-]*?((?:[\\w-]+\\.)?[\\w-]+\\.[\\w-]+)$");

    /**
     * Queues one output record in {@code FileUtils.dataForOutput} and updates the shared statistics.
     *
     * <p>When {@code docOrDatasetUrl} is neither the duplicate indicator, nor the string "null", nor
     * the unreachable indicator, it is counted as a found url, stripped of any temporal identifier,
     * optionally stored in {@link #resultUrlsWithIDs} and its domain(s) are counted.
     * When it is the unreachable indicator, {@code sourceUrl} is added to {@link #duplicateUrls}.
     *
     * @param docOrDatasetUrl the found url or one of the indicator strings; must not be null
     * @param pageDomain      the page's domain, or null to have it extracted from {@code pageUrl}
     * @param isFirstCrossed  whether the result may be stored in {@link #resultUrlsWithIDs}
     * @param fileHash        hash of the downloaded file, or null when nothing was downloaded
     * @throws NullPointerException if {@code docOrDatasetUrl} is null
     */
    public static void addOutputData(String urlId, String sourceUrl, String pageUrl, String docOrDatasetUrl, String error, String filePath, String pageDomain, boolean isFirstCrossed, String wasUrlChecked, String wasUrlValid, String wasDocumentOrDatasetAccessible, String wasDirectLink, String couldRetry, Long fileSize, String fileHash, String mimeType) {
        String finalDocOrDatasetUrl = docOrDatasetUrl;
        boolean isDuplicateOrMissing = finalDocOrDatasetUrl.equals(duplicateUrlIndicator) || finalDocOrDatasetUrl.equals("null");
        if (!isDuplicateOrMissing) {
            if (finalDocOrDatasetUrl.equals(unreachableDocOrDatasetUrlIndicator)) {
                duplicateUrls.add(sourceUrl);
            } else {
                sumOfDocUrlsFound.incrementAndGet();
                // Locale.ROOT: under e.g. a Turkish default locale "JSESSIONID" would lower-case to
                // "jsessıonıd" and the check below would miss it.
                String lowerCaseUrl = finalDocOrDatasetUrl.toLowerCase(Locale.ROOT);
                if (lowerCaseUrl.contains("token") || lowerCaseUrl.contains("jsessionid")) {
                    finalDocOrDatasetUrl = UrlUtils.removeTemporalIdentifier(finalDocOrDatasetUrl);
                }
                boolean noDownloadRequested = !ArgsUtils.shouldDownloadDocFiles && !ArgsUtils.shouldJustDownloadHtmlFiles;
                if (isFirstCrossed && (noDownloadRequested || fileHash != null)) {
                    resultUrlsWithIDs.put(finalDocOrDatasetUrl, new IdUrlMimeTypeTriple(urlId, sourceUrl, mimeType));
                }
                countDomainHits(pageUrl, pageDomain, finalDocOrDatasetUrl);
            }
        }
        FileUtils.dataForOutput.add(new DataForOutput(urlId, sourceUrl, pageUrl, finalDocOrDatasetUrl, wasUrlChecked, wasUrlValid, wasDocumentOrDatasetAccessible, wasDirectLink, couldRetry, fileHash, fileSize, mimeType, filePath, error));
    }

    /**
     * Registers a hit in {@link #domainsAndNumHits} for the page's domain and, when the doc/dataset
     * url differs from the page url and lives on another domain, for that domain as well.
     */
    private static void countDomainHits(String pageUrl, String pageDomain, String docOrDatasetUrl) {
        String effectivePageDomain = (pageDomain != null) ? pageDomain : UrlUtils.getDomainStr(pageUrl, null);
        if (effectivePageDomain == null) {
            return;
        }
        ConnSupportUtils.countInsertAndGetTimes(domainsAndNumHits, effectivePageDomain);
        // Compare from the non-null side, so a null pageUrl cannot cause a NullPointerException.
        if (docOrDatasetUrl.equals(pageUrl)) {
            return;
        }
        String docUrlDomain = UrlUtils.getDomainStr(docOrDatasetUrl, null);
        if (docUrlDomain != null && !docUrlDomain.equals(effectivePageDomain)) {
            ConnSupportUtils.countInsertAndGetTimes(domainsAndNumHits, docUrlDomain);
        }
    }

    /**
     * Returns the lower-cased domain of the given url (group 2 of {@link #URL_TRIPLE}).
     *
     * @param urlStr  the url; used for matching when {@code matcher} is null, and for logging
     * @param matcher an already-matched {@link #URL_TRIPLE} matcher, or null to create one
     * @return the domain, or null if it could not be extracted
     */
    public static String getDomainStr(String urlStr, Matcher matcher) {
        String domainStr = extractGroup(urlStr, matcher, 2, "domain");
        return (domainStr == null) ? null : domainStr.toLowerCase(Locale.ROOT);
    }

    /**
     * Returns the url up to and including its last '/' (group 1 of {@link #URL_TRIPLE}).
     *
     * @param urlStr  the url; used for matching when {@code matcher} is null, and for logging
     * @param matcher an already-matched {@link #URL_TRIPLE} matcher, or null to create one
     * @return the path string, or null if it could not be extracted
     */
    public static String getPathStr(String urlStr, Matcher matcher) {
        return extractGroup(urlStr, matcher, 1, "pathStr");
    }

    /**
     * Returns the last path segment of the url (group 3 of {@link #URL_TRIPLE}).
     *
     * @param urlStr  the url; used for matching when {@code matcher} is null, and for logging
     * @param matcher an already-matched {@link #URL_TRIPLE} matcher, or null to create one
     * @return the docID, or null if it could not be extracted
     */
    public static String getDocIdStr(String urlStr, Matcher matcher) {
        return extractGroup(urlStr, matcher, 3, "docID");
    }

    /**
     * Reads one capture group of {@link #URL_TRIPLE}, creating the matcher from {@code urlStr} when
     * none is supplied. Returns null (after logging) when the group is unavailable, null or empty.
     */
    private static String extractGroup(String urlStr, Matcher matcher, int groupIndex, String partName) {
        Matcher urlMatcher = (matcher != null) ? matcher : UrlUtils.getUrlMatcher(urlStr);
        if (urlMatcher == null) {
            return null;
        }
        String value;
        try {
            value = urlMatcher.group(groupIndex);
        } catch (IllegalStateException | IndexOutOfBoundsException e) {
            // A caller-supplied matcher may not have been matched yet, or may belong to another pattern.
            logger.error("Could not read the {} from the matcher of url: \"{}\"", partName, urlStr, e);
            return null;
        }
        if (value == null || value.isEmpty()) {
            logger.warn("No {} was extracted from url: \"{}\".", partName, urlStr);
            return null;
        }
        return value;
    }

    /**
     * Matches the url against {@link #URL_TRIPLE}, after dropping one trailing '/' if present.
     *
     * @return the successfully matched matcher, or null if the url is null or does not match
     */
    public static Matcher getUrlMatcher(String urlStr) {
        if (urlStr == null) {
            logger.error("The received \"urlStr\" was null in \"getUrlMatcher()\"!");
            return null;
        }
        String trimmedUrl = urlStr.endsWith("/") ? urlStr.substring(0, urlStr.length() - 1) : urlStr;
        Matcher urlMatcher = URL_TRIPLE.matcher(trimmedUrl);
        if (urlMatcher.matches()) {
            return urlMatcher;
        }
        logger.warn("Unexpected URL_TRIPLE's ({}) mismatch for url: \"{}\"", urlMatcher, trimmedUrl);
        return null;
    }

    /**
     * Reduces a domain to its last (at most) three dot-separated labels.
     *
     * @return the reduced domain; the input unchanged when it does not match
     *         {@link #TOP_THREE_LEVEL_DOMAIN_FILTER}; null when the input is null
     */
    public static String getTopThreeLevelDomain(String domainStr) {
        if (domainStr == null) {
            logger.error("The received \"domainStr\" was null in \"getTopThreeLevelDomain()\"!");
            return null;
        }
        Matcher matcher = TOP_THREE_LEVEL_DOMAIN_FILTER.matcher(domainStr);
        if (!matcher.matches()) {
            logger.warn("Could not retrieve the top-three-level-domain from \"{}\"", domainStr);
            return domainStr;
        }
        // Group 1 is mandatory in the pattern, so it is always present after a successful match.
        return matcher.group(1);
    }

    /**
     * Removes a "...token=value" / "jsessionid=value" parameter from the url, keeping whatever
     * precedes and follows it.
     *
     * @return the cleaned url; the input unchanged when it carries no such parameter;
     *         the string "null" when the input is null
     */
    public static String removeTemporalIdentifier(String urlStr) {
        if (urlStr == null) {
            logger.error("The received \"urlStr\" was null in \"removeTemporalIdentifier()\"!");
            return "null";
        }
        Matcher temporalIdMatcher = TEMPORAL_IDENTIFIER_FILTER.matcher(urlStr);
        if (!temporalIdMatcher.matches()) {
            return urlStr;
        }
        // Group 1 is mandatory and non-empty after a successful match; group 2 is optional.
        String preTemporalIdStr = temporalIdMatcher.group(1);
        String afterTemporalIdStr = temporalIdMatcher.group(2);
        if (afterTemporalIdStr == null || afterTemporalIdStr.isEmpty()) {
            return preTemporalIdStr;
        }
        // If the removed parameter was the first one, the remainder must now start the query string.
        if (afterTemporalIdStr.startsWith("&") && !preTemporalIdStr.contains("?")) {
            afterTemporalIdStr = "?" + afterTemporalIdStr.substring(1);
        }
        return preTemporalIdStr + afterTemporalIdStr;
    }

    /**
     * Removes a trailing '#' anchor from the url, as defined by {@link #ANCHOR_FILTER}.
     *
     * @return the url without its anchor; the input unchanged when it has none;
     *         null when the input is null
     */
    public static String removeAnchor(String urlStr) {
        if (urlStr == null) {
            logger.error("The received \"urlStr\" was null in \"removeAnchor()\"!");
            return null;
        }
        Matcher anchorMatcher = ANCHOR_FILTER.matcher(urlStr);
        // Group 1 is mandatory and non-empty after a successful match.
        return anchorMatcher.matches() ? anchorMatcher.group(1) : urlStr;
    }
}

