package eu.openaire.publications_retriever.util.url;

import com.google.common.collect.HashMultimap;
import crawlercommons.filters.basic.BasicURLNormalizer;
import eu.openaire.publications_retriever.PublicationsRetriever;
import eu.openaire.publications_retriever.exceptions.ConnTimeoutException;
import eu.openaire.publications_retriever.exceptions.DomainBlockedException;
import eu.openaire.publications_retriever.exceptions.DomainWithUnsupportedHEADmethodException;
import eu.openaire.publications_retriever.models.IdUrlMimeTypeTriple;
import eu.openaire.publications_retriever.util.args.ArgsUtils;
import eu.openaire.publications_retriever.util.file.FileUtils;
import eu.openaire.publications_retriever.util.http.ConnSupportUtils;
import eu.openaire.publications_retriever.util.http.HttpConnUtils;
import java.net.CookieStore;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:eu/openaire/publications_retriever/util/url/LoaderAndChecker.class */
public class LoaderAndChecker {
    /**
     * Regex alternation (a single non-capturing group) of the file extensions / format names which are
     * treated as dataset formats. It is embedded in {@link #DATASET_URL_FILTER}.
     * Fix: the "grb"/"grib" alternative used to end with a stray literal {@code ]} ("gr[i]?b]"),
     * so it could only match "grb]" / "grib]".
     */
    public static final String dataset_formats = "(?:xls[xbm]?|xlt[x]?|[ct]sv|tab|(?:(?:geo)?j|b)son|(?:x|k|g|nmr|sb|wiley|y[a]?)ml|xsd|o[dt]s|ddi|rdf|[g]?zip|zipx|[rt]ar|[7x]z|[t]?gz|[gb]z[\\d]*|smi[l]?|por|ascii|dta|sav|dat|txt|ti[f]{1,2}|tfw|dwg|nt|fits|feather|svg|sas7b(?:dat|ve)|spss|sas|stata|(?:my|postgre)?sql(?:ite)?|bigquery|sh[px]|sb[xn]|prj|dbf|(?:m|acc)db|mif|mat|pcd|bt|n[sc]?[\\d]*|h[\\d]+|hdf[\\d]*|trs|opj|jcamp|fcs|fas(?:ta)?|keys|values|las|rdata|parquet|avro|sql|dcm|gr[i]?b|rds|[p]?cap|dmp|vcf|cbor|biosample|hic|warc|ig[e]?s|sla|dxf|pdb|[sc]df|cif|f(?:ast)?[qa]|apng|sra|vtp|gltf|[sm]tl|ply|abc|md|rtf|ttl|shp|shx|exr|cdf|glb|mtl|kmz|textFile)";

    /** Marker searched for in the message of a {@code RuntimeException} by {@link #handleException}; when found, nothing more is written for that url. */
    public static final String alreadyLoggedMessage = "__LOGGED__";

    private static final Logger logger = LoggerFactory.getLogger(LoaderAndChecker.class);

    /** Chooses the loader used by the constructor: {@link #loadAndCheckIdUrlPairs()} when true, {@link #loadAndCheckUrls()} otherwise. */
    public static boolean useIdUrlPairs = true;

    /** Matched (with {@code matches()}) against the lower-cased url, to flag it as a possible document-url. */
    public static final Pattern DOC_URL_FILTER = Pattern.compile(".+(pdf|download|/doc|document|(?:/|[?]|&)file|/fulltext|attachment|/paper|view(?:file|doc)|/get|cgi/viewcontent.cgi\\?|t[ée]l[ée]charger|descargar).*");

    /**
     * Matched (with {@code matches()}) against the lower-cased url, to flag it as a possible dataset-url.
     * It is composed from {@link #dataset_formats}, so the two can no longer drift apart.
     */
    public static final Pattern DATASET_URL_FILTER = Pattern.compile(".+(?:dataset[s]?/.*|(?:\\.|format=)" + dataset_formats + "(?:\\?.+)?$)");

    /** Shared url-normalizer; in this class a {@code null} result of {@code filter()} is treated as a normalization failure. */
    public static final BasicURLNormalizer basicURLNormalizer = BasicURLNormalizer.newBuilder().build();

    /** Incremented by the id-url loaders of this class, for every loaded batch. */
    public static int numOfIDs = 0;

    /** Incremented when a url is discarded at loading time (normalization failure, or a failed check when {@link #useIdUrlPairs} is false). */
    public static AtomicInteger connProblematicUrls = new AtomicInteger(0);

    /** Incremented by {@link #handleUrlChecks} for duplicate input urls, when {@link #useIdUrlPairs} is false. */
    public static AtomicInteger inputDuplicatesNum = new AtomicInteger(0);

    /** Incremented by {@link #loadAndCheckIdUrlPairs()} when none of the urls of an ID could be selected. */
    public static AtomicInteger numOfIDsWithoutAcceptableSourceUrl = new AtomicInteger(0);

    /** Incremented for every additional url tried for an ID, after the initially selected url was not usable. */
    public static AtomicInteger loadingRetries = new AtomicInteger(0);

    /** Updated by {@link #executeTasksAndHandleResults} when a batch reports failed tasks. */
    public static AtomicInteger totalNumFailedTasks = new AtomicInteger(0);

    /** Messages of http-client-errors which mark the url itself as invalid (400, 404, 410, 414, 422). */
    public static final Pattern INVALID_URL_HTTP_STATUS = Pattern.compile(".*HTTP 4(?:00|04|10|14|22) Client Error.*");

    /** Built by {@link #setCouldRetryRegex()}; it stays {@code null} until that method has run. */
    public static Pattern COULD_RETRY_HTTP_STATUS = null;

    /** Urls matching this pattern are marked as "could retry" by {@link #getWasValidAndCouldRetry}, as long as the url was judged valid. */
    public static Pattern COULD_RETRY_URLS = Pattern.compile("[^/]+://[^/]*(?:sciencedirect|elsevier).com[^/]*/.*");

    /**
     * Builds the "could retry" regex, loads and checks the whole input (as id-url pairs or as plain urls,
     * depending on {@link #useIdUrlPairs}) and finally writes any output-data which is still pending.
     * The pending data is written also when the loading fails, before the failure is propagated.
     *
     * @throws RuntimeException wrapping any {@code Exception} thrown during the loading.
     */
    public LoaderAndChecker() throws RuntimeException {
        setCouldRetryRegex();
        try {
            try {
                if (!useIdUrlPairs) {
                    loadAndCheckUrls();
                } else {
                    loadAndCheckIdUrlPairs();
                }
                writePendingOutputData();
            } catch (Exception loadingException) {
                logger.error("", (Throwable) loadingException);
                throw new RuntimeException(loadingException);
            }
        } catch (Throwable failure) {
            // Do not lose the data which was gathered before the failure.
            writePendingOutputData();
            throw failure;
        }
    }

    /** Writes the remaining output-data to the output file, if there is any. */
    private static void writePendingOutputData() {
        if (FileUtils.dataForOutput.isEmpty()) {
            return;
        }
        logger.debug("Writing last data points to the outputFile.");
        FileUtils.writeResultsToFile();
    }

    /**
     * Loads the input in batches of plain urls (no IDs) and, for every url, submits a task which checks the url,
     * normalizes it and connects to it. Each batch is executed through {@link #executeTasksAndHandleResults}.
     * The method returns when {@link #isFinishedLoading} reports that no more urls are available.
     * <p>
     * Every task returns {@code true} when the url was handled without an error and {@code false} otherwise.
     */
    public static void loadAndCheckUrls() throws RuntimeException {
        boolean isFirstRun = true;
        int batchCount = 0;
        CookieStore cookieStore = HttpConnUtils.cookieManager.getCookieStore();
        // The list has to be typed, otherwise the lambdas added below have no functional-interface target type.
        List<Callable<Boolean>> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize);
        while (true) {
            Collection<String> loadedUrlGroup = FileUtils.getNextUrlBatchTest();
            if (isFinishedLoading(loadedUrlGroup.isEmpty(), isFirstRun)) {
                return;
            }
            isFirstRun = false;
            batchCount++;
            logger.info("Batch counter: " + batchCount + (ArgsUtils.inputFileFullPath != null ? " | progress: " + PublicationsRetriever.df.format((((batchCount - 1) * FileUtils.jsonBatchSize) * 100.0d) / FileUtils.numOfLines) + "%" : "") + " | every batch contains at most " + FileUtils.jsonBatchSize + " id-url pairs.");
            for (String retrievedUrl : loadedUrlGroup) {
                callableTasks.add(() -> {
                    String checkedUrl = handleUrlChecks("null", retrievedUrl);
                    if (checkedUrl == null) {
                        return false;   // The url was discarded (and logged) by "handleUrlChecks()".
                    }
                    String normalizedUrl = basicURLNormalizer.filter(checkedUrl);
                    if (normalizedUrl == null) {
                        logger.warn("Could not normalize url: " + checkedUrl);
                        UrlUtils.addOutputData("null", checkedUrl, "N/A", UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to normalization's problems.", "N/A", null, true, "true", "false", "false", "false", "false", null, "null", "N/A");
                        connProblematicUrls.incrementAndGet();
                        return false;
                    }
                    // NOTE(review): the lookup uses the raw input url, while the id-url loaders use the checked url. Confirm that this is intended.
                    IdUrlMimeTypeTriple alreadyFoundResult = UrlUtils.resultUrlsWithIDs.get(retrievedUrl);
                    if (alreadyFoundResult != null) {
                        ConnSupportUtils.handleReCrossedTargetUrl("null", retrievedUrl, retrievedUrl, retrievedUrl, alreadyFoundResult, true);
                        return true;
                    }
                    String lowerCaseUrl = checkedUrl.toLowerCase();
                    boolean isPossibleDocOrDatasetUrl = (ArgsUtils.retrieveDocuments && DOC_URL_FILTER.matcher(lowerCaseUrl).matches())
                            || (ArgsUtils.retrieveDatasets && DATASET_URL_FILTER.matcher(lowerCaseUrl).matches());
                    try {
                        HttpConnUtils.connectAndCheckMimeType("null", checkedUrl, normalizedUrl, normalizedUrl, null, true, isPossibleDocOrDatasetUrl);
                        return true;
                    } catch (Exception e) {
                        handleException("null", normalizedUrl, e);
                        return false;
                    }
                });
            }
            executeTasksAndHandleResults(callableTasks, batchCount, cookieStore);
        }
    }

    /**
     * Loads the input in batches of id-urls groups and submits one task per ID. Each task selects the most
     * promising url of the ID and connects to it; when that url fails, the remaining urls of the ID are tried
     * through {@code checkRemainingUrls()}. Each batch is executed through {@link #executeTasksAndHandleResults}.
     * <p>
     * Url-selection order for an ID: a url matching the doc/dataset filters, then a url containing "/handle/",
     * then a non-"doi.org" url and finally any other accepted url.
     * <p>
     * Fixes over the previous version: the selection loop now stops as soon as an already-found url is re-crossed
     * or a filter-matching url is found (previously the loop ran to its end, the matching url was not selected
     * and the result of the last iteration was used as if it was a match). The collections are typed as well,
     * since a raw task-list gives the lambdas no target type and {@code Multimap.get()} takes a typed key.
     */
    public static void loadAndCheckIdUrlPairs() throws RuntimeException {
        boolean isFirstRun = true;
        int batchCount = 0;
        CookieStore cookieStore = HttpConnUtils.cookieManager.getCookieStore();
        List<Callable<Boolean>> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize);
        while (true) {
            HashMultimap<String, String> loadedIdUrlPairs = FileUtils.getNextIdUrlPairBatchFromJson();
            if (isFinishedLoading(loadedIdUrlPairs.isEmpty(), isFirstRun)) {
                return;
            }
            isFirstRun = false;
            batchCount++;
            logger.info("Batch counter: " + batchCount + (ArgsUtils.inputFileFullPath != null ? " | progress: " + PublicationsRetriever.df.format((((batchCount - 1) * FileUtils.jsonBatchSize) * 100.0d) / FileUtils.numOfLines) + "%" : "") + " | every batch contains at most " + FileUtils.jsonBatchSize + " id-url pairs.");
            Set<String> idsOfBatch = loadedIdUrlPairs.keySet();
            numOfIDs += idsOfBatch.size();
            for (String retrievedId : idsOfBatch) {
                callableTasks.add(() -> {
                    boolean goToNextId = false;
                    String possibleDocOrDatasetUrl = null;  // A url matching the doc/dataset filters.
                    String bestNonDocNonDatasetUrl = null;  // A url containing "/handle/".
                    String nonDoiUrl = null;
                    String neutralUrl = null;
                    Set<String> retrievedUrlsOfCurrentId = loadedIdUrlPairs.get(retrievedId);
                    boolean isSingleIdUrlPair = retrievedUrlsOfCurrentId.size() == 1;
                    // The urls for which an output-record already exists (or will exist), so they are not logged again as "skipped".
                    HashSet<String> loggedUrlsOfCurrentId = new HashSet<>();

                    for (String retrievedUrl : retrievedUrlsOfCurrentId) {
                        String checkedUrl = handleUrlChecks(retrievedId, retrievedUrl);
                        if (checkedUrl == null) {   // Discarded and logged inside "handleUrlChecks()".
                            if (!isSingleIdUrlPair) {
                                loggedUrlsOfCurrentId.add(retrievedUrl);
                            }
                            continue;
                        }
                        IdUrlMimeTypeTriple alreadyFoundResult = UrlUtils.resultUrlsWithIDs.get(checkedUrl);
                        if (alreadyFoundResult != null) {
                            ConnSupportUtils.handleReCrossedTargetUrl(retrievedId, checkedUrl, checkedUrl, checkedUrl, alreadyFoundResult, true);
                            if (!isSingleIdUrlPair) {
                                loggedUrlsOfCurrentId.add(checkedUrl);
                            }
                            goToNextId = true;
                            break;  // A result exists for this ID, so there is no reason to evaluate the rest of its urls.
                        }
                        String lowerCaseUrl = checkedUrl.toLowerCase();
                        if ((ArgsUtils.retrieveDocuments && DOC_URL_FILTER.matcher(lowerCaseUrl).matches())
                                || (ArgsUtils.retrieveDatasets && DATASET_URL_FILTER.matcher(lowerCaseUrl).matches())) {
                            possibleDocOrDatasetUrl = checkedUrl;
                            break;  // This is the best case, so we stop looking.
                        }
                        if (checkedUrl.contains("/handle/")) {
                            bestNonDocNonDatasetUrl = checkedUrl;
                        } else if (bestNonDocNonDatasetUrl != null || checkedUrl.contains("doi.org")) {
                            neutralUrl = checkedUrl;
                        } else {
                            nonDoiUrl = checkedUrl;
                        }
                    }

                    if (goToNextId) {
                        if (!isSingleIdUrlPair) {
                            handleLogOfRemainingUrls(retrievedId, retrievedUrlsOfCurrentId, loggedUrlsOfCurrentId);
                        }
                        return false;
                    }

                    boolean isPossibleDocOrDatasetUrl = false;
                    String sourceUrl;
                    if (possibleDocOrDatasetUrl != null) {
                        sourceUrl = possibleDocOrDatasetUrl;
                        isPossibleDocOrDatasetUrl = true;
                    } else if (bestNonDocNonDatasetUrl != null) {
                        sourceUrl = bestNonDocNonDatasetUrl;
                    } else if (nonDoiUrl != null) {
                        sourceUrl = nonDoiUrl;
                    } else if (neutralUrl != null) {
                        sourceUrl = neutralUrl;
                    } else {
                        logger.debug("No acceptable sourceUrl was found for ID: \"" + retrievedId + "\".");
                        numOfIDsWithoutAcceptableSourceUrl.incrementAndGet();
                        return false;
                    }

                    String urlToCheck = basicURLNormalizer.filter(sourceUrl);
                    if (urlToCheck == null) {
                        logger.warn("Could not normalize url: " + sourceUrl);
                        UrlUtils.addOutputData(retrievedId, sourceUrl, "N/A", UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to normalization's problems.", "N/A", null, true, "true", "false", "false", "false", "false", null, "null", "N/A");
                        connProblematicUrls.incrementAndGet();
                        if (!isSingleIdUrlPair) {
                            loggedUrlsOfCurrentId.add(sourceUrl);
                            checkRemainingUrls(retrievedId, retrievedUrlsOfCurrentId, loggedUrlsOfCurrentId, isSingleIdUrlPair);
                            handleLogOfRemainingUrls(retrievedId, retrievedUrlsOfCurrentId, loggedUrlsOfCurrentId);
                        }
                        return false;
                    }

                    boolean wasSuccessful = true;
                    try {
                        HttpConnUtils.connectAndCheckMimeType(retrievedId, sourceUrl, urlToCheck, urlToCheck, null, true, isPossibleDocOrDatasetUrl);
                        if (!isSingleIdUrlPair) {
                            loggedUrlsOfCurrentId.add(urlToCheck);
                        }
                    } catch (Exception e) {
                        if (handleException(retrievedId, urlToCheck, e)) {
                            return false;   // The problem was already logged, so we do not try the remaining urls.
                        }
                        if (isSingleIdUrlPair) {
                            wasSuccessful = false;
                        } else {
                            loggedUrlsOfCurrentId.add(urlToCheck);
                            wasSuccessful = checkRemainingUrls(retrievedId, retrievedUrlsOfCurrentId, loggedUrlsOfCurrentId, isSingleIdUrlPair);
                        }
                    }
                    if (!isSingleIdUrlPair) {
                        handleLogOfRemainingUrls(retrievedId, retrievedUrlsOfCurrentId, loggedUrlsOfCurrentId);
                    }
                    return wasSuccessful;
                });
            }
            executeTasksAndHandleResults(callableTasks, batchCount, cookieStore);
        }
    }

    /**
     * Loads the input in batches of id-url pairs and submits one task per (id, url) entry, so every url of
     * every ID is checked and connected. Each batch is executed through {@link #executeTasksAndHandleResults}.
     * <p>
     * Every task returns {@code true} when the url was handled without an error and {@code false} otherwise.
     * The collections are typed: a raw task-list gives the lambdas no target type and the result of
     * {@code HashMultimap.entries()} cannot be assigned to a {@code Set} of raw entries.
     */
    public static void loadAndCheckEachIdUrlPairInEntries() throws RuntimeException {
        boolean isFirstRun = true;
        int batchCount = 0;
        CookieStore cookieStore = HttpConnUtils.cookieManager.getCookieStore();
        List<Callable<Boolean>> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize);
        while (true) {
            HashMultimap<String, String> loadedIdUrlPairs = FileUtils.getNextIdUrlPairBatchFromJson();
            if (isFinishedLoading(loadedIdUrlPairs.isEmpty(), isFirstRun)) {
                return;
            }
            isFirstRun = false;
            batchCount++;
            logger.info("Batch counter: " + batchCount + (ArgsUtils.inputFileFullPath != null ? " | progress: " + PublicationsRetriever.df.format((((batchCount - 1) * FileUtils.jsonBatchSize) * 100.0d) / FileUtils.numOfLines) + "%" : "") + " | every batch contains at most " + FileUtils.jsonBatchSize + " id-url pairs.");
            Set<Map.Entry<String, String>> pairs = loadedIdUrlPairs.entries();
            numOfIDs += pairs.size();
            for (Map.Entry<String, String> pair : pairs) {
                callableTasks.add(() -> {
                    String retrievedId = pair.getKey();
                    String checkedUrl = handleUrlChecks(retrievedId, pair.getValue());
                    if (checkedUrl == null) {
                        return false;   // The url was discarded (and logged) by "handleUrlChecks()".
                    }
                    String normalizedUrl = basicURLNormalizer.filter(checkedUrl);
                    if (normalizedUrl == null) {
                        logger.warn("Could not normalize url: " + checkedUrl);
                        UrlUtils.addOutputData(retrievedId, checkedUrl, "N/A", UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to normalization's problems.", "N/A", null, true, "true", "false", "false", "false", "false", null, "null", "N/A");
                        connProblematicUrls.incrementAndGet();
                        return false;
                    }
                    IdUrlMimeTypeTriple alreadyFoundResult = UrlUtils.resultUrlsWithIDs.get(checkedUrl);
                    if (alreadyFoundResult != null) {
                        ConnSupportUtils.handleReCrossedTargetUrl(retrievedId, checkedUrl, checkedUrl, checkedUrl, alreadyFoundResult, true);
                        return true;
                    }
                    String lowerCaseUrl = checkedUrl.toLowerCase();
                    boolean isPossibleDocOrDatasetUrl = (ArgsUtils.retrieveDocuments && DOC_URL_FILTER.matcher(lowerCaseUrl).matches())
                            || (ArgsUtils.retrieveDatasets && DATASET_URL_FILTER.matcher(lowerCaseUrl).matches());
                    try {
                        HttpConnUtils.connectAndCheckMimeType(retrievedId, checkedUrl, normalizedUrl, normalizedUrl, null, true, isPossibleDocOrDatasetUrl);
                        return true;
                    } catch (Exception e) {
                        handleException(retrievedId, normalizedUrl, e);
                        return false;
                    }
                });
            }
            executeTasksAndHandleResults(callableTasks, batchCount, cookieStore);
        }
    }

    /**
     * Loads the input in batches of id-urls groups and submits one task per ID; the task checks and connects
     * to every url of that ID, one after the other. Each batch is executed through {@link #executeTasksAndHandleResults}.
     * <p>
     * A task returns {@code false} as soon as a connection throws (the remaining urls of that ID are not tried)
     * and {@code true} otherwise.
     * The collections are typed: a raw task-list gives the lambdas no target type and {@code Multimap.get()} takes a typed key.
     */
    public static void loadAndCheckEachIdUrlPair() throws RuntimeException {
        boolean isFirstRun = true;
        int batchCount = 0;
        CookieStore cookieStore = HttpConnUtils.cookieManager.getCookieStore();
        List<Callable<Boolean>> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize);
        while (true) {
            HashMultimap<String, String> loadedIdUrlPairs = FileUtils.getNextIdUrlPairBatchFromJson();
            if (isFinishedLoading(loadedIdUrlPairs.isEmpty(), isFirstRun)) {
                return;
            }
            isFirstRun = false;
            batchCount++;
            logger.info("Batch counter: " + batchCount + (ArgsUtils.inputFileFullPath != null ? " | progress: " + PublicationsRetriever.df.format((((batchCount - 1) * FileUtils.jsonBatchSize) * 100.0d) / FileUtils.numOfLines) + "%" : "") + " | every batch contains at most " + FileUtils.jsonBatchSize + " id-url pairs.");
            for (String retrievedId : loadedIdUrlPairs.keySet()) {
                Set<String> retrievedUrlsOfCurrentId = loadedIdUrlPairs.get(retrievedId);
                numOfIDs += retrievedUrlsOfCurrentId.size();
                callableTasks.add(() -> {
                    for (String retrievedUrl : retrievedUrlsOfCurrentId) {
                        String checkedUrl = handleUrlChecks(retrievedId, retrievedUrl);
                        if (checkedUrl == null) {
                            continue;   // The url was discarded (and logged) by "handleUrlChecks()".
                        }
                        String normalizedUrl = basicURLNormalizer.filter(checkedUrl);
                        if (normalizedUrl == null) {
                            logger.warn("Could not normalize url: " + checkedUrl);
                            UrlUtils.addOutputData(retrievedId, checkedUrl, "N/A", UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to normalization's problems.", "N/A", null, true, "true", "false", "false", "false", "false", null, "null", "N/A");
                            connProblematicUrls.incrementAndGet();
                            continue;
                        }
                        IdUrlMimeTypeTriple alreadyFoundResult = UrlUtils.resultUrlsWithIDs.get(checkedUrl);
                        if (alreadyFoundResult != null) {
                            ConnSupportUtils.handleReCrossedTargetUrl(retrievedId, checkedUrl, checkedUrl, checkedUrl, alreadyFoundResult, true);
                            continue;
                        }
                        String lowerCaseUrl = checkedUrl.toLowerCase();
                        boolean isPossibleDocOrDatasetUrl = (ArgsUtils.retrieveDocuments && DOC_URL_FILTER.matcher(lowerCaseUrl).matches())
                                || (ArgsUtils.retrieveDatasets && DATASET_URL_FILTER.matcher(lowerCaseUrl).matches());
                        try {
                            HttpConnUtils.connectAndCheckMimeType(retrievedId, checkedUrl, normalizedUrl, normalizedUrl, null, true, isPossibleDocOrDatasetUrl);
                        } catch (Exception e) {
                            handleException(retrievedId, normalizedUrl, e);
                            return false;
                        }
                    }
                    return true;
                });
            }
            executeTasksAndHandleResults(callableTasks, batchCount, cookieStore);
        }
    }

    /**
     * Runs the tasks of the current batch, reacts to their failures and then prepares for the next batch:
     * the task-list is cleared, all cookies are removed from the given store and the gathered results are written to the output file.
     * <p>
     * When the invocation itself fails ({@link #invokeAllTasksAndWait} returns -1), the results are written and the JVM exits with code 99.
     *
     * @param list the tasks of the batch; the list is emptied by this method.
     * @param i the number of the current batch, used only for logging.
     * @param cookieStore the cookie-store to clear after the batch.
     */
    public static void executeTasksAndHandleResults(List<Callable<Boolean>> list, int i, CookieStore cookieStore) {
        int numFailedTasks = invokeAllTasksAndWait(list);
        if (numFailedTasks == -1) {
            FileUtils.writeResultsToFile();
            System.err.println("Invoking and/or executing the callableTasks failed with the exception written in the log files!");
            System.exit(99);
        } else if (numFailedTasks > 0) {
            logger.warn(numFailedTasks + " tasks failed in batch_" + i);
            // Add the number of the failed tasks (it used to add just 1 per failing batch), so the counter matches its name.
            totalNumFailedTasks.addAndGet(numFailedTasks);
        }
        list.clear();
        logger.debug("The number of cookies is: " + cookieStore.getCookies().size());
        logger.debug(cookieStore.removeAll() ? "The cookies where removed!" : "No cookies where removed!");
        FileUtils.writeResultsToFile();
    }

    /**
     * Submits all the given tasks to {@code PublicationsRetriever.executor}, waits for them to finish and
     * inspects the outcome of each one.
     * <p>
     * Fix: the result of every future is now actually requested ({@code Future.get()}); without that call no
     * task-failure could ever be detected or counted.
     *
     * @param list the tasks to run.
     * @return the number of tasks which were interrupted, cancelled or failed with an exception, or -1 when the
     *         invocation itself failed. When the waiting thread is interrupted during the invocation, the count
     *         gathered so far (0) is returned.
     */
    public static int invokeAllTasksAndWait(List<Callable<Boolean>> list) {
        int numFailedTasks = 0;
        try {
            List<Future<Boolean>> futures = PublicationsRetriever.executor.invokeAll(list);
            int sizeOfFutures = futures.size();
            for (int i = 0; i < sizeOfFutures; i++) {
                try {
                    futures.get(i).get();   // The value is not needed; this call surfaces the exception of a failed task.
                } catch (IndexOutOfBoundsException e) {
                    logger.error("IOOBE for task_" + i + " in the futures-list! " + e.getMessage());
                } catch (InterruptedException e) {
                    // NOTE(review): the interrupt-status is not restored here, so that the remaining futures can still be examined.
                    logger.error("Task_" + i + " was interrupted: " + e.getMessage());
                    numFailedTasks++;
                } catch (CancellationException e) {
                    logger.error("Task_" + i + " was cancelled: " + e.getMessage());
                    numFailedTasks++;
                } catch (ExecutionException e) {
                    String stackTraceMessage = GenericUtils.getSelectedStackTraceForCausedException(e, "Task_" + i + " failed with: ", null, 15);
                    logger.error(stackTraceMessage);
                    System.err.println(stackTraceMessage);
                    numFailedTasks++;
                }
            }
        } catch (InterruptedException e) {
            logger.warn("The main thread was interrupted when waiting for the current batch's worker-tasks to finish: " + e.getMessage());
        } catch (Exception e) {
            logger.error("", (Throwable) e);
            return -1;
        }
        return numFailedTasks;
    }

    /**
     * Tries the urls of an ID which have not been handled yet, one by one, until a connection succeeds.
     * <p>
     * Fix: a url which cannot be normalized is now recorded as discarded and skipped. Previously the
     * {@code null} result of the normalizer was passed on to the connection and added to the logged-urls.
     *
     * @param str the ID of the urls.
     * @param set all the urls of the ID.
     * @param hashSet the urls of the ID which are already handled; it is extended by this method when {@code z} is false.
     * @param z whether the ID has just one url (then the handled-urls are not tracked).
     * @return {@code true} when a connection succeeded; {@code false} when every remaining url failed, or when
     *         {@link #handleException} reported that the problem was already logged.
     */
    private static boolean checkRemainingUrls(String str, Set<String> set, HashSet<String> hashSet, boolean z) {
        for (String remainingUrl : set) {
            if (hashSet.contains(remainingUrl)) {
                continue;
            }
            String urlToCheck = basicURLNormalizer.filter(remainingUrl);
            if (urlToCheck == null) {
                logger.warn("Could not normalize url: " + remainingUrl);
                UrlUtils.addOutputData(str, remainingUrl, "N/A", UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to normalization's problems.", "N/A", null, true, "true", "false", "false", "false", "false", null, "null", "N/A");
                connProblematicUrls.incrementAndGet();
                if (!z) {
                    // "handleLogOfRemainingUrls()" falls back to the raw url when the normalization fails, so this prevents a second record.
                    hashSet.add(remainingUrl);
                }
                continue;
            }
            if (hashSet.contains(urlToCheck)) {
                continue;
            }
            loadingRetries.incrementAndGet();
            try {
                HttpConnUtils.connectAndCheckMimeType(str, urlToCheck, urlToCheck, urlToCheck, null, true, false);
                if (!z) {
                    hashSet.add(urlToCheck);
                }
                return true;
            } catch (Exception e) {
                if (handleException(str, urlToCheck, e)) {
                    return false;
                }
                if (!z) {
                    hashSet.add(urlToCheck);
                }
            }
        }
        return false;
    }

    /**
     * Records a url whose connection failed at loading time, unless the failure has been reported already.
     *
     * @param str the ID of the url.
     * @param str2 the url which failed.
     * @param exc the exception thrown for that url.
     * @return {@code true} when the exception is a {@code RuntimeException} whose message contains
     *         {@link #alreadyLoggedMessage} (nothing is written then); {@code false} after writing an output-record.
     */
    public static boolean handleException(String str, String str2, Exception exc) {
        if (exc instanceof RuntimeException) {
            String exceptionMessage = exc.getMessage();
            if (exceptionMessage != null && exceptionMessage.contains(alreadyLoggedMessage)) {
                return true;
            }
        }
        List<String> verdict = getWasValidAndCouldRetry(exc, str2);
        String wasUrlValid = verdict.get(0);
        String couldRetry = verdict.get(1);
        String comment = "Discarded at loading time, as " + verdict.get(2);
        UrlUtils.addOutputData(str, str2, "N/A", UrlUtils.unreachableDocOrDatasetUrlIndicator, comment, "N/A", null, true, "true", wasUrlValid, "false", "false", couldRetry, null, "null", "N/A");
        return false;
    }

    /**
     * Runs the loading-time checks on a url and decides whether it should be connected to.
     * A url is rejected (an output-record is written, except when {@code UrlTypeChecker.shouldNotAcceptPageUrl()}
     * rejects it) when its domain cannot be extracted, its domain is blacklisted, its path is 403-blacklisted,
     * its type is not acceptable, or it is a duplicate.
     * The related counters are increased only when {@link #useIdUrlPairs} is false.
     *
     * @param str the ID of the url.
     * @param str2 the url to check.
     * @return the url to use (with its temporal identifier removed when it contained "token" or "jsessionid",
     *         and rewritten for the special "ir.lib.u-ryukyu.ac.jp" handle-case), or {@code null} when the url was rejected.
     */
    public static String handleUrlChecks(String str, String str2) {
        String urlDomain = UrlUtils.getDomainStr(str2, null);
        if (urlDomain == null) {
            UrlUtils.addOutputData(str, str2, "N/A", UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded in 'LoaderAndChecker.handleUrlChecks()' method, after the occurrence of a domain-retrieval error.", "N/A", null, true, "true", "false", "false", "false", "false", null, "null", "N/A");
            if (!useIdUrlPairs) {
                connProblematicUrls.incrementAndGet();
            }
            return null;
        }

        if (HttpConnUtils.blacklistedDomains.contains(urlDomain)) {
            logger.debug("Avoid connecting to blacklisted domain: \"" + urlDomain + "\" with url: " + str2);
            UrlUtils.addOutputData(str, str2, "N/A", UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded in 'LoaderAndChecker.handleUrlChecks()' method, as its domain was found blacklisted.", "N/A", null, true, "true", "true", "false", "false", "false", null, "null", "N/A");
            if (!useIdUrlPairs) {
                connProblematicUrls.incrementAndGet();
            }
            return null;
        }

        if (ConnSupportUtils.checkIfPathIs403BlackListed(str2, urlDomain)) {
            logger.debug("Preventing reaching 403ErrorCode with url: \"" + str2 + "\"!");
            UrlUtils.addOutputData(str, str2, "N/A", UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded in 'LoaderAndChecker.handleUrlChecks()' as it had a blacklisted urlPath.", "N/A", null, true, "true", "true", "false", "false", "false", null, "null", "N/A");
            if (!useIdUrlPairs) {
                connProblematicUrls.incrementAndGet();
            }
            return null;
        }

        String lowerCaseUrl = str2.toLowerCase();
        if (UrlTypeChecker.shouldNotAcceptPageUrl(str, str2, str2, lowerCaseUrl, true)) {
            return null;
        }

        // Session-related identifiers are removed, before the duplicate-check takes place.
        String checkedUrl = str2;
        if (lowerCaseUrl.contains("token") || lowerCaseUrl.contains("jsessionid")) {
            checkedUrl = UrlUtils.removeTemporalIdentifier(str2);
        }

        if (UrlUtils.duplicateUrls.contains(checkedUrl)) {
            logger.debug("Skipping non-DocOrDataset-url: \"" + checkedUrl + "\", at loading, as it has already been checked.");
            UrlUtils.addOutputData(str, checkedUrl, "N/A", UrlUtils.duplicateUrlIndicator, "Discarded in 'LoaderAndChecker.handleUrlChecks()', as it's a duplicate.", "N/A", null, false, "true", "N/A", "N/A", "N/A", "true", null, "null", "N/A");
            if (!useIdUrlPairs) {
                inputDuplicatesNum.incrementAndGet();
            }
            return null;
        }

        if (checkedUrl.contains("ir.lib.u-ryukyu.ac.jp") && checkedUrl.contains("/handle/123456789/")) {
            logger.debug("We will handle the weird case of \"" + checkedUrl + "\".");
            return StringUtils.replace(checkedUrl, "/123456789/", "/20.500.12000/", -1);
        }
        return checkedUrl;
    }

    /**
     * Decides whether the loading-loop should stop.
     *
     * @param z whether the batch which was just loaded is empty.
     * @param z2 whether this is the first batch-request of the run.
     * @return {@code false} when the batch is not empty; {@code true} when an empty batch follows at least one
     *         non-empty batch. When the very first batch is empty, the executor is shut down and the JVM exits with code 100.
     */
    public static boolean isFinishedLoading(boolean z, boolean z2) {
        if (!z) {
            return false;
        }
        if (z2) {
            // Nothing could be loaded at all.
            System.err.println("Could not retrieve any urls from the inputFile! Exiting..");
            logger.error("Could not retrieve any urls from the inputFile! Exiting..");
            PublicationsRetriever.executor.shutdownNow();
            System.exit(100);
            return false;
        }
        logger.debug("Done processing " + FileUtils.getCurrentlyLoadedUrls() + " urls from the inputFile.");
        return true;
    }

    /**
     * Writes a "skipped" output-record for every url of the given ID which is not contained in the already-logged urls.
     * Each url is compared and recorded in its normalized form, or as it is when it cannot be normalized.
     *
     * @param str the ID of the urls.
     * @param set all the urls of the ID.
     * @param hashSet the urls of the ID for which a record exists already.
     */
    private static void handleLogOfRemainingUrls(String str, Set<String> set, HashSet<String> hashSet) {
        for (String retrievedUrl : set) {
            String normalizedUrl = basicURLNormalizer.filter(retrievedUrl);
            String urlToLog = (normalizedUrl != null) ? normalizedUrl : retrievedUrl;
            if (hashSet.contains(urlToLog)) {
                continue;
            }
            UrlUtils.addOutputData(str, urlToLog, "N/A", UrlUtils.unreachableDocOrDatasetUrlIndicator, "Skipped in LoaderAndChecker, as a better url was selected for id: " + str, "N/A", null, true, "false", "N/A", "N/A", "N/A", "true", null, "null", "N/A");
        }
    }

    /**
     * Builds {@link #COULD_RETRY_HTTP_STATUS}, the pattern of the error-messages after which a url may be retried.
     * The client-error part is fixed (403, 408, 425, 426, 429); the server-error part depends on
     * {@code ConnSupportUtils.shouldBlockMost5XXDomains}: only "503 Server" when it is set, otherwise any
     * " Server" which is not preceded by "511".
     */
    public static void setCouldRetryRegex() {
        final String clientErrorsPart = ".*(?:HTTP 4(?:0[38]|2[569]) Client|";
        final String serverErrorsPart;
        final String debugMessage;
        if (!ConnSupportUtils.shouldBlockMost5XXDomains) {
            serverErrorsPart = "(?<!511)";
            debugMessage = "Going to avoid to block most of the 5XX domains, except from the 511-domains, which will be blocked.";
        } else {
            serverErrorsPart = "503";
            debugMessage = "Going to block most of the 5XX domains, except from the 503-domains.";
        }
        String couldRetryRegex = clientErrorsPart + serverErrorsPart + " Server) Error.*";
        logger.debug(debugMessage + " The \"couldRetryRegex\" is: " + couldRetryRegex);
        COULD_RETRY_HTTP_STATUS = Pattern.compile(couldRetryRegex);
    }

    /**
     * Classifies an exception which was thrown while handling a url.
     * {@link #COULD_RETRY_HTTP_STATUS} has to be set (see {@link #setCouldRetryRegex()}) before a
     * {@code RuntimeException} with a non-invalid-url message is classified.
     *
     * @param exc the exception to classify.
     * @param str the related url; it may be {@code null}.
     * @return a list of three strings: [0] "true"/"false" for whether the url is considered valid,
     *         [1] "true"/"false" for whether the url could be retried, [2] the description of the error.
     */
    public static List<String> getWasValidAndCouldRetry(Exception exc, String str) {
        boolean wasUrlValid = true;
        boolean couldRetry = false;
        String errorDescription;

        if (exc instanceof RuntimeException) {
            String exceptionMessage = exc.getMessage();
            if (exceptionMessage == null) {
                errorDescription = "there is an unspecified runtime error.";
            } else if (INVALID_URL_HTTP_STATUS.matcher(exceptionMessage).matches()) {
                wasUrlValid = false;
                errorDescription = "the url is invalid and lead to http-client-error.";
            } else if (COULD_RETRY_HTTP_STATUS.matcher(exceptionMessage).matches()) {
                couldRetry = true;
                errorDescription = "the url had a non-fatal http-error.";
            } else {
                errorDescription = "there is a serious unspecified error.";
            }
        } else if (exc instanceof ConnTimeoutException) {
            couldRetry = true;
            errorDescription = "the url had a connection-timeout.";
        } else if (exc instanceof DomainWithUnsupportedHEADmethodException) {
            couldRetry = true;
            errorDescription = "the url does not support HEAD method for checking most of the internal links.";
        } else if (exc instanceof DomainBlockedException) {
            couldRetry = true;
            errorDescription = "the url had its initial or redirected domain blocked.";
        } else {
            errorDescription = "there is a serious unspecified error.";
        }

        // The urls of some publishers are always worth a retry, as long as the url itself is valid.
        if (wasUrlValid && str != null && COULD_RETRY_URLS.matcher(str).matches()) {
            couldRetry = true;
        }

        List<String> wasValidAndCouldRetry = new ArrayList<>(3);
        wasValidAndCouldRetry.add(String.valueOf(wasUrlValid));
        wasValidAndCouldRetry.add(String.valueOf(couldRetry));
        wasValidAndCouldRetry.add(errorDescription);
        return wasValidAndCouldRetry;
    }
}
