package eu.openaire.publications_retriever.util.http;

import ch.qos.logback.core.CoreConstants;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.SetMultimap;
import com.google.common.net.HttpHeaders;
import eu.openaire.publications_retriever.PublicationsRetriever;
import eu.openaire.publications_retriever.crawler.PageCrawler;
import eu.openaire.publications_retriever.exceptions.DocLinkFoundException;
import eu.openaire.publications_retriever.exceptions.DomainBlockedException;
import eu.openaire.publications_retriever.exceptions.FileNotRetrievedException;
import eu.openaire.publications_retriever.models.IdUrlMimeTypeTriple;
import eu.openaire.publications_retriever.models.MimeTypeResult;
import eu.openaire.publications_retriever.util.args.ArgsUtils;
import eu.openaire.publications_retriever.util.file.FileData;
import eu.openaire.publications_retriever.util.file.FileUtils;
import eu.openaire.publications_retriever.util.file.HtmlFileUtils;
import eu.openaire.publications_retriever.util.file.HtmlResult;
import eu.openaire.publications_retriever.util.url.LoaderAndChecker;
import eu.openaire.publications_retriever.util.url.UrlUtils;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.commons.compress.compressors.brotli.BrotliCompressorInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2Constants;
import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
import org.apache.commons.io.FileDeleteStrategy;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:eu/openaire/publications_retriever/util/http/ConnSupportUtils.class */
public class ConnSupportUtils {
    /** Lower bound, in milliseconds, of the randomized per-domain delay used by {@code applyPolitenessDelay()}. */
    public static final int minPolitenessDelay = 3000;
    /** Upper bound, in milliseconds, of the randomized per-domain delay used by {@code applyPolitenessDelay()}. */
    public static final int maxPolitenessDelay = 7000;

    // Blocking thresholds. Their consumers are outside this part of the file; the names suggest
    // "number of occurrences before a path/domain gets blocked" -- NOTE(review): confirm against the on403/on5XX handlers.
    private static final int timesToHave403errorCodeBeforePathBlocked = 10;
    private static final int numberOf403BlockedPathsBeforeDomainBlocked = 50;
    private static final int timesToHave5XXerrorCodeBeforeDomainBlocked = 10;
    private static final int timesToHaveTimeoutExBeforeDomainBlocked = 25;
    private static final int timesToReturnNoTypeBeforeDomainBlocked = 10;

    // Fragments of the comment written to the output when a target url is crossed again (see handleReCrossedTargetUrl()).
    public static final String alreadyDownloadedFromIDMessage = "This file is probably already downloaded by ID=";
    public static final String alreadyDownloadedFromSourceUrlContinuedMessage = " and SourceUrl=";
    public static final String alreadyDetectedFromIDMessage = "This url was already detected by ID=";
    public static final String alreadyDetectedFromSourceUrlContinuedMessage = " and SourceUrl=";

    /** Per-thread StringBuilder; it is not initialized in this declaration -- presumably set up elsewhere before use (TODO confirm). */
    public static ThreadLocal<StringBuilder> htmlStrBuilder;
    // The two blank finals below are assigned outside the visible part of the file (a static initializer is required for them).
    private static final int maxAllowedContentSizeMB;
    private static final ThreadLocalRandom threadLocalRandom;
    private static final Logger logger = LoggerFactory.getLogger((Class<?>) ConnSupportUtils.class);

    /** Captures the plain "type/subtype" part (group 1) of a mime-type value, allowing an optional leading "(" or "('" and ignoring any trailing text. Used by getPlainMimeType(). */
    public static final Pattern MIME_TYPE_FILTER = Pattern.compile("(?:\\([']?)?([\\w]+/[\\w+.-]+).*");
    /** Generic "binary download" mime types (octet-stream, save, force-download, unknown) that do not reveal the real file type by themselves. */
    public static final Pattern POSSIBLE_DOC_OR_DATASET_MIME_TYPE = Pattern.compile("(?:(?:application|binary)/(?:(?:x-)?octet(?:-stream)?|save|force-download))|unknown");
    /**
     * "application/..." or "binary/..." mime types whose subtype is one of the listed dataset formats.
     * NOTE(review): the alternative {@code gr[i]?b]} contains a literal "]" which looks like a typo for {@code gr[i]?b} -- confirm before changing.
     */
    public static final Pattern DATASET_MIME_TYPE = Pattern.compile("(?:application|binary)/(?:xls[xbm]?|xlt[x]?|[ct]sv|tab|(?:(?:geo)?j|b)son|(?:x|k|g|nmr|sb|wiley|y[a]?)ml|xsd|o[dt]s|ddi|rdf|[g]?zip|zipx|[rt]ar|[7x]z|[t]?gz|[gb]z[\\d]*|smi[l]?|por|ascii|dta|sav|dat|txt|ti[f]{1,2}|tfw|dwg|nt|fits|feather|svg|sas7b(?:dat|ve)|spss|sas|stata|(?:my|postgre)?sql(?:ite)?|bigquery|sh[px]|sb[xn]|prj|dbf|(?:m|acc)db|mif|mat|pcd|bt|n[sc]?[\\d]*|h[\\d]+|hdf[\\d]*|trs|opj|jcamp|fcs|fas(?:ta)?|keys|values|las|rdata|parquet|avro|sql|dcm|gr[i]?b]|rds|[p]?cap|dmp|vcf|cbor|biosample|hic|warc|ig[e]?s|sla|dxf|pdb|[sc]df|cif|f(?:ast)?[qa]|apng|sra|vtp|gltf|[sm]tl|ply|abc|md|rtf|ttl|shp|shx|exr|cdf|glb|mtl|kmz|textFile)");
    /** Matches text that starts (after optional whitespace) with "<html" or "<!doctype html". */
    public static final Pattern HTML_STRING_MATCH = Pattern.compile("^(?:[\\s]*<(?:!doctype\\s)?html).*");
    /** Matches whitespace-only text, or text starting (after optional whitespace) with "<?xml" or "<!--". */
    public static final Pattern RESPONSE_BODY_UNWANTED_MATCH = Pattern.compile("^(?:[\\s]+|[\\s]*<(?:\\?xml|!--).*)");
    /** Matches a non-empty line consisting only of whitespace. */
    public static final Pattern SPACE_ONLY_LINE = Pattern.compile("^[\\s]+$");
    /** Captures, as group 1, everything that follows the "scheme://" prefix of a url. */
    private static final Pattern NON_PROTOCOL_URL = Pattern.compile("^(?:[^:/]+://)(.*)");

    // Per-domain / per-path counters and blacklists. They are only declared here; the names suggest they back the
    // blocking thresholds above -- NOTE(review): confirm in the error handlers.
    public static final ConcurrentHashMap<String, Integer> timesDomainsReturned5XX = new ConcurrentHashMap<>();
    public static final ConcurrentHashMap<String, Integer> timesDomainsHadTimeoutEx = new ConcurrentHashMap<>();
    public static final ConcurrentHashMap<String, Integer> timesPathsReturned403 = new ConcurrentHashMap<>();
    public static final SetMultimap<String, String> domainsMultimapWithPaths403BlackListed = Multimaps.synchronizedSetMultimap(HashMultimap.create());
    public static boolean shouldBlockMost5XXDomains = true;
    /** Incremented by handleReCrossedTargetUrl() each time an already-seen target url is found again. */
    public static AtomicInteger reCrossedDocUrls = new AtomicInteger(0);
    /** Plain mime types accepted as documents; filled by setKnownDocMimeTypes(). */
    public static final Set<String> knownDocMimeTypes = Collections.newSetFromMap(new ConcurrentHashMap());
    /** Plain mime types accepted as datasets; filled by setKnownDatasetMimeTypes() from the "dataset-mimetypes.txt" resource. */
    public static final Set<String> knownDatasetMimeTypes = Collections.newSetFromMap(new ConcurrentHashMap());
    /** Connection bookkeeping per domain, keyed by the value of UrlUtils.getTopThreeLevelDomain(); used by applyPolitenessDelay(). */
    public static final ConcurrentHashMap<String, DomainConnectionData> domainsWithConnectionData = new ConcurrentHashMap<>();
    /** Value sent as the "User-Agent" request header by setHttpHeaders(). */
    public static String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0";
    /** Value sent as the "Accept-Language" request header by setHttpHeaders(), unless the domain is excluded there. */
    public static String acceptLanguage = "en-US,en;q=0.5";
    /** Splits a line of the dataset-mimetypes resource into the mime type (group 1) and an optional trailing "// comment". */
    private static final Pattern FILTER_COMMENT_FROM_MIMETYPE = Pattern.compile("([^/]+/[^/]+)(?:[\\s]*//.*)?");
    /** Maps the hash of a stored file to its location; used by checkAndHandleDuplicateHash() to detect duplicate downloads. */
    public static final ConcurrentHashMap<String, String> fileHashesWithLocations = new ConcurrentHashMap<>();
    // Not used in the visible part of the file; presumably domains exempt from the "block after N times" rules (TODO confirm).
    public static final Set<String> domainsNotBlockableAfterTimes = Collections.newSetFromMap(new ConcurrentHashMap());

    /**
     * Sets the browser-like request headers on the given connection, in a fixed order.
     * The "Accept-Language" header is left out when the given domain is listed in
     * {@code HttpConnUtils.domainsWithUnsupportedAcceptLanguageParameter}.
     *
     * @param httpURLConnection the (not yet connected) connection to configure
     * @param str the domain; it is also sent as the "Host" header
     */
    public static void setHttpHeaders(HttpURLConnection httpURLConnection, String str) {
        // Name/value pairs, listed in the exact order in which they have to be applied.
        final String[][] orderedHeaders = {
            {HttpHeaders.USER_AGENT, userAgent},
            {HttpHeaders.ACCEPT, "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"},
            {HttpHeaders.ACCEPT_ENCODING, "gzip, deflate, br, zstd"},
            {HttpHeaders.ACCEPT_LANGUAGE, acceptLanguage},
            {HttpHeaders.DNT, "1"},
            {HttpHeaders.CONNECTION, "keep-alive"},
            {HttpHeaders.SEC_FETCH_DEST, "document"},
            {HttpHeaders.SEC_FETCH_MODE, "navigate"},
            {HttpHeaders.SEC_FETCH_SITE, "cross-site"},
            {HttpHeaders.UPGRADE_INSECURE_REQUESTS, "1"},
            {HttpHeaders.PRAGMA, "no-cache"},
            {HttpHeaders.CACHE_CONTROL, "no-cache"},
            {HttpHeaders.HOST, str},
        };
        for (String[] header : orderedHeaders) {
            // Some domains do not cope with the "Accept-Language" header, so it is skipped for them.
            if (HttpHeaders.ACCEPT_LANGUAGE.equals(header[0])
                    && HttpConnUtils.domainsWithUnsupportedAcceptLanguageParameter.contains(str)) {
                continue;
            }
            httpURLConnection.setRequestProperty(header[0], header[1]);
        }
    }

    /**
     * Fills the known mime-type sets according to the retrieval flags:
     * document types when documents are retrieved, dataset types when datasets are
     * retrieved or when documents are not retrieved at all.
     */
    public static void setKnownMimeTypes() {
        final boolean documentsWanted = ArgsUtils.retrieveDocuments;
        if (documentsWanted) {
            setKnownDocMimeTypes();
        }
        // Without documents, datasets are the only thing left to retrieve, so their types are always loaded.
        if (!documentsWanted || ArgsUtils.retrieveDatasets) {
            setKnownDatasetMimeTypes();
        }
    }

    /**
     * Registers the document mime types in {@code knownDocMimeTypes}.
     * The pdf variants are always added; the remaining document formats are added
     * only when {@code ArgsUtils.shouldDownloadDocFiles} is false.
     */
    public static void setKnownDocMimeTypes() {
        logger.debug("Setting up the official document mime types. Currently there is support only for pdf documents.");

        // The pdf family: always accepted.
        Collections.addAll(knownDocMimeTypes,
                "application/pdf",
                "application/x-pdf",
                "image/pdf",
                "image/x-pdf",
                "text/pdf",
                "text/x-pdf",
                "application/acrobat",
                "application/vnd.adobe.pdf",
                "application/vnd.adobe.portable-document-format",
                "application/vnd.pdf",
                "application/vnd.ms-pdf",
                "application/pdf-stream",
                "application/x-pdf-stream");

        if (!ArgsUtils.shouldDownloadDocFiles) {
            // The other document formats: accepted only when the files are not downloaded.
            Collections.addAll(knownDocMimeTypes,
                    "application/postscript",
                    "application/x-postscript",
                    "application/vnd.cups-postscript",
                    "application/eps",
                    "application/ps",
                    "application/x-ps",
                    "application/x-postscript-not-eps",
                    "text/postscript",
                    "image/eps",
                    "image/ps",
                    "application/msword",
                    "application/vnd.ms-word",
                    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
                    "application/vnd.ms-powerpoint",
                    "application/vnd.oasis.opendocument.presentation",
                    "application/x-tex",
                    "application/vnd.oasis.opendocument.text",
                    "application/vnd.ms-xpsdocument",
                    "application/epub+zip",
                    "application/oxps",
                    "application/rtf",
                    "application/x-impress",
                    "application/vnd.oasis.opendocument.formula",
                    "application/vnd.oasis.opendocument.graphics",
                    "application/vnd.oasis.opendocument.chart",
                    "application/vnd.oasis.opendocument.image",
                    "application/vnd.apple.pages",
                    "application/vnd.apple.keynote",
                    "application/vnd.wordperfect");
        }
    }

    /**
     * Loads the dataset mime types from the classpath resource "dataset-mimetypes.txt" into
     * {@code knownDatasetMimeTypes}. Each line is expected to hold one "type/subtype" value,
     * optionally followed by a "// comment"; lines which do not match are logged and skipped.
     * <p>
     * The process is terminated with exit code 77 when the resource is missing and with
     * exit code 78 when it cannot be read.
     */
    public static void setKnownDatasetMimeTypes() {
        logger.debug("Setting up the official dataset mime-types.");
        final String resourceName = "dataset-mimetypes.txt";
        // try-with-resources guarantees that both the raw stream and the reader are closed on every path.
        try (InputStream resourceAsStream = ConnSupportUtils.class.getClassLoader().getResourceAsStream(resourceName)) {
            if (resourceAsStream == null) {
                String str = "File not found in resources: " + resourceName;
                logger.error(str);
                System.err.println(str);
                System.exit(77);
                return; // Only reached if the exit was prevented; avoids wrapping a null stream below.
            }
            try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(resourceAsStream, StandardCharsets.UTF_8), FileUtils.mb)) {
                String readLine;
                while ((readLine = bufferedReader.readLine()) != null) {
                    Matcher matcher = FILTER_COMMENT_FROM_MIMETYPE.matcher(readLine);
                    if (!matcher.matches()) {
                        logger.error("Failed to match the line using the \"FILTER_COMMENT_FROM_MIMETYPE\"-regex: " + readLine);
                        continue;
                    }
                    String group = matcher.group(1);
                    if (group == null || group.isEmpty()) {
                        logger.error("Failed to extract the mimetype from line: " + readLine);
                    } else {
                        // The captured group may carry the spaces which precede a trailing comment.
                        knownDatasetMimeTypes.add(group.trim());
                    }
                }
                if (logger.isTraceEnabled()) {
                    logger.trace(knownDatasetMimeTypes.toString());
                }
            }
        } catch (IOException e) {
            String str2 = "Could not read file:" + resourceName;
            logger.error(str2, (Throwable) e);
            System.err.println(str2);
            System.exit(78);
        }
    }

    /**
     * Decides whether a response refers to a document or a dataset, based on its mime type,
     * its "Content-Disposition" value and, as a last resort, the url itself.
     *
     * @param str the url of the response (used for the url-based fallbacks and in log messages)
     * @param str2 the mime type reported for the response; may be null
     * @param str3 the "Content-Disposition" value, consulted only when {@code str2} is null; may be null
     * @param httpURLConnection the connection from which the "Content-Disposition" header is read in
     *        the other cases; may be null, in which case the header-based checks are skipped
     * @param z when true (or when {@code z2} is true), a warning is logged if neither the mime type
     *        nor a usable Content-Disposition is available
     * @param z2 see {@code z}
     * @return the detected type, or null when the response is not recognized as a document or dataset
     */
    public static MimeTypeResult hasDocOrDatasetMimeType(String str, String str2, String str3, HttpURLConnection httpURLConnection, boolean z, boolean z2) {
        MimeTypeResult mimeTypeResult = null;

        // Case 1: no mime type at all, so only the given Content-Disposition can help.
        if (str2 == null) {
            if (str3 == null || str3.equals("attachment")) {
                if (z || z2) {
                    logger.warn("No mimeType, nor Content-Disposition, were able to be retrieved for url: " + str);
                }
                return null;
            }
            if (ArgsUtils.retrieveDocuments && str3.contains(".pdf")) {
                mimeTypeResult = new MimeTypeResult("application/pdf", "document");
            } else {
                String unquotedDisposition = StringUtils.replace(StringUtils.replace(str3, "\"", "", -1), "'", "", -1);
                if (ArgsUtils.retrieveDatasets && LoaderAndChecker.DATASET_URL_FILTER.matcher(unquotedDisposition).matches()) {
                    mimeTypeResult = new MimeTypeResult("unspecified", "dataset");
                }
            }
            return mimeTypeResult;
        }

        // Case 2: a bogus "system.io.fileinfo" mime type; look for ".pdf" in the header or, without a header, in the url.
        if (str2.contains("system.io.fileinfo")) {
            if (httpURLConnection != null) {
                String disposition = httpURLConnection.getHeaderField(HttpHeaders.CONTENT_DISPOSITION);
                if (disposition != null) {
                    String lowerDisposition = disposition.toLowerCase();
                    if (!lowerDisposition.equals("attachment")) {
                        mimeTypeResult = lowerDisposition.contains(".pdf") ? new MimeTypeResult("application/pdf", "document") : null;
                    }
                } else {
                    mimeTypeResult = str.toLowerCase().contains(".pdf") ? new MimeTypeResult("application/pdf", "document") : null;
                }
            }
            return mimeTypeResult;
        }

        // Case 3: a regular mime type; strip parameters such as "charset" or "name" first.
        String plainMimeType = str2;
        if (str2.contains("charset") || str2.contains("name") || str2.startsWith("(")) {
            plainMimeType = getPlainMimeType(str2);
            if (plainMimeType == null) {
                logger.warn("Url with problematic mimeType (" + str2 + ") was: " + str);
                String lowerUrl = str.toLowerCase();
                if (lowerUrl.contains("pdf")) {
                    mimeTypeResult = new MimeTypeResult("application/pdf", "document");
                } else if (LoaderAndChecker.DATASET_URL_FILTER.matcher(lowerUrl).matches()) {
                    mimeTypeResult = new MimeTypeResult("unspecified", "dataset");
                }
                return mimeTypeResult;
            }
        }
        String cleanMimeType = StringUtils.replace(StringUtils.replace(plainMimeType, "'", "", -1), "\"", "", -1);

        if (ArgsUtils.retrieveDocuments && knownDocMimeTypes.contains(cleanMimeType)) {
            mimeTypeResult = new MimeTypeResult(cleanMimeType, "document");
        } else if (ArgsUtils.retrieveDatasets && (knownDatasetMimeTypes.contains(cleanMimeType) || DATASET_MIME_TYPE.matcher(cleanMimeType).matches())) {
            mimeTypeResult = new MimeTypeResult(cleanMimeType, "dataset");
        } else if (POSSIBLE_DOC_OR_DATASET_MIME_TYPE.matcher(cleanMimeType).matches()) {
            // A generic "binary download" type: the header or the url has to reveal the real type.
            // The connection is null-checked here as in the other branches; without it, only the url is examined.
            String disposition = (httpURLConnection != null) ? httpURLConnection.getHeaderField(HttpHeaders.CONTENT_DISPOSITION) : null;
            if (disposition != null) {
                String lowerDisposition = disposition.toLowerCase();
                if (!lowerDisposition.equals("attachment")) {
                    if (ArgsUtils.retrieveDocuments && lowerDisposition.contains(".pdf")) {
                        mimeTypeResult = new MimeTypeResult("application/pdf", "document");
                    } else {
                        String unquotedDisposition = StringUtils.replace(StringUtils.replace(lowerDisposition, "\"", "", -1), "'", "", -1);
                        if (ArgsUtils.retrieveDatasets && LoaderAndChecker.DATASET_URL_FILTER.matcher(unquotedDisposition).matches()) {
                            mimeTypeResult = new MimeTypeResult(cleanMimeType, "dataset");
                        }
                    }
                    return mimeTypeResult;
                }
            }
            // No informative header: fall back to the url.
            String lowerUrl = str.toLowerCase();
            if (ArgsUtils.retrieveDocuments && lowerUrl.contains("pdf")) {
                mimeTypeResult = new MimeTypeResult("application/pdf", "document");
            } else if (ArgsUtils.retrieveDatasets && LoaderAndChecker.DATASET_URL_FILTER.matcher(lowerUrl).matches()) {
                mimeTypeResult = new MimeTypeResult(cleanMimeType, "dataset");
            }
        } else if (httpURLConnection != null) {
            // An unrelated mime type: the header may still announce a pdf or a dataset file.
            String disposition = httpURLConnection.getHeaderField(HttpHeaders.CONTENT_DISPOSITION);
            if (disposition != null) {
                if (ArgsUtils.retrieveDocuments && disposition.toLowerCase().contains(".pdf")) {
                    mimeTypeResult = new MimeTypeResult("application/pdf", "document");
                }
                // The dataset check runs even after a pdf hit and takes precedence when both apply.
                String unquotedDisposition = StringUtils.replace(StringUtils.replace(disposition, "\"", "", -1), "'", "", -1);
                if (ArgsUtils.retrieveDatasets && LoaderAndChecker.DATASET_URL_FILTER.matcher(unquotedDisposition).matches()) {
                    mimeTypeResult = new MimeTypeResult(cleanMimeType, "dataset");
                }
            }
        }
        return mimeTypeResult;
    }

    /**
     * Records a target url which has been found before: logs it, increments
     * {@code reCrossedDocUrls} and writes an output entry whose comment points to the
     * id and source url of the earlier occurrence.
     *
     * @param str first value forwarded to {@code UrlUtils.addOutputData()}
     * @param str2 second value forwarded to {@code UrlUtils.addOutputData()}; also given to {@code getWasDirectLink()}
     * @param str3 third value forwarded to {@code UrlUtils.addOutputData()}; also given to {@code getWasDirectLink()}
     * @param str4 the re-crossed target url
     * @param idUrlMimeTypeTriple the id, url and mime type of the earlier occurrence
     * @param z flag given to {@code getWasDirectLink()}
     */
    public static void handleReCrossedTargetUrl(String str, String str2, String str3, String str4, IdUrlMimeTypeTriple idUrlMimeTypeTriple, boolean z) {
        logger.info("re-crossed targetUrl found: < " + str4 + " >");
        reCrossedDocUrls.incrementAndGet();

        // The wording depends on whether files are actually downloaded or the urls are only detected.
        String comment;
        if (ArgsUtils.shouldDownloadDocFiles || ArgsUtils.shouldJustDownloadHtmlFiles) {
            comment = alreadyDownloadedFromIDMessage + idUrlMimeTypeTriple.id + alreadyDownloadedFromSourceUrlContinuedMessage;
        } else {
            comment = alreadyDetectedFromIDMessage + idUrlMimeTypeTriple.id + alreadyDetectedFromSourceUrlContinuedMessage;
        }
        comment = comment + idUrlMimeTypeTriple.url;

        UrlUtils.addOutputData(str, str2, str3, str4, "N/A", comment, null, false, "true", "true", "true",
                getWasDirectLink(str2, str3, z, str4), "true", null, "null", idUrlMimeTypeTriple.mimeType);
    }

    /**
     * Extracts the plain "type/subtype" part of a mime-type value, using {@code MIME_TYPE_FILTER}.
     *
     * @param str the raw mime-type value
     * @return the plain mime type, or null when the value is null, longer than 255 characters,
     *         does not match the filter, or yields an empty result
     */
    public static String getPlainMimeType(String str) {
        if (str == null) {
            logger.warn("A null mimeType was given to \"getPlainMimeType()\".");
            return null;
        }
        final int givenLength = str.length();
        if (givenLength > 255) {
            logger.warn("A suspiciously large mimeType was given to \"getPlainMimeType()\", having length: " + givenLength);
            return null;
        }

        Matcher mimeMatcher = MIME_TYPE_FILTER.matcher(str);
        if (mimeMatcher.matches()) {
            try {
                String plainMimeType = mimeMatcher.group(1);
                if (plainMimeType == null || plainMimeType.isEmpty()) {
                    logger.warn("Unexpected null or empty value returned by \"mimeMatcher.group(1)\" for mimeType: \"" + str + "\".");
                    return null;
                }
                return plainMimeType;
            } catch (Exception e) {
                logger.error("", (Throwable) e);
                return null;
            }
        }

        logger.warn("Unexpected MIME_TYPE_FILTER's (" + mimeMatcher + ") mismatch for mimeType: \"" + str + "\"");
        return null;
    }

    /**
     * Checks whether a file with the same hash has been stored before.
     * <p>
     * For a new hash, the hash is registered with the file's location and null is returned.
     * For a known hash, the just-stored duplicate file is deleted (best effort), the
     * document-file counter is decremented when files are downloaded, and the given
     * {@code FileData} is re-pointed to the location of the earlier file.
     *
     * @param fileData the data of the file which was just stored
     * @param str the url the file came from (used in log messages only)
     * @return null when the hash is new, otherwise the given {@code fileData} pointing to the earlier file
     */
    public static FileData checkAndHandleDuplicateHash(FileData fileData, String str) {
        String existingLocation = fileHashesWithLocations.get(fileData.getHash());
        if (existingLocation == null) {
            // Register atomically: if another thread registered the same hash in the meantime,
            // "putIfAbsent" returns its location and this file is handled as the duplicate.
            existingLocation = fileHashesWithLocations.putIfAbsent(fileData.getHash(), fileData.getLocation());
            if (existingLocation == null) {
                return null;
            }
        }
        logger.debug("The file of url \"" + str + "\" has been already downloaded in location: " + existingLocation);

        // Best-effort removal of the duplicate; a failure is logged and does not stop the handling.
        File duplicateFile = fileData.getFile();
        try {
            if (duplicateFile.exists()) {
                try {
                    FileDeleteStrategy.FORCE.delete(duplicateFile);
                } catch (Exception e) {
                    logger.error("Error when deleting the duplicate file from url: " + str, (Throwable) e);
                }
            }
        } catch (Exception e2) {
            logger.error("Error when checking if the duplicate file exists, from url: " + str, (Throwable) e2);
        }

        // The duplicate was already counted when it was stored, so the count is taken back.
        if (ArgsUtils.shouldDownloadDocFiles) {
            if (ArgsUtils.fileNameType.equals(ArgsUtils.fileNameTypeEnum.numberName)) {
                FileUtils.numOfDocFile--;
            } else {
                FileUtils.numOfDocFiles.decrementAndGet();
            }
        }

        fileData.setLocation(existingLocation);
        fileData.setFile(new File(existingLocation));
        return fileData;
    }

    /**
     * Downloads and stores the file behind the given connection.
     * <p>
     * When the given connection was made with the "HEAD" method, a new connection is opened for
     * the url, since a HEAD response carries no body. That new connection is always disconnected
     * before this method returns or throws; the caller's own connection is left untouched.
     *
     * @param httpURLConnection the connection to the file's url
     * @param str value forwarded to {@code FileUtils.storeDocFileWithIdOrOriginalFileName()} when files are not number-named
     * @param str2 the domain given to {@code HttpConnUtils.openHttpConnection()} and {@code onErrorStatusCode()}
     * @param str3 the url of the file
     * @param z flag forwarded to {@code onErrorStatusCode()}
     * @return the data of the stored file, or of the earlier identical file when a duplicate is detected
     * @throws FileNotRetrievedException when the file cannot be retrieved or stored for any reason
     */
    public static FileData downloadAndStoreDocFile(HttpURLConnection httpURLConnection, String str, String str2, String str3, boolean z) throws FileNotRetrievedException {
        boolean reconnected = false; // True only when this method opened its own connection.
        try {
            if (httpURLConnection.getRequestMethod().equals("HEAD")) {
                httpURLConnection = HttpConnUtils.openHttpConnection(str3, str2, false, true);
                reconnected = true;
                int responseCode = httpURLConnection.getResponseCode();
                if (responseCode < 200 || responseCode >= 400) {
                    throw new FileNotRetrievedException(onErrorStatusCode(httpURLConnection.getURL().toString(), str2, responseCode, z, httpURLConnection));
                }
            }

            int contentSize = getContentSize(httpURLConnection, true, false);
            if (contentSize == -1) {
                throw new FileNotRetrievedException("The HTTP-reported size of this file was unacceptable!");
            }

            FileData storedFileData = ArgsUtils.fileNameType.equals(ArgsUtils.fileNameTypeEnum.numberName)
                    ? FileUtils.storeDocFileWithNumberName(httpURLConnection, str3, contentSize)
                    : FileUtils.storeDocFileWithIdOrOriginalFileName(httpURLConnection, str3, str, contentSize);
            if (storedFileData == null) {
                String str4 = "The file could not be " + (ArgsUtils.shouldUploadFilesToS3 ? "uploaded to S3" : "downloaded") + " from the docUrl " + str3;
                logger.warn(str4);
                throw new FileNotRetrievedException(str4);
            }

            FileData earlierFileData = checkAndHandleDuplicateHash(storedFileData, str3);
            if (earlierFileData != null) {
                return earlierFileData;
            }

            if (ArgsUtils.shouldUploadFilesToS3) {
                // The local copy is not needed once the file is in the object store.
                File file = storedFileData.getFile();
                try {
                    FileDeleteStrategy.FORCE.delete(file);
                } catch (Exception e) {
                    logger.warn("The file \"" + file.getName() + "\" could not be deleted after being uploaded to S3 ObjectStore!");
                }
            }
            return storedFileData;
        } catch (FileNotRetrievedException e2) {
            throw e2;
        } catch (Exception e3) {
            logger.error("", (Throwable) e3);
            throw new FileNotRetrievedException(e3.getMessage());
        } finally {
            // Release the connection opened here, on success and on failure alike.
            if (reconnected && httpURLConnection != null) {
                httpURLConnection.disconnect();
            }
        }
    }

    /**
     * Makes the calling thread wait, if needed, so that consecutive connections to the same
     * domain are spaced apart. The first connection to a domain is registered and never delayed.
     * For later connections, when less than {@code minPolitenessDelay} milliseconds have passed
     * since the last one, the thread sleeps for a random delay between {@code minPolitenessDelay}
     * and {@code maxPolitenessDelay} milliseconds, minus the time which has already passed.
     * <p>
     * The per-domain lock is held while waiting, and it is released through
     * {@code updateAndUnlock()}, which also receives the new "last connected" time.
     * An interruption does not cancel the delay: the remaining time is slept once more and the
     * interruption is deliberately not propagated.
     *
     * @param str the url which is about to be connected
     */
    public static void applyPolitenessDelay(String str) {
        String domain = UrlUtils.getTopThreeLevelDomain(str);
        DomainConnectionData domainConnectionData = domainsWithConnectionData.get(domain);
        if (domainConnectionData == null) {
            // Register atomically, so that two threads meeting a new domain at the same time
            // cannot both skip the delay or overwrite each other's entry.
            domainConnectionData = domainsWithConnectionData.putIfAbsent(domain, new DomainConnectionData());
            if (domainConnectionData == null) {
                return; // This thread is the first one to connect to the domain.
            }
        }

        domainConnectionData.lock.lock();
        Instant now = Instant.now();
        try {
            long elapsedMillis = Duration.between(domainConnectionData.lastTimeConnected, now).toMillis();
            if (elapsedMillis < minPolitenessDelay) {
                long delayMillis = getRandomNumber(minPolitenessDelay, maxPolitenessDelay) - elapsedMillis;
                try {
                    Thread.sleep(delayMillis);
                } catch (InterruptedException e) {
                    Instant interruptedAt = Instant.now();
                    try {
                        long sleptMillis = Duration.between(now, interruptedAt).toMillis();
                        long remainingMillis = delayMillis - sleptMillis;
                        // "Thread.sleep()" rejects negative values, so sleep again only when time is really left.
                        if (sleptMillis < minPolitenessDelay && remainingMillis > 0) {
                            try {
                                Thread.sleep(remainingMillis);
                            } catch (InterruptedException ignored) {
                                // Best effort: after a second interruption the connection simply proceeds.
                            }
                        }
                    } catch (Exception e3) {
                        logger.warn("An exception was thrown when tried to obtain the time elapsed from the last time the \"currentTime\" was updated: " + e3.getMessage());
                        domainConnectionData.updateAndUnlock(interruptedAt);
                        return;
                    }
                }
                now = Instant.now();
            }
            domainConnectionData.updateAndUnlock(now);
        } catch (Exception e4) {
            logger.warn("An exception was thrown when tried to obtain the time elapsed from the last time the domain connected: " + e4.getMessage());
            domainConnectionData.updateAndUnlock(now);
        }
    }

    /**
     * Retrieves the body of an HTTP-300 ("Multiple Choices") page and returns one internal link from it.
     *
     * @param str the url of the HTTP-300 page
     * @param httpURLConnection the connection to that page
     * @return the first internal link in the iteration order of the extracted link set, or the
     *         message of a {@code DocLinkFoundException} raised during the extraction; null when
     *         the html cannot be retrieved, no link is found, or another error occurs
     */
    public static String getInternalLinkFromHTTP300Page(String str, HttpURLConnection httpURLConnection) {
        try {
            HtmlResult htmlResult = getHtml(httpURLConnection, null, str, null, false, null, null);
            if (htmlResult != null) {
                HashSet<String> internalLinks = PageCrawler.extractInternalLinksFromHtml(htmlResult.getHtmlString(), str);
                boolean hasLinks = (internalLinks != null) && !internalLinks.isEmpty();
                return hasLinks ? internalLinks.iterator().next() : null;
            }
            logger.warn("Could not retrieve the HTML-code for HTTP300PageUrl: " + str);
            return null;
        } catch (DocLinkFoundException e) {
            // The exception's message is returned as the link.
            return e.getMessage();
        } catch (Exception e2) {
            logger.error("", (Throwable) e2);
            return null;
        }
    }

    /**
     * Handles an error HTTP status code: builds the message describing the failure and applies
     * the blocking rules for the code.
     * <ul>
     *   <li>4XX: for 403, {@code on403ErrorCode()} is invoked; for 429, the "Retry-After" header is appended when present.</li>
     *   <li>5XX: {@code on5XXerrorCode()} is invoked. A 500 from a "handle.net" domain is treated as a 404.</li>
     *   <li>Any other code: the domain, when it can be determined, is blacklisted and a {@code DomainBlockedException} is thrown.</li>
     * </ul>
     *
     * @param str the url which returned the error
     * @param str2 the domain of the url; may be null
     * @param i the HTTP status code
     * @param z when true, the response body is read in order to append its text to the message
     * @param httpURLConnection the connection which returned the error
     * @return the message describing the failure
     * @throws DomainBlockedException when the domain gets blocked because of an unexpected status code;
     *         the invoked 403/5XX handlers presumably throw it as well when their thresholds are reached (TODO confirm)
     */
    public static String onErrorStatusCode(String str, String str2, int i, boolean z, HttpURLConnection httpURLConnection) throws DomainBlockedException {
        // The domain may be null (it is re-derived from the url further down), so it is null-checked before use.
        if (i == 500 && str2 != null && str2.contains("handle.net")) {
            i = 404;
        }

        String errorLogMessage;
        if (i >= 400 && i <= 499) {
            errorLogMessage = "Url: \"" + str + "\" seems to be unreachable. Received: HTTP " + i + " Client Error.";
            // For 404 and 410 the body is not read.
            if (z && i != 404 && i != 410) {
                String errorText = getErrorMessageFromResponseBody(httpURLConnection, str);
                if (errorText != null) {
                    if (str2 != null && str2.contains("doi.org") && errorText.contains("Not a DOI")) {
                        logger.warn("Found a \"doi.org\" url with an invalid DOI: " + str);
                    }
                    errorLogMessage = errorLogMessage + " Error-text: " + errorText;
                }
            }
            if (i == 403) {
                on403ErrorCode(str, str2, z);
            } else if (i == 429) {
                String retryAfter = httpURLConnection.getHeaderField(HttpHeaders.RETRY_AFTER);
                if (retryAfter != null) {
                    errorLogMessage = errorLogMessage + " | Retry-After:" + retryAfter;
                }
            }
            return errorLogMessage;
        }

        // Make sure the domain belongs to this url (e.g. after a redirect to another domain).
        if (str2 == null || !str.contains(str2)) {
            str2 = UrlUtils.getDomainStr(str, null);
        }

        if (i >= 500 && i <= 599) {
            errorLogMessage = "Url: \"" + str + "\" seems to be unreachable. Received: HTTP " + i + " Server Error.";
            on5XXerrorCode(i, str2);
            return errorLogMessage;
        }

        // Neither 4XX nor 5XX: an unexpected code, for which the whole domain gets blocked.
        errorLogMessage = "Url: \"" + str + "\" seems to be unreachable. Received unexpected responseCode: " + i;
        if (z) {
            String errorText = getErrorMessageFromResponseBody(httpURLConnection, str);
            if (errorText != null) {
                errorLogMessage = errorLogMessage + " Error-text: " + errorText;
            }
        }
        logger.warn(errorLogMessage);
        if (str2 != null) {
            HttpConnUtils.blacklistedDomains.add(str2);
            logger.warn("Domain: \"" + str2 + "\" was blocked, after giving a " + i + " HTTP-status-code.");
            throw new DomainBlockedException(str2);
        }
        return errorLogMessage;
    }

    /**
     * Returns the body stream of the connection, wrapped in a decompressing stream when the
     * response declares a "content-encoding".
     *
     * @param httpURLConnection the connection to read from
     * @param z when true, the error stream is used instead of the regular input stream
     * @return the stream to read the body from; null when there is no error stream, when the
     *         declared encoding cannot be handled (the raw stream is closed in that case),
     *         or when any exception occurs
     */
    public static InputStream checkEncodingAndGetInputStream(HttpURLConnection httpURLConnection, boolean z) {
        try {
            InputStream rawStream;
            if (z) {
                rawStream = httpURLConnection.getErrorStream();
                if (rawStream == null) {
                    return null; // The server sent no error body.
                }
            } else {
                rawStream = httpURLConnection.getInputStream();
            }

            String contentEncoding = httpURLConnection.getHeaderField("content-encoding");
            if (contentEncoding == null) {
                return rawStream;
            }

            InputStream decodingStream = getCompressedInputStream(rawStream, contentEncoding, httpURLConnection.getURL().toString(), z);
            if (decodingStream != null) {
                return decodingStream;
            }

            // The body cannot be decoded, so the raw stream is of no use.
            try {
                rawStream.close();
            } catch (IOException e) {
                // Nothing more can be done for a stream which is being discarded anyway.
            }
            return null;
        } catch (Exception e2) {
            logger.error("", (Throwable) e2);
            return null;
        }
    }

    /**
     * Wraps the given stream in the decompressing stream which matches the given encoding.
     * The supported encodings are "gzip", "deflate", "br" (brotli) and "zstd", compared case-insensitively.
     *
     * @param inputStream the compressed stream
     * @param str the value of the "content-encoding" header
     * @param str2 the url of the response (used in log messages only)
     * @param z not used by this method
     * @return the decompressing stream, or null when the encoding is not supported or the
     *         decompressing stream cannot be created
     */
    public static InputStream getCompressedInputStream(InputStream inputStream, String str, String str2, boolean z) {
        String encoding = str.toLowerCase();
        try {
            CompressorInputStream compressorInputStream;
            if (encoding.equals("gzip")) {
                compressorInputStream = new GzipCompressorInputStream(inputStream);
            } else if (encoding.equals(CompressorStreamFactory.DEFLATE)) {
                compressorInputStream = new DeflateCompressorInputStream(inputStream);
            } else if (encoding.equals(CompressorStreamFactory.BROTLI)) {
                compressorInputStream = new BrotliCompressorInputStream(inputStream);
            } else if (encoding.equals(CompressorStreamFactory.ZSTANDARD)) {
                compressorInputStream = new ZstdCompressorInputStream(inputStream);
            } else {
                logger.warn("An unsupported \"content-encoding\" (" + str + ") was received from url: " + str2);
                return null;
            }
            return compressorInputStream;
        } catch (IOException e) {
            // The message may be null (e.g. for an end-of-stream failure on an empty body), so it is checked first.
            String message = e.getMessage();
            if (message != null && message.startsWith("Input is not in the")) {
                // The server declared an encoding which the body does not actually use.
                logger.warn(message + " | http-published-encoding: " + str + " | url: " + str2);
                return null;
            }
            logger.error("Could not acquire the compressorInputStream for encoding: " + str + " | url: " + str2, (Throwable) e);
            return null;
        }
    }

    /**
     * Extracts the plain text of an error-page, by reading the error-stream of the connection
     * and stripping the markup with Jsoup.
     *
     * @param httpURLConnection the connection which returned the error
     * @param str the url of the page, forwarded to {@code getHtml}
     * @return the text of the error-page, or {@code null} when the body could not be read,
     *         when it is longer than 10000 characters, or when it contains no text
     */
    public static String getErrorMessageFromResponseBody(HttpURLConnection httpURLConnection, String str) {
        final HtmlResult errorPage = getHtml(httpURLConnection, null, str, null, true, null, null);
        if (errorPage == null) {
            return null;
        }

        final String rawHtml = errorPage.getHtmlString();
        if (rawHtml.length() > 10000) {
            return null;
        }

        final String plainText = Jsoup.parse(rawHtml).text();
        return plainText.isEmpty() ? null : plainText;
    }

    /**
     * Registers a "403" response for the path of the given url. A path is blacklisted once
     * {@code countAndBlockPathAfterTimes} reports it exceeded the limit of 10, and the whole domain
     * is blocked when more than 50 of its paths are blacklisted (unless the domain belongs to
     * {@code domainsNotBlockableAfterTimes}).
     *
     * @param str the url which returned the 403
     * @param str2 the domain of the url; it is re-extracted from the url when it is {@code null}
     *             or not contained in the url
     * @param z forwarded to {@code countAndBlockPathAfterTimes}
     * @throws DomainBlockedException when this call caused the domain to be blocked
     */
    public static void on403ErrorCode(String str, String str2, boolean z) throws DomainBlockedException {
        Matcher urlMatcher = null;
        String domain = str2;
        if (domain == null || !str.contains(domain)) {
            urlMatcher = UrlUtils.getUrlMatcher(str);
            if (urlMatcher == null) {
                return;
            }
            domain = UrlUtils.getDomainStr(str, urlMatcher);
            if (domain == null) {
                return;
            }
        }

        String pathStr = UrlUtils.getPathStr(str, urlMatcher);
        if (pathStr == null || !countAndBlockPathAfterTimes(domainsMultimapWithPaths403BlackListed, timesPathsReturned403, pathStr, domain, 10, z)) {
            return;
        }
        logger.warn("Path: \"" + pathStr + "\" of domain: \"" + domain + "\" was blocked after returning 403 Error Code more than 10 times.");

        // The multimap is keyed by the domain string itself; the key must not be cast to the multimap type.
        if (domainsMultimapWithPaths403BlackListed.get(domain).size() <= 50 || domainsNotBlockableAfterTimes.contains(domain)) {
            return;
        }

        HttpConnUtils.blacklistedDomains.add(domain);
        logger.warn("Domain: \"" + domain + "\" was blocked, after having more than 50 of its paths 403blackListed.");
        // The per-path entries are dropped once the domain itself is blacklisted.
        domainsMultimapWithPaths403BlackListed.removeAll(domain);
        throw new DomainBlockedException(domain);
    }

    /**
     * Counts one more occurrence of the given path and blacklists it, under its domain, once the
     * count becomes greater than the given limit.
     *
     * @param setMultimap the blacklist, mapping a domain to its blocked paths
     * @param concurrentHashMap the occurrence-counters, keyed by path
     * @param str the path
     * @param str2 the domain of the path
     * @param i the number of occurrences which is still tolerated
     * @param z not used by this method
     * @return {@code true} when the path was blacklisted by this call
     */
    public static boolean countAndBlockPathAfterTimes(SetMultimap<String, String> setMultimap, ConcurrentHashMap<String, Integer> concurrentHashMap, String str, String str2, int i, boolean z) {
        final int timesSeen = countInsertAndGetTimes(concurrentHashMap, str);
        final boolean limitExceeded = timesSeen > i;
        if (limitExceeded) {
            setMultimap.put(str2, str);
            // The counter is not needed any more, once the path is blacklisted.
            concurrentHashMap.remove(str);
        }
        return limitExceeded;
    }

    /**
     * Tells whether the path of the given url is 403-blacklisted under the given domain.
     *
     * @param str the url to check
     * @param str2 the domain of the url
     * @return {@code true} when the domain has blacklisted paths and the path of the url is one
     *         of them; {@code false} otherwise, including when no path could be extracted
     */
    public static boolean checkIfPathIs403BlackListed(String str, String str2) {
        if (!domainsMultimapWithPaths403BlackListed.containsKey(str2)) {
            return false;
        }
        String pathStr = UrlUtils.getPathStr(str, null);
        if (pathStr == null) {
            return false;
        }
        // The multimap is keyed by the domain string itself; the key must not be cast to the multimap type.
        return domainsMultimapWithPaths403BlackListed.get(str2).contains(pathStr);
    }

    /**
     * Registers a "5XX" response for the given domain and blocks the domain when
     * {@code countAndBlockDomainAfterTimes} reports that the limit was exceeded.
     * Code 503 is never counted; other codes are counted only when
     * {@code shouldBlockMost5XXDomains} is set, except for 511 which is always counted.
     *
     * @param i the HTTP status code
     * @param str the domain which returned the code; nothing happens when it is {@code null}
     * @throws DomainBlockedException when this call caused the domain to be blocked
     */
    public static void on5XXerrorCode(int i, String str) throws DomainBlockedException {
        if (i == 503 || str == null) {
            return;
        }
        if (!shouldBlockMost5XXDomains && i != 511) {
            return;
        }
        if (!countAndBlockDomainAfterTimes(HttpConnUtils.blacklistedDomains, timesDomainsReturned5XX, str, 10, true)) {
            return;
        }
        logger.warn("Domain: \"" + str + "\" was blocked after returning 5XX Error Code 10 times.");
        throw new DomainBlockedException(str);
    }

    /**
     * Registers a timeout for the given domain and blocks the domain when
     * {@code countAndBlockDomainAfterTimes} reports that the limit of 25 was exceeded.
     *
     * @param str the domain which caused the timeout
     * @throws DomainBlockedException when this call caused the domain to be blocked
     */
    public static void onTimeoutException(String str) throws DomainBlockedException {
        final boolean wasBlocked = countAndBlockDomainAfterTimes(HttpConnUtils.blacklistedDomains, timesDomainsHadTimeoutEx, str, 25, true);
        if (!wasBlocked) {
            return;
        }
        logger.warn("Domain: \"" + str + "\" was blocked after causing TimeoutException 25 times.");
        throw new DomainBlockedException(str);
    }

    /**
     * Counts one more occurrence for the given domain and adds the domain to the given blacklist
     * once the count becomes greater than the given limit. Domains contained in
     * {@code domainsNotBlockableAfterTimes} are neither counted nor blocked.
     *
     * @param set the blacklist to add the domain to
     * @param concurrentHashMap the occurrence-counters, keyed by domain
     * @param str the domain
     * @param i the number of occurrences which is still tolerated
     * @param z when {@code true}, the value stored for the domain in
     *          {@code UrlUtils.domainsAndNumHits} (if any) is added to the limit
     * @return {@code true} when the domain was blacklisted by this call
     */
    public static boolean countAndBlockDomainAfterTimes(Set<String> set, ConcurrentHashMap<String, Integer> concurrentHashMap, String str, int i, boolean z) {
        if (domainsNotBlockableAfterTimes.contains(str)) {
            return false;
        }

        final int timesSeen = countInsertAndGetTimes(concurrentHashMap, str);
        if (timesSeen <= i) {
            return false;
        }

        if (z) {
            final Integer numHits = UrlUtils.domainsAndNumHits.get(str);
            if (numHits != null && timesSeen <= numHits.intValue() + i) {
                return false;
            }
        }

        set.add(str);
        // The counter is not needed any more, once the domain is blacklisted.
        concurrentHashMap.remove(str);
        return true;
    }

    /**
     * Increments the counter stored for the given key (starting from 1 for an unseen key) and
     * returns the new value.
     * <p>
     * The increment is done with {@link ConcurrentHashMap#merge}, which performs the
     * read-modify-write atomically. A separate {@code get} followed by {@code put} can lose
     * increments when several threads count the same key at the same time.
     *
     * @param concurrentHashMap the counters
     * @param str the key to count
     * @return the number of times the key has been counted, including this call
     */
    public static int countInsertAndGetTimes(ConcurrentHashMap<String, Integer> concurrentHashMap, String str) {
        return concurrentHashMap.merge(str, 1, Integer::sum);
    }

    /**
     * Blacklists the domain of the target-url of a "sharedSiteSession-redirectionPack" and, when
     * a different initial url is given, the domain of that url too.
     *
     * @param str the target-url of the redirection
     * @param str2 the url which redirected to the target; may be {@code null}
     * @return the domains handled by this call (the target-domain first), or {@code null} when
     *         the domain of the target-url could not be extracted
     */
    public static List<String> blockSharedSiteSessionDomains(String str, String str2) {
        String targetDomain = UrlUtils.getDomainStr(str, null);
        if (targetDomain == null) {
            return null;
        }

        // A parameterized list replaces the raw "ArrayList", so no unchecked conversion is needed on return.
        List<String> handledDomains = new ArrayList<>(2);
        handledDomains.add(targetDomain);
        // "add" returns true only for a newly blacklisted domain, so each domain is logged once.
        if (HttpConnUtils.blacklistedDomains.add(targetDomain)) {
            logger.warn("Domain: \"" + targetDomain + "\" was blocked after trying to cause a \"sharedSiteSession-redirectionPack\" with url: \"" + str + "\"!");
        }

        if (str2 != null && !str2.equals(str)) {
            String initialDomain = UrlUtils.getDomainStr(str2, null);
            if (initialDomain != null) {
                handledDomains.add(initialDomain);
                if (HttpConnUtils.blacklistedDomains.add(initialDomain)) {
                    logger.warn("Domain: \"" + initialDomain + "\" was blocked after its url : \"" + str2 + "\" tried to redirect to targetUrl: \"" + str + "\" and cause a \"sharedSiteSession-redirectionPack\"!");
                }
            }
        }
        return handledDomains;
    }

    /**
     * Reads the html of a response, skipping the lines which are empty, have a single character,
     * or match {@code SPACE_ONLY_LINE}.
     * <p>
     * When {@code ArgsUtils.shouldJustDownloadHtmlFiles} is set and the call is not for an
     * error-page, the lines are written to a html-file; otherwise they are collected into the
     * per-thread {@code htmlStrBuilder}, separated by single spaces.
     *
     * @param httpURLConnection the connection to read from
     * @param str the id of the record, forwarded to {@code HtmlFileUtils.getFinalHtmlFilePath}
     * @param str2 the url of the page, used for the file-path and in log messages
     * @param bufferedReader an already opened reader over the response-body, or {@code null} to
     *                       open the stream of the connection; it is closed by this method
     * @param z {@code true} when the body of an error-response is requested (the error-stream is
     *          read and no file is written)
     * @param matcher forwarded to {@code HtmlFileUtils.getFinalHtmlFilePath}
     * @param str3 a first html-line which was already consumed from the reader, or {@code null}
     * @return a result holding the file-data (file mode) or the html-string (string mode);
     *         {@code null} when the content-size is not acceptable, the stream or the file-path
     *         could not be obtained, the hash of the file could not be calculated, reading
     *         failed, or no html was collected
     */
    public static HtmlResult getHtml(HttpURLConnection httpURLConnection, String str, String str2, BufferedReader bufferedReader, boolean z, Matcher matcher, String str3) {
        int contentSize = getContentSize(httpURLConnection, false, z);
        if (contentSize == -1) {
            if (!z) {
                logger.warn("Aborting HTML-extraction for pageUrl: " + str2);
            }
            closeBufferedReader(bufferedReader);
            return null;
        }

        boolean shouldWriteHtmlFile = ArgsUtils.shouldJustDownloadHtmlFiles && !z;

        // The per-thread builder is created lazily and only when the html is collected in memory.
        StringBuilder htmlBuilder = htmlStrBuilder.get();
        if (htmlBuilder == null && !shouldWriteHtmlFile) {
            htmlBuilder = new StringBuilder(BZip2Constants.BASEBLOCKSIZE);
            htmlStrBuilder.set(htmlBuilder);
        }

        InputStream inputStream = null;
        if (bufferedReader == null) {
            inputStream = checkEncodingAndGetInputStream(httpURLConnection, z);
            if (inputStream == null) {
                return null;
            }
        }

        // The size is needed by the file-writer even when a reader was handed over by the caller:
        // a buffer-size of 0 makes the "BufferedWriter" constructor throw an IllegalArgumentException.
        int bufferSize = (contentSize == -2 || contentSize >= 1048576) ? FileUtils.mb : contentSize;

        FileData fileData = null;
        String fullPathFileName = null;
        if (shouldWriteHtmlFile) {
            try {
                fileData = HtmlFileUtils.getFinalHtmlFilePath(str, str2, matcher, contentSize);
                fullPathFileName = fileData.getLocation();
            } catch (Exception e) {
                logger.error("Failed to acquire the \"fullPathFileName\": " + e.getMessage());
                if (ArgsUtils.fileNameType.equals(ArgsUtils.fileNameTypeEnum.numberName)) {
                    HtmlFileUtils.htmlFilesNum.decrementAndGet();
                }
                // Nothing will be read any more, so release whatever was opened or handed over.
                closeBufferedReader(bufferedReader);
                if (inputStream != null) {
                    try {
                        inputStream.close();
                    } catch (IOException ignored) {
                        // Nothing more can be done for a stream that fails to close.
                    }
                }
                return null;
            }
        }

        try (BufferedReader reader = (bufferedReader != null) ? bufferedReader : new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8), bufferSize);
             BufferedWriter writer = shouldWriteHtmlFile ? new BufferedWriter(new FileWriter(fullPathFileName, StandardCharsets.UTF_8), bufferSize) : null) {

            String lineSeparator = (writer != null) ? FileUtils.endOfLine : " ";

            // The first line may have been consumed already, during the content-type detection.
            if (str3 != null) {
                if (!shouldWriteHtmlFile) {
                    htmlBuilder.append(str3).append(lineSeparator);
                }
                if (writer != null) {
                    writer.write(str3);
                    writer.newLine();
                }
            }

            String line;
            while ((line = reader.readLine()) != null) {
                if (line.isEmpty() || line.length() == 1 || SPACE_ONLY_LINE.matcher(line).matches()) {
                    continue;
                }
                if (!shouldWriteHtmlFile) {
                    htmlBuilder.append(line).append(lineSeparator);
                }
                if (writer != null) {
                    writer.write(line);
                    writer.newLine();
                }
            }

            if (writer != null) {
                // Flush before hashing, so that the whole content is in the file.
                writer.flush();
                logger.info("HtmlFile '" + fullPathFileName + "' was downloaded.");
                if (!fileData.calculateAndSetHashAndSize()) {
                    try {
                        FileDeleteStrategy.FORCE.delete(fileData.getFile());
                    } catch (Exception e) {
                        logger.error("Error when deleting the duplicate file from pageUrl: " + str2, e);
                    }
                    return null;
                }
                FileData alreadyStoredFileData = checkAndHandleDuplicateHash(fileData, str2);
                if (alreadyStoredFileData != null) {
                    fileData = alreadyStoredFileData;
                } else {
                    HtmlFileUtils.htmlFilesNum.incrementAndGet();
                }
            }

            if (shouldWriteHtmlFile) {
                return new HtmlResult(null, fileData);
            }
            String html = (htmlBuilder == null || htmlBuilder.length() == 0) ? null : htmlBuilder.toString();
            return (html != null) ? new HtmlResult(html, null) : null;
        } catch (IOException ioe) {
            logger.error("IOException when retrieving the HTML-code for pageUrl \"" + str2 + "\": " + ioe.getMessage());
            return null;
        } catch (Exception e) {
            logger.error("", e);
            return null;
        } finally {
            // The builder is reused by the next call of this thread, so it is always emptied.
            if (htmlBuilder != null) {
                htmlBuilder.setLength(0);
            }
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (IOException ignored) {
                    // Nothing more can be done for a stream that fails to close.
                }
            }
        }
    }

    /**
     * Tries to detect the content-type of a response which declared none, by examining the
     * beginning of its body. The body is available only when the request was made with "GET".
     *
     * @param str the url of the response
     * @param str2 the domain of the url, used for counting the responses without type-info
     * @param httpURLConnection the connection to examine
     * @param z when {@code true}, a debug message is logged for a detected html page
     * @return a list of five elements: [0] the detected mime-type ("text/html",
     *         "application/pdf" or {@code null}), [1] whether a type was detected, [2] the first
     *         html-line (html only), [3] the reader positioned after that line (html only),
     *         [4] whether the content is a pdf
     * @throws DomainBlockedException when no type was detected for a "GET" request and the domain
     *                                got blocked because of it
     * @throws RuntimeException when no type was detected for a "GET" request and the domain was
     *                          not blocked
     */
    public static ArrayList<Object> detectContentTypeFromResponseBody(String str, String str2, HttpURLConnection httpURLConnection, boolean z) throws DomainBlockedException, RuntimeException {
        String failureReport = "No ContentType nor ContentDisposition, were able to be retrieved from url: " + str;
        String detectedMimeType = null;
        boolean typeWasDetected = false;
        String firstHtmlLine = null;
        BufferedReader bodyReader = null;
        boolean isPdf = false;

        final boolean isGetRequest = httpURLConnection.getRequestMethod().equals("GET");
        if (!isGetRequest) {
            failureReport = failureReport + "\nThe initial connection was made with the \"HTTP-HEAD\" method, so there is no response-body to use to detect the content-type.";
        } else {
            final DetectedContentType detection = extractContentTypeFromResponseBody(httpURLConnection);
            if (detection == null) {
                failureReport = failureReport + "\nCould not retrieve the response-body for url: " + str;
            } else {
                switch (detection.detectedContentType) {
                    case "html":
                        if (z) {
                            logger.debug("The url with the undeclared content type < " + str + " >, was examined and found to have HTML contentType! Going to visit the page.");
                        }
                        detectedMimeType = "text/html";
                        typeWasDetected = true;
                        firstHtmlLine = detection.firstHtmlLine;
                        bodyReader = detection.bufferedReader;
                        break;
                    case "pdf":
                        logger.debug("The url with the undeclared content type < " + str + " >, was examined and found to have PDF contentType!");
                        detectedMimeType = "application/pdf";
                        isPdf = true;
                        typeWasDetected = true;
                        break;
                    case "undefined":
                        logger.debug("The url with the undeclared content type < " + str + " >, was examined and found to have UNDEFINED contentType.");
                        break;
                    default:
                        failureReport = failureReport + "\nUnspecified \"detectedContentType\": " + detection.detectedContentType;
                        break;
                }
            }
        }

        // Only a "GET" request without a detected type counts against the domain.
        if (isGetRequest && !typeWasDetected) {
            final boolean domainGotBlocked = countAndBlockDomainAfterTimes(HttpConnUtils.blacklistedDomains, HttpConnUtils.timesDomainsReturnedNoType, str2, 10, true);
            if (!domainGotBlocked) {
                throw new RuntimeException(failureReport);
            }
            logger.warn(failureReport);
            logger.warn("Domain: \"" + str2 + "\" was blocked after returning no Type-info more than 10 times.");
            throw new DomainBlockedException(str2);
        }

        final ArrayList<Object> detectionData = new ArrayList<>(5);
        detectionData.add(detectedMimeType);
        detectionData.add(Boolean.valueOf(typeWasDetected));
        detectionData.add(firstHtmlLine);
        detectionData.add(bodyReader);
        detectionData.add(Boolean.valueOf(isPdf));
        return detectionData;
    }

    /**
     * Detects the type of a response-body from its first meaningful line, which is the first
     * line that is not empty, is longer than one character and does not match
     * {@code RESPONSE_BODY_UNWANTED_MATCH}.
     * <p>
     * For a html body, the still-open reader is handed over to the caller inside the result. In
     * every other case (pdf, undefined, empty body, failure) the reader is closed here, so that
     * neither the reader nor the underlying stream is leaked.
     *
     * @param httpURLConnection the connection to read the body from
     * @return the detection-result ("html", "pdf" or "undefined"), or {@code null} when the
     *         content-size is not acceptable, the stream could not be obtained, the body has no
     *         meaningful line, or reading failed
     */
    public static DetectedContentType extractContentTypeFromResponseBody(HttpURLConnection httpURLConnection) {
        int contentSize = getContentSize(httpURLConnection, false, false);
        if (contentSize == -1) {
            logger.warn("Aborting content-extraction for pageUrl: " + httpURLConnection.getURL().toString());
            return null;
        }
        InputStream inputStream = checkEncodingAndGetInputStream(httpURLConnection, false);
        if (inputStream == null) {
            return null;
        }
        int bufferSize = (contentSize == -2 || contentSize >= 5242880) ? 5242880 : contentSize;

        BufferedReader reader = null;
        boolean readerHandedOver = false;
        try {
            reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8), bufferSize);

            // Skip the lines which carry no information about the type.
            String line;
            do {
                line = reader.readLine();
            } while (line != null && (line.isEmpty() || line.length() == 1 || RESPONSE_BODY_UNWANTED_MATCH.matcher(line).matches()));

            if (line == null) {
                return null;
            }

            String lowerCaseLine = line.toLowerCase();
            if (HTML_STRING_MATCH.matcher(lowerCaseLine).matches()) {
                // The caller continues reading the page from this reader.
                readerHandedOver = true;
                return new DetectedContentType("html", line, reader);
            }
            return lowerCaseLine.startsWith("%pdf-") ? new DetectedContentType("pdf", null, null) : new DetectedContentType("undefined", line, null);
        } catch (IOException ioe) {
            logger.error("IOException when retrieving the HTML-code: " + ioe.getMessage());
            return null;
        } catch (Exception e) {
            logger.error("", e);
            return null;
        } finally {
            if (!readerHandedOver) {
                try {
                    if (reader != null) {
                        reader.close();  // Closes the underlying stream as well.
                    } else {
                        inputStream.close();
                    }
                } catch (IOException ignored) {
                    // Nothing more can be done for a stream that fails to close.
                }
            }
        }
    }

    /**
     * Reads the "Content-Length" of the response and checks it against
     * {@code HttpConnUtils.maxAllowedContentSize}.
     * <p>
     * The header is parsed as a {@code long}: a declared size above the {@code int} range is
     * then rejected as non-acceptable, instead of failing to parse and being reported as an
     * unknown size.
     *
     * @param httpURLConnection the connection to read the header from
     * @param z {@code true} when the url is a docUrl; it only affects the log messages
     * @param z2 when {@code true}, a non-acceptable size is not logged
     * @return the declared size when it is positive and not above the allowed maximum;
     *         {@code -1} when the declared size is not acceptable; {@code -2} when the size is
     *         missing or could not be parsed
     */
    public static int getContentSize(HttpURLConnection httpURLConnection, boolean z, boolean z2) {
        try {
            // A missing header (null) makes "parseLong" throw a NumberFormatException, which is handled below.
            long declaredSize = Long.parseLong(httpURLConnection.getHeaderField(HttpHeaders.CONTENT_LENGTH));
            if (declaredSize > 0 && declaredSize <= HttpConnUtils.maxAllowedContentSize && declaredSize <= Integer.MAX_VALUE) {
                return (int) declaredSize;
            }
            if (!z2) {
                logger.warn((z ? "DocUrl: \"" : "Url: \"") + httpURLConnection.getURL().toString() + "\" had a non-acceptable contentSize: " + declaredSize + ". The maxAllowed one is: " + maxAllowedContentSizeMB + " MB.");
            }
            return -1;
        } catch (NumberFormatException e) {
            if (z && logger.isTraceEnabled()) {
                logger.trace("No \"Content-Length\" was retrieved from docUrl: \"" + httpURLConnection.getURL().toString() + "\"! We will store the docFile anyway..");
            }
            return -2;
        } catch (Exception e) {
            logger.error("", e);
            return -2;
        }
    }

    /**
     * Closes the given reader, logging a warning instead of failing when the close fails.
     *
     * @param bufferedReader the reader to close; {@code null} is ignored
     */
    public static void closeBufferedReader(BufferedReader bufferedReader) {
        if (bufferedReader == null) {
            return;
        }
        try {
            bufferedReader.close();
        } catch (IOException ioe) {
            logger.warn("Problem when closing \"BufferedReader\": " + ioe.getMessage());
        }
    }

    /**
     * Produces the absolute url of an internal link, resolved against the url of its page.
     *
     * @param str the url of the page, as a string; used only when {@code url} is {@code null}
     * @param str2 the internal link
     * @param url the url of the page, or {@code null} to have it created from {@code str}
     * @return the absolute url, or {@code null} when no base is available or the url could not
     *         be produced (the problem is logged in both cases)
     */
    public static String getFullyFormedUrl(String str, String str2, URL url) {
        try {
            URL baseUrl = url;
            if (baseUrl == null) {
                if (str == null) {
                    logger.error("No urlBase to produce a fully-formedUrl for internal-link: " + str2);
                    return null;
                }
                baseUrl = new URL(str);
            }
            // A link starting with "CoreConstants.NA" is appended to the base as-is.
            // NOTE(review): presumably that constant is just the "?" literal (a query-only link) — confirm.
            return str2.startsWith(CoreConstants.NA) ? baseUrl + str2 : new URL(baseUrl, str2).toString();
        } catch (Exception e) {
            // The exception-message needs its own placeholder: an argument without one is dropped by the logger.
            logger.error("Error when producing fully-formedUrl for internal-link: {} | {}", str2, e.getMessage());
            return null;
        }
    }

    /**
     * Tells whether the second url is the "https" version of the first, "http", url.
     *
     * @param str the initial url
     * @param str2 the url after the redirection
     * @return {@code true} when the urls differ only in the protocol, going from http to https
     */
    public static boolean isJustAnHTTPSredirect(String str, String str2) {
        if (!str.startsWith("http://") || !str2.startsWith("https://")) {
            return false;
        }
        return haveOnlyProtocolDifference(str, str2);
    }

    /**
     * Tells whether the second url is the first url with just a trailing slash added.
     *
     * @param str the initial url
     * @param str2 the url after the redirection
     * @return {@code true} when {@code str2} equals {@code str} followed by a single "/"
     */
    public static boolean isJustASlashRedirect(String str, String str2) {
        if (str.endsWith("/") || !str2.endsWith("/")) {
            return false;
        }
        final String withoutTrailingSlash = str2.substring(0, str2.length() - 1);
        return withoutTrailingSlash.equals(str);
    }

    /**
     * Tells whether the two urls are equal once their protocol is left out, by comparing the
     * first group which {@code NON_PROTOCOL_URL} captures for each of them.
     *
     * @param str the first url
     * @param str2 the second url
     * @return {@code true} when both urls match the regex and their captured parts are equal;
     *         {@code false} otherwise (the reason is logged)
     */
    public static boolean haveOnlyProtocolDifference(String str, String str2) {
        final Matcher firstUrlMatcher = NON_PROTOCOL_URL.matcher(str);
        if (!firstUrlMatcher.matches()) {
            logger.warn("URL < " + str + " > failed to match with \"NON_PROTOCOL_URL\"-regex: " + NON_PROTOCOL_URL);
            return false;
        }
        try {
            final String firstNonProtocolPart = firstUrlMatcher.group(1);
            if (firstNonProtocolPart == null || firstNonProtocolPart.isEmpty()) {
                logger.warn("Unexpected null or empty value returned by \"url1NonProtocolMatcher.group(1)\" for url: \"" + str + "\"");
                return false;
            }

            final Matcher secondUrlMatcher = NON_PROTOCOL_URL.matcher(str2);
            if (!secondUrlMatcher.matches()) {
                logger.warn("URL < " + str2 + " > failed to match with \"NON_PROTOCOL_URL\"-regex: " + NON_PROTOCOL_URL);
                return false;
            }
            try {
                final String secondNonProtocolPart = secondUrlMatcher.group(1);
                if (secondNonProtocolPart == null || secondNonProtocolPart.isEmpty()) {
                    logger.warn("Unexpected null or empty value returned by \"url2UrlNonProtocolMatcher.group(1)\" for url: \"" + str2 + "\"");
                    return false;
                }
                return firstNonProtocolPart.equals(secondNonProtocolPart);
            } catch (Exception secondUrlFailure) {
                logger.error("No match for url2: " + str2, secondUrlFailure);
                return false;
            }
        } catch (Exception firstUrlFailure) {
            // This handler also receives the failures of the matching of the second url.
            logger.error("No match for url1: " + str, firstUrlFailure);
            return false;
        }
    }

    /**
     * Connects to {@code ArgsUtils.inputDataUrl} and returns a buffered stream over its
     * (json) response-body.
     * <p>
     * The JVM is terminated, after shutting down the executor, with exit-code 55 when no url was
     * given, 56 when the "Content-Type" is missing or does not contain "json", and 57 when the
     * connection or the stream-retrieval throws.
     *
     * @return the buffered stream over the input-data
     */
    public static InputStream getInputStreamFromInputDataUrl() {
        HttpURLConnection handleConnection;
        InputStream checkEncodingAndGetInputStream;
        // Without an input-url there is nothing to work with.
        if (ArgsUtils.inputDataUrl == null || ArgsUtils.inputDataUrl.isEmpty()) {
            logger.error("The \"inputDataUrl\" was not given, even though");
            System.err.println("The \"inputDataUrl\" was not given, even though");
            PublicationsRetriever.executor.shutdownNow();
            System.exit(55);
        }
        BufferedInputStream bufferedInputStream = null;
        try {
            handleConnection = HttpConnUtils.handleConnection(null, ArgsUtils.inputDataUrl, ArgsUtils.inputDataUrl, ArgsUtils.inputDataUrl, null, true, true);
            // Only a response which declares a json mime-type is accepted as input.
            String headerField = handleConnection.getHeaderField("Content-Type");
            if (headerField == null || !headerField.toLowerCase().contains("json")) {
                String str = "The mimeType of the url was either null or a non-json: " + headerField;
                logger.error(str);
                System.err.println(str);
                PublicationsRetriever.executor.shutdownNow();
                System.exit(56);
            }
            checkEncodingAndGetInputStream = checkEncodingAndGetInputStream(handleConnection, false);
        } catch (Exception e) {
            String str2 = "Unexpected error when retrieving the input-stream from the inputDataUrl:\n" + e.getMessage();
            logger.error(str2);
            System.err.println(str2);
            PublicationsRetriever.executor.shutdownNow();
            System.exit(57);
        }
        // NOTE(review): the checks below look like they were hoisted out of the try-block by the decompiler
        // (the two locals are not definitely assigned here and the method declares no checked exceptions) — confirm against the original source.
        if (checkEncodingAndGetInputStream == null) {
            throw new RuntimeException("Could not acquire the InputStream!");
        }
        int contentSize = getContentSize(handleConnection, true, false);
        if (contentSize == -1) {
            throw new FileNotRetrievedException("The HTTP-reported size of this file was unacceptable!");
        }
        // The buffer gets the declared size of the content, capped at 5242880 bytes (also used when the size is unknown).
        bufferedInputStream = new BufferedInputStream(checkEncodingAndGetInputStream, (contentSize == -2 || contentSize >= 5242880) ? 5242880 : contentSize);
        // The standard input is closed, since the input-data are read from the url instead; a failure to close it is ignored.
        try {
            System.in.close();
        } catch (Exception e2) {
        }
        return bufferedInputStream;
    }

    /**
     * Returns a random number between the two given bounds, both inclusive.
     * <p>
     * The generator is obtained with {@link ThreadLocalRandom#current()} on every call, because a
     * {@code ThreadLocalRandom} instance belongs to the thread which requested it and must not
     * be shared through a static field. The upper bound is widened to {@code long} before
     * adding 1, so that {@code Integer.MAX_VALUE} does not overflow.
     *
     * @param i the smallest value which may be returned
     * @param i2 the largest value which may be returned
     * @return a random value in the range [i, i2]
     * @throws IllegalArgumentException when {@code i} is greater than {@code i2}
     */
    public static long getRandomNumber(int i, int i2) {
        return ThreadLocalRandom.current().nextLong(i, (long) i2 + 1);
    }

    /**
     * Produces the "wasDirectLink" value for a result: "true", "false" or "N/A".
     *
     * @param str the first url to compare with the final url
     * @param str2 the second url to compare with the final url; it is taken into account only
     *             when the url is not treated as a special one
     * @param z when {@code false}, the result is always "false"
     * @param str3 the final url
     * @return "true" when the final url equals one of the considered urls, or differs from it
     *         only in the protocol; otherwise "false" for a special url and "N/A" for the rest
     */
    public static String getWasDirectLink(String str, String str2, boolean z, String str3) {
        if (!z) {
            return "false";
        }

        // The url counts as special only when the pages themselves are not the target of the download.
        final boolean isSpecialUrl = !ArgsUtils.shouldJustDownloadHtmlFiles && HttpConnUtils.isSpecialUrl.get().booleanValue();

        if (!isSpecialUrl && (str2.equals(str3) || haveOnlyProtocolDifference(str2, str3))) {
            return "true";
        }
        if (str.equals(str3) || haveOnlyProtocolDifference(str, str3)) {
            return "true";
        }
        return isSpecialUrl ? "false" : "N/A";
    }

    /**
     * Logs, as a warning, the message of the given exception, prefixed with the file, method and
     * line which threw it. When the exception has no message, a generic warning for the url is
     * logged instead.
     *
     * @param exc the exception to report
     * @param str the url whose connection failed; used only when the exception has no message
     */
    public static void printEmbeddedExceptionMessage(Exception exc, String str) {
        String message = exc.getMessage();
        if (message == null) {
            logger.warn("Could not handle connection for \"" + str + "\"!");
            return;
        }

        // The stacktrace is allowed to be empty, so the first frame cannot be taken for granted.
        StackTraceElement[] stackTrace = exc.getStackTrace();
        if (stackTrace.length == 0) {
            logger.warn(message);
            return;
        }

        StackTraceElement throwingFrame = stackTrace[0];
        logger.warn("[" + throwingFrame.getFileName() + "->" + throwingFrame.getMethodName() + "(@" + throwingFrame.getLineNumber() + ")] - " + message);
    }

    /**
     * Logs, at debug level, the url, content-type, content-disposition and HTTP-method of the
     * given connection and, optionally, all of its response-headers.
     *
     * @param httpURLConnection the connection to describe; a warning is logged when it is {@code null}
     * @param z when {@code true}, every response-header is logged too, one "name : value" per line
     */
    public static void printConnectionDebugInfo(HttpURLConnection httpURLConnection, boolean z) {
        if (httpURLConnection == null) {
            logger.warn("The given connection instance was null..");
            return;
        }

        logger.debug("Connection debug info:\nURL: < {} >,\nContentType: \"{}\". ContentDisposition: \"{}\", HTTP-method: \"{}\"", httpURLConnection.getURL().toString(), httpURLConnection.getContentType(), httpURLConnection.getHeaderField(HttpHeaders.CONTENT_DISPOSITION), httpURLConnection.getRequestMethod());
        if (!z) {
            return;
        }

        final StringBuilder headersText = new StringBuilder(1000).append("Headers:\n");
        for (Map.Entry<String, List<String>> header : httpURLConnection.getHeaderFields().entrySet()) {
            // A header may carry several values; each of them gets its own line.
            for (String headerValue : header.getValue()) {
                headersText.append(header.getKey()).append(" : ").append(headerValue).append(StringUtils.LF);
            }
        }
        logger.debug(headersText.toString());
    }

    /**
     * Logs, at debug level, the details of a redirection. Only the redirections whose base-url
     * contains "doi.org" are logged.
     *
     * @param str the url which was redirected
     * @param str2 the value of the "Location" of the redirection
     * @param str3 the url which will be visited next
     * @param i the HTTP code of the redirection
     * @param i2 the number of redirections so far
     */
    public static void printRedirectDebugInfo(String str, String str2, String str3, int i, int i2) {
        if (!str.contains("doi.org")) {
            return;
        }
        logger.debug(StringUtils.LF);
        logger.debug("Redirect(s) num: " + i2);
        logger.debug("Redirect code: " + i);
        logger.debug("Base: " + str);
        logger.debug("Location: " + str2);
        logger.debug("Target: " + str3 + "\n");
    }

    /**
     * Logs, at debug level, the number of redirections and the final url, for the urls which
     * contain the given text.
     *
     * @param str the initial url
     * @param str2 the final url
     * @param str3 the text which the initial url has to contain; nothing is logged when it is {@code null}
     * @param i the number of redirections which took place
     */
    public static void printFinalRedirectDataForWantedUrlType(String str, String str2, String str3, int i) {
        final boolean isWantedUrlType = str3 != null && str.contains(str3);
        if (isWantedUrlType) {
            logger.debug("\"" + str + "\" DID: " + i + " redirect(s)!");
            logger.debug("Final link is: \"" + str2 + "\"");
        }
    }

    // Initialises the class-level state which is used by the methods of this class.
    static {
        // These domains are exempt from the count-based blocking ("countAndBlockDomainAfterTimes" returns early for them
        // and "on403ErrorCode" never blocks them as a whole).
        domainsNotBlockableAfterTimes.add("zenodo.org");
        domainsNotBlockableAfterTimes.add("doi.org");
        domainsNotBlockableAfterTimes.add("dx.doi.org");
        // Holds one html-builder per thread; "getHtml" creates the builder lazily and empties it after each use.
        htmlStrBuilder = new ThreadLocal<>();
        // The size-limit expressed in MB; it is used only in the log message of "getContentSize".
        maxAllowedContentSizeMB = HttpConnUtils.maxAllowedContentSize / FileUtils.mb;
        // NOTE(review): "ThreadLocalRandom.current()" returns the generator of the thread which runs this initializer;
        // sharing that instance with other threads through a static field is discouraged by its documentation — verify the usages.
        threadLocalRandom = ThreadLocalRandom.current();
    }
}
