package crawlercommons.filters.basic;

import ch.qos.logback.core.joran.action.Action;
import crawlercommons.filters.URLFilter;
import crawlercommons.utils.Strings;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.IDN;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:crawlercommons/filters/basic/BasicURLNormalizer.class */
public class BasicURLNormalizer extends URLFilter {
    /** Logger used by this class, including its static initializer. */
    public static final Logger LOG = LoggerFactory.getLogger((Class<?>) BasicURLNormalizer.class);
    /** Finds "/.", "//" or "./" in a path; used to decide whether dot-segment normalization is attempted. */
    private static final Pattern hasNormalizablePathPattern = Pattern.compile("/[./]|[.]/");
    /** Finds a percent-encoded byte: '%' followed by two hexadecimal digits. */
    private static final Pattern unescapeRulePattern = Pattern.compile("%([0-9A-Fa-f]{2})");
    /** Finds a leading scheme followed by ":/" at the start of a string. */
    private static final Pattern hasSchemePattern = Pattern.compile("^[A-Za-z][A-Za-z0-9+.-]*:/");
    /** Indexed by ASCII code: true for characters that unescapePath decodes from their percent-encoded form. */
    private static final boolean[] unescapedCharacters = new boolean[128];
    /** Indexed by ASCII code: true for characters that escapePath percent-encodes. Filled in the static initializer. */
    private static final boolean[] escapedCharacters;
    /** Names of query parameters dropped while normalizing the query string. */
    private final Set<String> queryParamsToRemove;
    /** How internationalized host names are rewritten by normalizeHostName. */
    private final IdnNormalization idnNormalization;

    /* loaded from: input_file:crawlercommons/filters/basic/BasicURLNormalizer$Builder.class */
    /**
     * Fluent builder for {@link BasicURLNormalizer}.
     *
     * <p>Defaults: {@link IdnNormalization#PUNYCODE} host normalization and an
     * empty set of query parameters to remove.
     */
    public static class Builder {
        /** Host name normalization mode handed to the normalizer on {@link #build()}. */
        public IdnNormalization idnNormalization;
        /** Names of query parameters the built normalizer removes; kept as a sorted set. */
        Set<String> queryParamsToRemove;

        private Builder() {
            this.idnNormalization = IdnNormalization.PUNYCODE;
            this.queryParamsToRemove = new TreeSet<>();
        }

        /**
         * Replaces the set of query parameter names to remove.
         *
         * @param collection parameter names; copied into a new sorted set
         * @return this builder
         * @throws NullPointerException if {@code collection} is null
         */
        public Builder queryParamsToRemove(Collection<String> collection) {
            this.queryParamsToRemove = new TreeSet<>(collection);
            return this;
        }

        /**
         * Sets how internationalized host names are normalized.
         *
         * @param idnNormalization the normalization mode
         * @return this builder
         */
        public Builder idnNormalization(IdnNormalization idnNormalization) {
            this.idnNormalization = idnNormalization;
            return this;
        }

        /**
         * Creates a normalizer configured with the current builder state.
         *
         * @return a new {@link BasicURLNormalizer}
         */
        public BasicURLNormalizer build() {
            return new BasicURLNormalizer(this);
        }
    }

    /* loaded from: input_file:crawlercommons/filters/basic/BasicURLNormalizer$IdnNormalization.class */
    public enum IdnNormalization {
        NONE,
        PUNYCODE,
        UNICODE
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:crawlercommons/filters/basic/BasicURLNormalizer$NameValuePair.class */
    /** Immutable query parameter: a name plus an optional (possibly null) value. */
    public static class NameValuePair {
        /** Orders pairs by their name using natural String ordering. */
        public static final Comparator<NameValuePair> NAME_COMPARATOR = Comparator.comparing(NameValuePair::getName);

        protected final String name;
        protected final String value;

        /**
         * @param str the parameter name
         * @param str2 the parameter value, may be null
         */
        public NameValuePair(String str, String str2) {
            name = str;
            value = str2;
        }

        /** @return the parameter name */
        public String getName() {
            return name;
        }

        /** @return the parameter value, or null if none was stored */
        public String getValue() {
            return value;
        }
    }

    /** Returns true if the code point is an ASCII letter (A-Z, a-z) or an ASCII digit (0-9). */
    private static boolean isAlphaNumeric(int i) {
        final boolean upperCaseLetter = i >= 'A' && i <= 'Z';
        final boolean lowerCaseLetter = i >= 'a' && i <= 'z';
        final boolean digit = i >= '0' && i <= '9';
        return upperCaseLetter || lowerCaseLetter || digit;
    }

    /** Returns true if the code point is an ASCII hexadecimal digit (0-9, A-F, a-f). */
    private static boolean isHexCharacter(int i) {
        final boolean upperCaseHex = i >= 'A' && i <= 'F';
        final boolean lowerCaseHex = i >= 'a' && i <= 'f';
        final boolean digit = i >= '0' && i <= '9';
        return upperCaseHex || lowerCaseHex || digit;
    }

    /** Returns true if every char of the string is in the 7-bit ASCII range (0-127). */
    private static boolean isAscii(String str) {
        return str.chars().noneMatch(c -> c > 127);
    }

    /** Creates a normalizer from a freshly created Builder, i.e. with the Builder's default settings. */
    public BasicURLNormalizer() {
        this(new Builder());
    }

    /**
     * Creates a normalizer using the settings held by the given builder.
     *
     * @param builder source of the query parameters to remove and the IDN normalization mode;
     *                the parameter set is stored by reference, not copied
     */
    public BasicURLNormalizer(Builder builder) {
        this.queryParamsToRemove = builder.queryParamsToRemove;
        this.idnNormalization = builder.idnNormalization;
    }

    /**
     * Normalizes a URL string.
     *
     * <p>The input is trimmed and parsed (a missing scheme is retried with
     * "http://"). For http, https and ftp URLs the host name is normalized, a
     * port equal to the protocol default is dropped, an empty file becomes "/"
     * and a fragment is removed. The file part (path and query) is
     * escaped/unescaped consistently, and for http, https, ftp and file URLs
     * the path's dot segments are normalized.
     *
     * @param str the URL to normalize
     * @return the normalized URL, the input itself if it is the empty string,
     *         or null if the URL or its host name is malformed
     * @throws NullPointerException if {@code str} is null
     */
    @Override // crawlercommons.filters.URLFilter
    public String filter(String str) {
        if ("".equals(str)) {
            return str;
        }
        String urlString = str.trim();
        URL url = parseStringToURL(urlString);
        if (url == null) {
            LOG.debug("Malformed URL {}", urlString);
            return null;
        }
        String protocol = url.getProtocol();
        String host = url.getHost();
        int port = url.getPort();
        String file = url.getFile();
        // changed: the URL must be re-assembled from its (normalized) parts
        boolean changed = false;
        // normalizePath: dot-segment normalization of the path is applicable
        boolean normalizePath = false;
        if (!urlString.startsWith(protocol)) {
            // scheme was added by the parser or is written in a different case
            changed = true;
        }
        if ("http".equals(protocol) || "https".equals(protocol) || "ftp".equals(protocol)) {
            if (host == null || url.getAuthority() == null) {
                changed = true;
            } else {
                try {
                    String normalizedHost = normalizeHostName(host);
                    if (!host.equals(normalizedHost)) {
                        host = normalizedHost;
                        changed = true;
                    } else if (!url.getAuthority().equals(normalizedHost)) {
                        // authority carries more than the host (e.g. user info or port)
                        changed = true;
                    }
                } catch (UnsupportedEncodingException | IllegalArgumentException | IndexOutOfBoundsException e) {
                    LOG.info("Invalid hostname: {}", host, e);
                    return null;
                }
            }
            if (port == url.getDefaultPort()) {
                port = -1;
                changed = true;
            }
            normalizePath = true;
            if (file == null || "".equals(file)) {
                file = "/";
                changed = true;
                normalizePath = false;
            } else if (!file.startsWith("/")) {
                file = "/" + file;
                changed = true;
                normalizePath = false;
            }
            if (url.getRef() != null) {
                // the fragment is dropped when the URL is re-assembled
                changed = true;
            }
        } else if ("file".equals(protocol)) {
            normalizePath = true;
        }
        String normalizedFile = normalizeUrlFile(file);
        if (!file.equals(normalizedFile)) {
            changed = true;
            file = normalizedFile;
        }
        if (normalizePath) {
            try {
                if (changed) {
                    url = new URL(protocol, host, port, file);
                }
                // getFileWithNormalizedPath throws the checked MalformedURLException,
                // so it must be called inside this try block
                String fileWithNormalizedPath = getFileWithNormalizedPath(url);
                if (!file.equals(fileWithNormalizedPath)) {
                    changed = true;
                    file = fileWithNormalizedPath;
                }
            } catch (MalformedURLException e) {
                LOG.info("Malformed URL {}://{}{}{}", protocol, host, port == -1 ? "" : ":" + port, file);
                return null;
            }
        }
        if (changed) {
            try {
                urlString = new URL(protocol, host, port, file).toString();
            } catch (MalformedURLException e) {
                LOG.info("Malformed URL {}://{}{}{}", protocol, host, port == -1 ? "" : ":" + port, file);
                return null;
            }
        }
        return urlString;
    }

    /**
     * Parses a string into a URL. If parsing fails and the string does not
     * start with a scheme, parsing is retried with "http://" prepended.
     *
     * @return the parsed URL, or null if the string cannot be parsed
     */
    private static URL parseStringToURL(String str) {
        try {
            return new URL(str);
        } catch (MalformedURLException firstAttemptFailed) {
            if (hasSchemePattern.matcher(str).find()) {
                // a scheme is present, so prepending "http://" cannot help
                return null;
            }
        }
        try {
            return new URL("http://" + str);
        } catch (MalformedURLException ignored) {
            // still malformed: signalled to the caller by returning null
            return null;
        }
    }

    /**
     * Normalizes the file part (path plus optional query) of a URL:
     * percent-encoding is unified, a trailing '?' without query is removed,
     * configured query parameters are dropped and the remaining ones are
     * sorted by name.
     *
     * @param str the file part of the URL
     * @return the normalized file part
     */
    private String normalizeUrlFile(String str) {
        int rawQueryMark = str.indexOf('?');
        if (rawQueryMark == -1) {
            return escapePath(unescapePath(str));
        }
        if (rawQueryMark + 1 >= str.length()) {
            // '?' is the last character: empty query, drop the question mark
            return escapePath(unescapePath(str.substring(0, str.length() - 1)));
        }
        String normalized = escapePath(unescapePath(str));
        // Unescaping/escaping may shorten or lengthen the path (e.g. "%41" -> "A",
        // " " -> "%20"), so the position of '?' must be looked up again in the
        // normalized string. '?' is neither decoded from "%3F" nor encoded, so
        // the first '?' still separates path and query.
        int queryMark = normalized.indexOf('?');
        List<NameValuePair> parameters = parseQueryParameters(normalized, queryMark + 1, this.queryParamsToRemove);
        StringBuilder sb = new StringBuilder();
        String path = normalized.substring(0, queryMark);
        if (!Strings.isBlank(path)) {
            sb.append(path);
        }
        if (!parameters.isEmpty()) {
            parameters.sort(NameValuePair.NAME_COMPARATOR);
            sb.append('?').append(formatQueryParameters(parameters));
        }
        return sb.toString();
    }

    /**
     * Splits a query string into name/value pairs, skipping parameters whose
     * name is listed in {@code set}.
     *
     * <p>Parameters are separated by '&amp;'; name and value are separated by
     * the first '='. A parameter without '=' or with nothing after the '=' gets
     * a null value. Parameters with an empty name are skipped.
     *
     * @param str the string containing the query
     * @param i index in {@code str} at which the query starts; if it is not
     *          smaller than the string length an empty list is returned
     * @param set names of parameters to drop
     * @return the retained parameters in order of appearance
     */
    public static List<NameValuePair> parseQueryParameters(String str, int i, Set<String> set) {
        if (str == null || str.isEmpty()) {
            return Collections.emptyList();
        }
        List<NameValuePair> parameters = new ArrayList<>();
        int length = str.length();
        int pos = i;
        while (pos < length) {
            int nameStart = pos;
            while (pos < length && !isNameEnd(str.charAt(pos))) {
                pos++;
            }
            String name = str.substring(nameStart, pos);
            String value = null;
            if (pos < length && str.charAt(pos) == '=') {
                pos++; // skip '='
                // remember where the value begins: pos moves on to the end of the value
                int valueStart = pos;
                while (pos < length && !isValueEnd(str.charAt(pos))) {
                    pos++;
                }
                if (valueStart < pos) {
                    value = str.substring(valueStart, pos);
                }
            }
            // NOTE(review): a null set drops every parameter — confirm this is intended
            if (!name.isEmpty() && set != null && !set.contains(name)) {
                parameters.add(new NameValuePair(name, value));
            }
            pos++; // skip the '&' delimiter
        }
        return parameters;
    }

    /** Returns true if the character terminates a query parameter value, i.e. it is '&'. */
    private static boolean isValueEnd(char c) {
        return c == '&';
    }

    /** Returns true if the character terminates a query parameter name, i.e. it is '=' or '&'. */
    private static boolean isNameEnd(char c) {
        switch (c) {
            case '=':
            case '&':
                return true;
            default:
                return false;
        }
    }

    /**
     * Joins name/value pairs into a query string: pairs are separated by '&amp;',
     * and "=value" is appended only for pairs whose value is not null.
     *
     * @param list the pairs to format, in output order
     * @return the formatted query string (without a leading '?')
     */
    public static String formatQueryParameters(List<NameValuePair> list) {
        final StringBuilder query = new StringBuilder();
        for (final NameValuePair pair : list) {
            if (query.length() != 0) {
                query.append('&');
            }
            query.append(pair.getName());
            final String pairValue = pair.getValue();
            if (pairValue == null) {
                continue;
            }
            query.append('=').append(pairValue);
        }
        return query.toString();
    }

    /**
     * Returns the file part of the URL with dot segments ("." and "..")
     * removed from the path.
     *
     * <p>{@link java.net.URI#normalize()} keeps leading ".." segments, so any
     * leading "/.." segments are stripped afterwards. If the URL cannot be
     * converted to a URI, its file part is used as is. The result always starts
     * with "/".
     *
     * @param url the URL whose file part is normalized
     * @return the normalized file part, "/" if it would otherwise be empty
     * @throws MalformedURLException if the normalized URI cannot be converted back to a URL
     */
    private String getFileWithNormalizedPath(URL url) throws MalformedURLException {
        String file;
        if (hasNormalizablePathPattern.matcher(url.getPath()).find()) {
            try {
                file = url.toURI().normalize().toURL().getFile();
                int start = 0;
                // Strip leading "/.." segments. The character following the segment
                // at the current offset must be checked, otherwise a segment that
                // merely begins with ".." (e.g. "/../..a") would be cut as well.
                while (file.startsWith("/..", start) && (start + 3 == file.length() || file.charAt(start + 3) == '/')) {
                    start += 3;
                }
                if (start > 0) {
                    file = file.substring(start);
                }
            } catch (URISyntaxException e) {
                // not a valid URI: keep the file part unchanged
                file = url.getFile();
            }
        } else {
            file = url.getFile();
        }
        if (file.isEmpty()) {
            file = "/";
        } else if (!file.startsWith("/")) {
            file = "/" + file;
        }
        return file;
    }

    /**
     * Decodes percent-encoded bytes that represent characters flagged in the
     * unescapedCharacters table; every other percent-encoded byte is kept but
     * its hex digits are upper-cased.
     *
     * @param str the string to process
     * @return the string with unified percent-encoding
     */
    public static String unescapePath(String str) {
        final StringBuilder result = new StringBuilder();
        final Matcher escapes = unescapeRulePattern.matcher(str);
        // index of the first character not yet copied to the result
        int pending = 0;
        while (escapes.find()) {
            result.append(str, pending, escapes.start());
            final String escape = escapes.group();
            final int code = Integer.parseInt(escape.substring(1), 16);
            if (code < 128 && unescapedCharacters[code]) {
                result.append((char) code);
            } else {
                result.append(escape.toUpperCase(Locale.ROOT));
            }
            pending = escapes.end();
        }
        // copy whatever follows the last escape sequence
        result.append(str, pending, str.length());
        return result.toString();
    }

    /**
     * Percent-encodes the UTF-8 bytes of the string that are non-ASCII or
     * flagged in the escapedCharacters table. A '%' that already starts a valid
     * escape sequence (two hex digits follow) is copied unchanged together with
     * its digits; any other '%' is encoded as "%25".
     *
     * @param str the string to escape
     * @return the escaped string
     */
    private static String escapePath(String str) {
        final StringBuilder escaped = new StringBuilder(str.length());
        final byte[] utf8 = str.getBytes(StandardCharsets.UTF_8);
        for (int pos = 0; pos < utf8.length; pos++) {
            final byte current = utf8[pos];
            if (current < 0 || escapedCharacters[current]) {
                final String hex = Integer.toHexString(current & 0xFF).toUpperCase(Locale.ROOT);
                escaped.append('%');
                if (hex.length() % 2 != 0) {
                    // pad single-digit values to two hex digits
                    escaped.append('0');
                }
                escaped.append(hex);
                continue;
            }
            if (current != '%') {
                escaped.append((char) current);
                continue;
            }
            final boolean startsValidEscape = pos + 2 < utf8.length
                    && isHexCharacter(utf8[pos + 1])
                    && isHexCharacter(utf8[pos + 2]);
            if (startsValidEscape) {
                escaped.append((char) current).append((char) utf8[pos + 1]).append((char) utf8[pos + 2]);
                pos += 2;
            } else {
                escaped.append("%25");
            }
        }
        return escaped.toString();
    }

    /**
     * Normalizes a host name: percent-encoded characters are decoded (UTF-8),
     * the name is lower-cased, converted according to the configured IDN
     * normalization mode, and a single trailing dot is removed.
     *
     * @param str the host name
     * @return the normalized host name
     * @throws IllegalArgumentException may be raised by the decoding or IDN conversion calls
     * @throws IndexOutOfBoundsException declared for callers; see the IDN conversion calls
     * @throws UnsupportedEncodingException declared by URLDecoder.decode
     */
    private String normalizeHostName(String str) throws IllegalArgumentException, IndexOutOfBoundsException, UnsupportedEncodingException {
        String host = str;
        if (host.indexOf('%') >= 0) {
            host = URLDecoder.decode(host, StandardCharsets.UTF_8.toString());
        }
        host = host.toLowerCase(Locale.ROOT);
        final boolean needsPunycode = this.idnNormalization == IdnNormalization.PUNYCODE && !isAscii(host);
        if (needsPunycode) {
            host = IDN.toASCII(host);
        } else if (this.idnNormalization == IdnNormalization.UNICODE && host.contains("xn--")) {
            host = IDN.toUnicode(host);
        }
        return host.endsWith(".") ? host.substring(0, host.length() - 1) : host;
    }

    /** Returns a new Builder initialized with its default settings. */
    public static Builder newBuilder() {
        return new Builder();
    }

    /**
     * Reads URLs line by line from standard input (UTF-8), normalizes each with
     * a default-configured normalizer and logs "input => normalized". Exits
     * with status 0 at end of input.
     *
     * @param strArr command-line arguments (unused)
     * @throws IOException if reading from standard input fails
     */
    public static void main(String[] strArr) throws IOException {
        final BasicURLNormalizer normalizer = new BasicURLNormalizer();
        final BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
        for (String line = stdin.readLine(); line != null; line = stdin.readLine()) {
            LOG.info("{} => {}", line, normalizer.filter(line));
        }
        System.exit(0);
    }

    static {
        for (int i = 0; i < 128; i++) {
            if (isAlphaNumeric(i) || i == 45 || i == 46 || i == 95 || i == 126) {
                unescapedCharacters[i] = true;
            } else {
                unescapedCharacters[i] = false;
            }
        }
        escapedCharacters = new boolean[128];
        for (int i2 = 0; i2 < 128; i2++) {
            if (unescapedCharacters[i2]) {
                escapedCharacters[i2] = false;
            } else if (i2 <= 31 || i2 == 32 || i2 == 34 || i2 == 35 || i2 == 60 || i2 == 62 || i2 == 91 || i2 == 93 || i2 == 94 || i2 == 96 || i2 == 123 || i2 == 124 || i2 == 125 || i2 == 127) {
                escapedCharacters[i2] = true;
            } else {
                LOG.debug("Character {} ({}) not handled as escaped or unescaped", Integer.valueOf(i2), Character.valueOf((char) i2));
            }
        }
    }
}
