package org.apache.tika.parser.html;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.utils.CharsetUtils;

/* loaded from: input_file:WEB-INF/lib/tika-parsers-1.7.jar:org/apache/tika/parser/html/HtmlEncodingDetector.class */
/**
 * Encoding detector that looks for a {@code charset=...} declaration inside
 * HTML {@code <meta>} tags found in the leading bytes of a stream.
 *
 * <p>Only the first {@link #META_TAG_BUFFER_SIZE} bytes are examined. They are
 * decoded as US-ASCII before matching.
 */
public class HtmlEncodingDetector implements EncodingDetector {
    /** Number of leading bytes read from the stream and scanned for meta tags. */
    private static final int META_TAG_BUFFER_SIZE = 8192;

    /** Matches an opening {@code <meta} tag and captures its attribute text (group 1). */
    private static final Pattern HTTP_META_PATTERN = Pattern.compile("(?is)<\\s*meta\\s+([^<>]+)");

    /** Matches {@code charset = name}, with an optional opening quote, and captures the name (group 1). */
    private static final Pattern FLEXIBLE_CHARSET_ATTR_PATTERN = Pattern.compile("(?is)charset\\s*=\\s*(?:['\\\"]\\s*)?([-_:\\.a-z0-9]+)");

    /** Charset used to turn the raw leading bytes into text for regex matching. */
    private static final Charset ASCII = Charset.forName("US-ASCII");

    /**
     * Scans the beginning of the stream for a charset declared in a meta tag.
     *
     * <p>The stream is marked before reading and reset afterwards, so it must
     * support {@code mark}/{@code reset}. The {@code metadata} argument is not
     * consulted by this implementation.
     *
     * @param inputStream stream to inspect, or {@code null}
     * @param metadata    document metadata (unused here)
     * @return the first declared charset that {@link CharsetUtils} reports as
     *         supported and can resolve, or {@code null} if the stream is
     *         {@code null} or no such declaration is found
     * @throws IOException if reading from or resetting the stream fails
     */
    @Override
    public Charset detect(InputStream inputStream, Metadata metadata) throws IOException {
        if (inputStream == null) {
            return null;
        }

        String head = readHead(inputStream);

        Matcher metaTags = HTTP_META_PATTERN.matcher(head);
        // One matcher instance is reused for the attribute text of every meta tag.
        Matcher charsetAttr = FLEXIBLE_CHARSET_ATTR_PATTERN.matcher("");
        while (metaTags.find()) {
            charsetAttr.reset(metaTags.group(1));
            while (charsetAttr.find()) {
                String candidate = charsetAttr.group(1);
                if (!CharsetUtils.isSupported(candidate)) {
                    continue;
                }
                try {
                    return CharsetUtils.forName(candidate);
                } catch (Exception ignored) {
                    // Detection is best-effort: the name passed isSupported but could
                    // still not be resolved, so skip it and keep scanning for another
                    // declaration instead of failing the whole detection.
                }
            }
        }
        return null;
    }

    /**
     * Reads up to {@link #META_TAG_BUFFER_SIZE} bytes from the stream, restores
     * the stream to the position it had on entry, and returns the bytes decoded
     * as US-ASCII.
     */
    private static String readHead(InputStream inputStream) throws IOException {
        inputStream.mark(META_TAG_BUFFER_SIZE);
        byte[] buffer = new byte[META_TAG_BUFFER_SIZE];
        int filled = 0;
        try {
            // A single read may return fewer bytes than requested, so keep reading
            // until the buffer is full or the end of the stream is reached.
            int count = inputStream.read(buffer);
            while (count != -1 && filled < buffer.length) {
                filled += count;
                count = inputStream.read(buffer, filled, buffer.length - filled);
            }
        } finally {
            // Rewind even when a read fails so the caller's stream position is not
            // left somewhere in the middle of the lookahead window.
            inputStream.reset();
        }
        return ASCII.decode(ByteBuffer.wrap(buffer, 0, filled)).toString();
    }
}
