package org.archive.modules.extractor;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import org.apache.commons.httpclient.URIException;
import org.archive.io.ReplayCharSequence;
import org.archive.modules.CrawlMetadata;
import org.archive.modules.CrawlURI;
import org.archive.modules.net.RobotsPolicy;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.util.DevUtils;
import org.archive.util.TextUtils;
import org.archive.util.UriUtils;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;

/* loaded from: input_file:org/archive/modules/extractor/ExtractorHTML.class */
public class ExtractorHTML extends ContentExtractor implements InitializingBean {
    /** Serialization version identifier. */
    private static final long serialVersionUID = 2;
    /** Class logger; assigned in the static initializer of this class. */
    private static Logger logger;
    /** Placeholder token inside {@link #RELEVANT_TAG_EXTRACTOR}; replaced in afterPropertiesSet() with the configured maximum element-name length. */
    private static final String MAX_ELEMENT_REPLACE = "MAX_ELEMENT";
    /** Placeholder token inside {@link #EACH_ATTRIBUTE_EXTRACTOR}; replaced with the configured maximum attribute-name length. */
    private static final String MAX_ATTR_NAME_REPLACE = "MAX_ATTR_NAME";
    /** Placeholder token inside {@link #EACH_ATTRIBUTE_EXTRACTOR}; replaced with the configured maximum attribute-value length. */
    private static final String MAX_ATTR_VAL_REPLACE = "MAX_ATTR_VAL";
    /** Key under which processMeta() stores the content of a robots meta tag in the CrawlURI data map. */
    public static final String A_META_ROBOTS = "meta-robots";
    /**
     * Template regex that finds the tags of interest. Capturing groups:
     * 1 = whole script element (open tag through closing tag name), 2 = script open tag,
     * 3 = whole style element, 4 = style open tag,
     * 5 = any other tag (element name plus attributes), 6 = element name of group 5,
     * 7 = set when the element is "meta", 8 = HTML comment.
     */
    static final String RELEVANT_TAG_EXTRACTOR = "(?is)<(?:((script[^>]*+)>.*?</script)|((style[^>]*+)>.*?</style)|(((meta)|(?:\\w{1,MAX_ELEMENT}))\\s+[^>]*+)|(!--.*?--))>";
    /**
     * Template regex that matches one attribute=value pair. Capturing groups:
     * 1 = attribute name, 2 = href, 3 = action, 4 = on-event handler,
     * 5 = src/lowsrc/background/cite/longdesc/usemap/profile/datasrc, 6 = codebase,
     * 7 = classid/data, 8 = archive, 9 = code, 10 = value, 11 = style, 12 = method,
     * 13 = any other attribute name,
     * 14 = double-quoted value, 15 = single-quoted value, 16 = unquoted value.
     */
    static final String EACH_ATTRIBUTE_EXTRACTOR = "(?is)\\s?((href)|(action)|(on\\w*)|((?:src)|(?:lowsrc)|(?:background)|(?:cite)|(?:longdesc)|(?:usemap)|(?:profile)|(?:datasrc))|(codebase)|((?:classid)|(?:data))|(archive)|(code)|(value)|(style)|(method)|([-\\w]{1,MAX_ATTR_NAME}))\\s*=\\s*(?:(?:\"(.{0,MAX_ATTR_VAL}?)(?:\"|$))|(?:'(.{0,MAX_ATTR_VAL}?)(?:'|$))|(\\S{1,MAX_ATTR_VAL}))";
    /** Regex used to split the whitespace-separated list in an "archive" attribute. */
    static final String WHITESPACE = "\\s";
    /** Suffix appended to an applet "code" value that does not already end with it. */
    static final String CLASSEXT = ".class";
    /** Element name compared (case-insensitively) when handling the "code" attribute. */
    static final String APPLET = "applet";
    /** Element name whose href also becomes the base URI of the CrawlURI. */
    static final String BASE = "base";
    /** Element name whose href is treated as an embed rather than a navigation link. */
    static final String LINK = "link";
    /** Element name checked together with {@link #IFRAME} when choosing the hop type of a src attribute. */
    static final String FRAME = "frame";
    /** Element name checked together with {@link #FRAME} when choosing the hop type of a src attribute. */
    static final String IFRAME = "iframe";
    /** Crawl metadata; processMeta() reads the robots policy from it. Set through setMetadata(). */
    CrawlMetadata metadata;
    /** {@link #RELEVANT_TAG_EXTRACTOR} with its placeholder filled in; built by afterPropertiesSet(). */
    private String relevantTagPattern;
    /** {@link #EACH_ATTRIBUTE_EXTRACTOR} with its placeholders filled in; built by afterPropertiesSet(). */
    private String eachAttributePattern;
    /** Case-insensitive regex recognising a value that starts with "javascript:". */
    static final String JAVASCRIPT = "(?i)^javascript:.*";
    /** Case-insensitive regex of path extensions for which isHtmlExpectedHere() reports that HTML is not expected. */
    static final String NON_HTML_PATH_EXTENSION = "(?i)(gif)|(jp(e)?g)|(png)|(tif(f)?)|(bmp)|(avi)|(mov)|(mp(e)?g)|(mp3)|(mp4)|(swf)|(wav)|(au)|(aiff)|(mid)";
    /** Assertion switch exposed by the decompiler: true when assertions are disabled for this class (see static initializer). */
    static final /* synthetic */ boolean $assertionsDisabled;

    /**
     * Returns the value stored under the "maxElementLength" key; afterPropertiesSet()
     * substitutes it into the tag regex as the maximum element-name length.
     *
     * @return the configured maximum element-name length
     */
    public int getMaxElementLength() {
        Integer configured = (Integer) kp.get("maxElementLength");
        return configured;
    }

    /**
     * Stores the maximum element-name length under the "maxElementLength" key.
     * The compiled tag pattern only picks the value up when afterPropertiesSet() runs.
     *
     * @param i maximum number of characters in an element name
     */
    public void setMaxElementLength(int i) {
        kp.put("maxElementLength", i);
    }

    /**
     * Returns the value stored under the "maxAttributeNameLength" key; afterPropertiesSet()
     * substitutes it into the attribute regex.
     *
     * @return the configured maximum attribute-name length
     */
    public int getMaxAttributeNameLength() {
        Integer configured = (Integer) kp.get("maxAttributeNameLength");
        return configured;
    }

    /**
     * Stores the maximum attribute-name length under the "maxAttributeNameLength" key.
     *
     * @param i maximum number of characters in an attribute name
     */
    public void setMaxAttributeNameLength(int i) {
        kp.put("maxAttributeNameLength", i);
    }

    /**
     * Returns the value stored under the "maxAttributeValLength" key; afterPropertiesSet()
     * substitutes it into the attribute regex.
     *
     * @return the configured maximum attribute-value length
     */
    public int getMaxAttributeValLength() {
        Integer configured = (Integer) kp.get("maxAttributeValLength");
        return configured;
    }

    /**
     * Stores the maximum attribute-value length under the "maxAttributeValLength" key.
     *
     * @param i maximum number of characters in an attribute value
     */
    public void setMaxAttributeValLength(int i) {
        kp.put("maxAttributeValLength", i);
    }

    /**
     * Returns the "treatFramesAsEmbedLinks" setting. processGeneralTag() uses it to decide
     * whether the src of a frame/iframe is recorded as an embed or as a navigation link.
     *
     * @return the stored flag
     */
    public boolean getTreatFramesAsEmbedLinks() {
        Boolean flag = (Boolean) kp.get("treatFramesAsEmbedLinks");
        return flag;
    }

    /**
     * Stores the "treatFramesAsEmbedLinks" setting.
     *
     * @param z true to record frame/iframe sources as embeds
     */
    public void setTreatFramesAsEmbedLinks(boolean z) {
        kp.put("treatFramesAsEmbedLinks", z);
    }

    /**
     * Returns the "ignoreFormActionUrls" setting. When true, processGeneralTag() does not
     * remember "action" attribute values, so no link is produced from them.
     *
     * @return the stored flag
     */
    public boolean getIgnoreFormActionUrls() {
        Boolean flag = (Boolean) kp.get("ignoreFormActionUrls");
        return flag;
    }

    /**
     * Stores the "ignoreFormActionUrls" setting.
     *
     * @param z true to skip "action" attribute values
     */
    public void setIgnoreFormActionUrls(boolean z) {
        kp.put("ignoreFormActionUrls", z);
    }

    /**
     * Returns the "extractOnlyFormGets" setting. When true, processGeneralTag() only follows
     * an "action" value if the tag has no "method" attribute or the method is GET.
     *
     * @return the stored flag
     */
    public boolean getExtractOnlyFormGets() {
        Boolean flag = (Boolean) kp.get("extractOnlyFormGets");
        return flag;
    }

    /**
     * Stores the "extractOnlyFormGets" setting.
     *
     * @param z true to restrict form-action extraction to GET (or method-less) forms
     */
    public void setExtractOnlyFormGets(boolean z) {
        kp.put("extractOnlyFormGets", z);
    }

    /**
     * Returns the "extractJavascript" setting; processScriptCode() does nothing when it is false.
     *
     * @return the stored flag
     */
    public boolean getExtractJavascript() {
        Boolean flag = (Boolean) kp.get("extractJavascript");
        return flag;
    }

    /**
     * Stores the "extractJavascript" setting.
     *
     * @param z true to scan script code for candidate links
     */
    public void setExtractJavascript(boolean z) {
        kp.put("extractJavascript", z);
    }

    /**
     * Returns the "extractValueAttributes" setting. When true, processGeneralTag() checks
     * "value" attribute contents for likely URIs.
     *
     * @return the stored flag
     */
    public boolean getExtractValueAttributes() {
        Boolean flag = (Boolean) kp.get("extractValueAttributes");
        return flag;
    }

    /**
     * Stores the "extractValueAttributes" setting.
     *
     * @param z true to consider "value" attribute contents as possible URIs
     */
    public void setExtractValueAttributes(boolean z) {
        kp.put("extractValueAttributes", z);
    }

    /**
     * Returns the "ignoreUnexpectedHtml" setting. When true, shouldExtract() first asks
     * isHtmlExpectedHere() and skips URIs whose path extension suggests non-HTML content.
     *
     * @return the stored flag
     */
    public boolean getIgnoreUnexpectedHtml() {
        Boolean flag = (Boolean) kp.get("ignoreUnexpectedHtml");
        return flag;
    }

    /**
     * Stores the "ignoreUnexpectedHtml" setting.
     *
     * @param z true to skip extraction where the URI path suggests non-HTML content
     */
    public void setIgnoreUnexpectedHtml(boolean z) {
        kp.put("ignoreUnexpectedHtml", z);
    }

    /**
     * Returns the crawl metadata currently held by this extractor.
     *
     * @return the metadata reference, or null if none has been set
     */
    public CrawlMetadata getMetadata() {
        return metadata;
    }

    /**
     * Sets the crawl metadata; processMeta() reads the robots policy from it.
     *
     * @param crawlMetadata metadata instance to use (injected via {@code @Autowired})
     */
    @Autowired
    public void setMetadata(CrawlMetadata crawlMetadata) {
        metadata = crawlMetadata;
    }

    /**
     * Creates an extractor and stores the default settings:
     * element and attribute names limited to 64 characters, attribute values to 2048,
     * every boolean option enabled except "ignoreFormActionUrls".
     */
    public ExtractorHTML() {
        // Behaviour switches.
        setIgnoreFormActionUrls(false);
        setTreatFramesAsEmbedLinks(true);
        setExtractOnlyFormGets(true);
        setExtractJavascript(true);
        setExtractValueAttributes(true);
        setIgnoreUnexpectedHtml(true);

        // Length limits later substituted into the regex templates.
        setMaxElementLength(64);
        setMaxAttributeNameLength(64);
        setMaxAttributeValLength(2048);
    }

    /**
     * Builds the two working regex strings by replacing the length placeholders in the
     * templates with the currently configured limits. The tag and attribute patterns are
     * not set anywhere else in this class, so extraction relies on this having run.
     */
    public void afterPropertiesSet() {
        String maxElement = Integer.toString(getMaxElementLength());
        this.relevantTagPattern = RELEVANT_TAG_EXTRACTOR.replace(MAX_ELEMENT_REPLACE, maxElement);

        String maxAttrName = Integer.toString(getMaxAttributeNameLength());
        String withNameLimit = EACH_ATTRIBUTE_EXTRACTOR.replace(MAX_ATTR_NAME_REPLACE, maxAttrName);
        String maxAttrVal = Integer.toString(getMaxAttributeValLength());
        this.eachAttributePattern = withNameLimit.replace(MAX_ATTR_VAL_REPLACE, maxAttrVal);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Scans the attributes of one tag and hands each recognised attribute value to the
     * matching handler (link, embed, script code, style code, ...).
     *
     * <p>Attributes are recognised by the capturing group of the attribute pattern that
     * matched: 2 href, 3 action, 4 on-event handler, 5 src-like, 6 codebase, 7 classid/data,
     * 8 archive, 9 code, 10 value, 11 style, 12 method, 13 anything else. Groups 14-16 carry
     * the value (double-quoted, single-quoted, unquoted). Values are HTML-unescaped before use.
     *
     * <p>Some attributes can only be acted on once the whole tag has been seen
     * (codebase-relative resources, form action versus method, value versus name), so they
     * are collected during the scan and processed afterwards.
     *
     * <p>The matcher is recycled in a {@code finally} block so that it is returned to
     * {@code TextUtils} even when a handler throws.
     *
     * @param crawlURI      URI being processed; receives the discovered links
     * @param charSequence  element name of the tag
     * @param charSequence2 tag text (element name plus attributes) to scan
     */
    public void processGeneralTag(CrawlURI crawlURI, CharSequence charSequence, CharSequence charSequence2) {
        Matcher attrMatcher = TextUtils.getMatcher(this.eachAttributePattern, charSequence2);
        String codebase = null;
        ArrayList<String> resources = null;
        CharSequence action = null;
        CharSequence actionContext = null;
        CharSequence method = null;
        CharSequence valueVal = null;
        CharSequence valueContext = null;
        CharSequence nameVal = null;
        final boolean framesAsEmbeds = getTreatFramesAsEmbedLinks();
        final boolean ignoreFormActions = getIgnoreFormActionUrls();
        final boolean extractValueAttrs = getExtractValueAttributes();
        final String elementName = charSequence.toString();
        try {
            while (attrMatcher.find()) {
                // Exactly one of groups 14/15/16 holds the attribute value.
                int valueGroup = attrMatcher.start(14) > -1 ? 14 : attrMatcher.start(15) > -1 ? 15 : 16;
                int start = attrMatcher.start(valueGroup);
                int end = attrMatcher.end(valueGroup);
                if (!$assertionsDisabled && start < 0) {
                    throw new AssertionError("Start is: " + start + ", " + crawlURI);
                }
                if (!$assertionsDisabled && end < 0) {
                    throw new AssertionError("End is :" + end + ", " + crawlURI);
                }
                CharSequence rawValue = charSequence2.subSequence(start, end);
                CharSequence attrName = charSequence2.subSequence(attrMatcher.start(1), attrMatcher.end(1));
                CharSequence value = TextUtils.unescapeHtml(rawValue);

                if (attrMatcher.start(2) > -1) {
                    // href: an embed for <link>, a navigation link otherwise.
                    CharSequence context = elementContext(charSequence, attrMatcher.group(2));
                    if (elementName.equalsIgnoreCase(LINK)) {
                        processEmbed(crawlURI, value, context);
                    } else {
                        processLink(crawlURI, value, context);
                    }
                    if (elementName.equalsIgnoreCase(BASE)) {
                        // <base href> additionally becomes the base URI for later relative links.
                        try {
                            crawlURI.setBaseURI(UURIFactory.getInstance(value.toString()));
                        } catch (URIException e) {
                            logUriError(e, crawlURI.getUURI(), value);
                        }
                    }
                } else if (attrMatcher.start(3) > -1) {
                    // action: deferred until the method attribute is known.
                    if (!ignoreFormActions) {
                        action = value;
                        actionContext = elementContext(charSequence, attrMatcher.group(3));
                    }
                } else if (attrMatcher.start(4) > -1) {
                    // on-event handler: treat the value as script code.
                    processScriptCode(crawlURI, value);
                } else if (attrMatcher.start(5) > -1) {
                    // src-like attribute; frame/iframe sources may be downgraded to navigation links.
                    boolean isFrame = elementName.equalsIgnoreCase(FRAME) || elementName.equalsIgnoreCase(IFRAME);
                    Hop hopType = (framesAsEmbeds || !isFrame) ? Hop.EMBED : Hop.NAVLINK;
                    processEmbed(crawlURI, value, elementContext(charSequence, attrMatcher.group(5)), hopType);
                } else if (attrMatcher.start(6) > -1) {
                    // codebase: remembered so deferred resources can be resolved against it.
                    codebase = value.toString();
                    processEmbed(crawlURI, codebase, elementContext(charSequence, attrMatcher.group(6)));
                } else if (attrMatcher.start(7) > -1) {
                    // classid/data: deferred resource.
                    if (resources == null) {
                        resources = new ArrayList<String>();
                    }
                    resources.add(value.toString());
                } else if (attrMatcher.start(8) > -1) {
                    // archive: whitespace-separated list of deferred resources.
                    if (resources == null) {
                        resources = new ArrayList<String>();
                    }
                    for (String archiveEntry : TextUtils.split(WHITESPACE, value)) {
                        resources.add(archiveEntry);
                    }
                } else if (attrMatcher.start(9) > -1) {
                    // code: for applets, make sure the resource name ends in ".class".
                    if (resources == null) {
                        resources = new ArrayList<String>();
                    }
                    if (!elementName.equalsIgnoreCase(APPLET) || value.toString().toLowerCase().endsWith(CLASSEXT)) {
                        resources.add(value.toString());
                    } else {
                        resources.add(value.toString() + CLASSEXT);
                    }
                } else if (attrMatcher.start(10) > -1) {
                    // value: deferred until the name attribute is known.
                    valueVal = value;
                    valueContext = elementContext(charSequence, attrMatcher.group(10));
                } else if (attrMatcher.start(11) > -1) {
                    // style: scan inline CSS for links.
                    this.numberOfLinksExtracted.addAndGet(ExtractorCSS.processStyleCode(this, crawlURI, value));
                } else if (attrMatcher.start(12) > -1) {
                    // method: remembered for the deferred form-action decision.
                    method = value;
                } else if (attrMatcher.start(13) > -1) {
                    // Any other attribute: only "name" and "flashvars" matter.
                    String otherAttr = attrName.toString();
                    if ("NAME".equalsIgnoreCase(otherAttr)) {
                        nameVal = value;
                    }
                    if ("FLASHVARS".equalsIgnoreCase(otherAttr)) {
                        valueContext = elementContext(charSequence, attrMatcher.group(13));
                        considerQueryStringValues(crawlURI, value, valueContext, Hop.SPECULATIVE);
                    }
                }
            }
        } finally {
            TextUtils.recycleMatcher(attrMatcher);
        }

        // Deferred resources (classid/data/archive/code), resolved against codebase when usable.
        if (resources != null) {
            UURI codebaseUri = null;
            String res = null;
            if (codebase != null) {
                try {
                    codebaseUri = UURIFactory.getInstance(crawlURI.getUURI(), codebase);
                } catch (IllegalArgumentException e) {
                    DevUtils.logger.log(Level.WARNING, "processGeneralTag()\ncodebase=" + codebase + " res=" + res + "\n" + DevUtils.extraInfo(), (Throwable) e);
                } catch (URIException e) {
                    crawlURI.getNonFatalFailures().add(e);
                }
            }
            for (String resource : resources) {
                res = (String) TextUtils.unescapeHtml(resource);
                if (codebaseUri != null) {
                    res = codebaseUri.resolve(res).toString();
                }
                processEmbed(crawlURI, res, charSequence);
            }
        }

        // Deferred form action: followed unless restricted to GET forms and the method is something else.
        if (action != null && (method == null || "GET".equalsIgnoreCase(method.toString()) || !getExtractOnlyFormGets())) {
            processLink(crawlURI, action, actionContext);
        }

        // Deferred value attribute: flashvars parameters are parsed as a query string,
        // other values are only checked for looking like a URI.
        if (valueVal != null) {
            if ("PARAM".equalsIgnoreCase(elementName) && nameVal != null && "flashvars".equalsIgnoreCase(nameVal.toString())) {
                considerQueryStringValues(crawlURI, valueVal.toString(), valueContext, Hop.SPECULATIVE);
            } else if (extractValueAttrs) {
                considerIfLikelyUri(crawlURI, valueVal, valueContext, Hop.NAVLINK);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Treats the given text as an "&amp;"-separated list of key=value pairs and offers every
     * non-empty value to {@link #considerIfLikelyUri}.
     *
     * <p>Each pair is split at its <em>first</em> "=" only, so a value that itself contains
     * "=" (for example an unescaped URL with its own query string) is kept whole instead of
     * being discarded. Values are URL-decoded as UTF-8; if decoding (or handling the decoded
     * value) fails with an IllegalArgumentException, the raw value is tried instead.
     *
     * @param crawlURI      URI being processed
     * @param charSequence  query-string-like text to parse
     * @param charSequence2 link context passed through to the created links
     * @param hop           hop type passed through to the created links
     */
    public void considerQueryStringValues(CrawlURI crawlURI, CharSequence charSequence, CharSequence charSequence2, Hop hop) {
        for (String pair : charSequence.toString().split("&")) {
            int separator = pair.indexOf('=');
            if (separator < 0 || separator == pair.length() - 1) {
                // No "=" at all, or nothing after it: there is no value to consider.
                continue;
            }
            String encodedValue = pair.substring(separator + 1);
            try {
                considerIfLikelyUri(crawlURI, URLDecoder.decode(encodedValue, "UTF-8"), charSequence2, hop);
            } catch (UnsupportedEncodingException e) {
                throw new AssertionError("all jvms must support UTF-8, and yet somehow this happened: " + e);
            } catch (IllegalArgumentException e) {
                // Malformed percent-escape: fall back to the undecoded text.
                considerIfLikelyUri(crawlURI, encodedValue, charSequence2, hop);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Adds a link for the candidate text, but only when {@code UriUtils.isLikelyUri}
     * accepts it.
     *
     * @param crawlURI      URI being processed
     * @param charSequence  candidate text
     * @param charSequence2 link context
     * @param hop           hop type for the created link
     */
    public void considerIfLikelyUri(CrawlURI crawlURI, CharSequence charSequence, CharSequence charSequence2, Hop hop) {
        if (!UriUtils.isLikelyUri(charSequence)) {
            return;
        }
        addLinkFromString(crawlURI, charSequence, charSequence2, hop);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Passes script code to {@code ExtractorJS.considerStrings} and adds the returned count
     * to the extracted-links counter. Does nothing when JavaScript extraction is switched off.
     *
     * @param crawlURI     URI being processed
     * @param charSequence script code to scan
     */
    public void processScriptCode(CrawlURI crawlURI, CharSequence charSequence) {
        if (!getExtractJavascript()) {
            return;
        }
        long found = ExtractorJS.considerStrings(this, crawlURI, charSequence, false);
        this.numberOfLinksExtracted.addAndGet(found);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Handles a navigation-link value. A value matching the "javascript:" pattern is scanned
     * as script code (with the 11-character "javascript:" prefix removed); anything else is
     * added as a NAVLINK outlink and counted.
     *
     * @param crawlURI      URI being processed
     * @param charSequence  link value
     * @param charSequence2 link context
     */
    public void processLink(CrawlURI crawlURI, CharSequence charSequence, CharSequence charSequence2) {
        if (!TextUtils.matches(JAVASCRIPT, charSequence)) {
            if (logger.isLoggable(Level.FINEST)) {
                logger.finest("link: " + charSequence.toString() + " from " + crawlURI);
            }
            addLinkFromString(crawlURI, charSequence, charSequence2, Hop.NAVLINK);
            this.numberOfLinksExtracted.incrementAndGet();
            return;
        }
        // 11 == length of "javascript:".
        CharSequence scriptCode = charSequence.subSequence(11, charSequence.length());
        processScriptCode(crawlURI, scriptCode);
    }

    /**
     * Adds an outlink built from the given text, relative to the base of the CrawlURI.
     * A URIException raised while doing so is reported through {@code logUriError}
     * rather than propagated.
     *
     * @param crawlURI      URI being processed
     * @param charSequence  link text
     * @param charSequence2 context string wrapped in an {@code HTMLLinkContext}
     * @param hop           hop type of the link
     */
    protected void addLinkFromString(CrawlURI crawlURI, CharSequence charSequence, CharSequence charSequence2, Hop hop) {
        try {
            int maxOutlinks = getExtractorParameters().getMaxOutlinks();
            String target = charSequence.toString();
            HTMLLinkContext linkContext = new HTMLLinkContext(charSequence2.toString());
            Link.addRelativeToBase(crawlURI, maxOutlinks, target, linkContext, hop);
        } catch (URIException e) {
            logUriError(e, crawlURI.getUURI(), charSequence);
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Convenience overload: records the value as an embed with hop type {@code Hop.EMBED}.
     *
     * @param crawlURI      URI being processed
     * @param charSequence  embed value
     * @param charSequence2 link context
     */
    public final void processEmbed(CrawlURI crawlURI, CharSequence charSequence, CharSequence charSequence2) {
        final Hop defaultHop = Hop.EMBED;
        processEmbed(crawlURI, charSequence, charSequence2, defaultHop);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Records the value as an outlink with the given hop type and increments the
     * extracted-links counter. Logs the embed at FINEST level.
     *
     * @param crawlURI      URI being processed
     * @param charSequence  embed value
     * @param charSequence2 link context
     * @param hop           hop type to record
     */
    public void processEmbed(CrawlURI crawlURI, CharSequence charSequence, CharSequence charSequence2, Hop hop) {
        if (logger.isLoggable(Level.FINEST)) {
            logger.finest("embed (" + hop.getHopChar() + "): " + charSequence.toString() + " from " + crawlURI);
        }
        // String.toString() returns the same instance, so no separate String case is needed.
        String target = charSequence.toString();
        addLinkFromString(crawlURI, target, charSequence2, hop);
        this.numberOfLinksExtracted.incrementAndGet();
    }

    /**
     * Decides whether this extractor should run for the given URI.
     *
     * <p>If "ignoreUnexpectedHtml" is enabled and {@link #isHtmlExpectedHere} says HTML is
     * not expected, the answer is false (a URIException from that check is logged and the
     * check is skipped). Otherwise the content type must start with one of the supported
     * HTML/XHTML/WML MIME types.
     *
     * <p>The content type is lower-cased with {@code Locale.ROOT}: with a Turkish or Azeri
     * default locale, "APPLICATION/..." would otherwise be lower-cased with a dotless i and
     * never match the "application/" prefixes.
     *
     * @param crawlURI URI being considered
     * @return true when the content should be scanned as HTML
     */
    @Override // org.archive.modules.extractor.ContentExtractor
    protected boolean shouldExtract(CrawlURI crawlURI) {
        if (getIgnoreUnexpectedHtml()) {
            try {
                if (!isHtmlExpectedHere(crawlURI)) {
                    return false;
                }
            } catch (URIException e) {
                logger.severe("Failed expectedHTML test: " + e.getMessage());
            }
        }
        String contentType = crawlURI.getContentType();
        if (contentType == null) {
            // Defensive: nothing to match against, so this cannot be known HTML.
            return false;
        }
        String mime = contentType.toLowerCase(Locale.ROOT);
        return mime.startsWith("text/html")
                || mime.startsWith("application/xhtml")
                || mime.startsWith("text/vnd.wap.wml")
                || mime.startsWith("application/vnd.wap.wml")
                || mime.startsWith("application/vnd.wap.xhtml");
    }

    /**
     * Runs HTML extraction over the recorded content of the URI.
     *
     * <p>When the CrawlURI reports no charset declaration in its content type, the first
     * 1000 characters of content are inspected for a charset declared inside the document.
     * If that charset differs from the recorder's current one, the prefix is re-read with the
     * declared charset; the recorder is switched over only if the declaration is still found
     * to be the same, otherwise an "inconsistentCharsetInHTML" annotation is added.
     *
     * @param crawlURI URI being processed
     * @return true when extraction ran; false when the replay sequence could not be obtained
     */
    @Override // org.archive.modules.extractor.ContentExtractor
    public boolean innerExtract(CrawlURI crawlURI) {
        if (!crawlURI.containsContentTypeCharsetDeclaration()) {
            String prefix = crawlURI.getRecorder().getContentReplayPrefixString(1000);
            Charset declared = getContentDeclaredCharset(crawlURI, prefix);
            boolean differsFromRecorder = !crawlURI.getRecorder().getCharset().equals(declared);
            if (differsFromRecorder && declared != null) {
                // Re-decode the prefix with the declared charset and check the declaration survives.
                String redecodedPrefix = crawlURI.getRecorder().getContentReplayPrefixString(1000, declared);
                Charset confirmed = getContentDeclaredCharset(crawlURI, redecodedPrefix);
                if (declared.equals(confirmed)) {
                    crawlURI.getAnnotations().add("usingCharsetInHTML:" + declared);
                    crawlURI.getRecorder().setCharset(declared);
                } else {
                    crawlURI.getAnnotations().add("inconsistentCharsetInHTML:" + declared);
                }
            }
        }

        ReplayCharSequence content;
        try {
            content = crawlURI.getRecorder().getContentReplayCharSequence();
            extract(crawlURI, content);
            if (content.getDecodeExceptionCount() > 0) {
                // Decoding problems do not fail the extraction; they are only recorded.
                crawlURI.getNonFatalFailures().add(content.getCodingException());
            }
            return true;
        } catch (IOException e) {
            crawlURI.getNonFatalFailures().add(e);
            logger.log(Level.WARNING, "Failed get of replay char sequence in " + Thread.currentThread().getName(), (Throwable) e);
            return false;
        }
    }

    /**
     * Looks for a charset declared inside the document text, trying in order:
     * a {@code <meta http-equiv="content-type" ...>} tag containing "charset=...",
     * a {@code <meta ... charset="...">} tag, and an {@code <?xml ... encoding="..."?>}
     * declaration.
     *
     * <p>Every matcher obtained from {@code TextUtils} is recycled, including on the paths
     * where nothing matches.
     *
     * @param crawlURI URI being processed; receives an "unsatisfiableCharsetInHTML"
     *                 annotation when the declared name is not a usable charset
     * @param str      leading portion of the document text to inspect
     * @return the declared charset, or null when none is declared or the name is unusable
     */
    protected Charset getContentDeclaredCharset(CrawlURI crawlURI, String str) {
        String charsetName = null;

        // <meta http-equiv="content-type" content="text/html; charset=...">
        String metaContentType = firstGroupMatch("(?is)<meta\\s+[^>]*http-equiv\\s*=\\s*['\"]content-type['\"][^>]*>", str, 0);
        if (metaContentType != null) {
            charsetName = firstGroupMatch("charset=([^'\";\\s>]+)", metaContentType, 1);
        }
        if (charsetName == null) {
            // <meta charset="...">
            charsetName = firstGroupMatch("(?si)<meta\\s+[^>]*charset=['\"]([^'\";\\s>]+)['\"]", str, 1);
        }
        if (charsetName == null) {
            // <?xml version="1.0" encoding="..."?>
            charsetName = firstGroupMatch("(?is)<\\?xml\\s+[^>]*encoding=['\"]([^'\"]+)['\"]", str, 1);
        }
        if (charsetName == null) {
            return null;
        }

        try {
            return Charset.forName(charsetName);
        } catch (IllegalArgumentException e) {
            logger.log(Level.INFO, "Unknown content-encoding '" + charsetName + "' declared; using default");
            crawlURI.getAnnotations().add("unsatisfiableCharsetInHTML:" + charsetName);
            return null;
        }
    }

    /** Returns the requested group of the first match of regex in input, or null if there is no match; always recycles the matcher. */
    private static String firstGroupMatch(String regex, CharSequence input, int group) {
        Matcher m = TextUtils.getMatcher(regex, input);
        try {
            return m.find() ? m.group(group) : null;
        } finally {
            TextUtils.recycleMatcher(m);
        }
    }

    /**
     * Walks all relevant tags in the document and dispatches each to its handler.
     *
     * <p>Dispatch is by the capturing group of the tag pattern that matched: 8 comment
     * (ignored), 7 meta, 5/6 general tag and its element name, 1/2 script element and its
     * open tag, 3/4 style element and its open tag. Scanning stops early when
     * {@link #processMeta} returns true or when the thread has been interrupted.
     *
     * <p>The matcher is recycled in a {@code finally} block so it is returned to
     * {@code TextUtils} even when a handler throws.
     *
     * <p>NOTE(review): {@code Thread.interrupted()} clears the interrupt flag; kept as-is
     * because it is not visible here whether callers rely on that.
     *
     * @param crawlURI     URI being processed
     * @param charSequence document text
     */
    void extract(CrawlURI crawlURI, CharSequence charSequence) {
        Matcher tags = TextUtils.getMatcher(this.relevantTagPattern, charSequence);
        try {
            while (tags.find() && !Thread.interrupted()) {
                if (tags.start(8) > 0) {
                    // HTML comment: nothing to extract.
                    continue;
                }
                if (tags.start(7) > 0) {
                    // <meta> tag
                    int start = tags.start(5);
                    int end = tags.end(5);
                    assertSpan(start, end, crawlURI);
                    if (processMeta(crawlURI, charSequence.subSequence(start, end))) {
                        // A robots meta tag asked for extraction to stop.
                        break;
                    }
                } else if (tags.start(5) > 0) {
                    // Any other tag: group 5 is the whole tag text, group 6 its element name.
                    int tagStart = tags.start(5);
                    int tagEnd = tags.end(5);
                    assertSpan(tagStart, tagEnd, crawlURI);
                    int nameStart = tags.start(6);
                    int nameEnd = tags.end(6);
                    assertSpan(nameStart, nameEnd, crawlURI);
                    processGeneralTag(crawlURI, charSequence.subSequence(nameStart, nameEnd), charSequence.subSequence(tagStart, tagEnd));
                } else if (tags.start(1) > 0) {
                    // <script> element: group 2 ends where the open tag ends.
                    int start = tags.start(1);
                    int end = tags.end(1);
                    assertSpan(start, end, crawlURI);
                    if (!$assertionsDisabled && tags.end(2) < 0) {
                        throw new AssertionError("Tags.end(2) illegal " + tags.end(2) + ", " + crawlURI);
                    }
                    processScript(crawlURI, charSequence.subSequence(start, end), tags.end(2) - start);
                } else if (tags.start(3) > 0) {
                    // <style> element: group 4 ends where the open tag ends.
                    int start = tags.start(3);
                    int end = tags.end(3);
                    assertSpan(start, end, crawlURI);
                    if (!$assertionsDisabled && tags.end(4) < 0) {
                        throw new AssertionError("Tags.end(4) illegal " + tags.end(4) + ", " + crawlURI);
                    }
                    processStyle(crawlURI, charSequence.subSequence(start, end), tags.end(4) - start);
                }
            }
        } finally {
            TextUtils.recycleMatcher(tags);
        }
    }

    /** When assertions are enabled, fails if either bound of a matched group is negative. */
    private static void assertSpan(int start, int end, CrawlURI crawlURI) {
        if ($assertionsDisabled) {
            return;
        }
        if (start < 0) {
            throw new AssertionError("Start is: " + start + ", " + crawlURI);
        }
        if (end < 0) {
            throw new AssertionError("End is :" + end + ", " + crawlURI);
        }
    }

    /**
     * Tells whether HTML may plausibly be found at the URI, judging by its path extension.
     * HTML is considered unexpected only when the path ends in a short extension (at most
     * four characters after the last dot) that matches {@link #NON_HTML_PATH_EXTENSION}.
     *
     * @param crawlURI URI to inspect
     * @return false when the extension marks a known non-HTML type, true otherwise
     * @throws URIException if the path cannot be obtained from the URI
     */
    protected boolean isHtmlExpectedHere(CrawlURI crawlURI) throws URIException {
        String path = crawlURI.getUURI().getPath();
        if (path == null) {
            return true;
        }
        int dot = path.lastIndexOf('.');
        if (dot < 0) {
            // No extension at all.
            return true;
        }
        if (dot < path.length() - 5) {
            // Extension too long to be one of the recognised ones.
            return true;
        }
        String extension = path.substring(dot + 1);
        return !TextUtils.matches(NON_HTML_PATH_EXTENSION, extension);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Handles a script element: the open tag is processed like any other tag for its
     * attributes, then everything after the open tag is scanned as script code.
     *
     * @param crawlURI     URI being processed
     * @param charSequence script element text, beginning with the element name
     * @param i            offset within {@code charSequence} where the open tag ends
     */
    public void processScript(CrawlURI crawlURI, CharSequence charSequence, int i) {
        // First six characters are the element name "script".
        CharSequence elementName = charSequence.subSequence(0, 6);
        CharSequence openTag = charSequence.subSequence(0, i);
        processGeneralTag(crawlURI, elementName, openTag);

        CharSequence scriptBody = charSequence.subSequence(i, charSequence.length());
        processScriptCode(crawlURI, scriptBody);
    }

    /**
     * Handles a meta tag. Reads its "name", "http-equiv" and "content" attributes, then:
     * <ul>
     *   <li>for a "robots" meta tag with content, stores the content under
     *       {@link #A_META_ROBOTS} and, if the robots policy obeys nofollow and the content
     *       contains "nofollow" or "none", signals that extraction should stop;</li>
     *   <li>for a "refresh" http-equiv with content, adds whatever follows the first "="
     *       as a REFER outlink.</li>
     * </ul>
     *
     * <p>The matcher is recycled in a {@code finally} block so it is returned to
     * {@code TextUtils} even if scanning the attributes throws.
     *
     * @param crawlURI     URI being processed
     * @param charSequence text of the meta tag (element name plus attributes)
     * @return true when HTML extraction should be abandoned for this document
     */
    protected boolean processMeta(CrawlURI crawlURI, CharSequence charSequence) {
        String name = null;
        String httpEquiv = null;
        String content = null;

        Matcher attrMatcher = TextUtils.getMatcher(this.eachAttributePattern, charSequence);
        try {
            while (attrMatcher.find()) {
                // Exactly one of groups 14/15/16 holds the attribute value.
                int valueGroup = attrMatcher.start(14) > -1 ? 14 : attrMatcher.start(15) > -1 ? 15 : 16;
                CharSequence value = TextUtils.unescapeHtml(
                        charSequence.subSequence(attrMatcher.start(valueGroup), attrMatcher.end(valueGroup)));
                String attrName = attrMatcher.group(1);
                if (attrName.equalsIgnoreCase("name")) {
                    name = value.toString();
                } else if (attrName.equalsIgnoreCase("http-equiv")) {
                    httpEquiv = value.toString();
                } else if (attrName.equalsIgnoreCase("content")) {
                    content = value.toString();
                }
            }
        } finally {
            TextUtils.recycleMatcher(attrMatcher);
        }

        if ("robots".equalsIgnoreCase(name) && content != null) {
            crawlURI.getData().put(A_META_ROBOTS, content);
            RobotsPolicy robotsPolicy = this.metadata.getRobotsPolicy();
            String contentLower = content.toLowerCase();
            boolean forbidsFollow = contentLower.indexOf("nofollow") >= 0 || contentLower.indexOf("none") >= 0;
            if (robotsPolicy.obeyMetaRobotsNofollow() && forbidsFollow) {
                logger.fine("HTML extraction skipped due to robots meta-tag for: " + crawlURI.toString());
                return true;
            }
            return false;
        }

        if ("refresh".equalsIgnoreCase(httpEquiv) && content != null) {
            int urlIndex = content.indexOf("=") + 1;
            if (urlIndex > 0) {
                String refreshUri = content.substring(urlIndex);
                try {
                    Link.addRelativeToBase(crawlURI, getExtractorParameters().getMaxOutlinks(), refreshUri, HTMLLinkContext.META, Hop.REFER);
                } catch (URIException e) {
                    logUriError(e, crawlURI.getUURI(), refreshUri);
                }
            }
        }
        return false;
    }

    /**
     * Handles a style element: the open tag is processed like any other tag for its
     * attributes, then everything after the open tag is scanned as CSS and the number of
     * links found is added to the extracted-links counter.
     *
     * <p>The element name passed on is the first five characters ("style"). Taking six,
     * as for "script", would include the character following the name (a space or "&gt;")
     * and leak it into the link context strings.
     *
     * @param crawlURI     URI being processed
     * @param charSequence style element text, beginning with the element name
     * @param i            offset within {@code charSequence} where the open tag ends
     */
    protected void processStyle(CrawlURI crawlURI, CharSequence charSequence, int i) {
        final int elementNameLength = 5; // "style"
        processGeneralTag(crawlURI, charSequence.subSequence(0, elementNameLength), charSequence.subSequence(0, i));
        this.numberOfLinksExtracted.addAndGet(ExtractorCSS.processStyleCode(this, crawlURI, charSequence.subSequence(i, charSequence.length())));
    }

    /**
     * Builds the context string "element/@attribute" used to describe where a link was found.
     *
     * @param charSequence  element name
     * @param charSequence2 attribute name; when null the result is the empty string
     * @return the context string, or "" if no attribute name was given
     */
    public static CharSequence elementContext(CharSequence charSequence, CharSequence charSequence2) {
        if (charSequence2 == null) {
            return "";
        }
        StringBuilder context = new StringBuilder();
        context.append(String.valueOf(charSequence));
        context.append("/@");
        context.append(String.valueOf(charSequence2));
        return context.toString();
    }

    // Class initialisation: assigns the two static fields declared without initialisers.
    static {
        // True when assertions are NOT enabled for this class; guards the explicit
        // AssertionError checks in the extraction methods.
        $assertionsDisabled = !ExtractorHTML.class.desiredAssertionStatus();
        // Logger named after this class.
        logger = Logger.getLogger(ExtractorHTML.class.getName());
    }
}
