package org.archive.modules.extractor;

import au.id.jericho.lib.html.Attribute;
import au.id.jericho.lib.html.Attributes;
import au.id.jericho.lib.html.Element;
import au.id.jericho.lib.html.FormControl;
import au.id.jericho.lib.html.FormControlType;
import au.id.jericho.lib.html.FormField;
import au.id.jericho.lib.html.Source;
import au.id.jericho.lib.html.StartTagType;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Logger;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.lang.StringUtils;
import org.archive.modules.CrawlURI;
import org.archive.modules.net.RobotsPolicy;

/* loaded from: input_file:org/archive/modules/extractor/JerichoExtractorHTML.class */
public class JerichoExtractorHTML extends ExtractorHTML {
    /** Serialization version identifier for this class. */
    private static final long serialVersionUID = 1684681316546343615L;
    /** Logger named after this class; used for the robots meta-tag skip message in processMeta. */
    private static final Logger logger = Logger.getLogger(JerichoExtractorHTML.class.getName());
    /** Running count of forms handled; incremented in processForm and printed by report(). */
    protected AtomicLong numberOfFormsProcessed = new AtomicLong(0);

    /**
     * Collects the attributes whose key begins with {@code "on"}.
     *
     * @param attributes the attribute collection to scan
     * @return a new list, in iteration order, holding every attribute whose key starts with
     *         {@code "on"}; empty when none match
     */
    private static List<Attribute> findOnAttributes(Attributes attributes) {
        // Typed list: returning a raw LinkedList as List<Attribute> needs an unchecked conversion.
        List<Attribute> matches = new LinkedList<Attribute>();
        // Iterator<?> plus an explicit cast compiles whether or not the parser API is generified.
        for (Iterator<?> cursor = attributes.iterator(); cursor.hasNext();) {
            Attribute candidate = (Attribute) cursor.next();
            if (candidate.getKey().startsWith("on")) {
                matches.add(candidate);
            }
        }
        return matches;
    }

    /* JADX WARN: Code restructure failed: missing block: B:51:0x015a, code lost:
    
        if (r0 != null) goto L47;
     */
    /* JADX WARN: Code restructure failed: missing block: B:60:0x01ed, code lost:
    
        if (r0 != null) goto L67;
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Handles a tag together with its attributes.
     *
     * <p>NOTE(review): the decompiler could not reconstruct this method, so the body below is a
     * placeholder that unconditionally throws {@link UnsupportedOperationException}. The real
     * logic must be recovered from the original sources or the bytecode before this class can be
     * used. Within this file it is invoked by processScript, processStyle and extract.
     *
     * @param r7 the URI being processed (decompiler-assigned parameter name)
     * @param r8 the element being handled (decompiler-assigned parameter name)
     * @param r9 the attributes passed alongside the element (decompiler-assigned parameter name)
     * @throws UnsupportedOperationException always, in this decompiled form
     */
    protected void processGeneralTag(org.archive.modules.CrawlURI r7, au.id.jericho.lib.html.Element r8, au.id.jericho.lib.html.Attributes r9) {
        /*
            Method dump skipped, instructions count: 1050
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: org.archive.modules.extractor.JerichoExtractorHTML.processGeneralTag(org.archive.modules.CrawlURI, au.id.jericho.lib.html.Element, au.id.jericho.lib.html.Attributes):void");
    }

    /**
     * Handles a {@code meta} element: records robots directives and queues a refresh target.
     *
     * <p>For {@code name="robots"} the content is stored on the URI's data map under
     * {@link ExtractorHTML#A_META_ROBOTS}. If the robots policy obeys meta nofollow and the
     * content contains {@code nofollow} or {@code none}, the method returns {@code true}.
     * For {@code http-equiv="refresh"} the text after the first {@code '='} in the content is
     * added as an outlink relative to the base.
     *
     * <p>The {@code name} and {@code http-equiv} values are compared case-insensitively, so
     * spellings such as {@code Refresh} or {@code ROBOTS} are recognised.
     *
     * @param crawlURI the URI whose document contains the element
     * @param element the {@code meta} element
     * @return {@code true} only when a robots directive says links must not be followed (the
     *         caller {@code extract} stops processing the document in that case); {@code false}
     *         otherwise
     */
    protected boolean processMeta(CrawlURI crawlURI, Element element) {
        String metaName = element.getAttributeValue("name");
        String httpEquiv = element.getAttributeValue("http-equiv");
        String content = element.getAttributeValue("content");
        // Neither the robots nor the refresh handling does anything without a content value.
        if (content == null) {
            return false;
        }
        if ("robots".equalsIgnoreCase(metaName)) {
            crawlURI.getData().put(ExtractorHTML.A_META_ROBOTS, content);
            RobotsPolicy robotsPolicy = this.metadata.getRobotsPolicy();
            String directives = content.toLowerCase();
            if (robotsPolicy.obeyMetaRobotsNofollow()
                    && (directives.contains("nofollow") || directives.contains("none"))) {
                logger.fine("HTML extraction skipped due to robots meta-tag for: " + crawlURI.toString());
                return true;
            }
        }
        if (!"refresh".equalsIgnoreCase(httpEquiv)) {
            return false;
        }
        int separator = content.indexOf('=');
        // A refresh without '=' (e.g. content="30") is a plain reload with no target URL;
        // previously the whole content value was queued as a bogus relative link.
        if (separator < 0) {
            return false;
        }
        String target = content.substring(separator + 1);
        try {
            Link.addRelativeToBase(crawlURI, getExtractorParameters().getMaxOutlinks(), target, HTMLLinkContext.META, Hop.REFER);
        } catch (URIException e) {
            logUriError(e, crawlURI.getUURI(), target);
        }
        return false;
    }

    /**
     * Handles a {@code script} element: first treats it like any other tag via
     * processGeneralTag, then hands its content to processScriptCode.
     *
     * @param crawlURI the URI whose document contains the element
     * @param element the {@code script} element
     */
    protected void processScript(CrawlURI crawlURI, Element element) {
        Attributes scriptAttributes = element.getAttributes();
        processGeneralTag(crawlURI, element, scriptAttributes);
        processScriptCode(
                crawlURI,
                element.getContent());
    }

    /**
     * Handles a {@code style} element: first treats it like any other tag via
     * processGeneralTag, then passes its content to ExtractorCSS.processStyleCode and adds
     * the returned count to {@code numberOfLinksExtracted}.
     *
     * @param crawlURI the URI whose document contains the element
     * @param element the {@code style} element
     */
    protected void processStyle(CrawlURI crawlURI, Element element) {
        Attributes styleAttributes = element.getAttributes();
        processGeneralTag(crawlURI, element, styleAttributes);
        this.numberOfLinksExtracted.addAndGet(
                ExtractorCSS.processStyleCode(this, crawlURI, element.getContent()));
    }

    /**
     * Builds a link from a {@code form} element's action plus a query string assembled from its
     * controls, and passes it to processLink.
     *
     * <p>Nothing happens when form action URLs are ignored, or when only GET forms are extracted
     * and the form's method (defaulting to {@code GET} when absent or empty) is not GET.
     * Otherwise the form counter is incremented and each control contributes
     * {@code name=value} for every value it has, or {@code name=} when it has none. For SUBMIT
     * controls the predefined values are used, for all other controls the current values.
     * The query is introduced by {@code '?'} unless the action already contains one, in which
     * case it is appended with {@code '&'}. Names and values are appended verbatim.
     *
     * @param crawlURI the URI whose document contains the form
     * @param element the {@code form} element
     */
    protected void processForm(CrawlURI crawlURI, Element element) {
        String action = element.getAttributeValue("action");
        String formName = element.getAttributeValue("name");
        if (getIgnoreFormActionUrls()) {
            return;
        }
        String method = StringUtils.defaultIfEmpty(element.getAttributeValue("method"), "GET");
        if (getExtractOnlyFormGets() && !"GET".equalsIgnoreCase(method)) {
            return;
        }
        this.numberOfFormsProcessed.incrementAndGet();

        // StringBuilder avoids re-copying the whole query for every appended parameter.
        StringBuilder query = new StringBuilder();
        // Iterating as Object with explicit casts compiles against both raw and generified APIs.
        for (Iterator<?> fields = element.findFormFields().iterator(); fields.hasNext();) {
            FormField field = (FormField) fields.next();
            for (Object controlObject : field.getFormControls()) {
                FormControl control = (FormControl) controlObject;
                String controlName = control.getName();
                Collection<?> values = control.getFormControlType() != FormControlType.SUBMIT
                        ? control.getValues()
                        : control.getPredefinedValues();
                if (values.isEmpty()) {
                    query.append('&').append(controlName).append('=');
                } else {
                    for (Object value : values) {
                        query.append('&').append(controlName).append('=').append((String) value);
                    }
                }
            }
        }

        // Every parameter was appended with a leading '&', so a non-empty query starts with '&'.
        // Turn that first separator into '?' unless the action already carries a query string.
        if (query.length() > 0 && (action == null || !action.contains("?"))) {
            query.setCharAt(0, '?');
        }
        String link = action == null ? query.toString() : action + query;
        processLink(crawlURI, link, elementContext(element.getName(), "name=" + formName));
    }

    /**
     * Parses the given text and dispatches each element found with
     * {@code StartTagType.NORMAL} to a handler chosen by tag name: {@code meta},
     * {@code script}, {@code style} and {@code form} have dedicated handlers; any other element
     * goes to processGeneralTag, but only when it has at least one attribute. Processing stops
     * as soon as processMeta returns {@code true}.
     *
     * @param crawlURI the URI whose content is being processed
     * @param charSequence the document text to parse
     */
    @Override // org.archive.modules.extractor.ExtractorHTML
    void extract(CrawlURI crawlURI, CharSequence charSequence) {
        Source document = new Source(charSequence);
        for (Object candidate : document.findAllElements(StartTagType.NORMAL)) {
            Element tag = (Element) candidate;
            String tagName = tag.getName();

            if (tagName.equals("meta")) {
                if (processMeta(crawlURI, tag)) {
                    // A meta directive asked for extraction to stop for this document.
                    return;
                }
                continue;
            }
            if (tagName.equals("script")) {
                processScript(crawlURI, tag);
                continue;
            }
            if (tagName.equals("style")) {
                processStyle(crawlURI, tag);
                continue;
            }
            if (tagName.equals("form")) {
                processForm(crawlURI, tag);
                continue;
            }

            // Any other tag is only of interest when it carries attributes.
            Attributes tagAttributes = tag.getAttributes();
            if (tagAttributes.isEmpty()) {
                continue;
            }
            processGeneralTag(crawlURI, tag, tagAttributes);
        }
    }

    /**
     * Returns the superclass report followed by one line giving the number of forms processed.
     *
     * @return the combined report text
     */
    @Override // org.archive.modules.extractor.Extractor, org.archive.modules.Processor
    public String report() {
        String inherited = super.report();
        return inherited + "  " + this.numberOfFormsProcessed + " forms processed\n";
    }
}
