/*
 * Decompiled with CFR 0.152.
 */
package org.archive.modules.forms;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import org.apache.commons.lang.StringUtils;
import org.archive.io.ReplayCharSequence;
import org.archive.modules.CrawlURI;
import org.archive.modules.extractor.Extractor;
import org.archive.modules.forms.HTMLForm;
import org.archive.util.TextUtils;

/**
 * Extractor that builds {@link HTMLForm} descriptions from the recorded
 * content of a URI.
 *
 * <p>It only runs for URIs that carry data under the {@code "form-offsets"}
 * key. For each offset in that list it reads the {@code method},
 * {@code action} and {@code enctype} attributes from the tag starting at the
 * offset (presumably the opening form tag; the offsets are produced
 * elsewhere), then collects the {@code <input>} tags that follow. A form is
 * kept when it seems to be a login form, or unconditionally when
 * {@code extractAllForms} is enabled. Kept forms are appended to the URI's
 * {@value #A_HTML_FORM_OBJECTS} data list and to its annotations.
 */
public class ExtractorHTMLForms extends Extractor {
    private static final long serialVersionUID = 2L;

    /** Data-list key under which kept {@link HTMLForm} objects are stored on the URI. */
    public static final String A_HTML_FORM_OBJECTS = "html-form-objects";

    private static final Logger logger = Logger.getLogger(ExtractorHTMLForms.class.getName());

    /** Data key holding the list of form start offsets to examine. */
    private static final String FORM_OFFSETS_KEY = "form-offsets";

    /** Key in {@code kp} for the "extract all forms" setting. */
    private static final String EXTRACT_ALL_FORMS_KEY = "extractAllForms";

    // Attribute patterns: anchored at the start of the examined sequence and
    // confined to the first tag (nothing before the match may contain '>').
    // Group 1 is the raw attribute value: a run of 1..50000 characters that
    // are neither whitespace nor '>'.
    private static final String METHOD_ATTR = "(?i)^[^>]*\\smethod\\s*=\\s*([^>\\s]{1,50000})[^>]*>";
    private static final String ACTION_ATTR = "(?i)^[^>]*\\saction\\s*=\\s*([^>\\s]{1,50000})[^>]*>";
    private static final String ENCTYPE_ATTR = "(?i)^[^>]*\\senctype\\s*=\\s*([^>\\s]{1,50000})[^>]*>";
    private static final String TYPE_ATTR = "(?i)^[^>]*\\stype\\s*=\\s*([^>\\s]{1,50000})[^>]*>";
    private static final String NAME_ATTR = "(?i)^[^>]*\\sname\\s*=\\s*([^>\\s]{1,50000})[^>]*>";
    private static final String VALUE_ATTR = "(?i)^[^>]*\\svalue\\s*=\\s*([^>\\s]{1,50000})[^>]*>";

    /** Matches a first tag that contains a {@code checked} attribute. */
    private static final String CHECKED_ATTR = "(?i)^[^>]*\\schecked\\s*[^>]*>";

    /**
     * Group 1 matches an {@code <input ...>} tag; group 2 matches a bare
     * {@code <form>} or {@code </form>} tag, which leaves group 1 null.
     */
    private static final String INPUT_OR_FORM_TAG = "(?i)(<input\\s[^>]*>)|(</?form>)";

    /** Creates the extractor with {@code extractAllForms} set to {@code false}. */
    public ExtractorHTMLForms() {
        this.setExtractAllForms(false);
    }

    /**
     * @return the current {@code extractAllForms} setting; when true, every
     *         examined form is kept rather than only those that seem to be
     *         login forms
     */
    public boolean getExtractAllForms() {
        return (Boolean) kp.get(EXTRACT_ALL_FORMS_KEY);
    }

    /**
     * @param extractAllForms whether to keep every examined form instead of
     *        only those that seem to be login forms
     */
    public void setExtractAllForms(boolean extractAllForms) {
        kp.put(EXTRACT_ALL_FORMS_KEY, extractAllForms);
    }

    /**
     * @param uri the URI under consideration
     * @return true only when the URI carries data under {@code "form-offsets"}
     */
    @Override
    protected boolean shouldProcess(CrawlURI uri) {
        return uri.containsDataKey(FORM_OFFSETS_KEY);
    }

    /**
     * Obtains the replay character sequence of the recorded content and hands
     * it to {@link #analyze(CrawlURI, CharSequence)}. An {@link IOException}
     * is recorded on the URI as a non-fatal failure and logged at WARNING
     * level instead of being propagated.
     *
     * @param curi the URI whose recorded content is examined
     */
    @Override
    public void extract(CrawlURI curi) {
        try {
            ReplayCharSequence content = curi.getRecorder().getContentReplayCharSequence();
            analyze(curi, content);
        } catch (IOException ioe) {
            curi.getNonFatalFailures().add(ioe);
            logger.log(Level.WARNING,
                    "Failed get of replay char sequence in " + Thread.currentThread().getName(),
                    ioe);
        }
    }

    /**
     * Examines the content at every offset listed under
     * {@code "form-offsets"} and records the forms worth keeping.
     *
     * @param curi the URI supplying the offsets and receiving the results
     * @param cs the full content the offsets refer to
     */
    protected void analyze(CrawlURI curi, CharSequence cs) {
        for (Object offset : curi.getDataList(FORM_OFFSETS_KEY)) {
            int start = (Integer) offset;
            CharSequence fromOffset = cs.subSequence(start, cs.length());
            HTMLForm form = readForm(fromOffset);
            // Login-looking forms are always kept; others only on request.
            if (form.seemsLoginForm() || getExtractAllForms()) {
                curi.getDataList(A_HTML_FORM_OBJECTS).add(form);
                curi.getAnnotations().add(form.asAnnotation());
            }
        }
    }

    /**
     * Builds an {@link HTMLForm} from content beginning at a form offset:
     * form-level attributes come from the first tag, fields from the
     * {@code <input>} tags reported by {@link #findGroups}.
     */
    private HTMLForm readForm(CharSequence fromOffset) {
        String method = findAttributeValueGroup(METHOD_ATTR, 1, fromOffset);
        String action = findAttributeValueGroup(ACTION_ATTR, 1, fromOffset);
        String enctype = findAttributeValueGroup(ENCTYPE_ATTR, 1, fromOffset);

        HTMLForm form = new HTMLForm();
        form.setMethod(method);
        form.setAction(action);
        form.setEnctype(enctype);

        for (CharSequence inputTag : findGroups(INPUT_OR_FORM_TAG, 1, fromOffset)) {
            form.addField(
                    findAttributeValueGroup(TYPE_ATTR, 1, inputTag),
                    findAttributeValueGroup(NAME_ATTR, 1, inputTag),
                    findAttributeValueGroup(VALUE_ATTR, 1, inputTag),
                    hasCheckedAttribute(inputTag));
        }
        return form;
    }

    /** Reports whether the given tag matches the {@code checked} attribute pattern. */
    private boolean hasCheckedAttribute(CharSequence inputTag) {
        Matcher matcher = TextUtils.getMatcher(CHECKED_ATTR, inputTag);
        try {
            return matcher.find();
        } finally {
            TextUtils.recycleMatcher(matcher);
        }
    }

    /**
     * Collects the text of one capture group over successive matches of a
     * pattern. Collection stops at the first match in which that group did
     * not participate (is null); matches after that point are ignored.
     *
     * @param pattern regular expression to search for
     * @param groupNumber capture group whose text is collected
     * @param cs sequence to search
     * @return the collected subsequences of {@code cs}, in match order;
     *         possibly empty, never null
     */
    protected List<CharSequence> findGroups(String pattern, int groupNumber, CharSequence cs) {
        ArrayList<CharSequence> found = new ArrayList<CharSequence>();
        Matcher matcher = TextUtils.getMatcher(pattern, cs);
        try {
            while (matcher.find()) {
                if (matcher.group(groupNumber) == null) {
                    // A match without the wanted group ends the scan.
                    break;
                }
                found.add(cs.subSequence(matcher.start(groupNumber), matcher.end(groupNumber)));
            }
            return found;
        } finally {
            TextUtils.recycleMatcher(matcher);
        }
    }

    /**
     * Finds the first match of a pattern and returns the requested capture
     * group with surrounding quoting removed: a trailing {@code '/} and then
     * a trailing {@code "/} are dropped, after which any leading or trailing
     * single or double quote characters are stripped.
     *
     * @param pattern regular expression to search for
     * @param groupNumber capture group holding the raw attribute value
     * @param cs sequence to search
     * @return the cleaned-up group text, or null when the pattern is not found
     */
    protected String findAttributeValueGroup(String pattern, int groupNumber, CharSequence cs) {
        Matcher matcher = TextUtils.getMatcher(pattern, cs);
        try {
            if (!matcher.find()) {
                return null;
            }
            String raw = matcher.group(groupNumber);
            String withoutSingleQuoteSlash = StringUtils.removeEnd(raw, "'/");
            String withoutDoubleQuoteSlash = StringUtils.removeEnd(withoutSingleQuoteSlash, "\"/");
            return StringUtils.strip(withoutDoubleQuoteSlash, "'\"");
        } finally {
            TextUtils.recycleMatcher(matcher);
        }
    }
}

