/*
 * Decompiled with CFR 0.152.
 */
package de.tblsoft.solr.pipeline.filter;

import de.tblsoft.solr.pipeline.AbstractFilter;
import de.tblsoft.solr.pipeline.bean.Document;
import java.util.List;
import org.jsoup.Jsoup;

/**
 * Pipeline filter that replaces the HTML content of configured fields with
 * its plain text.
 *
 * <p>The field names are read from the {@code fields} property. Each value of
 * each listed field is parsed as an HTML body fragment with jsoup and replaced
 * in place by the text of the resulting body element.
 */
public class HtmlTextExtractorFilter
extends AbstractFilter {
    /** Names of the document fields whose values are converted to text. */
    private List<String> fields;

    /**
     * Reads the {@code fields} property (no default) and passes it to
     * {@code verify} together with an error message before delegating to the
     * parent initialisation.
     */
    @Override
    public void init() {
        this.fields = this.getPropertyAsList("fields", null);
        // NOTE(review): verify() presumably rejects a missing property - confirm in AbstractFilter.
        this.verify(this.fields, "For the HtmlTextExtractorFilter a fields property must be defined!");
        super.init();
    }

    /**
     * Converts the values of every configured field to plain text and then
     * hands the document to the parent implementation.
     *
     * <p>Fields for which the document returns no value list are skipped, as
     * are individual {@code null} values, which are left unchanged.
     *
     * @param document the document whose field values are rewritten in place
     */
    @Override
    public void document(Document document) {
        for (String field : this.fields) {
            List<String> fieldValues = document.getFieldValues(field);
            if (fieldValues == null) {
                continue;
            }
            for (int i = 0; i < fieldValues.size(); ++i) {
                String html = fieldValues.get(i);
                if (html == null) {
                    // jsoup cannot parse a null string; keep the entry as it is
                    // instead of failing the whole document.
                    continue;
                }
                org.jsoup.nodes.Document parsed = Jsoup.parseBodyFragment(html);
                fieldValues.set(i, parsed.body().text());
            }
        }
        super.document(document);
    }
}

