/*
 * Decompiled with CFR 0.152.
 */
package org.grobid.trainer.sax;

import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import java.util.StringTokenizer;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that flattens a TEI full-text document into a list of labelled tokens.
 *
 * <p>Character data is buffered and, at each element boundary, split into tokens. Every
 * token is appended to the result list as {@code "token label\n"}, where the label is
 * derived from the innermost enclosing element. The first token of each flushed text
 * segment is written as {@code "token I-label\n"}. {@code <lb/>} and {@code <pb/>}
 * elements are emitted as {@code "@newline\n"} and {@code "@newpage\n"} lines.
 *
 * <p>The handler keeps one label on an internal stack per open element (except the inline
 * elements {@code lb}, {@code pb}, {@code space} and the {@code figure} container), so the
 * label of an enclosing element is restored when a nested element closes.
 *
 * <p>Instances are stateful; use a fresh instance per parsed document.
 */
public class TEIFulltextSaxParser extends DefaultHandler {
    /** Characters that split text into tokens; each delimiter is itself returned as a token. */
    private static final String TOKEN_DELIMITERS =
            " \n\t([ ,:;?.!/)-\u2013\"\u201c\u201d\u2018\u2019'`$]*\u2666\u2665\u2663\u2660";

    /** Placeholder inserted in the text buffer for a {@code <lb/>} element. */
    private static final String LINE_BREAK_MARK = "+L+";

    /** Placeholder inserted in the text buffer for a {@code <pb/>} element. */
    private static final String PAGE_BREAK_MARK = "+PAGE+";

    /** Label used for elements that have no dedicated label. */
    private static final String OTHER_LABEL = "<other>";

    /** Text received since the last flush. */
    private final StringBuilder accumulator = new StringBuilder();

    /** Labels of the currently open (non-inline) elements, innermost on top. */
    private final Stack<String> currentTags = new Stack<>();

    /** Label applied to the text currently being buffered; {@code null} before the first element. */
    private String currentTag = null;

    /** {@code true} while inside a {@code <figure>} element; changes the label of {@code <head>}. */
    private boolean figureBlock = false;

    /** Output lines, in document order. */
    private final List<String> labeled = new ArrayList<>();

    /** Creates a handler with an empty text buffer, label stack and result list. */
    public TEIFulltextSaxParser() {
    }

    /** Appends the received character data to the text buffer. */
    @Override
    public void characters(char[] buffer, int start, int length) {
        this.accumulator.append(buffer, start, length);
    }

    /**
     * Returns the text buffered since the last flush.
     *
     * @return the buffered text with leading and trailing whitespace trimmed
     */
    public String getText() {
        return this.accumulator.toString().trim();
    }

    /**
     * Returns the labelled lines produced so far.
     *
     * @return the live internal list (not a copy); each entry ends with a newline
     */
    public List<String> getLabeledResult() {
        return this.labeled;
    }

    /**
     * Closes an element: writes the buffered text under the element's label, then restores
     * the label of the enclosing element.
     *
     * <p>{@code lb}, {@code pb} and {@code space} are ignored here because they never open a
     * label; {@code figure} only resets the figure flag.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (qName.equals("figure")) {
            this.figureBlock = false;
            return;
        }
        if (isInlineMarker(qName)) {
            return;
        }
        // startElement pushed exactly one label for this element, so pop exactly one.
        if (!this.currentTags.empty()) {
            this.currentTags.pop();
        }
        // currentTag still holds the closing element's label at this point.
        flushText();
        if (!this.currentTags.empty()) {
            this.currentTag = this.currentTags.peek();
        }
    }

    /**
     * Opens an element.
     *
     * <p>{@code lb}, {@code pb} and {@code space} only add a marker or a space to the text
     * buffer, and {@code figure} only sets the figure flag. For every other element the
     * buffered text is first written under the enclosing label, then exactly one label for
     * the new element is pushed so that {@link #endElement} can pop it symmetrically.
     */
    @Override
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
            throws SAXException {
        switch (qName) {
            case "lb":
                this.accumulator.append(" " + LINE_BREAK_MARK + " ");
                return;
            case "pb":
                this.accumulator.append(" " + PAGE_BREAK_MARK + " ");
                return;
            case "space":
                this.accumulator.append(" ");
                return;
            case "figure":
                this.figureBlock = true;
                return;
            default:
                break;
        }
        // Text that precedes the new element belongs to the enclosing label.
        flushText();
        String label = labelFor(qName, atts);
        this.currentTags.push(label);
        this.currentTag = label;
    }

    /** Tells whether the element only contributes to the text buffer and never opens a label. */
    private static boolean isInlineMarker(String qName) {
        return qName.equals("lb") || qName.equals("pb") || qName.equals("space");
    }

    /**
     * Maps an opening element to the label its text should carry.
     *
     * <p>A {@code div} whose type is not {@code paragraph}, and a {@code ref} whose type is
     * missing or not one of {@code biblio}/{@code figure}/{@code table}, keep the label that
     * is currently active, so their text is labelled like the surrounding text.
     */
    private String labelFor(String qName, Attributes atts) {
        switch (qName) {
            case "p":
                return "<paragraph>";
            case "div":
                return "paragraph".equals(typeOf(atts)) ? "<paragraph>" : inheritedLabel();
            case "ref": {
                String type = typeOf(atts);
                if ("biblio".equals(type)) {
                    return "<citation_marker>";
                }
                if ("figure".equals(type)) {
                    return "<figure_marker>";
                }
                if ("table".equals(type)) {
                    return "<table_marker>";
                }
                return inheritedLabel();
            }
            case "formula":
                return "<equation>";
            case "head":
                return this.figureBlock ? "<figure_head>" : "<section>";
            case "figDesc":
                return "<figDesc>";
            case "table":
                return "<table>";
            case "item":
                return "<item>";
            case "trash":
                return "<trash>";
            default:
                // Covers the explicit "other" element as well as any unrecognised element.
                return OTHER_LABEL;
        }
    }

    /** Returns the value of the attribute whose qualified name is {@code type}, or {@code null}. */
    private static String typeOf(Attributes atts) {
        return atts == null ? null : atts.getValue("type");
    }

    /** Returns the active label, or the generic label when no element has been opened yet. */
    private String inheritedLabel() {
        return this.currentTag != null ? this.currentTag : OTHER_LABEL;
    }

    /**
     * Tokenizes the buffered text, appends one line per token to the result under the
     * active label, and empties the buffer. Does nothing but clear the buffer when the text
     * is blank or no label is active yet.
     */
    private void flushText() {
        String text = getText();
        this.accumulator.setLength(0);
        if (this.currentTag == null || text.isEmpty()) {
            return;
        }
        StringTokenizer tokens = new StringTokenizer(text, TOKEN_DELIMITERS, true);
        boolean begin = true;
        while (tokens.hasMoreTokens()) {
            String tok = tokens.nextToken().trim();
            if (tok.isEmpty()) {
                // Whitespace delimiters come back as tokens; they carry no content.
                continue;
            }
            if (tok.equals(LINE_BREAK_MARK)) {
                this.labeled.add("@newline\n");
            } else if (tok.equals(PAGE_BREAK_MARK)) {
                this.labeled.add("@newpage\n");
            } else if (begin) {
                this.labeled.add(tok + " I-" + this.currentTag + "\n");
            } else {
                this.labeled.add(tok + " " + this.currentTag + "\n");
            }
            // NOTE(review): a leading line/page break also clears the flag, so the first word
            // after it gets no "I-" prefix. Kept as in the original; confirm this is intended.
            begin = false;
        }
    }
}

