/*
 * Decompiled with CFR 0.152.
 */
package org.grobid.trainer.sax;

import java.util.ArrayList;
import java.util.List;
import org.grobid.core.lexicon.Lexicon;
import org.grobid.core.utilities.OffsetPosition;
import org.grobid.core.utilities.TextUtilities;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that converts TEI-encoded bibliographical references
 * ({@code <bibl>} elements) into a flat list of labeled tokens.
 *
 * <p>Each entry added to the labeled result is one line of the form
 * {@code token label\n}; the first token written for a field carries an
 * {@code I-} prefix on its label. A {@code "\n \n"} entry is added at the end
 * of every {@code <bibl>}. For every {@code <bibl>} the handler also runs the
 * four lexicon look-ups on the raw character content of that element and
 * stores the results in the public position lists, one entry per citation.
 */
public class TEICitationSaxParser
extends DefaultHandler {
    /** Punctuation set handed to {@code TextUtilities.segment} when tokenizing a field. */
    private static final String SEGMENTATION_PUNCTUATION = "[( ,:;?.!)-\u2013\"\u201c\u201d\u2018\u2019'`$]*\u2666\u2665\u2663\u2660";

    /** Characters that are split off as a separate token when they end a token. */
    private static final String TRAILING_PUNCTUATION = " ,:;?.!)-\u2013\"\u201c\u201d\u2018\u2019'`$]*\u2666\u2665\u2663\u2660";

    /** Characters that are split off as a separate token when they start a token. */
    private static final String OPENING_CHARACTERS = "([\"";

    /** Elements whose accumulated text is written out with the current label when they close. */
    private static final String[] FIELD_ELEMENTS = {
        "author", "authors", "orgName", "title", "editor", "editors", "booktitle",
        "date", "journal", "institution", "tech", "volume", "pages", "page",
        "pubPlace", "note", "web", "publisher", "idno", "issue", "pubnum",
        "biblScope", "ptr", "keyword", "keywords"
    };

    /** Text received since the last flush; also receives the line/page break markers. */
    private StringBuilder accumulator = new StringBuilder();
    /** Raw character content of the current {@code <bibl>}; null between citations. */
    private StringBuilder allContent = new StringBuilder();
    private String output = null;
    /** Label applied to the tokens produced by the next call to {@link #writeField}. */
    private String currentTag = null;
    private ArrayList<String> labeled = null;
    /** Number of {@code </bibl>} end tags seen so far. */
    public int nbCitations = 0;
    public Lexicon lexicon = Lexicon.getInstance();
    /** Result of {@code lexicon.inJournalNames} for each citation, in document order. */
    public List<List<OffsetPosition>> journalsPositions = null;
    /** Result of {@code lexicon.inAbbrevJournalNames} for each citation, in document order. */
    public List<List<OffsetPosition>> abbrevJournalsPositions = null;
    /** Result of {@code lexicon.inConferenceNames} for each citation, in document order. */
    public List<List<OffsetPosition>> conferencesPositions = null;
    /** Result of {@code lexicon.inPublisherNames} for each citation, in document order. */
    public List<List<OffsetPosition>> publishersPositions = null;

    /** Creates a handler with empty result lists. */
    public TEICitationSaxParser() {
        this.labeled = new ArrayList<String>();
        this.journalsPositions = new ArrayList<List<OffsetPosition>>();
        this.abbrevJournalsPositions = new ArrayList<List<OffsetPosition>>();
        this.conferencesPositions = new ArrayList<List<OffsetPosition>>();
        this.publishersPositions = new ArrayList<List<OffsetPosition>>();
    }

    /**
     * Appends the received characters to the field accumulator and, while a
     * citation buffer exists, to the per-citation content buffer as well.
     */
    @Override
    public void characters(char[] buffer, int start, int length) {
        this.accumulator.append(buffer, start, length);
        if (this.allContent != null) {
            this.allContent.append(buffer, start, length);
        }
    }

    /**
     * @return the text accumulated since the last flush, trimmed
     */
    public String getText() {
        return this.accumulator.toString().trim();
    }

    /**
     * @return the live list of labeled token lines produced so far (not a copy)
     */
    public ArrayList<String> getLabeledResult() {
        return this.labeled;
    }

    /**
     * Handles a closing tag.
     *
     * <ul>
     * <li>Field elements: the accumulated text is written with the current label.</li>
     * <li>{@code lb} / {@code pb}: a break marker is appended to the accumulator and
     *     kept there so that the next flush turns it into an {@code @newline} line.</li>
     * <li>{@code bibl}: remaining text is written as {@code <other>}, the citation
     *     separator is emitted, the counter is incremented and the lexicon
     *     look-ups are run on the citation's raw content.</li>
     * </ul>
     * In every case except {@code lb}/{@code pb} the accumulator is emptied afterwards.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (isFieldElement(qName)) {
            this.writeField(this.getText());
        } else if (qName.equals("lb")) {
            // Return before the final clear: wiping the accumulator here would
            // discard the marker that was just appended.
            this.accumulator.append(" +L+ ");
            return;
        } else if (qName.equals("pb")) {
            this.accumulator.append(" +PAGE+ ");
            return;
        } else if (qName.equals("bibl")) {
            String remaining = this.getText();
            if (remaining.length() > 0) {
                this.currentTag = "<other>";
                this.writeField(remaining);
            }
            this.labeled.add("\n \n");
            ++this.nbCitations;
            String allString = this.allContent.toString();
            this.journalsPositions.add(this.lexicon.inJournalNames(allString));
            this.abbrevJournalsPositions.add(this.lexicon.inAbbrevJournalNames(allString));
            this.conferencesPositions.add(this.lexicon.inConferenceNames(allString));
            this.publishersPositions.add(this.lexicon.inPublisherNames(allString));
            // Stop collecting raw content until the next <bibl> opens.
            this.allContent = null;
        }
        this.accumulator.setLength(0);
    }

    /**
     * Handles an opening tag.
     *
     * <p>Any text pending in the accumulator is first written with the
     * {@code <other>} label, then the accumulator is emptied. After that the
     * current label is chosen from the element name and, for {@code title},
     * {@code note}, {@code biblScope} and {@code ptr}, from the {@code level} or
     * {@code type} attribute. When no rule matches, the current label is left
     * as it was.
     */
    @Override
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
        String pending = this.getText();
        if (pending.length() > 0) {
            this.currentTag = "<other>";
            this.writeField(pending);
        }
        this.accumulator.setLength(0);

        if (qName.equals("title")) {
            String level = atts.getValue("level");
            if ("a".equals(level)) {
                this.currentTag = "<title>";
            } else if ("j".equals(level)) {
                this.currentTag = "<journal>";
            } else if ("m".equals(level)) {
                this.currentTag = "<booktitle>";
            }
        } else if (qName.equals("author") || qName.equals("authors")) {
            this.currentTag = "<author>";
        } else if (qName.equals("editor")) {
            this.currentTag = "<editor>";
        } else if (qName.equals("date")) {
            this.currentTag = "<date>";
        } else if (qName.equals("keywords") || qName.equals("keyword")) {
            this.currentTag = "<keyword>";
        } else if (qName.equals("orgName")) {
            this.currentTag = "<institution>";
        } else if (qName.equals("note")) {
            // A bare <note> is a note; a note carrying attributes only changes
            // the label when it is typed as a report.
            if (atts.getLength() == 0) {
                this.currentTag = "<note>";
            } else if ("report".equals(atts.getValue("type"))) {
                this.currentTag = "<tech>";
            }
        } else if (qName.equals("biblScope")) {
            String type = atts.getValue("type");
            if ("vol".equals(type) || "volume".equals(type)) {
                this.currentTag = "<volume>";
            } else if ("issue".equals(type) || "number".equals(type)) {
                this.currentTag = "<issue>";
            } else if ("pp".equals(type)) {
                this.currentTag = "<pages>";
            }
        } else if (qName.equals("pubPlace")) {
            this.currentTag = "<location>";
        } else if (qName.equals("publisher")) {
            this.currentTag = "<publisher>";
        } else if (qName.equals("ptr")) {
            if ("web".equals(atts.getValue("type"))) {
                this.currentTag = "<web>";
            }
        } else if (qName.equals("idno") || qName.equals("pubnum")) {
            this.currentTag = "<pubnum>";
        } else if (qName.equals("bibl")) {
            // New citation: start collecting its raw content from scratch
            // (the accumulator has already been emptied above).
            this.allContent = new StringBuilder();
        }
    }

    /** Tells whether {@code qName} is one of the elements flushed as a field on close. */
    private static boolean isFieldElement(String qName) {
        for (String name : FIELD_ELEMENTS) {
            if (name.equals(qName)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tokenizes {@code text} and appends one labeled line per token, using the
     * current label.
     *
     * <p>A leading {@code (}, {@code [} or {@code "} (on tokens longer than one
     * character) and a trailing punctuation character are emitted as separate
     * tokens. The {@code +L+} and {@code +PAGE+} markers become
     * {@code @newline} lines. Only the first non-empty token can receive the
     * {@code I-} prefix; this includes the case where that token is a marker,
     * in which case no token of the field is prefixed.
     *
     * @param text the field text to tokenize and label
     */
    private void writeField(String text) {
        List<String> tokens = TextUtilities.segment(text, SEGMENTATION_PUNCTUATION);
        boolean begin = true;
        for (String rawToken : tokens) {
            String tok = rawToken.trim();
            if (tok.length() == 0) {
                continue;
            }
            if (tok.equals("+L+") || tok.equals("+PAGE+")) {
                this.labeled.add("@newline\n");
            } else {
                int lastIndex = tok.length() - 1;
                char lastChar = tok.charAt(lastIndex);
                boolean punct1 = TRAILING_PUNCTUATION.indexOf(lastChar) >= 0;
                int contentEnd = punct1 ? lastIndex : tok.length();
                String content = tok.substring(0, contentEnd);

                char firstChar = tok.charAt(0);
                if (tok.length() > 1 && OPENING_CHARACTERS.indexOf(firstChar) >= 0) {
                    content = tok.substring(1, contentEnd);
                    this.addLabeled(String.valueOf(firstChar), begin);
                    begin = false;
                }
                if (content.length() > 0) {
                    this.addLabeled(content, begin);
                    begin = false;
                }
                if (punct1) {
                    this.addLabeled(String.valueOf(lastChar), begin);
                    begin = false;
                }
            }
            begin = false;
        }
    }

    /** Appends one {@code token label} line, prefixing the label with {@code I-} when {@code begin} is set. */
    private void addLabeled(String token, boolean begin) {
        this.labeled.add(token + (begin ? " I-" : " ") + this.currentTag + "\n");
    }
}

