/*
 * Decompiled with CFR 0.152.
 */
package org.grobid.trainer.sax;

import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.StringTokenizer;
import org.grobid.core.exceptions.GrobidException;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that dumps the cleaned character content of {@code <text>} elements
 * into a series of UTF-8 text files.
 *
 * <p>Output files are named {@code <path>text-<n>.txt}, where {@code <path>} is the
 * prefix given to the constructor and {@code <n>} starts at 0. A new file is started
 * by the first {@code <page>} element that carries an {@code id} attribute, and again
 * once more than 4000 such pages have been counted for the current file.
 *
 * <p>Each {@code <text>} body is split on newlines. Every line has its
 * square-bracketed spans removed and up to five leading list/indent markers
 * stripped; lines that look like boilerplate (see {@link #cleanLine(String)}) are
 * dropped. Judging by the class name the input is presumably a wiki XML dump, but
 * nothing here depends on that.
 *
 * <p>Instances hold mutable parsing state and an open file, so they are not meant to
 * be shared between concurrent parses.
 */
public class WikiTextExtractSaxParser
extends DefaultHandler {
    /** Page count above which the next {@code <page>} element starts a new output file. */
    private static final int PAGE_ROLLOVER_THRESHOLD = 4000;

    /** Maximum number of leading marker characters removed from a single line. */
    private static final int MAX_LEADING_MARKERS = 5;

    /** Characters treated as list/indent markers when they start a line. */
    private static final String LEADING_MARKERS = ".*:\";";

    /** A cleaned line starting with any of these prefixes is not written out. */
    private static final String[] REJECTED_PREFIXES = {
        "Help", "NONE", "beg", ": See also", ": \"See also", ":See also", "Wiktionary", "subgroup"
    };

    /** Message used for every wrapped failure, kept identical to the historical one. */
    private static final String FAILURE_MESSAGE = "An exception occured while running Grobid.";

    // SAX callbacks for one parse arrive on a single thread, so no synchronisation is needed.
    private final StringBuilder accumulator = new StringBuilder();
    private String pageId = null;
    private Writer writer = null;
    private boolean textBegin = false;
    private int page = 0;
    private String path = null;
    private int fileCount = 0;

    /**
     * Creates a handler without a path prefix; output files are then named
     * {@code text-<n>.txt} and resolved against the working directory.
     */
    public WikiTextExtractSaxParser() {
    }

    /**
     * Creates a handler that writes its output files under the given prefix.
     *
     * @param p string prepended verbatim to {@code text-<n>.txt}; include a trailing
     *          separator if it denotes a directory. {@code null} is treated as empty.
     */
    public WikiTextExtractSaxParser(String p) {
        this.path = p;
    }

    /**
     * Accumulates character data, but only while inside a {@code <text>} element.
     */
    @Override
    public void characters(char[] buffer, int start, int length) {
        if (this.textBegin) {
            this.accumulator.append(buffer, start, length);
        }
    }

    /**
     * On {@code </text>}, cleans the accumulated content line by line and appends
     * the surviving lines to the current output file.
     *
     * @throws GrobidException if writing fails, or if no output file has been opened
     *                         yet (no {@code <page id=...>} was seen before the text)
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (!"text".equals(qName)) {
            return;
        }
        this.textBegin = false;
        StringTokenizer lines = new StringTokenizer(this.accumulator.toString(), "\n");
        while (lines.hasMoreTokens()) {
            String cleaned = cleanLine(lines.nextToken());
            if (cleaned == null) {
                continue;
            }
            try {
                this.writer.write(cleaned);
                this.writer.write("\n");
                // Flush per line so already-extracted text survives an aborted parse.
                this.writer.flush();
            }
            catch (Exception e) {
                throw new GrobidException(FAILURE_MESSAGE, (Throwable)e);
            }
        }
        this.pageId = null;
    }

    /**
     * Tracks {@code <page>} elements (rolling over to a new output file when due)
     * and starts accumulation on {@code <text>}.
     *
     * @throws GrobidException if the next output file cannot be opened
     */
    @Override
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
        if ("page".equals(qName)) {
            int attributeCount = atts.getLength();
            for (int i = 0; i < attributeCount; ++i) {
                if (!"id".equals(atts.getQName(i))) {
                    continue;
                }
                this.pageId = atts.getValue(i);
                if (this.page > PAGE_ROLLOVER_THRESHOLD) {
                    this.page = 0;
                }
                if (this.page == 0) {
                    openNextOutputFile();
                }
                ++this.page;
            }
        } else if ("text".equals(qName)) {
            this.textBegin = true;
            this.accumulator.setLength(0);
        }
    }

    /**
     * Closes the last output file when the document ends, so the file handle is
     * released. The page counter is reset so that a handler reused for another
     * document starts a fresh output file instead of writing to a closed one.
     *
     * @throws GrobidException if closing the file fails
     */
    @Override
    public void endDocument() throws SAXException {
        this.page = 0;
        if (this.writer == null) {
            return;
        }
        try {
            this.writer.close();
        }
        catch (Exception e) {
            throw new GrobidException(FAILURE_MESSAGE, (Throwable)e);
        }
        finally {
            this.writer = null;
        }
    }

    /** Closes the current output file, if any, and opens the next numbered one. */
    private void openNextOutputFile() {
        try {
            if (this.writer != null) {
                this.writer.close();
                this.writer = null;
            }
            // A null prefix would otherwise be rendered as the literal text "null".
            String prefix = this.path == null ? "" : this.path;
            String fileName = prefix + "text-" + this.fileCount + ".txt";
            System.out.println(fileName);
            FileOutputStream os = new FileOutputStream(new File(fileName), false);
            this.writer = new OutputStreamWriter((OutputStream)os, "UTF-8");
            ++this.fileCount;
        }
        catch (Exception e) {
            throw new GrobidException(FAILURE_MESSAGE, (Throwable)e);
        }
    }

    /**
     * Applies all per-line filters and clean-ups.
     *
     * @param rawLine one line of accumulated text, without its newline
     * @return the cleaned line, or {@code null} if the line must not be written
     */
    private static String cleanLine(String rawLine) {
        if (rawLine.length() == 0 || rawLine.startsWith("__") || rawLine.startsWith("PMID") || rawLine.startsWith("#")) {
            return null;
        }
        String line = stripBracketedSpans(rawLine).trim();
        if (line.indexOf('|') != -1 || line.startsWith("poly")) {
            return null;
        }
        // Peel off at most MAX_LEADING_MARKERS markers; stop at the first non-marker.
        for (int i = 0; i < MAX_LEADING_MARKERS && startsWithMarker(line); ++i) {
            line = line.substring(1).trim();
        }
        if (line.length() == 0) {
            return null;
        }
        for (String prefix : REJECTED_PREFIXES) {
            if (line.startsWith(prefix)) {
                return null;
            }
        }
        return line;
    }

    /** Tells whether the line begins with one of the {@link #LEADING_MARKERS}. */
    private static boolean startsWithMarker(String line) {
        return line.length() > 0 && LEADING_MARKERS.indexOf(line.charAt(0)) != -1;
    }

    /**
     * Removes every {@code [...]} / {@code [[...]]} span from the line.
     *
     * <p>A span runs from a {@code '['} to the first {@code ']'} after it; a second
     * {@code ']'} immediately following is consumed as part of the terminator. If a
     * {@code '['} has no closing bracket, only that bracket is dropped and the rest of
     * the line is kept as is.
     */
    private static String stripBracketedSpans(String line) {
        int length = line.length();
        StringBuilder kept = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            int open = line.indexOf('[', pos);
            if (open == -1) {
                kept.append(line, pos, length);
                break;
            }
            kept.append(line, pos, open);
            // Search after the opening bracket so a stray earlier ']' cannot match.
            int close = line.indexOf(']', open + 1);
            if (close == -1) {
                kept.append(line, open + 1, length);
                break;
            }
            pos = close + 1;
            if (pos < length && line.charAt(pos) == ']') {
                ++pos;
            }
        }
        return kept.toString();
    }
}

