/*
 * Decompiled with CFR 0.152.
 */
package org.grobid.trainer;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.grobid.core.GrobidModels;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.mock.MockContext;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.trainer.AbstractTrainer;
import org.grobid.trainer.sax.TEIEbookSaxParser;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Trainer for the {@code GrobidModels.EBOOK} model.
 *
 * <p>Builds a single training file by pairing the feature lines found under
 * {@code <corpus>/content/} with the labels extracted from the matching TEI
 * annotations under {@code <corpus>/structures/}.
 */
public class EbookTrainer extends AbstractTrainer {
    /** File-name suffix of the TEI annotation files picked up from the structures directory. */
    private static final String TEI_SUFFIX = ".tei.xml";

    /**
     * Look-ahead limit used while searching the labeled entries for the token of a
     * feature line: the scan for one line stops after examining an entry whose
     * distance from the first unconsumed entry exceeds this value.
     */
    private static final int MAX_LOOKAHEAD = 5;

    public EbookTrainer() {
        super(GrobidModels.EBOOK);
    }

    /**
     * Split-corpus variant; not implemented for this model.
     *
     * @return always {@code 0}; nothing is read or written
     */
    @Override
    public int createCRFPPData(File corpusDir, File trainingOutputPath, File evalOutputPath, double splitRatio) {
        return 0;
    }

    /**
     * Writes the labeled training data for every {@code *.tei.xml} file found in
     * {@code <sourcePathLabel>/structures/} to {@code outputPath}.
     *
     * <p>For each TEI file, the feature file with the same name minus the
     * {@code .tei.xml} suffix is read from {@code <sourcePathLabel>/content/}, its
     * lines are aligned with the labels produced by {@link TEIEbookSaxParser}, and
     * the result is appended to the output followed by a newline.
     *
     * @param sourcePathLabel corpus root containing the {@code structures} and
     *                        {@code content} sub-directories
     * @param outputPath      file that receives the training data (UTF-8)
     * @return the number of TEI files processed, or {@code 0} when the structures
     *         directory cannot be listed
     * @throws GrobidException wrapping any failure while parsing, reading or writing
     */
    @Override
    public int createCRFPPData(File sourcePathLabel, File outputPath) {
        int totalExamples = 0;
        try {
            System.out.println("sourcePathLabel: " + sourcePathLabel);
            System.out.println("outputPath: " + outputPath);
            File structuresDir = new File(sourcePathLabel.getAbsolutePath() + "/structures/");
            File[] refFiles = structuresDir.listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    return name.endsWith(TEI_SUFFIX);
                }
            });
            // listFiles yields null when the path is not a readable directory.
            if (refFiles == null) {
                return 0;
            }
            System.out.println(refFiles.length + " tei files");
            SAXParserFactory spf = SAXParserFactory.newInstance();
            // try-with-resources: the output is closed even if a later file fails.
            try (Writer writer = new OutputStreamWriter(new FileOutputStream(outputPath), StandardCharsets.UTF_8)) {
                for (File teiFile : refFiles) {
                    String name = teiFile.getName();
                    System.out.println(name);
                    TEIEbookSaxParser handler = new TEIEbookSaxParser();
                    spf.newSAXParser().parse(teiFile, (DefaultHandler) handler);
                    List<String> labeled = handler.getLabeledResult();
                    File featureFile = new File(
                            sourcePathLabel.getAbsolutePath() + "/content/" + name.replace(TEI_SUFFIX, ""));
                    writer.write(alignLabels(featureFile, labeled) + "\n");
                    ++totalExamples;
                }
            }
        } catch (Exception e) {
            throw new GrobidException("An exception occurred while running Grobid.", e);
        }
        return totalExamples;
    }

    /**
     * Pairs each line of a feature file with the tag of the labeled entry that
     * starts with the same token.
     *
     * <p>The token of a feature line is the text before its first space; a line
     * without a space has no token and never matches. Labeled entries are consumed
     * in order: the search for a line starts at the first entry not yet consumed
     * and gives up after a bounded look-ahead (see {@link #MAX_LOOKAHEAD}), in
     * which case the line is dropped from the output.
     *
     * @param featureFile UTF-8 feature file, one token per line
     * @param labeled     space-separated {@code token tag} entries
     * @return the concatenation of {@code line + " " + tag} for every matched line
     * @throws IOException if the feature file cannot be read
     * @throws java.util.NoSuchElementException if a matching labeled entry has no
     *         second token
     */
    private static String alignLabels(File featureFile, List<String> labeled) throws IOException {
        StringBuilder aligned = new StringBuilder();
        // Index of the first labeled entry that has not been matched yet.
        int next = 0;
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(featureFile), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                int space = line.indexOf(' ');
                String token = (space != -1) ? line.substring(0, space) : null;
                for (int i = next; i < labeled.size(); ++i) {
                    StringTokenizer st = new StringTokenizer(labeled.get(i), " ");
                    if (st.hasMoreTokens() && st.nextToken().equals(token)) {
                        // The tag is appended verbatim and no separator is added here;
                        // NOTE(review): presumably the labeled entries carry their own
                        // line terminator - confirm against TEIEbookSaxParser.
                        String tag = st.nextToken();
                        aligned.append(line).append(" ").append(tag);
                        next = i + 1;
                        break;
                    }
                    if (i - next > MAX_LOOKAHEAD) {
                        // Too far from the expected position: skip this feature line.
                        break;
                    }
                }
            }
        }
        return aligned.toString();
    }

    /**
     * Evaluation is not available for this model.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public String evaluate() {
        throw new UnsupportedOperationException("Evaluation for E-Books is not implemented yet");
    }

    /**
     * Command-line entry point: sets up the mock context, runs the training and
     * tears the context down again, even when training fails.
     */
    public static void main(String[] args) throws Exception {
        MockContext.setInitialContext();
        try {
            GrobidProperties.getInstance();
            AbstractTrainer.runTraining(new EbookTrainer());
        } finally {
            MockContext.destroyInitialContext();
        }
    }
}

