/*
 * Decompiled with CFR 0.152.
 */
package org.grobid.trainer;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.grobid.core.GrobidModels;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.mock.MockContext;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.trainer.AbstractTrainer;
import org.grobid.trainer.sax.TEIHeaderSaxParser;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Trainer for the header model.
 *
 * <p>Builds labeled example files by combining, for each TEI file of a corpus, the labels
 * produced by {@link TEIHeaderSaxParser} with the lines of the matching {@code .header}
 * file, and writes the result to a training and/or an evaluation output file.
 */
public class HeaderTrainer
extends AbstractTrainer {
    /** Charset name used for every file read or written by this trainer. */
    private static final String ENCODING = "UTF8";

    /**
     * Number of consecutive labeled entries inspected, starting at the current alignment
     * position, when looking for the label of one header line.
     */
    private static final int LABEL_LOOKAHEAD = 7;

    /** Accepts the TEI files of the corpus ({@code *.tei} and {@code *.tei.xml}). */
    private static final FilenameFilter TEI_FILE_FILTER = new FilenameFilter(){

        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".tei") || name.endsWith(".tei.xml");
        }
    };

    public HeaderTrainer() {
        super(GrobidModels.HEADER);
    }

    /**
     * Generates training data only, from the {@code tei} and {@code headers}
     * sub-directories of {@code corpusPath}.
     *
     * @param corpusPath corpus root directory
     * @param trainingOutputPath file receiving all generated examples
     * @return the number of TEI files found in the corpus
     */
    @Override
    public int createCRFPPData(File corpusPath, File trainingOutputPath) {
        return this.addFeaturesHeaders(corpusPath.getAbsolutePath() + "/tei", corpusPath.getAbsolutePath() + "/headers", trainingOutputPath, null, 1.0);
    }

    /**
     * Generates training and evaluation data from the {@code tei} and {@code headers}
     * sub-directories of {@code corpusDir}, splitting the examples at random.
     *
     * @param corpusDir corpus root directory
     * @param trainingOutputPath file receiving the training examples
     * @param evalOutputPath file receiving the evaluation examples
     * @param splitRatio probability that an example goes to the training file
     * @return the number of TEI files found in the corpus
     */
    @Override
    public int createCRFPPData(File corpusDir, File trainingOutputPath, File evalOutputPath, double splitRatio) {
        return this.addFeaturesHeaders(corpusDir.getAbsolutePath() + "/tei", corpusDir.getAbsolutePath() + "/headers", trainingOutputPath, evalOutputPath, splitRatio);
    }

    /**
     * Builds one labeled example per TEI file that has a matching header file and writes it
     * to the training or evaluation output.
     *
     * <p>When only one output is given, every example goes to it. When both are given, each
     * example goes to the training output with probability {@code splitRatio} and to the
     * evaluation output otherwise. When neither is given, nothing is written.
     *
     * @param sourceFile path of the directory containing the TEI files
     * @param headerPath path of the directory containing the {@code .header} files
     * @param trainingOutputPath training output file, or {@code null}
     * @param evalOutputPath evaluation output file, or {@code null}
     * @param splitRatio probability of routing an example to the training output when both
     *        outputs are present
     * @return the number of TEI files found (including those skipped because no header file
     *         matched), or 0 if {@code sourceFile} cannot be listed
     * @throws GrobidException if parsing, reading or writing fails
     */
    public int addFeaturesHeaders(String sourceFile, String headerPath, File trainingOutputPath, File evalOutputPath, double splitRatio) {
        System.out.println("TEI files: " + sourceFile);
        System.out.println("header info files: " + headerPath);
        if (trainingOutputPath != null) {
            System.out.println("outputPath for training data: " + trainingOutputPath);
        }
        if (evalOutputPath != null) {
            System.out.println("outputPath for evaluation data: " + evalOutputPath);
        }
        int nbExamples = 0;
        Writer trainingWriter = null;
        Writer evalWriter = null;
        try {
            File[] teiFiles = new File(sourceFile).listFiles(TEI_FILE_FILTER);
            if (teiFiles == null) {
                return 0;
            }
            nbExamples = teiFiles.length;
            System.out.println(nbExamples + " tei files");
            if (trainingOutputPath != null) {
                trainingWriter = new OutputStreamWriter(new FileOutputStream(trainingOutputPath), ENCODING);
            }
            if (evalOutputPath != null) {
                evalWriter = new OutputStreamWriter(new FileOutputStream(evalOutputPath), ENCODING);
            }
            SAXParserFactory parserFactory = SAXParserFactory.newInstance();
            // Listed lazily, once: the directory is only needed if there is a TEI file.
            File[] headerFiles = null;
            for (File teiFile : teiFiles) {
                TEIHeaderSaxParser teiHandler = new TEIHeaderSaxParser();
                teiHandler.setFileName(teiFile.getName());
                SAXParser saxParser = parserFactory.newSAXParser();
                saxParser.parse(teiFile, teiHandler);
                List<String> labeled = teiHandler.getLabeledResult();
                if (headerFiles == null) {
                    headerFiles = HeaderTrainer.listHeaderFiles(headerPath);
                }
                String headerFileName = HeaderTrainer.findHeaderFileName(headerFiles, teiHandler.getPDFName());
                if (headerFileName == null) {
                    // No header file for this TEI file: nothing to align, skip it.
                    continue;
                }
                String aligned = HeaderTrainer.alignLabels(headerPath + "/" + headerFileName, labeled);
                String example = HeaderTrainer.filterLabeledLines(aligned) + "\n";
                Writer target = HeaderTrainer.chooseWriter(trainingWriter, evalWriter, splitRatio);
                if (target != null) {
                    target.write(example);
                }
            }
            // Close explicitly on the success path so that a failing flush is reported.
            if (trainingWriter != null) {
                trainingWriter.close();
            }
            if (evalWriter != null) {
                evalWriter.close();
            }
        }
        catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid.", e);
        }
        finally {
            // Releases the files when an exception interrupted the run.
            HeaderTrainer.closeQuietly(trainingWriter);
            HeaderTrainer.closeQuietly(evalWriter);
        }
        return nbExamples;
    }

    /**
     * Lists the content of the header directory.
     *
     * @throws IOException if the directory does not exist or cannot be read
     */
    private static File[] listHeaderFiles(String headerPath) throws IOException {
        File[] files = new File(headerPath).listFiles();
        if (files == null) {
            throw new IOException("Cannot list header directory: " + headerPath);
        }
        return files;
    }

    /**
     * Returns the name of the first file that is either {@code <pdfName>.header} or starts
     * with {@code <pdfName>._} and ends with {@code .header}, or {@code null} if none does.
     */
    private static String findHeaderFileName(File[] headerFiles, String pdfName) {
        String exactName = pdfName + ".header";
        String variantPrefix = pdfName + "._";
        for (File candidate : headerFiles) {
            String candidateName = candidate.getName();
            if (candidateName.equals(exactName)
                    || (candidateName.startsWith(variantPrefix) && candidateName.endsWith(".header"))) {
                return candidateName;
            }
        }
        return null;
    }

    /**
     * Reads the header file line by line and appends to each line the label of the matching
     * entry of {@code labeled}, when one is found.
     *
     * <p>The first space-delimited token of a line is compared with the first token of at
     * most {@link #LABEL_LOOKAHEAD} labeled entries starting at the current alignment
     * position; on a match the second token of the entry is appended and the alignment
     * position moves past that entry. Lines are returned newline-terminated.
     */
    private static String alignLabels(String headerFilePath, List<String> labeled) throws IOException {
        StringBuilder aligned = new StringBuilder();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(headerFilePath), ENCODING));
        try {
            int position = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                aligned.append(line);
                int firstSpace = line.indexOf(' ');
                // A line without any space has no token to align and never matches.
                String token = firstSpace == -1 ? null : line.substring(0, firstSpace);
                int limit = Math.min(labeled.size(), position + LABEL_LOOKAHEAD);
                for (int i = position; i < limit; ++i) {
                    StringTokenizer entry = new StringTokenizer(labeled.get(i), " ");
                    if (entry.hasMoreTokens() && entry.nextToken().equals(token)) {
                        // NOTE(review): assumes every labeled entry is "token label";
                        // an entry without a second token raises NoSuchElementException.
                        aligned.append(" ").append(entry.nextToken());
                        position = i + 1;
                        break;
                    }
                }
                aligned.append("\n");
            }
        }
        finally {
            reader.close();
        }
        return aligned.toString();
    }

    /**
     * Keeps the labeled lines of {@code aligned} and drops the unlabeled ones.
     *
     * <p>An unlabeled line sitting between two lines carrying exactly the same label is kept
     * and given that label. Empty lines are ignored.
     */
    private static String filterLabeledLines(String aligned) {
        StringBuilder kept = new StringBuilder();
        StringTokenizer lines = new StringTokenizer(aligned, "\n");
        String previousLine = null;
        String previousLabel = null;
        String labelBeforePrevious = null;
        while (lines.hasMoreTokens()) {
            String currentLine = lines.nextToken();
            String label = HeaderTrainer.extractLabel(currentLine);
            if (previousLine != null) {
                boolean previousIsGap = label != null && previousLabel == null && labelBeforePrevious != null;
                if (previousIsGap) {
                    if (label.equals(labelBeforePrevious)) {
                        // Fill the one-line hole with the label of its two neighbours.
                        previousLabel = label;
                        previousLine = previousLine + " " + label;
                        kept.append(previousLine).append("\n");
                    }
                } else if (previousLabel != null) {
                    kept.append(previousLine).append("\n");
                }
            }
            previousLine = currentLine;
            labelBeforePrevious = previousLabel;
            previousLabel = label;
        }
        if (previousLabel != null) {
            kept.append(previousLine).append("\n");
        }
        return kept.toString();
    }

    /**
     * Returns the last space-delimited token of {@code line} if it looks like a label
     * (starts with {@code <} or {@code I-<}), otherwise {@code null}.
     */
    private static String extractLabel(String line) {
        StringTokenizer tokens = new StringTokenizer(line, " ");
        String last = null;
        while (tokens.hasMoreTokens()) {
            last = tokens.nextToken();
        }
        if (last != null && (last.charAt(0) == '<' || last.startsWith("I-<"))) {
            return last;
        }
        return null;
    }

    /**
     * Picks the writer receiving the next example: the only available one, or a random
     * choice between both (training with probability {@code splitRatio}). Returns
     * {@code null} when neither writer is available.
     */
    private static Writer chooseWriter(Writer trainingWriter, Writer evalWriter, double splitRatio) {
        if (trainingWriter == null) {
            return evalWriter;
        }
        if (evalWriter == null) {
            return trainingWriter;
        }
        return Math.random() <= splitRatio ? trainingWriter : evalWriter;
    }

    /** Closes {@code writer} if it is not {@code null}, ignoring any failure. */
    private static void closeQuietly(Writer writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        }
        catch (IOException ignored) {
            // Cleanup only: either the writer was already closed on the success path,
            // or a more relevant exception is already propagating.
        }
    }

    /**
     * Command line entry point: runs training, then evaluation, of the header model.
     *
     * @param args unused
     * @throws Exception if initialisation, training or evaluation fails
     */
    public static void main(String[] args) throws Exception {
        MockContext.setInitialContext();
        try {
            GrobidProperties.getInstance();
            AbstractTrainer.runTraining(new HeaderTrainer());
            AbstractTrainer.runEvaluation(new HeaderTrainer());
        }
        finally {
            // Tear the context down even when training or evaluation fails.
            MockContext.destroyInitialContext();
        }
    }
}

