/*
 * Decompiled with CFR 0.152.
 */
package edu.umd.hooka.corpora;

import edu.umd.hooka.alignment.aer.ReferenceAlignment;
import edu.umd.hooka.corpora.Chunk;
import edu.umd.hooka.corpora.Language;
import edu.umd.hooka.corpora.LanguagePair;
import edu.umd.hooka.corpora.ParallelChunk;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that reads parallel-corpus XML into {@link ParallelChunk} objects.
 *
 * <p>The handler recognises four element names, compared case-insensitively:
 * <ul>
 *   <li>{@code pdoc} (attribute {@code name}) - the enclosing document;</li>
 *   <li>{@code pchunk} (attribute {@code name}) - one parallel chunk;</li>
 *   <li>{@code s} (attribute {@code lang}) - the text of one segment of the chunk;</li>
 *   <li>{@code wordalignment} (attribute {@code langpair}) - reference alignment
 *       points for a language pair; it must follow the {@code s} elements it refers to.</li>
 * </ul>
 * Any other element name makes the handler throw a {@link SAXException}.
 *
 * <p>Every completed {@code pchunk} is passed to a {@link PChunkCallback}. The class also
 * contains a converter from line-aligned text files to this XML format.
 *
 * <p>Instances keep per-parse state in plain fields and reuse a single {@link SAXParser}.
 */
public class ParallelCorpusReader
extends DefaultHandler {
    /** Last chunk delivered to the default callback; returned by {@link #parseString(String)}. */
    private ParallelChunk resultChunk = null;
    /** Receiver of every completed {@code pchunk}. */
    PChunkCallback cb_;
    /** Parser owned by this reader; created in the constructors. */
    SAXParser sp = null;
    /** Chunk currently being assembled (set when a {@code pchunk} element starts). */
    ParallelChunk pchunk = null;
    /** Language of the {@code s} element currently being read. */
    Language lang;
    /** Language pair of the {@code wordalignment} element currently being read. */
    LanguagePair langpair;
    /** Character data collected for the current {@code s}/{@code wordalignment}; null otherwise. */
    StringBuffer tempVal;
    /** Value of the {@code name} attribute of the enclosing {@code pdoc}. */
    String docName;
    /** Number of {@code pchunk} elements completed so far. */
    int pchunkCount = 0;
    /** Number of non-empty {@code s} segments added so far. */
    int chunkCount = 0;
    /** Number of reference alignments added so far. */
    int refAlignCount = 0;

    /**
     * Creates a reader whose callback stores the most recently completed chunk so that
     * {@link #parseString(String)} can return it.
     *
     * @throws RuntimeException if the XML parser cannot be created (the cause is attached)
     */
    public ParallelCorpusReader() {
        this.cb_ = new ChunkSetCB(this);
        try {
            this.sp = SAXParserFactory.newInstance().newSAXParser();
        }
        catch (Exception e) {
            // Chain the cause instead of printing it, so callers get the full story in one place.
            throw new RuntimeException("Couldn't build XML parser", e);
        }
    }

    /**
     * Creates a reader that forwards every completed chunk to {@code cb}.
     *
     * @param cb callback invoked at the end of each {@code pchunk} element
     * @throws RuntimeException if the XML parser cannot be created (the cause is attached)
     */
    private ParallelCorpusReader(PChunkCallback cb) {
        this.cb_ = cb;
        try {
            this.sp = SAXParserFactory.newInstance().newSAXParser();
        }
        catch (Exception e) {
            throw new RuntimeException("Failed " + e, e);
        }
    }

    /**
     * Parses an XML fragment held in a string.
     *
     * @param xml the XML text to parse
     * @return the last {@code pchunk} completed during the parse, or {@code null} if the
     *         input contained none
     * @throws RuntimeException if parsing fails; the underlying {@link SAXException} or
     *         {@link IOException} is attached as the cause
     */
    public ParallelChunk parseString(String xml) {
        this.resultChunk = null;
        try {
            this.sp.parse(new InputSource(new StringReader(xml)), (DefaultHandler)this);
        }
        catch (SAXException se) {
            // Never hand back a chunk from a parse that did not complete.
            this.resultChunk = null;
            throw new RuntimeException("SaxE: " + se + "\n" + xml, se);
        }
        catch (IOException ie) {
            this.resultChunk = null;
            throw new RuntimeException("ioe: " + ie, ie);
        }
        return this.resultChunk;
    }

    /**
     * Parses a whole XML document and passes each completed {@code pchunk} to {@code cb}.
     *
     * <p>Parse and I/O failures are reported on standard error and otherwise ignored
     * (best-effort); chunks delivered before the failure have already reached the callback.
     *
     * @param file location of the document; it is handed to
     *             {@link SAXParser#parse(String, DefaultHandler)}, which treats it as a URI
     * @param cb   callback invoked for every completed chunk
     * @throws RuntimeException if the XML parser cannot be created
     */
    public static void parseXMLDocument(String file, PChunkCallback cb) {
        ParallelCorpusReader pcr = new ParallelCorpusReader(cb);
        try {
            // The reader already owns a parser; there is no need to build a second one.
            pcr.sp.parse(file, (DefaultHandler)pcr);
        }
        catch (SAXException se) {
            se.printStackTrace();
        }
        catch (IOException ie) {
            ie.printStackTrace();
        }
    }

    /**
     * Starts collecting state for a recognised element.
     *
     * @throws SAXException if {@code qName} is not one of the recognised element names
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (qName.equalsIgnoreCase("pchunk")) {
            this.pchunk = new ParallelChunk();
            this.pchunk.setName(attributes.getValue("name"));
        } else if (qName.equalsIgnoreCase("s")) {
            this.lang = Language.languageForISO639_1(attributes.getValue("lang"));
            this.tempVal = new StringBuffer();
        } else if (qName.equalsIgnoreCase("wordalignment")) {
            this.tempVal = new StringBuffer();
            this.langpair = LanguagePair.languageForISO639_1Pair(attributes.getValue("langpair"));
        } else if (qName.equalsIgnoreCase("pdoc")) {
            this.docName = attributes.getValue("name");
        } else {
            throw new SAXException("Unknown tag: " + qName);
        }
    }

    /**
     * Appends character data to the current buffer. Text that arrives while no {@code s} or
     * {@code wordalignment} element is open (for example whitespace between elements) is dropped.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (this.tempVal != null) {
            this.tempVal.append(ch, start, length);
        }
    }

    /**
     * Finishes the element that just closed: stores a segment, stores a reference alignment,
     * delivers a completed chunk to the callback, or prints the end-of-document summary.
     *
     * @throws SAXException     if {@code qName} is not recognised, or if an {@code s} or
     *                          {@code wordalignment} element appears outside a {@code pchunk}
     * @throws RuntimeException if a {@code wordalignment} refers to a language for which the
     *                          current chunk holds no segment
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (qName.equalsIgnoreCase("pchunk")) {
            ++this.pchunkCount;
            this.cb_.handlePChunk(this.pchunk);
        } else if (qName.equalsIgnoreCase("s")) {
            this.requireOpenPChunk(qName);
            String s = this.takeBufferedText();
            if (s.length() == 0) {
                System.err.println(this.pchunk.getName() + ": Empty segment for lang=" + this.lang);
            } else {
                this.pchunk.addChunk(this.lang, new Chunk(s));
                ++this.chunkCount;
            }
        } else if (qName.equalsIgnoreCase("wordalignment")) {
            this.requireOpenPChunk(qName);
            Chunk sc = this.pchunk.getChunk(this.langpair.getSource());
            if (sc == null) {
                throw new RuntimeException("PChunk doesn't contain data for lang: " + this.langpair.getSource() + ".  Note: manual word alignment data must follow the chunk data.");
            }
            Chunk tc = this.pchunk.getChunk(this.langpair.getTarget());
            if (tc == null) {
                throw new RuntimeException("PChunk doesn't contain data for lang: " + this.langpair.getTarget() + ".  Note: manual word alignment data must follow the chunk data.");
            }
            ReferenceAlignment r = new ReferenceAlignment(sc.getLength(), tc.getLength());
            r.addAlignmentPointsPharaoh(this.takeBufferedText());
            this.pchunk.addReferenceAlignment(this.langpair, r);
            ++this.refAlignCount;
        } else if (qName.equalsIgnoreCase("pdoc")) {
            System.err.println("Finished parsing document " + this.docName);
            System.err.println("  pchunks: " + this.pchunkCount);
            System.err.println("  chunks: " + this.chunkCount);
            System.err.println("  ref alignments: " + this.refAlignCount);
        } else {
            throw new SAXException("Unknown tag: " + qName);
        }
    }

    /** Fails with a descriptive parse error when {@code tag} closes while no {@code pchunk} is open. */
    private void requireOpenPChunk(String tag) throws SAXException {
        if (this.pchunk == null) {
            throw new SAXException("<" + tag + "> element found outside of a <pchunk>");
        }
    }

    /**
     * Returns the trimmed text collected for the current element and always clears the buffer,
     * so stray character data after the element is not appended to a stale buffer.
     */
    private String takeBufferedText() {
        String text = this.tempVal == null ? "" : this.tempVal.toString().trim();
        this.tempVal = null;
        return text;
    }

    /**
     * Converts two line-aligned text files (and optionally a line-aligned alignment file) into
     * one parallel-corpus XML document.
     *
     * <p>Line <i>n</i> of each input becomes one {@code pchunk} named {@code label + n}. The
     * input files are read as UTF-8. Conversion stops at the end of {@code ifile1}, or earlier
     * (with a warning) if {@code ifile2} runs out first. Any failure, including an inconsistent
     * {@code readAlignments}/{@code afile1_2} combination, is reported on standard error and
     * not propagated.
     *
     * @param label          prefix for generated chunk names
     * @param ifile1         text file for language {@code le}
     * @param ifile2         text file for language {@code lf}
     * @param afile1_2       alignment file; must be set iff {@code readAlignments} is true
     * @param ofile          output XML file
     * @param oenc           character encoding of the output file
     * @param le             language code of {@code ifile1}
     * @param lf             language code of {@code ifile2}
     * @param readAlignments whether to read alignments from {@code afile1_2}
     */
    private static void convertToXMLDocument(String label, String ifile1, String ifile2, String afile1_2, String ofile, String oenc, String le, String lf, boolean readAlignments) {
        BufferedReader r1 = null;
        BufferedReader r2 = null;
        BufferedReader r1_2 = null;
        OutputStream out = null;
        BufferedWriter w = null;
        try {
            boolean haveAlignmentFile = afile1_2 != null && !afile1_2.equals("");
            if (readAlignments && !haveAlignmentFile) {
                throw new RuntimeException("I'm supposed to read alignments, but no alignment file is set!");
            }
            if (!readAlignments && haveAlignmentFile) {
                throw new RuntimeException("I'm not set to read alignments, but an alignment file is set!");
            }
            r1 = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(ifile1), "UTF8"));
            r2 = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(ifile2), "UTF8"));
            if (readAlignments) {
                r1_2 = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(afile1_2), "UTF8"));
            }
            // Keep a handle on the raw stream: if oenc is unsupported the writer is never
            // constructed, and the stream would otherwise leak.
            out = new FileOutputStream(ofile);
            OutputStreamWriter w1 = new OutputStreamWriter(out, oenc);
            Language de = Language.languageForISO639_1(lf);
            Language en = Language.languageForISO639_1(le);
            LanguagePair ende = null;
            if (readAlignments) {
                ende = LanguagePair.languageForISO639_1Pair(le + "-" + lf);
            }
            System.err.println("Reading " + en + " from: " + ifile1);
            System.err.println("Reading " + de + " from: " + ifile2);
            if (readAlignments) {
                System.err.println("Reading alignments (" + ende + ") from: " + afile1_2);
            }
            w = new BufferedWriter(w1);
            w.write("<?xml version=\"1.0\" encoding=\"" + w1.getEncoding() + "\"?>");
            w.newLine();
            // lastIndexOf returns -1 when there is no '/', so "+ 1" yields the whole name.
            String docName = ifile1.substring(ifile1.lastIndexOf('/') + 1);
            w.write("<pdoc name=\"" + ParallelCorpusReader.escapeXmlAttribute(docName) + "\">");
            w.newLine();
            int lc = 0;
            boolean alignmentsExhausted = false;
            String eLine;
            while ((eLine = r1.readLine()) != null) {
                String fLine = r2.readLine();
                if (fLine == null) {
                    System.err.println("WARNING: " + ifile2 + " has fewer lines than " + ifile1);
                    break;
                }
                // Count only sentence pairs that are actually converted.
                ++lc;
                String aLine = null;
                if (readAlignments && !alignmentsExhausted) {
                    aLine = r1_2.readLine();
                    if (aLine == null) {
                        // Report once rather than for every remaining sentence.
                        alignmentsExhausted = true;
                        System.err.println(afile1_2 + " has fewer lines than corpora files -- dropping alignments for remaining sentences");
                    }
                }
                Chunk ec = new Chunk(eLine);
                Chunk fc = new Chunk(fLine);
                ParallelChunk p = new ParallelChunk();
                p.setName(label + lc);
                p.addChunk(de, fc);
                p.addChunk(en, ec);
                if (aLine != null) {
                    ReferenceAlignment ra = new ReferenceAlignment(ec.getLength(), fc.getLength());
                    try {
                        ra.addAlignmentPointsPharaoh(aLine);
                        p.addReferenceAlignment(ende, ra);
                    }
                    catch (RuntimeException re) {
                        // A bad alignment line drops only that alignment; the sentence pair is kept.
                        System.err.println("Couldn't set alignment points for sentence # " + lc);
                        System.err.println(" " + en + ": len=" + ec.getLength() + " words=" + ec);
                        System.err.println(" " + de + ": len=" + fc.getLength() + " words=" + fc);
                        System.err.println(" " + ende + ": " + aLine);
                        System.err.println(" reason: " + re);
                    }
                }
                w.write(p.toXML());
            }
            if (r2.readLine() != null) {
                System.err.println("WARNING: " + ifile2 + " has more lines than " + ifile1);
            }
            w.write("</pdoc>");
            w.newLine();
            // Close explicitly on the success path so a failed final flush is reported below.
            w.close();
            System.out.println("Converted " + lc + " sentences");
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            ParallelCorpusReader.closeQuietly(w);
            ParallelCorpusReader.closeQuietly(out);
            ParallelCorpusReader.closeQuietly(r1);
            ParallelCorpusReader.closeQuietly(r2);
            ParallelCorpusReader.closeQuietly(r1_2);
        }
    }

    /** Escapes the characters that may not appear literally inside a double-quoted XML attribute. */
    private static String escapeXmlAttribute(String value) {
        return value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;");
    }

    /** Closes {@code c} if it is non-null, ignoring any failure (used for cleanup only). */
    private static void closeQuietly(java.io.Closeable c) {
        if (c == null) {
            return;
        }
        try {
            c.close();
        }
        catch (IOException ignored) {
            // Cleanup is best-effort; the primary error, if any, has already been reported.
        }
    }

    /**
     * Runs the converter on a fixed set of input and output paths. The arguments are ignored.
     */
    public static void main(String[] args) {
        ParallelCorpusReader.convertToXMLDocument("koen_jhu_", "/Users/redpony/bitexts/kkn-eng-alignments/kkn.utf8", "/Users/redpony/bitexts/kkn-eng-alignments/eng", "/Users/redpony/bitexts/kkn-eng-alignments/align", "/tmp/foo.xml", "utf8", "ko", "en", true);
    }

    /** Default callback: remembers the most recently completed chunk on the owning reader. */
    static class ChunkSetCB
    implements PChunkCallback {
        ParallelCorpusReader pcr_;

        ChunkSetCB(ParallelCorpusReader pcr) {
            this.pcr_ = pcr;
        }

        public void handlePChunk(ParallelChunk p) {
            this.pcr_.resultChunk = p;
        }
    }

    /** Receives each {@link ParallelChunk} when its {@code pchunk} element closes. */
    public static interface PChunkCallback {
        public void handlePChunk(ParallelChunk var1);
    }
}

