/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.text;

import java.io.Closeable;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.option.DefaultOption;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class SequenceFilesFromMailArchives {
    /** Class-wide logger. */
    private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromMailArchives.class);

    /** Matches a line of the form {@code From <address> ... <4 digits>}; used as the boundary between messages. */
    private static final Pattern MESSAGE_START =
        Pattern.compile("^From \\S+@\\S.*\\d{4}$", Pattern.CASE_INSENSITIVE);

    /** Matches a {@code Message-ID} header line; group 1 is the text between the angle brackets. */
    private static final Pattern MESSAGE_ID_PREFIX =
        Pattern.compile("^message-id: <(.*)>$", Pattern.CASE_INSENSITIVE);

    /** Matches a {@code Subject} header line; group 1 is the subject text. */
    private static final Pattern SUBJECT_PREFIX =
        Pattern.compile("^subject: (.*)$", Pattern.CASE_INSENSITIVE);

    /**
     * Creates the writer that receives every parsed message.
     *
     * @param chunkSizeInMB requested size of each output chunk, in megabytes
     * @param outputDir directory the chunk files are created in
     * @return a freshly opened {@link ChunkedWriter}
     * @throws IOException if the writer cannot be opened
     */
    private static ChunkedWriter createNewChunkedWriter(int chunkSizeInMB, String outputDir) throws IOException {
        ChunkedWriter chunkedWriter = new ChunkedWriter(chunkSizeInMB, outputDir);
        return chunkedWriter;
    }

    /**
     * Parses every mail archive found under {@code parentDir} and writes the extracted messages
     * as key/value pairs through a {@link ChunkedWriter} rooted at {@code outputDir}.
     *
     * <p>The traversal is driven by handing a {@link PrefixAdditionFilter} to
     * {@link File#listFiles(java.io.FileFilter)}; the filter does the parsing as a side effect.
     *
     * @param parentDir directory whose entries are parsed
     * @param outputDir directory the chunk files are written to
     * @param prefix string prepended to every generated key
     * @param chunkSizeInMB requested size of each output chunk, in megabytes
     * @param charset character encoding used to read the input files
     * @throws IOException if the writer cannot be opened or closed
     */
    public void createSequenceFiles(File parentDir, String outputDir, String prefix, int chunkSizeInMB, Charset charset) throws IOException {
        ChunkedWriter writer = SequenceFilesFromMailArchives.createNewChunkedWriter(chunkSizeInMB, outputDir);
        PrefixAdditionFilter filter = new PrefixAdditionFilter(prefix, writer, charset);
        try {
            parentDir.listFiles(filter);
        } finally {
            // The filter rethrows parse failures as unchecked exceptions; close the writer on
            // that path too so the open chunk file is not leaked.
            writer.close();
        }
        log.info("Parsed " + filter.getMessageCount() + " messages from " + parentDir.getAbsolutePath());
    }

    /**
     * Command-line entry point.
     *
     * <p>Recognised options: {@code --input/-i} (required), {@code --output/-o} (required),
     * {@code --charset/-c} (required), {@code --chunkSize/-chunk} (defaults to 64),
     * {@code --keyPrefix/-prefix} (defaults to the empty string) and {@code --help/-h}.
     * A command line that fails to parse is logged and answered with the help text.
     *
     * @param args raw command-line arguments
     * @throws Exception if the conversion itself fails
     */
    public static void main(String[] args) throws Exception {
        DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
        ArgumentBuilder argumentBuilder = new ArgumentBuilder();
        GroupBuilder groupBuilder = new GroupBuilder();

        // Builder calls are kept in their original order for every option.
        Option inputOption = optionBuilder
            .withLongName("input")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The input dir containing the documents")
            .withShortName("i")
            .create();
        Option outputOption = optionBuilder
            .withLongName("output")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory")
            .withShortName("o")
            .create();
        Option chunkSizeOption = optionBuilder
            .withLongName("chunkSize")
            .withArgument(argumentBuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create())
            .withDescription("The chunkSize in MegaBytes. Defaults to 64")
            .withShortName("chunk")
            .create();
        Option keyPrefixOption = optionBuilder
            .withLongName("keyPrefix")
            .withArgument(argumentBuilder.withName("keyPrefix").withMinimum(1).withMaximum(1).create())
            .withDescription("The prefix to be prepended to the key")
            .withShortName("prefix")
            .create();
        Option charsetOption = optionBuilder
            .withLongName("charset")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("charset").withMinimum(1).withMaximum(1).create())
            .withDescription("The name of the character encoding of the input files")
            .withShortName("c")
            .create();
        Option helpOption = optionBuilder
            .withLongName("help")
            .withDescription("Print out help")
            .withShortName("h")
            .create();

        Group options = groupBuilder
            .withName("Options")
            .withOption(keyPrefixOption)
            .withOption(chunkSizeOption)
            .withOption(charsetOption)
            .withOption(outputDirOrSelf(outputOption))
            .withOption(helpOption)
            .withOption(inputOption)
            .create();

        try {
            Parser parser = new Parser();
            parser.setGroup(options);
            parser.setHelpOption(helpOption);
            CommandLine parsed = parser.parse(args);

            if (parsed.hasOption(helpOption)) {
                CommandLineUtil.printHelp(options);
                return;
            }

            File inputDir = new File((String) parsed.getValue(inputOption));
            String outputDir = (String) parsed.getValue(outputOption);
            int chunkSizeInMB = parsed.hasOption(chunkSizeOption)
                ? Integer.parseInt((String) parsed.getValue(chunkSizeOption))
                : 64;
            String keyPrefix = parsed.hasOption(keyPrefixOption)
                ? (String) parsed.getValue(keyPrefixOption)
                : "";
            Charset inputCharset = Charset.forName((String) parsed.getValue(charsetOption));

            new SequenceFilesFromMailArchives()
                .createSequenceFiles(inputDir, outputDir, keyPrefix, chunkSizeInMB, inputCharset);
        } catch (OptionException e) {
            log.error("Exception", e);
            CommandLineUtil.printHelp(options);
        }
    }

    /** Identity helper that keeps the option-group chain above uniformly one call per line. */
    private static Option outputDirOrSelf(Option option) {
        return option;
    }

    /**
     * A {@link FileFilter} used purely for its side effects: {@link #accept(File)} never accepts
     * anything, but parses every regular file it is shown and recurses into every directory,
     * writing each message it finds to the shared {@link ChunkedWriter}.
     *
     * <p>Keys have the form {@code <prefix>/<file name>/<message id>} (using
     * {@link File#separator}); values hold the captured subject text and the non-empty body lines.
     */
    public class PrefixAdditionFilter implements FileFilter {
        private final String prefix;
        private final ChunkedWriter writer;
        private final Charset charset;
        /** Accumulates the text of the message currently being read; reused across messages. */
        private final StringBuilder file;
        /** Number of Message-ID headers seen by this filter, including nested directories. */
        private int messageCount;

        /**
         * @param prefix string prepended to every key produced by this filter
         * @param writer destination for the parsed messages
         * @param charset character encoding used to read the files
         */
        public PrefixAdditionFilter(String prefix, ChunkedWriter writer, Charset charset) {
            this.prefix = prefix;
            this.writer = writer;
            this.charset = charset;
            this.file = new StringBuilder();
            this.messageCount = 0;
        }

        /** @return the number of messages counted so far, including those in sub-directories */
        public int getMessageCount() {
            return this.messageCount;
        }

        /**
         * Processes {@code current}: directories are traversed with a nested filter whose prefix
         * is extended by the directory name, regular files are parsed line by line.
         *
         * @param current the file or directory offered by {@link File#listFiles(FileFilter)}
         * @return always {@code false}; no listing is ever collected
         * @throws IllegalStateException if reading or writing fails with an {@link IOException}
         */
        @Override
        public boolean accept(File current) {
            if (current.isDirectory()) {
                log.info("At " + current.getAbsolutePath());
                PrefixAdditionFilter nested = new PrefixAdditionFilter(this.prefix + File.separator + current.getName(), this.writer, this.charset);
                current.listFiles(nested);
                int dirCount = nested.getMessageCount();
                log.info("Parsed " + dirCount + " messages from directory " + current.getAbsolutePath());
                this.messageCount += dirCount;
            } else {
                try {
                    this.parseFileLineByLine(current);
                }
                catch (IOException e) {
                    throw new IllegalStateException(e);
                }
            }
            return false;
        }

        /**
         * Reads {@code current} line by line and writes one record per message.
         *
         * <p>A message starts being tracked once its Message-ID header is seen and ends at the
         * next message-start line or at end of file. A file that cannot be opened is skipped with
         * a warning instead of aborting the whole run.
         *
         * @param current the archive file to parse
         * @throws IOException if reading the file or writing a record fails
         */
        private void parseFileLineByLine(File current) throws IOException {
            try {
                this.file.setLength(0);
                String messageId = null;
                boolean inBody = false;
                Matcher subjectMatcher = SUBJECT_PREFIX.matcher("");
                Matcher messageIdMatcher = MESSAGE_ID_PREFIX.matcher("");
                Matcher messageBoundaryMatcher = MESSAGE_START.matcher("");
                for (String nextLine : new FileLineIterable(current, this.charset, false)) {
                    // The subject is captured wherever the line appears, even before the
                    // Message-ID header has been seen.
                    subjectMatcher.reset(nextLine);
                    if (subjectMatcher.matches()) {
                        this.file.append(subjectMatcher.group(1)).append('\n');
                    }

                    if (messageId == null) {
                        // "message-id: <>" is 14 characters long, so anything that short
                        // cannot carry an ID and is not worth running the regex on.
                        if (nextLine.length() > 14) {
                            messageIdMatcher.reset(nextLine);
                            if (messageIdMatcher.matches()) {
                                messageId = messageIdMatcher.group(1);
                                ++this.messageCount;
                            }
                        }
                        continue;
                    }

                    messageBoundaryMatcher.reset(nextLine);
                    if (messageBoundaryMatcher.matches()) {
                        // Next message begins: emit the one collected so far.
                        this.writeMessage(current, messageId);
                        messageId = null;
                        inBody = false;
                    } else if (inBody) {
                        if (nextLine.length() > 0) {
                            this.file.append(nextLine).append('\n');
                        }
                    } else {
                        // The first empty line after the headers marks the start of the body.
                        inBody = nextLine.length() == 0;
                    }
                }
                if (messageId != null) {
                    // Last message of the file has no trailing boundary line.
                    this.writeMessage(current, messageId);
                }
            }
            catch (FileNotFoundException e) {
                // Best effort: skip this file, but leave a trace so missing input is noticed.
                log.warn("Skipping file that could not be opened: " + current.getAbsolutePath(), e);
            }
        }

        /** Writes the buffered message text under the key for {@code messageId} and clears the buffer. */
        private void writeMessage(File current, String messageId) throws IOException {
            String key = this.prefix + File.separator + current.getName() + File.separator + messageId;
            this.writer.write(key, this.file.toString());
            this.file.setLength(0);
        }
    }

    /**
     * Writes text key/value pairs into a series of block-compressed sequence files named
     * {@code <outputDir>/chunk-<n>}, starting with {@code chunk-0} and moving on to the next
     * file once the bytes written to the current one exceed the configured chunk size.
     */
    public static class ChunkedWriter implements Closeable {
        /** Largest accepted chunk size; 1984 * 1024 * 1024 still fits in an {@code int}. */
        private static final int MAX_CHUNK_SIZE_IN_MB = 1984;

        private final int maxChunkSizeInBytes;
        private final String outputDir;
        private SequenceFile.Writer writer;
        private int currentChunkID;
        /** Key and value bytes appended to the current chunk; {@code long} so it cannot overflow. */
        private long currentChunkSize;
        private final Configuration conf = new Configuration();
        private final FileSystem fs;

        /**
         * Opens the first chunk file.
         *
         * @param chunkSizeInMB requested chunk size in megabytes; values above 1984 are clamped
         * @param outputDir directory the chunk files are created in
         * @throws IOException if the file system or the first chunk cannot be opened
         */
        public ChunkedWriter(int chunkSizeInMB, String outputDir) throws IOException {
            if (chunkSizeInMB > MAX_CHUNK_SIZE_IN_MB) {
                chunkSizeInMB = MAX_CHUNK_SIZE_IN_MB;
            }
            this.maxChunkSizeInBytes = chunkSizeInMB * 1024 * 1024;
            this.outputDir = outputDir;
            this.fs = FileSystem.get(this.conf);
            this.currentChunkID = 0;
            this.writer = this.openWriter(this.currentChunkID);
        }

        /** @return the path of the chunk file with the given ID */
        private Path getPath(int chunkID) {
            return new Path(this.outputDir + "/chunk-" + chunkID);
        }

        /** Opens a block-compressed Text/Text sequence file for the given chunk ID. */
        private SequenceFile.Writer openWriter(int chunkID) throws IOException {
            return SequenceFile.createWriter(this.fs, this.conf, this.getPath(chunkID), Text.class, Text.class, SequenceFile.CompressionType.BLOCK);
        }

        /**
         * Appends one record, first rolling over to a new chunk file if the current one has
         * already grown past the size limit.
         *
         * @param key record key
         * @param value record value
         * @throws IOException if closing, opening or appending to a chunk file fails
         */
        public void write(String key, String value) throws IOException {
            if (this.currentChunkSize > this.maxChunkSizeInBytes) {
                this.writer.close();
                // Advance the ID before building the path: using the old ID here would
                // re-create (and thereby overwrite) the chunk that was just closed.
                ++this.currentChunkID;
                log.info("Chunk size (" + this.currentChunkSize + ") reached MAX; creating new chunk " + this.currentChunkID);
                this.writer = this.openWriter(this.currentChunkID);
                this.currentChunkSize = 0;
            }
            Text keyT = new Text(key);
            Text valueT = new Text(value);
            // getLength() is the number of valid bytes; getBytes() exposes the backing array,
            // whose length can be larger than the encoded text.
            this.currentChunkSize += (long) keyT.getLength() + valueT.getLength();
            this.writer.append(keyT, valueT);
        }

        /**
         * Closes the chunk file that is currently open.
         *
         * @throws IOException if the underlying writer fails to close
         */
        @Override
        public void close() throws IOException {
            this.writer.close();
        }
    }
}

