package com.cybozu.labs.langdetect;

import com.cybozu.labs.langdetect.util.LangProfile;
import net.arnx.jsonic.JSON;
import net.arnx.jsonic.JSONException;

import java.io.*;
import java.util.*;

/**
 * LangDetect Command Line Interface
 * <p>
 * This is a command line interface of Language Detection Library "LangDetect".
 *
 * @author Nakatani Shuyo
 */
public class Command implements Serializable {
    /**
     * smoothing default parameter (ELE)
     */
    private static final double DEFAULT_ALPHA = 0.5;

    /**
     * Maps an option flag that takes a value (e.g. {@code -d}) to the key under which
     * that value is stored in {@link #values}.
     */
    private Map<String, String> opt_with_value = new HashMap<String, String>();

    /**
     * Option values by key; pre-populated with defaults by {@link #addOpt}.
     */
    private Map<String, String> values = new HashMap<String, String>();

    /**
     * Flags seen on the command line that are not registered as taking a value
     * (every argument starting with {@code -} that is not in {@link #opt_with_value}).
     */
    private Set<String> opt_without_value = new HashSet<String>();

    /**
     * Positional (non-option) arguments, in command line order.
     */
    private List<String> arglist = new ArrayList<String>();

    /**
     * Factory used to load language profiles and create detectors.
     */
    private DetectorFactory detectFact = new DetectorFactory();

    /**
     * Command line easy parser.
     * <p>
     * Arguments registered through {@link #addOpt} consume the following argument as
     * their value; any other argument starting with {@code -} is recorded as a flag;
     * everything else is collected as a positional argument.
     *
     * @param args command line arguments
     * @throws IllegalArgumentException if an option that takes a value is the last argument
     */
    private void parse(String[] args) {
        for (int i = 0; i < args.length; ++i) {
            String arg = args[i];
            if (opt_with_value.containsKey(arg)) {
                // Without this check a trailing "-d" would read past the end of the array.
                if (i + 1 >= args.length) {
                    throw new IllegalArgumentException("option " + arg + " requires a value");
                }
                values.put(opt_with_value.get(arg), args[++i]);
            } else if (arg.startsWith("-")) {
                opt_without_value.add(arg);
            } else {
                arglist.add(arg);
            }
        }
    }

    /**
     * Registers an option that takes a value.
     *
     * @param opt   option flag as typed on the command line (e.g. {@code -d})
     * @param key   key under which the value is stored
     * @param value default value, may be {@code null}
     */
    private void addOpt(String opt, String key, String value) {
        opt_with_value.put(opt, key);
        values.put(key, value);
    }

    /**
     * @param key option key
     * @return the stored value for the key, or {@code null} if none is stored
     */
    private String get(String key) {
        return values.get(key);
    }

    /**
     * @param key option key
     * @return the stored value parsed as a {@code Long}, or {@code null} if the value is
     * absent or is not a valid long
     */
    private Long getLong(String key) {
        String value = values.get(key);
        if (value == null) {
            return null;
        }
        try {
            return Long.valueOf(value);
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /**
     * @param key          option key
     * @param defaultValue value to return when the option is absent or malformed
     * @return the stored value parsed as a double, or {@code defaultValue}
     */
    private double getDouble(String key, double defaultValue) {
        String value = values.get(key);
        // Parsing null raises NullPointerException, not NumberFormatException,
        // so the absent case has to be handled explicitly.
        if (value == null) {
            return defaultValue;
        }
        try {
            return Double.parseDouble(value);
        } catch (NumberFormatException e) {
            return defaultValue;
        }
    }

    /**
     * @param opt flag as typed on the command line (e.g. {@code --debug})
     * @return true if the flag was present among the parsed arguments
     */
    private boolean hasOpt(String opt) {
        return opt_without_value.contains(opt);
    }

    /**
     * File search (easy glob)
     *
     * @param directory directory path
     * @param pattern   regular expression that the whole file name must match
     * @return the first matching entry of the directory listing, or {@code null} if
     * nothing matches or the directory cannot be listed
     */
    private File searchFile(File directory, String pattern) {
        final File[] files = directory.listFiles();
        if (files == null) {
            return null;
        }
        for (File file : files) {
            if (file.getName().matches(pattern)) {
                return file;
            }
        }
        return null;
    }

    /**
     * Loads the language profiles from the directory given by {@code -d} and, when a
     * valid {@code -s} seed was supplied, applies it to the factory.
     *
     * @return {@code false} if loading succeeded, {@code true} if it failed
     * (the error message is printed to standard error)
     */
    private boolean loadProfile() {
        String profileDirectory = get("directory") + "/";
        try {
            detectFact.loadProfile(profileDirectory);
            Long seed = getLong("seed");
            if (seed != null) {
                detectFact.setSeed(seed);
            }
            return false;
        } catch (LangDetectException e) {
            System.err.println("ERROR: " + e.getMessage());
            return true;
        }
    }

    /**
     * Writes a profile as JSON to the given file.
     *
     * @param profile     profile to encode
     * @param destination file to create or overwrite
     * @throws IOException if the file cannot be opened, written or closed
     */
    private void writeProfile(LangProfile profile, File destination) throws IOException {
        // try-with-resources also reports a failing close(), which for an output
        // stream can mean the profile was not completely written.
        try (FileOutputStream os = new FileOutputStream(destination)) {
            JSON.encode(profile, os);
        }
    }

    /**
     * Generate Language Profile from Wikipedia Abstract Database File
     * <pre>
     * usage: --genprofile -d [abstracts directory] [language names]
     * </pre>
     * For every language name, the first file in the directory matching
     * {@code [lang]wiki-.*-abstract\.xml.*} is read and the resulting profile is written
     * to {@code [directory]/profiles/[lang]}. Languages without a matching file are
     * reported on standard error and skipped; a failure for one language does not stop
     * the others.
     */
    public void generateProfile() {
        String baseDirectory = get("directory");
        File directory = new File(baseDirectory);
        for (String lang : arglist) {
            File file = searchFile(directory, lang + "wiki-.*-abstract\\.xml.*");
            if (file == null) {
                System.err.println("Not Found abstract xml : lang = " + lang);
                continue;
            }

            try {
                // Build the profile before opening the output file so that a failed
                // load does not leave an empty profile file behind.
                LangProfile profile = GenProfile.loadFromWikipediaAbstract(lang, file);
                profile.omitLessFreq();
                writeProfile(profile, new File(baseDirectory + "/profiles/" + lang));
            } catch (JSONException | LangDetectException | IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Generate Language Profile from Text File
     * <pre>
     * usage: --genprofile-text -l [language code] [text file path]
     * </pre>
     * The profile is written to a file named after the language code, relative to the
     * current working directory.
     */
    private void generateProfileFromText() {
        if (arglist.size() != 1) {
            System.err.println("Need to specify text file path");
            return;
        }
        File file = new File(arglist.get(0));
        if (!file.exists()) {
            System.err.println("Need to specify existing text file path");
            return;
        }

        String lang = get("lang");
        if (lang == null) {
            System.err.println("Need to specify langage code(-l)");
            return;
        }

        try {
            LangProfile profile = GenProfile.loadFromText(lang, file);
            profile.omitLessFreq();
            writeProfile(profile, new File(lang));
        } catch (JSONException | LangDetectException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Language detection test for each file (--detectlang option)
     * <pre>
     * usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
     * </pre>
     * Each file is read as UTF-8 and its language probabilities are printed as
     * {@code [filename]:[probabilities]}. A failure on one file is printed and the
     * remaining files are still processed.
     */
    public void detectLang() {
        if (loadProfile()) {
            return;
        }
        for (String filename : arglist) {
            try (BufferedReader is = new BufferedReader(
                    new InputStreamReader(new FileInputStream(filename), "utf-8"))) {
                Detector detector = detectFact.create(getDouble("alpha", DEFAULT_ALPHA));
                if (hasOpt("--debug")) {
                    detector.setVerbose();
                }
                detector.append(is);
                System.out.println(filename + ":" + detector.getProbabilities());
            } catch (IOException | LangDetectException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Batch Test of Language Detection (--batchtest option)
     * <pre>
     * usage: --batchtest -d [profile directory] -a [alpha] -s [seed] [test data(s)]
     * </pre>
     * The format of test data(s):
     * <pre>
     *   [correct language name]\t[text body for test]\n
     * </pre>
     * Lines without a tab, or with an empty language name, are skipped. A summary is
     * printed after every input file; results accumulate across files, so each summary
     * covers all files processed so far.
     */
    public void batchTest() {
        if (loadProfile()) {
            return;
        }
        Map<String, List<String>> result = new HashMap<String, List<String>>();
        for (String filename : arglist) {
            try (BufferedReader is = new BufferedReader(
                    new InputStreamReader(new FileInputStream(filename), "utf-8"))) {
                String line;
                // readLine() returning null is the only reliable end-of-stream signal;
                // ready() merely says whether a read would block.
                while ((line = is.readLine()) != null) {
                    int idx = line.indexOf('\t');
                    if (idx <= 0) {
                        continue;
                    }
                    String correctLang = line.substring(0, idx);
                    String text = line.substring(idx + 1);

                    Detector detector = detectFact.create(getDouble("alpha", DEFAULT_ALPHA));
                    detector.append(text);
                    String lang = "";
                    try {
                        lang = detector.detect();
                    } catch (Exception e) {
                        // Best effort: a failed detection is recorded as "" and counted
                        // as a miss instead of aborting the batch.
                        e.printStackTrace();
                    }

                    List<String> detected = result.get(correctLang);
                    if (detected == null) {
                        detected = new ArrayList<String>();
                        result.put(correctLang, detected);
                    }
                    detected.add(lang);

                    if (hasOpt("--debug")) {
                        System.out.println(correctLang + "," + lang + ","
                                + (text.length() > 100 ? text.substring(0, 100) : text));
                    }
                }
            } catch (IOException | LangDetectException e) {
                e.printStackTrace();
            }

            printBatchSummary(result);
        }
    }

    /**
     * Prints, for each expected language in sorted order, how often it was detected
     * correctly together with the distribution of detected languages, followed by the
     * overall totals.
     *
     * @param result detected languages grouped by expected language
     */
    private void printBatchSummary(Map<String, List<String>> result) {
        List<String> langlist = new ArrayList<String>(result.keySet());
        Collections.sort(langlist);

        int totalCount = 0;
        int totalCorrect = 0;
        for (String lang : langlist) {
            HashMap<String, Integer> resultCount = new HashMap<String, Integer>();
            List<String> list = result.get(lang);
            for (String detectedLang : list) {
                Integer seen = resultCount.get(detectedLang);
                resultCount.put(detectedLang, seen == null ? 1 : seen + 1);
            }
            int count = list.size();
            Integer hits = resultCount.get(lang);
            int correct = hits == null ? 0 : hits;
            double rate = correct / (double) count;
            System.out.println(String.format("%s (%d/%d=%.2f): %s", lang, correct, count, rate, resultCount));
            totalCorrect += correct;
            totalCount += count;
        }
        System.out.println(String.format("total: %d/%d = %.3f", totalCorrect, totalCount, totalCorrect / (double) totalCount));
    }

    /**
     * Command Line Interface
     * <p>
     * Registers the value options {@code -d} (directory, default {@code ./}),
     * {@code -a} (alpha), {@code -s} (seed) and {@code -l} (language code), parses the
     * arguments and runs the first of {@code --genprofile}, {@code --genprofile-text},
     * {@code --detectlang}, {@code --batchtest} that is present.
     *
     * @param args command line arguments
     */
    public static void main(String[] args) {
        Command command = new Command();
        command.addOpt("-d", "directory", "./");
        command.addOpt("-a", "alpha", "" + DEFAULT_ALPHA);
        command.addOpt("-s", "seed", null);
        command.addOpt("-l", "lang", null);
        try {
            command.parse(args);
        } catch (IllegalArgumentException e) {
            System.err.println("ERROR: " + e.getMessage());
            return;
        }

        if (command.hasOpt("--genprofile")) {
            command.generateProfile();
        } else if (command.hasOpt("--genprofile-text")) {
            command.generateProfileFromText();
        } else if (command.hasOpt("--detectlang")) {
            command.detectLang();
        } else if (command.hasOpt("--batchtest")) {
            command.batchTest();
        } else {
            System.err.println("usage: [--genprofile | --genprofile-text | --detectlang | --batchtest]"
                    + " [-d directory] [-a alpha] [-s seed] [-l lang] [arguments...]");
        }
    }

}
