/*
 * Copyright 1997-2008 Day Management AG
 * Barfuesserplatz 6, 4001 Basel, Switzerland
 * All Rights Reserved.
 *
 * This software is the confidential and proprietary information of
 * Day Management AG, ("Confidential Information"). You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Day.
 */
package com.day.cq.commons;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * <code>LanguageUtil</code> provides static helpers to validate
 * language/country codes (such as <code>de</code>, <code>de_CH</code> or
 * <code>de-CH</code>) and to locate the language root inside a content path.
 */
public class LanguageUtil {

    /**
     * Set of all valid language iso codes.
     * @see Locale#getISOLanguages()
     */
    public static final Set<String> ISO_LANGUAGES =
            Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(Locale.getISOLanguages())));

    /**
     * Lower-case codes that are accepted although they are not part of
     * {@link #ISO_LANGUAGES}.
     */
    private static final Set<String> NON_STANDARD_LANGUAGE_CODES =
            Collections.unmodifiableSet(new HashSet<String>(Collections.singletonList("tc")));

    /**
     * Set of all valid country iso codes.
     * @see Locale#getISOCountries()
     */
    public static final Set<String> ISO_COUNTRIES =
            Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(Locale.getISOCountries())));

    /**
     * Pseudo language for i18n testing. It is accepted as a language code and,
     * when used, any country part is accepted as well.
     */
    public static final String PSEUDO_LANGUAGE = "zz";

    /**
     * Regexp to find the Language Label in a Path. eg "en" in "/content/geometrixx/en"
     */
    private static final Pattern SINGLE_CODE = Pattern.compile("/([a-zA-Z]{2})(/|$)");

    /**
     * Regexp to find the Language-Country Code in a Path. eg "en_GB" in "/content/geometrixx/en_GB".
     * Support both "_" and "-" as delimiters.
     */
    private static final Pattern LANGUAGE_AND_COUNTRY = Pattern.compile("/([a-zA-Z]{2,3}[_-][a-zA-Z]{2,3})(/|$)");

    /**
     * Returns the language locale for the given language/country abbreviation.
     * @param code the language/country code. eg de_CH or de-CH
     * @return the locale or <code>null</code> if the code is <code>null</code> or not valid.
     */
    public static Locale getLocale(String code) {
        Language l = getLanguage(code);
        return l == null ? null : l.getLocale();
    }

    /**
     * Returns the language for the given language/country abbreviation.
     * <p>
     * Accepted forms are a two-letter language code (<code>de</code>) or a
     * two-letter language code followed by <code>_</code> or <code>-</code>
     * and a two-letter country code (<code>de_CH</code>, <code>de-CH</code>).
     * Validation is case-insensitive and independent of the default locale
     * of the JVM.
     *
     * @param code the language/country code. eg de_CH or de-CH
     * @return the language or <code>null</code> if the code is <code>null</code> or not valid.
     *
     * @since 5.4
     */
    public static Language getLanguage(String code) {
        if (code == null) {
            return null;
        }
        // support both "-" and "_" as delimiters (plain character replacement, no regex needed)
        String normalized = code.replace('-', '_');

        // Case conversions below use a fixed locale on purpose: with the default
        // locale, a Turkish/Azeri JVM maps "I" -> dotless "i" and "i" -> dotted "I",
        // so codes such as "IT" or "in" would not be found in the ISO sets.
        String lang;
        String country = "";
        if (normalized.length() == 2) {
            lang = normalized.toLowerCase(Locale.ENGLISH);
        } else if (normalized.length() == 5 && normalized.charAt(2) == '_') {
            // the language part is passed on in its original case, only the lookup is normalized
            lang = normalized.substring(0, 2);
            country = normalized.substring(3);
        } else {
            return null;
        }

        boolean pseudo = PSEUDO_LANGUAGE.equalsIgnoreCase(lang);
        String langKey = lang.toLowerCase(Locale.ENGLISH);

        // check for valid iso codes and non-standard language codes (including our pseudo language)
        if (!pseudo
                && !ISO_LANGUAGES.contains(langKey)
                && !NON_STANDARD_LANGUAGE_CODES.contains(langKey)) {
            return null;
        }
        // check for valid iso countries and non-standard codes;
        // when the lang is the pseudo language (zz), allow any string for country
        if (country.length() > 0 && !pseudo) {
            // ISO countries are upper case, the non-standard codes are stored lower case
            boolean knownCountry = ISO_COUNTRIES.contains(country.toUpperCase(Locale.ENGLISH))
                    || NON_STANDARD_LANGUAGE_CODES.contains(country.toLowerCase(Locale.ENGLISH));
            if (!knownCountry) {
                return null;
            }
        }
        return new Language(lang, country);
    }

    /**
     * Returns the language root for the given path by only analyzing the path
     * names starting at the root.
     * <p>
     * Two passes are made over the path:
     * <ol>
     * <li>The first path segment that is a valid combined code, eg
     *     {@code /content/site/en_GB} or {@code /content/site/en-GB}, ends the
     *     language root.</li>
     * <li>Otherwise the first two-letter segment that is a valid language code
     *     ends the language root, eg {@code /content/site/en}. If a later
     *     two-letter segment combined with that first one forms a valid
     *     {@code later_first} language/country code, eg {@code de} after
     *     {@code ch} in {@code /content/ch/site/de}, the root is extended up to
     *     and including that later segment.</li>
     * </ol>
     *
     * @param path path
     * @return the language root or <code>null</code> if not found
     */
    public static String getLanguageRoot(String path) {
        if (path == null || path.length() == 0 || path.equals("/")) {
            return null;
        }

        // first try the <language>_<country> pattern -> end if found.
        // The pattern consumes the trailing "/", so each search is restarted right
        // after the previous label; otherwise directly adjacent labels would be skipped.
        Matcher combined = LANGUAGE_AND_COUNTRY.matcher(path);
        int from = 0;
        while (combined.find(from)) {
            if (getLocale(combined.group(1)) != null) {
                return path.substring(0, combined.end(1));
            }
            from = combined.end(1);
        }

        // second try to find single language labels
        String firstLanguage = null;
        int rootEnd = -1;
        Matcher single = SINGLE_CODE.matcher(path);
        from = 0;
        while (single.find(from)) {
            String code = single.group(1);
            if (firstLanguage == null) {
                if (getLocale(code) != null) {
                    // first valid language label: candidate root
                    firstLanguage = code;
                    rootEnd = single.end(1);
                }
            } else if (getLocale(code + "_" + firstLanguage) != null) {
                // that's a /country/.../language layout -> shift root to the language
                rootEnd = single.end(1);
                break;
            }
            from = single.end(1);
        }
        return rootEnd > 0 ? path.substring(0, rootEnd) : null;
    }
}