/*************************************************************************
 *
 * ADOBE CONFIDENTIAL
 * ___________________
 * 
 * Copyright 2024 Adobe
 * All Rights Reserved.
 * 
 * NOTICE: All information contained herein is, and remains
 * the property of Adobe and its suppliers, if any. The intellectual
 * and technical concepts contained herein are proprietary to Adobe
 * and its suppliers and are protected by all applicable intellectual
 * property laws, including trade secret and copyright laws.
 * Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained
 * from Adobe.
 **************************************************************************/

package com.day.cq.commons.predicates.servlets;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.StringTokenizer;

/**
 * <code>AbstractSearchServlet</code> is the common base class of search
 * servlets. It contributes a handful of shared constants and two helpers
 * for preparing query text.
 */
public abstract class AbstractSearchServlet extends AbstractPredicateServlet {

    /** Query clause. */
    public static final String QUERY = "query";

    /** Start index. */
    public static final String START = "start";

    /** Result limit. */
    public static final String LIMIT = "limit";

    /** Tidy param. */
    public static final String TIDY = "tidy";

    /**
     * Unmodifiable list of the unicode blocks whose characters act as words
     * on their own.
     */
    public static final List<Character.UnicodeBlock> WORD_CHARS;

    /**
     * The characters at which query text is split into terms.
     */
    public static final String SPLIT_CHARACTERS = " _-.,";

    static {
        // Keep this list roughly in sync with the standard lucene tokenizer!
        List<Character.UnicodeBlock> blocks = new ArrayList<>();
        Collections.addAll(blocks,
                // Chinese and Japanese
                Character.UnicodeBlock.HIRAGANA,
                Character.UnicodeBlock.KATAKANA,
                Character.UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS,
                Character.UnicodeBlock.BOPOMOFO,
                Character.UnicodeBlock.CJK_COMPATIBILITY,
                Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
                Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
                Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
                Character.UnicodeBlock.SPECIALS,
                // Korean
                Character.UnicodeBlock.HANGUL_SYLLABLES,
                Character.UnicodeBlock.HANGUL_JAMO);
        // A wrapped ArrayList (rather than a null-hostile immutable list) is
        // used on purpose: lookups may be made with a null block.
        WORD_CHARS = Collections.unmodifiableList(blocks);
    }

    /**
     * Checks that <code>text</code> contains no character from one of the
     * {@link #WORD_CHARS} blocks. Only the unicode block of each character is
     * inspected; separators such as whitespace are not examined here, and an
     * empty string yields <code>true</code>.
     *
     * @param text the text to check.
     * @return <code>false</code> as soon as one character of
     *         <code>text</code> belongs to a block listed in
     *         {@link #WORD_CHARS}; <code>true</code> otherwise.
     */
    protected boolean isSingleWord(String text) {
        for (char c : text.toCharArray()) {
            // of(c) may be null for characters outside any defined block;
            // such characters simply never match the list.
            Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
            if (WORD_CHARS.contains(block)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Appends a '*' wildcard to the query <code>text</code>, but only if
     * {@link #isSingleWord(String)} accepts the text; otherwise the text is
     * returned exactly as passed in.
     * <p>
     * When the wildcard is applied, the text is first broken into terms at
     * the {@link #SPLIT_CHARACTERS}. The terms are re-joined with a single
     * space and the wildcard is placed directly after the last term. If the
     * text yields no terms at all, the result is just the wildcard.
     *
     * @param text the query text.
     * @return the processed query text, possibly with appended '*' wildcard.
     */
    protected String applyWildcard(String text) {
        if (isSingleWord(text)) {
            StringTokenizer terms = new StringTokenizer(text, SPLIT_CHARACTERS);
            StringBuilder query = new StringBuilder();
            // first term goes in bare, every further term is space-prefixed
            if (terms.hasMoreTokens()) {
                query.append(terms.nextToken());
                while (terms.hasMoreTokens()) {
                    query.append(" ").append(terms.nextToken());
                }
            }
            // wildcard ends up attached to the last term only
            return query.append("*").toString();
        }
        // text with word-like characters is passed through untouched
        return text;
    }
}
