/*************************************************************************
*
* ADOBE CONFIDENTIAL
* ___________________
*
*  Copyright 1997 Adobe Systems Incorporated
*  All Rights Reserved.
*
* NOTICE:  All information contained herein is, and remains
* the property of Adobe Systems Incorporated and its suppliers,
* if any.  The intellectual and technical concepts contained
* herein are proprietary to Adobe Systems Incorporated and its
* suppliers and are protected by trade secret or copyright law.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Adobe Systems Incorporated.
**************************************************************************/
package com.day.crx.statistics.keyword;

import com.day.crx.statistics.Report;
import com.day.crx.statistics.result.ResultsByQueryReport;
import com.day.crx.statistics.query.MostPopularQueriesReport;

import javax.jcr.Session;
import javax.jcr.RepositoryException;
import java.util.Iterator;
import java.util.Map;
import java.util.List;
import java.util.ArrayList;
import java.util.LinkedHashMap;

/**
 * <code>ExtractKeywordsReport</code> implements a report, which extracts
 * keywords from the statistical query and result data. The algorithm works
 * as follows:
 * <ul>
 * <li>Get the <code>n</code> most popular queries for a given period.</li>
 * <li>For each query, retrieve the results that were selected the most.
 * Whether a result is considered relevant depends on the number of times it
 * was selected. The threshold is set to half the value of the most selected
 * result, but at least 2.</li>
 * <li>The lower-cased query becomes a keyword of every relevant result and
 * the selection counts of a result are summed up over all queries.</li>
 * </ul>
 *
 * @author mreutegg
 */
public class ExtractKeywordsReport extends Report {

    /**
     * A result must have been selected at least this many times for a query
     * to be considered relevant.
     */
    private static final long MIN_SELECTION_COUNT = 2;

    /**
     * The maximum number of queries to analyze.
     */
    private int size = 100;

    /**
     * Per default cover last 90 days
     */
    private int period = 90;

    /**
     * The path where result data is stored.
     */
    private final String resultDataPath;

    /**
     * Add OAK-specific "option(traversal ok)" to the statistics query so that no traversal warning is triggered
     */
    private final boolean traversalOk;

    /**
     * Creates a new report that does not request the "traversal ok" option.
     *
     * @param queryDataPath  the path where query data is stored.
     * @param resultDataPath the path where result data is stored.
     */
    public ExtractKeywordsReport(String queryDataPath, String resultDataPath) {
        this(queryDataPath, resultDataPath, false);
    }

    /**
     * Creates a new report.
     *
     * @param queryDataPath  the path where query data is stored.
     * @param resultDataPath the path where result data is stored.
     * @param traversalOk    set to true to put the "traversal ok" option in
     *                       the statistics query
     */
    public ExtractKeywordsReport(String queryDataPath, String resultDataPath, boolean traversalOk) {
        super(queryDataPath);
        this.resultDataPath = resultDataPath;
        this.traversalOk = traversalOk;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns result rows with the following objects:
     * <ul>
     * <li>Path <code>String</code> of a page</li>
     * <li><code>Long</code> count (how many times the page was selected as a result)</li>
     * <li><code>List</code> of <code>String</code>s (the keywords)</li>
     * </ul>
     * Rows are returned in the order in which the pages were first
     * encountered while walking through the popular queries.
     *
     * @param session the session used to run the underlying reports.
     * @return an iterator over <code>Object[]</code> rows as described above.
     * @throws RepositoryException if one of the underlying reports fails.
     */
    public Iterator getResult(Session session) throws RepositoryException {
        MostPopularQueriesReport mpqr = new MostPopularQueriesReport(getDataPath());
        mpqr.setSize(getSize());
        mpqr.setPeriod(getPeriod());

        // LinkedHashMap: keep pages in the order they were first seen
        Map<String, PageKeywords> pathToKeywords = new LinkedHashMap<String, PageKeywords>();
        for (Iterator popularQueries = mpqr.getResult(session); popularQueries.hasNext(); ) {
            Object[] data = (Object[]) popularQueries.next();
            collectKeywords(session, (String) data[0], pathToKeywords);
        }

        List<Object[]> result = new ArrayList<Object[]>(pathToKeywords.size());
        for (Map.Entry<String, PageKeywords> entry : pathToKeywords.entrySet()) {
            PageKeywords page = entry.getValue();
            result.add(new Object[]{entry.getKey(), Long.valueOf(page.count), page.keywords});
        }
        return result.iterator();
    }

    /**
     * Adds the lower-cased <code>query</code> as a keyword to every page that
     * is a relevant result for it and adds the selection count of the page.
     *
     * @param session        the session used to run the result report.
     * @param query          the query as recorded in the query statistics.
     * @param pathToKeywords collects the keywords and counts per page path.
     * @throws RepositoryException if the result report fails.
     */
    private void collectKeywords(Session session, String query, Map<String, PageKeywords> pathToKeywords)
            throws RepositoryException {
        // the result report is looked up with the query exactly as recorded
        ResultsByQueryReport rr = new ResultsByQueryReport(resultDataPath, query, traversalOk);
        String keyword = null;
        long threshold = 0;
        boolean firstResult = true;
        for (Iterator it = rr.getResult(session); it.hasNext(); ) {
            Object[] row = (Object[]) it.next();
            String path = (String) row[0];
            long count = ((Long) row[1]).longValue();
            if (firstResult) {
                // calculate threshold based on first result; this relies on
                // the report returning the most selected result first
                threshold = Math.max(count / 2, MIN_SELECTION_COUNT);
                firstResult = false;
            }
            if (count < threshold) {
                // all remaining results are expected to be below threshold too
                break;
            }
            if (keyword == null) {
                // lower-case only once per query and only when it is needed
                keyword = query.toLowerCase();
            }
            PageKeywords page = pathToKeywords.get(path);
            if (page == null) {
                page = new PageKeywords();
                pathToKeywords.put(path, page);
            }
            page.add(keyword, count);
        }
    }

    /**
     * @return the maximum number of queries to analyze.
     */
    public int getSize() {
        return size;
    }

    /**
     * @param size the maximum number of queries to analyze.
     */
    public void setSize(int size) {
        this.size = size;
    }

    /**
     * @return the report period in number of days.
     */
    public int getPeriod() {
        return period;
    }

    /**
     * @param period the report period in number of days.
     */
    public void setPeriod(int period) {
        this.period = period;
    }

    /**
     * Accumulates the total selection count and the distinct keywords of a
     * single page while the report is being computed.
     */
    private static final class PageKeywords {

        /**
         * Sum of the selection counts over all queries relevant for the page.
         */
        private long count;

        /**
         * Distinct keywords in the order they were added.
         */
        private final List<String> keywords = new ArrayList<String>(2);

        /**
         * Records that the page was selected <code>selections</code> times for
         * <code>keyword</code>. The count is always added, the keyword only if
         * it is not yet known for the page.
         */
        void add(String keyword, long selections) {
            if (!keywords.contains(keyword)) {
                keywords.add(keyword);
            }
            count += selections;
        }
    }
}
