/*
 * Decompiled with CFR 0.152.
 */
package org.codelibs.elasticsearch.web.robot.transformer;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.codelibs.elasticsearch.util.settings.SettingsUtils;
import org.codelibs.elasticsearch.web.config.RiverConfig;
import org.codelibs.elasticsearch.web.config.ScrapingRule;
import org.codelibs.robot.RobotCrawlAccessException;
import org.codelibs.robot.RobotSystemException;
import org.codelibs.robot.builder.RequestDataBuilder;
import org.codelibs.robot.entity.AccessResultData;
import org.codelibs.robot.entity.RequestData;
import org.codelibs.robot.entity.ResponseData;
import org.codelibs.robot.entity.ResultData;
import org.codelibs.robot.helper.EncodingHelper;
import org.codelibs.robot.transformer.impl.HtmlTransformer;
import org.codelibs.robot.util.StreamUtil;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.script.CompiledScript;
import org.elasticsearch.script.ExecutableScript;
import org.elasticsearch.script.ScriptService;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.seasar.framework.beans.BeanDesc;
import org.seasar.framework.beans.factory.BeanDescFactory;
import org.seasar.framework.beans.util.Beans;
import org.seasar.framework.beans.util.Copy;
import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.container.annotation.tiger.InitMethod;
import org.seasar.framework.container.factory.SingletonS2ContainerFactory;
import org.seasar.framework.util.Base64Util;
import org.seasar.framework.util.FileUtil;
import org.seasar.framework.util.MethodUtil;
import org.seasar.framework.util.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ScrapingTransformer
extends HtmlTransformer {
    /** Size limit in bytes; same value as the fallback applied to the {@code maxFileSize} rule parameter. */
    private static final long DEFAULT_MAX_ATTACHMENT_SIZE = 1000000L;
    /** Rule parameter holding a literal value (a String or a List) for a property. */
    private static final String VALUE_QUERY_TYPE = "value";
    /** Rule parameter naming the property type; {@code data} and {@code attachment} are treated specially. */
    private static final String TYPE_QUERY_TYPE = "type";
    /** Rule parameter holding the script definition (String, List or Map). */
    private static final String SCRIPT_QUERY_TYPE = "script";
    /** Rule parameter holding the argument list passed to the Jsoup query method. */
    private static final String ARGS_QUERY_TYPE = "args";
    /** Rule parameter: when true the property is stored as a list instead of a joined string. */
    private static final String IS_ARRAY_PROP_NAME = "isArray";
    /** Rule parameter: when true the property values are collected as child URLs. */
    private static final String IS_CHILD_URL_PROP_NAME = "isChildUrl";
    /** Rule parameter: when true whitespace runs are collapsed and the value is trimmed. */
    private static final String TRIM_SPACES_PROP_NAME = "trimSpaces";
    /** Document field that receives the indexing time. */
    private static final String TIMESTAMP_FIELD = "@timestamp";
    /** Document field that receives the element position when array properties are expanded. */
    private static final String POSITION_FIELD = "position";
    /** Top-level key under which array properties are grouped; also checked as a property-name prefix. */
    private static final String ARRAY_PROPERTY_PREFIX = "[]";
    private static final Logger logger = LoggerFactory.getLogger(ScrapingTransformer.class);
    /** Names of the Jsoup {@code Element} methods probed, in this order, as rule parameters by processCssQuery. */
    private static final String[] queryTypes = new String[]{"className", "data", "html", "id", "ownText", "tagName", "text", "val", "nodeName", "outerHtml", "attr", "baseUri", "absUrl"};
    /** Properties of the response data that are copied into every index document. */
    public String[] copiedResonseDataFields = new String[]{"url", "parentUrl", "httpStatusCode", "method", "charSet", "contentLength", "mimeType", "executionTime", "lastModified"};
    /** River configuration; assigned in {@link #init()}. */
    protected RiverConfig riverConfig;
    /** Child URLs collected by the current thread while processing one response; cleared in transform(). */
    protected ThreadLocal<Set<String>> childUrlSetLocal = new ThreadLocal<Set<String>>();

    /**
     * Container init hook: looks up the {@link RiverConfig} component from the
     * singleton S2 container and keeps it in {@link #riverConfig}.
     */
    @InitMethod
    public void init() {
        RiverConfig config = (RiverConfig)SingletonS2Container.getComponent(RiverConfig.class);
        this.riverConfig = config;
    }

    /**
     * Delegates to the parent transformation and always clears the per-thread
     * child URL set afterwards, whether the parent call returns or throws.
     *
     * @param responseData the crawled response to transform
     * @return whatever the parent transformer returns
     */
    @Override
    public ResultData transform(ResponseData responseData) {
        try {
            return super.transform(responseData);
        }
        finally {
            // Do not let URLs collected for this response leak into the next one handled by this thread.
            this.childUrlSetLocal.remove();
        }
    }

    /**
     * Determines the character set of the response and stores it on the response data.
     * <p>
     * The number of bytes inspected is {@code this.preloadSizeForCharset} unless the matching
     * scraping rule defines a positive {@code preloadSizeForCharset} setting. When no charset
     * can be detected, {@code this.defaultEncoding} is applied only if the response has no
     * charset yet (or UTF-8 if there is no default encoding at all). An unsupported charset
     * is finally replaced by UTF-8.
     *
     * @param responseData the response whose charset is updated in place
     */
    protected void updateCharset(ResponseData responseData) {
        int preloadSize = this.preloadSizeForCharset;
        ScrapingRule rule = this.riverConfig.getScrapingRule(responseData);
        if (rule != null) {
            Integer configuredSize = rule.getSetting("preloadSizeForCharset", 0);
            if (configuredSize > 0) {
                preloadSize = configuredSize;
            }
        }

        String detected = this.loadCharset(responseData.getResponseBody(), preloadSize);
        if (detected != null) {
            responseData.setCharSet(detected.trim());
        } else if (this.defaultEncoding == null) {
            responseData.setCharSet("UTF-8");
        } else if (responseData.getCharSet() == null) {
            responseData.setCharSet(this.defaultEncoding);
        }

        boolean supported = this.isSupportedCharset(responseData.getCharSet());
        if (!supported) {
            responseData.setCharSet("UTF-8");
        }
    }

    /**
     * Reads up to {@code preloadSize} bytes from the head of the stream, lets
     * {@code parseCharset} look for a charset declaration in them and normalizes the
     * result through the {@link EncodingHelper} component.
     * <p>
     * The stream is read in a loop until the preload window is full or the stream ends,
     * because a single {@code read} call is allowed to return fewer bytes than requested.
     *
     * @param inputStream the response body; it is consumed but not closed here
     * @param preloadSize maximum number of bytes to inspect
     * @return the detected (and, if possible, normalized) encoding, or {@code null}
     *         when nothing was read or no charset was found
     * @throws RobotCrawlAccessException if reading the stream fails
     */
    protected String loadCharset(InputStream inputStream, int preloadSize) {
        String encoding = null;
        try {
            BufferedInputStream bis = new BufferedInputStream(inputStream);
            byte[] buffer = new byte[preloadSize];
            int total = 0;
            while (total < buffer.length) {
                int read = bis.read(buffer, total, buffer.length - total);
                if (read <= 0) {
                    // End of stream (or a stream that makes no progress): stop filling.
                    break;
                }
                total += read;
            }
            if (total > 0) {
                // NOTE(review): decodes with the platform default charset, as before.
                String content = new String(buffer, 0, total);
                encoding = this.parseCharset(content);
            }
        }
        catch (IOException e) {
            throw new RobotCrawlAccessException("Could not load a content.", (Throwable)e);
        }
        try {
            EncodingHelper encodingHelper = (EncodingHelper)SingletonS2Container.getComponent(EncodingHelper.class);
            encoding = encodingHelper.normalize(encoding);
        }
        catch (Exception e) {
            // Normalization is best-effort: keep the raw value, but leave a trace for diagnosis.
            if (logger.isDebugEnabled()) {
                logger.debug("Could not normalize the encoding: " + encoding, (Throwable)e);
            }
        }
        return encoding;
    }

    /**
     * Spools the response body into a temporary file and runs the scraping rule that
     * matches the response against it. Nothing is done when no rule matches. The
     * temporary file is deleted afterwards in all cases.
     *
     * @param responseData the crawled response
     * @param resultData   the result object passed through to {@code processData}
     * @throws RobotSystemException if an {@link IOException} occurs while spooling or processing
     */
    protected void storeData(ResponseData responseData, ResultData resultData) {
        ScrapingRule rule = this.riverConfig.getScrapingRule(responseData);
        if (rule == null) {
            logger.info("No scraping rule.");
            return;
        }

        File tempFile = null;
        try {
            tempFile = File.createTempFile("river-web-", ".tmp");
            StreamUtil.drain((InputStream)responseData.getResponseBody(), (File)tempFile);
            this.processData(rule, tempFile, responseData, resultData);
        }
        catch (IOException e) {
            throw new RobotSystemException("Failed to create a temp file.", (Throwable)e);
        }
        finally {
            if (tempFile != null) {
                boolean deleted = tempFile.delete();
                if (!deleted) {
                    logger.warn("Failed to delete " + tempFile.getAbsolutePath());
                }
            }
        }
    }

    /**
     * Builds the index document for one response according to the scraping rule and
     * passes it to {@link #storeIndex(ResponseData, Map)}.
     * <p>
     * For every property of the rule map the raw string values come from, in this order
     * of precedence: the literal {@code value} parameter, the Base64-encoded file content
     * (types {@code data} / {@code attachment}, subject to {@code maxFileSize}), or a CSS
     * query against the parsed HTML document. If a {@code script} parameter is present
     * the script result becomes the property value; otherwise the values are stored as a
     * list ({@code isArray}) or joined with a single space. Properties flagged
     * {@code isChildUrl} additionally feed the per-thread child URL set.
     *
     * @param scrapingRule the rule matching the response
     * @param file         local copy of the response body
     * @param responseData the crawled response
     * @param resultData   the result object, exposed to scripts as {@code result}
     * @throws RobotCrawlAccessException if the HTML content cannot be parsed
     */
    protected void processData(ScrapingRule scrapingRule, File file, ResponseData responseData, ResultData resultData) {
        Map<String, Map<String, Object>> scrapingRuleMap = scrapingRule.getRuleMap();
        String charsetName = responseData.getCharSet();
        if (charsetName == null) {
            charsetName = "UTF-8";
        }
        Document document = null;
        Boolean isHtmlParsed = scrapingRule.getSetting("html", Boolean.TRUE);
        if (isHtmlParsed.booleanValue()) {
            document = this.parseHtmlDocument(file, charsetName, responseData.getUrl());
        }
        LinkedHashMap<String, Object> dataMap = new LinkedHashMap<String, Object>();
        ((Copy)((Copy)((Copy)Beans.copy((Object)responseData, dataMap).includes((CharSequence[])this.copiedResonseDataFields)).excludesNull()).excludesWhitespace()).execute();
        if (logger.isDebugEnabled()) {
            logger.debug("ruleMap: " + scrapingRuleMap);
            logger.debug("dataMap: " + dataMap);
        }
        for (Map.Entry<String, Map<String, Object>> entry : scrapingRuleMap.entrySet()) {
            Object propertyValue;
            String propName = entry.getKey();
            Map<String, Object> params = entry.getValue();
            boolean isTrimSpaces = (Boolean)SettingsUtils.get(params, (String)TRIM_SPACES_PROP_NAME, (Object)Boolean.FALSE);
            boolean isArray = (Boolean)SettingsUtils.get(params, (String)IS_ARRAY_PROP_NAME, (Object)Boolean.FALSE);
            boolean isChildUrl = (Boolean)SettingsUtils.get(params, (String)IS_CHILD_URL_PROP_NAME, (Object)Boolean.FALSE);
            List<String> strList = new ArrayList<String>();
            Object value = SettingsUtils.get(params, (String)VALUE_QUERY_TYPE, null);
            String type = (String)SettingsUtils.get(params, (String)TYPE_QUERY_TYPE, null);
            if (value != null) {
                if (value instanceof String) {
                    strList.add(this.trimSpaces(value.toString(), isTrimSpaces));
                } else if (value instanceof List) {
                    for (Object obj : (List<?>)value) {
                        // A null entry is kept as a null placeholder instead of failing on toString().
                        strList.add(obj == null ? null : this.trimSpaces(obj.toString(), isTrimSpaces));
                    }
                }
            } else if ("data".equals(type) || "attachment".equals(type)) {
                long maxFileSize = this.resolveMaxFileSize(params);
                long fileSize = file.length();
                if (fileSize <= maxFileSize) {
                    strList.add(Base64Util.encode((byte[])FileUtil.getBytes((File)file)));
                    // Encoded content is always a single value and never a link.
                    isArray = false;
                    isChildUrl = false;
                } else {
                    logger.info("The max file size(" + fileSize + "/" + maxFileSize + ") is exceeded: " + responseData.getUrl());
                }
            } else if (document != null) {
                this.processCssQuery(document, propName, params, isTrimSpaces, strList);
            }
            ScriptInfo scriptInfo = this.getScriptValue(params);
            if (scriptInfo == null) {
                propertyValue = isArray ? strList : StringUtils.join(strList, (String)" ");
            } else {
                Client client = this.riverConfig.getClient();
                HashMap<String, Object> vars = new HashMap<String, Object>();
                vars.put("container", SingletonS2ContainerFactory.getContainer());
                vars.put("client", client);
                vars.put("data", responseData);
                vars.put("result", resultData);
                vars.put("property", propName);
                vars.put("parameters", params);
                vars.put("array", isArray);
                vars.put("list", strList);
                if (isArray) {
                    // One script run per element; each run sees its "index" and the whole
                    // joined string as "value" (the element itself is reachable via "list").
                    List<Object> results = new ArrayList<Object>();
                    for (int i = 0; i < strList.size(); ++i) {
                        HashMap<String, Object> localVars = new HashMap<String, Object>(vars);
                        localVars.put("index", i);
                        localVars.put(VALUE_QUERY_TYPE, StringUtils.join(strList, (String)" "));
                        results.add(this.executeScript(scriptInfo.getLang(), scriptInfo.getScript(), scriptInfo.getScriptType(), localVars));
                    }
                    propertyValue = results;
                } else {
                    vars.put(VALUE_QUERY_TYPE, StringUtils.join(strList, (String)" "));
                    propertyValue = this.executeScript(scriptInfo.getLang(), scriptInfo.getScript(), scriptInfo.getScriptType(), vars);
                }
            }
            this.addPropertyData(dataMap, propName, propertyValue);
            if (isChildUrl) {
                this.registerChildUrls(propertyValue);
            }
        }
        this.storeIndex(responseData, dataMap);
    }

    /** Parses the spooled response body as HTML, always closing the file stream. */
    private Document parseHtmlDocument(File file, String charsetName, String url) {
        InputStream is = null;
        try {
            is = new BufferedInputStream(new FileInputStream(file));
            return Jsoup.parse(is, charsetName, url);
        }
        catch (IOException e) {
            throw new RobotCrawlAccessException("Could not parse " + url, (Throwable)e);
        }
        finally {
            IOUtils.closeQuietly(is);
        }
    }

    /** Reads the {@code maxFileSize} parameter, accepting any numeric type (settings may hold Integer or Long). */
    private long resolveMaxFileSize(Map<String, Object> params) {
        Object configured = SettingsUtils.get(params, (String)"maxFileSize", (Object)Long.valueOf(DEFAULT_MAX_ATTACHMENT_SIZE));
        if (configured instanceof Number) {
            return ((Number)configured).longValue();
        }
        if (configured != null) {
            logger.warn("Invalid maxFileSize: " + configured + ". Using " + DEFAULT_MAX_ATTACHMENT_SIZE);
        }
        return DEFAULT_MAX_ATTACHMENT_SIZE;
    }

    /** Adds the non-blank string form(s) of a property value to the per-thread child URL set, creating the set if needed. */
    private void registerChildUrls(Object propertyValue) {
        Set<String> childUrlSet = this.childUrlSetLocal.get();
        if (childUrlSet == null) {
            childUrlSet = new HashSet<String>();
            this.childUrlSetLocal.set(childUrlSet);
        }
        if (propertyValue instanceof String) {
            String str = (String)propertyValue;
            if (StringUtils.isNotBlank((CharSequence)str)) {
                childUrlSet.add(str);
            }
        } else if (propertyValue instanceof List) {
            for (Object obj : (List<?>)propertyValue) {
                if (obj == null) {
                    // Null placeholders (unmatched elements, failed invocations) carry no URL.
                    continue;
                }
                String str = obj.toString();
                if (StringUtils.isNotBlank((CharSequence)str)) {
                    childUrlSet.add(str);
                }
            }
        }
    }

    /**
     * Compiles and runs a script through the river's {@link ScriptService}.
     *
     * @param lang            script language passed to the compiler
     * @param script          script source or name, depending on the script type
     * @param scriptTypeValue case-insensitive name of the script type; anything other
     *                        than FILE or INDEXED is treated as INLINE
     * @param vars            variables made available to the script
     * @return the value produced by running the script
     */
    private Object executeScript(String lang, String script, String scriptTypeValue, Map<String, Object> vars) {
        ScriptService.ScriptType scriptType;
        if (ScriptService.ScriptType.FILE.toString().equalsIgnoreCase(scriptTypeValue)) {
            scriptType = ScriptService.ScriptType.FILE;
        } else if (ScriptService.ScriptType.INDEXED.toString().equalsIgnoreCase(scriptTypeValue)) {
            scriptType = ScriptService.ScriptType.INDEXED;
        } else {
            scriptType = ScriptService.ScriptType.INLINE;
        }

        ScriptService scriptService = this.riverConfig.getScriptService();
        CompiledScript compiled = scriptService.compile(lang, script, scriptType);
        ExecutableScript runnable = scriptService.executable(compiled, vars);
        return runnable.run();
    }

    /**
     * Extracts the script definition from the {@code script} parameter of a property rule.
     * <ul>
     * <li>a String is used as the script source with the default language and type;</li>
     * <li>a List is concatenated without separator and treated like a String;</li>
     * <li>a Map supplies {@code script} plus optional {@code lang} (default {@code groovy})
     *     and {@code script_type} (default {@code inline}).</li>
     * </ul>
     *
     * @param params the parameters of one property rule
     * @return the script definition, or {@code null} when no usable script is configured
     */
    protected ScriptInfo getScriptValue(Map<String, Object> params) {
        Object value = SettingsUtils.get(params, (String)SCRIPT_QUERY_TYPE, null);
        if (value instanceof String) {
            return new ScriptInfo(value.toString());
        }
        if (value instanceof List) {
            return new ScriptInfo(StringUtils.join((Iterable)((List)value), (String)""));
        }
        if (!(value instanceof Map)) {
            // Covers both a missing parameter and an unsupported value type.
            return null;
        }

        Map scriptMap = (Map)value;
        String script = (String)SettingsUtils.get((Map)scriptMap, (String)SCRIPT_QUERY_TYPE);
        if (script == null) {
            return null;
        }
        String lang = (String)SettingsUtils.get((Map)scriptMap, (String)"lang", (Object)"groovy");
        String scriptType = (String)SettingsUtils.get((Map)scriptMap, (String)"script_type", (Object)"inline");
        return new ScriptInfo(script, lang, scriptType);
    }

    /**
     * Evaluates the CSS query of a property rule and appends one string per selected
     * element to {@code strList}.
     * <p>
     * The known query types are probed in declaration order; the first one whose
     * parameter is a String or a List of queries that yields a non-null element array is
     * used and the remaining types are ignored. The query type names the {@code Element}
     * method invoked reflectively on each selected element, with the {@code args}
     * parameter as arguments. A {@code null} entry is appended for a null element and
     * for an element on which the invocation failed.
     *
     * @param document     parsed HTML document to query
     * @param propName     property name; a leading {@code []} marks an array property
     * @param params       parameters of the property rule
     * @param isTrimSpaces whether extracted strings are whitespace-normalized
     * @param strList      output list receiving the extracted values
     */
    protected void processCssQuery(Document document, String propName, Map<String, Object> params, boolean isTrimSpaces, List<String> strList) {
        for (String queryType : queryTypes) {
            Object queryObj = SettingsUtils.get(params, (String)queryType, null);
            Element[] selected;
            if (queryObj instanceof String) {
                selected = this.getElements(new Element[]{document}, queryObj.toString());
            } else if (queryObj instanceof List) {
                boolean arrayProperty = propName.startsWith(ARRAY_PROPERTY_PREFIX);
                selected = this.getElements(new Element[]{document}, (List)queryObj, arrayProperty);
            } else {
                continue;
            }
            if (selected == null) {
                continue;
            }

            for (Element element : selected) {
                if (element != null) {
                    List argList = (List)SettingsUtils.get(params, (String)ARGS_QUERY_TYPE, Collections.emptyList());
                    try {
                        Method queryMethod = this.getQueryMethod(element, queryType, argList);
                        Object[] args = argList.toArray(new Object[argList.size()]);
                        Object extracted = MethodUtil.invoke((Method)queryMethod, (Object)element, (Object[])args);
                        strList.add(this.trimSpaces((String)extracted, isTrimSpaces));
                    }
                    catch (Exception e) {
                        logger.warn("Could not invoke " + queryType + " on " + element, (Throwable)e);
                        strList.add(null);
                    }
                } else {
                    strList.add(null);
                }
            }
            // Only the first applicable query type is processed.
            return;
        }
    }

    /**
     * Resolves the method of the element's class that implements a query type.
     * Without arguments the no-parameter method is looked up; otherwise a method taking
     * as many {@code String} parameters as there are arguments.
     *
     * @param element   element whose class is inspected
     * @param queryType method name to resolve
     * @param argList   arguments that will be passed; may be {@code null} or empty
     * @return the resolved method
     */
    protected Method getQueryMethod(Element element, String queryType, List<Object> argList) {
        BeanDesc elementDesc = BeanDescFactory.getBeanDesc(element.getClass());
        boolean hasArgs = argList != null && !argList.isEmpty();
        if (!hasArgs) {
            return elementDesc.getMethod(queryType);
        }
        int argCount = argList.size();
        // Every argument is assumed to be passed as a String.
        Class[] paramTypes = Collections.nCopies(argCount, String.class).toArray(new Class[argCount]);
        return elementDesc.getMethod(queryType, paramTypes);
    }

    /**
     * Applies a chain of queries: each query is run against every element selected by
     * the previous one, and the results are concatenated in order.
     * <p>
     * Null elements are carried over as null placeholders. For array properties an
     * element that selects nothing also contributes a null placeholder.
     *
     * @param elements        starting elements
     * @param queries         queries applied one after another
     * @param isArrayProperty whether empty selections are recorded as {@code null}
     * @return the elements selected by the last query (the input array if there are no queries)
     */
    protected Element[] getElements(Element[] elements, List<String> queries, boolean isArrayProperty) {
        Element[] current = elements;
        for (String query : queries) {
            List<Element> next = new ArrayList<Element>();
            for (Element parent : current) {
                if (parent == null) {
                    next.add(null);
                    continue;
                }
                Element[] children = this.getElements(new Element[]{parent}, query);
                if (children.length > 0) {
                    Collections.addAll(next, children);
                } else if (isArrayProperty) {
                    next.add(null);
                }
            }
            current = next.toArray(new Element[next.size()]);
        }
        return current;
    }

    /** Matches {@code :eq(n)}, {@code :lt(n)} and {@code :gt(n)}; the number is in group 1, 2 or 3 respectively. */
    private static final Pattern POSITIONAL_PSEUDO_PATTERN = Pattern.compile(":eq\\(([0-9]+)\\)|:lt\\(([0-9]+)\\)|:gt\\(([0-9]+)\\)");

    /**
     * Selects elements with a CSS query, applying positional pseudo selectors to the
     * list of matches.
     * <p>
     * When {@code :eq(n)}, {@code :lt(n)} or {@code :gt(n)} directly follows a selector
     * (no space before it), the query text up to that point is run against each current
     * target and the pseudo selector picks from the resulting match list by position:
     * the n-th match, the matches before n, or the matches after n. A pseudo selector
     * that follows a space or starts the query is left in the query text and handled by
     * Jsoup. Whatever query text remains at the end is run against the current targets.
     * Null targets are carried over as null placeholders.
     *
     * @param elements starting elements
     * @param query    CSS query, possibly containing positional pseudo selectors
     * @return the selected elements (the input array itself when the query is blank)
     */
    protected Element[] getElements(Element[] elements, String query) {
        Element[] targets = elements;
        Matcher matcher = POSITIONAL_PSEUDO_PATTERN.matcher(query);
        StringBuffer buf = new StringBuffer();
        while (matcher.find()) {
            String value = matcher.group();
            matcher.appendReplacement(buf, "");
            // Guard the charAt: the buffer is empty when the query starts with the pseudo selector.
            boolean followsSelector = buf.length() > 0 && buf.charAt(buf.length() - 1) != ' ';
            if (!followsSelector) {
                buf.append(value);
                continue;
            }
            // Exactly one alternative matched, so exactly one of the three groups is set.
            String number = matcher.group(1);
            if (number == null) {
                number = matcher.group(2);
            }
            if (number == null) {
                number = matcher.group(3);
            }
            int index;
            try {
                index = Integer.parseInt(number);
            }
            catch (NumberFormatException e) {
                // Only reachable for numbers that do not fit into an int.
                logger.warn("Invalid number: " + query, (Throwable)e);
                buf.append(value);
                continue;
            }
            List<Element> elementList = new ArrayList<Element>();
            String childQuery = buf.toString();
            for (Element element : targets) {
                if (element == null) {
                    elementList.add(null);
                    continue;
                }
                Elements childElements = element.select(childQuery);
                int size = childElements.size();
                if (value.startsWith(":eq")) {
                    if (index < size) {
                        elementList.add(childElements.get(index));
                    }
                } else if (value.startsWith(":lt")) {
                    for (int i = 0; i < size && i < index; ++i) {
                        elementList.add(childElements.get(i));
                    }
                } else if (index < size) {
                    // ":gt"; the bound check also keeps index + 1 from overflowing.
                    for (int i = index + 1; i < size; ++i) {
                        elementList.add(childElements.get(i));
                    }
                }
            }
            targets = elementList.toArray(new Element[elementList.size()]);
            buf.setLength(0);
        }
        matcher.appendTail(buf);
        String lastQuery = buf.toString();
        if (StringUtil.isNotBlank((String)lastQuery)) {
            List<Element> elementList = new ArrayList<Element>();
            for (Element element : targets) {
                if (element == null) {
                    elementList.add(null);
                    continue;
                }
                Elements childElements = element.select(lastQuery);
                for (int i = 0; i < childElements.size(); ++i) {
                    elementList.add(childElements.get(i));
                }
            }
            targets = elementList.toArray(new Element[elementList.size()]);
        }
        return targets;
    }

    /**
     * Optionally normalizes whitespace: every run of whitespace becomes a single space
     * and leading/trailing whitespace is removed.
     *
     * @param value      the text to process; may be {@code null}
     * @param trimSpaces whether normalization is applied
     * @return the normalized text, or {@code value} unchanged when it is {@code null}
     *         or normalization is disabled
     */
    protected String trimSpaces(String value, boolean trimSpaces) {
        if (value == null || !trimSpaces) {
            return value;
        }
        String collapsed = value.replaceAll("\\s+", " ");
        return collapsed.trim();
    }

    /**
     * Stores a value under a dotted key, creating the intermediate maps on the way:
     * {@code "a.b.c"} puts the value at {@code dataMap["a"]["b"]["c"]}.
     * <p>
     * An existing intermediate value may be any {@link Map} implementation; a missing
     * one is created as a {@link LinkedHashMap}. A key that contains no name at all
     * (only dots) is stored as-is at the top level.
     *
     * @param dataMap the root map that is modified in place
     * @param key     property name, with {@code .} separating nesting levels
     * @param value   the value to store
     * @throws ClassCastException if an intermediate key already holds a non-map value
     */
    protected void addPropertyData(Map<String, Object> dataMap, String key, Object value) {
        String[] keys = key.split("\\.");
        if (keys.length == 0) {
            // split() drops trailing empty strings, so a key made only of dots yields no segments.
            dataMap.put(key, value);
            return;
        }
        Map<String, Object> currentDataMap = dataMap;
        for (int i = 0; i < keys.length - 1; ++i) {
            String currentKey = keys[i];
            @SuppressWarnings("unchecked")
            Map<String, Object> map = (Map<String, Object>)currentDataMap.get(currentKey);
            if (map == null) {
                map = new LinkedHashMap<String, Object>();
                currentDataMap.put(currentKey, map);
            }
            currentDataMap = map;
        }
        currentDataMap.put(keys[keys.length - 1], value);
    }

    /**
     * Writes the scraped data of one response to the index configured for its session.
     * <p>
     * With overwrite enabled, documents having the same {@code url} are deleted first and
     * the index is refreshed. If the data contains the {@code []} entry, its flattened
     * properties are expanded into one document per position: list values contribute
     * their element at that position (if any), other values go into the first document
     * only, and every document also gets a copy of the remaining data and a
     * {@code position} field. Without the {@code []} entry a single document is written.
     *
     * @param responseData the crawled response (session id and URL are read from it)
     * @param dataMap      the scraped data; the {@code []} entry is removed from it
     */
    protected void storeIndex(ResponseData responseData, Map<String, Object> dataMap) {
        String sessionId = responseData.getSessionId();
        String indexName = this.riverConfig.getIndexName(sessionId);
        String typeName = this.riverConfig.getTypeName(sessionId);
        boolean overwrite = this.riverConfig.isOverwrite(sessionId);
        Client client = this.riverConfig.getClient();
        if (logger.isDebugEnabled()) {
            logger.debug("Index: " + indexName + ", sessionId: " + sessionId + ", Data: " + dataMap);
        }
        if (overwrite) {
            client.prepareDeleteByQuery(new String[]{indexName}).setQuery((QueryBuilder)QueryBuilders.termQuery((String)"url", (String)responseData.getUrl())).execute().actionGet();
            client.admin().indices().prepareRefresh(new String[]{indexName}).execute().actionGet();
        }

        @SuppressWarnings("unchecked")
        Map<String, Object> arrayDataMap = (Map<String, Object>)dataMap.remove(ARRAY_PROPERTY_PREFIX);
        if (arrayDataMap == null) {
            this.storeIndex(client, indexName, typeName, dataMap);
            return;
        }

        Map<String, Object> flatArrayDataMap = new LinkedHashMap<String, Object>();
        this.convertFlatMap("", arrayDataMap, flatArrayDataMap);

        // The longest list determines how many documents are written.
        int documentCount = 0;
        for (Object candidate : flatArrayDataMap.values()) {
            if (candidate instanceof List) {
                documentCount = Math.max(documentCount, ((List<?>)candidate).size());
            }
        }

        for (int position = 0; position < documentCount; ++position) {
            Map<String, Object> document = new LinkedHashMap<String, Object>();
            document.put(POSITION_FIELD, position);
            this.deepCopy(dataMap, document);
            for (Map.Entry<String, Object> property : flatArrayDataMap.entrySet()) {
                Object propertyValue = property.getValue();
                if (propertyValue instanceof List) {
                    List<?> values = (List<?>)propertyValue;
                    if (position < values.size()) {
                        this.addPropertyData(document, property.getKey(), values.get(position));
                    }
                } else if (position == 0) {
                    this.addPropertyData(document, property.getKey(), propertyValue);
                }
            }
            this.storeIndex(client, indexName, typeName, document);
        }
    }

    /**
     * Indexes one document with refresh enabled, after stamping it with the current time
     * in the {@code @timestamp} field. A failure is logged as a warning and not rethrown.
     *
     * @param client    Elasticsearch client used for the request
     * @param indexName target index
     * @param typeName  target type
     * @param dataMap   document source; the timestamp field is added to this map
     */
    protected void storeIndex(Client client, String indexName, String typeName, Map<String, Object> dataMap) {
        Date now = new Date();
        dataMap.put(TIMESTAMP_FIELD, now);
        if (logger.isDebugEnabled()) {
            logger.debug(indexName + "/" + typeName + " : dataMap" + dataMap);
        }
        try {
            client.prepareIndex(indexName, typeName)
                    .setRefresh(true)
                    .setSource(XContentFactory.jsonBuilder().value(dataMap))
                    .execute()
                    .actionGet();
        }
        catch (Exception e) {
            logger.warn("Could not write a content into index.", (Throwable)e);
        }
    }

    /**
     * Copies all entries of {@code oldMap} into {@code newMap}, rebuilding the nested
     * map structure with new map instances: the source is flattened to dotted keys and
     * each entry is re-inserted through {@code addPropertyData}. Non-map values are
     * shared between the two maps, not cloned.
     *
     * @param oldMap source map
     * @param newMap destination map, modified in place
     */
    protected void deepCopy(Map<String, Object> oldMap, Map<String, Object> newMap) {
        Map<String, Object> flattened = new LinkedHashMap<String, Object>();
        this.convertFlatMap("", oldMap, flattened);
        for (Map.Entry<String, Object> leaf : flattened.entrySet()) {
            String dottedKey = leaf.getKey();
            this.addPropertyData(newMap, dottedKey, leaf.getValue());
        }
    }

    /**
     * Flattens a nested map into dotted keys: a value that is itself a {@link Map} is
     * descended into with its key plus {@code .} appended to the prefix, any other value
     * is stored under {@code prefix + key}.
     *
     * @param prefix key prefix of the current nesting level ({@code ""} at the root)
     * @param oldMap map to flatten
     * @param newMap destination receiving the flattened entries
     */
    protected void convertFlatMap(String prefix, Map<String, Object> oldMap, Map<String, Object> newMap) {
        for (Map.Entry<String, Object> entry : oldMap.entrySet()) {
            String path = prefix + entry.getKey();
            Object value = entry.getValue();
            if (value instanceof Map) {
                @SuppressWarnings("unchecked")
                Map<String, Object> nested = (Map<String, Object>)value;
                this.convertFlatMap(path + ".", nested, newMap);
            } else {
                newMap.put(path, value);
            }
        }
    }

    /**
     * Registers the URLs to crawl next. If the current thread has a child URL set, one
     * GET request per collected URL is built, passed through {@code convertChildUrlList}
     * and added to the result; the request of the current response and its duplicate
     * URL are then removed from the result. Without such a set the parent
     * implementation is used.
     *
     * @param responseData the crawled response
     * @param resultData   the result receiving the child URLs
     */
    @Override
    protected void storeChildUrls(ResponseData responseData, ResultData resultData) {
        Set<String> childLinkSet = this.childUrlSetLocal.get();
        if (childLinkSet == null) {
            super.storeChildUrls(responseData, resultData);
            return;
        }

        List<RequestData> requests = new ArrayList<RequestData>();
        for (String childUrl : childLinkSet) {
            RequestData request = RequestDataBuilder.newRequestData().get().url(childUrl).build();
            requests.add(request);
        }
        requests = this.convertChildUrlList(requests);
        resultData.addAllUrl(requests);

        RequestData currentRequest = responseData.getRequestData();
        resultData.removeUrl(currentRequest);
        resultData.removeUrl(this.getDuplicateUrl(currentRequest));
    }

    /**
     * Always returns {@code null}; the access result data is not used here.
     *
     * @param accessResultData ignored
     * @return {@code null}
     */
    public Object getData(AccessResultData accessResultData) {
        return null;
    }

    /**
     * Immutable description of a script attached to a property rule: its source (or
     * name), its language and its script type name.
     */
    private static class ScriptInfo {
        private final String script;
        private final String lang;
        private final String scriptType;

        /**
         * Creates a script definition with the default language {@code groovy} and
         * the default script type {@code inline}.
         *
         * @param script the script source
         */
        ScriptInfo(String script) {
            this(script, "groovy", "inline");
        }

        /**
         * @param script     the script source or name
         * @param lang       the script language
         * @param scriptType the script type name
         */
        ScriptInfo(String script, String lang, String scriptType) {
            this.script = script;
            this.lang = lang;
            this.scriptType = scriptType;
        }

        /** @return the script source or name */
        public String getScript() {
            return this.script;
        }

        /** @return the script language */
        public String getLang() {
            return this.lang;
        }

        /** @return the script type name */
        public String getScriptType() {
            return this.scriptType;
        }
    }
}

