/*
 * Decompiled with CFR 0.152.
 */
package org.codelibs.fess.crawler.transformer.impl;

import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.HashMap;
import javax.annotation.Resource;
import org.codelibs.fess.crawler.Constants;
import org.codelibs.fess.crawler.container.CrawlerContainer;
import org.codelibs.fess.crawler.entity.AccessResultData;
import org.codelibs.fess.crawler.entity.ResponseData;
import org.codelibs.fess.crawler.entity.ResultData;
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.extractor.Extractor;
import org.codelibs.fess.crawler.extractor.ExtractorFactory;
import org.codelibs.fess.crawler.transformer.impl.AbstractTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Transformer that extracts the text of a response body through the
 * {@link Extractor} registered for the response's MIME type and stores the
 * text, encoded with {@link #charsetName}, in a {@link ResultData}.
 *
 * <p>NOTE(review): {@link #transform(ResponseData)} may overwrite
 * {@link #charsetName} when the configured name is not supported, so the
 * instance carries mutable state — confirm how instances are shared before
 * relying on concurrent use.</p>
 */
public class TextTransformer extends AbstractTransformer {
    private static final Logger logger = LoggerFactory.getLogger(TextTransformer.class);

    /** Container used to look up the {@code extractorFactory} component. */
    @Resource
    protected CrawlerContainer crawlerContainer;

    /** Name of the charset used to encode and decode the stored text. */
    protected String charsetName = "UTF-8";

    /**
     * Extracts the text of the response body and wraps it in a {@link ResultData}.
     *
     * @param responseData the crawled response; must have a response body
     * @return result data holding the encoded text, this transformer's name
     *         and the charset name that was used for encoding
     * @throws CrawlingAccessException if there is no response body, no
     *         extractor is available for the MIME type, or extraction fails
     *         or yields no content
     * @throws CrawlerSystemException if the extractor factory is not registered
     */
    @Override
    public ResultData transform(ResponseData responseData) {
        if (responseData == null || !responseData.hasResponseBody()) {
            throw new CrawlingAccessException("No response body.");
        }
        ExtractorFactory extractorFactory = (ExtractorFactory)this.crawlerContainer.getComponent("extractorFactory");
        if (extractorFactory == null) {
            throw new CrawlerSystemException("Could not find extractorFactory.");
        }
        Extractor extractor = extractorFactory.getExtractor(responseData.getMimeType());
        if (extractor == null) {
            // Report the real cause instead of letting a NullPointerException be wrapped below.
            throw new CrawlingAccessException("Could not extract data. No extractor for " + responseData.getMimeType());
        }
        HashMap<String, String> params = new HashMap<String, String>();
        params.put("resourceName", this.getResourceName(responseData));
        params.put("Content-Type", responseData.getMimeType());
        String content;
        try (InputStream in = responseData.getResponseBody()) {
            content = extractor.getText(in, params).getContent();
        }
        catch (Exception e) {
            throw new CrawlingAccessException("Could not extract data.", e);
        }
        if (content == null) {
            // Without this guard the encoding step below would fail with a bare NullPointerException.
            throw new CrawlingAccessException("Could not extract data. The extracted content is null.");
        }
        ResultData resultData = new ResultData();
        resultData.setTransformerName(this.getName());
        try {
            resultData.setData(content.getBytes(this.charsetName));
        }
        catch (UnsupportedEncodingException e) {
            logger.info("Invalid charsetName: {}. Changed to UTF-8", this.charsetName, e);
            // Remember the fallback so the encoding recorded below matches the stored bytes.
            this.charsetName = Constants.UTF_8_CHARSET.name();
            resultData.setData(content.getBytes(Constants.UTF_8_CHARSET));
        }
        resultData.setEncoding(this.charsetName);
        return resultData;
    }

    /**
     * Decodes the stored bytes back into text using {@link #charsetName}.
     *
     * @param accessResultData stored result produced by this transformer
     * @return the decoded text, or {@code null} when no data is stored
     * @throws CrawlerSystemException if the data was produced by a transformer
     *         with a different name
     * @throws CrawlingAccessException if {@link #charsetName} is not supported
     */
    @Override
    public Object getData(AccessResultData<?> accessResultData) {
        if (!this.getName().equals(accessResultData.getTransformerName())) {
            throw new CrawlerSystemException("Transformer is invalid. Use " + accessResultData.getTransformerName() + ". This transformer is " + this.getName() + ".");
        }
        byte[] data = accessResultData.getData();
        if (data == null) {
            return null;
        }
        try {
            return new String(data, this.charsetName);
        }
        catch (UnsupportedEncodingException e) {
            throw new CrawlingAccessException("Unsupported encoding: " + this.charsetName, e);
        }
    }

    /**
     * Derives a resource name from the last path segment of the response URL.
     *
     * <p>Trailing slashes are removed first, then the text after the last
     * {@code '/'} is URL-decoded with the response charset. When decoding is
     * not possible (unsupported charset or malformed percent-escape) the
     * undecoded segment is returned.</p>
     *
     * @param responseData the response whose URL and charset are read
     * @return the resource name, or {@code null} if the URL or charset is {@code null}
     */
    private String getResourceName(ResponseData responseData) {
        String name = responseData.getUrl();
        String enc = responseData.getCharSet();
        if (name == null || enc == null) {
            return null;
        }
        name = name.replaceAll("/+$", "");
        int idx = name.lastIndexOf('/');
        if (idx >= 0) {
            name = name.substring(idx + 1);
        }
        try {
            return URLDecoder.decode(name, enc);
        }
        catch (UnsupportedEncodingException | IllegalArgumentException e) {
            // URLDecoder rejects malformed escapes (e.g. "100%.txt") with IllegalArgumentException;
            // the resource name is only a hint, so fall back to the raw segment.
            logger.debug("Could not decode resource name: {} ({})", name, enc, e);
            return name;
        }
    }

    /**
     * Returns the name of the charset used for encoding and decoding.
     *
     * @return the charset name
     */
    public String getCharsetName() {
        return this.charsetName;
    }

    /**
     * Sets the name of the charset used for encoding and decoding.
     *
     * @param charsetName the charset name
     */
    public void setCharsetName(String charsetName) {
        this.charsetName = charsetName;
    }
}

