/*
 * Copyright 2017 Global Crop Diversity Trust
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.genesys.taxonomy.download;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dorkbox.cabParser.CabException;
import dorkbox.cabParser.CabParser;
import dorkbox.cabParser.CabStreamSaver;
import dorkbox.cabParser.structure.CabFileEntry;

/**
 * Download and extract GRIN-Taxa CAB files.
 *
 * <p>
 * The archive is fetched over HTTP (optionally through a proxy configured with
 * {@link #setHttpProxy(String)}) and unpacked with the dorkbox CAB parser.
 * </p>
 *
 * @author Matija Obreza
 */
public class TaxonomyDownloader {
	private static final Logger LOG = LoggerFactory.getLogger(TaxonomyDownloader.class);

	/** Location of the latest GRIN Taxonomy database published by NPGS */
	public static final String DEFAULT_GRIN_TAXA_CAB_URL = "https://npgsweb.ars-grin.gov/gringlobal/uploads/documents/taxonomy_data.cab";

	/** Builder used to create a fresh HTTP client for every download. */
	private final HttpClientBuilder httpClientBuilder;

	/**
	 * Download GRIN Taxonomy database to {@code taxonomy_data.cab} in the working
	 * directory (unless that file already exists) and unpack it to the
	 * {@code taxonomy_data} folder without overwriting existing files.
	 *
	 * @param args program arguments (not used)
	 */
	public static void main(String[] args) {
		File destinationDir = new File("taxonomy_data");
		if (!destinationDir.isDirectory() && !destinationDir.mkdirs()) {
			LOG.error("Could not create destination directory {}", destinationDir.getAbsolutePath());
			return;
		}

		LOG.info("Downloading GRIN-Taxonomy database to {}", destinationDir.getAbsolutePath());
		TaxonomyDownloader dl = new TaxonomyDownloader();

		try {
			File downloadedCabFile = new File("taxonomy_data.cab");

			if (!downloadedCabFile.exists()) {
				dl.downloadCurrent(downloadedCabFile);
			} else if (!downloadedCabFile.canRead()) {
				// An unreadable file is in the way: download to a temporary file instead
				downloadedCabFile = File.createTempFile("grin-", ".cab");
				dl.downloadCurrent(downloadedCabFile);
			}

			unpackCabinetFile(downloadedCabFile, destinationDir, false);

		} catch (IOException e) {
			LOG.error("Failed to download or unpack the GRIN-Taxonomy database: {}", e.getMessage(), e);
		}
	}

	/**
	 * Initialize the HTTP client builder with cookie management disabled.
	 */
	public TaxonomyDownloader() {
		this.httpClientBuilder = HttpClientBuilder.create().disableCookieManagement();
	}

	/**
	 * Set HTTP Proxy host. Blank or {@code null} values are ignored.
	 *
	 * @param proxyHost host address and optional port, e.g. {@code proxy.example.org:8080};
	 *        a scheme prefix such as {@code http://} is accepted
	 * @throws IllegalArgumentException if the port is not a number
	 */
	public void setHttpProxy(String proxyHost) {
		if (proxyHost != null && proxyHost.trim().length() > 0) {
			HttpHost proxy = parseProxyHost(proxyHost.trim());
			LOG.info("Using HTTP proxy {}", proxy.toHostString());
			httpClientBuilder.setProxy(proxy);
		}
	}

	/**
	 * Split a {@code [scheme://]host[:port]} specification into its parts.
	 * {@code new HttpHost(String)} would treat the entire text as the host name.
	 *
	 * @param proxySpec trimmed, non-empty proxy specification
	 * @return the proxy host; port and scheme fall back to the HttpHost defaults when absent
	 */
	private static HttpHost parseProxyHost(String proxySpec) {
		String hostPart = proxySpec;
		String scheme = null;

		int schemeEnd = hostPart.indexOf("://");
		if (schemeEnd > 0) {
			scheme = hostPart.substring(0, schemeEnd);
			hostPart = hostPart.substring(schemeEnd + 3);
		}

		int port = -1;
		int colon = hostPart.lastIndexOf(':');
		// Only a single colon is treated as a port separator, values with more colons are left untouched
		if (colon > 0 && colon == hostPart.indexOf(':')) {
			try {
				port = Integer.parseInt(hostPart.substring(colon + 1));
			} catch (NumberFormatException e) {
				throw new IllegalArgumentException("Invalid HTTP proxy port in: " + proxySpec, e);
			}
			hostPart = hostPart.substring(0, colon);
		}

		return new HttpHost(hostPart, port, scheme);
	}

	/**
	 * Download current GRIN Taxonomy .cab file to {@code outputFile}. If the
	 * download fails, the partially written file is removed so that it is not
	 * mistaken for a complete archive later.
	 *
	 * @param outputFile target file
	 * @throws IOException if the database cannot be downloaded
	 */
	public void downloadCurrent(File outputFile) throws IOException {
		LOG.info("Downloading {} to {}", DEFAULT_GRIN_TAXA_CAB_URL, outputFile.getAbsolutePath());

		// Opened outside the guarded block: if the file cannot be opened nothing was written and nothing is deleted
		FileOutputStream fileStream = new FileOutputStream(outputFile);

		try (OutputStream output = new BufferedOutputStream(fileStream)) {
			fetchUrlToStream(DEFAULT_GRIN_TAXA_CAB_URL, output);
			output.flush();
		} catch (IOException | RuntimeException e) {
			// The stream is already closed at this point, remove the incomplete file
			if (outputFile.isFile() && !outputFile.delete()) {
				LOG.warn("Could not remove incomplete download {}", outputFile.getAbsolutePath());
			}
			throw e;
		}

		LOG.info("Downloaded file size: {}", outputFile.length());
	}

	/**
	 * Issue a GET request and copy the response body to {@code output}.
	 *
	 * @param grinTaxaCabUrl URL to fetch
	 * @param output stream receiving the response body; not closed by this method
	 * @throws IOException on transport errors, on a non-2xx response status or when the response has no body
	 */
	private void fetchUrlToStream(String grinTaxaCabUrl, OutputStream output) throws IOException {
		try (CloseableHttpClient httpClient = httpClientBuilder.build()) {
			HttpGet httpGet = new HttpGet(grinTaxaCabUrl);

			RequestConfig config = RequestConfig.custom().setCircularRedirectsAllowed(false).setConnectTimeout(30 * 1000).setSocketTimeout(60 * 1000).build();
			httpGet.setConfig(config);

			// Closing the response releases the underlying connection
			try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
				LOG.info("Status: {}", response.getStatusLine());

				int statusCode = response.getStatusLine().getStatusCode();
				if (statusCode < 200 || statusCode >= 300) {
					// Do not store an error page as if it were the requested file
					throw new IOException("Unexpected HTTP response " + response.getStatusLine() + " for " + grinTaxaCabUrl);
				}

				HttpEntity responseEntity = response.getEntity();
				if (responseEntity == null) {
					throw new IOException("HTTP response for " + grinTaxaCabUrl + " has no content");
				}

				LOG.info("Content-Type: {}", responseEntity.getContentType());
				LOG.info("Content-Length: {}", responseEntity.getContentLength());

				IOUtils.copy(responseEntity.getContent(), output);

				// Ensure the response body is fully consumed
				EntityUtils.consume(responseEntity);
			}
		}
	}

	/**
	 * Unpack .cab file to destination folder. Entries whose name resolves to a
	 * location outside of {@code destinationDir} are skipped, as are existing
	 * files when {@code allowOverwrite} is {@code false}.
	 *
	 * @param downloadedCabFile .cab file
	 * @param destinationDir target folder
	 * @param allowOverwrite {@code true} if files can be overwritten
	 * @throws IOException when things go wrong
	 */
	public static void unpackCabinetFile(File downloadedCabFile, File destinationDir, final boolean allowOverwrite) throws IOException {
		LOG.info("Unpacking CAB file {}", downloadedCabFile.getAbsolutePath());

		CabStreamSaver streamSaver = new CabStreamSaver() {
			@Override
			public boolean saveReservedAreaData(byte[] data, int dataLength) {
				return false;
			}

			@Override
			public OutputStream openOutputStream(CabFileEntry cabFile) {
				File outputFile = new File(destinationDir, cabFile.getName());

				// Entry names come from the archive: never write outside of the destination folder
				try {
					if (!isWithinDirectory(destinationDir, outputFile)) {
						LOG.warn("Refusing to extract {} outside of {}", cabFile.getName(), destinationDir.getAbsolutePath());
						return null;
					}
				} catch (IOException e) {
					LOG.error("Cannot resolve target path for {}: {}", cabFile.getName(), e.getMessage(), e);
					return null;
				}

				LOG.info("Extracting file {} size={} to {}", cabFile.getName(), cabFile.getSize(), outputFile.getAbsolutePath());

				if (outputFile.exists() && !allowOverwrite) {
					LOG.info("Refusing to overwrite existing file at {}", outputFile.getAbsolutePath());
					return null;
				}

				try {
					return new BufferedOutputStream(new FileOutputStream(outputFile));
				} catch (FileNotFoundException e) {
					LOG.error("Cannot open {} for writing: {}", outputFile.getAbsolutePath(), e.getMessage(), e);
					return null;
				}
			}

			@Override
			public void closeOutputStream(OutputStream outputStream, CabFileEntry cabFile) {
				if (outputStream != null) {
					try {
						outputStream.close();
					} catch (IOException e) {
						// Buffered data may not have reached the disk, make that visible
						LOG.warn("Error closing extracted file {}: {}", cabFile.getName(), e.getMessage(), e);
					}
				}
			}
		};

		try (InputStream inputStream = new BufferedInputStream(new FileInputStream(downloadedCabFile))) {
			CabParser cabParser = new CabParser(inputStream, streamSaver);
			cabParser.extractStream();
		} catch (CabException e) {
			throw new IOException("Could not unpack CAB file " + downloadedCabFile.getAbsolutePath() + ": " + e.getMessage(), e);
		}
	}

	/**
	 * Test whether {@code candidate} resolves to a path below {@code directory}.
	 *
	 * @param directory the containing folder
	 * @param candidate the file to check
	 * @return {@code true} if the canonical path of {@code candidate} is located under {@code directory}
	 * @throws IOException if a canonical path cannot be determined
	 */
	private static boolean isWithinDirectory(File directory, File candidate) throws IOException {
		String directoryPath = directory.getCanonicalPath();
		if (!directoryPath.endsWith(File.separator)) {
			directoryPath += File.separator;
		}
		return candidate.getCanonicalPath().startsWith(directoryPath);
	}

}
