/*
 * Copyright 2020 Global Crop Diversity Trust
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.genesys.taxonomy.gringlobal.component;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.input.BOMInputStream;

import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.bean.CsvToBean;
import com.opencsv.bean.CsvToBeanBuilder;
import com.opencsv.bean.HeaderColumnNameMappingStrategy;
import com.opencsv.processor.RowProcessor;

/**
 * Helper methods for reading USDA-published CAB data files.
 */
public class CabReader {

	/**
	 * Date format used in the Taxonomy CSVs: 2000-12-08 00:00:00.
	 */
	public static final String CSV_DATE_FORMAT = "yyyy-MM-dd' 'HH:mm:ss";

	/** Charset assumed when the stream does not start with a recognized BOM. */
	private static final String DEFAULT_ENCODING = "UTF-8";

	/** Cell content that stands for a missing value; it is replaced with <code>null</code>. */
	private static final String NULL_MARKER = "\\N";

	/**
	 * Wraps the input stream in a reader that strips a leading byte order mark (BOM)
	 * and decodes the content with the charset announced by that BOM. UTF-8 is
	 * used when no BOM is present.
	 * <p>
	 * UTF-8, UTF-16 (LE/BE) and UTF-32 (LE/BE) BOMs are recognized. The caller is
	 * responsible for closing the returned reader.
	 *
	 * @param inputStream the input stream
	 * @return a reader over the stream content with the BOM removed
	 * @throws IOException if reading the BOM fails or the charset is not supported
	 */
	public static InputStreamReader bomSafeReader(InputStream inputStream) throws IOException {
		// BOMInputStream detects only the UTF-8 BOM unless the BOMs to look for are
		// listed explicitly; without this list the charset lookup below could never
		// yield anything but UTF-8 and UTF-16/32 files would be decoded as garbage.
		BOMInputStream bomStream = new BOMInputStream(inputStream,
			ByteOrderMark.UTF_8,
			ByteOrderMark.UTF_16LE,
			ByteOrderMark.UTF_16BE,
			ByteOrderMark.UTF_32LE,
			ByteOrderMark.UTF_32BE);

		ByteOrderMark bom = bomStream.getBOM();
		String charsetName = bom == null ? DEFAULT_ENCODING : bom.getCharsetName();
		return new InputStreamReader(new BufferedInputStream(bomStream), charsetName);
	}

	/**
	 * Creates a {@link CsvToBean} that maps rows from the reader to instances of
	 * the given class, matching columns to bean fields by header name.
	 *
	 * @param <T> the bean type
	 * @param clazz the bean class to instantiate for each row
	 * @param reader the CSV reader supplying the rows
	 * @return the configured CSV-to-bean converter
	 */
	public static <T> CsvToBean<T> beanReader(Class<T> clazz, CSVReader reader) {
		HeaderColumnNameMappingStrategy<T> headerMapping = new HeaderColumnNameMappingStrategy<>();
		headerMapping.setType(clazz);
		return new CsvToBeanBuilder<T>(reader).withMappingStrategy(headerMapping).build();
	}

	/**
	 * Returns a {@link CSVReader} that properly handles GRIN-Global taxonomy files in CSV format. It converts \N to <code>null</code> in each row.
	 * <p>
	 * The input is read through {@link #bomSafeReader(InputStream)} and parsed as
	 * tab-separated text with quotations ignored. The caller is responsible for
	 * closing the returned reader.
	 *
	 * @param inputStream the input stream
	 * @param startAt the number of lines to skip before reading the first row
	 * @return the CSV reader
	 * @throws IOException I/O exception
	 */
	public static CSVReader openCsvReader(InputStream inputStream, int startAt) throws IOException {

		var parser = new CSVParserBuilder()
			.withSeparator('\t')
			.withQuoteChar('"')
			// The NUL character is used so that no ordinary character (notably the
			// backslash of the \N marker) is treated as an escape character.
			.withEscapeChar((char) 0)
			.withStrictQuotes(false)
			.withIgnoreQuotations(true)
			.build();

		return new CSVReaderBuilder(bomSafeReader(inputStream))
			.withSkipLines(startAt)
			.withCSVParser(parser)
			.withRowProcessor(new RowProcessor() {
				@Override
				public void processRow(String[] row) {
					// Rewrite the row in place, cell by cell.
					for (var i = 0; i < row.length; i++) {
						row[i] = processColumnItem(row[i]);
					}
				}

				@Override
				public String processColumnItem(String column) {
					return NULL_MARKER.equals(column) ? null : column;
				}
			})
			.build();
	}

}
