/*******************************************************************************
 *     Copyright 2016-2017 the original author or authors.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *  
 *******************************************************************************/
package pro.parseq.vcf.types;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import pro.parseq.vcf.fields.Filter;
import pro.parseq.vcf.fields.Format;
import pro.parseq.vcf.fields.Information;
import pro.parseq.vcf.fields.types.Genotype;
import pro.parseq.vcf.utils.VcfGrammar;

/**
 * Represents VCF data line
 * 
 * @author Alexander Afanasyev <a href="mailto:aafanasyev@parseq.pro">aafanasyev@parseq.pro</a>
 */
public class DataLine extends VcfLine {

	private static final Logger logger = LoggerFactory.getLogger(DataLine.class);

	// Genetic variants described by this data line (one per ALT token);
	// stays empty when the line is rejected during parsing
	private final List<Variant> variants = new ArrayList<>();

	/**
	 * Parses input VCF line against presented meta-information and constructs
	 * genetic variants described in the input line: one {@link Variant} per
	 * token of the ALT column.
	 * <p>
	 * Parsing is all-or-nothing: if the line fails the validity check, has an
	 * unexpected number of columns, or references a FILTER, INFO or FORMAT key
	 * that is absent from the meta-information, an error is logged and
	 * {@link #getVariants()} returns an empty list.
	 * 
	 * @param line {@link String} representing VCF data line
	 * @param metadata Meta-information to create genetic variant against
	 * @throws NumberFormatException if the POS or QUAL column cannot be parsed as a number
	 */
	public DataLine(String line, VcfFile metadata) {

		super(line);
		if (!isValid()) {
			logger.error("Malformed data line found: {}", line);
			return;
		}

		// Either only the mandatory columns are present, or the mandatory
		// columns are followed by FORMAT (+1) and one column per declared sample
		String[] columns = line.split(VcfGrammar.COLUMN_DELIMITER);
		if (columns.length != VcfGrammar.MANDATORY_COLUMNS_NUMBER
				&& columns.length != VcfGrammar.MANDATORY_COLUMNS_NUMBER
						+ metadata.getSampleNames().size() + 1) {
			logger.error("Illegal column numbers {} for line: {}",
					columns.length, line);
			// TODO: proper handle logic to implement
			return;
		}

		// CHROM, POS, REF, QUAL
		String chrom = columns[VcfGrammar.CHROM_COLUMN_IDX],
				ref = columns[VcfGrammar.REF_COLUMN_IDX];
		long pos = Long.parseLong(columns[VcfGrammar.POS_COLUMN_IDX]);
		// QUAL stays null when the column holds the missing-value marker
		Double qual = null;
		if (!columns[VcfGrammar.QUAL_COLUMN_IDX].equals(VcfGrammar.MISSING_VALUE)) {
			qual = Double.parseDouble(columns[VcfGrammar.QUAL_COLUMN_IDX]);
		}
		// ID
		List<String> ids = new ArrayList<>();
		if (!columns[VcfGrammar.ID_COLUMN_IDX].equals(VcfGrammar.MISSING_VALUE)) {
			String[] idTokens = columns[VcfGrammar.ID_COLUMN_IDX]
					.split(VcfGrammar.ID_DELIMITER);
			ids = Arrays.asList(idTokens);
		}
		// ALT
		String[] altTokens = columns[VcfGrammar.ALT_COLUMN_IDX]
				.split(VcfGrammar.ALLELE_DELIMITER);
		// FILTER: the line counts as filtered unless the column is missing or marked as passed
		boolean filtered = false;
		List<Filter> filters = new ArrayList<>();
		if (!(columns[VcfGrammar.FILTER_COLUMN_IDX].equals(VcfGrammar.MISSING_VALUE) 
				|| columns[VcfGrammar.FILTER_COLUMN_IDX].equals(VcfGrammar.FILTER_PASSED))) {

			filtered = true;
			String[] filterTokens = columns[VcfGrammar.FILTER_COLUMN_IDX]
					.split(VcfGrammar.FILTER_DELIMITER);
			for (String filterToken: filterTokens) {

				Filter filter = metadata.getFilters().get(filterToken);
				if (filter == null) {
					logger.error("Unknown filter {} found in line: {}", filterToken, line);
					return;
				}

				filters.add(filter);
			}
		}
		// INFO
		Map<String, List<? extends Serializable>> infos = new HashMap<>();
		String infoColumn = columns[VcfGrammar.INFO_COLUMN_IDX];
		// A column holding only the missing-value marker carries no keys at all;
		// it must not be looked up as if it were an INFO key
		if (!infoColumn.equals(VcfGrammar.MISSING_VALUE)) {
			String[] infoTokens = infoColumn.split(VcfGrammar.INFO_DELIMITER);
			for (String infoToken: infoTokens) {

				String[] infoKeyValueTokens = infoToken.split(VcfGrammar.INFO_KEY_VALUE_DELIMITER);
				Information info = metadata.getInfos().get(infoKeyValueTokens[0]);
				if (info == null) {
					logger.error("Unknown info key {} found in line: {}",
							infoKeyValueTokens[0], line);
					// TODO: proper handle logic to implement
					return;
				}

				if (infoKeyValueTokens.length == 1) {
					// Key without a value: stored with an empty value list
					infos.put(info.getId(), new ArrayList<>());
				} else {
					infos.put(info.getId(), info.getValue(infoKeyValueTokens[1]));
				}
			}
		}

		// Sample name -> (format id -> parsed values)
		Map<String, Map<String, List<? extends Serializable>>> sampleFormats = new HashMap<>();
		if (columns.length > VcfGrammar.MANDATORY_COLUMNS_NUMBER) {
			// FORMAT
			List<Format> formats = new ArrayList<>();
			String[] formatTokens = columns[VcfGrammar.FORMAT_COLUMN_IDX]
					.split(VcfGrammar.FORMAT_DELIMITER);
			for (String formatToken: formatTokens) {

				Format format = metadata.getFormats().get(formatToken);
				if (format == null) {
					logger.error("Unknown format id {} found in line: {}",
							formatToken, line);
					// TODO: proper handle logic to implement
					return;
				}

				formats.add(format);
			}
			// SAMPLES: sample columns start right after the FORMAT column
			for (int colIdx = VcfGrammar.MANDATORY_COLUMNS_NUMBER + 1, sampleIdx = 0; colIdx < columns.length; ++colIdx, ++sampleIdx) {

				String[] sampleFormatTokens = columns[colIdx]
						.split(VcfGrammar.FORMAT_DELIMITER);
				if (sampleFormatTokens.length != formats.size()) {
					logger.error("Illegal sample details {} for {} in line: {}",
							columns[colIdx], columns[VcfGrammar.FORMAT_COLUMN_IDX], line);
					// TODO: proper handle logic to implement
					return;
				}
				// Sample values are positional: the i-th value belongs to the i-th FORMAT key
				Map<String, List<? extends Serializable>> sampleFormat = new HashMap<>();
				for (int i = 0; i < sampleFormatTokens.length; ++i) {

					Format format = formats.get(i);
					sampleFormat.put(format.getId(), format.getValue(sampleFormatTokens[i]));
				}

				sampleFormats.put(metadata.getSampleNames().get(sampleIdx), sampleFormat);
			}
		}
		// Genetic variants constructing
		for (int i = 0; i < altTokens.length; ++i) {

			// 0 is for the reference allele, so alternative alleles are numbered from 1
			int allele = i + 1;

			Variant variant = new Variant();
			variant.setChrom(chrom);
			variant.setPos(pos);
			variant.setIds(ids);
			variant.setRef(ref);
			variant.setAlt(altTokens[i]);
			variant.setQual(qual);
			variant.setFiltered(filtered);
			variant.setFilters(filters);
			variant.setInfo(infos);
			variant.setFormats(sampleFormats);
			variant.setAllele(allele);
			variant.setGenotypes(extractGenotypes(allele, sampleFormats));

			variants.add(variant);
		}
	}

	/**
	 * Builds per-sample genotypes for the given allele from parsed sample details.
	 * <p>
	 * A sample gets {@link Genotype#UNDEFINED} when it has no genotype field,
	 * when the field has no values, or when its first value is {@code null}.
	 * 
	 * @param allele Allele number the genotypes are extracted for
	 * @param sampleFormats Parsed sample details keyed by sample name, then by format id
	 * @return Genotypes keyed by sample name
	 */
	private static Map<String, Genotype> extractGenotypes(int allele,
			Map<String, Map<String, List<? extends Serializable>>> sampleFormats) {

		Map<String, Genotype> genotypes = new HashMap<>();
		for (Map.Entry<String, Map<String, List<? extends Serializable>>> sample: sampleFormats.entrySet()) {

			String sampleName = sample.getKey();
			List<? extends Serializable> gt = sample.getValue()
					.get(VcfGrammar.GENOTYPE_FIELD);
			// No genotype information for this sample: nothing to dereference below
			if (gt == null || gt.isEmpty()) {
				genotypes.put(sampleName, Genotype.UNDEFINED);
				continue;
			}
			// NOTE(review): assumes genotype field values are parsed as strings - confirm against Format
			@SuppressWarnings("unchecked")
			String genotype = ((List<String>) gt).get(0);
			if (genotype == null) {
				genotypes.put(sampleName, Genotype.UNDEFINED);
			} else {
				genotypes.put(sampleName, Genotype.extractFor(allele, genotype));
			}
		}

		return genotypes;
	}

	/**
	 * @return Genetic variants parsed from this data line; empty if the line was rejected
	 */
	public List<Variant> getVariants() {
		return variants;
	}

	/**
	 * @return Data line pattern of the VCF grammar
	 */
	@Override
	protected Pattern getPattern() {
		return VcfGrammar.dataLinePattern;
	}
}
