/*******************************************************************************
 *     Copyright 2016-2017 the original author or authors.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *  
 *******************************************************************************/
package pro.parseq.vcf.fields.types;

import java.util.Arrays;
import java.util.List;
import java.util.function.Predicate;

import pro.parseq.vcf.utils.VcfGrammar;

/**
 * Zygosity of a sample with respect to one particular allele, as derived
 * from the genotype format field of a VCF record.
 * 
 * @author aafanasyev
 */
public enum Genotype {

	/** All alleles are missing, or the requested allele cannot be classified for the genotype */
	UNDEFINED,
	/** Every allele of the genotype is the reference allele */
	HOMOZYGOUS_REFERENCE,
	/** The genotype contains both the reference allele and the requested allele */
	HETEROZYGOUS_WITH_REFERENCE,
	/** The genotype contains the requested allele together with other non-reference alleles */
	HETEROZYGOUS_WITH_VARIANT,
	/** Every allele of the genotype is the requested allele */
	HOMOZYGOUS_VARIANT,
	/** Some alleles are missing and the requested (non-reference) allele is present */
	HEMIZYGOUS_VARIANT,
	/** Some alleles are missing and the reference allele is present */
	HEMIZYGOUS_REFERENCE;

	/** Matches an allele token equal to {@link VcfGrammar#REFERENCE_ALLELE}. */
	private static final Predicate<String> isReferenceAllele =
			allele -> allele.equals(VcfGrammar.REFERENCE_ALLELE);

	/** Matches an allele token equal to {@link VcfGrammar#MISSING_VALUE}. */
	private static final Predicate<String> isUndefinedAllele =
			allele -> allele.equals(VcfGrammar.MISSING_VALUE);

	/**
	 * Allows to extract zygosity value from genotype format field (see <a href="https://samtools.github.io/hts-specs/VCFv4.2.pdf">VCFv4.2 specification</a> for more details).
	 * <p>
	 * The genotype is split into allele tokens with {@link VcfGrammar#GENOTYPE_DELIMITER}
	 * and the tokens are compared as strings against the decimal form of
	 * {@code alleleNumber}, the reference allele and the missing value marker.
	 * <p>
	 * When {@code alleleNumber} denotes the reference allele itself, a partially
	 * missing genotype is reported as {@link #HEMIZYGOUS_REFERENCE} and never as
	 * {@link #HEMIZYGOUS_VARIANT}.
	 * 
	 * @param alleleNumber Allele number to extract zygosity information for (0 is for reference allele, 1,2.. are for alternate specified in ALT column)
	 * @param genotype Genotype format field value to extract zygosity information from, must not be {@code null}
	 * @return {@link Genotype} for specified allele number
	 * @throws NullPointerException if {@code genotype} is {@code null}
	 */
	public static final Genotype extractFor(int alleleNumber, String genotype) {

		List<String> alleles = Arrays.asList(genotype
				.split(VcfGrammar.GENOTYPE_DELIMITER));

		String requestedAllele = String.valueOf(alleleNumber);
		Predicate<String> isRequestedAllele = allele -> allele.equals(requestedAllele);
		// The requested allele may be the reference allele itself; in that case
		// it must never be classified as a variant.
		boolean requestedIsReference = isReferenceAllele.test(requestedAllele);

		// Evaluate every match once instead of re-streaming for each branch.
		boolean allUndefined = alleles.stream().allMatch(isUndefinedAllele);
		boolean anyUndefined = alleles.stream().anyMatch(isUndefinedAllele);
		boolean allReference = alleles.stream().allMatch(isReferenceAllele);
		boolean anyReference = alleles.stream().anyMatch(isReferenceAllele);
		boolean allRequested = alleles.stream().allMatch(isRequestedAllele);
		boolean anyRequested = alleles.stream().anyMatch(isRequestedAllele);

		if (allUndefined) {
			return UNDEFINED;
		}
		if (anyUndefined) {
			// Partially missing genotype: only a non-reference requested allele
			// counts as a hemizygous variant.
			if (anyRequested && !requestedIsReference) {
				return HEMIZYGOUS_VARIANT;
			}
			if (anyReference) {
				return HEMIZYGOUS_REFERENCE;
			}
			// Neither requested nor reference allele is present: the checks
			// below all fail for such input and UNDEFINED is returned.
		}
		if (allReference) {
			return HOMOZYGOUS_REFERENCE;
		}
		if (anyReference && anyRequested) {
			return HETEROZYGOUS_WITH_REFERENCE;
		}
		if (allRequested) {
			return HOMOZYGOUS_VARIANT;
		}
		if (anyRequested) {
			return HETEROZYGOUS_WITH_VARIANT;
		}

		return UNDEFINED;
	}
}
