/*
 * Decompiled with CFR 0.152.
 */
package org.apache.pinot.segment.local.recordtransformer;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.base.Preconditions;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.apache.pinot.common.metrics.AbstractMetrics;
import org.apache.pinot.common.metrics.ServerGauge;
import org.apache.pinot.common.metrics.ServerMeter;
import org.apache.pinot.common.metrics.ServerMetrics;
import org.apache.pinot.segment.local.recordtransformer.ExtraFieldsContainer;
import org.apache.pinot.segment.local.recordtransformer.RecordTransformer;
import org.apache.pinot.segment.local.recordtransformer.SchemaConformingTransformer;
import org.apache.pinot.segment.local.recordtransformer.SchemaTreeNode;
import org.apache.pinot.segment.local.utils.Base64Utils;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.ingestion.SchemaConformingTransformerV2Config;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.readers.GenericRow;
import org.apache.pinot.spi.metrics.PinotMeter;
import org.apache.pinot.spi.stream.StreamDataDecoderImpl;
import org.apache.pinot.spi.utils.JsonUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Record transformer that reshapes an input record against a table schema.
 *
 * <p>For every (possibly nested) field of the input record:
 * <ul>
 *   <li>fields whose JSON path maps to a schema column are written to that column;</li>
 *   <li>fields without a matching column are collected into the optional "indexable extras" field;</li>
 *   <li>fields whose key (or an ancestor's key) ends with the configured unindexable suffix are collected into
 *       the "unindexable extras" container;</li>
 *   <li>fields listed in the config's paths-to-drop are discarded, and fields listed in paths-to-preserve are
 *       copied through untouched.</li>
 * </ul>
 * Optionally, all indexable leaf values are also rendered as {@code value:key} text documents and stored in the
 * configured merged text index field.
 *
 * <p>The transformer is a no-op when the table config carries no {@link SchemaConformingTransformerV2Config}.
 */
public class SchemaConformingTransformerV2
implements RecordTransformer {
    private static final Logger _logger = LoggerFactory.getLogger(SchemaConformingTransformerV2.class);
    /** Upper bound used here for the length of a single Lucene document. */
    private static final int MAXIMUM_LUCENE_DOCUMENT_SIZE = 32766;
    /** Human-readable breakdown of the minimum shingle document length, used in log messages. */
    private static final String MIN_DOCUMENT_LENGTH_DESCRIPTION = "key length + `:` + shingle index overlap length + one non-overlap char";
    private final boolean _continueOnError;
    // Null when the transformer is a no-op (see isNoOp()).
    private final SchemaConformingTransformerV2Config _transformerConfig;
    // Null when the corresponding extras field is not configured.
    private final FieldSpec.DataType _indexableExtrasFieldType;
    private final FieldSpec.DataType _unindexableExtrasFieldType;
    // Null when the merged text index field is not present in the schema.
    private final DimensionFieldSpec _mergedTextIndexFieldSpec;
    @Nullable
    ServerMetrics _serverMetrics = null;
    private SchemaTreeNode _schemaTree;
    @Nullable
    private PinotMeter _realtimeMergedTextIndexTruncatedDocumentSizeMeter = null;
    private String _tableName;
    // Running totals backing the "average merged text index document length" gauge.
    private long _mergedTextIndexDocumentBytesCount = 0L;
    private long _mergedTextIndexDocumentCount = 0L;

    /**
     * Builds the transformer from the table's ingestion config.
     *
     * <p>If the table has no ingestion config, or the ingestion config has no V2 schema-conforming transformer
     * config, all configuration fields are left {@code null} and {@link #isNoOp()} returns {@code true}.
     *
     * @param tableConfig table config providing the ingestion / transformer configuration and the table name
     * @param schema schema used to validate the extras fields and to build the schema tree
     */
    public SchemaConformingTransformerV2(TableConfig tableConfig, Schema schema) {
        if (null == tableConfig.getIngestionConfig() || null == tableConfig.getIngestionConfig().getSchemaConformingTransformerV2Config()) {
            _continueOnError = false;
            _transformerConfig = null;
            _indexableExtrasFieldType = null;
            _unindexableExtrasFieldType = null;
            _mergedTextIndexFieldSpec = null;
            return;
        }
        _continueOnError = tableConfig.getIngestionConfig().isContinueOnError();
        _transformerConfig = tableConfig.getIngestionConfig().getSchemaConformingTransformerV2Config();
        String indexableExtrasFieldName = _transformerConfig.getIndexableExtrasField();
        _indexableExtrasFieldType = indexableExtrasFieldName == null ? null : SchemaConformingTransformer.getAndValidateExtrasFieldType(schema, indexableExtrasFieldName);
        String unindexableExtrasFieldName = _transformerConfig.getUnindexableExtrasField();
        _unindexableExtrasFieldType = unindexableExtrasFieldName == null ? null : SchemaConformingTransformer.getAndValidateExtrasFieldType(schema, unindexableExtrasFieldName);
        _mergedTextIndexFieldSpec = schema.getDimensionSpec(_transformerConfig.getMergedTextIndexField());
        _tableName = tableConfig.getTableName();
        _schemaTree = validateSchemaAndCreateTree(schema, _transformerConfig);
        _serverMetrics = ServerMetrics.get();
    }

    /**
     * Validates that the schema is compatible with the given transformer config: field names, both extras
     * fields (when configured), and the ability to build the schema tree.
     *
     * @param schema schema to validate
     * @param transformerConfig transformer configuration to validate against
     */
    public static void validateSchema(@Nonnull Schema schema, @Nonnull SchemaConformingTransformerV2Config transformerConfig) {
        validateSchemaFieldNames(schema.getPhysicalColumnNames(), transformerConfig);
        String indexableExtrasFieldName = transformerConfig.getIndexableExtrasField();
        if (null != indexableExtrasFieldName) {
            SchemaConformingTransformer.getAndValidateExtrasFieldType(schema, indexableExtrasFieldName);
        }
        String unindexableExtrasFieldName = transformerConfig.getUnindexableExtrasField();
        if (null != unindexableExtrasFieldName) {
            // Validate the unindexable field itself (previously the indexable field name was passed here).
            SchemaConformingTransformer.getAndValidateExtrasFieldType(schema, unindexableExtrasFieldName);
        }
        validateSchemaAndCreateTree(schema, transformerConfig);
    }

    /**
     * Returns whether the given bytes should be treated as a base64 (binary) value.
     *
     * @param bytes candidate value
     * @param minLength minimum length for a value to be considered
     * @return {@code true} if {@code bytes} is at least {@code minLength} long and passes
     *     {@link Base64Utils#isBase64IgnoreTrailingPeriods(byte[])}
     */
    public static boolean base64ValueFilter(byte[] bytes, int minLength) {
        return bytes.length >= minLength && Base64Utils.isBase64IgnoreTrailingPeriods(bytes);
    }

    /**
     * Checks that no schema field ends with the unindexable suffix and that no schema field is listed in the
     * config's paths-to-drop.
     *
     * @throws IllegalStateException if either check fails
     */
    private static void validateSchemaFieldNames(Set<String> schemaFields, SchemaConformingTransformerV2Config transformerConfig) {
        String unindexableFieldSuffix = transformerConfig.getUnindexableFieldSuffix();
        if (null != unindexableFieldSuffix) {
            for (String field : schemaFields) {
                Preconditions.checkState(!field.endsWith(unindexableFieldSuffix), "Field '%s' has no-index suffix '%s'", field, unindexableFieldSuffix);
            }
        }
        Set<String> fieldPathsToDrop = transformerConfig.getFieldPathsToDrop();
        if (null != fieldPathsToDrop) {
            Set<String> fieldIntersection = new HashSet<String>(schemaFields);
            fieldIntersection.retainAll(fieldPathsToDrop);
            Preconditions.checkState(fieldIntersection.isEmpty(), "Fields in schema overlap with fieldPathsToDrop");
        }
    }

    /**
     * Builds the tree of schema fields, keyed by JSON path segment.
     *
     * <p>Columns that are remapped through the config's column-name-to-JSON-key-path map are inserted under their
     * JSON key path and tagged with the column name; all other columns are inserted under their own name.
     *
     * @return the root node of the schema tree
     */
    private static SchemaTreeNode validateSchemaAndCreateTree(@Nonnull Schema schema, @Nonnull SchemaConformingTransformerV2Config transformerConfig) throws IllegalArgumentException {
        Set<String> schemaFields = schema.getPhysicalColumnNames();
        Map<String, String> columnNameToJsonKeyPathMap = transformerConfig.getColumnNameToJsonKeyPathMap();
        Map<String, String> jsonKeyPathToColumnNameMap = new HashMap<String, String>();
        for (Map.Entry<String, String> entry : columnNameToJsonKeyPathMap.entrySet()) {
            String columnName = entry.getKey();
            String jsonKeyPath = entry.getValue();
            // Replace the column name by the JSON path it is fed from.
            schemaFields.remove(columnName);
            schemaFields.add(jsonKeyPath);
            jsonKeyPathToColumnNameMap.put(jsonKeyPath, columnName);
        }
        SchemaTreeNode rootNode = new SchemaTreeNode("", null, schema);
        List<String> subKeys = new ArrayList<String>();
        for (String field : schemaFields) {
            SchemaTreeNode currentNode = rootNode;
            int keySeparatorIdx = field.indexOf(".");
            if (-1 == keySeparatorIdx) {
                currentNode = rootNode.getAndCreateChild(field, schema);
            } else {
                // Nested path: walk / create one node per path segment.
                subKeys.clear();
                SchemaConformingTransformer.getAndValidateSubKeys(field, keySeparatorIdx, subKeys);
                for (String subKey : subKeys) {
                    currentNode = currentNode.getAndCreateChild(subKey, schema);
                }
            }
            // Null when the field was not remapped through the column-name-to-JSON-key-path map.
            currentNode.setColumn(jsonKeyPathToColumnNameMap.get(field));
        }
        return rootNode;
    }

    /** Returns {@code true} when no V2 schema-conforming transformer config was provided. */
    @Override
    public boolean isNoOp() {
        return null == _transformerConfig;
    }

    /**
     * Transforms the record into a new {@link GenericRow} laid out according to the schema tree.
     *
     * <p>On failure, the exception is rethrown unless continue-on-error is enabled, in which case the error is
     * logged and the (possibly partial) output record is flagged with {@code $INCOMPLETE_RECORD_KEY$}.
     *
     * @param record input record
     * @return the newly built output record
     */
    @Override
    @Nullable
    public GenericRow transform(GenericRow record) {
        GenericRow outputRecord = new GenericRow();
        Map<String, Object> mergedTextIndexMap = new HashMap<String, Object>();
        try {
            Deque<String> jsonPath = new ArrayDeque<String>();
            ExtraFieldsContainer extraFieldsContainer = new ExtraFieldsContainer(null != _transformerConfig.getUnindexableExtrasField());
            Map<String, Object> fieldToValueMap = record.getFieldToValueMap();
            for (Map.Entry<String, Object> recordEntry : fieldToValueMap.entrySet()) {
                jsonPath.addLast(recordEntry.getKey());
                ExtraFieldsContainer currentFieldsContainer = processField(_schemaTree, jsonPath, recordEntry.getValue(), true, outputRecord, mergedTextIndexMap);
                extraFieldsContainer.addChild(currentFieldsContainer);
                jsonPath.removeLast();
            }
            putExtrasField(_transformerConfig.getIndexableExtrasField(), _indexableExtrasFieldType, extraFieldsContainer.getIndexableExtras(), outputRecord);
            putExtrasField(_transformerConfig.getUnindexableExtrasField(), _unindexableExtrasFieldType, extraFieldsContainer.getUnindexableExtras(), outputRecord);
            if (null != _mergedTextIndexFieldSpec && !mergedTextIndexMap.isEmpty()) {
                List<String> luceneDocuments = getLuceneDocumentsFromMergedTextIndexMap(mergedTextIndexMap);
                if (_mergedTextIndexFieldSpec.isSingleValueField()) {
                    outputRecord.putValue(_transformerConfig.getMergedTextIndexField(), String.join(" ", luceneDocuments));
                } else {
                    outputRecord.putValue(_transformerConfig.getMergedTextIndexField(), luceneDocuments);
                }
            }
        }
        catch (Exception e) {
            if (!_continueOnError) {
                throw e;
            }
            _logger.error("Couldn't transform record: {}", record.toString(), e);
            outputRecord.putValue("$INCOMPLETE_RECORD_KEY$", true);
        }
        return outputRecord;
    }

    /**
     * Processes one field (the last element of {@code jsonPath}) and, recursively, its children if the value is
     * a map.
     *
     * @param parentNode schema tree node of the parent path, or {@code null} if the parent is not in the tree
     * @param jsonPath path segments from the record root down to this field; restored to its input state on return
     * @param value the field's value
     * @param isIndexable whether all ancestors of this field are indexable
     * @param outputRecord record receiving column values
     * @param mergedTextIndexMap receives JSON path to value for every indexable leaf that is stored
     * @return the extras collected for this field and its descendants
     */
    private ExtraFieldsContainer processField(SchemaTreeNode parentNode, Deque<String> jsonPath, Object value, boolean isIndexable, GenericRow outputRecord, Map<String, Object> mergedTextIndexMap) {
        boolean storeIndexableExtras = _transformerConfig.getIndexableExtrasField() != null;
        boolean storeUnindexableExtras = _transformerConfig.getUnindexableExtrasField() != null;
        String key = jsonPath.peekLast();
        ExtraFieldsContainer extraFieldsContainer = new ExtraFieldsContainer(storeUnindexableExtras);

        // Special keys are passed through unchanged under their own key.
        if (StreamDataDecoderImpl.isSpecialKeyType(key) || GenericRow.isSpecialKeyType(key)) {
            outputRecord.putValue(key, value);
            return extraFieldsContainer;
        }

        String keyJsonPath = String.join(".", jsonPath);
        if (_transformerConfig.getFieldPathsToPreserveInput().contains(keyJsonPath)) {
            outputRecord.putValue(keyJsonPath, value);
            return extraFieldsContainer;
        }
        Set<String> fieldPathsToDrop = _transformerConfig.getFieldPathsToDrop();
        if (null != fieldPathsToDrop && fieldPathsToDrop.contains(keyJsonPath)) {
            return extraFieldsContainer;
        }

        SchemaTreeNode currentNode = parentNode == null ? null : parentNode.getChild(key);
        String unindexableFieldSuffix = _transformerConfig.getUnindexableFieldSuffix();
        // A field is unindexable as soon as it, or any ancestor, carries the unindexable suffix.
        boolean indexable = isIndexable && (null == unindexableFieldSuffix || !key.endsWith(unindexableFieldSuffix));

        if (!(value instanceof Map)) {
            if (!indexable) {
                extraFieldsContainer.addUnindexableEntry(key, value);
            } else if (null != currentNode && currentNode.isColumn()) {
                outputRecord.putValue(currentNode.getColumnName(), currentNode.getValue(value));
                if (_transformerConfig.getFieldsToDoubleIngest().contains(keyJsonPath)) {
                    extraFieldsContainer.addIndexableEntry(key, value);
                }
                mergedTextIndexMap.put(keyJsonPath, value);
            } else if (storeIndexableExtras) {
                extraFieldsContainer.addIndexableEntry(key, value);
                mergedTextIndexMap.put(keyJsonPath, value);
            }
            return extraFieldsContainer;
        }

        @SuppressWarnings("unchecked")
        Map<String, Object> valueAsMap = (Map<String, Object>) value;
        for (Map.Entry<String, Object> entry : valueAsMap.entrySet()) {
            jsonPath.addLast(entry.getKey());
            ExtraFieldsContainer childContainer = processField(currentNode, jsonPath, entry.getValue(), indexable, outputRecord, mergedTextIndexMap);
            extraFieldsContainer.addChild(key, childContainer);
            jsonPath.removeLast();
        }
        return extraFieldsContainer;
    }

    /**
     * Appends a single {@code value:key} document for the entry, truncating the value so the document does not
     * exceed {@code mergedTextIndexDocumentMaxLength} where possible, and updates the related metrics.
     *
     * <p>Nothing is appended if the key alone (plus the separator) exceeds the maximum Lucene document size.
     *
     * @param kv JSON path and value of the field
     * @param indexDocuments list receiving the generated document
     * @param mergedTextIndexDocumentMaxLength configured maximum document length
     */
    public void generateTextIndexLuceneDocument(Map.Entry<String, Object> kv, List<String> indexDocuments, Integer mergedTextIndexDocumentMaxLength) {
        String key = kv.getKey();
        String val = stringifyValue(kv.getValue());
        if (key.length() + 1 > MAXIMUM_LUCENE_DOCUMENT_SIZE) {
            _logger.error("The provided key's length is too long, text index document cannot be truncated");
            return;
        }
        // Space left for the value once the key and the ':' separator are accounted for. Clamped at zero so a
        // configured max length smaller than the key does not make substring() throw.
        int valueTruncationLength = Math.max(0, mergedTextIndexDocumentMaxLength - 1 - key.length());
        if (val.length() > valueTruncationLength) {
            _realtimeMergedTextIndexTruncatedDocumentSizeMeter = _serverMetrics.addMeteredTableValue(_tableName, ServerMeter.REALTIME_MERGED_TEXT_IDX_TRUNCATED_DOCUMENT_SIZE, key.length() + 1 + val.length(), _realtimeMergedTextIndexTruncatedDocumentSizeMeter);
            val = val.substring(0, valueTruncationLength);
        }
        _mergedTextIndexDocumentBytesCount += key.length() + 1 + val.length();
        ++_mergedTextIndexDocumentCount;
        _serverMetrics.setValueOfTableGauge(_tableName, ServerGauge.REALTIME_MERGED_TEXT_IDX_DOCUMENT_AVG_LEN, _mergedTextIndexDocumentBytesCount / _mergedTextIndexDocumentCount);
        indexDocuments.add(val + ":" + key);
    }

    /**
     * Appends one or more {@code valueWindow:key} documents for the entry, where consecutive value windows overlap
     * by {@code shingleIndexOverlapLength} characters and each document is at most {@code shingleIndexMaxLength}
     * characters long.
     *
     * <p>Falls back to {@link #generateTextIndexLuceneDocument} (single, possibly truncated document) when the
     * value is not longer than the overlap, or when even the smallest possible shingle document would exceed the
     * maximum Lucene document size.
     *
     * @param kv JSON path and value of the field
     * @param shingleIndexDocuments list receiving the generated documents
     * @param shingleIndexMaxLength configured maximum length of each document
     * @param shingleIndexOverlapLength number of value characters shared by consecutive documents
     */
    public void generateShingleTextIndexDocument(Map.Entry<String, Object> kv, List<String> shingleIndexDocuments, int shingleIndexMaxLength, int shingleIndexOverlapLength) {
        String key = kv.getKey();
        String val = stringifyValue(kv.getValue());
        int valLength = val.length();
        int documentSuffixLength = key.length() + 1;
        int minDocumentLength = documentSuffixLength + shingleIndexOverlapLength + 1;
        if (shingleIndexOverlapLength >= valLength) {
            // True for every short value, so this is debug-level like the other diagnostics below.
            _logger.debug("The shingleIndexOverlapLength {} is longer than the value length {}. Shingling will not be applied since only one document will be generated.", shingleIndexOverlapLength, valLength);
            generateTextIndexLuceneDocument(kv, shingleIndexDocuments, shingleIndexMaxLength);
            return;
        }
        if (minDocumentLength > MAXIMUM_LUCENE_DOCUMENT_SIZE) {
            _logger.debug("The minimum document length {} (" + MIN_DOCUMENT_LENGTH_DESCRIPTION + ")  exceeds the limit of maximum Lucene document size " + MAXIMUM_LUCENE_DOCUMENT_SIZE + ". Value will be truncated and shingling will not be applied.", minDocumentLength);
            generateTextIndexLuceneDocument(kv, shingleIndexDocuments, shingleIndexMaxLength);
            return;
        }
        if (shingleIndexMaxLength < minDocumentLength) {
            _logger.debug("The shingleIndexMaxLength {} is smaller than the minimum document length {} (" + MIN_DOCUMENT_LENGTH_DESCRIPTION + "). Increasing the shingleIndexMaxLength to maximum Lucene document size " + MAXIMUM_LUCENE_DOCUMENT_SIZE + ".", shingleIndexMaxLength, minDocumentLength);
            shingleIndexMaxLength = MAXIMUM_LUCENE_DOCUMENT_SIZE;
        }
        // The checks above guarantee the window advances by at least one character per document.
        int valueWindowLength = shingleIndexMaxLength - documentSuffixLength;
        int shingleWindowSlideLength = valueWindowLength - shingleIndexOverlapLength;
        for (int start = 0; start + shingleIndexOverlapLength < valLength; start += shingleWindowSlideLength) {
            String documentValStr = val.substring(start, Math.min(start + valueWindowLength, valLength));
            String shingleIndexDocument = documentValStr + ":" + key;
            shingleIndexDocuments.add(shingleIndexDocument);
            _mergedTextIndexDocumentBytesCount += shingleIndexDocument.length();
            ++_mergedTextIndexDocumentCount;
        }
        _serverMetrics.setValueOfTableGauge(_tableName, ServerGauge.REALTIME_MERGED_TEXT_IDX_DOCUMENT_AVG_LEN, _mergedTextIndexDocumentBytesCount / _mergedTextIndexDocumentCount);
    }

    /**
     * Renders a field value as text: collections and object arrays are serialized through
     * {@link JsonUtils#objectToString}, falling back to {@code toString()} if serialization fails; every other
     * value uses {@code toString()}.
     */
    private static String stringifyValue(Object value) {
        if (value instanceof Collection || value instanceof Object[]) {
            try {
                return JsonUtils.objectToString(value);
            }
            catch (JsonProcessingException ignored) {
                // Best effort: fall back to the default string form.
                return value.toString();
            }
        }
        return value.toString();
    }

    /**
     * Writes the collected extras into the output record, either as-is (JSON field) or serialized to a JSON
     * string (STRING field). Does nothing when there are no extras.
     *
     * @throws RuntimeException if the extras cannot be serialized to a string
     * @throws UnsupportedOperationException if the field type is neither JSON nor STRING
     */
    private void putExtrasField(String fieldName, FieldSpec.DataType fieldType, Map<String, Object> field, GenericRow outputRecord) {
        if (null == field) {
            return;
        }
        switch (fieldType) {
            case JSON: {
                outputRecord.putValue(fieldName, field);
                break;
            }
            case STRING: {
                try {
                    outputRecord.putValue(fieldName, JsonUtils.objectToString(field));
                    break;
                }
                catch (JsonProcessingException e) {
                    throw new RuntimeException("Failed to convert '" + fieldName + "' to string", e);
                }
            }
            default: {
                throw new UnsupportedOperationException("Cannot convert '" + fieldName + "' to " + fieldType.name());
            }
        }
    }

    /**
     * Converts the merged text index map into the list of Lucene documents.
     *
     * <p>Entries are skipped when the key or value is {@code null}, the key is in the configured paths to
     * exclude, the value looks like base64 (see {@link #base64ValueFilter}), or the key ends with any of the
     * configured suffixes to exclude. Remaining entries produce a single document, or shingled documents when a
     * shingling overlap length is configured.
     */
    private List<String> getLuceneDocumentsFromMergedTextIndexMap(Map<String, Object> mergedTextIndexMap) {
        Integer mergedTextIndexDocumentMaxLength = _transformerConfig.getMergedTextIndexDocumentMaxLength();
        Integer mergedTextIndexShinglingOverlapLength = _transformerConfig.getMergedTextIndexShinglingOverlapLength();
        Collection<String> pathsToExclude = _transformerConfig.getMergedTextIndexPathToExclude();
        Collection<String> suffixesToExclude = _transformerConfig.getMergedTextIndexSuffixToExclude();
        List<String> luceneDocuments = new ArrayList<String>();
        for (Map.Entry<String, Object> kv : mergedTextIndexMap.entrySet()) {
            String key = kv.getKey();
            Object value = kv.getValue();
            if (null == key || null == value) {
                continue;
            }
            if (pathsToExclude.contains(key)) {
                continue;
            }
            if (base64ValueFilter(value.toString().getBytes(), _transformerConfig.getMergedTextIndexBinaryDocumentDetectionMinLength())) {
                continue;
            }
            // Exclude the entry if its key ends with ANY excluded suffix; an empty suffix list excludes nothing.
            if (suffixesToExclude.stream().anyMatch(key::endsWith)) {
                continue;
            }
            if (null == mergedTextIndexShinglingOverlapLength) {
                generateTextIndexLuceneDocument(kv, luceneDocuments, mergedTextIndexDocumentMaxLength);
            } else {
                generateShingleTextIndexDocument(kv, luceneDocuments, mergedTextIndexDocumentMaxLength, mergedTextIndexShinglingOverlapLength);
            }
        }
        return luceneDocuments;
    }
}

