/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */

package org.opensearch.timeseries.dataprocessor;

/**
 * Base class for imputing feature vectors.
 *
 * In certain situations, due to time and compute cost, we are only allowed to
 * query a sparse sample of data points / feature vectors from a cluster.
 * However, we need a large sample of feature vectors in order to train our
 * anomaly detection algorithms. An Imputer approximates the data points
 * between the samples of a given, ordered list.
 */
public abstract class Imputer {

    /**
     * Imputes the given sample feature vectors, one feature at a time.
     *
     * Computes a list of `numImputed` feature vectors using the ordered list
     * of `numSamples` input sample vectors where each sample vector has size
     * `numFeatures`. Row `i` of `samples` is handed to
     * {@link #singleFeatureImpute(double[], int)} and the array returned by
     * that call is stored, without copying, as row `i` of the result.
     *
     * @param samples     A `numFeatures x numSamples` list of feature vectors;
     *                    each row holds the ordered samples of one feature.
     * @param numImputed  The desired number of imputed vectors. Must not be negative.
     * @return            A list with one row per feature. Each row is whatever
     *                    `singleFeatureImpute` returned for that feature, which is
     *                    expected to contain `numImputed` values.
     * @throws NullPointerException       if `samples` is null.
     * @throws NegativeArraySizeException if `numImputed` is negative.
     */
    public double[][] impute(double[][] samples, int numImputed) {
        int numFeatures = samples.length;
        if (numImputed < 0) {
            // Reject a negative count up front, even when there are no features,
            // with the same exception type the JVM raises for a negative array size.
            throw new NegativeArraySizeException(String.valueOf(numImputed));
        }

        // Only the outer array is allocated here: every row is supplied by
        // singleFeatureImpute, so pre-allocating numFeatures x numImputed
        // doubles would create arrays that are thrown away immediately.
        double[][] interpolants = new double[numFeatures][];

        for (int featureIndex = 0; featureIndex < numFeatures; featureIndex++) {
            interpolants[featureIndex] = singleFeatureImpute(samples[featureIndex], numImputed);
        }
        return interpolants;
    }

    /**
     * Computes the imputed values for a single feature.
     *
     * @param samples     ordered sample values of one feature
     * @param numImputed  number of elements expected in the returned array
     * @return            the imputed values for this feature
     */
    protected abstract double[] singleFeatureImpute(double[] samples, int numImputed);
}