/*
* All or portions of this file Copyright (c) Amazon.com, Inc. or its affiliates or
* its licensors.
*
* For complete copyright and license terms please see the LICENSE at the root of this
* distribution (the "License"). All use of this software is governed by the License,
* or, if provided, by the license below or the license accompanying this file. Do not
* remove or modify any license notices. This file is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
*/

#include <NumericalMethods_precompiled.h>
#include <Optimization/LineSearch.h>
#include <Optimization/Utilities.h>
#include <Optimization/Constants.h>
#include <float.h>

namespace NumericalMethods::Optimization
{
    ScalarVariable CubicMinimum(const double a, const double f_a, const double df_a, const double b, const double f_b,
        const double c, const double f_c)
    {
        double coefficients[4] = {};
        coefficients[1] = df_a;
        const double db = b - a;
        const double dc = c - a;
        const double denominator = (db * db * dc * dc) * (db - dc);
        double e[2];
        e[0] = f_b - f_a - coefficients[1] * db;
        e[1] = f_c - f_a - coefficients[1] * dc;
        coefficients[3] = (dc * dc * e[0] - db * db * e[1]) / denominator;
        coefficients[2] = (-dc * dc * dc * e[0] + db * db * db * e[1]) / denominator;
        const double radical = coefficients[2] * coefficients[2] - 3.0 * coefficients[3] * coefficients[1];

        return a + (-coefficients[2] + sqrt(radical)) / (3.0 * coefficients[3]);
    }

    ScalarVariable QuadraticMinimum(const double a, const double f_a, const double df_a, const double b, const double f_b)
    {
        double coefficients[3] = {};
        const double db = b - a;
        coefficients[1] = df_a;
        coefficients[2] = (f_b - f_a - coefficients[1] * db) / (db * db);
        return a - coefficients[1] / (2.0 * coefficients[2]);
    }

    bool ValidateStepSize(const ScalarVariable alphaNew, const double alpha0, const double alpha1, const double edgeThreshold)
    {
        const double alphaMin = AZStd::GetMin(alpha0, alpha1);
        const double alphaMax = AZStd::GetMax(alpha0, alpha1);
        const double range = alphaMax - alphaMin;
        return (isfinite(alphaNew) && (alphaNew > alphaMin + edgeThreshold * range) &&
            (alphaNew < alphaMax - edgeThreshold * range));
    }

    LineSearchResult SelectStepSizeFromInterval(double alpha0, double alpha1, double f_alpha0, double f_alpha1,
        double df_alpha0, const Function& f, const VectorVariable& x0, const VectorVariable& searchDirection,
        const double f_x0, const double df_x0, const double c1, const double c2)
    {
        const double cubicEdgeThreshold = 0.2;
        const double quadraticEdgeThreshold = 0.1;
        double alphaLast = 0.0;
        double f_alphaLast = f_x0;
        LineSearchResult result;

        for (AZ::u32 iteration = 0; iteration < lineSearchIterations; iteration++)
        {
            ScalarVariable alphaNew;
            if (iteration > 0)
            {
                // first try selecting a new alpha value based on cubic interpolation through the most recent points
                alphaNew = CubicMinimum(alpha0, f_alpha0, df_alpha0, alpha1, f_alpha1, alphaLast, f_alphaLast);
            }

            // if this is the first iteration, or the cubic method failed or is invalid, try a quadratic
            if (iteration == 0 || !ValidateStepSize(alphaNew, alpha0, alpha1, cubicEdgeThreshold))
            {
                alphaNew = QuadraticMinimum(alpha0, f_alpha0, df_alpha0, alpha1, f_alpha1);

                // if the quadratic is invalid, use bisection
                if (!ValidateStepSize(alphaNew, alpha0, alpha1, quadraticEdgeThreshold))
                {
                    alphaNew = ScalarVariable(0.5 * (alpha0 + alpha1));
                }
            }

            // Check if alphaNew satisfies the Wolfe conditions
            // First the sufficient decrease condition
            const double f_alphaNew = FunctionValue(f, x0 + alphaNew * searchDirection);
            if ((f_alphaNew > f_x0 + c1 * alphaNew * df_x0) || (f_alphaNew >= f_alpha0))
            {
                // The decrease is not sufficient, so set up the parameters for the next iteration
                f_alphaLast = f_alpha1;
                alphaLast = alpha1;
                alpha1 = alphaNew;
                f_alpha1 = f_alphaNew;
            }
            else
            {
                // There is sufficient decrease, so test the second Wolfe condition i.e. whether the derivative
                // corresponding to alphaNew is shallower than the derivative at x0
                double df_alphaNew = DirectionalDerivative(f, x0 + alphaNew * searchDirection, searchDirection);
                if (fabs(df_alphaNew) <= -c2 * df_x0)
                {
                    // alphaNew satisfies the Wolfe conditions, so return it
                    result.m_outcome = LineSearchOutcome::Success;
                    result.m_stepSize = alphaNew;
                    result.m_functionValue = f_alphaNew;
                    result.m_derivativeValue = df_alphaNew;
                    return result;
                }

                if (df_alphaNew * (alpha1 - alpha0) >= 0.0)
                {
                    f_alphaLast = f_alpha1;
                    alphaLast = alpha1;
                    alpha1 = alpha0;
                    f_alpha1 = f_alpha0;
                }

                else
                {
                    f_alphaLast = f_alpha0;
                    alphaLast = alpha0;
                }
                alpha0 = alphaNew;
                f_alpha0 = f_alphaNew;
                df_alpha0 = df_alphaNew;
            }
        }

        // Failed to find a conforming step size
        result.m_stepSize = 0.0;
        result.m_functionValue = 0.0;
        result.m_derivativeValue = 0.0;
        result.m_outcome = LineSearchOutcome::FailureExceededIterations;
        return result;
    }

    LineSearchResult LineSearchWolfe(const Function& f, const VectorVariable& x0, double f_x0,
        const VectorVariable& searchDirection)
    {
        // uses the notation from Nocedal and Wright, where alpha represents the step size
        // alpha0 and alpha1 are the lower and upper bounds of an interval which brackets the final value of alpha
        // initial step size of 1 is recommended for quasi-Newton methods (Nocedal and Wright)
        double alpha0 = 0.0;
        double alpha1 = 1.0;

        double f_alpha1 = FunctionValue(f, x0 + alpha1 * searchDirection);
        double f_alpha0 = f_x0;
        const double df_x0 = DirectionalDerivative(f, x0, searchDirection);
        double df_alpha0 = df_x0;

        for (AZ::u32 iteration = 0; iteration < lineSearchIterations; iteration++)
        {
            // if the value of f corresponding to alpha1 isn't sufficiently small compared to f at x0,
            // then the interval [alpha0 ... alpha1] must bracket a suitable point.
            if ((f_alpha1 > f_x0 + c1 * alpha1 * df_x0) || (iteration > 0 && f_alpha1 > f_alpha0))
            {
                return SelectStepSizeFromInterval(alpha0, alpha1, f_alpha0, f_alpha1, df_alpha0,
                    f, x0, searchDirection, f_x0, df_x0, c1, c2);
            }

            // otherwise, if the derivative corresponding to alpha1 is large enough, alpha1 already
            // satisfies the Wolfe conditions and so return alpha1.
            double df_alpha1 = DirectionalDerivative(f, x0 + alpha1 * searchDirection, searchDirection);
            if (fabs(df_alpha1) <= -c2 * df_x0)
            {
                LineSearchResult result;
                result.m_outcome = LineSearchOutcome::Success;
                result.m_stepSize = alpha1;
                result.m_functionValue = f_alpha1;
                result.m_derivativeValue = df_alpha1;
                return result;
            }

            if (df_alpha1 >= 0.0)
            {
                return SelectStepSizeFromInterval(alpha1, alpha0, f_alpha1, f_alpha0, df_alpha1,
                    f, x0, searchDirection, f_x0, df_x0, c1, c2);
            }

            // haven't found an interval which is guaranteed to bracket a suitable point,
            // so expand the search region for the next iteration
            alpha0 = alpha1;
            f_alpha0 = f_alpha1;
            alpha1 = 2.0 * alpha1;
            f_alpha1 = FunctionValue(f, x0 + alpha1 * searchDirection);
            df_alpha0 = df_alpha1;
        }

        LineSearchResult result;
        result.m_outcome = LineSearchOutcome::BestEffort;
        result.m_stepSize = alpha1;
        result.m_functionValue = f_alpha1;
        result.m_derivativeValue = DirectionalDerivative(f, x0 + alpha1 * searchDirection, searchDirection);
        return result;
    }
} // namespace NumericalMethods::Optimization