/* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*
*  Licensed to Elasticsearch B.V. under one or more contributor
*  license agreements. See the NOTICE file distributed with
*  this work for additional information regarding copyright
*  ownership. Elasticsearch B.V. licenses this file to you under
*  the Apache License, Version 2.0 (the "License"); you may
*  not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
* 	http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing,
*  software distributed under the License is distributed on an
*  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
*  KIND, either express or implied.  See the License for the
*  specific language governing permissions and limitations
*  under the License.
*/

using System;
using System.Threading.Tasks;
using OpenSearch.OpenSearch.Xunit.XunitPlumbing;
using OpenSearch.Net;
using OpenSearch.Net.VirtualizedCluster;
using OpenSearch.Net.VirtualizedCluster.Audit;
using Tests.Framework;
using static OpenSearch.Net.AuditEvent;

namespace Tests.ClientConcepts.ConnectionPooling.MaxRetries
{
	public class RespectsMaxRetry
	{
		/**[[retries]]
		*=== Retries
		* By default, OSC will retry a request as many times as there are nodes in the cluster that the client knows about.
		*
		* Retries still respect the request timeout, however: if you have a 100 node cluster
		* and a request timeout of 20 seconds, the client will retry as many times as it can before
		* giving up at the request timeout of 20 seconds.
		*/
		[U]
		public async Task DefaultMaxIsNumberOfNodes()
		{
			/**
			 * Retry behaviour can be demonstrated using OSC's Virtual cluster test framework. In the following
			 * example, a ten node cluster is defined that always fails on all client calls, except on port 9209.
			 */
			var auditor = new Auditor(() => VirtualClusterWith
				.Nodes(10)
				.ClientCalls(rule => rule.FailAlways())
				.ClientCalls(rule => rule.OnPort(9209).SucceedAlways())
				.StaticConnectionPool()
				.Settings(settings => settings.DisablePing())
			);

			/**
			 * The trace of a client call shows that a bad response is received from the nodes on ports 9200 to 9208,
			 * finally returning a healthy response from the node on port 9209.
			 */
			var expectedTrace = new ClientCall {
				{ BadResponse, 9200 },
				{ BadResponse, 9201 },
				{ BadResponse, 9202 },
				{ BadResponse, 9203 },
				{ BadResponse, 9204 },
				{ BadResponse, 9205 },
				{ BadResponse, 9206 },
				{ BadResponse, 9207 },
				{ BadResponse, 9208 },
				{ HealthyResponse, 9209 }
			};

			await auditor.TraceCall(expectedTrace);
		}

		/**==== Maximum number of retries
		 *
		* When you have a 100 node cluster, for example, you might want to ensure that retries occur only
		* a _fixed_ number of times. This can be done using `MaximumRetries(n)` on `ConnectionSettings`.
		*
		* IMPORTANT: the actual number of requests is `initial attempt + set number of retries`
		*/

		[U]
		public async Task FixedMaximumNumberOfRetries()
		{
			var auditor = new Auditor(() => VirtualClusterWith
				.Nodes(10)
				.ClientCalls(rule => rule.FailAlways())
				.ClientCalls(rule => rule.OnPort(9209).SucceedAlways())
				.StaticConnectionPool()
				.Settings(settings => settings.DisablePing().MaximumRetries(3)) // <1> Set the maximum number of retries to 3
			);

			var expectedTrace = new ClientCall {
				{ BadResponse, 9200 },
				{ BadResponse, 9201 },
				{ BadResponse, 9202 },
				{ BadResponse, 9203 },
				{ MaxRetriesReached } // <2> The client call trace returns a `MaxRetriesReached` audit after the initial attempt and the number of retries allowed
			};

			await auditor.TraceCall(expectedTrace);
		}

		/**
		* In our previous example we simulated very fast failures, but in the real world, a call might take upwards of a second.
		*
		* In this next example, we simulate a particularly heavy search that takes 10 seconds to fail, and set a request timeout of 20 seconds.
		* We see that the request is tried twice and gives up before a third call is attempted, since the call takes 10 seconds and thus can be
		* tried twice (initial call and one retry) _before_ the request timeout.
		*/
		[U]
		public async Task RespectsOveralRequestTimeout()
		{
			var callDuration = TimeSpan.FromSeconds(10);
			var requestTimeout = TimeSpan.FromSeconds(20);

			var auditor = new Auditor(() => VirtualClusterWith
				.Nodes(10)
				.ClientCalls(rule => rule.FailAlways().Takes(callDuration))
				.ClientCalls(rule => rule.OnPort(9209).SucceedAlways())
				.StaticConnectionPool()
				.Settings(settings => settings
					.DisablePing()
					.RequestTimeout(requestTimeout))
			);

			var expectedTrace = new ClientCall {
				{ BadResponse, 9200 },
				{ BadResponse, 9201 },
				{ MaxTimeoutReached }
			};

			await auditor.TraceCall(expectedTrace);
		}

		/**
		 * ==== Maximum retry timeout
		* If you set a smaller request timeout, you might not want it to also affect the retry timeout.
		* In cases like this, you can configure the `MaxRetryTimeout` separately.
		* Here we simulate calls taking 3 seconds, a request timeout of 2 seconds and a max retry timeout of 10 seconds.
		* We should see 5 attempts to perform this query, testing that our request timeout cuts the query off short and that
		* our max retry timeout of 10 seconds wins over the configured request timeout.
		*/
		[U]
		public async Task RespectsMaxRetryTimeoutOverRequestTimeout()
		{
			var callDuration = TimeSpan.FromSeconds(3);
			var requestTimeout = TimeSpan.FromSeconds(2);
			var maxRetryTimeout = TimeSpan.FromSeconds(10);

			var auditor = new Auditor(() => VirtualClusterWith
				.Nodes(10)
				.ClientCalls(rule => rule.FailAlways().Takes(callDuration))
				.ClientCalls(rule => rule.OnPort(9209).FailAlways())
				.StaticConnectionPool()
				.Settings(settings => settings
					.DisablePing()
					.RequestTimeout(requestTimeout)
					.MaxRetryTimeout(maxRetryTimeout))
			);

			var expectedTrace = new ClientCall {
				{ BadResponse, 9200 },
				{ BadResponse, 9201 },
				{ BadResponse, 9202 },
				{ BadResponse, 9203 },
				{ BadResponse, 9204 },
				{ MaxTimeoutReached }
			};

			await auditor.TraceCall(expectedTrace);
		}

		/**
		* If your retry policy expands beyond the number of available nodes, the client **won't** retry the same node twice.
		*/
		[U]
		public async Task RetriesAreLimitedByNodesInPool()
		{
			var callDuration = TimeSpan.FromSeconds(3);
			var requestTimeout = TimeSpan.FromSeconds(2);
			var maxRetryTimeout = TimeSpan.FromSeconds(10);

			var auditor = new Auditor(() => VirtualClusterWith
				.Nodes(2)
				.ClientCalls(rule => rule.FailAlways().Takes(callDuration))
				.ClientCalls(rule => rule.OnPort(9209).SucceedAlways())
				.StaticConnectionPool()
				.Settings(settings => settings
					.DisablePing()
					.RequestTimeout(requestTimeout)
					.MaxRetryTimeout(maxRetryTimeout))
			);

			var expectedTrace = new ClientCall {
				{ BadResponse, 9200 },
				{ BadResponse, 9201 },
				{ MaxRetriesReached },
				{ FailedOverAllNodes }
			};

			await auditor.TraceCall(expectedTrace);
		}

		/**
		* This makes setting any retry setting on a single node connection pool a no-op by design!
		* Connection pooling and failover are all about trying to fail sanely whilst still utilizing the available resources and
		* not giving up on the fail fast principle; **It is NOT a mechanism for forcing requests to succeed.**
		*/
		[U]
		public async Task DoesNotRetryOnSingleNodeConnectionPool()
		{
			var callDuration = TimeSpan.FromSeconds(3);

			var auditor = new Auditor(() => VirtualClusterWith
				.Nodes(10)
				.ClientCalls(rule => rule.FailAlways().Takes(callDuration))
				.ClientCalls(rule => rule.OnPort(9209).SucceedAlways())
				.SingleNodeConnection()
				.Settings(settings => settings
					.DisablePing()
					.MaximumRetries(10))
			);

			var expectedTrace = new ClientCall {
				{ BadResponse, 9200 }
			};

			await auditor.TraceCall(expectedTrace);
		}
	}
}