/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Net;
using System.Security.Authentication;
using System.Threading;
using Amazon.Runtime.Internal;
using Amazon.Runtime.Internal.Transform;
using Amazon.Runtime.Internal.Util;
using Amazon.Util;
using AWSSDK.Runtime.Internal.Util;
namespace Amazon.Runtime
{
/// <summary>
/// A retry policy specifies all aspects of retry behavior. This includes the conditions under which a request
/// should be retried, checks of the retry limit, preparing the request before a retry, and introducing a delay
/// (backoff) before retries.
/// </summary>
public abstract partial class RetryPolicy
{
/// <summary>
/// Maximum number of retries to be performed.
/// This does not count the initial request.
/// </summary>
public int MaxRetries { get; protected set; }
/// <summary>
/// The logger used to log messages.
/// </summary>
public ILogger Logger { get; set; }
/// <summary>
/// The standard set of throttling error codes.
/// The default set is created with <see cref="StringComparer.OrdinalIgnoreCase"/>, so lookups against it ignore case.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type arguments of ICollection/HashSet are not visible in this listing
/// (presumably string, given the string entries) - confirm against the original file.
/// </remarks>
public virtual ICollection ThrottlingErrorCodes { get; protected set; } = new HashSet(StringComparer.OrdinalIgnoreCase)
{
"Throttling",
"ThrottlingException",
"ThrottledException",
"RequestThrottledException",
"TooManyRequestsException",
"ProvisionedThroughputExceededException",
"TransactionInProgressException",
"RequestLimitExceeded",
"BandwidthLimitExceeded",
"LimitExceededException",
"RequestThrottled",
"SlowDown",
"PriorRequestNotComplete"
};
/// <summary>
/// The standard set of timeout error codes to retry on.
/// The default set is created without an explicit comparer.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type arguments of ICollection/HashSet are not visible in this listing
/// (presumably string, given the string entries) - confirm against the original file.
/// </remarks>
public ICollection TimeoutErrorCodesToRetryOn { get; protected set; } = new HashSet()
{
"RequestTimeout",
"RequestTimeoutException"
};
/// <summary>
/// List of AWS specific error codes which are returned as part of the error response.
/// These error codes will be retried. The collection is empty by default.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type arguments of ICollection/HashSet are not visible in this listing - confirm
/// the element type against the original file.
/// </remarks>
public ICollection ErrorCodesToRetryOn { get; protected set; } = new HashSet();
#region Transient errors
/// <summary>
/// The standard set of transient error, HTTP status codes to retry on.
/// 502 and 504 are returned by proxies. These can also be returned for
/// S3 accelerate requests which are served by CloudFront.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type arguments of ICollection/HashSet are not visible in this listing
/// (presumably HttpStatusCode, given the entries) - confirm against the original file.
/// </remarks>
public ICollection HttpStatusCodesToRetryOn { get; protected set; } = new HashSet
{
HttpStatusCode.InternalServerError,
HttpStatusCode.ServiceUnavailable,
HttpStatusCode.BadGateway,
HttpStatusCode.GatewayTimeout
};
/// <summary>
/// Set of web exception status codes to retry on.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type arguments of ICollection/HashSet are not visible in this listing
/// (presumably WebExceptionStatus, given the entries) - confirm against the original file.
/// </remarks>
public ICollection WebExceptionStatusesToRetryOn { get; protected set; } = new HashSet
{
WebExceptionStatus.ConnectFailure,
WebExceptionStatus.ConnectionClosed,
WebExceptionStatus.KeepAliveFailure,
WebExceptionStatus.NameResolutionFailure,
WebExceptionStatus.ReceiveFailure,
WebExceptionStatus.SendFailure,
WebExceptionStatus.Timeout,
};
#endregion
/// <summary>
/// This parameter serves as the value to the CapacityManager.Container datastructure.
/// Its properties include the available capacity left for making a retry request and the maximum
/// capacity size.
/// </summary>
protected RetryCapacity RetryCapacity { get; set; }
/// <summary>
/// Checks if a retry should be performed with the given execution context and exception.
/// </summary>
/// <param name="executionContext">The execution context which contains both the
/// requests and response context.</param>
/// <param name="exception">The exception thrown after issuing the request.</param>
/// <returns>Returns true if the request should be retried, else false. The exception is retried if it
/// matches with clockskew error codes.</returns>
public bool Retry(IExecutionContext executionContext, Exception exception)
{
    // A retry is only permitted while the retry limit has not been reached and the request can be resent.
    bool retryPermitted = !RetryLimitReached(executionContext) && CanRetry(executionContext);

    // Even when no retry is permitted, the exception is still classified if CSM is enabled, because the
    // IsLastExceptionRetryable property must be set on the CSM call attempt. For S3, with the BucketRegion
    // mismatch exception, an overhead of 100-115 ms was added (because of GetPreSignedUrl and Http HEAD requests).
    if (!retryPermitted && !executionContext.RequestContext.CSMEnabled)
    {
        return false;
    }

    bool clockSkewDetected = IsClockskew(executionContext, exception);
    if (!clockSkewDetected && !RetryForException(executionContext, exception))
    {
        return false;
    }

    executionContext.RequestContext.IsLastExceptionRetryable = true;

    // Reached only via the CSM path: the exception has been classified, but the request must not be retried.
    if (!retryPermitted)
    {
        return false;
    }

    executionContext.RequestContext.LastCapacityType = IsServiceTimeoutError(exception)
        ? CapacityManager.CapacityType.Timeout
        : CapacityManager.CapacityType.Retry;

    return OnRetry(executionContext, clockSkewDetected, IsThrottlingError(exception));
}
/// <summary>
/// Returns true if the request is in a state where it can be retried, else false.
/// </summary>
/// <param name="executionContext">The execution context which contains both the
/// requests and response context.</param>
/// <returns>Returns true if the request is in a state where it can be retried, else false.</returns>
public abstract bool CanRetry(IExecutionContext executionContext);
/// <summary>
/// Return true if the request should be retried for the given exception.
/// </summary>
/// <param name="executionContext">The execution context which contains both the
/// requests and response context.</param>
/// <param name="exception">The exception thrown by the previous request.</param>
/// <returns>Return true if the request should be retried.</returns>
public abstract bool RetryForException(IExecutionContext executionContext, Exception exception);
/// <summary>
/// Checks if the retry limit is reached.
/// </summary>
/// <param name="executionContext">The execution context which contains both the
/// requests and response context.</param>
/// <returns>Return false if the request can be retried, based on number of retries.</returns>
public abstract bool RetryLimitReached(IExecutionContext executionContext);
/// <summary>
/// Waits before retrying a request.
/// </summary>
/// <param name="executionContext">The execution context which contains both the
/// requests and response context.</param>
public abstract void WaitBeforeRetry(IExecutionContext executionContext);
/// <summary>
/// Virtual method that gets called on a successful request response.
/// The base implementation does nothing.
/// </summary>
/// <param name="executionContext">The execution context which contains both the
/// requests and response context.</param>
public virtual void NotifySuccess(IExecutionContext executionContext)
{
}
/// <summary>
/// Virtual method that gets called before a retry request is initiated. The value
/// returned is True by default(retry throttling feature is disabled).
/// </summary>
/// <param name="executionContext">The execution context which contains both the
/// requests and response context.</param>
/// <returns>Always true in the base implementation.</returns>
public virtual bool OnRetry(IExecutionContext executionContext) => true;
/// <summary>
/// Virtual method that gets called before a retry request is initiated. The value
/// returned is True by default(retry throttling feature is disabled).
/// </summary>
/// <param name="executionContext">The execution context which contains both the
/// requests and response context.</param>
/// <param name="bypassAcquireCapacity">true to bypass any attempt to acquire capacity on a retry</param>
/// <returns>Always true in the base implementation.</returns>
public virtual bool OnRetry(IExecutionContext executionContext, bool bypassAcquireCapacity) => true;
/// <summary>
/// Virtual method that gets called before a retry request is initiated. The value
/// returned is True by default(retry throttling feature is disabled).
/// </summary>
/// <param name="executionContext">The execution context which contains both the
/// requests and response context.</param>
/// <param name="bypassAcquireCapacity">true to bypass any attempt to acquire capacity on a retry</param>
/// <param name="isThrottlingError">true if the error that will be retried is a throttling error.
/// The base implementation does not use this value.</param>
/// <returns>The result of the two-argument OnRetry overload.</returns>
public virtual bool OnRetry(IExecutionContext executionContext, bool bypassAcquireCapacity, bool isThrottlingError)
    => OnRetry(executionContext, bypassAcquireCapacity);
/// <summary>
/// This method uses a token bucket to enforce the maximum sending rate.
/// The base implementation is empty.
/// </summary>
/// <param name="executionContext">The execution context which contains both the
/// requests and response context.</param>
/// <param name="exception">If the prior request failed, this exception is expected to be
/// the exception that occurred during the prior request failure.</param>
public virtual void ObtainSendToken(IExecutionContext executionContext, Exception exception)
{
}
/// <summary>
/// Determines if an AmazonServiceException is a throttling error
/// </summary>
/// <param name="exception">The current exception to check.</param>
/// <returns>true if it is a throttling error else false.</returns>
public virtual bool IsThrottlingError(Exception exception)
{
    var ase = exception as AmazonServiceException;

    // The exception's own retryable details take precedence when they flag throttling.
    if (ase?.Retryable?.Throttling == true)
    {
        return true;
    }

    // Otherwise fall back to the configured throttling error codes.
    return ThrottlingErrorCodes.Contains(ase?.ErrorCode);
}
/// <summary>
/// Determines if an AmazonServiceException is a transient error that
/// should be retried.
/// </summary>
/// <param name="executionContext">The current execution context</param>
/// <param name="exception">The current exception to check.</param>
/// <returns>true if the exception is a transient error else false.</returns>
public virtual bool IsTransientError(IExecutionContext executionContext, Exception exception)
{
// An IOException was thrown by the underlying http client.
if (exception is IOException)
{
#if !NETSTANDARD // ThreadAbortException is not NetStandard
// Don't retry IOExceptions that are caused by a ThreadAbortException
// NOTE(review): the type argument of IsInnerException is not visible in this listing; per the
// comment above it is presumably ThreadAbortException - confirm against the original file.
if (ExceptionUtils.IsInnerException(exception))
return false;
#endif
// Retry all other IOExceptions
return true;
}
// NOTE(review): the type argument of IsInnerException is not visible here either, so the inner
// exception type this branch looks for cannot be determined from this listing - confirm.
else if (ExceptionUtils.IsInnerException(exception))
{
return true;
}
//Check for AmazonServiceExceptions specifically
var serviceException = exception as AmazonServiceException;
if(serviceException != null)
{
//Check if the exception is marked retryable.
//Any non-null Retryable value counts; its contents are not inspected here.
if (serviceException.Retryable != null)
{
return true;
}
//Check for specific HTTP status codes that are associated with transient
//service errors as long as they are not throttling errors.
if (HttpStatusCodesToRetryOn.Contains(serviceException.StatusCode) &&
!IsThrottlingError(exception))
{
return true;
}
//Check for successful responses that couldn't be unmarshalled. These should be considered
//transient errors because the payload could have been corrupted after OK was sent in the
//header.
if(serviceException.StatusCode == HttpStatusCode.OK && serviceException is AmazonUnmarshallingException)
{
return true;
}
}
//Check for WebExceptions that are considered transient
WebException webException;
if (ExceptionUtils.IsInnerException(exception, out webException))
{
if (WebExceptionStatusesToRetryOn.Contains(webException.Status))
{
return true;
}
}
if (IsTransientSslError(exception))
{
return true;
}
#if NETSTANDARD
// Version 7.35 libcurl which is the default version installed with Ubuntu 14.04
// has issues under high concurrency causing response streams being disposed
// during unmarshalling. To work around this issue will add the ObjectDisposedException
// to the list of exceptions to retry.
// NOTE(review): the type argument of IsInnerException is not visible in this listing; per the
// comment above it is presumably ObjectDisposedException - confirm against the original file.
if (ExceptionUtils.IsInnerException(exception))
return true;
//If it isn't a serviceException that we already processed for StatusCode and it
//is a HttpRequestException, then it is a network type error that did not reach the
//service and it should be retried.
if (serviceException == null && exception is System.Net.Http.HttpRequestException)
{
return true;
}
if (exception is OperationCanceledException)
{
if (!executionContext.RequestContext.CancellationToken.IsCancellationRequested)
{
//OperationCanceledException thrown by HttpClient not the CancellationToken supplied by the user.
//This exception can wrap at least IOExceptions, ObjectDisposedExceptions and should be retried
return true;
}
}
// .NET 5 introduced changes to HttpClient for timed out requests by returning a wrapped TimeoutException.
if (exception is TimeoutException)
return true;
#endif
// None of the transient conditions above matched.
return false;
}
private const string sslErrorZeroReturn = "SSL_ERROR_ZERO_RETURN";

/// <summary>
/// Walks the exception chain (the exception itself, then each InnerException) looking for a message that
/// contains the SSL_ERROR_ZERO_RETURN keyword. A match only counts if it occurs on an
/// AuthenticationException or on an exception nested below one.
/// Based on https://github.com/aws/aws-sdk-net/issues/1556
/// </summary>
/// <param name="exception">The outermost exception to inspect; may be null.</param>
/// <returns>true if the keyword is found at or below an AuthenticationException, else false.</returns>
public static bool IsTransientSslError(Exception exception)
{
    var underAuthenticationException = false;

    for (var current = exception; current != null; current = current.InnerException)
    {
        // Once an AuthenticationException has been seen, every deeper exception qualifies.
        underAuthenticationException = underAuthenticationException || current is AuthenticationException;
        if (!underAuthenticationException)
        {
            continue;
        }

        if (current.Message.Contains(sslErrorZeroReturn))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Determines if the exception is a known timeout error code that
/// should be retried under the timeout error category.
/// </summary>
/// <param name="exception">The current exception to check.</param>
/// <returns>true if the exception is considered a timeout else false</returns>
public virtual bool IsServiceTimeoutError(Exception exception)
{
    // The error code is null when the exception is not an AmazonServiceException.
    var errorCode = (exception as AmazonServiceException)?.ErrorCode;
    return TimeoutErrorCodesToRetryOn.Contains(errorCode);
}
#region Clock skew correction
// Service error codes that are treated as possible clock skew failures. Lookups ignore case.
private static HashSet<string> clockSkewErrorCodes = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
    "RequestTimeTooSkewed",
    "RequestExpired",
    "InvalidSignatureException",
    "SignatureDoesNotMatch",
    "AuthFailure",
    "RequestInTheFuture",
};

// Log message formats used when a clock skew is identified and when a correction is applied.
private const string clockSkewMessageFormat = "Identified clock skew: local time = {0}, local time with correction = {1}, current clock skew correction = {2}, server time = {3}, service endpoint = {4}.";
private const string clockSkewUpdatedFormat = "Setting clock skew correction: new clock skew correction = {0}, service endpoint = {1}.";

// Markers used to locate the server timestamp inside an exception message:
// the timestamp sits between the first "(" and a following " + " or " - " separator.
private const string clockSkewMessageParen = "(";
private const string clockSkewMessagePlusSeparator = " + ";
private const string clockSkewMessageMinusSeparator = " - ";

// Differences between corrected local time and server time up to this size are not treated as skew.
private static TimeSpan clockSkewMaxThreshold = TimeSpan.FromMinutes(5);
/// <summary>
/// Determines whether the failed request looks like a clock skew problem and, if so, records a
/// clock correction for the request's endpoint.
/// </summary>
/// <remarks>
/// The check runs for HEAD requests and for service exceptions whose error code is either null or one of
/// the known clock skew codes. The server time is taken from the response's Date header or, failing that,
/// parsed from the exception message. When the corrected local time differs from the server time by more
/// than the threshold, the new correction is always stored for the endpoint.
/// </remarks>
/// <param name="executionContext">The execution context which contains both the
/// requests and response context.</param>
/// <param name="exception">The exception thrown by the failed request.</param>
/// <returns>
/// true if a skew beyond the threshold was found and automatic correction is enabled
/// (AWSConfigs.CorrectForClockSkew is set and no ManualClockCorrection is configured); in that case the
/// request is also marked as unsigned. Otherwise false.
/// </returns>
private bool IsClockskew(IExecutionContext executionContext, Exception exception)
{
    var clientConfig = executionContext.RequestContext.ClientConfig;
    var request = executionContext.RequestContext.Request;
    var ase = exception as AmazonServiceException;

    var isHead =
        request != null &&
        string.Equals(request.HttpMethod, "HEAD", StringComparison.Ordinal);
    var isClockskewErrorCode =
        ase != null &&
        (ase.ErrorCode == null || clockSkewErrorCodes.Contains(ase.ErrorCode));

    if (!isHead && !isClockskewErrorCode)
    {
        return false;
    }

    // The correction is stored per endpoint. Without a request or endpoint there is nothing to key it
    // on, so report "no clock skew" instead of failing with a NullReferenceException.
    if (request == null || request.Endpoint == null)
    {
        return false;
    }

    var endpoint = request.Endpoint.ToString();
    var realNow = AWSConfigs.utcNowSource();
    var correctedNow = CorrectClockSkew.GetCorrectedUtcNowForEndpoint(endpoint);

    DateTime serverTime;

    // Try getting server time from the headers
    bool serverTimeDetermined = TryParseDateHeader(ase, out serverTime);

    // If that fails, try to parse it from the exception message
    if (!serverTimeDetermined)
    {
        serverTimeDetermined = TryParseExceptionMessage(ase, out serverTime);
    }

    if (!serverTimeDetermined)
    {
        return false;
    }

    // using accurate server time, calculate correction if local time is off
    serverTime = serverTime.ToUniversalTime();
    var diff = correctedNow - serverTime;
    var absDiff = diff.Ticks < 0 ? -diff : diff;
    if (absDiff <= clockSkewMaxThreshold)
    {
        return false;
    }

    var newCorrection = serverTime - realNow;

    // Logger is a settable property and may not have been assigned; logging must not break the check.
    Logger?.InfoFormat(clockSkewMessageFormat,
        realNow, correctedNow, clientConfig.ClockOffset, serverTime, endpoint);

    // Always set the correction, for informational purposes
    CorrectClockSkew.SetClockCorrectionForEndpoint(endpoint, newCorrection);

    // Only retry if clock skew correction is not disabled
    var shouldRetry = AWSConfigs.CorrectForClockSkew && !AWSConfigs.ManualClockCorrection.HasValue;
    if (!shouldRetry)
    {
        return false;
    }

    Logger?.InfoFormat(clockSkewUpdatedFormat, newCorrection, endpoint);

    // Clear the signed flag on the request context before the retry.
    executionContext.RequestContext.IsSigned = false;
    return true;
}
/// <summary>
/// Tries to read the server time from the "Date" header of the HTTP error response
/// attached to the given service exception.
/// </summary>
/// <param name="ase">The service exception to inspect; may be null.</param>
/// <param name="serverTime">The parsed server time on success, otherwise DateTime.MinValue.</param>
/// <returns>true if a Date header was found and parsed, else false.</returns>
private static bool TryParseDateHeader(AmazonServiceException ase, out DateTime serverTime)
{
    serverTime = DateTime.MinValue;

    var responseData = GetWebData(ase);
    if (responseData == null)
    {
        return false;
    }

    // parse server time from "Date" header, if possible
    var headerText = responseData.GetHeaderValue(HeaderKeys.DateHeader);
    if (string.IsNullOrEmpty(headerText))
    {
        return false;
    }

    DateTime parsed;
    var succeeded = DateTime.TryParseExact(
        headerText,
        AWSSDKUtils.GMTDateFormat,
        CultureInfo.InvariantCulture,
        DateTimeStyles.AssumeUniversal,
        out parsed);

    if (succeeded)
    {
        serverTime = parsed;
    }

    return succeeded;
}
/// <summary>
/// Tries to read the server time from the message of the given service exception.
/// The timestamp is expected between the first "(" and a following " + " or " - " separator.
/// </summary>
/// <param name="ase">The service exception to inspect; may be null.</param>
/// <param name="serverTime">The parsed server time on success, otherwise DateTime.MinValue.</param>
/// <returns>true if a timestamp was located and parsed, else false.</returns>
private static bool TryParseExceptionMessage(AmazonServiceException ase, out DateTime serverTime)
{
    serverTime = DateTime.MinValue;

    var text = ase?.Message;
    if (string.IsNullOrEmpty(text))
    {
        return false;
    }

    // parse server time from exception message, if possible
    var openParen = text.IndexOf(clockSkewMessageParen, StringComparison.Ordinal);
    if (openParen < 0)
    {
        return false;
    }

    var start = openParen + 1;

    // Locate " + " or " - " separator that follows the server time string; " + " is tried first.
    var end = text.IndexOf(clockSkewMessagePlusSeparator, start, StringComparison.Ordinal);
    if (end < 0)
    {
        end = text.IndexOf(clockSkewMessageMinusSeparator, start, StringComparison.Ordinal);
    }

    // No separator, or nothing between the parenthesis and the separator.
    if (end <= start)
    {
        return false;
    }

    DateTime parsed;
    var succeeded = DateTime.TryParseExact(
        text.Substring(start, end - start),
        AWSSDKUtils.ISO8601BasicDateTimeFormat,
        CultureInfo.InvariantCulture,
        DateTimeStyles.AssumeUniversal,
        out parsed);

    if (succeeded)
    {
        serverTime = parsed;
    }

    return succeeded;
}
#endregion
/// <summary>
/// Searches the given exception and its chain of inner exceptions for the first
/// HttpErrorResponseException and returns its response.
/// </summary>
/// <param name="ase">The service exception to start from; may be null.</param>
/// <returns>The response of the first HttpErrorResponseException found, or null if there is none.</returns>
private static IWebResponseData GetWebData(AmazonServiceException ase)
{
    for (Exception current = ase; current != null; current = current.InnerException)
    {
        var httpError = current as HttpErrorResponseException;
        if (httpError != null)
        {
            return httpError.Response;
        }
    }

    return null;
}
/// <summary>
/// Checks whether the message of the given exception, or of any exception in its chain of
/// inner exceptions, is contained in the given set of error messages.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type argument of HashSet is not visible in this listing
/// (presumably string, since messages are looked up) - confirm against the original file.
/// </remarks>
/// <param name="exception">The outermost exception to inspect; may be null.</param>
/// <param name="errorMessages">The set of messages to match against.</param>
/// <returns>true if any message in the chain is in the set, else false.</returns>
protected static bool ContainErrorMessage(Exception exception, HashSet errorMessages)
{
    for (var current = exception; current != null; current = current.InnerException)
    {
        if (errorMessages.Contains(current.Message))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Creates a key for storing retry capacity data.
/// Key is based on service's url (we store retry capacity per service's url variant).
/// If ClientConfig's ServiceURL override is set we use it as a key,
/// otherwise we construct key based on ClientConfig's schema, region, service, fips, dualstack parameters.
/// This value is unique key per real service's url variant.
/// </summary>
/// <param name="config">The client configuration to derive the key from.</param>
/// <returns>The ServiceURL when it is not null, otherwise the composed key string.</returns>
protected static string GetRetryCapacityKey(IClientConfig config)
{
    // A non-null ServiceURL override is used verbatim as the key.
    return config.ServiceURL
        ?? $"http:{config.UseHttp}//region:{config.RegionEndpoint?.SystemName}.service:{config.RegionEndpointServiceName}.fips:{config.UseFIPSEndpoint}.ipv6:{config.UseDualstackEndpoint}";
}
}
}