diff --git a/sdk/src/Core/Amazon.Runtime/Pipeline/HttpHandler/PooledConnectionRetryHandler.cs b/sdk/src/Core/Amazon.Runtime/Pipeline/HttpHandler/PooledConnectionRetryHandler.cs new file mode 100644 index 000000000000..48bfb5bde327 --- /dev/null +++ b/sdk/src/Core/Amazon.Runtime/Pipeline/HttpHandler/PooledConnectionRetryHandler.cs @@ -0,0 +1,183 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +using Amazon.Runtime.Internal.Util; +using System; +using System.IO; +using System.Net.Http; +using System.Net.Sockets; +using System.Threading; +using System.Threading.Tasks; + +namespace Amazon.Runtime.Pipeline.HttpHandler +{ + /// + /// A DelegatingHandler that automatically retries requests when they fail due to + /// stale connections from the HTTP connection pool. This prevents dead pooled + /// connections from counting against the SDK's retry limit. + /// + /// + /// When HttpClient reuses a connection from its pool, it may not immediately know + /// if the server has closed that connection. The first write attempt will fail with + /// errors like "Broken pipe" or "Connection reset". This handler catches these + /// specific errors and retries the request once, allowing HttpClient to establish + /// a fresh connection without consuming a retry from the SDK's retry policy. + /// + public class PooledConnectionRetryHandler : DelegatingHandler + { + // Key used to mark requests that have already been retried by this handler + private const string RetryAttemptedKey = "AmazonSDK_PooledConnectionRetryAttempted"; + + private readonly Logger _logger = Logger.GetLogger(typeof(PooledConnectionRetryHandler)); + + /// + /// Initializes a new instance of the PooledConnectionRetryHandler class. + /// + /// The inner handler to delegate requests to. + public PooledConnectionRetryHandler(HttpMessageHandler innerHandler) + : base(innerHandler) + { + } + + /// + /// Sends an HTTP request, automatically retrying once if the failure is due to + /// a stale pooled connection. + /// + protected override async Task SendAsync( + HttpRequestMessage request, + CancellationToken cancellationToken) + { + try + { + return await base.SendAsync(request, cancellationToken).ConfigureAwait(false); + } + catch (Exception ex) + { + // Only retry if this is a stale connection error and we haven't already retried + if (IsStaleConnectionError(ex) && !HasRetryBeenAttempted(request)) + { + _logger.DebugFormat( + "Detected stale pooled connection error: {0}. Automatically retrying request to {1}", + GetErrorMessage(ex), + request.RequestUri); + + // Mark that we've attempted a retry to prevent infinite loops + MarkRetryAttempted(request); + + try + { + // Retry the request - HttpClient will use a fresh connection + var response = await base.SendAsync(request, cancellationToken).ConfigureAwait(false); + + _logger.DebugFormat( + "Automatic retry succeeded for request to {0}", + request.RequestUri); + + return response; + } + catch (Exception retryEx) + { + _logger.DebugFormat( + "Automatic retry failed for request to {0}: {1}", + request.RequestUri, + GetErrorMessage(retryEx)); + + // Retry failed - throw the new exception + throw; + } + } + + // Not a stale connection error, or already retried - rethrow original exception + throw; + } + } + + /// + /// Determines if an exception indicates a stale pooled connection. + /// + /// + /// This method relies on SocketException error codes rather than error messages, + /// as error codes are stable across platforms and .NET versions, while error + /// messages can vary and are subject to localization. + /// + private static bool IsStaleConnectionError(Exception ex) + { + // Walk the exception chain looking for SocketException with known stale connection error codes + var currentException = ex; + while (currentException != null) + { + if (currentException is SocketException socketException) + { + // SocketError.Shutdown (32) = Broken pipe on Unix/Linux + // SocketError.ConnectionReset (10054) = Connection reset by peer + // SocketError.ConnectionAborted (10053) = Connection aborted + if (socketException.SocketErrorCode == SocketError.Shutdown || + socketException.SocketErrorCode == SocketError.ConnectionReset || + socketException.SocketErrorCode == SocketError.ConnectionAborted) + { + return true; + } + } + + currentException = currentException.InnerException; + } + + return false; + } + + /// + /// Checks if a retry has already been attempted for this request. + /// + private static bool HasRetryBeenAttempted(HttpRequestMessage request) + { +#if NET8_0_OR_GREATER + return request.Options.TryGetValue(new HttpRequestOptionsKey(RetryAttemptedKey), out var attempted) && attempted; +#else + return request.Properties.TryGetValue(RetryAttemptedKey, out var value) && + value is bool attempted && + attempted; +#endif + } + + /// + /// Marks that a retry has been attempted for this request. + /// + private static void MarkRetryAttempted(HttpRequestMessage request) + { +#if NET8_0_OR_GREATER + request.Options.Set(new HttpRequestOptionsKey(RetryAttemptedKey), true); +#else + request.Properties[RetryAttemptedKey] = true; +#endif + } + + /// + /// Extracts a readable error message from an exception. + /// + private static string GetErrorMessage(Exception ex) + { + var currentException = ex; + while (currentException != null) + { + if (currentException is IOException || currentException is SocketException) + { + return $"{currentException.GetType().Name}: {currentException.Message}"; + } + currentException = currentException.InnerException; + } + return ex.Message; + } + } +} diff --git a/sdk/src/Core/Amazon.Runtime/Pipeline/HttpHandler/_netstandard/HttpRequestMessageFactory.cs b/sdk/src/Core/Amazon.Runtime/Pipeline/HttpHandler/_netstandard/HttpRequestMessageFactory.cs index 5290e77ce96b..7b99ab003f5f 100644 --- a/sdk/src/Core/Amazon.Runtime/Pipeline/HttpHandler/_netstandard/HttpRequestMessageFactory.cs +++ b/sdk/src/Core/Amazon.Runtime/Pipeline/HttpHandler/_netstandard/HttpRequestMessageFactory.cs @@ -305,7 +305,11 @@ private static HttpClient CreateManagedHttpClient(IClientConfig clientConfig) Logger.GetLogger(typeof(HttpRequestMessageFactory)).Debug(pns, $"The current runtime does not support modifying proxy settings of HttpClient."); } - var httpClient = new HttpClient(httpMessageHandler); + // Wrap the handler with pooled connection retry middleware to automatically + // retry requests that fail due to stale connections from the connection pool. + // This prevents dead pooled connections from consuming SDK retry attempts. + var pooledConnectionRetryHandler = new PooledConnectionRetryHandler(httpMessageHandler); + var httpClient = new HttpClient(pooledConnectionRetryHandler); if (clientConfig.Timeout.HasValue) { diff --git a/sdk/test/UnitTests/Custom/Runtime/PooledConnectionRetryHandlerTests.cs b/sdk/test/UnitTests/Custom/Runtime/PooledConnectionRetryHandlerTests.cs new file mode 100644 index 000000000000..3d9d7625eb3b --- /dev/null +++ b/sdk/test/UnitTests/Custom/Runtime/PooledConnectionRetryHandlerTests.cs @@ -0,0 +1,343 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +using Amazon.Runtime.Pipeline.HttpHandler; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System; +using System.IO; +using System.Net; +using System.Net.Http; +using System.Net.Sockets; +using System.Threading; +using System.Threading.Tasks; + +namespace AWSSDK.UnitTests.Custom.Runtime +{ + [TestClass] + public class PooledConnectionRetryHandlerTests + { + + /// + /// Test that SocketException with Shutdown error is detected as a stale connection error + /// + [TestMethod] + [TestCategory("UnitTest")] + [TestCategory("Runtime")] + public async Task SocketExceptionShutdown_IsDetectedAndRetried() + { + var attemptCount = 0; + var mockHandler = new MockHttpMessageHandler((request, ct) => + { + attemptCount++; + if (attemptCount == 1) + { + // First attempt: throw socket shutdown error (Broken pipe on Unix) + throw new SocketException((int)SocketError.Shutdown); + } + // Second attempt: succeed + return Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK)); + }); + + var retryHandler = new PooledConnectionRetryHandler(mockHandler); + var client = new HttpClient(retryHandler); + var request = new HttpRequestMessage(HttpMethod.Get, "https://example.com"); + + var response = await client.SendAsync(request); + + Assert.AreEqual(2, attemptCount, "Should have made 2 attempts (initial + retry)"); + Assert.AreEqual(HttpStatusCode.OK, response.StatusCode); + } + + /// + /// Test that SocketException with ConnectionReset is detected as a stale connection error + /// + [TestMethod] + [TestCategory("UnitTest")] + [TestCategory("Runtime")] + public async Task SocketExceptionConnectionReset_IsDetectedAndRetried() + { + var attemptCount = 0; + var mockHandler = new MockHttpMessageHandler((request, ct) => + { + attemptCount++; + if (attemptCount == 1) + { + throw new SocketException((int)SocketError.ConnectionReset); + } + return Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK)); + }); + + var retryHandler = new PooledConnectionRetryHandler(mockHandler); + var client = new HttpClient(retryHandler); + var request = new HttpRequestMessage(HttpMethod.Get, "https://example.com"); + + var response = await client.SendAsync(request); + + Assert.AreEqual(2, attemptCount, "Should have made 2 attempts (initial + retry)"); + Assert.AreEqual(HttpStatusCode.OK, response.StatusCode); + } + + /// + /// Test that HttpRequestException wrapping SocketException is properly unwrapped and detected + /// + [TestMethod] + [TestCategory("UnitTest")] + [TestCategory("Runtime")] + public async Task HttpRequestExceptionWrappingSocketException_IsDetectedAndRetried() + { + var attemptCount = 0; + var mockHandler = new MockHttpMessageHandler((request, ct) => + { + attemptCount++; + if (attemptCount == 1) + { + var socketException = new SocketException((int)SocketError.ConnectionReset); + throw new HttpRequestException("Error while copying content", socketException); + } + return Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK)); + }); + + var retryHandler = new PooledConnectionRetryHandler(mockHandler); + var client = new HttpClient(retryHandler); + var request = new HttpRequestMessage(HttpMethod.Get, "https://example.com"); + + var response = await client.SendAsync(request); + + Assert.AreEqual(2, attemptCount, "Should have made 2 attempts (initial + retry)"); + Assert.AreEqual(HttpStatusCode.OK, response.StatusCode); + } + + /// + /// Test that only one automatic retry is attempted + /// + [TestMethod] + [TestCategory("UnitTest")] + [TestCategory("Runtime")] + public async Task OnlyOneRetryAttempt_PreventsInfiniteLoop() + { + var attemptCount = 0; + var mockHandler = new MockHttpMessageHandler((request, ct) => + { + attemptCount++; + // Always throw stale connection error + throw new SocketException((int)SocketError.Shutdown); + }); + + var retryHandler = new PooledConnectionRetryHandler(mockHandler); + var client = new HttpClient(retryHandler); + var request = new HttpRequestMessage(HttpMethod.Get, "https://example.com"); + + await Assert.ThrowsExceptionAsync(async () => + { + await client.SendAsync(request); + }); + + Assert.AreEqual(2, attemptCount, "Should have made exactly 2 attempts (initial + one retry)"); + } + + /// + /// Test that non-stale-connection errors are not retried + /// + [TestMethod] + [TestCategory("UnitTest")] + [TestCategory("Runtime")] + public async Task NonStaleConnectionError_IsNotRetried() + { + var attemptCount = 0; + var mockHandler = new MockHttpMessageHandler((request, ct) => + { + attemptCount++; + throw new TaskCanceledException("Request timed out"); + }); + + var retryHandler = new PooledConnectionRetryHandler(mockHandler); + var client = new HttpClient(retryHandler); + var request = new HttpRequestMessage(HttpMethod.Get, "https://example.com"); + + await Assert.ThrowsExceptionAsync(async () => + { + await client.SendAsync(request); + }); + + Assert.AreEqual(1, attemptCount, "Should have made only 1 attempt (no retry for non-stale errors)"); + } + + /// + /// Test that IOException with non-matching message is not retried + /// + [TestMethod] + [TestCategory("UnitTest")] + [TestCategory("Runtime")] + public async Task IOExceptionWithNonMatchingMessage_IsNotRetried() + { + var attemptCount = 0; + var mockHandler = new MockHttpMessageHandler((request, ct) => + { + attemptCount++; + throw new IOException("Disk full"); + }); + + var retryHandler = new PooledConnectionRetryHandler(mockHandler); + var client = new HttpClient(retryHandler); + var request = new HttpRequestMessage(HttpMethod.Get, "https://example.com"); + + await Assert.ThrowsExceptionAsync(async () => + { + await client.SendAsync(request); + }); + + Assert.AreEqual(1, attemptCount, "Should have made only 1 attempt (message doesn't match stale connection patterns)"); + } + + /// + /// Test successful request on first attempt (no retry needed) + /// + [TestMethod] + [TestCategory("UnitTest")] + [TestCategory("Runtime")] + public async Task SuccessfulRequest_NoRetry() + { + var attemptCount = 0; + var mockHandler = new MockHttpMessageHandler((request, ct) => + { + attemptCount++; + return Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK)); + }); + + var retryHandler = new PooledConnectionRetryHandler(mockHandler); + var client = new HttpClient(retryHandler); + var request = new HttpRequestMessage(HttpMethod.Get, "https://example.com"); + + var response = await client.SendAsync(request); + + Assert.AreEqual(1, attemptCount, "Should have made only 1 attempt (successful)"); + Assert.AreEqual(HttpStatusCode.OK, response.StatusCode); + } + + /// + /// Test that retry throws the new exception if retry also fails + /// + [TestMethod] + [TestCategory("UnitTest")] + [TestCategory("Runtime")] + public async Task RetryFailsWithDifferentError_ThrowsNewException() + { + var attemptCount = 0; + var mockHandler = new MockHttpMessageHandler((request, ct) => + { + attemptCount++; + if (attemptCount == 1) + { + throw new SocketException((int)SocketError.Shutdown); + } + // Second attempt throws different error + throw new InvalidOperationException("Different error"); + }); + + var retryHandler = new PooledConnectionRetryHandler(mockHandler); + var client = new HttpClient(retryHandler); + var request = new HttpRequestMessage(HttpMethod.Get, "https://example.com"); + + await Assert.ThrowsExceptionAsync(async () => + { + await client.SendAsync(request); + }); + + Assert.AreEqual(2, attemptCount, "Should have made 2 attempts"); + } + + /// + /// Test ConnectionAborted SocketException is detected and retried + /// + [TestMethod] + [TestCategory("UnitTest")] + [TestCategory("Runtime")] + public async Task SocketExceptionConnectionAborted_IsDetectedAndRetried() + { + var attemptCount = 0; + var mockHandler = new MockHttpMessageHandler((request, ct) => + { + attemptCount++; + if (attemptCount == 1) + { + throw new SocketException((int)SocketError.ConnectionAborted); + } + return Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK)); + }); + + var retryHandler = new PooledConnectionRetryHandler(mockHandler); + var client = new HttpClient(retryHandler); + var request = new HttpRequestMessage(HttpMethod.Get, "https://example.com"); + + var response = await client.SendAsync(request); + + Assert.AreEqual(2, attemptCount, "Should have made 2 attempts (initial + retry)"); + Assert.AreEqual(HttpStatusCode.OK, response.StatusCode); + } + + /// + /// Test thread safety with concurrent requests + /// + [TestMethod] + [TestCategory("UnitTest")] + [TestCategory("Runtime")] + public async Task ConcurrentRequests_AreHandledSafely() + { + var mockHandler = new MockHttpMessageHandler((request, ct) => + { + // Simulate some work + Thread.Sleep(10); + return Task.FromResult(new HttpResponseMessage(HttpStatusCode.OK)); + }); + + var retryHandler = new PooledConnectionRetryHandler(mockHandler); + var client = new HttpClient(retryHandler); + + // Send 10 concurrent requests + var tasks = new Task[10]; + for (int i = 0; i < 10; i++) + { + var request = new HttpRequestMessage(HttpMethod.Get, $"https://example.com/{i}"); + tasks[i] = client.SendAsync(request); + } + + var responses = await Task.WhenAll(tasks); + + Assert.AreEqual(10, responses.Length); + foreach (var response in responses) + { + Assert.AreEqual(HttpStatusCode.OK, response.StatusCode); + } + } + + /// + /// Mock HttpMessageHandler for testing + /// + private class MockHttpMessageHandler : HttpMessageHandler + { + private readonly Func> _sendFunc; + + public MockHttpMessageHandler(Func> sendFunc) + { + _sendFunc = sendFunc; + } + + protected override Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) + { + return _sendFunc(request, cancellationToken); + } + } + } +}