From 74895912012f0da4506db2d37d6dce0fdf0fddda Mon Sep 17 00:00:00 2001 From: pulimsr Date: Thu, 4 Dec 2025 17:02:29 -0500 Subject: [PATCH 1/6] Implementing checksum validation for single-part and multipart downloads --- .../include/aws/transfer/TransferHandle.h | 49 ++++++ .../include/aws/transfer/TransferManager.h | 7 + .../source/transfer/TransferManager.cpp | 161 +++++++++++++++--- 3 files changed, 196 insertions(+), 21 deletions(-) diff --git a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h index 0d062be1e00..d7b124d2c10 100644 --- a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h +++ b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h @@ -79,6 +79,9 @@ namespace Aws Aws::String GetChecksum() const { return m_checksum; }; void SetChecksum(const Aws::String& checksum) { m_checksum = checksum; } + + std::shared_ptr GetChecksumHash() const { return m_checksumHash; } + void SetChecksumHash(std::shared_ptr hash) { m_checksumHash = hash; } private: int m_partId = 0; @@ -93,11 +96,48 @@ namespace Aws std::atomic m_downloadBuffer; bool m_lastPart = false; Aws::String m_checksum; + std::shared_ptr m_checksumHash; }; using PartPointer = std::shared_ptr< PartState >; using PartStateMap = Aws::Map< int, PartPointer >; + /** + * Stream buffer wrapper that calculates checksum while forwarding data to underlying stream. + * Used for single-part download checksum validation. + */ + class AWS_TRANSFER_API ChecksumValidatingStreamBuf : public std::streambuf + { + public: + ChecksumValidatingStreamBuf(std::streambuf* underlyingBuf, + std::shared_ptr hash) + : m_underlyingBuf(underlyingBuf), m_hash(hash) {} + + std::shared_ptr GetHash() const { return m_hash; } + + protected: + std::streamsize xsputn(const char* s, std::streamsize n) override + { + if (m_hash && n > 0) { + m_hash->Update(const_cast(reinterpret_cast(s)), static_cast(n)); + } + return m_underlyingBuf->sputn(s, n); + } + + int overflow(int c) override + { + if (m_hash && c != EOF) { + unsigned char byte = static_cast(c); + m_hash->Update(&byte, 1); + } + return m_underlyingBuf->sputc(c); + } + + private: + std::streambuf* m_underlyingBuf; + std::shared_ptr m_hash; + }; + enum class TransferStatus { //this value is only used for directory synchronization @@ -389,6 +429,12 @@ namespace Aws Aws::String GetChecksum() const { return m_checksum; } void SetChecksum(const Aws::String& checksum) { this->m_checksum = checksum; } + void SetPartChecksum(int partId, std::shared_ptr hash) { m_partChecksums[partId] = hash; } + std::shared_ptr GetPartChecksum(int partId) const { + auto it = m_partChecksums.find(partId); + return it != m_partChecksums.end() ? it->second : nullptr; + } + private: void CleanupDownloadStream(); @@ -430,6 +476,9 @@ namespace Aws mutable std::condition_variable m_waitUntilFinishedSignal; mutable std::mutex m_getterSetterLock; Aws::String m_checksum; + // Map of part number to Hash instance for multipart download checksum validation + // TODO: Add CRT checksum combining utility when available + Aws::Map> m_partChecksums; }; AWS_TRANSFER_API Aws::OStream& operator << (Aws::OStream& s, TransferStatus status); diff --git a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferManager.h b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferManager.h index a4b5580fd6e..725f14c1219 100644 --- a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferManager.h +++ b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferManager.h @@ -144,6 +144,13 @@ namespace Aws * upload. Defaults to CRC64-NVME. */ Aws::S3::Model::ChecksumAlgorithm checksumAlgorithm = S3::Model::ChecksumAlgorithm::CRC64NVME; + + /** + * Enable checksum validation for downloads. When enabled, checksums will be + * calculated during download and validated against S3 response headers. + * Defaults to true. + */ + bool validateChecksums = true; }; /** diff --git a/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp b/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp index 996e427e114..4e6eba84745 100644 --- a/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp +++ b/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp @@ -51,6 +51,42 @@ namespace Aws } } + static std::shared_ptr CreateHashForAlgorithm(S3::Model::ChecksumAlgorithm algorithm) { + if (algorithm == S3::Model::ChecksumAlgorithm::CRC32) { + return Aws::MakeShared(CLASS_TAG); + } + if (algorithm == S3::Model::ChecksumAlgorithm::CRC32C) { + return Aws::MakeShared(CLASS_TAG); + } + if (algorithm == S3::Model::ChecksumAlgorithm::SHA1) { + return Aws::MakeShared(CLASS_TAG); + } + if (algorithm == S3::Model::ChecksumAlgorithm::SHA256) { + return Aws::MakeShared(CLASS_TAG); + } + return Aws::MakeShared(CLASS_TAG); + } + + template + static Aws::String GetChecksumFromResult(const ResultT& result, S3::Model::ChecksumAlgorithm algorithm) { + if (algorithm == S3::Model::ChecksumAlgorithm::CRC32) { + return result.GetChecksumCRC32(); + } + if (algorithm == S3::Model::ChecksumAlgorithm::CRC32C) { + return result.GetChecksumCRC32C(); + } + if (algorithm == S3::Model::ChecksumAlgorithm::CRC64NVME) { + return result.GetChecksumCRC64NVME(); + } + if (algorithm == S3::Model::ChecksumAlgorithm::SHA1) { + return result.GetChecksumSHA1(); + } + if (algorithm == S3::Model::ChecksumAlgorithm::SHA256) { + return result.GetChecksumSHA256(); + } + return ""; + } + struct TransferHandleAsyncContext : public Aws::Client::AsyncCallerContext { std::shared_ptr handle; @@ -664,26 +700,7 @@ namespace Aws { if (handle->ShouldContinue()) { - partState->SetChecksum([&]() -> Aws::String { - if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::CRC32) - { - return outcome.GetResult().GetChecksumCRC32(); - } - else if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::CRC32C) - { - return outcome.GetResult().GetChecksumCRC32C(); - } - else if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::SHA1) - { - return outcome.GetResult().GetChecksumSHA1(); - } - else if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::SHA256) - { - return outcome.GetResult().GetChecksumSHA256(); - } - //Return empty checksum for not set. - return ""; - }()); + partState->SetChecksum(GetChecksumFromResult(outcome.GetResult(), m_transferConfig.checksumAlgorithm)); handle->ChangePartToCompleted(partState, outcome.GetResult().GetETag()); AWS_LOGSTREAM_DEBUG(CLASS_TAG, "Transfer handle [" << handle->GetId() << " successfully uploaded Part: [" << partState->GetPartId() << "] to Bucket: [" @@ -913,7 +930,28 @@ namespace Aws request.SetVersionId(handle->GetVersionId()); } - request.SetResponseStreamFactory(handle->GetCreateDownloadStreamFunction()); + // Wrap user's stream with checksum validator if enabled + std::shared_ptr singlePartHash; + std::shared_ptr checksumWrapper; + + if (m_transferConfig.validateChecksums) + { + singlePartHash = CreateHashForAlgorithm(m_transferConfig.checksumAlgorithm); + auto userStreamFactory = handle->GetCreateDownloadStreamFunction(); + + request.SetResponseStreamFactory([userStreamFactory, singlePartHash, &checksumWrapper]() -> Aws::IOStream* { + auto userStream = userStreamFactory(); + if (userStream && singlePartHash) { + checksumWrapper = Aws::MakeShared(CLASS_TAG, userStream->rdbuf(), singlePartHash); + userStream->rdbuf(checksumWrapper.get()); + } + return userStream; + }); + } + else + { + request.SetResponseStreamFactory(handle->GetCreateDownloadStreamFunction()); + } request.SetDataReceivedEventHandler([this, handle, partState](const Aws::Http::HttpRequest*, Aws::Http::HttpResponse*, long long progress) { @@ -938,6 +976,38 @@ namespace Aws handle->SetContentType(getObjectOutcome.GetResult().GetContentType()); handle->ChangePartToCompleted(partState, getObjectOutcome.GetResult().GetETag()); getObjectOutcome.GetResult().GetBody().flush(); + + // Validate checksum for single-part download + if (m_transferConfig.validateChecksums && singlePartHash) + { + Aws::String expectedChecksum = GetChecksumFromResult(getObjectOutcome.GetResult(), m_transferConfig.checksumAlgorithm); + + if (!expectedChecksum.empty()) + { + auto calculatedResult = singlePartHash->GetHash(); + if (calculatedResult.IsSuccess()) + { + Aws::String calculatedChecksum = Utils::HashingUtils::Base64Encode(calculatedResult.GetResult()); + if (calculatedChecksum != expectedChecksum) + { + AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId() + << "] Checksum mismatch for single-part download. Expected: " + << expectedChecksum << ", Calculated: " << calculatedChecksum); + handle->ChangePartToFailed(partState); + handle->UpdateStatus(TransferStatus::FAILED); + Aws::Client::AWSError error(Aws::S3::S3Errors::INTERNAL_FAILURE, + "ChecksumMismatch", + "Single-part download checksum validation failed", + false); + handle->SetError(error); + TriggerErrorCallback(handle, error); + TriggerTransferStatusUpdatedCallback(handle); + return; + } + } + } + } + handle->UpdateStatus(TransferStatus::COMPLETED); } else @@ -1074,6 +1144,12 @@ namespace Aws { partState->SetDownloadBuffer(buffer); + // Initialize checksum Hash for this part if validation is enabled + if (m_transferConfig.validateChecksums) + { + handle->SetPartChecksum(partState->GetPartId(), CreateHashForAlgorithm(m_transferConfig.checksumAlgorithm)); + } + auto getObjectRangeRequest = m_transferConfig.getObjectTemplate; getObjectRangeRequest.SetCustomizedAccessLogTag(m_transferConfig.customizedAccessLogTag); getObjectRangeRequest.SetContinueRequestHandler([handle](const Aws::Http::HttpRequest*) { return handle->ShouldContinue(); }); @@ -1202,6 +1278,48 @@ namespace Aws Aws::IOStream* bufferStream = partState->GetDownloadPartStream(); assert(bufferStream); + // Calculate and validate checksum for this part if validation is enabled + if (m_transferConfig.validateChecksums) + { + auto hash = handle->GetPartChecksum(partState->GetPartId()); + if (hash && partState->GetDownloadBuffer()) + { + hash->Update(partState->GetDownloadBuffer(), static_cast(partState->GetSizeInBytes())); + + // Get expected checksum from response + Aws::String expectedChecksum = GetChecksumFromResult(outcome.GetResult(), m_transferConfig.checksumAlgorithm); + + // Validate part checksum + if (!expectedChecksum.empty()) + { + auto calculatedResult = hash->GetHash(); + if (calculatedResult.IsSuccess()) + { + Aws::String calculatedChecksum = Utils::HashingUtils::Base64Encode(calculatedResult.GetResult()); + if (calculatedChecksum != expectedChecksum) + { + AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId() + << "] Checksum mismatch for part " << partState->GetPartId() + << ". Expected: " << expectedChecksum << ", Calculated: " << calculatedChecksum); + Aws::Client::AWSError error(Aws::S3::S3Errors::INTERNAL_FAILURE, + "ChecksumMismatch", + "Part checksum validation failed", + false); + handle->ChangePartToFailed(partState); + handle->SetError(error); + TriggerErrorCallback(handle, error); + if(partState->GetDownloadBuffer()) + { + m_bufferManager.Release(partState->GetDownloadBuffer()); + partState->SetDownloadBuffer(nullptr); + } + return; + } + } + } + } + } + Aws::String errMsg{handle->WritePartToDownloadStream(bufferStream, partState->GetRangeBegin())}; if (errMsg.empty()) { handle->ChangePartToCompleted(partState, outcome.GetResult().GetETag()); @@ -1239,6 +1357,7 @@ namespace Aws { if (failedParts.size() == 0 && handle->GetBytesTransferred() == handle->GetBytesTotalSize()) { + // TODO: Combine part checksums and validate full-object checksum when CRT provides combining utility outcome.GetResult().GetBody().flush(); handle->UpdateStatus(TransferStatus::COMPLETED); } From 9a55a9c02eec5bf0cfae83632f3fd3de4a176110 Mon Sep 17 00:00:00 2001 From: pulimsr Date: Thu, 4 Dec 2025 23:39:41 -0500 Subject: [PATCH 2/6] refactoring ChecksumValidatingStreamBuf to separate files --- .../transfer/ChecksumValidatingStreamBuf.h | 38 +++++++++++++++++++ .../include/aws/transfer/TransferHandle.h | 37 +----------------- .../transfer/ChecksumValidatingStreamBuf.cpp | 35 +++++++++++++++++ 3 files changed, 74 insertions(+), 36 deletions(-) create mode 100644 src/aws-cpp-sdk-transfer/include/aws/transfer/ChecksumValidatingStreamBuf.h create mode 100644 src/aws-cpp-sdk-transfer/source/transfer/ChecksumValidatingStreamBuf.cpp diff --git a/src/aws-cpp-sdk-transfer/include/aws/transfer/ChecksumValidatingStreamBuf.h b/src/aws-cpp-sdk-transfer/include/aws/transfer/ChecksumValidatingStreamBuf.h new file mode 100644 index 00000000000..816d7f04783 --- /dev/null +++ b/src/aws-cpp-sdk-transfer/include/aws/transfer/ChecksumValidatingStreamBuf.h @@ -0,0 +1,38 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +#pragma once + +#include +#include +#include +#include + +namespace Aws +{ + namespace Transfer + { + /** + * Stream buffer wrapper that calculates checksum while forwarding data to underlying stream. + * Used for single-part download checksum validation. + */ + class AWS_TRANSFER_API ChecksumValidatingStreamBuf : public std::streambuf + { + public: + ChecksumValidatingStreamBuf(std::streambuf* underlyingBuf, + std::shared_ptr hash); + + std::shared_ptr GetHash() const { return m_hash; } + + protected: + std::streamsize xsputn(const char* s, std::streamsize n) override; + int overflow(int c) override; + + private: + std::streambuf* m_underlyingBuf; + std::shared_ptr m_hash; + }; + } +} diff --git a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h index d7b124d2c10..ec845f6afce 100644 --- a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h +++ b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h @@ -6,6 +6,7 @@ #pragma once #include +#include #include #include #include @@ -102,42 +103,6 @@ namespace Aws using PartPointer = std::shared_ptr< PartState >; using PartStateMap = Aws::Map< int, PartPointer >; - /** - * Stream buffer wrapper that calculates checksum while forwarding data to underlying stream. - * Used for single-part download checksum validation. - */ - class AWS_TRANSFER_API ChecksumValidatingStreamBuf : public std::streambuf - { - public: - ChecksumValidatingStreamBuf(std::streambuf* underlyingBuf, - std::shared_ptr hash) - : m_underlyingBuf(underlyingBuf), m_hash(hash) {} - - std::shared_ptr GetHash() const { return m_hash; } - - protected: - std::streamsize xsputn(const char* s, std::streamsize n) override - { - if (m_hash && n > 0) { - m_hash->Update(const_cast(reinterpret_cast(s)), static_cast(n)); - } - return m_underlyingBuf->sputn(s, n); - } - - int overflow(int c) override - { - if (m_hash && c != EOF) { - unsigned char byte = static_cast(c); - m_hash->Update(&byte, 1); - } - return m_underlyingBuf->sputc(c); - } - - private: - std::streambuf* m_underlyingBuf; - std::shared_ptr m_hash; - }; - enum class TransferStatus { //this value is only used for directory synchronization diff --git a/src/aws-cpp-sdk-transfer/source/transfer/ChecksumValidatingStreamBuf.cpp b/src/aws-cpp-sdk-transfer/source/transfer/ChecksumValidatingStreamBuf.cpp new file mode 100644 index 00000000000..9776838bdc7 --- /dev/null +++ b/src/aws-cpp-sdk-transfer/source/transfer/ChecksumValidatingStreamBuf.cpp @@ -0,0 +1,35 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +#include + +namespace Aws +{ + namespace Transfer + { + ChecksumValidatingStreamBuf::ChecksumValidatingStreamBuf(std::streambuf* underlyingBuf, + std::shared_ptr hash) + : m_underlyingBuf(underlyingBuf), m_hash(hash) + { + } + + std::streamsize ChecksumValidatingStreamBuf::xsputn(const char* s, std::streamsize n) + { + if (m_hash && n > 0) { + m_hash->Update(const_cast(reinterpret_cast(s)), static_cast(n)); + } + return m_underlyingBuf->sputn(s, n); + } + + int ChecksumValidatingStreamBuf::overflow(int c) + { + if (c != EOF && m_hash) { + unsigned char byte = static_cast(c); + m_hash->Update(&byte, 1); + } + return m_underlyingBuf->sputc(c); + } + } +} From 41af26d0b550687180fb0140cae6fcee35bc3942 Mon Sep 17 00:00:00 2001 From: pulimsr Date: Fri, 5 Dec 2025 05:06:21 -0500 Subject: [PATCH 3/6] moving ChecksumValidatingStreamBuf to inline implementation --- .../transfer/ChecksumValidatingStreamBuf.h | 25 ++++++++++--- .../transfer/ChecksumValidatingStreamBuf.cpp | 35 ------------------- 2 files changed, 21 insertions(+), 39 deletions(-) delete mode 100644 src/aws-cpp-sdk-transfer/source/transfer/ChecksumValidatingStreamBuf.cpp diff --git a/src/aws-cpp-sdk-transfer/include/aws/transfer/ChecksumValidatingStreamBuf.h b/src/aws-cpp-sdk-transfer/include/aws/transfer/ChecksumValidatingStreamBuf.h index 816d7f04783..04937be704d 100644 --- a/src/aws-cpp-sdk-transfer/include/aws/transfer/ChecksumValidatingStreamBuf.h +++ b/src/aws-cpp-sdk-transfer/include/aws/transfer/ChecksumValidatingStreamBuf.h @@ -18,17 +18,34 @@ namespace Aws * Stream buffer wrapper that calculates checksum while forwarding data to underlying stream. * Used for single-part download checksum validation. */ - class AWS_TRANSFER_API ChecksumValidatingStreamBuf : public std::streambuf + class ChecksumValidatingStreamBuf : public std::streambuf { public: ChecksumValidatingStreamBuf(std::streambuf* underlyingBuf, - std::shared_ptr hash); + std::shared_ptr hash) + : m_underlyingBuf(underlyingBuf), m_hash(hash) + { + } std::shared_ptr GetHash() const { return m_hash; } protected: - std::streamsize xsputn(const char* s, std::streamsize n) override; - int overflow(int c) override; + std::streamsize xsputn(const char* s, std::streamsize n) override + { + if (m_hash && n > 0) { + m_hash->Update(const_cast(reinterpret_cast(s)), static_cast(n)); + } + return m_underlyingBuf->sputn(s, n); + } + + int overflow(int c) override + { + if (c != EOF && m_hash) { + unsigned char byte = static_cast(c); + m_hash->Update(&byte, 1); + } + return m_underlyingBuf->sputc(c); + } private: std::streambuf* m_underlyingBuf; diff --git a/src/aws-cpp-sdk-transfer/source/transfer/ChecksumValidatingStreamBuf.cpp b/src/aws-cpp-sdk-transfer/source/transfer/ChecksumValidatingStreamBuf.cpp deleted file mode 100644 index 9776838bdc7..00000000000 --- a/src/aws-cpp-sdk-transfer/source/transfer/ChecksumValidatingStreamBuf.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0. - */ - -#include - -namespace Aws -{ - namespace Transfer - { - ChecksumValidatingStreamBuf::ChecksumValidatingStreamBuf(std::streambuf* underlyingBuf, - std::shared_ptr hash) - : m_underlyingBuf(underlyingBuf), m_hash(hash) - { - } - - std::streamsize ChecksumValidatingStreamBuf::xsputn(const char* s, std::streamsize n) - { - if (m_hash && n > 0) { - m_hash->Update(const_cast(reinterpret_cast(s)), static_cast(n)); - } - return m_underlyingBuf->sputn(s, n); - } - - int ChecksumValidatingStreamBuf::overflow(int c) - { - if (c != EOF && m_hash) { - unsigned char byte = static_cast(c); - m_hash->Update(&byte, 1); - } - return m_underlyingBuf->sputc(c); - } - } -} From aa688ff797d97847b3a2ac9f6ba68081baa2a7bd Mon Sep 17 00:00:00 2001 From: pulimsr Date: Fri, 5 Dec 2025 05:42:01 -0500 Subject: [PATCH 4/6] replacing stream wrapper with file-based checksum validation for single-part downloads --- .../transfer/ChecksumValidatingStreamBuf.h | 55 ------------ .../include/aws/transfer/TransferHandle.h | 1 - .../source/transfer/TransferManager.cpp | 86 ++++++++++--------- 3 files changed, 44 insertions(+), 98 deletions(-) delete mode 100644 src/aws-cpp-sdk-transfer/include/aws/transfer/ChecksumValidatingStreamBuf.h diff --git a/src/aws-cpp-sdk-transfer/include/aws/transfer/ChecksumValidatingStreamBuf.h b/src/aws-cpp-sdk-transfer/include/aws/transfer/ChecksumValidatingStreamBuf.h deleted file mode 100644 index 04937be704d..00000000000 --- a/src/aws-cpp-sdk-transfer/include/aws/transfer/ChecksumValidatingStreamBuf.h +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0. - */ - -#pragma once - -#include -#include -#include -#include - -namespace Aws -{ - namespace Transfer - { - /** - * Stream buffer wrapper that calculates checksum while forwarding data to underlying stream. - * Used for single-part download checksum validation. - */ - class ChecksumValidatingStreamBuf : public std::streambuf - { - public: - ChecksumValidatingStreamBuf(std::streambuf* underlyingBuf, - std::shared_ptr hash) - : m_underlyingBuf(underlyingBuf), m_hash(hash) - { - } - - std::shared_ptr GetHash() const { return m_hash; } - - protected: - std::streamsize xsputn(const char* s, std::streamsize n) override - { - if (m_hash && n > 0) { - m_hash->Update(const_cast(reinterpret_cast(s)), static_cast(n)); - } - return m_underlyingBuf->sputn(s, n); - } - - int overflow(int c) override - { - if (c != EOF && m_hash) { - unsigned char byte = static_cast(c); - m_hash->Update(&byte, 1); - } - return m_underlyingBuf->sputc(c); - } - - private: - std::streambuf* m_underlyingBuf; - std::shared_ptr m_hash; - }; - } -} diff --git a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h index ec845f6afce..ff4c2e4dbf6 100644 --- a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h +++ b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h @@ -6,7 +6,6 @@ #pragma once #include -#include #include #include #include diff --git a/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp b/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp index 4e6eba84745..f6f1ed47bc6 100644 --- a/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp +++ b/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp @@ -930,28 +930,7 @@ namespace Aws request.SetVersionId(handle->GetVersionId()); } - // Wrap user's stream with checksum validator if enabled - std::shared_ptr singlePartHash; - std::shared_ptr checksumWrapper; - - if (m_transferConfig.validateChecksums) - { - singlePartHash = CreateHashForAlgorithm(m_transferConfig.checksumAlgorithm); - auto userStreamFactory = handle->GetCreateDownloadStreamFunction(); - - request.SetResponseStreamFactory([userStreamFactory, singlePartHash, &checksumWrapper]() -> Aws::IOStream* { - auto userStream = userStreamFactory(); - if (userStream && singlePartHash) { - checksumWrapper = Aws::MakeShared(CLASS_TAG, userStream->rdbuf(), singlePartHash); - userStream->rdbuf(checksumWrapper.get()); - } - return userStream; - }); - } - else - { - request.SetResponseStreamFactory(handle->GetCreateDownloadStreamFunction()); - } + request.SetResponseStreamFactory(handle->GetCreateDownloadStreamFunction()); request.SetDataReceivedEventHandler([this, handle, partState](const Aws::Http::HttpRequest*, Aws::Http::HttpResponse*, long long progress) { @@ -977,32 +956,55 @@ namespace Aws handle->ChangePartToCompleted(partState, getObjectOutcome.GetResult().GetETag()); getObjectOutcome.GetResult().GetBody().flush(); - // Validate checksum for single-part download - if (m_transferConfig.validateChecksums && singlePartHash) + // Validate checksum for single-part download by reading file + if (m_transferConfig.validateChecksums) { Aws::String expectedChecksum = GetChecksumFromResult(getObjectOutcome.GetResult(), m_transferConfig.checksumAlgorithm); - if (!expectedChecksum.empty()) + if (!expectedChecksum.empty() && !handle->GetTargetFilePath().empty()) { - auto calculatedResult = singlePartHash->GetHash(); - if (calculatedResult.IsSuccess()) + auto hash = CreateHashForAlgorithm(m_transferConfig.checksumAlgorithm); + Aws::IFStream fileStream(handle->GetTargetFilePath().c_str(), std::ios::binary); + + if (fileStream.good()) { - Aws::String calculatedChecksum = Utils::HashingUtils::Base64Encode(calculatedResult.GetResult()); - if (calculatedChecksum != expectedChecksum) + const size_t bufferSize = 8192; + char buffer[bufferSize]; + while (fileStream.good()) { - AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId() - << "] Checksum mismatch for single-part download. Expected: " - << expectedChecksum << ", Calculated: " << calculatedChecksum); - handle->ChangePartToFailed(partState); - handle->UpdateStatus(TransferStatus::FAILED); - Aws::Client::AWSError error(Aws::S3::S3Errors::INTERNAL_FAILURE, - "ChecksumMismatch", - "Single-part download checksum validation failed", - false); - handle->SetError(error); - TriggerErrorCallback(handle, error); - TriggerTransferStatusUpdatedCallback(handle); - return; + fileStream.read(buffer, bufferSize); + std::streamsize bytesRead = fileStream.gcount(); + if (bytesRead > 0) + { + hash->Update(reinterpret_cast(buffer), static_cast(bytesRead)); + } + } + fileStream.close(); + + auto calculatedResult = hash->GetHash(); + if (calculatedResult.IsSuccess()) + { + Aws::String calculatedChecksum = Utils::HashingUtils::Base64Encode(calculatedResult.GetResult()); + if (calculatedChecksum != expectedChecksum) + { + AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId() + << "] Checksum mismatch for single-part download. Expected: " + << expectedChecksum << ", Calculated: " << calculatedChecksum); + + // Delete the corrupted file + Aws::FileSystem::RemoveFileIfExists(handle->GetTargetFilePath().c_str()); + + handle->ChangePartToFailed(partState); + handle->UpdateStatus(TransferStatus::FAILED); + Aws::Client::AWSError error(Aws::S3::S3Errors::INTERNAL_FAILURE, + "ChecksumMismatch", + "Single-part download checksum validation failed", + false); + handle->SetError(error); + TriggerErrorCallback(handle, error); + TriggerTransferStatusUpdatedCallback(handle); + return; + } } } } From 3be9e74f879cc9816c6d44717260a28dfc9da481 Mon Sep 17 00:00:00 2001 From: sbiscigl Date: Wed, 17 Dec 2025 13:33:39 -0500 Subject: [PATCH 5/6] Update CRT to v0.36.1 --- crt/aws-crt-cpp | 2 +- prefetch_crt_dependency.sh | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/crt/aws-crt-cpp b/crt/aws-crt-cpp index 76150cfa693..8923e174f20 160000 --- a/crt/aws-crt-cpp +++ b/crt/aws-crt-cpp @@ -1 +1 @@ -Subproject commit 76150cfa693aaab82aba88dd1b005cbbf2a713b1 +Subproject commit 8923e174f20c39c647f25abb3e16d1418558ec55 diff --git a/prefetch_crt_dependency.sh b/prefetch_crt_dependency.sh index 2f37a176a90..66576f11eb9 100755 --- a/prefetch_crt_dependency.sh +++ b/prefetch_crt_dependency.sh @@ -3,21 +3,21 @@ # SPDX-License-Identifier: Apache-2.0. CRT_URI_PREFIX=https://codeload.github.com/awslabs -CRT_URI=${CRT_URI_PREFIX}/aws-crt-cpp/zip/76150cfa693aaab82aba88dd1b005cbbf2a713b1 # v0.35.4 +CRT_URI=${CRT_URI_PREFIX}/aws-crt-cpp/zip/8923e174f20c39c647f25abb3e16d1418558ec55 # v0.36.1 -AWS_C_AUTH_URI=${CRT_URI_PREFIX}/aws-c-auth/zip/7d6cfb92530e12109560988abec72c8ac9817281 # v0.9.3 +AWS_C_AUTH_URI=${CRT_URI_PREFIX}/aws-c-auth/zip/37ebf2d27cf44386c7f194221c3e03992fcb4c8d # v0.9.4 AWS_C_CAL_URI=${CRT_URI_PREFIX}/aws-c-cal/zip/1cb9412158890201a6ffceed779f90fe1f48180c # v0.9.13 AWS_C_COMMON_URI=${CRT_URI_PREFIX}/aws-c-common/zip/95515a8b1ff40d5bb14f965ca4cbbe99ad1843df # v0.12.6 AWS_C_COMPRESSION_URI=${CRT_URI_PREFIX}/aws-c-compression/zip/f951ab2b819fc6993b6e5e6cfef64b1a1554bfc8 # v0.3.1 -AWS_C_EVENT_STREAM_URI=${CRT_URI_PREFIX}/aws-c-event-stream/zip/31a44ff9108840a8f3fec54006218f4bc6c505e1 # v0.5.7 +AWS_C_EVENT_STREAM_URI=${CRT_URI_PREFIX}/aws-c-event-stream/zip/f43a3d24a7c1f8b50f709ccb4fdf4c7fd2827fff # v0.5.9 AWS_C_HTTP_URI=${CRT_URI_PREFIX}/aws-c-http/zip/07302aa4a2892adbbf95ee6d458db3bb240030d3 # v0.10.7 -AWS_C_IO_URI=${CRT_URI_PREFIX}/aws-c-io/zip/9cf142c08c28d5b1195aae09d2c05a6d17502e09 # v0.23.3 +AWS_C_IO_URI=${CRT_URI_PREFIX}/aws-c-io/zip/996d42e484a3749c4dd05e4e9fed1d38de95eb81 # v0.24.0 AWS_C_MQTT_URI=${CRT_URI_PREFIX}/aws-c-mqtt/zip/1d512d92709f60b74e2cafa018e69a2e647f28e9 # v0.13.3 -AWS_C_S3_URI=${CRT_URI_PREFIX}/aws-c-s3/zip/332dd22c47a7ed139eee71e7f219b764ef8cdf4c # v0.9.2 +AWS_C_S3_URI=${CRT_URI_PREFIX}/aws-c-s3/zip/3f81fc9e90b11e6b3e434b166e275f65d5c98d39 # v0.11.3 AWS_C_SDKUTILS_URI=${CRT_URI_PREFIX}/aws-c-sdkutils/zip/f678bda9e21f7217e4bbf35e0d1ea59540687933 # v0.2.4 -AWS_CHECKSUMS_URI=${CRT_URI_PREFIX}/aws-checksums/zip/9978ba2c33a7a259c1a6bd0f62abe26827d03b85 # v0.2.6 -AWS_LC_URI=${CRT_URI_PREFIX}/aws-lc/zip/7187ab572ddcdae4fa408e932d3e878c9941137b # v1.64.0 -S2N_URI=${CRT_URI_PREFIX}/s2n/zip/6aefe741f17489211f6c28e837c1a65ee66a1ef2 # v1.6.0 +AWS_CHECKSUMS_URI=${CRT_URI_PREFIX}/aws-checksums/zip/270b15acc1b2125340ec1c6dda6cc3c28ef0fa44 # v0.2.8 +AWS_LC_URI=${CRT_URI_PREFIX}/aws-lc/zip/b5e2f866efc0c7f90fcb6781281ea31063efbd96 # v1.65.1 +S2N_URI=${CRT_URI_PREFIX}/s2n/zip/f6ca8f0941851af4a05739c4a4b426970e953317 # v1.6.2 echo "Removing CRT" From f78bd1342baf47d1c7ff7cf91e380dbad8519230 Mon Sep 17 00:00:00 2001 From: pulimsr Date: Fri, 19 Dec 2025 10:59:21 -0500 Subject: [PATCH 6/6] using crt combine checksum function for validating checksums on download --- .../include/aws/transfer/TransferHandle.h | 1 + .../source/transfer/TransferManager.cpp | 97 ++++++++++++------- 2 files changed, 64 insertions(+), 34 deletions(-) diff --git a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h index ff4c2e4dbf6..c4339d00328 100644 --- a/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h +++ b/src/aws-cpp-sdk-transfer/include/aws/transfer/TransferHandle.h @@ -398,6 +398,7 @@ namespace Aws auto it = m_partChecksums.find(partId); return it != m_partChecksums.end() ? it->second : nullptr; } + const Aws::Map>& GetPartChecksums() const { return m_partChecksums; } private: void CleanupDownloadStream(); diff --git a/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp b/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp index f6f1ed47bc6..8661714911a 100644 --- a/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp +++ b/src/aws-cpp-sdk-transfer/source/transfer/TransferManager.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1280,45 +1281,13 @@ namespace Aws Aws::IOStream* bufferStream = partState->GetDownloadPartStream(); assert(bufferStream); - // Calculate and validate checksum for this part if validation is enabled + // checksum for this part if validation is enabled if (m_transferConfig.validateChecksums) { auto hash = handle->GetPartChecksum(partState->GetPartId()); if (hash && partState->GetDownloadBuffer()) { hash->Update(partState->GetDownloadBuffer(), static_cast(partState->GetSizeInBytes())); - - // Get expected checksum from response - Aws::String expectedChecksum = GetChecksumFromResult(outcome.GetResult(), m_transferConfig.checksumAlgorithm); - - // Validate part checksum - if (!expectedChecksum.empty()) - { - auto calculatedResult = hash->GetHash(); - if (calculatedResult.IsSuccess()) - { - Aws::String calculatedChecksum = Utils::HashingUtils::Base64Encode(calculatedResult.GetResult()); - if (calculatedChecksum != expectedChecksum) - { - AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId() - << "] Checksum mismatch for part " << partState->GetPartId() - << ". Expected: " << expectedChecksum << ", Calculated: " << calculatedChecksum); - Aws::Client::AWSError error(Aws::S3::S3Errors::INTERNAL_FAILURE, - "ChecksumMismatch", - "Part checksum validation failed", - false); - handle->ChangePartToFailed(partState); - handle->SetError(error); - TriggerErrorCallback(handle, error); - if(partState->GetDownloadBuffer()) - { - m_bufferManager.Release(partState->GetDownloadBuffer()); - partState->SetDownloadBuffer(nullptr); - } - return; - } - } - } } } @@ -1359,7 +1328,67 @@ namespace Aws { if (failedParts.size() == 0 && handle->GetBytesTransferred() == handle->GetBytesTotalSize()) { - // TODO: Combine part checksums and validate full-object checksum when CRT provides combining utility + // Combine part checksums and validate full-object checksum + if (m_transferConfig.validateChecksums) + { + Aws::String expectedChecksum = GetChecksumFromResult(outcome.GetResult(), m_transferConfig.checksumAlgorithm); + if (!expectedChecksum.empty()) + { + auto combinedChecksum = 0ULL; + bool isCRC64 = (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::CRC64NVME); + + for (auto& partChecksum : handle->GetPartChecksums()) + { + int partNumber = partChecksum.first; + auto hash = partChecksum.second; + + // Get part size from completed parts + auto partSize = handle->GetCompletedParts()[partNumber]->GetSizeInBytes(); + + auto partResult = hash->GetHash(); + auto partData = partResult.GetResult(); + + auto partCrc = isCRC64 ? + *reinterpret_cast(partData.GetUnderlyingData()) : + *reinterpret_cast(partData.GetUnderlyingData()); + + if (combinedChecksum == 0) { + combinedChecksum = partCrc; + } else { + if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::CRC32) { + combinedChecksum = Aws::Crt::Checksum::CombineCRC32(combinedChecksum, partCrc, partSize); + } else if (m_transferConfig.checksumAlgorithm == S3::Model::ChecksumAlgorithm::CRC32C) { + combinedChecksum = Aws::Crt::Checksum::CombineCRC32C(combinedChecksum, partCrc, partSize); + } else if (isCRC64) { + combinedChecksum = Aws::Crt::Checksum::CombineCRC64NVME(combinedChecksum, partCrc, partSize); + } + } + } + + // Compare with expected checksum + Aws::Utils::ByteBuffer checksumBuffer(isCRC64 ? 8 : 4); + if (isCRC64) { + *reinterpret_cast(checksumBuffer.GetUnderlyingData()) = combinedChecksum; + } else { + *reinterpret_cast(checksumBuffer.GetUnderlyingData()) = static_cast(combinedChecksum); + } + Aws::String calculatedChecksum = Utils::HashingUtils::Base64Encode(checksumBuffer); + + if (calculatedChecksum != expectedChecksum) { + AWS_LOGSTREAM_ERROR(CLASS_TAG, "Transfer handle [" << handle->GetId() + << "] Full-object checksum mismatch. Expected: " << expectedChecksum + << ", Calculated: " << calculatedChecksum); + Aws::Client::AWSError error(Aws::S3::S3Errors::INTERNAL_FAILURE, + "ChecksumMismatch", + "Full-object checksum validation failed", + false); + handle->SetError(error); + handle->UpdateStatus(TransferStatus::FAILED); + TriggerErrorCallback(handle, error); + return; + } + } + } outcome.GetResult().GetBody().flush(); handle->UpdateStatus(TransferStatus::COMPLETED); }