From 2109ad50aa4a659b8f31d999476969f72dab4834 Mon Sep 17 00:00:00 2001 From: Lauris Kaplinski Date: Fri, 28 Nov 2025 16:13:07 +0200 Subject: [PATCH 1/3] Fixed zero-sized file handling Enforce encryption workflow for CDoc2 Added test for large files and long unicode filenames --- cdoc/CDoc2Writer.cpp | 19 ++-- cdoc/Tar.cpp | 24 ++--- test/libcdoc_boost.cpp | 200 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 219 insertions(+), 24 deletions(-) diff --git a/cdoc/CDoc2Writer.cpp b/cdoc/CDoc2Writer.cpp index 132f7ed..26f2180 100644 --- a/cdoc/CDoc2Writer.cpp +++ b/cdoc/CDoc2Writer.cpp @@ -470,10 +470,18 @@ libcdoc::result_t CDoc2Writer::beginEncryption() { last_error.clear(); - if(!recipients.empty()) { - LOG_ERROR("Encryption workflow already started"); - setLastError("Encryption workflow already started"); + if(recipients.empty()) { + setLastError("No recipients added"); + LOG_ERROR("{}", last_error); + return libcdoc::WORKFLOW_ERROR; + } + if(tar) { + setLastError("Encryption already started"); + LOG_ERROR("{}", last_error); + return libcdoc::WORKFLOW_ERROR; } + if(auto rv = writeHeader(recipients); rv < 0) + return rv; return libcdoc::OK; } @@ -481,8 +489,9 @@ libcdoc::result_t CDoc2Writer::addFile(const std::string& name, size_t size) { if(!tar) { - if(auto rv = writeHeader(recipients); rv < 0) - return rv; + setLastError("Encryption not started"); + LOG_ERROR("{}", last_error); + return libcdoc::WORKFLOW_ERROR; } if(auto rv = tar->open(name, size); rv < 0) { setLastError(tar->getLastErrorStr(rv)); diff --git a/cdoc/Tar.cpp b/cdoc/Tar.cpp index 32fa4af..ff7ab7a 100644 --- a/cdoc/Tar.cpp +++ b/cdoc/Tar.cpp @@ -24,6 +24,8 @@ using namespace libcdoc; +constexpr unsigned int BLOCKSIZE = 512; + template static int64_t fromOctal(const std::array &data) { @@ -72,7 +74,7 @@ struct libcdoc::Header { { int64_t unsignedSum = 0; int64_t signedSum = 0; - for (size_t i = 0, size = sizeof(Header); i < size; i++) { + for (size_t i = 0, size = BLOCKSIZE; i < size; i++) 
{ unsignedSum += ((unsigned char*) this)[i]; signedSum += ((signed char*) this)[i]; } @@ -97,9 +99,11 @@ struct libcdoc::Header { bool operator==(const Header&) const = default; }; +static_assert (sizeof(Header) == BLOCKSIZE, "Header struct size is incorrect"); + static int padding(int64_t size) { - return sizeof(Header) - size % sizeof(Header); + return BLOCKSIZE * ((size + BLOCKSIZE - 1) / BLOCKSIZE) - size; } std::string toPaxRecord (const std::string &keyword, const std::string &value) { @@ -131,7 +135,7 @@ libcdoc::TarConsumer::write(const uint8_t *src, size_t size) libcdoc::result_t libcdoc::TarConsumer::writeHeader(const Header &h) { - if(auto rv = _dst->write((const uint8_t *)&h, sizeof(Header)); rv != sizeof(Header)) + if(auto rv = _dst->write((const uint8_t *)&h, BLOCKSIZE); rv != BLOCKSIZE) return rv < OK ? rv : OUTPUT_ERROR; return OK; } @@ -146,7 +150,7 @@ libcdoc::TarConsumer::writeHeader(Header &h, int64_t size) { libcdoc::result_t libcdoc::TarConsumer::writePadding(int64_t size) { - std::array pad {}; + static std::array pad {}; auto padSize = padding(size); if(auto rv = _dst->write(pad.data(), padSize); rv != padSize) return rv < OK ? 
rv : OUTPUT_ERROR; @@ -269,14 +273,14 @@ libcdoc::TarSource::next(std::string& name, int64_t& size) } } while (!_src->isEof()) { - int64_t result = _src->read((uint8_t *)&h, sizeof(Header)); - if (result != sizeof(Header)) { + int64_t result = _src->read((uint8_t *)&h, BLOCKSIZE); + if (result != BLOCKSIZE) { _error = INPUT_STREAM_ERROR; return _error; } if (h.isNull()) { - result = _src->read((uint8_t *)&h, sizeof(Header)); - if (result != sizeof(Header)) { + result = _src->read((uint8_t *)&h, BLOCKSIZE); + if (result != BLOCKSIZE) { _error = INPUT_STREAM_ERROR; return _error; } @@ -299,8 +303,8 @@ libcdoc::TarSource::next(std::string& name, int64_t& size) } std::string paxData(pax_in.data(), pax_in.size()); _src->skip(padding(h_size)); - result = _src->read((uint8_t *)&h, sizeof(Header)); - if (result != sizeof(Header)) { + result = _src->read((uint8_t *)&h, BLOCKSIZE); + if (result != BLOCKSIZE) { _error = INPUT_STREAM_ERROR; return _error; } diff --git a/test/libcdoc_boost.cpp b/test/libcdoc_boost.cpp index 78ced8d..485cfbb 100644 --- a/test/libcdoc_boost.cpp +++ b/test/libcdoc_boost.cpp @@ -19,10 +19,12 @@ #define BOOST_TEST_MODULE "C++ Unit Tests for libcdoc" #include +#include #include #include #include #include +#include #include #include #include @@ -75,14 +77,24 @@ class FixtureBase public: FixtureBase() { - // Get path to test data, provided via argument to the unit tests application - if (utf::framework::master_test_suite().argc <= 1) - { - testDataPath = DATA_DIR; - } - else - { - testDataPath = utf::framework::master_test_suite().argv[1]; + int argc = utf::framework::master_test_suite().argc; + for (int i = 0; i < argc; i++) { + std::string_view arg = utf::framework::master_test_suite().argv[i]; + if (arg == "--data-path") { + if (i >= argc) { + std::cerr << "Missing data path value" << std::endl; + ::exit(1); + } + i += 1; + testDataPath = utf::framework::master_test_suite().argv[i]; + } else if (arg == "--max-filesize") { + if (i >= argc) { + 
std::cerr << "Missing max filesize value" << std::endl; + ::exit(1); + } + i += 1; + max_filesize = std::stoull(utf::framework::master_test_suite().argv[i]); + } } } @@ -122,9 +134,10 @@ class FixtureBase } } - fs::path testDataPath; + fs::path testDataPath = DATA_DIR; fs::path sourceFilePath; fs::path targetFilePath; + size_t max_filesize = 100000000; }; /** @@ -215,6 +228,175 @@ class DecryptFixture : public FixtureBase } }; +struct PipeSource : public libcdoc::DataSource { + PipeSource(std::vector& data, bool& eof) : _data(data), _eof(eof) {} + + libcdoc::result_t read(uint8_t *dst, size_t size) override { + size = std::min(size, _data.size()); + std::copy(_data.cbegin(), _data.cbegin() + size, dst); + if (_buf.size() < 1024) { + size_t newbufsize = _buf.size() + size; + if (newbufsize > 1024) newbufsize = 1024; + size_t tocopy = newbufsize - _buf.size(); + _buf.insert(_buf.end(), _data.begin(), _data.begin() + tocopy); + } + _data.erase(_data.cbegin(), _data.cbegin() + size); + return size; + } + + libcdoc::result_t seek(size_t pos) override { + if (pos <= _buf.size()) { + _data.insert(_data.begin(), _buf.begin() + pos, _buf.end()); + _buf.erase(_buf.begin() + pos, _buf.end()); + return libcdoc::OK; + } + return libcdoc::NOT_IMPLEMENTED; + } + bool isError() override { return false; } + bool isEof() override { return _eof; } +protected: + std::vector& _data; + bool& _eof; + std::vector _buf; +}; + +struct PipeConsumer : public libcdoc::DataConsumer { + PipeConsumer(std::vector& data, bool& eof) : _data(data), _eof(eof) { _eof = false; } + libcdoc::result_t write(const uint8_t *src, size_t size) override final { + _data.insert(_data.end(), src, src + size); + return size; + } + libcdoc::result_t close() override final { _eof = true; return libcdoc::OK; } + virtual bool isError() override final { return false; } +protected: + std::vector& _data; + bool& _eof; +}; + +struct PipeCrypto : public libcdoc::CryptoBackend { + PipeCrypto(std::string pwd) : 
_secret(pwd.cbegin(), pwd.cend()) {} + + libcdoc::result_t getSecret(std::vector& dst, unsigned int idx) { + dst = _secret; + return libcdoc::OK; + }; + + std::vector _secret; +}; + +struct PipeWriter { + static constexpr size_t BUFSIZE = 1024 * 1024; + + PipeWriter(libcdoc::CDocWriter *writer, const std::vector& files) : _writer(writer), _files(files), current(-1), cpos(0) {} + + uint8_t getChar(int filenum, size_t pos) { + uint64_t x = pos + ((uint64_t) filenum << 40); + x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL; + x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL; + x = x ^ (x >> 31); + return (uint8_t) (x & 0xff); + } + + libcdoc::result_t writeMore() { + if (current >= (int) _files.size()) return libcdoc::WORKFLOW_ERROR; + + if ((current < 0) || (cpos >= _files[current].size)) { + // Start new file + current += 1; + cpos = 0; + if (current >= (int) _files.size()) { + return _writer->finishEncryption(); + } + return _writer->addFile(_files[current].name, _files[current].size); + } + size_t towrite = _files[current].size - cpos; + if (towrite > BUFSIZE) towrite = BUFSIZE; + uint8_t buf[BUFSIZE]; + for (int i = 0; i < towrite; i++) buf[i] = getChar(current, cpos + i); + cpos += towrite; + return _writer->writeData(buf, towrite); + } + + bool isEof() { + return current >= (int) _files.size(); + } + + int current = 0; + size_t cpos = 0; + + libcdoc::CDocWriter *_writer; + const std::vector& _files; +}; + +BOOST_AUTO_TEST_SUITE(LargeFiles) + +BOOST_FIXTURE_TEST_CASE_WITH_DECOR(EncryptWithPasswordAndLabel, FixtureBase, * utf::description("Testing weird and large files")) +{ + std::vector data; + bool eof = false; + PipeConsumer pipec(data, eof); + PipeSource pipes(data, eof); + PipeCrypto pcrypto("password"); + + // Create writer + libcdoc::CDocWriter *writer = libcdoc::CDocWriter::createWriter(2, &pipec, false, nullptr, &pcrypto, nullptr); + BOOST_TEST(writer != nullptr); + libcdoc::Recipient rcpt = libcdoc::Recipient::makeSymmetric("test", 65536); + 
BOOST_TEST(writer->addRecipient(rcpt) == libcdoc::OK); + BOOST_TEST(writer->beginEncryption() == libcdoc::OK); + + std::srand(1); + std::vector files; + for (size_t i = max_filesize; i != 0; i = i / 1000) { + size_t len = std::rand() % 1000; + std::u16string u16(len, ' '); + for (int i = 0; i < len; i++) u16[i] = std::rand() % 10000 + 32; + std::string u8 = std::wstring_convert, char16_t>{}.to_bytes(u16); + files.emplace_back(u8, i); + files.emplace_back(u8, 0); + } + + PipeWriter wrt(writer, files); + + // Create reader + libcdoc::CDocReader *reader = libcdoc::CDocReader::createReader(&pipes, false, nullptr, &pcrypto, nullptr); + BOOST_TEST(reader != nullptr); + + // Fill buffer + while((data.size() < 2 * wrt.BUFSIZE) && !wrt.isEof()) { + BOOST_TEST(wrt.writeMore() == libcdoc::OK); + } + std::vector fmk; + BOOST_TEST(reader->getFMK(fmk, 0) == libcdoc::OK); + BOOST_TEST(reader->beginDecryption(fmk) == libcdoc::OK); + libcdoc::FileInfo fi; + for (int cfile = 0; cfile < files.size(); cfile++) { + // Fill buffer + while((data.size() < 2 * wrt.BUFSIZE) && !wrt.isEof()) { + BOOST_TEST(wrt.writeMore() == libcdoc::OK); + } + // Get file + BOOST_TEST(reader->nextFile(fi) == libcdoc::OK); + BOOST_TEST(fi.name == files[cfile].name); + BOOST_TEST(fi.size == files[cfile].size); + for (size_t pos = 0; pos < files[cfile].size; pos += wrt.BUFSIZE) { + // Fill buffer + while((data.size() < 2 * wrt.BUFSIZE) && !wrt.isEof()) { + BOOST_TEST(wrt.writeMore() == libcdoc::OK); + } + size_t toread = files[cfile].size - pos; + if (toread > wrt.BUFSIZE) toread = wrt.BUFSIZE; + uint8_t buf[wrt.BUFSIZE], cbuf[wrt.BUFSIZE]; + BOOST_TEST(reader->readData(buf, toread) == toread); + for (size_t i = 0; i < toread; i++) cbuf[i] = wrt.getChar(cfile, pos + i); + BOOST_TEST(std::memcmp(buf, cbuf, toread) == 0); + } + } + BOOST_TEST(reader->nextFile(fi) == libcdoc::END_OF_STREAM); + BOOST_TEST(reader->finishDecryption() == libcdoc::OK); +} + +BOOST_AUTO_TEST_SUITE_END() 
BOOST_AUTO_TEST_SUITE(PasswordUsageWithLabel) From fe017dae87b8bab4386ef20a62dac2d721982257 Mon Sep 17 00:00:00 2001 From: Lauris Kaplinski Date: Tue, 2 Dec 2025 12:35:05 +0200 Subject: [PATCH 2/3] Check that filenames are valid utf8 strings Signed-off-by: Lauris Kaplinski --- cdoc/CDoc1Writer.cpp | 1 + cdoc/CDoc2Writer.cpp | 1 + cdoc/Utils.cpp | 22 ++++++++++++++++++++++ cdoc/Utils.h | 2 ++ 4 files changed, 26 insertions(+) diff --git a/cdoc/CDoc1Writer.cpp b/cdoc/CDoc1Writer.cpp index d42d1d1..63fb9ac 100644 --- a/cdoc/CDoc1Writer.cpp +++ b/cdoc/CDoc1Writer.cpp @@ -292,6 +292,7 @@ CDoc1Writer::addFile(const std::string& name, size_t size) { if(!d) return WORKFLOW_ERROR; + if (name.empty() || !libcdoc::isValidUtf8(name)) return libcdoc::DATA_FORMAT_ERROR; d->files.push_back({name, size, {}}); return libcdoc::OK; } diff --git a/cdoc/CDoc2Writer.cpp b/cdoc/CDoc2Writer.cpp index 26f2180..e4363b6 100644 --- a/cdoc/CDoc2Writer.cpp +++ b/cdoc/CDoc2Writer.cpp @@ -493,6 +493,7 @@ CDoc2Writer::addFile(const std::string& name, size_t size) LOG_ERROR("{}", last_error); return libcdoc::WORKFLOW_ERROR; } + if (name.empty() || !libcdoc::isValidUtf8(name)) return libcdoc::DATA_FORMAT_ERROR; if(auto rv = tar->open(name, size); rv < 0) { setLastError(tar->getLastErrorStr(rv)); LOG_ERROR("{}", last_error); diff --git a/cdoc/Utils.cpp b/cdoc/Utils.cpp index 081c6ed..7c1862e 100644 --- a/cdoc/Utils.cpp +++ b/cdoc/Utils.cpp @@ -82,6 +82,28 @@ timeToISO(double time) #endif } +bool +isValidUtf8 (std::string str) +{ + const uint8_t *s = (const uint8_t *) str.data(); + const uint8_t *e = s + str.size(); + while (s < e) { + size_t s_len = e - s; + if ((s[0] & 0x80) == 0x0) { + s += 1; + } else if (((s[0] & 0xe0) == 0xc0) && (s_len >= 2) && ((s[1] & 0xc0) == 0x80)) { + s += 2; + } else if (((*s & 0xf0) == 0xe0) && (s_len >= 3) && ((s[1] & 0xc0) == 0x80) && ((s[2] & 0xc0) == 0x80)) { + s += 3; + } else if (((*s & 0xf8) == 0xf0) && (s_len >= 4) && ((s[1] & 0xc0) == 0x80) && ((s[2] & 0xc0) == 
0x80) && ((s[3] & 0xc0) == 0x80)) { + s += 4; + } else { + return false; + } + } + return true; +} + int parseURL(const std::string& url, std::string& host, int& port, std::string& path, bool end_with_slash) { diff --git a/cdoc/Utils.h b/cdoc/Utils.h index 872ce00..7256235 100644 --- a/cdoc/Utils.h +++ b/cdoc/Utils.h @@ -86,6 +86,8 @@ double getTime(); double timeFromISO(std::string_view iso); std::string timeToISO(double time); +bool isValidUtf8 (std::string str); + static std::vector readAllBytes(std::istream& ifs) { From c6b2e39fdd7c5592f888337cfa774c90dde8636d Mon Sep 17 00:00:00 2001 From: lauris71 Date: Fri, 28 Nov 2025 22:51:22 +0200 Subject: [PATCH 3/3] Update cdoc/Tar.cpp Co-authored-by: Raul Metsma --- cdoc/Tar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdoc/Tar.cpp b/cdoc/Tar.cpp index ff7ab7a..34331d1 100644 --- a/cdoc/Tar.cpp +++ b/cdoc/Tar.cpp @@ -150,7 +150,7 @@ libcdoc::TarConsumer::writeHeader(Header &h, int64_t size) { libcdoc::result_t libcdoc::TarConsumer::writePadding(int64_t size) { - static std::array pad {}; + static const std::array pad {}; auto padSize = padding(size); if(auto rv = _dst->write(pad.data(), padSize); rv != padSize) return rv < OK ? rv : OUTPUT_ERROR;