diff --git a/.copyrightconfig b/.copyrightconfig index c87b8a91b..018996db6 100644 --- a/.copyrightconfig +++ b/.copyrightconfig @@ -11,4 +11,4 @@ startyear: 2010 # - Dotfiles already skipped automatically # Enable by removing the leading '# ' from the next line and editing values. # filesexcluded: third_party/*, docs/generated/*.md, assets/*.png, scripts/temp_*.py, vendor/lib.js -filesexcluded: .github/*, README.md, Jenkinsfile, gradle/*, docker-compose.yaml, docker-compose.yml, *.gradle, gradle.properties, gradlew, gradlew.bat, **/test/resources/**, *.md, pom.xml, *.properties, *.json, *.xml, CODEOWNERS +filesexcluded: .github/*, README.md, Jenkinsfile, gradle/*, docker-compose.yaml, docker-compose.yml, *.gradle, gradle.properties, gradlew, gradlew.bat, **/test/resources/**, *.md, pom.xml, *.properties, *.json, *.xml, CODEOWNERS, *.txt diff --git a/marklogic-client-api/src/main/java/com/marklogic/client/impl/OkHttpServices.java b/marklogic-client-api/src/main/java/com/marklogic/client/impl/OkHttpServices.java index 3d367aa2e..57ec71dc0 100644 --- a/marklogic-client-api/src/main/java/com/marklogic/client/impl/OkHttpServices.java +++ b/marklogic-client-api/src/main/java/com/marklogic/client/impl/OkHttpServices.java @@ -34,7 +34,9 @@ import jakarta.mail.BodyPart; import jakarta.mail.Header; import jakarta.mail.MessagingException; +import jakarta.mail.internet.ContentDisposition; import jakarta.mail.internet.MimeMultipart; +import jakarta.mail.internet.ParseException; import jakarta.mail.util.ByteArrayDataSource; import jakarta.xml.bind.DatatypeConverter; import okhttp3.*; @@ -1808,16 +1810,50 @@ static private long getHeaderLength(String length) { static private String getHeaderUri(BodyPart part) { try { - if (part != null) { - return part.getFileName(); + if (part == null) { + return null; } - // if it's not found, just return null + + try { + String filename = part.getFileName(); + if (filename != null) { + return filename; + } + } catch (ParseException e) { + // 
Jakarta Mail's parser failed due to malformed Content-Disposition header. + // Check if MarkLogic sent a malformed "format=" parameter at the end, which violates RFC 2183. + String contentDisposition = getHeader(part, "Content-Disposition"); + if (contentDisposition != null && contentDisposition.matches(".*;\\s*format\\s*=\\s*$")) { + // Remove the trailing "; format=" to fix the malformed header + String cleaned = contentDisposition.replaceFirst(";\\s*format\\s*=\\s*$", "").trim(); + logger.debug("Removed trailing 'format=' from malformed Content-Disposition header: {} -> {}", contentDisposition, cleaned); + return extractFilenameFromContentDisposition(cleaned); + } + throw e; + } + return null; } catch (MessagingException e) { throw new MarkLogicIOException(e); } } + static private String extractFilenameFromContentDisposition(String contentDisposition) { + if (contentDisposition == null) { + return null; + } + try { + // Use Jakarta Mail's ContentDisposition parser to extract the filename parameter. This is the class + // that throws an error when "format=" exists in the value, but that has been removed already. 
+ ContentDisposition cd = new ContentDisposition(contentDisposition); + return cd.getParameter("filename"); + } catch (ParseException e) { + logger.warn("Failed to parse cleaned Content-Disposition header: {}; cause: {}", + contentDisposition, e.getMessage()); + return null; + } + } + static private void updateVersion(DocumentDescriptor descriptor, Headers headers) { updateVersion(descriptor, extractVersion(headers.get(HEADER_ETAG))); } diff --git a/marklogic-client-api/src/test/java/com/marklogic/client/test/document/ReadDocumentPageTest.java b/marklogic-client-api/src/test/java/com/marklogic/client/test/document/ReadDocumentPageTest.java index fdd4a06c1..421aa41cb 100644 --- a/marklogic-client-api/src/test/java/com/marklogic/client/test/document/ReadDocumentPageTest.java +++ b/marklogic-client-api/src/test/java/com/marklogic/client/test/document/ReadDocumentPageTest.java @@ -1,28 +1,30 @@ /* - * Copyright (c) 2010-2025 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved. + * Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved. 
*/ package com.marklogic.client.test.document; import com.marklogic.client.DatabaseClient; -import com.marklogic.client.document.*; +import com.marklogic.client.document.DocumentPage; +import com.marklogic.client.document.DocumentRecord; +import com.marklogic.client.document.JSONDocumentManager; import com.marklogic.client.io.BytesHandle; -import com.marklogic.client.io.DocumentMetadataHandle; import com.marklogic.client.io.StringHandle; import com.marklogic.client.query.StructuredQueryBuilder; import com.marklogic.client.query.StructuredQueryDefinition; +import com.marklogic.client.test.AbstractClientTest; import com.marklogic.client.test.Common; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; -class ReadDocumentPageTest { +class ReadDocumentPageTest extends AbstractClientTest { + /** + * Verifies that the jakarta.mail library, instead of javax.mail, can properly read the URI. + * See MLE-15748, which pertains to issues with javax.mail only allowing US-ASCII characters. 
+ */ @Test - void test() { - Common.deleteUrisWithPattern("/aaa-page/*"); - + void uriWithNonUsAsciiCharacters() { final String uri = "/aaa-page/太田佳伸のXMLファイル.xml"; DocumentRecord documentRecord; try (DatabaseClient client = Common.newClient()) { @@ -38,35 +40,27 @@ void test() { } @Test - @Disabled("Disabling for now because this seems to be a server bug.") - void testEmptyDocWithNoExtension() { - final String collection = "empty-binary-test"; + void emptyTextDocument() { + final String uri = "/sample/empty-file.txt"; try (DatabaseClient client = Common.newClient()) { - writeEmptyDocWithNoFileExtension(client, collection); - JSONDocumentManager documentManager = client.newJSONDocumentManager(); - StructuredQueryDefinition query = new StructuredQueryBuilder().collection(collection); + StructuredQueryDefinition query = new StructuredQueryBuilder().document(uri); DocumentRecord documentRecord; try (DocumentPage documentPage = documentManager.search(query, 1)) { assertTrue(documentPage.hasNext(), "Expected a document in the page, but none was found."); documentRecord = documentPage.next(); } - String uri = documentRecord.getUri(); - assertEquals("/test/empty", uri, "The URI of the empty document should match the one written."); - } - } + String actualUri = documentRecord.getUri(); + assertEquals(uri, actualUri, "The URI of the empty document should match the one written."); - protected void writeEmptyDocWithNoFileExtension(DatabaseClient client, String... collections) { - DocumentMetadataHandle metadata = new DocumentMetadataHandle() - .withCollections(collections) - .withPermission("rest-reader", DocumentMetadataHandle.Capability.READ, DocumentMetadataHandle.Capability.UPDATE); - // This needs to be a JSON document manager because the empty document is written without a format. 
- JSONDocumentManager mgr = client.newJSONDocumentManager(); - DocumentWriteSet set = mgr.newWriteSet(); - BytesHandle emptyBytesHandle = new BytesHandle(new byte[0]); - String uri = "/test/empty"; - set.add(uri, metadata, emptyBytesHandle); - mgr.write(set); + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> documentRecord.getContent(new BytesHandle())); + assertEquals("No bytes to write", ex.getMessage(), + "This assertion is documenting existing behavior, where an empty doc will result in an " + + "exception being thrown when an attempt is made to retrieve its content. " + + "This doesn't seem ideal - returning null seems preferable - but it's the " + + "behavior that has likely always existed."); + } } } diff --git a/test-app/src/main/ml-data/sample/empty-file.txt b/test-app/src/main/ml-data/sample/empty-file.txt new file mode 100644 index 000000000..e69de29bb