IncrementalWriteConfig.java (new file)
@@ -0,0 +1,79 @@
/*
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
*/
package com.marklogic.client.datamovement.filter;

import com.marklogic.client.document.DocumentWriteOperation;

import java.util.Collections;
import java.util.Map;
import java.util.function.Consumer;

/**
* Configuration for incremental write filtering.
*
* @since 8.1.0
*/
public class IncrementalWriteConfig {

private final String hashKeyName;
private final String timestampKeyName;
private final boolean canonicalizeJson;
private final Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer;
private final String[] jsonExclusions;
private final String[] xmlExclusions;
private final Map<String, String> xmlNamespaces;
private final String schemaName;
private final String viewName;

public IncrementalWriteConfig(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer,
String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces,
String schemaName, String viewName) {
this.hashKeyName = hashKeyName;
this.timestampKeyName = timestampKeyName;
this.canonicalizeJson = canonicalizeJson;
this.skippedDocumentsConsumer = skippedDocumentsConsumer;
this.jsonExclusions = jsonExclusions;
this.xmlExclusions = xmlExclusions;
this.xmlNamespaces = xmlNamespaces != null ? Collections.unmodifiableMap(xmlNamespaces) : null;
this.schemaName = schemaName;
this.viewName = viewName;
}

public String getHashKeyName() {
return hashKeyName;
}

public String getTimestampKeyName() {
return timestampKeyName;
}

public boolean isCanonicalizeJson() {
return canonicalizeJson;
}

public Consumer<DocumentWriteOperation[]> getSkippedDocumentsConsumer() {
return skippedDocumentsConsumer;
}

public String[] getJsonExclusions() {
return jsonExclusions;
}

public String[] getXmlExclusions() {
return xmlExclusions;
}

public Map<String, String> getXmlNamespaces() {
return xmlNamespaces != null ? xmlNamespaces : Collections.emptyMap();
}

public String getSchemaName() {
return schemaName;
}

public String getViewName() {
return viewName;
}
}
IncrementalWriteEvalFilter.java
@@ -12,9 +12,6 @@
import com.marklogic.client.document.DocumentWriteSet;
import com.marklogic.client.io.JacksonHandle;

-import java.util.Map;
-import java.util.function.Consumer;

/**
* Uses server-side JavaScript code to get the existing hash values for a set of URIs.
*
@@ -31,9 +28,8 @@ class IncrementalWriteEvalFilter extends IncrementalWriteFilter {
response
""";

-IncrementalWriteEvalFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
-Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces) {
-super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
+IncrementalWriteEvalFilter(IncrementalWriteConfig config) {
+super(config);
}

@Override
@@ -47,7 +43,7 @@ public DocumentWriteSet apply(DocumentWriteSetFilter.Context context) {

try {
JsonNode response = context.getDatabaseClient().newServerEval().javascript(EVAL_SCRIPT)
.addVariable("hashKeyName", hashKeyName)
.addVariable("hashKeyName", getConfig().getHashKeyName())
.addVariable("uris", new JacksonHandle(uris))
.evalAs(JsonNode.class);

IncrementalWriteFilter.java
@@ -53,6 +53,8 @@ public static class Builder {
private String[] jsonExclusions;
private String[] xmlExclusions;
private Map<String, String> xmlNamespaces;
private String schemaName;
private String viewName;

/**
* @param keyName the name of the MarkLogic metadata key that will hold the hash value; defaults to "incrementalWriteHash".
@@ -128,13 +130,43 @@ public Builder xmlNamespaces(Map<String, String> namespaces) {
return this;
}

/**
* Configures the filter to use a TDE view for retrieving hash values instead of field range indexes.
* This approach requires a TDE template to be deployed that extracts the URI and hash metadata.
*
* @param schemaName the schema name of the TDE view
* @param viewName the view name of the TDE view
* @return this builder
*/
public Builder fromView(String schemaName, String viewName) {
boolean schemaEmpty = schemaName == null || schemaName.trim().isEmpty();
boolean viewEmpty = viewName == null || viewName.trim().isEmpty();

if (schemaEmpty && !viewEmpty) {
throw new IllegalArgumentException("Schema name cannot be null or empty when view name is provided");
}
if (!schemaEmpty && viewEmpty) {
throw new IllegalArgumentException("View name cannot be null or empty when schema name is provided");
}

this.schemaName = schemaName;
this.viewName = viewName;
return this;
}

public IncrementalWriteFilter build() {
validateJsonExclusions();
validateXmlExclusions();
IncrementalWriteConfig config = new IncrementalWriteConfig(hashKeyName, timestampKeyName, canonicalizeJson,
skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces, schemaName, viewName);

if (schemaName != null && viewName != null) {
return new IncrementalWriteViewFilter(config);
}
if (useEvalQuery) {
-return new IncrementalWriteEvalFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
+return new IncrementalWriteEvalFilter(config);
}
-return new IncrementalWriteOpticFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
+return new IncrementalWriteOpticFilter(config);
}

private void validateJsonExclusions() {
@@ -181,26 +213,18 @@ private void validateXmlExclusions() {
}
}

-protected final String hashKeyName;
-private final String timestampKeyName;
-private final boolean canonicalizeJson;
-private final Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer;
-private final String[] jsonExclusions;
-private final String[] xmlExclusions;
-private final Map<String, String> xmlNamespaces;
+private final IncrementalWriteConfig config;

// Hardcoding this for now, with a good general purpose hashing function.
// See https://xxhash.com for benchmarks.
private final LongHashFunction hashFunction = LongHashFunction.xx3();

-public IncrementalWriteFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson, Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces) {
-this.hashKeyName = hashKeyName;
-this.timestampKeyName = timestampKeyName;
-this.canonicalizeJson = canonicalizeJson;
-this.skippedDocumentsConsumer = skippedDocumentsConsumer;
-this.jsonExclusions = jsonExclusions;
-this.xmlExclusions = xmlExclusions;
-this.xmlNamespaces = xmlNamespaces;
+public IncrementalWriteFilter(IncrementalWriteConfig config) {
+this.config = config;
}

public IncrementalWriteConfig getConfig() {
return config;
}

protected final DocumentWriteSet filterDocuments(Context context, Function<String, String> hashRetriever) {
@@ -230,19 +254,19 @@ protected final DocumentWriteSet filterDocuments(Context context, Function<String, String> hashRetriever) {

if (existingHash != null) {
if (!existingHash.equals(contentHash)) {
-newWriteSet.add(addHashToMetadata(doc, hashKeyName, contentHash, timestampKeyName, timestamp));
-} else if (skippedDocumentsConsumer != null) {
+newWriteSet.add(addHashToMetadata(doc, config.getHashKeyName(), contentHash, config.getTimestampKeyName(), timestamp));
+} else if (config.getSkippedDocumentsConsumer() != null) {
skippedDocuments.add(doc);
} else {
// No consumer, so skip the document silently.
}
} else {
-newWriteSet.add(addHashToMetadata(doc, hashKeyName, contentHash, timestampKeyName, timestamp));
+newWriteSet.add(addHashToMetadata(doc, config.getHashKeyName(), contentHash, config.getTimestampKeyName(), timestamp));
}
}

-if (!skippedDocuments.isEmpty() && skippedDocumentsConsumer != null) {
-skippedDocumentsConsumer.accept(skippedDocuments.toArray(new DocumentWriteOperation[0]));
+if (!skippedDocuments.isEmpty() && config.getSkippedDocumentsConsumer() != null) {
+config.getSkippedDocumentsConsumer().accept(skippedDocuments.toArray(new DocumentWriteOperation[0]));
}

return newWriteSet;
@@ -259,11 +283,11 @@ private String serializeContent(DocumentWriteOperation doc) {
format = baseHandle.getFormat();
}

-if (canonicalizeJson && (Format.JSON.equals(format) || isPossiblyJsonContent(content))) {
+if (config.isCanonicalizeJson() && (Format.JSON.equals(format) || isPossiblyJsonContent(content))) {
JsonCanonicalizer jc;
try {
-if (jsonExclusions != null && jsonExclusions.length > 0) {
-content = ContentExclusionUtil.applyJsonExclusions(doc.getUri(), content, jsonExclusions);
+if (config.getJsonExclusions() != null && config.getJsonExclusions().length > 0) {
+content = ContentExclusionUtil.applyJsonExclusions(doc.getUri(), content, config.getJsonExclusions());
}
jc = new JsonCanonicalizer(content);
return jc.getEncodedString();
@@ -274,9 +298,9 @@ private String serializeContent(DocumentWriteOperation doc) {
logger.warn("Unable to canonicalize JSON content for URI {}, using original content for hashing; cause: {}",
doc.getUri(), e.getMessage());
}
-} else if (xmlExclusions != null && xmlExclusions.length > 0) {
+} else if (config.getXmlExclusions() != null && config.getXmlExclusions().length > 0) {
try {
-content = ContentExclusionUtil.applyXmlExclusions(doc.getUri(), content, xmlNamespaces, xmlExclusions);
+content = ContentExclusionUtil.applyXmlExclusions(doc.getUri(), content, config.getXmlNamespaces(), config.getXmlExclusions());
} catch (Exception e) {
logger.warn("Unable to apply XML exclusions for URI {}, using original content for hashing; cause: {}",
doc.getUri(), e.getMessage());
@@ -316,4 +340,6 @@ protected static DocumentWriteOperation addHashToMetadata(DocumentWriteOperation

return new DocumentWriteOperationImpl(op.getUri(), newMetadata, op.getContent(), op.getTemporalDocumentURI());
}


}
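
A minimal usage sketch of the fromView option added above (illustrative, not part of this diff). It assumes the nested Builder is instantiated directly; the schema and view names are hypothetical placeholders that must match a deployed TDE template exposing "uri" and "hash" columns:

    IncrementalWriteFilter filter = new IncrementalWriteFilter.Builder()
        .fromView("IncrementalWrite", "hashes") // hypothetical schema and view names
        .build();

With both names set, build() returns the new IncrementalWriteViewFilter rather than the Optic or eval implementations.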
IncrementalWriteOpticFilter.java
@@ -10,7 +10,6 @@

import java.util.HashMap;
import java.util.Map;
-import java.util.function.Consumer;

/**
* Uses an Optic query to get the existing hash values for a set of URIs.
@@ -19,9 +18,8 @@
*/
class IncrementalWriteOpticFilter extends IncrementalWriteFilter {

-IncrementalWriteOpticFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
-Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces) {
-super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
+IncrementalWriteOpticFilter(IncrementalWriteConfig config) {
+super(config);
}

@Override
@@ -39,7 +37,7 @@ public DocumentWriteSet apply(Context context) {
Map<String, String> existingHashes = rowTemplate.query(op ->
op.fromLexicons(Map.of(
"uri", op.cts.uriReference(),
"hash", op.cts.fieldReference(super.hashKeyName)
"hash", op.cts.fieldReference(getConfig().getHashKeyName())
)).where(
op.cts.documentQuery(op.xs.stringSeq(uris))
),
IncrementalWriteViewFilter.java (new file)
@@ -0,0 +1,57 @@
/*
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
*/
package com.marklogic.client.datamovement.filter;

import com.marklogic.client.FailedRequestException;
import com.marklogic.client.document.DocumentWriteOperation;
import com.marklogic.client.document.DocumentWriteSet;
import com.marklogic.client.row.RowTemplate;

import java.util.HashMap;
import java.util.Map;

/**
* Uses an Optic query with fromView to get the existing hash values for a set of URIs from a TDE view.
* This implementation requires a TDE template to be deployed that extracts the URI and hash metadata.
*
* @since 8.1.0
*/
class IncrementalWriteViewFilter extends IncrementalWriteFilter {

IncrementalWriteViewFilter(IncrementalWriteConfig config) {
super(config);
}

@Override
public DocumentWriteSet apply(Context context) {
final String[] uris = context.getDocumentWriteSet().stream()
.filter(op -> DocumentWriteOperation.OperationType.DOCUMENT_WRITE.equals(op.getOperationType()))
.map(DocumentWriteOperation::getUri)
.toArray(String[]::new);

RowTemplate rowTemplate = new RowTemplate(context.getDatabaseClient());

try {
Map<String, String> existingHashes = rowTemplate.query(op ->
op.fromView(getConfig().getSchemaName(), getConfig().getViewName())
.where(op.in(op.col("uri"), op.xs.stringSeq(uris))),

rows -> {
Map<String, String> map = new HashMap<>();
rows.forEach(row -> {
String uri = row.getString("uri");
String existingHash = row.getString("hash");
map.put(uri, existingHash);
});
return map;
}
);

return filterDocuments(context, uri -> existingHashes.get(uri));
} catch (FailedRequestException e) {
String message = "Unable to query for existing incremental write hashes from view " + getConfig().getSchemaName() + "." + getConfig().getViewName() + "; cause: " + e.getMessage();
throw new FailedRequestException(message, e.getFailedRequest());

Copilot AI (Feb 10, 2026):

The rethrown FailedRequestException drops the original exception as the cause, which makes debugging harder because the stack trace and cause chain are lost. Prefer a constructor overload that accepts the cause, or initialize the cause on the newly created exception so the original e is preserved.

Suggested change:
-throw new FailedRequestException(message, e.getFailedRequest());
+FailedRequestException fre = new FailedRequestException(message, e.getFailedRequest());
+fre.initCause(e);
+throw fre;

}
}
}
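
As a hedged sketch (not part of this PR), the view contract this filter depends on can be checked with the Java client's row APIs: the deployed TDE view must expose "uri" and "hash" columns. The schema and view names below are hypothetical, and databaseClient is assumed to be an already-constructed DatabaseClient:

    import com.marklogic.client.DatabaseClient;
    import com.marklogic.client.expression.PlanBuilder;
    import com.marklogic.client.row.RowManager;

    // Print one row from the hypothetical TDE view to confirm it exposes the
    // "uri" and "hash" columns that IncrementalWriteViewFilter queries.
    static void verifyIncrementalWriteView(DatabaseClient databaseClient) {
        RowManager rowManager = databaseClient.newRowManager();
        PlanBuilder op = rowManager.newPlanBuilder();
        PlanBuilder.ModifyPlan plan = op.fromView("IncrementalWrite", "hashes").limit(1);
        rowManager.resultRows(plan).forEach(row ->
            System.out.println(row.getString("uri") + " -> " + row.getString("hash")));
    }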