Merged
@@ -33,7 +33,13 @@ abstract class LocalBuffer[T <: java.nio.Buffer] {
def getBuf(length: Long) = {
Assert.usage(length <= Int.MaxValue)
if (tempBuf.isEmpty || tempBuf.get.capacity < length) {
tempBuf = Maybe(allocate(length.toInt))
// allocate a buffer that can store the required length, but with a minimum size. The
// majority of LocalBuffers should be smaller than this minimum size and so should avoid
// costly reallocations, while still being small enough that the JVM should have no
// problem quickly allocating it
val minBufferSize = 1024
val allocationSize = math.max(length.toInt, minBufferSize)
tempBuf = Maybe(allocate(allocationSize))
Contributor:
I am not getting why this improves performance.

It sounds to me like the reuse mechanism in this code was not working since if it was, your improvement here would not have helped, and that suggests that there is more improvement on the table if we get the reuse of these buffers to work better, or work at all, since I suspect it's not working at all.

I suggest: first take out all the reuse - just allocate and let the GC reclaim exactly-sized buffers and see if that's faster/slower. (checkpoint on the reuse mechanism overhead vs. just allocate+GC)

Second, instrument the reuse of these buffers so we count exactly how many of these are allocated vs. reused from the stack/pool. My bet is the reuse isn't working.
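A minimal sketch of that instrumentation, assuming a simple counter pair around the allocate-vs-reuse branch (the class and field names here are hypothetical stand-ins, not Daffodil's actual LocalBuffer API):

```java
import java.nio.ByteBuffer;
import java.util.concurrent.atomic.AtomicLong;

// Hypothetical instrumentation sketch: count allocations vs. reuses so we can
// verify whether the buffer-reuse mechanism is actually doing anything.
class InstrumentedLocalBuffer {
    static final AtomicLong allocations = new AtomicLong();
    static final AtomicLong reuses = new AtomicLong();

    private ByteBuffer tempBuf = null;

    ByteBuffer getBuf(int length) {
        if (tempBuf == null || tempBuf.capacity() < length) {
            allocations.incrementAndGet();
            tempBuf = ByteBuffer.allocate(length);
        } else {
            reuses.incrementAndGet();
        }
        tempBuf.clear();
        return tempBuf;
    }
}
```

After a representative parse run, comparing `allocations` against `reuses` would show directly whether the pool is effective.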

Member Author:
I think the reuse worked (I'll double check as you suggest), the issue is that the reuse is only within a single parse--it's only local to the PState. So every time we call parse() we will allocate a new 1MB buffer (if there is a specific length string in the format), and reuse that buffer for the lifetime of that single parse. When that parse ends, we let it get garbage collected along with the PState, and a new one will be allocated the next time parse() is called.

For files that are very small (e.g. <1KB) the 1 MB allocation makes a noticeable difference. For larger files this doesn't make as much of a difference because 1MB isn't very much compared to all the other allocations daffodil does.

An alternative would be to make these thread locals, so each thread shares one buffer across parses; then we could go back to the 1MB buffer size and only take a single hit per thread. But I don't think the 1MB buffer size is all that important. It's going to be very rare for a single specified length string to be very big, likely nowhere close to 1MB.
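The thread-local alternative could look roughly like this sketch (hypothetical names, not Daffodil code): each thread pays the allocation once and reuses its buffer across parse() calls.

```java
import java.nio.ByteBuffer;

// Hypothetical sketch of the thread-local alternative: one buffer per thread,
// shared across parse() calls, so the allocation cost is paid once per thread
// rather than once per parse.
class ThreadLocalBuffer {
    private static final int INITIAL_SIZE = 1024;

    private static final ThreadLocal<ByteBuffer> tempBuf =
        ThreadLocal.withInitial(() -> ByteBuffer.allocate(INITIAL_SIZE));

    static ByteBuffer getBuf(int length) {
        ByteBuffer buf = tempBuf.get();
        if (buf.capacity() < length) {
            // grow for this thread only; later calls reuse the larger buffer
            buf = ByteBuffer.allocate(length);
            tempBuf.set(buf);
        }
        buf.clear();
        return buf;
    }
}
```

The trade-off is that the buffers then live for the lifetime of the thread rather than the lifetime of a parse, which matters for thread pools that stay around.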

Member Author:
I didn't check if with vs without reuse changed performance, but I did add a print statement to this getBuf function that printed whether we had to allocate a new buffer or not. Testing with NITF (which has lots of specified length strings and some relatively small files available), it allocated a 1024 byte buffer once and then reused it 100+ times with no additional allocations. We might need additional testing to make sure it's working correctly for other formats, or it might be worth investigating other reuse (e.g. make LocalBuffer reuse work among threads), but I think the LocalBuffer reuse logic is working as intended.

Member Author:
Note, the particular LocalBuffer that was related to the 1MB specified length string allocation is actually from the InputSourceDataInputStream, not the PState. So technically it's only a single 1MB allocation per ISDIS created. If you are streaming data and reusing the same ISDIS you'll only have one allocation. But if you create a new ISDIS per parse (which non-streaming use cases will do), then you will effectively get one allocation per parse() call.

So this change only improves performance for relatively small files that have specified length strings and are parsed non-streaming.

Member Author:
I tested allocating the exact size needed vs allocating one large buffer and reusing it. I really didn't see much of a difference. Allocate once + reuse was about the same, or maybe 3-5% faster, but that could be within JVM noise.

It's also possible it's dependent on the specific file and format. For very small formats with little reuse it's probably faster to just allocate exact sizes as needed. But if there are a lot of strings, maybe avoiding the allocations and GC overhead makes a difference. I can't really say without more testing, but I don't think there's a clear winner.
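For reference, a rough sketch of the kind of comparison described here (not a proper benchmark; real measurements should use a harness like JMH to control for JIT warmup and GC noise):

```java
import java.nio.ByteBuffer;

// Rough micro-benchmark sketch comparing allocate-per-call against
// allocate-once-and-reuse. Both variants touch the buffer so the
// allocation can't be trivially optimized away.
class BufferBench {
    static long allocateEach(int iters, int size) {
        long t0 = System.nanoTime();
        for (int i = 0; i < iters; i++) {
            ByteBuffer buf = ByteBuffer.allocate(size);
            buf.put(0, (byte) i);
        }
        return System.nanoTime() - t0;
    }

    static long allocateOnce(int iters, int size) {
        long t0 = System.nanoTime();
        ByteBuffer buf = ByteBuffer.allocate(size);
        for (int i = 0; i < iters; i++) {
            buf.clear();
            buf.put(0, (byte) i);
        }
        return System.nanoTime() - t0;
    }
}
```

Which side wins depends on buffer size and iteration count, consistent with the "no clear winner" observation above.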

Contributor:
Ok. Thanks for double checking that the reuse is at least working. When we call parse repeatedly in a loop, such as to parse a stream of messages, is that going to allocate this over and over, or are we reusing the PState in that case?

I answered my own question... we have to reuse the PState because the I/O position on the input has to be preserved.

However, other applications will call parse once per input record because they are carving off the input data record/message themselves based on UDP datagram size or other means. In that case those are brand new calls to parse with new PState allocated. This is also a fairly tight loop, so yes, eliminating this 1MB allocation and GC for each parse call should be an improvement.

We should consider pooling entire PState objects and resetting them so that we don't have to reallocate any of this stuff.
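The pooling idea could be sketched like this (StatePool and Resettable are hypothetical stand-ins; pooling Daffodil's real PState would require a reset method that clears all per-parse state):

```java
import java.util.ArrayDeque;
import java.util.function.Supplier;

// Hypothetical sketch of pooling parse-state objects: borrow from the pool if
// one is available, otherwise construct; on release, reset the object so it
// can be reused by the next parse instead of being reallocated.
class StatePool<T extends StatePool.Resettable> {
    interface Resettable { void reset(); }

    private final ArrayDeque<T> pool = new ArrayDeque<>();
    private final Supplier<T> factory;

    StatePool(Supplier<T> factory) { this.factory = factory; }

    T borrow() {
        T s = pool.poll();
        return (s != null) ? s : factory.get();
    }

    void release(T s) {
        s.reset(); // clear per-parse state before the object goes back in the pool
        pool.push(s);
    }
}
```

The hard part is not the pool itself but guaranteeing that reset() really clears everything, since any leaked state would bleed between unrelated parse calls.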

}
val buf = tempBuf.get
buf.clear
@@ -40,21 +40,40 @@ abstract class LiteralNilOfSpecifiedLengthParserBase(erd: ElementRuntimeData)
def isFieldNilLit(field: String): Boolean

override def parse(start: PState): Unit = {

val field = parseString(start)

val isFieldEmpty = field.length() == 0

if (isFieldEmpty && isEmptyAllowed) {
// Valid! Success ParseResult indicates nilled
} else if (isFieldEmpty && !isEmptyAllowed) {
// Fail!
PE(start, "%s - Empty field found but not allowed!", eName)
} else if (isFieldNilLit(field)) {
// Contains a nilValue, Success ParseResult indicates nilled
if (erd.isComplexType) {
// nillable complex types must have a nilValue of %ES;. For a literal nil specified length
Contributor:
Code looks fine.

I just ask does logic about '%ES;' and complex nil values exist elsewhere as well?
I ask because there is a longstanding enhancement request for DFDL to allow more than just '%ES;' as nil values for complex types.

The DFDL Workgroup was uncertain where this restriction originated, but some members suspected there is a reason for it that has just been lost to history. Hence, if this feature were to be added it would be experimental until proven ok.

In any case this code is fine as it was already doing complex type nil logic, so it's not like you added another place to the code that reasons about this.

Member Author:
My guess is it does, but it might be hard to find (I couldn't find anything via a quick scan). For example, the previous code didn't really have anything specific to complex nils; it just happened to work because it would either find data or it wouldn't, and the nilValue for complex types was %ES;, which matches empty.

I think if we ever do add support for non-%ES; complex nils, we'll just have to be very thorough about testing.

// complex to be nilled, that means either there must be a specified length that is zero
// or there isn't a specified length and we have reached the end of the data. If neither
// of these conditions are true, then there is non-empty data for this complex element and
// it cannot be nilled.
val bitLimit0b = start.bitLimit0b
val hasSpecifiedLength = bitLimit0b.isDefined
if (
(hasSpecifiedLength && (bitLimit0b.get - start.bitPos0b) > 0) ||
(!hasSpecifiedLength && start.dataInputStream.hasData)
) {
// Fail!
PE(start, "%s - Does not contain a nil literal", eName)
} else {
// Valid! Success ParseResult indicates nilled
}
} else {
// Fail!
PE(start, "%s - Does not contain a nil literal!", eName)
// Simple element, read a string up to the bitLimit and see if it matches the nilValue
val field = parseString(start)

val isFieldEmpty = field.length() == 0

if (isFieldEmpty && isEmptyAllowed) {
// Valid! Success ParseResult indicates nilled
} else if (isFieldEmpty && !isEmptyAllowed) {
// Fail!
PE(start, "%s - Empty field found but not allowed", eName)
} else if (isFieldNilLit(field)) {
// Contains a nilValue, Success ParseResult indicates nilled
} else {
// Fail!
PE(start, "%s - Does not contain a nil literal", eName)
}
}
}

@@ -18,6 +18,7 @@
package org.apache.daffodil.runtime1.processors.parsers

import org.apache.daffodil.io.processors.charset.BitsCharsetDecoderUnalignedCharDecodeException
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.util.MaybeChar
import org.apache.daffodil.lib.util.Misc
import org.apache.daffodil.runtime1.processors.CharsetEv
@@ -86,8 +87,32 @@ trait StringOfSpecifiedLengthMixin extends PaddingRuntimeMixin with CaptureParsi

protected final def parseString(start: PState): String = {
val dis = start.dataInputStream
val maxLen = start.tunable.maximumSimpleElementSizeInCharacters
val startBitPos0b = dis.bitPos0b
val bitLimit0b = dis.bitLimit0b

// We want to limit the maximum length passed into getSomeString since that function can
// pre-allocate a buffer that size even if it won't find that many characters. So we
// calculate the maximum number of characters that we could possibly decode from the
// available bits and the character set.
//
// For fixed-width encodings, that is just the number of available bits divided by the
// fixed width of the encoding.
//
// For variable length encodings (e.g. UTF-8), the maximum number of characters that the
// available bits could possibly decode to is if every decoded character was the smallest
// possible representation. That smallest representation for variable-width encodings is
// bitWidthOfACodeUnit. So we divide the available bits by bitWidthOfACodeUnit.
//
// Note that the bitLimit should always be defined, because the bitLimit is how
// specified length strings limit their length.
Assert.invariant(bitLimit0b.isDefined)
val availableBits = bitLimit0b.get - startBitPos0b
val charset = charsetEv.evaluate(start)
val optWidth = charset.maybeFixedWidth
val bitsPerChar = if (optWidth.isDefined) optWidth.get else charset.bitWidthOfACodeUnit
// add one to allow for partial bytes at the end that could parse to a replacement char
val maxPossibleChars = (availableBits / bitsPerChar) + 1
val maxLen = math.min(maxPossibleChars, start.tunable.maximumSimpleElementSizeInCharacters)

val strOpt =
try {
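The bound computed in the diff above can be sketched as a standalone helper (hypothetical names; `fixedWidthOrZero` stands in for the charset's `maybeFixedWidth`, with zero meaning a variable-width encoding):

```java
// Sketch of the max-characters bound: divide the available bits by the
// character width (the fixed width if the charset has one, otherwise the
// width of a single code unit, which is the smallest possible encoded
// character), then add 1 to allow for a trailing partial character that
// could decode to a replacement char.
class MaxCharsBound {
    static long maxPossibleChars(long availableBits, int fixedWidthOrZero, int bitsPerCodeUnit) {
        int bitsPerChar = (fixedWidthOrZero > 0) ? fixedWidthOrZero : bitsPerCodeUnit;
        return (availableBits / bitsPerChar) + 1;
    }
}
```

For example, with UTF-8 (variable width, 8-bit code units), 24 available bits bound the result to 24/8 + 1 = 4 characters, no matter how many multi-byte characters the data actually contains.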