diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/io/LocalBuffer.scala b/daffodil-core/src/main/scala/org/apache/daffodil/io/LocalBuffer.scala index dca66ea41f..cf37e9c050 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/io/LocalBuffer.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/io/LocalBuffer.scala @@ -33,7 +33,13 @@ abstract class LocalBuffer[T <: java.nio.Buffer] { def getBuf(length: Long) = { Assert.usage(length <= Int.MaxValue) if (tempBuf.isEmpty || tempBuf.get.capacity < length) { - tempBuf = Maybe(allocate(length.toInt)) + // allocate a buffer that can store the required length, but with a minimum size. The + // majority of LocalBuffers should be smaller than this minimum size and so should avoid + // costly reallocations, while still being small enough that the JVM should have no + // problem quickly allocating it + val minBufferSize = 1024 + val allocationSize = math.max(length.toInt, minBufferSize) + tempBuf = Maybe(allocate(allocationSize)) } val buf = tempBuf.get buf.clear diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/NilParsers.scala b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/NilParsers.scala index af2cc7a1c4..faaa285ba7 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/NilParsers.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/NilParsers.scala @@ -40,21 +40,40 @@ abstract class LiteralNilOfSpecifiedLengthParserBase(erd: ElementRuntimeData) def isFieldNilLit(field: String): Boolean override def parse(start: PState): Unit = { - - val field = parseString(start) - - val isFieldEmpty = field.length() == 0 - - if (isFieldEmpty && isEmptyAllowed) { - // Valid! Success ParseResult indicates nilled - } else if (isFieldEmpty && !isEmptyAllowed) { - // Fail! 
- PE(start, "%s - Empty field found but not allowed!", eName) - } else if (isFieldNilLit(field)) { - // Contains a nilValue, Success ParseResult indicates nilled + if (erd.isComplexType) { + // nillable complex types must have a nilValue of %ES;. For a literal nil specified length + // complex to be nilled, that means either there must be a specified length that is zero + // or there isn't a specified length and we have reached the end of the data. If neither + // of these conditions are true, then there is non-empty data for this complex element and + // it cannot be nilled. + val bitLimit0b = start.bitLimit0b + val hasSpecifiedLength = bitLimit0b.isDefined + if ( + (hasSpecifiedLength && (bitLimit0b.get - start.bitPos0b) > 0) || + (!hasSpecifiedLength && start.dataInputStream.hasData) + ) { + // Fail! + PE(start, "%s - Does not contain a nil literal", eName) + } else { + // Valid! Success ParseResult indicates nilled + } } else { - // Fail! - PE(start, "%s - Does not contain a nil literal!", eName) + // Simple element, read a string up to the bitLimit and see if it matches the nilValue + val field = parseString(start) + + val isFieldEmpty = field.length() == 0 + + if (isFieldEmpty && isEmptyAllowed) { + // Valid! Success ParseResult indicates nilled + } else if (isFieldEmpty && !isEmptyAllowed) { + // Fail! + PE(start, "%s - Empty field found but not allowed", eName) + } else if (isFieldNilLit(field)) { + // Contains a nilValue, Success ParseResult indicates nilled + } else { + // Fail! 
+ PE(start, "%s - Does not contain a nil literal", eName) + } } } diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/StringLengthParsers.scala b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/StringLengthParsers.scala index 16cbbd8126..08e28190ee 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/StringLengthParsers.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/StringLengthParsers.scala @@ -18,6 +18,7 @@ package org.apache.daffodil.runtime1.processors.parsers import org.apache.daffodil.io.processors.charset.BitsCharsetDecoderUnalignedCharDecodeException +import org.apache.daffodil.lib.exceptions.Assert import org.apache.daffodil.lib.util.MaybeChar import org.apache.daffodil.lib.util.Misc import org.apache.daffodil.runtime1.processors.CharsetEv @@ -86,8 +87,32 @@ trait StringOfSpecifiedLengthMixin extends PaddingRuntimeMixin with CaptureParsi protected final def parseString(start: PState): String = { val dis = start.dataInputStream - val maxLen = start.tunable.maximumSimpleElementSizeInCharacters val startBitPos0b = dis.bitPos0b + val bitLimit0b = dis.bitLimit0b + + // We want to limit the maximum length passed into getSomeString since that function can + // pre-allocate a buffer that size even if it won't find that many characters. So we + // calculate the maximum number of characters that we could possibly decode from the + // available bits and the character set. + // + // For fixed-width encodings, that is just the number of available bits divided by the + // fixed width of the encoding. + // + // For variable length encodings (e.g. UTF-8), the maximum number of characters that the + // available bits could possibly decode to is if every decoded character was the smallest + // possible representation. That smallest representation for variable-width encodings is + // bitWidthOfACodeUnit. 
So we divide the available bits by bitWidthOfACodeUnit. + // + // Note that the bitLimit should always be defined because bitLimit is how strings of + // specified length limit their length + Assert.invariant(bitLimit0b.isDefined) + val availableBits = bitLimit0b.get - startBitPos0b + val charset = charsetEv.evaluate(start) + val optWidth = charset.maybeFixedWidth + val bitsPerChar = if (optWidth.isDefined) optWidth.get else charset.bitWidthOfACodeUnit + // add one to allow for partial bytes at the end that could parse to a replacement char + val maxPossibleChars = (availableBits / bitsPerChar) + 1 + val maxLen = math.min(maxPossibleChars, start.tunable.maximumSimpleElementSizeInCharacters) val strOpt = try {