From a914d23e094dfe58e838132899141b9cb5329a25 Mon Sep 17 00:00:00 2001 From: Cameron Ross Date: Sun, 3 Sep 2023 14:19:29 -0300 Subject: [PATCH] remove SliceBuilder and Transaction --- source/dyaml/reader.d | 274 +---------------------------------------- source/dyaml/scanner.d | 246 ++++++++++++++++++------------------ 2 files changed, 126 insertions(+), 394 deletions(-) diff --git a/source/dyaml/reader.d b/source/dyaml/reader.d index ae44c80..d422dbf 100644 --- a/source/dyaml/reader.d +++ b/source/dyaml/reader.d @@ -41,8 +41,7 @@ class ReaderException : YAMLException } } -/// Provides an API to read characters from a UTF-8 buffer and build slices into that -/// buffer to avoid allocations (see SliceBuilder). +/// Provides an API to read characters from a UTF-8 buffer. final class Reader { private: @@ -123,7 +122,6 @@ final class Reader enforce(isPrintableValidUTF8(buffer_), new ReaderException("Special unicode characters are not allowed")); - this.sliceBuilder = SliceBuilder(this); checkASCII(); } @@ -212,8 +210,7 @@ final class Reader /// Get specified number of characters starting at current position. /// /// Note: This gets only a "view" into the internal buffer, which will be - /// invalidated after other Reader calls. Use SliceBuilder to build slices - /// for permanent use. + /// invalidated after other Reader calls. /// /// Params: length = Number of characters (code points, not bytes) to get. May /// reach past the end of the buffer; in that case the returned @@ -228,8 +225,7 @@ final class Reader /// Get specified number of bytes, not code points, starting at current position. /// /// Note: This gets only a "view" into the internal buffer, which will be - /// invalidated after other Reader calls. Use SliceBuilder to build slices - /// for permanent use. + /// invalidated after other Reader calls. /// /// Params: length = Number bytes (not code points) to get. May NOT reach past /// the end of the buffer; should be used with peek() to avoid @@ -396,9 +392,6 @@ final class Reader checkASCII(); } - /// Used to build slices of read data in Reader; to avoid allocations. - SliceBuilder sliceBuilder; - /// Get a string describing current buffer position, used for error messages. Mark mark() const pure nothrow @nogc @safe { return Mark(name_, line_, column_); } @@ -448,267 +441,6 @@ private: } } -/// Used to build slices of already read data in Reader buffer, avoiding allocations. -/// -/// Usually these slices point to unchanged Reader data, but sometimes the data is -/// changed due to how YAML interprets certain characters/strings. -/// -/// See begin() documentation. -struct SliceBuilder -{ -private: - // No copying by the user. - @disable this(this); - @disable void opAssign(ref SliceBuilder); - - // Reader this builder works in. - Reader reader_; - - // Start of the slice om reader_.buffer_ (size_t.max while no slice being build) - size_t start_ = size_t.max; - // End of the slice om reader_.buffer_ (size_t.max while no slice being build) - size_t end_ = size_t.max; - - // Stack of slice ends to revert to (see Transaction) - // - // Very few levels as we don't want arbitrarily nested transactions. - size_t[4] endStack_; - // The number of elements currently in endStack_. - size_t endStackUsed_; - - @safe const pure nothrow @nogc invariant() - { - if(!inProgress) { return; } - assert(end_ <= reader_.bufferOffset_, "Slice ends after buffer position"); - assert(start_ <= end_, "Slice start after slice end"); - } - - // Is a slice currently being built? - bool inProgress() @safe const pure nothrow @nogc - in(start_ == size_t.max ? end_ == size_t.max : end_ != size_t.max, "start_/end_ are not consistent") - { - return start_ != size_t.max; - } - -public: - /// Begin building a slice. - /// - /// Only one slice can be built at any given time; before beginning a new slice, - /// finish the previous one (if any). - /// - /// The slice starts at the current position in the Reader buffer. It can only be - /// extended up to the current position in the buffer; Reader methods get() and - /// forward() move the position. E.g. it is valid to extend a slice by write()-ing - /// a string just returned by get() - but not one returned by prefix() unless the - /// position has changed since the prefix() call. - void begin() @safe pure nothrow @nogc - in(!inProgress, "Beginning a slice while another slice is being built") - in(endStackUsed_ == 0, "Slice stack not empty at slice begin") - { - - start_ = reader_.bufferOffset_; - end_ = reader_.bufferOffset_; - } - - /// Finish building a slice and return it. - /// - /// Any Transactions on the slice must be committed or destroyed before the slice - /// is finished. - /// - /// Returns a string; once a slice is finished it is definitive that its contents - /// will not be changed. - char[] finish() @safe pure nothrow @nogc - in(inProgress, "finish called without begin") - in(endStackUsed_ == 0, "Finishing a slice with running transactions.") - { - - auto result = reader_.buffer_[start_ .. end_]; - start_ = end_ = size_t.max; - return result; - } - - /// Write a string to the slice being built. - /// - /// Data can only be written up to the current position in the Reader buffer. - /// - /// If str is a string returned by a Reader method, and str starts right after the - /// end of the slice being built, the slice is extended (trivial operation). - /// - /// See_Also: begin - void write(scope char[] str) @safe pure nothrow @nogc - { - assert(inProgress, "write called without begin"); - assert(end_ <= reader_.bufferOffset_, - "AT START: Slice ends after buffer position"); - - // Nothing? Already done. - if (str.length == 0) { return; } - // If str starts at the end of the slice (is a string returned by a Reader - // method), just extend the slice to contain str. - if(&str[0] == &reader_.buffer_[end_]) - { - end_ += str.length; - } - // Even if str does not start at the end of the slice, it still may be returned - // by a Reader method and point to buffer. So we need to memmove. - else - { - copy(str, reader_.buffer_[end_..end_ + str.length * char.sizeof]); - end_ += str.length; - } - } - - /// Write a character to the slice being built. - /// - /// Data can only be written up to the current position in the Reader buffer. - /// - /// See_Also: begin - void write(dchar c) @safe pure - in(inProgress, "write called without begin") - { - if(c < 0x80) - { - reader_.buffer_[end_++] = cast(char)c; - return; - } - - // We need to encode a non-ASCII dchar into UTF-8 - char[4] encodeBuf; - const bytes = encode(encodeBuf, c); - reader_.buffer_[end_ .. end_ + bytes] = encodeBuf[0 .. bytes]; - end_ += bytes; - } - - /// Insert a character to a specified position in the slice. - /// - /// Enlarges the slice by 1 char. Note that the slice can only extend up to the - /// current position in the Reader buffer. - /// - /// Params: - /// - /// c = The character to insert. - /// position = Position to insert the character at in code units, not code points. - /// Must be less than slice length(); a previously returned length() - /// can be used. - void insert(const dchar c, const size_t position) @safe pure - in(inProgress, "insert called without begin") - in(start_ + position <= end_, "Trying to insert after the end of the slice") - { - - const point = start_ + position; - const movedLength = end_ - point; - - // Encode c into UTF-8 - char[4] encodeBuf; - if(c < 0x80) { encodeBuf[0] = cast(char)c; } - const size_t bytes = c < 0x80 ? 1 : encode(encodeBuf, c); - - if(movedLength > 0) - { - copy(reader_.buffer_[point..point + movedLength * char.sizeof], - reader_.buffer_[point + bytes..point + bytes + movedLength * char.sizeof]); - } - reader_.buffer_[point .. point + bytes] = encodeBuf[0 .. bytes]; - end_ += bytes; - } - - /// Get the current length of the slice. - size_t length() @safe const pure nothrow @nogc - { - return end_ - start_; - } - - /// A slice building transaction. - /// - /// Can be used to save and revert back to slice state. - struct Transaction - { - private: - // The slice builder affected by the transaction. - SliceBuilder* builder_; - // Index of the return point of the transaction in StringBuilder.endStack_. - size_t stackLevel_; - // True after commit() has been called. - bool committed_; - - public: - /// Begins a transaction on a SliceBuilder object. - /// - /// The transaction must end $(B after) any transactions created within the - /// transaction but $(B before) the slice is finish()-ed. A transaction can be - /// ended either by commit()-ing or reverting through the destructor. - /// - /// Saves the current state of a slice. - this(SliceBuilder* builder) @safe pure nothrow @nogc - { - builder_ = builder; - stackLevel_ = builder_.endStackUsed_; - builder_.push(); - } - - /// Commit changes to the slice. - /// - /// Ends the transaction - can only be called once, and removes the possibility - /// to revert slice state. - /// - /// Does nothing for a default-initialized transaction (the transaction has not - /// been started yet). - void commit() @safe pure nothrow @nogc - in(!committed_, "Can't commit a transaction more than once") - { - - if(builder_ is null) { return; } - assert(builder_.endStackUsed_ == stackLevel_ + 1, - "Parent transactions don't fully contain child transactions"); - builder_.apply(); - committed_ = true; - } - - /// Destroy the transaction and revert it if it hasn't been committed yet. - void end() @safe pure nothrow @nogc - in(builder_ && builder_.endStackUsed_ == stackLevel_ + 1, "Parent transactions don't fully contain child transactions") - { - builder_.pop(); - builder_ = null; - } - - } - -private: - // Push the current end of the slice so we can revert to it if needed. - // - // Used by Transaction. - void push() @safe pure nothrow @nogc - in(inProgress, "push called without begin") - in(endStackUsed_ < endStack_.length, "Slice stack overflow") - { - endStack_[endStackUsed_++] = end_; - } - - // Pop the current end of endStack_ and set the end of the slice to the popped - // value, reverting changes since the old end was pushed. - // - // Used by Transaction. - void pop() @safe pure nothrow @nogc - in(inProgress, "pop called without begin") - in(endStackUsed_ > 0, "Trying to pop an empty slice stack") - { - end_ = endStack_[--endStackUsed_]; - } - - // Pop the current end of endStack_, but keep the current end of the slice, applying - // changes made since pushing the old end. - // - // Used by Transaction. - void apply() @safe pure nothrow @nogc - in(inProgress, "apply called without begin") - in(endStackUsed_ > 0, "Trying to apply an empty slice stack") - { - --endStackUsed_; - } -} - - private: // Convert a UTF-8/16/32 buffer to UTF-8, in-place if possible. diff --git a/source/dyaml/scanner.d b/source/dyaml/scanner.d index 77c3e38..2f4354b 100644 --- a/source/dyaml/scanner.d +++ b/source/dyaml/scanner.d @@ -753,7 +753,7 @@ struct Scanner /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - void scanAlphaNumericToSlice(string name)(const Mark startMark) + void scanAlphaNumericToSlice(string name)(ref char[] slice, const Mark startMark) { size_t length; dchar c = reader_.peek(); @@ -762,14 +762,14 @@ struct Scanner enforce(length > 0, new ScannerException("While scanning " ~ name, startMark, expected("alphanumeric, '-' or '_'", c), reader_.mark)); - reader_.sliceBuilder.write(reader_.get(length)); + slice ~= reader_.get(length); } /// Scan a string. /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - void scanAnchorAliasToSlice(const Mark startMark) @safe + char[] readAnchorAlias(const Mark startMark) @safe { size_t length; dchar c = reader_.peek(); @@ -781,7 +781,7 @@ struct Scanner enforce(length > 0, new ScannerException("While scanning an anchor or alias", startMark, expected("a printable character besides '[', ']', '{', '}' and ','", c), reader_.mark)); - reader_.sliceBuilder.write(reader_.get(length)); + return reader_.get(length); } /// Scan and throw away all characters until next line break. @@ -794,14 +794,14 @@ struct Scanner /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - void scanToNextBreakToSlice() @safe + void scanToNextBreakToSlice(ref char[] slice) @safe { uint length; while(!reader_.peek(length).isBreak) { ++length; } - reader_.sliceBuilder.write(reader_.get(length)); + slice ~= reader_.get(length); } @@ -859,17 +859,15 @@ struct Scanner reader_.forward(); // Scan directive name - reader_.sliceBuilder.begin(); - scanDirectiveNameToSlice(startMark); - const name = reader_.sliceBuilder.finish(); + char[] name; + scanDirectiveNameToSlice(name, startMark); - reader_.sliceBuilder.begin(); + char[] value; // Index where tag handle ends and suffix starts in a tag directive value. uint tagHandleEnd = uint.max; - if(name == "YAML") { scanYAMLDirectiveValueToSlice(startMark); } - else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(startMark); } - char[] value = reader_.sliceBuilder.finish(); + if(name == "YAML") { scanYAMLDirectiveValueToSlice(value, startMark); } + else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(value, startMark); } Mark endMark = reader_.mark; @@ -891,10 +889,10 @@ struct Scanner /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - void scanDirectiveNameToSlice(const Mark startMark) @safe + void scanDirectiveNameToSlice(ref char[] slice, const Mark startMark) @safe { // Scan directive name. - scanAlphaNumericToSlice!"a directive"(startMark); + scanAlphaNumericToSlice!"a directive"(slice, startMark); enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), new ScannerException("While scanning a directive", startMark, @@ -905,11 +903,11 @@ struct Scanner /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - void scanYAMLDirectiveValueToSlice(const Mark startMark) @safe + void scanYAMLDirectiveValueToSlice(ref char[] slice, const Mark startMark) @safe { findNextNonSpace(); - scanYAMLDirectiveNumberToSlice(startMark); + scanYAMLDirectiveNumberToSlice(slice, startMark); enforce(reader_.peekByte() == '.', new ScannerException("While scanning a directive", startMark, @@ -917,8 +915,8 @@ struct Scanner // Skip the '.'. reader_.forward(); - reader_.sliceBuilder.write('.'); - scanYAMLDirectiveNumberToSlice(startMark); + slice ~= '.'; + scanYAMLDirectiveNumberToSlice(slice, startMark); enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), new ScannerException("While scanning a directive", startMark, @@ -929,7 +927,7 @@ struct Scanner /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - void scanYAMLDirectiveNumberToSlice(const Mark startMark) @safe + void scanYAMLDirectiveNumberToSlice(ref char[] slice, const Mark startMark) @safe { enforce(isDigit(reader_.peek()), new ScannerException("While scanning a directive", startMark, @@ -939,7 +937,7 @@ struct Scanner uint length = 1; while(reader_.peek(length).isDigit) { ++length; } - reader_.sliceBuilder.write(reader_.get(length)); + slice ~= reader_.get(length); } /// Scan value of a tag directive. @@ -948,14 +946,14 @@ struct Scanner /// characters into that slice. /// /// Returns: Length of tag handle (which is before tag prefix) in scanned data - uint scanTagDirectiveValueToSlice(const Mark startMark) @safe + uint scanTagDirectiveValueToSlice(ref char[] slice, const Mark startMark) @safe { findNextNonSpace(); - const startLength = reader_.sliceBuilder.length; - scanTagDirectiveHandleToSlice(startMark); - const handleLength = cast(uint)(reader_.sliceBuilder.length - startLength); + const startLength = slice.length; + scanTagDirectiveHandleToSlice(slice, startMark); + const handleLength = cast(uint)(slice.length - startLength); findNextNonSpace(); - scanTagDirectivePrefixToSlice(startMark); + scanTagDirectivePrefixToSlice(slice, startMark); return handleLength; } @@ -964,9 +962,9 @@ struct Scanner /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - void scanTagDirectiveHandleToSlice(const Mark startMark) @safe + void scanTagDirectiveHandleToSlice(ref char[] slice, const Mark startMark) @safe { - scanTagHandleToSlice!"directive"(startMark); + scanTagHandleToSlice!"directive"(slice, startMark); enforce(reader_.peekByte() == ' ', new ScannerException("While scanning a directive handle", startMark, expected("' '", reader_.peek()), reader_.mark)); @@ -976,9 +974,9 @@ struct Scanner /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - void scanTagDirectivePrefixToSlice(const Mark startMark) @safe + void scanTagDirectivePrefixToSlice(ref char[] slice, const Mark startMark) @safe { - scanTagURIToSlice!"directive"(startMark); + scanTagURIToSlice!"directive"(slice, startMark); enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), new ScannerException("While scanning a directive prefix", startMark, expected("' '", reader_.peek()), reader_.mark)); @@ -1011,10 +1009,7 @@ struct Scanner const startMark = reader_.mark; reader_.forward(); // The */& character was only peeked, so we drop it now - reader_.sliceBuilder.begin(); - scanAnchorAliasToSlice(startMark); - // On error, value is discarded as we return immediately - char[] value = reader_.sliceBuilder.finish(); + char[] value = readAnchorAlias(startMark); assert(!reader_.peek().isNSAnchorName, "Anchor/alias name not fully scanned"); @@ -1035,8 +1030,7 @@ struct Scanner const startMark = reader_.mark; dchar c = reader_.peek(1); - reader_.sliceBuilder.begin(); - scope(failure) { reader_.sliceBuilder.finish(); } + char[] slice; // Index where tag handle ends and tag suffix starts in the tag value // (slice) we will produce. uint handleEnd; @@ -1046,7 +1040,7 @@ struct Scanner reader_.forward(2); handleEnd = 0; - scanTagURIToSlice!"tag"(startMark); + scanTagURIToSlice!"tag"(slice, startMark); enforce(reader_.peekByte() == '>', new ScannerException("While scanning a tag", startMark, expected("'>'", reader_.peek()), reader_.mark)); @@ -1056,7 +1050,7 @@ struct Scanner { reader_.forward(); handleEnd = 0; - reader_.sliceBuilder.write('!'); + slice ~= '!'; } else { @@ -1076,24 +1070,23 @@ struct Scanner if(useHandle) { - scanTagHandleToSlice!"tag"(startMark); - handleEnd = cast(uint)reader_.sliceBuilder.length; + scanTagHandleToSlice!"tag"(slice, startMark); + handleEnd = cast(uint)slice.length; } else { reader_.forward(); - reader_.sliceBuilder.write('!'); - handleEnd = cast(uint)reader_.sliceBuilder.length; + slice ~= '!'; + handleEnd = cast(uint)slice.length; } - scanTagURIToSlice!"tag"(startMark); + scanTagURIToSlice!"tag"(slice, startMark); } enforce(reader_.peek().isBreakOrSpace, new ScannerException("While scanning a tag", startMark, expected("' '", reader_.peek()), reader_.mark)); - char[] slice = reader_.sliceBuilder.finish(); return tagToken(startMark, reader_.mark, slice, handleEnd); } @@ -1115,23 +1108,22 @@ struct Scanner Mark endMark; uint indent = max(1, indent_ + 1); - reader_.sliceBuilder.begin(); - alias Transaction = SliceBuilder.Transaction; + char[] slice; // Used to strip the last line breaks written to the slice at the end of the // scalar, which may be needed based on chomping. - Transaction breaksTransaction = Transaction(&reader_.sliceBuilder); + char[] newBreakSlice; // Read the first indentation/line breaks before the scalar. - size_t startLen = reader_.sliceBuilder.length; + size_t startLen = newBreakSlice.length; if(increment == int.min) { - auto indentation = scanBlockScalarIndentationToSlice(); + auto indentation = scanBlockScalarIndentationToSlice(newBreakSlice); endMark = indentation[1]; indent = max(indent, indentation[0]); } else { indent += increment - 1; - endMark = scanBlockScalarBreaksToSlice(indent); + endMark = scanBlockScalarBreaksToSlice(newBreakSlice, indent); } // int.max means there's no line break (int.max is outside UTF-32). @@ -1140,24 +1132,22 @@ struct Scanner // Scan the inner part of the block scalar. while(reader_.column == indent && reader_.peekByte() != '\0') { - breaksTransaction.commit(); + slice ~= newBreakSlice; const bool leadingNonSpace = !reader_.peekByte().among!(' ', '\t'); // This is where the 'interesting' non-whitespace data gets read. - scanToNextBreakToSlice(); + scanToNextBreakToSlice(slice); lineBreak = scanLineBreak(); // This transaction serves to rollback data read in the // scanBlockScalarBreaksToSlice() call. - breaksTransaction = Transaction(&reader_.sliceBuilder); - startLen = reader_.sliceBuilder.length; + newBreakSlice = []; + startLen = slice.length; // The line breaks should actually be written _after_ the if() block // below. We work around that by inserting - endMark = scanBlockScalarBreaksToSlice(indent); + endMark = scanBlockScalarBreaksToSlice(newBreakSlice, indent); - // This will not run during the last iteration (see the if() vs the - // while()), hence breaksTransaction rollback (which happens after this - // loop) will never roll back data written in this if() block. + // This will not run during the last iteration if(reader_.column == indent && reader_.peekByte() != '\0') { // Unfortunately, folding rules are ambiguous. @@ -1168,16 +1158,16 @@ struct Scanner { // No breaks were scanned; no need to insert the space in the // middle of slice. - if(startLen == reader_.sliceBuilder.length) + if(startLen == slice.length + newBreakSlice.length) { - reader_.sliceBuilder.write(' '); + newBreakSlice ~= ' '; } } else { // We need to insert in the middle of the slice in case any line // breaks were scanned. - reader_.sliceBuilder.insert(lineBreak, startLen); + newBreakSlice.insert(lineBreak, 0); } ////this is Clark Evans's interpretation (also in the spec @@ -1211,8 +1201,7 @@ struct Scanner // If chompint is Keep, we keep (commit) the last scanned line breaks // (which are at the end of the scalar). Otherwise re remove them (end the // transaction). - if(chomping == Chomping.keep) { breaksTransaction.commit(); } - else { breaksTransaction.end(); } + if(chomping == Chomping.keep) { slice ~= newBreakSlice; } if(chomping != Chomping.strip && lineBreak != int.max) { // If chomping is Keep, we keep the line break but the first line break @@ -1220,21 +1209,18 @@ struct Scanner // be inserted _before_ the other line breaks. if(chomping == Chomping.keep) { - reader_.sliceBuilder.insert(lineBreak, startLen); + slice.insert(lineBreak, startLen); } - // If chomping is not Keep, breaksTransaction was cancelled so we can - // directly write the first line break (as it isn't stripped - chomping - // is not Strip) + // If chomping is not Keep, discard the line break else { if (lineBreak != '\0') { - reader_.sliceBuilder.write(lineBreak); + slice ~= lineBreak; } } } - char[] slice = reader_.sliceBuilder.finish(); return scalarToken(startMark, endMark, slice, style); } @@ -1324,7 +1310,7 @@ struct Scanner /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - Tuple!(uint, Mark) scanBlockScalarIndentationToSlice() @safe + Tuple!(uint, Mark) scanBlockScalarIndentationToSlice(ref char[] slice) @safe { uint maxIndent; Mark endMark = reader_.mark; @@ -1333,7 +1319,7 @@ struct Scanner { if(reader_.peekByte() != ' ') { - reader_.sliceBuilder.write(scanLineBreak()); + slice ~= scanLineBreak(); endMark = reader_.mark; continue; } @@ -1348,7 +1334,7 @@ struct Scanner /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - Mark scanBlockScalarBreaksToSlice(const uint indent) @safe + Mark scanBlockScalarBreaksToSlice(ref char[] slice, const uint indent) @safe { Mark endMark = reader_.mark; @@ -1356,7 +1342,7 @@ struct Scanner { while(reader_.column < indent && reader_.peekByte() == ' ') { reader_.forward(); } if(!reader_.peek().among!('\n', '\r', '\u0085', '\u2028', '\u2029')) { break; } - reader_.sliceBuilder.write(scanLineBreak()); + slice ~= scanLineBreak(); endMark = reader_.mark; } @@ -1369,18 +1355,17 @@ struct Scanner const startMark = reader_.mark; const quote = reader_.get(); - reader_.sliceBuilder.begin(); + char[] slice; - scanFlowScalarNonSpacesToSlice(quotes, startMark); + scanFlowScalarNonSpacesToSlice(slice, quotes, startMark); while(reader_.peek() != quote) { - scanFlowScalarSpacesToSlice(startMark); - scanFlowScalarNonSpacesToSlice(quotes, startMark); + scanFlowScalarSpacesToSlice(slice, startMark); + scanFlowScalarNonSpacesToSlice(slice, quotes, startMark); } reader_.forward(); - auto slice = reader_.sliceBuilder.finish(); return scalarToken(startMark, reader_.mark, slice, quotes); } @@ -1388,7 +1373,7 @@ struct Scanner /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - void scanFlowScalarNonSpacesToSlice(const ScalarStyle quotes, const Mark startMark) + void scanFlowScalarNonSpacesToSlice(ref char[] slice, const ScalarStyle quotes, const Mark startMark) @safe { for(;;) @@ -1398,19 +1383,19 @@ struct Scanner size_t numCodePoints; while(!reader_.peek(numCodePoints).isFlowScalarBreakSpace) { ++numCodePoints; } - if (numCodePoints > 0) { reader_.sliceBuilder.write(reader_.get(numCodePoints)); } + if (numCodePoints > 0) { slice ~= reader_.get(numCodePoints); } c = reader_.peek(); if(quotes == ScalarStyle.singleQuoted && c == '\'' && reader_.peek(1) == '\'') { reader_.forward(2); - reader_.sliceBuilder.write('\''); + slice ~= '\''; } else if((quotes == ScalarStyle.doubleQuoted && c == '\'') || (quotes == ScalarStyle.singleQuoted && c.among!('"', '\\'))) { reader_.forward(); - reader_.sliceBuilder.write(c); + slice ~= c; } else if(quotes == ScalarStyle.doubleQuoted && c == '\\') { @@ -1423,7 +1408,7 @@ struct Scanner // place (in a slice) in case of '\P' and '\L' (very uncommon, // but we don't want to break the spec) char[2] escapeSequence = ['\\', cast(char)c]; - reader_.sliceBuilder.write(escapeSequence); + slice ~= escapeSequence; } else if(c.among!(escapeHexCodeList)) { @@ -1444,14 +1429,14 @@ struct Scanner "hexadecimal numbers.", reader_.mark)); char[2] escapeStart = ['\\', cast(char) c]; - reader_.sliceBuilder.write(escapeStart); - reader_.sliceBuilder.write(hex); + slice ~= escapeStart; + slice ~= hex; } else if(c.among!('\n', '\r', '\u0085', '\u2028', '\u2029')) { scanLineBreak(); - scanFlowScalarBreaksToSlice(startMark); + scanFlowScalarBreaksToSlice(slice, startMark); } else { @@ -1468,7 +1453,7 @@ struct Scanner /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// spaces into that slice. - void scanFlowScalarSpacesToSlice(const Mark startMark) @safe + void scanFlowScalarSpacesToSlice(ref char[] slice, const Mark startMark) @safe { // Increase length as long as we see whitespace. size_t length; @@ -1485,7 +1470,7 @@ struct Scanner if(!c.among!('\n', '\r', '\u0085', '\u2028', '\u2029')) { reader_.forward(length); - reader_.sliceBuilder.write(whitespaces); + slice ~= whitespaces; return; } @@ -1493,21 +1478,21 @@ struct Scanner reader_.forward(length); const lineBreak = scanLineBreak(); - if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); } + if(lineBreak != '\n') { slice ~= lineBreak; } // If we have extra line breaks after the first, scan them into the // slice. - const bool extraBreaks = scanFlowScalarBreaksToSlice(startMark); + const bool extraBreaks = scanFlowScalarBreaksToSlice(slice, startMark); // No extra breaks, one normal line break. Replace it with a space. - if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); } + if(lineBreak == '\n' && !extraBreaks) { slice ~= ' '; } } /// Scan line breaks in a flow scalar. /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// line breaks into that slice. - bool scanFlowScalarBreaksToSlice(const Mark startMark) @safe + bool scanFlowScalarBreaksToSlice(ref char[] slice, const Mark startMark) @safe { // True if at least one line break was found. bool anyBreaks; @@ -1528,7 +1513,7 @@ struct Scanner const lineBreak = scanLineBreak(); anyBreaks = true; - reader_.sliceBuilder.write(lineBreak); + slice ~= lineBreak; } return anyBreaks; } @@ -1546,10 +1531,9 @@ struct Scanner // document separators at the beginning of the line. // if(indent == 0) { indent = 1; } - reader_.sliceBuilder.begin(); + char[] slice; - alias Transaction = SliceBuilder.Transaction; - Transaction spacesTransaction; + char[] newSpacesSlice; // Stop at a comment. while(reader_.peekByte() != '#') { @@ -1582,25 +1566,22 @@ struct Scanner allowSimpleKey_ = false; - reader_.sliceBuilder.write(reader_.get(length)); + newSpacesSlice ~= reader_.get(length); endMark = reader_.mark; - spacesTransaction.commit(); - spacesTransaction = Transaction(&reader_.sliceBuilder); + slice ~= newSpacesSlice; + newSpacesSlice = []; - const startLength = reader_.sliceBuilder.length; - scanPlainSpacesToSlice(); - if(startLength == reader_.sliceBuilder.length || + const startLength = slice.length; + scanPlainSpacesToSlice(newSpacesSlice); + if(startLength == slice.length + newSpacesSlice.length || (flowLevel_ == 0 && reader_.column < indent)) { break; } } - spacesTransaction.end(); - char[] slice = reader_.sliceBuilder.finish(); - return scalarToken(startMark, endMark, slice, ScalarStyle.plain); } @@ -1608,7 +1589,7 @@ struct Scanner /// /// Assumes that the caller is building a slice in Reader, and puts the spaces /// into that slice. - void scanPlainSpacesToSlice() @safe + void scanPlainSpacesToSlice(ref char[] slice) @trusted { // The specification is really confusing about tabs in plain scalars. // We just forbid them completely. Do not use tabs in YAML! @@ -1623,7 +1604,7 @@ struct Scanner if(!c.isNSChar) { // We have spaces, but no newline. - if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); } + if(whitespaces.length > 0) { slice ~= whitespaces; } return; } @@ -1642,9 +1623,8 @@ struct Scanner bool extraBreaks; - alias Transaction = SliceBuilder.Transaction; - auto transaction = Transaction(&reader_.sliceBuilder); - if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); } + char[] newSlice; + if(lineBreak != '\n') { newSlice ~= lineBreak; } while(reader_.peek().isNSChar) { if(reader_.peekByte() == ' ') { reader_.forward(); } @@ -1652,22 +1632,22 @@ struct Scanner { const lBreak = scanLineBreak(); extraBreaks = true; - reader_.sliceBuilder.write(lBreak); + newSlice ~= lBreak; if(end(reader_)) { return; } } } - transaction.commit(); + slice ~= newSlice; // No line breaks, only a space. - if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); } + if(lineBreak == '\n' && !extraBreaks) { slice ~= ' '; } } /// Scan handle of a tag token. /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - void scanTagHandleToSlice(string name)(const Mark startMark) + void scanTagHandleToSlice(string name)(ref char[] slice, const Mark startMark) { dchar c = reader_.peek(); enum contextMsg = "While scanning a " ~ name; @@ -1688,18 +1668,18 @@ struct Scanner ++length; } - reader_.sliceBuilder.write(reader_.get(length)); + slice ~= reader_.get(length); } /// Scan URI in a tag token. /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - void scanTagURIToSlice(string name)(const Mark startMark) + void scanTagURIToSlice(string name)(ref char[] slice, const Mark startMark) { // Note: we do not check if URI is well-formed. dchar c = reader_.peek(); - const startLen = reader_.sliceBuilder.length; + const startLen = slice.length; { uint length; while(c.isAlphaNum || c.isURIChar) @@ -1707,9 +1687,9 @@ struct Scanner if(c == '%') { auto chars = reader_.get(length); - reader_.sliceBuilder.write(chars); + slice ~= chars; length = 0; - scanURIEscapesToSlice!name(startMark); + scanURIEscapesToSlice!name(slice, startMark); } else { ++length; } c = reader_.peek(length); @@ -1717,13 +1697,13 @@ struct Scanner if(length > 0) { auto chars = reader_.get(length); - reader_.sliceBuilder.write(chars); + slice ~= chars; length = 0; } } // OK if we scanned something, error otherwise. enum contextMsg = "While parsing a " ~ name; - enforce(reader_.sliceBuilder.length > startLen, + enforce(slice.length > startLen, new ScannerException(contextMsg, startMark, expected("URI", c), reader_.mark)); } @@ -1732,7 +1712,7 @@ struct Scanner /// /// Assumes that the caller is building a slice in Reader, and puts the scanned /// characters into that slice. - void scanURIEscapesToSlice(string name)(const Mark startMark) + void scanURIEscapesToSlice(string name)(ref char[] slice, const Mark startMark) { import core.exception : UnicodeException; // URI escapes encode a UTF-8 string. We store UTF-8 code units here for @@ -1759,7 +1739,7 @@ struct Scanner { foreach (dchar chr; buffer.data) { - reader_.sliceBuilder.write(chr); + slice ~= chr; } } catch (UnicodeException) @@ -1824,3 +1804,23 @@ EOS".chomp; auto elems = s.map!"a.value".filter!"a.length > 0".array; assert(elems[1] == "foobar"); } + +private void insert(ref char[] slice, const dchar c, const size_t position) @safe pure +in(position <= slice.length, text("Trying to insert after the end of the slice (", position, " > ", slice.length, ")")) +{ + const point = position; + const movedLength = slice.length - point; + + // Encode c into UTF-8 + char[4] encodeBuf; + if(c < 0x80) { encodeBuf[0] = cast(char)c; } + const size_t bytes = c < 0x80 ? 1 : encode(encodeBuf, c); + + slice.length += bytes; + if(movedLength > 0) + { + copy(slice[point..point + movedLength * char.sizeof], + slice[point + bytes .. point + bytes + movedLength * char.sizeof]); + } + slice[point .. point + bytes] = encodeBuf[0 .. bytes]; +}