Skip to content

Commit

Permalink
lexer: simplify attribute lexing algorithm
Browse files Browse the repository at this point in the history
Attribute names in HTML can include almost anything.
Essentially, only the () brackets will need to be quoted
since they're also used in pugneum syntax.

Pugneum will be fully static so there will be no need to escape values.
Remove the ! syntax; the ! character can now be used in attribute names.

Notably, commas are valid attribute names!
Remove the comma from the pugneum attribute syntax.
It serves no purpose and forces the user to quote them.
The result is that this:

    a(b, c)

Actually refers to two boolean attributes "b," and "c" on the tag "a".
However, in this case the trailing comma will result in an error:

    a(b='c', d)

This is because the closing quote unambiguously ends the value,
and a quoted value must be followed by whitespace (or the end of the attribute list).

Fix the attribute value quotes handling.
The quotes aren't included in the values anymore.
Attribute text can include almost anything as well
but validating these contents is more complex than attribute names.
Maybe I'll do it later.

Also update test snapshots to reflect these changes.
  • Loading branch information
matheusmoreira committed Aug 27, 2023
1 parent 2d2acae commit 9cfe08b
Show file tree
Hide file tree
Showing 9 changed files with 1,203 additions and 1,608 deletions.
187 changes: 44 additions & 143 deletions packages/pugneum-lexer/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -848,12 +848,12 @@ Lexer.prototype = {
},

/**
* Attribute Name.
* Attribute name and value.
*/
attribute: function(str) {
var quote = '';
var quoteRe = /['"]/;
var key = '';
var key = '', value = '';
var i;

// consume all whitespace before the key
Expand Down Expand Up @@ -883,9 +883,7 @@ Lexer.prototype = {
} else {
if (
this.whitespaceRe.test(str[i]) ||
str[i] === '!' ||
str[i] === '=' ||
str[i] === ','
str[i] === '='
) {
break;
}
Expand All @@ -910,160 +908,63 @@ Lexer.prototype = {

tok.name = key;

var valueResponse = this.attributeValue(str.substr(i));
// consume all whitespace before the =
i = this.skipWhitespace(str, i);

if (valueResponse.val) {
tok.val = valueResponse.val;
tok.mustEscape = valueResponse.mustEscape;
} else {
// was a boolean attribute (ex: `input(disabled)`)
tok.val = true;
tok.mustEscape = true;
}

str = valueResponse.remainingSource;

this.tokens.push(this.tokEnd(tok));

for (i = 0; i < str.length; i++) {
if (!this.whitespaceRe.test(str[i])) {
break;
}
if (str[i] === '\n') {
this.incrementLine(1);
} else {
this.incrementColumn(1);
}
}

if (str[i] === ',') {
this.incrementColumn(1);
i++;
}

return str.substr(i);
},

/**
* Attribute Value.
*/
attributeValue: function(str) {
var quoteRe = /['"]/;
var val = '';
var done, i, x;
var escapeAttr = true;
var state = characterParser.defaultState();
var col = this.colno;
var line = this.lineno;

// consume all whitespace before the equals sign
for (i = 0; i < str.length; i++) {
if (!this.whitespaceRe.test(str[i])) break;
if (str[i] === '\n') {
line++;
col = 1;
} else {
col++;
}
}

if (i === str.length) {
return {remainingSource: str};
}

if (str[i] === '!') {
escapeAttr = false;
col++;
i++;
if (str[i] !== '=')
this.error(
'INVALID_KEY_CHARACTER',
'Unexpected character ' + str[i] + ' expected `=`'
);
}

if (str[i] !== '=') {
// check for anti-pattern `div("foo"bar)`
if (i === 0 && str && !this.whitespaceRe.test(str[0]) && str[0] !== ',') {
this.error(
'INVALID_KEY_CHARACTER',
'Unexpected character ' + str[0] + ' expected `=`'
);
} else {
return {remainingSource: str};
}
}
if (str[i] === '=') {
++i;

this.lineno = line;
this.colno = col + 1;
i++;
// consume all whitespace after the =
i = this.skipWhitespace(str, i);

// consume all whitespace before the value
for (; i < str.length; i++) {
if (!this.whitespaceRe.test(str[i])) break;
if (str[i] === '\n') {
this.incrementLine(1);
} else {
// quote?
if (quoteRe.test(str[i])) {
quote = str[i];
this.incrementColumn(1);
}
}

line = this.lineno;
col = this.colno;

// start looping through the value
for (; i < str.length; i++) {
// if the character is in a string or in parentheses/brackets/braces
if (!(state.isNesting() || state.isString())) {
if (this.whitespaceRe.test(str[i])) {
done = false;

// find the first non-whitespace character
for (x = i; x < str.length; x++) {
if (!this.whitespaceRe.test(str[x])) {
// if it is a JavaScript punctuator, then assume that it is
// a part of the value
const isNotPunctuator = !characterParser.isPunctuator(str[x]);
const isQuote = quoteRe.test(str[x]);
const isColon = str[x] === ':';
const isSpreadOperator =
str[x] + str[x + 1] + str[x + 2] === '...';
if (isNotPunctuator || isQuote || isColon || isSpreadOperator) {
done = true;
}
break;
}
i++;
} else { quote = null; }

// start looping through the value
for (; i < str.length; i++) {
if (quote) {
if (str[i] === quote) {
this.incrementColumn(1);
i++;
break;
}

// if everything else is whitespace, return now so last attribute
// does not include trailing whitespace
if (done || x === str.length) {
} else {
if (this.whitespaceRe.test(str[i])) {
break;
}
}

// if there's no whitespace and the character is not ',', the
// attribute did not end.
if (str[i] === ',') {
break;
value += str[i];

if (str[i] === '\n') {
this.incrementLine(1);
} else {
this.incrementColumn(1);
}
}
} else {
// was a boolean attribute (ex: `input(disabled)`)
value = true;
}

state = characterParser.parseChar(str[i], state);
val += str[i];
tok.val = value;

if (str[i] === '\n') {
line++;
col = 1;
} else {
col++;
}
this.tokens.push(this.tokEnd(tok));

if (quote && str[i] && !this.whitespaceRe.test(str[i])) {
this.error(
'MALFORMED_ATTRIBUTE',
'Invalid code point after attribute value: `' + str[i] + '`'
);
}

this.lineno = line;
this.colno = col;
i = this.skipWhitespace(str, i);

return {val: val, mustEscape: escapeAttr, remainingSource: str.substr(i)};
return str.substr(i);
},

/**
Expand Down
Loading

0 comments on commit 9cfe08b

Please sign in to comment.