Skip to content

Commit

Permalink
syntax: parse all CRLF line endings as LF
Browse files Browse the repository at this point in the history
We treat CR as whitespace, so this already happened in practice
for regular line endings. However, heredocs are a quoted state
where whitespace is not skipped, so CRLF line endings caused errors:
we couldn't match `<<EOF` with a CRLF-terminated EOF line, as we got `EOF\r`.

Fixes #1088.
  • Loading branch information
mvdan committed Sep 29, 2024
1 parent d926916 commit 04e63a5
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 14 deletions.
17 changes: 8 additions & 9 deletions syntax/filetests_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ var fileTests = []testCase{
"foo <<EOF\nbar\nEOF",
"foo <<EOF \nbar\nEOF",
"foo <<EOF\t\nbar\nEOF",
"foo <<EOF\r\nbar\r\nEOF\r\n",
},
common: &Stmt{
Cmd: litCall("foo"),
Expand Down Expand Up @@ -1389,6 +1390,7 @@ var fileTests = []testCase{
Strs: []string{
"foo <<'EOF'\nbar\\\nEOF",
"foo <<'EOF'\nbar\\\r\nEOF",
"foo <<'EOF'\nbar\\\r\nEOF\r\n",
},
common: &Stmt{
Cmd: litCall("foo"),
Expand All @@ -1400,7 +1402,10 @@ var fileTests = []testCase{
},
},
{
Strs: []string{"foo <<-EOF\n\tbar\nEOF"},
Strs: []string{
"foo <<-EOF\n\tbar\nEOF",
"foo <<-EOF\r\n\tbar\r\nEOF\r\n",
},
common: &Stmt{
Cmd: litCall("foo"),
Redirs: []*Redirect{{
Expand Down Expand Up @@ -4592,18 +4597,12 @@ func recursiveSanityCheck(tb testing.TB, src string, v any) {
if i == 0 {
gotErr = got
}
got = strings.ReplaceAll(got, "\x00", "")
got = strings.ReplaceAll(got, "\r\n", "\n")
if !strings.Contains(want, "\\\n") {
// Hack to let "foobar" match the input "foo\\\nbar".
got = strings.ReplaceAll(got, "\\\n", "")
} else {
// Hack to let "\\\n" match the input "\\\r\n".
got = strings.ReplaceAll(got, "\\\r\n", "\\\n")
}
if !strings.Contains(want, "\\\r\n") {
// Hack to let "foobar" match the input "foo\\\r\nbar".
got = strings.ReplaceAll(got, "\\\r\n", "")
}
got = strings.ReplaceAll(got, "\x00", "")
if strings.HasPrefix(got, want) {
return
}
Expand Down
15 changes: 10 additions & 5 deletions syntax/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,24 @@ retry:
if p.bsp < uint(len(p.bs)) {
if b := p.bs[p.bsp]; b < utf8.RuneSelf {
p.bsp++
if b == '\x00' {
switch b {
case '\x00':
// Ignore null bytes while parsing, like bash.
p.col++
goto retry
}
if b == '\\' {
case '\r':
if p.peekByte('\n') { // \r\n turns into \n
p.col++
goto retry
}
case '\\':
if p.r == '\\' {
} else if p.peekByte('\n') {
p.bsp++
p.w, p.r = 1, escNewl
return escNewl
} else if p.peekBytes("\r\n") {
} else if p.peekBytes("\r\n") { // \\\r\n turns into \\\n
p.col++
p.bsp += 2
p.w, p.r = 2, escNewl
return escNewl
Expand All @@ -90,7 +96,6 @@ retry:
p.bsp < uint(len(p.bs)) && bquoteEscaped(p.bs[p.bsp]) {
// We turn backquote command substitutions into $(),
// so we remove the extra backslashes needed by the backquotes.
// For good position information, we still include them in p.w.
bquotes++
p.col++
goto retry
Expand Down

0 comments on commit 04e63a5

Please sign in to comment.