Skip to content

Commit 67c8b4b

Browse files
authored
Merge pull request #9 from daohoangson/fix_chars_need_escaping
Update urlchar to handle character escaping.
2 parents 6b3c49d + 8934a2a commit 67c8b4b

2 files changed

Lines changed: 18 additions & 4 deletions

File tree

scanner/scanner.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,18 @@ var macros = map[string]string{
114114
"num": `[0-9]*\.[0-9]+|[0-9]+`,
115115
"string": `"(?:{stringchar}|')*"|'(?:{stringchar}|")*'`,
116116
"stringchar": `{urlchar}|[ ]|\\{nl}`,
117-
"urlchar": "[\u0009\u0021\u0023-\u0026\u0027-\u007E]|{nonascii}|{escape}",
118117
"nl": `[\n\r\f]|\r\n`,
119118
"w": `{wc}*`,
120119
"wc": `[\t\n\f\r ]`,
120+
121+
// urlchar should accept [(ascii characters minus those that need escaping)|{nonascii}|{escape}]
122+
// ASCII characters range = `[\u0020-\u007e]`
123+
// Skip space \u0020 = `[\u0021-\u007e]`
124+
// Skip quotation mark \u0022 = `[\u0021\u0023-\u007e]`
125+
// Skip apostrophe \u0027 = `[\u0021\u0023-\u0026\u0028-\u007e]`
126+
// Skip reverse solidus \u005c = `[\u0021\u0023-\u0026\u0028-\u005b\u005d-\u007e]`
127+
// Finally, the left (\u005b) and right (\u005d) square brackets need escaping themselves inside the regexp character class
128+
"urlchar": "[\u0021\u0023-\u0026\u0028-\\\u005b\\\u005d-\u007E]|{nonascii}|{escape}",
121129
}
122130

123131
// productions maps the list of tokens to patterns to be expanded.
@@ -254,10 +262,10 @@ func (s *Scanner) Next() *Token {
254262
match := matchers[TokenString].FindString(input)
255263
if match != "" {
256264
return s.emitToken(TokenString, match)
257-
} else {
258-
s.err = &Token{TokenError, "unclosed quotation mark", s.row, s.col}
259-
return s.err
260265
}
266+
267+
s.err = &Token{TokenError, "unclosed quotation mark", s.row, s.col}
268+
return s.err
261269
case '/':
262270
// Comment, error or Char.
263271
if len(input) > 1 && input[1] == '*' {

scanner/scanner_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,13 @@ func TestMatchers(t *testing.T) {
3131

3232
checkMatch("abcd", TokenIdent, "abcd")
3333
checkMatch(`"abcd"`, TokenString, `"abcd"`)
34+
checkMatch(`"ab'cd"`, TokenString, `"ab'cd"`)
35+
checkMatch(`"ab\"cd"`, TokenString, `"ab\"cd"`)
36+
checkMatch(`"ab\\cd"`, TokenString, `"ab\\cd"`)
3437
checkMatch("'abcd'", TokenString, "'abcd'")
38+
checkMatch(`'ab"cd'`, TokenString, `'ab"cd'`)
39+
checkMatch(`'ab\'cd'`, TokenString, `'ab\'cd'`)
40+
checkMatch(`'ab\\cd'`, TokenString, `'ab\\cd'`)
3541
checkMatch("#name", TokenHash, "#name")
3642
checkMatch("42''", TokenNumber, "42", TokenString, "''")
3743
checkMatch("4.2", TokenNumber, "4.2")

0 commit comments

Comments
 (0)