Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Package.swift

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions binding.gyp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions bindings/go/binding.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion bindings/rust/build.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions plpgsql/src/parser.c

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

61 changes: 59 additions & 2 deletions plpgsql/src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,18 @@ bool tree_sitter_plpgsql_external_scanner_scan(
int depth = 0;
bool has_content = false;

/*
* Track whether the next token at depth 0 is expected to be a value.
* `null` is a PL/pgSQL delimiter only as a fresh statement (`NULL;`), but
* inside an expression — after IS, IS NOT, =, <>, !=, IN, AND, OR, LIKE,
* BETWEEN, NOT, AS, or a binary operator — `NULL` is the SQL literal and
* must be consumed as part of `sql_expression`. We default to `false` so
* a bare leading `NULL` still falls through to the kw_null token (which
* `stmt_null` consumes); the flag flips to `true` only after the scanner
* has seen something that requires a value next.
*/
bool expecting_value = false;

while (lexer->lookahead != 0) {
/* At depth 0, semicolon terminates */
if (depth == 0 && lexer->lookahead == ';') break;
Expand All @@ -90,9 +102,13 @@ bool tree_sitter_plpgsql_external_scanner_scan(
}
return false;
}
/* Single < — part of SQL operator, continue */

/* Single < — part of a SQL comparison operator. This branch consumes the
* character and `continue`s before reaching the catch-all branch at the
* bottom of the loop (which sets expecting_value=true for '<'), so it
* must set the flag itself; otherwise `IF x < NULL THEN` would be
* truncated at NULL because NULL would not be treated as a value. */
has_content = true;
expecting_value = true;
continue;
}

Expand Down Expand Up @@ -141,13 +157,15 @@ bool tree_sitter_plpgsql_external_scanner_scan(
depth++;
lexer->advance(lexer, false);
has_content = true;
expecting_value = true;
continue;
}
if (lexer->lookahead == ')' || lexer->lookahead == ']') {
if (depth > 0) {
depth--;
lexer->advance(lexer, false);
has_content = true;
expecting_value = false;
continue;
}
/* Unbalanced close — stop */
Expand All @@ -167,6 +185,7 @@ bool tree_sitter_plpgsql_external_scanner_scan(
}
}
has_content = true;
expecting_value = false;
continue;
}

Expand All @@ -175,6 +194,7 @@ bool tree_sitter_plpgsql_external_scanner_scan(
/* Just consume the $ and let it be part of the expression */
lexer->advance(lexer, false);
has_content = true;
expecting_value = false;
continue;
}

Expand Down Expand Up @@ -235,6 +255,19 @@ bool tree_sitter_plpgsql_external_scanner_scan(
}
word[len] = '\0';

/* `null` is a PL/pgSQL delimiter only as a bare NULL statement.
* Inside an expression — after IS, IS NOT, =, <>, !=, IN, AND, OR,
* NOT, LIKE, etc., or any binary operator — NULL is the SQL literal
* and must be consumed as part of the expression. We approximate
* "inside an expression" with the `expecting_value` flag, which is
* set by operators, opening parens, comma, and value-expecting
* keywords like IS/NOT/AND/OR/IN/LIKE/BETWEEN. */
if (strcmp(word, "null") == 0 && expecting_value) {
has_content = true;
expecting_value = false;
continue;
}

/* Check if this word is a PL/pgSQL structural delimiter.
* These are keywords that, in context, indicate the end of a SQL
* expression in PL/pgSQL. We stop BEFORE consuming them.
Expand Down Expand Up @@ -297,6 +330,17 @@ bool tree_sitter_plpgsql_external_scanner_scan(
return false;
}

/* Non-delimiter word: update expecting_value based on whether the
* word naturally precedes a value (binary operators, IS, NOT, etc.) */
if (strcmp(word, "is") == 0 || strcmp(word, "not") == 0 ||
strcmp(word, "and") == 0 || strcmp(word, "or") == 0 ||
strcmp(word, "in") == 0 || strcmp(word, "like") == 0 ||
strcmp(word, "ilike") == 0 || strcmp(word, "between") == 0 ||
strcmp(word, "similar") == 0 || strcmp(word, "as") == 0) {
expecting_value = true;
} else {
expecting_value = false;
}

has_content = true;
continue;
Expand All @@ -310,6 +354,7 @@ bool tree_sitter_plpgsql_external_scanner_scan(
}

has_content = true;
expecting_value = false;
continue;
}
/* Inside parens, consume identifiers without keyword checking */
Expand All @@ -324,8 +369,20 @@ bool tree_sitter_plpgsql_external_scanner_scan(
}

/* Everything else (operators, digits, etc.) — consume it and, at depth 0,
* update expecting_value according to the character consumed */
int c = lexer->lookahead;
lexer->advance(lexer, false);
has_content = true;
if (depth == 0) {
if (c == '+' || c == '-' || c == '*' || c == '/' || c == '%' ||
c == '<' || c == '>' || c == '=' || c == '~' || c == '!' ||
c == '@' || c == '#' || c == '^' || c == '&' || c == '|' ||
c == '?' || c == ',') {
expecting_value = true;
} else if (c >= '0' && c <= '9') {
expecting_value = false;
}
/* '.' and other punctuation: leave expecting_value unchanged */
}
}

if (has_content) {
Expand Down
116 changes: 116 additions & 0 deletions plpgsql/test/corpus/control_flow.txt
Original file line number Diff line number Diff line change
Expand Up @@ -256,3 +256,119 @@ END
(any_identifier
(identifier))))))
(kw_end)))

==================
If with IS NULL
==================

BEGIN
IF x IS NULL THEN
NULL;
END IF;
END

---

(source_file
(pl_block
(kw_begin)
(proc_sect
(proc_stmt
(stmt_if
(kw_if)
(sql_expression)
(kw_then)
(proc_sect
(proc_stmt
(stmt_null
(kw_null))))
(kw_end)
(kw_if))))
(kw_end)))

==================
If with IS NOT NULL
==================

BEGIN
IF x IS NOT NULL THEN
NULL;
END IF;
END

---

(source_file
(pl_block
(kw_begin)
(proc_sect
(proc_stmt
(stmt_if
(kw_if)
(sql_expression)
(kw_then)
(proc_sect
(proc_stmt
(stmt_null
(kw_null))))
(kw_end)
(kw_if))))
(kw_end)))

==================
If with NULL comparisons
==================

BEGIN
IF x = NULL OR y <> NULL THEN
NULL;
END IF;
END

---

(source_file
(pl_block
(kw_begin)
(proc_sect
(proc_stmt
(stmt_if
(kw_if)
(sql_expression)
(kw_then)
(proc_sect
(proc_stmt
(stmt_null
(kw_null))))
(kw_end)
(kw_if))))
(kw_end)))

==================
If with single less-than against NULL
==================

BEGIN
IF x < NULL THEN
NULL;
END IF;
END

---

(source_file
(pl_block
(kw_begin)
(proc_sect
(proc_stmt
(stmt_if
(kw_if)
(sql_expression)
(kw_then)
(proc_sect
(proc_stmt
(stmt_null
(kw_null))))
(kw_end)
(kw_if))))
(kw_end)))
15 changes: 12 additions & 3 deletions postgres/grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ module.exports = grammar({
// Keywords (prec 1) outrank identifiers (prec 0) on the same text.
word: $ => $.identifier,

// External tokens — implemented in postgres/src/scanner.c.
// dollar_quoted_string must match opening and closing tags exactly,
// which a tree-sitter regex token cannot enforce.
externals: $ => [
$.dollar_quoted_string,
],

// Conflicts: PostgreSQL's grammar has many shift/reduce conflicts that
// Bison resolves via precedence rules. Tree-sitter (GLR) will handle
// these as ambiguities. Conflict pairs are stored in script/known-conflicts.json
Expand Down Expand Up @@ -4734,9 +4741,11 @@ module.exports = grammar({
// treats E'...' the same as a function call to E() at parse time.

// Dollar-quoted string: $$body$$ or $tag$body$tag$
// NOTE: full correctness requires matching the open/close tags;
// this regex accepts any dollar-quoted form and is good enough for highlighting.
dollar_quoted_string: _ => token(/\$[a-zA-Z_0-9]*\$[\s\S]*?\$[a-zA-Z_0-9]*\$/),
// Handled by the external scanner (postgres/src/scanner.c), which requires
// the opening and closing tags to match exactly. A regex token cannot
// enforce that, and because tree-sitter compiles tokens into a DFA that
// ignores non-greedy `*?`, the previous regex over-captured across
// multiple quoted strings in a single file.

// Bit string: B'0101'
bit_string_literal: _ => token(/[bB]'[01]*'/),
Expand Down
14 changes: 6 additions & 8 deletions postgres/src/grammar.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions postgres/src/parser.c

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading