Skip to content

Commit abe23b6

Browse files
committed
fix(parser): correct 'not' operator logic and add debug logging
1. Fix 'not' operator parsing: desugar `not expr` as `expr ? false : true` instead of `expr == false`, so truthiness goes through is_truthy() and undefined values are handled correctly. 2. Add debugging system: introduce a JINJA_DEBUG macro enabling lexer/parser/render tracing. 3. Add test cases: new scenarios with empty user/assistant message.content to verify Qwen template rendering.
1 parent 638a84c commit abe23b6

File tree

4 files changed

+1455
-130
lines changed

4 files changed

+1455
-130
lines changed

jinja.hpp

Lines changed: 64 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@
3434
#define JINJA_VERSION_PATCH 1
3535
#define JINJA_VERSION_STRING STR(JINJA_VERSION_MAJOR) "." STR(JINJA_VERSION_MINOR) "." STR(JINJA_VERSION_PATCH)
3636

37+
// Debug tracing: compile with -DJINJA_DEBUG to emit lexer/parser/render traces
// on stderr. When disabled, JINJA_LOG(x) compiles away to a no-op.
#ifdef JINJA_DEBUG
// Wrapped in do-while(0) so the macro is a single statement in BOTH branches;
// otherwise `if (c) JINJA_LOG(x); else ...` would parse differently depending
// on whether JINJA_DEBUG is defined.
#define JINJA_LOG(x) do { std::cerr << "[JINJA_DEBUG] " << x << std::endl; } while (0)
#else
#define JINJA_LOG(x) do {} while (0)
#endif
42+
3743
namespace jinja {
3844

3945
using json = nlohmann::json;
@@ -117,6 +123,23 @@ std::unique_ptr<T> make_unique(Args&&... args) {
117123

118124
inline std::string to_python_string(const json& val);
119125

126+
// Maps a raw token type code to a human-readable name for debug logging.
// The codes are presumably the Token::Type enumerator values in declaration
// order — confirm against the enum if it changes. Out-of-range codes (negative
// or > 10) map to "Unknown", matching the previous switch default.
inline std::string token_type_to_string(int type) {
    static const char* const kTokenNames[] = {
        "Text",       "ExprStart", "ExprEnd", "BlockStart", "BlockEnd",
        "Identifier", "String",    "Number",  "Operator",   "Punctuation",
        "Eof",
    };
    const int count = static_cast<int>(sizeof(kTokenNames) / sizeof(kTokenNames[0]));
    if (type < 0 || type >= count) {
        return "Unknown";
    }
    return kTokenNames[type];
}
142+
120143
inline std::string to_python_repr(const json& val) {
121144
if (val.is_string()) {
122145
std::string s = val.get<std::string>();
@@ -287,6 +310,13 @@ class Lexer {
287310
std::vector<Token> tokens;
288311
bool trim_next = false;
289312

313+
auto add_token = [&](Token::Type t, const std::string& v) {
314+
tokens.push_back({t, v});
315+
JINJA_LOG("Lexer Token: [" << token_type_to_string(t) << "] "
316+
<< (v == "\n" ? "\\n" : v));
317+
};
318+
JINJA_LOG("Lexer: Start tokenizing input length " << m_input.length());
319+
290320
while (m_cursor < m_input.length()) {
291321
if (m_state == State::Text) {
292322
// Find next {{ or {% or {#
@@ -308,7 +338,7 @@ class Lexer {
308338
text.erase(0, text.find_first_not_of(" \n\r\t"));
309339
trim_next = false;
310340
}
311-
if (!text.empty()) tokens.push_back({Token::Text, text});
341+
if (!text.empty()) add_token(Token::Text, text);
312342
m_cursor = m_input.length();
313343
} else {
314344
std::string text = std::string(m_input.substr(m_cursor, next - m_cursor));
@@ -345,15 +375,15 @@ class Lexer {
345375
}
346376
}
347377

348-
if (!text.empty()) tokens.push_back({Token::Text, text});
378+
if (!text.empty()) add_token(Token::Text, text);
349379

350380
m_cursor = next;
351381
if (token_kind == 1) { // Expr
352-
tokens.push_back({Token::ExpressionStart, "{{"});
382+
add_token(Token::ExpressionStart, "{{");
353383
m_state = State::Expression;
354384
if (trim_prev) m_cursor++;
355385
} else if (token_kind == 2) { // Block
356-
tokens.push_back({Token::BlockStart, "{%"});
386+
add_token(Token::BlockStart, "{%");
357387
m_state = State::Block;
358388
if (trim_prev) m_cursor++;
359389
} else if (token_kind == 3) { // Comment
@@ -397,8 +427,8 @@ class Lexer {
397427
}
398428

399429
if (trim_current) {
400-
if (m_state == State::Expression) tokens.push_back({Token::ExpressionEnd, "}}"});
401-
else tokens.push_back({Token::BlockEnd, "%}"});
430+
if (m_state == State::Expression) add_token(Token::ExpressionEnd, "}}");
431+
else add_token(Token::BlockEnd, "%}");
402432
m_cursor += 3;
403433
m_state = State::Text;
404434
trim_next = true;
@@ -407,13 +437,13 @@ class Lexer {
407437

408438
// Normal end tags
409439
if (m_state == State::Expression && m_input.substr(m_cursor, 2) == "}}") {
410-
tokens.push_back({Token::ExpressionEnd, "}}"});
440+
add_token(Token::ExpressionEnd, "}}");
411441
m_cursor += 2;
412442
m_state = State::Text;
413443
continue;
414444
}
415445
if (m_state == State::Block && m_input.substr(m_cursor, 2) == "%}") {
416-
tokens.push_back({Token::BlockEnd, "%}"});
446+
add_token(Token::BlockEnd, "%}");
417447
m_cursor += 2;
418448
m_state = State::Text;
419449

@@ -426,14 +456,14 @@ class Lexer {
426456

427457
char c = m_input[m_cursor];
428458
if (isalpha(c) || c == '_') {
429-
tokens.push_back({Token::Identifier, read_identifier()});
459+
add_token(Token::Identifier, read_identifier());
430460
} else if (isdigit(c)) {
431-
tokens.push_back({Token::Number, read_number()});
461+
add_token(Token::Number, read_number());
432462
} else if (c == '\'' || c == '"') {
433-
tokens.push_back({Token::String, read_string(c)});
463+
add_token(Token::String, read_string(c));
434464
} else if (strchr("[](){}:.,", c)) {
435465
std::string op(1, c);
436-
tokens.push_back({Token::Punctuation, op});
466+
add_token(Token::Punctuation, op);
437467
m_cursor++;
438468
} else {
439469
// Operator or other symbols
@@ -446,12 +476,12 @@ class Lexer {
446476
else if (c == '>' && next == '=') op = ">=";
447477
}
448478
if (op.length() > 1) m_cursor++; // Consume extra char
449-
tokens.push_back({Token::Operator, op});
479+
add_token(Token::Operator, op);
450480
m_cursor++;
451481
}
452482
}
453483
}
454-
tokens.push_back({Token::Eof, ""});
484+
add_token(Token::Eof, "");
455485
return tokens;
456486
}
457487

@@ -592,6 +622,7 @@ class Context {
592622
return (*it)[name];
593623
}
594624
}
625+
JINJA_LOG("Context: Variable '" << name << "' not found, returning UNDEFINED");
595626
static json undefined_val = UNDEFINED;
596627
return undefined_val;
597628
}
@@ -645,7 +676,9 @@ struct VarExpr : Expr {
645676
std::string name;
646677
explicit VarExpr(std::string n) : name(std::move(n)) {}
647678
json evaluate(Context& context) override {
648-
return context.get(name);
679+
json val = context.get(name);
680+
JINJA_LOG("Eval Var: '" << name << "' -> " << (val.is_string() ? val.get<std::string>() : val.dump()));
681+
return val;
649682
}
650683
std::string dump() const override { return name; }
651684
};
@@ -1327,8 +1360,12 @@ struct ForStmt : Node {
13271360
: loop_vars(std::move(vars)), iterable(std::move(iter)), body(std::move(b)), filter_expr(std::move(filter)) {}
13281361

13291362
void render(Context& context, std::string& out) override {
1363+
JINJA_LOG("Render For: Start loop processing");
13301364
json iter_val = iterable->evaluate(context);
1331-
if (is_undefined(iter_val)) return;
1365+
if (is_undefined(iter_val)) {
1366+
JINJA_LOG("Render For: Iterable is undefined, skipping.");
1367+
return;
1368+
}
13321369

13331370
std::vector<json> items;
13341371

@@ -1382,6 +1419,7 @@ struct ForStmt : Node {
13821419

13831420
len = filtered_items.size();
13841421
index = 0;
1422+
JINJA_LOG("Render For: Iterating " << len << " items.");
13851423

13861424
for (const auto& item : filtered_items) {
13871425
json loop_scope;
@@ -1422,6 +1460,7 @@ struct IfNode : Node {
14221460

14231461
void render(Context& context, std::string& out) override {
14241462
bool res = is_truthy(condition->evaluate(context));
1463+
JINJA_LOG("Render If: Condition evaluated to " << (res ? "TRUE" : "FALSE"));
14251464
if (res) {
14261465
for (const auto& node : true_body) node->render(context, out);
14271466
} else {
@@ -1438,6 +1477,7 @@ class Parser {
14381477
explicit Parser(std::vector<Token> tokens) : m_tokens(std::move(tokens)), m_cursor(0) {}
14391478

14401479
std::vector<std::unique_ptr<Node>> parse() {
1480+
JINJA_LOG("Parser: Start parsing");
14411481
std::vector<std::unique_ptr<Node>> nodes;
14421482
while (!is_at_end()) {
14431483
if (check(Token::Text)) {
@@ -1541,6 +1581,7 @@ class Parser {
15411581
}
15421582

15431583
std::unique_ptr<Node> parse_if() {
1584+
JINJA_LOG("Parser: Parsing IF block");
15441585
// We consumed {% and 'if'
15451586
std::unique_ptr<Expr> condition = parse_expression();
15461587
if (check(Token::BlockEnd)) advance(); // eat %}
@@ -1760,11 +1801,13 @@ class Parser {
17601801
std::unique_ptr<Expr> parse_not() {
17611802
if (check(Token::Identifier) && peek().value == "not") {
17621803
advance();
1763-
// Unary not.
1764-
// We can treat as BinaryExpr "==" false? Or separate UnaryExpr.
1765-
// For simplicity, implement UnaryExpr or just BinaryExpr with null left?
1766-
// Let's do BinaryExpr("==", val, false)
1767-
return make_unique<BinaryExpr>("==", parse_not(), make_unique<LiteralExpr>(false));
1804+
// (not Expr) -> (Expr ? false : true)。
1805+
auto expr = parse_not();
1806+
return make_unique<TernaryExpr>(
1807+
std::move(expr),
1808+
make_unique<LiteralExpr>(false),
1809+
make_unique<LiteralExpr>(true)
1810+
);
17681811
}
17691812
return parse_compare();
17701813
}

tests/generate_assets.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,28 @@ def get_scenarios():
231231
{"role": "system", "content": f"Current Date: {current_date}"},
232232
{"role": "user", "content": "Date?"}
233233
]
234-
}
234+
},
235+
{
236+
"desc": "empty_assistant_content",
237+
"messages": [
238+
{"role": "user", "content": "Say nothing"},
239+
{"role": "assistant", "content": ""}
240+
]
241+
},
242+
{
243+
"desc": "empty_user_content",
244+
"messages": [
245+
{"role": "user", "content": ""},
246+
{"role": "assistant", "content": "No input received."}
247+
]
248+
},
249+
{
250+
"desc": "empty_user_assistant_content",
251+
"messages": [
252+
{"role": "user", "content": ""},
253+
{"role": "assistant", "content": ""}
254+
]
255+
},
235256
]
236257

237258
def main():

0 commit comments

Comments
 (0)