Skip to content

Commit 0bdf84d

Browse files
committed
fix(gemini): cwd filter + summary .jsonl dispatch
Close the last four Gemini parity gaps surfaced in v0.14.0 user validation. Bug 1 — `list --agent gemini --cwd <abspath>` returned `[]`. `resolve_gemini_chat_dirs_for_listing` only accepted an exact hash-dir match for the abspath, so a named-scope layout (e.g. `~/.gemini/tmp/play/chats/`) was silently excluded whenever the user passed `--cwd /Users/.../play`. The resolver now also scans named-scope slugs and keeps any whose slug equals the abspath's final segment, is embedded as `/<slug>/`, or appears as trailing `/<slug>`. Hex-hash-shaped slugs stay gated on exact hash match. Bug 2 — `summary --agent gemini` reported `message_count: 0` for a session that `read` walked without issue. The summary line walker extracted `role = message.role || json.type` but only mapped `"user"` and `"assistant"` / `"human"`. Gemini's `type: "gemini"` (and `"model"`) fell through. Now `"gemini"` and `"model"` normalize to `"assistant"` at the walker, matching the existing read-path mapping. Single-document Gemini `.json` sessions are expanded into synthetic JSONL-shaped lines so the walker works uniformly across both Gemini layouts. Bugs 3 & 4 — `timeline --cwd` excluding Gemini and `doctor --cwd` reporting `sessions_gemini: warn` were cascades from Bug 1; both go through `adapter.list_sessions(Some(cwd), ...)` and now pass. Rust and Node implementations are updated in lockstep. Conformance gets two new cases: `summary-gemini-jsonl` for the jsonl summary path and `list-gemini-cwd` for the lenient cwd filter. Cargo tests go 146 → 149 (one cwd-filter test, two summary tests covering .jsonl + single-doc .json). Goldens updated: summary-gemini.json (message_count 0 → 1) and timeline.json (Gemini entries now visible under `/workspace/demo`). Release notes appended under the existing v0.14.0 Fixed subhead.
1 parent f0c78ab commit 0bdf84d

8 files changed

Lines changed: 489 additions & 22 deletions

File tree

RELEASE_NOTES.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,22 @@ v0.14.0 ships the thirteen-pass hardening effort planned in `research/agent-cont
5858
for every Gemini session. The scope directory name (e.g. `play`) is now returned as the cwd
5959
hint, so `chorus read --agent gemini --cwd <X>` filtering works for named scopes.
6060
Hex-hash scopes still return the hash (lossy; users can set `--chats-dir` to pin).
61+
- **Gemini adapter: lenient `--cwd <abspath>` filter in listings.** `chorus list --agent gemini
62+
--cwd /abs/path` previously hashed the abspath and returned `[]` whenever a matching hash
63+
directory didn't exist — which is the common case because Gemini CLI actually uses named
64+
scopes (e.g. `~/.gemini/tmp/play/chats/`). The listing resolver now also accepts a named
65+
scope whose slug equals the cwd's final segment or appears as `/<slug>/` or trailing
66+
`/<slug>` within the abspath. This cascades into `chorus timeline --cwd` and
67+
`chorus doctor --cwd` (both of which go through `list_sessions`), so `sessions_gemini` now
68+
reports `pass` and timeline entries include Gemini when the abspath matches a named scope.
69+
Hex-hash scopes are still resolved only by exact hash match.
70+
- **Gemini adapter: `summary` understands Gemini's role vocabulary.** `chorus summary --agent
71+
gemini` used to report `message_count: 0` for `.jsonl` sessions even when `read` returned
72+
multi-message content on the same file. The summary walker's role detector now maps
73+
`type: "gemini"` and `type: "model"` to `assistant` (matching the existing read-path
74+
mapping), and single-document `.json` sessions are expanded into synthetic line-shaped
75+
entries so the walker works uniformly across both Gemini layouts. Rust and Node implementations
76+
apply the same fix.
6177

6278
### Known Limitations
6379

cli/src/agents.rs

Lines changed: 116 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1465,17 +1465,66 @@ fn resolve_gemini_chat_dirs(chats_dir: Option<&str>, cwd: &str) -> Result<Vec<Pa
14651465
}
14661466

14671467
fn resolve_gemini_chat_dirs_for_listing(cwd: Option<&str>) -> Result<Vec<PathBuf>> {
1468+
let tmp_base = gemini_tmp_base_dir();
1469+
14681470
if let Some(scope) = cwd {
14691471
let normalized_cwd = normalize_path(scope)?;
14701472
let scoped_hash = hash_path(&normalized_cwd);
1471-
let dir = gemini_tmp_base_dir().join(scoped_hash).join("chats");
1472-
if dir.exists() {
1473-
return Ok(vec![dir]);
1473+
1474+
// Exact hash match (Gemini CLI's canonical layout for scoped sessions).
1475+
let exact = tmp_base.join(&scoped_hash).join("chats");
1476+
let mut ordered: Vec<PathBuf> = Vec::new();
1477+
let mut seen: std::collections::HashSet<PathBuf> = std::collections::HashSet::new();
1478+
if exact.exists() {
1479+
seen.insert(exact.clone());
1480+
ordered.push(exact);
1481+
}
1482+
1483+
// Lenient fallback: match scope-slug directories against the abspath.
1484+
// Gemini also lays sessions under `~/.gemini/tmp/<slug>/chats/` where
1485+
// <slug> is a short, human-named scope (e.g. `play`, `sandbox`). We
1486+
// accept a slug if the abspath ends with `/<slug>` or contains
1487+
// `/<slug>/`, or if the abspath's final segment equals the slug. Hex
1488+
// hash-shaped scopes are excluded from the lenient match (they are
1489+
// either an exact hash match handled above or a different project).
1490+
let cwd_str = normalized_cwd.to_string_lossy().to_string();
1491+
let final_segment = normalized_cwd
1492+
.file_name()
1493+
.and_then(|n| n.to_str())
1494+
.map(|s| s.to_string());
1495+
if let Ok(entries) = fs::read_dir(&tmp_base) {
1496+
for entry in entries.flatten() {
1497+
let path = entry.path();
1498+
if !path.is_dir() {
1499+
continue;
1500+
}
1501+
let slug = match path.file_name().and_then(|n| n.to_str()) {
1502+
Some(s) => s.to_string(),
1503+
None => continue,
1504+
};
1505+
// Skip hash-shaped slugs — only exact-hash match is valid.
1506+
let is_hex_hash = slug.len() >= 40 && slug.chars().all(|c| c.is_ascii_hexdigit());
1507+
if is_hex_hash {
1508+
continue;
1509+
}
1510+
let slug_pat_contains = format!("/{}/", slug);
1511+
let slug_pat_suffix = format!("/{}", slug);
1512+
let lenient_match = final_segment.as_deref() == Some(slug.as_str())
1513+
|| cwd_str.contains(&slug_pat_contains)
1514+
|| cwd_str.ends_with(&slug_pat_suffix);
1515+
if !lenient_match {
1516+
continue;
1517+
}
1518+
let chats = path.join("chats");
1519+
if chats.exists() && seen.insert(chats.clone()) {
1520+
ordered.push(chats);
1521+
}
1522+
}
14741523
}
1475-
return Ok(Vec::new());
1524+
1525+
return Ok(ordered);
14761526
}
14771527

1478-
let tmp_base = gemini_tmp_base_dir();
14791528
let mut ordered = Vec::new();
14801529
if let Ok(entries) = fs::read_dir(&tmp_base) {
14811530
for entry in entries.flatten() {
@@ -3795,4 +3844,66 @@ mod tests {
37953844
assert_eq!(cwd3, serde_json::Value::String("abc".into()));
37963845
assert_eq!(hash3, None);
37973846
}
3847+
3848+
/// Bug 1 regression: `--cwd <abspath>` should match a named-scope slug
3849+
/// (e.g. `play`) when the abspath's final segment or an embedded path
3850+
/// component equals the slug. Before the fix, `list_gemini_sessions`
3851+
/// would hash the abspath and look for the hashed directory only —
3852+
/// missing the actual named-scope layout Gemini CLI uses.
///
/// Covers three inputs: a cwd ending in `/play`, a cwd with `/play/`
/// embedded, and a cwd that matches neither the `play` nor the `work` scope.
3853+
#[test]
3854+
fn gemini_list_lenient_cwd_filter_matches_named_scope() {
3855+
let _guard = gemini_list_env_lock();
3856+
let fixture = gemini_list_fixture("lenient_cwd");
3857+
// Mixed: named `play` scope with a .jsonl session + an unrelated
3858+
// `work` scope with a .json session. A cwd whose final segment is
3859+
// `play` should return only the play session.
3860+
let play_chats = fixture.join("play").join("chats");
3861+
std::fs::create_dir_all(&play_chats).unwrap();
3862+
std::fs::write(
3863+
play_chats.join("session-live.jsonl"),
3864+
"{\"sessionId\":\"live\"}\n{\"type\":\"gemini\",\"content\":\"hi\"}\n",
3865+
)
3866+
.unwrap();
3867+
3868+
let work_chats = fixture.join("work").join("chats");
3869+
std::fs::create_dir_all(&work_chats).unwrap();
3870+
std::fs::write(
3871+
work_chats.join("session-other.json"),
3872+
serde_json::json!({ "messages": [] }).to_string(),
3873+
)
3874+
.unwrap();
3875+
// NOTE(review): presumably `gemini_tmp_base_dir()` reads this variable so
// the listing scans the fixture tree instead of `~/.gemini/tmp` — confirm.
3876+
std::env::set_var("CHORUS_GEMINI_TMP_DIR", &fixture);
3877+
3878+
// Case 1: --cwd ending in `/play` picks only the play scope.
3879+
let out = super::list_gemini_sessions(Some("/Users/testuser/sandbox/play"), 10)
3880+
.expect("list_gemini_sessions play");
3881+
let ids: Vec<String> = out
3882+
.iter()
3883+
.map(|e| e["session_id"].as_str().unwrap_or("").to_string())
3884+
.collect();
3885+
assert_eq!(out.len(), 1, "expected 1 play session, got: {:?}", ids);
3886+
assert_eq!(ids[0], "session-live");
3887+
3888+
// Case 2: --cwd with `/play/` embedded (deeper subdir) also picks play.
3889+
let out2 = super::list_gemini_sessions(
3890+
Some("/Users/testuser/sandbox/play/agent-chorus"),
3891+
10,
3892+
)
3893+
.expect("list_gemini_sessions play subdir");
3894+
assert_eq!(out2.len(), 1);
3895+
assert_eq!(out2[0]["session_id"].as_str(), Some("session-live"));
3896+
3897+
// Case 3: --cwd unrelated to any slug returns empty.
3898+
let out3 = super::list_gemini_sessions(Some("/Users/testuser/elsewhere"), 10)
3899+
.expect("list_gemini_sessions unrelated");
3900+
assert!(
3901+
out3.is_empty(),
3902+
"unrelated cwd should return no sessions, got: {:?}",
3903+
out3,
3904+
);
3905+
// NOTE(review): this cleanup only runs when every assertion above passes;
// a panic leaves the env var set and the fixture directory on disk.
3906+
std::env::remove_var("CHORUS_GEMINI_TMP_DIR");
3907+
let _ = std::fs::remove_dir_all(&fixture);
3908+
}
37983909
}

cli/src/summary.rs

Lines changed: 189 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,30 @@ pub fn build_summary(
168168
let source_path = session.source.clone();
169169
let session_warnings = session.warnings.clone();
170170

171-
// Now parse the raw file for summary extraction
171+
// Now parse the raw file for summary extraction.
172+
//
173+
// Extension dispatch: .jsonl files parse line-by-line; Gemini also writes
174+
// single-document .json files (older CLI layout) whose contents won't
175+
// survive the per-line JSON parser. For those, walk `session.messages`
176+
// and `session.history` and re-serialize each entry as a synthetic JSONL
177+
// line so the downstream walker can consume them unchanged.
172178
let path = Path::new(&source_path);
173-
let lines = agents::read_jsonl_lines(path).unwrap_or_default();
179+
let is_single_doc_json = path
180+
.extension()
181+
.and_then(|e| e.to_str())
182+
.map(|e| e.eq_ignore_ascii_case("json"))
183+
.unwrap_or(false);
184+
let lines: Vec<String> = if is_single_doc_json {
185+
match std::fs::read_to_string(path) {
186+
Ok(raw) => match serde_json::from_str::<Value>(&raw) {
187+
Ok(doc) => synthesize_gemini_jsonl_lines(&doc),
188+
Err(_) => Vec::new(),
189+
},
190+
Err(_) => Vec::new(),
191+
}
192+
} else {
193+
agents::read_jsonl_lines(path).unwrap_or_default()
194+
};
174195

175196
let mut user_requests: Vec<String> = Vec::new();
176197
let mut tool_call_counts: BTreeMap<String, usize> = BTreeMap::new();
@@ -226,12 +247,20 @@ pub fn build_summary(
226247

227248
// Claude-format messages
228249
let message = json.get("message").unwrap_or(&json);
229-
let role = message
250+
let raw_role = message
230251
.get("role")
231252
.or_else(|| json.get("type"))
232253
.and_then(|v| v.as_str())
233254
.unwrap_or("")
234255
.to_lowercase();
256+
// Normalize Gemini's role vocabulary: `type: "gemini"` and
257+
// `type: "model"` both map to `assistant`. Without this, Gemini
258+
// .jsonl sessions produce message_count: 0 in the summary even
259+
// though `read` returns a non-empty content.
260+
let role = match raw_role.as_str() {
261+
"gemini" | "model" => "assistant".to_string(),
262+
other => other.to_string(),
263+
};
235264

236265
if role == "user" || role == "human" {
237266
let content = message
@@ -336,6 +365,61 @@ pub fn build_summary(
336365
})
337366
}
338367

368+
/// Expand a single-document Gemini session into synthetic JSONL-shaped lines.
369+
///
370+
/// Accepts both historical Gemini schemas:
371+
/// - `{ "messages": [ { "type": "user"|"gemini"|..., "content": ... } ] }`
372+
/// - `{ "history": [ { "role": "user"|"model", "parts": [...] } ] }`
373+
///
374+
/// Each returned string is a compact JSON object the summary walker can parse
375+
/// like a regular .jsonl line.
///
/// `messages` takes precedence: when it is present as an array, every element
/// that is a JSON object is re-serialized unchanged (non-object elements are
/// skipped) and `history` is not consulted, even if `messages` is empty.
///
/// For `history`, each turn becomes `{"type": ..., "content": ...}`. The role
/// `user` (case-insensitive) maps to type `user`; every other role, including
/// a missing one, maps to `gemini`. `parts` may be an array, whose string
/// `text` fields are joined with newlines, or a plain string. Turns that
/// yield no text are skipped.
///
/// Returns an empty vector when neither key holds an array.
376+
fn synthesize_gemini_jsonl_lines(doc: &Value) -> Vec<String> {
377+
let mut out = Vec::new();
378+
if let Some(messages) = doc.get("messages").and_then(|v| v.as_array()) {
379+
for msg in messages {
380+
if msg.is_object() {
381+
if let Ok(s) = serde_json::to_string(msg) {
382+
out.push(s);
383+
}
384+
}
385+
}
// `messages` wins: return here so `history` is never merged in.
// NOTE(review): a document with an empty `messages` array and a populated
// `history` therefore yields no lines — confirm this is intended.
386+
return out;
387+
}
388+
if let Some(history) = doc.get("history").and_then(|v| v.as_array()) {
389+
for turn in history {
390+
// Normalize `{role,parts}` into `{type,content}` so the walker's
391+
// role+content extraction path applies unchanged.
392+
let role = turn
393+
.get("role")
394+
.and_then(|v| v.as_str())
395+
.unwrap_or("")
396+
.to_lowercase();
// Anything that is not `user` (including a missing role, which becomes "")
// is treated as a Gemini reply.
397+
let mapped_type = if role == "user" { "user" } else { "gemini" };
// Array parts: keep only entries with a string `text` field and join them
// with newlines. String parts: use the string as-is. Otherwise: empty.
398+
let text = if let Some(arr) = turn.get("parts").and_then(|v| v.as_array()) {
399+
arr.iter()
400+
.filter_map(|p| p.get("text").and_then(|t| t.as_str()))
401+
.collect::<Vec<&str>>()
402+
.join("\n")
403+
} else if let Some(s) = turn.get("parts").and_then(|v| v.as_str()) {
404+
s.to_string()
405+
} else {
406+
String::new()
407+
};
// Turns with no extractable text produce no synthetic line.
408+
if text.is_empty() {
409+
continue;
410+
}
411+
let synth = serde_json::json!({
412+
"type": mapped_type,
413+
"content": text,
414+
});
415+
if let Ok(s) = serde_json::to_string(&synth) {
416+
out.push(s);
417+
}
418+
}
419+
}
420+
out
421+
}
422+
339423
/// Extract tool call counts from a Claude-style content array.
340424
fn extract_tool_call_summary(content: &[Value], counts: &mut BTreeMap<String, usize>) {
341425
for block in content {
@@ -449,3 +533,105 @@ fn capitalize(s: &str) -> String {
449533
Some(c) => c.to_uppercase().collect::<String>() + chars.as_str(),
450534
}
451535
}
536+
537+
#[cfg(test)]
538+
mod tests {
539+
use super::*;
540+
use crate::utils::hash_path;
541+
542+
/// Acquire the mutex shared by the tests in this module.
///
/// Both tests below set and clear `CHORUS_GEMINI_TMP_DIR`, which is
/// process-global state, and hold the returned guard while doing so.
/// The mutex is created lazily on first use. If an earlier holder panicked
/// and poisoned the lock, the guard is recovered via `into_inner` instead of
/// panicking here.
fn summary_env_lock() -> std::sync::MutexGuard<'static, ()> {
543+
use std::sync::{Mutex, OnceLock};
544+
static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
545+
LOCK.get_or_init(|| Mutex::new(()))
546+
.lock()
547+
.unwrap_or_else(|e| e.into_inner())
548+
}
549+
550+
/// Create and return an empty fixture directory named `chorus_summary_<name>`
/// under the system temp directory.
///
/// Any existing directory at that path is removed first; a removal failure is
/// ignored. Panics if the directory cannot be created.
fn fresh_fixture(name: &str) -> std::path::PathBuf {
551+
let dir = std::env::temp_dir().join(format!("chorus_summary_{}", name));
552+
let _ = std::fs::remove_dir_all(&dir);
553+
std::fs::create_dir_all(&dir).expect("create fixture dir");
554+
dir
555+
}
556+
557+
/// Bug 2 regression: Gemini .jsonl session produces a non-zero
558+
/// `message_count` in the summary output. Before the fix, the summary
559+
/// walker treated `type: "gemini"` as a non-role and returned 0 even
560+
/// though `read` returned 27+ messages on the same file.
561+
#[test]
562+
fn gemini_summary_counts_assistant_messages_from_jsonl() {
563+
let _guard = summary_env_lock();
564+
let fixture = fresh_fixture("gemini_jsonl_count");
565+
// Use the hash layout so the default resolve path finds the session
566+
// when we pass --cwd /tmp/fake-project.
567+
let fake_cwd_str = "/tmp/fake-project";
568+
let fake_cwd = std::path::PathBuf::from(fake_cwd_str);
// NOTE(review): assumes hashing the raw path gives the same directory name
// the adapter derives after its own path normalization — confirm on
// platforms where `/tmp` is a symlink.
569+
let scoped_hash = hash_path(&fake_cwd);
570+
let chats = fixture.join(&scoped_hash).join("chats");
571+
std::fs::create_dir_all(&chats).unwrap();
572+
// Header + two assistant messages + one user message +
573+
// a `$set` metadata event to exercise skipping.
574+
let jsonl = concat!(
575+
"{\"sessionId\":\"test-session\"}\n",
576+
"{\"id\":\"u1\",\"type\":\"user\",\"content\":\"hello there\",\"timestamp\":\"2026-04-24T17:48:52.144Z\"}\n",
577+
"{\"$set\":{\"lastUpdated\":\"2026-04-24T17:48:52.145Z\"}}\n",
578+
"{\"id\":\"g1\",\"type\":\"gemini\",\"content\":\"first reply\",\"timestamp\":\"2026-04-24T17:48:59.748Z\"}\n",
579+
"{\"id\":\"g2\",\"type\":\"gemini\",\"content\":\"second reply\",\"timestamp\":\"2026-04-24T17:49:10.000Z\"}\n",
580+
);
581+
std::fs::write(chats.join("session-test.jsonl"), jsonl).unwrap();
582+
// The override is cleared before any assertion runs, so a failed assertion
// does not leave it set. A panic from `expect` below would still skip the
// `remove_var` call.
583+
std::env::set_var("CHORUS_GEMINI_TMP_DIR", &fixture);
584+
let res = super::build_summary("gemini", None, fake_cwd_str, None)
585+
.expect("build_summary");
586+
std::env::remove_var("CHORUS_GEMINI_TMP_DIR");
587+
588+
assert_eq!(res.message_count, 2, "expected 2 assistant messages");
589+
assert_eq!(
590+
res.user_requests.len(),
591+
1,
592+
"expected 1 user request captured, got {:?}",
593+
res.user_requests
594+
);
595+
assert!(
596+
res.last_response_snippet.as_deref().unwrap_or("").contains("second reply"),
597+
"last snippet should come from the last gemini message: {:?}",
598+
res.last_response_snippet,
599+
);
600+
// NOTE(review): the fixture directory is only removed when every assertion
// above passes.
601+
let _ = std::fs::remove_dir_all(&fixture);
602+
}
603+
604+
/// Also verify the single-document .json path still summarizes correctly
605+
/// after the extension dispatch was introduced.
///
/// The fixture is a `{ "messages": [...] }` document with two `user` and
/// three `gemini` entries; the test expects `message_count == 3` and two
/// captured user requests.
606+
#[test]
607+
fn gemini_summary_counts_assistant_messages_from_single_doc_json() {
608+
let _guard = summary_env_lock();
609+
let fixture = fresh_fixture("gemini_json_count");
610+
let fake_cwd_str = "/tmp/fake-project-json";
611+
let fake_cwd = std::path::PathBuf::from(fake_cwd_str);
// NOTE(review): assumes hashing the raw path gives the same directory name
// the adapter derives after its own path normalization — confirm on
// platforms where `/tmp` is a symlink.
612+
let scoped_hash = hash_path(&fake_cwd);
613+
let chats = fixture.join(&scoped_hash).join("chats");
614+
std::fs::create_dir_all(&chats).unwrap();
615+
let doc = serde_json::json!({
616+
"sessionId": "json-session",
617+
"messages": [
618+
{ "type": "user", "content": "q1" },
619+
{ "type": "gemini", "content": "a1" },
620+
{ "type": "user", "content": "q2" },
621+
{ "type": "gemini", "content": "a2" },
622+
{ "type": "gemini", "content": "a3" },
623+
],
624+
});
625+
std::fs::write(chats.join("session-test.json"), doc.to_string()).unwrap();
626+
// The override is cleared before any assertion runs, so a failed assertion
// does not leave it set. A panic from `expect` below would still skip the
// `remove_var` call.
627+
std::env::set_var("CHORUS_GEMINI_TMP_DIR", &fixture);
628+
let res = super::build_summary("gemini", None, fake_cwd_str, None)
629+
.expect("build_summary");
630+
std::env::remove_var("CHORUS_GEMINI_TMP_DIR");
631+
632+
assert_eq!(res.message_count, 3, "expected 3 assistant messages");
633+
assert_eq!(res.user_requests.len(), 2);
634+
// NOTE(review): the fixture directory is only removed when every assertion
// above passes.
635+
let _ = std::fs::remove_dir_all(&fixture);
636+
}
637+
}

0 commit comments

Comments
 (0)