Skip to content

Commit 3aba77d

Browse files
Fix: Prevent context-window bloat in ReWOO + summarization (#121)
* context bloat mitigation rewoo
* context bloat mitigation rewoo: unit test
* context bloat mitigation rewoo: version upgrade
1 parent f3b76b3 commit 3aba77d

5 files changed

Lines changed: 82 additions & 6 deletions

File tree

agents/reasoner/rewoo.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,9 @@ def _execute(self, step: Step, state: ReasonerState) -> None:
180180

181181
if step.output_key:
182182
self.memory[step.output_key] = step.result
183-
state.history.append(f"remembered {step.output_key} : {step.result}")
184183

185-
state.history.append(f"Executed step: {step.text} -> {step.result}")
184+
# Truncate the stringified step result to 8124 characters (~8KB) to cap history growth and avoid context-window bloat
185+
state.history.append(f"Executed step: {step.text} -> {str(step.result)[:8124]}")
186186
logger.info("step_executed", step_text=step.text, step_type=step_type, result=str(step.result)[:100] if step.result is not None else None)
187187

188188
@observe

agents/standard_agent.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ def solve(self, goal: str) -> ReasoningResult:
101101

102102
try:
103103
result = self.reasoner.run(goal)
104-
result.final_answer = self.llm.prompt(_PROMPTS["summarize"].format(goal=goal, history=getattr(result, "transcript", "")))
104+
# Keep only the last 12,000 characters (~12KB) of the transcript to limit context size and avoid context-window errors
105+
result.final_answer = self.llm.prompt(_PROMPTS["summarize"].format(goal=goal, history=getattr(result, "transcript", "")[-12000:]))
105106

106107
self._record_interaction({"goal": goal, "result": result.final_answer})
107108
self._state = AgentState.READY

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "standard-agent"
3-
version = "0.1.10"
3+
version = "0.1.11"
44
description = "A simple, modular library for building AI agents—with a composable core and plug‑in components."
55
requires-python = ">=3.11"
66
readme = "README.md"

tests/agents/reasoners/test_rewoo.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ def test_rewoo_plan_parses_valid_bullets_and_records_successful_tool_call():
3535

3636
# Successful tool call recorded once
3737
assert result.tool_calls and result.tool_calls[0] == {"tool_id": "t1", "summary": "Tool One"}
38-
# Transcript contains remembered k1 and executed steps
39-
assert "remembered k1" in result.transcript
38+
# Transcript contains executed steps; memory has k1
4039
assert "Executed step:" in result.transcript
40+
assert "k1" in memory
4141

4242

4343
def test_rewoo_plan_raises_on_input_before_output():
@@ -287,6 +287,44 @@ def test_rewoo_selection_invalid_id_records_no_tool_call():
287287
assert result.tool_calls == []
288288

289289

290+
def test_rewoo_history_truncates_step_result_to_8kb():
291+
# Plan with one TOOL step producing a very large payload
292+
plan_text = "- fetch big (output: k1)"
293+
large_payload = "X" * 50000 # 50KB
294+
295+
class BigTool(DummyTool):
296+
def __init__(self, tool_id: str, name: str):
297+
super().__init__(tool_id, name, schema={})
298+
299+
class BigTools(DummyTools):
300+
def execute(self, tool, params): # type: ignore[override]
301+
return large_payload
302+
303+
llm = DummyLLM(
304+
text_queue=[
305+
plan_text, # plan
306+
"TOOL", # classify
307+
"t1", # select tool
308+
],
309+
json_queue=[{}], # params
310+
)
311+
tools = BigTools([BigTool("t1", "Big Tool")])
312+
memory: Dict[str, Any] = DictMemory()
313+
314+
reasoner = ReWOOReasoner(llm=llm, tools=tools, memory=memory)
315+
result = reasoner.run("goal")
316+
317+
# Transcript should contain the executed line with truncated payload (~8124 chars)
318+
assert "Executed step:" in result.transcript
319+
executed_lines = [ln for ln in result.transcript.split("\n") if ln.startswith("Executed step:")]
320+
assert executed_lines, "Expected at least one executed step line"
321+
line = executed_lines[-1]
322+
# Ensure truncation happened: the line must be far shorter than the original 50k-character payload
323+
assert len(line) < 20000
324+
# And memory stores full payload (no truncation in memory)
325+
assert memory.get("k1") == large_payload
326+
327+
290328
def test_rewoo_param_gen_error_triggers_reflection_and_no_tool_call():
291329
# Override LLM to raise a ValueError during param generation
292330
class FailParamLLM(DummyLLM):

tests/agents/test_standard_agent.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,17 @@ def run(self, goal: str) -> ReasoningResult: # type: ignore[override]
2929
return ReasoningResult(transcript="trace", success=True)
3030

3131

32+
class LongTranscriptReasoner(BaseReasoner):
33+
def __init__(self):
34+
# type: ignore[call-arg]
35+
pass
36+
37+
def run(self, goal: str) -> ReasoningResult: # type: ignore[override]
38+
# Generate a 50,000-character transcript (well above the ~12KB limit) so that truncation must occur
39+
long_trace = "Y" * 50000
40+
return ReasoningResult(transcript=long_trace, success=True)
41+
42+
3243
class FailingReasoner(BaseReasoner):
3344
def __init__(self):
3445
# type: ignore[call-arg]
@@ -155,6 +166,32 @@ def run(self, goal: str) -> ReasoningResult: # type: ignore[override]
155166
assert result.final_answer == "S"
156167

157168

169+
def test_agent_summarize_uses_only_last_12kb_of_transcript(monkeypatch):
170+
_fixed_uuid4(monkeypatch, "RUN12K")
171+
172+
captured_prompt = {"text": None}
173+
174+
class CapturingLLM(DummyLLM):
175+
def prompt(self, text: str) -> str: # type: ignore[override]
176+
captured_prompt["text"] = text
177+
return "OK"
178+
179+
llm = CapturingLLM()
180+
tools = DummyTools()
181+
memory: Dict[str, Any] = DictMemory()
182+
reasoner = LongTranscriptReasoner()
183+
184+
agent = StandardAgent(llm=llm, tools=tools, memory=memory, reasoner=reasoner)
185+
agent.solve("g")
186+
187+
assert captured_prompt["text"] is not None
188+
# Inspect the full summarize prompt captured from the LLM call
189+
text = captured_prompt["text"] or ""
190+
# The history is inserted via format(... history=...), so the prompt should contain only ~12k characters of it
191+
assert len(text) < 30000 # entire prompt under 30k
192+
assert "Y" * 20000 not in text # definitely not the full 50k
193+
194+
158195
def test_agent_conversation_history_respects_window(monkeypatch):
159196
class SmallReasoner(BaseReasoner):
160197
def __init__(self):

0 commit comments

Comments
 (0)