From 5a3ba8463cef866232da0ef711ce85e16e60db8c Mon Sep 17 00:00:00 2001
From: Mahendra Singh Rathore
 <49229348+mahendrarathore1742@users.noreply.github.com>
Date: Tue, 10 Mar 2026 10:38:21 +0000
Subject: [PATCH 1/2] fix: resolve 'TODO: last event is not final' error in
 streaming mode (#600)

In streaming mode (e.g., with a2a remote agents), Flow.Run() returned
the error 'TODO: last event is not final' whenever the last event from
runOneStep had Partial=true. This is a legitimate state that occurs
when:
- The model reaches the max token limit during streaming
- A sub-agent (e.g., an a2a remote agent) emits partial streaming events

The turn is complete in these cases, so Flow.Run() now simply returns
instead of yielding an error.

Added regression test TestFlowRunPartialLastEvent with 3 sub-cases
covering single partial, multiple partial, and mixed partial/final
response scenarios.

Fixes #600
---
 internal/llminternal/base_flow.go      |   7 +-
 internal/llminternal/base_flow_test.go | 118 +++++++++++++++++++++++++
 2 files changed, 122 insertions(+), 3 deletions(-)

diff --git a/internal/llminternal/base_flow.go b/internal/llminternal/base_flow.go
index 0891acdeb..32039f85d 100644
--- a/internal/llminternal/base_flow.go
+++ b/internal/llminternal/base_flow.go
@@ -113,9 +113,10 @@ func (f *Flow) Run(ctx agent.InvocationContext) iter.Seq2[*session.Event, error]
 				return
 			}
 			if lastEvent.LLMResponse.Partial {
-				// We may have reached max token limit during streaming mode.
-				// TODO: handle Partial response in model level. CL 781377328
-				yield(nil, fmt.Errorf("TODO: last event is not final"))
+				// The last event is a partial streaming response (e.g., reached
+				// max token limit during streaming, or a sub-agent emitted
+				// partial events). The turn is complete so we simply return
+				// instead of looping again.
 				return
 			}
 		}
diff --git a/internal/llminternal/base_flow_test.go b/internal/llminternal/base_flow_test.go
index 9a8f2ec64..0f47bd1c0 100644
--- a/internal/llminternal/base_flow_test.go
+++ b/internal/llminternal/base_flow_test.go
@@ -15,12 +15,16 @@
 package llminternal
 
 import (
+	"context"
 	"errors"
+	"iter"
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
 	"google.golang.org/genai"
 
+	"google.golang.org/adk/agent"
+	"google.golang.org/adk/internal/agent/runconfig"
 	icontext "google.golang.org/adk/internal/context"
 	"google.golang.org/adk/internal/toolinternal"
 	"google.golang.org/adk/model"
@@ -575,3 +579,117 @@ func TestMergeEventActions(t *testing.T) {
 		})
 	}
 }
+
+// partialOnlyModel is a mock LLM that always returns partial responses in streaming mode.
+// This simulates the scenario where the model's last streaming chunk has Partial=true,
+// for example when reaching a max token limit.
+type partialOnlyModel struct {
+	responses []*model.LLMResponse
+}
+
+func (m *partialOnlyModel) Name() string { return "partial-mock" }
+func (m *partialOnlyModel) GenerateContent(_ context.Context, _ *model.LLMRequest, _ bool) iter.Seq2[*model.LLMResponse, error] {
+	return func(yield func(*model.LLMResponse, error) bool) {
+		for _, r := range m.responses {
+			if !yield(r, nil) {
+				return
+			}
+		}
+	}
+}
+
+// TestFlowRunPartialLastEvent verifies that Flow.Run does not return an error
+// when the last event from runOneStep has Partial=true.
+// This is a regression test for https://github.com/google/adk-go/issues/600.
+func TestFlowRunPartialLastEvent(t *testing.T) {
+	tests := []struct {
+		name      string
+		responses []*model.LLMResponse
+		wantTexts []string
+	}{
+		{
+			name: "single partial response completes without error",
+			responses: []*model.LLMResponse{
+				{
+					Content: genai.NewContentFromText("Hello", genai.RoleModel),
+					Partial: true,
+				},
+			},
+			wantTexts: []string{"Hello"},
+		},
+		{
+			name: "multiple partial responses complete without error",
+			responses: []*model.LLMResponse{
+				{
+					Content: genai.NewContentFromText("Hello", genai.RoleModel),
+					Partial: true,
+				},
+				{
+					Content: genai.NewContentFromText(" World", genai.RoleModel),
+					Partial: true,
+				},
+			},
+			wantTexts: []string{"Hello", " World"},
+		},
+		{
+			name: "partial followed by non-partial completes without error",
+			responses: []*model.LLMResponse{
+				{
+					Content: genai.NewContentFromText("Hello", genai.RoleModel),
+					Partial: true,
+				},
+				{
+					Content: genai.NewContentFromText("Hello World", genai.RoleModel),
+					Partial: false,
+				},
+			},
+			wantTexts: []string{"Hello", "Hello World"},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			mockAgent, err := agent.New(agent.Config{Name: "test-agent"})
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			ctx := runconfig.ToContext(t.Context(), &runconfig.RunConfig{
+				StreamingMode: runconfig.StreamingModeSSE,
+			})
+
+			invCtx := icontext.NewInvocationContext(ctx, icontext.InvocationContextParams{
+				Agent: mockAgent,
+			})
+
+			f := &Flow{
+				Model:             &partialOnlyModel{responses: tc.responses},
+				RequestProcessors: nil, // no preprocessors needed
+			}
+
+			var gotTexts []string
+			var gotErr error
+			for ev, err := range f.Run(invCtx) {
+				if err != nil {
+					gotErr = err
+					break
+				}
+				if ev != nil && ev.Content != nil {
+					for _, p := range ev.Content.Parts {
+						if p.Text != "" {
+							gotTexts = append(gotTexts, p.Text)
+						}
+					}
+				}
+			}
+
+			if gotErr != nil {
+				t.Errorf("Flow.Run() returned unexpected error: %v", gotErr)
+			}
+
+			if diff := cmp.Diff(tc.wantTexts, gotTexts); diff != "" {
+				t.Errorf("Flow.Run() text mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}

From ab9b6e4349b5607e161aa963d627cd4de1ef4342 Mon Sep 17 00:00:00 2001
From: Mahendra Singh Rathore
 <49229348+mahendrarathore1742@users.noreply.github.com>
Date: Tue, 10 Mar 2026 14:26:34 +0000
Subject: [PATCH 2/2] review: rename partialOnlyModel to mockStreamingLLM for
 clarity

Address code review feedback: the mock type name partialOnlyModel was
misleading, since the type is also used in a test case with a
non-partial response. Rename it to mockStreamingLLM, which removes the
naming confusion and, unlike the shorter name mockLLM, does not
conflict with the existing mockLLM in the same package.
---
 internal/llminternal/base_flow_test.go | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/internal/llminternal/base_flow_test.go b/internal/llminternal/base_flow_test.go
index 0f47bd1c0..af1d7d213 100644
--- a/internal/llminternal/base_flow_test.go
+++ b/internal/llminternal/base_flow_test.go
@@ -580,15 +580,15 @@ func TestMergeEventActions(t *testing.T) {
 	}
 }
 
-// partialOnlyModel is a mock LLM that always returns partial responses in streaming mode.
+// mockStreamingLLM is a mock LLM that returns a canned sequence of responses.
 // This simulates the scenario where the model's last streaming chunk has Partial=true,
 // for example when reaching a max token limit.
-type partialOnlyModel struct {
+type mockStreamingLLM struct {
 	responses []*model.LLMResponse
 }
 
-func (m *partialOnlyModel) Name() string { return "partial-mock" }
-func (m *partialOnlyModel) GenerateContent(_ context.Context, _ *model.LLMRequest, _ bool) iter.Seq2[*model.LLMResponse, error] {
+func (m *mockStreamingLLM) Name() string { return "mock-llm" }
+func (m *mockStreamingLLM) GenerateContent(_ context.Context, _ *model.LLMRequest, _ bool) iter.Seq2[*model.LLMResponse, error] {
 	return func(yield func(*model.LLMResponse, error) bool) {
 		for _, r := range m.responses {
 			if !yield(r, nil) {
@@ -663,7 +663,7 @@ func TestFlowRunPartialLastEvent(t *testing.T) {
 			})
 
 			f := &Flow{
-				Model:             &partialOnlyModel{responses: tc.responses},
+				Model:             &mockStreamingLLM{responses: tc.responses},
 				RequestProcessors: nil, // no preprocessors needed
 			}