Skip to content

Commit b0cf269

Browse files
mltheuser (Malte Heuser)
authored and committed
KG-561 Add Gemini 3 Pro Preview (#1236)
## Motivation and Context This PR adds support for Google's latest model, **Gemini 3 Pro Preview** (`gemini-3-pro-preview`). While integrating this model, significant changes were required in how "Thinking" (Reasoning) is configured compared to previous versions (Gemini 2.5): * **Thinking Configuration:** Gemini 3 abandons the token-based limit (`thinkingBudget`) in favor of abstract levels (`thinkingLevel`). Currently, `LOW` and `HIGH` are supported. * **Validation Logic:** The `GoogleThinkingConfig` has been updated to support these levels. An `init` block was added to enforce mutual exclusion: users cannot configure both a `thinkingBudget` (legacy/Gemini 2) and a `thinkingLevel` (Gemini 3) simultaneously. * **Conversation Consistency:** Gemini 3 is stricter regarding conversation history; it will fail if thought signatures are not reproduced correctly in subsequent requests. To verify these changes: * **Unit Tests:** Added tests to `ThinkingConfigTest` to verify the JSON serialization of the new `thinkingLevel` and to ensure the mutual exclusion validation works as expected. * **Integration Tests:** The new model was added to `reasoningCapableModels`. Crucially, this runs it against `integration_testReasoningMultiStep`. This validates that multi-turn conversations maintain the correct thought signatures/context, preventing the API failures specific to Gemini 3 when history is mishandled. ## Breaking Changes No breaking changes. The new configuration parameters are optional and specifically targeted at the new model. Existing configurations for Gemini 2 or other providers remain unaffected. 
--- #### Type of the changes - [x] New feature (non-breaking change which adds functionality) - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [x] Tests improvement - [ ] Refactoring #### Checklist - [x] The pull request has a description of the proposed change - [x] I read the [Contributing Guidelines](https://github.com/JetBrains/koog/blob/main/CONTRIBUTING.md) before opening the pull request - [x] The pull request uses **`develop`** as the base branch - [x] Tests for the changes have been added - [x] All new and existing tests passed ##### Additional steps for pull requests adding a new feature - [x] An issue describing the proposed change exists - [x] The pull request includes a link to the issue - [x] The change was discussed and approved in the issue - [ ] Docs have been added / updated --------- Co-authored-by: Malte Heuser <malte.heuser@ing.com>
1 parent ef58fb5 commit b0cf269

10 files changed

Lines changed: 209 additions & 12 deletions

File tree

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/executor/ExecutorIntegrationTestBase.kt

Lines changed: 52 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,10 @@ import ai.koog.prompt.executor.clients.LLMClientException
4040
import ai.koog.prompt.executor.clients.LLMEmbeddingProvider
4141
import ai.koog.prompt.executor.clients.anthropic.AnthropicParams
4242
import ai.koog.prompt.executor.clients.anthropic.models.AnthropicThinking
43+
import ai.koog.prompt.executor.clients.google.GoogleModels
4344
import ai.koog.prompt.executor.clients.google.GoogleParams
4445
import ai.koog.prompt.executor.clients.google.models.GoogleThinkingConfig
46+
import ai.koog.prompt.executor.clients.google.models.GoogleThinkingLevel
4547
import ai.koog.prompt.executor.clients.openai.OpenAIModels
4648
import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams
4749
import ai.koog.prompt.executor.clients.openai.base.models.ReasoningEffort
@@ -56,6 +58,7 @@ import ai.koog.prompt.markdown.markdown
5658
import ai.koog.prompt.message.AttachmentContent
5759
import ai.koog.prompt.message.ContentPart
5860
import ai.koog.prompt.message.Message
61+
import ai.koog.prompt.message.RequestMetaInfo
5962
import ai.koog.prompt.message.ResponseMetaInfo
6063
import ai.koog.prompt.params.LLMParams
6164
import ai.koog.prompt.params.LLMParams.ToolChoice
@@ -131,13 +134,23 @@ abstract class ExecutorIntegrationTestBase {
131134
maxTokens = 256
132135
)
133136

134-
is LLMProvider.Google -> GoogleParams(
135-
thinkingConfig = GoogleThinkingConfig(
136-
includeThoughts = true,
137-
thinkingBudget = 256
138-
),
139-
maxTokens = 256
140-
)
137+
is LLMProvider.Google -> {
138+
val thinkingConfig = if (model.id == GoogleModels.Gemini3_Pro_Preview.id) {
139+
GoogleThinkingConfig(
140+
includeThoughts = true,
141+
thinkingLevel = GoogleThinkingLevel.LOW // with HIGH thoughts often exceed maxTokens causing test failures
142+
)
143+
} else {
144+
GoogleThinkingConfig(
145+
includeThoughts = true,
146+
thinkingBudget = 256
147+
)
148+
}
149+
GoogleParams(
150+
thinkingConfig = thinkingConfig,
151+
maxTokens = 256
152+
)
153+
}
141154

142155
else -> LLMParams(maxTokens = 256)
143156
}
@@ -1034,4 +1047,36 @@ abstract class ExecutorIntegrationTestBase {
10341047
}
10351048
}
10361049
}
1050+
1051+
// This test targets models that support/require passing reasoning back (Google Gemini 3)
1052+
open fun integration_testReasoningMultiStep(model: LLModel) = runTest(timeout = 300.seconds) {
1053+
Models.assumeAvailable(model.provider)
1054+
1055+
val params = createReasoningParams(model)
1056+
val prompt1 = Prompt.build("reasoning-multistep-1", params = params) {
1057+
system("You are a helpful assistant.")
1058+
user("What is 5 + 5? Think step by step.")
1059+
}
1060+
1061+
val client = getLLMClient(model)
1062+
1063+
val response1 = withRetry(times = 3, testName = "integration_testReasoningMultiStep_Turn1[${model.id}]") {
1064+
client.execute(prompt1, model)
1065+
}
1066+
1067+
response1.shouldForAny { it is Message.Reasoning }
1068+
1069+
val prompt2 = Prompt(
1070+
id = "reasoning-multistep-2",
1071+
messages = prompt1.messages + response1 + Message.User(ContentPart.Text("Multiply the result by 2."), metaInfo = RequestMetaInfo.Empty),
1072+
params = params
1073+
)
1074+
1075+
withRetry(times = 3, testName = "integration_testReasoningMultiStep_Turn2[${model.id}]") {
1076+
val response2 = client.execute(prompt2, model)
1077+
response2.shouldNotBeEmpty()
1078+
val answer = response2.filterIsInstance<Message.Assistant>().first().content
1079+
answer.shouldContain("20")
1080+
}
1081+
}
10371082
}

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/executor/MultipleLLMPromptExecutorIntegrationTest.kt

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -256,4 +256,10 @@ class MultipleLLMPromptExecutorIntegrationTest : ExecutorIntegrationTestBase() {
256256
override fun integration_testReasoningWithEncryption(model: LLModel) {
257257
super.integration_testReasoningWithEncryption(model)
258258
}
259+
260+
@ParameterizedTest
261+
@MethodSource("reasoningCapableModels")
262+
override fun integration_testReasoningMultiStep(model: LLModel) {
263+
super.integration_testReasoningMultiStep(model)
264+
}
259265
}

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/executor/SingleLLMPromptExecutorIntegrationTest.kt

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -300,4 +300,10 @@ class SingleLLMPromptExecutorIntegrationTest : ExecutorIntegrationTestBase() {
300300
override fun integration_testReasoningWithEncryption(model: LLModel) {
301301
super.integration_testReasoningWithEncryption(model)
302302
}
303+
304+
@ParameterizedTest
305+
@MethodSource("reasoningCapableModels")
306+
override fun integration_testReasoningMultiStep(model: LLModel) {
307+
super.integration_testReasoningMultiStep(model)
308+
}
303309
}

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/utils/Models.kt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,7 @@ object Models {
105105
OpenAIModels.Chat.GPT5_1,
106106
AnthropicModels.Haiku_4_5,
107107
GoogleModels.Gemini2_5Pro,
108+
GoogleModels.Gemini3_Pro_Preview,
108109
)
109110
}
110111

koog-ktor/src/commonMain/kotlin/ai/koog/ktor/utils/LLMModelParser.kt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -260,6 +260,7 @@ private val GOOGLE_MODELS_MAP = mapOf(
260260
"gemini2_5pro" to GoogleModels.Gemini2_5Pro,
261261
"gemini2_5flash" to GoogleModels.Gemini2_5Flash,
262262
"gemini2_5flashlite" to GoogleModels.Gemini2_5FlashLite,
263+
"gemini3propreview" to GoogleModels.Gemini3_Pro_Preview,
263264
"gemini_embedding001" to GoogleModels.Embeddings.GeminiEmbedding001,
264265
)
265266

koog-ktor/src/commonTest/kotlin/ai/koog/ktor/ModelIdentifierParsingTest.kt

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -250,6 +250,11 @@ class ModelIdentifierParsingTest {
250250
assertEquals(LLMProvider.Google, gemini25FlashLite.provider)
251251
assertEquals(GoogleModels.Gemini2_5FlashLite, gemini25FlashLite)
252252

253+
val gemini3ProPreview = getModelFromIdentifier("google.gemini3propreview")
254+
assertNotNull(gemini3ProPreview)
255+
assertEquals(LLMProvider.Google, gemini3ProPreview.provider)
256+
assertEquals(GoogleModels.Gemini3_Pro_Preview, gemini3ProPreview)
257+
253258
val geminiEmbedding001 = getModelFromIdentifier("google.gemini_embedding001")
254259
assertNotNull(geminiEmbedding001)
255260
assertEquals(LLMProvider.Google, geminiEmbedding001.provider)

prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/GoogleLLMClient.kt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -321,7 +321,7 @@ public open class GoogleLLMClient(
321321
flushCalls()
322322
contents.add(
323323
GoogleContent(
324-
role = "assistant",
324+
role = "model",
325325
parts = listOf(
326326
GooglePart.Text(
327327
text = message.content,

prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/GoogleModels.kt

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ import ai.koog.prompt.llm.LLModel
2323
* | [Gemini2_5Pro] | Slow | $1.25-$2.50 / $10.00-$15.00² | Audio, Image, Video, Text, Tools | Text, Tools |
2424
* | [Gemini2_5Flash] | Medium | $0.15-$1.00 / $0.60-$3.50³ | Audio, Image, Video, Text, Tools | Text, Tools |
2525
* | [Gemini2_5FlashLite] | Fast | $0.10-$0.30 / $0.40 | Audio, Image, Video, Text, Tools | Text, Tools |
26+
* | [Gemini3_Pro_Preview] | Slow | $2.00-$4.00 / $12.00-$18.00 | Audio, Image, Video, Text, Tools | Text, Tools |
2627
*
2728
* @see <a href="modelcards.withgoogle.com/model-cards">
2829
*/
@@ -151,6 +152,23 @@ public object GoogleModels : LLModelDefinitions {
151152
maxOutputTokens = 65_536,
152153
)
153154

155+
/**
156+
* Gemini 3 Pro is the first model in the new series, featuring advanced reasoning capabilities.
157+
* It uses `thinking_level` instead of `thinking_budget` for reasoning control.
158+
*
159+
* Context window: 1 million tokens
160+
* Knowledge cutoff: January 2025
161+
*
162+
* @see <a href="ai.google.dev/gemini-api/docs/gemini-3">
163+
*/
164+
public val Gemini3_Pro_Preview: LLModel = LLModel(
165+
provider = LLMProvider.Google,
166+
id = "gemini-3-pro-preview",
167+
capabilities = fullCapabilities,
168+
contextLength = 1_048_576,
169+
maxOutputTokens = 65_536,
170+
)
171+
154172
/**
155173
* Models for generating text embeddings.
156174
*/

prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/models/GoogleGenerateContent.kt

Lines changed: 25 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -328,15 +328,36 @@ internal class GoogleToolConfig(
328328
* Optional block that controls Gemini's "thinking" mode.
329329
*
330330
* @property includeThoughts When set to `true`, the model will return its intermediate reasoning.
331-
* @property thinkingBudget Token limit for reasoning, `0` disables it (Flash 2.5).
331+
* @property thinkingBudget Token limit for reasoning (Gemini 2.0). Mutually exclusive with [thinkingLevel].
332+
* @property thinkingLevel Reasoning depth level (Gemini 3.0). Mutually exclusive with [thinkingBudget].
332333
*
333-
* API reference: https://ai.google.dev/gemini-api/docs/thinking#set-budget
334+
* API reference: https://ai.google.dev/gemini-api/docs/thinking
334335
*/
335336
@Serializable
336337
public data class GoogleThinkingConfig(
337338
val includeThoughts: Boolean? = null,
338-
val thinkingBudget: Int? = null
339-
)
339+
val thinkingBudget: Int? = null,
340+
val thinkingLevel: GoogleThinkingLevel? = null
341+
) {
342+
init {
343+
require(thinkingBudget == null || thinkingLevel == null) {
344+
"Cannot set both 'thinkingBudget' and 'thinkingLevel'. " +
345+
"Use 'thinkingBudget' for Gemini 2.0 models and 'thinkingLevel' for Gemini 3.0 models."
346+
}
347+
}
348+
}
349+
350+
/**
351+
* Levels of thinking depth for Gemini 3 models.
352+
*/
353+
@Serializable
354+
public enum class GoogleThinkingLevel {
355+
@SerialName("low")
356+
LOW,
357+
358+
@SerialName("high")
359+
HIGH
360+
}
340361

341362
/**
342363
* Configuration for tool calling

prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/jvmTest/kotlin/ai/koog/prompt/executor/clients/google/ThinkingConfigTest.kt

Lines changed: 94 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,13 @@ package ai.koog.prompt.executor.clients.google
22

33
import ai.koog.prompt.executor.clients.google.models.GoogleGenerationConfig
44
import ai.koog.prompt.executor.clients.google.models.GoogleThinkingConfig
5+
import ai.koog.prompt.executor.clients.google.models.GoogleThinkingLevel
6+
import ai.koog.test.utils.runWithBothJsonConfigurations
7+
import ai.koog.test.utils.verifyDeserialization
8+
import io.kotest.assertions.json.shouldEqualJson
9+
import io.kotest.assertions.throwables.shouldThrow
10+
import io.kotest.matchers.shouldBe
11+
import io.kotest.matchers.shouldNotBe
512
import kotlinx.serialization.json.Json
613
import org.junit.jupiter.api.Assertions.assertTrue
714
import kotlin.test.Test
@@ -15,4 +22,91 @@ class ThinkingConfigTest {
1522
val json = Json.encodeToString(GoogleGenerationConfig.serializer(), cfg)
1623
assertTrue("\"thinkingBudget\":0" in json)
1724
}
25+
26+
@Test fun `test thinkingConfig serialization (Gemini 2 Legacy)`() =
27+
runWithBothJsonConfigurations("thinkingConfig serialization") { json ->
28+
val request = GoogleGenerationConfig(
29+
responseMimeType = "application/json",
30+
maxOutputTokens = 256,
31+
temperature = 0.2,
32+
thinkingConfig = GoogleThinkingConfig(
33+
includeThoughts = true,
34+
thinkingBudget = 1000,
35+
)
36+
)
37+
38+
val jsonString = json.encodeToString(GoogleGenerationConfig.serializer(), request)
39+
40+
jsonString shouldEqualJson """
41+
{
42+
"responseMimeType": "application/json",
43+
"maxOutputTokens": 256,
44+
"temperature": 0.2,
45+
"thinkingConfig": {
46+
"includeThoughts": true,
47+
"thinkingBudget": 1000
48+
}
49+
}
50+
""".trimIndent().replace("\r\n", "\n")
51+
}
52+
53+
@Test fun `test thinkingConfig serialization (Gemini 3 New)`() =
54+
runWithBothJsonConfigurations("thinkingConfig serialization gemini 3") { json ->
55+
val request = GoogleGenerationConfig(
56+
responseMimeType = "application/json",
57+
thinkingConfig = GoogleThinkingConfig(
58+
includeThoughts = true,
59+
thinkingLevel = GoogleThinkingLevel.HIGH
60+
)
61+
)
62+
63+
val jsonString = json.encodeToString(GoogleGenerationConfig.serializer(), request)
64+
65+
// Verify that thinkingLevel is serialized as "high" and thinkingBudget is absent
66+
jsonString shouldEqualJson """
67+
{
68+
"responseMimeType": "application/json",
69+
"thinkingConfig": {
70+
"includeThoughts": true,
71+
"thinkingLevel": "high"
72+
}
73+
}
74+
""".trimIndent().replace("\r\n", "\n")
75+
}
76+
77+
@Test fun `test thinkingConfig validation prevents mixing old and new params`() {
78+
// Should throw IllegalArgumentException because init block checks mutual exclusivity
79+
shouldThrow<IllegalArgumentException> {
80+
GoogleThinkingConfig(
81+
includeThoughts = true,
82+
thinkingBudget = 1024,
83+
thinkingLevel = GoogleThinkingLevel.LOW
84+
)
85+
}
86+
}
87+
88+
@Test
89+
fun `test thinkingConfig deserialization`() =
90+
runWithBothJsonConfigurations("thinkingConfig deserialization") { json ->
91+
val payload = """
92+
{
93+
"responseMimeType": "application/json",
94+
"maxOutputTokens": 256,
95+
"temperature": 0.2,
96+
"thinkingConfig": {"includeThoughts": true, "thinkingBudget": 1000}
97+
}
98+
""".trimIndent().replace("\r\n", "\n")
99+
100+
val decoded: GoogleGenerationConfig = verifyDeserialization(
101+
payload = payload,
102+
json = json
103+
)
104+
105+
decoded.responseMimeType shouldBe "application/json"
106+
decoded.maxOutputTokens shouldBe 256
107+
decoded.temperature shouldBe 0.2
108+
decoded.thinkingConfig shouldNotBe null
109+
decoded.thinkingConfig?.includeThoughts shouldBe true
110+
decoded.thinkingConfig?.thinkingBudget shouldBe 1000
111+
}
18112
}

0 commit comments

Comments (0)