Skip to content

Commit b0cf269

Browse files
mltheuser (Malte Heuser)
authored and committed
KG-561 Add Gemini 3 Pro Preview (#1236)
## Motivation and Context This PR adds support for Google's latest model, **Gemini 3 Pro Preview** (`gemini-3-pro-preview`). While integrating this model, significant changes were required in how "Thinking" (Reasoning) is configured compared to previous versions (Gemini 2.5): * **Thinking Configuration:** Gemini 3 abandons the token-based limit (`thinkingBudget`) in favor of abstract levels (`thinkingLevel`). Currently, `LOW` and `HIGH` are supported. * **Validation Logic:** The `GoogleThinkingConfig` has been updated to support these levels. An `init` block was added to enforce mutual exclusion: users cannot configure both a `thinkingBudget` (legacy/Gemini 2) and a `thinkingLevel` (Gemini 3) simultaneously. * **Conversation Consistency:** Gemini 3 is stricter regarding conversation history; it will fail if thought signatures are not reproduced correctly in subsequent requests. To verify these changes: * **Unit Tests:** Added tests to `ThinkingConfigTest` to verify the JSON serialization of the new `thinkingLevel` and to ensure the mutual exclusion validation works as expected. * **Integration Tests:** The new model was added to `reasoningCapableModels`. Crucially, this runs it against `integration_testReasoningMultiStep`. This validates that multi-turn conversations maintain the correct thought signatures/context, preventing the API failures specific to Gemini 3 when history is mishandled. ## Breaking Changes No breaking changes. The new configuration parameters are optional and specifically targeted at the new model. Existing configurations for Gemini 2 or other providers remain unaffected. 
--- #### Type of the changes - [x] New feature (non-breaking change which adds functionality) - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [x] Tests improvement - [ ] Refactoring #### Checklist - [x] The pull request has a description of the proposed change - [x] I read the [Contributing Guidelines](https://github.com/JetBrains/koog/blob/main/CONTRIBUTING.md) before opening the pull request - [x] The pull request uses **`develop`** as the base branch - [x] Tests for the changes have been added - [x] All new and existing tests passed ##### Additional steps for pull requests adding a new feature - [x] An issue describing the proposed change exists - [x] The pull request includes a link to the issue - [x] The change was discussed and approved in the issue - [ ] Docs have been added / updated --------- Co-authored-by: Malte Heuser <malte.heuser@ing.com>
1 parent ef58fb5 commit b0cf269

10 files changed

Lines changed: 209 additions & 12 deletions

File tree

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/executor/ExecutorIntegrationTestBase.kt

Lines changed: 52 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,10 @@ import ai.koog.prompt.executor.clients.LLMClientException
4040
import ai.koog.prompt.executor.clients.LLMEmbeddingProvider
4141
import ai.koog.prompt.executor.clients.anthropic.AnthropicParams
4242
import ai.koog.prompt.executor.clients.anthropic.models.AnthropicThinking
43+
import ai.koog.prompt.executor.clients.google.GoogleModels
4344
import ai.koog.prompt.executor.clients.google.GoogleParams
4445
import ai.koog.prompt.executor.clients.google.models.GoogleThinkingConfig
46+
import ai.koog.prompt.executor.clients.google.models.GoogleThinkingLevel
4547
import ai.koog.prompt.executor.clients.openai.OpenAIModels
4648
import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams
4749
import ai.koog.prompt.executor.clients.openai.base.models.ReasoningEffort
@@ -56,6 +58,7 @@ import ai.koog.prompt.markdown.markdown
5658
import ai.koog.prompt.message.AttachmentContent
5759
import ai.koog.prompt.message.ContentPart
5860
import ai.koog.prompt.message.Message
61+
import ai.koog.prompt.message.RequestMetaInfo
5962
import ai.koog.prompt.message.ResponseMetaInfo
6063
import ai.koog.prompt.params.LLMParams
6164
import ai.koog.prompt.params.LLMParams.ToolChoice
@@ -131,13 +134,23 @@ abstract class ExecutorIntegrationTestBase {
131134
maxTokens = 256
132135
)
133136

134-
is LLMProvider.Google -> GoogleParams(
135-
thinkingConfig = GoogleThinkingConfig(
136-
includeThoughts = true,
137-
thinkingBudget = 256
138-
),
139-
maxTokens = 256
140-
)
137+
is LLMProvider.Google -> {
138+
val thinkingConfig = if (model.id == GoogleModels.Gemini3_Pro_Preview.id) {
139+
GoogleThinkingConfig(
140+
includeThoughts = true,
141+
thinkingLevel = GoogleThinkingLevel.LOW // with HIGH thoughts often exceed maxTokens causing test failures
142+
)
143+
} else {
144+
GoogleThinkingConfig(
145+
includeThoughts = true,
146+
thinkingBudget = 256
147+
)
148+
}
149+
GoogleParams(
150+
thinkingConfig = thinkingConfig,
151+
maxTokens = 256
152+
)
153+
}
141154

142155
else -> LLMParams(maxTokens = 256)
143156
}
@@ -1034,4 +1047,36 @@ abstract class ExecutorIntegrationTestBase {
10341047
}
10351048
}
10361049
}
1050+
1051+
// This test targets models that support/require passing reasoning back (Google Gemini 3)
1052+
open fun integration_testReasoningMultiStep(model: LLModel) = runTest(timeout = 300.seconds) {
1053+
Models.assumeAvailable(model.provider)
1054+
1055+
val params = createReasoningParams(model)
1056+
val prompt1 = Prompt.build("reasoning-multistep-1", params = params) {
1057+
system("You are a helpful assistant.")
1058+
user("What is 5 + 5? Think step by step.")
1059+
}
1060+
1061+
val client = getLLMClient(model)
1062+
1063+
val response1 = withRetry(times = 3, testName = "integration_testReasoningMultiStep_Turn1[${model.id}]") {
1064+
client.execute(prompt1, model)
1065+
}
1066+
1067+
response1.shouldForAny { it is Message.Reasoning }
1068+
1069+
val prompt2 = Prompt(
1070+
id = "reasoning-multistep-2",
1071+
messages = prompt1.messages + response1 + Message.User(ContentPart.Text("Multiply the result by 2."), metaInfo = RequestMetaInfo.Empty),
1072+
params = params
1073+
)
1074+
1075+
withRetry(times = 3, testName = "integration_testReasoningMultiStep_Turn2[${model.id}]") {
1076+
val response2 = client.execute(prompt2, model)
1077+
response2.shouldNotBeEmpty()
1078+
val answer = response2.filterIsInstance<Message.Assistant>().first().content
1079+
answer.shouldContain("20")
1080+
}
1081+
}
10371082
}

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/executor/MultipleLLMPromptExecutorIntegrationTest.kt

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -256,4 +256,10 @@ class MultipleLLMPromptExecutorIntegrationTest : ExecutorIntegrationTestBase() {
256256
override fun integration_testReasoningWithEncryption(model: LLModel) {
257257
super.integration_testReasoningWithEncryption(model)
258258
}
259+
260+
@ParameterizedTest
261+
@MethodSource("reasoningCapableModels")
262+
override fun integration_testReasoningMultiStep(model: LLModel) {
263+
super.integration_testReasoningMultiStep(model)
264+
}
259265
}

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/executor/SingleLLMPromptExecutorIntegrationTest.kt

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -300,4 +300,10 @@ class SingleLLMPromptExecutorIntegrationTest : ExecutorIntegrationTestBase() {
300300
override fun integration_testReasoningWithEncryption(model: LLModel) {
301301
super.integration_testReasoningWithEncryption(model)
302302
}
303+
304+
@ParameterizedTest
305+
@MethodSource("reasoningCapableModels")
306+
override fun integration_testReasoningMultiStep(model: LLModel) {
307+
super.integration_testReasoningMultiStep(model)
308+
}
303309
}

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/utils/Models.kt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,7 @@ object Models {
105105
OpenAIModels.Chat.GPT5_1,
106106
AnthropicModels.Haiku_4_5,
107107
GoogleModels.Gemini2_5Pro,
108+
GoogleModels.Gemini3_Pro_Preview,
108109
)
109110
}
110111

koog-ktor/src/commonMain/kotlin/ai/koog/ktor/utils/LLMModelParser.kt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -260,6 +260,7 @@ private val GOOGLE_MODELS_MAP = mapOf(
260260
"gemini2_5pro" to GoogleModels.Gemini2_5Pro,
261261
"gemini2_5flash" to GoogleModels.Gemini2_5Flash,
262262
"gemini2_5flashlite" to GoogleModels.Gemini2_5FlashLite,
263+
"gemini3propreview" to GoogleModels.Gemini3_Pro_Preview,
263264
"gemini_embedding001" to GoogleModels.Embeddings.GeminiEmbedding001,
264265
)
265266

koog-ktor/src/commonTest/kotlin/ai/koog/ktor/ModelIdentifierParsingTest.kt

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -250,6 +250,11 @@ class ModelIdentifierParsingTest {
250250
assertEquals(LLMProvider.Google, gemini25FlashLite.provider)
251251
assertEquals(GoogleModels.Gemini2_5FlashLite, gemini25FlashLite)
252252

253+
val gemini3ProPreview = getModelFromIdentifier("google.gemini3propreview")
254+
assertNotNull(gemini3ProPreview)
255+
assertEquals(LLMProvider.Google, gemini3ProPreview.provider)
256+
assertEquals(GoogleModels.Gemini3_Pro_Preview, gemini3ProPreview)
257+
253258
val geminiEmbedding001 = getModelFromIdentifier("google.gemini_embedding001")
254259
assertNotNull(geminiEmbedding001)
255260
assertEquals(LLMProvider.Google, geminiEmbedding001.provider)

prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/GoogleLLMClient.kt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -321,7 +321,7 @@ public open class GoogleLLMClient(
321321
flushCalls()
322322
contents.add(
323323
GoogleContent(
324-
role = "assistant",
324+
role = "model",
325325
parts = listOf(
326326
GooglePart.Text(
327327
text = message.content,

prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/GoogleModels.kt

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ import ai.koog.prompt.llm.LLModel
2323
* | [Gemini2_5Pro] | Slow | $1.25-$2.50 / $10.00-$15.00² | Audio, Image, Video, Text, Tools | Text, Tools |
2424
* | [Gemini2_5Flash] | Medium | $0.15-$1.00 / $0.60-$3.50³ | Audio, Image, Video, Text, Tools | Text, Tools |
2525
* | [Gemini2_5FlashLite] | Fast | $0.10-$0.30 / $0.40 | Audio, Image, Video, Text, Tools | Text, Tools |
26+
* | [Gemini3_Pro_Preview] | Slow | $2.00-$4.00 / $12.00-$18.00 | Audio, Image, Video, Text, Tools | Text, Tools |
2627
*
2728
* @see <a href="modelcards.withgoogle.com/model-cards">
2829
*/
@@ -151,6 +152,23 @@ public object GoogleModels : LLModelDefinitions {
151152
maxOutputTokens = 65_536,
152153
)
153154

155+
/**
156+
* Gemini 3 Pro is the first model in the new series, featuring advanced reasoning capabilities.
157+
* It uses `thinking_level` instead of `thinking_budget` for reasoning control.
158+
*
159+
* Context window: 1 million tokens
160+
* Knowledge cutoff: January 2025
161+
*
162+
* @see <a href="ai.google.dev/gemini-api/docs/gemini-3">
163+
*/
164+
public val Gemini3_Pro_Preview: LLModel = LLModel(
165+
provider = LLMProvider.Google,
166+
id = "gemini-3-pro-preview",
167+
capabilities = fullCapabilities,
168+
contextLength = 1_048_576,
169+
maxOutputTokens = 65_536,
170+
)
171+
154172
/**
155173
* Models for generating text embeddings.
156174
*/

prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/models/GoogleGenerateContent.kt

Lines changed: 25 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -328,15 +328,36 @@ internal class GoogleToolConfig(
328328
* Optional block that controls Gemini's "thinking" mode.
329329
*
330330
* @property includeThoughts When set to `true`, the model will return its intermediate reasoning.
331-
* @property thinkingBudget Token limit for reasoning, `0` disables it (Flash 2.5).
331+
* @property thinkingBudget Token limit for reasoning (Gemini 2.0). Mutually exclusive with [thinkingLevel].
332+
* @property thinkingLevel Reasoning depth level (Gemini 3.0). Mutually exclusive with [thinkingBudget].
332333
*
333-
* API reference: https://ai.google.dev/gemini-api/docs/thinking#set-budget
334+
* API reference: https://ai.google.dev/gemini-api/docs/thinking
334335
*/
335336
@Serializable
336337
public data class GoogleThinkingConfig(
337338
val includeThoughts: Boolean? = null,
338-
val thinkingBudget: Int? = null
339-
)
339+
val thinkingBudget: Int? = null,
340+
val thinkingLevel: GoogleThinkingLevel? = null
341+
) {
342+
init {
343+
require(thinkingBudget == null || thinkingLevel == null) {
344+
"Cannot set both 'thinkingBudget' and 'thinkingLevel'. " +
345+
"Use 'thinkingBudget' for Gemini 2.0 models and 'thinkingLevel' for Gemini 3.0 models."
346+
}
347+
}
348+
}
349+
350+
/**
351+
* Levels of thinking depth for Gemini 3 models.
352+
*/
353+
@Serializable
354+
public enum class GoogleThinkingLevel {
355+
@SerialName("low")
356+
LOW,
357+
358+
@SerialName("high")
359+
HIGH
360+
}
340361

341362
/**
342363
* Configuration for tool calling

prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/jvmTest/kotlin/ai/koog/prompt/executor/clients/google/ThinkingConfigTest.kt

Lines changed: 94 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,13 @@ package ai.koog.prompt.executor.clients.google
22

33
import ai.koog.prompt.executor.clients.google.models.GoogleGenerationConfig
44
import ai.koog.prompt.executor.clients.google.models.GoogleThinkingConfig
5+
import ai.koog.prompt.executor.clients.google.models.GoogleThinkingLevel
6+
import ai.koog.test.utils.runWithBothJsonConfigurations
7+
import ai.koog.test.utils.verifyDeserialization
8+
import io.kotest.assertions.json.shouldEqualJson
9+
import io.kotest.assertions.throwables.shouldThrow
10+
import io.kotest.matchers.shouldBe
11+
import io.kotest.matchers.shouldNotBe
512
import kotlinx.serialization.json.Json
613
import org.junit.jupiter.api.Assertions.assertTrue
714
import kotlin.test.Test
@@ -15,4 +22,91 @@ class ThinkingConfigTest {
1522
val json = Json.encodeToString(GoogleGenerationConfig.serializer(), cfg)
1623
assertTrue("\"thinkingBudget\":0" in json)
1724
}
25+
26+
@Test fun `test thinkingConfig serialization (Gemini 2 Legacy)`() =
27+
runWithBothJsonConfigurations("thinkingConfig serialization") { json ->
28+
val request = GoogleGenerationConfig(
29+
responseMimeType = "application/json",
30+
maxOutputTokens = 256,
31+
temperature = 0.2,
32+
thinkingConfig = GoogleThinkingConfig(
33+
includeThoughts = true,
34+
thinkingBudget = 1000,
35+
)
36+
)
37+
38+
val jsonString = json.encodeToString(GoogleGenerationConfig.serializer(), request)
39+
40+
jsonString shouldEqualJson """
41+
{
42+
"responseMimeType": "application/json",
43+
"maxOutputTokens": 256,
44+
"temperature": 0.2,
45+
"thinkingConfig": {
46+
"includeThoughts": true,
47+
"thinkingBudget": 1000
48+
}
49+
}
50+
""".trimIndent().replace("\r\n", "\n")
51+
}
52+
53+
@Test fun `test thinkingConfig serialization (Gemini 3 New)`() =
54+
runWithBothJsonConfigurations("thinkingConfig serialization gemini 3") { json ->
55+
val request = GoogleGenerationConfig(
56+
responseMimeType = "application/json",
57+
thinkingConfig = GoogleThinkingConfig(
58+
includeThoughts = true,
59+
thinkingLevel = GoogleThinkingLevel.HIGH
60+
)
61+
)
62+
63+
val jsonString = json.encodeToString(GoogleGenerationConfig.serializer(), request)
64+
65+
// Verify that thinkingLevel is serialized as "high" and thinkingBudget is absent
66+
jsonString shouldEqualJson """
67+
{
68+
"responseMimeType": "application/json",
69+
"thinkingConfig": {
70+
"includeThoughts": true,
71+
"thinkingLevel": "high"
72+
}
73+
}
74+
""".trimIndent().replace("\r\n", "\n")
75+
}
76+
77+
@Test fun `test thinkingConfig validation prevents mixing old and new params`() {
78+
// Should throw IllegalArgumentException because init block checks mutual exclusivity
79+
shouldThrow<IllegalArgumentException> {
80+
GoogleThinkingConfig(
81+
includeThoughts = true,
82+
thinkingBudget = 1024,
83+
thinkingLevel = GoogleThinkingLevel.LOW
84+
)
85+
}
86+
}
87+
88+
@Test
89+
fun `test thinkingConfig deserialization`() =
90+
runWithBothJsonConfigurations("thinkingConfig deserialization") { json ->
91+
val payload = """
92+
{
93+
"responseMimeType": "application/json",
94+
"maxOutputTokens": 256,
95+
"temperature": 0.2,
96+
"thinkingConfig": {"includeThoughts": true, "thinkingBudget": 1000}
97+
}
98+
""".trimIndent().replace("\r\n", "\n")
99+
100+
val decoded: GoogleGenerationConfig = verifyDeserialization(
101+
payload = payload,
102+
json = json
103+
)
104+
105+
decoded.responseMimeType shouldBe "application/json"
106+
decoded.maxOutputTokens shouldBe 256
107+
decoded.temperature shouldBe 0.2
108+
decoded.thinkingConfig shouldNotBe null
109+
decoded.thinkingConfig?.includeThoughts shouldBe true
110+
decoded.thinkingConfig?.thinkingBudget shouldBe 1000
111+
}
18112
}

0 commit comments

Comments (0)