Skip to content

Commit 13d9b70

Browse files
feat(prompt): Support cache control for anthropic provider (#1812)
Anthropic: https://platform.claude.com/docs/en/build-with-claude/prompt-caching * Supported automatic prompt caching by adding "cache_control" into the request parameters * Supported explicit cache breakpoints caching by adding "cache_control" into messages * Added cache tokens to the usage https://platform.claude.com/docs/en/build-with-claude/prompt-caching#tracking-cache-performance Breaking: * Removed deprecated `user` message builders to allow one with cacheControl partially fix KG-707 --------- Co-authored-by: Anastasiia.Zarechneva <Anastasiia.Zarechneva@jetbrains.com>
1 parent 51fe749 commit 13d9b70

17 files changed

Lines changed: 1372 additions & 136 deletions

File tree

docs/docs/prompts/prompt-creation/cache-control.md

Lines changed: 498 additions & 0 deletions
Large diffs are not rendered by default.

docs/docs/prompts/prompt-creation/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,3 +493,4 @@ This creates a new prompt that includes all messages from `basePrompt` and the n
493493
- Learn how to work with [multimodal content](multimodal-content.md).
494494
- Run prompts with [LLM clients](../llm-clients.md) if you work with a single LLM provider.
495495
- Run prompts with [prompt executors](../prompt-executors.md) if you work with multiple LLM providers.
496+
- Learn how to use provider-side LLM prompt caching with [cache control](cache-control.md).

docs/mkdocs.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ nav:
2121
- prompts/index.md
2222
- Creating prompts:
2323
- prompts/prompt-creation/index.md
24-
- Multimodal content: prompts/prompt-creation/multimodal-content.md
24+
- Multimodal content: prompts/prompt-creation/multimodal-content.md
25+
- Cache control: prompts/prompt-creation/cache-control.md
2526
- Running prompts:
2627
- LLM clients: prompts/llm-clients.md
2728
- Prompt executors: prompts/prompt-executors.md
@@ -169,6 +170,7 @@ plugins:
169170
- prompts/prompt-creation/multimodal-content.md: This page provides details about multimodal inputs, which allow you to send text, images, audio, video, and documents in your prompts.
170171
- prompts/handling-failures.md: This page provides details about handling failures in prompts.
171172
- prompts/llm-response-caching.md: This page provides details about prompt caching, which lets you cache responses from LLMs to avoid repeated requests.
173+
- prompts/prompt-creation/cache-control.md: This page provides details about provider-side prompt caching control for Anthropic and Amazon Bedrock, including automatic and block-level cache breakpoints and TTL options.
172174
Running prompts:
173175
- prompts/llm-clients.md: This page provides details about LLM clients, which are used to run prompts and receive responses from LLMs. They allow working with a single LLM provider only.
174176
- prompts/prompt-executors.md: This page provides details about prompt executors, which wrap LLM clients and are used to run prompts and receive responses from LLMs. They allow working with multiple LLM providers in a unified way.
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
package ai.koog.integration.tests.executor

import ai.koog.agents.core.tools.ToolDescriptor
import ai.koog.agents.core.tools.ToolParameterDescriptor
import ai.koog.agents.core.tools.ToolParameterType
import ai.koog.integration.tests.utils.PromptUtils
import ai.koog.integration.tests.utils.RetryUtils
import ai.koog.integration.tests.utils.annotations.Retry
import ai.koog.integration.tests.utils.getLLMClientForProvider
import ai.koog.prompt.dsl.Prompt
import ai.koog.prompt.executor.clients.InternalLLMClientApi
import ai.koog.prompt.executor.clients.anthropic.AnthropicCacheControl
import ai.koog.prompt.executor.clients.anthropic.AnthropicModels
import ai.koog.prompt.executor.clients.anthropic.AnthropicParams
import ai.koog.prompt.executor.llms.MultiLLMPromptExecutor
import ai.koog.prompt.llm.LLModel
import ai.koog.prompt.message.Message
import io.kotest.assertions.withClue
import io.kotest.matchers.booleans.shouldBeTrue
import io.kotest.matchers.collections.shouldNotBeEmpty
import io.kotest.matchers.nulls.shouldNotBeNull
import kotlinx.coroutines.test.runTest
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.intOrNull
import kotlinx.serialization.json.jsonPrimitive
import org.junit.jupiter.api.Test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
import java.util.stream.Stream
import kotlin.time.Duration.Companion.seconds

/**
 * Integration tests for Anthropic cache control.
 *
 * Caching requires a minimum prompt length (usually ≥ 1024 tokens).
 * https://platform.claude.com/docs/en/build-with-claude/prompt-caching#cache-limitations
 * Tests use [ai.koog.integration.tests.utils.PromptUtils.assistantPromptOfAtLeastLength] to ensure
 * the prompt is long enough for the API to accept the cache breakpoint.
 *
 * Covers four placements of a cache breakpoint: prompt-wide (automatic via
 * [AnthropicParams.cacheControl]), system message, user message, and tool definition.
 */
@OptIn(InternalLLMClientApi::class)
class AnthropicCacheControlIntegrationTest {

    companion object {
        // Single shared model/client/executor for every test in this class.
        private val model = AnthropicModels.Sonnet_4_5
        private val client = getLLMClientForProvider(model.provider)
        private val executor = MultiLLMPromptExecutor(client)

        /**
         * Asserts that the response metadata shows cache was used (write or read).
         * On the first cached request `cacheCreationInputTokens` > 0.
         * On a subsequent request hitting the same prefix `cacheReadInputTokens` > 0.
         */
        private fun JsonObject.assertCacheWasUsed() {
            // Missing or non-numeric counters are treated as 0, so the assertion
            // below fails with a clear message instead of throwing on lookup.
            val cacheWrite = this["cacheCreationInputTokens"]?.jsonPrimitive?.intOrNull ?: 0
            val cacheRead = this["cacheReadInputTokens"]?.jsonPrimitive?.intOrNull ?: 0
            withClue("Expected cacheCreationInputTokens or cacheReadInputTokens > 0 in metadata $this") {
                (cacheWrite > 0 || cacheRead > 0).shouldBeTrue()
            }
        }

        /**
         * Runs [prompt] against [model] via [executor] and asserts that the first
         * assistant message (or, when [tools] are supplied, the first tool call as a
         * fallback) carries metadata proving a cache write or read took place.
         */
        private suspend fun testCacheControl(
            executor: MultiLLMPromptExecutor,
            prompt: Prompt,
            model: LLModel,
            tools: List<ToolDescriptor> = emptyList()
        ) {
            val responseWithMetadata = executor.execute(prompt, model, tools)
                .shouldNotBeNull()
                .shouldNotBeEmpty()
                .let { messages ->
                    // Prefer a plain assistant reply; a model given tools may answer
                    // with a tool call instead, which is equally valid for cache checks.
                    messages.filterIsInstance<Message.Assistant>().firstOrNull()
                        ?: if (tools.isNotEmpty()) {
                            messages.filterIsInstance<Message.Tool.Call>().firstOrNull()
                        } else {
                            null
                        }
                }

            responseWithMetadata.shouldNotBeNull {
                metaInfo.metadata
                    .shouldNotBeNull()
                    .assertCacheWasUsed()
            }
        }

        // Parameter source: both TTL variants exercised by the automatic test below.
        @JvmStatic
        fun cacheControlType(): Stream<AnthropicCacheControl> = Stream.of(
            AnthropicCacheControl.Default,
            AnthropicCacheControl.OneHour,
        )
    }

    /**
     * Automatic (prompt-wide) caching: cache control is set once via [AnthropicParams]
     * rather than on an individual message. Parameterized over both TTL variants.
     */
    @ParameterizedTest
    @MethodSource("cacheControlType")
    fun integration_testAutomaticCacheControl(cacheControl: AnthropicCacheControl) = runTest(timeout = 120.seconds) {
        val params = AnthropicParams(cacheControl = cacheControl)
        // NOTE(review): the id says "1h" but this test runs for Default (5 min) too —
        // consider renaming to "test-auto-cache".
        val prompt = Prompt.build("test-auto-cache-1h", params = params) {
            system(PromptUtils.assistantPromptOfAtLeastLength(1200))
            user("What is the capital of Italy?")
        }

        RetryUtils.withRetry(
            times = 3,
            testName = "integration_testAutomaticCacheControl"
        ) {
            testCacheControl(executor, prompt, model)
        }
    }

    /** Explicit cache breakpoint attached to the system message. */
    @Retry
    @Test
    fun integration_testCacheControlOnSystemMessage() = runTest(timeout = 120.seconds) {
        val prompt = Prompt.build("test-cache-system-msg") {
            system(PromptUtils.assistantPromptOfAtLeastLength(1200), AnthropicCacheControl.Default)
            user("What is the capital of France?")
        }
        testCacheControl(executor, prompt, model)
    }

    /** Explicit cache breakpoint attached to the user message. */
    @Retry
    @Test
    fun integration_testCacheControlOnUserMessage() = runTest(timeout = 120.seconds) {
        val prompt = Prompt.build("test-cache-user-msg") {
            system(PromptUtils.assistantPromptOfAtLeastLength(1200))
            user("What is the capital of France?", AnthropicCacheControl.Default)
        }
        testCacheControl(executor, prompt, model)
    }

    /**
     * Explicit cache breakpoint attached to a tool definition. The padded tool
     * description (≥ 1600 chars) exists to clear the provider's minimum cacheable
     * length for the tools section of the request.
     */
    @Retry
    @Test
    fun integration_testCacheControlOnToolDefinition() = runTest(timeout = 120.seconds) {
        val cachedTool = ToolDescriptor(
            name = "calculator",
            description = PromptUtils.assistantPromptOfAtLeastLength(1600, "A calculator tool"),
            requiredParameters = listOf(
                ToolParameterDescriptor("expression", "Math expression to evaluate", ToolParameterType.String)
            ),
            cacheControl = AnthropicCacheControl.Default
        )
        val prompt = Prompt.build("test-cache-tool") {
            system(PromptUtils.assistantPromptOfAtLeastLength(1200))
            user("What is 2 + 2?")
        }

        testCacheControl(executor, prompt, model, listOf(cachedTool))
    }
}

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/executor/BedrockConverseApiIntegrationTest.kt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import ai.koog.integration.tests.utils.tools.CalculatorTool
1616
import ai.koog.prompt.dsl.Prompt
1717
import ai.koog.prompt.executor.clients.LLMClient
1818
import ai.koog.prompt.executor.clients.bedrock.BedrockAPIMethod
19+
import ai.koog.prompt.executor.clients.bedrock.BedrockCacheControl
1920
import ai.koog.prompt.executor.clients.bedrock.BedrockClientSettings
2021
import ai.koog.prompt.executor.clients.bedrock.BedrockGuardrailsSettings
2122
import ai.koog.prompt.executor.clients.bedrock.BedrockLLMClient
@@ -26,7 +27,6 @@ import ai.koog.prompt.executor.model.PromptExecutor
2627
import ai.koog.prompt.llm.LLMCapability
2728
import ai.koog.prompt.llm.LLMProvider
2829
import ai.koog.prompt.llm.LLModel
29-
import ai.koog.prompt.message.CacheControl
3030
import ai.koog.prompt.message.ContentPart
3131
import ai.koog.prompt.message.Message
3232
import ai.koog.prompt.params.LLMParams
@@ -335,7 +335,7 @@ class BedrockConverseApiIntegrationTest : ExecutorIntegrationTestBase() {
335335

336336
val prompt = Prompt.build("test-cache-system") {
337337
// Caching requires a minimum prompt length to work.
338-
system(assistantPromptOfAtLeastLength(1600), CacheControl.Bedrock.Default)
338+
system(assistantPromptOfAtLeastLength(1600), BedrockCacheControl.Default)
339339
user("What is the capital of France?")
340340
}
341341

@@ -358,7 +358,7 @@ class BedrockConverseApiIntegrationTest : ExecutorIntegrationTestBase() {
358358
val prompt = Prompt.build("test-cache-user") {
359359
// Caching requires a minimum prompt length to work.
360360
system(assistantPromptOfAtLeastLength(1600))
361-
user(listOf(ContentPart.Text("What is the capital of France?")), CacheControl.Bedrock.Default)
361+
user(listOf(ContentPart.Text("What is the capital of France?")), BedrockCacheControl.Default)
362362
}
363363

364364
withRetry(times = 3, testName = "integration_testCacheControlOnUserMessage[${model.id}]") {
@@ -378,7 +378,7 @@ class BedrockConverseApiIntegrationTest : ExecutorIntegrationTestBase() {
378378
Models.assumeAvailable(model.provider)
379379
assumeTrue(model.capabilities?.contains(LLMCapability.Tools) ?: false, "Model $model does not support tools")
380380

381-
val cachedDescriptor = CalculatorTool.descriptor.withCacheControl(CacheControl.Bedrock.Default).copy(
381+
val cachedDescriptor = CalculatorTool.descriptor.withCacheControl(BedrockCacheControl.Default).copy(
382382
// Caching requires a minimum prompt length to work - in the case of tools, this appears to apply specifically to the tool section
383383
// rather than the prompt as a whole.
384384
description = assistantPromptOfAtLeastLength(1600, CalculatorTool.descriptor.description)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
package ai.koog.prompt.executor.clients.anthropic

import ai.koog.prompt.message.CacheControl
import kotlinx.serialization.Serializable

/**
 * Represents caching strategies specific to Anthropic's LLM provider.
 *
 * A value of this type marks a cache breakpoint (a message, tool definition, or the
 * prompt as a whole, depending on where it is attached) and selects the cache TTL.
 * Sealed so the set of supported TTLs is closed and exhaustively matchable.
 */
@Serializable
public sealed interface AnthropicCacheControl : CacheControl {
    /** Cache with the default TTL (no explicit TTL sent to Anthropic, caches for 5 minutes by default). */
    @Serializable
    public data object Default : AnthropicCacheControl

    /** Cache for 1 hour. */
    @Serializable
    public data object OneHour : AnthropicCacheControl
}

0 commit comments

Comments
 (0)