@@ -338,6 +338,41 @@ def _convert_openai_to_anthropic_messages(
 
         return system_message, anthropic_messages  # type: ignore[return-value]
 
+    @staticmethod
+    def _extract_usage(usage_obj: Any) -> Dict[str, Any]:
+        r"""Extract usage information from an Anthropic usage object."""
+        input_tokens = getattr(usage_obj, "input_tokens", 0) or 0
+        output_tokens = getattr(usage_obj, "output_tokens", 0) or 0
+
+        usage: Dict[str, Any] = {
+            "prompt_tokens": input_tokens,
+            "completion_tokens": output_tokens,
+            "total_tokens": input_tokens + output_tokens,
+        }
+
+        # Prompt-caching fields — only include when actually set to
+        # an int (guards against MagicMock auto-attributes in tests)
+        cache_read = getattr(usage_obj, "cache_read_input_tokens", None)
+        if isinstance(cache_read, int):
+            usage["cache_read_input_tokens"] = cache_read
+
+        cache_creation = getattr(
+            usage_obj, "cache_creation_input_tokens", None
+        )
+        if isinstance(cache_creation, int):
+            usage["cache_creation_input_tokens"] = cache_creation
+
+        # Detailed cache_creation breakdown (mixed TTL).
+        # Anthropic SDK returns a CacheCreation pydantic model; convert
+        # it to a plain dict. Accept dicts as well for forward compat.
+        cache_creation_detail = getattr(usage_obj, "cache_creation", None)
+        if isinstance(cache_creation_detail, dict):
+            usage["cache_creation"] = cache_creation_detail
+        elif isinstance(cache_creation_detail, BaseModel):
+            usage["cache_creation"] = cache_creation_detail.model_dump()
+
+        return usage
+
     def _convert_anthropic_to_openai_response(
         self, response: Any, model: str
     ) -> ChatCompletion:
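For illustration, a minimal sketch of what `_extract_usage` returns when prompt caching was used; the `SimpleNamespace` stand-in, the token counts, and the `cache_creation` keys are illustrative rather than taken from this PR:

```python
from types import SimpleNamespace

# Stand-in for anthropic.types.Usage: attribute names match what
# _extract_usage reads; the counts are made up.
fake_usage = SimpleNamespace(
    input_tokens=12,
    output_tokens=48,
    cache_read_input_tokens=1024,
    cache_creation_input_tokens=0,
    cache_creation={"ephemeral_5m_input_tokens": 0},
)

# Calling the new helper on this object (written here as a free call for
# brevity; in the codebase it is a staticmethod on the backend class)
# would yield:
# _extract_usage(fake_usage) == {
#     "prompt_tokens": 12,
#     "completion_tokens": 48,
#     "total_tokens": 60,
#     "cache_read_input_tokens": 1024,
#     "cache_creation_input_tokens": 0,
#     "cache_creation": {"ephemeral_5m_input_tokens": 0},
# }
```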
@@ -436,16 +471,7 @@ def _convert_anthropic_to_openai_response(
         # Extract usage information
         usage = None
         if hasattr(response, "usage"):
-            usage = {
-                "prompt_tokens": getattr(response.usage, "input_tokens", 0),
-                "completion_tokens": getattr(
-                    response.usage, "output_tokens", 0
-                ),
-                "total_tokens": (
-                    getattr(response.usage, "input_tokens", 0)
-                    + getattr(response.usage, "output_tokens", 0)
-                ),
-            }
+            usage = self._extract_usage(response.usage)
 
         # Create ChatCompletion
         return ChatCompletion.construct(
@@ -494,12 +520,22 @@ def _convert_anthropic_stream_to_openai_chunk(
             # Initialize message
             if hasattr(chunk, "message") and hasattr(chunk.message, "id"):
                 chunk_id = chunk.message.id
+            # Extract usage from message_start (contains cache
+            # fields like cache_read_input_tokens)
+            msg_usage = None
+            if (
+                hasattr(chunk, "message")
+                and hasattr(chunk.message, "usage")
+                and chunk.message.usage
+            ):
+                msg_usage = self._extract_usage(chunk.message.usage)
             return ChatCompletionChunk.construct(
                 id=chunk_id,
                 choices=[{"index": 0, "delta": {}, "finish_reason": None}],
                 created=int(time.time()),
                 model=model,
                 object="chat.completion.chunk",
+                usage=msg_usage,
             )
         elif chunk_type == "content_block_start":
             # Content block starting
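For comparison, a small self-contained sketch of the chunk shape this branch now emits, built directly with `ChatCompletionChunk.construct`; the id, model name, and token counts are placeholders:

```python
import time

from openai.types.chat import ChatCompletionChunk

# Mirrors the message_start branch above with made-up values: because the
# chunk is built via .construct(), usage stays a plain dict.
first_chunk = ChatCompletionChunk.construct(
    id="msg_example",  # placeholder message id
    choices=[{"index": 0, "delta": {}, "finish_reason": None}],
    created=int(time.time()),
    model="claude-placeholder",  # placeholder model name
    object="chat.completion.chunk",
    usage={
        "prompt_tokens": 12,
        "completion_tokens": 0,
        "total_tokens": 12,
        "cache_read_input_tokens": 1024,
    },
)
assert first_chunk.usage["cache_read_input_tokens"] == 1024
```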
@@ -593,14 +629,7 @@ def _convert_anthropic_stream_to_openai_chunk(
                     finish_reason = "tool_calls"
             # Extract usage info from message_delta
             if hasattr(chunk, "usage") and chunk.usage:
-                usage_obj = chunk.usage
-                input_tokens = getattr(usage_obj, "input_tokens", 0)
-                output_tokens = getattr(usage_obj, "output_tokens", 0)
-                usage = {
-                    "prompt_tokens": input_tokens,
-                    "completion_tokens": output_tokens,
-                    "total_tokens": input_tokens + output_tokens,
-                }
+                usage = self._extract_usage(chunk.usage)
         elif chunk_type == "message_stop":
             # Message finished - only set finish_reason if not already sent
             # This prevents duplicate finish_reason triggers in chat_agent
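Finally, a hypothetical consumer-side helper (not part of this diff) showing one way the per-chunk usage dicts could be combined, given that message_start carries prompt/cache counts and message_delta carries completion counts; the function name and the merge strategy are assumptions:

```python
from typing import Any, Dict, Iterable


def merge_stream_usage(chunks: Iterable[Any]) -> Dict[str, Any]:
    """Hypothetical sketch: combine usage dicts emitted across a stream.

    Keys carried by later chunks (e.g. completion_tokens from
    message_delta) overwrite earlier ones, while keys seen only on the
    first chunk (e.g. cache_read_input_tokens) are preserved.
    """
    merged: Dict[str, Any] = {}
    for chunk in chunks:
        usage = getattr(chunk, "usage", None)
        if usage:
            merged.update(usage)
    # Recompute the total, since message_delta alone may not reflect the
    # prompt tokens reported on message_start.
    if "prompt_tokens" in merged and "completion_tokens" in merged:
        merged["total_tokens"] = (
            merged["prompt_tokens"] + merged["completion_tokens"]
        )
    return merged
```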