Skip to content
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
a7758b5
fix(litellm): Avoid double span exits when streaming
alexander-alderman-webb Apr 1, 2026
3f761e9
simplify
alexander-alderman-webb Apr 1, 2026
1f94674
test cleanup
alexander-alderman-webb Apr 1, 2026
c31472c
docs
alexander-alderman-webb Apr 1, 2026
50d0b1f
use underscore
alexander-alderman-webb Apr 2, 2026
edd8a90
test(litellm): Replace mocks with httpx types in nonstreaming tests
alexander-alderman-webb Apr 2, 2026
ec3d128
add fixture
alexander-alderman-webb Apr 2, 2026
a4b9b3a
more mocks
alexander-alderman-webb Apr 7, 2026
9ae99be
update tox
alexander-alderman-webb Apr 7, 2026
99105ca
feat(litellm): Add async callbacks
alexander-alderman-webb Apr 10, 2026
079ff1b
tox files
alexander-alderman-webb Apr 10, 2026
c5b92c8
test(litellm): Replace mocks with httpx types in embedding tests
alexander-alderman-webb Apr 10, 2026
ad16c7f
test(litellm): Replace mocks with httpx types in rate-limit test
alexander-alderman-webb Apr 10, 2026
017f854
add more tests and merge
alexander-alderman-webb Apr 10, 2026
d48c85f
add aembedding
alexander-alderman-webb Apr 10, 2026
ecb9526
fix tests
alexander-alderman-webb Apr 10, 2026
6f3c247
cleanup
alexander-alderman-webb Apr 10, 2026
dda374f
Merge branch 'webb/litellm/remove-mocks' into webb/litellm/embedding-…
alexander-alderman-webb Apr 10, 2026
b622a07
Merge branch 'webb/litellm/embedding-tests' into webb/litellm/error-mock
alexander-alderman-webb Apr 10, 2026
598d6b5
undo merge
alexander-alderman-webb Apr 10, 2026
a7dfb23
.
alexander-alderman-webb Apr 10, 2026
79efa22
merge
alexander-alderman-webb Apr 10, 2026
a8689cd
remove fixture
alexander-alderman-webb Apr 10, 2026
99de614
Merge branch 'webb/litellm/embedding-tests' into webb/litellm/rate-li…
alexander-alderman-webb Apr 10, 2026
c8d86a8
add fixture
alexander-alderman-webb Apr 10, 2026
ecd3718
Merge branch 'master' into webb/litellm/close-spans
alexander-alderman-webb Apr 10, 2026
0536025
re-run tox
alexander-alderman-webb Apr 10, 2026
62c32cb
Merge branch 'master' into webb/litellm/close-spans
alexander-alderman-webb Apr 10, 2026
f352bba
Merge branch 'webb/litellm/close-spans' into webb/litellm/remove-mocks
alexander-alderman-webb Apr 10, 2026
d9cf8b0
Merge branch 'webb/litellm/remove-mocks' into webb/litellm/embedding-…
alexander-alderman-webb Apr 10, 2026
4226d2c
Merge branch 'webb/litellm/embedding-tests' into webb/litellm/rate-li…
alexander-alderman-webb Apr 10, 2026
c5063a0
merge
alexander-alderman-webb Apr 10, 2026
54925ab
patch completions client instead of embeddings
alexander-alderman-webb Apr 10, 2026
a2b3585
make request headers consistent
alexander-alderman-webb Apr 10, 2026
179f14b
Merge branch 'webb/litellm/embedding-tests' into webb/litellm/rate-li…
alexander-alderman-webb Apr 10, 2026
6eb17c9
reset all executor references
alexander-alderman-webb Apr 10, 2026
1b28574
merge
alexander-alderman-webb Apr 10, 2026
ce5ce74
delete span when finished
alexander-alderman-webb Apr 13, 2026
8435f36
safe exit pattern
alexander-alderman-webb Apr 13, 2026
392eb17
Merge branch 'webb/litellm/close-spans' into webb/litellm/remove-mocks
alexander-alderman-webb Apr 13, 2026
6d52689
Merge branch 'webb/litellm/remove-mocks' into webb/litellm/embedding-…
alexander-alderman-webb Apr 13, 2026
9eac0f8
Merge branch 'webb/litellm/embedding-tests' into webb/litellm/rate-li…
alexander-alderman-webb Apr 13, 2026
0b745c9
merge
alexander-alderman-webb Apr 13, 2026
5dc9bbe
update tox.ini again
alexander-alderman-webb Apr 13, 2026
0882066
Merge branch 'master' into webb/litellm/close-spans
alexander-alderman-webb Apr 13, 2026
ee4d55c
tox run
alexander-alderman-webb Apr 13, 2026
d9b9700
Merge branch 'webb/litellm/remove-mocks' into webb/litellm/embedding-…
alexander-alderman-webb Apr 13, 2026
73e9bc1
Merge branch 'webb/litellm/embedding-tests' into webb/litellm/rate-li…
alexander-alderman-webb Apr 13, 2026
6b3cd18
tox run
alexander-alderman-webb Apr 13, 2026
6626a1b
merge master
alexander-alderman-webb Apr 13, 2026
f12b7d3
remove test
alexander-alderman-webb Apr 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions scripts/populate_tox/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,9 @@
},
"litellm": {
"package": "litellm",
"deps": {
"*": ["anthropic", "google-genai", "pytest-asyncio"],
},
},
"litestar": {
"package": "litestar",
Expand Down
74 changes: 38 additions & 36 deletions scripts/populate_tox/package_dependencies.jsonl

Large diffs are not rendered by default.

70 changes: 35 additions & 35 deletions scripts/populate_tox/releases.jsonl

Large diffs are not rendered by default.

35 changes: 32 additions & 3 deletions sentry_sdk/integrations/litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
provider = "unknown"

call_type = kwargs.get("call_type", None)
if call_type == "embedding":
if call_type == "embedding" or call_type == "aembedding":
operation = "embeddings"
else:
operation = "chat"
Expand Down Expand Up @@ -170,6 +170,10 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
set_data_normalized(span, f"gen_ai.litellm.{key}", value)


async def _async_input_callback(kwargs: "Dict[str, Any]") -> None:
    """Async variant of the input callback.

    litellm awaits callbacks registered for async call paths, so this simply
    delegates to the synchronous :func:`_input_callback` implementation.
    """
    _input_callback(kwargs)


def _success_callback(
kwargs: "Dict[str, Any]",
completion_response: "Any",
Expand Down Expand Up @@ -230,8 +234,29 @@ def _success_callback(
)

finally:
# Always finish the span and clean up
span.__exit__(None, None, None)
is_streaming = kwargs.get("stream")
# Callback is fired multiple times when streaming a response.
# Streaming flag checked at https://github.com/BerriAI/litellm/blob/33c3f13443eaf990ac8c6e3da78bddbc2b7d0e7a/litellm/litellm_core_utils/litellm_logging.py#L1603
if (
is_streaming is not True
or "complete_streaming_response" in kwargs
or "async_complete_streaming_response" in kwargs
):
span.__exit__(None, None, None)


async def _async_success_callback(
    kwargs: "Dict[str, Any]",
    completion_response: "Any",
    start_time: "datetime",
    end_time: "datetime",
) -> None:
    """Async variant of the success callback.

    litellm awaits callbacks registered for async call paths, so this simply
    delegates to the synchronous :func:`_success_callback` implementation.
    """
    _success_callback(kwargs, completion_response, start_time, end_time)


def _failure_callback(
Expand Down Expand Up @@ -315,10 +340,14 @@ def setup_once() -> None:
litellm.input_callback = input_callback or []
if _input_callback not in litellm.input_callback:
litellm.input_callback.append(_input_callback)
if _async_input_callback not in litellm.input_callback:
litellm.input_callback.append(_async_input_callback)

litellm.success_callback = success_callback or []
if _success_callback not in litellm.success_callback:
litellm.success_callback.append(_success_callback)
if _async_success_callback not in litellm.success_callback:
litellm.success_callback.append(_async_success_callback)

litellm.failure_callback = failure_callback or []
if _failure_callback not in litellm.failure_callback:
Expand Down
246 changes: 245 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,18 @@
openai = None


try:
import anthropic
except ImportError:
anthropic = None


try:
    # Import the genai submodule explicitly: `google` is a namespace package,
    # so `import google` alone does not guarantee `google.genai` is available
    # as an attribute when the fixtures below reference google.genai.types.
    import google.genai  # noqa: F401
except ImportError:
    google = None


from tests import _warning_recorder, _warning_recorder_mgr

from typing import TYPE_CHECKING
Expand Down Expand Up @@ -1050,7 +1062,12 @@ def inner(response_content, serialize_pydantic=False, request_headers=None):
)

if serialize_pydantic:
response_content = json.dumps(response_content.model_dump()).encode("utf-8")
response_content = json.dumps(
response_content.model_dump(
by_alias=True,
exclude_none=True,
)
).encode("utf-8")

response = HttpxResponse(
200,
Expand All @@ -1063,6 +1080,185 @@ def inner(response_content, serialize_pydantic=False, request_headers=None):
return inner


@pytest.fixture
def get_rate_limit_model_response():
    """Factory fixture building an httpx 429 (rate-limited) model response."""

    def inner(request_headers=None):
        # Default to an empty header set when the caller supplies none.
        headers = request_headers if request_headers is not None else {}
        model_request = HttpxRequest("POST", "/responses", headers=headers)
        return HttpxResponse(429, request=model_request)

    return inner


@pytest.fixture
def streaming_chat_completions_model_response():
    """Chunks of a streamed chat completion spelling out "Test response".

    The first chunk carries only the assistant role, the middle chunks each
    carry a slice of the content, and the final chunk carries the "stop"
    finish_reason together with the token usage totals. Factored through a
    local helper because every chunk shares the same envelope.
    """

    def _chunk(delta, finish_reason=None, usage=None):
        # Shared envelope; only delta / finish_reason / usage vary per chunk.
        return openai.types.chat.ChatCompletionChunk(
            id="chatcmpl-test",
            object="chat.completion.chunk",
            created=10000000,
            model="gpt-3.5-turbo",
            choices=[
                openai.types.chat.chat_completion_chunk.Choice(
                    index=0,
                    delta=delta,
                    finish_reason=finish_reason,
                ),
            ],
            usage=usage,
        )

    delta_cls = openai.types.chat.chat_completion_chunk.ChoiceDelta
    return [
        _chunk(delta_cls(role="assistant")),
        *(
            _chunk(delta_cls(content=piece))
            for piece in ("Tes", "t r", "esp", "ons", "e")
        ),
        _chunk(
            delta_cls(),
            finish_reason="stop",
            usage=openai.types.CompletionUsage(
                prompt_tokens=10,
                completion_tokens=20,
                total_tokens=30,
            ),
        ),
    ]


@pytest.fixture
def nonstreaming_chat_completions_model_response():
    """A canned non-streaming ChatCompletion with one assistant message."""
    message = openai.types.chat.ChatCompletionMessage(
        role="assistant", content="Test response"
    )
    choice = openai.types.chat.chat_completion.Choice(
        index=0,
        finish_reason="stop",
        message=message,
    )
    usage = openai.types.CompletionUsage(
        prompt_tokens=10,
        completion_tokens=20,
        total_tokens=30,
    )
    return openai.types.chat.ChatCompletion(
        id="chatcmpl-test",
        choices=[choice],
        created=1234567890,
        model="gpt-3.5-turbo",
        object="chat.completion",
        usage=usage,
    )


@pytest.fixture
def openai_embedding_model_response():
    """A canned CreateEmbeddingResponse holding a single 3-dim embedding."""
    embedding = openai.types.Embedding(
        embedding=[0.1, 0.2, 0.3],
        index=0,
        object="embedding",
    )
    usage = openai.types.create_embedding_response.Usage(
        prompt_tokens=5,
        total_tokens=5,
    )
    return openai.types.CreateEmbeddingResponse(
        data=[embedding],
        model="text-embedding-ada-002",
        object="list",
        usage=usage,
    )


@pytest.fixture
def nonstreaming_responses_model_response():
return openai.types.responses.Response(
Expand Down Expand Up @@ -1102,6 +1298,54 @@ def nonstreaming_responses_model_response():
)


@pytest.fixture
def nonstreaming_anthropic_model_response():
    """A canned Anthropic Message response with a single text block."""
    text_block = anthropic.types.TextBlock(
        type="text",
        text="Hello, how can I help you?",
    )
    usage = anthropic.types.Usage(input_tokens=10, output_tokens=20)
    return anthropic.types.Message(
        id="msg_123",
        type="message",
        role="assistant",
        model="claude-3-opus-20240229",
        content=[text_block],
        stop_reason="end_turn",
        stop_sequence=None,
        usage=usage,
    )


@pytest.fixture
def nonstreaming_google_genai_model_response():
    """A canned google-genai GenerateContentResponse with one text candidate."""
    genai_types = google.genai.types
    part = genai_types.Part(text="Hello, how can I help you?")
    candidate = genai_types.Candidate(
        content=genai_types.Content(role="model", parts=[part]),
        finish_reason="STOP",
    )
    usage = genai_types.GenerateContentResponseUsageMetadata(
        prompt_token_count=10,
        candidates_token_count=20,
        total_token_count=30,
    )
    return genai_types.GenerateContentResponse(
        response_id="resp_123",
        candidates=[candidate],
        model_version="gemini/gemini-pro",
        usage_metadata=usage,
    )


@pytest.fixture
def responses_tool_call_model_responses():
def inner(
Expand Down
Loading
Loading