Skip to content
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
a7758b5
fix(litellm): Avoid double span exits when streaming
alexander-alderman-webb Apr 1, 2026
3f761e9
simplify
alexander-alderman-webb Apr 1, 2026
1f94674
test cleanup
alexander-alderman-webb Apr 1, 2026
c31472c
docs
alexander-alderman-webb Apr 1, 2026
50d0b1f
use underscore
alexander-alderman-webb Apr 2, 2026
edd8a90
test(litellm): Replace mocks with httpx types in nonstreaming tests
alexander-alderman-webb Apr 2, 2026
ec3d128
add fixture
alexander-alderman-webb Apr 2, 2026
a4b9b3a
more mocks
alexander-alderman-webb Apr 7, 2026
9ae99be
update tox
alexander-alderman-webb Apr 7, 2026
99105ca
feat(litellm): Add async callbacks
alexander-alderman-webb Apr 10, 2026
079ff1b
tox files
alexander-alderman-webb Apr 10, 2026
c5b92c8
test(litellm): Replace mocks with httpx types in embedding tests
alexander-alderman-webb Apr 10, 2026
ad16c7f
test(litellm): Replace mocks with httpx types in rate-limit test
alexander-alderman-webb Apr 10, 2026
017f854
add more tests and merge
alexander-alderman-webb Apr 10, 2026
d48c85f
add aembedding
alexander-alderman-webb Apr 10, 2026
ecb9526
fix tests
alexander-alderman-webb Apr 10, 2026
6f3c247
cleanup
alexander-alderman-webb Apr 10, 2026
dda374f
Merge branch 'webb/litellm/remove-mocks' into webb/litellm/embedding-…
alexander-alderman-webb Apr 10, 2026
b622a07
Merge branch 'webb/litellm/embedding-tests' into webb/litellm/error-mock
alexander-alderman-webb Apr 10, 2026
598d6b5
undo merge
alexander-alderman-webb Apr 10, 2026
a7dfb23
.
alexander-alderman-webb Apr 10, 2026
79efa22
merge
alexander-alderman-webb Apr 10, 2026
a8689cd
remove fixture
alexander-alderman-webb Apr 10, 2026
99de614
Merge branch 'webb/litellm/embedding-tests' into webb/litellm/rate-li…
alexander-alderman-webb Apr 10, 2026
c8d86a8
add fixture
alexander-alderman-webb Apr 10, 2026
ecd3718
Merge branch 'master' into webb/litellm/close-spans
alexander-alderman-webb Apr 10, 2026
0536025
re-run tox
alexander-alderman-webb Apr 10, 2026
62c32cb
Merge branch 'master' into webb/litellm/close-spans
alexander-alderman-webb Apr 10, 2026
f352bba
Merge branch 'webb/litellm/close-spans' into webb/litellm/remove-mocks
alexander-alderman-webb Apr 10, 2026
d9cf8b0
Merge branch 'webb/litellm/remove-mocks' into webb/litellm/embedding-…
alexander-alderman-webb Apr 10, 2026
4226d2c
Merge branch 'webb/litellm/embedding-tests' into webb/litellm/rate-li…
alexander-alderman-webb Apr 10, 2026
c5063a0
merge
alexander-alderman-webb Apr 10, 2026
54925ab
patch completions client instead of embeddings
alexander-alderman-webb Apr 10, 2026
a2b3585
make request headers consistent
alexander-alderman-webb Apr 10, 2026
179f14b
Merge branch 'webb/litellm/embedding-tests' into webb/litellm/rate-li…
alexander-alderman-webb Apr 10, 2026
6eb17c9
reset all executor references
alexander-alderman-webb Apr 10, 2026
1b28574
merge
alexander-alderman-webb Apr 10, 2026
ce5ce74
delete span when finished
alexander-alderman-webb Apr 13, 2026
8435f36
safe exit pattern
alexander-alderman-webb Apr 13, 2026
392eb17
Merge branch 'webb/litellm/close-spans' into webb/litellm/remove-mocks
alexander-alderman-webb Apr 13, 2026
6d52689
Merge branch 'webb/litellm/remove-mocks' into webb/litellm/embedding-…
alexander-alderman-webb Apr 13, 2026
9eac0f8
Merge branch 'webb/litellm/embedding-tests' into webb/litellm/rate-li…
alexander-alderman-webb Apr 13, 2026
0b745c9
merge
alexander-alderman-webb Apr 13, 2026
5dc9bbe
update tox.ini again
alexander-alderman-webb Apr 13, 2026
0882066
Merge branch 'master' into webb/litellm/close-spans
alexander-alderman-webb Apr 13, 2026
ee4d55c
tox run
alexander-alderman-webb Apr 13, 2026
d9b9700
Merge branch 'webb/litellm/remove-mocks' into webb/litellm/embedding-…
alexander-alderman-webb Apr 13, 2026
73e9bc1
Merge branch 'webb/litellm/embedding-tests' into webb/litellm/rate-li…
alexander-alderman-webb Apr 13, 2026
6b3cd18
tox run
alexander-alderman-webb Apr 13, 2026
6626a1b
merge master
alexander-alderman-webb Apr 13, 2026
f12b7d3
remove test
alexander-alderman-webb Apr 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions scripts/populate_tox/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,9 @@
},
"litellm": {
"package": "litellm",
"deps": {
"*": ["anthropic", "google-genai", "pytest-asyncio"],
},
},
"litestar": {
"package": "litestar",
Expand Down
74 changes: 38 additions & 36 deletions scripts/populate_tox/package_dependencies.jsonl

Large diffs are not rendered by default.

70 changes: 35 additions & 35 deletions scripts/populate_tox/releases.jsonl

Large diffs are not rendered by default.

35 changes: 32 additions & 3 deletions sentry_sdk/integrations/litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
provider = "unknown"

call_type = kwargs.get("call_type", None)
if call_type == "embedding":
if call_type == "embedding" or call_type == "aembedding":
operation = "embeddings"
else:
operation = "chat"
Expand Down Expand Up @@ -170,6 +170,10 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
set_data_normalized(span, f"gen_ai.litellm.{key}", value)


async def _async_input_callback(kwargs: "Dict[str, Any]") -> None:
    """Async variant of the input callback.

    litellm awaits callbacks registered for async call paths, so this simply
    delegates to the synchronous :func:`_input_callback` implementation.
    """
    _input_callback(kwargs)


def _success_callback(
kwargs: "Dict[str, Any]",
completion_response: "Any",
Expand Down Expand Up @@ -230,8 +234,29 @@ def _success_callback(
)

finally:
# Always finish the span and clean up
span.__exit__(None, None, None)
is_streaming = kwargs.get("stream")
# Callback is fired multiple times when streaming a response.
# Streaming flag checked at https://github.com/BerriAI/litellm/blob/33c3f13443eaf990ac8c6e3da78bddbc2b7d0e7a/litellm/litellm_core_utils/litellm_logging.py#L1603
if (
is_streaming is not True
or "complete_streaming_response" in kwargs
or "async_complete_streaming_response" in kwargs
):
span.__exit__(None, None, None)


async def _async_success_callback(
    kwargs: "Dict[str, Any]",
    completion_response: "Any",
    start_time: "datetime",
    end_time: "datetime",
) -> None:
    """Async variant of the success callback.

    litellm awaits callbacks registered for async call paths, so this simply
    delegates to the synchronous :func:`_success_callback` implementation.
    """
    _success_callback(kwargs, completion_response, start_time, end_time)


def _failure_callback(
Expand Down Expand Up @@ -315,10 +340,14 @@ def setup_once() -> None:
litellm.input_callback = input_callback or []
if _input_callback not in litellm.input_callback:
litellm.input_callback.append(_input_callback)
if _async_input_callback not in litellm.input_callback:
litellm.input_callback.append(_async_input_callback)

litellm.success_callback = success_callback or []
if _success_callback not in litellm.success_callback:
litellm.success_callback.append(_success_callback)
if _async_success_callback not in litellm.success_callback:
litellm.success_callback.append(_async_success_callback)

litellm.failure_callback = failure_callback or []
if _failure_callback not in litellm.failure_callback:
Expand Down
246 changes: 245 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,18 @@
openai = None


try:
import anthropic
except ImportError:
anthropic = None


try:
    # Import the genai submodule explicitly: `google` is a namespace package,
    # so `import google` alone does not guarantee `google.genai` is available
    # as an attribute when the fixtures below reference google.genai.types.
    import google.genai  # noqa: F401
except ImportError:
    google = None


from tests import _warning_recorder, _warning_recorder_mgr

from typing import TYPE_CHECKING
Expand Down Expand Up @@ -1050,7 +1062,12 @@ def inner(response_content, serialize_pydantic=False, request_headers=None):
)

if serialize_pydantic:
response_content = json.dumps(response_content.model_dump()).encode("utf-8")
response_content = json.dumps(
response_content.model_dump(
by_alias=True,
exclude_none=True,
)
).encode("utf-8")

response = HttpxResponse(
200,
Expand All @@ -1063,6 +1080,185 @@ def inner(response_content, serialize_pydantic=False, request_headers=None):
return inner


@pytest.fixture
def get_rate_limit_model_response():
    """Factory fixture building an httpx 429 (rate-limited) model response."""

    def inner(request_headers=None):
        # Default to an empty header set when the caller supplies none.
        headers = request_headers if request_headers is not None else {}
        model_request = HttpxRequest("POST", "/responses", headers=headers)
        return HttpxResponse(429, request=model_request)

    return inner


@pytest.fixture
def streaming_chat_completions_model_response():
    """Chunks of a streamed chat completion spelling out "Test response".

    The first chunk carries only the assistant role, the middle chunks each
    carry a slice of the content, and the final chunk carries the "stop"
    finish_reason together with the token usage totals. Factored through a
    local helper because every chunk shares the same envelope.
    """

    def _chunk(delta, finish_reason=None, usage=None):
        # Shared envelope; only delta / finish_reason / usage vary per chunk.
        return openai.types.chat.ChatCompletionChunk(
            id="chatcmpl-test",
            object="chat.completion.chunk",
            created=10000000,
            model="gpt-3.5-turbo",
            choices=[
                openai.types.chat.chat_completion_chunk.Choice(
                    index=0,
                    delta=delta,
                    finish_reason=finish_reason,
                ),
            ],
            usage=usage,
        )

    delta_cls = openai.types.chat.chat_completion_chunk.ChoiceDelta
    return [
        _chunk(delta_cls(role="assistant")),
        *(
            _chunk(delta_cls(content=piece))
            for piece in ("Tes", "t r", "esp", "ons", "e")
        ),
        _chunk(
            delta_cls(),
            finish_reason="stop",
            usage=openai.types.CompletionUsage(
                prompt_tokens=10,
                completion_tokens=20,
                total_tokens=30,
            ),
        ),
    ]


@pytest.fixture
def nonstreaming_chat_completions_model_response():
    """A canned non-streaming ChatCompletion with one assistant message."""
    message = openai.types.chat.ChatCompletionMessage(
        role="assistant", content="Test response"
    )
    choice = openai.types.chat.chat_completion.Choice(
        index=0,
        finish_reason="stop",
        message=message,
    )
    usage = openai.types.CompletionUsage(
        prompt_tokens=10,
        completion_tokens=20,
        total_tokens=30,
    )
    return openai.types.chat.ChatCompletion(
        id="chatcmpl-test",
        choices=[choice],
        created=1234567890,
        model="gpt-3.5-turbo",
        object="chat.completion",
        usage=usage,
    )


@pytest.fixture
def openai_embedding_model_response():
    """A canned CreateEmbeddingResponse holding a single 3-dim embedding."""
    embedding = openai.types.Embedding(
        embedding=[0.1, 0.2, 0.3],
        index=0,
        object="embedding",
    )
    usage = openai.types.create_embedding_response.Usage(
        prompt_tokens=5,
        total_tokens=5,
    )
    return openai.types.CreateEmbeddingResponse(
        data=[embedding],
        model="text-embedding-ada-002",
        object="list",
        usage=usage,
    )


@pytest.fixture
def nonstreaming_responses_model_response():
return openai.types.responses.Response(
Expand Down Expand Up @@ -1102,6 +1298,54 @@ def nonstreaming_responses_model_response():
)


@pytest.fixture
def nonstreaming_anthropic_model_response():
    """A canned Anthropic Message response with a single text block."""
    text_block = anthropic.types.TextBlock(
        type="text",
        text="Hello, how can I help you?",
    )
    usage = anthropic.types.Usage(input_tokens=10, output_tokens=20)
    return anthropic.types.Message(
        id="msg_123",
        type="message",
        role="assistant",
        model="claude-3-opus-20240229",
        content=[text_block],
        stop_reason="end_turn",
        stop_sequence=None,
        usage=usage,
    )


@pytest.fixture
def nonstreaming_google_genai_model_response():
    """A canned google-genai GenerateContentResponse with one text candidate."""
    genai_types = google.genai.types
    part = genai_types.Part(text="Hello, how can I help you?")
    candidate = genai_types.Candidate(
        content=genai_types.Content(role="model", parts=[part]),
        finish_reason="STOP",
    )
    usage = genai_types.GenerateContentResponseUsageMetadata(
        prompt_token_count=10,
        candidates_token_count=20,
        total_token_count=30,
    )
    return genai_types.GenerateContentResponse(
        response_id="resp_123",
        candidates=[candidate],
        model_version="gemini/gemini-pro",
        usage_metadata=usage,
    )


@pytest.fixture
def responses_tool_call_model_responses():
def inner(
Expand Down
Loading
Loading