Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions tests/test_predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,9 +575,7 @@ def mock_get_invoice(prediction_id):
assert isinstance(response.response, dict)


@pytest.mark.parametrize(
"service_tier", ["auto", "default", "standard", "flex", "priority"]
)
@pytest.mark.parametrize("service_tier", ["default", "flex", "priority"])
def test_generation_config_service_tier(service_tier):
"""service_tier is accepted and round-trips through model_dump()."""
config = GenerationConfig(service_tier=service_tier)
Expand Down
13 changes: 13 additions & 0 deletions vlmrun/cli/_cli/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):

# Available toolsets (must match AgentToolset literal values)
AVAILABLE_TOOLSETS: List[str] = list(AgentToolset.__args__)
AVAILABLE_SERVICE_TIERS = ["default", "flex", "priority"]

DEFAULT_MODEL = "vlmrun-orion-1:auto"

Expand Down Expand Up @@ -583,6 +584,11 @@ def chat(
"-s",
help="Session UUID for persisting chat history (stateful conversations).",
),
service_tier: Optional[str] = typer.Option(
None,
"--service-tier",
help="Delivery tier: standard, flex (50%% discount), or priority (1.8x premium).",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The help text lists `standard` as a service tier, but that value is not in `AVAILABLE_SERVICE_TIERS` and has been removed from the supported tiers in the client types. Replace `standard` with `default` in the help text so that it matches the implementation and does not mislead users.

Suggested change
help="Delivery tier: standard, flex (50%% discount), or priority (1.8x premium).",
help="Delivery tier: default, flex (50%% discount), or priority (1.8x premium).",

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 CLI help text lists "standard" as a valid service tier but the actual valid tier is "default"

The --service-tier help string in chat.py:590 says "Delivery tier: standard, flex ..." but AVAILABLE_SERVICE_TIERS at chat.py:150 and the Literal type in types.py:607 define the valid tiers as ["default", "flex", "priority"]. A user following the help text who passes --service-tier standard will get an error: "Invalid service tier 'standard'". The same bug exists in execute.py:312 and generate.py:178.

Suggested change
help="Delivery tier: standard, flex (50%% discount), or priority (1.8x premium).",
help="Delivery tier: default, flex (50%% discount), or priority (1.8x premium).",
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

),
timeout: Optional[float] = typer.Option(
None,
"--timeout",
Expand Down Expand Up @@ -647,6 +653,11 @@ def chat(
console.print(f" - {m}{default_marker}")
sys.exit(1)

if service_tier and service_tier not in AVAILABLE_SERVICE_TIERS:
console.print(f"[red]Error:[/] Invalid service tier '{service_tier}'")
console.print(f"\nAvailable tiers: {', '.join(AVAILABLE_SERVICE_TIERS)}")
sys.exit(1)

# Validate input files if provided
if input_files:
for file_path in input_files:
Expand Down Expand Up @@ -730,6 +741,8 @@ def chat(
extra_body["skills"] = agent_skills
if toolsets:
extra_body["toolsets"] = toolsets
if service_tier:
extra_body["service_tier"] = service_tier
if not extra_body:
extra_body = None

Expand Down
33 changes: 20 additions & 13 deletions vlmrun/cli/_cli/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from __future__ import annotations

import json
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
Expand Down Expand Up @@ -36,6 +35,8 @@

AVAILABLE_TOOLSETS: List[str] = list(AgentToolset.__args__)

AVAILABLE_SERVICE_TIERS = ["default", "flex", "priority"]

DEFAULT_MODEL = "vlmrun-orion-1:auto"

EXECUTE_HELP = """Execute an agent via /v1/agent/execute.
Expand All @@ -46,7 +47,7 @@
vlmrun execute -p "Extract invoice fields" -i doc.pdf --schema schema.json
vlmrun execute -n my-agent:v1 -i img.jpg --skill ./my-skill
vlmrun execute -n my-agent:v1 -i img.jpg --skill-id my-skill:latest
vlmrun execute -p "Describe" -i photo.jpg --no-wait
vlmrun execute -p "Describe" -i photo.jpg --wait
vlmrun execute -n my-agent:v1 -i a.jpg -i b.pdf -t image -t document

\b
Expand Down Expand Up @@ -179,9 +180,7 @@ def _upload_files(
file_responses.append(future.result())
status.update(f"Uploading {file_path.name}...")
except Exception as e:
console.print(
f"[red]Error uploading {file_path.name}:[/] {e}"
)
console.print(f"[red]Error uploading {file_path.name}:[/] {e}")
raise typer.Exit(1) from e

return file_responses
Expand Down Expand Up @@ -288,9 +287,9 @@ def execute(
help="Model: vlmrun-orion-1[:lite|fast|auto|pro]",
),
wait: bool = typer.Option(
True,
False,
"--wait/--no-wait",
help="Wait for execution to complete (default: wait).",
help="Wait for execution to complete (default: no-wait).",
),
timeout: int = typer.Option(
300,
Expand All @@ -307,6 +306,11 @@ def execute(
"--callback-url",
help="URL to call when execution completes (webhook).",
),
service_tier: Optional[str] = typer.Option(
None,
"--service-tier",
help="Delivery tier: standard, flex (50%% discount), or priority (1.8x premium).",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The help text lists `standard` as a service tier, but that value is not in `AVAILABLE_SERVICE_TIERS` and has been removed from the supported tiers in the client types. Replace `standard` with `default` in the help text so that it matches the implementation and does not mislead users.

Suggested change
help="Delivery tier: standard, flex (50%% discount), or priority (1.8x premium).",
help="Delivery tier: default, flex (50%% discount), or priority (1.8x premium).",

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 CLI help text lists "standard" as a valid service tier but the actual valid tier is "default"

Same issue as in chat.py: the --service-tier help string in execute.py:312 says "standard" but the valid tier is "default" (per AVAILABLE_SERVICE_TIERS at execute.py:38).

Suggested change
help="Delivery tier: standard, flex (50%% discount), or priority (1.8x premium).",
help="Delivery tier: default, flex (50%% discount), or priority (1.8x premium).",
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

),
output_format: Optional[str] = typer.Option(
None,
"--format",
Expand All @@ -333,13 +337,16 @@ def execute(
console.print(f"\nAvailable models: {', '.join(AVAILABLE_MODELS)}")
raise typer.Exit(1)

if service_tier and service_tier not in AVAILABLE_SERVICE_TIERS:
console.print(f"[red]Error:[/] Invalid service tier '{service_tier}'")
console.print(f"\nAvailable tiers: {', '.join(AVAILABLE_SERVICE_TIERS)}")
raise typer.Exit(1)

if toolsets:
for ts in toolsets:
if ts not in AVAILABLE_TOOLSETS:
console.print(f"[red]Error:[/] Invalid toolset '{ts}'")
console.print(
f"\nAvailable toolsets: {', '.join(AVAILABLE_TOOLSETS)}"
)
console.print(f"\nAvailable toolsets: {', '.join(AVAILABLE_TOOLSETS)}")
raise typer.Exit(1)

if input_files:
Expand Down Expand Up @@ -399,8 +406,7 @@ def execute(
if file_responses:
inputs = {
"files": [
{"type": "input_file", "file_id": fr.id}
for fr in file_responses
{"type": "input_file", "file_id": fr.id} for fr in file_responses
]
}

Expand All @@ -409,11 +415,12 @@ def execute(
prompt=final_prompt,
json_schema=json_schema,
skills=skills,
service_tier=service_tier,
)

if not output_json:
console.print(
f"\n [bold blue]Submitting execution[/bold blue]"
"\n [bold blue]Submitting execution[/bold blue]"
+ (f" [dim]({name})[/dim]" if name else "")
+ f" [dim]model={model}[/dim]"
)
Expand Down
9 changes: 2 additions & 7 deletions vlmrun/cli/_cli/executions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

import json
from typing import TYPE_CHECKING, List, Optional
from typing import TYPE_CHECKING, Optional

import typer
from rich.console import Console, Group
Expand All @@ -13,7 +13,6 @@

if TYPE_CHECKING:
from vlmrun.client import VLMRun
from vlmrun.client.types import AgentExecutionResponse

app = typer.Typer(
help="List and retrieve agent execution results.",
Expand Down Expand Up @@ -227,11 +226,7 @@ def get(
execution = client.executions.get(execution_id)

if output_json:
print(
json.dumps(
execution.model_dump(mode="json"), indent=2, default=str
)
)
print(json.dumps(execution.model_dump(mode="json"), indent=2, default=str))
return

console.print("\nExecution Details:\n", style="white")
Expand Down
42 changes: 32 additions & 10 deletions vlmrun/cli/_cli/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@
from __future__ import annotations

import json
import sys
import time
from pathlib import Path
from typing import Any, Dict, List, Optional

import typer
from rich import print as rprint
from rich.console import Console
from rich.panel import Panel
from rich.status import Status
Expand All @@ -32,6 +30,7 @@
console = Console()

AVAILABLE_TOOLSETS: List[str] = list(AgentToolset.__args__)
AVAILABLE_SERVICE_TIERS = ["default", "flex", "priority"]

GENERATE_HELP = """Generate structured predictions for images, documents, videos, and audio.

Expand Down Expand Up @@ -76,9 +75,7 @@ def _resolve_skills(
) -> Optional[List[AgentSkill]]:
"""Build AgentSkill list from --skill dirs or --skill-id references."""
if skill_dirs and skill_ids:
console.print(
"[red]Error:[/] --skill and --skill-id are mutually exclusive."
)
console.print("[red]Error:[/] --skill and --skill-id are mutually exclusive.")
raise typer.Exit(1)

if skill_dirs:
Expand Down Expand Up @@ -175,6 +172,11 @@ def generate(
"--timeout",
help="Timeout in seconds when waiting for prediction to complete.",
),
service_tier: Optional[str] = typer.Option(
None,
"--service-tier",
help="Delivery tier: standard, flex (50%% discount), or priority (1.8x premium).",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The help text lists `standard` as a service tier, but that value is not in `AVAILABLE_SERVICE_TIERS` and has been removed from the supported tiers in the client types. Replace `standard` with `default` in the help text so that it matches the implementation and does not mislead users.

Suggested change
help="Delivery tier: standard, flex (50%% discount), or priority (1.8x premium).",
help="Delivery tier: default, flex (50%% discount), or priority (1.8x premium).",

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 CLI help text lists "standard" as a valid service tier but the actual valid tier is "default"

Same issue as in chat.py: the --service-tier help string in generate.py:178 says "standard" but the valid tier is "default" (per AVAILABLE_SERVICE_TIERS at generate.py:33).

Suggested change
help="Delivery tier: standard, flex (50%% discount), or priority (1.8x premium).",
help="Delivery tier: default, flex (50%% discount), or priority (1.8x premium).",
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

),
output_format: Optional[str] = typer.Option(
None,
"--format",
Expand All @@ -196,6 +198,11 @@ def generate(
console.print(f"[red]Error:[/] Unsupported output format '{output_format}'")
raise typer.Exit(1)

if service_tier and service_tier not in AVAILABLE_SERVICE_TIERS:
console.print(f"[red]Error:[/] Invalid service tier '{service_tier}'")
console.print(f"\nAvailable tiers: {', '.join(AVAILABLE_SERVICE_TIERS)}")
raise typer.Exit(1)

suffix = input_file.suffix.lower()
if suffix not in SUPPORTED_INPUT_FILETYPES:
console.print(f"[red]Error:[/] Unsupported file type: {suffix}")
Expand Down Expand Up @@ -223,11 +230,12 @@ def generate(
raise typer.Exit(1) from e

config: Optional[GenerationConfig] = None
if any([skills, json_schema, prompt]):
if any([skills, json_schema, prompt, service_tier]):
config = GenerationConfig(
skills=skills,
json_schema=json_schema,
prompt=prompt,
service_tier=service_tier,
)

try:
Expand All @@ -242,31 +250,45 @@ def generate(
start_time = time.time()

if media_type in ("image", "document"):
with Status("Processing...", console=console, spinner="dots") if not output_json else _noop_ctx():
with (
Status("Processing...", console=console, spinner="dots")
if not output_json
else _noop_ctx()
):
response: PredictionResponse = client.document.generate(
file=input_file,
domain=domain,
batch=batch,
config=config,
)
elif media_type == "video":
with Status("Processing...", console=console, spinner="dots") if not output_json else _noop_ctx():
with (
Status("Processing...", console=console, spinner="dots")
if not output_json
else _noop_ctx()
):
response = client.video.generate(
file=input_file,
domain=domain,
batch=batch,
config=config,
)
elif media_type == "audio":
with Status("Processing...", console=console, spinner="dots") if not output_json else _noop_ctx():
with (
Status("Processing...", console=console, spinner="dots")
if not output_json
else _noop_ctx()
):
response = client.audio.generate(
file=input_file,
domain=domain,
batch=batch,
config=config,
)
else:
console.print(f"[red]Error:[/] Could not determine media type for {input_file}")
console.print(
f"[red]Error:[/] Could not determine media type for {input_file}"
)
raise typer.Exit(1)

# If batch mode and wait requested, poll until complete
Expand Down
4 changes: 1 addition & 3 deletions vlmrun/cli/_cli/predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,7 @@ def list(
for prediction in predictions:
usage = prediction.usage
st = _status_style(prediction.status)
dur = _compute_duration(
prediction.created_at, prediction.completed_at, usage
)
dur = _compute_duration(prediction.created_at, prediction.completed_at, usage)
rows.append(
_format_row(
prediction.id,
Expand Down
17 changes: 10 additions & 7 deletions vlmrun/client/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,13 @@ class AgentExecutionOrCreationConfig(BaseModel):
default=None,
description="List of agent skills to enable for this execution. Skills provide domain-specific expertise and capabilities.",
)
service_tier: Literal["default", "flex", "priority"] | None = Field(
default=None,
description=(
"Delivery tier: 'default' (baseline), 'flex' (50%% discount, higher latency), "
"or 'priority' (1.8x premium)."
Comment on lines +245 to +246
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Double percent %% in Pydantic Field descriptions renders as literal %% instead of %

The description strings in both AgentExecutionOrCreationConfig.service_tier (types.py:245) and GenerationConfig.service_tier (types.py:610) use 50%% which is a Click/typer escaping convention. However, Pydantic Field(description=...) strings are plain Python strings — %% is not processed as an escape and will render literally as "50%% discount" instead of "50% discount" in generated JSON schemas and documentation.

Suggested change
"Delivery tier: 'default' (baseline), 'flex' (50%% discount, higher latency), "
"or 'priority' (1.8x premium)."
"Delivery tier: 'default' (baseline), 'flex' (50% discount, higher latency), "
"or 'priority' (1.8x premium)."
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

),
)

@model_validator(mode="after")
def validate_config(self):
Expand Down Expand Up @@ -597,15 +604,11 @@ class GenerationConfig(BaseModel):
default=None,
description="0-indexed page indices to process for document files. If None, all pages are processed.",
)
service_tier: Literal["auto", "default", "standard", "flex", "priority"] | None = Field(
service_tier: Literal["default", "flex", "priority"] | None = Field(
default=None,
description=(
"Delivery tier mirroring OpenAI's service_tier and Vertex AI's "
"Gemini Flex/Priority offering. 'standard'/'default' uses baseline "
"rates, 'flex' applies a 50% discount with higher latency, "
"'priority' applies a 1.8x premium. When omitted (or 'auto'), the "
"server default applies (which itself defaults to 'standard'). The "
"chosen tier drives BOTH billing AND the actual request routing."
"Delivery tier: 'default' (baseline), 'flex' (50%% discount, higher latency), "
"or 'priority' (1.8x premium)."
),
)
skills: Optional[List["AgentSkill"]] = Field(
Expand Down
Loading