-
Notifications
You must be signed in to change notification settings - Fork 3
feat(cli): add --service-tier flag and --no-wait default for execute #189
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -147,6 +147,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): | |||||
|
|
||||||
| # Available toolsets (must match AgentToolset literal values) | ||||||
| AVAILABLE_TOOLSETS: List[str] = list(AgentToolset.__args__) | ||||||
| AVAILABLE_SERVICE_TIERS = ["default", "flex", "priority"] | ||||||
|
|
||||||
| DEFAULT_MODEL = "vlmrun-orion-1:auto" | ||||||
|
|
||||||
|
|
@@ -583,6 +584,11 @@ def chat( | |||||
| "-s", | ||||||
| help="Session UUID for persisting chat history (stateful conversations).", | ||||||
| ), | ||||||
| service_tier: Optional[str] = typer.Option( | ||||||
| None, | ||||||
| "--service-tier", | ||||||
| help="Delivery tier: standard, flex (50%% discount), or priority (1.8x premium).", | ||||||
|
Contributor
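There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🟡 CLI help text lists "standard" as a valid service tier but the actual valid tier is "default". The `--service-tier` help text advertises `standard`, but `AVAILABLE_SERVICE_TIERS` is `["default", "flex", "priority"]`, so passing `--service-tier standard` fails validation with "Invalid service tier". The help text should say `default` instead of `standard`.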
Suggested change
Was this helpful? React with 👍 or 👎 to provide feedback. |
||||||
| ), | ||||||
| timeout: Optional[float] = typer.Option( | ||||||
| None, | ||||||
| "--timeout", | ||||||
|
|
@@ -647,6 +653,11 @@ def chat( | |||||
| console.print(f" - {m}{default_marker}") | ||||||
| sys.exit(1) | ||||||
|
|
||||||
| if service_tier and service_tier not in AVAILABLE_SERVICE_TIERS: | ||||||
| console.print(f"[red]Error:[/] Invalid service tier '{service_tier}'") | ||||||
| console.print(f"\nAvailable tiers: {', '.join(AVAILABLE_SERVICE_TIERS)}") | ||||||
| sys.exit(1) | ||||||
|
|
||||||
| # Validate input files if provided | ||||||
| if input_files: | ||||||
| for file_path in input_files: | ||||||
|
|
@@ -730,6 +741,8 @@ def chat( | |||||
| extra_body["skills"] = agent_skills | ||||||
| if toolsets: | ||||||
| extra_body["toolsets"] = toolsets | ||||||
| if service_tier: | ||||||
| extra_body["service_tier"] = service_tier | ||||||
| if not extra_body: | ||||||
| extra_body = None | ||||||
|
|
||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -3,7 +3,6 @@ | |||||||||
| from __future__ import annotations | ||||||||||
|
|
||||||||||
| import json | ||||||||||
| import sys | ||||||||||
| import time | ||||||||||
| from concurrent.futures import ThreadPoolExecutor, as_completed | ||||||||||
| from pathlib import Path | ||||||||||
|
|
@@ -36,6 +35,8 @@ | |||||||||
|
|
||||||||||
| AVAILABLE_TOOLSETS: List[str] = list(AgentToolset.__args__) | ||||||||||
|
|
||||||||||
| AVAILABLE_SERVICE_TIERS = ["default", "flex", "priority"] | ||||||||||
|
|
||||||||||
| DEFAULT_MODEL = "vlmrun-orion-1:auto" | ||||||||||
|
|
||||||||||
| EXECUTE_HELP = """Execute an agent via /v1/agent/execute. | ||||||||||
|
|
@@ -46,7 +47,7 @@ | |||||||||
| vlmrun execute -p "Extract invoice fields" -i doc.pdf --schema schema.json | ||||||||||
| vlmrun execute -n my-agent:v1 -i img.jpg --skill ./my-skill | ||||||||||
| vlmrun execute -n my-agent:v1 -i img.jpg --skill-id my-skill:latest | ||||||||||
| vlmrun execute -p "Describe" -i photo.jpg --no-wait | ||||||||||
| vlmrun execute -p "Describe" -i photo.jpg --wait | ||||||||||
| vlmrun execute -n my-agent:v1 -i a.jpg -i b.pdf -t image -t document | ||||||||||
|
|
||||||||||
| \b | ||||||||||
|
|
@@ -179,9 +180,7 @@ def _upload_files( | |||||||||
| file_responses.append(future.result()) | ||||||||||
| status.update(f"Uploading {file_path.name}...") | ||||||||||
| except Exception as e: | ||||||||||
| console.print( | ||||||||||
| f"[red]Error uploading {file_path.name}:[/] {e}" | ||||||||||
| ) | ||||||||||
| console.print(f"[red]Error uploading {file_path.name}:[/] {e}") | ||||||||||
| raise typer.Exit(1) from e | ||||||||||
|
|
||||||||||
| return file_responses | ||||||||||
|
|
@@ -288,9 +287,9 @@ def execute( | |||||||||
| help="Model: vlmrun-orion-1[:lite|fast|auto|pro]", | ||||||||||
| ), | ||||||||||
| wait: bool = typer.Option( | ||||||||||
| True, | ||||||||||
| False, | ||||||||||
| "--wait/--no-wait", | ||||||||||
| help="Wait for execution to complete (default: wait).", | ||||||||||
| help="Wait for execution to complete (default: no-wait).", | ||||||||||
| ), | ||||||||||
| timeout: int = typer.Option( | ||||||||||
| 300, | ||||||||||
|
|
@@ -307,6 +306,11 @@ def execute( | |||||||||
| "--callback-url", | ||||||||||
| help="URL to call when execution completes (webhook).", | ||||||||||
| ), | ||||||||||
| service_tier: Optional[str] = typer.Option( | ||||||||||
| None, | ||||||||||
| "--service-tier", | ||||||||||
| help="Delivery tier: standard, flex (50%% discount), or priority (1.8x premium).", | ||||||||||
|
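There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The help text refers to `standard` as a service tier, but this value is not included in `AVAILABLE_SERVICE_TIERS` and has been removed from the supported tiers in the client types. It should be updated to `default` to match the implementation and avoid user confusion.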
Suggested change
Contributor
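There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🟡 CLI help text lists "standard" as a valid service tier but the actual valid tier is "default". Same issue as in the `chat` command: the `--service-tier` help text for `execute` advertises `standard`, which is not in `AVAILABLE_SERVICE_TIERS` and is rejected by the validation check. The help text should say `default` instead of `standard`.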
Suggested change
Was this helpful? React with 👍 or 👎 to provide feedback. |
||||||||||
| ), | ||||||||||
| output_format: Optional[str] = typer.Option( | ||||||||||
| None, | ||||||||||
| "--format", | ||||||||||
|
|
@@ -333,13 +337,16 @@ def execute( | |||||||||
| console.print(f"\nAvailable models: {', '.join(AVAILABLE_MODELS)}") | ||||||||||
| raise typer.Exit(1) | ||||||||||
|
|
||||||||||
| if service_tier and service_tier not in AVAILABLE_SERVICE_TIERS: | ||||||||||
| console.print(f"[red]Error:[/] Invalid service tier '{service_tier}'") | ||||||||||
| console.print(f"\nAvailable tiers: {', '.join(AVAILABLE_SERVICE_TIERS)}") | ||||||||||
| raise typer.Exit(1) | ||||||||||
|
|
||||||||||
| if toolsets: | ||||||||||
| for ts in toolsets: | ||||||||||
| if ts not in AVAILABLE_TOOLSETS: | ||||||||||
| console.print(f"[red]Error:[/] Invalid toolset '{ts}'") | ||||||||||
| console.print( | ||||||||||
| f"\nAvailable toolsets: {', '.join(AVAILABLE_TOOLSETS)}" | ||||||||||
| ) | ||||||||||
| console.print(f"\nAvailable toolsets: {', '.join(AVAILABLE_TOOLSETS)}") | ||||||||||
| raise typer.Exit(1) | ||||||||||
|
|
||||||||||
| if input_files: | ||||||||||
|
|
@@ -399,8 +406,7 @@ def execute( | |||||||||
| if file_responses: | ||||||||||
| inputs = { | ||||||||||
| "files": [ | ||||||||||
| {"type": "input_file", "file_id": fr.id} | ||||||||||
| for fr in file_responses | ||||||||||
| {"type": "input_file", "file_id": fr.id} for fr in file_responses | ||||||||||
| ] | ||||||||||
| } | ||||||||||
|
|
||||||||||
|
|
@@ -409,11 +415,12 @@ def execute( | |||||||||
| prompt=final_prompt, | ||||||||||
| json_schema=json_schema, | ||||||||||
| skills=skills, | ||||||||||
| service_tier=service_tier, | ||||||||||
| ) | ||||||||||
|
|
||||||||||
| if not output_json: | ||||||||||
| console.print( | ||||||||||
| f"\n [bold blue]Submitting execution[/bold blue]" | ||||||||||
| "\n [bold blue]Submitting execution[/bold blue]" | ||||||||||
| + (f" [dim]({name})[/dim]" if name else "") | ||||||||||
| + f" [dim]model={model}[/dim]" | ||||||||||
| ) | ||||||||||
|
|
||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -3,13 +3,11 @@ | |||||||||
| from __future__ import annotations | ||||||||||
|
|
||||||||||
| import json | ||||||||||
| import sys | ||||||||||
| import time | ||||||||||
| from pathlib import Path | ||||||||||
| from typing import Any, Dict, List, Optional | ||||||||||
|
|
||||||||||
| import typer | ||||||||||
| from rich import print as rprint | ||||||||||
| from rich.console import Console | ||||||||||
| from rich.panel import Panel | ||||||||||
| from rich.status import Status | ||||||||||
|
|
@@ -32,6 +30,7 @@ | |||||||||
| console = Console() | ||||||||||
|
|
||||||||||
| AVAILABLE_TOOLSETS: List[str] = list(AgentToolset.__args__) | ||||||||||
| AVAILABLE_SERVICE_TIERS = ["default", "flex", "priority"] | ||||||||||
|
|
||||||||||
| GENERATE_HELP = """Generate structured predictions for images, documents, videos, and audio. | ||||||||||
|
|
||||||||||
|
|
@@ -76,9 +75,7 @@ def _resolve_skills( | |||||||||
| ) -> Optional[List[AgentSkill]]: | ||||||||||
| """Build AgentSkill list from --skill dirs or --skill-id references.""" | ||||||||||
| if skill_dirs and skill_ids: | ||||||||||
| console.print( | ||||||||||
| "[red]Error:[/] --skill and --skill-id are mutually exclusive." | ||||||||||
| ) | ||||||||||
| console.print("[red]Error:[/] --skill and --skill-id are mutually exclusive.") | ||||||||||
| raise typer.Exit(1) | ||||||||||
|
|
||||||||||
| if skill_dirs: | ||||||||||
|
|
@@ -175,6 +172,11 @@ def generate( | |||||||||
| "--timeout", | ||||||||||
| help="Timeout in seconds when waiting for prediction to complete.", | ||||||||||
| ), | ||||||||||
| service_tier: Optional[str] = typer.Option( | ||||||||||
| None, | ||||||||||
| "--service-tier", | ||||||||||
| help="Delivery tier: standard, flex (50%% discount), or priority (1.8x premium).", | ||||||||||
|
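There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The help text refers to `standard` as a service tier, but this value is not included in `AVAILABLE_SERVICE_TIERS` and has been removed from the supported tiers in the client types. It should be updated to `default` to match the implementation and avoid user confusion.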
Suggested change
Contributor
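There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🟡 CLI help text lists "standard" as a valid service tier but the actual valid tier is "default". Same issue as in the `chat` command: the `--service-tier` help text for `generate` advertises `standard`, which is not in `AVAILABLE_SERVICE_TIERS` and is rejected by the validation check. The help text should say `default` instead of `standard`.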
Suggested change
Was this helpful? React with 👍 or 👎 to provide feedback. |
||||||||||
| ), | ||||||||||
| output_format: Optional[str] = typer.Option( | ||||||||||
| None, | ||||||||||
| "--format", | ||||||||||
|
|
@@ -196,6 +198,11 @@ def generate( | |||||||||
| console.print(f"[red]Error:[/] Unsupported output format '{output_format}'") | ||||||||||
| raise typer.Exit(1) | ||||||||||
|
|
||||||||||
| if service_tier and service_tier not in AVAILABLE_SERVICE_TIERS: | ||||||||||
| console.print(f"[red]Error:[/] Invalid service tier '{service_tier}'") | ||||||||||
| console.print(f"\nAvailable tiers: {', '.join(AVAILABLE_SERVICE_TIERS)}") | ||||||||||
| raise typer.Exit(1) | ||||||||||
|
|
||||||||||
| suffix = input_file.suffix.lower() | ||||||||||
| if suffix not in SUPPORTED_INPUT_FILETYPES: | ||||||||||
| console.print(f"[red]Error:[/] Unsupported file type: {suffix}") | ||||||||||
|
|
@@ -223,11 +230,12 @@ def generate( | |||||||||
| raise typer.Exit(1) from e | ||||||||||
|
|
||||||||||
| config: Optional[GenerationConfig] = None | ||||||||||
| if any([skills, json_schema, prompt]): | ||||||||||
| if any([skills, json_schema, prompt, service_tier]): | ||||||||||
| config = GenerationConfig( | ||||||||||
| skills=skills, | ||||||||||
| json_schema=json_schema, | ||||||||||
| prompt=prompt, | ||||||||||
| service_tier=service_tier, | ||||||||||
| ) | ||||||||||
|
|
||||||||||
| try: | ||||||||||
|
|
@@ -242,31 +250,45 @@ def generate( | |||||||||
| start_time = time.time() | ||||||||||
|
|
||||||||||
| if media_type in ("image", "document"): | ||||||||||
| with Status("Processing...", console=console, spinner="dots") if not output_json else _noop_ctx(): | ||||||||||
| with ( | ||||||||||
| Status("Processing...", console=console, spinner="dots") | ||||||||||
| if not output_json | ||||||||||
| else _noop_ctx() | ||||||||||
| ): | ||||||||||
| response: PredictionResponse = client.document.generate( | ||||||||||
| file=input_file, | ||||||||||
| domain=domain, | ||||||||||
| batch=batch, | ||||||||||
| config=config, | ||||||||||
| ) | ||||||||||
| elif media_type == "video": | ||||||||||
| with Status("Processing...", console=console, spinner="dots") if not output_json else _noop_ctx(): | ||||||||||
| with ( | ||||||||||
| Status("Processing...", console=console, spinner="dots") | ||||||||||
| if not output_json | ||||||||||
| else _noop_ctx() | ||||||||||
| ): | ||||||||||
| response = client.video.generate( | ||||||||||
| file=input_file, | ||||||||||
| domain=domain, | ||||||||||
| batch=batch, | ||||||||||
| config=config, | ||||||||||
| ) | ||||||||||
| elif media_type == "audio": | ||||||||||
| with Status("Processing...", console=console, spinner="dots") if not output_json else _noop_ctx(): | ||||||||||
| with ( | ||||||||||
| Status("Processing...", console=console, spinner="dots") | ||||||||||
| if not output_json | ||||||||||
| else _noop_ctx() | ||||||||||
| ): | ||||||||||
| response = client.audio.generate( | ||||||||||
| file=input_file, | ||||||||||
| domain=domain, | ||||||||||
| batch=batch, | ||||||||||
| config=config, | ||||||||||
| ) | ||||||||||
| else: | ||||||||||
| console.print(f"[red]Error:[/] Could not determine media type for {input_file}") | ||||||||||
| console.print( | ||||||||||
| f"[red]Error:[/] Could not determine media type for {input_file}" | ||||||||||
| ) | ||||||||||
| raise typer.Exit(1) | ||||||||||
|
|
||||||||||
| # If batch mode and wait requested, poll until complete | ||||||||||
|
|
||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -239,6 +239,13 @@ class AgentExecutionOrCreationConfig(BaseModel): | |||||||||
| default=None, | ||||||||||
| description="List of agent skills to enable for this execution. Skills provide domain-specific expertise and capabilities.", | ||||||||||
| ) | ||||||||||
| service_tier: Literal["default", "flex", "priority"] | None = Field( | ||||||||||
| default=None, | ||||||||||
| description=( | ||||||||||
| "Delivery tier: 'default' (baseline), 'flex' (50%% discount, higher latency), " | ||||||||||
| "or 'priority' (1.8x premium)." | ||||||||||
|
Comment on lines
+245
to
+246
Contributor
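There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🟡 Double percent `%%` in the `Field` description. The description is an ordinary Python string literal that is never passed through %-formatting, so `50%%` is stored and displayed with two percent signs. The description this PR replaces in `GenerationConfig` used a single `%` ("a 50% discount"); the new descriptions should use `50%` as well.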
Suggested change
Was this helpful? React with 👍 or 👎 to provide feedback. |
||||||||||
| ), | ||||||||||
| ) | ||||||||||
|
|
||||||||||
| @model_validator(mode="after") | ||||||||||
| def validate_config(self): | ||||||||||
|
|
@@ -597,15 +604,11 @@ class GenerationConfig(BaseModel): | |||||||||
| default=None, | ||||||||||
| description="0-indexed page indices to process for document files. If None, all pages are processed.", | ||||||||||
| ) | ||||||||||
| service_tier: Literal["auto", "default", "standard", "flex", "priority"] | None = Field( | ||||||||||
| service_tier: Literal["default", "flex", "priority"] | None = Field( | ||||||||||
| default=None, | ||||||||||
| description=( | ||||||||||
| "Delivery tier mirroring OpenAI's service_tier and Vertex AI's " | ||||||||||
| "Gemini Flex/Priority offering. 'standard'/'default' uses baseline " | ||||||||||
| "rates, 'flex' applies a 50% discount with higher latency, " | ||||||||||
| "'priority' applies a 1.8x premium. When omitted (or 'auto'), the " | ||||||||||
| "server default applies (which itself defaults to 'standard'). The " | ||||||||||
| "chosen tier drives BOTH billing AND the actual request routing." | ||||||||||
| "Delivery tier: 'default' (baseline), 'flex' (50%% discount, higher latency), " | ||||||||||
| "or 'priority' (1.8x premium)." | ||||||||||
| ), | ||||||||||
| ) | ||||||||||
| skills: Optional[List["AgentSkill"]] = Field( | ||||||||||
|
|
||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The help text refers to `standard` as a service tier, but this value is not included in `AVAILABLE_SERVICE_TIERS` and has been removed from the supported tiers in the client types. It should be updated to `default` to match the implementation and avoid user confusion.