Skip to content

Commit 2ccd35d

Browse files
committed
Fix | OpenRouter Video Vision for Supported Models
1 parent cbc678f commit 2ccd35d

7 files changed

Lines changed: 70 additions & 12 deletions

File tree

src/commands/config/model/text.ts

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ const MODEL_SELECT_ID = "model_select";
5454
* Helper function to get localized LLM description based on user's locale
5555
* @param model - LLM model row from database
5656
* @param locale - User's preferred locale (e.g., "ja", "en-US")
57-
* @returns Localized description with flags prepended (e.g., "(FREE+TOOLS) Description")
57+
* @returns Localized description with flags prepended (e.g., "(FREE+TOOLS+IMG+VID) Description")
5858
*/
5959
function getLocalizedDescription(model: LlmRow, locale: string): string {
6060
// Normalize locale to handle variations (e.g., "ja-JP" -> "ja")
@@ -72,7 +72,7 @@ function getLocalizedDescription(model: LlmRow, locale: string): string {
7272
const baseDescription =
7373
description || model.llm_description || `${model.llm_provider} model`;
7474

75-
// Skip flags for account-setting (don't show TOOLS+IMAGES+etc. for this special model)
75+
// Skip flags for account-setting (don't show TOOLS+IMG+VID+etc. for this special model)
7676
if (model.llm_codename === "account-setting") {
7777
return baseDescription;
7878
}
@@ -81,7 +81,8 @@ function getLocalizedDescription(model: LlmRow, locale: string): string {
8181
const flags: string[] = [];
8282
if (model.is_free) flags.push("FREE");
8383
if (model.has_tools) flags.push("TOOLS");
84-
if (model.sees_images) flags.push("IMAGES");
84+
if (model.sees_images) flags.push("IMG");
85+
if (model.sees_videos) flags.push("VID");
8586
if (model.supports_structoutput) flags.push("STRUCT");
8687
//if (model.is_uncensored) flags.push("UNCENSORED");
8788

src/commands/tool/estimate/cost.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1054,6 +1054,7 @@ async function measureOpenRouterInputTokens(
10541054
const messages = await adapter.buildProbeMessages(
10551055
contextItems,
10561056
providerConfig.seesImages ?? true,
1057+
providerConfig.seesVideos ?? false,
10571058
);
10581059
const requestBody = buildOpenRouterProbeRequest(providerConfig, messages);
10591060

src/db/seed.sql

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,8 @@ VALUES
4343
('openrouter', 'tngtech/deepseek-r1t2-chimera', false, false, true, false, false, true, false, false, false, true, true, 'Advanced Chimera DeepSeek model that is great at role-playing', 'ロールプレイに優れた高度なChimera DeepSeekモデル'),
4444
('openrouter', 'x-ai/grok-4-fast', false, false, true, true, false, true, true, false, false, false, true, 'Fast and efficient general-purpose model', '高速かつ効率的な汎用モデル'),
4545
('openrouter', 'x-ai/grok-4.1-fast', false, false, true, false, false, true, true, false, false, false, true, 'Latest fast and efficient general-purpose model', '高速かつ効率的な汎用モデル'),
46-
('openrouter', 'google/gemini-3-flash-preview', false, false, false, false, false, true, true, false, false, false, true, 'Latest Gemini 3 Flash preview via OpenRouter with tool use and image understanding', 'OpenRouter経由でツール利用と画像理解に対応した最新のGemini 3 Flashプレビュー'),
47-
('openrouter', 'google/gemini-3-pro-preview', false, false, false, false, false, true, true, false, false, false, true, 'Latest Gemini 3 Pro preview via OpenRouter (same capabilities as Gemini 3 Flash)', 'OpenRouter経由の最新Gemini 3 Proプレビュー(Gemini 3 Flashと同等の機能)'),
46+
('openrouter', 'google/gemini-3-flash-preview', false, false, false, false, false, true, true, true, false, false, true, 'Latest Gemini 3 Flash preview via OpenRouter with tool use and image understanding', 'OpenRouter経由でツール利用と画像理解に対応した最新のGemini 3 Flashプレビュー'),
47+
('openrouter', 'google/gemini-3-pro-preview', false, false, false, false, false, true, true, true, false, false, true, 'Latest Gemini 3 Pro preview via OpenRouter (same capabilities as Gemini 3 Flash)', 'OpenRouter経由の最新Gemini 3 Proプレビュー(Gemini 3 Flashと同等の機能)'),
4848
('openrouter', 'anthropic/claude-sonnet-4.5', false, false, false, false, false, true, true, false, false, false, true, 'State-of-the-art performance in complex tasks and problems, also great in role-playing and creative writing', '複雑なタスクや問題に優れた最先端性能を持ち、ロールプレイや創作にも秀でたモデル'),
4949
('openrouter', 'anthropic/claude-haiku-4.5', false, false, false, false, false, true, true, false, false, false, true, 'Lightweight version of claude-sonnet-4.5', 'claude-sonnet-4.5の軽量版'),
5050
('openrouter', 'openai/gpt-5.1', true, false, true, true, false, true, true, false, false, false, true, 'State-of-the-art performance in complex tasks and problems', '複雑なタスクや問題に優れた最先端性能'),
@@ -58,9 +58,9 @@ VALUES
5858
('openrouter', 'mistralai/mistral-small-3.1-24b-instruct:free', false, false, false, true, true, true, true, false, false, false, false, 'Free multimodal model with enhanced reasoning and vision capabilities', '強化された推論とビジョン機能を備えた無料のマルチモーダルモデル'),
5959
('openrouter', 'z-ai/glm-4.5-air:free', false, true, false, false, true, true, false, false, false, false, false, 'Free lightweight model with thinking mode for reasoning and agent tasks', '推論とエージェントタスク向けのシンキングモードを備えた無料軽量モデル'),
6060
('openrouter', 'tngtech/tng-r1t-chimera:free', false, false, false, true, true, true, false, false, false, false, false, 'Free experimental model for creative storytelling and character interaction', '創作とキャラクター対話に特化した無料の実験モデル'),
61-
('openrouter', 'qwen/qwen3.5-35b-a3b', false, false, false, false, false, true, true, false, false, false, true, 'Qwen 3.5 35B A3B model with tool use, vision, and structured output support', 'ツール利用・画像理解・構造化出力に対応したQwen 3.5 35B A3Bモデル'),
62-
('openrouter', 'qwen/qwen3.5-27b', false, false, false, false, false, true, true, false, false, false, true, 'Qwen 3.5 27B model with tool use, vision, and structured output support', 'ツール利用・画像理解・構造化出力に対応したQwen 3.5 27Bモデル'),
63-
('openrouter', 'qwen/qwen3.5-flash-02-23', false, false, false, false, false, true, true, false, false, false, true, 'Fast Qwen 3.5 Flash (02-23) model with tool use, vision, and structured output support', 'ツール利用・画像理解・構造化出力に対応した高速Qwen 3.5 Flash(02-23)モデル'),
61+
('openrouter', 'qwen/qwen3.5-35b-a3b', false, false, false, false, false, true, true, true, false, false, true, 'Qwen 3.5 35B A3B model with tool use, vision, and structured output support', 'ツール利用・画像理解・構造化出力に対応したQwen 3.5 35B A3Bモデル'),
62+
('openrouter', 'qwen/qwen3.5-27b', false, false, false, false, false, true, true, true, false, false, true, 'Qwen 3.5 27B model with tool use, vision, and structured output support', 'ツール利用・画像理解・構造化出力に対応したQwen 3.5 27Bモデル'),
63+
('openrouter', 'qwen/qwen3.5-flash-02-23', false, false, false, false, false, true, true, true, false, false, true, 'Fast Qwen 3.5 Flash (02-23) model with tool use, vision, and structured output support', 'ツール利用・画像理解・構造化出力に対応した高速Qwen 3.5 Flash(02-23)モデル'),
6464
('openrouter', 'aion-labs/aion-2.0', false, false, false, false, false, false, false, false, false, false, false, 'Cheap role-play fine-tune of DeepSeek with no tools, vision, or structured output support', 'ツール・画像理解・構造化出力に対応しない、DeepSeekベースの低コストなロールプレイ特化ファインチューニングモデル'),
6565
('openrouter', 'account-setting', false, false, false, false, false, true, true, true, true, false, true, 'Advanced: Uses your OpenRouter account default model', '上級者向け:OpenRouterアカウントのデフォルトモデルを使用'),
6666
-- Custom Provider Bootstrap Entry (allows "custom" to appear in provider dropdown)

src/locales/en-US.ts

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1386,7 +1386,8 @@ You may opt out of my Memory features by using the {personalPrivacy} command, as
13861386
openrouter_model_selection_description: `OpenRouter offers access to many different AI models.
13871387
- Currently available models are based on popularity and performance, with tags for distinction:
13881388
- (TOOLS) = Supports tool usage (web search, self-learning, stickers, etc.)
1389-
- (IMAGES) = Sees images
1389+
- (IMG) = Sees images
1390+
- (VID) = Sees videos
13901391
- (STRUCT) = Supports structured output (needed for persona generation and expression initialization)
13911392
- (FREE) = No cost, but may have rate limits
13921393
- If you can't find what you want, try using the \`account-setting\` provider option

src/locales/ja.ts

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1387,7 +1387,8 @@ TomoriBotをセットアップすることで、あなたとサーバーメン
13871387
openrouter_model_selection_description: `OpenRouterは多くの異なるAIモデルへのアクセスを提供します。
13881388
- 現在利用可能なモデルは人気と性能に基づいており、区別のためのタグが付いています:
13891389
- (TOOLS) = ツール使用をサポート(ウェブ検索、自己学習、ステッカーなど)
1390-
- (IMAGES) = 画像を認識
1390+
- (IMG) = 画像を認識
1391+
- (VID) = 動画を認識
13911392
- (STRUCT) = 構造化出力をサポート(ペルソナ生成や表情の初期化に必要)
13921393
- (FREE) = 無料ですが、レート制限がある場合があります
13931394
- 希望のモデルが見つからない場合は、\`account-setting\`プロバイダーオプションを試してみてください

src/providers/openrouter/openrouterProvider.ts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,7 @@ async function getDefaultOpenrouterModel(): Promise<string> {
119119
export interface OpenrouterProviderConfig extends ProviderConfig {
120120
// OpenRouter uses OpenAI-compatible API, simple configuration
121121
seesImages?: boolean; // Whether the model supports image inputs
122+
seesVideos?: boolean; // Whether the model supports video inputs
122123
// Sampling parameters to control output quality
123124
topP?: number; // Nucleus sampling (0.0-1.0)
124125
topK?: number; // Top-k sampling
@@ -145,7 +146,7 @@ export class OpenrouterProvider extends BaseLLMProvider implements LLMProvider {
145146
supportsStreaming: true,
146147
supportsFunctionCalling: true,
147148
supportsImages: true, // Depends on specific models
148-
supportsVideos: false, // Most models don't support video yet
149+
supportsVideos: true, // Supported on video-capable models (e.g. Gemini via OpenRouter)
149150
};
150151
}
151152

@@ -489,6 +490,7 @@ export class OpenrouterProvider extends BaseLLMProvider implements LLMProvider {
489490
temperature: adjustedTemperature,
490491
maxOutputTokens: resolvedMaxOutputTokens,
491492
seesImages: effectiveSeesImages, // Use effective value (may be overridden)
493+
seesVideos: effectiveSeesVideos, // Wire through video capability flag
492494
// repetitionPenalty is hardcoded as a general token repetition dampener
493495
repetitionPenalty: 1.1,
494496
// Conditionally include user-configured sampling params (neutral = omit entirely)

src/providers/openrouter/openrouterStreamAdapter.ts

Lines changed: 53 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@ import type {
4545
export interface OpenrouterStreamConfig extends StreamConfig {
4646
// OpenRouter uses OpenAI-compatible config, simple structure
4747
seesImages?: boolean; // Whether the model supports image inputs
48+
seesVideos?: boolean; // Whether the model supports video inputs
4849
// Sampling parameters to control output quality
4950
topP?: number; // Nucleus sampling (0.0-1.0)
5051
topK?: number; // Top-k sampling
@@ -195,12 +196,15 @@ export class OpenrouterStreamAdapter implements StreamProvider {
195196
public async buildProbeMessages(
196197
contextItems: StructuredContextItem[],
197198
seesImages = true,
199+
seesVideos = false,
198200
): Promise<Array<Record<string, unknown>>> {
199201
return this.assembleOpenrouterContext(
200202
contextItems,
201203
[],
202204
undefined,
203205
seesImages,
206+
"Assistant",
207+
seesVideos,
204208
);
205209
}
206210

@@ -423,6 +427,7 @@ export class OpenrouterStreamAdapter implements StreamProvider {
423427
context.functionInteractionHistory,
424428
openrouterConfig.seesImages ?? true, // Default to true for backward compatibility
425429
context.tomoriState.tomori_nickname ?? "Assistant",
430+
openrouterConfig.seesVideos ?? false, // Default false — videos are strictly opt-in per model
426431
);
427432

428433
// Ensure model is provided
@@ -2338,6 +2343,7 @@ export class OpenrouterStreamAdapter implements StreamProvider {
23382343
}>,
23392344
seesImages: boolean = true,
23402345
botName: string = "Assistant",
2346+
seesVideos: boolean = false,
23412347
): Promise<Array<Record<string, unknown>>> {
23422348
const messages: Array<Record<string, unknown>> = [];
23432349
const systemInstructionParts: string[] = [];
@@ -2567,8 +2573,54 @@ export class OpenrouterStreamAdapter implements StreamProvider {
25672573
});
25682574
}
25692575
}
2576+
} else if (part.type === "video") {
2577+
// Videos follow the same role restriction as images - only user-role messages.
2578+
// For assistant turns, stage in pendingBotImageParts for a synthetic user turn.
2579+
const videoTargetParts =
2580+
role === "assistant" ? pendingBotImageParts : contentParts;
2581+
2582+
if (!seesVideos) {
2583+
log.info(
2584+
`Skipping video (model doesn't support videos): ${part.uri}`,
2585+
);
2586+
continue;
2587+
}
2588+
2589+
try {
2590+
const isHttpUrl =
2591+
part.uri.startsWith("http://") ||
2592+
part.uri.startsWith("https://");
2593+
const isDataUrl = part.uri.startsWith("data:");
2594+
if (!isHttpUrl && !isDataUrl) {
2595+
log.warn(
2596+
`Skipping unsupported video URI format for OpenRouter: ${part.uri}`,
2597+
);
2598+
continue;
2599+
}
2600+
2601+
// OpenRouter accepts direct public URLs and data URLs for video_url.
2602+
// Prefer direct URLs to avoid unnecessary fetch/encode overhead.
2603+
videoTargetParts.push({
2604+
type: "video_url",
2605+
video_url: { url: part.uri },
2606+
});
2607+
2608+
if (part.isYouTubeLink) {
2609+
log.success(`Added YouTube video to message: ${part.uri}`);
2610+
} else if (isHttpUrl) {
2611+
log.success(`Added direct video URL to message: ${part.uri}`);
2612+
} else {
2613+
log.success(`Added video data URL to message`);
2614+
}
2615+
} catch (videoErr) {
2616+
log.warn(`Error processing video: ${part.uri}`, {
2617+
error:
2618+
videoErr instanceof Error
2619+
? videoErr.message
2620+
: String(videoErr),
2621+
});
2622+
}
25702623
}
2571-
// Note: OpenRouter doesn't widely support video yet, skip video parts
25722624
}
25732625

25742626
// Add message

0 commit comments

Comments
 (0)