diff --git a/c/engine.cc b/c/engine.cc index 8e10e9c30..f48ac794e 100644 --- a/c/engine.cc +++ b/c/engine.cc @@ -396,6 +396,15 @@ void litert_lm_engine_settings_set_max_num_tokens( max_num_tokens); } } + +void litert_lm_engine_settings_set_max_num_images( + LiteRtLmEngineSettings* settings, int max_num_images) { + if (settings && settings->settings && max_num_images > 0) { + settings->settings->GetMutableMainExecutorSettings().SetMaxNumImages( + max_num_images); + } +} + void litert_lm_engine_settings_set_parallel_file_section_loading( LiteRtLmEngineSettings* settings, bool parallel_file_section_loading) { if (settings && settings->settings) { diff --git a/c/engine.h b/c/engine.h index aa1eec8b7..5fcf48abc 100644 --- a/c/engine.h +++ b/c/engine.h @@ -262,6 +262,18 @@ LITERT_LM_C_API_EXPORT void litert_lm_engine_settings_set_max_num_tokens( LiteRtLmEngineSettings* settings, int max_num_tokens); +// Sets the maximum number of images the model can handle in a single +// session. Required for multimodal vision models (e.g. Gemma 3n, +// Gemma 4) — without this set on the C API path, vision input is +// silently ignored and the model hallucinates a response from text +// alone. Mirrors `EngineConfig.maxNumImages` in the Kotlin API. +// +// @param settings The engine settings. A null handle makes this a no-op. +// @param max_num_images The maximum number of images. Values <= 0 are ignored. +LITERT_LM_C_API_EXPORT +void litert_lm_engine_settings_set_max_num_images( + LiteRtLmEngineSettings* settings, int max_num_images); + // Sets whether the engine should load different sections of the litertlm file // in parallel. Defaults to true. //