diff --git a/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx b/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx
new file mode 100644
index 00000000000..166deafcfa5
--- /dev/null
+++ b/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx
@@ -0,0 +1,114 @@
+// ModelsLab voice entries, each shaped `{ value, label }`.
+// Written as [voice id, display label] pairs and expanded below.
+const MODELSLAB_VOICES = [
+  ["en_us_001", "English (US) - Voice 1"],
+  ["en_us_006", "English (US) - Voice 2"],
+  ["en_us_007", "English (US) - Voice 3"],
+  ["en_us_009", "English (US) - Voice 4"],
+  ["en_us_010", "English (US) - Voice 5"],
+  ["en_uk_001", "English (UK) - Voice 1"],
+  ["en_uk_003", "English (UK) - Voice 2"],
+  ["en_au_001", "English (AU) - Voice 1"],
+  ["en_au_002", "English (AU) - Voice 2"],
+].map(([value, label]) => ({ value, label }));
+
+// ModelsLab language entries, each shaped `{ value, label }`.
+// Every value is the lower-cased form of its label, so only labels are listed.
+const MODELSLAB_LANGUAGES = [
+  "English",
+  "Spanish",
+  "French",
+  "German",
+  "Italian",
+  "Portuguese",
+  "Polish",
+  "Hindi",
+].map((label) => ({ value: label.toLowerCase(), label }));
+
+// ModelsLab playback-speed entries, each shaped `{ value, label }`.
+// Listed as [multiplier, optional description]; `value` is the multiplier as a
+// string and `label` is "<multiplier>x", followed by " (<description>)" when given.
+const MODELSLAB_SPEEDS = [
+  [0.5, "Slow"],
+  [0.75],
+  [1, "Normal"],
+  [1.25],
+  [1.5, "Fast"],
+  [2, "Very Fast"],
+].map(([multiplier, description]) => {
+  const value = String(multiplier);
+  const suffix = description === undefined ? "" : ` (${description})`;
+  return { value, label: `${value}x${suffix}` };
+});
+
+/**
+ * Options component for the ModelsLab text-to-speech provider.
+ *
+ * NOTE(review): the markup returned by this component is not visible in this
+ * view (the lines inside `return (...)` are blank), so the rendered fields
+ * cannot be documented here — confirm against the real file.
+ *
+ * @param {object} props
+ * @param {object} props.settings - presumably the saved system settings used
+ *   to pre-fill the form — TODO confirm against the caller.
+ */
+export default function ModelsLabTextToSpeechOptions({ settings }) {
+ return (
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ );
+}
diff --git a/frontend/src/media/ttsproviders/modelslab.png b/frontend/src/media/ttsproviders/modelslab.png
new file mode 100644
index 00000000000..bededd15d30
Binary files /dev/null and b/frontend/src/media/ttsproviders/modelslab.png differ
diff --git a/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx b/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx
index 68f19a7bb24..92361c41c95 100644
--- a/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx
+++ b/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx
@@ -9,12 +9,14 @@ import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
import ElevenLabsIcon from "@/media/ttsproviders/elevenlabs.png";
import PiperTTSIcon from "@/media/ttsproviders/piper.png";
import GenericOpenAiLogo from "@/media/ttsproviders/generic-openai.png";
+import ModelsLabLogo from "@/media/ttsproviders/modelslab.png";
import BrowserNative from "@/components/TextToSpeech/BrowserNative";
import OpenAiTTSOptions from "@/components/TextToSpeech/OpenAiOptions";
import ElevenLabsTTSOptions from "@/components/TextToSpeech/ElevenLabsOptions";
import PiperTTSOptions from "@/components/TextToSpeech/PiperTTSOptions";
import OpenAiGenericTTSOptions from "@/components/TextToSpeech/OpenAiGenericOptions";
+import ModelsLabTextToSpeechOptions from "@/components/TextToSpeech/ModelsLabOptions";
const PROVIDERS = [
{
@@ -53,6 +55,14 @@ const PROVIDERS = [
description:
"Connect to an OpenAI compatible TTS service running locally or remotely.",
},
+ {
+ name: "ModelsLab",
+ value: "modelslab",
+ logo: ModelsLabLogo,
+ options: (settings) => ,
+ description:
+ "Use ModelsLab's text-to-speech API with a wide variety of voices and languages.",
+ },
];
export default function TextToSpeechProvider({ settings }) {
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index dac2083ad02..be2e94b2dd5 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -286,6 +286,12 @@ const SystemSettings = {
TTSOpenAICompatibleVoiceModel:
process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
+ // ModelsLab TTS
+ TTSModelsLabApiKey: !!process.env.TTS_MODELSLAB_API_KEY,
+ TTSModelsLabVoiceId:
+ process.env.TTS_MODELSLAB_VOICE_ID ?? "en_us_001",
+ TTSModelsLabLanguage: process.env.TTS_MODELSLAB_LANGUAGE ?? "english",
+ TTSModelsLabSpeed: process.env.TTS_MODELSLAB_SPEED ?? "1",
// --------------------------------------------------------
// Agent Settings & Configs
diff --git a/server/utils/TextToSpeech/index.js b/server/utils/TextToSpeech/index.js
index 5ed5684de6d..813eda568e7 100644
--- a/server/utils/TextToSpeech/index.js
+++ b/server/utils/TextToSpeech/index.js
@@ -10,6 +10,9 @@ function getTTSProvider() {
case "generic-openai":
const { GenericOpenAiTTS } = require("./openAiGeneric");
return new GenericOpenAiTTS();
+ case "modelslab":
+ const { ModelsLabTTS } = require("./modelslab");
+ return new ModelsLabTTS();
default:
throw new Error("ENV: No TTS_PROVIDER value found in environment!");
}
diff --git a/server/utils/TextToSpeech/modelslab/index.js b/server/utils/TextToSpeech/modelslab/index.js
new file mode 100644
index 00000000000..1b83b64b5cc
--- /dev/null
+++ b/server/utils/TextToSpeech/modelslab/index.js
@@ -0,0 +1,125 @@
+/**
+ * Text-to-speech provider backed by the ModelsLab voice API.
+ *
+ * Configuration is read from the environment when an instance is constructed:
+ * - TTS_MODELSLAB_API_KEY  (required)
+ * - TTS_MODELSLAB_VOICE_ID (optional, defaults to `ModelsLabTTS.DEFAULT_VOICE`)
+ * - TTS_MODELSLAB_LANGUAGE (optional, defaults to "english")
+ * - TTS_MODELSLAB_SPEED    (optional, defaults to 1)
+ */
+class ModelsLabTTS {
+  static VOICES = [
+    "en_us_001",
+    "en_us_006",
+    "en_us_007",
+    "en_us_009",
+    "en_us_010",
+    "en_uk_001",
+    "en_uk_003",
+    "en_au_001",
+    "en_au_002",
+  ];
+
+  static DEFAULT_VOICE = "en_us_001";
+
+  /** Maximum time, in milliseconds, a single HTTP request may take before it is aborted. */
+  static REQUEST_TIMEOUT_MS = 30000;
+
+  /**
+   * @throws {Error} When TTS_MODELSLAB_API_KEY is not set.
+   */
+  constructor() {
+    if (!process.env.TTS_MODELSLAB_API_KEY)
+      throw new Error("No ModelsLab API key was set for TTS.");
+    this.apiKey = process.env.TTS_MODELSLAB_API_KEY;
+    this.voice = ModelsLabTTS.#envOrDefault(
+      "TTS_MODELSLAB_VOICE_ID",
+      ModelsLabTTS.DEFAULT_VOICE
+    );
+    this.language = ModelsLabTTS.#envOrDefault(
+      "TTS_MODELSLAB_LANGUAGE",
+      "english"
+    );
+
+    // A non-numeric speed parses to NaN, which JSON.stringify emits as `null`;
+    // fall back to normal speed instead of sending an invalid value.
+    const parsedSpeed = Number.parseFloat(
+      ModelsLabTTS.#envOrDefault("TTS_MODELSLAB_SPEED", "1")
+    );
+    this.speed = Number.isFinite(parsedSpeed) && parsedSpeed > 0 ? parsedSpeed : 1;
+    this.#log(`Initialized with voice: ${this.voice}`);
+  }
+
+  /**
+   * Reads an environment variable, treating unset and blank values alike.
+   * `??` alone would keep an empty string, which is not a usable setting.
+   * @param {string} name - Environment variable name.
+   * @param {string} fallback - Value used when the variable is unset or blank.
+   * @returns {string}
+   */
+  static #envOrDefault(name, fallback) {
+    const value = process.env[name]?.trim();
+    return value ? value : fallback;
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[ModelsLabTTS]\x1b[0m ${text}`, ...args);
+  }
+
+  /**
+   * Fetches a URL and returns the response body as a Buffer.
+   * @param {string} url
+   * @returns {Promise<Buffer>}
+   * @throws {Error} When the response status is not OK or the request times out.
+   */
+  async #fetchUrl(url) {
+    const response = await fetch(url, {
+      signal: AbortSignal.timeout(ModelsLabTTS.REQUEST_TIMEOUT_MS),
+    });
+    if (!response.ok)
+      throw new Error(`Failed to fetch audio: ${response.statusText}`);
+    return Buffer.from(await response.arrayBuffer());
+  }
+
+  /**
+   * POSTs a JSON payload and returns the parsed JSON response.
+   * The status is checked before parsing so an HTML/plain-text error page
+   * surfaces as a clear HTTP error rather than a JSON syntax error.
+   * @param {string} url
+   * @param {object} payload
+   * @returns {Promise<any>}
+   * @throws {Error} When the response status is not OK or the request times out.
+   */
+  async #postJson(url, payload) {
+    const response = await fetch(url, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify(payload),
+      signal: AbortSignal.timeout(ModelsLabTTS.REQUEST_TIMEOUT_MS),
+    });
+    if (!response.ok)
+      throw new Error(
+        `ModelsLab request failed: ${response.status} ${response.statusText}`
+      );
+    return await response.json();
+  }
+
+  /**
+   * Extracts the first audio URL from a successful API response.
+   * @param {any} data - Parsed API response.
+   * @returns {string|null} The URL, or null when the response is not a
+   *   success or carries no usable output entry.
+   */
+  #firstOutputUrl(data) {
+    if (data?.status !== "success" || !Array.isArray(data.output)) return null;
+    const [url] = data.output;
+    return typeof url === "string" && url.length > 0 ? url : null;
+  }
+
+  /**
+   * Polls the ModelsLab fetch endpoint until the audio is ready.
+   * Waits before every attempt, using a linear backoff capped at 5 seconds.
+   * @param {string|number} requestId
+   * @param {number} maxAttempts
+   * @returns {Promise<Buffer|null>} The audio, or null on API error or timeout.
+   */
+  async #pollForResult(requestId, maxAttempts = 20) {
+    const fetchUrl = "https://modelslab.com/api/v6/voice/fetch";
+    let delayMs = 1000; // Start with 1 second
+
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+      await new Promise((r) => setTimeout(r, delayMs));
+
+      const data = await this.#postJson(fetchUrl, {
+        key: this.apiKey,
+        request_id: String(requestId),
+      });
+
+      const audioUrl = this.#firstOutputUrl(data);
+      if (audioUrl) return await this.#fetchUrl(audioUrl);
+
+      if (data?.status === "error") {
+        // Both spellings are checked, as in the original implementation.
+        this.#log("Poll error:", data.message || data.messege || "Unknown error");
+        return null;
+      }
+
+      this.#log(`Polling attempt ${attempt + 1}/${maxAttempts}...`);
+
+      // Linear backoff: 1s, 2s, 3s, 4s, then 5s for every later attempt
+      delayMs = Math.min(delayMs + 1000, 5000);
+    }
+
+    this.#log("Timed out waiting for audio generation.");
+    return null;
+  }
+
+  /**
+   * Generates a buffer from the given text input using the ModelsLab TTS API.
+   * Never throws: every failure is logged and reported as `null`.
+   * @param {string} textInput - The text to be converted to audio.
+   * @returns {Promise<Buffer|null>} A buffer containing the audio data, or
+   *   null when the text is empty or the audio could not be produced.
+   */
+  async ttsBuffer(textInput) {
+    // Nothing to synthesize; skip the network round-trip entirely.
+    if (typeof textInput !== "string" || textInput.trim().length === 0) {
+      this.#log("No text was provided for speech generation.");
+      return null;
+    }
+
+    try {
+      const data = await this.#postJson(
+        "https://modelslab.com/api/v6/voice/text_to_speech",
+        {
+          key: this.apiKey,
+          prompt: textInput,
+          voice_id: this.voice,
+          language: this.language,
+          speed: this.speed,
+        }
+      );
+
+      const audioUrl = this.#firstOutputUrl(data);
+      if (audioUrl) return await this.#fetchUrl(audioUrl);
+
+      if (data?.status === "processing" && data.id) {
+        this.#log(`Processing... polling for request ID: ${data.id}`);
+        return await this.#pollForResult(data.id);
+      }
+
+      this.#log("Unexpected response:", JSON.stringify(data));
+      return null;
+    } catch (e) {
+      console.error("[ModelsLabTTS] Error:", e);
+      return null;
+    }
+  }
+}
+
+module.exports = { ModelsLabTTS };
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index cc08afbb03e..fa940786a91 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -653,6 +653,24 @@ const KEY_MAPPING = {
checks: [isValidURL],
},
+ // ModelsLab TTS
+ TTSModelsLabApiKey: {
+ envKey: "TTS_MODELSLAB_API_KEY",
+ checks: [isNotEmpty],
+ },
+ TTSModelsLabVoiceId: {
+ envKey: "TTS_MODELSLAB_VOICE_ID",
+ checks: [],
+ },
+ TTSModelsLabLanguage: {
+ envKey: "TTS_MODELSLAB_LANGUAGE",
+ checks: [],
+ },
+ TTSModelsLabSpeed: {
+ envKey: "TTS_MODELSLAB_SPEED",
+ checks: [],
+ },
+
// DeepSeek Options
DeepSeekApiKey: {
envKey: "DEEPSEEK_API_KEY",
@@ -898,6 +916,7 @@ function supportedTTSProvider(input = "") {
"elevenlabs",
"piper_local",
"generic-openai",
+ "modelslab",
].includes(input);
return validSelection ? null : `${input} is not a valid TTS provider.`;
}