diff --git a/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx b/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx new file mode 100644 index 00000000000..166deafcfa5 --- /dev/null +++ b/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx @@ -0,0 +1,114 @@ +const MODELSLAB_VOICES = [ + { value: "en_us_001", label: "English (US) - Voice 1" }, + { value: "en_us_006", label: "English (US) - Voice 2" }, + { value: "en_us_007", label: "English (US) - Voice 3" }, + { value: "en_us_009", label: "English (US) - Voice 4" }, + { value: "en_us_010", label: "English (US) - Voice 5" }, + { value: "en_uk_001", label: "English (UK) - Voice 1" }, + { value: "en_uk_003", label: "English (UK) - Voice 2" }, + { value: "en_au_001", label: "English (AU) - Voice 1" }, + { value: "en_au_002", label: "English (AU) - Voice 2" }, +]; + +const MODELSLAB_LANGUAGES = [ + { value: "english", label: "English" }, + { value: "spanish", label: "Spanish" }, + { value: "french", label: "French" }, + { value: "german", label: "German" }, + { value: "italian", label: "Italian" }, + { value: "portuguese", label: "Portuguese" }, + { value: "polish", label: "Polish" }, + { value: "hindi", label: "Hindi" }, +]; + +const MODELSLAB_SPEEDS = [ + { value: "0.5", label: "0.5x (Slow)" }, + { value: "0.75", label: "0.75x" }, + { value: "1", label: "1x (Normal)" }, + { value: "1.25", label: "1.25x" }, + { value: "1.5", label: "1.5x (Fast)" }, + { value: "2", label: "2x (Very Fast)" }, +]; + +export default function ModelsLabTextToSpeechOptions({ settings }) { + return ( +
+
+
+ + +

+ Get your API key at{" "} + + modelslab.com + +

+
+
+ + +
+
+ + +
+
+ + +
+
+
+ ); +} diff --git a/frontend/src/media/ttsproviders/modelslab.png b/frontend/src/media/ttsproviders/modelslab.png new file mode 100644 index 00000000000..bededd15d30 Binary files /dev/null and b/frontend/src/media/ttsproviders/modelslab.png differ diff --git a/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx b/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx index 68f19a7bb24..92361c41c95 100644 --- a/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx +++ b/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx @@ -9,12 +9,14 @@ import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png"; import ElevenLabsIcon from "@/media/ttsproviders/elevenlabs.png"; import PiperTTSIcon from "@/media/ttsproviders/piper.png"; import GenericOpenAiLogo from "@/media/ttsproviders/generic-openai.png"; +import ModelsLabLogo from "@/media/ttsproviders/modelslab.png"; import BrowserNative from "@/components/TextToSpeech/BrowserNative"; import OpenAiTTSOptions from "@/components/TextToSpeech/OpenAiOptions"; import ElevenLabsTTSOptions from "@/components/TextToSpeech/ElevenLabsOptions"; import PiperTTSOptions from "@/components/TextToSpeech/PiperTTSOptions"; import OpenAiGenericTTSOptions from "@/components/TextToSpeech/OpenAiGenericOptions"; +import ModelsLabTextToSpeechOptions from "@/components/TextToSpeech/ModelsLabOptions"; const PROVIDERS = [ { @@ -53,6 +55,14 @@ const PROVIDERS = [ description: "Connect to an OpenAI compatible TTS service running locally or remotely.", }, + { + name: "ModelsLab", + value: "modelslab", + logo: ModelsLabLogo, + options: (settings) => , + description: + "Use ModelsLab's text-to-speech API with a wide variety of voices and languages.", + }, ]; export default function TextToSpeechProvider({ settings }) { diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index dac2083ad02..be2e94b2dd5 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -286,6 +286,12 @@ 
const SystemSettings = {
       TTSOpenAICompatibleVoiceModel:
         process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
       TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
+      // ModelsLab TTS
+      TTSModelsLabApiKey: !!process.env.TTS_MODELSLAB_API_KEY,
+      TTSModelsLabVoiceId:
+        process.env.TTS_MODELSLAB_VOICE_ID || "en_us_001",
+      TTSModelsLabLanguage: process.env.TTS_MODELSLAB_LANGUAGE || "english",
+      TTSModelsLabSpeed: process.env.TTS_MODELSLAB_SPEED || "1",
 
       // --------------------------------------------------------
       // Agent Settings & Configs
diff --git a/server/utils/TextToSpeech/index.js b/server/utils/TextToSpeech/index.js
index 5ed5684de6d..813eda568e7 100644
--- a/server/utils/TextToSpeech/index.js
+++ b/server/utils/TextToSpeech/index.js
@@ -10,6 +10,9 @@ function getTTSProvider() {
     case "generic-openai":
       const { GenericOpenAiTTS } = require("./openAiGeneric");
       return new GenericOpenAiTTS();
+    case "modelslab":
+      const { ModelsLabTTS } = require("./modelslab");
+      return new ModelsLabTTS();
     default:
       throw new Error("ENV: No TTS_PROVIDER value found in environment!");
   }
diff --git a/server/utils/TextToSpeech/modelslab/index.js b/server/utils/TextToSpeech/modelslab/index.js
new file mode 100644
index 00000000000..1b83b64b5cc
--- /dev/null
+++ b/server/utils/TextToSpeech/modelslab/index.js
@@ -0,0 +1,172 @@
+class ModelsLabTTS {
+  static VOICES = [
+    "en_us_001",
+    "en_us_006",
+    "en_us_007",
+    "en_us_009",
+    "en_us_010",
+    "en_uk_001",
+    "en_uk_003",
+    "en_au_001",
+    "en_au_002",
+  ];
+
+  static DEFAULT_VOICE = "en_us_001";
+  static DEFAULT_LANGUAGE = "english";
+  static DEFAULT_SPEED = 1;
+
+  /**
+   * Reads the ModelsLab TTS configuration from the environment.
+   * @throws {Error} When TTS_MODELSLAB_API_KEY is not set.
+   */
+  constructor() {
+    if (!process.env.TTS_MODELSLAB_API_KEY)
+      throw new Error("No ModelsLab API key was set for TTS.");
+    this.apiKey = process.env.TTS_MODELSLAB_API_KEY;
+    // `||` rather than `??`: a blank env value must also use the default.
+    this.voice =
+      process.env.TTS_MODELSLAB_VOICE_ID || ModelsLabTTS.DEFAULT_VOICE;
+    this.language =
+      process.env.TTS_MODELSLAB_LANGUAGE || ModelsLabTTS.DEFAULT_LANGUAGE;
+    this.speed = ModelsLabTTS.#parseSpeed(process.env.TTS_MODELSLAB_SPEED);
+    this.#log(`Initialized with voice: ${this.voice}`);
+  }
+
+  /**
+   * Converts the configured speed into a number that is safe to send.
+   * parseFloat yields NaN for blank or non-numeric input, and NaN is
+   * serialized as `null` by JSON.stringify, so anything that is not a
+   * finite positive number falls back to DEFAULT_SPEED.
+   * @param {string|undefined} raw - Raw TTS_MODELSLAB_SPEED value.
+   * @returns {number}
+   */
+  static #parseSpeed(raw) {
+    const speed = Number.parseFloat(raw);
+    return Number.isFinite(speed) && speed > 0
+      ? speed
+      : ModelsLabTTS.DEFAULT_SPEED;
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[ModelsLabTTS]\x1b[0m ${text}`, ...args);
+  }
+
+  /**
+   * POSTs a JSON payload and returns the parsed JSON response body.
+   * @param {string} url
+   * @param {object} payload
+   * @returns {Promise<object>}
+   * @throws {Error} When the response status is not 2xx.
+   */
+  async #postJson(url, payload) {
+    const response = await fetch(url, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify(payload),
+    });
+    if (!response.ok) {
+      // Surface the status instead of a JSON parse error on an error page.
+      const detail = await response.text().catch(() => "");
+      throw new Error(
+        `ModelsLab request failed (${response.status}): ${detail}`
+      );
+    }
+    return await response.json();
+  }
+
+  /**
+   * Fetches a URL and returns the response body as a Buffer.
+   * @param {string} url
+   * @returns {Promise<Buffer>}
+   * @throws {Error} When the response status is not 2xx.
+   */
+  async #fetchUrl(url) {
+    const response = await fetch(url);
+    if (!response.ok)
+      throw new Error(`Failed to fetch audio: ${response.statusText}`);
+    const arrayBuffer = await response.arrayBuffer();
+    return Buffer.from(arrayBuffer);
+  }
+
+  /**
+   * Polls the ModelsLab fetch endpoint until the audio is ready.
+   * Waits before every attempt; the wait grows linearly by one second
+   * per attempt (1s, 2s, 3s, ...) and is capped at 5s.
+   * @param {string|number} requestId
+   * @param {number} maxAttempts
+   * @returns {Promise<Buffer|null>} Audio data, or null on error/timeout.
+   */
+  async #pollForResult(requestId, maxAttempts = 20) {
+    const fetchUrl = "https://modelslab.com/api/v6/voice/fetch";
+    let delayMs = 1000; // Start with 1 second
+
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+      await new Promise((r) => setTimeout(r, delayMs));
+
+      const data = await this.#postJson(fetchUrl, {
+        key: this.apiKey,
+        request_id: String(requestId),
+      });
+
+      if (data.status === "success" && data.output?.length > 0) {
+        return await this.#fetchUrl(data.output[0]);
+      }
+
+      if (data.status === "error") {
+        // `messege` (sic) is read as a fallback key; presumably the API
+        // misspells it in some responses - confirm against ModelsLab docs.
+        this.#log(
+          "Poll error:",
+          data.message || data.messege || "Unknown error"
+        );
+        return null;
+      }
+
+      this.#log(`Polling attempt ${attempt + 1}/${maxAttempts}...`);
+
+      // Linear backoff: 1s, 2s, 3s, 4s... up to 5s max
+      delayMs = Math.min(delayMs + 1000, 5000);
+    }
+
+    this.#log("Timed out waiting for audio generation.");
+    return null;
+  }
+
+  /**
+   * Generates a buffer from the given text input using the ModelsLab TTS API.
+   * Errors are logged and reported to the caller as null.
+   * @param {string} textInput - The text to be converted to audio.
+   * @returns {Promise<Buffer|null>} A buffer containing the audio data.
+   */
+  async ttsBuffer(textInput) {
+    try {
+      const data = await this.#postJson(
+        "https://modelslab.com/api/v6/voice/text_to_speech",
+        {
+          key: this.apiKey,
+          prompt: textInput,
+          voice_id: this.voice,
+          language: this.language,
+          speed: this.speed,
+        }
+      );
+
+      if (data.status === "success" && data.output?.length > 0) {
+        return await this.#fetchUrl(data.output[0]);
+      }
+
+      if (data.status === "processing" && data.id) {
+        this.#log(`Processing... polling for request ID: ${data.id}`);
+        return await this.#pollForResult(data.id);
+      }
+
+      this.#log("Unexpected response:", JSON.stringify(data));
+      return null;
+    } catch (e) {
+      console.error("[ModelsLabTTS] Error:", e);
+      return null;
+    }
+  }
+}
+
+module.exports = { ModelsLabTTS };
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index cc08afbb03e..fa940786a91 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -653,6 +653,24 @@ const KEY_MAPPING = {
     checks: [isValidURL],
   },
 
+  // ModelsLab TTS
+  TTSModelsLabApiKey: {
+    envKey: "TTS_MODELSLAB_API_KEY",
+    checks: [isNotEmpty],
+  },
+  TTSModelsLabVoiceId: {
+    envKey: "TTS_MODELSLAB_VOICE_ID",
+    checks: [],
+  },
+  TTSModelsLabLanguage: {
+    envKey: "TTS_MODELSLAB_LANGUAGE",
+    checks: [],
+  },
+  TTSModelsLabSpeed: {
+    envKey: "TTS_MODELSLAB_SPEED",
+    checks: [],
+  },
+
   // DeepSeek Options
   DeepSeekApiKey: {
     envKey: "DEEPSEEK_API_KEY",
@@ -898,6 +916,7 @@ function supportedTTSProvider(input = "") {
     "elevenlabs",
     "piper_local",
     "generic-openai",
+    "modelslab",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid TTS provider.`;
 }