Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 114 additions & 0 deletions frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Voice options shown in the ModelsLab "Voice" dropdown.
// This is a static list: each entry pairs a ModelsLab voice id (`value`)
// with a human readable `label`. Voices are numbered per region in the
// order their ids are listed below.
const MODELSLAB_VOICES = [
  ["US", ["001", "006", "007", "009", "010"]],
  ["UK", ["001", "003"]],
  ["AU", ["001", "002"]],
].flatMap(([region, codes]) =>
  codes.map((code, index) => ({
    value: `en_${region.toLowerCase()}_${code}`,
    label: `English (${region}) - Voice ${index + 1}`,
  }))
);

// Language options shown in the ModelsLab "Language" dropdown.
// `value` is the lowercase language name submitted with the form and
// `label` is the same name with its first letter capitalized.
const MODELSLAB_LANGUAGES = [
  "english",
  "spanish",
  "french",
  "german",
  "italian",
  "portuguese",
  "polish",
  "hindi",
].map((value) => ({
  value,
  label: `${value.charAt(0).toUpperCase()}${value.slice(1)}`,
}));
Comment on lines +1 to +22
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to have language=French while the voice is "English (UK) - Voice 2"? I am not sure whether that kind of combination is valid.

Additionally, do we have any insight into how often voices are updated or added? This list will not be actively maintained by the team so it can be out of date quickly.

If there is a way to pull the voices from something like a GET /voice/models endpoint, rendering that dynamic list to the user would be best so it's always current.


// Playback speed options shown in the ModelsLab "Speed" dropdown.
// Each row is [multiplier, optional description]; the label is rendered as
// "<multiplier>x" with the description appended in parentheses when present.
// Values are kept as strings because they are used as <option> values.
const MODELSLAB_SPEEDS = [
  ["0.5", "Slow"],
  ["0.75"],
  ["1", "Normal"],
  ["1.25"],
  ["1.5", "Fast"],
  ["2", "Very Fast"],
].map(([value, note]) => ({
  value,
  label: note ? `${value}x (${note})` : `${value}x`,
}));

export default function ModelsLabTextToSpeechOptions({ settings }) {
return (
<div className="flex flex-col gap-y-4">
<div className="flex gap-x-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
API Key
</label>
<input
type="password"
name="TTSModelsLabApiKey"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="ModelsLab API Key"
defaultValue={settings?.TTSModelsLabApiKey ? "*".repeat(20) : ""}
required={true}
autoComplete="off"
spellCheck={false}
/>
<p className="text-xs text-white/60 mt-1">
Get your API key at{" "}
<a
href="https://modelslab.com/dashboard/api-keys"
target="_blank"
rel="noreferrer"
className="underline hover:text-white"
>
modelslab.com
</a>
</p>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Voice
</label>
<select
name="TTSModelsLabVoiceId"
defaultValue={settings?.TTSModelsLabVoiceId ?? "en_us_001"}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{MODELSLAB_VOICES.map((voice) => (
<option key={voice.value} value={voice.value}>
{voice.label}
</option>
))}
</select>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Language
</label>
<select
name="TTSModelsLabLanguage"
defaultValue={settings?.TTSModelsLabLanguage ?? "english"}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{MODELSLAB_LANGUAGES.map((lang) => (
<option key={lang.value} value={lang.value}>
{lang.label}
</option>
))}
</select>
</div>
<div className="flex flex-col w-40">
<label className="text-white text-sm font-semibold block mb-3">
Speed
</label>
<select
name="TTSModelsLabSpeed"
defaultValue={settings?.TTSModelsLabSpeed ?? "1"}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{MODELSLAB_SPEEDS.map((speed) => (
<option key={speed.value} value={speed.value}>
{speed.label}
</option>
))}
</select>
</div>
</div>
</div>
);
}
Binary file added frontend/src/media/ttsproviders/modelslab.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 10 additions & 0 deletions frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
import ElevenLabsIcon from "@/media/ttsproviders/elevenlabs.png";
import PiperTTSIcon from "@/media/ttsproviders/piper.png";
import GenericOpenAiLogo from "@/media/ttsproviders/generic-openai.png";
import ModelsLabLogo from "@/media/ttsproviders/modelslab.png";

import BrowserNative from "@/components/TextToSpeech/BrowserNative";
import OpenAiTTSOptions from "@/components/TextToSpeech/OpenAiOptions";
import ElevenLabsTTSOptions from "@/components/TextToSpeech/ElevenLabsOptions";
import PiperTTSOptions from "@/components/TextToSpeech/PiperTTSOptions";
import OpenAiGenericTTSOptions from "@/components/TextToSpeech/OpenAiGenericOptions";
import ModelsLabTextToSpeechOptions from "@/components/TextToSpeech/ModelsLabOptions";

const PROVIDERS = [
{
Expand Down Expand Up @@ -53,6 +55,14 @@ const PROVIDERS = [
description:
"Connect to an OpenAI compatible TTS service running locally or remotely.",
},
{
name: "ModelsLab",
value: "modelslab",
logo: ModelsLabLogo,
options: (settings) => <ModelsLabTextToSpeechOptions settings={settings} />,
description:
"Use ModelsLab's text-to-speech API with a wide variety of voices and languages.",
},
];

export default function TextToSpeechProvider({ settings }) {
Expand Down
6 changes: 6 additions & 0 deletions server/models/systemSettings.js
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,12 @@ const SystemSettings = {
TTSOpenAICompatibleVoiceModel:
process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
// ModelsLab TTS
TTSModelsLabApiKey: !!process.env.TTS_MODELSLAB_API_KEY,
TTSModelsLabVoiceId:
process.env.TTS_MODELSLAB_VOICE_ID ?? "en_us_001",
TTSModelsLabLanguage: process.env.TTS_MODELSLAB_LANGUAGE ?? "english",
TTSModelsLabSpeed: process.env.TTS_MODELSLAB_SPEED ?? "1",

// --------------------------------------------------------
// Agent Settings & Configs
Expand Down
3 changes: 3 additions & 0 deletions server/utils/TextToSpeech/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ function getTTSProvider() {
case "generic-openai":
const { GenericOpenAiTTS } = require("./openAiGeneric");
return new GenericOpenAiTTS();
case "modelslab":
const { ModelsLabTTS } = require("./modelslab");
return new ModelsLabTTS();
default:
throw new Error("ENV: No TTS_PROVIDER value found in environment!");
}
Expand Down
125 changes: 125 additions & 0 deletions server/utils/TextToSpeech/modelslab/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/**
 * Text-to-speech provider backed by the ModelsLab voice API.
 *
 * Configuration is read from the environment at construction time:
 * - TTS_MODELSLAB_API_KEY  (required)
 * - TTS_MODELSLAB_VOICE_ID (defaults to DEFAULT_VOICE)
 * - TTS_MODELSLAB_LANGUAGE (defaults to "english")
 * - TTS_MODELSLAB_SPEED    (defaults to DEFAULT_SPEED)
 */
class ModelsLabTTS {
  static VOICES = [
    "en_us_001",
    "en_us_006",
    "en_us_007",
    "en_us_009",
    "en_us_010",
    "en_uk_001",
    "en_uk_003",
    "en_au_001",
    "en_au_002",
  ];

  static DEFAULT_VOICE = "en_us_001";

  static DEFAULT_SPEED = 1;

  /**
   * @throws {Error} When TTS_MODELSLAB_API_KEY is not set.
   */
  constructor() {
    if (!process.env.TTS_MODELSLAB_API_KEY)
      throw new Error("No ModelsLab API key was set for TTS.");
    this.apiKey = process.env.TTS_MODELSLAB_API_KEY;
    this.voice =
      process.env.TTS_MODELSLAB_VOICE_ID ?? ModelsLabTTS.DEFAULT_VOICE;
    this.language = process.env.TTS_MODELSLAB_LANGUAGE ?? "english";
    // NOTE(review): review feedback questioned whether TTS_MODELSLAB_SPEED is
    // modifiable by the user via the UI or ENV - confirm it is wired end to end.
    this.speed = ModelsLabTTS.#parseSpeed(process.env.TTS_MODELSLAB_SPEED);
    this.#log(`Initialized with voice: ${this.voice}`);
  }

  /**
   * Parses the configured speed multiplier.
   * An unset, empty, non-numeric or non-positive value would otherwise become
   * NaN (serialized as `null` in the request body) or a meaningless speed, so
   * those cases fall back to DEFAULT_SPEED.
   * @param {string|undefined} rawSpeed
   * @returns {number}
   */
  static #parseSpeed(rawSpeed) {
    const parsed = Number.parseFloat(rawSpeed);
    return Number.isFinite(parsed) && parsed > 0
      ? parsed
      : ModelsLabTTS.DEFAULT_SPEED;
  }

  #log(text, ...args) {
    console.log(`\x1b[32m[ModelsLabTTS]\x1b[0m ${text}`, ...args);
  }

  /**
   * Fetches a URL and returns the response body as a Buffer.
   * @param {string} url
   * @returns {Promise<Buffer>}
   * @throws {Error} When the response status is not OK.
   */
  async #fetchUrl(url) {
    const response = await fetch(url);
    if (!response.ok)
      throw new Error(`Failed to fetch audio: ${response.statusText}`);
    const arrayBuffer = await response.arrayBuffer();
    return Buffer.from(arrayBuffer);
  }

  /**
   * Polls the ModelsLab fetch endpoint until the audio is ready.
   * Waits before every attempt using a linear backoff: the delay starts at
   * 1 second and grows by 1 second per attempt, capped at 5 seconds.
   * Network or JSON parsing failures are not caught here; they propagate to
   * the caller.
   *
   * NOTE(review): review feedback raised that polling is a large error surface
   * (a provider-side job failure can mean retrying until the attempts run out)
   * and asked for documentation of this endpoint - confirm against the
   * ModelsLab API docs.
   *
   * @param {string|number} requestId
   * @param {number} maxAttempts
   * @returns {Promise<Buffer|null>} Audio buffer, or null on error/timeout.
   */
  async #pollForResult(requestId, maxAttempts = 20) {
    const fetchUrl = "https://modelslab.com/api/v6/voice/fetch";
    let delayMs = 1000; // Start with 1 second

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      await new Promise((r) => setTimeout(r, delayMs));

      // NOTE(review): the API key is sent as a body param rather than an
      // Authorization header; a reviewer asked whether that is what the API
      // expects - confirm against the ModelsLab API docs.
      const response = await fetch(fetchUrl, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          key: this.apiKey,
          request_id: String(requestId),
        }),
      });

      const data = await response.json();

      if (data.status === "success" && data.output?.length > 0) {
        return await this.#fetchUrl(data.output[0]);
      }

      if (data.status === "error") {
        // Both spellings are checked on purpose; the misspelled key is read as a fallback.
        this.#log(
          "Poll error:",
          data.message || data.messege || "Unknown error"
        );
        return null;
      }

      this.#log(`Polling attempt ${attempt + 1}/${maxAttempts}...`);

      // Linear backoff: 1s, 2s, 3s, 4s... up to 5s max
      delayMs = Math.min(delayMs + 1000, 5000);
    }

    this.#log("Timed out waiting for audio generation.");
    return null;
  }

  /**
   * Generates a buffer from the given text input using the ModelsLab TTS API.
   * If the API answers with a "processing" status the result is polled for.
   * Any error is logged and reported as null instead of being thrown.
   * @param {string} textInput - The text to be converted to audio.
   * @returns {Promise<Buffer|null>} A buffer containing the audio data, or null on failure.
   */
  async ttsBuffer(textInput) {
    try {
      const response = await fetch(
        "https://modelslab.com/api/v6/voice/text_to_speech",
        {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({
            key: this.apiKey,
            prompt: textInput,
            voice_id: this.voice,
            language: this.language,
            speed: this.speed,
          }),
        }
      );

      const data = await response.json();

      if (data.status === "success" && data.output?.length > 0) {
        return await this.#fetchUrl(data.output[0]);
      }

      if (data.status === "processing" && data.id) {
        this.#log(`Processing... polling for request ID: ${data.id}`);
        return await this.#pollForResult(data.id);
      }

      this.#log("Unexpected response:", JSON.stringify(data));
      return null;
    } catch (e) {
      console.error("[ModelsLabTTS] Error:", e);
      return null;
    }
  }
}

module.exports = { ModelsLabTTS };
19 changes: 19 additions & 0 deletions server/utils/helpers/updateENV.js
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,24 @@ const KEY_MAPPING = {
checks: [isValidURL],
},

// ModelsLab TTS
TTSModelsLabApiKey: {
envKey: "TTS_MODELSLAB_API_KEY",
checks: [isNotEmpty],
},
TTSModelsLabVoiceId: {
envKey: "TTS_MODELSLAB_VOICE_ID",
checks: [],
},
TTSModelsLabLanguage: {
envKey: "TTS_MODELSLAB_LANGUAGE",
checks: [],
},
TTSModelsLabSpeed: {
envKey: "TTS_MODELSLAB_SPEED",
checks: [],
},

// DeepSeek Options
DeepSeekApiKey: {
envKey: "DEEPSEEK_API_KEY",
Expand Down Expand Up @@ -898,6 +916,7 @@ function supportedTTSProvider(input = "") {
"elevenlabs",
"piper_local",
"generic-openai",
"modelslab",
].includes(input);
return validSelection ? null : `${input} is not a valid TTS provider.`;
}
Expand Down