Mintplex-Labs · adhikjoshi · Mar 7, 2026 · Mar 11, 2026 · Mar 12, 2026 · timothycarambat
diff --git a/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx b/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx
@@ -0,0 +1,114 @@
+const MODELSLAB_VOICES = [
+  ["en_us_001", "English (US) - Voice 1"],
+  ["en_us_006", "English (US) - Voice 2"],
+  ["en_us_007", "English (US) - Voice 3"],
+  ["en_us_009", "English (US) - Voice 4"],
+  ["en_us_010", "English (US) - Voice 5"],
+  ["en_uk_001", "English (UK) - Voice 1"],
+  ["en_uk_003", "English (UK) - Voice 2"],
+  ["en_au_001", "English (AU) - Voice 1"],
+  ["en_au_002", "English (AU) - Voice 2"],
+].map(([value, label]) => ({ value, label })); // rows: [option value, visible label]
+
+const MODELSLAB_LANGUAGES = [
+  ["english", "English"],
+  ["spanish", "Spanish"],
+  ["french", "French"],
+  ["german", "German"],
+  ["italian", "Italian"],
+  ["portuguese", "Portuguese"],
+  ["polish", "Polish"],
+  ["hindi", "Hindi"],
+].map(([value, label]) => ({ value, label })); // rows: [option value, visible label]
+
+const MODELSLAB_SPEEDS = [
+  ["0.5", "0.5x (Slow)"],
+  ["0.75", "0.75x"],
+  ["1", "1x (Normal)"],
+  ["1.25", "1.25x"],
+  ["1.5", "1.5x (Fast)"],
+  ["2", "2x (Very Fast)"],
+].map(([value, label]) => ({ value, label })); // rows: [option value, visible label]
+
+/**
+ * Settings fields for the ModelsLab text-to-speech provider: an API key input
+ * followed by voice, language and speed dropdowns. Each dropdown preselects
+ * the saved value, or `en_us_001` / `english` / `1` when nothing is saved.
+ *
+ * Each label is bound to its control via `htmlFor`/`id`, so clicking a label
+ * focuses the control and assistive technology can announce the field name.
+ *
+ * @param {Object} props
+ * @param {Object} [props.settings] - Saved values keyed by field name. For
+ *   `TTSModelsLabApiKey` only truthiness is read; a fixed mask is shown instead
+ *   of the key itself.
+ * @returns {JSX.Element}
+ */
+export default function ModelsLabTextToSpeechOptions({ settings }) {
+  // Class strings shared by the repeated labels and selects.
+  const labelClass = "text-white text-sm font-semibold block mb-3";
+  const selectClass =
+    "border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5";
+
+  // One row per dropdown: [field name, label, wrapper classes, options, default].
+  const dropdowns = [
+    ["TTSModelsLabVoiceId", "Voice", "flex flex-col w-60", MODELSLAB_VOICES, "en_us_001"],
+    ["TTSModelsLabLanguage", "Language", "flex flex-col w-60", MODELSLAB_LANGUAGES, "english"],
+    ["TTSModelsLabSpeed", "Speed", "flex flex-col w-40", MODELSLAB_SPEEDS, "1"],
+  ];
+
+  return (
+    <div className="flex flex-col gap-y-4">
+      <div className="flex gap-x-4">
+        <div className="flex flex-col w-60">
+          <label htmlFor="TTSModelsLabApiKey" className={labelClass}>
+            API Key
+          </label>
+          {/* Only whether a key is saved is used here; show a fixed mask if so. */}
+          <input
+            id="TTSModelsLabApiKey"
+            type="password"
+            name="TTSModelsLabApiKey"
+            className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="ModelsLab API Key"
+            defaultValue={settings?.TTSModelsLabApiKey ? "*".repeat(20) : ""}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+          <p className="text-xs text-white/60 mt-1">
+            Get your API key at{" "}
+            <a
+              href="https://modelslab.com/dashboard/api-keys"
+              target="_blank"
+              rel="noreferrer"
+              className="underline hover:text-white"
+            >
+              modelslab.com
+            </a>
+          </p>
+        </div>
+        {/* Voice, language and speed share one layout, so render them from data. */}
+        {dropdowns.map(([name, label, wrapperClass, options, fallback]) => (
+          <div key={name} className={wrapperClass}>
+            <label htmlFor={name} className={labelClass}>
+              {label}
+            </label>
+            <select
+              id={name}
+              name={name}
+              defaultValue={settings?.[name] ?? fallback}
+              className={selectClass}
+            >
+              {options.map((option) => (
+                <option key={option.value} value={option.value}>
+                  {option.label}
+                </option>
+              ))}
+            </select>
+          </div>
+        ))}
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/media/ttsproviders/modelslab.png b/frontend/src/media/ttsproviders/modelslab.png
diff --git a/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx b/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx
@@ -9,12 +9,14 @@ import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
 import ElevenLabsIcon from "@/media/ttsproviders/elevenlabs.png";
 import PiperTTSIcon from "@/media/ttsproviders/piper.png";
 import GenericOpenAiLogo from "@/media/ttsproviders/generic-openai.png";
+import ModelsLabLogo from "@/media/ttsproviders/modelslab.png";
 
 import BrowserNative from "@/components/TextToSpeech/BrowserNative";
 import OpenAiTTSOptions from "@/components/TextToSpeech/OpenAiOptions";
 import ElevenLabsTTSOptions from "@/components/TextToSpeech/ElevenLabsOptions";
 import PiperTTSOptions from "@/components/TextToSpeech/PiperTTSOptions";
 import OpenAiGenericTTSOptions from "@/components/TextToSpeech/OpenAiGenericOptions";
+import ModelsLabTextToSpeechOptions from "@/components/TextToSpeech/ModelsLabOptions";
 
 const PROVIDERS = [
   {
@@ -53,6 +55,14 @@ const PROVIDERS = [
     description:
       "Connect to an OpenAI compatible TTS service running locally or remotely.",
   },
+  {
+    name: "ModelsLab",
+    value: "modelslab",
+    logo: ModelsLabLogo,
+    options: (settings) => <ModelsLabTextToSpeechOptions settings={settings} />,
+    description:
+      "Use ModelsLab's text-to-speech API with a wide variety of voices and languages.",
+  },
 ];
 
 export default function TextToSpeechProvider({ settings }) {

diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
@@ -286,6 +286,12 @@ const SystemSettings = {
       TTSOpenAICompatibleVoiceModel:
         process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
       TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
+      // ModelsLab TTS
+      TTSModelsLabApiKey: !!process.env.TTS_MODELSLAB_API_KEY,
+      TTSModelsLabVoiceId:
+        process.env.TTS_MODELSLAB_VOICE_ID ?? "en_us_001",
+      TTSModelsLabLanguage: process.env.TTS_MODELSLAB_LANGUAGE ?? "english",
+      TTSModelsLabSpeed: process.env.TTS_MODELSLAB_SPEED ?? "1",
 
       // --------------------------------------------------------
       // Agent Settings & Configs

diff --git a/server/utils/TextToSpeech/index.js b/server/utils/TextToSpeech/index.js
@@ -10,6 +10,9 @@ function getTTSProvider() {
     case "generic-openai":
       const { GenericOpenAiTTS } = require("./openAiGeneric");
       return new GenericOpenAiTTS();
+    case "modelslab":
+      const { ModelsLabTTS } = require("./modelslab");
+      return new ModelsLabTTS();
     default:
       throw new Error("ENV: No TTS_PROVIDER value found in environment!");
   }

diff --git a/server/utils/TextToSpeech/modelslab/index.js b/server/utils/TextToSpeech/modelslab/index.js
@@ -0,0 +1,125 @@
+/**
+ * Text-to-speech provider for the ModelsLab voice API (TTS_MODELSLAB_* env).
+ */
+class ModelsLabTTS {
+  static VOICES = [
+    "en_us_001",
+    "en_us_006",
+    "en_us_007",
+    "en_us_009",
+    "en_us_010",
+    "en_uk_001",
+    "en_uk_003",
+    "en_au_001",
+    "en_au_002",
+  ];
+
+  static DEFAULT_VOICE = "en_us_001";
+
+  /** @throws {Error} When `TTS_MODELSLAB_API_KEY` is unset or empty. */
+  constructor() {
+    this.apiKey = process.env.TTS_MODELSLAB_API_KEY;
+    if (!this.apiKey) throw new Error("No ModelsLab API key was set for TTS.");
+    // `||`, not `??`: a variable saved as an empty string must fall back too.
+    this.voice = process.env.TTS_MODELSLAB_VOICE_ID || ModelsLabTTS.DEFAULT_VOICE;
+    this.language = process.env.TTS_MODELSLAB_LANGUAGE || "english";
+    // An unparsable speed is NaN, which JSON.stringify would send as `null`.
+    const speed = Number.parseFloat(process.env.TTS_MODELSLAB_SPEED);
+    this.speed = Number.isFinite(speed) && speed > 0 ? speed : 1;
+    this.#log(`Initialized with voice: ${this.voice}`);
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[ModelsLabTTS]\x1b[0m ${text}`, ...args);
+  }
+
+  /**
+   * Downloads a URL and returns the response body as a Buffer.
+   * @param {string} url
+   * @returns {Promise<Buffer>}
+   * @throws {Error} When the response status is not OK.
+   */
+  async #fetchUrl(url) {
+    const response = await fetch(url);
+    if (!response.ok) throw new Error(`Failed to fetch audio: ${response.statusText}`);
+    return Buffer.from(await response.arrayBuffer());
+  }
+
+  /**
+   * Polls the ModelsLab fetch endpoint until the audio is ready. Waits before
+   * every attempt: 1s at first, growing by 1s per attempt up to a 5s cap.
+   * @param {string|number} requestId
+   * @param {number} maxAttempts
+   * @returns {Promise<Buffer|null>} Audio, or null on an API error or timeout.
+   */
+  async #pollForResult(requestId, maxAttempts = 20) {
+    const fetchUrl = "https://modelslab.com/api/v6/voice/fetch";
+    let delayMs = 1000;
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+      await new Promise((r) => setTimeout(r, delayMs));
+      const response = await fetch(fetchUrl, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ key: this.apiKey, request_id: String(requestId) }),
+      });
+      const data = await response.json();
+
+      if (data.status === "success" && data.output?.length > 0) {
+        return await this.#fetchUrl(data.output[0]);
+      }
+
+      if (data.status === "error") {
+        this.#log("Poll error:", data.message || data.messege || "Unknown error");
+        return null;
+      }
+
+      this.#log(`Polling attempt ${attempt + 1}/${maxAttempts}...`);
+      // Linear backoff: 1s, 2s, 3s, 4s, then 5s for every later attempt.
+      delayMs = Math.min(delayMs + 1000, 5000);
+    }
+
+    this.#log("Timed out waiting for audio generation.");
+    return null;
+  }
+
+  /**
+   * Generates a buffer from the given text input using the ModelsLab TTS API.
+   * Errors are logged and reported as null instead of being thrown.
+   * @param {string} textInput - The text to be converted to audio.
+   * @returns {Promise<Buffer|null>} A buffer containing the audio data.
+   */
+  async ttsBuffer(textInput) {
+    try {
+      const endpoint = "https://modelslab.com/api/v6/voice/text_to_speech";
+      const response = await fetch(endpoint, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          key: this.apiKey,
+          prompt: textInput,
+          voice_id: this.voice,
+          language: this.language,
+          speed: this.speed,
+        }),
+      });
+      const data = await response.json();
+
+      if (data.status === "success" && data.output?.length > 0) {
+        return await this.#fetchUrl(data.output[0]);
+      }
+
+      if (data.status === "processing" && data.id) {
+        this.#log(`Processing... polling for request ID: ${data.id}`);
+        return await this.#pollForResult(data.id);
+      }
+
+      this.#log("Unexpected response:", JSON.stringify(data));
+      return null;
+    } catch (e) {
+      console.error("[ModelsLabTTS] Error:", e);
+      return null;
+    }
+  }
+}
+
+module.exports = { ModelsLabTTS };
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
@@ -653,6 +653,24 @@ const KEY_MAPPING = {
     checks: [isValidURL],
   },
 
+  // ModelsLab TTS
+  TTSModelsLabApiKey: {
+    envKey: "TTS_MODELSLAB_API_KEY",
+    checks: [isNotEmpty],
+  },
+  TTSModelsLabVoiceId: {
+    envKey: "TTS_MODELSLAB_VOICE_ID",
+    checks: [],
+  },
+  TTSModelsLabLanguage: {
+    envKey: "TTS_MODELSLAB_LANGUAGE",
+    checks: [],
+  },
+  TTSModelsLabSpeed: {
+    envKey: "TTS_MODELSLAB_SPEED",
+    checks: [],
+  },
+
   // DeepSeek Options
   DeepSeekApiKey: {
     envKey: "DEEPSEEK_API_KEY",
@@ -898,6 +916,7 @@ function supportedTTSProvider(input = "") {
     "elevenlabs",
     "piper_local",
     "generic-openai",
+    "modelslab",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid TTS provider.`;
 }