From 0753979551fbc064555a9dc0e6a7490270588f72 Mon Sep 17 00:00:00 2001 From: adhikjoshi Date: Sun, 8 Mar 2026 00:51:35 +0530 Subject: [PATCH 1/3] feat: Add ModelsLab text-to-speech provider Adds ModelsLab (https://modelslab.com) as a TTS provider option in AnythingLLM. ModelsLab offers affordable AI APIs including text-to-speech at $0.0047 per generation with support for multiple English voice variants and languages. Changes: - server/utils/TextToSpeech/modelslab/index.js: New provider class with async polling support for ModelsLab's TTS API - server/utils/TextToSpeech/index.js: Register 'modelslab' provider case - server/utils/helpers/updateENV.js: Add env key mappings + validator - server/models/systemSettings.js: Expose ModelsLab settings to frontend - frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx: Settings UI - frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx: Add to provider list - frontend/src/media/ttsproviders/modelslab.png: Provider logo Env vars: - TTS_MODELSLAB_API_KEY (required) - TTS_MODELSLAB_VOICE_ID (optional, default: en_us_001) - TTS_MODELSLAB_LANGUAGE (optional, default: english) Closes #(issue) --- .../TextToSpeech/ModelsLabOptions/index.jsx | 89 +++++++++++++ frontend/src/media/ttsproviders/modelslab.png | Bin 0 -> 2991 bytes .../GeneralSettings/AudioPreference/tts.jsx | 10 ++ server/models/systemSettings.js | 5 + server/utils/TextToSpeech/index.js | 3 + server/utils/TextToSpeech/modelslab/index.js | 123 ++++++++++++++++++ server/utils/helpers/updateENV.js | 15 +++ 7 files changed, 245 insertions(+) create mode 100644 frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx create mode 100644 frontend/src/media/ttsproviders/modelslab.png create mode 100644 server/utils/TextToSpeech/modelslab/index.js diff --git a/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx b/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx new file mode 100644 index 00000000000..ea12029bed9 --- /dev/null +++ 
b/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx @@ -0,0 +1,89 @@ +const MODELSLAB_VOICES = [ + { value: "en_us_001", label: "English (US) - Voice 1" }, + { value: "en_us_006", label: "English (US) - Voice 2" }, + { value: "en_us_007", label: "English (US) - Voice 3" }, + { value: "en_us_009", label: "English (US) - Voice 4" }, + { value: "en_us_010", label: "English (US) - Voice 5" }, + { value: "en_uk_001", label: "English (UK) - Voice 1" }, + { value: "en_uk_003", label: "English (UK) - Voice 2" }, + { value: "en_au_001", label: "English (AU) - Voice 1" }, + { value: "en_au_002", label: "English (AU) - Voice 2" }, +]; + +const MODELSLAB_LANGUAGES = [ + { value: "english", label: "English" }, + { value: "spanish", label: "Spanish" }, + { value: "french", label: "French" }, + { value: "german", label: "German" }, + { value: "italian", label: "Italian" }, + { value: "portuguese", label: "Portuguese" }, + { value: "polish", label: "Polish" }, + { value: "hindi", label: "Hindi" }, +]; + +export default function ModelsLabTextToSpeechOptions({ settings }) { + return ( +
+
+
+ + +

+ Get your API key at{" "} + + modelslab.com + +

+
+
+ + +
+
+ + +
+
+
+ ); +} diff --git a/frontend/src/media/ttsproviders/modelslab.png b/frontend/src/media/ttsproviders/modelslab.png new file mode 100644 index 0000000000000000000000000000000000000000..bededd15d3088407beb7c55cc2a55e11f77dc4bc GIT binary patch literal 2991 zcmV;g3sCflP)*WHz{54(rW!SMQrFysoM0>8@^O5SdSEbx(Cyb@kV;-h1_` zx<&*+P!PWW$E9#9hg*ej=gyt0m}zV)&&rVn;cr!DOdVYJ6ewTj zX--T`G$k($nS?C}LaC!oFsBAU3r-%C~a_Xke?IA z!>UT6j2ACnSc#wtqa4&tUMF}y9E53+GU;BZi%~Yp5~w@C?k%W1)E#@H-VtI$!s>E+ z74C^d0V#uc4#@32$@0gIMxz$=paDK)i@YxRc|YW-96<2GcHAeo>a%Cht_M9hAqo69 zcshIbY@gx{iZEnzxF>P(06*aA%$YOq%6*A+T|Wqv=bwlkVj*?pDY-k47WgG64_f+r z(L*dGd8g%WUv4kC-;%_Q=;6pcQd~jmB5p!b%+Jq@-Q8XN`{vD?`gnbPU2TN9xj7-l zT%-lc$9;h5>1lnOv~JSv5-Te!B2U2e34KZh5I7|Lp3yYQb9{Upr0ea`qeuGO)vH${ z=Wg7%5%>UQQW=u`#QCUh(yR+PO8?tFS`&47z4(YfHS%2>bK_^c{8VGf5s1mPG1cOwhEU z&}V07h15;@9P-2XAT7?w+`W5OZ2*i3%4r9mJT{=&s=6TrBO?k!GA)zAAmzNawiZbj zhQW}PZaI%U01O1mp>+9hR)ZnCMlEGXi{4Tm`}w@jSQ}zxSOxcY-tkPedN9trA5F z(WC{M9-07vCP$W*(m2Q$L>~D-2ui5m(38l=n>>8A<>lp;GPwUJ$x`JIZr{EwY~XpM zlnY6i{!Ps|4bW_nuQ)U`q;ws{xqJXJ01!aB@5G4{E!Pn_AQpMJPBV4o)YOz!CvnFd z`Jo%7q_v!L<-zs1KOA>#T;C=Eoa7LN^4{KF=584(bL-Zvi1gSbuUp^*K?)(&kXj$e zmJ9DhOioS)@bg4gkmNmh@Ibxgv~SZFAOfnOR;$Hc2MMM+E-fu(@zo>f08>v`jvpNfz`~CxH9%jik#4 zNZ;b(qB;g~z68>E2&_MRm4a*JC@D~BC~Yzposv9Q%{2ES>kbk@9Hf?8-lRBn>Qtbc zEe$1#3^;l6q(1Hxc?5)=KnM?uxc~ymit`>v8Z?_t)g#L-7|NS!H^_q_fW>#6=j3w# zk^6v2p`$9~BxeODtQ~pn6nx$wa#vSZ)o)(Ec9z-&M5D^gRmsI@K>xWEH>@)BBK$$T zaV{yy&lNb%*_sr8aFCHi*P8d`FYm&I3nC>{lhuI%ks*>y{m1+F???Keyq`*aPV^&% zrB6y_xX$x((Ff+i1HMr|)vR0GeR%{tP2W}^l1!49rF#Z&$#Vg3i`)gCMivjp)MZ0C z=CzcL{wbF-8mn-Hu#gTtXXY`>Ndi8GK|_xi#62d7{Z@1UXf|k~cmP}F(Ojwj8wUzP zwDa-IvUxBgE#%N6#xVBB#>UjREt)Nj+kPi{hy}C}5T2O% z*M3Xd{D*yF_V*Q$7I(j{i^a7=>$$~C{bJ@fWv{%|cMD=-q^R=Go=aaZ6X>2%7maG< zHSqoU*Nv96x$}O43<7NB0UN(-#FDqU-E`(9tbX=NObi#rM}MfOmyrdKJQ$54TmbO~ zOtsi}?0)zMkDDS7Z0)UwQmG0bkip0$lAINggg8L*e3}^y#LNe7U3u$^pM(!+-1d!dRgw_n zBYn$%?H70cJ}4e74~eQ}aB%&e7kR26s5>5#>KcjdBu`!QxcxOQz{+0z<2xY+><(&|#0 
z9H4X_#E-u6=@v}MLo>t1k)_!Z#&dW~iateL73y+y8hG=jR6**-N1_j!hr42v-m1qe zlTg@{!O-88bptk1rp8*Et*3I=a2X|6bB+TIn04Jrwh;p2%w`KDvC1yk@=lMJd?ZiL zWmGlnrp|Lcl#M*!ri-4$(pqbG?7{~nkp=jcGGG z{`ohp_ds)<|5KmHg0jz`Z=SIESs>0PWog}&+h4i+uFHSvw@P;cs7feTg+3+dmY-*a zn@9)Q6nPe%$FVj);2jvHNV0NOC@|0sR|QSVbW(Y9HT|-@LQ@O>J@kGN;K`gN)fLzt zfLp&j^qvhU6t?|^6+lFDzog%51x!N()m+=w(11HE?bQlC`?fI!6bd_bRG(46GJ zcS3g-*rbR*XjXRx15t~AmSL&C4O$6Y{q&Vci*Qczk)MZoX5I4hY*NGtvytaBo}ofg zAKF7wX`{)|d1$t3mJ}ss{cz6*HOnm_jd&nzHl5@fAaGe%55u7ZP0f;m&g)hv-bTcg z>p#Zk@$F@}`mkvYP-ISWNF2=^>KJ@o1)F=56gC4?OWEK8_JJe44qBL|W60uiypGYd zq~Np3C@>&8jVuKC>bNbumD*BMuU$CcU0%u{I!qHS6U|YfH;%9w7wadt1X@f#7!V_ zfo8*}&zS>U7Sq<3ogOP$^C!Xjmp>`>fSCcdJaAuZ_lyI2V(4WkKTqWJ5eor+n#)&1 zKNH<0S3tn#N>m41(`Y=SU6Wh*-(l=|)F&-qZK*?{W(;)N+Pb8`1~rm4oGn0L1McS< z2u}z-dd5G(eeG!1r!{eMAiwbp051f=0#E4aDSDz$xN>1fVDr=B2BfFP?I9%}K;rKI z^O$c49y(7S>p96h@pZ#CkumOJZ0FL9XQ&wnk~CQJj7?SPJepRkzr&Etc5r`xzxOFY z8_;(clarH&MW5Bhe(i{S0VJ91mkGU<8xz1>d=N}R`!*$h5Ir~ne&nzPx;NlUSzEP$ zvfS6PTCM28iCV4p|GmAvI=+?+S1!^u@8<`)>a?Juobhl6HbHceC2EtMmdpmS}7dA`D)x| z`7zKlB+P4B8T=aDqfxC^>$|(Vv_f}X)=w=oP*5yNU~PEW;Vp$^a_5THT-R%`glnc- zD<(pc*?R2y0{VslEDyFO z+Zw_#Y*7eFD_Q;}!2Fj2{;vSGYs`NdsF-Eg{sEv(nHBag1-5Hj{)vE>&+CKhjDJoj lyZ)&_9Q{i} , + description: + "Use ModelsLab's text-to-speech API with a wide variety of voices and languages.", + }, ]; export default function TextToSpeechProvider({ settings }) { diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index dac2083ad02..9690338ec80 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -286,6 +286,11 @@ const SystemSettings = { TTSOpenAICompatibleVoiceModel: process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL, TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT, + // ModelsLab TTS + TTSModelsLabApiKey: !!process.env.TTS_MODELSLAB_API_KEY, + TTSModelsLabVoiceId: + process.env.TTS_MODELSLAB_VOICE_ID ?? "en_us_001", + TTSModelsLabLanguage: process.env.TTS_MODELSLAB_LANGUAGE ?? 
"english", // -------------------------------------------------------- // Agent Settings & Configs diff --git a/server/utils/TextToSpeech/index.js b/server/utils/TextToSpeech/index.js index 5ed5684de6d..813eda568e7 100644 --- a/server/utils/TextToSpeech/index.js +++ b/server/utils/TextToSpeech/index.js @@ -10,6 +10,9 @@ function getTTSProvider() { case "generic-openai": const { GenericOpenAiTTS } = require("./openAiGeneric"); return new GenericOpenAiTTS(); + case "modelslab": + const { ModelsLabTTS } = require("./modelslab"); + return new ModelsLabTTS(); default: throw new Error("ENV: No TTS_PROVIDER value found in environment!"); } diff --git a/server/utils/TextToSpeech/modelslab/index.js b/server/utils/TextToSpeech/modelslab/index.js new file mode 100644 index 00000000000..e85b876945a --- /dev/null +++ b/server/utils/TextToSpeech/modelslab/index.js @@ -0,0 +1,123 @@ +const https = require("https"); +const http = require("http"); +const { URL } = require("url"); + +class ModelsLabTTS { + static VOICES = [ + "en_us_001", + "en_us_006", + "en_us_007", + "en_us_009", + "en_us_010", + "en_uk_001", + "en_uk_003", + "en_au_001", + "en_au_002", + ]; + + static DEFAULT_VOICE = "en_us_001"; + + constructor() { + if (!process.env.TTS_MODELSLAB_API_KEY) + throw new Error("No ModelsLab API key was set for TTS."); + this.apiKey = process.env.TTS_MODELSLAB_API_KEY; + this.voice = process.env.TTS_MODELSLAB_VOICE_ID ?? ModelsLabTTS.DEFAULT_VOICE; + this.language = process.env.TTS_MODELSLAB_LANGUAGE ?? "english"; + this.speed = parseFloat(process.env.TTS_MODELSLAB_SPEED ?? "1"); + this.#log(`Initialized with voice: ${this.voice}`); + } + + #log(text, ...args) { + console.log(`\x1b[32m[ModelsLabTTS]\x1b[0m ${text}`, ...args); + } + + /** + * Fetches a URL and returns the response body as a Buffer. 
+ * @param {string} url + * @returns {Promise} + */ + #fetchUrl(url) { + return new Promise((resolve, reject) => { + const parsedUrl = new URL(url); + const transport = parsedUrl.protocol === "https:" ? https : http; + transport.get(url, (res) => { + const chunks = []; + res.on("data", (chunk) => chunks.push(chunk)); + res.on("end", () => resolve(Buffer.concat(chunks))); + res.on("error", reject); + }).on("error", reject); + }); + } + + /** + * Polls the ModelsLab fetch endpoint until the audio is ready. + * @param {string|number} requestId + * @param {number} maxAttempts + * @returns {Promise} + */ + async #pollForResult(requestId, maxAttempts = 20) { + const fetchUrl = "https://modelslab.com/api/v6/voice/fetch"; + for (let attempt = 0; attempt < maxAttempts; attempt++) { + await new Promise((r) => setTimeout(r, 3000)); + const response = await fetch(fetchUrl, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ key: this.apiKey, request_id: String(requestId) }), + }); + const data = await response.json(); + if (data.status === "success" && data.output?.length > 0) { + return await this.#fetchUrl(data.output[0]); + } + if (data.status === "error") { + this.#log("Poll error:", data.message || data.messege || "Unknown error"); + return null; + } + this.#log(`Polling attempt ${attempt + 1}/${maxAttempts}...`); + } + this.#log("Timed out waiting for audio generation."); + return null; + } + + /** + * Generates a buffer from the given text input using the ModelsLab TTS API. + * @param {string} textInput - The text to be converted to audio. + * @returns {Promise} A buffer containing the audio data. 
+ */ + async ttsBuffer(textInput) { + try { + const response = await fetch( + "https://modelslab.com/api/v6/voice/text_to_speech", + { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + key: this.apiKey, + prompt: textInput, + voice_id: this.voice, + language: this.language, + speed: this.speed, + }), + } + ); + + const data = await response.json(); + + if (data.status === "success" && data.output?.length > 0) { + return await this.#fetchUrl(data.output[0]); + } + + if (data.status === "processing" && data.id) { + this.#log(`Processing... polling for request ID: ${data.id}`); + return await this.#pollForResult(data.id); + } + + this.#log("Unexpected response:", JSON.stringify(data)); + return null; + } catch (e) { + console.error("[ModelsLabTTS] Error:", e); + return null; + } + } +} + +module.exports = { ModelsLabTTS }; diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index cc08afbb03e..379680e687c 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -653,6 +653,20 @@ const KEY_MAPPING = { checks: [isValidURL], }, + // ModelsLab TTS + TTSModelsLabApiKey: { + envKey: "TTS_MODELSLAB_API_KEY", + checks: [isNotEmpty], + }, + TTSModelsLabVoiceId: { + envKey: "TTS_MODELSLAB_VOICE_ID", + checks: [], + }, + TTSModelsLabLanguage: { + envKey: "TTS_MODELSLAB_LANGUAGE", + checks: [], + }, + // DeepSeek Options DeepSeekApiKey: { envKey: "DEEPSEEK_API_KEY", @@ -898,6 +912,7 @@ function supportedTTSProvider(input = "") { "elevenlabs", "piper_local", "generic-openai", + "modelslab", ].includes(input); return validSelection ? 
null : `${input} is not a valid TTS provider.`; } From 4d33883bd2901aa163928079db41cb72f9d11eb4 Mon Sep 17 00:00:00 2001 From: Adhik Joshi Date: Wed, 11 Mar 2026 23:34:36 +0530 Subject: [PATCH 2/3] fix: Address review comments - exponential backoff, simplified fetch, removed unused imports --- server/utils/TextToSpeech/modelslab/index.js | 34 +++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/server/utils/TextToSpeech/modelslab/index.js b/server/utils/TextToSpeech/modelslab/index.js index e85b876945a..1b83b64b5cc 100644 --- a/server/utils/TextToSpeech/modelslab/index.js +++ b/server/utils/TextToSpeech/modelslab/index.js @@ -1,7 +1,3 @@ -const https = require("https"); -const http = require("http"); -const { URL } = require("url"); - class ModelsLabTTS { static VOICES = [ "en_us_001", @@ -36,44 +32,50 @@ class ModelsLabTTS { * @param {string} url * @returns {Promise} */ - #fetchUrl(url) { - return new Promise((resolve, reject) => { - const parsedUrl = new URL(url); - const transport = parsedUrl.protocol === "https:" ? https : http; - transport.get(url, (res) => { - const chunks = []; - res.on("data", (chunk) => chunks.push(chunk)); - res.on("end", () => resolve(Buffer.concat(chunks))); - res.on("error", reject); - }).on("error", reject); - }); + async #fetchUrl(url) { + const response = await fetch(url); + if (!response.ok) throw new Error(`Failed to fetch audio: ${response.statusText}`); + const arrayBuffer = await response.arrayBuffer(); + return Buffer.from(arrayBuffer); } /** * Polls the ModelsLab fetch endpoint until the audio is ready. + * Uses a linearly increasing delay between attempts (1s steps, capped at 5s). 
* @param {string|number} requestId * @param {number} maxAttempts * @returns {Promise} */ async #pollForResult(requestId, maxAttempts = 20) { const fetchUrl = "https://modelslab.com/api/v6/voice/fetch"; + let delayMs = 1000; // Start with 1 second + for (let attempt = 0; attempt < maxAttempts; attempt++) { - await new Promise((r) => setTimeout(r, 3000)); + await new Promise((r) => setTimeout(r, delayMs)); + const response = await fetch(fetchUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ key: this.apiKey, request_id: String(requestId) }), }); + const data = await response.json(); + if (data.status === "success" && data.output?.length > 0) { return await this.#fetchUrl(data.output[0]); } + if (data.status === "error") { this.#log("Poll error:", data.message || data.messege || "Unknown error"); return null; } + this.#log(`Polling attempt ${attempt + 1}/${maxAttempts}...`); + + // Linear backoff: wait 1s, 2s, 3s, 4s, then 5s (capped) between attempts + delayMs = Math.min(delayMs + 1000, 5000); } + this.#log("Timed out waiting for audio generation."); return null; } From b22fb096865155a1d5dc058aa884f290e7d6092c Mon Sep 17 00:00:00 2001 From: Adhik Joshi Date: Thu, 12 Mar 2026 12:06:10 +0530 Subject: [PATCH 3/3] Add speed option to ModelsLab TTS frontend - Add speed dropdown (0.5x-2x) to ModelsLabOptions component - Map TTSModelsLabSpeed in systemSettings.js - Add TTSModelsLabSpeed to updateENV.js for env var persistence Addresses review comments from @timothycarambat --- .../TextToSpeech/ModelsLabOptions/index.jsx | 25 +++++++++++++++++++ server/models/systemSettings.js | 1 + server/utils/helpers/updateENV.js | 4 +++ 3 files changed, 30 insertions(+) diff --git a/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx b/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx index ea12029bed9..166deafcfa5 100644 --- a/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx +++ 
b/frontend/src/components/TextToSpeech/ModelsLabOptions/index.jsx @@ -21,6 +21,15 @@ const MODELSLAB_LANGUAGES = [ { value: "hindi", label: "Hindi" }, ]; +const MODELSLAB_SPEEDS = [ + { value: "0.5", label: "0.5x (Slow)" }, + { value: "0.75", label: "0.75x" }, + { value: "1", label: "1x (Normal)" }, + { value: "1.25", label: "1.25x" }, + { value: "1.5", label: "1.5x (Fast)" }, + { value: "2", label: "2x (Very Fast)" }, +]; + export default function ModelsLabTextToSpeechOptions({ settings }) { return (
@@ -83,6 +92,22 @@ export default function ModelsLabTextToSpeechOptions({ settings }) { ))}
+
+ + +
); diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 9690338ec80..be2e94b2dd5 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -291,6 +291,7 @@ const SystemSettings = { TTSModelsLabVoiceId: process.env.TTS_MODELSLAB_VOICE_ID ?? "en_us_001", TTSModelsLabLanguage: process.env.TTS_MODELSLAB_LANGUAGE ?? "english", + TTSModelsLabSpeed: process.env.TTS_MODELSLAB_SPEED ?? "1", // -------------------------------------------------------- // Agent Settings & Configs diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 379680e687c..fa940786a91 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -666,6 +666,10 @@ const KEY_MAPPING = { envKey: "TTS_MODELSLAB_LANGUAGE", checks: [], }, + TTSModelsLabSpeed: { + envKey: "TTS_MODELSLAB_SPEED", + checks: [], + }, // DeepSeek Options DeepSeekApiKey: {