anything-llm/server/utils/TextToSpeech/modelslab/index.js at b22fb096865155a1d5dc058aa884f290e7d6092c · Mintplex-Labs/anything-llm · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/**
 * Text-to-speech provider backed by the ModelsLab voice API.
 *
 * Configuration is read from environment variables at construction time:
 * - `TTS_MODELSLAB_API_KEY`  (required) API key sent in every request body.
 * - `TTS_MODELSLAB_VOICE_ID` (optional) voice id, defaults to `DEFAULT_VOICE`.
 * - `TTS_MODELSLAB_LANGUAGE` (optional) language, defaults to `"english"`.
 * - `TTS_MODELSLAB_SPEED`    (optional) positive number, defaults to `1`.
 */
class ModelsLabTTS {
  // Voice ids known to this integration. The class does not validate the
  // configured voice against this list; presumably it is used by callers/UI.
  static VOICES = [
    "en_us_001",
    "en_us_006",
    "en_us_007",
    "en_us_009",
    "en_us_010",
    "en_uk_001",
    "en_uk_003",
    "en_au_001",
    "en_au_002",
  ];

  static DEFAULT_VOICE = "en_us_001";

  // Polling schedule: wait POLL_INITIAL_DELAY_MS before the first poll, then
  // grow the wait by POLL_DELAY_STEP_MS per attempt up to POLL_MAX_DELAY_MS.
  static POLL_INITIAL_DELAY_MS = 1000;
  static POLL_DELAY_STEP_MS = 1000;
  static POLL_MAX_DELAY_MS = 5000;

  /**
   * @throws {Error} When `TTS_MODELSLAB_API_KEY` is not set.
   */
  constructor() {
    if (!process.env.TTS_MODELSLAB_API_KEY)
      throw new Error("No ModelsLab API key was set for TTS.");
    this.apiKey = process.env.TTS_MODELSLAB_API_KEY;

    // `||` rather than `??`: an env var declared but left empty (`KEY=`) is an
    // empty string, which must fall back to the default just like an unset one.
    this.voice =
      process.env.TTS_MODELSLAB_VOICE_ID || ModelsLabTTS.DEFAULT_VOICE;
    this.language = process.env.TTS_MODELSLAB_LANGUAGE || "english";

    // An unparsable speed would be NaN, which JSON.stringify serializes as
    // `null` in the request body, so fall back to the default of 1 instead.
    const rawSpeed = process.env.TTS_MODELSLAB_SPEED;
    const parsedSpeed = Number.parseFloat(rawSpeed || "1");
    const isValidSpeed = Number.isFinite(parsedSpeed) && parsedSpeed > 0;
    if (rawSpeed && !isValidSpeed)
      this.#log(`Invalid TTS_MODELSLAB_SPEED "${rawSpeed}", using 1.`);
    this.speed = isValidSpeed ? parsedSpeed : 1;

    this.#log(`Initialized with voice: ${this.voice}`);
  }

  /**
   * Writes a tagged (green `[ModelsLabTTS]` prefix) message to stdout.
   * @param {string} text
   * @param {...any} args - Extra values forwarded to `console.log`.
   */
  #log(text, ...args) {
    console.log(`\x1b[32m[ModelsLabTTS]\x1b[0m ${text}`, ...args);
  }

  /**
   * Extracts the audio URL from an API response that reports finished audio.
   * @param {any} data - Parsed JSON body of a ModelsLab response.
   * @returns {string|null} First entry of `output` when `status` is
   * `"success"` and `output` is a non-empty array, otherwise `null`.
   */
  #readyAudioUrl(data) {
    if (data?.status !== "success") return null;
    if (!Array.isArray(data.output) || data.output.length === 0) return null;
    return data.output[0];
  }

  /**
   * Fetches a URL and returns the response body as a Buffer.
   * @param {string} url
   * @returns {Promise<Buffer>}
   * @throws {Error} When the response status is not OK.
   */
  async #fetchUrl(url) {
    const response = await fetch(url);
    if (!response.ok) {
      // statusText is frequently empty, so always include the numeric status.
      const reason = `${response.status} ${response.statusText ?? ""}`.trim();
      throw new Error(`Failed to fetch audio: ${reason}`);
    }
    const arrayBuffer = await response.arrayBuffer();
    return Buffer.from(arrayBuffer);
  }

  /**
   * Polls the ModelsLab fetch endpoint until the audio is ready.
   * The wait between polls grows linearly (see the `POLL_*` static fields).
   * Network and JSON parsing errors are not caught here; they propagate to
   * the caller.
   * @param {string|number} requestId
   * @param {number} maxAttempts
   * @returns {Promise<Buffer|null>} The audio, or `null` when the API reports
   * an error or the audio is not ready after `maxAttempts` polls.
   */
  async #pollForResult(requestId, maxAttempts = 20) {
    const fetchUrl = "https://modelslab.com/api/v6/voice/fetch";
    let delayMs = ModelsLabTTS.POLL_INITIAL_DELAY_MS;

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      // Wait first: the job was just reported as still processing.
      await new Promise((resolve) => setTimeout(resolve, delayMs));

      const response = await fetch(fetchUrl, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          key: this.apiKey,
          request_id: String(requestId),
        }),
      });

      const data = await response.json();

      const audioUrl = this.#readyAudioUrl(data);
      if (audioUrl !== null) return await this.#fetchUrl(audioUrl);

      if (data?.status === "error") {
        // `messege` (sic) is read as a fallback key alongside `message`.
        this.#log(
          "Poll error:",
          data.message || data.messege || "Unknown error"
        );
        return null;
      }

      this.#log(`Polling attempt ${attempt + 1}/${maxAttempts}...`);

      // Linear backoff: with the defaults 1s, 2s, 3s, 4s... up to 5s max.
      delayMs = Math.min(
        delayMs + ModelsLabTTS.POLL_DELAY_STEP_MS,
        ModelsLabTTS.POLL_MAX_DELAY_MS
      );
    }

    this.#log("Timed out waiting for audio generation.");
    return null;
  }

  /**
   * Generates a buffer from the given text input using the ModelsLab TTS API.
   * Never throws: every failure is logged and reported as `null`.
   * @param {string} textInput - The text to be converted to audio.
   * @returns {Promise<Buffer|null>} A buffer containing the audio data, or
   * `null` when the input is blank or the audio could not be produced.
   */
  async ttsBuffer(textInput) {
    // Nothing to synthesize: skip the remote call entirely.
    if (typeof textInput !== "string" || textInput.trim() === "") {
      this.#log("No text was provided to convert to speech.");
      return null;
    }

    try {
      const response = await fetch(
        "https://modelslab.com/api/v6/voice/text_to_speech",
        {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({
            key: this.apiKey,
            prompt: textInput,
            voice_id: this.voice,
            language: this.language,
            speed: this.speed,
          }),
        }
      );

      const data = await response.json();

      // The audio may be ready immediately...
      const audioUrl = this.#readyAudioUrl(data);
      if (audioUrl !== null) return await this.#fetchUrl(audioUrl);

      // ...or queued, in which case the request id is polled until it is done.
      if (data?.status === "processing" && data.id) {
        this.#log(`Processing... polling for request ID: ${data.id}`);
        return await this.#pollForResult(data.id);
      }

      this.#log("Unexpected response:", JSON.stringify(data));
      return null;
    } catch (e) {
      console.error("[ModelsLabTTS] Error:", e);
      return null;
    }
  }
}

// CommonJS named export of the provider class.
module.exports = { ModelsLabTTS };