import os

from supertone import Supertone

VOICE_ID = "20160a4c5ba38967330c84"  # replace with your voice ID

# Sample narration text. The adjacent string literals below are concatenated
# by Python into a single string before being sent.
LONG_TEXT = (
    "Once upon a time, in a faraway land, there lived a quiet librarian "
    "who collected stories of forgotten kingdoms. Every evening she would "
    "open a leather-bound notebook and continue writing the next chapter "
    "of a tale she had been telling herself for years. ...continue with "
    "many more sentences spanning over 300 characters..."
)

# The API key is read from the SUPERTONE_API_KEY environment variable; a
# missing variable raises KeyError before any request is made.
with Supertone(api_key=os.environ["SUPERTONE_API_KEY"]) as client:
    # The whole text is passed in a single create_speech call.
    response = client.text_to_speech.create_speech(
        voice_id=VOICE_ID,
        text=LONG_TEXT,
        language="en",
    )

    # Save the returned audio; binary mode because the payload is raw bytes.
    with open("narration.wav", "wb") as audio_file:
        audio_file.write(response.result.read())
SDK는 내부적으로 LONG_TEXT를 문장 경계에서 우선 분할하고(그다음으로 단어 경계, 단어 하나가 너무 길면 문자 경계에서 분할), 최대 3개의 create_speech 요청을 병렬로 실행한 뒤, 중간 파일 헤더를 제거하여 WAV/MP3 오디오를 하나로 병합합니다.
import { Supertone } from "@supertone/supertone";
import * as fs from "node:fs";

const VOICE_ID = "20160a4c5ba38967330c84"; // replace with your voice ID

// Sample narration text. Kept on a single line so the string contains no
// embedded newlines; trim() removes the leading space.
const LONG_TEXT = ` Once upon a time, in a faraway land, there lived a quiet librarian who collected stories of forgotten kingdoms. Every evening she would open a leather-bound notebook and continue writing the next chapter of a tale she had been telling herself for years. ...continue with many more sentences spanning over 300 characters...`.trim();

// Destination file for the synthesized audio.
const OUTPUT_PATH = "narration.wav";

// The API key is read from the SUPERTONE_API_KEY environment variable.
const client = new Supertone({ apiKey: process.env.SUPERTONE_API_KEY });

// The whole text is passed in a single createSpeech call.
const response = await client.textToSpeech.createSpeech({
  voiceId: VOICE_ID,
  apiConvertTextToSpeechUsingCharacterRequest: {
    text: LONG_TEXT,
    language: "en",
  },
});

const audio = response.result;

if (audio instanceof Uint8Array) {
  // The result is already a complete byte buffer: write it directly.
  fs.writeFileSync(OUTPUT_PATH, audio);
} else if (audio && "getReader" in audio) {
  // The result is a stream: drain it chunk by chunk, then write once.
  const reader = (audio as ReadableStream<Uint8Array>).getReader();
  const parts: Uint8Array[] = [];

  let step = await reader.read();
  while (!step.done) {
    if (step.value) parts.push(step.value);
    step = await reader.read();
  }

  fs.writeFileSync(OUTPUT_PATH, Buffer.concat(parts));
}
SDK는 stream_speech / streamSpeech에서도 자동 분할을 수행합니다. 오디오는 마치 하나의 연속된 스트림인 것처럼 호출자의 이터레이터로 전달되므로, 몇 개의 세그먼트가 사용되었는지 알 필요가 없습니다. 스트리밍 패턴에 대해서는 Stream speech를 참고해 주십시오.