fix(tts): add /health endpoint, fix language param for single-lang models
All checks were successful
Build and Publish ray-serve-apps / build-and-publish (push) Successful in 1m54s
All checks were successful
Build and Publish ray-serve-apps / build-and-publish (push) Successful in 1m54s
- Add GET /health endpoint returning model name and GPU status
- Don't pass language/speaker to Coqui TTS when the model doesn't support multilingual/multi-speaker (fixes 500 on ljspeech/tacotron2-DDC)
- Apply the language/speaker fix to all three endpoints: POST /, GET /api/tts, POST /stream
This commit is contained in:
@@ -120,6 +120,17 @@ class TTSDeployment:
|
|||||||
realtime_factor=elapsed / duration if duration > 0 else 0,
|
realtime_factor=elapsed / duration if duration > 0 else 0,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ── GET /health — simple liveness check ─────────────────────────────
|
||||||
|
|
||||||
|
@_fastapi.get("/health")
|
||||||
|
def health(self) -> dict[str, Any]:
|
||||||
|
"""Simple health/readiness check."""
|
||||||
|
return {
|
||||||
|
"status": "ok",
|
||||||
|
"model": self.model_name,
|
||||||
|
"gpu": self.use_gpu,
|
||||||
|
}
|
||||||
|
|
||||||
# ── POST / — JSON API (base64 audio in response) ────────────────────
|
# ── POST / — JSON API (base64 audio in response) ────────────────────
|
||||||
|
|
||||||
@_fastapi.post("/")
|
@_fastapi.post("/")
|
||||||
@@ -140,6 +151,12 @@ class TTSDeployment:
|
|||||||
output_format = request.get("output_format", "wav")
|
output_format = request.get("output_format", "wav")
|
||||||
return_base64 = request.get("return_base64", True)
|
return_base64 = request.get("return_base64", True)
|
||||||
|
|
||||||
|
# Only pass language/speaker if the model supports it
|
||||||
|
if not (hasattr(self.tts, "is_multi_lingual") and self.tts.is_multi_lingual):
|
||||||
|
language = None
|
||||||
|
if not (hasattr(self.tts, "is_multi_speaker") and self.tts.is_multi_speaker):
|
||||||
|
speaker = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
audio_bytes, sample_rate, duration = self._synthesize(
|
audio_bytes, sample_rate, duration = self._synthesize(
|
||||||
text, speaker, language, speed
|
text, speaker, language, speed
|
||||||
@@ -174,9 +191,13 @@ class TTSDeployment:
|
|||||||
if not text:
|
if not text:
|
||||||
return Response(content="text parameter required", status_code=400)
|
return Response(content="text parameter required", status_code=400)
|
||||||
|
|
||||||
|
# Only pass language/speaker if the model is multi-lingual/multi-speaker
|
||||||
|
lang = language_id if hasattr(self.tts, "is_multi_lingual") and self.tts.is_multi_lingual else None
|
||||||
|
spk = speaker_id if hasattr(self.tts, "is_multi_speaker") and self.tts.is_multi_speaker else None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
audio_bytes, _sr, duration = self._synthesize(
|
audio_bytes, _sr, duration = self._synthesize(
|
||||||
text, speaker_id, language_id
|
text, spk, lang
|
||||||
)
|
)
|
||||||
self._log(_start, duration, len(text))
|
self._log(_start, duration, len(text))
|
||||||
return Response(content=audio_bytes, media_type="audio/wav")
|
return Response(content=audio_bytes, media_type="audio/wav")
|
||||||
@@ -212,6 +233,13 @@ class TTSDeployment:
|
|||||||
speaker = body.get("speaker")
|
speaker = body.get("speaker")
|
||||||
language = body.get("language")
|
language = body.get("language")
|
||||||
speed = body.get("speed", 1.0)
|
speed = body.get("speed", 1.0)
|
||||||
|
|
||||||
|
# Only pass language/speaker if the model supports it
|
||||||
|
if not (hasattr(self.tts, "is_multi_lingual") and self.tts.is_multi_lingual):
|
||||||
|
language = None
|
||||||
|
if not (hasattr(self.tts, "is_multi_speaker") and self.tts.is_multi_speaker):
|
||||||
|
speaker = None
|
||||||
|
|
||||||
sentences = _split_sentences(text)
|
sentences = _split_sentences(text)
|
||||||
|
|
||||||
async def _generate():
|
async def _generate():
|
||||||
|
|||||||
Reference in New Issue
Block a user