fix(tts): add /health endpoint, fix language param for single-lang models
All checks were successful
Build and Publish ray-serve-apps / build-and-publish (push) Successful in 1m54s
All checks were successful
Build and Publish ray-serve-apps / build-and-publish (push) Successful in 1m54s
- Add GET /health endpoint returning model name and GPU status
- Don't pass language/speaker to Coqui TTS when the model doesn't support multilingual/multi-speaker synthesis (fixes 500 on ljspeech/tacotron2-DDC)
- Apply the fix to all three endpoints: POST /, GET /api/tts, POST /stream
This commit is contained in:
@@ -120,6 +120,17 @@ class TTSDeployment:
|
||||
realtime_factor=elapsed / duration if duration > 0 else 0,
|
||||
)
|
||||
|
||||
# ── GET /health — simple liveness check ─────────────────────────────
|
||||
|
||||
@_fastapi.get("/health")
def health(self) -> dict[str, Any]:
    """Report a fixed "ok" status with the deployment's model and GPU flag.

    No probing is done here: the status value is constant, and the other
    two fields are read straight from attributes on the instance.

    Returns:
        A dict with keys ``"status"``, ``"model"`` and ``"gpu"``.
    """
    payload: dict[str, Any] = {"status": "ok"}
    # Echo the instance configuration so callers can see what is being served.
    payload["model"] = self.model_name
    payload["gpu"] = self.use_gpu
    return payload
|
||||
|
||||
# ── POST / — JSON API (base64 audio in response) ────────────────────
|
||||
|
||||
@_fastapi.post("/")
|
||||
@@ -140,6 +151,12 @@ class TTSDeployment:
|
||||
output_format = request.get("output_format", "wav")
|
||||
return_base64 = request.get("return_base64", True)
|
||||
|
||||
# Only pass language/speaker if the model supports it
|
||||
if not (hasattr(self.tts, "is_multi_lingual") and self.tts.is_multi_lingual):
|
||||
language = None
|
||||
if not (hasattr(self.tts, "is_multi_speaker") and self.tts.is_multi_speaker):
|
||||
speaker = None
|
||||
|
||||
try:
|
||||
audio_bytes, sample_rate, duration = self._synthesize(
|
||||
text, speaker, language, speed
|
||||
@@ -174,9 +191,13 @@ class TTSDeployment:
|
||||
if not text:
|
||||
return Response(content="text parameter required", status_code=400)
|
||||
|
||||
# Only pass language/speaker if the model is multi-lingual/multi-speaker
|
||||
lang = language_id if hasattr(self.tts, "is_multi_lingual") and self.tts.is_multi_lingual else None
|
||||
spk = speaker_id if hasattr(self.tts, "is_multi_speaker") and self.tts.is_multi_speaker else None
|
||||
|
||||
try:
|
||||
audio_bytes, _sr, duration = self._synthesize(
|
||||
text, speaker_id, language_id
|
||||
text, spk, lang
|
||||
)
|
||||
self._log(_start, duration, len(text))
|
||||
return Response(content=audio_bytes, media_type="audio/wav")
|
||||
@@ -212,6 +233,13 @@ class TTSDeployment:
|
||||
speaker = body.get("speaker")
|
||||
language = body.get("language")
|
||||
speed = body.get("speed", 1.0)
|
||||
|
||||
# Only pass language/speaker if the model supports it
|
||||
if not (hasattr(self.tts, "is_multi_lingual") and self.tts.is_multi_lingual):
|
||||
language = None
|
||||
if not (hasattr(self.tts, "is_multi_speaker") and self.tts.is_multi_speaker):
|
||||
speaker = None
|
||||
|
||||
sentences = _split_sentences(text)
|
||||
|
||||
async def _generate():
|
||||
|
||||
Reference in New Issue
Block a user