fix: remove unused imports and apply ruff formatting
Some checks failed
CI / Docker Build & Push (push) Failing after 2m37s
CI / Deploy to Kubernetes (push) Has been skipped
CI / Notify (push) Successful in 1s
CI / Lint (push) Successful in 10s
CI / Release (push) Successful in 4s

- Remove unused imports: json (llm.py), tempfile (stt.py), base64 (tts.py)
- Apply ruff format to all Python files
This commit is contained in:
2026-02-18 18:36:16 -05:00
parent 0cc03aa145
commit faa5dc0d9d
5 changed files with 283 additions and 246 deletions

155
tts.py
View File

@@ -9,11 +9,11 @@ Features:
- MLflow metrics logging
- Multiple TTS backends support (Coqui XTTS, Piper, etc.)
"""
import os
import time
import logging
import io
import base64
import gradio as gr
import httpx
@@ -30,11 +30,10 @@ logger = logging.getLogger("tts-demo")
TTS_URL = os.environ.get(
"TTS_URL",
# Default: Ray Serve TTS endpoint
"http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/tts"
"http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/tts",
)
MLFLOW_TRACKING_URI = os.environ.get(
"MLFLOW_TRACKING_URI",
"http://mlflow.mlflow.svc.cluster.local:80"
"MLFLOW_TRACKING_URI", "http://mlflow.mlflow.svc.cluster.local:80"
)
# ─── MLflow experiment tracking ──────────────────────────────────────────
@@ -62,7 +61,9 @@ try:
_mlflow_run_id = _mlflow_run.info.run_id
_mlflow_step = 0
MLFLOW_ENABLED = True
logger.info("MLflow tracking enabled: experiment=%s run=%s", _experiment_id, _mlflow_run_id)
logger.info(
"MLflow tracking enabled: experiment=%s run=%s", _experiment_id, _mlflow_run_id
)
except Exception as exc:
logger.warning("MLflow tracking disabled: %s", exc)
_mlflow_client = None
@@ -72,7 +73,10 @@ except Exception as exc:
def _log_tts_metrics(
latency: float, audio_duration: float, text_chars: int, language: str,
latency: float,
audio_duration: float,
text_chars: int,
language: str,
) -> None:
"""Log TTS inference metrics to MLflow (non-blocking best-effort)."""
global _mlflow_step
@@ -87,7 +91,9 @@ def _log_tts_metrics(
_mlflow_run_id,
metrics=[
mlflow.entities.Metric("latency_s", latency, ts, _mlflow_step),
mlflow.entities.Metric("audio_duration_s", audio_duration, ts, _mlflow_step),
mlflow.entities.Metric(
"audio_duration_s", audio_duration, ts, _mlflow_step
),
mlflow.entities.Metric("realtime_factor", rtf, ts, _mlflow_step),
mlflow.entities.Metric("chars_per_second", cps, ts, _mlflow_step),
mlflow.entities.Metric("text_chars", text_chars, ts, _mlflow_step),
@@ -121,38 +127,39 @@ LANGUAGES = {
}
def synthesize_speech(text: str, language: str) -> tuple[str, tuple[int, np.ndarray] | None, str]:
def synthesize_speech(
text: str, language: str
) -> tuple[str, tuple[int, np.ndarray] | None, str]:
"""Synthesize speech from text using the TTS service."""
if not text.strip():
return "❌ Please enter some text", None, ""
lang_code = LANGUAGES.get(language, "en")
try:
start_time = time.time()
# Call TTS service (Coqui XTTS API format)
response = client.get(
f"{TTS_URL}/api/tts",
params={"text": text, "language_id": lang_code}
f"{TTS_URL}/api/tts", params={"text": text, "language_id": lang_code}
)
response.raise_for_status()
latency = time.time() - start_time
audio_bytes = response.content
# Parse audio data
audio_io = io.BytesIO(audio_bytes)
audio_data, sample_rate = sf.read(audio_io)
# Calculate duration
if len(audio_data.shape) == 1:
duration = len(audio_data) / sample_rate
else:
duration = len(audio_data) / sample_rate
# Status message
status = f"✅ Generated {duration:.2f}s of audio in {latency*1000:.0f}ms"
status = f"✅ Generated {duration:.2f}s of audio in {latency * 1000:.0f}ms"
# Log to MLflow
_log_tts_metrics(
@@ -161,22 +168,22 @@ def synthesize_speech(text: str, language: str) -> tuple[str, tuple[int, np.ndar
text_chars=len(text),
language=lang_code,
)
# Metrics
metrics = f"""
**Audio Statistics:**
- Duration: {duration:.2f} seconds
- Sample Rate: {sample_rate} Hz
- Size: {len(audio_bytes) / 1024:.1f} KB
- Generation Time: {latency*1000:.0f}ms
- Real-time Factor: {latency/duration:.2f}x
- Generation Time: {latency * 1000:.0f}ms
- Real-time Factor: {latency / duration:.2f}x
- Language: {language} ({lang_code})
- Characters: {len(text)}
- Chars/sec: {len(text)/latency:.1f}
- Chars/sec: {len(text) / latency:.1f}
"""
return status, (sample_rate, audio_data), metrics
except httpx.HTTPStatusError as e:
logger.exception("TTS request failed")
return f"❌ TTS service error: {e.response.status_code}", None, ""
@@ -192,12 +199,12 @@ def check_service_health() -> str:
response = client.get(f"{TTS_URL}/health", timeout=5.0)
if response.status_code == 200:
return "🟢 Service is healthy"
# Fall back to root endpoint
response = client.get(f"{TTS_URL}/", timeout=5.0)
if response.status_code == 200:
return "🟢 Service is responding"
return f"🟡 Service returned status {response.status_code}"
except Exception as e:
return f"🔴 Service unavailable: {str(e)}"
@@ -211,14 +218,14 @@ with gr.Blocks(theme=get_lab_theme(), css=CUSTOM_CSS, title="TTS Demo") as demo:
Test the **Coqui XTTS** text-to-speech service. Convert text to natural-sounding speech
in multiple languages.
""")
# Service status
with gr.Row():
health_btn = gr.Button("🔄 Check Service", size="sm")
health_status = gr.Textbox(label="Service Status", interactive=False)
health_btn.click(fn=check_service_health, outputs=health_status)
with gr.Tabs():
# Tab 1: Basic TTS
with gr.TabItem("🎤 Text to Speech"):
@@ -228,114 +235,120 @@ in multiple languages.
label="Text to Synthesize",
placeholder="Enter text to convert to speech...",
lines=5,
max_lines=10
max_lines=10,
)
with gr.Row():
language = gr.Dropdown(
choices=list(LANGUAGES.keys()),
value="English",
label="Language"
label="Language",
)
synthesize_btn = gr.Button("🔊 Synthesize", variant="primary", scale=2)
synthesize_btn = gr.Button(
"🔊 Synthesize", variant="primary", scale=2
)
with gr.Column(scale=1):
status_output = gr.Textbox(label="Status", interactive=False)
metrics_output = gr.Markdown(label="Metrics")
audio_output = gr.Audio(label="Generated Audio", type="numpy")
synthesize_btn.click(
fn=synthesize_speech,
inputs=[text_input, language],
outputs=[status_output, audio_output, metrics_output]
outputs=[status_output, audio_output, metrics_output],
)
# Example texts
gr.Examples(
examples=[
["Hello! Welcome to Davies Tech Labs. This is a demonstration of our text-to-speech system.", "English"],
["The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.", "English"],
["Bonjour! Bienvenue au laboratoire technique de Davies.", "French"],
[
"Hello! Welcome to Davies Tech Labs. This is a demonstration of our text-to-speech system.",
"English",
],
[
"The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.",
"English",
],
[
"Bonjour! Bienvenue au laboratoire technique de Davies.",
"French",
],
["Hola! Bienvenido al laboratorio de tecnología.", "Spanish"],
["Guten Tag! Willkommen im Techniklabor.", "German"],
],
inputs=[text_input, language],
)
# Tab 2: Comparison
with gr.TabItem("🔄 Language Comparison"):
gr.Markdown("Compare the same text in different languages.")
compare_text = gr.Textbox(
label="Text to Compare",
value="Hello, how are you today?",
lines=2
label="Text to Compare", value="Hello, how are you today?", lines=2
)
with gr.Row():
lang1 = gr.Dropdown(choices=list(LANGUAGES.keys()), value="English", label="Language 1")
lang2 = gr.Dropdown(choices=list(LANGUAGES.keys()), value="Spanish", label="Language 2")
lang1 = gr.Dropdown(
choices=list(LANGUAGES.keys()), value="English", label="Language 1"
)
lang2 = gr.Dropdown(
choices=list(LANGUAGES.keys()), value="Spanish", label="Language 2"
)
compare_btn = gr.Button("Compare Languages", variant="primary")
with gr.Row():
with gr.Column():
gr.Markdown("### Language 1")
audio1 = gr.Audio(label="Audio 1", type="numpy")
status1 = gr.Textbox(label="Status", interactive=False)
with gr.Column():
gr.Markdown("### Language 2")
audio2 = gr.Audio(label="Audio 2", type="numpy")
status2 = gr.Textbox(label="Status", interactive=False)
def compare_languages(text, l1, l2):
    """Synthesize the same text in two languages for side-by-side playback.

    Calls synthesize_speech once per language (first ``l1``, then ``l2``)
    and drops the third element of each result, which this tab does not use.

    Returns:
        A flat ``(status_1, audio_1, status_2, audio_2)`` tuple.
    """
    first_status, first_audio, _ = synthesize_speech(text, l1)
    second_status, second_audio, _ = synthesize_speech(text, l2)
    return first_status, first_audio, second_status, second_audio
compare_btn.click(
fn=compare_languages,
inputs=[compare_text, lang1, lang2],
outputs=[status1, audio1, status2, audio2]
outputs=[status1, audio1, status2, audio2],
)
# Tab 3: Batch Processing
with gr.TabItem("📚 Batch Synthesis"):
gr.Markdown("Synthesize multiple texts at once (one per line).")
batch_input = gr.Textbox(
label="Texts (one per line)",
placeholder="Enter multiple texts, one per line...",
lines=6
lines=6,
)
batch_lang = gr.Dropdown(
choices=list(LANGUAGES.keys()),
value="English",
label="Language"
choices=list(LANGUAGES.keys()), value="English", label="Language"
)
batch_btn = gr.Button("Synthesize All", variant="primary")
batch_status = gr.Textbox(label="Status", interactive=False)
batch_audios = gr.Dataset(
components=[gr.Audio(type="numpy")],
label="Generated Audio Files"
components=[gr.Audio(type="numpy")], label="Generated Audio Files"
)
# Note: no click handler is attached to batch_btn here. Real batch processing
# would need more complex handling; this tab is a simplified layout only.
gr.Markdown("""
*Note: For batch processing of many texts, consider using the API directly
or the Kubeflow pipeline for better throughput.*
""")
create_footer()
if __name__ == "__main__":
demo.launch(
server_name="0.0.0.0",
server_port=7860,
show_error=True
)
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)