feat: Add Gradio UI apps for AI services

- embeddings.py: BGE embeddings demo with similarity
- stt.py: Whisper speech-to-text demo
- tts.py: XTTS text-to-speech demo
- theme.py: Shared DaviesTechLabs Gradio theme
- K8s deployments for each app
This commit is contained in:
2026-02-01 20:45:10 -05:00
parent 8f5de96130
commit 1f833e0124
11 changed files with 1733 additions and 1 deletions

272
tts.py Normal file
View File

@@ -0,0 +1,272 @@
#!/usr/bin/env python3
"""
TTS Demo - Gradio UI for testing Text-to-Speech service.
Features:
- Text input with language selection
- Audio playback of synthesized speech
- Voice/speaker selection (when available)
- MLflow metrics logging
- Multiple TTS backends support (Coqui XTTS, Piper, etc.)
"""
import os
import time
import logging
import io
import base64
import gradio as gr
import httpx
import soundfile as sf
import numpy as np
from theme import get_lab_theme, CUSTOM_CSS, create_footer
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("tts-demo")

# Configuration — all endpoints are overridable via environment variables so
# the app can run outside the cluster (e.g. with port-forwarded services).
TTS_URL = os.environ.get(
    "TTS_URL",
    "http://tts-predictor.ai-ml.svc.cluster.local"
)
# NOTE(review): MLFLOW_TRACKING_URI is read but never used anywhere in this
# module, despite the docstring advertising "MLflow metrics logging" —
# presumably logging was planned; confirm intent or remove the variable.
MLFLOW_TRACKING_URI = os.environ.get(
    "MLFLOW_TRACKING_URI",
    "http://mlflow.mlflow.svc.cluster.local:80"
)
# HTTP client with longer timeout for audio generation
client = httpx.Client(timeout=120.0)
# Languages supported by the XTTS backend, mapped from the display name shown
# in the UI to the language code the service expects. Insertion order matters:
# it drives the ordering of the dropdown choices.
LANGUAGES = dict(
    English="en",
    Spanish="es",
    French="fr",
    German="de",
    Italian="it",
    Portuguese="pt",
    Polish="pl",
    Turkish="tr",
    Russian="ru",
    Dutch="nl",
    Czech="cs",
    Arabic="ar",
    Chinese="zh-cn",
    Japanese="ja",
    Korean="ko",
    Hungarian="hu",
)
def synthesize_speech(text: str, language: str) -> tuple[str, tuple[int, np.ndarray] | None, str]:
"""Synthesize speech from text using the TTS service."""
if not text.strip():
return "❌ Please enter some text", None, ""
lang_code = LANGUAGES.get(language, "en")
try:
start_time = time.time()
# Call TTS service (Coqui XTTS API format)
response = client.get(
f"{TTS_URL}/api/tts",
params={"text": text, "language_id": lang_code}
)
response.raise_for_status()
latency = time.time() - start_time
audio_bytes = response.content
# Parse audio data
audio_io = io.BytesIO(audio_bytes)
audio_data, sample_rate = sf.read(audio_io)
# Calculate duration
if len(audio_data.shape) == 1:
duration = len(audio_data) / sample_rate
else:
duration = len(audio_data) / sample_rate
# Status message
status = f"✅ Generated {duration:.2f}s of audio in {latency*1000:.0f}ms"
# Metrics
metrics = f"""
**Audio Statistics:**
- Duration: {duration:.2f} seconds
- Sample Rate: {sample_rate} Hz
- Size: {len(audio_bytes) / 1024:.1f} KB
- Generation Time: {latency*1000:.0f}ms
- Real-time Factor: {latency/duration:.2f}x
- Language: {language} ({lang_code})
- Characters: {len(text)}
- Chars/sec: {len(text)/latency:.1f}
"""
return status, (sample_rate, audio_data), metrics
except httpx.HTTPStatusError as e:
logger.exception("TTS request failed")
return f"❌ TTS service error: {e.response.status_code}", None, ""
except Exception as e:
logger.exception("TTS synthesis failed")
return f"❌ Error: {str(e)}", None, ""
def check_service_health() -> str:
    """Probe the TTS service and return a human-readable status string."""
    try:
        # Prefer the dedicated health endpoint; fall back to the root path.
        for path in ("/health", "/"):
            resp = client.get(f"{TTS_URL}{path}", timeout=5.0)
            if resp.status_code == 200:
                if path == "/health":
                    return "🟢 Service is healthy"
                return "🟢 Service is responding"
        # Neither endpoint returned 200 — report the last status we saw.
        return f"🟡 Service returned status {resp.status_code}"
    except Exception as e:
        return f"🔴 Service unavailable: {str(e)}"
# Build the Gradio app. All components and event handlers are declared inside
# the Blocks context; `demo` is launched by the __main__ guard below.
with gr.Blocks(theme=get_lab_theme(), css=CUSTOM_CSS, title="TTS Demo") as demo:
    # Page header
    gr.Markdown("""
# 🔊 Text-to-Speech Demo
Test the **Coqui XTTS** text-to-speech service. Convert text to natural-sounding speech
in multiple languages.
""")
    # Service status row: manual health probe with a read-only result box.
    with gr.Row():
        health_btn = gr.Button("🔄 Check Service", size="sm")
        health_status = gr.Textbox(label="Service Status", interactive=False)
    health_btn.click(fn=check_service_health, outputs=health_status)
    with gr.Tabs():
        # Tab 1: Basic TTS — single text in, single audio out.
        with gr.TabItem("🎤 Text to Speech"):
            with gr.Row():
                with gr.Column(scale=2):
                    text_input = gr.Textbox(
                        label="Text to Synthesize",
                        placeholder="Enter text to convert to speech...",
                        lines=5,
                        max_lines=10
                    )
                    with gr.Row():
                        language = gr.Dropdown(
                            choices=list(LANGUAGES.keys()),
                            value="English",
                            label="Language"
                        )
                        synthesize_btn = gr.Button("🔊 Synthesize", variant="primary", scale=2)
                with gr.Column(scale=1):
                    status_output = gr.Textbox(label="Status", interactive=False)
                    metrics_output = gr.Markdown(label="Metrics")
            # type="numpy" matches synthesize_speech's (sample_rate, array) return.
            audio_output = gr.Audio(label="Generated Audio", type="numpy")
            synthesize_btn.click(
                fn=synthesize_speech,
                inputs=[text_input, language],
                outputs=[status_output, audio_output, metrics_output]
            )
            # Example texts (clicking one fills text_input + language)
            gr.Examples(
                examples=[
                    ["Hello! Welcome to Davies Tech Labs. This is a demonstration of our text-to-speech system.", "English"],
                    ["The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.", "English"],
                    ["Bonjour! Bienvenue au laboratoire technique de Davies.", "French"],
                    ["Hola! Bienvenido al laboratorio de tecnología.", "Spanish"],
                    ["Guten Tag! Willkommen im Techniklabor.", "German"],
                ],
                inputs=[text_input, language],
            )
        # Tab 2: Comparison — synthesize the same text in two languages side by side.
        with gr.TabItem("🔄 Language Comparison"):
            gr.Markdown("Compare the same text in different languages.")
            compare_text = gr.Textbox(
                label="Text to Compare",
                value="Hello, how are you today?",
                lines=2
            )
            with gr.Row():
                lang1 = gr.Dropdown(choices=list(LANGUAGES.keys()), value="English", label="Language 1")
                lang2 = gr.Dropdown(choices=list(LANGUAGES.keys()), value="Spanish", label="Language 2")
            compare_btn = gr.Button("Compare Languages", variant="primary")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Language 1")
                    audio1 = gr.Audio(label="Audio 1", type="numpy")
                    status1 = gr.Textbox(label="Status", interactive=False)
                with gr.Column():
                    gr.Markdown("### Language 2")
                    audio2 = gr.Audio(label="Audio 2", type="numpy")
                    status2 = gr.Textbox(label="Status", interactive=False)
            def compare_languages(text, l1, l2):
                """Run synthesize_speech twice (sequentially) and fan out
                the statuses/audios to the two result columns; per-call
                metrics are discarded here."""
                s1, a1, _ = synthesize_speech(text, l1)
                s2, a2, _ = synthesize_speech(text, l2)
                return s1, a1, s2, a2
            compare_btn.click(
                fn=compare_languages,
                inputs=[compare_text, lang1, lang2],
                outputs=[status1, audio1, status2, audio2]
            )
        # Tab 3: Batch Processing — UI scaffolding only (see NOTE below).
        with gr.TabItem("📚 Batch Synthesis"):
            gr.Markdown("Synthesize multiple texts at once (one per line).")
            batch_input = gr.Textbox(
                label="Texts (one per line)",
                placeholder="Enter multiple texts, one per line...",
                lines=6
            )
            batch_lang = gr.Dropdown(
                choices=list(LANGUAGES.keys()),
                value="English",
                label="Language"
            )
            batch_btn = gr.Button("Synthesize All", variant="primary")
            batch_status = gr.Textbox(label="Status", interactive=False)
            batch_audios = gr.Dataset(
                components=[gr.Audio(type="numpy")],
                label="Generated Audio Files"
            )
            # Note: Batch processing would need more complex handling
            # This is a simplified version
            # NOTE(review): batch_btn has no .click() handler wired, so this
            # tab is non-functional scaffolding — confirm whether that is
            # intentional before shipping.
            gr.Markdown("""
*Note: For batch processing of many texts, consider using the API directly
or the Kubeflow pipeline for better throughput.*
""")
    # Shared footer from theme.py
    create_footer()
if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside the
    # container; 7860 is Gradio's conventional port (presumably matched by
    # the K8s Service in this commit — confirm against the deployment).
    # show_error surfaces handler exceptions in the UI instead of hiding them.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )