feat: Add Gradio UI apps for AI services

- embeddings.py: BGE embeddings demo with similarity
- stt.py: Whisper speech-to-text demo
- tts.py: XTTS text-to-speech demo
- theme.py: Shared DaviesTechLabs Gradio theme
- K8s deployments for each app
2026-02-01 20:45:10 -05:00
parent 8f5de96130
commit 1f833e0124
11 changed files with 1733 additions and 1 deletions
--- a/tts.py
+++ b/tts.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python3
+"""
+TTS Demo - Gradio UI for testing Text-to-Speech service.
+
+Features:
+- Text input with language selection
+- Audio playback of synthesized speech
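+- Voice/speaker selection (when available; not yet implemented in this file)
+- MLflow metrics logging (not yet implemented; MLFLOW_TRACKING_URI is read but unused)
+- Multiple TTS backends support (Coqui XTTS, Piper, etc.; only the Coqui XTTS API is called so far)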
+"""
+import os
+import time
+import logging
+import io
+import base64
+
+import gradio as gr
+import httpx
+import soundfile as sf
+import numpy as np
+
+from theme import get_lab_theme, CUSTOM_CSS, create_footer
+
+# Logging: INFO level on the root logger, plus a named logger for this app.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("tts-demo")
+
+# Service endpoints. Each value can be overridden through the environment
+# variable of the same name; the defaults are in-cluster service addresses.
+TTS_URL = os.getenv("TTS_URL", "http://tts-predictor.ai-ml.svc.cluster.local")
+MLFLOW_TRACKING_URI = os.getenv(
+    "MLFLOW_TRACKING_URI", "http://mlflow.mlflow.svc.cluster.local:80"
+)
+
+# Shared HTTP client; the 120-second timeout leaves room for slow audio
+# generation.
+client = httpx.Client(timeout=120.0)
+
+# Display name -> language code sent to the TTS service (XTTS languages).
+# The insertion order is the order in which the names appear in the dropdowns.
+LANGUAGES = dict(
+    [
+        ("English", "en"),
+        ("Spanish", "es"),
+        ("French", "fr"),
+        ("German", "de"),
+        ("Italian", "it"),
+        ("Portuguese", "pt"),
+        ("Polish", "pl"),
+        ("Turkish", "tr"),
+        ("Russian", "ru"),
+        ("Dutch", "nl"),
+        ("Czech", "cs"),
+        ("Arabic", "ar"),
+        ("Chinese", "zh-cn"),
+        ("Japanese", "ja"),
+        ("Korean", "ko"),
+        ("Hungarian", "hu"),
+    ]
+)
+
+
+def synthesize_speech(text: str, language: str) -> tuple[str, tuple[int, np.ndarray] | None, str]:
+    """Synthesize speech from text using the TTS service.
+
+    Sends the text to ``{TTS_URL}/api/tts`` with a GET request, decodes the
+    returned audio bytes with ``soundfile`` and builds a status line plus a
+    Markdown block of statistics for the UI.
+
+    Args:
+        text: Text to synthesize. ``None``, empty or whitespace-only input is
+            rejected without calling the service.
+        language: Display name of the language (a key of ``LANGUAGES``).
+            Names that are not in ``LANGUAGES`` fall back to ``"en"``.
+
+    Returns:
+        A ``(status, audio, metrics)`` tuple. On success ``audio`` is
+        ``(sample_rate, samples)`` and ``metrics`` is a Markdown string; on
+        any failure ``audio`` is ``None``, ``metrics`` is ``""`` and
+        ``status`` carries the error message. Exceptions are logged and
+        never propagate to the caller.
+    """
+    if not text or not text.strip():
+        return "❌ Please enter some text", None, ""
+
+    lang_code = LANGUAGES.get(language, "en")
+
+    try:
+        # perf_counter is monotonic, so the measured latency cannot be
+        # distorted (or go negative) when the system clock is adjusted.
+        start_time = time.perf_counter()
+
+        # Call TTS service (Coqui XTTS API format)
+        response = client.get(
+            f"{TTS_URL}/api/tts",
+            params={"text": text, "language_id": lang_code},
+        )
+        response.raise_for_status()
+
+        latency = time.perf_counter() - start_time
+        audio_bytes = response.content
+
+        # Parse audio data
+        audio_data, sample_rate = sf.read(io.BytesIO(audio_bytes))
+
+        # len() counts entries along the first axis for 1-D and 2-D arrays
+        # alike, so a single expression covers both cases.
+        duration = len(audio_data) / sample_rate
+
+        # Guard the ratios: an empty audio payload (duration 0) or a zero
+        # measured latency must not turn a successful request into an error.
+        realtime_factor = f"{latency / duration:.2f}x" if duration > 0 else "n/a"
+        chars_per_sec = f"{len(text) / latency:.1f}" if latency > 0 else "n/a"
+
+        # Status message
+        status = f"✅ Generated {duration:.2f}s of audio in {latency*1000:.0f}ms"
+
+        # Metrics
+        metrics = f"""
+**Audio Statistics:**
+- Duration: {duration:.2f} seconds
+- Sample Rate: {sample_rate} Hz
+- Size: {len(audio_bytes) / 1024:.1f} KB
+- Generation Time: {latency*1000:.0f}ms
+- Real-time Factor: {realtime_factor}
+- Language: {language} ({lang_code})
+- Characters: {len(text)}
+- Chars/sec: {chars_per_sec}
+"""
+
+        return status, (sample_rate, audio_data), metrics
+
+    except httpx.HTTPStatusError as e:
+        # Raised by raise_for_status() for 4xx/5xx responses.
+        logger.exception("TTS request failed")
+        return f"❌ TTS service error: {e.response.status_code}", None, ""
+    except Exception as e:
+        # UI boundary: report any other failure (connection, decoding, ...)
+        # as a status message instead of raising.
+        logger.exception("TTS synthesis failed")
+        return f"❌ Error: {str(e)}", None, ""
+
+
+def check_service_health() -> str:
+    """Report whether the TTS service answers HTTP requests.
+
+    Probes ``/health`` first and then ``/``, each with a 5-second timeout.
+    The first probe that returns HTTP 200 decides the message; if neither
+    does, the status code of the last probe is reported. Any exception is
+    turned into an "unavailable" message instead of being raised.
+    """
+    probes = (
+        ("/health", "🟢 Service is healthy"),
+        ("/", "🟢 Service is responding"),
+    )
+    try:
+        for path, ok_message in probes:
+            response = client.get(f"{TTS_URL}{path}", timeout=5.0)
+            if response.status_code == 200:
+                return ok_message
+
+        # No probe returned 200: report the status of the last one (root).
+        return f"🟡 Service returned status {response.status_code}"
+    except Exception as exc:
+        return f"🔴 Service unavailable: {exc!s}"
+
+
+# Build the Gradio app.
+# Components are laid out in the order they are created inside the nested
+# `with` blocks, so the statement order below defines the page layout.
+with gr.Blocks(theme=get_lab_theme(), css=CUSTOM_CSS, title="TTS Demo") as demo:
+    gr.Markdown("""
+# 🔊 Text-to-Speech Demo
+
+Test the **Coqui XTTS** text-to-speech service. Convert text to natural-sounding speech
+in multiple languages.
+""")
+    
+    # Service status: a button that runs the health probe and a read-only
+    # textbox that shows its result.
+    with gr.Row():
+        health_btn = gr.Button("🔄 Check Service", size="sm")
+        health_status = gr.Textbox(label="Service Status", interactive=False)
+    
+    health_btn.click(fn=check_service_health, outputs=health_status)
+    
+    with gr.Tabs():
+        # Tab 1: Basic TTS - one text, one language, one audio result.
+        with gr.TabItem("🎤 Text to Speech"):
+            with gr.Row():
+                # Left column: text input, language dropdown and action button.
+                with gr.Column(scale=2):
+                    text_input = gr.Textbox(
+                        label="Text to Synthesize",
+                        placeholder="Enter text to convert to speech...",
+                        lines=5,
+                        max_lines=10
+                    )
+                    
+                    with gr.Row():
+                        language = gr.Dropdown(
+                            choices=list(LANGUAGES.keys()),
+                            value="English",
+                            label="Language"
+                        )
+                        synthesize_btn = gr.Button("🔊 Synthesize", variant="primary", scale=2)
+                
+                # Right column: status line and Markdown statistics.
+                with gr.Column(scale=1):
+                    status_output = gr.Textbox(label="Status", interactive=False)
+                    metrics_output = gr.Markdown(label="Metrics")
+            
+            audio_output = gr.Audio(label="Generated Audio", type="numpy")
+            
+            # The output order matches the (status, audio, metrics) tuple
+            # returned by synthesize_speech.
+            synthesize_btn.click(
+                fn=synthesize_speech,
+                inputs=[text_input, language],
+                outputs=[status_output, audio_output, metrics_output]
+            )
+            
+            # Example texts: each row is [text, language name] and fills the
+            # two inputs listed below.
+            gr.Examples(
+                examples=[
+                    ["Hello! Welcome to Davies Tech Labs. This is a demonstration of our text-to-speech system.", "English"],
+                    ["The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.", "English"],
+                    ["Bonjour! Bienvenue au laboratoire technique de Davies.", "French"],
+                    ["Hola! Bienvenido al laboratorio de tecnología.", "Spanish"],
+                    ["Guten Tag! Willkommen im Techniklabor.", "German"],
+                ],
+                inputs=[text_input, language],
+            )
+        
+        # Tab 2: Comparison - the same text synthesized with two language
+        # settings, shown side by side.
+        with gr.TabItem("🔄 Language Comparison"):
+            gr.Markdown("Compare the same text in different languages.")
+            
+            compare_text = gr.Textbox(
+                label="Text to Compare",
+                value="Hello, how are you today?",
+                lines=2
+            )
+            
+            with gr.Row():
+                lang1 = gr.Dropdown(choices=list(LANGUAGES.keys()), value="English", label="Language 1")
+                lang2 = gr.Dropdown(choices=list(LANGUAGES.keys()), value="Spanish", label="Language 2")
+            
+            compare_btn = gr.Button("Compare Languages", variant="primary")
+            
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("### Language 1")
+                    audio1 = gr.Audio(label="Audio 1", type="numpy")
+                    status1 = gr.Textbox(label="Status", interactive=False)
+                
+                with gr.Column():
+                    gr.Markdown("### Language 2")
+                    audio2 = gr.Audio(label="Audio 2", type="numpy")
+                    status2 = gr.Textbox(label="Status", interactive=False)
+            
+            # Runs synthesize_speech twice, one call after the other, with the
+            # same text and the two selected languages. The metrics string of
+            # each call is discarded; the result is (status1, audio1, status2,
+            # audio2), matching the outputs list of the click handler below.
+            def compare_languages(text, l1, l2):
+                s1, a1, _ = synthesize_speech(text, l1)
+                s2, a2, _ = synthesize_speech(text, l2)
+                return s1, a1, s2, a2
+            
+            compare_btn.click(
+                fn=compare_languages,
+                inputs=[compare_text, lang1, lang2],
+                outputs=[status1, audio1, status2, audio2]
+            )
+        
+        # Tab 3: Batch Processing - layout only.
+        # NOTE(review): no click handler is registered for batch_btn anywhere
+        # in this file, and nothing writes to batch_status or batch_audios, so
+        # pressing "Synthesize All" currently does nothing.
+        with gr.TabItem("📚 Batch Synthesis"):
+            gr.Markdown("Synthesize multiple texts at once (one per line).")
+            
+            batch_input = gr.Textbox(
+                label="Texts (one per line)",
+                placeholder="Enter multiple texts, one per line...",
+                lines=6
+            )
+            batch_lang = gr.Dropdown(
+                choices=list(LANGUAGES.keys()),
+                value="English",
+                label="Language"
+            )
+            batch_btn = gr.Button("Synthesize All", variant="primary")
+            
+            batch_status = gr.Textbox(label="Status", interactive=False)
+            batch_audios = gr.Dataset(
+                components=[gr.Audio(type="numpy")],
+                label="Generated Audio Files"
+            )
+            
+            # Note: Batch processing would need more complex handling.
+            # This is a simplified version: the widgets above are placeholders
+            # and the text below points users to the API or pipeline instead.
+            gr.Markdown("""
+*Note: For batch processing of many texts, consider using the API directly
+or the Kubeflow pipeline for better throughput.*
+""")
+    
+    # Shared footer imported from the theme module.
+    create_footer()
+
+
+if __name__ == "__main__":
+    # Script entry point: serve the app on every network interface
+    # (0.0.0.0), port 7860, with errors surfaced in the UI (show_error).
+    launch_options = {
+        "server_name": "0.0.0.0",
+        "server_port": 7860,
+        "show_error": True,
+    }
+    demo.launch(**launch_options)