feat: add streaming STT service with Whisper backend
- stt_streaming.py: HTTP-based STT using external Whisper service
- stt_streaming_local.py: ROCm-based local Whisper inference
- Voice Activity Detection (VAD) with WebRTC
- Interrupt detection for barge-in support
- Session state management (listening/responding)
- OpenTelemetry instrumentation with HyperDX support
- Dockerfile variants for HTTP and ROCm deployments
This commit is contained in:
52
Dockerfile.rocm
Normal file
52
Dockerfile.rocm
Normal file
@@ -0,0 +1,52 @@
|
||||
# syntax=docker/dockerfile:1
# STT Streaming Service with ROCm for AMD GPU Whisper inference
# Targets AMD Strix Halo (gfx1151 / RDNA 3.5) but includes RDNA 3 compatibility
#
# Uses OpenAI Whisper with PyTorch ROCm backend
#
FROM docker.io/rocm/pytorch:rocm7.1_ubuntu24.04_py3.12_pytorch_release_2.9.1 AS base

WORKDIR /app

# Install system dependencies (audio decode for Whisper input).
# --no-install-recommends keeps the layer minimal; cleaning the apt lists in
# the same RUN keeps the package cache out of the image layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# WORKAROUND: ROCm/ROCm#5853 - Standard PyTorch ROCm wheels cause segfault in
# libhsa-runtime64.so during VRAM allocation on gfx1151 (Strix Halo).
# TheRock nightly builds work correctly. Install BEFORE other deps since
# openai-whisper depends on torch.
# --break-system-packages is required: the Ubuntu 24.04 base marks its Python
# environment as externally managed (PEP 668).
RUN pip install --no-cache-dir --break-system-packages \
        --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ \
        torch torchaudio torchvision --force-reinstall

# Install Python dependencies for STT streaming
# Use pip directly (more reliable than uv in this base image)
COPY requirements-rocm.txt .
RUN pip install --no-cache-dir --break-system-packages -r requirements-rocm.txt

# Download Whisper model at build time for faster startup
# Using medium model for good accuracy/speed balance
ARG WHISPER_MODEL=medium
ENV WHISPER_MODEL_SIZE=${WHISPER_MODEL}

# Pre-download the model during build (whisper is installed as openai-whisper).
# Use python3 to ensure correct interpreter. The || makes a download failure
# non-fatal: the service can still fetch the model at first startup.
RUN python3 -c "import whisper; whisper.load_model('${WHISPER_MODEL}')" || echo "Model will be downloaded at runtime"

# Copy application code
COPY stt_streaming_local.py .
COPY healthcheck.py .

# Set ROCm environment for AMD Strix Halo (gfx1151 / RDNA 3.5)
ENV HIP_VISIBLE_DEVICES=0
ENV HSA_ENABLE_SDMA=0
# Ensure PyTorch uses ROCm with expandable segments for large models
ENV PYTORCH_HIP_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512
# Target gfx1151 (Strix Halo) - ROCm 7.1+ has native support
# Falls back to runtime override if kernels not available
ENV ROCM_TARGET_LST=gfx1151,gfx1100

# Probe container health with the bundled script so orchestrators can detect
# a wedged service. The long start period allows for model load on first boot
# (the build-time download above is best-effort and may have been skipped).
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD python3 healthcheck.py || exit 1

# Run the service with the same interpreter used at build time (python3),
# in exec form so the process runs as PID 1 and receives SIGTERM directly.
CMD ["python3", "stt_streaming_local.py"]
Reference in New Issue
Block a user