Components:
- stt_streaming.py: HTTP-based STT using an external Whisper service
- stt_streaming_local.py: ROCm-based local Whisper inference
- Voice Activity Detection (VAD) with WebRTC
- Interrupt detection for barge-in support
- Session state management (listening/responding)
- OpenTelemetry instrumentation with HyperDX support
- Dockerfile variants for HTTP and ROCm deployments
53 lines · 2.0 KiB · Docker
# syntax=docker/dockerfile:1
# STT Streaming Service with ROCm for AMD GPU Whisper inference
# Targets AMD Strix Halo (gfx1151 / RDNA 3.5) but includes RDNA 3 compatibility
#
# Uses OpenAI Whisper with PyTorch ROCm backend
#
FROM docker.io/rocm/pytorch:rocm7.1_ubuntu24.04_py3.12_pytorch_release_2.9.1 AS base

WORKDIR /app

# Install system dependencies (ffmpeg for audio decoding, libsndfile1 for
# soundfile-based I/O). --no-install-recommends keeps the layer minimal;
# the list cleanup happens in the same layer so it actually shrinks the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# WORKAROUND: ROCm/ROCm#5853 - Standard PyTorch ROCm wheels cause segfault in
# libhsa-runtime64.so during VRAM allocation on gfx1151 (Strix Halo).
# TheRock nightly builds work correctly. Install BEFORE other deps since
# openai-whisper depends on torch.
# --break-system-packages is required: the base image's Python is
# PEP 668 "externally managed".
RUN pip install --no-cache-dir --break-system-packages \
    --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ \
    torch torchaudio torchvision --force-reinstall

# Install Python dependencies for STT streaming.
# Use pip directly (more reliable than uv in this base image).
# Requirements are copied separately from the app code so this layer stays
# cached when only the application source changes.
COPY requirements-rocm.txt .
RUN pip install --no-cache-dir --break-system-packages -r requirements-rocm.txt

# Download Whisper model at build time for faster startup
# Using medium model for good accuracy/speed balance
ARG WHISPER_MODEL=medium
ENV WHISPER_MODEL_SIZE=${WHISPER_MODEL}

# Pre-download the model during build (whisper is installed as openai-whisper).
# Use python3 to ensure correct interpreter. The `|| echo` deliberately
# tolerates build-time network failure: the model is re-fetched at runtime.
RUN python3 -c "import whisper; whisper.load_model('${WHISPER_MODEL}')" || echo "Model will be downloaded at runtime"

# Copy application code
COPY stt_streaming_local.py .
COPY healthcheck.py .

# Set ROCm environment for AMD Strix Halo (gfx1151 / RDNA 3.5)
ENV HIP_VISIBLE_DEVICES=0
ENV HSA_ENABLE_SDMA=0
# Ensure PyTorch uses ROCm with expandable segments for large models
ENV PYTORCH_HIP_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512
# Target gfx1151 (Strix Halo) - ROCm 7.1+ has native support
# Falls back to runtime override if kernels not available
ENV ROCM_TARGET_LST=gfx1151,gfx1100

# Wire up the probe that is copied above; previously healthcheck.py was baked
# into the image but never referenced. Generous start-period covers model
# load into VRAM on first boot.
# NOTE(review): assumes healthcheck.py exits non-zero on failure - confirm.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD ["python3", "healthcheck.py"]

# Run the service. Use python3 explicitly for consistency with the build-time
# steps above, which chose python3 "to ensure correct interpreter".
# NOTE(review): runs as root - ROCm device access (/dev/kfd, /dev/dri) usually
# needs video/render group membership; confirm before adding a USER directive.
CMD ["python3", "stt_streaming_local.py"]