# syntax=docker/dockerfile:1
# STT Streaming Service with ROCm for AMD GPU Whisper inference
# Targets AMD Strix Halo (gfx1151 / RDNA 3.5) but includes RDNA 3 compatibility
#
# Uses OpenAI Whisper with PyTorch ROCm backend
#
FROM docker.io/rocm/pytorch:rocm7.1_ubuntu24.04_py3.12_pytorch_release_2.9.1 AS base

WORKDIR /app

# System dependencies: ffmpeg for audio decoding, libsndfile1 for sound-file
# I/O. --no-install-recommends keeps the layer minimal; the apt list cleanup
# happens in the same layer so the package cache never lands in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# WORKAROUND: ROCm/ROCm#5853 - Standard PyTorch ROCm wheels cause segfault in
# libhsa-runtime64.so during VRAM allocation on gfx1151 (Strix Halo).
# TheRock nightly builds work correctly. Install BEFORE other deps since
# openai-whisper depends on torch.
# NOTE(review): the nightly index is unpinned, so this layer is not
# reproducible — pin exact wheel versions once a fixed release ships.
RUN pip install --no-cache-dir --break-system-packages \
    --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ \
    torch torchaudio torchvision --force-reinstall

# Install Python dependencies for STT streaming
# Use pip directly (more reliable than uv in this base image)
COPY requirements-rocm.txt .
RUN pip install --no-cache-dir --break-system-packages -r requirements-rocm.txt

# Download Whisper model at build time for faster startup
# Using medium model for good accuracy/speed balance
ARG WHISPER_MODEL=medium
ENV WHISPER_MODEL_SIZE=${WHISPER_MODEL}

# Pre-download the model during build (whisper is installed as openai-whisper).
# Use python3 to ensure correct interpreter. Failure is deliberately non-fatal
# (|| echo ...): the model is then fetched at runtime, e.g. when the build
# host has no network access.
RUN python3 -c "import whisper; whisper.load_model('${WHISPER_MODEL}')" || echo "Model will be downloaded at runtime"

# Copy application code
COPY stt_streaming_local.py .
COPY healthcheck.py .
# Set ROCm environment for AMD Strix Halo (gfx1151 / RDNA 3.5)
ENV HIP_VISIBLE_DEVICES=0
# Disable SDMA engines — presumably a stability workaround for this platform;
# TODO confirm it is still required with ROCm 7.1+.
ENV HSA_ENABLE_SDMA=0

# Ensure PyTorch uses ROCm with expandable segments for large models
ENV PYTORCH_HIP_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512

# Target gfx1151 (Strix Halo) - ROCm 7.1+ has native support
# Falls back to runtime override if kernels not available
ENV ROCM_TARGET_LST=gfx1151,gfx1100

# Liveness probe using the healthcheck.py shipped with the app.
# Generous --start-period covers Whisper model load (or first-run download)
# before the probe counts failures.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD ["python3", "healthcheck.py"]

# NOTE(review): container runs as root. A non-root USER is preferable, but
# ROCm device access (/dev/kfd, /dev/dri) typically requires video/render
# group membership — verify group IDs on the target host before adding one.

# Run the service.
# python3 explicitly: the build installs packages with pip
# --break-system-packages (the Ubuntu system interpreter), where a bare
# `python` binary is not guaranteed to exist.
CMD ["python3", "stt_streaming_local.py"]