# syntax=docker/dockerfile:1
# STT Streaming Service with ROCm for AMD GPU Whisper inference
# Targets AMD Strix Halo (gfx1151 / RDNA 3.5) but includes RDNA 3 compatibility
#
# Uses OpenAI Whisper with PyTorch ROCm backend
#
FROM docker.io/rocm/pytorch:rocm7.1_ubuntu24.04_py3.12_pytorch_release_2.9.1 AS base

WORKDIR /app

# System dependencies: ffmpeg for audio decoding, libsndfile1 for sound-file
# I/O. --no-install-recommends keeps the layer minimal; the apt list cleanup
# happens in the same layer so the package cache never lands in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# WORKAROUND: ROCm/ROCm#5853 - Standard PyTorch ROCm wheels cause segfault in
# libhsa-runtime64.so during VRAM allocation on gfx1151 (Strix Halo).
# TheRock nightly builds work correctly. Install BEFORE other deps since
# openai-whisper depends on torch.
# NOTE(review): the nightly index is unpinned, so this layer is not
# reproducible — pin exact wheel versions once a fixed release ships.
RUN pip install --no-cache-dir --break-system-packages \
    --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ \
    torch torchaudio torchvision --force-reinstall

# Install Python dependencies for STT streaming
# Use pip directly (more reliable than uv in this base image)
COPY requirements-rocm.txt .
RUN pip install --no-cache-dir --break-system-packages -r requirements-rocm.txt

# Download Whisper model at build time for faster startup
# Using medium model for good accuracy/speed balance
ARG WHISPER_MODEL=medium
ENV WHISPER_MODEL_SIZE=${WHISPER_MODEL}

# Pre-download the model during build (whisper is installed as openai-whisper).
# Use python3 to ensure correct interpreter. Failure is deliberately non-fatal
# (|| echo ...): the model is then fetched at runtime, e.g. when the build
# host has no network access.
RUN python3 -c "import whisper; whisper.load_model('${WHISPER_MODEL}')" || echo "Model will be downloaded at runtime"

# Copy application code
COPY stt_streaming_local.py .
COPY healthcheck.py .
# Set ROCm environment for AMD Strix Halo (gfx1151 / RDNA 3.5)
ENV HIP_VISIBLE_DEVICES=0
# Disable SDMA engines — presumably a stability workaround for this platform;
# TODO confirm it is still required with ROCm 7.1+.
ENV HSA_ENABLE_SDMA=0

# Ensure PyTorch uses ROCm with expandable segments for large models
ENV PYTORCH_HIP_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512

# Target gfx1151 (Strix Halo) - ROCm 7.1+ has native support
# Falls back to runtime override if kernels not available
ENV ROCM_TARGET_LST=gfx1151,gfx1100

# Liveness probe using the healthcheck.py shipped with the app.
# Generous --start-period covers Whisper model load (or first-run download)
# before the probe counts failures.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD ["python3", "healthcheck.py"]

# NOTE(review): container runs as root. A non-root USER is preferable, but
# ROCm device access (/dev/kfd, /dev/dri) typically requires video/render
# group membership — verify group IDs on the target host before adding one.

# Run the service.
# python3 explicitly: the build installs packages with pip
# --break-system-packages (the Ubuntu system interpreter), where a bare
# `python` binary is not guaranteed to exist.
CMD ["python3", "stt_streaming_local.py"]