- Add Dockerfiles for nvidia, rdna2, strixhalo, and intel GPU targets
- Add ray-serve modules (embeddings, whisper, tts, llm, reranker)
- Add Gitea Actions workflow for automated builds
- Add Makefile for local development
- Update README with comprehensive documentation
54 lines · 1.3 KiB · Docker
# syntax=docker/dockerfile:1
# NVIDIA GPU Ray Worker for elminster (RTX 2070)
# Used for: Whisper STT, TTS
#
# Build from llm-workflows root:
#   docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-nvidia:latest -f dockerfiles/Dockerfile.ray-worker-nvidia .
#
FROM rayproject/ray:2.53.0-py311-cu121

LABEL maintainer="billy-davies-2"
LABEL description="Ray worker for NVIDIA GPUs (Whisper, TTS)"
LABEL gpu.target="nvidia-cuda"

WORKDIR /app

# System deps for audio processing require root; drop back to the
# image's non-root `ray` user immediately afterwards. `update`,
# `install`, and the apt-list cleanup share one layer so the stale
# package index never persists in the image. Packages sorted
# alphabetically for diffability.
USER root
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        git \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*
USER ray

# Python inference stack: STT (faster-whisper, openai-whisper),
# TTS (Coqui TTS + audio libs), and the serving layer (fastapi/uvicorn).
# NOTE(review): all versions are unpinned (hadolint DL3013), so rebuilds
# are not reproducible — pin versions (e.g. faster-whisper==x.y.z) once
# the stack is validated against the CUDA 12.1 base.
RUN pip install --no-cache-dir \
        TTS \
        fastapi \
        faster-whisper \
        httpx \
        librosa \
        openai-whisper \
        pydantic \
        pydub \
        soundfile \
        torch \
        torchaudio \
        uvicorn

# Ray Serve application code; /app on PYTHONPATH so `ray_serve.*`
# modules are importable by the Serve deployments.
COPY --chown=ray:ray ray-serve/ /app/ray_serve/
ENV PYTHONPATH=/app

# Entrypoint script: --chmod sets the execute bit at copy time,
# replacing the follow-up `RUN chmod +x` (which added a whole extra
# layer just to flip a permission bit).
COPY --chown=ray:ray --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh

# Default runtime configuration — one ENV instruction per logical
# group; all of these are expected to be overridden at deploy time
# (e.g. by the RayCluster pod spec).
ENV CUDA_VISIBLE_DEVICES=0 \
    RAY_HEAD_SVC="ai-inference-raycluster-head-svc" \
    GPU_RESOURCE="gpu_nvidia" \
    NUM_GPUS="1"

# Exec form: the entrypoint script runs as PID 1 and receives SIGTERM
# directly on `docker stop`.
ENTRYPOINT ["/app/ray-entrypoint.sh"]