build: optimize Dockerfiles for production

- Use BuildKit syntax 1.7 with cache mounts for apt/uv
- Switch from pip to uv for 10-100x faster installs (ADR-0014)
- Add OCI Image Spec labels for container metadata
- Add HEALTHCHECK directives for orchestration
- Add .dockerignore to reduce context size
- Update Makefile with buildx and lint target
- Add retry logic to ray-entrypoint.sh

Refs: ADR-0012 (uv), ADR-0014 (Docker best practices)
2026-02-02 07:26:27 -05:00
parent a16ffff73f
commit cb80709d3d
8 changed files with 443 additions and 232 deletions
--- a/dockerfiles/Dockerfile.ray-worker-nvidia
+++ b/dockerfiles/Dockerfile.ray-worker-nvidia
@@ -1,53 +1,70 @@
+# syntax=docker/dockerfile:1.7
 # NVIDIA GPU Ray Worker for elminster (RTX 2070)
-# Used for: Whisper STT, TTS
-#
-# Build from llm-workflows root:
-#   docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-nvidia:latest -f dockerfiles/Dockerfile.ray-worker-nvidia .
+# Used for: Whisper STT, XTTS Text-to-Speech
 #
+# Build:
+#   docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-nvidia:latest \
+#     -f dockerfiles/Dockerfile.ray-worker-nvidia .
+
 FROM rayproject/ray:2.53.0-py311-cu121

-LABEL maintainer="billy-davies-2"
-LABEL description="Ray worker for NVIDIA GPUs (Whisper, TTS)"
-LABEL gpu.target="nvidia-cuda"
+# Image metadata: OCI annotation keys followed by the project's GPU/Ray labels
+LABEL org.opencontainers.image.title="Ray Worker - NVIDIA GPU" \
+      org.opencontainers.image.description="Ray Serve worker for NVIDIA GPUs (Whisper STT, XTTS TTS)" \
+      org.opencontainers.image.vendor="DaviesTechLabs" \
+      org.opencontainers.image.source="https://git.daviestechlabs.io/daviestechlabs/kuberay-images" \
+      org.opencontainers.image.licenses="MIT" \
+      gpu.target="nvidia-cuda-12.1" \
+      ray.version="2.53.0"

 WORKDIR /app

-# Install system dependencies for audio processing
+# Install system dependencies in a single layer with cleanup
 USER root
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    ffmpeg \
-    libsndfile1 \
-    git \
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    apt-get update && apt-get install -y --no-install-recommends \
+        ffmpeg \
+        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*
+
+# Install uv (ADR-0014); the tag is pinned so rebuilds are reproducible - bump it deliberately
+COPY --from=ghcr.io/astral-sh/uv:0.7.2 /uv /usr/local/bin/uv
+
+# Switch back to non-root ray user
 USER ray

-# Install Python dependencies for inference
-RUN pip install --no-cache-dir \
-    faster-whisper \
-    openai-whisper \
-    TTS \
-    soundfile \
-    pydub \
-    librosa \
-    torch \
-    torchaudio \
-    fastapi \
-    uvicorn \
-    httpx \
-    pydantic
+# Install Python dependencies with uv; --link-mode=copy because the cache mount is a separate filesystem
+# Versions are bounded ranges, not exact pins; use a lock/constraints file for fully reproducible builds
+RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
+    uv pip install --system --link-mode=copy \
+        'faster-whisper>=1.0.0,<2.0' \
+        'TTS>=0.22.0,<1.0' \
+        'soundfile>=0.12.0,<1.0' \
+        'pydub>=0.25.0,<1.0' \
+        'librosa>=0.10.0,<1.0' \
+        'torch>=2.0.0,<3.0' \
+        'torchaudio>=2.0.0,<3.0' \
+        'fastapi>=0.100.0,<1.0' \
+        'uvicorn>=0.23.0,<1.0' \
+        'httpx>=0.27.0,<1.0' \
+        'pydantic>=2.0.0,<3.0'

-# Copy Ray Serve Python code
+# Copy application code with proper ownership
 COPY --chown=ray:ray ray-serve/ /app/ray_serve/
-ENV PYTHONPATH=/app
+COPY --chown=ray:ray --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh

-# Copy Ray Serve entrypoint
-COPY --chown=ray:ray dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh
-RUN chmod +x /app/ray-entrypoint.sh
+# Runtime environment: Python interpreter settings first, then Ray/GPU values
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PYTHONPATH=/app \
+    RAY_HEAD_SVC="ai-inference-raycluster-head-svc" \
+    GPU_RESOURCE="gpu_nvidia" \
+    NUM_GPUS="1" \
+    CUDA_VISIBLE_DEVICES=0

-# Default environment variables
-ENV CUDA_VISIBLE_DEVICES=0
-ENV RAY_HEAD_SVC="ai-inference-raycluster-head-svc"
-ENV GPU_RESOURCE="gpu_nvidia"
-ENV NUM_GPUS="1"
+# Health check - query the head's GCS (assumed ${RAY_HEAD_SVC}:6379); a worker has none on localhost
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+    CMD ray status --address="${RAY_HEAD_SVC}:6379" || exit 1

 ENTRYPOINT ["/app/ray-entrypoint.sh"]