build: optimize Dockerfiles for production

- Use BuildKit syntax 1.7 with cache mounts for apt/uv
- Switch from pip to uv for 10-100x faster installs (ADR-0014)
- Add OCI Image Spec labels for container metadata
- Add HEALTHCHECK directives for orchestration
- Add .dockerignore to reduce context size
- Update Makefile with buildx and lint target
- Add retry logic to ray-entrypoint.sh

Refs: ADR-0012 (uv), ADR-0014 (Docker best practices)
2026-02-02 07:26:27 -05:00
parent a16ffff73f
commit cb80709d3d
8 changed files with 443 additions and 232 deletions
--- a/dockerfiles/Dockerfile.ray-worker-intel
+++ b/dockerfiles/Dockerfile.ray-worker-intel
@@ -1,77 +1,98 @@
-# Intel GPU Ray Worker for danilo (Intel i915 iGPU)
-# Used for: Reranker
+# syntax=docker/dockerfile:1.7
+# Intel GPU Ray Worker for danilo (Intel Arc / i915 iGPU)
+# Used for: BGE Reranker
 #
-# Build from llm-workflows root:
-#   docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-intel:latest -f dockerfiles/Dockerfile.ray-worker-intel .
-#
-# Multi-stage build to ensure Python 3.11.11 matches Ray head node
-FROM rayproject/ray:2.53.0-py311 AS base
+# Build:
+#   docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-intel:latest \
+#     -f dockerfiles/Dockerfile.ray-worker-intel .

-LABEL maintainer="billy-davies-2"
-LABEL description="Ray worker for Intel GPUs (Reranker)"
+FROM rayproject/ray:2.53.0-py311
+
+# OCI Image Spec annotations, declared together in a single LABEL instruction
+LABEL org.opencontainers.image.title="Ray Worker - Intel GPU" \
+      org.opencontainers.image.description="Ray Serve worker for Intel GPUs (BGE Reranker)" \
+      org.opencontainers.image.vendor="DaviesTechLabs" \
+      org.opencontainers.image.source="https://git.daviestechlabs.io/daviestechlabs/kuberay-images" \
+      org.opencontainers.image.licenses="MIT"
 LABEL gpu.target="intel-xpu"
+LABEL ray.version="2.53.0"

 WORKDIR /app

-# Install system dependencies for Intel GPU support
+# Install the tools (curl, wget, gnupg2) needed to fetch and dearmor the Intel apt repository keys
 USER root
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    git \
-    curl \
-    wget \
-    gnupg2 \
-    && rm -rf /var/lib/apt/lists/*
-
-# Add Intel oneAPI repository for runtime libraries
-RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor -o /usr/share/keyrings/intel-oneapi-archive-keyring.gpg && \
-    echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/intel-oneapi.list
-
-# Add Intel compute-runtime repository for Level Zero
-RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --dearmor -o /usr/share/keyrings/intel-graphics-archive-keyring.gpg && \
-    echo "deb [signed-by=/usr/share/keyrings/intel-graphics-archive-keyring.gpg arch=amd64] https://repositories.intel.com/gpu/ubuntu jammy client" > /etc/apt/sources.list.d/intel-gpu.list && \
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && apt-get install -y --no-install-recommends \
-    intel-oneapi-runtime-opencl \
-    intel-oneapi-runtime-compilers \
-    intel-level-zero-gpu \
-    level-zero \
+        curl \
+        wget \
+        gnupg2 \
    && rm -rf /var/lib/apt/lists/*

+# Add Intel oneAPI and GPU compute repositories (keys are downloaded to files rather than piped, so a failed wget aborts the build instead of being masked by the pipeline)
+RUN wget -qO /tmp/intel-oneapi.pub https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
+    && gpg --dearmor -o /usr/share/keyrings/intel-oneapi-archive-keyring.gpg /tmp/intel-oneapi.pub \
+    && echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
+        > /etc/apt/sources.list.d/intel-oneapi.list \
+    && wget -qO /tmp/intel-graphics.key https://repositories.intel.com/gpu/intel-graphics.key \
+    && gpg --dearmor -o /usr/share/keyrings/intel-graphics-archive-keyring.gpg /tmp/intel-graphics.key \
+    && echo "deb [signed-by=/usr/share/keyrings/intel-graphics-archive-keyring.gpg arch=amd64] https://repositories.intel.com/gpu/ubuntu jammy client" > /etc/apt/sources.list.d/intel-gpu.list \
+    && rm -f /tmp/intel-oneapi.pub /tmp/intel-graphics.key
+
+# Install Intel runtime libraries (docker-clean is removed so the apt cache mounts keep downloads; lists live in the mount, not the image, so no in-step cleanup is needed)
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    rm -f /etc/apt/apt.conf.d/docker-clean \
+    && apt-get update && apt-get install -y --no-install-recommends \
+        intel-oneapi-runtime-opencl \
+        intel-oneapi-runtime-compilers \
+        intel-level-zero-gpu \
+        level-zero
+
+# Install uv for fast Python package management (ADR-0014); pinned to a release tag instead of :latest so rebuilds are reproducible — bump deliberately
+COPY --from=ghcr.io/astral-sh/uv:0.8.0 /uv /usr/local/bin/uv
+
 USER ray

 # Ensure Ray CLI is in PATH
 ENV PATH="/home/ray/.local/bin:${PATH}"

-# Install Intel Extension for PyTorch (IPEX) for Python 3.11
-# This provides XPU support for Intel GPUs
-RUN pip install --no-cache-dir \
-    torch==2.5.1 \
-    intel-extension-for-pytorch==2.5.10+xpu \
-    --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+# Intel Extension for PyTorch (IPEX) with XPU support, installed via uv with Intel's XPU wheel index as an extra index
+RUN --mount=type=cache,uid=1000,gid=1000,target=/home/ray/.cache/uv \
+    uv pip install --system \
+        --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ \
+        torch==2.5.1 \
+        intel-extension-for-pytorch==2.5.10+xpu

-# Install Ray Serve and AI inference dependencies
-RUN pip install --no-cache-dir \
-    sentence-transformers \
-    FlagEmbedding \
-    fastapi \
-    uvicorn \
-    httpx \
-    pydantic \
-    transformers \
-    huggingface_hub
+# Inference stack: embedding/reranker libraries plus the HTTP serving dependencies, each bounded to its current major version
+RUN --mount=type=cache,uid=1000,gid=1000,target=/home/ray/.cache/uv \
+    uv pip install --system \
+        "sentence-transformers>=2.3.0,<3.0" \
+        "FlagEmbedding>=1.2.0,<2.0" \
+        "transformers>=4.35.0,<5.0" \
+        "huggingface_hub>=0.20.0,<1.0" \
+        "fastapi>=0.100.0,<1.0" \
+        "uvicorn>=0.23.0,<1.0" \
+        "httpx>=0.27.0,<1.0" \
+        "pydantic>=2.0.0,<3.0"

-# Copy Ray Serve Python code
-COPY ray-serve/ /app/ray_serve/
-ENV PYTHONPATH=/app
+# Ship the Ray Serve sources and the executable entrypoint script, both owned by the ray user
+COPY --chown=ray:ray ray-serve/ /app/ray_serve/
+COPY --chmod=755 --chown=ray:ray dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh

-# Copy Ray Serve entrypoint
-COPY --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh
+# Environment configuration: Python runtime flags, cluster defaults (RAY_HEAD_SVC, GPU_RESOURCE, NUM_GPUS — presumably read by ray-entrypoint.sh; verify), and Intel XPU device selection
+ENV PYTHONPATH=/app \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    RAY_HEAD_SVC="ai-inference-raycluster-head-svc" \
+    GPU_RESOURCE="gpu_intel" \
+    NUM_GPUS="1" \
+    # Intel XPU settings — NOTE(review): SYCL_DEVICE_FILTER may be deprecated in favour of ONEAPI_DEVICE_SELECTOR in newer oneAPI runtimes; confirm against the runtime bundled with IPEX 2.5.10
+    ZE_AFFINITY_MASK=0 \
+    SYCL_DEVICE_FILTER="level_zero:gpu"

-# Default environment variables
-ENV RAY_HEAD_SVC="ai-inference-raycluster-head-svc"
-ENV GPU_RESOURCE="gpu_intel"
-ENV NUM_GPUS="1"
-# Intel XPU settings
-ENV ZE_AFFINITY_MASK=0
-ENV SYCL_DEVICE_FILTER=level_zero:gpu
+# Health check: query the cluster through the head service instead of localhost, since a worker hosts no local GCS on 6379 (NOTE(review): assumes ray-entrypoint.sh joins ${RAY_HEAD_SVC}:6379 — confirm)
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+    CMD ray status --address="${RAY_HEAD_SVC}:6379" || exit 1

 ENTRYPOINT ["/app/ray-entrypoint.sh"]