build: optimize Dockerfiles for production
Some checks failed
Build and Push Images / build-rdna2 (push) Failing after 4m3s
Build and Push Images / build-nvidia (push) Failing after 4m6s
Build and Push Images / build-strixhalo (push) Failing after 18s
Build and Push Images / build-intel (push) Failing after 21s

- Use BuildKit syntax 1.7 with cache mounts for apt/uv
- Switch from pip to uv for 10-100x faster installs (ADR-0014)
- Add OCI Image Spec labels for container metadata
- Add HEALTHCHECK directives for orchestration
- Add .dockerignore to reduce context size
- Update Makefile with buildx and lint target
- Add retry logic to ray-entrypoint.sh

Refs: ADR-0014 (uv), ADR-0012 (Docker best practices)
2026-02-02 07:26:27 -05:00
parent a16ffff73f
commit cb80709d3d
8 changed files with 443 additions and 232 deletions

.dockerignore (new file, 44 lines)

@@ -0,0 +1,44 @@
# Git
.git
.gitignore
.gitea
# Documentation
*.md
LICENSE
docs/
# IDE and editors
.vscode/
.idea/
*.swp
*.swo
*~
# Python artifacts
__pycache__/
*.py[cod]
*$py.class
.pytest_cache/
.venv/
venv/
.env
*.egg-info/
dist/
build/
# OS files
.DS_Store
Thumbs.db
# Build logs
*.log
*.tmp
# Local development
Makefile
.goreleaser.yml
# Don't ignore these (explicitly include)
!ray-serve/
!dockerfiles/
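
The trailing `!` entries rely on .dockerignore's evaluation order: patterns are applied top to bottom and the last match wins, so a negation can re-include a path an earlier pattern excluded. A minimal sketch of that rule in bash (Docker's real matcher uses Go filepath globs; this approximation only covers the patterns above):

```shell
#!/bin/bash
# Approximate .dockerignore evaluation: patterns are checked in order and
# the LAST matching pattern wins; a leading '!' re-includes the path.
is_ignored() {
  local path=$1; shift
  local verdict="no" pat negate
  for pat in "$@"; do
    negate="no"
    if [[ $pat == '!'* ]]; then
      negate="yes"
      pat=${pat#!}
    fi
    pat=${pat%/}   # treat 'docs/' like 'docs'
    # match the path itself or anything beneath a matched directory
    if [[ $path == $pat || $path == $pat/* ]]; then
      verdict=$([[ $negate == yes ]] && echo "no" || echo "yes")
    fi
  done
  echo "$verdict"
}

patterns=('*.md' 'docs/' 'Makefile' '!ray-serve/' '!dockerfiles/')
is_ignored "README.md" "${patterns[@]}"                               # yes
is_ignored "dockerfiles/Dockerfile.ray-worker-intel" "${patterns[@]}" # no
is_ignored "Makefile" "${patterns[@]}"                                # yes
```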

Makefile

@@ -3,52 +3,73 @@
REGISTRY := git.daviestechlabs.io/daviestechlabs
TAG := latest
PLATFORM := linux/amd64

# Image names
IMAGES := ray-worker-nvidia ray-worker-rdna2 ray-worker-strixhalo ray-worker-intel

.PHONY: all build-all push-all clean help lint $(addprefix build-,$(IMAGES)) $(addprefix push-,$(IMAGES))

help:
	@echo "KubeRay Images Build System"
	@echo ""
	@echo "Usage:"
	@echo "  make build-all         Build all images"
	@echo "  make push-all          Push all images to registry"
	@echo "  make build-nvidia      Build NVIDIA worker image"
	@echo "  make build-rdna2       Build AMD RDNA2 worker image"
	@echo "  make build-strixhalo   Build AMD Strix Halo worker image"
	@echo "  make build-intel       Build Intel XPU worker image"
	@echo "  make push-nvidia       Push NVIDIA worker image"
	@echo "  make lint              Lint Dockerfiles with hadolint"
	@echo "  make TAG=v1.0.0 push-all   Push with specific tag"
	@echo ""
	@echo "Environment:"
	@echo "  REGISTRY=$(REGISTRY)"
	@echo "  TAG=$(TAG)"
	@echo "  PLATFORM=$(PLATFORM)"

# Lint Dockerfiles with hadolint. Runs in a single shell: each recipe line
# gets its own shell, so an 'exit 0' guard on its own line would not skip
# the hadolint invocations that follow.
lint:
	@if ! command -v hadolint >/dev/null 2>&1; then \
		echo "hadolint not found, skipping..."; \
	else \
		echo "Linting Dockerfiles..."; \
		hadolint dockerfiles/Dockerfile.ray-worker-nvidia \
			dockerfiles/Dockerfile.ray-worker-rdna2 \
			dockerfiles/Dockerfile.ray-worker-strixhalo \
			dockerfiles/Dockerfile.ray-worker-intel; \
		echo "Lint passed!"; \
	fi

# Build targets using buildx for cache support
build-nvidia:
	docker buildx build \
		--platform $(PLATFORM) \
		--tag $(REGISTRY)/ray-worker-nvidia:$(TAG) \
		--file dockerfiles/Dockerfile.ray-worker-nvidia \
		--load \
		.

build-rdna2:
	docker buildx build \
		--platform $(PLATFORM) \
		--tag $(REGISTRY)/ray-worker-rdna2:$(TAG) \
		--file dockerfiles/Dockerfile.ray-worker-rdna2 \
		--load \
		.

build-strixhalo:
	docker buildx build \
		--platform $(PLATFORM) \
		--tag $(REGISTRY)/ray-worker-strixhalo:$(TAG) \
		--file dockerfiles/Dockerfile.ray-worker-strixhalo \
		--load \
		.

build-intel:
	docker buildx build \
		--platform $(PLATFORM) \
		--tag $(REGISTRY)/ray-worker-intel:$(TAG) \
		--file dockerfiles/Dockerfile.ray-worker-intel \
		--load \
		.

build-all: build-nvidia build-rdna2 build-strixhalo build-intel
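
The four build targets differ only in the image suffix, so they could collapse into a single GNU Make pattern rule. A sketch, not part of this commit (the explicit targets behave identically):

```makefile
# Hypothetical refactor: '$*' captures the stem, e.g. 'make build-nvidia'
# builds ray-worker-nvidia from Dockerfile.ray-worker-nvidia.
build-%:
	docker buildx build \
		--platform $(PLATFORM) \
		--tag $(REGISTRY)/ray-worker-$*:$(TAG) \
		--file dockerfiles/Dockerfile.ray-worker-$* \
		--load \
		.
```

The explicit per-image targets do have one advantage: they tab-complete and show up in `make help` without extra machinery.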

README.md

@@ -2,18 +2,29 @@
GPU-specific Ray worker images for the DaviesTechLabs AI/ML platform.

## Features

- **BuildKit optimized**: Cache mounts for apt and uv speed up rebuilds
- **OCI compliant**: Standard image labels (`org.opencontainers.image.*`)
- **Health checks**: Built-in HEALTHCHECK for container orchestration
- **Non-root execution**: Ray runs as the unprivileged `ray` user
- **Retry logic**: Entrypoint waits for the Ray head, retrying at a fixed interval

## Images

| Image | GPU Target | Workloads | Base |
|-------|------------|-----------|------|
| `ray-worker-nvidia` | NVIDIA CUDA 12.1 (RTX 2070) | Whisper STT, XTTS TTS | `rayproject/ray:2.53.0-py311-cu121` |
| `ray-worker-rdna2` | AMD ROCm 6.4 (Radeon 680M) | BGE Embeddings | `rayproject/ray:2.53.0-py311` + ROCm 6.4.4 libs from `rocm/pytorch` |
| `ray-worker-strixhalo` | AMD ROCm 7.1 (Strix Halo) | vLLM, BGE | `rayproject/ray:2.53.0-py311` + ROCm 7.1 libs from `rocm/pytorch` |
| `ray-worker-intel` | Intel XPU (Arc) | BGE Reranker | `rayproject/ray:2.53.0-py311` |

## Building Locally

```bash
# Lint Dockerfiles (requires hadolint)
make lint

# Build all images
make build-all
@@ -24,8 +35,11 @@ make build-strixhalo
make build-intel

# Push to Gitea registry (requires login)
make login
make push-all

# Release with version tag
make VERSION=v1.0.0 release
```

## CI/CD
## CI/CD ## CI/CD

dockerfiles/Dockerfile.ray-worker-intel

@@ -1,77 +1,98 @@
# syntax=docker/dockerfile:1.7
# Intel GPU Ray Worker for danilo (Intel Arc / i915 iGPU)
# Used for: BGE Reranker
#
# Build:
#   docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-intel:latest \
#     -f dockerfiles/Dockerfile.ray-worker-intel .

FROM rayproject/ray:2.53.0-py311

# OCI Image Spec labels
LABEL org.opencontainers.image.title="Ray Worker - Intel GPU"
LABEL org.opencontainers.image.description="Ray Serve worker for Intel GPUs (BGE Reranker)"
LABEL org.opencontainers.image.vendor="DaviesTechLabs"
LABEL org.opencontainers.image.source="https://git.daviestechlabs.io/daviestechlabs/kuberay-images"
LABEL org.opencontainers.image.licenses="MIT"
LABEL gpu.target="intel-xpu"
LABEL ray.version="2.53.0"

WORKDIR /app

# Install system dependencies and Intel GPU runtime
USER root
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && apt-get install -y --no-install-recommends \
    curl \
    wget \
    gnupg2 \
    && rm -rf /var/lib/apt/lists/*

# Add Intel oneAPI and GPU compute repositories
RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
    | gpg --dearmor -o /usr/share/keyrings/intel-oneapi-archive-keyring.gpg \
    && echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
    > /etc/apt/sources.list.d/intel-oneapi.list \
    && wget -qO - https://repositories.intel.com/gpu/intel-graphics.key \
    | gpg --dearmor -o /usr/share/keyrings/intel-graphics-archive-keyring.gpg \
    && echo "deb [signed-by=/usr/share/keyrings/intel-graphics-archive-keyring.gpg arch=amd64] https://repositories.intel.com/gpu/ubuntu jammy client" \
    > /etc/apt/sources.list.d/intel-gpu.list

# Install Intel runtime libraries
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && apt-get install -y --no-install-recommends \
    intel-oneapi-runtime-opencl \
    intel-oneapi-runtime-compilers \
    intel-level-zero-gpu \
    level-zero \
    && rm -rf /var/lib/apt/lists/*

# Install uv for fast Python package management (ADR-0014)
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

USER ray

# Ensure Ray CLI is in PATH
ENV PATH="/home/ray/.local/bin:${PATH}"

# Install Intel Extension for PyTorch (IPEX) with XPU support (uv is 10-100x faster)
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
    uv pip install --system \
    torch==2.5.1 \
    intel-extension-for-pytorch==2.5.10+xpu \
    --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

# Install inference dependencies
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
    uv pip install --system \
    'sentence-transformers>=2.3.0,<3.0' \
    'FlagEmbedding>=1.2.0,<2.0' \
    'transformers>=4.35.0,<5.0' \
    'huggingface_hub>=0.20.0,<1.0' \
    'fastapi>=0.100.0,<1.0' \
    'uvicorn>=0.23.0,<1.0' \
    'httpx>=0.27.0,<1.0' \
    'pydantic>=2.0.0,<3.0'

# Copy application code
COPY --chown=ray:ray ray-serve/ /app/ray_serve/
COPY --chown=ray:ray --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh

# Environment configuration
ENV PYTHONPATH=/app \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    RAY_HEAD_SVC="ai-inference-raycluster-head-svc" \
    GPU_RESOURCE="gpu_intel" \
    NUM_GPUS="1" \
    # Intel XPU settings
    ZE_AFFINITY_MASK=0 \
    SYCL_DEVICE_FILTER="level_zero:gpu"

# Health check - 'ray status' reads the local session, so it works on a
# worker node (GCS port 6379 only listens on the head node)
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD ray status || exit 1

ENTRYPOINT ["/app/ray-entrypoint.sh"]

dockerfiles/Dockerfile.ray-worker-nvidia

@@ -1,53 +1,70 @@
# syntax=docker/dockerfile:1.7
# NVIDIA GPU Ray Worker for elminster (RTX 2070)
# Used for: Whisper STT, XTTS Text-to-Speech
#
# Build:
#   docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-nvidia:latest \
#     -f dockerfiles/Dockerfile.ray-worker-nvidia .

FROM rayproject/ray:2.53.0-py311-cu121

# OCI Image Spec labels
LABEL org.opencontainers.image.title="Ray Worker - NVIDIA GPU"
LABEL org.opencontainers.image.description="Ray Serve worker for NVIDIA GPUs (Whisper STT, XTTS TTS)"
LABEL org.opencontainers.image.vendor="DaviesTechLabs"
LABEL org.opencontainers.image.source="https://git.daviestechlabs.io/daviestechlabs/kuberay-images"
LABEL org.opencontainers.image.licenses="MIT"
LABEL gpu.target="nvidia-cuda-12.1"
LABEL ray.version="2.53.0"

WORKDIR /app

# Install system dependencies in a single layer with cleanup
USER root
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Install uv for fast Python package management (ADR-0014)
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Switch back to non-root ray user
USER ray

# Install Python dependencies with uv cache mount (10-100x faster than pip)
# Bounded version ranges keep rebuilds within the same major versions
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
    uv pip install --system \
    'faster-whisper>=1.0.0,<2.0' \
    'TTS>=0.22.0,<1.0' \
    'soundfile>=0.12.0,<1.0' \
    'pydub>=0.25.0,<1.0' \
    'librosa>=0.10.0,<1.0' \
    'torch>=2.0.0,<3.0' \
    'torchaudio>=2.0.0,<3.0' \
    'fastapi>=0.100.0,<1.0' \
    'uvicorn>=0.23.0,<1.0' \
    'httpx>=0.27.0,<1.0' \
    'pydantic>=2.0.0,<3.0'

# Copy application code with proper ownership
COPY --chown=ray:ray ray-serve/ /app/ray_serve/
COPY --chown=ray:ray --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh

# Environment configuration
ENV PYTHONPATH=/app \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    CUDA_VISIBLE_DEVICES=0 \
    RAY_HEAD_SVC="ai-inference-raycluster-head-svc" \
    GPU_RESOURCE="gpu_nvidia" \
    NUM_GPUS="1"

# Health check - 'ray status' reads the local session, so it works on a
# worker node (GCS port 6379 only listens on the head node)
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD ray status || exit 1

ENTRYPOINT ["/app/ray-entrypoint.sh"]
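
The `'pkg>=X,<Y'` bounds cap each dependency below its next major release rather than pinning an exact version. A toy range check using `sort -V` version ordering (real installers implement full PEP 440 semantics; this is only a sanity-check sketch):

```shell
#!/bin/bash
# in_range VERSION LOWER UPPER - true when LOWER <= VERSION < UPPER,
# compared with GNU sort's version ordering (-V).
in_range() {
  local version=$1 lower=$2 upper=$3
  [ "$(printf '%s\n' "$lower" "$version" | sort -V | head -n1)" = "$lower" ] &&
  [ "$(printf '%s\n' "$version" "$upper" | sort -V | head -n1)" = "$version" ] &&
  [ "$version" != "$upper" ]
}

in_range "2.5.1" "2.0.0" "3.0" && echo "2.5.1 satisfies >=2.0.0,<3.0"
in_range "3.1.0" "2.0.0" "3.0" || echo "3.1.0 excluded by <3.0"
```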

dockerfiles/Dockerfile.ray-worker-rdna2

@@ -1,65 +1,94 @@
# syntax=docker/dockerfile:1.7
# AMD RDNA 2 Ray Worker for drizzt (Radeon 680M - gfx1035)
# Used for: BGE Embeddings
#
# Build:
#   docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-rdna2:latest \
#     -f dockerfiles/Dockerfile.ray-worker-rdna2 .
#
# Multi-stage build: Extract ROCm from vendor image, use Ray base for Python 3.11

# Stage 1: ROCm libraries from AMD vendor image
FROM docker.io/rocm/pytorch:rocm6.4.4_ubuntu22.04_py3.10_pytorch_release_2.7.1 AS rocm-source

# Stage 2: Production image
FROM rayproject/ray:2.53.0-py311 AS production

# OCI Image Spec labels
LABEL org.opencontainers.image.title="Ray Worker - AMD RDNA 2"
LABEL org.opencontainers.image.description="Ray Serve worker for AMD RDNA 2 GPUs (BGE Embeddings)"
LABEL org.opencontainers.image.vendor="DaviesTechLabs"
LABEL org.opencontainers.image.source="https://git.daviestechlabs.io/daviestechlabs/kuberay-images"
LABEL org.opencontainers.image.licenses="MIT"
LABEL gpu.target="amd-rocm-6.4-gfx1035"
LABEL ray.version="2.53.0"

WORKDIR /app

# Copy ROCm stack from vendor image (single COPY layer)
COPY --from=rocm-source /opt/rocm /opt/rocm

# ROCm environment variables. ROCM_HOME gets its own ENV instruction:
# variables referenced inside the same ENV resolve to their *previous*
# values, so folding it into the block below would expand to empty.
ENV ROCM_HOME=/opt/rocm
ENV PATH="${ROCM_HOME}/bin:${ROCM_HOME}/llvm/bin:${PATH}" \
    LD_LIBRARY_PATH="${ROCM_HOME}/lib:${ROCM_HOME}/lib64:${LD_LIBRARY_PATH}" \
    HSA_PATH="${ROCM_HOME}/hsa" \
    HIP_PATH="${ROCM_HOME}/hip" \
    # RDNA 2 specific settings
    HIP_VISIBLE_DEVICES=0 \
    HSA_ENABLE_SDMA=0 \
    PYTORCH_HIP_ALLOC_CONF=expandable_segments:True

# Install system dependencies
USER root
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && apt-get install -y --no-install-recommends \
    libelf1 \
    libnuma1 \
    libdrm2 \
    libdrm-amdgpu1 \
    kmod \
    && rm -rf /var/lib/apt/lists/*

# Install uv for fast Python package management (ADR-0014)
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

USER ray

# Install PyTorch with ROCm 6.2 wheels for Python 3.11 (uv is 10-100x faster)
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
    uv pip install --system \
    torch==2.5.1 torchvision torchaudio \
    --index-url https://download.pytorch.org/whl/rocm6.2

# Install inference dependencies
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
    uv pip install --system \
    'transformers>=4.35.0,<5.0' \
    'accelerate>=0.25.0,<1.0' \
    'sentence-transformers>=2.3.0,<3.0' \
    'httpx>=0.27.0,<1.0' \
    'numpy>=1.26.0,<2.0' \
    'scipy>=1.11.0,<2.0'

# Pre-download embedding model for faster cold starts
RUN python3 -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('BAAI/bge-large-en-v1.5')"

# Copy application code
COPY --chown=ray:ray ray-serve/ /app/ray_serve/
COPY --chown=ray:ray --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh

# Environment configuration
ENV PYTHONPATH=/app \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    RAY_HEAD_SVC="ai-inference-raycluster-head-svc" \
    GPU_RESOURCE="gpu_amd_rdna2" \
    NUM_GPUS="1"

# Health check - 'ray status' reads the local session, so it works on a
# worker node (GCS port 6379 only listens on the head node)
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD ray status || exit 1

ENTRYPOINT ["/app/ray-entrypoint.sh"]

dockerfiles/Dockerfile.ray-worker-strixhalo

@@ -1,72 +1,100 @@
# syntax=docker/dockerfile:1.7
# AMD Strix Halo Ray Worker for khelben (gfx1151 / RDNA 3.5)
# Used for: vLLM (Llama 3.1 70B)
#
# Build:
#   docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-strixhalo:latest \
#     -f dockerfiles/Dockerfile.ray-worker-strixhalo .
#
# Multi-stage build: Extract ROCm 7.1 from vendor image, use Ray base for Python 3.11
# Note: Uses TheRock gfx110X wheels due to ROCm/ROCm#5853 segfault issue

# Stage 1: ROCm 7.1 libraries from AMD vendor image
FROM docker.io/rocm/pytorch:rocm7.1_ubuntu24.04_py3.12_pytorch_release_2.9.1 AS rocm-source

# Stage 2: Production image
FROM rayproject/ray:2.53.0-py311 AS production

# OCI Image Spec labels
LABEL org.opencontainers.image.title="Ray Worker - AMD Strix Halo"
LABEL org.opencontainers.image.description="Ray Serve worker for AMD Strix Halo (vLLM LLM inference)"
LABEL org.opencontainers.image.vendor="DaviesTechLabs"
LABEL org.opencontainers.image.source="https://git.daviestechlabs.io/daviestechlabs/kuberay-images"
LABEL org.opencontainers.image.licenses="MIT"
LABEL gpu.target="amd-rocm-7.1-gfx1151"
LABEL ray.version="2.53.0"

WORKDIR /app

# Copy ROCm stack from vendor image
COPY --from=rocm-source /opt/rocm /opt/rocm

# ROCm environment variables. ROCM_HOME gets its own ENV instruction:
# variables referenced inside the same ENV resolve to their *previous*
# values, so folding it into the block below would expand to empty.
ENV ROCM_HOME=/opt/rocm
ENV PATH="${ROCM_HOME}/bin:${ROCM_HOME}/llvm/bin:${PATH}" \
    LD_LIBRARY_PATH="${ROCM_HOME}/lib:${ROCM_HOME}/lib64:${LD_LIBRARY_PATH}" \
    HSA_PATH="${ROCM_HOME}/hsa" \
    HIP_PATH="${ROCM_HOME}/hip" \
    # Strix Halo (gfx1151) specific settings
    HIP_VISIBLE_DEVICES=0 \
    HSA_ENABLE_SDMA=0 \
    PYTORCH_HIP_ALLOC_CONF="expandable_segments:True,max_split_size_mb:512" \
    HSA_OVERRIDE_GFX_VERSION="11.0.0" \
    ROCM_TARGET_LST="gfx1151,gfx1100"

# Install system dependencies
USER root
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && apt-get install -y --no-install-recommends \
    libelf1 \
    libnuma1 \
    libdrm2 \
    libdrm-amdgpu1 \
    kmod \
    && rm -rf /var/lib/apt/lists/*

# Install uv for fast Python package management (ADR-0014)
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

USER ray

# WORKAROUND: ROCm/ROCm#5853 - Standard PyTorch ROCm wheels cause segfault
# in libhsa-runtime64.so during VRAM allocation on gfx1151 (Strix Halo).
# TheRock gfx110X-all packages provide compatible Python 3.11 wheels.
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
    uv pip install --system \
    --index-url https://rocm.nightlies.amd.com/v2/gfx110X-all/ \
    torch torchaudio torchvision

# Install vLLM and inference dependencies (uv is 10-100x faster than pip)
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
    uv pip install --system \
    'vllm>=0.5.0' \
    'transformers>=4.35.0,<5.0' \
    'accelerate>=0.25.0,<1.0' \
    'sentence-transformers>=2.3.0,<3.0' \
    'httpx>=0.27.0,<1.0' \
    'numpy>=1.26.0,<2.0' \
    'scipy>=1.11.0,<2.0'

# Pre-download common models for faster cold starts (optional, increases image size)
# RUN python3 -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('BAAI/bge-large-en-v1.5')"

# Copy application code
COPY --chown=ray:ray ray-serve/ /app/ray_serve/
COPY --chown=ray:ray --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh

# Environment configuration
ENV PYTHONPATH=/app \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    RAY_HEAD_SVC="ai-inference-raycluster-head-svc" \
    GPU_RESOURCE="gpu_amd_strixhalo" \
    NUM_GPUS="1"

# Health check - 'ray status' reads the local session, so it works on a
# worker node; longer start period for vLLM model loading
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD ray status || exit 1

ENTRYPOINT ["/app/ray-entrypoint.sh"]
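
Each image only sets `GPU_RESOURCE`; the shared entrypoint expands it into the JSON object that `ray start --resources` expects, which is why the quotes are escaped there. A quick shell check of that expansion (using the value this image sets):

```shell
#!/bin/bash
set -euo pipefail

# Same expansion the entrypoint performs for --resources
GPU_RESOURCE="gpu_amd_strixhalo"
RESOURCES_JSON="{\"${GPU_RESOURCE}\": 1}"

echo "${RESOURCES_JSON}"
# Confirm the escaped quotes produced valid JSON
echo "${RESOURCES_JSON}" | python3 -m json.tool >/dev/null && echo "valid JSON"
```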

dockerfiles/ray-entrypoint.sh

@@ -1,27 +1,64 @@
#!/bin/bash
# Ray Worker Entrypoint
# Connects to Ray head node and registers custom GPU resources
#
# Environment variables:
#   RAY_HEAD_SVC            - Ray head service name (default: ray-head-svc)
#   GPU_RESOURCE            - Custom GPU resource name (default: gpu_amd)
#   NUM_GPUS                - Number of GPUs to register (default: 1)
#   RAY_OBJECT_STORE_MEMORY - Object store memory limit (optional)

set -euo pipefail

# Ensure Ray CLI is in PATH (works across all base images)
export PATH="/home/ray/.local/bin:/home/ray/anaconda3/bin:${PATH}"

# Configuration with defaults
RAY_HEAD_ADDRESS="${RAY_HEAD_SVC:-ray-head-svc}:6379"
GPU_RESOURCE="${GPU_RESOURCE:-gpu_amd}"
NUM_GPUS="${NUM_GPUS:-1}"

# Log startup info
echo "============================================="
echo "Ray Worker Starting"
echo "============================================="
echo "  Head address: ${RAY_HEAD_ADDRESS}"
echo "  GPU resource: ${GPU_RESOURCE}"
echo "  Num GPUs:     ${NUM_GPUS}"
echo "  Python:       $(python3 --version)"
echo "  Ray version:  $(ray --version)"
echo "============================================="

# Wait for Ray head to be available (fixed-interval retry)
MAX_RETRIES=30
RETRY_INTERVAL=5
retry_count=0

echo "Waiting for Ray head node..."
until ray health-check --address="${RAY_HEAD_ADDRESS}" 2>/dev/null; do
    retry_count=$((retry_count + 1))
    if [ "${retry_count}" -ge "${MAX_RETRIES}" ]; then
        echo "ERROR: Ray head not available after ${MAX_RETRIES} attempts"
        exit 1
    fi
    echo "  Attempt ${retry_count}/${MAX_RETRIES} - retrying in ${RETRY_INTERVAL}s..."
    sleep "${RETRY_INTERVAL}"
done
echo "Ray head is ready!"

# Build ray start command with optional args
RAY_START_ARGS=(
    --address="${RAY_HEAD_ADDRESS}"
    --num-gpus="${NUM_GPUS}"
    --resources="{\"${GPU_RESOURCE}\": 1}"
    --block
)

# Add object store memory limit if specified
if [ -n "${RAY_OBJECT_STORE_MEMORY:-}" ]; then
    RAY_START_ARGS+=(--object-store-memory="${RAY_OBJECT_STORE_MEMORY}")
fi

# Start Ray worker
echo "Starting Ray worker with resources: {\"${GPU_RESOURCE}\": 1}"
exec ray start "${RAY_START_ARGS[@]}"
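
The wait loop caps startup at `MAX_RETRIES * RETRY_INTERVAL` seconds (30 × 5s = 150s by default) before failing the container. The loop's logic can be exercised locally by stubbing the health check; here `check` is a stand-in for `ray health-check --address="${RAY_HEAD_ADDRESS}"`, rigged to succeed on the third call:

```shell
#!/bin/bash
# Re-creation of the entrypoint's retry loop with the Ray call stubbed out.
set -euo pipefail

MAX_RETRIES=5
RETRY_INTERVAL=0   # 5 in the real entrypoint; 0 keeps this demo fast

attempts_file=$(mktemp)
echo 0 > "${attempts_file}"

check() {
  # Stub: fail twice, then succeed (simulates the head coming up)
  local n
  n=$(cat "${attempts_file}")
  n=$((n + 1))
  echo "${n}" > "${attempts_file}"
  [ "${n}" -ge 3 ]
}

retry_count=0
until check 2>/dev/null; do
  retry_count=$((retry_count + 1))
  if [ "${retry_count}" -ge "${MAX_RETRIES}" ]; then
    echo "ERROR: head not available after ${MAX_RETRIES} attempts"
    exit 1
  fi
  sleep "${RETRY_INTERVAL}"
done
echo "head ready after $(cat "${attempts_file}") checks"   # head ready after 3 checks
```

Note that `until` conditions are exempt from `set -e`, so the failing health checks do not abort the script; only exhausting the retry budget does.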