build: optimize Dockerfiles for production
Some checks failed
Some checks failed
- Use BuildKit syntax 1.7 with cache mounts for apt/uv
- Switch from pip to uv for 10-100x faster installs (ADR-0014)
- Add OCI Image Spec labels for container metadata
- Add HEALTHCHECK directives for orchestration
- Add .dockerignore to reduce context size
- Update Makefile with buildx and lint target
- Add retry logic to ray-entrypoint.sh

Refs: ADR-0012 (uv), ADR-0014 (Docker best practices)
This commit is contained in:
44
.dockerignore
Normal file
44
.dockerignore
Normal file
@@ -0,0 +1,44 @@
|
||||
# Git
|
||||
.git
|
||||
.gitignore
|
||||
.gitea
|
||||
|
||||
# Documentation
|
||||
*.md
|
||||
LICENSE
|
||||
docs/
|
||||
|
||||
# IDE and editors
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# Python artifacts
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
.pytest_cache/
|
||||
.venv/
|
||||
venv/
|
||||
.env
|
||||
*.egg-info/
|
||||
dist/
|
||||
build/
|
||||
|
||||
# OS files
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Build logs
|
||||
*.log
|
||||
*.tmp
|
||||
|
||||
# Local development
|
||||
Makefile
|
||||
.goreleaser.yml
|
||||
|
||||
# Don't ignore these (explicitly include)
|
||||
!ray-serve/
|
||||
!dockerfiles/
|
||||
63
Makefile
63
Makefile
@@ -3,52 +3,73 @@
|
||||
|
||||
REGISTRY := git.daviestechlabs.io/daviestechlabs
|
||||
TAG := latest
|
||||
PLATFORM := linux/amd64
|
||||
|
||||
# Image names
|
||||
IMAGES := ray-worker-nvidia ray-worker-rdna2 ray-worker-strixhalo ray-worker-intel
|
||||
|
||||
.PHONY: all build-all push-all clean help $(addprefix build-,$(IMAGES)) $(addprefix push-,$(IMAGES))
|
||||
.PHONY: all build-all push-all clean help lint $(addprefix build-,$(IMAGES)) $(addprefix push-,$(IMAGES))
|
||||
|
||||
help:
|
||||
@echo "KubeRay Images Build System"
|
||||
@echo ""
|
||||
@echo "Usage:"
|
||||
@echo " make build-all Build all images"
|
||||
@echo " make push-all Push all images to registry"
|
||||
@echo " make build-nvidia Build NVIDIA worker image"
|
||||
@echo " make build-rdna2 Build AMD RDNA2 worker image"
|
||||
@echo " make build-strixhalo Build AMD Strix Halo worker image"
|
||||
@echo " make build-intel Build Intel XPU worker image"
|
||||
@echo " make push-nvidia Push NVIDIA worker image"
|
||||
@echo " make build-all Build all images"
|
||||
@echo " make push-all Push all images to registry"
|
||||
@echo " make build-nvidia Build NVIDIA worker image"
|
||||
@echo " make build-rdna2 Build AMD RDNA2 worker image"
|
||||
@echo " make build-strixhalo Build AMD Strix Halo worker image"
|
||||
@echo " make build-intel Build Intel XPU worker image"
|
||||
@echo " make push-nvidia Push NVIDIA worker image"
|
||||
@echo " make lint Lint Dockerfiles with hadolint"
|
||||
@echo " make TAG=v1.0.0 push-all Push with specific tag"
|
||||
@echo ""
|
||||
@echo "Environment:"
|
||||
@echo " REGISTRY=$(REGISTRY)"
|
||||
@echo " TAG=$(TAG)"
|
||||
@echo " PLATFORM=$(PLATFORM)"
|
||||
|
||||
# Build targets
|
||||
# Lint Dockerfiles with hadolint
|
||||
lint:
|
||||
@echo "Linting Dockerfiles..."
|
||||
@command -v hadolint >/dev/null 2>&1 || { echo "hadolint not found, skipping..."; exit 0; }
|
||||
hadolint dockerfiles/Dockerfile.ray-worker-nvidia
|
||||
hadolint dockerfiles/Dockerfile.ray-worker-rdna2
|
||||
hadolint dockerfiles/Dockerfile.ray-worker-strixhalo
|
||||
hadolint dockerfiles/Dockerfile.ray-worker-intel
|
||||
@echo "Lint passed!"
|
||||
|
||||
# Build targets using buildx for cache support
|
||||
build-nvidia:
|
||||
docker build \
|
||||
-t $(REGISTRY)/ray-worker-nvidia:$(TAG) \
|
||||
-f dockerfiles/Dockerfile.ray-worker-nvidia \
|
||||
docker buildx build \
|
||||
--platform $(PLATFORM) \
|
||||
--tag $(REGISTRY)/ray-worker-nvidia:$(TAG) \
|
||||
--file dockerfiles/Dockerfile.ray-worker-nvidia \
|
||||
--load \
|
||||
.
|
||||
|
||||
build-rdna2:
|
||||
docker build \
|
||||
-t $(REGISTRY)/ray-worker-rdna2:$(TAG) \
|
||||
-f dockerfiles/Dockerfile.ray-worker-rdna2 \
|
||||
docker buildx build \
|
||||
--platform $(PLATFORM) \
|
||||
--tag $(REGISTRY)/ray-worker-rdna2:$(TAG) \
|
||||
--file dockerfiles/Dockerfile.ray-worker-rdna2 \
|
||||
--load \
|
||||
.
|
||||
|
||||
build-strixhalo:
|
||||
docker build \
|
||||
-t $(REGISTRY)/ray-worker-strixhalo:$(TAG) \
|
||||
-f dockerfiles/Dockerfile.ray-worker-strixhalo \
|
||||
docker buildx build \
|
||||
--platform $(PLATFORM) \
|
||||
--tag $(REGISTRY)/ray-worker-strixhalo:$(TAG) \
|
||||
--file dockerfiles/Dockerfile.ray-worker-strixhalo \
|
||||
--load \
|
||||
.
|
||||
|
||||
build-intel:
|
||||
docker build \
|
||||
-t $(REGISTRY)/ray-worker-intel:$(TAG) \
|
||||
-f dockerfiles/Dockerfile.ray-worker-intel \
|
||||
docker buildx build \
|
||||
--platform $(PLATFORM) \
|
||||
--tag $(REGISTRY)/ray-worker-intel:$(TAG) \
|
||||
--file dockerfiles/Dockerfile.ray-worker-intel \
|
||||
--load \
|
||||
.
|
||||
|
||||
build-all: build-nvidia build-rdna2 build-strixhalo build-intel
|
||||
|
||||
28
README.md
28
README.md
@@ -2,18 +2,29 @@
|
||||
|
||||
GPU-specific Ray worker images for the DaviesTechLabs AI/ML platform.
|
||||
|
||||
## Features
|
||||
|
||||
- **BuildKit optimized**: Cache mounts for apt and uv speed up rebuilds
|
||||
- **OCI compliant**: Standard image labels (`org.opencontainers.image.*`)
|
||||
- **Health checks**: Built-in HEALTHCHECK for container orchestration
|
||||
- **Non-root execution**: Ray runs as unprivileged `ray` user
|
||||
- **Retry logic**: Entrypoint waits for the Ray head, retrying at a fixed 5-second interval (up to 30 attempts)
|
||||
|
||||
## Images
|
||||
|
||||
| Image | GPU Target | Workloads | Registry |
|
||||
|-------|------------|-----------|----------|
|
||||
| `ray-worker-nvidia` | NVIDIA CUDA (RTX 2070) | Whisper STT, XTTS TTS | `git.daviestechlabs.io/daviestechlabs/ray-worker-nvidia` |
|
||||
| `ray-worker-rdna2` | AMD ROCm (Radeon 680M) | BGE Embeddings | `git.daviestechlabs.io/daviestechlabs/ray-worker-rdna2` |
|
||||
| `ray-worker-strixhalo` | AMD ROCm (Strix Halo) | vLLM, BGE | `git.daviestechlabs.io/daviestechlabs/ray-worker-strixhalo` |
|
||||
| `ray-worker-intel` | Intel XPU (Arc) | BGE Reranker | `git.daviestechlabs.io/daviestechlabs/ray-worker-intel` |
|
||||
| Image | GPU Target | Workloads | Base |
|
||||
|-------|------------|-----------|------|
|
||||
| `ray-worker-nvidia` | NVIDIA CUDA 12.1 (RTX 2070) | Whisper STT, XTTS TTS | `rayproject/ray:2.53.0-py311-cu121` |
|
||||
| `ray-worker-rdna2` | AMD ROCm 6.4 (Radeon 680M) | BGE Embeddings | `rayproject/ray:2.53.0-py311` + ROCm from `rocm/pytorch:rocm6.4.4_ubuntu22.04_py3.10_pytorch_release_2.7.1` |
|
||||
| `ray-worker-strixhalo` | AMD ROCm 7.1 (Strix Halo) | vLLM, BGE | `rayproject/ray:2.53.0-py311` + ROCm from `rocm/pytorch:rocm7.1_ubuntu24.04_py3.12_pytorch_release_2.9.1` |
|
||||
| `ray-worker-intel` | Intel XPU (Arc) | BGE Reranker | `rayproject/ray:2.53.0-py311` |
|
||||
|
||||
## Building Locally
|
||||
|
||||
```bash
|
||||
# Lint Dockerfiles (requires hadolint)
|
||||
make lint
|
||||
|
||||
# Build all images
|
||||
make build-all
|
||||
|
||||
@@ -24,8 +35,11 @@ make build-strixhalo
|
||||
make build-intel
|
||||
|
||||
# Push to Gitea registry (requires login)
|
||||
docker login git.daviestechlabs.io
|
||||
make login
|
||||
make push-all
|
||||
|
||||
# Release with version tag
|
||||
make VERSION=v1.0.0 release
|
||||
```
|
||||
|
||||
## CI/CD
|
||||
|
||||
@@ -1,77 +1,98 @@
|
||||
# Intel GPU Ray Worker for danilo (Intel i915 iGPU)
|
||||
# Used for: Reranker
|
||||
# syntax=docker/dockerfile:1.7
|
||||
# Intel GPU Ray Worker for danilo (Intel Arc / i915 iGPU)
|
||||
# Used for: BGE Reranker
|
||||
#
|
||||
# Build from llm-workflows root:
|
||||
# docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-intel:latest -f dockerfiles/Dockerfile.ray-worker-intel .
|
||||
#
|
||||
# Multi-stage build to ensure Python 3.11.11 matches Ray head node
|
||||
FROM rayproject/ray:2.53.0-py311 AS base
|
||||
# Build:
|
||||
# docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-intel:latest \
|
||||
# -f dockerfiles/Dockerfile.ray-worker-intel .
|
||||
|
||||
LABEL maintainer="billy-davies-2"
|
||||
LABEL description="Ray worker for Intel GPUs (Reranker)"
|
||||
FROM rayproject/ray:2.53.0-py311
|
||||
|
||||
# OCI Image Spec labels
|
||||
LABEL org.opencontainers.image.title="Ray Worker - Intel GPU"
|
||||
LABEL org.opencontainers.image.description="Ray Serve worker for Intel GPUs (BGE Reranker)"
|
||||
LABEL org.opencontainers.image.vendor="DaviesTechLabs"
|
||||
LABEL org.opencontainers.image.source="https://git.daviestechlabs.io/daviestechlabs/kuberay-images"
|
||||
LABEL org.opencontainers.image.licenses="MIT"
|
||||
LABEL gpu.target="intel-xpu"
|
||||
LABEL ray.version="2.53.0"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies for Intel GPU support
|
||||
# Install system dependencies and Intel GPU runtime
|
||||
USER root
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
git \
|
||||
curl \
|
||||
wget \
|
||||
gnupg2 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Add Intel oneAPI repository for runtime libraries
|
||||
RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor -o /usr/share/keyrings/intel-oneapi-archive-keyring.gpg && \
|
||||
echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/intel-oneapi.list
|
||||
|
||||
# Add Intel compute-runtime repository for Level Zero
|
||||
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --dearmor -o /usr/share/keyrings/intel-graphics-archive-keyring.gpg && \
|
||||
echo "deb [signed-by=/usr/share/keyrings/intel-graphics-archive-keyring.gpg arch=amd64] https://repositories.intel.com/gpu/ubuntu jammy client" > /etc/apt/sources.list.d/intel-gpu.list && \
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
intel-oneapi-runtime-opencl \
|
||||
intel-oneapi-runtime-compilers \
|
||||
intel-level-zero-gpu \
|
||||
level-zero \
|
||||
curl \
|
||||
wget \
|
||||
gnupg2 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Add Intel oneAPI and GPU compute repositories
|
||||
RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
|
||||
| gpg --dearmor -o /usr/share/keyrings/intel-oneapi-archive-keyring.gpg \
|
||||
&& echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
|
||||
> /etc/apt/sources.list.d/intel-oneapi.list \
|
||||
&& wget -qO - https://repositories.intel.com/gpu/intel-graphics.key \
|
||||
| gpg --dearmor -o /usr/share/keyrings/intel-graphics-archive-keyring.gpg \
|
||||
&& echo "deb [signed-by=/usr/share/keyrings/intel-graphics-archive-keyring.gpg arch=amd64] https://repositories.intel.com/gpu/ubuntu jammy client" \
|
||||
> /etc/apt/sources.list.d/intel-gpu.list
|
||||
|
||||
# Install Intel runtime libraries
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
intel-oneapi-runtime-opencl \
|
||||
intel-oneapi-runtime-compilers \
|
||||
intel-level-zero-gpu \
|
||||
level-zero \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install uv for fast Python package management (ADR-0014)
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
||||
|
||||
USER ray
|
||||
|
||||
# Ensure Ray CLI is in PATH
|
||||
ENV PATH="/home/ray/.local/bin:${PATH}"
|
||||
|
||||
# Install Intel Extension for PyTorch (IPEX) for Python 3.11
|
||||
# This provides XPU support for Intel GPUs
|
||||
RUN pip install --no-cache-dir \
|
||||
torch==2.5.1 \
|
||||
intel-extension-for-pytorch==2.5.10+xpu \
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
# Install Intel Extension for PyTorch (IPEX) with XPU support (uv is 10-100x faster)
|
||||
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
|
||||
uv pip install --system \
|
||||
torch==2.5.1 \
|
||||
intel-extension-for-pytorch==2.5.10+xpu \
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
|
||||
# Install Ray Serve and AI inference dependencies
|
||||
RUN pip install --no-cache-dir \
|
||||
sentence-transformers \
|
||||
FlagEmbedding \
|
||||
fastapi \
|
||||
uvicorn \
|
||||
httpx \
|
||||
pydantic \
|
||||
transformers \
|
||||
huggingface_hub
|
||||
# Install inference dependencies
|
||||
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
|
||||
uv pip install --system \
|
||||
'sentence-transformers>=2.3.0,<3.0' \
|
||||
'FlagEmbedding>=1.2.0,<2.0' \
|
||||
'transformers>=4.35.0,<5.0' \
|
||||
'huggingface_hub>=0.20.0,<1.0' \
|
||||
'fastapi>=0.100.0,<1.0' \
|
||||
'uvicorn>=0.23.0,<1.0' \
|
||||
'httpx>=0.27.0,<1.0' \
|
||||
'pydantic>=2.0.0,<3.0'
|
||||
|
||||
# Copy Ray Serve Python code
|
||||
COPY ray-serve/ /app/ray_serve/
|
||||
ENV PYTHONPATH=/app
|
||||
# Copy application code
|
||||
COPY --chown=ray:ray ray-serve/ /app/ray_serve/
|
||||
COPY --chown=ray:ray --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh
|
||||
|
||||
# Copy Ray Serve entrypoint
|
||||
COPY --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh
|
||||
# Environment configuration
|
||||
ENV PYTHONPATH=/app \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
RAY_HEAD_SVC="ai-inference-raycluster-head-svc" \
|
||||
GPU_RESOURCE="gpu_intel" \
|
||||
NUM_GPUS="1" \
|
||||
# Intel XPU settings
|
||||
ZE_AFFINITY_MASK=0 \
|
||||
SYCL_DEVICE_FILTER="level_zero:gpu"
|
||||
|
||||
# Default environment variables
|
||||
ENV RAY_HEAD_SVC="ai-inference-raycluster-head-svc"
|
||||
ENV GPU_RESOURCE="gpu_intel"
|
||||
ENV NUM_GPUS="1"
|
||||
# Intel XPU settings
|
||||
ENV ZE_AFFINITY_MASK=0
|
||||
ENV SYCL_DEVICE_FILTER=level_zero:gpu
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD ray status --address=localhost:6379 || exit 1
|
||||
|
||||
ENTRYPOINT ["/app/ray-entrypoint.sh"]
|
||||
|
||||
@@ -1,53 +1,70 @@
|
||||
# syntax=docker/dockerfile:1.7
|
||||
# NVIDIA GPU Ray Worker for elminster (RTX 2070)
|
||||
# Used for: Whisper STT, TTS
|
||||
#
|
||||
# Build from llm-workflows root:
|
||||
# docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-nvidia:latest -f dockerfiles/Dockerfile.ray-worker-nvidia .
|
||||
# Used for: Whisper STT, XTTS Text-to-Speech
|
||||
#
|
||||
# Build:
|
||||
# docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-nvidia:latest \
|
||||
# -f dockerfiles/Dockerfile.ray-worker-nvidia .
|
||||
|
||||
FROM rayproject/ray:2.53.0-py311-cu121
|
||||
|
||||
LABEL maintainer="billy-davies-2"
|
||||
LABEL description="Ray worker for NVIDIA GPUs (Whisper, TTS)"
|
||||
LABEL gpu.target="nvidia-cuda"
|
||||
# OCI Image Spec labels
|
||||
LABEL org.opencontainers.image.title="Ray Worker - NVIDIA GPU"
|
||||
LABEL org.opencontainers.image.description="Ray Serve worker for NVIDIA GPUs (Whisper STT, XTTS TTS)"
|
||||
LABEL org.opencontainers.image.vendor="DaviesTechLabs"
|
||||
LABEL org.opencontainers.image.source="https://git.daviestechlabs.io/daviestechlabs/kuberay-images"
|
||||
LABEL org.opencontainers.image.licenses="MIT"
|
||||
LABEL gpu.target="nvidia-cuda-12.1"
|
||||
LABEL ray.version="2.53.0"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies for audio processing
|
||||
# Install system dependencies in a single layer with cleanup
|
||||
USER root
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ffmpeg \
|
||||
libsndfile1 \
|
||||
git \
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
ffmpeg \
|
||||
libsndfile1 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install uv for fast Python package management (ADR-0014)
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
||||
|
||||
# Switch back to non-root ray user
|
||||
USER ray
|
||||
|
||||
# Install Python dependencies for inference
|
||||
RUN pip install --no-cache-dir \
|
||||
faster-whisper \
|
||||
openai-whisper \
|
||||
TTS \
|
||||
soundfile \
|
||||
pydub \
|
||||
librosa \
|
||||
torch \
|
||||
torchaudio \
|
||||
fastapi \
|
||||
uvicorn \
|
||||
httpx \
|
||||
pydantic
|
||||
# Install Python dependencies with uv cache mount (10-100x faster than pip)
|
||||
# Version ranges (lower bound plus next-major cap) keep installs within a compatible series
|
||||
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
|
||||
uv pip install --system \
|
||||
'faster-whisper>=1.0.0,<2.0' \
|
||||
'TTS>=0.22.0,<1.0' \
|
||||
'soundfile>=0.12.0,<1.0' \
|
||||
'pydub>=0.25.0,<1.0' \
|
||||
'librosa>=0.10.0,<1.0' \
|
||||
'torch>=2.0.0,<3.0' \
|
||||
'torchaudio>=2.0.0,<3.0' \
|
||||
'fastapi>=0.100.0,<1.0' \
|
||||
'uvicorn>=0.23.0,<1.0' \
|
||||
'httpx>=0.27.0,<1.0' \
|
||||
'pydantic>=2.0.0,<3.0'
|
||||
|
||||
# Copy Ray Serve Python code
|
||||
# Copy application code with proper ownership
|
||||
COPY --chown=ray:ray ray-serve/ /app/ray_serve/
|
||||
ENV PYTHONPATH=/app
|
||||
COPY --chown=ray:ray --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh
|
||||
|
||||
# Copy Ray Serve entrypoint
|
||||
COPY --chown=ray:ray dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh
|
||||
RUN chmod +x /app/ray-entrypoint.sh
|
||||
# Environment configuration
|
||||
ENV PYTHONPATH=/app \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
CUDA_VISIBLE_DEVICES=0 \
|
||||
RAY_HEAD_SVC="ai-inference-raycluster-head-svc" \
|
||||
GPU_RESOURCE="gpu_nvidia" \
|
||||
NUM_GPUS="1"
|
||||
|
||||
# Default environment variables
|
||||
ENV CUDA_VISIBLE_DEVICES=0
|
||||
ENV RAY_HEAD_SVC="ai-inference-raycluster-head-svc"
|
||||
ENV GPU_RESOURCE="gpu_nvidia"
|
||||
ENV NUM_GPUS="1"
|
||||
# Health check - verify Ray worker can connect
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD ray status --address=localhost:6379 || exit 1
|
||||
|
||||
ENTRYPOINT ["/app/ray-entrypoint.sh"]
|
||||
|
||||
@@ -1,65 +1,94 @@
|
||||
# Ray Worker for AMD RDNA 2 (gfx1035 - Radeon 680M)
|
||||
# Pre-bakes all dependencies for fast startup
|
||||
# syntax=docker/dockerfile:1.7
|
||||
# AMD RDNA 2 Ray Worker for drizzt (Radeon 680M - gfx1035)
|
||||
# Used for: BGE Embeddings
|
||||
#
|
||||
# Build from llm-workflows root:
|
||||
# docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-rdna2:latest -f dockerfiles/Dockerfile.ray-worker-rdna2 .
|
||||
# Build:
|
||||
# docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-rdna2:latest \
|
||||
# -f dockerfiles/Dockerfile.ray-worker-rdna2 .
|
||||
#
|
||||
# Multi-stage build to ensure Python 3.11.11 matches Ray head node
|
||||
# Multi-stage build: Extract ROCm from vendor image, use Ray base for Python 3.11
|
||||
|
||||
# Stage 1: Extract ROCm libraries from vendor image
|
||||
FROM docker.io/rocm/pytorch:rocm6.4.4_ubuntu22.04_py3.10_pytorch_release_2.7.1 AS rocm-libs
|
||||
# Stage 1: ROCm libraries from AMD vendor image
|
||||
FROM docker.io/rocm/pytorch:rocm6.4.4_ubuntu22.04_py3.10_pytorch_release_2.7.1 AS rocm-source
|
||||
|
||||
# Stage 2: Build on Ray base with Python 3.11
|
||||
FROM rayproject/ray:2.53.0-py311 AS base
|
||||
# Stage 2: Production image
|
||||
FROM rayproject/ray:2.53.0-py311 AS production
|
||||
|
||||
# Copy ROCm stack from vendor image
|
||||
COPY --from=rocm-libs /opt/rocm /opt/rocm
|
||||
|
||||
# Set up ROCm environment
|
||||
ENV ROCM_HOME=/opt/rocm
|
||||
ENV PATH="${ROCM_HOME}/bin:${ROCM_HOME}/llvm/bin:${PATH}"
|
||||
ENV LD_LIBRARY_PATH="${ROCM_HOME}/lib:${ROCM_HOME}/lib64:${LD_LIBRARY_PATH}"
|
||||
ENV HSA_PATH="${ROCM_HOME}/hsa"
|
||||
ENV HIP_PATH="${ROCM_HOME}/hip"
|
||||
|
||||
# ROCm environment for RDNA 2 (gfx1035)
|
||||
ENV HIP_VISIBLE_DEVICES=0 \
|
||||
HSA_ENABLE_SDMA=0 \
|
||||
PYTORCH_HIP_ALLOC_CONF=expandable_segments:True \
|
||||
PYTHONPATH=/app
|
||||
# OCI Image Spec labels
|
||||
LABEL org.opencontainers.image.title="Ray Worker - AMD RDNA 2"
|
||||
LABEL org.opencontainers.image.description="Ray Serve worker for AMD RDNA 2 GPUs (BGE Embeddings)"
|
||||
LABEL org.opencontainers.image.vendor="DaviesTechLabs"
|
||||
LABEL org.opencontainers.image.source="https://git.daviestechlabs.io/daviestechlabs/kuberay-images"
|
||||
LABEL org.opencontainers.image.licenses="MIT"
|
||||
LABEL gpu.target="amd-rocm-6.4-gfx1035"
|
||||
LABEL ray.version="2.53.0"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install ROCm system dependencies
|
||||
# Copy ROCm stack from vendor image (single COPY layer)
|
||||
COPY --from=rocm-source /opt/rocm /opt/rocm
|
||||
|
||||
# ROCm environment variables
|
||||
ENV ROCM_HOME=/opt/rocm \
|
||||
PATH="${ROCM_HOME}/bin:${ROCM_HOME}/llvm/bin:${PATH}" \
|
||||
LD_LIBRARY_PATH="${ROCM_HOME}/lib:${ROCM_HOME}/lib64:${LD_LIBRARY_PATH}" \
|
||||
HSA_PATH="${ROCM_HOME}/hsa" \
|
||||
HIP_PATH="${ROCM_HOME}/hip" \
|
||||
# RDNA 2 specific settings
|
||||
HIP_VISIBLE_DEVICES=0 \
|
||||
HSA_ENABLE_SDMA=0 \
|
||||
PYTORCH_HIP_ALLOC_CONF=expandable_segments:True
|
||||
|
||||
# Install system dependencies
|
||||
USER root
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libelf1 \
|
||||
libnuma1 \
|
||||
libdrm2 \
|
||||
libdrm-amdgpu1 \
|
||||
kmod \
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
libelf1 \
|
||||
libnuma1 \
|
||||
libdrm2 \
|
||||
libdrm-amdgpu1 \
|
||||
kmod \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install uv for fast Python package management (ADR-0014)
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
||||
|
||||
USER ray
|
||||
|
||||
# Install PyTorch ROCm wheels compatible with Python 3.11 and ROCm 6.2
|
||||
RUN pip install --no-cache-dir \
|
||||
torch==2.5.1 torchvision torchaudio \
|
||||
--index-url https://download.pytorch.org/whl/rocm6.2
|
||||
# Install PyTorch with ROCm 6.2 wheels for Python 3.11 (uv is 10-100x faster)
|
||||
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
|
||||
uv pip install --system \
|
||||
torch==2.5.1 torchvision torchaudio \
|
||||
--index-url https://download.pytorch.org/whl/rocm6.2
|
||||
|
||||
# Install Ray Serve and AI inference dependencies
|
||||
RUN pip install --no-cache-dir \
|
||||
transformers \
|
||||
accelerate \
|
||||
sentence-transformers \
|
||||
httpx \
|
||||
numpy \
|
||||
scipy
|
||||
# Install inference dependencies
|
||||
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
|
||||
uv pip install --system \
|
||||
'transformers>=4.35.0,<5.0' \
|
||||
'accelerate>=0.25.0,<1.0' \
|
||||
'sentence-transformers>=2.3.0,<3.0' \
|
||||
'httpx>=0.27.0,<1.0' \
|
||||
'numpy>=1.26.0,<2.0' \
|
||||
'scipy>=1.11.0,<2.0'
|
||||
|
||||
# Pre-download embedding model for faster cold starts
|
||||
RUN python3 -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('BAAI/bge-large-en-v1.5')"
|
||||
|
||||
# Copy application code
|
||||
COPY ray-serve/ /app/ray_serve/
|
||||
COPY --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh
|
||||
COPY --chown=ray:ray ray-serve/ /app/ray_serve/
|
||||
COPY --chown=ray:ray --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh
|
||||
|
||||
# Environment configuration
|
||||
ENV PYTHONPATH=/app \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
RAY_HEAD_SVC="ai-inference-raycluster-head-svc" \
|
||||
GPU_RESOURCE="gpu_amd_rdna2" \
|
||||
NUM_GPUS="1"
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD ray status --address=localhost:6379 || exit 1
|
||||
|
||||
ENTRYPOINT ["/app/ray-entrypoint.sh"]
|
||||
|
||||
@@ -1,72 +1,100 @@
|
||||
# Ray Worker for AMD Strix Halo (gfx1151 / RDNA 3.5)
|
||||
# Pre-bakes all dependencies for fast startup
|
||||
# syntax=docker/dockerfile:1.7
|
||||
# AMD Strix Halo Ray Worker for khelben (gfx1151 / RDNA 3.5)
|
||||
# Used for: vLLM (Llama 3.1 70B)
|
||||
#
|
||||
# Build from llm-workflows root:
|
||||
# docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-strixhalo:latest -f dockerfiles/Dockerfile.ray-worker-strixhalo .
|
||||
# Build:
|
||||
# docker build -t git.daviestechlabs.io/daviestechlabs/ray-worker-strixhalo:latest \
|
||||
# -f dockerfiles/Dockerfile.ray-worker-strixhalo .
|
||||
#
|
||||
# Multi-stage build to ensure Python 3.11.11 matches Ray head node
|
||||
# Multi-stage build: Extract ROCm 7.1 from vendor image, use Ray base for Python 3.11
|
||||
# Note: Uses TheRock gfx110X wheels due to ROCm/ROCm#5853 segfault issue
|
||||
|
||||
# Stage 1: Extract ROCm 7.1 libraries from vendor image
|
||||
FROM docker.io/rocm/pytorch:rocm7.1_ubuntu24.04_py3.12_pytorch_release_2.9.1 AS rocm-libs
|
||||
# Stage 1: ROCm 7.1 libraries from AMD vendor image
|
||||
FROM docker.io/rocm/pytorch:rocm7.1_ubuntu24.04_py3.12_pytorch_release_2.9.1 AS rocm-source
|
||||
|
||||
# Stage 2: Build on Ray base with Python 3.11
|
||||
FROM rayproject/ray:2.53.0-py311 AS base
|
||||
# Stage 2: Production image
|
||||
FROM rayproject/ray:2.53.0-py311 AS production
|
||||
|
||||
# Copy ROCm stack from vendor image
|
||||
COPY --from=rocm-libs /opt/rocm /opt/rocm
|
||||
|
||||
# Set up ROCm environment
|
||||
ENV ROCM_HOME=/opt/rocm
|
||||
ENV PATH="${ROCM_HOME}/bin:${ROCM_HOME}/llvm/bin:${PATH}"
|
||||
ENV LD_LIBRARY_PATH="${ROCM_HOME}/lib:${ROCM_HOME}/lib64:${LD_LIBRARY_PATH}"
|
||||
ENV HSA_PATH="${ROCM_HOME}/hsa"
|
||||
ENV HIP_PATH="${ROCM_HOME}/hip"
|
||||
|
||||
# ROCm environment for AMD Strix Halo (gfx1151 / RDNA 3.5)
|
||||
ENV HIP_VISIBLE_DEVICES=0
|
||||
ENV HSA_ENABLE_SDMA=0
|
||||
ENV PYTORCH_HIP_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512
|
||||
ENV HSA_OVERRIDE_GFX_VERSION=11.0.0
|
||||
ENV ROCM_TARGET_LST=gfx1151,gfx1100
|
||||
ENV PYTHONPATH=/app
|
||||
# OCI Image Spec labels
|
||||
LABEL org.opencontainers.image.title="Ray Worker - AMD Strix Halo"
|
||||
LABEL org.opencontainers.image.description="Ray Serve worker for AMD Strix Halo (vLLM LLM inference)"
|
||||
LABEL org.opencontainers.image.vendor="DaviesTechLabs"
|
||||
LABEL org.opencontainers.image.source="https://git.daviestechlabs.io/daviestechlabs/kuberay-images"
|
||||
LABEL org.opencontainers.image.licenses="MIT"
|
||||
LABEL gpu.target="amd-rocm-7.1-gfx1151"
|
||||
LABEL ray.version="2.53.0"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install ROCm system dependencies
|
||||
# Copy ROCm stack from vendor image
|
||||
COPY --from=rocm-source /opt/rocm /opt/rocm
|
||||
|
||||
# ROCm environment variables
|
||||
ENV ROCM_HOME=/opt/rocm \
|
||||
PATH="${ROCM_HOME}/bin:${ROCM_HOME}/llvm/bin:${PATH}" \
|
||||
LD_LIBRARY_PATH="${ROCM_HOME}/lib:${ROCM_HOME}/lib64:${LD_LIBRARY_PATH}" \
|
||||
HSA_PATH="${ROCM_HOME}/hsa" \
|
||||
HIP_PATH="${ROCM_HOME}/hip" \
|
||||
# Strix Halo (gfx1151) specific settings
|
||||
HIP_VISIBLE_DEVICES=0 \
|
||||
HSA_ENABLE_SDMA=0 \
|
||||
PYTORCH_HIP_ALLOC_CONF="expandable_segments:True,max_split_size_mb:512" \
|
||||
HSA_OVERRIDE_GFX_VERSION="11.0.0" \
|
||||
ROCM_TARGET_LST="gfx1151,gfx1100"
|
||||
|
||||
# Install system dependencies
|
||||
USER root
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libelf1 \
|
||||
libnuma1 \
|
||||
libdrm2 \
|
||||
libdrm-amdgpu1 \
|
||||
kmod \
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
libelf1 \
|
||||
libnuma1 \
|
||||
libdrm2 \
|
||||
libdrm-amdgpu1 \
|
||||
kmod \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install uv for fast Python package management (ADR-0014)
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
||||
|
||||
USER ray
|
||||
|
||||
# WORKAROUND: ROCm/ROCm#5853 - Standard PyTorch ROCm wheels cause segfault
|
||||
# in libhsa-runtime64.so during VRAM allocation on gfx1151 (Strix Halo).
|
||||
# TheRock gfx110X-all packages provide Python 3.11 compatible wheels.
|
||||
RUN pip install --no-cache-dir \
|
||||
--index-url https://rocm.nightlies.amd.com/v2/gfx110X-all/ \
|
||||
torch torchaudio torchvision
|
||||
# TheRock gfx110X-all packages provide compatible Python 3.11 wheels.
|
||||
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
|
||||
uv pip install --system \
|
||||
--index-url https://rocm.nightlies.amd.com/v2/gfx110X-all/ \
|
||||
torch torchaudio torchvision
|
||||
|
||||
# Install Ray Serve and AI inference dependencies
|
||||
RUN pip install --no-cache-dir \
|
||||
vllm \
|
||||
transformers \
|
||||
accelerate \
|
||||
sentence-transformers \
|
||||
httpx \
|
||||
numpy \
|
||||
scipy
|
||||
# Install vLLM and inference dependencies (uv is 10-100x faster than pip)
|
||||
RUN --mount=type=cache,target=/home/ray/.cache/uv,uid=1000,gid=1000 \
|
||||
uv pip install --system \
|
||||
'vllm>=0.5.0' \
|
||||
'transformers>=4.35.0,<5.0' \
|
||||
'accelerate>=0.25.0,<1.0' \
|
||||
'sentence-transformers>=2.3.0,<3.0' \
|
||||
'httpx>=0.27.0,<1.0' \
|
||||
'numpy>=1.26.0,<2.0' \
|
||||
'scipy>=1.11.0,<2.0'
|
||||
|
||||
# Pre-download common models for faster cold starts
|
||||
RUN python3 -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('BAAI/bge-large-en-v1.5')" || true
|
||||
# Pre-download common models for faster cold starts (optional, increases image size)
|
||||
# RUN python3 -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('BAAI/bge-large-en-v1.5')"
|
||||
|
||||
# Copy Ray Serve Python code
|
||||
COPY ray-serve/ /app/ray_serve/
|
||||
# Copy application code
|
||||
COPY --chown=ray:ray ray-serve/ /app/ray_serve/
|
||||
COPY --chown=ray:ray --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh
|
||||
|
||||
# Ray worker entrypoint
|
||||
COPY --chmod=755 dockerfiles/ray-entrypoint.sh /app/ray-entrypoint.sh
|
||||
# Environment configuration
|
||||
ENV PYTHONPATH=/app \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
RAY_HEAD_SVC="ai-inference-raycluster-head-svc" \
|
||||
GPU_RESOURCE="gpu_amd_strixhalo" \
|
||||
NUM_GPUS="1"
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
|
||||
CMD ray status --address=localhost:6379 || exit 1
|
||||
|
||||
ENTRYPOINT ["/app/ray-entrypoint.sh"]
|
||||
|
||||
@@ -1,27 +1,64 @@
|
||||
#!/bin/bash
|
||||
# Ray Worker Entrypoint
|
||||
# Connects to Ray head node and registers custom resources
|
||||
# Connects to Ray head node and registers custom GPU resources
|
||||
#
|
||||
# Environment variables:
|
||||
# RAY_HEAD_SVC - Ray head service name (default: ray-head-svc)
|
||||
# GPU_RESOURCE - Custom GPU resource name (default: gpu_amd)
|
||||
# NUM_GPUS - Number of GPUs to register (default: 1)
|
||||
# RAY_OBJECT_STORE_MEMORY - Object store memory limit (optional)
|
||||
|
||||
set -e
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure Ray is in PATH (works across all base images)
|
||||
# Ensure Ray CLI is in PATH (works across all base images)
|
||||
export PATH="/home/ray/.local/bin:/home/ray/anaconda3/bin:${PATH}"
|
||||
|
||||
# Get Ray head address from environment or default
|
||||
# Configuration with defaults
|
||||
RAY_HEAD_ADDRESS="${RAY_HEAD_SVC:-ray-head-svc}:6379"
|
||||
|
||||
# Get custom resources from environment
|
||||
GPU_RESOURCE="${GPU_RESOURCE:-gpu_amd}"
|
||||
NUM_GPUS="${NUM_GPUS:-1}"
|
||||
|
||||
echo "Starting Ray worker..."
|
||||
echo " Head address: $RAY_HEAD_ADDRESS"
|
||||
echo " GPU resource: $GPU_RESOURCE"
|
||||
echo " Num GPUs: $NUM_GPUS"
|
||||
# Log startup info
|
||||
echo "============================================="
|
||||
echo "Ray Worker Starting"
|
||||
echo "============================================="
|
||||
echo " Head address: ${RAY_HEAD_ADDRESS}"
|
||||
echo " GPU resource: ${GPU_RESOURCE}"
|
||||
echo " Num GPUs: ${NUM_GPUS}"
|
||||
echo " Python: $(python3 --version)"
|
||||
echo " Ray version: $(ray --version)"
|
||||
echo "============================================="
|
||||
|
||||
# Start Ray worker with custom resources
|
||||
exec ray start \
|
||||
--address="$RAY_HEAD_ADDRESS" \
|
||||
--num-gpus="$NUM_GPUS" \
|
||||
--resources="{\"$GPU_RESOURCE\": 1}" \
|
||||
# Wait for Ray head to be available (with retry)
|
||||
MAX_RETRIES=30
|
||||
RETRY_INTERVAL=5
|
||||
retry_count=0
|
||||
|
||||
echo "Waiting for Ray head node..."
|
||||
until ray health-check --address="${RAY_HEAD_ADDRESS}" 2>/dev/null; do
|
||||
retry_count=$((retry_count + 1))
|
||||
if [ $retry_count -ge $MAX_RETRIES ]; then
|
||||
echo "ERROR: Ray head not available after ${MAX_RETRIES} attempts"
|
||||
exit 1
|
||||
fi
|
||||
echo " Attempt ${retry_count}/${MAX_RETRIES} - retrying in ${RETRY_INTERVAL}s..."
|
||||
sleep "${RETRY_INTERVAL}"
|
||||
done
|
||||
echo "Ray head is ready!"
|
||||
|
||||
# Build ray start command with optional args
|
||||
RAY_START_ARGS=(
|
||||
--address="${RAY_HEAD_ADDRESS}"
|
||||
--num-gpus="${NUM_GPUS}"
|
||||
--resources="{\"${GPU_RESOURCE}\": 1}"
|
||||
--block
|
||||
)
|
||||
|
||||
# Add object store memory limit if specified
|
||||
if [ -n "${RAY_OBJECT_STORE_MEMORY:-}" ]; then
|
||||
RAY_START_ARGS+=(--object-store-memory="${RAY_OBJECT_STORE_MEMORY}")
|
||||
fi
|
||||
|
||||
# Start Ray worker
|
||||
echo "Starting Ray worker with resources: {\"${GPU_RESOURCE}\": 1}"
|
||||
exec ray start "${RAY_START_ARGS[@]}"
|
||||
|
||||
Reference in New Issue
Block a user