diff --git a/dockerfiles/Dockerfile.ray-worker-strixhalo b/dockerfiles/Dockerfile.ray-worker-strixhalo
index de4ffac..84f4559 100644
--- a/dockerfiles/Dockerfile.ray-worker-strixhalo
+++ b/dockerfiles/Dockerfile.ray-worker-strixhalo
@@ -2,7 +2,7 @@
 # Used for: vLLM (Llama 3.1 70B)
 #
 # Build:
-#   docker build -t registry.lab.daviestechlabs.io/daviestechlabs/ray-worker-strixhalo:v1.0.20 \
+#   docker build -t registry.lab.daviestechlabs.io/daviestechlabs/ray-worker-strixhalo:v1.0.21 \
 #     -f dockerfiles/Dockerfile.ray-worker-strixhalo .
 #
 # STRATEGY: Full source build of vLLM on AMD's vendor PyTorch image.
@@ -126,6 +126,10 @@ ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} \
 # Build using setup.py bdist_wheel (same as vLLM CI in Dockerfile.rocm),
 # then install the wheel. This avoids a develop-mode egg-link to the
 # build directory so we can safely clean up /tmp/vllm-build afterwards.
+#
+# --no-deps: vllm's dep tree pulls torch/xgrammar with exact +gitXXX pins
+# that conflict with the vendor torch. Runtime deps are installed in the
+# next layer instead.
 RUN --mount=type=cache,target=/root/.cache/ccache \
     python3 setup.py bdist_wheel --dist-dir=dist \
     && uv pip install --python /opt/venv/bin/python3 --no-deps dist/*.whl
@@ -138,9 +142,13 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     --no-deps \
     --prerelease=allow \
     --extra-index-url https://wheels.vllm.ai/rocm/ \
-    triton triton-kernels flash_attn
+    triton triton-kernels flash_attn \
+    xgrammar
 
 # ── Runtime Python dependencies ─────────────────────────────────────────
+# Because vllm was installed --no-deps (torch pin conflicts), we install
+# its runtime deps here. Packages already in the vendor image (torch,
+# numpy, pillow, pyyaml, etc.) are satisfied and skipped by uv.
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --python /opt/venv/bin/python3 \
     'ray[default]==2.53.0' \
@@ -152,24 +160,51 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     'pandas>=2.0.0,<3.0' \
     'numpy>=2.1.0,<2.3' \
     'numba>=0.60.0,<0.62' \
-    'tokenizers>=0.20.0' \
-    'safetensors>=0.4.0' \
     'uvloop>=0.21.0' \
-    'pyyaml>=6.0' \
-    'requests>=2.31.0' \
-    'aiohttp>=3.9.0' \
-    'pillow>=10.0' \
-    'prometheus-client>=0.20.0' \
-    'py-cpuinfo>=9.0.0' \
-    'filelock>=3.13.0' \
-    'psutil>=5.9.0' \
     'msgpack>=1.0.0' \
-    'gguf>=0.6.0' \
-    'compressed-tensors>=0.8.0' \
-    'outlines>=0.1.0' \
+    # ── vllm runtime deps (not pulled by --no-deps) ──
+    'msgspec>=0.18.0' \
+    'fastapi>=0.110.0' \
+    'uvicorn[standard]>=0.28.0' \
+    'openai>=1.0' \
+    'peft>=0.7.0' \
+    'datasets>=2.16.0' \
+    'pydantic>=2.0' \
+    'prometheus-fastapi-instrumentator>=6.0' \
+    'lark>=1.1.0' \
+    'outlines_core>=0.1.0' \
     'lm-format-enforcer>=0.10.0' \
     'partial-json-parser>=0.2.0' \
-    'mistral-common>=1.5.0'
+    'mistral-common>=1.5.0' \
+    'compressed-tensors>=0.8.0' \
+    'gguf>=0.6.0' \
+    'tokenizers>=0.20.0' \
+    'safetensors>=0.4.0' \
+    'filelock>=3.13.0' \
+    'psutil>=5.9.0' \
+    'py-cpuinfo>=9.0.0' \
+    'prometheus-client>=0.20.0' \
+    'pillow>=10.0' \
+    'aiohttp>=3.9.0' \
+    'requests>=2.31.0' \
+    'pyyaml>=6.0' \
+    'cloudpickle>=3.0' \
+    'blake3>=0.3.0' \
+    'cbor2>=5.0' \
+    'diskcache>=5.0' \
+    'pyzmq>=25.0' \
+    'python-json-logger>=2.0' \
+    'sentencepiece>=0.2.0' \
+    'tiktoken>=0.5.0' \
+    'tqdm>=4.66.0' \
+    'packaging>=23.0' \
+    'regex>=2023.0' \
+    'six>=1.16.0' \
+    'typing_extensions>=4.8.0' \
+    'einops>=0.7.0' \
+    'depyf>=0.18.0' \
+    'grpcio>=1.60.0' \
+    'protobuf>=4.25.0'
 
 # ── Verify vendor torch survived ───────────────────────────────────────
 # Fail early if any install step accidentally replaced the vendor torch.