fix(strixhalo): add vllm runtime deps to --no-deps build
Some checks failed
Build and Push Images / determine-version (push) Successful in 5s
Build and Push Images / build (Dockerfile.ray-worker-rdna2, rdna2) (push) Has been cancelled
Build and Push Images / build (Dockerfile.ray-worker-strixhalo, strixhalo) (push) Has been cancelled
Build and Push Images / Release (push) Has been cancelled
Build and Push Images / Notify (push) Has been cancelled
Build and Push Images / build (Dockerfile.ray-worker-nvidia, nvidia) (push) Has been cancelled
Build and Push Images / build (Dockerfile.ray-worker-intel, intel) (push) Has been cancelled
vllm was installed with --no-deps to avoid torch/xgrammar pin conflicts. This left msgspec, fastapi, openai, xgrammar, and other runtime deps missing. Now explicitly installs all vllm runtime deps in a separate layer, with xgrammar in the --no-deps ROCm layer.
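The class of breakage this commit fixes can be surfaced before runtime. A minimal sketch, assuming `python3` is the venv interpreter; the module names are the ones this commit calls out, and the `probe` helper is illustrative, not part of the commit (`python -m pip check` or `uv pip check` would also report declared-but-missing deps, though not for a package installed with --no-deps):

```shell
# Probe whether a module is importable without actually importing it.
probe() {
    python3 -c "import importlib.util, sys; sys.exit(0 if importlib.util.find_spec('$1') else 1)"
}

# Check the runtime deps that a --no-deps install of vllm leaves behind.
for mod in msgspec fastapi openai xgrammar; do
    if probe "$mod"; then
        echo "$mod: present"
    else
        echo "$mod: MISSING"
    fi
done
```

Running this inside the built image makes a missing runtime dep a visible "MISSING" line instead of an ImportError at serving time.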
@@ -2,7 +2,7 @@
 # Used for: vLLM (Llama 3.1 70B)
 #
 # Build:
-#   docker build -t registry.lab.daviestechlabs.io/daviestechlabs/ray-worker-strixhalo:v1.0.20 \
+#   docker build -t registry.lab.daviestechlabs.io/daviestechlabs/ray-worker-strixhalo:v1.0.21 \
 #     -f dockerfiles/Dockerfile.ray-worker-strixhalo .
 #
 # STRATEGY: Full source build of vLLM on AMD's vendor PyTorch image.
||||
@@ -126,6 +126,10 @@ ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} \
 # Build using setup.py bdist_wheel (same as vLLM CI in Dockerfile.rocm),
 # then install the wheel. This avoids a develop-mode egg-link to the
 # build directory so we can safely clean up /tmp/vllm-build afterwards.
+#
+# --no-deps: vllm's dep tree pulls torch/xgrammar with exact +gitXXX pins
+# that conflict with the vendor torch. Runtime deps are installed in the
+# next layer instead.
 RUN --mount=type=cache,target=/root/.cache/ccache \
     python3 setup.py bdist_wheel --dist-dir=dist \
     && uv pip install --python /opt/venv/bin/python3 --no-deps dist/*.whl
|
||||
@@ -138,9 +142,13 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     --no-deps \
     --prerelease=allow \
     --extra-index-url https://wheels.vllm.ai/rocm/ \
-    triton triton-kernels flash_attn
+    triton triton-kernels flash_attn \
+    xgrammar
 
+# ── Runtime Python dependencies ─────────────────────────────────────────
+# Because vllm was installed --no-deps (torch pin conflicts), we install
+# its runtime deps here. Packages already in the vendor image (torch,
+# numpy, pillow, pyyaml, etc.) are satisfied and skipped by uv.
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --python /opt/venv/bin/python3 \
     'ray[default]==2.53.0' \
|
||||
@@ -152,24 +160,51 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     'pandas>=2.0.0,<3.0' \
     'numpy>=2.1.0,<2.3' \
     'numba>=0.60.0,<0.62' \
     'tokenizers>=0.20.0' \
     'safetensors>=0.4.0' \
     'uvloop>=0.21.0' \
     'pyyaml>=6.0' \
     'requests>=2.31.0' \
     'aiohttp>=3.9.0' \
     'pillow>=10.0' \
     'prometheus-client>=0.20.0' \
     'py-cpuinfo>=9.0.0' \
     'filelock>=3.13.0' \
     'psutil>=5.9.0' \
     'msgpack>=1.0.0' \
     'gguf>=0.6.0' \
     'compressed-tensors>=0.8.0' \
     'outlines>=0.1.0' \
+    # ── vllm runtime deps (not pulled by --no-deps) ──
+    'msgspec>=0.18.0' \
+    'fastapi>=0.110.0' \
+    'uvicorn[standard]>=0.28.0' \
+    'openai>=1.0' \
+    'peft>=0.7.0' \
+    'datasets>=2.16.0' \
+    'pydantic>=2.0' \
+    'prometheus-fastapi-instrumentator>=6.0' \
+    'lark>=1.1.0' \
+    'outlines_core>=0.1.0' \
+    'lm-format-enforcer>=0.10.0' \
+    'partial-json-parser>=0.2.0' \
-    'mistral-common>=1.5.0'
+    'mistral-common>=1.5.0' \
+    'compressed-tensors>=0.8.0' \
+    'gguf>=0.6.0' \
+    'tokenizers>=0.20.0' \
+    'safetensors>=0.4.0' \
+    'filelock>=3.13.0' \
+    'psutil>=5.9.0' \
+    'py-cpuinfo>=9.0.0' \
+    'prometheus-client>=0.20.0' \
+    'pillow>=10.0' \
+    'aiohttp>=3.9.0' \
+    'requests>=2.31.0' \
+    'pyyaml>=6.0' \
+    'cloudpickle>=3.0' \
+    'blake3>=0.3.0' \
+    'cbor2>=5.0' \
+    'diskcache>=5.0' \
+    'pyzmq>=25.0' \
+    'python-json-logger>=2.0' \
+    'sentencepiece>=0.2.0' \
+    'tiktoken>=0.5.0' \
+    'tqdm>=4.66.0' \
+    'packaging>=23.0' \
+    'regex>=2023.0' \
+    'six>=1.16.0' \
+    'typing_extensions>=4.8.0' \
+    'einops>=0.7.0' \
+    'depyf>=0.18.0' \
+    'grpcio>=1.60.0' \
+    'protobuf>=4.25.0'
 
 # ── Verify vendor torch survived ───────────────────────────────────────
 # Fail early if any install step accidentally replaced the vendor torch.
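The "fail early" verification layer mentioned above is not shown in this hunk. A minimal sketch of such a guard, assuming the vendor image ships a ROCm build of torch (on ROCm builds `torch.version.hip` is set; on CUDA or CPU builds it is None):

```dockerfile
# Illustrative only; not the commit's actual verification step.
RUN python3 -c "import torch; assert torch.version.hip, \
    f'vendor ROCm torch was replaced: {torch.__version__}'"
```

Because a failed assert makes the RUN exit non-zero, an install step that silently swapped in a CUDA or CPU torch fails the build at this layer instead of at serving time.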
||||