diff --git a/.gitea/workflows/build-push.yaml b/.gitea/workflows/build-push.yaml index 0ec02fc..27f0634 100644 --- a/.gitea/workflows/build-push.yaml +++ b/.gitea/workflows/build-push.yaml @@ -88,19 +88,55 @@ jobs: echo "should_release=true" >> $GITHUB_OUTPUT echo "📦 Version: $LATEST → $NEW_VERSION ($BUMP bump)" - build-nvidia: + # Build all GPU worker images using a matrix strategy + # Runs sequentially on single runner, but one job definition = simpler dependency graph + build: needs: [determine-version] - # Skip if commit message contains [skip images] or [ray-serve only] - if: | - !contains(github.event.head_commit.message, '[skip images]') && - !contains(github.event.head_commit.message, '[ray-serve only]') && - (github.event_name != 'workflow_dispatch' || github.event.inputs.image == 'all' || github.event.inputs.image == 'nvidia') + if: "!contains(github.event.head_commit.message, '[skip images]') && !contains(github.event.head_commit.message, '[ray-serve only]')" runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + - name: nvidia + dockerfile: Dockerfile.ray-worker-nvidia + - name: rdna2 + dockerfile: Dockerfile.ray-worker-rdna2 + - name: strixhalo + dockerfile: Dockerfile.ray-worker-strixhalo + - name: intel + dockerfile: Dockerfile.ray-worker-intel steps: + # For workflow_dispatch, allow building a single image + - name: Check if should build this variant + id: check + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + INPUT="${{ github.event.inputs.image }}" + if [[ -n "$INPUT" && "$INPUT" != "all" && "$INPUT" != "${{ matrix.name }}" ]]; then + echo "skip=true" >> $GITHUB_OUTPUT + echo "⏭ Skipping ${{ matrix.name }} (dispatch requested: $INPUT)" + exit 0 + fi + fi + echo "skip=false" >> $GITHUB_OUTPUT + echo "🔨 Building ${{ matrix.name }}" + - name: Checkout + if: steps.check.outputs.skip != 'true' uses: actions/checkout@v4 + # Clean stale buildx builders from persistent Docker cache (prevents GPU capability errors) + - name: Clean stale buildx builders + if: steps.check.outputs.skip != 'true' + run: | + for b in $(docker buildx ls --format '{{.Name}}' 2>/dev/null | grep -v default | grep -v '\*' | sort -u); do + docker buildx rm "$b" --force 2>/dev/null || true + done + docker container prune --force 2>/dev/null || true + - name: Set up Docker Buildx + if: steps.check.outputs.skip != 'true' uses: docker/setup-buildx-action@v3 with: buildkitd-config-inline: | @@ -108,17 +144,15 @@ jobs: http = true insecure = true - # Login to Docker Hub to avoid pull rate limits - name: Login to Docker Hub - if: vars.DOCKERHUB_USERNAME != '' + if: steps.check.outputs.skip != 'true' && vars.DOCKERHUB_USERNAME != '' uses: docker/login-action@v3 with: username: ${{ vars.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - # Configure Gitea registry auth (create config.json directly for HTTP registry) - name: Configure Gitea Registry Auth - if: github.event_name != 'pull_request' + if: steps.check.outputs.skip != 'true' && github.event_name != 'pull_request' run: | AUTH=$(echo -n "${{ secrets.REGISTRY_USER }}:${{ secrets.REGISTRY_TOKEN }}" | base64 -w0) mkdir -p ~/.docker @@ -134,219 +168,32 @@ jobs: echo "Auth configured for ${{ env.REGISTRY_HOST }}" - name: Extract metadata + if: steps.check.outputs.skip != 'true' id: meta uses: docker/metadata-action@v5 with: - images: ${{ env.REGISTRY }}/ray-worker-nvidia + images: ${{ env.REGISTRY }}/ray-worker-${{ matrix.name }} tags: | type=raw,value=${{ needs.determine-version.outputs.version }} type=raw,value=latest,enable={{is_default_branch}} - name: Build and push + if: steps.check.outputs.skip != 'true' uses: docker/build-push-action@v5 with: context: . - file: dockerfiles/Dockerfile.ray-worker-nvidia + file: dockerfiles/${{ matrix.dockerfile }} push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=registry,ref=${{ env.REGISTRY }}/ray-worker-nvidia:buildcache - cache-to: type=registry,ref=${{ env.REGISTRY }}/ray-worker-nvidia:buildcache,mode=max,image-manifest=true,compression=zstd - - build-rdna2: - needs: [determine-version] - if: | - !contains(github.event.head_commit.message, '[skip images]') && - !contains(github.event.head_commit.message, '[ray-serve only]') && - (github.event_name != 'workflow_dispatch' || github.event.inputs.image == 'all' || github.event.inputs.image == 'rdna2') - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - with: - buildkitd-config-inline: | - [registry."gitea-http.gitea.svc.cluster.local:3000"] - http = true - insecure = true - - - name: Login to Docker Hub - if: vars.DOCKERHUB_USERNAME != '' - uses: docker/login-action@v3 - with: - username: ${{ vars.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - # Configure Gitea registry auth (create config.json directly for HTTP registry) - - name: Configure Gitea Registry Auth - if: github.event_name != 'pull_request' - run: | - AUTH=$(echo -n "${{ secrets.REGISTRY_USER }}:${{ secrets.REGISTRY_TOKEN }}" | base64 -w0) - mkdir -p ~/.docker - cat > ~/.docker/config.json << EOF - { - "auths": { - "${{ env.REGISTRY_HOST }}": { - "auth": "$AUTH" - } - } - } - EOF - echo "Auth configured for ${{ env.REGISTRY_HOST }}" - - - name: Extract metadata - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/ray-worker-rdna2 - tags: | - type=raw,value=${{ needs.determine-version.outputs.version }} - type=raw,value=latest,enable={{is_default_branch}} - - - name: Build and push - uses: docker/build-push-action@v5 - with: - context: . - file: dockerfiles/Dockerfile.ray-worker-rdna2 - push: ${{ github.event_name != 'pull_request' }} - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=registry,ref=${{ env.REGISTRY }}/ray-worker-rdna2:buildcache - cache-to: type=registry,ref=${{ env.REGISTRY }}/ray-worker-rdna2:buildcache,mode=max,image-manifest=true,compression=zstd - - build-strixhalo: - needs: [determine-version] - if: | - !contains(github.event.head_commit.message, '[skip images]') && - !contains(github.event.head_commit.message, '[ray-serve only]') && - (github.event_name != 'workflow_dispatch' || github.event.inputs.image == 'all' || github.event.inputs.image == 'strixhalo') - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - with: - buildkitd-config-inline: | - [registry."gitea-http.gitea.svc.cluster.local:3000"] - http = true - insecure = true - - - name: Login to Docker Hub - if: vars.DOCKERHUB_USERNAME != '' - uses: docker/login-action@v3 - with: - username: ${{ vars.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - # Configure Gitea registry auth (create config.json directly for HTTP registry) - - name: Configure Gitea Registry Auth - if: github.event_name != 'pull_request' - run: | - AUTH=$(echo -n "${{ secrets.REGISTRY_USER }}:${{ secrets.REGISTRY_TOKEN }}" | base64 -w0) - mkdir -p ~/.docker - cat > ~/.docker/config.json << EOF - { - "auths": { - "${{ env.REGISTRY_HOST }}": { - "auth": "$AUTH" - } - } - } - EOF - echo "Auth configured for ${{ env.REGISTRY_HOST }}" - - - name: Extract metadata - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/ray-worker-strixhalo - tags: | - type=raw,value=${{ needs.determine-version.outputs.version }} - type=raw,value=latest,enable={{is_default_branch}} - - - name: Build and push - uses: docker/build-push-action@v5 - with: - context: . - file: dockerfiles/Dockerfile.ray-worker-strixhalo - push: ${{ github.event_name != 'pull_request' }} - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=registry,ref=${{ env.REGISTRY }}/ray-worker-strixhalo:buildcache - cache-to: type=registry,ref=${{ env.REGISTRY }}/ray-worker-strixhalo:buildcache,mode=max,image-manifest=true,compression=zstd - - build-intel: - needs: [determine-version] - if: | - !contains(github.event.head_commit.message, '[skip images]') && - !contains(github.event.head_commit.message, '[ray-serve only]') && - (github.event_name != 'workflow_dispatch' || github.event.inputs.image == 'all' || github.event.inputs.image == 'intel') - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - with: - buildkitd-config-inline: | - [registry."gitea-http.gitea.svc.cluster.local:3000"] - http = true - insecure = true - - - name: Login to Docker Hub - if: vars.DOCKERHUB_USERNAME != '' - uses: docker/login-action@v3 - with: - username: ${{ vars.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - # Configure Gitea registry auth (create config.json directly for HTTP registry) - - name: Configure Gitea Registry Auth - if: github.event_name != 'pull_request' - run: | - AUTH=$(echo -n "${{ secrets.REGISTRY_USER }}:${{ secrets.REGISTRY_TOKEN }}" | base64 -w0) - mkdir -p ~/.docker - cat > ~/.docker/config.json << EOF - { - "auths": { - "${{ env.REGISTRY_HOST }}": { - "auth": "$AUTH" - } - } - } - EOF - echo "Auth configured for ${{ env.REGISTRY_HOST }}" - - - name: Extract metadata - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/ray-worker-intel - tags: | - type=raw,value=${{ needs.determine-version.outputs.version }} - type=raw,value=latest,enable={{is_default_branch}} - - - name: Build and push - uses: docker/build-push-action@v5 - with: - context: . - file: dockerfiles/Dockerfile.ray-worker-intel - push: ${{ github.event_name != 'pull_request' }} - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=registry,ref=${{ env.REGISTRY }}/ray-worker-intel:buildcache - cache-to: type=registry,ref=${{ env.REGISTRY }}/ray-worker-intel:buildcache,mode=max,image-manifest=true,compression=zstd + cache-from: type=registry,ref=${{ env.REGISTRY }}/ray-worker-${{ matrix.name }}:buildcache + cache-to: type=registry,ref=${{ env.REGISTRY }}/ray-worker-${{ matrix.name }}:buildcache,mode=max,image-manifest=true,compression=zstd release: name: Release runs-on: ubuntu-latest - needs: [determine-version, build-nvidia, build-rdna2, build-strixhalo, build-intel] - if: needs.determine-version.outputs.should_release == 'true' && github.ref == 'refs/heads/main' && github.event_name == 'push' + needs: [determine-version, build] + if: "needs.determine-version.outputs.should_release == 'true' && github.ref == 'refs/heads/main' && github.event_name == 'push'" steps: - name: Checkout uses: actions/checkout@v4 @@ -366,16 +213,11 @@ jobs: notify: name: Notify runs-on: ubuntu-latest - # Don't depend on release - notify should run even if release never starts - needs: [determine-version, build-nvidia, build-rdna2, build-strixhalo, build-intel] + needs: [determine-version, build] if: always() steps: - name: Notify on success - if: | - (needs.build-nvidia.result == 'success' || needs.build-nvidia.result == 'skipped') && - (needs.build-rdna2.result == 'success' || needs.build-rdna2.result == 'skipped') && - (needs.build-strixhalo.result == 'success' || needs.build-strixhalo.result == 'skipped') && - (needs.build-intel.result == 'success' || needs.build-intel.result == 'skipped') + if: "needs.build.result == 'success' || needs.build.result == 'skipped'" run: | curl -s \ -H "Title: ✅ Images Built: ${{ gitea.repository }}" \ @@ -384,18 +226,11 @@ jobs: -H "Click: ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_id }}" \ -d "Branch: ${{ gitea.ref_name }} Version: ${{ needs.determine-version.outputs.version }} (${{ needs.determine-version.outputs.bump }}) - nvidia: ${{ needs.build-nvidia.result }} - rdna2: ${{ needs.build-rdna2.result }} - strixhalo: ${{ needs.build-strixhalo.result }} - intel: ${{ needs.build-intel.result }}" \ + Build: ${{ needs.build.result }}" \ ${{ env.NTFY_URL }}/gitea-ci - name: Notify on failure or cancellation - if: | - needs.build-nvidia.result == 'failure' || needs.build-nvidia.result == 'cancelled' || - needs.build-rdna2.result == 'failure' || needs.build-rdna2.result == 'cancelled' || - needs.build-strixhalo.result == 'failure' || needs.build-strixhalo.result == 'cancelled' || - needs.build-intel.result == 'failure' || needs.build-intel.result == 'cancelled' + if: "needs.build.result == 'failure' || needs.build.result == 'cancelled'" run: | curl -s \ -H "Title: ❌ Image Build Failed: ${{ gitea.repository }}" \ @@ -404,8 +239,5 @@ jobs: -H "Click: ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_id }}" \ -d "Branch: ${{ gitea.ref_name }} Version: ${{ needs.determine-version.outputs.version }} - nvidia: ${{ needs.build-nvidia.result }} - rdna2: ${{ needs.build-rdna2.result }} - strixhalo: ${{ needs.build-strixhalo.result }} - intel: ${{ needs.build-intel.result }}" \ + Build: ${{ needs.build.result }}" \ ${{ env.NTFY_URL }}/gitea-ci