updating to match everything in my homelab.

This commit is contained in:
2026-02-05 16:13:53 -05:00
parent f8787379c5
commit 80fb911e22
30 changed files with 3107 additions and 7 deletions

View File

@@ -0,0 +1,81 @@
%% Volcano Batch Scheduling Architecture
%% Related: ADR-0034
%%
%% Reading order (top to bottom): workload submitters -> Volcano components
%% and their scheduling plugins -> weighted queues -> cluster node groups.
flowchart TB
    %% ---- Sources that hand workloads to the admission controller ----
    subgraph Submissions["Workload Submissions"]
        KFP["Kubeflow Pipelines"]
        Argo["Argo Workflows"]
        Spark["Spark Jobs"]
        Ray["Ray Jobs"]
    end

    %% ---- Volcano components, with the scheduler's plugins nested inside ----
    subgraph Volcano["Volcano Scheduler"]
        Admission["Admission Controller"]
        Scheduler["Volcano Scheduler"]
        Controller["Job Controller"]

        subgraph Plugins["Scheduling Plugins"]
            Gang["Gang Scheduling"]
            Priority["Priority"]
            DRF["Dominant Resource Fairness"]
            Binpack["Bin Packing"]
        end
    end

    %% ---- Queues; each label carries the queue name and its weight ----
    subgraph Queues["Resource Queues"]
        MLQueue["ml-training<br/>weight: 4"]
        InferQueue["inference<br/>weight: 3"]
        BatchQueue["batch-jobs<br/>weight: 2"]
        DefaultQueue["default<br/>weight: 1"]
    end

    %% ---- Cluster nodes, grouped into GPU and CPU pools ----
    subgraph Resources["Cluster Resources"]
        subgraph GPUs["GPU Nodes"]
            Khelben["khelben<br/>Strix Halo 64GB"]
            Elminster["elminster<br/>RTX 2070"]
            Drizzt["drizzt<br/>RDNA2 680M"]
            Danilo["danilo<br/>Intel Arc"]
        end

        subgraph CPU["CPU Nodes"]
            Workers["9 x86_64 Workers"]
            ARM["5 ARM64 Workers"]
        end
    end

    %% ---- Edges: every submitter feeds the admission controller ----
    KFP & Argo & Spark & Ray --> Admission

    %% ---- Edges: admission -> scheduler -> job controller and plugins ----
    Admission --> Scheduler
    Scheduler --> Controller & Gang & Priority & DRF & Binpack

    %% ---- Edges: job controller fans out to all four queues ----
    Controller --> MLQueue & InferQueue & BatchQueue & DefaultQueue

    %% ---- Edges: queues point at whole node-group subgraphs ----
    %% Kept as one statement per edge because the targets are subgraph IDs.
    %% batch-jobs is the only queue linked to both the GPU and CPU groups.
    MLQueue --> GPUs
    InferQueue --> GPUs
    BatchQueue --> GPUs
    BatchQueue --> CPU
    DefaultQueue --> CPU

    %% ---- Styling: one colour class per layer of the diagram ----
    classDef submit fill:#4a5568,stroke:#718096,color:#fff
    classDef volcano fill:#667eea,stroke:#5a67d8,color:#fff
    classDef plugin fill:#9f7aea,stroke:#805ad5,color:#fff
    classDef queue fill:#ed8936,stroke:#dd6b20,color:#fff
    classDef gpu fill:#e53e3e,stroke:#c53030,color:#fff
    classDef cpu fill:#38a169,stroke:#2f855a,color:#fff

    %% ---- Class assignments (node IDs -> colour class) ----
    class KFP,Argo,Spark,Ray submit
    class Admission,Scheduler,Controller volcano
    class Gang,Priority,DRF,Binpack plugin
    class MLQueue,InferQueue,BatchQueue,DefaultQueue queue
    class Khelben,Elminster,Drizzt,Danilo gpu
    class Workers,ARM cpu