%% Volcano Batch Scheduling Architecture
%% Related: ADR-0034
%%
%% Reading order (top to bottom): workload submissions enter through the
%% admission controller, pass to the scheduler and job controller, are
%% placed into weighted resource queues, and the queues map onto GPU and
%% CPU node groups.

flowchart TB
    %% Sources that submit workloads.
    subgraph Submissions["Workload Submissions"]
        KFP["Kubeflow Pipelines"]
        Argo["Argo Workflows"]
        Spark["Spark Jobs"]
        Ray["Ray Jobs"]
    end

    %% Volcano components and the scheduling plugins the scheduler uses.
    subgraph Volcano["Volcano Scheduler"]
        Admission["Admission Controller"]
        Scheduler["Volcano Scheduler"]
        Controller["Job Controller"]

        subgraph Plugins["Scheduling Plugins"]
            Gang["Gang Scheduling"]
            Priority["Priority"]
            DRF["Dominant Resource Fairness"]
            Binpack["Bin Packing"]
        end
    end

    %% Queues, listed from highest weight to lowest.
    subgraph Queues["Resource Queues"]
        MLQueue["ml-training<br/>weight: 4"]
        InferQueue["inference<br/>weight: 3"]
        BatchQueue["batch-jobs<br/>weight: 2"]
        DefaultQueue["default<br/>weight: 1"]
    end

    %% Hardware, grouped into GPU nodes and CPU-only workers.
    subgraph Resources["Cluster Resources"]
        subgraph GPUs["GPU Nodes"]
            Khelben["khelben<br/>Strix Halo 64GB"]
            Elminster["elminster<br/>RTX 2070"]
            Drizzt["drizzt<br/>RDNA2 680M"]
            Danilo["danilo<br/>Intel Arc"]
        end
        subgraph CPU["CPU Nodes"]
            Workers["9 x86_64 Workers"]
            ARM["5 ARM64 Workers"]
        end
    end

    %% Every submission source goes through the admission controller.
    KFP --> Admission
    Argo --> Admission
    Spark --> Admission
    Ray --> Admission

    %% Flow between the Volcano components.
    Admission --> Scheduler
    Scheduler --> Controller

    %% Scheduler to each scheduling plugin.
    Scheduler --> Gang
    Scheduler --> Priority
    Scheduler --> DRF
    Scheduler --> Binpack

    %% Job controller to each resource queue.
    Controller --> MLQueue
    Controller --> InferQueue
    Controller --> BatchQueue
    Controller --> DefaultQueue

    %% Queue-to-hardware mapping. These edges target whole subgraphs:
    %% ml-training and inference go to GPU nodes only, batch-jobs goes to
    %% both GPU and CPU nodes, and default goes to CPU nodes only.
    MLQueue --> GPUs
    InferQueue --> GPUs
    BatchQueue --> GPUs
    BatchQueue --> CPU
    DefaultQueue --> CPU

    %% Style classes, one per layer of the diagram.
    classDef submit fill:#4a5568,stroke:#718096,color:#fff
    classDef volcano fill:#667eea,stroke:#5a67d8,color:#fff
    classDef plugin fill:#9f7aea,stroke:#805ad5,color:#fff
    classDef queue fill:#ed8936,stroke:#dd6b20,color:#fff
    classDef gpu fill:#e53e3e,stroke:#c53030,color:#fff
    classDef cpu fill:#38a169,stroke:#2f855a,color:#fff

    %% Assign each node to its style class.
    class KFP,Argo,Spark,Ray submit
    class Admission,Scheduler,Controller volcano
    class Gang,Priority,DRF,Binpack plugin
    class MLQueue,InferQueue,BatchQueue,DefaultQueue queue
    class Khelben,Elminster,Drizzt,Danilo gpu
    class Workers,ARM cpu