Update to match everything in my homelab.
This commit is contained in:
77
diagrams/kuberay-unified-backend.mmd
Normal file
77
diagrams/kuberay-unified-backend.mmd
Normal file
@@ -0,0 +1,77 @@
|
||||
```plaintext
|
||||
%% KubeRay Unified GPU Backend (ADR-0011)
|
||||
%% C4-style component view, drawn as a Mermaid flowchart, showing RayService endpoints and GPU allocation
|
||||
|
||||
flowchart TB
|
||||
%% Callers of the inference backend. Both nodes are linked to `endpoint`
%% by the edges under "Client connections".
subgraph clients["🔌 Clients"]
|
||||
chat["Chat Handler"]
|
||||
voice["Voice Handler"]
|
||||
end
|
||||
|
||||
%% RayService group: one shared `endpoint` node plus the Ray Serve deployments,
%% which are nested in one subgraph per host/GPU (named in each subgraph title).
%% Each deployment label reads: route path, model, GPU share.
%% NOTE(review): the "N GPU" figures are presumably fractional GPU requests per
%% deployment — confirm against the RayService manifest.
%% NOTE(review): the coloured-circle emoji in the host titles do not all match the
%% fill colours of the classes assigned to the nodes inside (e.g. 🟢 title, but
%% `llm` uses class `amd`, fill #ED1C24) — confirm this is intended.
subgraph rayservice["⚡ KubeRay RayService"]
|
||||
%% Single entry node; all client, routing and alias edges attach here.
endpoint["ai-inference-serve-svc:8000"]
|
||||
|
||||
subgraph deployments["Ray Serve Deployments"]
|
||||
direction TB
|
||||
|
||||
%% Host khelben: LLM deployment.
subgraph strixhalo["🟢 khelben (Strix Halo 64GB)"]
|
||||
llm["/llm<br/>vLLM 70B<br/>0.95 GPU"]
|
||||
end
|
||||
|
||||
%% Host elminster: two deployments listed at 0.5 GPU each.
subgraph rtx2070["🟡 elminster (RTX 2070 8GB)"]
|
||||
whisper["/whisper<br/>Whisper v3<br/>0.5 GPU"]
|
||||
tts["/tts<br/>XTTS<br/>0.5 GPU"]
|
||||
end
|
||||
|
||||
%% Host drizzt: embeddings deployment.
subgraph radeon680m["🔴 drizzt (Radeon 680M 12GB)"]
|
||||
embeddings["/embeddings<br/>BGE-Large<br/>0.8 GPU"]
|
||||
end
|
||||
|
||||
%% Host danilo: reranker deployment.
subgraph intelarc["🔵 danilo (Intel Arc)"]
|
||||
reranker["/reranker<br/>BGE-Reranker<br/>0.8 GPU"]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
%% Alias services, one per deployment (whisper, tts, llm, embeddings, reranker).
%% Each is linked to `endpoint` by an edge labelled "ExternalName" under "KServe aliases".
%% NOTE(review): the `.ai-ml` suffix is presumably the Kubernetes namespace — confirm.
subgraph kserve["🎯 KServe Compatibility Layer"]
|
||||
direction TB
|
||||
svc1["whisper-predictor.ai-ml"]
|
||||
svc2["tts-predictor.ai-ml"]
|
||||
svc3["llm-predictor.ai-ml"]
|
||||
svc4["embeddings-predictor.ai-ml"]
|
||||
svc5["reranker-predictor.ai-ml"]
|
||||
end
|
||||
|
||||
%% Client connections: both handlers call the single shared endpoint node
|
||||
chat --> endpoint
|
||||
voice --> endpoint
|
||||
|
||||
%% Path routing: the endpoint fans out to every deployment node
%% (the route path for each is the first line of the node's label)
|
||||
endpoint --> llm
|
||||
endpoint --> whisper
|
||||
endpoint --> tts
|
||||
endpoint --> embeddings
|
||||
endpoint --> reranker
|
||||
|
||||
%% KServe aliases: every alias service points at the same endpoint node;
%% the edge label names the mechanism ("ExternalName")
|
||||
svc1 -->|"ExternalName"| endpoint
|
||||
svc2 -->|"ExternalName"| endpoint
|
||||
svc3 -->|"ExternalName"| endpoint
|
||||
svc4 -->|"ExternalName"| endpoint
|
||||
svc5 -->|"ExternalName"| endpoint
|
||||
|
||||
%% Style classes: one fill colour / text colour pair per node category.
%% NOTE(review): `endpoint` and `kserve` are each used as both a node/subgraph id
%% and a class name — distinct names would make the class assignments easier to read.
classDef client fill:#3498db,color:white
|
||||
classDef endpoint fill:#9b59b6,color:white
|
||||
classDef amd fill:#ED1C24,color:white
|
||||
classDef nvidia fill:#76B900,color:white
|
||||
classDef intel fill:#0071C5,color:white
|
||||
classDef kserve fill:#f39c12,color:black
|
||||
|
||||
%% Class assignments: `class <node ids> <class name>`.
class chat,voice client
|
||||
%% First `endpoint` is the node id, second is the class name.
class endpoint endpoint
|
||||
%% Deployment nodes are coloured by GPU vendor class; `llm` and `embeddings` share `amd`.
class llm,embeddings amd
|
||||
class whisper,tts nvidia
|
||||
class reranker intel
|
||||
class svc1,svc2,svc3,svc4,svc5 kserve
|
||||
|
||||
```
|
||||
Reference in New Issue
Block a user