updating to match everything in my homelab.

This commit is contained in:
2026-02-05 16:13:53 -05:00
parent f8787379c5
commit 80fb911e22
30 changed files with 3107 additions and 7 deletions

View File

@@ -0,0 +1,77 @@
```mermaid
%% KubeRay unified GPU backend (ADR-0011).
%% Component-level flowchart: the clients, the single RayService endpoint,
%% the Ray Serve deployments grouped by GPU host, and the KServe-style aliases.
flowchart TB
    %% Callers of the shared inference endpoint.
    subgraph clients["🔌 Clients"]
        chat["Chat Handler"]
        voice["Voice Handler"]
    end

    %% The RayService: one service address in front of every deployment.
    subgraph rayservice["⚡ KubeRay RayService"]
        endpoint["ai-inference-serve-svc:8000"]

        %% One nested subgraph per GPU host. Each node label lists the
        %% route, the model served there, and the GPU fraction it requests.
        subgraph deployments["Ray Serve Deployments"]
            direction TB

            subgraph strixhalo["🟢 khelben (Strix Halo 64GB)"]
                llm["/llm<br/>vLLM 70B<br/>0.95 GPU"]
            end

            %% Two deployments share this card at 0.5 GPU each.
            subgraph rtx2070["🟡 elminster (RTX 2070 8GB)"]
                whisper["/whisper<br/>Whisper v3<br/>0.5 GPU"]
                tts["/tts<br/>XTTS<br/>0.5 GPU"]
            end

            subgraph radeon680m["🔴 drizzt (Radeon 680M 12GB)"]
                embeddings["/embeddings<br/>BGE-Large<br/>0.8 GPU"]
            end

            subgraph intelarc["🔵 danilo (Intel Arc)"]
                reranker["/reranker<br/>BGE-Reranker<br/>0.8 GPU"]
            end
        end
    end

    %% Predictor-style service names kept for KServe compatibility;
    %% every one of them points at the RayService endpoint (edges below).
    subgraph kserve["🎯 KServe Compatibility Layer"]
        direction TB
        svc1["whisper-predictor.ai-ml"]
        svc2["tts-predictor.ai-ml"]
        svc3["llm-predictor.ai-ml"]
        svc4["embeddings-predictor.ai-ml"]
        svc5["reranker-predictor.ai-ml"]
    end

    %% Clients talk to the endpoint directly.
    chat --> endpoint
    voice --> endpoint

    %% The endpoint fans out to one deployment per route.
    endpoint --> llm
    endpoint --> whisper
    endpoint --> tts
    endpoint --> embeddings
    endpoint --> reranker

    %% Compatibility aliases, labelled with the Service type used.
    svc1 -->|"ExternalName"| endpoint
    svc2 -->|"ExternalName"| endpoint
    svc3 -->|"ExternalName"| endpoint
    svc4 -->|"ExternalName"| endpoint
    svc5 -->|"ExternalName"| endpoint

    %% Palette: one class per role, plus one per GPU vendor.
    classDef client fill:#3498db,color:white
    classDef endpoint fill:#9b59b6,color:white
    classDef amd fill:#ED1C24,color:white
    classDef nvidia fill:#76B900,color:white
    classDef intel fill:#0071C5,color:white
    classDef kserve fill:#f39c12,color:black

    %% Class assignments: deployment nodes are coloured by the vendor class
    %% of the host they sit on.
    class chat,voice client
    class endpoint endpoint
    class llm,embeddings amd
    class whisper,tts nvidia
    class reranker intel
    class svc1,svc2,svc3,svc4,svc5 kserve
```