```mermaid
%% KubeRay Unified GPU Backend (ADR-0011)
%% C4 component-level view (drawn as a Mermaid flowchart) of RayService endpoints and GPU allocation
flowchart TB
%% Callers of the inference stack: the chat and voice handlers.
subgraph clients["🔌 Clients"]
    chat["Chat Handler"]
    voice["Voice Handler"]
end
%% RayService: one Serve endpoint in front of the per-GPU Ray Serve deployments.
subgraph rayservice["⚡ KubeRay RayService"]
%% Single service address; the path-routing edges further down fan out from this node.
endpoint["ai-inference-serve-svc:8000"]
subgraph deployments["Ray Serve Deployments"]
direction TB
%% One nested subgraph per GPU. Each node label reads: route, model, GPU fraction.
%% Label continuation lines stay unindented because they are part of the quoted label text.
%% khelben: /llm route
subgraph strixhalo["🟢 khelben (Strix Halo 64GB)"]
llm["/llm
vLLM 70B
0.95 GPU"]
end
%% elminster: /whisper and /tts routes, 0.5 GPU each
subgraph rtx2070["🟡 elminster (RTX 2070 8GB)"]
whisper["/whisper
Whisper v3
0.5 GPU"]
tts["/tts
XTTS
0.5 GPU"]
end
%% drizzt: /embeddings route
subgraph radeon680m["🔴 drizzt (Radeon 680M 12GB)"]
embeddings["/embeddings
BGE-Large
0.8 GPU"]
end
%% danilo: /reranker route
subgraph intelarc["🔵 danilo (Intel Arc)"]
reranker["/reranker
BGE-Reranker
0.8 GPU"]
end
end
end
%% KServe-style predictor service names, one per model route.
subgraph kserve["🎯 KServe Compatibility Layer"]
    direction TB
    svc1["whisper-predictor.ai-ml"]
    svc2["tts-predictor.ai-ml"]
    svc3["llm-predictor.ai-ml"]
    svc4["embeddings-predictor.ai-ml"]
    svc5["reranker-predictor.ai-ml"]
end
%% Client connections: both handlers call the single Serve endpoint
chat & voice --> endpoint
%% Path routing: the endpoint fans out to one deployment per route
endpoint --> llm & whisper & tts & embeddings & reranker
%% KServe aliases: every predictor service is an ExternalName pointing at the endpoint
svc1 & svc2 & svc3 & svc4 & svc5 -->|"ExternalName"| endpoint
%% Styling: each class definition is followed directly by the nodes that use it.
%% Client handlers
classDef client fill:#3498db,color:white
class chat,voice client
%% Shared Serve endpoint
classDef endpoint fill:#9b59b6,color:white
class endpoint endpoint
%% Deployments, coloured by GPU vendor
classDef amd fill:#ED1C24,color:white
class llm,embeddings amd
classDef nvidia fill:#76B900,color:white
class whisper,tts nvidia
classDef intel fill:#0071C5,color:white
class reranker intel
%% KServe compatibility services
classDef kserve fill:#f39c12,color:black
class svc1,svc2,svc3,svc4,svc5 kserve
```