%% Chat Request Data Flow
%% Sequence diagram showing chat message processing
sequenceDiagram
    autonumber

    %% Participants, declared in left-to-right display order.
    %% Two-line labels use <br/> inside the alias; a bare continuation line
    %% is not a valid statement and breaks parsing.
    participant U as User
    participant W as WebApp<br/>(companions)
    participant N as NATS
    participant C as Chat Handler
    participant V as Valkey<br/>(Cache)
    participant E as BGE Embeddings
    participant M as Milvus
    participant R as Reranker
    participant L as vLLM

    %% Inbound path: the user message reaches the chat handler through NATS.
    U->>W: Send message
    W->>N: Publish ai.chat.user.{id}.message
    N->>C: Deliver message

    %% Load prior conversation turns from the cache.
    C->>V: Get session history
    V-->>C: Previous messages

    %% Optional retrieval step. There is no alternative branch, so this is
    %% an opt block, matching the nested reranker block.
    opt RAG Enabled
        C->>E: Generate query embedding
        E-->>C: Query vector
        C->>M: Search similar chunks
        M-->>C: Top-K chunks
        opt Reranker Enabled
            C->>R: Rerank chunks
            R-->>C: Reordered chunks
        end
    end

    %% Inference request, followed by one of two response delivery modes.
    C->>L: LLM inference (context + query)
    alt Streaming Enabled
        loop For each token
            L-->>C: Token
            C->>N: Publish ai.chat.response.stream.{id}
            N-->>W: Deliver chunk
            W-->>U: Display token
        end
    else Non-streaming
        L-->>C: Full response
        C->>N: Publish ai.chat.response.{id}
        N-->>W: Deliver response
        W-->>U: Display response
    end

    %% Persist the exchange after the response has been delivered.
    C->>V: Save to session history