%% File: homelab-design/diagrams/data-flow-voice.mmd

%% Voice Request Data Flow
%% Sequence diagram showing voice assistant processing

sequenceDiagram
    %% Number every message so individual steps can be referenced by index.
    autonumber

    %% Participants, declared in the left-to-right order they are rendered.
    participant U as User
    participant W as Voice WebApp
    participant N as NATS
    participant VA as Voice Assistant
    participant STT as Whisper<br/>(STT)
    participant E as BGE Embeddings
    participant M as Milvus
    participant R as Reranker
    participant L as vLLM
    participant TTS as XTTS<br/>(TTS)

    %% Ingress: recorded audio is published to NATS and delivered to the assistant.
    U->>W: Record audio
    W->>N: Publish ai.voice.user.{id}.request<br/>(msgpack with audio bytes)
    N->>VA: Deliver voice request

    %% Speech-to-text.
    VA->>STT: Transcribe audio
    STT-->>VA: Transcription text

    %% Optional retrieval step. `opt` (not `alt`) because there is no
    %% alternative branch: the whole fragment is simply skipped when RAG is off.
    opt RAG Enabled
        VA->>E: Generate query embedding
        E-->>VA: Query vector
        VA->>M: Search similar chunks
        M-->>VA: Top-K chunks

        %% Reranking is itself optional within the retrieval step.
        opt Reranker Enabled
            VA->>R: Rerank chunks
            R-->>VA: Reordered chunks
        end
    end

    %% Text generation.
    VA->>L: LLM inference
    L-->>VA: Response text

    %% Text-to-speech.
    VA->>TTS: Synthesize speech
    TTS-->>VA: Audio bytes

    %% Egress: the response (text + audio) is published to NATS and delivered to the web app.
    VA->>N: Publish ai.voice.response.{id}<br/>(text + audio)
    N-->>W: Deliver response
    W-->>U: Play audio + show text

    Note over VA,TTS: Total latency target: < 3s