package main import ( "context" "encoding/json" "net/http" "net/http/httptest" "testing" "time" "git.daviestechlabs.io/daviestechlabs/handler-base/clients" ) // ──────────────────────────────────────────────────────────────────────────── // E2E tests: exercise the voice pipeline (STT → Embed → Rerank → LLM → TTS) // ──────────────────────────────────────────────────────────────────────────── type voiceMocks struct { STT *httptest.Server Embeddings *httptest.Server Reranker *httptest.Server LLM *httptest.Server TTS *httptest.Server } func newVoiceMocks(t *testing.T) *voiceMocks { t.Helper() m := &voiceMocks{} m.STT = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { _ = json.NewEncoder(w).Encode(map[string]string{"text": "What is the weather today?"}) })) t.Cleanup(m.STT.Close) m.Embeddings = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { _ = json.NewEncoder(w).Encode(map[string]any{ "data": []map[string]any{{"embedding": []float64{0.5, 0.6, 0.7}}}, }) })) t.Cleanup(m.Embeddings.Close) m.Reranker = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { _ = json.NewEncoder(w).Encode(map[string]any{ "results": []map[string]any{{"index": 0, "relevance_score": 0.88}}, }) })) t.Cleanup(m.Reranker.Close) m.LLM = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { _ = json.NewEncoder(w).Encode(map[string]any{ "choices": []map[string]any{ {"message": map[string]any{"content": "Sunny with a high of 72."}}, }, }) })) t.Cleanup(m.LLM.Close) m.TTS = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { _, _ = w.Write(make([]byte, 8000)) // simulated audio })) t.Cleanup(m.TTS.Close) return m } func TestVoicePipeline_FullFlow(t *testing.T) { m := newVoiceMocks(t) ctx := context.Background() stt := clients.NewSTTClient(m.STT.URL, 5*time.Second) embeddings := clients.NewEmbeddingsClient(m.Embeddings.URL, 5*time.Second, "bge") reranker := clients.NewRerankerClient(m.Reranker.URL, 5*time.Second) llm := clients.NewLLMClient(m.LLM.URL, 5*time.Second) tts := clients.NewTTSClient(m.TTS.URL, 5*time.Second, "en") // 1. STT transcription, err := stt.Transcribe(ctx, make([]byte, 1000), "en") if err != nil { t.Fatal(err) } if transcription.Text == "" { t.Fatal("empty transcription") } // 2. Embed embedding, err := embeddings.EmbedSingle(ctx, transcription.Text) if err != nil { t.Fatal(err) } if len(embedding) == 0 { t.Fatal("empty embedding") } // 3. Rerank results, err := reranker.Rerank(ctx, transcription.Text, []string{"doc1"}, 1) if err != nil { t.Fatal(err) } if len(results) == 0 { t.Fatal("no rerank results") } // 4. LLM response, err := llm.Generate(ctx, transcription.Text, results[0].Document, "") if err != nil { t.Fatal(err) } if response == "" { t.Fatal("empty LLM response") } // 5. TTS audio, err := tts.Synthesize(ctx, response, "en", "") if err != nil { t.Fatal(err) } if len(audio) == 0 { t.Fatal("empty audio") } } func TestVoicePipeline_STTFailure(t *testing.T) { failSTT := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(500) _, _ = w.Write([]byte("model not loaded")) })) defer failSTT.Close() stt := clients.NewSTTClient(failSTT.URL, 5*time.Second) _, err := stt.Transcribe(context.Background(), make([]byte, 100), "") if err == nil { t.Error("expected error from failed STT") } } func TestVoicePipeline_TTSLargeResponse(t *testing.T) { // TTS that returns 1 MB of audio. bigTTS := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { _, _ = w.Write(make([]byte, 1<<20)) })) defer bigTTS.Close() tts := clients.NewTTSClient(bigTTS.URL, 10*time.Second, "en") audio, err := tts.Synthesize(context.Background(), "long text", "en", "") if err != nil { t.Fatal(err) } if len(audio) != 1<<20 { t.Errorf("audio size = %d, want %d", len(audio), 1<<20) } } // ──────────────────────────────────────────────────────────────────────────── // Benchmark: voice pipeline latency with mock backends // ──────────────────────────────────────────────────────────────────────────── func BenchmarkVoicePipeline_Full(b *testing.B) { sttSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { _, _ = w.Write([]byte(`{"text":"hello"}`)) })) defer sttSrv.Close() llmSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { _, _ = w.Write([]byte(`{"choices":[{"message":{"content":"answer"}}]}`)) })) defer llmSrv.Close() ttsSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { _, _ = w.Write(make([]byte, 4000)) })) defer ttsSrv.Close() stt := clients.NewSTTClient(sttSrv.URL, 10*time.Second) llm := clients.NewLLMClient(llmSrv.URL, 10*time.Second) tts := clients.NewTTSClient(ttsSrv.URL, 10*time.Second, "en") ctx := context.Background() audio := make([]byte, 16384) b.ResetTimer() for b.Loop() { _, _ = stt.Transcribe(ctx, audio, "en") _, _ = llm.Generate(ctx, "question", "", "") _, _ = tts.Synthesize(ctx, "answer", "en", "") } }