package main

import (
	"context"
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"

	"git.daviestechlabs.io/daviestechlabs/handler-base/clients"
	"git.daviestechlabs.io/daviestechlabs/handler-base/messages"
	"github.com/vmihailenco/msgpack/v5"
)

// ────────────────────────────────────────────────────────────────────────────
// E2E tests: exercise the full chat pipeline with mock backends
// ────────────────────────────────────────────────────────────────────────────

// mockBackends starts httptest servers simulating all downstream services.
type mockBackends struct {
	Embeddings *httptest.Server
	Reranker   *httptest.Server
	LLM        *httptest.Server
	TTS        *httptest.Server
}

// newMockBackends spins up one mock server per downstream service and
// registers cleanup so every server is shut down when the test finishes.
func newMockBackends(t *testing.T) *mockBackends {
	t.Helper()
	m := &mockBackends{}

	// Embeddings: always returns a fixed 4-dimensional vector.
	m.Embeddings = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_ = json.NewEncoder(w).Encode(map[string]any{
			"data": []map[string]any{
				{"embedding": []float64{0.1, 0.2, 0.3, 0.4}},
			},
		})
	}))
	t.Cleanup(m.Embeddings.Close)

	// Reranker: always ranks the first document highest.
	m.Reranker = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_ = json.NewEncoder(w).Encode(map[string]any{
			"results": []map[string]any{
				{"index": 0, "relevance_score": 0.95},
			},
		})
	}))
	t.Cleanup(m.Reranker.Close)

	// LLM: OpenAI-style chat completion with a canned answer. The request
	// body is drained (decoded) so the connection can be reused.
	m.LLM = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var req map[string]any
		_ = json.NewDecoder(r.Body).Decode(&req)
		_ = json.NewEncoder(w).Encode(map[string]any{
			"choices": []map[string]any{
				{"message": map[string]any{
					"content": "Paris is the capital of France.",
				}},
			},
		})
	}))
	t.Cleanup(m.LLM.Close)

	// TTS: returns a few bytes of fake audio.
	m.TTS = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write([]byte{0xDE, 0xAD, 0xBE, 0xEF})
	}))
	t.Cleanup(m.TTS.Close)

	return m
}

// TestChatPipeline_LLMOnly covers the non-RAG path: prompt in, answer out.
func TestChatPipeline_LLMOnly(t *testing.T) {
	m := newMockBackends(t)
	llm := clients.NewLLMClient(m.LLM.URL, 5*time.Second)

	// Simulate what main.go does for a non-RAG request.
	response, err := llm.Generate(context.Background(), "What is the capital of France?", "", "")
	if err != nil {
		t.Fatal(err)
	}
	if response != "Paris is the capital of France." {
		t.Errorf("response = %q", response)
	}
}

// TestChatPipeline_WithRAG covers the full RAG path: embed → rerank → generate.
func TestChatPipeline_WithRAG(t *testing.T) {
	m := newMockBackends(t)
	embeddings := clients.NewEmbeddingsClient(m.Embeddings.URL, 5*time.Second, "bge")
	reranker := clients.NewRerankerClient(m.Reranker.URL, 5*time.Second)
	llm := clients.NewLLMClient(m.LLM.URL, 5*time.Second)
	ctx := context.Background()

	// 1. Embed query
	embedding, err := embeddings.EmbedSingle(ctx, "What is the capital of France?")
	if err != nil {
		t.Fatal(err)
	}
	if len(embedding) == 0 {
		t.Fatal("empty embedding")
	}

	// 2. Rerank (with mock documents)
	docs := []string{"France is a country in Europe", "Paris is its capital"}
	results, err := reranker.Rerank(ctx, "capital of France", docs, 2)
	if err != nil {
		t.Fatal(err)
	}
	if len(results) == 0 {
		t.Fatal("no rerank results")
	}
	if results[0].Score == 0 {
		t.Error("expected non-zero score")
	}

	// 3. Generate with context
	contextText := results[0].Document
	response, err := llm.Generate(ctx, "capital of France?", contextText, "")
	if err != nil {
		t.Fatal(err)
	}
	if response == "" {
		t.Error("empty response")
	}
}

// TestChatPipeline_WithTTS covers generate → synthesize.
func TestChatPipeline_WithTTS(t *testing.T) {
	m := newMockBackends(t)
	llm := clients.NewLLMClient(m.LLM.URL, 5*time.Second)
	tts := clients.NewTTSClient(m.TTS.URL, 5*time.Second, "en")
	ctx := context.Background()

	response, err := llm.Generate(ctx, "hello", "", "")
	if err != nil {
		t.Fatal(err)
	}
	audio, err := tts.Synthesize(ctx, response, "en", "")
	if err != nil {
		t.Fatal(err)
	}
	if len(audio) == 0 {
		t.Error("empty audio")
	}
}

// TestChatPipeline_LLMTimeout verifies the client gives up when the LLM is
// slower than its configured timeout.
func TestChatPipeline_LLMTimeout(t *testing.T) {
	// Simulate slow LLM. Watching r.Context() lets the handler bail out as
	// soon as the client times out, so slow.Close() (which waits for
	// outstanding handlers) does not block for the full delay.
	slow := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		select {
		case <-r.Context().Done():
			return
		case <-time.After(200 * time.Millisecond):
		}
		_ = json.NewEncoder(w).Encode(map[string]any{
			"choices": []map[string]any{
				{"message": map[string]any{"content": "late response"}},
			},
		})
	}))
	defer slow.Close()

	llm := clients.NewLLMClient(slow.URL, 100*time.Millisecond)
	_, err := llm.Generate(context.Background(), "hello", "", "")
	if err == nil {
		t.Error("expected timeout error")
	}
}

// TestChatPipeline_TypedDecoding verifies typed struct decoding from msgpack
// (same path as OnTypedMessage).
func TestChatPipeline_TypedDecoding(t *testing.T) {
	raw := map[string]any{
		"request_id":       "req-e2e-001",
		"user_id":          "user-1",
		"message":          "hello",
		"premium":          true,
		"enable_rag":       false,
		"enable_streaming": false,
		"system_prompt":    "Be brief.",
	}
	// Check the marshal error so a bad fixture fails here, not as a
	// confusing field mismatch below.
	data, err := msgpack.Marshal(raw)
	if err != nil {
		t.Fatal(err)
	}

	var req messages.ChatRequest
	if err := msgpack.Unmarshal(data, &req); err != nil {
		t.Fatal(err)
	}
	if req.RequestID != "req-e2e-001" {
		t.Errorf("RequestID = %q", req.RequestID)
	}
	if req.UserID != "user-1" {
		t.Errorf("UserID = %q", req.UserID)
	}
	if req.EffectiveQuery() != "hello" {
		t.Errorf("query = %q", req.EffectiveQuery())
	}
	if req.EnableRAG {
		t.Error("EnableRAG should be false")
	}
	if req.SystemPrompt != "Be brief." {
		t.Errorf("SystemPrompt = %q", req.SystemPrompt)
	}
}

// ────────────────────────────────────────────────────────────────────────────
// Benchmark: full chat pipeline overhead (mock backends)
// ────────────────────────────────────────────────────────────────────────────

func BenchmarkChatPipeline_LLMOnly(b *testing.B) {
	llmSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write([]byte(`{"choices":[{"message":{"content":"answer"}}]}`))
	}))
	defer llmSrv.Close()

	llm := clients.NewLLMClient(llmSrv.URL, 10*time.Second)
	ctx := context.Background()

	// b.Loop resets the timer on its first call; no explicit ResetTimer needed.
	for b.Loop() {
		_, _ = llm.Generate(ctx, "question", "", "")
	}
}

func BenchmarkChatPipeline_RAGFlow(b *testing.B) {
	embedSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write([]byte(`{"data":[{"embedding":[0.1,0.2]}]}`))
	}))
	defer embedSrv.Close()

	rerankSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write([]byte(`{"results":[{"index":0,"relevance_score":0.9}]}`))
	}))
	defer rerankSrv.Close()

	llmSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write([]byte(`{"choices":[{"message":{"content":"answer"}}]}`))
	}))
	defer llmSrv.Close()

	embed := clients.NewEmbeddingsClient(embedSrv.URL, 10*time.Second, "bge")
	rerank := clients.NewRerankerClient(rerankSrv.URL, 10*time.Second)
	llm := clients.NewLLMClient(llmSrv.URL, 10*time.Second)
	ctx := context.Background()

	// b.Loop resets the timer on its first call; no explicit ResetTimer needed.
	for b.Loop() {
		_, _ = embed.EmbedSingle(ctx, "question")
		_, _ = rerank.Rerank(ctx, "question", []string{"doc1", "doc2"}, 2)
		_, _ = llm.Generate(ctx, "question", "context", "")
	}
}