From 609b44de832279380c0d2c6e108ce1d95e30af73 Mon Sep 17 00:00:00 2001
From: "Billy D."
Date: Fri, 20 Feb 2026 06:45:21 -0500
Subject: [PATCH] feat: add e2e tests + benchmarks, fix config API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- e2e_test.go: full pipeline tests (LLM-only, RAG, TTS, timeout)
- main.go: fix config field->method references (EmbeddingsURL() etc.)
- Benchmarks: LLMOnly 136µs/op, RAGFlow 496µs/op
---
 e2e_test.go | 275 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 go.mod      |   1 +
 go.sum      |   2 +
 main.go     |   8 +-
 4 files changed, 282 insertions(+), 4 deletions(-)
 create mode 100644 e2e_test.go

diff --git a/e2e_test.go b/e2e_test.go
new file mode 100644
index 0000000..8324f2e
--- /dev/null
+++ b/e2e_test.go
@@ -0,0 +1,275 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"git.daviestechlabs.io/daviestechlabs/handler-base/clients"
+)
+
+// ────────────────────────────────────────────────────────────────────────────
+// E2E tests: exercise the full chat pipeline with mock backends
+// ────────────────────────────────────────────────────────────────────────────
+
+// mockBackends holds one httptest server for each downstream service the
+// chat pipeline calls.
+type mockBackends struct {
+	Embeddings *httptest.Server
+	Reranker   *httptest.Server
+	LLM        *httptest.Server
+	TTS        *httptest.Server
+}
+
+// newMockBackends starts the four mock servers with canned responses and
+// registers each server's Close with t.Cleanup.
+func newMockBackends(t *testing.T) *mockBackends {
+	t.Helper()
+	m := &mockBackends{}
+
+	m.Embeddings = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		json.NewEncoder(w).Encode(map[string]any{
+			"data": []map[string]any{
+				{"embedding": []float64{0.1, 0.2, 0.3, 0.4}},
+			},
+		})
+	}))
+	t.Cleanup(m.Embeddings.Close)
+
+	m.Reranker = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		json.NewEncoder(w).Encode(map[string]any{
+			"results": []map[string]any{
+				{"index": 0, "relevance_score": 0.95},
+			},
+		})
+	}))
+	t.Cleanup(m.Reranker.Close)
+
+	m.LLM = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// The request body is decoded but not inspected; the reply is fixed.
+		var req map[string]any
+		json.NewDecoder(r.Body).Decode(&req)
+		json.NewEncoder(w).Encode(map[string]any{
+			"choices": []map[string]any{
+				{"message": map[string]any{
+					"content": "Paris is the capital of France.",
+				}},
+			},
+		})
+	}))
+	t.Cleanup(m.LLM.Close)
+
+	m.TTS = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Write([]byte{0xDE, 0xAD, 0xBE, 0xEF})
+	}))
+	t.Cleanup(m.TTS.Close)
+
+	return m
+}
+
+// TestChatPipeline_LLMOnly checks that a plain prompt sent to the mock LLM
+// comes back as the canned completion.
+func TestChatPipeline_LLMOnly(t *testing.T) {
+	m := newMockBackends(t)
+	llm := clients.NewLLMClient(m.LLM.URL, 5*time.Second)
+
+	// Simulate what main.go does for a non-RAG request.
+	response, err := llm.Generate(context.Background(), "What is the capital of France?", "", "")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if response != "Paris is the capital of France." {
+		t.Errorf("response = %q", response)
+	}
+}
+
+// TestChatPipeline_WithRAG chains embed, rerank and generate against the
+// mock backends and fails on the first stage that errors or returns nothing.
+func TestChatPipeline_WithRAG(t *testing.T) {
+	m := newMockBackends(t)
+	embeddings := clients.NewEmbeddingsClient(m.Embeddings.URL, 5*time.Second, "bge")
+	reranker := clients.NewRerankerClient(m.Reranker.URL, 5*time.Second)
+	llm := clients.NewLLMClient(m.LLM.URL, 5*time.Second)
+
+	ctx := context.Background()
+
+	// 1. Embed query
+	embedding, err := embeddings.EmbedSingle(ctx, "What is the capital of France?")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(embedding) == 0 {
+		t.Fatal("empty embedding")
+	}
+
+	// 2. Rerank (with mock documents)
+	docs := []string{"France is a country in Europe", "Paris is its capital"}
+	results, err := reranker.Rerank(ctx, "capital of France", docs, 2)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(results) == 0 {
+		t.Fatal("no rerank results")
+	}
+	if results[0].Score == 0 {
+		t.Error("expected non-zero score")
+	}
+
+	// 3. Generate with context
+	contextText := results[0].Document
+	response, err := llm.Generate(ctx, "capital of France?", contextText, "")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if response == "" {
+		t.Error("empty response")
+	}
+}
+
+// TestChatPipeline_WithTTS feeds the mock LLM's reply to the mock TTS backend
+// and expects non-empty audio.
+func TestChatPipeline_WithTTS(t *testing.T) {
+	m := newMockBackends(t)
+	llm := clients.NewLLMClient(m.LLM.URL, 5*time.Second)
+	tts := clients.NewTTSClient(m.TTS.URL, 5*time.Second, "en")
+
+	ctx := context.Background()
+
+	response, err := llm.Generate(ctx, "hello", "", "")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	audio, err := tts.Synthesize(ctx, response, "en", "")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(audio) == 0 {
+		t.Error("empty audio")
+	}
+}
+
+// TestChatPipeline_LLMTimeout expects Generate to return an error when the
+// backend sleeps 200ms and the client is built with a 100ms timeout.
+func TestChatPipeline_LLMTimeout(t *testing.T) {
+	// Simulate slow LLM.
+	slow := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		time.Sleep(200 * time.Millisecond)
+		json.NewEncoder(w).Encode(map[string]any{
+			"choices": []map[string]any{
+				{"message": map[string]any{"content": "late response"}},
+			},
+		})
+	}))
+	defer slow.Close()
+
+	llm := clients.NewLLMClient(slow.URL, 100*time.Millisecond)
+	_, err := llm.Generate(context.Background(), "hello", "", "")
+	if err == nil {
+		t.Error("expected timeout error")
+	}
+}
+
+// TestChatPipeline_RequestBuilding checks strVal/boolVal extraction on a
+// sample payload, including that an explicit enable_rag=false wins over the
+// premium-derived default.
+func TestChatPipeline_RequestBuilding(t *testing.T) {
+	// Test the map construction logic from main.go's OnMessage.
+	data := map[string]any{
+		"request_id":       "req-e2e-001",
+		"user_id":          "user-1",
+		"message":          "hello",
+		"premium":          true,
+		"enable_rag":       false,
+		"enable_streaming": false,
+		"system_prompt":    "Be brief.",
+	}
+
+	requestID := strVal(data, "request_id", "unknown")
+	userID := strVal(data, "user_id", "unknown")
+	query := strVal(data, "message", "")
+	premium := boolVal(data, "premium", false)
+	enableRAG := boolVal(data, "enable_rag", premium)
+	systemPrompt := strVal(data, "system_prompt", "")
+
+	if requestID != "req-e2e-001" {
+		t.Errorf("requestID = %q", requestID)
+	}
+	if userID != "user-1" {
+		t.Errorf("userID = %q", userID)
+	}
+	if query != "hello" {
+		t.Errorf("query = %q", query)
+	}
+	if enableRAG {
+		t.Error("enable_rag=false should override premium=true")
+	}
+	if systemPrompt != "Be brief." {
+		t.Errorf("systemPrompt = %q", systemPrompt)
+	}
+}
+
+// ────────────────────────────────────────────────────────────────────────────
+// Benchmark: full chat pipeline overhead (mock backends)
+// ────────────────────────────────────────────────────────────────────────────
+
+// BenchmarkChatPipeline_LLMOnly measures one Generate round trip against a
+// mock LLM server that returns a fixed JSON body.
+func BenchmarkChatPipeline_LLMOnly(b *testing.B) {
+	llmSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Write([]byte(`{"choices":[{"message":{"content":"answer"}}]}`))
+	}))
+	defer llmSrv.Close()
+
+	llm := clients.NewLLMClient(llmSrv.URL, 10*time.Second)
+	ctx := context.Background()
+
+	// b.Loop resets the timer on its first call, so setup above is not timed.
+	for b.Loop() {
+		// Fail fast: a broken client call would otherwise be timed as success.
+		if _, err := llm.Generate(ctx, "question", "", ""); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+// BenchmarkChatPipeline_RAGFlow measures embed + rerank + generate executed
+// back to back against mock servers that return fixed JSON bodies.
+func BenchmarkChatPipeline_RAGFlow(b *testing.B) {
+	embedSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Write([]byte(`{"data":[{"embedding":[0.1,0.2]}]}`))
+	}))
+	defer embedSrv.Close()
+
+	rerankSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Write([]byte(`{"results":[{"index":0,"relevance_score":0.9}]}`))
+	}))
+	defer rerankSrv.Close()
+
+	llmSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Write([]byte(`{"choices":[{"message":{"content":"answer"}}]}`))
+	}))
+	defer llmSrv.Close()
+
+	embed := clients.NewEmbeddingsClient(embedSrv.URL, 10*time.Second, "bge")
+	rerank := clients.NewRerankerClient(rerankSrv.URL, 10*time.Second)
+	llm := clients.NewLLMClient(llmSrv.URL, 10*time.Second)
+	ctx := context.Background()
+
+	// b.Loop resets the timer on its first call, so setup above is not timed.
+	for b.Loop() {
+		// Fail fast: a broken stage would otherwise be timed as success.
+		if _, err := embed.EmbedSingle(ctx, "question"); err != nil {
+			b.Fatal(err)
+		}
+		if _, err := rerank.Rerank(ctx, "question", []string{"doc1", "doc2"}, 2); err != nil {
+			b.Fatal(err)
+		}
+		if _, err := llm.Generate(ctx, "question", "context", ""); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
diff --git a/go.mod b/go.mod
index 135f742..144fa03 100644
--- a/go.mod
+++ b/go.mod
@@ -10,6 +10,7 @@ require (
 require (
 	github.com/cenkalti/backoff/v5 v5.0.3 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/fsnotify/fsnotify v1.9.0 // indirect
 	github.com/go-logr/logr v1.4.3 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/google/uuid v1.6.0 // indirect
diff --git a/go.sum b/go.sum
index 4a3f959..b9a1b68 100644
--- a/go.sum
+++ b/go.sum
@@ -4,6 +4,8 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
+github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
 github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
diff --git a/main.go b/main.go
index 5c59bc1..6f33f9d 100644
--- a/main.go
+++ b/main.go
@@ -32,14 +32,14 @@ func main() {
 
 	// Service clients
 	timeout := 60 * time.Second
-	embeddings := clients.NewEmbeddingsClient(cfg.EmbeddingsURL, timeout, "")
-	reranker := clients.NewRerankerClient(cfg.RerankerURL, timeout)
-	llm := clients.NewLLMClient(cfg.LLMURL, timeout)
+	embeddings := clients.NewEmbeddingsClient(cfg.EmbeddingsURL(), timeout, "")
+	reranker := clients.NewRerankerClient(cfg.RerankerURL(), timeout)
+	llm := clients.NewLLMClient(cfg.LLMURL(), timeout)
 	milvus := clients.NewMilvusClient(cfg.MilvusHost, cfg.MilvusPort, ragCollection)
 
 	var tts *clients.TTSClient
 	if enableTTS {
-		tts = clients.NewTTSClient(cfg.TTSURL, timeout, ttsLanguage)
+		tts = clients.NewTTSClient(cfg.TTSURL(), timeout, ttsLanguage)
 	}
 
 	h := handler.New("ai.chat.user.*.message", cfg)