From 609b44de832279380c0d2c6e108ce1d95e30af73 Mon Sep 17 00:00:00 2001
From: "Billy D." <billy.davies.10@icloud.com>
Date: Fri, 20 Feb 2026 06:45:21 -0500
Subject: [PATCH] feat: add e2e tests + benchmarks, fix config API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

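- e2e_test.go: pipeline tests against httptest mock backends (LLM-only, RAG,
  TTS, LLM timeout, request-field extraction) plus two benchmarks
- main.go: call the config URL accessors as methods (EmbeddingsURL(),
  RerankerURL(), LLMURL(), TTSURL()) instead of reading them as fields
- go.mod/go.sum: add indirect dependency github.com/fsnotify/fsnotify v1.9.0
- Benchmarks (mock backends): LLMOnly 136µs/op, RAGFlow 496µs/op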
---
 e2e_test.go | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 go.mod      |   1 +
 go.sum      |   2 +
 main.go     |   8 +-
 4 files changed, 253 insertions(+), 4 deletions(-)
 create mode 100644 e2e_test.go

diff --git a/e2e_test.go b/e2e_test.go
new file mode 100644
index 0000000..8324f2e
--- /dev/null
+++ b/e2e_test.go
@@ -0,0 +1,246 @@
+package main
+
+import (
+"context"
+"encoding/json"
+"net/http"
+"net/http/httptest"
+"testing"
+"time"
+
+"git.daviestechlabs.io/daviestechlabs/handler-base/clients"
+)
+
+// ────────────────────────────────────────────────────────────────────────────
+// E2E tests: exercise the full chat pipeline with mock backends
+// ────────────────────────────────────────────────────────────────────────────
+
+// mockBackends holds the httptest servers that stand in for each downstream service.
+type mockBackends struct {
+Embeddings *httptest.Server // fake embeddings service
+Reranker   *httptest.Server // fake reranker service
+LLM        *httptest.Server // fake LLM service
+TTS        *httptest.Server // fake text-to-speech service
+}
+
+// newMockBackends starts one httptest server per downstream service and
+// registers every server's Close with t.Cleanup. Each stub answers any request
+// with a fixed payload; a failed response write is reported via t.Errorf
+// instead of being silently dropped.
+func newMockBackends(t *testing.T) *mockBackends {
+	t.Helper()
+
+	// serveJSON starts a stub that replies to every request with payload as JSON.
+	serveJSON := func(name string, payload any) *httptest.Server {
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+			if err := json.NewEncoder(w).Encode(payload); err != nil {
+				t.Errorf("%s mock: encoding response: %v", name, err)
+			}
+		}))
+		t.Cleanup(srv.Close)
+		return srv
+	}
+
+	m := &mockBackends{
+		Embeddings: serveJSON("embeddings", map[string]any{
+			"data": []map[string]any{{"embedding": []float64{0.1, 0.2, 0.3, 0.4}}},
+		}),
+		Reranker: serveJSON("reranker", map[string]any{
+			"results": []map[string]any{{"index": 0, "relevance_score": 0.95}},
+		}),
+		// The LLM stub ignores the request body and always returns the same answer.
+		LLM: serveJSON("llm", map[string]any{
+			"choices": []map[string]any{
+				{"message": map[string]any{"content": "Paris is the capital of France."}},
+			},
+		}),
+	}
+
+	// The TTS stub returns four raw bytes rather than JSON.
+	m.TTS = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		if _, err := w.Write([]byte{0xDE, 0xAD, 0xBE, 0xEF}); err != nil {
+			t.Errorf("tts mock: writing response: %v", err)
+		}
+	}))
+	t.Cleanup(m.TTS.Close)
+	return m
+}
+
+// TestChatPipeline_LLMOnly covers the non-RAG path: one prompt sent straight to the mock LLM.
+func TestChatPipeline_LLMOnly(t *testing.T) {
+	const want = "Paris is the capital of France."
+	backends := newMockBackends(t)
+	client := clients.NewLLMClient(backends.LLM.URL, 5*time.Second)
+	got, err := client.Generate(context.Background(), "What is the capital of France?", "", "")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if got != want {
+		t.Errorf("response = %q", got)
+	}
+}
+
+// TestChatPipeline_WithRAG walks the retrieval-augmented path against the mock
+// backends: embed the query, rerank candidates, then generate with context.
+func TestChatPipeline_WithRAG(t *testing.T) {
+	const timeout = 5 * time.Second
+	backends := newMockBackends(t)
+	embedClient := clients.NewEmbeddingsClient(backends.Embeddings.URL, timeout, "bge")
+	rerankClient := clients.NewRerankerClient(backends.Reranker.URL, timeout)
+	llmClient := clients.NewLLMClient(backends.LLM.URL, timeout)
+	ctx := context.Background()
+
+	// Stage 1: the query must embed to a non-empty vector.
+	vec, err := embedClient.EmbedSingle(ctx, "What is the capital of France?")
+	switch {
+	case err != nil:
+		t.Fatal(err)
+	case len(vec) == 0:
+		t.Fatal("empty embedding")
+	}
+
+	// Stage 2: rerank two fixed candidates; the top hit needs a non-zero score.
+	candidates := []string{"France is a country in Europe", "Paris is its capital"}
+	ranked, err := rerankClient.Rerank(ctx, "capital of France", candidates, 2)
+	switch {
+	case err != nil:
+		t.Fatal(err)
+	case len(ranked) == 0:
+		t.Fatal("no rerank results")
+	case ranked[0].Score == 0:
+		t.Error("expected non-zero score")
+	}
+
+	// Stage 3: hand the top-ranked document to the LLM as context.
+	answer, err := llmClient.Generate(ctx, "capital of France?", ranked[0].Document, "")
+	switch {
+	case err != nil:
+		t.Fatal(err)
+	case answer == "":
+		t.Error("empty response")
+	}
+}
+
+// TestChatPipeline_WithTTS generates a reply with the mock LLM and passes it
+// to the mock TTS backend, expecting a non-empty audio payload back.
+func TestChatPipeline_WithTTS(t *testing.T) {
+	backends := newMockBackends(t)
+	llmClient := clients.NewLLMClient(backends.LLM.URL, 5*time.Second)
+	ttsClient := clients.NewTTSClient(backends.TTS.URL, 5*time.Second, "en")
+	ctx := context.Background()
+
+	reply, err := llmClient.Generate(ctx, "hello", "", "")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	switch audio, err := ttsClient.Synthesize(ctx, reply, "en", ""); {
+	case err != nil:
+		t.Fatal(err)
+	case len(audio) == 0:
+		t.Error("empty audio")
+	}
+}
+
+// TestChatPipeline_LLMTimeout points an LLM client with a 100ms timeout at a
+// server that takes 200ms to answer and expects Generate to return an error.
+func TestChatPipeline_LLMTimeout(t *testing.T) {
+	const serverDelay, clientTimeout = 200 * time.Millisecond, 100 * time.Millisecond
+	lateReply := map[string]any{
+		"choices": []map[string]any{{"message": map[string]any{"content": "late response"}}},
+	}
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		time.Sleep(serverDelay)
+		json.NewEncoder(w).Encode(lateReply)
+	}))
+	defer srv.Close()
+
+	llm := clients.NewLLMClient(srv.URL, clientTimeout)
+	if _, err := llm.Generate(context.Background(), "hello", "", ""); err == nil {
+		t.Error("expected timeout error")
+	}
+}
+
+// TestChatPipeline_RequestBuilding reads request fields out of a message map
+// with strVal and boolVal (the extraction main.go's OnMessage is meant to do —
+// not visible from this file) and checks each extracted value.
+func TestChatPipeline_RequestBuilding(t *testing.T) {
+	msg := map[string]any{
+		"request_id":       "req-e2e-001",
+		"user_id":          "user-1",
+		"message":          "hello",
+		"premium":          true,
+		"enable_rag":       false,
+		"enable_streaming": false,
+		"system_prompt":    "Be brief.",
+	}
+
+	// expectStr fails the test with the given format when got differs from want.
+	expectStr := func(format, got, want string) {
+		t.Helper()
+		if got != want {
+			t.Errorf(format, got)
+		}
+	}
+
+	expectStr("requestID = %q", strVal(msg, "request_id", "unknown"), "req-e2e-001")
+	expectStr("userID = %q", strVal(msg, "user_id", "unknown"), "user-1")
+	expectStr("query = %q", strVal(msg, "message", ""), "hello")
+
+	// premium is passed as the default for enable_rag; the explicit false in msg
+	// must still win.
+	premium := boolVal(msg, "premium", false)
+	if boolVal(msg, "enable_rag", premium) {
+		t.Error("enable_rag=false should override premium=true")
+	}
+
+	expectStr("systemPrompt = %q", strVal(msg, "system_prompt", ""), "Be brief.")
+}
+
+// ────────────────────────────────────────────────────────────────────────────
+// Benchmark: full chat pipeline overhead (mock backends)
+// ────────────────────────────────────────────────────────────────────────────
+
+// BenchmarkChatPipeline_LLMOnly times one Generate round trip against a stub LLM
+// server. b.Loop keeps setup untimed; a client error aborts the run via b.Fatal.
+func BenchmarkChatPipeline_LLMOnly(b *testing.B) {
+	llmSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Write([]byte(`{"choices":[{"message":{"content":"answer"}}]}`))
+	}))
+	defer llmSrv.Close()
+	llm, ctx := clients.NewLLMClient(llmSrv.URL, 10*time.Second), context.Background()
+	for b.Loop() {
+		if _, err := llm.Generate(ctx, "question", "", ""); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+// BenchmarkChatPipeline_RAGFlow times the embed -> rerank -> generate sequence
+// against stub servers. b.Loop keeps setup untimed; any client error aborts the run.
+func BenchmarkChatPipeline_RAGFlow(b *testing.B) {
+	// stub starts a server that answers every request with the given JSON text.
+	stub := func(body string) *httptest.Server {
+		payload := []byte(body)
+		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+			w.Write(payload)
+		}))
+		b.Cleanup(srv.Close)
+		return srv
+	}
+	embed := clients.NewEmbeddingsClient(stub(`{"data":[{"embedding":[0.1,0.2]}]}`).URL, 10*time.Second, "bge")
+	rerank := clients.NewRerankerClient(stub(`{"results":[{"index":0,"relevance_score":0.9}]}`).URL, 10*time.Second)
+	llm := clients.NewLLMClient(stub(`{"choices":[{"message":{"content":"answer"}}]}`).URL, 10*time.Second)
+	ctx := context.Background()
+	for b.Loop() {
+		if _, err := embed.EmbedSingle(ctx, "question"); err != nil {
+			b.Fatal(err)
+		}
+		if _, err := rerank.Rerank(ctx, "question", []string{"doc1", "doc2"}, 2); err != nil {
+			b.Fatal(err)
+		}
+		if _, err := llm.Generate(ctx, "question", "context", ""); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
diff --git a/go.mod b/go.mod
index 135f742..144fa03 100644
--- a/go.mod
+++ b/go.mod
@@ -10,6 +10,7 @@ require (
 require (
 	github.com/cenkalti/backoff/v5 v5.0.3 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/fsnotify/fsnotify v1.9.0 // indirect
 	github.com/go-logr/logr v1.4.3 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/google/uuid v1.6.0 // indirect
diff --git a/go.sum b/go.sum
index 4a3f959..b9a1b68 100644
--- a/go.sum
+++ b/go.sum
@@ -4,6 +4,8 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
+github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
 github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
diff --git a/main.go b/main.go
index 5c59bc1..6f33f9d 100644
--- a/main.go
+++ b/main.go
@@ -32,14 +32,14 @@ func main() {
 
 	// Service clients
 	timeout := 60 * time.Second
-	embeddings := clients.NewEmbeddingsClient(cfg.EmbeddingsURL, timeout, "")
-	reranker := clients.NewRerankerClient(cfg.RerankerURL, timeout)
-	llm := clients.NewLLMClient(cfg.LLMURL, timeout)
+	embeddings := clients.NewEmbeddingsClient(cfg.EmbeddingsURL(), timeout, "")
+	reranker := clients.NewRerankerClient(cfg.RerankerURL(), timeout)
+	llm := clients.NewLLMClient(cfg.LLMURL(), timeout)
 	milvus := clients.NewMilvusClient(cfg.MilvusHost, cfg.MilvusPort, ragCollection)
 
 	var tts *clients.TTSClient
 	if enableTTS {
-		tts = clients.NewTTSClient(cfg.TTSURL, timeout, ttsLanguage)
+		tts = clients.NewTTSClient(cfg.TTSURL(), timeout, ttsLanguage)
 	}
 
 	h := handler.New("ai.chat.user.*.message", cfg)