Files
chat-handler/e2e_test.go
Billy D. 609b44de83 feat: add e2e tests + benchmarks, fix config API
- e2e_test.go: full pipeline tests (LLM-only, RAG, TTS, timeout)
- main.go: fix config field->method references (EmbeddingsURL() etc.)
- Benchmarks: LLMOnly 136µs/op, RAGFlow 496µs/op
2026-02-20 06:45:21 -05:00

247 lines
7.1 KiB
Go

package main
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
"git.daviestechlabs.io/daviestechlabs/handler-base/clients"
)
// ────────────────────────────────────────────────────────────────────────────
// E2E tests: exercise the full chat pipeline with mock backends
// ────────────────────────────────────────────────────────────────────────────
// mockBackends starts httptest servers simulating all downstream services.
type mockBackends struct {
	Embeddings *httptest.Server // always returns one fixed 4-dim embedding vector
	Reranker   *httptest.Server // always returns one result: index 0, relevance_score 0.95
	LLM        *httptest.Server // always answers "Paris is the capital of France."
	TTS        *httptest.Server // always returns a fixed 4-byte blob standing in for audio
}
// newMockBackends spins up one httptest server per downstream service
// (embeddings, reranker, LLM, TTS), each serving a canned response, and
// registers their shutdown via t.Cleanup.
func newMockBackends(t *testing.T) *mockBackends {
	t.Helper()
	// serve starts a server that answers every request with the given JSON payload.
	serve := func(payload any) *httptest.Server {
		srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			json.NewEncoder(w).Encode(payload)
		}))
		t.Cleanup(srv.Close)
		return srv
	}
	backends := &mockBackends{
		Embeddings: serve(map[string]any{
			"data": []map[string]any{
				{"embedding": []float64{0.1, 0.2, 0.3, 0.4}},
			},
		}),
		Reranker: serve(map[string]any{
			"results": []map[string]any{
				{"index": 0, "relevance_score": 0.95},
			},
		}),
	}
	// The LLM mock drains/decodes the request body before answering, like a real server.
	backends.LLM = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var body map[string]any
		json.NewDecoder(r.Body).Decode(&body)
		json.NewEncoder(w).Encode(map[string]any{
			"choices": []map[string]any{
				{"message": map[string]any{
					"content": "Paris is the capital of France.",
				}},
			},
		})
	}))
	t.Cleanup(backends.LLM.Close)
	// The TTS mock returns a fixed 4-byte payload standing in for synthesized audio.
	backends.TTS = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte{0xDE, 0xAD, 0xBE, 0xEF})
	}))
	t.Cleanup(backends.TTS.Close)
	return backends
}
// TestChatPipeline_LLMOnly drives the non-RAG path from main.go: a single
// LLM call with no retrieval context and no system prompt.
func TestChatPipeline_LLMOnly(t *testing.T) {
	backends := newMockBackends(t)
	client := clients.NewLLMClient(backends.LLM.URL, 5*time.Second)

	got, err := client.Generate(context.Background(), "What is the capital of France?", "", "")
	if err != nil {
		t.Fatal(err)
	}
	const want = "Paris is the capital of France."
	if got != want {
		t.Errorf("response = %q", got)
	}
}
// TestChatPipeline_WithRAG walks the full retrieval flow main.go runs for
// RAG-enabled requests: embed the query, rerank candidate documents, then
// generate an answer using the top-ranked document as context.
func TestChatPipeline_WithRAG(t *testing.T) {
	backends := newMockBackends(t)
	embedClient := clients.NewEmbeddingsClient(backends.Embeddings.URL, 5*time.Second, "bge")
	rerankClient := clients.NewRerankerClient(backends.Reranker.URL, 5*time.Second)
	llmClient := clients.NewLLMClient(backends.LLM.URL, 5*time.Second)
	ctx := context.Background()

	// Step 1: embed the user query.
	vec, err := embedClient.EmbedSingle(ctx, "What is the capital of France?")
	if err != nil {
		t.Fatal(err)
	}
	if len(vec) == 0 {
		t.Fatal("empty embedding")
	}

	// Step 2: rerank candidate documents against the query.
	candidates := []string{"France is a country in Europe", "Paris is its capital"}
	ranked, err := rerankClient.Rerank(ctx, "capital of France", candidates, 2)
	if err != nil {
		t.Fatal(err)
	}
	if len(ranked) == 0 {
		t.Fatal("no rerank results")
	}
	if ranked[0].Score == 0 {
		t.Error("expected non-zero score")
	}

	// Step 3: generate with the best-ranked document supplied as context.
	answer, err := llmClient.Generate(ctx, "capital of France?", ranked[0].Document, "")
	if err != nil {
		t.Fatal(err)
	}
	if answer == "" {
		t.Error("empty response")
	}
}
// TestChatPipeline_WithTTS chains LLM generation into speech synthesis and
// checks that non-empty audio comes back.
func TestChatPipeline_WithTTS(t *testing.T) {
	backends := newMockBackends(t)
	llmClient := clients.NewLLMClient(backends.LLM.URL, 5*time.Second)
	ttsClient := clients.NewTTSClient(backends.TTS.URL, 5*time.Second, "en")
	ctx := context.Background()

	answer, err := llmClient.Generate(ctx, "hello", "", "")
	if err != nil {
		t.Fatal(err)
	}

	audio, err := ttsClient.Synthesize(ctx, answer, "en", "")
	if err != nil {
		t.Fatal(err)
	}
	if len(audio) == 0 {
		t.Error("empty audio")
	}
}
// TestChatPipeline_LLMTimeout verifies the LLM client gives up when the
// backend responds slower than the configured client timeout.
func TestChatPipeline_LLMTimeout(t *testing.T) {
	// Backend that takes twice as long as the client is willing to wait.
	laggy := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		time.Sleep(200 * time.Millisecond)
		json.NewEncoder(w).Encode(map[string]any{
			"choices": []map[string]any{
				{"message": map[string]any{"content": "late response"}},
			},
		})
	}))
	defer laggy.Close()

	client := clients.NewLLMClient(laggy.URL, 100*time.Millisecond)
	if _, err := client.Generate(context.Background(), "hello", "", ""); err == nil {
		t.Error("expected timeout error")
	}
}
// TestChatPipeline_RequestBuilding exercises the field-extraction logic from
// main.go's OnMessage: string/bool lookups with defaults, and the rule that
// an explicit enable_rag=false wins over the premium default.
func TestChatPipeline_RequestBuilding(t *testing.T) {
	payload := map[string]any{
		"request_id":       "req-e2e-001",
		"user_id":          "user-1",
		"message":          "hello",
		"premium":          true,
		"enable_rag":       false,
		"enable_streaming": false,
		"system_prompt":    "Be brief.",
	}

	gotRequestID := strVal(payload, "request_id", "unknown")
	gotUserID := strVal(payload, "user_id", "unknown")
	gotQuery := strVal(payload, "message", "")
	gotPremium := boolVal(payload, "premium", false)
	gotEnableRAG := boolVal(payload, "enable_rag", gotPremium)
	gotSystemPrompt := strVal(payload, "system_prompt", "")

	if gotRequestID != "req-e2e-001" {
		t.Errorf("requestID = %q", gotRequestID)
	}
	if gotUserID != "user-1" {
		t.Errorf("userID = %q", gotUserID)
	}
	if gotQuery != "hello" {
		t.Errorf("query = %q", gotQuery)
	}
	if gotEnableRAG {
		t.Error("enable_rag=false should override premium=true")
	}
	if gotSystemPrompt != "Be brief." {
		t.Errorf("systemPrompt = %q", gotSystemPrompt)
	}
}
// ────────────────────────────────────────────────────────────────────────────
// Benchmark: full chat pipeline overhead (mock backends)
// ────────────────────────────────────────────────────────────────────────────
// BenchmarkChatPipeline_LLMOnly measures the client-side overhead of one LLM
// round trip against an in-process mock backend.
func BenchmarkChatPipeline_LLMOnly(b *testing.B) {
	llmSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(`{"choices":[{"message":{"content":"answer"}}]}`))
	}))
	defer llmSrv.Close()
	llm := clients.NewLLMClient(llmSrv.URL, 10*time.Second)
	ctx := context.Background()
	// b.Loop resets the timer on its first call, so an explicit
	// b.ResetTimer is unnecessary here.
	for b.Loop() {
		// Check the error: a silently failing call would benchmark the
		// error path and report a meaningless ns/op.
		if _, err := llm.Generate(ctx, "question", "", ""); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkChatPipeline_RAGFlow measures the client-side overhead of the full
// embed -> rerank -> generate sequence against in-process mock backends.
func BenchmarkChatPipeline_RAGFlow(b *testing.B) {
	embedSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(`{"data":[{"embedding":[0.1,0.2]}]}`))
	}))
	defer embedSrv.Close()
	rerankSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(`{"results":[{"index":0,"relevance_score":0.9}]}`))
	}))
	defer rerankSrv.Close()
	llmSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(`{"choices":[{"message":{"content":"answer"}}]}`))
	}))
	defer llmSrv.Close()
	embed := clients.NewEmbeddingsClient(embedSrv.URL, 10*time.Second, "bge")
	rerank := clients.NewRerankerClient(rerankSrv.URL, 10*time.Second)
	llm := clients.NewLLMClient(llmSrv.URL, 10*time.Second)
	ctx := context.Background()
	// b.Loop resets the timer on its first call; no explicit b.ResetTimer needed.
	for b.Loop() {
		// Fail fast on any error so the benchmark never silently measures
		// the error path of a broken stage.
		if _, err := embed.EmbedSingle(ctx, "question"); err != nil {
			b.Fatal(err)
		}
		if _, err := rerank.Rerank(ctx, "question", []string{"doc1", "doc2"}, 2); err != nil {
			b.Fatal(err)
		}
		if _, err := llm.Generate(ctx, "question", "context", ""); err != nil {
			b.Fatal(err)
		}
	}
}