package main

import (
	"context"
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"

	"git.daviestechlabs.io/daviestechlabs/handler-base/clients"
	"git.daviestechlabs.io/daviestechlabs/handler-base/messages"
	"github.com/vmihailenco/msgpack/v5"
)

// ────────────────────────────────────────────────────────────────────────────
// E2E tests: exercise the full chat pipeline with mock backends
// ────────────────────────────────────────────────────────────────────────────

// mockBackends starts httptest servers simulating all downstream services.
type mockBackends struct {
	Embeddings *httptest.Server
	Reranker   *httptest.Server
	LLM        *httptest.Server
	TTS        *httptest.Server
}

// newMockBackends spins up one mock server per downstream service and
// registers cleanup so every server is shut down when the test finishes.
func newMockBackends(t *testing.T) *mockBackends {
	t.Helper()
	m := &mockBackends{}

	// Embeddings: always returns a fixed 4-dimensional vector.
	m.Embeddings = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_ = json.NewEncoder(w).Encode(map[string]any{
			"data": []map[string]any{
				{"embedding": []float64{0.1, 0.2, 0.3, 0.4}},
			},
		})
	}))
	t.Cleanup(m.Embeddings.Close)

	// Reranker: always ranks the first document highest.
	m.Reranker = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_ = json.NewEncoder(w).Encode(map[string]any{
			"results": []map[string]any{
				{"index": 0, "relevance_score": 0.95},
			},
		})
	}))
	t.Cleanup(m.Reranker.Close)

	// LLM: OpenAI-style chat completion with a canned answer. The request
	// body is drained (decoded) so the connection can be reused.
	m.LLM = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var req map[string]any
		_ = json.NewDecoder(r.Body).Decode(&req)
		_ = json.NewEncoder(w).Encode(map[string]any{
			"choices": []map[string]any{
				{"message": map[string]any{
					"content": "Paris is the capital of France.",
				}},
			},
		})
	}))
	t.Cleanup(m.LLM.Close)

	// TTS: returns a few bytes of fake audio.
	m.TTS = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write([]byte{0xDE, 0xAD, 0xBE, 0xEF})
	}))
	t.Cleanup(m.TTS.Close)

	return m
}

// TestChatPipeline_LLMOnly covers the non-RAG path: prompt in, answer out.
func TestChatPipeline_LLMOnly(t *testing.T) {
	m := newMockBackends(t)
	llm := clients.NewLLMClient(m.LLM.URL, 5*time.Second)

	// Simulate what main.go does for a non-RAG request.
	response, err := llm.Generate(context.Background(), "What is the capital of France?", "", "")
	if err != nil {
		t.Fatal(err)
	}
	if response != "Paris is the capital of France." {
		t.Errorf("response = %q", response)
	}
}

// TestChatPipeline_WithRAG covers the full RAG path: embed → rerank → generate.
func TestChatPipeline_WithRAG(t *testing.T) {
	m := newMockBackends(t)
	embeddings := clients.NewEmbeddingsClient(m.Embeddings.URL, 5*time.Second, "bge")
	reranker := clients.NewRerankerClient(m.Reranker.URL, 5*time.Second)
	llm := clients.NewLLMClient(m.LLM.URL, 5*time.Second)
	ctx := context.Background()

	// 1. Embed query
	embedding, err := embeddings.EmbedSingle(ctx, "What is the capital of France?")
	if err != nil {
		t.Fatal(err)
	}
	if len(embedding) == 0 {
		t.Fatal("empty embedding")
	}

	// 2. Rerank (with mock documents)
	docs := []string{"France is a country in Europe", "Paris is its capital"}
	results, err := reranker.Rerank(ctx, "capital of France", docs, 2)
	if err != nil {
		t.Fatal(err)
	}
	if len(results) == 0 {
		t.Fatal("no rerank results")
	}
	if results[0].Score == 0 {
		t.Error("expected non-zero score")
	}

	// 3. Generate with context
	contextText := results[0].Document
	response, err := llm.Generate(ctx, "capital of France?", contextText, "")
	if err != nil {
		t.Fatal(err)
	}
	if response == "" {
		t.Error("empty response")
	}
}

// TestChatPipeline_WithTTS covers generate → synthesize.
func TestChatPipeline_WithTTS(t *testing.T) {
	m := newMockBackends(t)
	llm := clients.NewLLMClient(m.LLM.URL, 5*time.Second)
	tts := clients.NewTTSClient(m.TTS.URL, 5*time.Second, "en")
	ctx := context.Background()

	response, err := llm.Generate(ctx, "hello", "", "")
	if err != nil {
		t.Fatal(err)
	}
	audio, err := tts.Synthesize(ctx, response, "en", "")
	if err != nil {
		t.Fatal(err)
	}
	if len(audio) == 0 {
		t.Error("empty audio")
	}
}

// TestChatPipeline_LLMTimeout verifies the client gives up when the LLM is
// slower than its configured timeout.
func TestChatPipeline_LLMTimeout(t *testing.T) {
	// Simulate slow LLM. Watching r.Context() lets the handler bail out as
	// soon as the client times out, so slow.Close() (which waits for
	// outstanding handlers) does not block for the full delay.
	slow := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		select {
		case <-r.Context().Done():
			return
		case <-time.After(200 * time.Millisecond):
		}
		_ = json.NewEncoder(w).Encode(map[string]any{
			"choices": []map[string]any{
				{"message": map[string]any{"content": "late response"}},
			},
		})
	}))
	defer slow.Close()

	llm := clients.NewLLMClient(slow.URL, 100*time.Millisecond)
	_, err := llm.Generate(context.Background(), "hello", "", "")
	if err == nil {
		t.Error("expected timeout error")
	}
}

// TestChatPipeline_TypedDecoding verifies typed struct decoding from msgpack
// (same path as OnTypedMessage).
func TestChatPipeline_TypedDecoding(t *testing.T) {
	raw := map[string]any{
		"request_id":       "req-e2e-001",
		"user_id":          "user-1",
		"message":          "hello",
		"premium":          true,
		"enable_rag":       false,
		"enable_streaming": false,
		"system_prompt":    "Be brief.",
	}
	// Check the marshal error so a bad fixture fails here, not as a
	// confusing field mismatch below.
	data, err := msgpack.Marshal(raw)
	if err != nil {
		t.Fatal(err)
	}

	var req messages.ChatRequest
	if err := msgpack.Unmarshal(data, &req); err != nil {
		t.Fatal(err)
	}
	if req.RequestID != "req-e2e-001" {
		t.Errorf("RequestID = %q", req.RequestID)
	}
	if req.UserID != "user-1" {
		t.Errorf("UserID = %q", req.UserID)
	}
	if req.EffectiveQuery() != "hello" {
		t.Errorf("query = %q", req.EffectiveQuery())
	}
	if req.EnableRAG {
		t.Error("EnableRAG should be false")
	}
	if req.SystemPrompt != "Be brief." {
		t.Errorf("SystemPrompt = %q", req.SystemPrompt)
	}
}

// ────────────────────────────────────────────────────────────────────────────
// Benchmark: full chat pipeline overhead (mock backends)
// ────────────────────────────────────────────────────────────────────────────

func BenchmarkChatPipeline_LLMOnly(b *testing.B) {
	llmSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write([]byte(`{"choices":[{"message":{"content":"answer"}}]}`))
	}))
	defer llmSrv.Close()

	llm := clients.NewLLMClient(llmSrv.URL, 10*time.Second)
	ctx := context.Background()

	// b.Loop resets the timer on its first call; no explicit ResetTimer needed.
	for b.Loop() {
		_, _ = llm.Generate(ctx, "question", "", "")
	}
}

func BenchmarkChatPipeline_RAGFlow(b *testing.B) {
	embedSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write([]byte(`{"data":[{"embedding":[0.1,0.2]}]}`))
	}))
	defer embedSrv.Close()

	rerankSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write([]byte(`{"results":[{"index":0,"relevance_score":0.9}]}`))
	}))
	defer rerankSrv.Close()

	llmSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write([]byte(`{"choices":[{"message":{"content":"answer"}}]}`))
	}))
	defer llmSrv.Close()

	embed := clients.NewEmbeddingsClient(embedSrv.URL, 10*time.Second, "bge")
	rerank := clients.NewRerankerClient(rerankSrv.URL, 10*time.Second)
	llm := clients.NewLLMClient(llmSrv.URL, 10*time.Second)
	ctx := context.Background()

	// b.Loop resets the timer on its first call; no explicit ResetTimer needed.
	for b.Loop() {
		_, _ = embed.EmbedSingle(ctx, "question")
		_, _ = rerank.Rerank(ctx, "question", []string{"doc1", "doc2"}, 2)
		_, _ = llm.Generate(ctx, "question", "context", "")
	}
}