2026-02-20 12:34:32 +00:00
3 changed files with 281 additions and 0 deletions
--- a/e2e_test.go
+++ b/e2e_test.go
@@ -0,0 +1,278 @@
+package main
+
+import (
+	"bytes"
+	"encoding/binary"
+	"encoding/json"
+	"math"
+	"mime/multipart"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+)
+
+// ────────────────────────────────────────────────────────────────────────────
+// E2E tests: audio buffer lifecycle + transcription pipeline
+// ────────────────────────────────────────────────────────────────────────────
+
+func TestAudioBufferE2E_FullLifecycle(t *testing.T) {
+	// Simulate a full session: start → chunks → process → end
+	ab := newAudioBuffer("e2e-session")
+
+	// Send 10 chunks of 50 KB each
+	for i := 0; i < 10; i++ {
+		ab.addChunk(make([]byte, 50000))
+	}
+
+	ab.mu.Lock()
+	if ab.totalBytes != 500000 {
+		t.Errorf("totalBytes = %d, want 500000", ab.totalBytes)
+	}
+	ab.mu.Unlock()
+
+	// Should process (meets 512KB threshold approximately)
+	if !ab.shouldProcess(512000, 5120000, 2.0) {
+		// Under threshold but check with voice activity
+		ab.hasVoiceActivity = true
+	}
+
+	// Get audio and verify concatenation
+	audio := ab.getAudio()
+	if len(audio) != 500000 {
+		t.Errorf("getAudio() len = %d, want 500000", len(audio))
+	}
+
+	// Clear and verify
+	ab.clear()
+	ab.mu.Lock()
+	seq := ab.sequence
+	total := ab.totalBytes
+	ab.mu.Unlock()
+	if seq != 1 {
+		t.Errorf("sequence = %d, want 1", seq)
+	}
+	if total != 0 {
+		t.Errorf("totalBytes after clear = %d", total)
+	}
+
+	// Mark complete
+	ab.markComplete()
+	ab.mu.Lock()
+	if !ab.isComplete {
+		t.Error("expected isComplete=true")
+	}
+	ab.mu.Unlock()
+}
+
+// TestAudioBufferE2E_InterruptDuringResponse calls checkInterrupt twice on a
+// buffer in the responding state, 150ms apart, with loud audio and a 0.1
+// duration argument, and expects the second call to report an interrupt.
+func TestAudioBufferE2E_InterruptDuringResponse(t *testing.T) {
+	session := newAudioBuffer("interrupt-session")
+	session.setState(stateResponding)
+	speech := makeLoudAudio(500)
+	check := func() bool { return session.checkInterrupt(speech, true, 0.01, 0.1) }
+
+	// The first call's result is discarded; it presumably only starts the
+	// interrupt timer — confirm against checkInterrupt.
+	check()
+	time.Sleep(150 * time.Millisecond)
+
+	// The wait above is meant to outlast the 0.1 duration argument.
+	if !check() {
+		t.Error("expected interrupt after duration threshold")
+	}
+}
+
+// TestAudioBufferE2E_InterruptDisabled passes false as checkInterrupt's second
+// argument and expects loud audio not to be reported as an interrupt.
+func TestAudioBufferE2E_InterruptDisabled(t *testing.T) {
+	session := newAudioBuffer("no-interrupt")
+	session.setState(stateResponding)
+	if triggered := session.checkInterrupt(makeLoudAudio(500), false, 0.01, 0.0); triggered {
+		t.Error("interrupt should not trigger when disabled")
+	}
+}
+
+// TestAudioBufferE2E_ConcurrentChunks adds one chunk from each of 20
+// goroutines and checks that the buffer accounts for every chunk and byte.
+func TestAudioBufferE2E_ConcurrentChunks(t *testing.T) {
+	const workers, chunkBytes, wantBytes = 20, 1000, 20 * 1000
+	shared := newAudioBuffer("concurrent")
+
+	var pending sync.WaitGroup
+	for w := 0; w < workers; w++ {
+		pending.Add(1)
+		go func() {
+			defer pending.Done()
+			shared.addChunk(make([]byte, chunkBytes))
+		}()
+	}
+	pending.Wait()
+
+	// Snapshot the counters while holding the buffer's mutex.
+	shared.mu.Lock()
+	gotBytes, gotChunks := shared.totalBytes, len(shared.chunks)
+	shared.mu.Unlock()
+
+	if gotBytes != wantBytes {
+		t.Errorf("totalBytes = %d, want %d", gotBytes, wantBytes)
+	}
+	if gotChunks != workers {
+		t.Errorf("chunks = %d, want %d", gotChunks, workers)
+	}
+
+	if joined := shared.getAudio(); len(joined) != wantBytes {
+		t.Errorf("getAudio len = %d, want %d", len(joined), wantBytes)
+	}
+}
+
+// TestTranscriptionE2E_MockWhisper uploads a multipart file to a stub Whisper
+// server (which rejects non-multipart requests) and checks the JSON transcript.
+func TestTranscriptionE2E_MockWhisper(t *testing.T) {
+	whisperSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost {
+			t.Errorf("expected POST, got %s", r.Method)
+		}
+		ct := r.Header.Get("Content-Type")
+		if !strings.HasPrefix(ct, "multipart/form-data") {
+			t.Errorf("Content-Type = %q, want multipart/form-data", ct)
+		} else if err := r.ParseMultipartForm(32 << 20); err != nil {
+			t.Errorf("multipart parse: %v", err)
+		} else if file, _, err := r.FormFile("file"); err != nil {
+			t.Errorf("missing 'file' field: %v", err)
+		} else if err := file.Close(); err != nil {
+			t.Errorf("closing uploaded file: %v", err)
+		}
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(map[string]string{"text": "hello world"}); err != nil {
+			t.Errorf("encoding response: %v", err)
+		}
+	}))
+	defer whisperSrv.Close()
+
+	// Build the upload; Close writes the final boundary, so its error matters.
+	var body bytes.Buffer
+	form := multipart.NewWriter(&body)
+	part, err := form.CreateFormFile("file", "audio.wav")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, err := part.Write(make([]byte, 8000)); err != nil { // simulated audio
+		t.Fatalf("writing audio part: %v", err)
+	}
+	if err := form.Close(); err != nil {
+		t.Fatalf("closing multipart writer: %v", err)
+	}
+
+	resp, err := http.Post(whisperSrv.URL+"/v1/audio/transcriptions", form.FormDataContentType(), &body)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		t.Errorf("status = %d", resp.StatusCode)
+	}
+	var result map[string]string
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		t.Fatalf("decoding response: %v", err)
+	}
+	if result["text"] != "hello world" {
+		t.Errorf("text = %q, want %q", result["text"], "hello world")
+	}
+}
+
+func TestAudioRMSE2E_RealisticSignal(t *testing.T) {
+	// Generate a sine wave at 440 Hz, 16kHz sample rate
+	sampleRate := 16000
+	duration := 0.1 // 100ms
+	numSamples := int(float64(sampleRate) * duration)
+	audio := make([]byte, numSamples*2)
+
+	amplitude := 16000.0
+	for i := 0; i < numSamples; i++ {
+		sample := int16(amplitude * math.Sin(2*math.Pi*440*float64(i)/float64(sampleRate)))
+		binary.LittleEndian.PutUint16(audio[i*2:], uint16(sample))
+	}
+
+	rms := calculateAudioRMS(audio)
+	// RMS of a sine wave = amplitude / sqrt(2) / 32768
+	expectedRMS := amplitude / math.Sqrt(2) / 32768.0
+	tolerance := 0.01
+	if math.Abs(rms-expectedRMS) > tolerance {
+		t.Errorf("RMS = %.4f, expected ~%.4f (±%.2f)", rms, expectedRMS, tolerance)
+	}
+
+	if !detectVoiceActivity(audio) {
+		t.Error("440 Hz sine at amplitude 16000 should be detected as voice")
+	}
+}
+
+// ────────────────────────────────────────────────────────────────────────────
+// Benchmarks
+// ────────────────────────────────────────────────────────────────────────────
+
+// BenchmarkAudioBufferAddChunk measures addChunk with one reused 4096-byte
+// chunk; b.Loop resets the timer itself, so setup above the loop is untimed.
+func BenchmarkAudioBufferAddChunk(b *testing.B) {
+	buf := newAudioBuffer("bench")
+	payload := make([]byte, 4096)
+
+	for b.Loop() {
+		buf.addChunk(payload)
+	}
+}
+
+// BenchmarkAudioBufferGetAudio measures getAudio on a buffer preloaded with
+// 100 chunks of 4096 bytes; b.Loop resets the timer, so the preload is untimed.
+func BenchmarkAudioBufferGetAudio(b *testing.B) {
+	buf := newAudioBuffer("bench")
+	for loaded := 0; loaded < 100; loaded++ {
+		buf.addChunk(make([]byte, 4096))
+	}
+	for b.Loop() {
+		_ = buf.getAudio()
+	}
+}
+
+// BenchmarkCalculateAudioRMS measures calculateAudioRMS on 16000 little-endian
+// 16-bit sine samples; b.Loop resets the timer itself, so setup is untimed.
+func BenchmarkCalculateAudioRMS(b *testing.B) {
+	pcm := make([]byte, 32000) // 1 second at 16kHz, 16-bit
+	for n := 0; n < len(pcm)/2; n++ {
+		v := int16(16000 * math.Sin(float64(n)*0.1))
+		binary.LittleEndian.PutUint16(pcm[2*n:], uint16(v))
+	}
+	for b.Loop() {
+		calculateAudioRMS(pcm)
+	}
+}
+
+// BenchmarkDetectVoiceActivity measures detectVoiceActivity on 4000
+// little-endian 16-bit sine samples; b.Loop resets the timer after setup.
+func BenchmarkDetectVoiceActivity(b *testing.B) {
+	pcm := make([]byte, 8000) // 250ms at 16kHz
+	for n := 0; n < len(pcm)/2; n++ {
+		v := int16(10000 * math.Sin(float64(n)*0.2))
+		binary.LittleEndian.PutUint16(pcm[2*n:], uint16(v))
+	}
+	for b.Loop() {
+		detectVoiceActivity(pcm)
+	}
+}
+
+// BenchmarkCheckInterrupt measures checkInterrupt on loud audio while the
+// buffer is in the responding state; b.Loop resets the timer after setup.
+func BenchmarkCheckInterrupt(b *testing.B) {
+	buf := newAudioBuffer("bench")
+	buf.setState(stateResponding)
+	speech := makeLoudAudio(500)
+	for b.Loop() {
+		buf.checkInterrupt(speech, true, 0.02, 999.0) // duration too high to ever trigger
+		buf.mu.Lock()
+		buf.interruptStartTime = nil // clear the start time so each iteration begins fresh
+		buf.mu.Unlock()
+	}
+}
--- a/go.mod
+++ b/go.mod
@@ -11,6 +11,7 @@ require (
 require (
 	github.com/cenkalti/backoff/v5 v5.0.3 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/fsnotify/fsnotify v1.9.0 // indirect
 	github.com/go-logr/logr v1.4.3 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/google/uuid v1.6.0 // indirect
--- a/go.sum
+++ b/go.sum
@@ -4,6 +4,8 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
+github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
 github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=