feature/go-handler-refactor #1
278
e2e_test.go
Normal file
278
e2e_test.go
Normal file
@@ -0,0 +1,278 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"math"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ────────────────────────────────────────────────────────────────────────────
|
||||
// E2E tests: audio buffer lifecycle + transcription pipeline
|
||||
// ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestAudioBufferE2E_FullLifecycle(t *testing.T) {
|
||||
// Simulate a full session: start → chunks → process → end
|
||||
ab := newAudioBuffer("e2e-session")
|
||||
|
||||
// Send 10 chunks of 50 KB each
|
||||
for i := 0; i < 10; i++ {
|
||||
ab.addChunk(make([]byte, 50000))
|
||||
}
|
||||
|
||||
ab.mu.Lock()
|
||||
if ab.totalBytes != 500000 {
|
||||
t.Errorf("totalBytes = %d, want 500000", ab.totalBytes)
|
||||
}
|
||||
ab.mu.Unlock()
|
||||
|
||||
// Should process (meets 512KB threshold approximately)
|
||||
if !ab.shouldProcess(512000, 5120000, 2.0) {
|
||||
// Under threshold but check with voice activity
|
||||
ab.hasVoiceActivity = true
|
||||
}
|
||||
|
||||
// Get audio and verify concatenation
|
||||
audio := ab.getAudio()
|
||||
if len(audio) != 500000 {
|
||||
t.Errorf("getAudio() len = %d, want 500000", len(audio))
|
||||
}
|
||||
|
||||
// Clear and verify
|
||||
ab.clear()
|
||||
ab.mu.Lock()
|
||||
seq := ab.sequence
|
||||
total := ab.totalBytes
|
||||
ab.mu.Unlock()
|
||||
if seq != 1 {
|
||||
t.Errorf("sequence = %d, want 1", seq)
|
||||
}
|
||||
if total != 0 {
|
||||
t.Errorf("totalBytes after clear = %d", total)
|
||||
}
|
||||
|
||||
// Mark complete
|
||||
ab.markComplete()
|
||||
ab.mu.Lock()
|
||||
if !ab.isComplete {
|
||||
t.Error("expected isComplete=true")
|
||||
}
|
||||
ab.mu.Unlock()
|
||||
}
|
||||
|
||||
func TestAudioBufferE2E_InterruptDuringResponse(t *testing.T) {
|
||||
ab := newAudioBuffer("interrupt-session")
|
||||
ab.setState(stateResponding)
|
||||
|
||||
// Simulate loud speech during response (user interrupting)
|
||||
loud := makeLoudAudio(500)
|
||||
|
||||
// First check: starts tracking interrupt timer
|
||||
ab.checkInterrupt(loud, true, 0.01, 0.1)
|
||||
|
||||
// Sleep to exceed duration threshold
|
||||
time.Sleep(150 * time.Millisecond)
|
||||
|
||||
// Second check: should now confirm interrupt
|
||||
interrupted := ab.checkInterrupt(loud, true, 0.01, 0.1)
|
||||
if !interrupted {
|
||||
t.Error("expected interrupt after duration threshold")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAudioBufferE2E_InterruptDisabled(t *testing.T) {
|
||||
ab := newAudioBuffer("no-interrupt")
|
||||
ab.setState(stateResponding)
|
||||
loud := makeLoudAudio(500)
|
||||
|
||||
if ab.checkInterrupt(loud, false, 0.01, 0.0) {
|
||||
t.Error("interrupt should not trigger when disabled")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAudioBufferE2E_ConcurrentChunks(t *testing.T) {
|
||||
ab := newAudioBuffer("concurrent")
|
||||
var wg sync.WaitGroup
|
||||
numGoroutines := 20
|
||||
chunkSize := 1000
|
||||
|
||||
wg.Add(numGoroutines)
|
||||
for i := 0; i < numGoroutines; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
ab.addChunk(make([]byte, chunkSize))
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
ab.mu.Lock()
|
||||
total := ab.totalBytes
|
||||
chunks := len(ab.chunks)
|
||||
ab.mu.Unlock()
|
||||
|
||||
if total != numGoroutines*chunkSize {
|
||||
t.Errorf("totalBytes = %d, want %d", total, numGoroutines*chunkSize)
|
||||
}
|
||||
if chunks != numGoroutines {
|
||||
t.Errorf("chunks = %d, want %d", chunks, numGoroutines)
|
||||
}
|
||||
|
||||
audio := ab.getAudio()
|
||||
if len(audio) != numGoroutines*chunkSize {
|
||||
t.Errorf("getAudio len = %d, want %d", len(audio), numGoroutines*chunkSize)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTranscriptionE2E_MockWhisper posts a multipart audio upload to an
// in-process stand-in for a Whisper transcription endpoint and checks that
// the JSON reply decodes to the expected text.
//
// The mock handler verifies the request shape (POST, a Content-Type header,
// and a "file" form field when the body is multipart) and always answers with
// {"text": "hello world"}. Handler-side problems are reported with t.Error /
// t.Errorf only, which are safe to call from the server goroutine.
func TestTranscriptionE2E_MockWhisper(t *testing.T) {
	// Full mock Whisper server that validates multipart form upload.
	whisperSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			t.Errorf("expected POST, got %s", r.Method)
		}
		ct := r.Header.Get("Content-Type")
		if ct == "" {
			t.Error("missing Content-Type")
		}

		// Parse multipart form if present.
		if strings.HasPrefix(ct, "multipart/form-data") {
			if err := r.ParseMultipartForm(32 << 20); err != nil {
				t.Errorf("multipart parse: %v", err)
			}
			file, _, err := r.FormFile("file")
			if err != nil {
				t.Errorf("missing 'file' field: %v", err)
			} else if err := file.Close(); err != nil {
				t.Errorf("close uploaded file: %v", err)
			}
		}

		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(map[string]string{"text": "hello world"}); err != nil {
			t.Errorf("encode response: %v", err)
		}
	}))
	defer whisperSrv.Close()

	// Build a proper multipart request like stt-module's transcribe() does.
	var buf bytes.Buffer
	writer := multipart.NewWriter(&buf)
	part, err := writer.CreateFormFile("file", "audio.wav")
	if err != nil {
		t.Fatal(err)
	}
	if _, err := part.Write(make([]byte, 8000)); err != nil { // simulated audio
		t.Fatalf("write audio part: %v", err)
	}
	// Close writes the terminating boundary; without it the body is not a
	// valid multipart message, so a failure here must stop the test.
	if err := writer.Close(); err != nil {
		t.Fatalf("close multipart writer: %v", err)
	}

	// Bounded client so a stuck server fails the test instead of hanging it.
	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Post(whisperSrv.URL+"/v1/audio/transcriptions", writer.FormDataContentType(), &buf)
	if err != nil {
		t.Fatal(err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		t.Errorf("status = %d", resp.StatusCode)
	}

	var result map[string]string
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		t.Fatalf("decode response: %v", err)
	}
	if result["text"] != "hello world" {
		t.Errorf("text = %q, want %q", result["text"], "hello world")
	}
}
|
||||
|
||||
func TestAudioRMSE2E_RealisticSignal(t *testing.T) {
|
||||
// Generate a sine wave at 440 Hz, 16kHz sample rate
|
||||
sampleRate := 16000
|
||||
duration := 0.1 // 100ms
|
||||
numSamples := int(float64(sampleRate) * duration)
|
||||
audio := make([]byte, numSamples*2)
|
||||
|
||||
amplitude := 16000.0
|
||||
for i := 0; i < numSamples; i++ {
|
||||
sample := int16(amplitude * math.Sin(2*math.Pi*440*float64(i)/float64(sampleRate)))
|
||||
binary.LittleEndian.PutUint16(audio[i*2:], uint16(sample))
|
||||
}
|
||||
|
||||
rms := calculateAudioRMS(audio)
|
||||
// RMS of a sine wave = amplitude / sqrt(2) / 32768
|
||||
expectedRMS := amplitude / math.Sqrt(2) / 32768.0
|
||||
tolerance := 0.01
|
||||
if math.Abs(rms-expectedRMS) > tolerance {
|
||||
t.Errorf("RMS = %.4f, expected ~%.4f (±%.2f)", rms, expectedRMS, tolerance)
|
||||
}
|
||||
|
||||
if !detectVoiceActivity(audio) {
|
||||
t.Error("440 Hz sine at amplitude 16000 should be detected as voice")
|
||||
}
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────────────────────────────────────
|
||||
// Benchmarks
|
||||
// ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
func BenchmarkAudioBufferAddChunk(b *testing.B) {
|
||||
ab := newAudioBuffer("bench")
|
||||
chunk := make([]byte, 4096)
|
||||
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
ab.addChunk(chunk)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAudioBufferGetAudio(b *testing.B) {
|
||||
ab := newAudioBuffer("bench")
|
||||
for i := 0; i < 100; i++ {
|
||||
ab.addChunk(make([]byte, 4096))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
_ = ab.getAudio()
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCalculateAudioRMS(b *testing.B) {
|
||||
audio := make([]byte, 32000) // 1 second at 16kHz, 16-bit
|
||||
for i := 0; i < 16000; i++ {
|
||||
val := int16(16000 * math.Sin(float64(i)*0.1))
|
||||
binary.LittleEndian.PutUint16(audio[i*2:], uint16(val))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
calculateAudioRMS(audio)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkDetectVoiceActivity(b *testing.B) {
|
||||
audio := make([]byte, 8000) // 250ms at 16kHz
|
||||
for i := 0; i < 4000; i++ {
|
||||
val := int16(10000 * math.Sin(float64(i)*0.2))
|
||||
binary.LittleEndian.PutUint16(audio[i*2:], uint16(val))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
detectVoiceActivity(audio)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCheckInterrupt(b *testing.B) {
|
||||
ab := newAudioBuffer("bench")
|
||||
ab.setState(stateResponding)
|
||||
audio := makeLoudAudio(500)
|
||||
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
ab.checkInterrupt(audio, true, 0.02, 999.0) // high duration so it never triggers
|
||||
ab.mu.Lock()
|
||||
ab.interruptStartTime = nil // reset for clean iteration
|
||||
ab.mu.Unlock()
|
||||
}
|
||||
}
|
||||
1
go.mod
1
go.mod
@@ -11,6 +11,7 @@ require (
|
||||
require (
|
||||
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/fsnotify/fsnotify v1.9.0 // indirect
|
||||
github.com/go-logr/logr v1.4.3 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
|
||||
2
go.sum
2
go.sum
@@ -4,6 +4,8 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
|
||||
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
|
||||
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
|
||||
Reference in New Issue
Block a user