Files
stt-module/e2e_test.go
Billy D. dbaabe1f65
Some checks failed
CI / Test (push) Has been cancelled
CI / Release (push) Has been cancelled
CI / Docker Build & Push (push) Has been cancelled
CI / Notify (push) Has been cancelled
CI / Lint (push) Has been cancelled
fix: resolve golangci-lint errcheck warnings
- Add error checks for unchecked return values (errcheck)
- Remove unused struct fields (unused)
- Fix gofmt formatting issues
2026-02-20 08:45:44 -05:00

279 lines
7.5 KiB
Go

package main
import (
"bytes"
"encoding/binary"
"encoding/json"
"math"
"mime/multipart"
"net/http"
"net/http/httptest"
"strings"
"sync"
"testing"
"time"
)
// ────────────────────────────────────────────────────────────────────────────
// E2E tests: audio buffer lifecycle + transcription pipeline
// ────────────────────────────────────────────────────────────────────────────
// TestAudioBufferE2E_FullLifecycle walks a single buffer through a whole
// session: chunks are added, the processing check is consulted, the audio is
// read back, the buffer is cleared and finally it is marked complete.
func TestAudioBufferE2E_FullLifecycle(t *testing.T) {
	const (
		chunkCount = 10
		chunkBytes = 50000
		wantBytes  = 500000 // chunkCount * chunkBytes
	)

	buf := newAudioBuffer("e2e-session")
	for n := 0; n < chunkCount; n++ {
		buf.addChunk(make([]byte, chunkBytes))
	}

	// Snapshot the counter while holding the lock; assert once it is released.
	buf.mu.Lock()
	buffered := buf.totalBytes
	buf.mu.Unlock()
	if buffered != wantBytes {
		t.Errorf("totalBytes = %d, want 500000", buffered)
	}

	// 500000 buffered bytes is just under the 512000 passed as first argument
	// (presumably a minimum-size threshold — confirm against shouldProcess).
	// When the check reports false the voice-activity flag is switched on.
	// NOTE(review): nothing is asserted after the flag is set — confirm intent.
	if ready := buf.shouldProcess(512000, 5120000, 2.0); !ready {
		buf.hasVoiceActivity = true
	}

	// The concatenated audio must be as long as everything that was added.
	if got := len(buf.getAudio()); got != wantBytes {
		t.Errorf("getAudio() len = %d, want 500000", got)
	}

	// After clear the test expects sequence 1 and an empty byte counter.
	buf.clear()
	buf.mu.Lock()
	gotSeq, gotTotal := buf.sequence, buf.totalBytes
	buf.mu.Unlock()
	if gotSeq != 1 {
		t.Errorf("sequence = %d, want 1", gotSeq)
	}
	if gotTotal != 0 {
		t.Errorf("totalBytes after clear = %d", gotTotal)
	}

	// Completing the session must be visible through isComplete.
	buf.markComplete()
	buf.mu.Lock()
	done := buf.isComplete
	buf.mu.Unlock()
	if !done {
		t.Error("expected isComplete=true")
	}
}
// TestAudioBufferE2E_InterruptDuringResponse checks that loud audio arriving
// while the buffer is in stateResponding is reported as an interrupt once it
// has persisted across two checkInterrupt calls separated by a pause.
func TestAudioBufferE2E_InterruptDuringResponse(t *testing.T) {
	// The pause between the two checks; presumably longer than the 0.1 passed
	// as the last checkInterrupt argument (looks like seconds — confirm).
	const pause = 150 * time.Millisecond

	session := newAudioBuffer("interrupt-session")
	session.setState(stateResponding)
	speech := makeLoudAudio(500)

	// The first call's result is not inspected; per the test's intent it only
	// starts the interrupt tracking.
	session.checkInterrupt(speech, true, 0.01, 0.1)
	time.Sleep(pause)

	// The second call is the one that must confirm the interrupt.
	if session.checkInterrupt(speech, true, 0.01, 0.1) {
		return
	}
	t.Error("expected interrupt after duration threshold")
}
// TestAudioBufferE2E_InterruptDisabled checks that checkInterrupt reports
// false for loud audio in stateResponding when its second argument (the
// enable flag, per the failure message below) is false.
func TestAudioBufferE2E_InterruptDisabled(t *testing.T) {
	session := newAudioBuffer("no-interrupt")
	session.setState(stateResponding)

	triggered := session.checkInterrupt(makeLoudAudio(500), false, 0.01, 0.0)
	if triggered {
		t.Error("interrupt should not trigger when disabled")
	}
}
// TestAudioBufferE2E_ConcurrentChunks adds chunks from many goroutines at
// once and verifies that none are lost: the byte counter, the number of
// stored chunks and the length of the concatenated audio must all match.
func TestAudioBufferE2E_ConcurrentChunks(t *testing.T) {
	const (
		writers   = 20
		chunkLen  = 1000
		wantBytes = writers * chunkLen
	)

	shared := newAudioBuffer("concurrent")

	var pending sync.WaitGroup
	for w := 0; w < writers; w++ {
		pending.Add(1)
		go func() {
			defer pending.Done()
			shared.addChunk(make([]byte, chunkLen))
		}()
	}
	pending.Wait()

	// Read both counters in one critical section, then assert unlocked.
	shared.mu.Lock()
	gotBytes, gotChunks := shared.totalBytes, len(shared.chunks)
	shared.mu.Unlock()

	if gotBytes != wantBytes {
		t.Errorf("totalBytes = %d, want %d", gotBytes, wantBytes)
	}
	if gotChunks != writers {
		t.Errorf("chunks = %d, want %d", gotChunks, writers)
	}
	if n := len(shared.getAudio()); n != wantBytes {
		t.Errorf("getAudio len = %d, want %d", n, wantBytes)
	}
}
// TestTranscriptionE2E_MockWhisper posts a multipart upload to a stand-in
// Whisper server and checks the JSON transcription that comes back.
//
// The handler verifies the request method and the Content-Type header and,
// for multipart bodies, that a "file" field is present. It always answers
// with {"text": "hello world"}. Every step on both sides reports its own
// error so that a broken round trip is not reduced to a bare text mismatch.
func TestTranscriptionE2E_MockWhisper(t *testing.T) {
	whisperSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Only t.Error/t.Errorf are used here: the handler runs on the
		// server's goroutine, where the Fatal family must not be called.
		if r.Method != http.MethodPost {
			t.Errorf("expected POST, got %s", r.Method)
		}
		ct := r.Header.Get("Content-Type")
		if ct == "" {
			t.Error("missing Content-Type")
		}
		// Parse multipart form if present. A parse failure is reported once;
		// the field lookup is skipped because it could only fail again.
		if strings.HasPrefix(ct, "multipart/form-data") {
			if err := r.ParseMultipartForm(32 << 20); err != nil {
				t.Errorf("multipart parse: %v", err)
			} else if file, _, err := r.FormFile("file"); err != nil {
				t.Errorf("missing 'file' field: %v", err)
			} else if err := file.Close(); err != nil {
				t.Errorf("close uploaded file: %v", err)
			}
		}
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(map[string]string{"text": "hello world"}); err != nil {
			t.Errorf("encode response: %v", err)
		}
	}))
	defer whisperSrv.Close()

	// Build a proper multipart request like stt-module's transcribe() does.
	var buf bytes.Buffer
	writer := multipart.NewWriter(&buf)
	part, err := writer.CreateFormFile("file", "audio.wav")
	if err != nil {
		t.Fatal(err)
	}
	if _, err := part.Write(make([]byte, 8000)); err != nil { // simulated audio
		t.Fatalf("write audio part: %v", err)
	}
	// Close writes the trailing boundary; without it the body is malformed.
	if err := writer.Close(); err != nil {
		t.Fatalf("close multipart writer: %v", err)
	}

	resp, err := http.Post(whisperSrv.URL+"/v1/audio/transcriptions", writer.FormDataContentType(), &buf)
	if err != nil {
		t.Fatal(err)
	}
	// Best-effort close: there is nothing useful to do with a close error here.
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode != http.StatusOK {
		t.Errorf("status = %d", resp.StatusCode)
	}
	var result map[string]string
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		t.Fatalf("decode response: %v", err)
	}
	if result["text"] != "hello world" {
		t.Errorf("text = %q, want %q", result["text"], "hello world")
	}
}
// TestAudioRMSE2E_RealisticSignal feeds 100 ms of a 440 Hz sine wave, encoded
// as little-endian 16-bit samples at 16 kHz, to calculateAudioRMS and
// detectVoiceActivity. The RMS must be within 0.01 of the value expected for
// a sine wave, and the signal must be classified as voice.
func TestAudioRMSE2E_RealisticSignal(t *testing.T) {
	var (
		rate      = 16000
		seconds   = 0.1 // 100ms
		amplitude = 16000.0
	)
	sampleCount := int(float64(rate) * seconds)

	// sampleAt returns the i-th sample of the 440 Hz tone.
	sampleAt := func(i int) int16 {
		return int16(amplitude * math.Sin(2*math.Pi*440*float64(i)/float64(rate)))
	}

	pcm := make([]byte, sampleCount*2)
	for i := 0; i < sampleCount; i++ {
		binary.LittleEndian.PutUint16(pcm[i*2:], uint16(sampleAt(i)))
	}

	got := calculateAudioRMS(pcm)

	// RMS of a sine wave = amplitude / sqrt(2); dividing by 32768 scales it
	// to the normalised range the test expects from calculateAudioRMS.
	want := amplitude / math.Sqrt(2) / 32768.0
	const tolerance = 0.01
	if diff := math.Abs(got - want); diff > tolerance {
		t.Errorf("RMS = %.4f, expected ~%.4f (±%.2f)", got, want, tolerance)
	}

	if voiced := detectVoiceActivity(pcm); !voiced {
		t.Error("440 Hz sine at amplitude 16000 should be detected as voice")
	}
}
// ────────────────────────────────────────────────────────────────────────────
// Benchmarks
// ────────────────────────────────────────────────────────────────────────────
// BenchmarkAudioBufferAddChunk measures addChunk when the same 4096-byte
// slice is added over and over to a single buffer.
//
// There is no explicit b.ResetTimer: the first b.Loop call resets the timer
// itself, so the setup above the loop is never part of the measurement.
// NOTE(review): the buffer is never cleared between iterations, so it
// presumably keeps growing for the whole run — confirm that is intended.
func BenchmarkAudioBufferAddChunk(b *testing.B) {
	ab := newAudioBuffer("bench")
	chunk := make([]byte, 4096)
	for b.Loop() {
		ab.addChunk(chunk)
	}
}
// BenchmarkAudioBufferGetAudio measures getAudio on a buffer that was
// pre-filled with 100 chunks of 4096 bytes each.
//
// There is no explicit b.ResetTimer: the first b.Loop call resets the timer
// itself, so filling the buffer is never part of the measurement.
func BenchmarkAudioBufferGetAudio(b *testing.B) {
	ab := newAudioBuffer("bench")
	for i := 0; i < 100; i++ {
		ab.addChunk(make([]byte, 4096))
	}
	for b.Loop() {
		_ = ab.getAudio()
	}
}
// BenchmarkCalculateAudioRMS measures calculateAudioRMS on 32000 bytes of
// little-endian 16-bit sine samples (one second at 16 kHz).
//
// There is no explicit b.ResetTimer: the first b.Loop call resets the timer
// itself, so generating the signal is never part of the measurement.
func BenchmarkCalculateAudioRMS(b *testing.B) {
	audio := make([]byte, 32000) // 1 second at 16kHz, 16-bit
	for i := 0; i < 16000; i++ {
		val := int16(16000 * math.Sin(float64(i)*0.1))
		binary.LittleEndian.PutUint16(audio[i*2:], uint16(val))
	}
	for b.Loop() {
		calculateAudioRMS(audio)
	}
}
// BenchmarkDetectVoiceActivity measures detectVoiceActivity on 8000 bytes of
// little-endian 16-bit sine samples (250 ms at 16 kHz).
//
// There is no explicit b.ResetTimer: the first b.Loop call resets the timer
// itself, so generating the signal is never part of the measurement.
func BenchmarkDetectVoiceActivity(b *testing.B) {
	audio := make([]byte, 8000) // 250ms at 16kHz
	for i := 0; i < 4000; i++ {
		val := int16(10000 * math.Sin(float64(i)*0.2))
		binary.LittleEndian.PutUint16(audio[i*2:], uint16(val))
	}
	for b.Loop() {
		detectVoiceActivity(audio)
	}
}
// BenchmarkCheckInterrupt measures checkInterrupt on loud audio while the
// buffer is in stateResponding. Each iteration also clears
// interruptStartTime under the buffer lock, so that reset is included in the
// measured time.
//
// There is no explicit b.ResetTimer: the first b.Loop call resets the timer
// itself, so the setup above the loop is never part of the measurement.
func BenchmarkCheckInterrupt(b *testing.B) {
	ab := newAudioBuffer("bench")
	ab.setState(stateResponding)
	audio := makeLoudAudio(500)
	for b.Loop() {
		ab.checkInterrupt(audio, true, 0.02, 999.0) // high duration so it never triggers
		ab.mu.Lock()
		ab.interruptStartTime = nil // reset for clean iteration
		ab.mu.Unlock()
	}
}