package main

import (
	"encoding/binary"
	"encoding/json"
	"math"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"
)

// TestCalculateAudioRMS checks calculateAudioRMS on three inputs: an
// all-zero buffer (expects exactly 0), a buffer of 16-bit little-endian
// samples all set to 32767 (expects at least 0.99), and a single byte,
// which is too short to hold one 16-bit sample (expects exactly 0).
func TestCalculateAudioRMS(t *testing.T) {
	// Silence: all zeros
	silence := make([]byte, 200)
	rms := calculateAudioRMS(silence)
	if rms != 0.0 {
		t.Errorf("silence RMS = %f, want 0.0", rms)
	}

	// Max amplitude: 16-bit samples at max value
	loud := make([]byte, 200)
	for i := 0; i < 100; i++ {
		binary.LittleEndian.PutUint16(loud[i*2:], uint16(32767))
	}
	rms = calculateAudioRMS(loud)
	if rms < 0.99 {
		t.Errorf("max amplitude RMS = %f, want ~1.0", rms)
	}

	// Small input
	if got := calculateAudioRMS([]byte{0}); got != 0.0 {
		t.Errorf("single byte RMS = %f", got)
	}
}

// TestDetectVoiceActivity checks that detectVoiceActivity returns false for
// an all-zero buffer and true for a sine wave with a peak amplitude of 16000
// encoded as 16-bit little-endian samples.
func TestDetectVoiceActivity(t *testing.T) {
	// Silence should not be detected as voice
	silence := make([]byte, 200)
	if detectVoiceActivity(silence) {
		t.Error("silence detected as voice")
	}

	// Loud audio should be detected as voice
	loud := make([]byte, 200)
	for i := 0; i < 100; i++ {
		val := int16(16000 * math.Sin(float64(i)*0.1))
		// uint16(val) reinterprets the signed sample's bits for the byte writer.
		binary.LittleEndian.PutUint16(loud[i*2:], uint16(val))
	}
	if !detectVoiceActivity(loud) {
		t.Error("loud audio not detected as voice")
	}
}

// TestAudioBufferBasic walks a fresh audio buffer through its basic
// lifecycle: the constructor stores the session ID and starts in
// stateListening, addChunk accounts for the chunk's bytes, getAudio returns
// that many bytes, and clear resets totalBytes to 0 and leaves sequence at 1.
func TestAudioBufferBasic(t *testing.T) {
	ab := newAudioBuffer("test-session")
	if ab.sessionID != "test-session" {
		t.Error("wrong session ID")
	}
	if ab.state != stateListening {
		t.Error("initial state should be listening")
	}

	// Add chunk
	chunk := make([]byte, 1000)
	ab.addChunk(chunk)
	if ab.totalBytes != 1000 {
		t.Errorf("totalBytes = %d, want 1000", ab.totalBytes)
	}

	// Get audio
	audio := ab.getAudio()
	if len(audio) != 1000 {
		t.Errorf("getAudio len = %d, want 1000", len(audio))
	}

	// Clear
	ab.clear()
	if ab.totalBytes != 0 {
		t.Error("totalBytes should be 0 after clear")
	}
	if ab.sequence != 1 {
		t.Errorf("sequence = %d, want 1", ab.sequence)
	}
}

// TestAudioBufferStateChange checks that setState applies stateResponding
// and that a following setState("invalid") leaves the state unchanged.
func TestAudioBufferStateChange(t *testing.T) {
	ab := newAudioBuffer("s1")

	// currentState snapshots the state under the buffer's mutex so that the
	// assertions below run without holding the lock.
	currentState := func() any {
		ab.mu.Lock()
		defer ab.mu.Unlock()
		return ab.state
	}

	ab.setState(stateResponding)
	if currentState() != any(stateResponding) {
		t.Error("state should be responding")
	}

	ab.setState("invalid")
	if currentState() != any(stateResponding) {
		t.Error("state should still be responding")
	}
}

// TestAudioBufferShouldProcess checks that shouldProcess returns false for
// an empty buffer and true once 512000 bytes have been added and
// hasVoiceActivity is set.
//
// NOTE(review): the three shouldProcess arguments are presumably a minimum
// size in bytes, a maximum size in bytes and a timeout in seconds; confirm
// against the shouldProcess definition.
func TestAudioBufferShouldProcess(t *testing.T) {
	ab := newAudioBuffer("s2")

	// Empty buffer, recent time — should not process
	if ab.shouldProcess(512000, 5120000, 2.0) {
		t.Error("empty buffer should not process")
	}

	// Add enough data to meet threshold
	ab.addChunk(make([]byte, 512000))
	ab.hasVoiceActivity = true
	if !ab.shouldProcess(512000, 5120000, 2.0) {
		t.Error("full buffer should process")
	}
}

// TestAudioBufferTimeout checks that shouldProcess returns true for a buffer
// holding only 100 bytes with hasVoiceActivity set once lastChunkTime is
// three seconds in the past.
//
// NOTE(review): this presumably exercises a timeout path driven by the third
// argument (2.0); confirm against the shouldProcess definition.
func TestAudioBufferTimeout(t *testing.T) {
	ab := newAudioBuffer("s3")
	ab.addChunk(make([]byte, 100))
	ab.hasVoiceActivity = true

	// Simulate old lastChunkTime
	ab.mu.Lock()
	ab.lastChunkTime = time.Now().Add(-3 * time.Second)
	ab.mu.Unlock()

	if !ab.shouldProcess(512000, 5120000, 2.0) {
		t.Error("timed-out buffer should process")
	}
}

// TestAudioBufferCheckInterrupt checks that checkInterrupt returns false for
// loud audio while the buffer is in its initial state and true for the same
// call after the state has been switched to stateResponding.
func TestAudioBufferCheckInterrupt(t *testing.T) {
	ab := newAudioBuffer("s4")

	// Not in responding state — no interrupt
	loud := makeLoudAudio(100)
	if ab.checkInterrupt(loud, true, 0.001, 0.0) {
		t.Error("should not interrupt in listening state")
	}

	// Switch to responding
	ab.setState(stateResponding)

	// With 0 duration threshold, immediate interrupt
	if !ab.checkInterrupt(loud, true, 0.001, 0.0) {
		t.Error("should interrupt in responding state with loud audio")
	}
}

// TestTranscribeHTTP starts a mock transcription endpoint and checks that a
// POST to /v1/audio/transcriptions is answered with status 200 and a JSON
// body whose "text" field is "hello world". It exercises only the mock
// server; no production transcription code is called from this test.
func TestTranscribeHTTP(t *testing.T) {
	const wantText = "hello world"

	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/v1/audio/transcriptions" {
			t.Errorf("unexpected path: %s", r.URL.Path)
		}
		if r.Method != http.MethodPost {
			t.Errorf("expected POST, got %s", r.Method)
		}
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(map[string]string{"text": wantText}); err != nil {
			t.Errorf("encoding mock response: %v", err)
		}
	}))
	defer ts.Close()

	// Verify the mock responds correctly
	resp, err := ts.Client().Post(ts.URL+"/v1/audio/transcriptions", "audio/wav", nil)
	if err != nil {
		t.Fatal(err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		t.Errorf("status = %d", resp.StatusCode)
	}

	// Decode the payload so the test fails if the mock's body is wrong,
	// not only if its status code is.
	var body map[string]string
	if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
		t.Fatalf("decoding response: %v", err)
	}
	if got := body["text"]; got != wantText {
		t.Errorf("text = %q, want %q", got, wantText)
	}
}

// TestHelpers checks that getEnv, getEnvInt, getEnvFloat and getEnvBool
// return the value of an environment variable that is set, rather than the
// supplied fallback.
func TestHelpers(t *testing.T) {
	t.Setenv("STT_TEST", "val")
	if got := getEnv("STT_TEST", "x"); got != "val" {
		t.Errorf("getEnv = %q", got)
	}

	t.Setenv("STT_PORT", "9090")
	if got := getEnvInt("STT_PORT", 0); got != 9090 {
		t.Errorf("getEnvInt = %d", got)
	}

	t.Setenv("STT_TIMEOUT", "1.5")
	if got := getEnvFloat("STT_TIMEOUT", 0); got != 1.5 {
		t.Errorf("getEnvFloat = %f", got)
	}

	t.Setenv("STT_FLAG", "true")
	if got := getEnvBool("STT_FLAG", false); !got {
		t.Error("getEnvBool should be true")
	}
}

// makeLoudAudio creates a 16-bit PCM audio buffer with high amplitude.
//
// The buffer holds numSamples little-endian samples of a sine wave with a
// peak amplitude of 20000, so its length is numSamples*2 bytes.
func makeLoudAudio(numSamples int) []byte {
	buf := make([]byte, numSamples*2)
	for i := 0; i < numSamples; i++ {
		val := int16(20000 * math.Sin(float64(i)*0.3))
		// uint16(val) reinterprets the signed sample's bits for the byte writer.
		binary.LittleEndian.PutUint16(buf[i*2:], uint16(val))
	}
	return buf
}