""" Unit tests for STT streaming service. """ import time from unittest.mock import AsyncMock, MagicMock, patch import msgpack import pytest from stt_streaming import ( TRANSCRIPTION_SUBJECT_PREFIX, AudioBuffer, StreamingSTT, calculate_audio_rms, ) # --------------------------------------------------------------------------- # Utility function tests # --------------------------------------------------------------------------- class TestCalculateAudioRms: """Tests for calculate_audio_rms helper.""" def test_silence_returns_zero(self, silent_pcm_bytes): rms = calculate_audio_rms(silent_pcm_bytes) assert rms == 0.0 def test_noisy_signal_above_zero(self, noisy_pcm_bytes): rms = calculate_audio_rms(noisy_pcm_bytes) assert rms > 0.0 def test_empty_bytes(self): rms = calculate_audio_rms(b"") assert rms == 0.0 def test_single_byte(self): rms = calculate_audio_rms(b"\x00") assert rms == 0.0 # --------------------------------------------------------------------------- # AudioBuffer tests # --------------------------------------------------------------------------- class TestAudioBuffer: """Tests for AudioBuffer session management.""" def test_init(self): buf = AudioBuffer("sess-1") assert buf.session_id == "sess-1" assert buf.total_bytes == 0 assert buf.is_complete is False assert buf.sequence == 0 assert buf.state == "listening" def test_add_chunk(self, silent_pcm_bytes): buf = AudioBuffer("sess-1") buf.add_chunk(silent_pcm_bytes) assert buf.total_bytes == len(silent_pcm_bytes) assert len(buf.chunks) == 1 def test_get_audio_concatenates(self, silent_pcm_bytes): buf = AudioBuffer("sess-1") buf.add_chunk(silent_pcm_bytes) buf.add_chunk(silent_pcm_bytes) audio = buf.get_audio() assert len(audio) == len(silent_pcm_bytes) * 2 def test_clear_resets(self, silent_pcm_bytes): buf = AudioBuffer("sess-1") buf.add_chunk(silent_pcm_bytes) buf.clear() assert buf.total_bytes == 0 assert buf.chunks == [] assert buf.sequence == 1 # Incremented after clear def test_mark_complete(self): buf = AudioBuffer("sess-1") buf.mark_complete() assert buf.is_complete is True def test_set_state(self): buf = AudioBuffer("sess-1") assert buf.state == "listening" buf.set_state("responding") assert buf.state == "responding" buf.set_state("listening") assert buf.state == "listening" def test_set_invalid_state_ignored(self): buf = AudioBuffer("sess-1") buf.set_state("invalid") assert buf.state == "listening" # Unchanged @patch("stt_streaming.BUFFER_SIZE_BYTES", 100) def test_should_process_when_buffer_full(self, silent_pcm_bytes): buf = AudioBuffer("sess-1") buf.add_chunk(silent_pcm_bytes) # 2000 bytes > 100 threshold assert buf.should_process() is True def test_should_not_process_when_empty(self): buf = AudioBuffer("sess-1") assert buf.should_process() is False @patch("stt_streaming.CHUNK_TIMEOUT_SECONDS", 0.0) def test_should_process_on_timeout(self, silent_pcm_bytes): buf = AudioBuffer("sess-1") buf.add_chunk(silent_pcm_bytes) buf.last_chunk_time = time.time() - 10 # Force timeout assert buf.should_process() is True # --------------------------------------------------------------------------- # StreamingSTT tests # --------------------------------------------------------------------------- class TestStreamingSTT: """Tests for the StreamingSTT service.""" @pytest.fixture def service(self, mock_nats, mock_http_client): svc = StreamingSTT() svc.nc = mock_nats svc.js = mock_nats.jetstream() svc.http_client = mock_http_client svc.is_healthy = True return svc @pytest.mark.asyncio async def test_transcribe_success(self, service): """Successful transcription returns text.""" result = await service.transcribe(b"fake-audio") assert result == "Hello world" service.http_client.post.assert_called_once() @pytest.mark.asyncio async def test_transcribe_failure(self, service): """Transcription failure returns None.""" service.http_client.post = AsyncMock(side_effect=Exception("timeout")) result = await service.transcribe(b"fake-audio") assert result is None @pytest.mark.asyncio async def test_handle_start_message(self, service): """Start message creates a new session buffer.""" msg = MagicMock() msg.subject = "ai.voice.stream.sess-1" msg.data = msgpack.packb({"type": "start"}) await service.handle_stream_message(msg) assert "sess-1" in service.sessions assert service.sessions["sess-1"].state == "listening" @pytest.mark.asyncio async def test_handle_start_with_speaker_id(self, service): """Start message with speaker_id stores it on the buffer.""" msg = MagicMock() msg.subject = "ai.voice.stream.sess-2" msg.data = msgpack.packb({"type": "start", "speaker_id": "user-42"}) await service.handle_stream_message(msg) assert service.sessions["sess-2"].speaker_id == "user-42" @pytest.mark.asyncio async def test_handle_state_change(self, service): """State change message updates the buffer state.""" # Create session first service.sessions["sess-1"] = AudioBuffer("sess-1") msg = MagicMock() msg.subject = "ai.voice.stream.sess-1" msg.data = msgpack.packb({"type": "state_change", "state": "responding"}) await service.handle_stream_message(msg) assert service.sessions["sess-1"].state == "responding" @pytest.mark.asyncio async def test_handle_audio_chunk(self, service, sample_audio_b64): """Audio chunks are added to the session buffer.""" service.sessions["sess-1"] = AudioBuffer("sess-1") # Prevent auto-creation of monitoring task service.processing_tasks["sess-1"] = MagicMock() msg = MagicMock() msg.subject = "ai.voice.stream.sess-1" msg.data = msgpack.packb({"type": "chunk", "audio_b64": sample_audio_b64}) await service.handle_stream_message(msg) assert service.sessions["sess-1"].total_bytes > 0 @pytest.mark.asyncio async def test_handle_end_message(self, service, mock_http_client): """End message triggers processing of remaining audio.""" buf = AudioBuffer("sess-1") buf.add_chunk(b"\x00" * 100) service.sessions["sess-1"] = buf msg = MagicMock() msg.subject = "ai.voice.stream.sess-1" msg.data = msgpack.packb({"type": "end"}) await service.handle_stream_message(msg) # Should have published a transcription assert service.nc.publish.called @pytest.mark.asyncio async def test_handle_auto_create_session(self, service, sample_audio_b64): """Chunk message auto-creates session when start was missed.""" msg = MagicMock() msg.subject = "ai.voice.stream.new-sess" msg.data = msgpack.packb({"type": "chunk", "audio_b64": sample_audio_b64}) await service.handle_stream_message(msg) assert "new-sess" in service.sessions @pytest.mark.asyncio async def test_process_buffer_publishes_result(self, service, mock_http_client): """process_buffer publishes transcription to NATS.""" buf = AudioBuffer("sess-1") buf.add_chunk(b"\x00" * 100) service.sessions["sess-1"] = buf await service.process_buffer("sess-1") # Verify transcription published pub_calls = service.nc.publish.call_args_list assert len(pub_calls) >= 1 subject = pub_calls[0].args[0] assert subject == f"{TRANSCRIPTION_SUBJECT_PREFIX}.sess-1" data = msgpack.unpackb(pub_calls[0].args[1], raw=False) assert data["transcript"] == "Hello world" assert data["session_id"] == "sess-1" @pytest.mark.asyncio async def test_process_buffer_no_session(self, service): """process_buffer handles missing session gracefully.""" await service.process_buffer("nonexistent") service.nc.publish.assert_not_called() @pytest.mark.asyncio async def test_process_buffer_empty_audio(self, service): """process_buffer skips empty buffers.""" service.sessions["sess-1"] = AudioBuffer("sess-1") await service.process_buffer("sess-1") service.nc.publish.assert_not_called() def test_invalid_subject(self, service): """Messages with invalid subjects are skipped.""" msg = MagicMock() msg.subject = "ai.voice" # Too few parts msg.data = msgpack.packb({"type": "start"}) import asyncio asyncio.get_event_loop().run_until_complete(service.handle_stream_message(msg)) assert len(service.sessions) == 0