feat: CI pipeline, lint fixes, and Renovate config

- pyproject.toml with ruff/pytest config (setuptools<81 pin) - Full test suite (26 tests) - Gitea Actions CI (lint, test, docker, notify) - Ruff lint/format fixes across source files - Renovate config for automated dependency updates Ref: ADR-0057
2026-02-13 15:33:35 -05:00
parent 8fc5eb1193
commit 55cd657364
10 changed files with 1655 additions and 279 deletions
--- a/tests/test_stt_streaming.py
+++ b/tests/test_stt_streaming.py
@@ -0,0 +1,262 @@
+"""
+Unit tests for STT streaming service.
+"""
+
+import time
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import msgpack
+import pytest
+
+from stt_streaming import (
+    TRANSCRIPTION_SUBJECT_PREFIX,
+    AudioBuffer,
+    StreamingSTT,
+    calculate_audio_rms,
+)
+
+# ---------------------------------------------------------------------------
+# Utility function tests
+# ---------------------------------------------------------------------------
+
+
+class TestCalculateAudioRms:
+    """Tests for calculate_audio_rms helper."""
+
+    def test_silence_returns_zero(self, silent_pcm_bytes):
+        rms = calculate_audio_rms(silent_pcm_bytes)
+        assert rms == 0.0
+
+    def test_noisy_signal_above_zero(self, noisy_pcm_bytes):
+        rms = calculate_audio_rms(noisy_pcm_bytes)
+        assert rms > 0.0
+
+    def test_empty_bytes(self):
+        rms = calculate_audio_rms(b"")
+        assert rms == 0.0
+
+    def test_single_byte(self):
+        rms = calculate_audio_rms(b"\x00")
+        assert rms == 0.0
+
+
+# ---------------------------------------------------------------------------
+# AudioBuffer tests
+# ---------------------------------------------------------------------------
+
+
+class TestAudioBuffer:
+    """Tests for AudioBuffer session management."""
+
+    def test_init(self):
+        buf = AudioBuffer("sess-1")
+        assert buf.session_id == "sess-1"
+        assert buf.total_bytes == 0
+        assert buf.is_complete is False
+        assert buf.sequence == 0
+        assert buf.state == "listening"
+
+    def test_add_chunk(self, silent_pcm_bytes):
+        buf = AudioBuffer("sess-1")
+        buf.add_chunk(silent_pcm_bytes)
+        assert buf.total_bytes == len(silent_pcm_bytes)
+        assert len(buf.chunks) == 1
+
+    def test_get_audio_concatenates(self, silent_pcm_bytes):
+        buf = AudioBuffer("sess-1")
+        buf.add_chunk(silent_pcm_bytes)
+        buf.add_chunk(silent_pcm_bytes)
+        audio = buf.get_audio()
+        assert len(audio) == len(silent_pcm_bytes) * 2
+
+    def test_clear_resets(self, silent_pcm_bytes):
+        buf = AudioBuffer("sess-1")
+        buf.add_chunk(silent_pcm_bytes)
+        buf.clear()
+        assert buf.total_bytes == 0
+        assert buf.chunks == []
+        assert buf.sequence == 1  # Incremented after clear
+
+    def test_mark_complete(self):
+        buf = AudioBuffer("sess-1")
+        buf.mark_complete()
+        assert buf.is_complete is True
+
+    def test_set_state(self):
+        buf = AudioBuffer("sess-1")
+        assert buf.state == "listening"
+        buf.set_state("responding")
+        assert buf.state == "responding"
+        buf.set_state("listening")
+        assert buf.state == "listening"
+
+    def test_set_invalid_state_ignored(self):
+        buf = AudioBuffer("sess-1")
+        buf.set_state("invalid")
+        assert buf.state == "listening"  # Unchanged
+
+    @patch("stt_streaming.BUFFER_SIZE_BYTES", 100)
+    def test_should_process_when_buffer_full(self, silent_pcm_bytes):
+        buf = AudioBuffer("sess-1")
+        buf.add_chunk(silent_pcm_bytes)  # 2000 bytes > 100 threshold
+        assert buf.should_process() is True
+
+    def test_should_not_process_when_empty(self):
+        buf = AudioBuffer("sess-1")
+        assert buf.should_process() is False
+
+    @patch("stt_streaming.CHUNK_TIMEOUT_SECONDS", 0.0)
+    def test_should_process_on_timeout(self, silent_pcm_bytes):
+        buf = AudioBuffer("sess-1")
+        buf.add_chunk(silent_pcm_bytes)
+        buf.last_chunk_time = time.time() - 10  # Force timeout
+        assert buf.should_process() is True
+
+
+# ---------------------------------------------------------------------------
+# StreamingSTT tests
+# ---------------------------------------------------------------------------
+
+
+class TestStreamingSTT:
+    """Tests for the StreamingSTT service."""
+
+    @pytest.fixture
+    def service(self, mock_nats, mock_http_client):
+        svc = StreamingSTT()
+        svc.nc = mock_nats
+        svc.js = mock_nats.jetstream()
+        svc.http_client = mock_http_client
+        svc.is_healthy = True
+        return svc
+
+    @pytest.mark.asyncio
+    async def test_transcribe_success(self, service):
+        """Successful transcription returns text."""
+        result = await service.transcribe(b"fake-audio")
+        assert result == "Hello world"
+        service.http_client.post.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_transcribe_failure(self, service):
+        """Transcription failure returns None."""
+        service.http_client.post = AsyncMock(side_effect=Exception("timeout"))
+        result = await service.transcribe(b"fake-audio")
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_handle_start_message(self, service):
+        """Start message creates a new session buffer."""
+        msg = MagicMock()
+        msg.subject = "ai.voice.stream.sess-1"
+        msg.data = msgpack.packb({"type": "start"})
+
+        await service.handle_stream_message(msg)
+
+        assert "sess-1" in service.sessions
+        assert service.sessions["sess-1"].state == "listening"
+
+    @pytest.mark.asyncio
+    async def test_handle_start_with_speaker_id(self, service):
+        """Start message with speaker_id stores it on the buffer."""
+        msg = MagicMock()
+        msg.subject = "ai.voice.stream.sess-2"
+        msg.data = msgpack.packb({"type": "start", "speaker_id": "user-42"})
+
+        await service.handle_stream_message(msg)
+        assert service.sessions["sess-2"].speaker_id == "user-42"
+
+    @pytest.mark.asyncio
+    async def test_handle_state_change(self, service):
+        """State change message updates the buffer state."""
+        # Create session first
+        service.sessions["sess-1"] = AudioBuffer("sess-1")
+
+        msg = MagicMock()
+        msg.subject = "ai.voice.stream.sess-1"
+        msg.data = msgpack.packb({"type": "state_change", "state": "responding"})
+
+        await service.handle_stream_message(msg)
+        assert service.sessions["sess-1"].state == "responding"
+
+    @pytest.mark.asyncio
+    async def test_handle_audio_chunk(self, service, sample_audio_b64):
+        """Audio chunks are added to the session buffer."""
+        service.sessions["sess-1"] = AudioBuffer("sess-1")
+        # Prevent auto-creation of monitoring task
+        service.processing_tasks["sess-1"] = MagicMock()
+
+        msg = MagicMock()
+        msg.subject = "ai.voice.stream.sess-1"
+        msg.data = msgpack.packb({"type": "chunk", "audio_b64": sample_audio_b64})
+
+        await service.handle_stream_message(msg)
+        assert service.sessions["sess-1"].total_bytes > 0
+
+    @pytest.mark.asyncio
+    async def test_handle_end_message(self, service, mock_http_client):
+        """End message triggers processing of remaining audio."""
+        buf = AudioBuffer("sess-1")
+        buf.add_chunk(b"\x00" * 100)
+        service.sessions["sess-1"] = buf
+
+        msg = MagicMock()
+        msg.subject = "ai.voice.stream.sess-1"
+        msg.data = msgpack.packb({"type": "end"})
+
+        await service.handle_stream_message(msg)
+
+        # Should have published a transcription
+        assert service.nc.publish.called
+
+    @pytest.mark.asyncio
+    async def test_handle_auto_create_session(self, service, sample_audio_b64):
+        """Chunk message auto-creates session when start was missed."""
+        msg = MagicMock()
+        msg.subject = "ai.voice.stream.new-sess"
+        msg.data = msgpack.packb({"type": "chunk", "audio_b64": sample_audio_b64})
+
+        await service.handle_stream_message(msg)
+        assert "new-sess" in service.sessions
+
+    @pytest.mark.asyncio
+    async def test_process_buffer_publishes_result(self, service, mock_http_client):
+        """process_buffer publishes transcription to NATS."""
+        buf = AudioBuffer("sess-1")
+        buf.add_chunk(b"\x00" * 100)
+        service.sessions["sess-1"] = buf
+
+        await service.process_buffer("sess-1")
+
+        # Verify transcription published
+        pub_calls = service.nc.publish.call_args_list
+        assert len(pub_calls) >= 1
+        subject = pub_calls[0].args[0]
+        assert subject == f"{TRANSCRIPTION_SUBJECT_PREFIX}.sess-1"
+        data = msgpack.unpackb(pub_calls[0].args[1], raw=False)
+        assert data["transcript"] == "Hello world"
+        assert data["session_id"] == "sess-1"
+
+    @pytest.mark.asyncio
+    async def test_process_buffer_no_session(self, service):
+        """process_buffer handles missing session gracefully."""
+        await service.process_buffer("nonexistent")
+        service.nc.publish.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_process_buffer_empty_audio(self, service):
+        """process_buffer skips empty buffers."""
+        service.sessions["sess-1"] = AudioBuffer("sess-1")
+        await service.process_buffer("sess-1")
+        service.nc.publish.assert_not_called()
+
+    def test_invalid_subject(self, service):
+        """Messages with invalid subjects are skipped."""
+        msg = MagicMock()
+        msg.subject = "ai.voice"  # Too few parts
+        msg.data = msgpack.packb({"type": "start"})
+
+        import asyncio
+
+        asyncio.get_event_loop().run_until_complete(service.handle_stream_message(msg))
+        assert len(service.sessions) == 0