feature/go-handler-refactor #1

Merged
billy merged 3 commits from feature/go-handler-refactor into main 2026-02-20 12:34:32 +00:00
3 changed files with 39 additions and 49 deletions
Showing only changes of commit af9f8cc01e - Show all commits

9
.dockerignore Normal file
View File

@@ -0,0 +1,9 @@
.git
.gitignore
*.md
LICENSE
renovate.json
*_test.go
e2e_test.go
__pycache__
.env*

2
Dockerfile
View File

@@ -10,7 +10,7 @@ RUN go mod download
COPY . .
RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-w -s" -o /stt-module .
RUN CGO_ENABLED=0 GOOS=linux GOAMD64=v3 go build -ldflags="-w -s" -o /stt-module .
# Runtime stage
FROM scratch

77
main.go
View File

@@ -3,7 +3,6 @@ package main
import (
"bytes"
"context"
"encoding/base64"
"encoding/binary"
"encoding/json"
"fmt"
@@ -25,6 +24,7 @@ import (
"git.daviestechlabs.io/daviestechlabs/handler-base/config"
"git.daviestechlabs.io/daviestechlabs/handler-base/health"
"git.daviestechlabs.io/daviestechlabs/handler-base/messages"
"git.daviestechlabs.io/daviestechlabs/handler-base/natsutil"
"git.daviestechlabs.io/daviestechlabs/handler-base/telemetry"
)
@@ -137,11 +137,7 @@ func (ab *AudioBuffer) shouldProcess(bufferSize, maxBufferSize int, chunkTimeout
func (ab *AudioBuffer) getAudio() []byte {
ab.mu.Lock()
defer ab.mu.Unlock()
var total int
for _, c := range ab.chunks {
total += len(c)
}
result := make([]byte, 0, total)
result := make([]byte, 0, ab.totalBytes)
for _, c := range ab.chunks {
result = append(result, c...)
}
@@ -327,16 +323,16 @@ func main() {
}
if transcript != "" {
result := map[string]any{
"session_id": sessionID,
"transcript": transcript,
"sequence": seq,
"is_partial": !complete,
"is_final": complete,
"timestamp": time.Now().Unix(),
"speaker_id": speakerID,
"has_voice_activity": hasVoice,
"state": state,
result := &messages.STTTranscription{
SessionID: sessionID,
Transcript: transcript,
Sequence: seq,
IsPartial: !complete,
IsFinal: complete,
Timestamp: time.Now().Unix(),
SpeakerID: speakerID,
HasVoiceActivity: hasVoice,
State: state,
}
packed, _ := msgpack.Marshal(result)
nc.Conn().Publish(fmt.Sprintf("%s.%s", transcriptionSubjectPrefix, sessionID), packed)
@@ -382,26 +378,21 @@ func main() {
}
sessionID := parts[3]
data, err := natsutil.DecodeMsgpackMap(natMsg.Data)
streamMsg, err := natsutil.Decode[messages.STTStreamMessage](natMsg.Data)
if err != nil {
slog.Error("decode error", "error", err)
return
}
msgType := ""
if t, ok := data["type"].(string); ok {
msgType = t
}
switch msgType {
switch streamMsg.Type {
case "start":
slog.Info("starting stream session", "session", sessionID)
buf := newAudioBuffer(sessionID)
if s, ok := data["state"].(string); ok {
buf.setState(s)
if streamMsg.State != "" {
buf.setState(streamMsg.State)
}
if s, ok := data["speaker_id"].(string); ok {
buf.speakerID = s
if streamMsg.SpeakerID != "" {
buf.speakerID = streamMsg.SpeakerID
}
sessionsMu.Lock()
sessions[sessionID] = buf
@@ -412,10 +403,8 @@ func main() {
sessionsMu.RLock()
buffer, ok := sessions[sessionID]
sessionsMu.RUnlock()
if ok {
if s, ok := data["state"].(string); ok {
buffer.setState(s)
}
if ok && streamMsg.State != "" {
buffer.setState(streamMsg.State)
}
case "end":
@@ -434,16 +423,8 @@ func main() {
}
case "chunk":
audioB64 := ""
if s, ok := data["audio_b64"].(string); ok {
audioB64 = s
}
if audioB64 == "" {
return
}
audioBytes, err := base64.StdEncoding.DecodeString(audioB64)
if err != nil {
slog.Error("base64 decode failed", "error", err)
// Audio arrives as raw bytes — no base64 decode needed
if len(streamMsg.Audio) == 0 {
return
}
@@ -459,12 +440,12 @@ func main() {
sessionsMu.Unlock()
// Check for interrupt
if buffer.checkInterrupt(audioBytes, enableInterrupt, audioLevelThreshold, interruptDuration) {
interruptMsg := map[string]any{
"session_id": sessionID,
"type": "interrupt",
"timestamp": time.Now().Unix(),
"speaker_id": buffer.speakerID,
if buffer.checkInterrupt(streamMsg.Audio, enableInterrupt, audioLevelThreshold, interruptDuration) {
interruptMsg := &messages.STTInterrupt{
SessionID: sessionID,
Type: "interrupt",
Timestamp: time.Now().Unix(),
SpeakerID: buffer.speakerID,
}
packed, _ := msgpack.Marshal(interruptMsg)
nc.Conn().Publish(fmt.Sprintf("%s.%s", transcriptionSubjectPrefix, sessionID), packed)
@@ -472,7 +453,7 @@ func main() {
buffer.setState(stateListening)
}
buffer.addChunk(audioBytes)
buffer.addChunk(streamMsg.Audio)
}
}