6 Commits

Author SHA1 Message Date
3585d81ff5 feat: add StreamGenerate for real SSE streaming from LLM
Some checks failed
CI / Lint (push) Failing after 2m44s
CI / Test (push) Successful in 3m7s
CI / Release (push) Has been skipped
CI / Notify Downstream (chat-handler) (push) Has been skipped
CI / Notify Downstream (pipeline-bridge) (push) Has been skipped
CI / Notify Downstream (stt-module) (push) Has been skipped
CI / Notify Downstream (tts-module) (push) Has been skipped
CI / Notify Downstream (voice-assistant) (push) Has been skipped
CI / Notify (push) Successful in 2s
- Add postJSONStream() for incremental response body reading
- Add LLMClient.StreamGenerate() with SSE parsing and onToken callback
- Supports stream:true, parses data: lines, handles [DONE] sentinel
- Graceful partial-text return on stream interruption
- 9 new tests covering happy path, edge cases, cancellation
2026-02-20 17:55:01 -05:00
fba7b62573 fix: rename GITEA_TOKEN to DISPATCH_TOKEN to avoid built-in prefix
All checks were successful
CI / Lint (push) Successful in 2m52s
CI / Test (push) Successful in 2m46s
CI / Release (push) Successful in 52s
CI / Notify Downstream (chat-handler) (push) Successful in 2s
CI / Notify Downstream (pipeline-bridge) (push) Successful in 2s
CI / Notify Downstream (stt-module) (push) Successful in 2s
CI / Notify Downstream (tts-module) (push) Successful in 2s
CI / Notify Downstream (voice-assistant) (push) Successful in 2s
CI / Notify (push) Successful in 2s
2026-02-20 09:10:13 -05:00
6fd0b9a265 feat: add downstream dependency cascade on release
Some checks failed
CI / Lint (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release (push) Has been cancelled
CI / Notify Downstream (chat-handler) (push) Has been cancelled
CI / Notify Downstream (pipeline-bridge) (push) Has been cancelled
CI / Notify Downstream (voice-assistant) (push) Has been cancelled
CI / Notify (push) Has been cancelled
CI / Notify Downstream (stt-module) (push) Has been cancelled
CI / Notify Downstream (tts-module) (push) Has been cancelled
After a successful release tag, notify 5 downstream repos via
Gitea repository_dispatch so they auto-update handler-base.
2026-02-20 09:05:46 -05:00
8b6232141a ci: verify Go CI pipeline
Some checks failed
CI / Lint (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Release (push) Has been cancelled
CI / Notify (push) Has been cancelled
2026-02-20 09:01:44 -05:00
9876cb9388 fix: replace Python CI workflow with Go CI
All checks were successful
CI / Release (push) Successful in 58s
CI / Notify (push) Successful in 1s
CI / Lint (push) Successful in 3m24s
CI / Test (push) Successful in 3m14s
- Replace uv/ruff/pytest with Go setup, golangci-lint, go test
- Library-only: lint + test + release (tag) + notify, no Docker build
2026-02-20 08:49:29 -05:00
39673d31b8 fix: resolve golangci-lint errcheck warnings
Some checks failed
CI / Lint (push) Failing after 59s
CI / Test (push) Failing after 1m39s
CI / Release (push) Has been cancelled
CI / Notify (push) Has been cancelled
- Add error checks for unchecked return values (errcheck)
- Remove unused struct fields (unused)
- Fix gofmt formatting issues
2026-02-20 08:45:19 -05:00
8 changed files with 761 additions and 348 deletions

View File

@@ -17,20 +17,22 @@ jobs:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Set up uv - name: Set up Go
run: curl -LsSf https://astral.sh/uv/install.sh | sh && echo "$HOME/.local/bin" >> $GITHUB_PATH uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache: true
- name: Set up Python - name: Run go vet
run: uv python install 3.13 run: go vet ./...
- name: Install dependencies - name: Install golangci-lint
run: uv sync --frozen --extra dev run: |
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/HEAD/install.sh | sh -s -- -b "$(go env GOPATH)/bin"
echo "$(go env GOPATH)/bin" >> $GITHUB_PATH
- name: Run ruff check - name: Run golangci-lint
run: uv run ruff check . run: golangci-lint run ./...
- name: Run ruff format check
run: uv run ruff format --check .
test: test:
name: Test name: Test
@@ -39,23 +41,28 @@ jobs:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Set up uv - name: Set up Go
run: curl -LsSf https://astral.sh/uv/install.sh | sh && echo "$HOME/.local/bin" >> $GITHUB_PATH uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache: true
- name: Set up Python - name: Verify dependencies
run: uv python install 3.13 run: go mod verify
- name: Install dependencies - name: Build
run: uv sync --frozen --extra dev run: go build -v ./...
- name: Run tests with coverage - name: Run tests
run: uv run pytest --cov=handler_base --cov-report=xml --cov-report=term run: go test -v -race -coverprofile=coverage.out -covermode=atomic ./...
release: release:
name: Release name: Release
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: [lint, test] needs: [lint, test]
if: gitea.ref == 'refs/heads/main' && gitea.event_name == 'push' if: gitea.ref == 'refs/heads/main' && gitea.event_name == 'push'
outputs:
version: ${{ steps.version.outputs.version }}
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v4 uses: actions/checkout@v4
@@ -95,10 +102,32 @@ jobs:
git tag -a ${{ steps.version.outputs.version }} -m "Release ${{ steps.version.outputs.version }}" git tag -a ${{ steps.version.outputs.version }} -m "Release ${{ steps.version.outputs.version }}"
git push origin ${{ steps.version.outputs.version }} git push origin ${{ steps.version.outputs.version }}
notify-downstream:
name: Notify Downstream
runs-on: ubuntu-latest
needs: [release]
if: needs.release.result == 'success'
strategy:
matrix:
repo:
- chat-handler
- pipeline-bridge
- tts-module
- voice-assistant
- stt-module
steps:
- name: Trigger dependency update
run: |
curl -s -X POST \
-H "Authorization: token ${{ secrets.DISPATCH_TOKEN }}" \
-H "Content-Type: application/json" \
-d '{"event_type":"handler-base-release","client_payload":{"version":"${{ needs.release.outputs.version }}"}}' \
"${{ gitea.server_url }}/api/v1/repos/daviestechlabs/${{ matrix.repo }}/dispatches"
notify: notify:
name: Notify name: Notify
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: [lint, test, release] needs: [lint, test, release, notify-downstream]
if: always() if: always()
steps: steps:
- name: Notify on success - name: Notify on success

View File

@@ -6,16 +6,18 @@
package clients package clients
import ( import (
"bytes" "bufio"
"context" "bytes"
"encoding/json" "context"
"fmt" "encoding/json"
"io" "fmt"
"mime/multipart" "io"
"net/http" "mime/multipart"
"net/url" "net/http"
"sync" "net/url"
"time" "strings"
"sync"
"time"
) )
// ─── Shared transport & buffer pool ───────────────────────────────────────── // ─── Shared transport & buffer pool ─────────────────────────────────────────
@@ -23,393 +25,490 @@ import (
// SharedTransport is the process-wide HTTP transport used by every service // SharedTransport is the process-wide HTTP transport used by every service
// client. Tweak pool sizes here rather than creating per-client transports. // client. Tweak pool sizes here rather than creating per-client transports.
var SharedTransport = &http.Transport{ var SharedTransport = &http.Transport{
MaxIdleConns: 100, MaxIdleConns: 100,
MaxIdleConnsPerHost: 10, MaxIdleConnsPerHost: 10,
IdleConnTimeout: 90 * time.Second, IdleConnTimeout: 90 * time.Second,
DisableCompression: true, // in-cluster traffic; skip gzip overhead DisableCompression: true, // in-cluster traffic; skip gzip overhead
} }
// bufPool recycles *bytes.Buffer to avoid per-request allocations. // bufPool recycles *bytes.Buffer to avoid per-request allocations.
var bufPool = sync.Pool{ var bufPool = sync.Pool{
New: func() any { return new(bytes.Buffer) }, New: func() any { return new(bytes.Buffer) },
} }
func getBuf() *bytes.Buffer { func getBuf() *bytes.Buffer {
buf := bufPool.Get().(*bytes.Buffer) buf := bufPool.Get().(*bytes.Buffer)
buf.Reset() buf.Reset()
return buf return buf
} }
func putBuf(buf *bytes.Buffer) { func putBuf(buf *bytes.Buffer) {
if buf.Cap() > 1<<20 { // don't cache buffers > 1 MiB if buf.Cap() > 1<<20 { // don't cache buffers > 1 MiB
return return
} }
bufPool.Put(buf) bufPool.Put(buf)
} }
// ─── httpClient base ──────────────────────────────────────────────────────── // ─── httpClient base ────────────────────────────────────────────────────────
// httpClient is the shared base for all service clients. // httpClient is the shared base for all service clients.
type httpClient struct { type httpClient struct {
client *http.Client client *http.Client
baseURL string baseURL string
} }
func newHTTPClient(baseURL string, timeout time.Duration) *httpClient { func newHTTPClient(baseURL string, timeout time.Duration) *httpClient {
return &httpClient{ return &httpClient{
client: &http.Client{ client: &http.Client{
Timeout: timeout, Timeout: timeout,
Transport: SharedTransport, Transport: SharedTransport,
}, },
baseURL: baseURL, baseURL: baseURL,
} }
} }
func (h *httpClient) postJSON(ctx context.Context, path string, body any) ([]byte, error) { func (h *httpClient) postJSON(ctx context.Context, path string, body any) ([]byte, error) {
buf := getBuf() buf := getBuf()
defer putBuf(buf) defer putBuf(buf)
if err := json.NewEncoder(buf).Encode(body); err != nil { if err := json.NewEncoder(buf).Encode(body); err != nil {
return nil, fmt.Errorf("marshal: %w", err) return nil, fmt.Errorf("marshal: %w", err)
} }
req, err := http.NewRequestWithContext(ctx, http.MethodPost, h.baseURL+path, buf) req, err := http.NewRequestWithContext(ctx, http.MethodPost, h.baseURL+path, buf)
if err != nil { if err != nil {
return nil, err return nil, err
} }
req.Header.Set("Content-Type", "application/json") req.Header.Set("Content-Type", "application/json")
return h.do(req) return h.do(req)
} }
func (h *httpClient) get(ctx context.Context, path string, params url.Values) ([]byte, error) { func (h *httpClient) get(ctx context.Context, path string, params url.Values) ([]byte, error) {
u := h.baseURL + path u := h.baseURL + path
if len(params) > 0 { if len(params) > 0 {
u += "?" + params.Encode() u += "?" + params.Encode()
} }
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return h.do(req) return h.do(req)
} }
func (h *httpClient) getRaw(ctx context.Context, path string, params url.Values) ([]byte, error) { func (h *httpClient) getRaw(ctx context.Context, path string, params url.Values) ([]byte, error) {
return h.get(ctx, path, params) return h.get(ctx, path, params)
} }
func (h *httpClient) postMultipart(ctx context.Context, path string, fieldName string, fileName string, fileData []byte, fields map[string]string) ([]byte, error) { func (h *httpClient) postMultipart(ctx context.Context, path string, fieldName string, fileName string, fileData []byte, fields map[string]string) ([]byte, error) {
buf := getBuf() buf := getBuf()
defer putBuf(buf) defer putBuf(buf)
w := multipart.NewWriter(buf) w := multipart.NewWriter(buf)
part, err := w.CreateFormFile(fieldName, fileName) part, err := w.CreateFormFile(fieldName, fileName)
if err != nil { if err != nil {
return nil, err return nil, err
} }
if _, err := part.Write(fileData); err != nil { if _, err := part.Write(fileData); err != nil {
return nil, err return nil, err
} }
for k, v := range fields { for k, v := range fields {
_ = w.WriteField(k, v) _ = w.WriteField(k, v)
} }
_ = w.Close() _ = w.Close()
req, err := http.NewRequestWithContext(ctx, http.MethodPost, h.baseURL+path, buf) req, err := http.NewRequestWithContext(ctx, http.MethodPost, h.baseURL+path, buf)
if err != nil { if err != nil {
return nil, err return nil, err
} }
req.Header.Set("Content-Type", w.FormDataContentType()) req.Header.Set("Content-Type", w.FormDataContentType())
return h.do(req) return h.do(req)
} }
func (h *httpClient) do(req *http.Request) ([]byte, error) { func (h *httpClient) do(req *http.Request) ([]byte, error) {
resp, err := h.client.Do(req) resp, err := h.client.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("http %s %s: %w", req.Method, req.URL.Path, err) return nil, fmt.Errorf("http %s %s: %w", req.Method, req.URL.Path, err)
} }
defer resp.Body.Close() defer func() { _ = resp.Body.Close() }()
buf := getBuf() buf := getBuf()
defer putBuf(buf) defer putBuf(buf)
if _, err := io.Copy(buf, resp.Body); err != nil { if _, err := io.Copy(buf, resp.Body); err != nil {
return nil, fmt.Errorf("read body: %w", err) return nil, fmt.Errorf("read body: %w", err)
}
// Return a copy so the pooled buffer can be safely recycled.
body := make([]byte, buf.Len())
copy(body, buf.Bytes())
if resp.StatusCode >= 400 {
return nil, fmt.Errorf("http %d: %s", resp.StatusCode, string(body))
}
return body, nil
} }
// Return a copy so the pooled buffer can be safely recycled. // postJSONStream sends a JSON POST and returns the raw *http.Response so the
body := make([]byte, buf.Len()) // caller can read the body incrementally (e.g. for SSE streaming). The caller
copy(body, buf.Bytes()) // is responsible for closing resp.Body.
func (h *httpClient) postJSONStream(ctx context.Context, path string, body any) (*http.Response, error) {
buf := getBuf()
defer putBuf(buf)
if err := json.NewEncoder(buf).Encode(body); err != nil {
return nil, fmt.Errorf("marshal: %w", err)
}
// Copy to a non-pooled buffer so we can safely return the pool buffer.
payload := make([]byte, buf.Len())
copy(payload, buf.Bytes())
if resp.StatusCode >= 400 { req, err := http.NewRequestWithContext(ctx, http.MethodPost, h.baseURL+path, bytes.NewReader(payload))
return nil, fmt.Errorf("http %d: %s", resp.StatusCode, string(body)) if err != nil {
} return nil, err
return body, nil }
req.Header.Set("Content-Type", "application/json")
resp, err := h.client.Do(req)
if err != nil {
return nil, fmt.Errorf("http %s %s: %w", req.Method, req.URL.Path, err)
}
if resp.StatusCode >= 400 {
respBody, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return nil, fmt.Errorf("http %d: %s", resp.StatusCode, string(respBody))
}
return resp, nil
} }
func (h *httpClient) healthCheck(ctx context.Context) bool { func (h *httpClient) healthCheck(ctx context.Context) bool {
data, err := h.get(ctx, "/health", nil) data, err := h.get(ctx, "/health", nil)
_ = data _ = data
return err == nil return err == nil
} }
// ─── Embeddings Client ────────────────────────────────────────────────────── // ─── Embeddings Client ──────────────────────────────────────────────────────
// EmbeddingsClient calls the embeddings service (Infinity/BGE). // EmbeddingsClient calls the embeddings service (Infinity/BGE).
type EmbeddingsClient struct { type EmbeddingsClient struct {
*httpClient *httpClient
Model string Model string
} }
// NewEmbeddingsClient creates an embeddings client. // NewEmbeddingsClient creates an embeddings client.
func NewEmbeddingsClient(baseURL string, timeout time.Duration, model string) *EmbeddingsClient { func NewEmbeddingsClient(baseURL string, timeout time.Duration, model string) *EmbeddingsClient {
if model == "" { if model == "" {
model = "bge" model = "bge"
} }
return &EmbeddingsClient{httpClient: newHTTPClient(baseURL, timeout), Model: model} return &EmbeddingsClient{httpClient: newHTTPClient(baseURL, timeout), Model: model}
} }
// Embed generates embeddings for a list of texts. // Embed generates embeddings for a list of texts.
func (c *EmbeddingsClient) Embed(ctx context.Context, texts []string) ([][]float64, error) { func (c *EmbeddingsClient) Embed(ctx context.Context, texts []string) ([][]float64, error) {
body, err := c.postJSON(ctx, "/embeddings", map[string]any{ body, err := c.postJSON(ctx, "/embeddings", map[string]any{
"input": texts, "input": texts,
"model": c.Model, "model": c.Model,
}) })
if err != nil { if err != nil {
return nil, err return nil, err
} }
var resp struct { var resp struct {
Data []struct { Data []struct {
Embedding []float64 `json:"embedding"` Embedding []float64 `json:"embedding"`
} `json:"data"` } `json:"data"`
} }
if err := json.Unmarshal(body, &resp); err != nil { if err := json.Unmarshal(body, &resp); err != nil {
return nil, err return nil, err
} }
result := make([][]float64, len(resp.Data)) result := make([][]float64, len(resp.Data))
for i, d := range resp.Data { for i, d := range resp.Data {
result[i] = d.Embedding result[i] = d.Embedding
} }
return result, nil return result, nil
} }
// EmbedSingle generates an embedding for a single text. // EmbedSingle generates an embedding for a single text.
func (c *EmbeddingsClient) EmbedSingle(ctx context.Context, text string) ([]float64, error) { func (c *EmbeddingsClient) EmbedSingle(ctx context.Context, text string) ([]float64, error) {
results, err := c.Embed(ctx, []string{text}) results, err := c.Embed(ctx, []string{text})
if err != nil { if err != nil {
return nil, err return nil, err
} }
if len(results) == 0 { if len(results) == 0 {
return nil, fmt.Errorf("empty embedding result") return nil, fmt.Errorf("empty embedding result")
} }
return results[0], nil return results[0], nil
} }
// Health checks if the embeddings service is healthy. // Health checks if the embeddings service is healthy.
func (c *EmbeddingsClient) Health(ctx context.Context) bool { func (c *EmbeddingsClient) Health(ctx context.Context) bool {
return c.healthCheck(ctx) return c.healthCheck(ctx)
} }
// ─── Reranker Client ──────────────────────────────────────────────────────── // ─── Reranker Client ────────────────────────────────────────────────────────
// RerankerClient calls the reranker service (BGE Reranker). // RerankerClient calls the reranker service (BGE Reranker).
type RerankerClient struct { type RerankerClient struct {
*httpClient *httpClient
} }
// NewRerankerClient creates a reranker client. // NewRerankerClient creates a reranker client.
func NewRerankerClient(baseURL string, timeout time.Duration) *RerankerClient { func NewRerankerClient(baseURL string, timeout time.Duration) *RerankerClient {
return &RerankerClient{httpClient: newHTTPClient(baseURL, timeout)} return &RerankerClient{httpClient: newHTTPClient(baseURL, timeout)}
} }
// RerankResult represents a reranked document. // RerankResult represents a reranked document.
type RerankResult struct { type RerankResult struct {
Index int `json:"index"` Index int `json:"index"`
Score float64 `json:"score"` Score float64 `json:"score"`
Document string `json:"document"` Document string `json:"document"`
} }
// Rerank reranks documents by relevance to the query. // Rerank reranks documents by relevance to the query.
func (c *RerankerClient) Rerank(ctx context.Context, query string, documents []string, topK int) ([]RerankResult, error) { func (c *RerankerClient) Rerank(ctx context.Context, query string, documents []string, topK int) ([]RerankResult, error) {
payload := map[string]any{ payload := map[string]any{
"query": query, "query": query,
"documents": documents, "documents": documents,
} }
if topK > 0 { if topK > 0 {
payload["top_n"] = topK payload["top_n"] = topK
} }
body, err := c.postJSON(ctx, "/rerank", payload) body, err := c.postJSON(ctx, "/rerank", payload)
if err != nil { if err != nil {
return nil, err return nil, err
} }
var resp struct { var resp struct {
Results []struct { Results []struct {
Index int `json:"index"` Index int `json:"index"`
RelevanceScore float64 `json:"relevance_score"` RelevanceScore float64 `json:"relevance_score"`
Score float64 `json:"score"` Score float64 `json:"score"`
} `json:"results"` } `json:"results"`
} }
if err := json.Unmarshal(body, &resp); err != nil { if err := json.Unmarshal(body, &resp); err != nil {
return nil, err return nil, err
} }
results := make([]RerankResult, len(resp.Results)) results := make([]RerankResult, len(resp.Results))
for i, r := range resp.Results { for i, r := range resp.Results {
score := r.RelevanceScore score := r.RelevanceScore
if score == 0 { if score == 0 {
score = r.Score score = r.Score
} }
doc := "" doc := ""
if r.Index < len(documents) { if r.Index < len(documents) {
doc = documents[r.Index] doc = documents[r.Index]
} }
results[i] = RerankResult{Index: r.Index, Score: score, Document: doc} results[i] = RerankResult{Index: r.Index, Score: score, Document: doc}
} }
return results, nil return results, nil
} }
// ─── LLM Client ───────────────────────────────────────────────────────────── // ─── LLM Client ─────────────────────────────────────────────────────────────
// LLMClient calls the vLLM-compatible LLM service. // LLMClient calls the vLLM-compatible LLM service.
type LLMClient struct { type LLMClient struct {
*httpClient *httpClient
Model string Model string
MaxTokens int MaxTokens int
Temperature float64 Temperature float64
TopP float64 TopP float64
} }
// NewLLMClient creates an LLM client. // NewLLMClient creates an LLM client.
func NewLLMClient(baseURL string, timeout time.Duration) *LLMClient { func NewLLMClient(baseURL string, timeout time.Duration) *LLMClient {
return &LLMClient{ return &LLMClient{
httpClient: newHTTPClient(baseURL, timeout), httpClient: newHTTPClient(baseURL, timeout),
Model: "default", Model: "default",
MaxTokens: 2048, MaxTokens: 2048,
Temperature: 0.7, Temperature: 0.7,
TopP: 0.9, TopP: 0.9,
} }
} }
// ChatMessage is an OpenAI-compatible message. // ChatMessage is an OpenAI-compatible message.
type ChatMessage struct { type ChatMessage struct {
Role string `json:"role"` Role string `json:"role"`
Content string `json:"content"` Content string `json:"content"`
} }
// Generate sends a chat completion request and returns the response text. // Generate sends a chat completion request and returns the response text.
func (c *LLMClient) Generate(ctx context.Context, prompt string, context_ string, systemPrompt string) (string, error) { func (c *LLMClient) Generate(ctx context.Context, prompt string, context_ string, systemPrompt string) (string, error) {
messages := buildMessages(prompt, context_, systemPrompt) messages := buildMessages(prompt, context_, systemPrompt)
payload := map[string]any{ payload := map[string]any{
"model": c.Model, "model": c.Model,
"messages": messages, "messages": messages,
"max_tokens": c.MaxTokens, "max_tokens": c.MaxTokens,
"temperature": c.Temperature, "temperature": c.Temperature,
"top_p": c.TopP, "top_p": c.TopP,
}
body, err := c.postJSON(ctx, "/v1/chat/completions", payload)
if err != nil {
return "", err
}
var resp struct {
Choices []struct {
Message struct {
Content string `json:"content"`
} `json:"message"`
} `json:"choices"`
}
if err := json.Unmarshal(body, &resp); err != nil {
return "", err
}
if len(resp.Choices) == 0 {
return "", fmt.Errorf("no choices in LLM response")
}
return resp.Choices[0].Message.Content, nil
} }
body, err := c.postJSON(ctx, "/v1/chat/completions", payload)
if err != nil { // StreamGenerate sends a streaming chat completion request and calls onToken
return "", err // for each content delta received via SSE. Returns the fully assembled text.
} // The onToken callback is invoked synchronously on the calling goroutine; it
var resp struct { // should be fast (e.g. publish a NATS message).
Choices []struct { func (c *LLMClient) StreamGenerate(ctx context.Context, prompt string, context_ string, systemPrompt string, onToken func(token string)) (string, error) {
Message struct { msgs := buildMessages(prompt, context_, systemPrompt)
Content string `json:"content"` payload := map[string]any{
} `json:"message"` "model": c.Model,
} `json:"choices"` "messages": msgs,
} "max_tokens": c.MaxTokens,
if err := json.Unmarshal(body, &resp); err != nil { "temperature": c.Temperature,
return "", err "top_p": c.TopP,
} "stream": true,
if len(resp.Choices) == 0 { }
return "", fmt.Errorf("no choices in LLM response")
} resp, err := c.postJSONStream(ctx, "/v1/chat/completions", payload)
return resp.Choices[0].Message.Content, nil if err != nil {
return "", err
}
defer func() { _ = resp.Body.Close() }()
var full strings.Builder
scanner := bufio.NewScanner(resp.Body)
// SSE lines can be up to 64 KiB for large token batches.
scanner.Buffer(make([]byte, 0, 64*1024), 64*1024)
for scanner.Scan() {
line := scanner.Text()
if !strings.HasPrefix(line, "data: ") {
continue
}
data := strings.TrimPrefix(line, "data: ")
if data == "[DONE]" {
break
}
var chunk struct {
Choices []struct {
Delta struct {
Content string `json:"content"`
} `json:"delta"`
} `json:"choices"`
}
if err := json.Unmarshal([]byte(data), &chunk); err != nil {
continue // skip malformed chunks
}
if len(chunk.Choices) == 0 {
continue
}
token := chunk.Choices[0].Delta.Content
if token == "" {
continue
}
full.WriteString(token)
if onToken != nil {
onToken(token)
}
}
if err := scanner.Err(); err != nil {
// If we already collected some text, return it with the error.
if full.Len() > 0 {
return full.String(), fmt.Errorf("stream interrupted: %w", err)
}
return "", fmt.Errorf("stream read: %w", err)
}
return full.String(), nil
} }
func buildMessages(prompt, ctx, systemPrompt string) []ChatMessage { func buildMessages(prompt, ctx, systemPrompt string) []ChatMessage {
var msgs []ChatMessage var msgs []ChatMessage
if systemPrompt != "" { if systemPrompt != "" {
msgs = append(msgs, ChatMessage{Role: "system", Content: systemPrompt}) msgs = append(msgs, ChatMessage{Role: "system", Content: systemPrompt})
} else if ctx != "" { } else if ctx != "" {
msgs = append(msgs, ChatMessage{Role: "system", Content: "You are a helpful assistant. Use the provided context to answer the user's question. If the context doesn't contain relevant information, say so."}) msgs = append(msgs, ChatMessage{Role: "system", Content: "You are a helpful assistant. Use the provided context to answer the user's question. If the context doesn't contain relevant information, say so."})
} }
if ctx != "" { if ctx != "" {
msgs = append(msgs, ChatMessage{Role: "user", Content: fmt.Sprintf("Context:\n%s\n\nQuestion: %s", ctx, prompt)}) msgs = append(msgs, ChatMessage{Role: "user", Content: fmt.Sprintf("Context:\n%s\n\nQuestion: %s", ctx, prompt)})
} else { } else {
msgs = append(msgs, ChatMessage{Role: "user", Content: prompt}) msgs = append(msgs, ChatMessage{Role: "user", Content: prompt})
} }
return msgs return msgs
} }
// ─── TTS Client ───────────────────────────────────────────────────────────── // ─── TTS Client ─────────────────────────────────────────────────────────────
// TTSClient calls the TTS service (Coqui XTTS). // TTSClient calls the TTS service (Coqui XTTS).
type TTSClient struct { type TTSClient struct {
*httpClient *httpClient
Language string Language string
} }
// NewTTSClient creates a TTS client. // NewTTSClient creates a TTS client.
func NewTTSClient(baseURL string, timeout time.Duration, language string) *TTSClient { func NewTTSClient(baseURL string, timeout time.Duration, language string) *TTSClient {
if language == "" { if language == "" {
language = "en" language = "en"
} }
return &TTSClient{httpClient: newHTTPClient(baseURL, timeout), Language: language} return &TTSClient{httpClient: newHTTPClient(baseURL, timeout), Language: language}
} }
// Synthesize generates audio bytes from text. // Synthesize generates audio bytes from text.
func (c *TTSClient) Synthesize(ctx context.Context, text, language, speaker string) ([]byte, error) { func (c *TTSClient) Synthesize(ctx context.Context, text, language, speaker string) ([]byte, error) {
if language == "" { if language == "" {
language = c.Language language = c.Language
} }
params := url.Values{ params := url.Values{
"text": {text}, "text": {text},
"language_id": {language}, "language_id": {language},
} }
if speaker != "" { if speaker != "" {
params.Set("speaker_id", speaker) params.Set("speaker_id", speaker)
} }
return c.getRaw(ctx, "/api/tts", params) return c.getRaw(ctx, "/api/tts", params)
} }
// ─── STT Client ───────────────────────────────────────────────────────────── // ─── STT Client ─────────────────────────────────────────────────────────────
// STTClient calls the Whisper STT service. // STTClient calls the Whisper STT service.
type STTClient struct { type STTClient struct {
*httpClient *httpClient
Language string Language string
Task string Task string
} }
// NewSTTClient creates an STT client. // NewSTTClient creates an STT client.
func NewSTTClient(baseURL string, timeout time.Duration) *STTClient { func NewSTTClient(baseURL string, timeout time.Duration) *STTClient {
return &STTClient{httpClient: newHTTPClient(baseURL, timeout), Task: "transcribe"} return &STTClient{httpClient: newHTTPClient(baseURL, timeout), Task: "transcribe"}
} }
// TranscribeResult holds transcription output. // TranscribeResult holds transcription output.
type TranscribeResult struct { type TranscribeResult struct {
Text string `json:"text"` Text string `json:"text"`
Language string `json:"language,omitempty"` Language string `json:"language,omitempty"`
} }
// Transcribe sends audio to Whisper and returns the transcription. // Transcribe sends audio to Whisper and returns the transcription.
func (c *STTClient) Transcribe(ctx context.Context, audio []byte, language string) (*TranscribeResult, error) { func (c *STTClient) Transcribe(ctx context.Context, audio []byte, language string) (*TranscribeResult, error) {
if language == "" { if language == "" {
language = c.Language language = c.Language
} }
fields := map[string]string{ fields := map[string]string{
"response_format": "json", "response_format": "json",
} }
if language != "" { if language != "" {
fields["language"] = language fields["language"] = language
} }
endpoint := "/v1/audio/transcriptions" endpoint := "/v1/audio/transcriptions"
if c.Task == "translate" { if c.Task == "translate" {
endpoint = "/v1/audio/translations" endpoint = "/v1/audio/translations"
} }
body, err := c.postMultipart(ctx, endpoint, "file", "audio.wav", audio, fields) body, err := c.postMultipart(ctx, endpoint, "file", "audio.wav", audio, fields)
if err != nil { if err != nil {
return nil, err return nil, err
} }
var result TranscribeResult var result TranscribeResult
if err := json.Unmarshal(body, &result); err != nil { if err := json.Unmarshal(body, &result); err != nil {
return nil, err return nil, err
} }
return &result, nil return &result, nil
} }
// ─── Milvus Client ────────────────────────────────────────────────────────── // ─── Milvus Client ──────────────────────────────────────────────────────────
@@ -417,21 +516,20 @@ return &result, nil
// MilvusClient provides vector search via the Milvus HTTP/gRPC API. // MilvusClient provides vector search via the Milvus HTTP/gRPC API.
// For the Go port we use the Milvus Go SDK. // For the Go port we use the Milvus Go SDK.
type MilvusClient struct { type MilvusClient struct {
Host string Host string
Port int Port int
Collection string Collection string
connected bool
} }
// NewMilvusClient creates a Milvus client. // NewMilvusClient creates a Milvus client.
func NewMilvusClient(host string, port int, collection string) *MilvusClient { func NewMilvusClient(host string, port int, collection string) *MilvusClient {
return &MilvusClient{Host: host, Port: port, Collection: collection} return &MilvusClient{Host: host, Port: port, Collection: collection}
} }
// SearchResult holds a single vector search hit. // SearchResult holds a single vector search hit.
type SearchResult struct { type SearchResult struct {
ID int64 `json:"id"` ID int64 `json:"id"`
Distance float64 `json:"distance"` Distance float64 `json:"distance"`
Score float64 `json:"score"` Score float64 `json:"score"`
Fields map[string]any `json:"fields,omitempty"` Fields map[string]any `json:"fields,omitempty"`
} }

View File

@@ -4,6 +4,7 @@ import (
"bytes" "bytes"
"context" "context"
"encoding/json" "encoding/json"
"fmt"
"io" "io"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
@@ -90,14 +91,14 @@ func TestEmbeddingsClient_Embed(t *testing.T) {
t.Errorf("method = %s, want POST", r.Method) t.Errorf("method = %s, want POST", r.Method)
} }
var req map[string]any var req map[string]any
json.NewDecoder(r.Body).Decode(&req) _ = json.NewDecoder(r.Body).Decode(&req)
input, _ := req["input"].([]any) input, _ := req["input"].([]any)
if len(input) != 2 { if len(input) != 2 {
t.Errorf("input len = %d, want 2", len(input)) t.Errorf("input len = %d, want 2", len(input))
} }
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]any{ _ = json.NewEncoder(w).Encode(map[string]any{
"data": []map[string]any{ "data": []map[string]any{
{"embedding": []float64{0.1, 0.2, 0.3}}, {"embedding": []float64{0.1, 0.2, 0.3}},
{"embedding": []float64{0.4, 0.5, 0.6}}, {"embedding": []float64{0.4, 0.5, 0.6}},
@@ -121,7 +122,7 @@ func TestEmbeddingsClient_Embed(t *testing.T) {
func TestEmbeddingsClient_EmbedSingle(t *testing.T) { func TestEmbeddingsClient_EmbedSingle(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
json.NewEncoder(w).Encode(map[string]any{ _ = json.NewEncoder(w).Encode(map[string]any{
"data": []map[string]any{ "data": []map[string]any{
{"embedding": []float64{1.0, 2.0}}, {"embedding": []float64{1.0, 2.0}},
}, },
@@ -141,7 +142,7 @@ func TestEmbeddingsClient_EmbedSingle(t *testing.T) {
func TestEmbeddingsClient_EmbedEmpty(t *testing.T) { func TestEmbeddingsClient_EmbedEmpty(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
json.NewEncoder(w).Encode(map[string]any{"data": []any{}}) _ = json.NewEncoder(w).Encode(map[string]any{"data": []any{}})
})) }))
defer ts.Close() defer ts.Close()
@@ -175,11 +176,11 @@ func TestEmbeddingsClient_Health(t *testing.T) {
func TestRerankerClient_Rerank(t *testing.T) { func TestRerankerClient_Rerank(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var req map[string]any var req map[string]any
json.NewDecoder(r.Body).Decode(&req) _ = json.NewDecoder(r.Body).Decode(&req)
if req["query"] != "test query" { if req["query"] != "test query" {
t.Errorf("query = %v", req["query"]) t.Errorf("query = %v", req["query"])
} }
json.NewEncoder(w).Encode(map[string]any{ _ = json.NewEncoder(w).Encode(map[string]any{
"results": []map[string]any{ "results": []map[string]any{
{"index": 1, "relevance_score": 0.95}, {"index": 1, "relevance_score": 0.95},
{"index": 0, "relevance_score": 0.80}, {"index": 0, "relevance_score": 0.80},
@@ -207,7 +208,7 @@ func TestRerankerClient_Rerank(t *testing.T) {
func TestRerankerClient_RerankFallbackScore(t *testing.T) { func TestRerankerClient_RerankFallbackScore(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
json.NewEncoder(w).Encode(map[string]any{ _ = json.NewEncoder(w).Encode(map[string]any{
"results": []map[string]any{ "results": []map[string]any{
{"index": 0, "score": 0.77, "relevance_score": 0}, // some APIs only set score {"index": 0, "score": 0.77, "relevance_score": 0}, // some APIs only set score
}, },
@@ -235,13 +236,13 @@ func TestLLMClient_Generate(t *testing.T) {
t.Errorf("path = %q", r.URL.Path) t.Errorf("path = %q", r.URL.Path)
} }
var req map[string]any var req map[string]any
json.NewDecoder(r.Body).Decode(&req) _ = json.NewDecoder(r.Body).Decode(&req)
msgs, _ := req["messages"].([]any) msgs, _ := req["messages"].([]any)
if len(msgs) == 0 { if len(msgs) == 0 {
t.Error("no messages in request") t.Error("no messages in request")
} }
json.NewEncoder(w).Encode(map[string]any{ _ = json.NewEncoder(w).Encode(map[string]any{
"choices": []map[string]any{ "choices": []map[string]any{
{"message": map[string]any{"content": "Paris is the capital of France."}}, {"message": map[string]any{"content": "Paris is the capital of France."}},
}, },
@@ -262,13 +263,13 @@ func TestLLMClient_Generate(t *testing.T) {
func TestLLMClient_GenerateWithContext(t *testing.T) { func TestLLMClient_GenerateWithContext(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var req map[string]any var req map[string]any
json.NewDecoder(r.Body).Decode(&req) _ = json.NewDecoder(r.Body).Decode(&req)
msgs, _ := req["messages"].([]any) msgs, _ := req["messages"].([]any)
// Should have system + user message // Should have system + user message
if len(msgs) != 2 { if len(msgs) != 2 {
t.Errorf("expected 2 messages, got %d", len(msgs)) t.Errorf("expected 2 messages, got %d", len(msgs))
} }
json.NewEncoder(w).Encode(map[string]any{ _ = json.NewEncoder(w).Encode(map[string]any{
"choices": []map[string]any{ "choices": []map[string]any{
{"message": map[string]any{"content": "answer with context"}}, {"message": map[string]any{"content": "answer with context"}},
}, },
@@ -288,7 +289,7 @@ func TestLLMClient_GenerateWithContext(t *testing.T) {
func TestLLMClient_GenerateNoChoices(t *testing.T) { func TestLLMClient_GenerateNoChoices(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
json.NewEncoder(w).Encode(map[string]any{"choices": []any{}}) _ = json.NewEncoder(w).Encode(map[string]any{"choices": []any{}})
})) }))
defer ts.Close() defer ts.Close()
@@ -299,6 +300,293 @@ func TestLLMClient_GenerateNoChoices(t *testing.T) {
} }
} }
// ────────────────────────────────────────────────────────────────────────────
// LLM client — StreamGenerate
// ────────────────────────────────────────────────────────────────────────────
// sseChunk builds an OpenAI-compatible SSE chat.completion.chunk line.
func sseChunk(content string) string {
chunk := map[string]any{
"choices": []map[string]any{
{"delta": map[string]any{"content": content}},
},
}
b, _ := json.Marshal(chunk)
return "data: " + string(b) + "\n\n"
}
// TestLLMClient_StreamGenerate covers the streaming happy path: the client
// must POST to /v1/chat/completions with stream:true, invoke the callback
// once per non-empty token, and return the fully assembled text.
func TestLLMClient_StreamGenerate(t *testing.T) {
	tokens := []string{"Hello", " world", "!"}
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/v1/chat/completions" {
			t.Errorf("path = %q", r.URL.Path)
		}
		var req map[string]any
		_ = json.NewDecoder(r.Body).Decode(&req)
		if req["stream"] != true {
			t.Errorf("stream = %v, want true", req["stream"])
		}
		w.Header().Set("Content-Type", "text/event-stream")
		// Guard the type assertion: calling Flush on a nil Flusher would
		// panic inside the handler instead of failing the test cleanly.
		flusher, ok := w.(http.Flusher)
		if !ok {
			t.Error("ResponseWriter does not implement http.Flusher")
			return
		}
		for _, tok := range tokens {
			_, _ = w.Write([]byte(sseChunk(tok)))
			flusher.Flush()
		}
		_, _ = w.Write([]byte("data: [DONE]\n\n"))
		flusher.Flush()
	}))
	defer ts.Close()

	c := NewLLMClient(ts.URL, 5*time.Second)
	var received []string
	result, err := c.StreamGenerate(context.Background(), "hi", "", "", func(tok string) {
		received = append(received, tok)
	})
	if err != nil {
		t.Fatal(err)
	}
	if result != "Hello world!" {
		t.Errorf("result = %q, want %q", result, "Hello world!")
	}
	if len(received) != 3 {
		t.Fatalf("callback count = %d, want 3", len(received))
	}
	if received[0] != "Hello" || received[1] != " world" || received[2] != "!" {
		t.Errorf("received = %v", received)
	}
}
// TestLLMClient_StreamGenerateWithSystemPrompt verifies that a non-empty
// system prompt is sent as the first message (role "system") ahead of the
// user message.
func TestLLMClient_StreamGenerateWithSystemPrompt(t *testing.T) {
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var req map[string]any
		_ = json.NewDecoder(r.Body).Decode(&req)
		msgs, _ := req["messages"].([]any)
		if len(msgs) != 2 {
			t.Errorf("expected system+user, got %d messages", len(msgs))
		}
		// Only index msgs[0] when it exists: t.Errorf above does not stop
		// the handler, so an empty slice would otherwise panic here.
		if len(msgs) > 0 {
			first, _ := msgs[0].(map[string]any)
			if first["role"] != "system" || first["content"] != "You are a DM" {
				t.Errorf("system msg = %v", first)
			}
		}
		w.Header().Set("Content-Type", "text/event-stream")
		flusher, ok := w.(http.Flusher)
		if !ok {
			t.Error("ResponseWriter does not implement http.Flusher")
			return
		}
		_, _ = w.Write([]byte(sseChunk("ok")))
		_, _ = w.Write([]byte("data: [DONE]\n\n"))
		flusher.Flush()
	}))
	defer ts.Close()

	c := NewLLMClient(ts.URL, 5*time.Second)
	result, err := c.StreamGenerate(context.Background(), "roll dice", "", "You are a DM", nil)
	if err != nil {
		t.Fatal(err)
	}
	if result != "ok" {
		t.Errorf("result = %q", result)
	}
}
// TestLLMClient_StreamGenerateNilCallback ensures a nil onToken callback is
// tolerated (no panic) and the assembled text is still returned.
func TestLLMClient_StreamGenerateNilCallback(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		f, _ := w.(http.Flusher)
		_, _ = w.Write([]byte(sseChunk("token")))
		_, _ = w.Write([]byte("data: [DONE]\n\n"))
		f.Flush()
	}))
	defer srv.Close()

	client := NewLLMClient(srv.URL, 5*time.Second)
	// nil callback should not panic
	got, err := client.StreamGenerate(context.Background(), "hi", "", "", nil)
	if err != nil {
		t.Fatal(err)
	}
	if got != "token" {
		t.Errorf("result = %q", got)
	}
}
// TestLLMClient_StreamGenerateEmptyDelta checks that chunks carrying no
// content (role-only deltas, empty strings) are skipped: they contribute
// nothing to the result and never reach the callback.
func TestLLMClient_StreamGenerateEmptyDelta(t *testing.T) {
	// SSE chunks with empty content should be silently skipped.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		f, _ := w.(http.Flusher)
		// role-only chunk (no content) — common for first chunk from vLLM
		_, _ = w.Write([]byte("data: {\"choices\":[{\"delta\":{\"role\":\"assistant\"}}]}\n\n"))
		// empty content string
		_, _ = w.Write([]byte(sseChunk("")))
		// real token
		_, _ = w.Write([]byte(sseChunk("hello")))
		_, _ = w.Write([]byte("data: [DONE]\n\n"))
		f.Flush()
	}))
	defer srv.Close()

	client := NewLLMClient(srv.URL, 5*time.Second)
	calls := 0
	text, err := client.StreamGenerate(context.Background(), "q", "", "", func(string) {
		calls++
	})
	if err != nil {
		t.Fatal(err)
	}
	if text != "hello" {
		t.Errorf("result = %q", text)
	}
	if calls != 1 {
		t.Errorf("callback count = %d, want 1 (empty deltas should be skipped)", calls)
	}
}
// TestLLMClient_StreamGenerateMalformedChunks checks resilience: invalid
// JSON and chunks with no choices are ignored, and valid tokens around them
// still make it into the result.
func TestLLMClient_StreamGenerateMalformedChunks(t *testing.T) {
	// Malformed JSON should be skipped without error.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		f, _ := w.(http.Flusher)
		frames := []string{
			"data: {invalid json}\n\n",
			"data: {\"choices\":[]}\n\n", // empty choices
			sseChunk("good"),
			"data: [DONE]\n\n",
		}
		for _, frame := range frames {
			_, _ = w.Write([]byte(frame))
		}
		f.Flush()
	}))
	defer srv.Close()

	client := NewLLMClient(srv.URL, 5*time.Second)
	got, err := client.StreamGenerate(context.Background(), "q", "", "", nil)
	if err != nil {
		t.Fatal(err)
	}
	if got != "good" {
		t.Errorf("result = %q, want %q", got, "good")
	}
}
// TestLLMClient_StreamGenerateHTTPError verifies a non-2xx response is
// surfaced as an error whose message includes the status code.
func TestLLMClient_StreamGenerateHTTPError(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(500)
		_, _ = w.Write([]byte("internal server error"))
	}))
	defer srv.Close()

	client := NewLLMClient(srv.URL, 5*time.Second)
	_, err := client.StreamGenerate(context.Background(), "q", "", "", nil)
	if err == nil {
		t.Fatal("expected error for 500")
	}
	if !strings.Contains(err.Error(), "500") {
		t.Errorf("error should contain 500: %v", err)
	}
}
// TestLLMClient_StreamGenerateContextCanceled verifies that canceling the
// request context mid-stream makes StreamGenerate return an error. The
// server sends a batch of tokens, signals via `started`, then parks until
// the client side goes away, so cancellation is guaranteed to land while
// the stream is still open.
func TestLLMClient_StreamGenerateContextCanceled(t *testing.T) {
	started := make(chan struct{})
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		flusher, _ := w.(http.Flusher) // httptest's recorder always implements Flusher here
		// Send several tokens so the client receives some before cancel.
		for i := range 20 {
			_, _ = w.Write([]byte(sseChunk(fmt.Sprintf("tok%d ", i))))
			flusher.Flush()
		}
		close(started)
		// Block until client cancels
		<-r.Context().Done()
	}))
	defer ts.Close()
	ctx, cancel := context.WithCancel(context.Background())
	c := NewLLMClient(ts.URL, 10*time.Second)
	var streamErr error
	done := make(chan struct{})
	// Run the stream in a goroutine so the test can cancel it mid-flight;
	// `done` synchronizes the read of streamErr.
	go func() {
		defer close(done)
		_, streamErr = c.StreamGenerate(ctx, "q", "", "", nil)
	}()
	<-started
	cancel()
	<-done
	// After cancel the stream should return an error (context canceled or
	// stream interrupted). The exact partial text depends on timing.
	if streamErr == nil {
		t.Error("expected error after context cancel")
	}
}
// TestLLMClient_StreamGenerateNoSSEPrefix verifies that SSE framing noise —
// comments, event fields, blank lines — is ignored and only "data:" payloads
// contribute to the result.
func TestLLMClient_StreamGenerateNoSSEPrefix(t *testing.T) {
	// Lines without "data: " prefix should be silently ignored (comments, blank lines, event IDs).
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		f, _ := w.(http.Flusher)
		frames := []string{
			": this is an SSE comment\n\n",
			"event: message\n",
			sseChunk("word"),
			"\n", // blank line
			"data: [DONE]\n\n",
		}
		for _, frame := range frames {
			_, _ = w.Write([]byte(frame))
		}
		f.Flush()
	}))
	defer srv.Close()

	client := NewLLMClient(srv.URL, 5*time.Second)
	got, err := client.StreamGenerate(context.Background(), "q", "", "", nil)
	if err != nil {
		t.Fatal(err)
	}
	if got != "word" {
		t.Errorf("result = %q, want %q", got, "word")
	}
}
func TestLLMClient_StreamGenerateManyTokens(t *testing.T) {
// Verify token ordering and full assembly with many chunks.
n := 100
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/event-stream")
flusher, _ := w.(http.Flusher)
for i := range n {
tok := fmt.Sprintf("t%d ", i)
_, _ = w.Write([]byte(sseChunk(tok)))
flusher.Flush()
}
_, _ = w.Write([]byte("data: [DONE]\n\n"))
flusher.Flush()
}))
defer ts.Close()
c := NewLLMClient(ts.URL, 5*time.Second)
var mu sync.Mutex
var order []int
result, err := c.StreamGenerate(context.Background(), "q", "", "", func(tok string) {
var idx int
fmt.Sscanf(tok, "t%d ", &idx)
mu.Lock()
order = append(order, idx)
mu.Unlock()
})
if err != nil {
t.Fatal(err)
}
// Verify all tokens arrived in order
if len(order) != n {
t.Fatalf("got %d tokens, want %d", len(order), n)
}
for i, v := range order {
if v != i {
t.Errorf("order[%d] = %d", i, v)
break
}
}
// Quick sanity: result should start with "t0 " and end with last token
if !strings.HasPrefix(result, "t0 ") {
t.Errorf("result prefix = %q", result[:10])
}
}
// ──────────────────────────────────────────────────────────────────────────── // ────────────────────────────────────────────────────────────────────────────
// TTS client // TTS client
// ──────────────────────────────────────────────────────────────────────────── // ────────────────────────────────────────────────────────────────────────────
@@ -312,7 +600,7 @@ func TestTTSClient_Synthesize(t *testing.T) {
if r.URL.Query().Get("text") != "hello world" { if r.URL.Query().Get("text") != "hello world" {
t.Errorf("text = %q", r.URL.Query().Get("text")) t.Errorf("text = %q", r.URL.Query().Get("text"))
} }
w.Write(expected) _, _ = w.Write(expected)
})) }))
defer ts.Close() defer ts.Close()
@@ -331,7 +619,7 @@ func TestTTSClient_SynthesizeWithSpeaker(t *testing.T) {
if r.URL.Query().Get("speaker_id") != "alice" { if r.URL.Query().Get("speaker_id") != "alice" {
t.Errorf("speaker_id = %q", r.URL.Query().Get("speaker_id")) t.Errorf("speaker_id = %q", r.URL.Query().Get("speaker_id"))
} }
w.Write([]byte{0x01}) _, _ = w.Write([]byte{0x01})
})) }))
defer ts.Close() defer ts.Close()
@@ -365,7 +653,7 @@ func TestSTTClient_Transcribe(t *testing.T) {
t.Errorf("file size = %d, want 100", len(data)) t.Errorf("file size = %d, want 100", len(data))
} }
json.NewEncoder(w).Encode(map[string]string{"text": "hello world"}) _ = json.NewEncoder(w).Encode(map[string]string{"text": "hello world"})
})) }))
defer ts.Close() defer ts.Close()
@@ -384,7 +672,7 @@ func TestSTTClient_TranscribeTranslate(t *testing.T) {
if r.URL.Path != "/v1/audio/translations" { if r.URL.Path != "/v1/audio/translations" {
t.Errorf("path = %q, want /v1/audio/translations", r.URL.Path) t.Errorf("path = %q, want /v1/audio/translations", r.URL.Path)
} }
json.NewEncoder(w).Encode(map[string]string{"text": "translated"}) _ = json.NewEncoder(w).Encode(map[string]string{"text": "translated"})
})) }))
defer ts.Close() defer ts.Close()
@@ -406,7 +694,7 @@ func TestSTTClient_TranscribeTranslate(t *testing.T) {
func TestHTTPError4xx(t *testing.T) { func TestHTTPError4xx(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(422) w.WriteHeader(422)
w.Write([]byte(`{"error": "bad input"}`)) _, _ = w.Write([]byte(`{"error": "bad input"}`))
})) }))
defer ts.Close() defer ts.Close()
@@ -423,7 +711,7 @@ func TestHTTPError4xx(t *testing.T) {
func TestHTTPError5xx(t *testing.T) { func TestHTTPError5xx(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(500) w.WriteHeader(500)
w.Write([]byte("internal server error")) _, _ = w.Write([]byte("internal server error"))
})) }))
defer ts.Close() defer ts.Close()
@@ -467,8 +755,8 @@ func TestBuildMessages(t *testing.T) {
func BenchmarkPostJSON(b *testing.B) { func BenchmarkPostJSON(b *testing.B) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
io.Copy(io.Discard, r.Body) _, _ = io.Copy(io.Discard, r.Body)
w.Write([]byte(`{"ok":true}`)) _, _ = w.Write([]byte(`{"ok":true}`))
})) }))
defer ts.Close() defer ts.Close()
@@ -482,7 +770,7 @@ func BenchmarkPostJSON(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
c.postJSON(ctx, "/test", payload) _, _ = c.postJSON(ctx, "/test", payload)
} }
} }

View File

@@ -66,8 +66,8 @@ func TestCallbackRegistration(t *testing.T) {
} }
// Verify setup/teardown work when called directly. // Verify setup/teardown work when called directly.
h.onSetup(context.Background()) _ = h.onSetup(context.Background())
h.onTeardown(context.Background()) _ = h.onTeardown(context.Background())
if !setupCalled || !teardownCalled { if !setupCalled || !teardownCalled {
t.Error("callbacks should have been invoked") t.Error("callbacks should have been invoked")
} }
@@ -290,7 +290,7 @@ func BenchmarkWrapTypedHandler(b *testing.B) {
h := New("ai.test", cfg) h := New("ai.test", cfg)
h.OnTypedMessage(func(ctx context.Context, msg *nats.Msg) (any, error) { h.OnTypedMessage(func(ctx context.Context, msg *nats.Msg) (any, error) {
var req benchReq var req benchReq
msgpack.Unmarshal(msg.Data, &req) _ = msgpack.Unmarshal(msg.Data, &req)
return map[string]any{"ok": true}, nil return map[string]any{"ok": true}, nil
}) })

View File

@@ -8,7 +8,6 @@ import (
"log/slog" "log/slog"
"net" "net"
"net/http" "net/http"
"sync/atomic"
"time" "time"
) )
@@ -22,7 +21,6 @@ type Server struct {
readyPath string readyPath string
readyCheck ReadyFunc readyCheck ReadyFunc
srv *http.Server srv *http.Server
ready atomic.Bool
} }
// New creates a health server on the given port. // New creates a health server on the given port.

View File

@@ -19,7 +19,7 @@ func TestHealthEndpoint(t *testing.T) {
if err != nil { if err != nil {
t.Fatalf("health request failed: %v", err) t.Fatalf("health request failed: %v", err)
} }
defer resp.Body.Close() defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != 200 { if resp.StatusCode != 200 {
t.Errorf("expected 200, got %d", resp.StatusCode) t.Errorf("expected 200, got %d", resp.StatusCode)
@@ -42,7 +42,7 @@ func TestReadyEndpointDefault(t *testing.T) {
if err != nil { if err != nil {
t.Fatalf("ready request failed: %v", err) t.Fatalf("ready request failed: %v", err)
} }
defer resp.Body.Close() defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != 200 { if resp.StatusCode != 200 {
t.Errorf("expected 200, got %d", resp.StatusCode) t.Errorf("expected 200, got %d", resp.StatusCode)
@@ -60,7 +60,7 @@ func TestReadyEndpointNotReady(t *testing.T) {
if err != nil { if err != nil {
t.Fatalf("ready request failed: %v", err) t.Fatalf("ready request failed: %v", err)
} }
resp.Body.Close() _ = resp.Body.Close()
if resp.StatusCode != 503 { if resp.StatusCode != 503 {
t.Errorf("expected 503 when not ready, got %d", resp.StatusCode) t.Errorf("expected 503 when not ready, got %d", resp.StatusCode)
} }
@@ -70,7 +70,7 @@ func TestReadyEndpointNotReady(t *testing.T) {
if err != nil { if err != nil {
t.Fatalf("ready request failed: %v", err) t.Fatalf("ready request failed: %v", err)
} }
resp2.Body.Close() _ = resp2.Body.Close()
if resp2.StatusCode != 200 { if resp2.StatusCode != 200 {
t.Errorf("expected 200 when ready, got %d", resp2.StatusCode) t.Errorf("expected 200 when ready, got %d", resp2.StatusCode)
} }

View File

@@ -178,7 +178,7 @@ func BenchmarkEncode_ChatRequest_MsgpackMap(b *testing.B) {
data := chatRequestMap() data := chatRequestMap()
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
msgpack.Marshal(data) _, _ = msgpack.Marshal(data)
} }
} }
@@ -186,7 +186,7 @@ func BenchmarkEncode_ChatRequest_MsgpackStruct(b *testing.B) {
data := chatRequestStruct() data := chatRequestStruct()
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
msgpack.Marshal(data) _, _ = msgpack.Marshal(data)
} }
} }
@@ -194,7 +194,7 @@ func BenchmarkEncode_ChatRequest_Protobuf(b *testing.B) {
data := chatRequestProto() data := chatRequestProto()
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
proto.Marshal(data) _, _ = proto.Marshal(data)
} }
} }
@@ -202,7 +202,7 @@ func BenchmarkEncode_VoiceResponse_MsgpackMap(b *testing.B) {
data := voiceResponseMap() data := voiceResponseMap()
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
msgpack.Marshal(data) _, _ = msgpack.Marshal(data)
} }
} }
@@ -210,7 +210,7 @@ func BenchmarkEncode_VoiceResponse_MsgpackStruct(b *testing.B) {
data := voiceResponseStruct() data := voiceResponseStruct()
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
msgpack.Marshal(data) _, _ = msgpack.Marshal(data)
} }
} }
@@ -218,7 +218,7 @@ func BenchmarkEncode_VoiceResponse_Protobuf(b *testing.B) {
data := voiceResponseProto() data := voiceResponseProto()
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
proto.Marshal(data) _, _ = proto.Marshal(data)
} }
} }
@@ -226,7 +226,7 @@ func BenchmarkEncode_TTSChunk_MsgpackMap(b *testing.B) {
data := ttsChunkMap() data := ttsChunkMap()
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
msgpack.Marshal(data) _, _ = msgpack.Marshal(data)
} }
} }
@@ -234,7 +234,7 @@ func BenchmarkEncode_TTSChunk_MsgpackStruct(b *testing.B) {
data := ttsChunkStruct() data := ttsChunkStruct()
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
msgpack.Marshal(data) _, _ = msgpack.Marshal(data)
} }
} }
@@ -242,7 +242,7 @@ func BenchmarkEncode_TTSChunk_Protobuf(b *testing.B) {
data := ttsChunkProto() data := ttsChunkProto()
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
proto.Marshal(data) _, _ = proto.Marshal(data)
} }
} }
@@ -255,7 +255,7 @@ func BenchmarkDecode_ChatRequest_MsgpackMap(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
var m map[string]any var m map[string]any
msgpack.Unmarshal(encoded, &m) _ = msgpack.Unmarshal(encoded, &m)
} }
} }
@@ -264,7 +264,7 @@ func BenchmarkDecode_ChatRequest_MsgpackStruct(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
var m ChatRequest var m ChatRequest
msgpack.Unmarshal(encoded, &m) _ = msgpack.Unmarshal(encoded, &m)
} }
} }
@@ -273,7 +273,7 @@ func BenchmarkDecode_ChatRequest_Protobuf(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
var m pb.ChatRequest var m pb.ChatRequest
proto.Unmarshal(encoded, &m) _ = proto.Unmarshal(encoded, &m)
} }
} }
@@ -282,7 +282,7 @@ func BenchmarkDecode_VoiceResponse_MsgpackMap(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
var m map[string]any var m map[string]any
msgpack.Unmarshal(encoded, &m) _ = msgpack.Unmarshal(encoded, &m)
} }
} }
@@ -291,7 +291,7 @@ func BenchmarkDecode_VoiceResponse_MsgpackStruct(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
var m VoiceResponse var m VoiceResponse
msgpack.Unmarshal(encoded, &m) _ = msgpack.Unmarshal(encoded, &m)
} }
} }
@@ -300,7 +300,7 @@ func BenchmarkDecode_VoiceResponse_Protobuf(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
var m pb.VoiceResponse var m pb.VoiceResponse
proto.Unmarshal(encoded, &m) _ = proto.Unmarshal(encoded, &m)
} }
} }
@@ -309,7 +309,7 @@ func BenchmarkDecode_TTSChunk_MsgpackMap(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
var m map[string]any var m map[string]any
msgpack.Unmarshal(encoded, &m) _ = msgpack.Unmarshal(encoded, &m)
} }
} }
@@ -318,7 +318,7 @@ func BenchmarkDecode_TTSChunk_MsgpackStruct(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
var m TTSAudioChunk var m TTSAudioChunk
msgpack.Unmarshal(encoded, &m) _ = msgpack.Unmarshal(encoded, &m)
} }
} }
@@ -327,7 +327,7 @@ func BenchmarkDecode_TTSChunk_Protobuf(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for b.Loop() { for b.Loop() {
var m pb.TTSAudioChunk var m pb.TTSAudioChunk
proto.Unmarshal(encoded, &m) _ = proto.Unmarshal(encoded, &m)
} }
} }
@@ -341,7 +341,7 @@ func BenchmarkRoundtrip_ChatRequest_MsgpackMap(b *testing.B) {
for b.Loop() { for b.Loop() {
enc, _ := msgpack.Marshal(data) enc, _ := msgpack.Marshal(data)
var dec map[string]any var dec map[string]any
msgpack.Unmarshal(enc, &dec) _ = msgpack.Unmarshal(enc, &dec)
} }
} }
@@ -351,7 +351,7 @@ func BenchmarkRoundtrip_ChatRequest_MsgpackStruct(b *testing.B) {
for b.Loop() { for b.Loop() {
enc, _ := msgpack.Marshal(data) enc, _ := msgpack.Marshal(data)
var dec ChatRequest var dec ChatRequest
msgpack.Unmarshal(enc, &dec) _ = msgpack.Unmarshal(enc, &dec)
} }
} }
@@ -361,7 +361,7 @@ func BenchmarkRoundtrip_ChatRequest_Protobuf(b *testing.B) {
for b.Loop() { for b.Loop() {
enc, _ := proto.Marshal(data) enc, _ := proto.Marshal(data)
var dec pb.ChatRequest var dec pb.ChatRequest
proto.Unmarshal(enc, &dec) _ = proto.Unmarshal(enc, &dec)
} }
} }

View File

@@ -200,7 +200,7 @@ func BenchmarkEncodeMap(b *testing.B) {
"top_k": 10, "top_k": 10,
} }
for b.Loop() { for b.Loop() {
msgpack.Marshal(data) _, _ = msgpack.Marshal(data)
} }
} }
@@ -212,7 +212,7 @@ func BenchmarkEncodeStruct(b *testing.B) {
Active: true, Active: true,
} }
for b.Loop() { for b.Loop() {
msgpack.Marshal(data) _, _ = msgpack.Marshal(data)
} }
} }
@@ -226,7 +226,7 @@ func BenchmarkDecodeMap(b *testing.B) {
}) })
for b.Loop() { for b.Loop() {
var m map[string]any var m map[string]any
msgpack.Unmarshal(raw, &m) _ = msgpack.Unmarshal(raw, &m)
} }
} }
@@ -239,7 +239,7 @@ func BenchmarkDecodeStruct(b *testing.B) {
}) })
for b.Loop() { for b.Loop() {
var m testMessage var m testMessage
msgpack.Unmarshal(raw, &m) _ = msgpack.Unmarshal(raw, &m)
} }
} }
@@ -251,6 +251,6 @@ func BenchmarkDecodeAudio32KB(b *testing.B) {
}) })
for b.Loop() { for b.Loop() {
var m audioMessage var m audioMessage
msgpack.Unmarshal(raw, &m) _ = msgpack.Unmarshal(raw, &m)
} }
} }