feat: add e2e tests, perf benchmarks, and infrastructure improvements

- messages/bench_test.go: serialization benchmarks (msgpack map vs struct vs protobuf)
- clients/clients_test.go: HTTP client tests with pooling verification (20 tests)
- natsutil/natsutil_test.go: encode/decode roundtrip + binary data tests
- handler/handler_test.go: handler dispatch tests + benchmark
- config/config.go: live reload via fsnotify + RWMutex getter methods
- clients/clients.go: SharedTransport + sync.Pool buffer pooling
- messages/messages.go: typed structs with msgpack+json tags
- messages/proto/: protobuf schema + generated code

Benchmark baseline (ChatRequest roundtrip):
  MsgpackMap:    2949 ns/op, 36 allocs
  MsgpackStruct: 2030 ns/op, 13 allocs (31% faster, 64% fewer allocs)
  Protobuf:       793 ns/op,  8 allocs (73% faster, 78% fewer allocs)
This commit is contained in:
2026-02-20 06:44:37 -05:00
parent d321c9852b
commit 35912d5844
12 changed files with 4260 additions and 391 deletions

View File

@@ -1,145 +1,268 @@
// Package config provides environment-based configuration for handler services
// with optional live reload of secrets and service endpoints.
package config
import (
"os"
"strconv"
"time"
"context"
"log/slog"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/fsnotify/fsnotify"
)
// Settings holds base configuration for all handler services.
// Values are loaded from environment variables with sensible defaults.
// Fields in the "hot-reload" section are protected by a RWMutex and can be
// updated at runtime via WatchSecrets(). All other fields are immutable
// after Load() returns.
//
// Settings contains a mutex, so it must be shared by pointer and never copied.
type Settings struct {
	// Service identification (immutable)
	ServiceName      string
	ServiceVersion   string
	ServiceNamespace string
	DeploymentEnv    string

	// NATS configuration (immutable)
	NATSURL        string
	NATSUser       string
	NATSPassword   string
	NATSQueueGroup string

	// Redis/Valkey configuration (immutable)
	RedisURL      string
	RedisPassword string

	// Milvus configuration (immutable)
	MilvusHost       string
	MilvusPort       int
	MilvusCollection string

	// OpenTelemetry configuration (immutable)
	OTELEnabled  bool
	OTELEndpoint string
	OTELUseHTTP  bool

	// HyperDX configuration (immutable)
	HyperDXEnabled  bool
	HyperDXAPIKey   string
	HyperDXEndpoint string

	// MLflow configuration (immutable)
	MLflowTrackingURI    string
	MLflowExperimentName string
	MLflowEnabled        bool

	// Health check configuration (immutable)
	HealthPort int
	HealthPath string
	ReadyPath  string

	// Timeouts (immutable)
	HTTPTimeout time.Duration
	NATSTimeout time.Duration

	// Hot-reloadable fields — access via getter methods.
	// mu guards every field in this group.
	mu            sync.RWMutex
	embeddingsURL string
	rerankerURL   string
	llmURL        string
	ttsURL        string
	sttURL        string

	// Secrets path for file-based hot reload (Kubernetes secret mounts)
	SecretsPath string
}
// Load creates a Settings populated from environment variables with defaults.
func Load() *Settings {
return &Settings{
ServiceName: getEnv("SERVICE_NAME", "handler"),
ServiceVersion: getEnv("SERVICE_VERSION", "1.0.0"),
ServiceNamespace: getEnv("SERVICE_NAMESPACE", "ai-ml"),
DeploymentEnv: getEnv("DEPLOYMENT_ENV", "production"),
return &Settings{
ServiceName: getEnv("SERVICE_NAME", "handler"),
ServiceVersion: getEnv("SERVICE_VERSION", "1.0.0"),
ServiceNamespace: getEnv("SERVICE_NAMESPACE", "ai-ml"),
DeploymentEnv: getEnv("DEPLOYMENT_ENV", "production"),
NATSURL: getEnv("NATS_URL", "nats://nats.ai-ml.svc.cluster.local:4222"),
NATSUser: getEnv("NATS_USER", ""),
NATSPassword: getEnv("NATS_PASSWORD", ""),
NATSQueueGroup: getEnv("NATS_QUEUE_GROUP", ""),
NATSURL: getEnv("NATS_URL", "nats://nats.ai-ml.svc.cluster.local:4222"),
NATSUser: getEnv("NATS_USER", ""),
NATSPassword: getEnv("NATS_PASSWORD", ""),
NATSQueueGroup: getEnv("NATS_QUEUE_GROUP", ""),
RedisURL: getEnv("REDIS_URL", "redis://valkey.ai-ml.svc.cluster.local:6379"),
RedisPassword: getEnv("REDIS_PASSWORD", ""),
RedisURL: getEnv("REDIS_URL", "redis://valkey.ai-ml.svc.cluster.local:6379"),
RedisPassword: getEnv("REDIS_PASSWORD", ""),
MilvusHost: getEnv("MILVUS_HOST", "milvus.ai-ml.svc.cluster.local"),
MilvusPort: getEnvInt("MILVUS_PORT", 19530),
MilvusCollection: getEnv("MILVUS_COLLECTION", "documents"),
MilvusHost: getEnv("MILVUS_HOST", "milvus.ai-ml.svc.cluster.local"),
MilvusPort: getEnvInt("MILVUS_PORT", 19530),
MilvusCollection: getEnv("MILVUS_COLLECTION", "documents"),
EmbeddingsURL: getEnv("EMBEDDINGS_URL", "http://embeddings-predictor.ai-ml.svc.cluster.local"),
RerankerURL: getEnv("RERANKER_URL", "http://reranker-predictor.ai-ml.svc.cluster.local"),
LLMURL: getEnv("LLM_URL", "http://vllm-predictor.ai-ml.svc.cluster.local"),
TTSURL: getEnv("TTS_URL", "http://tts-predictor.ai-ml.svc.cluster.local"),
STTURL: getEnv("STT_URL", "http://whisper-predictor.ai-ml.svc.cluster.local"),
embeddingsURL: getEnv("EMBEDDINGS_URL", "http://embeddings-predictor.ai-ml.svc.cluster.local"),
rerankerURL: getEnv("RERANKER_URL", "http://reranker-predictor.ai-ml.svc.cluster.local"),
llmURL: getEnv("LLM_URL", "http://vllm-predictor.ai-ml.svc.cluster.local"),
ttsURL: getEnv("TTS_URL", "http://tts-predictor.ai-ml.svc.cluster.local"),
sttURL: getEnv("STT_URL", "http://whisper-predictor.ai-ml.svc.cluster.local"),
OTELEnabled: getEnvBool("OTEL_ENABLED", true),
OTELEndpoint: getEnv("OTEL_ENDPOINT", "http://opentelemetry-collector.observability.svc.cluster.local:4317"),
OTELUseHTTP: getEnvBool("OTEL_USE_HTTP", false),
OTELEnabled: getEnvBool("OTEL_ENABLED", true),
OTELEndpoint: getEnv("OTEL_ENDPOINT", "http://opentelemetry-collector.observability.svc.cluster.local:4317"),
OTELUseHTTP: getEnvBool("OTEL_USE_HTTP", false),
HyperDXEnabled: getEnvBool("HYPERDX_ENABLED", false),
HyperDXAPIKey: getEnv("HYPERDX_API_KEY", ""),
HyperDXEndpoint: getEnv("HYPERDX_ENDPOINT", "https://in-otel.hyperdx.io"),
HyperDXEnabled: getEnvBool("HYPERDX_ENABLED", false),
HyperDXAPIKey: getEnv("HYPERDX_API_KEY", ""),
HyperDXEndpoint: getEnv("HYPERDX_ENDPOINT", "https://in-otel.hyperdx.io"),
MLflowTrackingURI: getEnv("MLFLOW_TRACKING_URI", "http://mlflow.mlflow.svc.cluster.local:80"),
MLflowExperimentName: getEnv("MLFLOW_EXPERIMENT_NAME", ""),
MLflowEnabled: getEnvBool("MLFLOW_ENABLED", true),
MLflowTrackingURI: getEnv("MLFLOW_TRACKING_URI", "http://mlflow.mlflow.svc.cluster.local:80"),
MLflowExperimentName: getEnv("MLFLOW_EXPERIMENT_NAME", ""),
MLflowEnabled: getEnvBool("MLFLOW_ENABLED", true),
HealthPort: getEnvInt("HEALTH_PORT", 8080),
HealthPath: getEnv("HEALTH_PATH", "/health"),
ReadyPath: getEnv("READY_PATH", "/ready"),
HealthPort: getEnvInt("HEALTH_PORT", 8080),
HealthPath: getEnv("HEALTH_PATH", "/health"),
ReadyPath: getEnv("READY_PATH", "/ready"),
HTTPTimeout: getEnvDuration("HTTP_TIMEOUT", 60*time.Second),
NATSTimeout: getEnvDuration("NATS_TIMEOUT", 30*time.Second),
}
HTTPTimeout: getEnvDuration("HTTP_TIMEOUT", 60*time.Second),
NATSTimeout: getEnvDuration("NATS_TIMEOUT", 30*time.Second),
SecretsPath: getEnv("SECRETS_PATH", ""),
}
}
// EmbeddingsURL reports the embeddings service URL currently in effect.
// The value is read under the settings read lock.
func (s *Settings) EmbeddingsURL() string {
	s.mu.RLock()
	url := s.embeddingsURL
	s.mu.RUnlock()
	return url
}
// RerankerURL reports the reranker service URL currently in effect.
// The value is read under the settings read lock.
func (s *Settings) RerankerURL() string {
	s.mu.RLock()
	url := s.rerankerURL
	s.mu.RUnlock()
	return url
}
// LLMURL reports the LLM service URL currently in effect.
// The value is read under the settings read lock.
func (s *Settings) LLMURL() string {
	s.mu.RLock()
	url := s.llmURL
	s.mu.RUnlock()
	return url
}
// TTSURL reports the TTS service URL currently in effect.
// The value is read under the settings read lock.
func (s *Settings) TTSURL() string {
	s.mu.RLock()
	url := s.ttsURL
	s.mu.RUnlock()
	return url
}
// STTURL reports the STT service URL currently in effect.
// The value is read under the settings read lock.
func (s *Settings) STTURL() string {
	s.mu.RLock()
	url := s.sttURL
	s.mu.RUnlock()
	return url
}
// WatchSecrets registers an fsnotify watch on SecretsPath and calls
// reloadFromSecrets for every Create or Write event it receives.
//
// It returns immediately when SecretsPath is empty, and also when the watcher
// cannot be created or attached to the path (both failures are logged).
// Otherwise it blocks until ctx is cancelled or one of the watcher's channels
// is closed. Watcher errors are logged and do not stop the loop.
func (s *Settings) WatchSecrets(ctx context.Context) {
	if s.SecretsPath == "" {
		return
	}

	w, err := fsnotify.NewWatcher()
	if err != nil {
		slog.Error("config: failed to create fsnotify watcher", "error", err)
		return
	}
	// Close errors are deliberately ignored: nothing useful can be done on shutdown.
	defer func() { _ = w.Close() }()

	if addErr := w.Add(s.SecretsPath); addErr != nil {
		slog.Error("config: failed to watch secrets path", "error", addErr, "path", s.SecretsPath)
		return
	}

	slog.Info("config: watching secrets for hot reload", "path", s.SecretsPath)

	for {
		select {
		case <-ctx.Done():
			return

		case ev, open := <-w.Events:
			if !open {
				return
			}
			// Only creations and writes can carry new content.
			if !ev.Has(fsnotify.Create) && !ev.Has(fsnotify.Write) {
				continue
			}
			s.reloadFromSecrets()

		case watchErr, open := <-w.Errors:
			if !open {
				return
			}
			slog.Error("config: fsnotify error", "error", watchErr)
		}
	}
}
// reloadFromSecrets re-reads the hot-reloadable service URLs from files in the
// SecretsPath directory and applies the ones that changed.
//
// Each URL lives in its own file: embeddings-url, reranker-url, llm-url,
// tts-url and stt-url. A file that is missing, unreadable, or empty after
// whitespace trimming leaves the current value untouched. Read failures other
// than "file does not exist" are logged. Secret values are never logged, only
// the file name that changed.
//
// It is a no-op when SecretsPath is empty. Files are read before the write
// lock is taken, so callers are expected to be serialized (WatchSecrets calls
// it from a single goroutine).
func (s *Settings) reloadFromSecrets() {
	// Without this guard filepath.Join would resolve the file names against
	// the process working directory and could pick up unrelated files.
	if s.SecretsPath == "" {
		return
	}

	entries := []struct {
		filename string
		target   *string
	}{
		{"embeddings-url", &s.embeddingsURL},
		{"reranker-url", &s.rerankerURL},
		{"llm-url", &s.llmURL},
		{"tts-url", &s.ttsURL},
		{"stt-url", &s.sttURL},
	}

	// Do all disk I/O up front so the getters are never blocked behind a
	// file read while the write lock is held.
	values := make([]string, len(entries))
	for i, e := range entries {
		data, err := os.ReadFile(filepath.Join(s.SecretsPath, e.filename))
		if err != nil {
			// A missing file only means the key is not overridden; any
			// other failure (permissions, I/O) is worth surfacing.
			if !os.IsNotExist(err) {
				slog.Warn("config: failed to read secret", "key", e.filename, "error", err)
			}
			continue
		}
		values[i] = strings.TrimSpace(string(data))
	}

	updated := 0
	s.mu.Lock()
	for i, e := range entries {
		// An empty value (missing, unreadable, or blank file) never clears a URL.
		if v := values[i]; v != "" && v != *e.target {
			*e.target = v
			updated++
			slog.Info("config: reloaded secret", "key", e.filename)
		}
	}
	s.mu.Unlock()

	if updated > 0 {
		slog.Info("config: secrets reloaded", "updated", updated)
	}
}
// getEnv returns the value of the environment variable key, or fallback when
// the variable is unset or set to the empty string.
func getEnv(key, fallback string) string {
	if v := os.Getenv(key); v != "" {
		return v
	}
	return fallback
}
// getEnvInt returns the environment variable key parsed as a base-10 integer.
// It returns fallback when the variable is unset, empty, or not a valid
// integer.
func getEnvInt(key string, fallback int) int {
	if v := os.Getenv(key); v != "" {
		if i, err := strconv.Atoi(v); err == nil {
			return i
		}
	}
	return fallback
}
// getEnvBool returns the environment variable key parsed with
// strconv.ParseBool. It returns fallback when the variable is unset, empty,
// or not a recognised boolean.
func getEnvBool(key string, fallback bool) bool {
	if v := os.Getenv(key); v != "" {
		if b, err := strconv.ParseBool(v); err == nil {
			return b
		}
	}
	return fallback
}
// getEnvDuration returns the environment variable key interpreted as a
// (possibly fractional) number of seconds, e.g. "30" or "1.5".
//
// It returns fallback when the variable is unset, empty, not a number, or a
// number that cannot be represented as a time.Duration (NaN, ±Inf, or a
// magnitude beyond the int64 nanosecond range).
func getEnvDuration(key string, fallback time.Duration) time.Duration {
	v := os.Getenv(key)
	if v == "" {
		return fallback
	}
	secs, err := strconv.ParseFloat(v, 64)
	if err != nil {
		return fallback
	}

	// maxSeconds is the number of seconds at which the nanosecond count
	// reaches the int64 limit; converting at or beyond it is undefined.
	const maxSeconds = float64(1<<63-1) / float64(time.Second)

	// Written as a negated range check so that NaN, which fails every
	// comparison, is rejected together with ±Inf and out-of-range values.
	if !(secs > -maxSeconds && secs < maxSeconds) {
		return fallback
	}
	return time.Duration(secs * float64(time.Second))
}

View File

@@ -1,42 +1,123 @@
package config
import (
"os"
"testing"
"time"
"os"
"path/filepath"
"testing"
"time"
)
// TestLoadDefaults verifies that Load falls back to the built-in defaults
// when the corresponding environment variables carry no value.
func TestLoadDefaults(t *testing.T) {
	// Blank out the variables under test so the result does not depend on
	// the environment the tests happen to run in; an empty value is treated
	// the same as an unset one.
	t.Setenv("SERVICE_NAME", "")
	t.Setenv("HEALTH_PORT", "")
	t.Setenv("HTTP_TIMEOUT", "")

	s := Load()
	if s.ServiceName != "handler" {
		t.Errorf("expected default ServiceName 'handler', got %q", s.ServiceName)
	}
	if s.HealthPort != 8080 {
		t.Errorf("expected default HealthPort 8080, got %d", s.HealthPort)
	}
	if s.HTTPTimeout != 60*time.Second {
		t.Errorf("expected default HTTPTimeout 60s, got %v", s.HTTPTimeout)
	}
}
func TestLoadFromEnv(t *testing.T) {
os.Setenv("SERVICE_NAME", "test-svc")
os.Setenv("HEALTH_PORT", "9090")
os.Setenv("OTEL_ENABLED", "false")
defer func() {
os.Unsetenv("SERVICE_NAME")
os.Unsetenv("HEALTH_PORT")
os.Unsetenv("OTEL_ENABLED")
}()
t.Setenv("SERVICE_NAME", "test-svc")
t.Setenv("HEALTH_PORT", "9090")
t.Setenv("OTEL_ENABLED", "false")
s := Load()
if s.ServiceName != "test-svc" {
t.Errorf("expected ServiceName 'test-svc', got %q", s.ServiceName)
}
if s.HealthPort != 9090 {
t.Errorf("expected HealthPort 9090, got %d", s.HealthPort)
}
if s.OTELEnabled {
t.Error("expected OTELEnabled false")
}
s := Load()
if s.ServiceName != "test-svc" {
t.Errorf("expected ServiceName 'test-svc', got %q", s.ServiceName)
}
if s.HealthPort != 9090 {
t.Errorf("expected HealthPort 9090, got %d", s.HealthPort)
}
if s.OTELEnabled {
t.Error("expected OTELEnabled false")
}
}
// TestURLGetters checks that every service URL getter yields a non-empty
// value on freshly loaded settings.
func TestURLGetters(t *testing.T) {
	s := Load()

	getters := []struct {
		name string
		get  func() string
	}{
		{"EmbeddingsURL", s.EmbeddingsURL},
		{"RerankerURL", s.RerankerURL},
		{"LLMURL", s.LLMURL},
		{"TTSURL", s.TTSURL},
		{"STTURL", s.STTURL},
	}

	for _, g := range getters {
		if g.get() == "" {
			t.Error(g.name + " should have a default")
		}
	}
}
// TestURLGettersFromEnv checks that the URL getters reflect values supplied
// through the environment at load time.
func TestURLGettersFromEnv(t *testing.T) {
	const (
		embedURL = "http://embed:8000"
		llmURL   = "http://llm:9000"
	)
	t.Setenv("EMBEDDINGS_URL", embedURL)
	t.Setenv("LLM_URL", llmURL)

	s := Load()

	if got := s.EmbeddingsURL(); got != embedURL {
		t.Errorf("expected custom EmbeddingsURL, got %q", got)
	}
	if got := s.LLMURL(); got != llmURL {
		t.Errorf("expected custom LLMURL, got %q", got)
	}
}
// TestReloadFromSecrets exercises two reload passes over a temporary secrets
// directory: the first picks up the initial files, the second picks up a
// rewritten file while leaving the untouched one as it was.
func TestReloadFromSecrets(t *testing.T) {
	secretsDir := t.TempDir()

	// Seed the directory with the initial values.
	writeSecret(t, secretsDir, "embeddings-url", "http://old-embed:8000")
	writeSecret(t, secretsDir, "llm-url", "http://old-llm:9000")

	s := Load()
	s.SecretsPath = secretsDir

	// First pass: both files should override the loaded values.
	s.reloadFromSecrets()
	if got := s.EmbeddingsURL(); got != "http://old-embed:8000" {
		t.Errorf("expected reloaded EmbeddingsURL, got %q", got)
	}
	if got := s.LLMURL(); got != "http://old-llm:9000" {
		t.Errorf("expected reloaded LLMURL, got %q", got)
	}

	// Second pass: rewrite one secret and reload.
	writeSecret(t, secretsDir, "embeddings-url", "http://new-embed:8000")
	s.reloadFromSecrets()
	if got := s.EmbeddingsURL(); got != "http://new-embed:8000" {
		t.Errorf("expected updated EmbeddingsURL, got %q", got)
	}
	// The LLM file was not rewritten, so its value must be unchanged.
	if got := s.LLMURL(); got != "http://old-llm:9000" {
		t.Errorf("expected unchanged LLMURL, got %q", got)
	}
}
// TestReloadFromSecretsNoPath makes sure a reload with no secrets directory
// configured completes without panicking.
func TestReloadFromSecretsNoPath(t *testing.T) {
	cfg := Load()
	cfg.SecretsPath = ""

	// A panic here would fail the test; there is nothing else to assert.
	cfg.reloadFromSecrets()
}
// TestGetEnvDuration checks that a plain numeric value is interpreted as a
// number of seconds.
func TestGetEnvDuration(t *testing.T) {
	const want = 30 * time.Second

	t.Setenv("TEST_DUR", "30")

	if got := getEnvDuration("TEST_DUR", 10*time.Second); got != want {
		t.Errorf("expected 30s, got %v", got)
	}
}
// writeSecret creates or overwrites the file name inside dir with value,
// failing the calling test immediately if the write does not succeed.
func writeSecret(t *testing.T, dir, name, value string) {
	t.Helper()

	target := filepath.Join(dir, name)
	err := os.WriteFile(target, []byte(value), 0644)
	if err != nil {
		t.Fatal(err)
	}
}