# Part of a handler-services package providing:
# - Handler base class with graceful shutdown and signal handling
# - NATSClient with JetStream and msgpack serialization
# - Pydantic Settings for environment configuration
# - HealthServer for Kubernetes probes
# - OpenTelemetry telemetry setup
# - Service clients: STT, TTS, LLM, Embeddings, Reranker, Milvus
"""
|
|
Configuration management using Pydantic Settings.
|
|
|
|
Environment variables are automatically loaded and validated.
|
|
"""

from typing import Optional

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Base settings for all handler services.

    Values are read from the process environment (and an optional ``.env``
    file) and validated by pydantic-settings. Unknown environment variables
    are ignored so the class can run in shared deployments.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        # Tolerate unrelated variables in the environment / .env file.
        extra="ignore",
    )

    # Service identification
    service_name: str = "handler"
    service_version: str = "1.0.0"
    service_namespace: str = "ai-ml"
    deployment_env: str = "production"

    # NATS configuration (credentials default to None = anonymous connection)
    nats_url: str = "nats://nats.ai-ml.svc.cluster.local:4222"
    nats_user: Optional[str] = None
    nats_password: Optional[str] = None
    nats_queue_group: Optional[str] = None

    # Redis/Valkey configuration
    redis_url: str = "redis://valkey.ai-ml.svc.cluster.local:6379"
    redis_password: Optional[str] = None

    # Milvus configuration
    milvus_host: str = "milvus.ai-ml.svc.cluster.local"
    milvus_port: int = 19530
    milvus_collection: str = "documents"

    # Service endpoints (in-cluster KServe predictor URLs)
    embeddings_url: str = "http://embeddings-predictor.ai-ml.svc.cluster.local"
    reranker_url: str = "http://reranker-predictor.ai-ml.svc.cluster.local"
    llm_url: str = "http://vllm-predictor.ai-ml.svc.cluster.local"
    tts_url: str = "http://tts-predictor.ai-ml.svc.cluster.local"
    stt_url: str = "http://whisper-predictor.ai-ml.svc.cluster.local"

    # OpenTelemetry configuration (default endpoint is the gRPC port, 4317)
    otel_enabled: bool = True
    otel_endpoint: str = "http://opentelemetry-collector.observability.svc.cluster.local:4317"
    otel_use_http: bool = False

    # HyperDX configuration
    hyperdx_enabled: bool = False
    hyperdx_api_key: Optional[str] = None
    hyperdx_endpoint: str = "https://in-otel.hyperdx.io"

    # MLflow configuration
    mlflow_tracking_uri: str = "http://mlflow.mlflow.svc.cluster.local:80"
    mlflow_experiment_name: Optional[str] = None
    mlflow_enabled: bool = True

    # Health check configuration (Kubernetes liveness/readiness probes)
    health_port: int = 8080
    health_path: str = "/health"
    ready_path: str = "/ready"

    # Timeouts (seconds)
    http_timeout: float = 60.0
    nats_timeout: float = 30.0
|
|
|
class EmbeddingsSettings(Settings):
    """Settings for the embeddings service client.

    Extends :class:`Settings` with embeddings-specific tuning knobs.
    """

    # Model identifier requested from the embeddings service.
    embeddings_model: str = "bge"
    # Number of texts sent per embedding request.
    embeddings_batch_size: int = 32
|
|
|
class LLMSettings(Settings):
    """Settings for the LLM service client.

    Extends :class:`Settings` with default sampling parameters.
    """

    llm_model: str = "default"
    # Maximum tokens to generate per completion.
    llm_max_tokens: int = 2048
    # Sampling temperature; higher values produce more varied output.
    llm_temperature: float = 0.7
    # Nucleus-sampling probability mass cutoff.
    llm_top_p: float = 0.9
|
|
|
class TTSSettings(Settings):
    """Settings for the TTS service client.

    Extends :class:`Settings` with synthesis defaults.
    """

    tts_language: str = "en"
    # None lets the TTS service pick its default voice.
    tts_speaker: Optional[str] = None
|
|
|
class STTSettings(Settings):
    """Settings for the STT service client.

    Extends :class:`Settings` with transcription defaults.
    """

    # None lets the STT service auto-detect the spoken language.
    stt_language: Optional[str] = None
    # Whisper task: "transcribe" (same language) or "translate" (to English).
    stt_task: str = "transcribe"