feat: add py.typed, Ray handles for clients, and pre-commit config
All checks were successful
CI / Test (push) Successful in 4m8s
CI / Lint (push) Successful in 4m19s
CI / Release (push) Successful in 58s
CI / Notify (push) Successful in 2s

- Add py.typed marker for PEP 561 type hint support
- Add ray_utils module for Ray handle detection and caching
- Update all clients (Embeddings, LLM, TTS, STT, Reranker) to use
  Ray handles when running inside a Ray cluster for faster internal calls
- Add .pre-commit-config.yaml with ruff and standard hooks
- Add pre-commit and ray[serve] to optional dependencies
- Bump ruff version to 0.4.0
This commit is contained in:
2026-02-02 09:08:43 -05:00
parent dbf1a93141
commit 408f31e56d
10 changed files with 1506 additions and 8 deletions

View File

@@ -1,13 +1,16 @@
"""
Reranker service client (Infinity/BGE Reranker).
Supports both HTTP (external) and Ray handles (internal) for optimal performance.
"""
import logging
from typing import Optional
from typing import Any, Optional
import httpx
from handler_base.config import Settings
from handler_base.ray_utils import get_ray_handle
from handler_base.telemetry import create_span
logger = logging.getLogger(__name__)
@@ -17,17 +20,33 @@ class RerankerClient:
"""
Client for the reranker service (Infinity with BGE Reranker).
When running inside Ray, automatically uses Ray handles for faster
internal communication. Falls back to HTTP for external calls.
Usage:
client = RerankerClient()
reranked = await client.rerank("query", ["doc1", "doc2"])
"""
# Ray Serve deployment configuration
RAY_DEPLOYMENT_NAME = "RerankerDeployment"
RAY_APP_NAME = "reranker"
def __init__(self, settings: Optional[Settings] = None):
    """
    Set up the HTTP transport and the lazy Ray-handle cache.

    Args:
        settings: Configuration to use. When omitted (or falsy), a fresh
            ``Settings()`` is constructed instead.
    """
    if not settings:
        settings = Settings()
    self.settings = settings

    # HTTP client bound to the configured reranker URL and timeout.
    self._client = httpx.AsyncClient(
        base_url=settings.reranker_url,
        timeout=settings.http_timeout,
    )

    # Ray handle cache; populated by _get_ray_handle() on first use.
    self._ray_checked = False
    self._ray_handle: Optional[Any] = None
def _get_ray_handle(self) -> Optional[Any]:
    """
    Return the Ray handle for this client, looking it up at most once.

    The first successful call stores whatever ``get_ray_handle`` returns
    (including ``None``) and marks the lookup as done; later calls return
    that stored value without repeating the lookup.
    """
    if self._ray_checked:
        return self._ray_handle

    handle = get_ray_handle(self.RAY_DEPLOYMENT_NAME, self.RAY_APP_NAME)
    self._ray_handle = handle
    # Set only after the lookup returns, so a raised lookup is retried.
    self._ray_checked = True
    return handle
async def close(self) -> None:
"""Close the HTTP client."""
@@ -64,6 +83,32 @@ class RerankerClient:
if top_k:
payload["top_n"] = top_k
# Try Ray handle first (faster internal path)
handle = self._get_ray_handle()
if handle:
try:
if span:
span.set_attribute("reranker.transport", "ray")
results = await handle.rerank.remote(query, documents, top_k)
# Enrich with original documents
enriched = []
for r in results:
idx = r.get("index", 0)
enriched.append(
{
"index": idx,
"score": r.get("relevance_score", r.get("score", 0)),
"document": documents[idx] if idx < len(documents) else "",
}
)
return enriched
except Exception as e:
logger.warning(f"Ray handle failed, falling back to HTTP: {e}")
# HTTP fallback
if span:
span.set_attribute("reranker.transport", "http")
response = await self._client.post("/rerank", json=payload)
response.raise_for_status()