Fix LLM readiness check.

This commit is contained in:
2026-02-18 07:31:23 -05:00
parent 53afea9352
commit 4069647495

11
llm.py
View File

@@ -115,7 +115,7 @@ DEFAULT_SYSTEM_PROMPT = (
# Use async client for streaming
async_client = httpx.AsyncClient(timeout=httpx.Timeout(300.0, connect=30.0))
sync_client = httpx.Client(timeout=10.0)
sync_client = httpx.Client(timeout=httpx.Timeout(60.0, connect=10.0))
async def chat_stream(
@@ -199,6 +199,13 @@ async def chat_stream(
def check_service_health() -> str:
"""Check if the LLM service is reachable."""
try:
# Try a lightweight GET against the Ray Serve base first.
# This avoids burning GPU time on a full inference round-trip.
base_url = LLM_URL.rsplit("/", 1)[0] # strip /llm path
response = sync_client.get(f"{base_url}/-/routes")
if response.status_code == 200:
return "🟢 LLM service is healthy"
# Fall back to a minimal inference probe
response = sync_client.post(
LLM_URL,
json={
@@ -212,6 +219,8 @@ def check_service_health() -> str:
return f"🟡 LLM responded with status {response.status_code}"
except httpx.ConnectError:
return "🔴 Cannot connect to LLM service"
except httpx.TimeoutException:
return "🟡 LLM service is reachable but slow to respond"
except Exception as e:
return f"🔴 Service unavailable: {e}"