Fix LLM readiness check.

This commit is contained in:
2026-02-18 07:31:23 -05:00
parent 53afea9352
commit 4069647495

11
llm.py
View File

@@ -115,7 +115,7 @@ DEFAULT_SYSTEM_PROMPT = (
# Use async client for streaming
async_client = httpx.AsyncClient(timeout=httpx.Timeout(300.0, connect=30.0))
sync_client = httpx.Client(timeout=10.0)
sync_client = httpx.Client(timeout=httpx.Timeout(60.0, connect=10.0))
async def chat_stream(
@@ -199,6 +199,13 @@ async def chat_stream(
def check_service_health() -> str:
"""Check if the LLM service is reachable."""
try:
# Try a lightweight GET against the Ray Serve base first.
# This avoids burning GPU time on a full inference round-trip.
base_url = LLM_URL.rsplit("/", 1)[0] # strip /llm path
response = sync_client.get(f"{base_url}/-/routes")
if response.status_code == 200:
return "🟢 LLM service is healthy"
# Fall back to a minimal inference probe
response = sync_client.post(
LLM_URL,
json={
@@ -212,6 +219,8 @@ def check_service_health() -> str:
return f"🟡 LLM responded with status {response.status_code}"
except httpx.ConnectError:
return "🔴 Cannot connect to LLM service"
except httpx.TimeoutException:
return "🟡 LLM service is reachable but slow to respond"
except Exception as e:
return f"🔴 Service unavailable: {e}"