Fix LLM readiness check.
This commit is contained in:
11
llm.py
11
llm.py
@@ -115,7 +115,7 @@ DEFAULT_SYSTEM_PROMPT = (
|
||||
|
||||
# Use async client for streaming
|
||||
async_client = httpx.AsyncClient(timeout=httpx.Timeout(300.0, connect=30.0))
|
||||
sync_client = httpx.Client(timeout=10.0)
|
||||
sync_client = httpx.Client(timeout=httpx.Timeout(60.0, connect=10.0))
|
||||
|
||||
|
||||
async def chat_stream(
|
||||
@@ -199,6 +199,13 @@ async def chat_stream(
|
||||
def check_service_health() -> str:
|
||||
"""Check if the LLM service is reachable."""
|
||||
try:
|
||||
# Try a lightweight GET against the Ray Serve base first.
|
||||
# This avoids burning GPU time on a full inference round-trip.
|
||||
base_url = LLM_URL.rsplit("/", 1)[0] # strip /llm path
|
||||
response = sync_client.get(f"{base_url}/-/routes")
|
||||
if response.status_code == 200:
|
||||
return "🟢 LLM service is healthy"
|
||||
# Fall back to a minimal inference probe
|
||||
response = sync_client.post(
|
||||
LLM_URL,
|
||||
json={
|
||||
@@ -212,6 +219,8 @@ def check_service_health() -> str:
|
||||
return f"🟡 LLM responded with status {response.status_code}"
|
||||
except httpx.ConnectError:
|
||||
return "🔴 Cannot connect to LLM service"
|
||||
except httpx.TimeoutException:
|
||||
return "🟡 LLM service is reachable but slow to respond"
|
||||
except Exception as e:
|
||||
return f"🔴 Service unavailable: {e}"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user