Fix serve-llm: parse the Starlette Request body in LLMDeployment.__call__
CI: Build and Publish ray-serve-apps / build-and-publish (push) successful in 12s

commit a973768aee
parent 969e93cdd4
Date:   2026-02-18 07:30:00 -05:00


@@ -120,7 +120,7 @@ class LLMDeployment:
         except ImportError:
             self._mlflow = None
 
-    async def __call__(self, request: dict[str, Any]) -> dict[str, Any]:
+    async def __call__(self, request) -> dict[str, Any]:
         """
         Handle OpenAI-compatible chat completion requests.
 
@@ -134,14 +134,23 @@ class LLMDeployment:
             "stream": false
         }
         """
-        messages = request.get("messages", [])
-        temperature = request.get("temperature", 0.7)
+        # Ray Serve passes a Starlette Request for HTTP calls —
+        # parse the JSON body so we actually read the user's payload.
+        from starlette.requests import Request
+
+        if isinstance(request, Request):
+            body = await request.json()
+        else:
+            body = request
+
+        messages = body.get("messages", [])
+        temperature = body.get("temperature", 0.7)
         max_tokens = min(
-            request.get("max_tokens", self.default_max_tokens),
+            body.get("max_tokens", self.default_max_tokens),
             self.max_model_len,
         )
-        top_p = request.get("top_p", 1.0)
-        stop = request.get("stop")
+        top_p = body.get("top_p", 1.0)
+        stop = body.get("stop")
 
         # Convert messages to prompt
         prompt = self._format_messages(messages)
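
Why the isinstance branch is needed: Ray Serve routes two kinds of callers through the same __call__. The HTTP proxy hands it a starlette.requests.Request wrapping the POST body, while a DeploymentHandle passes the argument through unchanged. Below is a minimal sketch exercising both paths; the endpoint http://localhost:8000/, the app name "default", and the deployment name "LLMDeployment" are assumptions for illustration, not taken from the commit.

import requests
from ray import serve

payload = {
    "messages": [{"role": "user", "content": "Say hello."}],
    "temperature": 0.7,
    "max_tokens": 64,
    "stream": False,
}

# Path 1: over HTTP. The Serve proxy passes __call__ a starlette Request,
# so the deployment must `await request.json()` to recover this payload.
# Before the fix, request.get("messages", []) read from the ASGI scope
# (Starlette's Request is a Mapping over the scope), silently returning
# the default instead of the user's messages.
print(requests.post("http://localhost:8000/", json=payload).json())

# Path 2: via a deployment handle, from a process attached to the same
# Ray cluster. The dict arrives unwrapped, so the isinstance check falls
# through to `body = request`.
handle = serve.get_deployment_handle("LLMDeployment", app_name="default")
print(handle.remote(payload).result())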