fix: auto-fix ruff linting errors and remove unsupported upload-artifact
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
"""
|
||||
LLM service client (vLLM/OpenAI-compatible).
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional, AsyncIterator
|
||||
from typing import AsyncIterator, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -15,33 +16,33 @@ logger = logging.getLogger(__name__)
|
||||
class LLMClient:
|
||||
"""
|
||||
Client for the LLM service (vLLM with OpenAI-compatible API).
|
||||
|
||||
|
||||
Usage:
|
||||
client = LLMClient()
|
||||
response = await client.generate("Hello, how are you?")
|
||||
|
||||
|
||||
# With context for RAG
|
||||
response = await client.generate(
|
||||
"What is the capital?",
|
||||
context="France is a country in Europe..."
|
||||
)
|
||||
|
||||
|
||||
# Streaming
|
||||
async for chunk in client.stream("Tell me a story"):
|
||||
print(chunk, end="")
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, settings: Optional[LLMSettings] = None):
    """Create a client bound to the configured LLM endpoint.

    Args:
        settings: Service configuration; when omitted (or falsy), a
            default ``LLMSettings()`` instance is constructed.
    """
    self.settings = settings or LLMSettings()
    # Collect transport options first so the construction site stays flat.
    transport_options = {
        "base_url": self.settings.llm_url,
        "timeout": self.settings.http_timeout,
    }
    self._client = httpx.AsyncClient(**transport_options)
|
||||
|
||||
|
||||
async def close(self) -> None:
    """Close the underlying httpx.AsyncClient, releasing pooled connections.

    Call once the client is no longer needed; further requests through
    this instance will fail after closing.
    """
    await self._client.aclose()
|
||||
|
||||
|
||||
async def generate(
|
||||
self,
|
||||
prompt: str,
|
||||
@@ -54,7 +55,7 @@ class LLMClient:
|
||||
) -> str:
|
||||
"""
|
||||
Generate a response from the LLM.
|
||||
|
||||
|
||||
Args:
|
||||
prompt: User prompt/query
|
||||
context: Optional context for RAG
|
||||
@@ -63,19 +64,19 @@ class LLMClient:
|
||||
temperature: Sampling temperature
|
||||
top_p: Top-p sampling
|
||||
stop: Stop sequences
|
||||
|
||||
|
||||
Returns:
|
||||
Generated text response
|
||||
"""
|
||||
with create_span("llm.generate") as span:
|
||||
messages = self._build_messages(prompt, context, system_prompt)
|
||||
|
||||
|
||||
if span:
|
||||
span.set_attribute("llm.model", self.settings.llm_model)
|
||||
span.set_attribute("llm.prompt_length", len(prompt))
|
||||
if context:
|
||||
span.set_attribute("llm.context_length", len(context))
|
||||
|
||||
|
||||
payload = {
|
||||
"model": self.settings.llm_model,
|
||||
"messages": messages,
|
||||
@@ -85,21 +86,21 @@ class LLMClient:
|
||||
}
|
||||
if stop:
|
||||
payload["stop"] = stop
|
||||
|
||||
|
||||
response = await self._client.post("/v1/chat/completions", json=payload)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
result = response.json()
|
||||
content = result["choices"][0]["message"]["content"]
|
||||
|
||||
|
||||
if span:
|
||||
span.set_attribute("llm.response_length", len(content))
|
||||
usage = result.get("usage", {})
|
||||
span.set_attribute("llm.prompt_tokens", usage.get("prompt_tokens", 0))
|
||||
span.set_attribute("llm.completion_tokens", usage.get("completion_tokens", 0))
|
||||
|
||||
|
||||
return content
|
||||
|
||||
|
||||
async def stream(
|
||||
self,
|
||||
prompt: str,
|
||||
@@ -110,19 +111,19 @@ class LLMClient:
|
||||
) -> AsyncIterator[str]:
|
||||
"""
|
||||
Stream a response from the LLM.
|
||||
|
||||
|
||||
Args:
|
||||
prompt: User prompt/query
|
||||
context: Optional context for RAG
|
||||
system_prompt: Optional system prompt
|
||||
max_tokens: Maximum tokens to generate
|
||||
temperature: Sampling temperature
|
||||
|
||||
|
||||
Yields:
|
||||
Text chunks as they're generated
|
||||
"""
|
||||
messages = self._build_messages(prompt, context, system_prompt)
|
||||
|
||||
|
||||
payload = {
|
||||
"model": self.settings.llm_model,
|
||||
"messages": messages,
|
||||
@@ -130,25 +131,24 @@ class LLMClient:
|
||||
"temperature": temperature or self.settings.llm_temperature,
|
||||
"stream": True,
|
||||
}
|
||||
|
||||
async with self._client.stream(
|
||||
"POST", "/v1/chat/completions", json=payload
|
||||
) as response:
|
||||
|
||||
async with self._client.stream("POST", "/v1/chat/completions", json=payload) as response:
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
async for line in response.aiter_lines():
|
||||
if line.startswith("data: "):
|
||||
data = line[6:]
|
||||
if data == "[DONE]":
|
||||
break
|
||||
|
||||
|
||||
import json
|
||||
|
||||
chunk = json.loads(data)
|
||||
delta = chunk["choices"][0].get("delta", {})
|
||||
content = delta.get("content", "")
|
||||
if content:
|
||||
yield content
|
||||
|
||||
|
||||
def _build_messages(
|
||||
self,
|
||||
prompt: str,
|
||||
@@ -157,32 +157,36 @@ class LLMClient:
|
||||
) -> list[dict]:
|
||||
"""Build the messages list for the API call."""
|
||||
messages = []
|
||||
|
||||
|
||||
# System prompt
|
||||
if system_prompt:
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
elif context:
|
||||
# Default RAG system prompt
|
||||
messages.append({
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You are a helpful assistant. Use the provided context to answer "
|
||||
"the user's question. If the context doesn't contain relevant "
|
||||
"information, say so."
|
||||
),
|
||||
})
|
||||
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You are a helpful assistant. Use the provided context to answer "
|
||||
"the user's question. If the context doesn't contain relevant "
|
||||
"information, say so."
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
# Add context as a separate message if provided
|
||||
if context:
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": f"Context:\n{context}\n\nQuestion: {prompt}",
|
||||
})
|
||||
messages.append(
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"Context:\n{context}\n\nQuestion: {prompt}",
|
||||
}
|
||||
)
|
||||
else:
|
||||
messages.append({"role": "user", "content": prompt})
|
||||
|
||||
|
||||
return messages
|
||||
|
||||
|
||||
async def health(self) -> bool:
|
||||
"""Check if the LLM service is healthy."""
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user