fix: auto-fix ruff linting errors and remove unsupported upload-artifact
All checks were successful
CI / Lint (push) Successful in 52s
CI / Test (push) Successful in 1m1s
CI / Release (push) Successful in 5s
CI / Notify (push) Successful in 1s

This commit is contained in:
2026-02-02 08:34:00 -05:00
parent 7b30ff6a05
commit dbf1a93141
19 changed files with 414 additions and 400 deletions

View File

@@ -1,8 +1,9 @@
"""
LLM service client (vLLM/OpenAI-compatible).
"""
import logging
from typing import Optional, AsyncIterator
from typing import AsyncIterator, Optional
import httpx
@@ -15,33 +16,33 @@ logger = logging.getLogger(__name__)
class LLMClient:
"""
Client for the LLM service (vLLM with OpenAI-compatible API).
Usage:
client = LLMClient()
response = await client.generate("Hello, how are you?")
# With context for RAG
response = await client.generate(
"What is the capital?",
context="France is a country in Europe..."
)
# Streaming
async for chunk in client.stream("Tell me a story"):
print(chunk, end="")
"""
def __init__(self, settings: Optional[LLMSettings] = None):
self.settings = settings or LLMSettings()
self._client = httpx.AsyncClient(
base_url=self.settings.llm_url,
timeout=self.settings.http_timeout,
)
async def close(self) -> None:
"""Close the HTTP client."""
await self._client.aclose()
async def generate(
self,
prompt: str,
@@ -54,7 +55,7 @@ class LLMClient:
) -> str:
"""
Generate a response from the LLM.
Args:
prompt: User prompt/query
context: Optional context for RAG
@@ -63,19 +64,19 @@ class LLMClient:
temperature: Sampling temperature
top_p: Top-p sampling
stop: Stop sequences
Returns:
Generated text response
"""
with create_span("llm.generate") as span:
messages = self._build_messages(prompt, context, system_prompt)
if span:
span.set_attribute("llm.model", self.settings.llm_model)
span.set_attribute("llm.prompt_length", len(prompt))
if context:
span.set_attribute("llm.context_length", len(context))
payload = {
"model": self.settings.llm_model,
"messages": messages,
@@ -85,21 +86,21 @@ class LLMClient:
}
if stop:
payload["stop"] = stop
response = await self._client.post("/v1/chat/completions", json=payload)
response.raise_for_status()
result = response.json()
content = result["choices"][0]["message"]["content"]
if span:
span.set_attribute("llm.response_length", len(content))
usage = result.get("usage", {})
span.set_attribute("llm.prompt_tokens", usage.get("prompt_tokens", 0))
span.set_attribute("llm.completion_tokens", usage.get("completion_tokens", 0))
return content
async def stream(
self,
prompt: str,
@@ -110,19 +111,19 @@ class LLMClient:
) -> AsyncIterator[str]:
"""
Stream a response from the LLM.
Args:
prompt: User prompt/query
context: Optional context for RAG
system_prompt: Optional system prompt
max_tokens: Maximum tokens to generate
temperature: Sampling temperature
Yields:
Text chunks as they're generated
"""
messages = self._build_messages(prompt, context, system_prompt)
payload = {
"model": self.settings.llm_model,
"messages": messages,
@@ -130,25 +131,24 @@ class LLMClient:
"temperature": temperature or self.settings.llm_temperature,
"stream": True,
}
async with self._client.stream(
"POST", "/v1/chat/completions", json=payload
) as response:
async with self._client.stream("POST", "/v1/chat/completions", json=payload) as response:
response.raise_for_status()
async for line in response.aiter_lines():
if line.startswith("data: "):
data = line[6:]
if data == "[DONE]":
break
import json
chunk = json.loads(data)
delta = chunk["choices"][0].get("delta", {})
content = delta.get("content", "")
if content:
yield content
def _build_messages(
self,
prompt: str,
@@ -157,32 +157,36 @@ class LLMClient:
) -> list[dict]:
"""Build the messages list for the API call."""
messages = []
# System prompt
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
elif context:
# Default RAG system prompt
messages.append({
"role": "system",
"content": (
"You are a helpful assistant. Use the provided context to answer "
"the user's question. If the context doesn't contain relevant "
"information, say so."
),
})
messages.append(
{
"role": "system",
"content": (
"You are a helpful assistant. Use the provided context to answer "
"the user's question. If the context doesn't contain relevant "
"information, say so."
),
}
)
# Add context as a separate message if provided
if context:
messages.append({
"role": "user",
"content": f"Context:\n{context}\n\nQuestion: {prompt}",
})
messages.append(
{
"role": "user",
"content": f"Context:\n{context}\n\nQuestion: {prompt}",
}
)
else:
messages.append({"role": "user", "content": prompt})
return messages
async def health(self) -> bool:
"""Check if the LLM service is healthy."""
try: