fix: auto-fix ruff linting errors and remove unsupported upload-artifact
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
"""
|
||||
LLM service client (vLLM/OpenAI-compatible).
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional, AsyncIterator
|
||||
from typing import AsyncIterator, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -15,33 +16,33 @@ logger = logging.getLogger(__name__)
|
||||
class LLMClient:
|
||||
"""
|
||||
Client for the LLM service (vLLM with OpenAI-compatible API).
|
||||
|
||||
|
||||
Usage:
|
||||
client = LLMClient()
|
||||
response = await client.generate("Hello, how are you?")
|
||||
|
||||
|
||||
# With context for RAG
|
||||
response = await client.generate(
|
||||
"What is the capital?",
|
||||
context="France is a country in Europe..."
|
||||
)
|
||||
|
||||
|
||||
# Streaming
|
||||
async for chunk in client.stream("Tell me a story"):
|
||||
print(chunk, end="")
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, settings: Optional[LLMSettings] = None):
    """Create a client bound to the configured LLM endpoint.

    Args:
        settings: Service configuration; when omitted (or falsy), a
            default ``LLMSettings()`` instance is constructed.
    """
    self.settings = settings or LLMSettings()
    # Collect transport options first so the construction site stays flat.
    transport_options = {
        "base_url": self.settings.llm_url,
        "timeout": self.settings.http_timeout,
    }
    self._client = httpx.AsyncClient(**transport_options)
|
||||
|
||||
|
||||
async def close(self) -> None:
    """Close the underlying httpx.AsyncClient, releasing pooled connections.

    Call once the client is no longer needed; further requests through
    this instance will fail after closing.
    """
    await self._client.aclose()
|
||||
|
||||
|
||||
async def generate(
|
||||
self,
|
||||
prompt: str,
|
||||
@@ -54,7 +55,7 @@ class LLMClient:
|
||||
) -> str:
|
||||
"""
|
||||
Generate a response from the LLM.
|
||||
|
||||
|
||||
Args:
|
||||
prompt: User prompt/query
|
||||
context: Optional context for RAG
|
||||
@@ -63,19 +64,19 @@ class LLMClient:
|
||||
temperature: Sampling temperature
|
||||
top_p: Top-p sampling
|
||||
stop: Stop sequences
|
||||
|
||||
|
||||
Returns:
|
||||
Generated text response
|
||||
"""
|
||||
with create_span("llm.generate") as span:
|
||||
messages = self._build_messages(prompt, context, system_prompt)
|
||||
|
||||
|
||||
if span:
|
||||
span.set_attribute("llm.model", self.settings.llm_model)
|
||||
span.set_attribute("llm.prompt_length", len(prompt))
|
||||
if context:
|
||||
span.set_attribute("llm.context_length", len(context))
|
||||
|
||||
|
||||
payload = {
|
||||
"model": self.settings.llm_model,
|
||||
"messages": messages,
|
||||
@@ -85,21 +86,21 @@ class LLMClient:
|
||||
}
|
||||
if stop:
|
||||
payload["stop"] = stop
|
||||
|
||||
|
||||
response = await self._client.post("/v1/chat/completions", json=payload)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
result = response.json()
|
||||
content = result["choices"][0]["message"]["content"]
|
||||
|
||||
|
||||
if span:
|
||||
span.set_attribute("llm.response_length", len(content))
|
||||
usage = result.get("usage", {})
|
||||
span.set_attribute("llm.prompt_tokens", usage.get("prompt_tokens", 0))
|
||||
span.set_attribute("llm.completion_tokens", usage.get("completion_tokens", 0))
|
||||
|
||||
|
||||
return content
|
||||
|
||||
|
||||
async def stream(
|
||||
self,
|
||||
prompt: str,
|
||||
@@ -110,19 +111,19 @@ class LLMClient:
|
||||
) -> AsyncIterator[str]:
|
||||
"""
|
||||
Stream a response from the LLM.
|
||||
|
||||
|
||||
Args:
|
||||
prompt: User prompt/query
|
||||
context: Optional context for RAG
|
||||
system_prompt: Optional system prompt
|
||||
max_tokens: Maximum tokens to generate
|
||||
temperature: Sampling temperature
|
||||
|
||||
|
||||
Yields:
|
||||
Text chunks as they're generated
|
||||
"""
|
||||
messages = self._build_messages(prompt, context, system_prompt)
|
||||
|
||||
|
||||
payload = {
|
||||
"model": self.settings.llm_model,
|
||||
"messages": messages,
|
||||
@@ -130,25 +131,24 @@ class LLMClient:
|
||||
"temperature": temperature or self.settings.llm_temperature,
|
||||
"stream": True,
|
||||
}
|
||||
|
||||
async with self._client.stream(
|
||||
"POST", "/v1/chat/completions", json=payload
|
||||
) as response:
|
||||
|
||||
async with self._client.stream("POST", "/v1/chat/completions", json=payload) as response:
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
async for line in response.aiter_lines():
|
||||
if line.startswith("data: "):
|
||||
data = line[6:]
|
||||
if data == "[DONE]":
|
||||
break
|
||||
|
||||
|
||||
import json
|
||||
|
||||
chunk = json.loads(data)
|
||||
delta = chunk["choices"][0].get("delta", {})
|
||||
content = delta.get("content", "")
|
||||
if content:
|
||||
yield content
|
||||
|
||||
|
||||
def _build_messages(
|
||||
self,
|
||||
prompt: str,
|
||||
@@ -157,32 +157,36 @@ class LLMClient:
|
||||
) -> list[dict]:
|
||||
"""Build the messages list for the API call."""
|
||||
messages = []
|
||||
|
||||
|
||||
# System prompt
|
||||
if system_prompt:
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
elif context:
|
||||
# Default RAG system prompt
|
||||
messages.append({
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You are a helpful assistant. Use the provided context to answer "
|
||||
"the user's question. If the context doesn't contain relevant "
|
||||
"information, say so."
|
||||
),
|
||||
})
|
||||
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You are a helpful assistant. Use the provided context to answer "
|
||||
"the user's question. If the context doesn't contain relevant "
|
||||
"information, say so."
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
# Add context as a separate message if provided
|
||||
if context:
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": f"Context:\n{context}\n\nQuestion: {prompt}",
|
||||
})
|
||||
messages.append(
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"Context:\n{context}\n\nQuestion: {prompt}",
|
||||
}
|
||||
)
|
||||
else:
|
||||
messages.append({"role": "user", "content": prompt})
|
||||
|
||||
|
||||
return messages
|
||||
|
||||
|
||||
async def health(self) -> bool:
|
||||
"""Check if the LLM service is healthy."""
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user