fix: apply ruff fixes to ray_serve package
Some checks failed
Build and Publish ray-serve-apps / lint (push) Successful in 1m30s
Build and Publish ray-serve-apps / publish (push) Failing after 2m44s

[ray-serve only]

- Fix whitespace in docstrings
- Add an explicit strict=False to zip() calls (keeps the existing truncating behavior; no length check is enforced)
- Use ternary operators where appropriate
- Rename unused loop variables
This commit is contained in:
2026-02-02 11:09:35 -05:00
parent 16f6199534
commit 12987c6adc
6 changed files with 128 additions and 131 deletions

View File

@@ -4,9 +4,7 @@ Runs on: drizzt (Radeon 680M iGPU, ROCm)
"""
import os
import time
import uuid
from typing import Any, Dict, List, Union
from typing import Any
from ray import serve
@@ -14,8 +12,8 @@ from ray import serve
@serve.deployment(name="EmbeddingsDeployment", num_replicas=1)
class EmbeddingsDeployment:
def __init__(self):
from sentence_transformers import SentenceTransformer
import torch
from sentence_transformers import SentenceTransformer
self.model_id = os.environ.get("MODEL_ID", "BAAI/bge-large-en-v1.5")
@@ -35,7 +33,7 @@ class EmbeddingsDeployment:
print(f"Model loaded. Embedding dimension: {self.embedding_dim}")
async def __call__(self, request: Dict[str, Any]) -> Dict[str, Any]:
async def __call__(self, request: dict[str, Any]) -> dict[str, Any]:
"""
Handle OpenAI-compatible embedding requests.
@@ -49,10 +47,7 @@ class EmbeddingsDeployment:
input_data = request.get("input", "")
# Handle both single string and list of strings
if isinstance(input_data, str):
texts = [input_data]
else:
texts = input_data
texts = [input_data] if isinstance(input_data, str) else input_data
# Generate embeddings
embeddings = self.model.encode(
@@ -64,12 +59,14 @@ class EmbeddingsDeployment:
# Build response data
data = []
total_tokens = 0
for i, (text, embedding) in enumerate(zip(texts, embeddings)):
data.append({
"object": "embedding",
"index": i,
"embedding": embedding.tolist(),
})
for i, (text, embedding) in enumerate(zip(texts, embeddings, strict=False)):
data.append(
{
"object": "embedding",
"index": i,
"embedding": embedding.tolist(),
}
)
total_tokens += len(text.split())
# Return OpenAI-compatible response

View File

@@ -6,7 +6,7 @@ Runs on: khelben (Strix Halo 64GB, ROCm)
import os
import time
import uuid
from typing import Any, Dict, List, Optional
from typing import Any
from ray import serve
@@ -33,7 +33,7 @@ class LLMDeployment:
self.SamplingParams = SamplingParams
print(f"Model {self.model_id} loaded successfully")
async def __call__(self, request: Dict[str, Any]) -> Dict[str, Any]:
async def __call__(self, request: dict[str, Any]) -> dict[str, Any]:
"""
Handle OpenAI-compatible chat completion requests.
@@ -51,7 +51,7 @@ class LLMDeployment:
temperature = request.get("temperature", 0.7)
max_tokens = request.get("max_tokens", 256)
top_p = request.get("top_p", 1.0)
stop = request.get("stop", None)
stop = request.get("stop")
# Convert messages to prompt
prompt = self._format_messages(messages)
@@ -89,7 +89,7 @@ class LLMDeployment:
},
}
def _format_messages(self, messages: List[Dict[str, str]]) -> str:
def _format_messages(self, messages: list[dict[str, str]]) -> str:
"""Format chat messages into a prompt string."""
formatted = ""
for msg in messages:

View File

@@ -4,9 +4,7 @@ Runs on: drizzt (Radeon 680M iGPU, ROCm) or danilo (Intel i915 iGPU, OpenVINO/IP
"""
import os
import time
import uuid
from typing import Any, Dict, List, Tuple
from typing import Any
from ray import serve
@@ -14,8 +12,8 @@ from ray import serve
@serve.deployment(name="RerankerDeployment", num_replicas=1)
class RerankerDeployment:
def __init__(self):
from sentence_transformers import CrossEncoder
import torch
from sentence_transformers import CrossEncoder
self.model_id = os.environ.get("MODEL_ID", "BAAI/bge-reranker-v2-m3")
self.use_ipex = False
@@ -24,6 +22,7 @@ class RerankerDeployment:
# Detect device - check for Intel GPU first via IPEX
try:
import intel_extension_for_pytorch as ipex
self.use_ipex = True
if hasattr(torch, "xpu") and torch.xpu.is_available():
self.device = "xpu"
@@ -37,7 +36,7 @@ class RerankerDeployment:
if not self.use_ipex:
if torch.cuda.is_available():
self.device = "cuda"
print(f"Using CUDA/ROCm device")
print("Using CUDA/ROCm device")
else:
print("No GPU detected, using CPU")
@@ -51,14 +50,15 @@ class RerankerDeployment:
if self.use_ipex and self.device == "cpu":
try:
import intel_extension_for_pytorch as ipex
self.model.model = ipex.optimize(self.model.model)
print("IPEX CPU optimization applied")
except Exception as e:
print(f"IPEX optimization failed: {e}")
print(f"Reranker model loaded successfully")
print("Reranker model loaded successfully")
async def __call__(self, request: Dict[str, Any]) -> Dict[str, Any]:
async def __call__(self, request: dict[str, Any]) -> dict[str, Any]:
"""
Handle reranking requests.
@@ -81,11 +81,13 @@ class RerankerDeployment:
scores = self.model.predict(pairs)
results = []
for i, (pair, score) in enumerate(zip(pairs, scores)):
results.append({
"index": i,
"score": float(score),
})
for i, (_pair, score) in enumerate(zip(pairs, scores, strict=False)):
results.append(
{
"index": i,
"score": float(score),
}
)
return {
"object": "list",
@@ -114,7 +116,7 @@ class RerankerDeployment:
# Create results with indices and scores
results = []
for i, (doc, score) in enumerate(zip(documents, scores)):
for i, (doc, score) in enumerate(zip(documents, scores, strict=False)):
result = {
"index": i,
"score": float(score),

View File

@@ -3,12 +3,10 @@ Ray Serve deployment for Coqui TTS.
Runs on: elminster (RTX 2070 8GB, CUDA)
"""
import os
import io
import time
import uuid
import base64
from typing import Any, Dict, Optional
import io
import os
from typing import Any
from ray import serve
@@ -16,8 +14,8 @@ from ray import serve
@serve.deployment(name="TTSDeployment", num_replicas=1)
class TTSDeployment:
def __init__(self):
from TTS.api import TTS
import torch
from TTS.api import TTS
self.model_name = os.environ.get("MODEL_NAME", "tts_models/en/ljspeech/tacotron2-DDC")
@@ -32,9 +30,9 @@ class TTSDeployment:
if self.use_gpu:
self.tts = self.tts.to("cuda")
print(f"TTS model loaded successfully")
print("TTS model loaded successfully")
async def __call__(self, request: Dict[str, Any]) -> Dict[str, Any]:
async def __call__(self, request: dict[str, Any]) -> dict[str, Any]:
"""
Handle text-to-speech requests.
@@ -52,8 +50,8 @@ class TTSDeployment:
from scipy.io import wavfile
text = request.get("text", "")
speaker = request.get("speaker", None)
language = request.get("language", None)
speaker = request.get("speaker")
language = request.get("language")
speed = request.get("speed", 1.0)
output_format = request.get("output_format", "wav")
return_base64 = request.get("return_base64", True)
@@ -79,7 +77,11 @@ class TTSDeployment:
wav_int16 = (wav * 32767).astype(np.int16)
# Get sample rate from model config
sample_rate = self.tts.synthesizer.output_sample_rate if hasattr(self.tts, 'synthesizer') else 22050
sample_rate = (
self.tts.synthesizer.output_sample_rate
if hasattr(self.tts, "synthesizer")
else 22050
)
# Write to buffer
buffer = io.BytesIO()
@@ -106,10 +108,10 @@ class TTSDeployment:
"model": self.model_name,
}
def list_speakers(self) -> Dict[str, Any]:
def list_speakers(self) -> dict[str, Any]:
"""List available speakers for multi-speaker models."""
speakers = []
if hasattr(self.tts, 'speakers') and self.tts.speakers:
if hasattr(self.tts, "speakers") and self.tts.speakers:
speakers = self.tts.speakers
return {

View File

@@ -3,12 +3,10 @@ Ray Serve deployment for faster-whisper STT.
Runs on: elminster (RTX 2070 8GB, CUDA)
"""
import os
import io
import time
import uuid
import base64
from typing import Any, Dict, Optional
import io
import os
from typing import Any
from ray import serve
@@ -16,8 +14,8 @@ from ray import serve
@serve.deployment(name="WhisperDeployment", num_replicas=1)
class WhisperDeployment:
def __init__(self):
from faster_whisper import WhisperModel
import torch
from faster_whisper import WhisperModel
self.model_size = os.environ.get("MODEL_SIZE", "large-v3")
@@ -38,9 +36,9 @@ class WhisperDeployment:
compute_type=self.compute_type,
)
print(f"Whisper model loaded successfully")
print("Whisper model loaded successfully")
async def __call__(self, request: Dict[str, Any]) -> Dict[str, Any]:
async def __call__(self, request: dict[str, Any]) -> dict[str, Any]:
"""
Handle transcription requests.
@@ -60,10 +58,8 @@ class WhisperDeployment:
...
}
"""
import numpy as np
from scipy.io import wavfile
language = request.get("language", None)
language = request.get("language")
task = request.get("task", "transcribe") # transcribe or translate
response_format = request.get("response_format", "json")
word_timestamps = request.get("word_timestamps", False)