fix: apply ruff fixes to ray_serve package
[ray-serve only]
- Fix whitespace in docstrings
- Add an explicit strict= argument to zip() calls (strict=False in the hunk shown below)
- Use ternary operators where appropriate
- Rename unused loop variables
This commit is contained in:
@@ -4,9 +4,7 @@ Runs on: drizzt (Radeon 680M iGPU, ROCm)
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Union
|
||||
from typing import Any
|
||||
|
||||
from ray import serve
|
||||
|
||||
@@ -14,11 +12,11 @@ from ray import serve
|
||||
@serve.deployment(name="EmbeddingsDeployment", num_replicas=1)
|
||||
class EmbeddingsDeployment:
|
||||
def __init__(self):
|
||||
from sentence_transformers import SentenceTransformer
|
||||
import torch
|
||||
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
self.model_id = os.environ.get("MODEL_ID", "BAAI/bge-large-en-v1.5")
|
||||
|
||||
|
||||
# Detect device
|
||||
if torch.cuda.is_available():
|
||||
self.device = "cuda"
|
||||
@@ -26,19 +24,19 @@ class EmbeddingsDeployment:
|
||||
self.device = "xpu"
|
||||
else:
|
||||
self.device = "cpu"
|
||||
|
||||
|
||||
print(f"Loading embeddings model: {self.model_id}")
|
||||
print(f"Using device: {self.device}")
|
||||
|
||||
|
||||
self.model = SentenceTransformer(self.model_id, device=self.device)
|
||||
self.embedding_dim = self.model.get_sentence_embedding_dimension()
|
||||
|
||||
|
||||
print(f"Model loaded. Embedding dimension: {self.embedding_dim}")
|
||||
|
||||
async def __call__(self, request: Dict[str, Any]) -> Dict[str, Any]:
|
||||
async def __call__(self, request: dict[str, Any]) -> dict[str, Any]:
|
||||
"""
|
||||
Handle OpenAI-compatible embedding requests.
|
||||
|
||||
|
||||
Expected request format:
|
||||
{
|
||||
"model": "model-name",
|
||||
@@ -47,31 +45,30 @@ class EmbeddingsDeployment:
|
||||
}
|
||||
"""
|
||||
input_data = request.get("input", "")
|
||||
|
||||
|
||||
# Handle both single string and list of strings
|
||||
if isinstance(input_data, str):
|
||||
texts = [input_data]
|
||||
else:
|
||||
texts = input_data
|
||||
|
||||
texts = [input_data] if isinstance(input_data, str) else input_data
|
||||
|
||||
# Generate embeddings
|
||||
embeddings = self.model.encode(
|
||||
texts,
|
||||
normalize_embeddings=True,
|
||||
show_progress_bar=False,
|
||||
)
|
||||
|
||||
|
||||
# Build response data
|
||||
data = []
|
||||
total_tokens = 0
|
||||
for i, (text, embedding) in enumerate(zip(texts, embeddings)):
|
||||
data.append({
|
||||
"object": "embedding",
|
||||
"index": i,
|
||||
"embedding": embedding.tolist(),
|
||||
})
|
||||
for i, (text, embedding) in enumerate(zip(texts, embeddings, strict=False)):
|
||||
data.append(
|
||||
{
|
||||
"object": "embedding",
|
||||
"index": i,
|
||||
"embedding": embedding.tolist(),
|
||||
}
|
||||
)
|
||||
total_tokens += len(text.split())
|
||||
|
||||
|
||||
# Return OpenAI-compatible response
|
||||
return {
|
||||
"object": "list",
|
||||
|
||||
Reference in New Issue
Block a user