fix: make mlflow_logger import optional with no-op fallback
All checks were successful
Build and Publish ray-serve-apps / build-and-publish (push) Successful in 11s
The strixhalo LLM worker uses py_executable pointing to the Docker image venv, which doesn't have the updated ray-serve-apps package. Wrap all InferenceLogger imports in try/except and guard usage with None checks so the apps degrade gracefully without MLflow logging.
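Distilled, the change applies the optional-dependency pattern below (a minimal sketch using the names from the diff further down; the real deployment also passes run_name, tags, and flush_every and calls initialize()):

# Sketch of the guard pattern this commit applies; simplified from the diff below.
try:
    from ray_serve.mlflow_logger import InferenceLogger
except ImportError:
    InferenceLogger = None  # package absent in this venv; disable MLflow logging


class Deployment:  # simplified stand-in for EmbeddingsDeployment
    def __init__(self) -> None:
        if InferenceLogger is not None:
            self._mlflow = InferenceLogger(experiment_name="ray-serve-embeddings")
        else:
            self._mlflow = None

    def handle(self, batch_size: int) -> None:
        if self._mlflow:  # no-op when the logger is unavailable
            self._mlflow.log_request(batch_size=batch_size)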
@@ -9,7 +9,10 @@ from typing import Any

 from ray import serve

-from ray_serve.mlflow_logger import InferenceLogger
+try:
+    from ray_serve.mlflow_logger import InferenceLogger
+except ImportError:
+    InferenceLogger = None


 @serve.deployment(name="EmbeddingsDeployment", num_replicas=1)
@@ -37,15 +40,18 @@ class EmbeddingsDeployment:
         print(f"Model loaded. Embedding dimension: {self.embedding_dim}")

         # MLflow metrics
-        self._mlflow = InferenceLogger(
-            experiment_name="ray-serve-embeddings",
-            run_name=f"embeddings-{self.model_id.split('/')[-1]}",
-            tags={"model.name": self.model_id, "model.framework": "sentence-transformers", "device": self.device},
-            flush_every=10,
-        )
-        self._mlflow.initialize(
-            params={"model_id": self.model_id, "embedding_dim": str(self.embedding_dim), "device": self.device}
-        )
+        if InferenceLogger is not None:
+            self._mlflow = InferenceLogger(
+                experiment_name="ray-serve-embeddings",
+                run_name=f"embeddings-{self.model_id.split('/')[-1]}",
+                tags={"model.name": self.model_id, "model.framework": "sentence-transformers", "device": self.device},
+                flush_every=10,
+            )
+            self._mlflow.initialize(
+                params={"model_id": self.model_id, "embedding_dim": str(self.embedding_dim), "device": self.device}
+            )
+        else:
+            self._mlflow = None

     async def __call__(self, request: dict[str, Any]) -> dict[str, Any]:
         """
@@ -86,11 +92,12 @@ class EmbeddingsDeployment:
             total_tokens += len(text.split())

         # Log to MLflow
-        self._mlflow.log_request(
-            latency_s=time.time() - _start,
-            batch_size=len(texts),
-            total_tokens=total_tokens,
-        )
+        if self._mlflow:
+            self._mlflow.log_request(
+                latency_s=time.time() - _start,
+                batch_size=len(texts),
+                total_tokens=total_tokens,
+            )

         # Return OpenAI-compatible response
         return {
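A quick way to exercise the fallback locally (a hypothetical check, not part of the commit): block the module in sys.modules so the except ImportError branch runs even when the package is installed.

import sys

# A None entry in sys.modules makes any import of that name raise
# ModuleNotFoundError (a subclass of ImportError), triggering the fallback.
sys.modules["ray_serve.mlflow_logger"] = None

try:
    from ray_serve.mlflow_logger import InferenceLogger
except ImportError:
    InferenceLogger = None

assert InferenceLogger is None  # deployments set self._mlflow = None and skip logging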