fix: make mlflow_logger import optional with no-op fallback
All checks were successful
Build and Publish ray-serve-apps / build-and-publish (push) Successful in 11s

The strixhalo LLM worker uses a py_executable pointing to the Docker
image venv, which doesn't have the updated ray-serve-apps package.
Wrap all InferenceLogger imports in try/except and guard each usage with
a None check so apps degrade gracefully without MLflow logging.
This commit is contained in:
2026-02-12 07:01:17 -05:00
parent 7ec2107e0c
commit 15e4b8afa3
5 changed files with 124 additions and 88 deletions

View File

@@ -11,7 +11,10 @@ from typing import Any
from ray import serve
from ray_serve.mlflow_logger import InferenceLogger
try:
from ray_serve.mlflow_logger import InferenceLogger
except ImportError:
InferenceLogger = None
@serve.deployment(name="WhisperDeployment", num_replicas=1)
@@ -42,15 +45,18 @@ class WhisperDeployment:
print("Whisper model loaded successfully")
# MLflow metrics
self._mlflow = InferenceLogger(
experiment_name="ray-serve-whisper",
run_name=f"whisper-{self.model_size}",
tags={"model.name": f"whisper-{self.model_size}", "model.framework": "faster-whisper", "device": self.device},
flush_every=5,
)
self._mlflow.initialize(
params={"model_size": self.model_size, "device": self.device, "compute_type": self.compute_type}
)
if InferenceLogger is not None:
self._mlflow = InferenceLogger(
experiment_name="ray-serve-whisper",
run_name=f"whisper-{self.model_size}",
tags={"model.name": f"whisper-{self.model_size}", "model.framework": "faster-whisper", "device": self.device},
flush_every=5,
)
self._mlflow.initialize(
params={"model_size": self.model_size, "device": self.device, "compute_type": self.compute_type}
)
else:
self._mlflow = None
async def __call__(self, request: dict[str, Any]) -> dict[str, Any]:
"""
@@ -146,12 +152,13 @@ class WhisperDeployment:
}
# Log to MLflow
self._mlflow.log_request(
latency_s=time.time() - _start,
audio_duration_s=info.duration,
segments=len(segment_list),
realtime_factor=(time.time() - _start) / info.duration if info.duration > 0 else 0,
)
if self._mlflow:
self._mlflow.log_request(
latency_s=time.time() - _start,
audio_duration_s=info.duration,
segments=len(segment_list),
realtime_factor=(time.time() - _start) / info.duration if info.duration > 0 else 0,
)
# Default JSON format (OpenAI-compatible)
return {