fix: move mlflow import inside __init__ to avoid cloudpickle serialization failure
All checks were successful
Build and Publish ray-serve-apps / build-and-publish (push) Successful in 16s
The strixhalo LLM worker uses py_executable which bypasses pip runtime_env. Module-level try/except still fails because cloudpickle on the head node resolves the real InferenceLogger class and serializes a module reference. Moving the import inside __init__ means it runs at actor construction time on the worker, where ImportError is caught gracefully.
This commit is contained in:
@@ -10,11 +10,6 @@ from typing import Any
|
||||
|
||||
from ray import serve
|
||||
|
||||
try:
|
||||
from ray_serve.mlflow_logger import InferenceLogger
|
||||
except ImportError:
|
||||
InferenceLogger = None
|
||||
|
||||
|
||||
@serve.deployment(name="LLMDeployment", num_replicas=1)
|
||||
class LLMDeployment:
|
||||
@@ -42,8 +37,12 @@ class LLMDeployment:
|
||||
self.SamplingParams = SamplingParams
|
||||
print(f"Model {self.model_id} async engine created")
|
||||
|
||||
# MLflow metrics
|
||||
if InferenceLogger is not None:
|
||||
# MLflow metrics — import locally to avoid cloudpickle
|
||||
# serializing a module reference that fails on the worker
|
||||
# (strixhalo uses py_executable which bypasses pip runtime_env)
|
||||
try:
|
||||
from ray_serve.mlflow_logger import InferenceLogger
|
||||
|
||||
self._mlflow = InferenceLogger(
|
||||
experiment_name="ray-serve-llm",
|
||||
run_name=f"llm-{self.model_id.split('/')[-1]}",
|
||||
@@ -57,7 +56,7 @@ class LLMDeployment:
|
||||
"gpu_memory_utilization": str(self.gpu_memory_utilization),
|
||||
}
|
||||
)
|
||||
else:
|
||||
except ImportError:
|
||||
self._mlflow = None
|
||||
|
||||
async def __call__(self, request: dict[str, Any]) -> dict[str, Any]:
|
||||
|
||||
Reference in New Issue
Block a user