fix: move mlflow import inside __init__ to avoid cloudpickle serialization failure
All checks were successful
Build and Publish ray-serve-apps / build-and-publish (push) Successful in 16s
The strixhalo LLM worker uses py_executable which bypasses pip runtime_env. Module-level try/except still fails because cloudpickle on the head node resolves the real InferenceLogger class and serializes a module reference. Moving the import inside __init__ means it runs at actor construction time on the worker, where ImportError is caught gracefully.
This commit is contained in:
@@ -10,11 +10,6 @@ from typing import Any
 from ray import serve
 
-try:
-    from ray_serve.mlflow_logger import InferenceLogger
-except ImportError:
-    InferenceLogger = None
 
 
 @serve.deployment(name="LLMDeployment", num_replicas=1)
 class LLMDeployment:
@@ -42,8 +37,12 @@ class LLMDeployment:
         self.SamplingParams = SamplingParams
         print(f"Model {self.model_id} async engine created")
 
-        # MLflow metrics
-        if InferenceLogger is not None:
+        # MLflow metrics — import locally to avoid cloudpickle
+        # serializing a module reference that fails on the worker
+        # (strixhalo uses py_executable which bypasses pip runtime_env)
+        try:
+            from ray_serve.mlflow_logger import InferenceLogger
             self._mlflow = InferenceLogger(
                 experiment_name="ray-serve-llm",
                 run_name=f"llm-{self.model_id.split('/')[-1]}",
@@ -57,7 +56,7 @@ class LLMDeployment:
                     "gpu_memory_utilization": str(self.gpu_memory_utilization),
                 }
             )
-        else:
+        except ImportError:
             self._mlflow = None
 
     async def __call__(self, request: dict[str, Any]) -> dict[str, Any]:
Reference in New Issue
Block a user