fix: add ENFORCE_EAGER env var to skip torch.compile on ROCm
All checks were successful
Build and Publish ray-serve-apps / build-and-publish (push) Successful in 12s
All checks were successful
Build and Publish ray-serve-apps / build-and-publish (push) Successful in 12s
A torch._dynamo.exc.Unsupported exception raised during graph tracing of LlamaDecoderLayer crashes EngineCore on gfx1151. Setting ENFORCE_EAGER=true bypasses torch.compile and CUDA graph capture entirely.
This commit is contained in:
@@ -54,6 +54,7 @@ class LLMDeployment:
|
||||
self.enable_chunked_prefill = os.environ.get("ENABLE_CHUNKED_PREFILL", "true").lower() == "true"
|
||||
self.num_speculative_tokens = int(os.environ.get("NUM_SPECULATIVE_TOKENS", "0"))
|
||||
self.ngram_prompt_lookup_max = int(os.environ.get("NGRAM_PROMPT_LOOKUP_MAX", "0"))
|
||||
self.enforce_eager = os.environ.get("ENFORCE_EAGER", "false").lower() == "true"
|
||||
|
||||
engine_kwargs: dict[str, Any] = {
|
||||
"model": self.model_id,
|
||||
@@ -62,6 +63,7 @@ class LLMDeployment:
|
||||
"trust_remote_code": True,
|
||||
"enable_prefix_caching": self.enable_prefix_caching,
|
||||
"enable_chunked_prefill": self.enable_chunked_prefill,
|
||||
"enforce_eager": self.enforce_eager,
|
||||
}
|
||||
|
||||
# n-gram speculative decoding (no draft model needed)
|
||||
@@ -75,6 +77,7 @@ class LLMDeployment:
|
||||
|
||||
print(f"Prefix caching: {self.enable_prefix_caching}")
|
||||
print(f"Chunked prefill: {self.enable_chunked_prefill}")
|
||||
print(f"Enforce eager (no torch.compile): {self.enforce_eager}")
|
||||
|
||||
engine_args = AsyncEngineArgs(**engine_kwargs)
|
||||
self.engine = AsyncLLMEngine.from_engine_args(engine_args)
|
||||
@@ -102,6 +105,7 @@ class LLMDeployment:
|
||||
"enable_chunked_prefill": str(self.enable_chunked_prefill),
|
||||
"num_speculative_tokens": str(self.num_speculative_tokens),
|
||||
"ngram_prompt_lookup_max": str(self.ngram_prompt_lookup_max),
|
||||
"enforce_eager": str(self.enforce_eager),
|
||||
}
|
||||
)
|
||||
except ImportError:
|
||||
|
||||
Reference in New Issue
Block a user