diff --git a/ray_serve/serve_llm.py b/ray_serve/serve_llm.py
index 265c87e..1992083 100644
--- a/ray_serve/serve_llm.py
+++ b/ray_serve/serve_llm.py
@@ -16,10 +16,10 @@ class LLMDeployment:
     def __init__(self):
         # Workaround: vLLM's rocm.py:verify_quantization unconditionally
         # sets VLLM_USE_TRITON_AWQ=1 (a bug — the os.environ line is
-        # outside the if-block). Triton AWQ kernels can't compile for
-        # gfx1151, so we must keep it at 0. Monkey-patch before engine
-        # creation to prevent the override.
-        os.environ["VLLM_USE_TRITON_AWQ"] = "0"
+        # outside the if-block). Monkey-patch before engine creation to
+        # preserve whatever the operator set in runtime_env env_vars.
+        # On gfx1151 Triton AWQ works; the C++ awq_dequantize does NOT.
+        os.environ.setdefault("VLLM_USE_TRITON_AWQ", "1")
 
         try:
             from vllm.platforms.rocm import RocmPlatform
@@ -34,7 +34,8 @@ class LLMDeployment:
                 os.environ["VLLM_USE_TRITON_AWQ"] = saved
 
             RocmPlatform.verify_quantization = _patched_verify
-            print("Patched RocmPlatform.verify_quantization to preserve VLLM_USE_TRITON_AWQ=0")
+            triton_val = os.environ.get("VLLM_USE_TRITON_AWQ", "?")
+            print(f"Patched RocmPlatform.verify_quantization to preserve VLLM_USE_TRITON_AWQ={triton_val}")
         except Exception as exc:
             print(f"Could not patch RocmPlatform: {exc}")
 