diff --git a/embeddings.py b/embeddings.py
index b5c0110..6adc0ec 100644
--- a/embeddings.py
+++ b/embeddings.py
@@ -27,7 +27,8 @@ logger = logging.getLogger("embeddings-demo")
 # Configuration
 EMBEDDINGS_URL = os.environ.get(
     "EMBEDDINGS_URL",
-    "http://embeddings-predictor.ai-ml.svc.cluster.local"
+    # Default: Ray Serve Embeddings endpoint
+    "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/embeddings"
 )
 MLFLOW_TRACKING_URI = os.environ.get(
     "MLFLOW_TRACKING_URI",
diff --git a/embeddings.yaml b/embeddings.yaml
index a9882a8..6b057ba 100644
--- a/embeddings.yaml
+++ b/embeddings.yaml
@@ -29,7 +29,8 @@ spec:
           protocol: TCP
         env:
         - name: EMBEDDINGS_URL
-          value: "http://embeddings-predictor.ai-ml.svc.cluster.local"
+          # Ray Serve endpoint - routes to /embeddings prefix
+          value: "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/embeddings"
         - name: MLFLOW_TRACKING_URI
           value: "http://mlflow.mlflow.svc.cluster.local:80"
         resources:
diff --git a/stt.py b/stt.py
index e9f662d..95da4ef 100644
--- a/stt.py
+++ b/stt.py
@@ -29,7 +29,8 @@ logger = logging.getLogger("stt-demo")
 # Configuration
 STT_URL = os.environ.get(
     "STT_URL",
-    "http://whisper-predictor.ai-ml.svc.cluster.local"
+    # Default: Ray Serve whisper endpoint
+    "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/whisper"
 )
 MLFLOW_TRACKING_URI = os.environ.get(
     "MLFLOW_TRACKING_URI",
diff --git a/stt.yaml b/stt.yaml
index d1dae38..e18a748 100644
--- a/stt.yaml
+++ b/stt.yaml
@@ -29,7 +29,11 @@ spec:
           protocol: TCP
         env:
         - name: WHISPER_URL
-          value: "http://whisper-predictor.ai-ml.svc.cluster.local"
+          # Ray Serve endpoint - routes to /whisper prefix
+          value: "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/whisper"
+        # stt.py reads STT_URL (not WHISPER_URL); set it too so the app uses the configured endpoint
+        - name: STT_URL
+          value: "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/whisper"
         - name: MLFLOW_TRACKING_URI
           value: "http://mlflow.mlflow.svc.cluster.local:80"
         resources:
diff --git a/tts.py b/tts.py
index 6481268..7041d7f 100644
--- a/tts.py
+++ b/tts.py
@@ -29,7 +29,8 @@ logger = logging.getLogger("tts-demo")
 # Configuration
 TTS_URL = os.environ.get(
     "TTS_URL",
-    "http://tts-predictor.ai-ml.svc.cluster.local"
+    # Default: Ray Serve TTS endpoint
+    "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/tts"
 )
 MLFLOW_TRACKING_URI = os.environ.get(
     "MLFLOW_TRACKING_URI",
diff --git a/tts.yaml b/tts.yaml
index 2699480..d6e5734 100644
--- a/tts.yaml
+++ b/tts.yaml
@@ -29,7 +29,8 @@ spec:
           protocol: TCP
         env:
         - name: TTS_URL
-          value: "http://tts-predictor.ai-ml.svc.cluster.local"
+          # Ray Serve endpoint - routes to /tts prefix
+          value: "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/tts"
         - name: MLFLOW_TRACKING_URI
           value: "http://mlflow.mlflow.svc.cluster.local:80"
         resources: