From bd8c8616d0de837068d52e75555c7ee78028b904 Mon Sep 17 00:00:00 2001
From: "Billy D."
Date: Mon, 2 Feb 2026 07:12:05 -0500
Subject: [PATCH] updates.

---
 document_ingestion_mlflow_pipeline.py |  2 +-
 document_ingestion_pipeline.py        |  2 +-
 voice_pipeline.py                     | 12 ++++++------
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/document_ingestion_mlflow_pipeline.py b/document_ingestion_mlflow_pipeline.py
index ebecdd2..670ded1 100644
--- a/document_ingestion_mlflow_pipeline.py
+++ b/document_ingestion_mlflow_pipeline.py
@@ -216,7 +216,7 @@ def chunk_text_with_tracking(
 def generate_embeddings_with_tracking(
     chunks: list,
     run_id: str,
-    embeddings_url: str = "http://embeddings-predictor.ai-ml.svc.cluster.local",
+    embeddings_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/embeddings",
     embeddings_model: str = "bge-small-en-v1.5",
     batch_size: int = 32,
     mlflow_tracking_uri: str = "http://mlflow.mlflow.svc.cluster.local:80",
diff --git a/document_ingestion_pipeline.py b/document_ingestion_pipeline.py
index cd5bd3d..1d42985 100644
--- a/document_ingestion_pipeline.py
+++ b/document_ingestion_pipeline.py
@@ -101,7 +101,7 @@ def chunk_text(
 )
 def generate_embeddings_batch(
     chunks: list,
-    embeddings_url: str = "http://embeddings-predictor.ai-ml.svc.cluster.local",
+    embeddings_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/embeddings",
     batch_size: int = 32
 ) -> list:
     """Generate embeddings for all chunks."""
diff --git a/voice_pipeline.py b/voice_pipeline.py
index 886f6c0..f953fe9 100644
--- a/voice_pipeline.py
+++ b/voice_pipeline.py
@@ -20,7 +20,7 @@ from kfp import compiler
 )
 def transcribe_audio(
     audio_b64: str,
-    whisper_url: str = "http://whisper-predictor.ai-ml.svc.cluster.local"
+    whisper_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/whisper"
 ) -> str:
     """Transcribe audio using Whisper STT service."""
     import base64
@@ -45,7 +45,7 @@ def transcribe_audio(
 )
 def generate_embeddings(
     text: str,
-    embeddings_url: str = "http://embeddings-predictor.ai-ml.svc.cluster.local"
+    embeddings_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/embeddings"
 ) -> list:
     """Generate embeddings for RAG retrieval."""
     import httpx
@@ -108,7 +108,7 @@ def retrieve_context(
 def rerank_documents(
     query: str,
     documents: list,
-    reranker_url: str = "http://reranker-predictor.ai-ml.svc.cluster.local",
+    reranker_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/reranker",
     top_k: int = 3
 ) -> list:
     """Rerank documents using BGE reranker."""
@@ -145,8 +145,8 @@ def rerank_documents(
 def generate_response(
     query: str,
     context: list,
-    vllm_url: str = "http://llm-draft.ai-ml.svc.cluster.local:8000",
-    model: str = "mistralai/Mistral-7B-Instruct-v0.3"
+    vllm_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/llm",
+    model: str = "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4"
 ) -> str:
     """Generate response using vLLM."""
     import httpx
@@ -188,7 +188,7 @@ Keep responses concise and natural for speech synthesis."""
 )
 def synthesize_speech(
     text: str,
-    tts_url: str = "http://tts-predictor.ai-ml.svc.cluster.local"
+    tts_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/tts"
 ) -> str:
     """Convert text to speech using TTS service."""
     import base64