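Update pipeline defaults: point the embeddings, whisper, reranker, LLM, and TTS URLs at ai-inference-serve-svc:8000 routes, and switch the default LLM to Meta-Llama-3.1-70B-Instruct-AWQ-INT4.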

2026-02-02 07:12:05 -05:00
parent c26e4e5ef0
commit bd8c8616d0
3 changed files with 8 additions and 8 deletions
--- a/document_ingestion_mlflow_pipeline.py
+++ b/document_ingestion_mlflow_pipeline.py
@@ -216,7 +216,7 @@ def chunk_text_with_tracking(
 def generate_embeddings_with_tracking(
    chunks: list,
    run_id: str,
-    embeddings_url: str = "http://embeddings-predictor.ai-ml.svc.cluster.local",
+    embeddings_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/embeddings",
    embeddings_model: str = "bge-small-en-v1.5",
    batch_size: int = 32,
    mlflow_tracking_uri: str = "http://mlflow.mlflow.svc.cluster.local:80",
--- a/document_ingestion_pipeline.py
+++ b/document_ingestion_pipeline.py
@@ -101,7 +101,7 @@ def chunk_text(
 )
 def generate_embeddings_batch(
    chunks: list,
-    embeddings_url: str = "http://embeddings-predictor.ai-ml.svc.cluster.local",
+    embeddings_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/embeddings",
    batch_size: int = 32
 ) -> list:
    """Generate embeddings for all chunks."""
--- a/voice_pipeline.py
+++ b/voice_pipeline.py
@@ -20,7 +20,7 @@ from kfp import compiler
 )
 def transcribe_audio(
    audio_b64: str,
-    whisper_url: str = "http://whisper-predictor.ai-ml.svc.cluster.local"
+    whisper_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/whisper"
 ) -> str:
    """Transcribe audio using Whisper STT service."""
    import base64
@@ -45,7 +45,7 @@ def transcribe_audio(
 )
 def generate_embeddings(
    text: str,
-    embeddings_url: str = "http://embeddings-predictor.ai-ml.svc.cluster.local"
+    embeddings_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/embeddings"
 ) -> list:
    """Generate embeddings for RAG retrieval."""
    import httpx
@@ -108,7 +108,7 @@ def retrieve_context(
 def rerank_documents(
    query: str,
    documents: list,
-    reranker_url: str = "http://reranker-predictor.ai-ml.svc.cluster.local",
+    reranker_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/reranker",
    top_k: int = 3
 ) -> list:
    """Rerank documents using BGE reranker."""
@@ -145,8 +145,8 @@ def rerank_documents(
 def generate_response(
    query: str,
    context: list,
-    vllm_url: str = "http://llm-draft.ai-ml.svc.cluster.local:8000",
+    vllm_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/llm",
-    model: str = "mistralai/Mistral-7B-Instruct-v0.3"
+    model: str = "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4"
 ) -> str:
    """Generate response using vLLM."""
    import httpx
@@ -188,7 +188,7 @@ Keep responses concise and natural for speech synthesis."""
 )
 def synthesize_speech(
    text: str,
-    tts_url: str = "http://tts-predictor.ai-ml.svc.cluster.local"
+    tts_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/tts"
 ) -> str:
    """Convert text to speech using TTS service."""
    import base64