This commit is contained in:
2026-02-02 07:12:05 -05:00
parent c26e4e5ef0
commit bd8c8616d0
3 changed files with 8 additions and 8 deletions

View File

@@ -216,7 +216,7 @@ def chunk_text_with_tracking(
def generate_embeddings_with_tracking( def generate_embeddings_with_tracking(
chunks: list, chunks: list,
run_id: str, run_id: str,
embeddings_url: str = "http://embeddings-predictor.ai-ml.svc.cluster.local", embeddings_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/embeddings",
embeddings_model: str = "bge-small-en-v1.5", embeddings_model: str = "bge-small-en-v1.5",
batch_size: int = 32, batch_size: int = 32,
mlflow_tracking_uri: str = "http://mlflow.mlflow.svc.cluster.local:80", mlflow_tracking_uri: str = "http://mlflow.mlflow.svc.cluster.local:80",

View File

@@ -101,7 +101,7 @@ def chunk_text(
) )
def generate_embeddings_batch( def generate_embeddings_batch(
chunks: list, chunks: list,
embeddings_url: str = "http://embeddings-predictor.ai-ml.svc.cluster.local", embeddings_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/embeddings",
batch_size: int = 32 batch_size: int = 32
) -> list: ) -> list:
"""Generate embeddings for all chunks.""" """Generate embeddings for all chunks."""

View File

@@ -20,7 +20,7 @@ from kfp import compiler
) )
def transcribe_audio( def transcribe_audio(
audio_b64: str, audio_b64: str,
whisper_url: str = "http://whisper-predictor.ai-ml.svc.cluster.local" whisper_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/whisper"
) -> str: ) -> str:
"""Transcribe audio using Whisper STT service.""" """Transcribe audio using Whisper STT service."""
import base64 import base64
@@ -45,7 +45,7 @@ def transcribe_audio(
) )
def generate_embeddings( def generate_embeddings(
text: str, text: str,
embeddings_url: str = "http://embeddings-predictor.ai-ml.svc.cluster.local" embeddings_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/embeddings"
) -> list: ) -> list:
"""Generate embeddings for RAG retrieval.""" """Generate embeddings for RAG retrieval."""
import httpx import httpx
@@ -108,7 +108,7 @@ def retrieve_context(
def rerank_documents( def rerank_documents(
query: str, query: str,
documents: list, documents: list,
reranker_url: str = "http://reranker-predictor.ai-ml.svc.cluster.local", reranker_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/reranker",
top_k: int = 3 top_k: int = 3
) -> list: ) -> list:
"""Rerank documents using BGE reranker.""" """Rerank documents using BGE reranker."""
@@ -145,8 +145,8 @@ def rerank_documents(
def generate_response( def generate_response(
query: str, query: str,
context: list, context: list,
vllm_url: str = "http://llm-draft.ai-ml.svc.cluster.local:8000", vllm_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/llm",
model: str = "mistralai/Mistral-7B-Instruct-v0.3" model: str = "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4"
) -> str: ) -> str:
"""Generate response using vLLM.""" """Generate response using vLLM."""
import httpx import httpx
@@ -188,7 +188,7 @@ Keep responses concise and natural for speech synthesis."""
) )
def synthesize_speech( def synthesize_speech(
text: str, text: str,
tts_url: str = "http://tts-predictor.ai-ml.svc.cluster.local" tts_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/tts"
) -> str: ) -> str:
"""Convert text to speech using TTS service.""" """Convert text to speech using TTS service."""
import base64 import base64