Files
kubeflow/tts_pipeline.yaml
Billy D. bc4b230dd9 feat: add vLLM tuning pipeline + recompile voice pipelines with MLflow
New:
- vllm_tuning_pipeline.py: A/B benchmark different vLLM configs,
  logs latency/TPS/TTFT to MLflow (vllm-tuning experiment)
- vllm_tuning_pipeline.yaml: compiled KFP YAML

Updated:
- voice_pipeline.py: per-step NamedTuple outputs with latency tracking,
  new log_pipeline_metrics MLflow component
- voice_pipeline.yaml, tts_pipeline.yaml, rag_pipeline.yaml: recompiled
2026-02-13 08:24:11 -05:00

88 lines
3.3 KiB
YAML

# PIPELINE DEFINITION
# Name: text-to-speech-pipeline
# Description: Simple text to speech pipeline
# Inputs:
#    text: str
# Component interfaces. Each entry declares its typed inputs/outputs and names
# the executor (under deploymentSpec.executors) that implements it.
components:
  comp-synthesize-speech:
    executorLabel: exec-synthesize-speech
    inputDefinitions:
      parameters:
        # Required: the text to synthesize.
        text:
          parameterType: STRING
        # Optional: base URL of the TTS service; the executor code appends
        # "/v1/audio/speech" to it.
        tts_url:
          defaultValue: 'http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/tts'
          isOptional: true
          parameterType: STRING
    outputDefinitions:
      parameters:
        # Base64-encoded bytes of the service response body.
        audio_b64:
          parameterType: STRING
        # Elapsed time of the synthesis step, measured with time.perf_counter().
        latency_s:
          parameterType: NUMBER_DOUBLE
# Container runtime for each executor referenced by a component's executorLabel.
# NOTE(review): this file is compiler output (see sdkVersion); mirror any change
# to the embedded Python below in the component's Python source and recompile,
# otherwise the next compile will overwrite it.
deploymentSpec:
  executors:
    exec-synthesize-speech:
      container:
        # Appended after `command`; they end up as "$@" of the last shell stage
        # and are forwarded to kfp.dsl.executor_main.
        args:
          - --executor_input
          - '{{$}}'
          - --function_to_execute
          - synthesize_speech
        command:
          # Stage 1: make sure pip exists, install the KFP runtime and httpx,
          # then exec the remaining command entries ("$0" is the next `sh`).
          - sh
          - -c
          - |
            if ! [ -x "$(command -v pip)" ]; then
                python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
            fi

            PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.12.1' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<"3.9"' && python3 -m pip install --quiet --no-warn-script-location 'httpx' && "$0" "$@"
          # Stage 2: "$0" is the Python source given as the next entry. It is
          # written to a temp directory and run through the KFP executor.
          # A literal block scalar keeps each shell statement on its own line.
          - sh
          - -ec
          - |
            program_path=$(mktemp -d)

            printf "%s" "$0" > "$program_path/ephemeral_component.py"
            _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
          # Component source executed by stage 2.
          - |
            import kfp
            from kfp import dsl
            from kfp.dsl import *
            from typing import *

            def synthesize_speech(
                text: str,
                tts_url: str = "http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/tts"
            ) -> NamedTuple("TTSResult", [("audio_b64", str), ("latency_s", float)]):
                """Convert text to speech using TTS service.

                Posts `text` to `{tts_url}/v1/audio/speech` and returns the
                base64-encoded response body together with the elapsed time in
                seconds. Raises httpx.HTTPStatusError on a 4xx/5xx response.
                """
                import base64
                import time
                import httpx
                from collections import namedtuple

                start = time.perf_counter()
                with httpx.Client(timeout=120.0) as client:
                    response = client.post(
                        f"{tts_url}/v1/audio/speech",
                        json={
                            "input": text,
                            "voice": "en_US-lessac-high",
                            "response_format": "wav"
                        }
                    )
                    # Fail the step on an HTTP error instead of base64-encoding
                    # the error body and reporting it as audio (which task
                    # caching could then reuse on later runs).
                    response.raise_for_status()
                    audio_b64 = base64.b64encode(response.content).decode("utf-8")
                latency = time.perf_counter() - start

                TTSResult = namedtuple("TTSResult", ["audio_b64", "latency_s"])
                return TTSResult(audio_b64, latency)
        image: python:3.13-slim
# Pipeline identity; matches the Name/Description lines in the header comment.
pipelineInfo:
  description: 'Simple text to speech pipeline'
  name: 'text-to-speech-pipeline'
# Root DAG: a single task that runs comp-synthesize-speech on the pipeline's
# `text` input.
root:
  dag:
    tasks:
      synthesize-speech:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-synthesize-speech
        inputs:
          parameters:
            # Wire the pipeline-level `text` input into the component.
            # tts_url is not set here, so the component default applies.
            text:
              componentInputParameter: text
        taskInfo:
          name: synthesize-speech
  # Pipeline-level inputs.
  inputDefinitions:
    parameters:
      text:
        parameterType: STRING
# Pipeline spec schema version and the SDK release that compiled this file.
# Quoted so they are always read as strings.
schemaVersion: '2.1.0'
sdkVersion: 'kfp-2.12.1'