feat: add vLLM tuning pipeline + recompile voice pipelines with MLflow
New: - vllm_tuning_pipeline.py: A/B benchmark different vLLM configs, logs latency/TPS/TTFT to MLflow (vllm-tuning experiment) - vllm_tuning_pipeline.yaml: compiled KFP YAML Updated: - voice_pipeline.py: per-step NamedTuple outputs with latency tracking, new log_pipeline_metrics MLflow component - voice_pipeline.yaml, tts_pipeline.yaml, rag_pipeline.yaml: recompiled
This commit is contained in:
tts_pipeline.yaml (new file, +87 lines)
@@ -0,0 +1,87 @@
# NOTE(review): reconstructed from a scrape-garbled diff view — gutter artifacts
# ("|" / "||||" lines) removed and standard KFP compiled-YAML indentation
# restored; verify against the original compiled tts_pipeline.yaml.
# PIPELINE DEFINITION
# Name: text-to-speech-pipeline
# Description: Simple text to speech pipeline
# Inputs:
#    text: str
components:
  comp-synthesize-speech:
    executorLabel: exec-synthesize-speech
    inputDefinitions:
      parameters:
        text:
          parameterType: STRING
        tts_url:
          defaultValue: http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/tts
          isOptional: true
          parameterType: STRING
    outputDefinitions:
      parameters:
        audio_b64:
          parameterType: STRING
        latency_s:
          parameterType: NUMBER_DOUBLE
deploymentSpec:
  executors:
    exec-synthesize-speech:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - synthesize_speech
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.12.1'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\
          \ python3 -m pip install --quiet --no-warn-script-location 'httpx' && \"\
          $0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)


          printf "%s" "$0" > "$program_path/ephemeral_component.py"

          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path
          "$program_path/ephemeral_component.py" "$@"

          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef synthesize_speech(\n    text: str,\n    tts_url: str = \"http://ai-inference-serve-svc.ai-ml.svc.cluster.local:8000/tts\"\
          \n) -> NamedTuple(\"TTSResult\", [(\"audio_b64\", str), (\"latency_s\",\
          \ float)]):\n    \"\"\"Convert text to speech using TTS service.\"\"\"\n\
          \    import base64\n    import time\n    import httpx\n    from collections\
          \ import namedtuple\n\n    start = time.perf_counter()\n    with httpx.Client(timeout=120.0)\
          \ as client:\n        response = client.post(\n            f\"{tts_url}/v1/audio/speech\"\
          ,\n            json={\n                \"input\": text,\n                \
          \ \"voice\": \"en_US-lessac-high\",\n                \"response_format\"\
          : \"wav\"\n            }\n        )\n    audio_b64 = base64.b64encode(response.content).decode(\"\
          utf-8\")\n    latency = time.perf_counter() - start\n\n    TTSResult = namedtuple(\"\
          TTSResult\", [\"audio_b64\", \"latency_s\"])\n    return TTSResult(audio_b64,\
          \ latency)\n\n"
        image: python:3.13-slim
pipelineInfo:
  description: Simple text to speech pipeline
  name: text-to-speech-pipeline
root:
  dag:
    tasks:
      synthesize-speech:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-synthesize-speech
        inputs:
          parameters:
            text:
              componentInputParameter: text
        taskInfo:
          name: synthesize-speech
  inputDefinitions:
    parameters:
      text:
        parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.12.1
Reference in New Issue
Block a user