kubeflow/dvd_transcription_pipeline.yaml

# PIPELINE DEFINITION
# Name: dvd-video-transcription
# Description: Extract audio from a DVD or video file, transcribe it via Whisper STT, and produce a timestamped transcript (SRT/VTT/TXT).
# Inputs:
#    chunk_duration_s: int [Default: 300.0]
#    language: str [Default: 'en']
#    mlflow_tracking_uri: str [Default: 'http://mlflow.mlflow.svc.cluster.local:80']
#    output_format: str [Default: 'srt']
#    source_path: str [Default: '/data/dvd/movie.mkv']
#    whisper_url: str [Default: 'http://ai-inference-serve-svc.kuberay.svc.cluster.local:8000/whisper']
components:
  comp-chunk-audio:
    executorLabel: exec-chunk-audio
    inputDefinitions:
      parameters:
        chunk_duration_s:
          defaultValue: 300.0
          description: Seconds per chunk (default 5 minutes).
          isOptional: true
          parameterType: NUMBER_INTEGER
        wav_path:
          description: '        Path to the mono 16 kHz WAV.'
          parameterType: STRING
    outputDefinitions:
      parameters:
        chunk_paths:
          parameterType: LIST
        num_chunks:
          parameterType: NUMBER_INTEGER
  comp-extract-audio:
    executorLabel: exec-extract-audio
    inputDefinitions:
      parameters:
        mono:
          defaultValue: true
          description: '       Down-mix to mono (Whisper expects single-channel).'
          isOptional: true
          parameterType: BOOLEAN
        sample_rate:
          defaultValue: 16000.0
          description: Target sample rate (16 kHz is optimal for Whisper).
          isOptional: true
          parameterType: NUMBER_INTEGER
        source_path:
          description: 'Path to DVD ISO, VOB, MKV, MP4, or any ffmpeg-

            supported file.  Can also be a /dev/sr0 device.'
          parameterType: STRING
    outputDefinitions:
      parameters:
        duration_s:
          parameterType: NUMBER_DOUBLE
        wav_path:
          parameterType: STRING
  comp-format-transcript:
    executorLabel: exec-format-transcript
    inputDefinitions:
      parameters:
        full_text:
          description: '      Full concatenated text.'
          parameterType: STRING
        output_format:
          defaultValue: srt
          description: '  ''srt'', ''vtt'', or ''txt''.'
          isOptional: true
          parameterType: STRING
        segments:
          description: '       List of segment dicts with start/end/text.'
          parameterType: LIST
        total_duration_s:
          description: Total audio duration in seconds.
          parameterType: NUMBER_DOUBLE
    outputDefinitions:
      parameters:
        output_path:
          parameterType: STRING
        transcript:
          parameterType: STRING
  comp-log-transcription-metrics:
    executorLabel: exec-log-transcription-metrics
    inputDefinitions:
      parameters:
        experiment_name:
          defaultValue: dvd-transcription
          isOptional: true
          parameterType: STRING
        full_text:
          parameterType: STRING
        mlflow_tracking_uri:
          defaultValue: http://mlflow.mlflow.svc.cluster.local:80
          isOptional: true
          parameterType: STRING
        source_path:
          parameterType: STRING
        total_duration_s:
          parameterType: NUMBER_DOUBLE
  comp-transcribe-chunks:
    executorLabel: exec-transcribe-chunks
    inputDefinitions:
      parameters:
        chunk_paths:
          description: '    List of WAV file paths to transcribe.'
          parameterType: LIST
        language:
          defaultValue: en
          description: '       Language code for Whisper (None for auto-detect).'
          isOptional: true
          parameterType: STRING
        response_format:
          defaultValue: verbose_json
          description: '''json'', ''verbose_json'', or ''text''.'
          isOptional: true
          parameterType: STRING
        whisper_url:
          defaultValue: http://ai-inference-serve-svc.kuberay.svc.cluster.local:8000/whisper
          description: '    In-cluster Whisper endpoint URL.'
          isOptional: true
          parameterType: STRING
    outputDefinitions:
      parameters:
        full_text:
          parameterType: STRING
        segments:
          parameterType: LIST
        total_duration_s:
          parameterType: NUMBER_DOUBLE
deploymentSpec:
  executors:
    exec-chunk-audio:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - chunk_audio
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.12.1'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\
          $0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)


          printf "%s" "$0" > "$program_path/ephemeral_component.py"

          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main                         --component_module_path                         "$program_path/ephemeral_component.py"                         "$@"

          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef chunk_audio(\n    wav_path: str,\n    chunk_duration_s: int =\
          \ 300,\n) -> NamedTuple(\"ChunkOutput\", [(\"chunk_paths\", list), (\"num_chunks\"\
          , int)]):\n    \"\"\"Split a WAV file into fixed-duration chunks.\n\n  \
          \  Args:\n        wav_path:         Path to the mono 16 kHz WAV.\n     \
          \   chunk_duration_s: Seconds per chunk (default 5 minutes).\n    \"\"\"\
          \n    import os\n    import subprocess\n    import glob\n\n    subprocess.run(\n\
          \        [\"apt-get\", \"update\", \"-qq\"],\n        check=True, capture_output=True,\n\
          \    )\n    subprocess.run(\n        [\"apt-get\", \"install\", \"-y\",\
          \ \"-qq\", \"ffmpeg\"],\n        check=True, capture_output=True,\n    )\n\
          \n    out_dir = \"/tmp/dvd_chunks\"\n    os.makedirs(out_dir, exist_ok=True)\n\
          \    pattern = os.path.join(out_dir, \"chunk_%04d.wav\")\n\n    cmd = [\n\
          \        \"ffmpeg\", \"-y\",\n        \"-i\", wav_path,\n        \"-f\"\
          , \"segment\",\n        \"-segment_time\", str(chunk_duration_s),\n    \
          \    \"-c\", \"copy\",\n        pattern,\n    ]\n    print(f\"Chunking:\
          \ {' '.join(cmd)}\")\n    result = subprocess.run(cmd, capture_output=True,\
          \ text=True, timeout=3600)\n    if result.returncode != 0:\n        raise\
          \ RuntimeError(f\"ffmpeg chunk failed:\\n{result.stderr}\")\n\n    chunks\
          \ = sorted(glob.glob(os.path.join(out_dir, \"chunk_*.wav\")))\n    print(f\"\
          Created {len(chunks)} chunks of ~{chunk_duration_s}s each\")\n\n    from\
          \ collections import namedtuple\n    ChunkOutput = namedtuple(\"ChunkOutput\"\
          , [\"chunk_paths\", \"num_chunks\"])\n    return ChunkOutput(chunk_paths=chunks,\
          \ num_chunks=len(chunks))\n\n"
        image: python:3.13-slim
    exec-extract-audio:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - extract_audio
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.12.1'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"'  &&\
          \  python3 -m pip install --quiet --no-warn-script-location 'requests' &&\
          \ \"$0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)


          printf "%s" "$0" > "$program_path/ephemeral_component.py"

          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main                         --component_module_path                         "$program_path/ephemeral_component.py"                         "$@"

          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef extract_audio(\n    source_path: str,\n    sample_rate: int =\
          \ 16000,\n    mono: bool = True,\n) -> NamedTuple(\"AudioOutput\", [(\"\
          wav_path\", str), (\"duration_s\", float)]):\n    \"\"\"Extract audio from\
          \ a video/DVD file using ffmpeg.\n\n    Args:\n        source_path: Path\
          \ to DVD ISO, VOB, MKV, MP4, or any ffmpeg-\n                     supported\
          \ file.  Can also be a /dev/sr0 device.\n        sample_rate: Target sample\
          \ rate (16 kHz is optimal for Whisper).\n        mono:        Down-mix to\
          \ mono (Whisper expects single-channel).\n    \"\"\"\n    import os\n  \
          \  import subprocess\n    import json\n\n    # Install ffmpeg inside the\
          \ container\n    subprocess.run(\n        [\"apt-get\", \"update\", \"-qq\"\
          ],\n        check=True, capture_output=True,\n    )\n    subprocess.run(\n\
          \        [\"apt-get\", \"install\", \"-y\", \"-qq\", \"ffmpeg\"],\n    \
          \    check=True, capture_output=True,\n    )\n\n    out_dir = \"/tmp/dvd_audio\"\
          \n    os.makedirs(out_dir, exist_ok=True)\n    wav_path = os.path.join(out_dir,\
          \ \"full_audio.wav\")\n\n    # Build ffmpeg command\n    cmd = [\"ffmpeg\"\
          , \"-y\"]\n\n    # Handle DVD ISOs \u2013 mount via concat demuxer or direct\
          \ input\n    if source_path.lower().endswith(\".iso\"):\n        # For ISOs,\
          \ ffmpeg can read via dvdread protocol\n        cmd += [\"-i\", f\"dvd://{source_path}\"\
          ]\n    else:\n        cmd += [\"-i\", source_path]\n\n    # Audio extraction\
          \ options\n    cmd += [\n        \"-vn\",                        # drop\
          \ video\n        \"-acodec\", \"pcm_s16le\",       # 16-bit WAV\n      \
          \  \"-ar\", str(sample_rate),      # resample\n    ]\n    if mono:\n   \
          \     cmd += [\"-ac\", \"1\"]           # down-mix to mono\n\n    cmd +=\
          \ [wav_path]\n\n    print(f\"Running: {' '.join(cmd)}\")\n    result = subprocess.run(cmd,\
          \ capture_output=True, text=True, timeout=7200)\n    if result.returncode\
          \ != 0:\n        raise RuntimeError(f\"ffmpeg failed:\\n{result.stderr}\"\
          )\n\n    # Get duration via ffprobe\n    probe = subprocess.run(\n     \
          \   [\n            \"ffprobe\", \"-v\", \"quiet\",\n            \"-show_entries\"\
          , \"format=duration\",\n            \"-of\", \"json\", wav_path,\n     \
          \   ],\n        capture_output=True, text=True,\n    )\n    duration_s =\
          \ float(json.loads(probe.stdout)[\"format\"][\"duration\"])\n    file_size_mb\
          \ = os.path.getsize(wav_path) / (1024 * 1024)\n    print(f\"Extracted: {wav_path}\
          \ ({file_size_mb:.1f} MB, {duration_s:.1f}s)\")\n\n    from collections\
          \ import namedtuple\n    AudioOutput = namedtuple(\"AudioOutput\", [\"wav_path\"\
          , \"duration_s\"])\n    return AudioOutput(wav_path=wav_path, duration_s=duration_s)\n\
          \n"
        image: python:3.13-slim
    exec-format-transcript:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - format_transcript
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.12.1'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\
          $0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)


          printf "%s" "$0" > "$program_path/ephemeral_component.py"

          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main                         --component_module_path                         "$program_path/ephemeral_component.py"                         "$@"

          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef format_transcript(\n    segments: list,\n    full_text: str,\n\
          \    total_duration_s: float,\n    output_format: str = \"srt\",\n) -> NamedTuple(\"\
          FormattedOutput\", [(\"transcript\", str), (\"output_path\", str)]):\n \
          \   \"\"\"Format the transcript as SRT, VTT, or plain text.\n\n    Args:\n\
          \        segments:        List of segment dicts with start/end/text.\n \
          \       full_text:       Full concatenated text.\n        total_duration_s:\
          \ Total audio duration in seconds.\n        output_format:   'srt', 'vtt',\
          \ or 'txt'.\n    \"\"\"\n    import os\n\n    def _fmt_ts_srt(seconds: float)\
          \ -> str:\n        h = int(seconds // 3600)\n        m = int((seconds %\
          \ 3600) // 60)\n        s = int(seconds % 60)\n        ms = int((seconds\
          \ % 1) * 1000)\n        return f\"{h:02d}:{m:02d}:{s:02d},{ms:03d}\"\n\n\
          \    def _fmt_ts_vtt(seconds: float) -> str:\n        h = int(seconds //\
          \ 3600)\n        m = int((seconds % 3600) // 60)\n        s = int(seconds\
          \ % 60)\n        ms = int((seconds % 1) * 1000)\n        return f\"{h:02d}:{m:02d}:{s:02d}.{ms:03d}\"\
          \n\n    out_dir = \"/tmp/dvd_transcript\"\n    os.makedirs(out_dir, exist_ok=True)\n\
          \n    if output_format == \"srt\":\n        lines = []\n        for i, seg\
          \ in enumerate(segments, 1):\n            start_ts = _fmt_ts_srt(seg[\"\
          start\"])\n            end_ts = _fmt_ts_srt(seg[\"end\"])\n            text\
          \ = seg.get(\"text\", \"\").strip()\n            lines.append(f\"{i}\\n{start_ts}\
          \ --> {end_ts}\\n{text}\\n\")\n        transcript = \"\\n\".join(lines)\n\
          \        ext = \"srt\"\n\n    elif output_format == \"vtt\":\n        lines\
          \ = [\"WEBVTT\\n\"]\n        for seg in segments:\n            start_ts\
          \ = _fmt_ts_vtt(seg[\"start\"])\n            end_ts = _fmt_ts_vtt(seg[\"\
          end\"])\n            text = seg.get(\"text\", \"\").strip()\n          \
          \  lines.append(f\"{start_ts} --> {end_ts}\\n{text}\\n\")\n        transcript\
          \ = \"\\n\".join(lines)\n        ext = \"vtt\"\n\n    else:  # txt\n   \
          \     transcript = full_text\n        ext = \"txt\"\n\n    out_path = os.path.join(out_dir,\
          \ f\"transcript.{ext}\")\n    with open(out_path, \"w\", encoding=\"utf-8\"\
          ) as f:\n        f.write(transcript)\n\n    h = int(total_duration_s //\
          \ 3600)\n    m = int((total_duration_s % 3600) // 60)\n    print(f\"Transcript\
          \ saved: {out_path}\")\n    print(f\"Audio duration: {h}h {m}m, Segments:\
          \ {len(segments)}\")\n    print(f\"Format: {output_format.upper()}, Size:\
          \ {len(transcript)} chars\")\n\n    from collections import namedtuple\n\
          \    FormattedOutput = namedtuple(\"FormattedOutput\", [\"transcript\",\
          \ \"output_path\"])\n    return FormattedOutput(transcript=transcript, output_path=out_path)\n\
          \n"
        image: python:3.13-slim
    exec-log-transcription-metrics:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - log_transcription_metrics
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.12.1'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"'  &&\
          \  python3 -m pip install --quiet --no-warn-script-location 'mlflow==2.22.0'\
          \ && \"$0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)


          printf "%s" "$0" > "$program_path/ephemeral_component.py"

          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main                         --component_module_path                         "$program_path/ephemeral_component.py"                         "$@"

          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef log_transcription_metrics(\n    total_duration_s: float,\n  \
          \  full_text: str,\n    source_path: str,\n    mlflow_tracking_uri: str\
          \ = \"http://mlflow.mlflow.svc.cluster.local:80\",\n    experiment_name:\
          \ str = \"dvd-transcription\",\n):\n    \"\"\"Log transcription run metrics\
          \ to MLflow.\"\"\"\n    import mlflow\n\n    mlflow.set_tracking_uri(mlflow_tracking_uri)\n\
          \    mlflow.set_experiment(experiment_name)\n\n    with mlflow.start_run(run_name=f\"\
          transcribe-{source_path.split('/')[-1]}\"):\n        mlflow.log_params({\n\
          \            \"source_path\": source_path,\n            \"model\": \"whisper-large-v3\"\
          ,\n        })\n        mlflow.log_metrics({\n            \"audio_duration_s\"\
          : total_duration_s,\n            \"transcript_chars\": float(len(full_text)),\n\
          \        })\n\n"
        image: python:3.13-slim
    exec-transcribe-chunks:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - transcribe_chunks
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.12.1'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"'  &&\
          \  python3 -m pip install --quiet --no-warn-script-location 'requests' &&\
          \ \"$0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)


          printf "%s" "$0" > "$program_path/ephemeral_component.py"

          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main                         --component_module_path                         "$program_path/ephemeral_component.py"                         "$@"

          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef transcribe_chunks(\n    chunk_paths: list,\n    whisper_url:\
          \ str = \"http://ai-inference-serve-svc.kuberay.svc.cluster.local:8000/whisper\"\
          ,\n    language: str = \"en\",\n    response_format: str = \"verbose_json\"\
          ,\n) -> NamedTuple(\"TranscriptOutput\", [(\"segments\", list), (\"full_text\"\
          , str), (\"total_duration_s\", float)]):\n    \"\"\"Send each audio chunk\
          \ to the Whisper STT endpoint.\n\n    Args:\n        chunk_paths:     List\
          \ of WAV file paths to transcribe.\n        whisper_url:     In-cluster\
          \ Whisper endpoint URL.\n        language:        Language code for Whisper\
          \ (None for auto-detect).\n        response_format: 'json', 'verbose_json',\
          \ or 'text'.\n    \"\"\"\n    import base64\n    import json\n    import\
          \ time\n    import requests\n\n    all_segments = []\n    full_text_parts\
          \ = []\n    total_audio_duration = 0.0\n    time_offset = 0.0  # cumulative\
          \ offset for absolute timestamps\n\n    for i, path in enumerate(chunk_paths):\n\
          \        print(f\"Transcribing chunk {i + 1}/{len(chunk_paths)}: {path}\"\
          )\n\n        # Read and base64-encode the chunk\n        with open(path,\
          \ \"rb\") as f:\n            audio_b64 = base64.b64encode(f.read()).decode(\"\
          utf-8\")\n\n        payload = {\n            \"audio\": audio_b64,\n   \
          \         \"audio_format\": \"wav\",\n            \"language\": language,\n\
          \            \"task\": \"transcribe\",\n            \"response_format\"\
          : response_format,\n            \"word_timestamps\": False,\n        }\n\
          \n        start = time.time()\n        resp = requests.post(whisper_url,\
          \ json=payload, timeout=600)\n        elapsed = time.time() - start\n  \
          \      resp.raise_for_status()\n        data = resp.json()\n\n        chunk_duration\
          \ = data.get(\"duration\", 0.0)\n        total_audio_duration += chunk_duration\n\
          \n        if \"segments\" in data:\n            for seg in data[\"segments\"\
          ]:\n                # Offset timestamps to be absolute within the full audio\n\
          \                seg[\"start\"] += time_offset\n                seg[\"end\"\
          ] += time_offset\n                all_segments.append(seg)\n\n        chunk_text\
          \ = data.get(\"text\", \"\")\n        full_text_parts.append(chunk_text)\n\
          \        time_offset += chunk_duration\n        rtf = elapsed / chunk_duration\
          \ if chunk_duration > 0 else 0\n        print(f\"  \u2192 {len(chunk_text)}\
          \ chars, {chunk_duration:.1f}s audio in {elapsed:.1f}s (RTF={rtf:.2f})\"\
          )\n\n    full_text = \"\\n\".join(full_text_parts)\n    print(f\"\\nTotal:\
          \ {len(all_segments)} segments, {total_audio_duration:.1f}s audio\")\n \
          \   print(f\"Transcript length: {len(full_text)} characters\")\n\n    from\
          \ collections import namedtuple\n    TranscriptOutput = namedtuple(\"TranscriptOutput\"\
          , [\"segments\", \"full_text\", \"total_duration_s\"])\n    return TranscriptOutput(\n\
          \        segments=all_segments,\n        full_text=full_text.strip(),\n\
          \        total_duration_s=total_audio_duration,\n    )\n\n"
        image: python:3.13-slim
pipelineInfo:
  description: Extract audio from a DVD or video file, transcribe it via Whisper STT,
    and produce a timestamped transcript (SRT/VTT/TXT).
  name: dvd-video-transcription
root:
  dag:
    tasks:
      chunk-audio:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-chunk-audio
        dependentTasks:
        - extract-audio
        inputs:
          parameters:
            chunk_duration_s:
              componentInputParameter: chunk_duration_s
            wav_path:
              taskOutputParameter:
                outputParameterKey: wav_path
                producerTask: extract-audio
        taskInfo:
          name: chunk-audio
      extract-audio:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-extract-audio
        inputs:
          parameters:
            mono:
              runtimeValue:
                constant: true
            sample_rate:
              runtimeValue:
                constant: 16000.0
            source_path:
              componentInputParameter: source_path
        taskInfo:
          name: extract-audio
      format-transcript:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-format-transcript
        dependentTasks:
        - transcribe-chunks
        inputs:
          parameters:
            full_text:
              taskOutputParameter:
                outputParameterKey: full_text
                producerTask: transcribe-chunks
            output_format:
              componentInputParameter: output_format
            segments:
              taskOutputParameter:
                outputParameterKey: segments
                producerTask: transcribe-chunks
            total_duration_s:
              taskOutputParameter:
                outputParameterKey: total_duration_s
                producerTask: transcribe-chunks
        taskInfo:
          name: format-transcript
      log-transcription-metrics:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-log-transcription-metrics
        dependentTasks:
        - transcribe-chunks
        inputs:
          parameters:
            full_text:
              taskOutputParameter:
                outputParameterKey: full_text
                producerTask: transcribe-chunks
            mlflow_tracking_uri:
              componentInputParameter: mlflow_tracking_uri
            source_path:
              componentInputParameter: source_path
            total_duration_s:
              taskOutputParameter:
                outputParameterKey: total_duration_s
                producerTask: transcribe-chunks
        taskInfo:
          name: log-transcription-metrics
      transcribe-chunks:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-transcribe-chunks
        dependentTasks:
        - chunk-audio
        inputs:
          parameters:
            chunk_paths:
              taskOutputParameter:
                outputParameterKey: chunk_paths
                producerTask: chunk-audio
            language:
              componentInputParameter: language
            response_format:
              runtimeValue:
                constant: verbose_json
            whisper_url:
              componentInputParameter: whisper_url
        taskInfo:
          name: transcribe-chunks
  inputDefinitions:
    parameters:
      chunk_duration_s:
        defaultValue: 300.0
        isOptional: true
        parameterType: NUMBER_INTEGER
      language:
        defaultValue: en
        isOptional: true
        parameterType: STRING
      mlflow_tracking_uri:
        defaultValue: http://mlflow.mlflow.svc.cluster.local:80
        isOptional: true
        parameterType: STRING
      output_format:
        defaultValue: srt
        isOptional: true
        parameterType: STRING
      source_path:
        defaultValue: /data/dvd/movie.mkv
        isOptional: true
        parameterType: STRING
      whisper_url:
        defaultValue: http://ai-inference-serve-svc.kuberay.svc.cluster.local:8000/whisper
        isOptional: true
        parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.12.1