Pipelines go to gravenhollow now.
This commit is contained in:
@@ -29,7 +29,7 @@ def transcribe_and_diarise(
|
||||
s3_key: str,
|
||||
whisper_url: str = "http://ai-inference-serve-svc.kuberay.svc.cluster.local:8000/whisper",
|
||||
) -> NamedTuple("TranscriptOutput", [("transcript_json", str), ("speakers", str), ("audio_path", str)]):
|
||||
"""Download audio from Quobjects S3, transcribe via Whisper with timestamps."""
|
||||
"""Download audio from S3, transcribe via Whisper with timestamps."""
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
@@ -41,7 +41,7 @@ def transcribe_and_diarise(
|
||||
out = NamedTuple("TranscriptOutput", [("transcript_json", str), ("speakers", str), ("audio_path", str)])
|
||||
work = tempfile.mkdtemp()
|
||||
|
||||
# ── Download audio from Quobjects S3 ─────────────────────
|
||||
# ── Download audio from S3 ─────────────────────
|
||||
ext = os.path.splitext(s3_key)[-1] or ".wav"
|
||||
audio_path = os.path.join(work, f"audio_raw{ext}")
|
||||
|
||||
@@ -609,7 +609,7 @@ def voice_cloning_pipeline(
|
||||
# MLflow
|
||||
mlflow_tracking_uri: str = "http://mlflow.mlflow.svc.cluster.local:80",
|
||||
):
|
||||
# 1 - Download from Quobjects S3 and transcribe with Whisper
|
||||
# 1 - Download from S3 and transcribe with Whisper
|
||||
transcribed = transcribe_and_diarise(
|
||||
s3_endpoint=s3_endpoint,
|
||||
s3_bucket=s3_bucket,
|
||||
|
||||
Reference in New Issue
Block a user