From d4eb54d92b542b16c5b910beae9f19935e79cf04 Mon Sep 17 00:00:00 2001 From: "Billy D." Date: Wed, 18 Feb 2026 07:14:12 -0500 Subject: [PATCH] pipelines go to gravenhollow now. --- cpu_training_pipeline.py | 10 +++++----- qlora_pdf_pipeline.py | 12 ++++++------ voice_cloning_pipeline.py | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpu_training_pipeline.py b/cpu_training_pipeline.py index e2260a4..4a98b89 100644 --- a/cpu_training_pipeline.py +++ b/cpu_training_pipeline.py @@ -7,7 +7,7 @@ distributed across multiple cluster nodes via KubeRay RayJob. GPUs remain 100 % dedicated to inference serving. Architecture: - 1. Fetch PDFs from Quobjects S3 + 1. Fetch PDFs from RustFS S3 2. Prepare instruction-tuning dataset 3. Upload prepared data to S3 (shared storage for Ray workers) 4. Submit a KubeRay RayJob that runs Ray Train TorchTrainer @@ -41,7 +41,7 @@ from typing import NamedTuple # ────────────────────────────────────────────────────────────── -# 1. Fetch PDFs from Quobjects S3 +# 1. Fetch PDFs from RustFS S3 # ────────────────────────────────────────────────────────────── @dsl.component( base_image="python:3.13-slim", @@ -54,7 +54,7 @@ def fetch_pdfs_from_s3( aws_access_key_id: str, aws_secret_access_key: str, ) -> NamedTuple("PDFOutput", [("pdf_dir", str), ("num_files", int)]): - """Download all PDFs from a Quobjects S3 bucket.""" + """Download all PDFs from an S3 bucket.""" import os import boto3 @@ -994,7 +994,7 @@ def log_training_metrics( "num_epochs": num_epochs, "num_pdfs": num_pdfs, "backend": "ray-train-gloo", - "data_source": "quobjects/training-data", + "data_source": "rustfs/training-data", } ) mlflow.log_metrics( @@ -1023,7 +1023,7 @@ ), ) def cpu_training_pipeline( - # ── S3 / Quobjects ── + # ── S3 / RustFS ── s3_endpoint: str = "https://gravenhollow.lab.daviestechlabs.io:30292", s3_bucket: str = "training-data", s3_prefix: str = "", diff --git a/qlora_pdf_pipeline.py b/qlora_pdf_pipeline.py index 06bf17c..5927b6f 100644 --- a/qlora_pdf_pipeline.py +++ b/qlora_pdf_pipeline.py @@ -2,7 +2,7 @@ """ QLoRA Fine-Tuning Pipeline – Kubeflow Pipelines SDK -Fetches PDFs from a Quobjects S3 bucket, extracts instruction-tuning +Fetches PDFs from a RustFS S3 bucket, extracts instruction-tuning data, trains a QLoRA adapter on the Llama 3.1 70B base model using the Strix Halo's 128 GB unified memory, evaluates it, and pushes the adapter weights to a Gitea repository. @@ -23,7 +23,7 @@ from typing import NamedTuple # ────────────────────────────────────────────────────────────── -# 1. Fetch PDFs from Quobjects S3 +# 1. Fetch PDFs from RustFS S3 # ────────────────────────────────────────────────────────────── @dsl.component( base_image="python:3.13-slim", @@ -36,7 +36,7 @@ def fetch_pdfs_from_s3( aws_access_key_id: str, aws_secret_access_key: str, ) -> NamedTuple("PDFOutput", [("pdf_dir", str), ("num_files", int)]): - """Download all PDFs from a Quobjects S3 bucket.""" + """Download all PDFs from an S3 bucket.""" import os import boto3 from botocore.client import Config @@ -571,7 +571,7 @@ def log_training_metrics( "learning_rate": learning_rate, "num_epochs": num_epochs, "num_pdfs": num_pdfs, - "data_source": "quobjects/training-data", + "data_source": "rustfs/training-data", } ) mlflow.log_metrics( @@ -591,13 +591,13 @@ @dsl.pipeline( name="QLoRA PDF Fine-Tuning", description=( - "Fine-tune Llama 3.1 70B via QLoRA on PDFs from the Quobjects " + "Fine-tune Llama 3.1 70B via QLoRA on PDFs from the RustFS " "training-data bucket. Pushes the adapter to Gitea and logs " "metrics to MLflow." ), ) def qlora_pdf_pipeline( - # ── S3 / Quobjects ── + # ── S3 / RustFS ── s3_endpoint: str = "https://gravenhollow.lab.daviestechlabs.io:30292", s3_bucket: str = "training-data", s3_prefix: str = "", diff --git a/voice_cloning_pipeline.py b/voice_cloning_pipeline.py index 303eaee..2cd824b 100644 --- a/voice_cloning_pipeline.py +++ b/voice_cloning_pipeline.py @@ -29,7 +29,7 @@ def transcribe_and_diarise( s3_key: str, whisper_url: str = "http://ai-inference-serve-svc.kuberay.svc.cluster.local:8000/whisper", ) -> NamedTuple("TranscriptOutput", [("transcript_json", str), ("speakers", str), ("audio_path", str)]): - """Download audio from Quobjects S3, transcribe via Whisper with timestamps.""" + """Download audio from S3, transcribe via Whisper with timestamps.""" import json import os import subprocess @@ -41,7 +41,7 @@ def transcribe_and_diarise( out = NamedTuple("TranscriptOutput", [("transcript_json", str), ("speakers", str), ("audio_path", str)]) work = tempfile.mkdtemp() - # ── Download audio from Quobjects S3 ───────────────────── + # ── Download audio from S3 ───────────────────── ext = os.path.splitext(s3_key)[-1] or ".wav" audio_path = os.path.join(work, f"audio_raw{ext}") @@ -609,7 +609,7 @@ def voice_cloning_pipeline( # MLflow mlflow_tracking_uri: str = "http://mlflow.mlflow.svc.cluster.local:80", ): - # 1 - Download from Quobjects S3 and transcribe with Whisper + # 1 - Download from S3 and transcribe with Whisper transcribed = transcribe_and_diarise( s3_endpoint=s3_endpoint, s3_bucket=s3_bucket,