Pipelines now go to gravenhollow (RustFS replaces Quobjects as the S3 store).
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
"""
|
||||
QLoRA Fine-Tuning Pipeline – Kubeflow Pipelines SDK
|
||||
|
||||
Fetches PDFs from a Quobjects S3 bucket, extracts instruction-tuning
|
||||
Fetches PDFs from a RustFS S3 bucket, extracts instruction-tuning
|
||||
data, trains a QLoRA adapter on the Llama 3.1 70B base model using
|
||||
the Strix Halo's 128 GB unified memory, evaluates it, and pushes the
|
||||
adapter weights to a Gitea repository.
|
||||
@@ -23,7 +23,7 @@ from typing import NamedTuple
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────
|
||||
# 1. Fetch PDFs from Quobjects S3
|
||||
# 1. Fetch PDFs from RustFS S3
|
||||
# ──────────────────────────────────────────────────────────────
|
||||
@dsl.component(
|
||||
base_image="python:3.13-slim",
|
||||
@@ -36,7 +36,7 @@ def fetch_pdfs_from_s3(
|
||||
aws_access_key_id: str,
|
||||
aws_secret_access_key: str,
|
||||
) -> NamedTuple("PDFOutput", [("pdf_dir", str), ("num_files", int)]):
|
||||
"""Download all PDFs from a Quobjects S3 bucket."""
|
||||
"""Download all PDFs from an S3 bucket."""
|
||||
import os
|
||||
import boto3
|
||||
from botocore.client import Config
|
||||
@@ -571,7 +571,7 @@ def log_training_metrics(
|
||||
"learning_rate": learning_rate,
|
||||
"num_epochs": num_epochs,
|
||||
"num_pdfs": num_pdfs,
|
||||
"data_source": "quobjects/training-data",
|
||||
"data_source": "rustfs/training-data",
|
||||
}
|
||||
)
|
||||
mlflow.log_metrics(
|
||||
@@ -591,13 +591,13 @@ def log_training_metrics(
|
||||
@dsl.pipeline(
|
||||
name="QLoRA PDF Fine-Tuning",
|
||||
description=(
|
||||
"Fine-tune Llama 3.1 70B via QLoRA on PDFs from the Quobjects "
|
||||
"Fine-tune Llama 3.1 70B via QLoRA on PDFs from the RustFS "
|
||||
"training-data bucket. Pushes the adapter to Gitea and logs "
|
||||
"metrics to MLflow."
|
||||
),
|
||||
)
|
||||
def qlora_pdf_pipeline(
|
||||
# ── S3 / Quobjects ──
|
||||
# ── S3 / RustFS ──
|
||||
s3_endpoint: str = "https://gravenhollow.lab.daviestechlabs.io:30292",
|
||||
s3_bucket: str = "training-data",
|
||||
s3_prefix: str = "",
|
||||
|
||||
Reference in New Issue
Block a user