feat: add QLoRA PDF pipeline and Gitea CI workflow
- qlora_pdf_pipeline.py: 6-step QLoRA fine-tuning pipeline (S3 PDFs → prepare data → train → evaluate → push to Gitea → MLflow) - .gitea/workflows/compile-upload.yaml: auto-compile and upload all pipelines to Kubeflow on push, with ntfy notifications
This commit adds one new file:
.gitea/workflows/compile-upload.yaml (221 lines, new file)
@@ -0,0 +1,221 @@
|
||||
name: Compile and Upload Pipelines
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- "**/*_pipeline.py"
|
||||
- "**/*pipeline*.py"
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
NTFY_URL: http://ntfy.observability.svc.cluster.local:80
|
||||
KUBEFLOW_HOST: http://ml-pipeline.kubeflow.svc.cluster.local:8888
|
||||
|
||||
jobs:
|
||||
compile-and-upload:
|
||||
name: Compile & Upload
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
compiled: ${{ steps.compile.outputs.compiled }}
|
||||
failed: ${{ steps.compile.outputs.failed }}
|
||||
uploaded: ${{ steps.upload.outputs.uploaded }}
|
||||
upload_failed: ${{ steps.upload.outputs.failed }}
|
||||
version: ${{ steps.upload.outputs.version }}
|
||||
uploaded_names: ${{ steps.upload.outputs.uploaded_names }}
|
||||
failed_names: ${{ steps.upload.outputs.failed_names }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.13"
|
||||
|
||||
- name: Install KFP
|
||||
run: pip install kfp==2.12.1
|
||||
|
||||
- name: Discover pipeline files
|
||||
id: discover
|
||||
run: |
|
||||
# Find all pipeline Python files
|
||||
FILES=$(find . -maxdepth 1 -name '*_pipeline.py' -o -name '*pipeline*.py' | sort)
|
||||
COUNT=$(echo "$FILES" | grep -c '.' || true)
|
||||
echo "files<<EOF" >> $GITHUB_OUTPUT
|
||||
echo "$FILES" >> $GITHUB_OUTPUT
|
||||
echo "EOF" >> $GITHUB_OUTPUT
|
||||
echo "count=$COUNT" >> $GITHUB_OUTPUT
|
||||
echo "Found $COUNT pipeline files:"
|
||||
echo "$FILES"
|
||||
|
||||
- name: Compile pipelines
|
||||
id: compile
|
||||
run: |
|
||||
COMPILED=0
|
||||
FAILED=0
|
||||
COMPILED_LIST=""
|
||||
FAILED_LIST=""
|
||||
|
||||
for py_file in ${{ steps.discover.outputs.files }}; do
|
||||
name=$(basename "$py_file" .py)
|
||||
echo "::group::Compiling $name"
|
||||
|
||||
if python "$py_file"; then
|
||||
yaml_file="${name}.yaml"
|
||||
if [ -f "$yaml_file" ]; then
|
||||
echo "✓ Compiled $name → $yaml_file"
|
||||
COMPILED=$((COMPILED + 1))
|
||||
COMPILED_LIST="${COMPILED_LIST}${name}\n"
|
||||
else
|
||||
echo "✗ $name produced no YAML output"
|
||||
FAILED=$((FAILED + 1))
|
||||
FAILED_LIST="${FAILED_LIST}${name}\n"
|
||||
fi
|
||||
else
|
||||
echo "✗ Failed to compile $name"
|
||||
FAILED=$((FAILED + 1))
|
||||
FAILED_LIST="${FAILED_LIST}${name}\n"
|
||||
fi
|
||||
|
||||
echo "::endgroup::"
|
||||
done
|
||||
|
||||
echo "compiled=$COMPILED" >> $GITHUB_OUTPUT
|
||||
echo "failed=$FAILED" >> $GITHUB_OUTPUT
|
||||
echo "compiled_list<<EOF" >> $GITHUB_OUTPUT
|
||||
echo -e "$COMPILED_LIST" >> $GITHUB_OUTPUT
|
||||
echo "EOF" >> $GITHUB_OUTPUT
|
||||
echo "failed_list<<EOF" >> $GITHUB_OUTPUT
|
||||
echo -e "$FAILED_LIST" >> $GITHUB_OUTPUT
|
||||
echo "EOF" >> $GITHUB_OUTPUT
|
||||
|
||||
echo ""
|
||||
echo "=== Summary ==="
|
||||
echo "Compiled: $COMPILED"
|
||||
echo "Failed: $FAILED"
|
||||
|
||||
if [ "$FAILED" -gt 0 ]; then
|
||||
echo "::warning::$FAILED pipeline(s) failed to compile"
|
||||
fi
|
||||
|
||||
- name: Upload pipelines to Kubeflow
|
||||
id: upload
|
||||
run: |
|
||||
python3 << 'UPLOAD_SCRIPT'
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from kfp import Client
|
||||
|
||||
host = os.environ["KUBEFLOW_HOST"]
|
||||
print(f"Connecting to Kubeflow at {host}")
|
||||
|
||||
try:
|
||||
client = Client(host=host)
|
||||
client.list_pipelines(page_size=1)
|
||||
print("Connected to Kubeflow Pipelines")
|
||||
except Exception as e:
|
||||
print(f"ERROR: Cannot connect to Kubeflow: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
# Get all compiled YAML files
|
||||
yaml_files = sorted(Path(".").glob("*_pipeline.yaml"))
|
||||
if not yaml_files:
|
||||
yaml_files = sorted(Path(".").glob("*pipeline*.yaml"))
|
||||
|
||||
uploaded = 0
|
||||
failed = 0
|
||||
uploaded_names = []
|
||||
failed_names = []
|
||||
version_tag = f"v{datetime.now().strftime('%Y%m%d-%H%M%S')}"
|
||||
|
||||
for yaml_path in yaml_files:
|
||||
pipeline_name = yaml_path.stem.replace("_", "-")
|
||||
print(f"\n--- {pipeline_name} ---")
|
||||
|
||||
try:
|
||||
# Check if pipeline already exists
|
||||
existing = None
|
||||
all_pipelines = client.list_pipelines(page_size=200)
|
||||
if all_pipelines.pipelines:
|
||||
for p in all_pipelines.pipelines:
|
||||
if p.display_name == pipeline_name:
|
||||
existing = p
|
||||
break
|
||||
|
||||
if existing:
|
||||
print(f" Updating: {pipeline_name} ({existing.pipeline_id})")
|
||||
client.upload_pipeline_version(
|
||||
pipeline_package_path=str(yaml_path),
|
||||
pipeline_version_name=version_tag,
|
||||
pipeline_id=existing.pipeline_id,
|
||||
)
|
||||
else:
|
||||
print(f" Creating: {pipeline_name}")
|
||||
client.upload_pipeline(
|
||||
pipeline_package_path=str(yaml_path),
|
||||
pipeline_name=pipeline_name,
|
||||
)
|
||||
|
||||
uploaded += 1
|
||||
uploaded_names.append(pipeline_name)
|
||||
print(f" ✓ Done")
|
||||
|
||||
except Exception as e:
|
||||
failed += 1
|
||||
failed_names.append(pipeline_name)
|
||||
print(f" ✗ Error: {e}")
|
||||
|
||||
# Write outputs
|
||||
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
|
||||
f.write(f"uploaded={uploaded}\n")
|
||||
f.write(f"failed={failed}\n")
|
||||
f.write(f"version={version_tag}\n")
|
||||
f.write(f"uploaded_names={', '.join(uploaded_names)}\n")
|
||||
f.write(f"failed_names={', '.join(failed_names)}\n")
|
||||
|
||||
print(f"\n=== Upload Summary ===")
|
||||
print(f"Uploaded: {uploaded}")
|
||||
print(f"Failed: {failed}")
|
||||
|
||||
if failed > 0:
|
||||
sys.exit(1)
|
||||
UPLOAD_SCRIPT
|
||||
|
||||
notify:
|
||||
name: Notify
|
||||
runs-on: ubuntu-latest
|
||||
needs: [compile-and-upload]
|
||||
if: always()
|
||||
steps:
|
||||
- name: Notify on success
|
||||
if: needs.compile-and-upload.result == 'success'
|
||||
run: |
|
||||
curl -s \
|
||||
-H "Title: ✅ Pipelines uploaded to Kubeflow" \
|
||||
-H "Priority: default" \
|
||||
-H "Tags: white_check_mark,rocket" \
|
||||
-H "Click: ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_id }}" \
|
||||
-d "Branch: ${{ gitea.ref_name }}
|
||||
Commit: ${{ gitea.event.head_commit.message || gitea.sha }}
|
||||
Compiled: ${{ needs.compile-and-upload.outputs.compiled || '?' }} pipeline(s)
|
||||
Uploaded: ${{ needs.compile-and-upload.outputs.uploaded || '?' }} pipeline(s)
|
||||
Version: ${{ needs.compile-and-upload.outputs.version || 'n/a' }}" \
|
||||
${{ env.NTFY_URL }}/gitea-ci
|
||||
|
||||
- name: Notify on failure
|
||||
if: needs.compile-and-upload.result == 'failure'
|
||||
run: |
|
||||
curl -s \
|
||||
-H "Title: ❌ Pipeline upload failed" \
|
||||
-H "Priority: high" \
|
||||
-H "Tags: x,rocket" \
|
||||
-H "Click: ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_id }}" \
|
||||
-d "Branch: ${{ gitea.ref_name }}
|
||||
Commit: ${{ gitea.event.head_commit.message || gitea.sha }}
|
||||
Compiled: ${{ needs.compile-and-upload.outputs.compiled || '?' }}, Failed compile: ${{ needs.compile-and-upload.outputs.failed || '?' }}
|
||||
Upload failures: ${{ needs.compile-and-upload.outputs.failed_names || 'unknown' }}
|
||||
Check logs for details." \
|
||||
${{ env.NTFY_URL }}/gitea-ci
|
||||
Reference in New Issue
Block a user