# KFP Pipeline Sync Job
# Automatically compiles Python pipeline definitions and uploads to Kubeflow.
# Runs as a CronJob to keep pipelines in sync with Git (via the Flux
# source-controller artifact), with a deletable manual Job for on-demand syncs.
---
# RBAC to allow reading GitRepository status for the artifact URL
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: kfp-sync-flux-reader
  namespace: ai-ml
rules:
  - apiGroups: ["source.toolkit.fluxcd.io"]
    resources: ["gitrepositories"]
    verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: kfp-sync-flux-reader
  namespace: ai-ml
subjects:
  - kind: ServiceAccount
    name: pipeline-bridge
    namespace: ai-ml
roleRef:
  kind: Role
  name: kfp-sync-flux-reader
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: batch/v1
kind: CronJob
metadata:
  name: kfp-pipeline-sync
  namespace: ai-ml
  labels:
    app.kubernetes.io/name: kfp-pipeline-sync
    app.kubernetes.io/part-of: llm-workflows
spec:
  # Run every 30 minutes
  schedule: "*/30 * * * *"
  # Forbid overlap: a slow sync must finish before the next one starts
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 2
      template:
        metadata:
          labels:
            app: kfp-pipeline-sync
        spec:
          schedulerName: volcano
          restartPolicy: OnFailure
          serviceAccountName: pipeline-bridge
          containers:
            - name: sync
              # Plain Python image; the script pip-installs kfp/httpx/kubernetes
              # at startup on every run (no network = job failure).
              image: python:3.13-slim
              command:
                - python
                - /scripts/sync_pipelines.py
              env:
                - name: KUBEFLOW_HOST
                  value: "http://ml-pipeline.kubeflow.svc.cluster.local:8888"
                - name: GIT_REPO_NAME
                  value: "llm-workflows"
                - name: GIT_REPO_NAMESPACE
                  value: "ai-ml"
              volumeMounts:
                - name: scripts
                  mountPath: /scripts
              resources:
                requests:
                  cpu: 100m
                  memory: 256Mi
                limits:
                  cpu: 500m
                  memory: 512Mi
          volumes:
            - name: scripts
              configMap:
                name: kfp-sync-scripts
---
# Manual trigger job (run this to sync immediately)
apiVersion: batch/v1
kind: Job
metadata:
  name: kfp-pipeline-sync-manual
  namespace: ai-ml
  labels:
    app.kubernetes.io/name: kfp-pipeline-sync
    app.kubernetes.io/part-of: llm-workflows
  annotations:
    description: "Delete and recreate to manually trigger sync"
spec:
  backoffLimit: 1
  template:
    metadata:
      labels:
        app: kfp-pipeline-sync
    spec:
      schedulerName: volcano
      restartPolicy: Never
      serviceAccountName: pipeline-bridge
      containers:
        - name: sync
          image: python:3.13-slim
          command:
            - python
            - /scripts/sync_pipelines.py
          env:
            - name: KUBEFLOW_HOST
              value: "http://ml-pipeline.kubeflow.svc.cluster.local:8888"
            - name: GIT_REPO_NAME
              value: "llm-workflows"
            - name: GIT_REPO_NAMESPACE
              value: "ai-ml"
          volumeMounts:
            - name: scripts
              mountPath: /scripts
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 512Mi
      volumes:
        - name: scripts
          configMap:
            name: kfp-sync-scripts
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: kfp-sync-scripts
  namespace: ai-ml
data:
  sync_pipelines.py: |
    #!/usr/bin/env python3
    """
    KFP Pipeline Sync

    Compiles Python pipeline definitions and uploads to Kubeflow Pipelines.
    Downloads from Flux source-controller artifact for secure access.
    """
    import os
    import subprocess
    import sys
    import tarfile
    import tempfile
    import time
    from datetime import datetime
    from pathlib import Path

    # Install KFP and kubernetes client at runtime — the container image is a
    # plain python:3.13-slim with no preinstalled dependencies.
    subprocess.check_call([
        sys.executable, "-m", "pip", "install", "-q",
        "kfp==2.12.1", "httpx", "kubernetes"
    ])

    import httpx
    from kfp import Client
    from kubernetes import client as k8s_client, config as k8s_config

    KUBEFLOW_HOST = os.environ.get("KUBEFLOW_HOST", "http://ml-pipeline.kubeflow.svc.cluster.local:8888")
    # Optional local fallback directory (used only if no Flux artifact is found)
    PIPELINES_DIR = os.environ.get("PIPELINES_DIR", "/pipelines")

    # GitRepository to get artifact from
    GIT_REPO_NAME = os.environ.get("GIT_REPO_NAME", "llm-workflows")
    GIT_REPO_NAMESPACE = os.environ.get("GIT_REPO_NAMESPACE", "ai-ml")


    def get_flux_artifact_url() -> str:
        """Return the artifact URL from the Flux GitRepository status.

        Returns an empty string if the GitRepository cannot be read or has
        no artifact yet (caller treats "" as "no Flux source available").
        """
        try:
            k8s_config.load_incluster_config()
            api = k8s_client.CustomObjectsApi()
            gitrepo = api.get_namespaced_custom_object(
                group="source.toolkit.fluxcd.io",
                version="v1",
                namespace=GIT_REPO_NAMESPACE,
                plural="gitrepositories",
                name=GIT_REPO_NAME,
            )
            return gitrepo.get("status", {}).get("artifact", {}).get("url", "")
        except Exception as e:
            print(f"Error getting GitRepository: {e}")
            return ""


    def compile_pipeline(py_file: Path, output_dir: Path) -> Path | None:
        """Compile a Python pipeline file to YAML.

        The pipeline file is executed as a script and is expected to write
        <stem>.yaml next to itself (the usual kfp.compiler pattern). The
        generated YAML is moved into output_dir.

        Returns the compiled YAML path, or None if execution failed or no
        YAML was produced.
        """
        output_file = output_dir / f"{py_file.stem}.yaml"

        # Execute the pipeline file to compile it
        result = subprocess.run(
            [sys.executable, str(py_file)],
            cwd=str(py_file.parent),
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            print(f"Warning: Failed to compile {py_file}: {result.stderr}")
            return None

        # Check if YAML was generated
        generated = py_file.parent / f"{py_file.stem}.yaml"
        if generated.exists():
            generated.rename(output_file)
            return output_file
        return None


    def upload_pipeline(client: Client, yaml_path: Path, pipeline_name: str) -> bool:
        """Upload a new pipeline, or add a version to an existing one.

        Returns True on success, False on any upload error (logged, not raised,
        so one bad pipeline does not abort the whole sync).
        """
        try:
            # Check if pipeline exists by listing and filtering client-side;
            # KFP v2 API has different filter syntax, so we avoid server-side
            # filters. NOTE: only the first 100 pipelines are examined.
            all_pipelines = client.list_pipelines(page_size=100)
            existing = None
            if all_pipelines.pipelines:
                for p in all_pipelines.pipelines:
                    if p.display_name == pipeline_name:
                        existing = p
                        break

            if existing:
                # Update existing pipeline by creating a timestamped version
                print(f"Updating existing pipeline: {pipeline_name}")
                version_name = f"v{datetime.now().strftime('%Y%m%d-%H%M%S')}"
                client.upload_pipeline_version(
                    pipeline_package_path=str(yaml_path),
                    pipeline_version_name=version_name,
                    pipeline_id=existing.pipeline_id,
                )
                print(f"Created version {version_name}")
            else:
                # Create new pipeline
                print(f"Creating new pipeline: {pipeline_name}")
                client.upload_pipeline(
                    pipeline_package_path=str(yaml_path),
                    pipeline_name=pipeline_name,
                )
            return True
        except Exception as e:
            print(f"Error uploading {pipeline_name}: {e}")
            return False


    def sync_from_flux_artifact(client: Client, artifact_url: str):
        """Download the Flux source-controller artifact and sync its pipelines."""
        with tempfile.TemporaryDirectory() as tmpdir:
            tarball = Path(tmpdir) / "source.tar.gz"
            extract_dir = Path(tmpdir) / "source"
            extract_dir.mkdir()

            # Download artifact from Flux source-controller
            print(f"Downloading from Flux: {artifact_url}")
            response = httpx.get(artifact_url, follow_redirects=True, timeout=60.0)
            response.raise_for_status()
            tarball.write_bytes(response.content)

            # Extract tarball. filter="data" (Python 3.12+) rejects absolute
            # paths, path traversal, and special files in the archive.
            with tarfile.open(tarball, 'r:gz') as tar:
                tar.extractall(extract_dir, filter="data")

            pipelines_dir = extract_dir / "pipelines"
            if not pipelines_dir.exists():
                print("No pipelines directory found in artifact")
                return

            sync_directory(client, pipelines_dir)


    def sync_directory(client: Client, pipelines_dir: Path):
        """Compile and upload every pipeline file found in pipelines_dir."""
        output_dir = Path("/tmp/compiled")
        output_dir.mkdir(exist_ok=True)

        # Find all Python pipeline files by naming convention first
        py_files = list(pipelines_dir.glob("*_pipeline.py")) + list(pipelines_dir.glob("*.pipeline.py"))
        if not py_files:
            # Fall back to all Python files
            py_files = list(pipelines_dir.glob("*.py"))

        print(f"Found {len(py_files)} pipeline files")

        for py_file in py_files:
            print(f"\nProcessing: {py_file.name}")

            # Compile pipeline
            yaml_path = compile_pipeline(py_file, output_dir)
            if not yaml_path:
                # Fall back to a pre-compiled YAML shipped alongside the .py
                yaml_path = pipelines_dir / f"{py_file.stem}.yaml"
                if not yaml_path.exists():
                    continue

            pipeline_name = py_file.stem.replace("_", "-")
            upload_pipeline(client, yaml_path, pipeline_name)


    def main():
        print("KFP Pipeline Sync starting...")
        print(f"Kubeflow host: {KUBEFLOW_HOST}")

        # Wait for Kubeflow to be ready. client is reset to None on every
        # failed attempt: Client() can succeed while the connectivity probe
        # (list_pipelines) fails, and a half-initialized client must not
        # slip past the check below.
        client = None
        for _ in range(5):
            try:
                client = Client(host=KUBEFLOW_HOST)
                # Test connection
                client.list_pipelines(page_size=1)
                print("Connected to Kubeflow Pipelines")
                break
            except Exception as e:
                client = None
                print(f"Waiting for Kubeflow... ({e})")
                time.sleep(10)

        if not client:
            print("Failed to connect to Kubeflow")
            sys.exit(1)

        # Prefer the Flux artifact; fall back to a local directory if mounted
        artifact_url = get_flux_artifact_url()
        if artifact_url:
            print("\nSyncing from Flux artifact...")
            sync_from_flux_artifact(client, artifact_url)
        elif PIPELINES_DIR and Path(PIPELINES_DIR).exists():
            # Sync from local directory
            print(f"\nSyncing from local: {PIPELINES_DIR}")
            sync_directory(client, Path(PIPELINES_DIR))
        else:
            print("No pipeline source configured!")
            sys.exit(1)

        print("\nSync complete!")


    if __name__ == "__main__":
        main()