fix: resolve all ruff lint errors
Some checks failed
CI / Test (push) Successful in 1m46s
CI / Lint (push) Failing after 1m49s
CI / Publish (push) Has been skipped
CI / Notify (push) Successful in 2s

This commit is contained in:
2026-02-13 10:57:57 -05:00
parent 6bcf84549c
commit 1c841729a0
9 changed files with 456 additions and 464 deletions

View File

@@ -7,21 +7,21 @@ Command-line interface for querying and comparing MLflow experiments.
Usage:
# Compare recent runs in an experiment
python -m mlflow_utils.cli compare --experiment chat-inference --runs 5
# Get best run by metric
python -m mlflow_utils.cli best --experiment evaluation --metric eval.accuracy
# Generate performance report
python -m mlflow_utils.cli report --service chat-handler --hours 24
# Check model promotion criteria
python -m mlflow_utils.cli promote --model whisper-finetuned \\
--experiment voice-evaluation \\
--criteria "eval.accuracy>=0.9,total_latency_p95<=2.0"
# List experiments
python -m mlflow_utils.cli list-experiments
# Query runs
python -m mlflow_utils.cli query --experiment chat-inference \\
--filter "metrics.total_latency_mean < 1.0" --limit 10
@@ -30,19 +30,16 @@ Usage:
import argparse
import json
import sys
from typing import Optional
from .client import get_mlflow_client, health_check
from .experiment_comparison import (
ExperimentAnalyzer,
compare_experiments,
promotion_recommendation,
get_inference_performance_report,
promotion_recommendation,
)
from .model_registry import (
list_model_versions,
get_production_model,
generate_kserve_yaml,
list_model_versions,
)
@@ -57,7 +54,7 @@ def cmd_list_experiments(args):
"""List all experiments."""
client = get_mlflow_client(tracking_uri=args.tracking_uri)
experiments = client.search_experiments()
print(f"{'ID':<10} {'Name':<40} {'Artifact Location'}")
print("-" * 80)
for exp in experiments:
@@ -70,13 +67,13 @@ def cmd_compare(args):
args.experiment,
tracking_uri=args.tracking_uri
)
if args.run_ids:
run_ids = args.run_ids.split(",")
comparison = analyzer.compare_runs(run_ids=run_ids)
else:
comparison = analyzer.compare_runs(n_recent=args.runs)
if args.json:
print(json.dumps(comparison.to_dict(), indent=2, default=str))
else:
@@ -89,17 +86,17 @@ def cmd_best(args):
args.experiment,
tracking_uri=args.tracking_uri
)
best_run = analyzer.get_best_run(
metric=args.metric,
minimize=args.minimize,
filter_string=args.filter or "",
)
if not best_run:
print(f"No runs found with metric '{args.metric}'")
sys.exit(1)
result = {
"run_id": best_run.info.run_id,
"run_name": best_run.info.run_name,
@@ -107,7 +104,7 @@ def cmd_best(args):
"all_metrics": dict(best_run.data.metrics),
"params": dict(best_run.data.params),
}
if args.json:
print(json.dumps(result, indent=2))
else:
@@ -122,12 +119,12 @@ def cmd_summary(args):
args.experiment,
tracking_uri=args.tracking_uri
)
summary = analyzer.get_metrics_summary(
hours=args.hours,
metrics=args.metrics.split(",") if args.metrics else None,
)
if args.json:
print(json.dumps(summary, indent=2))
else:
@@ -146,7 +143,7 @@ def cmd_report(args):
hours=args.hours,
tracking_uri=args.tracking_uri,
)
if args.json:
print(json.dumps(report, indent=2))
else:
@@ -154,18 +151,18 @@ def cmd_report(args):
print(f"Period: Last {report['period_hours']} hours")
print(f"Generated: {report['generated_at']}")
print()
if report["latency"]:
print("Latency Metrics:")
for metric, stats in report["latency"].items():
if "mean" in stats:
print(f" {metric}: {stats['mean']:.4f}s (p50: {stats.get('median', 'N/A')})")
if report["rag"]:
print("\nRAG Usage:")
for metric, stats in report["rag"].items():
print(f" {metric}: {stats.get('mean', 'N/A')}")
if report["errors"]:
print("\nError Rates:")
for metric, stats in report["errors"].items():
@@ -183,14 +180,14 @@ def cmd_promote(args):
metric, value = criterion.split(op)
criteria[metric.strip()] = (op, float(value.strip()))
break
rec = promotion_recommendation(
model_name=args.model,
experiment_name=args.experiment,
criteria=criteria,
tracking_uri=args.tracking_uri,
)
if args.json:
print(json.dumps(rec.to_dict(), indent=2))
else:
@@ -208,12 +205,12 @@ def cmd_query(args):
args.experiment,
tracking_uri=args.tracking_uri
)
runs = analyzer.search_runs(
filter_string=args.filter or "",
max_results=args.limit,
)
if args.json:
result = [
{
@@ -237,20 +234,21 @@ def cmd_query(args):
def cmd_models(args):
"""List registered models."""
client = get_mlflow_client(tracking_uri=args.tracking_uri)
if args.model:
versions = list_model_versions(args.model, tracking_uri=args.tracking_uri)
if args.json:
print(json.dumps(versions, indent=2, default=str))
else:
print(f"Model: {args.model}")
for v in versions:
print(f" v{v['version']} ({v['stage']}): {v['description'][:50] if v['description'] else 'No description'}")
desc = v["description"][:50] if v["description"] else "No description"
print(f" v{v['version']} ({v['stage']}): {desc}")
else:
# List all models
models = client.search_registered_models()
if args.json:
result = [{"name": m.name, "description": m.description} for m in models]
print(json.dumps(result, indent=2))
@@ -271,7 +269,7 @@ def cmd_kserve(args):
output_path=args.output,
tracking_uri=args.tracking_uri,
)
if not args.output:
print(yaml_str)
@@ -281,7 +279,7 @@ def main():
description="MLflow Experiment CLI",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument(
"--tracking-uri",
default=None,
@@ -292,24 +290,24 @@ def main():
action="store_true",
help="Output as JSON",
)
subparsers = parser.add_subparsers(dest="command", help="Commands")
# health
health_parser = subparsers.add_parser("health", help="Check MLflow connectivity")
health_parser.set_defaults(func=cmd_health)
# list-experiments
list_parser = subparsers.add_parser("list-experiments", help="List experiments")
list_parser.set_defaults(func=cmd_list_experiments)
# compare
compare_parser = subparsers.add_parser("compare", help="Compare runs")
compare_parser.add_argument("--experiment", "-e", required=True, help="Experiment name")
compare_parser.add_argument("--runs", "-n", type=int, default=5, help="Number of recent runs")
compare_parser.add_argument("--run-ids", help="Comma-separated run IDs to compare")
compare_parser.set_defaults(func=cmd_compare)
# best
best_parser = subparsers.add_parser("best", help="Find best run by metric")
best_parser.add_argument("--experiment", "-e", required=True, help="Experiment name")
@@ -317,39 +315,39 @@ def main():
best_parser.add_argument("--minimize", action="store_true", help="Minimize metric (default: maximize)")
best_parser.add_argument("--filter", "-f", help="Filter string")
best_parser.set_defaults(func=cmd_best)
# summary
summary_parser = subparsers.add_parser("summary", help="Get metrics summary")
summary_parser.add_argument("--experiment", "-e", required=True, help="Experiment name")
summary_parser.add_argument("--hours", type=int, default=24, help="Hours of data")
summary_parser.add_argument("--metrics", help="Comma-separated metric names")
summary_parser.set_defaults(func=cmd_summary)
# report
report_parser = subparsers.add_parser("report", help="Generate performance report")
report_parser.add_argument("--service", "-s", required=True, help="Service name")
report_parser.add_argument("--hours", type=int, default=24, help="Hours of data")
report_parser.set_defaults(func=cmd_report)
# promote
promote_parser = subparsers.add_parser("promote", help="Check promotion criteria")
promote_parser.add_argument("--model", "-m", required=True, help="Model name")
promote_parser.add_argument("--experiment", "-e", required=True, help="Experiment name")
promote_parser.add_argument("--criteria", "-c", required=True, help="Criteria (e.g., 'accuracy>=0.9,latency<=2.0')")
promote_parser.set_defaults(func=cmd_promote)
# query
query_parser = subparsers.add_parser("query", help="Query runs")
query_parser.add_argument("--experiment", "-e", required=True, help="Experiment name")
query_parser.add_argument("--filter", "-f", help="MLflow filter string")
query_parser.add_argument("--limit", "-l", type=int, default=20, help="Max results")
query_parser.set_defaults(func=cmd_query)
# models
models_parser = subparsers.add_parser("models", help="List registered models")
models_parser.add_argument("--model", "-m", help="Specific model name")
models_parser.set_defaults(func=cmd_models)
# kserve
kserve_parser = subparsers.add_parser("kserve", help="Generate KServe manifest")
kserve_parser.add_argument("--model", "-m", required=True, help="Model name")
@@ -357,13 +355,13 @@ def main():
kserve_parser.add_argument("--namespace", "-n", default="ai-ml", help="K8s namespace")
kserve_parser.add_argument("--output", "-o", help="Output file path")
kserve_parser.set_defaults(func=cmd_kserve)
args = parser.parse_args()
if not args.command:
parser.print_help()
sys.exit(1)
args.func(args)