perf(ci): reduce GitHub Actions costs ~60-65% across all workflows
Analysis of Feb 17 data showed 400+ workflow runs/day consuming ~398 billable minutes (~200 hours/month projected). Implemented targeted optimizations, sketched below:

High-impact changes:
- sec-audit.yml: add path filters (Cargo.toml, src/**, crates/**, deny.toml); skip docs-only PRs
- test-benchmarks.yml: move from every-push-to-main to a weekly schedule; retention 30d -> 7d
- pub-docker-img.yml: tighten PR smoke-build path filters to Docker-specific files only
- sec-codeql.yml: reduce from twice daily (14 runs/week) to weekly

Medium-impact changes:
- ci-run.yml: merge lint + lint-strict-delta into a single job; drop --release from the smoke build
- feature-matrix.yml: remove the push trigger (weekly-only); remove the redundant cargo test step
- dependabot.yml: monthly instead of weekly; reduce PR limits from 11 to 5/month; group all deps

Runner cost savings:
- Switch 6 lightweight API-only workflows to ubuntu-latest (PR Labeler, Intake, Auto Responder, Check Stale, Check Status, Sync Contributors)
- pr-check-status.yml: reduce from every 12h to daily

New files:
- docs/ci-cost-optimization.md: comprehensive analysis and revised architecture documentation
- scripts/ci/fetch_actions_data.py: reusable GitHub Actions cost-analysis script

Estimated impact: daily billable minutes ~400 -> ~120-150 (60-65% reduction), monthly hours ~200 -> ~60-75, Dependabot PRs ~44/month -> ~5 (89% reduction)
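As illustration, the sec-audit.yml trigger after this change takes roughly the following shape (a sketch assuming standard GitHub Actions syntax; only the path list comes from this commit):

    on:
      pull_request:
        paths:
          - "Cargo.toml"
          - "deny.toml"
          - "src/**"
          - "crates/**"

and the test-benchmarks.yml move from push-to-main to a weekly schedule (the cron slot is a placeholder, not the committed value):

    on:
      schedule:
        - cron: "0 6 * * 1"  # placeholder: Mondays 06:00 UTC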
parent 8f7d879fd5
commit 44725da08c
15 changed files with 512 additions and 85 deletions
scripts/ci/fetch_actions_data.py (new file, 156 lines)
@@ -0,0 +1,156 @@
#!/usr/bin/env python3
"""Fetch GitHub Actions workflow runs for a given date and summarize costs."""

import json
import subprocess
import sys
from datetime import datetime


def fetch_runs(repo, date_str, page=1, per_page=100):
    """Fetch completed workflow runs for a given date."""
    url = (
        f"https://api.github.com/repos/{repo}/actions/runs"
        f"?created={date_str}&per_page={per_page}&page={page}"
    )
    result = subprocess.run(
        ["curl", "-sS", "-H", "Accept: application/vnd.github+json", url],
        capture_output=True, text=True
    )
    return json.loads(result.stdout)


def fetch_jobs(repo, run_id):
    """Fetch jobs for a specific run."""
    url = f"https://api.github.com/repos/{repo}/actions/runs/{run_id}/jobs?per_page=100"
    result = subprocess.run(
        ["curl", "-sS", "-H", "Accept: application/vnd.github+json", url],
        capture_output=True, text=True
    )
    return json.loads(result.stdout)
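# NOTE: both fetchers hit the API unauthenticated, which GitHub rate-limits to
# 60 requests/hour per IP, so sampling jobs across many workflows can exceed it.
# One option (an assumption, not part of the committed script) is to pass a token:
#   ["curl", "-sS", "-H", f"Authorization: Bearer {os.environ['GITHUB_TOKEN']}", url]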


def parse_duration(started, completed):
    """Return duration in seconds between two ISO timestamps."""
    if not started or not completed:
        return 0
    try:
        s = datetime.fromisoformat(started.replace("Z", "+00:00"))
        c = datetime.fromisoformat(completed.replace("Z", "+00:00"))
        return max(0, (c - s).total_seconds())
    except Exception:
        return 0


def main():
    # Optional CLI overrides: argv[1] = owner/repo, argv[2] = YYYY-MM-DD
    repo = sys.argv[1] if len(sys.argv) > 1 else "zeroclaw-labs/zeroclaw"
    date_str = sys.argv[2] if len(sys.argv) > 2 else "2026-02-17"

    print(f"Fetching workflow runs for {repo} on {date_str}...")
    print("=" * 100)

    all_runs = []
    for page in range(1, 5):  # up to 400 runs
        data = fetch_runs(repo, date_str, page=page)
        runs = data.get("workflow_runs", [])
        if not runs:
            break
        all_runs.extend(runs)
        if len(runs) < 100:  # short page: no more results
            break

    print(f"Total workflow runs found: {len(all_runs)}")
    print()

    # Group by workflow name
    workflow_stats = {}
    for run in all_runs:
        name = run.get("name", "Unknown")
        event = run.get("event", "unknown")
        conclusion = run.get("conclusion", "unknown")
        run_id = run.get("id")

        if name not in workflow_stats:
            workflow_stats[name] = {
                "count": 0,
                "events": {},
                "conclusions": {},
                "total_job_seconds": 0,
                "total_jobs": 0,
                "run_ids": [],
            }

        workflow_stats[name]["count"] += 1
        workflow_stats[name]["events"][event] = workflow_stats[name]["events"].get(event, 0) + 1
        workflow_stats[name]["conclusions"][conclusion] = workflow_stats[name]["conclusions"].get(conclusion, 0) + 1
        workflow_stats[name]["run_ids"].append(run_id)

    # For each workflow, sample up to 3 runs to get job-level timing
    print("Sampling job-level timing (up to 3 runs per workflow)...")
    print()

    for name, stats in workflow_stats.items():
        sample_ids = stats["run_ids"][:3]
        for run_id in sample_ids:
            jobs_data = fetch_jobs(repo, run_id)
            jobs = jobs_data.get("jobs", [])
            for job in jobs:
                started = job.get("started_at")
                completed = job.get("completed_at")
                duration = parse_duration(started, completed)
                stats["total_job_seconds"] += duration
                stats["total_jobs"] += 1

        # Extrapolate: if we sampled N runs but there are M total, scale up
        sampled = len(sample_ids)
        total = stats["count"]
        if sampled > 0 and sampled < total:
            scale = total / sampled
            stats["estimated_total_seconds"] = stats["total_job_seconds"] * scale
        else:
            stats["estimated_total_seconds"] = stats["total_job_seconds"]
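        # Worked example (illustrative numbers, not from the data): sampling
        # 3 of 12 runs that total 9 job-minutes gives 9 * (12 / 3) = 36
        # estimated job-minutes, assuming the sampled runs are representative.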

    # Print summary sorted by estimated cost (descending)
    sorted_workflows = sorted(
        workflow_stats.items(),
        key=lambda x: x[1]["estimated_total_seconds"],
        reverse=True
    )

    print("=" * 100)
    print(f"{'Workflow':<40} {'Runs':>5} {'SampledJobs':>12} {'SampledMins':>12} {'Est.TotalMins':>14} {'Events'}")
    print("-" * 100)

    grand_total_minutes = 0
    for name, stats in sorted_workflows:
        sampled_mins = stats["total_job_seconds"] / 60
        est_total_mins = stats["estimated_total_seconds"] / 60
        grand_total_minutes += est_total_mins
        events_str = ", ".join(f"{k}={v}" for k, v in stats["events"].items())
        conclusions_str = ", ".join(f"{k}={v}" for k, v in stats["conclusions"].items())
        print(
            f"{name:<40} {stats['count']:>5} {stats['total_jobs']:>12} "
            f"{sampled_mins:>12.1f} {est_total_mins:>14.1f} {events_str}"
        )
        print(f"{'':>40} {'':>5} {'':>12} {'':>12} {'':>14} outcomes: {conclusions_str}")

    print("-" * 100)
    print(f"{'GRAND TOTAL':>40} {len(all_runs):>5} {'':>12} {'':>12} {grand_total_minutes:>14.1f}")
    print(f"\nEstimated total billable minutes on {date_str}: {grand_total_minutes:.0f} min ({grand_total_minutes/60:.1f} hours)")
    print()

    # Also show raw run list
    print("\n" + "=" * 100)
    print("DETAILED RUN LIST")
    print("=" * 100)
    for run in all_runs:
        name = run.get("name", "Unknown")
        event = run.get("event", "unknown")
        conclusion = run.get("conclusion", "unknown")
        run_id = run.get("id")
        started = run.get("run_started_at", "?")
        print(f"  [{run_id}] {name:<40} conclusion={conclusion:<12} event={event:<20} started={started}")


if __name__ == "__main__":
    main()
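Example invocation of the new script (both arguments are optional and fall back to the defaults in main()):

    python3 scripts/ci/fetch_actions_data.py zeroclaw-labs/zeroclaw 2026-02-17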