perf(ci): reduce GitHub Actions costs ~60-65% across all workflows

Analysis of Feb 17 data showed 400+ workflow runs/day consuming ~398 billable minutes (~200 hours/month projected). Implemented targeted optimizations:

High-impact changes:

- sec-audit.yml: add path filters (Cargo.toml, src/**, crates/**, deny.toml); skip docs-only PRs

- test-benchmarks.yml: move from every-push-to-main to weekly schedule; retention 30d -> 7d

- pub-docker-img.yml: tighten PR smoke build path filters to Docker-specific files only

- sec-codeql.yml: reduce from twice-daily (14 runs/week) to weekly

Medium-impact changes:

- ci-run.yml: merge lint + lint-strict-delta into single job; drop --release from smoke build

- feature-matrix.yml: remove push trigger (weekly-only); remove redundant cargo test step

- dependabot.yml: monthly instead of weekly; reduce PR limits from 11 to 5/month; group all deps

Runner cost savings:

- Switch 6 lightweight API-only workflows to ubuntu-latest (PR Labeler, Intake, Auto Responder, Check Stale, Check Status, Sync Contributors)

- pr-check-status.yml: reduce from every 12h to daily

New files:

- docs/ci-cost-optimization.md: comprehensive analysis and revised architecture documentation

- scripts/ci/fetch_actions_data.py: reusable GitHub Actions cost analysis script

Estimated impact: daily billable minutes ~400 -> ~120-150 (60-65%% reduction), monthly hours ~200 -> ~60-75, Dependabot PRs ~44/month -> ~5 (89%% reduction)
This commit is contained in:
Alex Gorevski 2026-02-18 11:26:09 -08:00
parent 8f7d879fd5
commit 44725da08c
15 changed files with 512 additions and 85 deletions

View file

@ -4,13 +4,13 @@ updates:
- package-ecosystem: cargo - package-ecosystem: cargo
directory: "/" directory: "/"
schedule: schedule:
interval: weekly interval: monthly
target-branch: main target-branch: main
open-pull-requests-limit: 5 open-pull-requests-limit: 3
labels: labels:
- "dependencies" - "dependencies"
groups: groups:
rust-minor-patch: rust-all:
patterns: patterns:
- "*" - "*"
update-types: update-types:
@ -20,14 +20,31 @@ updates:
- package-ecosystem: github-actions - package-ecosystem: github-actions
directory: "/" directory: "/"
schedule: schedule:
interval: weekly interval: monthly
target-branch: main target-branch: main
open-pull-requests-limit: 3 open-pull-requests-limit: 1
labels: labels:
- "ci" - "ci"
- "dependencies" - "dependencies"
groups: groups:
actions-minor-patch: actions-all:
patterns:
- "*"
update-types:
- minor
- patch
- package-ecosystem: docker
directory: "/"
schedule:
interval: monthly
target-branch: main
open-pull-requests-limit: 1
labels:
- "ci"
- "dependencies"
groups:
docker-all:
patterns: patterns:
- "*" - "*"
update-types: update-types:

View file

@ -41,25 +41,7 @@ jobs:
run: ./scripts/ci/detect_change_scope.sh run: ./scripts/ci/detect_change_scope.sh
lint: lint:
name: Lint Gate (Format + Clippy) name: Lint Gate (Format + Clippy + Strict Delta)
needs: [changes]
if: needs.changes.outputs.rust_changed == 'true' && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci:full'))
runs-on: blacksmith-2vcpu-ubuntu-2404
timeout-minutes: 20
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with:
toolchain: 1.92.0
components: rustfmt, clippy
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
- name: Run rust quality gate
run: ./scripts/ci/rust_quality_gate.sh
lint-strict-delta:
name: Lint Gate (Strict Delta)
needs: [changes] needs: [changes]
if: needs.changes.outputs.rust_changed == 'true' && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci:full')) if: needs.changes.outputs.rust_changed == 'true' && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci:full'))
runs-on: blacksmith-2vcpu-ubuntu-2404 runs-on: blacksmith-2vcpu-ubuntu-2404
@ -71,8 +53,10 @@ jobs:
- uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable - uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
with: with:
toolchain: 1.92.0 toolchain: 1.92.0
components: clippy components: rustfmt, clippy
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3 - uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
- name: Run rust quality gate
run: ./scripts/ci/rust_quality_gate.sh
- name: Run strict lint delta gate - name: Run strict lint delta gate
env: env:
BASE_SHA: ${{ needs.changes.outputs.base_sha }} BASE_SHA: ${{ needs.changes.outputs.base_sha }}
@ -80,8 +64,8 @@ jobs:
test: test:
name: Test name: Test
needs: [changes, lint, lint-strict-delta] needs: [changes, lint]
if: needs.changes.outputs.rust_changed == 'true' && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci:full')) && needs.lint.result == 'success' && needs.lint-strict-delta.result == 'success' if: needs.changes.outputs.rust_changed == 'true' && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci:full')) && needs.lint.result == 'success'
runs-on: blacksmith-2vcpu-ubuntu-2404 runs-on: blacksmith-2vcpu-ubuntu-2404
timeout-minutes: 30 timeout-minutes: 30
steps: steps:
@ -106,8 +90,8 @@ jobs:
with: with:
toolchain: 1.92.0 toolchain: 1.92.0
- uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3 - uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
- name: Build release binary - name: Build binary (smoke check)
run: cargo build --release --locked --verbose run: cargo build --locked --verbose
docs-only: docs-only:
name: Docs-Only Fast Path name: Docs-Only Fast Path
@ -185,7 +169,7 @@ jobs:
lint-feedback: lint-feedback:
name: Lint Feedback name: Lint Feedback
if: github.event_name == 'pull_request' if: github.event_name == 'pull_request'
needs: [changes, lint, lint-strict-delta, docs-quality] needs: [changes, lint, docs-quality]
runs-on: blacksmith-2vcpu-ubuntu-2404 runs-on: blacksmith-2vcpu-ubuntu-2404
permissions: permissions:
contents: read contents: read
@ -201,7 +185,7 @@ jobs:
RUST_CHANGED: ${{ needs.changes.outputs.rust_changed }} RUST_CHANGED: ${{ needs.changes.outputs.rust_changed }}
DOCS_CHANGED: ${{ needs.changes.outputs.docs_changed }} DOCS_CHANGED: ${{ needs.changes.outputs.docs_changed }}
LINT_RESULT: ${{ needs.lint.result }} LINT_RESULT: ${{ needs.lint.result }}
LINT_DELTA_RESULT: ${{ needs.lint-strict-delta.result }} LINT_DELTA_RESULT: ${{ needs.lint.result }}
DOCS_RESULT: ${{ needs.docs-quality.result }} DOCS_RESULT: ${{ needs.docs-quality.result }}
with: with:
script: | script: |
@ -231,7 +215,7 @@ jobs:
ci-required: ci-required:
name: CI Required Gate name: CI Required Gate
if: always() if: always()
needs: [changes, lint, lint-strict-delta, test, build, docs-only, non-rust, docs-quality, lint-feedback, workflow-owner-approval] needs: [changes, lint, test, build, docs-only, non-rust, docs-quality, lint-feedback, workflow-owner-approval]
runs-on: blacksmith-2vcpu-ubuntu-2404 runs-on: blacksmith-2vcpu-ubuntu-2404
steps: steps:
- name: Enforce required status - name: Enforce required status
@ -276,7 +260,7 @@ jobs:
fi fi
lint_result="${{ needs.lint.result }}" lint_result="${{ needs.lint.result }}"
lint_strict_delta_result="${{ needs.lint-strict-delta.result }}" lint_strict_delta_result="${{ needs.lint.result }}"
test_result="${{ needs.test.result }}" test_result="${{ needs.test.result }}"
build_result="${{ needs.build.result }}" build_result="${{ needs.build.result }}"

View file

@ -1,12 +1,6 @@
name: Feature Matrix name: Feature Matrix
on: on:
push:
branches: [main]
paths:
- "Cargo.toml"
- "Cargo.lock"
- "src/**"
schedule: schedule:
- cron: "30 4 * * 1" # Weekly Monday 4:30am UTC - cron: "30 4 * * 1" # Weekly Monday 4:30am UTC
workflow_dispatch: workflow_dispatch:
@ -61,6 +55,3 @@ jobs:
- name: Check feature combination - name: Check feature combination
run: cargo check --locked ${{ matrix.args }} run: cargo check --locked ${{ matrix.args }}
- name: Test feature combination
run: cargo test --locked ${{ matrix.args }}

View file

@ -15,16 +15,7 @@ jobs:
(github.event.action == 'opened' || github.event.action == 'reopened' || github.event.action == 'labeled' || github.event.action == 'unlabeled')) || (github.event.action == 'opened' || github.event.action == 'reopened' || github.event.action == 'labeled' || github.event.action == 'unlabeled')) ||
(github.event_name == 'pull_request_target' && (github.event_name == 'pull_request_target' &&
(github.event.action == 'labeled' || github.event.action == 'unlabeled')) (github.event.action == 'labeled' || github.event.action == 'unlabeled'))
runs-on: blacksmith-2vcpu-ubuntu-2404 runs-on: ubuntu-latest
permissions:
contents: read
issues: write
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Apply contributor tier label for issue author
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
env: env:
LABEL_POLICY_PATH: .github/label-policy.json LABEL_POLICY_PATH: .github/label-policy.json
@ -34,7 +25,7 @@ jobs:
await script({ github, context, core }); await script({ github, context, core });
first-interaction: first-interaction:
if: github.event.action == 'opened' if: github.event.action == 'opened'
runs-on: blacksmith-2vcpu-ubuntu-2404 runs-on: ubuntu-latest
permissions: permissions:
issues: write issues: write
pull-requests: write pull-requests: write
@ -65,7 +56,7 @@ jobs:
labeled-routes: labeled-routes:
if: github.event.action == 'labeled' if: github.event.action == 'labeled'
runs-on: blacksmith-2vcpu-ubuntu-2404 runs-on: ubuntu-latest
permissions: permissions:
contents: read contents: read
issues: write issues: write

View file

@ -12,9 +12,7 @@ jobs:
permissions: permissions:
issues: write issues: write
pull-requests: write pull-requests: write
runs-on: blacksmith-2vcpu-ubuntu-2404 runs-on: ubuntu-latest
steps:
- name: Mark stale issues and pull requests
uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0 uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0
with: with:
repo-token: ${{ secrets.GITHUB_TOKEN }} repo-token: ${{ secrets.GITHUB_TOKEN }}

View file

@ -2,7 +2,7 @@ name: PR Check Status
on: on:
schedule: schedule:
- cron: "15 */12 * * *" - cron: "15 8 * * *" # Once daily at 8:15am UTC
workflow_dispatch: workflow_dispatch:
permissions: {} permissions: {}
@ -13,12 +13,7 @@ concurrency:
jobs: jobs:
nudge-stale-prs: nudge-stale-prs:
runs-on: blacksmith-2vcpu-ubuntu-2404 runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write
issues: write
env:
STALE_HOURS: "48" STALE_HOURS: "48"
steps: steps:
- name: Checkout repository - name: Checkout repository

View file

@ -16,13 +16,7 @@ permissions:
jobs: jobs:
intake: intake:
name: Intake Checks name: Intake Checks
runs-on: blacksmith-2vcpu-ubuntu-2404 runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Run safe PR intake checks
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
with: with:
script: | script: |

View file

@ -25,8 +25,7 @@ permissions:
jobs: jobs:
label: label:
runs-on: blacksmith-2vcpu-ubuntu-2404 runs-on: ubuntu-latest
timeout-minutes: 10
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

View file

@ -21,13 +21,8 @@ on:
paths: paths:
- "Dockerfile" - "Dockerfile"
- ".dockerignore" - ".dockerignore"
- "Cargo.toml" - "docker-compose.yml"
- "Cargo.lock"
- "rust-toolchain.toml" - "rust-toolchain.toml"
- "src/**"
- "crates/**"
- "benches/**"
- "firmware/**"
- "dev/config.template.toml" - "dev/config.template.toml"
- ".github/workflows/pub-docker-img.yml" - ".github/workflows/pub-docker-img.yml"
workflow_dispatch: workflow_dispatch:

View file

@ -3,8 +3,20 @@ name: Sec Audit
on: on:
push: push:
branches: [main] branches: [main]
paths:
- "Cargo.toml"
- "Cargo.lock"
- "src/**"
- "crates/**"
- "deny.toml"
pull_request: pull_request:
branches: [main] branches: [main]
paths:
- "Cargo.toml"
- "Cargo.lock"
- "src/**"
- "crates/**"
- "deny.toml"
schedule: schedule:
- cron: "0 6 * * 1" # Weekly on Monday 6am UTC - cron: "0 6 * * 1" # Weekly on Monday 6am UTC

View file

@ -2,7 +2,7 @@ name: Sec CodeQL
on: on:
schedule: schedule:
- cron: "0 6,18 * * *" # Twice daily at 6am and 6pm UTC - cron: "0 6 * * 1" # Weekly Monday 6am UTC
workflow_dispatch: workflow_dispatch:
concurrency: concurrency:

View file

@ -17,7 +17,7 @@ permissions:
jobs: jobs:
update-notice: update-notice:
name: Update NOTICE with new contributors name: Update NOTICE with new contributors
runs-on: blacksmith-2vcpu-ubuntu-2404 runs-on: ubuntu-latest
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

View file

@ -1,8 +1,8 @@
name: Test Benchmarks name: Test Benchmarks
on: on:
push: schedule:
branches: [main] - cron: "0 3 * * 1" # Weekly Monday 3am UTC
workflow_dispatch: workflow_dispatch:
concurrency: concurrency:
@ -39,7 +39,7 @@ jobs:
path: | path: |
target/criterion/ target/criterion/
benchmark_output.txt benchmark_output.txt
retention-days: 30 retention-days: 7
- name: Post benchmark summary on PR - name: Post benchmark summary on PR
if: github.event_name == 'pull_request' if: github.event_name == 'pull_request'

View file

@ -0,0 +1,295 @@
# CI Cost Optimization — February 2026
> **Date:** 2026-02-18
> **Status:** Implemented
> **Impact:** ~60-65% reduction in estimated monthly GitHub Actions billable minutes
---
## Executive Summary
On February 17, 2026, the ZeroClaw repository consumed **400+ workflow runs** in a single day, totaling an estimated **398 billable minutes** (~6.6 hours). At this rate, monthly costs were projected at **~200 hours/month** (~12,000 billable minutes). This document describes the analysis performed, optimizations implemented, and the revised CI/CD architecture.
---
## Analysis Methodology
A Python script (`scripts/ci/fetch_actions_data.py`) was created to programmatically fetch and analyze all GitHub Actions workflow runs from the GitHub API for February 17, 2026. The script:
1. Fetched all completed workflow runs for the date via the GitHub REST API
2. Grouped runs by workflow name
3. Sampled job-level timing (up to 3 runs per workflow) to compute per-job durations
4. Extrapolated to estimate total billable minutes per workflow
### Raw Data Summary (February 17, 2026)
| Rank | Workflow | Runs/Day | Est. Minutes/Day | Primary Trigger |
|------|----------|----------|-------------------|-----------------|
| 1 | Rust Package Security Audit | 57 | 102 | Every PR + push |
| 2 | CI Run | 57 | 70 | Every PR + push |
| 3 | Performance Benchmarks | 15 | 63 | Every push to main |
| 4 | Docker | 20 | 63 | PR + push |
| 5 | PR Labeler | 69 | 20 | Every PR event |
| 6 | Feature Matrix | 3 | 19 | Push to main |
| 7 | Integration / E2E Tests | 15 | 17 | Every push to main |
| 8 | Workflow Sanity | 31 | 16 | Push + PR |
| 9 | Copilot Code Review | 6 | 14 | Dynamic |
| 10 | PR Intake Checks | 70 | 7 | Every PR event |
| 11 | PR Auto Responder | 47 | 4 | PR + issues |
| | **Total** | **400+** | **~398** | |
### Key Findings
- **15 pushes to main in ~2 hours** on Feb 17, each triggering 6-8 parallel workflows
- **Security Audit** was the single largest cost driver (102 min/day) with no path filtering
- **PR Auto Responder** had an **81% failure rate** (38/47 runs failing) — wasting runner time
- **CodeQL** runs twice daily (not captured in Feb 17 data since it's schedule-only) — adding ~3.5h/week
- **Benchmarks** ran on every push to main (15x in one day) despite being regression-focused
- **Dependabot** could generate up to 11 PRs/week, each triggering the full CI cascade
---
## Changes Implemented
### 1. Security Audit — Path Filters Added
**File:** `.github/workflows/sec-audit.yml`
**Before:** Ran on every PR and every push to main, regardless of what files changed.
**After:** Only runs when dependency or source files change:
- `Cargo.toml`, `Cargo.lock`, `src/**`, `crates/**`, `deny.toml`
**Weekly schedule retained** as a safety net for advisory database updates.
**Estimated savings:** ~60-70% of security audit runs eliminated (~30-35 hours/month)
### 2. Performance Benchmarks — Moved to Weekly Schedule
**File:** `.github/workflows/test-benchmarks.yml`
**Before:** Ran on every push to main (15x/day on Feb 17).
**After:** Runs weekly (Monday 3am UTC) + on-demand via `workflow_dispatch`.
**Artifact retention** reduced from 30 days to 7 days to lower storage costs.
**Rationale:** Benchmark regressions don't need per-commit detection. Weekly cadence catches regressions within one development cycle.
**Estimated savings:** ~90% reduction (~28 hours/month)
### 3. Docker PR Smoke Builds — Tightened Path Filters
**File:** `.github/workflows/pub-docker-img.yml`
**Before:** PR smoke builds triggered on any change to `src/**`, `crates/**`, `benches/**`, `firmware/**`, etc.
**After:** PR smoke builds only trigger on Docker-specific files:
- `Dockerfile`, `.dockerignore`, `docker-compose.yml`, `rust-toolchain.toml`, `dev/config.template.toml`, `.github/workflows/pub-docker-img.yml`
**Push-to-main triggers unchanged** — production Docker images still rebuild on source changes.
**Estimated savings:** ~40-50% fewer Docker smoke builds (~12-15 hours/month)
### 4. CodeQL — Reduced from Twice-Daily to Weekly
**File:** `.github/workflows/sec-codeql.yml`
**Before:** Ran twice daily at 6am and 6pm UTC (14 runs/week), each performing a full `cargo build --workspace --all-targets`.
**After:** Runs weekly (Monday 6am UTC) + on-demand.
**Rationale:** CodeQL for Rust is still maturing. Weekly scans are standard practice for security-focused projects. On-demand dispatch available for urgent scans.
**Estimated savings:** ~12 hours/month
### 5. CI Run — Merged Lint Jobs + Dropped `--release` Build
**File:** `.github/workflows/ci-run.yml`
**Changes:**
1. **Merged `lint` and `lint-strict-delta` into a single job** — Previously these were two separate parallel jobs, each requiring a full runner spin-up, Rust toolchain install, and cache restore. Now they run sequentially in one job.
2. **Dropped `--release` flag from smoke build**`cargo build --release` is 2-3x slower than debug due to optimizations. For a smoke check validating compilation, debug mode is equivalent.
**Estimated savings:** ~1 runner job per CI invocation + faster build times
### 6. Feature Matrix — Weekly-Only + Check-Only
**File:** `.github/workflows/feature-matrix.yml`
**Before:** Ran on every push to main touching `src/**` (3x on Feb 17) with 4 matrix entries, each running both `cargo check` AND `cargo test`.
**After:**
1. **Removed push trigger** — Now weekly-only (Monday 4:30am UTC) + on-demand
2. **Removed `cargo test`** — Only runs `cargo check --locked` per feature combination. Tests are already covered by the main CI Run workflow.
**Estimated savings:** ~50-75% of feature matrix compute eliminated
### 7. Lightweight Jobs Moved to `ubuntu-latest`
**Files affected:**
- `.github/workflows/pr-check-stale.yml`
- `.github/workflows/pr-check-status.yml`
- `.github/workflows/pr-auto-response.yml`
- `.github/workflows/pr-intake-checks.yml`
- `.github/workflows/pr-labeler.yml`
- `.github/workflows/sync-contributors.yml`
**Before:** All jobs used `blacksmith-2vcpu-ubuntu-2404` runners, even for lightweight API-only operations (labeling, stale checks, greetings).
**After:** Moved to `ubuntu-latest` (GitHub-hosted runners). These jobs only make API calls and run JavaScript scripts — they don't need Rust toolchains or specialized runners.
**Additional change:** `pr-check-status.yml` schedule reduced from every 12 hours to once daily (8:15am UTC).
### 8. Dependabot — Reduced Frequency and PR Limits
**File:** `.github/dependabot.yml`
**Before:**
- Cargo: weekly, 5 open PRs max
- GitHub Actions: weekly, 3 open PRs max
- Docker: weekly, 3 open PRs max
- Total: up to 11 Dependabot PRs/week, each triggering full CI
**After:**
- Cargo: **monthly**, 3 open PRs max, all deps grouped into single PR
- GitHub Actions: **monthly**, 1 open PR max, all grouped
- Docker: **monthly**, 1 open PR max, all grouped
- Total: up to 5 Dependabot PRs/month
**Rationale:** Each Dependabot PR triggers the full CI pipeline. Reducing from weekly to monthly and grouping updates into fewer PRs dramatically reduces CI cascade costs while still keeping dependencies current.
---
## Known Issues to Investigate
### PR Auto Responder — 81% Failure Rate
The `pr-auto-response.yml` workflow had 38 failures out of 47 runs on Feb 17. The `contributor-tier-issues` job fires on every issue `labeled`/`unlabeled` event, even when the label is not contributor-tier related. While the JavaScript handler exits early for non-tier labels, the runner still spins up and checks out the repository.
**Recommendations for further investigation:**
1. Add more specific event filtering at the workflow level to reduce unnecessary runs
2. Check if the failures are related to GitHub API rate limiting on the search endpoint
3. Consider whether `continue-on-error: true` should be added to non-critical jobs
---
## Revised Workflow Architecture
### Workflow Frequency Overview
| Workflow | Trigger | Runner |
|----------|---------|--------|
| **CI Run** | Push to main + PR | Blacksmith |
| **Sec Audit** | Push/PR (path-filtered) + weekly schedule | Blacksmith |
| **Sec CodeQL** | Weekly schedule | Blacksmith |
| **Test E2E** | Push to main | Blacksmith |
| **Test Benchmarks** | Weekly schedule | Blacksmith |
| **Test Fuzz** | Weekly schedule | Blacksmith |
| **Feature Matrix** | Weekly schedule | Blacksmith |
| **Docker Publish** | Push to main (broad paths) + PR (Docker-only paths) | Blacksmith |
| **Release** | Tag push only | GitHub-hosted |
| **Workflow Sanity** | Push/PR (workflow paths only) | Blacksmith |
| **Label Policy** | Push/PR (policy paths only) | Blacksmith |
| **PR Labeler** | PR events | **ubuntu-latest** |
| **PR Intake Checks** | PR events | **ubuntu-latest** |
| **PR Auto Responder** | PR + issue events | **ubuntu-latest** |
| **PR Check Stale** | Daily schedule | **ubuntu-latest** |
| **PR Check Status** | Daily schedule | **ubuntu-latest** |
| **Sync Contributors** | Weekly schedule | **ubuntu-latest** |
### Weekly Schedule Summary
| Day | Time (UTC) | Workflow |
|-----|-----------|----------|
| Monday | 03:00 | Test Benchmarks |
| Monday | 04:30 | Feature Matrix |
| Monday | 06:00 | Sec Audit (schedule) |
| Monday | 06:00 | Sec CodeQL |
| Sunday | 00:00 | Sync Contributors |
| Sunday | 02:00 | Test Fuzz |
| Daily | 02:20 | PR Check Stale |
| Daily | 08:15 | PR Check Status |
### CI Run Job Dependency Graph
```
changes ──┬── lint (Format + Clippy + Strict Delta)
│ └── test
├── build (Smoke, debug mode)
├── docs-only (fast path)
├── non-rust (fast path)
├── docs-quality
└── workflow-owner-approval
All above ──── ci-required (final gate)
```
### Push-to-Main Trigger Cascade
When code is pushed to `main`, the following workflows trigger:
1. **CI Run** — Always (change-detection gates individual jobs)
2. **Sec Audit** — Only if `Cargo.toml`, `Cargo.lock`, `src/**`, `crates/**`, or `deny.toml` changed
3. **Test E2E** — Always
4. **Docker Publish** — Only if broad source paths changed
5. **Workflow Sanity** — Only if workflow files changed
**No longer triggered on push:**
- ~~Performance Benchmarks~~ → Weekly only
- ~~Feature Matrix~~ → Weekly only
---
## Estimated Impact
| Metric | Before | After | Savings |
|--------|--------|-------|---------|
| Daily workflow runs | 400+ | ~150-180 | ~55-60% |
| Daily billable minutes | ~400 min | ~120-150 min | ~60-65% |
| Monthly billable hours | ~200 hours | ~60-75 hours | ~60-65% |
| Dependabot PRs/month | ~44 | ~5 | ~89% |
| CodeQL runs/week | 14 | 1 | ~93% |
| Benchmark runs/day | ~15 | 0 (weekly: ~1) | ~99% |
---
## Rollback Strategy
Each change is isolated to a single workflow file. To rollback any specific optimization:
1. **Revert the specific file** using `git checkout <commit>^ -- <file-path>`
2. Changes are backward-compatible — no downstream code or configuration depends on the CI schedule/trigger changes
3. All workflows retain `workflow_dispatch` triggers for manual invocation when needed
---
## Validation Checklist
- [ ] Verify CI Run workflow passes on next PR with Rust changes
- [ ] Verify Security Audit skips docs-only PRs
- [ ] Verify Docker smoke build only triggers on Dockerfile changes in PRs
- [ ] Verify weekly schedules fire correctly (check after first Monday)
- [ ] Monitor PR Auto Responder failure rate after switching to `ubuntu-latest`
- [ ] Verify Dependabot respects new monthly schedule and limits
---
## Files Modified
| File | Change Summary |
|------|---------------|
| `.github/workflows/sec-audit.yml` | Added path filters for push and PR triggers |
| `.github/workflows/test-benchmarks.yml` | Changed to weekly schedule; reduced artifact retention to 7 days |
| `.github/workflows/pub-docker-img.yml` | Tightened PR path filters to Docker-specific files |
| `.github/workflows/sec-codeql.yml` | Changed from twice-daily to weekly schedule |
| `.github/workflows/ci-run.yml` | Merged lint jobs; dropped `--release` from smoke build |
| `.github/workflows/feature-matrix.yml` | Removed push trigger; removed `cargo test` step |
| `.github/workflows/pr-check-stale.yml` | Switched to `ubuntu-latest` |
| `.github/workflows/pr-check-status.yml` | Switched to `ubuntu-latest`; reduced to daily schedule |
| `.github/workflows/pr-auto-response.yml` | Switched all jobs to `ubuntu-latest` |
| `.github/workflows/pr-intake-checks.yml` | Switched to `ubuntu-latest` |
| `.github/workflows/pr-labeler.yml` | Switched to `ubuntu-latest` |
| `.github/workflows/sync-contributors.yml` | Switched to `ubuntu-latest` |
| `.github/dependabot.yml` | Changed to monthly schedule; reduced PR limits; grouped all deps |
| `scripts/ci/fetch_actions_data.py` | New: cost analysis script for GitHub Actions runs |

View file

@ -0,0 +1,156 @@
#!/usr/bin/env python3
"""Fetch GitHub Actions workflow runs for a given date and summarize costs."""
import json
import subprocess
import sys
from datetime import datetime, timezone
def fetch_runs(repo, date_str, page=1, per_page=100):
"""Fetch completed workflow runs for a given date."""
url = (
f"https://api.github.com/repos/{repo}/actions/runs"
f"?created={date_str}&per_page={per_page}&page={page}"
)
result = subprocess.run(
["curl", "-sS", "-H", "Accept: application/vnd.github+json", url],
capture_output=True, text=True
)
return json.loads(result.stdout)
def fetch_jobs(repo, run_id):
"""Fetch jobs for a specific run."""
url = f"https://api.github.com/repos/{repo}/actions/runs/{run_id}/jobs?per_page=100"
result = subprocess.run(
["curl", "-sS", "-H", "Accept: application/vnd.github+json", url],
capture_output=True, text=True
)
return json.loads(result.stdout)
def parse_duration(started, completed):
"""Return duration in seconds between two ISO timestamps."""
if not started or not completed:
return 0
try:
s = datetime.fromisoformat(started.replace("Z", "+00:00"))
c = datetime.fromisoformat(completed.replace("Z", "+00:00"))
return max(0, (c - s).total_seconds())
except Exception:
return 0
def main():
repo = "zeroclaw-labs/zeroclaw"
date_str = "2026-02-17"
print(f"Fetching workflow runs for {repo} on {date_str}...")
print("=" * 100)
all_runs = []
for page in range(1, 5): # up to 400 runs
data = fetch_runs(repo, date_str, page=page)
runs = data.get("workflow_runs", [])
if not runs:
break
all_runs.extend(runs)
if len(runs) < 100:
break
print(f"Total workflow runs found: {len(all_runs)}")
print()
# Group by workflow name
workflow_stats = {}
for run in all_runs:
name = run.get("name", "Unknown")
event = run.get("event", "unknown")
conclusion = run.get("conclusion", "unknown")
run_id = run.get("id")
if name not in workflow_stats:
workflow_stats[name] = {
"count": 0,
"events": {},
"conclusions": {},
"total_job_seconds": 0,
"total_jobs": 0,
"run_ids": [],
}
workflow_stats[name]["count"] += 1
workflow_stats[name]["events"][event] = workflow_stats[name]["events"].get(event, 0) + 1
workflow_stats[name]["conclusions"][conclusion] = workflow_stats[name]["conclusions"].get(conclusion, 0) + 1
workflow_stats[name]["run_ids"].append(run_id)
# For each workflow, sample up to 3 runs to get job-level timing
print("Sampling job-level timing (up to 3 runs per workflow)...")
print()
for name, stats in workflow_stats.items():
sample_ids = stats["run_ids"][:3]
for run_id in sample_ids:
jobs_data = fetch_jobs(repo, run_id)
jobs = jobs_data.get("jobs", [])
for job in jobs:
started = job.get("started_at")
completed = job.get("completed_at")
duration = parse_duration(started, completed)
stats["total_job_seconds"] += duration
stats["total_jobs"] += 1
# Extrapolate: if we sampled N runs but there are M total, scale up
sampled = len(sample_ids)
total = stats["count"]
if sampled > 0 and sampled < total:
scale = total / sampled
stats["estimated_total_seconds"] = stats["total_job_seconds"] * scale
else:
stats["estimated_total_seconds"] = stats["total_job_seconds"]
# Print summary sorted by estimated cost (descending)
sorted_workflows = sorted(
workflow_stats.items(),
key=lambda x: x[1]["estimated_total_seconds"],
reverse=True
)
print("=" * 100)
print(f"{'Workflow':<40} {'Runs':>5} {'SampledJobs':>12} {'SampledMins':>12} {'Est.TotalMins':>14} {'Events'}")
print("-" * 100)
grand_total_minutes = 0
for name, stats in sorted_workflows:
sampled_mins = stats["total_job_seconds"] / 60
est_total_mins = stats["estimated_total_seconds"] / 60
grand_total_minutes += est_total_mins
events_str = ", ".join(f"{k}={v}" for k, v in stats["events"].items())
conclusions_str = ", ".join(f"{k}={v}" for k, v in stats["conclusions"].items())
print(
f"{name:<40} {stats['count']:>5} {stats['total_jobs']:>12} "
f"{sampled_mins:>12.1f} {est_total_mins:>14.1f} {events_str}"
)
print(f"{'':>40} {'':>5} {'':>12} {'':>12} {'':>14} outcomes: {conclusions_str}")
print("-" * 100)
print(f"{'GRAND TOTAL':>40} {len(all_runs):>5} {'':>12} {'':>12} {grand_total_minutes:>14.1f}")
print(f"\nEstimated total billable minutes on {date_str}: {grand_total_minutes:.0f} min ({grand_total_minutes/60:.1f} hours)")
print()
# Also show raw run list
print("\n" + "=" * 100)
print("DETAILED RUN LIST")
print("=" * 100)
for run in all_runs:
name = run.get("name", "Unknown")
event = run.get("event", "unknown")
conclusion = run.get("conclusion", "unknown")
run_id = run.get("id")
started = run.get("run_started_at", "?")
print(f" [{run_id}] {name:<40} conclusion={conclusion:<12} event={event:<20} started={started}")
if __name__ == "__main__":
main()