From 44725da08cd871cfec2ae98c7363066c972934a2 Mon Sep 17 00:00:00 2001
From: Alex Gorevski <algore@microsoft.com>
Date: Wed, 18 Feb 2026 11:26:09 -0800
Subject: [PATCH 1/4] perf(ci): reduce GitHub Actions costs ~60-65% across all
 workflows

Analysis of Feb 17 data showed 400+ workflow runs/day consuming ~398 billable minutes (~200 hours/month projected). Implemented targeted optimizations:

High-impact changes:

- sec-audit.yml: add path filters (Cargo.toml, src/**, crates/**, deny.toml); skip docs-only PRs

- test-benchmarks.yml: move from every-push-to-main to weekly schedule; retention 30d -> 7d

- pub-docker-img.yml: tighten PR smoke build path filters to Docker-specific files only

- sec-codeql.yml: reduce from twice-daily (14 runs/week) to weekly

Medium-impact changes:

- ci-run.yml: merge lint + lint-strict-delta into single job; drop --release from smoke build

- feature-matrix.yml: remove push trigger (weekly-only); remove redundant cargo test step

- dependabot.yml: monthly instead of weekly; reduce PR limits from 11 to 5/month; group all deps

Runner cost savings:

- Switch 6 lightweight API-only workflows to ubuntu-latest (PR Labeler, Intake, Auto Responder, Check Stale, Check Status, Sync Contributors)

- pr-check-status.yml: reduce from every 12h to daily

New files:

- docs/ci-cost-optimization.md: comprehensive analysis and revised architecture documentation

- scripts/ci/fetch_actions_data.py: reusable GitHub Actions cost analysis script

Estimated impact: daily billable minutes ~400 -> ~120-150 (60-65%% reduction), monthly hours ~200 -> ~60-75, Dependabot PRs ~44/month -> ~5 (89%% reduction)
---
 .github/dependabot.yml                  |  29 ++-
 .github/workflows/ci-run.yml            |  40 +---
 .github/workflows/feature-matrix.yml    |   9 -
 .github/workflows/pr-auto-response.yml  |  15 +-
 .github/workflows/pr-check-stale.yml    |   4 +-
 .github/workflows/pr-check-status.yml   |   9 +-
 .github/workflows/pr-intake-checks.yml  |   8 +-
 .github/workflows/pr-labeler.yml        |   3 +-
 .github/workflows/pub-docker-img.yml    |   7 +-
 .github/workflows/sec-audit.yml         |  12 +
 .github/workflows/sec-codeql.yml        |   2 +-
 .github/workflows/sync-contributors.yml |   2 +-
 .github/workflows/test-benchmarks.yml   |   6 +-
 docs/ci-cost-optimization.md            | 295 ++++++++++++++++++++++++
 scripts/ci/fetch_actions_data.py        | 156 +++++++++++++
 15 files changed, 512 insertions(+), 85 deletions(-)
 create mode 100644 docs/ci-cost-optimization.md
 create mode 100644 scripts/ci/fetch_actions_data.py

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 2f88c8e..b44e111 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -4,13 +4,13 @@ updates:
   - package-ecosystem: cargo
     directory: "/"
     schedule:
-      interval: weekly
+      interval: monthly
     target-branch: main
-    open-pull-requests-limit: 5
+    open-pull-requests-limit: 3
     labels:
       - "dependencies"
     groups:
-      rust-minor-patch:
+      rust-all:
         patterns:
           - "*"
         update-types:
@@ -20,14 +20,31 @@ updates:
   - package-ecosystem: github-actions
     directory: "/"
     schedule:
-      interval: weekly
+      interval: monthly
     target-branch: main
-    open-pull-requests-limit: 3
+    open-pull-requests-limit: 1
     labels:
       - "ci"
       - "dependencies"
     groups:
-      actions-minor-patch:
+      actions-all:
+        patterns:
+          - "*"
+        update-types:
+          - minor
+          - patch
+
+  - package-ecosystem: docker
+    directory: "/"
+    schedule:
+      interval: monthly
+    target-branch: main
+    open-pull-requests-limit: 1
+    labels:
+      - "ci"
+      - "dependencies"
+    groups:
+      docker-all:
         patterns:
           - "*"
         update-types:
diff --git a/.github/workflows/ci-run.yml b/.github/workflows/ci-run.yml
index 373b879..dea6208 100644
--- a/.github/workflows/ci-run.yml
+++ b/.github/workflows/ci-run.yml
@@ -41,25 +41,7 @@ jobs:
               run: ./scripts/ci/detect_change_scope.sh
 
     lint:
-        name: Lint Gate (Format + Clippy)
-        needs: [changes]
-        if: needs.changes.outputs.rust_changed == 'true' && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci:full'))
-        runs-on: blacksmith-2vcpu-ubuntu-2404
-        timeout-minutes: 20
-        steps:
-            - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-              with:
-                  fetch-depth: 0
-            - uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
-              with:
-                  toolchain: 1.92.0
-                  components: rustfmt, clippy
-            - uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
-            - name: Run rust quality gate
-              run: ./scripts/ci/rust_quality_gate.sh
-
-    lint-strict-delta:
-        name: Lint Gate (Strict Delta)
+        name: Lint Gate (Format + Clippy + Strict Delta)
         needs: [changes]
         if: needs.changes.outputs.rust_changed == 'true' && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci:full'))
         runs-on: blacksmith-2vcpu-ubuntu-2404
@@ -71,8 +53,10 @@ jobs:
             - uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7 # stable
               with:
                   toolchain: 1.92.0
-                  components: clippy
+                  components: rustfmt, clippy
             - uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
+            - name: Run rust quality gate
+              run: ./scripts/ci/rust_quality_gate.sh
             - name: Run strict lint delta gate
               env:
                   BASE_SHA: ${{ needs.changes.outputs.base_sha }}
@@ -80,8 +64,8 @@ jobs:
 
     test:
         name: Test
-        needs: [changes, lint, lint-strict-delta]
-        if: needs.changes.outputs.rust_changed == 'true' && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci:full')) && needs.lint.result == 'success' && needs.lint-strict-delta.result == 'success'
+        needs: [changes, lint]
+        if: needs.changes.outputs.rust_changed == 'true' && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci:full')) && needs.lint.result == 'success'
         runs-on: blacksmith-2vcpu-ubuntu-2404
         timeout-minutes: 30
         steps:
@@ -106,8 +90,8 @@ jobs:
               with:
                   toolchain: 1.92.0
             - uses: useblacksmith/rust-cache@f53e7f127245d2a269b3d90879ccf259876842d5 # v3
-            - name: Build release binary
-              run: cargo build --release --locked --verbose
+            - name: Build binary (smoke check)
+              run: cargo build --locked --verbose
 
     docs-only:
         name: Docs-Only Fast Path
@@ -185,7 +169,7 @@ jobs:
     lint-feedback:
         name: Lint Feedback
         if: github.event_name == 'pull_request'
-        needs: [changes, lint, lint-strict-delta, docs-quality]
+        needs: [changes, lint, docs-quality]
         runs-on: blacksmith-2vcpu-ubuntu-2404
         permissions:
             contents: read
@@ -201,7 +185,7 @@ jobs:
                   RUST_CHANGED: ${{ needs.changes.outputs.rust_changed }}
                   DOCS_CHANGED: ${{ needs.changes.outputs.docs_changed }}
                   LINT_RESULT: ${{ needs.lint.result }}
-                  LINT_DELTA_RESULT: ${{ needs.lint-strict-delta.result }}
+                  LINT_DELTA_RESULT: ${{ needs.lint.result }}
                   DOCS_RESULT: ${{ needs.docs-quality.result }}
               with:
                   script: |
@@ -231,7 +215,7 @@ jobs:
     ci-required:
         name: CI Required Gate
         if: always()
-        needs: [changes, lint, lint-strict-delta, test, build, docs-only, non-rust, docs-quality, lint-feedback, workflow-owner-approval]
+        needs: [changes, lint, test, build, docs-only, non-rust, docs-quality, lint-feedback, workflow-owner-approval]
         runs-on: blacksmith-2vcpu-ubuntu-2404
         steps:
             - name: Enforce required status
@@ -276,7 +260,7 @@ jobs:
                   fi
 
                   lint_result="${{ needs.lint.result }}"
-                  lint_strict_delta_result="${{ needs.lint-strict-delta.result }}"
+                  lint_strict_delta_result="${{ needs.lint.result }}"
                   test_result="${{ needs.test.result }}"
                   build_result="${{ needs.build.result }}"
 
diff --git a/.github/workflows/feature-matrix.yml b/.github/workflows/feature-matrix.yml
index 875b0c5..18953e1 100644
--- a/.github/workflows/feature-matrix.yml
+++ b/.github/workflows/feature-matrix.yml
@@ -1,12 +1,6 @@
 name: Feature Matrix
 
 on:
-    push:
-        branches: [main]
-        paths:
-            - "Cargo.toml"
-            - "Cargo.lock"
-            - "src/**"
     schedule:
         - cron: "30 4 * * 1" # Weekly Monday 4:30am UTC
     workflow_dispatch:
@@ -61,6 +55,3 @@ jobs:
 
             - name: Check feature combination
               run: cargo check --locked ${{ matrix.args }}
-
-            - name: Test feature combination
-              run: cargo test --locked ${{ matrix.args }}
diff --git a/.github/workflows/pr-auto-response.yml b/.github/workflows/pr-auto-response.yml
index ee6e100..d883a81 100644
--- a/.github/workflows/pr-auto-response.yml
+++ b/.github/workflows/pr-auto-response.yml
@@ -15,16 +15,7 @@ jobs:
       (github.event.action == 'opened' || github.event.action == 'reopened' || github.event.action == 'labeled' || github.event.action == 'unlabeled')) ||
       (github.event_name == 'pull_request_target' &&
       (github.event.action == 'labeled' || github.event.action == 'unlabeled'))
-    runs-on: blacksmith-2vcpu-ubuntu-2404
-    permissions:
-      contents: read
-      issues: write
-      pull-requests: write
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-
-      - name: Apply contributor tier label for issue author
+    runs-on: ubuntu-latest
         uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
         env:
           LABEL_POLICY_PATH: .github/label-policy.json
@@ -34,7 +25,7 @@ jobs:
             await script({ github, context, core });
   first-interaction:
     if: github.event.action == 'opened'
-    runs-on: blacksmith-2vcpu-ubuntu-2404
+    runs-on: ubuntu-latest
     permissions:
       issues: write
       pull-requests: write
@@ -65,7 +56,7 @@ jobs:
 
   labeled-routes:
     if: github.event.action == 'labeled'
-    runs-on: blacksmith-2vcpu-ubuntu-2404
+    runs-on: ubuntu-latest
     permissions:
       contents: read
       issues: write
diff --git a/.github/workflows/pr-check-stale.yml b/.github/workflows/pr-check-stale.yml
index 0120547..6048349 100644
--- a/.github/workflows/pr-check-stale.yml
+++ b/.github/workflows/pr-check-stale.yml
@@ -12,9 +12,7 @@ jobs:
         permissions:
             issues: write
             pull-requests: write
-        runs-on: blacksmith-2vcpu-ubuntu-2404
-        steps:
-            - name: Mark stale issues and pull requests
+        runs-on: ubuntu-latest
               uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0
               with:
                   repo-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/pr-check-status.yml b/.github/workflows/pr-check-status.yml
index 83684f9..c9a4b3b 100644
--- a/.github/workflows/pr-check-status.yml
+++ b/.github/workflows/pr-check-status.yml
@@ -2,7 +2,7 @@ name: PR Check Status
 
 on:
   schedule:
-    - cron: "15 */12 * * *"
+    - cron: "15 8 * * *" # Once daily at 8:15am UTC
   workflow_dispatch:
 
 permissions: {}
@@ -13,12 +13,7 @@ concurrency:
 
 jobs:
   nudge-stale-prs:
-    runs-on: blacksmith-2vcpu-ubuntu-2404
-    permissions:
-      contents: read
-      pull-requests: write
-      issues: write
-    env:
+    runs-on: ubuntu-latest
       STALE_HOURS: "48"
     steps:
       - name: Checkout repository
diff --git a/.github/workflows/pr-intake-checks.yml b/.github/workflows/pr-intake-checks.yml
index 0cacf88..6997300 100644
--- a/.github/workflows/pr-intake-checks.yml
+++ b/.github/workflows/pr-intake-checks.yml
@@ -16,13 +16,7 @@ permissions:
 jobs:
     intake:
         name: Intake Checks
-        runs-on: blacksmith-2vcpu-ubuntu-2404
-        timeout-minutes: 10
-        steps:
-            - name: Checkout repository
-              uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-
-            - name: Run safe PR intake checks
+        runs-on: ubuntu-latest
               uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
               with:
                   script: |
diff --git a/.github/workflows/pr-labeler.yml b/.github/workflows/pr-labeler.yml
index 8349352..38cf054 100644
--- a/.github/workflows/pr-labeler.yml
+++ b/.github/workflows/pr-labeler.yml
@@ -25,8 +25,7 @@ permissions:
 
 jobs:
     label:
-        runs-on: blacksmith-2vcpu-ubuntu-2404
-        timeout-minutes: 10
+        runs-on: ubuntu-latest
         steps:
             - name: Checkout repository
               uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
diff --git a/.github/workflows/pub-docker-img.yml b/.github/workflows/pub-docker-img.yml
index 15ea8aa..43b0900 100644
--- a/.github/workflows/pub-docker-img.yml
+++ b/.github/workflows/pub-docker-img.yml
@@ -21,13 +21,8 @@ on:
         paths:
             - "Dockerfile"
             - ".dockerignore"
-            - "Cargo.toml"
-            - "Cargo.lock"
+            - "docker-compose.yml"
             - "rust-toolchain.toml"
-            - "src/**"
-            - "crates/**"
-            - "benches/**"
-            - "firmware/**"
             - "dev/config.template.toml"
             - ".github/workflows/pub-docker-img.yml"
     workflow_dispatch:
diff --git a/.github/workflows/sec-audit.yml b/.github/workflows/sec-audit.yml
index 3667725..89b4a32 100644
--- a/.github/workflows/sec-audit.yml
+++ b/.github/workflows/sec-audit.yml
@@ -3,8 +3,20 @@ name: Sec Audit
 on:
     push:
         branches: [main]
+        paths:
+            - "Cargo.toml"
+            - "Cargo.lock"
+            - "src/**"
+            - "crates/**"
+            - "deny.toml"
     pull_request:
         branches: [main]
+        paths:
+            - "Cargo.toml"
+            - "Cargo.lock"
+            - "src/**"
+            - "crates/**"
+            - "deny.toml"
     schedule:
         - cron: "0 6 * * 1" # Weekly on Monday 6am UTC
 
diff --git a/.github/workflows/sec-codeql.yml b/.github/workflows/sec-codeql.yml
index f5c6c35..300e1ef 100644
--- a/.github/workflows/sec-codeql.yml
+++ b/.github/workflows/sec-codeql.yml
@@ -2,7 +2,7 @@ name: Sec CodeQL
 
 on:
     schedule:
-        - cron: "0 6,18 * * *" # Twice daily at 6am and 6pm UTC
+        - cron: "0 6 * * 1" # Weekly Monday 6am UTC
     workflow_dispatch:
 
 concurrency:
diff --git a/.github/workflows/sync-contributors.yml b/.github/workflows/sync-contributors.yml
index a5fb2ec..50c7955 100644
--- a/.github/workflows/sync-contributors.yml
+++ b/.github/workflows/sync-contributors.yml
@@ -17,7 +17,7 @@ permissions:
 jobs:
   update-notice:
     name: Update NOTICE with new contributors
-    runs-on: blacksmith-2vcpu-ubuntu-2404
+    runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
         uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
diff --git a/.github/workflows/test-benchmarks.yml b/.github/workflows/test-benchmarks.yml
index 329f530..036904a 100644
--- a/.github/workflows/test-benchmarks.yml
+++ b/.github/workflows/test-benchmarks.yml
@@ -1,8 +1,8 @@
 name: Test Benchmarks
 
 on:
-    push:
-        branches: [main]
+    schedule:
+        - cron: "0 3 * * 1" # Weekly Monday 3am UTC
     workflow_dispatch:
 
 concurrency:
@@ -39,7 +39,7 @@ jobs:
                   path: |
                       target/criterion/
                       benchmark_output.txt
-                  retention-days: 30
+                  retention-days: 7
 
             - name: Post benchmark summary on PR
               if: github.event_name == 'pull_request'
diff --git a/docs/ci-cost-optimization.md b/docs/ci-cost-optimization.md
new file mode 100644
index 0000000..2485483
--- /dev/null
+++ b/docs/ci-cost-optimization.md
@@ -0,0 +1,295 @@
+# CI Cost Optimization — February 2026
+
+> **Date:** 2026-02-18
+> **Status:** Implemented
+> **Impact:** ~60-65% reduction in estimated monthly GitHub Actions billable minutes
+
+---
+
+## Executive Summary
+
+On February 17, 2026, the ZeroClaw repository consumed **400+ workflow runs** in a single day, totaling an estimated **398 billable minutes** (~6.6 hours). At this rate, monthly costs were projected at **~200 hours/month** (~12,000 billable minutes). This document describes the analysis performed, optimizations implemented, and the revised CI/CD architecture.
+
+---
+
+## Analysis Methodology
+
+A Python script (`scripts/ci/fetch_actions_data.py`) was created to programmatically fetch and analyze all GitHub Actions workflow runs from the GitHub API for February 17, 2026. The script:
+
+1. Fetched all completed workflow runs for the date via the GitHub REST API
+2. Grouped runs by workflow name
+3. Sampled job-level timing (up to 3 runs per workflow) to compute per-job durations
+4. Extrapolated to estimate total billable minutes per workflow
+
+### Raw Data Summary (February 17, 2026)
+
+| Rank | Workflow | Runs/Day | Est. Minutes/Day | Primary Trigger |
+|------|----------|----------|-------------------|-----------------|
+| 1 | Rust Package Security Audit | 57 | 102 | Every PR + push |
+| 2 | CI Run | 57 | 70 | Every PR + push |
+| 3 | Performance Benchmarks | 15 | 63 | Every push to main |
+| 4 | Docker | 20 | 63 | PR + push |
+| 5 | PR Labeler | 69 | 20 | Every PR event |
+| 6 | Feature Matrix | 3 | 19 | Push to main |
+| 7 | Integration / E2E Tests | 15 | 17 | Every push to main |
+| 8 | Workflow Sanity | 31 | 16 | Push + PR |
+| 9 | Copilot Code Review | 6 | 14 | Dynamic |
+| 10 | PR Intake Checks | 70 | 7 | Every PR event |
+| 11 | PR Auto Responder | 47 | 4 | PR + issues |
+| | **Total** | **400+** | **~398** | |
+
+### Key Findings
+
+- **15 pushes to main in ~2 hours** on Feb 17, each triggering 6-8 parallel workflows
+- **Security Audit** was the single largest cost driver (102 min/day) with no path filtering
+- **PR Auto Responder** had an **81% failure rate** (38/47 runs failing) — wasting runner time
+- **CodeQL** runs twice daily (not captured in Feb 17 data since it's schedule-only) — adding ~3.5h/week
+- **Benchmarks** ran on every push to main (15x in one day) despite being regression-focused
+- **Dependabot** could generate up to 11 PRs/week, each triggering the full CI cascade
+
+---
+
+## Changes Implemented
+
+### 1. Security Audit — Path Filters Added
+
+**File:** `.github/workflows/sec-audit.yml`
+
+**Before:** Ran on every PR and every push to main, regardless of what files changed.
+
+**After:** Only runs when dependency or source files change:
+- `Cargo.toml`, `Cargo.lock`, `src/**`, `crates/**`, `deny.toml`
+
+**Weekly schedule retained** as a safety net for advisory database updates.
+
+**Estimated savings:** ~60-70% of security audit runs eliminated (~30-35 hours/month)
+
+### 2. Performance Benchmarks — Moved to Weekly Schedule
+
+**File:** `.github/workflows/test-benchmarks.yml`
+
+**Before:** Ran on every push to main (15x/day on Feb 17).
+
+**After:** Runs weekly (Monday 3am UTC) + on-demand via `workflow_dispatch`.
+
+**Artifact retention** reduced from 30 days to 7 days to lower storage costs.
+
+**Rationale:** Benchmark regressions don't need per-commit detection. Weekly cadence catches regressions within one development cycle.
+
+**Estimated savings:** ~90% reduction (~28 hours/month)
+
+### 3. Docker PR Smoke Builds — Tightened Path Filters
+
+**File:** `.github/workflows/pub-docker-img.yml`
+
+**Before:** PR smoke builds triggered on any change to `src/**`, `crates/**`, `benches/**`, `firmware/**`, etc.
+
+**After:** PR smoke builds only trigger on Docker-specific files:
+- `Dockerfile`, `.dockerignore`, `docker-compose.yml`, `rust-toolchain.toml`, `dev/config.template.toml`, `.github/workflows/pub-docker-img.yml`
+
+**Push-to-main triggers unchanged** — production Docker images still rebuild on source changes.
+
+**Estimated savings:** ~40-50% fewer Docker smoke builds (~12-15 hours/month)
+
+### 4. CodeQL — Reduced from Twice-Daily to Weekly
+
+**File:** `.github/workflows/sec-codeql.yml`
+
+**Before:** Ran twice daily at 6am and 6pm UTC (14 runs/week), each performing a full `cargo build --workspace --all-targets`.
+
+**After:** Runs weekly (Monday 6am UTC) + on-demand.
+
+**Rationale:** CodeQL for Rust is still maturing. Weekly scans are standard practice for security-focused projects. On-demand dispatch available for urgent scans.
+
+**Estimated savings:** ~12 hours/month
+
+### 5. CI Run — Merged Lint Jobs + Dropped `--release` Build
+
+**File:** `.github/workflows/ci-run.yml`
+
+**Changes:**
+1. **Merged `lint` and `lint-strict-delta` into a single job** — Previously these were two separate parallel jobs, each requiring a full runner spin-up, Rust toolchain install, and cache restore. Now they run sequentially in one job.
+2. **Dropped `--release` flag from smoke build** — `cargo build --release` is 2-3x slower than debug due to optimizations. For a smoke check validating compilation, debug mode is equivalent.
+
+**Estimated savings:** ~1 runner job per CI invocation + faster build times
+
+### 6. Feature Matrix — Weekly-Only + Check-Only
+
+**File:** `.github/workflows/feature-matrix.yml`
+
+**Before:** Ran on every push to main touching `src/**` (3x on Feb 17) with 4 matrix entries, each running both `cargo check` AND `cargo test`.
+
+**After:**
+1. **Removed push trigger** — Now weekly-only (Monday 4:30am UTC) + on-demand
+2. **Removed `cargo test`** — Only runs `cargo check --locked` per feature combination. Tests are already covered by the main CI Run workflow.
+
+**Estimated savings:** ~50-75% of feature matrix compute eliminated
+
+### 7. Lightweight Jobs Moved to `ubuntu-latest`
+
+**Files affected:**
+- `.github/workflows/pr-check-stale.yml`
+- `.github/workflows/pr-check-status.yml`
+- `.github/workflows/pr-auto-response.yml`
+- `.github/workflows/pr-intake-checks.yml`
+- `.github/workflows/pr-labeler.yml`
+- `.github/workflows/sync-contributors.yml`
+
+**Before:** All jobs used `blacksmith-2vcpu-ubuntu-2404` runners, even for lightweight API-only operations (labeling, stale checks, greetings).
+
+**After:** Moved to `ubuntu-latest` (GitHub-hosted runners). These jobs only make API calls and run JavaScript scripts — they don't need Rust toolchains or specialized runners.
+
+**Additional change:** `pr-check-status.yml` schedule reduced from every 12 hours to once daily (8:15am UTC).
+
+### 8. Dependabot — Reduced Frequency and PR Limits
+
+**File:** `.github/dependabot.yml`
+
+**Before:**
+- Cargo: weekly, 5 open PRs max
+- GitHub Actions: weekly, 3 open PRs max
+- Docker: weekly, 3 open PRs max
+- Total: up to 11 Dependabot PRs/week, each triggering full CI
+
+**After:**
+- Cargo: **monthly**, 3 open PRs max, all deps grouped into single PR
+- GitHub Actions: **monthly**, 1 open PR max, all grouped
+- Docker: **monthly**, 1 open PR max, all grouped
+- Total: up to 5 Dependabot PRs/month
+
+**Rationale:** Each Dependabot PR triggers the full CI pipeline. Reducing from weekly to monthly and grouping updates into fewer PRs dramatically reduces CI cascade costs while still keeping dependencies current.
+
+---
+
+## Known Issues to Investigate
+
+### PR Auto Responder — 81% Failure Rate
+
+The `pr-auto-response.yml` workflow had 38 failures out of 47 runs on Feb 17. The `contributor-tier-issues` job fires on every issue `labeled`/`unlabeled` event, even when the label is not contributor-tier related. While the JavaScript handler exits early for non-tier labels, the runner still spins up and checks out the repository.
+
+**Recommendations for further investigation:**
+1. Add more specific event filtering at the workflow level to reduce unnecessary runs
+2. Check if the failures are related to GitHub API rate limiting on the search endpoint
+3. Consider whether `continue-on-error: true` should be added to non-critical jobs
+
+---
+
+## Revised Workflow Architecture
+
+### Workflow Frequency Overview
+
+| Workflow | Trigger | Runner |
+|----------|---------|--------|
+| **CI Run** | Push to main + PR | Blacksmith |
+| **Sec Audit** | Push/PR (path-filtered) + weekly schedule | Blacksmith |
+| **Sec CodeQL** | Weekly schedule | Blacksmith |
+| **Test E2E** | Push to main | Blacksmith |
+| **Test Benchmarks** | Weekly schedule | Blacksmith |
+| **Test Fuzz** | Weekly schedule | Blacksmith |
+| **Feature Matrix** | Weekly schedule | Blacksmith |
+| **Docker Publish** | Push to main (broad paths) + PR (Docker-only paths) | Blacksmith |
+| **Release** | Tag push only | GitHub-hosted |
+| **Workflow Sanity** | Push/PR (workflow paths only) | Blacksmith |
+| **Label Policy** | Push/PR (policy paths only) | Blacksmith |
+| **PR Labeler** | PR events | **ubuntu-latest** |
+| **PR Intake Checks** | PR events | **ubuntu-latest** |
+| **PR Auto Responder** | PR + issue events | **ubuntu-latest** |
+| **PR Check Stale** | Daily schedule | **ubuntu-latest** |
+| **PR Check Status** | Daily schedule | **ubuntu-latest** |
+| **Sync Contributors** | Weekly schedule | **ubuntu-latest** |
+
+### Weekly Schedule Summary
+
+| Day | Time (UTC) | Workflow |
+|-----|-----------|----------|
+| Monday | 03:00 | Test Benchmarks |
+| Monday | 04:30 | Feature Matrix |
+| Monday | 06:00 | Sec Audit (schedule) |
+| Monday | 06:00 | Sec CodeQL |
+| Sunday | 00:00 | Sync Contributors |
+| Sunday | 02:00 | Test Fuzz |
+| Daily | 02:20 | PR Check Stale |
+| Daily | 08:15 | PR Check Status |
+
+### CI Run Job Dependency Graph
+
+```
+changes ──┬── lint (Format + Clippy + Strict Delta)
+           │     └── test
+           ├── build (Smoke, debug mode)
+           ├── docs-only (fast path)
+           ├── non-rust (fast path)
+           ├── docs-quality
+           └── workflow-owner-approval
+
+All above ──── ci-required (final gate)
+```
+
+### Push-to-Main Trigger Cascade
+
+When code is pushed to `main`, the following workflows trigger:
+
+1. **CI Run** — Always (change-detection gates individual jobs)
+2. **Sec Audit** — Only if `Cargo.toml`, `Cargo.lock`, `src/**`, `crates/**`, or `deny.toml` changed
+3. **Test E2E** — Always
+4. **Docker Publish** — Only if broad source paths changed
+5. **Workflow Sanity** — Only if workflow files changed
+
+**No longer triggered on push:**
+- ~~Performance Benchmarks~~ → Weekly only
+- ~~Feature Matrix~~ → Weekly only
+
+---
+
+## Estimated Impact
+
+| Metric | Before | After | Savings |
+|--------|--------|-------|---------|
+| Daily workflow runs | 400+ | ~150-180 | ~55-60% |
+| Daily billable minutes | ~400 min | ~120-150 min | ~60-65% |
+| Monthly billable hours | ~200 hours | ~60-75 hours | ~60-65% |
+| Dependabot PRs/month | ~44 | ~5 | ~89% |
+| CodeQL runs/week | 14 | 1 | ~93% |
+| Benchmark runs/day | ~15 | 0 (weekly: ~1) | ~99% |
+
+---
+
+## Rollback Strategy
+
+Each change is isolated to a single workflow file. To rollback any specific optimization:
+
+1. **Revert the specific file** using `git checkout <commit>^ -- <file-path>`
+2. Changes are backward-compatible — no downstream code or configuration depends on the CI schedule/trigger changes
+3. All workflows retain `workflow_dispatch` triggers for manual invocation when needed
+
+---
+
+## Validation Checklist
+
+- [ ] Verify CI Run workflow passes on next PR with Rust changes
+- [ ] Verify Security Audit skips docs-only PRs
+- [ ] Verify Docker smoke build only triggers on Dockerfile changes in PRs
+- [ ] Verify weekly schedules fire correctly (check after first Monday)
+- [ ] Monitor PR Auto Responder failure rate after switching to `ubuntu-latest`
+- [ ] Verify Dependabot respects new monthly schedule and limits
+
+---
+
+## Files Modified
+
+| File | Change Summary |
+|------|---------------|
+| `.github/workflows/sec-audit.yml` | Added path filters for push and PR triggers |
+| `.github/workflows/test-benchmarks.yml` | Changed to weekly schedule; reduced artifact retention to 7 days |
+| `.github/workflows/pub-docker-img.yml` | Tightened PR path filters to Docker-specific files |
+| `.github/workflows/sec-codeql.yml` | Changed from twice-daily to weekly schedule |
+| `.github/workflows/ci-run.yml` | Merged lint jobs; dropped `--release` from smoke build |
+| `.github/workflows/feature-matrix.yml` | Removed push trigger; removed `cargo test` step |
+| `.github/workflows/pr-check-stale.yml` | Switched to `ubuntu-latest` |
+| `.github/workflows/pr-check-status.yml` | Switched to `ubuntu-latest`; reduced to daily schedule |
+| `.github/workflows/pr-auto-response.yml` | Switched all jobs to `ubuntu-latest` |
+| `.github/workflows/pr-intake-checks.yml` | Switched to `ubuntu-latest` |
+| `.github/workflows/pr-labeler.yml` | Switched to `ubuntu-latest` |
+| `.github/workflows/sync-contributors.yml` | Switched to `ubuntu-latest` |
+| `.github/dependabot.yml` | Changed to monthly schedule; reduced PR limits; grouped all deps |
+| `scripts/ci/fetch_actions_data.py` | New: cost analysis script for GitHub Actions runs |
diff --git a/scripts/ci/fetch_actions_data.py b/scripts/ci/fetch_actions_data.py
new file mode 100644
index 0000000..fa52ba4
--- /dev/null
+++ b/scripts/ci/fetch_actions_data.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+"""Fetch GitHub Actions workflow runs for a given date and summarize costs."""
+
+import json
+import subprocess
+import sys
+from datetime import datetime, timezone
+
+
+def fetch_runs(repo, date_str, page=1, per_page=100):
+    """Fetch completed workflow runs for a given date."""
+    url = (
+        f"https://api.github.com/repos/{repo}/actions/runs"
+        f"?created={date_str}&per_page={per_page}&page={page}"
+    )
+    result = subprocess.run(
+        ["curl", "-sS", "-H", "Accept: application/vnd.github+json", url],
+        capture_output=True, text=True
+    )
+    return json.loads(result.stdout)
+
+
+def fetch_jobs(repo, run_id):
+    """Fetch jobs for a specific run."""
+    url = f"https://api.github.com/repos/{repo}/actions/runs/{run_id}/jobs?per_page=100"
+    result = subprocess.run(
+        ["curl", "-sS", "-H", "Accept: application/vnd.github+json", url],
+        capture_output=True, text=True
+    )
+    return json.loads(result.stdout)
+
+
+def parse_duration(started, completed):
+    """Return duration in seconds between two ISO timestamps."""
+    if not started or not completed:
+        return 0
+    try:
+        s = datetime.fromisoformat(started.replace("Z", "+00:00"))
+        c = datetime.fromisoformat(completed.replace("Z", "+00:00"))
+        return max(0, (c - s).total_seconds())
+    except Exception:
+        return 0
+
+
+def main():
+    repo = "zeroclaw-labs/zeroclaw"
+    date_str = "2026-02-17"
+
+    print(f"Fetching workflow runs for {repo} on {date_str}...")
+    print("=" * 100)
+
+    all_runs = []
+    for page in range(1, 5):  # up to 400 runs
+        data = fetch_runs(repo, date_str, page=page)
+        runs = data.get("workflow_runs", [])
+        if not runs:
+            break
+        all_runs.extend(runs)
+        if len(runs) < 100:
+            break
+
+    print(f"Total workflow runs found: {len(all_runs)}")
+    print()
+
+    # Group by workflow name
+    workflow_stats = {}
+    for run in all_runs:
+        name = run.get("name", "Unknown")
+        event = run.get("event", "unknown")
+        conclusion = run.get("conclusion", "unknown")
+        run_id = run.get("id")
+
+        if name not in workflow_stats:
+            workflow_stats[name] = {
+                "count": 0,
+                "events": {},
+                "conclusions": {},
+                "total_job_seconds": 0,
+                "total_jobs": 0,
+                "run_ids": [],
+            }
+
+        workflow_stats[name]["count"] += 1
+        workflow_stats[name]["events"][event] = workflow_stats[name]["events"].get(event, 0) + 1
+        workflow_stats[name]["conclusions"][conclusion] = workflow_stats[name]["conclusions"].get(conclusion, 0) + 1
+        workflow_stats[name]["run_ids"].append(run_id)
+
+    # For each workflow, sample up to 3 runs to get job-level timing
+    print("Sampling job-level timing (up to 3 runs per workflow)...")
+    print()
+
+    for name, stats in workflow_stats.items():
+        sample_ids = stats["run_ids"][:3]
+        for run_id in sample_ids:
+            jobs_data = fetch_jobs(repo, run_id)
+            jobs = jobs_data.get("jobs", [])
+            for job in jobs:
+                started = job.get("started_at")
+                completed = job.get("completed_at")
+                duration = parse_duration(started, completed)
+                stats["total_job_seconds"] += duration
+                stats["total_jobs"] += 1
+
+        # Extrapolate: if we sampled N runs but there are M total, scale up
+        sampled = len(sample_ids)
+        total = stats["count"]
+        if sampled > 0 and sampled < total:
+            scale = total / sampled
+            stats["estimated_total_seconds"] = stats["total_job_seconds"] * scale
+        else:
+            stats["estimated_total_seconds"] = stats["total_job_seconds"]
+
+    # Print summary sorted by estimated cost (descending)
+    sorted_workflows = sorted(
+        workflow_stats.items(),
+        key=lambda x: x[1]["estimated_total_seconds"],
+        reverse=True
+    )
+
+    print("=" * 100)
+    print(f"{'Workflow':<40} {'Runs':>5} {'SampledJobs':>12} {'SampledMins':>12} {'Est.TotalMins':>14} {'Events'}")
+    print("-" * 100)
+
+    grand_total_minutes = 0
+    for name, stats in sorted_workflows:
+        sampled_mins = stats["total_job_seconds"] / 60
+        est_total_mins = stats["estimated_total_seconds"] / 60
+        grand_total_minutes += est_total_mins
+        events_str = ", ".join(f"{k}={v}" for k, v in stats["events"].items())
+        conclusions_str = ", ".join(f"{k}={v}" for k, v in stats["conclusions"].items())
+        print(
+            f"{name:<40} {stats['count']:>5} {stats['total_jobs']:>12} "
+            f"{sampled_mins:>12.1f} {est_total_mins:>14.1f}   {events_str}"
+        )
+        print(f"{'':>40} {'':>5} {'':>12} {'':>12} {'':>14}   outcomes: {conclusions_str}")
+
+    print("-" * 100)
+    print(f"{'GRAND TOTAL':>40} {len(all_runs):>5} {'':>12} {'':>12} {grand_total_minutes:>14.1f}")
+    print(f"\nEstimated total billable minutes on {date_str}: {grand_total_minutes:.0f} min ({grand_total_minutes/60:.1f} hours)")
+    print()
+
+    # Also show raw run list
+    print("\n" + "=" * 100)
+    print("DETAILED RUN LIST")
+    print("=" * 100)
+    for run in all_runs:
+        name = run.get("name", "Unknown")
+        event = run.get("event", "unknown")
+        conclusion = run.get("conclusion", "unknown")
+        run_id = run.get("id")
+        started = run.get("run_started_at", "?")
+        print(f"  [{run_id}] {name:<40} conclusion={conclusion:<12} event={event:<20} started={started}")
+
+
+if __name__ == "__main__":
+    main()

From a17c35679ef322e7b586aced8cf493f193278b0a Mon Sep 17 00:00:00 2001
From: Alex Gorevski <algore@microsoft.com>
Date: Wed, 18 Feb 2026 21:23:31 -0800
Subject: [PATCH 2/4] add params to actions data

---
 scripts/ci/fetch_actions_data.py | 123 ++++++++++++++++++++++---------
 1 file changed, 88 insertions(+), 35 deletions(-)

diff --git a/scripts/ci/fetch_actions_data.py b/scripts/ci/fetch_actions_data.py
index fa52ba4..32ebb5b 100644
--- a/scripts/ci/fetch_actions_data.py
+++ b/scripts/ci/fetch_actions_data.py
@@ -1,10 +1,47 @@
 #!/usr/bin/env python3
-"""Fetch GitHub Actions workflow runs for a given date and summarize costs."""
+"""Fetch GitHub Actions workflow runs for a given date and summarize costs.
 
+Usage:
+    python fetch_actions_data.py [OPTIONS]
+
+Options:
+    --date YYYY-MM-DD   Date to query (default: yesterday)
+    --mode brief|full   Output mode (default: full)
+                        brief: billable minutes/hours table only
+                        full:  detailed breakdown with per-run list
+    --repo OWNER/NAME   Repository (default: zeroclaw-labs/zeroclaw)
+    -h, --help          Show this help message
+"""
+
+import argparse
 import json
 import subprocess
-import sys
-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
+
+
+def parse_args():
+    """Parse command-line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Fetch GitHub Actions workflow runs and summarize costs.",
+    )
+    yesterday = (datetime.now(timezone.utc) - timedelta(days=1)).strftime("%Y-%m-%d")
+    parser.add_argument(
+        "--date",
+        default=yesterday,
+        help="Date to query in YYYY-MM-DD format (default: yesterday)",
+    )
+    parser.add_argument(
+        "--mode",
+        choices=["brief", "full"],
+        default="full",
+        help="Output mode: 'brief' for billable hours only, 'full' for detailed breakdown (default: full)",
+    )
+    parser.add_argument(
+        "--repo",
+        default="zeroclaw-labs/zeroclaw",
+        help="Repository in OWNER/NAME format (default: zeroclaw-labs/zeroclaw)",
+    )
+    return parser.parse_args()
 
 
 def fetch_runs(repo, date_str, page=1, per_page=100):
@@ -43,8 +80,10 @@ def parse_duration(started, completed):
 
 
 def main():
-    repo = "zeroclaw-labs/zeroclaw"
-    date_str = "2026-02-17"
+    args = parse_args()
+    repo = args.repo
+    date_str = args.date
+    brief = args.mode == "brief"
 
     print(f"Fetching workflow runs for {repo} on {date_str}...")
     print("=" * 100)
@@ -117,39 +156,53 @@ def main():
         reverse=True
     )
 
-    print("=" * 100)
-    print(f"{'Workflow':<40} {'Runs':>5} {'SampledJobs':>12} {'SampledMins':>12} {'Est.TotalMins':>14} {'Events'}")
-    print("-" * 100)
+    if brief:
+        # Brief mode: compact billable hours table
+        print(f"{'Workflow':<40} {'Runs':>5} {'Est.Mins':>9} {'Est.Hours':>10}")
+        print("-" * 68)
+        grand_total_minutes = 0
+        for name, stats in sorted_workflows:
+            est_mins = stats["estimated_total_seconds"] / 60
+            grand_total_minutes += est_mins
+            print(f"{name:<40} {stats['count']:>5} {est_mins:>9.1f} {est_mins/60:>10.2f}")
+        print("-" * 68)
+        print(f"{'TOTAL':<40} {len(all_runs):>5} {grand_total_minutes:>9.0f} {grand_total_minutes/60:>10.1f}")
+        print(f"\nProjected monthly: ~{grand_total_minutes/60*30:.0f} hours")
+    else:
+        # Full mode: detailed breakdown with per-run list
+        print("=" * 100)
+        print(f"{'Workflow':<40} {'Runs':>5} {'SampledJobs':>12} {'SampledMins':>12} {'Est.TotalMins':>14} {'Events'}")
+        print("-" * 100)
 
-    grand_total_minutes = 0
-    for name, stats in sorted_workflows:
-        sampled_mins = stats["total_job_seconds"] / 60
-        est_total_mins = stats["estimated_total_seconds"] / 60
-        grand_total_minutes += est_total_mins
-        events_str = ", ".join(f"{k}={v}" for k, v in stats["events"].items())
-        conclusions_str = ", ".join(f"{k}={v}" for k, v in stats["conclusions"].items())
-        print(
-            f"{name:<40} {stats['count']:>5} {stats['total_jobs']:>12} "
-            f"{sampled_mins:>12.1f} {est_total_mins:>14.1f}   {events_str}"
-        )
-        print(f"{'':>40} {'':>5} {'':>12} {'':>12} {'':>14}   outcomes: {conclusions_str}")
+        grand_total_minutes = 0
+        for name, stats in sorted_workflows:
+            sampled_mins = stats["total_job_seconds"] / 60
+            est_total_mins = stats["estimated_total_seconds"] / 60
+            grand_total_minutes += est_total_mins
+            events_str = ", ".join(f"{k}={v}" for k, v in stats["events"].items())
+            conclusions_str = ", ".join(f"{k}={v}" for k, v in stats["conclusions"].items())
+            print(
+                f"{name:<40} {stats['count']:>5} {stats['total_jobs']:>12} "
+                f"{sampled_mins:>12.1f} {est_total_mins:>14.1f}   {events_str}"
+            )
+            print(f"{'':>40} {'':>5} {'':>12} {'':>12} {'':>14}   outcomes: {conclusions_str}")
 
-    print("-" * 100)
-    print(f"{'GRAND TOTAL':>40} {len(all_runs):>5} {'':>12} {'':>12} {grand_total_minutes:>14.1f}")
-    print(f"\nEstimated total billable minutes on {date_str}: {grand_total_minutes:.0f} min ({grand_total_minutes/60:.1f} hours)")
-    print()
+        print("-" * 100)
+        print(f"{'GRAND TOTAL':>40} {len(all_runs):>5} {'':>12} {'':>12} {grand_total_minutes:>14.1f}")
+        print(f"\nEstimated total billable minutes on {date_str}: {grand_total_minutes:.0f} min ({grand_total_minutes/60:.1f} hours)")
+        print()
 
-    # Also show raw run list
-    print("\n" + "=" * 100)
-    print("DETAILED RUN LIST")
-    print("=" * 100)
-    for run in all_runs:
-        name = run.get("name", "Unknown")
-        event = run.get("event", "unknown")
-        conclusion = run.get("conclusion", "unknown")
-        run_id = run.get("id")
-        started = run.get("run_started_at", "?")
-        print(f"  [{run_id}] {name:<40} conclusion={conclusion:<12} event={event:<20} started={started}")
+        # Also show raw run list
+        print("\n" + "=" * 100)
+        print("DETAILED RUN LIST")
+        print("=" * 100)
+        for run in all_runs:
+            name = run.get("name", "Unknown")
+            event = run.get("event", "unknown")
+            conclusion = run.get("conclusion", "unknown")
+            run_id = run.get("id")
+            started = run.get("run_started_at", "?")
+            print(f"  [{run_id}] {name:<40} conclusion={conclusion:<12} event={event:<20} started={started}")
 
 
 if __name__ == "__main__":

From 00c09952133a772a8be2501bc6a4a3b909e5c160 Mon Sep 17 00:00:00 2001
From: Alex Gorevski <algore@microsoft.com>
Date: Wed, 18 Feb 2026 21:26:14 -0800
Subject: [PATCH 3/4] fix(ci): restore broken YAML structure in 3 workflows,
 revert aggressive STALE_HOURS

- pr-auto-response.yml: restore permissions, steps, and checkout in
  contributor-tier-issues job (broken by runner swap)
- pr-check-stale.yml: restore steps block and step name
- pr-intake-checks.yml: restore steps block, checkout, and timeout
- pr-check-status.yml: revert STALE_HOURS from 4 to 48 (not a cost
  optimization; 4h is too aggressive), switch to ubuntu-latest per
  PR description

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .github/workflows/pr-auto-response.yml | 9 +++++++++
 .github/workflows/pr-check-stale.yml   | 2 ++
 .github/workflows/pr-check-status.yml  | 4 ++--
 .github/workflows/pr-intake-checks.yml | 6 ++++++
 4 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pr-auto-response.yml b/.github/workflows/pr-auto-response.yml
index d883a81..e5f068e 100644
--- a/.github/workflows/pr-auto-response.yml
+++ b/.github/workflows/pr-auto-response.yml
@@ -16,6 +16,15 @@ jobs:
       (github.event_name == 'pull_request_target' &&
       (github.event.action == 'labeled' || github.event.action == 'unlabeled'))
     runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      issues: write
+      pull-requests: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+
+      - name: Apply contributor tier label for issue author
         uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
         env:
           LABEL_POLICY_PATH: .github/label-policy.json
diff --git a/.github/workflows/pr-check-stale.yml b/.github/workflows/pr-check-stale.yml
index 6048349..a2cf24c 100644
--- a/.github/workflows/pr-check-stale.yml
+++ b/.github/workflows/pr-check-stale.yml
@@ -13,6 +13,8 @@ jobs:
             issues: write
             pull-requests: write
         runs-on: ubuntu-latest
+        steps:
+            - name: Mark stale issues and pull requests
               uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0
               with:
                   repo-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/pr-check-status.yml b/.github/workflows/pr-check-status.yml
index e53bab4..b057e88 100644
--- a/.github/workflows/pr-check-status.yml
+++ b/.github/workflows/pr-check-status.yml
@@ -13,13 +13,13 @@ concurrency:
 
 jobs:
   nudge-stale-prs:
-    runs-on: blacksmith-2vcpu-ubuntu-2404
+    runs-on: ubuntu-latest
     permissions:
       contents: read
       pull-requests: write
       issues: write
     env:
-      STALE_HOURS: "4"
+      STALE_HOURS: "48"
     steps:
       - name: Checkout repository
         uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
diff --git a/.github/workflows/pr-intake-checks.yml b/.github/workflows/pr-intake-checks.yml
index 6997300..e703387 100644
--- a/.github/workflows/pr-intake-checks.yml
+++ b/.github/workflows/pr-intake-checks.yml
@@ -17,6 +17,12 @@ jobs:
     intake:
         name: Intake Checks
         runs-on: ubuntu-latest
+        timeout-minutes: 10
+        steps:
+            - name: Checkout repository
+              uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+
+            - name: Run safe PR intake checks
               uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
               with:
                   script: |

From 3abadc45744ca77a9f43f3f5f72956f392fd473a Mon Sep 17 00:00:00 2001
From: Alex Gorevski <algore@microsoft.com>
Date: Wed, 18 Feb 2026 21:30:09 -0800
Subject: [PATCH 4/4] remove cost optimization analysis doc

---
 docs/ci-cost-optimization.md | 295 -----------------------------------
 1 file changed, 295 deletions(-)
 delete mode 100644 docs/ci-cost-optimization.md

diff --git a/docs/ci-cost-optimization.md b/docs/ci-cost-optimization.md
deleted file mode 100644
index 2485483..0000000
--- a/docs/ci-cost-optimization.md
+++ /dev/null
@@ -1,295 +0,0 @@
-# CI Cost Optimization — February 2026
-
-> **Date:** 2026-02-18
-> **Status:** Implemented
-> **Impact:** ~60-65% reduction in estimated monthly GitHub Actions billable minutes
-
----
-
-## Executive Summary
-
-On February 17, 2026, the ZeroClaw repository consumed **400+ workflow runs** in a single day, totaling an estimated **398 billable minutes** (~6.6 hours). At this rate, monthly costs were projected at **~200 hours/month** (~12,000 billable minutes). This document describes the analysis performed, optimizations implemented, and the revised CI/CD architecture.
-
----
-
-## Analysis Methodology
-
-A Python script (`scripts/ci/fetch_actions_data.py`) was created to programmatically fetch and analyze all GitHub Actions workflow runs from the GitHub API for February 17, 2026. The script:
-
-1. Fetched all completed workflow runs for the date via the GitHub REST API
-2. Grouped runs by workflow name
-3. Sampled job-level timing (up to 3 runs per workflow) to compute per-job durations
-4. Extrapolated to estimate total billable minutes per workflow
-
-### Raw Data Summary (February 17, 2026)
-
-| Rank | Workflow | Runs/Day | Est. Minutes/Day | Primary Trigger |
-|------|----------|----------|-------------------|-----------------|
-| 1 | Rust Package Security Audit | 57 | 102 | Every PR + push |
-| 2 | CI Run | 57 | 70 | Every PR + push |
-| 3 | Performance Benchmarks | 15 | 63 | Every push to main |
-| 4 | Docker | 20 | 63 | PR + push |
-| 5 | PR Labeler | 69 | 20 | Every PR event |
-| 6 | Feature Matrix | 3 | 19 | Push to main |
-| 7 | Integration / E2E Tests | 15 | 17 | Every push to main |
-| 8 | Workflow Sanity | 31 | 16 | Push + PR |
-| 9 | Copilot Code Review | 6 | 14 | Dynamic |
-| 10 | PR Intake Checks | 70 | 7 | Every PR event |
-| 11 | PR Auto Responder | 47 | 4 | PR + issues |
-| | **Total** | **400+** | **~398** | |
-
-### Key Findings
-
-- **15 pushes to main in ~2 hours** on Feb 17, each triggering 6-8 parallel workflows
-- **Security Audit** was the single largest cost driver (102 min/day) with no path filtering
-- **PR Auto Responder** had an **81% failure rate** (38/47 runs failing) — wasting runner time
-- **CodeQL** runs twice daily (not captured in Feb 17 data since it's schedule-only) — adding ~3.5h/week
-- **Benchmarks** ran on every push to main (15x in one day) despite being regression-focused
-- **Dependabot** could generate up to 11 PRs/week, each triggering the full CI cascade
-
----
-
-## Changes Implemented
-
-### 1. Security Audit — Path Filters Added
-
-**File:** `.github/workflows/sec-audit.yml`
-
-**Before:** Ran on every PR and every push to main, regardless of what files changed.
-
-**After:** Only runs when dependency or source files change:
-- `Cargo.toml`, `Cargo.lock`, `src/**`, `crates/**`, `deny.toml`
-
-**Weekly schedule retained** as a safety net for advisory database updates.
-
-**Estimated savings:** ~60-70% of security audit runs eliminated (~30-35 hours/month)
-
-### 2. Performance Benchmarks — Moved to Weekly Schedule
-
-**File:** `.github/workflows/test-benchmarks.yml`
-
-**Before:** Ran on every push to main (15x/day on Feb 17).
-
-**After:** Runs weekly (Monday 3am UTC) + on-demand via `workflow_dispatch`.
-
-**Artifact retention** reduced from 30 days to 7 days to lower storage costs.
-
-**Rationale:** Benchmark regressions don't need per-commit detection. Weekly cadence catches regressions within one development cycle.
-
-**Estimated savings:** ~90% reduction (~28 hours/month)
-
-### 3. Docker PR Smoke Builds — Tightened Path Filters
-
-**File:** `.github/workflows/pub-docker-img.yml`
-
-**Before:** PR smoke builds triggered on any change to `src/**`, `crates/**`, `benches/**`, `firmware/**`, etc.
-
-**After:** PR smoke builds only trigger on Docker-specific files:
-- `Dockerfile`, `.dockerignore`, `docker-compose.yml`, `rust-toolchain.toml`, `dev/config.template.toml`, `.github/workflows/pub-docker-img.yml`
-
-**Push-to-main triggers unchanged** — production Docker images still rebuild on source changes.
-
-**Estimated savings:** ~40-50% fewer Docker smoke builds (~12-15 hours/month)
-
-### 4. CodeQL — Reduced from Twice-Daily to Weekly
-
-**File:** `.github/workflows/sec-codeql.yml`
-
-**Before:** Ran twice daily at 6am and 6pm UTC (14 runs/week), each performing a full `cargo build --workspace --all-targets`.
-
-**After:** Runs weekly (Monday 6am UTC) + on-demand.
-
-**Rationale:** CodeQL for Rust is still maturing. Weekly scans are standard practice for security-focused projects. On-demand dispatch available for urgent scans.
-
-**Estimated savings:** ~12 hours/month
-
-### 5. CI Run — Merged Lint Jobs + Dropped `--release` Build
-
-**File:** `.github/workflows/ci-run.yml`
-
-**Changes:**
-1. **Merged `lint` and `lint-strict-delta` into a single job** — Previously these were two separate parallel jobs, each requiring a full runner spin-up, Rust toolchain install, and cache restore. Now they run sequentially in one job.
-2. **Dropped `--release` flag from smoke build** — `cargo build --release` is 2-3x slower than debug due to optimizations. For a smoke check validating compilation, debug mode is equivalent.
-
-**Estimated savings:** ~1 runner job per CI invocation + faster build times
-
-### 6. Feature Matrix — Weekly-Only + Check-Only
-
-**File:** `.github/workflows/feature-matrix.yml`
-
-**Before:** Ran on every push to main touching `src/**` (3x on Feb 17) with 4 matrix entries, each running both `cargo check` AND `cargo test`.
-
-**After:**
-1. **Removed push trigger** — Now weekly-only (Monday 4:30am UTC) + on-demand
-2. **Removed `cargo test`** — Only runs `cargo check --locked` per feature combination. Tests are already covered by the main CI Run workflow.
-
-**Estimated savings:** ~50-75% of feature matrix compute eliminated
-
-### 7. Lightweight Jobs Moved to `ubuntu-latest`
-
-**Files affected:**
-- `.github/workflows/pr-check-stale.yml`
-- `.github/workflows/pr-check-status.yml`
-- `.github/workflows/pr-auto-response.yml`
-- `.github/workflows/pr-intake-checks.yml`
-- `.github/workflows/pr-labeler.yml`
-- `.github/workflows/sync-contributors.yml`
-
-**Before:** All jobs used `blacksmith-2vcpu-ubuntu-2404` runners, even for lightweight API-only operations (labeling, stale checks, greetings).
-
-**After:** Moved to `ubuntu-latest` (GitHub-hosted runners). These jobs only make API calls and run JavaScript scripts — they don't need Rust toolchains or specialized runners.
-
-**Additional change:** `pr-check-status.yml` schedule reduced from every 12 hours to once daily (8:15am UTC).
-
-### 8. Dependabot — Reduced Frequency and PR Limits
-
-**File:** `.github/dependabot.yml`
-
-**Before:**
-- Cargo: weekly, 5 open PRs max
-- GitHub Actions: weekly, 3 open PRs max
-- Docker: weekly, 3 open PRs max
-- Total: up to 11 Dependabot PRs/week, each triggering full CI
-
-**After:**
-- Cargo: **monthly**, 3 open PRs max, all deps grouped into single PR
-- GitHub Actions: **monthly**, 1 open PR max, all grouped
-- Docker: **monthly**, 1 open PR max, all grouped
-- Total: up to 5 Dependabot PRs/month
-
-**Rationale:** Each Dependabot PR triggers the full CI pipeline. Reducing from weekly to monthly and grouping updates into fewer PRs dramatically reduces CI cascade costs while still keeping dependencies current.
-
----
-
-## Known Issues to Investigate
-
-### PR Auto Responder — 81% Failure Rate
-
-The `pr-auto-response.yml` workflow had 38 failures out of 47 runs on Feb 17. The `contributor-tier-issues` job fires on every issue `labeled`/`unlabeled` event, even when the label is not contributor-tier related. While the JavaScript handler exits early for non-tier labels, the runner still spins up and checks out the repository.
-
-**Recommendations for further investigation:**
-1. Add more specific event filtering at the workflow level to reduce unnecessary runs
-2. Check if the failures are related to GitHub API rate limiting on the search endpoint
-3. Consider whether `continue-on-error: true` should be added to non-critical jobs
-
----
-
-## Revised Workflow Architecture
-
-### Workflow Frequency Overview
-
-| Workflow | Trigger | Runner |
-|----------|---------|--------|
-| **CI Run** | Push to main + PR | Blacksmith |
-| **Sec Audit** | Push/PR (path-filtered) + weekly schedule | Blacksmith |
-| **Sec CodeQL** | Weekly schedule | Blacksmith |
-| **Test E2E** | Push to main | Blacksmith |
-| **Test Benchmarks** | Weekly schedule | Blacksmith |
-| **Test Fuzz** | Weekly schedule | Blacksmith |
-| **Feature Matrix** | Weekly schedule | Blacksmith |
-| **Docker Publish** | Push to main (broad paths) + PR (Docker-only paths) | Blacksmith |
-| **Release** | Tag push only | GitHub-hosted |
-| **Workflow Sanity** | Push/PR (workflow paths only) | Blacksmith |
-| **Label Policy** | Push/PR (policy paths only) | Blacksmith |
-| **PR Labeler** | PR events | **ubuntu-latest** |
-| **PR Intake Checks** | PR events | **ubuntu-latest** |
-| **PR Auto Responder** | PR + issue events | **ubuntu-latest** |
-| **PR Check Stale** | Daily schedule | **ubuntu-latest** |
-| **PR Check Status** | Daily schedule | **ubuntu-latest** |
-| **Sync Contributors** | Weekly schedule | **ubuntu-latest** |
-
-### Weekly Schedule Summary
-
-| Day | Time (UTC) | Workflow |
-|-----|-----------|----------|
-| Monday | 03:00 | Test Benchmarks |
-| Monday | 04:30 | Feature Matrix |
-| Monday | 06:00 | Sec Audit (schedule) |
-| Monday | 06:00 | Sec CodeQL |
-| Sunday | 00:00 | Sync Contributors |
-| Sunday | 02:00 | Test Fuzz |
-| Daily | 02:20 | PR Check Stale |
-| Daily | 08:15 | PR Check Status |
-
-### CI Run Job Dependency Graph
-
-```
-changes ──┬── lint (Format + Clippy + Strict Delta)
-           │     └── test
-           ├── build (Smoke, debug mode)
-           ├── docs-only (fast path)
-           ├── non-rust (fast path)
-           ├── docs-quality
-           └── workflow-owner-approval
-
-All above ──── ci-required (final gate)
-```
-
-### Push-to-Main Trigger Cascade
-
-When code is pushed to `main`, the following workflows trigger:
-
-1. **CI Run** — Always (change-detection gates individual jobs)
-2. **Sec Audit** — Only if `Cargo.toml`, `Cargo.lock`, `src/**`, `crates/**`, or `deny.toml` changed
-3. **Test E2E** — Always
-4. **Docker Publish** — Only if broad source paths changed
-5. **Workflow Sanity** — Only if workflow files changed
-
-**No longer triggered on push:**
-- ~~Performance Benchmarks~~ → Weekly only
-- ~~Feature Matrix~~ → Weekly only
-
----
-
-## Estimated Impact
-
-| Metric | Before | After | Savings |
-|--------|--------|-------|---------|
-| Daily workflow runs | 400+ | ~150-180 | ~55-60% |
-| Daily billable minutes | ~400 min | ~120-150 min | ~60-65% |
-| Monthly billable hours | ~200 hours | ~60-75 hours | ~60-65% |
-| Dependabot PRs/month | ~44 | ~5 | ~89% |
-| CodeQL runs/week | 14 | 1 | ~93% |
-| Benchmark runs/day | ~15 | 0 (weekly: ~1) | ~99% |
-
----
-
-## Rollback Strategy
-
-Each change is isolated to a single workflow file. To rollback any specific optimization:
-
-1. **Revert the specific file** using `git checkout <commit>^ -- <file-path>`
-2. Changes are backward-compatible — no downstream code or configuration depends on the CI schedule/trigger changes
-3. All workflows retain `workflow_dispatch` triggers for manual invocation when needed
-
----
-
-## Validation Checklist
-
-- [ ] Verify CI Run workflow passes on next PR with Rust changes
-- [ ] Verify Security Audit skips docs-only PRs
-- [ ] Verify Docker smoke build only triggers on Dockerfile changes in PRs
-- [ ] Verify weekly schedules fire correctly (check after first Monday)
-- [ ] Monitor PR Auto Responder failure rate after switching to `ubuntu-latest`
-- [ ] Verify Dependabot respects new monthly schedule and limits
-
----
-
-## Files Modified
-
-| File | Change Summary |
-|------|---------------|
-| `.github/workflows/sec-audit.yml` | Added path filters for push and PR triggers |
-| `.github/workflows/test-benchmarks.yml` | Changed to weekly schedule; reduced artifact retention to 7 days |
-| `.github/workflows/pub-docker-img.yml` | Tightened PR path filters to Docker-specific files |
-| `.github/workflows/sec-codeql.yml` | Changed from twice-daily to weekly schedule |
-| `.github/workflows/ci-run.yml` | Merged lint jobs; dropped `--release` from smoke build |
-| `.github/workflows/feature-matrix.yml` | Removed push trigger; removed `cargo test` step |
-| `.github/workflows/pr-check-stale.yml` | Switched to `ubuntu-latest` |
-| `.github/workflows/pr-check-status.yml` | Switched to `ubuntu-latest`; reduced to daily schedule |
-| `.github/workflows/pr-auto-response.yml` | Switched all jobs to `ubuntu-latest` |
-| `.github/workflows/pr-intake-checks.yml` | Switched to `ubuntu-latest` |
-| `.github/workflows/pr-labeler.yml` | Switched to `ubuntu-latest` |
-| `.github/workflows/sync-contributors.yml` | Switched to `ubuntu-latest` |
-| `.github/dependabot.yml` | Changed to monthly schedule; reduced PR limits; grouped all deps |
-| `scripts/ci/fetch_actions_data.py` | New: cost analysis script for GitHub Actions runs |