From a35d1e37c8b66654083a61719bf8dc189067eb04 Mon Sep 17 00:00:00 2001
From: Chummy <chumyin0912@gmail.com>
Date: Tue, 17 Feb 2026 21:25:50 +0800
Subject: [PATCH] chore(labeler): normalize module labels and backfill
 contributor tiers (#462)

Co-authored-by: Will Sarg <12886992+willsarg@users.noreply.github.com>
---
 .github/pull_request_template.md       |   4 +
 .github/workflows/auto-response.yml    |   4 +
 .github/workflows/labeler.yml          |  27 ++-
 docs/ci-map.md                         |   2 +-
 docs/pr-workflow.md                    |   2 +-
 docs/reviewer-playbook.md              |   2 +-
 scripts/recompute_contributor_tiers.sh | 324 +++++++++++++++++++++++++
 7 files changed, 351 insertions(+), 14 deletions(-)
 create mode 100755 scripts/recompute_contributor_tiers.sh
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 550bd95..7c9e601 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -12,7 +12,7 @@ Describe this PR in 2-5 bullets:
 - Risk label (`risk: low|medium|high`):
 - Size label (`size: XS|S|M|L|XL`, auto-managed/read-only):
 - Scope labels (`core|agent|channel|config|cron|daemon|doctor|gateway|health|heartbeat|integration|memory|observability|onboard|provider|runtime|security|service|skillforge|skills|tool|tunnel|docs|dependencies|ci|tests|scripts|dev`, comma-separated):
-- Module labels (`<module>:<component>`, for example `channel:telegram`, `provider:kimi`, `tool:shell`):
+- Module labels (`<module>: <component>`, for example `channel: telegram`, `provider: kimi`, `tool: shell`):
 - Contributor tier label (`trusted contributor|experienced contributor|principal contributor|distinguished contributor`, auto-managed/read-only; author merged PRs >=5/10/20/50):
 - If any auto-label is incorrect, note requested correction:
 
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
index d629a1f..10d8bfb 100644
--- a/.github/workflows/labeler.yml
+++ b/.github/workflows/labeler.yml
@@ -325,13 +325,18 @@ jobs:
                         return pattern.test(text);
                       }
 
+                      function formatModuleLabel(prefix, segment) {
+                        return `${prefix}: ${segment}`; // canonical form: colon followed by exactly one space
+                      }
+
                       function parseModuleLabel(label) {
-                        const separatorIndex = label.indexOf(":");
-                        if (separatorIndex <= 0 || separatorIndex >= label.length - 1) return null;
-                        return {
-                          prefix: label.slice(0, separatorIndex),
-                          segment: label.slice(separatorIndex + 1),
-                        };
+                        if (typeof label !== "string") return null;
+                        const match = label.match(/^([^:]+):\s*(.+)$/);
+                        if (!match) return null;
+                        const prefix = match[1].trim().toLowerCase();
+                        const segment = (match[2] || "").trim().toLowerCase();
+                        if (!prefix || !segment) return null;
+                        return { prefix, segment };
                       }
 
                       function sortByPriority(labels, priorityIndex) {
@@ -389,7 +394,7 @@ jobs:
                         for (const [prefix, segments] of segmentsByPrefix) {
                           const hasSpecificSegment = [...segments].some((segment) => segment !== "core");
                           if (hasSpecificSegment) {
-                            refined.delete(`${prefix}:core`);
+                            refined.delete(formatModuleLabel(prefix, "core"));
                           }
                         }
 
@@ -418,7 +423,7 @@ jobs:
                           if (uniqueSegments.length === 0) continue;
 
                           if (uniqueSegments.length === 1) {
-                            compactedModuleLabels.add(`${prefix}:${uniqueSegments[0]}`);
+                            compactedModuleLabels.add(formatModuleLabel(prefix, uniqueSegments[0]));
                           } else {
                             forcePathPrefixes.add(prefix);
                           }
@@ -609,7 +614,7 @@ jobs:
                           segment = normalizeLabelSegment(segment);
                           if (!segment) continue;
 
-                          detectedModuleLabels.add(`${rule.prefix}:${segment}`);
+                          detectedModuleLabels.add(formatModuleLabel(rule.prefix, segment));
                         }
                       }
 
@@ -635,7 +640,7 @@ jobs:
 
                         for (const keyword of providerKeywordHints) {
                           if (containsKeyword(searchableText, keyword)) {
-                            detectedModuleLabels.add(`provider:${keyword}`);
+                            detectedModuleLabels.add(formatModuleLabel("provider", keyword));
                           }
                         }
                       }
@@ -661,7 +666,7 @@ jobs:
 
                         for (const keyword of channelKeywordHints) {
                           if (containsKeyword(searchableText, keyword)) {
-                            detectedModuleLabels.add(`channel:${keyword}`);
+                            detectedModuleLabels.add(formatModuleLabel("channel", keyword));
                           }
                         }
                       }
diff --git a/docs/ci-map.md b/docs/ci-map.md
index 108a9d0..6a2260d 100644
--- a/docs/ci-map.md
+++ b/docs/ci-map.md
@@ -27,7 +27,7 @@ Merge-blocking checks should stay small and deterministic. Optional checks are u
 ### Optional Repository Automation
 
 - `.github/workflows/labeler.yml` (`PR Labeler`)
-    - Purpose: scope/path labels + size/risk labels + fine-grained module labels (`<module>:<component>`)
+    - Purpose: scope/path labels + size/risk labels + fine-grained module labels (`<module>: <component>`)
     - Additional behavior: label descriptions are auto-managed as hover tooltips to explain each auto-judgment rule
     - Additional behavior: provider-related keywords in provider/config/onboard/integration changes are promoted to `provider:*` labels (for example `provider:kimi`, `provider:deepseek`)
     - Additional behavior: hierarchical de-duplication keeps only the most specific scope labels (for example `tool:composio` suppresses `tool:core` and `tool`)
diff --git a/docs/pr-workflow.md b/docs/pr-workflow.md
index 3c62711..2c154ef 100644
--- a/docs/pr-workflow.md
+++ b/docs/pr-workflow.md
@@ -244,7 +244,7 @@ Label discipline:
 - Path labels identify subsystem ownership quickly.
 - Size labels drive batching strategy.
 - Risk labels drive review depth (`risk: low/medium/high`).
-- Module labels (`<module>:<component>`) improve reviewer routing for integration-specific changes and future newly-added modules.
+- Module labels (`<module>: <component>`) improve reviewer routing for integration-specific changes and future newly-added modules.
 - `risk: manual` allows maintainers to preserve a human risk judgment when automation lacks context.
 - `no-stale` is reserved for accepted-but-blocked work.
 
diff --git a/docs/reviewer-playbook.md b/docs/reviewer-playbook.md
index bc42509..6f72fea 100644
--- a/docs/reviewer-playbook.md
+++ b/docs/reviewer-playbook.md
@@ -14,7 +14,7 @@ Use it to reduce review latency without reducing quality.
 For every new PR, do a fast intake pass:
 
 1. Confirm template completeness (`summary`, `validation`, `security`, `rollback`).
-2. Confirm labels (`size:*`, `risk:*`, scope labels such as `provider`/`channel`/`security`, module-scoped labels such as `channel:*`/`provider:*`/`tool:*`, and contributor tier labels when applicable) are present and plausible.
+2. Confirm labels (`size:*`, `risk:*`, scope labels such as `provider`/`channel`/`security`, module-scoped labels such as `channel: *`/`provider: *`/`tool: *`, and contributor tier labels when applicable) are present and plausible.
 3. Confirm CI signal status (`CI Required Gate`).
 4. Confirm scope is one concern (reject mixed mega-PRs unless justified).
 5. Confirm privacy/data-hygiene and neutral test wording requirements are satisfied.
diff --git a/scripts/recompute_contributor_tiers.sh b/scripts/recompute_contributor_tiers.sh
new file mode 100755
index 0000000..6e3e528
--- /dev/null
+++ b/scripts/recompute_contributor_tiers.sh
@@ -0,0 +1,324 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+SCRIPT_NAME="$(basename "$0")"
+
+usage() { # Print the CLI help text to stdout; the unquoted heredoc expands $SCRIPT_NAME.
+  cat <<USAGE
+Recompute contributor tier labels for historical PRs/issues.
+
+Usage:
+  ./$SCRIPT_NAME [options]
+
+Options:
+  --repo <owner/repo>     Target repository (default: current gh repo)
+  --kind <both|prs|issues>
+                          Target objects (default: both)
+  --state <all|open|closed>
+                          State filter for listing objects (default: all)
+  --limit <N>             Limit processed objects after fetch (default: 0 = no limit)
+  --apply                 Apply label updates (default is dry-run)
+  --dry-run               Preview only (default)
+  -h, --help              Show this help
+
+Examples:
+  ./$SCRIPT_NAME --repo zeroclaw-labs/zeroclaw --limit 50
+  ./$SCRIPT_NAME --repo zeroclaw-labs/zeroclaw --kind prs --state open --apply
+USAGE
+}
+
+die() { # Report a fatal error on stderr, then terminate the script with status 1.
+  printf '[%s] ERROR: %s\n' "$SCRIPT_NAME" "$*" >&2
+  exit 1
+}
+
+require_cmd() {
+  if ! command -v "$1" >/dev/null 2>&1; then
+    die "Required command not found: $1"
+  fi
+}
+
+urlencode() { # Percent-encode $1 with jq's @uri filter and print the result.
+  jq --null-input --raw-output --arg value "$1" '$value|@uri'
+}
+
+select_contributor_tier() {
+  local merged_count="$1"
+  if (( merged_count >= 50 )); then
+    echo "distinguished contributor"
+  elif (( merged_count >= 20 )); then
+    echo "principal contributor"
+  elif (( merged_count >= 10 )); then
+    echo "experienced contributor"
+  elif (( merged_count >= 5 )); then
+    echo "trusted contributor"
+  else
+    echo ""
+  fi
+}
+
+DRY_RUN=1 # 1 = preview only (default); --apply sets it to 0
+KIND="both" # checked after parsing against: both | prs | issues
+STATE="all" # checked after parsing against: all | open | closed
+LIMIT=0 # 0 means the fetched target list is not truncated
+REPO="" # empty means the repo is inferred via `gh repo view` later
+
+while (($# > 0)); do # consume options left to right; value-taking flags shift two words
+  case "$1" in
+    --repo)
+      [[ $# -ge 2 ]] || die "Missing value for --repo"
+      REPO="$2"
+      shift 2
+      ;;
+    --kind)
+      [[ $# -ge 2 ]] || die "Missing value for --kind"
+      KIND="$2"
+      shift 2
+      ;;
+    --state)
+      [[ $# -ge 2 ]] || die "Missing value for --state"
+      STATE="$2"
+      shift 2
+      ;;
+    --limit)
+      [[ $# -ge 2 ]] || die "Missing value for --limit"
+      LIMIT="$2"
+      shift 2
+      ;;
+    --apply)
+      DRY_RUN=0
+      shift
+      ;;
+    --dry-run)
+      DRY_RUN=1
+      shift
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      die "Unknown option: $1"
+      ;;
+  esac
+done
+
+# Reject unsupported --kind / --state values before any API call is made.
+case "$KIND" in
+  both|prs|issues) ;;
+  *) die "--kind must be one of: both, prs, issues" ;;
+esac
+case "$STATE" in
+  all|open|closed) ;;
+  *) die "--state must be one of: all, open, closed" ;;
+esac
+
+[[ "$LIMIT" =~ ^[0-9]+$ ]] || die "--limit must be a non-negative integer"
+# Force base 10: a zero-padded value such as "08" would otherwise fail as invalid octal in later numeric tests.
+LIMIT=$((10#$LIMIT))
+
+require_cmd gh
+require_cmd jq
+
+if ! gh auth status >/dev/null 2>&1; then
+  die "gh CLI is not authenticated. Run: gh auth login"
+fi
+
+if [[ -z "$REPO" ]]; then
+  REPO="$(gh repo view --json nameWithOwner --jq '.nameWithOwner' 2>/dev/null || true)"
+  [[ -n "$REPO" ]] || die "Unable to infer repo. Pass --repo <owner/repo>."
+fi
+
+echo "[$SCRIPT_NAME] Repo: $REPO"
+echo "[$SCRIPT_NAME] Mode: $([[ "$DRY_RUN" -eq 1 ]] && echo "dry-run" || echo "apply")"
+echo "[$SCRIPT_NAME] Kind: $KIND | State: $STATE | Limit: $LIMIT"
+
+TIERS_JSON='["trusted contributor","experienced contributor","principal contributor","distinguished contributor"]'
+
+TMP_FILES=()
+cleanup() {
+  if ((${#TMP_FILES[@]} > 0)); then
+    rm -f "${TMP_FILES[@]}"
+  fi
+}
+trap cleanup EXIT
+
+new_tmp_file() {
+  local tmp
+  tmp="$(mktemp)"
+  TMP_FILES+=("$tmp")
+  echo "$tmp"
+}
+
+targets_file="$(new_tmp_file)" # collects one JSON object per target: kind, number, author, author_type, labels
+
+if [[ "$KIND" == "both" || "$KIND" == "prs" ]]; then
+  gh api --paginate "repos/$REPO/pulls?state=$STATE&per_page=100" \
+    --jq '.[] | {
+      kind: "pr",
+      number: .number,
+      author: (.user.login // ""),
+      author_type: (.user.type // ""),
+      labels: [(.labels[]?.name // empty)]
+    }' >> "$targets_file"
+fi
+
+if [[ "$KIND" == "both" || "$KIND" == "issues" ]]; then # the jq filter below drops entries that carry a pull_request field
+  gh api --paginate "repos/$REPO/issues?state=$STATE&per_page=100" \
+    --jq '.[] | select(.pull_request | not) | {
+      kind: "issue",
+      number: .number,
+      author: (.user.login // ""),
+      author_type: (.user.type // ""),
+      labels: [(.labels[]?.name // empty)]
+    }' >> "$targets_file"
+fi
+
+if [[ "$LIMIT" -gt 0 ]]; then # --limit truncates the combined list only after everything was fetched
+  limited_file="$(new_tmp_file)"
+  head -n "$LIMIT" "$targets_file" > "$limited_file"
+  mv "$limited_file" "$targets_file"
+fi
+
+target_count="$(wc -l < "$targets_file" | tr -d ' ')" # line count with any space padding from wc removed
+if [[ "$target_count" -eq 0 ]]; then
+  echo "[$SCRIPT_NAME] No targets found."
+  exit 0
+fi
+
+echo "[$SCRIPT_NAME] Targets fetched: $target_count"
+
+# Ensure tier labels exist (trusted contributor might be new); new labels reuse an existing tier color.
+label_color=""
+for probe_label in "experienced contributor" "principal contributor" "distinguished contributor" "trusted contributor"; do
+  encoded_label="$(urlencode "$probe_label")"
+  color_candidate="$(gh api "repos/$REPO/labels/$encoded_label" --jq '.color' 2>/dev/null || true)"
+  # Accept only a 6-digit hex value: a failed lookup may still leave non-color text (error body) on stdout.
+  if [[ "$color_candidate" =~ ^[0-9A-Fa-f]{6}$ ]]; then
+    label_color="$(tr '[:lower:]' '[:upper:]' <<<"$color_candidate")"
+    break
+  fi
+done
+[[ -n "$label_color" ]] || label_color="C5D7A2"
+
+while IFS= read -r tier_label; do
+  [[ -n "$tier_label" ]] || continue
+  encoded_label="$(urlencode "$tier_label")"
+  if gh api "repos/$REPO/labels/$encoded_label" >/dev/null 2>&1; then
+    continue
+  fi
+
+  if [[ "$DRY_RUN" -eq 1 ]]; then
+    echo "[dry-run] Would create missing label: $tier_label (color=$label_color)"
+  else
+    gh api -X POST "repos/$REPO/labels" \
+      -f name="$tier_label" \
+      -f color="$label_color" >/dev/null
+    echo "[apply] Created missing label: $tier_label"
+  fi
+done < <(jq -r '.[]' <<<"$TIERS_JSON")
+
+# Build merged PR count cache by unique human authors.
+authors_file="$(new_tmp_file)"
+jq -r 'select(.author != "" and .author_type != "Bot") | .author' "$targets_file" | sort -u > "$authors_file"
+author_count="$(wc -l < "$authors_file" | tr -d ' ')"
+echo "[$SCRIPT_NAME] Unique human authors: $author_count"
+
+author_counts_file="$(new_tmp_file)"
+while IFS= read -r author; do
+  [[ -n "$author" ]] || continue
+  query="repo:$REPO is:pr is:merged author:$author"
+  # -f/-F make gh api default to POST; the search endpoint needs GET. A failed lookup must abort, not count as 0.
+  merged_count="$(gh api -X GET search/issues -f q="$query" -F per_page=1 --jq '.total_count')" \
+    || die "Failed to fetch merged PR count for @$author (search API error or rate limit)"
+  [[ "$merged_count" =~ ^[0-9]+$ ]] || die "Unexpected merged PR count for @$author: '$merged_count'"
+  printf '%s\t%s\n' "$author" "$merged_count" >> "$author_counts_file"
+done < "$authors_file"
+
+updated=0
+unchanged=0
+skipped=0
+failed=0
+# Reconcile each target: compute the desired tier, compare label sets, then report or apply the change.
+while IFS= read -r target_json; do
+  [[ -n "$target_json" ]] || continue
+
+  number="$(jq -r '.number' <<<"$target_json")"
+  kind="$(jq -r '.kind' <<<"$target_json")"
+  author="$(jq -r '.author' <<<"$target_json")"
+  author_type="$(jq -r '.author_type' <<<"$target_json")"
+  current_labels_json="$(jq -c '.labels // []' <<<"$target_json")"
+  # Targets without an author login, or whose author type is "Bot", are counted as skipped.
+  if [[ -z "$author" || "$author_type" == "Bot" ]]; then
+    skipped=$((skipped + 1))
+    continue
+  fi
+  # Look up the cached count (tab-separated: author, count); a missing or non-numeric value is treated as 0.
+  merged_count="$(awk -F '\t' -v key="$author" '$1 == key { print $2; exit }' "$author_counts_file")"
+  if ! [[ "$merged_count" =~ ^[0-9]+$ ]]; then
+    merged_count=0
+  fi
+  desired_tier="$(select_contributor_tier "$merged_count")"
+  # First current label that is one of the tier labels, or "" when none is present.
+  if ! current_tier="$(jq -r --argjson tiers "$TIERS_JSON" '[.[] | select(. as $label | ($tiers | index($label)) != null)][0] // ""' <<<"$current_labels_json" 2>/dev/null)"; then
+    echo "[warn] Skipping ${kind} #${number}: cannot parse current labels JSON" >&2
+    failed=$((failed + 1))
+    continue
+  fi
+  # Next label set: current labels minus every tier label, plus the desired tier when there is one.
+  if ! next_labels_json="$(jq -c --arg desired "$desired_tier" --argjson tiers "$TIERS_JSON" '
+    (. // [])
+    | map(select(. as $label | ($tiers | index($label)) == null))
+    | if $desired != "" then . + [$desired] else . end
+    | unique
+  ' <<<"$current_labels_json" 2>/dev/null)"; then
+    echo "[warn] Skipping ${kind} #${number}: cannot compute next labels" >&2
+    failed=$((failed + 1))
+    continue
+  fi
+  # Compare both label sets in deduplicated, sorted form.
+  if ! normalized_current="$(jq -c 'unique | sort' <<<"$current_labels_json" 2>/dev/null)"; then
+    echo "[warn] Skipping ${kind} #${number}: cannot normalize current labels" >&2
+    failed=$((failed + 1))
+    continue
+  fi
+
+  if ! normalized_next="$(jq -c 'unique | sort' <<<"$next_labels_json" 2>/dev/null)"; then
+    echo "[warn] Skipping ${kind} #${number}: cannot normalize next labels" >&2
+    failed=$((failed + 1))
+    continue
+  fi
+
+  if [[ "$normalized_current" == "$normalized_next" ]]; then
+    unchanged=$((unchanged + 1))
+    continue
+  fi
+  # Dry-run only prints the pending change and still counts it under "updated".
+  if [[ "$DRY_RUN" -eq 1 ]]; then
+    echo "[dry-run] ${kind} #${number} @${author} merged=${merged_count} tier: '${current_tier:-none}' -> '${desired_tier:-none}'"
+    updated=$((updated + 1))
+    continue
+  fi
+  # Apply mode sends the complete next label list for this issue/PR in a single PUT request.
+  payload="$(jq -cn --argjson labels "$next_labels_json" '{labels: $labels}')"
+  if gh api -X PUT "repos/$REPO/issues/$number/labels" --input - <<<"$payload" >/dev/null; then
+    echo "[apply] Updated ${kind} #${number} @${author} tier: '${current_tier:-none}' -> '${desired_tier:-none}'"
+    updated=$((updated + 1))
+  else
+    echo "[apply] FAILED ${kind} #${number}" >&2
+    failed=$((failed + 1))
+  fi
+done < "$targets_file"
+
+echo ""
+echo "[$SCRIPT_NAME] Summary"
+echo "  Targets:   $target_count"
+echo "  Updated:   $updated"
+echo "  Unchanged: $unchanged"
+echo "  Skipped:   $skipped"
+echo "  Failed:    $failed"
+
+if [[ "$failed" -gt 0 ]]; then
+  exit 1
+fi