From a35d1e37c8b66654083a61719bf8dc189067eb04 Mon Sep 17 00:00:00 2001
From: Chummy
Date: Tue, 17 Feb 2026 21:25:50 +0800
Subject: [PATCH] chore(labeler): normalize module labels and backfill contributor tiers (#462)

Co-authored-by: Will Sarg <12886992+willsarg@users.noreply.github.com>
---
Review notes (outside the commit message):
  * Merge-conflict markers had been committed as added lines in
    .github/pull_request_template.md and .github/workflows/auto-response.yml.
    The template now keeps only the spaced module-label form. For
    auto-response.yml the `main` side of the conflict is kept (its comment
    requires the color to stay in sync with labeler.yml, which this patch
    does not change); that leaves the file untouched, so it no longer
    appears in this patch.
  * docs/ci-map.md: the remaining `provider:*` / `tool:*` examples now use
    the spaced form too.
  * Leading whitespace of hunk lines and the `<module>` / `<component>`
    placeholders are reconstructed; verify against the tree, or apply with
    the --ignore-whitespace option.

 .github/pull_request_template.md       |   2 +-
 .github/workflows/labeler.yml          |  27 ++-
 docs/ci-map.md                         |   6 +-
 docs/pr-workflow.md                    |   2 +-
 docs/reviewer-playbook.md              |   2 +-
 scripts/recompute_contributor_tiers.sh | 324 +++++++++++++++++++++++++
 6 files changed, 346 insertions(+), 17 deletions(-)
 create mode 100755 scripts/recompute_contributor_tiers.sh

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -12,7 +12,7 @@ Describe this PR in 2-5 bullets:
 - Risk label (`risk: low|medium|high`):
 - Size label (`size: XS|S|M|L|XL`, auto-managed/read-only):
 - Scope labels (`core|agent|channel|config|cron|daemon|doctor|gateway|health|heartbeat|integration|memory|observability|onboard|provider|runtime|security|service|skillforge|skills|tool|tunnel|docs|dependencies|ci|tests|scripts|dev`, comma-separated):
-- Module labels (`<module>:<component>`, for example `channel:telegram`, `provider:kimi`, `tool:shell`):
+- Module labels (`<module>: <component>`, for example `channel: telegram`, `provider: kimi`, `tool: shell`):
 - Contributor tier label (`trusted contributor|experienced contributor|principal contributor|distinguished contributor`, auto-managed/read-only; author merged PRs >=5/10/20/50):
 - If any auto-label is incorrect, note requested correction:
 
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
index d629a1f..10d8bfb 100644
--- a/.github/workflows/labeler.yml
+++ b/.github/workflows/labeler.yml
@@ -325,13 +325,18 @@ jobs:
               return pattern.test(text);
             }
 
+            function formatModuleLabel(prefix, segment) {
+              return `${prefix}: ${segment}`;
+            }
+
             function parseModuleLabel(label) {
-              const separatorIndex = label.indexOf(":");
-              if (separatorIndex <= 0 || separatorIndex >= label.length - 1) return null;
-              return {
-                prefix: label.slice(0, separatorIndex),
-                segment: label.slice(separatorIndex + 1),
-              };
+              if (typeof label !== "string") return null;
+              const match = label.match(/^([^:]+):\s*(.+)$/);
+              if (!match) return null;
+              const prefix = match[1].trim().toLowerCase();
+              const segment = (match[2] || "").trim().toLowerCase();
+              if (!prefix || !segment) return null;
+              return { prefix, segment };
             }
 
             function sortByPriority(labels, priorityIndex) {
@@ -389,7 +394,7 @@ jobs:
               for (const [prefix, segments] of segmentsByPrefix) {
                 const hasSpecificSegment = [...segments].some((segment) => segment !== "core");
                 if (hasSpecificSegment) {
-                  refined.delete(`${prefix}:core`);
+                  refined.delete(formatModuleLabel(prefix, "core"));
                 }
               }
 
@@ -418,7 +423,7 @@ jobs:
                 if (uniqueSegments.length === 0) continue;
 
                 if (uniqueSegments.length === 1) {
-                  compactedModuleLabels.add(`${prefix}:${uniqueSegments[0]}`);
+                  compactedModuleLabels.add(formatModuleLabel(prefix, uniqueSegments[0]));
                 } else {
                   forcePathPrefixes.add(prefix);
                 }
@@ -609,7 +614,7 @@ jobs:
                 segment = normalizeLabelSegment(segment);
                 if (!segment) continue;
 
-                detectedModuleLabels.add(`${rule.prefix}:${segment}`);
+                detectedModuleLabels.add(formatModuleLabel(rule.prefix, segment));
               }
             }
 
@@ -635,7 +640,7 @@ jobs:
 
               for (const keyword of providerKeywordHints) {
                 if (containsKeyword(searchableText, keyword)) {
-                  detectedModuleLabels.add(`provider:${keyword}`);
+                  detectedModuleLabels.add(formatModuleLabel("provider", keyword));
                 }
               }
             }
@@ -661,7 +666,7 @@ jobs:
 
               for (const keyword of channelKeywordHints) {
                 if (containsKeyword(searchableText, keyword)) {
-                  detectedModuleLabels.add(`channel:${keyword}`);
+                  detectedModuleLabels.add(formatModuleLabel("channel", keyword));
                 }
               }
             }
diff --git a/docs/ci-map.md b/docs/ci-map.md
--- a/docs/ci-map.md
+++ b/docs/ci-map.md
@@ -27,7 +27,7 @@ Merge-blocking checks should stay small and deterministic. Optional checks are u
 ### Optional Repository Automation
 
 - `.github/workflows/labeler.yml` (`PR Labeler`)
-    - Purpose: scope/path labels + size/risk labels + fine-grained module labels (`<module>:<component>`)
+    - Purpose: scope/path labels + size/risk labels + fine-grained module labels (`<module>: <component>`)
     - Additional behavior: label descriptions are auto-managed as hover tooltips to explain each auto-judgment rule
-    - Additional behavior: provider-related keywords in provider/config/onboard/integration changes are promoted to `provider:*` labels (for example `provider:kimi`, `provider:deepseek`)
-    - Additional behavior: hierarchical de-duplication keeps only the most specific scope labels (for example `tool:composio` suppresses `tool:core` and `tool`)
+    - Additional behavior: provider-related keywords in provider/config/onboard/integration changes are promoted to `provider: *` labels (for example `provider: kimi`, `provider: deepseek`)
+    - Additional behavior: hierarchical de-duplication keeps only the most specific scope labels (for example `tool: composio` suppresses `tool: core` and `tool`)
diff --git a/docs/pr-workflow.md b/docs/pr-workflow.md
index 3c62711..2c154ef 100644
--- a/docs/pr-workflow.md
+++ b/docs/pr-workflow.md
@@ -244,7 +244,7 @@ Label discipline:
 - Path labels identify subsystem ownership quickly.
 - Size labels drive batching strategy.
 - Risk labels drive review depth (`risk: low/medium/high`).
-- Module labels (`<module>:<component>`) improve reviewer routing for integration-specific changes and future newly-added modules.
+- Module labels (`<module>: <component>`) improve reviewer routing for integration-specific changes and future newly-added modules.
 - `risk: manual` allows maintainers to preserve a human risk judgment when automation lacks context.
 - `no-stale` is reserved for accepted-but-blocked work.
 
diff --git a/docs/reviewer-playbook.md b/docs/reviewer-playbook.md
index bc42509..6f72fea 100644
--- a/docs/reviewer-playbook.md
+++ b/docs/reviewer-playbook.md
@@ -14,7 +14,7 @@ Use it to reduce review latency without reducing quality.
 For every new PR, do a fast intake pass:
 
 1. Confirm template completeness (`summary`, `validation`, `security`, `rollback`).
-2. Confirm labels (`size:*`, `risk:*`, scope labels such as `provider`/`channel`/`security`, module-scoped labels such as `channel:*`/`provider:*`/`tool:*`, and contributor tier labels when applicable) are present and plausible.
+2. Confirm labels (`size:*`, `risk:*`, scope labels such as `provider`/`channel`/`security`, module-scoped labels such as `channel: *`/`provider: *`/`tool: *`, and contributor tier labels when applicable) are present and plausible.
 3. Confirm CI signal status (`CI Required Gate`).
 4. Confirm scope is one concern (reject mixed mega-PRs unless justified).
 5. Confirm privacy/data-hygiene and neutral test wording requirements are satisfied.
diff --git a/scripts/recompute_contributor_tiers.sh b/scripts/recompute_contributor_tiers.sh
new file mode 100755
--- /dev/null
+++ b/scripts/recompute_contributor_tiers.sh
@@ -0,0 +1,352 @@
+#!/usr/bin/env bash
+#
+# Recompute contributor tier labels on existing PRs and issues.
+#
+# For each listed PR/issue, look up how many PRs its author has had merged in
+# the target repository, pick the matching tier label and replace any other
+# tier label on the item. Dry-run by default; pass --apply to write changes.
+# Requires the `gh` CLI (authenticated) and `jq`.
+
+set -euo pipefail
+
+SCRIPT_NAME="$(basename "$0")"
+
+# Print command-line help to stdout.
+usage() {
+  cat <<USAGE
+Recompute contributor tier labels for existing PRs/issues.
+
+Usage:
+  ./$SCRIPT_NAME [options]
+
+Options:
+  --repo <owner/repo>        Target repository (default: current gh repo)
+  --kind <both|prs|issues>   Target objects (default: both)
+  --state <all|open|closed>  State filter for listing objects (default: all)
+  --limit <N>                Limit processed objects after fetch (default: 0 = no limit)
+  --apply                    Apply label updates (default is dry-run)
+  --dry-run                  Preview only (default)
+  -h, --help                 Show this help
+
+Examples:
+  ./$SCRIPT_NAME --repo zeroclaw-labs/zeroclaw --limit 50
+  ./$SCRIPT_NAME --repo zeroclaw-labs/zeroclaw --kind prs --state open --apply
+USAGE
+}
+
+# Print an error to stderr and exit with status 1.
+die() {
+  echo "[$SCRIPT_NAME] ERROR: $*" >&2
+  exit 1
+}
+
+# Abort unless the command named by $1 can be found.
+require_cmd() {
+  if ! command -v "$1" >/dev/null 2>&1; then
+    die "Required command not found: $1"
+  fi
+}
+
+# Percent-encode $1 (label names contain spaces) for use in an API path.
+urlencode() {
+  jq -nr --arg value "$1" '$value|@uri'
+}
+
+# Echo the tier label for a merged-PR count ($1); echo an empty line when
+# the count is below the lowest threshold.
+select_contributor_tier() {
+  local merged_count="$1"
+  if (( merged_count >= 50 )); then
+    echo "distinguished contributor"
+  elif (( merged_count >= 20 )); then
+    echo "principal contributor"
+  elif (( merged_count >= 10 )); then
+    echo "experienced contributor"
+  elif (( merged_count >= 5 )); then
+    echo "trusted contributor"
+  else
+    echo ""
+  fi
+}
+
+DRY_RUN=1
+KIND="both"
+STATE="all"
+LIMIT=0
+REPO=""
+
+while (($# > 0)); do
+  case "$1" in
+    --repo)
+      [[ $# -ge 2 ]] || die "Missing value for --repo"
+      REPO="$2"
+      shift 2
+      ;;
+    --kind)
+      [[ $# -ge 2 ]] || die "Missing value for --kind"
+      KIND="$2"
+      shift 2
+      ;;
+    --state)
+      [[ $# -ge 2 ]] || die "Missing value for --state"
+      STATE="$2"
+      shift 2
+      ;;
+    --limit)
+      [[ $# -ge 2 ]] || die "Missing value for --limit"
+      LIMIT="$2"
+      shift 2
+      ;;
+    --apply)
+      DRY_RUN=0
+      shift
+      ;;
+    --dry-run)
+      DRY_RUN=1
+      shift
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      die "Unknown option: $1"
+      ;;
+  esac
+done
+
+case "$KIND" in
+  both|prs|issues) ;;
+  *) die "--kind must be one of: both, prs, issues" ;;
+esac
+
+case "$STATE" in
+  all|open|closed) ;;
+  *) die "--state must be one of: all, open, closed" ;;
+esac
+
+if ! [[ "$LIMIT" =~ ^[0-9]+$ ]]; then
+  die "--limit must be a non-negative integer"
+fi
+# Force base 10 so zero-padded input such as "08" is not read as octal.
+LIMIT=$((10#$LIMIT))
+
+require_cmd gh
+require_cmd jq
+
+if ! gh auth status >/dev/null 2>&1; then
+  die "gh CLI is not authenticated. Run: gh auth login"
+fi
+
+if [[ -z "$REPO" ]]; then
+  REPO="$(gh repo view --json nameWithOwner --jq '.nameWithOwner' 2>/dev/null || true)"
+  [[ -n "$REPO" ]] || die "Unable to infer repo. Pass --repo <owner/repo>."
+fi
+
+echo "[$SCRIPT_NAME] Repo: $REPO"
+echo "[$SCRIPT_NAME] Mode: $([[ "$DRY_RUN" -eq 1 ]] && echo "dry-run" || echo "apply")"
+echo "[$SCRIPT_NAME] Kind: $KIND | State: $STATE | Limit: $LIMIT"
+
+TIERS_JSON='["trusted contributor","experienced contributor","principal contributor","distinguished contributor"]'
+
+# Keep every scratch file inside one directory owned by the main shell.
+# new_tmp_file is always called through $(...), i.e. in a subshell, so it
+# cannot record paths in a parent-shell variable; removing the directory on
+# exit cleans up everything it created.
+TMP_DIR="$(mktemp -d)"
+cleanup() {
+  rm -rf "$TMP_DIR"
+}
+trap cleanup EXIT
+
+# Create an empty scratch file and echo its path.
+new_tmp_file() {
+  mktemp "$TMP_DIR/tmp.XXXXXX"
+}
+
+targets_file="$(new_tmp_file)"
+
+# Collect targets; later steps read this file as one JSON object per line.
+if [[ "$KIND" == "both" || "$KIND" == "prs" ]]; then
+  gh api --paginate "repos/$REPO/pulls?state=$STATE&per_page=100" \
+    --jq '.[] | {
+      kind: "pr",
+      number: .number,
+      author: (.user.login // ""),
+      author_type: (.user.type // ""),
+      labels: [(.labels[]?.name // empty)]
+    }' >> "$targets_file"
+fi
+
+# The filter drops entries that carry a .pull_request field (i.e. PRs).
+if [[ "$KIND" == "both" || "$KIND" == "issues" ]]; then
+  gh api --paginate "repos/$REPO/issues?state=$STATE&per_page=100" \
+    --jq '.[] | select(.pull_request | not) | {
+      kind: "issue",
+      number: .number,
+      author: (.user.login // ""),
+      author_type: (.user.type // ""),
+      labels: [(.labels[]?.name // empty)]
+    }' >> "$targets_file"
+fi
+
+if [[ "$LIMIT" -gt 0 ]]; then
+  limited_file="$(new_tmp_file)"
+  head -n "$LIMIT" "$targets_file" > "$limited_file"
+  mv "$limited_file" "$targets_file"
+fi
+
+target_count="$(wc -l < "$targets_file" | tr -d ' ')"
+if [[ "$target_count" -eq 0 ]]; then
+  echo "[$SCRIPT_NAME] No targets found."
+  exit 0
+fi
+
+echo "[$SCRIPT_NAME] Targets fetched: $target_count"
+
+# Ensure tier labels exist (trusted contributor might be new). Reuse the
+# color of an existing tier label when one can be read.
+label_color=""
+for probe_label in "experienced contributor" "principal contributor" "distinguished contributor" "trusted contributor"; do
+  encoded_label="$(urlencode "$probe_label")"
+  color_candidate="$(gh api "repos/$REPO/labels/$encoded_label" --jq '.color' 2>/dev/null || true)"
+  # Accept only a six-digit hex value so that output from a failed lookup
+  # is never mistaken for a color.
+  if [[ "$color_candidate" =~ ^[0-9A-Fa-f]{6}$ ]]; then
+    label_color="$(echo "$color_candidate" | tr '[:lower:]' '[:upper:]')"
+    break
+  fi
+done
+[[ -n "$label_color" ]] || label_color="C5D7A2"
+
+while IFS= read -r tier_label; do
+  [[ -n "$tier_label" ]] || continue
+  encoded_label="$(urlencode "$tier_label")"
+  if gh api "repos/$REPO/labels/$encoded_label" >/dev/null 2>&1; then
+    continue
+  fi
+
+  if [[ "$DRY_RUN" -eq 1 ]]; then
+    echo "[dry-run] Would create missing label: $tier_label (color=$label_color)"
+  else
+    gh api -X POST "repos/$REPO/labels" \
+      -f name="$tier_label" \
+      -f color="$label_color" >/dev/null
+    echo "[apply] Created missing label: $tier_label"
+  fi
+done < <(jq -r '.[]' <<<"$TIERS_JSON")
+
+# Build merged PR count cache by unique human authors.
+authors_file="$(new_tmp_file)"
+jq -r 'select(.author != "" and .author_type != "Bot") | .author' "$targets_file" | sort -u > "$authors_file"
+author_count="$(wc -l < "$authors_file" | tr -d ' ')"
+echo "[$SCRIPT_NAME] Unique human authors: $author_count"
+
+author_counts_file="$(new_tmp_file)"
+while IFS= read -r author; do
+  [[ -n "$author" ]] || continue
+  query="repo:$REPO is:pr is:merged author:$author"
+  # gh api defaults to POST once -f/-F fields are given; the search endpoint
+  # is GET-only, so force the method and let gh send the fields as a query
+  # string.
+  merged_count="$(gh api --method GET search/issues -f q="$query" -F per_page=1 --jq '.total_count' 2>/dev/null || true)"
+  if ! [[ "$merged_count" =~ ^[0-9]+$ ]]; then
+    # Do not fall back to 0: that would strip tier labels from this author's
+    # items. Leave the author out of the cache; their targets are counted as
+    # failed in the loop below.
+    echo "[warn] Could not fetch merged PR count for @${author}" >&2
+    continue
+  fi
+  printf '%s\t%s\n' "$author" "$merged_count" >> "$author_counts_file"
+done < "$authors_file"
+
+updated=0
+unchanged=0
+skipped=0
+failed=0
+
+while IFS= read -r target_json; do
+  [[ -n "$target_json" ]] || continue
+
+  number="$(jq -r '.number' <<<"$target_json")"
+  kind="$(jq -r '.kind' <<<"$target_json")"
+  author="$(jq -r '.author' <<<"$target_json")"
+  author_type="$(jq -r '.author_type' <<<"$target_json")"
+  current_labels_json="$(jq -c '.labels // []' <<<"$target_json")"
+
+  if [[ -z "$author" || "$author_type" == "Bot" ]]; then
+    skipped=$((skipped + 1))
+    continue
+  fi
+
+  merged_count="$(awk -F '\t' -v key="$author" '$1 == key { print $2; exit }' "$author_counts_file")"
+  if ! [[ "$merged_count" =~ ^[0-9]+$ ]]; then
+    echo "[warn] Skipping ${kind} #${number}: merged PR count unavailable for @${author}" >&2
+    failed=$((failed + 1))
+    continue
+  fi
+  desired_tier="$(select_contributor_tier "$merged_count")"
+
+  if ! current_tier="$(jq -r --argjson tiers "$TIERS_JSON" '[.[] | select(. as $label | ($tiers | index($label)) != null)][0] // ""' <<<"$current_labels_json" 2>/dev/null)"; then
+    echo "[warn] Skipping ${kind} #${number}: cannot parse current labels JSON" >&2
+    failed=$((failed + 1))
+    continue
+  fi
+
+  # Drop every tier label, then add back the desired one (if any).
+  if ! next_labels_json="$(jq -c --arg desired "$desired_tier" --argjson tiers "$TIERS_JSON" '
+    (. // [])
+    | map(select(. as $label | ($tiers | index($label)) == null))
+    | if $desired != "" then . + [$desired] else . end
+    | unique
+  ' <<<"$current_labels_json" 2>/dev/null)"; then
+    echo "[warn] Skipping ${kind} #${number}: cannot compute next labels" >&2
+    failed=$((failed + 1))
+    continue
+  fi
+
+  if ! normalized_current="$(jq -c 'unique | sort' <<<"$current_labels_json" 2>/dev/null)"; then
+    echo "[warn] Skipping ${kind} #${number}: cannot normalize current labels" >&2
+    failed=$((failed + 1))
+    continue
+  fi
+
+  if ! normalized_next="$(jq -c 'unique | sort' <<<"$next_labels_json" 2>/dev/null)"; then
+    echo "[warn] Skipping ${kind} #${number}: cannot normalize next labels" >&2
+    failed=$((failed + 1))
+    continue
+  fi
+
+  if [[ "$normalized_current" == "$normalized_next" ]]; then
+    unchanged=$((unchanged + 1))
+    continue
+  fi
+
+  if [[ "$DRY_RUN" -eq 1 ]]; then
+    echo "[dry-run] ${kind} #${number} @${author} merged=${merged_count} tier: '${current_tier:-none}' -> '${desired_tier:-none}'"
+    updated=$((updated + 1))
+    continue
+  fi
+
+  # PUT replaces the item's full label set with next_labels_json.
+  payload="$(jq -cn --argjson labels "$next_labels_json" '{labels: $labels}')"
+  if gh api -X PUT "repos/$REPO/issues/$number/labels" --input - <<<"$payload" >/dev/null; then
+    echo "[apply] Updated ${kind} #${number} @${author} tier: '${current_tier:-none}' -> '${desired_tier:-none}'"
+    updated=$((updated + 1))
+  else
+    echo "[apply] FAILED ${kind} #${number}" >&2
+    failed=$((failed + 1))
+  fi
+done < "$targets_file"
+
+echo ""
+echo "[$SCRIPT_NAME] Summary"
+echo " Targets: $target_count"
+echo " Updated: $updated"
+echo " Unchanged: $unchanged"
+echo " Skipped: $skipped"
+echo " Failed: $failed"
+
+if [[ "$failed" -gt 0 ]]; then
+  exit 1
+fi