ci: unify rust quality gate and add incremental docs/link checks
This commit is contained in:
parent
8a6273b988
commit
6528613c8d
12 changed files with 514 additions and 47 deletions
178
scripts/ci/collect_changed_links.py
Executable file
178
scripts/ci/collect_changed_links.py
Executable file
|
|
@ -0,0 +1,178 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Paths ending in ".md" or ".mdx" are treated as docs files.
DOC_PATH_RE = re.compile(r"\.mdx?$")
# Bare http(s) URL: runs until whitespace, an angle bracket, or a quote.
URL_RE = re.compile(r"https?://[^\s<>'\"]+")
# Inline markdown link or image ("[text](target)" / "![alt](target)");
# group 1 captures the raw target between the parentheses.
INLINE_LINK_RE = re.compile(r"!?\[[^\]]*\]\(([^)]+)\)")
# Reference-style definition at the start of a line ("[label]: target");
# group 1 captures the target up to the first whitespace.
REF_LINK_RE = re.compile(r"^\s*\[[^\]]+\]:\s*(\S+)")
# Characters stripped from the right end of a matched URL.
TRAILING_PUNCTUATION = ").,;:!?]}'\""
|
||||
|
||||
|
||||
def run_git(args: list[str]) -> subprocess.CompletedProcess[str]:
    """Run ``git`` with *args* and return the completed process.

    Output is captured as text and a non-zero exit status does not raise;
    callers inspect ``returncode`` / ``stdout`` themselves.
    """
    command = ["git"] + list(args)
    return subprocess.run(command, capture_output=True, text=True, check=False)
|
||||
|
||||
|
||||
def commit_exists(rev: str) -> bool:
    """Return True when *rev* names a commit object in the current repository.

    An empty revision is reported as missing without invoking git.
    """
    if rev:
        # `git cat-file -e <rev>^{commit}` exits 0 only if the object exists
        # and peels to a commit.
        probe = run_git(["cat-file", "-e", rev + "^{commit}"])
        return probe.returncode == 0
    return False
|
||||
|
||||
|
||||
def normalize_docs_files(raw: str) -> list[str]:
    """Split a newline-separated file list into stripped, non-empty paths."""
    if not raw:
        return []
    stripped = (entry.strip() for entry in raw.splitlines())
    return [entry for entry in stripped if entry]
|
||||
|
||||
|
||||
def infer_base_sha(provided: str) -> str:
    """Pick the base commit to diff against.

    Returns *provided* when it names an existing commit. Otherwise falls back
    to the merge base of ``origin/main`` and ``HEAD``. Returns an empty string
    when ``origin/main`` cannot be resolved or no usable merge base is found.
    """
    if commit_exists(provided):
        return provided

    has_origin_main = run_git(["rev-parse", "--verify", "origin/main"]).returncode == 0
    if not has_origin_main:
        return ""

    merge_base = run_git(["merge-base", "origin/main", "HEAD"]).stdout.strip()
    if commit_exists(merge_base):
        return merge_base
    return ""
|
||||
|
||||
|
||||
def infer_docs_files(base_sha: str, provided: list[str]) -> list[str]:
    """Return the docs files to scan.

    An explicit, non-empty *provided* list is returned unchanged. Otherwise the
    files changed between *base_sha* and ``HEAD`` are listed and reduced to
    markdown paths plus a couple of named extras. Without a base commit the
    result is empty.
    """
    if provided:
        return provided
    if not base_sha:
        return []

    # Non-markdown files that are still treated as docs.
    extra_docs = {"LICENSE", ".github/pull_request_template.md"}
    name_only = run_git(["diff", "--name-only", base_sha, "HEAD"])
    candidates = (entry.strip() for entry in name_only.stdout.splitlines())
    return [
        candidate
        for candidate in candidates
        if candidate and (DOC_PATH_RE.search(candidate) or candidate in extra_docs)
    ]
|
||||
|
||||
|
||||
def normalize_link_target(raw_target: str, source_path: str) -> str | None:
    """Normalize one markdown link target into a URL or a repository path.

    Args:
        raw_target: Target text as written in the link, optionally wrapped in
            angle brackets and optionally followed by a quoted title.
        source_path: Path of the file containing the link; relative targets
            are resolved against its directory.

    Returns:
        The http(s) URL with trailing punctuation stripped, or the normalized
        path of a local target (fragment and query removed). ``None`` when the
        target is empty, a pure ``#fragment``, uses a non-http(s) URI scheme,
        or resolves to nothing.
    """
    target = raw_target.strip()
    # Markdown allows the destination to be wrapped as <destination>.
    if target.startswith("<") and target.endswith(">"):
        target = target[1:-1].strip()

    if not target:
        return None

    # Drop an optional link title: `path "Title"` -> `path`.
    if " " in target:
        target = target.split()[0].strip()

    if not target or target.startswith("#"):
        return None

    # URI schemes are case-insensitive, so "HTTPS://..." is still a web URL.
    if target.lower().startswith(("http://", "https://")):
        return target.rstrip(TRAILING_PUNCTUATION)

    # Any other scheme (mailto:, tel:, javascript:, ftp:, data:, ...) is
    # neither an http(s) link nor a repository path; joining it onto the
    # source directory would only produce a bogus path. Two or more scheme
    # characters are required so a single letter followed by ":" is left alone.
    if re.match(r"[A-Za-z][A-Za-z0-9+.-]+:", target):
        return None

    path_without_fragment = target.split("#", 1)[0].split("?", 1)[0]
    if not path_without_fragment:
        return None

    if path_without_fragment.startswith("/"):
        # A leading slash is taken to mean "relative to the repository root".
        resolved = path_without_fragment.lstrip("/")
    else:
        resolved = os.path.normpath(
            os.path.join(os.path.dirname(source_path) or ".", path_without_fragment)
        )

    if not resolved or resolved == ".":
        return None

    return resolved
|
||||
|
||||
|
||||
def extract_links(text: str, source_path: str) -> list[str]:
    """Collect link targets found in one piece of text.

    Results are ordered as: bare http(s) URLs, then inline link/image targets,
    then a reference-style definition if *text* starts with one. Duplicates
    are not removed here.
    """
    bare_urls = (hit.rstrip(TRAILING_PUNCTUATION) for hit in URL_RE.findall(text))
    found: list[str] = [url for url in bare_urls if url]

    # Gather the raw markdown targets first, then normalize them in one pass.
    raw_targets = list(INLINE_LINK_RE.findall(text))
    reference = REF_LINK_RE.match(text)
    if reference:
        raw_targets.append(reference.group(1))

    for raw_target in raw_targets:
        cleaned = normalize_link_target(raw_target, source_path)
        if cleaned:
            found.append(cleaned)

    return found
|
||||
|
||||
|
||||
def added_lines_for_file(base_sha: str, path: str) -> list[str]:
    """Return the lines added to *path* since *base_sha*.

    Args:
        base_sha: Base commit to diff against ``HEAD``. When empty, the whole
            current file is treated as added.
        path: File to inspect.

    Returns:
        Added lines without the leading ``+`` marker, or every line of the
        file when no base commit is given (empty if the file does not exist).
    """
    if not base_sha:
        file_path = Path(path)
        if not file_path.is_file():
            return []
        return file_path.read_text(encoding="utf-8", errors="ignore").splitlines()

    diff = run_git(["diff", "--unified=0", base_sha, "HEAD", "--", path])
    lines: list[str] = []
    # The "+++ b/<path>" file header only appears before the first hunk, so
    # additions are collected only once a hunk header has been seen. Skipping
    # every "+++"-prefixed line instead would also drop added content that
    # itself starts with "++".
    in_hunk = False
    for raw_line in diff.stdout.splitlines():
        if raw_line.startswith("@@"):
            in_hunk = True
        elif raw_line.startswith("diff --git "):
            in_hunk = False
        elif in_hunk and raw_line.startswith("+"):
            lines.append(raw_line[1:])
    return lines
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: write the de-duplicated added links to ``--output``.

    Always returns 0. When no docs file exists on disk, the output file is
    written empty.
    """
    parser = argparse.ArgumentParser(description="Collect HTTP(S) links added in changed docs lines")
    parser.add_argument("--base", default="", help="Base commit SHA")
    parser.add_argument("--docs-files", default="", help="Newline-separated docs files list")
    parser.add_argument("--output", required=True, help="Output file for unique URLs")
    args = parser.parse_args()

    output_path = Path(args.output)
    base_sha = infer_base_sha(args.base)
    candidates = infer_docs_files(base_sha, normalize_docs_files(args.docs_files))

    # Files named in the list but absent from the working tree are ignored.
    existing_files = [candidate for candidate in candidates if Path(candidate).is_file()]
    if not existing_files:
        output_path.write_text("", encoding="utf-8")
        print("No docs files available for link collection.")
        return 0

    # dict keys keep first-seen order, giving an order-preserving de-duplication.
    first_seen = dict.fromkeys(
        link
        for path in existing_files
        for line in added_lines_for_file(base_sha, path)
        for link in extract_links(line, path)
    )
    unique_urls = list(first_seen)

    # One link per line; an empty result yields an empty file.
    output_path.write_text("".join(f"{url}\n" for url in unique_urls), encoding="utf-8")
    print(f"Collected {len(unique_urls)} added link(s) from {len(existing_files)} docs file(s).")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
28
scripts/ci/docs_links_gate.sh
Executable file
28
scripts/ci/docs_links_gate.sh
Executable file
|
|
@ -0,0 +1,28 @@
|
|||
#!/usr/bin/env bash
#
# Docs link gate: collects the links found on added lines of changed docs
# files and checks them with lychee.
#
# Environment:
#   BASE_SHA    optional base commit, forwarded to the collector as --base
#   DOCS_FILES  optional docs file list, forwarded as-is as --docs-files

set -euo pipefail

BASE_SHA="${BASE_SHA:-}"
DOCS_FILES_RAW="${DOCS_FILES:-}"

# Scratch file that receives the collected links; removed on any exit.
LINKS_FILE="$(mktemp)"
trap 'rm -f "$LINKS_FILE"' EXIT

COLLECTOR_ARGS=(
  --base "$BASE_SHA"
  --docs-files "$DOCS_FILES_RAW"
  --output "$LINKS_FILE"
)
python3 ./scripts/ci/collect_changed_links.py "${COLLECTOR_ARGS[@]}"

# -s is true only for an existing, non-empty file.
if [[ ! -s "$LINKS_FILE" ]]; then
  echo "No added links detected in changed docs lines."
  exit 0
fi

command -v lychee >/dev/null 2>&1 || {
  echo "lychee is required to run docs link gate locally."
  echo "Install via: cargo install lychee"
  exit 1
}

echo "Checking added links with lychee (offline mode)..."
lychee --offline --no-progress --format detailed "$LINKS_FILE"
|
||||
181
scripts/ci/docs_quality_gate.sh
Executable file
181
scripts/ci/docs_quality_gate.sh
Executable file
|
|
@ -0,0 +1,181 @@
|
|||
#!/usr/bin/env bash
#
# Docs quality gate: runs markdownlint-cli2 on changed docs files and blocks
# only on issues reported for lines changed since the base commit. Issues on
# untouched lines are printed but do not fail the gate.
#
# Environment:
#   BASE_SHA    optional base commit; defaults to merge-base(origin/main, HEAD)
#   DOCS_FILES  optional newline-separated list of docs files; defaults to the
#               markdown files (plus LICENSE and the PR template) changed
#               between BASE_SHA and HEAD
#
# Exit status: 0 when nothing blocks, otherwise the lint status (full-file
# fallback) or 1 (blocking issues on changed lines).

set -euo pipefail

BASE_SHA="${BASE_SHA:-}"
DOCS_FILES_RAW="${DOCS_FILES:-}"

if [ -z "$BASE_SHA" ] && git rev-parse --verify origin/main >/dev/null 2>&1; then
  # merge-base exits non-zero when there is no common ancestor; tolerate that
  # so `set -e` does not abort before the full-file fallback below can engage.
  BASE_SHA="$(git merge-base origin/main HEAD || true)"
fi

if [ -z "$DOCS_FILES_RAW" ] && [ -n "$BASE_SHA" ] && git cat-file -e "$BASE_SHA^{commit}" 2>/dev/null; then
  DOCS_FILES_RAW="$(git diff --name-only "$BASE_SHA" HEAD | awk '
    /\.md$/ || /\.mdx$/ || $0 == "LICENSE" || $0 == ".github/pull_request_template.md" {
      print
    }
  ')"
fi

if [ -z "$DOCS_FILES_RAW" ]; then
  echo "No docs files detected; skipping docs quality gate."
  exit 0
fi

if [ -z "$BASE_SHA" ] || ! git cat-file -e "$BASE_SHA^{commit}" 2>/dev/null; then
  echo "BASE_SHA is missing or invalid; falling back to full-file markdown lint."
  BASE_SHA=""
fi

# Split the newline-separated list into an array, dropping empty entries.
ALL_FILES=()
while IFS= read -r file; do
  if [ -n "$file" ]; then
    ALL_FILES+=("$file")
  fi
done < <(printf '%s\n' "$DOCS_FILES_RAW")

if [ "${#ALL_FILES[@]}" -eq 0 ]; then
  echo "No docs files detected after normalization; skipping docs quality gate."
  exit 0
fi

# Files in the list that are not present in the working tree are skipped.
EXISTING_FILES=()
for file in "${ALL_FILES[@]}"; do
  if [ -f "$file" ]; then
    EXISTING_FILES+=("$file")
  fi
done

if [ "${#EXISTING_FILES[@]}" -eq 0 ]; then
  echo "No existing docs files to lint; skipping docs quality gate."
  exit 0
fi

if command -v npx >/dev/null 2>&1; then
  MD_CMD=(npx --yes markdownlint-cli2@0.20.0)
elif command -v markdownlint-cli2 >/dev/null 2>&1; then
  MD_CMD=(markdownlint-cli2)
else
  echo "markdownlint-cli2 is required (via npx or local binary)."
  exit 1
fi

echo "Linting docs files: ${EXISTING_FILES[*]}"

# All scratch files live in one directory that is removed on any exit,
# including failures part-way through the script.
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT

LINT_OUTPUT_FILE="$TMP_DIR/lint-output.txt"
CHANGED_LINES_JSON_FILE="$TMP_DIR/changed-lines.json"
FILTERED_OUTPUT_FILE="$TMP_DIR/filtered-output.txt"

# The lint status is inspected below, so it must not trip `set -e`.
set +e
"${MD_CMD[@]}" "${EXISTING_FILES[@]}" >"$LINT_OUTPUT_FILE" 2>&1
LINT_EXIT=$?
set -e

if [ "$LINT_EXIT" -eq 0 ]; then
  cat "$LINT_OUTPUT_FILE"
  exit 0
fi

# Without a usable base commit there is nothing to filter against, so the
# full-file lint result decides.
if [ -z "$BASE_SHA" ]; then
  cat "$LINT_OUTPUT_FILE"
  exit "$LINT_EXIT"
fi

# Step 1: map each file to the [start, end] line ranges added/changed in HEAD.
python3 - "$BASE_SHA" "${EXISTING_FILES[@]}" >"$CHANGED_LINES_JSON_FILE" <<'PY'
import json
import re
import subprocess
import sys

base = sys.argv[1]
files = sys.argv[2:]

changed = {}
# Hunk header "@@ -a,b +c,d @@": capture the new-side start and optional count.
hunk = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@")

for path in files:
    proc = subprocess.run(
        ["git", "diff", "--unified=0", base, "HEAD", "--", path],
        check=False,
        capture_output=True,
        text=True,
    )
    ranges = []
    for line in proc.stdout.splitlines():
        m = hunk.match(line)
        if not m:
            continue
        start = int(m.group(1))
        # An omitted count means exactly one line; a count of 0 is a pure deletion.
        count = int(m.group(2) or "1")
        if count > 0:
            ranges.append([start, start + count - 1])
    changed[path] = ranges

print(json.dumps(changed))
PY

# Step 2: classify each lint error as blocking (on a changed line) or baseline.
set +e
python3 - "$LINT_OUTPUT_FILE" "$CHANGED_LINES_JSON_FILE" >"$FILTERED_OUTPUT_FILE" <<'PY'
import json
import re
import sys

lint_file = sys.argv[1]
changed_file = sys.argv[2]

with open(changed_file, "r", encoding="utf-8") as f:
    changed = json.load(f)

# Lint lines look like "path:line error RULE message" or, when the rule
# reports a column, "path:line:column error RULE message". The optional
# ":column" must be consumed explicitly; otherwise the lazy path group
# swallows ":line" and the column is mistaken for the line number.
line_re = re.compile(r"^(.+?):(\d+)(?::\d+)?\s+error\s+(MD\d+(?:/[^\s]+)?)\s+(.*)$")

blocking = []
baseline = []
other_lines = []

with open(lint_file, "r", encoding="utf-8") as f:
    for raw_line in f:
        line = raw_line.rstrip("\n")
        m = line_re.match(line)
        if not m:
            other_lines.append(line)
            continue

        path, line_no_s, rule, msg = m.groups()
        line_no = int(line_no_s)
        ranges = changed.get(path, [])

        is_changed_line = any(start <= line_no <= end for start, end in ranges)
        entry = f"{path}:{line_no} {rule} {msg}"
        if is_changed_line:
            blocking.append(entry)
        else:
            baseline.append(entry)

if baseline:
    print("Existing markdown issues outside changed lines (non-blocking):")
    for entry in baseline:
        print(f" - {entry}")

if blocking:
    print("Markdown issues introduced on changed lines (blocking):")
    for entry in blocking:
        print(f" - {entry}")
    print(f"Blocking markdown issues: {len(blocking)}")
    sys.exit(1)

if baseline:
    print("No blocking markdown issues on changed lines.")
    sys.exit(0)

# Nothing parsed as a lint error: echo the raw linter output for context.
for line in other_lines:
    print(line)
print("No blocking markdown issues on changed lines.")
PY
SCRIPT_EXIT=$?
set -e

cat "$FILTERED_OUTPUT_FILE"

exit "$SCRIPT_EXIT"
|
||||
19
scripts/ci/rust_quality_gate.sh
Executable file
19
scripts/ci/rust_quality_gate.sh
Executable file
|
|
@ -0,0 +1,19 @@
|
|||
#!/usr/bin/env bash
#
# Rust quality gate: formatting check followed by clippy.
#
# Usage: rust_quality_gate.sh [--strict]
#   default   clippy denies only the clippy::correctness group
#   --strict  clippy denies all warnings

set -euo pipefail

MODE="correctness"
case "${1:-}" in
  --strict)
    MODE="strict"
    ;;
esac

echo "==> rust quality: cargo fmt --all -- --check"
cargo fmt --all -- --check

case "$MODE" in
  strict)
    echo "==> rust quality: cargo clippy --locked --all-targets -- -D warnings"
    cargo clippy --locked --all-targets -- -D warnings
    ;;
  *)
    echo "==> rust quality: cargo clippy --locked --all-targets -- -D clippy::correctness"
    cargo clippy --locked --all-targets -- -D clippy::correctness
    ;;
esac
|
||||
Loading…
Add table
Add a link
Reference in a new issue