feat: SkillForge — automated skill discovery, evaluation & integration engine (#144)
* feat: add SkillForge — automated skill discovery, evaluation, and integration engine SkillForge adds a 3-stage pipeline for autonomous skill management: - Scout: discovers candidate skills from GitHub (extensible to ClawHub, HuggingFace) - Evaluate: scores candidates on compatibility, quality, and security (weighted 0.30/0.35/0.35) - Integrate: generates standard SKILL.toml + SKILL.md manifests for approved candidates Thresholds: >=0.7 auto-integrate, 0.4-0.7 manual review, <0.4 skip. Uses only existing dependencies (reqwest, serde, tokio, tracing, chrono, anyhow). Includes unit tests for all modules. * fix: address code review feedback on SkillForge PR #115 - evaluate: whole-word matching for BAD_PATTERNS (fixes hackathon false positive) - evaluate: guard against future timestamps in recency bonus - integrate: escape URLs in TOML output via escape_toml() - integrate: handle control chars (\n, \r, \t, \b, \f) in escape_toml() - mod: redact github_token in Debug impl to prevent log leakage - mod: fix auto_integrated count when auto_integrate=false - mod: per-candidate error handling (single failure no longer aborts pipeline) - scout: add 30s request timeout, remove unused token field - deps: enable chrono serde feature for DateTime serialization - tests: add hackathon/exact-hack tests, update escape_toml test coverage * fix: address round-2 CodeRabbit review feedback - integrate: add sanitize_path_component() to prevent directory traversal - mod: GitHub scout failure now logs warning and continues (no pipeline abort) - scout: network/parse errors per-query use warn+continue instead of ? - scout: implement std::str::FromStr for ScoutSource (replaces custom from_str) - tests: add path sanitization tests (traversal, separators, dot trimming) --------- Co-authored-by: stawky <stakeswky@gmail.com>
This commit is contained in:
parent
2ac571f406
commit
35b63d6b12
7 changed files with 1098 additions and 1 deletions
261
src/skillforge/evaluate.rs
Normal file
261
src/skillforge/evaluate.rs
Normal file
|
|
@ -0,0 +1,261 @@
|
|||
//! Evaluator — scores discovered skill candidates across multiple dimensions.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::scout::ScoutResult;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scoring dimensions
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Scores {
    /// OS / arch / runtime compatibility (0.0–1.0).
    pub compatibility: f64,
    /// Code quality signals: stars, tests, docs (0.0–1.0).
    pub quality: f64,
    /// Security posture: license, known-bad patterns (0.0–1.0).
    pub security: f64,
}

impl Scores {
    /// Weighted total. Weights: compatibility 0.3, quality 0.35, security 0.35.
    ///
    /// The weights sum to 1.0, so the total stays within [0.0, 1.0] as long as
    /// every dimension does.
    pub fn total(&self) -> f64 {
        self.compatibility * 0.30 + self.quality * 0.35 + self.security * 0.35
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Recommendation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Recommendation {
    /// Score >= threshold → safe to auto-integrate.
    Auto,
    /// Score in [0.4, threshold) → needs human review.
    Manual,
    /// Score < 0.4 → skip entirely.
    Skip,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// EvalResult
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Outcome of evaluating one candidate: the per-dimension scores, the
/// weighted total, and the resulting recommendation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalResult {
    /// The candidate as returned by the scout stage (moved in, not borrowed).
    pub candidate: ScoutResult,
    /// Per-dimension scores (each 0.0–1.0).
    pub scores: Scores,
    /// Weighted total of `scores` — cached so callers need not recompute.
    pub total_score: f64,
    /// Action derived from `total_score` vs. the evaluator thresholds.
    pub recommendation: Recommendation,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Evaluator
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Scores scout candidates and maps totals onto a recommendation.
pub struct Evaluator {
    /// Minimum total score for auto-integration.
    min_score: f64,
}
|
||||
|
||||
/// Known-bad patterns in repo names / descriptions (matched as whole words).
///
/// Matched case-insensitively (callers lowercase the haystack first) and only
/// as whole words via `contains_word`, so e.g. "hackathon" is not flagged.
const BAD_PATTERNS: &[&str] = &[
    "malware",
    "exploit",
    "hack",
    "crack",
    "keygen",
    "ransomware",
    "trojan",
];
|
||||
|
||||
/// Check if `haystack` contains `word` as a whole word (bounded by non-alphanumeric chars).
///
/// A match counts only when the byte immediately before and after the matched
/// span is absent (string edge) or not an ASCII alphanumeric character.
fn contains_word(haystack: &str, word: &str) -> bool {
    let bytes = haystack.as_bytes();
    haystack.match_indices(word).any(|(start, matched)| {
        let end = start + matched.len();
        // Left boundary: string start, or preceding byte is not alphanumeric.
        let left_ok = start == 0 || !bytes[start - 1].is_ascii_alphanumeric();
        // Right boundary: string end, or following byte is not alphanumeric.
        let right_ok = end >= haystack.len() || !bytes[end].is_ascii_alphanumeric();
        left_ok && right_ok
    })
}
|
||||
|
||||
impl Evaluator {
    /// Create an evaluator; `min_score` is the auto-integration threshold.
    pub fn new(min_score: f64) -> Self {
        Self { min_score }
    }

    /// Score a candidate on every dimension and derive a recommendation.
    ///
    /// Thresholds: total >= `min_score` → `Auto`, total >= 0.4 → `Manual`,
    /// otherwise `Skip`. Consumes the candidate and embeds it in the result.
    pub fn evaluate(&self, candidate: ScoutResult) -> EvalResult {
        let compatibility = self.score_compatibility(&candidate);
        let quality = self.score_quality(&candidate);
        let security = self.score_security(&candidate);

        let scores = Scores {
            compatibility,
            quality,
            security,
        };
        let total_score = scores.total();

        let recommendation = if total_score >= self.min_score {
            Recommendation::Auto
        } else if total_score >= 0.4 {
            Recommendation::Manual
        } else {
            Recommendation::Skip
        };

        EvalResult {
            candidate,
            scores,
            total_score,
            recommendation,
        }
    }

    // -- Dimension scorers --------------------------------------------------

    /// Compatibility: favour Rust repos; penalise unknown languages.
    fn score_compatibility(&self, c: &ScoutResult) -> f64 {
        match c.language.as_deref() {
            Some("Rust") => 1.0,
            Some("Python" | "TypeScript" | "JavaScript") => 0.6,
            Some(_) => 0.3,
            None => 0.2,
        }
    }

    /// Quality: based on star count (log scale, capped at 1.0).
    fn score_quality(&self, c: &ScoutResult) -> f64 {
        // log2(stars + 1) / 10, capped at 1.0
        // (+1 keeps log2 defined at 0 stars; cap is reached around 1023 stars)
        let raw = ((c.stars as f64) + 1.0).log2() / 10.0;
        raw.min(1.0)
    }

    /// Security: license presence + bad-pattern check.
    ///
    /// Base 0.5; +0.3 with a license; -0.5 on the first bad-pattern hit
    /// (whole-word, in name or description); +0.2 if updated within the last
    /// 180 days. Result is clamped to [0.0, 1.0].
    fn score_security(&self, c: &ScoutResult) -> f64 {
        let mut score: f64 = 0.5;

        // License bonus
        if c.has_license {
            score += 0.3;
        }

        // Bad-pattern penalty (whole-word match). The penalty is applied at
        // most once — `break` on the first hit.
        let lower_name = c.name.to_lowercase();
        let lower_desc = c.description.to_lowercase();
        for pat in BAD_PATTERNS {
            if contains_word(&lower_name, pat) || contains_word(&lower_desc, pat) {
                score -= 0.5;
                break;
            }
        }

        // Recency bonus: updated within last 180 days (guard against future timestamps)
        // The 0..180 range excludes negative ages, i.e. `updated_at` in the future.
        if let Some(updated) = c.updated_at {
            let age_days = (chrono::Utc::now() - updated).num_days();
            if (0..180).contains(&age_days) {
                score += 0.2;
            }
        }

        score.clamp(0.0, 1.0)
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::skillforge::scout::{ScoutResult, ScoutSource};

    /// Build a minimal candidate; only stars/language/license vary, the rest
    /// are fixed benign values (recent `updated_at` grants the recency bonus).
    fn make_candidate(stars: u64, lang: Option<&str>, has_license: bool) -> ScoutResult {
        ScoutResult {
            name: "test-skill".into(),
            url: "https://github.com/test/test-skill".into(),
            description: "A test skill".into(),
            stars,
            language: lang.map(String::from),
            updated_at: Some(chrono::Utc::now()),
            source: ScoutSource::GitHub,
            owner: "test".into(),
            has_license,
        }
    }

    #[test]
    fn high_quality_rust_repo_gets_auto() {
        let eval = Evaluator::new(0.7);
        let c = make_candidate(500, Some("Rust"), true);
        let res = eval.evaluate(c);
        assert!(res.total_score >= 0.7, "score: {}", res.total_score);
        assert_eq!(res.recommendation, Recommendation::Auto);
    }

    #[test]
    fn low_star_no_license_gets_manual_or_skip() {
        let eval = Evaluator::new(0.7);
        let c = make_candidate(1, None, false);
        let res = eval.evaluate(c);
        assert!(res.total_score < 0.7, "score: {}", res.total_score);
        assert_ne!(res.recommendation, Recommendation::Auto);
    }

    #[test]
    fn bad_pattern_tanks_security() {
        let eval = Evaluator::new(0.7);
        let mut c = make_candidate(1000, Some("Rust"), true);
        c.name = "malware-skill".into();
        let res = eval.evaluate(c);
        // 0.5 base + 0.3 license - 0.5 bad_pattern + 0.2 recency = 0.5
        assert!(res.scores.security <= 0.5, "security: {}", res.scores.security);
    }

    #[test]
    fn scores_total_weighted() {
        // Weights sum to 1.0, so all-ones totals 1.0 and all-zeros totals 0.0.
        let s = Scores {
            compatibility: 1.0,
            quality: 1.0,
            security: 1.0,
        };
        assert!((s.total() - 1.0).abs() < f64::EPSILON);

        let s2 = Scores {
            compatibility: 0.0,
            quality: 0.0,
            security: 0.0,
        };
        assert!((s2.total()).abs() < f64::EPSILON);
    }

    #[test]
    fn hackathon_not_flagged_as_bad() {
        let eval = Evaluator::new(0.7);
        let mut c = make_candidate(500, Some("Rust"), true);
        c.name = "hackathon-tools".into();
        c.description = "Tools for hackathons and lifehacks".into();
        let res = eval.evaluate(c);
        // "hack" should NOT match "hackathon" or "lifehacks"
        assert!(res.scores.security >= 0.5, "security: {}", res.scores.security);
    }

    #[test]
    fn exact_hack_is_flagged() {
        let eval = Evaluator::new(0.7);
        let mut c = make_candidate(500, Some("Rust"), false);
        c.name = "hack-tool".into();
        c.updated_at = None;
        let res = eval.evaluate(c);
        // 0.5 base + 0.0 license - 0.5 bad_pattern + 0.0 recency = 0.0
        assert!(res.scores.security < 0.5, "security: {}", res.scores.security);
    }
}
|
||||
248
src/skillforge/integrate.rs
Normal file
248
src/skillforge/integrate.rs
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
//! Integrator — generates ZeroClaw-standard SKILL.toml + SKILL.md from scout results.
|
||||
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use chrono::Utc;
|
||||
use tracing::info;
|
||||
|
||||
use super::scout::ScoutResult;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Integrator
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Writes SKILL.toml + SKILL.md manifests for approved candidates.
pub struct Integrator {
    /// Root directory; each skill gets its own sanitized subdirectory.
    output_dir: PathBuf,
}
|
||||
|
||||
impl Integrator {
|
||||
pub fn new(output_dir: String) -> Self {
|
||||
Self {
|
||||
output_dir: PathBuf::from(output_dir),
|
||||
}
|
||||
}
|
||||
|
||||
/// Write SKILL.toml and SKILL.md for the given candidate.
|
||||
pub fn integrate(&self, candidate: &ScoutResult) -> Result<PathBuf> {
|
||||
let safe_name = sanitize_path_component(&candidate.name)?;
|
||||
let skill_dir = self.output_dir.join(&safe_name);
|
||||
fs::create_dir_all(&skill_dir)
|
||||
.with_context(|| format!("Failed to create dir: {}", skill_dir.display()))?;
|
||||
|
||||
let toml_path = skill_dir.join("SKILL.toml");
|
||||
let md_path = skill_dir.join("SKILL.md");
|
||||
|
||||
let toml_content = self.generate_toml(candidate);
|
||||
let md_content = self.generate_md(candidate);
|
||||
|
||||
fs::write(&toml_path, &toml_content)
|
||||
.with_context(|| format!("Failed to write {}", toml_path.display()))?;
|
||||
fs::write(&md_path, &md_content)
|
||||
.with_context(|| format!("Failed to write {}", md_path.display()))?;
|
||||
|
||||
info!(
|
||||
skill = candidate.name.as_str(),
|
||||
path = %skill_dir.display(),
|
||||
"Integrated skill"
|
||||
);
|
||||
|
||||
Ok(skill_dir)
|
||||
}
|
||||
|
||||
// -- Generators ---------------------------------------------------------
|
||||
|
||||
fn generate_toml(&self, c: &ScoutResult) -> String {
|
||||
let lang = c.language.as_deref().unwrap_or("unknown");
|
||||
let updated = c
|
||||
.updated_at
|
||||
.map(|d| d.format("%Y-%m-%d").to_string())
|
||||
.unwrap_or_else(|| "unknown".into());
|
||||
|
||||
format!(
|
||||
r#"# Auto-generated by SkillForge on {now}
|
||||
|
||||
[skill]
|
||||
name = "{name}"
|
||||
version = "0.1.0"
|
||||
description = "{description}"
|
||||
source = "{url}"
|
||||
owner = "{owner}"
|
||||
language = "{lang}"
|
||||
license = {license}
|
||||
stars = {stars}
|
||||
updated_at = "{updated}"
|
||||
|
||||
[skill.requirements]
|
||||
runtime = "zeroclaw >= 0.1"
|
||||
|
||||
[skill.metadata]
|
||||
auto_integrated = true
|
||||
forge_timestamp = "{now}"
|
||||
"#,
|
||||
now = Utc::now().format("%Y-%m-%dT%H:%M:%SZ"),
|
||||
name = escape_toml(&c.name),
|
||||
description = escape_toml(&c.description),
|
||||
url = escape_toml(&c.url),
|
||||
owner = escape_toml(&c.owner),
|
||||
lang = lang,
|
||||
license = if c.has_license { "true" } else { "false" },
|
||||
stars = c.stars,
|
||||
updated = updated,
|
||||
)
|
||||
}
|
||||
|
||||
fn generate_md(&self, c: &ScoutResult) -> String {
|
||||
let lang = c.language.as_deref().unwrap_or("unknown");
|
||||
format!(
|
||||
r#"# {name}
|
||||
|
||||
> Auto-generated by SkillForge
|
||||
|
||||
## Overview
|
||||
|
||||
- **Source**: [{url}]({url})
|
||||
- **Owner**: {owner}
|
||||
- **Language**: {lang}
|
||||
- **Stars**: {stars}
|
||||
- **License**: {license}
|
||||
|
||||
## Description
|
||||
|
||||
{description}
|
||||
|
||||
## Usage
|
||||
|
||||
```toml
|
||||
# Add to your ZeroClaw config:
|
||||
[skills.{name}]
|
||||
enabled = true
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
This manifest was auto-generated from repository metadata.
|
||||
Review before enabling in production.
|
||||
"#,
|
||||
name = c.name,
|
||||
url = c.url,
|
||||
owner = c.owner,
|
||||
lang = lang,
|
||||
stars = c.stars,
|
||||
license = if c.has_license { "yes" } else { "unknown" },
|
||||
description = c.description,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Escape special characters for TOML basic string values.
///
/// Handles backslash, double quote, and the named short escapes
/// (\n, \r, \t, \b, \f). All remaining control characters
/// (U+0000–U+001F and U+007F) — which TOML forbids unescaped inside
/// basic strings — are emitted as `\uXXXX`, so arbitrary API-sourced
/// text always yields a valid TOML string.
fn escape_toml(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            '\u{08}' => out.push_str("\\b"),
            '\u{0C}' => out.push_str("\\f"),
            // Any other control char must still be escaped per the TOML spec.
            c if (c as u32) < 0x20 || c == '\u{7F}' => {
                out.push_str(&format!("\\u{:04X}", c as u32));
            }
            c => out.push(c),
        }
    }
    out
}
|
||||
|
||||
/// Sanitize a string for use as a single path component.
|
||||
/// Rejects empty names, "..", and names containing path separators or NUL.
|
||||
fn sanitize_path_component(name: &str) -> Result<String> {
|
||||
let trimmed = name.trim().trim_matches('.');
|
||||
if trimmed.is_empty() {
|
||||
bail!("Skill name is empty or only dots after sanitization");
|
||||
}
|
||||
let sanitized: String = trimmed
|
||||
.chars()
|
||||
.map(|c| match c {
|
||||
'/' | '\\' | '\0' => '_',
|
||||
_ => c,
|
||||
})
|
||||
.collect();
|
||||
if sanitized == ".." || sanitized.contains('/') || sanitized.contains('\\') {
|
||||
bail!("Skill name '{}' is unsafe as a path component", name);
|
||||
}
|
||||
Ok(sanitized)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::skillforge::scout::{ScoutResult, ScoutSource};
    use std::fs;

    /// A well-formed candidate with fixed metadata for manifest tests.
    fn sample_candidate() -> ScoutResult {
        ScoutResult {
            name: "test-skill".into(),
            url: "https://github.com/user/test-skill".into(),
            description: "A test skill for unit tests".into(),
            stars: 42,
            language: Some("Rust".into()),
            updated_at: Some(Utc::now()),
            source: ScoutSource::GitHub,
            owner: "user".into(),
            has_license: true,
        }
    }

    #[test]
    fn integrate_creates_files() {
        // Fresh temp dir per run; cleanup is best-effort on both ends.
        let tmp = std::env::temp_dir().join("zeroclaw-test-integrate");
        let _ = fs::remove_dir_all(&tmp);

        let integrator = Integrator::new(tmp.to_string_lossy().into_owned());
        let c = sample_candidate();
        let path = integrator.integrate(&c).unwrap();

        assert!(path.join("SKILL.toml").exists());
        assert!(path.join("SKILL.md").exists());

        let toml = fs::read_to_string(path.join("SKILL.toml")).unwrap();
        assert!(toml.contains("name = \"test-skill\""));
        assert!(toml.contains("stars = 42"));

        let md = fs::read_to_string(path.join("SKILL.md")).unwrap();
        assert!(md.contains("# test-skill"));
        assert!(md.contains("A test skill for unit tests"));

        let _ = fs::remove_dir_all(&tmp);
    }

    #[test]
    fn escape_toml_handles_quotes_and_control_chars() {
        assert_eq!(escape_toml(r#"say "hello""#), r#"say \"hello\""#);
        assert_eq!(escape_toml(r"back\slash"), r"back\\slash");
        assert_eq!(escape_toml("line\nbreak"), "line\\nbreak");
        assert_eq!(escape_toml("tab\there"), "tab\\there");
        assert_eq!(escape_toml("cr\rhere"), "cr\\rhere");
    }

    #[test]
    fn sanitize_rejects_traversal() {
        // Dot-only / whitespace-only names collapse to empty and must error.
        assert!(sanitize_path_component("..").is_err());
        assert!(sanitize_path_component("...").is_err());
        assert!(sanitize_path_component("").is_err());
        assert!(sanitize_path_component(" ").is_err());
    }

    #[test]
    fn sanitize_replaces_separators() {
        let s = sanitize_path_component("foo/bar\\baz\0qux").unwrap();
        assert!(!s.contains('/'));
        assert!(!s.contains('\\'));
        assert!(!s.contains('\0'));
        assert_eq!(s, "foo_bar_baz_qux");
    }

    #[test]
    fn sanitize_trims_dots() {
        let s = sanitize_path_component(".hidden.").unwrap();
        assert_eq!(s, "hidden");
    }
}
|
||||
255
src/skillforge/mod.rs
Normal file
255
src/skillforge/mod.rs
Normal file
|
|
@ -0,0 +1,255 @@
|
|||
//! SkillForge — Skill auto-discovery, evaluation, and integration engine.
|
||||
//!
|
||||
//! Pipeline: Scout → Evaluate → Integrate
|
||||
//! Discovers skills from external sources, scores them, and generates
|
||||
//! ZeroClaw-compatible manifests for qualified candidates.
|
||||
|
||||
pub mod evaluate;
|
||||
pub mod integrate;
|
||||
pub mod scout;
|
||||
|
||||
use anyhow::Result;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use self::evaluate::{EvalResult, Evaluator, Recommendation};
|
||||
use self::integrate::Integrator;
|
||||
use self::scout::{GitHubScout, Scout, ScoutResult, ScoutSource};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Configuration
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// User-facing configuration for the SkillForge pipeline.
///
/// Debug is implemented manually (below) so `github_token` is redacted.
#[derive(Clone, Serialize, Deserialize)]
pub struct SkillForgeConfig {
    /// Master switch; when false, `forge()` returns an empty report.
    #[serde(default)]
    pub enabled: bool,
    /// When false, Auto-recommended candidates are routed to manual review
    /// instead of being written to disk.
    #[serde(default = "default_auto_integrate")]
    pub auto_integrate: bool,
    /// Scout source names ("github", "clawhub", "huggingface"/"hf").
    #[serde(default = "default_sources")]
    pub sources: Vec<String>,
    /// How often the pipeline should run, in hours.
    #[serde(default = "default_scan_interval")]
    pub scan_interval_hours: u64,
    /// Minimum weighted score for auto-integration (0.0–1.0).
    #[serde(default = "default_min_score")]
    pub min_score: f64,
    /// Optional GitHub personal-access token for higher rate limits.
    #[serde(default)]
    pub github_token: Option<String>,
    /// Directory where integrated skills are written.
    #[serde(default = "default_output_dir")]
    pub output_dir: String,
}
|
||||
|
||||
// serde `default = "..."` helpers — keep these in sync with the Default impl.
fn default_auto_integrate() -> bool {
    true
}
fn default_sources() -> Vec<String> {
    vec!["github".into(), "clawhub".into()]
}
fn default_scan_interval() -> u64 {
    24
}
fn default_min_score() -> f64 {
    0.7
}
fn default_output_dir() -> String {
    "./skills".into()
}
|
||||
|
||||
impl Default for SkillForgeConfig {
    /// Disabled by default; every other field mirrors the serde default
    /// helpers so deserialized and constructed configs agree.
    fn default() -> Self {
        Self {
            enabled: false,
            auto_integrate: default_auto_integrate(),
            sources: default_sources(),
            scan_interval_hours: default_scan_interval(),
            min_score: default_min_score(),
            github_token: None,
            output_dir: default_output_dir(),
        }
    }
}
|
||||
|
||||
// Manual Debug: the GitHub token must never reach logs, so it is rendered as
// "***" (presence only) instead of its value.
impl std::fmt::Debug for SkillForgeConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SkillForgeConfig")
            .field("enabled", &self.enabled)
            .field("auto_integrate", &self.auto_integrate)
            .field("sources", &self.sources)
            .field("scan_interval_hours", &self.scan_interval_hours)
            .field("min_score", &self.min_score)
            .field(
                "github_token",
                // Redacted: shows Some("***")/None without leaking the secret.
                &self.github_token.as_ref().map(|_| "***"),
            )
            .field("output_dir", &self.output_dir)
            .finish()
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ForgeReport — summary of a single pipeline run
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Summary of one pipeline run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ForgeReport {
    /// Unique candidates found across all sources (post-dedup).
    pub discovered: usize,
    /// Candidates that went through the evaluator (== discovered).
    pub evaluated: usize,
    /// Candidates successfully written to disk.
    pub auto_integrated: usize,
    /// Candidates routed to human review (includes Auto candidates when
    /// `auto_integrate` is disabled).
    pub manual_review: usize,
    /// Candidates below the review threshold.
    pub skipped: usize,
    /// Full per-candidate evaluation results.
    pub results: Vec<EvalResult>,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// SkillForge
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Orchestrates the Scout → Evaluate → Integrate pipeline.
pub struct SkillForge {
    config: SkillForgeConfig,
    // Built from config.min_score at construction time.
    evaluator: Evaluator,
    // Built from config.output_dir at construction time.
    integrator: Integrator,
}
|
||||
|
||||
impl SkillForge {
    /// Build the pipeline; evaluator and integrator inherit `min_score` and
    /// `output_dir` from the config.
    pub fn new(config: SkillForgeConfig) -> Self {
        let evaluator = Evaluator::new(config.min_score);
        let integrator = Integrator::new(config.output_dir.clone());
        Self {
            config,
            evaluator,
            integrator,
        }
    }

    /// Run the full pipeline: Scout → Evaluate → Integrate.
    ///
    /// Per-source and per-candidate failures are logged as warnings and the
    /// run continues; only a disabled config short-circuits (returning an
    /// all-zero report).
    pub async fn forge(&self) -> Result<ForgeReport> {
        if !self.config.enabled {
            warn!("SkillForge is disabled — skipping");
            return Ok(ForgeReport {
                discovered: 0,
                evaluated: 0,
                auto_integrated: 0,
                manual_review: 0,
                skipped: 0,
                results: vec![],
            });
        }

        // --- Scout ----------------------------------------------------------
        let mut candidates: Vec<ScoutResult> = Vec::new();

        for src in &self.config.sources {
            // FromStr for ScoutSource has Err = Infallible (unknown names fall
            // back to GitHub), so this unwrap cannot panic.
            let source: ScoutSource = src.parse().unwrap(); // Infallible
            match source {
                ScoutSource::GitHub => {
                    let scout = GitHubScout::new(self.config.github_token.clone());
                    match scout.discover().await {
                        Ok(mut found) => {
                            info!(count = found.len(), "GitHub scout returned candidates");
                            candidates.append(&mut found);
                        }
                        Err(e) => {
                            // A single failing source must not abort the run.
                            warn!(error = %e, "GitHub scout failed, continuing with other sources");
                        }
                    }
                }
                ScoutSource::ClawHub | ScoutSource::HuggingFace => {
                    info!(source = src.as_str(), "Source not yet implemented — skipping");
                }
            }
        }

        // Deduplicate by URL
        scout::dedup(&mut candidates);
        let discovered = candidates.len();
        info!(discovered, "Total unique candidates after dedup");

        // --- Evaluate -------------------------------------------------------
        let results: Vec<EvalResult> = candidates
            .into_iter()
            .map(|c| self.evaluator.evaluate(c))
            .collect();
        let evaluated = results.len();

        // --- Integrate ------------------------------------------------------
        let mut auto_integrated = 0usize;
        let mut manual_review = 0usize;
        let mut skipped = 0usize;

        for res in &results {
            match res.recommendation {
                Recommendation::Auto => {
                    if self.config.auto_integrate {
                        match self.integrator.integrate(&res.candidate) {
                            Ok(_) => {
                                auto_integrated += 1;
                            }
                            Err(e) => {
                                // NOTE(review): a candidate whose integration
                                // fails is counted in no bucket, so the three
                                // counters may sum to less than `evaluated` —
                                // confirm this is intended.
                                warn!(
                                    skill = res.candidate.name.as_str(),
                                    error = %e,
                                    "Integration failed for candidate, continuing"
                                );
                            }
                        }
                    } else {
                        // Count as would-be auto but not actually integrated
                        manual_review += 1;
                    }
                }
                Recommendation::Manual => {
                    manual_review += 1;
                }
                Recommendation::Skip => {
                    skipped += 1;
                }
            }
        }

        info!(
            auto_integrated,
            manual_review, skipped, "Forge pipeline complete"
        );

        Ok(ForgeReport {
            discovered,
            evaluated,
            auto_integrated,
            manual_review,
            skipped,
            results,
        })
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn disabled_forge_returns_empty_report() {
        // enabled=false must short-circuit with an all-zero report.
        let cfg = SkillForgeConfig {
            enabled: false,
            ..Default::default()
        };
        let forge = SkillForge::new(cfg);
        let report = forge.forge().await.unwrap();
        assert_eq!(report.discovered, 0);
        assert_eq!(report.auto_integrated, 0);
    }

    #[test]
    fn default_config_values() {
        // Pins Default to the serde default helpers.
        let cfg = SkillForgeConfig::default();
        assert!(!cfg.enabled);
        assert!(cfg.auto_integrate);
        assert_eq!(cfg.scan_interval_hours, 24);
        assert!((cfg.min_score - 0.7).abs() < f64::EPSILON);
        assert_eq!(cfg.sources, vec!["github", "clawhub"]);
    }
}
|
||||
331
src/skillforge/scout.rs
Normal file
331
src/skillforge/scout.rs
Normal file
|
|
@ -0,0 +1,331 @@
|
|||
//! Scout — skill discovery from external sources.
|
||||
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::{debug, warn};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ScoutSource
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Where a candidate skill was discovered.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ScoutSource {
    GitHub,
    // Not yet implemented as scouts; recognized in config and skipped.
    ClawHub,
    HuggingFace,
}
|
||||
|
||||
// Parsing is deliberately infallible: an unknown source name logs a warning
// and falls back to GitHub rather than erroring, so a config typo does not
// abort the pipeline.
impl std::str::FromStr for ScoutSource {
    type Err = std::convert::Infallible;

    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        // Case-insensitive match; "hf" is an accepted HuggingFace alias.
        Ok(match s.to_lowercase().as_str() {
            "github" => Self::GitHub,
            "clawhub" => Self::ClawHub,
            "huggingface" | "hf" => Self::HuggingFace,
            _ => {
                warn!(source = s, "Unknown scout source, defaulting to GitHub");
                Self::GitHub
            }
        })
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ScoutResult
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// One discovered candidate skill, normalized across sources.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoutResult {
    /// Repository name (without owner prefix).
    pub name: String,
    /// Web URL of the repository.
    pub url: String,
    /// Repo description; empty string when the source provides none.
    pub description: String,
    /// Star count (0 when unavailable).
    pub stars: u64,
    /// Primary language as reported by the source, if any.
    pub language: Option<String>,
    /// Last-updated timestamp, if parseable from the source.
    pub updated_at: Option<DateTime<Utc>>,
    /// Which scout produced this result.
    pub source: ScoutSource,
    /// Owner / org extracted from the URL or API response.
    pub owner: String,
    /// Whether the repo has a license file.
    pub has_license: bool,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scout trait
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// A skill-discovery backend. Implementations must be Send + Sync so scouts
/// can run inside the async pipeline.
#[async_trait]
pub trait Scout: Send + Sync {
    /// Discover candidate skills from the source.
    async fn discover(&self) -> Result<Vec<ScoutResult>>;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// GitHubScout
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Searches GitHub for repos matching skill-related queries.
pub struct GitHubScout {
    /// Preconfigured client (headers, auth, 30s timeout) built in `new`.
    client: reqwest::Client,
    /// Search queries executed per `discover()` call.
    queries: Vec<String>,
}
|
||||
|
||||
impl GitHubScout {
    /// Build a scout with default search queries.
    ///
    /// `token`, when present, is sent as a Bearer Authorization header for
    /// higher rate limits; a token that fails header validation is silently
    /// dropped rather than aborting construction.
    pub fn new(token: Option<String>) -> Self {
        use std::time::Duration;

        let mut headers = reqwest::header::HeaderMap::new();
        headers.insert(
            reqwest::header::ACCEPT,
            "application/vnd.github+json"
                .parse()
                .expect("valid header"),
        );
        headers.insert(
            reqwest::header::USER_AGENT,
            "ZeroClaw-SkillForge/0.1".parse().expect("valid header"),
        );
        if let Some(ref t) = token {
            // Invalid header characters in the token → skip auth entirely.
            if let Ok(val) = format!("Bearer {t}").parse() {
                headers.insert(reqwest::header::AUTHORIZATION, val);
            }
        }

        let client = reqwest::Client::builder()
            .default_headers(headers)
            // Hard cap per request so a hung API call can't stall the pipeline.
            .timeout(Duration::from_secs(30))
            .build()
            .expect("failed to build reqwest client");

        Self {
            client,
            queries: vec![
                "zeroclaw skill".into(),
                "ai agent skill".into(),
            ],
        }
    }

    /// Parse the GitHub search/repositories JSON response.
    ///
    /// Items missing required fields (name, html_url) are dropped; optional
    /// fields fall back to empty/zero/None defaults.
    fn parse_items(body: &serde_json::Value) -> Vec<ScoutResult> {
        let items = match body.get("items").and_then(|v| v.as_array()) {
            Some(arr) => arr,
            None => return vec![],
        };

        items
            .iter()
            .filter_map(|item| {
                // Required fields — absence skips the item via `?`.
                let name = item.get("name")?.as_str()?.to_string();
                let url = item.get("html_url")?.as_str()?.to_string();
                let description = item
                    .get("description")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .to_string();
                let stars = item
                    .get("stargazers_count")
                    .and_then(|v| v.as_u64())
                    .unwrap_or(0);
                let language = item
                    .get("language")
                    .and_then(|v| v.as_str())
                    .map(String::from);
                let updated_at = item
                    .get("updated_at")
                    .and_then(|v| v.as_str())
                    .and_then(|s| s.parse::<DateTime<Utc>>().ok());
                let owner = item
                    .get("owner")
                    .and_then(|o| o.get("login"))
                    .and_then(|v| v.as_str())
                    .unwrap_or("unknown")
                    .to_string();
                // License presence only — a non-null "license" object counts.
                let has_license = item
                    .get("license")
                    .map(|v| !v.is_null())
                    .unwrap_or(false);

                Some(ScoutResult {
                    name,
                    url,
                    description,
                    stars,
                    language,
                    updated_at,
                    source: ScoutSource::GitHub,
                    owner,
                    has_license,
                })
            })
            .collect()
    }
}
|
||||
|
||||
#[async_trait]
impl Scout for GitHubScout {
    /// Run every configured search query against the GitHub search API and
    /// collect the deduplicated results.
    ///
    /// Network errors, non-success statuses, and unparseable responses are
    /// logged per query and skipped — a single bad query never fails the
    /// whole discovery pass.
    async fn discover(&self) -> Result<Vec<ScoutResult>> {
        let mut all: Vec<ScoutResult> = Vec::new();

        for query in &self.queries {
            let url = format!(
                "https://api.github.com/search/repositories?q={}&sort=stars&order=desc&per_page=30",
                urlencoding(query)
            );
            debug!(query = query.as_str(), "Searching GitHub");

            let resp = match self.client.get(&url).send().await {
                Ok(r) => r,
                Err(e) => {
                    warn!(
                        query = query.as_str(),
                        error = %e,
                        "GitHub API request failed, skipping query"
                    );
                    continue;
                }
            };

            if !resp.status().is_success() {
                warn!(
                    status = %resp.status(),
                    query = query.as_str(),
                    "GitHub search returned non-200"
                );
                continue;
            }

            let body: serde_json::Value = match resp.json().await {
                Ok(v) => v,
                Err(e) => {
                    warn!(
                        query = query.as_str(),
                        error = %e,
                        "Failed to parse GitHub response, skipping query"
                    );
                    continue;
                }
            };

            let mut items = Self::parse_items(&body);
            debug!(count = items.len(), query = query.as_str(), "Parsed items");
            all.append(&mut items);
        }

        // Queries overlap; keep the first occurrence of each URL.
        dedup(&mut all);
        Ok(all)
    }
}
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Minimal form-style percent-encoding for GitHub search query strings.
///
/// Unreserved characters (RFC 3986: ALPHA / DIGIT / `-` `.` `_` `~`)
/// pass through, a space becomes `+` (form encoding, accepted by the
/// GitHub search endpoint), and every other byte is emitted as `%XX`.
///
/// The previous replace-chain only handled ` `, `&`, and `#`: it never
/// escaped `%` itself (so an input containing a literal `%` produced an
/// invalid/ambiguous encoding) nor `+` (which the server would decode
/// back into a space). Encoding all non-unreserved bytes fixes both
/// while preserving the old outputs for the old inputs.
fn urlencoding(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for b in s.bytes() {
        match b {
            b' ' => out.push('+'),
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => {
                out.push(b as char);
            }
            _ => {
                // UTF-8 continuation bytes are percent-encoded per byte,
                // which is exactly what URL encoding requires.
                out.push_str(&format!("%{b:02X}"));
            }
        }
    }
    out
}
|
||||
|
||||
/// Deduplicate scout results by URL (keeps first occurrence).
|
||||
pub fn dedup(results: &mut Vec<ScoutResult>) {
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
results.retain(|r| seen.insert(r.url.clone()));
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal `ScoutResult` fixture for the dedup tests.
    fn sample(name: &str, url: &str, stars: u64, has_license: bool) -> ScoutResult {
        ScoutResult {
            name: name.into(),
            url: url.into(),
            description: String::new(),
            stars,
            language: None,
            updated_at: None,
            source: ScoutSource::GitHub,
            owner: "x".into(),
            has_license,
        }
    }

    #[test]
    fn scout_source_from_str() {
        let cases = [
            ("github", ScoutSource::GitHub),
            ("GitHub", ScoutSource::GitHub),
            ("clawhub", ScoutSource::ClawHub),
            ("huggingface", ScoutSource::HuggingFace),
            ("hf", ScoutSource::HuggingFace),
            // Unknown sources fall back to GitHub.
            ("unknown", ScoutSource::GitHub),
        ];
        for (input, expected) in cases {
            assert_eq!(input.parse::<ScoutSource>().unwrap(), expected);
        }
    }

    #[test]
    fn dedup_removes_duplicates() {
        let mut results = vec![
            sample("a", "https://github.com/x/a", 10, true),
            sample("a-dup", "https://github.com/x/a", 10, true),
            sample("b", "https://github.com/x/b", 5, false),
        ];
        dedup(&mut results);
        // The duplicate URL is dropped; first occurrence and order survive.
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].name, "a");
        assert_eq!(results[1].name, "b");
    }

    #[test]
    fn parse_github_items() {
        let json = serde_json::json!({
            "total_count": 1,
            "items": [
                {
                    "name": "cool-skill",
                    "html_url": "https://github.com/user/cool-skill",
                    "description": "A cool skill",
                    "stargazers_count": 42,
                    "language": "Rust",
                    "updated_at": "2026-01-15T10:00:00Z",
                    "owner": { "login": "user" },
                    "license": { "spdx_id": "MIT" }
                }
            ]
        });
        let items = GitHubScout::parse_items(&json);
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].name, "cool-skill");
        assert_eq!(items[0].stars, 42);
        assert!(items[0].has_license);
        assert_eq!(items[0].owner, "user");
    }

    #[test]
    fn urlencoding_works() {
        assert_eq!(urlencoding("hello world"), "hello+world");
        assert_eq!(urlencoding("a&b#c"), "a%26b%23c");
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue