feat: SkillForge — automated skill discovery, evaluation & integration engine (#144)

* feat: add SkillForge — automated skill discovery, evaluation, and integration engine SkillForge adds a 3-stage pipeline for autonomous skill management: - Scout: discovers candidate skills from GitHub (extensible to ClawHub, HuggingFace) - Evaluate: scores candidates on compatibility, quality, and security (weighted 0.30/0.35/0.35) - Integrate: generates standard SKILL.toml + SKILL.md manifests for approved candidates Thresholds: >=0.7 auto-integrate, 0.4-0.7 manual review, <0.4 skip. Uses only existing dependencies (reqwest, serde, tokio, tracing, chrono, anyhow). Includes unit tests for all modules. * fix: address code review feedback on SkillForge PR #115 - evaluate: whole-word matching for BAD_PATTERNS (fixes hackathon false positive) - evaluate: guard against future timestamps in recency bonus - integrate: escape URLs in TOML output via escape_toml() - integrate: handle control chars (\n, \r, \t, \b, \f) in escape_toml() - mod: redact github_token in Debug impl to prevent log leakage - mod: fix auto_integrated count when auto_integrate=false - mod: per-candidate error handling (single failure no longer aborts pipeline) - scout: add 30s request timeout, remove unused token field - deps: enable chrono serde feature for DateTime serialization - tests: add hackathon/exact-hack tests, update escape_toml test coverage * fix: address round-2 CodeRabbit review feedback - integrate: add sanitize_path_component() to prevent directory traversal - mod: GitHub scout failure now logs warning and continues (no pipeline abort) - scout: network/parse errors per-query use warn+continue instead of ? - scout: implement std::str::FromStr for ScoutSource (replaces custom from_str) - tests: add path sanitization tests (traversal, separators, dot trimming) --------- Co-authored-by: stawky <stakeswky@gmail.com>
2026-02-15 09:26:13 -05:00 · 2026-02-15 09:26:13 -05:00 · 35b63d6b12
commit 35b63d6b12
parent 2ac571f406
7 changed files with 1098 additions and 1 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -297,6 +297,7 @@ checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118"
 dependencies = [
 "iana-time-zone",
 "num-traits",
 "serde",
 "windows-link",
 ]
--- a/Cargo.toml
+++ b/Cargo.toml
@ -53,7 +53,7 @@ async-trait = "0.1"
 # Memory / persistence
 rusqlite = { version = "0.32", features = ["bundled"] }
-chrono = { version = "0.4", default-features = false, features = ["clock", "std"] }
+chrono = { version = "0.4", default-features = false, features = ["clock", "std", "serde"] }
 cron = "0.12"
 # Interactive CLI prompts
--- a/src/main.rs
+++ b/src/main.rs
@ -31,6 +31,7 @@ mod providers;
 mod runtime;
 mod security;
 mod service;
 mod skillforge;
 mod skills;
 mod tools;
 mod tunnel;
--- a/src/skillforge/evaluate.rs
+++ b/src/skillforge/evaluate.rs
@ -0,0 +1,261 @@
 //! Evaluator — scores discovered skill candidates across multiple dimensions.
 use serde::{Deserialize, Serialize};
 use super::scout::ScoutResult;
 // ---------------------------------------------------------------------------
 // Scoring dimensions
 // ---------------------------------------------------------------------------
 /// Per-dimension scores for a single candidate.
 ///
 /// Each field is intended to lie in 0.0–1.0; [`Scores::total`] combines them
 /// into a single weighted value.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Scores {
    /// OS / arch / runtime compatibility (0.0–1.0).
    pub compatibility: f64,
    /// Code quality signals: stars, tests, docs (0.0–1.0).
    pub quality: f64,
    /// Security posture: license, known-bad patterns (0.0–1.0).
    pub security: f64,
 }
 impl Scores {
    /// Collapses the three dimension scores into one weighted total.
    ///
    /// Compatibility is weighted 0.30; quality and security are weighted 0.35
    /// each, so the weights sum to 1.
    pub fn total(&self) -> f64 {
        let weighted_compatibility = self.compatibility * 0.30;
        let weighted_quality = self.quality * 0.35;
        let weighted_security = self.security * 0.35;
        weighted_compatibility + weighted_quality + weighted_security
    }
 }
 // ---------------------------------------------------------------------------
 // Recommendation
 // ---------------------------------------------------------------------------
 /// What the pipeline should do with an evaluated candidate.
 ///
 /// "threshold" below is the evaluator's configured minimum auto-integration score.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 pub enum Recommendation {
    /// Score >= threshold → safe to auto-integrate.
    Auto,
    /// Score in [0.4, threshold) → needs human review.
    Manual,
    /// Score < 0.4 → skip entirely.
    Skip,
 }
 // ---------------------------------------------------------------------------
 // EvalResult
 // ---------------------------------------------------------------------------
 /// Outcome of evaluating one scouted candidate.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct EvalResult {
    /// The candidate that was evaluated (moved in unchanged).
    pub candidate: ScoutResult,
    /// Individual dimension scores.
    pub scores: Scores,
    /// Weighted total of `scores`, as computed at evaluation time.
    pub total_score: f64,
    /// Action derived from `total_score`.
    pub recommendation: Recommendation,
 }
 // ---------------------------------------------------------------------------
 // Evaluator
 // ---------------------------------------------------------------------------
 /// Scores scouted candidates and maps the total score to a [`Recommendation`].
 pub struct Evaluator {
    /// Minimum total score for auto-integration.
    min_score: f64,
 }
 /// Known-bad patterns in repo names / descriptions (matched as whole words).
 ///
 /// Entries are lowercase because they are compared against lowercased text.
 const BAD_PATTERNS: &[&str] = &[
    "malware",
    "exploit",
    "hack",
    "crack",
    "keygen",
    "ransomware",
    "trojan",
 ];
 /// Returns `true` if `haystack` contains `word` as a whole word.
 ///
 /// An occurrence counts as a whole word when the character immediately before
 /// it and the character immediately after it are each either absent (start or
 /// end of the string) or not alphanumeric. Alphanumeric is judged per Unicode
 /// scalar value, so accented letters, non-Latin scripts and non-ASCII digits
 /// are treated as part of a word rather than as a boundary.
 ///
 /// Matching is case-sensitive; callers are expected to normalise case first.
 fn contains_word(haystack: &str, word: &str) -> bool {
    haystack.match_indices(word).any(|(start, matched)| {
        // `match_indices` only yields char-boundary offsets, so slicing at
        // `start` and `end` cannot split a multi-byte character.
        let end = start + matched.len();
        let before_ok = haystack[..start]
            .chars()
            .next_back()
            .map_or(true, |c| !c.is_alphanumeric());
        let after_ok = haystack[end..]
            .chars()
            .next()
            .map_or(true, |c| !c.is_alphanumeric());
        before_ok && after_ok
    })
 }
 impl Evaluator {
    pub fn new(min_score: f64) -> Self {
        Self { min_score }
    }
    pub fn evaluate(&self, candidate: ScoutResult) -> EvalResult {
        let compatibility = self.score_compatibility(&candidate);
        let quality = self.score_quality(&candidate);
        let security = self.score_security(&candidate);
        let scores = Scores {
            compatibility,
            quality,
            security,
        };
        let total_score = scores.total();
        let recommendation = if total_score >= self.min_score {
            Recommendation::Auto
        } else if total_score >= 0.4 {
            Recommendation::Manual
        } else {
            Recommendation::Skip
        };
        EvalResult {
            candidate,
            scores,
            total_score,
            recommendation,
        }
    }
    // -- Dimension scorers --------------------------------------------------
    /// Compatibility: favour Rust repos; penalise unknown languages.
    fn score_compatibility(&self, c: &ScoutResult) -> f64 {
        match c.language.as_deref() {
            Some("Rust") => 1.0,
            Some("Python" | "TypeScript" | "JavaScript") => 0.6,
            Some(_) => 0.3,
            None => 0.2,
        }
    }
    /// Quality: based on star count (log scale, capped at 1.0).
    fn score_quality(&self, c: &ScoutResult) -> f64 {
        // log2(stars + 1) / 10, capped at 1.0
        let raw = ((c.stars as f64) + 1.0).log2() / 10.0;
        raw.min(1.0)
    }
    /// Security: license presence + bad-pattern check.
    fn score_security(&self, c: &ScoutResult) -> f64 {
        let mut score: f64 = 0.5;
        // License bonus
        if c.has_license {
            score += 0.3;
        }
        // Bad-pattern penalty (whole-word match)
        let lower_name = c.name.to_lowercase();
        let lower_desc = c.description.to_lowercase();
        for pat in BAD_PATTERNS {
            if contains_word(&lower_name, pat) || contains_word(&lower_desc, pat) {
                score -= 0.5;
                break;
            }
        }
        // Recency bonus: updated within last 180 days (guard against future timestamps)
        if let Some(updated) = c.updated_at {
            let age_days = (chrono::Utc::now() - updated).num_days();
            if (0..180).contains(&age_days) {
                score += 0.2;
            }
        }
        score.clamp(0.0, 1.0)
    }
 }
 // ---------------------------------------------------------------------------
 // Tests
 // ---------------------------------------------------------------------------
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::skillforge::scout::{ScoutResult, ScoutSource};
    /// Builds a benign GitHub candidate updated "now", varying only the
    /// inputs that drive the three scorers.
    fn make_candidate(stars: u64, lang: Option<&str>, has_license: bool) -> ScoutResult {
        ScoutResult {
            name: "test-skill".into(),
            url: "https://github.com/test/test-skill".into(),
            description: "A test skill".into(),
            stars,
            language: lang.map(String::from),
            updated_at: Some(chrono::Utc::now()),
            source: ScoutSource::GitHub,
            owner: "test".into(),
            has_license,
        }
    }
    // A licensed, recent, well-starred Rust repo should clear the 0.7 threshold.
    #[test]
    fn high_quality_rust_repo_gets_auto() {
        let eval = Evaluator::new(0.7);
        let c = make_candidate(500, Some("Rust"), true);
        let res = eval.evaluate(c);
        assert!(res.total_score >= 0.7, "score: {}", res.total_score);
        assert_eq!(res.recommendation, Recommendation::Auto);
    }
    // One star, no language and no license must never be auto-integrated.
    #[test]
    fn low_star_no_license_gets_manual_or_skip() {
        let eval = Evaluator::new(0.7);
        let c = make_candidate(1, None, false);
        let res = eval.evaluate(c);
        assert!(res.total_score < 0.7, "score: {}", res.total_score);
        assert_ne!(res.recommendation, Recommendation::Auto);
    }
    // A bad pattern in the name drags the security score down despite bonuses.
    #[test]
    fn bad_pattern_tanks_security() {
        let eval = Evaluator::new(0.7);
        let mut c = make_candidate(1000, Some("Rust"), true);
        c.name = "malware-skill".into();
        let res = eval.evaluate(c);
        // 0.5 base + 0.3 license - 0.5 bad_pattern + 0.2 recency = 0.5
        assert!(res.scores.security <= 0.5, "security: {}", res.scores.security);
    }
    // The weights sum to one: all-ones totals 1.0 and all-zeros totals 0.0.
    #[test]
    fn scores_total_weighted() {
        let s = Scores {
            compatibility: 1.0,
            quality: 1.0,
            security: 1.0,
        };
        assert!((s.total() - 1.0).abs() < f64::EPSILON);
        let s2 = Scores {
            compatibility: 0.0,
            quality: 0.0,
            security: 0.0,
        };
        assert!((s2.total()).abs() < f64::EPSILON);
    }
    // Regression: substring matches such as "hackathon" must not be penalised.
    #[test]
    fn hackathon_not_flagged_as_bad() {
        let eval = Evaluator::new(0.7);
        let mut c = make_candidate(500, Some("Rust"), true);
        c.name = "hackathon-tools".into();
        c.description = "Tools for hackathons and lifehacks".into();
        let res = eval.evaluate(c);
        // "hack" should NOT match "hackathon" or "lifehacks"
        assert!(res.scores.security >= 0.5, "security: {}", res.scores.security);
    }
    // "hack" delimited by a hyphen is a whole word and must be penalised.
    #[test]
    fn exact_hack_is_flagged() {
        let eval = Evaluator::new(0.7);
        let mut c = make_candidate(500, Some("Rust"), false);
        c.name = "hack-tool".into();
        c.updated_at = None;
        let res = eval.evaluate(c);
        // 0.5 base + 0.0 license - 0.5 bad_pattern + 0.0 recency = 0.0
        assert!(res.scores.security < 0.5, "security: {}", res.scores.security);
    }
 }
--- a/src/skillforge/integrate.rs
+++ b/src/skillforge/integrate.rs
@ -0,0 +1,248 @@
 //! Integrator — generates ZeroClaw-standard SKILL.toml + SKILL.md from scout results.
 use std::fs;
 use std::path::PathBuf;
 use anyhow::{bail, Context, Result};
 use chrono::Utc;
 use tracing::info;
 use super::scout::ScoutResult;
 // ---------------------------------------------------------------------------
 // Integrator
 // ---------------------------------------------------------------------------
 /// Writes skill manifests (SKILL.toml + SKILL.md) for scouted candidates.
 pub struct Integrator {
    /// Root directory under which one sub-directory per skill is created.
    output_dir: PathBuf,
 }
 impl Integrator {
    /// Creates an integrator that writes skill directories beneath `output_dir`.
    pub fn new(output_dir: String) -> Self {
        Self {
            output_dir: PathBuf::from(output_dir),
        }
    }
    /// Write SKILL.toml and SKILL.md for the given candidate.
    ///
    /// The candidate name is sanitized into a single path component and used
    /// as the directory name under the configured output directory. Existing
    /// files at the target paths are overwritten.
    ///
    /// # Errors
    ///
    /// Returns an error if the name cannot be sanitized, the directory cannot
    /// be created, or either file cannot be written.
    pub fn integrate(&self, candidate: &ScoutResult) -> Result<PathBuf> {
        let safe_name = sanitize_path_component(&candidate.name)?;
        let skill_dir = self.output_dir.join(&safe_name);
        fs::create_dir_all(&skill_dir)
            .with_context(|| format!("Failed to create dir: {}", skill_dir.display()))?;
        let toml_path = skill_dir.join("SKILL.toml");
        let md_path = skill_dir.join("SKILL.md");
        let toml_content = self.generate_toml(candidate);
        let md_content = self.generate_md(candidate);
        fs::write(&toml_path, &toml_content)
            .with_context(|| format!("Failed to write {}", toml_path.display()))?;
        fs::write(&md_path, &md_content)
            .with_context(|| format!("Failed to write {}", md_path.display()))?;
        info!(
            skill = candidate.name.as_str(),
            path = %skill_dir.display(),
            "Integrated skill"
        );
        Ok(skill_dir)
    }
    // -- Generators ---------------------------------------------------------
    /// Renders the SKILL.toml manifest for `c`.
    ///
    /// Every free-text value that originates from the candidate (name,
    /// description, URL, owner, language) is passed through `escape_toml`
    /// before being placed inside a quoted TOML string. A missing language or
    /// update date is written as `"unknown"`.
    fn generate_toml(&self, c: &ScoutResult) -> String {
        let lang = c.language.as_deref().unwrap_or("unknown");
        let updated = c
            .updated_at
            .map(|d| d.format("%Y-%m-%d").to_string())
            .unwrap_or_else(|| "unknown".into());
        format!(
            r#"# Auto-generated by SkillForge on {now}
 [skill]
 name = "{name}"
 version = "0.1.0"
 description = "{description}"
 source = "{url}"
 owner = "{owner}"
 language = "{lang}"
 license = {license}
 stars = {stars}
 updated_at = "{updated}"
 [skill.requirements]
 runtime = "zeroclaw >= 0.1"
 [skill.metadata]
 auto_integrated = true
 forge_timestamp = "{now}"
 "#,
            now = Utc::now().format("%Y-%m-%dT%H:%M:%SZ"),
            name = escape_toml(&c.name),
            description = escape_toml(&c.description),
            url = escape_toml(&c.url),
            owner = escape_toml(&c.owner),
            // The language string comes from the same external metadata as the
            // other fields, so it must be escaped like them.
            lang = escape_toml(lang),
            license = if c.has_license { "true" } else { "false" },
            stars = c.stars,
            updated = updated,
        )
    }
    /// Renders the human-readable SKILL.md summary for `c`.
    ///
    /// Candidate values are interpolated verbatim (no Markdown escaping).
    fn generate_md(&self, c: &ScoutResult) -> String {
        let lang = c.language.as_deref().unwrap_or("unknown");
        format!(
            r#"# {name}
 > Auto-generated by SkillForge
 ## Overview
 - **Source**: [{url}]({url})
 - **Owner**: {owner}
 - **Language**: {lang}
 - **Stars**: {stars}
 - **License**: {license}
 ## Description
 {description}
 ## Usage
 ```toml
 # Add to your ZeroClaw config:
 [skills.{name}]
 enabled = true
 ```
 ## Notes
 This manifest was auto-generated from repository metadata.
 Review before enabling in production.
 "#,
            name = c.name,
            url = c.url,
            owner = c.owner,
            lang = lang,
            stars = c.stars,
            license = if c.has_license { "yes" } else { "unknown" },
            description = c.description,
        )
    }
 }
 /// Escape special characters for TOML basic string values.
 ///
 /// Backslash and double quote are backslash-escaped; newline, carriage
 /// return, tab, backspace and form feed use their short escapes (`\n`, `\r`,
 /// `\t`, `\b`, `\f`). Any other control character is written as a `\uXXXX`
 /// escape, because TOML basic strings may not contain raw control
 /// characters. All remaining characters are copied through unchanged.
 fn escape_toml(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            '\u{08}' => escaped.push_str("\\b"),
            '\u{0C}' => escaped.push_str("\\f"),
            // Remaining control characters have no short form.
            c if c.is_control() => escaped.push_str(&format!("\\u{:04X}", c as u32)),
            c => escaped.push(c),
        }
    }
    escaped
 }
 /// Sanitize a string for use as a single path component.
 ///
 /// Leading and trailing whitespace and dots are stripped (in any
 /// interleaving, so `". ."` is treated as empty), then every `/`, `\` and
 /// NUL is replaced with `_`.
 ///
 /// # Errors
 ///
 /// Returns an error when nothing remains after trimming (empty names, `..`,
 /// `...`, whitespace-only names), or if the final value would still be
 /// unsafe as a path component.
 fn sanitize_path_component(name: &str) -> Result<String> {
    // Trim dots and whitespace together: trimming them one after the other
    // leaves stray whitespace for inputs such as ". ." or "foo .".
    let trimmed = name.trim_matches(|c: char| c.is_whitespace() || c == '.');
    if trimmed.is_empty() {
        bail!("Skill name is empty or only dots after sanitization");
    }
    let sanitized: String = trimmed
        .chars()
        .map(|c| match c {
            '/' | '\\' | '\0' => '_',
            _ => c,
        })
        .collect();
    // Defence in depth: the steps above should make this unreachable, but the
    // check guards against future edits to the trimming / replacement rules.
    if sanitized == ".." || sanitized.contains('/') || sanitized.contains('\\') {
        bail!("Skill name '{}' is unsafe as a path component", name);
    }
    Ok(sanitized)
 }
 // ---------------------------------------------------------------------------
 // Tests
 // ---------------------------------------------------------------------------
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::skillforge::scout::{ScoutResult, ScoutSource};
    use std::fs;
    /// A well-formed candidate with a safe name, used as the happy-path fixture.
    fn sample_candidate() -> ScoutResult {
        ScoutResult {
            name: "test-skill".into(),
            url: "https://github.com/user/test-skill".into(),
            description: "A test skill for unit tests".into(),
            stars: 42,
            language: Some("Rust".into()),
            updated_at: Some(Utc::now()),
            source: ScoutSource::GitHub,
            owner: "user".into(),
            has_license: true,
        }
    }
    // End-to-end: both manifest files are written and contain the candidate data.
    #[test]
    fn integrate_creates_files() {
        // Fixed directory under the system temp dir; cleared before and after the run.
        let tmp = std::env::temp_dir().join("zeroclaw-test-integrate");
        let _ = fs::remove_dir_all(&tmp);
        let integrator = Integrator::new(tmp.to_string_lossy().into_owned());
        let c = sample_candidate();
        let path = integrator.integrate(&c).unwrap();
        assert!(path.join("SKILL.toml").exists());
        assert!(path.join("SKILL.md").exists());
        let toml = fs::read_to_string(path.join("SKILL.toml")).unwrap();
        assert!(toml.contains("name = \"test-skill\""));
        assert!(toml.contains("stars = 42"));
        let md = fs::read_to_string(path.join("SKILL.md")).unwrap();
        assert!(md.contains("# test-skill"));
        assert!(md.contains("A test skill for unit tests"));
        let _ = fs::remove_dir_all(&tmp);
    }
    // Quotes, backslashes and common control characters get TOML escapes.
    #[test]
    fn escape_toml_handles_quotes_and_control_chars() {
        assert_eq!(escape_toml(r#"say "hello""#), r#"say \"hello\""#);
        assert_eq!(escape_toml(r"back\slash"), r"back\\slash");
        assert_eq!(escape_toml("line\nbreak"), "line\\nbreak");
        assert_eq!(escape_toml("tab\there"), "tab\\there");
        assert_eq!(escape_toml("cr\rhere"), "cr\\rhere");
    }
    // Names that reduce to nothing after trimming must be rejected.
    #[test]
    fn sanitize_rejects_traversal() {
        assert!(sanitize_path_component("..").is_err());
        assert!(sanitize_path_component("...").is_err());
        assert!(sanitize_path_component("").is_err());
        assert!(sanitize_path_component("  ").is_err());
    }
    // Path separators and NUL are replaced with underscores, not kept.
    #[test]
    fn sanitize_replaces_separators() {
        let s = sanitize_path_component("foo/bar\\baz\0qux").unwrap();
        assert!(!s.contains('/'));
        assert!(!s.contains('\\'));
        assert!(!s.contains('\0'));
        assert_eq!(s, "foo_bar_baz_qux");
    }
    // Leading and trailing dots are stripped from otherwise valid names.
    #[test]
    fn sanitize_trims_dots() {
        let s = sanitize_path_component(".hidden.").unwrap();
        assert_eq!(s, "hidden");
    }
 }
--- a/src/skillforge/mod.rs
+++ b/src/skillforge/mod.rs
@ -0,0 +1,255 @@
 //! SkillForge — Skill auto-discovery, evaluation, and integration engine.
 //!
 //! Pipeline: Scout → Evaluate → Integrate
 //! Discovers skills from external sources, scores them, and generates
 //! ZeroClaw-compatible manifests for qualified candidates.
 pub mod evaluate;
 pub mod integrate;
 pub mod scout;
 use anyhow::Result;
 use serde::{Deserialize, Serialize};
 use tracing::{info, warn};
 use self::evaluate::{EvalResult, Evaluator, Recommendation};
 use self::integrate::Integrator;
 use self::scout::{GitHubScout, Scout, ScoutResult, ScoutSource};
 // ---------------------------------------------------------------------------
 // Configuration
 // ---------------------------------------------------------------------------
 /// User-facing configuration for the SkillForge pipeline.
 ///
 /// Every field has a serde default, so an empty config section deserializes
 /// successfully. `Debug` is implemented by hand (not derived) so that the
 /// GitHub token is never printed.
 #[derive(Clone, Serialize, Deserialize)]
 pub struct SkillForgeConfig {
    /// Master switch; when `false`, `forge()` returns an empty report. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Whether candidates recommended as `Auto` are actually written to disk.
    /// When `false` they are counted for manual review instead.
    #[serde(default = "default_auto_integrate")]
    pub auto_integrate: bool,
    /// Names of the sources to scout; each entry is parsed into a `ScoutSource`.
    #[serde(default = "default_sources")]
    pub sources: Vec<String>,
    /// Scan interval in hours.
    /// NOTE(review): not read anywhere in this module — presumably consumed by a scheduler; confirm.
    #[serde(default = "default_scan_interval")]
    pub scan_interval_hours: u64,
    /// Minimum total score for a candidate to be recommended for auto-integration.
    #[serde(default = "default_min_score")]
    pub min_score: f64,
    /// Optional GitHub personal-access token for higher rate limits.
    #[serde(default)]
    pub github_token: Option<String>,
    /// Directory where integrated skills are written.
    #[serde(default = "default_output_dir")]
    pub output_dir: String,
 }
 /// Serde default for `auto_integrate`: auto-integration is on.
 fn default_auto_integrate() -> bool {
    true
 }
 /// Serde default for `sources`: GitHub and ClawHub, in that order.
 fn default_sources() -> Vec<String> {
    ["github", "clawhub"]
        .iter()
        .map(|name| name.to_string())
        .collect()
 }
 /// Serde default for `scan_interval_hours`: 24.
 fn default_scan_interval() -> u64 {
    24
 }
 /// Serde default for `min_score`: 0.7.
 fn default_min_score() -> f64 {
    0.7
 }
 /// Serde default for `output_dir`: a `skills` directory under the working directory.
 fn default_output_dir() -> String {
    String::from("./skills")
 }
 impl Default for SkillForgeConfig {
    fn default() -> Self {
        Self {
            enabled: false,
            auto_integrate: default_auto_integrate(),
            sources: default_sources(),
            scan_interval_hours: default_scan_interval(),
            min_score: default_min_score(),
            github_token: None,
            output_dir: default_output_dir(),
        }
    }
 }
 impl std::fmt::Debug for SkillForgeConfig {
    /// Formats the config like a derived `Debug` would, except that a present
    /// GitHub token is shown as `Some("***")` so the secret never reaches logs.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep the Some/None shape visible, but never the token itself.
        let redacted_token: Option<&str> = match self.github_token {
            Some(_) => Some("***"),
            None => None,
        };
        let mut out = f.debug_struct("SkillForgeConfig");
        out.field("enabled", &self.enabled);
        out.field("auto_integrate", &self.auto_integrate);
        out.field("sources", &self.sources);
        out.field("scan_interval_hours", &self.scan_interval_hours);
        out.field("min_score", &self.min_score);
        out.field("github_token", &redacted_token);
        out.field("output_dir", &self.output_dir);
        out.finish()
    }
 }
 // ---------------------------------------------------------------------------
 // ForgeReport — summary of a single pipeline run
 // ---------------------------------------------------------------------------
 /// Summary of a single `forge()` run.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ForgeReport {
    /// Number of unique candidates left after deduplication.
    pub discovered: usize,
    /// Number of candidates that were scored.
    pub evaluated: usize,
    /// Number of candidates whose manifests were successfully written.
    pub auto_integrated: usize,
    /// Number of candidates left for human review (includes `Auto`
    /// recommendations when auto-integration is turned off).
    pub manual_review: usize,
    /// Number of candidates recommended as `Skip`.
    pub skipped: usize,
    /// Full evaluation result for every candidate.
    pub results: Vec<EvalResult>,
 }
 // ---------------------------------------------------------------------------
 // SkillForge
 // ---------------------------------------------------------------------------
 /// Pipeline driver that ties together scouting, evaluation and integration.
 pub struct SkillForge {
    /// Configuration the pipeline was built from.
    config: SkillForgeConfig,
    /// Scores candidates against `config.min_score`.
    evaluator: Evaluator,
    /// Writes manifests under `config.output_dir`.
    integrator: Integrator,
 }
 impl SkillForge {
    /// Builds the pipeline from `config`, wiring `min_score` into the
    /// evaluator and `output_dir` into the integrator.
    pub fn new(config: SkillForgeConfig) -> Self {
        let evaluator = Evaluator::new(config.min_score);
        let integrator = Integrator::new(config.output_dir.clone());
        Self {
            config,
            evaluator,
            integrator,
        }
    }
    /// Run the full pipeline: Scout → Evaluate → Integrate.
    ///
    /// Returns an all-zero report immediately when the forge is disabled.
    /// Otherwise each distinct configured source is scouted once, the
    /// candidates are deduplicated and scored, and `Auto` candidates are
    /// integrated when `auto_integrate` is on.
    ///
    /// Failures are isolated: a failing scout or a failing integration is
    /// logged and the run continues. A candidate whose integration fails is
    /// not counted in `auto_integrated`, `manual_review` or `skipped`.
    pub async fn forge(&self) -> Result<ForgeReport> {
        if !self.config.enabled {
            warn!("SkillForge is disabled — skipping");
            return Ok(ForgeReport {
                discovered: 0,
                evaluated: 0,
                auto_integrated: 0,
                manual_review: 0,
                skipped: 0,
                results: vec![],
            });
        }
        // --- Scout ----------------------------------------------------------
        let mut candidates: Vec<ScoutResult> = Vec::new();
        // Source kinds already handled in this run. Duplicate entries, and
        // unknown names (which parse as GitHub), must not trigger a second
        // round of requests against the same source.
        let mut scouted: Vec<ScoutSource> = Vec::new();
        for src in &self.config.sources {
            let source = match src.parse::<ScoutSource>() {
                Ok(source) => source,
                // The error type is uninhabited, so this arm can never run.
                Err(never) => match never {},
            };
            if scouted.contains(&source) {
                info!(
                    source = src.as_str(),
                    "Source already scouted in this run — skipping duplicate"
                );
                continue;
            }
            scouted.push(source);
            match source {
                ScoutSource::GitHub => {
                    let scout = GitHubScout::new(self.config.github_token.clone());
                    match scout.discover().await {
                        Ok(mut found) => {
                            info!(count = found.len(), "GitHub scout returned candidates");
                            candidates.append(&mut found);
                        }
                        Err(e) => {
                            warn!(error = %e, "GitHub scout failed, continuing with other sources");
                        }
                    }
                }
                ScoutSource::ClawHub | ScoutSource::HuggingFace => {
                    info!(source = src.as_str(), "Source not yet implemented — skipping");
                }
            }
        }
        // Deduplicate by URL
        scout::dedup(&mut candidates);
        let discovered = candidates.len();
        info!(discovered, "Total unique candidates after dedup");
        // --- Evaluate -------------------------------------------------------
        let results: Vec<EvalResult> = candidates
            .into_iter()
            .map(|c| self.evaluator.evaluate(c))
            .collect();
        let evaluated = results.len();
        // --- Integrate ------------------------------------------------------
        let mut auto_integrated = 0usize;
        let mut manual_review = 0usize;
        let mut skipped = 0usize;
        for res in &results {
            match res.recommendation {
                Recommendation::Auto => {
                    if self.config.auto_integrate {
                        match self.integrator.integrate(&res.candidate) {
                            Ok(_) => {
                                auto_integrated += 1;
                            }
                            Err(e) => {
                                warn!(
                                    skill = res.candidate.name.as_str(),
                                    error = %e,
                                    "Integration failed for candidate, continuing"
                                );
                            }
                        }
                    } else {
                        // Count as would-be auto but not actually integrated
                        manual_review += 1;
                    }
                }
                Recommendation::Manual => {
                    manual_review += 1;
                }
                Recommendation::Skip => {
                    skipped += 1;
                }
            }
        }
        info!(
            auto_integrated,
            manual_review, skipped, "Forge pipeline complete"
        );
        Ok(ForgeReport {
            discovered,
            evaluated,
            auto_integrated,
            manual_review,
            skipped,
            results,
        })
    }
 }
 // ---------------------------------------------------------------------------
 // Tests
 // ---------------------------------------------------------------------------
 #[cfg(test)]
 mod tests {
    use super::*;
    // A disabled forge must short-circuit to an empty report.
    #[tokio::test]
    async fn disabled_forge_returns_empty_report() {
        let cfg = SkillForgeConfig {
            enabled: false,
            ..Default::default()
        };
        let forge = SkillForge::new(cfg);
        let report = forge.forge().await.unwrap();
        assert_eq!(report.discovered, 0);
        assert_eq!(report.auto_integrated, 0);
    }
    // Pins the documented defaults so accidental changes are caught.
    #[test]
    fn default_config_values() {
        let cfg = SkillForgeConfig::default();
        assert!(!cfg.enabled);
        assert!(cfg.auto_integrate);
        assert_eq!(cfg.scan_interval_hours, 24);
        assert!((cfg.min_score - 0.7).abs() < f64::EPSILON);
        assert_eq!(cfg.sources, vec!["github", "clawhub"]);
    }
 }
--- a/src/skillforge/scout.rs
+++ b/src/skillforge/scout.rs
@ -0,0 +1,331 @@
 //! Scout — skill discovery from external sources.
 use anyhow::Result;
 use async_trait::async_trait;
 use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize};
 use tracing::{debug, warn};
 // ---------------------------------------------------------------------------
 // ScoutSource
 // ---------------------------------------------------------------------------
 /// External source that a discovered skill candidate is attributed to.
 ///
 /// Every `ScoutResult` carries one of these in its `source` field.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 pub enum ScoutSource {
    /// GitHub; the value `GitHubScout` tags its results with, and the fallback
    /// used when parsing an unrecognised source name.
    GitHub,
    /// ClawHub (parsed from `"clawhub"`).
    ClawHub,
    /// Hugging Face (parsed from `"huggingface"` or the short alias `"hf"`).
    HuggingFace,
 }
 impl std::str::FromStr for ScoutSource {
    /// Parsing never fails: unrecognised names fall back to `GitHub`.
    type Err = std::convert::Infallible;
    /// Maps a source name to its variant, ignoring letter case.
    ///
    /// Accepts `"github"`, `"clawhub"`, `"huggingface"` and the alias `"hf"`. Any other
    /// input logs a warning and yields `ScoutSource::GitHub`.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        let source = match normalized.as_str() {
            "github" => Self::GitHub,
            "clawhub" => Self::ClawHub,
            "hf" | "huggingface" => Self::HuggingFace,
            _ => {
                warn!(source = s, "Unknown scout source, defaulting to GitHub");
                Self::GitHub
            }
        };
        Ok(source)
    }
 }
 // ---------------------------------------------------------------------------
 // ScoutResult
 // ---------------------------------------------------------------------------
 /// One candidate skill found by a scout, as a plain serialisable record.
 ///
 /// The per-field notes that mention GitHub describe how `GitHubScout::parse_items`
 /// fills the record in.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ScoutResult {
    /// Candidate name; for GitHub results, the repository's `name`.
    pub name: String,
    /// Web URL of the candidate (GitHub: `html_url`). `dedup` treats it as the identity key.
    pub url: String,
    /// Free-text description; GitHub results use an empty string when none is given.
    pub description: String,
    /// Star count; GitHub results use 0 when `stargazers_count` is absent.
    pub stars: u64,
    /// Language reported by the source, if any.
    pub language: Option<String>,
    /// Last-update timestamp; `None` when absent or not parseable as a UTC date-time.
    pub updated_at: Option<DateTime<Utc>>,
    /// Which source produced this result.
    pub source: ScoutSource,
    /// Owner / org extracted from the URL or API response.
    pub owner: String,
    /// Whether the repo has a license file.
    pub has_license: bool,
 }
 // ---------------------------------------------------------------------------
 // Scout trait
 // ---------------------------------------------------------------------------
 /// Common interface for skill-discovery back-ends.
 ///
 /// Implementors must be `Send + Sync`; the method is made async through `async_trait`.
 #[async_trait]
 pub trait Scout: Send + Sync {
    /// Discover candidate skills from the source.
    ///
    /// # Errors
    ///
    /// What counts as an error is up to the implementor. `GitHubScout`, for example,
    /// logs per-query failures and still returns `Ok`.
    async fn discover(&self) -> Result<Vec<ScoutResult>>;
 }
 // ---------------------------------------------------------------------------
 // GitHubScout
 // ---------------------------------------------------------------------------
 /// Searches GitHub for repos matching skill-related queries.
 pub struct GitHubScout {
    /// HTTP client; `new` configures its default headers and a 30-second timeout.
    client: reqwest::Client,
    /// Search phrases; `discover` issues one repository-search request per entry.
    queries: Vec<String>,
 }
 impl GitHubScout {
    /// Creates a scout whose HTTP client is preconfigured for the GitHub REST API.
    ///
    /// Every request carries the GitHub JSON `Accept` header and a fixed `User-Agent`,
    /// and is subject to a 30-second timeout. When `token` is supplied it is sent as
    /// `Authorization: Bearer <token>`.
    ///
    /// The token is trimmed first, so surrounding whitespace or a trailing newline
    /// (common when the value comes from a file or the environment) does not
    /// invalidate the header. A blank token, or one that still cannot be encoded as a
    /// header value, is ignored and the scout runs unauthenticated.
    ///
    /// # Panics
    ///
    /// Panics if the underlying `reqwest` client cannot be built.
    pub fn new(token: Option<String>) -> Self {
        use std::time::Duration;
        let mut headers = reqwest::header::HeaderMap::new();
        headers.insert(
            reqwest::header::ACCEPT,
            reqwest::header::HeaderValue::from_static("application/vnd.github+json"),
        );
        headers.insert(
            reqwest::header::USER_AGENT,
            reqwest::header::HeaderValue::from_static("ZeroClaw-SkillForge/0.1"),
        );
        // Best effort by design: an unusable token degrades to anonymous access
        // instead of failing construction.
        let bearer = token
            .as_deref()
            .map(str::trim)
            .filter(|t| !t.is_empty())
            .and_then(|t| {
                format!("Bearer {t}")
                    .parse::<reqwest::header::HeaderValue>()
                    .ok()
            });
        if let Some(mut val) = bearer {
            // Flag the credential as sensitive so the header value's `Debug`
            // output does not print it.
            val.set_sensitive(true);
            headers.insert(reqwest::header::AUTHORIZATION, val);
        }
        let client = reqwest::Client::builder()
            .default_headers(headers)
            .timeout(Duration::from_secs(30))
            .build()
            .expect("failed to build reqwest client");
        Self {
            client,
            queries: vec![
                "zeroclaw skill".into(),
                "ai agent skill".into(),
            ],
        }
    }
    /// Parse the GitHub search/repositories JSON response.
    ///
    /// Returns one `ScoutResult` per usable element of the top-level `items` array.
    /// An absent or non-array `items` yields an empty vector; elements without a
    /// string `name` or `html_url` are skipped.
    fn parse_items(body: &serde_json::Value) -> Vec<ScoutResult> {
        body.get("items")
            .and_then(|v| v.as_array())
            .map(|items| items.iter().filter_map(Self::parse_item).collect())
            .unwrap_or_default()
    }
    /// Converts one repository object into a `ScoutResult`.
    ///
    /// Returns `None` when `name` or `html_url` is missing or not a string; every
    /// other field falls back to a default.
    fn parse_item(item: &serde_json::Value) -> Option<ScoutResult> {
        let name = item.get("name")?.as_str()?.to_string();
        let url = item.get("html_url")?.as_str()?.to_string();
        let description = item
            .get("description")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        let stars = item
            .get("stargazers_count")
            .and_then(|v| v.as_u64())
            .unwrap_or(0);
        let language = item
            .get("language")
            .and_then(|v| v.as_str())
            .map(String::from);
        let updated_at = item
            .get("updated_at")
            .and_then(|v| v.as_str())
            .and_then(|s| s.parse::<DateTime<Utc>>().ok());
        let owner = item
            .get("owner")
            .and_then(|o| o.get("login"))
            .and_then(|v| v.as_str())
            .unwrap_or("unknown")
            .to_string();
        // A JSON `null` license means "no license", same as the key being absent.
        let has_license = item.get("license").map_or(false, |v| !v.is_null());
        Some(ScoutResult {
            name,
            url,
            description,
            stars,
            language,
            updated_at,
            source: ScoutSource::GitHub,
            owner,
            has_license,
        })
    }
 }
 #[async_trait]
 impl Scout for GitHubScout {
    /// Runs every configured query against the GitHub repository-search endpoint and
    /// returns the combined candidates with duplicate URLs removed.
    ///
    /// A query is logged at `warn` level and skipped when its request fails, its
    /// response status is not a success, or its body cannot be parsed as JSON.
    /// This implementation therefore always returns `Ok`.
    async fn discover(&self) -> Result<Vec<ScoutResult>> {
        let mut found: Vec<ScoutResult> = Vec::new();
        for query in &self.queries {
            let q = query.as_str();
            let encoded = urlencoding(q);
            let endpoint = format!(
                "https://api.github.com/search/repositories?q={}&sort=stars&order=desc&per_page=30",
                encoded
            );
            debug!(query = q, "Searching GitHub");
            let response = match self.client.get(&endpoint).send().await {
                Err(e) => {
                    warn!(
                        query = q,
                        error = %e,
                        "GitHub API request failed, skipping query"
                    );
                    continue;
                }
                Ok(r) => r,
            };
            let status = response.status();
            if !status.is_success() {
                warn!(
                    status = %status,
                    query = q,
                    "GitHub search returned non-200"
                );
                continue;
            }
            let decoded = response.json::<serde_json::Value>().await;
            let body = match decoded {
                Err(e) => {
                    warn!(
                        query = q,
                        error = %e,
                        "Failed to parse GitHub response, skipping query"
                    );
                    continue;
                }
                Ok(v) => v,
            };
            let items = Self::parse_items(&body);
            debug!(count = items.len(), query = q, "Parsed items");
            found.extend(items);
        }
        // The same repository can match several queries; keep its first hit only.
        dedup(&mut found);
        Ok(found)
    }
 }
 // ---------------------------------------------------------------------------
 // Helpers
 // ---------------------------------------------------------------------------
 /// Percent-encodes `s` for use as a URL query value (space → `+`).
 ///
 /// ASCII letters, digits and `-`, `_`, `.`, `~` pass through unchanged; a space
 /// becomes `+`; every other byte of the UTF-8 encoding is written as `%XX` with
 /// upper-case hex digits. Escaping `%`, `+`, `=`, `?` and non-ASCII text keeps the
 /// server from misreading the value (a raw `+`, for instance, decodes as a space).
 fn urlencoding(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";
    let mut out = String::with_capacity(s.len());
    for &byte in s.as_bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                out.push(char::from(byte));
            }
            b' ' => out.push('+'),
            _ => {
                out.push('%');
                out.push(char::from(HEX[usize::from(byte >> 4)]));
                out.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
    }
    out
 }
 /// Removes results whose URL has already appeared earlier in the list.
 ///
 /// Works in place and preserves order: the first result for each URL survives,
 /// later ones with the same URL are dropped.
 pub fn dedup(results: &mut Vec<ScoutResult>) {
    let mut seen_urls: std::collections::HashSet<String> = std::collections::HashSet::new();
    results.retain(|candidate| {
        if seen_urls.contains(&candidate.url) {
            false
        } else {
            seen_urls.insert(candidate.url.clone())
        }
    });
 }
 // ---------------------------------------------------------------------------
 // Tests
 // ---------------------------------------------------------------------------
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Builds a GitHub-sourced fixture owned by "x" with no description, language
    /// or timestamp.
    fn fixture(name: &str, url: &str, stars: u64, has_license: bool) -> ScoutResult {
        ScoutResult {
            name: name.into(),
            url: url.into(),
            description: String::new(),
            stars,
            language: None,
            updated_at: None,
            source: ScoutSource::GitHub,
            owner: "x".into(),
            has_license,
        }
    }
    /// Source names parse case-insensitively; unrecognised names fall back to GitHub.
    #[test]
    fn scout_source_from_str() {
        let cases = [
            ("github", ScoutSource::GitHub),
            ("GitHub", ScoutSource::GitHub),
            ("clawhub", ScoutSource::ClawHub),
            ("huggingface", ScoutSource::HuggingFace),
            ("hf", ScoutSource::HuggingFace),
            // unknown falls back to GitHub
            ("unknown", ScoutSource::GitHub),
        ];
        for &(input, expected) in &cases {
            assert_eq!(input.parse::<ScoutSource>().unwrap(), expected);
        }
    }
    /// Two results sharing a URL collapse to the first one; order is preserved.
    #[test]
    fn dedup_removes_duplicates() {
        let mut results = vec![
            fixture("a", "https://github.com/x/a", 10, true),
            fixture("a-dup", "https://github.com/x/a", 10, true),
            fixture("b", "https://github.com/x/b", 5, false),
        ];
        dedup(&mut results);
        assert_eq!(results.len(), 2);
        let names: Vec<&str> = results.iter().map(|r| r.name.as_str()).collect();
        assert_eq!(names[0], "a");
        assert_eq!(names[1], "b");
    }
    /// A single well-formed search hit is converted field by field.
    #[test]
    fn parse_github_items() {
        let payload = serde_json::json!({
            "total_count": 1,
            "items": [
                {
                    "name": "cool-skill",
                    "html_url": "https://github.com/user/cool-skill",
                    "description": "A cool skill",
                    "stargazers_count": 42,
                    "language": "Rust",
                    "updated_at": "2026-01-15T10:00:00Z",
                    "owner": { "login": "user" },
                    "license": { "spdx_id": "MIT" }
                }
            ]
        });
        let parsed = GitHubScout::parse_items(&payload);
        assert_eq!(parsed.len(), 1);
        let first = &parsed[0];
        assert_eq!(first.name, "cool-skill");
        assert_eq!(first.stars, 42);
        assert!(first.has_license);
        assert_eq!(first.owner, "user");
    }
    /// Spaces become `+`; `&` and `#` are percent-escaped.
    #[test]
    fn urlencoding_works() {
        assert_eq!(urlencoding("hello world"), "hello+world");
        assert_eq!(urlencoding("a&b#c"), "a%26b%23c");
    }
 }