feat: SkillForge — automated skill discovery, evaluation & integration engine (#144)

* feat: add SkillForge — automated skill discovery, evaluation, and integration engine

  SkillForge adds a 3-stage pipeline for autonomous skill management:
  - Scout: discovers candidate skills from GitHub (extensible to ClawHub, HuggingFace)
  - Evaluate: scores candidates on compatibility, quality, and security (weighted 0.30/0.35/0.35)
  - Integrate: generates standard SKILL.toml + SKILL.md manifests for approved candidates

  Thresholds: >=0.7 auto-integrate, 0.4-0.7 manual review, <0.4 skip.

  Uses only existing dependencies (reqwest, serde, tokio, tracing, chrono, anyhow).
  Includes unit tests for all modules.

* fix: address code review feedback on SkillForge PR #115
  - evaluate: whole-word matching for BAD_PATTERNS (fixes hackathon false positive)
  - evaluate: guard against future timestamps in recency bonus
  - integrate: escape URLs in TOML output via escape_toml()
  - integrate: handle control chars (\n, \r, \t, \b, \f) in escape_toml()
  - mod: redact github_token in Debug impl to prevent log leakage
  - mod: fix auto_integrated count when auto_integrate=false
  - mod: per-candidate error handling (single failure no longer aborts pipeline)
  - scout: add 30s request timeout, remove unused token field
  - deps: enable chrono serde feature for DateTime serialization
  - tests: add hackathon/exact-hack tests, update escape_toml test coverage

* fix: address round-2 CodeRabbit review feedback
  - integrate: add sanitize_path_component() to prevent directory traversal
  - mod: GitHub scout failure now logs warning and continues (no pipeline abort)
  - scout: network/parse errors per-query use warn+continue instead of ?
  - scout: implement std::str::FromStr for ScoutSource (replaces custom from_str)
  - tests: add path sanitization tests (traversal, separators, dot trimming)

---------

Co-authored-by: stawky <stakeswky@gmail.com>
2026-02-15 09:26:13 -05:00 · 2026-02-15 09:26:13 -05:00 · 35b63d6b12
commit 35b63d6b12
parent 2ac571f406
7 changed files with 1098 additions and 1 deletions
--- a/src/skillforge/evaluate.rs
+++ b/src/skillforge/evaluate.rs
@ -0,0 +1,261 @@
+//! Evaluator — scores discovered skill candidates across multiple dimensions.
+
+use serde::{Deserialize, Serialize};
+
+use super::scout::ScoutResult;
+
+// ---------------------------------------------------------------------------
+// Scoring dimensions
+// ---------------------------------------------------------------------------
+
+/// Per-dimension scores for one candidate; `Scores::total` folds them into a
+/// single weighted value.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Scores {
+    /// Compatibility score (0.0–1.0). The scorer in this file derives it from
+    /// the candidate's reported language only.
+    pub compatibility: f64,
+    /// Quality score (0.0–1.0). The scorer in this file derives it from the
+    /// star count only (log scale).
+    pub quality: f64,
+    /// Security score (0.0–1.0). The scorer in this file combines license
+    /// presence, a known-bad-pattern check, and a recency bonus.
+    pub security: f64,
+}
+
+impl Scores {
+    /// Collapses the three dimension scores into one weighted figure.
+    ///
+    /// Compatibility contributes 0.30; quality and security contribute 0.35
+    /// each.
+    pub fn total(&self) -> f64 {
+        let compatibility_part = self.compatibility * 0.30;
+        let quality_part = self.quality * 0.35;
+        let security_part = self.security * 0.35;
+        // Added left to right, in the same order as the weights are listed.
+        compatibility_part + quality_part + security_part
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Recommendation
+// ---------------------------------------------------------------------------
+
+/// Outcome the evaluator assigns to a candidate based on its total score.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+pub enum Recommendation {
+    /// Total score >= the evaluator's `min_score` → safe to auto-integrate.
+    Auto,
+    /// Total score below `min_score` but >= 0.4 → needs human review.
+    Manual,
+    /// Anything else (total score < 0.4 and below `min_score`) → skip entirely.
+    Skip,
+}
+
+// ---------------------------------------------------------------------------
+// EvalResult
+// ---------------------------------------------------------------------------
+
+/// Result of evaluating a single candidate.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EvalResult {
+    /// The candidate that was evaluated (moved in by `Evaluator::evaluate`).
+    pub candidate: ScoutResult,
+    /// Individual dimension scores.
+    pub scores: Scores,
+    /// Value of `scores.total()` at evaluation time.
+    pub total_score: f64,
+    /// Decision derived from `total_score`.
+    pub recommendation: Recommendation,
+}
+
+// ---------------------------------------------------------------------------
+// Evaluator
+// ---------------------------------------------------------------------------
+
+/// Scores candidates and maps the total score to a [`Recommendation`].
+pub struct Evaluator {
+    /// Minimum total score for auto-integration; `evaluate` compares the
+    /// weighted total against this with `>=`.
+    min_score: f64,
+}
+
+/// Known-bad patterns in repo names / descriptions (matched as whole words).
+///
+/// Entries are lowercase; the security scorer lowercases the name and
+/// description before checking them against this list.
+const BAD_PATTERNS: &[&str] = &[
+    "malware",
+    "exploit",
+    "hack",
+    "crack",
+    "keygen",
+    "ransomware",
+    "trojan",
+];
+
+/// Reports whether `word` occurs in `haystack` as a standalone word.
+///
+/// An occurrence counts when the byte on each side is either absent (start or
+/// end of the string) or not an ASCII letter/digit. The check is byte-based,
+/// so any non-ASCII neighbour is treated as a word boundary.
+fn contains_word(haystack: &str, word: &str) -> bool {
+    let bytes = haystack.as_bytes();
+    // A missing neighbour (string edge) or a non-alphanumeric byte is a boundary.
+    let is_boundary =
+        |neighbour: Option<&u8>| neighbour.map_or(true, |b| !b.is_ascii_alphanumeric());
+
+    haystack.match_indices(word).any(|(start, _)| {
+        let end = start + word.len();
+        let left = start.checked_sub(1).and_then(|pos| bytes.get(pos));
+        let right = bytes.get(end);
+        is_boundary(left) && is_boundary(right)
+    })
+}
+
+impl Evaluator {
+    /// Builds an evaluator whose auto-integration cut-off is `min_score`.
+    pub fn new(min_score: f64) -> Self {
+        Self { min_score }
+    }
+
+    /// Scores `candidate` on every dimension and derives a recommendation.
+    ///
+    /// The weighted total is compared first against `min_score` (→ `Auto`),
+    /// then against the fixed 0.4 floor (→ `Manual`); anything else is `Skip`.
+    /// The candidate is moved into the returned [`EvalResult`].
+    pub fn evaluate(&self, candidate: ScoutResult) -> EvalResult {
+        let scores = Scores {
+            compatibility: self.score_compatibility(&candidate),
+            quality: self.score_quality(&candidate),
+            security: self.score_security(&candidate),
+        };
+        let total_score = scores.total();
+
+        let recommendation = match total_score {
+            s if s >= self.min_score => Recommendation::Auto,
+            s if s >= 0.4 => Recommendation::Manual,
+            _ => Recommendation::Skip,
+        };
+
+        EvalResult {
+            candidate,
+            scores,
+            total_score,
+            recommendation,
+        }
+    }
+
+    // -- Dimension scorers --------------------------------------------------
+
+    /// Compatibility: Rust scores highest, a few common scripting languages
+    /// score in the middle, any other language scores low, and a missing
+    /// language scores lowest.
+    fn score_compatibility(&self, c: &ScoutResult) -> f64 {
+        c.language.as_deref().map_or(0.2, |language| match language {
+            "Rust" => 1.0,
+            "Python" | "TypeScript" | "JavaScript" => 0.6,
+            _ => 0.3,
+        })
+    }
+
+    /// Quality: `log2(stars + 1) / 10`, capped at 1.0.
+    fn score_quality(&self, c: &ScoutResult) -> f64 {
+        let star_count = c.stars as f64;
+        let scaled = (star_count + 1.0).log2() / 10.0;
+        f64::min(scaled, 1.0)
+    }
+
+    /// Security: starts at 0.5, then applies a license bonus, a bad-pattern
+    /// penalty, and a recency bonus before clamping to `0.0..=1.0`.
+    fn score_security(&self, c: &ScoutResult) -> f64 {
+        let mut score: f64 = 0.5;
+
+        // A declared license earns +0.3.
+        if c.has_license {
+            score += 0.3;
+        }
+
+        // A whole-word hit on any known-bad pattern, in either the name or the
+        // description, costs 0.5 — applied at most once.
+        let name = c.name.to_lowercase();
+        let description = c.description.to_lowercase();
+        let flagged = BAD_PATTERNS
+            .iter()
+            .any(|pat| contains_word(&name, pat) || contains_word(&description, pat));
+        if flagged {
+            score -= 0.5;
+        }
+
+        // Updated within the last 180 days earns +0.2. Timestamps in the future
+        // give a negative age and therefore never qualify.
+        let recently_updated = c.updated_at.map_or(false, |updated| {
+            let age_days = (chrono::Utc::now() - updated).num_days();
+            (0..180).contains(&age_days)
+        });
+        if recently_updated {
+            score += 0.2;
+        }
+
+        score.clamp(0.0, 1.0)
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::skillforge::scout::{ScoutResult, ScoutSource};
+
+    /// Builds a GitHub candidate named "test-skill" that was updated just now,
+    /// so it always earns the recency bonus unless a test overrides
+    /// `updated_at`.
+    fn make_candidate(stars: u64, lang: Option<&str>, has_license: bool) -> ScoutResult {
+        ScoutResult {
+            name: "test-skill".into(),
+            url: "https://github.com/test/test-skill".into(),
+            description: "A test skill".into(),
+            stars,
+            language: lang.map(String::from),
+            updated_at: Some(chrono::Utc::now()),
+            source: ScoutSource::GitHub,
+            owner: "test".into(),
+            has_license,
+        }
+    }
+
+    #[test]
+    fn high_quality_rust_repo_gets_auto() {
+        let eval = Evaluator::new(0.7);
+        let c = make_candidate(500, Some("Rust"), true);
+        let res = eval.evaluate(c);
+        assert!(res.total_score >= 0.7, "score: {}", res.total_score);
+        assert_eq!(res.recommendation, Recommendation::Auto);
+    }
+
+    #[test]
+    fn low_star_no_license_gets_manual_or_skip() {
+        let eval = Evaluator::new(0.7);
+        let c = make_candidate(1, None, false);
+        let res = eval.evaluate(c);
+        assert!(res.total_score < 0.7, "score: {}", res.total_score);
+        assert_ne!(res.recommendation, Recommendation::Auto);
+    }
+
+    #[test]
+    fn bad_pattern_tanks_security() {
+        let eval = Evaluator::new(0.7);
+        let mut c = make_candidate(1000, Some("Rust"), true);
+        c.name = "malware-skill".into();
+        let res = eval.evaluate(c);
+        // 0.5 base + 0.3 license - 0.5 bad_pattern + 0.2 recency = 0.5
+        assert!(res.scores.security <= 0.5, "security: {}", res.scores.security);
+    }
+
+    #[test]
+    fn scores_total_weighted() {
+        let s = Scores {
+            compatibility: 1.0,
+            quality: 1.0,
+            security: 1.0,
+        };
+        assert!((s.total() - 1.0).abs() < f64::EPSILON);
+
+        let s2 = Scores {
+            compatibility: 0.0,
+            quality: 0.0,
+            security: 0.0,
+        };
+        assert!((s2.total()).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn hackathon_not_flagged_as_bad() {
+        let eval = Evaluator::new(0.7);
+        let mut c = make_candidate(500, Some("Rust"), true);
+        c.name = "hackathon-tools".into();
+        c.description = "Tools for hackathons and lifehacks".into();
+        let res = eval.evaluate(c);
+        // "hack" should NOT match "hackathon" or "lifehacks".
+        // Without the penalty: 0.5 base + 0.3 license + 0.2 recency = 1.0.
+        // A false positive would still land on 0.5, so the bound has to sit
+        // well above 0.5 for this test to catch a regression.
+        assert!(res.scores.security > 0.9, "security: {}", res.scores.security);
+    }
+
+    #[test]
+    fn exact_hack_is_flagged() {
+        let eval = Evaluator::new(0.7);
+        let mut c = make_candidate(500, Some("Rust"), false);
+        c.name = "hack-tool".into();
+        c.updated_at = None;
+        let res = eval.evaluate(c);
+        // 0.5 base + 0.0 license - 0.5 bad_pattern + 0.0 recency = 0.0
+        assert!(res.scores.security < 0.5, "security: {}", res.scores.security);
+    }
+}