feat: SkillForge — automated skill discovery, evaluation & integration engine (#144)
* feat: add SkillForge — automated skill discovery, evaluation, and integration engine SkillForge adds a 3-stage pipeline for autonomous skill management: - Scout: discovers candidate skills from GitHub (extensible to ClawHub, HuggingFace) - Evaluate: scores candidates on compatibility, quality, and security (weighted 0.30/0.35/0.35) - Integrate: generates standard SKILL.toml + SKILL.md manifests for approved candidates Thresholds: >=0.7 auto-integrate, 0.4-0.7 manual review, <0.4 skip. Uses only existing dependencies (reqwest, serde, tokio, tracing, chrono, anyhow). Includes unit tests for all modules. * fix: address code review feedback on SkillForge PR #115 - evaluate: whole-word matching for BAD_PATTERNS (fixes hackathon false positive) - evaluate: guard against future timestamps in recency bonus - integrate: escape URLs in TOML output via escape_toml() - integrate: handle control chars (\n, \r, \t, \b, \f) in escape_toml() - mod: redact github_token in Debug impl to prevent log leakage - mod: fix auto_integrated count when auto_integrate=false - mod: per-candidate error handling (single failure no longer aborts pipeline) - scout: add 30s request timeout, remove unused token field - deps: enable chrono serde feature for DateTime serialization - tests: add hackathon/exact-hack tests, update escape_toml test coverage * fix: address round-2 CodeRabbit review feedback - integrate: add sanitize_path_component() to prevent directory traversal - mod: GitHub scout failure now logs warning and continues (no pipeline abort) - scout: network/parse errors per-query use warn+continue instead of ? - scout: implement std::str::FromStr for ScoutSource (replaces custom from_str) - tests: add path sanitization tests (traversal, separators, dot trimming) --------- Co-authored-by: stawky <stakeswky@gmail.com>
This commit is contained in:
parent
2ac571f406
commit
35b63d6b12
7 changed files with 1098 additions and 1 deletions
261
src/skillforge/evaluate.rs
Normal file
261
src/skillforge/evaluate.rs
Normal file
|
|
@ -0,0 +1,261 @@
|
|||
//! Evaluator — scores discovered skill candidates across multiple dimensions.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::scout::ScoutResult;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scoring dimensions
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Scores {
    /// OS / arch / runtime compatibility (0.0–1.0).
    pub compatibility: f64,
    /// Code quality signals: stars, tests, docs (0.0–1.0).
    pub quality: f64,
    /// Security posture: license, known-bad patterns (0.0–1.0).
    pub security: f64,
}

impl Scores {
    /// Weighted total. Weights: compatibility 0.3, quality 0.35, security 0.35.
    ///
    /// The weights sum to 1.0, so the total stays within [0.0, 1.0] as long as
    /// every dimension does.
    pub fn total(&self) -> f64 {
        self.compatibility * 0.30 + self.quality * 0.35 + self.security * 0.35
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Recommendation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Recommendation {
    /// Score >= threshold → safe to auto-integrate.
    Auto,
    /// Score in [0.4, threshold) → needs human review.
    Manual,
    /// Score < 0.4 → skip entirely.
    Skip,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// EvalResult
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Outcome of evaluating one candidate: the per-dimension scores, the
/// weighted total, and the resulting recommendation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalResult {
    /// The candidate as returned by the scout stage (moved in, not borrowed).
    pub candidate: ScoutResult,
    /// Per-dimension scores (each 0.0–1.0).
    pub scores: Scores,
    /// Weighted total of `scores` — cached so callers need not recompute.
    pub total_score: f64,
    /// Action derived from `total_score` vs. the evaluator thresholds.
    pub recommendation: Recommendation,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Evaluator
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Scores scout candidates and maps totals onto a recommendation.
pub struct Evaluator {
    /// Minimum total score for auto-integration.
    min_score: f64,
}
|
||||
|
||||
/// Known-bad patterns in repo names / descriptions (matched as whole words).
///
/// Matched case-insensitively (callers lowercase the haystack first) and only
/// as whole words via `contains_word`, so e.g. "hackathon" is not flagged.
const BAD_PATTERNS: &[&str] = &[
    "malware",
    "exploit",
    "hack",
    "crack",
    "keygen",
    "ransomware",
    "trojan",
];
|
||||
|
||||
/// Check if `haystack` contains `word` as a whole word (bounded by non-alphanumeric chars).
///
/// A match counts only when the byte immediately before and after the matched
/// span is absent (string edge) or not an ASCII alphanumeric character.
fn contains_word(haystack: &str, word: &str) -> bool {
    let bytes = haystack.as_bytes();
    haystack.match_indices(word).any(|(start, matched)| {
        let end = start + matched.len();
        // Left boundary: string start, or preceding byte is not alphanumeric.
        let left_ok = start == 0 || !bytes[start - 1].is_ascii_alphanumeric();
        // Right boundary: string end, or following byte is not alphanumeric.
        let right_ok = end >= haystack.len() || !bytes[end].is_ascii_alphanumeric();
        left_ok && right_ok
    })
}
|
||||
|
||||
impl Evaluator {
    /// Create an evaluator; `min_score` is the auto-integration threshold.
    pub fn new(min_score: f64) -> Self {
        Self { min_score }
    }

    /// Score a candidate on every dimension and derive a recommendation.
    ///
    /// Thresholds: total >= `min_score` → `Auto`, total >= 0.4 → `Manual`,
    /// otherwise `Skip`. Consumes the candidate and embeds it in the result.
    pub fn evaluate(&self, candidate: ScoutResult) -> EvalResult {
        let compatibility = self.score_compatibility(&candidate);
        let quality = self.score_quality(&candidate);
        let security = self.score_security(&candidate);

        let scores = Scores {
            compatibility,
            quality,
            security,
        };
        let total_score = scores.total();

        let recommendation = if total_score >= self.min_score {
            Recommendation::Auto
        } else if total_score >= 0.4 {
            Recommendation::Manual
        } else {
            Recommendation::Skip
        };

        EvalResult {
            candidate,
            scores,
            total_score,
            recommendation,
        }
    }

    // -- Dimension scorers --------------------------------------------------

    /// Compatibility: favour Rust repos; penalise unknown languages.
    fn score_compatibility(&self, c: &ScoutResult) -> f64 {
        match c.language.as_deref() {
            Some("Rust") => 1.0,
            Some("Python" | "TypeScript" | "JavaScript") => 0.6,
            Some(_) => 0.3,
            None => 0.2,
        }
    }

    /// Quality: based on star count (log scale, capped at 1.0).
    fn score_quality(&self, c: &ScoutResult) -> f64 {
        // log2(stars + 1) / 10, capped at 1.0
        // (+1 keeps log2 defined at 0 stars; cap is reached around 1023 stars)
        let raw = ((c.stars as f64) + 1.0).log2() / 10.0;
        raw.min(1.0)
    }

    /// Security: license presence + bad-pattern check.
    ///
    /// Base 0.5; +0.3 with a license; -0.5 on the first bad-pattern hit
    /// (whole-word, in name or description); +0.2 if updated within the last
    /// 180 days. Result is clamped to [0.0, 1.0].
    fn score_security(&self, c: &ScoutResult) -> f64 {
        let mut score: f64 = 0.5;

        // License bonus
        if c.has_license {
            score += 0.3;
        }

        // Bad-pattern penalty (whole-word match). The penalty is applied at
        // most once — `break` on the first hit.
        let lower_name = c.name.to_lowercase();
        let lower_desc = c.description.to_lowercase();
        for pat in BAD_PATTERNS {
            if contains_word(&lower_name, pat) || contains_word(&lower_desc, pat) {
                score -= 0.5;
                break;
            }
        }

        // Recency bonus: updated within last 180 days (guard against future timestamps)
        // The 0..180 range excludes negative ages, i.e. `updated_at` in the future.
        if let Some(updated) = c.updated_at {
            let age_days = (chrono::Utc::now() - updated).num_days();
            if (0..180).contains(&age_days) {
                score += 0.2;
            }
        }

        score.clamp(0.0, 1.0)
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::skillforge::scout::{ScoutResult, ScoutSource};

    /// Build a minimal candidate; only stars/language/license vary, the rest
    /// are fixed benign values (recent `updated_at` grants the recency bonus).
    fn make_candidate(stars: u64, lang: Option<&str>, has_license: bool) -> ScoutResult {
        ScoutResult {
            name: "test-skill".into(),
            url: "https://github.com/test/test-skill".into(),
            description: "A test skill".into(),
            stars,
            language: lang.map(String::from),
            updated_at: Some(chrono::Utc::now()),
            source: ScoutSource::GitHub,
            owner: "test".into(),
            has_license,
        }
    }

    #[test]
    fn high_quality_rust_repo_gets_auto() {
        let eval = Evaluator::new(0.7);
        let c = make_candidate(500, Some("Rust"), true);
        let res = eval.evaluate(c);
        assert!(res.total_score >= 0.7, "score: {}", res.total_score);
        assert_eq!(res.recommendation, Recommendation::Auto);
    }

    #[test]
    fn low_star_no_license_gets_manual_or_skip() {
        let eval = Evaluator::new(0.7);
        let c = make_candidate(1, None, false);
        let res = eval.evaluate(c);
        assert!(res.total_score < 0.7, "score: {}", res.total_score);
        assert_ne!(res.recommendation, Recommendation::Auto);
    }

    #[test]
    fn bad_pattern_tanks_security() {
        let eval = Evaluator::new(0.7);
        let mut c = make_candidate(1000, Some("Rust"), true);
        c.name = "malware-skill".into();
        let res = eval.evaluate(c);
        // 0.5 base + 0.3 license - 0.5 bad_pattern + 0.2 recency = 0.5
        assert!(res.scores.security <= 0.5, "security: {}", res.scores.security);
    }

    #[test]
    fn scores_total_weighted() {
        // Weights sum to 1.0, so all-ones totals 1.0 and all-zeros totals 0.0.
        let s = Scores {
            compatibility: 1.0,
            quality: 1.0,
            security: 1.0,
        };
        assert!((s.total() - 1.0).abs() < f64::EPSILON);

        let s2 = Scores {
            compatibility: 0.0,
            quality: 0.0,
            security: 0.0,
        };
        assert!((s2.total()).abs() < f64::EPSILON);
    }

    #[test]
    fn hackathon_not_flagged_as_bad() {
        let eval = Evaluator::new(0.7);
        let mut c = make_candidate(500, Some("Rust"), true);
        c.name = "hackathon-tools".into();
        c.description = "Tools for hackathons and lifehacks".into();
        let res = eval.evaluate(c);
        // "hack" should NOT match "hackathon" or "lifehacks"
        assert!(res.scores.security >= 0.5, "security: {}", res.scores.security);
    }

    #[test]
    fn exact_hack_is_flagged() {
        let eval = Evaluator::new(0.7);
        let mut c = make_candidate(500, Some("Rust"), false);
        c.name = "hack-tool".into();
        c.updated_at = None;
        let res = eval.evaluate(c);
        // 0.5 base + 0.0 license - 0.5 bad_pattern + 0.0 recency = 0.0
        assert!(res.scores.security < 0.5, "security: {}", res.scores.security);
    }
}
|
||||
248
src/skillforge/integrate.rs
Normal file
248
src/skillforge/integrate.rs
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
//! Integrator — generates ZeroClaw-standard SKILL.toml + SKILL.md from scout results.
|
||||
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use chrono::Utc;
|
||||
use tracing::info;
|
||||
|
||||
use super::scout::ScoutResult;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Integrator
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Writes SKILL.toml + SKILL.md manifests for approved candidates.
pub struct Integrator {
    /// Root directory; each skill gets its own sanitized subdirectory.
    output_dir: PathBuf,
}
|
||||
|
||||
impl Integrator {
|
||||
pub fn new(output_dir: String) -> Self {
|
||||
Self {
|
||||
output_dir: PathBuf::from(output_dir),
|
||||
}
|
||||
}
|
||||
|
||||
/// Write SKILL.toml and SKILL.md for the given candidate.
|
||||
pub fn integrate(&self, candidate: &ScoutResult) -> Result<PathBuf> {
|
||||
let safe_name = sanitize_path_component(&candidate.name)?;
|
||||
let skill_dir = self.output_dir.join(&safe_name);
|
||||
fs::create_dir_all(&skill_dir)
|
||||
.with_context(|| format!("Failed to create dir: {}", skill_dir.display()))?;
|
||||
|
||||
let toml_path = skill_dir.join("SKILL.toml");
|
||||
let md_path = skill_dir.join("SKILL.md");
|
||||
|
||||
let toml_content = self.generate_toml(candidate);
|
||||
let md_content = self.generate_md(candidate);
|
||||
|
||||
fs::write(&toml_path, &toml_content)
|
||||
.with_context(|| format!("Failed to write {}", toml_path.display()))?;
|
||||
fs::write(&md_path, &md_content)
|
||||
.with_context(|| format!("Failed to write {}", md_path.display()))?;
|
||||
|
||||
info!(
|
||||
skill = candidate.name.as_str(),
|
||||
path = %skill_dir.display(),
|
||||
"Integrated skill"
|
||||
);
|
||||
|
||||
Ok(skill_dir)
|
||||
}
|
||||
|
||||
// -- Generators ---------------------------------------------------------
|
||||
|
||||
fn generate_toml(&self, c: &ScoutResult) -> String {
|
||||
let lang = c.language.as_deref().unwrap_or("unknown");
|
||||
let updated = c
|
||||
.updated_at
|
||||
.map(|d| d.format("%Y-%m-%d").to_string())
|
||||
.unwrap_or_else(|| "unknown".into());
|
||||
|
||||
format!(
|
||||
r#"# Auto-generated by SkillForge on {now}
|
||||
|
||||
[skill]
|
||||
name = "{name}"
|
||||
version = "0.1.0"
|
||||
description = "{description}"
|
||||
source = "{url}"
|
||||
owner = "{owner}"
|
||||
language = "{lang}"
|
||||
license = {license}
|
||||
stars = {stars}
|
||||
updated_at = "{updated}"
|
||||
|
||||
[skill.requirements]
|
||||
runtime = "zeroclaw >= 0.1"
|
||||
|
||||
[skill.metadata]
|
||||
auto_integrated = true
|
||||
forge_timestamp = "{now}"
|
||||
"#,
|
||||
now = Utc::now().format("%Y-%m-%dT%H:%M:%SZ"),
|
||||
name = escape_toml(&c.name),
|
||||
description = escape_toml(&c.description),
|
||||
url = escape_toml(&c.url),
|
||||
owner = escape_toml(&c.owner),
|
||||
lang = lang,
|
||||
license = if c.has_license { "true" } else { "false" },
|
||||
stars = c.stars,
|
||||
updated = updated,
|
||||
)
|
||||
}
|
||||
|
||||
fn generate_md(&self, c: &ScoutResult) -> String {
|
||||
let lang = c.language.as_deref().unwrap_or("unknown");
|
||||
format!(
|
||||
r#"# {name}
|
||||
|
||||
> Auto-generated by SkillForge
|
||||
|
||||
## Overview
|
||||
|
||||
- **Source**: [{url}]({url})
|
||||
- **Owner**: {owner}
|
||||
- **Language**: {lang}
|
||||
- **Stars**: {stars}
|
||||
- **License**: {license}
|
||||
|
||||
## Description
|
||||
|
||||
{description}
|
||||
|
||||
## Usage
|
||||
|
||||
```toml
|
||||
# Add to your ZeroClaw config:
|
||||
[skills.{name}]
|
||||
enabled = true
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
This manifest was auto-generated from repository metadata.
|
||||
Review before enabling in production.
|
||||
"#,
|
||||
name = c.name,
|
||||
url = c.url,
|
||||
owner = c.owner,
|
||||
lang = lang,
|
||||
stars = c.stars,
|
||||
license = if c.has_license { "yes" } else { "unknown" },
|
||||
description = c.description,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Escape special characters for TOML basic string values.
///
/// Handles backslash, double quote, and the named short escapes
/// (\n, \r, \t, \b, \f). All remaining control characters
/// (U+0000–U+001F and U+007F) — which TOML forbids unescaped inside
/// basic strings — are emitted as `\uXXXX`, so arbitrary API-sourced
/// text always yields a valid TOML string.
fn escape_toml(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            '\u{08}' => out.push_str("\\b"),
            '\u{0C}' => out.push_str("\\f"),
            // Any other control char must still be escaped per the TOML spec.
            c if (c as u32) < 0x20 || c == '\u{7F}' => {
                out.push_str(&format!("\\u{:04X}", c as u32));
            }
            c => out.push(c),
        }
    }
    out
}
|
||||
|
||||
/// Sanitize a string for use as a single path component.
|
||||
/// Rejects empty names, "..", and names containing path separators or NUL.
|
||||
fn sanitize_path_component(name: &str) -> Result<String> {
|
||||
let trimmed = name.trim().trim_matches('.');
|
||||
if trimmed.is_empty() {
|
||||
bail!("Skill name is empty or only dots after sanitization");
|
||||
}
|
||||
let sanitized: String = trimmed
|
||||
.chars()
|
||||
.map(|c| match c {
|
||||
'/' | '\\' | '\0' => '_',
|
||||
_ => c,
|
||||
})
|
||||
.collect();
|
||||
if sanitized == ".." || sanitized.contains('/') || sanitized.contains('\\') {
|
||||
bail!("Skill name '{}' is unsafe as a path component", name);
|
||||
}
|
||||
Ok(sanitized)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::skillforge::scout::{ScoutResult, ScoutSource};
    use std::fs;

    /// A well-formed candidate with fixed metadata for manifest tests.
    fn sample_candidate() -> ScoutResult {
        ScoutResult {
            name: "test-skill".into(),
            url: "https://github.com/user/test-skill".into(),
            description: "A test skill for unit tests".into(),
            stars: 42,
            language: Some("Rust".into()),
            updated_at: Some(Utc::now()),
            source: ScoutSource::GitHub,
            owner: "user".into(),
            has_license: true,
        }
    }

    #[test]
    fn integrate_creates_files() {
        // Fresh temp dir per run; cleanup is best-effort on both ends.
        let tmp = std::env::temp_dir().join("zeroclaw-test-integrate");
        let _ = fs::remove_dir_all(&tmp);

        let integrator = Integrator::new(tmp.to_string_lossy().into_owned());
        let c = sample_candidate();
        let path = integrator.integrate(&c).unwrap();

        assert!(path.join("SKILL.toml").exists());
        assert!(path.join("SKILL.md").exists());

        let toml = fs::read_to_string(path.join("SKILL.toml")).unwrap();
        assert!(toml.contains("name = \"test-skill\""));
        assert!(toml.contains("stars = 42"));

        let md = fs::read_to_string(path.join("SKILL.md")).unwrap();
        assert!(md.contains("# test-skill"));
        assert!(md.contains("A test skill for unit tests"));

        let _ = fs::remove_dir_all(&tmp);
    }

    #[test]
    fn escape_toml_handles_quotes_and_control_chars() {
        assert_eq!(escape_toml(r#"say "hello""#), r#"say \"hello\""#);
        assert_eq!(escape_toml(r"back\slash"), r"back\\slash");
        assert_eq!(escape_toml("line\nbreak"), "line\\nbreak");
        assert_eq!(escape_toml("tab\there"), "tab\\there");
        assert_eq!(escape_toml("cr\rhere"), "cr\\rhere");
    }

    #[test]
    fn sanitize_rejects_traversal() {
        // Dot-only / whitespace-only names collapse to empty and must error.
        assert!(sanitize_path_component("..").is_err());
        assert!(sanitize_path_component("...").is_err());
        assert!(sanitize_path_component("").is_err());
        assert!(sanitize_path_component(" ").is_err());
    }

    #[test]
    fn sanitize_replaces_separators() {
        let s = sanitize_path_component("foo/bar\\baz\0qux").unwrap();
        assert!(!s.contains('/'));
        assert!(!s.contains('\\'));
        assert!(!s.contains('\0'));
        assert_eq!(s, "foo_bar_baz_qux");
    }

    #[test]
    fn sanitize_trims_dots() {
        let s = sanitize_path_component(".hidden.").unwrap();
        assert_eq!(s, "hidden");
    }
}
|
||||
255
src/skillforge/mod.rs
Normal file
255
src/skillforge/mod.rs
Normal file
|
|
@ -0,0 +1,255 @@
|
|||
//! SkillForge — Skill auto-discovery, evaluation, and integration engine.
|
||||
//!
|
||||
//! Pipeline: Scout → Evaluate → Integrate
|
||||
//! Discovers skills from external sources, scores them, and generates
|
||||
//! ZeroClaw-compatible manifests for qualified candidates.
|
||||
|
||||
pub mod evaluate;
|
||||
pub mod integrate;
|
||||
pub mod scout;
|
||||
|
||||
use anyhow::Result;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use self::evaluate::{EvalResult, Evaluator, Recommendation};
|
||||
use self::integrate::Integrator;
|
||||
use self::scout::{GitHubScout, Scout, ScoutResult, ScoutSource};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Configuration
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// User-facing configuration for the SkillForge pipeline.
///
/// Debug is implemented manually (below) so `github_token` is redacted.
#[derive(Clone, Serialize, Deserialize)]
pub struct SkillForgeConfig {
    /// Master switch; when false, `forge()` returns an empty report.
    #[serde(default)]
    pub enabled: bool,
    /// When false, Auto-recommended candidates are routed to manual review
    /// instead of being written to disk.
    #[serde(default = "default_auto_integrate")]
    pub auto_integrate: bool,
    /// Scout source names ("github", "clawhub", "huggingface"/"hf").
    #[serde(default = "default_sources")]
    pub sources: Vec<String>,
    /// How often the pipeline should run, in hours.
    #[serde(default = "default_scan_interval")]
    pub scan_interval_hours: u64,
    /// Minimum weighted score for auto-integration (0.0–1.0).
    #[serde(default = "default_min_score")]
    pub min_score: f64,
    /// Optional GitHub personal-access token for higher rate limits.
    #[serde(default)]
    pub github_token: Option<String>,
    /// Directory where integrated skills are written.
    #[serde(default = "default_output_dir")]
    pub output_dir: String,
}
|
||||
|
||||
// serde `default = "..."` helpers — keep these in sync with the Default impl.
fn default_auto_integrate() -> bool {
    true
}
fn default_sources() -> Vec<String> {
    vec!["github".into(), "clawhub".into()]
}
fn default_scan_interval() -> u64 {
    24
}
fn default_min_score() -> f64 {
    0.7
}
fn default_output_dir() -> String {
    "./skills".into()
}
|
||||
|
||||
impl Default for SkillForgeConfig {
    /// Disabled by default; every other field mirrors the serde default
    /// helpers so deserialized and constructed configs agree.
    fn default() -> Self {
        Self {
            enabled: false,
            auto_integrate: default_auto_integrate(),
            sources: default_sources(),
            scan_interval_hours: default_scan_interval(),
            min_score: default_min_score(),
            github_token: None,
            output_dir: default_output_dir(),
        }
    }
}
|
||||
|
||||
// Manual Debug: the GitHub token must never reach logs, so it is rendered as
// "***" (presence only) instead of its value.
impl std::fmt::Debug for SkillForgeConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SkillForgeConfig")
            .field("enabled", &self.enabled)
            .field("auto_integrate", &self.auto_integrate)
            .field("sources", &self.sources)
            .field("scan_interval_hours", &self.scan_interval_hours)
            .field("min_score", &self.min_score)
            .field(
                "github_token",
                // Redacted: shows Some("***")/None without leaking the secret.
                &self.github_token.as_ref().map(|_| "***"),
            )
            .field("output_dir", &self.output_dir)
            .finish()
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ForgeReport — summary of a single pipeline run
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Summary of one pipeline run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ForgeReport {
    /// Unique candidates found across all sources (post-dedup).
    pub discovered: usize,
    /// Candidates that went through the evaluator (== discovered).
    pub evaluated: usize,
    /// Candidates successfully written to disk.
    pub auto_integrated: usize,
    /// Candidates routed to human review (includes Auto candidates when
    /// `auto_integrate` is disabled).
    pub manual_review: usize,
    /// Candidates below the review threshold.
    pub skipped: usize,
    /// Full per-candidate evaluation results.
    pub results: Vec<EvalResult>,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// SkillForge
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Orchestrates the Scout → Evaluate → Integrate pipeline.
pub struct SkillForge {
    config: SkillForgeConfig,
    // Built from config.min_score at construction time.
    evaluator: Evaluator,
    // Built from config.output_dir at construction time.
    integrator: Integrator,
}
|
||||
|
||||
impl SkillForge {
    /// Build the pipeline; evaluator and integrator inherit `min_score` and
    /// `output_dir` from the config.
    pub fn new(config: SkillForgeConfig) -> Self {
        let evaluator = Evaluator::new(config.min_score);
        let integrator = Integrator::new(config.output_dir.clone());
        Self {
            config,
            evaluator,
            integrator,
        }
    }

    /// Run the full pipeline: Scout → Evaluate → Integrate.
    ///
    /// Per-source and per-candidate failures are logged as warnings and the
    /// run continues; only a disabled config short-circuits (returning an
    /// all-zero report).
    pub async fn forge(&self) -> Result<ForgeReport> {
        if !self.config.enabled {
            warn!("SkillForge is disabled — skipping");
            return Ok(ForgeReport {
                discovered: 0,
                evaluated: 0,
                auto_integrated: 0,
                manual_review: 0,
                skipped: 0,
                results: vec![],
            });
        }

        // --- Scout ----------------------------------------------------------
        let mut candidates: Vec<ScoutResult> = Vec::new();

        for src in &self.config.sources {
            // FromStr for ScoutSource has Err = Infallible (unknown names fall
            // back to GitHub), so this unwrap cannot panic.
            let source: ScoutSource = src.parse().unwrap(); // Infallible
            match source {
                ScoutSource::GitHub => {
                    let scout = GitHubScout::new(self.config.github_token.clone());
                    match scout.discover().await {
                        Ok(mut found) => {
                            info!(count = found.len(), "GitHub scout returned candidates");
                            candidates.append(&mut found);
                        }
                        Err(e) => {
                            // A single failing source must not abort the run.
                            warn!(error = %e, "GitHub scout failed, continuing with other sources");
                        }
                    }
                }
                ScoutSource::ClawHub | ScoutSource::HuggingFace => {
                    info!(source = src.as_str(), "Source not yet implemented — skipping");
                }
            }
        }

        // Deduplicate by URL
        scout::dedup(&mut candidates);
        let discovered = candidates.len();
        info!(discovered, "Total unique candidates after dedup");

        // --- Evaluate -------------------------------------------------------
        let results: Vec<EvalResult> = candidates
            .into_iter()
            .map(|c| self.evaluator.evaluate(c))
            .collect();
        let evaluated = results.len();

        // --- Integrate ------------------------------------------------------
        let mut auto_integrated = 0usize;
        let mut manual_review = 0usize;
        let mut skipped = 0usize;

        for res in &results {
            match res.recommendation {
                Recommendation::Auto => {
                    if self.config.auto_integrate {
                        match self.integrator.integrate(&res.candidate) {
                            Ok(_) => {
                                auto_integrated += 1;
                            }
                            Err(e) => {
                                // NOTE(review): a candidate whose integration
                                // fails is counted in no bucket, so the three
                                // counters may sum to less than `evaluated` —
                                // confirm this is intended.
                                warn!(
                                    skill = res.candidate.name.as_str(),
                                    error = %e,
                                    "Integration failed for candidate, continuing"
                                );
                            }
                        }
                    } else {
                        // Count as would-be auto but not actually integrated
                        manual_review += 1;
                    }
                }
                Recommendation::Manual => {
                    manual_review += 1;
                }
                Recommendation::Skip => {
                    skipped += 1;
                }
            }
        }

        info!(
            auto_integrated,
            manual_review, skipped, "Forge pipeline complete"
        );

        Ok(ForgeReport {
            discovered,
            evaluated,
            auto_integrated,
            manual_review,
            skipped,
            results,
        })
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn disabled_forge_returns_empty_report() {
        // enabled=false must short-circuit with an all-zero report.
        let cfg = SkillForgeConfig {
            enabled: false,
            ..Default::default()
        };
        let forge = SkillForge::new(cfg);
        let report = forge.forge().await.unwrap();
        assert_eq!(report.discovered, 0);
        assert_eq!(report.auto_integrated, 0);
    }

    #[test]
    fn default_config_values() {
        // Pins Default to the serde default helpers.
        let cfg = SkillForgeConfig::default();
        assert!(!cfg.enabled);
        assert!(cfg.auto_integrate);
        assert_eq!(cfg.scan_interval_hours, 24);
        assert!((cfg.min_score - 0.7).abs() < f64::EPSILON);
        assert_eq!(cfg.sources, vec!["github", "clawhub"]);
    }
}
|
||||
331
src/skillforge/scout.rs
Normal file
331
src/skillforge/scout.rs
Normal file
|
|
@ -0,0 +1,331 @@
|
|||
//! Scout — skill discovery from external sources.
|
||||
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::{debug, warn};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ScoutSource
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Where a candidate skill was discovered.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ScoutSource {
    GitHub,
    // Not yet implemented as scouts; recognized in config and skipped.
    ClawHub,
    HuggingFace,
}
|
||||
|
||||
// Parsing is deliberately infallible: an unknown source name logs a warning
// and falls back to GitHub rather than erroring, so a config typo does not
// abort the pipeline.
impl std::str::FromStr for ScoutSource {
    type Err = std::convert::Infallible;

    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        // Case-insensitive match; "hf" is an accepted HuggingFace alias.
        Ok(match s.to_lowercase().as_str() {
            "github" => Self::GitHub,
            "clawhub" => Self::ClawHub,
            "huggingface" | "hf" => Self::HuggingFace,
            _ => {
                warn!(source = s, "Unknown scout source, defaulting to GitHub");
                Self::GitHub
            }
        })
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ScoutResult
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// One discovered candidate skill, normalized across sources.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoutResult {
    /// Repository name (without owner prefix).
    pub name: String,
    /// Web URL of the repository.
    pub url: String,
    /// Repo description; empty string when the source provides none.
    pub description: String,
    /// Star count (0 when unavailable).
    pub stars: u64,
    /// Primary language as reported by the source, if any.
    pub language: Option<String>,
    /// Last-updated timestamp, if parseable from the source.
    pub updated_at: Option<DateTime<Utc>>,
    /// Which scout produced this result.
    pub source: ScoutSource,
    /// Owner / org extracted from the URL or API response.
    pub owner: String,
    /// Whether the repo has a license file.
    pub has_license: bool,
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scout trait
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// A skill-discovery backend. Implementations must be Send + Sync so scouts
/// can run inside the async pipeline.
#[async_trait]
pub trait Scout: Send + Sync {
    /// Discover candidate skills from the source.
    async fn discover(&self) -> Result<Vec<ScoutResult>>;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// GitHubScout
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Searches GitHub for repos matching skill-related queries.
pub struct GitHubScout {
    /// Preconfigured client (headers, auth, 30s timeout) built in `new`.
    client: reqwest::Client,
    /// Search queries executed per `discover()` call.
    queries: Vec<String>,
}
|
||||
|
||||
impl GitHubScout {
    /// Build a scout with default search queries.
    ///
    /// `token`, when present, is sent as a Bearer Authorization header for
    /// higher rate limits; a token that fails header validation is silently
    /// dropped rather than aborting construction.
    pub fn new(token: Option<String>) -> Self {
        use std::time::Duration;

        let mut headers = reqwest::header::HeaderMap::new();
        headers.insert(
            reqwest::header::ACCEPT,
            "application/vnd.github+json"
                .parse()
                .expect("valid header"),
        );
        headers.insert(
            reqwest::header::USER_AGENT,
            "ZeroClaw-SkillForge/0.1".parse().expect("valid header"),
        );
        if let Some(ref t) = token {
            // Invalid header characters in the token → skip auth entirely.
            if let Ok(val) = format!("Bearer {t}").parse() {
                headers.insert(reqwest::header::AUTHORIZATION, val);
            }
        }

        let client = reqwest::Client::builder()
            .default_headers(headers)
            // Hard cap per request so a hung API call can't stall the pipeline.
            .timeout(Duration::from_secs(30))
            .build()
            .expect("failed to build reqwest client");

        Self {
            client,
            queries: vec![
                "zeroclaw skill".into(),
                "ai agent skill".into(),
            ],
        }
    }

    /// Parse the GitHub search/repositories JSON response.
    ///
    /// Items missing required fields (name, html_url) are dropped; optional
    /// fields fall back to empty/zero/None defaults.
    fn parse_items(body: &serde_json::Value) -> Vec<ScoutResult> {
        let items = match body.get("items").and_then(|v| v.as_array()) {
            Some(arr) => arr,
            None => return vec![],
        };

        items
            .iter()
            .filter_map(|item| {
                // Required fields — absence skips the item via `?`.
                let name = item.get("name")?.as_str()?.to_string();
                let url = item.get("html_url")?.as_str()?.to_string();
                let description = item
                    .get("description")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .to_string();
                let stars = item
                    .get("stargazers_count")
                    .and_then(|v| v.as_u64())
                    .unwrap_or(0);
                let language = item
                    .get("language")
                    .and_then(|v| v.as_str())
                    .map(String::from);
                let updated_at = item
                    .get("updated_at")
                    .and_then(|v| v.as_str())
                    .and_then(|s| s.parse::<DateTime<Utc>>().ok());
                let owner = item
                    .get("owner")
                    .and_then(|o| o.get("login"))
                    .and_then(|v| v.as_str())
                    .unwrap_or("unknown")
                    .to_string();
                // License presence only — a non-null "license" object counts.
                let has_license = item
                    .get("license")
                    .map(|v| !v.is_null())
                    .unwrap_or(false);

                Some(ScoutResult {
                    name,
                    url,
                    description,
                    stars,
                    language,
                    updated_at,
                    source: ScoutSource::GitHub,
                    owner,
                    has_license,
                })
            })
            .collect()
    }
}
|
||||
|
||||
#[async_trait]
impl Scout for GitHubScout {
    /// Run every configured search query against the GitHub search API and
    /// collect the deduplicated results.
    ///
    /// Network errors, non-success statuses, and unparseable responses are
    /// logged per query and skipped — a single bad query never fails the
    /// whole discovery pass.
    async fn discover(&self) -> Result<Vec<ScoutResult>> {
        let mut all: Vec<ScoutResult> = Vec::new();

        for query in &self.queries {
            let url = format!(
                "https://api.github.com/search/repositories?q={}&sort=stars&order=desc&per_page=30",
                urlencoding(query)
            );
            debug!(query = query.as_str(), "Searching GitHub");

            let resp = match self.client.get(&url).send().await {
                Ok(r) => r,
                Err(e) => {
                    warn!(
                        query = query.as_str(),
                        error = %e,
                        "GitHub API request failed, skipping query"
                    );
                    continue;
                }
            };

            if !resp.status().is_success() {
                warn!(
                    status = %resp.status(),
                    query = query.as_str(),
                    "GitHub search returned non-200"
                );
                continue;
            }

            let body: serde_json::Value = match resp.json().await {
                Ok(v) => v,
                Err(e) => {
                    warn!(
                        query = query.as_str(),
                        error = %e,
                        "Failed to parse GitHub response, skipping query"
                    );
                    continue;
                }
            };

            let mut items = Self::parse_items(&body);
            debug!(count = items.len(), query = query.as_str(), "Parsed items");
            all.append(&mut items);
        }

        // Queries overlap; keep the first occurrence of each URL.
        dedup(&mut all);
        Ok(all)
    }
}
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Minimal form-style percent-encoding for GitHub search query strings.
///
/// Unreserved characters (RFC 3986: ALPHA / DIGIT / `-` `.` `_` `~`)
/// pass through, a space becomes `+` (form encoding, accepted by the
/// GitHub search endpoint), and every other byte is emitted as `%XX`.
///
/// The previous replace-chain only handled ` `, `&`, and `#`: it never
/// escaped `%` itself (so an input containing a literal `%` produced an
/// invalid/ambiguous encoding) nor `+` (which the server would decode
/// back into a space). Encoding all non-unreserved bytes fixes both
/// while preserving the old outputs for the old inputs.
fn urlencoding(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for b in s.bytes() {
        match b {
            b' ' => out.push('+'),
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => {
                out.push(b as char);
            }
            _ => {
                // UTF-8 continuation bytes are percent-encoded per byte,
                // which is exactly what URL encoding requires.
                out.push_str(&format!("%{b:02X}"));
            }
        }
    }
    out
}
|
||||
|
||||
/// Deduplicate scout results by URL (keeps first occurrence).
|
||||
pub fn dedup(results: &mut Vec<ScoutResult>) {
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
results.retain(|r| seen.insert(r.url.clone()));
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal `ScoutResult` fixture for the dedup tests.
    fn sample(name: &str, url: &str, stars: u64, has_license: bool) -> ScoutResult {
        ScoutResult {
            name: name.into(),
            url: url.into(),
            description: String::new(),
            stars,
            language: None,
            updated_at: None,
            source: ScoutSource::GitHub,
            owner: "x".into(),
            has_license,
        }
    }

    #[test]
    fn scout_source_from_str() {
        let cases = [
            ("github", ScoutSource::GitHub),
            ("GitHub", ScoutSource::GitHub),
            ("clawhub", ScoutSource::ClawHub),
            ("huggingface", ScoutSource::HuggingFace),
            ("hf", ScoutSource::HuggingFace),
            // Unknown sources fall back to GitHub.
            ("unknown", ScoutSource::GitHub),
        ];
        for (input, expected) in cases {
            assert_eq!(input.parse::<ScoutSource>().unwrap(), expected);
        }
    }

    #[test]
    fn dedup_removes_duplicates() {
        let mut results = vec![
            sample("a", "https://github.com/x/a", 10, true),
            sample("a-dup", "https://github.com/x/a", 10, true),
            sample("b", "https://github.com/x/b", 5, false),
        ];
        dedup(&mut results);
        // The duplicate URL is dropped; first occurrence and order survive.
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].name, "a");
        assert_eq!(results[1].name, "b");
    }

    #[test]
    fn parse_github_items() {
        let json = serde_json::json!({
            "total_count": 1,
            "items": [
                {
                    "name": "cool-skill",
                    "html_url": "https://github.com/user/cool-skill",
                    "description": "A cool skill",
                    "stargazers_count": 42,
                    "language": "Rust",
                    "updated_at": "2026-01-15T10:00:00Z",
                    "owner": { "login": "user" },
                    "license": { "spdx_id": "MIT" }
                }
            ]
        });
        let items = GitHubScout::parse_items(&json);
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].name, "cool-skill");
        assert_eq!(items[0].stars, 42);
        assert!(items[0].has_license);
        assert_eq!(items[0].owner, "user");
    }

    #[test]
    fn urlencoding_works() {
        assert_eq!(urlencoding("hello world"), "hello+world");
        assert_eq!(urlencoding("a&b#c"), "a%26b%23c");
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue