feat: SkillForge — automated skill discovery, evaluation & integration engine (#144)
* feat: add SkillForge — automated skill discovery, evaluation, and integration engine

  SkillForge adds a 3-stage pipeline for autonomous skill management:
  - Scout: discovers candidate skills from GitHub (extensible to ClawHub, HuggingFace)
  - Evaluate: scores candidates on compatibility, quality, and security (weighted 0.30/0.35/0.35)
  - Integrate: generates standard SKILL.toml + SKILL.md manifests for approved candidates

  Thresholds: >=0.7 auto-integrate, 0.4-0.7 manual review, <0.4 skip.
  Uses only existing dependencies (reqwest, serde, tokio, tracing, chrono, anyhow).
  Includes unit tests for all modules.

* fix: address code review feedback on SkillForge PR #115
  - evaluate: whole-word matching for BAD_PATTERNS (fixes hackathon false positive)
  - evaluate: guard against future timestamps in recency bonus
  - integrate: escape URLs in TOML output via escape_toml()
  - integrate: handle control chars (\n, \r, \t, \b, \f) in escape_toml()
  - mod: redact github_token in Debug impl to prevent log leakage
  - mod: fix auto_integrated count when auto_integrate=false
  - mod: per-candidate error handling (single failure no longer aborts pipeline)
  - scout: add 30s request timeout, remove unused token field
  - deps: enable chrono serde feature for DateTime serialization
  - tests: add hackathon/exact-hack tests, update escape_toml test coverage

* fix: address round-2 CodeRabbit review feedback
  - integrate: add sanitize_path_component() to prevent directory traversal
  - mod: GitHub scout failure now logs warning and continues (no pipeline abort)
  - scout: network/parse errors per-query use warn+continue instead of ?
  - scout: implement std::str::FromStr for ScoutSource (replaces custom from_str)
  - tests: add path sanitization tests (traversal, separators, dot trimming)

---------

Co-authored-by: stawky <stakeswky@gmail.com>
This commit is contained in:
parent
2ac571f406
commit
35b63d6b12
7 changed files with 1098 additions and 1 deletions
248
src/skillforge/integrate.rs
Normal file
248
src/skillforge/integrate.rs
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
//! Integrator — generates ZeroClaw-standard SKILL.toml + SKILL.md from scout results.
|
||||
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use chrono::Utc;
|
||||
use tracing::info;
|
||||
|
||||
use super::scout::ScoutResult;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Integrator
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Writes `SKILL.toml` and `SKILL.md` manifests for scouted skill candidates.
///
/// Each integrated candidate gets its own sub-directory beneath `output_dir`.
#[derive(Debug, Clone)]
pub struct Integrator {
    /// Root directory under which one directory per skill is created.
    output_dir: PathBuf,
}
|
||||
|
||||
impl Integrator {
|
||||
pub fn new(output_dir: String) -> Self {
|
||||
Self {
|
||||
output_dir: PathBuf::from(output_dir),
|
||||
}
|
||||
}
|
||||
|
||||
/// Write SKILL.toml and SKILL.md for the given candidate.
|
||||
pub fn integrate(&self, candidate: &ScoutResult) -> Result<PathBuf> {
|
||||
let safe_name = sanitize_path_component(&candidate.name)?;
|
||||
let skill_dir = self.output_dir.join(&safe_name);
|
||||
fs::create_dir_all(&skill_dir)
|
||||
.with_context(|| format!("Failed to create dir: {}", skill_dir.display()))?;
|
||||
|
||||
let toml_path = skill_dir.join("SKILL.toml");
|
||||
let md_path = skill_dir.join("SKILL.md");
|
||||
|
||||
let toml_content = self.generate_toml(candidate);
|
||||
let md_content = self.generate_md(candidate);
|
||||
|
||||
fs::write(&toml_path, &toml_content)
|
||||
.with_context(|| format!("Failed to write {}", toml_path.display()))?;
|
||||
fs::write(&md_path, &md_content)
|
||||
.with_context(|| format!("Failed to write {}", md_path.display()))?;
|
||||
|
||||
info!(
|
||||
skill = candidate.name.as_str(),
|
||||
path = %skill_dir.display(),
|
||||
"Integrated skill"
|
||||
);
|
||||
|
||||
Ok(skill_dir)
|
||||
}
|
||||
|
||||
// -- Generators ---------------------------------------------------------
|
||||
|
||||
fn generate_toml(&self, c: &ScoutResult) -> String {
|
||||
let lang = c.language.as_deref().unwrap_or("unknown");
|
||||
let updated = c
|
||||
.updated_at
|
||||
.map(|d| d.format("%Y-%m-%d").to_string())
|
||||
.unwrap_or_else(|| "unknown".into());
|
||||
|
||||
format!(
|
||||
r#"# Auto-generated by SkillForge on {now}
|
||||
|
||||
[skill]
|
||||
name = "{name}"
|
||||
version = "0.1.0"
|
||||
description = "{description}"
|
||||
source = "{url}"
|
||||
owner = "{owner}"
|
||||
language = "{lang}"
|
||||
license = {license}
|
||||
stars = {stars}
|
||||
updated_at = "{updated}"
|
||||
|
||||
[skill.requirements]
|
||||
runtime = "zeroclaw >= 0.1"
|
||||
|
||||
[skill.metadata]
|
||||
auto_integrated = true
|
||||
forge_timestamp = "{now}"
|
||||
"#,
|
||||
now = Utc::now().format("%Y-%m-%dT%H:%M:%SZ"),
|
||||
name = escape_toml(&c.name),
|
||||
description = escape_toml(&c.description),
|
||||
url = escape_toml(&c.url),
|
||||
owner = escape_toml(&c.owner),
|
||||
lang = lang,
|
||||
license = if c.has_license { "true" } else { "false" },
|
||||
stars = c.stars,
|
||||
updated = updated,
|
||||
)
|
||||
}
|
||||
|
||||
fn generate_md(&self, c: &ScoutResult) -> String {
|
||||
let lang = c.language.as_deref().unwrap_or("unknown");
|
||||
format!(
|
||||
r#"# {name}
|
||||
|
||||
> Auto-generated by SkillForge
|
||||
|
||||
## Overview
|
||||
|
||||
- **Source**: [{url}]({url})
|
||||
- **Owner**: {owner}
|
||||
- **Language**: {lang}
|
||||
- **Stars**: {stars}
|
||||
- **License**: {license}
|
||||
|
||||
## Description
|
||||
|
||||
{description}
|
||||
|
||||
## Usage
|
||||
|
||||
```toml
|
||||
# Add to your ZeroClaw config:
|
||||
[skills.{name}]
|
||||
enabled = true
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
This manifest was auto-generated from repository metadata.
|
||||
Review before enabling in production.
|
||||
"#,
|
||||
name = c.name,
|
||||
url = c.url,
|
||||
owner = c.owner,
|
||||
lang = lang,
|
||||
stars = c.stars,
|
||||
license = if c.has_license { "yes" } else { "unknown" },
|
||||
description = c.description,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Escape a string so it can be embedded in a TOML basic (double-quoted) string.
///
/// Backslash, double quote, and the control characters that have a short TOML escape
/// (`\n`, `\r`, `\t`, `\b`, `\f`) use that short form. Every other character TOML
/// forbids raw in a basic string (U+0000–U+001F and U+007F) is written as a
/// `\uXXXX` escape. All remaining characters are copied through unchanged.
fn escape_toml(s: &str) -> String {
    // Single pass; most inputs need no escaping, so the input length is a good guess.
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            '\u{08}' => out.push_str("\\b"),
            '\u{0C}' => out.push_str("\\f"),
            // Remaining control characters have no short escape and are not
            // permitted raw, so fall back to the generic Unicode escape.
            '\u{00}'..='\u{1F}' | '\u{7F}' => {
                out.push_str(&format!("\\u{:04X}", ch as u32));
            }
            other => out.push(other),
        }
    }
    out
}
|
||||
|
||||
/// Sanitize a string for use as a single path component.
|
||||
/// Rejects empty names, "..", and names containing path separators or NUL.
|
||||
fn sanitize_path_component(name: &str) -> Result<String> {
|
||||
let trimmed = name.trim().trim_matches('.');
|
||||
if trimmed.is_empty() {
|
||||
bail!("Skill name is empty or only dots after sanitization");
|
||||
}
|
||||
let sanitized: String = trimmed
|
||||
.chars()
|
||||
.map(|c| match c {
|
||||
'/' | '\\' | '\0' => '_',
|
||||
_ => c,
|
||||
})
|
||||
.collect();
|
||||
if sanitized == ".." || sanitized.contains('/') || sanitized.contains('\\') {
|
||||
bail!("Skill name '{}' is unsafe as a path component", name);
|
||||
}
|
||||
Ok(sanitized)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::skillforge::scout::{ScoutResult, ScoutSource};
    use std::fs;

    /// A fully populated candidate used by the integration test.
    fn sample_candidate() -> ScoutResult {
        ScoutResult {
            name: "test-skill".into(),
            url: "https://github.com/user/test-skill".into(),
            description: "A test skill for unit tests".into(),
            stars: 42,
            language: Some("Rust".into()),
            updated_at: Some(Utc::now()),
            source: ScoutSource::GitHub,
            owner: "user".into(),
            has_license: true,
        }
    }

    #[test]
    fn integrate_creates_files() {
        // Include the process id so concurrent test runs on the same machine do not
        // delete or overwrite each other's output directory.
        let tmp = std::env::temp_dir()
            .join(format!("zeroclaw-test-integrate-{}", std::process::id()));
        let _ = fs::remove_dir_all(&tmp);

        let integrator = Integrator::new(tmp.to_string_lossy().into_owned());
        let c = sample_candidate();
        let path = integrator.integrate(&c).unwrap();

        assert_eq!(path, tmp.join("test-skill"));
        assert!(path.join("SKILL.toml").exists());
        assert!(path.join("SKILL.md").exists());

        let toml = fs::read_to_string(path.join("SKILL.toml")).unwrap();
        assert!(toml.contains("name = \"test-skill\""));
        assert!(toml.contains("stars = 42"));
        assert!(toml.contains("language = \"Rust\""));
        assert!(toml.contains("license = true"));

        let md = fs::read_to_string(path.join("SKILL.md")).unwrap();
        assert!(md.contains("# test-skill"));
        assert!(md.contains("A test skill for unit tests"));

        let _ = fs::remove_dir_all(&tmp);
    }

    #[test]
    fn escape_toml_handles_quotes_and_control_chars() {
        assert_eq!(escape_toml(r#"say "hello""#), r#"say \"hello\""#);
        assert_eq!(escape_toml(r"back\slash"), r"back\\slash");
        assert_eq!(escape_toml("line\nbreak"), "line\\nbreak");
        assert_eq!(escape_toml("tab\there"), "tab\\there");
        assert_eq!(escape_toml("cr\rhere"), "cr\\rhere");
        // Backspace and form feed have dedicated short escapes.
        assert_eq!(escape_toml("bs\u{08}here"), "bs\\bhere");
        assert_eq!(escape_toml("ff\u{0C}here"), "ff\\fhere");
    }

    #[test]
    fn sanitize_rejects_traversal() {
        assert!(sanitize_path_component("..").is_err());
        assert!(sanitize_path_component("...").is_err());
        assert!(sanitize_path_component("").is_err());
        assert!(sanitize_path_component(" ").is_err());
    }

    #[test]
    fn sanitize_replaces_separators() {
        let s = sanitize_path_component("foo/bar\\baz\0qux").unwrap();
        assert!(!s.contains('/'));
        assert!(!s.contains('\\'));
        assert!(!s.contains('\0'));
        assert_eq!(s, "foo_bar_baz_qux");
    }

    #[test]
    fn sanitize_trims_dots() {
        let s = sanitize_path_component(".hidden.").unwrap();
        assert_eq!(s, "hidden");
    }

    #[test]
    fn sanitize_keeps_ordinary_names() {
        assert_eq!(sanitize_path_component("test-skill").unwrap(), "test-skill");
        assert_eq!(sanitize_path_component("my.skill_v2").unwrap(), "my.skill_v2");
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue