zeroclaw/src/skillforge/integrate.rs
Argenis 35b63d6b12
feat: SkillForge — automated skill discovery, evaluation & integration engine (#144)
* feat: add SkillForge — automated skill discovery, evaluation, and integration engine

SkillForge adds a 3-stage pipeline for autonomous skill management:

- Scout: discovers candidate skills from GitHub (extensible to ClawHub, HuggingFace)
- Evaluate: scores candidates on compatibility, quality, and security (weighted 0.30/0.35/0.35)
- Integrate: generates standard SKILL.toml + SKILL.md manifests for approved candidates

Thresholds: >=0.7 auto-integrate, 0.4-0.7 manual review, <0.4 skip.
Uses only existing dependencies (reqwest, serde, tokio, tracing, chrono, anyhow).
Includes unit tests for all modules.

* fix: address code review feedback on SkillForge PR #115

- evaluate: whole-word matching for BAD_PATTERNS (fixes hackathon false positive)
- evaluate: guard against future timestamps in recency bonus
- integrate: escape URLs in TOML output via escape_toml()
- integrate: handle control chars (\n, \r, \t, \b, \f) in escape_toml()
- mod: redact github_token in Debug impl to prevent log leakage
- mod: fix auto_integrated count when auto_integrate=false
- mod: per-candidate error handling (single failure no longer aborts pipeline)
- scout: add 30s request timeout, remove unused token field
- deps: enable chrono serde feature for DateTime serialization
- tests: add hackathon/exact-hack tests, update escape_toml test coverage

* fix: address round-2 CodeRabbit review feedback

- integrate: add sanitize_path_component() to prevent directory traversal
- mod: GitHub scout failure now logs warning and continues (no pipeline abort)
- scout: network/parse errors per-query use warn+continue instead of ?
- scout: implement std::str::FromStr for ScoutSource (replaces custom from_str)
- tests: add path sanitization tests (traversal, separators, dot trimming)

---------

Co-authored-by: stawky <stakeswky@gmail.com>
2026-02-15 09:26:13 -05:00

248 lines
7.1 KiB
Rust

//! Integrator — generates ZeroClaw-standard SKILL.toml + SKILL.md from scout results.
use std::fs;
use std::path::PathBuf;
use anyhow::{bail, Context, Result};
use chrono::Utc;
use tracing::info;
use super::scout::ScoutResult;
// ---------------------------------------------------------------------------
// Integrator
// ---------------------------------------------------------------------------
/// Writes ZeroClaw skill manifests (`SKILL.toml` + `SKILL.md`) to disk.
///
/// Every integrated candidate gets its own sub-directory beneath
/// `output_dir`.
#[derive(Debug, Clone)]
pub struct Integrator {
    /// Root directory under which one directory per skill is created.
    output_dir: PathBuf,
}
impl Integrator {
    /// Create an integrator that writes skill manifests beneath `output_dir`.
    ///
    /// Nothing is created on disk here; directories are created on demand by
    /// [`Integrator::integrate`].
    pub fn new(output_dir: String) -> Self {
        Self {
            output_dir: PathBuf::from(output_dir),
        }
    }

    /// Write SKILL.toml and SKILL.md for the given candidate.
    ///
    /// The candidate name is first reduced to a safe single path component,
    /// the directory `<output_dir>/<safe name>` is created (including missing
    /// parents), and both manifests are written into it.
    ///
    /// Returns the path of the skill directory.
    ///
    /// # Errors
    ///
    /// Fails when the candidate name cannot be turned into a safe path
    /// component, when the directory cannot be created, or when either file
    /// cannot be written.
    pub fn integrate(&self, candidate: &ScoutResult) -> Result<PathBuf> {
        let safe_name = sanitize_path_component(&candidate.name)?;
        let skill_dir = self.output_dir.join(&safe_name);
        fs::create_dir_all(&skill_dir)
            .with_context(|| format!("Failed to create dir: {}", skill_dir.display()))?;

        let toml_path = skill_dir.join("SKILL.toml");
        let md_path = skill_dir.join("SKILL.md");
        let toml_content = self.generate_toml(candidate);
        let md_content = self.generate_md(candidate);

        fs::write(&toml_path, &toml_content)
            .with_context(|| format!("Failed to write {}", toml_path.display()))?;
        fs::write(&md_path, &md_content)
            .with_context(|| format!("Failed to write {}", md_path.display()))?;

        info!(
            skill = candidate.name.as_str(),
            path = %skill_dir.display(),
            "Integrated skill"
        );
        Ok(skill_dir)
    }

    // -- Generators ---------------------------------------------------------

    /// Render the SKILL.toml manifest for `c`.
    ///
    /// Every string taken from the candidate — including `language` — is
    /// passed through `escape_toml`, because all of it is external repository
    /// metadata and must not be able to terminate the surrounding quoted
    /// value. A missing language or update date is rendered as `"unknown"`.
    fn generate_toml(&self, c: &ScoutResult) -> String {
        let lang = c.language.as_deref().unwrap_or("unknown");
        let updated = c
            .updated_at
            .map(|d| d.format("%Y-%m-%d").to_string())
            .unwrap_or_else(|| "unknown".into());
        format!(
            r#"# Auto-generated by SkillForge on {now}
[skill]
name = "{name}"
version = "0.1.0"
description = "{description}"
source = "{url}"
owner = "{owner}"
language = "{lang}"
license = {license}
stars = {stars}
updated_at = "{updated}"
[skill.requirements]
runtime = "zeroclaw >= 0.1"
[skill.metadata]
auto_integrated = true
forge_timestamp = "{now}"
"#,
            // `now` is evaluated once and reused for both placeholders.
            now = Utc::now().format("%Y-%m-%dT%H:%M:%SZ"),
            name = escape_toml(&c.name),
            description = escape_toml(&c.description),
            url = escape_toml(&c.url),
            owner = escape_toml(&c.owner),
            lang = escape_toml(lang),
            // Emitted as a bare TOML boolean, hence no quotes in the template.
            license = if c.has_license { "true" } else { "false" },
            stars = c.stars,
            updated = updated,
        )
    }

    /// Render the human-readable SKILL.md for `c`.
    ///
    /// Candidate fields are inserted verbatim (no Markdown escaping). A
    /// missing language is rendered as `unknown`.
    fn generate_md(&self, c: &ScoutResult) -> String {
        let lang = c.language.as_deref().unwrap_or("unknown");
        format!(
            r#"# {name}
> Auto-generated by SkillForge
## Overview
- **Source**: [{url}]({url})
- **Owner**: {owner}
- **Language**: {lang}
- **Stars**: {stars}
- **License**: {license}
## Description
{description}
## Usage
```toml
# Add to your ZeroClaw config:
[skills.{name}]
enabled = true
```
## Notes
This manifest was auto-generated from repository metadata.
Review before enabling in production.
"#,
            name = c.name,
            url = c.url,
            owner = c.owner,
            lang = lang,
            stars = c.stars,
            license = if c.has_license { "yes" } else { "unknown" },
            description = c.description,
        )
    }
}
/// Escape special characters for TOML basic string values.
///
/// Backslash and double quote are backslash-escaped, the common control
/// characters use their short forms (`\n`, `\r`, `\t`, `\b`, `\f`), and every
/// other control character in U+0000–U+001F plus U+007F is written as
/// `\uXXXX`, since those may not appear raw inside a basic string. All other
/// characters are copied unchanged.
///
/// The returned text is meant to be placed between the double quotes of a
/// TOML basic string; the quotes themselves are not added.
fn escape_toml(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            '\u{08}' => escaped.push_str("\\b"),
            '\u{0C}' => escaped.push_str("\\f"),
            // Remaining control characters have no short escape.
            '\u{00}'..='\u{1F}' | '\u{7F}' => {
                escaped.push_str(&format!("\\u{:04X}", ch as u32));
            }
            other => escaped.push(other),
        }
    }
    escaped
}
/// Sanitize a string for use as a single path component.
///
/// Leading and trailing whitespace and dots are stripped (in any
/// interleaving), then every `/`, `\` and NUL is replaced with `_`.
///
/// # Errors
///
/// Fails when nothing is left after trimming (empty, whitespace-only or
/// dot-only names such as `".."` or `". ."`), or when the result would still
/// be unsafe as a path component.
fn sanitize_path_component(name: &str) -> Result<String> {
    // Whitespace and dots are trimmed together: trimming them one after the
    // other would let ". ." through as " " and ".. x" through as " x".
    let trimmed = name.trim_matches(|c: char| c == '.' || c.is_whitespace());
    if trimmed.is_empty() {
        bail!("Skill name is empty or only dots after sanitization");
    }
    let sanitized: String = trimmed
        .chars()
        .map(|c| match c {
            '/' | '\\' | '\0' => '_',
            _ => c,
        })
        .collect();
    // Defense in depth: the steps above should already rule these out, but a
    // traversal or separator must never reach `Path::join`.
    if sanitized == ".." || sanitized.contains('/') || sanitized.contains('\\') {
        bail!("Skill name '{}' is unsafe as a path component", name);
    }
    Ok(sanitized)
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;
    use crate::skillforge::scout::{ScoutResult, ScoutSource};
    use std::fs;

    /// A fully populated candidate used by the integration test.
    fn sample_candidate() -> ScoutResult {
        ScoutResult {
            name: "test-skill".into(),
            url: "https://github.com/user/test-skill".into(),
            description: "A test skill for unit tests".into(),
            stars: 42,
            language: Some("Rust".into()),
            updated_at: Some(Utc::now()),
            source: ScoutSource::GitHub,
            owner: "user".into(),
            has_license: true,
        }
    }

    #[test]
    fn integrate_creates_files() {
        // The process id keeps concurrent test processes from sharing (and
        // deleting) each other's directory.
        let tmp = std::env::temp_dir()
            .join(format!("zeroclaw-test-integrate-{}", std::process::id()));
        let _ = fs::remove_dir_all(&tmp);

        let integrator = Integrator::new(tmp.to_string_lossy().into_owned());
        let c = sample_candidate();
        let path = integrator.integrate(&c).unwrap();

        assert!(path.join("SKILL.toml").exists());
        assert!(path.join("SKILL.md").exists());

        let toml = fs::read_to_string(path.join("SKILL.toml")).unwrap();
        assert!(toml.contains("name = \"test-skill\""));
        assert!(toml.contains("stars = 42"));

        let md = fs::read_to_string(path.join("SKILL.md")).unwrap();
        assert!(md.contains("# test-skill"));
        assert!(md.contains("A test skill for unit tests"));

        let _ = fs::remove_dir_all(&tmp);
    }

    #[test]
    fn escape_toml_handles_quotes_and_control_chars() {
        assert_eq!(escape_toml(r#"say "hello""#), r#"say \"hello\""#);
        assert_eq!(escape_toml(r"back\slash"), r"back\\slash");
        assert_eq!(escape_toml("line\nbreak"), "line\\nbreak");
        assert_eq!(escape_toml("tab\there"), "tab\\there");
        assert_eq!(escape_toml("cr\rhere"), "cr\\rhere");
        // Backspace and form feed use their short escapes.
        assert_eq!(escape_toml("bs\u{08}here"), "bs\\bhere");
        assert_eq!(escape_toml("ff\u{0C}here"), "ff\\fhere");
    }

    #[test]
    fn sanitize_rejects_traversal() {
        assert!(sanitize_path_component("..").is_err());
        assert!(sanitize_path_component("...").is_err());
        assert!(sanitize_path_component("").is_err());
        assert!(sanitize_path_component(" ").is_err());
    }

    #[test]
    fn sanitize_replaces_separators() {
        let s = sanitize_path_component("foo/bar\\baz\0qux").unwrap();
        assert!(!s.contains('/'));
        assert!(!s.contains('\\'));
        assert!(!s.contains('\0'));
        assert_eq!(s, "foo_bar_baz_qux");
    }

    #[test]
    fn sanitize_trims_dots() {
        let s = sanitize_path_component(".hidden.").unwrap();
        assert_eq!(s, "hidden");
    }
}