* feat: add SkillForge — automated skill discovery, evaluation, and integration engine SkillForge adds a 3-stage pipeline for autonomous skill management: - Scout: discovers candidate skills from GitHub (extensible to ClawHub, HuggingFace) - Evaluate: scores candidates on compatibility, quality, and security (weighted 0.30/0.35/0.35) - Integrate: generates standard SKILL.toml + SKILL.md manifests for approved candidates Thresholds: >=0.7 auto-integrate, 0.4-0.7 manual review, <0.4 skip. Uses only existing dependencies (reqwest, serde, tokio, tracing, chrono, anyhow). Includes unit tests for all modules. * fix: address code review feedback on SkillForge PR #115 - evaluate: whole-word matching for BAD_PATTERNS (fixes hackathon false positive) - evaluate: guard against future timestamps in recency bonus - integrate: escape URLs in TOML output via escape_toml() - integrate: handle control chars (\n, \r, \t, \b, \f) in escape_toml() - mod: redact github_token in Debug impl to prevent log leakage - mod: fix auto_integrated count when auto_integrate=false - mod: per-candidate error handling (single failure no longer aborts pipeline) - scout: add 30s request timeout, remove unused token field - deps: enable chrono serde feature for DateTime serialization - tests: add hackathon/exact-hack tests, update escape_toml test coverage * fix: address round-2 CodeRabbit review feedback - integrate: add sanitize_path_component() to prevent directory traversal - mod: GitHub scout failure now logs warning and continues (no pipeline abort) - scout: network/parse errors per-query use warn+continue instead of ? - scout: implement std::str::FromStr for ScoutSource (replaces custom from_str) - tests: add path sanitization tests (traversal, separators, dot trimming) --------- Co-authored-by: stawky <stakeswky@gmail.com>
248 lines
7.1 KiB
Rust
248 lines
7.1 KiB
Rust
//! Integrator — generates ZeroClaw-standard SKILL.toml + SKILL.md from scout results.
|
|
|
|
use std::fs;
|
|
use std::path::PathBuf;
|
|
|
|
use anyhow::{bail, Context, Result};
|
|
use chrono::Utc;
|
|
use tracing::info;
|
|
|
|
use super::scout::ScoutResult;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Integrator
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Writes generated skill manifests (`SKILL.toml` + `SKILL.md`) to disk.
///
/// Every integrated candidate gets its own sub-directory of `output_dir`.
#[derive(Debug, Clone)]
pub struct Integrator {
    /// Root directory under which per-skill directories are created.
    output_dir: PathBuf,
}
|
|
|
|
impl Integrator {
|
|
pub fn new(output_dir: String) -> Self {
|
|
Self {
|
|
output_dir: PathBuf::from(output_dir),
|
|
}
|
|
}
|
|
|
|
/// Write SKILL.toml and SKILL.md for the given candidate.
|
|
pub fn integrate(&self, candidate: &ScoutResult) -> Result<PathBuf> {
|
|
let safe_name = sanitize_path_component(&candidate.name)?;
|
|
let skill_dir = self.output_dir.join(&safe_name);
|
|
fs::create_dir_all(&skill_dir)
|
|
.with_context(|| format!("Failed to create dir: {}", skill_dir.display()))?;
|
|
|
|
let toml_path = skill_dir.join("SKILL.toml");
|
|
let md_path = skill_dir.join("SKILL.md");
|
|
|
|
let toml_content = self.generate_toml(candidate);
|
|
let md_content = self.generate_md(candidate);
|
|
|
|
fs::write(&toml_path, &toml_content)
|
|
.with_context(|| format!("Failed to write {}", toml_path.display()))?;
|
|
fs::write(&md_path, &md_content)
|
|
.with_context(|| format!("Failed to write {}", md_path.display()))?;
|
|
|
|
info!(
|
|
skill = candidate.name.as_str(),
|
|
path = %skill_dir.display(),
|
|
"Integrated skill"
|
|
);
|
|
|
|
Ok(skill_dir)
|
|
}
|
|
|
|
// -- Generators ---------------------------------------------------------
|
|
|
|
fn generate_toml(&self, c: &ScoutResult) -> String {
|
|
let lang = c.language.as_deref().unwrap_or("unknown");
|
|
let updated = c
|
|
.updated_at
|
|
.map(|d| d.format("%Y-%m-%d").to_string())
|
|
.unwrap_or_else(|| "unknown".into());
|
|
|
|
format!(
|
|
r#"# Auto-generated by SkillForge on {now}
|
|
|
|
[skill]
|
|
name = "{name}"
|
|
version = "0.1.0"
|
|
description = "{description}"
|
|
source = "{url}"
|
|
owner = "{owner}"
|
|
language = "{lang}"
|
|
license = {license}
|
|
stars = {stars}
|
|
updated_at = "{updated}"
|
|
|
|
[skill.requirements]
|
|
runtime = "zeroclaw >= 0.1"
|
|
|
|
[skill.metadata]
|
|
auto_integrated = true
|
|
forge_timestamp = "{now}"
|
|
"#,
|
|
now = Utc::now().format("%Y-%m-%dT%H:%M:%SZ"),
|
|
name = escape_toml(&c.name),
|
|
description = escape_toml(&c.description),
|
|
url = escape_toml(&c.url),
|
|
owner = escape_toml(&c.owner),
|
|
lang = lang,
|
|
license = if c.has_license { "true" } else { "false" },
|
|
stars = c.stars,
|
|
updated = updated,
|
|
)
|
|
}
|
|
|
|
fn generate_md(&self, c: &ScoutResult) -> String {
|
|
let lang = c.language.as_deref().unwrap_or("unknown");
|
|
format!(
|
|
r#"# {name}
|
|
|
|
> Auto-generated by SkillForge
|
|
|
|
## Overview
|
|
|
|
- **Source**: [{url}]({url})
|
|
- **Owner**: {owner}
|
|
- **Language**: {lang}
|
|
- **Stars**: {stars}
|
|
- **License**: {license}
|
|
|
|
## Description
|
|
|
|
{description}
|
|
|
|
## Usage
|
|
|
|
```toml
|
|
# Add to your ZeroClaw config:
|
|
[skills.{name}]
|
|
enabled = true
|
|
```
|
|
|
|
## Notes
|
|
|
|
This manifest was auto-generated from repository metadata.
|
|
Review before enabling in production.
|
|
"#,
|
|
name = c.name,
|
|
url = c.url,
|
|
owner = c.owner,
|
|
lang = lang,
|
|
stars = c.stars,
|
|
license = if c.has_license { "yes" } else { "unknown" },
|
|
description = c.description,
|
|
)
|
|
}
|
|
}
|
|
|
|
/// Escape special characters for TOML basic string values.
///
/// Backslash, double quote and the control characters with short escapes
/// (`\n`, `\r`, `\t`, `\b`, `\f`) use their two-character form. Every other
/// control character is written as `\uXXXX`, because TOML does not allow raw
/// control characters inside a basic string. All remaining characters are
/// copied through unchanged.
///
/// The returned text is meant to be placed between a pair of `"` quotes; the
/// quotes themselves are not added here.
fn escape_toml(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            '\u{08}' => out.push_str("\\b"),
            '\u{0C}' => out.push_str("\\f"),
            // Remaining control characters (e.g. NUL, ESC, DEL) have no short
            // escape; emit the four-digit Unicode form instead.
            c if c.is_control() => out.push_str(&format!("\\u{:04X}", c as u32)),
            c => out.push(c),
        }
    }
    out
}
|
|
|
|
/// Sanitize a string for use as a single path component.
|
|
/// Rejects empty names, "..", and names containing path separators or NUL.
|
|
fn sanitize_path_component(name: &str) -> Result<String> {
|
|
let trimmed = name.trim().trim_matches('.');
|
|
if trimmed.is_empty() {
|
|
bail!("Skill name is empty or only dots after sanitization");
|
|
}
|
|
let sanitized: String = trimmed
|
|
.chars()
|
|
.map(|c| match c {
|
|
'/' | '\\' | '\0' => '_',
|
|
_ => c,
|
|
})
|
|
.collect();
|
|
if sanitized == ".." || sanitized.contains('/') || sanitized.contains('\\') {
|
|
bail!("Skill name '{}' is unsafe as a path component", name);
|
|
}
|
|
Ok(sanitized)
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Tests
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::skillforge::scout::{ScoutResult, ScoutSource};
    use std::fs;

    /// Build a fully-populated GitHub candidate shared by the tests below.
    fn sample_candidate() -> ScoutResult {
        ScoutResult {
            name: "test-skill".into(),
            url: "https://github.com/user/test-skill".into(),
            description: "A test skill for unit tests".into(),
            stars: 42,
            language: Some("Rust".into()),
            updated_at: Some(Utc::now()),
            source: ScoutSource::GitHub,
            owner: "user".into(),
            has_license: true,
        }
    }

    #[test]
    fn integrate_creates_files() {
        // Include the process id so concurrent test runs on the same machine
        // do not create and delete each other's directory.
        let tmp = std::env::temp_dir()
            .join(format!("zeroclaw-test-integrate-{}", std::process::id()));
        let _ = fs::remove_dir_all(&tmp);

        let integrator = Integrator::new(tmp.to_string_lossy().into_owned());
        let c = sample_candidate();
        let path = integrator.integrate(&c).unwrap();

        assert!(path.join("SKILL.toml").exists());
        assert!(path.join("SKILL.md").exists());

        let toml = fs::read_to_string(path.join("SKILL.toml")).unwrap();
        assert!(toml.contains("name = \"test-skill\""));
        assert!(toml.contains("stars = 42"));

        let md = fs::read_to_string(path.join("SKILL.md")).unwrap();
        assert!(md.contains("# test-skill"));
        assert!(md.contains("A test skill for unit tests"));

        let _ = fs::remove_dir_all(&tmp);
    }

    #[test]
    fn escape_toml_handles_quotes_and_control_chars() {
        assert_eq!(escape_toml(r#"say "hello""#), r#"say \"hello\""#);
        assert_eq!(escape_toml(r"back\slash"), r"back\\slash");
        assert_eq!(escape_toml("line\nbreak"), "line\\nbreak");
        assert_eq!(escape_toml("tab\there"), "tab\\there");
        assert_eq!(escape_toml("cr\rhere"), "cr\\rhere");
        // Backspace and form feed are handled by escape_toml as well.
        assert_eq!(escape_toml("bs\u{08}here"), "bs\\bhere");
        assert_eq!(escape_toml("ff\u{0C}here"), "ff\\fhere");
    }

    #[test]
    fn sanitize_rejects_traversal() {
        assert!(sanitize_path_component("..").is_err());
        assert!(sanitize_path_component("...").is_err());
        assert!(sanitize_path_component("").is_err());
        assert!(sanitize_path_component(" ").is_err());
    }

    #[test]
    fn sanitize_replaces_separators() {
        let s = sanitize_path_component("foo/bar\\baz\0qux").unwrap();
        assert!(!s.contains('/'));
        assert!(!s.contains('\\'));
        assert!(!s.contains('\0'));
        assert_eq!(s, "foo_bar_baz_qux");
    }

    #[test]
    fn sanitize_trims_dots() {
        let s = sanitize_path_component(".hidden.").unwrap();
        assert_eq!(s, "hidden");
    }
}
|