feat: SkillForge — automated skill discovery, evaluation & integration engine (#144)
* feat: add SkillForge — automated skill discovery, evaluation, and integration engine SkillForge adds a 3-stage pipeline for autonomous skill management: - Scout: discovers candidate skills from GitHub (extensible to ClawHub, HuggingFace) - Evaluate: scores candidates on compatibility, quality, and security (weighted 0.30/0.35/0.35) - Integrate: generates standard SKILL.toml + SKILL.md manifests for approved candidates Thresholds: >=0.7 auto-integrate, 0.4-0.7 manual review, <0.4 skip. Uses only existing dependencies (reqwest, serde, tokio, tracing, chrono, anyhow). Includes unit tests for all modules. * fix: address code review feedback on SkillForge PR #115 - evaluate: whole-word matching for BAD_PATTERNS (fixes hackathon false positive) - evaluate: guard against future timestamps in recency bonus - integrate: escape URLs in TOML output via escape_toml() - integrate: handle control chars (\n, \r, \t, \b, \f) in escape_toml() - mod: redact github_token in Debug impl to prevent log leakage - mod: fix auto_integrated count when auto_integrate=false - mod: per-candidate error handling (single failure no longer aborts pipeline) - scout: add 30s request timeout, remove unused token field - deps: enable chrono serde feature for DateTime serialization - tests: add hackathon/exact-hack tests, update escape_toml test coverage * fix: address round-2 CodeRabbit review feedback - integrate: add sanitize_path_component() to prevent directory traversal - mod: GitHub scout failure now logs warning and continues (no pipeline abort) - scout: network/parse errors per-query use warn+continue instead of ? - scout: implement std::str::FromStr for ScoutSource (replaces custom from_str) - tests: add path sanitization tests (traversal, separators, dot trimming) --------- Co-authored-by: stawky <stakeswky@gmail.com>
This commit is contained in:
parent
2ac571f406
commit
35b63d6b12
7 changed files with 1098 additions and 1 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -297,6 +297,7 @@ checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"iana-time-zone",
|
"iana-time-zone",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
|
"serde",
|
||||||
"windows-link",
|
"windows-link",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -53,7 +53,7 @@ async-trait = "0.1"
|
||||||
|
|
||||||
# Memory / persistence
|
# Memory / persistence
|
||||||
rusqlite = { version = "0.32", features = ["bundled"] }
|
rusqlite = { version = "0.32", features = ["bundled"] }
|
||||||
chrono = { version = "0.4", default-features = false, features = ["clock", "std"] }
|
chrono = { version = "0.4", default-features = false, features = ["clock", "std", "serde"] }
|
||||||
cron = "0.12"
|
cron = "0.12"
|
||||||
|
|
||||||
# Interactive CLI prompts
|
# Interactive CLI prompts
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ mod providers;
|
||||||
mod runtime;
|
mod runtime;
|
||||||
mod security;
|
mod security;
|
||||||
mod service;
|
mod service;
|
||||||
|
mod skillforge;
|
||||||
mod skills;
|
mod skills;
|
||||||
mod tools;
|
mod tools;
|
||||||
mod tunnel;
|
mod tunnel;
|
||||||
|
|
|
||||||
261
src/skillforge/evaluate.rs
Normal file
261
src/skillforge/evaluate.rs
Normal file
|
|
@ -0,0 +1,261 @@
|
||||||
|
//! Evaluator — scores discovered skill candidates across multiple dimensions.
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use super::scout::ScoutResult;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Scoring dimensions
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct Scores {
|
||||||
|
/// OS / arch / runtime compatibility (0.0–1.0).
|
||||||
|
pub compatibility: f64,
|
||||||
|
/// Code quality signals: stars, tests, docs (0.0–1.0).
|
||||||
|
pub quality: f64,
|
||||||
|
/// Security posture: license, known-bad patterns (0.0–1.0).
|
||||||
|
pub security: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Scores {
|
||||||
|
/// Weighted total. Weights: compatibility 0.3, quality 0.35, security 0.35.
|
||||||
|
pub fn total(&self) -> f64 {
|
||||||
|
self.compatibility * 0.30 + self.quality * 0.35 + self.security * 0.35
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Recommendation
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
|
pub enum Recommendation {
|
||||||
|
/// Score >= threshold → safe to auto-integrate.
|
||||||
|
Auto,
|
||||||
|
/// Score in [0.4, threshold) → needs human review.
|
||||||
|
Manual,
|
||||||
|
/// Score < 0.4 → skip entirely.
|
||||||
|
Skip,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// EvalResult
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct EvalResult {
|
||||||
|
pub candidate: ScoutResult,
|
||||||
|
pub scores: Scores,
|
||||||
|
pub total_score: f64,
|
||||||
|
pub recommendation: Recommendation,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Evaluator
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Scores candidates and maps the total onto a [`Recommendation`].
pub struct Evaluator {
    /// Totals at or above this value are recommended for auto-integration.
    min_score: f64,
}
|
||||||
|
|
||||||
|
/// Lowercase words that mark a repo as suspicious when they appear, as whole
/// words, in its name or description.
const BAD_PATTERNS: &[&str] = &[
    "malware",
    "exploit",
    "hack",
    "crack",
    "keygen",
    "ransomware",
    "trojan",
];
|
||||||
|
|
||||||
|
/// Reports whether `word` occurs in `haystack` as a whole word.
///
/// An occurrence counts only when the characters directly before and after it
/// (if any) are not alphanumeric, so `"hack"` matches `"hack-tool"` but not
/// `"hackathon"` or `"lifehacks"`. Boundaries are inspected as full Unicode
/// characters rather than raw bytes, and an empty `word` never matches.
fn contains_word(haystack: &str, word: &str) -> bool {
    if word.is_empty() {
        // `match_indices("")` yields every char boundary, which would turn any
        // run of punctuation or whitespace into a spurious "match".
        return false;
    }

    // A missing neighbour (start/end of the haystack) counts as a boundary.
    let is_boundary = |neighbour: Option<char>| neighbour.map_or(true, |c| !c.is_alphanumeric());

    haystack.match_indices(word).any(|(start, hit)| {
        // Match offsets always fall on char boundaries, so slicing here cannot panic.
        let before = haystack[..start].chars().next_back();
        let after = haystack[start + hit.len()..].chars().next();
        is_boundary(before) && is_boundary(after)
    })
}
|
||||||
|
|
||||||
|
impl Evaluator {
|
||||||
|
pub fn new(min_score: f64) -> Self {
|
||||||
|
Self { min_score }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn evaluate(&self, candidate: ScoutResult) -> EvalResult {
|
||||||
|
let compatibility = self.score_compatibility(&candidate);
|
||||||
|
let quality = self.score_quality(&candidate);
|
||||||
|
let security = self.score_security(&candidate);
|
||||||
|
|
||||||
|
let scores = Scores {
|
||||||
|
compatibility,
|
||||||
|
quality,
|
||||||
|
security,
|
||||||
|
};
|
||||||
|
let total_score = scores.total();
|
||||||
|
|
||||||
|
let recommendation = if total_score >= self.min_score {
|
||||||
|
Recommendation::Auto
|
||||||
|
} else if total_score >= 0.4 {
|
||||||
|
Recommendation::Manual
|
||||||
|
} else {
|
||||||
|
Recommendation::Skip
|
||||||
|
};
|
||||||
|
|
||||||
|
EvalResult {
|
||||||
|
candidate,
|
||||||
|
scores,
|
||||||
|
total_score,
|
||||||
|
recommendation,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// -- Dimension scorers --------------------------------------------------
|
||||||
|
|
||||||
|
/// Compatibility: favour Rust repos; penalise unknown languages.
|
||||||
|
fn score_compatibility(&self, c: &ScoutResult) -> f64 {
|
||||||
|
match c.language.as_deref() {
|
||||||
|
Some("Rust") => 1.0,
|
||||||
|
Some("Python" | "TypeScript" | "JavaScript") => 0.6,
|
||||||
|
Some(_) => 0.3,
|
||||||
|
None => 0.2,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Quality: based on star count (log scale, capped at 1.0).
|
||||||
|
fn score_quality(&self, c: &ScoutResult) -> f64 {
|
||||||
|
// log2(stars + 1) / 10, capped at 1.0
|
||||||
|
let raw = ((c.stars as f64) + 1.0).log2() / 10.0;
|
||||||
|
raw.min(1.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Security: license presence + bad-pattern check.
|
||||||
|
fn score_security(&self, c: &ScoutResult) -> f64 {
|
||||||
|
let mut score: f64 = 0.5;
|
||||||
|
|
||||||
|
// License bonus
|
||||||
|
if c.has_license {
|
||||||
|
score += 0.3;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bad-pattern penalty (whole-word match)
|
||||||
|
let lower_name = c.name.to_lowercase();
|
||||||
|
let lower_desc = c.description.to_lowercase();
|
||||||
|
for pat in BAD_PATTERNS {
|
||||||
|
if contains_word(&lower_name, pat) || contains_word(&lower_desc, pat) {
|
||||||
|
score -= 0.5;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recency bonus: updated within last 180 days (guard against future timestamps)
|
||||||
|
if let Some(updated) = c.updated_at {
|
||||||
|
let age_days = (chrono::Utc::now() - updated).num_days();
|
||||||
|
if (0..180).contains(&age_days) {
|
||||||
|
score += 0.2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
score.clamp(0.0, 1.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::skillforge::scout::{ScoutResult, ScoutSource};

    /// Builds a just-updated GitHub candidate with a harmless name and description.
    fn make_candidate(stars: u64, lang: Option<&str>, has_license: bool) -> ScoutResult {
        ScoutResult {
            name: String::from("test-skill"),
            url: String::from("https://github.com/test/test-skill"),
            description: String::from("A test skill"),
            stars,
            language: lang.map(str::to_owned),
            updated_at: Some(chrono::Utc::now()),
            source: ScoutSource::GitHub,
            owner: String::from("test"),
            has_license,
        }
    }

    #[test]
    fn high_quality_rust_repo_gets_auto() {
        let evaluator = Evaluator::new(0.7);
        let outcome = evaluator.evaluate(make_candidate(500, Some("Rust"), true));
        assert!(outcome.total_score >= 0.7, "score: {}", outcome.total_score);
        assert_eq!(outcome.recommendation, Recommendation::Auto);
    }

    #[test]
    fn low_star_no_license_gets_manual_or_skip() {
        let evaluator = Evaluator::new(0.7);
        let outcome = evaluator.evaluate(make_candidate(1, None, false));
        assert!(outcome.total_score < 0.7, "score: {}", outcome.total_score);
        assert_ne!(outcome.recommendation, Recommendation::Auto);
    }

    #[test]
    fn bad_pattern_tanks_security() {
        let evaluator = Evaluator::new(0.7);
        let mut candidate = make_candidate(1000, Some("Rust"), true);
        candidate.name = "malware-skill".into();
        let outcome = evaluator.evaluate(candidate);
        // 0.5 base + 0.3 license - 0.5 bad_pattern + 0.2 recency = 0.5
        assert!(
            outcome.scores.security <= 0.5,
            "security: {}",
            outcome.scores.security
        );
    }

    #[test]
    fn scores_total_weighted() {
        let all_ones = Scores {
            compatibility: 1.0,
            quality: 1.0,
            security: 1.0,
        };
        assert!((all_ones.total() - 1.0).abs() < f64::EPSILON);

        let all_zeros = Scores {
            compatibility: 0.0,
            quality: 0.0,
            security: 0.0,
        };
        assert!((all_zeros.total()).abs() < f64::EPSILON);
    }

    #[test]
    fn hackathon_not_flagged_as_bad() {
        let evaluator = Evaluator::new(0.7);
        let mut candidate = make_candidate(500, Some("Rust"), true);
        candidate.name = "hackathon-tools".into();
        candidate.description = "Tools for hackathons and lifehacks".into();
        let outcome = evaluator.evaluate(candidate);
        // "hack" should NOT match "hackathon" or "lifehacks"
        assert!(
            outcome.scores.security >= 0.5,
            "security: {}",
            outcome.scores.security
        );
    }

    #[test]
    fn exact_hack_is_flagged() {
        let evaluator = Evaluator::new(0.7);
        let mut candidate = make_candidate(500, Some("Rust"), false);
        candidate.name = "hack-tool".into();
        candidate.updated_at = None;
        let outcome = evaluator.evaluate(candidate);
        // 0.5 base + 0.0 license - 0.5 bad_pattern + 0.0 recency = 0.0
        assert!(
            outcome.scores.security < 0.5,
            "security: {}",
            outcome.scores.security
        );
    }
}
|
||||||
248
src/skillforge/integrate.rs
Normal file
248
src/skillforge/integrate.rs
Normal file
|
|
@ -0,0 +1,248 @@
|
||||||
|
//! Integrator — generates ZeroClaw-standard SKILL.toml + SKILL.md from scout results.
|
||||||
|
|
||||||
|
use std::fs;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use anyhow::{bail, Context, Result};
|
||||||
|
use chrono::Utc;
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
use super::scout::ScoutResult;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Integrator
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Writes skill manifests (SKILL.toml + SKILL.md) for scouted candidates.
pub struct Integrator {
    /// Root directory; each skill gets its own sub-directory beneath it.
    output_dir: PathBuf,
}
|
||||||
|
|
||||||
|
impl Integrator {
|
||||||
|
pub fn new(output_dir: String) -> Self {
|
||||||
|
Self {
|
||||||
|
output_dir: PathBuf::from(output_dir),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write SKILL.toml and SKILL.md for the given candidate.
|
||||||
|
pub fn integrate(&self, candidate: &ScoutResult) -> Result<PathBuf> {
|
||||||
|
let safe_name = sanitize_path_component(&candidate.name)?;
|
||||||
|
let skill_dir = self.output_dir.join(&safe_name);
|
||||||
|
fs::create_dir_all(&skill_dir)
|
||||||
|
.with_context(|| format!("Failed to create dir: {}", skill_dir.display()))?;
|
||||||
|
|
||||||
|
let toml_path = skill_dir.join("SKILL.toml");
|
||||||
|
let md_path = skill_dir.join("SKILL.md");
|
||||||
|
|
||||||
|
let toml_content = self.generate_toml(candidate);
|
||||||
|
let md_content = self.generate_md(candidate);
|
||||||
|
|
||||||
|
fs::write(&toml_path, &toml_content)
|
||||||
|
.with_context(|| format!("Failed to write {}", toml_path.display()))?;
|
||||||
|
fs::write(&md_path, &md_content)
|
||||||
|
.with_context(|| format!("Failed to write {}", md_path.display()))?;
|
||||||
|
|
||||||
|
info!(
|
||||||
|
skill = candidate.name.as_str(),
|
||||||
|
path = %skill_dir.display(),
|
||||||
|
"Integrated skill"
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(skill_dir)
|
||||||
|
}
|
||||||
|
|
||||||
|
// -- Generators ---------------------------------------------------------
|
||||||
|
|
||||||
|
fn generate_toml(&self, c: &ScoutResult) -> String {
|
||||||
|
let lang = c.language.as_deref().unwrap_or("unknown");
|
||||||
|
let updated = c
|
||||||
|
.updated_at
|
||||||
|
.map(|d| d.format("%Y-%m-%d").to_string())
|
||||||
|
.unwrap_or_else(|| "unknown".into());
|
||||||
|
|
||||||
|
format!(
|
||||||
|
r#"# Auto-generated by SkillForge on {now}
|
||||||
|
|
||||||
|
[skill]
|
||||||
|
name = "{name}"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "{description}"
|
||||||
|
source = "{url}"
|
||||||
|
owner = "{owner}"
|
||||||
|
language = "{lang}"
|
||||||
|
license = {license}
|
||||||
|
stars = {stars}
|
||||||
|
updated_at = "{updated}"
|
||||||
|
|
||||||
|
[skill.requirements]
|
||||||
|
runtime = "zeroclaw >= 0.1"
|
||||||
|
|
||||||
|
[skill.metadata]
|
||||||
|
auto_integrated = true
|
||||||
|
forge_timestamp = "{now}"
|
||||||
|
"#,
|
||||||
|
now = Utc::now().format("%Y-%m-%dT%H:%M:%SZ"),
|
||||||
|
name = escape_toml(&c.name),
|
||||||
|
description = escape_toml(&c.description),
|
||||||
|
url = escape_toml(&c.url),
|
||||||
|
owner = escape_toml(&c.owner),
|
||||||
|
lang = lang,
|
||||||
|
license = if c.has_license { "true" } else { "false" },
|
||||||
|
stars = c.stars,
|
||||||
|
updated = updated,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn generate_md(&self, c: &ScoutResult) -> String {
|
||||||
|
let lang = c.language.as_deref().unwrap_or("unknown");
|
||||||
|
format!(
|
||||||
|
r#"# {name}
|
||||||
|
|
||||||
|
> Auto-generated by SkillForge
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
- **Source**: [{url}]({url})
|
||||||
|
- **Owner**: {owner}
|
||||||
|
- **Language**: {lang}
|
||||||
|
- **Stars**: {stars}
|
||||||
|
- **License**: {license}
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
{description}
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```toml
|
||||||
|
# Add to your ZeroClaw config:
|
||||||
|
[skills.{name}]
|
||||||
|
enabled = true
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
This manifest was auto-generated from repository metadata.
|
||||||
|
Review before enabling in production.
|
||||||
|
"#,
|
||||||
|
name = c.name,
|
||||||
|
url = c.url,
|
||||||
|
owner = c.owner,
|
||||||
|
lang = lang,
|
||||||
|
stars = c.stars,
|
||||||
|
license = if c.has_license { "yes" } else { "unknown" },
|
||||||
|
description = c.description,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Escapes `s` so it can be placed between double quotes as a TOML basic string.
///
/// Backslash and double quote are prefixed with a backslash; backspace, tab,
/// newline, form feed and carriage return use their short escapes. Every other
/// control character in U+0000–U+001F, plus U+007F, is written as `\uXXXX`,
/// because TOML does not allow those characters unescaped in a basic string.
/// All remaining characters are copied through unchanged.
fn escape_toml(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            '\u{08}' => escaped.push_str("\\b"),
            '\u{0C}' => escaped.push_str("\\f"),
            // Remaining C0 controls and DEL have no short form.
            '\u{00}'..='\u{1F}' | '\u{7F}' => {
                escaped.push_str(&format!("\\u{:04X}", u32::from(ch)));
            }
            other => escaped.push(other),
        }
    }
    escaped
}
|
||||||
|
|
||||||
|
/// Sanitize a string for use as a single path component.
|
||||||
|
/// Rejects empty names, "..", and names containing path separators or NUL.
|
||||||
|
fn sanitize_path_component(name: &str) -> Result<String> {
|
||||||
|
let trimmed = name.trim().trim_matches('.');
|
||||||
|
if trimmed.is_empty() {
|
||||||
|
bail!("Skill name is empty or only dots after sanitization");
|
||||||
|
}
|
||||||
|
let sanitized: String = trimmed
|
||||||
|
.chars()
|
||||||
|
.map(|c| match c {
|
||||||
|
'/' | '\\' | '\0' => '_',
|
||||||
|
_ => c,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
if sanitized == ".." || sanitized.contains('/') || sanitized.contains('\\') {
|
||||||
|
bail!("Skill name '{}' is unsafe as a path component", name);
|
||||||
|
}
|
||||||
|
Ok(sanitized)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::skillforge::scout::{ScoutResult, ScoutSource};
    use std::fs;

    /// A licensed, recently-updated Rust candidate used by the file-writing test.
    fn sample_candidate() -> ScoutResult {
        ScoutResult {
            name: String::from("test-skill"),
            url: String::from("https://github.com/user/test-skill"),
            description: String::from("A test skill for unit tests"),
            stars: 42,
            language: Some(String::from("Rust")),
            updated_at: Some(Utc::now()),
            source: ScoutSource::GitHub,
            owner: String::from("user"),
            has_license: true,
        }
    }

    #[test]
    fn integrate_creates_files() {
        let out_root = std::env::temp_dir().join("zeroclaw-test-integrate");
        // Start from a clean slate; it is fine if the directory does not exist yet.
        let _ = fs::remove_dir_all(&out_root);

        let integrator = Integrator::new(out_root.to_string_lossy().into_owned());
        let candidate = sample_candidate();
        let skill_dir = integrator.integrate(&candidate).unwrap();

        let toml_file = skill_dir.join("SKILL.toml");
        let md_file = skill_dir.join("SKILL.md");
        assert!(toml_file.exists());
        assert!(md_file.exists());

        let toml = fs::read_to_string(&toml_file).unwrap();
        assert!(toml.contains("name = \"test-skill\""));
        assert!(toml.contains("stars = 42"));

        let md = fs::read_to_string(&md_file).unwrap();
        assert!(md.contains("# test-skill"));
        assert!(md.contains("A test skill for unit tests"));

        let _ = fs::remove_dir_all(&out_root);
    }

    #[test]
    fn escape_toml_handles_quotes_and_control_chars() {
        assert_eq!(escape_toml(r#"say "hello""#), r#"say \"hello\""#);
        assert_eq!(escape_toml(r"back\slash"), r"back\\slash");
        assert_eq!(escape_toml("line\nbreak"), "line\\nbreak");
        assert_eq!(escape_toml("tab\there"), "tab\\there");
        assert_eq!(escape_toml("cr\rhere"), "cr\\rhere");
    }

    #[test]
    fn sanitize_rejects_traversal() {
        for bad in ["..", "...", "", " "] {
            assert!(sanitize_path_component(bad).is_err());
        }
    }

    #[test]
    fn sanitize_replaces_separators() {
        let cleaned = sanitize_path_component("foo/bar\\baz\0qux").unwrap();
        assert!(!cleaned.contains('/'));
        assert!(!cleaned.contains('\\'));
        assert!(!cleaned.contains('\0'));
        assert_eq!(cleaned, "foo_bar_baz_qux");
    }

    #[test]
    fn sanitize_trims_dots() {
        let cleaned = sanitize_path_component(".hidden.").unwrap();
        assert_eq!(cleaned, "hidden");
    }
}
|
||||||
255
src/skillforge/mod.rs
Normal file
255
src/skillforge/mod.rs
Normal file
|
|
@ -0,0 +1,255 @@
|
||||||
|
//! SkillForge — Skill auto-discovery, evaluation, and integration engine.
|
||||||
|
//!
|
||||||
|
//! Pipeline: Scout → Evaluate → Integrate
|
||||||
|
//! Discovers skills from external sources, scores them, and generates
|
||||||
|
//! ZeroClaw-compatible manifests for qualified candidates.
|
||||||
|
|
||||||
|
pub mod evaluate;
|
||||||
|
pub mod integrate;
|
||||||
|
pub mod scout;
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
use self::evaluate::{EvalResult, Evaluator, Recommendation};
|
||||||
|
use self::integrate::Integrator;
|
||||||
|
use self::scout::{GitHubScout, Scout, ScoutResult, ScoutSource};
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Configuration
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
pub struct SkillForgeConfig {
|
||||||
|
#[serde(default)]
|
||||||
|
pub enabled: bool,
|
||||||
|
#[serde(default = "default_auto_integrate")]
|
||||||
|
pub auto_integrate: bool,
|
||||||
|
#[serde(default = "default_sources")]
|
||||||
|
pub sources: Vec<String>,
|
||||||
|
#[serde(default = "default_scan_interval")]
|
||||||
|
pub scan_interval_hours: u64,
|
||||||
|
#[serde(default = "default_min_score")]
|
||||||
|
pub min_score: f64,
|
||||||
|
/// Optional GitHub personal-access token for higher rate limits.
|
||||||
|
#[serde(default)]
|
||||||
|
pub github_token: Option<String>,
|
||||||
|
/// Directory where integrated skills are written.
|
||||||
|
#[serde(default = "default_output_dir")]
|
||||||
|
pub output_dir: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serde default for `auto_integrate`: integration is on unless disabled.
fn default_auto_integrate() -> bool {
    true
}

/// Serde default for `sources`: GitHub and ClawHub.
fn default_sources() -> Vec<String> {
    ["github", "clawhub"].iter().map(|s| s.to_string()).collect()
}

/// Serde default for `scan_interval_hours`: 24.
fn default_scan_interval() -> u64 {
    24
}

/// Serde default for `min_score`: 0.7.
fn default_min_score() -> f64 {
    0.7
}

/// Serde default for `output_dir`: a `skills` directory under the working directory.
fn default_output_dir() -> String {
    String::from("./skills")
}
|
||||||
|
|
||||||
|
impl Default for SkillForgeConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
enabled: false,
|
||||||
|
auto_integrate: default_auto_integrate(),
|
||||||
|
sources: default_sources(),
|
||||||
|
scan_interval_hours: default_scan_interval(),
|
||||||
|
min_score: default_min_score(),
|
||||||
|
github_token: None,
|
||||||
|
output_dir: default_output_dir(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for SkillForgeConfig {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("SkillForgeConfig")
|
||||||
|
.field("enabled", &self.enabled)
|
||||||
|
.field("auto_integrate", &self.auto_integrate)
|
||||||
|
.field("sources", &self.sources)
|
||||||
|
.field("scan_interval_hours", &self.scan_interval_hours)
|
||||||
|
.field("min_score", &self.min_score)
|
||||||
|
.field(
|
||||||
|
"github_token",
|
||||||
|
&self.github_token.as_ref().map(|_| "***"),
|
||||||
|
)
|
||||||
|
.field("output_dir", &self.output_dir)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// ForgeReport — summary of a single pipeline run
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct ForgeReport {
|
||||||
|
pub discovered: usize,
|
||||||
|
pub evaluated: usize,
|
||||||
|
pub auto_integrated: usize,
|
||||||
|
pub manual_review: usize,
|
||||||
|
pub skipped: usize,
|
||||||
|
pub results: Vec<EvalResult>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// SkillForge
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
pub struct SkillForge {
|
||||||
|
config: SkillForgeConfig,
|
||||||
|
evaluator: Evaluator,
|
||||||
|
integrator: Integrator,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SkillForge {
|
||||||
|
pub fn new(config: SkillForgeConfig) -> Self {
|
||||||
|
let evaluator = Evaluator::new(config.min_score);
|
||||||
|
let integrator = Integrator::new(config.output_dir.clone());
|
||||||
|
Self {
|
||||||
|
config,
|
||||||
|
evaluator,
|
||||||
|
integrator,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run the full pipeline: Scout → Evaluate → Integrate.
|
||||||
|
pub async fn forge(&self) -> Result<ForgeReport> {
|
||||||
|
if !self.config.enabled {
|
||||||
|
warn!("SkillForge is disabled — skipping");
|
||||||
|
return Ok(ForgeReport {
|
||||||
|
discovered: 0,
|
||||||
|
evaluated: 0,
|
||||||
|
auto_integrated: 0,
|
||||||
|
manual_review: 0,
|
||||||
|
skipped: 0,
|
||||||
|
results: vec![],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Scout ----------------------------------------------------------
|
||||||
|
let mut candidates: Vec<ScoutResult> = Vec::new();
|
||||||
|
|
||||||
|
for src in &self.config.sources {
|
||||||
|
let source: ScoutSource = src.parse().unwrap(); // Infallible
|
||||||
|
match source {
|
||||||
|
ScoutSource::GitHub => {
|
||||||
|
let scout = GitHubScout::new(self.config.github_token.clone());
|
||||||
|
match scout.discover().await {
|
||||||
|
Ok(mut found) => {
|
||||||
|
info!(count = found.len(), "GitHub scout returned candidates");
|
||||||
|
candidates.append(&mut found);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!(error = %e, "GitHub scout failed, continuing with other sources");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ScoutSource::ClawHub | ScoutSource::HuggingFace => {
|
||||||
|
info!(source = src.as_str(), "Source not yet implemented — skipping");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deduplicate by URL
|
||||||
|
scout::dedup(&mut candidates);
|
||||||
|
let discovered = candidates.len();
|
||||||
|
info!(discovered, "Total unique candidates after dedup");
|
||||||
|
|
||||||
|
// --- Evaluate -------------------------------------------------------
|
||||||
|
let results: Vec<EvalResult> = candidates
|
||||||
|
.into_iter()
|
||||||
|
.map(|c| self.evaluator.evaluate(c))
|
||||||
|
.collect();
|
||||||
|
let evaluated = results.len();
|
||||||
|
|
||||||
|
// --- Integrate ------------------------------------------------------
|
||||||
|
let mut auto_integrated = 0usize;
|
||||||
|
let mut manual_review = 0usize;
|
||||||
|
let mut skipped = 0usize;
|
||||||
|
|
||||||
|
for res in &results {
|
||||||
|
match res.recommendation {
|
||||||
|
Recommendation::Auto => {
|
||||||
|
if self.config.auto_integrate {
|
||||||
|
match self.integrator.integrate(&res.candidate) {
|
||||||
|
Ok(_) => {
|
||||||
|
auto_integrated += 1;
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!(
|
||||||
|
skill = res.candidate.name.as_str(),
|
||||||
|
error = %e,
|
||||||
|
"Integration failed for candidate, continuing"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Count as would-be auto but not actually integrated
|
||||||
|
manual_review += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Recommendation::Manual => {
|
||||||
|
manual_review += 1;
|
||||||
|
}
|
||||||
|
Recommendation::Skip => {
|
||||||
|
skipped += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
info!(
|
||||||
|
auto_integrated,
|
||||||
|
manual_review, skipped, "Forge pipeline complete"
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(ForgeReport {
|
||||||
|
discovered,
|
||||||
|
evaluated,
|
||||||
|
auto_integrated,
|
||||||
|
manual_review,
|
||||||
|
skipped,
|
||||||
|
results,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A disabled forge must return an all-zero report without scanning.
    #[tokio::test]
    async fn disabled_forge_returns_empty_report() {
        let disabled = SkillForgeConfig {
            enabled: false,
            ..Default::default()
        };
        let report = SkillForge::new(disabled).forge().await.unwrap();
        assert_eq!(report.discovered, 0);
        assert_eq!(report.auto_integrated, 0);
    }

    /// Pins the documented defaults so accidental changes are caught.
    #[test]
    fn default_config_values() {
        let defaults = SkillForgeConfig::default();
        assert!(!defaults.enabled);
        assert!(defaults.auto_integrate);
        assert_eq!(defaults.scan_interval_hours, 24);
        assert!((defaults.min_score - 0.7).abs() < f64::EPSILON);
        assert_eq!(defaults.sources, vec!["github", "clawhub"]);
    }
}
|
||||||
331
src/skillforge/scout.rs
Normal file
331
src/skillforge/scout.rs
Normal file
|
|
@ -0,0 +1,331 @@
|
||||||
|
//! Scout — skill discovery from external sources.
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// ScoutSource
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
|
pub enum ScoutSource {
|
||||||
|
GitHub,
|
||||||
|
ClawHub,
|
||||||
|
HuggingFace,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::str::FromStr for ScoutSource {
|
||||||
|
type Err = std::convert::Infallible;
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
|
||||||
|
Ok(match s.to_lowercase().as_str() {
|
||||||
|
"github" => Self::GitHub,
|
||||||
|
"clawhub" => Self::ClawHub,
|
||||||
|
"huggingface" | "hf" => Self::HuggingFace,
|
||||||
|
_ => {
|
||||||
|
warn!(source = s, "Unknown scout source, defaulting to GitHub");
|
||||||
|
Self::GitHub
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// ScoutResult
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct ScoutResult {
|
||||||
|
pub name: String,
|
||||||
|
pub url: String,
|
||||||
|
pub description: String,
|
||||||
|
pub stars: u64,
|
||||||
|
pub language: Option<String>,
|
||||||
|
pub updated_at: Option<DateTime<Utc>>,
|
||||||
|
pub source: ScoutSource,
|
||||||
|
/// Owner / org extracted from the URL or API response.
|
||||||
|
pub owner: String,
|
||||||
|
/// Whether the repo has a license file.
|
||||||
|
pub has_license: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Scout trait
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
pub trait Scout: Send + Sync {
|
||||||
|
/// Discover candidate skills from the source.
|
||||||
|
async fn discover(&self) -> Result<Vec<ScoutResult>>;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// GitHubScout
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Searches GitHub for repos matching skill-related queries.
|
||||||
|
pub struct GitHubScout {
|
||||||
|
client: reqwest::Client,
|
||||||
|
queries: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GitHubScout {
|
||||||
|
pub fn new(token: Option<String>) -> Self {
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
let mut headers = reqwest::header::HeaderMap::new();
|
||||||
|
headers.insert(
|
||||||
|
reqwest::header::ACCEPT,
|
||||||
|
"application/vnd.github+json"
|
||||||
|
.parse()
|
||||||
|
.expect("valid header"),
|
||||||
|
);
|
||||||
|
headers.insert(
|
||||||
|
reqwest::header::USER_AGENT,
|
||||||
|
"ZeroClaw-SkillForge/0.1".parse().expect("valid header"),
|
||||||
|
);
|
||||||
|
if let Some(ref t) = token {
|
||||||
|
if let Ok(val) = format!("Bearer {t}").parse() {
|
||||||
|
headers.insert(reqwest::header::AUTHORIZATION, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let client = reqwest::Client::builder()
|
||||||
|
.default_headers(headers)
|
||||||
|
.timeout(Duration::from_secs(30))
|
||||||
|
.build()
|
||||||
|
.expect("failed to build reqwest client");
|
||||||
|
|
||||||
|
Self {
|
||||||
|
client,
|
||||||
|
queries: vec![
|
||||||
|
"zeroclaw skill".into(),
|
||||||
|
"ai agent skill".into(),
|
||||||
|
],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the GitHub search/repositories JSON response.
|
||||||
|
fn parse_items(body: &serde_json::Value) -> Vec<ScoutResult> {
|
||||||
|
let items = match body.get("items").and_then(|v| v.as_array()) {
|
||||||
|
Some(arr) => arr,
|
||||||
|
None => return vec![],
|
||||||
|
};
|
||||||
|
|
||||||
|
items
|
||||||
|
.iter()
|
||||||
|
.filter_map(|item| {
|
||||||
|
let name = item.get("name")?.as_str()?.to_string();
|
||||||
|
let url = item.get("html_url")?.as_str()?.to_string();
|
||||||
|
let description = item
|
||||||
|
.get("description")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("")
|
||||||
|
.to_string();
|
||||||
|
let stars = item
|
||||||
|
.get("stargazers_count")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.unwrap_or(0);
|
||||||
|
let language = item
|
||||||
|
.get("language")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(String::from);
|
||||||
|
let updated_at = item
|
||||||
|
.get("updated_at")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.and_then(|s| s.parse::<DateTime<Utc>>().ok());
|
||||||
|
let owner = item
|
||||||
|
.get("owner")
|
||||||
|
.and_then(|o| o.get("login"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("unknown")
|
||||||
|
.to_string();
|
||||||
|
let has_license = item
|
||||||
|
.get("license")
|
||||||
|
.map(|v| !v.is_null())
|
||||||
|
.unwrap_or(false);
|
||||||
|
|
||||||
|
Some(ScoutResult {
|
||||||
|
name,
|
||||||
|
url,
|
||||||
|
description,
|
||||||
|
stars,
|
||||||
|
language,
|
||||||
|
updated_at,
|
||||||
|
source: ScoutSource::GitHub,
|
||||||
|
owner,
|
||||||
|
has_license,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Scout for GitHubScout {
|
||||||
|
async fn discover(&self) -> Result<Vec<ScoutResult>> {
|
||||||
|
let mut all: Vec<ScoutResult> = Vec::new();
|
||||||
|
|
||||||
|
for query in &self.queries {
|
||||||
|
let url = format!(
|
||||||
|
"https://api.github.com/search/repositories?q={}&sort=stars&order=desc&per_page=30",
|
||||||
|
urlencoding(query)
|
||||||
|
);
|
||||||
|
debug!(query = query.as_str(), "Searching GitHub");
|
||||||
|
|
||||||
|
let resp = match self.client.get(&url).send().await {
|
||||||
|
Ok(r) => r,
|
||||||
|
Err(e) => {
|
||||||
|
warn!(
|
||||||
|
query = query.as_str(),
|
||||||
|
error = %e,
|
||||||
|
"GitHub API request failed, skipping query"
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if !resp.status().is_success() {
|
||||||
|
warn!(
|
||||||
|
status = %resp.status(),
|
||||||
|
query = query.as_str(),
|
||||||
|
"GitHub search returned non-200"
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let body: serde_json::Value = match resp.json().await {
|
||||||
|
Ok(v) => v,
|
||||||
|
Err(e) => {
|
||||||
|
warn!(
|
||||||
|
query = query.as_str(),
|
||||||
|
error = %e,
|
||||||
|
"Failed to parse GitHub response, skipping query"
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut items = Self::parse_items(&body);
|
||||||
|
debug!(count = items.len(), query = query.as_str(), "Parsed items");
|
||||||
|
all.append(&mut items);
|
||||||
|
}
|
||||||
|
|
||||||
|
dedup(&mut all);
|
||||||
|
Ok(all)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Percent-encodes `s` for use as a URL query-string value.
///
/// ASCII letters, digits and `-`, `.`, `_`, `~` pass through unchanged, a
/// space becomes `+`, and every other byte of the UTF-8 encoding is written
/// as `%XX` with uppercase hex digits. Characters such as `+`, `%`, `=`,
/// `?` and non-ASCII text therefore cannot alter or corrupt the query.
fn urlencoding(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut out = String::with_capacity(s.len());
    for byte in s.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => {
                out.push(char::from(byte));
            }
            b' ' => out.push('+'),
            _ => {
                out.push('%');
                out.push(char::from(HEX[usize::from(byte >> 4)]));
                out.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
    }
    out
}
|
||||||
|
|
||||||
|
/// Deduplicate scout results by URL (keeps first occurrence).
|
||||||
|
pub fn dedup(results: &mut Vec<ScoutResult>) {
|
||||||
|
let mut seen = std::collections::HashSet::new();
|
||||||
|
results.retain(|r| seen.insert(r.url.clone()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a GitHub-sourced fixture owned by `x`; only the fields the
    /// tests vary are parameters.
    fn fixture(name: &str, url: &str, stars: u64, has_license: bool) -> ScoutResult {
        ScoutResult {
            name: name.into(),
            url: url.into(),
            description: String::new(),
            stars,
            language: None,
            updated_at: None,
            source: ScoutSource::GitHub,
            owner: "x".into(),
            has_license,
        }
    }

    /// Known names parse case-insensitively; unknown names fall back to GitHub.
    #[test]
    fn scout_source_from_str() {
        let cases = [
            ("github", ScoutSource::GitHub),
            ("GitHub", ScoutSource::GitHub),
            ("clawhub", ScoutSource::ClawHub),
            ("huggingface", ScoutSource::HuggingFace),
            ("hf", ScoutSource::HuggingFace),
            // unknown falls back to GitHub
            ("unknown", ScoutSource::GitHub),
        ];

        for &(input, expected) in &cases {
            assert_eq!(input.parse::<ScoutSource>().unwrap(), expected);
        }
    }

    /// Two entries sharing a URL collapse to the first one; order is kept.
    #[test]
    fn dedup_removes_duplicates() {
        let mut results = vec![
            fixture("a", "https://github.com/x/a", 10, true),
            fixture("a-dup", "https://github.com/x/a", 10, true),
            fixture("b", "https://github.com/x/b", 5, false),
        ];

        dedup(&mut results);

        let names: Vec<&str> = results.iter().map(|r| r.name.as_str()).collect();
        assert_eq!(results.len(), 2);
        assert_eq!(names[0], "a");
        assert_eq!(names[1], "b");
    }

    /// A well-formed search response yields one fully populated result.
    #[test]
    fn parse_github_items() {
        let json = serde_json::json!({
            "total_count": 1,
            "items": [
                {
                    "name": "cool-skill",
                    "html_url": "https://github.com/user/cool-skill",
                    "description": "A cool skill",
                    "stargazers_count": 42,
                    "language": "Rust",
                    "updated_at": "2026-01-15T10:00:00Z",
                    "owner": { "login": "user" },
                    "license": { "spdx_id": "MIT" }
                }
            ]
        });

        let items = GitHubScout::parse_items(&json);

        assert_eq!(items.len(), 1);
        let only = &items[0];
        assert_eq!(only.name, "cool-skill");
        assert_eq!(only.stars, 42);
        assert!(only.has_license);
        assert_eq!(only.owner, "user");
    }

    /// Spaces become `+`; `&` and `#` are percent-escaped.
    #[test]
    fn urlencoding_works() {
        assert_eq!(urlencoding("hello world"), "hello+world");
        assert_eq!(urlencoding("a&b#c"), "a%26b%23c");
    }
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue