feat: SkillForge — automated skill discovery, evaluation & integration engine (#144)

* feat: add SkillForge — automated skill discovery, evaluation, and integration engine

SkillForge adds a 3-stage pipeline for autonomous skill management:

- Scout: discovers candidate skills from GitHub (extensible to ClawHub, HuggingFace)
- Evaluate: scores candidates on compatibility, quality, and security (weighted 0.30/0.35/0.35)
- Integrate: generates standard SKILL.toml + SKILL.md manifests for approved candidates

Thresholds: >=0.7 auto-integrate, 0.4-0.7 manual review, <0.4 skip.
Uses only existing dependencies (reqwest, serde, tokio, tracing, chrono, anyhow).
Includes unit tests for all modules.

* fix: address code review feedback on SkillForge PR #115

- evaluate: whole-word matching for BAD_PATTERNS (fixes hackathon false positive)
- evaluate: guard against future timestamps in recency bonus
- integrate: escape URLs in TOML output via escape_toml()
- integrate: handle control chars (\n, \r, \t, \b, \f) in escape_toml()
- mod: redact github_token in Debug impl to prevent log leakage
- mod: fix auto_integrated count when auto_integrate=false
- mod: per-candidate error handling (single failure no longer aborts pipeline)
- scout: add 30s request timeout, remove unused token field
- deps: enable chrono serde feature for DateTime serialization
- tests: add hackathon/exact-hack tests, update escape_toml test coverage

* fix: address round-2 CodeRabbit review feedback

- integrate: add sanitize_path_component() to prevent directory traversal
- mod: GitHub scout failure now logs warning and continues (no pipeline abort)
- scout: network/parse errors per-query use warn+continue instead of ?
- scout: implement std::str::FromStr for ScoutSource (replaces custom from_str)
- tests: add path sanitization tests (traversal, separators, dot trimming)

---------

Co-authored-by: stawky <stakeswky@gmail.com>
This commit is contained in:
Argenis 2026-02-15 09:26:13 -05:00 committed by GitHub
parent 2ac571f406
commit 35b63d6b12
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 1098 additions and 1 deletions

1
Cargo.lock generated
View file

@ -297,6 +297,7 @@ checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118"
dependencies = [ dependencies = [
"iana-time-zone", "iana-time-zone",
"num-traits", "num-traits",
"serde",
"windows-link", "windows-link",
] ]

View file

@ -53,7 +53,7 @@ async-trait = "0.1"
# Memory / persistence # Memory / persistence
rusqlite = { version = "0.32", features = ["bundled"] } rusqlite = { version = "0.32", features = ["bundled"] }
chrono = { version = "0.4", default-features = false, features = ["clock", "std"] } chrono = { version = "0.4", default-features = false, features = ["clock", "std", "serde"] }
cron = "0.12" cron = "0.12"
# Interactive CLI prompts # Interactive CLI prompts

View file

@ -31,6 +31,7 @@ mod providers;
mod runtime; mod runtime;
mod security; mod security;
mod service; mod service;
mod skillforge;
mod skills; mod skills;
mod tools; mod tools;
mod tunnel; mod tunnel;

261
src/skillforge/evaluate.rs Normal file
View file

@ -0,0 +1,261 @@
//! Evaluator — scores discovered skill candidates across multiple dimensions.
use serde::{Deserialize, Serialize};
use super::scout::ScoutResult;
// ---------------------------------------------------------------------------
// Scoring dimensions
// ---------------------------------------------------------------------------
/// Per-dimension scores for a single candidate.
///
/// Each field is produced by one of the `Evaluator` dimension scorers and the
/// three are combined into one number by `Scores::total`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Scores {
/// OS / arch / runtime compatibility (0.0 to 1.0).
pub compatibility: f64,
/// Code quality signals: stars, tests, docs (0.0 to 1.0).
pub quality: f64,
/// Security posture: license, known-bad patterns (0.0 to 1.0).
pub security: f64,
}
impl Scores {
    /// Collapses the three dimension scores into one weighted total.
    ///
    /// The weights are 0.30 for compatibility and 0.35 each for quality and
    /// security, so three scores of 1.0 add up to 1.0.
    pub fn total(&self) -> f64 {
        let compatibility_part = self.compatibility * 0.30;
        let quality_part = self.quality * 0.35;
        let security_part = self.security * 0.35;
        compatibility_part + quality_part + security_part
    }
}
// ---------------------------------------------------------------------------
// Recommendation
// ---------------------------------------------------------------------------
/// What the pipeline should do with an evaluated candidate.
///
/// Assigned by `Evaluator::evaluate` from the weighted total score; "threshold"
/// below is the evaluator's configured minimum score.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Recommendation {
/// Score >= threshold → safe to auto-integrate.
Auto,
/// Score in [0.4, threshold) → needs human review.
Manual,
/// Score < 0.4 → skip entirely.
Skip,
}
// ---------------------------------------------------------------------------
// EvalResult
// ---------------------------------------------------------------------------
/// Outcome of evaluating one scouted candidate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalResult {
/// The candidate that was scored (moved in from the scout stage).
pub candidate: ScoutResult,
/// Individual dimension scores.
pub scores: Scores,
/// Weighted total of `scores`, stored at evaluation time.
pub total_score: f64,
/// Action derived from `total_score`.
pub recommendation: Recommendation,
}
// ---------------------------------------------------------------------------
// Evaluator
// ---------------------------------------------------------------------------
/// Scores candidates and maps the total score to a `Recommendation`.
pub struct Evaluator {
/// Minimum total score for auto-integration.
min_score: f64,
}
/// Known-bad patterns in repo names / descriptions (matched as whole words).
///
/// Entries are lowercase because the security scorer lowercases the name and
/// description before comparing.
const BAD_PATTERNS: &[&str] = &[
"malware",
"exploit",
"hack",
"crack",
"keygen",
"ransomware",
"trojan",
];
/// Reports whether `word` occurs in `haystack` as a standalone word.
///
/// An occurrence counts only when the bytes directly before and after it are
/// either absent (start / end of the string) or not ASCII letters or digits.
/// Occurrences are examined left to right and the first standalone one wins.
fn contains_word(haystack: &str, word: &str) -> bool {
    let bytes = haystack.as_bytes();
    haystack.match_indices(word).any(|(start, _)| {
        let end = start + word.len();
        // A missing neighbour (string edge) is treated as a word boundary.
        let left_clear = start
            .checked_sub(1)
            .and_then(|idx| bytes.get(idx))
            .map_or(true, |b| !b.is_ascii_alphanumeric());
        let right_clear = bytes
            .get(end)
            .map_or(true, |b| !b.is_ascii_alphanumeric());
        left_clear && right_clear
    })
}
impl Evaluator {
    /// Creates an evaluator that recommends auto-integration for candidates
    /// whose total score is at least `min_score`.
    pub fn new(min_score: f64) -> Self {
        Self { min_score }
    }

    /// Scores `candidate` on every dimension and derives a recommendation.
    ///
    /// Totals at or above `min_score` yield `Auto`, totals of at least 0.4
    /// yield `Manual`, and anything lower yields `Skip`.
    pub fn evaluate(&self, candidate: ScoutResult) -> EvalResult {
        let scores = Scores {
            compatibility: self.score_compatibility(&candidate),
            quality: self.score_quality(&candidate),
            security: self.score_security(&candidate),
        };
        let total_score = scores.total();

        let recommendation = match total_score {
            t if t >= self.min_score => Recommendation::Auto,
            t if t >= 0.4 => Recommendation::Manual,
            _ => Recommendation::Skip,
        };

        EvalResult {
            candidate,
            scores,
            total_score,
            recommendation,
        }
    }

    // -- Dimension scorers --------------------------------------------------

    /// Compatibility: Rust scores highest, a few mainstream scripting
    /// languages score in the middle, anything else low, and a missing
    /// language lowest.
    fn score_compatibility(&self, c: &ScoutResult) -> f64 {
        match c.language.as_deref() {
            None => 0.2,
            Some(language) => match language {
                "Rust" => 1.0,
                "Python" | "TypeScript" | "JavaScript" => 0.6,
                _ => 0.3,
            },
        }
    }

    /// Quality: `log2(stars + 1) / 10`, capped at 1.0.
    fn score_quality(&self, c: &ScoutResult) -> f64 {
        let star_magnitude = (c.stars as f64 + 1.0).log2();
        (star_magnitude / 10.0).min(1.0)
    }

    /// Security: starts at 0.5, adds 0.3 for a license, subtracts 0.5 when a
    /// bad pattern appears as a whole word in the name or description, adds
    /// 0.2 when the repo was updated within the last 180 days, then clamps
    /// the result to 0.0..=1.0.
    fn score_security(&self, c: &ScoutResult) -> f64 {
        let license_bonus = if c.has_license { 0.3 } else { 0.0 };

        // The penalty applies at most once, however many patterns match.
        let name_lc = c.name.to_lowercase();
        let desc_lc = c.description.to_lowercase();
        let flagged = BAD_PATTERNS
            .iter()
            .any(|pat| contains_word(&name_lc, pat) || contains_word(&desc_lc, pat));
        let pattern_penalty = if flagged { 0.5 } else { 0.0 };

        // A negative age (timestamp in the future) earns no bonus.
        let recently_updated = c.updated_at.map_or(false, |updated| {
            let age_days = (chrono::Utc::now() - updated).num_days();
            (0..180).contains(&age_days)
        });
        let recency_bonus = if recently_updated { 0.2 } else { 0.0 };

        let score: f64 = 0.5 + license_bonus - pattern_penalty + recency_bonus;
        score.clamp(0.0, 1.0)
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// Unit tests for the evaluator: recommendation thresholds, the weighted total,
// and whole-word bad-pattern matching.
#[cfg(test)]
mod tests {
use super::*;
use crate::skillforge::scout::{ScoutResult, ScoutSource};
/// Builds a GitHub candidate named "test-skill", updated "now", with the
/// given star count, language, and license flag.
fn make_candidate(stars: u64, lang: Option<&str>, has_license: bool) -> ScoutResult {
ScoutResult {
name: "test-skill".into(),
url: "https://github.com/test/test-skill".into(),
description: "A test skill".into(),
stars,
language: lang.map(String::from),
updated_at: Some(chrono::Utc::now()),
source: ScoutSource::GitHub,
owner: "test".into(),
has_license,
}
}
/// A popular, licensed, recently updated Rust repo clears the 0.7 threshold.
#[test]
fn high_quality_rust_repo_gets_auto() {
let eval = Evaluator::new(0.7);
let c = make_candidate(500, Some("Rust"), true);
let res = eval.evaluate(c);
assert!(res.total_score >= 0.7, "score: {}", res.total_score);
assert_eq!(res.recommendation, Recommendation::Auto);
}
/// One star, no language, and no license must never be auto-integrated.
#[test]
fn low_star_no_license_gets_manual_or_skip() {
let eval = Evaluator::new(0.7);
let c = make_candidate(1, None, false);
let res = eval.evaluate(c);
assert!(res.total_score < 0.7, "score: {}", res.total_score);
assert_ne!(res.recommendation, Recommendation::Auto);
}
/// A bad pattern in the name caps security at 0.5 even with every bonus.
#[test]
fn bad_pattern_tanks_security() {
let eval = Evaluator::new(0.7);
let mut c = make_candidate(1000, Some("Rust"), true);
c.name = "malware-skill".into();
let res = eval.evaluate(c);
// 0.5 base + 0.3 license - 0.5 bad_pattern + 0.2 recency = 0.5
assert!(res.scores.security <= 0.5, "security: {}", res.scores.security);
}
/// The weights sum to 1.0: all-ones totals 1.0 and all-zeros totals 0.0.
#[test]
fn scores_total_weighted() {
let s = Scores {
compatibility: 1.0,
quality: 1.0,
security: 1.0,
};
assert!((s.total() - 1.0).abs() < f64::EPSILON);
let s2 = Scores {
compatibility: 0.0,
quality: 0.0,
security: 0.0,
};
assert!((s2.total()).abs() < f64::EPSILON);
}
/// Regression test: "hack" embedded in a longer word is not a match.
#[test]
fn hackathon_not_flagged_as_bad() {
let eval = Evaluator::new(0.7);
let mut c = make_candidate(500, Some("Rust"), true);
c.name = "hackathon-tools".into();
c.description = "Tools for hackathons and lifehacks".into();
let res = eval.evaluate(c);
// "hack" should NOT match "hackathon" or "lifehacks"
assert!(res.scores.security >= 0.5, "security: {}", res.scores.security);
}
/// "hack" delimited by a hyphen is a whole word and triggers the penalty.
#[test]
fn exact_hack_is_flagged() {
let eval = Evaluator::new(0.7);
let mut c = make_candidate(500, Some("Rust"), false);
c.name = "hack-tool".into();
c.updated_at = None;
let res = eval.evaluate(c);
// 0.5 base + 0.0 license - 0.5 bad_pattern + 0.0 recency = 0.0
assert!(res.scores.security < 0.5, "security: {}", res.scores.security);
}
}

248
src/skillforge/integrate.rs Normal file
View file

@ -0,0 +1,248 @@
//! Integrator — generates ZeroClaw-standard SKILL.toml + SKILL.md from scout results.
use std::fs;
use std::path::PathBuf;
use anyhow::{bail, Context, Result};
use chrono::Utc;
use tracing::info;
use super::scout::ScoutResult;
// ---------------------------------------------------------------------------
// Integrator
// ---------------------------------------------------------------------------
/// Writes skill manifests (SKILL.toml + SKILL.md) for scouted candidates.
pub struct Integrator {
/// Root directory; each skill gets its own subdirectory underneath it.
output_dir: PathBuf,
}
impl Integrator {
    /// Creates an integrator that writes skills beneath `output_dir`.
    pub fn new(output_dir: String) -> Self {
        Self {
            output_dir: PathBuf::from(output_dir),
        }
    }

    /// Write SKILL.toml and SKILL.md for the given candidate.
    ///
    /// The files are placed in `<output_dir>/<sanitized name>/`, creating the
    /// directory if needed, and the skill directory path is returned.
    ///
    /// # Errors
    /// Fails when the candidate name cannot be turned into a safe path
    /// component, or when creating the directory or writing either file fails.
    pub fn integrate(&self, candidate: &ScoutResult) -> Result<PathBuf> {
        let safe_name = sanitize_path_component(&candidate.name)?;
        let skill_dir = self.output_dir.join(&safe_name);
        fs::create_dir_all(&skill_dir)
            .with_context(|| format!("Failed to create dir: {}", skill_dir.display()))?;

        let toml_path = skill_dir.join("SKILL.toml");
        let md_path = skill_dir.join("SKILL.md");
        let toml_content = self.generate_toml(candidate);
        let md_content = self.generate_md(candidate);

        fs::write(&toml_path, &toml_content)
            .with_context(|| format!("Failed to write {}", toml_path.display()))?;
        fs::write(&md_path, &md_content)
            .with_context(|| format!("Failed to write {}", md_path.display()))?;

        info!(
            skill = candidate.name.as_str(),
            path = %skill_dir.display(),
            "Integrated skill"
        );
        Ok(skill_dir)
    }

    // -- Generators ---------------------------------------------------------

    /// Renders the SKILL.toml manifest.
    ///
    /// Every string that originates from the scout result — including the
    /// language — goes through `escape_toml`, so remote metadata cannot break
    /// out of its quoted TOML value.
    fn generate_toml(&self, c: &ScoutResult) -> String {
        let lang = c.language.as_deref().unwrap_or("unknown");
        let updated = c
            .updated_at
            .map(|d| d.format("%Y-%m-%d").to_string())
            .unwrap_or_else(|| "unknown".into());
        format!(
            r#"# Auto-generated by SkillForge on {now}
[skill]
name = "{name}"
version = "0.1.0"
description = "{description}"
source = "{url}"
owner = "{owner}"
language = "{lang}"
license = {license}
stars = {stars}
updated_at = "{updated}"
[skill.requirements]
runtime = "zeroclaw >= 0.1"
[skill.metadata]
auto_integrated = true
forge_timestamp = "{now}"
"#,
            now = Utc::now().format("%Y-%m-%dT%H:%M:%SZ"),
            name = escape_toml(&c.name),
            description = escape_toml(&c.description),
            url = escape_toml(&c.url),
            owner = escape_toml(&c.owner),
            // The language comes from the remote API like the other fields,
            // so it is escaped as well.
            lang = escape_toml(lang),
            license = if c.has_license { "true" } else { "false" },
            stars = c.stars,
            updated = updated,
        )
    }

    /// Renders the human-readable SKILL.md summary (values are inserted
    /// verbatim, without Markdown escaping).
    fn generate_md(&self, c: &ScoutResult) -> String {
        let lang = c.language.as_deref().unwrap_or("unknown");
        format!(
            r#"# {name}
> Auto-generated by SkillForge
## Overview
- **Source**: [{url}]({url})
- **Owner**: {owner}
- **Language**: {lang}
- **Stars**: {stars}
- **License**: {license}
## Description
{description}
## Usage
```toml
# Add to your ZeroClaw config:
[skills.{name}]
enabled = true
```
## Notes
This manifest was auto-generated from repository metadata.
Review before enabling in production.
"#,
            name = c.name,
            url = c.url,
            owner = c.owner,
            lang = lang,
            stars = c.stars,
            license = if c.has_license { "yes" } else { "unknown" },
            description = c.description,
        )
    }
}
/// Escape special characters for TOML basic string values.
///
/// Backslash and double quote are backslash-escaped, the control characters
/// with short forms use them (`\b`, `\t`, `\n`, `\f`, `\r`), and every other
/// control character that TOML forbids inside a basic string (U+0000–U+001F
/// and U+007F) is written as a `\uXXXX` escape. All other characters are
/// copied unchanged.
fn escape_toml(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            '\u{08}' => out.push_str("\\b"),
            '\u{0C}' => out.push_str("\\f"),
            // Remaining control characters have no short escape in TOML.
            '\u{00}'..='\u{1F}' | '\u{7F}' => {
                out.push_str(&format!("\\u{:04X}", ch as u32));
            }
            other => out.push(other),
        }
    }
    out
}
/// Sanitize a string for use as a single path component.
///
/// Leading and trailing whitespace and dots are stripped together, so inputs
/// such as `". ."` cannot collapse to a whitespace-only name. Path separators
/// (`/`, `\`) and NUL inside the name are replaced with `_`.
///
/// # Errors
/// Fails when nothing is left after trimming, or when the result would still
/// be unsafe as a path component.
fn sanitize_path_component(name: &str) -> Result<String> {
    // Trimming both classes in a single pass matters: trimming whitespace
    // first and dots second leaves any whitespace the dots were hiding.
    let trimmed = name.trim_matches(|c: char| c == '.' || c.is_whitespace());
    if trimmed.is_empty() {
        bail!("Skill name is empty or only dots after sanitization");
    }
    let sanitized: String = trimmed
        .chars()
        .map(|c| match c {
            '/' | '\\' | '\0' => '_',
            _ => c,
        })
        .collect();
    // Defensive re-check; the steps above should already rule these out.
    if sanitized == ".." || sanitized.contains('/') || sanitized.contains('\\') {
        bail!("Skill name '{}' is unsafe as a path component", name);
    }
    Ok(sanitized)
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// Unit tests for manifest generation, TOML escaping, and path sanitization.
#[cfg(test)]
mod tests {
use super::*;
use crate::skillforge::scout::{ScoutResult, ScoutSource};
use std::fs;
/// A licensed Rust candidate with 42 stars, updated "now".
fn sample_candidate() -> ScoutResult {
ScoutResult {
name: "test-skill".into(),
url: "https://github.com/user/test-skill".into(),
description: "A test skill for unit tests".into(),
stars: 42,
language: Some("Rust".into()),
updated_at: Some(Utc::now()),
source: ScoutSource::GitHub,
owner: "user".into(),
has_license: true,
}
}
/// End-to-end: both manifest files are written and contain the key fields.
/// Uses (and cleans up) a fixed directory under the system temp dir.
#[test]
fn integrate_creates_files() {
let tmp = std::env::temp_dir().join("zeroclaw-test-integrate");
let _ = fs::remove_dir_all(&tmp);
let integrator = Integrator::new(tmp.to_string_lossy().into_owned());
let c = sample_candidate();
let path = integrator.integrate(&c).unwrap();
assert!(path.join("SKILL.toml").exists());
assert!(path.join("SKILL.md").exists());
let toml = fs::read_to_string(path.join("SKILL.toml")).unwrap();
assert!(toml.contains("name = \"test-skill\""));
assert!(toml.contains("stars = 42"));
let md = fs::read_to_string(path.join("SKILL.md")).unwrap();
assert!(md.contains("# test-skill"));
assert!(md.contains("A test skill for unit tests"));
let _ = fs::remove_dir_all(&tmp);
}
/// Quotes, backslashes, and common control characters are escaped.
#[test]
fn escape_toml_handles_quotes_and_control_chars() {
assert_eq!(escape_toml(r#"say "hello""#), r#"say \"hello\""#);
assert_eq!(escape_toml(r"back\slash"), r"back\\slash");
assert_eq!(escape_toml("line\nbreak"), "line\\nbreak");
assert_eq!(escape_toml("tab\there"), "tab\\there");
assert_eq!(escape_toml("cr\rhere"), "cr\\rhere");
}
/// Names made only of dots and/or whitespace are rejected.
#[test]
fn sanitize_rejects_traversal() {
assert!(sanitize_path_component("..").is_err());
assert!(sanitize_path_component("...").is_err());
assert!(sanitize_path_component("").is_err());
assert!(sanitize_path_component(" ").is_err());
}
/// Path separators and NUL are each replaced with an underscore.
#[test]
fn sanitize_replaces_separators() {
let s = sanitize_path_component("foo/bar\\baz\0qux").unwrap();
assert!(!s.contains('/'));
assert!(!s.contains('\\'));
assert!(!s.contains('\0'));
assert_eq!(s, "foo_bar_baz_qux");
}
/// Leading and trailing dots are stripped.
#[test]
fn sanitize_trims_dots() {
let s = sanitize_path_component(".hidden.").unwrap();
assert_eq!(s, "hidden");
}
}

255
src/skillforge/mod.rs Normal file
View file

@ -0,0 +1,255 @@
//! SkillForge — Skill auto-discovery, evaluation, and integration engine.
//!
//! Pipeline: Scout → Evaluate → Integrate
//! Discovers skills from external sources, scores them, and generates
//! ZeroClaw-compatible manifests for qualified candidates.
pub mod evaluate;
pub mod integrate;
pub mod scout;
use anyhow::Result;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use self::evaluate::{EvalResult, Evaluator, Recommendation};
use self::integrate::Integrator;
use self::scout::{GitHubScout, Scout, ScoutResult, ScoutSource};
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------
/// User-facing configuration for the SkillForge pipeline.
///
/// Every field has a serde default, so any subset may be given when
/// deserializing. `Debug` is implemented by hand rather than derived so the
/// GitHub token is never printed.
#[derive(Clone, Serialize, Deserialize)]
pub struct SkillForgeConfig {
/// Master switch; when false, `SkillForge::forge` returns an empty report.
#[serde(default)]
pub enabled: bool,
/// When false, candidates recommended for auto-integration are counted
/// for manual review instead of being written to disk.
#[serde(default = "default_auto_integrate")]
pub auto_integrate: bool,
/// Source names to scout; each is parsed into a `ScoutSource`.
#[serde(default = "default_sources")]
pub sources: Vec<String>,
/// Hours between scans. NOTE(review): not read anywhere in this module —
/// presumably consumed by a scheduler elsewhere; confirm.
#[serde(default = "default_scan_interval")]
pub scan_interval_hours: u64,
/// Minimum total score for auto-integration (handed to the evaluator).
#[serde(default = "default_min_score")]
pub min_score: f64,
/// Optional GitHub personal-access token for higher rate limits.
#[serde(default)]
pub github_token: Option<String>,
/// Directory where integrated skills are written.
#[serde(default = "default_output_dir")]
pub output_dir: String,
}
/// Serde default for `auto_integrate`: on.
fn default_auto_integrate() -> bool {
true
}
/// Serde default for `sources`: GitHub and ClawHub.
fn default_sources() -> Vec<String> {
vec!["github".into(), "clawhub".into()]
}
/// Serde default for `scan_interval_hours`: 24.
fn default_scan_interval() -> u64 {
24
}
/// Serde default for `min_score`: 0.7.
fn default_min_score() -> f64 {
0.7
}
/// Serde default for `output_dir`: `./skills`.
fn default_output_dir() -> String {
"./skills".into()
}
impl Default for SkillForgeConfig {
fn default() -> Self {
Self {
enabled: false,
auto_integrate: default_auto_integrate(),
sources: default_sources(),
scan_interval_hours: default_scan_interval(),
min_score: default_min_score(),
github_token: None,
output_dir: default_output_dir(),
}
}
}
impl std::fmt::Debug for SkillForgeConfig {
    /// Formats the config like a derived `Debug` would, except that a present
    /// GitHub token is shown as `Some("***")` instead of its real value.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let redacted_token: Option<&str> = if self.github_token.is_some() {
            Some("***")
        } else {
            None
        };

        let mut out = f.debug_struct("SkillForgeConfig");
        out.field("enabled", &self.enabled);
        out.field("auto_integrate", &self.auto_integrate);
        out.field("sources", &self.sources);
        out.field("scan_interval_hours", &self.scan_interval_hours);
        out.field("min_score", &self.min_score);
        out.field("github_token", &redacted_token);
        out.field("output_dir", &self.output_dir);
        out.finish()
    }
}
// ---------------------------------------------------------------------------
// ForgeReport — summary of a single pipeline run
// ---------------------------------------------------------------------------
/// Summary of one `SkillForge::forge` run.
///
/// Candidates whose integration attempt fails are logged but not added to
/// any counter, so the three outcome counters can sum to less than
/// `evaluated`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ForgeReport {
/// Unique candidates found across all sources (after URL dedup).
pub discovered: usize,
/// Candidates that were scored.
pub evaluated: usize,
/// Candidates whose manifests were written successfully.
pub auto_integrated: usize,
/// Candidates recommended for manual review, plus auto-recommended ones
/// when `auto_integrate` is off.
pub manual_review: usize,
/// Candidates recommended to be skipped.
pub skipped: usize,
/// Full evaluation result for every candidate.
pub results: Vec<EvalResult>,
}
// ---------------------------------------------------------------------------
// SkillForge
// ---------------------------------------------------------------------------
/// Pipeline driver that ties together scouting, evaluation, and integration.
pub struct SkillForge {
/// Settings this instance was built from.
config: SkillForgeConfig,
/// Scorer, constructed with `config.min_score`.
evaluator: Evaluator,
/// Manifest writer, constructed with `config.output_dir`.
integrator: Integrator,
}
impl SkillForge {
pub fn new(config: SkillForgeConfig) -> Self {
let evaluator = Evaluator::new(config.min_score);
let integrator = Integrator::new(config.output_dir.clone());
Self {
config,
evaluator,
integrator,
}
}
/// Run the full pipeline: Scout → Evaluate → Integrate.
pub async fn forge(&self) -> Result<ForgeReport> {
if !self.config.enabled {
warn!("SkillForge is disabled — skipping");
return Ok(ForgeReport {
discovered: 0,
evaluated: 0,
auto_integrated: 0,
manual_review: 0,
skipped: 0,
results: vec![],
});
}
// --- Scout ----------------------------------------------------------
let mut candidates: Vec<ScoutResult> = Vec::new();
for src in &self.config.sources {
let source: ScoutSource = src.parse().unwrap(); // Infallible
match source {
ScoutSource::GitHub => {
let scout = GitHubScout::new(self.config.github_token.clone());
match scout.discover().await {
Ok(mut found) => {
info!(count = found.len(), "GitHub scout returned candidates");
candidates.append(&mut found);
}
Err(e) => {
warn!(error = %e, "GitHub scout failed, continuing with other sources");
}
}
}
ScoutSource::ClawHub | ScoutSource::HuggingFace => {
info!(source = src.as_str(), "Source not yet implemented — skipping");
}
}
}
// Deduplicate by URL
scout::dedup(&mut candidates);
let discovered = candidates.len();
info!(discovered, "Total unique candidates after dedup");
// --- Evaluate -------------------------------------------------------
let results: Vec<EvalResult> = candidates
.into_iter()
.map(|c| self.evaluator.evaluate(c))
.collect();
let evaluated = results.len();
// --- Integrate ------------------------------------------------------
let mut auto_integrated = 0usize;
let mut manual_review = 0usize;
let mut skipped = 0usize;
for res in &results {
match res.recommendation {
Recommendation::Auto => {
if self.config.auto_integrate {
match self.integrator.integrate(&res.candidate) {
Ok(_) => {
auto_integrated += 1;
}
Err(e) => {
warn!(
skill = res.candidate.name.as_str(),
error = %e,
"Integration failed for candidate, continuing"
);
}
}
} else {
// Count as would-be auto but not actually integrated
manual_review += 1;
}
}
Recommendation::Manual => {
manual_review += 1;
}
Recommendation::Skip => {
skipped += 1;
}
}
}
info!(
auto_integrated,
manual_review, skipped, "Forge pipeline complete"
);
Ok(ForgeReport {
discovered,
evaluated,
auto_integrated,
manual_review,
skipped,
results,
})
}
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// Unit tests for the pipeline driver: the disabled short-circuit and the
// default configuration values.
#[cfg(test)]
mod tests {
use super::*;
/// A disabled forge returns an all-zero report.
#[tokio::test]
async fn disabled_forge_returns_empty_report() {
let cfg = SkillForgeConfig {
enabled: false,
..Default::default()
};
let forge = SkillForge::new(cfg);
let report = forge.forge().await.unwrap();
assert_eq!(report.discovered, 0);
assert_eq!(report.auto_integrated, 0);
}
/// Pins the documented defaults so accidental changes are caught.
#[test]
fn default_config_values() {
let cfg = SkillForgeConfig::default();
assert!(!cfg.enabled);
assert!(cfg.auto_integrate);
assert_eq!(cfg.scan_interval_hours, 24);
assert!((cfg.min_score - 0.7).abs() < f64::EPSILON);
assert_eq!(cfg.sources, vec!["github", "clawhub"]);
}
}

331
src/skillforge/scout.rs Normal file
View file

@ -0,0 +1,331 @@
//! Scout — skill discovery from external sources.
use anyhow::Result;
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use tracing::{debug, warn};
// ---------------------------------------------------------------------------
// ScoutSource
// ---------------------------------------------------------------------------
/// Where a candidate skill was (or should be) discovered.
///
/// Parsed from configuration strings through the `FromStr` impl in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ScoutSource {
/// GitHub repository search.
GitHub,
/// ClawHub (parsed from "clawhub").
ClawHub,
/// Hugging Face (parsed from "huggingface" or "hf").
HuggingFace,
}
impl std::str::FromStr for ScoutSource {
    type Err = std::convert::Infallible;

    /// Parses a source name case-insensitively.
    ///
    /// Never fails: an unrecognised name is logged with a warning and treated
    /// as `GitHub`.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        let source = if normalized == "github" {
            Self::GitHub
        } else if normalized == "clawhub" {
            Self::ClawHub
        } else if normalized == "huggingface" || normalized == "hf" {
            Self::HuggingFace
        } else {
            warn!(source = s, "Unknown scout source, defaulting to GitHub");
            Self::GitHub
        };
        Ok(source)
    }
}
// ---------------------------------------------------------------------------
// ScoutResult
// ---------------------------------------------------------------------------
/// One candidate skill as reported by a scout.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoutResult {
/// Short name (for GitHub, the repository `name` field).
pub name: String,
/// Canonical URL of the candidate; used as the deduplication key.
pub url: String,
/// Free-text description; empty when the source gives none.
pub description: String,
/// Star count; 0 when the source gives none.
pub stars: u64,
/// Primary language, if the source reports one.
pub language: Option<String>,
/// Last-update timestamp, if present and parseable.
pub updated_at: Option<DateTime<Utc>>,
/// Which source produced this candidate.
pub source: ScoutSource,
/// Owner / org extracted from the URL or API response.
pub owner: String,
/// Whether the repo has a license file.
pub has_license: bool,
}
// ---------------------------------------------------------------------------
// Scout trait
// ---------------------------------------------------------------------------
/// A pluggable discovery backend; implementors must be `Send + Sync`.
#[async_trait]
pub trait Scout: Send + Sync {
/// Discover candidate skills from the source.
async fn discover(&self) -> Result<Vec<ScoutResult>>;
}
// ---------------------------------------------------------------------------
// GitHubScout
// ---------------------------------------------------------------------------
/// Searches GitHub for repos matching skill-related queries.
pub struct GitHubScout {
/// HTTP client with default headers and a request timeout preconfigured.
client: reqwest::Client,
/// Search phrases; each one is sent as a separate repository search.
queries: Vec<String>,
}
impl GitHubScout {
pub fn new(token: Option<String>) -> Self {
use std::time::Duration;
let mut headers = reqwest::header::HeaderMap::new();
headers.insert(
reqwest::header::ACCEPT,
"application/vnd.github+json"
.parse()
.expect("valid header"),
);
headers.insert(
reqwest::header::USER_AGENT,
"ZeroClaw-SkillForge/0.1".parse().expect("valid header"),
);
if let Some(ref t) = token {
if let Ok(val) = format!("Bearer {t}").parse() {
headers.insert(reqwest::header::AUTHORIZATION, val);
}
}
let client = reqwest::Client::builder()
.default_headers(headers)
.timeout(Duration::from_secs(30))
.build()
.expect("failed to build reqwest client");
Self {
client,
queries: vec![
"zeroclaw skill".into(),
"ai agent skill".into(),
],
}
}
/// Parse the GitHub search/repositories JSON response.
fn parse_items(body: &serde_json::Value) -> Vec<ScoutResult> {
let items = match body.get("items").and_then(|v| v.as_array()) {
Some(arr) => arr,
None => return vec![],
};
items
.iter()
.filter_map(|item| {
let name = item.get("name")?.as_str()?.to_string();
let url = item.get("html_url")?.as_str()?.to_string();
let description = item
.get("description")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let stars = item
.get("stargazers_count")
.and_then(|v| v.as_u64())
.unwrap_or(0);
let language = item
.get("language")
.and_then(|v| v.as_str())
.map(String::from);
let updated_at = item
.get("updated_at")
.and_then(|v| v.as_str())
.and_then(|s| s.parse::<DateTime<Utc>>().ok());
let owner = item
.get("owner")
.and_then(|o| o.get("login"))
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
let has_license = item
.get("license")
.map(|v| !v.is_null())
.unwrap_or(false);
Some(ScoutResult {
name,
url,
description,
stars,
language,
updated_at,
source: ScoutSource::GitHub,
owner,
has_license,
})
})
.collect()
}
}
#[async_trait]
impl Scout for GitHubScout {
    /// Runs every configured search query against the GitHub repository
    /// search API and returns the combined results, deduplicated by URL.
    ///
    /// A query that fails to send, returns a non-success status, or returns
    /// a body that is not valid JSON is logged and skipped; the method itself
    /// always returns `Ok`.
    async fn discover(&self) -> Result<Vec<ScoutResult>> {
        let mut all: Vec<ScoutResult> = Vec::new();

        for query in &self.queries {
            let url = format!(
                "https://api.github.com/search/repositories?q={}&sort=stars&order=desc&per_page=30",
                urlencoding(query)
            );
            debug!(query = query.as_str(), "Searching GitHub");

            match self.client.get(&url).send().await {
                Err(e) => {
                    warn!(
                        query = query.as_str(),
                        error = %e,
                        "GitHub API request failed, skipping query"
                    );
                }
                Ok(resp) if !resp.status().is_success() => {
                    warn!(
                        status = %resp.status(),
                        query = query.as_str(),
                        "GitHub search returned non-200"
                    );
                }
                Ok(resp) => match resp.json::<serde_json::Value>().await {
                    Err(e) => {
                        warn!(
                            query = query.as_str(),
                            error = %e,
                            "Failed to parse GitHub response, skipping query"
                        );
                    }
                    Ok(body) => {
                        let mut items = Self::parse_items(&body);
                        debug!(count = items.len(), query = query.as_str(), "Parsed items");
                        all.append(&mut items);
                    }
                },
            }
        }

        dedup(&mut all);
        Ok(all)
    }
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Percent-encoding for a query-string value (space → `+`).
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` are copied as-is, a space
/// becomes `+`, and every other byte of the UTF-8 encoding is written as
/// `%XX` with uppercase hex digits. Characters such as `+`, `%`, `=` and `?`
/// therefore survive the round trip instead of being reinterpreted by the
/// server.
fn urlencoding(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for &b in s.as_bytes() {
        match b {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                out.push(char::from(b));
            }
            b' ' => out.push('+'),
            _ => out.push_str(&format!("%{:02X}", b)),
        }
    }
    out
}
/// Removes results whose URL has already appeared earlier in the list.
///
/// The first result for each URL is kept and the relative order of the
/// survivors is unchanged.
pub fn dedup(results: &mut Vec<ScoutResult>) {
    let mut seen_urls: std::collections::HashSet<String> = std::collections::HashSet::new();
    results.retain(|candidate| {
        if seen_urls.contains(&candidate.url) {
            false
        } else {
            seen_urls.insert(candidate.url.clone());
            true
        }
    });
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// Unit tests for source parsing, URL deduplication, GitHub response parsing,
// and query encoding.
#[cfg(test)]
mod tests {
use super::*;
/// Source names parse case-insensitively; unknown names fall back to GitHub.
#[test]
fn scout_source_from_str() {
assert_eq!("github".parse::<ScoutSource>().unwrap(), ScoutSource::GitHub);
assert_eq!("GitHub".parse::<ScoutSource>().unwrap(), ScoutSource::GitHub);
assert_eq!("clawhub".parse::<ScoutSource>().unwrap(), ScoutSource::ClawHub);
assert_eq!("huggingface".parse::<ScoutSource>().unwrap(), ScoutSource::HuggingFace);
assert_eq!("hf".parse::<ScoutSource>().unwrap(), ScoutSource::HuggingFace);
// unknown falls back to GitHub
assert_eq!("unknown".parse::<ScoutSource>().unwrap(), ScoutSource::GitHub);
}
/// Two entries sharing a URL collapse to the first; order is preserved.
#[test]
fn dedup_removes_duplicates() {
let mut results = vec![
ScoutResult {
name: "a".into(),
url: "https://github.com/x/a".into(),
description: String::new(),
stars: 10,
language: None,
updated_at: None,
source: ScoutSource::GitHub,
owner: "x".into(),
has_license: true,
},
ScoutResult {
name: "a-dup".into(),
url: "https://github.com/x/a".into(),
description: String::new(),
stars: 10,
language: None,
updated_at: None,
source: ScoutSource::GitHub,
owner: "x".into(),
has_license: true,
},
ScoutResult {
name: "b".into(),
url: "https://github.com/x/b".into(),
description: String::new(),
stars: 5,
language: None,
updated_at: None,
source: ScoutSource::GitHub,
owner: "x".into(),
has_license: false,
},
];
dedup(&mut results);
assert_eq!(results.len(), 2);
assert_eq!(results[0].name, "a");
assert_eq!(results[1].name, "b");
}
/// A minimal search response yields one fully populated result.
#[test]
fn parse_github_items() {
let json = serde_json::json!({
"total_count": 1,
"items": [
{
"name": "cool-skill",
"html_url": "https://github.com/user/cool-skill",
"description": "A cool skill",
"stargazers_count": 42,
"language": "Rust",
"updated_at": "2026-01-15T10:00:00Z",
"owner": { "login": "user" },
"license": { "spdx_id": "MIT" }
}
]
});
let items = GitHubScout::parse_items(&json);
assert_eq!(items.len(), 1);
assert_eq!(items[0].name, "cool-skill");
assert_eq!(items[0].stars, 42);
assert!(items[0].has_license);
assert_eq!(items[0].owner, "user");
}
/// Spaces become '+', while '&' and '#' are percent-encoded.
#[test]
fn urlencoding_works() {
assert_eq!(urlencoding("hello world"), "hello+world");
assert_eq!(urlencoding("a&b#c"), "a%26b%23c");
}
}