diff --git a/Cargo.lock b/Cargo.lock index 3458276..33f07c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -297,6 +297,7 @@ checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "num-traits", + "serde", "windows-link", ] diff --git a/Cargo.toml b/Cargo.toml index 7565c2b..8bdc4a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,7 +53,7 @@ async-trait = "0.1" # Memory / persistence rusqlite = { version = "0.32", features = ["bundled"] } -chrono = { version = "0.4", default-features = false, features = ["clock", "std"] } +chrono = { version = "0.4", default-features = false, features = ["clock", "std", "serde"] } cron = "0.12" # Interactive CLI prompts diff --git a/src/main.rs b/src/main.rs index 7fa11b1..012a4d3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -31,6 +31,7 @@ mod providers; mod runtime; mod security; mod service; +mod skillforge; mod skills; mod tools; mod tunnel; diff --git a/src/skillforge/evaluate.rs b/src/skillforge/evaluate.rs new file mode 100644 index 0000000..e9971ec --- /dev/null +++ b/src/skillforge/evaluate.rs @@ -0,0 +1,261 @@ +//! Evaluator — scores discovered skill candidates across multiple dimensions. + +use serde::{Deserialize, Serialize}; + +use super::scout::ScoutResult; + +// --------------------------------------------------------------------------- +// Scoring dimensions +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Scores { + /// OS / arch / runtime compatibility (0.0–1.0). + pub compatibility: f64, + /// Code quality signals: stars, tests, docs (0.0–1.0). + pub quality: f64, + /// Security posture: license, known-bad patterns (0.0–1.0). + pub security: f64, +} + +impl Scores { + /// Weighted total. Weights: compatibility 0.3, quality 0.35, security 0.35. + pub fn total(&self) -> f64 { + self.compatibility * 0.30 + self.quality * 0.35 + self.security * 0.35 + } +} + +// --------------------------------------------------------------------------- +// Recommendation +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub enum Recommendation { + /// Score >= threshold → safe to auto-integrate. + Auto, + /// Score in [0.4, threshold) → needs human review. + Manual, + /// Score < 0.4 → skip entirely. + Skip, +} + +// --------------------------------------------------------------------------- +// EvalResult +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EvalResult { + pub candidate: ScoutResult, + pub scores: Scores, + pub total_score: f64, + pub recommendation: Recommendation, +} + +// --------------------------------------------------------------------------- +// Evaluator +// --------------------------------------------------------------------------- + +pub struct Evaluator { + /// Minimum total score for auto-integration. + min_score: f64, +} + +/// Known-bad patterns in repo names / descriptions (matched as whole words). +const BAD_PATTERNS: &[&str] = &[ + "malware", + "exploit", + "hack", + "crack", + "keygen", + "ransomware", + "trojan", +]; + +/// Check if `haystack` contains `word` as a whole word (bounded by non-alphanumeric chars). +fn contains_word(haystack: &str, word: &str) -> bool { + for (i, _) in haystack.match_indices(word) { + let before_ok = i == 0 + || !haystack.as_bytes()[i - 1].is_ascii_alphanumeric(); + let after = i + word.len(); + let after_ok = after >= haystack.len() + || !haystack.as_bytes()[after].is_ascii_alphanumeric(); + if before_ok && after_ok { + return true; + } + } + false +} + +impl Evaluator { + pub fn new(min_score: f64) -> Self { + Self { min_score } + } + + pub fn evaluate(&self, candidate: ScoutResult) -> EvalResult { + let compatibility = self.score_compatibility(&candidate); + let quality = self.score_quality(&candidate); + let security = self.score_security(&candidate); + + let scores = Scores { + compatibility, + quality, + security, + }; + let total_score = scores.total(); + + let recommendation = if total_score >= self.min_score { + Recommendation::Auto + } else if total_score >= 0.4 { + Recommendation::Manual + } else { + Recommendation::Skip + }; + + EvalResult { + candidate, + scores, + total_score, + recommendation, + } + } + + // -- Dimension scorers -------------------------------------------------- + + /// Compatibility: favour Rust repos; penalise unknown languages. + fn score_compatibility(&self, c: &ScoutResult) -> f64 { + match c.language.as_deref() { + Some("Rust") => 1.0, + Some("Python" | "TypeScript" | "JavaScript") => 0.6, + Some(_) => 0.3, + None => 0.2, + } + } + + /// Quality: based on star count (log scale, capped at 1.0). + fn score_quality(&self, c: &ScoutResult) -> f64 { + // log2(stars + 1) / 10, capped at 1.0 + let raw = ((c.stars as f64) + 1.0).log2() / 10.0; + raw.min(1.0) + } + + /// Security: license presence + bad-pattern check. + fn score_security(&self, c: &ScoutResult) -> f64 { + let mut score: f64 = 0.5; + + // License bonus + if c.has_license { + score += 0.3; + } + + // Bad-pattern penalty (whole-word match) + let lower_name = c.name.to_lowercase(); + let lower_desc = c.description.to_lowercase(); + for pat in BAD_PATTERNS { + if contains_word(&lower_name, pat) || contains_word(&lower_desc, pat) { + score -= 0.5; + break; + } + } + + // Recency bonus: updated within last 180 days (guard against future timestamps) + if let Some(updated) = c.updated_at { + let age_days = (chrono::Utc::now() - updated).num_days(); + if (0..180).contains(&age_days) { + score += 0.2; + } + } + + score.clamp(0.0, 1.0) + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::skillforge::scout::{ScoutResult, ScoutSource}; + + fn make_candidate(stars: u64, lang: Option<&str>, has_license: bool) -> ScoutResult { + ScoutResult { + name: "test-skill".into(), + url: "https://github.com/test/test-skill".into(), + description: "A test skill".into(), + stars, + language: lang.map(String::from), + updated_at: Some(chrono::Utc::now()), + source: ScoutSource::GitHub, + owner: "test".into(), + has_license, + } + } + + #[test] + fn high_quality_rust_repo_gets_auto() { + let eval = Evaluator::new(0.7); + let c = make_candidate(500, Some("Rust"), true); + let res = eval.evaluate(c); + assert!(res.total_score >= 0.7, "score: {}", res.total_score); + assert_eq!(res.recommendation, Recommendation::Auto); + } + + #[test] + fn low_star_no_license_gets_manual_or_skip() { + let eval = Evaluator::new(0.7); + let c = make_candidate(1, None, false); + let res = eval.evaluate(c); + assert!(res.total_score < 0.7, "score: {}", res.total_score); + assert_ne!(res.recommendation, Recommendation::Auto); + } + + #[test] + fn bad_pattern_tanks_security() { + let eval = Evaluator::new(0.7); + let mut c = make_candidate(1000, Some("Rust"), true); + c.name = "malware-skill".into(); + let res = eval.evaluate(c); + // 0.5 base + 0.3 license - 0.5 bad_pattern + 0.2 recency = 0.5 + assert!(res.scores.security <= 0.5, "security: {}", res.scores.security); + } + + #[test] + fn scores_total_weighted() { + let s = Scores { + compatibility: 1.0, + quality: 1.0, + security: 1.0, + }; + assert!((s.total() - 1.0).abs() < f64::EPSILON); + + let s2 = Scores { + compatibility: 0.0, + quality: 0.0, + security: 0.0, + }; + assert!((s2.total()).abs() < f64::EPSILON); + } + + #[test] + fn hackathon_not_flagged_as_bad() { + let eval = Evaluator::new(0.7); + let mut c = make_candidate(500, Some("Rust"), true); + c.name = "hackathon-tools".into(); + c.description = "Tools for hackathons and lifehacks".into(); + let res = eval.evaluate(c); + // "hack" should NOT match "hackathon" or "lifehacks" + assert!(res.scores.security >= 0.5, "security: {}", res.scores.security); + } + + #[test] + fn exact_hack_is_flagged() { + let eval = Evaluator::new(0.7); + let mut c = make_candidate(500, Some("Rust"), false); + c.name = "hack-tool".into(); + c.updated_at = None; + let res = eval.evaluate(c); + // 0.5 base + 0.0 license - 0.5 bad_pattern + 0.0 recency = 0.0 + assert!(res.scores.security < 0.5, "security: {}", res.scores.security); + } +} diff --git a/src/skillforge/integrate.rs b/src/skillforge/integrate.rs new file mode 100644 index 0000000..540dd8b --- /dev/null +++ b/src/skillforge/integrate.rs @@ -0,0 +1,248 @@ +//! Integrator — generates ZeroClaw-standard SKILL.toml + SKILL.md from scout results. + +use std::fs; +use std::path::PathBuf; + +use anyhow::{bail, Context, Result}; +use chrono::Utc; +use tracing::info; + +use super::scout::ScoutResult; + +// --------------------------------------------------------------------------- +// Integrator +// --------------------------------------------------------------------------- + +pub struct Integrator { + output_dir: PathBuf, +} + +impl Integrator { + pub fn new(output_dir: String) -> Self { + Self { + output_dir: PathBuf::from(output_dir), + } + } + + /// Write SKILL.toml and SKILL.md for the given candidate. + pub fn integrate(&self, candidate: &ScoutResult) -> Result { + let safe_name = sanitize_path_component(&candidate.name)?; + let skill_dir = self.output_dir.join(&safe_name); + fs::create_dir_all(&skill_dir) + .with_context(|| format!("Failed to create dir: {}", skill_dir.display()))?; + + let toml_path = skill_dir.join("SKILL.toml"); + let md_path = skill_dir.join("SKILL.md"); + + let toml_content = self.generate_toml(candidate); + let md_content = self.generate_md(candidate); + + fs::write(&toml_path, &toml_content) + .with_context(|| format!("Failed to write {}", toml_path.display()))?; + fs::write(&md_path, &md_content) + .with_context(|| format!("Failed to write {}", md_path.display()))?; + + info!( + skill = candidate.name.as_str(), + path = %skill_dir.display(), + "Integrated skill" + ); + + Ok(skill_dir) + } + + // -- Generators --------------------------------------------------------- + + fn generate_toml(&self, c: &ScoutResult) -> String { + let lang = c.language.as_deref().unwrap_or("unknown"); + let updated = c + .updated_at + .map(|d| d.format("%Y-%m-%d").to_string()) + .unwrap_or_else(|| "unknown".into()); + + format!( + r#"# Auto-generated by SkillForge on {now} + +[skill] +name = "{name}" +version = "0.1.0" +description = "{description}" +source = "{url}" +owner = "{owner}" +language = "{lang}" +license = {license} +stars = {stars} +updated_at = "{updated}" + +[skill.requirements] +runtime = "zeroclaw >= 0.1" + +[skill.metadata] +auto_integrated = true +forge_timestamp = "{now}" +"#, + now = Utc::now().format("%Y-%m-%dT%H:%M:%SZ"), + name = escape_toml(&c.name), + description = escape_toml(&c.description), + url = escape_toml(&c.url), + owner = escape_toml(&c.owner), + lang = lang, + license = if c.has_license { "true" } else { "false" }, + stars = c.stars, + updated = updated, + ) + } + + fn generate_md(&self, c: &ScoutResult) -> String { + let lang = c.language.as_deref().unwrap_or("unknown"); + format!( + r#"# {name} + +> Auto-generated by SkillForge + +## Overview + +- **Source**: [{url}]({url}) +- **Owner**: {owner} +- **Language**: {lang} +- **Stars**: {stars} +- **License**: {license} + +## Description + +{description} + +## Usage + +```toml +# Add to your ZeroClaw config: +[skills.{name}] +enabled = true +``` + +## Notes + +This manifest was auto-generated from repository metadata. +Review before enabling in production. +"#, + name = c.name, + url = c.url, + owner = c.owner, + lang = lang, + stars = c.stars, + license = if c.has_license { "yes" } else { "unknown" }, + description = c.description, + ) + } +} + +/// Escape special characters for TOML basic string values. +fn escape_toml(s: &str) -> String { + s.replace('\\', "\\\\") + .replace('"', "\\\"") + .replace('\n', "\\n") + .replace('\r', "\\r") + .replace('\t', "\\t") + .replace('\u{08}', "\\b") + .replace('\u{0C}', "\\f") +} + +/// Sanitize a string for use as a single path component. +/// Rejects empty names, "..", and names containing path separators or NUL. +fn sanitize_path_component(name: &str) -> Result { + let trimmed = name.trim().trim_matches('.'); + if trimmed.is_empty() { + bail!("Skill name is empty or only dots after sanitization"); + } + let sanitized: String = trimmed + .chars() + .map(|c| match c { + '/' | '\\' | '\0' => '_', + _ => c, + }) + .collect(); + if sanitized == ".." || sanitized.contains('/') || sanitized.contains('\\') { + bail!("Skill name '{}' is unsafe as a path component", name); + } + Ok(sanitized) +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::skillforge::scout::{ScoutResult, ScoutSource}; + use std::fs; + + fn sample_candidate() -> ScoutResult { + ScoutResult { + name: "test-skill".into(), + url: "https://github.com/user/test-skill".into(), + description: "A test skill for unit tests".into(), + stars: 42, + language: Some("Rust".into()), + updated_at: Some(Utc::now()), + source: ScoutSource::GitHub, + owner: "user".into(), + has_license: true, + } + } + + #[test] + fn integrate_creates_files() { + let tmp = std::env::temp_dir().join("zeroclaw-test-integrate"); + let _ = fs::remove_dir_all(&tmp); + + let integrator = Integrator::new(tmp.to_string_lossy().into_owned()); + let c = sample_candidate(); + let path = integrator.integrate(&c).unwrap(); + + assert!(path.join("SKILL.toml").exists()); + assert!(path.join("SKILL.md").exists()); + + let toml = fs::read_to_string(path.join("SKILL.toml")).unwrap(); + assert!(toml.contains("name = \"test-skill\"")); + assert!(toml.contains("stars = 42")); + + let md = fs::read_to_string(path.join("SKILL.md")).unwrap(); + assert!(md.contains("# test-skill")); + assert!(md.contains("A test skill for unit tests")); + + let _ = fs::remove_dir_all(&tmp); + } + + #[test] + fn escape_toml_handles_quotes_and_control_chars() { + assert_eq!(escape_toml(r#"say "hello""#), r#"say \"hello\""#); + assert_eq!(escape_toml(r"back\slash"), r"back\\slash"); + assert_eq!(escape_toml("line\nbreak"), "line\\nbreak"); + assert_eq!(escape_toml("tab\there"), "tab\\there"); + assert_eq!(escape_toml("cr\rhere"), "cr\\rhere"); + } + + #[test] + fn sanitize_rejects_traversal() { + assert!(sanitize_path_component("..").is_err()); + assert!(sanitize_path_component("...").is_err()); + assert!(sanitize_path_component("").is_err()); + assert!(sanitize_path_component(" ").is_err()); + } + + #[test] + fn sanitize_replaces_separators() { + let s = sanitize_path_component("foo/bar\\baz\0qux").unwrap(); + assert!(!s.contains('/')); + assert!(!s.contains('\\')); + assert!(!s.contains('\0')); + assert_eq!(s, "foo_bar_baz_qux"); + } + + #[test] + fn sanitize_trims_dots() { + let s = sanitize_path_component(".hidden.").unwrap(); + assert_eq!(s, "hidden"); + } +} diff --git a/src/skillforge/mod.rs b/src/skillforge/mod.rs new file mode 100644 index 0000000..d16b8dc --- /dev/null +++ b/src/skillforge/mod.rs @@ -0,0 +1,255 @@ +//! SkillForge — Skill auto-discovery, evaluation, and integration engine. +//! +//! Pipeline: Scout → Evaluate → Integrate +//! Discovers skills from external sources, scores them, and generates +//! ZeroClaw-compatible manifests for qualified candidates. + +pub mod evaluate; +pub mod integrate; +pub mod scout; + +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use tracing::{info, warn}; + +use self::evaluate::{EvalResult, Evaluator, Recommendation}; +use self::integrate::Integrator; +use self::scout::{GitHubScout, Scout, ScoutResult, ScoutSource}; + +// --------------------------------------------------------------------------- +// Configuration +// --------------------------------------------------------------------------- + +#[derive(Clone, Serialize, Deserialize)] +pub struct SkillForgeConfig { + #[serde(default)] + pub enabled: bool, + #[serde(default = "default_auto_integrate")] + pub auto_integrate: bool, + #[serde(default = "default_sources")] + pub sources: Vec, + #[serde(default = "default_scan_interval")] + pub scan_interval_hours: u64, + #[serde(default = "default_min_score")] + pub min_score: f64, + /// Optional GitHub personal-access token for higher rate limits. + #[serde(default)] + pub github_token: Option, + /// Directory where integrated skills are written. + #[serde(default = "default_output_dir")] + pub output_dir: String, +} + +fn default_auto_integrate() -> bool { + true +} +fn default_sources() -> Vec { + vec!["github".into(), "clawhub".into()] +} +fn default_scan_interval() -> u64 { + 24 +} +fn default_min_score() -> f64 { + 0.7 +} +fn default_output_dir() -> String { + "./skills".into() +} + +impl Default for SkillForgeConfig { + fn default() -> Self { + Self { + enabled: false, + auto_integrate: default_auto_integrate(), + sources: default_sources(), + scan_interval_hours: default_scan_interval(), + min_score: default_min_score(), + github_token: None, + output_dir: default_output_dir(), + } + } +} + +impl std::fmt::Debug for SkillForgeConfig { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SkillForgeConfig") + .field("enabled", &self.enabled) + .field("auto_integrate", &self.auto_integrate) + .field("sources", &self.sources) + .field("scan_interval_hours", &self.scan_interval_hours) + .field("min_score", &self.min_score) + .field( + "github_token", + &self.github_token.as_ref().map(|_| "***"), + ) + .field("output_dir", &self.output_dir) + .finish() + } +} + +// --------------------------------------------------------------------------- +// ForgeReport — summary of a single pipeline run +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ForgeReport { + pub discovered: usize, + pub evaluated: usize, + pub auto_integrated: usize, + pub manual_review: usize, + pub skipped: usize, + pub results: Vec, +} + +// --------------------------------------------------------------------------- +// SkillForge +// --------------------------------------------------------------------------- + +pub struct SkillForge { + config: SkillForgeConfig, + evaluator: Evaluator, + integrator: Integrator, +} + +impl SkillForge { + pub fn new(config: SkillForgeConfig) -> Self { + let evaluator = Evaluator::new(config.min_score); + let integrator = Integrator::new(config.output_dir.clone()); + Self { + config, + evaluator, + integrator, + } + } + + /// Run the full pipeline: Scout → Evaluate → Integrate. + pub async fn forge(&self) -> Result { + if !self.config.enabled { + warn!("SkillForge is disabled — skipping"); + return Ok(ForgeReport { + discovered: 0, + evaluated: 0, + auto_integrated: 0, + manual_review: 0, + skipped: 0, + results: vec![], + }); + } + + // --- Scout ---------------------------------------------------------- + let mut candidates: Vec = Vec::new(); + + for src in &self.config.sources { + let source: ScoutSource = src.parse().unwrap(); // Infallible + match source { + ScoutSource::GitHub => { + let scout = GitHubScout::new(self.config.github_token.clone()); + match scout.discover().await { + Ok(mut found) => { + info!(count = found.len(), "GitHub scout returned candidates"); + candidates.append(&mut found); + } + Err(e) => { + warn!(error = %e, "GitHub scout failed, continuing with other sources"); + } + } + } + ScoutSource::ClawHub | ScoutSource::HuggingFace => { + info!(source = src.as_str(), "Source not yet implemented — skipping"); + } + } + } + + // Deduplicate by URL + scout::dedup(&mut candidates); + let discovered = candidates.len(); + info!(discovered, "Total unique candidates after dedup"); + + // --- Evaluate ------------------------------------------------------- + let results: Vec = candidates + .into_iter() + .map(|c| self.evaluator.evaluate(c)) + .collect(); + let evaluated = results.len(); + + // --- Integrate ------------------------------------------------------ + let mut auto_integrated = 0usize; + let mut manual_review = 0usize; + let mut skipped = 0usize; + + for res in &results { + match res.recommendation { + Recommendation::Auto => { + if self.config.auto_integrate { + match self.integrator.integrate(&res.candidate) { + Ok(_) => { + auto_integrated += 1; + } + Err(e) => { + warn!( + skill = res.candidate.name.as_str(), + error = %e, + "Integration failed for candidate, continuing" + ); + } + } + } else { + // Count as would-be auto but not actually integrated + manual_review += 1; + } + } + Recommendation::Manual => { + manual_review += 1; + } + Recommendation::Skip => { + skipped += 1; + } + } + } + + info!( + auto_integrated, + manual_review, skipped, "Forge pipeline complete" + ); + + Ok(ForgeReport { + discovered, + evaluated, + auto_integrated, + manual_review, + skipped, + results, + }) + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn disabled_forge_returns_empty_report() { + let cfg = SkillForgeConfig { + enabled: false, + ..Default::default() + }; + let forge = SkillForge::new(cfg); + let report = forge.forge().await.unwrap(); + assert_eq!(report.discovered, 0); + assert_eq!(report.auto_integrated, 0); + } + + #[test] + fn default_config_values() { + let cfg = SkillForgeConfig::default(); + assert!(!cfg.enabled); + assert!(cfg.auto_integrate); + assert_eq!(cfg.scan_interval_hours, 24); + assert!((cfg.min_score - 0.7).abs() < f64::EPSILON); + assert_eq!(cfg.sources, vec!["github", "clawhub"]); + } +} diff --git a/src/skillforge/scout.rs b/src/skillforge/scout.rs new file mode 100644 index 0000000..df3a4a8 --- /dev/null +++ b/src/skillforge/scout.rs @@ -0,0 +1,331 @@ +//! Scout — skill discovery from external sources. + +use anyhow::Result; +use async_trait::async_trait; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use tracing::{debug, warn}; + +// --------------------------------------------------------------------------- +// ScoutSource +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub enum ScoutSource { + GitHub, + ClawHub, + HuggingFace, +} + +impl std::str::FromStr for ScoutSource { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> std::result::Result { + Ok(match s.to_lowercase().as_str() { + "github" => Self::GitHub, + "clawhub" => Self::ClawHub, + "huggingface" | "hf" => Self::HuggingFace, + _ => { + warn!(source = s, "Unknown scout source, defaulting to GitHub"); + Self::GitHub + } + }) + } +} + +// --------------------------------------------------------------------------- +// ScoutResult +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ScoutResult { + pub name: String, + pub url: String, + pub description: String, + pub stars: u64, + pub language: Option, + pub updated_at: Option>, + pub source: ScoutSource, + /// Owner / org extracted from the URL or API response. + pub owner: String, + /// Whether the repo has a license file. + pub has_license: bool, +} + +// --------------------------------------------------------------------------- +// Scout trait +// --------------------------------------------------------------------------- + +#[async_trait] +pub trait Scout: Send + Sync { + /// Discover candidate skills from the source. + async fn discover(&self) -> Result>; +} + +// --------------------------------------------------------------------------- +// GitHubScout +// --------------------------------------------------------------------------- + +/// Searches GitHub for repos matching skill-related queries. +pub struct GitHubScout { + client: reqwest::Client, + queries: Vec, +} + +impl GitHubScout { + pub fn new(token: Option) -> Self { + use std::time::Duration; + + let mut headers = reqwest::header::HeaderMap::new(); + headers.insert( + reqwest::header::ACCEPT, + "application/vnd.github+json" + .parse() + .expect("valid header"), + ); + headers.insert( + reqwest::header::USER_AGENT, + "ZeroClaw-SkillForge/0.1".parse().expect("valid header"), + ); + if let Some(ref t) = token { + if let Ok(val) = format!("Bearer {t}").parse() { + headers.insert(reqwest::header::AUTHORIZATION, val); + } + } + + let client = reqwest::Client::builder() + .default_headers(headers) + .timeout(Duration::from_secs(30)) + .build() + .expect("failed to build reqwest client"); + + Self { + client, + queries: vec![ + "zeroclaw skill".into(), + "ai agent skill".into(), + ], + } + } + + /// Parse the GitHub search/repositories JSON response. + fn parse_items(body: &serde_json::Value) -> Vec { + let items = match body.get("items").and_then(|v| v.as_array()) { + Some(arr) => arr, + None => return vec![], + }; + + items + .iter() + .filter_map(|item| { + let name = item.get("name")?.as_str()?.to_string(); + let url = item.get("html_url")?.as_str()?.to_string(); + let description = item + .get("description") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let stars = item + .get("stargazers_count") + .and_then(|v| v.as_u64()) + .unwrap_or(0); + let language = item + .get("language") + .and_then(|v| v.as_str()) + .map(String::from); + let updated_at = item + .get("updated_at") + .and_then(|v| v.as_str()) + .and_then(|s| s.parse::>().ok()); + let owner = item + .get("owner") + .and_then(|o| o.get("login")) + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); + let has_license = item + .get("license") + .map(|v| !v.is_null()) + .unwrap_or(false); + + Some(ScoutResult { + name, + url, + description, + stars, + language, + updated_at, + source: ScoutSource::GitHub, + owner, + has_license, + }) + }) + .collect() + } +} + +#[async_trait] +impl Scout for GitHubScout { + async fn discover(&self) -> Result> { + let mut all: Vec = Vec::new(); + + for query in &self.queries { + let url = format!( + "https://api.github.com/search/repositories?q={}&sort=stars&order=desc&per_page=30", + urlencoding(query) + ); + debug!(query = query.as_str(), "Searching GitHub"); + + let resp = match self.client.get(&url).send().await { + Ok(r) => r, + Err(e) => { + warn!( + query = query.as_str(), + error = %e, + "GitHub API request failed, skipping query" + ); + continue; + } + }; + + if !resp.status().is_success() { + warn!( + status = %resp.status(), + query = query.as_str(), + "GitHub search returned non-200" + ); + continue; + } + + let body: serde_json::Value = match resp.json().await { + Ok(v) => v, + Err(e) => { + warn!( + query = query.as_str(), + error = %e, + "Failed to parse GitHub response, skipping query" + ); + continue; + } + }; + + let mut items = Self::parse_items(&body); + debug!(count = items.len(), query = query.as_str(), "Parsed items"); + all.append(&mut items); + } + + dedup(&mut all); + Ok(all) + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/// Minimal percent-encoding for query strings (space → +). +fn urlencoding(s: &str) -> String { + s.replace(' ', "+") + .replace('&', "%26") + .replace('#', "%23") +} + +/// Deduplicate scout results by URL (keeps first occurrence). +pub fn dedup(results: &mut Vec) { + let mut seen = std::collections::HashSet::new(); + results.retain(|r| seen.insert(r.url.clone())); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn scout_source_from_str() { + assert_eq!("github".parse::().unwrap(), ScoutSource::GitHub); + assert_eq!("GitHub".parse::().unwrap(), ScoutSource::GitHub); + assert_eq!("clawhub".parse::().unwrap(), ScoutSource::ClawHub); + assert_eq!("huggingface".parse::().unwrap(), ScoutSource::HuggingFace); + assert_eq!("hf".parse::().unwrap(), ScoutSource::HuggingFace); + // unknown falls back to GitHub + assert_eq!("unknown".parse::().unwrap(), ScoutSource::GitHub); + } + + #[test] + fn dedup_removes_duplicates() { + let mut results = vec![ + ScoutResult { + name: "a".into(), + url: "https://github.com/x/a".into(), + description: String::new(), + stars: 10, + language: None, + updated_at: None, + source: ScoutSource::GitHub, + owner: "x".into(), + has_license: true, + }, + ScoutResult { + name: "a-dup".into(), + url: "https://github.com/x/a".into(), + description: String::new(), + stars: 10, + language: None, + updated_at: None, + source: ScoutSource::GitHub, + owner: "x".into(), + has_license: true, + }, + ScoutResult { + name: "b".into(), + url: "https://github.com/x/b".into(), + description: String::new(), + stars: 5, + language: None, + updated_at: None, + source: ScoutSource::GitHub, + owner: "x".into(), + has_license: false, + }, + ]; + dedup(&mut results); + assert_eq!(results.len(), 2); + assert_eq!(results[0].name, "a"); + assert_eq!(results[1].name, "b"); + } + + #[test] + fn parse_github_items() { + let json = serde_json::json!({ + "total_count": 1, + "items": [ + { + "name": "cool-skill", + "html_url": "https://github.com/user/cool-skill", + "description": "A cool skill", + "stargazers_count": 42, + "language": "Rust", + "updated_at": "2026-01-15T10:00:00Z", + "owner": { "login": "user" }, + "license": { "spdx_id": "MIT" } + } + ] + }); + let items = GitHubScout::parse_items(&json); + assert_eq!(items.len(), 1); + assert_eq!(items[0].name, "cool-skill"); + assert_eq!(items[0].stars, 42); + assert!(items[0].has_license); + assert_eq!(items[0].owner, "user"); + } + + #[test] + fn urlencoding_works() { + assert_eq!(urlencoding("hello world"), "hello+world"); + assert_eq!(urlencoding("a&b#c"), "a%26b%23c"); + } +}