From 572aa77c2aa7cb49f85391765e1b1320fa0218e9 Mon Sep 17 00:00:00 2001 From: Chummy Date: Thu, 19 Feb 2026 17:51:35 +0800 Subject: [PATCH] feat(memory): add embedding hint routes and upgrade guidance --- docs/config-reference.md | 28 +++++ docs/providers-reference.md | 53 ++++++++++ src/agent/agent.rs | 3 +- src/config/mod.rs | 6 +- src/config/schema.rs | 37 +++++++ src/doctor/mod.rs | 137 ++++++++++++++++++++++++- src/memory/mod.rs | 198 ++++++++++++++++++++++++++++++++++-- src/onboard/wizard.rs | 2 + 8 files changed, 449 insertions(+), 15 deletions(-) diff --git a/docs/config-reference.md b/docs/config-reference.md index 8c905f9..973b567 100644 --- a/docs/config-reference.md +++ b/docs/config-reference.md @@ -87,9 +87,37 @@ Notes: | `backend` | `sqlite` | `sqlite`, `lucid`, `markdown`, `none` | | `auto_save` | `true` | automatic persistence | | `embedding_provider` | `none` | `none`, `openai`, or custom endpoint | +| `embedding_model` | `text-embedding-3-small` | embedding model ID, or `hint:` route | +| `embedding_dimensions` | `1536` | expected vector size for selected embedding model | | `vector_weight` | `0.7` | hybrid ranking vector weight | | `keyword_weight` | `0.3` | hybrid ranking keyword weight | +## `[[model_routes]]` and `[[embedding_routes]]` + +Use route hints so integrations can keep stable names while model IDs evolve. + +```toml +[memory] +embedding_model = "hint:semantic" + +[[model_routes]] +hint = "reasoning" +provider = "openrouter" +model = "provider/model-id" + +[[embedding_routes]] +hint = "semantic" +provider = "openai" +model = "text-embedding-3-small" +dimensions = 1536 +``` + +Upgrade strategy: + +1. Keep hints stable (`hint:reasoning`, `hint:semantic`). +2. Update only `model = "...new-version..."` in the route entries. +3. Validate with `zeroclaw doctor` before restart/rollout. + ## `[channels_config]` Top-level channel options are configured under `channels_config`. 
diff --git a/docs/providers-reference.md b/docs/providers-reference.md index a399dc7..40d1109 100644 --- a/docs/providers-reference.md +++ b/docs/providers-reference.md @@ -143,3 +143,56 @@ Then call with a hint model name (for example from tool or integration paths): ```text hint:reasoning ``` + +## Embedding Routing (`hint:`) + +You can route embedding calls with the same hint pattern using `[[embedding_routes]]`. +Set `[memory].embedding_model` to a `hint:` value to activate routing. + +```toml +[memory] +embedding_model = "hint:semantic" + +[[embedding_routes]] +hint = "semantic" +provider = "openai" +model = "text-embedding-3-small" +dimensions = 1536 + +[[embedding_routes]] +hint = "archive" +provider = "custom:https://embed.example.com/v1" +model = "your-embedding-model-id" +dimensions = 1024 +``` + +Supported embedding providers: + +- `none` +- `openai` +- `custom:` (OpenAI-compatible embeddings endpoint) + +Optional per-route key override: + +```toml +[[embedding_routes]] +hint = "semantic" +provider = "openai" +model = "text-embedding-3-small" +api_key = "sk-route-specific" +``` + +## Upgrading Models Safely + +Use stable hints and update only route targets when providers deprecate model IDs. + +Recommended workflow: + +1. Keep call sites stable (`hint:reasoning`, `hint:semantic`). +2. Change only the target model under `[[model_routes]]` or `[[embedding_routes]]`. +3. Run: + - `zeroclaw doctor` + - `zeroclaw status` +4. Smoke test one representative flow (chat + memory retrieval) before rollout. + +This minimizes breakage because integrations and prompts do not need to change when model IDs are upgraded. 
diff --git a/src/agent/agent.rs b/src/agent/agent.rs index dc8f74d..c85473b 100644 --- a/src/agent/agent.rs +++ b/src/agent/agent.rs @@ -229,8 +229,9 @@ impl Agent { &config.workspace_dir, )); - let memory: Arc<dyn Memory> = Arc::from(memory::create_memory_with_storage( + let memory: Arc<dyn Memory> = Arc::from(memory::create_memory_with_storage_and_routes( &config.memory, + &config.embedding_routes, Some(&config.storage.provider.config), &config.workspace_dir, config.api_key.as_deref(), diff --git a/src/config/mod.rs b/src/config/mod.rs index 7f3fe29..72fbbf0 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -6,9 +6,9 @@ pub use schema::{ build_runtime_proxy_client_with_timeouts, runtime_proxy_config, set_runtime_proxy_config, AgentConfig, AuditConfig, AutonomyConfig, BrowserComputerUseConfig, BrowserConfig, ChannelsConfig, ClassificationRule, ComposioConfig, Config, CostConfig, CronConfig, - DelegateAgentConfig, DiscordConfig, DockerRuntimeConfig, GatewayConfig, HardwareConfig, - HardwareTransport, HeartbeatConfig, HttpRequestConfig, IMessageConfig, IdentityConfig, - LarkConfig, MatrixConfig, MemoryConfig, ModelRouteConfig, ObservabilityConfig, + DelegateAgentConfig, DiscordConfig, DockerRuntimeConfig, EmbeddingRouteConfig, GatewayConfig, + HardwareConfig, HardwareTransport, HeartbeatConfig, HttpRequestConfig, IMessageConfig, + IdentityConfig, LarkConfig, MatrixConfig, MemoryConfig, ModelRouteConfig, ObservabilityConfig, PeripheralBoardConfig, PeripheralsConfig, ProxyConfig, ProxyScope, QueryClassificationConfig, ReliabilityConfig, ResourceLimitsConfig, RuntimeConfig, SandboxBackend, SandboxConfig, SchedulerConfig, SecretsConfig, SecurityConfig, SlackConfig, StorageConfig, diff --git a/src/config/schema.rs b/src/config/schema.rs index 7814f10..a591be8 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -83,6 +83,10 @@ pub struct Config { #[serde(default)] pub model_routes: Vec<ModelRouteConfig>, + /// Embedding routing rules — route `hint:<name>` to specific provider+model combos.
+ #[serde(default)] + pub embedding_routes: Vec, + /// Automatic query classification — maps user messages to model hints. #[serde(default)] pub query_classification: QueryClassificationConfig, @@ -1821,6 +1825,36 @@ pub struct ModelRouteConfig { pub api_key: Option, } +// ── Embedding routing ─────────────────────────────────────────── + +/// Route an embedding hint to a specific provider + model. +/// +/// ```toml +/// [[embedding_routes]] +/// hint = "semantic" +/// provider = "openai" +/// model = "text-embedding-3-small" +/// dimensions = 1536 +/// +/// [memory] +/// embedding_model = "hint:semantic" +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EmbeddingRouteConfig { + /// Route hint name (e.g. "semantic", "archive", "faq") + pub hint: String, + /// Embedding provider (`none`, `openai`, or `custom:`) + pub provider: String, + /// Embedding model to use with that provider + pub model: String, + /// Optional embedding dimension override for this route + #[serde(default)] + pub dimensions: Option, + /// Optional API key override for this route's provider + #[serde(default)] + pub api_key: Option, +} + // ── Query Classification ───────────────────────────────────────── /// Automatic query classification — classifies user messages by keyword/pattern @@ -2480,6 +2514,7 @@ impl Default for Config { scheduler: SchedulerConfig::default(), agent: AgentConfig::default(), model_routes: Vec::new(), + embedding_routes: Vec::new(), heartbeat: HeartbeatConfig::default(), cron: CronConfig::default(), channels_config: ChannelsConfig::default(), @@ -3407,6 +3442,7 @@ default_temperature = 0.7 reliability: ReliabilityConfig::default(), scheduler: SchedulerConfig::default(), model_routes: Vec::new(), + embedding_routes: Vec::new(), query_classification: QueryClassificationConfig::default(), heartbeat: HeartbeatConfig { enabled: true, @@ -3574,6 +3610,7 @@ tool_dispatcher = "xml" reliability: ReliabilityConfig::default(), scheduler: 
SchedulerConfig::default(), model_routes: Vec::new(), + embedding_routes: Vec::new(), query_classification: QueryClassificationConfig::default(), heartbeat: HeartbeatConfig::default(), cron: CronConfig::default(), diff --git a/src/doctor/mod.rs b/src/doctor/mod.rs index 210f860..f0335db 100644 --- a/src/doctor/mod.rs +++ b/src/doctor/mod.rs @@ -344,6 +344,58 @@ fn check_config_semantics(config: &Config, items: &mut Vec) { } } + // Embedding routes validation + for route in &config.embedding_routes { + if route.hint.trim().is_empty() { + items.push(DiagItem::warn(cat, "embedding route with empty hint")); + } + if let Some(reason) = embedding_provider_validation_error(&route.provider) { + items.push(DiagItem::warn( + cat, + format!( + "embedding route \"{}\" uses invalid provider \"{}\": {}", + route.hint, route.provider, reason + ), + )); + } + if route.model.trim().is_empty() { + items.push(DiagItem::warn( + cat, + format!("embedding route \"{}\" has empty model", route.hint), + )); + } + if route.dimensions.is_some_and(|value| value == 0) { + items.push(DiagItem::warn( + cat, + format!( + "embedding route \"{}\" has invalid dimensions=0", + route.hint + ), + )); + } + } + + if let Some(hint) = config + .memory + .embedding_model + .strip_prefix("hint:") + .map(str::trim) + .filter(|value| !value.is_empty()) + { + if !config + .embedding_routes + .iter() + .any(|route| route.hint.trim() == hint) + { + items.push(DiagItem::warn( + cat, + format!( + "memory.embedding_model uses hint \"{hint}\" but no matching [[embedding_routes]] entry exists" + ), + )); + } + } + // Channel: at least one configured let cc = &config.channels_config; let has_channel = cc.telegram.is_some() @@ -396,6 +448,31 @@ fn provider_validation_error(name: &str) -> Option { } } +fn embedding_provider_validation_error(name: &str) -> Option { + let normalized = name.trim(); + if normalized.eq_ignore_ascii_case("none") || normalized.eq_ignore_ascii_case("openai") { + return None; + } + + let 
Some(url) = normalized.strip_prefix("custom:") else { + return Some("supported values: none, openai, custom:".into()); + }; + + let url = url.trim(); + if url.is_empty() { + return Some("custom provider requires a non-empty URL after 'custom:'".into()); + } + + match reqwest::Url::parse(url) { + Ok(parsed) if matches!(parsed.scheme(), "http" | "https") => None, + Ok(parsed) => Some(format!( + "custom provider URL must use http/https, got '{}'", + parsed.scheme() + )), + Err(err) => Some(format!("invalid custom provider URL: {err}")), + } +} + // ── Workspace integrity ────────────────────────────────────────── fn check_workspace(config: &Config, items: &mut Vec) { @@ -891,6 +968,62 @@ mod tests { assert_eq!(route_item.unwrap().severity, Severity::Warn); } + #[test] + fn config_validation_warns_empty_embedding_route_model() { + let mut config = Config::default(); + config.embedding_routes = vec![crate::config::EmbeddingRouteConfig { + hint: "semantic".into(), + provider: "openai".into(), + model: String::new(), + dimensions: Some(1536), + api_key: None, + }]; + + let mut items = Vec::new(); + check_config_semantics(&config, &mut items); + let route_item = items.iter().find(|item| { + item.message + .contains("embedding route \"semantic\" has empty model") + }); + assert!(route_item.is_some()); + assert_eq!(route_item.unwrap().severity, Severity::Warn); + } + + #[test] + fn config_validation_warns_invalid_embedding_route_provider() { + let mut config = Config::default(); + config.embedding_routes = vec![crate::config::EmbeddingRouteConfig { + hint: "semantic".into(), + provider: "groq".into(), + model: "text-embedding-3-small".into(), + dimensions: None, + api_key: None, + }]; + + let mut items = Vec::new(); + check_config_semantics(&config, &mut items); + let route_item = items + .iter() + .find(|item| item.message.contains("uses invalid provider \"groq\"")); + assert!(route_item.is_some()); + assert_eq!(route_item.unwrap().severity, Severity::Warn); + } + + #[test] 
+ fn config_validation_warns_missing_embedding_hint_target() { + let mut config = Config::default(); + config.memory.embedding_model = "hint:semantic".into(); + + let mut items = Vec::new(); + check_config_semantics(&config, &mut items); + let route_item = items.iter().find(|item| { + item.message + .contains("no matching [[embedding_routes]] entry exists") + }); + assert!(route_item.is_some()); + assert_eq!(route_item.unwrap().severity, Severity::Warn); + } + #[test] fn environment_check_finds_git() { let mut items = Vec::new(); @@ -910,8 +1043,8 @@ mod tests { #[test] fn truncate_for_display_preserves_utf8_boundaries() { - let preview = truncate_for_display("版本号-alpha-build", 3); - assert_eq!(preview, "版本号…"); + let preview = truncate_for_display("🙂example-alpha-build", 3); + assert_eq!(preview, "🙂ex…"); } #[test] diff --git a/src/memory/mod.rs b/src/memory/mod.rs index b4ea5e7..dd9f0d1 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -27,7 +27,7 @@ pub use traits::Memory; #[allow(unused_imports)] pub use traits::{MemoryCategory, MemoryEntry}; -use crate::config::{MemoryConfig, StorageProviderConfig}; +use crate::config::{EmbeddingRouteConfig, MemoryConfig, StorageProviderConfig}; use anyhow::Context; use std::path::Path; use std::sync::Arc; @@ -75,13 +75,83 @@ pub fn effective_memory_backend_name( memory_backend.trim().to_ascii_lowercase() } +#[derive(Debug, Clone, PartialEq, Eq)] +struct ResolvedEmbeddingConfig { + provider: String, + model: String, + dimensions: usize, + api_key: Option, +} + +fn resolve_embedding_config( + config: &MemoryConfig, + embedding_routes: &[EmbeddingRouteConfig], + api_key: Option<&str>, +) -> ResolvedEmbeddingConfig { + let fallback_api_key = api_key + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(str::to_string); + let fallback = ResolvedEmbeddingConfig { + provider: config.embedding_provider.trim().to_string(), + model: config.embedding_model.trim().to_string(), + dimensions: config.embedding_dimensions, + 
api_key: fallback_api_key.clone(), + }; + + let Some(hint) = config + .embedding_model + .strip_prefix("hint:") + .map(str::trim) + .filter(|value| !value.is_empty()) + else { + return fallback; + }; + + let Some(route) = embedding_routes + .iter() + .find(|route| route.hint.trim() == hint) + else { + tracing::warn!( + hint, + "Unknown embedding route hint; falling back to [memory] embedding settings" + ); + return fallback; + }; + + let provider = route.provider.trim(); + let model = route.model.trim(); + let dimensions = route.dimensions.unwrap_or(config.embedding_dimensions); + if provider.is_empty() || model.is_empty() || dimensions == 0 { + tracing::warn!( + hint, + "Invalid embedding route configuration; falling back to [memory] embedding settings" + ); + return fallback; + } + + let routed_api_key = route + .api_key + .as_deref() + .map(str::trim) + .filter(|value: &&str| !value.is_empty()) + .map(|value| value.to_string()); + + ResolvedEmbeddingConfig { + provider: provider.to_string(), + model: model.to_string(), + dimensions, + api_key: routed_api_key.or(fallback_api_key), + } +} + /// Factory: create the right memory backend from config pub fn create_memory( config: &MemoryConfig, workspace_dir: &Path, api_key: Option<&str>, ) -> anyhow::Result> { - create_memory_with_storage(config, None, workspace_dir, api_key) + create_memory_with_storage_and_routes(config, &[], None, workspace_dir, api_key) } /// Factory: create memory with optional storage-provider override. @@ -90,9 +160,21 @@ pub fn create_memory_with_storage( storage_provider: Option<&StorageProviderConfig>, workspace_dir: &Path, api_key: Option<&str>, +) -> anyhow::Result> { + create_memory_with_storage_and_routes(config, &[], storage_provider, workspace_dir, api_key) +} + +/// Factory: create memory with optional storage-provider override and embedding routes. 
+pub fn create_memory_with_storage_and_routes( + config: &MemoryConfig, + embedding_routes: &[EmbeddingRouteConfig], + storage_provider: Option<&StorageProviderConfig>, + workspace_dir: &Path, + api_key: Option<&str>, ) -> anyhow::Result> { let backend_name = effective_memory_backend_name(&config.backend, storage_provider); let backend_kind = classify_memory_backend(&backend_name); + let resolved_embedding = resolve_embedding_config(config, embedding_routes, api_key); // Best-effort memory hygiene/retention pass (throttled by state file). if let Err(e) = hygiene::run_if_due(config, workspace_dir) { @@ -137,14 +219,14 @@ pub fn create_memory_with_storage( fn build_sqlite_memory( config: &MemoryConfig, workspace_dir: &Path, - api_key: Option<&str>, + resolved_embedding: &ResolvedEmbeddingConfig, ) -> anyhow::Result { let embedder: Arc = Arc::from(embeddings::create_embedding_provider( - &config.embedding_provider, - api_key, - &config.embedding_model, - config.embedding_dimensions, + &resolved_embedding.provider, + resolved_embedding.api_key.as_deref(), + &resolved_embedding.model, + resolved_embedding.dimensions, )); #[allow(clippy::cast_possible_truncation)] @@ -184,7 +266,7 @@ pub fn create_memory_with_storage( create_memory_with_builders( &backend_name, workspace_dir, - || build_sqlite_memory(config, workspace_dir, api_key), + || build_sqlite_memory(config, workspace_dir, &resolved_embedding), || build_postgres_memory(storage_provider), "", ) @@ -247,7 +329,7 @@ pub fn create_response_cache(config: &MemoryConfig, workspace_dir: &Path) -> Opt #[cfg(test)] mod tests { use super::*; - use crate::config::StorageProviderConfig; + use crate::config::{EmbeddingRouteConfig, StorageProviderConfig}; use tempfile::TempDir; #[test] @@ -353,4 +435,102 @@ mod tests { .expect("postgres without db_url should be rejected"); assert!(error.to_string().contains("db_url")); } + + #[test] + fn resolve_embedding_config_uses_base_config_when_model_is_not_hint() { + let cfg = 
MemoryConfig { + embedding_provider: "openai".into(), + embedding_model: "text-embedding-3-small".into(), + embedding_dimensions: 1536, + ..MemoryConfig::default() + }; + + let resolved = resolve_embedding_config(&cfg, &[], Some("base-key")); + assert_eq!( + resolved, + ResolvedEmbeddingConfig { + provider: "openai".into(), + model: "text-embedding-3-small".into(), + dimensions: 1536, + api_key: Some("base-key".into()), + } + ); + } + + #[test] + fn resolve_embedding_config_uses_matching_route_with_api_key_override() { + let cfg = MemoryConfig { + embedding_provider: "none".into(), + embedding_model: "hint:semantic".into(), + embedding_dimensions: 1536, + ..MemoryConfig::default() + }; + let routes = vec![EmbeddingRouteConfig { + hint: "semantic".into(), + provider: "custom:https://api.example.com/v1".into(), + model: "custom-embed-v2".into(), + dimensions: Some(1024), + api_key: Some("route-key".into()), + }]; + + let resolved = resolve_embedding_config(&cfg, &routes, Some("base-key")); + assert_eq!( + resolved, + ResolvedEmbeddingConfig { + provider: "custom:https://api.example.com/v1".into(), + model: "custom-embed-v2".into(), + dimensions: 1024, + api_key: Some("route-key".into()), + } + ); + } + + #[test] + fn resolve_embedding_config_falls_back_when_hint_is_missing() { + let cfg = MemoryConfig { + embedding_provider: "openai".into(), + embedding_model: "hint:semantic".into(), + embedding_dimensions: 1536, + ..MemoryConfig::default() + }; + + let resolved = resolve_embedding_config(&cfg, &[], Some("base-key")); + assert_eq!( + resolved, + ResolvedEmbeddingConfig { + provider: "openai".into(), + model: "hint:semantic".into(), + dimensions: 1536, + api_key: Some("base-key".into()), + } + ); + } + + #[test] + fn resolve_embedding_config_falls_back_when_route_is_invalid() { + let cfg = MemoryConfig { + embedding_provider: "openai".into(), + embedding_model: "hint:semantic".into(), + embedding_dimensions: 1536, + ..MemoryConfig::default() + }; + let routes = 
vec![EmbeddingRouteConfig { + hint: "semantic".into(), + provider: String::new(), + model: "text-embedding-3-small".into(), + dimensions: Some(0), + api_key: None, + }]; + + let resolved = resolve_embedding_config(&cfg, &routes, Some("base-key")); + assert_eq!( + resolved, + ResolvedEmbeddingConfig { + provider: "openai".into(), + model: "hint:semantic".into(), + dimensions: 1536, + api_key: Some("base-key".into()), + } + ); + } } diff --git a/src/onboard/wizard.rs b/src/onboard/wizard.rs index 792e5af..6618f13 100644 --- a/src/onboard/wizard.rs +++ b/src/onboard/wizard.rs @@ -160,6 +160,7 @@ pub async fn run_wizard() -> Result { scheduler: crate::config::schema::SchedulerConfig::default(), agent: crate::config::schema::AgentConfig::default(), model_routes: Vec::new(), + embedding_routes: Vec::new(), heartbeat: HeartbeatConfig::default(), cron: crate::config::CronConfig::default(), channels_config, @@ -377,6 +378,7 @@ pub async fn run_quick_setup( scheduler: crate::config::schema::SchedulerConfig::default(), agent: crate::config::schema::AgentConfig::default(), model_routes: Vec::new(), + embedding_routes: Vec::new(), heartbeat: HeartbeatConfig::default(), cron: crate::config::CronConfig::default(), channels_config: ChannelsConfig::default(),