feat(memory): add embedding hint routes and upgrade guidance

2026-02-19 17:51:35 +08:00 · 2026-02-19 17:51:35 +08:00 · 572aa77c2a
commit 572aa77c2a
parent 2b8547b386
8 changed files with 449 additions and 15 deletions
--- a/docs/config-reference.md
+++ b/docs/config-reference.md
@ -87,9 +87,37 @@ Notes:
 | `backend` | `sqlite` | `sqlite`, `lucid`, `markdown`, `none` |
 | `auto_save` | `true` | automatic persistence |
 | `embedding_provider` | `none` | `none`, `openai`, or `custom:<url>` (OpenAI-compatible embeddings endpoint) |
 | `embedding_model` | `text-embedding-3-small` | embedding model ID, or `hint:<name>` route |
 | `embedding_dimensions` | `1536` | expected vector size for the selected embedding model; a matching `[[embedding_routes]]` entry may override it with `dimensions` |
 | `vector_weight` | `0.7` | hybrid ranking vector weight |
 | `keyword_weight` | `0.3` | hybrid ranking keyword weight |
 ## `[[model_routes]]` and `[[embedding_routes]]`
 Use route hints so integrations can keep stable names while model IDs evolve.
 ```toml
 [memory]
 embedding_model = "hint:semantic"
 [[model_routes]]
 hint = "reasoning"
 provider = "openrouter"
 model = "provider/model-id"
 [[embedding_routes]]
 hint = "semantic"
 provider = "openai"
 model = "text-embedding-3-small"
 dimensions = 1536
 ```
 Upgrade strategy:
 1. Keep hints stable (`hint:reasoning`, `hint:semantic`).
 2. Update only `model = "...new-version..."` in the route entries.
 3. Validate with `zeroclaw doctor` before restart/rollout.
 ## `[channels_config]`
 Top-level channel options are configured under `channels_config`.
--- a/docs/providers-reference.md
+++ b/docs/providers-reference.md
@ -143,3 +143,56 @@ Then call with a hint model name (for example from tool or integration paths):
 ```text
 hint:reasoning
 ```
 ## Embedding Routing (`hint:<name>`)
 You can route embedding calls with the same hint pattern using `[[embedding_routes]]`.
 Set `[memory].embedding_model` to a `hint:<name>` value to activate routing.
 ```toml
 [memory]
 embedding_model = "hint:semantic"
 [[embedding_routes]]
 hint = "semantic"
 provider = "openai"
 model = "text-embedding-3-small"
 dimensions = 1536
 [[embedding_routes]]
 hint = "archive"
 provider = "custom:https://embed.example.com/v1"
 model = "your-embedding-model-id"
 dimensions = 1024
 ```
 Supported embedding providers:
 - `none`
 - `openai`
 - `custom:<url>` (OpenAI-compatible embeddings endpoint)
 Optional per-route key override:
 ```toml
 [[embedding_routes]]
 hint = "semantic"
 provider = "openai"
 model = "text-embedding-3-small"
 api_key = "sk-route-specific"
 ```
 ## Upgrading Models Safely
 Use stable hints and update only route targets when providers deprecate model IDs.
 Recommended workflow:
 1. Keep call sites stable (`hint:reasoning`, `hint:semantic`).
 2. Change only the target model under `[[model_routes]]` or `[[embedding_routes]]`.
 3. Run:
   - `zeroclaw doctor`
   - `zeroclaw status`
 4. Smoke test one representative flow (chat + memory retrieval) before rollout.
 This minimizes breakage because integrations and prompts do not need to change when model IDs are upgraded.
--- a/src/agent/agent.rs
+++ b/src/agent/agent.rs
@ -229,8 +229,9 @@ impl Agent {
            &config.workspace_dir,
        ));
-        let memory: Arc<dyn Memory> = Arc::from(memory::create_memory_with_storage(
+        let memory: Arc<dyn Memory> = Arc::from(memory::create_memory_with_storage_and_routes(
            &config.memory,
            &config.embedding_routes,
            Some(&config.storage.provider.config),
            &config.workspace_dir,
            config.api_key.as_deref(),
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@ -6,9 +6,9 @@ pub use schema::{
    build_runtime_proxy_client_with_timeouts, runtime_proxy_config, set_runtime_proxy_config,
    AgentConfig, AuditConfig, AutonomyConfig, BrowserComputerUseConfig, BrowserConfig,
    ChannelsConfig, ClassificationRule, ComposioConfig, Config, CostConfig, CronConfig,
-    DelegateAgentConfig, DiscordConfig, DockerRuntimeConfig, GatewayConfig, HardwareConfig,
+    DelegateAgentConfig, DiscordConfig, DockerRuntimeConfig, EmbeddingRouteConfig, GatewayConfig,
-    HardwareTransport, HeartbeatConfig, HttpRequestConfig, IMessageConfig, IdentityConfig,
+    HardwareConfig, HardwareTransport, HeartbeatConfig, HttpRequestConfig, IMessageConfig,
-    LarkConfig, MatrixConfig, MemoryConfig, ModelRouteConfig, ObservabilityConfig,
+    IdentityConfig, LarkConfig, MatrixConfig, MemoryConfig, ModelRouteConfig, ObservabilityConfig,
    PeripheralBoardConfig, PeripheralsConfig, ProxyConfig, ProxyScope, QueryClassificationConfig,
    ReliabilityConfig, ResourceLimitsConfig, RuntimeConfig, SandboxBackend, SandboxConfig,
    SchedulerConfig, SecretsConfig, SecurityConfig, SlackConfig, StorageConfig,
--- a/src/config/schema.rs
+++ b/src/config/schema.rs
@ -83,6 +83,10 @@ pub struct Config {
    #[serde(default)]
    pub model_routes: Vec<ModelRouteConfig>,
    /// Embedding routing rules — route `hint:<name>` to specific provider+model combos.
    #[serde(default)]
    pub embedding_routes: Vec<EmbeddingRouteConfig>,
    /// Automatic query classification — maps user messages to model hints.
    #[serde(default)]
    pub query_classification: QueryClassificationConfig,
@ -1821,6 +1825,36 @@ pub struct ModelRouteConfig {
    pub api_key: Option<String>,
 }
 // ── Embedding routing ───────────────────────────────────────────
 /// Route an embedding hint to a specific provider + model.
 ///
 /// A route is selected when `[memory].embedding_model` is set to
 /// `hint:<name>` and `<name>` equals this route's `hint` (both sides are
 /// compared after trimming surrounding whitespace).
 ///
 /// ```toml
 /// [[embedding_routes]]
 /// hint = "semantic"
 /// provider = "openai"
 /// model = "text-embedding-3-small"
 /// dimensions = 1536
 ///
 /// [memory]
 /// embedding_model = "hint:semantic"
 /// ```
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct EmbeddingRouteConfig {
    /// Route hint name (e.g. "semantic", "archive", "faq"), written without
    /// the `hint:` prefix.
    pub hint: String,
    /// Embedding provider (`none`, `openai`, or `custom:<url>`).
    /// A route with a blank provider is ignored during resolution.
    pub provider: String,
    /// Embedding model to use with that provider.
    /// A route with a blank model is ignored during resolution.
    pub model: String,
    /// Optional embedding dimension override for this route.
    /// When omitted, `[memory].embedding_dimensions` is used instead.
    #[serde(default)]
    pub dimensions: Option<usize>,
    /// Optional API key override for this route's provider.
    /// When omitted or blank, the API key handed to the memory factory is used.
    #[serde(default)]
    pub api_key: Option<String>,
 }
 // ── Query Classification ─────────────────────────────────────────
 /// Automatic query classification — classifies user messages by keyword/pattern
@ -2480,6 +2514,7 @@ impl Default for Config {
            scheduler: SchedulerConfig::default(),
            agent: AgentConfig::default(),
            model_routes: Vec::new(),
            embedding_routes: Vec::new(),
            heartbeat: HeartbeatConfig::default(),
            cron: CronConfig::default(),
            channels_config: ChannelsConfig::default(),
@ -3407,6 +3442,7 @@ default_temperature = 0.7
            reliability: ReliabilityConfig::default(),
            scheduler: SchedulerConfig::default(),
            model_routes: Vec::new(),
            embedding_routes: Vec::new(),
            query_classification: QueryClassificationConfig::default(),
            heartbeat: HeartbeatConfig {
                enabled: true,
@ -3574,6 +3610,7 @@ tool_dispatcher = "xml"
            reliability: ReliabilityConfig::default(),
            scheduler: SchedulerConfig::default(),
            model_routes: Vec::new(),
            embedding_routes: Vec::new(),
            query_classification: QueryClassificationConfig::default(),
            heartbeat: HeartbeatConfig::default(),
            cron: CronConfig::default(),
--- a/src/doctor/mod.rs
+++ b/src/doctor/mod.rs
@ -344,6 +344,58 @@ fn check_config_semantics(config: &Config, items: &mut Vec<DiagItem>) {
        }
    }
    // Embedding routes validation
    for route in &config.embedding_routes {
        if route.hint.trim().is_empty() {
            items.push(DiagItem::warn(cat, "embedding route with empty hint"));
        }
        if let Some(reason) = embedding_provider_validation_error(&route.provider) {
            items.push(DiagItem::warn(
                cat,
                format!(
                    "embedding route \"{}\" uses invalid provider \"{}\": {}",
                    route.hint, route.provider, reason
                ),
            ));
        }
        if route.model.trim().is_empty() {
            items.push(DiagItem::warn(
                cat,
                format!("embedding route \"{}\" has empty model", route.hint),
            ));
        }
        if route.dimensions.is_some_and(|value| value == 0) {
            items.push(DiagItem::warn(
                cat,
                format!(
                    "embedding route \"{}\" has invalid dimensions=0",
                    route.hint
                ),
            ));
        }
    }
    if let Some(hint) = config
        .memory
        .embedding_model
        .strip_prefix("hint:")
        .map(str::trim)
        .filter(|value| !value.is_empty())
    {
        if !config
            .embedding_routes
            .iter()
            .any(|route| route.hint.trim() == hint)
        {
            items.push(DiagItem::warn(
                cat,
                format!(
                    "memory.embedding_model uses hint \"{hint}\" but no matching [[embedding_routes]] entry exists"
                ),
            ));
        }
    }
    // Channel: at least one configured
    let cc = &config.channels_config;
    let has_channel = cc.telegram.is_some()
@ -396,6 +448,31 @@ fn provider_validation_error(name: &str) -> Option<String> {
    }
 }
 /// Check an embedding provider name taken from an `[[embedding_routes]]` entry.
 ///
 /// Returns `None` when the name is acceptable (`none`, `openai` in any ASCII
 /// case, or `custom:<url>` with an http/https URL) and `Some(reason)` with a
 /// human-readable explanation otherwise.
 fn embedding_provider_validation_error(name: &str) -> Option<String> {
    let provider = name.trim();
    let is_builtin = ["none", "openai"]
        .iter()
        .any(|known| provider.eq_ignore_ascii_case(known));
    if is_builtin {
        return None;
    }
    // Anything that is not a built-in must be spelled `custom:<url>`.
    let endpoint = match provider.strip_prefix("custom:") {
        Some(rest) => rest.trim(),
        None => return Some("supported values: none, openai, custom:<url>".into()),
    };
    if endpoint.is_empty() {
        return Some("custom provider requires a non-empty URL after 'custom:'".into());
    }
    let parsed = match reqwest::Url::parse(endpoint) {
        Ok(parsed) => parsed,
        Err(err) => return Some(format!("invalid custom provider URL: {err}")),
    };
    match parsed.scheme() {
        "http" | "https" => None,
        other => Some(format!(
            "custom provider URL must use http/https, got '{}'",
            other
        )),
    }
 }
 // ── Workspace integrity ──────────────────────────────────────────
 fn check_workspace(config: &Config, items: &mut Vec<DiagItem>) {
@ -891,6 +968,62 @@ mod tests {
        assert_eq!(route_item.unwrap().severity, Severity::Warn);
    }
    #[test]
    fn config_validation_warns_empty_embedding_route_model() {
        // A route with a blank model must be reported as a warning.
        let blank_model_route = crate::config::EmbeddingRouteConfig {
            hint: String::from("semantic"),
            provider: String::from("openai"),
            model: String::new(),
            dimensions: Some(1536),
            api_key: None,
        };
        let mut config = Config::default();
        config.embedding_routes = vec![blank_model_route];
        let mut diagnostics = Vec::new();
        check_config_semantics(&config, &mut diagnostics);
        let needle = "embedding route \"semantic\" has empty model";
        let warning = diagnostics
            .iter()
            .find(|item| item.message.contains(needle));
        assert!(warning.is_some());
        assert_eq!(warning.unwrap().severity, Severity::Warn);
    }
    #[test]
    fn config_validation_warns_invalid_embedding_route_provider() {
        // "groq" is neither a built-in embedding provider nor a `custom:<url>`.
        let unsupported_route = crate::config::EmbeddingRouteConfig {
            hint: String::from("semantic"),
            provider: String::from("groq"),
            model: String::from("text-embedding-3-small"),
            dimensions: None,
            api_key: None,
        };
        let mut config = Config::default();
        config.embedding_routes = vec![unsupported_route];
        let mut diagnostics = Vec::new();
        check_config_semantics(&config, &mut diagnostics);
        let needle = "uses invalid provider \"groq\"";
        let warning = diagnostics
            .iter()
            .find(|item| item.message.contains(needle));
        assert!(warning.is_some());
        assert_eq!(warning.unwrap().severity, Severity::Warn);
    }
    #[test]
    fn config_validation_warns_missing_embedding_hint_target() {
        // The memory section points at a hint, but no route defines it.
        let mut config = Config::default();
        config.memory.embedding_model = String::from("hint:semantic");
        let mut diagnostics = Vec::new();
        check_config_semantics(&config, &mut diagnostics);
        let needle = "no matching [[embedding_routes]] entry exists";
        let warning = diagnostics
            .iter()
            .find(|item| item.message.contains(needle));
        assert!(warning.is_some());
        assert_eq!(warning.unwrap().severity, Severity::Warn);
    }
    #[test]
    fn environment_check_finds_git() {
        let mut items = Vec::new();
@ -910,8 +1043,8 @@ mod tests {
    #[test]
    fn truncate_for_display_preserves_utf8_boundaries() {
-        let preview = truncate_for_display("版本号-alpha-build", 3);
+        let preview = truncate_for_display("🙂example-alpha-build", 3);
-        assert_eq!(preview, "版本号…");
+        assert_eq!(preview, "🙂ex…");
    }
    #[test]
--- a/src/memory/mod.rs
+++ b/src/memory/mod.rs
@ -27,7 +27,7 @@ pub use traits::Memory;
 #[allow(unused_imports)]
 pub use traits::{MemoryCategory, MemoryEntry};
-use crate::config::{MemoryConfig, StorageProviderConfig};
+use crate::config::{EmbeddingRouteConfig, MemoryConfig, StorageProviderConfig};
 use anyhow::Context;
 use std::path::Path;
 use std::sync::Arc;
@ -75,13 +75,83 @@ pub fn effective_memory_backend_name(
    memory_backend.trim().to_ascii_lowercase()
 }
 /// Effective embedding settings after `hint:<name>` route resolution.
 ///
 /// Produced by `resolve_embedding_config`; the fields are handed to
 /// `embeddings::create_embedding_provider` when the SQLite backend is built.
 #[derive(Debug, Clone, PartialEq, Eq)]
 struct ResolvedEmbeddingConfig {
    /// Embedding provider name, trimmed of surrounding whitespace.
    provider: String,
    /// Embedding model ID, trimmed of surrounding whitespace.
    model: String,
    /// Vector size to use for the embedding provider.
    dimensions: usize,
    /// API key to use, or `None` when no non-blank key was supplied.
    api_key: Option<String>,
 }
 /// Resolve the embedding provider, model, dimensions and API key to use.
 ///
 /// When `[memory].embedding_model` has the form `hint:<name>` and a route in
 /// `embedding_routes` carries that hint, the route's provider/model are used,
 /// its `dimensions` override `[memory].embedding_dimensions` when set, and its
 /// non-blank `api_key` takes precedence over `api_key`.
 ///
 /// In every other case the `[memory]` settings are returned unchanged (apart
 /// from trimming): the model is not a hint, the hint has no matching route
 /// (logged as a warning), or the matching route has a blank provider, blank
 /// model or zero dimensions (logged as a warning). Note that in the latter two
 /// cases the returned model is still the literal `hint:<name>` string.
 ///
 /// Blank or whitespace-only API keys are treated as absent.
 fn resolve_embedding_config(
    config: &MemoryConfig,
    embedding_routes: &[EmbeddingRouteConfig],
    api_key: Option<&str>,
 ) -> ResolvedEmbeddingConfig {
    let fallback_api_key = api_key
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .map(str::to_string);
    // Trim once and reuse: hint detection must look at the same normalized
    // value that the fallback reports, otherwise a padded " hint:x" would skip
    // routing yet still be returned as the model "hint:x".
    let configured_model = config.embedding_model.trim();
    let fallback = ResolvedEmbeddingConfig {
        provider: config.embedding_provider.trim().to_string(),
        model: configured_model.to_string(),
        dimensions: config.embedding_dimensions,
        api_key: fallback_api_key.clone(),
    };
    let Some(hint) = configured_model
        .strip_prefix("hint:")
        .map(str::trim)
        .filter(|value| !value.is_empty())
    else {
        return fallback;
    };
    let Some(route) = embedding_routes
        .iter()
        .find(|route| route.hint.trim() == hint)
    else {
        tracing::warn!(
            hint,
            "Unknown embedding route hint; falling back to [memory] embedding settings"
        );
        return fallback;
    };
    let provider = route.provider.trim();
    let model = route.model.trim();
    let dimensions = route.dimensions.unwrap_or(config.embedding_dimensions);
    // A half-configured route is ignored rather than passed on to the provider.
    if provider.is_empty() || model.is_empty() || dimensions == 0 {
        tracing::warn!(
            hint,
            "Invalid embedding route configuration; falling back to [memory] embedding settings"
        );
        return fallback;
    }
    let routed_api_key = route
        .api_key
        .as_deref()
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .map(str::to_string);
    ResolvedEmbeddingConfig {
        provider: provider.to_string(),
        model: model.to_string(),
        dimensions,
        // Route-specific key wins; otherwise reuse the caller-supplied key.
        api_key: routed_api_key.or(fallback_api_key),
    }
 }
 /// Factory: create the right memory backend from config
 pub fn create_memory(
    config: &MemoryConfig,
    workspace_dir: &Path,
    api_key: Option<&str>,
 ) -> anyhow::Result<Box<dyn Memory>> {
-    create_memory_with_storage(config, None, workspace_dir, api_key)
+    create_memory_with_storage_and_routes(config, &[], None, workspace_dir, api_key)
 }
 /// Factory: create memory with optional storage-provider override.
@ -90,9 +160,21 @@ pub fn create_memory_with_storage(
    storage_provider: Option<&StorageProviderConfig>,
    workspace_dir: &Path,
    api_key: Option<&str>,
 ) -> anyhow::Result<Box<dyn Memory>> {
    create_memory_with_storage_and_routes(config, &[], storage_provider, workspace_dir, api_key)
 }
 /// Factory: create memory with optional storage-provider override and embedding routes.
 pub fn create_memory_with_storage_and_routes(
    config: &MemoryConfig,
    embedding_routes: &[EmbeddingRouteConfig],
    storage_provider: Option<&StorageProviderConfig>,
    workspace_dir: &Path,
    api_key: Option<&str>,
 ) -> anyhow::Result<Box<dyn Memory>> {
    let backend_name = effective_memory_backend_name(&config.backend, storage_provider);
    let backend_kind = classify_memory_backend(&backend_name);
    let resolved_embedding = resolve_embedding_config(config, embedding_routes, api_key);
    // Best-effort memory hygiene/retention pass (throttled by state file).
    if let Err(e) = hygiene::run_if_due(config, workspace_dir) {
@ -137,14 +219,14 @@ pub fn create_memory_with_storage(
    fn build_sqlite_memory(
        config: &MemoryConfig,
        workspace_dir: &Path,
-        api_key: Option<&str>,
+        resolved_embedding: &ResolvedEmbeddingConfig,
    ) -> anyhow::Result<SqliteMemory> {
        let embedder: Arc<dyn embeddings::EmbeddingProvider> =
            Arc::from(embeddings::create_embedding_provider(
-                &config.embedding_provider,
+                &resolved_embedding.provider,
-                api_key,
+                resolved_embedding.api_key.as_deref(),
-                &config.embedding_model,
+                &resolved_embedding.model,
-                config.embedding_dimensions,
+                resolved_embedding.dimensions,
            ));
        #[allow(clippy::cast_possible_truncation)]
@ -184,7 +266,7 @@ pub fn create_memory_with_storage(
    create_memory_with_builders(
        &backend_name,
        workspace_dir,
-        || build_sqlite_memory(config, workspace_dir, api_key),
+        || build_sqlite_memory(config, workspace_dir, &resolved_embedding),
        || build_postgres_memory(storage_provider),
        "",
    )
@ -247,7 +329,7 @@ pub fn create_response_cache(config: &MemoryConfig, workspace_dir: &Path) -> Opt
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::config::StorageProviderConfig;
+    use crate::config::{EmbeddingRouteConfig, StorageProviderConfig};
    use tempfile::TempDir;
    #[test]
@ -353,4 +435,102 @@ mod tests {
            .expect("postgres without db_url should be rejected");
        assert!(error.to_string().contains("db_url"));
    }
    #[test]
    fn resolve_embedding_config_uses_base_config_when_model_is_not_hint() {
        // Without a `hint:` prefix the [memory] settings pass straight through.
        let memory_cfg = MemoryConfig {
            embedding_dimensions: 1536,
            embedding_model: String::from("text-embedding-3-small"),
            embedding_provider: String::from("openai"),
            ..MemoryConfig::default()
        };
        let expected = ResolvedEmbeddingConfig {
            provider: String::from("openai"),
            model: String::from("text-embedding-3-small"),
            dimensions: 1536,
            api_key: Some(String::from("base-key")),
        };
        let actual = resolve_embedding_config(&memory_cfg, &[], Some("base-key"));
        assert_eq!(actual, expected);
    }
    #[test]
    fn resolve_embedding_config_uses_matching_route_with_api_key_override() {
        // A matching route supplies provider, model, dimensions and its own key.
        let memory_cfg = MemoryConfig {
            embedding_dimensions: 1536,
            embedding_model: String::from("hint:semantic"),
            embedding_provider: String::from("none"),
            ..MemoryConfig::default()
        };
        let semantic_route = EmbeddingRouteConfig {
            hint: String::from("semantic"),
            provider: String::from("custom:https://api.example.com/v1"),
            model: String::from("custom-embed-v2"),
            dimensions: Some(1024),
            api_key: Some(String::from("route-key")),
        };
        let routes = vec![semantic_route];
        let expected = ResolvedEmbeddingConfig {
            provider: String::from("custom:https://api.example.com/v1"),
            model: String::from("custom-embed-v2"),
            dimensions: 1024,
            api_key: Some(String::from("route-key")),
        };
        let actual = resolve_embedding_config(&memory_cfg, &routes, Some("base-key"));
        assert_eq!(actual, expected);
    }
    #[test]
    fn resolve_embedding_config_falls_back_when_hint_is_missing() {
        // No route defines the hint, so the [memory] values are returned as-is,
        // including the literal `hint:` model string.
        let memory_cfg = MemoryConfig {
            embedding_dimensions: 1536,
            embedding_model: String::from("hint:semantic"),
            embedding_provider: String::from("openai"),
            ..MemoryConfig::default()
        };
        let expected = ResolvedEmbeddingConfig {
            provider: String::from("openai"),
            model: String::from("hint:semantic"),
            dimensions: 1536,
            api_key: Some(String::from("base-key")),
        };
        let actual = resolve_embedding_config(&memory_cfg, &[], Some("base-key"));
        assert_eq!(actual, expected);
    }
    #[test]
    fn resolve_embedding_config_falls_back_when_route_is_invalid() {
        // The route matches the hint but has a blank provider and zero
        // dimensions, so it is ignored in favour of the [memory] values.
        let memory_cfg = MemoryConfig {
            embedding_dimensions: 1536,
            embedding_model: String::from("hint:semantic"),
            embedding_provider: String::from("openai"),
            ..MemoryConfig::default()
        };
        let broken_route = EmbeddingRouteConfig {
            hint: String::from("semantic"),
            provider: String::new(),
            model: String::from("text-embedding-3-small"),
            dimensions: Some(0),
            api_key: None,
        };
        let routes = vec![broken_route];
        let expected = ResolvedEmbeddingConfig {
            provider: String::from("openai"),
            model: String::from("hint:semantic"),
            dimensions: 1536,
            api_key: Some(String::from("base-key")),
        };
        let actual = resolve_embedding_config(&memory_cfg, &routes, Some("base-key"));
        assert_eq!(actual, expected);
    }
 }
--- a/src/onboard/wizard.rs
+++ b/src/onboard/wizard.rs
@ -160,6 +160,7 @@ pub async fn run_wizard() -> Result<Config> {
        scheduler: crate::config::schema::SchedulerConfig::default(),
        agent: crate::config::schema::AgentConfig::default(),
        model_routes: Vec::new(),
        embedding_routes: Vec::new(),
        heartbeat: HeartbeatConfig::default(),
        cron: crate::config::CronConfig::default(),
        channels_config,
@ -377,6 +378,7 @@ pub async fn run_quick_setup(
        scheduler: crate::config::schema::SchedulerConfig::default(),
        agent: crate::config::schema::AgentConfig::default(),
        model_routes: Vec::new(),
        embedding_routes: Vec::new(),
        heartbeat: HeartbeatConfig::default(),
        cron: crate::config::CronConfig::default(),
        channels_config: ChannelsConfig::default(),