feat: full-stack search engine — FTS5, vector search, hybrid merge, embedding cache, chunker
The Full Stack (All Custom): - Vector DB: embeddings stored as BLOB, cosine similarity in pure Rust - Keyword Search: FTS5 virtual tables with BM25 scoring + auto-sync triggers - Hybrid Merge: weighted fusion of vector + keyword results (configurable weights) - Embeddings: provider abstraction (OpenAI, custom URL, noop fallback) - Chunking: line-based markdown chunker with heading preservation - Caching: embedding_cache table with LRU eviction - Safe Reindex: rebuild FTS5 + re-embed missing vectors New modules: - src/memory/embeddings.rs — EmbeddingProvider trait + OpenAI + Noop + factory - src/memory/vector.rs — cosine similarity, vec↔bytes, ScoredResult, hybrid_merge - src/memory/chunker.rs — markdown-aware document splitting Upgraded: - src/memory/sqlite.rs — FTS5 schema, embedding column, hybrid recall, cache, reindex - src/config/schema.rs — MemoryConfig expanded with embedding/search settings - All callers updated to pass api_key for embedding provider 739 tests passing, 0 clippy warnings (Rust 1.93.1), cargo-deny clean
This commit is contained in:
parent
4fceba0740
commit
0e7f501fd6
10 changed files with 1423 additions and 96 deletions
|
|
@ -1,6 +1,9 @@
|
|||
pub mod chunker;
|
||||
pub mod embeddings;
|
||||
pub mod markdown;
|
||||
pub mod sqlite;
|
||||
pub mod traits;
|
||||
pub mod vector;
|
||||
|
||||
pub use markdown::MarkdownMemory;
|
||||
pub use sqlite::SqliteMemory;
|
||||
|
|
@ -10,14 +13,34 @@ pub use traits::{MemoryCategory, MemoryEntry};
|
|||
|
||||
use crate::config::MemoryConfig;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Factory: create the right memory backend from config
|
||||
pub fn create_memory(
|
||||
config: &MemoryConfig,
|
||||
workspace_dir: &Path,
|
||||
api_key: Option<&str>,
|
||||
) -> anyhow::Result<Box<dyn Memory>> {
|
||||
match config.backend.as_str() {
|
||||
"sqlite" => Ok(Box::new(SqliteMemory::new(workspace_dir)?)),
|
||||
"sqlite" => {
|
||||
let embedder: Arc<dyn embeddings::EmbeddingProvider> =
|
||||
Arc::from(embeddings::create_embedding_provider(
|
||||
&config.embedding_provider,
|
||||
api_key,
|
||||
&config.embedding_model,
|
||||
config.embedding_dimensions,
|
||||
));
|
||||
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
let mem = SqliteMemory::with_embedder(
|
||||
workspace_dir,
|
||||
embedder,
|
||||
config.vector_weight as f32,
|
||||
config.keyword_weight as f32,
|
||||
config.embedding_cache_size,
|
||||
)?;
|
||||
Ok(Box::new(mem))
|
||||
}
|
||||
"markdown" | "none" => Ok(Box::new(MarkdownMemory::new(workspace_dir))),
|
||||
other => {
|
||||
tracing::warn!("Unknown memory backend '{other}', falling back to markdown");
|
||||
|
|
@ -36,9 +59,9 @@ mod tests {
|
|||
let tmp = TempDir::new().unwrap();
|
||||
let cfg = MemoryConfig {
|
||||
backend: "sqlite".into(),
|
||||
auto_save: true,
|
||||
..MemoryConfig::default()
|
||||
};
|
||||
let mem = create_memory(&cfg, tmp.path()).unwrap();
|
||||
let mem = create_memory(&cfg, tmp.path(), None).unwrap();
|
||||
assert_eq!(mem.name(), "sqlite");
|
||||
}
|
||||
|
||||
|
|
@ -47,9 +70,9 @@ mod tests {
|
|||
let tmp = TempDir::new().unwrap();
|
||||
let cfg = MemoryConfig {
|
||||
backend: "markdown".into(),
|
||||
auto_save: true,
|
||||
..MemoryConfig::default()
|
||||
};
|
||||
let mem = create_memory(&cfg, tmp.path()).unwrap();
|
||||
let mem = create_memory(&cfg, tmp.path(), None).unwrap();
|
||||
assert_eq!(mem.name(), "markdown");
|
||||
}
|
||||
|
||||
|
|
@ -58,9 +81,9 @@ mod tests {
|
|||
let tmp = TempDir::new().unwrap();
|
||||
let cfg = MemoryConfig {
|
||||
backend: "none".into(),
|
||||
auto_save: true,
|
||||
..MemoryConfig::default()
|
||||
};
|
||||
let mem = create_memory(&cfg, tmp.path()).unwrap();
|
||||
let mem = create_memory(&cfg, tmp.path(), None).unwrap();
|
||||
assert_eq!(mem.name(), "markdown");
|
||||
}
|
||||
|
||||
|
|
@ -69,9 +92,9 @@ mod tests {
|
|||
let tmp = TempDir::new().unwrap();
|
||||
let cfg = MemoryConfig {
|
||||
backend: "redis".into(),
|
||||
auto_save: true,
|
||||
..MemoryConfig::default()
|
||||
};
|
||||
let mem = create_memory(&cfg, tmp.path()).unwrap();
|
||||
let mem = create_memory(&cfg, tmp.path(), None).unwrap();
|
||||
assert_eq!(mem.name(), "markdown");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue