zeroclaw/src/memory/snapshot.rs

470 lines
15 KiB
Rust

//! Memory snapshot — export/import core memories as human-readable Markdown.
//!
//! **Atomic Soul Export**: dumps `MemoryCategory::Core` from SQLite into
//! `MEMORY_SNAPSHOT.md` so the agent's "soul" is always Git-visible.
//!
//! **Auto-Hydration**: if `brain.db` is missing but `MEMORY_SNAPSHOT.md` exists,
//! re-indexes all entries back into a fresh SQLite database.
use anyhow::Result;
use chrono::Local;
use rusqlite::{params, Connection};
use std::fmt::Write;
use std::fs;
use std::path::{Path, PathBuf};
/// Filename for the snapshot (lives at workspace root for Git visibility).
///
/// The snapshot location is obtained by joining this name directly onto the
/// workspace directory.
pub const SNAPSHOT_FILENAME: &str = "MEMORY_SNAPSHOT.md";
/// Header written at the top of every snapshot file.
const SNAPSHOT_HEADER: &str = "# 🧠 ZeroClaw Memory Snapshot\n\n\
> Auto-generated by ZeroClaw. Do not edit manually unless you know what you're doing.\n\
> This file is the \"soul\" of your agent — if `brain.db` is lost, start the agent\n\
> in this workspace and it will auto-hydrate from this file.\n\n";
/// Export all `Core` memories from SQLite → `MEMORY_SNAPSHOT.md`.
///
/// Returns the number of entries exported.
pub fn export_snapshot(workspace_dir: &Path) -> Result<usize> {
let db_path = workspace_dir.join("memory").join("brain.db");
if !db_path.exists() {
tracing::debug!("snapshot export skipped: brain.db does not exist");
return Ok(0);
}
let conn = Connection::open(&db_path)?;
conn.execute_batch("PRAGMA journal_mode = WAL; PRAGMA synchronous = NORMAL;")?;
let mut stmt = conn.prepare(
"SELECT key, content, category, created_at, updated_at
FROM memories
WHERE category = 'core'
ORDER BY updated_at DESC",
)?;
let rows: Vec<(String, String, String, String, String)> = stmt
.query_map([], |row| {
Ok((
row.get(0)?,
row.get(1)?,
row.get(2)?,
row.get(3)?,
row.get(4)?,
))
})?
.filter_map(|r| r.ok())
.collect();
if rows.is_empty() {
tracing::debug!("snapshot export: no core memories to export");
return Ok(0);
}
let mut output = String::with_capacity(rows.len() * 200);
output.push_str(SNAPSHOT_HEADER);
let now = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
write!(output, "**Last exported:** {now}\n\n").unwrap();
write!(output, "**Total core memories:** {}\n\n---\n\n", rows.len()).unwrap();
for (key, content, _category, created_at, updated_at) in &rows {
write!(output, "### 🔑 `{key}`\n\n").unwrap();
write!(output, "{content}\n\n").unwrap();
write!(
output,
"*Created: {created_at} | Updated: {updated_at}*\n\n---\n\n"
)
.unwrap();
}
let snapshot_path = snapshot_path(workspace_dir);
fs::write(&snapshot_path, output)?;
tracing::info!(
"📸 Memory snapshot exported: {} core memories → {}",
rows.len(),
snapshot_path.display()
);
Ok(rows.len())
}
/// Import memories from `MEMORY_SNAPSHOT.md` into SQLite.
///
/// Called during cold-boot when `brain.db` doesn't exist but the snapshot does.
/// Creates `<workspace_dir>/memory/brain.db` (and its schema) if needed, then
/// inserts every parsed entry with category `'core'`. Keys that already exist
/// in the database are left untouched. Both `created_at` and `updated_at` of
/// restored rows are set to the time of hydration.
///
/// Returns the number of entries hydrated; `Ok(0)` when the snapshot file is
/// missing or contains no entries.
///
/// # Errors
///
/// Fails if the snapshot cannot be read, the `memory` directory cannot be
/// created, or the database cannot be opened or its schema initialised.
/// Failures to insert an individual entry are logged and skipped.
pub fn hydrate_from_snapshot(workspace_dir: &Path) -> Result<usize> {
    let snapshot = snapshot_path(workspace_dir);
    if !snapshot.exists() {
        return Ok(0);
    }
    let content = fs::read_to_string(&snapshot)?;
    let entries = parse_snapshot(&content);
    if entries.is_empty() {
        return Ok(0);
    }

    // Ensure the memory directory exists
    let db_dir = workspace_dir.join("memory");
    fs::create_dir_all(&db_dir)?;
    let db_path = db_dir.join("brain.db");
    let conn = Connection::open(&db_path)?;
    conn.execute_batch("PRAGMA journal_mode = WAL; PRAGMA synchronous = NORMAL;")?;

    // Initialize schema (same as SqliteMemory::init_schema)
    conn.execute_batch(
        "CREATE TABLE IF NOT EXISTS memories (
            id TEXT PRIMARY KEY,
            key TEXT NOT NULL UNIQUE,
            content TEXT NOT NULL,
            category TEXT NOT NULL DEFAULT 'core',
            embedding BLOB,
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL
        );
        CREATE INDEX IF NOT EXISTS idx_mem_key ON memories(key);
        CREATE INDEX IF NOT EXISTS idx_mem_cat ON memories(category);
        CREATE INDEX IF NOT EXISTS idx_mem_updated ON memories(updated_at);
        CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts
            USING fts5(key, content, content='memories', content_rowid='rowid');
        CREATE TABLE IF NOT EXISTS embedding_cache (
            content_hash TEXT PRIMARY KEY,
            embedding BLOB NOT NULL,
            created_at TEXT NOT NULL
        );",
    )?;

    let now = Local::now().to_rfc3339();
    let mut hydrated = 0;
    for (key, content) in &entries {
        let id = uuid::Uuid::new_v4().to_string();
        let result = conn.execute(
            "INSERT OR IGNORE INTO memories (id, key, content, category, created_at, updated_at)
             VALUES (?1, ?2, ?3, 'core', ?4, ?5)",
            params![id, key, content, now, now],
        );
        match result {
            Ok(changed) if changed > 0 => {
                // `memories_fts` is an external-content table keyed by
                // `memories.rowid`, and the schema created here defines no
                // sync triggers. The index row therefore has to be inserted
                // by hand with the rowid SQLite just assigned; letting FTS5
                // pick its own rowid would only match by coincidence.
                let rowid = conn.last_insert_rowid();
                if let Err(e) = conn.execute(
                    "INSERT INTO memories_fts(rowid, key, content) VALUES (?1, ?2, ?3)",
                    params![rowid, key, content],
                ) {
                    // The memory itself is restored; only full-text search
                    // for it is affected, so report and carry on.
                    tracing::warn!("hydrate: failed to index key '{key}' for full-text search: {e}");
                }
                hydrated += 1;
            }
            Ok(_) => {
                tracing::debug!("hydrate: key '{key}' already exists, skipping");
            }
            Err(e) => {
                tracing::warn!("hydrate: failed to insert key '{key}': {e}");
            }
        }
    }

    tracing::info!(
        "🧬 Memory hydration complete: {} entries restored from {}",
        hydrated,
        snapshot.display()
    );
    Ok(hydrated)
}
/// Decide whether startup should rebuild the database from the snapshot.
///
/// Answers `true` only when both hold:
/// 1. `MEMORY_SNAPSHOT.md` is present in the workspace, and
/// 2. `memory/brain.db` is absent, cannot be stat'ed, or is smaller than
///    4096 bytes (treated as "created but never populated").
pub fn should_hydrate(workspace_dir: &Path) -> bool {
    // Without a snapshot there is nothing to hydrate from.
    if !snapshot_path(workspace_dir).exists() {
        return false;
    }

    let db_file = workspace_dir.join("memory").join("brain.db");
    match fs::metadata(&db_file) {
        // Size heuristic: a file below 4096 bytes counts as empty.
        Ok(meta) => meta.len() < 4096,
        // Missing or unreadable database counts as "needs hydration".
        Err(_) => true,
    }
}
/// Location of the snapshot: [`SNAPSHOT_FILENAME`] directly inside `workspace_dir`.
fn snapshot_path(workspace_dir: &Path) -> PathBuf {
    let mut location = workspace_dir.to_path_buf();
    location.push(SNAPSHOT_FILENAME);
    location
}
/// Parse the structured markdown snapshot back into (key, content) pairs.
///
/// An entry starts at a heading line of the form ``### 🔑 `key` `` and runs
/// until the next such heading or the end of input. Everything before the
/// first heading is ignored. The trailing footer of each entry (blank lines,
/// `---` separators and the `*Created: … | Updated: …*` metadata line) is
/// stripped, and the remaining body is trimmed. Entries whose key or body is
/// empty are dropped.
///
/// Only the *trailing* footer is removed: a `---` rule or a line starting
/// with `*Created:` in the middle of a memory's text is part of that memory
/// and is preserved, so such content survives an export/import round trip.
///
/// Limitation: a body line that itself looks like an entry heading still
/// starts a new entry, because the snapshot format has no escaping.
fn parse_snapshot(input: &str) -> Vec<(String, String)> {
    const KEY_PREFIX: &str = "### 🔑 `";

    /// True for lines that belong to the footer written after an entry body.
    fn is_footer(line: &str) -> bool {
        let t = line.trim();
        t.is_empty() || t == "---" || t.starts_with("*Created:")
    }

    /// Finish the entry being accumulated (if any) and reset the line buffer.
    fn flush(key: Option<String>, body: &mut Vec<&str>, out: &mut Vec<(String, String)>) {
        while matches!(body.last(), Some(line) if is_footer(line)) {
            body.pop();
        }
        let joined = body.join("\n");
        body.clear();
        let content = joined.trim();
        if let Some(key) = key {
            if !content.is_empty() {
                out.push((key, content.to_string()));
            }
        }
    }

    let mut entries = Vec::new();
    let mut current_key: Option<String> = None;
    let mut body: Vec<&str> = Vec::new();

    for line in input.lines() {
        let trimmed = line.trim();
        // Match: ### 🔑 `key_name`
        // A bare prefix (whose own backtick is also the last character) still
        // counts as a heading, just one with an empty key.
        let heading = trimmed
            .strip_prefix(KEY_PREFIX)
            .filter(|_| trimmed.ends_with('`'))
            .map(|rest| rest.strip_suffix('`').unwrap_or(""));

        match heading {
            Some(key) => {
                flush(current_key.take(), &mut body, &mut entries);
                // An empty key closes the previous entry but opens no new one.
                if !key.is_empty() {
                    current_key = Some(key.to_string());
                }
            }
            // Keep the raw line so interior indentation is preserved.
            None if current_key.is_some() => body.push(line),
            // Preamble before the first heading, or text after an empty key.
            None => {}
        }
    }

    // Don't forget the last entry
    flush(current_key, &mut body, &mut entries);
    entries
}
// Unit tests for snapshot parsing, export, hydration and the
// should-hydrate check. Filesystem tests run inside a fresh `TempDir`.
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
/// Two entries preceded by the header lines parse into two (key, content)
/// pairs in file order.
#[test]
fn parse_snapshot_basic() {
let input = r#"# 🧠 ZeroClaw Memory Snapshot
> Auto-generated by ZeroClaw.
**Last exported:** 2025-01-15 14:30:00
**Total core memories:** 2
---
### 🔑 `identity`
I am ZeroClaw, a self-preserving AI agent.
*Created: 2025-01-15 | Updated: 2025-01-15*
---
### 🔑 `preference_lang`
The user prefers Rust for systems programming.
*Created: 2025-01-14 | Updated: 2025-01-15*
---
"#;
let entries = parse_snapshot(input);
assert_eq!(entries.len(), 2);
assert_eq!(entries[0].0, "identity");
assert!(entries[0].1.contains("self-preserving"));
assert_eq!(entries[1].0, "preference_lang");
assert!(entries[1].1.contains("Rust"));
}
/// Input without any entry heading yields no entries.
#[test]
fn parse_snapshot_empty() {
let input = "# 🧠 ZeroClaw Memory Snapshot\n\n> Nothing here.\n";
let entries = parse_snapshot(input);
assert!(entries.is_empty());
}
/// All body lines of a multi-line entry end up in its content.
#[test]
fn parse_snapshot_multiline_content() {
let input = r#"### 🔑 `rules`
Rule 1: Always be helpful.
Rule 2: Never lie.
Rule 3: Protect the user.
*Created: 2025-01-15 | Updated: 2025-01-15*
---
"#;
let entries = parse_snapshot(input);
assert_eq!(entries.len(), 1);
assert!(entries[0].1.contains("Rule 1"));
assert!(entries[0].1.contains("Rule 3"));
}
/// Exporting from a workspace with no `brain.db` succeeds with a count of 0.
#[test]
fn export_no_db_returns_zero() {
let tmp = TempDir::new().unwrap();
let count = export_snapshot(tmp.path()).unwrap();
assert_eq!(count, 0);
}
/// End-to-end: export core memories, delete the database, hydrate from the
/// snapshot, and check that only the core rows come back.
#[test]
fn export_and_hydrate_roundtrip() {
let tmp = TempDir::new().unwrap();
let workspace = tmp.path();
// Create a brain.db manually with some core memories
let db_dir = workspace.join("memory");
fs::create_dir_all(&db_dir).unwrap();
let db_path = db_dir.join("brain.db");
let conn = Connection::open(&db_path).unwrap();
conn.execute_batch(
"PRAGMA journal_mode = WAL;
CREATE TABLE IF NOT EXISTS memories (
id TEXT PRIMARY KEY,
key TEXT NOT NULL UNIQUE,
content TEXT NOT NULL,
category TEXT NOT NULL DEFAULT 'core',
embedding BLOB,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_mem_key ON memories(key);",
)
.unwrap();
let now = Local::now().to_rfc3339();
conn.execute(
"INSERT INTO memories (id, key, content, category, created_at, updated_at)
VALUES ('id1', 'identity', 'I am a test agent', 'core', ?1, ?2)",
params![now, now],
)
.unwrap();
conn.execute(
"INSERT INTO memories (id, key, content, category, created_at, updated_at)
VALUES ('id2', 'preference', 'User likes Rust', 'core', ?1, ?2)",
params![now, now],
)
.unwrap();
// Non-core entry (should NOT be exported)
conn.execute(
"INSERT INTO memories (id, key, content, category, created_at, updated_at)
VALUES ('id3', 'conv1', 'Random convo', 'conversation', ?1, ?2)",
params![now, now],
)
.unwrap();
// Close the connection before exporting and later deleting the file.
drop(conn);
// Export snapshot
let exported = export_snapshot(workspace).unwrap();
assert_eq!(exported, 2, "Should export only core memories");
// Verify the file exists and is readable
let snapshot = workspace.join(SNAPSHOT_FILENAME);
assert!(snapshot.exists());
let content = fs::read_to_string(&snapshot).unwrap();
assert!(content.contains("identity"));
assert!(content.contains("I am a test agent"));
assert!(content.contains("preference"));
assert!(!content.contains("Random convo"));
// Simulate catastrophic failure: delete brain.db
fs::remove_file(&db_path).unwrap();
assert!(!db_path.exists());
// Verify should_hydrate detects the scenario
assert!(should_hydrate(workspace));
// Hydrate from snapshot
let hydrated = hydrate_from_snapshot(workspace).unwrap();
assert_eq!(hydrated, 2, "Should hydrate both core memories");
// Verify brain.db was recreated
assert!(db_path.exists());
// Verify the data is actually in the new database
let conn = Connection::open(&db_path).unwrap();
let count: i64 = conn
.query_row("SELECT COUNT(*) FROM memories", [], |row| row.get(0))
.unwrap();
assert_eq!(count, 2);
let identity: String = conn
.query_row(
"SELECT content FROM memories WHERE key = 'identity'",
[],
|row| row.get(0),
)
.unwrap();
assert_eq!(identity, "I am a test agent");
}
/// `should_hydrate` is true only while a snapshot exists and no populated
/// database does.
#[test]
fn should_hydrate_only_when_needed() {
let tmp = TempDir::new().unwrap();
let workspace = tmp.path();
// No DB, no snapshot → false
assert!(!should_hydrate(workspace));
// Create snapshot but no DB → true
let snapshot = workspace.join(SNAPSHOT_FILENAME);
fs::write(&snapshot, "### 🔑 `test`\n\nHello\n").unwrap();
assert!(should_hydrate(workspace));
// Create a real DB → false
let db_dir = workspace.join("memory");
fs::create_dir_all(&db_dir).unwrap();
let db_path = db_dir.join("brain.db");
let conn = Connection::open(&db_path).unwrap();
conn.execute_batch(
"CREATE TABLE IF NOT EXISTS memories (
id TEXT PRIMARY KEY,
key TEXT NOT NULL UNIQUE,
content TEXT NOT NULL,
category TEXT NOT NULL DEFAULT 'core',
embedding BLOB,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);
INSERT INTO memories VALUES('x','x','x','core',NULL,'2025-01-01','2025-01-01');",
)
.unwrap();
drop(conn);
assert!(!should_hydrate(workspace));
}
/// Hydrating a workspace that has no snapshot file succeeds with a count of 0.
#[test]
fn hydrate_no_snapshot_returns_zero() {
let tmp = TempDir::new().unwrap();
let count = hydrate_from_snapshot(tmp.path()).unwrap();
assert_eq!(count, 0);
}
}