//! Periodic memory hygiene for the workspace: prunes stale `daily/` logs and
//! `conversations/` documents past their retention windows, while never
//! touching the protected identity documents.

use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, Ordering};

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};

use crate::bootstrap::ironclaw_base_dir;
use crate::workspace::Workspace;
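/// Process-wide flag preventing overlapping hygiene passes.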
static RUNNING: AtomicBool = AtomicBool::new(false);
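/// Documents that hygiene must never delete, matched by file name.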
const IDENTITY_PATHS: &[&str] = &[
crate::workspace::document::paths::MEMORY,
crate::workspace::document::paths::IDENTITY,
crate::workspace::document::paths::SOUL,
crate::workspace::document::paths::AGENTS,
crate::workspace::document::paths::USER,
crate::workspace::document::paths::HEARTBEAT,
crate::workspace::document::paths::README,
crate::workspace::document::paths::TOOLS,
crate::workspace::document::paths::BOOTSTRAP,
];
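/// Returns true when `path` names a protected identity document. Only the
/// final path segment is compared, case-insensitively, so nested copies such
/// as `conversations/SOUL.md` are protected too.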
fn is_identity_path(path: &str) -> bool {
let file_name = path.rsplit('/').next().unwrap_or(path);
let file_name_lower = file_name.to_lowercase();
IDENTITY_PATHS
.iter()
.any(|&p| p.to_lowercase() == file_name_lower)
}
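/// Tunables for a hygiene pass. `state_dir` is where the timestamp of the
/// last run is persisted between processes.
///
/// A sketch of overriding a single field while keeping the defaults:
///
/// ```ignore
/// let config = HygieneConfig {
///     daily_retention_days: 14,
///     ..HygieneConfig::default()
/// };
/// ```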
#[derive(Debug, Clone)]
pub struct HygieneConfig {
pub enabled: bool,
pub daily_retention_days: u32,
pub conversation_retention_days: u32,
pub cadence_hours: u32,
pub state_dir: PathBuf,
}
impl Default for HygieneConfig {
fn default() -> Self {
Self {
enabled: true,
daily_retention_days: 30,
conversation_retention_days: 7,
cadence_hours: 12,
state_dir: ironclaw_base_dir(),
}
}
}
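/// On-disk record (JSON) of when the last hygiene pass started.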
#[derive(Debug, Serialize, Deserialize)]
struct HygieneState {
last_run: DateTime<Utc>,
}
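/// Outcome of a hygiene pass: per-category deletion counts, plus whether the
/// pass was skipped (disabled, already running, or cadence not yet elapsed).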
#[derive(Debug, Default)]
pub struct HygieneReport {
pub daily_logs_deleted: u32,
pub conversation_docs_deleted: u32,
pub skipped: bool,
}
impl HygieneReport {
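    /// True when the pass deleted at least one document.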
pub fn had_work(&self) -> bool {
self.daily_logs_deleted > 0 || self.conversation_docs_deleted > 0
}
}
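/// Runs one hygiene pass if hygiene is enabled, no other pass is already in
/// flight in this process, and at least `cadence_hours` have elapsed since
/// the last recorded run. Returns a [`HygieneReport`] describing what, if
/// anything, was deleted.
///
/// A minimal call-site sketch, assuming a `Workspace` has already been
/// constructed elsewhere:
///
/// ```ignore
/// let report = run_if_due(&workspace, &HygieneConfig::default()).await;
/// if report.had_work() {
///     tracing::info!(?report, "hygiene deleted documents");
/// }
/// ```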
pub async fn run_if_due(workspace: &Workspace, config: &HygieneConfig) -> HygieneReport {
if !config.enabled {
return HygieneReport {
skipped: true,
..Default::default()
};
}
if RUNNING
.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
.is_err()
{
tracing::debug!("memory hygiene: skipping (another pass is running)");
return HygieneReport {
skipped: true,
..Default::default()
};
}
let _guard = RunningGuard;
let state_file = config.state_dir.join("memory_hygiene_state.json");
if let Some(state) = load_state(&state_file) {
let elapsed = Utc::now().signed_duration_since(state.last_run);
let cadence = chrono::Duration::hours(i64::from(config.cadence_hours));
if elapsed < cadence {
tracing::debug!(
hours_since_last = elapsed.num_hours(),
cadence_hours = config.cadence_hours,
"memory hygiene: skipping (cadence not elapsed)"
);
return HygieneReport {
skipped: true,
..Default::default()
};
}
}
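    // Record the run before doing any work: if cleanup fails below, the next
    // attempt still waits a full cadence instead of retrying on every call.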
save_state(&state_file);
tracing::info!(
daily_retention_days = config.daily_retention_days,
conversation_retention_days = config.conversation_retention_days,
"memory hygiene: starting cleanup pass"
);
let mut report = HygieneReport::default();
match cleanup_daily_logs(workspace, config.daily_retention_days).await {
Ok(count) => report.daily_logs_deleted = count,
Err(e) => tracing::warn!("memory hygiene: failed to clean daily logs: {e}"),
}
match cleanup_conversation_docs(workspace, config.conversation_retention_days).await {
Ok(count) => report.conversation_docs_deleted = count,
Err(e) => tracing::warn!("memory hygiene: failed to clean conversation docs: {e}"),
}
if report.had_work() {
tracing::info!(
daily_logs_deleted = report.daily_logs_deleted,
conversation_docs_deleted = report.conversation_docs_deleted,
"memory hygiene: cleanup complete"
);
} else {
tracing::debug!("memory hygiene: nothing to clean");
}
report
}
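/// Clears `RUNNING` on drop so the flag is released however the pass exits,
/// including on panic.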
struct RunningGuard;
impl Drop for RunningGuard {
fn drop(&mut self) {
RUNNING.store(false, Ordering::SeqCst);
}
}
/// Deletes non-identity files under `daily/` older than `retention_days`.
async fn cleanup_daily_logs(
    workspace: &Workspace,
    retention_days: u32,
) -> Result<u32, anyhow::Error> {
    cleanup_prefix(workspace, "daily/", retention_days, "daily log").await
}
/// Deletes non-identity files under `conversations/` older than `retention_days`.
async fn cleanup_conversation_docs(
    workspace: &Workspace,
    retention_days: u32,
) -> Result<u32, anyhow::Error> {
    cleanup_prefix(workspace, "conversations/", retention_days, "conversation doc").await
}
/// Shared deletion walk: removes non-directory, non-identity entries under
/// `prefix` whose `updated_at` is older than the retention cutoff. Returns
/// the number of documents deleted; individual delete failures are logged
/// and skipped rather than aborting the pass.
async fn cleanup_prefix(
    workspace: &Workspace,
    prefix: &str,
    retention_days: u32,
    label: &str,
) -> Result<u32, anyhow::Error> {
    let cutoff = Utc::now() - chrono::Duration::days(i64::from(retention_days));
    let entries = workspace.list(prefix).await?;
    let mut deleted = 0u32;
    for entry in entries {
        if entry.is_directory || is_identity_path(&entry.path) {
            continue;
        }
        if let Some(updated_at) = entry.updated_at
            && updated_at < cutoff
        {
            // `list` may return entries with or without the directory prefix;
            // normalize so deletion always targets the fully qualified path.
            let path = if entry.path.starts_with(prefix) {
                entry.path.clone()
            } else {
                format!("{prefix}{}", entry.path)
            };
            if let Err(e) = workspace.delete(&path).await {
                tracing::warn!(path, "memory hygiene: failed to delete {label}: {e}");
            } else {
                tracing::debug!(path, "memory hygiene: deleted old {label}");
                deleted += 1;
            }
        }
    }
    Ok(deleted)
}
/// Reads the persisted hygiene state, returning `None` when the file is
/// missing or cannot be parsed.
fn load_state(path: &std::path::Path) -> Option<HygieneState> {
    let data = std::fs::read_to_string(path).ok()?;
    serde_json::from_str(&data).ok()
}
/// Persists `Utc::now()` as the last run time. The state is written to a
/// sibling temp file and renamed into place so a crash mid-write can never
/// leave a truncated state file behind.
fn save_state(path: &std::path::Path) {
    let state = HygieneState {
        last_run: Utc::now(),
    };
    if let Some(dir) = path.parent()
        && let Err(e) = std::fs::create_dir_all(dir)
    {
        tracing::warn!("memory hygiene: failed to create state dir: {e}");
        return;
    }
    let json = match serde_json::to_string_pretty(&state) {
        Ok(json) => json,
        Err(e) => {
            tracing::warn!("memory hygiene: failed to serialize state: {e}");
            return;
        }
    };
let tmp_path = path.with_extension("json.tmp");
if let Err(e) = std::fs::write(&tmp_path, &json) {
tracing::warn!("memory hygiene: failed to write temp state: {e}");
return;
}
if let Err(e) = std::fs::rename(&tmp_path, path) {
tracing::warn!("memory hygiene: failed to rename state file: {e}");
let _ = std::fs::remove_file(&tmp_path);
}
}
#[cfg(test)]
mod tests {
    use std::sync::Mutex;

    use super::*;

    /// Serializes tests that mutate the global `RUNNING` flag.
    static RUNNING_TESTS: Mutex<()> = Mutex::new(());
#[test]
fn default_config_is_reasonable() {
let cfg = HygieneConfig::default();
assert!(cfg.enabled);
assert_eq!(cfg.daily_retention_days, 30);
assert_eq!(cfg.conversation_retention_days, 7);
assert_eq!(cfg.cadence_hours, 12);
}
#[test]
fn report_defaults_to_no_work() {
let report = HygieneReport::default();
assert!(!report.had_work());
assert!(!report.skipped);
}
#[test]
fn report_had_work_when_deleted() {
let report = HygieneReport {
daily_logs_deleted: 3,
conversation_docs_deleted: 0,
skipped: false,
};
assert!(report.had_work());
}
#[test]
fn report_had_work_when_conversation_deleted() {
let report = HygieneReport {
daily_logs_deleted: 0,
conversation_docs_deleted: 2,
skipped: false,
};
assert!(report.had_work());
}
#[test]
fn is_identity_path_excludes_sacred_docs() {
for name in [
"MEMORY.md",
"IDENTITY.md",
"SOUL.md",
"AGENTS.md",
"USER.md",
"HEARTBEAT.md",
"README.md",
"TOOLS.md",
"BOOTSTRAP.md",
] {
assert!(is_identity_path(name), "{name} should be excluded");
assert!(
is_identity_path(&format!("conversations/{name}")),
"conversations/{name} should be excluded via path"
);
}
}
#[test]
fn is_identity_path_case_insensitive() {
assert!(
is_identity_path("memory.md"),
"lowercase memory.md should be excluded"
);
assert!(
is_identity_path("Memory.md"),
"mixed case Memory.md should be excluded"
);
assert!(
is_identity_path("MEMORY.MD"),
"uppercase MEMORY.MD should be excluded"
);
assert!(
is_identity_path("identity.md"),
"lowercase identity.md should be excluded"
);
assert!(
is_identity_path("conversations/soul.md"),
"conversations/soul.md should be excluded"
);
assert!(
is_identity_path("conversations/SOUL.MD"),
"conversations/SOUL.MD should be excluded"
);
}
#[test]
fn is_identity_path_allows_normal_docs() {
for path in [
"daily/2024-01-01.md",
"conversations/chat-abc.md",
"notes.md",
] {
assert!(!is_identity_path(path), "{path} should not be excluded");
}
}
#[test]
fn load_state_returns_none_for_missing_file() {
assert!(load_state(std::path::Path::new("/tmp/nonexistent_hygiene.json")).is_none());
}
#[test]
fn save_and_load_state_roundtrip() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().join("hygiene_state.json");
save_state(&path);
let state = load_state(&path).expect("state should be loadable after save");
let elapsed = Utc::now().signed_duration_since(state.last_run);
assert!(elapsed.num_seconds() < 2);
}
#[test]
fn save_state_creates_parent_dirs() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().join("nested").join("deep").join("state.json");
save_state(&path);
assert!(path.exists());
}
#[test]
fn save_state_is_atomic_no_tmp_left_behind() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().join("state.json");
let tmp = dir.path().join("state.json.tmp");
save_state(&path);
assert!(path.exists(), "state file should exist");
assert!(!tmp.exists(), "temp file should be cleaned up after rename");
let state = load_state(&path).expect("saved state should be loadable");
let elapsed = Utc::now().signed_duration_since(state.last_run);
assert!(elapsed.num_seconds() < 2);
}
#[test]
fn running_guard_prevents_reentry() {
let _lock = RUNNING_TESTS.lock().unwrap();
RUNNING.store(false, Ordering::SeqCst);
assert!(
RUNNING
.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
.is_ok(),
"first acquisition should succeed"
);
assert!(
RUNNING
.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
.is_err(),
"second acquisition should fail while first is held"
);
RUNNING.store(false, Ordering::SeqCst);
assert!(
RUNNING
.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
.is_ok(),
"acquisition should succeed after release"
);
RUNNING.store(false, Ordering::SeqCst);
}
#[cfg(feature = "libsql")]
mod async_tests {
use super::*;
use crate::db::Database;
use std::sync::Arc;
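        /// Creates a throwaway libSQL-backed database. The returned `TempDir`
        /// must be kept alive for as long as the database file is in use.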
        async fn create_test_db() -> (Arc<dyn Database>, tempfile::TempDir) {
use crate::db::libsql::LibSqlBackend;
let temp_dir = tempfile::tempdir().expect("tempdir");
let db_path = temp_dir.path().join("test_hygiene.db");
let backend = LibSqlBackend::new_local(&db_path)
.await
.expect("LibSqlBackend::new_local");
backend.run_migrations().await.expect("run_migrations");
let db: Arc<dyn Database> = Arc::new(backend);
(db, temp_dir)
}
fn create_workspace(db: &Arc<dyn Database>) -> Arc<Workspace> {
Arc::new(Workspace::new_with_db("default", db.clone()))
}
#[tokio::test]
async fn cleanup_daily_logs_preserves_identity_documents() {
let (db, _tmp) = create_test_db().await;
let ws = create_workspace(&db);
ws.write("daily/2024-01-15.md", "Old log")
.await
.expect("write log 1");
ws.write("daily/2024-01-20.md", "Another log")
.await
.expect("write log 2");
ws.write("MEMORY.md", "Long-term curated memory")
.await
.expect("write identity");
let before = ws.list("daily/").await.expect("list before");
let daily_count_before = before.iter().filter(|e| !e.is_directory).count();
assert!(daily_count_before >= 2, "should have at least 2 daily logs");
let deleted = cleanup_daily_logs(&ws, 0)
.await
.expect("cleanup_daily_logs");
assert!(deleted > 0, "should have deleted old daily documents");
let identity = db
.get_document_by_path("default", None, "MEMORY.md")
.await
.expect("get identity doc");
assert_eq!(identity.path, "MEMORY.md");
assert_eq!(identity.content, "Long-term curated memory");
}
#[tokio::test]
async fn cleanup_conversation_docs_handles_empty_directory() {
let (db, _tmp) = create_test_db().await;
let ws = create_workspace(&db);
let deleted = cleanup_conversation_docs(&ws, 7)
.await
.expect("cleanup_conversation_docs");
assert_eq!(deleted, 0, "should delete 0 from empty directory");
}
        #[tokio::test]
        async fn run_if_due_respects_cadence() {
            // The sync tests flip `RUNNING` directly; hold the shared lock so
            // they cannot race this pass and make it skip spuriously.
            let _lock = RUNNING_TESTS.lock().unwrap();
            let (db, _tmp) = create_test_db().await;
            let ws = create_workspace(&db);
let config = HygieneConfig {
enabled: true,
daily_retention_days: 30,
conversation_retention_days: 7,
cadence_hours: 12,
state_dir: _tmp.path().to_path_buf(),
};
let report1 = run_if_due(&ws, &config).await;
assert!(!report1.skipped, "first run should not be skipped");
let report2 = run_if_due(&ws, &config).await;
assert!(report2.skipped, "second run should be skipped by cadence");
            assert_eq!(
                report1.daily_logs_deleted + report1.conversation_docs_deleted,
                0,
                "first run should delete nothing (no documents exceed retention)"
            );
}
#[tokio::test]
async fn cleanup_reports_deletion_counts_correctly() {
let (db, _tmp) = create_test_db().await;
let ws = create_workspace(&db);
ws.write("daily/log1.md", "content 1")
.await
.expect("write doc 1");
ws.write("daily/log2.md", "content 2")
.await
.expect("write doc 2");
ws.write("conversations/chat1.md", "content 3")
.await
.expect("write doc 3");
let deleted_daily = cleanup_daily_logs(&ws, 0).await.expect("cleanup daily");
let deleted_conv = cleanup_conversation_docs(&ws, 0)
.await
.expect("cleanup conversations");
assert!(deleted_daily > 0, "should report deleted daily logs");
assert_eq!(deleted_conv, 1, "should report 1 deleted conversation doc");
let report = HygieneReport {
daily_logs_deleted: deleted_daily,
conversation_docs_deleted: deleted_conv,
skipped: false,
};
assert!(!report.skipped, "should not be skipped");
assert!(report.had_work(), "report should indicate work was done");
assert!(
report.daily_logs_deleted > 0 || report.conversation_docs_deleted > 0,
"report should have at least one deletion count > 0"
);
let no_work = HygieneReport {
daily_logs_deleted: 0,
conversation_docs_deleted: 0,
skipped: false,
};
assert!(!no_work.had_work(), "empty report should indicate no work");
}
}
}