reasonkit-mem 0.1.7

High-performance vector database & RAG memory layer - hybrid search, embeddings, RAPTOR trees, BM25 fusion, and semantic retrieval for AI systems
use crate::{Error, Result};
use chrono::{DateTime, Duration, Utc};
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use uuid::Uuid;

/// Identifier type for a [`Docset`]; a plain alias for [`Uuid`].
pub type DocsetId = Uuid;

/// Rule that decides whether a docset needs to be refreshed.
///
/// Variant names are serialized and deserialized in `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RefreshPolicy {
    /// Never reported as due by [`RefreshPolicy::is_due`].
    Manual,
    /// Interval, in seconds, measured from the last successful refresh.
    IntervalSecs(u64),
}

impl RefreshPolicy {
    /// Returns an interval policy of seven days (604 800 seconds).
    pub fn weekly() -> Self {
        Self::IntervalSecs(7 * 24 * 60 * 60)
    }

    /// Reports whether a refresh is due at `now`.
    ///
    /// * [`RefreshPolicy::Manual`] is never due.
    /// * [`RefreshPolicy::IntervalSecs`] is due when `last_success_at` is
    ///   `None`, or when at least that many seconds separate
    ///   `last_success_at` from `now`.
    ///
    /// A `last_success_at` that lies after `now` is treated as "nothing has
    /// elapsed yet" and is therefore not due.
    ///
    /// This never panics, whatever the interval: the interval is compared as
    /// a `u64` number of seconds and is never converted into a `Duration`.
    pub fn is_due(&self, last_success_at: Option<DateTime<Utc>>, now: DateTime<Utc>) -> bool {
        match self {
            RefreshPolicy::Manual => false,
            RefreshPolicy::IntervalSecs(secs) => {
                let Some(last) = last_success_at else {
                    // Never refreshed successfully -> due now.
                    return true;
                };
                let elapsed = now.signed_duration_since(last);
                if elapsed < Duration::zero() {
                    // Checked before truncating to whole seconds, because
                    // `num_seconds` rounds toward zero and would turn -0.5 s
                    // into 0 s.
                    return false;
                }
                // `elapsed` is non-negative here, so `unsigned_abs` is a
                // lossless i64 -> u64 conversion. Because the interval is a
                // whole number of seconds, comparing truncated seconds is
                // equivalent to comparing the full-precision durations.
                elapsed.num_seconds().unsigned_abs() >= *secs
            }
        }
    }
}

/// Refresh state recorded on a [`Docset`].
///
/// Variant names are serialized and deserialized in `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RefreshStatus {
    /// Initial state assigned by [`Docset::new`].
    Never,
    /// Carries the timestamp that [`RefreshStatus::last_success_at`] returns.
    Ok { at: DateTime<Utc> },
    /// Carries a timestamp and a message. `last_success_at` returns `None`
    /// for this variant, so an interval policy treats the docset as due.
    Error { at: DateTime<Utc>, message: String },
}

impl RefreshStatus {
    pub fn last_success_at(&self) -> Option<DateTime<Utc>> {
        match self {
            RefreshStatus::Ok { at } => Some(*at),
            _ => None,
        }
    }
}

/// A documentation set record as persisted by [`DocsetStore`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Docset {
    /// Identifier; `DocsetStore::upsert` and `DocsetStore::delete` match on it.
    pub id: DocsetId,
    /// Name given to the docset at construction.
    pub name: String,
    /// Starting URL (presumably the crawl entry point; confirm with the
    /// code that consumes it).
    pub start_url: String,
    /// URL prefixes (presumably limiting which pages belong to the docset;
    /// confirm with the code that consumes them).
    pub allowed_prefixes: Vec<String>,
    /// Policy consulted by [`Docset::is_due`].
    pub refresh: RefreshPolicy,
    /// Refresh state; its last success time feeds [`Docset::is_due`].
    pub status: RefreshStatus,
    /// Set to the current UTC time by [`Docset::new`].
    pub created_at: DateTime<Utc>,
    /// Set by [`Docset::new`] and overwritten on every `DocsetStore::upsert`.
    pub updated_at: DateTime<Utc>,
}

impl Docset {
    pub fn new(
        name: impl Into<String>,
        start_url: impl Into<String>,
        allowed_prefixes: Vec<String>,
    ) -> Self {
        let now = Utc::now();
        Self {
            id: Uuid::new_v4(),
            name: name.into(),
            start_url: start_url.into(),
            allowed_prefixes,
            refresh: RefreshPolicy::weekly(),
            status: RefreshStatus::Never,
            created_at: now,
            updated_at: now,
        }
    }

    pub fn is_due(&self, now: DateTime<Utc>) -> bool {
        self.refresh.is_due(self.status.last_success_at(), now)
    }
}

/// Settings for a docset store.
#[derive(Debug, Clone)]
pub struct DocsetStoreConfig {
    /// File name of the store; `DocsetStore::new` joins it onto the base
    /// directory.
    pub file_name: String,
}

impl Default for DocsetStoreConfig {
    /// Uses `docsets.json` as the file name.
    fn default() -> Self {
        let file_name = String::from("docsets.json");
        DocsetStoreConfig { file_name }
    }
}

/// Handle to a single JSON file that holds a list of [`Docset`] records.
///
/// Cloning copies only the path; the struct holds no open file or cache.
#[derive(Debug, Clone)]
pub struct DocsetStore {
    // Full path of the store file (base directory joined with the file name).
    path: PathBuf,
}

impl DocsetStore {
    pub fn new(base_dir: impl AsRef<Path>, config: DocsetStoreConfig) -> Self {
        Self {
            path: base_dir.as_ref().join(config.file_name),
        }
    }

    pub fn path(&self) -> &Path {
        &self.path
    }

    pub async fn load(&self) -> Result<Vec<Docset>> {
        if !self.path.exists() {
            return Ok(Vec::new());
        }
        let bytes = tokio::fs::read(&self.path)
            .await
            .map_err(|e| Error::io(format!("Failed to read docset store {:?}: {e}", self.path)))?;
        serde_json::from_slice(&bytes)
            .map_err(|e| Error::parse(format!("Failed to parse docset store {:?}: {e}", self.path)))
    }

    pub async fn save(&self, docsets: &[Docset]) -> Result<()> {
        if let Some(parent) = self.path.parent() {
            tokio::fs::create_dir_all(parent).await.map_err(|e| {
                Error::io(format!(
                    "Failed to create docset store directory {:?}: {e}",
                    parent
                ))
            })?;
        }

        let tmp_path = self.path.with_extension("json.tmp");
        let bytes = serde_json::to_vec_pretty(docsets)
            .map_err(|e| Error::parse(format!("Failed to serialize docsets: {e}")))?;

        tokio::fs::write(&tmp_path, bytes)
            .await
            .map_err(|e| Error::io(format!("Failed to write temp docset store: {e}")))?;
        tokio::fs::rename(&tmp_path, &self.path)
            .await
            .map_err(|e| Error::io(format!("Failed to replace docset store: {e}")))?;
        Ok(())
    }

    pub async fn upsert(&self, mut docset: Docset) -> Result<Docset> {
        let mut docsets = self.load().await?;
        let now = Utc::now();
        docset.updated_at = now;

        if let Some(existing) = docsets.iter_mut().find(|d| d.id == docset.id) {
            *existing = docset.clone();
        } else {
            docsets.push(docset.clone());
        }

        self.save(&docsets).await?;
        Ok(docset)
    }

    pub async fn delete(&self, id: DocsetId) -> Result<bool> {
        let mut docsets = self.load().await?;
        let before = docsets.len();
        docsets.retain(|d| d.id != id);
        let deleted = docsets.len() != before;
        if deleted {
            self.save(&docsets).await?;
        }
        Ok(deleted)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Upserts, loads and deletes one docset through a store in a temp dir.
    #[tokio::test]
    async fn store_roundtrip() {
        let workdir = tempdir().unwrap();
        let store = DocsetStore::new(workdir.path(), DocsetStoreConfig::default());

        let mut react = Docset::new(
            "React",
            "https://react.dev/reference/",
            vec!["https://react.dev/reference/".to_string()],
        );
        react.refresh = RefreshPolicy::Manual;

        // Insert.
        let written = store.upsert(react).await.unwrap();
        assert_eq!(written.name, "React");

        // Read back.
        let on_disk = store.load().await.unwrap();
        assert_eq!(on_disk.len(), 1);
        assert_eq!(on_disk[0].start_url, "https://react.dev/reference/");

        // Remove, then confirm the store is empty.
        assert!(store.delete(written.id).await.unwrap());
        let remaining = store.load().await.unwrap();
        assert!(remaining.is_empty());
    }

    /// Checks the weekly policy with no prior success, a success right now,
    /// and a success eight days ago.
    #[test]
    fn refresh_due_logic() {
        let policy = RefreshPolicy::weekly();
        let now = Utc::now();

        // No prior success: due.
        assert!(policy.is_due(None, now));
        // Succeeded just now: not due.
        assert!(!policy.is_due(Some(now), now));
        // Succeeded more than a week ago: due.
        let stale = now - Duration::days(8);
        assert!(policy.is_due(Some(stale), now));
    }
}