Skip to main content

crabtalk_memory/
memory.rs

1use crate::{
2    bm25::{Index, tokenize},
3    dump,
4    entry::{Entry, EntryId, EntryKind},
5    error::{Error, Result},
6    file,
7    op::Op,
8};
9use std::{
10    collections::HashMap,
11    fs,
12    path::{Path, PathBuf},
13    time::{SystemTime, UNIX_EPOCH},
14};
15
16/// Memory connection. `open(path)` is persistent (auto-flushes every
17/// `apply` via atomic write); `new()` is in-RAM only.
18pub struct Memory {
19    path: Option<PathBuf>,
20    entries: HashMap<EntryId, Entry>,
21    by_name: HashMap<String, EntryId>,
22    index: Index,
23    next_id: EntryId,
24}
25
26#[derive(Clone, Debug)]
27pub struct SearchHit {
28    pub entry: Entry,
29    pub score: f64,
30}
31
32impl Default for Memory {
33    fn default() -> Self {
34        Self::new()
35    }
36}
37
38impl Memory {
39    /// In-RAM memory. Nothing is persisted.
40    pub fn new() -> Self {
41        Self {
42            path: None,
43            entries: HashMap::new(),
44            by_name: HashMap::new(),
45            index: Index::new(),
46            next_id: 1,
47        }
48    }
49
50    /// Open (or create) a memory db at `path`. Reads the file if it
51    /// exists; otherwise the db starts empty and the file is created on
52    /// the first write.
53    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
54        let path = path.as_ref().to_path_buf();
55        let mut mem = Self {
56            path: Some(path.clone()),
57            entries: HashMap::new(),
58            by_name: HashMap::new(),
59            index: Index::new(),
60            next_id: 1,
61        };
62        if let Some(snap) = file::read(&path)? {
63            mem.next_id = snap.next_id;
64            for entry in snap.entries {
65                mem.by_name.insert(entry.name.clone(), entry.id);
66                mem.reindex(&entry);
67                mem.entries.insert(entry.id, entry);
68            }
69        }
70        Ok(mem)
71    }
72
73    /// Apply a write op and persist. RAM is mutated before `flush`, so a
74    /// flush failure leaves RAM ahead of disk until the next successful
75    /// op (or the next `open`, which re-reads the file). WAL will close
76    /// this window in v2.
77    pub fn apply(&mut self, op: Op) -> Result<()> {
78        match op {
79            Op::Add {
80                name,
81                content,
82                aliases,
83                kind,
84            } => self.add(name, content, aliases, kind)?,
85            Op::Update {
86                name,
87                content,
88                aliases,
89            } => self.update(&name, content, aliases)?,
90            Op::Alias { name, aliases } => self.set_aliases(&name, aliases)?,
91            Op::Remove { name } => self.remove(&name)?,
92        }
93        self.flush()
94    }
95
96    pub fn get(&self, name: &str) -> Option<&Entry> {
97        self.by_name.get(name).and_then(|id| self.entries.get(id))
98    }
99
100    pub fn list(&self) -> impl Iterator<Item = &Entry> {
101        self.entries.values()
102    }
103
104    pub fn search(&self, query: &str, limit: usize) -> Vec<SearchHit> {
105        self.index
106            .search(query, limit)
107            .into_iter()
108            .filter_map(|(id, score)| {
109                self.entries.get(&id).map(|e| SearchHit {
110                    entry: e.clone(),
111                    score,
112                })
113            })
114            .collect()
115    }
116
117    /// BM25 search restricted to a single `EntryKind`. The inner search
118    /// runs unbounded so the kind filter can't truncate matches mid-list;
119    /// we clone only the survivors that fit inside `limit`.
120    pub fn search_kind(&self, query: &str, limit: usize, kind: EntryKind) -> Vec<SearchHit> {
121        if limit == 0 {
122            return Vec::new();
123        }
124        self.index
125            .search(query, usize::MAX)
126            .into_iter()
127            .filter_map(|(id, score)| {
128                let entry = self.entries.get(&id)?;
129                if entry.kind != kind {
130                    return None;
131                }
132                Some(SearchHit {
133                    entry: entry.clone(),
134                    score,
135                })
136            })
137            .take(limit)
138            .collect()
139    }
140
141    fn add(
142        &mut self,
143        name: String,
144        content: String,
145        aliases: Vec<String>,
146        kind: EntryKind,
147    ) -> Result<()> {
148        if self.by_name.contains_key(&name) {
149            return Err(Error::Duplicate(name));
150        }
151        let id = self.next_id;
152        self.next_id += 1;
153        let entry = Entry {
154            id,
155            name: name.clone(),
156            content,
157            aliases,
158            created_at: now_unix(),
159            kind,
160        };
161        self.reindex(&entry);
162        self.by_name.insert(name, id);
163        self.entries.insert(id, entry);
164        Ok(())
165    }
166
167    fn update(&mut self, name: &str, content: String, aliases: Vec<String>) -> Result<()> {
168        let id = *self
169            .by_name
170            .get(name)
171            .ok_or_else(|| Error::NotFound(name.to_owned()))?;
172        let entry = self.entries.get_mut(&id).expect("entry id out of sync");
173        entry.content = content;
174        entry.aliases = aliases;
175        let snapshot = entry.clone();
176        self.reindex(&snapshot);
177        Ok(())
178    }
179
180    fn set_aliases(&mut self, name: &str, aliases: Vec<String>) -> Result<()> {
181        let id = *self
182            .by_name
183            .get(name)
184            .ok_or_else(|| Error::NotFound(name.to_owned()))?;
185        let entry = self.entries.get_mut(&id).expect("entry id out of sync");
186        entry.aliases = aliases;
187        let snapshot = entry.clone();
188        self.reindex(&snapshot);
189        Ok(())
190    }
191
192    fn remove(&mut self, name: &str) -> Result<()> {
193        let id = self
194            .by_name
195            .remove(name)
196            .ok_or_else(|| Error::NotFound(name.to_owned()))?;
197        self.entries.remove(&id);
198        self.index.remove(id);
199        Ok(())
200    }
201
202    fn reindex(&mut self, entry: &Entry) {
203        let mut terms = tokenize(&entry.content);
204        for alias in &entry.aliases {
205            terms.extend(tokenize(alias));
206        }
207        self.index.insert(entry.id, &terms);
208    }
209
210    fn flush(&self) -> Result<()> {
211        let Some(path) = &self.path else {
212            return Ok(());
213        };
214        let mut entries: Vec<&Entry> = self.entries.values().collect();
215        entries.sort_by_key(|e| e.id);
216        file::write(path, self.next_id, &entries)
217    }
218
219    /// Force a write of the current state to disk, whether or not any
220    /// mutation has happened. Useful for one-shot migration paths that
221    /// need the db file to exist even when every incoming op failed.
222    /// A no-op when the memory is in-RAM only (no path).
223    pub fn checkpoint(&self) -> Result<()> {
224        self.flush()
225    }
226
227    /// Materialize the db as a markdown tree at `dir`. Each kind's
228    /// subdirectory is cleared before writing so renames and deletes
229    /// don't leave orphan files behind. Anything else in `dir` (e.g. a
230    /// user's `book.toml`) is left alone.
231    pub fn dump(&self, dir: impl AsRef<Path>) -> Result<()> {
232        let dir = dir.as_ref();
233        let mut by_kind: HashMap<EntryKind, Vec<&Entry>> = HashMap::new();
234        for e in self.entries.values() {
235            dump::validate_name(&e.name)?;
236            by_kind.entry(e.kind).or_default().push(e);
237        }
238
239        fs::create_dir_all(dir)?;
240        for (kind, subdir, _) in dump::KIND_SECTIONS {
241            let path = dir.join(subdir);
242            if path.exists() {
243                fs::remove_dir_all(&path)?;
244            }
245            if by_kind.get(kind).is_some_and(|v| !v.is_empty()) {
246                fs::create_dir_all(&path)?;
247                for e in &by_kind[kind] {
248                    fs::write(
249                        path.join(format!("{}.md", e.name)),
250                        dump::serialize_entry(e),
251                    )?;
252                }
253            }
254        }
255
256        fs::write(dir.join("SUMMARY.md"), dump::build_summary(&by_kind))?;
257        // Seed book.toml so the tree is `mdbook serve`-ready. Only
258        // written when absent — any user edits survive re-dumps.
259        let book_toml = dir.join("book.toml");
260        if !book_toml.exists() {
261            fs::write(&book_toml, dump::BOOK_TOML)?;
262        }
263        Ok(())
264    }
265
266    /// Replace the db's contents with entries read from a markdown tree
267    /// at `dir`. Validates fully before mutating — a mid-load error
268    /// leaves the current state untouched.
269    pub fn load(&mut self, dir: impl AsRef<Path>) -> Result<()> {
270        let dir = dir.as_ref();
271        let loaded = dump::read_tree(dir)?;
272
273        let mut entries: HashMap<EntryId, Entry> = HashMap::with_capacity(loaded.len());
274        let mut by_name: HashMap<String, EntryId> = HashMap::with_capacity(loaded.len());
275        let mut index = Index::new();
276        let mut next_id: EntryId = 1;
277
278        for item in loaded {
279            if by_name.contains_key(&item.name) {
280                return Err(Error::Duplicate(item.name));
281            }
282            let id = next_id;
283            next_id += 1;
284            let entry = Entry {
285                id,
286                name: item.name.clone(),
287                content: item.content,
288                aliases: item.aliases,
289                created_at: item.created_at.unwrap_or_else(now_unix),
290                kind: item.kind,
291            };
292            let mut terms = tokenize(&entry.content);
293            for alias in &entry.aliases {
294                terms.extend(tokenize(alias));
295            }
296            index.insert(id, &terms);
297            by_name.insert(item.name, id);
298            entries.insert(id, entry);
299        }
300
301        self.entries = entries;
302        self.by_name = by_name;
303        self.index = index;
304        self.next_id = next_id;
305        self.flush()
306    }
307}
308
/// Current wall-clock time as whole seconds since the Unix epoch.
/// Falls back to 0 if the system clock reads earlier than the epoch.
fn now_unix() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}