Skip to main content

trident/package/store/
mod.rs

1//! Definitions store — hash-keyed definitions storage.
2//!
3//! Inspired by Unison: every function definition is stored by its content hash.
4//! Names are metadata pointing to hashes. This allows instant rename, perfect
5//! caching, and semantic deduplication.
6//!
7//! Persistence layout:
8//! ```text
9//! ~/.trident/codebase/
10//!   defs/
11//!     <2-char-prefix>/
12//!       <full-hex-hash>.def
13//!   names.txt
14//!   history.txt
15//! ```
16
17use std::collections::BTreeMap;
18use std::path::{Path, PathBuf};
19
20use crate::ast::{self, Item};
21use crate::hash::{self, ContentHash};
22
23// ─── Data Structures ───────────────────────────────────────────────
24
25/// The codebase database.
26///
27/// Stores function definitions by content hash, with name mappings.
28/// Persisted to disk at `~/.trident/codebase/` (or `$TRIDENT_CODEBASE_DIR`).
29pub struct Codebase {
30    /// Hash -> definition source code.
31    pub(super) definitions: BTreeMap<ContentHash, Definition>,
32    /// Name -> hash mapping (current bindings).
33    pub(super) names: BTreeMap<String, ContentHash>,
34    /// Hash -> list of names that have pointed to it (history).
35    pub(super) name_history: BTreeMap<ContentHash, Vec<NameEntry>>,
36    /// Root directory for persistence.
37    pub(super) root: PathBuf,
38}
39
40/// A stored function definition.
41#[derive(Clone)]
42pub struct Definition {
43    /// The source code of the function (formatted).
44    pub source: String,
45    /// Module where this was last seen.
46    pub module: String,
47    /// Is it public?
48    pub is_pub: bool,
49    /// Parameters (name, type) pairs as strings.
50    pub params: Vec<(String, String)>,
51    /// Return type (as string), None for void.
52    pub return_ty: Option<String>,
53    /// Dependencies: hashes of functions called by this one.
54    pub dependencies: Vec<ContentHash>,
55    /// Spec annotations: preconditions.
56    pub requires: Vec<String>,
57    /// Spec annotations: postconditions.
58    pub ensures: Vec<String>,
59    /// When this was first stored (Unix timestamp).
60    pub first_seen: u64,
61}
62
/// A single record in the binding log: a name and the moment it was bound.
pub struct NameEntry {
    /// The name that was bound.
    pub name: String,
    /// Unix timestamp at which the binding was recorded.
    pub timestamp: u64,
}
68
/// Summary counters returned after adding a file to the codebase.
pub struct AddResult {
    /// Functions whose name had no binding before the add.
    pub added: usize,
    /// Functions whose name was already bound, but to a different hash.
    pub updated: usize,
    /// Functions whose name was already bound to the same hash.
    pub unchanged: usize,
}
78
/// Aggregate size figures for a codebase.
pub struct CodebaseStats {
    /// How many distinct definitions are stored.
    pub definitions: usize,
    /// How many names are currently bound.
    pub names: usize,
    /// Sum of the source lengths, in bytes, over all definitions.
    pub total_source_bytes: usize,
}
88
89mod deps;
90mod format;
91mod persist;
92
93use deps::extract_dependencies;
94use format::{format_fn_source, format_type};
95use persist::{atomic_write, codebase_dir, serialize_definition, unix_timestamp};
96
97#[cfg(test)]
98mod tests;
99
100// ─── Codebase Implementation ───────────────────────────────────────
101
102impl Codebase {
103    /// Open or create a codebase at the default location.
104    ///
105    /// Uses `$TRIDENT_CODEBASE_DIR` if set, otherwise `~/.trident/codebase/`.
106    pub fn open() -> std::io::Result<Self> {
107        let root = codebase_dir().ok_or_else(|| {
108            std::io::Error::new(
109                std::io::ErrorKind::NotFound,
110                "cannot determine codebase directory (no $HOME)",
111            )
112        })?;
113        Self::open_at(&root)
114    }
115
116    /// Open or create a codebase at a specific directory.
117    pub fn open_at(root: &Path) -> std::io::Result<Self> {
118        std::fs::create_dir_all(root)?;
119        std::fs::create_dir_all(root.join("defs"))?;
120
121        let mut cb = Codebase {
122            definitions: BTreeMap::new(),
123            names: BTreeMap::new(),
124            name_history: BTreeMap::new(),
125            root: root.to_path_buf(),
126        };
127
128        cb.load()?;
129        Ok(cb)
130    }
131
132    /// Add a parsed file to the codebase: hash all functions, store definitions.
133    pub fn add_file(&mut self, file: &ast::File) -> AddResult {
134        let fn_hashes = hash::hash_file(file);
135        let module = file.name.node.clone();
136        let now = unix_timestamp();
137
138        let mut added = 0usize;
139        let mut updated = 0usize;
140        let mut unchanged = 0usize;
141
142        for item in &file.items {
143            if let Item::Fn(func) = &item.node {
144                let name = func.name.node.clone();
145                let Some(hash) = fn_hashes.get(&name).copied() else {
146                    continue;
147                };
148
149                // Check if this name already points to this hash.
150                if let Some(existing) = self.names.get(&name) {
151                    if *existing == hash {
152                        unchanged += 1;
153                        continue;
154                    }
155                    // Name rebound to a new hash.
156                    updated += 1;
157                } else {
158                    added += 1;
159                }
160
161                // Extract dependencies.
162                let deps = extract_dependencies(func, &fn_hashes);
163
164                // Build the Definition.
165                let def = Definition {
166                    source: format_fn_source(func),
167                    module: module.clone(),
168                    is_pub: func.is_pub,
169                    params: func
170                        .params
171                        .iter()
172                        .map(|p| (p.name.node.clone(), format_type(&p.ty.node)))
173                        .collect(),
174                    return_ty: func.return_ty.as_ref().map(|t| format_type(&t.node)),
175                    dependencies: deps,
176                    requires: func.requires.iter().map(|s| s.node.clone()).collect(),
177                    ensures: func.ensures.iter().map(|s| s.node.clone()).collect(),
178                    first_seen: self
179                        .definitions
180                        .get(&hash)
181                        .map(|d| d.first_seen)
182                        .unwrap_or(now),
183                };
184
185                self.definitions.insert(hash, def);
186
187                // Record history entry.
188                let entry = NameEntry {
189                    name: name.clone(),
190                    timestamp: now,
191                };
192                self.name_history.entry(hash).or_default().push(entry);
193
194                // Update current name binding.
195                self.names.insert(name, hash);
196            }
197        }
198
199        AddResult {
200            added,
201            updated,
202            unchanged,
203        }
204    }
205
206    /// Look up a definition by name.
207    pub fn lookup(&self, name: &str) -> Option<&Definition> {
208        let hash = self.names.get(name)?;
209        self.definitions.get(hash)
210    }
211
212    /// Get the content hash for a name.
213    pub fn hash_for_name(&self, name: &str) -> Option<&ContentHash> {
214        self.names.get(name)
215    }
216
217    /// Look up a definition by hash.
218    pub fn lookup_hash(&self, hash: &ContentHash) -> Option<&Definition> {
219        self.definitions.get(hash)
220    }
221
222    /// List all names in the codebase, sorted alphabetically.
223    pub fn list_names(&self) -> Vec<(&str, &ContentHash)> {
224        let mut list: Vec<(&str, &ContentHash)> =
225            self.names.iter().map(|(n, h)| (n.as_str(), h)).collect();
226        list.sort_by_key(|(name, _)| *name);
227        list
228    }
229
230    /// Rename: rebind `new_name` to the hash currently bound to `old_name`,
231    /// and remove the `old_name` binding.
232    pub fn rename(&mut self, old_name: &str, new_name: &str) -> Result<(), String> {
233        let hash = self
234            .names
235            .get(old_name)
236            .copied()
237            .ok_or_else(|| format!("name '{}' not found", old_name))?;
238        if self.names.contains_key(new_name) {
239            return Err(format!("name '{}' already exists", new_name));
240        }
241        self.names.remove(old_name);
242        self.names.insert(new_name.to_string(), hash);
243
244        // Record history for the new name.
245        let entry = NameEntry {
246            name: new_name.to_string(),
247            timestamp: unix_timestamp(),
248        };
249        self.name_history.entry(hash).or_default().push(entry);
250
251        Ok(())
252    }
253
254    /// Alias: add an additional name pointing to the same hash as `name`.
255    pub fn alias(&mut self, name: &str, alias: &str) -> Result<(), String> {
256        let hash = self
257            .names
258            .get(name)
259            .copied()
260            .ok_or_else(|| format!("name '{}' not found", name))?;
261        if self.names.contains_key(alias) {
262            return Err(format!("name '{}' already exists", alias));
263        }
264        self.names.insert(alias.to_string(), hash);
265
266        let entry = NameEntry {
267            name: alias.to_string(),
268            timestamp: unix_timestamp(),
269        };
270        self.name_history.entry(hash).or_default().push(entry);
271
272        Ok(())
273    }
274
275    /// Get history of a name: all hashes it has pointed to, with timestamps.
276    pub fn name_history(&self, name: &str) -> Vec<(ContentHash, u64)> {
277        let mut result = Vec::new();
278        for (hash, entries) in &self.name_history {
279            for entry in entries {
280                if entry.name == name {
281                    result.push((*hash, entry.timestamp));
282                }
283            }
284        }
285        result.sort_by_key(|(_, ts)| *ts);
286        result
287    }
288
289    /// Get all names that currently point to a given hash.
290    pub fn names_for_hash(&self, hash: &ContentHash) -> Vec<&str> {
291        let mut names: Vec<&str> = self
292            .names
293            .iter()
294            .filter(|(_, h)| *h == hash)
295            .map(|(n, _)| n.as_str())
296            .collect();
297        names.sort();
298        names
299    }
300
301    /// Get dependencies of a definition: (name, hash) pairs for each called function.
302    pub fn dependencies(&self, hash: &ContentHash) -> Vec<(&str, &ContentHash)> {
303        let def = match self.definitions.get(hash) {
304            Some(d) => d,
305            None => return Vec::new(),
306        };
307        let mut result = Vec::new();
308        for dep_hash in &def.dependencies {
309            // Find a name for this dependency hash.
310            let name = self
311                .names
312                .iter()
313                .find(|(_, h)| *h == dep_hash)
314                .map(|(n, _)| n.as_str())
315                .unwrap_or("<unnamed>");
316            result.push((name, dep_hash));
317        }
318        result
319    }
320
321    /// Get reverse dependencies: definitions that depend on a given hash.
322    pub fn dependents(&self, hash: &ContentHash) -> Vec<(&str, &ContentHash)> {
323        let mut result = Vec::new();
324        for (def_hash, def) in &self.definitions {
325            if def.dependencies.contains(hash) {
326                let name = self
327                    .names
328                    .iter()
329                    .find(|(_, h)| *h == def_hash)
330                    .map(|(n, _)| n.as_str())
331                    .unwrap_or("<unnamed>");
332                result.push((name, def_hash));
333            }
334        }
335        result.sort_by_key(|(name, _)| *name);
336        result
337    }
338
339    /// Codebase statistics.
340    pub fn stats(&self) -> CodebaseStats {
341        let total_source_bytes = self.definitions.values().map(|d| d.source.len()).sum();
342        CodebaseStats {
343            definitions: self.definitions.len(),
344            names: self.names.len(),
345            total_source_bytes,
346        }
347    }
348
349    /// Save the codebase to disk.
350    pub fn save(&self) -> std::io::Result<()> {
351        // Write definitions.
352        let defs_dir = self.root.join("defs");
353        std::fs::create_dir_all(&defs_dir)?;
354
355        for (hash, def) in &self.definitions {
356            let hex = hash.to_hex();
357            let prefix = &hex[..2];
358            let prefix_dir = defs_dir.join(prefix);
359            std::fs::create_dir_all(&prefix_dir)?;
360
361            let def_path = prefix_dir.join(format!("{}.def", hex));
362            let content = serialize_definition(def);
363            atomic_write(&def_path, &content)?;
364        }
365
366        // Write names.txt
367        let names_path = self.root.join("names.txt");
368        let mut names_content = String::new();
369        let mut sorted_names: Vec<_> = self.names.iter().collect();
370        sorted_names.sort_by_key(|(n, _)| (*n).clone());
371        for (name, hash) in sorted_names {
372            names_content.push_str(name);
373            names_content.push('=');
374            names_content.push_str(&hash.to_hex());
375            names_content.push('\n');
376        }
377        atomic_write(&names_path, &names_content)?;
378
379        // Write history.txt
380        let history_path = self.root.join("history.txt");
381        let mut history_content = String::new();
382        let mut all_entries: Vec<(&ContentHash, &NameEntry)> = Vec::new();
383        for (hash, entries) in &self.name_history {
384            for entry in entries {
385                all_entries.push((hash, entry));
386            }
387        }
388        all_entries.sort_by_key(|(_, e)| e.timestamp);
389        for (hash, entry) in all_entries {
390            history_content.push_str(&entry.name);
391            history_content.push(' ');
392            history_content.push_str(&hash.to_hex());
393            history_content.push(' ');
394            history_content.push_str(&entry.timestamp.to_string());
395            history_content.push('\n');
396        }
397        atomic_write(&history_path, &history_content)?;
398
399        Ok(())
400    }
401
402    /// Store a definition directly by hash (used by registry publish).
403    pub fn store_definition(&mut self, hash: ContentHash, def: Definition) {
404        self.definitions.insert(hash, def);
405    }
406
407    /// Bind a name to a hash directly (used by registry pull).
408    pub fn bind_name(&mut self, name: &str, hash: ContentHash) {
409        self.names.insert(name.to_string(), hash);
410        let entry = NameEntry {
411            name: name.to_string(),
412            timestamp: unix_timestamp(),
413        };
414        self.name_history.entry(hash).or_default().push(entry);
415    }
416
417    /// Pretty-print a definition by name.
418    pub fn view(&self, name: &str) -> Option<String> {
419        let hash = self.names.get(name)?;
420        let def = self.definitions.get(hash)?;
421        let mut out = String::new();
422
423        // Header
424        out.push_str(&format!("-- {} {}\n", name, hash));
425
426        // Spec annotations
427        for req in &def.requires {
428            out.push_str(&format!("#[requires({})]\n", req));
429        }
430        for ens in &def.ensures {
431            out.push_str(&format!("#[ensures({})]\n", ens));
432        }
433
434        // Source
435        out.push_str(&def.source);
436        if !out.ends_with('\n') {
437            out.push('\n');
438        }
439
440        // Dependencies
441        if !def.dependencies.is_empty() {
442            out.push_str("\n-- Dependencies:\n");
443            for dep_hash in &def.dependencies {
444                let dep_name = self
445                    .names
446                    .iter()
447                    .find(|(_, h)| *h == dep_hash)
448                    .map(|(n, _)| n.as_str())
449                    .unwrap_or("<unnamed>");
450                out.push_str(&format!("--   {} {}\n", dep_name, dep_hash));
451            }
452        }
453
454        Some(out)
455    }
456
457    /// Look up a definition by hash prefix (short hex or full hex).
458    pub fn lookup_by_prefix(&self, prefix: &str) -> Option<(&ContentHash, &Definition)> {
459        // Strip leading '#' if present.
460        let prefix = prefix.strip_prefix('#').unwrap_or(prefix);
461        for (hash, def) in &self.definitions {
462            let hex = hash.to_hex();
463            if hex.starts_with(prefix) {
464                return Some((hash, def));
465            }
466            let short = hash.to_short();
467            if short.starts_with(prefix) || short == prefix {
468                return Some((hash, def));
469            }
470        }
471        None
472    }
473}