Skip to main content

llm_diff/
store.rs

1// SPDX-License-Identifier: MIT
2//! Content-addressable version store for LLM outputs.
3
4use std::collections::{HashMap, HashSet};
5use serde::{Deserialize, Serialize};
6use uuid::Uuid;
7use chrono::{DateTime, Utc};
8use crate::diff::TextDiff;
9use crate::error::DiffError;
10
/// Derives the content address used for deduplication: the 64-bit FNV-1a hash
/// of `content`'s UTF-8 bytes, rendered as 16 zero-padded lowercase hex digits.
fn content_address(content: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a mixes each byte in with XOR first, then multiplies by the prime;
    // the multiplication is allowed to wrap around 2^64.
    let digest = content
        .as_bytes()
        .iter()
        .fold(FNV_OFFSET_BASIS, |acc, &b| (acc ^ u64::from(b)).wrapping_mul(FNV_PRIME));

    format!("{digest:016x}")
}
20
21/// Metadata describing why a new version was created.
22#[derive(Debug, Clone, Serialize, Deserialize, Default)]
23pub struct VersionAnnotation {
24    /// Whether the prompt changed relative to the parent.
25    pub prompt_changed: bool,
26    /// Whether the model changed relative to the parent.
27    pub model_changed: bool,
28    /// Whether the temperature changed relative to the parent.
29    pub temperature_changed: bool,
30    /// Free-form human note.
31    pub note: Option<String>,
32}
33
34
35/// A stored version of an LLM output.
36#[derive(Debug, Clone, Serialize, Deserialize)]
37pub struct OutputVersion {
38    /// Unique UUID for this version.
39    pub id: String,
40    /// FNV-1a hash of the content, used for deduplication.
41    pub content_address: String,
42    /// The raw text content.
43    pub content: String,
44    /// Model that produced this output.
45    pub model: String,
46    /// UTC timestamp of creation.
47    pub created_at: DateTime<Utc>,
48    /// Why this version was created.
49    pub annotation: VersionAnnotation,
50    /// Optional parent version ID for lineage tracking.
51    pub parent_id: Option<String>,
52}
53
54impl OutputVersion {
55    /// Creates a new version, computing the content address automatically.
56    pub fn new(
57        content: impl Into<String>,
58        model: impl Into<String>,
59        annotation: VersionAnnotation,
60        parent_id: Option<String>,
61    ) -> Self {
62        let content = content.into();
63        let addr = content_address(&content);
64        Self {
65            id: Uuid::new_v4().to_string(),
66            content_address: addr,
67            content,
68            model: model.into(),
69            created_at: Utc::now(),
70            annotation,
71            parent_id,
72        }
73    }
74}
75
76/// Content-addressable version store with branch and lineage support.
77pub struct VersionStore {
78    versions: HashMap<String, OutputVersion>,
79    by_address: HashMap<String, String>,
80    branches: HashMap<String, String>,
81    max_output_tokens: usize,
82}
83
84impl VersionStore {
85    /// Creates a new store with the given maximum output token limit per version.
86    pub fn new(max_output_tokens: usize) -> Self {
87        Self {
88            versions: HashMap::new(),
89            by_address: HashMap::new(),
90            branches: HashMap::new(),
91            max_output_tokens,
92        }
93    }
94
95    /// Stores a version, returning its ID.
96    ///
97    /// # Errors
98    /// Returns [`DiffError::OutputTooLarge`] if the content exceeds the token limit.
99    pub fn store(&mut self, version: OutputVersion) -> Result<String, DiffError> {
100        let token_estimate = version.content.len() / 4;
101        if token_estimate > self.max_output_tokens {
102            return Err(DiffError::OutputTooLarge {
103                size: token_estimate,
104                limit: self.max_output_tokens,
105            });
106        }
107        let id = version.id.clone();
108        self.by_address.insert(version.content_address.clone(), id.clone());
109        self.versions.insert(id.clone(), version);
110        Ok(id)
111    }
112
113    /// Retrieves a version by ID.
114    ///
115    /// # Errors
116    /// Returns [`DiffError::VersionNotFound`] if no version with that ID exists.
117    pub fn get(&self, id: &str) -> Result<&OutputVersion, DiffError> {
118        self.versions.get(id).ok_or_else(|| DiffError::VersionNotFound(id.to_string()))
119    }
120
121    /// Retrieves a version by its content address, if present.
122    pub fn get_by_address(&self, addr: &str) -> Option<&OutputVersion> {
123        self.by_address.get(addr).and_then(|id| self.versions.get(id))
124    }
125
126    /// Points a named branch at a version.
127    ///
128    /// # Errors
129    /// Returns [`DiffError::VersionNotFound`] if the version ID does not exist.
130    pub fn set_branch(&mut self, branch: impl Into<String>, version_id: impl Into<String>) -> Result<(), DiffError> {
131        let vid = version_id.into();
132        if !self.versions.contains_key(&vid) {
133            return Err(DiffError::VersionNotFound(vid));
134        }
135        self.branches.insert(branch.into(), vid);
136        Ok(())
137    }
138
139    /// Returns the head version of a named branch.
140    ///
141    /// # Errors
142    /// - [`DiffError::BranchNotFound`] if the branch does not exist.
143    /// - [`DiffError::VersionNotFound`] if the branch head ID is stale.
144    pub fn branch_head(&self, branch: &str) -> Result<&OutputVersion, DiffError> {
145        let id = self.branches.get(branch)
146            .ok_or_else(|| DiffError::BranchNotFound(branch.to_string()))?;
147        self.get(id)
148    }
149
150    /// Computes a text diff between two stored versions.
151    ///
152    /// # Errors
153    /// Returns [`DiffError::VersionNotFound`] if either ID is missing.
154    pub fn diff_versions(&self, from_id: &str, to_id: &str) -> Result<TextDiff, DiffError> {
155        let from = self.get(from_id)?;
156        let to = self.get(to_id)?;
157        Ok(TextDiff::compute(&from.content, &to.content))
158    }
159
160    /// Returns the parent version of the given version, or `None` if it is a root.
161    ///
162    /// # Errors
163    /// Returns [`DiffError::VersionNotFound`] if the version ID or its parent ID is missing.
164    pub fn rollback(&self, version_id: &str) -> Result<Option<&OutputVersion>, DiffError> {
165        let v = self.get(version_id)?;
166        match &v.parent_id {
167            Some(pid) => Ok(Some(self.get(pid)?)),
168            None => Ok(None),
169        }
170    }
171
172    /// Returns the full ancestor chain starting from `version_id`, oldest last.
173    ///
174    /// # Errors
175    /// Returns [`DiffError::VersionNotFound`] if any version in the chain is missing.
176    pub fn lineage(&self, version_id: &str) -> Result<Vec<&OutputVersion>, DiffError> {
177        let mut chain = Vec::new();
178        let mut current_id = version_id.to_string();
179        let mut visited = HashSet::new();
180        loop {
181            if visited.contains(&current_id) {
182                break;
183            }
184            visited.insert(current_id.clone());
185            let v = self.get(&current_id)?;
186            chain.push(v);
187            match &v.parent_id {
188                Some(pid) => current_id = pid.clone(),
189                None => break,
190            }
191        }
192        Ok(chain)
193    }
194
195    /// Returns the total number of stored versions.
196    pub fn version_count(&self) -> usize { self.versions.len() }
197}
198
#[cfg(test)]
mod tests {
    use super::*;

    /// Token limit large enough that none of the small fixtures hit it.
    const ROOMY_LIMIT: usize = 100_000;

    /// Store with a limit that every small fixture fits under.
    fn roomy_store() -> VersionStore {
        VersionStore::new(ROOMY_LIMIT)
    }

    /// Version with a default annotation and a fixed model name.
    fn sample(content: &str, parent: Option<String>) -> OutputVersion {
        OutputVersion::new(content, "claude-sonnet-4-6", VersionAnnotation::default(), parent)
    }

    #[test]
    fn test_store_and_retrieve_version_by_id() {
        let mut store = roomy_store();
        let stored_id = store.store(sample("Hello world", None)).unwrap();
        assert!(store.get(&stored_id).is_ok());
    }

    #[test]
    fn test_store_get_nonexistent_id_returns_version_not_found() {
        let store = roomy_store();
        let outcome = store.get("nonexistent");
        assert!(matches!(outcome, Err(DiffError::VersionNotFound(_))));
    }

    #[test]
    fn test_store_output_too_large_returns_error() {
        let mut tiny_store = VersionStore::new(1);
        let oversized = "a ".repeat(1000);
        let outcome = tiny_store.store(sample(&oversized, None));
        assert!(matches!(outcome, Err(DiffError::OutputTooLarge { .. })));
    }

    #[test]
    fn test_store_content_address_dedup_maps_same_content() {
        let mut store = roomy_store();
        let first = sample("same content", None);
        let shared_addr = first.content_address.clone();
        store.store(first).unwrap();
        store.store(sample("same content", None)).unwrap();
        assert!(store.get_by_address(&shared_addr).is_some());
    }

    #[test]
    fn test_store_set_branch_ok() {
        let mut store = roomy_store();
        let head = store.store(sample("content", None)).unwrap();
        assert!(store.set_branch("main", head).is_ok());
    }

    #[test]
    fn test_store_branch_not_found_returns_error() {
        let store = roomy_store();
        let outcome = store.branch_head("nonexistent");
        assert!(matches!(outcome, Err(DiffError::BranchNotFound(_))));
    }

    #[test]
    fn test_store_set_branch_with_invalid_version_returns_version_not_found() {
        let mut store = roomy_store();
        let outcome = store.set_branch("main", "bad-id");
        assert!(matches!(outcome, Err(DiffError::VersionNotFound(_))));
    }

    #[test]
    fn test_store_diff_versions_identical_content_is_identical() {
        let mut store = roomy_store();
        let left = store.store(sample("same text", None)).unwrap();
        let right = store.store(sample("same text", None)).unwrap();
        let delta = store.diff_versions(&left, &right).unwrap();
        assert!(delta.is_identical());
    }

    #[test]
    fn test_store_rollback_returns_parent_version() {
        let mut store = roomy_store();
        let root_id = store.store(sample("version 1", None)).unwrap();
        let leaf_id = store
            .store(sample("version 2", Some(root_id.clone())))
            .unwrap();
        let rolled_back = store.rollback(&leaf_id).unwrap().unwrap();
        assert_eq!(rolled_back.id, root_id);
    }

    #[test]
    fn test_store_rollback_root_returns_none() {
        let mut store = roomy_store();
        let root_id = store.store(sample("root version", None)).unwrap();
        assert!(store.rollback(&root_id).unwrap().is_none());
    }

    #[test]
    fn test_store_lineage_three_generations_length() {
        let mut store = roomy_store();
        let grandparent = store.store(sample("v1", None)).unwrap();
        let parent = store.store(sample("v2", Some(grandparent.clone()))).unwrap();
        let child = store.store(sample("v3", Some(parent.clone()))).unwrap();
        let chain = store.lineage(&child).unwrap();
        assert_eq!(chain.len(), 3);
    }

    #[test]
    fn test_store_version_count_increments() {
        let mut store = roomy_store();
        assert_eq!(store.version_count(), 0);
        store.store(sample("a", None)).unwrap();
        assert_eq!(store.version_count(), 1);
    }

    #[test]
    fn test_store_branch_head_returns_correct_version() {
        let mut store = roomy_store();
        let stored_id = store.store(sample("content", None)).unwrap();
        store.set_branch("main", stored_id.clone()).unwrap();
        let head = store.branch_head("main").unwrap();
        assert_eq!(head.id, stored_id);
    }
}