Skip to main content

lex_vcs/
intent.rs

1//! First-class `Intent` object linked to operations (#131).
2//!
3//! Today the op log records *what* changed (typed deltas on the
4//! AST). Intent captures *why* — the prompt that caused an agent
5//! to make the change, the model that interpreted it, and the
6//! session that grouped it with sibling ops.
7//!
8//! This matters for two reasons:
9//! 1. **Audit.** When an agent commits a regression, the maintainer
10//!    needs the prompt that led to it. The commit message can be
11//!    made up; the prompt is the actual causal event.
12//! 2. **Coordination.** When multiple agents work in parallel,
13//!    knowing which operations belong to which intent lets the
14//!    harness group them — agent A's work on intent-X is
15//!    independent of agent B's work on intent-Y.
16//!
17//! # Identity
18//!
//! [`IntentId`] is the SHA-256 of the canonical form of
//! `(prompt, session_id, model, parent_intent)` — `created_at` is
//! deliberately *not* part of the hash, so re-recording the same
//! prompt with the same session, model, and parent at a different
//! time still dedupes. The
//! "same `(prompt, session_id, model, parent_intent)` → same
//! `intent_id`" invariant is what #131's audit story rests on.
25//!
26//! # Storage
27//!
28//! `<root>/intents/<IntentId>.json` — same shape as `<root>/ops/`
29//! and `<root>/stages/`. Atomic writes via tempfile + rename;
30//! idempotent on existing IDs.
31//!
32//! # Privacy boundary
33//!
34//! Prompts may contain sensitive data. Keeping intents in their
35//! own addressable namespace (rather than inlining the prompt on
36//! every op) makes per-intent ACLs tractable as a follow-up
37//! without touching the op log itself.
38
39use serde::{Deserialize, Serialize};
40use std::fs;
41use std::io::{self, Write};
42use std::path::{Path, PathBuf};
43use std::time::{SystemTime, UNIX_EPOCH};
44
45use crate::canonical;
46
/// Identity of an intent, derived from its content: the lowercase-hex
/// SHA-256 of the canonical form of
/// `(prompt, session_id, model, parent_intent)`.
///
/// `created_at` is left out of the hashed tuple, so recording the same
/// prompt twice (same session, model, and parent) yields the same id.
pub type IntentId = String;

/// Identifier shared by all intents recorded during one agent session.
///
/// Deliberately a free-form string: each harness can plug in whatever
/// notion of "session" it already has.
pub type SessionId = String;
56
57/// Which model produced the intent. Tracked so audit / blame can
58/// answer "what model wrote this?" without joining against an
59/// external table.
60#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
61pub struct ModelDescriptor {
62    /// Vendor / origin: `"anthropic"`, `"openai"`, `"local"`, etc.
63    pub provider: String,
64    /// The model name: `"claude-opus-4-7"`, `"gpt-5"`, etc.
65    pub name: String,
66    /// Optional version pin. `None` means "whatever the provider
67    /// served"; `Some("2026-04-01")` lets the harness record an
68    /// exact API revision.
69    #[serde(default, skip_serializing_if = "Option::is_none")]
70    pub version: Option<String>,
71}
72
73/// The persisted intent. Carries the prompt that caused some
74/// operations to be produced, the model that interpreted it, and
75/// the session that grouped them. Many ops can share one intent;
76/// duplicating the prompt on each would be wasteful and break the
77/// "two equal ops hash equal" invariant.
78#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
79pub struct Intent {
80    pub intent_id: IntentId,
81    pub prompt: String,
82    pub session_id: SessionId,
83    pub model: ModelDescriptor,
84    /// For refinement chains ("the user said X, then said 'now also
85    /// handle Y'"). `None` for top-level intents.
86    #[serde(default, skip_serializing_if = "Option::is_none")]
87    pub parent_intent: Option<IntentId>,
88    /// Wall-clock seconds since epoch when this intent was first
89    /// created. Excluded from `intent_id` so the dedup property
90    /// holds across runs.
91    pub created_at: u64,
92}
93
94impl Intent {
95    /// Build an intent and compute its content-addressed id.
96    /// `created_at` is filled in from the current wall clock; pass
97    /// to [`Intent::with_timestamp`] if you want to control it
98    /// explicitly (e.g. in tests).
99    pub fn new(
100        prompt: impl Into<String>,
101        session_id: impl Into<SessionId>,
102        model: ModelDescriptor,
103        parent_intent: Option<IntentId>,
104    ) -> Self {
105        let now = SystemTime::now()
106            .duration_since(UNIX_EPOCH)
107            .map(|d| d.as_secs())
108            .unwrap_or(0);
109        Self::with_timestamp(prompt, session_id, model, parent_intent, now)
110    }
111
112    /// Build an intent with a caller-controlled `created_at`. Used
113    /// in tests to keep golden hashes stable; production code uses
114    /// [`Intent::new`].
115    pub fn with_timestamp(
116        prompt: impl Into<String>,
117        session_id: impl Into<SessionId>,
118        model: ModelDescriptor,
119        parent_intent: Option<IntentId>,
120        created_at: u64,
121    ) -> Self {
122        let prompt = prompt.into();
123        let session_id = session_id.into();
124        let intent_id = compute_intent_id(&prompt, &session_id, &model, parent_intent.as_deref());
125        Self {
126            intent_id,
127            prompt,
128            session_id,
129            model,
130            parent_intent,
131            created_at,
132        }
133    }
134}
135
136fn compute_intent_id(
137    prompt: &str,
138    session_id: &str,
139    model: &ModelDescriptor,
140    parent_intent: Option<&str>,
141) -> IntentId {
142    let view = CanonicalIntentView {
143        prompt,
144        session_id,
145        model,
146        parent_intent,
147    };
148    canonical::hash(&view)
149}
150
151/// Hashable shadow of [`Intent`] omitting `intent_id` (we're
152/// computing it) and `created_at` (timestamp drift would break
153/// dedup). Lives only as a transient for hashing.
154#[derive(Serialize)]
155struct CanonicalIntentView<'a> {
156    prompt: &'a str,
157    session_id: &'a str,
158    model: &'a ModelDescriptor,
159    #[serde(skip_serializing_if = "Option::is_none")]
160    parent_intent: Option<&'a str>,
161}
162
163// ---- Persistence -------------------------------------------------
164
/// Persistent log of [`Intent`] records. Mirrors [`crate::OpLog`]'s
/// shape: one JSON file per intent under `<root>/intents/`, written
/// via tempfile + rename, idempotent on re-puts.
///
/// Implements `Debug` so the log can be embedded in other
/// `#[derive(Debug)]` types and shown in diagnostics.
#[derive(Debug)]
pub struct IntentLog {
    /// Directory that holds the `<IntentId>.json` files.
    dir: PathBuf,
}
171
172impl IntentLog {
173    pub fn open(root: &Path) -> io::Result<Self> {
174        let dir = root.join("intents");
175        fs::create_dir_all(&dir)?;
176        Ok(Self { dir })
177    }
178
179    fn path(&self, id: &IntentId) -> PathBuf {
180        self.dir.join(format!("{id}.json"))
181    }
182
183    /// Persist an intent. Idempotent on existing ids — the bytes
184    /// must match by content addressing, so re-putting the same
185    /// intent is a no-op.
186    pub fn put(&self, intent: &Intent) -> io::Result<()> {
187        let path = self.path(&intent.intent_id);
188        if path.exists() {
189            return Ok(());
190        }
191        let bytes = serde_json::to_vec(intent)
192            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
193        let tmp = path.with_extension("json.tmp");
194        let mut f = fs::File::create(&tmp)?;
195        f.write_all(&bytes)?;
196        f.sync_all()?;
197        fs::rename(&tmp, &path)?;
198        Ok(())
199    }
200
201    pub fn get(&self, id: &IntentId) -> io::Result<Option<Intent>> {
202        let path = self.path(id);
203        if !path.exists() {
204            return Ok(None);
205        }
206        let bytes = fs::read(&path)?;
207        let intent: Intent = serde_json::from_slice(&bytes)
208            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
209        Ok(Some(intent))
210    }
211}
212
213// ---- Tests --------------------------------------------------------
214
#[cfg(test)]
mod tests {
    use super::*;

    /// Prompt and session shared by most cases below.
    const PROMPT: &str = "fix the auth bug";
    const SESSION: &str = "ses_abc";

    /// The default model descriptor used across the tests.
    fn anthropic() -> ModelDescriptor {
        ModelDescriptor {
            provider: String::from("anthropic"),
            name: String::from("claude-opus-4-7"),
            version: None,
        }
    }

    /// A parent-less intent recorded with the default model.
    fn top_level(prompt: &str, session: &str, created_at: u64) -> Intent {
        Intent::with_timestamp(prompt, session, anthropic(), None, created_at)
    }

    /// A fresh `IntentLog` in a temp directory. The returned guard
    /// must stay alive for as long as the log is used.
    fn scratch_log() -> (tempfile::TempDir, IntentLog) {
        let root = tempfile::tempdir().unwrap();
        let log = IntentLog::open(root.path()).unwrap();
        (root, log)
    }

    #[test]
    fn same_prompt_session_model_hashes_equal() {
        // The load-bearing dedup invariant: the same logical intent
        // (same prompt, same session, same model) yields the same
        // `intent_id` no matter when it was recorded. `created_at`
        // differs between the two but is not part of the hash.
        let early = top_level(PROMPT, SESSION, 1000);
        let late = top_level(PROMPT, SESSION, 99999);
        assert_eq!(early.intent_id, late.intent_id);
        assert_ne!(early.created_at, late.created_at);
    }

    #[test]
    fn different_prompts_hash_differently() {
        let auth = top_level("fix the auth bug", SESSION, 0);
        let cache = top_level("fix the cache bug", SESSION, 0);
        assert_ne!(auth.intent_id, cache.intent_id);
    }

    #[test]
    fn different_sessions_hash_differently() {
        let first = top_level(PROMPT, "ses_abc", 0);
        let second = top_level(PROMPT, "ses_xyz", 0);
        assert_ne!(first.intent_id, second.intent_id);
    }

    #[test]
    fn different_models_hash_differently() {
        let opus = top_level(PROMPT, SESSION, 0);
        let sonnet_model = ModelDescriptor {
            name: "claude-sonnet-4-6".into(),
            ..anthropic()
        };
        let sonnet = Intent::with_timestamp(PROMPT, SESSION, sonnet_model, None, 0);
        assert_ne!(opus.intent_id, sonnet.intent_id);
    }

    #[test]
    fn refinement_chain_distinguishes_parent_intent() {
        let orphan = top_level("now also handle Y", SESSION, 0);
        let refinement = Intent::with_timestamp(
            "now also handle Y",
            SESSION,
            anthropic(),
            Some("parent-intent-id".into()),
            0,
        );
        assert_ne!(
            orphan.intent_id, refinement.intent_id,
            "an intent with a parent is causally distinct from one without",
        );
    }

    #[test]
    fn intent_id_is_64_char_lowercase_hex() {
        let id = top_level("test", SESSION, 0).intent_id;
        assert_eq!(id.len(), 64);
        assert!(id.chars().all(|c| matches!(c, '0'..='9' | 'a'..='f')));
    }

    #[test]
    fn round_trip_through_serde_json() {
        let original = Intent::with_timestamp(
            PROMPT,
            SESSION,
            anthropic(),
            Some("parent".into()),
            12345,
        );
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Intent = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    /// Golden hash. A change here means the canonical form has
    /// shifted, and with it every `IntentId` in every existing
    /// store. That is a major-version event for the data model and
    /// must be a deliberate decision; update with care. Same
    /// protective shape as the operation.rs golden test.
    ///
    /// Inputs are spelled out in full (no shared helpers) so the
    /// pinned value cannot drift when a helper changes.
    #[test]
    fn canonical_form_is_stable_for_a_known_input() {
        let model = ModelDescriptor {
            provider: "anthropic".into(),
            name: "claude-opus-4-7".into(),
            version: None,
        };
        let pinned = Intent::with_timestamp("fix the auth bug", "ses_abc", model, None, 0);
        assert_eq!(
            pinned.intent_id,
            "5ede62683a249cd00afff49fdf56e8f659fe878a668c8b61e36f5fbc1de7c734",
        );
    }

    // ---- IntentLog ----

    #[test]
    fn intent_log_round_trips_through_disk() {
        let (_root, log) = scratch_log();
        let stored = top_level(PROMPT, SESSION, 100);
        log.put(&stored).unwrap();
        let loaded = log.get(&stored.intent_id).unwrap().unwrap();
        assert_eq!(stored, loaded);
    }

    #[test]
    fn intent_log_get_unknown_returns_none() {
        let (_root, log) = scratch_log();
        let missing: IntentId = "nonexistent".to_string();
        assert!(log.get(&missing).unwrap().is_none());
    }

    #[test]
    fn intent_log_put_is_idempotent() {
        let (_root, log) = scratch_log();
        let stored = top_level(PROMPT, SESSION, 100);
        log.put(&stored).unwrap();
        // Putting the identical intent again is a no-op: the file
        // for this id already exists, so it is left as written.
        log.put(&stored).unwrap();
        let loaded = log.get(&stored.intent_id).unwrap().unwrap();
        assert_eq!(stored, loaded);
    }
}