git_internal/internal/object/context.rs
1//! AI Context Snapshot Definition
2//!
3//! A [`ContextSnapshot`] is an optional static capture of the codebase
4//! and external resources that an agent observed when a
5//! [`Run`](super::run::Run) began. Unlike the incremental
6//! [`ContextFrame`](super::context_frame::ContextFrame) event stream,
7//! a ContextSnapshot is a **point-in-time** record that does not
8//! change after creation.
9//!
10//! # How Libra should use this object
11//!
12//! - Create a `ContextSnapshot` only when a stable, reproducible
13//! baseline is worth preserving for a run.
14//! - Populate its items completely before persistence.
15//! - Keep the live, moving context window in Libra and express
16//! incremental changes through `ContextFrame`.
17//!
18//! # Position in Lifecycle
19//!
20//! ```text
21//! ② Intent (Active)
22//! │
23//! └─ ③ Plan references ContextFrame IDs used for planning
24//! │
25//! │ incremental ContextFrame events may continue later
26//! ▼
27//! ⑤ Run created
28//! │
29//! └─ context snapshot captured ──▶ ContextSnapshot (optional, static)
30//! │
31//! ▼
32//! Reproducible execution baseline
33//! ```
34//!
35//! A ContextSnapshot is created at step ⑤ when the Run is initialized.
36//! It complements incremental ContextFrame events: the snapshot captures the
37//! **initial** state (what files, URLs, snippets the agent sees at
38//! start), while ContextFrame events record **incremental** context
39//! changes during execution. Libra may additionally maintain a live
40//! runtime context window as a projection over those immutable frames.
41//!
42//! # Items
43//!
44//! Each [`ContextItem`] has three layers:
45//!
46//! - **`path`** — human-readable locator (repo path, URL, command,
47//! label).
48//! - **`blob`** — Git blob hash pointing to the **full content** at
49//! capture time.
50//! - **`preview`** — truncated text for quick display without reading
51//! the blob.
52//!
53//! All item kinds use `blob` as the unified content reference:
54//!
55//! | Kind | `path` example | `blob` content |
56//! |---|---|---|
57//! | `File` | `src/main.rs` | Same blob in git tree (zero extra storage) |
58//! | `Url` | `https://docs.rs/...` | Fetched page content stored as blob |
59//! | `Snippet` | `"design notes"` | Snippet text stored as blob |
60//! | `Command` | `cargo test` | Command output stored as blob |
61//! | `Image` | `screenshot.png` | Image binary stored as blob |
62//!
63//! `blob` is `Option` because it may be `None` during the
64//! draft/collection phase; by the time the snapshot is finalized,
65//! items should have their blob set.
66//!
67//! # Blob Retention
68//!
69//! Standard `git gc` only considers objects reachable from
70//! refs → commits → trees → blobs. A blob referenced solely by an AI
71//! object's JSON payload is **not** reachable in git's graph and
72//! **will be pruned** after `gc.pruneExpire` (default 2 weeks).
73//!
74//! For `File` items this is not a concern — the blob is already
75//! reachable through the commit tree. For all other kinds,
76//! applications must choose a retention strategy:
77//!
78//! | Strategy | Pros | Cons |
79//! |---|---|---|
80//! | **Ref anchoring** (`refs/ai/blobs/<hex>`) | Simple, works with stock git | Ref namespace pollution |
81//! | **Orphan commit** (`refs/ai/uploads`) | Standard reachability; packable | Extra commit/tree overhead |
82//! | **Keep pack** (`.keep` marker) | Zero ref management | Must repack manually |
83//! | **Custom GC mark** (scan AI objects) | Cleanest long-term | Requires custom gc |
84//!
85//! This library does **not** enforce any particular strategy — the
86//! consuming application is responsible for ensuring referenced blobs
87//! remain reachable.
88//!
89//! # Purpose
90//!
91//! - **Reproducibility**: Given the same ContextSnapshot and Plan, an
92//! agent should produce equivalent results.
93//! - **Auditing**: Reviewers can inspect exactly what context the agent
94//! had access to when making decisions.
95//! - **Content Deduplication**: Using Git blob hashes means identical
96//! file content is stored only once, regardless of how many snapshots
97//! reference it.
98
99use std::fmt::Display;
100
101use serde::{Deserialize, Serialize};
102
103use crate::{
104 errors::GitError,
105 hash::ObjectHash,
106 internal::object::{
107 ObjectTrait,
108 types::{ActorRef, Header, ObjectType},
109 },
110};
111
112/// How the items in a [`ContextSnapshot`] were selected.
113#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
114#[serde(rename_all = "snake_case")]
115pub enum SelectionStrategy {
116 /// Items were explicitly chosen by the user (e.g. "look at these
117 /// files"). The agent should treat these as authoritative context.
118 Explicit,
119 /// Items were automatically selected by the agent or system based
120 /// on relevance heuristics (e.g. file dependency analysis, search
121 /// results). The agent may decide to fetch additional context.
122 Heuristic,
123}
124
125/// The kind of content a [`ContextItem`] represents.
126///
127/// Determines how `path` and `blob` should be interpreted.
128#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
129#[serde(rename_all = "snake_case")]
130pub enum ContextItemKind {
131 /// A regular file in the repository. `path` is a repo-relative
132 /// path (e.g. `src/main.rs`). `blob` is the same object already
133 /// in the git tree (zero extra storage).
134 File,
135 /// A URL (web page, API docs, etc.). `path` is the full URL.
136 /// `blob` contains the fetched page content.
137 Url,
138 /// A free-form text snippet (e.g. design note, doc fragment).
139 /// `path` is a descriptive label. `blob` contains the snippet text.
140 Snippet,
141 /// Command or terminal output. `path` is the command that was run
142 /// (e.g. `cargo test`). `blob` contains the captured output.
143 Command,
144 /// Image or other binary visual content. `path` is the file name.
145 /// `blob` contains the raw binary data.
146 Image,
147 /// Application-defined kind not covered by the variants above.
148 Other(String),
149}
150
151/// A single input item within a [`ContextSnapshot`].
152///
153/// Represents one piece of context the agent has access to — a source
154/// file, a URL, a text snippet, command output, or an image. See
155/// module documentation for the three-layer design (`path` / `blob` /
156/// `preview`) and blob retention strategies.
157#[derive(Debug, Clone, Serialize, Deserialize)]
158#[serde(deny_unknown_fields)]
159pub struct ContextItem {
160 /// The kind of content this item represents. Determines how
161 /// `path` and `blob` should be interpreted.
162 pub kind: ContextItemKind,
163 /// Human-readable locator for this item.
164 ///
165 /// Meaning depends on `kind`: repo-relative path for `File`,
166 /// full URL for `Url`, descriptive label for `Snippet`, shell
167 /// command for `Command`, file name for `Image`.
168 pub path: String,
169 /// Truncated preview of the content for quick display.
170 ///
171 /// Should be kept under 500 characters. `None` when no preview
172 /// is available (e.g. binary content, very short items where
173 /// the full content fits in `path`).
174 #[serde(default, skip_serializing_if = "Option::is_none")]
175 pub preview: Option<String>,
176 /// Git blob hash referencing the **full content** at capture time.
177 ///
178 /// For `File` items, this is the same blob already in the git
179 /// tree (zero extra storage due to content-addressing). For
180 /// other kinds (Url, Snippet, Command, Image), the content is
181 /// stored as a new blob — see module-level docs for retention
182 /// strategies. `None` during the draft/collection phase; should
183 /// be set before the snapshot is finalized.
184 #[serde(default, skip_serializing_if = "Option::is_none")]
185 pub blob: Option<ObjectHash>,
186}
187
188impl ContextItem {
189 /// Create a new draft context item with the given kind and locator.
190 pub fn new(kind: ContextItemKind, path: impl Into<String>) -> Result<Self, String> {
191 let path = path.into();
192 if path.trim().is_empty() {
193 return Err("path cannot be empty".to_string());
194 }
195 Ok(Self {
196 kind,
197 path,
198 preview: None,
199 blob: None,
200 })
201 }
202
203 /// Set or clear the blob hash referencing the full captured content.
204 pub fn set_blob(&mut self, blob: Option<ObjectHash>) {
205 self.blob = blob;
206 }
207}
208
209/// A static capture of the context an agent observed at Run start.
210///
211/// Created once per Run (optional). Records which files, URLs,
212/// snippets, etc. the agent had access to. See module documentation
213/// for lifecycle position, item design, blob retention, and Libra
214/// calling guidance.
215#[derive(Debug, Clone, Serialize, Deserialize)]
216#[serde(deny_unknown_fields)]
217pub struct ContextSnapshot {
218 /// Common header (object ID, type, timestamps, creator, etc.).
219 #[serde(flatten)]
220 header: Header,
221 /// How the items were selected — by the user (`Explicit`) or
222 /// by the agent/system (`Heuristic`).
223 selection_strategy: SelectionStrategy,
224 /// The context items included in this snapshot.
225 ///
226 /// Each item references a piece of content (file, URL, snippet,
227 /// etc.) via its `blob` field. Items are ordered as added; no
228 /// implicit ordering is guaranteed. Empty when the snapshot has
229 /// just been created and items haven't been added yet.
230 #[serde(default, skip_serializing_if = "Vec::is_empty")]
231 items: Vec<ContextItem>,
232 /// Aggregated human-readable summary of all items.
233 ///
234 /// A brief description of the overall context (e.g. "3 source
235 /// files + API docs for /users endpoint"). `None` when no
236 /// summary has been provided.
237 #[serde(default, skip_serializing_if = "Option::is_none")]
238 summary: Option<String>,
239}
240
241impl ContextSnapshot {
242 /// Create a new empty context snapshot with the given selection
243 /// strategy.
244 pub fn new(
245 created_by: ActorRef,
246 selection_strategy: SelectionStrategy,
247 ) -> Result<Self, String> {
248 Ok(Self {
249 header: Header::new(ObjectType::ContextSnapshot, created_by)?,
250 selection_strategy,
251 items: Vec::new(),
252 summary: None,
253 })
254 }
255
256 pub fn header(&self) -> &Header {
257 &self.header
258 }
259
260 pub fn selection_strategy(&self) -> &SelectionStrategy {
261 &self.selection_strategy
262 }
263
264 pub fn items(&self) -> &[ContextItem] {
265 &self.items
266 }
267
268 pub fn summary(&self) -> Option<&str> {
269 self.summary.as_deref()
270 }
271
272 pub fn add_item(&mut self, item: ContextItem) {
273 self.items.push(item);
274 }
275
276 pub fn set_summary(&mut self, summary: Option<String>) {
277 self.summary = summary;
278 }
279}
280
281impl Display for ContextSnapshot {
282 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
283 write!(f, "ContextSnapshot: {}", self.header.object_id())
284 }
285}
286
287impl ObjectTrait for ContextSnapshot {
288 fn from_bytes(data: &[u8], _hash: ObjectHash) -> Result<Self, GitError>
289 where
290 Self: Sized,
291 {
292 serde_json::from_slice(data).map_err(|e| GitError::InvalidObjectInfo(e.to_string()))
293 }
294
295 fn get_type(&self) -> ObjectType {
296 ObjectType::ContextSnapshot
297 }
298
299 fn get_size(&self) -> usize {
300 match serde_json::to_vec(self) {
301 Ok(v) => v.len(),
302 Err(e) => {
303 tracing::warn!("failed to compute ContextSnapshot size: {}", e);
304 0
305 }
306 }
307 }
308
309 fn to_data(&self) -> Result<Vec<u8>, GitError> {
310 serde_json::to_vec(self).map_err(|e| GitError::InvalidObjectInfo(e.to_string()))
311 }
312}
313
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh snapshot is empty; `add_item` and `set_summary` are
    /// visible through the read accessors.
    #[test]
    fn test_context_snapshot_accessors_and_mutators() {
        let actor = ActorRef::agent("coder").expect("actor");
        let mut snapshot =
            ContextSnapshot::new(actor, SelectionStrategy::Heuristic).expect("snapshot");

        assert_eq!(snapshot.selection_strategy(), &SelectionStrategy::Heuristic);
        assert!(snapshot.items().is_empty());
        assert!(snapshot.summary().is_none());

        let item = ContextItem::new(ContextItemKind::File, "src/main.rs").expect("item");
        snapshot.add_item(item);
        snapshot.set_summary(Some("selected by relevance".to_string()));

        assert_eq!(snapshot.items().len(), 1);
        assert_eq!(snapshot.summary(), Some("selected by relevance"));

        // Clearing the summary must be observable as well.
        snapshot.set_summary(None);
        assert!(snapshot.summary().is_none());
    }

    /// `ContextItem::new` yields a draft: the locator is stored as
    /// given and neither `preview` nor `blob` is populated.
    #[test]
    fn test_context_item_new_starts_as_draft() {
        let mut item =
            ContextItem::new(ContextItemKind::Url, "https://docs.rs/serde").expect("item");

        assert_eq!(item.kind, ContextItemKind::Url);
        assert_eq!(item.path, "https://docs.rs/serde");
        assert!(item.preview.is_none());
        assert!(item.blob.is_none());

        // Clearing an already-empty blob is a no-op.
        item.set_blob(None);
        assert!(item.blob.is_none());
    }

    /// Empty and whitespace-only locators are rejected with the
    /// documented message.
    #[test]
    fn test_context_item_new_rejects_blank_path() {
        for &blank in &["", "   ", "\t\n"] {
            let err = ContextItem::new(ContextItemKind::Snippet, blank)
                .expect_err("blank path must be rejected");
            assert_eq!(err, "path cannot be empty");
        }
    }

    /// `Display`, `get_type` and `get_size` agree with the snapshot's
    /// identity and its serialized form.
    #[test]
    fn test_context_snapshot_display_type_and_size() {
        let actor = ActorRef::agent("coder").expect("actor");
        let snapshot =
            ContextSnapshot::new(actor, SelectionStrategy::Explicit).expect("snapshot");

        assert!(snapshot.to_string().starts_with("ContextSnapshot: "));
        assert!(matches!(snapshot.get_type(), ObjectType::ContextSnapshot));

        // `get_size` reports the length of the same JSON encoding that
        // `to_data` produces.
        let encoded_len = snapshot.to_data().ok().map(|bytes| bytes.len());
        assert_eq!(Some(snapshot.get_size()), encoded_len);
    }
}
335}