git_internal/internal/object/context.rs
1//! AI Context Snapshot Definition
2//!
3//! A [`ContextSnapshot`] is an optional static capture of the codebase
4//! and external resources that an agent observed when a
5//! [`Run`](super::run::Run) began. Unlike the dynamic
6//! [`ContextPipeline`](super::pipeline::ContextPipeline) (which
7//! accumulates frames during execution), a ContextSnapshot is a
8//! **point-in-time** record that does not change after creation.
9//!
10//! # Position in Lifecycle
11//!
12//! ```text
13//! ⑤ Run ──snapshot──▶ ContextSnapshot (optional, static)
14//! │
15//! ├──▶ ContextPipeline (dynamic, via Plan.pipeline)
16//! │
17//! ▼
18//! ⑥ ToolInvocations ...
19//! ```
20//!
21//! A ContextSnapshot is created at step ⑤ when the Run is initialized.
22//! It complements the ContextPipeline: the snapshot captures the
23//! **initial** state (what files, URLs, snippets the agent sees at
24//! start), while the pipeline tracks **incremental** context changes
25//! during execution.
26//!
27//! # Items
28//!
29//! Each [`ContextItem`] has three layers:
30//!
31//! - **`path`** — human-readable locator (repo path, URL, command,
32//! label).
33//! - **`blob`** — Git blob hash pointing to the **full content** at
34//! capture time.
35//! - **`preview`** — truncated text for quick display without reading
36//! the blob.
37//!
38//! All item kinds use `blob` as the unified content reference:
39//!
40//! | Kind | `path` example | `blob` content |
41//! |---|---|---|
42//! | `File` | `src/main.rs` | Same blob in git tree (zero extra storage) |
43//! | `Url` | `https://docs.rs/...` | Fetched page content stored as blob |
44//! | `Snippet` | `"design notes"` | Snippet text stored as blob |
45//! | `Command` | `cargo test` | Command output stored as blob |
46//! | `Image` | `screenshot.png` | Image binary stored as blob |
47//!
48//! `blob` is `Option` because it may be `None` during the
49//! draft/collection phase; by the time the snapshot is finalized,
50//! items should have their blob set.
51//!
52//! # Blob Retention
53//!
54//! Standard `git gc` only considers objects reachable from
55//! refs → commits → trees → blobs. A blob referenced solely by an AI
56//! object's JSON payload is **not** reachable in git's graph and
57//! **will be pruned** after `gc.pruneExpire` (default 2 weeks).
58//!
59//! For `File` items this is not a concern — the blob is already
60//! reachable through the commit tree. For all other kinds,
61//! applications must choose a retention strategy:
62//!
63//! | Strategy | Pros | Cons |
64//! |---|---|---|
65//! | **Ref anchoring** (`refs/ai/blobs/<hex>`) | Simple, works with stock git | Ref namespace pollution |
66//! | **Orphan commit** (`refs/ai/uploads`) | Standard reachability; packable | Extra commit/tree overhead |
67//! | **Keep pack** (`.keep` marker) | Zero ref management | Must repack manually |
68//! | **Custom GC mark** (scan AI objects) | Cleanest long-term | Requires custom gc |
69//!
70//! This library does **not** enforce any particular strategy — the
71//! consuming application is responsible for ensuring referenced blobs
72//! remain reachable.
73//!
74//! # Purpose
75//!
76//! - **Reproducibility**: Given the same ContextSnapshot and Plan, an
77//! agent should produce equivalent results.
78//! - **Auditing**: Reviewers can inspect exactly what context the agent
79//! had access to when making decisions.
80//! - **Content Deduplication**: Using Git blob hashes means identical
81//! file content is stored only once, regardless of how many snapshots
82//! reference it.
83
84use std::fmt::Display;
85
86use serde::{Deserialize, Serialize};
87
88use crate::{
89 errors::GitError,
90 hash::ObjectHash,
91 internal::object::{
92 ObjectTrait,
93 types::{ActorRef, Header, ObjectType},
94 },
95};
96
97/// How the items in a [`ContextSnapshot`] were selected.
98#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
99#[serde(rename_all = "snake_case")]
100pub enum SelectionStrategy {
101 /// Items were explicitly chosen by the user (e.g. "look at these
102 /// files"). The agent should treat these as authoritative context.
103 Explicit,
104 /// Items were automatically selected by the agent or system based
105 /// on relevance heuristics (e.g. file dependency analysis, search
106 /// results). The agent may decide to fetch additional context.
107 Heuristic,
108}
109
110/// The kind of content a [`ContextItem`] represents.
111///
112/// Determines how `path` and `blob` should be interpreted.
113#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
114#[serde(rename_all = "snake_case")]
115pub enum ContextItemKind {
116 /// A regular file in the repository. `path` is a repo-relative
117 /// path (e.g. `src/main.rs`). `blob` is the same object already
118 /// in the git tree (zero extra storage).
119 File,
120 /// A URL (web page, API docs, etc.). `path` is the full URL.
121 /// `blob` contains the fetched page content.
122 Url,
123 /// A free-form text snippet (e.g. design note, doc fragment).
124 /// `path` is a descriptive label. `blob` contains the snippet text.
125 Snippet,
126 /// Command or terminal output. `path` is the command that was run
127 /// (e.g. `cargo test`). `blob` contains the captured output.
128 Command,
129 /// Image or other binary visual content. `path` is the file name.
130 /// `blob` contains the raw binary data.
131 Image,
132 /// Application-defined kind not covered by the variants above.
133 Other(String),
134}
135
136/// A single input item within a [`ContextSnapshot`].
137///
138/// Represents one piece of context the agent has access to — a source
139/// file, a URL, a text snippet, command output, or an image. See
140/// module documentation for the three-layer design (`path` / `blob` /
141/// `preview`) and blob retention strategies.
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct ContextItem {
144 /// The kind of content this item represents. Determines how
145 /// `path` and `blob` should be interpreted.
146 pub kind: ContextItemKind,
147 /// Human-readable locator for this item.
148 ///
149 /// Meaning depends on `kind`: repo-relative path for `File`,
150 /// full URL for `Url`, descriptive label for `Snippet`, shell
151 /// command for `Command`, file name for `Image`.
152 pub path: String,
153 /// Truncated preview of the content for quick display.
154 ///
155 /// Should be kept under 500 characters. `None` when no preview
156 /// is available (e.g. binary content, very short items where
157 /// the full content fits in `path`).
158 #[serde(default, skip_serializing_if = "Option::is_none")]
159 pub preview: Option<String>,
160 /// Git blob hash referencing the **full content** at capture time.
161 ///
162 /// For `File` items, this is the same blob already in the git
163 /// tree (zero extra storage due to content-addressing). For
164 /// other kinds (Url, Snippet, Command, Image), the content is
165 /// stored as a new blob — see module-level docs for retention
166 /// strategies. `None` during the draft/collection phase; should
167 /// be set before the snapshot is finalized.
168 #[serde(default, skip_serializing_if = "Option::is_none")]
169 pub blob: Option<ObjectHash>,
170}
171
172impl ContextItem {
173 pub fn new(kind: ContextItemKind, path: impl Into<String>) -> Result<Self, String> {
174 let path = path.into();
175 if path.trim().is_empty() {
176 return Err("path cannot be empty".to_string());
177 }
178 Ok(Self {
179 kind,
180 path,
181 preview: None,
182 blob: None,
183 })
184 }
185
186 pub fn set_blob(&mut self, blob: Option<ObjectHash>) {
187 self.blob = blob;
188 }
189}
190
191/// A static capture of the context an agent observed at Run start.
192///
193/// Created once per Run (optional). Records which files, URLs,
194/// snippets, etc. the agent had access to. See module documentation
195/// for lifecycle position, item design, and blob retention.
196#[derive(Debug, Clone, Serialize, Deserialize)]
197pub struct ContextSnapshot {
198 /// Common header (object ID, type, timestamps, creator, etc.).
199 #[serde(flatten)]
200 header: Header,
201 /// How the items were selected — by the user (`Explicit`) or
202 /// by the agent/system (`Heuristic`).
203 selection_strategy: SelectionStrategy,
204 /// The context items included in this snapshot.
205 ///
206 /// Each item references a piece of content (file, URL, snippet,
207 /// etc.) via its `blob` field. Items are ordered as added; no
208 /// implicit ordering is guaranteed. Empty when the snapshot has
209 /// just been created and items haven't been added yet.
210 #[serde(default, skip_serializing_if = "Vec::is_empty")]
211 items: Vec<ContextItem>,
212 /// Aggregated human-readable summary of all items.
213 ///
214 /// A brief description of the overall context (e.g. "3 source
215 /// files + API docs for /users endpoint"). `None` when no
216 /// summary has been provided.
217 #[serde(default, skip_serializing_if = "Option::is_none")]
218 summary: Option<String>,
219}
220
221impl ContextSnapshot {
222 pub fn new(
223 created_by: ActorRef,
224 selection_strategy: SelectionStrategy,
225 ) -> Result<Self, String> {
226 Ok(Self {
227 header: Header::new(ObjectType::ContextSnapshot, created_by)?,
228 selection_strategy,
229 items: Vec::new(),
230 summary: None,
231 })
232 }
233
234 pub fn header(&self) -> &Header {
235 &self.header
236 }
237
238 pub fn selection_strategy(&self) -> &SelectionStrategy {
239 &self.selection_strategy
240 }
241
242 pub fn items(&self) -> &[ContextItem] {
243 &self.items
244 }
245
246 pub fn summary(&self) -> Option<&str> {
247 self.summary.as_deref()
248 }
249
250 pub fn add_item(&mut self, item: ContextItem) {
251 self.items.push(item);
252 }
253
254 pub fn set_summary(&mut self, summary: Option<String>) {
255 self.summary = summary;
256 }
257}
258
259impl Display for ContextSnapshot {
260 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
261 write!(f, "ContextSnapshot: {}", self.header.object_id())
262 }
263}
264
265impl ObjectTrait for ContextSnapshot {
266 fn from_bytes(data: &[u8], _hash: ObjectHash) -> Result<Self, GitError>
267 where
268 Self: Sized,
269 {
270 serde_json::from_slice(data).map_err(|e| GitError::InvalidObjectInfo(e.to_string()))
271 }
272
273 fn get_type(&self) -> ObjectType {
274 ObjectType::ContextSnapshot
275 }
276
277 fn get_size(&self) -> usize {
278 match serde_json::to_vec(self) {
279 Ok(v) => v.len(),
280 Err(e) => {
281 tracing::warn!("failed to compute ContextSnapshot size: {}", e);
282 0
283 }
284 }
285 }
286
287 fn to_data(&self) -> Result<Vec<u8>, GitError> {
288 serde_json::to_vec(self).map_err(|e| GitError::InvalidObjectInfo(e.to_string()))
289 }
290}
291
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh snapshot starts empty, and `add_item` / `set_summary`
    /// are visible through the matching accessors.
    #[test]
    fn test_context_snapshot_accessors_and_mutators() {
        let actor = ActorRef::agent("coder").expect("actor");
        let mut snapshot =
            ContextSnapshot::new(actor, SelectionStrategy::Heuristic).expect("snapshot");

        assert_eq!(snapshot.selection_strategy(), &SelectionStrategy::Heuristic);
        assert!(snapshot.items().is_empty());
        assert!(snapshot.summary().is_none());

        let item = ContextItem::new(ContextItemKind::File, "src/main.rs").expect("item");
        snapshot.add_item(item);
        snapshot.set_summary(Some("selected by relevance".to_string()));

        assert_eq!(snapshot.items().len(), 1);
        assert_eq!(snapshot.summary(), Some("selected by relevance"));
    }

    /// `ContextItem::new` keeps the given kind and path and starts
    /// with neither preview nor blob.
    #[test]
    fn test_context_item_new_defaults() {
        let item = ContextItem::new(ContextItemKind::File, "src/main.rs").expect("item");

        assert_eq!(item.kind, ContextItemKind::File);
        assert_eq!(item.path, "src/main.rs");
        assert!(item.preview.is_none());
        assert!(item.blob.is_none());
    }

    /// Empty and whitespace-only paths are rejected with the
    /// documented error message.
    #[test]
    fn test_context_item_rejects_blank_path() {
        for blank in ["", "   ", "\t\n"] {
            let err = ContextItem::new(ContextItemKind::Snippet, blank)
                .expect_err("blank path must be rejected");
            assert_eq!(err, "path cannot be empty");
        }
    }

    /// Clearing the blob through `set_blob(None)` leaves it unset.
    #[test]
    fn test_context_item_set_blob_none_clears() {
        let mut item = ContextItem::new(ContextItemKind::Command, "cargo test").expect("item");
        item.set_blob(None);
        assert!(item.blob.is_none());
    }
}