// engram/context/bundle.rs

//! Compact Operational Context bundles for session resumption.

use rusqlite::Connection;
use serde::{Deserialize, Serialize};

use crate::context::search::{
    search_context, ArtifactPointer, ContextProvenance, ContextSearchItem, ContextSearchRequest,
    StalenessWarning,
};
use crate::context::{estimate_tokens, ContextBundleMetrics};
use crate::error::Result;

/// Result cap passed to the context search when the request does not set `max_results`.
const DEFAULT_BUNDLE_LIMIT: usize = 80;
/// Per-section entry cap used when the request does not set `section_limit`.
const DEFAULT_SECTION_LIMIT: usize = 12;

16#[derive(Debug, Clone, Default, Deserialize)]
17pub struct ContextBundleRequest {
18    #[serde(default)]
19    pub query: Option<String>,
20    #[serde(default)]
21    pub repo_id: Option<String>,
22    #[serde(default)]
23    pub workspace_path_hash: Option<String>,
24    #[serde(default)]
25    pub workspace: Option<String>,
26    #[serde(default)]
27    pub session_id: Option<String>,
28    #[serde(default)]
29    pub task_id: Option<String>,
30    #[serde(default)]
31    pub max_results: Option<usize>,
32    #[serde(default)]
33    pub section_limit: Option<usize>,
34    #[serde(default)]
35    pub include_artifact_pointers: bool,
36    #[serde(default)]
37    pub current_git_branch: Option<String>,
38    #[serde(default)]
39    pub current_commit_hash: Option<String>,
40    #[serde(default)]
41    pub stale_after_days: Option<i64>,
42}
43
44#[derive(Debug, Clone, Serialize)]
45pub struct ContextBundle {
46    pub bundle_type: String,
47    pub query: Option<String>,
48    pub scope: BundleScope,
49    pub summary_policy: String,
50    pub artifact_policy: String,
51    pub failures: Vec<BundleEntry>,
52    pub unresolved_blockers: Vec<BundleEntry>,
53    pub recent_decisions: Vec<BundleEntry>,
54    pub commands_already_run: Vec<CommandEntry>,
55    pub files_inspected_or_touched: Vec<FileEntry>,
56    pub stale_warnings: Vec<BundleStaleWarning>,
57    pub artifact_pointers: Vec<ArtifactPointer>,
58    pub relevant_context: Vec<BundleEntry>,
59    pub metrics: ContextBundleMetrics,
60    pub markdown: String,
61}
62
63#[derive(Debug, Clone, Serialize)]
64pub struct BundleScope {
65    pub repo_id: Option<String>,
66    pub workspace_path_hash: Option<String>,
67    pub session_id: Option<String>,
68    pub task_id: Option<String>,
69}
70
71#[derive(Debug, Clone, Serialize)]
72pub struct BundleEntry {
73    pub kind: String,
74    pub title: String,
75    pub summary: Option<BundleSummary>,
76    pub event_type: String,
77    pub source: String,
78    pub started_at: String,
79    pub provenance: ContextProvenance,
80    pub staleness: Vec<StalenessWarning>,
81    pub artifact_pointers: Vec<ArtifactPointer>,
82}
83
84#[derive(Debug, Clone, Serialize)]
85pub struct BundleSummary {
86    pub text: String,
87    pub derived: bool,
88    pub lossy: bool,
89    pub confidence: f64,
90}
91
92#[derive(Debug, Clone, Serialize)]
93pub struct CommandEntry {
94    pub command_name: String,
95    pub exit_code: Option<i64>,
96    pub cwd: Option<String>,
97    pub started_at: String,
98    pub provenance: ContextProvenance,
99}
100
101#[derive(Debug, Clone, Serialize)]
102pub struct FileEntry {
103    pub path: String,
104    pub signal: String,
105    pub provenance: ContextProvenance,
106}
107
108#[derive(Debug, Clone, Serialize)]
109pub struct BundleStaleWarning {
110    pub warning: StalenessWarning,
111    pub provenance: ContextProvenance,
112}
113
114pub fn build_context_bundle(
115    conn: &Connection,
116    request: &ContextBundleRequest,
117) -> Result<ContextBundle> {
118    let section_limit = request
119        .section_limit
120        .unwrap_or(DEFAULT_SECTION_LIMIT)
121        .clamp(1, 50);
122    let search_request = ContextSearchRequest {
123        query: request.query.clone(),
124        repo_id: request.repo_id.clone(),
125        workspace_path_hash: request.workspace_path_hash.clone(),
126        workspace: request.workspace.clone(),
127        session_id: request.session_id.clone(),
128        task_id: request.task_id.clone(),
129        max_results: Some(request.max_results.unwrap_or(DEFAULT_BUNDLE_LIMIT)),
130        include_artifact_pointers: request.include_artifact_pointers,
131        current_git_branch: request.current_git_branch.clone(),
132        current_commit_hash: request.current_commit_hash.clone(),
133        stale_after_days: request.stale_after_days,
134        ..Default::default()
135    };
136    let search = search_context(conn, &search_request)?;
137
138    let mut failures = Vec::new();
139    let mut blockers = Vec::new();
140    let mut decisions = Vec::new();
141    let mut commands = Vec::new();
142    let mut files = Vec::new();
143    let mut stale_warnings = Vec::new();
144    let mut artifact_pointers = Vec::new();
145    let mut relevant = Vec::new();
146
147    for item in &search.results {
148        if is_failure(item) && failures.len() < section_limit {
149            failures.push(bundle_entry("failure", item));
150        }
151        if is_blocker(item) && blockers.len() < section_limit {
152            blockers.push(bundle_entry("unresolved_blocker", item));
153        }
154        if is_decision(item) && decisions.len() < section_limit {
155            decisions.push(bundle_entry("decision", item));
156        }
157        if let Some(command) = command_entry(item) {
158            if commands.len() < section_limit {
159                commands.push(command);
160            }
161        }
162        for file in &item.extracted_files {
163            if files.len() >= section_limit {
164                break;
165            }
166            if !files.iter().any(|entry: &FileEntry| entry.path == *file) {
167                files.push(FileEntry {
168                    path: file.clone(),
169                    signal: "metadata_path".to_string(),
170                    provenance: item.provenance.clone(),
171                });
172            }
173        }
174        for warning in &item.staleness {
175            stale_warnings.push(BundleStaleWarning {
176                warning: warning.clone(),
177                provenance: item.provenance.clone(),
178            });
179        }
180        for pointer in &item.artifact_pointers {
181            artifact_pointers.push(pointer.clone());
182        }
183        if relevant.len() < section_limit {
184            relevant.push(bundle_entry("relevant_context", item));
185        }
186    }
187
188    artifact_pointers.sort_by(|a, b| {
189        a.artifact_id
190            .cmp(&b.artifact_id)
191            .then(a.pointer_type.cmp(&b.pointer_type))
192    });
193    artifact_pointers.dedup_by(|a, b| {
194        a.artifact_id == b.artifact_id
195            && a.pointer_type == b.pointer_type
196            && a.event_id == b.event_id
197            && a.summary_id == b.summary_id
198    });
199
200    let retrieved_item_count = search.results.len();
201    let retrieved_context_tokens_est = search.results.iter().map(item_tokens_est).sum();
202    let included_section_entry_count = failures.len()
203        + blockers.len()
204        + decisions.len()
205        + commands.len()
206        + files.len()
207        + relevant.len();
208    let excluded_item_count = retrieved_item_count.saturating_sub(relevant.len());
209    let summarized_artifact_ref_count = artifact_pointers
210        .iter()
211        .filter(|pointer| !pointer.pointer_type.contains("raw"))
212        .count();
213    let raw_artifact_ref_count = artifact_pointers
214        .iter()
215        .filter(|pointer| pointer.pointer_type.contains("raw"))
216        .count();
217
218    let mut bundle = ContextBundle {
219        bundle_type: "operational_context".to_string(),
220        query: search.query,
221        scope: BundleScope {
222            repo_id: search.scope.repo_id,
223            workspace_path_hash: search.scope.workspace_path_hash,
224            session_id: search.scope.session_id,
225            task_id: search.scope.task_id,
226        },
227        summary_policy: "Summaries are derived/lossy when marked so by reducers.".to_string(),
228        artifact_policy: if request.include_artifact_pointers {
229            "Artifact pointers included; raw artifact content is never included.".to_string()
230        } else {
231            "Artifact pointers omitted; raw artifact content is never included.".to_string()
232        },
233        failures,
234        unresolved_blockers: blockers,
235        recent_decisions: decisions,
236        commands_already_run: commands,
237        files_inspected_or_touched: files,
238        stale_warnings,
239        artifact_pointers,
240        relevant_context: relevant,
241        metrics: ContextBundleMetrics::default(),
242        markdown: String::new(),
243    };
244    bundle.markdown = render_markdown(&bundle);
245    bundle.metrics = ContextBundleMetrics {
246        metric_type: "bundle_reuse_audit".to_string(),
247        estimated: true,
248        method: "chars_div_4_estimate".to_string(),
249        retrieved_item_count,
250        included_section_entry_count,
251        excluded_item_count,
252        artifact_pointer_count: bundle.artifact_pointers.len(),
253        summarized_artifact_ref_count,
254        raw_artifact_ref_count,
255        raw_artifact_return_count: 0,
256        retrieved_context_tokens_est,
257        bundle_tokens_est: estimate_tokens(&bundle.markdown),
258        excluded_tokens_est: None,
259        notes: vec![
260            "Raw artifact content is never included in bundles.".to_string(),
261            "Token counts are estimates for audit, not guaranteed savings.".to_string(),
262        ],
263    };
264    Ok(bundle)
265}
266
267fn bundle_entry(kind: &str, item: &ContextSearchItem) -> BundleEntry {
268    BundleEntry {
269        kind: kind.to_string(),
270        title: title_for(item),
271        summary: item.summary.as_ref().map(|summary| BundleSummary {
272            text: truncate(&summary.summary, 700),
273            derived: summary.derived,
274            lossy: summary.lossy,
275            confidence: summary.confidence,
276        }),
277        event_type: item.event.event_type.clone(),
278        source: item.event.source.clone(),
279        started_at: item.event.started_at.clone(),
280        provenance: item.provenance.clone(),
281        staleness: item.staleness.clone(),
282        artifact_pointers: item.artifact_pointers.clone(),
283    }
284}
285
286fn command_entry(item: &ContextSearchItem) -> Option<CommandEntry> {
287    item.event
288        .command_name
289        .as_ref()
290        .map(|command_name| CommandEntry {
291            command_name: command_name.clone(),
292            exit_code: item.event.exit_code,
293            cwd: item.event.cwd.clone(),
294            started_at: item.event.started_at.clone(),
295            provenance: item.provenance.clone(),
296        })
297}
298
299fn is_failure(item: &ContextSearchItem) -> bool {
300    item.event.exit_code.map(|code| code != 0).unwrap_or(false)
301        || contains_any(&item.event.event_type, &["fail", "error"])
302        || item
303            .summary
304            .as_ref()
305            .map(|summary| contains_any(&summary.summary, &["fail", "error"]))
306            .unwrap_or(false)
307}
308
309fn is_blocker(item: &ContextSearchItem) -> bool {
310    contains_any(&item.event.event_type, &["block", "blocked", "blocker"])
311        || contains_any(
312            &item.event.metadata.to_string(),
313            &["blocker", "blocked", "unresolved"],
314        )
315        || item
316            .summary
317            .as_ref()
318            .map(|summary| contains_any(&summary.summary, &["blocker", "blocked", "unresolved"]))
319            .unwrap_or(false)
320}
321
322fn is_decision(item: &ContextSearchItem) -> bool {
323    contains_any(&item.event.event_type, &["decision"])
324        || item
325            .summary
326            .as_ref()
327            .map(|summary| contains_any(&summary.summary, &["decision", "decided"]))
328            .unwrap_or(false)
329        || contains_any(&item.event.metadata.to_string(), &["decision", "decided"])
330}
331
/// Returns `true` when `value`, lowercased, contains at least one of `needles` as a substring.
///
/// Only `value` is lowercased, so needles must already be lowercase to match
/// case-insensitively. An empty `needles` slice never matches.
fn contains_any(value: &str, needles: &[&str]) -> bool {
    let haystack = value.to_lowercase();
    needles.iter().any(|needle| haystack.contains(*needle))
}

337fn item_tokens_est(item: &ContextSearchItem) -> i64 {
338    let summary_tokens = item
339        .summary
340        .as_ref()
341        .and_then(|summary| summary.tokens_compact_est)
342        .or_else(|| {
343            item.summary
344                .as_ref()
345                .map(|summary| estimate_tokens(&summary.summary))
346        })
347        .unwrap_or(0);
348    let metadata_tokens = estimate_tokens(&item.event.metadata.to_string());
349    summary_tokens + metadata_tokens
350}
351
352fn title_for(item: &ContextSearchItem) -> String {
353    if let Some(summary) = &item.summary {
354        return truncate(&summary.summary, 120);
355    }
356    if let Some(command) = &item.event.command_name {
357        return format!("command: {command}");
358    }
359    if let Some(tool) = &item.event.tool_name {
360        return format!("tool: {tool}");
361    }
362    item.event.event_type.clone()
363}
364
365fn render_markdown(bundle: &ContextBundle) -> String {
366    let mut out = String::new();
367    out.push_str("# Operational Context Bundle\n\n");
368    if let Some(query) = &bundle.query {
369        out.push_str(&format!("Query: `{}`\n\n", escape_inline(query)));
370    }
371    out.push_str(
372        "Policy: raw artifact content is excluded. Summaries marked lossy are derived/lossy.\n\n",
373    );
374    render_entries(&mut out, "Recent relevant failures", &bundle.failures);
375    render_entries(
376        &mut out,
377        "Unresolved blockers (inferred)",
378        &bundle.unresolved_blockers,
379    );
380    render_entries(&mut out, "Recent decisions", &bundle.recent_decisions);
381    render_commands(&mut out, &bundle.commands_already_run);
382    render_files(&mut out, &bundle.files_inspected_or_touched);
383    render_stale_warnings(&mut out, &bundle.stale_warnings);
384    render_artifacts(&mut out, &bundle.artifact_pointers);
385    render_entries(&mut out, "Relevant context", &bundle.relevant_context);
386    out
387}
388
389fn render_entries(out: &mut String, title: &str, entries: &[BundleEntry]) {
390    out.push_str(&format!("## {title}\n"));
391    if entries.is_empty() {
392        out.push_str("- None found.\n\n");
393        return;
394    }
395    for entry in entries {
396        out.push_str(&format!(
397            "- {} [{}]\n",
398            entry.title,
399            provenance_label(&entry.provenance)
400        ));
401        if let Some(summary) = &entry.summary {
402            out.push_str(&format!(
403                "  Summary: {} (derived={}, lossy={}, confidence={:.2})\n",
404                summary.text, summary.derived, summary.lossy, summary.confidence
405            ));
406        }
407    }
408    out.push('\n');
409}
410
411fn render_commands(out: &mut String, commands: &[CommandEntry]) {
412    out.push_str("## Commands already run\n");
413    if commands.is_empty() {
414        out.push_str("- None found.\n\n");
415        return;
416    }
417    for command in commands {
418        out.push_str(&format!(
419            "- `{}` exit={:?} [{}]\n",
420            escape_inline(&command.command_name),
421            command.exit_code,
422            provenance_label(&command.provenance)
423        ));
424    }
425    out.push('\n');
426}
427
428fn render_files(out: &mut String, files: &[FileEntry]) {
429    out.push_str("## Files inspected or touched\n");
430    if files.is_empty() {
431        out.push_str("- None found.\n\n");
432        return;
433    }
434    for file in files {
435        out.push_str(&format!(
436            "- `{}` signal={} [{}]\n",
437            escape_inline(&file.path),
438            file.signal,
439            provenance_label(&file.provenance)
440        ));
441    }
442    out.push('\n');
443}
444
445fn render_stale_warnings(out: &mut String, warnings: &[BundleStaleWarning]) {
446    out.push_str("## Staleness warnings\n");
447    if warnings.is_empty() {
448        out.push_str("- None found.\n\n");
449        return;
450    }
451    for stale in warnings {
452        out.push_str(&format!(
453            "- {}: {} [{}]\n",
454            stale.warning.kind,
455            stale.warning.message,
456            provenance_label(&stale.provenance)
457        ));
458    }
459    out.push('\n');
460}
461
462fn render_artifacts(out: &mut String, pointers: &[ArtifactPointer]) {
463    out.push_str("## Artifact pointers\n");
464    if pointers.is_empty() {
465        out.push_str("- None included.\n\n");
466        return;
467    }
468    for pointer in pointers {
469        out.push_str(&format!(
470            "- {} `{}` [{}]\n",
471            pointer.pointer_type,
472            escape_inline(&pointer.artifact_id),
473            provenance_label(&pointer.provenance)
474        ));
475    }
476    out.push('\n');
477}
478
479fn provenance_label(provenance: &ContextProvenance) -> String {
480    format!(
481        "event_id={} summary_id={:?} session_id={} task_id={:?} source={} started_at={}",
482        provenance.event_id,
483        provenance.summary_id,
484        provenance.session_id,
485        provenance.task_id,
486        provenance.source,
487        provenance.started_at
488    )
489}
490
/// Shortens `value` to at most `max_bytes` bytes of content and appends `"..."`.
///
/// Strings that already fit are returned unchanged, with no ellipsis. When a cut is needed it
/// is moved back to the nearest UTF-8 character boundary at or below `max_bytes`, so the
/// slice never splits a multi-byte character. The returned string can therefore be up to
/// `max_bytes + 3` bytes long.
fn truncate(value: &str, max_bytes: usize) -> String {
    if value.len() <= max_bytes {
        return value.to_string();
    }
    // Index 0 is always a character boundary, so the search cannot come up empty.
    let boundary = (0..=max_bytes)
        .rev()
        .find(|&index| value.is_char_boundary(index))
        .unwrap_or(0);
    format!("{}...", &value[..boundary])
}

/// Replaces every backtick in `value` with a single quote so the text can sit inside a
/// markdown inline-code span without closing it early.
fn escape_inline(value: &str) -> String {
    value.replace('`', "'")
}