// Source path: rig_compose/context.rs

//! [`InvestigationContext`] — the runtime object that flows through every
//! [`super::Skill`] in an agent step.
//!
//! Skills mutate the context by appending [`Evidence`] and adjusting
//! confidence; they do not own it. The owning [`super::Agent`] threads a
//! single context through its skill chain for one investigation.
//!
//! This module also defines the provider-neutral context-packing types
//! ([`ContextItem`], [`ContextProvenance`], [`ContextPack`], and
//! [`ContextPackConfig`]).
7
8use std::time::SystemTime;
9
10use serde::{Deserialize, Serialize};
11use serde_json::Value;
12use uuid::Uuid;
13
14/// Provider-neutral category for a piece of context that may enter a model
15/// window.
16///
17/// The enum names where the item came from without coupling the kernel to a
18/// concrete backend such as Memvid, MCP, a vector database, or a provider SDK.
19#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
20pub enum ContextSourceKind {
21    /// Long-term memory, episodic recall, summaries, or structured memory cards.
22    Memory,
23    /// Result returned by a tool call.
24    ToolResult,
25    /// Resource lookup such as a graph, baseline, policy, or document store.
26    Resource,
27    /// File or document content selected for the task.
28    File,
29    /// Working notes, plans, hypotheses, or other non-durable reasoning state.
30    Reasoning,
31    /// System, developer, or application instructions carried into context.
32    Instruction,
33    /// Current user input or task text.
34    UserInput,
35    /// Caller-defined source kind.
36    Other(String),
37}
38
39/// Provider-neutral lifecycle state for a projected context item.
40///
41/// Producer crates can attach this to [`ContextProvenance`] when the host needs
42/// to explain why a candidate was expanded, skipped, suppressed, superseded, or
43/// escalated before it reached [`ContextPack::pack`]. The packer still records
44/// its own final [`ContextOmissionReason`] for items omitted by budget or item
45/// count.
46#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
47#[serde(rename_all = "snake_case")]
48pub enum ContextProjectionState {
49    /// Candidate is eligible for packing.
50    Candidate,
51    /// Candidate was expanded from a source item into derived context.
52    Expanded,
53    /// Candidate was skipped before packing.
54    Skipped,
55    /// Candidate was suppressed by caller policy.
56    Suppressed,
57    /// Candidate was rejected by caller policy.
58    Rejected,
59    /// Candidate was superseded by a newer or more authoritative item.
60    Superseded,
61    /// Candidate is stale relative to a newer version.
62    Stale,
63    /// Candidate conflicts with another item and needs host resolution.
64    Conflict,
65    /// Candidate was escalated for higher-level handling.
66    Escalated,
67    /// Caller-defined state.
68    Other(String),
69}
70
71/// Shared provenance keys for context projected by memory, resource, graph, or
72/// tool-result producers.
73///
74/// `rig-compose` continues to store provenance on [`ContextItem`] as JSON so
75/// downstream crates can attach crate-specific fields without depending on each
76/// other. This helper gives those crates a common vocabulary for the fields that
77/// matter to replay, evaluation, and omission explanations.
78///
79/// ```rust
80/// use rig_compose::{ContextItem, ContextProvenance, ContextSourceKind};
81///
82/// let provenance = ContextProvenance::new()
83///     .with_source_uri("memory://incident/42")
84///     .with_principal("alice")
85///     .with_scope("workspace")
86///     .with_confidence(0.92);
87///
88/// let item = ContextItem::new(ContextSourceKind::Memory, "frame-42", "prior incident")
89///     .with_context_provenance(provenance);
90///
91/// assert_eq!(
92///     item.context_provenance().unwrap().source_uri.as_deref(),
93///     Some("memory://incident/42")
94/// );
95/// ```
96#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
97#[serde(default)]
98pub struct ContextProvenance {
99    /// URI or locator for the original source record.
100    #[serde(skip_serializing_if = "Option::is_none")]
101    pub source_uri: Option<String>,
102    /// Principal, actor, tenant, or subject associated with the source record.
103    #[serde(skip_serializing_if = "Option::is_none")]
104    pub principal: Option<String>,
105    /// Caller-defined scope such as tenant, workspace, profile, or project.
106    #[serde(skip_serializing_if = "Option::is_none")]
107    pub scope: Option<String>,
108    /// Retention or archive tier associated with the source record.
109    #[serde(skip_serializing_if = "Option::is_none")]
110    pub retention_tier: Option<String>,
111    /// Milliseconds since the Unix epoch when the source record was recorded.
112    #[serde(skip_serializing_if = "Option::is_none")]
113    pub recorded_at_millis: Option<i64>,
114    /// Milliseconds since the Unix epoch when the source record became
115    /// effective for supersession or freshness comparisons.
116    #[serde(skip_serializing_if = "Option::is_none")]
117    pub effective_at_millis: Option<i64>,
118    /// Source-provided confidence score, when it is distinct from
119    /// [`ContextItem::score`].
120    #[serde(skip_serializing_if = "Option::is_none")]
121    pub confidence: Option<f64>,
122    /// Stable key used to compare competing versions of the same fact.
123    #[serde(skip_serializing_if = "Option::is_none")]
124    pub version_key: Option<String>,
125    /// Source frame/document id used by memory stores and eval fixtures.
126    ///
127    /// Stored as JSON so existing producers can keep numeric frame ids while
128    /// others use string document keys.
129    #[serde(skip_serializing_if = "Option::is_none")]
130    pub source_frame_id: Option<Value>,
131    /// Lifecycle state assigned before the packer makes final budget decisions.
132    #[serde(skip_serializing_if = "Option::is_none")]
133    pub projection_state: Option<ContextProjectionState>,
134    /// Machine-readable reason for the projection state.
135    #[serde(skip_serializing_if = "Option::is_none")]
136    pub reason: Option<String>,
137}
138
139impl ContextProvenance {
140    /// Create empty provenance ready for builder-style population.
141    #[must_use]
142    pub fn new() -> Self {
143        Self::default()
144    }
145
146    /// Set [`Self::source_uri`].
147    #[must_use]
148    pub fn with_source_uri(mut self, source_uri: impl Into<String>) -> Self {
149        self.source_uri = Some(source_uri.into());
150        self
151    }
152
153    /// Set [`Self::principal`].
154    #[must_use]
155    pub fn with_principal(mut self, principal: impl Into<String>) -> Self {
156        self.principal = Some(principal.into());
157        self
158    }
159
160    /// Set [`Self::scope`].
161    #[must_use]
162    pub fn with_scope(mut self, scope: impl Into<String>) -> Self {
163        self.scope = Some(scope.into());
164        self
165    }
166
167    /// Set [`Self::retention_tier`].
168    #[must_use]
169    pub fn with_retention_tier(mut self, retention_tier: impl Into<String>) -> Self {
170        self.retention_tier = Some(retention_tier.into());
171        self
172    }
173
174    /// Set [`Self::recorded_at_millis`].
175    #[must_use]
176    pub fn with_recorded_at_millis(mut self, recorded_at_millis: i64) -> Self {
177        self.recorded_at_millis = Some(recorded_at_millis);
178        self
179    }
180
181    /// Set [`Self::effective_at_millis`].
182    #[must_use]
183    pub fn with_effective_at_millis(mut self, effective_at_millis: i64) -> Self {
184        self.effective_at_millis = Some(effective_at_millis);
185        self
186    }
187
188    /// Set [`Self::confidence`].
189    #[must_use]
190    pub fn with_confidence(mut self, confidence: f64) -> Self {
191        self.confidence = Some(confidence);
192        self
193    }
194
195    /// Set [`Self::version_key`].
196    #[must_use]
197    pub fn with_version_key(mut self, version_key: impl Into<String>) -> Self {
198        self.version_key = Some(version_key.into());
199        self
200    }
201
202    /// Set [`Self::source_frame_id`].
203    #[must_use]
204    pub fn with_source_frame_id(mut self, source_frame_id: impl Into<String>) -> Self {
205        self.source_frame_id = Some(Value::String(source_frame_id.into()));
206        self
207    }
208
209    /// Set [`Self::source_frame_id`] from an existing JSON value.
210    #[must_use]
211    pub fn with_source_frame_id_value(mut self, source_frame_id: Value) -> Self {
212        self.source_frame_id = Some(source_frame_id);
213        self
214    }
215
216    /// Set [`Self::projection_state`].
217    #[must_use]
218    pub fn with_projection_state(mut self, projection_state: ContextProjectionState) -> Self {
219        self.projection_state = Some(projection_state);
220        self
221    }
222
223    /// Set [`Self::reason`].
224    #[must_use]
225    pub fn with_reason(mut self, reason: impl Into<String>) -> Self {
226        self.reason = Some(reason.into());
227        self
228    }
229}
230
231/// One ranked piece of context that may be packed into a bounded model window.
232///
233/// `ContextItem` is intentionally backend-neutral. Memory crates, MCP/resource
234/// adapters, and harnesses can all project their native records into this shape
235/// so tests can assert what context was selected, omitted, and rendered.
236///
237/// ```rust
238/// use rig_compose::{ContextItem, ContextSourceKind};
239///
240/// let item = ContextItem::new(
241///     ContextSourceKind::Memory,
242///     "profile/alice/location",
243///     "fact alice lives in Berlin",
244/// )
245/// .with_rank(0)
246/// .with_score(9.5);
247///
248/// assert_eq!(item.estimated_chars, item.text.chars().count());
249/// ```
250#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
251pub struct ContextItem {
252    /// Backend-neutral source category.
253    pub source: ContextSourceKind,
254    /// Stable id inside the source system.
255    pub source_id: String,
256    /// Zero-based rank after source-local selection.
257    pub rank: usize,
258    /// Relevance score used for ordering within the source or planner.
259    pub score: f64,
260    /// Prompt-ready text.
261    pub text: String,
262    /// Character count estimate for early context packing.
263    pub estimated_chars: usize,
264    /// Source-specific provenance such as frame id, URI, tool call id, or path.
265    pub provenance: Value,
266    /// Caller-defined metadata not required for packing.
267    pub metadata: Value,
268}
269
270impl ContextItem {
271    /// Build a context item with a source, source id, and prompt-ready text.
272    #[must_use]
273    pub fn new(
274        source: ContextSourceKind,
275        source_id: impl Into<String>,
276        text: impl Into<String>,
277    ) -> Self {
278        let text = text.into();
279        Self {
280            source,
281            source_id: source_id.into(),
282            rank: 0,
283            score: 0.0,
284            estimated_chars: text.chars().count(),
285            text,
286            provenance: Value::Null,
287            metadata: Value::Null,
288        }
289    }
290
291    /// Set the source-local rank used by [`ContextPack::pack`].
292    #[must_use]
293    pub fn with_rank(mut self, rank: usize) -> Self {
294        self.rank = rank;
295        self
296    }
297
298    /// Set the relevance score attached by the source or planner.
299    #[must_use]
300    pub fn with_score(mut self, score: f64) -> Self {
301        self.score = score;
302        self
303    }
304
305    /// Override the character estimate when a caller has a better tokenizer or
306    /// sizing approximation.
307    #[must_use]
308    pub fn with_estimated_chars(mut self, estimated_chars: usize) -> Self {
309        self.estimated_chars = estimated_chars;
310        self
311    }
312
313    /// Attach source-specific provenance.
314    #[must_use]
315    pub fn with_provenance(mut self, provenance: Value) -> Self {
316        self.provenance = provenance;
317        self
318    }
319
320    /// Attach source-specific provenance using the shared typed vocabulary.
321    #[must_use]
322    pub fn with_context_provenance(mut self, provenance: ContextProvenance) -> Self {
323        self.provenance = serde_json::to_value(provenance).unwrap_or(Value::Null);
324        self
325    }
326
327    /// Decode [`Self::provenance`] as the shared typed vocabulary.
328    ///
329    /// Returns an empty [`ContextProvenance`] when no provenance was attached.
330    pub fn context_provenance(&self) -> serde_json::Result<ContextProvenance> {
331        if self.provenance.is_null() {
332            Ok(ContextProvenance::default())
333        } else {
334            serde_json::from_value(self.provenance.clone())
335        }
336    }
337
338    /// Attach caller-defined metadata.
339    #[must_use]
340    pub fn with_metadata(mut self, metadata: Value) -> Self {
341        self.metadata = metadata;
342        self
343    }
344}
345
346/// Reason a context item was not selected for a [`ContextPack`].
347#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
348pub enum ContextOmissionReason {
349    /// The pack already reached [`ContextPackConfig::max_items`].
350    MaxItems,
351    /// Adding the item would exceed the available character budget.
352    OverBudget,
353}
354
355/// Context item plus the reason it was omitted.
356#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
357pub struct OmittedContextItem {
358    /// Item considered by the packer.
359    pub item: ContextItem,
360    /// Why the item was not selected.
361    pub reason: ContextOmissionReason,
362}
363
364/// Configuration for packing context items into a bounded model window.
365#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
366pub struct ContextPackConfig {
367    /// Maximum characters available to selected item text, including separators.
368    pub max_chars: usize,
369    /// Maximum number of items to include.
370    pub max_items: usize,
371    /// Characters reserved for instructions, user input, or other context.
372    pub reserve_chars: usize,
373    /// Separator inserted between selected item text when rendering.
374    pub separator: String,
375}
376
377impl Default for ContextPackConfig {
378    fn default() -> Self {
379        Self {
380            max_chars: 4_000,
381            max_items: 16,
382            reserve_chars: 0,
383            separator: "\n".into(),
384        }
385    }
386}
387
388impl ContextPackConfig {
389    /// Build a config with a character budget and otherwise default limits.
390    #[must_use]
391    pub fn new(max_chars: usize) -> Self {
392        Self {
393            max_chars,
394            ..Self::default()
395        }
396    }
397
398    /// Set the maximum number of selected items.
399    #[must_use]
400    pub fn with_max_items(mut self, max_items: usize) -> Self {
401        self.max_items = max_items;
402        self
403    }
404
405    /// Reserve part of the character budget for non-packed context.
406    #[must_use]
407    pub fn with_reserve_chars(mut self, reserve_chars: usize) -> Self {
408        self.reserve_chars = reserve_chars;
409        self
410    }
411
412    /// Use a custom separator when rendering selected context.
413    #[must_use]
414    pub fn with_separator(mut self, separator: impl Into<String>) -> Self {
415        self.separator = separator.into();
416        self
417    }
418
419    fn context_budget(&self) -> usize {
420        self.max_chars.saturating_sub(self.reserve_chars)
421    }
422}
423
424/// Selected and omitted context for one bounded model window.
425///
426/// ```rust
427/// use rig_compose::{ContextItem, ContextPack, ContextPackConfig, ContextSourceKind};
428///
429/// let item = ContextItem::new(ContextSourceKind::Memory, "m1", "fact alice lives in Berlin");
430/// let pack = ContextPack::pack(vec![item], ContextPackConfig::new(1_000));
431/// assert_eq!(pack.render_text(), "fact alice lives in Berlin");
432/// ```
433#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
434pub struct ContextPack {
435    /// Configuration used to build this pack.
436    pub config: ContextPackConfig,
437    /// Items selected for prompt context, in render order.
438    pub selected: Vec<ContextItem>,
439    /// Items considered but omitted, with explicit reasons.
440    pub omitted: Vec<OmittedContextItem>,
441    /// Estimated characters consumed by selected text and separators.
442    pub total_estimated_chars: usize,
443}
444
445impl ContextPack {
446    /// Pack ranked context items into the configured character window.
447    ///
448    /// Items are sorted by `rank` before packing so recorded fixtures can be
449    /// replayed even if a source returns equivalent items in a different order.
450    #[must_use]
451    pub fn pack(mut items: Vec<ContextItem>, config: ContextPackConfig) -> Self {
452        items.sort_by_key(|item| item.rank);
453
454        let budget = config.context_budget();
455        let separator_chars = config.separator.chars().count();
456        let mut selected = Vec::new();
457        let mut omitted = Vec::new();
458        let mut total_estimated_chars = 0usize;
459
460        for item in items {
461            if selected.len() >= config.max_items {
462                omitted.push(OmittedContextItem {
463                    item,
464                    reason: ContextOmissionReason::MaxItems,
465                });
466                continue;
467            }
468
469            let item_chars = item.estimated_chars.max(item.text.chars().count());
470            let separator_cost = if selected.is_empty() {
471                0
472            } else {
473                separator_chars
474            };
475            let Some(next_total) = total_estimated_chars
476                .checked_add(separator_cost)
477                .and_then(|total| total.checked_add(item_chars))
478            else {
479                omitted.push(OmittedContextItem {
480                    item,
481                    reason: ContextOmissionReason::OverBudget,
482                });
483                continue;
484            };
485
486            if next_total > budget {
487                omitted.push(OmittedContextItem {
488                    item,
489                    reason: ContextOmissionReason::OverBudget,
490                });
491                continue;
492            }
493
494            total_estimated_chars = next_total;
495            selected.push(item);
496        }
497
498        Self {
499            config,
500            selected,
501            omitted,
502            total_estimated_chars,
503        }
504    }
505
506    /// Render selected item text as prompt-ready context.
507    #[must_use]
508    pub fn render_text(&self) -> String {
509        self.selected
510            .iter()
511            .map(|item| item.text.as_str())
512            .collect::<Vec<_>>()
513            .join(&self.config.separator)
514    }
515}
516
517/// A named, lightweight signal lifted from a sketch, baseline check, or
518/// upstream skill. Skills key their `applies` predicate on signal names.
519#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
520pub struct Signal(pub String);
521
522impl Signal {
523    pub fn new(s: impl Into<String>) -> Self {
524        Self(s.into())
525    }
526    pub fn as_str(&self) -> &str {
527        &self.0
528    }
529}
530
531/// A single piece of evidence accumulated during an investigation.
532#[derive(Debug, Clone, Serialize, Deserialize)]
533pub struct Evidence {
534    pub source_skill: String,
535    pub label: String,
536    pub detail: Value,
537    pub recorded_at: SystemTime,
538}
539
540impl Evidence {
541    pub fn new(source_skill: impl Into<String>, label: impl Into<String>) -> Self {
542        Self {
543            source_skill: source_skill.into(),
544            label: label.into(),
545            detail: Value::Null,
546            recorded_at: SystemTime::now(),
547        }
548    }
549
550    pub fn with_detail(mut self, detail: Value) -> Self {
551        self.detail = detail;
552        self
553    }
554}
555
556/// Hint a skill may emit to drive subsequent skill selection. The agent
557/// loop is free to honour or ignore these — they are advisory.
558#[derive(Debug, Clone, Serialize, Deserialize)]
559pub enum NextAction {
560    /// Suggest a follow-up skill by id.
561    RunSkill(String),
562    /// Suggest invoking a named tool with prepared args.
563    InvokeTool { tool: String, args: Value },
564    /// Stop the investigation; sufficient evidence has been gathered.
565    Conclude,
566    /// Drop the investigation; the entity is benign.
567    Discard,
568}
569
570/// Runtime state for one investigation. Cheap to construct; passed by
571/// `&mut` reference through the skill chain.
572#[derive(Debug, Clone, Serialize, Deserialize)]
573pub struct InvestigationContext {
574    /// Stable identifier for the entity under investigation. May be a block
575    /// id stringified, an actor id from the grammar layer (Phase 2), or any
576    /// caller-defined key.
577    pub entity_id: String,
578
579    /// Optional originating block — present when the investigation was
580    /// triggered by an upstream pipeline. Stored as an opaque UUID so the
581    /// kernel does not depend on any specific block-id newtype.
582    pub block_id: Option<Uuid>,
583
584    /// Free-form partition tag (caller-defined).
585    pub partition: String,
586
587    /// Signals that triggered this investigation and any signals lifted by
588    /// earlier skills. Skills add to this set as evidence accumulates.
589    pub signals: Vec<Signal>,
590
591    /// Accumulated evidence in chronological order.
592    pub evidence: Vec<Evidence>,
593
594    /// Running confidence in `[0, 1]` that the entity exhibits malicious
595    /// behaviour. Skills emit deltas; the agent clamps after each step.
596    pub confidence: f32,
597
598    /// Hints from the most recently executed skill.
599    pub pending_actions: Vec<NextAction>,
600}
601
602impl InvestigationContext {
603    pub fn new(entity_id: impl Into<String>, partition: impl Into<String>) -> Self {
604        Self {
605            entity_id: entity_id.into(),
606            block_id: None,
607            partition: partition.into(),
608            signals: Vec::new(),
609            evidence: Vec::new(),
610            confidence: 0.0,
611            pending_actions: Vec::new(),
612        }
613    }
614
615    pub fn with_block<I: Into<Uuid>>(mut self, id: I) -> Self {
616        self.block_id = Some(id.into());
617        self
618    }
619
620    pub fn with_signal(mut self, s: impl Into<String>) -> Self {
621        self.signals.push(Signal::new(s));
622        self
623    }
624
625    pub fn has_signal(&self, name: &str) -> bool {
626        self.signals.iter().any(|s| s.as_str() == name)
627    }
628}