rig_compose/context.rs
1//! [`InvestigationContext`] — the runtime object that flows through every
2//! [`super::Skill`] in an agent step.
3//!
4//! Skills mutate the context by appending [`Evidence`] and adjusting
5//! confidence; they do not own it. The owning [`super::Agent`] threads a
6//! single context through its skill chain for one investigation.
7
8use std::time::SystemTime;
9
10use serde::{Deserialize, Serialize};
11use serde_json::Value;
12use uuid::Uuid;
13
14/// Provider-neutral category for a piece of context that may enter a model
15/// window.
16///
17/// The enum names where the item came from without coupling the kernel to a
18/// concrete backend such as Memvid, MCP, a vector database, or a provider SDK.
19#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
20pub enum ContextSourceKind {
21 /// Long-term memory, episodic recall, summaries, or structured memory cards.
22 Memory,
23 /// Result returned by a tool call.
24 ToolResult,
25 /// Resource lookup such as a graph, baseline, policy, or document store.
26 Resource,
27 /// File or document content selected for the task.
28 File,
29 /// Working notes, plans, hypotheses, or other non-durable reasoning state.
30 Reasoning,
31 /// System, developer, or application instructions carried into context.
32 Instruction,
33 /// Current user input or task text.
34 UserInput,
35 /// Caller-defined source kind.
36 Other(String),
37}
38
39/// Provider-neutral lifecycle state for a projected context item.
40///
41/// Producer crates can attach this to [`ContextProvenance`] when the host needs
42/// to explain why a candidate was expanded, skipped, suppressed, superseded, or
43/// escalated before it reached [`ContextPack::pack`]. The packer still records
44/// its own final [`ContextOmissionReason`] for items omitted by budget or item
45/// count.
46#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
47#[serde(rename_all = "snake_case")]
48pub enum ContextProjectionState {
49 /// Candidate is eligible for packing.
50 Candidate,
51 /// Candidate was expanded from a source item into derived context.
52 Expanded,
53 /// Candidate was skipped before packing.
54 Skipped,
55 /// Candidate was suppressed by caller policy.
56 Suppressed,
57 /// Candidate was rejected by caller policy.
58 Rejected,
59 /// Candidate was superseded by a newer or more authoritative item.
60 Superseded,
61 /// Candidate is stale relative to a newer version.
62 Stale,
63 /// Candidate conflicts with another item and needs host resolution.
64 Conflict,
65 /// Candidate was escalated for higher-level handling.
66 Escalated,
67 /// Caller-defined state.
68 Other(String),
69}
70
71/// Shared provenance keys for context projected by memory, resource, graph, or
72/// tool-result producers.
73///
74/// `rig-compose` continues to store provenance on [`ContextItem`] as JSON so
75/// downstream crates can attach crate-specific fields without depending on each
76/// other. This helper gives those crates a common vocabulary for the fields that
77/// matter to replay, evaluation, and omission explanations.
78///
79/// ```rust
80/// use rig_compose::{ContextItem, ContextProvenance, ContextSourceKind};
81///
82/// let provenance = ContextProvenance::new()
83/// .with_source_uri("memory://incident/42")
84/// .with_principal("alice")
85/// .with_scope("workspace")
86/// .with_confidence(0.92);
87///
88/// let item = ContextItem::new(ContextSourceKind::Memory, "frame-42", "prior incident")
89/// .with_context_provenance(provenance);
90///
91/// assert_eq!(
92/// item.context_provenance().unwrap().source_uri.as_deref(),
93/// Some("memory://incident/42")
94/// );
95/// ```
96#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
97#[serde(default)]
98pub struct ContextProvenance {
99 /// URI or locator for the original source record.
100 #[serde(skip_serializing_if = "Option::is_none")]
101 pub source_uri: Option<String>,
102 /// Principal, actor, tenant, or subject associated with the source record.
103 #[serde(skip_serializing_if = "Option::is_none")]
104 pub principal: Option<String>,
105 /// Caller-defined scope such as tenant, workspace, profile, or project.
106 #[serde(skip_serializing_if = "Option::is_none")]
107 pub scope: Option<String>,
108 /// Retention or archive tier associated with the source record.
109 #[serde(skip_serializing_if = "Option::is_none")]
110 pub retention_tier: Option<String>,
111 /// Milliseconds since the Unix epoch when the source record was recorded.
112 #[serde(skip_serializing_if = "Option::is_none")]
113 pub recorded_at_millis: Option<i64>,
114 /// Milliseconds since the Unix epoch when the source record became
115 /// effective for supersession or freshness comparisons.
116 #[serde(skip_serializing_if = "Option::is_none")]
117 pub effective_at_millis: Option<i64>,
118 /// Source-provided confidence score, when it is distinct from
119 /// [`ContextItem::score`].
120 #[serde(skip_serializing_if = "Option::is_none")]
121 pub confidence: Option<f64>,
122 /// Stable key used to compare competing versions of the same fact.
123 #[serde(skip_serializing_if = "Option::is_none")]
124 pub version_key: Option<String>,
125 /// Source frame/document id used by memory stores and eval fixtures.
126 ///
127 /// Stored as JSON so existing producers can keep numeric frame ids while
128 /// others use string document keys.
129 #[serde(skip_serializing_if = "Option::is_none")]
130 pub source_frame_id: Option<Value>,
131 /// Lifecycle state assigned before the packer makes final budget decisions.
132 #[serde(skip_serializing_if = "Option::is_none")]
133 pub projection_state: Option<ContextProjectionState>,
134 /// Machine-readable reason for the projection state.
135 #[serde(skip_serializing_if = "Option::is_none")]
136 pub reason: Option<String>,
137}
138
139impl ContextProvenance {
140 /// Create empty provenance ready for builder-style population.
141 #[must_use]
142 pub fn new() -> Self {
143 Self::default()
144 }
145
146 /// Set [`Self::source_uri`].
147 #[must_use]
148 pub fn with_source_uri(mut self, source_uri: impl Into<String>) -> Self {
149 self.source_uri = Some(source_uri.into());
150 self
151 }
152
153 /// Set [`Self::principal`].
154 #[must_use]
155 pub fn with_principal(mut self, principal: impl Into<String>) -> Self {
156 self.principal = Some(principal.into());
157 self
158 }
159
160 /// Set [`Self::scope`].
161 #[must_use]
162 pub fn with_scope(mut self, scope: impl Into<String>) -> Self {
163 self.scope = Some(scope.into());
164 self
165 }
166
167 /// Set [`Self::retention_tier`].
168 #[must_use]
169 pub fn with_retention_tier(mut self, retention_tier: impl Into<String>) -> Self {
170 self.retention_tier = Some(retention_tier.into());
171 self
172 }
173
174 /// Set [`Self::recorded_at_millis`].
175 #[must_use]
176 pub fn with_recorded_at_millis(mut self, recorded_at_millis: i64) -> Self {
177 self.recorded_at_millis = Some(recorded_at_millis);
178 self
179 }
180
181 /// Set [`Self::effective_at_millis`].
182 #[must_use]
183 pub fn with_effective_at_millis(mut self, effective_at_millis: i64) -> Self {
184 self.effective_at_millis = Some(effective_at_millis);
185 self
186 }
187
188 /// Set [`Self::confidence`].
189 #[must_use]
190 pub fn with_confidence(mut self, confidence: f64) -> Self {
191 self.confidence = Some(confidence);
192 self
193 }
194
195 /// Set [`Self::version_key`].
196 #[must_use]
197 pub fn with_version_key(mut self, version_key: impl Into<String>) -> Self {
198 self.version_key = Some(version_key.into());
199 self
200 }
201
202 /// Set [`Self::source_frame_id`].
203 #[must_use]
204 pub fn with_source_frame_id(mut self, source_frame_id: impl Into<String>) -> Self {
205 self.source_frame_id = Some(Value::String(source_frame_id.into()));
206 self
207 }
208
209 /// Set [`Self::source_frame_id`] from an existing JSON value.
210 #[must_use]
211 pub fn with_source_frame_id_value(mut self, source_frame_id: Value) -> Self {
212 self.source_frame_id = Some(source_frame_id);
213 self
214 }
215
216 /// Set [`Self::projection_state`].
217 #[must_use]
218 pub fn with_projection_state(mut self, projection_state: ContextProjectionState) -> Self {
219 self.projection_state = Some(projection_state);
220 self
221 }
222
223 /// Set [`Self::reason`].
224 #[must_use]
225 pub fn with_reason(mut self, reason: impl Into<String>) -> Self {
226 self.reason = Some(reason.into());
227 self
228 }
229}
230
231/// One ranked piece of context that may be packed into a bounded model window.
232///
233/// `ContextItem` is intentionally backend-neutral. Memory crates, MCP/resource
234/// adapters, and harnesses can all project their native records into this shape
235/// so tests can assert what context was selected, omitted, and rendered.
236///
237/// ```rust
238/// use rig_compose::{ContextItem, ContextSourceKind};
239///
240/// let item = ContextItem::new(
241/// ContextSourceKind::Memory,
242/// "profile/alice/location",
243/// "fact alice lives in Berlin",
244/// )
245/// .with_rank(0)
246/// .with_score(9.5);
247///
248/// assert_eq!(item.estimated_chars, item.text.chars().count());
249/// ```
250#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
251pub struct ContextItem {
252 /// Backend-neutral source category.
253 pub source: ContextSourceKind,
254 /// Stable id inside the source system.
255 pub source_id: String,
256 /// Zero-based rank after source-local selection.
257 pub rank: usize,
258 /// Relevance score used for ordering within the source or planner.
259 pub score: f64,
260 /// Prompt-ready text.
261 pub text: String,
262 /// Character count estimate for early context packing.
263 pub estimated_chars: usize,
264 /// Source-specific provenance such as frame id, URI, tool call id, or path.
265 pub provenance: Value,
266 /// Caller-defined metadata not required for packing.
267 pub metadata: Value,
268}
269
270impl ContextItem {
271 /// Build a context item with a source, source id, and prompt-ready text.
272 #[must_use]
273 pub fn new(
274 source: ContextSourceKind,
275 source_id: impl Into<String>,
276 text: impl Into<String>,
277 ) -> Self {
278 let text = text.into();
279 Self {
280 source,
281 source_id: source_id.into(),
282 rank: 0,
283 score: 0.0,
284 estimated_chars: text.chars().count(),
285 text,
286 provenance: Value::Null,
287 metadata: Value::Null,
288 }
289 }
290
291 /// Set the source-local rank used by [`ContextPack::pack`].
292 #[must_use]
293 pub fn with_rank(mut self, rank: usize) -> Self {
294 self.rank = rank;
295 self
296 }
297
298 /// Set the relevance score attached by the source or planner.
299 #[must_use]
300 pub fn with_score(mut self, score: f64) -> Self {
301 self.score = score;
302 self
303 }
304
305 /// Override the character estimate when a caller has a better tokenizer or
306 /// sizing approximation.
307 #[must_use]
308 pub fn with_estimated_chars(mut self, estimated_chars: usize) -> Self {
309 self.estimated_chars = estimated_chars;
310 self
311 }
312
313 /// Attach source-specific provenance.
314 #[must_use]
315 pub fn with_provenance(mut self, provenance: Value) -> Self {
316 self.provenance = provenance;
317 self
318 }
319
320 /// Attach source-specific provenance using the shared typed vocabulary.
321 #[must_use]
322 pub fn with_context_provenance(mut self, provenance: ContextProvenance) -> Self {
323 self.provenance = serde_json::to_value(provenance).unwrap_or(Value::Null);
324 self
325 }
326
327 /// Decode [`Self::provenance`] as the shared typed vocabulary.
328 ///
329 /// Returns an empty [`ContextProvenance`] when no provenance was attached.
330 pub fn context_provenance(&self) -> serde_json::Result<ContextProvenance> {
331 if self.provenance.is_null() {
332 Ok(ContextProvenance::default())
333 } else {
334 serde_json::from_value(self.provenance.clone())
335 }
336 }
337
338 /// Attach caller-defined metadata.
339 #[must_use]
340 pub fn with_metadata(mut self, metadata: Value) -> Self {
341 self.metadata = metadata;
342 self
343 }
344}
345
346/// Reason a context item was not selected for a [`ContextPack`].
347#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
348pub enum ContextOmissionReason {
349 /// The pack already reached [`ContextPackConfig::max_items`].
350 MaxItems,
351 /// Adding the item would exceed the available character budget.
352 OverBudget,
353}
354
355/// Context item plus the reason it was omitted.
356#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
357pub struct OmittedContextItem {
358 /// Item considered by the packer.
359 pub item: ContextItem,
360 /// Why the item was not selected.
361 pub reason: ContextOmissionReason,
362}
363
364/// Configuration for packing context items into a bounded model window.
365#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
366pub struct ContextPackConfig {
367 /// Maximum characters available to selected item text, including separators.
368 pub max_chars: usize,
369 /// Maximum number of items to include.
370 pub max_items: usize,
371 /// Characters reserved for instructions, user input, or other context.
372 pub reserve_chars: usize,
373 /// Separator inserted between selected item text when rendering.
374 pub separator: String,
375}
376
377impl Default for ContextPackConfig {
378 fn default() -> Self {
379 Self {
380 max_chars: 4_000,
381 max_items: 16,
382 reserve_chars: 0,
383 separator: "\n".into(),
384 }
385 }
386}
387
388impl ContextPackConfig {
389 /// Build a config with a character budget and otherwise default limits.
390 #[must_use]
391 pub fn new(max_chars: usize) -> Self {
392 Self {
393 max_chars,
394 ..Self::default()
395 }
396 }
397
398 /// Set the maximum number of selected items.
399 #[must_use]
400 pub fn with_max_items(mut self, max_items: usize) -> Self {
401 self.max_items = max_items;
402 self
403 }
404
405 /// Reserve part of the character budget for non-packed context.
406 #[must_use]
407 pub fn with_reserve_chars(mut self, reserve_chars: usize) -> Self {
408 self.reserve_chars = reserve_chars;
409 self
410 }
411
412 /// Use a custom separator when rendering selected context.
413 #[must_use]
414 pub fn with_separator(mut self, separator: impl Into<String>) -> Self {
415 self.separator = separator.into();
416 self
417 }
418
419 fn context_budget(&self) -> usize {
420 self.max_chars.saturating_sub(self.reserve_chars)
421 }
422}
423
424/// Selected and omitted context for one bounded model window.
425///
426/// ```rust
427/// use rig_compose::{ContextItem, ContextPack, ContextPackConfig, ContextSourceKind};
428///
429/// let item = ContextItem::new(ContextSourceKind::Memory, "m1", "fact alice lives in Berlin");
430/// let pack = ContextPack::pack(vec![item], ContextPackConfig::new(1_000));
431/// assert_eq!(pack.render_text(), "fact alice lives in Berlin");
432/// ```
433#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
434pub struct ContextPack {
435 /// Configuration used to build this pack.
436 pub config: ContextPackConfig,
437 /// Items selected for prompt context, in render order.
438 pub selected: Vec<ContextItem>,
439 /// Items considered but omitted, with explicit reasons.
440 pub omitted: Vec<OmittedContextItem>,
441 /// Estimated characters consumed by selected text and separators.
442 pub total_estimated_chars: usize,
443}
444
445impl ContextPack {
446 /// Pack ranked context items into the configured character window.
447 ///
448 /// Items are sorted by `rank` before packing so recorded fixtures can be
449 /// replayed even if a source returns equivalent items in a different order.
450 #[must_use]
451 pub fn pack(mut items: Vec<ContextItem>, config: ContextPackConfig) -> Self {
452 items.sort_by_key(|item| item.rank);
453
454 let budget = config.context_budget();
455 let separator_chars = config.separator.chars().count();
456 let mut selected = Vec::new();
457 let mut omitted = Vec::new();
458 let mut total_estimated_chars = 0usize;
459
460 for item in items {
461 if selected.len() >= config.max_items {
462 omitted.push(OmittedContextItem {
463 item,
464 reason: ContextOmissionReason::MaxItems,
465 });
466 continue;
467 }
468
469 let item_chars = item.estimated_chars.max(item.text.chars().count());
470 let separator_cost = if selected.is_empty() {
471 0
472 } else {
473 separator_chars
474 };
475 let Some(next_total) = total_estimated_chars
476 .checked_add(separator_cost)
477 .and_then(|total| total.checked_add(item_chars))
478 else {
479 omitted.push(OmittedContextItem {
480 item,
481 reason: ContextOmissionReason::OverBudget,
482 });
483 continue;
484 };
485
486 if next_total > budget {
487 omitted.push(OmittedContextItem {
488 item,
489 reason: ContextOmissionReason::OverBudget,
490 });
491 continue;
492 }
493
494 total_estimated_chars = next_total;
495 selected.push(item);
496 }
497
498 Self {
499 config,
500 selected,
501 omitted,
502 total_estimated_chars,
503 }
504 }
505
506 /// Render selected item text as prompt-ready context.
507 #[must_use]
508 pub fn render_text(&self) -> String {
509 self.selected
510 .iter()
511 .map(|item| item.text.as_str())
512 .collect::<Vec<_>>()
513 .join(&self.config.separator)
514 }
515}
516
517/// A named, lightweight signal lifted from a sketch, baseline check, or
518/// upstream skill. Skills key their `applies` predicate on signal names.
519#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
520pub struct Signal(pub String);
521
522impl Signal {
523 pub fn new(s: impl Into<String>) -> Self {
524 Self(s.into())
525 }
526 pub fn as_str(&self) -> &str {
527 &self.0
528 }
529}
530
531/// A single piece of evidence accumulated during an investigation.
532#[derive(Debug, Clone, Serialize, Deserialize)]
533pub struct Evidence {
534 pub source_skill: String,
535 pub label: String,
536 pub detail: Value,
537 pub recorded_at: SystemTime,
538}
539
540impl Evidence {
541 pub fn new(source_skill: impl Into<String>, label: impl Into<String>) -> Self {
542 Self {
543 source_skill: source_skill.into(),
544 label: label.into(),
545 detail: Value::Null,
546 recorded_at: SystemTime::now(),
547 }
548 }
549
550 pub fn with_detail(mut self, detail: Value) -> Self {
551 self.detail = detail;
552 self
553 }
554}
555
556/// Hint a skill may emit to drive subsequent skill selection. The agent
557/// loop is free to honour or ignore these — they are advisory.
558#[derive(Debug, Clone, Serialize, Deserialize)]
559pub enum NextAction {
560 /// Suggest a follow-up skill by id.
561 RunSkill(String),
562 /// Suggest invoking a named tool with prepared args.
563 InvokeTool { tool: String, args: Value },
564 /// Stop the investigation; sufficient evidence has been gathered.
565 Conclude,
566 /// Drop the investigation; the entity is benign.
567 Discard,
568}
569
570/// Runtime state for one investigation. Cheap to construct; passed by
571/// `&mut` reference through the skill chain.
572#[derive(Debug, Clone, Serialize, Deserialize)]
573pub struct InvestigationContext {
574 /// Stable identifier for the entity under investigation. May be a block
575 /// id stringified, an actor id from the grammar layer (Phase 2), or any
576 /// caller-defined key.
577 pub entity_id: String,
578
579 /// Optional originating block — present when the investigation was
580 /// triggered by an upstream pipeline. Stored as an opaque UUID so the
581 /// kernel does not depend on any specific block-id newtype.
582 pub block_id: Option<Uuid>,
583
584 /// Free-form partition tag (caller-defined).
585 pub partition: String,
586
587 /// Signals that triggered this investigation and any signals lifted by
588 /// earlier skills. Skills add to this set as evidence accumulates.
589 pub signals: Vec<Signal>,
590
591 /// Accumulated evidence in chronological order.
592 pub evidence: Vec<Evidence>,
593
594 /// Running confidence in `[0, 1]` that the entity exhibits malicious
595 /// behaviour. Skills emit deltas; the agent clamps after each step.
596 pub confidence: f32,
597
598 /// Hints from the most recently executed skill.
599 pub pending_actions: Vec<NextAction>,
600}
601
602impl InvestigationContext {
603 pub fn new(entity_id: impl Into<String>, partition: impl Into<String>) -> Self {
604 Self {
605 entity_id: entity_id.into(),
606 block_id: None,
607 partition: partition.into(),
608 signals: Vec::new(),
609 evidence: Vec::new(),
610 confidence: 0.0,
611 pending_actions: Vec::new(),
612 }
613 }
614
615 pub fn with_block<I: Into<Uuid>>(mut self, id: I) -> Self {
616 self.block_id = Some(id.into());
617 self
618 }
619
620 pub fn with_signal(mut self, s: impl Into<String>) -> Self {
621 self.signals.push(Signal::new(s));
622 self
623 }
624
625 pub fn has_signal(&self, name: &str) -> bool {
626 self.signals.iter().any(|s| s.as_str() == name)
627 }
628}