rig-compose 0.5.0

Composable agent kernel: stateless skills, transport-agnostic tools, registry-driven agents, signal-routing coordinator. Companion crate for rig.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
//! [`InvestigationContext`] — the runtime object that flows through every
//! [`super::Skill`] in an agent step.
//!
//! Skills mutate the context by appending [`Evidence`] and adjusting
//! confidence; they do not own it. The owning [`super::Agent`] threads a
//! single context through its skill chain for one investigation.

use std::time::SystemTime;

use serde::{Deserialize, Serialize};
use serde_json::Value;
use uuid::Uuid;

/// Provider-neutral category for a piece of context that may enter a model
/// window.
///
/// The enum names where the item came from without coupling the kernel to a
/// concrete backend such as Memvid, MCP, a vector database, or a provider SDK.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ContextSourceKind {
    /// Long-term memory, episodic recall, summaries, or structured memory cards.
    Memory,
    /// Result returned by a tool call.
    ToolResult,
    /// Resource lookup such as a graph, baseline, policy, or document store.
    Resource,
    /// File or document content selected for the task.
    File,
    /// Working notes, plans, hypotheses, or other non-durable reasoning state.
    Reasoning,
    /// System, developer, or application instructions carried into context.
    Instruction,
    /// Current user input or task text.
    UserInput,
    /// Caller-defined source kind.
    Other(String),
}

/// Provider-neutral lifecycle state for a projected context item.
///
/// Producer crates can attach this to [`ContextProvenance`] when the host needs
/// to explain why a candidate was expanded, skipped, suppressed, superseded, or
/// escalated before it reached [`ContextPack::pack`]. The packer still records
/// its own final [`ContextOmissionReason`] for items omitted by budget or item
/// count.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ContextProjectionState {
    /// Candidate is eligible for packing.
    Candidate,
    /// Candidate was expanded from a source item into derived context.
    Expanded,
    /// Candidate was skipped before packing.
    Skipped,
    /// Candidate was suppressed by caller policy.
    Suppressed,
    /// Candidate was rejected by caller policy.
    Rejected,
    /// Candidate was superseded by a newer or more authoritative item.
    Superseded,
    /// Candidate is stale relative to a newer version.
    Stale,
    /// Candidate conflicts with another item and needs host resolution.
    Conflict,
    /// Candidate was escalated for higher-level handling.
    Escalated,
    /// Caller-defined state.
    Other(String),
}

/// Shared provenance keys for context projected by memory, resource, graph, or
/// tool-result producers.
///
/// `rig-compose` continues to store provenance on [`ContextItem`] as JSON so
/// downstream crates can attach crate-specific fields without depending on each
/// other. This helper gives those crates a common vocabulary for the fields that
/// matter to replay, evaluation, and omission explanations.
///
/// ```rust
/// use rig_compose::{ContextItem, ContextProvenance, ContextSourceKind};
///
/// let provenance = ContextProvenance::new()
///     .with_source_uri("memory://incident/42")
///     .with_principal("alice")
///     .with_scope("workspace")
///     .with_confidence(0.92);
///
/// let item = ContextItem::new(ContextSourceKind::Memory, "frame-42", "prior incident")
///     .with_context_provenance(provenance);
///
/// assert_eq!(
///     item.context_provenance().unwrap().source_uri.as_deref(),
///     Some("memory://incident/42")
/// );
/// ```
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
#[serde(default)]
pub struct ContextProvenance {
    /// URI or locator for the original source record.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_uri: Option<String>,
    /// Principal, actor, tenant, or subject associated with the source record.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub principal: Option<String>,
    /// Caller-defined scope such as tenant, workspace, profile, or project.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub scope: Option<String>,
    /// Retention or archive tier associated with the source record.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub retention_tier: Option<String>,
    /// Milliseconds since the Unix epoch when the source record was recorded.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub recorded_at_millis: Option<i64>,
    /// Milliseconds since the Unix epoch when the source record became
    /// effective for supersession or freshness comparisons.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub effective_at_millis: Option<i64>,
    /// Source-provided confidence score, when it is distinct from
    /// [`ContextItem::score`].
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f64>,
    /// Stable key used to compare competing versions of the same fact.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub version_key: Option<String>,
    /// Source frame/document id used by memory stores and eval fixtures.
    ///
    /// Stored as JSON so existing producers can keep numeric frame ids while
    /// others use string document keys.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_frame_id: Option<Value>,
    /// Lifecycle state assigned before the packer makes final budget decisions.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub projection_state: Option<ContextProjectionState>,
    /// Machine-readable reason for the projection state.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
}

impl ContextProvenance {
    /// Create empty provenance ready for builder-style population.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Set [`Self::source_uri`].
    #[must_use]
    pub fn with_source_uri(mut self, source_uri: impl Into<String>) -> Self {
        self.source_uri = Some(source_uri.into());
        self
    }

    /// Set [`Self::principal`].
    #[must_use]
    pub fn with_principal(mut self, principal: impl Into<String>) -> Self {
        self.principal = Some(principal.into());
        self
    }

    /// Set [`Self::scope`].
    #[must_use]
    pub fn with_scope(mut self, scope: impl Into<String>) -> Self {
        self.scope = Some(scope.into());
        self
    }

    /// Set [`Self::retention_tier`].
    #[must_use]
    pub fn with_retention_tier(mut self, retention_tier: impl Into<String>) -> Self {
        self.retention_tier = Some(retention_tier.into());
        self
    }

    /// Set [`Self::recorded_at_millis`].
    #[must_use]
    pub fn with_recorded_at_millis(mut self, recorded_at_millis: i64) -> Self {
        self.recorded_at_millis = Some(recorded_at_millis);
        self
    }

    /// Set [`Self::effective_at_millis`].
    #[must_use]
    pub fn with_effective_at_millis(mut self, effective_at_millis: i64) -> Self {
        self.effective_at_millis = Some(effective_at_millis);
        self
    }

    /// Set [`Self::confidence`].
    #[must_use]
    pub fn with_confidence(mut self, confidence: f64) -> Self {
        self.confidence = Some(confidence);
        self
    }

    /// Set [`Self::version_key`].
    #[must_use]
    pub fn with_version_key(mut self, version_key: impl Into<String>) -> Self {
        self.version_key = Some(version_key.into());
        self
    }

    /// Set [`Self::source_frame_id`].
    #[must_use]
    pub fn with_source_frame_id(mut self, source_frame_id: impl Into<String>) -> Self {
        self.source_frame_id = Some(Value::String(source_frame_id.into()));
        self
    }

    /// Set [`Self::source_frame_id`] from an existing JSON value.
    #[must_use]
    pub fn with_source_frame_id_value(mut self, source_frame_id: Value) -> Self {
        self.source_frame_id = Some(source_frame_id);
        self
    }

    /// Set [`Self::projection_state`].
    #[must_use]
    pub fn with_projection_state(mut self, projection_state: ContextProjectionState) -> Self {
        self.projection_state = Some(projection_state);
        self
    }

    /// Set [`Self::reason`].
    #[must_use]
    pub fn with_reason(mut self, reason: impl Into<String>) -> Self {
        self.reason = Some(reason.into());
        self
    }
}

/// One ranked piece of context that may be packed into a bounded model window.
///
/// `ContextItem` is intentionally backend-neutral. Memory crates, MCP/resource
/// adapters, and harnesses can all project their native records into this shape
/// so tests can assert what context was selected, omitted, and rendered.
///
/// ```rust
/// use rig_compose::{ContextItem, ContextSourceKind};
///
/// let item = ContextItem::new(
///     ContextSourceKind::Memory,
///     "profile/alice/location",
///     "fact alice lives in Berlin",
/// )
/// .with_rank(0)
/// .with_score(9.5);
///
/// assert_eq!(item.estimated_chars, item.text.chars().count());
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ContextItem {
    /// Backend-neutral source category.
    pub source: ContextSourceKind,
    /// Stable id inside the source system.
    pub source_id: String,
    /// Zero-based rank after source-local selection.
    pub rank: usize,
    /// Relevance score used for ordering within the source or planner.
    pub score: f64,
    /// Prompt-ready text.
    pub text: String,
    /// Character count estimate for early context packing.
    pub estimated_chars: usize,
    /// Source-specific provenance such as frame id, URI, tool call id, or path.
    pub provenance: Value,
    /// Caller-defined metadata not required for packing.
    pub metadata: Value,
}

impl ContextItem {
    /// Build a context item with a source, source id, and prompt-ready text.
    #[must_use]
    pub fn new(
        source: ContextSourceKind,
        source_id: impl Into<String>,
        text: impl Into<String>,
    ) -> Self {
        let text = text.into();
        Self {
            source,
            source_id: source_id.into(),
            rank: 0,
            score: 0.0,
            estimated_chars: text.chars().count(),
            text,
            provenance: Value::Null,
            metadata: Value::Null,
        }
    }

    /// Set the source-local rank used by [`ContextPack::pack`].
    #[must_use]
    pub fn with_rank(mut self, rank: usize) -> Self {
        self.rank = rank;
        self
    }

    /// Set the relevance score attached by the source or planner.
    #[must_use]
    pub fn with_score(mut self, score: f64) -> Self {
        self.score = score;
        self
    }

    /// Override the character estimate when a caller has a better tokenizer or
    /// sizing approximation.
    #[must_use]
    pub fn with_estimated_chars(mut self, estimated_chars: usize) -> Self {
        self.estimated_chars = estimated_chars;
        self
    }

    /// Attach source-specific provenance.
    #[must_use]
    pub fn with_provenance(mut self, provenance: Value) -> Self {
        self.provenance = provenance;
        self
    }

    /// Attach source-specific provenance using the shared typed vocabulary.
    #[must_use]
    pub fn with_context_provenance(mut self, provenance: ContextProvenance) -> Self {
        self.provenance = serde_json::to_value(provenance).unwrap_or(Value::Null);
        self
    }

    /// Decode [`Self::provenance`] as the shared typed vocabulary.
    ///
    /// Returns an empty [`ContextProvenance`] when no provenance was attached.
    pub fn context_provenance(&self) -> serde_json::Result<ContextProvenance> {
        if self.provenance.is_null() {
            Ok(ContextProvenance::default())
        } else {
            serde_json::from_value(self.provenance.clone())
        }
    }

    /// Attach caller-defined metadata.
    #[must_use]
    pub fn with_metadata(mut self, metadata: Value) -> Self {
        self.metadata = metadata;
        self
    }
}

/// Reason a context item was not selected for a [`ContextPack`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ContextOmissionReason {
    /// The pack already reached [`ContextPackConfig::max_items`].
    MaxItems,
    /// Adding the item would exceed the available character budget.
    OverBudget,
}

/// Context item plus the reason it was omitted.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OmittedContextItem {
    /// Item considered by the packer.
    pub item: ContextItem,
    /// Why the item was not selected.
    pub reason: ContextOmissionReason,
}

/// Configuration for packing context items into a bounded model window.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ContextPackConfig {
    /// Maximum characters available to selected item text, including separators.
    pub max_chars: usize,
    /// Maximum number of items to include.
    pub max_items: usize,
    /// Characters reserved for instructions, user input, or other context.
    pub reserve_chars: usize,
    /// Separator inserted between selected item text when rendering.
    pub separator: String,
}

impl Default for ContextPackConfig {
    fn default() -> Self {
        Self {
            max_chars: 4_000,
            max_items: 16,
            reserve_chars: 0,
            separator: "\n".into(),
        }
    }
}

impl ContextPackConfig {
    /// Build a config with a character budget and otherwise default limits.
    #[must_use]
    pub fn new(max_chars: usize) -> Self {
        Self {
            max_chars,
            ..Self::default()
        }
    }

    /// Set the maximum number of selected items.
    #[must_use]
    pub fn with_max_items(mut self, max_items: usize) -> Self {
        self.max_items = max_items;
        self
    }

    /// Reserve part of the character budget for non-packed context.
    #[must_use]
    pub fn with_reserve_chars(mut self, reserve_chars: usize) -> Self {
        self.reserve_chars = reserve_chars;
        self
    }

    /// Use a custom separator when rendering selected context.
    #[must_use]
    pub fn with_separator(mut self, separator: impl Into<String>) -> Self {
        self.separator = separator.into();
        self
    }

    fn context_budget(&self) -> usize {
        self.max_chars.saturating_sub(self.reserve_chars)
    }
}

/// Selected and omitted context for one bounded model window.
///
/// ```rust
/// use rig_compose::{ContextItem, ContextPack, ContextPackConfig, ContextSourceKind};
///
/// let item = ContextItem::new(ContextSourceKind::Memory, "m1", "fact alice lives in Berlin");
/// let pack = ContextPack::pack(vec![item], ContextPackConfig::new(1_000));
/// assert_eq!(pack.render_text(), "fact alice lives in Berlin");
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ContextPack {
    /// Configuration used to build this pack.
    pub config: ContextPackConfig,
    /// Items selected for prompt context, in render order.
    pub selected: Vec<ContextItem>,
    /// Items considered but omitted, with explicit reasons.
    pub omitted: Vec<OmittedContextItem>,
    /// Estimated characters consumed by selected text and separators.
    pub total_estimated_chars: usize,
}

impl ContextPack {
    /// Pack ranked context items into the configured character window.
    ///
    /// Items are sorted by `rank` before packing so recorded fixtures can be
    /// replayed even if a source returns equivalent items in a different order.
    #[must_use]
    pub fn pack(mut items: Vec<ContextItem>, config: ContextPackConfig) -> Self {
        items.sort_by_key(|item| item.rank);

        let budget = config.context_budget();
        let separator_chars = config.separator.chars().count();
        let mut selected = Vec::new();
        let mut omitted = Vec::new();
        let mut total_estimated_chars = 0usize;

        for item in items {
            if selected.len() >= config.max_items {
                omitted.push(OmittedContextItem {
                    item,
                    reason: ContextOmissionReason::MaxItems,
                });
                continue;
            }

            let item_chars = item.estimated_chars.max(item.text.chars().count());
            let separator_cost = if selected.is_empty() {
                0
            } else {
                separator_chars
            };
            let Some(next_total) = total_estimated_chars
                .checked_add(separator_cost)
                .and_then(|total| total.checked_add(item_chars))
            else {
                omitted.push(OmittedContextItem {
                    item,
                    reason: ContextOmissionReason::OverBudget,
                });
                continue;
            };

            if next_total > budget {
                omitted.push(OmittedContextItem {
                    item,
                    reason: ContextOmissionReason::OverBudget,
                });
                continue;
            }

            total_estimated_chars = next_total;
            selected.push(item);
        }

        Self {
            config,
            selected,
            omitted,
            total_estimated_chars,
        }
    }

    /// Render selected item text as prompt-ready context.
    #[must_use]
    pub fn render_text(&self) -> String {
        self.selected
            .iter()
            .map(|item| item.text.as_str())
            .collect::<Vec<_>>()
            .join(&self.config.separator)
    }
}

/// A named, lightweight signal lifted from a sketch, baseline check, or
/// upstream skill. Skills key their `applies` predicate on signal names.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Signal(pub String);

impl Signal {
    pub fn new(s: impl Into<String>) -> Self {
        Self(s.into())
    }
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

/// A single piece of evidence accumulated during an investigation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Evidence {
    pub source_skill: String,
    pub label: String,
    pub detail: Value,
    pub recorded_at: SystemTime,
}

impl Evidence {
    pub fn new(source_skill: impl Into<String>, label: impl Into<String>) -> Self {
        Self {
            source_skill: source_skill.into(),
            label: label.into(),
            detail: Value::Null,
            recorded_at: SystemTime::now(),
        }
    }

    pub fn with_detail(mut self, detail: Value) -> Self {
        self.detail = detail;
        self
    }
}

/// Hint a skill may emit to drive subsequent skill selection. The agent
/// loop is free to honour or ignore these — they are advisory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NextAction {
    /// Suggest a follow-up skill by id.
    RunSkill(String),
    /// Suggest invoking a named tool with prepared args.
    InvokeTool { tool: String, args: Value },
    /// Stop the investigation; sufficient evidence has been gathered.
    Conclude,
    /// Drop the investigation; the entity is benign.
    Discard,
}

/// Runtime state for one investigation. Cheap to construct; passed by
/// `&mut` reference through the skill chain.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InvestigationContext {
    /// Stable identifier for the entity under investigation. May be a block
    /// id stringified, an actor id from the grammar layer (Phase 2), or any
    /// caller-defined key.
    pub entity_id: String,

    /// Optional originating block — present when the investigation was
    /// triggered by an upstream pipeline. Stored as an opaque UUID so the
    /// kernel does not depend on any specific block-id newtype.
    pub block_id: Option<Uuid>,

    /// Free-form partition tag (caller-defined).
    pub partition: String,

    /// Signals that triggered this investigation and any signals lifted by
    /// earlier skills. Skills add to this set as evidence accumulates.
    pub signals: Vec<Signal>,

    /// Accumulated evidence in chronological order.
    pub evidence: Vec<Evidence>,

    /// Running confidence in `[0, 1]` that the entity exhibits malicious
    /// behaviour. Skills emit deltas; the agent clamps after each step.
    pub confidence: f32,

    /// Hints from the most recently executed skill.
    pub pending_actions: Vec<NextAction>,
}

impl InvestigationContext {
    pub fn new(entity_id: impl Into<String>, partition: impl Into<String>) -> Self {
        Self {
            entity_id: entity_id.into(),
            block_id: None,
            partition: partition.into(),
            signals: Vec::new(),
            evidence: Vec::new(),
            confidence: 0.0,
            pending_actions: Vec::new(),
        }
    }

    pub fn with_block<I: Into<Uuid>>(mut self, id: I) -> Self {
        self.block_id = Some(id.into());
        self
    }

    pub fn with_signal(mut self, s: impl Into<String>) -> Self {
        self.signals.push(Signal::new(s));
        self
    }

    pub fn has_signal(&self, name: &str) -> bool {
        self.signals.iter().any(|s| s.as_str() == name)
    }
}