nemo_flow_adaptive/acg/types.rs
1// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Core data types for the Adaptive Cache Governor (ACG) crate.
5//!
//! This module defines the vocabulary types used by the ACG system: the
//! [`OptimizationIntent`] enum with 9 variants, its per-variant payload
//! structs, [`OptimizationIntentBundle`], [`AgentIdentity`], the supporting
//! enums ([`SharingScope`], [`RetentionTier`], [`PlacementTarget`],
//! [`ModelClass`], [`IntentType`]), and the translation report contract
//! ([`TranslationStatus`], [`ReasonCode`], [`IntentOutcome`],
//! [`TranslationReport`]).
12//!
13//! All types derive [`serde::Serialize`] and [`serde::Deserialize`] so they
14//! can be round-tripped through JSON without loss.
15
16use chrono::{DateTime, Utc};
17use serde::{Deserialize, Serialize};
18use uuid::Uuid;
19
20// ===================================================================
21// Supporting enums
22// ===================================================================
23
24/// Sharing scope for cached content -- stability does not imply shareability.
25/// Default is `Session` per security requirements.
26#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
27#[serde(rename_all = "snake_case")]
28pub enum SharingScope {
29 /// Request-scoped; content is not shared beyond the current request.
30 Request,
31 /// Session-scoped; content is shared within a single user session.
32 #[default]
33 Session,
34 /// Tenant-scoped; content is shared across sessions within a tenant.
35 Tenant,
36 /// Globally shared; content is available across all tenants.
37 Global,
38}
39
40/// Retention tier for cached state.
41#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
42#[serde(rename_all = "snake_case")]
43pub enum RetentionTier {
44 /// Discarded after immediate use.
45 Ephemeral,
46 /// Retained for a short period (seconds to minutes).
47 ShortLived,
48 /// Retained for the duration of the session.
49 SessionDuration,
50 /// Retained beyond session boundaries.
51 LongLived,
52 /// Retained indefinitely.
53 Permanent,
54}
55
56/// Target location for context placement.
57#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
58#[serde(rename_all = "snake_case")]
59pub enum PlacementTarget {
60 /// Stable content placed in the cacheable prefix zone.
61 CacheablePrefix,
62 /// Tool output deferred to a separate block.
63 DeferredToolBlock,
64 /// Large content replaced with a reference handle.
65 ArtifactReference,
66 /// Content fetched on demand rather than inlined.
67 RetrievalOnDemand,
68 /// Summarized session memory.
69 SessionMemorySummary,
70 /// Volatile content placed in the non-cacheable suffix.
71 NonCacheableSuffix,
72}
73
74/// Model complexity/criticality class for routing.
75#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
76#[serde(rename_all = "snake_case")]
77pub enum ModelClass {
78 /// Low-cost, high-throughput model for simple tasks.
79 Economy,
80 /// General-purpose model.
81 Standard,
82 /// High-capability model for complex reasoning.
83 Premium,
84 /// Most capable model, reserved for critical operations.
85 Critical,
86}
87
88/// Discriminant enum for intent types (used in translation report outcomes).
89#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
90#[serde(rename_all = "snake_case")]
91pub enum IntentType {
92 /// Cache stability analysis intent.
93 CacheStability,
94 /// Content extraction and variable detection intent.
95 ContentExtraction,
96 /// Serialization and fanout optimization intent.
97 Serialization,
98 /// Latency and priority routing intent.
99 Priority,
100 /// Model routing and selection intent.
101 ModelRouting,
102 /// Context placement optimization intent.
103 Placement,
104 /// Cache retention policy intent.
105 Retention,
106 /// Tool scope and phase management intent.
107 ToolScope,
108 /// Content compression intent.
109 Compression,
110}
111
112// ===================================================================
113// Per-variant payload structs (9 total)
114// ===================================================================
115
116/// Cache stability analysis results for a prompt region.
117///
118/// Signals how stable a prefix is across requests and recommends
119/// retention and sharing policies.
120#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
121pub struct CacheStabilityIntent {
122 /// Stability score in the range `[0.0, 1.0]`.
123 pub stability_score: f64,
124 /// Byte offset marking the end of the stable prefix.
125 pub stable_prefix_end: usize,
126 /// Recommended retention tier based on stability analysis.
127 #[serde(skip_serializing_if = "Option::is_none")]
128 #[serde(default)]
129 pub recommended_retention_tier: Option<RetentionTier>,
130 /// Sharing scope label for this cached region.
131 pub scope_label: SharingScope,
132 /// Confidence in the stability assessment `[0.0, 1.0]`.
133 pub confidence: f64,
134 /// Number of observations backing this assessment.
135 pub evidence_count: u32,
136}
137
138/// Content extraction intent for variable content detection.
139///
140/// Identifies dynamic regions within a prompt block that can be
141/// extracted and templated for cache reuse.
142#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
143pub struct ContentExtractionIntent {
144 /// Identifier of the prompt block containing the variable content.
145 pub block_id: String,
146 /// Pattern describing the variable content (e.g., regex or template syntax).
147 pub variable_pattern: String,
148 /// Strategy for extracting the variable content.
149 pub extraction_strategy: String,
150 /// Sharing scope for the extracted template.
151 pub scope_label: SharingScope,
152}
153
154/// Serialization and fanout optimization intent.
155///
156/// Indicates that a prompt region is reused across multiple parallel
157/// requests and can benefit from serialized (shared) caching.
158#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
159pub struct SerializationIntent {
160 /// Number of parallel requests sharing this content.
161 pub fanout_width: u32,
162 /// Expected token savings from caching.
163 pub expected_savings_tokens: u64,
164 /// Probability that the cached content will be reused `[0.0, 1.0]`.
165 pub reuse_probability: f64,
166 /// Additional latency introduced by the serialization strategy.
167 #[serde(skip_serializing_if = "Option::is_none")]
168 #[serde(default)]
169 pub added_latency_ms: Option<f64>,
170 /// Sharing scope for the serialized content.
171 pub scope_label: SharingScope,
172}
173
174/// Latency and priority routing intent.
175///
176/// Communicates the caller's latency sensitivity and workflow context
177/// to influence scheduling and model selection.
178#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
179pub struct PriorityIntent {
180 /// Latency sensitivity score `[0.0, 1.0]` where 1.0 is most sensitive.
181 pub latency_sensitivity: f64,
182 /// Current workflow phase label (e.g., "research", "synthesis").
183 #[serde(skip_serializing_if = "Option::is_none")]
184 #[serde(default)]
185 pub workflow_phase: Option<String>,
186 /// Caller tier label (e.g., "free", "premium", "enterprise").
187 #[serde(skip_serializing_if = "Option::is_none")]
188 #[serde(default)]
189 pub caller_tier: Option<String>,
190}
191
192/// Model routing and selection intent.
193///
194/// Guides backend selection based on task complexity, criticality,
195/// and fallback preferences.
196#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
197pub struct ModelRoutingIntent {
198 /// Target model class for this request.
199 pub model_class: ModelClass,
200 /// Estimated complexity of the task `[0.0, 1.0]`.
201 pub complexity_score: f64,
202 /// How critical correct output is `[0.0, 1.0]`.
203 pub criticality: f64,
204 /// Whether fallback to a lower model class is acceptable.
205 pub fallback_allowed: bool,
206}
207
208/// Context placement optimization intent.
209///
210/// Recommends where a prompt block should be placed within the
211/// prompt structure for optimal caching.
212#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
213pub struct PlacementIntent {
214 /// Identifier of the prompt block to place.
215 pub block_id: String,
216 /// Recommended placement target.
217 pub target: PlacementTarget,
218 /// Stability score of the block `[0.0, 1.0]`.
219 pub stability_score: f64,
220 /// Sharing scope for the placed content.
221 pub scope_label: SharingScope,
222}
223
224/// Cache retention policy intent.
225///
226/// Recommends how long cached content should be retained based on
227/// session patterns and inter-call timing.
228#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
229pub struct RetentionIntent {
230 /// Recommended retention tier.
231 pub recommended_tier: RetentionTier,
232 /// Expected session duration in seconds.
233 #[serde(skip_serializing_if = "Option::is_none")]
234 #[serde(default)]
235 pub expected_session_duration_secs: Option<f64>,
236 /// Median inter-call gap in milliseconds.
237 #[serde(skip_serializing_if = "Option::is_none")]
238 #[serde(default)]
239 pub inter_call_gap_p50_ms: Option<f64>,
240 /// Sharing scope for the retained content.
241 pub scope_label: SharingScope,
242}
243
244/// Tool scope and phase management intent.
245///
246/// Communicates which tools are active in the current workflow phase
247/// to enable tool schema optimization (e.g., deferred tool blocks).
248#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
249pub struct ToolScopeIntent {
250 /// Tools currently active in this workflow phase.
251 pub active_tools: Vec<String>,
252 /// Optional label for the current workflow phase.
253 #[serde(skip_serializing_if = "Option::is_none")]
254 #[serde(default)]
255 pub phase_label: Option<String>,
256 /// Tools deferred to later phases.
257 pub deferred_tools: Vec<String>,
258}
259
260/// Content compression intent.
261///
262/// Recommends compression of a prompt block, balancing token savings
263/// against information loss.
264#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
265pub struct CompressionIntent {
266 /// Identifier of the prompt block to compress.
267 pub block_id: String,
268 /// Achievable compression ratio `[0.0, 1.0]` where lower is more compressed.
269 pub compression_ratio: f64,
270 /// Whether the compression is reversible (lossless).
271 pub reversible: bool,
272 /// Contribution score of this block to output quality `[0.0, 1.0]`.
273 pub contribution_score: f64,
274}
275
276// ===================================================================
277// Main intent enum
278// ===================================================================
279
280/// A single optimization intent emitted by a behavioral model.
281///
282/// Each variant wraps a dedicated payload struct with fields specific
283/// to that intent type. The enum uses internally-tagged JSON
284/// representation with the `intent_type` field as the discriminant.
285#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
286#[serde(tag = "intent_type", rename_all = "snake_case")]
287pub enum OptimizationIntent {
288 /// Cache stability analysis results.
289 CacheStability(CacheStabilityIntent),
290 /// Content extraction and variable detection.
291 ContentExtraction(ContentExtractionIntent),
292 /// Serialization and fanout optimization.
293 Serialization(SerializationIntent),
294 /// Latency and priority routing.
295 Priority(PriorityIntent),
296 /// Model routing and selection.
297 ModelRouting(ModelRoutingIntent),
298 /// Context placement optimization.
299 Placement(PlacementIntent),
300 /// Cache retention policy.
301 Retention(RetentionIntent),
302 /// Tool scope and phase management.
303 ToolScope(ToolScopeIntent),
304 /// Content compression.
305 Compression(CompressionIntent),
306}
307
308impl OptimizationIntent {
309 /// Returns the intent type discriminant for this intent variant.
310 pub fn discriminant(&self) -> IntentType {
311 match self {
312 Self::CacheStability(_) => IntentType::CacheStability,
313 Self::ContentExtraction(_) => IntentType::ContentExtraction,
314 Self::Serialization(_) => IntentType::Serialization,
315 Self::Priority(_) => IntentType::Priority,
316 Self::ModelRouting(_) => IntentType::ModelRouting,
317 Self::Placement(_) => IntentType::Placement,
318 Self::Retention(_) => IntentType::Retention,
319 Self::ToolScope(_) => IntentType::ToolScope,
320 Self::Compression(_) => IntentType::Compression,
321 }
322 }
323}
324
325// ===================================================================
326// OptimizationIntentBundle
327// ===================================================================
328
329/// A bundle of optimization intents for a single request.
330///
331/// Bundles are the primary data contract between behavioral models
332/// (which emit intents) and the translation layer (which converts
333/// intents into provider-specific actions).
334#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
335pub struct OptimizationIntentBundle {
336 /// Unique identifier for this request.
337 pub request_id: Uuid,
338 /// Identity of the agent that generated this bundle.
339 pub agent_identity: AgentIdentity,
340 /// Version of the policy that produced these intents.
341 pub policy_version: String,
342 /// Ordered list of optimization intents.
343 pub intents: Vec<OptimizationIntent>,
344 /// When the bundle was created.
345 pub created_at: DateTime<Utc>,
346}
347
348// ===================================================================
349// AgentIdentity
350// ===================================================================
351
352/// Identity model for an agent type.
353///
354/// Used as a key for per-agent policy lookup, behavioral model selection,
355/// and telemetry grouping. Two agents with identical identity fields are
356/// considered the same agent type.
357///
358/// # Examples
359///
360/// ```
361/// use nemo_flow_adaptive::acg::AgentIdentity;
362/// use std::collections::HashMap;
363///
364/// let id = AgentIdentity {
365/// agent_id: "research".to_string(),
366/// template_version: "1.0.0".to_string(),
367/// toolset_hash: "abc123".to_string(),
368/// model_family: "claude".to_string(),
369/// tenant_scope: "acme-corp".to_string(),
370/// };
371///
372/// let mut policies = HashMap::new();
373/// policies.insert(id.clone(), "aggressive-caching");
374/// assert_eq!(policies.get(&id), Some(&"aggressive-caching"));
375/// ```
376#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
377pub struct AgentIdentity {
378 /// Unique identifier for the agent (e.g., "research-agent").
379 pub agent_id: String,
380 /// Version of the prompt template in use.
381 pub template_version: String,
382 /// Hash of the active toolset configuration.
383 pub toolset_hash: String,
384 /// Model family name (e.g., "claude", "gpt").
385 pub model_family: String,
386 /// Tenant scope for isolation and access control.
387 pub tenant_scope: String,
388}
389
390impl std::fmt::Display for AgentIdentity {
391 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
392 write!(f, "{}@{}", self.agent_id, self.template_version)
393 }
394}
395
396// ===================================================================
397// Translation Report contract
398// ===================================================================
399
400/// Outcome status for a single intent translation.
401///
402/// Plugins return one of these for each intent in the bundle, describing
403/// what happened when the plugin tried to express that intent.
404#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
405#[serde(rename_all = "snake_case")]
406pub enum TranslationStatus {
407 /// Intent was fully expressed in the native API call.
408 Applied,
409 /// Intent was partially expressed (e.g., reduced breakpoints due to model limits).
410 Degraded,
411 /// Intent was silently passed through with no action (e.g., not relevant to this backend).
412 Ignored,
413 /// Intent was actively rejected (e.g., unsafe for this request, feature disabled).
414 Rejected,
415}
416
417/// Machine-readable reason for the translation outcome.
418///
419/// Each variant describes WHY an intent received its status. This allows
420/// operators to distinguish between plugin limitations, backend limitations,
421/// policy decisions, and safety constraints.
422#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
423#[serde(tag = "code", rename_all = "snake_case")]
424pub enum ReasonCode {
425 /// Intent was fully supported and applied.
426 FullySupported,
427 /// Backend does not support this intent type at all.
428 UnsupportedByBackend,
429 /// Backend supports the intent but the specific model lacks the feature.
430 UnsupportedByModel,
431 /// Intent was degraded due to backend-specific limits (e.g., max breakpoints).
432 BackendLimitReached,
433 /// Not enough historical evidence to apply the intent confidently.
434 InsufficientEvidence,
435 /// The feature is available but administratively disabled.
436 FeatureDisabled,
437 /// Applying this intent would be unsafe for the current request.
438 UnsafeForRequest,
439 /// The plugin implementation is incomplete for this intent type.
440 PluginIncomplete,
441 /// Intent was not relevant to the current request context.
442 NotRelevant,
443 /// Escape hatch for reason codes not yet in the enum.
444 Custom {
445 /// Human-readable reason string (for debugging, not machine consumption).
446 reason: String,
447 },
448}
449
450/// Records the outcome of translating a single optimization intent.
451#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
452pub struct IntentOutcome {
453 /// ID of the intent this outcome refers to.
454 pub intent_id: Uuid,
455 /// Type discriminant of the intent.
456 pub intent_type: IntentType,
457 /// What happened to this intent.
458 pub status: TranslationStatus,
459 /// Machine-readable reason for the outcome.
460 pub reason: ReasonCode,
461 /// Optional human-readable detail (for debugging, not machine consumption).
462 #[serde(skip_serializing_if = "Option::is_none")]
463 #[serde(default)]
464 pub detail: Option<String>,
465}
466
467/// A plugin's complete report on how it handled an intent bundle.
468///
469/// Every intent in the input bundle MUST have a corresponding outcome in the report.
470/// This is the critical observability contract per the design doc.
471#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
472pub struct TranslationReport {
473 /// ID of the request this report pertains to.
474 pub request_id: Uuid,
475 /// Identity of the plugin that produced this report.
476 pub plugin_id: String,
477 /// Per-intent outcomes.
478 pub outcomes: Vec<IntentOutcome>,
479 /// When this report was generated.
480 pub created_at: DateTime<Utc>,
481}
482
483impl TranslationReport {
484 /// Returns `true` if every intent was fully applied.
485 pub fn all_applied(&self) -> bool {
486 self.outcomes
487 .iter()
488 .all(|o| o.status == TranslationStatus::Applied)
489 }
490
491 /// Filter outcomes by status.
492 pub fn outcomes_by_status(&self, status: TranslationStatus) -> Vec<&IntentOutcome> {
493 self.outcomes
494 .iter()
495 .filter(|o| o.status == status)
496 .collect()
497 }
498
499 /// Count of outcomes with the given status.
500 pub fn count_by_status(&self, status: TranslationStatus) -> usize {
501 self.outcomes.iter().filter(|o| o.status == status).count()
502 }
503
504 /// Create a report where all intents are ignored (passthrough behavior).
505 ///
506 /// Generates one [`IntentOutcome`] per intent in the bundle, each with
507 /// [`TranslationStatus::Ignored`] and the given reason code. This is the
508 /// standard helper for passthrough and default plugin implementations.
509 pub fn all_ignored(
510 bundle: &OptimizationIntentBundle,
511 plugin_id: &str,
512 reason: ReasonCode,
513 detail: Option<String>,
514 ) -> Self {
515 let outcomes = bundle
516 .intents
517 .iter()
518 .map(|intent| IntentOutcome {
519 intent_id: Uuid::new_v4(),
520 intent_type: intent.discriminant(),
521 status: TranslationStatus::Ignored,
522 reason: reason.clone(),
523 detail: detail.clone(),
524 })
525 .collect();
526 Self {
527 request_id: bundle.request_id,
528 plugin_id: plugin_id.to_string(),
529 outcomes,
530 created_at: Utc::now(),
531 }
532 }
533}
534
535#[cfg(test)]
536#[path = "../../tests/unit/acg/types_tests.rs"]
537mod tests;