Skip to main content

nemo_flow_adaptive/acg/
types.rs

1// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Core data types for the Adaptive Cache Governor (ACG) crate.
5//!
6//! This module defines the vocabulary types used by the Adaptive Cache
7//! Governor (ACG) system:
8//! [`OptimizationIntent`] enum with 9 variants, per-variant payload structs,
9//! [`OptimizationIntentBundle`], [`AgentIdentity`], and supporting enums
10//! ([`SharingScope`], [`RetentionTier`], [`PlacementTarget`], [`ModelClass`],
11//! [`IntentType`]).
12//!
13//! All types derive [`serde::Serialize`] and [`serde::Deserialize`] so they
14//! can be round-tripped through JSON without loss.
15
16use chrono::{DateTime, Utc};
17use serde::{Deserialize, Serialize};
18use uuid::Uuid;
19
20// ===================================================================
21// Supporting enums
22// ===================================================================
23
24/// Sharing scope for cached content -- stability does not imply shareability.
25/// Default is `Session` per security requirements.
26#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
27#[serde(rename_all = "snake_case")]
28pub enum SharingScope {
29    /// Request-scoped; content is not shared beyond the current request.
30    Request,
31    /// Session-scoped; content is shared within a single user session.
32    #[default]
33    Session,
34    /// Tenant-scoped; content is shared across sessions within a tenant.
35    Tenant,
36    /// Globally shared; content is available across all tenants.
37    Global,
38}
39
40/// Retention tier for cached state.
41#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
42#[serde(rename_all = "snake_case")]
43pub enum RetentionTier {
44    /// Discarded after immediate use.
45    Ephemeral,
46    /// Retained for a short period (seconds to minutes).
47    ShortLived,
48    /// Retained for the duration of the session.
49    SessionDuration,
50    /// Retained beyond session boundaries.
51    LongLived,
52    /// Retained indefinitely.
53    Permanent,
54}
55
56/// Target location for context placement.
57#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
58#[serde(rename_all = "snake_case")]
59pub enum PlacementTarget {
60    /// Stable content placed in the cacheable prefix zone.
61    CacheablePrefix,
62    /// Tool output deferred to a separate block.
63    DeferredToolBlock,
64    /// Large content replaced with a reference handle.
65    ArtifactReference,
66    /// Content fetched on demand rather than inlined.
67    RetrievalOnDemand,
68    /// Summarized session memory.
69    SessionMemorySummary,
70    /// Volatile content placed in the non-cacheable suffix.
71    NonCacheableSuffix,
72}
73
74/// Model complexity/criticality class for routing.
75#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
76#[serde(rename_all = "snake_case")]
77pub enum ModelClass {
78    /// Low-cost, high-throughput model for simple tasks.
79    Economy,
80    /// General-purpose model.
81    Standard,
82    /// High-capability model for complex reasoning.
83    Premium,
84    /// Most capable model, reserved for critical operations.
85    Critical,
86}
87
88/// Discriminant enum for intent types (used in translation report outcomes).
89#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
90#[serde(rename_all = "snake_case")]
91pub enum IntentType {
92    /// Cache stability analysis intent.
93    CacheStability,
94    /// Content extraction and variable detection intent.
95    ContentExtraction,
96    /// Serialization and fanout optimization intent.
97    Serialization,
98    /// Latency and priority routing intent.
99    Priority,
100    /// Model routing and selection intent.
101    ModelRouting,
102    /// Context placement optimization intent.
103    Placement,
104    /// Cache retention policy intent.
105    Retention,
106    /// Tool scope and phase management intent.
107    ToolScope,
108    /// Content compression intent.
109    Compression,
110}
111
112// ===================================================================
113// Per-variant payload structs (9 total)
114// ===================================================================
115
116/// Cache stability analysis results for a prompt region.
117///
118/// Signals how stable a prefix is across requests and recommends
119/// retention and sharing policies.
120#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
121pub struct CacheStabilityIntent {
122    /// Stability score in the range `[0.0, 1.0]`.
123    pub stability_score: f64,
124    /// Byte offset marking the end of the stable prefix.
125    pub stable_prefix_end: usize,
126    /// Recommended retention tier based on stability analysis.
127    #[serde(skip_serializing_if = "Option::is_none")]
128    #[serde(default)]
129    pub recommended_retention_tier: Option<RetentionTier>,
130    /// Sharing scope label for this cached region.
131    pub scope_label: SharingScope,
132    /// Confidence in the stability assessment `[0.0, 1.0]`.
133    pub confidence: f64,
134    /// Number of observations backing this assessment.
135    pub evidence_count: u32,
136}
137
138/// Content extraction intent for variable content detection.
139///
140/// Identifies dynamic regions within a prompt block that can be
141/// extracted and templated for cache reuse.
142#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
143pub struct ContentExtractionIntent {
144    /// Identifier of the prompt block containing the variable content.
145    pub block_id: String,
146    /// Pattern describing the variable content (e.g., regex or template syntax).
147    pub variable_pattern: String,
148    /// Strategy for extracting the variable content.
149    pub extraction_strategy: String,
150    /// Sharing scope for the extracted template.
151    pub scope_label: SharingScope,
152}
153
154/// Serialization and fanout optimization intent.
155///
156/// Indicates that a prompt region is reused across multiple parallel
157/// requests and can benefit from serialized (shared) caching.
158#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
159pub struct SerializationIntent {
160    /// Number of parallel requests sharing this content.
161    pub fanout_width: u32,
162    /// Expected token savings from caching.
163    pub expected_savings_tokens: u64,
164    /// Probability that the cached content will be reused `[0.0, 1.0]`.
165    pub reuse_probability: f64,
166    /// Additional latency introduced by the serialization strategy.
167    #[serde(skip_serializing_if = "Option::is_none")]
168    #[serde(default)]
169    pub added_latency_ms: Option<f64>,
170    /// Sharing scope for the serialized content.
171    pub scope_label: SharingScope,
172}
173
174/// Latency and priority routing intent.
175///
176/// Communicates the caller's latency sensitivity and workflow context
177/// to influence scheduling and model selection.
178#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
179pub struct PriorityIntent {
180    /// Latency sensitivity score `[0.0, 1.0]` where 1.0 is most sensitive.
181    pub latency_sensitivity: f64,
182    /// Current workflow phase label (e.g., "research", "synthesis").
183    #[serde(skip_serializing_if = "Option::is_none")]
184    #[serde(default)]
185    pub workflow_phase: Option<String>,
186    /// Caller tier label (e.g., "free", "premium", "enterprise").
187    #[serde(skip_serializing_if = "Option::is_none")]
188    #[serde(default)]
189    pub caller_tier: Option<String>,
190}
191
192/// Model routing and selection intent.
193///
194/// Guides backend selection based on task complexity, criticality,
195/// and fallback preferences.
196#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
197pub struct ModelRoutingIntent {
198    /// Target model class for this request.
199    pub model_class: ModelClass,
200    /// Estimated complexity of the task `[0.0, 1.0]`.
201    pub complexity_score: f64,
202    /// How critical correct output is `[0.0, 1.0]`.
203    pub criticality: f64,
204    /// Whether fallback to a lower model class is acceptable.
205    pub fallback_allowed: bool,
206}
207
208/// Context placement optimization intent.
209///
210/// Recommends where a prompt block should be placed within the
211/// prompt structure for optimal caching.
212#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
213pub struct PlacementIntent {
214    /// Identifier of the prompt block to place.
215    pub block_id: String,
216    /// Recommended placement target.
217    pub target: PlacementTarget,
218    /// Stability score of the block `[0.0, 1.0]`.
219    pub stability_score: f64,
220    /// Sharing scope for the placed content.
221    pub scope_label: SharingScope,
222}
223
224/// Cache retention policy intent.
225///
226/// Recommends how long cached content should be retained based on
227/// session patterns and inter-call timing.
228#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
229pub struct RetentionIntent {
230    /// Recommended retention tier.
231    pub recommended_tier: RetentionTier,
232    /// Expected session duration in seconds.
233    #[serde(skip_serializing_if = "Option::is_none")]
234    #[serde(default)]
235    pub expected_session_duration_secs: Option<f64>,
236    /// Median inter-call gap in milliseconds.
237    #[serde(skip_serializing_if = "Option::is_none")]
238    #[serde(default)]
239    pub inter_call_gap_p50_ms: Option<f64>,
240    /// Sharing scope for the retained content.
241    pub scope_label: SharingScope,
242}
243
244/// Tool scope and phase management intent.
245///
246/// Communicates which tools are active in the current workflow phase
247/// to enable tool schema optimization (e.g., deferred tool blocks).
248#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
249pub struct ToolScopeIntent {
250    /// Tools currently active in this workflow phase.
251    pub active_tools: Vec<String>,
252    /// Optional label for the current workflow phase.
253    #[serde(skip_serializing_if = "Option::is_none")]
254    #[serde(default)]
255    pub phase_label: Option<String>,
256    /// Tools deferred to later phases.
257    pub deferred_tools: Vec<String>,
258}
259
260/// Content compression intent.
261///
262/// Recommends compression of a prompt block, balancing token savings
263/// against information loss.
264#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
265pub struct CompressionIntent {
266    /// Identifier of the prompt block to compress.
267    pub block_id: String,
268    /// Achievable compression ratio `[0.0, 1.0]` where lower is more compressed.
269    pub compression_ratio: f64,
270    /// Whether the compression is reversible (lossless).
271    pub reversible: bool,
272    /// Contribution score of this block to output quality `[0.0, 1.0]`.
273    pub contribution_score: f64,
274}
275
276// ===================================================================
277// Main intent enum
278// ===================================================================
279
280/// A single optimization intent emitted by a behavioral model.
281///
282/// Each variant wraps a dedicated payload struct with fields specific
283/// to that intent type. The enum uses internally-tagged JSON
284/// representation with the `intent_type` field as the discriminant.
285#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
286#[serde(tag = "intent_type", rename_all = "snake_case")]
287pub enum OptimizationIntent {
288    /// Cache stability analysis results.
289    CacheStability(CacheStabilityIntent),
290    /// Content extraction and variable detection.
291    ContentExtraction(ContentExtractionIntent),
292    /// Serialization and fanout optimization.
293    Serialization(SerializationIntent),
294    /// Latency and priority routing.
295    Priority(PriorityIntent),
296    /// Model routing and selection.
297    ModelRouting(ModelRoutingIntent),
298    /// Context placement optimization.
299    Placement(PlacementIntent),
300    /// Cache retention policy.
301    Retention(RetentionIntent),
302    /// Tool scope and phase management.
303    ToolScope(ToolScopeIntent),
304    /// Content compression.
305    Compression(CompressionIntent),
306}
307
308impl OptimizationIntent {
309    /// Returns the intent type discriminant for this intent variant.
310    pub fn discriminant(&self) -> IntentType {
311        match self {
312            Self::CacheStability(_) => IntentType::CacheStability,
313            Self::ContentExtraction(_) => IntentType::ContentExtraction,
314            Self::Serialization(_) => IntentType::Serialization,
315            Self::Priority(_) => IntentType::Priority,
316            Self::ModelRouting(_) => IntentType::ModelRouting,
317            Self::Placement(_) => IntentType::Placement,
318            Self::Retention(_) => IntentType::Retention,
319            Self::ToolScope(_) => IntentType::ToolScope,
320            Self::Compression(_) => IntentType::Compression,
321        }
322    }
323}
324
325// ===================================================================
326// OptimizationIntentBundle
327// ===================================================================
328
329/// A bundle of optimization intents for a single request.
330///
331/// Bundles are the primary data contract between behavioral models
332/// (which emit intents) and the translation layer (which converts
333/// intents into provider-specific actions).
334#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
335pub struct OptimizationIntentBundle {
336    /// Unique identifier for this request.
337    pub request_id: Uuid,
338    /// Identity of the agent that generated this bundle.
339    pub agent_identity: AgentIdentity,
340    /// Version of the policy that produced these intents.
341    pub policy_version: String,
342    /// Ordered list of optimization intents.
343    pub intents: Vec<OptimizationIntent>,
344    /// When the bundle was created.
345    pub created_at: DateTime<Utc>,
346}
347
348// ===================================================================
349// AgentIdentity
350// ===================================================================
351
352/// Identity model for an agent type.
353///
354/// Used as a key for per-agent policy lookup, behavioral model selection,
355/// and telemetry grouping. Two agents with identical identity fields are
356/// considered the same agent type.
357///
358/// # Examples
359///
360/// ```
361/// use nemo_flow_adaptive::acg::AgentIdentity;
362/// use std::collections::HashMap;
363///
364/// let id = AgentIdentity {
365///     agent_id: "research".to_string(),
366///     template_version: "1.0.0".to_string(),
367///     toolset_hash: "abc123".to_string(),
368///     model_family: "claude".to_string(),
369///     tenant_scope: "acme-corp".to_string(),
370/// };
371///
372/// let mut policies = HashMap::new();
373/// policies.insert(id.clone(), "aggressive-caching");
374/// assert_eq!(policies.get(&id), Some(&"aggressive-caching"));
375/// ```
376#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
377pub struct AgentIdentity {
378    /// Unique identifier for the agent (e.g., "research-agent").
379    pub agent_id: String,
380    /// Version of the prompt template in use.
381    pub template_version: String,
382    /// Hash of the active toolset configuration.
383    pub toolset_hash: String,
384    /// Model family name (e.g., "claude", "gpt").
385    pub model_family: String,
386    /// Tenant scope for isolation and access control.
387    pub tenant_scope: String,
388}
389
390impl std::fmt::Display for AgentIdentity {
391    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
392        write!(f, "{}@{}", self.agent_id, self.template_version)
393    }
394}
395
396// ===================================================================
397// Translation Report contract
398// ===================================================================
399
400/// Outcome status for a single intent translation.
401///
402/// Plugins return one of these for each intent in the bundle, describing
403/// what happened when the plugin tried to express that intent.
404#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
405#[serde(rename_all = "snake_case")]
406pub enum TranslationStatus {
407    /// Intent was fully expressed in the native API call.
408    Applied,
409    /// Intent was partially expressed (e.g., reduced breakpoints due to model limits).
410    Degraded,
411    /// Intent was silently passed through with no action (e.g., not relevant to this backend).
412    Ignored,
413    /// Intent was actively rejected (e.g., unsafe for this request, feature disabled).
414    Rejected,
415}
416
417/// Machine-readable reason for the translation outcome.
418///
419/// Each variant describes WHY an intent received its status. This allows
420/// operators to distinguish between plugin limitations, backend limitations,
421/// policy decisions, and safety constraints.
422#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
423#[serde(tag = "code", rename_all = "snake_case")]
424pub enum ReasonCode {
425    /// Intent was fully supported and applied.
426    FullySupported,
427    /// Backend does not support this intent type at all.
428    UnsupportedByBackend,
429    /// Backend supports the intent but the specific model lacks the feature.
430    UnsupportedByModel,
431    /// Intent was degraded due to backend-specific limits (e.g., max breakpoints).
432    BackendLimitReached,
433    /// Not enough historical evidence to apply the intent confidently.
434    InsufficientEvidence,
435    /// The feature is available but administratively disabled.
436    FeatureDisabled,
437    /// Applying this intent would be unsafe for the current request.
438    UnsafeForRequest,
439    /// The plugin implementation is incomplete for this intent type.
440    PluginIncomplete,
441    /// Intent was not relevant to the current request context.
442    NotRelevant,
443    /// Escape hatch for reason codes not yet in the enum.
444    Custom {
445        /// Human-readable reason string (for debugging, not machine consumption).
446        reason: String,
447    },
448}
449
450/// Records the outcome of translating a single optimization intent.
451#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
452pub struct IntentOutcome {
453    /// ID of the intent this outcome refers to.
454    pub intent_id: Uuid,
455    /// Type discriminant of the intent.
456    pub intent_type: IntentType,
457    /// What happened to this intent.
458    pub status: TranslationStatus,
459    /// Machine-readable reason for the outcome.
460    pub reason: ReasonCode,
461    /// Optional human-readable detail (for debugging, not machine consumption).
462    #[serde(skip_serializing_if = "Option::is_none")]
463    #[serde(default)]
464    pub detail: Option<String>,
465}
466
467/// A plugin's complete report on how it handled an intent bundle.
468///
469/// Every intent in the input bundle MUST have a corresponding outcome in the report.
470/// This is the critical observability contract per the design doc.
471#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
472pub struct TranslationReport {
473    /// ID of the request this report pertains to.
474    pub request_id: Uuid,
475    /// Identity of the plugin that produced this report.
476    pub plugin_id: String,
477    /// Per-intent outcomes.
478    pub outcomes: Vec<IntentOutcome>,
479    /// When this report was generated.
480    pub created_at: DateTime<Utc>,
481}
482
483impl TranslationReport {
484    /// Returns `true` if every intent was fully applied.
485    pub fn all_applied(&self) -> bool {
486        self.outcomes
487            .iter()
488            .all(|o| o.status == TranslationStatus::Applied)
489    }
490
491    /// Filter outcomes by status.
492    pub fn outcomes_by_status(&self, status: TranslationStatus) -> Vec<&IntentOutcome> {
493        self.outcomes
494            .iter()
495            .filter(|o| o.status == status)
496            .collect()
497    }
498
499    /// Count of outcomes with the given status.
500    pub fn count_by_status(&self, status: TranslationStatus) -> usize {
501        self.outcomes.iter().filter(|o| o.status == status).count()
502    }
503
504    /// Create a report where all intents are ignored (passthrough behavior).
505    ///
506    /// Generates one [`IntentOutcome`] per intent in the bundle, each with
507    /// [`TranslationStatus::Ignored`] and the given reason code. This is the
508    /// standard helper for passthrough and default plugin implementations.
509    pub fn all_ignored(
510        bundle: &OptimizationIntentBundle,
511        plugin_id: &str,
512        reason: ReasonCode,
513        detail: Option<String>,
514    ) -> Self {
515        let outcomes = bundle
516            .intents
517            .iter()
518            .map(|intent| IntentOutcome {
519                intent_id: Uuid::new_v4(),
520                intent_type: intent.discriminant(),
521                status: TranslationStatus::Ignored,
522                reason: reason.clone(),
523                detail: detail.clone(),
524            })
525            .collect();
526        Self {
527            request_id: bundle.request_id,
528            plugin_id: plugin_id.to_string(),
529            outcomes,
530            created_at: Utc::now(),
531        }
532    }
533}
534
// The unit tests for this module are kept in a separate file. `#[path]`
// mounts the file at the relative path below as the private `tests`
// submodule, and `#[cfg(test)]` compiles it only in test builds.
#[cfg(test)]
#[path = "../../tests/unit/acg/types_tests.rs"]
mod tests;