nemo-flow-adaptive 0.2.0

Adaptive runtime primitives and Redis-backed learning components for NeMo Flow.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! Core data types for the Adaptive Cache Governor (ACG) crate.
//!
//! This module defines the vocabulary types used by the Adaptive Cache
//! Governor (ACG) system: the [`OptimizationIntent`] enum with 9 variants
//! and its per-variant payload structs, [`OptimizationIntentBundle`],
//! [`AgentIdentity`], the supporting enums ([`SharingScope`],
//! [`RetentionTier`], [`PlacementTarget`], [`ModelClass`], [`IntentType`]),
//! and the translation-report contract ([`TranslationStatus`],
//! [`ReasonCode`], [`IntentOutcome`], [`TranslationReport`]).
//!
//! All types derive [`serde::Serialize`] and [`serde::Deserialize`] so they
//! can be round-tripped through JSON without loss.

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;

// ===================================================================
// Supporting enums
// ===================================================================

/// Sharing scope for cached content -- stability does not imply shareability.
///
/// Variants are declared from the narrowest scope (`Request`) to the broadest
/// (`Global`). The default is [`SharingScope::Session`] per security
/// requirements.
///
/// Serialized as `snake_case` strings: `"request"`, `"session"`, `"tenant"`,
/// `"global"`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SharingScope {
    /// Request-scoped; content is not shared beyond the current request.
    Request,
    /// Session-scoped; content is shared within a single user session.
    ///
    /// This is the value produced by `SharingScope::default()`.
    #[default]
    Session,
    /// Tenant-scoped; content is shared across sessions within a tenant.
    Tenant,
    /// Globally shared; content is available across all tenants.
    Global,
}

/// Retention tier for cached state.
///
/// Variants are declared from the shortest retention (`Ephemeral`) to the
/// longest (`Permanent`). No ordering trait is derived, so tiers can be
/// tested for equality but not compared with `<` / `>`.
///
/// Serialized as `snake_case` strings (e.g. `"short_lived"`,
/// `"session_duration"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RetentionTier {
    /// Discarded after immediate use.
    Ephemeral,
    /// Retained for a short period (seconds to minutes).
    ShortLived,
    /// Retained for the duration of the session.
    SessionDuration,
    /// Retained beyond session boundaries.
    LongLived,
    /// Retained indefinitely.
    Permanent,
}

/// Target location for context placement.
///
/// Used by [`PlacementIntent`] to say where a prompt block should go.
///
/// Serialized as `snake_case` strings (e.g. `"cacheable_prefix"`,
/// `"non_cacheable_suffix"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PlacementTarget {
    /// Stable content placed in the cacheable prefix zone.
    CacheablePrefix,
    /// Tool output deferred to a separate block.
    DeferredToolBlock,
    /// Large content replaced with a reference handle.
    ArtifactReference,
    /// Content fetched on demand rather than inlined.
    RetrievalOnDemand,
    /// Summarized session memory.
    SessionMemorySummary,
    /// Volatile content placed in the non-cacheable suffix.
    NonCacheableSuffix,
}

/// Model complexity/criticality class for routing.
///
/// Used by [`ModelRoutingIntent`] to name the requested class of model.
/// Variants are declared from the cheapest class (`Economy`) to the most
/// capable (`Critical`); no ordering trait is derived.
///
/// Serialized as `snake_case` strings: `"economy"`, `"standard"`,
/// `"premium"`, `"critical"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ModelClass {
    /// Low-cost, high-throughput model for simple tasks.
    Economy,
    /// General-purpose model.
    Standard,
    /// High-capability model for complex reasoning.
    Premium,
    /// Most capable model, reserved for critical operations.
    Critical,
}

/// Discriminant enum for intent types (used in translation report outcomes).
///
/// There is one payload-free variant here for each [`OptimizationIntent`]
/// variant; [`OptimizationIntent::discriminant`] performs the mapping.
///
/// Serialized as `snake_case` strings (e.g. `"cache_stability"`,
/// `"model_routing"`), matching the `intent_type` tag values of
/// [`OptimizationIntent`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IntentType {
    /// Cache stability analysis intent.
    CacheStability,
    /// Content extraction and variable detection intent.
    ContentExtraction,
    /// Serialization and fanout optimization intent.
    Serialization,
    /// Latency and priority routing intent.
    Priority,
    /// Model routing and selection intent.
    ModelRouting,
    /// Context placement optimization intent.
    Placement,
    /// Cache retention policy intent.
    Retention,
    /// Tool scope and phase management intent.
    ToolScope,
    /// Content compression intent.
    Compression,
}

// ===================================================================
// Per-variant payload structs (9 total)
// ===================================================================

/// Cache stability analysis results for a prompt region.
///
/// Signals how stable a prefix is across requests and recommends
/// retention and sharing policies.
///
/// The `[0.0, 1.0]` ranges documented on the score fields are conventions:
/// the fields are plain `f64` and the derived deserializer does not check
/// them. Because of the `f64` fields the type is `PartialEq` but not `Eq`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CacheStabilityIntent {
    /// Stability score in the range `[0.0, 1.0]`.
    pub stability_score: f64,
    /// Byte offset marking the end of the stable prefix.
    // NOTE(review): whether this offset is inclusive or exclusive is not
    // established in this file -- confirm against the producer.
    pub stable_prefix_end: usize,
    /// Recommended retention tier based on stability analysis.
    ///
    /// Omitted from the serialized form when `None`; deserializes to `None`
    /// when the key is absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
    pub recommended_retention_tier: Option<RetentionTier>,
    /// Sharing scope label for this cached region.
    ///
    /// Required when deserializing: the field has no `#[serde(default)]`, so
    /// the `SharingScope` default is not applied to a missing key.
    pub scope_label: SharingScope,
    /// Confidence in the stability assessment `[0.0, 1.0]`.
    pub confidence: f64,
    /// Number of observations backing this assessment.
    pub evidence_count: u32,
}

/// Content extraction intent for variable content detection.
///
/// Identifies dynamic regions within a prompt block that can be
/// extracted and templated for cache reuse.
///
/// All four fields are required when deserializing.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ContentExtractionIntent {
    /// Identifier of the prompt block containing the variable content.
    pub block_id: String,
    /// Pattern describing the variable content (e.g., regex or template syntax).
    ///
    /// Stored as an opaque string; this type does not parse or validate it.
    pub variable_pattern: String,
    /// Strategy for extracting the variable content.
    // NOTE(review): free-form string; the set of accepted strategy names is
    // not defined in this file -- confirm with the consumer.
    pub extraction_strategy: String,
    /// Sharing scope for the extracted template.
    pub scope_label: SharingScope,
}

/// Serialization and fanout optimization intent.
///
/// Indicates that a prompt region is reused across multiple parallel
/// requests and can benefit from serialized (shared) caching.
///
/// Documented numeric ranges are conventions; the derived deserializer does
/// not validate them.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SerializationIntent {
    /// Number of parallel requests sharing this content.
    pub fanout_width: u32,
    /// Expected token savings from caching.
    pub expected_savings_tokens: u64,
    /// Probability that the cached content will be reused `[0.0, 1.0]`.
    pub reuse_probability: f64,
    /// Additional latency introduced by the serialization strategy, in
    /// milliseconds (per the `_ms` suffix).
    ///
    /// Omitted from the serialized form when `None`; deserializes to `None`
    /// when the key is absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
    pub added_latency_ms: Option<f64>,
    /// Sharing scope for the serialized content.
    pub scope_label: SharingScope,
}

/// Latency and priority routing intent.
///
/// Communicates the caller's latency sensitivity and workflow context
/// to influence scheduling and model selection.
///
/// Only `latency_sensitivity` is required when deserializing; both label
/// fields are optional and are left out of the serialized form when `None`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PriorityIntent {
    /// Latency sensitivity score `[0.0, 1.0]` where 1.0 is most sensitive.
    ///
    /// The range is a convention and is not checked by this type.
    pub latency_sensitivity: f64,
    /// Current workflow phase label (e.g., "research", "synthesis").
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
    pub workflow_phase: Option<String>,
    /// Caller tier label (e.g., "free", "premium", "enterprise").
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
    pub caller_tier: Option<String>,
}

/// Model routing and selection intent.
///
/// Guides backend selection based on task complexity, criticality,
/// and fallback preferences.
///
/// All fields are required when deserializing. The `[0.0, 1.0]` ranges are
/// conventions and are not checked by this type.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ModelRoutingIntent {
    /// Target model class for this request.
    pub model_class: ModelClass,
    /// Estimated complexity of the task `[0.0, 1.0]`.
    pub complexity_score: f64,
    /// How critical correct output is `[0.0, 1.0]`.
    pub criticality: f64,
    /// Whether fallback to a lower model class is acceptable.
    pub fallback_allowed: bool,
}

/// Context placement optimization intent.
///
/// Recommends where a prompt block should be placed within the
/// prompt structure for optimal caching.
///
/// All fields are required when deserializing.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PlacementIntent {
    /// Identifier of the prompt block to place.
    pub block_id: String,
    /// Recommended placement target.
    pub target: PlacementTarget,
    /// Stability score of the block `[0.0, 1.0]`.
    ///
    /// The range is a convention and is not checked by this type.
    pub stability_score: f64,
    /// Sharing scope for the placed content.
    pub scope_label: SharingScope,
}

/// Cache retention policy intent.
///
/// Recommends how long cached content should be retained based on
/// session patterns and inter-call timing.
///
/// The two timing fields are optional evidence: each is left out of the
/// serialized form when `None` and deserializes to `None` when absent.
/// Note the differing units: seconds for the session duration, milliseconds
/// for the inter-call gap.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RetentionIntent {
    /// Recommended retention tier.
    pub recommended_tier: RetentionTier,
    /// Expected session duration in seconds.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
    pub expected_session_duration_secs: Option<f64>,
    /// Median inter-call gap in milliseconds.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
    pub inter_call_gap_p50_ms: Option<f64>,
    /// Sharing scope for the retained content.
    pub scope_label: SharingScope,
}

/// Tool scope and phase management intent.
///
/// Communicates which tools are active in the current workflow phase
/// to enable tool schema optimization (e.g., deferred tool blocks).
///
/// Unlike most intent payloads this type holds no `f64`, but it still
/// derives only `PartialEq` (not `Eq`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ToolScopeIntent {
    /// Tools currently active in this workflow phase.
    ///
    /// Required when deserializing (an empty list must be spelled `[]`).
    pub active_tools: Vec<String>,
    /// Optional label for the current workflow phase.
    ///
    /// Omitted from the serialized form when `None`; deserializes to `None`
    /// when the key is absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
    pub phase_label: Option<String>,
    /// Tools deferred to later phases.
    ///
    /// Required when deserializing (an empty list must be spelled `[]`).
    pub deferred_tools: Vec<String>,
}

/// Content compression intent.
///
/// Recommends compression of a prompt block, balancing token savings
/// against information loss.
///
/// All fields are required when deserializing. The `[0.0, 1.0]` ranges are
/// conventions and are not checked by this type.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CompressionIntent {
    /// Identifier of the prompt block to compress.
    pub block_id: String,
    /// Achievable compression ratio `[0.0, 1.0]` where lower is more compressed.
    pub compression_ratio: f64,
    /// Whether the compression is reversible (lossless).
    pub reversible: bool,
    /// Contribution score of this block to output quality `[0.0, 1.0]`.
    pub contribution_score: f64,
}

// ===================================================================
// Main intent enum
// ===================================================================

/// A single optimization intent emitted by a behavioral model.
///
/// Each variant wraps a dedicated payload struct with fields specific
/// to that intent type. The enum uses internally-tagged JSON
/// representation with the `intent_type` field as the discriminant.
///
/// With internal tagging the payload's fields sit next to the tag in a
/// single JSON object, and the tag value is the `snake_case` variant name.
/// For example, a [`PriorityIntent`] with only its required field set
/// serializes as:
///
/// ```json
/// {"intent_type": "priority", "latency_sensitivity": 0.9}
/// ```
///
/// Use [`OptimizationIntent::discriminant`] to obtain the matching
/// [`IntentType`] without inspecting the payload.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "intent_type", rename_all = "snake_case")]
pub enum OptimizationIntent {
    /// Cache stability analysis results.
    CacheStability(CacheStabilityIntent),
    /// Content extraction and variable detection.
    ContentExtraction(ContentExtractionIntent),
    /// Serialization and fanout optimization.
    Serialization(SerializationIntent),
    /// Latency and priority routing.
    Priority(PriorityIntent),
    /// Model routing and selection.
    ModelRouting(ModelRoutingIntent),
    /// Context placement optimization.
    Placement(PlacementIntent),
    /// Cache retention policy.
    Retention(RetentionIntent),
    /// Tool scope and phase management.
    ToolScope(ToolScopeIntent),
    /// Content compression.
    Compression(CompressionIntent),
}

impl OptimizationIntent {
    /// Returns the intent type discriminant for this intent variant.
    pub fn discriminant(&self) -> IntentType {
        match self {
            Self::CacheStability(_) => IntentType::CacheStability,
            Self::ContentExtraction(_) => IntentType::ContentExtraction,
            Self::Serialization(_) => IntentType::Serialization,
            Self::Priority(_) => IntentType::Priority,
            Self::ModelRouting(_) => IntentType::ModelRouting,
            Self::Placement(_) => IntentType::Placement,
            Self::Retention(_) => IntentType::Retention,
            Self::ToolScope(_) => IntentType::ToolScope,
            Self::Compression(_) => IntentType::Compression,
        }
    }
}

// ===================================================================
// OptimizationIntentBundle
// ===================================================================

/// A bundle of optimization intents for a single request.
///
/// Bundles are the primary data contract between behavioral models
/// (which emit intents) and the translation layer (which converts
/// intents into provider-specific actions).
///
/// All fields are required when deserializing. An empty `intents` list is
/// representable; nothing in this type forbids it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OptimizationIntentBundle {
    /// Unique identifier for this request.
    ///
    /// [`TranslationReport::all_ignored`] copies this value into the report
    /// it builds, linking the report back to the bundle.
    pub request_id: Uuid,
    /// Identity of the agent that generated this bundle.
    pub agent_identity: AgentIdentity,
    /// Version of the policy that produced these intents.
    pub policy_version: String,
    /// Ordered list of optimization intents.
    pub intents: Vec<OptimizationIntent>,
    /// When the bundle was created.
    pub created_at: DateTime<Utc>,
}

// ===================================================================
// AgentIdentity
// ===================================================================

/// Identity model for an agent type.
///
/// Used as a key for per-agent policy lookup, behavioral model selection,
/// and telemetry grouping. Two agents with identical identity fields are
/// considered the same agent type.
///
/// Equality and hashing are derived over all five fields, so the type can
/// be used directly as a `HashMap` key (see the example). The `Display`
/// implementation prints only `agent_id` and `template_version`, so two
/// identities that compare unequal can still display identically.
///
/// # Examples
///
/// ```
/// use nemo_flow_adaptive::acg::AgentIdentity;
/// use std::collections::HashMap;
///
/// let id = AgentIdentity {
///     agent_id: "research".to_string(),
///     template_version: "1.0.0".to_string(),
///     toolset_hash: "abc123".to_string(),
///     model_family: "claude".to_string(),
///     tenant_scope: "acme-corp".to_string(),
/// };
///
/// let mut policies = HashMap::new();
/// policies.insert(id.clone(), "aggressive-caching");
/// assert_eq!(policies.get(&id), Some(&"aggressive-caching"));
/// ```
#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub struct AgentIdentity {
    /// Unique identifier for the agent (e.g., "research-agent").
    pub agent_id: String,
    /// Version of the prompt template in use.
    pub template_version: String,
    /// Hash of the active toolset configuration.
    // NOTE(review): the hash algorithm and encoding are not defined in this
    // file; the value is treated as an opaque string here.
    pub toolset_hash: String,
    /// Model family name (e.g., "claude", "gpt").
    pub model_family: String,
    /// Tenant scope for isolation and access control.
    pub tenant_scope: String,
}

impl std::fmt::Display for AgentIdentity {
    /// Writes the identity as `<agent_id>@<template_version>`.
    ///
    /// The remaining fields (`toolset_hash`, `model_family`, `tenant_scope`)
    /// are intentionally left out of the rendered text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (agent, version) = (&self.agent_id, &self.template_version);
        f.write_fmt(format_args!("{}@{}", agent, version))
    }
}

// ===================================================================
// Translation Report contract
// ===================================================================

/// Outcome status for a single intent translation.
///
/// Plugins return one of these for each intent in the bundle, describing
/// what happened when the plugin tried to express that intent.
///
/// Serialized as `snake_case` strings: `"applied"`, `"degraded"`,
/// `"ignored"`, `"rejected"`. The accompanying [`ReasonCode`] in an
/// [`IntentOutcome`] explains why the status was chosen.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TranslationStatus {
    /// Intent was fully expressed in the native API call.
    Applied,
    /// Intent was partially expressed (e.g., reduced breakpoints due to model limits).
    Degraded,
    /// Intent was silently passed through with no action (e.g., not relevant to this backend).
    Ignored,
    /// Intent was actively rejected (e.g., unsafe for this request, feature disabled).
    Rejected,
}

/// Machine-readable reason for the translation outcome.
///
/// Each variant describes WHY an intent received its status. This allows
/// operators to distinguish between plugin limitations, backend limitations,
/// policy decisions, and safety constraints.
///
/// The enum is internally tagged on the `code` key with `snake_case` names,
/// so a unit variant serializes as an object such as
/// `{"code": "fully_supported"}` and [`ReasonCode::Custom`] as
/// `{"code": "custom", "reason": "..."}`.
///
/// The type is `Clone` but not `Copy` because `Custom` owns a `String`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(tag = "code", rename_all = "snake_case")]
pub enum ReasonCode {
    /// Intent was fully supported and applied.
    FullySupported,
    /// Backend does not support this intent type at all.
    UnsupportedByBackend,
    /// Backend supports the intent but the specific model lacks the feature.
    UnsupportedByModel,
    /// Intent was degraded due to backend-specific limits (e.g., max breakpoints).
    BackendLimitReached,
    /// Not enough historical evidence to apply the intent confidently.
    InsufficientEvidence,
    /// The feature is available but administratively disabled.
    FeatureDisabled,
    /// Applying this intent would be unsafe for the current request.
    UnsafeForRequest,
    /// The plugin implementation is incomplete for this intent type.
    PluginIncomplete,
    /// Intent was not relevant to the current request context.
    NotRelevant,
    /// Escape hatch for reason codes not yet in the enum.
    Custom {
        /// Human-readable reason string (for debugging, not machine consumption).
        reason: String,
    },
}

/// Records the outcome of translating a single optimization intent.
///
/// `status` says what happened and `reason` says why; `detail` is optional
/// free text for humans.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct IntentOutcome {
    /// ID of the intent this outcome refers to.
    // NOTE(review): none of the intent payload types in this file carries an
    // id field, and `TranslationReport::all_ignored` fills this with a fresh
    // random UUID. How callers correlate this id with an intent is not
    // established here -- confirm against the design doc.
    pub intent_id: Uuid,
    /// Type discriminant of the intent.
    pub intent_type: IntentType,
    /// What happened to this intent.
    pub status: TranslationStatus,
    /// Machine-readable reason for the outcome.
    pub reason: ReasonCode,
    /// Optional human-readable detail (for debugging, not machine consumption).
    ///
    /// Omitted from the serialized form when `None`; deserializes to `None`
    /// when the key is absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
    pub detail: Option<String>,
}

/// A plugin's complete report on how it handled an intent bundle.
///
/// Every intent in the input bundle MUST have a corresponding outcome in the report.
/// This is the critical observability contract per the design doc.
///
/// The one-outcome-per-intent rule is a contract on plugin authors: the
/// type itself is a plain list and does not enforce it.
/// [`TranslationReport::all_ignored`] builds a report that satisfies the
/// rule by construction.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TranslationReport {
    /// ID of the request this report pertains to.
    pub request_id: Uuid,
    /// Identity of the plugin that produced this report.
    pub plugin_id: String,
    /// Per-intent outcomes.
    pub outcomes: Vec<IntentOutcome>,
    /// When this report was generated.
    pub created_at: DateTime<Utc>,
}

impl TranslationReport {
    /// Returns `true` if every intent was fully applied.
    pub fn all_applied(&self) -> bool {
        self.outcomes
            .iter()
            .all(|o| o.status == TranslationStatus::Applied)
    }

    /// Filter outcomes by status.
    pub fn outcomes_by_status(&self, status: TranslationStatus) -> Vec<&IntentOutcome> {
        self.outcomes
            .iter()
            .filter(|o| o.status == status)
            .collect()
    }

    /// Count of outcomes with the given status.
    pub fn count_by_status(&self, status: TranslationStatus) -> usize {
        self.outcomes.iter().filter(|o| o.status == status).count()
    }

    /// Create a report where all intents are ignored (passthrough behavior).
    ///
    /// Generates one [`IntentOutcome`] per intent in the bundle, each with
    /// [`TranslationStatus::Ignored`] and the given reason code. This is the
    /// standard helper for passthrough and default plugin implementations.
    pub fn all_ignored(
        bundle: &OptimizationIntentBundle,
        plugin_id: &str,
        reason: ReasonCode,
        detail: Option<String>,
    ) -> Self {
        let outcomes = bundle
            .intents
            .iter()
            .map(|intent| IntentOutcome {
                intent_id: Uuid::new_v4(),
                intent_type: intent.discriminant(),
                status: TranslationStatus::Ignored,
                reason: reason.clone(),
                detail: detail.clone(),
            })
            .collect();
        Self {
            request_id: bundle.request_id,
            plugin_id: plugin_id.to_string(),
            outcomes,
            created_at: Utc::now(),
        }
    }
}

// Unit tests for this module are kept in a separate file: `#[path]` points
// the `tests` module at that file, and `#[cfg(test)]` compiles it only for
// test builds.
#[cfg(test)]
#[path = "../../tests/unit/acg/types_tests.rs"]
mod tests;