Skip to main content

nemo_flow_adaptive/types/
metadata.rs

1// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Metadata and hint payload types used by adaptive planning.
5
6use serde::{Deserialize, Serialize};
7use uuid::Uuid;
8
9/// JSON value alias used by adaptive metadata payloads.
10pub type Json = serde_json::Value;
11
12/// Metadata template attached to an adaptive execution plan.
13///
14/// This payload is copied into run-level metadata snapshots and carries
15/// parallelism hints plus any backend- or integration-specific extensions.
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct MetadataEnvelope {
18    /// Run identifier the metadata template was last derived from.
19    pub run_id: Uuid,
20    /// Agent identifier the template applies to.
21    pub agent_id: String,
22    /// Tool parallelism hints discovered for the agent.
23    pub parallel_hints: Vec<ParallelHint>,
24    /// Arbitrary caller-defined metadata extensions.
25    pub extensions: Json,
26}
27
28/// Hint describing one tool's membership in a parallel-execution cohort.
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct ParallelHint {
31    /// Tool name that participates in the hinted group.
32    pub tool_name: String,
33    /// Stable group identifier shared by all tools in the cohort.
34    pub group_id: String,
35    /// Whether the hint was explicitly authored rather than inferred.
36    pub explicit: bool,
37}
38
39/// Runtime hint bundle exposed to downstream integrations.
40///
41/// These values summarize the current learned default behavior for an agent and
42/// are suitable for transport in provider-specific headers or metadata fields.
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct AgentHints {
45    /// Output-size limit hint in tokens.
46    pub osl: u32,
47    /// Inter-arrival-time hint in milliseconds.
48    pub iat: u32,
49    /// Scheduling priority hint derived from latency sensitivity.
50    pub priority: i32,
51    /// Learned latency sensitivity score for the current prefix.
52    pub latency_sensitivity: f64,
53    /// Identifier of the prefix or trie node the hints came from.
54    pub prefix_id: String,
55    /// Estimated total number of requests in the workflow.
56    pub total_requests: u32,
57}