// nv_runtime/batch/config.rs
use std::time::Duration;

// ---------------------------------------------------------------------------
// BatchConfig
// ---------------------------------------------------------------------------

/// Configuration for a batch coordinator.
///
/// Controls batch formation: how many items accumulate before dispatch
/// and how long to wait for a full batch.
///
/// # Tradeoffs
///
/// - **`max_batch_size`**: Larger batches improve throughput (better GPU
///   utilization) but increase per-frame latency because each frame waits
///   for the batch to fill.
/// - **`max_latency`**: Lower values reduce worst-case latency for partial
///   batches but may dispatch smaller, less efficient batches.
///
/// Reasonable starting points for multi-feed inference:
/// - `max_batch_size`: 4–16 (depends on GPU memory / model size)
/// - `max_latency`: 20–100ms (depends on frame rate / latency tolerance)
///
/// Field constraints are enforced by [`BatchConfig::validate`]; the `with_*`
/// builders do not validate eagerly.
#[derive(Debug, Clone)]
pub struct BatchConfig {
    /// Maximum items in a single batch.
    ///
    /// When this many items accumulate, the batch is dispatched
    /// immediately without waiting for `max_latency`.
    ///
    /// Must be ≥ 1.
    pub max_batch_size: usize,
    /// Maximum time to wait for a full batch before dispatching a
    /// partial one.
    ///
    /// After the first item arrives, the coordinator waits up to this
    /// duration for more items. If the batch is still not full when the
    /// deadline expires, it is dispatched as-is.
    ///
    /// Must be > 0.
    pub max_latency: Duration,
    /// Submission queue capacity.
    ///
    /// Controls how many pending items can be buffered before
    /// `submit_and_wait` returns
    /// `BatchSubmitError::QueueFull`.
    ///
    /// Defaults to `max_batch_size * 4` (minimum 4) when `None`.
    /// When specified, must be ≥ `max_batch_size`.
    pub queue_capacity: Option<usize>,
    /// Safety timeout added beyond `max_latency` when a feed thread waits
    /// for a batch response.
    ///
    /// The total wait is `max_latency + response_timeout`. This bounds
    /// how long a feed thread can block if the coordinator is wedged or
    /// processing is severely delayed.
    ///
    /// In practice, responses arrive within `max_latency + processing_time`.
    /// This safety margin exists only to guarantee eventual unblocking.
    ///
    /// Defaults to 5 seconds when `None`. Must be > 0 when specified.
    pub response_timeout: Option<Duration>,
    /// Maximum number of in-flight submissions allowed per feed.
    ///
    /// An item is "in-flight" from the moment it enters the submission
    /// queue until the coordinator routes its result back (or drains it
    /// at shutdown). When a feed reaches this limit, further
    /// `submit_and_wait` calls fail
    /// immediately with `BatchSubmitError::InFlightCapReached`
    /// rather than adding to the queue.
    ///
    /// This prevents a feed from accumulating orphaned items in the
    /// shared queue after timeouts: when `submit_and_wait` times out,
    /// the item remains in-flight inside the coordinator. Without a
    /// cap, the feed could immediately submit another frame, stacking
    /// multiple items and crowding other feeds.
    ///
    /// Default: 1 — each feed contributes at most one item to the
    /// shared queue at any time. Must be ≥ 1.
    pub max_in_flight_per_feed: usize,
    /// Maximum time to wait for `BatchProcessor::on_start()` to
    /// complete before returning an error.
    ///
    /// GPU-backed processors (e.g. TensorRT engine compilation) may
    /// need significantly longer than CPU-only models. Set this to
    /// accommodate worst-case first-run warm-up on the target hardware.
    ///
    /// Defaults to 30 seconds when `None`. Must be > 0 when specified.
    pub startup_timeout: Option<Duration>,
}
90
91impl BatchConfig {
92    /// Create a validated batch configuration.
93    ///
94    /// # Errors
95    ///
96    /// Returns [`ConfigError::InvalidPolicy`](nv_core::error::ConfigError::InvalidPolicy)
97    /// if `max_batch_size` is 0 or `max_latency` is zero.
98    pub fn new(
99        max_batch_size: usize,
100        max_latency: Duration,
101    ) -> Result<Self, nv_core::error::ConfigError> {
102        if max_batch_size == 0 {
103            return Err(nv_core::error::ConfigError::InvalidPolicy {
104                detail: "batch max_batch_size must be >= 1".into(),
105            });
106        }
107        if max_latency.is_zero() {
108            return Err(nv_core::error::ConfigError::InvalidPolicy {
109                detail: "batch max_latency must be > 0".into(),
110            });
111        }
112        Ok(Self {
113            max_batch_size,
114            max_latency,
115            queue_capacity: None,
116            response_timeout: None,
117            max_in_flight_per_feed: 1,
118            startup_timeout: None,
119        })
120    }
121
122    /// Set the submission queue capacity.
123    ///
124    /// When specified, must be ≥ `max_batch_size`. Pass `None` for the
125    /// default (`max_batch_size * 4`, minimum 4).
126    #[must_use]
127    pub fn with_queue_capacity(mut self, capacity: Option<usize>) -> Self {
128        self.queue_capacity = capacity;
129        self
130    }
131
132    /// Set the response safety timeout.
133    ///
134    /// This is the safety margin added beyond `max_latency` when blocking
135    /// for a batch response. Pass `None` for the default (5 seconds).
136    /// Must be > 0 when specified.
137    #[must_use]
138    pub fn with_response_timeout(mut self, timeout: Option<Duration>) -> Self {
139        self.response_timeout = timeout;
140        self
141    }
142
143    /// Set the maximum number of in-flight submissions per feed.
144    ///
145    /// Default is 1. Must be ≥ 1.
146    #[must_use]
147    pub fn with_max_in_flight_per_feed(mut self, max: usize) -> Self {
148        self.max_in_flight_per_feed = max;
149        self
150    }
151
152    /// Set the maximum time to wait for `on_start()` to complete.
153    ///
154    /// Pass `None` for the default (30 seconds). GPU-backed processors
155    /// (e.g. TensorRT engine build on first run) may need 2–5 minutes.
156    /// Must be > 0 when specified.
157    #[must_use]
158    pub fn with_startup_timeout(mut self, timeout: Option<Duration>) -> Self {
159        self.startup_timeout = timeout;
160        self
161    }
162
163    /// Validate all configuration fields.
164    ///
165    /// Called internally by `BatchCoordinator::start`.
166    /// Also available for early validation before passing a config to the runtime.
167    ///
168    /// # Errors
169    ///
170    /// Returns [`ConfigError::InvalidPolicy`](nv_core::error::ConfigError::InvalidPolicy)
171    /// if any field violates its constraints.
172    pub fn validate(&self) -> Result<(), nv_core::error::ConfigError> {
173        use nv_core::error::ConfigError;
174        if self.max_batch_size == 0 {
175            return Err(ConfigError::InvalidPolicy {
176                detail: "batch max_batch_size must be >= 1".into(),
177            });
178        }
179        if self.max_latency.is_zero() {
180            return Err(ConfigError::InvalidPolicy {
181                detail: "batch max_latency must be > 0".into(),
182            });
183        }
184        if let Some(rt) = self.response_timeout
185            && rt.is_zero()
186        {
187            return Err(ConfigError::InvalidPolicy {
188                detail: "batch response_timeout must be > 0".into(),
189            });
190        }
191        if let Some(cap) = self.queue_capacity
192            && cap < self.max_batch_size
193        {
194            return Err(ConfigError::InvalidPolicy {
195                detail: format!(
196                    "batch queue_capacity ({cap}) must be >= max_batch_size ({})",
197                    self.max_batch_size
198                ),
199            });
200        }
201        if self.max_in_flight_per_feed == 0 {
202            return Err(ConfigError::InvalidPolicy {
203                detail: "batch max_in_flight_per_feed must be >= 1".into(),
204            });
205        }
206        if let Some(st) = self.startup_timeout
207            && st.is_zero()
208        {
209            return Err(ConfigError::InvalidPolicy {
210                detail: "batch startup_timeout must be > 0".into(),
211            });
212        }
213        Ok(())
214    }
215}
216
217impl Default for BatchConfig {
218    /// Sensible defaults: batch size 4, 50 ms latency, auto queue capacity,
219    /// 5-second response safety timeout.
220    fn default() -> Self {
221        Self {
222            max_batch_size: 4,
223            max_latency: Duration::from_millis(50),
224            queue_capacity: None,
225            response_timeout: None,
226            max_in_flight_per_feed: 1,
227            startup_timeout: None,
228        }
229    }
230}