nv_runtime/batch/config.rs
1use std::time::Duration;
2
3// ---------------------------------------------------------------------------
4// BatchConfig
5// ---------------------------------------------------------------------------
6
7/// Configuration for a batch coordinator.
8///
9/// Controls batch formation: how many items accumulate before dispatch
10/// and how long to wait for a full batch.
11///
12/// # Tradeoffs
13///
14/// - **`max_batch_size`**: Larger batches improve throughput (better GPU
15/// utilization) but increase per-frame latency because each frame waits
16/// for the batch to fill.
17/// - **`max_latency`**: Lower values reduce worst-case latency for partial
18/// batches but may dispatch smaller, less efficient batches.
19///
20/// Reasonable starting points for multi-feed inference:
21/// - `max_batch_size`: 4–16 (depends on GPU memory / model size)
22/// - `max_latency`: 20–100ms (depends on frame rate / latency tolerance)
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BatchConfig {
    /// Maximum items in a single batch.
    ///
    /// When this many items accumulate, the batch is dispatched
    /// immediately without waiting for `max_latency`.
    ///
    /// Must be ≥ 1.
    pub max_batch_size: usize,
    /// Maximum time to wait for a full batch before dispatching a
    /// partial one.
    ///
    /// After the first item arrives, the coordinator waits up to this
    /// duration for more items. If the batch is still not full when the
    /// deadline expires, it is dispatched as-is.
    ///
    /// Must be > 0.
    pub max_latency: Duration,
    /// Submission queue capacity.
    ///
    /// Controls how many pending items can be buffered before
    /// `submit_and_wait` returns
    /// `BatchSubmitError::QueueFull`.
    ///
    /// Defaults to `max_batch_size * 4` (minimum 4) when `None`.
    /// When specified, must be ≥ `max_batch_size`.
    pub queue_capacity: Option<usize>,
    /// Safety timeout added beyond `max_latency` when a feed thread waits
    /// for a batch response.
    ///
    /// The total wait is `max_latency + response_timeout`. This bounds
    /// how long a feed thread can block if the coordinator is wedged or
    /// processing is severely delayed.
    ///
    /// In practice, responses arrive within `max_latency + processing_time`.
    /// This safety margin exists only to guarantee eventual unblocking.
    ///
    /// Defaults to 5 seconds when `None`. Must be > 0 when specified.
    pub response_timeout: Option<Duration>,
    /// Maximum number of in-flight submissions allowed per feed.
    ///
    /// An item is "in-flight" from the moment it enters the submission
    /// queue until the coordinator routes its result back (or drains it
    /// at shutdown). When a feed reaches this limit, further
    /// `submit_and_wait` calls fail
    /// immediately with `BatchSubmitError::InFlightCapReached`
    /// rather than adding to the queue.
    ///
    /// This prevents a feed from accumulating orphaned items in the
    /// shared queue after timeouts: when `submit_and_wait` times out,
    /// the item remains in-flight inside the coordinator. Without a
    /// cap, the feed could immediately submit another frame, stacking
    /// multiple items and crowding other feeds.
    ///
    /// Default: 1 — each feed contributes at most one item to the
    /// shared queue at any time. Must be ≥ 1.
    pub max_in_flight_per_feed: usize,
    /// Maximum time to wait for `BatchProcessor::on_start()` to
    /// complete before returning an error.
    ///
    /// GPU-backed processors (e.g. TensorRT engine compilation) may
    /// need significantly longer than CPU-only models. Set this to
    /// accommodate worst-case first-run warm-up on the target hardware.
    ///
    /// Defaults to 30 seconds when `None`. Must be > 0 when specified.
    pub startup_timeout: Option<Duration>,
}
90
91impl BatchConfig {
92 /// Create a validated batch configuration.
93 ///
94 /// # Errors
95 ///
96 /// Returns [`ConfigError::InvalidPolicy`](nv_core::error::ConfigError::InvalidPolicy)
97 /// if `max_batch_size` is 0 or `max_latency` is zero.
98 pub fn new(
99 max_batch_size: usize,
100 max_latency: Duration,
101 ) -> Result<Self, nv_core::error::ConfigError> {
102 if max_batch_size == 0 {
103 return Err(nv_core::error::ConfigError::InvalidPolicy {
104 detail: "batch max_batch_size must be >= 1".into(),
105 });
106 }
107 if max_latency.is_zero() {
108 return Err(nv_core::error::ConfigError::InvalidPolicy {
109 detail: "batch max_latency must be > 0".into(),
110 });
111 }
112 Ok(Self {
113 max_batch_size,
114 max_latency,
115 queue_capacity: None,
116 response_timeout: None,
117 max_in_flight_per_feed: 1,
118 startup_timeout: None,
119 })
120 }
121
122 /// Set the submission queue capacity.
123 ///
124 /// When specified, must be ≥ `max_batch_size`. Pass `None` for the
125 /// default (`max_batch_size * 4`, minimum 4).
126 #[must_use]
127 pub fn with_queue_capacity(mut self, capacity: Option<usize>) -> Self {
128 self.queue_capacity = capacity;
129 self
130 }
131
132 /// Set the response safety timeout.
133 ///
134 /// This is the safety margin added beyond `max_latency` when blocking
135 /// for a batch response. Pass `None` for the default (5 seconds).
136 /// Must be > 0 when specified.
137 #[must_use]
138 pub fn with_response_timeout(mut self, timeout: Option<Duration>) -> Self {
139 self.response_timeout = timeout;
140 self
141 }
142
143 /// Set the maximum number of in-flight submissions per feed.
144 ///
145 /// Default is 1. Must be ≥ 1.
146 #[must_use]
147 pub fn with_max_in_flight_per_feed(mut self, max: usize) -> Self {
148 self.max_in_flight_per_feed = max;
149 self
150 }
151
152 /// Set the maximum time to wait for `on_start()` to complete.
153 ///
154 /// Pass `None` for the default (30 seconds). GPU-backed processors
155 /// (e.g. TensorRT engine build on first run) may need 2–5 minutes.
156 /// Must be > 0 when specified.
157 #[must_use]
158 pub fn with_startup_timeout(mut self, timeout: Option<Duration>) -> Self {
159 self.startup_timeout = timeout;
160 self
161 }
162
163 /// Validate all configuration fields.
164 ///
165 /// Called internally by `BatchCoordinator::start`.
166 /// Also available for early validation before passing a config to the runtime.
167 ///
168 /// # Errors
169 ///
170 /// Returns [`ConfigError::InvalidPolicy`](nv_core::error::ConfigError::InvalidPolicy)
171 /// if any field violates its constraints.
172 pub fn validate(&self) -> Result<(), nv_core::error::ConfigError> {
173 use nv_core::error::ConfigError;
174 if self.max_batch_size == 0 {
175 return Err(ConfigError::InvalidPolicy {
176 detail: "batch max_batch_size must be >= 1".into(),
177 });
178 }
179 if self.max_latency.is_zero() {
180 return Err(ConfigError::InvalidPolicy {
181 detail: "batch max_latency must be > 0".into(),
182 });
183 }
184 if let Some(rt) = self.response_timeout
185 && rt.is_zero()
186 {
187 return Err(ConfigError::InvalidPolicy {
188 detail: "batch response_timeout must be > 0".into(),
189 });
190 }
191 if let Some(cap) = self.queue_capacity
192 && cap < self.max_batch_size
193 {
194 return Err(ConfigError::InvalidPolicy {
195 detail: format!(
196 "batch queue_capacity ({cap}) must be >= max_batch_size ({})",
197 self.max_batch_size
198 ),
199 });
200 }
201 if self.max_in_flight_per_feed == 0 {
202 return Err(ConfigError::InvalidPolicy {
203 detail: "batch max_in_flight_per_feed must be >= 1".into(),
204 });
205 }
206 if let Some(st) = self.startup_timeout
207 && st.is_zero()
208 {
209 return Err(ConfigError::InvalidPolicy {
210 detail: "batch startup_timeout must be > 0".into(),
211 });
212 }
213 Ok(())
214 }
215}
216
217impl Default for BatchConfig {
218 /// Sensible defaults: batch size 4, 50 ms latency, auto queue capacity,
219 /// 5-second response safety timeout.
220 fn default() -> Self {
221 Self {
222 max_batch_size: 4,
223 max_latency: Duration::from_millis(50),
224 queue_capacity: None,
225 response_timeout: None,
226 max_in_flight_per_feed: 1,
227 startup_timeout: None,
228 }
229 }
230}