//! Stub build of the TensorRT backend (`rave_tensorrt/tensorrt_stub.rs`),
//! compiled when the `tensorrt-runtime` feature is disabled. Every entry
//! point fails with an explanatory error instead of running inference.
#![allow(missing_docs)]
2use std::path::PathBuf;
5use std::sync::atomic::{AtomicU64, Ordering};
6use std::sync::Arc;
7
8use async_trait::async_trait;
9
10use rave_core::backend::{ModelMetadata, UpscaleBackend};
11use rave_core::context::GpuContext;
12use rave_core::error::{EngineError, Result};
13use rave_core::types::GpuTexture;
14
/// Numeric precision the TensorRT engine should be built and run with.
#[derive(Clone, Debug, Default)]
pub enum PrecisionPolicy {
    /// Full 32-bit floating-point inference.
    Fp32,
    /// Half-precision inference; the default policy.
    #[default]
    Fp16,
    /// 8-bit integer inference; `calibration_table` points at the
    /// calibration data on disk.
    Int8 { calibration_table: PathBuf },
}
26
/// Batching parameters for inference submission.
#[derive(Clone, Debug)]
pub struct BatchConfig {
    /// Maximum frames per inference batch. Values above 1 are rejected by
    /// `validate_batch_config` because micro-batching is not implemented.
    pub max_batch: usize,
    /// Latency budget for a batch, in microseconds.
    pub latency_deadline_us: u64,
}
33
34impl Default for BatchConfig {
35 fn default() -> Self {
36 Self {
37 max_batch: 1,
38 latency_deadline_us: 8_000,
39 }
40 }
41}
42
43pub fn validate_batch_config(cfg: &BatchConfig) -> Result<()> {
45 if cfg.max_batch > 1 {
46 return Err(EngineError::InvariantViolation(
47 "micro-batching is not implemented; max_batch must be 1 (set max_batch=1)".into(),
48 ));
49 }
50 Ok(())
51}
52
/// Lock-free counters describing inference activity.
#[derive(Debug)]
pub struct InferenceMetrics {
    /// Total number of frames recorded via `record`.
    pub frames_inferred: AtomicU64,
    /// Running sum of per-frame inference times, in microseconds.
    pub total_inference_us: AtomicU64,
    /// Largest single-frame inference time observed, in microseconds.
    pub peak_inference_us: AtomicU64,
}
60
61impl InferenceMetrics {
62 pub const fn new() -> Self {
63 Self {
64 frames_inferred: AtomicU64::new(0),
65 total_inference_us: AtomicU64::new(0),
66 peak_inference_us: AtomicU64::new(0),
67 }
68 }
69
70 pub fn record(&self, elapsed_us: u64) {
71 self.frames_inferred.fetch_add(1, Ordering::Relaxed);
72 self.total_inference_us
73 .fetch_add(elapsed_us, Ordering::Relaxed);
74 self.peak_inference_us
75 .fetch_max(elapsed_us, Ordering::Relaxed);
76 }
77
78 pub fn snapshot(&self) -> InferenceMetricsSnapshot {
79 let frames = self.frames_inferred.load(Ordering::Relaxed);
80 let total = self.total_inference_us.load(Ordering::Relaxed);
81 let peak = self.peak_inference_us.load(Ordering::Relaxed);
82 InferenceMetricsSnapshot {
83 frames_inferred: frames,
84 avg_inference_us: if frames > 0 { total / frames } else { 0 },
85 peak_inference_us: peak,
86 }
87 }
88}
89
90impl Default for InferenceMetrics {
91 fn default() -> Self {
92 Self::new()
93 }
94}
95
/// Point-in-time copy of [`InferenceMetrics`], produced by `snapshot`.
#[derive(Clone, Debug)]
pub struct InferenceMetricsSnapshot {
    /// Total frames recorded at snapshot time.
    pub frames_inferred: u64,
    /// Mean per-frame inference time in microseconds (0 if no frames).
    pub avg_inference_us: u64,
    /// Largest single-frame inference time in microseconds.
    pub peak_inference_us: u64,
}
103
/// Point-in-time copy of [`RingMetrics`] counters.
#[derive(Debug, Clone, Copy)]
pub struct RingMetricsSnapshot {
    /// Times a slot was handed out again after prior use.
    pub reuse: u64,
    /// Times a slot request found all slots busy.
    pub contention: u64,
    /// Times a slot was handed out for the first time.
    pub first_use: u64,
}
111
/// Lock-free counters for output-ring slot usage.
#[derive(Debug)]
pub struct RingMetrics {
    /// Times a slot was handed out again after prior use.
    pub slot_reuse_count: AtomicU64,
    /// Times a slot request contended with in-flight use.
    pub slot_contention_events: AtomicU64,
    /// Times a slot was handed out for the first time.
    pub slot_first_use_count: AtomicU64,
}
119
120impl RingMetrics {
121 pub const fn new() -> Self {
122 Self {
123 slot_reuse_count: AtomicU64::new(0),
124 slot_contention_events: AtomicU64::new(0),
125 slot_first_use_count: AtomicU64::new(0),
126 }
127 }
128
129 pub fn snapshot(&self) -> RingMetricsSnapshot {
130 RingMetricsSnapshot {
131 reuse: self.slot_reuse_count.load(Ordering::Relaxed),
132 contention: self.slot_contention_events.load(Ordering::Relaxed),
133 first_use: self.slot_first_use_count.load(Ordering::Relaxed),
134 }
135 }
136}
137
138impl Default for RingMetrics {
139 fn default() -> Self {
140 Self::new()
141 }
142}
143
/// Ring of reusable GPU output slots. In this stub build it is never
/// constructed: `OutputRing::new` always fails.
pub struct OutputRing {
    /// Size of each slot in bytes.
    pub slot_bytes: usize,
    /// Dimensions the slots were allocated for — presumably (width, height),
    /// matching `new`'s `_in_w`/`_in_h` order; TODO confirm against the
    /// runtime implementation.
    pub alloc_dims: (u32, u32),
    /// Slot reuse/contention counters for this ring.
    pub metrics: RingMetrics,
}
150
impl OutputRing {
    /// Stub constructor. The crate was built without `tensorrt-runtime`,
    /// so this always returns the "runtime disabled" error; the parameters
    /// exist only for signature parity with the real implementation and
    /// are ignored.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        _ctx: &GpuContext,
        _in_w: u32,
        _in_h: u32,
        _scale: u32,
        _count: usize,
        _min_slots: usize,
    ) -> Result<Self> {
        Err(runtime_disabled_err())
    }
}
164
/// Stub TensorRT backend, used when the `tensorrt-runtime` feature is off.
/// It stores its configuration but every operation fails.
pub struct TensorRtBackend {
    /// Per-frame inference counters; never updated by this stub.
    pub inference_metrics: InferenceMetrics,
    /// Requested numeric precision; stored but unused here.
    pub precision_policy: PrecisionPolicy,
    /// Requested batching configuration; stored but unused here.
    pub batch_config: BatchConfig,
    /// Execution provider chosen at init; always `None` in the stub.
    selected_provider: Option<String>,
}
175
176impl TensorRtBackend {
177 pub fn new(
178 model_path: PathBuf,
179 ctx: Arc<GpuContext>,
180 device_id: i32,
181 ring_size: usize,
182 downstream_capacity: usize,
183 ) -> Self {
184 Self::with_precision(
185 model_path,
186 ctx,
187 device_id,
188 ring_size,
189 downstream_capacity,
190 PrecisionPolicy::default(),
191 BatchConfig::default(),
192 )
193 }
194
195 #[allow(clippy::too_many_arguments)]
196 pub fn with_precision(
197 _model_path: PathBuf,
198 _ctx: Arc<GpuContext>,
199 _device_id: i32,
200 _ring_size: usize,
201 _downstream_capacity: usize,
202 precision_policy: PrecisionPolicy,
203 batch_config: BatchConfig,
204 ) -> Self {
205 Self {
206 inference_metrics: InferenceMetrics::new(),
207 precision_policy,
208 batch_config,
209 selected_provider: None,
210 }
211 }
212
213 pub async fn ring_metrics(&self) -> Option<RingMetricsSnapshot> {
214 None
215 }
216
217 pub fn selected_provider(&self) -> Option<&str> {
218 self.selected_provider.as_deref()
219 }
220}
221
#[async_trait]
impl UpscaleBackend for TensorRtBackend {
    /// Always fails: the TensorRT runtime was compiled out.
    async fn initialize(&self) -> Result<()> {
        Err(runtime_disabled_err())
    }

    /// Always fails: the TensorRT runtime was compiled out.
    async fn process(&self, _input: GpuTexture) -> Result<GpuTexture> {
        Err(runtime_disabled_err())
    }

    /// Always fails: the TensorRT runtime was compiled out.
    async fn shutdown(&self) -> Result<()> {
        Err(runtime_disabled_err())
    }

    /// Always fails: no model is ever loaded, so there is no metadata.
    fn metadata(&self) -> Result<&ModelMetadata> {
        Err(runtime_disabled_err())
    }
}
240
241fn runtime_disabled_err() -> EngineError {
242 EngineError::Inference(
243 "rave-tensorrt built without `tensorrt-runtime`; TensorRT backend is unavailable".into(),
244 )
245}