Skip to main content

rave_tensorrt/
tensorrt_stub.rs

1#![allow(missing_docs)]
2//! Stub TensorRT backend for docs.rs / no-runtime builds.
3
4use std::path::PathBuf;
5use std::sync::atomic::{AtomicU64, Ordering};
6use std::sync::Arc;
7
8use async_trait::async_trait;
9
10use rave_core::backend::{ModelMetadata, UpscaleBackend};
11use rave_core::context::GpuContext;
12use rave_core::error::{EngineError, Result};
13use rave_core::types::GpuTexture;
14
/// TensorRT precision policy — controls EP optimization flags.
///
/// Defaults to [`PrecisionPolicy::Fp16`]. Derives `PartialEq`/`Eq` so callers
/// can compare configured policies (e.g. in tests and config diffing);
/// `PathBuf` supports both, so the `Int8` variant is covered too.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum PrecisionPolicy {
    /// FP32 only — maximum accuracy, baseline performance.
    Fp32,
    /// FP16 mixed precision — 2× throughput on Tensor Cores.
    #[default]
    Fp16,
    /// INT8 quantized with calibration table — 4× throughput.
    Int8 {
        /// Path to the INT8 calibration table consumed by the TensorRT EP.
        calibration_table: PathBuf,
    },
}
26
/// Batch inference configuration.
///
/// Derives `PartialEq`/`Eq` (plain integer fields) so configurations can be
/// compared directly, matching the eager-derive convention for config types.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BatchConfig {
    /// Maximum frames per inference call. The stub/runtime only supports 1;
    /// see `validate_batch_config`.
    pub max_batch: usize,
    /// Per-batch latency budget in microseconds.
    pub latency_deadline_us: u64,
}
33
34impl Default for BatchConfig {
35    fn default() -> Self {
36        Self {
37            max_batch: 1,
38            latency_deadline_us: 8_000,
39        }
40    }
41}
42
43/// Validate a [`BatchConfig`], returning an error if `max_batch > 1`.
44pub fn validate_batch_config(cfg: &BatchConfig) -> Result<()> {
45    if cfg.max_batch > 1 {
46        return Err(EngineError::InvariantViolation(
47            "micro-batching is not implemented; max_batch must be 1 (set max_batch=1)".into(),
48        ));
49    }
50    Ok(())
51}
52
/// Atomic counters for inference stage observability.
#[derive(Debug)]
pub struct InferenceMetrics {
    /// Total number of frames that have completed inference.
    pub frames_inferred: AtomicU64,
    /// Sum of all per-frame inference latencies, in microseconds.
    pub total_inference_us: AtomicU64,
    /// Largest single-frame inference latency observed, in microseconds.
    pub peak_inference_us: AtomicU64,
}
60
61impl InferenceMetrics {
62    pub const fn new() -> Self {
63        Self {
64            frames_inferred: AtomicU64::new(0),
65            total_inference_us: AtomicU64::new(0),
66            peak_inference_us: AtomicU64::new(0),
67        }
68    }
69
70    pub fn record(&self, elapsed_us: u64) {
71        self.frames_inferred.fetch_add(1, Ordering::Relaxed);
72        self.total_inference_us
73            .fetch_add(elapsed_us, Ordering::Relaxed);
74        self.peak_inference_us
75            .fetch_max(elapsed_us, Ordering::Relaxed);
76    }
77
78    pub fn snapshot(&self) -> InferenceMetricsSnapshot {
79        let frames = self.frames_inferred.load(Ordering::Relaxed);
80        let total = self.total_inference_us.load(Ordering::Relaxed);
81        let peak = self.peak_inference_us.load(Ordering::Relaxed);
82        InferenceMetricsSnapshot {
83            frames_inferred: frames,
84            avg_inference_us: if frames > 0 { total / frames } else { 0 },
85            peak_inference_us: peak,
86        }
87    }
88}
89
90impl Default for InferenceMetrics {
91    fn default() -> Self {
92        Self::new()
93    }
94}
95
/// Snapshot of inference metrics for reporting.
#[derive(Clone, Debug)]
pub struct InferenceMetricsSnapshot {
    /// Total frames inferred at snapshot time.
    pub frames_inferred: u64,
    /// Mean per-frame inference latency in microseconds (0 if no frames yet).
    pub avg_inference_us: u64,
    /// Worst-case single-frame inference latency in microseconds.
    pub peak_inference_us: u64,
}
103
/// A point-in-time snapshot of [`RingMetrics`] counters.
#[derive(Debug, Clone, Copy)]
pub struct RingMetricsSnapshot {
    /// Number of times a ring slot was reused after its first use.
    pub reuse: u64,
    /// Number of slot contention events observed.
    pub contention: u64,
    /// Number of slots used for the first time.
    pub first_use: u64,
}
111
/// Atomic counters for output ring buffer activity.
#[derive(Debug)]
pub struct RingMetrics {
    /// Incremented each time an already-initialised slot is handed out again.
    pub slot_reuse_count: AtomicU64,
    /// Incremented when a slot request had to wait/contend.
    pub slot_contention_events: AtomicU64,
    /// Incremented the first time each slot is handed out.
    pub slot_first_use_count: AtomicU64,
}
119
120impl RingMetrics {
121    pub const fn new() -> Self {
122        Self {
123            slot_reuse_count: AtomicU64::new(0),
124            slot_contention_events: AtomicU64::new(0),
125            slot_first_use_count: AtomicU64::new(0),
126        }
127    }
128
129    pub fn snapshot(&self) -> RingMetricsSnapshot {
130        RingMetricsSnapshot {
131            reuse: self.slot_reuse_count.load(Ordering::Relaxed),
132            contention: self.slot_contention_events.load(Ordering::Relaxed),
133            first_use: self.slot_first_use_count.load(Ordering::Relaxed),
134        }
135    }
136}
137
138impl Default for RingMetrics {
139    fn default() -> Self {
140        Self::new()
141    }
142}
143
/// Fixed-size ring of pre-allocated device buffers for inference output.
pub struct OutputRing {
    /// Size in bytes of each ring slot.
    pub slot_bytes: usize,
    /// (width, height) the slots were allocated for.
    pub alloc_dims: (u32, u32),
    /// Slot reuse/contention counters for this ring.
    pub metrics: RingMetrics,
}
150
impl OutputRing {
    /// Stub constructor: always fails, since this build has no CUDA runtime
    /// to allocate device buffers from. All parameters are ignored.
    ///
    /// # Errors
    /// Always returns the runtime-disabled error.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        _ctx: &GpuContext,
        _in_w: u32,
        _in_h: u32,
        _scale: u32,
        _count: usize,
        _min_slots: usize,
    ) -> Result<Self> {
        Err(runtime_disabled_err())
    }
}
164
/// Stub TensorRT/CUDA ORT inference backend.
pub struct TensorRtBackend {
    /// Atomic inference latency and frame count metrics.
    pub inference_metrics: InferenceMetrics,
    /// Precision policy used when building the TensorRT EP session.
    pub precision_policy: PrecisionPolicy,
    /// Batch configuration.
    pub batch_config: BatchConfig,
    // Name of the chosen execution provider; always `None` in the stub
    // (a real session is never created, so no provider is selected).
    selected_provider: Option<String>,
}
175
176impl TensorRtBackend {
177    pub fn new(
178        model_path: PathBuf,
179        ctx: Arc<GpuContext>,
180        device_id: i32,
181        ring_size: usize,
182        downstream_capacity: usize,
183    ) -> Self {
184        Self::with_precision(
185            model_path,
186            ctx,
187            device_id,
188            ring_size,
189            downstream_capacity,
190            PrecisionPolicy::default(),
191            BatchConfig::default(),
192        )
193    }
194
195    #[allow(clippy::too_many_arguments)]
196    pub fn with_precision(
197        _model_path: PathBuf,
198        _ctx: Arc<GpuContext>,
199        _device_id: i32,
200        _ring_size: usize,
201        _downstream_capacity: usize,
202        precision_policy: PrecisionPolicy,
203        batch_config: BatchConfig,
204    ) -> Self {
205        Self {
206            inference_metrics: InferenceMetrics::new(),
207            precision_policy,
208            batch_config,
209            selected_provider: None,
210        }
211    }
212
213    pub async fn ring_metrics(&self) -> Option<RingMetricsSnapshot> {
214        None
215    }
216
217    pub fn selected_provider(&self) -> Option<&str> {
218        self.selected_provider.as_deref()
219    }
220}
221
// Every trait method is a stub that fails with the runtime-disabled error:
// without the `tensorrt-runtime` feature there is no session to initialize,
// run, or tear down.
#[async_trait]
impl UpscaleBackend for TensorRtBackend {
    async fn initialize(&self) -> Result<()> {
        Err(runtime_disabled_err())
    }

    async fn process(&self, _input: GpuTexture) -> Result<GpuTexture> {
        Err(runtime_disabled_err())
    }

    async fn shutdown(&self) -> Result<()> {
        Err(runtime_disabled_err())
    }

    fn metadata(&self) -> Result<&ModelMetadata> {
        Err(runtime_disabled_err())
    }
}
240
241fn runtime_disabled_err() -> EngineError {
242    EngineError::Inference(
243        "rave-tensorrt built without `tensorrt-runtime`; TensorRT backend is unavailable".into(),
244    )
245}