pub struct TensorRTConfig { /* private fields */ }
Configuration for TensorRT execution provider
This struct provides fine-grained control over TensorRT optimization settings.
For most users, the defaults applied by crate::ClassifierBuilder::with_tensorrt() provide
optimal performance.
§Performance Notes
The default configuration enables:
- FP16 precision: ~2x faster inference on GPUs with tensor cores
- CUDA graphs: Reduced CPU launch overhead for models with many small layers
- Engine caching: Reduces session creation from minutes to seconds
- Timing cache: Accelerates future builds with similar layer configurations
§Example: Custom Configuration
use birdnet_onnx::TensorRTConfig;
let config = TensorRTConfig::new()
    .with_fp16(false)
    .with_builder_optimization_level(5)
    .with_engine_cache_path("/tmp/trt_cache")
    .with_device_id(1); // Use second GPU
§Example: Disable Optimizations
use birdnet_onnx::TensorRTConfig;
let config = TensorRTConfig::new()
    .with_fp16(false)
    .with_cuda_graph(false)
    .with_engine_cache(false)
    .with_timing_cache(false);
§Implementations
impl TensorRTConfig
pub const fn with_fp16(self, enable: bool) -> Self
Enable or disable FP16 precision mode
FP16 provides ~2x speedup on GPUs with tensor cores (Volta and newer). Disable if you need full FP32 precision for accuracy-critical applications.
Default: true
pub const fn with_int8(self, enable: bool) -> Self
Enable or disable INT8 precision mode
Requires calibration data. Provides additional speedup over FP16.
See TensorRT documentation for calibration requirements.
Default: false
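As a sketch (the calibration setup itself is outside this config), enabling INT8 alongside FP16 looks like this; whether INT8 kernels are actually used depends on calibration data being available to TensorRT:
use birdnet_onnx::TensorRTConfig;

// INT8 needs calibration data; without it, TensorRT falls back to
// the other enabled precisions (FP16 here, else FP32).
let config = TensorRTConfig::new()
    .with_int8(true)
    .with_fp16(true);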
pub const fn with_cuda_graph(self, enable: bool) -> Self
Enable or disable CUDA graph capture
Reduces CPU launch overhead for models with many small layers by capturing the sequence of kernel launches once and replaying it as a single graph on later runs.
Default: true
pub const fn with_builder_optimization_level(self, level: u8) -> Self
Set builder optimization level (0-5)
Higher values take longer to build but may produce faster engines.
- Level 3 (default): Balanced optimization
- Level 5: Maximum optimization (longer build time)
- Levels 0-2: Faster builds, may sacrifice runtime performance
Default: 3
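As an illustration, one common pattern is to build with a low level while iterating on a model and reserve level 5 for deployment builds:
use birdnet_onnx::TensorRTConfig;

// Quick engine builds during development; runtime speed may suffer.
let dev_config = TensorRTConfig::new().with_builder_optimization_level(1);

// Slow one-time build, typically the fastest resulting engine.
let prod_config = TensorRTConfig::new().with_builder_optimization_level(5);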
pub const fn with_engine_cache(self, enable: bool) -> Self
Enable or disable engine caching
Caches compiled TensorRT engines to disk, dramatically reducing
session creation time on subsequent runs (384s → 9s in benchmarks).
Important: Clear cache when model, ONNX Runtime, or TensorRT version changes.
Default: true
pub fn with_engine_cache_path(self, path: impl Into<String>) -> Self
Set custom path for engine cache
By default, TensorRT uses the system temp directory.
Set a custom path for persistent caching across system restarts.
Default: None (uses TensorRT default)
pub const fn with_timing_cache(self, enable: bool) -> Self
Enable or disable timing cache
Stores kernel timing data to accelerate future builds with similar layer configurations (34.6s → 7.7s in benchmarks).
Default: true
pub fn with_timing_cache_path(self, path: impl Into<String>) -> Self
Set custom path for timing cache
By default, TensorRT uses the system temp directory.
Default: None (uses TensorRT default)
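For example, pointing both caches at a persistent directory (the path below is illustrative; any writable directory works):
use birdnet_onnx::TensorRTConfig;

// Keep engine and timing caches across reboots instead of the
// system temp directory.
let config = TensorRTConfig::new()
    .with_engine_cache_path("/var/cache/birdnet/trt")
    .with_timing_cache_path("/var/cache/birdnet/trt");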
pub const fn with_device_id(self, device_id: i32) -> Self
Set GPU device ID for multi-GPU systems
Default: None (uses default GPU)
pub const fn with_max_workspace_size(self, max_size: usize) -> Self
Set maximum workspace size in bytes
TensorRT may allocate up to this much GPU memory for optimization.
Larger values may enable more optimizations but use more memory.
Default: None (uses TensorRT default)
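For example, capping the workspace at 2 GiB (the exact size is an arbitrary choice for illustration):
use birdnet_onnx::TensorRTConfig;

// Allow TensorRT to use up to 2 GiB of GPU memory while building
// the engine; it may use less.
let config = TensorRTConfig::new().with_max_workspace_size(2 * 1024 * 1024 * 1024);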
pub const fn with_min_subgraph_size(self, min_size: usize) -> Self
Set minimum subgraph size for TensorRT acceleration
Subgraphs smaller than this will not be accelerated by TensorRT.
Default: None (uses TensorRT default)
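For illustration, raising the threshold so that only larger subgraphs are handed to TensorRT (the value 5 is arbitrary):
use birdnet_onnx::TensorRTConfig;

// Subgraphs with fewer than 5 nodes stay on the fallback execution
// provider, avoiding per-subgraph TensorRT overhead.
let config = TensorRTConfig::new().with_min_subgraph_size(5);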
pub const fn with_layer_norm_fp32_fallback(self, enable: bool) -> Self
Enable or disable FP32 fallback for layer normalization
When enabled, layer norm operations run in FP32 even in FP16 mode, improving accuracy at a slight performance cost.
Default: None (uses TensorRT default)
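A natural pairing, sketched here, is FP16 inference with layer normalization kept in FP32 for numerical stability:
use birdnet_onnx::TensorRTConfig;

// FP16 everywhere except layer norm, which runs in FP32 to avoid
// accumulating rounding error.
let config = TensorRTConfig::new()
    .with_fp16(true)
    .with_layer_norm_fp32_fallback(true);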
§Trait Implementations
impl Clone for TensorRTConfig
fn clone(&self) -> TensorRTConfig
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source.