pub struct TensorRTConfig { /* private fields */ }
Configuration for TensorRT execution provider
This struct provides fine-grained control over TensorRT optimization settings.
For most users, the defaults applied by crate::ClassifierBuilder::with_tensorrt() provide
optimal performance.
§Performance Notes
The default configuration enables:
- FP16 precision: ~2x faster inference on GPUs with tensor cores
- CUDA graphs: Reduced CPU launch overhead for models with many small layers
- Engine caching: Reduces session creation from minutes to seconds
- Timing cache: Accelerates future builds with similar layer configurations
§Example: Custom Configuration
use birdnet_onnx::TensorRTConfig;
let config = TensorRTConfig::new()
    .with_fp16(false)
    .with_builder_optimization_level(5)
    .with_engine_cache_path("/tmp/trt_cache")
    .with_device_id(1); // Use second GPU
§Example: Disable Optimizations
use birdnet_onnx::TensorRTConfig;
let config = TensorRTConfig::new()
    .with_fp16(false)
    .with_cuda_graph(false)
    .with_engine_cache(false)
    .with_timing_cache(false);
§Implementations
impl TensorRTConfig
pub const fn with_fp16(self, enable: bool) -> Self
Enable or disable FP16 precision mode
FP16 provides ~2x speedup on GPUs with tensor cores (Volta and newer). Disable if you need full FP32 precision for accuracy-critical applications.
Default: true
pub const fn with_int8(self, enable: bool) -> Self
Enable or disable INT8 precision mode
Requires calibration data. Provides additional speedup over FP16.
See TensorRT documentation for calibration requirements.
Default: false
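As a sketch (the calibration setup itself is outside this config), enabling INT8 alongside FP16 looks like this; whether INT8 kernels are actually used depends on calibration data being available to TensorRT:
use birdnet_onnx::TensorRTConfig;

// INT8 needs calibration data; without it, TensorRT falls back to
// the other enabled precisions (FP16 here, else FP32).
let config = TensorRTConfig::new()
    .with_int8(true)
    .with_fp16(true);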
pub const fn with_cuda_graph(self, enable: bool) -> Self
Enable or disable CUDA graph capture
Reduces CPU launch overhead for models with many small layers by capturing the sequence of kernel launches once and replaying it as a single graph on later runs.
Default: true
pub const fn with_builder_optimization_level(self, level: u8) -> Self
Set builder optimization level (0-5)
Higher values take longer to build but may produce faster engines.
- Level 3 (default): Balanced optimization
- Level 5: Maximum optimization (longer build time)
- Levels 0-2: Faster builds, may sacrifice runtime performance
Default: 3
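As an illustration, one common pattern is to build with a low level while iterating on a model and reserve level 5 for deployment builds:
use birdnet_onnx::TensorRTConfig;

// Quick engine builds during development; runtime speed may suffer.
let dev_config = TensorRTConfig::new().with_builder_optimization_level(1);

// Slow one-time build, typically the fastest resulting engine.
let prod_config = TensorRTConfig::new().with_builder_optimization_level(5);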
pub const fn with_engine_cache(self, enable: bool) -> Self
Enable or disable engine caching
Caches compiled TensorRT engines to disk, dramatically reducing
session creation time on subsequent runs (384s → 9s in benchmarks).
Important: Clear cache when model, ONNX Runtime, or TensorRT version changes.
Default: true
pub fn with_engine_cache_path(self, path: impl Into<String>) -> Self
Set custom path for engine cache
By default, TensorRT uses the system temp directory.
Set a custom path for persistent caching across system restarts.
Default: None (uses TensorRT default)
pub const fn with_timing_cache(self, enable: bool) -> Self
Enable or disable timing cache
Stores kernel timing data to accelerate future builds with similar layer configurations (34.6s → 7.7s in benchmarks).
Default: true
pub fn with_timing_cache_path(self, path: impl Into<String>) -> Self
Set custom path for timing cache
By default, TensorRT uses the system temp directory.
Default: None (uses TensorRT default)
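For example, pointing both caches at a persistent directory (the path below is illustrative; any writable directory works):
use birdnet_onnx::TensorRTConfig;

// Keep engine and timing caches across reboots instead of the
// system temp directory.
let config = TensorRTConfig::new()
    .with_engine_cache_path("/var/cache/birdnet/trt")
    .with_timing_cache_path("/var/cache/birdnet/trt");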
pub const fn with_device_id(self, device_id: i32) -> Self
Set GPU device ID for multi-GPU systems
Default: None (uses default GPU)
pub const fn with_max_workspace_size(self, max_size: usize) -> Self
Set maximum workspace size in bytes
TensorRT may allocate up to this much GPU memory for optimization.
Larger values may enable more optimizations but use more memory.
Default: None (uses TensorRT default)
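For example, capping the workspace at 2 GiB (the exact size is an arbitrary choice for illustration):
use birdnet_onnx::TensorRTConfig;

// Allow TensorRT to use up to 2 GiB of GPU memory while building
// the engine; it may use less.
let config = TensorRTConfig::new().with_max_workspace_size(2 * 1024 * 1024 * 1024);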
pub const fn with_min_subgraph_size(self, min_size: usize) -> Self
Set minimum subgraph size for TensorRT acceleration
Subgraphs smaller than this will not be accelerated by TensorRT.
Default: None (uses TensorRT default)
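For illustration, raising the threshold so that only larger subgraphs are handed to TensorRT (the value 5 is arbitrary):
use birdnet_onnx::TensorRTConfig;

// Subgraphs with fewer than 5 nodes stay on the fallback execution
// provider, avoiding per-subgraph TensorRT overhead.
let config = TensorRTConfig::new().with_min_subgraph_size(5);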
pub const fn with_layer_norm_fp32_fallback(self, enable: bool) -> Self
Enable or disable FP32 fallback for layer normalization
When enabled, layer norm operations run in FP32 even in FP16 mode, improving accuracy at a slight performance cost.
Default: None (uses TensorRT default)
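A natural pairing, sketched here, is FP16 inference with layer normalization kept in FP32 for numerical stability:
use birdnet_onnx::TensorRTConfig;

// FP16 everywhere except layer norm, which runs in FP32 to avoid
// accumulating rounding error.
let config = TensorRTConfig::new()
    .with_fp16(true)
    .with_layer_norm_fp32_fallback(true);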
§Trait Implementations
impl Clone for TensorRTConfig
fn clone(&self) -> TensorRTConfig
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source.