pub struct CudaEngine { /* private fields */ }
CUDA engine (real mode)
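A CudaEngine is obtained by deserializing engine bytes through a Runtime, as the repository examples below do. A minimal sketch of that flow (a sketch only: the use paths are assumptions inferred from the examples):

use std::error::Error;
use trtx::{Logger, Runtime}; // assumed import paths

fn inspect_engine(engine_data: &[u8]) -> Result<(), Box<dyn Error>> {
    let logger = Logger::stderr()?;
    let runtime = Runtime::new(&logger)?;

    // Deserialize a previously built, serialized engine.
    let engine = runtime.deserialize_cuda_engine(engine_data)?;

    // Enumerate the engine's I/O tensors by index.
    for i in 0..engine.get_nb_io_tensors()? {
        println!("tensor {}: {}", i, engine.get_tensor_name(i)?);
    }

    // An execution context borrows the engine and drives inference.
    let _context = engine.create_execution_context()?;
    Ok(())
}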
Implementations§

impl CudaEngine
pub fn get_nb_io_tensors(&self) -> Result<i32>

Returns the number of input/output (I/O) tensors in the engine.
Examples found in repository
examples/basic_workflow.rs (line 84)
18 fn main() -> Result<(), Box<dyn Error>> {
19 #[cfg(feature = "dlopen_tensorrt_rtx")]
20 trtx::dynamically_load_tensorrt(None::<String>).unwrap();
21 #[cfg(feature = "dlopen_tensorrt_onnxparser")]
22 trtx::dynamically_load_tensorrt_onnxparser(None::<String>).unwrap();
23
24 println!("TensorRT-RTX Basic Workflow Example");
25 println!("=====================================\n");
26
27 // Step 1: Create logger
28 println!("1. Creating logger...");
29 let logger = Logger::stderr()?;
30 println!(" ✓ Logger created\n");
31
32 // Step 2: Build phase
33 println!("2. Building engine...");
34
35 let builder = Builder::new(&logger)?;
36 println!(" ✓ Builder created");
37
38 // Create network with explicit batch dimensions
39 let mut network = builder.create_network(network_flags::EXPLICIT_BATCH)?;
40 println!(" ✓ Network created");
41
42 // Create and configure builder config
43 let mut config = builder.create_config()?;
44 println!(" ✓ Config created");
45
46 // Set workspace memory limit (1GB)
47 config.set_memory_pool_limit(MemoryPoolType::Workspace, 1 << 30)?;
48 println!(" ✓ Workspace limit set to 1GB");
49
50 // Note: In a real application, you would add layers to the network here
51 // For example:
52 // - network.add_input(...)
53 // - network.add_convolution(...)
54 // - network.add_activation(...)
55 // - etc.
56
57 println!("\n Note: This example uses an empty network.");
58 println!(" In production, you would:");
59 println!(" - Parse an ONNX model");
60 println!(" - Or programmatically add layers");
61 println!(" - Define input/output tensors\n");
62
63 // Build serialized network
64 println!(" Building serialized engine...");
65 match builder.build_serialized_network(&mut network, &mut config) {
66 Ok(engine_data) => {
67 println!(" ✓ Engine built ({} bytes)", engine_data.len());
68
69 // Save to disk
70 let engine_path = "/tmp/example.engine";
71 std::fs::write(engine_path, &engine_data)?;
72 println!(" ✓ Engine saved to {}\n", engine_path);
73
74 // Step 3: Inference phase
75 println!("3. Loading engine for inference...");
76
77 let runtime = Runtime::new(&logger)?;
78 println!(" ✓ Runtime created");
79
80 let engine = runtime.deserialize_cuda_engine(&engine_data)?;
81 println!(" ✓ Engine deserialized");
82
83 // Query engine information
84 let num_tensors = engine.get_nb_io_tensors()?;
85 println!(" ✓ Engine has {} I/O tensors", num_tensors);
86
87 for i in 0..num_tensors {
88 let name = engine.get_tensor_name(i)?;
89 println!(" - Tensor {}: {}", i, name);
90 }
91
92 // Create execution context
93 let _context = engine.create_execution_context()?;
94 println!(" ✓ Execution context created\n");
95
96 println!("4. Next steps for real inference:");
97 println!(" - Allocate CUDA memory for inputs/outputs");
98 println!(" - Copy input data to GPU");
99 println!(" - Bind tensor addresses with context.set_tensor_address()");
100 println!(" - Execute with context.enqueue_v3()");
101 println!(" - Copy results back to CPU");
102 }
103 Err(e) => {
104 eprintln!(" ✗ Failed to build engine: {}", e);
105 eprintln!("\n This is expected for an empty network.");
106 eprintln!(" In production, add layers before building.");
107 return Err(e.into());
108 }
109 }
110
111 println!("\n✓ Example completed successfully!");
112
113 Ok(())
114 }

More examples
examples/tiny_network.rs (line 42)
18 fn main() -> Result<()> {
19 #[cfg(feature = "dlopen_tensorrt_rtx")]
20 trtx::dynamically_load_tensorrt(None::<String>).unwrap();
21 #[cfg(feature = "dlopen_tensorrt_onnxparser")]
22 trtx::dynamically_load_tensorrt_onnxparser(None::<String>).unwrap();
23
24 println!("=== Tiny Network Example ===\n");
25
26 // 1. Create logger
27 println!("1. Creating logger...");
28 let logger = Logger::stderr()?;
29
30 // 2. Build the network
31 println!("2. Building network...");
32 let engine_data = build_tiny_network(&logger)?;
33 println!(" Engine size: {} bytes", engine_data.len());
34
35 // 3. Create runtime and deserialize engine
36 println!("\n3. Creating runtime and loading engine...");
37 let runtime = Runtime::new(&logger)?;
38 let engine = runtime.deserialize_cuda_engine(&engine_data)?;
39
40 // 4. Inspect engine
41 println!("4. Engine information:");
42 let num_io_tensors = engine.get_nb_io_tensors()?;
43 println!(" Number of I/O tensors: {}", num_io_tensors);
44
45 for i in 0..num_io_tensors {
46 let name = engine.get_tensor_name(i)?;
47 println!(" Tensor {}: {}", i, name);
48 }
49
50 // 5. Create execution context
51 println!("\n5. Creating execution context...");
52 let mut context = engine.create_execution_context()?;
53
54 // 6. Prepare input/output buffers
55 println!("6. Preparing buffers...");
56 let input_size = 3 * 4 * 4; // [1, 3, 4, 4]
57 let output_size = 3 * 4 * 4; // Same as input
58
59 // Create input with mix of positive and negative values
60 let input_data: Vec<f32> = (0..input_size)
61 .map(|i| {
62 // Create pattern: positive, negative, zero, positive, ...
63 match i % 4 {
64 0 => (i as f32) * 0.5, // Positive values
65 1 => -(i as f32) * 0.3, // Negative values
66 2 => 0.0, // Zero
67 _ => (i as f32) * 0.1, // Small positive values
68 }
69 })
70 .collect();
71
72 println!(" Input shape: [1, 3, 4, 4] ({} elements)", input_size);
73 println!(" First 8 input values: {:?}", &input_data[..8]);
74
75 // Allocate device memory
76 let mut input_device = DeviceBuffer::new(input_size * std::mem::size_of::<f32>())?;
77 let output_device = DeviceBuffer::new(output_size * std::mem::size_of::<f32>())?;
78
79 // Copy input to device (convert f32 slice to bytes)
80 let input_bytes = unsafe {
81 std::slice::from_raw_parts(
82 input_data.as_ptr() as *const u8,
83 input_data.len() * std::mem::size_of::<f32>(),
84 )
85 };
86 input_device.copy_from_host(input_bytes)?;
87
88 // 7. Set tensor addresses
89 println!("\n7. Binding tensors...");
90 unsafe {
91 context.set_tensor_address("input", input_device.as_ptr())?;
92 context.set_tensor_address("output", output_device.as_ptr())?;
93 }
94
95 // 8. Execute inference
96 println!("8. Running inference...");
97 let stream = trtx::cuda::get_default_stream();
98 unsafe {
99 context.enqueue_v3(stream)?;
100 }
101 synchronize()?;
102 println!(" ✓ Inference completed");
103
104 // 9. Copy output back to host
105 println!("\n9. Reading results...");
106 let mut output_data: Vec<f32> = vec![0.0; output_size];
107 let output_bytes = unsafe {
108 std::slice::from_raw_parts_mut(
109 output_data.as_mut_ptr() as *mut u8,
110 output_data.len() * std::mem::size_of::<f32>(),
111 )
112 };
113 output_device.copy_to_host(output_bytes)?;
114
115 println!(" Output shape: [1, 3, 4, 4] ({} elements)", output_size);
116 println!(" First 8 output values: {:?}", &output_data[..8]);
117
118 // 10. Verify results
119 println!("\n10. Verification:");
120 println!(" ReLU function: max(0, x)");
121 println!(" - Positive inputs should pass through unchanged");
122 println!(" - Negative inputs should become 0.0");
123 println!(" - Zero inputs should remain 0.0");
124
125 let mut passed = true;
126 let mut failures = Vec::new();
127
128 for (i, (&input, &output)) in input_data.iter().zip(output_data.iter()).enumerate() {
129 let expected = if input > 0.0 { input } else { 0.0 };
130 let diff = (output - expected).abs();
131
132 if diff > 1e-6 {
133 passed = false;
134 if failures.len() < 5 {
135 failures.push((i, input, expected, output));
136 }
137 }
138 }
139
140 if passed {
141 println!(
142 "\n ✓ PASS: All {} outputs match expected ReLU behavior!",
143 output_size
144 );
145
146 // Show some examples
147 println!("\n Sample verification (first 8 elements):");
148 for i in 0..8.min(input_size) {
149 let input = input_data[i];
150 let output = output_data[i];
151 let expected = if input > 0.0 { input } else { 0.0 };
152 println!(
153 " [{:2}] ReLU({:7.3}) = {:7.3} (expected {:7.3}) ✓",
154 i, input, output, expected
155 );
156 }
157 } else {
158 println!("\n ✗ FAIL: {} mismatches found!", failures.len());
159 for (i, input, expected, output) in failures {
160 println!(
161 " [{:2}] ReLU({:7.3}) = {:7.3}, expected {:7.3}",
162 i, input, output, expected
163 );
164 }
165 }
166
167 println!("\n=== Example completed ===");
168 Ok(())
169 }

pub fn get_tensor_name(&self, index: i32) -> Result<String>

Returns the name of the I/O tensor at the given index.
Examples found in repository
examples/basic_workflow.rs (line 88) and examples/tiny_network.rs (line 46): see the full listings under get_nb_io_tensors above. Both examples loop over the tensor indices and call get_tensor_name on each.

pub fn get_tensor_shape(&self, name: &str) -> Result<Vec<i64>>
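Returns the shape of the named I/O tensor, one entry per dimension. Neither repository example calls this method directly; below is a minimal sketch, assuming the "input" tensor name from tiny_network.rs, the crate's Result alias, and the TensorRT convention that dynamic dimensions are reported as -1:

// Hypothetical helper: print a tensor's shape and, when all dimensions
// are static, its element count.
fn describe_tensor(engine: &CudaEngine, name: &str) -> Result<()> {
    let shape = engine.get_tensor_shape(name)?; // e.g. [1, 3, 4, 4]
    if shape.iter().all(|&d| d >= 0) {
        let elements: i64 = shape.iter().product();
        println!("{}: {:?} ({} elements)", name, shape, elements);
    } else {
        // A -1 entry marks a dimension that is only fixed at runtime.
        println!("{}: {:?} (has dynamic dimensions)", name, shape);
    }
    Ok(())
}

For the tiny_network.rs engine, describe_tensor(&engine, "input") would print: input: [1, 3, 4, 4] (48 elements).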
pub fn create_execution_context(&self) -> Result<ExecutionContext<'_>>

Creates an execution context that borrows this engine and is used to run inference.
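The returned context borrows the engine (hence the '_ lifetime), so the engine must outlive every context created from it. A minimal sketch of driving inference through a context, condensed from tiny_network.rs below and assuming input_device and output_device are already-populated DeviceBuffers:

let mut context = engine.create_execution_context()?;

// set_tensor_address and enqueue_v3 are unsafe: the caller must ensure the
// bound pointers are valid device allocations large enough for the
// engine's tensor shapes.
unsafe {
    context.set_tensor_address("input", input_device.as_ptr())?;
    context.set_tensor_address("output", output_device.as_ptr())?;
    context.enqueue_v3(trtx::cuda::get_default_stream())?;
}
synchronize()?; // block until the GPU finishes before reading outputs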
Examples found in repository
examples/basic_workflow.rs (line 93) and examples/tiny_network.rs (line 52): see the full listings under get_nb_io_tensors above. tiny_network.rs goes on to bind device addresses on the context and launch inference with enqueue_v3.

Trait Implementations§
Auto Trait Implementations§
impl Freeze for CudaEngine
impl RefUnwindSafe for CudaEngine
impl Unpin for CudaEngine
impl UnwindSafe for CudaEngine
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value.