full_inference_pipeline/
full_inference_pipeline.rs

use std::sync::Arc;

use npu_rs::{
    NpuDevice, DeviceInfo, Tensor, ExecutionContext, ModelConfig, ModelRuntime,
    QuantFormat, OptimizationLevel, NeuralNetwork, Layer, LayerType,
    QuantStats, QuantConverter, PTQEngine, GraphOptimizer, ComputationGraph,
    ComputeNode, Profiler, ProfileEvent,
};
8
9/// Complete inference pipeline example.
10fn main() {
11    println!("=== Full NPU Inference Pipeline ===\n");
12
13    setup_device();
14    build_model();
15    quantize_model();
16    execute_inference();
17    monitor_performance();
18
19    println!("\n=== Pipeline Completed ===\n");
20}
21
22fn setup_device() {
23    println!("1. Device Setup");
24    
25    let device_info = DeviceInfo {
26        device_id: 0,
27        peak_throughput_tops: 20.0,
28        memory_mb: 512,
29        compute_units: 4,
30        frequency_mhz: 800,
31        power_tdp_watts: 5.0,
32        vendor: "Xilinx/SiFive".to_string(),
33        device_name: "XilinxAI-Engine 20-TOPS".to_string(),
34    };
35
36    let device = Arc::new(NpuDevice::with_config(device_info));
37    match device.initialize() {
38        Ok(_) => {
39            println!("   ✓ Device initialized");
40            let info = device.get_info();
41            println!("   Device: {} ({} MB memory, {} TOPS peak)", 
42                info.device_name, info.memory_mb, info.peak_throughput_tops
43            );
44            
45            let memory_pool = device.get_memory_pool();
46            let manager = memory_pool.get_manager();
47            println!("   Available memory: {} MB\n", 
48                manager.get_available_bytes() / 1024 / 1024
49            );
50        }
51        Err(e) => println!("   ✗ Init failed: {}\n", e),
52    }
53}
54
55fn build_model() {
56    println!("2. Model Building");
57
58    let model_config = ModelConfig {
59        name: "ResNet18-Lite".to_string(),
60        input_shape: vec![1, 224, 224, 3],
61        output_shape: vec![1, 1000],
62        quant_format: QuantFormat::Int8,
63        optimization_level: OptimizationLevel::O3,
64        use_cache: true,
65    };
66
67    let runtime = ModelRuntime::new(model_config);
68    println!("   Model: {}", runtime.get_config().name);
69    println!("   Input: {:?}", runtime.input_shape());
70    println!("   Output: {:?}", runtime.output_shape());
71
72    let mut network = NeuralNetwork::new(runtime.get_config().name.clone());
73    
74    network.add_layer(Layer::new(
75        "stem_conv".to_string(),
76        LayerType::Convolution,
77        vec![1, 224, 224, 3],
78        vec![1, 112, 112, 64],
79    ));
80    
81    network.add_layer(Layer::new(
82        "residual_block_1".to_string(),
83        LayerType::PointwiseConvolution,
84        vec![1, 112, 112, 64],
85        vec![1, 112, 112, 64],
86    ));
87    
88    network.add_layer(Layer::new(
89        "residual_block_2".to_string(),
90        LayerType::PointwiseConvolution,
91        vec![1, 56, 56, 128],
92        vec![1, 56, 56, 128],
93    ));
94    
95    network.add_layer(Layer::new(
96        "global_avg_pool".to_string(),
97        LayerType::Pooling,
98        vec![1, 7, 7, 512],
99        vec![1, 512],
100    ));
101    
102    network.add_layer(Layer::new(
103        "classifier".to_string(),
104        LayerType::FullyConnected,
105        vec![1, 512],
106        vec![1, 1000],
107    ));
108
109    println!("   Layers: {}", network.layer_count());
110    println!("   Estimated TOPS: {:.6}\n", network.total_tops());
111}
112
113fn quantize_model() {
114    println!("3. Model Quantization");
115
116    let calibration_data = vec![
117        Tensor::random(&[1, 224, 224, 3]).data,
118        Tensor::random(&[1, 224, 224, 3]).data,
119        Tensor::random(&[1, 224, 224, 3]).data,
120    ];
121
122    let ptq = PTQEngine::new(8, false);
123    match ptq.calibrate(&calibration_data) {
124        Ok(converter) => {
125            println!("   ✓ Calibration complete");
126            
127            let sample = &calibration_data[0];
128            let stats = QuantStats::from_tensor(sample);
129            println!("   Calibration Stats:");
130            println!("   - Min: {:.6}", stats.min_val);
131            println!("   - Max: {:.6}", stats.max_val);
132            println!("   - Mean: {:.6}", stats.mean_val);
133            println!("   - Std: {:.6}", stats.std_val);
134            
135            match converter.quantize_tensor(sample) {
136                Ok(quantized) => {
137                    println!("   ✓ Quantization complete: {} values", quantized.len());
138                    println!("   Compression: {:.2}x\n", 
139                        (sample.len() * 4) as f64 / quantized.len() as f64
140                    );
141                }
142                Err(e) => println!("   ✗ Quantization failed: {}\n", e),
143            }
144        }
145        Err(e) => println!("   ✗ Calibration failed: {}\n", e),
146    }
147}
148
149fn execute_inference() {
150    println!("4. Inference Execution");
151
152    let device = Arc::new(NpuDevice::new());
153    match device.initialize() {
154        Ok(_) => {
155            let ctx = ExecutionContext::new(device);
156            
157            let input = Tensor::random(&[1, 224, 224, 3]);
158            let weights = Tensor::random(&[1, 1, 3, 64]);
159
160            println!("   Input: {:?}", input.shape());
161            println!("   Weights: {:?}", weights.shape());
162
163            match ctx.execute_conv1x1(&input.data, &weights.data) {
164                Ok(output) => {
165                    println!("   ✓ Conv1x1 executed");
166                    println!("   Output: {:?}", output.shape());
167                    println!("   Throughput: {:.4} GOPS\n", ctx.get_current_throughput_gops());
168                }
169                Err(e) => println!("   ✗ Execution failed: {}\n", e),
170            }
171        }
172        Err(e) => println!("   ✗ Device init failed: {}\n", e),
173    }
174}
175
176fn monitor_performance() {
177    println!("5. Performance Monitoring");
178
179    let device = Arc::new(NpuDevice::new());
180    let _ = device.initialize();
181
182    let mut profiler = Profiler::new(device);
183
184    let ops_profile = vec![
185        ("Conv3x3_In16_Out32", 1728, 0.25, 4.5),
186        ("MatMul_512x1000", 1_024_000, 0.15, 4.2),
187        ("ReLU_Activation", 512_000, 0.05, 2.1),
188    ];
189
190    println!("   Recording {} operations...", ops_profile.len());
191    for (name, ops, duration, power) in ops_profile {
192        profiler.record_event(ProfileEvent {
193            event_name: name.to_string(),
194            start_time_ms: 0.0,
195            duration_ms: duration,
196            ops_count: ops,
197            power_watts: power,
198        });
199    }
200
201    println!("   \n   Operations Profile:");
202    for event in profiler.get_events() {
203        println!("   - {}: {:.2} GOPS, {:.2} W",
204            event.event_name,
205            event.get_throughput_gops(),
206            event.power_watts
207        );
208    }
209
210    let report = profiler.generate_report();
211    println!("\n   Performance Summary:");
212    println!("   - Total Ops: {}", report.total_operations);
213    println!("   - Total Time: {:.4} ms", report.total_time_ms);
214    println!("   - Avg Throughput: {:.2} GOPS", report.avg_throughput_gops);
215    println!("   - Peak Power: {:.2} W\n", report.peak_power_watts);
216}