1use std::sync::atomic::{AtomicU64, Ordering};
7use std::sync::Arc;
8use std::time::{Duration, Instant};
9
/// Snapshot of the ARM-related capabilities of the build target.
///
/// Obtain one with [`ArmFeatures::detect`].
#[derive(Debug, Clone)]
pub struct ArmFeatures {
    /// Whether NEON is reported as available (see [`ArmFeatures::detect`]).
    pub has_neon: bool,
    /// `true` when compiled for `target_arch = "aarch64"`.
    pub is_aarch64: bool,
    /// `true` when compiled for `target_arch = "arm"`.
    pub is_armv7: bool,
}

impl ArmFeatures {
    /// Determines the architecture flags and NEON availability.
    ///
    /// * AArch64 builds always report NEON.
    /// * 32-bit ARM builds ask the runtime feature detector.
    /// * Every other target reports no NEON.
    pub fn detect() -> Self {
        // Only 32-bit ARM needs a runtime probe; elsewhere the answer is fixed
        // at compile time.
        // NOTE(review): `is_arm_feature_detected!` appears to be nightly-only
        // on 32-bit ARM — confirm the toolchain this crate is built with.
        #[cfg(target_arch = "arm")]
        let neon_on_arm32 = std::arch::is_arm_feature_detected!("neon");
        #[cfg(not(target_arch = "arm"))]
        let neon_on_arm32 = false;

        let is_aarch64 = cfg!(target_arch = "aarch64");

        Self {
            has_neon: is_aarch64 || neon_on_arm32,
            is_aarch64,
            is_armv7: cfg!(target_arch = "arm"),
        }
    }

    /// Returns `true` for either ARM flavour (AArch64 or 32-bit ARM).
    pub fn is_arm(&self) -> bool {
        self.is_armv7 || self.is_aarch64
    }
}
58
/// Named counter that accumulates how many operations ran and how long they took.
///
/// The tallies live behind `Arc`s, so every clone of a counter observes and
/// updates the same numbers.
#[derive(Debug, Clone)]
pub struct ArmPerfCounter {
    name: String,
    count: Arc<AtomicU64>,
    total_time_ns: Arc<AtomicU64>,
}

impl ArmPerfCounter {
    /// Creates a counter called `name` with both tallies at zero.
    pub fn new(name: impl Into<String>) -> Self {
        let zeroed = || Arc::new(AtomicU64::new(0));
        Self {
            name: name.into(),
            count: zeroed(),
            total_time_ns: zeroed(),
        }
    }

    /// Starts timing one operation.
    ///
    /// The measurement is recorded when the returned [`ArmPerfTimer`] is dropped.
    pub fn start(&self) -> ArmPerfTimer {
        let counter = self.clone();
        let start = Instant::now();
        ArmPerfTimer { counter, start }
    }

    /// Number of operations recorded so far.
    pub fn count(&self) -> u64 {
        self.count.load(Ordering::Relaxed)
    }

    /// Sum of the elapsed time of all recorded operations.
    pub fn total_time(&self) -> Duration {
        let nanos = self.total_time_ns.load(Ordering::Relaxed);
        Duration::from_nanos(nanos)
    }

    /// Mean elapsed time per operation (integer division of the nanosecond
    /// total), or a zero duration when nothing has been recorded.
    pub fn avg_time(&self) -> Duration {
        match self.count() {
            0 => Duration::from_nanos(0),
            ops => Duration::from_nanos(self.total_time_ns.load(Ordering::Relaxed) / ops),
        }
    }

    /// Sets both tallies back to zero.
    ///
    /// The two stores are independent relaxed writes, not one atomic step.
    pub fn reset(&self) {
        for tally in [&self.count, &self.total_time_ns] {
            tally.store(0, Ordering::Relaxed);
        }
    }

    /// The name given at construction.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }
}

/// Guard returned by [`ArmPerfCounter::start`].
///
/// Dropping it adds one operation and the time elapsed since `start` to the
/// counter it was created from.
pub struct ArmPerfTimer {
    counter: ArmPerfCounter,
    start: Instant,
}

impl Drop for ArmPerfTimer {
    fn drop(&mut self) {
        // `as_nanos` yields a u128; the cast keeps the low 64 bits.
        let nanos = self.start.elapsed().as_nanos() as u64;
        let ArmPerfCounter {
            count,
            total_time_ns,
            ..
        } = &self.counter;
        count.fetch_add(1, Ordering::Relaxed);
        total_time_ns.fetch_add(nanos, Ordering::Relaxed);
    }
}
132
133#[derive(Debug, Clone)]
135pub struct ArmPerfReport {
136 pub features: ArmFeatures,
138 pub counters: Vec<(String, u64, Duration, Duration)>, }
141
142impl ArmPerfReport {
143 pub fn from_counters(counters: &[ArmPerfCounter]) -> Self {
145 let features = ArmFeatures::detect();
146 let counters = counters
147 .iter()
148 .map(|c| {
149 (
150 c.name().to_string(),
151 c.count(),
152 c.total_time(),
153 c.avg_time(),
154 )
155 })
156 .collect();
157
158 Self { features, counters }
159 }
160
161 pub fn print(&self) {
163 println!("=== ARM Performance Report ===");
164 println!(
165 "Architecture: {}",
166 if self.features.is_aarch64 {
167 "AArch64"
168 } else if self.features.is_armv7 {
169 "ARMv7"
170 } else {
171 "x86_64 (not ARM)"
172 }
173 );
174 println!("NEON support: {}", self.features.has_neon);
175 println!("\nPerformance Counters:");
176
177 for (name, count, total, avg) in &self.counters {
178 println!(" {name}: {count} ops, total: {total:?}, avg: {avg:?}");
179 }
180 }
181}
182
/// AArch64-only hashing path that reads its input through NEON loads.
#[cfg(target_arch = "aarch64")]
pub mod neon_hash {
    use std::arch::aarch64::*;

    /// FNV-1a (64-bit) over `data`, consuming the input in 16-byte NEON loads.
    ///
    /// Each full 16-byte chunk is loaded with `vld1q_u8`, reinterpreted as a
    /// byte array and folded byte by byte; the tail shorter than 16 bytes is
    /// folded directly from the slice.
    ///
    /// # Safety
    ///
    /// The function is compiled with the `neon` target feature enabled, so the
    /// caller must ensure the executing CPU supports NEON.
    #[target_feature(enable = "neon")]
    pub unsafe fn hash_block_neon(data: &[u8]) -> u64 {
        const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325;
        const FNV_PRIME: u64 = 0x100000001b3;

        /// One FNV-1a round: xor the byte in, then multiply by the prime.
        fn mix(state: u64, byte: &u8) -> u64 {
            (state ^ u64::from(*byte)).wrapping_mul(FNV_PRIME)
        }

        let mut blocks = data.chunks_exact(16);
        let mut state = FNV_OFFSET_BASIS;

        for block in blocks.by_ref() {
            // `chunks_exact(16)` guarantees 16 readable bytes behind the pointer.
            let lanes = vld1q_u8(block.as_ptr());
            let unpacked: [u8; 16] = std::mem::transmute(lanes);
            state = unpacked.iter().fold(state, mix);
        }

        blocks.remainder().iter().fold(state, mix)
    }
}
224
/// Portable 64-bit FNV-1a hash of `data`.
///
/// Starts from the FNV offset basis and, for each byte, xors it into the state
/// and multiplies by the FNV prime with wrapping arithmetic. An empty slice
/// returns the offset basis unchanged.
pub fn hash_block_fallback(data: &[u8]) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const FNV_PRIME: u64 = 0x100000001b3;

    data.iter().fold(FNV_OFFSET_BASIS, |state, &byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
237
238pub fn hash_block(data: &[u8]) -> u64 {
240 #[cfg(target_arch = "aarch64")]
241 {
242 unsafe { neon_hash::hash_block_neon(data) }
244 }
245
246 #[cfg(not(target_arch = "aarch64"))]
247 {
248 hash_block_fallback(data)
250 }
251}
252
/// Batching preset: how many items form a batch and how long to wait between batches.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PowerProfile {
    /// Batch size 1, no delay.
    Performance,
    /// Batch size 10, 10 ms delay.
    Balanced,
    /// Batch size 50, 100 ms delay.
    LowPower,
    /// Caller-supplied batch size and delay.
    Custom {
        /// Items per batch.
        batch_size: usize,
        /// Delay between batches, in milliseconds.
        batch_delay_ms: u64,
    },
}

impl PowerProfile {
    /// Single source of truth for the presets: `(batch size, delay in ms)`.
    fn tuning(self) -> (usize, u64) {
        match self {
            Self::Performance => (1, 0),
            Self::Balanced => (10, 10),
            Self::LowPower => (50, 100),
            Self::Custom {
                batch_size,
                batch_delay_ms,
            } => (batch_size, batch_delay_ms),
        }
    }

    /// Number of items that make up one batch under this profile.
    pub fn batch_size(&self) -> usize {
        self.tuning().0
    }

    /// Delay between batches in milliseconds.
    pub fn batch_delay_ms(&self) -> u64 {
        self.tuning().1
    }

    /// Delay between batches as a [`Duration`].
    pub fn batch_delay(&self) -> Duration {
        let millis = self.batch_delay_ms();
        Duration::from_millis(millis)
    }
}
295
296pub struct LowPowerBatcher<T> {
301 profile: PowerProfile,
302 buffer: Arc<std::sync::Mutex<Vec<T>>>,
303}
304
305impl<T> LowPowerBatcher<T> {
306 pub fn new(profile: PowerProfile) -> Self {
308 Self {
309 profile,
310 buffer: Arc::new(std::sync::Mutex::new(Vec::new())),
311 }
312 }
313
314 pub fn push(&self, item: T) -> Option<Vec<T>> {
318 let mut buffer = self.buffer.lock().unwrap();
319 buffer.push(item);
320
321 if buffer.len() >= self.profile.batch_size() {
322 Some(std::mem::take(&mut *buffer))
323 } else {
324 None
325 }
326 }
327
328 pub fn flush(&self) -> Vec<T> {
330 let mut buffer = self.buffer.lock().unwrap();
331 std::mem::take(&mut *buffer)
332 }
333
334 pub fn profile(&self) -> PowerProfile {
336 self.profile
337 }
338
339 pub fn pending(&self) -> usize {
341 self.buffer.lock().unwrap().len()
342 }
343}
344
/// Running totals describing how work was grouped into batches.
#[derive(Debug, Clone, Default)]
pub struct PowerStats {
    /// Number of batches recorded (one per `record_batch` call).
    pub wakeups: u64,
    /// Total operations across all recorded batches.
    pub operations: u64,
    /// Sum of the delays passed to `record_batch`.
    pub delay_time: Duration,
}

impl PowerStats {
    /// Creates statistics with every total at zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Records one batch of `ops` operations that waited `delay`.
    ///
    /// All totals saturate at their maximum instead of overflowing, so
    /// recording statistics never panics (plain `Duration` addition panics on
    /// overflow, and integer addition panics in debug builds).
    pub fn record_batch(&mut self, ops: usize, delay: Duration) {
        // `usize` fits in `u64` on common targets; clamp rather than truncate
        // if it ever does not.
        let ops = u64::try_from(ops).unwrap_or(u64::MAX);

        self.wakeups = self.wakeups.saturating_add(1);
        self.operations = self.operations.saturating_add(ops);
        self.delay_time = self.delay_time.saturating_add(delay);
    }

    /// Mean number of operations per recorded batch, or `0.0` when no batch
    /// has been recorded.
    pub fn avg_ops_per_wakeup(&self) -> f64 {
        if self.wakeups == 0 {
            0.0
        } else {
            self.operations as f64 / self.wakeups as f64
        }
    }

    /// Batches per operation (`wakeups / operations`).
    ///
    /// Returns `1.0` when no operations have been recorded. A smaller value
    /// means more operations were handled per batch.
    pub fn power_saving_ratio(&self) -> f64 {
        if self.operations == 0 {
            1.0
        } else {
            self.wakeups as f64 / self.operations as f64
        }
    }
}
390
#[cfg(test)]
mod tests {
    use super::*;

    /// The detected flags must agree with the architecture the test was built for.
    #[test]
    fn test_arm_features() {
        let features = ArmFeatures::detect();

        #[cfg(target_arch = "aarch64")]
        {
            assert!(features.is_aarch64);
            assert!(features.has_neon);
        }

        #[cfg(target_arch = "arm")]
        {
            assert!(features.is_armv7);
        }

        #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
        {
            assert!(!features.is_arm());
        }
    }

    /// Dropping a timer records exactly one operation and at least the slept time.
    #[test]
    fn test_perf_counter() {
        let counter = ArmPerfCounter::new("test_op");

        {
            let _timer = counter.start();
            std::thread::sleep(Duration::from_millis(10));
        }

        assert_eq!(counter.count(), 1);
        assert!(counter.total_time() >= Duration::from_millis(10));
        assert!(counter.avg_time() >= Duration::from_millis(10));
    }

    /// `hash_block` must equal the portable FNV-1a reference on every target.
    ///
    /// The lengths straddle the 16-byte chunk size so that both the chunked
    /// loop and the remainder handling of the AArch64 path are exercised.
    #[test]
    fn test_hash_block() {
        let payload: Vec<u8> = (0..=255u8).cycle().take(100).collect();
        for len in [0usize, 1, 11, 15, 16, 17, 31, 32, 33, 100] {
            let data = &payload[..len];
            assert_eq!(
                hash_block(data),
                hash_block_fallback(data),
                "mismatch for input length {len}"
            );
        }

        // Published 64-bit FNV-1a values.
        assert_eq!(hash_block(b""), 0xcbf29ce484222325);
        assert_eq!(hash_block(b"a"), 0xaf63dc4c8601ec8c);

        // Hashing is deterministic.
        let data = b"hello world";
        assert_eq!(hash_block(data), hash_block(data));
    }

    /// A report contains one entry per counter handed in.
    #[test]
    fn test_perf_report() {
        let counter1 = ArmPerfCounter::new("op1");
        let counter2 = ArmPerfCounter::new("op2");

        {
            let _t = counter1.start();
            std::thread::sleep(Duration::from_millis(1));
        }

        {
            let _t = counter2.start();
            std::thread::sleep(Duration::from_millis(1));
        }

        let report = ArmPerfReport::from_counters(&[counter1, counter2]);
        assert_eq!(report.counters.len(), 2);
    }

    /// Each preset exposes its fixed batch size and delay; `Custom` echoes its fields.
    #[test]
    fn test_power_profile() {
        let perf = PowerProfile::Performance;
        assert_eq!(perf.batch_size(), 1);
        assert_eq!(perf.batch_delay_ms(), 0);

        let balanced = PowerProfile::Balanced;
        assert_eq!(balanced.batch_size(), 10);
        assert_eq!(balanced.batch_delay_ms(), 10);

        let low = PowerProfile::LowPower;
        assert_eq!(low.batch_size(), 50);
        assert_eq!(low.batch_delay_ms(), 100);

        let custom = PowerProfile::Custom {
            batch_size: 20,
            batch_delay_ms: 30,
        };
        assert_eq!(custom.batch_size(), 20);
        assert_eq!(custom.batch_delay_ms(), 30);
    }

    /// Items accumulate until the batch size is hit; `flush` drains a partial batch.
    #[test]
    fn test_low_power_batcher() {
        let batcher: LowPowerBatcher<i32> = LowPowerBatcher::new(PowerProfile::Custom {
            batch_size: 3,
            batch_delay_ms: 0,
        });

        assert_eq!(batcher.pending(), 0);

        assert!(batcher.push(1).is_none());
        assert_eq!(batcher.pending(), 1);

        assert!(batcher.push(2).is_none());
        assert_eq!(batcher.pending(), 2);

        // The third item completes the batch.
        let batch = batcher.push(3);
        assert_eq!(batch, Some(vec![1, 2, 3]));
        assert_eq!(batcher.pending(), 0);

        // A partial batch stays pending until flushed explicitly.
        assert!(batcher.push(4).is_none());
        assert!(batcher.push(5).is_none());
        let flushed = batcher.flush();
        assert_eq!(flushed, vec![4, 5]);
        assert_eq!(batcher.pending(), 0);
    }

    /// Totals and derived ratios follow the recorded batches.
    #[test]
    fn test_power_stats() {
        let mut stats = PowerStats::new();
        assert_eq!(stats.wakeups, 0);
        assert_eq!(stats.operations, 0);

        stats.record_batch(10, Duration::from_millis(5));
        assert_eq!(stats.wakeups, 1);
        assert_eq!(stats.operations, 10);
        assert_eq!(stats.avg_ops_per_wakeup(), 10.0);

        stats.record_batch(5, Duration::from_millis(5));
        assert_eq!(stats.wakeups, 2);
        assert_eq!(stats.operations, 15);
        assert_eq!(stats.avg_ops_per_wakeup(), 7.5);

        // 2 wakeups for 15 operations: strictly between 0 and 1.
        let ratio = stats.power_saving_ratio();
        assert!(ratio > 0.0 && ratio < 1.0);
    }
}