use crate::core::types::{MemoryStats, TrackingError, TrackingResult};
use std::cell::UnsafeCell;
use std::collections::HashMap;
use std::mem::size_of;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
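/// Compact, `#[repr(C, packed)]` allocation record (24 bytes) stored in per-thread buffers:
/// pointer, size (saturated to `u32::MAX`), a wrapped microsecond timestamp, an FNV-1a hash of
/// the type name, bit flags (0x1 = active, 0x2 = sampled), and the owning thread id.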
#[repr(C, packed)]
#[derive(Clone, Copy)]
struct CompactAllocationRecord {
    ptr: u64,
    size: u32,
    timestamp_delta: u32,
    type_hash: u32,
    flags: u16,
    thread_id: u16,
}

impl CompactAllocationRecord {
    const SIZE: usize = size_of::<Self>();

    fn new(ptr: usize, size: usize, type_hash: u32, thread_id: u32) -> Self {
        let timestamp_delta = get_timestamp_delta();
        Self {
            ptr: ptr as u64,
            size: size.min(u32::MAX as usize) as u32,
            timestamp_delta,
            type_hash,
            flags: 0,
            thread_id: thread_id as u16,
        }
    }

    fn is_active(&self) -> bool {
        self.flags & 0x1 != 0
    }

    fn set_active(&mut self, active: bool) {
        if active {
            self.flags |= 0x1;
        } else {
            self.flags &= !0x1;
        }
    }

    #[allow(dead_code)]
    fn is_sampled(&self) -> bool {
        self.flags & 0x2 != 0
    }

    fn set_sampled(&mut self, sampled: bool) {
        if sampled {
            self.flags |= 0x2;
        } else {
            self.flags &= !0x2;
        }
    }
}
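/// Sampling configuration: allocations at or above `critical_size_threshold` are always recorded,
/// every `frequency_sample_interval`-th operation on a thread is recorded, and the rest are
/// sampled probabilistically (`medium_sample_rate` for sizes >= 1 KiB, `small_sample_rate` below
/// that). `max_records_per_thread` caps each thread-local buffer; `enable_simd` is advisory.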
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct UltraFastSamplingConfig {
    pub critical_size_threshold: usize,
    pub medium_sample_rate: f32,
    pub small_sample_rate: f32,
    pub frequency_sample_interval: u32,
    pub max_records_per_thread: usize,
    pub enable_simd: bool,
}

impl Default for UltraFastSamplingConfig {
    fn default() -> Self {
        Self {
            critical_size_threshold: 8192,
            medium_sample_rate: 0.05,
            small_sample_rate: 0.001,
            frequency_sample_interval: 1000,
            max_records_per_thread: 10000,
            enable_simd: cfg!(target_feature = "avx2"),
        }
    }
}
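/// Per-thread record buffer with a fixed capacity, a map from pointer to record index for the
/// allocations that are still active, and bookkeeping for flush decisions.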
struct ThreadLocalBuffer {
    records: Vec<CompactAllocationRecord>,
    write_pos: usize,
    operation_count: u64,
    thread_id: u16,
    active_map: HashMap<u64, usize>,
    last_flush: u64,
}

impl ThreadLocalBuffer {
    fn new(capacity: usize, thread_id: u16) -> Self {
        Self {
            records: Vec::with_capacity(capacity),
            write_pos: 0,
            operation_count: 0,
            thread_id,
            active_map: HashMap::with_capacity(capacity / 4),
            last_flush: get_current_timestamp(),
        }
    }

    fn add_record(&mut self, mut record: CompactAllocationRecord) -> Option<usize> {
        if self.write_pos >= self.records.capacity() {
            return None;
        }

        record.thread_id = self.thread_id;
        let index = self.write_pos;

        if self.write_pos < self.records.len() {
            self.records[self.write_pos] = record;
        } else {
            self.records.push(record);
        }

        self.write_pos += 1;
        self.operation_count += 1;

        if record.is_active() {
            self.active_map.insert(record.ptr, index);
        }

        Some(index)
    }

    fn deactivate_allocation(&mut self, ptr: u64) -> bool {
        if let Some(&index) = self.active_map.get(&ptr) {
            if index < self.records.len() {
                self.records[index].set_active(false);
                self.active_map.remove(&ptr);
                return true;
            }
        }
        false
    }

    fn should_flush(&self, max_size: usize, max_age_us: u64) -> bool {
        self.write_pos >= max_size || (get_current_timestamp() - self.last_flush) > max_age_us
    }

    fn get_records(&self) -> &[CompactAllocationRecord] {
        &self.records[..self.write_pos]
    }

    fn clear(&mut self) {
        self.write_pos = 0;
        self.active_map.clear();
        self.last_flush = get_current_timestamp();
    }
}
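// Thread-local storage for the per-thread buffer. The `UnsafeCell` is only dereferenced inside
// `THREAD_BUFFER.with`, i.e. from the owning thread, which is what makes the raw access below
// workable without locking.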
thread_local! {
    static THREAD_BUFFER: UnsafeCell<Option<ThreadLocalBuffer>> = const { UnsafeCell::new(None) };
    static THREAD_ID: std::sync::atomic::AtomicU32 = const { std::sync::atomic::AtomicU32::new(0) };
}
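/// Sampling allocation tracker that keeps the hot path down to a few relaxed atomic updates
/// plus, for sampled allocations, one write into a thread-local buffer.
///
/// Usage sketch (illustrative only; the import path depends on where this module is mounted):
///
/// ```ignore
/// let tracker = UltraFastTracker::new();
/// tracker.track_allocation(0x1000, 4096, "Vec<u8>").unwrap();
/// tracker.track_deallocation(0x1000).unwrap();
/// let stats = tracker.get_stats().unwrap();
/// ```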
pub struct UltraFastTracker {
    config: UltraFastSamplingConfig,
    stats: Arc<GlobalStats>,
    #[allow(dead_code)]
    base_timestamp: u64,
    next_thread_id: std::sync::atomic::AtomicU32,
    binary_writer: Arc<BinaryWriter>,
}
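/// Process-wide counters shared by all threads; updated with relaxed atomics on the hot path.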
struct GlobalStats {
    total_allocations: AtomicU64,
    total_deallocations: AtomicU64,
    active_allocations: AtomicU64,
    active_memory: AtomicU64,
    sampled_allocations: AtomicU64,
    #[allow(dead_code)]
    bytes_processed: AtomicU64,
}

impl GlobalStats {
    fn new() -> Self {
        Self {
            total_allocations: AtomicU64::new(0),
            total_deallocations: AtomicU64::new(0),
            active_allocations: AtomicU64::new(0),
            active_memory: AtomicU64::new(0),
            sampled_allocations: AtomicU64::new(0),
            bytes_processed: AtomicU64::new(0),
        }
    }

    fn record_allocation(&self, size: usize, sampled: bool) {
        self.total_allocations.fetch_add(1, Ordering::Relaxed);
        self.active_allocations.fetch_add(1, Ordering::Relaxed);
        self.active_memory.fetch_add(size as u64, Ordering::Relaxed);

        if sampled {
            self.sampled_allocations.fetch_add(1, Ordering::Relaxed);
        }
    }

    fn record_deallocation(&self, size: usize) {
        self.total_deallocations.fetch_add(1, Ordering::Relaxed);
        self.active_allocations.fetch_sub(1, Ordering::Relaxed);
        self.active_memory.fetch_sub(size as u64, Ordering::Relaxed);
    }
    fn get_memory_stats(&self) -> MemoryStats {
        // Only the counters maintained by this tracker are filled in; peak values are
        // approximated by the current values and everything else is left at its default.
        MemoryStats {
            total_allocations: self.total_allocations.load(Ordering::Relaxed) as usize,
            active_allocations: self.active_allocations.load(Ordering::Relaxed) as usize,
            active_memory: self.active_memory.load(Ordering::Relaxed) as usize,
            peak_memory: self.active_memory.load(Ordering::Relaxed) as usize,
            total_allocated: self.active_memory.load(Ordering::Relaxed) as usize,
            peak_allocations: self.active_allocations.load(Ordering::Relaxed) as usize,
            total_deallocations: self.total_deallocations.load(Ordering::Relaxed) as usize,
            total_deallocated: 0,
            leaked_allocations: 0,
            leaked_memory: 0,
            allocations: Vec::new(),
            concurrency_analysis: Default::default(),
            fragmentation_analysis: Default::default(),
            lifecycle_stats: Default::default(),
            system_library_stats: Default::default(),
        }
    }
}
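/// Sink for flushed records. Currently a stub: it only counts the bytes that would be written
/// and discards the records themselves.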
struct BinaryWriter {
    enabled: AtomicBool,
    bytes_written: AtomicU64,
}

impl BinaryWriter {
    fn new() -> Self {
        Self {
            enabled: AtomicBool::new(true),
            bytes_written: AtomicU64::new(0),
        }
    }

    fn write_records(&self, records: &[CompactAllocationRecord]) -> std::io::Result<()> {
        if !self.enabled.load(Ordering::Relaxed) {
            return Ok(());
        }

        let bytes_written = records.len() * CompactAllocationRecord::SIZE;
        self.bytes_written
            .fetch_add(bytes_written as u64, Ordering::Relaxed);

        std::hint::black_box(records);

        Ok(())
    }

    fn get_bytes_written(&self) -> u64 {
        self.bytes_written.load(Ordering::Relaxed)
    }
}

impl UltraFastTracker {
    pub fn new() -> Self {
        Self::with_config(UltraFastSamplingConfig::default())
    }

    pub fn with_config(config: UltraFastSamplingConfig) -> Self {
        Self {
            config,
            stats: Arc::new(GlobalStats::new()),
            base_timestamp: get_current_timestamp(),
            next_thread_id: std::sync::atomic::AtomicU32::new(1),
            binary_writer: Arc::new(BinaryWriter::new()),
        }
    }
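    /// Hot-path entry point: hashes the type name, updates the global counters, and records the
    /// allocation in the thread-local buffer only if the sampling policy selects it.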
    pub fn track_allocation(&self, ptr: usize, size: usize, type_name: &str) -> TrackingResult<()> {
        let type_hash = calculate_fast_hash(type_name);
        let should_sample = self.should_sample_allocation(size);

        self.stats.record_allocation(size, should_sample);

        if should_sample {
            self.record_sampled_allocation(ptr, size, type_hash)?;
        }

        Ok(())
    }
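    /// Marks the allocation inactive if it was sampled into the calling thread's buffer. The
    /// freed size is not known here, so the deallocation is counted with size 0 and
    /// `active_memory` is left unchanged.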
    pub fn track_deallocation(&self, ptr: usize) -> TrackingResult<()> {
        let deactivated = THREAD_BUFFER.with(|buffer_cell| unsafe {
            let buffer_ref = &mut *buffer_cell.get();
            if let Some(ref mut buffer) = buffer_ref {
                buffer.deactivate_allocation(ptr as u64)
            } else {
                false
            }
        });

        if deactivated {
            self.stats.record_deallocation(0);
        }

        Ok(())
    }
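    /// Sampling decision, in order: always sample at or above the critical size threshold, then
    /// sample every N-th operation on this thread, otherwise fall back to a size-dependent
    /// random sample rate.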
    fn should_sample_allocation(&self, size: usize) -> bool {
        if size >= self.config.critical_size_threshold {
            return true;
        }

        let should_sample_by_frequency = THREAD_BUFFER.with(|buffer_cell| unsafe {
            let buffer_ref = &mut *buffer_cell.get();
            if let Some(ref mut buffer) = buffer_ref {
                buffer.operation_count % self.config.frequency_sample_interval as u64 == 0
            } else {
                false
            }
        });

        if should_sample_by_frequency {
            return true;
        }

        let sample_rate = if size >= 1024 {
            self.config.medium_sample_rate
        } else {
            self.config.small_sample_rate
        };

        rand::random::<f32>() < sample_rate
    }
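    /// Writes a sampled allocation into the thread-local buffer, lazily creating the buffer on
    /// first use and flushing it when it is full or has grown stale.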
    fn record_sampled_allocation(
        &self,
        ptr: usize,
        size: usize,
        type_hash: u32,
    ) -> TrackingResult<()> {
        THREAD_BUFFER.with(|buffer_cell| {
            unsafe {
                let buffer_ref = &mut *buffer_cell.get();

                if buffer_ref.is_none() {
                    let thread_id = self.next_thread_id.fetch_add(1, Ordering::Relaxed);
                    *buffer_ref = Some(ThreadLocalBuffer::new(
                        self.config.max_records_per_thread,
                        thread_id.try_into().unwrap(),
                    ));
                }

                if let Some(ref mut buffer) = buffer_ref {
                    let mut record =
                        CompactAllocationRecord::new(ptr, size, type_hash, buffer.thread_id.into());
                    record.set_active(true);
                    record.set_sampled(true);

                    if buffer.add_record(record).is_none() {
                        // The buffer was full: flush it and retry. The retry is expected to
                        // succeed on a freshly cleared buffer, so its result is ignored.
                        self.flush_thread_buffer(buffer)?;
                        let _ = buffer.add_record(record);
                    }

                    if buffer.should_flush(self.config.max_records_per_thread / 2, 1_000_000) {
                        self.flush_thread_buffer(buffer)?;
                    }
                }

                Ok(())
            }
        })
    }
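    /// Hands the buffered records to the binary writer and resets the buffer.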
    fn flush_thread_buffer(&self, buffer: &mut ThreadLocalBuffer) -> TrackingResult<()> {
        let records = buffer.get_records();
        if !records.is_empty() {
            self.binary_writer
                .write_records(records)
                .map_err(|e| TrackingError::IoError(e.to_string()))?;
        }
        buffer.clear();
        Ok(())
    }

    pub fn get_stats(&self) -> TrackingResult<MemoryStats> {
        Ok(self.stats.get_memory_stats())
    }
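    /// Returns sampling effectiveness counters: how many allocations were seen versus sampled,
    /// and how many bytes the writer has accounted for.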
    pub fn get_sampling_stats(&self) -> SamplingStats {
        let total_allocs = self.stats.total_allocations.load(Ordering::Relaxed);
        let sampled_allocs = self.stats.sampled_allocations.load(Ordering::Relaxed);
        let bytes_written = self.binary_writer.get_bytes_written();

        SamplingStats {
            total_allocations: total_allocs,
            sampled_allocations: sampled_allocs,
            sampling_rate: if total_allocs > 0 {
                sampled_allocs as f64 / total_allocs as f64
            } else {
                0.0
            },
            bytes_written,
            compression_ratio: if sampled_allocs > 0 {
                bytes_written as f64
                    / (sampled_allocs * size_of::<CompactAllocationRecord>() as u64) as f64
            } else {
                0.0
            },
        }
    }
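    /// Flushes the thread-local buffer of the calling thread; buffers owned by other threads are
    /// flushed when those threads next hit their own flush conditions.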
    pub fn flush_all_threads(&self) -> TrackingResult<()> {
        THREAD_BUFFER.with(|buffer_cell| unsafe {
            let buffer_ref = &mut *buffer_cell.get();
            if let Some(ref mut buffer) = buffer_ref {
                self.flush_thread_buffer(buffer)
            } else {
                Ok(())
            }
        })
    }
}

impl Default for UltraFastTracker {
    fn default() -> Self {
        Self::new()
    }
}
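/// Snapshot of the tracker's sampling behaviour, as returned by `get_sampling_stats`.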
#[derive(Debug, Clone)]
pub struct SamplingStats {
    pub total_allocations: u64,
    pub sampled_allocations: u64,
    pub sampling_rate: f64,
    pub bytes_written: u64,
    pub compression_ratio: f64,
}
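/// 32-bit FNV-1a hash of the type name; cheap, allocation-free, and stable across runs.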
fn calculate_fast_hash(s: &str) -> u32 {
    const FNV_OFFSET_BASIS: u32 = 2166136261;
    const FNV_PRIME: u32 = 16777619;

    let mut hash = FNV_OFFSET_BASIS;
    for byte in s.bytes() {
        hash ^= byte as u32;
        hash = hash.wrapping_mul(FNV_PRIME);
    }
    hash
}
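/// Current wall-clock time in microseconds since the Unix epoch.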
fn get_current_timestamp() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_micros() as u64
}
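/// Microsecond timestamp folded into 32 bits so it fits the compact record.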
fn get_timestamp_delta() -> u32 {
    (get_current_timestamp() % (u32::MAX as u64)) as u32
}
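// SIMD record processing is gated on AVX2 but currently just delegates to the scalar path below.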
#[cfg(target_feature = "avx2")]
mod simd_ops {
    use super::*;

    pub fn process_records_simd(records: &[CompactAllocationRecord]) -> u64 {
        process_records_scalar(records)
    }
}

#[allow(dead_code)]
fn process_records_scalar(records: &[CompactAllocationRecord]) -> u64 {
    records
        .iter()
        .filter(|r| r.is_active())
        .map(|r| r.size as u64)
        .sum()
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_compact_allocation_record() {
        let record = CompactAllocationRecord::new(0x1000, 1024, 0x12345678, 1);
        // Copy fields out first: taking references to fields of a packed struct is not allowed.
        let ptr = record.ptr;
        let size = record.size;
        let type_hash = record.type_hash;
        let thread_id = record.thread_id;

        assert_eq!(ptr, 0x1000);
        assert_eq!(size, 1024);
        assert_eq!(type_hash, 0x12345678);
        assert_eq!(thread_id, 1);
    }
    #[test]
    fn test_ultra_fast_tracker_basic() {
        let tracker = UltraFastTracker::new();

        tracker.track_allocation(0x1000, 1024, "Vec<i32>").unwrap();
        tracker
            .track_allocation(0x2000, 2048, "HashMap<String, i32>")
            .unwrap();

        let stats = tracker.get_stats().unwrap();
        assert_eq!(stats.total_allocations, 2);
        assert_eq!(stats.active_allocations, 2);

        tracker.track_deallocation(0x1000).unwrap();

        let stats = tracker.get_stats().unwrap();
        assert!(stats.total_allocations >= 1);
    }
    #[test]
    fn test_sampling_behavior() {
        let config = UltraFastSamplingConfig {
            critical_size_threshold: 1000,
            medium_sample_rate: 1.0,
            small_sample_rate: 0.0,
            frequency_sample_interval: 1,
            max_records_per_thread: 1000,
            enable_simd: false,
        };

        let tracker = UltraFastTracker::with_config(config);

        tracker
            .track_allocation(0x1000, 2000, "LargeBuffer")
            .unwrap();

        tracker
            .track_allocation(0x2000, 500, "MediumBuffer")
            .unwrap();

        let sampling_stats = tracker.get_sampling_stats();
        assert!(sampling_stats.sampled_allocations >= 1);
    }
    #[test]
    fn test_fast_hash_function() {
        let hash1 = calculate_fast_hash("Vec<i32>");
        let hash2 = calculate_fast_hash("Vec<i32>");
        let hash3 = calculate_fast_hash("HashMap<String, i32>");

        assert_eq!(hash1, hash2);
        assert_ne!(hash1, hash3);
    }
    #[test]
    fn test_thread_local_buffer() {
        let mut buffer = ThreadLocalBuffer::new(100, 1);

        // Mark the record active so it lands in the active map and can be deactivated below.
        let mut record = CompactAllocationRecord::new(0x1000, 1024, 0x12345678, 1);
        record.set_active(true);
        let _index = buffer.add_record(record).unwrap();

        assert_eq!(buffer.write_pos, 1);
        assert_eq!(buffer.operation_count, 1);

        let deactivated = buffer.deactivate_allocation(0x1000);
        assert!(deactivated);
    }
    #[test]
    fn test_memory_layout_efficiency() {
        let actual_size = std::mem::size_of::<CompactAllocationRecord>();
        assert!(actual_size <= 32);
        assert!(actual_size >= 16);

        let alignment = std::mem::align_of::<CompactAllocationRecord>();
        assert!(alignment <= 8);
    }
}