zipora 2.1.4

High-performance Rust implementation of advanced data structures and compression algorithms with memory-safety guarantees. Features an LRU page cache, a sophisticated caching layer, fiber-based concurrency, real-time compression, secure memory pools, SIMD optimizations, and a complete C FFI for migration from C++.
//! Memory-mapped allocator for large objects
//!
//! This module provides memory-mapped allocation for large objects to achieve
//! C++-competitive performance for allocations >16KB.
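//!
//! # Example
//!
//! A minimal usage sketch (the `use` path is illustrative; the exact
//! re-export location within the crate may differ):
//!
//! ```ignore
//! use zipora::memory::MemoryMappedAllocator;
//!
//! let allocator = MemoryMappedAllocator::new(16 * 1024);
//! let mut allocation = allocator.allocate(64 * 1024).unwrap();
//! allocation.as_mut_slice()[0] = 42;
//! assert_eq!(allocation.as_slice()[0], 42);
//! allocator.deallocate(allocation).unwrap();
//! ```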

use crate::error::{Result, ZiporaError};
use std::collections::HashMap;
use std::ptr::NonNull;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};

/// Memory-mapped allocator for high-performance large-object allocation
pub struct MemoryMappedAllocator {
    /// Minimum size for memory-mapped allocations
    min_mmap_size: usize,
    /// Cache of memory-mapped regions to avoid repeated mmap/munmap
    region_cache: Arc<Mutex<HashMap<usize, Vec<*mut u8>>>>,
    /// Statistics
    total_allocated: AtomicU64,
    total_freed: AtomicU64,
    mmap_calls: AtomicU64,
    munmap_calls: AtomicU64,
    cache_hits: AtomicU64,
    cache_misses: AtomicU64,
}

/// Information about a memory-mapped allocation
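///
/// Note: `MmapAllocation` has no `Drop` implementation, so an allocation that
/// is dropped without being passed back to `MemoryMappedAllocator::deallocate`
/// leaks its mapping.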
#[derive(Debug)]
pub struct MmapAllocation {
    ptr: NonNull<u8>,
    size: usize,
    actual_size: usize, // Rounded up to page size
}

/// Statistics for memory-mapped allocations
#[derive(Debug, Clone)]
pub struct MmapStats {
    /// Total bytes allocated via mmap
    pub total_allocated: u64,
    /// Total bytes freed via munmap
    pub total_freed: u64,
    /// Number of mmap system calls made
    pub mmap_calls: u64,
    /// Number of munmap system calls made
    pub munmap_calls: u64,
    /// Number of times a cached region was reused
    pub cache_hits: u64,
    /// Number of times a new region had to be allocated
    pub cache_misses: u64,
    /// Number of regions currently in cache
    pub cached_regions: usize,
}

impl MemoryMappedAllocator {
    /// Create a new memory-mapped allocator
    pub fn new(min_mmap_size: usize) -> Self {
        Self {
            min_mmap_size,
            region_cache: Arc::new(Mutex::new(HashMap::new())),
            total_allocated: AtomicU64::new(0),
            total_freed: AtomicU64::new(0),
            mmap_calls: AtomicU64::new(0),
            munmap_calls: AtomicU64::new(0),
            cache_hits: AtomicU64::new(0),
            cache_misses: AtomicU64::new(0),
        }
    }

}

impl Default for MemoryMappedAllocator {
    /// Create an allocator with default settings (16 KiB minimum).
    fn default() -> Self {
        Self::new(16 * 1024)
    }
}

impl MemoryMappedAllocator {

    /// Allocate memory using mmap for optimal large allocation performance
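    ///
    /// # Errors
    ///
    /// Returns an error if `size` is below the configured minimum mmap size,
    /// or if the underlying `mmap` call fails.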
    pub fn allocate(&self, size: usize) -> Result<MmapAllocation> {
        if size < self.min_mmap_size {
            return Err(ZiporaError::invalid_data(
                "allocation too small for memory mapping",
            ));
        }

        // Round up to the page size for optimal performance. The mask trick
        // below assumes the page size is a power of two, which holds on every
        // platform libc supports (e.g. 65_000 rounds up to 65_536 with 4 KiB pages).
        let page_size = Self::get_page_size();
        let actual_size = (size + page_size - 1) & !(page_size - 1);

        // Try to get from cache first
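        // (try_lock keeps this path non-blocking: under contention we fall
        // through and pay for a fresh mmap rather than wait on the lock.)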
        if let Ok(mut cache) = self.region_cache.try_lock() {
            if let Some(regions) = cache.get_mut(&actual_size) {
                if let Some(ptr) = regions.pop() {
                    self.cache_hits.fetch_add(1, Ordering::Relaxed);
                    self.total_allocated
                        .fetch_add(size as u64, Ordering::Relaxed);

                    // SAFETY: cached ptr was obtained from successful mmap, guaranteed non-null
                    return Ok(MmapAllocation {
                        ptr: unsafe { NonNull::new_unchecked(ptr) },
                        size,
                        actual_size,
                    });
                }
            }
        }

        // Cache miss, allocate new region
        self.cache_misses.fetch_add(1, Ordering::Relaxed);
        self.mmap_calls.fetch_add(1, Ordering::Relaxed);

        // SAFETY: fd=-1 for anonymous mapping, size/offset are page-aligned, flags are valid
        let ptr = unsafe {
            libc::mmap(
                std::ptr::null_mut(),
                actual_size,
                libc::PROT_READ | libc::PROT_WRITE,
                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
                -1,
                0,
            )
        };

        if ptr == libc::MAP_FAILED {
            return Err(ZiporaError::out_of_memory(size));
        }

        // Use madvise to give the kernel access-pattern hints.
        // SAFETY: ptr is valid from the successful mmap above, actual_size
        // matches the mapping, and madvise hints are purely advisory.
        unsafe {
            // Hint that we'll access this memory soon
            libc::madvise(ptr, actual_size, libc::MADV_WILLNEED);
            // Hint for sequential access pattern (if applicable)
            libc::madvise(ptr, actual_size, libc::MADV_SEQUENTIAL);
        }

        self.total_allocated
            .fetch_add(size as u64, Ordering::Relaxed);

        // SAFETY: ptr != MAP_FAILED guarantees non-null
        Ok(MmapAllocation {
            ptr: unsafe { NonNull::new_unchecked(ptr as *mut u8) },
            size,
            actual_size,
        })
    }

    /// Deallocate memory, potentially caching for reuse
    pub fn deallocate(&self, allocation: MmapAllocation) -> Result<()> {
        self.total_freed
            .fetch_add(allocation.size as u64, Ordering::Relaxed);

        // Try to cache the region for reuse
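        // (As in allocate(), try_lock keeps this non-blocking: if the lock is
        // contended we fall through and unmap immediately.)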
        if let Ok(mut cache) = self.region_cache.try_lock() {
            let regions = cache.entry(allocation.actual_size).or_insert_with(Vec::new);

            // Limit cache size to prevent memory bloat
            const MAX_CACHED_REGIONS_PER_SIZE: usize = 4;
            if regions.len() < MAX_CACHED_REGIONS_PER_SIZE {
                regions.push(allocation.ptr.as_ptr());
                return Ok(());
            }
        }

        // Cache is full or locked, deallocate immediately
        self.munmap_calls.fetch_add(1, Ordering::Relaxed);
        // SAFETY: ptr from allocation was obtained via mmap with matching size
        unsafe {
            if libc::munmap(
                allocation.ptr.as_ptr() as *mut libc::c_void,
                allocation.actual_size,
            ) != 0
            {
                return Err(ZiporaError::io_error("failed to unmap memory"));
            }
        }

        Ok(())
    }

    /// Check if this allocator should be used for the given size
    pub fn should_use_mmap(&self, size: usize) -> bool {
        size >= self.min_mmap_size
    }

    /// Get current statistics.
    ///
    /// `cached_regions` is sampled with `try_lock`, so it reads as 0 whenever
    /// the cache lock is contended.
    pub fn stats(&self) -> MmapStats {
        let cached_regions = if let Ok(cache) = self.region_cache.try_lock() {
            cache.values().map(|v| v.len()).sum()
        } else {
            0
        };

        MmapStats {
            total_allocated: self.total_allocated.load(Ordering::Relaxed),
            total_freed: self.total_freed.load(Ordering::Relaxed),
            mmap_calls: self.mmap_calls.load(Ordering::Relaxed),
            munmap_calls: self.munmap_calls.load(Ordering::Relaxed),
            cache_hits: self.cache_hits.load(Ordering::Relaxed),
            cache_misses: self.cache_misses.load(Ordering::Relaxed),
            cached_regions,
        }
    }

    /// Clear the region cache, forcing all cached regions to be unmapped
    pub fn clear_cache(&self) -> Result<()> {
        if let Ok(mut cache) = self.region_cache.lock() {
            for (size, regions) in cache.drain() {
                for ptr in regions {
                    self.munmap_calls.fetch_add(1, Ordering::Relaxed);
                    // SAFETY: cached ptr was obtained via mmap with this size
                    unsafe {
                        if libc::munmap(ptr as *mut libc::c_void, size) != 0 {
                            log::warn!("Failed to unmap cached region of size {}", size);
                        }
                    }
                }
            }
        }
        Ok(())
    }

    /// Get system page size
    fn get_page_size() -> usize {
        // SAFETY: sysconf with _SC_PAGESIZE is always safe to call
        unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize }
    }
}

impl Drop for MemoryMappedAllocator {
    fn drop(&mut self) {
        // Clean up all cached regions
        let _ = self.clear_cache();
    }
}

// SAFETY: MemoryMappedAllocator is Send because:
// 1. `min_mmap_size: usize` - Immutable primitive, trivially Send.
// 2. `region_cache: Arc<Mutex<HashMap<usize, Vec<*mut u8>>>>` - Arc and Mutex
//    are Send; the raw pointers in the cache point to anonymous mmap'd
//    regions, not thread-local data, so they may move across threads.
// 3. The AtomicU64 counters (total_allocated, total_freed, ...) are Send.
unsafe impl Send for MemoryMappedAllocator {}

// SAFETY: MemoryMappedAllocator is Sync because:
// 1. `region_cache` - Protected by a Mutex, which serializes all access.
// 2. All atomic counters are inherently thread-safe.
// 3. mmap/munmap syscalls are thread-safe.
// 4. `min_mmap_size` is immutable and safe to read concurrently.
unsafe impl Sync for MemoryMappedAllocator {}

impl MmapAllocation {
    /// Get the allocated memory as a slice
    #[inline]
    pub fn as_slice(&self) -> &[u8] {
        // SAFETY: ptr is valid for size bytes, obtained via mmap, mapping valid for lifetime of MmapAllocation
        unsafe { std::slice::from_raw_parts(self.ptr.as_ptr(), self.size) }
    }

    /// Get the allocated memory as a mutable slice
    #[inline]
    pub fn as_mut_slice(&mut self) -> &mut [u8] {
        // SAFETY: ptr is valid for size bytes, obtained via mmap, mapping valid for lifetime of MmapAllocation
        unsafe { std::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.size) }
    }

    /// Get the size of the allocation
    #[inline]
    pub fn size(&self) -> usize {
        self.size
    }

    /// Get the actual allocated size (rounded to page size)
    pub fn actual_size(&self) -> usize {
        self.actual_size
    }

    /// Get the memory as a typed pointer
    pub fn as_ptr<T>(&self) -> *mut T {
        self.ptr.as_ptr() as *mut T
    }
    
    /// Get mutable pointer to the allocation as a raw byte pointer
    pub fn as_mut_ptr(&mut self) -> *mut u8 {
        self.ptr.as_ptr()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_mmap_allocator_creation() {
        let allocator = MemoryMappedAllocator::new(16 * 1024);
        assert!(allocator.should_use_mmap(20 * 1024));
        assert!(!allocator.should_use_mmap(8 * 1024));
    }

    #[test]
    fn test_mmap_allocation() {
        let allocator = MemoryMappedAllocator::default();
        let size = 64 * 1024; // 64KB

        let mut allocation = allocator.allocate(size).unwrap();
        assert_eq!(allocation.size(), size);
        assert!(allocation.actual_size() >= size);

        // Test that we can write to the memory
        let slice = allocation.as_mut_slice();
        slice[0] = 42;
        slice[size - 1] = 84;

        let slice = allocation.as_slice();
        assert_eq!(slice[0], 42);
        assert_eq!(slice[size - 1], 84);

        allocator.deallocate(allocation).unwrap();

        let stats = allocator.stats();
        assert_eq!(stats.total_allocated, size as u64);
        assert_eq!(stats.total_freed, size as u64);
        assert_eq!(stats.mmap_calls, 1);
    }

    #[test]
    fn test_mmap_cache() {
        let allocator = MemoryMappedAllocator::default();
        let size = 64 * 1024;

        // Allocate and deallocate to populate cache
        let allocation1 = allocator.allocate(size).unwrap();
        allocator.deallocate(allocation1).unwrap();

        let stats_before = allocator.stats();

        // Allocate again, should hit cache
        let allocation2 = allocator.allocate(size).unwrap();
        allocator.deallocate(allocation2).unwrap();

        let stats_after = allocator.stats();

        // Should have one cache hit
        assert_eq!(stats_after.cache_hits, stats_before.cache_hits + 1);
        // Should not have made additional mmap calls
        assert_eq!(stats_after.mmap_calls, stats_before.mmap_calls);
    }

    #[test]
    fn test_mmap_different_sizes() {
        let allocator = MemoryMappedAllocator::default();

        let sizes = vec![16 * 1024, 32 * 1024, 64 * 1024, 128 * 1024];
        let mut allocations = Vec::new();

        // Allocate different sizes
        for size in &sizes {
            let allocation = allocator.allocate(*size).unwrap();
            assert_eq!(allocation.size(), *size);
            allocations.push(allocation);
        }

        // Deallocate all
        for allocation in allocations {
            allocator.deallocate(allocation).unwrap();
        }

        let stats = allocator.stats();
        assert_eq!(stats.mmap_calls, sizes.len() as u64);
        assert_eq!(stats.total_allocated, sizes.iter().sum::<usize>() as u64);
        assert_eq!(stats.total_freed, sizes.iter().sum::<usize>() as u64);
    }

    #[test]
    fn test_mmap_cache_limit() {
        let allocator = MemoryMappedAllocator::default();
        let size = 64 * 1024;

        // Hold all allocations live at once so that deallocating them
        // actually overflows the per-size cache limit.
        let allocations: Vec<_> = (0..10)
            .map(|_| allocator.allocate(size).unwrap())
            .collect();
        for allocation in allocations {
            allocator.deallocate(allocation).unwrap();
        }

        let stats = allocator.stats();
        // At most MAX_CACHED_REGIONS_PER_SIZE regions stay cached...
        assert!(stats.cached_regions <= 4); // MAX_CACHED_REGIONS_PER_SIZE
        // ...and the overflow is unmapped immediately.
        assert!(stats.munmap_calls >= 6);
    }

    #[test]
    fn test_clear_cache() {
        let allocator = MemoryMappedAllocator::default();
        let size = 64 * 1024;

        // Populate cache
        let allocation = allocator.allocate(size).unwrap();
        allocator.deallocate(allocation).unwrap();

        let stats_before = allocator.stats();
        assert!(stats_before.cached_regions > 0);

        // Clear cache
        allocator.clear_cache().unwrap();

        let stats_after = allocator.stats();
        assert_eq!(stats_after.cached_regions, 0);
        assert!(stats_after.munmap_calls > stats_before.munmap_calls);
    }

    #[test]
    fn test_invalid_allocation_size() {
        let allocator = MemoryMappedAllocator::new(16 * 1024);

        // Too small for mmap
        let result = allocator.allocate(8 * 1024);
        assert!(result.is_err());
    }
}