zipora 3.1.7

High-performance Rust implementation providing advanced data structures and compression algorithms with memory safety guarantees. Features LRU page cache, sophisticated caching layer, fiber-based concurrency, real-time compression, secure memory pools, SIMD optimizations, and complete C FFI for migration from C++.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
//! Performance validation tests for IntVec<T>
//!
//! These tests validate the performance targets and ensure the implementation
//! meets the compression ratio and speed requirements.
//!
//! **IMPORTANT**: These tests only run in release mode (`cargo test --release`).
//! Debug mode has no optimizations, making timing measurements meaningless.

#[cfg(test)]
use super::*;
#[cfg(test)]
use std::time::Instant;

/// Deterministic input generators shared by the performance tests.
#[cfg(test)]
struct PerfDataGen;

#[cfg(test)]
impl PerfDataGen {
    /// Ascending values `0, 1, 2, ...`, one per element.
    ///
    /// The upper bound is `size` narrowed to `u32` with an `as` cast.
    pub fn sorted_sequence(size: usize) -> Vec<u32> {
        let upper_bound = size as u32;
        (0..upper_bound).collect()
    }

    /// Values that repeatedly cycle through `0..=999`, `size` elements in total.
    pub fn small_range(size: usize) -> Vec<u32> {
        (0..1000u32).cycle().take(size).collect()
    }

    /// Evenly spaced values: element `i` is `i * 113 + 1000`, narrowed to `u32`.
    pub fn sparse_data(size: usize) -> Vec<u32> {
        let mut values = Vec::with_capacity(size);
        for idx in 0..size {
            let spaced = idx * 113 + 1000;
            values.push(spaced as u32);
        }
        values
    }

    /// Values drawn from the repeating triple `42, 43, 44`.
    pub fn nearly_identical(size: usize) -> Vec<u32> {
        const TRIPLE: [u32; 3] = [42, 43, 44];
        TRIPLE.iter().copied().cycle().take(size).collect()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when the crate is compiled without `debug_assertions`.
    ///
    /// The performance tests use this to bail out early in debug builds, where
    /// timings are not representative. Run them with `cargo test --release`.
    fn is_release_build() -> bool {
        cfg!(not(debug_assertions))
    }

    /// Checks compression ratio, memory footprint and round-trip correctness of
    /// `IntVec<u32>` for four data patterns at three dataset sizes.
    ///
    /// Returns immediately when `is_release_build()` reports a debug build.
    #[test]
    fn test_compression_performance() {
        if !is_release_build() {
            println!("Skipping performance test in debug mode");
            return;
        }

        for size in [1000usize, 10000, 100000] {
            println!(
                "\n=== Testing compression performance for {} elements ===",
                size
            );

            // One freshly generated dataset per named pattern
            let datasets = [
                ("sorted", PerfDataGen::sorted_sequence(size)),
                ("small_range", PerfDataGen::small_range(size)),
                ("sparse", PerfDataGen::sparse_data(size)),
                ("nearly_identical", PerfDataGen::nearly_identical(size)),
            ];

            for (pattern, data) in datasets {
                // Each u32 occupies 4 bytes uncompressed
                let original_size = data.len() * 4;

                // Only the construction itself is timed
                let timer = Instant::now();
                let compressed = IntVec::<u32>::from_slice(&data).unwrap();
                let compression_time = timer.elapsed();

                let ratio = compressed.compression_ratio();
                let memory_usage = compressed.memory_usage();
                let stats = compressed.stats();

                let original_mb = original_size as f64 / 1_048_576.0;
                let throughput_mb_s = original_mb / compression_time.as_secs_f64();

                println!("{} pattern ({} elements):", pattern, size);
                println!("  Original size: {} bytes", original_size);
                println!("  Compressed size: {} bytes", stats.compressed_size);
                println!("  Memory usage: {} bytes", memory_usage);
                println!("  Compression ratio: {:.3}", ratio);
                println!("  Space savings: {:.1}%", (1.0 - ratio) * 100.0);
                println!("  Compression time: {:?}", compression_time);
                println!("  Throughput: {:.1} MB/s", throughput_mb_s);

                // Per-pattern compression targets
                if matches!(pattern, "sorted" | "nearly_identical") {
                    assert!(
                        ratio < 0.2,
                        "Pattern '{}' should achieve >80% compression, got {:.3}",
                        pattern,
                        ratio
                    );
                } else if pattern == "small_range" {
                    assert!(
                        ratio < 0.4,
                        "Pattern '{}' should achieve >60% compression, got {:.3}",
                        pattern,
                        ratio
                    );
                } else {
                    // Remaining patterns only need to avoid growing the data
                    assert!(ratio <= 1.0, "Should not expand data");
                }

                // In-memory footprint must beat the plain u32 array
                assert!(
                    memory_usage < original_size,
                    "Should use less memory than original"
                );

                // Round-trip check: every element must read back unchanged
                for (i, expected) in data.iter().copied().enumerate() {
                    assert_eq!(
                        compressed.get(i),
                        Some(expected),
                        "Mismatch at index {} for pattern '{}'",
                        i,
                        pattern
                    );
                }
            }
        }
    }

    /// Measures `IntVec::get` throughput over 10,000 strided indices into a
    /// 100,000-element small-range dataset and requires more than 1M accesses/sec.
    ///
    /// Returns immediately when `is_release_build()` reports a debug build.
    #[test]
    fn test_random_access_performance() {
        if !is_release_build() {
            println!("Skipping performance test in debug mode");
            return;
        }
        let size = 100000;
        let data = PerfDataGen::small_range(size);
        let compressed = IntVec::<u32>::from_slice(&data).unwrap();

        // Deterministic, non-sequential indices (stride 97, wrapped to the dataset size)
        let indices: Vec<usize> = (0..10000).map(|i| (i * 97) % size).collect();

        println!("\n=== Testing random access performance ===");
        println!("Dataset size: {} elements", size);
        println!("Number of accesses: {}", indices.len());

        let start = Instant::now();
        for &index in &indices {
            // The lookup result is otherwise unused, so an optimized build could
            // delete the whole loop and make the timing meaningless. `black_box`
            // keeps both the index and the result opaque to the optimizer.
            let _value = std::hint::black_box(compressed.get(std::hint::black_box(index)));
        }
        let access_time = start.elapsed();

        let access_per_sec = indices.len() as f64 / access_time.as_secs_f64();

        println!("Total time: {:?}", access_time);
        println!("Access rate: {:.0} accesses/sec", access_per_sec);
        println!(
            "Average access time: {:.1} ns",
            access_time.as_nanos() as f64 / indices.len() as f64
        );

        // Validate performance - should be very fast (millions of accesses per second)
        assert!(
            access_per_sec > 1_000_000.0,
            "Random access should exceed 1M accesses/sec, got {:.0}",
            access_per_sec
        );
    }

    /// Measures `IntVec::get` throughput when reading all 100,000 elements in
    /// index order and requires more than 2M accesses/sec.
    ///
    /// Returns immediately when `is_release_build()` reports a debug build.
    #[test]
    fn test_sequential_access_performance() {
        if !is_release_build() {
            println!("Skipping performance test in debug mode");
            return;
        }
        let size = 100000;
        let data = PerfDataGen::small_range(size);
        let compressed = IntVec::<u32>::from_slice(&data).unwrap();

        println!("\n=== Testing sequential access performance ===");
        println!("Dataset size: {} elements", size);

        let start = Instant::now();
        for i in 0..size {
            // Without `black_box` the unused lookups can be optimized away in a
            // release build, leaving an empty loop to be timed.
            let _value = std::hint::black_box(compressed.get(std::hint::black_box(i)));
        }
        let access_time = start.elapsed();

        let throughput = size as f64 / access_time.as_secs_f64();

        println!("Total time: {:?}", access_time);
        println!("Throughput: {:.0} accesses/sec", throughput);
        println!(
            "Average access time: {:.1} ns",
            access_time.as_nanos() as f64 / size as f64
        );

        // Sequential access should be even faster than random access
        assert!(
            throughput > 2_000_000.0,
            "Sequential access should exceed 2M accesses/sec, got {:.0}",
            throughput
        );
    }

    /// Times `IntVec::<u32>::from_slice` on small-range data at three sizes and
    /// asserts a size-dependent minimum throughput in MB/s.
    ///
    /// Returns immediately when `is_release_build()` reports a debug build.
    #[test]
    fn test_construction_performance() {
        if !is_release_build() {
            println!("Skipping performance test in debug mode");
            return;
        }

        println!("\n=== Testing construction performance ===");

        for size in [10000usize, 100000, 1000000] {
            let data = PerfDataGen::small_range(size);
            // Uncompressed footprint: 4 bytes per u32, expressed in MiB
            let data_size_mb = (data.len() * 4) as f64 / 1_048_576.0;

            let timer = Instant::now();
            let compressed = IntVec::<u32>::from_slice(&data).unwrap();
            let construction_time = timer.elapsed();

            let throughput_mb_s = data_size_mb / construction_time.as_secs_f64();

            println!("Size: {} elements ({:.1} MB)", size, data_size_mb);
            println!("  Construction time: {:?}", construction_time);
            println!("  Throughput: {:.1} MB/s", throughput_mb_s);
            println!("  Compression ratio: {:.3}", compressed.compression_ratio());

            // Minimum acceptable throughput grows with dataset size: small inputs
            // are dominated by per-element analysis overhead, large inputs by the
            // compression work itself. Values are conservative for loaded machines.
            let expected_throughput = match data_size_mb {
                mb if mb < 0.1 => 15.0, // Small datasets (40KB): analysis overhead dominates
                mb if mb < 1.0 => 30.0, // Medium datasets: better amortization
                _ => 50.0,              // Large datasets: compression throughput dominates
            };

            assert!(
                throughput_mb_s > expected_throughput,
                "Construction should exceed {:.0} MB/s for {:.1} MB dataset, got {:.1}",
                expected_throughput,
                data_size_mb,
                throughput_mb_s
            );
        }
    }

    /// Builds `IntVec` instances for `u8`, `u32` and `u64` from small-range data,
    /// reports timing and memory figures, and asserts per-type compression ratios
    /// plus correctness of the first 1000 elements.
    ///
    /// Returns immediately when `is_release_build()` reports a debug build.
    #[test]
    fn test_integer_type_performance() {
        if !is_release_build() {
            println!("Skipping performance test in debug mode");
            return;
        }
        let element_count = 50000;

        println!("\n=== Testing performance across integer types ===");

        // Every type gets a deliberately narrow value range so the comparison is fair:
        //   u8  cycles through 0-99  (7 bits needed out of 8)
        //   u32 cycles through 0-999 (10 bits needed out of 32)
        //   u64 cycles through 0-999 (10 bits needed out of 64)

        // u8 input and timed construction
        let u8_data: Vec<u8> = (0..100u8).cycle().take(element_count).collect();
        let timer = Instant::now();
        let u8_compressed = IntVec::<u8>::from_slice(&u8_data).unwrap();
        let u8_time = timer.elapsed();

        // u32 input and timed construction
        let u32_data: Vec<u32> = (0..1000u32).cycle().take(element_count).collect();
        let timer = Instant::now();
        let u32_compressed = IntVec::<u32>::from_slice(&u32_data).unwrap();
        let u32_time = timer.elapsed();

        // u64 input and timed construction
        let u64_data: Vec<u64> = (0..1000u64).cycle().take(element_count).collect();
        let timer = Instant::now();
        let u64_compressed = IntVec::<u64>::from_slice(&u64_data).unwrap();
        let u64_time = timer.elapsed();

        println!(
            "u8:  time={:?}, ratio={:.3}, memory={} bytes",
            u8_time,
            u8_compressed.compression_ratio(),
            u8_compressed.memory_usage()
        );
        println!(
            "u32: time={:?}, ratio={:.3}, memory={} bytes",
            u32_time,
            u32_compressed.compression_ratio(),
            u32_compressed.memory_usage()
        );
        println!(
            "u64: time={:?}, ratio={:.3}, memory={} bytes",
            u64_time,
            u64_compressed.compression_ratio(),
            u64_compressed.memory_usage()
        );

        // Expected savings follow from the bit widths above:
        //   u8:  7 of 8 bits   -> ~12.5% theoretical savings (overhead limits the real figure)
        //   u32: 10 of 32 bits -> ~69% theoretical savings
        //   u64: 10 of 64 bits -> ~84% theoretical savings
        let u8_ratio = u8_compressed.compression_ratio();
        assert!(
            u8_ratio < 0.9,
            "u8 should achieve some compression, got {:.3}",
            u8_compressed.compression_ratio()
        );
        let u32_ratio = u32_compressed.compression_ratio();
        assert!(
            u32_ratio < 0.5,
            "u32 should achieve good compression, got {:.3}",
            u32_compressed.compression_ratio()
        );
        let u64_ratio = u64_compressed.compression_ratio();
        assert!(
            u64_ratio < 0.5,
            "u64 should achieve good compression, got {:.3}",
            u64_compressed.compression_ratio()
        );

        // Spot-check the first 1000 elements of each vector against its input
        let expected_triples = u8_data
            .iter()
            .zip(u32_data.iter())
            .zip(u64_data.iter())
            .enumerate()
            .take(1000);
        for (i, ((&narrow, &medium), &wide)) in expected_triples {
            assert_eq!(u8_compressed.get(i), Some(narrow));
            assert_eq!(u32_compressed.get(i), Some(medium));
            assert_eq!(u64_compressed.get(i), Some(wide));
        }
    }

    /// Compares the memory footprint of a compressed 100,000-element small-range
    /// dataset against the raw `u32` array and against the reported compressed size.
    ///
    /// Returns immediately when `is_release_build()` reports a debug build.
    #[test]
    fn test_memory_efficiency() {
        if !is_release_build() {
            println!("Skipping performance test in debug mode");
            return;
        }

        // Byte count expressed in MiB, for the report lines below
        let to_mb = |bytes: usize| bytes as f64 / 1_048_576.0;

        let element_count = 100000;
        let data = PerfDataGen::small_range(element_count);
        // Raw footprint: 4 bytes per u32
        let original_size = data.len() * 4;

        let compressed = IntVec::<u32>::from_slice(&data).unwrap();
        let memory_usage = compressed.memory_usage();
        let compression_ratio = compressed.compression_ratio();

        println!("\n=== Memory efficiency analysis ===");
        println!(
            "Original size: {} bytes ({:.1} MB)",
            original_size,
            to_mb(original_size)
        );
        println!(
            "Memory usage: {} bytes ({:.1} MB)",
            memory_usage,
            to_mb(memory_usage)
        );
        println!("Compression ratio: {:.3}", compression_ratio);
        println!("Space savings: {:.1}%", (1.0 - compression_ratio) * 100.0);

        // Efficiency targets: smaller than the raw data, and at least halved
        assert!(
            memory_usage < original_size,
            "Should use less memory than original"
        );
        assert!(
            compression_ratio < 0.5,
            "Should achieve >50% compression for this pattern"
        );

        // Bookkeeping beyond the compressed payload must stay below 2x
        let stats = compressed.stats();
        let overhead = memory_usage as f64 / stats.compressed_size as f64;
        println!("Memory overhead factor: {:.2}x", overhead);
        assert!(
            overhead < 2.0,
            "Memory overhead should be reasonable, got {:.2}x",
            overhead
        );
    }

    /// Stress test on 1M small-range elements: reports construction time and
    /// random-access rate, asserts the compression ratio and memory footprint,
    /// and verifies every 1000th element against the input.
    ///
    /// Returns immediately when `is_release_build()` reports a debug build.
    #[test]
    fn test_stress_large_dataset() {
        if !is_release_build() {
            println!("Skipping performance test in debug mode");
            return;
        }
        let size = 1_000_000; // 1M elements
        let data = PerfDataGen::small_range(size);
        let original_size_mb = (data.len() * 4) as f64 / 1_048_576.0;

        println!("\n=== Stress test with large dataset ===");
        println!(
            "Dataset size: {} elements ({:.1} MB)",
            size, original_size_mb
        );

        let start = Instant::now();
        let compressed = IntVec::<u32>::from_slice(&data).unwrap();
        let construction_time = start.elapsed();

        let ratio = compressed.compression_ratio();
        let memory_mb = compressed.memory_usage() as f64 / 1_048_576.0;

        println!("Construction time: {:?}", construction_time);
        println!("Compression ratio: {:.3}", ratio);
        println!("Memory usage: {:.1} MB", memory_mb);
        println!(
            "Throughput: {:.1} MB/s",
            original_size_mb / construction_time.as_secs_f64()
        );

        // Test random access on large dataset (deterministic stride-997 indices)
        let test_indices: Vec<usize> = (0..10000).map(|i| (i * 997) % size).collect();
        let start = Instant::now();
        for &idx in &test_indices {
            // `black_box` stops the optimizer from discarding the unused lookups,
            // which would otherwise make the reported access rate meaningless.
            let _value = std::hint::black_box(compressed.get(std::hint::black_box(idx)));
        }
        let access_time = start.elapsed();

        println!("Random access time (10K accesses): {:?}", access_time);
        println!(
            "Random access rate: {:.0} accesses/sec",
            test_indices.len() as f64 / access_time.as_secs_f64()
        );

        // Validate large dataset performance
        assert!(
            ratio < 0.5,
            "Should maintain good compression for large datasets"
        );
        assert!(
            memory_mb < original_size_mb,
            "Should use less memory than original"
        );

        // Verify correctness on sample
        for i in (0..size).step_by(1000) {
            assert_eq!(compressed.get(i), Some(data[i]), "Mismatch at index {}", i);
        }
    }
}