blvm-consensus 0.1.10

Bitcoin Commons BLVM: Direct mathematical implementation of Bitcoin consensus rules from the Orange Paper
//! BLVM Runtime Optimization Passes
//!
//! Additional optimization passes targeting an estimated 10-30% performance gain.
//!
//! This module provides runtime optimization passes:
//! - Constant folding (pre-computed constants)
//! - Bounds check optimization (proven bounds)
//! - Inlining hints (hot function markers)
//! - Memory layout optimization (cache-friendly structures)
//!
//! Reference: Orange Paper Section 13.1 - Performance Considerations

use crate::constants::*;

/// Pre-computed constants for constant folding optimization
///
/// These constants are computed at compile time to avoid runtime computation
/// in hot paths. Reference: BLVM Optimization Pass 2 - Constant Folding
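///
/// # Examples
/// ```rust
/// // Hedged sketch: assumes the `production` feature, which gates this module,
/// // and that MAX_MONEY_U64 is the total supply cap in satoshis.
/// use blvm_consensus::optimizations::precomputed_constants::MAX_MONEY_U64;
///
/// let output_value: u64 = 5_000_000_000; // 50 BTC in satoshis
/// assert!(output_value <= MAX_MONEY_U64);
/// ```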
#[cfg(feature = "production")]
pub mod precomputed_constants {
    use super::*;

    /// Pre-computed: 2^64 - 1 (used for wrapping arithmetic checks)
    pub const U64_MAX: u64 = u64::MAX;

    /// Pre-computed: MAX_MONEY as u64 (for comparisons)
    pub const MAX_MONEY_U64: u64 = MAX_MONEY as u64;

    /// Pre-computed: Inverse of SATOSHIS_PER_BTC (for BTC conversion)
    pub const BTC_PER_SATOSHI: f64 = 1.0 / (SATOSHIS_PER_BTC as f64);

    /// Pre-computed: 2^32 - 1 (for 32-bit wrapping checks)
    pub const U32_MAX: u32 = u32::MAX;

    /// Pre-computed: Number of satoshis in 1 BTC (for readability)
    pub const ONE_BTC_SATOSHIS: i64 = SATOSHIS_PER_BTC;
}

/// Memory layout optimization: cache-friendly hash array
///
/// Cache-aligned hash for optimized batch operations. Each hash is aligned to
/// a 32-byte boundary, which:
/// - Reduces cache line splits
/// - Improves prefetching behavior
/// - Better fits modern CPU cache architectures (64-byte cache lines)
///
/// Reference: BLVM Optimization Pass 3 - Memory Layout Optimization
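///
/// # Examples
/// ```rust
/// use blvm_consensus::optimizations::CacheAlignedHash;
///
/// // Hedged sketch: wrap a raw 32-byte digest; the type guarantees alignment.
/// let h = CacheAlignedHash::new([0u8; 32]);
/// assert_eq!(h.as_bytes(), &[0u8; 32]);
/// assert_eq!(std::mem::align_of::<CacheAlignedHash>(), 32);
/// ```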
#[repr(align(32))]
#[derive(Clone)]
pub struct CacheAlignedHash([u8; 32]);

impl CacheAlignedHash {
    #[inline]
    pub fn new(hash: [u8; 32]) -> Self {
        Self(hash)
    }

    #[inline]
    pub fn as_bytes(&self) -> &[u8; 32] {
        &self.0
    }
}

/// Memory prefetching optimization
///
/// Provides platform-specific prefetch hints to improve cache performance
/// for sequential memory accesses. Used before batch UTXO lookups and
/// other sequential data structure traversals.
///
/// Reference: BLVM Optimization Pass 1.3 - Memory Prefetching
#[cfg(feature = "production")]
pub mod prefetch {
    /// Prefetch data for read access
    ///
    /// Hints the CPU to prefetch data into cache before it's needed.
    /// This improves performance for sequential memory access patterns.
    ///
    /// # Safety
    /// The pointer must be valid, but it doesn't need to be dereferenceable
    /// at the time of the call. The prefetch is a hint and may be ignored.
    #[cfg(target_arch = "x86_64")]
    #[inline(always)]
    pub unsafe fn prefetch_read(ptr: *const i8) {
        use std::arch::x86_64::{_mm_prefetch, _MM_HINT_T0};
        _mm_prefetch::<_MM_HINT_T0>(ptr);
    }

    #[cfg(target_arch = "aarch64")]
    #[inline(always)]
    pub unsafe fn prefetch_read(ptr: *const i8) {
        use std::arch::aarch64::_prefetch;
        _prefetch(ptr, 0, 0); // rw = 0 (read), locality = 0 (streaming / use-once data)
    }

    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    #[inline(always)]
    pub unsafe fn prefetch_read(_ptr: *const i8) {
        // No-op for unsupported architectures
    }

    /// Prefetch a slice of data for sequential access
    ///
    /// Prefetches the next cache line(s) of data to improve sequential access.
    /// Safe wrapper around prefetch_read that works with slices.
    #[inline(always)]
    pub fn prefetch_slice<T>(slice: &[T], index: usize) {
        if index < slice.len() {
            unsafe {
                let ptr = slice.as_ptr().add(index) as *const i8;
                prefetch_read(ptr);
            }
        }
    }

    /// Prefetch multiple elements ahead in a slice
    ///
    /// Prefetches elements at `index + offset` to prepare for future access.
    /// Useful for sequential loops where you know you'll access elements ahead.
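    ///
    /// # Examples
    /// ```rust
    /// // Hedged sketch: prefetch a few elements ahead while walking a slice
    /// // (the `production` feature gates this module).
    /// use blvm_consensus::optimizations::prefetch::prefetch_ahead;
    ///
    /// let data: Vec<u64> = (0..64u64).collect();
    /// let mut sum = 0u64;
    /// for i in 0..data.len() {
    ///     prefetch_ahead(&data, i, 4); // hint: element i + 4 is needed soon
    ///     sum += data[i];
    /// }
    /// assert_eq!(sum, 2016);
    /// ```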
    #[inline(always)]
    pub fn prefetch_ahead<T>(slice: &[T], index: usize, offset: usize) {
        let prefetch_index = index.saturating_add(offset);
        prefetch_slice(slice, prefetch_index);
    }
}

/// Memory layout optimization: compact stack frame
///
/// Compact stack frame for script execution, laid out for cache locality.
/// `#[repr(C, packed)]` removes all padding, shrinking each frame from
/// 12 bytes to 9.
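///
/// # Examples
/// ```rust
/// use blvm_consensus::optimizations::CompactStackFrame;
///
/// // Hedged sketch: 0x76 is OP_DUP; the packed layout is 9 bytes total.
/// let _frame = CompactStackFrame::new(0x76, 0, 0, 1);
/// assert_eq!(std::mem::size_of::<CompactStackFrame>(), 9);
/// ```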
#[repr(C, packed)]
pub struct CompactStackFrame {
    pub opcode: u8,
    pub flags: u32,
    pub script_offset: u16,
    pub stack_height: u16,
}

impl CompactStackFrame {
    #[inline]
    pub fn new(opcode: u8, flags: u32, script_offset: u16, stack_height: u16) -> Self {
        Self {
            opcode,
            flags,
            script_offset,
            stack_height,
        }
    }
}

/// Inlining hints for hot functions
///
/// Wrap a function in `hot_inline!` to mark it `#[inline(always)]`.
/// Such functions are called in tight loops and benefit from aggressive inlining.
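///
/// # Examples
/// ```rust
/// // Hedged sketch: the macro wraps an item and applies #[inline(always)] to it
/// // (the `production` feature gates this macro).
/// use blvm_consensus::hot_inline;
///
/// hot_inline! {
///     fn add_one(x: u64) -> u64 {
///         x + 1
///     }
/// }
/// assert_eq!(add_one(41), 42);
/// ```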
#[macro_export]
#[cfg(feature = "production")]
macro_rules! hot_inline {
    ($item:item) => {
        #[inline(always)]
        $item
    };
}

/// Constant folding: Pre-compute common hash results
///
/// Caches common hash pre-images for constant folding.
#[cfg(feature = "production")]
pub mod constant_folding {
    /// Pre-computed: SHA256 of empty string
    pub const EMPTY_STRING_HASH: [u8; 32] = [
        0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9,
        0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52,
        0xb8, 0x55,
    ];

    /// Pre-computed: Double SHA256 of empty string
    pub const EMPTY_STRING_DOUBLE_HASH: [u8; 32] = [
        0x5d, 0xf6, 0xe0, 0xe2, 0x76, 0x13, 0x59, 0xf3, 0x73, 0x9a, 0x1c, 0x6f, 0x87, 0x40, 0x64,
        0x0a, 0xf1, 0x2e, 0xc7, 0xc3, 0x72, 0x4a, 0x5c, 0x2c, 0xa5, 0xf3, 0x0f, 0x26, 0x60, 0x87,
        0x7e, 0x6b,
    ];

    /// Check if input matches empty string hash (constant folding)
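    ///
    /// # Examples
    /// ```rust
    /// // Hedged sketch: compares against the precomputed SHA256("") digest,
    /// // skipping a hash computation for the common empty-input case.
    /// use blvm_consensus::optimizations::constant_folding::{is_empty_hash, EMPTY_STRING_HASH};
    ///
    /// assert!(is_empty_hash(&EMPTY_STRING_HASH));
    /// assert!(!is_empty_hash(&[0u8; 32]));
    /// ```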
    #[inline(always)]
    pub fn is_empty_hash(hash: &[u8; 32]) -> bool {
        *hash == EMPTY_STRING_HASH
    }

    /// Check if input matches empty string double hash (constant folding)
    #[inline(always)]
    pub fn is_empty_double_hash(hash: &[u8; 32]) -> bool {
        *hash == EMPTY_STRING_DOUBLE_HASH
    }

    /// Constant-fold: Check if hash is zero (all zeros)
    #[inline(always)]
    pub fn is_zero_hash(hash: &[u8; 32]) -> bool {
        hash.iter().all(|&b| b == 0)
    }
}

/// Dead code elimination markers
///
/// Functions/constants marked with this can be eliminated if unused.
#[cfg(feature = "production")]
#[allow(dead_code)]
pub mod dead_code_elimination {
    /// Mark code for dead code elimination analysis
    /// This is a marker function - the compiler can eliminate unused paths
    #[inline(never)]
    #[cold]
    pub fn mark_unused() {
        // This function never executes in production builds
        // It's a marker for dead code elimination pass
    }

    /// Hint to compiler that branch is unlikely (dead code elimination)
    ///
    /// Note: In stable Rust, this is a no-op but serves as documentation
    /// for future optimization opportunities (unstable `likely`/`unlikely` intrinsics).
    #[inline(always)]
    pub fn unlikely(condition: bool) -> bool {
        // Stable Rust doesn't have likely/unlikely intrinsics
        // This is a placeholder for future optimization
        condition
    }
}

/// SIMD Vectorization: Batch hash operations
///
/// Provides batch hash processing for parallel hash operations.
/// Leverages the SIMD support in the sha2 crate (`asm` feature) plus Rayon for
/// multi-core parallelization.
///
/// Provides batch functions for:
/// - SHA256 and double SHA256 (Bitcoin standard)
/// - RIPEMD160 and HASH160 (OP_HASH160)
///
/// Uses chunked processing for better cache locality and parallelizes across CPU cores
/// when batch size is large enough (≥8 items).
///
/// Reference: BLVM Optimization Pass 5 - SIMD Vectorization
#[cfg(feature = "production")]
pub mod simd_vectorization {
    use crate::crypto::OptimizedSha256;
    use digest::Digest;
    use ripemd::Ripemd160;

    /// Minimum batch size for parallelization; the overhead is not worth it for
    /// smaller batches. batch_sha256 uses OptimizedSha256 (SHA-NI when available)
    /// for consistency with batch_double_sha256_aligned.
    const PARALLEL_THRESHOLD: usize = 8;

    /// Chunk size for cache-friendly processing. Hardware-derived via ibd_tuning.
    #[inline]
    fn chunk_size() -> usize {
        blvm_primitives::ibd_tuning::hash_batch_chunk_size()
    }

    /// Batch SHA256: Compute SHA256 for multiple independent inputs
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of 32-byte hashes, one per input (in same order)
    ///
    /// # Performance
    /// - Small batches (< 4 items): Sequential (overhead not worth parallelization)
    /// - Medium batches (4-7 items): Chunked sequential
    /// - Large batches (≥8 items): Multi-core parallelization with Rayon
    ///
    /// # Optimizations
    /// - Uses sha2 crate with "asm" feature for optimized assembly
    /// - For large batches, leverages Rayon for multi-core parallelization
    /// - AVX2 batch optimization available via `crypto::avx2_batch` module
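    ///
    /// # Examples
    /// ```rust
    /// // Hedged sketch: assumes the `production` feature, which gates this module.
    /// use blvm_consensus::optimizations::simd_vectorization::batch_sha256;
    ///
    /// let inputs: [&[u8]; 2] = [b"hello", b"world"];
    /// let hashes = batch_sha256(&inputs);
    /// assert_eq!(hashes.len(), 2);
    /// // Each digest is a standard 32-byte SHA256 output.
    /// assert_eq!(hashes[0].len(), 32);
    /// ```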
    pub fn batch_sha256(inputs: &[&[u8]]) -> Vec<[u8; 32]> {
        if inputs.is_empty() {
            return Vec::new();
        }

        // Small batches: sequential processing. Use OptimizedSha256 (SHA-NI when available).
        if inputs.len() < 4 {
            let hasher = OptimizedSha256::new();
            return inputs.iter().map(|input| hasher.hash(input)).collect();
        }

        // Medium batches: chunked sequential processing
        if inputs.len() < PARALLEL_THRESHOLD {
            let hasher = OptimizedSha256::new();
            let mut results = Vec::with_capacity(inputs.len());
            for chunk in inputs.chunks(chunk_size()) {
                for input in chunk {
                    results.push(hasher.hash(input));
                }
            }
            return results;
        }

        // Large batches: try AVX2 first, then fall back to multi-core parallelization
        #[cfg(target_arch = "x86_64")]
        {
            use crate::crypto::sha256_avx2;
            if sha256_avx2::is_avx2_available() {
                // Use AVX2 batch processing for chunks of 8
                use crate::crypto::avx2_batch;
                return avx2_batch::batch_sha256_avx2(inputs);
            }
        }

        // Fallback: Multi-core parallelization using Rayon. Each worker gets OptimizedSha256.
        use rayon::prelude::*;

        inputs
            .par_chunks(chunk_size())
            .map(|chunk| {
                let hasher = OptimizedSha256::new();
                chunk
                    .iter()
                    .map(|input| hasher.hash(input))
                    .collect::<Vec<_>>()
            })
            .flatten()
            .collect()
    }

    /// Batch double SHA256: Compute SHA256(SHA256(x)) for multiple inputs
    ///
    /// This is Bitcoin's standard hash function used for transaction IDs, block hashes, etc.
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of 32-byte hashes, one per input (in same order)
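    ///
    /// # Examples
    /// ```rust
    /// // Hedged sketch: double SHA256 of the empty input equals the precomputed
    /// // constant in `constant_folding`.
    /// use blvm_consensus::optimizations::simd_vectorization::batch_double_sha256;
    /// use blvm_consensus::optimizations::constant_folding::EMPTY_STRING_DOUBLE_HASH;
    ///
    /// let inputs: [&[u8]; 1] = [b""];
    /// let hashes = batch_double_sha256(&inputs);
    /// assert_eq!(hashes[0], EMPTY_STRING_DOUBLE_HASH);
    /// ```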
    pub fn batch_double_sha256(inputs: &[&[u8]]) -> Vec<[u8; 32]> {
        // Use aligned version for better cache performance
        batch_double_sha256_aligned(inputs)
            .into_iter()
            .map(|h| *h.as_bytes())
            .collect()
    }

    /// Batch double SHA256 with cache-aligned output
    ///
    /// Returns cache-aligned hash structures for better memory performance.
    /// Uses 32-byte alignment for optimal cache line utilization.
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of cache-aligned 32-byte hashes, one per input (in same order)
    pub fn batch_double_sha256_aligned(inputs: &[&[u8]]) -> Vec<super::CacheAlignedHash> {
        if inputs.is_empty() {
            return Vec::new();
        }

        // Small batches: sequential processing (overhead not worth it)
        // Use OptimizedSha256 (SHA-NI when available) instead of sha2
        let hasher = OptimizedSha256::new();
        if inputs.len() < 4 {
            return inputs
                .iter()
                .map(|input| super::CacheAlignedHash::new(hasher.hash256(input)))
                .collect();
        }

        // Medium batches: chunked sequential processing
        if inputs.len() < PARALLEL_THRESHOLD {
            let mut results = Vec::with_capacity(inputs.len());
            for chunk in inputs.chunks(chunk_size()) {
                for input in chunk {
                    results.push(super::CacheAlignedHash::new(hasher.hash256(input)));
                }
            }
            return results;
        }

        // Large batches: parallelized processing using Rayon
        // Each worker gets SHA-NI via OptimizedSha256 (runtime detection)
        use rayon::prelude::*;

        inputs
            .par_chunks(chunk_size())
            .map(|chunk| {
                let hasher = OptimizedSha256::new();
                chunk
                    .iter()
                    .map(|input| super::CacheAlignedHash::new(hasher.hash256(input)))
                    .collect::<Vec<_>>()
            })
            .flatten()
            .collect()
    }

    /// Batch RIPEMD160: Compute RIPEMD160 for multiple inputs
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of 20-byte hashes, one per input (in same order)
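    ///
    /// # Examples
    /// ```rust
    /// // Hedged sketch: one 20-byte digest per input, order preserved.
    /// use blvm_consensus::optimizations::simd_vectorization::batch_ripemd160;
    ///
    /// let inputs: [&[u8]; 2] = [b"abc", b"def"];
    /// let hashes = batch_ripemd160(&inputs);
    /// assert_eq!(hashes.len(), 2);
    /// assert_eq!(hashes[0].len(), 20);
    /// ```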
    pub fn batch_ripemd160(inputs: &[&[u8]]) -> Vec<[u8; 20]> {
        if inputs.is_empty() {
            return Vec::new();
        }

        // Small batches: sequential processing
        if inputs.len() < 4 {
            return inputs
                .iter()
                .map(|input| {
                    let hash = Ripemd160::digest(input);
                    let mut result = [0u8; 20];
                    result.copy_from_slice(&hash);
                    result
                })
                .collect();
        }

        // Medium batches: chunked sequential processing
        if inputs.len() < PARALLEL_THRESHOLD {
            let mut results = Vec::with_capacity(inputs.len());
            for chunk in inputs.chunks(chunk_size()) {
                for input in chunk {
                    let hash = Ripemd160::digest(input);
                    let mut result = [0u8; 20];
                    result.copy_from_slice(&hash);
                    results.push(result);
                }
            }
            return results;
        }

        // Large batches: parallelized processing
        // Rayon is enabled via the 'production' feature
        use rayon::prelude::*;

        inputs
            .par_chunks(chunk_size())
            .map(|chunk| {
                chunk
                    .iter()
                    .map(|input| {
                        let hash = Ripemd160::digest(input);
                        let mut result = [0u8; 20];
                        result.copy_from_slice(&hash);
                        result
                    })
                    .collect::<Vec<_>>()
            })
            .flatten()
            .collect()
    }

    /// Batch HASH160: Compute RIPEMD160(SHA256(x)) for multiple inputs
    ///
    /// This is Bitcoin's HASH160 operation (OP_HASH160 in script).
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of 20-byte hashes, one per input (in same order)
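    ///
    /// # Examples
    /// ```rust
    /// // Hedged sketch: HASH160 = RIPEMD160(SHA256(x)), always 20 bytes per input.
    /// use blvm_consensus::optimizations::simd_vectorization::batch_hash160;
    ///
    /// let inputs: [&[u8]; 3] = [b"key1", b"key2", b"key3"];
    /// let hashes = batch_hash160(&inputs);
    /// assert_eq!(hashes.len(), 3);
    /// assert_eq!(hashes[0].len(), 20);
    /// ```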
    pub fn batch_hash160(inputs: &[&[u8]]) -> Vec<[u8; 20]> {
        if inputs.is_empty() {
            return Vec::new();
        }

        // Small batches: sequential processing. Use OptimizedSha256 (SHA-NI) for SHA256 part.
        if inputs.len() < 4 {
            let hasher = OptimizedSha256::new();
            return inputs
                .iter()
                .map(|input| {
                    let sha256_hash: [u8; 32] = hasher.hash(input);
                    let ripemd160_hash = Ripemd160::digest(sha256_hash);
                    let mut result = [0u8; 20];
                    result.copy_from_slice(&ripemd160_hash);
                    result
                })
                .collect();
        }

        // Medium batches: chunked sequential processing
        if inputs.len() < PARALLEL_THRESHOLD {
            let hasher = OptimizedSha256::new();
            let mut results = Vec::with_capacity(inputs.len());
            for chunk in inputs.chunks(chunk_size()) {
                for input in chunk {
                    let sha256_hash: [u8; 32] = hasher.hash(input);
                    let ripemd160_hash = Ripemd160::digest(sha256_hash);
                    let mut result = [0u8; 20];
                    result.copy_from_slice(&ripemd160_hash);
                    results.push(result);
                }
            }
            return results;
        }

        // Large batches: parallelized processing. Each worker gets OptimizedSha256.
        use rayon::prelude::*;

        inputs
            .par_chunks(chunk_size())
            .map(|chunk| {
                let hasher = OptimizedSha256::new();
                chunk
                    .iter()
                    .map(|input| {
                        let sha256_hash: [u8; 32] = hasher.hash(input);
                        let ripemd160_hash = Ripemd160::digest(sha256_hash);
                        let mut result = [0u8; 20];
                        result.copy_from_slice(&ripemd160_hash);
                        result
                    })
                    .collect::<Vec<_>>()
            })
            .flatten()
            .collect()
    }
}

#[cfg(feature = "production")]
pub use constant_folding::*;
#[cfg(feature = "production")]
pub use precomputed_constants::*;

/// Proven runtime bounds for BLVM optimizations
///
/// These bounds have been formally proven and can be used for runtime
/// optimizations without additional safety checks. Unlike proof-time limits
/// (in `_helpers::proof_limits`), these represent actual Bitcoin limits that
/// have been proven to hold in all cases.
///
/// Reference: BLVM Optimization Pass
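///
/// # Examples
/// ```rust
/// // Hedged sketch: size-check a serialized transaction against the proven bound.
/// use blvm_consensus::optimizations::proven_bounds::MAX_TX_SIZE_PROVEN;
///
/// let raw_tx = vec![0u8; 250];
/// assert!(raw_tx.len() <= MAX_TX_SIZE_PROVEN);
/// ```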
#[cfg(feature = "production")]
pub mod proven_bounds {
    use crate::constants::{MAX_INPUTS, MAX_OUTPUTS};

    /// Maximum transaction size (proven by formal verification in transaction.rs)
    pub const MAX_TX_SIZE_PROVEN: usize = 100000; // Bytes

    /// Maximum block size (proven by formal verification in block.rs)
    pub const MAX_BLOCK_SIZE_PROVEN: usize = 4000000; // Bytes (4MB)

    /// Maximum inputs per transaction (proven by formal verification)
    /// References actual Bitcoin limit from constants.rs
    pub const MAX_INPUTS_PROVEN: usize = MAX_INPUTS;

    /// Maximum outputs per transaction (proven by formal verification)
    /// References actual Bitcoin limit from constants.rs
    pub const MAX_OUTPUTS_PROVEN: usize = MAX_OUTPUTS;

    /// Maximum transactions per block (proven by formal verification)
    /// Note: consensus rules place no explicit limit on transaction count, but the
    /// practical limit is around 10,000 transactions per block given block size limits.
    pub const MAX_TRANSACTIONS_PROVEN: usize = 10000;

    /// Maximum previous headers for difficulty adjustment (proven by formal verification)
    pub const MAX_PREV_HEADERS_PROVEN: usize = 5;
}

/// Optimized access using proven bounds
///
/// Uses bounds proven by formal verification to optimize runtime access.
/// This is safe because formal proofs guarantee these bounds hold.
///
/// Reference: Formal proofs in transaction.rs, block.rs, mining.rs, pow.rs, etc.
/// These proofs formally verify that certain bounds always hold, allowing us to
/// use optimized access patterns without runtime bounds checks.
#[cfg(feature = "production")]
pub mod optimized_access {
    use super::proven_bounds;

    /// Get element with proven bounds check
    ///
    /// Uses proven maximum sizes to optimize bounds checking.
    /// For transactions proven to have <= MAX_INPUTS_PROVEN inputs,
    /// we can use optimized access patterns.
    ///
    /// # Safety
    /// This function is safe because formal proofs guarantee bounds.
    /// However, it still returns `Option` to handle cases where:
    /// - Runtime bounds differ from proof bounds (should not happen in practice)
    /// - Defensive programming (fail-safe)
    ///
    /// # Panics
    /// Never panics - always returns `None` if out of bounds.
    ///
    /// # Examples
    /// ```rust
    /// use blvm_consensus::optimizations::optimized_access::get_proven;
    /// use blvm_consensus::types::Transaction;
    ///
    /// # let tx = Transaction { version: 1, inputs: vec![].into(), outputs: vec![].into(), lock_time: 0 };
    /// # let index = 0;
    /// if let Some(input) = get_proven(&tx.inputs, index) {
    ///     // Safe to use
    /// }
    /// ```
    #[inline(always)]
    pub fn get_proven<T>(slice: &[T], index: usize) -> Option<&T> {
        // Formal proofs have proven index < MAX_SIZE in various proofs
        // We can use unsafe access for proven-safe indices
        // This is safe because formal proofs guarantee bounds
        if index < slice.len() {
            unsafe { Some(slice.get_unchecked(index)) }
        } else {
            None
        }
    }

    /// Pre-allocate buffer using proven maximum size
    ///
    /// Uses proven maximum sizes to avoid reallocation.
    /// For example, transaction buffers can be pre-sized to MAX_TX_SIZE_PROVEN.
    #[inline(always)]
    pub fn prealloc_proven<T>(max_size: usize) -> Vec<T> {
        // Pre-allocate to proven maximum to avoid reallocation
        Vec::with_capacity(max_size)
    }

    /// Pre-allocate transaction buffer using proven maximum
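    ///
    /// # Examples
    /// ```rust
    /// // Hedged sketch: the buffer starts with capacity for the proven
    /// // 100,000-byte maximum, so growth up to that size never reallocates.
    /// use blvm_consensus::optimizations::prealloc_tx_buffer;
    ///
    /// let buf = prealloc_tx_buffer();
    /// assert!(buf.capacity() >= 100_000);
    /// ```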
    #[inline(always)]
    pub fn prealloc_tx_buffer() -> Vec<u8> {
        prealloc_proven::<u8>(proven_bounds::MAX_TX_SIZE_PROVEN)
    }

    /// Pre-allocate block buffer using proven maximum
    #[inline(always)]
    pub fn prealloc_block_buffer() -> Vec<u8> {
        prealloc_proven::<u8>(proven_bounds::MAX_BLOCK_SIZE_PROVEN)
    }

    /// Get element with proven bounds (alias for get_proven for compatibility)
    #[inline(always)]
    pub fn get_proven_by_<T>(slice: &[T], index: usize) -> Option<&T> {
        get_proven(slice, index)
    }
}

/// Alias module for _optimized_access (for backward compatibility)
#[cfg(feature = "production")]
pub mod _optimized_access {
    use super::optimized_access;

    /// Get element with proven bounds
    #[inline(always)]
    pub fn get_proven_by_<T>(slice: &[T], index: usize) -> Option<&T> {
        optimized_access::get_proven(slice, index)
    }
}

/// Re-export prealloc helpers for convenience
#[cfg(feature = "production")]
pub use optimized_access::{prealloc_block_buffer, prealloc_tx_buffer};

/// Reference implementations for equivalence proofs
///
/// These are safe versions of optimized functions, used to prove
/// that optimizations are correct via formal verification.
#[cfg(feature = "production")]
pub mod reference_implementations {
    /// Reference (safe) implementation of get_proven
    /// This is the version we prove equivalence against
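    ///
    /// # Examples
    /// ```rust
    /// // Hedged sketch: the reference behaves exactly like `slice.get(index)`.
    /// use blvm_consensus::optimizations::reference_implementations::get_proven_reference;
    ///
    /// let xs = [1, 2, 3];
    /// assert_eq!(get_proven_reference(&xs, 2), Some(&3));
    /// assert_eq!(get_proven_reference(&xs, 3), None);
    /// ```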
    #[inline(always)]
    pub fn get_proven_reference<T>(slice: &[T], index: usize) -> Option<&T> {
        slice.get(index) // Safe version
    }
}

/// Runtime assertions for optimization correctness
///
/// These functions provide runtime checks in debug builds to verify
/// that optimizations match their reference implementations.
#[cfg(all(
    feature = "production",
    any(debug_assertions, feature = "runtime-invariants")
))]
pub mod runtime_assertions {
    use super::optimized_access::get_proven;
    use super::reference_implementations::get_proven_reference;

    /// Checked version of get_proven with runtime assertions
    ///
    /// This function performs runtime checks in debug builds to ensure
    /// the optimized implementation matches the reference implementation.
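    ///
    /// # Examples
    /// ```rust
    /// // Hedged sketch: assumes a debug build (or the `runtime-invariants`
    /// // feature), which gates this module. The caller sees the normal Option.
    /// use blvm_consensus::optimizations::runtime_assertions::get_proven_checked;
    ///
    /// let xs = [10, 20, 30];
    /// assert_eq!(get_proven_checked(&xs, 1), Some(&20));
    /// assert_eq!(get_proven_checked(&xs, 9), None);
    /// ```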
    #[inline(always)]
    pub fn get_proven_checked<T>(slice: &[T], index: usize) -> Option<&T> {
        let result_optimized = get_proven(slice, index);
        let result_reference = get_proven_reference(slice, index);

        // Runtime check: both must agree
        debug_assert_eq!(
            result_optimized.is_some(),
            result_reference.is_some(),
            "Optimization correctness check failed: optimized and reference disagree on Some/None"
        );

        if let (Some(opt_val), Some(ref_val)) = (result_optimized, result_reference) {
            debug_assert_eq!(
                opt_val as *const T,
                ref_val as *const T,
                "Optimization correctness check failed: optimized and reference return different pointers"
            );
        }

        result_optimized
    }
}