// Source: blvm_consensus/optimizations.rs
//! BLVM Runtime Optimization Passes
//!
//! Additional optimization passes for 10-30% performance gains.
//!
//! This module provides runtime optimization passes:
//! - Constant folding (pre-computed constants)
//! - Bounds check optimization (proven bounds)
//! - Inlining hints (hot function markers)
//! - Memory layout optimization (cache-friendly structures)
//!
//! Reference: Orange Paper Section 13.1 - Performance Considerations

13use crate::constants::*;
14
/// Pre-computed constants for constant folding optimization
///
/// Each value here is fixed at compile time so hot paths never pay for the
/// derivation (widening casts, divisions) at runtime.
/// Reference: BLVM Optimization Pass 2 - Constant Folding
#[cfg(feature = "production")]
pub mod precomputed_constants {
    use super::*;

    /// Pre-computed: number of satoshis in 1 BTC (readability alias).
    pub const ONE_BTC_SATOSHIS: i64 = SATOSHIS_PER_BTC;

    /// Pre-computed: MAX_MONEY widened to u64 for unsigned comparisons.
    pub const MAX_MONEY_U64: u64 = MAX_MONEY as u64;

    /// Pre-computed: multiplicative inverse of SATOSHIS_PER_BTC, so
    /// satoshi -> BTC conversion is a multiply instead of a divide.
    pub const BTC_PER_SATOSHI: f64 = 1.0 / (SATOSHIS_PER_BTC as f64);

    /// Pre-computed: 2^64 - 1 (used for wrapping arithmetic checks).
    pub const U64_MAX: u64 = u64::MAX;

    /// Pre-computed: 2^32 - 1 (for 32-bit wrapping checks).
    pub const U32_MAX: u32 = u32::MAX;
}
38
/// Memory layout optimization: cache-friendly hash wrapper.
///
/// Wraps a 32-byte hash and forces 32-byte alignment, which:
/// - Reduces cache line splits (a hash never straddles lines unnecessarily)
/// - Improves prefetching behavior for hash arrays
/// - Better fits modern CPU cache architectures (64-byte cache lines)
///
/// Reference: BLVM Optimization Pass 3 - Memory Layout Optimization
#[repr(align(32))]
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] // Copy/Eq/Hash/Debug added: all free for a [u8; 32] wrapper
pub struct CacheAlignedHash([u8; 32]);

impl CacheAlignedHash {
    /// Wrap a raw 32-byte hash. `const` so aligned hashes can be built in
    /// constant contexts.
    #[inline]
    #[must_use]
    pub const fn new(hash: [u8; 32]) -> Self {
        Self(hash)
    }

    /// Borrow the underlying 32-byte hash.
    #[inline]
    #[must_use]
    pub const fn as_bytes(&self) -> &[u8; 32] {
        &self.0
    }
}
66
/// Memory prefetching optimization
///
/// Provides platform-specific prefetch hints to improve cache performance
/// for sequential memory accesses. Used before batch UTXO lookups and
/// other sequential data structure traversals.
///
/// Reference: BLVM Optimization Pass 1.3 - Memory Prefetching
74#[cfg(feature = "production")]
pub mod prefetch {
    /// Prefetch data for read access (x86_64).
    ///
    /// Hints the CPU to pull the cache line at `ptr` into L1
    /// (`_MM_HINT_T0` = highest temporal locality) before it is needed.
    /// This improves performance for sequential memory access patterns.
    ///
    /// # Safety
    /// The pointer must be valid, but it doesn't need to be dereferenceable
    /// at the time of the call. The prefetch is a hint and may be ignored.
    #[cfg(target_arch = "x86_64")]
    #[inline(always)]
    pub unsafe fn prefetch_read(ptr: *const i8) {
        use std::arch::x86_64::{_mm_prefetch, _MM_HINT_T0};
        // Fix: on current stable Rust `_mm_prefetch` takes its strategy as a
        // const generic; the old two-argument `(ptr, hint)` form no longer
        // compiles.
        _mm_prefetch::<_MM_HINT_T0>(ptr);
    }

    /// Prefetch data for read access (aarch64).
    ///
    /// # Safety
    /// Same contract as the x86_64 variant: `ptr` must be valid but need not
    /// be dereferenceable; the prefetch is only a hint.
    ///
    /// NOTE(review): `std::arch::aarch64::_prefetch` is behind an unstable
    /// feature gate on current toolchains — confirm this builds on the
    /// project's aarch64 target.
    #[cfg(target_arch = "aarch64")]
    #[inline(always)]
    pub unsafe fn prefetch_read(ptr: *const i8) {
        use std::arch::aarch64::_prefetch;
        _prefetch(ptr, 0, 0); // Read, temporal locality
    }

    /// Prefetch data for read access (fallback).
    ///
    /// # Safety
    /// Always safe to call: this is a no-op on architectures without a
    /// prefetch intrinsic.
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    #[inline(always)]
    pub unsafe fn prefetch_read(_ptr: *const i8) {
        // No-op for unsupported architectures
    }

    /// Prefetch a single element of `slice` for upcoming read access.
    ///
    /// Safe wrapper around `prefetch_read`: an out-of-range `index` is a
    /// no-op, so callers can never construct an out-of-bounds pointer.
    #[inline(always)]
    pub fn prefetch_slice<T>(slice: &[T], index: usize) {
        if index < slice.len() {
            // SAFETY: `index` was just checked against `slice.len()`, so
            // `add(index)` stays inside the slice allocation and the
            // resulting pointer is valid (prefetch never dereferences it).
            unsafe {
                let ptr = slice.as_ptr().add(index) as *const i8;
                prefetch_read(ptr);
            }
        }
    }

    /// Prefetch the element `offset` positions ahead of `index`.
    ///
    /// Useful in sequential loops that know they will visit `index + offset`
    /// soon. Saturates on overflow and no-ops past the end of the slice.
    #[inline(always)]
    pub fn prefetch_ahead<T>(slice: &[T], index: usize, offset: usize) {
        let prefetch_index = index.saturating_add(offset);
        prefetch_slice(slice, prefetch_index);
    }
}
128
/// Memory layout optimization: compact stack frame for script execution.
///
/// `repr(C, packed)` squeezes the frame into 9 bytes (u8 + u32 + u16 + u16,
/// no padding) so more frames fit per cache line.
///
/// NOTE: because the struct is packed, multi-byte fields may be unaligned.
/// Read them by value (`let flags = frame.flags;`); taking a reference to a
/// packed field is rejected by the compiler (E0793).
#[repr(C, packed)]
#[derive(Clone, Copy)] // safe on a packed struct: all fields are Copy
pub struct CompactStackFrame {
    /// Opcode byte for this frame.
    pub opcode: u8,
    /// Execution flags.
    pub flags: u32,
    /// Offset into the script (bytes).
    pub script_offset: u16,
    /// Stack height at this frame.
    pub stack_height: u16,
}

impl CompactStackFrame {
    /// Build a frame from its raw components. `const` so frames can be
    /// created in constant contexts.
    #[inline]
    #[must_use]
    pub const fn new(opcode: u8, flags: u32, script_offset: u16, stack_height: u16) -> Self {
        Self {
            opcode,
            flags,
            script_offset,
            stack_height,
        }
    }
}
152
/// Inlining hints for hot functions
///
/// Functions wrapped with `hot_inline!` should be aggressively inlined.
/// These are called in tight loops and benefit from inlining.
157#[macro_export]
158#[cfg(feature = "production")]
macro_rules! hot_inline {
    // Legacy marker form. The original expansion emitted a bare
    // `#[inline(always)]` attribute, which is invalid at every possible call
    // site (an attribute must be attached to an item), so no working caller
    // can exist; expanding to nothing keeps `hot_inline!();` compiling.
    () => {};
    // Wrap an item and mark it for aggressive inlining:
    //   hot_inline! { pub fn hot_path() { ... } }
    ($item:item) => {
        #[inline(always)]
        $item
    };
}
164
/// Constant folding: Pre-compute common hash results
///
/// Caches digests of common pre-images (e.g. the empty string) so they
/// never need to be recomputed at runtime.
168#[cfg(feature = "production")]
pub mod constant_folding {
    /// Pre-computed: SHA256 of the empty string.
    pub const EMPTY_STRING_HASH: [u8; 32] = [
        0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9,
        0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52,
        0xb8, 0x55,
    ];

    /// Pre-computed: SHA256(SHA256("")) — double SHA256 of the empty string.
    pub const EMPTY_STRING_DOUBLE_HASH: [u8; 32] = [
        0x5d, 0xf6, 0xe0, 0xe2, 0x76, 0x13, 0x59, 0xf3, 0x73, 0x9a, 0x1c, 0x6f, 0x87, 0x40, 0x64,
        0x0a, 0xf1, 0x2e, 0xc7, 0xc3, 0x72, 0x4a, 0x5c, 0x2c, 0xa5, 0xf3, 0x0f, 0x26, 0x60, 0x87,
        0x7e, 0x6b,
    ];

    /// All-zero digest, compared against by [`is_zero_hash`].
    const ZERO_HASH: [u8; 32] = [0u8; 32];

    /// True when `hash` is the SHA256 of the empty string (constant folding).
    #[inline(always)]
    pub fn is_empty_hash(hash: &[u8; 32]) -> bool {
        hash == &EMPTY_STRING_HASH
    }

    /// True when `hash` is the double SHA256 of the empty string
    /// (constant folding).
    #[inline(always)]
    pub fn is_empty_double_hash(hash: &[u8; 32]) -> bool {
        hash == &EMPTY_STRING_DOUBLE_HASH
    }

    /// True when every byte of `hash` is zero.
    #[inline(always)]
    pub fn is_zero_hash(hash: &[u8; 32]) -> bool {
        *hash == ZERO_HASH
    }
}
202
/// Dead code elimination markers
///
/// Functions/constants marked with this can be eliminated if unused.
206#[cfg(feature = "production")]
#[allow(dead_code)]
pub mod dead_code_elimination {
    /// Mark code for dead code elimination analysis.
    ///
    /// This is a marker function — it never executes in production builds;
    /// the compiler can eliminate any path that only reaches it.
    #[inline(never)]
    #[cold]
    pub fn mark_unused() {
        // Intentionally empty: marker for the dead code elimination pass.
    }

    /// Empty function whose only purpose is its `#[cold]` attribute: calling
    /// it from a branch tells the optimizer that branch is rarely taken.
    #[cold]
    #[inline(never)]
    fn cold_path() {}

    /// Hint to the compiler that `condition` is unlikely to be true.
    ///
    /// Stable Rust has no `likely`/`unlikely` intrinsics; instead we call a
    /// `#[cold]` function on the unlikely (true) branch, which steers branch
    /// layout the same way. Always returns `condition` unchanged.
    #[inline(always)]
    pub fn unlikely(condition: bool) -> bool {
        if condition {
            cold_path();
        }
        condition
    }
}
229
/// SIMD Vectorization: Batch hash operations
///
/// Provides batch hash processing for parallel hash operations.
/// Leverages existing SIMD in the sha2 crate plus Rayon for CPU-core
/// parallelization.
///
/// Provides batch functions for:
/// - SHA256 and double SHA256 (Bitcoin standard)
/// - RIPEMD160 and HASH160 (OP_HASH160)
///
/// Uses chunked processing for better cache locality and parallelizes across
/// CPU cores when the batch is large enough (>= PARALLEL_THRESHOLD items).
///
/// Reference: BLVM Optimization Pass 5 - SIMD Vectorization
#[cfg(feature = "production")]
pub mod simd_vectorization {
    use crate::crypto::OptimizedSha256;
    use digest::Digest;
    use ripemd::Ripemd160;

    /// Minimum batch size for parallelization (thread-pool overhead is not
    /// worth it for smaller batches).
    /// batch_sha256 uses OptimizedSha256 (SHA-NI when available) for
    /// consistency with batch_double_sha256_aligned.
    const PARALLEL_THRESHOLD: usize = 8;

    /// Chunk size for cache-friendly processing. Hardware-derived via ibd_tuning.
    // NOTE(review): presumably returns a small, cache-tuned count — confirm
    // against `blvm_primitives::ibd_tuning::hash_batch_chunk_size`.
    #[inline]
    fn chunk_size() -> usize {
        blvm_primitives::ibd_tuning::hash_batch_chunk_size()
    }

    /// Batch SHA256: Compute SHA256 for multiple independent inputs
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of 32-byte hashes, one per input (in same order)
    ///
    /// # Performance
    /// - Small batches (< 4 items): Sequential (overhead not worth parallelization)
    /// - Medium batches (4 to PARALLEL_THRESHOLD-1 items): Chunked sequential
    /// - Large batches (>= PARALLEL_THRESHOLD items): AVX2 batch kernel when
    ///   available, otherwise multi-core parallelization with Rayon
    pub fn batch_sha256(inputs: &[&[u8]]) -> Vec<[u8; 32]> {
        if inputs.is_empty() {
            return Vec::new();
        }

        // Small batches: sequential processing. Use OptimizedSha256 (SHA-NI when available).
        if inputs.len() < 4 {
            let hasher = OptimizedSha256::new();
            return inputs.iter().map(|input| hasher.hash(input)).collect();
        }

        // Medium batches: chunked sequential processing.
        // (Functionally identical to a flat loop; the chunking mirrors the
        // access pattern of the parallel path below.)
        if inputs.len() < PARALLEL_THRESHOLD {
            let hasher = OptimizedSha256::new();
            let mut results = Vec::with_capacity(inputs.len());
            for chunk in inputs.chunks(chunk_size()) {
                for input in chunk {
                    results.push(hasher.hash(input));
                }
            }
            return results;
        }

        // Large batches: try the runtime-detected AVX2 batch kernel first,
        // then fall back to multi-core parallelization.
        #[cfg(target_arch = "x86_64")]
        {
            use crate::crypto::sha256_avx2;
            if sha256_avx2::is_avx2_available() {
                // Use AVX2 batch processing for chunks of 8
                use crate::crypto::avx2_batch;
                return avx2_batch::batch_sha256_avx2(inputs);
            }
        }

        // Fallback: Rayon work-stealing over cache-sized chunks. Each worker
        // constructs its own OptimizedSha256 (runtime SHA-NI detection).
        use rayon::prelude::*;

        inputs
            .par_chunks(chunk_size())
            .map(|chunk| {
                let hasher = OptimizedSha256::new();
                chunk
                    .iter()
                    .map(|input| hasher.hash(input))
                    .collect::<Vec<_>>()
            })
            .flatten()
            .collect()
    }

    /// Batch double SHA256: Compute SHA256(SHA256(x)) for multiple inputs
    ///
    /// This is Bitcoin's standard hash function used for transaction IDs, block hashes, etc.
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of 32-byte hashes, one per input (in same order)
    pub fn batch_double_sha256(inputs: &[&[u8]]) -> Vec<[u8; 32]> {
        // Delegate to the cache-aligned variant and unwrap the wrapper;
        // keeps the two entry points behaviorally identical.
        batch_double_sha256_aligned(inputs)
            .into_iter()
            .map(|h| *h.as_bytes())
            .collect()
    }

    /// Batch double SHA256 with cache-aligned output
    ///
    /// Returns cache-aligned hash structures for better memory performance.
    /// Uses 32-byte alignment for optimal cache line utilization.
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of cache-aligned 32-byte hashes, one per input (in same order)
    pub fn batch_double_sha256_aligned(inputs: &[&[u8]]) -> Vec<super::CacheAlignedHash> {
        if inputs.is_empty() {
            return Vec::new();
        }

        // Small batches: sequential processing (overhead not worth it).
        // Uses OptimizedSha256 (SHA-NI when available) instead of plain sha2.
        // NOTE(review): assumes `hash256` computes the double SHA256 —
        // consistent with its use for Bitcoin hashes here; confirm in crypto.
        let hasher = OptimizedSha256::new();
        if inputs.len() < 4 {
            return inputs
                .iter()
                .map(|input| super::CacheAlignedHash::new(hasher.hash256(input)))
                .collect();
        }

        // Medium batches: chunked sequential processing (reuses `hasher`).
        if inputs.len() < PARALLEL_THRESHOLD {
            let mut results = Vec::with_capacity(inputs.len());
            for chunk in inputs.chunks(chunk_size()) {
                for input in chunk {
                    results.push(super::CacheAlignedHash::new(hasher.hash256(input)));
                }
            }
            return results;
        }

        // Large batches: parallelized processing using Rayon.
        // Each worker gets SHA-NI via OptimizedSha256 (runtime detection).
        use rayon::prelude::*;

        inputs
            .par_chunks(chunk_size())
            .map(|chunk| {
                let hasher = OptimizedSha256::new();
                chunk
                    .iter()
                    .map(|input| super::CacheAlignedHash::new(hasher.hash256(input)))
                    .collect::<Vec<_>>()
            })
            .flatten()
            .collect()
    }

    /// Batch RIPEMD160: Compute RIPEMD160 for multiple inputs
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of 20-byte hashes, one per input (in same order)
    pub fn batch_ripemd160(inputs: &[&[u8]]) -> Vec<[u8; 20]> {
        if inputs.is_empty() {
            return Vec::new();
        }

        // Small batches: sequential processing.
        if inputs.len() < 4 {
            return inputs
                .iter()
                .map(|input| {
                    let hash = Ripemd160::digest(input);
                    // Ripemd160 output is exactly 20 bytes; copy into array.
                    let mut result = [0u8; 20];
                    result.copy_from_slice(&hash);
                    result
                })
                .collect();
        }

        // Medium batches: chunked sequential processing.
        if inputs.len() < PARALLEL_THRESHOLD {
            let mut results = Vec::with_capacity(inputs.len());
            for chunk in inputs.chunks(chunk_size()) {
                for input in chunk {
                    let hash = Ripemd160::digest(input);
                    let mut result = [0u8; 20];
                    result.copy_from_slice(&hash);
                    results.push(result);
                }
            }
            return results;
        }

        // Large batches: parallelized processing.
        // Rayon is enabled via the 'production' feature.
        use rayon::prelude::*;

        inputs
            .par_chunks(chunk_size())
            .map(|chunk| {
                chunk
                    .iter()
                    .map(|input| {
                        let hash = Ripemd160::digest(input);
                        let mut result = [0u8; 20];
                        result.copy_from_slice(&hash);
                        result
                    })
                    .collect::<Vec<_>>()
            })
            .flatten()
            .collect()
    }

    /// Batch HASH160: Compute RIPEMD160(SHA256(x)) for multiple inputs
    ///
    /// This is Bitcoin's HASH160 operation (OP_HASH160 in script).
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of 20-byte hashes, one per input (in same order)
    pub fn batch_hash160(inputs: &[&[u8]]) -> Vec<[u8; 20]> {
        if inputs.is_empty() {
            return Vec::new();
        }

        // Small batches: sequential. OptimizedSha256 (SHA-NI) for the SHA256
        // stage; RIPEMD160 has no hardware acceleration here.
        if inputs.len() < 4 {
            let hasher = OptimizedSha256::new();
            return inputs
                .iter()
                .map(|input| {
                    let sha256_hash: [u8; 32] = hasher.hash(input);
                    let ripemd160_hash = Ripemd160::digest(sha256_hash);
                    let mut result = [0u8; 20];
                    result.copy_from_slice(&ripemd160_hash);
                    result
                })
                .collect();
        }

        // Medium batches: chunked sequential processing.
        if inputs.len() < PARALLEL_THRESHOLD {
            let hasher = OptimizedSha256::new();
            let mut results = Vec::with_capacity(inputs.len());
            for chunk in inputs.chunks(chunk_size()) {
                for input in chunk {
                    let sha256_hash: [u8; 32] = hasher.hash(input);
                    let ripemd160_hash = Ripemd160::digest(sha256_hash);
                    let mut result = [0u8; 20];
                    result.copy_from_slice(&ripemd160_hash);
                    results.push(result);
                }
            }
            return results;
        }

        // Large batches: parallelized processing. Each worker gets its own
        // OptimizedSha256 instance.
        use rayon::prelude::*;

        inputs
            .par_chunks(chunk_size())
            .map(|chunk| {
                let hasher = OptimizedSha256::new();
                chunk
                    .iter()
                    .map(|input| {
                        let sha256_hash: [u8; 32] = hasher.hash(input);
                        let ripemd160_hash = Ripemd160::digest(sha256_hash);
                        let mut result = [0u8; 20];
                        result.copy_from_slice(&ripemd160_hash);
                        result
                    })
                    .collect::<Vec<_>>()
            })
            .flatten()
            .collect()
    }
}
523
524#[cfg(feature = "production")]
525pub use constant_folding::*;
526#[cfg(feature = "production")]
527pub use precomputed_constants::*;
528
/// Proven runtime bounds for BLVM optimizations
///
/// These bounds are proven by formal verification and may be relied on for
/// runtime optimizations without additional safety checks. Unlike proof-time
/// limits (in `_helpers::proof_limits`), these represent actual Bitcoin
/// limits that have been proven to hold in all cases.
///
/// Reference: BLVM Optimization Pass
#[cfg(feature = "production")]
pub mod proven_bounds {
    use crate::constants::{MAX_INPUTS, MAX_OUTPUTS};

    /// Maximum transaction size in bytes (proven by formal verification in
    /// transaction.rs).
    pub const MAX_TX_SIZE_PROVEN: usize = 100000;

    /// Maximum block size in bytes, 4 MB (proven by formal verification in
    /// block.rs).
    pub const MAX_BLOCK_SIZE_PROVEN: usize = 4000000;

    /// Maximum inputs per transaction (proven by formal verification);
    /// mirrors the actual Bitcoin limit from constants.rs.
    pub const MAX_INPUTS_PROVEN: usize = MAX_INPUTS;

    /// Maximum outputs per transaction (proven by formal verification);
    /// mirrors the actual Bitcoin limit from constants.rs.
    pub const MAX_OUTPUTS_PROVEN: usize = MAX_OUTPUTS;

    /// Maximum transactions per block (proven by formal verification).
    /// Consensus rules do not bound this directly; ~10,000 is the practical
    /// ceiling implied by the block size limits.
    pub const MAX_TRANSACTIONS_PROVEN: usize = 10000;

    /// Maximum previous headers consulted for difficulty adjustment
    /// (proven by formal verification).
    pub const MAX_PREV_HEADERS_PROVEN: usize = 5;
}
567
/// Optimized access using proven bounds
///
/// Uses bounds proven by formal verification to optimize runtime access
/// (pre-sized buffers, bounds known to hold at call sites).
///
/// Reference: Formal proofs in transaction.rs, block.rs, mining.rs, pow.rs, etc.
/// These proofs formally verify that certain bounds always hold, allowing us to
/// use optimized access patterns without redundant runtime work.
#[cfg(feature = "production")]
pub mod optimized_access {
    use super::proven_bounds;

    /// Get element with proven bounds check
    ///
    /// For collections proven to respect the bounds in [`proven_bounds`],
    /// this access never takes the `None` path in practice, but it still
    /// returns `Option` defensively in case runtime data ever violates the
    /// proof assumptions.
    ///
    /// # Panics
    /// Never panics - always returns `None` if out of bounds.
    ///
    /// # Examples
    /// ```rust
    /// use blvm_consensus::optimizations::optimized_access::get_proven;
    /// use blvm_consensus::types::Transaction;
    ///
    /// # let tx = Transaction { version: 1, inputs: vec![].into(), outputs: vec![].into(), lock_time: 0 };
    /// # let index = 0;
    /// if let Some(input) = get_proven(&tx.inputs, index) {
    ///     // Safe to use
    /// }
    /// ```
    #[inline(always)]
    pub fn get_proven<T>(slice: &[T], index: usize) -> Option<&T> {
        // The previous implementation checked `index < slice.len()` and then
        // called `get_unchecked`. That check-then-unchecked pair is exactly
        // what `slice.get` does, and both compile to the same code — so the
        // safe form is strictly better: identical behavior, no `unsafe` to
        // audit.
        slice.get(index)
    }

    /// Pre-allocate buffer using proven maximum size
    ///
    /// Reserves the proven upper bound up front so the buffer never
    /// reallocates while being filled.
    #[inline(always)]
    pub fn prealloc_proven<T>(max_size: usize) -> Vec<T> {
        Vec::with_capacity(max_size)
    }

    /// Pre-allocate a transaction buffer sized to `MAX_TX_SIZE_PROVEN`.
    #[inline(always)]
    pub fn prealloc_tx_buffer() -> Vec<u8> {
        prealloc_proven::<u8>(proven_bounds::MAX_TX_SIZE_PROVEN)
    }

    /// Pre-allocate a block buffer sized to `MAX_BLOCK_SIZE_PROVEN`.
    #[inline(always)]
    pub fn prealloc_block_buffer() -> Vec<u8> {
        prealloc_proven::<u8>(proven_bounds::MAX_BLOCK_SIZE_PROVEN)
    }

    /// Get element with proven bounds (alias for get_proven for compatibility)
    #[inline(always)]
    pub fn get_proven_by_<T>(slice: &[T], index: usize) -> Option<&T> {
        get_proven(slice, index)
    }
}
646
/// Alias module for _optimized_access (for backward compatibility)
#[cfg(feature = "production")]
pub mod _optimized_access {
    use super::optimized_access;

    /// Backward-compatible alias that forwards to
    /// [`optimized_access::get_proven`].
    #[inline(always)]
    pub fn get_proven_by_<T>(slice: &[T], index: usize) -> Option<&T> {
        optimized_access::get_proven(slice, index)
    }
}
658
659/// Re-export prealloc helpers for convenience
660#[cfg(feature = "production")]
661pub use optimized_access::{prealloc_block_buffer, prealloc_tx_buffer};
662
/// Reference implementations for equivalence proofs
///
/// These are safe versions of optimized functions, used to prove
/// that optimizations are correct via formal verification.
667#[cfg(feature = "production")]
pub mod reference_implementations {
    /// Reference (safe) implementation of `get_proven`.
    ///
    /// Optimized variants are proven equivalent against this function:
    /// `Some(&slice[index])` when in bounds, `None` otherwise.
    #[inline(always)]
    pub fn get_proven_reference<T>(slice: &[T], index: usize) -> Option<&T> {
        if index < slice.len() {
            Some(&slice[index])
        } else {
            None
        }
    }
}
676
/// Runtime assertions for optimization correctness
///
/// Debug-build (or `runtime-invariants`) checks verifying that optimized
/// implementations agree with their reference implementations.
#[cfg(all(
    feature = "production",
    any(debug_assertions, feature = "runtime-invariants")
))]
pub mod runtime_assertions {
    use super::optimized_access::get_proven;
    use super::reference_implementations::get_proven_reference;

    /// `get_proven` plus a debug-build cross-check against the reference
    /// implementation. In release builds without `runtime-invariants`, the
    /// checks compile away and this is just `get_proven`.
    #[inline(always)]
    pub fn get_proven_checked<T>(slice: &[T], index: usize) -> Option<&T> {
        let optimized = get_proven(slice, index);
        let reference = get_proven_reference(slice, index);

        // The two implementations must agree on presence...
        debug_assert_eq!(
            optimized.is_some(),
            reference.is_some(),
            "Optimization correctness check failed: optimized and reference disagree on Some/None"
        );

        // ...and, when both return a value, on the exact element
        // (compared by pointer identity, not just by value).
        if let (Some(opt), Some(refv)) = (optimized, reference) {
            debug_assert_eq!(
                opt as *const T,
                refv as *const T,
                "Optimization correctness check failed: optimized and reference return different pointers"
            );
        }

        optimized
    }
}
715}