
blvm_consensus/optimizations.rs

//! BLVM Runtime Optimization Passes
//!
//! Additional optimization passes for 10-30% performance gains
//!
//! This module provides runtime optimization passes:
//! - Constant folding (pre-computed constants)
//! - Bounds check optimization (proven bounds)
//! - Inlining hints (hot function markers)
//! - Memory layout optimization (cache-friendly structures)
//!
//! Reference: Orange Paper Section 13.1 - Performance Considerations

use crate::constants::*;

/// Pre-computed constants for constant folding optimization
///
/// These constants are computed at compile time to avoid runtime computation
/// in hot paths. Reference: BLVM Optimization Pass 2 - Constant Folding
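///
/// # Example
///
/// An illustrative doc-test (requires the `production` feature); multiplying by
/// the pre-folded reciprocal avoids a runtime division:
///
/// ```rust
/// use blvm_consensus::optimizations::precomputed_constants::*;
///
/// let sats: u64 = 150_000_000; // 1.5 BTC
/// let btc = sats as f64 * BTC_PER_SATOSHI;
/// assert!((btc - 1.5).abs() < 1e-12);
/// ```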
#[cfg(feature = "production")]
pub mod precomputed_constants {
    use super::*;

    /// Pre-computed: 2^64 - 1 (used for wrapping arithmetic checks)
    pub const U64_MAX: u64 = u64::MAX;

    /// Pre-computed: MAX_MONEY as u64 (for comparisons)
    pub const MAX_MONEY_U64: u64 = MAX_MONEY as u64;

    /// Pre-computed: Inverse of SATOSHIS_PER_BTC (for BTC conversion)
    pub const BTC_PER_SATOSHI: f64 = 1.0 / (SATOSHIS_PER_BTC as f64);

    /// Pre-computed: 2^32 - 1 (for 32-bit wrapping checks)
    pub const U32_MAX: u32 = u32::MAX;

    /// Pre-computed: Number of satoshis in 1 BTC (for readability)
    pub const ONE_BTC_SATOSHIS: i64 = SATOSHIS_PER_BTC;
}

/// Memory layout optimization: Cache-aligned hash for batch operations
///
/// Optimizes hash array access for cache locality by aligning each hash to a
/// 32-byte boundary, which:
/// - Reduces cache line splits
/// - Improves prefetching behavior
/// - Better fits modern CPU cache architectures (64-byte cache lines)
///
/// Reference: BLVM Optimization Pass 3 - Memory Layout Optimization
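///
/// # Example
///
/// A minimal sketch showing that the wrapper preserves bytes and carries
/// 32-byte alignment:
///
/// ```rust
/// use blvm_consensus::optimizations::CacheAlignedHash;
///
/// // Alignment is part of the type, so a Vec<CacheAlignedHash> keeps every
/// // element on a 32-byte boundary.
/// assert_eq!(std::mem::align_of::<CacheAlignedHash>(), 32);
///
/// let h = CacheAlignedHash::new([0xab; 32]);
/// assert_eq!(h.as_bytes()[0], 0xab);
/// ```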
#[repr(align(32))]
#[derive(Clone)]
pub struct CacheAlignedHash([u8; 32]);

impl CacheAlignedHash {
    #[inline]
    pub fn new(hash: [u8; 32]) -> Self {
        Self(hash)
    }

    #[inline]
    pub fn as_bytes(&self) -> &[u8; 32] {
        &self.0
    }
}

/// Memory prefetching optimization
///
/// Provides platform-specific prefetch hints to improve cache performance
/// for sequential memory accesses. Used before batch UTXO lookups and
/// other sequential data structure traversals.
///
/// Reference: BLVM Optimization Pass 1.3 - Memory Prefetching
#[cfg(feature = "production")]
pub mod prefetch {
    /// Prefetch data for read access
    ///
    /// Hints the CPU to prefetch data into cache before it's needed.
    /// This improves performance for sequential memory access patterns.
    ///
    /// # Safety
    /// The pointer must be valid, but it doesn't need to be dereferenceable
    /// at the time of the call. The prefetch is a hint and may be ignored.
    #[cfg(target_arch = "x86_64")]
    #[inline(always)]
    pub unsafe fn prefetch_read(ptr: *const i8) {
        use std::arch::x86_64::{_mm_prefetch, _MM_HINT_T0};
        _mm_prefetch::<_MM_HINT_T0>(ptr);
    }

    #[cfg(target_arch = "aarch64")]
    #[inline(always)]
    pub unsafe fn prefetch_read(ptr: *const i8) {
        // std::arch::aarch64::_prefetch requires the unstable
        // `stdarch_aarch64_prefetch` feature (issue #117217) and is not yet
        // available on stable Rust. Use inline asm instead: `core::arch::asm!`
        // is stable since 1.59 and emits the identical PRFM instruction.
        // PRFM PLDL1KEEP = Prefetch for Load, L1, temporal (≡ _prefetch hint T0).
        core::arch::asm!(
            "prfm pldl1keep, [{addr}]",
            addr = in(reg) ptr,
            options(nostack, readonly, preserves_flags)
        );
    }

    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    #[inline(always)]
    pub unsafe fn prefetch_read(_ptr: *const i8) {
        // No-op for unsupported architectures
    }

    /// Prefetch a slice of data for sequential access
    ///
    /// Prefetches the next cache line(s) of data to improve sequential access.
    /// Safe wrapper around prefetch_read that works with slices.
    #[inline(always)]
    pub fn prefetch_slice<T>(slice: &[T], index: usize) {
        if index < slice.len() {
            unsafe {
                let ptr = slice.as_ptr().add(index) as *const i8;
                prefetch_read(ptr);
            }
        }
    }

    /// Prefetch multiple elements ahead in a slice
    ///
    /// Prefetches elements at `index + offset` to prepare for future access.
    /// Useful for sequential loops where you know you'll access elements ahead.
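    ///
    /// # Example
    ///
    /// An illustrative doc-test (requires the `production` feature); hints the
    /// element a few iterations ahead while walking a slice:
    ///
    /// ```rust
    /// use blvm_consensus::optimizations::prefetch::prefetch_ahead;
    ///
    /// let data: Vec<u64> = (0..1024).collect();
    /// let mut sum = 0u64;
    /// for i in 0..data.len() {
    ///     // Hint the element 8 slots ahead while the current one is processed.
    ///     prefetch_ahead(&data, i, 8);
    ///     sum = sum.wrapping_add(data[i]);
    /// }
    /// assert_eq!(sum, (0..1024u64).sum::<u64>());
    /// ```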
    #[inline(always)]
    pub fn prefetch_ahead<T>(slice: &[T], index: usize, offset: usize) {
        let prefetch_index = index.saturating_add(offset);
        prefetch_slice(slice, prefetch_index);
    }
}

/// Memory layout optimization: Compact stack frame
///
/// Compact stack frame for script execution, laid out for cache locality.
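///
/// # Example
///
/// A minimal sketch: `repr(C, packed)` removes all padding, so the frame is
/// exactly 1 + 4 + 2 + 2 = 9 bytes. Note that taking references to fields of a
/// packed struct is rejected by the compiler; read fields by value instead.
///
/// ```rust
/// use blvm_consensus::optimizations::CompactStackFrame;
///
/// assert_eq!(std::mem::size_of::<CompactStackFrame>(), 9);
///
/// let frame = CompactStackFrame::new(0x76, 0, 10, 2); // 0x76 = OP_DUP
/// let op = frame.opcode; // copy out; don't take &frame.opcode
/// assert_eq!(op, 0x76);
/// ```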
#[repr(C, packed)]
pub struct CompactStackFrame {
    pub opcode: u8,
    pub flags: u32,
    pub script_offset: u16,
    pub stack_height: u16,
}

impl CompactStackFrame {
    #[inline]
    pub fn new(opcode: u8, flags: u32, script_offset: u16, stack_height: u16) -> Self {
        Self {
            opcode,
            flags,
            script_offset,
            stack_height,
        }
    }
}

/// Inlining hints for hot functions
///
/// Functions wrapped in `hot_inline!` are marked `#[inline(always)]` for
/// aggressive inlining. These are called in tight loops and benefit from inlining.
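///
/// # Example
///
/// An illustrative doc-test (requires the `production` feature); wraps a small
/// hot-path helper so it carries `#[inline(always)]`:
///
/// ```rust
/// blvm_consensus::hot_inline! {
///     fn add_sats(a: u64, b: u64) -> u64 {
///         a.wrapping_add(b)
///     }
/// }
///
/// assert_eq!(add_sats(1, 2), 3);
/// ```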
#[macro_export]
#[cfg(feature = "production")]
macro_rules! hot_inline {
    ($item:item) => {
        #[inline(always)]
        $item
    };
}

/// Constant folding: Pre-compute common hash results
///
/// Caches hashes of common pre-images for constant folding.
#[cfg(feature = "production")]
pub mod constant_folding {
    /// Pre-computed: SHA256 of empty string
    pub const EMPTY_STRING_HASH: [u8; 32] = [
        0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9,
        0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52,
        0xb8, 0x55,
    ];

    /// Pre-computed: Double SHA256 of empty string
    pub const EMPTY_STRING_DOUBLE_HASH: [u8; 32] = [
        0x5d, 0xf6, 0xe0, 0xe2, 0x76, 0x13, 0x59, 0xd3, 0x0a, 0x82, 0x75, 0x05, 0x8e, 0x29, 0x9f,
        0xcc, 0x03, 0x81, 0x53, 0x45, 0x45, 0xf5, 0x5c, 0xf4, 0x3e, 0x41, 0x98, 0x3f, 0x5d, 0x4c,
        0x94, 0x56,
    ];

    /// Check if input matches empty string hash (constant folding)
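    ///
    /// # Example
    ///
    /// An illustrative doc-test (requires the `production` feature):
    ///
    /// ```rust
    /// use blvm_consensus::optimizations::constant_folding::*;
    ///
    /// // Skip hashing entirely when the digest is already known.
    /// assert!(is_empty_hash(&EMPTY_STRING_HASH));
    /// assert!(!is_zero_hash(&EMPTY_STRING_HASH));
    /// assert!(is_zero_hash(&[0u8; 32]));
    /// ```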
    #[inline(always)]
    pub fn is_empty_hash(hash: &[u8; 32]) -> bool {
        *hash == EMPTY_STRING_HASH
    }

    /// Check if input matches empty string double hash (constant folding)
    #[inline(always)]
    pub fn is_empty_double_hash(hash: &[u8; 32]) -> bool {
        *hash == EMPTY_STRING_DOUBLE_HASH
    }

    /// Constant-fold: Check if hash is zero (all zeros)
    #[inline(always)]
    pub fn is_zero_hash(hash: &[u8; 32]) -> bool {
        hash.iter().all(|&b| b == 0)
    }
}

/// Dead code elimination markers
///
/// Functions/constants marked with this can be eliminated if unused.
#[cfg(feature = "production")]
#[allow(dead_code)]
pub mod dead_code_elimination {
    /// Mark code for dead code elimination analysis
    /// This is a marker function - the compiler can eliminate unused paths
    #[inline(never)]
    #[cold]
    pub fn mark_unused() {
        // This function never executes in production builds
        // It's a marker for the dead code elimination pass
    }

    /// Hint to compiler that branch is unlikely (dead code elimination)
    ///
    /// Note: In stable Rust, this is a no-op but serves as documentation
    /// for future optimization opportunities (unstable `likely`/`unlikely` intrinsics).
    #[inline(always)]
    pub fn unlikely(condition: bool) -> bool {
        // Stable Rust doesn't have likely/unlikely intrinsics
        // This is a placeholder for future optimization
        condition
    }
}

/// SIMD Vectorization: Batch hash operations
///
/// Provides batch hash processing for parallel hash operations.
/// Leverages existing SIMD in the sha2 crate (asm feature) and AVX2 batch
/// processing where available.
///
/// Provides batch functions for:
/// - SHA256 and double SHA256 (Bitcoin standard)
/// - RIPEMD160 and HASH160 (OP_HASH160)
///
/// Uses chunked processing for better cache locality. Cross-block parallelism
/// is provided by the IBD worker pool rather than an internal Rayon pool
/// (see `batch_sha256` for the rationale).
///
/// Reference: BLVM Optimization Pass 5 - SIMD Vectorization
#[cfg(feature = "production")]
pub mod simd_vectorization {
    use crate::crypto::OptimizedSha256;
    use digest::Digest;
    use ripemd::Ripemd160;

    /// Minimum batch size for parallelization (overhead not worth it for smaller batches).
    /// batch_sha256 uses OptimizedSha256 (SHA-NI when available) for consistency with batch_double_sha256_aligned.
    const PARALLEL_THRESHOLD: usize = 8;

    /// Chunk size for cache-friendly processing. Hardware-derived via ibd_tuning.
    #[inline]
    fn chunk_size() -> usize {
        blvm_primitives::ibd_tuning::hash_batch_chunk_size()
    }

    /// Batch SHA256: Compute SHA256 for multiple independent inputs
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of 32-byte hashes, one per input (in same order)
    ///
    /// # Performance
    /// - Small batches (< 4 items): Sequential (batching overhead not worth it)
    /// - Medium batches (4-7 items): Chunked sequential
    /// - Large batches (≥ 8 items): AVX2 batch processing when available,
    ///   otherwise chunked sequential
    ///
    /// # Optimizations
    /// - Uses OptimizedSha256 (SHA-NI when available), built on the sha2 crate's
    ///   optimized assembly ("asm" feature)
    /// - AVX2 batch optimization available via `crypto::avx2_batch` module
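    ///
    /// # Example
    ///
    /// An illustrative doc-test (requires the `production` feature):
    ///
    /// ```rust
    /// use blvm_consensus::optimizations::simd_vectorization::batch_sha256;
    ///
    /// let inputs: Vec<&[u8]> = vec![b"hello", b"world"];
    /// let hashes = batch_sha256(&inputs);
    /// assert_eq!(hashes.len(), 2);      // one 32-byte hash per input
    /// assert_ne!(hashes[0], hashes[1]); // output order follows input order
    /// ```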
    pub fn batch_sha256(inputs: &[&[u8]]) -> Vec<[u8; 32]> {
        if inputs.is_empty() {
            return Vec::new();
        }

        // Small batches: sequential processing. Use OptimizedSha256 (SHA-NI when available).
        if inputs.len() < 4 {
            let hasher = OptimizedSha256::new();
            return inputs.iter().map(|input| hasher.hash(input)).collect();
        }

        // Medium batches: chunked sequential processing
        if inputs.len() < PARALLEL_THRESHOLD {
            let hasher = OptimizedSha256::new();
            let mut results = Vec::with_capacity(inputs.len());
            for chunk in inputs.chunks(chunk_size()) {
                for input in chunk {
                    results.push(hasher.hash(input));
                }
            }
            return results;
        }

        // Large batches: try AVX2 first, then fall back to serial chunked processing
        #[cfg(target_arch = "x86_64")]
        {
            use crate::crypto::sha256_avx2;
            if sha256_avx2::is_avx2_available() {
                // Use AVX2 batch processing for chunks of 8
                use crate::crypto::avx2_batch;
                return avx2_batch::batch_sha256_avx2(inputs);
            }
        }

        // Fallback: serial chunked processing. The previous `par_chunks` rayon path was
        // disastrous in IBD: N validation workers × per-block calls × shared rayon pool =
        // catastrophic oversubscription. SHA-NI single-thread is fast enough; cross-block
        // parallelism (worker pool) is the only level we want.
        let hasher = OptimizedSha256::new();
        let mut results = Vec::with_capacity(inputs.len());
        for chunk in inputs.chunks(chunk_size()) {
            for input in chunk {
                results.push(hasher.hash(input));
            }
        }
        results
    }

    /// Batch double SHA256: Compute SHA256(SHA256(x)) for multiple inputs
    ///
    /// This is Bitcoin's standard hash function used for transaction IDs, block hashes, etc.
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of 32-byte hashes, one per input (in same order)
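    ///
    /// # Example
    ///
    /// An illustrative doc-test (requires the `production` feature); checks the
    /// result against the pre-folded double hash of the empty string:
    ///
    /// ```rust
    /// use blvm_consensus::optimizations::constant_folding::EMPTY_STRING_DOUBLE_HASH;
    /// use blvm_consensus::optimizations::simd_vectorization::batch_double_sha256;
    ///
    /// let hashes = batch_double_sha256(&[b"".as_ref()]);
    /// assert_eq!(hashes[0], EMPTY_STRING_DOUBLE_HASH);
    /// ```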
    pub fn batch_double_sha256(inputs: &[&[u8]]) -> Vec<[u8; 32]> {
        // Use aligned version for better cache performance
        batch_double_sha256_aligned(inputs)
            .into_iter()
            .map(|h| *h.as_bytes())
            .collect()
    }

    /// Batch double SHA256 with cache-aligned output
    ///
    /// Returns cache-aligned hash structures for better memory performance.
    /// Uses 32-byte alignment for optimal cache line utilization.
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of cache-aligned 32-byte hashes, one per input (in same order)
    pub fn batch_double_sha256_aligned(inputs: &[&[u8]]) -> Vec<super::CacheAlignedHash> {
        if inputs.is_empty() {
            return Vec::new();
        }

        // Small batches: sequential processing (overhead not worth it)
        // Use OptimizedSha256 (SHA-NI when available) instead of sha2
        let hasher = OptimizedSha256::new();
        if inputs.len() < 4 {
            return inputs
                .iter()
                .map(|input| super::CacheAlignedHash::new(hasher.hash256(input)))
                .collect();
        }

        // Medium batches: chunked sequential processing
        if inputs.len() < PARALLEL_THRESHOLD {
            let mut results = Vec::with_capacity(inputs.len());
            for chunk in inputs.chunks(chunk_size()) {
                for input in chunk {
                    results.push(super::CacheAlignedHash::new(hasher.hash256(input)));
                }
            }
            return results;
        }

        // Serial chunked processing — see `batch_sha256` for rationale (rayon oversubscribes
        // the pool when N IBD workers each push hashing batches; SHA-NI keeps the per-worker
        // path fast on its own thread).
        let mut results = Vec::with_capacity(inputs.len());
        for chunk in inputs.chunks(chunk_size()) {
            for input in chunk {
                results.push(super::CacheAlignedHash::new(hasher.hash256(input)));
            }
        }
        results
    }

    /// Batch RIPEMD160: Compute RIPEMD160 for multiple inputs
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of 20-byte hashes, one per input (in same order)
    pub fn batch_ripemd160(inputs: &[&[u8]]) -> Vec<[u8; 20]> {
        if inputs.is_empty() {
            return Vec::new();
        }

        // Small batches: sequential processing
        if inputs.len() < 4 {
            return inputs
                .iter()
                .map(|input| {
                    let hash = Ripemd160::digest(input);
                    let mut result = [0u8; 20];
                    result.copy_from_slice(&hash);
                    result
                })
                .collect();
        }

        // Medium batches: chunked sequential processing
        if inputs.len() < PARALLEL_THRESHOLD {
            let mut results = Vec::with_capacity(inputs.len());
            for chunk in inputs.chunks(chunk_size()) {
                for input in chunk {
                    let hash = Ripemd160::digest(input);
                    let mut result = [0u8; 20];
                    result.copy_from_slice(&hash);
                    results.push(result);
                }
            }
            return results;
        }

        // Serial chunked processing — same rationale as `batch_sha256`: cross-block
        // parallelism is provided by the IBD worker pool; rayon par_chunks here
        // oversubscribes the global pool when N workers each call this per-block.
        let mut results = Vec::with_capacity(inputs.len());
        for chunk in inputs.chunks(chunk_size()) {
            for input in chunk {
                let hash = Ripemd160::digest(input);
                let mut result = [0u8; 20];
                result.copy_from_slice(&hash);
                results.push(result);
            }
        }
        results
    }

    /// Batch HASH160: Compute RIPEMD160(SHA256(x)) for multiple inputs
    ///
    /// This is Bitcoin's HASH160 operation (OP_HASH160 in script).
    ///
    /// # Arguments
    /// * `inputs` - Slice of byte slices to hash
    ///
    /// # Returns
    /// Vector of 20-byte hashes, one per input (in same order)
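    ///
    /// # Example
    ///
    /// An illustrative doc-test (requires the `production` feature); HASH160
    /// always yields 20 bytes per input:
    ///
    /// ```rust
    /// use blvm_consensus::optimizations::simd_vectorization::batch_hash160;
    ///
    /// let pubkey_like = [0x02u8; 33]; // compressed-pubkey-sized placeholder
    /// let hashes = batch_hash160(&[&pubkey_like[..]]);
    /// assert_eq!(hashes.len(), 1);
    /// assert_eq!(hashes[0].len(), 20);
    /// ```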
    pub fn batch_hash160(inputs: &[&[u8]]) -> Vec<[u8; 20]> {
        if inputs.is_empty() {
            return Vec::new();
        }

        // Small batches: sequential processing. Use OptimizedSha256 (SHA-NI) for SHA256 part.
        if inputs.len() < 4 {
            let hasher = OptimizedSha256::new();
            return inputs
                .iter()
                .map(|input| {
                    let sha256_hash: [u8; 32] = hasher.hash(input);
                    let ripemd160_hash = Ripemd160::digest(sha256_hash);
                    let mut result = [0u8; 20];
                    result.copy_from_slice(&ripemd160_hash);
                    result
                })
                .collect();
        }

        // Medium batches: chunked sequential processing
        if inputs.len() < PARALLEL_THRESHOLD {
            let hasher = OptimizedSha256::new();
            let mut results = Vec::with_capacity(inputs.len());
            for chunk in inputs.chunks(chunk_size()) {
                for input in chunk {
                    let sha256_hash: [u8; 32] = hasher.hash(input);
                    let ripemd160_hash = Ripemd160::digest(sha256_hash);
                    let mut result = [0u8; 20];
                    result.copy_from_slice(&ripemd160_hash);
                    results.push(result);
                }
            }
            return results;
        }

        // Serial chunked processing — see `batch_sha256` for rationale.
        let hasher = OptimizedSha256::new();
        let mut results = Vec::with_capacity(inputs.len());
        for chunk in inputs.chunks(chunk_size()) {
            for input in chunk {
                let sha256_hash: [u8; 32] = hasher.hash(input);
                let ripemd160_hash = Ripemd160::digest(sha256_hash);
                let mut result = [0u8; 20];
                result.copy_from_slice(&ripemd160_hash);
                results.push(result);
            }
        }
        results
    }
}

#[cfg(feature = "production")]
pub use constant_folding::*;
#[cfg(feature = "production")]
pub use precomputed_constants::*;

/// Proven runtime bounds for BLVM optimizations
///
/// These bounds have been formally proven and can be used for runtime
/// optimizations without additional safety checks. Unlike proof-time limits
/// (in `_helpers::proof_limits`), these represent actual Bitcoin limits that
/// have been proven to hold in all cases.
///
/// Reference: BLVM Optimization Pass
#[cfg(feature = "production")]
pub mod proven_bounds {
    use crate::constants::{MAX_INPUTS, MAX_OUTPUTS};

    /// Maximum transaction size (proven by formal verification in transaction.rs)
    pub const MAX_TX_SIZE_PROVEN: usize = 100000; // Bytes

    /// Maximum block size (proven by formal verification in block.rs)
    pub const MAX_BLOCK_SIZE_PROVEN: usize = 4000000; // Bytes (4MB)

    /// Maximum inputs per transaction (proven by formal verification)
    /// References actual Bitcoin limit from constants.rs
    pub const MAX_INPUTS_PROVEN: usize = MAX_INPUTS;

    /// Maximum outputs per transaction (proven by formal verification)
    /// References actual Bitcoin limit from constants.rs
    pub const MAX_OUTPUTS_PROVEN: usize = MAX_OUTPUTS;

    /// Maximum transactions per block (proven by formal verification)
    /// Note: Bitcoin consensus rules place no explicit count limit, but the
    /// practical limit is around 10,000 transactions per block given block size limits.
    pub const MAX_TRANSACTIONS_PROVEN: usize = 10000;

    /// Maximum previous headers for difficulty adjustment (proven by formal verification)
    pub const MAX_PREV_HEADERS_PROVEN: usize = 5;
}

/// Optimized access using proven bounds
///
/// Uses bounds proven by formal verification to optimize runtime access.
/// This is safe because formal proofs guarantee these bounds hold.
///
/// Reference: Formal proofs in transaction.rs, block.rs, mining.rs, pow.rs, etc.
/// These proofs formally verify that certain bounds always hold, allowing us to
/// use optimized access patterns without runtime bounds checks.
#[cfg(feature = "production")]
pub mod optimized_access {
    use super::proven_bounds;

    /// Get element with proven bounds check
    ///
    /// Uses proven maximum sizes to optimize bounds checking.
    /// For transactions proven to have <= MAX_INPUTS_PROVEN inputs,
    /// we can use optimized access patterns.
    ///
    /// # Safety
    /// This function is safe because formal proofs guarantee bounds.
    /// However, it still returns `Option` to handle cases where:
    /// - Runtime bounds differ from proof bounds (should not happen in practice)
    /// - Defensive programming (fail-safe)
    ///
    /// # Panics
    /// Never panics - always returns `None` if out of bounds.
    ///
    /// # Examples
    /// ```rust
    /// use blvm_consensus::optimizations::optimized_access::get_proven;
    /// use blvm_consensus::types::Transaction;
    ///
    /// # let tx = Transaction { version: 1, inputs: vec![].into(), outputs: vec![].into(), lock_time: 0 };
    /// # let index = 0;
    /// if let Some(input) = get_proven(&tx.inputs, index) {
    ///     // Safe to use
    /// }
    /// ```
    #[inline(always)]
    pub fn get_proven<T>(slice: &[T], index: usize) -> Option<&T> {
        // Formal proofs have proven index < MAX_SIZE in various proofs
        // We can use unsafe access for proven-safe indices
        // This is safe because formal proofs guarantee bounds
        if index < slice.len() {
            unsafe { Some(slice.get_unchecked(index)) }
        } else {
            None
        }
    }

    /// Pre-allocate buffer using proven maximum size
    ///
    /// Uses proven maximum sizes to avoid reallocation.
    /// For example, transaction buffers can be pre-sized to MAX_TX_SIZE_PROVEN.
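    ///
    /// # Example
    ///
    /// An illustrative doc-test (requires the `production` feature):
    ///
    /// ```rust
    /// use blvm_consensus::optimizations::optimized_access::prealloc_proven;
    ///
    /// let buf: Vec<u8> = prealloc_proven(1024);
    /// assert!(buf.capacity() >= 1024); // no reallocation up to the requested bound
    /// assert!(buf.is_empty());         // capacity only; length stays 0
    /// ```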
    #[inline(always)]
    pub fn prealloc_proven<T>(max_size: usize) -> Vec<T> {
        // Pre-allocate to proven maximum to avoid reallocation
        Vec::with_capacity(max_size)
    }

    /// Pre-allocate transaction buffer using proven maximum
    #[inline(always)]
    pub fn prealloc_tx_buffer() -> Vec<u8> {
        prealloc_proven::<u8>(proven_bounds::MAX_TX_SIZE_PROVEN)
    }

    /// Pre-allocate block buffer using proven maximum
    #[inline(always)]
    pub fn prealloc_block_buffer() -> Vec<u8> {
        prealloc_proven::<u8>(proven_bounds::MAX_BLOCK_SIZE_PROVEN)
    }

    /// Get element with proven bounds (alias for get_proven for compatibility)
    #[inline(always)]
    pub fn get_proven_by_<T>(slice: &[T], index: usize) -> Option<&T> {
        get_proven(slice, index)
    }
}

/// Alias module for _optimized_access (for backward compatibility)
#[cfg(feature = "production")]
pub mod _optimized_access {
    use super::optimized_access;

    /// Get element with proven bounds
    #[inline(always)]
    pub fn get_proven_by_<T>(slice: &[T], index: usize) -> Option<&T> {
        optimized_access::get_proven(slice, index)
    }
}

/// Re-export prealloc helpers for convenience
#[cfg(feature = "production")]
pub use optimized_access::{prealloc_block_buffer, prealloc_tx_buffer};

/// Reference implementations for equivalence proofs
///
/// These are safe versions of optimized functions, used to prove
/// that optimizations are correct via formal verification.
#[cfg(feature = "production")]
pub mod reference_implementations {
    /// Reference (safe) implementation of get_proven
    /// This is the version we prove equivalence against
    #[inline(always)]
    pub fn get_proven_reference<T>(slice: &[T], index: usize) -> Option<&T> {
        slice.get(index) // Safe version
    }
}

/// Runtime assertions for optimization correctness
///
/// These functions provide runtime checks in debug builds to verify
/// that optimizations match their reference implementations.
#[cfg(all(
    feature = "production",
    any(debug_assertions, feature = "runtime-invariants")
))]
pub mod runtime_assertions {
    use super::optimized_access::get_proven;
    use super::reference_implementations::get_proven_reference;

    /// Checked version of get_proven with runtime assertions
    ///
    /// This function performs runtime checks in debug builds to ensure
    /// the optimized implementation matches the reference implementation.
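    ///
    /// # Example
    ///
    /// An illustrative sketch (requires the `production` feature plus
    /// `debug_assertions` or the `runtime-invariants` feature, which gate this
    /// module):
    ///
    /// ```rust,ignore
    /// use blvm_consensus::optimizations::runtime_assertions::get_proven_checked;
    ///
    /// let data = [10u32, 20, 30];
    /// assert_eq!(get_proven_checked(&data, 1), Some(&20));
    /// assert_eq!(get_proven_checked(&data, 3), None);
    /// ```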
    #[inline(always)]
    pub fn get_proven_checked<T>(slice: &[T], index: usize) -> Option<&T> {
        let result_optimized = get_proven(slice, index);
        let result_reference = get_proven_reference(slice, index);

        // Runtime check: both must agree
        debug_assert_eq!(
            result_optimized.is_some(),
            result_reference.is_some(),
            "Optimization correctness check failed: optimized and reference disagree on Some/None"
        );

        if let (Some(opt_val), Some(ref_val)) = (result_optimized, result_reference) {
            debug_assert_eq!(
                opt_val as *const T,
                ref_val as *const T,
                "Optimization correctness check failed: optimized and reference return different pointers"
            );
        }

        result_optimized
    }
}