//! BLVM Runtime Optimization Passes
//!
//! Additional optimization passes for 10-30% performance gains
//!
//! This module provides runtime optimization passes:
//! - Constant folding (pre-computed constants)
//! - Bounds check optimization (proven bounds)
//! - Inlining hints (hot function markers)
//! - Memory layout optimization (cache-friendly structures)
//!
//! Reference: Orange Paper Section 13.1 - Performance Considerations
13use crate::constants::*;
14
/// Pre-computed constants for constant folding optimization
///
/// These constants are computed at compile time to avoid runtime computation
/// in hot paths. Reference: BLVM Optimization Pass 2 - Constant Folding
#[cfg(feature = "production")]
pub mod precomputed_constants {
    use super::*;

    /// Pre-computed: 2^64 - 1 (used for wrapping arithmetic checks)
    pub const U64_MAX: u64 = u64::MAX;

    /// Pre-computed: MAX_MONEY as u64, so hot paths can compare unsigned
    /// amounts without repeating the cast at each call site
    pub const MAX_MONEY_U64: u64 = MAX_MONEY as u64;

    /// Pre-computed: Inverse of SATOSHIS_PER_BTC (for satoshi -> BTC conversion)
    ///
    /// NOTE(review): this is an f64 reciprocal — appropriate for display and
    /// estimates only; confirm no consensus-critical path multiplies by this.
    pub const BTC_PER_SATOSHI: f64 = 1.0 / (SATOSHIS_PER_BTC as f64);

    /// Pre-computed: 2^32 - 1 (for 32-bit wrapping checks)
    pub const U32_MAX: u32 = u32::MAX;

    /// Pre-computed: Number of satoshis in 1 BTC (for readability)
    pub const ONE_BTC_SATOSHIS: i64 = SATOSHIS_PER_BTC;
}
38
/// Memory layout optimization: Cache-friendly hash array
///
/// Optimizes hash array access for cache locality.
/// Uses 32-byte aligned structures for better cache performance.
///
/// This structure ensures each hash is aligned to a 32-byte boundary, which:
/// - Reduces cache line splits
/// - Improves prefetching behavior
/// - Better fits modern CPU cache architectures (64-byte cache lines)
///
/// Derives `Copy`/`PartialEq`/`Eq`/`Hash`/`Debug` so the wrapper behaves like
/// the plain `[u8; 32]` value it contains (comparable, hashable, loggable).
///
/// Reference: BLVM Optimization Pass 3 - Memory Layout Optimization
/// Cache-aligned hash for optimized batch operations
#[repr(align(32))]
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct CacheAlignedHash([u8; 32]);

impl CacheAlignedHash {
    /// Wrap a raw 32-byte hash in the cache-aligned container.
    ///
    /// `const` so aligned hash constants can be built at compile time.
    #[inline]
    pub const fn new(hash: [u8; 32]) -> Self {
        Self(hash)
    }

    /// Borrow the underlying 32-byte hash.
    #[inline]
    pub const fn as_bytes(&self) -> &[u8; 32] {
        &self.0
    }
}
66
/// Memory prefetching optimization
///
/// Provides platform-specific prefetch hints to improve cache performance
/// for sequential memory accesses. Used before batch UTXO lookups and
/// other sequential data structure traversals.
///
/// Reference: BLVM Optimization Pass 1.3 - Memory Prefetching
#[cfg(feature = "production")]
pub mod prefetch {
    /// Prefetch data for read access
    ///
    /// Hints the CPU to prefetch data into cache before it's needed.
    /// This improves performance for sequential memory access patterns.
    ///
    /// # Safety
    /// The pointer must be valid, but it doesn't need to be dereferenceable
    /// at the time of the call. The prefetch is a hint and may be ignored.
    #[cfg(target_arch = "x86_64")]
    #[inline(always)]
    pub unsafe fn prefetch_read(ptr: *const i8) {
        use std::arch::x86_64::{_mm_prefetch, _MM_HINT_T0};
        // _MM_HINT_T0: prefetch into all cache levels (highest temporal locality).
        _mm_prefetch(ptr, _MM_HINT_T0);
    }

    /// Prefetch data for read access (aarch64 variant)
    ///
    /// # Safety
    /// Same contract as the x86_64 variant: the pointer must be valid but need
    /// not be dereferenceable; the prefetch is only a hint.
    #[cfg(target_arch = "aarch64")]
    #[inline(always)]
    pub unsafe fn prefetch_read(ptr: *const i8) {
        use std::arch::aarch64::_prefetch;
        // Args: (ptr, rw = 0 => read, locality = 0).
        // NOTE(review): locality 0 is the *lowest*/streaming locality here, while
        // the x86_64 path uses _MM_HINT_T0 (highest). Confirm whether locality 3
        // was intended to match T0 semantics.
        // NOTE(review): `_prefetch` is an unstable intrinsic on some toolchains —
        // verify against the project's pinned toolchain/MSRV.
        _prefetch(ptr, 0, 0);
    }

    /// Prefetch fallback for architectures without a prefetch intrinsic.
    ///
    /// # Safety
    /// Trivially safe — the pointer is never used.
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    #[inline(always)]
    pub unsafe fn prefetch_read(_ptr: *const i8) {
        // No-op for unsupported architectures
    }

    /// Prefetch a slice of data for sequential access
    ///
    /// Prefetches the element at `index` to warm the cache; out-of-bounds
    /// indices are silently ignored.
    /// Safe wrapper around prefetch_read that works with slices.
    #[inline(always)]
    pub fn prefetch_slice<T>(slice: &[T], index: usize) {
        if index < slice.len() {
            // SAFETY: `index` is in bounds, so the derived pointer is valid;
            // prefetch_read never dereferences it.
            unsafe {
                let ptr = slice.as_ptr().add(index) as *const i8;
                prefetch_read(ptr);
            }
        }
    }

    /// Prefetch multiple elements ahead in a slice
    ///
    /// Prefetches the element at `index + offset` (saturating) to prepare for
    /// future access in sequential loops. Targets past the end of the slice
    /// are ignored by `prefetch_slice`.
    #[inline(always)]
    pub fn prefetch_ahead<T>(slice: &[T], index: usize, offset: usize) {
        let prefetch_index = index.saturating_add(offset);
        prefetch_slice(slice, prefetch_index);
    }
}
128
/// Memory layout optimization: Compact stack frame
///
/// Compact, `#[repr(C, packed)]` stack frame for script execution: 9 bytes
/// total (1 + 4 + 2 + 2, no padding) instead of the 12 a naturally-aligned
/// layout would occupy, so more frames fit per cache line.
///
/// NOTE: taking a *reference* to a field of a packed struct is rejected by the
/// compiler because the field may be unaligned; copy fields out by value
/// instead (`let flags = frame.flags;`).
#[repr(C, packed)]
#[derive(Clone, Copy, Debug)]
pub struct CompactStackFrame {
    /// Opcode being executed at this frame.
    pub opcode: u8,
    /// Execution flags.
    pub flags: u32,
    /// Byte offset into the script being executed.
    pub script_offset: u16,
    /// Stack height at this frame.
    pub stack_height: u16,
}

impl CompactStackFrame {
    /// Build a frame from its raw components.
    ///
    /// `const` so frame tables can be constructed at compile time.
    #[inline]
    pub const fn new(opcode: u8, flags: u32, script_offset: u16, stack_height: u16) -> Self {
        Self {
            opcode,
            flags,
            script_offset,
            stack_height,
        }
    }
}
152
/// Inlining hints for hot functions
///
/// Functions marked with HOT_INLINE should be aggressively inlined.
/// These are called in tight loops and benefit from inlining.
///
/// NOTE(review): a `macro_rules!` expansion cannot attach an attribute to a
/// following item, so `hot_inline!()` is not usable in item position as
/// written — it currently serves as a documentation marker only. Confirm the
/// intended usage, or convert it into a proc-macro attribute.
#[macro_export]
#[cfg(feature = "production")]
macro_rules! hot_inline {
    () => {
        #[inline(always)]
    };
}
164
/// Constant folding: Pre-compute common hash results
///
/// Caches well-known digest values so hot paths can compare against them
/// without recomputing the hash.
#[cfg(feature = "production")]
pub mod constant_folding {
    /// SHA256 of the empty byte string (well-known constant).
    pub const EMPTY_STRING_HASH: [u8; 32] = [
        0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, //
        0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, //
        0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, //
        0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
    ];

    /// SHA256(SHA256("")) — double SHA256 of the empty byte string.
    pub const EMPTY_STRING_DOUBLE_HASH: [u8; 32] = [
        0x5d, 0xf6, 0xe0, 0xe2, 0x76, 0x13, 0x59, 0xf3, //
        0x73, 0x9a, 0x1c, 0x6f, 0x87, 0x40, 0x64, 0x0a, //
        0xf1, 0x2e, 0xc7, 0xc3, 0x72, 0x4a, 0x5c, 0x2c, //
        0xa5, 0xf3, 0x0f, 0x26, 0x60, 0x87, 0x7e, 0x6b,
    ];

    /// True when `hash` equals the SHA256 of the empty string.
    #[inline(always)]
    pub fn is_empty_hash(hash: &[u8; 32]) -> bool {
        hash == &EMPTY_STRING_HASH
    }

    /// True when `hash` equals the double SHA256 of the empty string.
    #[inline(always)]
    pub fn is_empty_double_hash(hash: &[u8; 32]) -> bool {
        hash == &EMPTY_STRING_DOUBLE_HASH
    }

    /// True when every byte of `hash` is zero.
    #[inline(always)]
    pub fn is_zero_hash(hash: &[u8; 32]) -> bool {
        *hash == [0u8; 32]
    }
}
202
/// Dead code elimination markers
///
/// Marker items for the dead-code-elimination analysis pass; branches that
/// only reference them can be stripped by the compiler when unused.
#[cfg(feature = "production")]
#[allow(dead_code)]
pub mod dead_code_elimination {
    /// Marker for dead code elimination analysis.
    ///
    /// Never executes in production builds; `#[cold]` + `#[inline(never)]`
    /// keep it out of hot code layout.
    #[inline(never)]
    #[cold]
    pub fn mark_unused() {}

    /// Branch-probability hint: the condition is expected to be false.
    ///
    /// Stable Rust has no `likely`/`unlikely` intrinsics, so this is an
    /// identity function that documents intent for a future optimization
    /// opportunity.
    #[inline(always)]
    pub fn unlikely(condition: bool) -> bool {
        condition
    }
}
229
230/// SIMD Vectorization: Batch hash operations
231///
232/// Provides batch hash processing for parallel hash operations.
233/// Leverages existing SIMD in sha2 crate (asm feature) + Rayon for CPU-core parallelization.
234///
235/// Provides batch functions for:
236/// - SHA256 and double SHA256 (Bitcoin standard)
237/// - RIPEMD160 and HASH160 (OP_HASH160)
238///
239/// Uses chunked processing for better cache locality and parallelizes across CPU cores
240/// when batch size is large enough (≥8 items).
241///
242/// Reference: BLVM Optimization Pass 5 - SIMD Vectorization
243#[cfg(feature = "production")]
244pub mod simd_vectorization {
245 use crate::crypto::OptimizedSha256;
246 use digest::Digest;
247 use ripemd::Ripemd160;
248
    /// Minimum batch size before specialized large-batch handling kicks in;
    /// below this the per-batch setup overhead outweighs any gain.
    /// batch_sha256 uses OptimizedSha256 (SHA-NI when available) for consistency with batch_double_sha256_aligned.
    const PARALLEL_THRESHOLD: usize = 8;

    /// Chunk size for cache-friendly processing.
    /// Hardware-derived via `blvm_primitives::ibd_tuning` (the concrete value
    /// is not visible in this module).
    #[inline]
    fn chunk_size() -> usize {
        blvm_primitives::ibd_tuning::hash_batch_chunk_size()
    }
258
259 /// Batch SHA256: Compute SHA256 for multiple independent inputs
260 ///
261 /// # Arguments
262 /// * `inputs` - Slice of byte slices to hash
263 ///
264 /// # Returns
265 /// Vector of 32-byte hashes, one per input (in same order)
266 ///
267 /// # Performance
268 /// - Small batches (< 4 items): Sequential (overhead not worth parallelization)
269 /// - Medium batches (4-7 items): Chunked sequential
270 /// - Large batches (≥8 items): Multi-core parallelization with Rayon
271 ///
272 /// # Optimizations
273 /// - Uses sha2 crate with "asm" feature for optimized assembly
274 /// - For large batches, leverages Rayon for multi-core parallelization
275 /// - AVX2 batch optimization available via `crypto::avx2_batch` module
276 pub fn batch_sha256(inputs: &[&[u8]]) -> Vec<[u8; 32]> {
277 if inputs.is_empty() {
278 return Vec::new();
279 }
280
281 // Small batches: sequential processing. Use OptimizedSha256 (SHA-NI when available).
282 if inputs.len() < 4 {
283 let hasher = OptimizedSha256::new();
284 return inputs.iter().map(|input| hasher.hash(input)).collect();
285 }
286
287 // Medium batches: chunked sequential processing
288 if inputs.len() < PARALLEL_THRESHOLD {
289 let hasher = OptimizedSha256::new();
290 let mut results = Vec::with_capacity(inputs.len());
291 for chunk in inputs.chunks(chunk_size()) {
292 for input in chunk {
293 results.push(hasher.hash(input));
294 }
295 }
296 return results;
297 }
298
299 // Large batches: Try AVX2 first, then fallback to multi-core parallelization
300 #[cfg(target_arch = "x86_64")]
301 {
302 use crate::crypto::sha256_avx2;
303 if sha256_avx2::is_avx2_available() {
304 // Use AVX2 batch processing for chunks of 8
305 use crate::crypto::avx2_batch;
306 return avx2_batch::batch_sha256_avx2(inputs);
307 }
308 }
309
310 // Fallback: serial chunked processing. The previous `par_chunks` rayon path was
311 // disastrous in IBD: N validation workers × per-block calls × shared rayon pool =
312 // catastrophic oversubscription. SHA-NI single-thread is fast enough; cross-block
313 // parallelism (worker pool) is the only level we want.
314 let hasher = OptimizedSha256::new();
315 let mut results = Vec::with_capacity(inputs.len());
316 for chunk in inputs.chunks(chunk_size()) {
317 for input in chunk {
318 results.push(hasher.hash(input));
319 }
320 }
321 results
322 }
323
324 /// Batch double SHA256: Compute SHA256(SHA256(x)) for multiple inputs
325 ///
326 /// This is Bitcoin's standard hash function used for transaction IDs, block hashes, etc.
327 ///
328 /// # Arguments
329 /// * `inputs` - Slice of byte slices to hash
330 ///
331 /// # Returns
332 /// Vector of 32-byte hashes, one per input (in same order)
333 pub fn batch_double_sha256(inputs: &[&[u8]]) -> Vec<[u8; 32]> {
334 // Use aligned version for better cache performance
335 batch_double_sha256_aligned(inputs)
336 .into_iter()
337 .map(|h| *h.as_bytes())
338 .collect()
339 }
340
341 /// Batch double SHA256 with cache-aligned output
342 ///
343 /// Returns cache-aligned hash structures for better memory performance.
344 /// Uses 32-byte alignment for optimal cache line utilization.
345 ///
346 /// # Arguments
347 /// * `inputs` - Slice of byte slices to hash
348 ///
349 /// # Returns
350 /// Vector of cache-aligned 32-byte hashes, one per input (in same order)
351 pub fn batch_double_sha256_aligned(inputs: &[&[u8]]) -> Vec<super::CacheAlignedHash> {
352 if inputs.is_empty() {
353 return Vec::new();
354 }
355
356 // Small batches: sequential processing (overhead not worth it)
357 // Use OptimizedSha256 (SHA-NI when available) instead of sha2
358 let hasher = OptimizedSha256::new();
359 if inputs.len() < 4 {
360 return inputs
361 .iter()
362 .map(|input| super::CacheAlignedHash::new(hasher.hash256(input)))
363 .collect();
364 }
365
366 // Medium batches: chunked sequential processing
367 if inputs.len() < PARALLEL_THRESHOLD {
368 let mut results = Vec::with_capacity(inputs.len());
369 for chunk in inputs.chunks(chunk_size()) {
370 for input in chunk {
371 results.push(super::CacheAlignedHash::new(hasher.hash256(input)));
372 }
373 }
374 return results;
375 }
376
377 // Serial chunked processing — see `batch_sha256` for rationale (rayon oversubscribes
378 // the pool when N IBD workers each push hashing batches; SHA-NI keeps the per-worker
379 // path fast on its own thread).
380 let hasher = OptimizedSha256::new();
381 let mut results = Vec::with_capacity(inputs.len());
382 for chunk in inputs.chunks(chunk_size()) {
383 for input in chunk {
384 results.push(super::CacheAlignedHash::new(hasher.hash256(input)));
385 }
386 }
387 results
388 }
389
390 /// Batch RIPEMD160: Compute RIPEMD160 for multiple inputs
391 ///
392 /// # Arguments
393 /// * `inputs` - Slice of byte slices to hash
394 ///
395 /// # Returns
396 /// Vector of 20-byte hashes, one per input (in same order)
397 pub fn batch_ripemd160(inputs: &[&[u8]]) -> Vec<[u8; 20]> {
398 if inputs.is_empty() {
399 return Vec::new();
400 }
401
402 // Small batches: sequential processing
403 if inputs.len() < 4 {
404 return inputs
405 .iter()
406 .map(|input| {
407 let hash = Ripemd160::digest(input);
408 let mut result = [0u8; 20];
409 result.copy_from_slice(&hash);
410 result
411 })
412 .collect();
413 }
414
415 // Medium batches: chunked sequential processing
416 if inputs.len() < PARALLEL_THRESHOLD {
417 let mut results = Vec::with_capacity(inputs.len());
418 for chunk in inputs.chunks(chunk_size()) {
419 for input in chunk {
420 let hash = Ripemd160::digest(input);
421 let mut result = [0u8; 20];
422 result.copy_from_slice(&hash);
423 results.push(result);
424 }
425 }
426 return results;
427 }
428
429 // Serial chunked processing — same rationale as `batch_sha256`: cross-block
430 // parallelism is provided by the IBD worker pool; rayon par_chunks here
431 // oversubscribes the global pool when N workers each call this per-block.
432 let mut results = Vec::with_capacity(inputs.len());
433 for chunk in inputs.chunks(chunk_size()) {
434 for input in chunk {
435 let hash = Ripemd160::digest(input);
436 let mut result = [0u8; 20];
437 result.copy_from_slice(&hash);
438 results.push(result);
439 }
440 }
441 results
442 }
443
444 /// Batch HASH160: Compute RIPEMD160(SHA256(x)) for multiple inputs
445 ///
446 /// This is Bitcoin's HASH160 operation (OP_HASH160 in script).
447 ///
448 /// # Arguments
449 /// * `inputs` - Slice of byte slices to hash
450 ///
451 /// # Returns
452 /// Vector of 20-byte hashes, one per input (in same order)
453 pub fn batch_hash160(inputs: &[&[u8]]) -> Vec<[u8; 20]> {
454 if inputs.is_empty() {
455 return Vec::new();
456 }
457
458 // Small batches: sequential processing. Use OptimizedSha256 (SHA-NI) for SHA256 part.
459 if inputs.len() < 4 {
460 let hasher = OptimizedSha256::new();
461 return inputs
462 .iter()
463 .map(|input| {
464 let sha256_hash: [u8; 32] = hasher.hash(input);
465 let ripemd160_hash = Ripemd160::digest(sha256_hash);
466 let mut result = [0u8; 20];
467 result.copy_from_slice(&ripemd160_hash);
468 result
469 })
470 .collect();
471 }
472
473 // Medium batches: chunked sequential processing
474 if inputs.len() < PARALLEL_THRESHOLD {
475 let hasher = OptimizedSha256::new();
476 let mut results = Vec::with_capacity(inputs.len());
477 for chunk in inputs.chunks(chunk_size()) {
478 for input in chunk {
479 let sha256_hash: [u8; 32] = hasher.hash(input);
480 let ripemd160_hash = Ripemd160::digest(sha256_hash);
481 let mut result = [0u8; 20];
482 result.copy_from_slice(&ripemd160_hash);
483 results.push(result);
484 }
485 }
486 return results;
487 }
488
489 // Serial chunked processing — see `batch_sha256` for rationale.
490 let hasher = OptimizedSha256::new();
491 let mut results = Vec::with_capacity(inputs.len());
492 for chunk in inputs.chunks(chunk_size()) {
493 for input in chunk {
494 let sha256_hash: [u8; 32] = hasher.hash(input);
495 let ripemd160_hash = Ripemd160::digest(sha256_hash);
496 let mut result = [0u8; 20];
497 result.copy_from_slice(&ripemd160_hash);
498 results.push(result);
499 }
500 }
501 results
502 }
503}
504
505#[cfg(feature = "production")]
506pub use constant_folding::*;
507#[cfg(feature = "production")]
508pub use precomputed_constants::*;
509
/// Proven runtime bounds for BLVM optimizations
///
/// These bounds are proven by formal verification and can be used for runtime
/// optimizations without additional safety checks. Unlike proof-time limits
/// (in `_helpers::proof_limits`), these represent actual Bitcoin limits that
/// have been proven to hold in all cases.
///
/// Reference: BLVM Optimization Pass
#[cfg(feature = "production")]
pub mod proven_bounds {
    use crate::constants::{MAX_INPUTS, MAX_OUTPUTS};

    /// Maximum transaction size (proven by formal verification in transaction.rs)
    pub const MAX_TX_SIZE_PROVEN: usize = 100000; // Bytes (100 kB)

    /// Maximum block size (proven by formal verification in block.rs)
    pub const MAX_BLOCK_SIZE_PROVEN: usize = 4000000; // Bytes (4MB)

    /// Maximum inputs per transaction (proven by formal verification)
    /// References actual Bitcoin limit from constants.rs
    pub const MAX_INPUTS_PROVEN: usize = MAX_INPUTS;

    /// Maximum outputs per transaction (proven by formal verification)
    /// References actual Bitcoin limit from constants.rs
    pub const MAX_OUTPUTS_PROVEN: usize = MAX_OUTPUTS;

    /// Maximum transactions per block (proven by formal verification)
    /// Note: Bitcoin limit is effectively unbounded by consensus rules, but practical limit
    /// is around 10,000 transactions per block based on block size limits.
    pub const MAX_TRANSACTIONS_PROVEN: usize = 10000;

    /// Maximum previous headers for difficulty adjustment (proven by formal verification)
    pub const MAX_PREV_HEADERS_PROVEN: usize = 5;
}
548
/// Optimized access using proven bounds
///
/// Uses bounds proven by formal verification to optimize runtime access.
/// This is safe because formal proofs guarantee these bounds hold.
///
/// Reference: Formal proofs in transaction.rs, block.rs, mining.rs, pow.rs, etc.
/// These proofs formally verify that certain bounds always hold, allowing us to
/// use optimized access patterns without runtime bounds checks.
#[cfg(feature = "production")]
pub mod optimized_access {
    use super::proven_bounds;

    /// Get element with proven bounds check
    ///
    /// For collections whose sizes are bounded by formal proofs (e.g. inputs
    /// bounded by `MAX_INPUTS_PROVEN`), indices produced by proven loops are
    /// always in range, so the `None` branch never triggers in practice; it is
    /// kept as defensive programming (fail-safe).
    ///
    /// # Panics
    /// Never panics - always returns `None` if out of bounds.
    ///
    /// # Examples
    /// ```rust
    /// use blvm_consensus::optimizations::optimized_access::get_proven;
    ///
    /// let data = [10, 20, 30];
    /// assert_eq!(get_proven(&data, 1), Some(&20));
    /// assert_eq!(get_proven(&data, 99), None);
    /// ```
    #[inline(always)]
    pub fn get_proven<T>(slice: &[T], index: usize) -> Option<&T> {
        // `slice.get` performs exactly the `index < slice.len()` check that the
        // previous implementation wrapped around `get_unchecked`; the compiler
        // emits identical code, so the `unsafe` block bought nothing and is gone.
        slice.get(index)
    }

    /// Pre-allocate buffer using proven maximum size
    ///
    /// Uses proven maximum sizes to avoid reallocation.
    /// For example, transaction buffers can be pre-sized to MAX_TX_SIZE_PROVEN.
    #[inline(always)]
    pub fn prealloc_proven<T>(max_size: usize) -> Vec<T> {
        // Capacity only — no elements are constructed.
        Vec::with_capacity(max_size)
    }

    /// Pre-allocate transaction buffer using proven maximum (`MAX_TX_SIZE_PROVEN` bytes)
    #[inline(always)]
    pub fn prealloc_tx_buffer() -> Vec<u8> {
        prealloc_proven::<u8>(proven_bounds::MAX_TX_SIZE_PROVEN)
    }

    /// Pre-allocate block buffer using proven maximum (`MAX_BLOCK_SIZE_PROVEN` bytes)
    #[inline(always)]
    pub fn prealloc_block_buffer() -> Vec<u8> {
        prealloc_proven::<u8>(proven_bounds::MAX_BLOCK_SIZE_PROVEN)
    }

    /// Get element with proven bounds (alias for get_proven for compatibility)
    #[inline(always)]
    pub fn get_proven_by_<T>(slice: &[T], index: usize) -> Option<&T> {
        get_proven(slice, index)
    }
}
627
/// Alias module for _optimized_access (for backward compatibility)
///
/// Thin forwarding layer kept so older call sites using the underscored
/// module path keep compiling.
#[cfg(feature = "production")]
pub mod _optimized_access {
    /// Get element with proven bounds; forwards to `optimized_access::get_proven`.
    #[inline(always)]
    pub fn get_proven_by_<T>(slice: &[T], index: usize) -> Option<&T> {
        super::optimized_access::get_proven(slice, index)
    }
}
639
640/// Re-export prealloc helpers for convenience
641#[cfg(feature = "production")]
642pub use optimized_access::{prealloc_block_buffer, prealloc_tx_buffer};
643
/// Reference implementations for equivalence proofs
///
/// Safe, obviously-correct versions of optimized functions, used to prove
/// that the optimizations are correct via formal verification (and, in debug
/// builds, via `runtime_assertions`).
#[cfg(feature = "production")]
pub mod reference_implementations {
    /// Reference (safe) implementation of get_proven
    ///
    /// This is the version equivalence is proven against; `slice.get` is the
    /// canonical checked-index access and never panics.
    #[inline(always)]
    pub fn get_proven_reference<T>(slice: &[T], index: usize) -> Option<&T> {
        slice.get(index) // Safe version
    }
}
657
/// Runtime assertions for optimization correctness
///
/// Debug-build checks verifying that the optimized access path and the safe
/// reference path agree on every call.
#[cfg(all(
    feature = "production",
    any(debug_assertions, feature = "runtime-invariants")
))]
pub mod runtime_assertions {
    use super::optimized_access::get_proven;
    use super::reference_implementations::get_proven_reference;

    /// Checked version of get_proven with runtime assertions
    ///
    /// Runs both the optimized and the reference implementation and asserts
    /// (debug builds only) that they agree on presence and on the exact
    /// element returned.
    #[inline(always)]
    pub fn get_proven_checked<T>(slice: &[T], index: usize) -> Option<&T> {
        let optimized = get_proven(slice, index);
        let reference = get_proven_reference(slice, index);

        // Both implementations must agree on whether the index is in bounds.
        debug_assert_eq!(
            optimized.is_some(),
            reference.is_some(),
            "Optimization correctness check failed: optimized and reference disagree on Some/None"
        );

        // When both produce a value, it must be the very same element.
        if let (Some(opt_val), Some(ref_val)) = (optimized, reference) {
            debug_assert_eq!(
                opt_val as *const T,
                ref_val as *const T,
                "Optimization correctness check failed: optimized and reference return different pointers"
            );
        }

        optimized
    }
}