oxiblas_core/
lib.rs

1//! OxiBLAS Core - Foundational types and traits for OxiBLAS.
2//!
3//! This crate provides the core infrastructure for OxiBLAS:
4//!
5//! - **Scalar traits**: `Scalar`, `Real`, `ComplexScalar`, `Field` for numeric types
6//! - **SIMD abstraction**: Custom SIMD layer via `core::arch` intrinsics
7//! - **Memory management**: Aligned allocation, stack-based temporaries
8//! - **Parallelization**: Work partitioning and parallel execution
9//!
10//! # Supported Types
11//!
12//! - `f32`, `f64`: Real floating-point numbers
13//! - `Complex32`, `Complex64`: Complex numbers (via `num-complex`)
14//!
15//! # SIMD Support
16//!
17//! The SIMD abstraction automatically detects and uses the best available
18//! instruction set:
19//!
20//! - **x86_64**: AVX2 (256-bit), AVX512F (512-bit)
21//! - **AArch64**: NEON (128-bit), with 256-bit emulation
22//! - **Fallback**: Scalar operations for unsupported platforms
23//!
24//! # Example
25//!
26//! ```
27//! use oxiblas_core::scalar::{Scalar, Field};
28//! use oxiblas_core::simd::detect_simd_level;
29//!
30//! // Check SIMD capability
31//! let level = detect_simd_level();
32//! println!("SIMD level: {:?}", level);
33//!
34//! // Use scalar traits
35//! let x: f64 = 3.0;
36//! let y: f64 = 4.0;
37//! assert_eq!(x.abs_sq() + y.abs_sq(), 25.0);
38//! ```
39
40#![warn(missing_docs)]
41#![warn(clippy::all)]
42#![allow(clippy::module_name_repetitions)]
43#![allow(clippy::similar_names)]
44#![allow(clippy::too_many_lines)]
45#![allow(clippy::cast_possible_truncation)]
46#![allow(clippy::cast_sign_loss)]
47#![allow(clippy::cast_precision_loss)]
48
49pub mod blocking;
50pub mod memory;
51pub mod parallel;
52pub mod scalar;
53pub mod simd;
54pub mod tuning;
55
56// Re-exports for convenience
57pub use blocking::{
58    BASE_CASE_THRESHOLD, BlockRange, BlockVisitor, MAX_BLOCK_SIZE, MIN_BLOCK_SIZE, RecursiveTask,
59    cache_oblivious_traverse, factorization_panel_width, gemm_block_sizes, morton_decode,
60    morton_index, trsm_block_size,
61};
62pub use memory::{
63    AlignedPool, AlignedVec, Alloc, CACHE_LINE_SIZE, DEFAULT_ALIGN, Global, MemStack, MemoryPool,
64    NumaAllocHint, NumaInterleavingStrategy, NumaTopology, NumaWorkHint, PrefetchDistance,
65    PrefetchLocality, StackReq, get_huge_page_size, get_page_size, numa_alloc, numa_alloc_zeroed,
66    numa_distribute_work, prefetch_read, prefetch_read_range, prefetch_write, prefetch_write_range,
67};
68#[cfg(feature = "parallel")]
69pub use parallel::{CustomRayonPool, RayonGlobalPool};
70pub use parallel::{
71    Par, ParThreshold, PoolScope, SequentialPool, ThreadPool, WorkRange, default_pool,
72    for_each_indexed, for_each_range, map_reduce, partition_work, with_default_pool,
73};
74pub use scalar::{
75    C32, C64, ComplexExt, ComplexScalar, ExtendedPrecision, Field, HasFastFma, I32, I64, KBKSum,
76    KahanSum, Real, Scalar, ScalarBatch, ScalarClass, ScalarClassify, SimdCompatible, ToComplex,
77    UnrollHints, c32, c64, from_polar, from_polar32, imag, imag_unit, imag_unit32, imag32,
78    pairwise_sum, real, real32,
79};
80pub use simd::{
81    SimdChunks, SimdLevel, SimdRegister, SimdScalar, detect_simd_level, detect_simd_level_raw,
82};
83pub use tuning::{AutoTuner, TuningCache, TuningConfig};
84
85/// Prelude module for convenient imports.
86pub mod prelude {
87    pub use crate::blocking::{
88        BlockRange, BlockVisitor, RecursiveTask, cache_oblivious_traverse, gemm_block_sizes,
89    };
90    pub use crate::memory::{
91        AlignedPool, AlignedVec, MemStack, MemoryPool, NumaAllocHint, NumaTopology,
92        PrefetchLocality, StackReq, numa_distribute_work, prefetch_read, prefetch_write,
93    };
94    pub use crate::parallel::{Par, ParThreshold};
95    pub use crate::scalar::{
96        C32, C64, ComplexExt, ComplexScalar, ExtendedPrecision, Field, HasFastFma, I32, I64,
97        KBKSum, KahanSum, Real, Scalar, ScalarBatch, ScalarClass, ScalarClassify, SimdCompatible,
98        ToComplex, UnrollHints, c32, c64, imag, pairwise_sum, real,
99    };
100    pub use crate::simd::{SimdChunks, SimdLevel, SimdRegister, SimdScalar, detect_simd_level};
101    pub use crate::tuning::{AutoTuner, TuningCache, TuningConfig};
102}
103
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the level reported by `detect_simd_level`.
    ///
    /// With the `force-scalar` feature the level must be exactly
    /// `SimdLevel::Scalar`; without it, x86_64 and AArch64 targets must
    /// report at least `SimdLevel::Simd128`. Other targets only print the
    /// detected level.
    #[test]
    fn test_simd_detection() {
        let detected = detect_simd_level();
        println!("Detected SIMD level: {:?}", detected);

        // Feature-forced scalar mode overrides whatever the hardware offers.
        #[cfg(feature = "force-scalar")]
        assert_eq!(detected, SimdLevel::Scalar);

        // Hardware detection is only asserted on the two common architectures.
        #[cfg(all(
            not(feature = "force-scalar"),
            any(target_arch = "x86_64", target_arch = "aarch64")
        ))]
        assert!(detected >= SimdLevel::Simd128);
    }

    /// Exercises `abs`, `conj` and `is_real` on a real and a complex value.
    #[test]
    fn test_scalar_traits() {
        use num_complex::Complex64;

        // Real case: magnitude drops the sign, conjugation leaves the value
        // unchanged, and the type reports itself as real.
        let negative: f64 = -3.0;
        assert_eq!(negative.abs(), 3.0);
        assert_eq!(negative.conj(), -3.0);
        assert!(f64::is_real());

        // Complex case: 3 + 4i has magnitude 5 (compared with a tolerance),
        // and the type does not report itself as real.
        let three_four = Complex64::new(3.0, 4.0);
        let magnitude_error = (three_four.abs() - 5.0).abs();
        assert!(magnitude_error < 1e-10);
        assert!(!Complex64::is_real());
    }

    /// Checks that `AlignedVec::zeros` yields the requested length and a
    /// data pointer that is a multiple of `DEFAULT_ALIGN`.
    #[test]
    fn test_aligned_alloc() {
        let buffer: AlignedVec<f64> = AlignedVec::zeros(100);
        assert_eq!(buffer.len(), 100);

        // The address of the first element must be a multiple of the alignment.
        let address = buffer.as_ptr() as usize;
        assert_eq!(address % DEFAULT_ALIGN, 0);
    }
}