use super::{moon, sun};
use std::sync::atomic::{AtomicUsize, Ordering};
/// Sizing descriptor for a worker pool.
///
/// Only the number of workers is stored; nothing in this type creates or
/// manages threads.
pub struct M1MaxThreadPool {
    worker_count: usize,
}

impl Default for M1MaxThreadPool {
    /// Equivalent to [`M1MaxThreadPool::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl M1MaxThreadPool {
    /// Worker count assigned by [`M1MaxThreadPool::new`].
    const DEFAULT_WORKER_COUNT: usize = 8;

    /// Creates a pool descriptor with the fixed default worker count (8).
    pub fn new() -> Self {
        Self {
            worker_count: Self::DEFAULT_WORKER_COUNT,
        }
    }

    /// Returns the number of workers this descriptor was created with.
    pub fn worker_count(&self) -> usize {
        self.worker_count
    }
}
/// Four `f64` lanes stored in a value whose alignment is forced to 128 bytes.
#[repr(align(128))]
pub struct CacheAlignedBatch {
    /// The four lane values.
    pub data: [f64; 4],
}

impl CacheAlignedBatch {
    /// Wraps four values in an aligned batch.
    pub fn new(values: [f64; 4]) -> Self {
        Self { data: values }
    }

    /// Splits `slice` into consecutive batches of four values.
    ///
    /// When the length is not a multiple of four, the unused lanes of the
    /// final batch are filled with `0.0`. An empty slice yields an empty
    /// vector.
    pub fn from_slice(slice: &[f64]) -> Vec<Self> {
        slice
            .chunks(4)
            .map(|lanes| {
                let mut padded = [0.0_f64; 4];
                // `chunks(4)` never yields more than four elements, so the
                // destination prefix always has the same length as `lanes`.
                padded[..lanes.len()].copy_from_slice(lanes);
                Self::new(padded)
            })
            .collect()
    }
}
/// Walks `count` elements starting at `_ptr` in strides of eight.
///
/// NOTE: no prefetch instruction is issued by this implementation. On
/// `aarch64` the address of every eighth element is computed and then
/// discarded; on every other target the loop body is empty.
///
/// # Safety
///
/// On `aarch64`, `_ptr.add(i)` is evaluated for `i = 0, 8, 16, ...` while
/// `i < count`, so `_ptr` must point into a single allocation that contains
/// at least those offsets.
#[inline]
pub unsafe fn prefetch_astronomical_data(_ptr: *const f64, count: usize) {
    const STRIDE: usize = 8;

    for _offset in (0..count).step_by(STRIDE) {
        #[cfg(target_arch = "aarch64")]
        {
            // SAFETY: the caller guarantees `_ptr` is valid for every visited
            // offset below `count` (see the `# Safety` section).
            let _stride_start = unsafe { _ptr.add(_offset) };
        }
    }
}
/// Computes the altitude angle for four declination / hour-angle pairs at
/// one latitude.
///
/// For each lane `i` the result is
/// `asin(sin(lat)·sin(dec[i]) + cos(lat)·cos(dec[i])·cos(ha[i]))`.
/// All angles are passed straight to `f64::sin` / `f64::cos`, i.e. they are
/// interpreted as radians, and the returned altitudes are radians as well.
///
/// The argument of `asin` is clamped to `[-1.0, 1.0]` first. Floating-point
/// rounding can push the sum marginally outside that range (for example when
/// `latitude_rad == declination[i]` and `hour_angle[i] == 0`), and without the
/// clamp `asin` would return NaN for an otherwise valid input. NaN inputs
/// still propagate to NaN outputs.
#[inline]
pub fn m1_batch_altitude(
    latitude_rad: f64,
    declination: &[f64; 4],
    hour_angle: &[f64; 4],
) -> CacheAlignedBatch {
    // The latitude terms are shared by all four lanes.
    let (sin_lat, cos_lat) = latitude_rad.sin_cos();

    let mut result = [0.0; 4];
    for (lane, (&dec, &ha)) in result
        .iter_mut()
        .zip(declination.iter().zip(hour_angle.iter()))
    {
        let sin_altitude = sin_lat * dec.sin() + cos_lat * dec.cos() * ha.cos();
        // Guard against rounding overshoot so `asin` stays in its domain.
        *lane = sin_altitude.clamp(-1.0, 1.0).asin();
    }

    CacheAlignedBatch { data: result }
}
/// Collects events for each `(location, time)` pair using a 12-hour window.
///
/// `locations[i]` is paired with `times[i]`; if the slices differ in length,
/// the surplus elements of the longer one are ignored. Each pair is handed to
/// `crate::events::collect_events_within_window`, and the per-pair results
/// are returned in input order.
///
/// Despite the name, the pairs are processed one after another on the
/// calling thread.
// Kept although nothing visible in this file calls it.
#[allow(dead_code)]
pub fn parallel_event_collection_m1(
    locations: &[crate::astro::Location],
    times: &[chrono::DateTime<chrono_tz::Tz>],
) -> Vec<Vec<(chrono::DateTime<chrono_tz::Tz>, &'static str)>> {
    // The same window length applies to every pair.
    let window = chrono::Duration::hours(12);

    let mut collected = Vec::with_capacity(locations.len().min(times.len()));
    for (location, time) in locations.iter().zip(times.iter()) {
        collected.push(crate::events::collect_events_within_window(
            location, time, window,
        ));
    }
    collected
}
/// Solar position, lunar position and a 16-slot `f64` scratch array stored
/// together with C field layout.
#[repr(C)]
pub struct M1L2OptimizedState {
    /// Current solar position.
    pub solar_pos: crate::astro::sun::SolarPosition,
    /// Current lunar position.
    pub lunar_pos: crate::astro::moon::LunarPosition,
    /// Scratch storage for 16 `f64` values; nothing in this block assigns a
    /// meaning to individual slots.
    pub trig_cache: [f64; 16],
}

impl Default for M1L2OptimizedState {
    /// Equivalent to [`M1L2OptimizedState::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl M1L2OptimizedState {
    /// Creates a state in which every numeric field is `0.0`.
    pub fn new() -> Self {
        let solar_pos = sun::SolarPosition {
            altitude: 0.0,
            azimuth: 0.0,
        };
        let lunar_pos = moon::LunarPosition {
            altitude: 0.0,
            azimuth: 0.0,
            distance: 0.0,
            illumination: 0.0,
            phase_angle: 0.0,
            angular_diameter: 0.0,
        };

        Self {
            solar_pos,
            lunar_pos,
            trig_cache: [0.0; 16],
        }
    }

    /// Returns the in-memory size of this struct in bytes.
    pub fn size_bytes(&self) -> usize {
        std::mem::size_of_val(self)
    }

    /// Returns `true` when the struct is strictly smaller than 32 MiB.
    pub fn fits_in_l2(&self) -> bool {
        // Budget the struct size is compared against (32 MiB).
        const L2_BUDGET_BYTES: usize = 32 * 1024 * 1024;
        self.size_bytes() < L2_BUDGET_BYTES
    }
}
/// Lock-free running total and high-water mark of recorded allocation sizes.
///
/// The tracker only ever adds to the total; there is no method for recording
/// a deallocation.
pub struct AllocationTracker {
    total_bytes: AtomicUsize,
    peak_bytes: AtomicUsize,
}

impl Default for AllocationTracker {
    /// Equivalent to [`AllocationTracker::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl AllocationTracker {
    /// Creates a tracker with both counters at zero.
    pub fn new() -> Self {
        Self {
            total_bytes: AtomicUsize::new(0),
            peak_bytes: AtomicUsize::new(0),
        }
    }

    /// Adds `size` bytes to the running total and raises the peak if the new
    /// total exceeds it.
    pub fn record_allocation(&self, size: usize) {
        // `fetch_add` returns the value *before* the addition, so the size
        // has to be added again to obtain the total this call produced.
        // `wrapping_add` mirrors the wrap-around semantics of `fetch_add`.
        let previous_total = self.total_bytes.fetch_add(size, Ordering::Relaxed);
        let new_total = previous_total.wrapping_add(size);

        // A single atomic max avoids the load-then-store race in which a
        // concurrent caller could overwrite a higher peak with a lower one.
        self.peak_bytes.fetch_max(new_total, Ordering::Relaxed);
    }

    /// Returns the sum of all sizes recorded so far.
    pub fn total_allocated(&self) -> usize {
        self.total_bytes.load(Ordering::Relaxed)
    }

    /// Returns the largest running total observed so far.
    pub fn peak_allocated(&self) -> usize {
        self.peak_bytes.load(Ordering::Relaxed)
    }
}
/// Tuning constants used by this module.
///
/// NOTE(review): the hardware figures below are reproduced as written in
/// this file; confirm them against the vendor's published specifications
/// before relying on them for sizing decisions.
pub mod constants {
    /// Bytes per kibibyte.
    const KIB: usize = 1024;
    /// Bytes per mebibyte.
    const MIB: usize = 1024 * KIB;

    /// Number of performance cores.
    pub const M1_MAX_PERFORMANCE_CORES: usize = 8;
    /// Number of efficiency cores.
    pub const M1_MAX_EFFICIENCY_CORES: usize = 2;
    /// Performance plus efficiency cores (10).
    pub const M1_MAX_TOTAL_CORES: usize = M1_MAX_PERFORMANCE_CORES + M1_MAX_EFFICIENCY_CORES;

    /// L1 cache size in bytes (192 KiB).
    pub const L1_CACHE_SIZE: usize = 192 * KIB;
    /// L2 cache size in bytes (32 MiB).
    pub const L2_CACHE_SIZE: usize = 32 * MIB;
    /// L3 cache size in bytes (8 MiB).
    pub const L3_CACHE_SIZE: usize = 8 * MIB;
    /// Cache line size in bytes.
    pub const CACHE_LINE_SIZE: usize = 128;

    /// Number of `f64` lanes processed per batch.
    pub const SIMD_BATCH_SIZE: usize = 4;
    /// Memory bandwidth figure in GB/s.
    pub const MEMORY_BANDWIDTH_GBPS: f64 = 100.0;
}
/// Builds an [`M1MaxConfig`] from the values in the `constants` module.
///
/// `parallelism` is taken from `M1_MAX_PERFORMANCE_CORES`, `simd_width` from
/// `SIMD_BATCH_SIZE`, `cache_line_size` from `CACHE_LINE_SIZE` and
/// `l2_cache_size` from `L2_CACHE_SIZE`.
pub fn m1_max_config() -> M1MaxConfig {
    let parallelism = constants::M1_MAX_PERFORMANCE_CORES;
    let simd_width = constants::SIMD_BATCH_SIZE;
    let cache_line_size = constants::CACHE_LINE_SIZE;
    let l2_cache_size = constants::L2_CACHE_SIZE;

    M1MaxConfig {
        parallelism,
        simd_width,
        cache_line_size,
        l2_cache_size,
    }
}
/// Bundle of tuning parameters returned by `m1_max_config`.
#[derive(Clone, Debug)]
pub struct M1MaxConfig {
    /// Degree of parallelism to use.
    pub parallelism: usize,
    /// Number of values processed per batch.
    pub simd_width: usize,
    /// Cache line size in bytes.
    pub cache_line_size: usize,
    /// L2 cache size in bytes.
    pub l2_cache_size: usize,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A batch value must start on a 128-byte boundary.
    #[test]
    fn test_cache_aligned_batch() {
        let batch = CacheAlignedBatch::new([1.0, 2.0, 3.0, 4.0]);
        let address = &batch as *const CacheAlignedBatch as usize;
        assert_eq!(address % 128, 0);
    }

    /// The generated configuration carries the expected parallelism, batch
    /// width and cache line size.
    #[test]
    fn test_m1_max_config() {
        let M1MaxConfig {
            parallelism,
            simd_width,
            cache_line_size,
            ..
        } = m1_max_config();
        assert_eq!(parallelism, 8);
        assert_eq!(simd_width, 4);
        assert_eq!(cache_line_size, 128);
    }

    /// A freshly created state must pass the L2 size check.
    #[test]
    fn test_l2_optimization_fits() {
        assert!(M1L2OptimizedState::new().fits_in_l2());
    }
}