#![allow(unsafe_code, dead_code)]
use core::arch::aarch64::*;
use libm::sqrtf;
use super::{LABS_A, LABS_B, LABS_C, LABS_L};
/// Returns the index of the `LABS_*` table entry closest to `query`.
///
/// `query[0]`, `query[1]` and `query[2]` are compared against `LABS_L`,
/// `LABS_A` and `LABS_B` respectively (presumably CIELAB L*, a*, b* — confirm
/// against the table generator). This is a safe front door that forwards to
/// the NEON implementation.
pub fn nearest_idx(query: [f32; 3]) -> usize {
    // SAFETY: the callee requires the `neon` target feature.
    // NOTE(review): this assumes the module is only built for aarch64 targets
    // on which NEON is always available — confirm the `cfg` gate on the parent
    // `mod` declaration.
    unsafe { nearest_idx_neon(query) }
}
/// Scans the `LABS_*` tables for the entry with the smallest squared distance
/// to `query` and returns its index.
///
/// For a table entry `(l1, a1, b1, c1)` and the query `(l2, a2, b2)` with
/// `c2 = sqrt(a2^2 + b2^2)`, the squared distance is
///
/// ```text
/// dl^2 + (dc / sc)^2 + dh^2 / sh^2
/// ```
///
/// where `dl = l1 - l2`, `dc = c1 - c2`,
/// `dh^2 = max(da^2 + db^2 - dc^2, 0)`, `sc = 1 + 0.045 * c1` and
/// `sh = 1 + 0.015 * c1`. (This has the shape of the CIE94 colour difference;
/// NOTE(review): confirm that is the intended metric.)
///
/// Entries are processed four at a time with NEON; any remaining `len % 4`
/// entries are handled by an equivalent scalar loop.
///
/// Ties are resolved in favour of the lowest index because only a strictly
/// smaller distance replaces the current best. A NaN distance is never
/// selected. If `LABS_L` is empty the function returns `0`.
///
/// # Panics
///
/// Panics if `LABS_A`, `LABS_B` or `LABS_C` holds fewer entries than `LABS_L`.
///
/// # Safety
///
/// The caller must ensure the executing CPU supports the `neon` target
/// feature.
#[target_feature(enable = "neon")]
unsafe fn nearest_idx_neon(query: [f32; 3]) -> usize {
    let n = LABS_L.len();
    // The vector loop reads LABS_A/B/C through raw pointers at offsets derived
    // solely from LABS_L's length, so check once that every table is long
    // enough before any unchecked load happens.
    assert!(
        LABS_A.len() >= n && LABS_B.len() >= n && LABS_C.len() >= n,
        "LABS_A, LABS_B and LABS_C must each hold at least LABS_L.len() entries"
    );
    let chunks = n / 4;

    // Query components broadcast to all four lanes.
    let l2 = vdupq_n_f32(query[0]);
    let a2 = vdupq_n_f32(query[1]);
    let b2 = vdupq_n_f32(query[2]);
    // c2 = sqrt(a2^2 + b2^2), identical in every lane.
    let c2_sq_v = vaddq_f32(vmulq_f32(a2, a2), vmulq_f32(b2, b2));
    let c2_v = vsqrtq_f32(c2_sq_v);

    // Loop-invariant constants, splatted once instead of once per chunk.
    let zero = vdupq_n_f32(0.0);
    let one = vdupq_n_f32(1.0);
    let k_c = vdupq_n_f32(0.045);
    let k_h = vdupq_n_f32(0.015);

    let mut best_d2 = f32::INFINITY;
    let mut best_idx: usize = 0;

    let l_ptr = LABS_L.as_ptr();
    let a_ptr = LABS_A.as_ptr();
    let b_ptr = LABS_B.as_ptr();
    let c_ptr = LABS_C.as_ptr();

    for chunk in 0..chunks {
        let i = chunk * 4;
        // SAFETY: `i + 3 < chunks * 4 <= n`, and the assertion above
        // guarantees every table holds at least `n` elements, so each
        // four-lane load stays inside its table.
        let (l1, a1, b1, c1) = unsafe {
            (
                vld1q_f32(l_ptr.add(i)),
                vld1q_f32(a_ptr.add(i)),
                vld1q_f32(b_ptr.add(i)),
                vld1q_f32(c_ptr.add(i)),
            )
        };

        let dl = vsubq_f32(l1, l2);
        let da = vsubq_f32(a1, a2);
        let db = vsubq_f32(b1, b2);
        let dc = vsubq_f32(c1, c2_v);

        // dh^2 = da^2 + db^2 - dc^2, clamped at zero because rounding can
        // push the difference slightly negative.
        let dab_sq = vaddq_f32(vmulq_f32(da, da), vmulq_f32(db, db));
        let dc_sq = vmulq_f32(dc, dc);
        let dh_sq = vmaxq_f32(vsubq_f32(dab_sq, dc_sq), zero);

        // Weights depend on the table entry's c1 value.
        let sc = vaddq_f32(one, vmulq_f32(k_c, c1));
        let sh = vaddq_f32(one, vmulq_f32(k_h, c1));

        let dl_sq = vmulq_f32(dl, dl);
        let dc_term = vdivq_f32(dc, sc);
        let dc_term_sq = vmulq_f32(dc_term, dc_term);
        let dh_term_sq = vdivq_f32(dh_sq, vmulq_f32(sh, sh));
        let d2 = vaddq_f32(vaddq_f32(dl_sq, dc_term_sq), dh_term_sq);

        // Spill the four distances and reduce them in lane order so the
        // lowest index wins on ties.
        let mut buf = [0f32; 4];
        // SAFETY: `buf` is a local array of exactly four `f32`s, which is
        // what a single `vst1q_f32` writes.
        unsafe { vst1q_f32(buf.as_mut_ptr(), d2) };
        for (lane, &d) in buf.iter().enumerate() {
            if d < best_d2 {
                best_d2 = d;
                best_idx = i + lane;
            }
        }
    }

    // Scalar tail for the final `n % 4` entries; same formula and same
    // operation order as the vector path.
    let c2_scalar = sqrtf(query[1] * query[1] + query[2] * query[2]);
    for i in (chunks * 4)..n {
        let l1 = LABS_L[i];
        let a1 = LABS_A[i];
        let b1 = LABS_B[i];
        let c1 = LABS_C[i];

        let dl = l1 - query[0];
        let da = a1 - query[1];
        let db = b1 - query[2];
        let dc = c1 - c2_scalar;

        let dh_sq = (da * da + db * db - dc * dc).max(0.0);
        let sc = 1.0 + 0.045 * c1;
        let sh = 1.0 + 0.015 * c1;
        let d2 = dl * dl + (dc / sc) * (dc / sc) + dh_sq / (sh * sh);

        if d2 < best_d2 {
            best_d2 = d2;
            best_idx = i;
        }
    }

    best_idx
}