native_neural_network 0.1.6

// `no_std`: the standard library is not linked implicitly, so code in this
// file may only rely on `core` (the `math` module below does exactly that).
#![no_std]

// Public submodules. Each `pub mod name;` declaration loads the module from
// its own source file (or directory) and exposes it under the crate root.
pub mod network;
pub mod tensor;
pub mod scratch;
pub mod rnn_format;
pub mod rnn_api;
pub mod crypto;
pub mod conv3d;
pub mod conv5d;
pub mod sphere5d;
pub mod activations;
pub mod layers;
pub mod engine;
pub mod model_format;
pub mod losses;
pub mod metrics;
pub mod initializers;
pub mod inference;
pub mod trainer;
pub mod optimizers;
pub mod schedulers;
pub mod normalization;
pub mod attention;
pub mod quantization;
pub mod model_config;
pub mod runtime;
pub mod sampling;
pub mod kv_cache;
pub mod rope;
pub mod embeddings;
pub mod lora;
pub mod moe;
pub mod beam_search;
pub mod gradients;
pub mod batching;
pub mod profiler;
pub mod visualization;

// Private module: it is not nameable from outside the crate ...
mod public_api;

// ... but every public item it contains is glob re-exported at the crate root.
pub use public_api::*;

pub mod math {
	//! Dependency-free `f32` math routines for `no_std` builds.
	//!
	//! Everything here is implemented with plain arithmetic and bit
	//! manipulation so that it only needs `core`. The routines aim for
	//! roughly single-precision accuracy (relative error around `1e-6` or
	//! better on their main domain), not for correctly rounded results.

	use core::f32::consts::{FRAC_2_PI, FRAC_PI_4, LOG2_E, SQRT_2};

	/// 2^23. Every finite `f32` whose magnitude is at least this large has no
	/// fractional bits, i.e. it already is an integer.
	const INTEGRAL_LIMIT: f32 = 8_388_608.0;
	/// 2^24, used to lift subnormal inputs into the normal range.
	const TWO_POW_24: f32 = 16_777_216.0;
	/// 2^-24.
	const TWO_POW_NEG_24: f32 = 1.0 / TWO_POW_24;
	/// 2^48.
	const TWO_POW_48: f32 = TWO_POW_24 * TWO_POW_24;
	/// Bit mask selecting the biased exponent field of an `f32`.
	const EXPONENT_MASK: u32 = 0x7f80_0000;

	/// Leading 17 bits of ln(2) (= 90851 / 2^17). Because the low mantissa
	/// bits are zero, `n * LN2_HI` is exact for |n| < 128.
	const LN2_HI: f32 = 0.693_138_12;
	/// ln(2) - `LN2_HI`.
	const LN2_LO: f32 = 9.058_001e-6;
	/// Leading 17 bits of π/2 (= 102943 / 2^16). `k * PIO2_HI` is exact for
	/// |k| < 128.
	const PIO2_HI: f32 = 1.570_785_5;
	/// π/2 - `PIO2_HI`.
	const PIO2_LO: f32 = 1.080_433_4e-5;

	/// Square root of `x`.
	///
	/// Returns NaN for negative or NaN input, `x` itself for ±0 and +∞.
	///
	/// The reciprocal square root is seeded with the classic bit trick and
	/// refined by two Newton steps; a final Heron step on the square root
	/// itself brings the relative error down to about one ulp.
	#[inline]
	pub fn sqrtf(x: f32) -> f32 {
		if x == 0.0 { return x; }
		if x.is_nan() || x < 0.0 { return f32::NAN; }
		if x.is_infinite() { return x; }
		// The bit-trick seed assumes a normal number, and `0.5 * v` must not
		// become subnormal either, so very small inputs are scaled by an even
		// power of two and the result is scaled back by its square root.
		let (v, rescale) = if x < 1.0e-30 { (x * TWO_POW_48, TWO_POW_NEG_24) } else { (x, 1.0) };
		let half = 0.5 * v;
		let mut y = f32::from_bits(0x5f37_59df_u32.wrapping_sub(v.to_bits() >> 1));
		// `half * y * y` is evaluated left to right on purpose: `y * y` alone
		// could underflow for very large `v`.
		y *= 1.5 - half * y * y;
		y *= 1.5 - half * y * y;
		let s = v * y;
		0.5 * (s + v / s) * rescale
	}

	/// Largest integer value not greater than `x`.
	///
	/// NaN, ±∞ and values with |x| >= 2^23 (which are already integers) are
	/// returned unchanged, so the `i32` cast below can never saturate.
	#[inline]
	fn floorf(x: f32) -> f32 {
		if !(x > -INTEGRAL_LIMIT && x < INTEGRAL_LIMIT) { return x; }
		let t = x as i32 as f32; // truncates toward zero
		if t > x { t - 1.0 } else { t }
	}

	/// Computes `x * 2^exp` by editing the exponent field directly.
	///
	/// * ±0, ±∞ and NaN are returned unchanged.
	/// * Overflow yields an infinity carrying the sign of `x`.
	/// * Results below the normal range are flushed to a zero carrying the
	///   sign of `x` (no subnormal results are produced).
	#[inline]
	fn ldexpf(x: f32, exp: i32) -> f32 {
		if x == 0.0 || !x.is_finite() { return x; }
		// Subnormals have no implicit leading one; normalise them first.
		let (x, exp) = if (x.to_bits() & EXPONENT_MASK) == 0 {
			(x * TWO_POW_24, exp.saturating_sub(24))
		} else {
			(x, exp)
		};
		let bits = x.to_bits();
		let sign = bits & 0x8000_0000;
		let mant = bits & 0x007f_ffff;
		let e = (((bits >> 23) & 0xff) as i32 - 127).saturating_add(exp);
		if e < -126 { return f32::from_bits(sign); }
		if e > 127 { return f32::from_bits(sign | EXPONENT_MASK); }
		f32::from_bits(sign | (((e + 127) as u32) << 23) | mant)
	}

	/// Exponential function `e^x`.
	///
	/// The argument is clamped to `[-88, 88]`, so the result never overflows:
	/// it saturates at `e^88` (about `1.65e38`) for large inputs, including
	/// +∞. Results that would be subnormal are flushed to exactly `0.0`
	/// (this covers -∞). NaN is propagated.
	///
	/// Method: `x = n*ln2 + r` with `n` the nearest integer, so that
	/// `|r| <= ln2/2`; `e^r` is a degree-7 Taylor polynomial and the factor
	/// `2^n` is applied through the exponent bits.
	#[inline]
	pub fn expf(x: f32) -> f32 {
		if x.is_nan() { return x; }
		let x = x.clamp(-88.0, 88.0);
		let n = roundf(x * LOG2_E);
		// Two-step subtraction: `n * LN2_HI` is exact, which keeps `r` accurate.
		let r = (x - n * LN2_HI) - n * LN2_LO;
		let approx = 1.0
			+ r * (1.0
				+ r * (0.5
					+ r * (1.0 / 6.0
						+ r * (1.0 / 24.0
							+ r * (1.0 / 120.0 + r * (1.0 / 720.0 + r * (1.0 / 5040.0)))))));
		ldexpf(approx, n as i32)
	}

	/// Natural logarithm of `x`.
	///
	/// Returns NaN for NaN and for every `x <= 0` (including zero), and +∞
	/// for +∞.
	///
	/// Method: `x = m * 2^e` with `m` in `[sqrt(1/2), sqrt(2))`, then
	/// `ln(m) = 2*atanh(y)` with `y = (m-1)/(m+1)`. Centring `m` around 1
	/// keeps `|y| < 0.172`, so the short series is accurate and there is no
	/// cancellation against `e*ln2` for arguments close to 1.
	#[inline]
	pub fn lnf(x: f32) -> f32 {
		if x.is_nan() || x <= 0.0 { return f32::NAN; }
		if x.is_infinite() { return x; }
		// Subnormals carry no implicit leading one; normalise them first.
		let (x, bias) = if (x.to_bits() & EXPONENT_MASK) == 0 { (x * TWO_POW_24, -24) } else { (x, 0) };
		let bits = x.to_bits();
		let mut e = ((bits >> 23) & 0xff) as i32 - 127 + bias;
		let mut m = f32::from_bits((bits & 0x007f_ffff) | 0x3f80_0000);
		if m > SQRT_2 {
			m *= 0.5;
			e += 1;
		}
		let y = (m - 1.0) / (m + 1.0);
		let y2 = y * y;
		// atanh(y) = y + y^3/3 + y^5/5 + y^7/7 + y^9/9 + ...
		let atanh = y * (1.0 + y2 * (1.0 / 3.0 + y2 * (1.0 / 5.0 + y2 * (1.0 / 7.0 + y2 * (1.0 / 9.0)))));
		let k = e as f32;
		k * LN2_HI + (2.0 * atanh + k * LN2_LO)
	}

	/// `x` raised to the power `y`, computed as `exp(y * ln|x|)`.
	///
	/// * `y == 0` gives `1.0`; otherwise NaN in either argument gives NaN.
	/// * `x == 1` gives `1.0`.
	/// * `x == 0` gives `0.0` for positive `y` and +∞ for negative `y`.
	/// * A negative base is supported when `y` is an integer (the sign of
	///   the result follows the parity of `y`); otherwise the result is NaN.
	///
	/// Because the result goes through [`expf`], it saturates near `1.65e38`
	/// instead of overflowing and is flushed to zero on underflow.
	#[inline]
	pub fn powf(x: f32, y: f32) -> f32 {
		if y == 0.0 { return 1.0; }
		if x.is_nan() || y.is_nan() { return f32::NAN; }
		if x == 1.0 { return 1.0; }
		if x == 0.0 { return if y > 0.0 { 0.0 } else { f32::INFINITY }; }
		if x > 0.0 { return expf(y * lnf(x)); }
		// Negative base: only integer exponents have a real result.
		if floorf(y) != y { return f32::NAN; }
		let magnitude = expf(y * lnf(-x));
		let half = 0.5 * y;
		// `y` is odd exactly when `y / 2` is not an integer.
		if floorf(half) != half { -magnitude } else { magnitude }
	}

	/// Splits `x` into `r + k*(π/2)` and returns `(r, k mod 4)`.
	///
	/// `r` lies in `[-π/4, π/4]`. For very large `|x|` single precision
	/// cannot represent the reduction any more; `r` is clamped so the
	/// kernels below still return a bounded value in that case.
	#[inline]
	fn reduce_to_quadrant(x: f32) -> (f32, i32) {
		let k = roundf(x * FRAC_2_PI);
		// Two-step subtraction: `k * PIO2_HI` is exact for moderate `k`.
		let r = (x - k * PIO2_HI) - k * PIO2_LO;
		// `& 3` on a two's-complement integer is a floor-style modulo 4.
		(r.clamp(-FRAC_PI_4, FRAC_PI_4), (k as i32) & 3)
	}

	/// Taylor polynomial of `sin` up to `r^9`, intended for `|r| <= π/4`.
	#[inline]
	fn sin_kernel(r: f32) -> f32 {
		let r2 = r * r;
		r * (1.0 + r2 * (-1.0 / 6.0 + r2 * (1.0 / 120.0 + r2 * (-1.0 / 5040.0 + r2 * (1.0 / 362_880.0)))))
	}

	/// Taylor polynomial of `cos` up to `r^8`, intended for `|r| <= π/4`.
	#[inline]
	fn cos_kernel(r: f32) -> f32 {
		let r2 = r * r;
		1.0 + r2 * (-0.5 + r2 * (1.0 / 24.0 + r2 * (-1.0 / 720.0 + r2 * (1.0 / 40_320.0))))
	}

	/// Sine of `x` (radians).
	///
	/// Returns NaN for NaN and ±∞. Accuracy is about `1e-6` for moderate
	/// arguments and degrades gradually as `|x|` grows, because the range
	/// reduction is carried out in single precision.
	#[inline]
	pub fn sinf(x: f32) -> f32 {
		if !x.is_finite() { return f32::NAN; }
		let (r, quadrant) = reduce_to_quadrant(x);
		match quadrant {
			0 => sin_kernel(r),
			1 => cos_kernel(r),
			2 => -sin_kernel(r),
			_ => -cos_kernel(r),
		}
	}

	/// Cosine of `x` (radians).
	///
	/// Returns NaN for NaN and ±∞. Accuracy is about `1e-6` for moderate
	/// arguments and degrades gradually as `|x|` grows, because the range
	/// reduction is carried out in single precision.
	#[inline]
	pub fn cosf(x: f32) -> f32 {
		if !x.is_finite() { return f32::NAN; }
		let (r, quadrant) = reduce_to_quadrant(x);
		match quadrant {
			0 => cos_kernel(r),
			1 => -sin_kernel(r),
			2 => -cos_kernel(r),
			_ => sin_kernel(r),
		}
	}

	/// Hyperbolic tangent of `x`.
	///
	/// The function is evaluated on `|x|` and the sign is restored
	/// afterwards, so it is exactly odd. Small arguments use the Taylor
	/// series, because `(e^2x - 1) / (e^2x + 1)` loses all significant
	/// digits to cancellation there. NaN is propagated; ±∞ gives ±1.
	#[inline]
	pub fn tanhf(x: f32) -> f32 {
		if x.is_nan() { return x; }
		let a = if x < 0.0 { -x } else { x };
		let t = if a < 0.125 {
			// tanh(a) = a - a^3/3 + 2a^5/15 - 17a^7/315 + ...
			let a2 = a * a;
			a + a * a2 * (-1.0 / 3.0 + a2 * (2.0 / 15.0 + a2 * (-17.0 / 315.0)))
		} else {
			let e2 = expf(2.0 * a);
			(e2 - 1.0) / (e2 + 1.0)
		};
		if x < 0.0 { -t } else { t }
	}

	/// Rounds `x` to the nearest integer, with halfway cases rounded away
	/// from zero.
	///
	/// NaN, ±∞ and values with |x| >= 2^23 (already integers) are returned
	/// unchanged, so the `i32` cast below can never saturate.
	#[inline]
	pub fn roundf(x: f32) -> f32 {
		if !(x > -INTEGRAL_LIMIT && x < INTEGRAL_LIMIT) { return x; }
		let t = x as i32 as f32; // truncates toward zero
		// Exact: `x` and `t` share the same binade or `t` is zero. Comparing
		// the fraction avoids the rounding error of computing `x + 0.5`.
		let frac = x - t;
		if frac >= 0.5 {
			t + 1.0
		} else if frac <= -0.5 {
			t - 1.0
		} else {
			t
		}
	}
}