use crate::tensor::{tensor_ref_from_ptr, CpuTensor};
use crate::DType;
use std::cell::UnsafeCell;
use std::sync::Arc;
pub type OpaqueTensor = CpuTensor<f32>;
pub extern "C" fn tl_cpu_tensor_new(
data: *const f32,
rank: usize,
shape: *const usize,
) -> *mut OpaqueTensor {
if data.is_null() || shape.is_null() {
return std::ptr::null_mut();
}
let shape_slice = unsafe { std::slice::from_raw_parts(shape, rank) };
let numel: usize = shape_slice.iter().product();
let data_slice = unsafe { std::slice::from_raw_parts(data, numel) };
make_tensor(CpuTensor::from_slice(data_slice, shape_slice, DType::F32))
}
pub extern "C" fn tl_cpu_tensor_new_i64(
data: *const i64,
rank: usize,
shape: *const usize,
) -> *mut OpaqueTensor {
if data.is_null() || shape.is_null() {
return std::ptr::null_mut();
}
let shape_slice = unsafe { std::slice::from_raw_parts(shape, rank) };
let numel: usize = shape_slice.iter().product();
let data_slice = unsafe { std::slice::from_raw_parts(data, numel) };
let f32_data: Vec<f32> = data_slice.iter().map(|&x| x as f32).collect();
make_tensor(CpuTensor::from_slice(&f32_data, shape_slice, DType::F32))
}
pub extern "C" fn tl_cpu_tensor_zeros(
rank: usize,
shape: *const usize,
req_grad: bool,
) -> *mut OpaqueTensor {
if shape.is_null() {
return std::ptr::null_mut();
}
let shape_slice = unsafe { std::slice::from_raw_parts(shape, rank) };
let t = CpuTensor::zeros(shape_slice, DType::F32);
let ptr = make_tensor(t);
if req_grad {
unsafe {
(&mut *ptr).enable_grad();
}
}
ptr
}
pub extern "C" fn tl_cpu_tensor_ones(
rank: usize,
shape: *const usize,
req_grad: bool,
) -> *mut OpaqueTensor {
if shape.is_null() {
return std::ptr::null_mut();
}
let shape_slice = unsafe { std::slice::from_raw_parts(shape, rank) };
let mut t = CpuTensor::ones(shape_slice, DType::F32);
if req_grad {
t.enable_grad();
}
make_tensor(t)
}
pub extern "C" fn tl_cpu_tensor_randn(
rank: usize,
shape: *const usize,
req_grad: bool,
) -> *mut OpaqueTensor {
if shape.is_null() {
return std::ptr::null_mut();
}
let shape_slice = unsafe { std::slice::from_raw_parts(shape, rank) };
let t = CpuTensor::randn(shape_slice, DType::F32);
let ptr = make_tensor(t);
if req_grad {
unsafe {
(&mut *ptr).enable_grad();
}
}
ptr
}
pub extern "C" fn tl_cpu_tensor_randn_debug(
rank: usize,
shape: *const usize,
_seed: u64,
req_grad: bool,
) -> *mut OpaqueTensor {
tl_cpu_tensor_randn(rank, shape, req_grad)
}
pub extern "C" fn tl_cpu_tensor_from_i64(data: *const i64, len: i64) -> *mut OpaqueTensor {
if data.is_null() {
return std::ptr::null_mut();
}
let len = len as usize;
let data_slice = unsafe { std::slice::from_raw_parts(data, len) };
let f32_data: Vec<f32> = data_slice.iter().map(|&x| x as f32).collect();
make_tensor(CpuTensor::from_slice(&f32_data, &[len], DType::F32))
}
pub extern "C" fn tl_cpu_tensor_from_vec_u8(
data: *mut std::ffi::c_void,
offset: i64,
shape_ptr: *const i64,
rank: i64,
) -> *mut OpaqueTensor {
if data.is_null() || shape_ptr.is_null() || rank <= 0 {
return std::ptr::null_mut();
}
let rank = rank as usize;
assert!(rank < 10, "RANK IS HUGE: {}", rank);
let _ = std::fs::write("DEBUG_RANK.txt", format!("rank={}\n", rank));
let shape_slice = unsafe { std::slice::from_raw_parts(shape_ptr, rank) };
let shape: Vec<usize> = shape_slice.iter().map(|&x| x as usize).collect();
let numel: usize = shape.iter().product();
let _ = std::fs::write("DEBUG.txt", format!("from_vec_u8: rank={}, shape={:?}, numel={}, offset={}\n", rank, shape, numel, offset));
eprintln!("[CPU_FFI] from_vec_u8: rank={}, shape={:?}, numel={}, offset={}", rank, shape, numel, offset);
let offset = offset as usize;
let data_ptr = data as *const u8;
let data_slice = unsafe { std::slice::from_raw_parts(data_ptr.add(offset), numel) };
let f32_data: Vec<f32> = data_slice.iter().map(|&x| x as f32).collect();
make_tensor(CpuTensor::from_slice(&f32_data, &shape, DType::F32))
}
pub extern "C" fn tl_cpu_tensor_from_vec_u8_f32_shape(
data: *mut std::ffi::c_void,
offset: i64,
shape_ptr: *const f32,
rank: i64,
) -> *mut OpaqueTensor {
if data.is_null() || shape_ptr.is_null() || rank <= 0 {
return std::ptr::null_mut();
}
let rank = rank as usize;
let shape_slice = unsafe { std::slice::from_raw_parts(shape_ptr, rank) };
let shape: Vec<usize> = shape_slice.iter().map(|&x| x as usize).collect();
let numel: usize = shape.iter().product();
let offset = offset as usize;
let data_ptr = data as *const u8;
let data_slice = unsafe { std::slice::from_raw_parts(data_ptr.add(offset), numel) };
let f32_data: Vec<f32> = data_slice.iter().map(|&x| x as f32).collect();
make_tensor(CpuTensor::from_slice(&f32_data, &shape, DType::F32))
}
pub extern "C" fn tl_cpu_tensor_from_u8_labels(data: *const u8, len: i64) -> *mut OpaqueTensor {
if data.is_null() {
return std::ptr::null_mut();
}
let len = len as usize;
let data_slice = unsafe { std::slice::from_raw_parts(data, len) };
let f32_data: Vec<f32> = data_slice.iter().map(|&x| x as f32).collect(); make_tensor(CpuTensor::from_slice(&f32_data, &[len], DType::F32))
}
pub extern "C" fn tl_cpu_tensor_full(
rank: usize,
shape: *const usize,
value: f32,
req_grad: bool,
) -> *mut OpaqueTensor {
if shape.is_null() {
return std::ptr::null_mut();
}
let shape_slice = unsafe { std::slice::from_raw_parts(shape, rank) };
let numel: usize = shape_slice.iter().product();
let data = vec![value; numel];
let mut t = CpuTensor::from_slice(&data, shape_slice, DType::F32);
if req_grad {
t.enable_grad();
}
make_tensor(t)
}
pub extern "C" fn tl_cpu_tensor_eye(n: usize, req_grad: bool) -> *mut OpaqueTensor {
let numel = n * n;
let mut data = vec![0.0f32; numel];
for i in 0..n {
data[i * n + i] = 1.0;
}
let mut t = CpuTensor::from_slice(&data, &[n, n], DType::F32);
if req_grad {
t.enable_grad();
}
make_tensor(t)
}
pub extern "C" fn tl_cpu_tensor_arange(start: f64, end: f64, step: f64) -> *mut OpaqueTensor {
if step == 0.0 || (step > 0.0 && start >= end) || (step < 0.0 && start <= end) {
return make_tensor(CpuTensor::from_slice(&[], &[0], DType::F32));
}
let len = ((end - start) / step).ceil() as usize;
let data: Vec<f32> = (0..len).map(|i| (start + step * i as f64) as f32).collect();
make_tensor(CpuTensor::from_slice(&data, &[len], DType::F32))
}
pub extern "C" fn tl_cpu_tensor_linspace(start: f64, end: f64, steps: usize) -> *mut OpaqueTensor {
if steps == 0 {
return make_tensor(CpuTensor::from_slice(&[] as &[f32], &[0], DType::F32));
}
if steps == 1 {
return make_tensor(CpuTensor::from_slice(&[start as f32], &[1], DType::F32));
}
let step = (end - start) / (steps - 1) as f64;
let data: Vec<f32> = (0..steps)
.map(|i| (start + step * i as f64) as f32)
.collect();
make_tensor(CpuTensor::from_slice(&data, &[steps], DType::F32))
}
pub extern "C" fn tl_cpu_tensor_rand(
rank: i64,
shape_ptr: *const usize,
req_grad: bool,
) -> *mut OpaqueTensor {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let rank = rank as usize;
let shape = unsafe { std::slice::from_raw_parts(shape_ptr, rank) };
let numel: usize = shape.iter().product();
let mut hasher = DefaultHasher::new();
std::time::SystemTime::now().hash(&mut hasher);
let mut seed = hasher.finish();
let data: Vec<f32> = (0..numel)
.map(|_| {
seed = seed
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
((seed >> 33) as f32) / (u32::MAX as f32)
})
.collect();
let mut t = CpuTensor::from_slice(&data, shape, DType::F32);
if req_grad {
t.enable_grad();
}
make_tensor(t)
}
pub extern "C" fn tl_cpu_tensor_free(t: *mut OpaqueTensor) {
if t.is_null() {
return;
}
crate::memory::promote_tensor(t);
crate::memory::release_tensor(t);
}
use std::sync::atomic::{AtomicUsize, Ordering};
static FFI_ACQUIRE_COUNT: AtomicUsize = AtomicUsize::new(0);
static FFI_RELEASE_COUNT: AtomicUsize = AtomicUsize::new(0);
pub fn dump_ffi_acquire_release() {
let acq = FFI_ACQUIRE_COUNT.load(Ordering::Relaxed);
let rel = FFI_RELEASE_COUNT.load(Ordering::Relaxed);
eprintln!("[FFI_ACQ_REL] acquire={} release={} delta={}", acq, rel, acq as i64 - rel as i64);
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_acquire(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return t;
}
if crate::memory::is_mem_log_enabled() {
eprintln!("[ACQUIRE] Ptr: {:p} (Arc RC+1)", t);
}
FFI_ACQUIRE_COUNT.fetch_add(1, Ordering::Relaxed);
unsafe {
let arc = Arc::from_raw(t as *const UnsafeCell<CpuTensor<f32>>);
let _clone = arc.clone(); let _ = Arc::into_raw(arc); Arc::into_raw(_clone) as *mut OpaqueTensor
}
}
pub extern "C" fn tl_cpu_tensor_release(t: *mut OpaqueTensor) {
if t.is_null() {
return;
}
FFI_RELEASE_COUNT.fetch_add(1, Ordering::Relaxed);
crate::memory::promote_tensor(t);
crate::memory::release_tensor(t);
}
pub extern "C" fn tl_cpu_tensor_clear_data(t: *mut OpaqueTensor) {
if t.is_null() {
return;
}
unsafe {
let tensor = &mut *t;
tensor.data = Vec::new();
tensor.data_i64 = None;
tensor.shape = Vec::new();
tensor.autograd = None;
}
}
pub extern "C" fn tl_cpu_tensor_dim(t: *mut OpaqueTensor, dim: usize) -> usize {
if t.is_null() {
return 0;
}
unsafe { (&*t).shape().get(dim).cloned().unwrap_or(0) }
}
pub extern "C" fn tl_cpu_tensor_len(t: *mut OpaqueTensor) -> usize {
if t.is_null() {
return 0;
}
let tensor = unsafe { &*t };
if tensor.shape().is_empty() {
return 0;
} tensor.shape().iter().product()
}
pub extern "C" fn tl_cpu_tensor_shape(t: *mut OpaqueTensor, out: *mut usize) -> usize {
if t.is_null() || out.is_null() {
return 0;
}
let tensor = unsafe { &*t };
for (i, &dim) in tensor.shape().iter().enumerate() {
unsafe {
*out.add(i) = dim;
}
}
tensor.shape().len()
}
pub extern "C" fn tl_cpu_tensor_get_shape(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let shape: Vec<f32> = tensor.shape().iter().map(|&d| d as f32).collect();
make_tensor(CpuTensor::from_slice(&shape, &[shape.len()], DType::F32))
}
pub extern "C" fn tl_cpu_tensor_get_f32(
t: *mut OpaqueTensor,
indices: *const usize,
_rank: usize,
) -> f32 {
if t.is_null() || indices.is_null() {
return 0.0;
}
let tensor = unsafe { &*t };
let shape = tensor.shape();
if shape.is_empty() {
return 0.0;
} let idx_slice = unsafe { std::slice::from_raw_parts(indices, shape.len()) };
let mut flat_idx = 0;
let mut stride = 1;
for i in (0..shape.len()).rev() {
flat_idx += idx_slice[i] * stride;
stride *= shape[i];
}
tensor.data_f32().get(flat_idx).cloned().unwrap_or(0.0)
}
pub extern "C" fn tl_cpu_tensor_get_i64(
t: *mut OpaqueTensor,
indices: *const usize,
rank: usize,
) -> i64 {
if t.is_null() {
return 0;
}
let tensor = unsafe { &*t };
if tensor.shape().is_empty() {
return 0;
} tl_cpu_tensor_get_f32(t, indices, rank) as i64
}
pub extern "C" fn tl_cpu_tensor_get_f32_md(
t: *mut OpaqueTensor,
indices: *const i64,
rank: i64,
) -> f32 {
if t.is_null() || indices.is_null() {
return 0.0;
}
let tensor = unsafe { &*(t as *mut CpuTensor<f32>) };
let shape = tensor.shape();
if shape.is_empty() {
return 0.0;
}
let data = tensor.data_f32();
let rank = rank as usize;
let mut flat_idx = 0usize;
for d in 0..rank.min(shape.len()) {
let idx = unsafe { *indices.add(d) } as usize;
let stride: usize = shape[d + 1..].iter().product();
flat_idx += idx * stride.max(1);
}
data.get(flat_idx).cloned().unwrap_or(0.0)
}
pub extern "C" fn tl_cpu_tensor_get_i64_md(
t: *mut OpaqueTensor,
indices: *const i64,
rank: i64,
) -> i64 {
tl_cpu_tensor_get_f32_md(t, indices, rank) as i64
}
pub extern "C" fn tl_cpu_tensor_set_f32(
t: *mut OpaqueTensor,
indices: *const usize,
_rank: usize,
value: f32,
) {
if t.is_null() || indices.is_null() {
return;
}
let tensor = unsafe { &mut *t };
let shape = tensor.shape().to_vec();
if shape.is_empty() {
return;
} let idx_slice = unsafe { std::slice::from_raw_parts(indices, shape.len()) };
let mut flat_idx = 0;
let mut stride = 1;
for i in (0..shape.len()).rev() {
flat_idx += idx_slice[i] * stride;
stride *= shape[i];
}
let data = tensor.data_f32_mut();
if flat_idx < data.len() {
data[flat_idx] = value;
}
}
pub extern "C" fn tl_cpu_tensor_set_f32_md(
t: *mut OpaqueTensor,
indices: *const i64,
rank: usize,
value: f32,
) -> *mut OpaqueTensor {
if t.is_null() || indices.is_null() {
return t;
}
let tensor = unsafe { &mut *t };
let shape = tensor.shape().to_vec();
if shape.is_empty() {
return t;
} let idx_slice = unsafe { std::slice::from_raw_parts(indices, rank) };
let mut flat_idx = 0usize;
let mut stride = 1usize;
for d in (0..rank).rev() {
let i = idx_slice[d] as usize;
if d < shape.len() && i < shape[d] {
flat_idx += i * stride;
stride *= shape[d];
}
}
let data = tensor.data_f32_mut();
if flat_idx < data.len() {
data[flat_idx] = value;
}
t
}
pub extern "C" fn tl_cpu_tensor_get(t: *mut OpaqueTensor, idx: i64) -> f32 {
if t.is_null() {
return 0.0;
}
let tensor = unsafe { &*t };
tensor.data_f32().get(idx as usize).copied().unwrap_or(0.0)
}
pub extern "C" fn tl_cpu_tensor_item(t: *mut OpaqueTensor) -> f32 {
if t.is_null() {
return 0.0;
}
let tensor = unsafe { &*t };
tensor.data_f32().first().copied().unwrap_or(0.0)
}
pub extern "C" fn tl_cpu_tensor_item_i64(t: *mut OpaqueTensor) -> i64 {
if t.is_null() {
return 0;
}
let tensor = unsafe { &*t };
tensor.data_f32().first().map(|&f| f as i64).unwrap_or(0)
}
pub extern "C" fn tl_cpu_tensor_add(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
let (ta, tb) = unsafe { (&*a, &*b) };
match ta.add_impl(tb) {
Ok(result) => {
let ptr = make_tensor(result);
if ta.requires_grad() || tb.requires_grad() {
use crate::autograd::ops::AddBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(AddBackward {
a: tensor_ref_from_ptr(a),
b: tensor_ref_from_ptr(b),
a_shape: ta.shape().to_vec(),
b_shape: tb.shape().to_vec(),
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in add: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_sub(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
let (ta, tb) = unsafe { (&*a, &*b) };
match ta.sub_impl(tb) {
Ok(result) => {
let ptr = make_tensor(result);
if ta.requires_grad() || tb.requires_grad() {
use crate::autograd::ops::SubBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(SubBackward {
a: tensor_ref_from_ptr(a),
b: tensor_ref_from_ptr(b),
a_shape: ta.shape().to_vec(),
b_shape: tb.shape().to_vec(),
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in sub: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_mul(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
let (ta, tb) = unsafe { (&*a, &*b) };
match ta.mul_impl(tb) {
Ok(result) => {
let ptr = make_tensor(result);
if ta.requires_grad() || tb.requires_grad() {
use crate::autograd::ops::MulBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(MulBackward {
a: tensor_ref_from_ptr(a),
b: tensor_ref_from_ptr(b),
a_data: ta.shallow_clone(),
b_data: tb.shallow_clone(),
a_shape: ta.shape().to_vec(),
b_shape: tb.shape().to_vec(),
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in mul: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_div(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
let (ta, tb) = unsafe { (&*a, &*b) };
match ta.div_impl(tb) {
Ok(result) => {
let ptr = make_tensor(result);
if ta.requires_grad() || tb.requires_grad() {
use crate::autograd::ops::DivBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(DivBackward {
a: tensor_ref_from_ptr(a),
b: tensor_ref_from_ptr(b),
a_data: ta.shallow_clone(),
b_data: tb.shallow_clone(),
a_shape: ta.shape().to_vec(),
b_shape: tb.shape().to_vec(),
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in div: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_rem(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
let (ta, tb) = unsafe { (&*a, &*b) };
match ta.rem_impl(tb) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in rem: {}", e);
std::ptr::null_mut()
}
}
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_shallow_clone(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
tl_cpu_tensor_clone(t)
}
pub extern "C" fn tl_cpu_tensor_neg(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.neg_impl() {
Ok(res) => {
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::NegBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(NegBackward {
a: tensor_ref_from_ptr(t),
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in neg: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_abs(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
match unsafe { (&*t).abs_impl() } {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in abs: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_add_scalar(t: *mut OpaqueTensor, s: f64) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.add_scalar_impl(s as f32) {
Ok(res) => {
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::AddScalarBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(AddScalarBackward {
a: tensor_ref_from_ptr(t),
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in add_scalar: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_mul_scalar(t: *mut OpaqueTensor, s: f64) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.mul_scalar_impl(s as f32) {
Ok(res) => {
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::MulScalarBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(MulScalarBackward {
a: tensor_ref_from_ptr(t),
s: s as f32,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in mul_scalar: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_div_scalar(t: *mut OpaqueTensor, s: f64) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.div_scalar_impl(s as f32) {
Ok(res) => {
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::DivScalarBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(DivScalarBackward {
a: tensor_ref_from_ptr(t),
s: s as f32,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in div_scalar: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_sub_scalar(t: *mut OpaqueTensor, s: f64) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.sub_scalar_impl(s as f32) {
Ok(res) => {
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::SubScalarBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(SubScalarBackward {
a: tensor_ref_from_ptr(t),
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in sub_scalar: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_pow_scalar(t: *mut OpaqueTensor, exp: f32) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let exp_tensor = CpuTensor::from_slice(&[exp], &[1], DType::F32);
match tensor.pow_impl(&exp_tensor) {
Ok(result) => {
let ptr = make_tensor(result);
if tensor.requires_grad() {
use crate::autograd::ops::PowBackward;
let result_clone = unsafe { (&*ptr).shallow_clone() };
unsafe {
(&mut *ptr).set_grad_fn(Box::new(PowBackward {
a: tensor_ref_from_ptr(t),
a_data: tensor.shallow_clone(),
b_data: exp_tensor,
output: result_clone,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in pow_scalar: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_pow(
t: *mut OpaqueTensor,
exp: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if t.is_null() || exp.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let exp_tensor = unsafe { &*exp };
match tensor.pow_impl(exp_tensor) {
Ok(result) => {
let ptr = make_tensor(result);
if tensor.requires_grad() {
use crate::autograd::ops::PowBackward;
let result_clone = unsafe { (&*ptr).shallow_clone() };
unsafe {
(&mut *ptr).set_grad_fn(Box::new(PowBackward {
a: tensor_ref_from_ptr(t),
a_data: tensor.shallow_clone(),
b_data: exp_tensor.shallow_clone(),
output: result_clone,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in pow: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_add_assign(a: *mut OpaqueTensor, b: *mut OpaqueTensor) {
if a.is_null() || b.is_null() {
return;
}
unsafe {
match (&*a).add_impl(&*b) {
Ok(res) => *a = res,
Err(e) => eprintln!("Runtime Error in add_assign: {}", e),
}
}
}
pub extern "C" fn tl_cpu_tensor_sub_assign(a: *mut OpaqueTensor, b: *mut OpaqueTensor) {
if a.is_null() || b.is_null() {
return;
}
unsafe {
match (&*a).sub_impl(&*b) {
Ok(res) => *a = res,
Err(e) => eprintln!("Runtime Error in sub_assign: {}", e),
}
}
}
pub extern "C" fn tl_cpu_tensor_mul_assign(a: *mut OpaqueTensor, b: *mut OpaqueTensor) {
if a.is_null() || b.is_null() {
return;
}
unsafe {
match (&*a).mul_impl(&*b) {
Ok(res) => *a = res,
Err(e) => eprintln!("Runtime Error in mul_assign: {}", e),
}
}
}
pub extern "C" fn tl_cpu_tensor_div_assign(a: *mut OpaqueTensor, b: *mut OpaqueTensor) {
if a.is_null() || b.is_null() {
return;
}
unsafe {
match (&*a).div_impl(&*b) {
Ok(res) => *a = res,
Err(e) => eprintln!("Runtime Error in div_assign: {}", e),
}
}
}
pub extern "C" fn tl_cpu_tensor_mod_assign(a: *mut OpaqueTensor, b: *mut OpaqueTensor) {
if a.is_null() || b.is_null() {
return;
}
unsafe {
match (&*a).rem_impl(&*b) {
Ok(res) => *a = res,
Err(e) => eprintln!("Runtime Error in mod_assign: {}", e),
}
}
}
pub extern "C" fn tl_cpu_tensor_add_assign_scalar_f32(t: *mut OpaqueTensor, s: f32) {
if t.is_null() {
return;
}
unsafe {
match (&*t).add_scalar_impl(s) {
Ok(res) => *t = res,
Err(e) => eprintln!("Runtime Error in add_assign_scalar: {}", e),
}
}
}
pub extern "C" fn tl_cpu_tensor_sub_assign_scalar_f32(t: *mut OpaqueTensor, s: f32) {
if t.is_null() {
return;
}
unsafe {
match (&*t).add_scalar_impl(-s) {
Ok(res) => *t = res,
Err(e) => eprintln!("Runtime Error in sub_assign_scalar: {}", e),
}
}
}
pub extern "C" fn tl_cpu_tensor_mul_assign_scalar_f32(t: *mut OpaqueTensor, s: f32) {
if t.is_null() {
return;
}
unsafe {
match (&*t).mul_scalar_impl(s) {
Ok(res) => *t = res,
Err(e) => eprintln!("Runtime Error in mul_assign_scalar: {}", e),
}
}
}
pub extern "C" fn tl_cpu_tensor_div_assign_scalar_f32(t: *mut OpaqueTensor, s: f32) {
if t.is_null() {
return;
}
unsafe {
match (&*t).div_scalar_impl(s) {
Ok(res) => *t = res,
Err(e) => eprintln!("Runtime Error in div_assign_scalar: {}", e),
}
}
}
pub extern "C" fn tl_cpu_tensor_mod_assign_scalar_f32(t: *mut OpaqueTensor, s: f32) {
if t.is_null() {
return;
}
unsafe {
let a = &*t;
let b = CpuTensor::from_slice(&[s], &[1], DType::F32);
match a.rem_impl(&b) {
Ok(res) => *t = res,
Err(e) => eprintln!("Runtime Error in mod_assign_scalar: {}", e),
}
}
}
pub extern "C" fn tl_cpu_tensor_eq(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
match unsafe { (&*a).eq_impl(&*b) } {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in eq: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_neq(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
match unsafe { (&*a).neq_impl(&*b) } {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in neq: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_lt(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
match unsafe { (&*a).lt_impl(&*b) } {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in lt: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_le(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
match unsafe { (&*a).le_impl(&*b) } {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in le: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_gt(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
match unsafe { (&*a).gt_impl(&*b) } {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in gt: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_ge(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
match unsafe { (&*a).ge_impl(&*b) } {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in ge: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_exp(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.exp_impl() {
Ok(result) => {
let ptr = make_tensor(result);
if tensor.requires_grad() {
use crate::autograd::ops::ExpBackward;
let output = unsafe { (&*ptr).shallow_clone() };
unsafe {
(&mut *ptr).set_grad_fn(Box::new(ExpBackward {
a: tensor_ref_from_ptr(t),
output,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in exp: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_log(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.log_impl() {
Ok(result) => {
let ptr = make_tensor(result);
if tensor.requires_grad() {
use crate::autograd::ops::LogBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(LogBackward {
a: tensor_ref_from_ptr(t),
a_data: tensor.shallow_clone(),
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in log: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_sin(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
match unsafe { (&*t).sin_impl() } {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in sin: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_cos(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
match unsafe { (&*t).cos_impl() } {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in cos: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_tanh(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.tanh_impl() {
Ok(res) => {
let res_clone = res.shallow_clone();
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::TanhBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(TanhBackward {
input: tensor_ref_from_ptr(t),
output_data: res_clone,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in tanh: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_relu(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.relu_impl() {
Ok(result) => {
let ptr = make_tensor(result);
if tensor.requires_grad() {
use crate::autograd::ops::ReluBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(ReluBackward {
a: tensor_ref_from_ptr(t),
a_data: tensor.shallow_clone(),
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in relu: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_softmax(t: *mut OpaqueTensor, dim: i64) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.softmax_impl(dim as i32) {
Ok(result) => {
let ptr = make_tensor(result);
if tensor.requires_grad() {
use crate::autograd::ops::SoftmaxBackward;
let output = unsafe { (&*ptr).shallow_clone() };
unsafe {
(&mut *ptr).set_grad_fn(Box::new(SoftmaxBackward {
a: tensor_ref_from_ptr(t),
output,
axis: dim as i32,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in softmax: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_sum(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let sum_val = tensor.sumall_impl(); let result = CpuTensor::from_slice(&[sum_val], &[1], DType::F32);
let ptr = make_tensor(result);
if tensor.requires_grad() {
use crate::autograd::ops::SumallBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(SumallBackward {
a: tensor_ref_from_ptr(t),
shape: tensor.shape().to_vec(),
}));
}
}
ptr
}
pub extern "C" fn tl_cpu_tensor_mean(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let input_shape = tensor.shape.clone();
let mean_val = tensor.mean_all_impl();
let ptr = make_tensor(CpuTensor::from_slice(&[mean_val], &[1], DType::F32));
if tensor.requires_grad() {
use crate::autograd::ops::MeanBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(MeanBackward {
input: tensor_ref_from_ptr(t),
input_shape,
}));
}
}
ptr
}
pub extern "C" fn tl_cpu_tensor_max(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let idx = tensor.argmax_all_impl(); let max_val = tensor.data_f32()[idx];
make_tensor(CpuTensor::from_slice(&[max_val], &[1], DType::F32))
}
pub extern "C" fn tl_cpu_tensor_min(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let data = tensor.data_f32();
let min_val = data.iter().cloned().fold(f32::INFINITY, f32::min);
make_tensor(CpuTensor::from_slice(&[min_val], &[1], DType::F32))
}
pub extern "C" fn tl_cpu_tensor_argmax(
t: *mut OpaqueTensor,
dim: i64,
_keep_dim: bool,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let _ = dim; let max_idx = tensor.argmax_all_impl();
make_tensor(CpuTensor::from_slice(&[max_idx as f32], &[1], DType::F32))
}
pub extern "C" fn tl_cpu_tensor_argmin(
t: *mut OpaqueTensor,
dim: i64,
_keep_dim: bool,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let _ = dim;
let data = tensor.data_f32();
let min_idx = data
.iter()
.enumerate()
.min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
.map(|(i, _)| i)
.unwrap_or(0);
make_tensor(CpuTensor::from_slice(&[min_idx as f32], &[1], DType::F32))
}
pub extern "C" fn tl_cpu_tensor_reshape(
t: *mut OpaqueTensor,
rank: usize,
shape: *const usize,
) -> *mut OpaqueTensor {
if t.is_null() || shape.is_null() {
return std::ptr::null_mut();
}
if rank == 0 || rank > 8 {
eprintln!("Warning: tl_cpu_tensor_reshape: invalid rank={}", rank);
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let new_shape = unsafe { std::slice::from_raw_parts(shape, rank) };
let input_shape = tensor.shape().to_vec();
match tensor.reshape_impl(new_shape) {
Ok(result) => {
let ptr = make_tensor(result);
if tensor.requires_grad() {
use crate::autograd::ops::ReshapeBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(ReshapeBackward {
input: tensor_ref_from_ptr(t),
input_shape,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in reshape: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_reshape_new(
t: *mut OpaqueTensor,
new_shape_tensor: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if t.is_null() || new_shape_tensor.is_null() {
return std::ptr::null_mut();
}
let shape_tensor = unsafe { &*new_shape_tensor };
let new_shape: Vec<usize> = shape_tensor
.data_f32()
.iter()
.map(|&x| x as usize)
.collect();
tl_cpu_tensor_reshape(t, new_shape.len(), new_shape.as_ptr())
}
pub extern "C" fn tl_cpu_tensor_reshape_dims(
t: *mut OpaqueTensor,
dims_ptr: *const i64,
rank: i64,
) -> *mut OpaqueTensor {
if t.is_null() || dims_ptr.is_null() {
return std::ptr::null_mut();
}
let rank = rank as usize;
if rank == 0 || rank > 8 {
eprintln!("Warning: tl_cpu_tensor_reshape_dims: invalid rank={}", rank);
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let dims_slice = unsafe { std::slice::from_raw_parts(dims_ptr, rank) };
let new_shape: Vec<usize> = dims_slice.iter().map(|&d| d as usize).collect();
let input_shape = tensor.shape.clone();
match tensor.reshape_impl(&new_shape) {
Ok(res) => {
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::ReshapeBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(ReshapeBackward {
input: tensor_ref_from_ptr(t),
input_shape,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in reshape_dims: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_transpose(
t: *mut OpaqueTensor,
dim0: usize,
dim1: usize,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.transpose_impl(dim0, dim1) {
Ok(res) => {
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::TransposeBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(TransposeBackward {
input: tensor_ref_from_ptr(t),
dim0,
dim1,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in transpose: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_squeeze(t: *mut OpaqueTensor, dim: i64) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let input_shape = tensor.shape.clone();
match tensor.squeeze_impl(dim as usize) {
Ok(res) => {
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::SqueezeBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(SqueezeBackward {
input: tensor_ref_from_ptr(t),
input_shape,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in squeeze: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_unsqueeze(t: *mut OpaqueTensor, dim: usize) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let input_shape = tensor.shape.clone();
match tensor.unsqueeze_impl(dim) {
Ok(res) => {
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::UnsqueezeBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(UnsqueezeBackward {
input: tensor_ref_from_ptr(t),
input_shape,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in unsqueeze: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_contiguous(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
match unsafe { (&*t).contiguous_impl() } {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in contiguous: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_narrow(
t: *mut OpaqueTensor,
dim: usize,
start: usize,
len: usize,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let input_shape = tensor.shape().to_vec();
match tensor.narrow_impl(dim, start, len) {
Ok(res) => {
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::SliceBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(SliceBackward {
input: tensor_ref_from_ptr(t),
input_shape,
dim,
start,
end: start + len,
step: 1,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in narrow: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_slice(
t: *mut OpaqueTensor,
dim: i64,
start: i64,
len: i64,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let input_shape = tensor.shape().to_vec();
let dim_u = dim as usize;
let start_u = start as usize;
let len_u = len as usize;
match tensor.narrow_impl(dim_u, start_u, len_u) {
Ok(res) => {
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::SliceBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(SliceBackward {
input: tensor_ref_from_ptr(t),
input_shape,
dim: dim_u,
start: start_u,
end: start_u + len_u,
step: 1,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in slice: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_cat(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
dim: i64,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
let (ta, tb) = unsafe { (&*a, &*b) };
match CpuTensor::cat_impl(&[ta, tb], dim as usize) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in cat: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_cat_i64(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
dim: i64,
) -> *mut OpaqueTensor {
tl_cpu_tensor_cat(a, b, dim)
}
pub extern "C" fn tl_cpu_tensor_clone(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
make_tensor(unsafe { (&*t).clone() })
}
pub extern "C" fn tl_cpu_tensor_print(t: *mut OpaqueTensor) {
if t.is_null() {
println!("Tensor[null]");
return;
}
let tensor = unsafe { &*t };
if tensor.shape().is_empty() {
let data = tensor.data_f32();
if !data.is_empty() {
let v = data[0];
if v == v.floor() && v.abs() < 1e9 {
println!("{}", v as i64);
} else {
println!("{}", v);
}
} else {
println!("Tensor[cleared]");
}
return;
}
println!("{:?}", tensor.data_f32());
}
pub extern "C" fn tl_cpu_tensor_replace_data(dst: *mut OpaqueTensor, src: *mut OpaqueTensor) {
if dst.is_null() || src.is_null() {
return;
}
unsafe {
let dst_t = &mut *dst;
let src_t = &*src;
dst_t.data = src_t.data.clone();
dst_t.data_i64 = src_t.data_i64.clone();
dst_t.shape = src_t.shape.clone();
dst_t.dtype = src_t.dtype;
}
}
pub extern "C" fn tl_cpu_tensor_matmul(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
let (ta, tb) = unsafe { (&*a, &*b) };
match ta.matmul_impl(tb) {
Ok(result) => {
let ptr = make_tensor(result);
if ta.requires_grad() || tb.requires_grad() {
use crate::autograd::ops::MatmulBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(MatmulBackward {
a: tensor_ref_from_ptr(a),
b: tensor_ref_from_ptr(b),
a_data: ta.shallow_clone(),
b_data: tb.shallow_clone(),
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in matmul: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_sqrt(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.sqrt_impl() {
Ok(res) => {
let res_clone = res.shallow_clone();
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::SqrtBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(SqrtBackward {
input: tensor_ref_from_ptr(t),
output_data: res_clone,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in sqrt: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_gelu(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.gelu_impl() {
Ok(res) => {
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::GeluBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(GeluBackward {
input: tensor_ref_from_ptr(t),
input_data: tensor.shallow_clone(),
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in gelu: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_tril(t: *mut OpaqueTensor, diagonal: i64) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
if tensor.shape().len() < 2 {
return make_tensor(tensor.clone());
}
match tensor.tril_impl(diagonal as i32) {
Ok(res) => {
let ptr = make_tensor(res);
if tensor.requires_grad() {
use crate::autograd::ops::TrilBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(TrilBackward {
input: tensor_ref_from_ptr(t),
diagonal: diagonal as i32,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in tril: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_masked_fill_scalar(
t: *mut OpaqueTensor,
mask: *mut OpaqueTensor,
value: f64,
) -> *mut OpaqueTensor {
if t.is_null() || mask.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let mask_t = unsafe { &*mask };
match tensor.masked_fill_scalar_impl(mask_t, value) {
Ok(res) => {
let ptr = make_tensor(res);
ptr
}
Err(e) => {
eprintln!("Runtime Error in masked_fill_scalar: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_sum_dim(
t: *mut OpaqueTensor,
dim: usize,
keep_dim: bool,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.sum_impl(dim as i32) {
Ok(mut result) => {
if keep_dim {
let mut new_shape = tensor.shape().to_vec();
new_shape[dim] = 1;
match result.reshape_impl(&new_shape) {
Ok(reshaped) => result = reshaped,
Err(e) => {
eprintln!("Runtime Error in sum_dim (reshape): {}", e);
return std::ptr::null_mut();
}
}
}
let ptr = make_tensor(result);
if tensor.requires_grad() {
use crate::autograd::ops::SumDimBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(SumDimBackward {
a: tensor_ref_from_ptr(t),
input_shape: tensor.shape().to_vec(),
axis: dim as i32,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in sum_dim: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_embedding(
weight: *mut OpaqueTensor,
indices: *mut OpaqueTensor,
_padding_idx: i64,
_scale_grad_by_freq: bool,
_sparse: bool,
) -> *mut OpaqueTensor {
if weight.is_null() || indices.is_null() {
return std::ptr::null_mut();
}
let weights_tensor = unsafe { &*weight };
let indices_tensor = unsafe { &*indices };
match weights_tensor.embedding_impl(indices_tensor) {
Ok(res) => {
let ptr = make_tensor(res);
if weights_tensor.requires_grad() {
use crate::autograd::ops::EmbeddingBackward;
let idx_data: Vec<i64> = indices_tensor.data.iter().map(|x| *x as i64).collect();
let vocab_size = weights_tensor.shape[0];
let embed_dim = if weights_tensor.shape.len() > 1 { weights_tensor.shape[1] } else { 1 };
unsafe {
(&mut *ptr).set_grad_fn(Box::new(EmbeddingBackward {
weight: tensor_ref_from_ptr(weight),
indices: idx_data,
vocab_size,
embed_dim,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in embedding: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_runtime_init() {
println!("Runtime device initialized: CPU");
}
pub extern "C" fn tl_cpu_runtime_shutdown() {}
pub extern "C" fn tl_cpu_tensor_backward(t: *mut OpaqueTensor) {
if t.is_null() {
return;
}
let tensor = unsafe { &mut *t };
tensor.backward();
}
pub extern "C" fn tl_cpu_tensor_grad(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
if let Some(grad) = tensor.get_grad() {
return make_tensor(grad);
}
make_tensor(CpuTensor::zeros(tensor.shape(), DType::F32))
}
pub extern "C" fn tl_cpu_tensor_detach(t: *mut OpaqueTensor, req_grad: bool) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let result_ptr = make_tensor(tensor.detach());
if req_grad {
unsafe {
let res_tensor = &mut *result_ptr;
res_tensor.enable_grad();
}
}
result_ptr
}
pub extern "C" fn tl_cpu_tensor_enable_grad(t: *mut OpaqueTensor) {
if t.is_null() {
return;
}
let tensor = unsafe { &mut *t };
tensor.enable_grad();
}
pub extern "C" fn tl_cpu_clear_grads() {
}
#[no_mangle]
pub extern "C" fn tl_cpu_enter_scope() {
crate::memory::enter_scope();
}
#[no_mangle]
pub extern "C" fn tl_cpu_exit_scope() {
crate::memory::exit_scope();
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_promote(t: *mut OpaqueTensor) {
crate::memory::promote_tensor(t);
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_register(t: *mut OpaqueTensor) {
crate::memory::register_tensor(t);
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_return_to_pool(t: *mut OpaqueTensor) {
if t.is_null() {
return;
}
crate::memory::release_tensor(t);
}
pub extern "C" fn tl_cpu_tensor_tan(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let data: Vec<f32> = tensor.data.iter().map(|&x| x.tan()).collect();
make_tensor(CpuTensor::<f32> {
data: data,
data_i64: None,
shape: tensor.shape.clone(),
dtype: tensor.dtype,
autograd: None,
})
}
pub extern "C" fn tl_cpu_tensor_clamp(
t: *mut OpaqueTensor,
min: f64,
max: f64,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let min_f32 = min as f32;
let max_f32 = max as f32;
let data: Vec<f32> = tensor
.data
.iter()
.map(|&x| x.max(min_f32).min(max_f32))
.collect();
make_tensor(CpuTensor::<f32> {
data: data,
data_i64: None,
shape: tensor.shape.clone(),
dtype: tensor.dtype,
autograd: None,
})
}
pub extern "C" fn tl_cpu_tensor_sigmoid(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let data: Vec<f32> = tensor.data.iter().map(|&x| 1.0 / (1.0 + (-x).exp())).collect();
let sig_clone = CpuTensor::<f32> { data: data.clone(), data_i64: None, shape: tensor.shape.clone(), dtype: tensor.dtype, autograd: None };
let ptr = make_tensor(CpuTensor::<f32> { data, data_i64: None, shape: tensor.shape.clone(), dtype: tensor.dtype, autograd: None });
if tensor.requires_grad() {
use crate::autograd::ops::SigmoidBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(SigmoidBackward {
a: tensor_ref_from_ptr(t),
output: sig_clone,
}));
}
}
ptr
}
pub extern "C" fn tl_cpu_tensor_scale(t: *mut OpaqueTensor, s: f64) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.mul_scalar_impl(s as f32) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in scale: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_silu(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let data: Vec<f32> = tensor.data.iter().map(|&x| x * (1.0 / (1.0 + (-x).exp()))).collect();
let ptr = make_tensor(CpuTensor::<f32> { data, data_i64: None, shape: tensor.shape.clone(), dtype: tensor.dtype, autograd: None });
if tensor.requires_grad() {
use crate::autograd::ops::SiluBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(SiluBackward {
input: tensor_ref_from_ptr(t),
input_data: tensor.shallow_clone(),
}));
}
}
ptr
}
pub extern "C" fn tl_cpu_tensor_cross_entropy(
logits: *mut OpaqueTensor,
labels: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if logits.is_null() || labels.is_null() {
return std::ptr::null_mut();
}
let l = unsafe { &*logits };
let target = unsafe { &*labels };
let l_data = &l.data;
let t_data: Vec<i64> = target.data.iter().map(|x| *x as i64).collect();
let num_classes = if l.shape.len() >= 2 { *l.shape.last().unwrap() } else { l_data.len() };
let batch_size = t_data.len();
let mut softmax_data = vec![0.0f32; l_data.len()];
for i in 0..batch_size {
let start = i * num_classes;
let end = (start + num_classes).min(l_data.len());
let row = &l_data[start..end];
let max_val = row.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
let mut sum_exp = 0.0f32;
for j in 0..row.len() {
softmax_data[start + j] = (row[j] - max_val).exp();
sum_exp += softmax_data[start + j];
}
for j in 0..row.len() {
softmax_data[start + j] /= sum_exp;
}
}
let mut loss = 0.0f32;
let mut valid_count = 0usize;
for i in 0..batch_size {
let idx = t_data[i];
if idx < 0 {
continue;
}
let idx = idx as usize;
if idx < num_classes {
let p = softmax_data[i * num_classes + idx].max(1e-7);
loss -= p.ln();
valid_count += 1;
}
}
if valid_count > 0 {
loss /= valid_count as f32;
}
let softmax_tensor = CpuTensor::<f32> {
data: softmax_data,
data_i64: None,
shape: l.shape.clone(),
dtype: DType::F32,
autograd: None,
};
let ptr = make_tensor(CpuTensor::<f32> {
data: vec![loss],
data_i64: None,
shape: vec![1],
dtype: DType::F32,
autograd: None,
});
if l.requires_grad() {
use crate::autograd::ops::CrossEntropyBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(CrossEntropyBackward {
input: tensor_ref_from_ptr(logits),
softmax_output: softmax_tensor,
targets: target.shallow_clone(),
num_classes,
}));
}
}
ptr
}
pub extern "C" fn tl_cpu_tensor_numel(t: *mut OpaqueTensor) -> i64 {
if t.is_null() {
return 0;
}
let tensor = unsafe { &*t };
tensor.shape.iter().product::<usize>() as i64
}
pub extern "C" fn tl_cpu_tensor_transpose_2d(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let ndim = tensor.shape.len();
if ndim < 2 {
return make_tensor(tensor.clone_data());
}
match tensor.transpose_impl(ndim - 2, ndim - 1) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in transpose_2d: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_to_f32(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
make_tensor(tensor.clone_data())
}
pub extern "C" fn tl_cpu_tensor_to_i64(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
let i64_data: Vec<i64> = tensor.data.iter().map(|&x| x as i64).collect();
let f32_data: Vec<f32> = i64_data.iter().map(|&x| x as f32).collect();
make_tensor(CpuTensor::<f32> {
data: f32_data,
data_i64: Some(i64_data),
shape: tensor.shape.clone(),
dtype: DType::I64,
autograd: None,
})
}
pub extern "C" fn tl_cpu_tensor_max_dim(
t: *mut OpaqueTensor,
dim: usize,
_keep_dim: bool,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.max_impl(dim as i32) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in max_dim: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_min_dim(
t: *mut OpaqueTensor,
dim: usize,
_keep_dim: bool,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.min_impl(dim as i32) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in min_dim: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_mean_dim(
t: *mut OpaqueTensor,
dim: usize,
_keep_dim: bool,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.mean_impl(dim as i32) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in mean_dim: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_device_id(_t: *mut OpaqueTensor) -> i32 {
0 }
pub extern "C" fn tl_cpu_tensor_to_device(
t: *mut OpaqueTensor,
_device_id: i32,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
make_tensor(tensor.clone_data())
}
pub extern "C" fn tl_cpu_tensor_prepare_return(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
t
}
pub extern "C" fn tl_cpu_tensor_reshape_2d(
t: *mut OpaqueTensor,
d0: i64,
d1: i64,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.reshape_impl(&[d0 as usize, d1 as usize]) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in reshape_2d: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_reshape_3d_to_2d(
t: *mut OpaqueTensor,
d0: i64,
d1: i64,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.reshape_impl(&[d0 as usize, d1 as usize]) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in reshape_3d_to_2d: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_matmul_4d(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
let a_tensor = unsafe { &*a };
let b_tensor = unsafe { &*b };
match a_tensor.matmul_impl(b_tensor) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in matmul_4d: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_add_4d(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
let a_tensor = unsafe { &*a };
let b_tensor = unsafe { &*b };
match a_tensor.add_impl(b_tensor) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in add_4d: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_silu_4d(t: *mut OpaqueTensor) -> *mut OpaqueTensor {
tl_cpu_tensor_silu(t) }
pub extern "C" fn tl_cpu_tensor_cat2(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
dim: i64,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
let a_tensor = unsafe { &*a };
let b_tensor = unsafe { &*b };
match CpuTensor::cat_impl(&[a_tensor, b_tensor], dim as usize) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in cat2: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_cat_4d(
a: *mut OpaqueTensor,
b: *mut OpaqueTensor,
dim: i64,
) -> *mut OpaqueTensor {
if a.is_null() || b.is_null() {
return std::ptr::null_mut();
}
let a_tensor = unsafe { &*a };
let b_tensor = unsafe { &*b };
match CpuTensor::cat_impl(&[a_tensor, b_tensor], dim as usize) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in cat_4d: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_rms_norm(
input: *mut OpaqueTensor,
weight: *mut OpaqueTensor,
eps: f32,
) -> *mut OpaqueTensor {
if input.is_null() {
return std::ptr::null_mut();
}
let x = unsafe { &*input };
let data = &x.data;
let last_dim = *x.shape.last().unwrap_or(&1);
let num_groups = data.len() / last_dim;
let mut result = vec![0.0f32; data.len()];
for g in 0..num_groups {
let start = g * last_dim;
let end = start + last_dim;
let mean_sq: f32 = data[start..end].iter().map(|&v| v * v).sum::<f32>() / last_dim as f32;
let rms = (mean_sq + eps).sqrt();
for i in start..end {
result[i] = data[i] / rms;
}
}
let normalized = CpuTensor::<f32> {
data: result,
data_i64: None,
shape: x.shape.clone(),
dtype: x.dtype,
autograd: None,
};
if !weight.is_null() {
let w = unsafe { &*weight };
match normalized.mul_impl(w) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in rms_norm: {}", e);
std::ptr::null_mut()
}
}
} else {
make_tensor(normalized)
}
}
pub extern "C" fn tl_cpu_tensor_print_1(t: *mut OpaqueTensor) {
tl_cpu_tensor_print(t);
}
pub extern "C" fn tl_cpu_tensor_print_2(t: *mut OpaqueTensor) {
tl_cpu_tensor_print(t);
}
pub extern "C" fn tl_cpu_tensor_print_3(t: *mut OpaqueTensor) {
tl_cpu_tensor_print(t);
}
pub extern "C" fn tl_cpu_tensor_save(t: *mut OpaqueTensor, path: *const i8) {
if t.is_null() || path.is_null() {
return;
}
let tensor = unsafe { &*(t as *const CpuTensor<f32>) };
let path_str = unsafe { CStr::from_ptr(path).to_string_lossy() };
let mut buf: Vec<u8> = Vec::new();
buf.extend_from_slice(b"TLTF"); let rank = tensor.shape.len() as u64;
buf.extend_from_slice(&rank.to_le_bytes());
for &dim in &tensor.shape {
buf.extend_from_slice(&(dim as u64).to_le_bytes());
}
let dtype_id: u64 = match tensor.dtype {
DType::F32 => 0,
DType::I64 => 1,
_ => 0,
};
buf.extend_from_slice(&dtype_id.to_le_bytes());
for &val in &tensor.data {
buf.extend_from_slice(&val.to_le_bytes());
}
if let Err(e) = std::fs::write(&*path_str, &buf) {
eprintln!("Error saving tensor to {}: {}", path_str, e);
}
}
pub extern "C" fn tl_cpu_tensor_load(path: *const i8) -> *mut OpaqueTensor {
if path.is_null() {
return std::ptr::null_mut();
}
let path_str = unsafe { CStr::from_ptr(path).to_string_lossy() };
let buf = match std::fs::read(&*path_str) {
Ok(b) => b,
Err(e) => {
eprintln!("Error loading tensor from {}: {}", path_str, e);
return std::ptr::null_mut();
}
};
if buf.len() < 12 || &buf[0..4] != b"TLTF" {
eprintln!("Invalid tensor file format: {}", path_str);
return std::ptr::null_mut();
}
let mut offset = 4;
let rank = u64::from_le_bytes(buf[offset..offset + 8].try_into().unwrap()) as usize;
offset += 8;
let mut shape = Vec::with_capacity(rank);
for _ in 0..rank {
let dim = u64::from_le_bytes(buf[offset..offset + 8].try_into().unwrap()) as usize;
shape.push(dim);
offset += 8;
}
let dtype_id = u64::from_le_bytes(buf[offset..offset + 8].try_into().unwrap());
offset += 8;
let dtype = if dtype_id == 1 {
DType::I64
} else {
DType::F32
};
let numel: usize = shape.iter().product();
let mut data = Vec::with_capacity(numel);
for _ in 0..numel {
if offset + 4 > buf.len() {
break;
}
let val = f32::from_le_bytes(buf[offset..offset + 4].try_into().unwrap());
data.push(val);
offset += 4;
}
make_tensor(CpuTensor::<f32> {
data: data,
data_i64: None,
shape,
dtype,
autograd: None,
})
}
pub extern "C" fn tl_cpu_tensor_rope_new_cos(
seq_len: usize,
dim: usize,
base: f32,
) -> *mut OpaqueTensor {
let half_dim = dim / 2;
let mut data = Vec::with_capacity(seq_len * dim);
for pos in 0..seq_len {
for i in 0..half_dim {
let freq = 1.0 / base.powf(2.0 * i as f32 / dim as f32);
let val = (pos as f32 * freq).cos();
data.push(val);
data.push(val); }
}
let t = CpuTensor::from_slice(&data, &[seq_len, dim], crate::DType::F32);
make_tensor(t)
}
pub extern "C" fn tl_cpu_tensor_rope_new_sin(
seq_len: usize,
dim: usize,
base: f32,
) -> *mut OpaqueTensor {
let half_dim = dim / 2;
let mut data = Vec::with_capacity(seq_len * dim);
for pos in 0..seq_len {
for i in 0..half_dim {
let freq = 1.0 / base.powf(2.0 * i as f32 / dim as f32);
let val = (pos as f32 * freq).sin();
data.push(val);
data.push(val); }
}
let t = CpuTensor::from_slice(&data, &[seq_len, dim], crate::DType::F32);
make_tensor(t)
}
pub extern "C" fn tl_cpu_tensor_apply_rope(
t: *mut OpaqueTensor,
cos_table: *mut OpaqueTensor,
sin_table: *mut OpaqueTensor,
) -> *mut OpaqueTensor {
if t.is_null() || cos_table.is_null() || sin_table.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*(t as *const CpuTensor<f32>) };
let cos_t = unsafe { &*(cos_table as *const CpuTensor<f32>) };
let sin_t = unsafe { &*(sin_table as *const CpuTensor<f32>) };
let shape = tensor.shape();
let data = tensor.data_f32();
let total = data.len();
let head_dim = *shape.last().unwrap_or(&0);
if head_dim == 0 || total == 0 {
return make_tensor(tensor.clone_data());
}
let cos_data = cos_t.data_f32();
let sin_data = sin_t.data_f32();
let mut result = vec![0.0f32; total];
let num_vectors = total / head_dim;
for v in 0..num_vectors {
let offset = v * head_dim;
for i in (0..head_dim).step_by(2) {
let cos_idx = i; let c = if cos_idx < cos_data.len() {
cos_data[cos_idx]
} else {
1.0
};
let s = if cos_idx < sin_data.len() {
sin_data[cos_idx]
} else {
0.0
};
let x0 = data[offset + i];
let x1 = data[offset + i + 1];
result[offset + i] = x0 * c - x1 * s;
result[offset + i + 1] = x0 * s + x1 * c;
}
}
let t = CpuTensor::from_slice(&result, shape, crate::DType::F32);
make_tensor(t)
}
#[track_caller]
fn make_tensor(t: CpuTensor<f32>) -> *mut OpaqueTensor {
let f32_bytes = t.data.capacity() * std::mem::size_of::<f32>();
if f32_bytes > 0 {
crate::memory::track_alloc(f32_bytes);
}
if let Some(ref v) = t.data_i64 {
let i64_bytes = v.capacity() * std::mem::size_of::<i64>();
if i64_bytes > 0 {
crate::memory::track_alloc(i64_bytes);
}
}
let arc = Arc::new(UnsafeCell::new(t));
let ptr = Arc::into_raw(arc) as *mut CpuTensor<f32>;
let loc = std::panic::Location::caller();
if crate::memory::is_mem_log_enabled() {
eprintln!("[ALLOC] Ptr: {:p} at {}:{}", ptr, loc.file(), loc.line());
}
crate::memory::register_tensor(ptr);
crate::memory::count_tensor_alloc();
ptr as *mut OpaqueTensor
}
#[no_mangle]
pub extern "C" fn tl_cpu_get_pool_count() -> usize {
crate::memory::get_pool_size()
}
#[no_mangle]
pub extern "C" fn tl_cpu_get_memory_mb() -> f64 {
let bytes = get_rss_bytes();
bytes as f64 / 1024.0 / 1024.0
}
#[no_mangle]
pub extern "C" fn tl_cpu_get_memory_bytes() -> usize {
get_rss_bytes()
}
fn get_rss_bytes() -> usize {
#[cfg(target_os = "macos")]
{
use std::mem;
extern "C" {
fn mach_task_self() -> u32;
fn task_info(
target_task: u32,
flavor: i32,
task_info_out: *mut std::ffi::c_void,
task_info_outCnt: *mut u32,
) -> i32;
}
#[repr(C)]
struct MachTaskBasicInfo {
virtual_size: u64,
resident_size: u64,
resident_size_max: u64,
user_time: [u32; 2], system_time: [u32; 2], policy: i32,
suspend_count: i32,
}
let mut info: MachTaskBasicInfo = unsafe { mem::zeroed() };
let mut count = (mem::size_of::<MachTaskBasicInfo>() / mem::size_of::<u32>()) as u32;
let kr = unsafe {
task_info(
mach_task_self(),
20, &mut info as *mut _ as *mut std::ffi::c_void,
&mut count,
)
};
if kr == 0 {
info.resident_size as usize
} else {
crate::memory::get_total_allocated()
}
}
#[cfg(target_os = "linux")]
{
if let Ok(statm) = std::fs::read_to_string("/proc/self/statm") {
let parts: Vec<&str> = statm.split_whitespace().collect();
if parts.len() >= 2 {
if let Ok(pages) = parts[1].parse::<usize>() {
return pages * 4096; }
}
}
crate::memory::get_total_allocated()
}
#[cfg(not(any(target_os = "macos", target_os = "linux")))]
{
crate::memory::get_total_allocated()
}
}
#[no_mangle]
pub extern "C" fn tl_cpu_get_pool_mb() -> f64 {
0.0
}
#[no_mangle]
pub extern "C" fn tl_cpu_get_pool_bytes() -> usize {
0
}
use std::collections::HashMap;
use std::ffi::CStr;
use std::os::raw::c_char;
#[repr(C)]
pub struct StringStruct {
pub ptr: *mut c_char,
pub len: i64,
}
pub struct CpuTensorMap {
pub map: HashMap<String, *mut OpaqueTensor>,
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_map_new() -> *mut CpuTensorMap {
Box::into_raw(Box::new(CpuTensorMap {
map: HashMap::new(),
}))
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_map_insert(
map: *mut CpuTensorMap,
name: *mut StringStruct,
tensor: *mut OpaqueTensor,
) {
unsafe {
if map.is_null() || name.is_null() || (*name).ptr.is_null() || tensor.is_null() {
return;
}
let map_ref = &mut (*map).map;
let key = CStr::from_ptr((*name).ptr).to_string_lossy().into_owned();
let t = &*tensor;
let cloned = Box::into_raw(Box::new(CpuTensor::<f32> {
data: t.data.clone(),
data_i64: t.data_i64.clone(),
shape: t.shape.clone(),
dtype: t.dtype,
autograd: None,
}));
map_ref.insert(key, cloned);
}
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_map_get(
map: *mut CpuTensorMap,
name: *mut StringStruct,
) -> *mut OpaqueTensor {
unsafe {
if map.is_null() || name.is_null() || (*name).ptr.is_null() {
return std::ptr::null_mut();
}
let map_ref = &(*map).map;
let key = CStr::from_ptr((*name).ptr).to_string_lossy().into_owned();
match map_ref.get(&key) {
Some(&ptr) => {
let t = &*ptr;
Box::into_raw(Box::new(CpuTensor::<f32> {
data: t.data.clone(),
data_i64: t.data_i64.clone(),
shape: t.shape.clone(),
dtype: t.dtype,
autograd: None,
}))
}
None => std::ptr::null_mut(),
}
}
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_conv2d(
input: *mut OpaqueTensor,
weight: *mut OpaqueTensor,
padding: i64,
stride: i64,
) -> *mut OpaqueTensor {
if input.is_null() || weight.is_null() {
return std::ptr::null_mut();
}
let (inp, w) = unsafe { (&*input, &*weight) };
let p = padding.max(0) as usize;
let s = stride.max(1) as usize;
match inp.conv2d_impl(w, (s, s), (p, p)) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in conv2d: {}", e);
std::ptr::null_mut()
}
}
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_max_pool2d(
input: *mut OpaqueTensor,
kernel_size: i64,
stride: i64,
padding: i64,
) -> *mut OpaqueTensor {
if input.is_null() {
return std::ptr::null_mut();
}
let inp = unsafe { &*input };
let k = kernel_size.max(1) as usize;
let s = stride.max(1) as usize;
let _p = padding.max(0) as usize; match inp.max_pool2d_impl((k, k), (s, s)) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in max_pool2d: {}", e);
std::ptr::null_mut()
}
}
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_avg_pool2d(
input: *mut OpaqueTensor,
kernel_size: i64,
stride: i64,
padding: i64,
) -> *mut OpaqueTensor {
if input.is_null() {
return std::ptr::null_mut();
}
let inp = unsafe { &*input };
let k = kernel_size.max(1) as usize;
let s = stride.max(1) as usize;
let _p = padding.max(0) as usize;
match inp.avg_pool2d_impl((k, k), (s, s)) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in avg_pool2d: {}", e);
std::ptr::null_mut()
}
}
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_batch_norm(
input: *mut OpaqueTensor,
running_mean: *mut OpaqueTensor,
running_var: *mut OpaqueTensor,
weight: *mut OpaqueTensor,
bias: *mut OpaqueTensor,
_training: bool,
_momentum: f64,
eps: f64,
) -> *mut OpaqueTensor {
if input.is_null() {
return std::ptr::null_mut();
}
let inp = unsafe { &*input };
let shape = inp.shape();
let channels = if shape.len() >= 2 { shape[1] } else { 1 };
let dummy_ones = CpuTensor::ones(&[channels], DType::F32);
let dummy_zeros = CpuTensor::zeros(&[channels], DType::F32);
let w = if !weight.is_null() {
unsafe { &*weight }
} else {
&dummy_ones
};
let b = if !bias.is_null() {
unsafe { &*bias }
} else {
&dummy_zeros
};
let rm = if !running_mean.is_null() {
unsafe { &*running_mean }
} else {
&dummy_zeros
};
let rv = if !running_var.is_null() {
unsafe { &*running_var }
} else {
&dummy_ones
};
match inp.batch_norm_impl(w, b, rm, rv, eps as f32) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in batch_norm: {}", e);
std::ptr::null_mut()
}
}
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_layer_norm(
input: *mut OpaqueTensor,
weight: *mut OpaqueTensor,
bias: *mut OpaqueTensor,
eps: f64,
) -> *mut OpaqueTensor {
if input.is_null() {
return std::ptr::null_mut();
}
let inp = unsafe { &*input };
let shape = inp.shape();
let last_dim = *shape.last().unwrap_or(&1);
let dummy_ones = CpuTensor::ones(&[last_dim], DType::F32);
let dummy_zeros = CpuTensor::zeros(&[last_dim], DType::F32);
let w = if !weight.is_null() {
unsafe { &*weight }
} else {
&dummy_ones
};
let b = if !bias.is_null() {
unsafe { &*bias }
} else {
&dummy_zeros
};
match inp.layer_norm_impl(w, b, eps as f32) {
Ok(res) => {
let ptr = make_tensor(res);
if inp.requires_grad() {
use crate::autograd::ops::LayerNormBackward;
unsafe {
(&mut *ptr).set_grad_fn(Box::new(LayerNormBackward {
input: tensor_ref_from_ptr(input),
input_data: inp.shallow_clone(),
weight: if !weight.is_null() { tensor_ref_from_ptr(weight) } else { tensor_ref_from_ptr(input) },
weight_data: w.shallow_clone(),
bias: if !bias.is_null() { tensor_ref_from_ptr(bias) } else { tensor_ref_from_ptr(input) },
eps,
}));
}
}
ptr
}
Err(e) => {
eprintln!("Runtime Error in layer_norm: {}", e);
std::ptr::null_mut()
}
}
}
#[no_mangle]
pub extern "C" fn tl_cpu_tensor_dropout(
input: *mut OpaqueTensor,
p: f64,
training: bool,
) -> *mut OpaqueTensor {
if input.is_null() {
return std::ptr::null_mut();
}
let inp = unsafe { &*input };
match inp.dropout_impl(p as f32, training) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in dropout: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_data(t: *mut OpaqueTensor) -> *const f32 {
if t.is_null() {
return std::ptr::null();
}
let tensor = unsafe { &*t };
tensor.data_f32().as_ptr()
}
pub extern "C" fn tl_cpu_tensor_repeat_interleave(
t: *mut OpaqueTensor,
repeats: usize,
dim: usize,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.repeat_interleave_impl(repeats, dim) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in repeat_interleave: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_sample(
t: *mut OpaqueTensor,
temp: f32,
top_p: f32,
) -> *mut OpaqueTensor {
if t.is_null() {
return std::ptr::null_mut();
}
let tensor = unsafe { &*t };
match tensor.sample_impl(temp, top_p) {
Ok(res) => make_tensor(res),
Err(e) => {
eprintln!("Runtime Error in sample: {}", e);
std::ptr::null_mut()
}
}
}
pub extern "C" fn tl_cpu_tensor_new_causal_mask(size: usize) -> *mut OpaqueTensor {
let shape = [size, size];
let mut data = vec![0.0f32; size * size];
for i in 0..size {
for j in 0..size {
if j > i {
data[i * size + j] = f32::NEG_INFINITY;
}
}
}
make_tensor(CpuTensor::from_slice(&data, &shape, DType::F32))
}
pub extern "C" fn tl_cpu_mem_stats_report() {
crate::memory::mem_stats_report();
}
pub extern "C" fn tl_cpu_mem_live_count() -> i64 {
crate::memory::get_live_count() as i64
}
pub extern "C" fn tl_cpu_mem_stats_enabled() -> bool {
crate::memory::is_mem_stats_enabled()
}