// tl_cpu/device_impl.rs
//! CpuDevice: CPU implementation of the IDevice trait.
//!
//! Delegates to the existing `ffi::tl_cpu_tensor_*` functions.
//! Every method bridges between `void*` handles and `CpuTensor` via casts.

use std::ffi::c_void;

use tl_backend::{BackendError, BackendResult, IDevice};

use crate::ffi;
use crate::tensor::CpuTensor;

/// CPU device handle (zero-sized type).
///
/// Carries no state of its own; all tensor state lives behind the
/// `ffi::tl_cpu_tensor_*` layer this device delegates to.
pub struct CpuDevice;

14/// void* → *mut CpuTensor キャスト
15#[inline(always)]
16fn t(p: *mut c_void) -> *mut CpuTensor { p as *mut CpuTensor }
17
18/// *mut CpuTensor → void* キャスト
19#[inline(always)]
20fn v(p: *mut CpuTensor) -> *mut c_void { p as *mut c_void }
21
22/// 結果ポインタの null チェックを行うヘルパー
23#[inline(always)]
24fn check(ptr: *mut crate::ffi::OpaqueTensor) -> BackendResult<*mut c_void> {
25    if ptr.is_null() {
26        Err(BackendError::InternalError("CPU operation failed (check stderr)".to_string()))
27    } else {
28        Ok(v(ptr))
29    }
30}
31
32impl IDevice for CpuDevice {
33    // ========== テンソル作成 ==========
34    #[inline] fn tensor_new(&self, data: *const f32, rank: usize, shape: *const usize) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_new(data, rank, shape)) }
35    #[inline] fn tensor_new_i64(&self, data: *const i64, rank: usize, shape: *const usize) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_new_i64(data, rank, shape)) }
36    #[inline] fn tensor_from_i64_array(&self, data: *const i64, len: i64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_from_i64(data, len)) }
37    #[inline] fn tensor_zeros(&self, rank: usize, shape: *const usize, req_grad: bool) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_zeros(rank, shape, req_grad)) }
38    #[inline] fn tensor_ones(&self, rank: usize, shape: *const usize, req_grad: bool) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_ones(rank, shape, req_grad)) }
39    #[inline] fn tensor_randn_debug(&self, rank: usize, shape: *const usize, seed: u64, req_grad: bool) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_randn_debug(rank, shape, seed, req_grad)) }
40    #[inline] fn tensor_new_causal_mask(&self, size: usize) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_new_causal_mask(size)) }
41    #[inline] fn tensor_from_vec_u8(&self, data: *mut c_void, len: i64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_from_vec_u8(data, len)) }
42    #[inline] fn tensor_from_u8_labels(&self, data: *const u8, len: i64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_from_u8_labels(data, len)) }
43
44    // ========== メモリ管理 ==========
45    #[inline] fn tensor_clone(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_clone(t(a))) }
46    #[inline] fn tensor_shallow_clone(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_shallow_clone(t(a))) }
47    #[inline] fn tensor_free(&self, a: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_free(t(a)); Ok(()) }
48    #[inline] fn tensor_release(&self, a: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_release(t(a)); Ok(()) }
49    #[inline] fn tensor_acquire(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_acquire(t(a))) }
50    #[inline] fn tensor_release_safe(&self, a: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_release(t(a)); Ok(()) }
51    #[inline] fn tensor_promote(&self, a: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_promote(t(a)); Ok(()) }
52    #[inline] fn tensor_register(&self, a: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_register(t(a)); Ok(()) }
53    #[inline] fn tensor_prepare_return(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_prepare_return(t(a))) }
54
55    // ========== テンソル情報 ==========
56    #[inline] fn tensor_len(&self, a: *mut c_void) -> BackendResult<usize> { Ok(ffi::tl_cpu_tensor_len(t(a))) }
57    #[inline] fn tensor_dim(&self, a: *mut c_void, dim: usize) -> BackendResult<usize> { Ok(ffi::tl_cpu_tensor_dim(t(a), dim)) }
58    #[inline] fn tensor_numel(&self, a: *mut c_void) -> BackendResult<i64> { Ok(ffi::tl_cpu_tensor_numel(t(a))) }
59    #[inline] fn tensor_data(&self, a: *mut c_void) -> BackendResult<*const f32> { Ok(ffi::tl_cpu_tensor_data(t(a))) }
60    #[inline] fn tensor_device_id(&self, a: *mut c_void) -> BackendResult<i32> { Ok(ffi::tl_cpu_tensor_device_id(t(a))) }
61    #[inline] fn tensor_get_shape(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_get_shape(t(a))) }
62
63    // ========== 要素アクセス ==========
64    #[inline] fn tensor_get(&self, a: *mut c_void, idx: i64) -> BackendResult<f32> { Ok(ffi::tl_cpu_tensor_get(t(a), idx)) }
65    #[inline] fn tensor_get_f32_md(&self, a: *mut c_void, indices: *const i64, rank: i64) -> BackendResult<f32> { Ok(ffi::tl_cpu_tensor_get_f32_md(t(a), indices, rank)) }
66    #[inline] fn tensor_get_i64_md(&self, a: *mut c_void, indices: *const i64, rank: i64) -> BackendResult<i64> { Ok(ffi::tl_cpu_tensor_get_i64_md(t(a), indices, rank)) }
67    #[inline] fn tensor_set_f32_md(&self, a: *mut c_void, indices: *const i64, rank: usize, value: f32) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_set_f32_md(t(a), indices, rank, value)) }
68    #[inline] fn tensor_item(&self, a: *mut c_void) -> BackendResult<f32> { Ok(ffi::tl_cpu_tensor_item(t(a))) }
69    #[inline] fn tensor_item_i64(&self, a: *mut c_void) -> BackendResult<i64> { Ok(ffi::tl_cpu_tensor_item_i64(t(a))) }
70
71    // ========== 二項演算 ==========
72    #[inline] fn tensor_add(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_add(t(a), t(b))) }
73    #[inline] fn tensor_sub(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_sub(t(a), t(b))) }
74    #[inline] fn tensor_mul(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_mul(t(a), t(b))) }
75    #[inline] fn tensor_div(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_div(t(a), t(b))) }
76    #[inline] fn tensor_rem(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_rem(t(a), t(b))) }
77    #[inline] fn tensor_matmul(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_matmul(t(a), t(b))) }
78    #[inline] fn tensor_pow(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_pow(t(a), t(b))) }
79    #[inline] fn tensor_cross_entropy(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_cross_entropy(t(a), t(b))) }
80
81    // ========== 単項演算 ==========
82    #[inline] fn tensor_neg(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_neg(t(a))) }
83    #[inline] fn tensor_abs(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_abs(t(a))) }
84    #[inline] fn tensor_contiguous(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_contiguous(t(a))) }
85
86    // ========== 比較演算 ==========
87    #[inline] fn tensor_eq(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_eq(t(a), t(b))) }
88    #[inline] fn tensor_neq(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_neq(t(a), t(b))) }
89    #[inline] fn tensor_gt(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_gt(t(a), t(b))) }
90    #[inline] fn tensor_lt(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_lt(t(a), t(b))) }
91    #[inline] fn tensor_ge(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_ge(t(a), t(b))) }
92    #[inline] fn tensor_le(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_le(t(a), t(b))) }
93
94    // ========== スカラー演算 ==========
95    #[inline] fn tensor_add_scalar(&self, a: *mut c_void, s: f64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_add_scalar(t(a), s)) }
96    #[inline] fn tensor_sub_scalar(&self, a: *mut c_void, s: f64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_sub_scalar(t(a), s)) }
97    #[inline] fn tensor_mul_scalar(&self, a: *mut c_void, s: f64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_mul_scalar(t(a), s)) }
98    #[inline] fn tensor_div_scalar(&self, a: *mut c_void, s: f64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_div_scalar(t(a), s)) }
99    #[inline] fn tensor_pow_scalar(&self, a: *mut c_void, exp: f32) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_pow_scalar(t(a), exp)) }
100    #[inline] fn tensor_scale(&self, a: *mut c_void, s: f32) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_scale(t(a), s as f64)) }
101
102    // ========== インプレース演算 ==========
103    #[inline] fn tensor_add_assign(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_add_assign(t(a), t(b)); Ok(()) }
104    #[inline] fn tensor_sub_assign(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_sub_assign(t(a), t(b)); Ok(()) }
105    #[inline] fn tensor_mul_assign(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_mul_assign(t(a), t(b)); Ok(()) }
106    #[inline] fn tensor_div_assign(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_div_assign(t(a), t(b)); Ok(()) }
107    #[inline] fn tensor_mod_assign(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_mod_assign(t(a), t(b)); Ok(()) }
108    #[inline] fn tensor_add_assign_scalar_f32(&self, a: *mut c_void, s: f32) -> BackendResult<()> { ffi::tl_cpu_tensor_add_assign_scalar_f32(t(a), s); Ok(()) }
109    #[inline] fn tensor_sub_assign_scalar_f32(&self, a: *mut c_void, s: f32) -> BackendResult<()> { ffi::tl_cpu_tensor_sub_assign_scalar_f32(t(a), s); Ok(()) }
110    #[inline] fn tensor_mul_assign_scalar_f32(&self, a: *mut c_void, s: f32) -> BackendResult<()> { ffi::tl_cpu_tensor_mul_assign_scalar_f32(t(a), s); Ok(()) }
111    #[inline] fn tensor_div_assign_scalar_f32(&self, a: *mut c_void, s: f32) -> BackendResult<()> { ffi::tl_cpu_tensor_div_assign_scalar_f32(t(a), s); Ok(()) }
112    #[inline] fn tensor_mod_assign_scalar_f32(&self, a: *mut c_void, s: f32) -> BackendResult<()> { ffi::tl_cpu_tensor_mod_assign_scalar_f32(t(a), s); Ok(()) }
113
114    // ========== 数学・活性化関数 ==========
115    #[inline] fn tensor_exp(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_exp(t(a))) }
116    #[inline] fn tensor_log(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_log(t(a))) }
117    #[inline] fn tensor_sqrt(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_sqrt(t(a))) }
118    #[inline] fn tensor_sin(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_sin(t(a))) }
119    #[inline] fn tensor_cos(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_cos(t(a))) }
120    #[inline] fn tensor_tan(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_tan(t(a))) }
121    #[inline] fn tensor_tanh(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_tanh(t(a))) }
122    #[inline] fn tensor_sigmoid(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_sigmoid(t(a))) }
123    #[inline] fn tensor_relu(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_relu(t(a))) }
124    #[inline] fn tensor_gelu(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_gelu(t(a))) }
125    #[inline] fn tensor_silu(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_silu(t(a))) }
126
127    // ========== Reduction ==========
128    #[inline] fn tensor_sum(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_sum(t(a))) }
129    #[inline] fn tensor_mean(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_mean(t(a))) }
130    #[inline] fn tensor_max(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_max(t(a))) }
131    #[inline] fn tensor_min(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_min(t(a))) }
132    #[inline] fn tensor_softmax(&self, a: *mut c_void, dim: i64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_softmax(t(a), dim)) }
133    #[inline] fn tensor_max_dim(&self, a: *mut c_void, dim: usize, keep_dim: bool) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_max_dim(t(a), dim, keep_dim)) }
134    #[inline] fn tensor_min_dim(&self, a: *mut c_void, dim: usize, keep_dim: bool) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_min_dim(t(a), dim, keep_dim)) }
135    #[inline] fn tensor_mean_dim(&self, a: *mut c_void, dim: usize, keep_dim: bool) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_mean_dim(t(a), dim, keep_dim)) }
136    #[inline] fn tensor_sum_dim(&self, a: *mut c_void, dim: usize, keep_dim: bool) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_sum_dim(t(a), dim, keep_dim)) }
137    #[inline] fn tensor_argmax(&self, a: *mut c_void, dim: i64, keep_dim: bool) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_argmax(t(a), dim, keep_dim)) }
138    #[inline] fn tensor_argmin(&self, a: *mut c_void, dim: i64, keep_dim: bool) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_argmin(t(a), dim, keep_dim)) }
139    #[inline] fn tensor_tril(&self, a: *mut c_void, diagonal: i64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_tril(t(a), diagonal)) }
140    #[inline] fn tensor_clamp(&self, a: *mut c_void, min: f64, max: f64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_clamp(t(a), min, max)) }
141    #[inline] fn tensor_sample(&self, a: *mut c_void, temp: f32, top_p: f32) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_sample(t(a), temp, top_p)) }
142
143    // ========== Autograd ==========
144    #[inline] fn tensor_backward(&self, a: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_backward(t(a)); Ok(()) }
145    #[inline] fn tensor_grad(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_grad(t(a))) }
146    #[inline] fn tensor_detach(&self, a: *mut c_void, req_grad: bool) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_detach(t(a), req_grad)) }
147    #[inline] fn tensor_enable_grad(&self, a: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_enable_grad(t(a)); Ok(()) }
148    #[inline] fn clear_grads(&self) -> BackendResult<()> { ffi::tl_cpu_clear_grads(); Ok(()) }
149
150    // ========== 形状操作 ==========
151    #[inline] fn tensor_reshape_new(&self, a: *mut c_void, s: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_reshape_new(t(a), t(s))) }
152    #[inline] fn tensor_reshape_dims(&self, a: *mut c_void, d1: i64, d2: i64, d3: i64, d4: i64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_reshape_dims(t(a), d1, d2, d3, d4)) }
153    #[inline] fn tensor_transpose(&self, a: *mut c_void, dim0: usize, dim1: usize) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_transpose(t(a), dim0, dim1)) }
154    #[inline] fn tensor_slice(&self, a: *mut c_void, dim: i64, start: i64, len: i64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_slice(t(a), dim, start, len)) }
155    #[inline] fn tensor_narrow(&self, a: *mut c_void, dim: usize, start: usize, len: usize) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_narrow(t(a), dim, start, len)) }
156    #[inline] fn tensor_cat(&self, a: *mut c_void, b: *mut c_void, dim: i64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_cat(t(a), t(b), dim)) }
157    #[inline] fn tensor_cat_i64(&self, a: *mut c_void, b: *mut c_void, dim: i64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_cat_i64(t(a), t(b), dim)) }
158    #[inline] fn tensor_cat2(&self, a: *mut c_void, b: *mut c_void, dim: i64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_cat2(t(a), t(b), dim)) }
159    #[inline] fn tensor_cat_4d(&self, a: *mut c_void, b: *mut c_void, dim: i64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_cat_4d(t(a), t(b), dim)) }
160    #[inline] fn tensor_replace_data(&self, dst: *mut c_void, src: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_replace_data(t(dst), t(src)); Ok(()) }
161    #[inline] fn tensor_repeat_interleave(&self, a: *mut c_void, repeats: usize, dim: usize) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_repeat_interleave(t(a), repeats, dim)) }
162    #[inline] fn tensor_to_device(&self, a: *mut c_void, device_id: i32) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_to_device(t(a), device_id)) }
163    #[inline] fn tensor_to_f32(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_to_f32(t(a))) }
164    #[inline] fn tensor_to_i64(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_to_i64(t(a))) }
165    #[inline] fn tensor_embedding(&self, w: *mut c_void, idx: *mut c_void, pad: i64, sg: bool, sp: bool) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_embedding(t(w), t(idx), pad, sg, sp)) }
166
167    // ========== LLM ==========
168    #[inline] fn tensor_rms_norm(&self, a: *mut c_void, w: *mut c_void, eps: f32) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_rms_norm(t(a), t(w), eps)) }
169    #[inline] fn tensor_rope_new_cos(&self, dim: usize, seq_len: usize, base: f32) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_rope_new_cos(seq_len, dim, base)) }
170    #[inline] fn tensor_rope_new_sin(&self, dim: usize, seq_len: usize, base: f32) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_rope_new_sin(seq_len, dim, base)) }
171    #[inline] fn tensor_apply_rope(&self, a: *mut c_void, cos: *mut c_void, sin: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_apply_rope(t(a), t(cos), t(sin))) }
172
173    // ========== IO / Print ==========
174    #[inline] fn tensor_print(&self, a: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_print(t(a)); Ok(()) }
175    #[inline] fn tensor_display(&self, a: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_print(t(a)); Ok(()) }
176    #[inline] fn tensor_print_1(&self, a: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_print_1(t(a)); Ok(()) }
177    #[inline] fn tensor_print_2(&self, a: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_print_2(t(a)); Ok(()) }
178    #[inline] fn tensor_print_3(&self, a: *mut c_void) -> BackendResult<()> { ffi::tl_cpu_tensor_print_3(t(a)); Ok(()) }
179    #[inline] fn tensor_save(&self, a: *mut c_void, path: *const i8) -> BackendResult<()> { ffi::tl_cpu_tensor_save(t(a), path); Ok(()) }
180    #[inline] fn tensor_load(&self, path: *const i8) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_load(path)) }
181
182    // ========== NN ==========
183    #[inline] fn tensor_conv2d(&self, input: *mut c_void, weight: *mut c_void, _bias: *mut c_void, stride: usize, padding: usize, _dilation: usize, _groups: usize) -> BackendResult<*mut c_void> {
184        check(ffi::tl_cpu_tensor_conv2d(t(input), t(weight), padding as i64, stride as i64))
185    }
186    #[inline] fn tensor_batch_norm(&self, input: *mut c_void, running_mean: *mut c_void, running_var: *mut c_void, weight: *mut c_void, bias: *mut c_void, training: bool, momentum: f64, eps: f64) -> BackendResult<*mut c_void> {
187        check(ffi::tl_cpu_tensor_batch_norm(t(input), t(running_mean), t(running_var), t(weight), t(bias), training, momentum, eps))
188    }
189    #[inline] fn tensor_layer_norm(&self, input: *mut c_void, weight: *mut c_void, bias: *mut c_void, eps: f64) -> BackendResult<*mut c_void> {
190        check(ffi::tl_cpu_tensor_layer_norm(t(input), t(weight), t(bias), eps))
191    }
192    #[inline] fn tensor_dropout(&self, input: *mut c_void, p: f64, training: bool) -> BackendResult<*mut c_void> {
193        check(ffi::tl_cpu_tensor_dropout(t(input), p, training))
194    }
195    #[inline] fn tensor_max_pool2d(&self, input: *mut c_void, kernel_size: usize, stride: usize, padding: usize) -> BackendResult<*mut c_void> {
196        check(ffi::tl_cpu_tensor_max_pool2d(t(input), kernel_size as i64, stride as i64, padding as i64))
197    }
198    #[inline] fn tensor_avg_pool2d(&self, input: *mut c_void, kernel_size: usize, stride: usize, padding: usize) -> BackendResult<*mut c_void> {
199        check(ffi::tl_cpu_tensor_avg_pool2d(t(input), kernel_size as i64, stride as i64, padding as i64))
200    }
201
202    // ========== CPU 専用 ==========
203    #[inline] fn tensor_transpose_2d(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_transpose_2d(t(a))) }
204    #[inline] fn tensor_reshape_2d(&self, a: *mut c_void, d0: i64, d1: i64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_reshape_2d(t(a), d0, d1)) }
205    #[inline] fn tensor_reshape_3d_to_2d(&self, a: *mut c_void, d0: i64, d1: i64) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_reshape_3d_to_2d(t(a), d0, d1)) }
206    #[inline] fn tensor_matmul_4d(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_matmul_4d(t(a), t(b))) }
207    #[inline] fn tensor_add_4d(&self, a: *mut c_void, b: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_add_4d(t(a), t(b))) }
208    #[inline] fn tensor_silu_4d(&self, a: *mut c_void) -> BackendResult<*mut c_void> { check(ffi::tl_cpu_tensor_silu_4d(t(a))) }
209}