1use std::ffi::c_void;
10
/// Opaque handle to a tensor allocated and owned by the native flodl library.
/// Obtained via the constructor/out-parameter functions below and released
/// with `flodl_free_tensor`.
pub type FlodlTensor = *mut c_void;

// Dtype codes passed across the FFI boundary.
// NOTE(review): the numbering matches LibTorch's `at::ScalarType` values
// (Int = 3, Long = 4, Half = 5, Float = 6, Double = 7, BFloat16 = 15) —
// confirm against the C side before depending on that correspondence.
pub const FLODL_FLOAT16: i32 = 5;
pub const FLODL_BFLOAT16: i32 = 15;
pub const FLODL_FLOAT32: i32 = 6;
pub const FLODL_FLOAT64: i32 = 7;
pub const FLODL_INT32: i32 = 3;
pub const FLODL_INT64: i32 = 4;

// Device-type codes used by the `device_type` parameters below.
pub const FLODL_CPU: i32 = 0;
pub const FLODL_CUDA: i32 = 1;
25
// ---------------------------------------------------------------------------
// FFI surface of the native flodl library.
//
// Conventions shared by (almost) every function declared below:
// * `FlodlTensor` arguments are opaque handles owned by the native side.
// * `result: *mut FlodlTensor` is an out-parameter that receives a newly
//   created handle on success.
// * The `*mut i8` return value appears to be an error C-string with null
//   meaning success. NOTE(review): ownership of a non-null error string
//   (and whether the caller must free it) is not visible here — confirm
//   against the C implementation.
// ---------------------------------------------------------------------------
unsafe extern "C" {
    // Constructors: `shape` points at `ndim` i64 extents; `dtype` and
    // `device_type` take the FLODL_* constants defined above.
    pub fn flodl_zeros(
        shape: *mut i64, ndim: i32, dtype: i32,
        device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_ones(
        shape: *mut i64, ndim: i32, dtype: i32,
        device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_rand(
        shape: *mut i64, ndim: i32, dtype: i32,
        device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_randn(
        shape: *mut i64, ndim: i32, dtype: i32,
        device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    // Wraps caller-provided memory as a tensor.
    // NOTE(review): whether the data is copied or borrowed (and, if borrowed,
    // how long `data` must stay alive) is not visible here — confirm.
    pub fn flodl_from_blob(
        data: *mut c_void, shape: *mut i64, ndim: i32,
        dtype: i32, device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_linspace(
        start: f64, end: f64, steps: i64,
        dtype: i32, device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_arange(
        start: f64, end: f64, step: f64,
        dtype: i32, device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_expand(
        t: FlodlTensor, new_shape: *mut i64, ndim: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    // Lifetime management. `flodl_free_tensor` has no error return;
    // `flodl_shallow_clone` produces a second handle to the same storage
    // (presumably — the name suggests no data copy; confirm).
    pub fn flodl_free_tensor(t: FlodlTensor);
    pub fn flodl_shallow_clone(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;

    // Metadata accessors — infallible, return the value directly.
    pub fn flodl_ndim(t: FlodlTensor) -> i32;
    pub fn flodl_shape(t: FlodlTensor, dim: i32) -> i64;
    pub fn flodl_dtype(t: FlodlTensor) -> i32;
    pub fn flodl_device_type(t: FlodlTensor) -> i32;
    pub fn flodl_device_index(t: FlodlTensor) -> i32;
    pub fn flodl_numel(t: FlodlTensor) -> i64;

    // Copies tensor contents into a caller-owned buffer of `buffer_bytes` bytes.
    pub fn flodl_copy_data(
        t: FlodlTensor, buffer: *mut c_void, buffer_bytes: i64,
    ) -> *mut i8;
95
    // Binary element-wise arithmetic and matrix multiplication.
    pub fn flodl_add(a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_sub(a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_mul(a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_div(a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_matmul(a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;

    // Tensor-scalar arithmetic (scalar always passed as f64).
    pub fn flodl_add_scalar(
        t: FlodlTensor, scalar: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_mul_scalar(
        t: FlodlTensor, scalar: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_div_scalar(
        t: FlodlTensor, scalar: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_neg(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;

    // Activation functions. `flodl_tanh_op` presumably carries the `_op`
    // suffix to avoid clashing with a C symbol named `tanh` — confirm.
    pub fn flodl_relu(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_sigmoid(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_tanh_op(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_softmax(t: FlodlTensor, dim: i32, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_log_softmax(t: FlodlTensor, dim: i32, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_gelu(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_silu(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_leaky_relu(
        t: FlodlTensor, negative_slope: f64, result: *mut FlodlTensor,
    ) -> *mut i8;
    pub fn flodl_elu(t: FlodlTensor, alpha: f64, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_softplus(
        t: FlodlTensor, beta: f64, threshold: f64, result: *mut FlodlTensor,
    ) -> *mut i8;
    pub fn flodl_mish(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_selu(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_hardswish(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_hardsigmoid(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_prelu(t: FlodlTensor, weight: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;

    // Layer norm with three out-parameters (normalized output, per-row mean
    // and reciprocal standard deviation).
    pub fn flodl_native_layer_norm(
        input: FlodlTensor, weight: FlodlTensor, bias: FlodlTensor,
        normalized_size: i64, eps: f64,
        output: *mut FlodlTensor, mean: *mut FlodlTensor, rstd: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_group_norm(
        input: FlodlTensor, num_groups: i64,
        weight: FlodlTensor, bias: FlodlTensor,
        eps: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    // Element-wise math.
    pub fn flodl_exp(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_log(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_sqrt(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_abs(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_triu(t: FlodlTensor, diagonal: i64, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_tril(t: FlodlTensor, diagonal: i64, result: *mut FlodlTensor) -> *mut i8;

    pub fn flodl_pow_scalar(
        t: FlodlTensor, exponent: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_clamp(
        t: FlodlTensor, min_val: f64, max_val: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_clamp_min(
        t: FlodlTensor, min_val: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_clamp_max(
        t: FlodlTensor, max_val: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_log1p(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_expm1(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_log2(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_log10(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
185
    // Full reductions (over all elements).
    pub fn flodl_sum(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_mean(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;

    // Dimension-wise reductions. `keepdim` is an i32 used as a boolean flag
    // (presumably 0/1 — confirm the C side's convention).
    pub fn flodl_sum_dim(
        t: FlodlTensor, dim: i32, keepdim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_mean_dim(
        t: FlodlTensor, dim: i32, keepdim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_prod(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;

    pub fn flodl_prod_dim(
        t: FlodlTensor, dim: i32, keepdim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_cumsum(
        t: FlodlTensor, dim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_logsumexp(
        t: FlodlTensor, dim: i32, keepdim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_min(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_max(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_norm(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;

    pub fn flodl_min_dim(
        t: FlodlTensor, dim: i32, keepdim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_max_dim(
        t: FlodlTensor, dim: i32, keepdim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_argmax(
        t: FlodlTensor, dim: i32, keepdim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    // Tensor-vs-scalar comparisons; each yields a mask tensor in `result`.
    pub fn flodl_gt_scalar(
        t: FlodlTensor, scalar: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_ge_scalar(
        t: FlodlTensor, scalar: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_le_scalar(
        t: FlodlTensor, scalar: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_lt_scalar(
        t: FlodlTensor, scalar: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_eq_scalar(
        t: FlodlTensor, scalar: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_ne_scalar(
        t: FlodlTensor, scalar: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    // Predicates and boolean algebra over tensors.
    pub fn flodl_isnan(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_isinf(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_logical_and(
        a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor,
    ) -> *mut i8;
    pub fn flodl_logical_or(
        a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor,
    ) -> *mut i8;
    pub fn flodl_logical_not(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_any(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_all(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
268
    // Shape manipulation. Pointer+length pairs (`shape`/`dims` with `ndim`)
    // describe i64 arrays owned by the caller for the duration of the call.
    pub fn flodl_reshape(
        t: FlodlTensor, shape: *mut i64, ndim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_transpose(
        t: FlodlTensor, dim0: i32, dim1: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_permute(
        t: FlodlTensor, dims: *mut i64, ndim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_select(
        t: FlodlTensor, dim: i32, index: i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_narrow(
        t: FlodlTensor, dim: i32, start: i64, length: i64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_squeeze(
        t: FlodlTensor, dim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_unsqueeze(
        t: FlodlTensor, dim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_flatten(
        t: FlodlTensor, start_dim: i32, end_dim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    // Scatter counterparts of select/narrow: write `src` into a copy of `input`.
    pub fn flodl_select_scatter(
        input: FlodlTensor, src: FlodlTensor, dim: i32, index: i64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_narrow_scatter(
        input: FlodlTensor, src: FlodlTensor, dim: i32, start: i64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    // Index-based gather/accumulate, with the index supplied as a tensor.
    pub fn flodl_index_select(
        t: FlodlTensor, dim: i32, index: FlodlTensor,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_index_add(
        t: FlodlTensor, dim: i32, index: FlodlTensor, src: FlodlTensor,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    // Concatenation: the two-tensor fast path plus variadic array forms
    // (`tensors` points at `count` handles).
    pub fn flodl_cat2(
        a: FlodlTensor, b: FlodlTensor, dim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_cat(
        tensors: *mut FlodlTensor, count: i32, dim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_stack(
        tensors: *mut FlodlTensor, count: i32, dim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_masked_fill(
        t: FlodlTensor, mask: FlodlTensor, value: f64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_where(
        condition: FlodlTensor, x: FlodlTensor, y: FlodlTensor,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    // `_like` constructors: new tensor with t's shape/dtype/device.
    pub fn flodl_zeros_like(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_ones_like(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_full_like(
        t: FlodlTensor, value: f64, result: *mut FlodlTensor,
    ) -> *mut i8;
    pub fn flodl_rand_like(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_randn_like(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;

    pub fn flodl_randint(
        low: i64, high: i64, shape: *mut i64, ndim: i32,
        dtype: i32, device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_empty(
        shape: *mut i64, ndim: i32, dtype: i32,
        device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_one_hot(
        t: FlodlTensor, num_classes: i64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_bernoulli(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
386
    // Convolution / pooling kernels. 2-D and 3-D variants take per-axis
    // parameters as i64 arrays (length implied by the dimensionality);
    // 1-D variants take plain scalars. `ceil_mode` / `count_include_pad`
    // are i32 boolean flags.
    pub fn flodl_conv2d(
        input: FlodlTensor, weight: FlodlTensor, bias: FlodlTensor,
        stride: *mut i64, padding: *mut i64, dilation: *mut i64,
        groups: i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_conv1d(
        input: FlodlTensor, weight: FlodlTensor, bias: FlodlTensor,
        stride: i64, padding: i64, dilation: i64,
        groups: i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_conv_transpose2d(
        input: FlodlTensor, weight: FlodlTensor, bias: FlodlTensor,
        stride: *mut i64, padding: *mut i64,
        output_padding: *mut i64, dilation: *mut i64,
        groups: i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_conv_transpose1d(
        input: FlodlTensor, weight: FlodlTensor, bias: FlodlTensor,
        stride: i64, padding: i64,
        output_padding: i64, dilation: i64,
        groups: i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_max_pool2d(
        input: FlodlTensor, kernel_size: *mut i64,
        stride: *mut i64, padding: *mut i64, dilation: *mut i64,
        ceil_mode: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_avg_pool2d(
        input: FlodlTensor, kernel_size: *mut i64,
        stride: *mut i64, padding: *mut i64,
        ceil_mode: i32, count_include_pad: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_adaptive_avg_pool2d(
        input: FlodlTensor, output_size: *mut i64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_adaptive_max_pool2d(
        input: FlodlTensor, output_size: *mut i64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    // Patch extraction / reassembly (im2col & its inverse).
    pub fn flodl_im2col(
        input: FlodlTensor, kernel_size: *mut i64, dilation: *mut i64,
        padding: *mut i64, stride: *mut i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_col2im(
        input: FlodlTensor, output_size: *mut i64,
        kernel_size: *mut i64, dilation: *mut i64,
        padding: *mut i64, stride: *mut i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_conv3d(
        input: FlodlTensor, weight: FlodlTensor, bias: FlodlTensor,
        stride: *mut i64, padding: *mut i64, dilation: *mut i64,
        groups: i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_conv_transpose3d(
        input: FlodlTensor, weight: FlodlTensor, bias: FlodlTensor,
        stride: *mut i64, padding: *mut i64, output_padding: *mut i64,
        dilation: *mut i64, groups: i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_max_pool1d(
        input: FlodlTensor, kernel_size: i64,
        stride: i64, padding: i64, dilation: i64,
        ceil_mode: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_avg_pool1d(
        input: FlodlTensor, kernel_size: i64,
        stride: i64, padding: i64,
        ceil_mode: i32, count_include_pad: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_instance_norm(
        input: FlodlTensor, weight: FlodlTensor, bias: FlodlTensor,
        running_mean: FlodlTensor, running_var: FlodlTensor,
        use_input_stats: i32, momentum: f64, eps: f64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    // Vision-oriented rearrangement and sampling ops.
    pub fn flodl_pixel_shuffle(
        input: FlodlTensor, upscale_factor: i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_pixel_unshuffle(
        input: FlodlTensor, downscale_factor: i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_bilinear(
        input1: FlodlTensor, input2: FlodlTensor,
        weight: FlodlTensor, bias: FlodlTensor,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    // `mode` / `padding_mode` are integer codes — the enumeration they map to
    // is defined on the C side; confirm before constructing values here.
    pub fn flodl_grid_sample(
        input: FlodlTensor, grid: FlodlTensor,
        mode: i32, padding_mode: i32, align_corners: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;
522
    // Device transfer (synchronous and async variants).
    pub fn flodl_to_device(
        t: FlodlTensor, device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_to_device_async(
        t: FlodlTensor, device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    // CUDA runtime queries and controls (infallible, no error-string return).
    pub fn flodl_cuda_is_available() -> i32;
    pub fn flodl_cuda_device_count() -> i32;
    pub fn flodl_force_cuda_link() -> i32;
    pub fn flodl_set_current_device(device_index: i32);
    pub fn flodl_get_current_device() -> i32;
    pub fn flodl_cuda_synchronize(device_index: i32);

    // Memory statistics — results written through the *mut u64 out-parameters.
    pub fn flodl_cuda_mem_info(
        device_index: i32, used_bytes: *mut u64, total_bytes: *mut u64,
    ) -> *mut i8;

    pub fn flodl_cuda_alloc_bytes(
        device_index: i32, allocated_bytes: *mut u64,
    ) -> *mut i8;

    pub fn flodl_cuda_active_bytes(
        device_index: i32, active_bytes: *mut u64,
    ) -> *mut i8;

    pub fn flodl_cuda_peak_active_bytes(
        device_index: i32, peak_bytes: *mut u64,
    ) -> *mut i8;

    pub fn flodl_cuda_peak_reserved_bytes(
        device_index: i32, peak_bytes: *mut u64,
    ) -> *mut i8;

    pub fn flodl_cuda_reset_peak_stats(device_index: i32);

    pub fn flodl_cuda_empty_cache();

    pub fn flodl_cuda_utilization(device_index: i32) -> i32;

    // Writes the device name into the caller's buffer of `buf_len` bytes.
    pub fn flodl_cuda_device_name(
        device_index: i32, buf: *mut i8, buf_len: i32,
    ) -> *mut i8;

    pub fn flodl_cuda_compute_capability(
        device_index: i32, major: *mut i32, minor: *mut i32,
    ) -> *mut i8;
577
    // Dtype conversion (dtype takes the FLODL_* constants).
    pub fn flodl_to_dtype(
        t: FlodlTensor, dtype: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    // Writes a boolean flag (i32) — unlike most functions here, the result is
    // a plain int rather than a tensor handle.
    pub fn flodl_all_finite(t: FlodlTensor, result: *mut i32) -> *mut i8;

    // Tensor-vs-tensor comparisons; each yields a mask tensor.
    pub fn flodl_gt_tensor(
        a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_lt_tensor(
        a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_ge_tensor(
        a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_le_tensor(
        a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_eq_tensor(
        a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_ne_tensor(
        a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor,
    ) -> *mut i8;

    // Binary element-wise math.
    pub fn flodl_atan2(
        a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_maximum(
        a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_minimum(
        a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_argmin(
        t: FlodlTensor, dim: i32, keepdim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    // Statistics. `flodl_std_op` presumably carries the `_op` suffix to avoid
    // a C symbol clash — confirm.
    pub fn flodl_var(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_std_op(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;

    pub fn flodl_var_dim(
        t: FlodlTensor, dim: i32, keepdim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_std_dim(
        t: FlodlTensor, dim: i32, keepdim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_cumprod(t: FlodlTensor, dim: i32, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_norm_p_dim(
        t: FlodlTensor, p: f64, dim: i32, keepdim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;
    // Multi-dimension sum: `dims` points at `ndims` axis indices.
    pub fn flodl_sum_dims(
        t: FlodlTensor, dims: *mut i64, ndims: i32, keepdim: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;
    pub fn flodl_median(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    // Produces both values and the corresponding indices as two out-tensors.
    pub fn flodl_median_dim(
        t: FlodlTensor, dim: i32, keepdim: i32,
        values: *mut FlodlTensor, indices: *mut FlodlTensor,
    ) -> *mut i8;
    pub fn flodl_count_nonzero(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_count_nonzero_dim(
        t: FlodlTensor, dim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_nonzero(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_unique(
        t: FlodlTensor, sorted: i32, return_inverse: i32,
        output: *mut FlodlTensor, inverse_indices: *mut FlodlTensor,
    ) -> *mut i8;
    pub fn flodl_searchsorted(
        sorted_seq: FlodlTensor, values: FlodlTensor,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_diagonal(
        t: FlodlTensor, offset: i64, dim1: i32, dim2: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;
    pub fn flodl_movedim(
        t: FlodlTensor, src: i64, dst: i64, result: *mut FlodlTensor,
    ) -> *mut i8;
    pub fn flodl_tile(
        t: FlodlTensor, reps: *mut i64, ndim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    // Unary element-wise math: trigonometry, rounding, error functions.
    pub fn flodl_sin(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_cos(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_tan(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_asin(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_acos(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_atan(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_sign(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_floor(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_ceil(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_round(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_reciprocal(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_erf(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_erfc(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_trunc(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_frac(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_fmod_scalar(t: FlodlTensor, scalar: f64, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_fmod_tensor(a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_remainder_scalar(t: FlodlTensor, scalar: f64, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_remainder_tensor(a: FlodlTensor, b: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_lerp(a: FlodlTensor, b: FlodlTensor, weight: f64, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_lerp_tensor(a: FlodlTensor, b: FlodlTensor, weight: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_isclose(a: FlodlTensor, b: FlodlTensor, rtol: f64, atol: f64, result: *mut FlodlTensor) -> *mut i8;
710
    // Fused multiply-accumulate primitives.
    pub fn flodl_addmm(
        bias: FlodlTensor, mat1: FlodlTensor, mat2: FlodlTensor,
        beta: f64, alpha: f64, result: *mut FlodlTensor,
    ) -> *mut i8;
    pub fn flodl_addcmul(
        self_: FlodlTensor, t1: FlodlTensor, t2: FlodlTensor,
        value: f64, result: *mut FlodlTensor,
    ) -> *mut i8;
    pub fn flodl_addcdiv(
        self_: FlodlTensor, t1: FlodlTensor, t2: FlodlTensor,
        value: f64, result: *mut FlodlTensor,
    ) -> *mut i8;

    // Gather/scatter with tensor-valued indices.
    pub fn flodl_gather(
        t: FlodlTensor, dim: i32, index: FlodlTensor,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_scatter_add(
        t: FlodlTensor, dim: i32, index: FlodlTensor, src: FlodlTensor,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    // Ordering ops returning paired values/indices out-tensors.
    pub fn flodl_topk(
        t: FlodlTensor, k: i64, dim: i32, largest: i32, sorted: i32,
        values: *mut FlodlTensor, indices: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_sort(
        t: FlodlTensor, dim: i32, descending: i32,
        values: *mut FlodlTensor, indices: *mut FlodlTensor,
    ) -> *mut i8;

    // More constructors.
    pub fn flodl_eye(
        n: i64, dtype: i32, device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_full(
        shape: *mut i64, ndim: i32, value: f64, dtype: i32,
        device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_randperm(
        n: i64, dtype: i32, device_type: i32, device_index: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_multinomial(
        probs: FlodlTensor, num_samples: i64, replacement: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_normalize(
        t: FlodlTensor, p: f64, dim: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    // Returns multiple tensors: the native side writes an array of handles to
    // `*results` and its length to `*count`.
    // NOTE(review): who allocates/frees the `results` array is not visible
    // here — confirm against the C implementation (same for split/unbind).
    pub fn flodl_chunk(
        t: FlodlTensor, chunks: i32, dim: i32,
        results: *mut *mut FlodlTensor, count: *mut i32,
    ) -> *mut i8;

    pub fn flodl_repeat(
        t: FlodlTensor, repeats: *mut i64, ndim: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    // Padding: `padding` holds `pad_len` i64 amounts; `value` fills new cells.
    pub fn flodl_pad(
        t: FlodlTensor, padding: *mut i64, pad_len: i32, value: f64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    // As `flodl_pad` but with an integer `mode` code defined on the C side.
    pub fn flodl_pad_mode(
        t: FlodlTensor, padding: *mut i64, pad_len: i32,
        mode: i32, value: f64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_interpolate(
        input: FlodlTensor, output_size: *mut i64, ndim: i32,
        mode: i32, align_corners: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_flip(
        t: FlodlTensor, dims: *mut i64, ndim: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_roll(
        t: FlodlTensor, shift: i64, dim: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_split(
        t: FlodlTensor, split_size: i64, dim: i32,
        results: *mut *mut FlodlTensor, count: *mut i32,
    ) -> *mut i8;

    pub fn flodl_unbind(
        t: FlodlTensor, dim: i32,
        results: *mut *mut FlodlTensor, count: *mut i32,
    ) -> *mut i8;

    pub fn flodl_contiguous(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_is_contiguous(t: FlodlTensor) -> i32;

    pub fn flodl_argsort(
        t: FlodlTensor, dim: i32, descending: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_scatter(
        t: FlodlTensor, dim: i32, index: FlodlTensor, src: FlodlTensor,
        result: *mut FlodlTensor,
    ) -> *mut i8;
842
    // Autograd controls.
    pub fn flodl_set_requires_grad(
        t: FlodlTensor, requires_grad: i32, result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_requires_grad(t: FlodlTensor) -> i32;

    pub fn flodl_backward(t: FlodlTensor) -> *mut i8;

    // Reads the accumulated gradient of `t` into a new handle.
    pub fn flodl_grad(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;

    pub fn flodl_set_grad(t: FlodlTensor, grad: FlodlTensor) -> *mut i8;

    pub fn flodl_zero_grad(t: FlodlTensor) -> *mut i8;

    // `flodl_detach` yields a new detached handle; the trailing-underscore
    // variant detaches in place, mirroring the in-place naming used below.
    pub fn flodl_detach(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;

    pub fn flodl_detach_(t: FlodlTensor) -> *mut i8;

    pub fn flodl_is_leaf(t: FlodlTensor) -> i32;

    // RAII-style guards held as opaque pointers: create to change state,
    // delete to restore it. The caller is responsible for pairing new/delete.
    pub fn flodl_no_grad_guard_new() -> *mut c_void;
    pub fn flodl_no_grad_guard_delete(guard: *mut c_void);
    pub fn flodl_is_grad_enabled() -> i32;

    pub fn flodl_autocast_guard_new(device_type: i32, dtype: i32) -> *mut c_void;
    pub fn flodl_autocast_guard_delete(guard: *mut c_void);
    pub fn flodl_is_autocast_enabled(device_type: i32) -> i32;
876
    // Produces `*result_count` output tensors from `count` inputs; the handle
    // array is written through `results` (allocation ownership as for chunk).
    pub fn flodl_meshgrid(
        tensors: *mut FlodlTensor, count: i32,
        results: *mut *mut FlodlTensor, result_count: *mut i32,
    ) -> *mut i8;

    pub fn flodl_cdist(
        x: FlodlTensor, y: FlodlTensor, p: f64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_cosine_similarity(
        a: FlodlTensor, b: FlodlTensor,
        dim: i64, eps: f64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_linear(
        input: FlodlTensor, weight: FlodlTensor, bias: FlodlTensor,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    // Single-step recurrent cells.
    pub fn flodl_gru_cell(
        input: FlodlTensor, hx: FlodlTensor,
        w_ih: FlodlTensor, w_hh: FlodlTensor,
        b_ih: FlodlTensor, b_hh: FlodlTensor,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_lstm_cell(
        input: FlodlTensor, hx: FlodlTensor, cx: FlodlTensor,
        w_ih: FlodlTensor, w_hh: FlodlTensor,
        b_ih: FlodlTensor, b_hh: FlodlTensor,
        h_out: *mut FlodlTensor, c_out: *mut FlodlTensor,
    ) -> *mut i8;

    // Full sequence RNNs. `params` points at `num_params` weight handles; the
    // `bool` flags are FFI-safe (Rust `bool` matches C `_Bool`).
    pub fn flodl_lstm(
        input: FlodlTensor, h_0: FlodlTensor, c_0: FlodlTensor,
        params: *const FlodlTensor, num_params: i64,
        num_layers: i64, batch_first: bool, flatten: bool,
        output: *mut FlodlTensor, h_n: *mut FlodlTensor, c_n: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_gru(
        input: FlodlTensor, h_0: FlodlTensor,
        params: *const FlodlTensor, num_params: i64,
        num_layers: i64, batch_first: bool, flatten: bool,
        output: *mut FlodlTensor, h_n: *mut FlodlTensor,
    ) -> *mut i8;
934
935 pub fn flodl_rnn_params_create(
937 params: *const FlodlTensor, num_params: i64,
938 mode: i64, num_layers: i64, batch_first: bool, flatten: bool,
939 out: *mut *mut std::os::raw::c_void,
940 ) -> *mut i8;
941 pub fn flodl_rnn_params_free(rp: *mut std::os::raw::c_void);
942 pub fn flodl_lstm_cached(
943 input: FlodlTensor, h_0: FlodlTensor, c_0: FlodlTensor,
944 rp: *mut std::os::raw::c_void, num_layers: i64, batch_first: bool,
945 output: *mut FlodlTensor, h_n: *mut FlodlTensor, c_n: *mut FlodlTensor,
946 ) -> *mut i8;
947 pub fn flodl_gru_cached(
948 input: FlodlTensor, h_0: FlodlTensor,
949 rp: *mut std::os::raw::c_void, num_layers: i64, batch_first: bool,
950 output: *mut FlodlTensor, h_n: *mut FlodlTensor,
951 ) -> *mut i8;
952
    // Enables/disables cuDNN autotuning benchmark mode (i32 used as a flag).
    pub fn flodl_set_cudnn_benchmark(enable: i32);

    // RNG seeding for CPU and for all CUDA devices.
    pub fn flodl_manual_seed(seed: u64);
    pub fn flodl_cuda_manual_seed_all(seed: u64);
961
    // In-place mutations (trailing underscore): they modify `t` directly and
    // therefore take no `result` out-parameter.
    pub fn flodl_add_(t: FlodlTensor, other: FlodlTensor) -> *mut i8;
    pub fn flodl_sub_(t: FlodlTensor, other: FlodlTensor) -> *mut i8;
    pub fn flodl_mul_scalar_(t: FlodlTensor, scalar: f64) -> *mut i8;
    pub fn flodl_add_scalar_(t: FlodlTensor, scalar: f64) -> *mut i8;
    pub fn flodl_zero_(t: FlodlTensor) -> *mut i8;
    pub fn flodl_mul_(t: FlodlTensor, other: FlodlTensor) -> *mut i8;
    pub fn flodl_div_scalar_(t: FlodlTensor, scalar: f64) -> *mut i8;
    pub fn flodl_div_(t: FlodlTensor, other: FlodlTensor) -> *mut i8;
    pub fn flodl_fill_(t: FlodlTensor, value: f64) -> *mut i8;
973
    // Single-parameter Adam update: mutates `param` and the `m`/`v` moment
    // buffers in place. `step` is the 1-based iteration counter (presumably,
    // for bias correction — confirm against the C side).
    pub fn flodl_adam_step(
        param: FlodlTensor, grad: FlodlTensor,
        m: FlodlTensor, v: FlodlTensor,
        lr: f64, beta1: f64, beta2: f64, eps: f64,
        weight_decay: f64, step: i64,
    ) -> *mut i8;

    // Batched Adam over `count` parameters; all arrays are parallel, and
    // `lrs` supplies a per-parameter learning rate.
    pub fn flodl_adam_step_batched(
        params: *mut FlodlTensor, grads: *mut FlodlTensor,
        ms: *mut FlodlTensor, vs: *mut FlodlTensor,
        lrs: *mut f64, count: i32,
        beta1: f64, beta2: f64, eps: f64,
        weight_decay: f64, step: i64,
    ) -> *mut i8;

    // Fused Adam/AdamW with AMP-style scaling inputs (`grad_scale`,
    // `found_inf`) passed as tensor handles.
    pub fn flodl_fused_adam_(
        params: *mut FlodlTensor, grads: *mut FlodlTensor,
        exp_avgs: *mut FlodlTensor, exp_avg_sqs: *mut FlodlTensor,
        count: i32, lr: f64,
        beta1: f64, beta2: f64, eps: f64,
        weight_decay: f64, step: i64,
        grad_scale: FlodlTensor, found_inf: FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_fused_adamw_(
        params: *mut FlodlTensor, grads: *mut FlodlTensor,
        exp_avgs: *mut FlodlTensor, exp_avg_sqs: *mut FlodlTensor,
        count: i32, lr: f64,
        beta1: f64, beta2: f64, eps: f64,
        weight_decay: f64, step: i64,
        grad_scale: FlodlTensor, found_inf: FlodlTensor,
    ) -> *mut i8;

    // Host-memory pinning for faster H2D transfers.
    pub fn flodl_pin_memory(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    pub fn flodl_is_pinned(t: FlodlTensor) -> i32;

    pub fn flodl_malloc_trim() -> i32;

    pub fn flodl_zero_grad_set_to_none(t: FlodlTensor);

    // Gradient clipping over `count` params; the pre-clip total norm is
    // written through `total_norm_out`.
    pub fn flodl_clip_grad_norm(
        params: *mut FlodlTensor, count: i32,
        max_norm: f64, total_norm_out: *mut f64,
    ) -> *mut i8;

    // `foreach` batch ops: apply one operation across `count` tensors
    // (trailing underscore = in place).
    pub fn flodl_foreach_add_scalar_(
        tensors: *mut FlodlTensor, count: i32, scalar: f64,
    ) -> *mut i8;

    pub fn flodl_foreach_mul_scalar_(
        tensors: *mut FlodlTensor, count: i32, scalar: f64,
    ) -> *mut i8;

    pub fn flodl_foreach_zero_(
        tensors: *mut FlodlTensor, count: i32,
    ) -> *mut i8;

    pub fn flodl_foreach_add_list_(
        tensors1: *mut FlodlTensor, tensors2: *mut FlodlTensor,
        count: i32, alpha: f64,
    ) -> *mut i8;

    // Writes `count` result handles into the caller-provided `results` array.
    pub fn flodl_foreach_norm(
        tensors: *mut FlodlTensor, count: i32, ord: f64,
        results: *mut FlodlTensor,
    ) -> *mut i8;

    pub fn flodl_foreach_lerp_scalar_(
        tensors1: *mut FlodlTensor, tensors2: *mut FlodlTensor,
        count: i32, weight: f64,
    ) -> *mut i8;

    pub fn flodl_foreach_sqrt_(
        tensors: *mut FlodlTensor, count: i32,
    ) -> *mut i8;

    pub fn flodl_autograd_node_count(t: FlodlTensor) -> i64;
1069
    // Loss functions. `reduction` is an integer code — presumably the usual
    // none/mean/sum selector; confirm the mapping on the C side. Each call
    // writes the loss tensor to `result` and returns a presumed error
    // string (null on success; free with `flodl_free_string`).

    /// Mean-squared-error loss between `pred` and `target`.
    pub fn flodl_mse_loss(
        pred: FlodlTensor, target: FlodlTensor,
        reduction: i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    /// Cross-entropy loss. `ignore_index` marks target entries excluded
    /// from the loss; `label_smoothing` in [0, 1] — presumed, confirm.
    pub fn flodl_cross_entropy_loss(
        pred: FlodlTensor, target: FlodlTensor,
        reduction: i64, ignore_index: i64, label_smoothing: f64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    /// Binary cross-entropy on raw logits (sigmoid fused in — presumed
    /// from the name).
    pub fn flodl_bce_with_logits_loss(
        pred: FlodlTensor, target: FlodlTensor,
        reduction: i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    /// Binary cross-entropy; `pred` is presumably already a probability.
    pub fn flodl_bce_loss(
        pred: FlodlTensor, target: FlodlTensor,
        reduction: i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    /// Mean-absolute-error (L1) loss.
    pub fn flodl_l1_loss(
        pred: FlodlTensor, target: FlodlTensor,
        reduction: i64, result: *mut FlodlTensor,
    ) -> *mut i8;

    /// Smooth L1 (Huber-style) loss; `beta` is presumably the L1/L2
    /// transition threshold — confirm.
    pub fn flodl_smooth_l1_loss(
        pred: FlodlTensor, target: FlodlTensor,
        reduction: i64, beta: f64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    /// Kullback-Leibler divergence; nonzero `log_target` presumably means
    /// `target` is given in log space — confirm.
    pub fn flodl_kl_div_loss(
        input: FlodlTensor, target: FlodlTensor,
        reduction: i64, log_target: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    /// Negative log-likelihood loss; `ignore_index` excludes matching
    /// target entries.
    pub fn flodl_nll_loss(
        input: FlodlTensor, target: FlodlTensor,
        reduction: i64, ignore_index: i64,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    /// Connectionist temporal classification loss; `blank` is the blank
    /// label index.
    pub fn flodl_ctc_loss(
        log_probs: FlodlTensor, targets: FlodlTensor,
        input_lengths: FlodlTensor, target_lengths: FlodlTensor,
        blank: i64, reduction: i64,
        result: *mut FlodlTensor,
    ) -> *mut i8;
1122
    /// Batch normalization. Nonzero `training` presumably selects training
    /// mode (batch statistics, running stats updated with `momentum`);
    /// otherwise `running_mean`/`running_var` are used — confirm whether
    /// the affine `weight`/`bias` handles may be null. Result tensor is
    /// written to `result`; non-null return is presumably an error string.
    pub fn flodl_batch_norm(
        input: FlodlTensor, weight: FlodlTensor,
        bias: FlodlTensor, running_mean: FlodlTensor,
        running_var: FlodlTensor, training: i32,
        momentum: f64, eps: f64,
        result: *mut FlodlTensor,
    ) -> *mut i8;
1132
    /// Dropout with drop probability `p`; when `training` is zero this is
    /// presumably an identity pass-through — confirm. Output handle goes
    /// to `result`; non-null return is presumably an error string.
    pub fn flodl_dropout(
        input: FlodlTensor, p: f64, training: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;

    /// Feature (channel-wise) dropout — presumably drops entire feature
    /// maps rather than individual elements; same contract as
    /// `flodl_dropout`.
    pub fn flodl_feature_dropout(
        input: FlodlTensor, p: f64, training: i32,
        result: *mut FlodlTensor,
    ) -> *mut i8;
1144
    /// Copies the contents of `src` into `dst` in place; nonzero
    /// `non_blocking` presumably allows an asynchronous device copy
    /// (caller must then synchronize before reading) — confirm.
    pub fn flodl_copy_(dst: FlodlTensor, src: FlodlTensor, non_blocking: i32) -> *mut i8;
    /// Converts `t` to channels-last (NHWC) memory format, writing the new
    /// handle to `result` — presumably a 4-D tensor is expected; confirm.
    pub fn flodl_to_channels_last(t: FlodlTensor, result: *mut FlodlTensor) -> *mut i8;
    /// Reports whether `t` is laid out channels-last — presumably nonzero
    /// means yes; confirm the encoding.
    pub fn flodl_is_channels_last(t: FlodlTensor) -> i32;
1153
    /// Embedding-bag lookup: gathers rows of `weight` at `indices` and
    /// reduces each bag delimited by `offsets`. `mode` is an integer code —
    /// presumably the usual sum/mean/max selector; confirm the mapping.
    /// Non-null return is presumably an error string.
    pub fn flodl_embedding_bag(
        weight: FlodlTensor, indices: FlodlTensor, offsets: FlodlTensor,
        mode: i64, result: *mut FlodlTensor,
    ) -> *mut i8;
1160
    // CUDA graph capture/replay. A graph is an opaque `*mut c_void` owned
    // by the C side; memory-pool ids travel as two u64 halves (`pool_hi`/
    // `pool_lo` — presumably a 128-bit pool handle split in two; confirm).
    // `*mut i8` returns are presumed error strings (`flodl_free_string`).

    /// Allocates a new, empty CUDA graph and writes it to `graph_out`.
    pub fn flodl_cuda_graph_new(graph_out: *mut *mut c_void) -> *mut i8;
    /// Begins capturing work into `graph`; pass a pool id to share memory
    /// with another graph (zeros presumably mean "default pool" — confirm).
    /// `mode` is an integer capture-mode code; mapping not visible here.
    pub fn flodl_cuda_graph_capture_begin(
        graph: *mut c_void, pool_hi: u64, pool_lo: u64, mode: i32,
    ) -> *mut i8;
    /// Ends the capture started by `flodl_cuda_graph_capture_begin`.
    pub fn flodl_cuda_graph_capture_end(graph: *mut c_void) -> *mut i8;
    /// Launches the captured graph again.
    pub fn flodl_cuda_graph_replay(graph: *mut c_void) -> *mut i8;
    /// Discards the captured work so the graph can be re-captured —
    /// presumed from the name; confirm.
    pub fn flodl_cuda_graph_reset(graph: *mut c_void) -> *mut i8;
    /// Destroys the graph and frees its C-side resources.
    pub fn flodl_cuda_graph_delete(graph: *mut c_void);
    /// Reads back the memory-pool id this graph captured into.
    pub fn flodl_cuda_graph_pool(
        graph: *mut c_void, pool_hi: *mut u64, pool_lo: *mut u64,
    );
    /// Creates a fresh shareable pool id independent of any graph —
    /// presumed from the name; confirm.
    pub fn flodl_cuda_graph_pool_handle(pool_hi: *mut u64, pool_lo: *mut u64);
1175
    // CUDA events (opaque `*mut c_void` handles). `*mut i8` returns are
    // presumed error strings (`flodl_free_string`).

    /// Creates a CUDA event; `flags` presumably mirror the CUDA event
    /// creation flags (e.g. timing enabled/disabled) — confirm mapping.
    pub fn flodl_cuda_event_new(flags: i32, event_out: *mut *mut c_void) -> *mut i8;
    /// Records `event` — presumably on the current stream; confirm.
    pub fn flodl_cuda_event_record(event: *mut c_void) -> *mut i8;
    /// Records `event` on the given `stream`.
    pub fn flodl_cuda_event_record_on_stream(
        event: *mut c_void, stream: *mut c_void,
    ) -> *mut i8;
    /// Blocks the host until `event` has completed.
    pub fn flodl_cuda_event_synchronize(event: *mut c_void) -> *mut i8;
    /// Writes the elapsed time between `start` and `end` (milliseconds,
    /// per the `ms_out` name) to `ms_out`; both events must presumably
    /// have been recorded with timing enabled — confirm.
    pub fn flodl_cuda_event_elapsed_time(
        start: *mut c_void, end: *mut c_void, ms_out: *mut f32,
    ) -> *mut i8;
    /// Non-blocking completion check — presumably nonzero when the event
    /// has completed; confirm the encoding.
    pub fn flodl_cuda_event_query(event: *mut c_void) -> i32;
    /// Destroys the event handle.
    pub fn flodl_cuda_event_delete(event: *mut c_void);
1189
    // CUDA streams (opaque `*mut c_void` handles). `*mut i8` returns are
    // presumed error strings (`flodl_free_string`).

    /// Creates a stream on `device_index`; nonzero `high_priority`
    /// presumably requests a high-priority stream — confirm.
    pub fn flodl_cuda_stream_new(
        device_index: i32, high_priority: i32, stream_out: *mut *mut c_void,
    ) -> *mut i8;
    /// Blocks the host until all work queued on `stream` has finished.
    pub fn flodl_cuda_stream_synchronize(stream: *mut c_void) -> *mut i8;
    /// Makes `stream` wait (on-device) for `event` before running further
    /// work — presumed cudaStreamWaitEvent semantics; confirm.
    pub fn flodl_cuda_stream_wait_event(
        stream: *mut c_void, event: *mut c_void,
    ) -> *mut i8;
    /// Non-blocking idle check — presumably nonzero when all queued work
    /// is done; confirm the encoding.
    pub fn flodl_cuda_stream_query(stream: *mut c_void) -> i32;
    /// Installs `stream` as the current stream — scope (thread-local vs
    /// global) is not visible here; confirm before relying on it.
    pub fn flodl_cuda_stream_set_current(stream: *mut c_void);
    /// Restores the default stream as current on `device_index`.
    pub fn flodl_cuda_stream_restore_default(device_index: i32);
    /// Destroys the stream handle.
    pub fn flodl_cuda_stream_delete(stream: *mut c_void);
1203
    // NCCL collectives, single-process multi-GPU flavor: one handle spans
    // `ndev` local devices, and the per-call `tensors`/`streams` arrays
    // presumably hold one entry per device in `devlist` order — confirm.
    // `*mut i8` returns are presumed error strings (`flodl_free_string`).

    /// Initializes a communicator over the `ndev` CUDA devices in
    /// `devlist`, writing the opaque handle to `handle_out`.
    pub fn flodl_nccl_init(
        ndev: i32, devlist: *const i32, handle_out: *mut *mut c_void,
    ) -> *mut i8;
    /// Destroys a communicator created by `flodl_nccl_init`.
    pub fn flodl_nccl_destroy(handle: *mut c_void);
    /// All-reduce across the communicator's devices; `op` is an integer
    /// reduction code (presumably sum/prod/min/max — confirm mapping).
    pub fn flodl_nccl_all_reduce(
        handle: *mut c_void, tensors: *mut FlodlTensor,
        streams: *mut *mut c_void, op: i32,
    ) -> *mut i8;
    /// Broadcast from device index `root` to all devices in the
    /// communicator.
    pub fn flodl_nccl_broadcast(
        handle: *mut c_void, tensors: *mut FlodlTensor,
        streams: *mut *mut c_void, root: i32,
    ) -> *mut i8;
    /// Number of ranks/devices in the communicator.
    pub fn flodl_nccl_size(handle: *mut c_void) -> i32;
1219
    // NCCL collectives, rank-based (multi-process) flavor: each process
    // holds one rank handle; ranks bootstrap from a shared unique id.
    // `*mut i8` returns are presumed error strings (`flodl_free_string`).

    /// Fills `uid_out` with the NCCL unique id for bootstrapping. The
    /// buffer is caller-allocated and must match the C side's expected
    /// unique-id byte length — TODO confirm the required size.
    pub fn flodl_nccl_get_unique_id(uid_out: *mut u8) -> *mut i8;
    /// Joins the communicator as `rank` of `nranks`, using the shared
    /// `uid` produced by `flodl_nccl_get_unique_id` on rank 0 (presumed
    /// bootstrap protocol — confirm).
    pub fn flodl_nccl_init_rank(
        rank: i32, nranks: i32, uid: *const u8, handle_out: *mut *mut c_void,
    ) -> *mut i8;
    /// Destroys a rank handle created by `flodl_nccl_init_rank`.
    pub fn flodl_nccl_destroy_rank(handle: *mut c_void);
    /// Aborts the communicator — presumably for unblocking hung
    /// collectives, as with `ncclCommAbort`; confirm.
    pub fn flodl_nccl_abort_rank(handle: *mut c_void) -> *mut i8;
    /// All-reduce of `ntensors` tensors on this rank's `stream`; `op` is
    /// an integer reduction code (mapping not visible here).
    pub fn flodl_nccl_all_reduce_rank(
        handle: *mut c_void, tensors: *mut FlodlTensor, ntensors: i32,
        stream: *mut c_void, op: i32,
    ) -> *mut i8;
    /// Derives a per-rank handle from `group_handle` for the given `rank`
    /// — presumably splitting a grouped communicator; exact semantics not
    /// visible here, confirm.
    pub fn flodl_nccl_split_rank(
        group_handle: *mut c_void, rank: i32,
        rank_handle_out: *mut *mut c_void,
    ) -> *mut i8;
1236
    /// Releases a string returned by the `flodl_*` calls above (their
    /// non-null `*mut i8` results). It was allocated on the C side, so it
    /// must be freed here — never with Rust's allocator. Passing null is
    /// presumably a no-op — TODO confirm.
    pub fn flodl_free_string(s: *mut i8);
1240}