use runmat_accelerate_api::GpuTensorHandle;
use runmat_builtins::{CharArray, ComplexTensor, Tensor, Value};
use runmat_macros::runtime_builtin;

use crate::builtins::common::random_args::complex_tensor_into_value;
use crate::builtins::common::spec::{
    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
    ProviderHook, ReductionNaN, ResidencyPolicy, ScalarType, ShapeRequirements,
};
use crate::builtins::common::{gpu_helpers, tensor};
#[cfg(feature = "doc_export")]
use crate::register_builtin_doc_text;
use crate::{register_builtin_fusion_spec, register_builtin_gpu_spec};

#[cfg(feature = "doc_export")]
pub const DOC_MD: &str = r#"---
title: "diff"
category: "math/reduction"
keywords: ["diff", "difference", "finite difference", "nth difference", "gpu"]
summary: "Forward finite differences of scalars, vectors, matrices, or N-D tensors."
references: []
gpu_support:
  elementwise: false
  reduction: false
  precisions: ["f32", "f64"]
  broadcasting: "matlab"
  notes: "Calls the provider's `diff_dim` hook; falls back to host when that hook is unavailable."
fusion:
  elementwise: false
  reduction: false
  max_inputs: 1
  constants: "inline"
requires_feature: null
tested:
  unit: "builtins::math::reduction::diff::tests"
  integration: "builtins::math::reduction::diff::tests::diff_gpu_provider_roundtrip"
---

# What does the `diff` function do in MATLAB / RunMat?
`diff(X)` computes forward finite differences along the first dimension of `X` whose size exceeds 1.
For a vector, `Y = diff(X)` returns the differences between adjacent elements, `Y(k) = X(k+1) - X(k)`.
Higher-order differences are obtained by repeating this process.

## How does the `diff` function behave in MATLAB / RunMat?
- `diff(X)` walks along the first non-singleton dimension. Column vectors are therefore differenced
  down the rows, while row vectors are differenced across the columns.
- `diff(X, N)` computes the Nth forward difference. `N = 0` returns `X` unchanged. Each order reduces
  the length of the working dimension by one, so the output length becomes `max(len - N, 0)`.
- `diff(X, N, dim)` lets you choose the dimension explicitly. Passing `[]` for `N` or `dim` keeps the
  defaults (see the sketch after this list), and dimensions larger than `ndims(X)` behave like
  length-1 axes (so any positive order yields an empty result).
- Real, logical, and character inputs promote to double-precision tensors before differencing.
  Complex inputs retain their complex type, with forward differences applied to the real and
  imaginary parts independently.
- Empty slices propagate: if the selected dimension has length 0 or 1, the corresponding axis of the
  output has length 0.
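
A minimal sketch of the `[]` placeholders (illustrative values):
```matlab
v = [2 7 13 20];
d = diff(v, [], 2);   % [] keeps the default order N = 1
```
Expected output:
```matlab
d = [5 6 7];
```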

## `diff` Function GPU Execution Behaviour
When the operand already resides on the GPU, RunMat asks the active acceleration provider for a
finite-difference kernel via `diff_dim`. The WGPU backend implements this hook, so forward differences
execute entirely on the device and the result stays resident on the GPU. Providers that have not yet
wired up `diff_dim` transparently gather the data, run the CPU implementation, and hand the result
back to the planner so subsequent kernels can re-promote it when beneficial.

## Examples of using the `diff` function in MATLAB / RunMat

### Computing first differences of a vector
```matlab
v = [3 4 9 15];
d1 = diff(v);
```
Expected output:
```matlab
d1 = [1 5 6];
```

### Taking second-order differences
```matlab
v = [1 4 9 16 25];
d2 = diff(v, 2);
```
Expected output:
```matlab
d2 = [2 2 2];
```

### Selecting the working dimension explicitly
```matlab
A = [1 2 3; 4 5 6];
rowDiff = diff(A, 1, 2);
```
Expected output:
```matlab
rowDiff =
     1     1
     1     1
```

### Running `diff` on GPU arrays
```matlab
G = gpuArray([1 4 9 16]);
gDiff = diff(G);
result = gather(gDiff);
```
Expected output:
```matlab
result = [3 5 7];
```

### N exceeding the dimension length returns an empty array
```matlab
v = (1:3)';
emptyResult = diff(v, 5);
```
Expected output:
```matlab
emptyResult =
  0×1 empty double column vector
```

### Applying `diff` to character data
```matlab
codes = diff('ACEG');
```
Expected output:
```matlab
codes = [2 2 2];
```

## GPU residency in RunMat (Do I need `gpuArray`?)
Manual `gpuArray` promotion is optional. RunMat keeps tensors on the GPU when providers implement
the relevant hooks and the planner predicts a benefit. With the WGPU backend registered, `diff`
executes fully on the GPU and returns a device-resident tensor. When the hook is missing, RunMat
gathers transparently, computes on the CPU, and keeps residency metadata consistent so fused
expressions can re-promote values when profitable.

## FAQ

### Does `diff` change the size of the input?
`diff` reduces the length along the working dimension by `N`. All other dimensions are preserved.
If the working dimension is shorter than `N`, the result is empty. With the WGPU backend the empty
result remains GPU-resident.

### How are higher-order differences computed?
RunMat applies the first-order forward difference repeatedly. This mirrors MATLAB’s definition and
produces the same numerical results.

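A quick way to see the repeated-difference definition (illustrative values):
```matlab
v = [1 4 9 16 25];
isequal(diff(v, 2), diff(diff(v)))   % ans = logical 1
```
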
### Can I pass `[]` for the order or dimension arguments?
Yes. An empty array keeps the default value (`N = 1`, first non-singleton dimension).

### Does `diff` support complex numbers?
Yes. Differences are taken on the real and imaginary parts independently, and the result remains
complex unless it becomes empty.

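For example (values mirror the unit test `diff_complex_tensor_preserves_type`):
```matlab
z = [1+1i 3+2i 6+5i];
dz = diff(z);   % dz = [2+1i 3+3i]
```
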
### What happens for character or logical inputs?
Characters and logical values are promoted to double precision, so the differences come back as
double arrays, matching MATLAB.

### Will the GPU path produce the same results as the CPU path?
Yes. When a provider lacks a finite-difference kernel, RunMat gathers the data and computes on the CPU
to preserve MATLAB semantics exactly. Otherwise, the WGPU backend produces identical results on the GPU.

## See Also
[cumsum](./cumsum), [sum](./sum), [cumprod](./cumprod), [gpuArray](../../acceleration/gpu/gpuArray), [gather](../../acceleration/gpu/gather)

## Source & Feedback
- Source: [`crates/runmat-runtime/src/builtins/math/reduction/diff.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/math/reduction/diff.rs)
- Found a bug or behavioural difference? [Open an issue](https://github.com/runmat-org/runmat/issues/new/choose) with a repro.
"#;

pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "diff",
    op_kind: GpuOpKind::Custom("finite-difference"),
    supported_precisions: &[ScalarType::F32, ScalarType::F64],
    broadcast: BroadcastSemantics::Matlab,
    provider_hooks: &[ProviderHook::Custom("diff_dim")],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::NewHandle,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Providers surface finite-difference kernels through `diff_dim`; the WGPU backend keeps tensors on the device.",
};

register_builtin_gpu_spec!(GPU_SPEC);

pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "diff",
    shape: ShapeRequirements::BroadcastCompatible,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Fusion planner currently delegates to the runtime implementation; providers can override with custom kernels.",
};

register_builtin_fusion_spec!(FUSION_SPEC);

#[cfg(feature = "doc_export")]
register_builtin_doc_text!("diff", DOC_MD);

#[runtime_builtin(
    name = "diff",
    category = "math/reduction",
    summary = "Forward finite differences of scalars, vectors, matrices, or N-D tensors.",
    keywords = "diff,difference,finite difference,nth difference,gpu",
    accel = "diff"
)]
fn diff_builtin(value: Value, rest: Vec<Value>) -> Result<Value, String> {
    let (order, dim) = parse_arguments(&rest)?;
    if order == 0 {
        return Ok(value);
    }

    match value {
        Value::Tensor(tensor) => {
            diff_tensor_host(tensor, order, dim).map(tensor::tensor_into_value)
        }
        Value::LogicalArray(logical) => {
            let tensor = tensor::logical_to_tensor(&logical)?;
            diff_tensor_host(tensor, order, dim).map(tensor::tensor_into_value)
        }
        Value::Num(_) | Value::Int(_) | Value::Bool(_) => {
            let tensor = tensor::value_into_tensor_for("diff", value)?;
            diff_tensor_host(tensor, order, dim).map(tensor::tensor_into_value)
        }
        Value::Complex(re, im) => {
            let tensor = ComplexTensor {
                data: vec![(re, im)],
                shape: vec![1, 1],
                rows: 1,
                cols: 1,
            };
            diff_complex_tensor(tensor, order, dim).map(complex_tensor_into_value)
        }
        Value::ComplexTensor(tensor) => {
            diff_complex_tensor(tensor, order, dim).map(complex_tensor_into_value)
        }
        Value::CharArray(chars) => diff_char_array(chars, order, dim),
        Value::GpuTensor(handle) => diff_gpu(handle, order, dim),
        other => Err(format!(
            "diff: unsupported input type {:?}; expected numeric, logical, or character data",
            other
        )),
    }
}

fn parse_arguments(args: &[Value]) -> Result<(usize, Option<usize>), String> {
    match args.len() {
        0 => Ok((1, None)),
        1 => {
            let order = parse_order(&args[0])?;
            Ok((order.unwrap_or(1), None))
        }
        2 => {
            let order = parse_order(&args[0])?.unwrap_or(1);
            let dim = parse_dimension_arg(&args[1])?;
            Ok((order, dim))
        }
        _ => Err("diff: unsupported arguments".to_string()),
    }
}

fn parse_order(value: &Value) -> Result<Option<usize>, String> {
    if is_empty_array(value) {
        return Ok(None);
    }
    match value {
        Value::Int(i) => {
            let raw = i.to_i64();
            if raw < 0 {
                return Err("diff: order must be a non-negative integer scalar".to_string());
            }
            Ok(Some(raw as usize))
        }
        Value::Num(n) => parse_numeric_order(*n).map(Some),
        Value::Tensor(t) if t.data.len() == 1 => parse_numeric_order(t.data[0]).map(Some),
        Value::Bool(b) => Ok(Some(if *b { 1 } else { 0 })),
        other => Err(format!(
            "diff: order must be a non-negative integer scalar, got {:?}",
            other
        )),
    }
}

fn parse_numeric_order(value: f64) -> Result<usize, String> {
    if !value.is_finite() {
        return Err("diff: order must be finite".to_string());
    }
    if value < 0.0 {
        return Err("diff: order must be a non-negative integer scalar".to_string());
    }
    let rounded = value.round();
    if (rounded - value).abs() > f64::EPSILON {
        return Err("diff: order must be a non-negative integer scalar".to_string());
    }
    Ok(rounded as usize)
}

fn parse_dimension_arg(value: &Value) -> Result<Option<usize>, String> {
    if is_empty_array(value) {
        return Ok(None);
    }
    match value {
        Value::Int(_) | Value::Num(_) => tensor::parse_dimension(value, "diff").map(Some),
        Value::Tensor(t) if t.data.len() == 1 => {
            tensor::parse_dimension(&Value::Num(t.data[0]), "diff").map(Some)
        }
        other => Err(format!(
            "diff: dimension must be a positive integer scalar, got {:?}",
            other
        )),
    }
}

fn is_empty_array(value: &Value) -> bool {
    matches!(value, Value::Tensor(t) if t.data.is_empty())
}

fn diff_gpu(handle: GpuTensorHandle, order: usize, dim: Option<usize>) -> Result<Value, String> {
    let working_dim = dim.unwrap_or_else(|| default_dimension(&handle.shape));
    if working_dim == 0 {
        return Err("diff: dimension must be >= 1".to_string());
    }

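    // The provider hook takes a zero-based axis, so convert the 1-based MATLAB
    // dimension here; a provider error simply falls through to the host path.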
    if let Some(provider) = runmat_accelerate_api::provider() {
        if let Ok(device_result) = provider.diff_dim(&handle, order, working_dim.saturating_sub(1))
        {
            return Ok(Value::GpuTensor(device_result));
        }
    }

    let tensor = gpu_helpers::gather_tensor(&handle)?;
    diff_tensor_host(tensor, order, Some(working_dim)).map(tensor::tensor_into_value)
}

fn diff_char_array(chars: CharArray, order: usize, dim: Option<usize>) -> Result<Value, String> {
    if order == 0 {
        return Ok(Value::CharArray(chars));
    }
    let shape = vec![chars.rows, chars.cols];
    let data: Vec<f64> = chars.data.iter().map(|&ch| ch as u32 as f64).collect();
    let tensor = Tensor::new(data, shape).map_err(|e| format!("diff: {e}"))?;
    diff_tensor_host(tensor, order, dim).map(tensor::tensor_into_value)
}

pub fn diff_tensor_host(
    tensor: Tensor,
    order: usize,
    dim: Option<usize>,
) -> Result<Tensor, String> {
    let mut current = tensor;
    let mut working_dim = dim.unwrap_or_else(|| default_dimension(&current.shape));
    for _ in 0..order {
        current = diff_tensor_once(current, working_dim)?;
        if current.data.is_empty() {
            break;
        }
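        // When the caller did not pin a dimension and the working axis has
        // collapsed, re-select the first non-singleton dimension, as MATLAB does.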
        if dim.is_none() && dimension_length(&current.shape, working_dim) == 0 {
            working_dim = default_dimension(&current.shape);
        }
    }
    Ok(current)
}

fn diff_complex_tensor(
    tensor: ComplexTensor,
    order: usize,
    dim: Option<usize>,
) -> Result<ComplexTensor, String> {
    let mut current = tensor;
    let mut working_dim = dim.unwrap_or_else(|| default_dimension(&current.shape));
    for _ in 0..order {
        current = diff_complex_tensor_once(current, working_dim)?;
        if current.data.is_empty() {
            break;
        }
        if dim.is_none() && dimension_length(&current.shape, working_dim) == 0 {
            working_dim = default_dimension(&current.shape);
        }
    }
    Ok(current)
}

fn diff_tensor_once(tensor: Tensor, dim: usize) -> Result<Tensor, String> {
    let Tensor {
        data, mut shape, ..
    } = tensor;
    let dim_index = dim.saturating_sub(1);
    while shape.len() <= dim_index {
        shape.push(1);
    }
    let len_dim = shape[dim_index];
    let mut output_shape = shape.clone();
    if len_dim <= 1 || data.is_empty() {
        output_shape[dim_index] = output_shape[dim_index].saturating_sub(1);
        return Tensor::new(Vec::new(), output_shape).map_err(|e| format!("diff: {e}"));
    }
    output_shape[dim_index] = len_dim - 1;
    let stride_before = product(&shape[..dim_index]);
    let stride_after = product(&shape[dim_index + 1..]);
    let output_len = stride_before * (len_dim - 1) * stride_after;
    let mut out = Vec::with_capacity(output_len);

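    // The tensor layout is column-major, so results must be emitted with
    // `before` varying fastest: iterate the difference step `k` outside the
    // `before` loop so pushes land in the output's column-major order.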
    for after in 0..stride_after {
        let after_base = after * stride_before * len_dim;
        for k in 0..(len_dim - 1) {
            for before in 0..stride_before {
                let idx0 = before + after_base + k * stride_before;
                let idx1 = idx0 + stride_before;
                out.push(data[idx1] - data[idx0]);
            }
        }
    }

    Tensor::new(out, output_shape).map_err(|e| format!("diff: {e}"))
}

fn diff_complex_tensor_once(tensor: ComplexTensor, dim: usize) -> Result<ComplexTensor, String> {
    let ComplexTensor {
        data, mut shape, ..
    } = tensor;
    let dim_index = dim.saturating_sub(1);
    while shape.len() <= dim_index {
        shape.push(1);
    }
    let len_dim = shape[dim_index];
    let mut output_shape = shape.clone();
    if len_dim <= 1 || data.is_empty() {
        output_shape[dim_index] = output_shape[dim_index].saturating_sub(1);
        return ComplexTensor::new(Vec::new(), output_shape).map_err(|e| format!("diff: {e}"));
    }
    output_shape[dim_index] = len_dim - 1;
    let stride_before = product(&shape[..dim_index]);
    let stride_after = product(&shape[dim_index + 1..]);
    let mut out = Vec::with_capacity(stride_before * (len_dim - 1) * stride_after);

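    // Same column-major ordering as the real-valued kernel: `before` varies fastest.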
    for after in 0..stride_after {
        let after_base = after * stride_before * len_dim;
        for k in 0..(len_dim - 1) {
            for before in 0..stride_before {
                let idx0 = before + after_base + k * stride_before;
                let idx1 = idx0 + stride_before;
                let (re0, im0) = data[idx0];
                let (re1, im1) = data[idx1];
                out.push((re1 - re0, im1 - im0));
            }
        }
    }

    ComplexTensor::new(out, output_shape).map_err(|e| format!("diff: {e}"))
}

fn default_dimension(shape: &[usize]) -> usize {
    shape
        .iter()
        .position(|&dim| dim > 1)
        .map(|idx| idx + 1)
        .unwrap_or(1)
}

fn dimension_length(shape: &[usize], dim: usize) -> usize {
    let dim_index = dim.saturating_sub(1);
    if dim_index < shape.len() {
        shape[dim_index]
    } else {
        1
    }
}

fn product(dims: &[usize]) -> usize {
    dims.iter()
        .copied()
        .fold(1usize, |acc, val| acc.saturating_mul(val))
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::builtins::common::test_support;
    use runmat_builtins::{IntValue, Tensor};

    #[test]
    fn diff_row_vector_default_dimension() {
        let tensor = Tensor::new(vec![1.0, 4.0, 9.0], vec![1, 3]).unwrap();
        let result = diff_builtin(Value::Tensor(tensor), Vec::new()).expect("diff");
        match result {
            Value::Tensor(out) => {
                assert_eq!(out.shape, vec![1, 2]);
                assert_eq!(out.data, vec![3.0, 5.0]);
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    #[test]
    fn diff_column_vector_second_order() {
        let tensor = Tensor::new(vec![1.0, 4.0, 9.0, 16.0], vec![4, 1]).unwrap();
        let args = vec![Value::Int(IntValue::I32(2))];
        let result = diff_builtin(Value::Tensor(tensor), args).expect("diff");
        match result {
            Value::Tensor(out) => {
                assert_eq!(out.shape, vec![2, 1]);
                assert_eq!(out.data, vec![2.0, 2.0]);
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    #[test]
    fn diff_matrix_along_columns() {
        let tensor = Tensor::new(vec![1.0, 3.0, 5.0, 2.0, 4.0, 6.0], vec![3, 2]).unwrap();
        let args = vec![Value::Int(IntValue::I32(1)), Value::Int(IntValue::I32(2))];
        let result = diff_builtin(Value::Tensor(tensor), args).expect("diff");
        match result {
            Value::Tensor(out) => {
                assert_eq!(out.shape, vec![3, 1]);
                assert_eq!(out.data, vec![1.0, 1.0, 1.0]);
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

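    // Regression sketch for output ordering: with three or more slices along a
    // trailing dimension, results must come back in column-major order
    // (`before` varying fastest). Expected values computed by hand for
    // A = [1 2 4; 1 3 9; 1 4 16].
    #[test]
    fn diff_matrix_dim2_preserves_column_major_order() {
        let tensor =
            Tensor::new(vec![1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 4.0, 9.0, 16.0], vec![3, 3]).unwrap();
        let args = vec![Value::Int(IntValue::I32(1)), Value::Int(IntValue::I32(2))];
        let result = diff_builtin(Value::Tensor(tensor), args).expect("diff");
        match result {
            Value::Tensor(out) => {
                assert_eq!(out.shape, vec![3, 2]);
                assert_eq!(out.data, vec![1.0, 2.0, 3.0, 2.0, 6.0, 12.0]);
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }
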
    #[test]
    fn diff_handles_empty_when_order_exceeds_dimension() {
        let tensor = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
        let args = vec![Value::Int(IntValue::I32(5))];
        let result = diff_builtin(Value::Tensor(tensor), args).expect("diff");
        match result {
            Value::Tensor(out) => {
                assert_eq!(out.shape[0], 0);
                assert!(out.data.is_empty());
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    #[test]
    fn diff_char_array_promotes_to_double() {
        let chars = CharArray::new("ACEG".chars().collect(), 1, 4).unwrap();
        let result = diff_builtin(Value::CharArray(chars), Vec::new()).expect("diff");
        match result {
            Value::Tensor(out) => {
                assert_eq!(out.shape, vec![1, 3]);
                assert_eq!(out.data, vec![2.0, 2.0, 2.0]);
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    #[test]
    fn diff_complex_tensor_preserves_type() {
        let tensor =
            ComplexTensor::new(vec![(1.0, 1.0), (3.0, 2.0), (6.0, 5.0)], vec![1, 3]).unwrap();
        let result = diff_builtin(Value::ComplexTensor(tensor), Vec::new()).expect("diff");
        match result {
            Value::ComplexTensor(out) => {
                assert_eq!(out.shape, vec![1, 2]);
                assert_eq!(out.data, vec![(2.0, 1.0), (3.0, 3.0)]);
            }
            other => panic!("expected complex tensor result, got {other:?}"),
        }
    }

    #[test]
    fn diff_zero_order_returns_input() {
        let tensor = Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap();
        let args = vec![Value::Int(IntValue::I32(0))];
        let result = diff_builtin(Value::Tensor(tensor.clone()), args).expect("diff");
        assert_eq!(result, Value::Tensor(tensor));
    }

    #[test]
    fn diff_accepts_empty_order_argument() {
        let tensor = Tensor::new(vec![1.0, 4.0, 9.0], vec![3, 1]).unwrap();
        let baseline = diff_builtin(Value::Tensor(tensor.clone()), Vec::new()).expect("diff");
        let empty = Tensor::new(vec![], vec![0, 0]).unwrap();
        let result = diff_builtin(Value::Tensor(tensor), vec![Value::Tensor(empty)]).expect("diff");
        assert_eq!(result, baseline);
    }

    #[test]
    fn diff_accepts_empty_dimension_argument() {
        let tensor = Tensor::new(vec![1.0, 4.0, 9.0, 16.0], vec![1, 4]).unwrap();
        let baseline = diff_builtin(
            Value::Tensor(tensor.clone()),
            vec![Value::Int(IntValue::I32(1))],
        )
        .expect("diff");
        let empty = Tensor::new(vec![], vec![0, 0]).unwrap();
        let result = diff_builtin(
            Value::Tensor(tensor),
            vec![Value::Int(IntValue::I32(1)), Value::Tensor(empty)],
        )
        .expect("diff");
        assert_eq!(result, baseline);
    }

    #[test]
    fn diff_rejects_negative_order() {
        let tensor = Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap();
        let args = vec![Value::Int(IntValue::I32(-1))];
        let err = diff_builtin(Value::Tensor(tensor), args).unwrap_err();
        assert!(err.contains("non-negative"));
    }

    #[test]
    fn diff_rejects_non_integer_order() {
        let tensor = Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap();
        let args = vec![Value::Num(1.5)];
        let err = diff_builtin(Value::Tensor(tensor), args).unwrap_err();
        assert!(err.contains("non-negative integer"));
    }

    #[test]
    fn diff_rejects_invalid_dimension() {
        let tensor = Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap();
        let args = vec![Value::Int(IntValue::I32(1)), Value::Int(IntValue::I32(0))];
        let err = diff_builtin(Value::Tensor(tensor), args).unwrap_err();
        assert!(err.contains("dimension must be >= 1"));
    }

    #[test]
    fn diff_gpu_provider_roundtrip() {
        test_support::with_test_provider(|provider| {
            let tensor = Tensor::new(vec![1.0, 4.0, 9.0], vec![3, 1]).unwrap();
            let view = runmat_accelerate_api::HostTensorView {
                data: &tensor.data,
                shape: &tensor.shape,
            };
            let handle = provider.upload(&view).expect("upload");
            let result = diff_builtin(Value::GpuTensor(handle), Vec::new()).expect("diff");
            let gathered = test_support::gather(result).expect("gather");
            assert_eq!(gathered.shape, vec![2, 1]);
            assert_eq!(gathered.data, vec![3.0, 5.0]);
        });
    }

    #[test]
    #[cfg(feature = "wgpu")]
    fn diff_wgpu_matches_cpu() {
        let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
            runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
        );
        let tensor = Tensor::new(vec![1.0, 4.0, 9.0, 16.0], vec![4, 1]).unwrap();
        let args = vec![Value::Int(IntValue::I32(2))];

        let cpu_result = diff_builtin(Value::Tensor(tensor.clone()), args.clone()).expect("diff");
        let expected = match cpu_result {
            Value::Tensor(t) => t,
            other => panic!("expected tensor result, got {other:?}"),
        };

        let provider = runmat_accelerate_api::provider().expect("wgpu provider");
        let view = runmat_accelerate_api::HostTensorView {
            data: &tensor.data,
            shape: &tensor.shape,
        };
        let handle = provider.upload(&view).expect("upload");
        let gpu_value = diff_builtin(Value::GpuTensor(handle), args).expect("diff");
        let gathered = test_support::gather(gpu_value).expect("gather");

        assert_eq!(gathered.shape, expected.shape);
        let tol = if matches!(
            provider.precision(),
            runmat_accelerate_api::ProviderPrecision::F32
        ) {
            1e-5
        } else {
            1e-12
        };
        for (a, b) in gathered.data.iter().zip(expected.data.iter()) {
            assert!((a - b).abs() < tol, "|{a} - {b}| >= {tol}");
        }
    }

    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}