use crate::builtins::common::spec::{
    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
    ProviderHook, ReductionNaN, ResidencyPolicy, ScalarType, ShapeRequirements,
};
use crate::builtins::common::{gpu_helpers, tensor};
#[cfg(feature = "doc_export")]
use crate::register_builtin_doc_text;
use crate::{register_builtin_fusion_spec, register_builtin_gpu_spec};
use runmat_accelerate_api::{GpuTensorHandle, HostTensorView, ProviderPrecision};
use runmat_builtins::{CharArray, IntValue, Tensor, Value};
use runmat_macros::runtime_builtin;

const ERR_NO_PROVIDER: &str = "gpuArray: no acceleration provider registered";

#[cfg(feature = "doc_export")]
pub const DOC_MD: &str = r#"---
title: "gpuArray"
category: "acceleration/gpu"
keywords: ["gpuArray", "gpu", "device", "upload", "accelerate", "dtype", "like", "size"]
summary: "Move MATLAB values onto the active GPU with optional size, dtype, and prototype controls."
references:
  - https://www.mathworks.com/help/parallel-computing/gpuarray.html
gpu_support:
  elementwise: false
  reduction: false
  precisions: ["f32", "f64"]
  broadcasting: "none"
  notes: "Uploads host-resident data through the provider `upload` hook, re-uploading gpuArray inputs when dtype conversion is requested. Supports MATLAB-style size vectors, class strings, and `'like'` prototypes."
fusion:
  elementwise: false
  reduction: false
  max_inputs: 1
  constants: "inline"
requires_feature: null
tested:
  unit: "builtins::acceleration::gpu::gpuarray::tests"
  integration: "builtins::acceleration::gpu::gpuarray::tests::gpu_array_transfers_numeric_tensor"
  conversions: "builtins::acceleration::gpu::gpuarray::tests::gpu_array_casts_to_int32"
  reshape: "builtins::acceleration::gpu::gpuarray::tests::gpu_array_applies_size_arguments"
  wgpu: "builtins::acceleration::gpu::gpuarray::tests::gpu_array_wgpu_roundtrip"
---

# What does the `gpuArray` function do in MATLAB / RunMat?
`gpuArray(X)` moves MATLAB values onto the active GPU and returns a handle that the rest of the
runtime can execute on. RunMat mirrors MATLAB semantics, including MATLAB-style size arguments,
explicit dtype toggles (such as `'single'`, `'int32'`, `'logical'`), and the `'like'` prototype
syntax that matches the class of an existing array.

## How does the `gpuArray` function behave in MATLAB / RunMat?
- Accepts numeric tensors, logical arrays, booleans, character vectors, and existing gpuArray
  handles. Other input types raise descriptive errors so callers can gather or convert first.
- Optional leading size arguments (`gpuArray(data, m, n, ...)` or `gpuArray(data, [m n ...])`)
  reshape the uploaded value. The element count must match the requested size.
- Class strings such as `'single'`, `'double'`, `'int32'`, `'uint8'`, and `'logical'` convert the
  data before upload, matching MATLAB casting semantics (round-to-nearest with saturation for
  integers, `NaN`→0 for integer classes, and an error when converting `NaN` to logical); see the
  sketch after this list.
- `'like', prototype` infers the dtype (and logical state) from `prototype`. Explicit class strings
  override the inference when both are supplied.
- `"gpuArray"` strings are accepted as no-ops so call sites that forward arguments from constructors
  such as `zeros(..., 'gpuArray')` remain compatible.
- Inputs that are already gpuArray handles pass through by default. When a class change is requested,
  RunMat gathers the data, performs the conversion, uploads a fresh buffer, and frees the old handle.
- When no acceleration provider is registered, the builtin raises the error
  `gpuArray: no acceleration provider registered`.
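
A minimal sketch of the casting rules above, assuming an acceleration provider is registered:
`int8` rounds to the nearest integer, saturates at ±127, and maps `NaN` to 0, so the gathered
result is `1  -4  127  0` (display formatting may differ).

```matlab
vals = gpuArray([1.4 -3.7 200 NaN], 'int8');
gather(vals)
```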

## `gpuArray` GPU Execution Behaviour
`gpuArray` itself runs on the CPU. For host inputs it prepares a `HostTensorView` and forwards it to
the provider’s `upload` hook. For gpuArray inputs that require dtype conversion, the builtin gathers
the existing buffer, casts the result on the host, uploads a replacement, and frees the original
handle. Providers that do not yet implement `upload` should report an informative error; the builtin
surfaces that error with a `gpuArray:` prefix, mirroring MATLAB’s message style.
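
For example (a sketch, assuming a registered provider), converting an existing handle re-uploads
the data through the same hook:

```matlab
G = gpuArray([1.4 2.6]);
H = gpuArray(G, 'int32');   % gather, cast on the host, upload a fresh buffer
gather(H)                   % returns 1 and 3
```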

## GPU residency in RunMat (Do I need `gpuArray`?)
RunMat’s auto-offload planner transparently moves and keeps tensors on the GPU when it predicts a
benefit. You typically call `gpuArray` to honour MATLAB scripts that opt in explicitly, to enforce
residency before a long computation, or when you need MATLAB-style dtype conversion alongside the
upload. The builtin never forces a host copy once the handle has been created.

## Examples of using the `gpuArray` function in MATLAB / RunMat

### Moving a matrix to the GPU for elementwise work
```matlab
A = [1 2 3; 4 5 6];
G = gpuArray(A);
out = gather(sin(G))
```

Expected output:

```matlab
out =
  2×3

    0.8415    0.9093    0.1411
   -0.7568   -0.9589   -0.2794
```

### Uploading a scalar with dtype conversion
```matlab
pi_single = gpuArray(pi, 'single');
isa(pi_single, 'gpuArray')
class(gather(pi_single))
```

Expected output:

```matlab
ans =
  logical
   1

ans =
  single
```

### Converting host data to a logical gpuArray
```matlab
mask = gpuArray([0 2 -5 0], 'logical');
gather(mask)
```

Expected output:

```matlab
ans =
  1×4 logical array

   0   1   1   0
```

### Matching an existing prototype with `'like'`
```matlab
template = gpuArray(true(2, 2));
values = gpuArray([10 20 30 40], [2 2], 'like', template);
isequal(gather(values), logical([10 20; 30 40]))
```

Expected output:

```matlab
ans =
  logical
   1
```

### Reshaping during upload
```matlab
flat = 1:6;
G = gpuArray(flat, 2, 3);
size(G)
```

Expected output:

```matlab
ans =
     2     3
```

### Calling `gpuArray` on an existing gpuArray handle
```matlab
G = gpuArray([1 2 3]);
H = gpuArray(G, 'double');
isequal(G, H)
```

Expected output:

```matlab
ans =
  logical
   1
```

## FAQ

**Can I reshape while uploading?**
Yes. Provide either individual dimension arguments or a size vector. The element count must match.

**What class strings are supported?**
`'double'`, `'single'`, `'logical'`, `'int8'`, `'int16'`, `'int32'`, `'int64'`, `'uint8'`, `'uint16'`,
`'uint32'`, `'uint64'`, and the compatibility no-op `'gpuArray'`. Unknown strings raise an error.

**How does `'like'` interact with explicit class strings?**
`'like'` sets the default dtype (for example, inferring a logical class from a logical prototype),
but an explicit class string in the same call overrides that inference.
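
A minimal sketch of that precedence (assuming a registered provider): `V` inherits the logical
class from the prototype, while the explicit `'double'` in `W` wins over it.

```matlab
proto = true(1, 3);
V = gpuArray([1 0 2], 'like', proto);            % logical; gathers as 1 0 1
W = gpuArray([1 0 2], 'like', proto, 'double');  % explicit class overrides the prototype
```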

**Can I promote an existing gpuArray to another class?**
Yes. When you pass a gpuArray as the first argument, `gpuArray` re-uploads the buffer if a class
change is requested. Without a change it simply updates metadata (for example, clearing the logical flag).

**What happens when the provider is missing?**
The builtin raises `gpuArray: no acceleration provider registered`. Register a provider (for example,
the in-process test provider or the WGPU backend) before uploading values.

**Does `gpuArray` support complex inputs, structs, or cell arrays?**
Not yet. Gather or decompose the data first, mirroring MATLAB’s requirement to convert to supported
numeric or logical types.

## See Also
[gather](./gather), [gpuDevice](./gpudevice), [gpuInfo](./gpuinfo), [arrayfun](./arrayfun), [zeros](../../array/creation/zeros), [sum](../../math/reduction/sum)

## Source & Feedback
- Implementation: `crates/runmat-runtime/src/builtins/acceleration/gpu/gpuarray.rs`
- Found a bug or behavior mismatch? Please open an issue with a minimal reproduction.
"#;

pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "gpuArray",
    op_kind: GpuOpKind::Custom("upload"),
    supported_precisions: &[ScalarType::F32, ScalarType::F64],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[ProviderHook::Custom("upload")],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::NewHandle,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Invokes the provider `upload` hook, reuploading gpuArray inputs when dtype conversion is requested. Handles class strings, size vectors, and `'like'` prototypes.",
};

register_builtin_gpu_spec!(GPU_SPEC);

pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "gpuArray",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes:
        "Acts as a residency boundary; fusion graphs never cross explicit host↔device transfers.",
};

register_builtin_fusion_spec!(FUSION_SPEC);

#[cfg(feature = "doc_export")]
register_builtin_doc_text!("gpuArray", DOC_MD);

#[runtime_builtin(
    name = "gpuArray",
    category = "acceleration/gpu",
    summary = "Move data to the GPU and return a gpuArray handle.",
    keywords = "gpuArray,gpu,accelerate,upload,dtype,like",
    examples = "G = gpuArray([1 2 3], 'single');",
    accel = "array_construct"
)]
fn gpu_array_builtin(value: Value, rest: Vec<Value>) -> Result<Value, String> {
    let options = parse_options(&rest)?;
    let incoming_precision = match &value {
        Value::GpuTensor(handle) => runmat_accelerate_api::handle_precision(handle),
        _ => None,
    };
    let dtype = resolve_dtype(&value, &options)?;
    let dims = options.dims.clone();

    let prepared = match value {
        Value::GpuTensor(handle) => convert_device_value(handle, dtype)?,
        other => upload_host_value(other, dtype)?,
    };

    let mut handle = prepared.handle;

    if let Some(dims) = dims.as_ref() {
        apply_dims(&mut handle, dims)?;
    }

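    // Resolve the precision tag recorded on the handle: an explicit 'single' request wins,
    // otherwise keep the precision of an incoming gpuArray, otherwise fall back to the
    // provider's default precision.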
    let provider_precision = runmat_accelerate_api::provider()
        .map(|p| p.precision())
        .unwrap_or(ProviderPrecision::F64);
    let requested_precision = match dtype {
        DataClass::Single => Some(ProviderPrecision::F32),
        _ => None,
    };
    let final_precision = requested_precision
        .or(incoming_precision)
        .unwrap_or(provider_precision);
    runmat_accelerate_api::set_handle_precision(&handle, final_precision);

    runmat_accelerate_api::set_handle_logical(&handle, prepared.logical);

    Ok(Value::GpuTensor(handle))
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum DataClass {
    Double,
    Single,
    Logical,
    Int8,
    Int16,
    Int32,
    Int64,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
}

impl DataClass {
    fn from_tag(tag: &str) -> Option<Self> {
        match tag {
            "double" => Some(Self::Double),
            "single" | "float32" => Some(Self::Single),
            "logical" | "bool" | "boolean" => Some(Self::Logical),
            "int8" => Some(Self::Int8),
            "int16" => Some(Self::Int16),
            "int32" | "int" => Some(Self::Int32),
            "int64" => Some(Self::Int64),
            "uint8" => Some(Self::UInt8),
            "uint16" => Some(Self::UInt16),
            "uint32" => Some(Self::UInt32),
            "uint64" => Some(Self::UInt64),
            "gpuarray" => None,
            _ => None,
        }
    }
}

#[derive(Debug, Default)]
struct ParsedOptions {
    dims: Option<Vec<usize>>,
    explicit_dtype: Option<DataClass>,
    prototype: Option<Value>,
}

fn parse_options(rest: &[Value]) -> Result<ParsedOptions, String> {
    let (index_after_dims, dims) = parse_size_arguments(rest)?;
    let mut options = ParsedOptions {
        dims,
        ..ParsedOptions::default()
    };

    let mut idx = index_after_dims;
    while idx < rest.len() {
        let tag = value_to_lower_string(&rest[idx]).ok_or_else(|| {
            format!(
                "gpuArray: unexpected argument {:?}; expected a class string or the keyword 'like'",
                rest[idx]
            )
        })?;

        match tag.as_str() {
            "like" => {
                idx += 1;
                if idx >= rest.len() {
                    return Err("gpuArray: expected a prototype value after 'like'".to_string());
                }
                if options.prototype.is_some() {
                    return Err("gpuArray: duplicate 'like' qualifier".to_string());
                }
                options.prototype = Some(rest[idx].clone());
            }
            "distributed" | "codistributed" => {
                return Err("gpuArray: codistributed arrays are not supported yet".to_string());
            }
            tag => {
                if let Some(class) = DataClass::from_tag(tag) {
                    if let Some(existing) = options.explicit_dtype {
                        if existing != class {
                            return Err(
                                "gpuArray: conflicting type qualifiers supplied".to_string()
                            );
                        }
                    } else {
                        options.explicit_dtype = Some(class);
                    }
                } else if tag != "gpuarray" {
                    return Err(format!("gpuArray: unrecognised option '{tag}'"));
                }
            }
        }

        idx += 1;
    }

    Ok(options)
}

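// Consume leading size arguments: either scalar dimensions (`gpuArray(data, m, n, ...)`) or a
// single size vector (`gpuArray(data, [m n ...])`). Parsing stops at the first string-like (or
// otherwise non-numeric) argument, which `parse_options` then treats as an option.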
fn parse_size_arguments(rest: &[Value]) -> Result<(usize, Option<Vec<usize>>), String> {
    let mut idx = 0;
    let mut dims: Vec<usize> = Vec::new();
    let mut vector_consumed = false;

    while idx < rest.len() {
        match &rest[idx] {
            Value::String(_) | Value::StringArray(_) | Value::CharArray(_) => break,
            _ => {}
        }

        match &rest[idx] {
            Value::Int(i) => {
                dims.push(int_to_dim(i)?);
            }
            Value::Num(n) => {
                dims.push(float_to_dim(*n)?);
            }
            Value::Tensor(t) => {
                if vector_consumed || !dims.is_empty() {
                    return Err(
                        "gpuArray: size vectors cannot be combined with scalar dimensions"
                            .to_string(),
                    );
                }
                dims = tensor_to_dims(t)?;
                vector_consumed = true;
            }
            _ => break,
        }
        idx += 1;
    }

    let dims_option = if dims.is_empty() { None } else { Some(dims) };
    Ok((idx, dims_option))
}

fn value_to_lower_string(value: &Value) -> Option<String> {
    crate::builtins::common::tensor::value_to_string(value).map(|s| s.trim().to_ascii_lowercase())
}

fn int_to_dim(value: &IntValue) -> Result<usize, String> {
    let raw = value.to_i64();
    if raw < 0 {
        return Err("gpuArray: size arguments must be non-negative integers".to_string());
    }
    Ok(raw as usize)
}

fn float_to_dim(value: f64) -> Result<usize, String> {
    if !value.is_finite() {
        return Err("gpuArray: size arguments must be finite integers".to_string());
    }
    let rounded = value.round();
    if (rounded - value).abs() > f64::EPSILON {
        return Err("gpuArray: size arguments must be integers".to_string());
    }
    if rounded < 0.0 {
        return Err("gpuArray: size arguments must be non-negative".to_string());
    }
    Ok(rounded as usize)
}

fn tensor_to_dims(tensor: &Tensor) -> Result<Vec<usize>, String> {
    let mut dims = Vec::with_capacity(tensor.data.len());
    for value in &tensor.data {
        dims.push(float_to_dim(*value)?);
    }
    Ok(dims)
}

fn resolve_dtype(value: &Value, options: &ParsedOptions) -> Result<DataClass, String> {
    if let Some(explicit) = options.explicit_dtype {
        return Ok(explicit);
    }
    if let Some(prototype) = options.prototype.as_ref() {
        return infer_dtype_from_prototype(prototype);
    }
    if value_defaults_to_logical(value) {
        return Ok(DataClass::Logical);
    }
    Ok(DataClass::Double)
}

fn infer_dtype_from_prototype(proto: &Value) -> Result<DataClass, String> {
    match proto {
        Value::GpuTensor(handle) => {
            if runmat_accelerate_api::handle_is_logical(handle) {
                Ok(DataClass::Logical)
            } else {
                Ok(DataClass::Double)
            }
        }
        Value::LogicalArray(_) | Value::Bool(_) => Ok(DataClass::Logical),
        Value::Int(int) => Ok(match int {
            IntValue::I8(_) => DataClass::Int8,
            IntValue::I16(_) => DataClass::Int16,
            IntValue::I32(_) => DataClass::Int32,
            IntValue::I64(_) => DataClass::Int64,
            IntValue::U8(_) => DataClass::UInt8,
            IntValue::U16(_) => DataClass::UInt16,
            IntValue::U32(_) => DataClass::UInt32,
            IntValue::U64(_) => DataClass::UInt64,
        }),
        Value::Tensor(_) | Value::Num(_) => Ok(DataClass::Double),
        Value::CharArray(_) => Ok(DataClass::Double),
        Value::String(_) => Err(
            "gpuArray: 'like' does not accept MATLAB string scalars; convert to char() first".to_string(),
        ),
        Value::StringArray(_) => Err(
            "gpuArray: 'like' does not accept string arrays; convert to char arrays first".to_string(),
        ),
        Value::Complex(_, _) | Value::ComplexTensor(_) => Err(
            "gpuArray: complex prototypes are not supported yet; provide real-valued inputs".to_string(),
        ),
        other => Err(format!(
            "gpuArray: unsupported 'like' prototype type {other:?}; expected numeric or logical values"
        )),
    }
}

fn value_defaults_to_logical(value: &Value) -> bool {
    match value {
        Value::LogicalArray(_) | Value::Bool(_) => true,
        Value::GpuTensor(handle) => runmat_accelerate_api::handle_is_logical(handle),
        _ => false,
    }
}

struct PreparedHandle {
    handle: GpuTensorHandle,
    logical: bool,
}

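// Coerce a host value to a dense f64 tensor, apply the requested class conversion, and push the
// result to the device through the provider's `upload` hook.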
fn upload_host_value(value: Value, dtype: DataClass) -> Result<PreparedHandle, String> {
    #[cfg(all(test, feature = "wgpu"))]
    {
        if runmat_accelerate_api::provider().is_none() {
            let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
                runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
            );
        }
    }
    let provider = runmat_accelerate_api::provider().ok_or_else(|| ERR_NO_PROVIDER.to_string())?;
    let tensor = coerce_host_value(value)?;
    let (mut tensor, logical) = cast_tensor(tensor, dtype)?;

    let view = HostTensorView {
        data: &tensor.data,
        shape: &tensor.shape,
    };
    let handle = provider
        .upload(&view)
        .map_err(|err| format!("gpuArray: {err}"))?;

    tensor.data.clear();

    Ok(PreparedHandle { handle, logical })
}

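// Convert a value that already lives on the device. Logical and double requests that only need a
// metadata change are handled in place; any other class change gathers the buffer, casts on the
// host, uploads a replacement, and frees the original handle.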
fn convert_device_value(
    handle: GpuTensorHandle,
    dtype: DataClass,
) -> Result<PreparedHandle, String> {
    #[cfg(all(test, feature = "wgpu"))]
    {
        if handle.device_id != 0 {
            let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
                runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
            );
        }
    }
    match dtype {
        DataClass::Logical => {
            if runmat_accelerate_api::handle_is_logical(&handle) {
                return Ok(PreparedHandle {
                    handle,
                    logical: true,
                });
            }
        }
        DataClass::Double => {
            if runmat_accelerate_api::handle_is_logical(&handle) {
                runmat_accelerate_api::set_handle_logical(&handle, false);
            }
            return Ok(PreparedHandle {
                handle,
                logical: false,
            });
        }
        _ => {}
    }

    let provider = runmat_accelerate_api::provider().ok_or_else(|| ERR_NO_PROVIDER.to_string())?;
    let tensor = gpu_helpers::gather_tensor(&handle).map_err(|err| format!("gpuArray: {err}"))?;
    let (mut tensor, logical) = cast_tensor(tensor, dtype)?;

    let view = HostTensorView {
        data: &tensor.data,
        shape: &tensor.shape,
    };
    let new_handle = provider
        .upload(&view)
        .map_err(|err| format!("gpuArray: {err}"))?;

    provider.free(&handle).ok();
    tensor.data.clear();

    Ok(PreparedHandle {
        handle: new_handle,
        logical,
    })
}

fn coerce_host_value(value: Value) -> Result<Tensor, String> {
    match value {
        Value::Tensor(t) => Ok(t),
        Value::LogicalArray(logical) => tensor::logical_to_tensor(&logical)
            .map_err(|err| format!("gpuArray: {err}")),
        Value::Bool(flag) => Tensor::new(vec![if flag { 1.0 } else { 0.0 }], vec![1, 1])
            .map_err(|err| format!("gpuArray: {err}")),
        Value::Num(n) => Tensor::new(vec![n], vec![1, 1]).map_err(|err| format!("gpuArray: {err}")),
        Value::Int(i) => Tensor::new(vec![i.to_f64()], vec![1, 1])
            .map_err(|err| format!("gpuArray: {err}")),
        Value::CharArray(ca) => char_array_to_tensor(&ca),
        Value::String(text) => {
            let ca = CharArray::new_row(&text);
            char_array_to_tensor(&ca)
        }
        Value::StringArray(_) => Err(
            "gpuArray: string arrays are not supported yet; convert to char arrays with CHAR first"
                .to_string(),
        ),
        Value::Complex(_, _) | Value::ComplexTensor(_) => Err(
            "gpuArray: complex inputs are not supported yet; split real and imaginary parts before uploading"
                .to_string(),
        ),
        other => Err(format!(
            "gpuArray: unsupported input type for GPU transfer: {other:?}"
        )),
    }
}

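// Apply the requested class conversion in place on the host-side f64 buffer and report whether the
// resulting handle should be flagged as logical.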
fn cast_tensor(mut tensor: Tensor, dtype: DataClass) -> Result<(Tensor, bool), String> {
    let logical = match dtype {
        DataClass::Logical => {
            convert_to_logical(&mut tensor.data)?;
            true
        }
        DataClass::Single => {
            convert_to_single(&mut tensor.data);
            false
        }
        DataClass::Int8 => {
            convert_to_int_range(&mut tensor.data, i8::MIN as f64, i8::MAX as f64);
            false
        }
        DataClass::Int16 => {
            convert_to_int_range(&mut tensor.data, i16::MIN as f64, i16::MAX as f64);
            false
        }
        DataClass::Int32 => {
            convert_to_int_range(&mut tensor.data, i32::MIN as f64, i32::MAX as f64);
            false
        }
        DataClass::Int64 => {
            convert_to_int_range(&mut tensor.data, i64::MIN as f64, i64::MAX as f64);
            false
        }
        DataClass::UInt8 => {
            convert_to_int_range(&mut tensor.data, 0.0, u8::MAX as f64);
            false
        }
        DataClass::UInt16 => {
            convert_to_int_range(&mut tensor.data, 0.0, u16::MAX as f64);
            false
        }
        DataClass::UInt32 => {
            convert_to_int_range(&mut tensor.data, 0.0, u32::MAX as f64);
            false
        }
        DataClass::UInt64 => {
            convert_to_int_range(&mut tensor.data, 0.0, u64::MAX as f64);
            false
        }
        DataClass::Double => false,
    };

    Ok((tensor, logical))
}

fn convert_to_logical(data: &mut [f64]) -> Result<(), String> {
    for value in data.iter_mut() {
        if value.is_nan() {
            return Err("gpuArray: cannot convert NaN to logical".to_string());
        }
        *value = if *value != 0.0 { 1.0 } else { 0.0 };
    }
    Ok(())
}

fn convert_to_single(data: &mut [f64]) {
    for value in data.iter_mut() {
        *value = (*value as f32) as f64;
    }
}

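// Emulate MATLAB integer conversion on the f64 buffer: round to the nearest integer (halves away
// from zero via `f64::round`), saturate at the class limits, map NaN to 0, and clamp ±Inf to the
// class minimum/maximum.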
fn convert_to_int_range(data: &mut [f64], min: f64, max: f64) {
    for value in data.iter_mut() {
        if value.is_nan() {
            // MATLAB maps NaN to 0 for integer classes (see the casting notes in DOC_MD).
            *value = 0.0;
            continue;
        }
        if value.is_infinite() {
            *value = if value.is_sign_negative() { min } else { max };
            continue;
        }
        let rounded = value.round();
        *value = rounded.clamp(min, max);
    }
}

fn apply_dims(handle: &mut GpuTensorHandle, dims: &[usize]) -> Result<(), String> {
    let new_elems: usize = dims.iter().product();
    let current_elems: usize = if handle.shape.is_empty() {
        new_elems
    } else {
        handle.shape.iter().product()
    };
    if new_elems != current_elems {
        return Err(format!(
            "gpuArray: cannot reshape gpuArray of {current_elems} elements into size {:?}",
            dims
        ));
    }
    handle.shape = dims.to_vec();
    Ok(())
}

fn char_array_to_tensor(ca: &CharArray) -> Result<Tensor, String> {
    let rows = ca.rows;
    let cols = ca.cols;
    if rows == 0 || cols == 0 {
        return Tensor::new(Vec::new(), vec![rows, cols]).map_err(|err| format!("gpuArray: {err}"));
    }
    let mut data = vec![0.0; rows * cols];
    for row in 0..rows {
        for col in 0..cols {
            let idx_char = row * cols + col;
            let ch = ca.data[idx_char];
            data[row * cols + col] = ch as u32 as f64;
        }
    }
    Tensor::new(data, vec![rows, cols]).map_err(|err| format!("gpuArray: {err}"))
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::builtins::common::test_support;
    use runmat_accelerate_api::HostTensorView;
    use runmat_builtins::{IntValue, LogicalArray};

    fn call(value: Value, rest: Vec<Value>) -> Result<Value, String> {
        gpu_array_builtin(value, rest)
    }

    #[test]
    fn gpu_array_transfers_numeric_tensor() {
        test_support::with_test_provider(|_| {
            let tensor = Tensor::new(vec![1.0, 2.0, 3.0, 4.0], vec![2, 2]).unwrap();
            let result = call(Value::Tensor(tensor.clone()), Vec::new()).expect("gpuArray upload");
            let Value::GpuTensor(handle) = result else {
                panic!("expected gpu tensor");
            };
            assert_eq!(handle.shape, tensor.shape);
            let gathered =
                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather values");
            assert_eq!(gathered.shape, tensor.shape);
            assert_eq!(gathered.data, tensor.data);
        });
    }

    #[test]
    fn gpu_array_marks_logical_inputs() {
        test_support::with_test_provider(|_| {
            let logical =
                LogicalArray::new(vec![1, 0, 1, 1], vec![2, 2]).expect("logical construction");
            let result =
                call(Value::LogicalArray(logical.clone()), Vec::new()).expect("gpuArray logical");
            let Value::GpuTensor(handle) = result else {
                panic!("expected gpu tensor");
            };
            assert!(runmat_accelerate_api::handle_is_logical(&handle));
            let gathered =
                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather logical");
            assert_eq!(gathered.shape, logical.shape);
            assert_eq!(gathered.data, vec![1.0, 0.0, 1.0, 1.0]);
        });
    }

    #[test]
    fn gpu_array_handles_scalar_bool() {
        test_support::with_test_provider(|_| {
            let result = call(Value::Bool(true), Vec::new()).expect("gpuArray bool");
            let Value::GpuTensor(handle) = result else {
                panic!("expected gpu tensor");
            };
            assert!(runmat_accelerate_api::handle_is_logical(&handle));
            let gathered =
                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather bool");
            assert_eq!(gathered.shape, vec![1, 1]);
            assert_eq!(gathered.data, vec![1.0]);
        });
    }

    #[test]
    fn gpu_array_supports_char_arrays() {
        test_support::with_test_provider(|_| {
            let chars = CharArray::new("row1row2".chars().collect(), 2, 4).unwrap();
            let original: Vec<char> = chars.data.clone();
            let result =
                call(Value::CharArray(chars), Vec::new()).expect("gpuArray char array upload");
            let Value::GpuTensor(handle) = result else {
                panic!("expected gpu tensor");
            };
            let gathered =
                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather chars");
            assert_eq!(gathered.shape, vec![2, 4]);
            let mut recovered = Vec::new();
            for col in 0..4 {
                for row in 0..2 {
                    let idx = row + col * 2;
                    let code = gathered.data[idx];
                    let ch = char::from_u32(code as u32)
                        .expect("valid unicode scalar from numeric code");
                    recovered.push(ch);
                }
            }
            assert_eq!(recovered, original);
        });
    }

    #[test]
    fn gpu_array_converts_strings() {
        test_support::with_test_provider(|_| {
            let result = call(Value::String("gpu".into()), Vec::new()).expect("gpuArray string");
            let Value::GpuTensor(handle) = result else {
                panic!("expected gpu tensor");
            };
            let gathered =
                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather string");
            assert_eq!(gathered.shape, vec![1, 3]);
            let expected: Vec<f64> = "gpu".chars().map(|ch| ch as u32 as f64).collect();
            assert_eq!(gathered.data, expected);
        });
    }

    #[test]
    fn gpu_array_passthrough_existing_handle() {
        test_support::with_test_provider(|provider| {
            let tensor = Tensor::new(vec![5.0, 6.0], vec![2, 1]).unwrap();
            let view = HostTensorView {
                data: &tensor.data,
                shape: &tensor.shape,
            };
            let handle = provider.upload(&view).expect("upload");
            let cloned = handle.clone();
            let result =
                call(Value::GpuTensor(handle.clone()), Vec::new()).expect("gpuArray passthrough");
            let Value::GpuTensor(returned) = result else {
                panic!("expected gpu tensor");
            };
            assert_eq!(returned.buffer_id, cloned.buffer_id);
            assert_eq!(returned.shape, cloned.shape);
        });
    }

    #[test]
    fn gpu_array_casts_to_int32() {
        test_support::with_test_provider(|_| {
            let tensor = Tensor::new(vec![1.2, -3.7, 123456.0], vec![3, 1]).unwrap();
            let result =
                call(Value::Tensor(tensor), vec![Value::from("int32")]).expect("gpuArray int32");
            let Value::GpuTensor(handle) = result else {
                panic!("expected gpu tensor");
            };
            let gathered =
                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather int32");
            assert_eq!(gathered.data, vec![1.0, -4.0, 123456.0]);
        });
    }

    #[test]
    fn gpu_array_casts_to_uint8() {
        test_support::with_test_provider(|_| {
            let tensor = Tensor::new(vec![-12.0, 12.8, 300.4, f64::INFINITY], vec![4, 1]).unwrap();
            let result =
                call(Value::Tensor(tensor), vec![Value::from("uint8")]).expect("gpuArray uint8");
            let Value::GpuTensor(handle) = result else {
                panic!("expected gpu tensor");
            };
            let gathered =
                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather uint8");
            assert_eq!(gathered.data, vec![0.0, 13.0, 255.0, 255.0]);
        });
    }

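    // A minimal sketch exercising the NaN handling documented above: integer casts map NaN to 0.
    // Assumes the same in-process test provider and helpers used by the other tests in this module.
    #[test]
    fn gpu_array_integer_cast_maps_nan_to_zero() {
        test_support::with_test_provider(|_| {
            let tensor = Tensor::new(vec![f64::NAN, 5.4], vec![2, 1]).unwrap();
            let result =
                call(Value::Tensor(tensor), vec![Value::from("int32")]).expect("gpuArray int32");
            let Value::GpuTensor(handle) = result else {
                panic!("expected gpu tensor");
            };
            let gathered =
                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather int32");
            assert_eq!(gathered.data, vec![0.0, 5.0]);
        });
    }
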
    #[test]
    fn gpu_array_single_precision_rounds() {
        test_support::with_test_provider(|_| {
            let tensor = Tensor::new(vec![1.23456789, -9.87654321], vec![2, 1]).unwrap();
            let result =
                call(Value::Tensor(tensor), vec![Value::from("single")]).expect("gpuArray single");
            let Value::GpuTensor(handle) = result else {
                panic!("expected gpu tensor");
            };
            let gathered =
                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather single");
            let expected = [1.234_567_9_f32 as f64, (-9.876_543_f32) as f64];
            for (observed, expected) in gathered.data.iter().zip(expected.iter()) {
                assert!((observed - expected).abs() < 1e-6);
            }
        });
    }

    #[test]
    fn gpu_array_like_infers_logical() {
        test_support::with_test_provider(|_| {
            let tensor = Tensor::new(vec![0.0, 2.0, -3.0], vec![3, 1]).unwrap();
            let logical_proto =
                LogicalArray::new(vec![0, 1, 0], vec![3, 1]).expect("logical proto");
            let result = call(
                Value::Tensor(tensor),
                vec![Value::from("like"), Value::LogicalArray(logical_proto)],
            )
            .expect("gpuArray like logical");
            let Value::GpuTensor(handle) = result else {
                panic!("expected gpu tensor");
            };
            assert!(runmat_accelerate_api::handle_is_logical(&handle));
            let gathered = test_support::gather(Value::GpuTensor(handle.clone())).expect("gather");
            assert_eq!(gathered.data, vec![0.0, 1.0, 1.0]);
        });
    }

    #[test]
    fn gpu_array_like_requires_argument() {
        test_support::with_test_provider(|_| {
            let tensor = Tensor::new(vec![1.0], vec![1, 1]).unwrap();
            let err = call(Value::Tensor(tensor), vec![Value::from("like")]).unwrap_err();
            assert!(err.contains("expected a prototype value"));
        });
    }

    #[test]
    fn gpu_array_unknown_option_errors() {
        test_support::with_test_provider(|_| {
            let tensor = Tensor::new(vec![1.0], vec![1, 1]).unwrap();
            let err = call(Value::Tensor(tensor), vec![Value::from("mystery")]).unwrap_err();
            assert!(err.contains("unrecognised option"));
        });
    }

    #[test]
    fn gpu_array_gpu_to_logical_reuploads() {
        test_support::with_test_provider(|provider| {
            let tensor = Tensor::new(vec![2.0, 0.0, -5.5], vec![3, 1]).unwrap();
            let view = HostTensorView {
                data: &tensor.data,
                shape: &tensor.shape,
            };
            let handle = provider.upload(&view).expect("upload");
            let result = call(
                Value::GpuTensor(handle.clone()),
                vec![Value::from("logical")],
            )
            .expect("gpuArray logical cast");
            let Value::GpuTensor(new_handle) = result else {
                panic!("expected gpu tensor");
            };
            assert!(runmat_accelerate_api::handle_is_logical(&new_handle));
            let gathered =
                test_support::gather(Value::GpuTensor(new_handle.clone())).expect("gather");
            assert_eq!(gathered.data, vec![1.0, 0.0, 1.0]);
            provider.free(&handle).ok();
            provider.free(&new_handle).ok();
        });
    }

    #[test]
    fn gpu_array_gpu_logical_to_double_clears_flag() {
        test_support::with_test_provider(|provider| {
            let tensor = Tensor::new(vec![1.0, 0.0], vec![2, 1]).unwrap();
            let view = HostTensorView {
                data: &tensor.data,
                shape: &tensor.shape,
            };
            let handle = provider.upload(&view).expect("upload");
            runmat_accelerate_api::set_handle_logical(&handle, true);
            let result = call(
                Value::GpuTensor(handle.clone()),
                vec![Value::from("double")],
            )
            .expect("gpuArray double cast");
            let Value::GpuTensor(new_handle) = result else {
                panic!("expected gpu tensor");
            };
            assert!(!runmat_accelerate_api::handle_is_logical(&new_handle));
            provider.free(&handle).ok();
            provider.free(&new_handle).ok();
        });
    }

    #[test]
    fn gpu_array_applies_size_arguments() {
        test_support::with_test_provider(|_| {
            let tensor = Tensor::new(vec![1.0, 2.0, 3.0, 4.0], vec![4, 1]).unwrap();
            let result = call(
                Value::Tensor(tensor),
                vec![Value::from(2i32), Value::from(2i32)],
            )
            .expect("gpuArray reshape");
            let Value::GpuTensor(handle) = result else {
                panic!("expected gpu tensor");
            };
            assert_eq!(handle.shape, vec![2, 2]);
        });
    }

    #[test]
    fn gpu_array_gpu_size_arguments_update_shape() {
        test_support::with_test_provider(|provider| {
            let tensor = Tensor::new(vec![1.0, 2.0, 3.0, 4.0], vec![4, 1]).unwrap();
            let view = HostTensorView {
                data: &tensor.data,
                shape: &tensor.shape,
            };
            let handle = provider.upload(&view).expect("upload");
            let result = call(
                Value::GpuTensor(handle.clone()),
                vec![Value::from(2i32), Value::from(2i32)],
            )
            .expect("gpuArray gpu reshape");
            let Value::GpuTensor(new_handle) = result else {
                panic!("expected gpu tensor");
            };
            assert_eq!(new_handle.shape, vec![2, 2]);
            provider.free(&handle).ok();
            provider.free(&new_handle).ok();
        });
    }

    #[test]
    fn gpu_array_size_mismatch_errors() {
        test_support::with_test_provider(|_| {
            let tensor = Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap();
            let err = call(
                Value::Tensor(tensor),
                vec![Value::from(2i32), Value::from(2i32)],
            )
            .unwrap_err();
            assert!(err.contains("cannot reshape"));
        });
    }

    #[test]
    #[cfg(feature = "wgpu")]
    fn gpu_array_wgpu_roundtrip() {
        use runmat_accelerate_api::AccelProvider;

        match runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
            runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
        ) {
            Ok(provider) => {
                let tensor = Tensor::new(vec![1.0, 2.5, 3.5], vec![3, 1]).unwrap();
                let result = call(Value::Tensor(tensor.clone()), vec![Value::from("int32")])
                    .expect("wgpu upload");
                let Value::GpuTensor(handle) = result else {
                    panic!("expected gpu tensor");
                };
                let gathered =
                    test_support::gather(Value::GpuTensor(handle.clone())).expect("wgpu gather");
                assert_eq!(gathered.shape, vec![3, 1]);
                assert_eq!(gathered.data, vec![1.0, 3.0, 4.0]);
                provider.free(&handle).ok();
            }
            Err(err) => {
                eprintln!("Skipping gpu_array_wgpu_roundtrip: {err}");
            }
        }
        runmat_accelerate::simple_provider::register_inprocess_provider();
    }

    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }

    #[test]
    fn gpu_array_accepts_int_scalars() {
        test_support::with_test_provider(|_| {
            let value = Value::Int(IntValue::I32(7));
            let result = call(value, Vec::new()).expect("gpuArray int");
            let Value::GpuTensor(handle) = result else {
                panic!("expected gpu tensor");
            };
            let gathered =
                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather int");
            assert_eq!(gathered.shape, vec![1, 1]);
            assert_eq!(gathered.data, vec![7.0]);
        });
    }
}