runmat_runtime/builtins/acceleration/gpu/
gpuarray.rs

1//! MATLAB-compatible `gpuArray` builtin that uploads host data to the active accelerator.
2//!
3//! The implementation mirrors MathWorks MATLAB semantics, including optional
4//! size arguments, `'like'` prototypes, and explicit dtype toggles. When no
5//! acceleration provider is registered the builtin surfaces a MATLAB-style
6//! error, ensuring callers know residency could not be established.
7
8use crate::builtins::common::spec::{
9    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
10    ProviderHook, ReductionNaN, ResidencyPolicy, ScalarType, ShapeRequirements,
11};
12use crate::builtins::common::{gpu_helpers, tensor};
13#[cfg(feature = "doc_export")]
14use crate::register_builtin_doc_text;
15use crate::{register_builtin_fusion_spec, register_builtin_gpu_spec};
16use runmat_accelerate_api::{GpuTensorHandle, HostTensorView, ProviderPrecision};
17use runmat_builtins::{CharArray, IntValue, Tensor, Value};
18use runmat_macros::runtime_builtin;
19
/// MATLAB-style error surfaced when no acceleration provider is registered.
const ERR_NO_PROVIDER: &str = "gpuArray: no acceleration provider registered";
21
22#[cfg(feature = "doc_export")]
23pub const DOC_MD: &str = r#"---
24title: "gpuArray"
25category: "acceleration/gpu"
26keywords: ["gpuArray", "gpu", "device", "upload", "accelerate", "dtype", "like", "size"]
27summary: "Move MATLAB values onto the active GPU with optional size, dtype, and prototype controls."
28references:
29  - https://www.mathworks.com/help/parallel-computing/gpuarray.html
30gpu_support:
31  elementwise: false
32  reduction: false
33  precisions: ["f32", "f64"]
34  broadcasting: "none"
35  notes: "Uploads host-resident data through the provider `upload` hook, re-uploading gpuArray inputs when dtype conversion is requested. Supports MATLAB-style size vectors, class strings, and `'like'` prototypes."
36fusion:
37  elementwise: false
38  reduction: false
39  max_inputs: 1
40  constants: "inline"
41requires_feature: null
42tested:
43  unit: "builtins::acceleration::gpu::gpuarray::tests"
44  integration: "builtins::acceleration::gpu::gpuarray::tests::gpu_array_transfers_numeric_tensor"
45  conversions: "builtins::acceleration::gpu::gpuarray::tests::gpu_array_casts_to_int32"
46  reshape: "builtins::acceleration::gpu::gpuarray::tests::gpu_array_applies_size_arguments"
47  wgpu: "builtins::acceleration::gpu::gpuarray::tests::gpu_array_wgpu_roundtrip"
48---
49
50# What does the `gpuArray` function do in MATLAB / RunMat?
51`gpuArray(X)` moves MATLAB values onto the active GPU and returns a handle that the rest of the
52runtime can execute on. RunMat mirrors MATLAB semantics, including MATLAB-style size arguments,
53explicit dtype toggles (such as `'single'`, `'int32'`, `'logical'`), and the `'like'` prototype
54syntax that matches the class of an existing array.
55
56## How does the `gpuArray` function behave in MATLAB / RunMat?
57- Accepts numeric tensors, logical arrays, booleans, character vectors, and existing gpuArray
58  handles. Other input types raise descriptive errors so callers can gather or convert first.
59- Optional leading size arguments (`gpuArray(data, m, n, ...)` or `gpuArray(data, [m n ...])`)
60  reshape the uploaded value. The element count must match the requested size.
61- Class strings such as `'single'`, `'double'`, `'int32'`, `'uint8'`, and `'logical'` convert the
62  data before upload, matching MATLAB casting semantics (round-to-nearest with saturation for
63  integers, `NaN`→0 for integer classes, and errors when converting `NaN` to logical).
64- `'like', prototype` infers the dtype (and logical state) from `prototype`. Explicit class strings
65  override the inference when both are supplied.
66- `"gpuArray"` strings are accepted as no-ops so call-sites that forward arguments from constructors
67  such as `zeros(..., 'gpuArray')` remain compatible.
68- Inputs that are already gpuArray handles pass through by default. When a class change is requested,
69  RunMat gathers the data, performs the conversion, uploads a fresh buffer, and frees the old handle.
70- When no acceleration provider is registered, the builtin raises `gpuArray: no acceleration provider
71  registered`.
72
73## `gpuArray` GPU Execution Behaviour
74`gpuArray` itself runs on the CPU. For host inputs it prepares a `HostTensorView` and forwards it to
75the provider’s `upload` hook. For gpuArray inputs that require dtype conversion, the builtin gathers
76the existing buffer, casts the result on the host, uploads a replacement, and frees the original
77handle. Providers that do not yet implement `upload` should report an informative error; the builtin
78surface mirrors MATLAB’s message by prefixing it with `gpuArray:`.
79
80## GPU residency in RunMat (Do I need `gpuArray`?)
81RunMat’s auto-offload planner transparently moves and keeps tensors on the GPU when it predicts a
82benefit. You typically call `gpuArray` to honour MATLAB scripts that opt-in explicitly, to enforce
83residency before a long computation, or when you need MATLAB-style dtype conversion alongside the
84upload. The builtin never forces a host copy once the handle has been created.
85
86## Examples of using the `gpuArray` function in MATLAB / RunMat
87
88### Moving a matrix to the GPU for elementwise work
89```matlab
90A = [1 2 3; 4 5 6];
91G = gpuArray(A);
92out = gather(sin(G));
93```
94
95Expected output:
96
97```matlab
98out =
99  2×3
100
101    0.8415    0.9093    0.1411
102   -0.7568   -0.9589   -0.2794
103```
104
105### Uploading a scalar with dtype conversion
106```matlab
107pi_single = gpuArray(pi, 'single');
108isa(pi_single, 'gpuArray');
109class(gather(pi_single));
110```
111
112Expected output:
113
114```matlab
115ans =
116  logical
117     1
118
119ans =
120  single
121```
122
123### Converting host data to a logical gpuArray
124```matlab
125mask = gpuArray([0 2 -5 0], 'logical');
126gather(mask)
127```
128
129Expected output:
130
131```matlab
132ans =
133  1×4 logical array
134
135   0   1   1   0
136```
137
138### Matching an existing prototype with `'like'`
139```matlab
140template = gpuArray(true(2, 2));
141values = gpuArray([10 20 30 40], [2 2], 'like', template);
142isequal(gather(values), logical([10 20; 30 40]))
143```
144
145Expected output:
146
147```matlab
148ans =
149  logical
150     1
151```
152
153### Reshaping during upload
154```matlab
155flat = 1:6;
156G = gpuArray(flat, 2, 3);
157size(G)
158```
159
160Expected output:
161
162```matlab
163ans =
164     2     3
165```
166
167### Calling `gpuArray` on an existing gpuArray handle
168```matlab
169G = gpuArray([1 2 3]);
170H = gpuArray(G, 'double');
171isequal(G, H)
172```
173
174Expected output:
175
176```matlab
177ans =
178  logical
179     1
180```
181
182## FAQ
183
184**Can I reshape while uploading?**  
185Yes. Provide either individual dimension arguments or a size vector. The element count must match.
186
187**What class strings are supported?**  
188`'double'`, `'single'`, `'logical'`, `'int8'`, `'int16'`, `'int32'`, `'int64'`, `'uint8'`, `'uint16'`,
189`'uint32'`, `'uint64'`, and the compatibility no-op `'gpuArray'`. Unknown strings raise an error.
190
191**How does `'like'` interact with explicit class strings?**  
192`'like'` sets the default dtype (for example, inferring logical arrays), but any subsequent class
193string overrides that inference.
194
195**Can I promote an existing gpuArray to another class?**  
196Yes. When you pass a gpuArray as the first argument, `gpuArray` reuploads the buffer if a class
197change is requested. Without a change it simply updates metadata (for example clearing logical flags).
198
199**What happens when the provider is missing?**  
200The builtin raises `gpuArray: no acceleration provider registered`. Register a provider (for example,
201the in-process test provider or the WGPU backend) before uploading values.
202
203**Does `gpuArray` support complex inputs, structs, or cell arrays?**  
204Not yet. Gather or decompose the data first, mirroring MATLAB’s requirement to convert to supported
205numeric or logical types.
206
207## See Also
208[gather](./gather), [gpuDevice](./gpudevice), [gpuInfo](./gpuinfo), [arrayfun](./arrayfun), [zeros](../../array/creation/zeros), [sum](../../math/reduction/sum)
209
210## Source & Feedback
211- Implementation: `crates/runmat-runtime/src/builtins/acceleration/gpu/gpuarray.rs`
212- Found a bug or behavior mismatch? Please open an issue with a minimal reproduction.
213"#;
214
/// GPU execution metadata for `gpuArray`: a custom provider `upload` hook that
/// always yields a new device handle; no broadcasting or reduction semantics.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "gpuArray",
    op_kind: GpuOpKind::Custom("upload"),
    supported_precisions: &[ScalarType::F32, ScalarType::F64],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[ProviderHook::Custom("upload")],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::NewHandle,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Invokes the provider `upload` hook, reuploading gpuArray inputs when dtype conversion is requested. Handles class strings, size vectors, and `'like'` prototypes.",
};

register_builtin_gpu_spec!(GPU_SPEC);
231
/// Fusion metadata: `gpuArray` is a residency boundary, so it never joins
/// fused elementwise or reduction graphs.
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "gpuArray",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes:
        "Acts as a residency boundary; fusion graphs never cross explicit host↔device transfers.",
};

register_builtin_fusion_spec!(FUSION_SPEC);
244
245#[cfg(feature = "doc_export")]
246register_builtin_doc_text!("gpuArray", DOC_MD);
247
/// Entry point for the MATLAB-compatible `gpuArray` builtin.
///
/// `value` is the data to move (a host value or an existing gpuArray handle);
/// `rest` carries optional leading size arguments, class strings such as
/// `'single'`, and `'like', prototype` pairs. Returns a `Value::GpuTensor`
/// handle on success or a MATLAB-style `gpuArray: ...` error string.
#[runtime_builtin(
    name = "gpuArray",
    category = "acceleration/gpu",
    summary = "Move data to the GPU and return a gpuArray handle.",
    keywords = "gpuArray,gpu,accelerate,upload,dtype,like",
    examples = "G = gpuArray([1 2 3], 'single');",
    accel = "array_construct"
)]
fn gpu_array_builtin(value: Value, rest: Vec<Value>) -> Result<Value, String> {
    let options = parse_options(&rest)?;
    // Capture the precision of an incoming device handle before it may be
    // consumed below, so passthrough keeps the original precision.
    let incoming_precision = match &value {
        Value::GpuTensor(handle) => runmat_accelerate_api::handle_precision(handle),
        _ => None,
    };
    let dtype = resolve_dtype(&value, &options)?;
    let dims = options.dims.clone();

    // Device inputs may be passed through or re-uploaded; host inputs are
    // coerced, cast, and uploaded through the provider `upload` hook.
    let prepared = match value {
        Value::GpuTensor(handle) => convert_device_value(handle, dtype)?,
        other => upload_host_value(other, dtype)?,
    };

    let mut handle = prepared.handle;

    // Apply any MATLAB-style size arguments as a metadata-only reshape.
    if let Some(dims) = dims.as_ref() {
        apply_dims(&mut handle, dims)?;
    }

    let provider_precision = runmat_accelerate_api::provider()
        .map(|p| p.precision())
        .unwrap_or(ProviderPrecision::F64);
    // Only an explicit 'single' request forces F32; other classes inherit the
    // incoming handle precision or fall back to the provider default.
    // NOTE(review): gpuArray(G, 'double') on an F32 handle keeps F32 here —
    // confirm this matches the intended promotion semantics.
    let requested_precision = match dtype {
        DataClass::Single => Some(ProviderPrecision::F32),
        _ => None,
    };
    let final_precision = requested_precision
        .or(incoming_precision)
        .unwrap_or(provider_precision);
    runmat_accelerate_api::set_handle_precision(&handle, final_precision);

    runmat_accelerate_api::set_handle_logical(&handle, prepared.logical);

    Ok(Value::GpuTensor(handle))
}
292
/// MATLAB class names that `gpuArray` understands for dtype conversion.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum DataClass {
    Double,
    Single,
    Logical,
    Int8,
    Int16,
    Int32,
    Int64,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
}

impl DataClass {
    /// Map a lowercase MATLAB class string to its `DataClass`.
    ///
    /// Returns `None` both for unknown tags and for the `"gpuarray"`
    /// compatibility tag, which callers treat as a no-op qualifier.
    fn from_tag(tag: &str) -> Option<Self> {
        let class = match tag {
            "double" => Self::Double,
            "single" | "float32" => Self::Single,
            "logical" | "bool" | "boolean" => Self::Logical,
            "int8" => Self::Int8,
            "int16" => Self::Int16,
            "int32" | "int" => Self::Int32,
            "int64" => Self::Int64,
            "uint8" => Self::UInt8,
            "uint16" => Self::UInt16,
            "uint32" => Self::UInt32,
            "uint64" => Self::UInt64,
            // `"gpuarray"` is accepted elsewhere as a compatibility no-op.
            _ => return None,
        };
        Some(class)
    }
}
327
/// Parsed trailing arguments of a `gpuArray` call.
#[derive(Debug, Default)]
struct ParsedOptions {
    // Requested output shape from leading size arguments, if any were given.
    dims: Option<Vec<usize>>,
    // Class requested via an explicit string such as 'single' or 'int32'.
    explicit_dtype: Option<DataClass>,
    // Prototype value supplied after the 'like' keyword.
    prototype: Option<Value>,
}
334
335fn parse_options(rest: &[Value]) -> Result<ParsedOptions, String> {
336    let (index_after_dims, dims) = parse_size_arguments(rest)?;
337    let mut options = ParsedOptions {
338        dims,
339        ..ParsedOptions::default()
340    };
341
342    let mut idx = index_after_dims;
343    while idx < rest.len() {
344        let tag = value_to_lower_string(&rest[idx]).ok_or_else(|| {
345            format!(
346                "gpuArray: unexpected argument {:?}; expected a class string or the keyword 'like'",
347                rest[idx]
348            )
349        })?;
350
351        match tag.as_str() {
352            "like" => {
353                idx += 1;
354                if idx >= rest.len() {
355                    return Err("gpuArray: expected a prototype value after 'like'".to_string());
356                }
357                if options.prototype.is_some() {
358                    return Err("gpuArray: duplicate 'like' qualifier".to_string());
359                }
360                options.prototype = Some(rest[idx].clone());
361            }
362            "distributed" | "codistributed" => {
363                return Err("gpuArray: codistributed arrays are not supported yet".to_string());
364            }
365            tag => {
366                if let Some(class) = DataClass::from_tag(tag) {
367                    if let Some(existing) = options.explicit_dtype {
368                        if existing != class {
369                            return Err(
370                                "gpuArray: conflicting type qualifiers supplied".to_string()
371                            );
372                        }
373                    } else {
374                        options.explicit_dtype = Some(class);
375                    }
376                } else if tag != "gpuarray" {
377                    return Err(format!("gpuArray: unrecognised option '{tag}'"));
378                }
379            }
380        }
381
382        idx += 1;
383    }
384
385    Ok(options)
386}
387
388fn parse_size_arguments(rest: &[Value]) -> Result<(usize, Option<Vec<usize>>), String> {
389    let mut idx = 0;
390    let mut dims: Vec<usize> = Vec::new();
391    let mut vector_consumed = false;
392
393    while idx < rest.len() {
394        // Stop at textual qualifiers only; numeric values continue parsing as size args.
395        match &rest[idx] {
396            Value::String(_) | Value::StringArray(_) | Value::CharArray(_) => break,
397            _ => {}
398        }
399
400        match &rest[idx] {
401            Value::Int(i) => {
402                dims.push(int_to_dim(i)?);
403            }
404            Value::Num(n) => {
405                dims.push(float_to_dim(*n)?);
406            }
407            Value::Tensor(t) => {
408                if vector_consumed || !dims.is_empty() {
409                    return Err(
410                        "gpuArray: size vectors cannot be combined with scalar dimensions"
411                            .to_string(),
412                    );
413                }
414                dims = tensor_to_dims(t)?;
415                vector_consumed = true;
416            }
417            _ => break,
418        }
419        idx += 1;
420    }
421
422    let dims_option = if dims.is_empty() { None } else { Some(dims) };
423    Ok((idx, dims_option))
424}
425
426fn value_to_lower_string(value: &Value) -> Option<String> {
427    crate::builtins::common::tensor::value_to_string(value).map(|s| s.trim().to_ascii_lowercase())
428}
429
430fn int_to_dim(value: &IntValue) -> Result<usize, String> {
431    let raw = value.to_i64();
432    if raw < 0 {
433        return Err("gpuArray: size arguments must be non-negative integers".to_string());
434    }
435    Ok(raw as usize)
436}
437
/// Convert a floating-point size argument into a dimension.
///
/// Rejects non-finite values, non-integers (beyond `f64::EPSILON` of the
/// nearest integer), and negative values — in that order, so callers see the
/// same error precedence as before.
fn float_to_dim(value: f64) -> Result<usize, String> {
    match value {
        v if !v.is_finite() => {
            Err("gpuArray: size arguments must be finite integers".to_string())
        }
        v if (v.round() - v).abs() > f64::EPSILON => {
            Err("gpuArray: size arguments must be integers".to_string())
        }
        v if v.round() < 0.0 => {
            Err("gpuArray: size arguments must be non-negative".to_string())
        }
        v => Ok(v.round() as usize),
    }
}
451
452fn tensor_to_dims(tensor: &Tensor) -> Result<Vec<usize>, String> {
453    let mut dims = Vec::with_capacity(tensor.data.len());
454    for value in &tensor.data {
455        dims.push(float_to_dim(*value)?);
456    }
457    Ok(dims)
458}
459
460fn resolve_dtype(value: &Value, options: &ParsedOptions) -> Result<DataClass, String> {
461    if let Some(explicit) = options.explicit_dtype {
462        return Ok(explicit);
463    }
464    if let Some(prototype) = options.prototype.as_ref() {
465        return infer_dtype_from_prototype(prototype);
466    }
467    if value_defaults_to_logical(value) {
468        return Ok(DataClass::Logical);
469    }
470    Ok(DataClass::Double)
471}
472
473fn infer_dtype_from_prototype(proto: &Value) -> Result<DataClass, String> {
474    match proto {
475        Value::GpuTensor(handle) => {
476            if runmat_accelerate_api::handle_is_logical(handle) {
477                Ok(DataClass::Logical)
478            } else {
479                Ok(DataClass::Double)
480            }
481        }
482        Value::LogicalArray(_) | Value::Bool(_) => Ok(DataClass::Logical),
483        Value::Int(int) => Ok(match int {
484            IntValue::I8(_) => DataClass::Int8,
485            IntValue::I16(_) => DataClass::Int16,
486            IntValue::I32(_) => DataClass::Int32,
487            IntValue::I64(_) => DataClass::Int64,
488            IntValue::U8(_) => DataClass::UInt8,
489            IntValue::U16(_) => DataClass::UInt16,
490            IntValue::U32(_) => DataClass::UInt32,
491            IntValue::U64(_) => DataClass::UInt64,
492        }),
493        Value::Tensor(_) | Value::Num(_) => Ok(DataClass::Double),
494        Value::CharArray(_) => Ok(DataClass::Double),
495        Value::String(_) => Err(
496            "gpuArray: 'like' does not accept MATLAB string scalars; convert to char() first".to_string(),
497        ),
498        Value::StringArray(_) => Err(
499            "gpuArray: 'like' does not accept string arrays; convert to char arrays first".to_string(),
500        ),
501        Value::Complex(_, _) | Value::ComplexTensor(_) => Err(
502            "gpuArray: complex prototypes are not supported yet; provide real-valued inputs".to_string(),
503        ),
504        other => Err(format!(
505            "gpuArray: unsupported 'like' prototype type {other:?}; expected numeric or logical values"
506        )),
507    }
508}
509
510fn value_defaults_to_logical(value: &Value) -> bool {
511    match value {
512        Value::LogicalArray(_) | Value::Bool(_) => true,
513        Value::GpuTensor(handle) => runmat_accelerate_api::handle_is_logical(handle),
514        _ => false,
515    }
516}
517
/// A device handle plus the logical flag that should be stamped onto it.
struct PreparedHandle {
    // Freshly created or passed-through device buffer handle.
    handle: GpuTensorHandle,
    // Whether the uploaded data represents a MATLAB logical array.
    logical: bool,
}
522
523fn upload_host_value(value: Value, dtype: DataClass) -> Result<PreparedHandle, String> {
524    #[cfg(all(test, feature = "wgpu"))]
525    {
526        if runmat_accelerate_api::provider().is_none() {
527            let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
528                runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
529            );
530        }
531    }
532    let provider = runmat_accelerate_api::provider().ok_or_else(|| ERR_NO_PROVIDER.to_string())?;
533    let tensor = coerce_host_value(value)?;
534    let (mut tensor, logical) = cast_tensor(tensor, dtype)?;
535
536    // Upload
537    let view = HostTensorView {
538        data: &tensor.data,
539        shape: &tensor.shape,
540    };
541    let handle = provider
542        .upload(&view)
543        .map_err(|err| format!("gpuArray: {err}"))?;
544
545    // Drop host tensor eagerly to release memory
546    tensor.data.clear();
547
548    Ok(PreparedHandle { handle, logical })
549}
550
551fn convert_device_value(
552    handle: GpuTensorHandle,
553    dtype: DataClass,
554) -> Result<PreparedHandle, String> {
555    #[cfg(all(test, feature = "wgpu"))]
556    {
557        if handle.device_id != 0 {
558            let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
559                runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
560            );
561        }
562    }
563    match dtype {
564        DataClass::Logical => {
565            if runmat_accelerate_api::handle_is_logical(&handle) {
566                return Ok(PreparedHandle {
567                    handle,
568                    logical: true,
569                });
570            }
571        }
572        DataClass::Double => {
573            if runmat_accelerate_api::handle_is_logical(&handle) {
574                runmat_accelerate_api::set_handle_logical(&handle, false);
575            }
576            return Ok(PreparedHandle {
577                handle,
578                logical: false,
579            });
580        }
581        _ => {}
582    }
583
584    let provider = runmat_accelerate_api::provider().ok_or_else(|| ERR_NO_PROVIDER.to_string())?;
585    let tensor = gpu_helpers::gather_tensor(&handle).map_err(|err| format!("gpuArray: {err}"))?;
586    let (mut tensor, logical) = cast_tensor(tensor, dtype)?;
587
588    let view = HostTensorView {
589        data: &tensor.data,
590        shape: &tensor.shape,
591    };
592    let new_handle = provider
593        .upload(&view)
594        .map_err(|err| format!("gpuArray: {err}"))?;
595
596    provider.free(&handle).ok();
597    tensor.data.clear();
598
599    Ok(PreparedHandle {
600        handle: new_handle,
601        logical,
602    })
603}
604
605fn coerce_host_value(value: Value) -> Result<Tensor, String> {
606    match value {
607        Value::Tensor(t) => Ok(t),
608        Value::LogicalArray(logical) => tensor::logical_to_tensor(&logical)
609            .map_err(|err| format!("gpuArray: {err}")),
610        Value::Bool(flag) => Tensor::new(vec![if flag { 1.0 } else { 0.0 }], vec![1, 1])
611            .map_err(|err| format!("gpuArray: {err}")),
612        Value::Num(n) => Tensor::new(vec![n], vec![1, 1]).map_err(|err| format!("gpuArray: {err}")),
613        Value::Int(i) => Tensor::new(vec![i.to_f64()], vec![1, 1])
614            .map_err(|err| format!("gpuArray: {err}")),
615        Value::CharArray(ca) => char_array_to_tensor(&ca),
616        Value::String(text) => {
617            let ca = CharArray::new_row(&text);
618            char_array_to_tensor(&ca)
619        }
620        Value::StringArray(_) => Err(
621            "gpuArray: string arrays are not supported yet; convert to char arrays with CHAR first"
622                .to_string(),
623        ),
624        Value::Complex(_, _) | Value::ComplexTensor(_) => Err(
625            "gpuArray: complex inputs are not supported yet; split real and imaginary parts before uploading"
626                .to_string(),
627        ),
628        other => Err(format!(
629            "gpuArray: unsupported input type for GPU transfer: {other:?}"
630        )),
631    }
632}
633
634fn cast_tensor(mut tensor: Tensor, dtype: DataClass) -> Result<(Tensor, bool), String> {
635    let logical = match dtype {
636        DataClass::Logical => {
637            convert_to_logical(&mut tensor.data)?;
638            true
639        }
640        DataClass::Single => {
641            convert_to_single(&mut tensor.data);
642            false
643        }
644        DataClass::Int8 => {
645            convert_to_int_range(&mut tensor.data, i8::MIN as f64, i8::MAX as f64);
646            false
647        }
648        DataClass::Int16 => {
649            convert_to_int_range(&mut tensor.data, i16::MIN as f64, i16::MAX as f64);
650            false
651        }
652        DataClass::Int32 => {
653            convert_to_int_range(&mut tensor.data, i32::MIN as f64, i32::MAX as f64);
654            false
655        }
656        DataClass::Int64 => {
657            convert_to_int_range(&mut tensor.data, i64::MIN as f64, i64::MAX as f64);
658            false
659        }
660        DataClass::UInt8 => {
661            convert_to_int_range(&mut tensor.data, 0.0, u8::MAX as f64);
662            false
663        }
664        DataClass::UInt16 => {
665            convert_to_int_range(&mut tensor.data, 0.0, u16::MAX as f64);
666            false
667        }
668        DataClass::UInt32 => {
669            convert_to_int_range(&mut tensor.data, 0.0, u32::MAX as f64);
670            false
671        }
672        DataClass::UInt64 => {
673            convert_to_int_range(&mut tensor.data, 0.0, u64::MAX as f64);
674            false
675        }
676        DataClass::Double => false,
677    };
678
679    Ok((tensor, logical))
680}
681
/// Map every element to 0.0/1.0 in place; NaN cannot become logical.
fn convert_to_logical(data: &mut [f64]) -> Result<(), String> {
    for slot in data.iter_mut() {
        if slot.is_nan() {
            return Err("gpuArray: cannot convert NaN to logical".to_string());
        }
        *slot = if *slot == 0.0 { 0.0 } else { 1.0 };
    }
    Ok(())
}
691
/// Round-trip each element through `f32` to emulate single precision storage.
fn convert_to_single(data: &mut [f64]) {
    data.iter_mut().for_each(|slot| *slot = f64::from(*slot as f32));
}
697
/// Cast elements to an integer class in place: round to nearest (half away
/// from zero, via `f64::round`), saturate to `[min, max]`, map infinities to
/// the nearest bound, and map NaN to 0 per MATLAB integer-conversion rules
/// (previously NaN incorrectly saturated to `min`, contradicting the
/// documented "NaN→0 for integer classes" behaviour).
fn convert_to_int_range(data: &mut [f64], min: f64, max: f64) {
    for value in data.iter_mut() {
        *value = if value.is_nan() {
            // MATLAB: int*(NaN) == 0; 0 lies inside every supported range.
            0.0
        } else if value.is_infinite() {
            if value.is_sign_negative() {
                min
            } else {
                max
            }
        } else {
            value.round().clamp(min, max)
        };
    }
}
712
713fn apply_dims(handle: &mut GpuTensorHandle, dims: &[usize]) -> Result<(), String> {
714    let new_elems: usize = dims.iter().product();
715    let current_elems: usize = if handle.shape.is_empty() {
716        new_elems
717    } else {
718        handle.shape.iter().product()
719    };
720    if new_elems != current_elems {
721        return Err(format!(
722            "gpuArray: cannot reshape gpuArray of {current_elems} elements into size {:?}",
723            dims
724        ));
725    }
726    handle.shape = dims.to_vec();
727    Ok(())
728}
729
730fn char_array_to_tensor(ca: &CharArray) -> Result<Tensor, String> {
731    let rows = ca.rows;
732    let cols = ca.cols;
733    if rows == 0 || cols == 0 {
734        return Tensor::new(Vec::new(), vec![rows, cols]).map_err(|err| format!("gpuArray: {err}"));
735    }
736    let mut data = vec![0.0; rows * cols];
737    // Store in row-major to preserve the original character order when interpreted with column-major indexing
738    for row in 0..rows {
739        for col in 0..cols {
740            let idx_char = row * cols + col;
741            let ch = ca.data[idx_char];
742            data[row * cols + col] = ch as u32 as f64;
743        }
744    }
745    Tensor::new(data, vec![rows, cols]).map_err(|err| format!("gpuArray: {err}"))
746}
747
748#[cfg(test)]
749mod tests {
750    use super::*;
751    use crate::builtins::common::test_support;
752    use runmat_accelerate_api::HostTensorView;
753    use runmat_builtins::{IntValue, LogicalArray};
754
755    fn call(value: Value, rest: Vec<Value>) -> Result<Value, String> {
756        gpu_array_builtin(value, rest)
757    }
758
759    #[test]
760    fn gpu_array_transfers_numeric_tensor() {
761        test_support::with_test_provider(|_| {
762            let tensor = Tensor::new(vec![1.0, 2.0, 3.0, 4.0], vec![2, 2]).unwrap();
763            let result = call(Value::Tensor(tensor.clone()), Vec::new()).expect("gpuArray upload");
764            let Value::GpuTensor(handle) = result else {
765                panic!("expected gpu tensor");
766            };
767            assert_eq!(handle.shape, tensor.shape);
768            let gathered =
769                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather values");
770            assert_eq!(gathered.shape, tensor.shape);
771            assert_eq!(gathered.data, tensor.data);
772        });
773    }
774
775    #[test]
776    fn gpu_array_marks_logical_inputs() {
777        test_support::with_test_provider(|_| {
778            let logical =
779                LogicalArray::new(vec![1, 0, 1, 1], vec![2, 2]).expect("logical construction");
780            let result =
781                call(Value::LogicalArray(logical.clone()), Vec::new()).expect("gpuArray logical");
782            let Value::GpuTensor(handle) = result else {
783                panic!("expected gpu tensor");
784            };
785            assert!(runmat_accelerate_api::handle_is_logical(&handle));
786            let gathered =
787                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather logical");
788            assert_eq!(gathered.shape, logical.shape);
789            assert_eq!(gathered.data, vec![1.0, 0.0, 1.0, 1.0]);
790        });
791    }
792
793    #[test]
794    fn gpu_array_handles_scalar_bool() {
795        test_support::with_test_provider(|_| {
796            let result = call(Value::Bool(true), Vec::new()).expect("gpuArray bool");
797            let Value::GpuTensor(handle) = result else {
798                panic!("expected gpu tensor");
799            };
800            assert!(runmat_accelerate_api::handle_is_logical(&handle));
801            let gathered =
802                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather bool");
803            assert_eq!(gathered.shape, vec![1, 1]);
804            assert_eq!(gathered.data, vec![1.0]);
805        });
806    }
807
808    #[test]
809    fn gpu_array_supports_char_arrays() {
810        test_support::with_test_provider(|_| {
811            let chars = CharArray::new("row1row2".chars().collect(), 2, 4).unwrap();
812            let original: Vec<char> = chars.data.clone();
813            let result =
814                call(Value::CharArray(chars), Vec::new()).expect("gpuArray char array upload");
815            let Value::GpuTensor(handle) = result else {
816                panic!("expected gpu tensor");
817            };
818            let gathered =
819                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather chars");
820            assert_eq!(gathered.shape, vec![2, 4]);
821            let mut recovered = Vec::new();
822            for col in 0..4 {
823                for row in 0..2 {
824                    let idx = row + col * 2;
825                    let code = gathered.data[idx];
826                    let ch = char::from_u32(code as u32)
827                        .expect("valid unicode scalar from numeric code");
828                    recovered.push(ch);
829                }
830            }
831            assert_eq!(recovered, original);
832        });
833    }
834
835    #[test]
836    fn gpu_array_converts_strings() {
837        test_support::with_test_provider(|_| {
838            let result = call(Value::String("gpu".into()), Vec::new()).expect("gpuArray string");
839            let Value::GpuTensor(handle) = result else {
840                panic!("expected gpu tensor");
841            };
842            let gathered =
843                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather string");
844            assert_eq!(gathered.shape, vec![1, 3]);
845            let expected: Vec<f64> = "gpu".chars().map(|ch| ch as u32 as f64).collect();
846            assert_eq!(gathered.data, expected);
847        });
848    }
849
850    #[test]
851    fn gpu_array_passthrough_existing_handle() {
852        test_support::with_test_provider(|provider| {
853            let tensor = Tensor::new(vec![5.0, 6.0], vec![2, 1]).unwrap();
854            let view = HostTensorView {
855                data: &tensor.data,
856                shape: &tensor.shape,
857            };
858            let handle = provider.upload(&view).expect("upload");
859            let cloned = handle.clone();
860            let result =
861                call(Value::GpuTensor(handle.clone()), Vec::new()).expect("gpuArray passthrough");
862            let Value::GpuTensor(returned) = result else {
863                panic!("expected gpu tensor");
864            };
865            assert_eq!(returned.buffer_id, cloned.buffer_id);
866            assert_eq!(returned.shape, cloned.shape);
867        });
868    }
869
870    #[test]
871    fn gpu_array_casts_to_int32() {
872        test_support::with_test_provider(|_| {
873            let tensor = Tensor::new(vec![1.2, -3.7, 123456.0], vec![3, 1]).unwrap();
874            let result =
875                call(Value::Tensor(tensor), vec![Value::from("int32")]).expect("gpuArray int32");
876            let Value::GpuTensor(handle) = result else {
877                panic!("expected gpu tensor");
878            };
879            let gathered =
880                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather int32");
881            assert_eq!(gathered.data, vec![1.0, -4.0, 123456.0]);
882        });
883    }
884
885    #[test]
886    fn gpu_array_casts_to_uint8() {
887        test_support::with_test_provider(|_| {
888            let tensor = Tensor::new(vec![-12.0, 12.8, 300.4, f64::INFINITY], vec![4, 1]).unwrap();
889            let result =
890                call(Value::Tensor(tensor), vec![Value::from("uint8")]).expect("gpuArray uint8");
891            let Value::GpuTensor(handle) = result else {
892                panic!("expected gpu tensor");
893            };
894            let gathered =
895                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather uint8");
896            assert_eq!(gathered.data, vec![0.0, 13.0, 255.0, 255.0]);
897        });
898    }
899
900    #[test]
901    fn gpu_array_single_precision_rounds() {
902        test_support::with_test_provider(|_| {
903            let tensor = Tensor::new(vec![1.23456789, -9.87654321], vec![2, 1]).unwrap();
904            let result =
905                call(Value::Tensor(tensor), vec![Value::from("single")]).expect("gpuArray single");
906            let Value::GpuTensor(handle) = result else {
907                panic!("expected gpu tensor");
908            };
909            let gathered =
910                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather single");
911            let expected = [1.234_567_9_f32 as f64, (-9.876_543_f32) as f64];
912            for (observed, expected) in gathered.data.iter().zip(expected.iter()) {
913                assert!((observed - expected).abs() < 1e-6);
914            }
915        });
916    }
917
918    #[test]
919    fn gpu_array_like_infers_logical() {
920        test_support::with_test_provider(|_| {
921            let tensor = Tensor::new(vec![0.0, 2.0, -3.0], vec![3, 1]).unwrap();
922            let logical_proto =
923                LogicalArray::new(vec![0, 1, 0], vec![3, 1]).expect("logical proto");
924            let result = call(
925                Value::Tensor(tensor),
926                vec![Value::from("like"), Value::LogicalArray(logical_proto)],
927            )
928            .expect("gpuArray like logical");
929            let Value::GpuTensor(handle) = result else {
930                panic!("expected gpu tensor");
931            };
932            assert!(runmat_accelerate_api::handle_is_logical(&handle));
933            let gathered = test_support::gather(Value::GpuTensor(handle.clone())).expect("gather");
934            assert_eq!(gathered.data, vec![0.0, 1.0, 1.0]);
935        });
936    }
937
938    #[test]
939    fn gpu_array_like_requires_argument() {
940        test_support::with_test_provider(|_| {
941            let tensor = Tensor::new(vec![1.0], vec![1, 1]).unwrap();
942            let err = call(Value::Tensor(tensor), vec![Value::from("like")]).unwrap_err();
943            assert!(err.contains("expected a prototype value"));
944        });
945    }
946
947    #[test]
948    fn gpu_array_unknown_option_errors() {
949        test_support::with_test_provider(|_| {
950            let tensor = Tensor::new(vec![1.0], vec![1, 1]).unwrap();
951            let err = call(Value::Tensor(tensor), vec![Value::from("mystery")]).unwrap_err();
952            assert!(err.contains("unrecognised option"));
953        });
954    }
955
956    #[test]
957    fn gpu_array_gpu_to_logical_reuploads() {
958        test_support::with_test_provider(|provider| {
959            let tensor = Tensor::new(vec![2.0, 0.0, -5.5], vec![3, 1]).unwrap();
960            let view = HostTensorView {
961                data: &tensor.data,
962                shape: &tensor.shape,
963            };
964            let handle = provider.upload(&view).expect("upload");
965            let result = call(
966                Value::GpuTensor(handle.clone()),
967                vec![Value::from("logical")],
968            )
969            .expect("gpuArray logical cast");
970            let Value::GpuTensor(new_handle) = result else {
971                panic!("expected gpu tensor");
972            };
973            assert!(runmat_accelerate_api::handle_is_logical(&new_handle));
974            let gathered =
975                test_support::gather(Value::GpuTensor(new_handle.clone())).expect("gather");
976            assert_eq!(gathered.data, vec![1.0, 0.0, 1.0]);
977            provider.free(&handle).ok();
978            provider.free(&new_handle).ok();
979        });
980    }
981
982    #[test]
983    fn gpu_array_gpu_logical_to_double_clears_flag() {
984        test_support::with_test_provider(|provider| {
985            let tensor = Tensor::new(vec![1.0, 0.0], vec![2, 1]).unwrap();
986            let view = HostTensorView {
987                data: &tensor.data,
988                shape: &tensor.shape,
989            };
990            let handle = provider.upload(&view).expect("upload");
991            runmat_accelerate_api::set_handle_logical(&handle, true);
992            let result = call(
993                Value::GpuTensor(handle.clone()),
994                vec![Value::from("double")],
995            )
996            .expect("gpuArray double cast");
997            let Value::GpuTensor(new_handle) = result else {
998                panic!("expected gpu tensor");
999            };
1000            assert!(!runmat_accelerate_api::handle_is_logical(&new_handle));
1001            provider.free(&handle).ok();
1002            provider.free(&new_handle).ok();
1003        });
1004    }
1005
1006    #[test]
1007    fn gpu_array_applies_size_arguments() {
1008        test_support::with_test_provider(|_| {
1009            let tensor = Tensor::new(vec![1.0, 2.0, 3.0, 4.0], vec![4, 1]).unwrap();
1010            let result = call(
1011                Value::Tensor(tensor),
1012                vec![Value::from(2i32), Value::from(2i32)],
1013            )
1014            .expect("gpuArray reshape");
1015            let Value::GpuTensor(handle) = result else {
1016                panic!("expected gpu tensor");
1017            };
1018            assert_eq!(handle.shape, vec![2, 2]);
1019        });
1020    }
1021
1022    #[test]
1023    fn gpu_array_gpu_size_arguments_update_shape() {
1024        test_support::with_test_provider(|provider| {
1025            let tensor = Tensor::new(vec![1.0, 2.0, 3.0, 4.0], vec![4, 1]).unwrap();
1026            let view = HostTensorView {
1027                data: &tensor.data,
1028                shape: &tensor.shape,
1029            };
1030            let handle = provider.upload(&view).expect("upload");
1031            let result = call(
1032                Value::GpuTensor(handle.clone()),
1033                vec![Value::from(2i32), Value::from(2i32)],
1034            )
1035            .expect("gpuArray gpu reshape");
1036            let Value::GpuTensor(new_handle) = result else {
1037                panic!("expected gpu tensor");
1038            };
1039            assert_eq!(new_handle.shape, vec![2, 2]);
1040            provider.free(&handle).ok();
1041            provider.free(&new_handle).ok();
1042        });
1043    }
1044
1045    #[test]
1046    fn gpu_array_size_mismatch_errors() {
1047        test_support::with_test_provider(|_| {
1048            let tensor = Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap();
1049            let err = call(
1050                Value::Tensor(tensor),
1051                vec![Value::from(2i32), Value::from(2i32)],
1052            )
1053            .unwrap_err();
1054            assert!(err.contains("cannot reshape"));
1055        });
1056    }
1057
1058    #[test]
1059    #[cfg(feature = "wgpu")]
1060    fn gpu_array_wgpu_roundtrip() {
1061        use runmat_accelerate_api::AccelProvider;
1062
1063        match runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
1064            runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
1065        ) {
1066            Ok(provider) => {
1067                let tensor = Tensor::new(vec![1.0, 2.5, 3.5], vec![3, 1]).unwrap();
1068                let result = call(Value::Tensor(tensor.clone()), vec![Value::from("int32")])
1069                    .expect("wgpu upload");
1070                let Value::GpuTensor(handle) = result else {
1071                    panic!("expected gpu tensor");
1072                };
1073                let gathered =
1074                    test_support::gather(Value::GpuTensor(handle.clone())).expect("wgpu gather");
1075                assert_eq!(gathered.shape, vec![3, 1]);
1076                assert_eq!(gathered.data, vec![1.0, 3.0, 4.0]);
1077                provider.free(&handle).ok();
1078            }
1079            Err(err) => {
1080                eprintln!("Skipping gpu_array_wgpu_roundtrip: {err}");
1081            }
1082        }
1083        runmat_accelerate::simple_provider::register_inprocess_provider();
1084    }
1085
1086    #[test]
1087    #[cfg(feature = "doc_export")]
1088    fn doc_examples_present() {
1089        let blocks = test_support::doc_examples(DOC_MD);
1090        assert!(!blocks.is_empty());
1091    }
1092
1093    #[test]
1094    fn gpu_array_accepts_int_scalars() {
1095        test_support::with_test_provider(|_| {
1096            let value = Value::Int(IntValue::I32(7));
1097            let result = call(value, Vec::new()).expect("gpuArray int");
1098            let Value::GpuTensor(handle) = result else {
1099                panic!("expected gpu tensor");
1100            };
1101            let gathered =
1102                test_support::gather(Value::GpuTensor(handle.clone())).expect("gather int");
1103            assert_eq!(gathered.shape, vec![1, 1]);
1104            assert_eq!(gathered.data, vec![7.0]);
1105        });
1106    }
1107}