runmat_runtime/builtins/strings/core/
str2double.rs

1//! MATLAB-compatible `str2double` builtin with GPU-aware semantics for RunMat.
2
3use std::borrow::Cow;
4
5use runmat_builtins::{CellArray, CharArray, StringArray, Tensor, Value};
6use runmat_macros::runtime_builtin;
7
8use crate::builtins::common::spec::{
9    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
10    ReductionNaN, ResidencyPolicy, ShapeRequirements,
11};
12use crate::builtins::common::tensor;
13#[cfg(feature = "doc_export")]
14use crate::register_builtin_doc_text;
15use crate::{gather_if_needed, register_builtin_fusion_spec, register_builtin_gpu_spec};
16
17#[cfg(feature = "doc_export")]
18pub const DOC_MD: &str = r#"---
19title: "str2double"
20category: "strings/core"
21keywords: ["str2double", "string to double", "text conversion", "numeric parsing", "gpu"]
22summary: "Convert strings, character arrays, or cell arrays of text into double-precision numbers with MATLAB-compatible rules."
23references:
24  - https://www.mathworks.com/help/matlab/ref/str2double.html
25gpu_support:
26  elementwise: false
27  reduction: false
28  precisions: []
29  broadcasting: "none"
30  notes: "Runs on the CPU. When inputs reference GPU data, RunMat gathers them before parsing so results match MATLAB exactly."
31fusion:
32  elementwise: false
33  reduction: false
34  max_inputs: 1
35  constants: "inline"
36requires_feature: null
37tested:
38  unit: "builtins::strings::core::str2double::tests"
39  integration: "builtins::strings::core::str2double::tests::str2double_cell_array_of_text"
40---
41
42# What does the `str2double` function do in MATLAB / RunMat?
43`str2double` converts text representations of numbers into double-precision values. It accepts string
44scalars, string arrays, character vectors, character arrays, and cell arrays of character vectors.
45Each element is parsed independently; values that cannot be interpreted as real scalars become `NaN`.
46
47## How does the `str2double` function behave in MATLAB / RunMat?
48- Leading and trailing whitespace is ignored, as are padding spaces that MATLAB inserts in character arrays.
49- Text that contains a single finite real number returns that number. Text with additional characters,
50  embedded operators, or multiple values results in `NaN`.
51- Scientific notation with `e`, `E`, `d`, or `D` exponents is supported (`"1.2e3"`, `"4.5D-6"`, etc.).
52- `"Inf"`, `"Infinity"`, and `"NaN"` (any letter case, with optional sign on `Inf`) map to IEEE special values.
53- Missing string scalars (displayed as `<missing>`) convert to `NaN`, matching MATLAB behaviour.
54- Character arrays return a column vector whose length equals the number of rows; cell arrays preserve their shape.
55
56## `str2double` Function GPU Execution Behaviour
57`str2double` executes entirely on the CPU. If any argument is backed by a GPU buffer (for example, a cell array
58that still wraps GPU-resident character data), RunMat gathers the values first, parses the text on the host,
59and returns CPU-resident doubles. Providers do not need custom kernels for this builtin.
60
61## Examples of using the `str2double` function in MATLAB / RunMat
62
63### Convert a string scalar into a double
64```matlab
65value = str2double("3.14159");
66```
67Expected output:
68```matlab
69value = 3.14159
70```
71
72### Convert every element of a string array
73```matlab
74temps = ["12.5" "19.8" "not-a-number"];
75data = str2double(temps);
76```
77Expected output:
78```matlab
79data = 1×3
80   12.5000   19.8000       NaN
81```
82
83### Parse scientific notation text
84```matlab
85result = str2double("6.022e23");
86```
87Expected output:
88```matlab
89result = 6.0220e+23
90```
91
### Handle Fortran-style double-precision exponents written with `D`
93```matlab
94cap = str2double("4.7D-9");
95```
96Expected output:
97```matlab
98cap = 4.7000e-09
99```
100
101### Convert a character array one row at a time
102```matlab
103chars = ['42   '; '  100'];
104numbers = str2double(chars);
105```
106Expected output:
107```matlab
108numbers = 2×1
109    42
110   100
111```
112
113### Work with cell arrays of character vectors
114```matlab
115C = {'3.14', 'NaN', '-Inf'};
116values = str2double(C);
117```
118Expected output:
119```matlab
120values = 1×3
121    3.1400      NaN      -Inf
122```
123
124### Detect invalid numeric text
125```matlab
126status = str2double("error42");
127```
128Expected output:
129```matlab
130status = NaN
131```
132
133### Recognise special values `Inf` and `NaN`
134```matlab
135special = str2double(["Inf"; "-Infinity"; "NaN"]);
136```
137Expected output:
138```matlab
139special = 3×1
140     Inf
141    -Inf
142     NaN
143```
144
145## FAQ
146
147### What input types does `str2double` accept?
148String scalars, string arrays, character vectors, character arrays, and cell arrays of character vectors or
149string scalars are supported. Other types raise an error so that mismatched inputs are caught early.
150
151### How are invalid or empty strings handled?
152Invalid text—including empty strings, whitespace-only rows, or strings with extra characters—converts to `NaN`.
153This matches MATLAB, which uses `NaN` as a sentinel for failed conversions.
154
155### Does `str2double` evaluate arithmetic expressions?
156No. Unlike `str2num`, `str2double` never calls the evaluator. Text such as `"1+2"` or `"sqrt(2)"` yields `NaN`
157instead of executing the expression, keeping the builtin safe for untrusted input.
158
159### Can `str2double` parse complex numbers?
160No. Complex text like `"3+4i"` returns `NaN`. Use `str2num` when you need MATLAB to interpret complex literals.
161
### Are Fortran-style exponents with `D` supported?
Yes. Every `d` or `D` in the text is rewritten to `e` before parsing, so `"1.0D3"` converts to `1000`.
164
165### How does `str2double` treat missing strings?
166Missing strings produced with `string(missing)` display as `<missing>` and convert to `NaN`. You can detect them
167with `ismissing` before conversion if you need special handling.
168
169### Does locale affect parsing?
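`str2double` honours digits, decimal points, and exponent letters only. Grouping separators such as commas are not
accepted, so `"1,000"` converts to `NaN`. This differs from MATLAB, whose `str2double` treats a comma as a thousands separator; strip commas before conversion if your data contains them.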
172
173### Will the result stay on the GPU when I pass gpuArray inputs?
174No. The builtin gathers GPU-backed inputs to the host, parses them, and keeps the numeric result in host memory.
175Wrap the result with `gpuArray(...)` if you need to move it back to the device.
176
177## See Also
178`str2num`, `double`, `string`, `str2int`
179
180## Source & Feedback
181- Implementation: [`crates/runmat-runtime/src/builtins/strings/core/str2double.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/core/str2double.rs)
182- Found a bug? Please [open an issue](https://github.com/runmat-org/runmat/issues/new/choose) with a minimal reproduction.
183"#;
184
/// GPU metadata for `str2double`.
///
/// The builtin is described as a custom "conversion" operation with no supported
/// GPU precisions and no provider hooks; its residency policy is
/// `GatherImmediately`, and the `notes` field records that text is parsed on the
/// CPU after GPU-resident inputs are gathered.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "str2double",
    op_kind: GpuOpKind::Custom("conversion"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Parses text on the CPU; GPU-resident inputs are gathered before conversion.",
};

// Hand the spec to the project's GPU-spec registration macro.
register_builtin_gpu_spec!(GPU_SPEC);
201
/// Fusion metadata for `str2double`.
///
/// Neither an elementwise nor a reduction entry is supplied (both are `None`),
/// and `emits_nan` is set because text that cannot be parsed converts to `NaN`.
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "str2double",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: true,
    notes: "Conversion builtin; not eligible for fusion and materialises host-side doubles.",
};

// Hand the spec to the project's fusion-spec registration macro.
register_builtin_fusion_spec!(FUSION_SPEC);
213
// Only compiled with the `doc_export` feature: associates the embedded Markdown
// in `DOC_MD` with the builtin name "str2double".
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("str2double", DOC_MD);
216
/// Error text returned when the top-level argument is not a string, string array,
/// character array, or cell array.
const ARG_TYPE_ERROR: &str =
    "str2double: input must be a string array, character array, or cell array of character vectors";
/// Error text returned by the cell-array path when an element is not accepted as
/// a character vector or string scalar.
const CELL_ELEMENT_ERROR: &str =
    "str2double: cell array elements must be character vectors or string scalars";
221
222#[runtime_builtin(
223    name = "str2double",
224    category = "strings/core",
225    summary = "Convert strings, character arrays, or cell arrays of text into doubles.",
226    keywords = "str2double,string to double,text conversion,gpu",
227    accel = "sink"
228)]
229fn str2double_builtin(value: Value) -> Result<Value, String> {
230    let gathered = gather_if_needed(&value).map_err(|e| format!("str2double: {e}"))?;
231    match gathered {
232        Value::String(text) => Ok(Value::Num(parse_numeric_scalar(&text))),
233        Value::StringArray(array) => str2double_string_array(array),
234        Value::CharArray(array) => str2double_char_array(array),
235        Value::Cell(cell) => str2double_cell_array(cell),
236        _ => Err(ARG_TYPE_ERROR.to_string()),
237    }
238}
239
240fn str2double_string_array(array: StringArray) -> Result<Value, String> {
241    let StringArray { data, shape, .. } = array;
242    let mut values = Vec::with_capacity(data.len());
243    for text in &data {
244        values.push(parse_numeric_scalar(text));
245    }
246    let tensor = Tensor::new(values, shape).map_err(|e| format!("str2double: {e}"))?;
247    Ok(tensor::tensor_into_value(tensor))
248}
249
250fn str2double_char_array(array: CharArray) -> Result<Value, String> {
251    let rows = array.rows;
252    let cols = array.cols;
253    let mut values = Vec::with_capacity(rows);
254    for row in 0..rows {
255        let start = row * cols;
256        let end = start + cols;
257        let row_text: String = array.data[start..end].iter().collect();
258        values.push(parse_numeric_scalar(&row_text));
259    }
260    let tensor = Tensor::new(values, vec![rows, 1]).map_err(|e| format!("str2double: {e}"))?;
261    Ok(tensor::tensor_into_value(tensor))
262}
263
264fn str2double_cell_array(cell: CellArray) -> Result<Value, String> {
265    let CellArray {
266        data, rows, cols, ..
267    } = cell;
268    let mut values = Vec::with_capacity(rows * cols);
269    for col in 0..cols {
270        for row in 0..rows {
271            let idx = row * cols + col;
272            let element: &Value = &data[idx];
273            let numeric = match element {
274                Value::String(text) => parse_numeric_scalar(text),
275                Value::StringArray(sa) if sa.data.len() == 1 => parse_numeric_scalar(&sa.data[0]),
276                Value::CharArray(char_vec) if char_vec.rows == 1 => {
277                    let row_text: String = char_vec.data.iter().collect();
278                    parse_numeric_scalar(&row_text)
279                }
280                Value::CharArray(_) => return Err(CELL_ELEMENT_ERROR.to_string()),
281                _ => return Err(CELL_ELEMENT_ERROR.to_string()),
282            };
283            values.push(numeric);
284        }
285    }
286    let tensor = Tensor::new(values, vec![rows, cols]).map_err(|e| format!("str2double: {e}"))?;
287    Ok(tensor::tensor_into_value(tensor))
288}
289
/// Parses one piece of text as a real double, returning `NaN` when it is not a number.
///
/// Rules, in order:
/// 1. Surrounding whitespace is trimmed; empty text yields `NaN`.
/// 2. The spellings `nan`, `inf`, `infinity` (any ASCII letter case; `inf` and
///    `infinity` optionally signed) map directly to the IEEE special values.
/// 3. Every `d`/`D` is rewritten to `e` so `D`-style exponents are understood.
/// 4. The result is handed to `str::parse::<f64>`; any parse failure yields `NaN`.
fn parse_numeric_scalar(text: &str) -> f64 {
    let candidate = text.trim();
    if candidate.is_empty() {
        return f64::NAN;
    }

    // Case-insensitive comparison against a short list of lowercase spellings.
    let matches_any = |spellings: &[&str]| {
        spellings
            .iter()
            .any(|spelling| candidate.eq_ignore_ascii_case(spelling))
    };

    if matches_any(&["nan"]) {
        return f64::NAN;
    }
    if matches_any(&["inf", "+inf", "infinity", "+infinity"]) {
        return f64::INFINITY;
    }
    if matches_any(&["-inf", "-infinity"]) {
        return f64::NEG_INFINITY;
    }

    // Only allocate when there is actually a `d`/`D` to rewrite.
    let rewritten: Cow<'_, str> = if candidate.contains(['d', 'D']) {
        Cow::Owned(candidate.replace(['d', 'D'], "e"))
    } else {
        Cow::Borrowed(candidate)
    };

    match rewritten.parse::<f64>() {
        Ok(number) => number,
        Err(_) => f64::NAN,
    }
}
317
// Unit tests for the `str2double` builtin. Each test calls the builtin entry
// point directly with a hand-built `Value`.
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;

    // A string scalar holding a valid number yields a numeric scalar.
    #[test]
    fn str2double_string_scalar() {
        let result = str2double_builtin(Value::String("42.5".into())).expect("str2double");
        assert_eq!(result, Value::Num(42.5));
    }

    // Non-numeric text is not an error; it converts to NaN.
    #[test]
    fn str2double_string_scalar_invalid_returns_nan() {
        let result = str2double_builtin(Value::String("abc".into())).expect("str2double");
        match result {
            Value::Num(v) => assert!(v.is_nan()),
            other => panic!("expected scalar result, got {other:?}"),
        }
    }

    // String arrays keep their shape; padding is trimmed and bad entries become NaN.
    #[test]
    fn str2double_string_array_preserves_shape() {
        let array =
            StringArray::new(vec!["1".into(), " 2.5 ".into(), "foo".into()], vec![3, 1]).unwrap();
        let result = str2double_builtin(Value::StringArray(array)).expect("str2double");
        match result {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![3, 1]);
                assert_eq!(tensor.data[0], 1.0);
                assert_eq!(tensor.data[1], 2.5);
                assert!(tensor.data[2].is_nan());
            }
            Value::Num(_) => panic!("expected tensor"),
            other => panic!("unexpected result {other:?}"),
        }
    }

    // A 2x4 character array (rows "42  " and "100 ") yields one value per row.
    #[test]
    fn str2double_char_array_multiple_rows() {
        let data: Vec<char> = vec!['4', '2', ' ', ' ', '1', '0', '0', ' '];
        let array = CharArray::new(data, 2, 4).unwrap();
        let result = str2double_builtin(Value::CharArray(array)).expect("str2double");
        match result {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![2, 1]);
                assert_eq!(tensor.data[0], 42.0);
                assert_eq!(tensor.data[1], 100.0);
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    // A 0x0 character array produces an empty 0x1 tensor.
    #[test]
    fn str2double_char_array_empty_rows() {
        let array = CharArray::new(Vec::new(), 0, 0).unwrap();
        let result = str2double_builtin(Value::CharArray(array)).expect("str2double");
        match result {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![0, 1]);
                assert_eq!(tensor.data.len(), 0);
            }
            other => panic!("expected empty tensor, got {other:?}"),
        }
    }

    // Cell arrays may mix string scalars and character vectors; shape is preserved.
    #[test]
    #[allow(
        clippy::approx_constant,
        reason = "Test ensures literal 3.14 text stays 3.14, not π"
    )]
    fn str2double_cell_array_of_text() {
        let cell = CellArray::new(
            vec![
                Value::String("3.14".into()),
                Value::CharArray(CharArray::new_row("NaN")),
                Value::String("-Inf".into()),
            ],
            1,
            3,
        )
        .unwrap();
        let result = str2double_builtin(Value::Cell(cell)).expect("str2double");
        match result {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![1, 3]);
                assert_eq!(tensor.data[0], 3.14);
                assert!(tensor.data[1].is_nan());
                assert_eq!(tensor.data[2], f64::NEG_INFINITY);
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    // A numeric cell element is rejected with a `str2double` error.
    #[test]
    fn str2double_cell_array_invalid_element_errors() {
        let cell = CellArray::new(vec![Value::Num(5.0)], 1, 1).unwrap();
        let err = str2double_builtin(Value::Cell(cell)).unwrap_err();
        assert!(
            err.contains("str2double"),
            "unexpected error message: {err}"
        );
    }

    // `D` is accepted as an exponent marker: "1.5D3" == 1500.
    #[test]
    fn str2double_supports_d_exponent() {
        let result = str2double_builtin(Value::String("1.5D3".into())).expect("str2double");
        match result {
            Value::Num(v) => assert_eq!(v, 1500.0),
            other => panic!("expected scalar result, got {other:?}"),
        }
    }

    // Signed and long-form spellings of infinity map to the IEEE values.
    #[test]
    fn str2double_recognises_infinity_forms() {
        let array = StringArray::new(
            vec!["Inf".into(), "-Infinity".into(), "+inf".into()],
            vec![3, 1],
        )
        .unwrap();
        let result = str2double_builtin(Value::StringArray(array)).expect("str2double");
        match result {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.data[0], f64::INFINITY);
                assert_eq!(tensor.data[1], f64::NEG_INFINITY);
                assert_eq!(tensor.data[2], f64::INFINITY);
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    // With `doc_export` enabled, the embedded Markdown must contain example blocks.
    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}