runmat_runtime/builtins/strings/core/
strlength.rs

1//! MATLAB-compatible `strlength` builtin for RunMat.
2
3use runmat_builtins::{CellArray, CharArray, StringArray, Tensor, Value};
4use runmat_macros::runtime_builtin;
5
6use crate::builtins::common::spec::{
7    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
8    ReductionNaN, ResidencyPolicy, ShapeRequirements,
9};
10use crate::builtins::common::tensor;
11use crate::builtins::strings::common::is_missing_string;
12#[cfg(feature = "doc_export")]
13use crate::register_builtin_doc_text;
14use crate::{gather_if_needed, register_builtin_fusion_spec, register_builtin_gpu_spec};
15
16#[cfg(feature = "doc_export")]
17pub const DOC_MD: &str = r#"---
18title: "strlength"
19category: "strings/core"
20keywords: ["strlength", "string length", "character count", "text analytics", "cell array"]
21summary: "Return the number of characters in each element of a string array, character array, or cell array of character vectors."
22references:
23  - https://www.mathworks.com/help/matlab/ref/strlength.html
24gpu_support:
25  elementwise: false
26  reduction: false
27  precisions: []
28  broadcasting: "none"
29  notes: "Executes on the CPU; if any argument lives on the GPU, the runtime gathers it before computing lengths to keep semantics identical to MATLAB."
30fusion:
31  elementwise: false
32  reduction: false
33  max_inputs: 1
34  constants: "inline"
35requires_feature: null
36tested:
37  unit: "builtins::strings::core::strlength::tests"
38  integration: "builtins::strings::core::strlength::tests::strlength_cell_array_of_char_vectors"
39---
40
41# What does the `strlength` function do in MATLAB / RunMat?
42`strlength(str)` counts how many characters appear in each element of text inputs. It works with string
43arrays, character vectors, character arrays, and cell arrays of character vectors, returning a `double`
44array that mirrors the input shape.
45
46## How does the `strlength` function behave in MATLAB / RunMat?
47- String arrays return a numeric array of the same size; string scalars yield a scalar `double`.
48- Character arrays report the number of characters per row and ignore padding that MATLAB inserts to keep rows the same width.
49- Character vectors stored in cells contribute one scalar per cell element; the output array matches the cell array shape.
50- Missing string scalars (for example values created with `string(missing)`) yield `NaN`. RunMat displays these entries as `<missing>` in the console just like MATLAB.
- Empty text inputs produce zero-sized numeric outputs that follow MATLAB's dimension rules.
52
53## `strlength` Function GPU Execution Behaviour
54`strlength` is a metadata query and always executes on the CPU. If a text container references data that
55originated on the GPU (for example, a cell array that still wraps GPU-resident numeric intermediates), RunMat
56gathers that data before measuring lengths. Providers do not require custom kernels for this builtin.
57
58## Examples of using the `strlength` function in MATLAB / RunMat
59
60### Measure Characters In A String Scalar
61```matlab
62len = strlength("RunMat");
63```
64Expected output:
65```matlab
66len = 6
67```
68
69### Count Characters Across A String Array
70```matlab
71labels = ["North" "South" "East" "West"];
72counts = strlength(labels);
73```
74Expected output:
75```matlab
76counts = 1×4
77    5    5    4    4
78```
79
80### Compute Lengths For Each Row Of A Character Array
81```matlab
82names = char("cat", "giraffe");
83row_counts = strlength(names);
84```
85Expected output:
86```matlab
87row_counts = 2×1
88     3
89     7
90```
91
92### Handle Empty And Blank Strings
93```matlab
94mixed = ["", "   "];
95len = strlength(mixed);
96```
97Expected output:
98```matlab
99len = 1×2
100     0     3
101```
102
103### Get Lengths From A Cell Array Of Character Vectors
104```matlab
105C = {'red', 'green', 'blue'};
106L = strlength(C);
107```
108Expected output:
109```matlab
110L = 1×3
111     3     5     4
112```
113
114### Treat Missing Strings As NaN
115```matlab
116values = string(["alpha" "beta" "gamma"]);
117values(2) = string(missing);  % Displays as <missing> when printed
118lengths = strlength(values);
119```
120Expected output:
121```matlab
122lengths = 1×3
123    5   NaN    5
124```
125
126## FAQ
127
128### What numeric type does `strlength` return?
129`strlength` always returns doubles, even when all lengths are whole numbers. MATLAB uses doubles for most numeric results, and RunMat follows the same rule.
130
131### Why are padded spaces in character arrays ignored?
132When MATLAB builds a character array from rows of different lengths, it pads shorter rows with spaces. Those padding characters are not part of the logical content, so `strlength` removes them before counting. Explicit trailing spaces that you type in a single character vector remain part of the count.
133
134### How are missing string values handled?
135Missing string scalars display as `<missing>` and produce `NaN` lengths. Use `ismissing` or `fillmissing` if you need to substitute a default length.
136
137### Can I call `strlength` with numeric data?
138No. `strlength` only accepts string arrays, character vectors/arrays, or cell arrays of character vectors. Numeric inputs raise an error—use `num2str` first if you need to convert numbers to text.
139
140### Does `strlength` support multibyte Unicode characters?
Yes. Each Unicode scalar value counts as one character, so an emoji or a precomposed accented letter contributes a length of one. A character that UTF-16 encodes as a surrogate pair is therefore counted once, whereas MATLAB, which stores text as UTF-16 code units, reports two for it.
142
143### Will `strlength` ever execute on the GPU?
144No. The builtin inspects metadata and operates on host strings. If your data already lives on the GPU, RunMat gathers it automatically before computing lengths so results match MATLAB exactly.
145
146## See Also
147`string`, `char`, `strtrim`, `length`, `size`
148
149## Source & Feedback
150- Implementation: [`crates/runmat-runtime/src/builtins/strings/core/strlength.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/core/strlength.rs)
151- Found an issue? Please [open a GitHub issue](https://github.com/runmat-org/runmat/issues/new/choose) with a minimal reproduction.
152"#;
153
/// GPU metadata for `strlength`.
///
/// The spec declares no supported precisions and no provider hooks, and uses
/// `ResidencyPolicy::GatherImmediately`; as the `notes` text states, lengths are measured on the
/// CPU after any GPU-resident inputs have been gathered.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "strlength",
    op_kind: GpuOpKind::Custom("string-metadata"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Measures string lengths on the CPU; any GPU-resident inputs are gathered before evaluation.",
};

// NOTE(review): presumably adds the spec to a crate-wide registry; the macro is defined elsewhere.
register_builtin_gpu_spec!(GPU_SPEC);
170
/// Fusion metadata for `strlength`.
///
/// Both `elementwise` and `reduction` are `None`; per the `notes` text the builtin is not eligible
/// for fusion. `emits_nan` is `true` because missing strings are reported as `NaN`.
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "strlength",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: true,
    notes: "Metadata-only builtin; not eligible for fusion and never emits GPU kernels.",
};

// NOTE(review): presumably adds the spec to a crate-wide registry; the macro is defined elsewhere.
register_builtin_fusion_spec!(FUSION_SPEC);
182
// Associates the Markdown page in `DOC_MD` with the builtin name; compiled only when the
// `doc_export` feature is enabled.
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("strlength", DOC_MD);

/// Error message returned when the argument is not a supported text container.
const ARG_TYPE_ERROR: &str =
    "strlength: first argument must be a string array, character array, or cell array of character vectors";
/// Error message returned when a cell array holds an element that cannot be measured as text.
const CELL_ELEMENT_ERROR: &str =
    "strlength: cell array elements must be character vectors or string scalars";
190
191#[runtime_builtin(
192    name = "strlength",
193    category = "strings/core",
194    summary = "Count characters in string arrays, character arrays, or cell arrays of character vectors.",
195    keywords = "strlength,string length,text,count,characters",
196    accel = "sink"
197)]
198fn strlength_builtin(value: Value) -> Result<Value, String> {
199    let gathered = gather_if_needed(&value).map_err(|e| format!("strlength: {e}"))?;
200    match gathered {
201        Value::StringArray(array) => strlength_string_array(array),
202        Value::String(text) => Ok(Value::Num(string_scalar_length(&text))),
203        Value::CharArray(array) => strlength_char_array(array),
204        Value::Cell(cell) => strlength_cell_array(cell),
205        _ => Err(ARG_TYPE_ERROR.to_string()),
206    }
207}
208
209fn strlength_string_array(array: StringArray) -> Result<Value, String> {
210    let StringArray { data, shape, .. } = array;
211    let mut lengths = Vec::with_capacity(data.len());
212    for text in &data {
213        lengths.push(string_scalar_length(text));
214    }
215    let tensor = Tensor::new(lengths, shape).map_err(|e| format!("strlength: {e}"))?;
216    Ok(tensor::tensor_into_value(tensor))
217}
218
219fn strlength_char_array(array: CharArray) -> Result<Value, String> {
220    let rows = array.rows;
221    let mut lengths = Vec::with_capacity(rows);
222    for row in 0..rows {
223        let length = if array.rows <= 1 {
224            array.cols
225        } else {
226            trimmed_row_length(&array, row)
227        } as f64;
228        lengths.push(length);
229    }
230    let tensor = Tensor::new(lengths, vec![rows, 1]).map_err(|e| format!("strlength: {e}"))?;
231    Ok(tensor::tensor_into_value(tensor))
232}
233
234fn strlength_cell_array(cell: CellArray) -> Result<Value, String> {
235    let CellArray {
236        data, rows, cols, ..
237    } = cell;
238    let mut lengths = Vec::with_capacity(rows * cols);
239    for col in 0..cols {
240        for row in 0..rows {
241            let idx = row * cols + col;
242            let value: &Value = &data[idx];
243            let length = match value {
244                Value::String(text) => string_scalar_length(text),
245                Value::StringArray(sa) if sa.data.len() == 1 => string_scalar_length(&sa.data[0]),
246                Value::CharArray(char_vec) if char_vec.rows == 1 => char_vec.cols as f64,
247                Value::CharArray(_) => return Err(CELL_ELEMENT_ERROR.to_string()),
248                _ => return Err(CELL_ELEMENT_ERROR.to_string()),
249            };
250            lengths.push(length);
251        }
252    }
253    let tensor = Tensor::new(lengths, vec![rows, cols]).map_err(|e| format!("strlength: {e}"))?;
254    Ok(tensor::tensor_into_value(tensor))
255}
256
257fn string_scalar_length(text: &str) -> f64 {
258    if is_missing_string(text) {
259        f64::NAN
260    } else {
261        text.chars().count() as f64
262    }
263}
264
265fn trimmed_row_length(array: &CharArray, row: usize) -> usize {
266    let cols = array.cols;
267    let mut end = cols;
268    while end > 0 {
269        let ch = array.data[row * cols + end - 1];
270        if ch == ' ' {
271            end -= 1;
272        } else {
273            break;
274        }
275    }
276    end
277}
278
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;

    /// A string scalar yields a scalar double.
    #[test]
    fn strlength_string_scalar() {
        let result = strlength_builtin(Value::String("RunMat".into())).expect("strlength");
        assert_eq!(result, Value::Num(6.0));
    }

    /// Multi-byte UTF-8 text is counted per Unicode scalar value, not per byte.
    #[test]
    fn strlength_counts_unicode_scalar_values() {
        // U+00E9 takes two UTF-8 bytes and U+1F600 takes four; each is a single `char`.
        let result =
            strlength_builtin(Value::String("h\u{e9}llo\u{1F600}".into())).expect("strlength");
        assert_eq!(result, Value::Num(6.0));
    }

    /// Missing entries in a string array are reported as NaN.
    #[test]
    fn strlength_string_array_with_missing() {
        let array = StringArray::new(vec!["alpha".into(), "<missing>".into()], vec![2, 1]).unwrap();
        let result = strlength_builtin(Value::StringArray(array)).expect("strlength");
        match result {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![2, 1]);
                assert_eq!(tensor.data.len(), 2);
                assert_eq!(tensor.data[0], 5.0);
                assert!(tensor.data[1].is_nan());
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    /// Each row of a multi-row character array gets its own length.
    #[test]
    fn strlength_char_array_multiple_rows() {
        let data: Vec<char> = vec!['c', 'a', 't', ' ', ' ', 'h', 'o', 'r', 's', 'e'];
        let array = CharArray::new(data, 2, 5).unwrap();
        let result = strlength_builtin(Value::CharArray(array)).expect("strlength");
        match result {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![2, 1]);
                assert_eq!(tensor.data, vec![3.0, 5.0]);
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    /// Trailing spaces in a single character vector are part of its length.
    #[test]
    fn strlength_char_vector_retains_explicit_spaces() {
        let data: Vec<char> = "hi   ".chars().collect();
        let array = CharArray::new(data, 1, 5).unwrap();
        let result = strlength_builtin(Value::CharArray(array)).expect("strlength");
        assert_eq!(result, Value::Num(5.0));
    }

    /// A cell row of character vectors yields one length per cell.
    #[test]
    fn strlength_cell_array_of_char_vectors() {
        let cell = CellArray::new(
            vec![
                Value::CharArray(CharArray::new_row("red")),
                Value::CharArray(CharArray::new_row("green")),
            ],
            1,
            2,
        )
        .unwrap();
        let result = strlength_builtin(Value::Cell(cell)).expect("strlength");
        match result {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![1, 2]);
                assert_eq!(tensor.data, vec![3.0, 5.0]);
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    /// String scalars inside a cell are accepted, including missing ones.
    #[test]
    fn strlength_cell_array_with_string_scalars() {
        let cell = CellArray::new(
            vec![
                Value::String("alpha".into()),
                Value::String("beta".into()),
                Value::String("<missing>".into()),
            ],
            1,
            3,
        )
        .unwrap();
        let result = strlength_builtin(Value::Cell(cell)).expect("strlength");
        match result {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![1, 3]);
                assert_eq!(tensor.data.len(), 3);
                assert_eq!(tensor.data[0], 5.0);
                assert_eq!(tensor.data[1], 4.0);
                assert!(tensor.data[2].is_nan());
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    /// A 2x2 cell is read at `row * cols + col` and its lengths are emitted column by column.
    #[test]
    fn strlength_cell_matrix_emits_column_by_column() {
        let cell = CellArray::new(
            vec![
                Value::CharArray(CharArray::new_row("a")),
                Value::CharArray(CharArray::new_row("bb")),
                Value::CharArray(CharArray::new_row("ccc")),
                Value::CharArray(CharArray::new_row("dddd")),
            ],
            2,
            2,
        )
        .unwrap();
        let result = strlength_builtin(Value::Cell(cell)).expect("strlength");
        match result {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![2, 2]);
                assert_eq!(tensor.data, vec![1.0, 3.0, 2.0, 4.0]);
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    /// The output tensor reuses the string array's shape.
    #[test]
    fn strlength_string_array_preserves_shape() {
        let array = StringArray::new(
            vec!["ab".into(), "c".into(), "def".into(), "".into()],
            vec![2, 2],
        )
        .unwrap();
        let result = strlength_builtin(Value::StringArray(array)).expect("strlength");
        match result {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![2, 2]);
                assert_eq!(tensor.data, vec![2.0, 1.0, 3.0, 0.0]);
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    /// Padding spaces at the end of a shorter row are not counted.
    #[test]
    fn strlength_char_array_trims_padding() {
        let data: Vec<char> = vec!['d', 'o', 'g', ' ', ' ', 'h', 'o', 'r', 's', 'e'];
        let array = CharArray::new(data, 2, 5).unwrap();
        let result = strlength_builtin(Value::CharArray(array)).expect("strlength");
        match result {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![2, 1]);
                assert_eq!(tensor.data, vec![3.0, 5.0]);
            }
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    /// Non-text arguments are rejected with the argument-type error.
    #[test]
    fn strlength_errors_on_invalid_input() {
        let err = strlength_builtin(Value::Num(1.0)).unwrap_err();
        assert_eq!(err, ARG_TYPE_ERROR);
    }

    /// A non-text element anywhere in a cell array rejects the whole call.
    #[test]
    fn strlength_rejects_cell_with_invalid_element() {
        let cell = CellArray::new(
            vec![Value::CharArray(CharArray::new_row("ok")), Value::Num(5.0)],
            1,
            2,
        )
        .unwrap();
        let err = strlength_builtin(Value::Cell(cell)).unwrap_err();
        assert_eq!(err, CELL_ELEMENT_ERROR);
    }

    /// A multi-row character array is not a character vector, so it is rejected inside a cell.
    #[test]
    fn strlength_rejects_cell_with_multirow_char_array() {
        let matrix = CharArray::new(vec!['a', 'b', 'c', 'd'], 2, 2).unwrap();
        let cell = CellArray::new(vec![Value::CharArray(matrix)], 1, 1).unwrap();
        let err = strlength_builtin(Value::Cell(cell)).unwrap_err();
        assert_eq!(err, CELL_ELEMENT_ERROR);
    }

    /// The exported documentation contains at least one example block.
    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}