1use runmat_builtins::{CellArray, CharArray, StringArray, Tensor, Value};
4use runmat_macros::runtime_builtin;
5
6use crate::builtins::common::spec::{
7 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
8 ReductionNaN, ResidencyPolicy, ShapeRequirements,
9};
10use crate::builtins::common::tensor;
11use crate::builtins::strings::common::is_missing_string;
12#[cfg(feature = "doc_export")]
13use crate::register_builtin_doc_text;
14use crate::{gather_if_needed, register_builtin_fusion_spec, register_builtin_gpu_spec};
15
16#[cfg(feature = "doc_export")]
17pub const DOC_MD: &str = r#"---
18title: "strlength"
19category: "strings/core"
20keywords: ["strlength", "string length", "character count", "text analytics", "cell array"]
21summary: "Return the number of characters in each element of a string array, character array, or cell array of character vectors."
22references:
23 - https://www.mathworks.com/help/matlab/ref/strlength.html
24gpu_support:
25 elementwise: false
26 reduction: false
27 precisions: []
28 broadcasting: "none"
29 notes: "Executes on the CPU; if any argument lives on the GPU, the runtime gathers it before computing lengths to keep semantics identical to MATLAB."
30fusion:
31 elementwise: false
32 reduction: false
33 max_inputs: 1
34 constants: "inline"
35requires_feature: null
36tested:
37 unit: "builtins::strings::core::strlength::tests"
38 integration: "builtins::strings::core::strlength::tests::strlength_cell_array_of_char_vectors"
39---
40
41# What does the `strlength` function do in MATLAB / RunMat?
42`strlength(str)` counts how many characters appear in each element of text inputs. It works with string
43arrays, character vectors, character arrays, and cell arrays of character vectors, returning a `double`
44array that mirrors the input shape.
45
46## How does the `strlength` function behave in MATLAB / RunMat?
47- String arrays return a numeric array of the same size; string scalars yield a scalar `double`.
48- Character arrays report the number of characters per row and ignore padding that MATLAB inserts to keep rows the same width.
49- Character vectors stored in cells contribute one scalar per cell element; the output array matches the cell array shape.
50- Missing string scalars (for example values created with `string(missing)`) yield `NaN`. RunMat displays these entries as `<missing>` in the console just like MATLAB.
- Empty text inputs produce zero-sized numeric outputs that match MATLAB's dimension rules.
52
53## `strlength` Function GPU Execution Behaviour
54`strlength` is a metadata query and always executes on the CPU. If a text container references data that
55originated on the GPU (for example, a cell array that still wraps GPU-resident numeric intermediates), RunMat
56gathers that data before measuring lengths. Providers do not require custom kernels for this builtin.
57
58## Examples of using the `strlength` function in MATLAB / RunMat
59
60### Measure Characters In A String Scalar
61```matlab
62len = strlength("RunMat");
63```
64Expected output:
65```matlab
66len = 6
67```
68
69### Count Characters Across A String Array
70```matlab
71labels = ["North" "South" "East" "West"];
72counts = strlength(labels);
73```
74Expected output:
75```matlab
76counts = 1×4
77 5 5 4 4
78```
79
80### Compute Lengths For Each Row Of A Character Array
81```matlab
82names = char("cat", "giraffe");
83row_counts = strlength(names);
84```
85Expected output:
86```matlab
87row_counts = 2×1
88 3
89 7
90```
91
92### Handle Empty And Blank Strings
93```matlab
94mixed = ["", " "];
95len = strlength(mixed);
96```
97Expected output:
98```matlab
99len = 1×2
100 0 3
101```
102
103### Get Lengths From A Cell Array Of Character Vectors
104```matlab
105C = {'red', 'green', 'blue'};
106L = strlength(C);
107```
108Expected output:
109```matlab
110L = 1×3
111 3 5 4
112```
113
114### Treat Missing Strings As NaN
115```matlab
116values = string(["alpha" "beta" "gamma"]);
117values(2) = string(missing); % Displays as <missing> when printed
118lengths = strlength(values);
119```
120Expected output:
121```matlab
122lengths = 1×3
123 5 NaN 5
124```
125
126## FAQ
127
128### What numeric type does `strlength` return?
129`strlength` always returns doubles, even when all lengths are whole numbers. MATLAB uses doubles for most numeric results, and RunMat follows the same rule.
130
131### Why are padded spaces in character arrays ignored?
132When MATLAB builds a character array from rows of different lengths, it pads shorter rows with spaces. Those padding characters are not part of the logical content, so `strlength` removes them before counting. Explicit trailing spaces that you type in a single character vector remain part of the count.
133
134### How are missing string values handled?
135Missing string scalars display as `<missing>` and produce `NaN` lengths. Use `ismissing` or `fillmissing` if you need to substitute a default length.
136
137### Can I call `strlength` with numeric data?
138No. `strlength` only accepts string arrays, character vectors/arrays, or cell arrays of character vectors. Numeric inputs raise an error—use `num2str` first if you need to convert numbers to text.
139
140### Does `strlength` support multibyte Unicode characters?
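Yes. Each Unicode scalar value counts as one character, so emoji or accented letters contribute a length of one. Characters outside the Basic Multilingual Plane are counted once rather than as a surrogate pair; note that MATLAB stores text as UTF-16 code units and reports a length of two for such characters.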
142
143### Will `strlength` ever execute on the GPU?
144No. The builtin inspects metadata and operates on host strings. If your data already lives on the GPU, RunMat gathers it automatically before computing lengths so results match MATLAB exactly.
145
146## See Also
147`string`, `char`, `strtrim`, `length`, `size`
148
149## Source & Feedback
150- Implementation: [`crates/runmat-runtime/src/builtins/strings/core/strlength.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/core/strlength.rs)
151- Found an issue? Please [open a GitHub issue](https://github.com/runmat-org/runmat/issues/new/choose) with a minimal reproduction.
152"#;
153
/// GPU metadata registered for the `strlength` builtin.
///
/// The spec declares no supported precisions and no provider hooks, uses a custom
/// `"string-metadata"` op kind, and asks for inputs to be gathered immediately; per its own
/// `notes`, lengths are measured on the CPU.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "strlength",
    op_kind: GpuOpKind::Custom("string-metadata"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Measures string lengths on the CPU; any GPU-resident inputs are gathered before evaluation.",
};

// Hand the spec above to the registration macro.
register_builtin_gpu_spec!(GPU_SPEC);
170
/// Fusion metadata registered for the `strlength` builtin.
///
/// No elementwise or reduction fusion entry is provided (`None` for both). `emits_nan` is set
/// because missing strings are reported as `NaN` by `string_scalar_length`.
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "strlength",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: true,
    notes: "Metadata-only builtin; not eligible for fusion and never emits GPU kernels.",
};

// Hand the spec above to the registration macro.
register_builtin_fusion_spec!(FUSION_SPEC);

// The Markdown documentation is only registered when the `doc_export` feature is enabled.
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("strlength", DOC_MD);
185
// Returned by `strlength_builtin` when the argument is not a string, string array,
// character array, or cell array.
const ARG_TYPE_ERROR: &str =
    "strlength: first argument must be a string array, character array, or cell array of character vectors";
// Returned by the cell-array path when an element is not a string scalar or a single-row
// character array.
const CELL_ELEMENT_ERROR: &str =
    "strlength: cell array elements must be character vectors or string scalars";
190
191#[runtime_builtin(
192 name = "strlength",
193 category = "strings/core",
194 summary = "Count characters in string arrays, character arrays, or cell arrays of character vectors.",
195 keywords = "strlength,string length,text,count,characters",
196 accel = "sink"
197)]
198fn strlength_builtin(value: Value) -> Result<Value, String> {
199 let gathered = gather_if_needed(&value).map_err(|e| format!("strlength: {e}"))?;
200 match gathered {
201 Value::StringArray(array) => strlength_string_array(array),
202 Value::String(text) => Ok(Value::Num(string_scalar_length(&text))),
203 Value::CharArray(array) => strlength_char_array(array),
204 Value::Cell(cell) => strlength_cell_array(cell),
205 _ => Err(ARG_TYPE_ERROR.to_string()),
206 }
207}
208
209fn strlength_string_array(array: StringArray) -> Result<Value, String> {
210 let StringArray { data, shape, .. } = array;
211 let mut lengths = Vec::with_capacity(data.len());
212 for text in &data {
213 lengths.push(string_scalar_length(text));
214 }
215 let tensor = Tensor::new(lengths, shape).map_err(|e| format!("strlength: {e}"))?;
216 Ok(tensor::tensor_into_value(tensor))
217}
218
219fn strlength_char_array(array: CharArray) -> Result<Value, String> {
220 let rows = array.rows;
221 let mut lengths = Vec::with_capacity(rows);
222 for row in 0..rows {
223 let length = if array.rows <= 1 {
224 array.cols
225 } else {
226 trimmed_row_length(&array, row)
227 } as f64;
228 lengths.push(length);
229 }
230 let tensor = Tensor::new(lengths, vec![rows, 1]).map_err(|e| format!("strlength: {e}"))?;
231 Ok(tensor::tensor_into_value(tensor))
232}
233
234fn strlength_cell_array(cell: CellArray) -> Result<Value, String> {
235 let CellArray {
236 data, rows, cols, ..
237 } = cell;
238 let mut lengths = Vec::with_capacity(rows * cols);
239 for col in 0..cols {
240 for row in 0..rows {
241 let idx = row * cols + col;
242 let value: &Value = &data[idx];
243 let length = match value {
244 Value::String(text) => string_scalar_length(text),
245 Value::StringArray(sa) if sa.data.len() == 1 => string_scalar_length(&sa.data[0]),
246 Value::CharArray(char_vec) if char_vec.rows == 1 => char_vec.cols as f64,
247 Value::CharArray(_) => return Err(CELL_ELEMENT_ERROR.to_string()),
248 _ => return Err(CELL_ELEMENT_ERROR.to_string()),
249 };
250 lengths.push(length);
251 }
252 }
253 let tensor = Tensor::new(lengths, vec![rows, cols]).map_err(|e| format!("strlength: {e}"))?;
254 Ok(tensor::tensor_into_value(tensor))
255}
256
257fn string_scalar_length(text: &str) -> f64 {
258 if is_missing_string(text) {
259 f64::NAN
260 } else {
261 text.chars().count() as f64
262 }
263}
264
265fn trimmed_row_length(array: &CharArray, row: usize) -> usize {
266 let cols = array.cols;
267 let mut end = cols;
268 while end > 0 {
269 let ch = array.data[row * cols + end - 1];
270 if ch == ' ' {
271 end -= 1;
272 } else {
273 break;
274 }
275 }
276 end
277}
278
#[cfg(test)]
mod tests {
    //! Unit tests for the `strlength` builtin, driven through `strlength_builtin`.

    use super::*;
    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;

    /// Unwraps a tensor result, panicking with the offending value otherwise.
    fn expect_tensor(value: Value) -> Tensor {
        match value {
            Value::Tensor(tensor) => tensor,
            other => panic!("expected tensor result, got {other:?}"),
        }
    }

    #[test]
    fn strlength_string_scalar() {
        let result = strlength_builtin(Value::String("RunMat".into())).expect("strlength");
        assert_eq!(result, Value::Num(6.0));
    }

    #[test]
    fn strlength_string_array_with_missing() {
        let array = StringArray::new(vec!["alpha".into(), "<missing>".into()], vec![2, 1]).unwrap();
        let result = strlength_builtin(Value::StringArray(array)).expect("strlength");
        let tensor = expect_tensor(result);
        assert_eq!(tensor.shape, vec![2, 1]);
        assert_eq!(tensor.data.len(), 2);
        assert_eq!(tensor.data[0], 5.0);
        // NaN never compares equal, so the missing entry is checked separately.
        assert!(tensor.data[1].is_nan());
    }

    #[test]
    fn strlength_char_array_multiple_rows() {
        // Two rows of width 5: "cat" padded with two spaces, then "horse".
        let data: Vec<char> = vec!['c', 'a', 't', ' ', ' ', 'h', 'o', 'r', 's', 'e'];
        let array = CharArray::new(data, 2, 5).unwrap();
        let result = strlength_builtin(Value::CharArray(array)).expect("strlength");
        let tensor = expect_tensor(result);
        assert_eq!(tensor.shape, vec![2, 1]);
        assert_eq!(tensor.data, vec![3.0, 5.0]);
    }

    #[test]
    fn strlength_char_vector_retains_explicit_spaces() {
        // Spell out all five characters so the data length always matches the declared 1×5
        // dimensions; the three trailing spaces are part of the expected count.
        let data: Vec<char> = vec!['h', 'i', ' ', ' ', ' '];
        let array = CharArray::new(data, 1, 5).unwrap();
        let result = strlength_builtin(Value::CharArray(array)).expect("strlength");
        assert_eq!(result, Value::Num(5.0));
    }

    #[test]
    fn strlength_cell_array_of_char_vectors() {
        let cell = CellArray::new(
            vec![
                Value::CharArray(CharArray::new_row("red")),
                Value::CharArray(CharArray::new_row("green")),
            ],
            1,
            2,
        )
        .unwrap();
        let result = strlength_builtin(Value::Cell(cell)).expect("strlength");
        let tensor = expect_tensor(result);
        assert_eq!(tensor.shape, vec![1, 2]);
        assert_eq!(tensor.data, vec![3.0, 5.0]);
    }

    #[test]
    fn strlength_cell_array_with_string_scalars() {
        let cell = CellArray::new(
            vec![
                Value::String("alpha".into()),
                Value::String("beta".into()),
                Value::String("<missing>".into()),
            ],
            1,
            3,
        )
        .unwrap();
        let result = strlength_builtin(Value::Cell(cell)).expect("strlength");
        let tensor = expect_tensor(result);
        assert_eq!(tensor.shape, vec![1, 3]);
        assert_eq!(tensor.data.len(), 3);
        assert_eq!(tensor.data[0], 5.0);
        assert_eq!(tensor.data[1], 4.0);
        assert!(tensor.data[2].is_nan());
    }

    #[test]
    fn strlength_string_array_preserves_shape() {
        let array = StringArray::new(
            vec!["ab".into(), "c".into(), "def".into(), "".into()],
            vec![2, 2],
        )
        .unwrap();
        let result = strlength_builtin(Value::StringArray(array)).expect("strlength");
        let tensor = expect_tensor(result);
        assert_eq!(tensor.shape, vec![2, 2]);
        assert_eq!(tensor.data, vec![2.0, 1.0, 3.0, 0.0]);
    }

    #[test]
    fn strlength_char_array_trims_padding() {
        // Two rows of width 5: "dog" padded with two spaces, then "horse".
        let data: Vec<char> = vec!['d', 'o', 'g', ' ', ' ', 'h', 'o', 'r', 's', 'e'];
        let array = CharArray::new(data, 2, 5).unwrap();
        let result = strlength_builtin(Value::CharArray(array)).expect("strlength");
        let tensor = expect_tensor(result);
        assert_eq!(tensor.shape, vec![2, 1]);
        assert_eq!(tensor.data, vec![3.0, 5.0]);
    }

    #[test]
    fn strlength_errors_on_invalid_input() {
        let err = strlength_builtin(Value::Num(1.0)).unwrap_err();
        assert_eq!(err, ARG_TYPE_ERROR);
    }

    #[test]
    fn strlength_rejects_cell_with_invalid_element() {
        let cell = CellArray::new(
            vec![Value::CharArray(CharArray::new_row("ok")), Value::Num(5.0)],
            1,
            2,
        )
        .unwrap();
        let err = strlength_builtin(Value::Cell(cell)).unwrap_err();
        assert_eq!(err, CELL_ELEMENT_ERROR);
    }

    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}