runmat_runtime/builtins/strings/transform/
extractbetween.rs

1//! MATLAB-compatible `extractBetween` builtin with GPU-aware semantics for RunMat.
2
3use std::cmp::min;
4
5use crate::builtins::common::broadcast::{broadcast_index, broadcast_shapes, compute_strides};
6use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
7use crate::{
8    gather_if_needed, make_cell_with_shape, register_builtin_fusion_spec, register_builtin_gpu_spec,
9};
10use runmat_builtins::{CharArray, IntValue, StringArray, Value};
11use runmat_macros::runtime_builtin;
12
13#[cfg(feature = "doc_export")]
14use crate::register_builtin_doc_text;
15
16use crate::builtins::common::spec::{
17    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
18    ReductionNaN, ResidencyPolicy, ShapeRequirements,
19};
20
21#[cfg(feature = "doc_export")]
22pub const DOC_MD: &str = r#"---
23title: "extractBetween"
24category: "strings/transform"
25keywords: ["extractBetween", "substring", "boundaries", "inclusive", "exclusive", "string array"]
26summary: "Extract text that lies between two boundary markers using string or position inputs."
27references:
28  - https://www.mathworks.com/help/matlab/ref/extractbetween.html
29gpu_support:
30  elementwise: false
31  reduction: false
32  precisions: []
33  broadcasting: "matlab"
34  notes: "Runs on the CPU. GPU-resident inputs are gathered before processing, results stay on the host, and the builtin is registered as an Accelerate sink."
35fusion:
36  elementwise: false
37  reduction: false
38  max_inputs: 3
39  constants: "inline"
40requires_feature: null
41tested:
42  unit: "builtins::strings::transform::extractbetween::tests"
43  integration: "builtins::strings::transform::extractbetween::tests::extractBetween_cell_array_preserves_types"
44---
45
46# What does the `extractBetween` function do in MATLAB / RunMat?
47`extractBetween(text, start, stop)` locates the substring that appears between two boundary markers.
48Markers can be text (string scalars, character vectors, or cells that contain them) or numeric
49positions. The builtin mirrors MATLAB semantics for broadcasting, missing values, and the optional
50`'Boundaries'` name-value argument.
51
52## How does the `extractBetween` function behave in MATLAB / RunMat?
53- Accepts **string scalars**, **string arrays**, **character arrays** (interpreted row-by-row), and
54  **cell arrays** that contain string scalars or character vectors. Cell outputs preserve the
55  element type (string vs. char) of each cell.
56- Boundary inputs can be text or numeric positions. Both boundaries in a call must use the same
57  kind of input; mixing text and numeric markers raises a size/type error.
58- Scalar text markers follow MATLAB implicit expansion, applying to every element of the text
59  input. Character-array and cell inputs must exactly match the text shape.
60- The `'Boundaries'` name-value pair controls inclusivity. Text markers default to **exclusive**
61  extraction, while numeric positions default to **inclusive** behaviour. Values are
62  case-insensitive and must be `'exclusive'` or `'inclusive'`.
63- Missing string scalars propagate: if the text, start marker, or end marker is `<missing>`,
64  the result is also `<missing>`.
65- When the start or end boundary cannot be located, `extractBetween` returns an empty string (or an
66  appropriately padded empty row for character arrays).
67- Numeric positions use 1-based indexing. Inputs are validated as positive integers, clamped to
68  string length, and honour inclusivity rules exactly as MATLAB does.
69
70## `extractBetween` Function GPU Execution Behaviour
71Text manipulation executes on the CPU. When any argument resides on the GPU, RunMat gathers the
72values to host memory, performs extraction, and leaves the results on the host. No Accelerate
73provider hooks are required, and the builtin is registered as an Accelerate sink so fusion plans
74never attempt to keep data on the device for this operation.
75
76## Examples of using the `extractBetween` function in MATLAB / RunMat
77
78### Extract text between words in a string
79```matlab
80txt = "RunMat accelerates MATLAB workloads";
81segment = extractBetween(txt, "RunMat ", " workloads");
82```
83Expected output:
84```matlab
85segment = "accelerates MATLAB"
86```
87
88### Include boundary markers with the `'Boundaries'` option
89```matlab
90path = "snapshots/run/fusion.mat";
91withMarkers = extractBetween(path, "snapshots/", ".mat", "Boundaries", "inclusive");
92```
93Expected output:
94```matlab
95withMarkers = "snapshots/run/fusion.mat"
96```
97
98### Use numeric positions for 1-based indexing
99```matlab
100name = "Accelerator";
101middle = extractBetween(name, 3, 7);
102```
103Expected output:
104```matlab
105middle = "celer"
106```
107
108### Apply scalar text markers to each element of a string array
109```matlab
110files = ["runmat_accel.rs", "runmat_gc.rs"; "runmat_plot.rs", "runmat_cli.rs"];
111stems = extractBetween(files, "runmat_", ".rs");
112```
113Expected output:
114```matlab
115stems = 2×2 string
116    "accel"    "gc"
117    "plot"     "cli"
118```
119
120### Work with character arrays while preserving row padding
121```matlab
122chars = char("Device<GPU>", "Planner<Fusion>");
123tokens = extractBetween(chars, "<", ">");
124```
125Expected output:
126```matlab
127tokens =
128
129  2×6 char array
130
131    "GPU   "
132    "Fusion"
133```
134
135### Preserve element types in cell arrays
136```matlab
137C = {'<missing>', 'A[B]C'; "Planner <Fusion>", "Device<GPU>"};
138out = extractBetween(C, "<", ">");
139```
140Expected output:
141```matlab
142out =
143  2×2 cell array
144    {'<missing>'}    {'B'}
145    {"Fusion"}       {"GPU"}
146```
147
148### Handle missing strings without throwing errors
149```matlab
150txt = ["<missing>", "Planner<GPU>"];
151tokens = extractBetween(txt, "<", ">");
152```
153Expected output:
154```matlab
155tokens = 1×2 string
156    "<missing>"    "GPU"
157```
158
159## FAQ
160
161### Which argument types does `extractBetween` accept?
162The first argument can be a string scalar, string array, character array, or cell array of character
163vectors / string scalars. Boundary arguments can be text (string, character array, or cell) or numeric
164positions supplied as scalars, vectors, or arrays.
165
166### Can the start and end arguments mix text and numeric positions?
167No. Both boundaries must be text markers or both must be numeric positions. Mixing types raises a
168size/type error, mirroring MATLAB.
169
170### What happens when a boundary is not found?
171`extractBetween` returns the empty string (`""`). Character-array outputs contain space padded rows
172of the appropriate length.
173
174### How does `'Boundaries','inclusive'` behave with numeric positions?
175Inclusive mode returns the substring that includes both indices. Exclusive mode removes the characters
176at the specified start and end positions, yielding the text strictly between the two indices.
177
178### Does `extractBetween` support implicit expansion?
179Yes. Scalar boundaries expand against array inputs following MATLAB implicit expansion rules. Cell and
180character array inputs must retain their original shape; attempting to expand them produces a size
181mismatch error.
182
183### Are GPU inputs supported?
184Yes. Inputs stored on a GPU are gathered automatically. The function executes on the CPU, returns
185host-side results, and fusion planning treats the builtin as a residency sink.
186
187## See Also
188[replace](../../transform/replace), [split](../../transform/split), [join](../../transform/join), [contains](../../search/contains), [strfind](../../search/strfind)
189
190## Source & Feedback
191- Implementation: [`crates/runmat-runtime/src/builtins/strings/transform/extractbetween.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/transform/extractbetween.rs)
192- Found an issue? Please [open a GitHub issue](https://github.com/runmat-org/runmat/issues/new/choose) with a minimal reproduction.
193"#;
194
195pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
196    name: "extractBetween",
197    op_kind: GpuOpKind::Custom("string-transform"),
198    supported_precisions: &[],
199    broadcast: BroadcastSemantics::Matlab,
200    provider_hooks: &[],
201    constant_strategy: ConstantStrategy::InlineLiteral,
202    residency: ResidencyPolicy::GatherImmediately,
203    nan_mode: ReductionNaN::Include,
204    two_pass_threshold: None,
205    workgroup_size: None,
206    accepts_nan_mode: false,
207    notes: "Runs on the CPU; GPU-resident inputs are gathered before extraction and outputs are returned on the host.",
208};
209
210register_builtin_gpu_spec!(GPU_SPEC);
211
212pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
213    name: "extractBetween",
214    shape: ShapeRequirements::Any,
215    constant_strategy: ConstantStrategy::InlineLiteral,
216    elementwise: None,
217    reduction: None,
218    emits_nan: false,
219    notes: "Pure string manipulation builtin; excluded from fusion plans and gathers GPU inputs immediately.",
220};
221
222register_builtin_fusion_spec!(FUSION_SPEC);
223
224#[cfg(feature = "doc_export")]
225register_builtin_doc_text!("extractBetween", DOC_MD);
226
/// Builtin name, used as the prefix of wrapped error messages and passed to the broadcast helpers.
const FN_NAME: &str = "extractBetween";
/// Returned when the first argument is not a string, string array, char array, or cell array.
const ARG_TYPE_ERROR: &str = "extractBetween: first argument must be a string array, character array, or cell array of character vectors";
/// Returned when the start and end boundaries are not of the same kind (text vs. positions).
const BOUNDARY_TYPE_ERROR: &str =
    "extractBetween: start and end arguments must both be text or both be numeric positions";
/// Returned when a numeric position is not a positive integer.
const POSITION_TYPE_ERROR: &str = "extractBetween: position arguments must be positive integers";
/// Returned when the trailing name-value arguments do not come in pairs.
const OPTION_PAIR_ERROR: &str = "extractBetween: name-value arguments must appear in pairs";
/// Returned when an option name is not text or is not `Boundaries`.
const OPTION_NAME_ERROR: &str = "extractBetween: unrecognized parameter name";
/// Returned when the `Boundaries` value is neither `inclusive` nor `exclusive`.
const OPTION_VALUE_ERROR: &str =
    "extractBetween: 'Boundaries' must be either 'inclusive' or 'exclusive'";
/// Returned when a cell element is not a string scalar or a character vector.
const CELL_ELEMENT_ERROR: &str =
    "extractBetween: cell array elements must be string scalars or character vectors";
/// Returned when the broadcast output shape is not supported by the text input.
const SIZE_MISMATCH_ERROR: &str =
    "extractBetween: boundary sizes must be compatible with the text input";
240
/// Whether the boundaries themselves are part of the extracted text.
///
/// Chosen with the `'Boundaries'` name-value argument. When that argument is absent the builtin
/// uses `Exclusive` for text boundaries and `Inclusive` for numeric positions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BoundariesMode {
    /// Return only what lies strictly between the two boundaries.
    Exclusive,
    /// Return the boundaries together with what lies between them.
    Inclusive,
}
246
247#[runtime_builtin(
248    name = "extractBetween",
249    category = "strings/transform",
250    summary = "Extract substrings between boundary markers using MATLAB-compatible semantics.",
251    keywords = "extractBetween,substring,boundaries,strings",
252    accel = "sink"
253)]
254fn extract_between_builtin(
255    text: Value,
256    start: Value,
257    stop: Value,
258    rest: Vec<Value>,
259) -> Result<Value, String> {
260    let text = gather_if_needed(&text).map_err(|e| format!("{FN_NAME}: {e}"))?;
261    let start = gather_if_needed(&start).map_err(|e| format!("{FN_NAME}: {e}"))?;
262    let stop = gather_if_needed(&stop).map_err(|e| format!("{FN_NAME}: {e}"))?;
263
264    let mode_override = parse_boundaries_option(&rest)?;
265
266    let normalized_text = NormalizedText::from_value(text)?;
267    let start_boundary = BoundaryArg::from_value(start)?;
268    let stop_boundary = BoundaryArg::from_value(stop)?;
269
270    if start_boundary.kind() != stop_boundary.kind() {
271        return Err(BOUNDARY_TYPE_ERROR.to_string());
272    }
273    let boundary_kind = start_boundary.kind();
274    let effective_mode = mode_override.unwrap_or(match boundary_kind {
275        BoundaryKind::Text => BoundariesMode::Exclusive,
276        BoundaryKind::Position => BoundariesMode::Inclusive,
277    });
278
279    let start_shape = start_boundary.shape();
280    let stop_shape = stop_boundary.shape();
281    let text_shape = normalized_text.shape();
282
283    let shape_ts = broadcast_shapes(FN_NAME, text_shape, start_shape)?;
284    let output_shape = broadcast_shapes(FN_NAME, &shape_ts, stop_shape)?;
285    if !normalized_text.supports_shape(&output_shape) {
286        return Err(SIZE_MISMATCH_ERROR.to_string());
287    }
288
289    let total: usize = output_shape.iter().copied().product();
290    if total == 0 {
291        return normalized_text.into_value(Vec::new(), output_shape);
292    }
293
294    let text_strides = compute_strides(text_shape);
295    let start_strides = compute_strides(start_shape);
296    let stop_strides = compute_strides(stop_shape);
297
298    let mut results = Vec::with_capacity(total);
299
300    for idx in 0..total {
301        let text_idx = broadcast_index(idx, &output_shape, text_shape, &text_strides);
302        let start_idx = broadcast_index(idx, &output_shape, start_shape, &start_strides);
303        let stop_idx = broadcast_index(idx, &output_shape, stop_shape, &stop_strides);
304
305        let result = match boundary_kind {
306            BoundaryKind::Text => {
307                let text_value = normalized_text.data(text_idx);
308                let start_value = start_boundary.text(start_idx);
309                let stop_value = stop_boundary.text(stop_idx);
310                extract_with_text_boundaries(text_value, start_value, stop_value, effective_mode)
311            }
312            BoundaryKind::Position => {
313                let text_value = normalized_text.data(text_idx);
314                let start_value = start_boundary.position(start_idx);
315                let stop_value = stop_boundary.position(stop_idx);
316                extract_with_positions(text_value, start_value, stop_value, effective_mode)
317            }
318        };
319        results.push(result);
320    }
321
322    normalized_text.into_value(results, output_shape)
323}
324
325fn parse_boundaries_option(args: &[Value]) -> Result<Option<BoundariesMode>, String> {
326    if args.is_empty() {
327        return Ok(None);
328    }
329    if !args.len().is_multiple_of(2) {
330        return Err(OPTION_PAIR_ERROR.to_string());
331    }
332
333    let mut mode: Option<BoundariesMode> = None;
334    let mut idx = 0;
335    while idx < args.len() {
336        let name_value = gather_if_needed(&args[idx]).map_err(|e| format!("{FN_NAME}: {e}"))?;
337        let name = value_to_string(&name_value).ok_or_else(|| OPTION_NAME_ERROR.to_string())?;
338        if !name.eq_ignore_ascii_case("boundaries") {
339            return Err(OPTION_NAME_ERROR.to_string());
340        }
341        let value = gather_if_needed(&args[idx + 1]).map_err(|e| format!("{FN_NAME}: {e}"))?;
342        let value_str = value_to_string(&value).ok_or_else(|| OPTION_VALUE_ERROR.to_string())?;
343        let parsed_mode = if value_str.eq_ignore_ascii_case("inclusive") {
344            BoundariesMode::Inclusive
345        } else if value_str.eq_ignore_ascii_case("exclusive") {
346            BoundariesMode::Exclusive
347        } else {
348            return Err(OPTION_VALUE_ERROR.to_string());
349        };
350        mode = Some(parsed_mode);
351        idx += 2;
352    }
353    Ok(mode)
354}
355
356fn value_to_string(value: &Value) -> Option<String> {
357    match value {
358        Value::String(s) => Some(s.clone()),
359        Value::StringArray(sa) if sa.data.len() == 1 => Some(sa.data[0].clone()),
360        Value::CharArray(ca) if ca.rows <= 1 => {
361            if ca.rows == 0 {
362                Some(String::new())
363            } else {
364                Some(char_row_to_string_slice(&ca.data, ca.cols, 0))
365            }
366        }
367        Value::CharArray(_) => None,
368        Value::Cell(cell) if cell.data.len() == 1 => {
369            let element = &cell.data[0];
370            value_to_string(element)
371        }
372        _ => None,
373    }
374}
375
/// Outcome of extracting from a single text element.
#[derive(Clone)]
struct ExtractResult {
    /// The extracted text, or the literal `<missing>` placeholder.
    text: String,
}

impl ExtractResult {
    /// Result carrying the `<missing>` placeholder.
    fn missing() -> Self {
        Self::text(String::from("<missing>"))
    }

    /// Result carrying `text` unchanged.
    fn text(text: String) -> Self {
        Self { text }
    }
}
392
393fn extract_with_text_boundaries(
394    text: &str,
395    start: &str,
396    stop: &str,
397    mode: BoundariesMode,
398) -> ExtractResult {
399    if is_missing_string(text) || is_missing_string(start) || is_missing_string(stop) {
400        return ExtractResult::missing();
401    }
402
403    if let Some(start_idx) = text.find(start) {
404        let search_start = start_idx + start.len();
405        if search_start > text.len() {
406            return ExtractResult::text(String::new());
407        }
408        if let Some(relative_end) = text[search_start..].find(stop) {
409            let end_idx = search_start + relative_end;
410            match mode {
411                BoundariesMode::Inclusive => {
412                    let end_capture = min(text.len(), end_idx + stop.len());
413                    let slice = &text[start_idx..end_capture];
414                    ExtractResult::text(slice.to_string())
415                }
416                BoundariesMode::Exclusive => {
417                    if end_idx < search_start {
418                        ExtractResult::text(String::new())
419                    } else {
420                        let slice = &text[search_start..end_idx];
421                        ExtractResult::text(slice.to_string())
422                    }
423                }
424            }
425        } else {
426            ExtractResult::text(String::new())
427        }
428    } else {
429        ExtractResult::text(String::new())
430    }
431}
432
433fn extract_with_positions(
434    text: &str,
435    start: usize,
436    stop: usize,
437    mode: BoundariesMode,
438) -> ExtractResult {
439    if is_missing_string(text) {
440        return ExtractResult::missing();
441    }
442    if text.is_empty() {
443        return ExtractResult::text(String::new());
444    }
445    let chars: Vec<char> = text.chars().collect();
446    let len = chars.len();
447    if len == 0 {
448        return ExtractResult::text(String::new());
449    }
450
451    if start == 0 || stop == 0 {
452        return ExtractResult::text(String::new());
453    }
454
455    if start > len {
456        return ExtractResult::text(String::new());
457    }
458    let stop_clamped = stop.min(len);
459    if stop_clamped == 0 {
460        return ExtractResult::text(String::new());
461    }
462
463    match mode {
464        BoundariesMode::Inclusive => {
465            if start > stop_clamped {
466                return ExtractResult::text(String::new());
467            }
468            let start_idx = start - 1;
469            let end_idx = stop_clamped - 1;
470            if start_idx >= len || end_idx >= len || start_idx > end_idx {
471                ExtractResult::text(String::new())
472            } else {
473                let slice: String = chars[start_idx..=end_idx].iter().collect();
474                ExtractResult::text(slice)
475            }
476        }
477        BoundariesMode::Exclusive => {
478            if start + 1 >= stop_clamped {
479                return ExtractResult::text(String::new());
480            }
481            let start_idx = start;
482            let end_idx = stop_clamped - 2;
483            if start_idx >= len || end_idx >= len || start_idx > end_idx {
484                ExtractResult::text(String::new())
485            } else {
486                let slice: String = chars[start_idx..=end_idx].iter().collect();
487                ExtractResult::text(slice)
488            }
489        }
490    }
491}
492
/// Shape and per-element type information remembered for a cell-array text input.
#[derive(Debug, Clone)]
struct CellInfo {
    /// Shape of the original cell array.
    shape: Vec<usize>,
    /// Kind of each cell element, in the same order as the cell's data.
    element_kinds: Vec<CellElementKind>,
}

/// Text flavour of a single cell element, used to rebuild the output cell with matching types.
#[derive(Debug, Clone)]
enum CellElementKind {
    /// The element was a string scalar (or a one-element string array).
    String,
    /// The element was a character array with at most one row.
    Char,
}

/// Container type of the text input, which decides how results are packaged again.
#[derive(Debug, Clone)]
enum TextKind {
    /// A single string value.
    StringScalar,
    /// A string array of any shape.
    StringArray,
    /// A character array, handled one row at a time.
    CharArray { rows: usize },
    /// A cell array of text elements.
    CellArray(CellInfo),
}
512
513#[derive(Clone, Debug)]
514struct NormalizedText {
515    data: Vec<String>,
516    shape: Vec<usize>,
517    kind: TextKind,
518}
519
520impl NormalizedText {
521    fn from_value(value: Value) -> Result<Self, String> {
522        match value {
523            Value::String(s) => Ok(Self {
524                data: vec![s],
525                shape: vec![1, 1],
526                kind: TextKind::StringScalar,
527            }),
528            Value::StringArray(sa) => Ok(Self {
529                data: sa.data.clone(),
530                shape: sa.shape.clone(),
531                kind: TextKind::StringArray,
532            }),
533            Value::CharArray(ca) => {
534                let rows = ca.rows;
535                let mut data = Vec::with_capacity(rows);
536                for row in 0..rows {
537                    data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
538                }
539                Ok(Self {
540                    data,
541                    shape: vec![rows, 1],
542                    kind: TextKind::CharArray { rows },
543                })
544            }
545            Value::Cell(cell) => {
546                let shape = cell.shape.clone();
547                let mut data = Vec::with_capacity(cell.data.len());
548                let mut kinds = Vec::with_capacity(cell.data.len());
549                for element in &cell.data {
550                    match &**element {
551                        Value::String(s) => {
552                            data.push(s.clone());
553                            kinds.push(CellElementKind::String);
554                        }
555                        Value::StringArray(sa) if sa.data.len() == 1 => {
556                            data.push(sa.data[0].clone());
557                            kinds.push(CellElementKind::String);
558                        }
559                        Value::CharArray(ca) if ca.rows <= 1 => {
560                            if ca.rows == 0 {
561                                data.push(String::new());
562                            } else {
563                                data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
564                            }
565                            kinds.push(CellElementKind::Char);
566                        }
567                        Value::CharArray(_) => return Err(CELL_ELEMENT_ERROR.to_string()),
568                        _ => return Err(CELL_ELEMENT_ERROR.to_string()),
569                    }
570                }
571                Ok(Self {
572                    data,
573                    shape: shape.clone(),
574                    kind: TextKind::CellArray(CellInfo {
575                        shape,
576                        element_kinds: kinds,
577                    }),
578                })
579            }
580            _ => Err(ARG_TYPE_ERROR.to_string()),
581        }
582    }
583
584    fn shape(&self) -> &[usize] {
585        &self.shape
586    }
587
588    fn data(&self, idx: usize) -> &str {
589        &self.data[idx]
590    }
591
592    fn supports_shape(&self, output_shape: &[usize]) -> bool {
593        match &self.kind {
594            TextKind::StringScalar => true,
595            TextKind::StringArray => true,
596            TextKind::CharArray { .. } => output_shape == self.shape,
597            TextKind::CellArray(info) => output_shape == info.shape,
598        }
599    }
600
601    fn into_value(
602        self,
603        results: Vec<ExtractResult>,
604        output_shape: Vec<usize>,
605    ) -> Result<Value, String> {
606        match self.kind {
607            TextKind::StringScalar => {
608                if results.len() <= 1 {
609                    let value = results
610                        .into_iter()
611                        .next()
612                        .unwrap_or_else(|| ExtractResult::text(String::new()));
613                    Ok(Value::String(value.text))
614                } else {
615                    let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
616                    let array = StringArray::new(data, output_shape)
617                        .map_err(|e| format!("{FN_NAME}: {e}"))?;
618                    Ok(Value::StringArray(array))
619                }
620            }
621            TextKind::StringArray => {
622                let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
623                let array =
624                    StringArray::new(data, output_shape).map_err(|e| format!("{FN_NAME}: {e}"))?;
625                Ok(Value::StringArray(array))
626            }
627            TextKind::CharArray { rows } => {
628                if rows == 0 {
629                    return CharArray::new(Vec::new(), 0, 0)
630                        .map(Value::CharArray)
631                        .map_err(|e| format!("{FN_NAME}: {e}"));
632                }
633                if results.len() != rows {
634                    return Err(SIZE_MISMATCH_ERROR.to_string());
635                }
636                let mut max_width = 0usize;
637                let mut row_strings = Vec::with_capacity(rows);
638                for result in &results {
639                    let width = result.text.chars().count();
640                    max_width = max_width.max(width);
641                    row_strings.push(result.text.clone());
642                }
643                let mut flattened = Vec::with_capacity(rows * max_width);
644                for row in row_strings {
645                    let mut chars: Vec<char> = row.chars().collect();
646                    if chars.len() < max_width {
647                        chars.resize(max_width, ' ');
648                    }
649                    flattened.extend(chars);
650                }
651                CharArray::new(flattened, rows, max_width)
652                    .map(Value::CharArray)
653                    .map_err(|e| format!("{FN_NAME}: {e}"))
654            }
655            TextKind::CellArray(info) => {
656                if results.len() != info.element_kinds.len() {
657                    return Err(SIZE_MISMATCH_ERROR.to_string());
658                }
659                let mut values = Vec::with_capacity(results.len());
660                for (idx, result) in results.into_iter().enumerate() {
661                    match info.element_kinds[idx] {
662                        CellElementKind::String => values.push(Value::String(result.text)),
663                        CellElementKind::Char => {
664                            let ca = CharArray::new_row(&result.text);
665                            values.push(Value::CharArray(ca));
666                        }
667                    }
668                }
669                make_cell_with_shape(values, info.shape)
670            }
671        }
672    }
673}
674
/// Distinguishes text boundaries from numeric position boundaries.
#[derive(Debug, Clone, PartialEq, Eq)]
enum BoundaryKind {
    /// The boundary is given as text to search for.
    Text,
    /// The boundary is given as a 1-based position.
    Position,
}
680
681#[derive(Clone, Debug)]
682enum BoundaryArg {
683    Text(BoundaryText),
684    Position(BoundaryPositions),
685}
686
687impl BoundaryArg {
688    fn from_value(value: Value) -> Result<Self, String> {
689        match value {
690            Value::String(_) | Value::StringArray(_) | Value::CharArray(_) | Value::Cell(_) => {
691                BoundaryText::from_value(value).map(BoundaryArg::Text)
692            }
693            Value::Num(_) | Value::Int(_) | Value::Tensor(_) => {
694                BoundaryPositions::from_value(value).map(BoundaryArg::Position)
695            }
696            other => Err(format!(
697                "{BOUNDARY_TYPE_ERROR}: unsupported argument {other:?}"
698            )),
699        }
700    }
701
702    fn kind(&self) -> BoundaryKind {
703        match self {
704            BoundaryArg::Text(_) => BoundaryKind::Text,
705            BoundaryArg::Position(_) => BoundaryKind::Position,
706        }
707    }
708
709    fn shape(&self) -> &[usize] {
710        match self {
711            BoundaryArg::Text(text) => &text.shape,
712            BoundaryArg::Position(pos) => &pos.shape,
713        }
714    }
715
716    fn text(&self, idx: usize) -> &str {
717        match self {
718            BoundaryArg::Text(text) => &text.data[idx],
719            BoundaryArg::Position(_) => unreachable!(),
720        }
721    }
722
723    fn position(&self, idx: usize) -> usize {
724        match self {
725            BoundaryArg::Position(pos) => pos.data[idx],
726            BoundaryArg::Text(_) => unreachable!(),
727        }
728    }
729}
730
731#[derive(Clone, Debug)]
732struct BoundaryText {
733    data: Vec<String>,
734    shape: Vec<usize>,
735}
736
737impl BoundaryText {
738    fn from_value(value: Value) -> Result<Self, String> {
739        match value {
740            Value::String(s) => Ok(Self {
741                data: vec![s],
742                shape: vec![1, 1],
743            }),
744            Value::StringArray(sa) => Ok(Self {
745                data: sa.data.clone(),
746                shape: sa.shape.clone(),
747            }),
748            Value::CharArray(ca) => {
749                let mut data = Vec::with_capacity(ca.rows);
750                for row in 0..ca.rows {
751                    data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
752                }
753                Ok(Self {
754                    data,
755                    shape: vec![ca.rows, 1],
756                })
757            }
758            Value::Cell(cell) => {
759                let shape = cell.shape.clone();
760                let mut data = Vec::with_capacity(cell.data.len());
761                for element in &cell.data {
762                    match &**element {
763                        Value::String(s) => data.push(s.clone()),
764                        Value::StringArray(sa) if sa.data.len() == 1 => {
765                            data.push(sa.data[0].clone());
766                        }
767                        Value::CharArray(ca) if ca.rows <= 1 => {
768                            if ca.rows == 0 {
769                                data.push(String::new());
770                            } else {
771                                data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
772                            }
773                        }
774                        Value::CharArray(_) => return Err(CELL_ELEMENT_ERROR.to_string()),
775                        _ => return Err(CELL_ELEMENT_ERROR.to_string()),
776                    }
777                }
778                Ok(Self { data, shape })
779            }
780            _ => Err(BOUNDARY_TYPE_ERROR.to_string()),
781        }
782    }
783}
784
785#[derive(Clone, Debug)]
786struct BoundaryPositions {
787    data: Vec<usize>,
788    shape: Vec<usize>,
789}
790
791impl BoundaryPositions {
792    fn from_value(value: Value) -> Result<Self, String> {
793        match value {
794            Value::Num(n) => Ok(Self {
795                data: vec![parse_position(n)?],
796                shape: vec![1, 1],
797            }),
798            Value::Int(i) => Ok(Self {
799                data: vec![parse_position_int(i)?],
800                shape: vec![1, 1],
801            }),
802            Value::Tensor(t) => {
803                let mut data = Vec::with_capacity(t.data.len());
804                for &entry in &t.data {
805                    data.push(parse_position(entry)?);
806                }
807                Ok(Self {
808                    data,
809                    shape: if t.shape.is_empty() {
810                        vec![t.rows, t.cols.max(1)]
811                    } else {
812                        t.shape
813                    },
814                })
815            }
816            _ => Err(BOUNDARY_TYPE_ERROR.to_string()),
817        }
818    }
819}
820
821fn parse_position(value: f64) -> Result<usize, String> {
822    if !value.is_finite() || value < 1.0 {
823        return Err(POSITION_TYPE_ERROR.to_string());
824    }
825    if (value.fract()).abs() > f64::EPSILON {
826        return Err(POSITION_TYPE_ERROR.to_string());
827    }
828    if value > (usize::MAX as f64) {
829        return Err(POSITION_TYPE_ERROR.to_string());
830    }
831    Ok(value as usize)
832}
833
834fn parse_position_int(value: IntValue) -> Result<usize, String> {
835    let val = value.to_i64();
836    if val <= 0 {
837        return Err(POSITION_TYPE_ERROR.to_string());
838    }
839    Ok(val as usize)
840}
841
#[cfg(test)]
mod tests {
    #![allow(non_snake_case)]

    use super::*;
    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;
    use runmat_builtins::{CellArray, Tensor};

    /// Wraps a string slice as a scalar `Value::String`.
    fn str_value(text: &str) -> Value {
        Value::String(text.into())
    }

    /// Builds the `"Boundaries", <mode>` name-value argument pair.
    fn boundaries_option(mode: &str) -> Vec<Value> {
        vec![str_value("Boundaries"), str_value(mode)]
    }

    // --- Text boundaries on scalar strings ---

    #[test]
    fn extractBetween_basic_string() {
        let extracted = extract_between_builtin(
            str_value("RunMat accelerates MATLAB"),
            str_value("RunMat "),
            str_value(" MATLAB"),
            vec![],
        )
        .expect("extractBetween");
        assert_eq!(extracted, str_value("accelerates"));
    }

    #[test]
    fn extractBetween_inclusive_option() {
        let extracted = extract_between_builtin(
            str_value("a[b]c"),
            str_value("["),
            str_value("]"),
            boundaries_option("inclusive"),
        )
        .expect("extractBetween");
        assert_eq!(extracted, str_value("[b]"));
    }

    // --- Numeric positions ---

    #[test]
    fn extractBetween_numeric_positions() {
        let extracted = extract_between_builtin(
            str_value("Accelerator"),
            Value::Num(3.0),
            Value::Num(7.0),
            vec![],
        )
        .expect("extractBetween");
        assert_eq!(extracted, str_value("celer"));
    }

    #[test]
    fn extractBetween_numeric_positions_exclusive_option() {
        let extracted = extract_between_builtin(
            str_value("Accelerator"),
            Value::Num(3.0),
            Value::Num(7.0),
            boundaries_option("exclusive"),
        )
        .expect("extractBetween");
        assert_eq!(extracted, str_value("ele"));
    }

    #[test]
    fn extractBetween_numeric_positions_clamps_stop() {
        let extracted = extract_between_builtin(
            str_value("Accelerator"),
            Value::Num(3.0),
            Value::Num(100.0),
            vec![],
        )
        .expect("extractBetween");
        assert_eq!(extracted, str_value("celerator"));
    }

    #[test]
    fn extractBetween_numeric_positions_start_past_length() {
        let extracted = extract_between_builtin(
            str_value("abc"),
            Value::Num(10.0),
            Value::Num(12.0),
            vec![],
        )
        .expect("extractBetween");
        assert_eq!(extracted, Value::String(String::new()));
    }

    // --- Container inputs ---

    #[test]
    fn extractBetween_string_array_broadcast() {
        let files = StringArray::new(
            vec!["runmat_accel.rs".into(), "runmat_gc.rs".into()],
            vec![2, 1],
        )
        .unwrap();
        let extracted = extract_between_builtin(
            Value::StringArray(files),
            str_value("runmat_"),
            str_value(".rs"),
            vec![],
        )
        .expect("extractBetween");
        let array = match extracted {
            Value::StringArray(array) => array,
            other => panic!("expected string array, got {other:?}"),
        };
        assert_eq!(array.data, vec![String::from("accel"), String::from("gc")]);
        assert_eq!(array.shape, vec![2, 1]);
    }

    #[test]
    fn extractBetween_char_array_rows() {
        let source = "GPUAccelerateIgnition";
        let chars = CharArray::new(source.chars().collect(), 1, source.len()).unwrap();
        let extracted = extract_between_builtin(
            Value::CharArray(chars),
            str_value("GPU"),
            str_value("tion"),
            vec![],
        )
        .expect("extractBetween");
        let out = match extracted {
            Value::CharArray(out) => out,
            other => panic!("expected char array, got {other:?}"),
        };
        assert_eq!(out.rows, 1);
        let joined: String = out.data.iter().collect();
        assert_eq!(joined.trim_end(), "AccelerateIgni");
    }

    #[test]
    fn extractBetween_cell_array_preserves_types() {
        let elements = vec![
            Value::CharArray(CharArray::new_row("A[B]C")),
            str_value("Planner<GPU>"),
        ];
        let cell = CellArray::new(elements, 1, 2).unwrap();
        let extracted = extract_between_builtin(
            Value::Cell(cell),
            str_value("["),
            str_value("]"),
            vec![],
        )
        .expect("extractBetween");
        let out = match extracted {
            Value::Cell(out) => out,
            other => panic!("expected cell array, got {other:?}"),
        };
        let char_element = out.get(0, 0).unwrap();
        let string_element = out.get(0, 1).unwrap();
        assert_eq!(char_element, Value::CharArray(CharArray::new_row("B")));
        assert_eq!(string_element, Value::String(String::new()));
    }

    #[test]
    fn extractBetween_missing_string_propagates() {
        let missing = || StringArray::new(vec!["<missing>".into()], vec![1, 1]).unwrap();
        let extracted = extract_between_builtin(
            Value::StringArray(missing()),
            str_value("["),
            str_value("]"),
            vec![],
        )
        .expect("extractBetween");
        assert_eq!(extracted, Value::StringArray(missing()));
    }

    // --- Argument validation ---

    #[test]
    fn extractBetween_position_type_error() {
        let failure = extract_between_builtin(
            str_value("abc"),
            Value::Num(0.5),
            Value::Num(2.0),
            vec![],
        )
        .unwrap_err();
        assert_eq!(failure, POSITION_TYPE_ERROR);
    }

    #[test]
    fn extractBetween_mixed_boundary_error() {
        let failure = extract_between_builtin(
            str_value("abc"),
            str_value("a"),
            Value::Num(3.0),
            vec![],
        )
        .unwrap_err();
        assert_eq!(failure, BOUNDARY_TYPE_ERROR);
    }

    #[test]
    fn extractBetween_numeric_tensor_broadcast() {
        let words = StringArray::new(vec!["abcd".into(), "wxyz".into()], vec![2, 1]).unwrap();
        let starts = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
        let stops = Tensor::new(vec![3.0, 4.0], vec![2, 1]).unwrap();
        let extracted = extract_between_builtin(
            Value::StringArray(words),
            Value::Tensor(starts),
            Value::Tensor(stops),
            vec![],
        )
        .expect("extractBetween");
        let array = match extracted {
            Value::StringArray(array) => array,
            other => panic!("expected string array, got {other:?}"),
        };
        assert_eq!(array.data, vec![String::from("abc"), String::from("xyz")]);
        assert_eq!(array.shape, vec![2, 1]);
    }

    #[test]
    fn extractBetween_option_invalid_value() {
        let failure = extract_between_builtin(
            str_value("abc"),
            str_value("a"),
            str_value("c"),
            boundaries_option("middle"),
        )
        .unwrap_err();
        assert_eq!(failure, OPTION_VALUE_ERROR);
    }

    #[test]
    fn extractBetween_option_name_error() {
        let failure = extract_between_builtin(
            str_value("abc"),
            str_value("a"),
            str_value("c"),
            vec![str_value("Padding"), str_value("inclusive")],
        )
        .unwrap_err();
        assert_eq!(failure, OPTION_NAME_ERROR);
    }

    #[test]
    fn extractBetween_option_pair_error() {
        let failure = extract_between_builtin(
            str_value("abc"),
            str_value("a"),
            str_value("b"),
            vec![str_value("Boundaries")],
        )
        .unwrap_err();
        assert_eq!(failure, OPTION_PAIR_ERROR);
    }

    #[test]
    fn extractBetween_missing_boundary_propagates() {
        let extracted = extract_between_builtin(
            str_value("Planner<GPU>"),
            str_value("<missing>"),
            str_value(">"),
            vec![],
        )
        .expect("extractBetween");
        assert_eq!(extracted, str_value("<missing>"));
    }

    #[test]
    fn extractBetween_cell_boundary_arguments() {
        // Every argument is a 1x1 cell; the boundaries hold char rows.
        let single = |element: Value| CellArray::new(vec![element], 1, 1).unwrap();
        let subject = single(str_value("A<GPU>"));
        let open = single(Value::CharArray(CharArray::new_row("<")));
        let close = single(Value::CharArray(CharArray::new_row(">")));
        let extracted = extract_between_builtin(
            Value::Cell(subject),
            Value::Cell(open),
            Value::Cell(close),
            vec![],
        )
        .expect("extractBetween");
        let out = match extracted {
            Value::Cell(out) => out,
            other => panic!("expected cell array, got {other:?}"),
        };
        let element = out.get(0, 0).unwrap();
        assert_eq!(element, str_value("GPU"));
    }

    #[test]
    #[cfg(feature = "doc_export")]
    fn extractBetween_doc_examples_present() {
        let examples = test_support::doc_examples(DOC_MD);
        assert!(!examples.is_empty());
    }
}