runmat_runtime/builtins/strings/transform/
erasebetween.rs

1//! MATLAB-compatible `eraseBetween` builtin with GPU-aware semantics for RunMat.
2
3use std::cmp::min;
4
5use crate::builtins::common::broadcast::{broadcast_index, broadcast_shapes, compute_strides};
6use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
7use crate::{
8    gather_if_needed, make_cell_with_shape, register_builtin_fusion_spec, register_builtin_gpu_spec,
9};
10use runmat_builtins::{CharArray, IntValue, StringArray, Value};
11use runmat_macros::runtime_builtin;
12
13#[cfg(feature = "doc_export")]
14use crate::register_builtin_doc_text;
15
16use crate::builtins::common::spec::{
17    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
18    ReductionNaN, ResidencyPolicy, ShapeRequirements,
19};
20
21#[cfg(feature = "doc_export")]
22pub const DOC_MD: &str = r#"---
23title: "eraseBetween"
24category: "strings/transform"
25keywords: ["eraseBetween", "delete substring", "boundaries", "inclusive", "exclusive", "string array"]
26summary: "Delete text that lies between boundary markers while matching MATLAB-compatible name-value options."
27references:
28  - https://www.mathworks.com/help/matlab/ref/string.erasebetween.html
29gpu_support:
30  elementwise: false
31  reduction: false
32  precisions: []
33  broadcasting: "matlab"
34  notes: "Executes on the CPU. RunMat gathers GPU-resident inputs before deleting text and returns host-side results; the builtin is registered as an Accelerate sink so fusion never keeps the data on device."
35fusion:
36  elementwise: false
37  reduction: false
38  max_inputs: 3
39  constants: "inline"
40requires_feature: null
41tested:
42  unit: "builtins::strings::transform::erasebetween::tests"
43  integration: "builtins::strings::transform::erasebetween::tests::eraseBetween_numeric_positions_array"
44---
45
46# What does the `eraseBetween` function do in MATLAB / RunMat?
47`eraseBetween(text, start, stop)` removes the portion of `text` that appears between two boundary
48markers. Boundaries can be substrings or numeric positions. The builtin mirrors MATLAB semantics,
49including support for string arrays, character arrays, cell arrays, implicit expansion, and the
50`'Boundaries'` name-value argument that toggles inclusive or exclusive behaviour.
51
52## How does the `eraseBetween` function behave in MATLAB / RunMat?
53- Accepts **string scalars**, **string arrays**, **character arrays** (row-by-row), and **cell
54  arrays** containing string scalars or character vectors; the output keeps the same container type.
55- Boundary arguments can be text or numeric positions. Both boundaries in a call must use the same
56  representation—mixing text and numeric markers raises a size/type error.
57- Text boundaries are **exclusive** by default: the markers are preserved while the enclosed text is
58  deleted. Numeric positions are **inclusive** by default: characters at `startPos` and `endPos` are
59  deleted together with the interior.
60- `'Boundaries','inclusive'` removes the markers themselves; `'Boundaries','exclusive'` keeps them.
61  The option is case-insensitive and must be supplied as name-value pairs.
62- Missing string scalars propagate (the MATLAB `missing` placeholder in either boundary or text
63  yields `<missing>` in the result). When a boundary cannot be located, `eraseBetween` returns the
64  original element unchanged.
65- Numeric positions are validated as positive integers, clamped to the string length, and interpreted
66  using MATLAB’s 1-based indexing rules.
67
68## `eraseBetween` Function GPU Execution Behaviour
69The builtin performs all work on the CPU. When any argument is GPU-resident, RunMat gathers the
70values first, applies the deletions on the host, and returns host-resident outputs. Providers do
71not need to expose device kernels, and fusion planning treats `eraseBetween` as a residency sink so
72surrounding expressions will gather automatically.
73
74## Examples of using the `eraseBetween` function in MATLAB / RunMat
75
76### Removing text between substrings
77```matlab
78txt = "The quick brown fox";
79result = eraseBetween(txt, "quick", " fox");
80```
81Expected output:
82```matlab
83result = "The quick fox"
84```
85
86### Deleting substrings across a string array
87```matlab
88str = ["The quick brown fox jumps"; "over the lazy dog"];
89starts = ["quick"; "the"];
90ends = [" fox"; " dog"];
91trimmed = eraseBetween(str, starts, ends);
92```
93Expected output:
94```matlab
95trimmed = 2×1 string
96    "The quick fox jumps"
97    "over the dog"
98```
99
100### Removing characters between numeric positions
101```matlab
102name = "Edgar Allen Poe";
103short = eraseBetween(name, 6, 11);
104```
105Expected output:
106```matlab
107short = "Edgar Poe"
108```
109
110### Using inclusive boundaries to drop the markers
111```matlab
112sentence = "The quick brown fox jumps over the lazy dog";
113collapsed = eraseBetween(sentence, " brown", "lazy", "Boundaries", "inclusive");
114```
115Expected output:
116```matlab
117collapsed = "The quick dog"
118```
119
120### Operating on character arrays while preserving padding
121```matlab
122chars = char("Server<GPU>", "Engine<CPU>");
123trimmed = eraseBetween(chars, "<", ">", "Boundaries", "inclusive");
124```
125Expected output:
126```matlab
127trimmed =
128
129  2×6 char array
130
131    "Server"
132    "Engine"
133```
134
135### Preserving element types in cell arrays
136```matlab
137C = {'alpha<1>', "beta<2>";
138     'gamma<3>', "delta<4>"};
139clean = eraseBetween(C, "<", ">", "Boundaries", "inclusive");
140```
141Expected output:
142```matlab
143clean =
144  2×2 cell array
145    {'alpha'}    {"beta"}
146    {'gamma'}    {"delta"}
147```
148
149### Handling missing strings safely
150```matlab
151texts = [missing, "Planner<GPU>"];
152result = eraseBetween(texts, "<", ">");
153```
154Expected output:
155```matlab
156result = 1×2 string
    "<missing>"    "Planner<>"
158```
159
160## GPU residency in RunMat (Do I need `gpuArray`?)
161No. RunMat automatically gathers device-resident inputs, performs the deletion on the CPU, and
162returns host outputs. Manual `gpuArray` / `gather` calls are unnecessary; they are honoured only for
163compatibility with MATLAB when you explicitly need to control residency.
164
165## FAQ
166
167### Which argument types does `eraseBetween` accept?
168The first argument can be a string scalar, string array, character array, or cell array of character
169vectors / string scalars. Boundary arguments must both be text markers or both be numeric positions.
170
171### What happens if a boundary is not found?
172The original text is returned unchanged. Missing string scalars also propagate unchanged.
173
174### How does `'Boundaries','inclusive'` interact with text markers?
175Inclusive mode removes the matched start and end markers together with the enclosed text. Exclusive
176mode keeps the markers and removes only the interior.
177
178### Can I broadcast scalar boundaries across an array input?
Yes. Scalar markers follow MATLAB implicit expansion rules. When the text input is a character array
or cell array, the markers must broadcast to exactly the size of the text input.
181
182### Are GPU inputs supported?
183GPU values are gathered to host memory before processing. The builtin always returns host-resident
184outputs and is registered as an Accelerate sink, so fusion planning does not keep text on the GPU.
185
186### Does `eraseBetween` validate numeric positions?
187Yes. Positions are parsed as positive integers using MATLAB’s 1-based indexing. Stops are clamped to
188the string length, and start positions that lie beyond the text leave the element unchanged.
189
190## See Also
191[extractBetween](./extractbetween), [erase](./erase), [replace](./replace), [split](./split), [join](./join)
192
193## Source & Feedback
194- Implementation: [`crates/runmat-runtime/src/builtins/strings/transform/erasebetween.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/transform/erasebetween.rs)
195- Found an issue? Please [open a GitHub issue](https://github.com/runmat-org/runmat/issues/new/choose) with a minimal reproduction.
196"#;
197
/// GPU execution metadata for `eraseBetween`.
///
/// Declares no supported precisions and no provider hooks, and uses the
/// `GatherImmediately` residency policy; the `notes` field records that the
/// builtin runs on the CPU.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "eraseBetween",
    op_kind: GpuOpKind::Custom("string-transform"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::Matlab,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Runs on the CPU; GPU-resident inputs are gathered before deletion and outputs remain on the host.",
};

// Hand the GPU spec to the crate's registration macro.
register_builtin_gpu_spec!(GPU_SPEC);
214
/// Fusion metadata for `eraseBetween`.
///
/// Provides neither an elementwise nor a reduction entry (both are `None`);
/// the `notes` field records that the builtin is excluded from fusion plans.
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "eraseBetween",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Pure string manipulation builtin; excluded from fusion plans and gathers GPU inputs immediately.",
};

// Hand the fusion spec to the crate's registration macro.
register_builtin_fusion_spec!(FUSION_SPEC);

// The Markdown help text is only registered when the `doc_export` feature is enabled.
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("eraseBetween", DOC_MD);
229
/// Builtin name; used as the prefix when wrapping helper errors and passed to
/// the broadcasting helpers.
const FN_NAME: &str = "eraseBetween";
/// Returned when the first argument is not a supported text container.
const ARG_TYPE_ERROR: &str = "eraseBetween: first argument must be a string array, character array, or cell array of character vectors";
/// Returned when the start/end arguments are not both text or both numeric,
/// or when a boundary has an unsupported type.
const BOUNDARY_TYPE_ERROR: &str =
    "eraseBetween: start and end arguments must both be text or both be numeric positions";
/// Returned when a numeric boundary is not a positive integer.
const POSITION_TYPE_ERROR: &str = "eraseBetween: position arguments must be positive integers";
/// Returned when the trailing arguments do not form name-value pairs.
const OPTION_PAIR_ERROR: &str = "eraseBetween: name-value arguments must appear in pairs";
/// Returned when an option name is not text or is not `Boundaries`.
const OPTION_NAME_ERROR: &str = "eraseBetween: unrecognized parameter name";
/// Returned when the `Boundaries` value is not `inclusive` or `exclusive`.
const OPTION_VALUE_ERROR: &str =
    "eraseBetween: 'Boundaries' must be either 'inclusive' or 'exclusive'";
/// Returned when a cell element is neither a string scalar nor a character vector.
const CELL_ELEMENT_ERROR: &str =
    "eraseBetween: cell array elements must be string scalars or character vectors";
/// Returned when the broadcast result cannot be stored in the text input's container.
const SIZE_MISMATCH_ERROR: &str =
    "eraseBetween: boundary sizes must be compatible with the text input";
243
/// Whether the boundary markers/positions themselves are erased.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BoundariesMode {
    /// Keep the boundaries and erase only what lies between them
    /// (the default for text boundaries).
    Exclusive,
    /// Erase the boundaries together with what lies between them
    /// (the default for numeric positions).
    Inclusive,
}
249
250#[runtime_builtin(
251    name = "eraseBetween",
252    category = "strings/transform",
253    summary = "Delete text between boundary markers with MATLAB-compatible semantics.",
254    keywords = "eraseBetween,delete,boundaries,strings",
255    accel = "sink"
256)]
257fn erase_between_builtin(
258    text: Value,
259    start: Value,
260    stop: Value,
261    rest: Vec<Value>,
262) -> Result<Value, String> {
263    let text = gather_if_needed(&text).map_err(|e| format!("{FN_NAME}: {e}"))?;
264    let start = gather_if_needed(&start).map_err(|e| format!("{FN_NAME}: {e}"))?;
265    let stop = gather_if_needed(&stop).map_err(|e| format!("{FN_NAME}: {e}"))?;
266
267    let mode_override = parse_boundaries_option(&rest)?;
268
269    let normalized_text = NormalizedText::from_value(text)?;
270    let start_boundary = BoundaryArg::from_value(start)?;
271    let stop_boundary = BoundaryArg::from_value(stop)?;
272
273    if start_boundary.kind() != stop_boundary.kind() {
274        return Err(BOUNDARY_TYPE_ERROR.to_string());
275    }
276    let boundary_kind = start_boundary.kind();
277    let effective_mode = mode_override.unwrap_or(match boundary_kind {
278        BoundaryKind::Text => BoundariesMode::Exclusive,
279        BoundaryKind::Position => BoundariesMode::Inclusive,
280    });
281
282    let start_shape = start_boundary.shape();
283    let stop_shape = stop_boundary.shape();
284    let text_shape = normalized_text.shape();
285
286    let shape_ts = broadcast_shapes(FN_NAME, text_shape, start_shape)?;
287    let output_shape = broadcast_shapes(FN_NAME, &shape_ts, stop_shape)?;
288    if !normalized_text.supports_shape(&output_shape) {
289        return Err(SIZE_MISMATCH_ERROR.to_string());
290    }
291
292    let total: usize = output_shape.iter().copied().product();
293    if total == 0 {
294        return normalized_text.into_value(Vec::new(), output_shape);
295    }
296
297    let text_strides = compute_strides(text_shape);
298    let start_strides = compute_strides(start_shape);
299    let stop_strides = compute_strides(stop_shape);
300
301    let mut results = Vec::with_capacity(total);
302
303    for idx in 0..total {
304        let text_idx = broadcast_index(idx, &output_shape, text_shape, &text_strides);
305        let start_idx = broadcast_index(idx, &output_shape, start_shape, &start_strides);
306        let stop_idx = broadcast_index(idx, &output_shape, stop_shape, &stop_strides);
307
308        let result = match boundary_kind {
309            BoundaryKind::Text => {
310                let text_value = normalized_text.data(text_idx);
311                let start_value = start_boundary.text(start_idx);
312                let stop_value = stop_boundary.text(stop_idx);
313                erase_with_text_boundaries(text_value, start_value, stop_value, effective_mode)
314            }
315            BoundaryKind::Position => {
316                let text_value = normalized_text.data(text_idx);
317                let start_value = start_boundary.position(start_idx);
318                let stop_value = stop_boundary.position(stop_idx);
319                erase_with_positions(text_value, start_value, stop_value, effective_mode)
320            }
321        };
322        results.push(result);
323    }
324
325    normalized_text.into_value(results, output_shape)
326}
327
328fn parse_boundaries_option(args: &[Value]) -> Result<Option<BoundariesMode>, String> {
329    if args.is_empty() {
330        return Ok(None);
331    }
332    if !args.len().is_multiple_of(2) {
333        return Err(OPTION_PAIR_ERROR.to_string());
334    }
335
336    let mut mode: Option<BoundariesMode> = None;
337    let mut idx = 0;
338    while idx < args.len() {
339        let name_value = gather_if_needed(&args[idx]).map_err(|e| format!("{FN_NAME}: {e}"))?;
340        let name = value_to_string(&name_value).ok_or_else(|| OPTION_NAME_ERROR.to_string())?;
341        if !name.eq_ignore_ascii_case("boundaries") {
342            return Err(OPTION_NAME_ERROR.to_string());
343        }
344        let value = gather_if_needed(&args[idx + 1]).map_err(|e| format!("{FN_NAME}: {e}"))?;
345        let value_str = value_to_string(&value).ok_or_else(|| OPTION_VALUE_ERROR.to_string())?;
346        let parsed_mode = if value_str.eq_ignore_ascii_case("inclusive") {
347            BoundariesMode::Inclusive
348        } else if value_str.eq_ignore_ascii_case("exclusive") {
349            BoundariesMode::Exclusive
350        } else {
351            return Err(OPTION_VALUE_ERROR.to_string());
352        };
353        mode = Some(parsed_mode);
354        idx += 2;
355    }
356    Ok(mode)
357}
358
359fn value_to_string(value: &Value) -> Option<String> {
360    match value {
361        Value::String(s) => Some(s.clone()),
362        Value::StringArray(sa) if sa.data.len() == 1 => Some(sa.data[0].clone()),
363        Value::CharArray(ca) if ca.rows <= 1 => {
364            if ca.rows == 0 {
365                Some(String::new())
366            } else {
367                Some(char_row_to_string_slice(&ca.data, ca.cols, 0))
368            }
369        }
370        Value::CharArray(_) => None,
371        Value::Cell(cell) if cell.data.len() == 1 => {
372            let element = &cell.data[0];
373            value_to_string(element)
374        }
375        _ => None,
376    }
377}
378
/// Outcome of erasing within a single text element.
#[derive(Clone)]
struct EraseResult {
    /// The resulting text; missing elements carry the literal `<missing>`.
    text: String,
}

impl EraseResult {
    /// Result for a missing element, represented by the text `<missing>`.
    fn missing() -> Self {
        Self::text(String::from("<missing>"))
    }

    /// Wraps already-computed text.
    fn text(text: String) -> Self {
        EraseResult { text }
    }
}
395
396fn erase_with_text_boundaries(
397    text: &str,
398    start: &str,
399    stop: &str,
400    mode: BoundariesMode,
401) -> EraseResult {
402    if is_missing_string(text) || is_missing_string(start) || is_missing_string(stop) {
403        return EraseResult::missing();
404    }
405
406    if let Some(start_idx) = text.find(start) {
407        let search_start = start_idx + start.len();
408        if search_start > text.len() {
409            return EraseResult::text(text.to_string());
410        }
411        if let Some(relative_end) = text[search_start..].find(stop) {
412            let end_idx = search_start + relative_end;
413            match mode {
414                BoundariesMode::Inclusive => {
415                    let end_capture = min(text.len(), end_idx + stop.len());
416                    let mut result = String::with_capacity(text.len());
417                    result.push_str(&text[..start_idx]);
418                    result.push_str(&text[end_capture..]);
419                    EraseResult::text(result)
420                }
421                BoundariesMode::Exclusive => {
422                    let mut result = String::with_capacity(text.len());
423                    result.push_str(&text[..search_start]);
424                    result.push_str(&text[end_idx..]);
425                    EraseResult::text(result)
426                }
427            }
428        } else {
429            EraseResult::text(text.to_string())
430        }
431    } else {
432        EraseResult::text(text.to_string())
433    }
434}
435
436fn erase_with_positions(
437    text: &str,
438    start: usize,
439    stop: usize,
440    mode: BoundariesMode,
441) -> EraseResult {
442    if is_missing_string(text) {
443        return EraseResult::missing();
444    }
445    if text.is_empty() {
446        return EraseResult::text(String::new());
447    }
448    let chars: Vec<char> = text.chars().collect();
449    let len = chars.len();
450    if len == 0 {
451        return EraseResult::text(String::new());
452    }
453
454    if start == 0 || stop == 0 {
455        return EraseResult::text(text.to_string());
456    }
457
458    if start > len {
459        return EraseResult::text(text.to_string());
460    }
461    let stop_clamped = stop.min(len);
462
463    match mode {
464        BoundariesMode::Inclusive => {
465            if stop_clamped < start {
466                return EraseResult::text(text.to_string());
467            }
468            let start_idx = start - 1;
469            let end_idx = stop_clamped - 1;
470            if start_idx >= len || end_idx >= len || start_idx > end_idx {
471                EraseResult::text(text.to_string())
472            } else {
473                let mut result = String::with_capacity(len);
474                for (idx, ch) in chars.iter().enumerate() {
475                    if idx < start_idx || idx > end_idx {
476                        result.push(*ch);
477                    }
478                }
479                EraseResult::text(result)
480            }
481        }
482        BoundariesMode::Exclusive => {
483            if start + 1 >= stop_clamped {
484                return EraseResult::text(text.to_string());
485            }
486            let start_idx = start;
487            let end_idx = stop_clamped - 2;
488            if start_idx >= len || end_idx >= len || start_idx > end_idx {
489                EraseResult::text(text.to_string())
490            } else {
491                let mut result = String::with_capacity(len);
492                for (idx, ch) in chars.iter().enumerate() {
493                    if idx >= start_idx && idx <= end_idx {
494                        continue;
495                    }
496                    result.push(*ch);
497                }
498                EraseResult::text(result)
499            }
500        }
501    }
502}
503
/// Bookkeeping needed to rebuild a cell array output.
#[derive(Clone, Debug)]
struct CellInfo {
    /// Shape copied from the input cell array.
    shape: Vec<usize>,
    /// One entry per cell element, recording whether it was string or char text.
    element_kinds: Vec<CellElementKind>,
}
509
/// Original representation of a cell element, so the output can use the same one.
#[derive(Clone, Debug)]
enum CellElementKind {
    /// The element was a string scalar (or a one-element string array).
    String,
    /// The element was a character array with at most one row.
    Char,
}
515
/// Container kind of the text input; decides how results are packed on output.
#[derive(Clone, Debug)]
enum TextKind {
    /// A single string value.
    StringScalar,
    /// A string array.
    StringArray,
    /// A character array; each of its `rows` is processed as one element.
    CharArray { rows: usize },
    /// A cell array, with the details needed to rebuild it.
    CellArray(CellInfo),
}
523
/// Text input flattened to one `String` per element, plus its shape and
/// original container kind.
#[derive(Clone, Debug)]
struct NormalizedText {
    /// One entry per element, indexed linearly.
    data: Vec<String>,
    /// Shape used for broadcasting against the boundary arguments.
    shape: Vec<usize>,
    /// Original container kind of the input.
    kind: TextKind,
}
530
531impl NormalizedText {
532    fn from_value(value: Value) -> Result<Self, String> {
533        match value {
534            Value::String(s) => Ok(Self {
535                data: vec![s],
536                shape: vec![1, 1],
537                kind: TextKind::StringScalar,
538            }),
539            Value::StringArray(sa) => Ok(Self {
540                data: sa.data.clone(),
541                shape: sa.shape.clone(),
542                kind: TextKind::StringArray,
543            }),
544            Value::CharArray(ca) => {
545                let rows = ca.rows;
546                let mut data = Vec::with_capacity(rows);
547                for row in 0..rows {
548                    data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
549                }
550                Ok(Self {
551                    data,
552                    shape: vec![rows, 1],
553                    kind: TextKind::CharArray { rows },
554                })
555            }
556            Value::Cell(cell) => {
557                let shape = cell.shape.clone();
558                let mut data = Vec::with_capacity(cell.data.len());
559                let mut kinds = Vec::with_capacity(cell.data.len());
560                for element in &cell.data {
561                    match &**element {
562                        Value::String(s) => {
563                            data.push(s.clone());
564                            kinds.push(CellElementKind::String);
565                        }
566                        Value::StringArray(sa) if sa.data.len() == 1 => {
567                            data.push(sa.data[0].clone());
568                            kinds.push(CellElementKind::String);
569                        }
570                        Value::CharArray(ca) if ca.rows <= 1 => {
571                            if ca.rows == 0 {
572                                data.push(String::new());
573                            } else {
574                                data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
575                            }
576                            kinds.push(CellElementKind::Char);
577                        }
578                        Value::CharArray(_) => return Err(CELL_ELEMENT_ERROR.to_string()),
579                        _ => return Err(CELL_ELEMENT_ERROR.to_string()),
580                    }
581                }
582                Ok(Self {
583                    data,
584                    shape: shape.clone(),
585                    kind: TextKind::CellArray(CellInfo {
586                        shape,
587                        element_kinds: kinds,
588                    }),
589                })
590            }
591            _ => Err(ARG_TYPE_ERROR.to_string()),
592        }
593    }
594
595    fn shape(&self) -> &[usize] {
596        &self.shape
597    }
598
599    fn data(&self, idx: usize) -> &str {
600        &self.data[idx]
601    }
602
603    fn supports_shape(&self, output_shape: &[usize]) -> bool {
604        match &self.kind {
605            TextKind::StringScalar => true,
606            TextKind::StringArray => true,
607            TextKind::CharArray { .. } => output_shape == self.shape,
608            TextKind::CellArray(info) => output_shape == info.shape,
609        }
610    }
611
612    fn into_value(
613        self,
614        results: Vec<EraseResult>,
615        output_shape: Vec<usize>,
616    ) -> Result<Value, String> {
617        match self.kind {
618            TextKind::StringScalar => {
619                let total: usize = output_shape.iter().product();
620                if total == 0 {
621                    let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
622                    let array = StringArray::new(data, output_shape)
623                        .map_err(|e| format!("{FN_NAME}: {e}"))?;
624                    return Ok(Value::StringArray(array));
625                }
626
627                if results.len() <= 1 {
628                    let value = results
629                        .into_iter()
630                        .next()
631                        .unwrap_or_else(|| EraseResult::text(String::new()));
632                    Ok(Value::String(value.text))
633                } else {
634                    let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
635                    let array = StringArray::new(data, output_shape)
636                        .map_err(|e| format!("{FN_NAME}: {e}"))?;
637                    Ok(Value::StringArray(array))
638                }
639            }
640            TextKind::StringArray => {
641                let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
642                let array =
643                    StringArray::new(data, output_shape).map_err(|e| format!("{FN_NAME}: {e}"))?;
644                Ok(Value::StringArray(array))
645            }
646            TextKind::CharArray { rows } => {
647                if rows == 0 {
648                    return CharArray::new(Vec::new(), 0, 0)
649                        .map(Value::CharArray)
650                        .map_err(|e| format!("{FN_NAME}: {e}"));
651                }
652                if results.len() != rows {
653                    return Err(SIZE_MISMATCH_ERROR.to_string());
654                }
655                let mut max_width = 0usize;
656                let mut row_strings = Vec::with_capacity(rows);
657                for result in &results {
658                    let width = result.text.chars().count();
659                    max_width = max_width.max(width);
660                    row_strings.push(result.text.clone());
661                }
662                let mut flattened = Vec::with_capacity(rows * max_width);
663                for row in row_strings {
664                    let mut chars: Vec<char> = row.chars().collect();
665                    if chars.len() < max_width {
666                        chars.resize(max_width, ' ');
667                    }
668                    flattened.extend(chars);
669                }
670                CharArray::new(flattened, rows, max_width)
671                    .map(Value::CharArray)
672                    .map_err(|e| format!("{FN_NAME}: {e}"))
673            }
674            TextKind::CellArray(info) => {
675                if results.len() != info.element_kinds.len() {
676                    return Err(SIZE_MISMATCH_ERROR.to_string());
677                }
678                let mut values = Vec::with_capacity(results.len());
679                for (idx, result) in results.into_iter().enumerate() {
680                    match info.element_kinds[idx] {
681                        CellElementKind::String => values.push(Value::String(result.text)),
682                        CellElementKind::Char => {
683                            let ca = CharArray::new_row(&result.text);
684                            values.push(Value::CharArray(ca));
685                        }
686                    }
687                }
688                make_cell_with_shape(values, info.shape)
689            }
690        }
691    }
692}
693
/// Representation used by a boundary argument.
#[derive(Clone, Debug, PartialEq, Eq)]
enum BoundaryKind {
    /// The boundary is given as marker text.
    Text,
    /// The boundary is given as numeric positions.
    Position,
}
699
/// A normalised start or stop argument.
#[derive(Clone, Debug)]
enum BoundaryArg {
    /// Marker text, one entry per element.
    Text(BoundaryText),
    /// Validated numeric positions, one entry per element.
    Position(BoundaryPositions),
}
705
706impl BoundaryArg {
707    fn from_value(value: Value) -> Result<Self, String> {
708        match value {
709            Value::String(_) | Value::StringArray(_) | Value::CharArray(_) | Value::Cell(_) => {
710                BoundaryText::from_value(value).map(BoundaryArg::Text)
711            }
712            Value::Num(_) | Value::Int(_) | Value::Tensor(_) => {
713                BoundaryPositions::from_value(value).map(BoundaryArg::Position)
714            }
715            other => Err(format!(
716                "{BOUNDARY_TYPE_ERROR}: unsupported argument {other:?}"
717            )),
718        }
719    }
720
721    fn kind(&self) -> BoundaryKind {
722        match self {
723            BoundaryArg::Text(_) => BoundaryKind::Text,
724            BoundaryArg::Position(_) => BoundaryKind::Position,
725        }
726    }
727
728    fn shape(&self) -> &[usize] {
729        match self {
730            BoundaryArg::Text(text) => &text.shape,
731            BoundaryArg::Position(pos) => &pos.shape,
732        }
733    }
734
735    fn text(&self, idx: usize) -> &str {
736        match self {
737            BoundaryArg::Text(text) => &text.data[idx],
738            BoundaryArg::Position(_) => unreachable!(),
739        }
740    }
741
742    fn position(&self, idx: usize) -> usize {
743        match self {
744            BoundaryArg::Position(pos) => pos.data[idx],
745            BoundaryArg::Text(_) => unreachable!(),
746        }
747    }
748}
749
/// Text boundary markers flattened to one `String` per element.
#[derive(Clone, Debug)]
struct BoundaryText {
    /// Marker text, indexed linearly.
    data: Vec<String>,
    /// Shape used for broadcasting against the text input.
    shape: Vec<usize>,
}
755
756impl BoundaryText {
757    fn from_value(value: Value) -> Result<Self, String> {
758        match value {
759            Value::String(s) => Ok(Self {
760                data: vec![s],
761                shape: vec![1, 1],
762            }),
763            Value::StringArray(sa) => Ok(Self {
764                data: sa.data.clone(),
765                shape: sa.shape.clone(),
766            }),
767            Value::CharArray(ca) => {
768                let mut data = Vec::with_capacity(ca.rows);
769                for row in 0..ca.rows {
770                    data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
771                }
772                Ok(Self {
773                    data,
774                    shape: vec![ca.rows, 1],
775                })
776            }
777            Value::Cell(cell) => {
778                let shape = cell.shape.clone();
779                let mut data = Vec::with_capacity(cell.data.len());
780                for element in &cell.data {
781                    match &**element {
782                        Value::String(s) => data.push(s.clone()),
783                        Value::StringArray(sa) if sa.data.len() == 1 => {
784                            data.push(sa.data[0].clone());
785                        }
786                        Value::CharArray(ca) if ca.rows <= 1 => {
787                            if ca.rows == 0 {
788                                data.push(String::new());
789                            } else {
790                                data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
791                            }
792                        }
793                        Value::CharArray(_) => return Err(CELL_ELEMENT_ERROR.to_string()),
794                        _ => return Err(CELL_ELEMENT_ERROR.to_string()),
795                    }
796                }
797                Ok(Self { data, shape })
798            }
799            _ => Err(BOUNDARY_TYPE_ERROR.to_string()),
800        }
801    }
802}
803
/// Numeric boundary positions after validation.
#[derive(Clone, Debug)]
struct BoundaryPositions {
    /// Positions as parsed by `parse_position` / `parse_position_int`, indexed linearly.
    data: Vec<usize>,
    /// Shape used for broadcasting against the text input.
    shape: Vec<usize>,
}
809
810impl BoundaryPositions {
811    fn from_value(value: Value) -> Result<Self, String> {
812        match value {
813            Value::Num(n) => Ok(Self {
814                data: vec![parse_position(n)?],
815                shape: vec![1, 1],
816            }),
817            Value::Int(i) => Ok(Self {
818                data: vec![parse_position_int(i)?],
819                shape: vec![1, 1],
820            }),
821            Value::Tensor(t) => {
822                let mut data = Vec::with_capacity(t.data.len());
823                for &entry in &t.data {
824                    data.push(parse_position(entry)?);
825                }
826                Ok(Self {
827                    data,
828                    shape: if t.shape.is_empty() {
829                        vec![t.rows, t.cols.max(1)]
830                    } else {
831                        t.shape
832                    },
833                })
834            }
835            _ => Err(BOUNDARY_TYPE_ERROR.to_string()),
836        }
837    }
838}
839
840fn parse_position(value: f64) -> Result<usize, String> {
841    if !value.is_finite() || value < 1.0 {
842        return Err(POSITION_TYPE_ERROR.to_string());
843    }
844    if (value.fract()).abs() > f64::EPSILON {
845        return Err(POSITION_TYPE_ERROR.to_string());
846    }
847    if value > (usize::MAX as f64) {
848        return Err(POSITION_TYPE_ERROR.to_string());
849    }
850    Ok(value as usize)
851}
852
853fn parse_position_int(value: IntValue) -> Result<usize, String> {
854    let val = value.to_i64();
855    if val <= 0 {
856        return Err(POSITION_TYPE_ERROR.to_string());
857    }
858    Ok(val as usize)
859}
860
#[cfg(test)]
mod tests {
    #![allow(non_snake_case)]

    use super::*;
    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;
    use runmat_builtins::{CellArray, CharArray, StringArray, Tensor};

    /// Wraps a literal in a scalar string value.
    fn scalar(literal: &str) -> Value {
        Value::String(literal.into())
    }

    /// Builds the `"Boundaries", <mode>` name-value pair out of string scalars.
    fn boundaries(mode: &str) -> Vec<Value> {
        vec![scalar("Boundaries"), scalar(mode)]
    }

    /// Invokes the builtin and unwraps the successful result.
    fn erase(subject: Value, start: Value, stop: Value, options: Vec<Value>) -> Value {
        erase_between_builtin(subject, start, stop, options).expect("eraseBetween")
    }

    /// Extracts the string array from a result, panicking on any other variant.
    fn into_string_array(value: Value) -> StringArray {
        match value {
            Value::StringArray(sa) => sa,
            other => panic!("expected string array, got {other:?}"),
        }
    }

    /// Collects the characters of a char-array result, panicking on any other variant.
    fn char_text(value: Value) -> String {
        match value {
            Value::CharArray(out) => out.data.iter().collect(),
            other => panic!("expected char array, got {other:?}"),
        }
    }

    #[test]
    fn eraseBetween_text_default_exclusive() {
        let erased = erase(
            scalar("The quick brown fox"),
            scalar("quick"),
            scalar(" fox"),
            Vec::new(),
        );
        assert_eq!(erased, scalar("The quick fox"));
    }

    #[test]
    fn eraseBetween_text_inclusive_option() {
        let erased = erase(
            scalar("The quick brown fox jumps over the lazy dog"),
            scalar(" brown"),
            scalar("lazy"),
            boundaries("inclusive"),
        );
        assert_eq!(erased, scalar("The quick dog"));
    }

    #[test]
    fn eraseBetween_numeric_positions_default_inclusive() {
        let erased = erase(
            scalar("Edgar Allen Poe"),
            Value::Num(6.0),
            Value::Num(11.0),
            Vec::new(),
        );
        assert_eq!(erased, scalar("Edgar Poe"));
    }

    #[test]
    fn eraseBetween_numeric_positions_int_inputs() {
        let erased = erase(
            scalar("abcdef"),
            Value::Int(IntValue::I32(2)),
            Value::Int(IntValue::I32(5)),
            Vec::new(),
        );
        assert_eq!(erased, scalar("af"));
    }

    #[test]
    fn eraseBetween_numeric_positions_exclusive_option() {
        let erased = erase(
            scalar("small|medium|large"),
            Value::Num(6.0),
            Value::Num(13.0),
            boundaries("exclusive"),
        );
        assert_eq!(erased, scalar("small||large"));
    }

    #[test]
    fn eraseBetween_start_not_found_returns_original() {
        let erased = erase(
            scalar("RunMat Accelerate"),
            scalar("<"),
            scalar(">"),
            Vec::new(),
        );
        assert_eq!(erased, scalar("RunMat Accelerate"));
    }

    #[test]
    fn eraseBetween_stop_not_found_returns_original() {
        let erased = erase(scalar("Device<GPU>"), scalar("<"), scalar(")"), Vec::new());
        assert_eq!(erased, scalar("Device<GPU>"));
    }

    #[test]
    fn eraseBetween_missing_string_propagates() {
        let subject = StringArray::new(vec!["<missing>".into()], vec![1, 1]).unwrap();
        let erased = erase(
            Value::StringArray(subject),
            scalar("<"),
            scalar(">"),
            Vec::new(),
        );
        let expected = StringArray::new(vec!["<missing>".into()], vec![1, 1]).unwrap();
        assert_eq!(erased, Value::StringArray(expected));
    }

    #[test]
    fn eraseBetween_zero_sized_broadcast_produces_empty_array() {
        let start = StringArray::new(Vec::new(), vec![0, 1]).unwrap();
        let stop = StringArray::new(Vec::new(), vec![0, 1]).unwrap();
        let erased = erase(
            scalar("abc"),
            Value::StringArray(start),
            Value::StringArray(stop),
            Vec::new(),
        );
        let sa = into_string_array(erased);
        assert_eq!(sa.data.len(), 0);
        assert_eq!(sa.shape, vec![0, 1]);
    }

    #[test]
    fn eraseBetween_numeric_positions_array() {
        let subject = StringArray::new(vec!["abcd".into(), "wxyz".into()], vec![2, 1]).unwrap();
        let start = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
        let stop = Tensor::new(vec![3.0, 4.0], vec![2, 1]).unwrap();
        let erased = erase(
            Value::StringArray(subject),
            Value::Tensor(start),
            Value::Tensor(stop),
            Vec::new(),
        );
        let sa = into_string_array(erased);
        assert_eq!(sa.data, vec!["d".to_string(), "w".to_string()]);
        assert_eq!(sa.shape, vec![2, 1]);
    }

    #[test]
    fn eraseBetween_cell_array_preserves_types() {
        let subject = CellArray::new(
            vec![
                Value::CharArray(CharArray::new_row("A[B]C")),
                scalar("Planner<GPU>"),
            ],
            1,
            2,
        )
        .unwrap();
        let start = CellArray::new(vec![scalar("["), scalar("<")], 1, 2).unwrap();
        let stop = CellArray::new(vec![scalar("]"), scalar(">")], 1, 2).unwrap();
        let erased = erase(
            Value::Cell(subject),
            Value::Cell(start),
            Value::Cell(stop),
            boundaries("inclusive"),
        );
        let out = match erased {
            Value::Cell(out) => out,
            other => panic!("expected cell array, got {other:?}"),
        };
        let first = out.get(0, 0).unwrap();
        let second = out.get(0, 1).unwrap();
        assert_eq!(first, Value::CharArray(CharArray::new_row("AC")));
        assert_eq!(second, scalar("Planner"));
    }

    #[test]
    fn eraseBetween_char_array_default_and_inclusive() {
        let source = "Device<GPU>";
        let chars = CharArray::new(source.chars().collect(), 1, source.len()).unwrap();

        let default = erase(
            Value::CharArray(chars.clone()),
            scalar("<"),
            scalar(">"),
            Vec::new(),
        );
        assert_eq!(char_text(default).trim_end(), "Device<>");

        let inclusive = erase(
            Value::CharArray(chars),
            scalar("<"),
            scalar(">"),
            boundaries("inclusive"),
        );
        assert_eq!(char_text(inclusive).trim_end(), "Device");
    }

    #[test]
    fn eraseBetween_option_with_char_arrays_case_insensitive() {
        let options = vec![
            Value::CharArray(CharArray::new_row("Boundaries")),
            Value::CharArray(CharArray::new_row("INCLUSIVE")),
        ];
        let erased = erase(scalar("A<mid>B"), scalar("<"), scalar(">"), options);
        assert_eq!(erased, scalar("AB"));
    }

    #[test]
    fn eraseBetween_text_scalar_broadcast() {
        let subject =
            StringArray::new(vec!["alpha[GPU]".into(), "beta[GPU]".into()], vec![2, 1]).unwrap();
        let erased = erase(
            Value::StringArray(subject),
            scalar("["),
            scalar("]"),
            Vec::new(),
        );
        let sa = into_string_array(erased);
        assert_eq!(sa.data, vec!["alpha[]".to_string(), "beta[]".to_string()]);
    }

    #[test]
    fn eraseBetween_option_invalid_value() {
        let err = erase_between_builtin(
            scalar("abc"),
            scalar("a"),
            scalar("c"),
            boundaries("middle"),
        )
        .unwrap_err();
        assert_eq!(err, OPTION_VALUE_ERROR);
    }

    #[test]
    fn eraseBetween_option_name_error() {
        let options = vec![scalar("Padding"), scalar("inclusive")];
        let err = erase_between_builtin(scalar("abc"), scalar("a"), scalar("c"), options)
            .unwrap_err();
        assert_eq!(err, OPTION_NAME_ERROR);
    }

    #[test]
    fn eraseBetween_option_pair_error() {
        // A lone option name without its value must be rejected.
        let options = vec![scalar("Boundaries")];
        let err = erase_between_builtin(scalar("abc"), scalar("a"), scalar("b"), options)
            .unwrap_err();
        assert_eq!(err, OPTION_PAIR_ERROR);
    }

    #[test]
    fn eraseBetween_position_type_error() {
        let err = erase_between_builtin(
            scalar("abc"),
            Value::Num(0.5),
            Value::Num(2.0),
            Vec::new(),
        )
        .unwrap_err();
        assert_eq!(err, POSITION_TYPE_ERROR);
    }

    #[test]
    fn eraseBetween_mixed_boundary_error() {
        // Text start combined with a numeric stop is not a valid boundary pairing.
        let err = erase_between_builtin(
            scalar("abc"),
            scalar("a"),
            Value::Num(3.0),
            Vec::new(),
        )
        .unwrap_err();
        assert_eq!(err, BOUNDARY_TYPE_ERROR);
    }

    #[test]
    #[cfg(feature = "doc_export")]
    fn eraseBetween_doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}