Skip to main content

runmat_runtime/builtins/strings/transform/
extractbetween.rs

1//! MATLAB-compatible `extractBetween` builtin with GPU-aware semantics for RunMat.
2
3use std::cmp::min;
4
5use crate::builtins::common::broadcast::{broadcast_index, broadcast_shapes, compute_strides};
6use crate::builtins::common::map_control_flow_with_builtin;
7use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
8use crate::builtins::strings::type_resolvers::text_preserve_type;
9use crate::{
10    build_runtime_error, gather_if_needed_async, make_cell_with_shape, BuiltinResult, RuntimeError,
11};
12use runmat_builtins::{CharArray, IntValue, StringArray, Value};
13use runmat_macros::runtime_builtin;
14
15use crate::builtins::common::spec::{
16    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
17    ReductionNaN, ResidencyPolicy, ShapeRequirements,
18};
19
// GPU metadata for `extractBetween`, registered via `register_gpu_spec`.
// The spec lists no supported precisions and no provider hooks and uses
// `ResidencyPolicy::GatherImmediately`; as the `notes` field states, the
// builtin runs on the CPU after gathering GPU-resident inputs and returns
// host values.
#[runmat_macros::register_gpu_spec(
    builtin_path = "crate::builtins::strings::transform::extractbetween"
)]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "extractBetween",
    op_kind: GpuOpKind::Custom("string-transform"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::Matlab,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Runs on the CPU; GPU-resident inputs are gathered before extraction and outputs are returned on the host.",
};
37
// Fusion metadata for `extractBetween`, registered via `register_fusion_spec`.
// No elementwise or reduction kernel is provided (`None` for both); per the
// `notes` field the builtin is excluded from fusion plans.
#[runmat_macros::register_fusion_spec(
    builtin_path = "crate::builtins::strings::transform::extractbetween"
)]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "extractBetween",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Pure string manipulation builtin; excluded from fusion plans and gathers GPU inputs immediately.",
};
50
// Builtin name attached to every error raised from this module.
const FN_NAME: &str = "extractBetween";
// Raised when the text input is not a string, string array, char array, or cell array.
const ARG_TYPE_ERROR: &str = "extractBetween: first argument must be a string array, character array, or cell array of character vectors";
// Raised when the start/end arguments are not both text or both numeric.
const BOUNDARY_TYPE_ERROR: &str =
    "extractBetween: start and end arguments must both be text or both be numeric positions";
// Raised when a numeric position is not a finite positive integer.
const POSITION_TYPE_ERROR: &str = "extractBetween: position arguments must be positive integers";
// Raised when the trailing name-value arguments have an odd count.
const OPTION_PAIR_ERROR: &str = "extractBetween: name-value arguments must appear in pairs";
// Raised when an option name is not text or is not `Boundaries`.
const OPTION_NAME_ERROR: &str = "extractBetween: unrecognized parameter name";
// Raised when the `Boundaries` value is neither `inclusive` nor `exclusive`.
const OPTION_VALUE_ERROR: &str =
    "extractBetween: 'Boundaries' must be either 'inclusive' or 'exclusive'";
// Raised when a cell element is not a string scalar or a single-row char vector.
const CELL_ELEMENT_ERROR: &str =
    "extractBetween: cell array elements must be string scalars or character vectors";
// Raised when the broadcast output shape cannot be represented by the text container.
const SIZE_MISMATCH_ERROR: &str =
    "extractBetween: boundary sizes must be compatible with the text input";
64
65fn runtime_error_for(message: impl Into<String>) -> RuntimeError {
66    build_runtime_error(message).with_builtin(FN_NAME).build()
67}
68
/// Forwards `err` to `map_control_flow_with_builtin` together with `FN_NAME`.
///
/// Used as the `map_err` adapter for errors coming out of
/// `gather_if_needed_async`.
fn map_flow(err: RuntimeError) -> RuntimeError {
    map_control_flow_with_builtin(err, FN_NAME)
}
72
/// Whether the boundary markers/positions themselves are part of the result.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BoundariesMode {
    /// The extracted text excludes the boundaries (default for text boundaries).
    Exclusive,
    /// The extracted text includes the boundaries (default for numeric positions).
    Inclusive,
}
78
79#[runtime_builtin(
80    name = "extractBetween",
81    category = "strings/transform",
82    summary = "Extract substrings between boundary markers using MATLAB-compatible semantics.",
83    keywords = "extractBetween,substring,boundaries,strings",
84    accel = "sink",
85    type_resolver(text_preserve_type),
86    builtin_path = "crate::builtins::strings::transform::extractbetween"
87)]
88async fn extract_between_builtin(
89    text: Value,
90    start: Value,
91    stop: Value,
92    rest: Vec<Value>,
93) -> BuiltinResult<Value> {
94    let text = gather_if_needed_async(&text).await.map_err(map_flow)?;
95    let start = gather_if_needed_async(&start).await.map_err(map_flow)?;
96    let stop = gather_if_needed_async(&stop).await.map_err(map_flow)?;
97
98    let mode_override = parse_boundaries_option(&rest).await?;
99
100    let normalized_text = NormalizedText::from_value(text)?;
101    let start_boundary = BoundaryArg::from_value(start)?;
102    let stop_boundary = BoundaryArg::from_value(stop)?;
103
104    if start_boundary.kind() != stop_boundary.kind() {
105        return Err(runtime_error_for(BOUNDARY_TYPE_ERROR));
106    }
107    let boundary_kind = start_boundary.kind();
108    let effective_mode = mode_override.unwrap_or(match boundary_kind {
109        BoundaryKind::Text => BoundariesMode::Exclusive,
110        BoundaryKind::Position => BoundariesMode::Inclusive,
111    });
112
113    let start_shape = start_boundary.shape();
114    let stop_shape = stop_boundary.shape();
115    let text_shape = normalized_text.shape();
116
117    let shape_ts = broadcast_shapes(FN_NAME, text_shape, start_shape).map_err(runtime_error_for)?;
118    let output_shape =
119        broadcast_shapes(FN_NAME, &shape_ts, stop_shape).map_err(runtime_error_for)?;
120    if !normalized_text.supports_shape(&output_shape) {
121        return Err(runtime_error_for(SIZE_MISMATCH_ERROR));
122    }
123
124    let total: usize = output_shape.iter().copied().product();
125    if total == 0 {
126        return normalized_text.into_value(Vec::new(), output_shape);
127    }
128
129    let text_strides = compute_strides(text_shape);
130    let start_strides = compute_strides(start_shape);
131    let stop_strides = compute_strides(stop_shape);
132
133    let mut results = Vec::with_capacity(total);
134
135    for idx in 0..total {
136        let text_idx = broadcast_index(idx, &output_shape, text_shape, &text_strides);
137        let start_idx = broadcast_index(idx, &output_shape, start_shape, &start_strides);
138        let stop_idx = broadcast_index(idx, &output_shape, stop_shape, &stop_strides);
139
140        let result = match boundary_kind {
141            BoundaryKind::Text => {
142                let text_value = normalized_text.data(text_idx);
143                let start_value = start_boundary.text(start_idx);
144                let stop_value = stop_boundary.text(stop_idx);
145                extract_with_text_boundaries(text_value, start_value, stop_value, effective_mode)
146            }
147            BoundaryKind::Position => {
148                let text_value = normalized_text.data(text_idx);
149                let start_value = start_boundary.position(start_idx);
150                let stop_value = stop_boundary.position(stop_idx);
151                extract_with_positions(text_value, start_value, stop_value, effective_mode)
152            }
153        };
154        results.push(result);
155    }
156
157    normalized_text.into_value(results, output_shape)
158}
159
160async fn parse_boundaries_option(args: &[Value]) -> BuiltinResult<Option<BoundariesMode>> {
161    if args.is_empty() {
162        return Ok(None);
163    }
164    if !args.len().is_multiple_of(2) {
165        return Err(runtime_error_for(OPTION_PAIR_ERROR));
166    }
167
168    let mut mode: Option<BoundariesMode> = None;
169    let mut idx = 0;
170    while idx < args.len() {
171        let name_value = gather_if_needed_async(&args[idx]).await.map_err(map_flow)?;
172        let name =
173            value_to_string(&name_value).ok_or_else(|| runtime_error_for(OPTION_NAME_ERROR))?;
174        if !name.eq_ignore_ascii_case("boundaries") {
175            return Err(runtime_error_for(OPTION_NAME_ERROR));
176        }
177        let value = gather_if_needed_async(&args[idx + 1])
178            .await
179            .map_err(map_flow)?;
180        let value_str =
181            value_to_string(&value).ok_or_else(|| runtime_error_for(OPTION_VALUE_ERROR))?;
182        let parsed_mode = if value_str.eq_ignore_ascii_case("inclusive") {
183            BoundariesMode::Inclusive
184        } else if value_str.eq_ignore_ascii_case("exclusive") {
185            BoundariesMode::Exclusive
186        } else {
187            return Err(runtime_error_for(OPTION_VALUE_ERROR));
188        };
189        mode = Some(parsed_mode);
190        idx += 2;
191    }
192    Ok(mode)
193}
194
195fn value_to_string(value: &Value) -> Option<String> {
196    match value {
197        Value::String(s) => Some(s.clone()),
198        Value::StringArray(sa) if sa.data.len() == 1 => Some(sa.data[0].clone()),
199        Value::CharArray(ca) if ca.rows <= 1 => {
200            if ca.rows == 0 {
201                Some(String::new())
202            } else {
203                Some(char_row_to_string_slice(&ca.data, ca.cols, 0))
204            }
205        }
206        Value::CharArray(_) => None,
207        Value::Cell(cell) if cell.data.len() == 1 => {
208            let element = &cell.data[0];
209            value_to_string(element)
210        }
211        _ => None,
212    }
213}
214
/// Outcome of extracting from a single text element.
#[derive(Clone)]
struct ExtractResult {
    text: String,
}

impl ExtractResult {
    /// Result representing a missing string, encoded as the literal `<missing>`.
    fn missing() -> Self {
        Self::text(String::from("<missing>"))
    }

    /// Result wrapping the extracted `text`.
    fn text(text: String) -> Self {
        ExtractResult { text }
    }
}
231
232fn extract_with_text_boundaries(
233    text: &str,
234    start: &str,
235    stop: &str,
236    mode: BoundariesMode,
237) -> ExtractResult {
238    if is_missing_string(text) || is_missing_string(start) || is_missing_string(stop) {
239        return ExtractResult::missing();
240    }
241
242    if let Some(start_idx) = text.find(start) {
243        let search_start = start_idx + start.len();
244        if search_start > text.len() {
245            return ExtractResult::text(String::new());
246        }
247        if let Some(relative_end) = text[search_start..].find(stop) {
248            let end_idx = search_start + relative_end;
249            match mode {
250                BoundariesMode::Inclusive => {
251                    let end_capture = min(text.len(), end_idx + stop.len());
252                    let slice = &text[start_idx..end_capture];
253                    ExtractResult::text(slice.to_string())
254                }
255                BoundariesMode::Exclusive => {
256                    if end_idx < search_start {
257                        ExtractResult::text(String::new())
258                    } else {
259                        let slice = &text[search_start..end_idx];
260                        ExtractResult::text(slice.to_string())
261                    }
262                }
263            }
264        } else {
265            ExtractResult::text(String::new())
266        }
267    } else {
268        ExtractResult::text(String::new())
269    }
270}
271
272fn extract_with_positions(
273    text: &str,
274    start: usize,
275    stop: usize,
276    mode: BoundariesMode,
277) -> ExtractResult {
278    if is_missing_string(text) {
279        return ExtractResult::missing();
280    }
281    if text.is_empty() {
282        return ExtractResult::text(String::new());
283    }
284    let chars: Vec<char> = text.chars().collect();
285    let len = chars.len();
286    if len == 0 {
287        return ExtractResult::text(String::new());
288    }
289
290    if start == 0 || stop == 0 {
291        return ExtractResult::text(String::new());
292    }
293
294    if start > len {
295        return ExtractResult::text(String::new());
296    }
297    let stop_clamped = stop.min(len);
298    if stop_clamped == 0 {
299        return ExtractResult::text(String::new());
300    }
301
302    match mode {
303        BoundariesMode::Inclusive => {
304            if start > stop_clamped {
305                return ExtractResult::text(String::new());
306            }
307            let start_idx = start - 1;
308            let end_idx = stop_clamped - 1;
309            if start_idx >= len || end_idx >= len || start_idx > end_idx {
310                ExtractResult::text(String::new())
311            } else {
312                let slice: String = chars[start_idx..=end_idx].iter().collect();
313                ExtractResult::text(slice)
314            }
315        }
316        BoundariesMode::Exclusive => {
317            if start + 1 >= stop_clamped {
318                return ExtractResult::text(String::new());
319            }
320            let start_idx = start;
321            let end_idx = stop_clamped - 2;
322            if start_idx >= len || end_idx >= len || start_idx > end_idx {
323                ExtractResult::text(String::new())
324            } else {
325                let slice: String = chars[start_idx..=end_idx].iter().collect();
326                ExtractResult::text(slice)
327            }
328        }
329    }
330}
331
/// Layout of a cell-array text input, kept so the output cell can mirror it.
#[derive(Clone, Debug)]
struct CellInfo {
    // Shape of the input cell array.
    shape: Vec<usize>,
    // Kind of each input element, in the same order as the cell's data.
    element_kinds: Vec<CellElementKind>,
}
337
/// Original representation of one cell element; decides how its result is emitted.
#[derive(Clone, Debug)]
enum CellElementKind {
    /// Element was a string scalar (or one-element string array).
    String,
    /// Element was a character vector.
    Char,
}
343
/// Container type of the text input, used to rebuild an output of matching type.
#[derive(Clone, Debug)]
enum TextKind {
    /// A single string scalar.
    StringScalar,
    /// A string array.
    StringArray,
    /// A character array with `rows` rows; each row is treated as one text element.
    CharArray { rows: usize },
    /// A cell array, with per-element type information.
    CellArray(CellInfo),
}
351
352#[derive(Clone, Debug)]
353struct NormalizedText {
354    data: Vec<String>,
355    shape: Vec<usize>,
356    kind: TextKind,
357}
358
359impl NormalizedText {
360    fn from_value(value: Value) -> BuiltinResult<Self> {
361        match value {
362            Value::String(s) => Ok(Self {
363                data: vec![s],
364                shape: vec![1, 1],
365                kind: TextKind::StringScalar,
366            }),
367            Value::StringArray(sa) => Ok(Self {
368                data: sa.data.clone(),
369                shape: sa.shape.clone(),
370                kind: TextKind::StringArray,
371            }),
372            Value::CharArray(ca) => {
373                let rows = ca.rows;
374                let mut data = Vec::with_capacity(rows);
375                for row in 0..rows {
376                    data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
377                }
378                Ok(Self {
379                    data,
380                    shape: vec![rows, 1],
381                    kind: TextKind::CharArray { rows },
382                })
383            }
384            Value::Cell(cell) => {
385                let shape = cell.shape.clone();
386                let mut data = Vec::with_capacity(cell.data.len());
387                let mut kinds = Vec::with_capacity(cell.data.len());
388                for element in &cell.data {
389                    match &**element {
390                        Value::String(s) => {
391                            data.push(s.clone());
392                            kinds.push(CellElementKind::String);
393                        }
394                        Value::StringArray(sa) if sa.data.len() == 1 => {
395                            data.push(sa.data[0].clone());
396                            kinds.push(CellElementKind::String);
397                        }
398                        Value::CharArray(ca) if ca.rows <= 1 => {
399                            if ca.rows == 0 {
400                                data.push(String::new());
401                            } else {
402                                data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
403                            }
404                            kinds.push(CellElementKind::Char);
405                        }
406                        Value::CharArray(_) => return Err(runtime_error_for(CELL_ELEMENT_ERROR)),
407                        _ => return Err(runtime_error_for(CELL_ELEMENT_ERROR)),
408                    }
409                }
410                Ok(Self {
411                    data,
412                    shape: shape.clone(),
413                    kind: TextKind::CellArray(CellInfo {
414                        shape,
415                        element_kinds: kinds,
416                    }),
417                })
418            }
419            _ => Err(runtime_error_for(ARG_TYPE_ERROR)),
420        }
421    }
422
423    fn shape(&self) -> &[usize] {
424        &self.shape
425    }
426
427    fn data(&self, idx: usize) -> &str {
428        &self.data[idx]
429    }
430
431    fn supports_shape(&self, output_shape: &[usize]) -> bool {
432        match &self.kind {
433            TextKind::StringScalar => true,
434            TextKind::StringArray => true,
435            TextKind::CharArray { .. } => output_shape == self.shape,
436            TextKind::CellArray(info) => output_shape == info.shape,
437        }
438    }
439
440    fn into_value(
441        self,
442        results: Vec<ExtractResult>,
443        output_shape: Vec<usize>,
444    ) -> BuiltinResult<Value> {
445        match self.kind {
446            TextKind::StringScalar => {
447                if results.len() <= 1 {
448                    let value = results
449                        .into_iter()
450                        .next()
451                        .unwrap_or_else(|| ExtractResult::text(String::new()));
452                    Ok(Value::String(value.text))
453                } else {
454                    let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
455                    let array = StringArray::new(data, output_shape)
456                        .map_err(|e| runtime_error_for(format!("{FN_NAME}: {e}")))?;
457                    Ok(Value::StringArray(array))
458                }
459            }
460            TextKind::StringArray => {
461                let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
462                let array = StringArray::new(data, output_shape)
463                    .map_err(|e| runtime_error_for(format!("{FN_NAME}: {e}")))?;
464                Ok(Value::StringArray(array))
465            }
466            TextKind::CharArray { rows } => {
467                if rows == 0 {
468                    return CharArray::new(Vec::new(), 0, 0)
469                        .map(Value::CharArray)
470                        .map_err(|e| runtime_error_for(format!("{FN_NAME}: {e}")));
471                }
472                if results.len() != rows {
473                    return Err(runtime_error_for(SIZE_MISMATCH_ERROR));
474                }
475                let mut max_width = 0usize;
476                let mut row_strings = Vec::with_capacity(rows);
477                for result in &results {
478                    let width = result.text.chars().count();
479                    max_width = max_width.max(width);
480                    row_strings.push(result.text.clone());
481                }
482                let mut flattened = Vec::with_capacity(rows * max_width);
483                for row in row_strings {
484                    let mut chars: Vec<char> = row.chars().collect();
485                    if chars.len() < max_width {
486                        chars.resize(max_width, ' ');
487                    }
488                    flattened.extend(chars);
489                }
490                CharArray::new(flattened, rows, max_width)
491                    .map(Value::CharArray)
492                    .map_err(|e| runtime_error_for(format!("{FN_NAME}: {e}")))
493            }
494            TextKind::CellArray(info) => {
495                if results.len() != info.element_kinds.len() {
496                    return Err(runtime_error_for(SIZE_MISMATCH_ERROR));
497                }
498                let mut values = Vec::with_capacity(results.len());
499                for (idx, result) in results.into_iter().enumerate() {
500                    match info.element_kinds[idx] {
501                        CellElementKind::String => values.push(Value::String(result.text)),
502                        CellElementKind::Char => {
503                            let ca = CharArray::new_row(&result.text);
504                            values.push(Value::CharArray(ca));
505                        }
506                    }
507                }
508                make_cell_with_shape(values, info.shape)
509                    .map_err(|e| runtime_error_for(format!("{FN_NAME}: {e}")))
510            }
511        }
512    }
513}
514
/// Discriminates how a boundary argument was supplied.
#[derive(Clone, Debug, PartialEq, Eq)]
enum BoundaryKind {
    /// Boundary given as text markers to search for.
    Text,
    /// Boundary given as numeric character positions.
    Position,
}
520
/// A normalised start or end boundary argument.
#[derive(Clone, Debug)]
enum BoundaryArg {
    /// Text markers with their shape.
    Text(BoundaryText),
    /// Validated 1-based positions with their shape.
    Position(BoundaryPositions),
}
526
527impl BoundaryArg {
528    fn from_value(value: Value) -> BuiltinResult<Self> {
529        match value {
530            Value::String(_) | Value::StringArray(_) | Value::CharArray(_) | Value::Cell(_) => {
531                BoundaryText::from_value(value).map(BoundaryArg::Text)
532            }
533            Value::Num(_) | Value::Int(_) | Value::Tensor(_) => {
534                BoundaryPositions::from_value(value).map(BoundaryArg::Position)
535            }
536            other => Err(runtime_error_for(format!(
537                "{BOUNDARY_TYPE_ERROR}: unsupported argument {other:?}"
538            ))),
539        }
540    }
541
542    fn kind(&self) -> BoundaryKind {
543        match self {
544            BoundaryArg::Text(_) => BoundaryKind::Text,
545            BoundaryArg::Position(_) => BoundaryKind::Position,
546        }
547    }
548
549    fn shape(&self) -> &[usize] {
550        match self {
551            BoundaryArg::Text(text) => &text.shape,
552            BoundaryArg::Position(pos) => &pos.shape,
553        }
554    }
555
556    fn text(&self, idx: usize) -> &str {
557        match self {
558            BoundaryArg::Text(text) => &text.data[idx],
559            BoundaryArg::Position(_) => unreachable!(),
560        }
561    }
562
563    fn position(&self, idx: usize) -> usize {
564        match self {
565            BoundaryArg::Position(pos) => pos.data[idx],
566            BoundaryArg::Text(_) => unreachable!(),
567        }
568    }
569}
570
571#[derive(Clone, Debug)]
572struct BoundaryText {
573    data: Vec<String>,
574    shape: Vec<usize>,
575}
576
577impl BoundaryText {
578    fn from_value(value: Value) -> BuiltinResult<Self> {
579        match value {
580            Value::String(s) => Ok(Self {
581                data: vec![s],
582                shape: vec![1, 1],
583            }),
584            Value::StringArray(sa) => Ok(Self {
585                data: sa.data.clone(),
586                shape: sa.shape.clone(),
587            }),
588            Value::CharArray(ca) => {
589                let mut data = Vec::with_capacity(ca.rows);
590                for row in 0..ca.rows {
591                    data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
592                }
593                Ok(Self {
594                    data,
595                    shape: vec![ca.rows, 1],
596                })
597            }
598            Value::Cell(cell) => {
599                let shape = cell.shape.clone();
600                let mut data = Vec::with_capacity(cell.data.len());
601                for element in &cell.data {
602                    match &**element {
603                        Value::String(s) => data.push(s.clone()),
604                        Value::StringArray(sa) if sa.data.len() == 1 => {
605                            data.push(sa.data[0].clone());
606                        }
607                        Value::CharArray(ca) if ca.rows <= 1 => {
608                            if ca.rows == 0 {
609                                data.push(String::new());
610                            } else {
611                                data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
612                            }
613                        }
614                        Value::CharArray(_) => return Err(runtime_error_for(CELL_ELEMENT_ERROR)),
615                        _ => return Err(runtime_error_for(CELL_ELEMENT_ERROR)),
616                    }
617                }
618                Ok(Self { data, shape })
619            }
620            _ => Err(runtime_error_for(BOUNDARY_TYPE_ERROR)),
621        }
622    }
623}
624
625#[derive(Clone, Debug)]
626struct BoundaryPositions {
627    data: Vec<usize>,
628    shape: Vec<usize>,
629}
630
631impl BoundaryPositions {
632    fn from_value(value: Value) -> BuiltinResult<Self> {
633        match value {
634            Value::Num(n) => Ok(Self {
635                data: vec![parse_position(n)?],
636                shape: vec![1, 1],
637            }),
638            Value::Int(i) => Ok(Self {
639                data: vec![parse_position_int(i)?],
640                shape: vec![1, 1],
641            }),
642            Value::Tensor(t) => {
643                let mut data = Vec::with_capacity(t.data.len());
644                for &entry in &t.data {
645                    data.push(parse_position(entry)?);
646                }
647                Ok(Self {
648                    data,
649                    shape: if t.shape.is_empty() {
650                        vec![t.rows, t.cols.max(1)]
651                    } else {
652                        t.shape
653                    },
654                })
655            }
656            _ => Err(runtime_error_for(BOUNDARY_TYPE_ERROR)),
657        }
658    }
659}
660
661fn parse_position(value: f64) -> BuiltinResult<usize> {
662    if !value.is_finite() || value < 1.0 {
663        return Err(runtime_error_for(POSITION_TYPE_ERROR));
664    }
665    if (value.fract()).abs() > f64::EPSILON {
666        return Err(runtime_error_for(POSITION_TYPE_ERROR));
667    }
668    if value > (usize::MAX as f64) {
669        return Err(runtime_error_for(POSITION_TYPE_ERROR));
670    }
671    Ok(value as usize)
672}
673
674fn parse_position_int(value: IntValue) -> BuiltinResult<usize> {
675    let val = value.to_i64();
676    if val <= 0 {
677        return Err(runtime_error_for(POSITION_TYPE_ERROR));
678    }
679    Ok(val as usize)
680}
681
682#[cfg(test)]
683pub(crate) mod tests {
684    #![allow(non_snake_case)]
685
686    use super::*;
687    use runmat_builtins::{CellArray, ResolveContext, Tensor, Type};
688
689    fn extract_between_builtin(
690        text: Value,
691        start: Value,
692        stop: Value,
693        rest: Vec<Value>,
694    ) -> BuiltinResult<Value> {
695        futures::executor::block_on(super::extract_between_builtin(text, start, stop, rest))
696    }
697
698    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
699    #[test]
700    fn extractBetween_basic_string() {
701        let result = extract_between_builtin(
702            Value::String("RunMat accelerates MATLAB".into()),
703            Value::String("RunMat ".into()),
704            Value::String(" MATLAB".into()),
705            Vec::new(),
706        )
707        .expect("extractBetween");
708        assert_eq!(result, Value::String("accelerates".into()));
709    }
710
711    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
712    #[test]
713    fn extractBetween_inclusive_option() {
714        let result = extract_between_builtin(
715            Value::String("a[b]c".into()),
716            Value::String("[".into()),
717            Value::String("]".into()),
718            vec![
719                Value::String("Boundaries".into()),
720                Value::String("inclusive".into()),
721            ],
722        )
723        .expect("extractBetween");
724        assert_eq!(result, Value::String("[b]".into()));
725    }
726
727    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
728    #[test]
729    fn extractBetween_numeric_positions() {
730        let result = extract_between_builtin(
731            Value::String("Accelerator".into()),
732            Value::Num(3.0),
733            Value::Num(7.0),
734            Vec::new(),
735        )
736        .expect("extractBetween");
737        assert_eq!(result, Value::String("celer".into()));
738    }
739
740    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
741    #[test]
742    fn extractBetween_numeric_positions_exclusive_option() {
743        let result = extract_between_builtin(
744            Value::String("Accelerator".into()),
745            Value::Num(3.0),
746            Value::Num(7.0),
747            vec![
748                Value::String("Boundaries".into()),
749                Value::String("exclusive".into()),
750            ],
751        )
752        .expect("extractBetween");
753        assert_eq!(result, Value::String("ele".into()));
754    }
755
756    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
757    #[test]
758    fn extractBetween_numeric_positions_clamps_stop() {
759        let result = extract_between_builtin(
760            Value::String("Accelerator".into()),
761            Value::Num(3.0),
762            Value::Num(100.0),
763            Vec::new(),
764        )
765        .expect("extractBetween");
766        assert_eq!(result, Value::String("celerator".into()));
767    }
768
769    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
770    #[test]
771    fn extractBetween_numeric_positions_start_past_length() {
772        let result = extract_between_builtin(
773            Value::String("abc".into()),
774            Value::Num(10.0),
775            Value::Num(12.0),
776            Vec::new(),
777        )
778        .expect("extractBetween");
779        assert_eq!(result, Value::String(String::new()));
780    }
781
782    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
783    #[test]
784    fn extractBetween_string_array_broadcast() {
785        let array = StringArray::new(
786            vec!["runmat_accel.rs".into(), "runmat_gc.rs".into()],
787            vec![2, 1],
788        )
789        .unwrap();
790        let result = extract_between_builtin(
791            Value::StringArray(array),
792            Value::String("runmat_".into()),
793            Value::String(".rs".into()),
794            Vec::new(),
795        )
796        .expect("extractBetween");
797        match result {
798            Value::StringArray(sa) => {
799                assert_eq!(sa.data, vec!["accel".to_string(), "gc".to_string()]);
800                assert_eq!(sa.shape, vec![2, 1]);
801            }
802            other => panic!("expected string array, got {other:?}"),
803        }
804    }
805
806    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
807    #[test]
808    fn extractBetween_char_array_rows() {
809        let chars = CharArray::new(
810            "GPUAccelerateIgnition".chars().collect(),
811            1,
812            "GPUAccelerateIgnition".len(),
813        )
814        .unwrap();
815        let result = extract_between_builtin(
816            Value::CharArray(chars),
817            Value::String("GPU".into()),
818            Value::String("tion".into()),
819            Vec::new(),
820        )
821        .expect("extractBetween");
822        match result {
823            Value::CharArray(out) => {
824                assert_eq!(out.rows, 1);
825                let text: String = out.data.iter().collect();
826                assert_eq!(text.trim_end(), "AccelerateIgni");
827            }
828            other => panic!("expected char array, got {other:?}"),
829        }
830    }
831
832    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
833    #[test]
834    fn extractBetween_cell_array_preserves_types() {
835        let cell = CellArray::new(
836            vec![
837                Value::CharArray(CharArray::new_row("A[B]C")),
838                Value::String("Planner<GPU>".into()),
839            ],
840            1,
841            2,
842        )
843        .unwrap();
844        let result = extract_between_builtin(
845            Value::Cell(cell),
846            Value::String("[".into()),
847            Value::String("]".into()),
848            Vec::new(),
849        )
850        .expect("extractBetween");
851        match result {
852            Value::Cell(out) => {
853                let first = out.get(0, 0).unwrap();
854                let second = out.get(0, 1).unwrap();
855                assert_eq!(first, Value::CharArray(CharArray::new_row("B")));
856                assert_eq!(second, Value::String(String::new()));
857            }
858            other => panic!("expected cell array, got {other:?}"),
859        }
860    }
861
862    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
863    #[test]
864    fn extractBetween_missing_string_propagates() {
865        let strings = StringArray::new(vec!["<missing>".into()], vec![1, 1]).unwrap();
866        let result = extract_between_builtin(
867            Value::StringArray(strings),
868            Value::String("[".into()),
869            Value::String("]".into()),
870            Vec::new(),
871        )
872        .expect("extractBetween");
873        assert_eq!(
874            result,
875            Value::StringArray(StringArray::new(vec!["<missing>".into()], vec![1, 1]).unwrap())
876        );
877    }
878
879    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
880    #[test]
881    fn extractBetween_position_type_error() {
882        let err = extract_between_builtin(
883            Value::String("abc".into()),
884            Value::Num(0.5),
885            Value::Num(2.0),
886            Vec::new(),
887        )
888        .unwrap_err();
889        assert_eq!(err.to_string(), POSITION_TYPE_ERROR);
890    }
891
892    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
893    #[test]
894    fn extractBetween_mixed_boundary_error() {
895        let err = extract_between_builtin(
896            Value::String("abc".into()),
897            Value::String("a".into()),
898            Value::Num(3.0),
899            Vec::new(),
900        )
901        .unwrap_err();
902        assert_eq!(err.to_string(), BOUNDARY_TYPE_ERROR);
903    }
904
905    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
906    #[test]
907    fn extractBetween_numeric_tensor_broadcast() {
908        let text = StringArray::new(vec!["abcd".into(), "wxyz".into()], vec![2, 1]).unwrap();
909        let start = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
910        let stop = Tensor::new(vec![3.0, 4.0], vec![2, 1]).unwrap();
911        let result = extract_between_builtin(
912            Value::StringArray(text),
913            Value::Tensor(start),
914            Value::Tensor(stop),
915            Vec::new(),
916        )
917        .expect("extractBetween");
918        match result {
919            Value::StringArray(sa) => {
920                assert_eq!(sa.data, vec!["abc".to_string(), "xyz".to_string()]);
921                assert_eq!(sa.shape, vec![2, 1]);
922            }
923            other => panic!("expected string array, got {other:?}"),
924        }
925    }
926
927    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
928    #[test]
929    fn extractBetween_option_invalid_value() {
930        let err = extract_between_builtin(
931            Value::String("abc".into()),
932            Value::String("a".into()),
933            Value::String("c".into()),
934            vec![
935                Value::String("Boundaries".into()),
936                Value::String("middle".into()),
937            ],
938        )
939        .unwrap_err();
940        assert_eq!(err.to_string(), OPTION_VALUE_ERROR);
941    }
942
943    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
944    #[test]
945    fn extractBetween_option_name_error() {
946        let err = extract_between_builtin(
947            Value::String("abc".into()),
948            Value::String("a".into()),
949            Value::String("c".into()),
950            vec![
951                Value::String("Padding".into()),
952                Value::String("inclusive".into()),
953            ],
954        )
955        .unwrap_err();
956        assert_eq!(err.to_string(), OPTION_NAME_ERROR);
957    }
958
959    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
960    #[test]
961    fn extractBetween_option_pair_error() {
962        let err = extract_between_builtin(
963            Value::String("abc".into()),
964            Value::String("a".into()),
965            Value::String("b".into()),
966            vec![Value::String("Boundaries".into())],
967        )
968        .unwrap_err();
969        assert_eq!(err.to_string(), OPTION_PAIR_ERROR);
970    }
971
972    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
973    #[test]
974    fn extractBetween_missing_boundary_propagates() {
975        let result = extract_between_builtin(
976            Value::String("Planner<GPU>".into()),
977            Value::String("<missing>".into()),
978            Value::String(">".into()),
979            Vec::new(),
980        )
981        .expect("extractBetween");
982        assert_eq!(result, Value::String("<missing>".into()));
983    }
984
985    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
986    #[test]
987    fn extractBetween_cell_boundary_arguments() {
988        let text = CellArray::new(vec![Value::String("A<GPU>".into())], 1, 1).unwrap();
989        let start = CellArray::new(vec![Value::CharArray(CharArray::new_row("<"))], 1, 1).unwrap();
990        let stop = CellArray::new(vec![Value::CharArray(CharArray::new_row(">"))], 1, 1).unwrap();
991        let result = extract_between_builtin(
992            Value::Cell(text),
993            Value::Cell(start),
994            Value::Cell(stop),
995            Vec::new(),
996        )
997        .expect("extractBetween");
998        match result {
999            Value::Cell(out) => {
1000                let value = out.get(0, 0).unwrap();
1001                assert_eq!(value, Value::String("GPU".into()));
1002            }
1003            other => panic!("expected cell array, got {other:?}"),
1004        }
1005    }
1006
1007    #[test]
1008    fn extract_between_type_preserves_text() {
1009        assert_eq!(
1010            text_preserve_type(&[Type::String], &ResolveContext::new(Vec::new())),
1011            Type::String
1012        );
1013    }
1014}