Skip to main content

runmat_runtime/builtins/strings/transform/
extractbetween.rs

1//! MATLAB-compatible `extractBetween` builtin with GPU-aware semantics for RunMat.
2
3use std::cmp::min;
4
5use crate::builtins::common::broadcast::{broadcast_index, broadcast_shapes, compute_strides};
6use crate::builtins::common::map_control_flow_with_builtin;
7use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
8use crate::builtins::strings::type_resolvers::text_preserve_type;
9use crate::{
10    build_runtime_error, gather_if_needed_async, make_cell_with_shape, BuiltinResult, RuntimeError,
11};
12use runmat_builtins::{
13    BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
14    BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
15    CharArray, IntValue, StringArray, Value,
16};
17use runmat_macros::runtime_builtin;
18
19use crate::builtins::common::spec::{
20    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
21    ReductionNaN, ResidencyPolicy, ShapeRequirements,
22};
23
24#[runmat_macros::register_gpu_spec(
25    builtin_path = "crate::builtins::strings::transform::extractbetween"
26)]
27pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
28    name: "extractBetween",
29    op_kind: GpuOpKind::Custom("string-transform"),
30    supported_precisions: &[],
31    broadcast: BroadcastSemantics::Matlab,
32    provider_hooks: &[],
33    constant_strategy: ConstantStrategy::InlineLiteral,
34    residency: ResidencyPolicy::GatherImmediately,
35    nan_mode: ReductionNaN::Include,
36    two_pass_threshold: None,
37    workgroup_size: None,
38    accepts_nan_mode: false,
39    notes: "Runs on the CPU; GPU-resident inputs are gathered before extraction and outputs are returned on the host.",
40};
41
42#[runmat_macros::register_fusion_spec(
43    builtin_path = "crate::builtins::strings::transform::extractbetween"
44)]
45pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
46    name: "extractBetween",
47    shape: ShapeRequirements::Any,
48    constant_strategy: ConstantStrategy::InlineLiteral,
49    elementwise: None,
50    reduction: None,
51    emits_nan: false,
52    notes: "Pure string manipulation builtin; excluded from fusion plans and gathers GPU inputs immediately.",
53};
54
/// User-visible builtin name, attached to errors and passed to shared helpers.
const BUILTIN_NAME: &str = "extractBetween";
56
57const EXTRACT_BETWEEN_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
58    name: "newText",
59    ty: BuiltinParamType::Any,
60    arity: BuiltinParamArity::Required,
61    default: None,
62    description: "Extracted text preserving scalar/array/cell text container semantics.",
63}];
64
65const EXTRACT_BETWEEN_INPUTS_BASE: [BuiltinParamDescriptor; 3] = [
66    BuiltinParamDescriptor {
67        name: "str",
68        ty: BuiltinParamType::Any,
69        arity: BuiltinParamArity::Required,
70        default: None,
71        description: "Input text scalar/array/cell.",
72    },
73    BuiltinParamDescriptor {
74        name: "start",
75        ty: BuiltinParamType::Any,
76        arity: BuiltinParamArity::Required,
77        default: None,
78        description: "Start boundary marker text or positive integer position(s).",
79    },
80    BuiltinParamDescriptor {
81        name: "end",
82        ty: BuiltinParamType::Any,
83        arity: BuiltinParamArity::Required,
84        default: None,
85        description: "End boundary marker text or positive integer position(s).",
86    },
87];
88
89const EXTRACT_BETWEEN_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 5] = [
90    BuiltinParamDescriptor {
91        name: "str",
92        ty: BuiltinParamType::Any,
93        arity: BuiltinParamArity::Required,
94        default: None,
95        description: "Input text scalar/array/cell.",
96    },
97    BuiltinParamDescriptor {
98        name: "start",
99        ty: BuiltinParamType::Any,
100        arity: BuiltinParamArity::Required,
101        default: None,
102        description: "Start boundary marker text or positive integer position(s).",
103    },
104    BuiltinParamDescriptor {
105        name: "end",
106        ty: BuiltinParamType::Any,
107        arity: BuiltinParamArity::Required,
108        default: None,
109        description: "End boundary marker text or positive integer position(s).",
110    },
111    BuiltinParamDescriptor {
112        name: "Name",
113        ty: BuiltinParamType::StringScalar,
114        arity: BuiltinParamArity::Required,
115        default: None,
116        description: "Option name (`Boundaries`).",
117    },
118    BuiltinParamDescriptor {
119        name: "Value",
120        ty: BuiltinParamType::Any,
121        arity: BuiltinParamArity::Variadic,
122        default: None,
123        description: "Option value and additional Name/Value pairs.",
124    },
125];
126
127const EXTRACT_BETWEEN_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
128    BuiltinSignatureDescriptor {
129        label: "newText = extractBetween(str, start, end)",
130        inputs: &EXTRACT_BETWEEN_INPUTS_BASE,
131        outputs: &EXTRACT_BETWEEN_OUTPUT,
132    },
133    BuiltinSignatureDescriptor {
134        label: "newText = extractBetween(str, start, end, Name, Value, ...)",
135        inputs: &EXTRACT_BETWEEN_INPUTS_NAME_VALUE,
136        outputs: &EXTRACT_BETWEEN_OUTPUT,
137    },
138];
139
140const EXTRACT_BETWEEN_ERROR_INVALID_INPUT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
141    code: "RM.EXTRACT_BETWEEN.INVALID_INPUT",
142    identifier: Some("RunMat:extractBetween:InvalidInput"),
143    when: "First argument is not a string array, character array, or cell array of text scalars.",
144    message:
145        "extractBetween: first argument must be a string array, character array, or cell array of character vectors",
146};
147
148const EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
149    code: "RM.EXTRACT_BETWEEN.BOUNDARY_TYPE",
150    identifier: Some("RunMat:extractBetween:BoundaryType"),
151    when: "Start/end boundaries are mixed text/numeric domains or use unsupported boundary types.",
152    message:
153        "extractBetween: start and end arguments must both be text or both be numeric positions",
154};
155
156const EXTRACT_BETWEEN_ERROR_POSITION_TYPE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
157    code: "RM.EXTRACT_BETWEEN.POSITION_TYPE",
158    identifier: Some("RunMat:extractBetween:PositionType"),
159    when: "Numeric boundary positions are not positive finite integers.",
160    message: "extractBetween: position arguments must be positive integers",
161};
162
163const EXTRACT_BETWEEN_ERROR_NAME_VALUE_PAIR: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
164    code: "RM.EXTRACT_BETWEEN.NAME_VALUE_PAIR",
165    identifier: Some("RunMat:extractBetween:NameValuePair"),
166    when: "Name/value options are not supplied in complete pairs.",
167    message: "extractBetween: name-value arguments must appear in pairs",
168};
169
170const EXTRACT_BETWEEN_ERROR_OPTION_NAME: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
171    code: "RM.EXTRACT_BETWEEN.OPTION_NAME",
172    identifier: Some("RunMat:extractBetween:OptionName"),
173    when: "An option name other than `Boundaries` was supplied.",
174    message: "extractBetween: unrecognized parameter name",
175};
176
177const EXTRACT_BETWEEN_ERROR_OPTION_VALUE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
178    code: "RM.EXTRACT_BETWEEN.OPTION_VALUE",
179    identifier: Some("RunMat:extractBetween:OptionValue"),
180    when: "`Boundaries` option value is not `inclusive` or `exclusive`.",
181    message: "extractBetween: 'Boundaries' must be either 'inclusive' or 'exclusive'",
182};
183
184const EXTRACT_BETWEEN_ERROR_CELL_ELEMENT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
185    code: "RM.EXTRACT_BETWEEN.CELL_ELEMENT",
186    identifier: Some("RunMat:extractBetween:CellElement"),
187    when: "Cell text input/boundary contains non-text values or non-row char arrays.",
188    message: "extractBetween: cell array elements must be string scalars or character vectors",
189};
190
191const EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
192    code: "RM.EXTRACT_BETWEEN.SIZE_MISMATCH",
193    identifier: Some("RunMat:extractBetween:SizeMismatch"),
194    when: "Text/boundary inputs are not broadcast-compatible for extraction.",
195    message: "extractBetween: boundary sizes must be compatible with the text input",
196};
197
198const EXTRACT_BETWEEN_ERROR_INTERNAL: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
199    code: "RM.EXTRACT_BETWEEN.INTERNAL",
200    identifier: Some("RunMat:extractBetween:InternalError"),
201    when: "Internal output construction failed.",
202    message: "extractBetween: internal error",
203};
204
205const EXTRACT_BETWEEN_ERRORS: [BuiltinErrorDescriptor; 9] = [
206    EXTRACT_BETWEEN_ERROR_INVALID_INPUT,
207    EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE,
208    EXTRACT_BETWEEN_ERROR_POSITION_TYPE,
209    EXTRACT_BETWEEN_ERROR_NAME_VALUE_PAIR,
210    EXTRACT_BETWEEN_ERROR_OPTION_NAME,
211    EXTRACT_BETWEEN_ERROR_OPTION_VALUE,
212    EXTRACT_BETWEEN_ERROR_CELL_ELEMENT,
213    EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH,
214    EXTRACT_BETWEEN_ERROR_INTERNAL,
215];
216
217pub const EXTRACT_BETWEEN_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
218    signatures: &EXTRACT_BETWEEN_SIGNATURES,
219    output_mode: BuiltinOutputMode::Fixed,
220    completion_policy: BuiltinCompletionPolicy::Public,
221    errors: &EXTRACT_BETWEEN_ERRORS,
222};
223
224fn extract_between_error(error: &'static BuiltinErrorDescriptor) -> RuntimeError {
225    extract_between_error_with_message(error.message, error)
226}
227
228fn extract_between_error_with_message(
229    message: impl Into<String>,
230    error: &'static BuiltinErrorDescriptor,
231) -> RuntimeError {
232    let mut builder = build_runtime_error(message).with_builtin(BUILTIN_NAME);
233    if let Some(identifier) = error.identifier {
234        builder = builder.with_identifier(identifier);
235    }
236    builder.build()
237}
238
239fn map_flow(err: RuntimeError) -> RuntimeError {
240    map_control_flow_with_builtin(err, BUILTIN_NAME)
241}
242
/// Whether the boundary markers/positions themselves are part of the result.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BoundariesMode {
    /// Return only what lies strictly between the boundaries.
    Exclusive,
    /// Return the boundaries together with what lies between them.
    Inclusive,
}
248
249#[runtime_builtin(
250    name = "extractBetween",
251    category = "strings/transform",
252    summary = "Extract substrings between boundary markers.",
253    keywords = "extractBetween,substring,boundaries,strings",
254    accel = "sink",
255    type_resolver(text_preserve_type),
256    descriptor(crate::builtins::strings::transform::extractbetween::EXTRACT_BETWEEN_DESCRIPTOR),
257    builtin_path = "crate::builtins::strings::transform::extractbetween"
258)]
259async fn extract_between_builtin(
260    text: Value,
261    start: Value,
262    stop: Value,
263    rest: Vec<Value>,
264) -> BuiltinResult<Value> {
265    let text = gather_if_needed_async(&text).await.map_err(map_flow)?;
266    let start = gather_if_needed_async(&start).await.map_err(map_flow)?;
267    let stop = gather_if_needed_async(&stop).await.map_err(map_flow)?;
268
269    let mode_override = parse_boundaries_option(&rest).await?;
270
271    let normalized_text = NormalizedText::from_value(text)?;
272    let start_boundary = BoundaryArg::from_value(start)?;
273    let stop_boundary = BoundaryArg::from_value(stop)?;
274
275    if start_boundary.kind() != stop_boundary.kind() {
276        return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE));
277    }
278    let boundary_kind = start_boundary.kind();
279    let effective_mode = mode_override.unwrap_or(match boundary_kind {
280        BoundaryKind::Text => BoundariesMode::Exclusive,
281        BoundaryKind::Position => BoundariesMode::Inclusive,
282    });
283
284    let start_shape = start_boundary.shape();
285    let stop_shape = stop_boundary.shape();
286    let text_shape = normalized_text.shape();
287
288    let shape_ts = broadcast_shapes(BUILTIN_NAME, text_shape, start_shape).map_err(|err| {
289        extract_between_error_with_message(
290            format!("{}: {err}", EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH.message),
291            &EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH,
292        )
293    })?;
294    let output_shape = broadcast_shapes(BUILTIN_NAME, &shape_ts, stop_shape).map_err(|err| {
295        extract_between_error_with_message(
296            format!("{}: {err}", EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH.message),
297            &EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH,
298        )
299    })?;
300    if !normalized_text.supports_shape(&output_shape) {
301        return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH));
302    }
303
304    let total: usize = output_shape.iter().copied().product();
305    if total == 0 {
306        return normalized_text.into_value(Vec::new(), output_shape);
307    }
308
309    let text_strides = compute_strides(text_shape);
310    let start_strides = compute_strides(start_shape);
311    let stop_strides = compute_strides(stop_shape);
312
313    let mut results = Vec::with_capacity(total);
314
315    for idx in 0..total {
316        let text_idx = broadcast_index(idx, &output_shape, text_shape, &text_strides);
317        let start_idx = broadcast_index(idx, &output_shape, start_shape, &start_strides);
318        let stop_idx = broadcast_index(idx, &output_shape, stop_shape, &stop_strides);
319
320        let result = match boundary_kind {
321            BoundaryKind::Text => {
322                let text_value = normalized_text.data(text_idx);
323                let start_value = start_boundary.text(start_idx);
324                let stop_value = stop_boundary.text(stop_idx);
325                extract_with_text_boundaries(text_value, start_value, stop_value, effective_mode)
326            }
327            BoundaryKind::Position => {
328                let text_value = normalized_text.data(text_idx);
329                let start_value = start_boundary.position(start_idx);
330                let stop_value = stop_boundary.position(stop_idx);
331                extract_with_positions(text_value, start_value, stop_value, effective_mode)
332            }
333        };
334        results.push(result);
335    }
336
337    normalized_text.into_value(results, output_shape)
338}
339
340async fn parse_boundaries_option(args: &[Value]) -> BuiltinResult<Option<BoundariesMode>> {
341    if args.is_empty() {
342        return Ok(None);
343    }
344    if !args.len().is_multiple_of(2) {
345        return Err(extract_between_error(
346            &EXTRACT_BETWEEN_ERROR_NAME_VALUE_PAIR,
347        ));
348    }
349
350    let mut mode: Option<BoundariesMode> = None;
351    let mut idx = 0;
352    while idx < args.len() {
353        let name_value = gather_if_needed_async(&args[idx]).await.map_err(map_flow)?;
354        let name = value_to_string(&name_value)
355            .ok_or_else(|| extract_between_error(&EXTRACT_BETWEEN_ERROR_OPTION_NAME))?;
356        if !name.eq_ignore_ascii_case("boundaries") {
357            return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_OPTION_NAME));
358        }
359        let value = gather_if_needed_async(&args[idx + 1])
360            .await
361            .map_err(map_flow)?;
362        let value_str = value_to_string(&value)
363            .ok_or_else(|| extract_between_error(&EXTRACT_BETWEEN_ERROR_OPTION_VALUE))?;
364        let parsed_mode = if value_str.eq_ignore_ascii_case("inclusive") {
365            BoundariesMode::Inclusive
366        } else if value_str.eq_ignore_ascii_case("exclusive") {
367            BoundariesMode::Exclusive
368        } else {
369            return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_OPTION_VALUE));
370        };
371        mode = Some(parsed_mode);
372        idx += 2;
373    }
374    Ok(mode)
375}
376
377fn value_to_string(value: &Value) -> Option<String> {
378    match value {
379        Value::String(s) => Some(s.clone()),
380        Value::StringArray(sa) if sa.data.len() == 1 => Some(sa.data[0].clone()),
381        Value::CharArray(ca) if ca.rows <= 1 => {
382            if ca.rows == 0 {
383                Some(String::new())
384            } else {
385                Some(char_row_to_string_slice(&ca.data, ca.cols, 0))
386            }
387        }
388        Value::CharArray(_) => None,
389        Value::Cell(cell) if cell.data.len() == 1 => {
390            let element = &cell.data[0];
391            value_to_string(element)
392        }
393        _ => None,
394    }
395}
396
/// Text produced for one output element.
#[derive(Clone)]
struct ExtractResult {
    /// The extracted text; missing results carry the literal `<missing>`.
    text: String,
}

impl ExtractResult {
    /// Result representing a missing string.
    fn missing() -> Self {
        Self::text(String::from("<missing>"))
    }

    /// Result wrapping the given text.
    fn text(text: String) -> Self {
        ExtractResult { text }
    }
}
413
414fn extract_with_text_boundaries(
415    text: &str,
416    start: &str,
417    stop: &str,
418    mode: BoundariesMode,
419) -> ExtractResult {
420    if is_missing_string(text) || is_missing_string(start) || is_missing_string(stop) {
421        return ExtractResult::missing();
422    }
423
424    if let Some(start_idx) = text.find(start) {
425        let search_start = start_idx + start.len();
426        if search_start > text.len() {
427            return ExtractResult::text(String::new());
428        }
429        if let Some(relative_end) = text[search_start..].find(stop) {
430            let end_idx = search_start + relative_end;
431            match mode {
432                BoundariesMode::Inclusive => {
433                    let end_capture = min(text.len(), end_idx + stop.len());
434                    let slice = &text[start_idx..end_capture];
435                    ExtractResult::text(slice.to_string())
436                }
437                BoundariesMode::Exclusive => {
438                    if end_idx < search_start {
439                        ExtractResult::text(String::new())
440                    } else {
441                        let slice = &text[search_start..end_idx];
442                        ExtractResult::text(slice.to_string())
443                    }
444                }
445            }
446        } else {
447            ExtractResult::text(String::new())
448        }
449    } else {
450        ExtractResult::text(String::new())
451    }
452}
453
454fn extract_with_positions(
455    text: &str,
456    start: usize,
457    stop: usize,
458    mode: BoundariesMode,
459) -> ExtractResult {
460    if is_missing_string(text) {
461        return ExtractResult::missing();
462    }
463    if text.is_empty() {
464        return ExtractResult::text(String::new());
465    }
466    let chars: Vec<char> = text.chars().collect();
467    let len = chars.len();
468    if len == 0 {
469        return ExtractResult::text(String::new());
470    }
471
472    if start == 0 || stop == 0 {
473        return ExtractResult::text(String::new());
474    }
475
476    if start > len {
477        return ExtractResult::text(String::new());
478    }
479    let stop_clamped = stop.min(len);
480    if stop_clamped == 0 {
481        return ExtractResult::text(String::new());
482    }
483
484    match mode {
485        BoundariesMode::Inclusive => {
486            if start > stop_clamped {
487                return ExtractResult::text(String::new());
488            }
489            let start_idx = start - 1;
490            let end_idx = stop_clamped - 1;
491            if start_idx >= len || end_idx >= len || start_idx > end_idx {
492                ExtractResult::text(String::new())
493            } else {
494                let slice: String = chars[start_idx..=end_idx].iter().collect();
495                ExtractResult::text(slice)
496            }
497        }
498        BoundariesMode::Exclusive => {
499            if start + 1 >= stop_clamped {
500                return ExtractResult::text(String::new());
501            }
502            let start_idx = start;
503            let end_idx = stop_clamped - 2;
504            if start_idx >= len || end_idx >= len || start_idx > end_idx {
505                ExtractResult::text(String::new())
506            } else {
507                let slice: String = chars[start_idx..=end_idx].iter().collect();
508                ExtractResult::text(slice)
509            }
510        }
511    }
512}
513
514#[derive(Clone, Debug)]
515struct CellInfo {
516    shape: Vec<usize>,
517    element_kinds: Vec<CellElementKind>,
518}
519
/// Which text representation a cell element used on input.
#[derive(Clone, Debug)]
enum CellElementKind {
    /// The element was a string scalar (or one-element string array).
    String,
    /// The element was a character array with at most one row.
    Char,
}
525
526#[derive(Clone, Debug)]
527enum TextKind {
528    StringScalar,
529    StringArray,
530    CharArray { rows: usize },
531    CellArray(CellInfo),
532}
533
534#[derive(Clone, Debug)]
535struct NormalizedText {
536    data: Vec<String>,
537    shape: Vec<usize>,
538    kind: TextKind,
539}
540
541impl NormalizedText {
542    fn from_value(value: Value) -> BuiltinResult<Self> {
543        match value {
544            Value::String(s) => Ok(Self {
545                data: vec![s],
546                shape: vec![1, 1],
547                kind: TextKind::StringScalar,
548            }),
549            Value::StringArray(sa) => Ok(Self {
550                data: sa.data.clone(),
551                shape: sa.shape.clone(),
552                kind: TextKind::StringArray,
553            }),
554            Value::CharArray(ca) => {
555                let rows = ca.rows;
556                let mut data = Vec::with_capacity(rows);
557                for row in 0..rows {
558                    data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
559                }
560                Ok(Self {
561                    data,
562                    shape: vec![rows, 1],
563                    kind: TextKind::CharArray { rows },
564                })
565            }
566            Value::Cell(cell) => {
567                let shape = cell.shape.clone();
568                let mut data = Vec::with_capacity(cell.data.len());
569                let mut kinds = Vec::with_capacity(cell.data.len());
570                for element in &cell.data {
571                    match &**element {
572                        Value::String(s) => {
573                            data.push(s.clone());
574                            kinds.push(CellElementKind::String);
575                        }
576                        Value::StringArray(sa) if sa.data.len() == 1 => {
577                            data.push(sa.data[0].clone());
578                            kinds.push(CellElementKind::String);
579                        }
580                        Value::CharArray(ca) if ca.rows <= 1 => {
581                            if ca.rows == 0 {
582                                data.push(String::new());
583                            } else {
584                                data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
585                            }
586                            kinds.push(CellElementKind::Char);
587                        }
588                        Value::CharArray(_) => {
589                            return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_CELL_ELEMENT))
590                        }
591                        _ => {
592                            return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_CELL_ELEMENT))
593                        }
594                    }
595                }
596                Ok(Self {
597                    data,
598                    shape: shape.clone(),
599                    kind: TextKind::CellArray(CellInfo {
600                        shape,
601                        element_kinds: kinds,
602                    }),
603                })
604            }
605            _ => Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_INVALID_INPUT)),
606        }
607    }
608
609    fn shape(&self) -> &[usize] {
610        &self.shape
611    }
612
613    fn data(&self, idx: usize) -> &str {
614        &self.data[idx]
615    }
616
617    fn supports_shape(&self, output_shape: &[usize]) -> bool {
618        match &self.kind {
619            TextKind::StringScalar => true,
620            TextKind::StringArray => true,
621            TextKind::CharArray { .. } => output_shape == self.shape,
622            TextKind::CellArray(info) => output_shape == info.shape,
623        }
624    }
625
626    fn into_value(
627        self,
628        results: Vec<ExtractResult>,
629        output_shape: Vec<usize>,
630    ) -> BuiltinResult<Value> {
631        match self.kind {
632            TextKind::StringScalar => {
633                if results.len() <= 1 {
634                    let value = results
635                        .into_iter()
636                        .next()
637                        .unwrap_or_else(|| ExtractResult::text(String::new()));
638                    Ok(Value::String(value.text))
639                } else {
640                    let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
641                    let array = StringArray::new(data, output_shape).map_err(|e| {
642                        extract_between_error_with_message(
643                            format!("{BUILTIN_NAME}: {e}"),
644                            &EXTRACT_BETWEEN_ERROR_INTERNAL,
645                        )
646                    })?;
647                    Ok(Value::StringArray(array))
648                }
649            }
650            TextKind::StringArray => {
651                let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
652                let array = StringArray::new(data, output_shape).map_err(|e| {
653                    extract_between_error_with_message(
654                        format!("{BUILTIN_NAME}: {e}"),
655                        &EXTRACT_BETWEEN_ERROR_INTERNAL,
656                    )
657                })?;
658                Ok(Value::StringArray(array))
659            }
660            TextKind::CharArray { rows } => {
661                if rows == 0 {
662                    return CharArray::new(Vec::new(), 0, 0)
663                        .map(Value::CharArray)
664                        .map_err(|e| {
665                            extract_between_error_with_message(
666                                format!("{BUILTIN_NAME}: {e}"),
667                                &EXTRACT_BETWEEN_ERROR_INTERNAL,
668                            )
669                        });
670                }
671                if results.len() != rows {
672                    return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH));
673                }
674                let mut max_width = 0usize;
675                let mut row_strings = Vec::with_capacity(rows);
676                for result in &results {
677                    let width = result.text.chars().count();
678                    max_width = max_width.max(width);
679                    row_strings.push(result.text.clone());
680                }
681                let mut flattened = Vec::with_capacity(rows * max_width);
682                for row in row_strings {
683                    let mut chars: Vec<char> = row.chars().collect();
684                    if chars.len() < max_width {
685                        chars.resize(max_width, ' ');
686                    }
687                    flattened.extend(chars);
688                }
689                CharArray::new(flattened, rows, max_width)
690                    .map(Value::CharArray)
691                    .map_err(|e| {
692                        extract_between_error_with_message(
693                            format!("{BUILTIN_NAME}: {e}"),
694                            &EXTRACT_BETWEEN_ERROR_INTERNAL,
695                        )
696                    })
697            }
698            TextKind::CellArray(info) => {
699                if results.len() != info.element_kinds.len() {
700                    return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH));
701                }
702                let mut values = Vec::with_capacity(results.len());
703                for (idx, result) in results.into_iter().enumerate() {
704                    match info.element_kinds[idx] {
705                        CellElementKind::String => values.push(Value::String(result.text)),
706                        CellElementKind::Char => {
707                            let ca = CharArray::new_row(&result.text);
708                            values.push(Value::CharArray(ca));
709                        }
710                    }
711                }
712                make_cell_with_shape(values, info.shape).map_err(|e| {
713                    extract_between_error_with_message(
714                        format!("{BUILTIN_NAME}: {e}"),
715                        &EXTRACT_BETWEEN_ERROR_INTERNAL,
716                    )
717                })
718            }
719        }
720    }
721}
722
/// Whether a boundary argument holds marker text or numeric positions.
///
/// Derives `Copy` (like `BoundariesMode`) so the kind can be compared and
/// matched repeatedly without moves or clones.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BoundaryKind {
    /// Boundary given as marker text.
    Text,
    /// Boundary given as numeric position(s).
    Position,
}
728
729#[derive(Clone, Debug)]
730enum BoundaryArg {
731    Text(BoundaryText),
732    Position(BoundaryPositions),
733}
734
735impl BoundaryArg {
736    fn from_value(value: Value) -> BuiltinResult<Self> {
737        match value {
738            Value::String(_) | Value::StringArray(_) | Value::CharArray(_) | Value::Cell(_) => {
739                BoundaryText::from_value(value).map(BoundaryArg::Text)
740            }
741            Value::Num(_) | Value::Int(_) | Value::Tensor(_) => {
742                BoundaryPositions::from_value(value).map(BoundaryArg::Position)
743            }
744            other => Err(extract_between_error_with_message(
745                format!(
746                    "{}: unsupported argument {other:?}",
747                    EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE.message
748                ),
749                &EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE,
750            )),
751        }
752    }
753
754    fn kind(&self) -> BoundaryKind {
755        match self {
756            BoundaryArg::Text(_) => BoundaryKind::Text,
757            BoundaryArg::Position(_) => BoundaryKind::Position,
758        }
759    }
760
761    fn shape(&self) -> &[usize] {
762        match self {
763            BoundaryArg::Text(text) => &text.shape,
764            BoundaryArg::Position(pos) => &pos.shape,
765        }
766    }
767
768    fn text(&self, idx: usize) -> &str {
769        match self {
770            BoundaryArg::Text(text) => &text.data[idx],
771            BoundaryArg::Position(_) => unreachable!(),
772        }
773    }
774
775    fn position(&self, idx: usize) -> usize {
776        match self {
777            BoundaryArg::Position(pos) => pos.data[idx],
778            BoundaryArg::Text(_) => unreachable!(),
779        }
780    }
781}
782
783#[derive(Clone, Debug)]
784struct BoundaryText {
785    data: Vec<String>,
786    shape: Vec<usize>,
787}
788
789impl BoundaryText {
790    fn from_value(value: Value) -> BuiltinResult<Self> {
791        match value {
792            Value::String(s) => Ok(Self {
793                data: vec![s],
794                shape: vec![1, 1],
795            }),
796            Value::StringArray(sa) => Ok(Self {
797                data: sa.data.clone(),
798                shape: sa.shape.clone(),
799            }),
800            Value::CharArray(ca) => {
801                let mut data = Vec::with_capacity(ca.rows);
802                for row in 0..ca.rows {
803                    data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
804                }
805                Ok(Self {
806                    data,
807                    shape: vec![ca.rows, 1],
808                })
809            }
810            Value::Cell(cell) => {
811                let shape = cell.shape.clone();
812                let mut data = Vec::with_capacity(cell.data.len());
813                for element in &cell.data {
814                    match &**element {
815                        Value::String(s) => data.push(s.clone()),
816                        Value::StringArray(sa) if sa.data.len() == 1 => {
817                            data.push(sa.data[0].clone());
818                        }
819                        Value::CharArray(ca) if ca.rows <= 1 => {
820                            if ca.rows == 0 {
821                                data.push(String::new());
822                            } else {
823                                data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
824                            }
825                        }
826                        Value::CharArray(_) => {
827                            return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_CELL_ELEMENT))
828                        }
829                        _ => {
830                            return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_CELL_ELEMENT))
831                        }
832                    }
833                }
834                Ok(Self { data, shape })
835            }
836            _ => Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE)),
837        }
838    }
839}
840
841#[derive(Clone, Debug)]
842struct BoundaryPositions {
843    data: Vec<usize>,
844    shape: Vec<usize>,
845}
846
847impl BoundaryPositions {
848    fn from_value(value: Value) -> BuiltinResult<Self> {
849        match value {
850            Value::Num(n) => Ok(Self {
851                data: vec![parse_position(n)?],
852                shape: vec![1, 1],
853            }),
854            Value::Int(i) => Ok(Self {
855                data: vec![parse_position_int(i)?],
856                shape: vec![1, 1],
857            }),
858            Value::Tensor(t) => {
859                let mut data = Vec::with_capacity(t.data.len());
860                for &entry in &t.data {
861                    data.push(parse_position(entry)?);
862                }
863                Ok(Self {
864                    data,
865                    shape: if t.shape.is_empty() {
866                        vec![t.rows, t.cols.max(1)]
867                    } else {
868                        t.shape
869                    },
870                })
871            }
872            _ => Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE)),
873        }
874    }
875}
876
877fn parse_position(value: f64) -> BuiltinResult<usize> {
878    if !value.is_finite() || value < 1.0 {
879        return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_POSITION_TYPE));
880    }
881    if (value.fract()).abs() > f64::EPSILON {
882        return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_POSITION_TYPE));
883    }
884    if value > (usize::MAX as f64) {
885        return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_POSITION_TYPE));
886    }
887    Ok(value as usize)
888}
889
890fn parse_position_int(value: IntValue) -> BuiltinResult<usize> {
891    let val = value.to_i64();
892    if val <= 0 {
893        return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_POSITION_TYPE));
894    }
895    Ok(val as usize)
896}
897
#[cfg(test)]
pub(crate) mod tests {
    #![allow(non_snake_case)]

    use super::*;
    use runmat_builtins::{CellArray, ResolveContext, Tensor, Type};

    /// Asserts that an error carries the message and identifier of a descriptor.
    macro_rules! assert_error_matches {
        ($err:expr, $descriptor:expr) => {{
            let err = $err;
            assert_eq!(err.to_string(), $descriptor.message);
            assert_eq!(err.identifier(), $descriptor.identifier);
        }};
    }

    /// Blocks on the builtin's future so tests can call it synchronously.
    fn run_extract_between(
        text: Value,
        start: Value,
        stop: Value,
        rest: Vec<Value>,
    ) -> BuiltinResult<Value> {
        let call = super::extract_between_builtin(text, start, stop, rest);
        futures::executor::block_on(call)
    }

    /// Runs the builtin and unwraps the successful result.
    fn extract_ok(text: Value, start: Value, stop: Value, rest: Vec<Value>) -> Value {
        run_extract_between(text, start, stop, rest).expect("extractBetween")
    }

    /// Wraps a string slice in a `Value::String`.
    fn text_value(text: &str) -> Value {
        Value::String(text.to_string())
    }

    /// Builds the trailing `"Boundaries", <mode>` name-value pair.
    fn boundaries_option(mode: &str) -> Vec<Value> {
        vec![text_value("Boundaries"), text_value(mode)]
    }

    /// Unwraps a string-array result or fails the test.
    fn expect_string_array(value: Value) -> StringArray {
        match value {
            Value::StringArray(sa) => sa,
            other => panic!("expected string array, got {other:?}"),
        }
    }

    /// Unwraps a cell-array result or fails the test.
    fn expect_cell(value: Value) -> CellArray {
        match value {
            Value::Cell(cell) => cell,
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_basic_string() {
        let result = extract_ok(
            text_value("RunMat accelerates MATLAB"),
            text_value("RunMat "),
            text_value(" MATLAB"),
            Vec::new(),
        );
        assert_eq!(result, text_value("accelerates"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_inclusive_option() {
        let result = extract_ok(
            text_value("a[b]c"),
            text_value("["),
            text_value("]"),
            boundaries_option("inclusive"),
        );
        assert_eq!(result, text_value("[b]"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_numeric_positions() {
        let result = extract_ok(
            text_value("Accelerator"),
            Value::Num(3.0),
            Value::Num(7.0),
            Vec::new(),
        );
        assert_eq!(result, text_value("celer"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_numeric_positions_exclusive_option() {
        let result = extract_ok(
            text_value("Accelerator"),
            Value::Num(3.0),
            Value::Num(7.0),
            boundaries_option("exclusive"),
        );
        assert_eq!(result, text_value("ele"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_numeric_positions_clamps_stop() {
        let result = extract_ok(
            text_value("Accelerator"),
            Value::Num(3.0),
            Value::Num(100.0),
            Vec::new(),
        );
        assert_eq!(result, text_value("celerator"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_numeric_positions_start_past_length() {
        let result = extract_ok(
            text_value("abc"),
            Value::Num(10.0),
            Value::Num(12.0),
            Vec::new(),
        );
        assert_eq!(result, Value::String(String::new()));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_string_array_broadcast() {
        let files = vec!["runmat_accel.rs".to_string(), "runmat_gc.rs".to_string()];
        let array = StringArray::new(files, vec![2, 1]).unwrap();
        let result = extract_ok(
            Value::StringArray(array),
            text_value("runmat_"),
            text_value(".rs"),
            Vec::new(),
        );
        let sa = expect_string_array(result);
        assert_eq!(sa.data, vec!["accel".to_string(), "gc".to_string()]);
        assert_eq!(sa.shape, vec![2, 1]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_char_array_rows() {
        let source = "GPUAccelerateVM";
        let chars = CharArray::new(source.chars().collect(), 1, source.len()).unwrap();
        let result = extract_ok(
            Value::CharArray(chars),
            text_value("GPU"),
            text_value("VM"),
            Vec::new(),
        );
        let out = match result {
            Value::CharArray(out) => out,
            other => panic!("expected char array, got {other:?}"),
        };
        assert_eq!(out.rows, 1);
        let text: String = out.data.iter().collect();
        assert_eq!(text.trim_end(), "Accelerate");
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_cell_array_preserves_types() {
        let elements = vec![
            Value::CharArray(CharArray::new_row("A[B]C")),
            text_value("Planner<GPU>"),
        ];
        let cell = CellArray::new(elements, 1, 2).unwrap();
        let result = extract_ok(
            Value::Cell(cell),
            text_value("["),
            text_value("]"),
            Vec::new(),
        );
        let out = expect_cell(result);
        let first = out.get(0, 0).unwrap();
        let second = out.get(0, 1).unwrap();
        assert_eq!(first, Value::CharArray(CharArray::new_row("B")));
        assert_eq!(second, Value::String(String::new()));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_missing_string_propagates() {
        let missing = || StringArray::new(vec!["<missing>".to_string()], vec![1, 1]).unwrap();
        let result = extract_ok(
            Value::StringArray(missing()),
            text_value("["),
            text_value("]"),
            Vec::new(),
        );
        assert_eq!(result, Value::StringArray(missing()));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_position_type_error() {
        let err = run_extract_between(
            text_value("abc"),
            Value::Num(0.5),
            Value::Num(2.0),
            Vec::new(),
        )
        .unwrap_err();
        assert_error_matches!(err, EXTRACT_BETWEEN_ERROR_POSITION_TYPE);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_mixed_boundary_error() {
        let err = run_extract_between(
            text_value("abc"),
            text_value("a"),
            Value::Num(3.0),
            Vec::new(),
        )
        .unwrap_err();
        assert_error_matches!(err, EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_numeric_tensor_broadcast() {
        let text = StringArray::new(vec!["abcd".to_string(), "wxyz".to_string()], vec![2, 1])
            .unwrap();
        let start = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
        let stop = Tensor::new(vec![3.0, 4.0], vec![2, 1]).unwrap();
        let result = extract_ok(
            Value::StringArray(text),
            Value::Tensor(start),
            Value::Tensor(stop),
            Vec::new(),
        );
        let sa = expect_string_array(result);
        assert_eq!(sa.data, vec!["abc".to_string(), "xyz".to_string()]);
        assert_eq!(sa.shape, vec![2, 1]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_option_invalid_value() {
        let err = run_extract_between(
            text_value("abc"),
            text_value("a"),
            text_value("c"),
            boundaries_option("middle"),
        )
        .unwrap_err();
        assert_error_matches!(err, EXTRACT_BETWEEN_ERROR_OPTION_VALUE);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_option_name_error() {
        let err = run_extract_between(
            text_value("abc"),
            text_value("a"),
            text_value("c"),
            vec![text_value("Padding"), text_value("inclusive")],
        )
        .unwrap_err();
        assert_error_matches!(err, EXTRACT_BETWEEN_ERROR_OPTION_NAME);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_option_pair_error() {
        let err = run_extract_between(
            text_value("abc"),
            text_value("a"),
            text_value("b"),
            vec![text_value("Boundaries")],
        )
        .unwrap_err();
        assert_error_matches!(err, EXTRACT_BETWEEN_ERROR_NAME_VALUE_PAIR);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_missing_boundary_propagates() {
        let result = extract_ok(
            text_value("Planner<GPU>"),
            text_value("<missing>"),
            text_value(">"),
            Vec::new(),
        );
        assert_eq!(result, text_value("<missing>"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_cell_boundary_arguments() {
        let single_cell = |element: Value| CellArray::new(vec![element], 1, 1).unwrap();
        let text = single_cell(text_value("A<GPU>"));
        let start = single_cell(Value::CharArray(CharArray::new_row("<")));
        let stop = single_cell(Value::CharArray(CharArray::new_row(">")));
        let result = extract_ok(
            Value::Cell(text),
            Value::Cell(start),
            Value::Cell(stop),
            Vec::new(),
        );
        let out = expect_cell(result);
        let value = out.get(0, 0).unwrap();
        assert_eq!(value, text_value("GPU"));
    }

    #[test]
    fn extract_between_type_preserves_text() {
        let context = ResolveContext::new(Vec::new());
        let resolved = text_preserve_type(&[Type::String], &context);
        assert_eq!(resolved, Type::String);
    }
}