1use std::cmp::min;
4
5use crate::builtins::common::broadcast::{broadcast_index, broadcast_shapes, compute_strides};
6use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
7use crate::{
8 gather_if_needed, make_cell_with_shape, register_builtin_fusion_spec, register_builtin_gpu_spec,
9};
10use runmat_builtins::{CharArray, IntValue, StringArray, Value};
11use runmat_macros::runtime_builtin;
12
13#[cfg(feature = "doc_export")]
14use crate::register_builtin_doc_text;
15
16use crate::builtins::common::spec::{
17 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
18 ReductionNaN, ResidencyPolicy, ShapeRequirements,
19};
20
21#[cfg(feature = "doc_export")]
22pub const DOC_MD: &str = r#"---
23title: "extractBetween"
24category: "strings/transform"
25keywords: ["extractBetween", "substring", "boundaries", "inclusive", "exclusive", "string array"]
26summary: "Extract text that lies between two boundary markers using string or position inputs."
27references:
28 - https://www.mathworks.com/help/matlab/ref/extractbetween.html
29gpu_support:
30 elementwise: false
31 reduction: false
32 precisions: []
33 broadcasting: "matlab"
34 notes: "Runs on the CPU. GPU-resident inputs are gathered before processing, results stay on the host, and the builtin is registered as an Accelerate sink."
35fusion:
36 elementwise: false
37 reduction: false
38 max_inputs: 3
39 constants: "inline"
40requires_feature: null
41tested:
42 unit: "builtins::strings::transform::extractbetween::tests"
43 integration: "builtins::strings::transform::extractbetween::tests::extractBetween_cell_array_preserves_types"
44---
45
46# What does the `extractBetween` function do in MATLAB / RunMat?
47`extractBetween(text, start, stop)` locates the substring that appears between two boundary markers.
48Markers can be text (string scalars, character vectors, or cells that contain them) or numeric
49positions. The builtin mirrors MATLAB semantics for broadcasting, missing values, and the optional
50`'Boundaries'` name-value argument.
51
52## How does the `extractBetween` function behave in MATLAB / RunMat?
53- Accepts **string scalars**, **string arrays**, **character arrays** (interpreted row-by-row), and
54 **cell arrays** that contain string scalars or character vectors. Cell outputs preserve the
55 element type (string vs. char) of each cell.
56- Boundary inputs can be text or numeric positions. Both boundaries in a call must use the same
57 kind of input; mixing text and numeric markers raises a size/type error.
58- Scalar text markers follow MATLAB implicit expansion, applying to every element of the text
59 input. Character-array and cell inputs must exactly match the text shape.
60- The `'Boundaries'` name-value pair controls inclusivity. Text markers default to **exclusive**
61 extraction, while numeric positions default to **inclusive** behaviour. Values are
62 case-insensitive and must be `'exclusive'` or `'inclusive'`.
63- Missing string scalars propagate: if the text, start marker, or end marker is `<missing>`,
64 the result is also `<missing>`.
65- When the start or end boundary cannot be located, `extractBetween` returns an empty string (or an
66 appropriately padded empty row for character arrays).
67- Numeric positions use 1-based indexing. Inputs are validated as positive integers, clamped to
68 string length, and honour inclusivity rules exactly as MATLAB does.
69
70## `extractBetween` Function GPU Execution Behaviour
71Text manipulation executes on the CPU. When any argument resides on the GPU, RunMat gathers the
72values to host memory, performs extraction, and leaves the results on the host. No Accelerate
73provider hooks are required, and the builtin is registered as an Accelerate sink so fusion plans
74never attempt to keep data on the device for this operation.
75
76## Examples of using the `extractBetween` function in MATLAB / RunMat
77
78### Extract text between words in a string
79```matlab
80txt = "RunMat accelerates MATLAB workloads";
81segment = extractBetween(txt, "RunMat ", " workloads");
82```
83Expected output:
84```matlab
85segment = "accelerates MATLAB"
86```
87
88### Include boundary markers with the `'Boundaries'` option
89```matlab
90path = "snapshots/run/fusion.mat";
91withMarkers = extractBetween(path, "snapshots/", ".mat", "Boundaries", "inclusive");
92```
93Expected output:
94```matlab
95withMarkers = "snapshots/run/fusion.mat"
96```
97
98### Use numeric positions for 1-based indexing
99```matlab
100name = "Accelerator";
101middle = extractBetween(name, 3, 7);
102```
103Expected output:
104```matlab
105middle = "celer"
106```
107
108### Apply scalar text markers to each element of a string array
109```matlab
110files = ["runmat_accel.rs", "runmat_gc.rs"; "runmat_plot.rs", "runmat_cli.rs"];
111stems = extractBetween(files, "runmat_", ".rs");
112```
113Expected output:
114```matlab
115stems = 2×2 string
116 "accel" "gc"
117 "plot" "cli"
118```
119
120### Work with character arrays while preserving row padding
121```matlab
122chars = char("Device<GPU>", "Planner<Fusion>");
123tokens = extractBetween(chars, "<", ">");
124```
125Expected output:
126```matlab
127tokens =
128
129 2×6 char array
130
131 "GPU "
132 "Fusion"
133```
134
135### Preserve element types in cell arrays
136```matlab
137C = {'<missing>', 'A<B>C'; "Planner <Fusion>", "Device<GPU>"};
138out = extractBetween(C, "<", ">");
139```
140Expected output:
141```matlab
142out =
143 2×2 cell array
144 {'<missing>'} {'B'}
145 {"Fusion"} {"GPU"}
146```
147
148### Handle missing strings without throwing errors
149```matlab
150txt = ["<missing>", "Planner<GPU>"];
151tokens = extractBetween(txt, "<", ">");
152```
153Expected output:
154```matlab
155tokens = 1×2 string
156 "<missing>" "GPU"
157```
158
159## FAQ
160
161### Which argument types does `extractBetween` accept?
162The first argument can be a string scalar, string array, character array, or cell array of character
163vectors / string scalars. Boundary arguments can be text (string, character array, or cell) or numeric
164positions supplied as scalars, vectors, or arrays.
165
166### Can the start and end arguments mix text and numeric positions?
167No. Both boundaries must be text markers or both must be numeric positions. Mixing types raises a
168size/type error, mirroring MATLAB.
169
170### What happens when a boundary is not found?
171`extractBetween` returns the empty string (`""`). Character-array outputs contain space padded rows
172of the appropriate length.
173
174### How does `'Boundaries','inclusive'` behave with numeric positions?
175Inclusive mode returns the substring that includes both indices. Exclusive mode removes the characters
176at the specified start and end positions, yielding the text strictly between the two indices.
177
178### Does `extractBetween` support implicit expansion?
179Yes. Scalar boundaries expand against array inputs following MATLAB implicit expansion rules. Cell and
180character array inputs must retain their original shape; attempting to expand them produces a size
181mismatch error.
182
183### Are GPU inputs supported?
184Yes. Inputs stored on a GPU are gathered automatically. The function executes on the CPU, returns
185host-side results, and fusion planning treats the builtin as a residency sink.
186
187## See Also
188[replace](../../transform/replace), [split](../../transform/split), [join](../../transform/join), [contains](../../search/contains), [strfind](../../search/strfind)
189
190## Source & Feedback
191- Implementation: [`crates/runmat-runtime/src/builtins/strings/transform/extractbetween.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/transform/extractbetween.rs)
192- Found an issue? Please [open a GitHub issue](https://github.com/runmat-org/runmat/issues/new/choose) with a minimal reproduction.
193"#;
194
195pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
196 name: "extractBetween",
197 op_kind: GpuOpKind::Custom("string-transform"),
198 supported_precisions: &[],
199 broadcast: BroadcastSemantics::Matlab,
200 provider_hooks: &[],
201 constant_strategy: ConstantStrategy::InlineLiteral,
202 residency: ResidencyPolicy::GatherImmediately,
203 nan_mode: ReductionNaN::Include,
204 two_pass_threshold: None,
205 workgroup_size: None,
206 accepts_nan_mode: false,
207 notes: "Runs on the CPU; GPU-resident inputs are gathered before extraction and outputs are returned on the host.",
208};
209
210register_builtin_gpu_spec!(GPU_SPEC);
211
212pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
213 name: "extractBetween",
214 shape: ShapeRequirements::Any,
215 constant_strategy: ConstantStrategy::InlineLiteral,
216 elementwise: None,
217 reduction: None,
218 emits_nan: false,
219 notes: "Pure string manipulation builtin; excluded from fusion plans and gathers GPU inputs immediately.",
220};
221
222register_builtin_fusion_spec!(FUSION_SPEC);
223
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("extractBetween", DOC_MD);

/// Builtin name used as the prefix of forwarded error messages.
const FN_NAME: &str = "extractBetween";

// --- Argument type / shape errors -------------------------------------------------------------

/// The text argument is not a string, string array, character array, or cell array.
const ARG_TYPE_ERROR: &str = "extractBetween: first argument must be a string array, character array, or cell array of character vectors";
/// A cell array holds something other than string scalars or character vectors.
const CELL_ELEMENT_ERROR: &str =
    "extractBetween: cell array elements must be string scalars or character vectors";
/// The start and end boundaries are of different kinds, or of an unsupported type.
const BOUNDARY_TYPE_ERROR: &str =
    "extractBetween: start and end arguments must both be text or both be numeric positions";
/// A numeric boundary is not a positive integer.
const POSITION_TYPE_ERROR: &str = "extractBetween: position arguments must be positive integers";
/// The broadcast output shape cannot be represented by the text container.
const SIZE_MISMATCH_ERROR: &str =
    "extractBetween: boundary sizes must be compatible with the text input";

// --- Name-value option errors -----------------------------------------------------------------

/// The trailing arguments do not form complete name-value pairs.
const OPTION_PAIR_ERROR: &str = "extractBetween: name-value arguments must appear in pairs";
/// The option name is not text or is not `Boundaries`.
const OPTION_NAME_ERROR: &str = "extractBetween: unrecognized parameter name";
/// The `Boundaries` value is neither `inclusive` nor `exclusive`.
const OPTION_VALUE_ERROR: &str =
    "extractBetween: 'Boundaries' must be either 'inclusive' or 'exclusive'";
240
/// Whether the boundary markers themselves are part of the extracted text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BoundariesMode {
    /// Return only the text strictly between the two boundaries.
    Exclusive,
    /// Return the text together with both boundaries.
    Inclusive,
}
246
247#[runtime_builtin(
248 name = "extractBetween",
249 category = "strings/transform",
250 summary = "Extract substrings between boundary markers using MATLAB-compatible semantics.",
251 keywords = "extractBetween,substring,boundaries,strings",
252 accel = "sink"
253)]
254fn extract_between_builtin(
255 text: Value,
256 start: Value,
257 stop: Value,
258 rest: Vec<Value>,
259) -> Result<Value, String> {
260 let text = gather_if_needed(&text).map_err(|e| format!("{FN_NAME}: {e}"))?;
261 let start = gather_if_needed(&start).map_err(|e| format!("{FN_NAME}: {e}"))?;
262 let stop = gather_if_needed(&stop).map_err(|e| format!("{FN_NAME}: {e}"))?;
263
264 let mode_override = parse_boundaries_option(&rest)?;
265
266 let normalized_text = NormalizedText::from_value(text)?;
267 let start_boundary = BoundaryArg::from_value(start)?;
268 let stop_boundary = BoundaryArg::from_value(stop)?;
269
270 if start_boundary.kind() != stop_boundary.kind() {
271 return Err(BOUNDARY_TYPE_ERROR.to_string());
272 }
273 let boundary_kind = start_boundary.kind();
274 let effective_mode = mode_override.unwrap_or(match boundary_kind {
275 BoundaryKind::Text => BoundariesMode::Exclusive,
276 BoundaryKind::Position => BoundariesMode::Inclusive,
277 });
278
279 let start_shape = start_boundary.shape();
280 let stop_shape = stop_boundary.shape();
281 let text_shape = normalized_text.shape();
282
283 let shape_ts = broadcast_shapes(FN_NAME, text_shape, start_shape)?;
284 let output_shape = broadcast_shapes(FN_NAME, &shape_ts, stop_shape)?;
285 if !normalized_text.supports_shape(&output_shape) {
286 return Err(SIZE_MISMATCH_ERROR.to_string());
287 }
288
289 let total: usize = output_shape.iter().copied().product();
290 if total == 0 {
291 return normalized_text.into_value(Vec::new(), output_shape);
292 }
293
294 let text_strides = compute_strides(text_shape);
295 let start_strides = compute_strides(start_shape);
296 let stop_strides = compute_strides(stop_shape);
297
298 let mut results = Vec::with_capacity(total);
299
300 for idx in 0..total {
301 let text_idx = broadcast_index(idx, &output_shape, text_shape, &text_strides);
302 let start_idx = broadcast_index(idx, &output_shape, start_shape, &start_strides);
303 let stop_idx = broadcast_index(idx, &output_shape, stop_shape, &stop_strides);
304
305 let result = match boundary_kind {
306 BoundaryKind::Text => {
307 let text_value = normalized_text.data(text_idx);
308 let start_value = start_boundary.text(start_idx);
309 let stop_value = stop_boundary.text(stop_idx);
310 extract_with_text_boundaries(text_value, start_value, stop_value, effective_mode)
311 }
312 BoundaryKind::Position => {
313 let text_value = normalized_text.data(text_idx);
314 let start_value = start_boundary.position(start_idx);
315 let stop_value = stop_boundary.position(stop_idx);
316 extract_with_positions(text_value, start_value, stop_value, effective_mode)
317 }
318 };
319 results.push(result);
320 }
321
322 normalized_text.into_value(results, output_shape)
323}
324
325fn parse_boundaries_option(args: &[Value]) -> Result<Option<BoundariesMode>, String> {
326 if args.is_empty() {
327 return Ok(None);
328 }
329 if !args.len().is_multiple_of(2) {
330 return Err(OPTION_PAIR_ERROR.to_string());
331 }
332
333 let mut mode: Option<BoundariesMode> = None;
334 let mut idx = 0;
335 while idx < args.len() {
336 let name_value = gather_if_needed(&args[idx]).map_err(|e| format!("{FN_NAME}: {e}"))?;
337 let name = value_to_string(&name_value).ok_or_else(|| OPTION_NAME_ERROR.to_string())?;
338 if !name.eq_ignore_ascii_case("boundaries") {
339 return Err(OPTION_NAME_ERROR.to_string());
340 }
341 let value = gather_if_needed(&args[idx + 1]).map_err(|e| format!("{FN_NAME}: {e}"))?;
342 let value_str = value_to_string(&value).ok_or_else(|| OPTION_VALUE_ERROR.to_string())?;
343 let parsed_mode = if value_str.eq_ignore_ascii_case("inclusive") {
344 BoundariesMode::Inclusive
345 } else if value_str.eq_ignore_ascii_case("exclusive") {
346 BoundariesMode::Exclusive
347 } else {
348 return Err(OPTION_VALUE_ERROR.to_string());
349 };
350 mode = Some(parsed_mode);
351 idx += 2;
352 }
353 Ok(mode)
354}
355
356fn value_to_string(value: &Value) -> Option<String> {
357 match value {
358 Value::String(s) => Some(s.clone()),
359 Value::StringArray(sa) if sa.data.len() == 1 => Some(sa.data[0].clone()),
360 Value::CharArray(ca) if ca.rows <= 1 => {
361 if ca.rows == 0 {
362 Some(String::new())
363 } else {
364 Some(char_row_to_string_slice(&ca.data, ca.cols, 0))
365 }
366 }
367 Value::CharArray(_) => None,
368 Value::Cell(cell) if cell.data.len() == 1 => {
369 let element = &cell.data[0];
370 value_to_string(element)
371 }
372 _ => None,
373 }
374}
375
/// Outcome of extracting from one element of the text input.
#[derive(Clone)]
struct ExtractResult {
    /// Extracted text, or the literal `<missing>` marker produced by [`ExtractResult::missing`].
    text: String,
}

impl ExtractResult {
    /// Result whose text is the `<missing>` marker.
    fn missing() -> Self {
        ExtractResult {
            text: String::from("<missing>"),
        }
    }

    /// Result carrying the given extracted text.
    fn text(text: String) -> Self {
        ExtractResult { text }
    }
}
392
393fn extract_with_text_boundaries(
394 text: &str,
395 start: &str,
396 stop: &str,
397 mode: BoundariesMode,
398) -> ExtractResult {
399 if is_missing_string(text) || is_missing_string(start) || is_missing_string(stop) {
400 return ExtractResult::missing();
401 }
402
403 if let Some(start_idx) = text.find(start) {
404 let search_start = start_idx + start.len();
405 if search_start > text.len() {
406 return ExtractResult::text(String::new());
407 }
408 if let Some(relative_end) = text[search_start..].find(stop) {
409 let end_idx = search_start + relative_end;
410 match mode {
411 BoundariesMode::Inclusive => {
412 let end_capture = min(text.len(), end_idx + stop.len());
413 let slice = &text[start_idx..end_capture];
414 ExtractResult::text(slice.to_string())
415 }
416 BoundariesMode::Exclusive => {
417 if end_idx < search_start {
418 ExtractResult::text(String::new())
419 } else {
420 let slice = &text[search_start..end_idx];
421 ExtractResult::text(slice.to_string())
422 }
423 }
424 }
425 } else {
426 ExtractResult::text(String::new())
427 }
428 } else {
429 ExtractResult::text(String::new())
430 }
431}
432
433fn extract_with_positions(
434 text: &str,
435 start: usize,
436 stop: usize,
437 mode: BoundariesMode,
438) -> ExtractResult {
439 if is_missing_string(text) {
440 return ExtractResult::missing();
441 }
442 if text.is_empty() {
443 return ExtractResult::text(String::new());
444 }
445 let chars: Vec<char> = text.chars().collect();
446 let len = chars.len();
447 if len == 0 {
448 return ExtractResult::text(String::new());
449 }
450
451 if start == 0 || stop == 0 {
452 return ExtractResult::text(String::new());
453 }
454
455 if start > len {
456 return ExtractResult::text(String::new());
457 }
458 let stop_clamped = stop.min(len);
459 if stop_clamped == 0 {
460 return ExtractResult::text(String::new());
461 }
462
463 match mode {
464 BoundariesMode::Inclusive => {
465 if start > stop_clamped {
466 return ExtractResult::text(String::new());
467 }
468 let start_idx = start - 1;
469 let end_idx = stop_clamped - 1;
470 if start_idx >= len || end_idx >= len || start_idx > end_idx {
471 ExtractResult::text(String::new())
472 } else {
473 let slice: String = chars[start_idx..=end_idx].iter().collect();
474 ExtractResult::text(slice)
475 }
476 }
477 BoundariesMode::Exclusive => {
478 if start + 1 >= stop_clamped {
479 return ExtractResult::text(String::new());
480 }
481 let start_idx = start;
482 let end_idx = stop_clamped - 2;
483 if start_idx >= len || end_idx >= len || start_idx > end_idx {
484 ExtractResult::text(String::new())
485 } else {
486 let slice: String = chars[start_idx..=end_idx].iter().collect();
487 ExtractResult::text(slice)
488 }
489 }
490 }
491}
492
/// Container kind of the original text argument.
#[derive(Debug, Clone)]
enum TextKind {
    /// A single string scalar.
    StringScalar,
    /// A string array.
    StringArray,
    /// A character array with `rows` rows.
    CharArray { rows: usize },
    /// A cell array, described by the attached [`CellInfo`].
    CellArray(CellInfo),
}

/// Shape and per-element kinds recorded for a cell-array text input.
#[derive(Debug, Clone)]
struct CellInfo {
    /// Shape of the original cell array.
    shape: Vec<usize>,
    /// Kind of each cell element, in the cell's storage order.
    element_kinds: Vec<CellElementKind>,
}

/// Whether a cell element was string-typed or character-typed.
#[derive(Debug, Clone)]
enum CellElementKind {
    /// The element was a string scalar.
    String,
    /// The element was a character vector.
    Char,
}
512
513#[derive(Clone, Debug)]
514struct NormalizedText {
515 data: Vec<String>,
516 shape: Vec<usize>,
517 kind: TextKind,
518}
519
520impl NormalizedText {
521 fn from_value(value: Value) -> Result<Self, String> {
522 match value {
523 Value::String(s) => Ok(Self {
524 data: vec![s],
525 shape: vec![1, 1],
526 kind: TextKind::StringScalar,
527 }),
528 Value::StringArray(sa) => Ok(Self {
529 data: sa.data.clone(),
530 shape: sa.shape.clone(),
531 kind: TextKind::StringArray,
532 }),
533 Value::CharArray(ca) => {
534 let rows = ca.rows;
535 let mut data = Vec::with_capacity(rows);
536 for row in 0..rows {
537 data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
538 }
539 Ok(Self {
540 data,
541 shape: vec![rows, 1],
542 kind: TextKind::CharArray { rows },
543 })
544 }
545 Value::Cell(cell) => {
546 let shape = cell.shape.clone();
547 let mut data = Vec::with_capacity(cell.data.len());
548 let mut kinds = Vec::with_capacity(cell.data.len());
549 for element in &cell.data {
550 match &**element {
551 Value::String(s) => {
552 data.push(s.clone());
553 kinds.push(CellElementKind::String);
554 }
555 Value::StringArray(sa) if sa.data.len() == 1 => {
556 data.push(sa.data[0].clone());
557 kinds.push(CellElementKind::String);
558 }
559 Value::CharArray(ca) if ca.rows <= 1 => {
560 if ca.rows == 0 {
561 data.push(String::new());
562 } else {
563 data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
564 }
565 kinds.push(CellElementKind::Char);
566 }
567 Value::CharArray(_) => return Err(CELL_ELEMENT_ERROR.to_string()),
568 _ => return Err(CELL_ELEMENT_ERROR.to_string()),
569 }
570 }
571 Ok(Self {
572 data,
573 shape: shape.clone(),
574 kind: TextKind::CellArray(CellInfo {
575 shape,
576 element_kinds: kinds,
577 }),
578 })
579 }
580 _ => Err(ARG_TYPE_ERROR.to_string()),
581 }
582 }
583
584 fn shape(&self) -> &[usize] {
585 &self.shape
586 }
587
588 fn data(&self, idx: usize) -> &str {
589 &self.data[idx]
590 }
591
592 fn supports_shape(&self, output_shape: &[usize]) -> bool {
593 match &self.kind {
594 TextKind::StringScalar => true,
595 TextKind::StringArray => true,
596 TextKind::CharArray { .. } => output_shape == self.shape,
597 TextKind::CellArray(info) => output_shape == info.shape,
598 }
599 }
600
601 fn into_value(
602 self,
603 results: Vec<ExtractResult>,
604 output_shape: Vec<usize>,
605 ) -> Result<Value, String> {
606 match self.kind {
607 TextKind::StringScalar => {
608 if results.len() <= 1 {
609 let value = results
610 .into_iter()
611 .next()
612 .unwrap_or_else(|| ExtractResult::text(String::new()));
613 Ok(Value::String(value.text))
614 } else {
615 let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
616 let array = StringArray::new(data, output_shape)
617 .map_err(|e| format!("{FN_NAME}: {e}"))?;
618 Ok(Value::StringArray(array))
619 }
620 }
621 TextKind::StringArray => {
622 let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
623 let array =
624 StringArray::new(data, output_shape).map_err(|e| format!("{FN_NAME}: {e}"))?;
625 Ok(Value::StringArray(array))
626 }
627 TextKind::CharArray { rows } => {
628 if rows == 0 {
629 return CharArray::new(Vec::new(), 0, 0)
630 .map(Value::CharArray)
631 .map_err(|e| format!("{FN_NAME}: {e}"));
632 }
633 if results.len() != rows {
634 return Err(SIZE_MISMATCH_ERROR.to_string());
635 }
636 let mut max_width = 0usize;
637 let mut row_strings = Vec::with_capacity(rows);
638 for result in &results {
639 let width = result.text.chars().count();
640 max_width = max_width.max(width);
641 row_strings.push(result.text.clone());
642 }
643 let mut flattened = Vec::with_capacity(rows * max_width);
644 for row in row_strings {
645 let mut chars: Vec<char> = row.chars().collect();
646 if chars.len() < max_width {
647 chars.resize(max_width, ' ');
648 }
649 flattened.extend(chars);
650 }
651 CharArray::new(flattened, rows, max_width)
652 .map(Value::CharArray)
653 .map_err(|e| format!("{FN_NAME}: {e}"))
654 }
655 TextKind::CellArray(info) => {
656 if results.len() != info.element_kinds.len() {
657 return Err(SIZE_MISMATCH_ERROR.to_string());
658 }
659 let mut values = Vec::with_capacity(results.len());
660 for (idx, result) in results.into_iter().enumerate() {
661 match info.element_kinds[idx] {
662 CellElementKind::String => values.push(Value::String(result.text)),
663 CellElementKind::Char => {
664 let ca = CharArray::new_row(&result.text);
665 values.push(Value::CharArray(ca));
666 }
667 }
668 }
669 make_cell_with_shape(values, info.shape)
670 }
671 }
672 }
673}
674
675#[derive(Clone, Debug, PartialEq, Eq)]
676enum BoundaryKind {
677 Text,
678 Position,
679}
680
681#[derive(Clone, Debug)]
682enum BoundaryArg {
683 Text(BoundaryText),
684 Position(BoundaryPositions),
685}
686
687impl BoundaryArg {
688 fn from_value(value: Value) -> Result<Self, String> {
689 match value {
690 Value::String(_) | Value::StringArray(_) | Value::CharArray(_) | Value::Cell(_) => {
691 BoundaryText::from_value(value).map(BoundaryArg::Text)
692 }
693 Value::Num(_) | Value::Int(_) | Value::Tensor(_) => {
694 BoundaryPositions::from_value(value).map(BoundaryArg::Position)
695 }
696 other => Err(format!(
697 "{BOUNDARY_TYPE_ERROR}: unsupported argument {other:?}"
698 )),
699 }
700 }
701
702 fn kind(&self) -> BoundaryKind {
703 match self {
704 BoundaryArg::Text(_) => BoundaryKind::Text,
705 BoundaryArg::Position(_) => BoundaryKind::Position,
706 }
707 }
708
709 fn shape(&self) -> &[usize] {
710 match self {
711 BoundaryArg::Text(text) => &text.shape,
712 BoundaryArg::Position(pos) => &pos.shape,
713 }
714 }
715
716 fn text(&self, idx: usize) -> &str {
717 match self {
718 BoundaryArg::Text(text) => &text.data[idx],
719 BoundaryArg::Position(_) => unreachable!(),
720 }
721 }
722
723 fn position(&self, idx: usize) -> usize {
724 match self {
725 BoundaryArg::Position(pos) => pos.data[idx],
726 BoundaryArg::Text(_) => unreachable!(),
727 }
728 }
729}
730
731#[derive(Clone, Debug)]
732struct BoundaryText {
733 data: Vec<String>,
734 shape: Vec<usize>,
735}
736
737impl BoundaryText {
738 fn from_value(value: Value) -> Result<Self, String> {
739 match value {
740 Value::String(s) => Ok(Self {
741 data: vec![s],
742 shape: vec![1, 1],
743 }),
744 Value::StringArray(sa) => Ok(Self {
745 data: sa.data.clone(),
746 shape: sa.shape.clone(),
747 }),
748 Value::CharArray(ca) => {
749 let mut data = Vec::with_capacity(ca.rows);
750 for row in 0..ca.rows {
751 data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
752 }
753 Ok(Self {
754 data,
755 shape: vec![ca.rows, 1],
756 })
757 }
758 Value::Cell(cell) => {
759 let shape = cell.shape.clone();
760 let mut data = Vec::with_capacity(cell.data.len());
761 for element in &cell.data {
762 match &**element {
763 Value::String(s) => data.push(s.clone()),
764 Value::StringArray(sa) if sa.data.len() == 1 => {
765 data.push(sa.data[0].clone());
766 }
767 Value::CharArray(ca) if ca.rows <= 1 => {
768 if ca.rows == 0 {
769 data.push(String::new());
770 } else {
771 data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
772 }
773 }
774 Value::CharArray(_) => return Err(CELL_ELEMENT_ERROR.to_string()),
775 _ => return Err(CELL_ELEMENT_ERROR.to_string()),
776 }
777 }
778 Ok(Self { data, shape })
779 }
780 _ => Err(BOUNDARY_TYPE_ERROR.to_string()),
781 }
782 }
783}
784
785#[derive(Clone, Debug)]
786struct BoundaryPositions {
787 data: Vec<usize>,
788 shape: Vec<usize>,
789}
790
791impl BoundaryPositions {
792 fn from_value(value: Value) -> Result<Self, String> {
793 match value {
794 Value::Num(n) => Ok(Self {
795 data: vec![parse_position(n)?],
796 shape: vec![1, 1],
797 }),
798 Value::Int(i) => Ok(Self {
799 data: vec![parse_position_int(i)?],
800 shape: vec![1, 1],
801 }),
802 Value::Tensor(t) => {
803 let mut data = Vec::with_capacity(t.data.len());
804 for &entry in &t.data {
805 data.push(parse_position(entry)?);
806 }
807 Ok(Self {
808 data,
809 shape: if t.shape.is_empty() {
810 vec![t.rows, t.cols.max(1)]
811 } else {
812 t.shape
813 },
814 })
815 }
816 _ => Err(BOUNDARY_TYPE_ERROR.to_string()),
817 }
818 }
819}
820
821fn parse_position(value: f64) -> Result<usize, String> {
822 if !value.is_finite() || value < 1.0 {
823 return Err(POSITION_TYPE_ERROR.to_string());
824 }
825 if (value.fract()).abs() > f64::EPSILON {
826 return Err(POSITION_TYPE_ERROR.to_string());
827 }
828 if value > (usize::MAX as f64) {
829 return Err(POSITION_TYPE_ERROR.to_string());
830 }
831 Ok(value as usize)
832}
833
834fn parse_position_int(value: IntValue) -> Result<usize, String> {
835 let val = value.to_i64();
836 if val <= 0 {
837 return Err(POSITION_TYPE_ERROR.to_string());
838 }
839 Ok(val as usize)
840}
841
#[cfg(test)]
mod tests {
    #![allow(non_snake_case)]

    use super::*;
    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;
    use runmat_builtins::{CellArray, Tensor};

    /// Wraps a literal in a string-scalar value.
    fn s(text: &str) -> Value {
        Value::String(text.into())
    }

    /// Builds the `'Boundaries', <mode>` name-value pair.
    fn boundaries(mode: &str) -> Vec<Value> {
        vec![s("Boundaries"), s(mode)]
    }

    /// Runs the builtin without options and expects success.
    fn extract(text: Value, start: Value, stop: Value) -> Value {
        extract_between_builtin(text, start, stop, Vec::new()).expect("extractBetween")
    }

    /// Runs the builtin with options and expects success.
    fn extract_with(text: Value, start: Value, stop: Value, rest: Vec<Value>) -> Value {
        extract_between_builtin(text, start, stop, rest).expect("extractBetween")
    }

    /// Runs the builtin and expects an error message.
    fn failure(text: Value, start: Value, stop: Value, rest: Vec<Value>) -> String {
        extract_between_builtin(text, start, stop, rest).unwrap_err()
    }

    #[test]
    fn extractBetween_basic_string() {
        let result = extract(s("RunMat accelerates MATLAB"), s("RunMat "), s(" MATLAB"));
        assert_eq!(result, s("accelerates"));
    }

    #[test]
    fn extractBetween_inclusive_option() {
        let result = extract_with(s("a[b]c"), s("["), s("]"), boundaries("inclusive"));
        assert_eq!(result, s("[b]"));
    }

    #[test]
    fn extractBetween_numeric_positions() {
        let result = extract(s("Accelerator"), Value::Num(3.0), Value::Num(7.0));
        assert_eq!(result, s("celer"));
    }

    #[test]
    fn extractBetween_numeric_positions_exclusive_option() {
        let result = extract_with(
            s("Accelerator"),
            Value::Num(3.0),
            Value::Num(7.0),
            boundaries("exclusive"),
        );
        assert_eq!(result, s("ele"));
    }

    #[test]
    fn extractBetween_numeric_positions_clamps_stop() {
        let result = extract(s("Accelerator"), Value::Num(3.0), Value::Num(100.0));
        assert_eq!(result, s("celerator"));
    }

    #[test]
    fn extractBetween_numeric_positions_start_past_length() {
        let result = extract(s("abc"), Value::Num(10.0), Value::Num(12.0));
        assert_eq!(result, Value::String(String::new()));
    }

    #[test]
    fn extractBetween_string_array_broadcast() {
        let files = StringArray::new(
            vec!["runmat_accel.rs".into(), "runmat_gc.rs".into()],
            vec![2, 1],
        )
        .unwrap();
        match extract(Value::StringArray(files), s("runmat_"), s(".rs")) {
            Value::StringArray(sa) => {
                assert_eq!(sa.data, vec!["accel".to_string(), "gc".to_string()]);
                assert_eq!(sa.shape, vec![2, 1]);
            }
            other => panic!("expected string array, got {other:?}"),
        }
    }

    #[test]
    fn extractBetween_char_array_rows() {
        let source = "GPUAccelerateIgnition";
        let chars = CharArray::new(source.chars().collect(), 1, source.len()).unwrap();
        match extract(Value::CharArray(chars), s("GPU"), s("tion")) {
            Value::CharArray(out) => {
                assert_eq!(out.rows, 1);
                let text: String = out.data.iter().collect();
                assert_eq!(text.trim_end(), "AccelerateIgni");
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    #[test]
    fn extractBetween_cell_array_preserves_types() {
        let elements = vec![
            Value::CharArray(CharArray::new_row("A[B]C")),
            s("Planner<GPU>"),
        ];
        let cell = CellArray::new(elements, 1, 2).unwrap();
        match extract(Value::Cell(cell), s("["), s("]")) {
            Value::Cell(out) => {
                let first = out.get(0, 0).unwrap();
                let second = out.get(0, 1).unwrap();
                assert_eq!(first, Value::CharArray(CharArray::new_row("B")));
                assert_eq!(second, Value::String(String::new()));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    #[test]
    fn extractBetween_missing_string_propagates() {
        let missing = || {
            Value::StringArray(StringArray::new(vec!["<missing>".into()], vec![1, 1]).unwrap())
        };
        let result = extract(missing(), s("["), s("]"));
        assert_eq!(result, missing());
    }

    #[test]
    fn extractBetween_position_type_error() {
        let err = failure(s("abc"), Value::Num(0.5), Value::Num(2.0), Vec::new());
        assert_eq!(err, POSITION_TYPE_ERROR);
    }

    #[test]
    fn extractBetween_mixed_boundary_error() {
        let err = failure(s("abc"), s("a"), Value::Num(3.0), Vec::new());
        assert_eq!(err, BOUNDARY_TYPE_ERROR);
    }

    #[test]
    fn extractBetween_numeric_tensor_broadcast() {
        let text = StringArray::new(vec!["abcd".into(), "wxyz".into()], vec![2, 1]).unwrap();
        let start = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
        let stop = Tensor::new(vec![3.0, 4.0], vec![2, 1]).unwrap();
        match extract(
            Value::StringArray(text),
            Value::Tensor(start),
            Value::Tensor(stop),
        ) {
            Value::StringArray(sa) => {
                assert_eq!(sa.data, vec!["abc".to_string(), "xyz".to_string()]);
                assert_eq!(sa.shape, vec![2, 1]);
            }
            other => panic!("expected string array, got {other:?}"),
        }
    }

    #[test]
    fn extractBetween_option_invalid_value() {
        let err = failure(s("abc"), s("a"), s("c"), boundaries("middle"));
        assert_eq!(err, OPTION_VALUE_ERROR);
    }

    #[test]
    fn extractBetween_option_name_error() {
        let err = failure(
            s("abc"),
            s("a"),
            s("c"),
            vec![s("Padding"), s("inclusive")],
        );
        assert_eq!(err, OPTION_NAME_ERROR);
    }

    #[test]
    fn extractBetween_option_pair_error() {
        let err = failure(s("abc"), s("a"), s("b"), vec![s("Boundaries")]);
        assert_eq!(err, OPTION_PAIR_ERROR);
    }

    #[test]
    fn extractBetween_missing_boundary_propagates() {
        let result = extract(s("Planner<GPU>"), s("<missing>"), s(">"));
        assert_eq!(result, s("<missing>"));
    }

    #[test]
    fn extractBetween_cell_boundary_arguments() {
        let single_cell = |element: Value| CellArray::new(vec![element], 1, 1).unwrap();
        let text = single_cell(s("A<GPU>"));
        let start = single_cell(Value::CharArray(CharArray::new_row("<")));
        let stop = single_cell(Value::CharArray(CharArray::new_row(">")));
        match extract(Value::Cell(text), Value::Cell(start), Value::Cell(stop)) {
            Value::Cell(out) => {
                let value = out.get(0, 0).unwrap();
                assert_eq!(value, s("GPU"));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    #[test]
    #[cfg(feature = "doc_export")]
    fn extractBetween_doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}