1use std::cmp::min;
4
5use crate::builtins::common::broadcast::{broadcast_index, broadcast_shapes, compute_strides};
6use crate::builtins::common::map_control_flow_with_builtin;
7use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
8use crate::builtins::strings::type_resolvers::text_preserve_type;
9use crate::{
10 build_runtime_error, gather_if_needed_async, make_cell_with_shape, BuiltinResult, RuntimeError,
11};
12use runmat_builtins::{CharArray, IntValue, StringArray, Value};
13use runmat_macros::runtime_builtin;
14
15use crate::builtins::common::spec::{
16 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
17 ReductionNaN, ResidencyPolicy, ShapeRequirements,
18};
19
// GPU dispatch metadata for `extractBetween`, registered through
// `register_gpu_spec`. It lists no supported precisions and no provider hooks,
// and uses the `GatherImmediately` residency policy; per `notes`, the builtin
// runs on the CPU after gathering GPU-resident inputs.
#[runmat_macros::register_gpu_spec(
    builtin_path = "crate::builtins::strings::transform::extractbetween"
)]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "extractBetween",
    op_kind: GpuOpKind::Custom("string-transform"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::Matlab,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Runs on the CPU; GPU-resident inputs are gathered before extraction and outputs are returned on the host.",
};
37
// Fusion metadata for `extractBetween`, registered through
// `register_fusion_spec`. Neither an elementwise nor a reduction kernel is
// provided; per `notes`, the builtin is excluded from fusion plans.
#[runmat_macros::register_fusion_spec(
    builtin_path = "crate::builtins::strings::transform::extractbetween"
)]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "extractBetween",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Pure string manipulation builtin; excluded from fusion plans and gathers GPU inputs immediately.",
};
50
/// Builtin name attached to every error raised from this module.
const FN_NAME: &str = "extractBetween";
/// Raised when the text argument is not a string, string array, char array, or cell array.
const ARG_TYPE_ERROR: &str = "extractBetween: first argument must be a string array, character array, or cell array of character vectors";
/// Raised when the start and end boundaries are not of the same kind (text vs. numeric).
const BOUNDARY_TYPE_ERROR: &str =
    "extractBetween: start and end arguments must both be text or both be numeric positions";
/// Raised when a numeric boundary is not a finite positive integer.
const POSITION_TYPE_ERROR: &str = "extractBetween: position arguments must be positive integers";
/// Raised when the trailing arguments do not form complete name-value pairs.
const OPTION_PAIR_ERROR: &str = "extractBetween: name-value arguments must appear in pairs";
/// Raised when an option name is not text or is not `Boundaries` (compared case-insensitively).
const OPTION_NAME_ERROR: &str = "extractBetween: unrecognized parameter name";
/// Raised when the `Boundaries` value is neither `inclusive` nor `exclusive`.
const OPTION_VALUE_ERROR: &str =
    "extractBetween: 'Boundaries' must be either 'inclusive' or 'exclusive'";
/// Raised when a cell element is not a string scalar or a char vector with at most one row.
const CELL_ELEMENT_ERROR: &str =
    "extractBetween: cell array elements must be string scalars or character vectors";
/// Raised when the broadcast output shape is not supported by the text input's container.
const SIZE_MISMATCH_ERROR: &str =
    "extractBetween: boundary sizes must be compatible with the text input";
64
/// Builds a [`RuntimeError`] carrying `message` and tagged with this builtin's name.
fn runtime_error_for(message: impl Into<String>) -> RuntimeError {
    build_runtime_error(message).with_builtin(FN_NAME).build()
}
68
/// Passes an error from a nested call (for example a gather) through
/// `map_control_flow_with_builtin` together with this builtin's name.
fn map_flow(err: RuntimeError) -> RuntimeError {
    map_control_flow_with_builtin(err, FN_NAME)
}
72
/// Whether the boundary markers/positions themselves are part of the extracted text.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BoundariesMode {
    /// The boundaries are left out of the result (default for text boundaries).
    Exclusive,
    /// The boundaries are kept in the result (default for numeric positions).
    Inclusive,
}
78
79#[runtime_builtin(
80 name = "extractBetween",
81 category = "strings/transform",
82 summary = "Extract substrings between boundary markers using MATLAB-compatible semantics.",
83 keywords = "extractBetween,substring,boundaries,strings",
84 accel = "sink",
85 type_resolver(text_preserve_type),
86 builtin_path = "crate::builtins::strings::transform::extractbetween"
87)]
88async fn extract_between_builtin(
89 text: Value,
90 start: Value,
91 stop: Value,
92 rest: Vec<Value>,
93) -> BuiltinResult<Value> {
94 let text = gather_if_needed_async(&text).await.map_err(map_flow)?;
95 let start = gather_if_needed_async(&start).await.map_err(map_flow)?;
96 let stop = gather_if_needed_async(&stop).await.map_err(map_flow)?;
97
98 let mode_override = parse_boundaries_option(&rest).await?;
99
100 let normalized_text = NormalizedText::from_value(text)?;
101 let start_boundary = BoundaryArg::from_value(start)?;
102 let stop_boundary = BoundaryArg::from_value(stop)?;
103
104 if start_boundary.kind() != stop_boundary.kind() {
105 return Err(runtime_error_for(BOUNDARY_TYPE_ERROR));
106 }
107 let boundary_kind = start_boundary.kind();
108 let effective_mode = mode_override.unwrap_or(match boundary_kind {
109 BoundaryKind::Text => BoundariesMode::Exclusive,
110 BoundaryKind::Position => BoundariesMode::Inclusive,
111 });
112
113 let start_shape = start_boundary.shape();
114 let stop_shape = stop_boundary.shape();
115 let text_shape = normalized_text.shape();
116
117 let shape_ts = broadcast_shapes(FN_NAME, text_shape, start_shape).map_err(runtime_error_for)?;
118 let output_shape =
119 broadcast_shapes(FN_NAME, &shape_ts, stop_shape).map_err(runtime_error_for)?;
120 if !normalized_text.supports_shape(&output_shape) {
121 return Err(runtime_error_for(SIZE_MISMATCH_ERROR));
122 }
123
124 let total: usize = output_shape.iter().copied().product();
125 if total == 0 {
126 return normalized_text.into_value(Vec::new(), output_shape);
127 }
128
129 let text_strides = compute_strides(text_shape);
130 let start_strides = compute_strides(start_shape);
131 let stop_strides = compute_strides(stop_shape);
132
133 let mut results = Vec::with_capacity(total);
134
135 for idx in 0..total {
136 let text_idx = broadcast_index(idx, &output_shape, text_shape, &text_strides);
137 let start_idx = broadcast_index(idx, &output_shape, start_shape, &start_strides);
138 let stop_idx = broadcast_index(idx, &output_shape, stop_shape, &stop_strides);
139
140 let result = match boundary_kind {
141 BoundaryKind::Text => {
142 let text_value = normalized_text.data(text_idx);
143 let start_value = start_boundary.text(start_idx);
144 let stop_value = stop_boundary.text(stop_idx);
145 extract_with_text_boundaries(text_value, start_value, stop_value, effective_mode)
146 }
147 BoundaryKind::Position => {
148 let text_value = normalized_text.data(text_idx);
149 let start_value = start_boundary.position(start_idx);
150 let stop_value = stop_boundary.position(stop_idx);
151 extract_with_positions(text_value, start_value, stop_value, effective_mode)
152 }
153 };
154 results.push(result);
155 }
156
157 normalized_text.into_value(results, output_shape)
158}
159
160async fn parse_boundaries_option(args: &[Value]) -> BuiltinResult<Option<BoundariesMode>> {
161 if args.is_empty() {
162 return Ok(None);
163 }
164 if !args.len().is_multiple_of(2) {
165 return Err(runtime_error_for(OPTION_PAIR_ERROR));
166 }
167
168 let mut mode: Option<BoundariesMode> = None;
169 let mut idx = 0;
170 while idx < args.len() {
171 let name_value = gather_if_needed_async(&args[idx]).await.map_err(map_flow)?;
172 let name =
173 value_to_string(&name_value).ok_or_else(|| runtime_error_for(OPTION_NAME_ERROR))?;
174 if !name.eq_ignore_ascii_case("boundaries") {
175 return Err(runtime_error_for(OPTION_NAME_ERROR));
176 }
177 let value = gather_if_needed_async(&args[idx + 1])
178 .await
179 .map_err(map_flow)?;
180 let value_str =
181 value_to_string(&value).ok_or_else(|| runtime_error_for(OPTION_VALUE_ERROR))?;
182 let parsed_mode = if value_str.eq_ignore_ascii_case("inclusive") {
183 BoundariesMode::Inclusive
184 } else if value_str.eq_ignore_ascii_case("exclusive") {
185 BoundariesMode::Exclusive
186 } else {
187 return Err(runtime_error_for(OPTION_VALUE_ERROR));
188 };
189 mode = Some(parsed_mode);
190 idx += 2;
191 }
192 Ok(mode)
193}
194
195fn value_to_string(value: &Value) -> Option<String> {
196 match value {
197 Value::String(s) => Some(s.clone()),
198 Value::StringArray(sa) if sa.data.len() == 1 => Some(sa.data[0].clone()),
199 Value::CharArray(ca) if ca.rows <= 1 => {
200 if ca.rows == 0 {
201 Some(String::new())
202 } else {
203 Some(char_row_to_string_slice(&ca.data, ca.cols, 0))
204 }
205 }
206 Value::CharArray(_) => None,
207 Value::Cell(cell) if cell.data.len() == 1 => {
208 let element = &cell.data[0];
209 value_to_string(element)
210 }
211 _ => None,
212 }
213}
214
/// Text produced for a single output element of `extractBetween`.
#[derive(Clone)]
struct ExtractResult {
    /// The extracted text, or the literal `<missing>` marker.
    text: String,
}

impl ExtractResult {
    /// Result whose text is the `<missing>` marker.
    fn missing() -> Self {
        Self::text(String::from("<missing>"))
    }

    /// Wraps already-extracted text.
    fn text(text: String) -> Self {
        ExtractResult { text }
    }
}
231
232fn extract_with_text_boundaries(
233 text: &str,
234 start: &str,
235 stop: &str,
236 mode: BoundariesMode,
237) -> ExtractResult {
238 if is_missing_string(text) || is_missing_string(start) || is_missing_string(stop) {
239 return ExtractResult::missing();
240 }
241
242 if let Some(start_idx) = text.find(start) {
243 let search_start = start_idx + start.len();
244 if search_start > text.len() {
245 return ExtractResult::text(String::new());
246 }
247 if let Some(relative_end) = text[search_start..].find(stop) {
248 let end_idx = search_start + relative_end;
249 match mode {
250 BoundariesMode::Inclusive => {
251 let end_capture = min(text.len(), end_idx + stop.len());
252 let slice = &text[start_idx..end_capture];
253 ExtractResult::text(slice.to_string())
254 }
255 BoundariesMode::Exclusive => {
256 if end_idx < search_start {
257 ExtractResult::text(String::new())
258 } else {
259 let slice = &text[search_start..end_idx];
260 ExtractResult::text(slice.to_string())
261 }
262 }
263 }
264 } else {
265 ExtractResult::text(String::new())
266 }
267 } else {
268 ExtractResult::text(String::new())
269 }
270}
271
272fn extract_with_positions(
273 text: &str,
274 start: usize,
275 stop: usize,
276 mode: BoundariesMode,
277) -> ExtractResult {
278 if is_missing_string(text) {
279 return ExtractResult::missing();
280 }
281 if text.is_empty() {
282 return ExtractResult::text(String::new());
283 }
284 let chars: Vec<char> = text.chars().collect();
285 let len = chars.len();
286 if len == 0 {
287 return ExtractResult::text(String::new());
288 }
289
290 if start == 0 || stop == 0 {
291 return ExtractResult::text(String::new());
292 }
293
294 if start > len {
295 return ExtractResult::text(String::new());
296 }
297 let stop_clamped = stop.min(len);
298 if stop_clamped == 0 {
299 return ExtractResult::text(String::new());
300 }
301
302 match mode {
303 BoundariesMode::Inclusive => {
304 if start > stop_clamped {
305 return ExtractResult::text(String::new());
306 }
307 let start_idx = start - 1;
308 let end_idx = stop_clamped - 1;
309 if start_idx >= len || end_idx >= len || start_idx > end_idx {
310 ExtractResult::text(String::new())
311 } else {
312 let slice: String = chars[start_idx..=end_idx].iter().collect();
313 ExtractResult::text(slice)
314 }
315 }
316 BoundariesMode::Exclusive => {
317 if start + 1 >= stop_clamped {
318 return ExtractResult::text(String::new());
319 }
320 let start_idx = start;
321 let end_idx = stop_clamped - 2;
322 if start_idx >= len || end_idx >= len || start_idx > end_idx {
323 ExtractResult::text(String::new())
324 } else {
325 let slice: String = chars[start_idx..=end_idx].iter().collect();
326 ExtractResult::text(slice)
327 }
328 }
329 }
330}
331
/// Layout of a cell-array text input, kept so the output cell can mirror it.
#[derive(Clone, Debug)]
struct CellInfo {
    /// Shape of the source cell array.
    shape: Vec<usize>,
    /// Kind of each source element, in the same order as the cell's data.
    element_kinds: Vec<CellElementKind>,
}
337
/// Kind of a single cell element, used to rebuild the output element with the same type.
#[derive(Clone, Debug)]
enum CellElementKind {
    /// The element was a string scalar (or a one-element string array).
    String,
    /// The element was a char array with at most one row.
    Char,
}
343
/// Container type of the original text argument; decides how results are packed.
#[derive(Clone, Debug)]
enum TextKind {
    /// A single `Value::String`.
    StringScalar,
    /// A `Value::StringArray`.
    StringArray,
    /// A `Value::CharArray`; `rows` is the row count of the source array.
    CharArray { rows: usize },
    /// A `Value::Cell`, with its shape and per-element kinds.
    CellArray(CellInfo),
}
351
/// Text argument flattened into one `String` per element, plus what is needed
/// to rebuild a value of the original container type.
#[derive(Clone, Debug)]
struct NormalizedText {
    /// One entry per element (per row for char arrays).
    data: Vec<String>,
    /// Shape used for broadcasting against the boundary arguments.
    shape: Vec<usize>,
    /// Original container kind of the input.
    kind: TextKind,
}
358
359impl NormalizedText {
360 fn from_value(value: Value) -> BuiltinResult<Self> {
361 match value {
362 Value::String(s) => Ok(Self {
363 data: vec![s],
364 shape: vec![1, 1],
365 kind: TextKind::StringScalar,
366 }),
367 Value::StringArray(sa) => Ok(Self {
368 data: sa.data.clone(),
369 shape: sa.shape.clone(),
370 kind: TextKind::StringArray,
371 }),
372 Value::CharArray(ca) => {
373 let rows = ca.rows;
374 let mut data = Vec::with_capacity(rows);
375 for row in 0..rows {
376 data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
377 }
378 Ok(Self {
379 data,
380 shape: vec![rows, 1],
381 kind: TextKind::CharArray { rows },
382 })
383 }
384 Value::Cell(cell) => {
385 let shape = cell.shape.clone();
386 let mut data = Vec::with_capacity(cell.data.len());
387 let mut kinds = Vec::with_capacity(cell.data.len());
388 for element in &cell.data {
389 match &**element {
390 Value::String(s) => {
391 data.push(s.clone());
392 kinds.push(CellElementKind::String);
393 }
394 Value::StringArray(sa) if sa.data.len() == 1 => {
395 data.push(sa.data[0].clone());
396 kinds.push(CellElementKind::String);
397 }
398 Value::CharArray(ca) if ca.rows <= 1 => {
399 if ca.rows == 0 {
400 data.push(String::new());
401 } else {
402 data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
403 }
404 kinds.push(CellElementKind::Char);
405 }
406 Value::CharArray(_) => return Err(runtime_error_for(CELL_ELEMENT_ERROR)),
407 _ => return Err(runtime_error_for(CELL_ELEMENT_ERROR)),
408 }
409 }
410 Ok(Self {
411 data,
412 shape: shape.clone(),
413 kind: TextKind::CellArray(CellInfo {
414 shape,
415 element_kinds: kinds,
416 }),
417 })
418 }
419 _ => Err(runtime_error_for(ARG_TYPE_ERROR)),
420 }
421 }
422
423 fn shape(&self) -> &[usize] {
424 &self.shape
425 }
426
427 fn data(&self, idx: usize) -> &str {
428 &self.data[idx]
429 }
430
431 fn supports_shape(&self, output_shape: &[usize]) -> bool {
432 match &self.kind {
433 TextKind::StringScalar => true,
434 TextKind::StringArray => true,
435 TextKind::CharArray { .. } => output_shape == self.shape,
436 TextKind::CellArray(info) => output_shape == info.shape,
437 }
438 }
439
440 fn into_value(
441 self,
442 results: Vec<ExtractResult>,
443 output_shape: Vec<usize>,
444 ) -> BuiltinResult<Value> {
445 match self.kind {
446 TextKind::StringScalar => {
447 if results.len() <= 1 {
448 let value = results
449 .into_iter()
450 .next()
451 .unwrap_or_else(|| ExtractResult::text(String::new()));
452 Ok(Value::String(value.text))
453 } else {
454 let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
455 let array = StringArray::new(data, output_shape)
456 .map_err(|e| runtime_error_for(format!("{FN_NAME}: {e}")))?;
457 Ok(Value::StringArray(array))
458 }
459 }
460 TextKind::StringArray => {
461 let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
462 let array = StringArray::new(data, output_shape)
463 .map_err(|e| runtime_error_for(format!("{FN_NAME}: {e}")))?;
464 Ok(Value::StringArray(array))
465 }
466 TextKind::CharArray { rows } => {
467 if rows == 0 {
468 return CharArray::new(Vec::new(), 0, 0)
469 .map(Value::CharArray)
470 .map_err(|e| runtime_error_for(format!("{FN_NAME}: {e}")));
471 }
472 if results.len() != rows {
473 return Err(runtime_error_for(SIZE_MISMATCH_ERROR));
474 }
475 let mut max_width = 0usize;
476 let mut row_strings = Vec::with_capacity(rows);
477 for result in &results {
478 let width = result.text.chars().count();
479 max_width = max_width.max(width);
480 row_strings.push(result.text.clone());
481 }
482 let mut flattened = Vec::with_capacity(rows * max_width);
483 for row in row_strings {
484 let mut chars: Vec<char> = row.chars().collect();
485 if chars.len() < max_width {
486 chars.resize(max_width, ' ');
487 }
488 flattened.extend(chars);
489 }
490 CharArray::new(flattened, rows, max_width)
491 .map(Value::CharArray)
492 .map_err(|e| runtime_error_for(format!("{FN_NAME}: {e}")))
493 }
494 TextKind::CellArray(info) => {
495 if results.len() != info.element_kinds.len() {
496 return Err(runtime_error_for(SIZE_MISMATCH_ERROR));
497 }
498 let mut values = Vec::with_capacity(results.len());
499 for (idx, result) in results.into_iter().enumerate() {
500 match info.element_kinds[idx] {
501 CellElementKind::String => values.push(Value::String(result.text)),
502 CellElementKind::Char => {
503 let ca = CharArray::new_row(&result.text);
504 values.push(Value::CharArray(ca));
505 }
506 }
507 }
508 make_cell_with_shape(values, info.shape)
509 .map_err(|e| runtime_error_for(format!("{FN_NAME}: {e}")))
510 }
511 }
512 }
513}
514
/// Discriminates the two supported kinds of boundary argument.
#[derive(Clone, Debug, PartialEq, Eq)]
enum BoundaryKind {
    /// Boundaries are marker substrings.
    Text,
    /// Boundaries are numeric character positions.
    Position,
}
520
/// A normalised start or stop argument.
#[derive(Clone, Debug)]
enum BoundaryArg {
    /// Marker strings with their shape.
    Text(BoundaryText),
    /// Validated positions with their shape.
    Position(BoundaryPositions),
}
526
527impl BoundaryArg {
528 fn from_value(value: Value) -> BuiltinResult<Self> {
529 match value {
530 Value::String(_) | Value::StringArray(_) | Value::CharArray(_) | Value::Cell(_) => {
531 BoundaryText::from_value(value).map(BoundaryArg::Text)
532 }
533 Value::Num(_) | Value::Int(_) | Value::Tensor(_) => {
534 BoundaryPositions::from_value(value).map(BoundaryArg::Position)
535 }
536 other => Err(runtime_error_for(format!(
537 "{BOUNDARY_TYPE_ERROR}: unsupported argument {other:?}"
538 ))),
539 }
540 }
541
542 fn kind(&self) -> BoundaryKind {
543 match self {
544 BoundaryArg::Text(_) => BoundaryKind::Text,
545 BoundaryArg::Position(_) => BoundaryKind::Position,
546 }
547 }
548
549 fn shape(&self) -> &[usize] {
550 match self {
551 BoundaryArg::Text(text) => &text.shape,
552 BoundaryArg::Position(pos) => &pos.shape,
553 }
554 }
555
556 fn text(&self, idx: usize) -> &str {
557 match self {
558 BoundaryArg::Text(text) => &text.data[idx],
559 BoundaryArg::Position(_) => unreachable!(),
560 }
561 }
562
563 fn position(&self, idx: usize) -> usize {
564 match self {
565 BoundaryArg::Position(pos) => pos.data[idx],
566 BoundaryArg::Text(_) => unreachable!(),
567 }
568 }
569}
570
/// Text boundary markers flattened into one `String` per element.
#[derive(Clone, Debug)]
struct BoundaryText {
    /// One marker per element (per row for char arrays).
    data: Vec<String>,
    /// Shape used for broadcasting against the text input.
    shape: Vec<usize>,
}
576
577impl BoundaryText {
578 fn from_value(value: Value) -> BuiltinResult<Self> {
579 match value {
580 Value::String(s) => Ok(Self {
581 data: vec![s],
582 shape: vec![1, 1],
583 }),
584 Value::StringArray(sa) => Ok(Self {
585 data: sa.data.clone(),
586 shape: sa.shape.clone(),
587 }),
588 Value::CharArray(ca) => {
589 let mut data = Vec::with_capacity(ca.rows);
590 for row in 0..ca.rows {
591 data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
592 }
593 Ok(Self {
594 data,
595 shape: vec![ca.rows, 1],
596 })
597 }
598 Value::Cell(cell) => {
599 let shape = cell.shape.clone();
600 let mut data = Vec::with_capacity(cell.data.len());
601 for element in &cell.data {
602 match &**element {
603 Value::String(s) => data.push(s.clone()),
604 Value::StringArray(sa) if sa.data.len() == 1 => {
605 data.push(sa.data[0].clone());
606 }
607 Value::CharArray(ca) if ca.rows <= 1 => {
608 if ca.rows == 0 {
609 data.push(String::new());
610 } else {
611 data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
612 }
613 }
614 Value::CharArray(_) => return Err(runtime_error_for(CELL_ELEMENT_ERROR)),
615 _ => return Err(runtime_error_for(CELL_ELEMENT_ERROR)),
616 }
617 }
618 Ok(Self { data, shape })
619 }
620 _ => Err(runtime_error_for(BOUNDARY_TYPE_ERROR)),
621 }
622 }
623}
624
/// Numeric boundary positions, validated by `parse_position` / `parse_position_int`.
#[derive(Clone, Debug)]
struct BoundaryPositions {
    /// One position per element.
    data: Vec<usize>,
    /// Shape used for broadcasting against the text input.
    shape: Vec<usize>,
}
630
631impl BoundaryPositions {
632 fn from_value(value: Value) -> BuiltinResult<Self> {
633 match value {
634 Value::Num(n) => Ok(Self {
635 data: vec![parse_position(n)?],
636 shape: vec![1, 1],
637 }),
638 Value::Int(i) => Ok(Self {
639 data: vec![parse_position_int(i)?],
640 shape: vec![1, 1],
641 }),
642 Value::Tensor(t) => {
643 let mut data = Vec::with_capacity(t.data.len());
644 for &entry in &t.data {
645 data.push(parse_position(entry)?);
646 }
647 Ok(Self {
648 data,
649 shape: if t.shape.is_empty() {
650 vec![t.rows, t.cols.max(1)]
651 } else {
652 t.shape
653 },
654 })
655 }
656 _ => Err(runtime_error_for(BOUNDARY_TYPE_ERROR)),
657 }
658 }
659}
660
661fn parse_position(value: f64) -> BuiltinResult<usize> {
662 if !value.is_finite() || value < 1.0 {
663 return Err(runtime_error_for(POSITION_TYPE_ERROR));
664 }
665 if (value.fract()).abs() > f64::EPSILON {
666 return Err(runtime_error_for(POSITION_TYPE_ERROR));
667 }
668 if value > (usize::MAX as f64) {
669 return Err(runtime_error_for(POSITION_TYPE_ERROR));
670 }
671 Ok(value as usize)
672}
673
674fn parse_position_int(value: IntValue) -> BuiltinResult<usize> {
675 let val = value.to_i64();
676 if val <= 0 {
677 return Err(runtime_error_for(POSITION_TYPE_ERROR));
678 }
679 Ok(val as usize)
680}
681
// Unit tests for the `extractBetween` builtin. Each test drives the async
// builtin through a blocking wrapper and checks the returned value or the
// error message.
#[cfg(test)]
pub(crate) mod tests {
    #![allow(non_snake_case)]

    use super::*;
    use runmat_builtins::{CellArray, ResolveContext, Tensor, Type};

    // Synchronous wrapper: blocks on the async builtin so tests can call it directly.
    fn extract_between_builtin(
        text: Value,
        start: Value,
        stop: Value,
        rest: Vec<Value>,
    ) -> BuiltinResult<Value> {
        futures::executor::block_on(super::extract_between_builtin(text, start, stop, rest))
    }

    // Text boundaries default to exclusive extraction.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_basic_string() {
        let result = extract_between_builtin(
            Value::String("RunMat accelerates MATLAB".into()),
            Value::String("RunMat ".into()),
            Value::String(" MATLAB".into()),
            Vec::new(),
        )
        .expect("extractBetween");
        assert_eq!(result, Value::String("accelerates".into()));
    }

    // 'Boundaries','inclusive' keeps the markers in the result.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_inclusive_option() {
        let result = extract_between_builtin(
            Value::String("a[b]c".into()),
            Value::String("[".into()),
            Value::String("]".into()),
            vec![
                Value::String("Boundaries".into()),
                Value::String("inclusive".into()),
            ],
        )
        .expect("extractBetween");
        assert_eq!(result, Value::String("[b]".into()));
    }

    // Numeric positions default to inclusive extraction.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_numeric_positions() {
        let result = extract_between_builtin(
            Value::String("Accelerator".into()),
            Value::Num(3.0),
            Value::Num(7.0),
            Vec::new(),
        )
        .expect("extractBetween");
        assert_eq!(result, Value::String("celer".into()));
    }

    // 'Boundaries','exclusive' drops the characters at the given positions.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_numeric_positions_exclusive_option() {
        let result = extract_between_builtin(
            Value::String("Accelerator".into()),
            Value::Num(3.0),
            Value::Num(7.0),
            vec![
                Value::String("Boundaries".into()),
                Value::String("exclusive".into()),
            ],
        )
        .expect("extractBetween");
        assert_eq!(result, Value::String("ele".into()));
    }

    // A stop position past the end of the text is clamped to the text length.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_numeric_positions_clamps_stop() {
        let result = extract_between_builtin(
            Value::String("Accelerator".into()),
            Value::Num(3.0),
            Value::Num(100.0),
            Vec::new(),
        )
        .expect("extractBetween");
        assert_eq!(result, Value::String("celerator".into()));
    }

    // A start position past the end of the text yields an empty string.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_numeric_positions_start_past_length() {
        let result = extract_between_builtin(
            Value::String("abc".into()),
            Value::Num(10.0),
            Value::Num(12.0),
            Vec::new(),
        )
        .expect("extractBetween");
        assert_eq!(result, Value::String(String::new()));
    }

    // Scalar boundaries are broadcast across every element of a string array.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_string_array_broadcast() {
        let array = StringArray::new(
            vec!["runmat_accel.rs".into(), "runmat_gc.rs".into()],
            vec![2, 1],
        )
        .unwrap();
        let result = extract_between_builtin(
            Value::StringArray(array),
            Value::String("runmat_".into()),
            Value::String(".rs".into()),
            Vec::new(),
        )
        .expect("extractBetween");
        match result {
            Value::StringArray(sa) => {
                assert_eq!(sa.data, vec!["accel".to_string(), "gc".to_string()]);
                assert_eq!(sa.shape, vec![2, 1]);
            }
            other => panic!("expected string array, got {other:?}"),
        }
    }

    // A char array input produces a char array output with the same row count.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_char_array_rows() {
        let chars = CharArray::new(
            "GPUAccelerateIgnition".chars().collect(),
            1,
            "GPUAccelerateIgnition".len(),
        )
        .unwrap();
        let result = extract_between_builtin(
            Value::CharArray(chars),
            Value::String("GPU".into()),
            Value::String("tion".into()),
            Vec::new(),
        )
        .expect("extractBetween");
        match result {
            Value::CharArray(out) => {
                assert_eq!(out.rows, 1);
                let text: String = out.data.iter().collect();
                assert_eq!(text.trim_end(), "AccelerateIgni");
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    // Cell elements keep their original kind; an element without the markers becomes empty.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_cell_array_preserves_types() {
        let cell = CellArray::new(
            vec![
                Value::CharArray(CharArray::new_row("A[B]C")),
                Value::String("Planner<GPU>".into()),
            ],
            1,
            2,
        )
        .unwrap();
        let result = extract_between_builtin(
            Value::Cell(cell),
            Value::String("[".into()),
            Value::String("]".into()),
            Vec::new(),
        )
        .expect("extractBetween");
        match result {
            Value::Cell(out) => {
                let first = out.get(0, 0).unwrap();
                let second = out.get(0, 1).unwrap();
                assert_eq!(first, Value::CharArray(CharArray::new_row("B")));
                assert_eq!(second, Value::String(String::new()));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    // A missing text element stays missing in the output.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_missing_string_propagates() {
        let strings = StringArray::new(vec!["<missing>".into()], vec![1, 1]).unwrap();
        let result = extract_between_builtin(
            Value::StringArray(strings),
            Value::String("[".into()),
            Value::String("]".into()),
            Vec::new(),
        )
        .expect("extractBetween");
        assert_eq!(
            result,
            Value::StringArray(StringArray::new(vec!["<missing>".into()], vec![1, 1]).unwrap())
        );
    }

    // A non-integer position is rejected with the position type error.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_position_type_error() {
        let err = extract_between_builtin(
            Value::String("abc".into()),
            Value::Num(0.5),
            Value::Num(2.0),
            Vec::new(),
        )
        .unwrap_err();
        assert_eq!(err.to_string(), POSITION_TYPE_ERROR);
    }

    // Mixing a text boundary with a numeric boundary is rejected.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_mixed_boundary_error() {
        let err = extract_between_builtin(
            Value::String("abc".into()),
            Value::String("a".into()),
            Value::Num(3.0),
            Vec::new(),
        )
        .unwrap_err();
        assert_eq!(err.to_string(), BOUNDARY_TYPE_ERROR);
    }

    // Tensor positions pair up element-wise with a string array of the same shape.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_numeric_tensor_broadcast() {
        let text = StringArray::new(vec!["abcd".into(), "wxyz".into()], vec![2, 1]).unwrap();
        let start = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
        let stop = Tensor::new(vec![3.0, 4.0], vec![2, 1]).unwrap();
        let result = extract_between_builtin(
            Value::StringArray(text),
            Value::Tensor(start),
            Value::Tensor(stop),
            Vec::new(),
        )
        .expect("extractBetween");
        match result {
            Value::StringArray(sa) => {
                assert_eq!(sa.data, vec!["abc".to_string(), "xyz".to_string()]);
                assert_eq!(sa.shape, vec![2, 1]);
            }
            other => panic!("expected string array, got {other:?}"),
        }
    }

    // An unknown 'Boundaries' value is rejected.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_option_invalid_value() {
        let err = extract_between_builtin(
            Value::String("abc".into()),
            Value::String("a".into()),
            Value::String("c".into()),
            vec![
                Value::String("Boundaries".into()),
                Value::String("middle".into()),
            ],
        )
        .unwrap_err();
        assert_eq!(err.to_string(), OPTION_VALUE_ERROR);
    }

    // An unknown option name is rejected.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_option_name_error() {
        let err = extract_between_builtin(
            Value::String("abc".into()),
            Value::String("a".into()),
            Value::String("c".into()),
            vec![
                Value::String("Padding".into()),
                Value::String("inclusive".into()),
            ],
        )
        .unwrap_err();
        assert_eq!(err.to_string(), OPTION_NAME_ERROR);
    }

    // An option name without a value is rejected.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_option_pair_error() {
        let err = extract_between_builtin(
            Value::String("abc".into()),
            Value::String("a".into()),
            Value::String("b".into()),
            vec![Value::String("Boundaries".into())],
        )
        .unwrap_err();
        assert_eq!(err.to_string(), OPTION_PAIR_ERROR);
    }

    // A missing boundary marker makes the result missing.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_missing_boundary_propagates() {
        let result = extract_between_builtin(
            Value::String("Planner<GPU>".into()),
            Value::String("<missing>".into()),
            Value::String(">".into()),
            Vec::new(),
        )
        .expect("extractBetween");
        assert_eq!(result, Value::String("<missing>".into()));
    }

    // Boundaries may themselves be given as cell arrays of char vectors.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn extractBetween_cell_boundary_arguments() {
        let text = CellArray::new(vec![Value::String("A<GPU>".into())], 1, 1).unwrap();
        let start = CellArray::new(vec![Value::CharArray(CharArray::new_row("<"))], 1, 1).unwrap();
        let stop = CellArray::new(vec![Value::CharArray(CharArray::new_row(">"))], 1, 1).unwrap();
        let result = extract_between_builtin(
            Value::Cell(text),
            Value::Cell(start),
            Value::Cell(stop),
            Vec::new(),
        )
        .expect("extractBetween");
        match result {
            Value::Cell(out) => {
                let value = out.get(0, 0).unwrap();
                assert_eq!(value, Value::String("GPU".into()));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    // The registered type resolver maps a string input type to a string output type.
    #[test]
    fn extract_between_type_preserves_text() {
        assert_eq!(
            text_preserve_type(&[Type::String], &ResolveContext::new(Vec::new())),
            Type::String
        );
    }
}