1use std::cmp::min;
4
5use crate::builtins::common::broadcast::{broadcast_index, broadcast_shapes, compute_strides};
6use crate::builtins::common::map_control_flow_with_builtin;
7use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
8use crate::builtins::strings::type_resolvers::text_preserve_type;
9use crate::{
10 build_runtime_error, gather_if_needed_async, make_cell_with_shape, BuiltinResult, RuntimeError,
11};
12use runmat_builtins::{
13 BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
14 BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
15 CharArray, IntValue, StringArray, Value,
16};
17use runmat_macros::runtime_builtin;
18
19use crate::builtins::common::spec::{
20 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
21 ReductionNaN, ResidencyPolicy, ShapeRequirements,
22};
23
// GPU execution spec registered for `extractBetween`.
// It lists no supported precisions and no provider hooks, and uses the
// `GatherImmediately` residency policy; per `notes`, the builtin runs on the
// CPU and returns its outputs on the host.
#[runmat_macros::register_gpu_spec(
    builtin_path = "crate::builtins::strings::transform::extractbetween"
)]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "extractBetween",
    op_kind: GpuOpKind::Custom("string-transform"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::Matlab,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Runs on the CPU; GPU-resident inputs are gathered before extraction and outputs are returned on the host.",
};
41
// Fusion spec registered for `extractBetween`.
// Neither an elementwise nor a reduction descriptor is provided; per `notes`,
// the builtin is excluded from fusion plans.
#[runmat_macros::register_fusion_spec(
    builtin_path = "crate::builtins::strings::transform::extractbetween"
)]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "extractBetween",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Pure string manipulation builtin; excluded from fusion plans and gathers GPU inputs immediately.",
};
54
/// Builtin name attached to runtime errors and passed to the shared
/// broadcast/control-flow helpers used in this file.
const BUILTIN_NAME: &str = "extractBetween";
56
/// Descriptor of the single output (`newText`) shared by every signature.
const EXTRACT_BETWEEN_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "newText",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Extracted text preserving scalar/array/cell text container semantics.",
}];
64
/// Input descriptors for the three-argument form: `str`, `start`, `end`.
/// All three are required and typed `Any`.
const EXTRACT_BETWEEN_INPUTS_BASE: [BuiltinParamDescriptor; 3] = [
    BuiltinParamDescriptor {
        name: "str",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Input text scalar/array/cell.",
    },
    BuiltinParamDescriptor {
        name: "start",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Start boundary marker text or positive integer position(s).",
    },
    BuiltinParamDescriptor {
        name: "end",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "End boundary marker text or positive integer position(s).",
    },
];
88
/// Input descriptors for the name/value form: the same three leading inputs
/// as the base form, followed by a required option `Name` and a variadic
/// `Value` entry covering the option value and any further pairs.
const EXTRACT_BETWEEN_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 5] = [
    BuiltinParamDescriptor {
        name: "str",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Input text scalar/array/cell.",
    },
    BuiltinParamDescriptor {
        name: "start",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Start boundary marker text or positive integer position(s).",
    },
    BuiltinParamDescriptor {
        name: "end",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "End boundary marker text or positive integer position(s).",
    },
    BuiltinParamDescriptor {
        name: "Name",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Option name (`Boundaries`).",
    },
    BuiltinParamDescriptor {
        name: "Value",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Variadic,
        default: None,
        description: "Option value and additional Name/Value pairs.",
    },
];
126
/// The two advertised call signatures: the plain three-argument form and the
/// form with trailing name/value options. Both share the same output list.
const EXTRACT_BETWEEN_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
    BuiltinSignatureDescriptor {
        label: "newText = extractBetween(str, start, end)",
        inputs: &EXTRACT_BETWEEN_INPUTS_BASE,
        outputs: &EXTRACT_BETWEEN_OUTPUT,
    },
    BuiltinSignatureDescriptor {
        label: "newText = extractBetween(str, start, end, Name, Value, ...)",
        inputs: &EXTRACT_BETWEEN_INPUTS_NAME_VALUE,
        outputs: &EXTRACT_BETWEEN_OUTPUT,
    },
];
139
// Error descriptors for `extractBetween`. Each pairs a stable `code` and
// optional `identifier` with the condition (`when`) and the default `message`
// used when the error is raised through `extract_between_error`.

/// Raised when the first argument is not a supported text container.
const EXTRACT_BETWEEN_ERROR_INVALID_INPUT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.EXTRACT_BETWEEN.INVALID_INPUT",
    identifier: Some("RunMat:extractBetween:InvalidInput"),
    when: "First argument is not a string array, character array, or cell array of text scalars.",
    message:
        "extractBetween: first argument must be a string array, character array, or cell array of character vectors",
};

/// Raised when the start/end boundaries mix text and numeric kinds, or when a
/// boundary value has an unsupported type.
const EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.EXTRACT_BETWEEN.BOUNDARY_TYPE",
    identifier: Some("RunMat:extractBetween:BoundaryType"),
    when: "Start/end boundaries are mixed text/numeric domains or use unsupported boundary types.",
    message:
        "extractBetween: start and end arguments must both be text or both be numeric positions",
};

/// Raised when a numeric position fails validation in `parse_position` /
/// `parse_position_int`.
const EXTRACT_BETWEEN_ERROR_POSITION_TYPE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.EXTRACT_BETWEEN.POSITION_TYPE",
    identifier: Some("RunMat:extractBetween:PositionType"),
    when: "Numeric boundary positions are not positive finite integers.",
    message: "extractBetween: position arguments must be positive integers",
};

/// Raised when the trailing option list has an odd number of entries.
const EXTRACT_BETWEEN_ERROR_NAME_VALUE_PAIR: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.EXTRACT_BETWEEN.NAME_VALUE_PAIR",
    identifier: Some("RunMat:extractBetween:NameValuePair"),
    when: "Name/value options are not supplied in complete pairs.",
    message: "extractBetween: name-value arguments must appear in pairs",
};

/// Raised when an option name is not text or is not `Boundaries`.
const EXTRACT_BETWEEN_ERROR_OPTION_NAME: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.EXTRACT_BETWEEN.OPTION_NAME",
    identifier: Some("RunMat:extractBetween:OptionName"),
    when: "An option name other than `Boundaries` was supplied.",
    message: "extractBetween: unrecognized parameter name",
};

/// Raised when the `Boundaries` value is not text or is neither `inclusive`
/// nor `exclusive`.
const EXTRACT_BETWEEN_ERROR_OPTION_VALUE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.EXTRACT_BETWEEN.OPTION_VALUE",
    identifier: Some("RunMat:extractBetween:OptionValue"),
    when: "`Boundaries` option value is not `inclusive` or `exclusive`.",
    message: "extractBetween: 'Boundaries' must be either 'inclusive' or 'exclusive'",
};

/// Raised when a cell element is not a string scalar or a character row.
const EXTRACT_BETWEEN_ERROR_CELL_ELEMENT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.EXTRACT_BETWEEN.CELL_ELEMENT",
    identifier: Some("RunMat:extractBetween:CellElement"),
    when: "Cell text input/boundary contains non-text values or non-row char arrays.",
    message: "extractBetween: cell array elements must be string scalars or character vectors",
};

/// Raised when text and boundary shapes cannot be broadcast together, or when
/// the broadcast shape is not supported by the input container.
const EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.EXTRACT_BETWEEN.SIZE_MISMATCH",
    identifier: Some("RunMat:extractBetween:SizeMismatch"),
    when: "Text/boundary inputs are not broadcast-compatible for extraction.",
    message: "extractBetween: boundary sizes must be compatible with the text input",
};

/// Raised when constructing the output container fails.
const EXTRACT_BETWEEN_ERROR_INTERNAL: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.EXTRACT_BETWEEN.INTERNAL",
    identifier: Some("RunMat:extractBetween:InternalError"),
    when: "Internal output construction failed.",
    message: "extractBetween: internal error",
};
204
/// Table of every error descriptor this builtin can raise; referenced by
/// `EXTRACT_BETWEEN_DESCRIPTOR`.
const EXTRACT_BETWEEN_ERRORS: [BuiltinErrorDescriptor; 9] = [
    EXTRACT_BETWEEN_ERROR_INVALID_INPUT,
    EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE,
    EXTRACT_BETWEEN_ERROR_POSITION_TYPE,
    EXTRACT_BETWEEN_ERROR_NAME_VALUE_PAIR,
    EXTRACT_BETWEEN_ERROR_OPTION_NAME,
    EXTRACT_BETWEEN_ERROR_OPTION_VALUE,
    EXTRACT_BETWEEN_ERROR_CELL_ELEMENT,
    EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH,
    EXTRACT_BETWEEN_ERROR_INTERNAL,
];
216
/// Public descriptor for `extractBetween`, bundling its signatures and error
/// table; referenced from the `#[runtime_builtin]` attribute on the builtin.
pub const EXTRACT_BETWEEN_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &EXTRACT_BETWEEN_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &EXTRACT_BETWEEN_ERRORS,
};
223
224fn extract_between_error(error: &'static BuiltinErrorDescriptor) -> RuntimeError {
225 extract_between_error_with_message(error.message, error)
226}
227
228fn extract_between_error_with_message(
229 message: impl Into<String>,
230 error: &'static BuiltinErrorDescriptor,
231) -> RuntimeError {
232 let mut builder = build_runtime_error(message).with_builtin(BUILTIN_NAME);
233 if let Some(identifier) = error.identifier {
234 builder = builder.with_identifier(identifier);
235 }
236 builder.build()
237}
238
/// Passes `err` through `map_control_flow_with_builtin` together with this
/// builtin's name; used as the `map_err` adapter for gather failures.
fn map_flow(err: RuntimeError) -> RuntimeError {
    map_control_flow_with_builtin(err, BUILTIN_NAME)
}
242
/// Whether the boundary markers/positions themselves are part of the
/// extracted text.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BoundariesMode {
    /// Boundaries are left out of the result (default for text boundaries).
    Exclusive,
    /// Boundaries are kept in the result (default for numeric positions).
    Inclusive,
}
248
249#[runtime_builtin(
250 name = "extractBetween",
251 category = "strings/transform",
252 summary = "Extract substrings between boundary markers.",
253 keywords = "extractBetween,substring,boundaries,strings",
254 accel = "sink",
255 type_resolver(text_preserve_type),
256 descriptor(crate::builtins::strings::transform::extractbetween::EXTRACT_BETWEEN_DESCRIPTOR),
257 builtin_path = "crate::builtins::strings::transform::extractbetween"
258)]
259async fn extract_between_builtin(
260 text: Value,
261 start: Value,
262 stop: Value,
263 rest: Vec<Value>,
264) -> BuiltinResult<Value> {
265 let text = gather_if_needed_async(&text).await.map_err(map_flow)?;
266 let start = gather_if_needed_async(&start).await.map_err(map_flow)?;
267 let stop = gather_if_needed_async(&stop).await.map_err(map_flow)?;
268
269 let mode_override = parse_boundaries_option(&rest).await?;
270
271 let normalized_text = NormalizedText::from_value(text)?;
272 let start_boundary = BoundaryArg::from_value(start)?;
273 let stop_boundary = BoundaryArg::from_value(stop)?;
274
275 if start_boundary.kind() != stop_boundary.kind() {
276 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE));
277 }
278 let boundary_kind = start_boundary.kind();
279 let effective_mode = mode_override.unwrap_or(match boundary_kind {
280 BoundaryKind::Text => BoundariesMode::Exclusive,
281 BoundaryKind::Position => BoundariesMode::Inclusive,
282 });
283
284 let start_shape = start_boundary.shape();
285 let stop_shape = stop_boundary.shape();
286 let text_shape = normalized_text.shape();
287
288 let shape_ts = broadcast_shapes(BUILTIN_NAME, text_shape, start_shape).map_err(|err| {
289 extract_between_error_with_message(
290 format!("{}: {err}", EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH.message),
291 &EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH,
292 )
293 })?;
294 let output_shape = broadcast_shapes(BUILTIN_NAME, &shape_ts, stop_shape).map_err(|err| {
295 extract_between_error_with_message(
296 format!("{}: {err}", EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH.message),
297 &EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH,
298 )
299 })?;
300 if !normalized_text.supports_shape(&output_shape) {
301 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH));
302 }
303
304 let total: usize = output_shape.iter().copied().product();
305 if total == 0 {
306 return normalized_text.into_value(Vec::new(), output_shape);
307 }
308
309 let text_strides = compute_strides(text_shape);
310 let start_strides = compute_strides(start_shape);
311 let stop_strides = compute_strides(stop_shape);
312
313 let mut results = Vec::with_capacity(total);
314
315 for idx in 0..total {
316 let text_idx = broadcast_index(idx, &output_shape, text_shape, &text_strides);
317 let start_idx = broadcast_index(idx, &output_shape, start_shape, &start_strides);
318 let stop_idx = broadcast_index(idx, &output_shape, stop_shape, &stop_strides);
319
320 let result = match boundary_kind {
321 BoundaryKind::Text => {
322 let text_value = normalized_text.data(text_idx);
323 let start_value = start_boundary.text(start_idx);
324 let stop_value = stop_boundary.text(stop_idx);
325 extract_with_text_boundaries(text_value, start_value, stop_value, effective_mode)
326 }
327 BoundaryKind::Position => {
328 let text_value = normalized_text.data(text_idx);
329 let start_value = start_boundary.position(start_idx);
330 let stop_value = stop_boundary.position(stop_idx);
331 extract_with_positions(text_value, start_value, stop_value, effective_mode)
332 }
333 };
334 results.push(result);
335 }
336
337 normalized_text.into_value(results, output_shape)
338}
339
340async fn parse_boundaries_option(args: &[Value]) -> BuiltinResult<Option<BoundariesMode>> {
341 if args.is_empty() {
342 return Ok(None);
343 }
344 if !args.len().is_multiple_of(2) {
345 return Err(extract_between_error(
346 &EXTRACT_BETWEEN_ERROR_NAME_VALUE_PAIR,
347 ));
348 }
349
350 let mut mode: Option<BoundariesMode> = None;
351 let mut idx = 0;
352 while idx < args.len() {
353 let name_value = gather_if_needed_async(&args[idx]).await.map_err(map_flow)?;
354 let name = value_to_string(&name_value)
355 .ok_or_else(|| extract_between_error(&EXTRACT_BETWEEN_ERROR_OPTION_NAME))?;
356 if !name.eq_ignore_ascii_case("boundaries") {
357 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_OPTION_NAME));
358 }
359 let value = gather_if_needed_async(&args[idx + 1])
360 .await
361 .map_err(map_flow)?;
362 let value_str = value_to_string(&value)
363 .ok_or_else(|| extract_between_error(&EXTRACT_BETWEEN_ERROR_OPTION_VALUE))?;
364 let parsed_mode = if value_str.eq_ignore_ascii_case("inclusive") {
365 BoundariesMode::Inclusive
366 } else if value_str.eq_ignore_ascii_case("exclusive") {
367 BoundariesMode::Exclusive
368 } else {
369 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_OPTION_VALUE));
370 };
371 mode = Some(parsed_mode);
372 idx += 2;
373 }
374 Ok(mode)
375}
376
377fn value_to_string(value: &Value) -> Option<String> {
378 match value {
379 Value::String(s) => Some(s.clone()),
380 Value::StringArray(sa) if sa.data.len() == 1 => Some(sa.data[0].clone()),
381 Value::CharArray(ca) if ca.rows <= 1 => {
382 if ca.rows == 0 {
383 Some(String::new())
384 } else {
385 Some(char_row_to_string_slice(&ca.data, ca.cols, 0))
386 }
387 }
388 Value::CharArray(_) => None,
389 Value::Cell(cell) if cell.data.len() == 1 => {
390 let element = &cell.data[0];
391 value_to_string(element)
392 }
393 _ => None,
394 }
395}
396
/// Outcome of extracting from a single text element.
#[derive(Clone)]
struct ExtractResult {
    /// Extracted text, or the `<missing>` marker produced by [`Self::missing`].
    text: String,
}

impl ExtractResult {
    /// Result holding the literal `<missing>` marker.
    fn missing() -> Self {
        Self::text(String::from("<missing>"))
    }

    /// Result wrapping already-extracted text.
    fn text(text: String) -> Self {
        ExtractResult { text }
    }
}
413
414fn extract_with_text_boundaries(
415 text: &str,
416 start: &str,
417 stop: &str,
418 mode: BoundariesMode,
419) -> ExtractResult {
420 if is_missing_string(text) || is_missing_string(start) || is_missing_string(stop) {
421 return ExtractResult::missing();
422 }
423
424 if let Some(start_idx) = text.find(start) {
425 let search_start = start_idx + start.len();
426 if search_start > text.len() {
427 return ExtractResult::text(String::new());
428 }
429 if let Some(relative_end) = text[search_start..].find(stop) {
430 let end_idx = search_start + relative_end;
431 match mode {
432 BoundariesMode::Inclusive => {
433 let end_capture = min(text.len(), end_idx + stop.len());
434 let slice = &text[start_idx..end_capture];
435 ExtractResult::text(slice.to_string())
436 }
437 BoundariesMode::Exclusive => {
438 if end_idx < search_start {
439 ExtractResult::text(String::new())
440 } else {
441 let slice = &text[search_start..end_idx];
442 ExtractResult::text(slice.to_string())
443 }
444 }
445 }
446 } else {
447 ExtractResult::text(String::new())
448 }
449 } else {
450 ExtractResult::text(String::new())
451 }
452}
453
454fn extract_with_positions(
455 text: &str,
456 start: usize,
457 stop: usize,
458 mode: BoundariesMode,
459) -> ExtractResult {
460 if is_missing_string(text) {
461 return ExtractResult::missing();
462 }
463 if text.is_empty() {
464 return ExtractResult::text(String::new());
465 }
466 let chars: Vec<char> = text.chars().collect();
467 let len = chars.len();
468 if len == 0 {
469 return ExtractResult::text(String::new());
470 }
471
472 if start == 0 || stop == 0 {
473 return ExtractResult::text(String::new());
474 }
475
476 if start > len {
477 return ExtractResult::text(String::new());
478 }
479 let stop_clamped = stop.min(len);
480 if stop_clamped == 0 {
481 return ExtractResult::text(String::new());
482 }
483
484 match mode {
485 BoundariesMode::Inclusive => {
486 if start > stop_clamped {
487 return ExtractResult::text(String::new());
488 }
489 let start_idx = start - 1;
490 let end_idx = stop_clamped - 1;
491 if start_idx >= len || end_idx >= len || start_idx > end_idx {
492 ExtractResult::text(String::new())
493 } else {
494 let slice: String = chars[start_idx..=end_idx].iter().collect();
495 ExtractResult::text(slice)
496 }
497 }
498 BoundariesMode::Exclusive => {
499 if start + 1 >= stop_clamped {
500 return ExtractResult::text(String::new());
501 }
502 let start_idx = start;
503 let end_idx = stop_clamped - 2;
504 if start_idx >= len || end_idx >= len || start_idx > end_idx {
505 ExtractResult::text(String::new())
506 } else {
507 let slice: String = chars[start_idx..=end_idx].iter().collect();
508 ExtractResult::text(slice)
509 }
510 }
511 }
512}
513
/// Layout captured from a cell-array input so the output cell can be rebuilt
/// with the same shape and per-element text kinds.
#[derive(Clone, Debug)]
struct CellInfo {
    /// Shape of the input cell array.
    shape: Vec<usize>,
    /// Kind of each input element, in the cell's storage order.
    element_kinds: Vec<CellElementKind>,
}
519
/// Text kind of one cell element, used to choose the output element type.
#[derive(Clone, Debug)]
enum CellElementKind {
    /// Element was a string scalar; output element is a `Value::String`.
    String,
    /// Element was a character array; output element is a character row.
    Char,
}
525
/// Original container kind of the text input, retained so the result can be
/// returned in the same kind of container.
#[derive(Clone, Debug)]
enum TextKind {
    /// A single `Value::String`.
    StringScalar,
    /// A `Value::StringArray`.
    StringArray,
    /// A `Value::CharArray` with the given number of rows.
    CharArray { rows: usize },
    /// A `Value::Cell` together with its captured layout.
    CellArray(CellInfo),
}
533
534#[derive(Clone, Debug)]
535struct NormalizedText {
536 data: Vec<String>,
537 shape: Vec<usize>,
538 kind: TextKind,
539}
540
541impl NormalizedText {
542 fn from_value(value: Value) -> BuiltinResult<Self> {
543 match value {
544 Value::String(s) => Ok(Self {
545 data: vec![s],
546 shape: vec![1, 1],
547 kind: TextKind::StringScalar,
548 }),
549 Value::StringArray(sa) => Ok(Self {
550 data: sa.data.clone(),
551 shape: sa.shape.clone(),
552 kind: TextKind::StringArray,
553 }),
554 Value::CharArray(ca) => {
555 let rows = ca.rows;
556 let mut data = Vec::with_capacity(rows);
557 for row in 0..rows {
558 data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
559 }
560 Ok(Self {
561 data,
562 shape: vec![rows, 1],
563 kind: TextKind::CharArray { rows },
564 })
565 }
566 Value::Cell(cell) => {
567 let shape = cell.shape.clone();
568 let mut data = Vec::with_capacity(cell.data.len());
569 let mut kinds = Vec::with_capacity(cell.data.len());
570 for element in &cell.data {
571 match &**element {
572 Value::String(s) => {
573 data.push(s.clone());
574 kinds.push(CellElementKind::String);
575 }
576 Value::StringArray(sa) if sa.data.len() == 1 => {
577 data.push(sa.data[0].clone());
578 kinds.push(CellElementKind::String);
579 }
580 Value::CharArray(ca) if ca.rows <= 1 => {
581 if ca.rows == 0 {
582 data.push(String::new());
583 } else {
584 data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
585 }
586 kinds.push(CellElementKind::Char);
587 }
588 Value::CharArray(_) => {
589 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_CELL_ELEMENT))
590 }
591 _ => {
592 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_CELL_ELEMENT))
593 }
594 }
595 }
596 Ok(Self {
597 data,
598 shape: shape.clone(),
599 kind: TextKind::CellArray(CellInfo {
600 shape,
601 element_kinds: kinds,
602 }),
603 })
604 }
605 _ => Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_INVALID_INPUT)),
606 }
607 }
608
609 fn shape(&self) -> &[usize] {
610 &self.shape
611 }
612
613 fn data(&self, idx: usize) -> &str {
614 &self.data[idx]
615 }
616
617 fn supports_shape(&self, output_shape: &[usize]) -> bool {
618 match &self.kind {
619 TextKind::StringScalar => true,
620 TextKind::StringArray => true,
621 TextKind::CharArray { .. } => output_shape == self.shape,
622 TextKind::CellArray(info) => output_shape == info.shape,
623 }
624 }
625
626 fn into_value(
627 self,
628 results: Vec<ExtractResult>,
629 output_shape: Vec<usize>,
630 ) -> BuiltinResult<Value> {
631 match self.kind {
632 TextKind::StringScalar => {
633 if results.len() <= 1 {
634 let value = results
635 .into_iter()
636 .next()
637 .unwrap_or_else(|| ExtractResult::text(String::new()));
638 Ok(Value::String(value.text))
639 } else {
640 let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
641 let array = StringArray::new(data, output_shape).map_err(|e| {
642 extract_between_error_with_message(
643 format!("{BUILTIN_NAME}: {e}"),
644 &EXTRACT_BETWEEN_ERROR_INTERNAL,
645 )
646 })?;
647 Ok(Value::StringArray(array))
648 }
649 }
650 TextKind::StringArray => {
651 let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
652 let array = StringArray::new(data, output_shape).map_err(|e| {
653 extract_between_error_with_message(
654 format!("{BUILTIN_NAME}: {e}"),
655 &EXTRACT_BETWEEN_ERROR_INTERNAL,
656 )
657 })?;
658 Ok(Value::StringArray(array))
659 }
660 TextKind::CharArray { rows } => {
661 if rows == 0 {
662 return CharArray::new(Vec::new(), 0, 0)
663 .map(Value::CharArray)
664 .map_err(|e| {
665 extract_between_error_with_message(
666 format!("{BUILTIN_NAME}: {e}"),
667 &EXTRACT_BETWEEN_ERROR_INTERNAL,
668 )
669 });
670 }
671 if results.len() != rows {
672 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH));
673 }
674 let mut max_width = 0usize;
675 let mut row_strings = Vec::with_capacity(rows);
676 for result in &results {
677 let width = result.text.chars().count();
678 max_width = max_width.max(width);
679 row_strings.push(result.text.clone());
680 }
681 let mut flattened = Vec::with_capacity(rows * max_width);
682 for row in row_strings {
683 let mut chars: Vec<char> = row.chars().collect();
684 if chars.len() < max_width {
685 chars.resize(max_width, ' ');
686 }
687 flattened.extend(chars);
688 }
689 CharArray::new(flattened, rows, max_width)
690 .map(Value::CharArray)
691 .map_err(|e| {
692 extract_between_error_with_message(
693 format!("{BUILTIN_NAME}: {e}"),
694 &EXTRACT_BETWEEN_ERROR_INTERNAL,
695 )
696 })
697 }
698 TextKind::CellArray(info) => {
699 if results.len() != info.element_kinds.len() {
700 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_SIZE_MISMATCH));
701 }
702 let mut values = Vec::with_capacity(results.len());
703 for (idx, result) in results.into_iter().enumerate() {
704 match info.element_kinds[idx] {
705 CellElementKind::String => values.push(Value::String(result.text)),
706 CellElementKind::Char => {
707 let ca = CharArray::new_row(&result.text);
708 values.push(Value::CharArray(ca));
709 }
710 }
711 }
712 make_cell_with_shape(values, info.shape).map_err(|e| {
713 extract_between_error_with_message(
714 format!("{BUILTIN_NAME}: {e}"),
715 &EXTRACT_BETWEEN_ERROR_INTERNAL,
716 )
717 })
718 }
719 }
720 }
721}
722
/// Kind of a boundary argument; start and end must be of the same kind.
#[derive(Clone, Debug, PartialEq, Eq)]
enum BoundaryKind {
    /// Boundary given as marker text.
    Text,
    /// Boundary given as numeric position(s).
    Position,
}
728
729#[derive(Clone, Debug)]
730enum BoundaryArg {
731 Text(BoundaryText),
732 Position(BoundaryPositions),
733}
734
735impl BoundaryArg {
736 fn from_value(value: Value) -> BuiltinResult<Self> {
737 match value {
738 Value::String(_) | Value::StringArray(_) | Value::CharArray(_) | Value::Cell(_) => {
739 BoundaryText::from_value(value).map(BoundaryArg::Text)
740 }
741 Value::Num(_) | Value::Int(_) | Value::Tensor(_) => {
742 BoundaryPositions::from_value(value).map(BoundaryArg::Position)
743 }
744 other => Err(extract_between_error_with_message(
745 format!(
746 "{}: unsupported argument {other:?}",
747 EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE.message
748 ),
749 &EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE,
750 )),
751 }
752 }
753
754 fn kind(&self) -> BoundaryKind {
755 match self {
756 BoundaryArg::Text(_) => BoundaryKind::Text,
757 BoundaryArg::Position(_) => BoundaryKind::Position,
758 }
759 }
760
761 fn shape(&self) -> &[usize] {
762 match self {
763 BoundaryArg::Text(text) => &text.shape,
764 BoundaryArg::Position(pos) => &pos.shape,
765 }
766 }
767
768 fn text(&self, idx: usize) -> &str {
769 match self {
770 BoundaryArg::Text(text) => &text.data[idx],
771 BoundaryArg::Position(_) => unreachable!(),
772 }
773 }
774
775 fn position(&self, idx: usize) -> usize {
776 match self {
777 BoundaryArg::Position(pos) => pos.data[idx],
778 BoundaryArg::Text(_) => unreachable!(),
779 }
780 }
781}
782
783#[derive(Clone, Debug)]
784struct BoundaryText {
785 data: Vec<String>,
786 shape: Vec<usize>,
787}
788
789impl BoundaryText {
790 fn from_value(value: Value) -> BuiltinResult<Self> {
791 match value {
792 Value::String(s) => Ok(Self {
793 data: vec![s],
794 shape: vec![1, 1],
795 }),
796 Value::StringArray(sa) => Ok(Self {
797 data: sa.data.clone(),
798 shape: sa.shape.clone(),
799 }),
800 Value::CharArray(ca) => {
801 let mut data = Vec::with_capacity(ca.rows);
802 for row in 0..ca.rows {
803 data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
804 }
805 Ok(Self {
806 data,
807 shape: vec![ca.rows, 1],
808 })
809 }
810 Value::Cell(cell) => {
811 let shape = cell.shape.clone();
812 let mut data = Vec::with_capacity(cell.data.len());
813 for element in &cell.data {
814 match &**element {
815 Value::String(s) => data.push(s.clone()),
816 Value::StringArray(sa) if sa.data.len() == 1 => {
817 data.push(sa.data[0].clone());
818 }
819 Value::CharArray(ca) if ca.rows <= 1 => {
820 if ca.rows == 0 {
821 data.push(String::new());
822 } else {
823 data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
824 }
825 }
826 Value::CharArray(_) => {
827 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_CELL_ELEMENT))
828 }
829 _ => {
830 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_CELL_ELEMENT))
831 }
832 }
833 }
834 Ok(Self { data, shape })
835 }
836 _ => Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE)),
837 }
838 }
839}
840
841#[derive(Clone, Debug)]
842struct BoundaryPositions {
843 data: Vec<usize>,
844 shape: Vec<usize>,
845}
846
847impl BoundaryPositions {
848 fn from_value(value: Value) -> BuiltinResult<Self> {
849 match value {
850 Value::Num(n) => Ok(Self {
851 data: vec![parse_position(n)?],
852 shape: vec![1, 1],
853 }),
854 Value::Int(i) => Ok(Self {
855 data: vec![parse_position_int(i)?],
856 shape: vec![1, 1],
857 }),
858 Value::Tensor(t) => {
859 let mut data = Vec::with_capacity(t.data.len());
860 for &entry in &t.data {
861 data.push(parse_position(entry)?);
862 }
863 Ok(Self {
864 data,
865 shape: if t.shape.is_empty() {
866 vec![t.rows, t.cols.max(1)]
867 } else {
868 t.shape
869 },
870 })
871 }
872 _ => Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE)),
873 }
874 }
875}
876
877fn parse_position(value: f64) -> BuiltinResult<usize> {
878 if !value.is_finite() || value < 1.0 {
879 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_POSITION_TYPE));
880 }
881 if (value.fract()).abs() > f64::EPSILON {
882 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_POSITION_TYPE));
883 }
884 if value > (usize::MAX as f64) {
885 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_POSITION_TYPE));
886 }
887 Ok(value as usize)
888}
889
890fn parse_position_int(value: IntValue) -> BuiltinResult<usize> {
891 let val = value.to_i64();
892 if val <= 0 {
893 return Err(extract_between_error(&EXTRACT_BETWEEN_ERROR_POSITION_TYPE));
894 }
895 Ok(val as usize)
896}
897
898#[cfg(test)]
899pub(crate) mod tests {
900 #![allow(non_snake_case)]
901
902 use super::*;
903 use runmat_builtins::{CellArray, ResolveContext, Tensor, Type};
904
905 fn extract_between_builtin(
906 text: Value,
907 start: Value,
908 stop: Value,
909 rest: Vec<Value>,
910 ) -> BuiltinResult<Value> {
911 futures::executor::block_on(super::extract_between_builtin(text, start, stop, rest))
912 }
913
914 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
915 #[test]
916 fn extractBetween_basic_string() {
917 let result = extract_between_builtin(
918 Value::String("RunMat accelerates MATLAB".into()),
919 Value::String("RunMat ".into()),
920 Value::String(" MATLAB".into()),
921 Vec::new(),
922 )
923 .expect("extractBetween");
924 assert_eq!(result, Value::String("accelerates".into()));
925 }
926
927 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
928 #[test]
929 fn extractBetween_inclusive_option() {
930 let result = extract_between_builtin(
931 Value::String("a[b]c".into()),
932 Value::String("[".into()),
933 Value::String("]".into()),
934 vec![
935 Value::String("Boundaries".into()),
936 Value::String("inclusive".into()),
937 ],
938 )
939 .expect("extractBetween");
940 assert_eq!(result, Value::String("[b]".into()));
941 }
942
943 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
944 #[test]
945 fn extractBetween_numeric_positions() {
946 let result = extract_between_builtin(
947 Value::String("Accelerator".into()),
948 Value::Num(3.0),
949 Value::Num(7.0),
950 Vec::new(),
951 )
952 .expect("extractBetween");
953 assert_eq!(result, Value::String("celer".into()));
954 }
955
956 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
957 #[test]
958 fn extractBetween_numeric_positions_exclusive_option() {
959 let result = extract_between_builtin(
960 Value::String("Accelerator".into()),
961 Value::Num(3.0),
962 Value::Num(7.0),
963 vec![
964 Value::String("Boundaries".into()),
965 Value::String("exclusive".into()),
966 ],
967 )
968 .expect("extractBetween");
969 assert_eq!(result, Value::String("ele".into()));
970 }
971
972 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
973 #[test]
974 fn extractBetween_numeric_positions_clamps_stop() {
975 let result = extract_between_builtin(
976 Value::String("Accelerator".into()),
977 Value::Num(3.0),
978 Value::Num(100.0),
979 Vec::new(),
980 )
981 .expect("extractBetween");
982 assert_eq!(result, Value::String("celerator".into()));
983 }
984
985 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
986 #[test]
987 fn extractBetween_numeric_positions_start_past_length() {
988 let result = extract_between_builtin(
989 Value::String("abc".into()),
990 Value::Num(10.0),
991 Value::Num(12.0),
992 Vec::new(),
993 )
994 .expect("extractBetween");
995 assert_eq!(result, Value::String(String::new()));
996 }
997
998 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
999 #[test]
1000 fn extractBetween_string_array_broadcast() {
1001 let array = StringArray::new(
1002 vec!["runmat_accel.rs".into(), "runmat_gc.rs".into()],
1003 vec![2, 1],
1004 )
1005 .unwrap();
1006 let result = extract_between_builtin(
1007 Value::StringArray(array),
1008 Value::String("runmat_".into()),
1009 Value::String(".rs".into()),
1010 Vec::new(),
1011 )
1012 .expect("extractBetween");
1013 match result {
1014 Value::StringArray(sa) => {
1015 assert_eq!(sa.data, vec!["accel".to_string(), "gc".to_string()]);
1016 assert_eq!(sa.shape, vec![2, 1]);
1017 }
1018 other => panic!("expected string array, got {other:?}"),
1019 }
1020 }
1021
// A single-row char array input produces a char array output with one row;
// trailing whitespace in the row is ignored when comparing the text.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn extractBetween_char_array_rows() {
    let source = "GPUAccelerateVM";
    // The literal is ASCII, so its byte length is also its column count.
    let row = CharArray::new(source.chars().collect(), 1, source.len()).unwrap();

    let outcome = extract_between_builtin(
        Value::CharArray(row),
        Value::String("GPU".into()),
        Value::String("VM".into()),
        Vec::new(),
    )
    .expect("extractBetween");

    let out = match outcome {
        Value::CharArray(out) => out,
        other => panic!("expected char array, got {other:?}"),
    };
    assert_eq!(out.rows, 1);
    let text: String = out.data.iter().collect();
    assert_eq!(text.trim_end(), "Accelerate");
}
1047
// Each cell element keeps its own text type: the char-array element yields a
// char array, and the string element (which contains no "[" / "]") yields an
// empty string.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn extractBetween_cell_array_preserves_types() {
    let elements = vec![
        Value::CharArray(CharArray::new_row("A[B]C")),
        Value::String("Planner<GPU>".into()),
    ];
    let cell = CellArray::new(elements, 1, 2).unwrap();

    let outcome = extract_between_builtin(
        Value::Cell(cell),
        Value::String("[".into()),
        Value::String("]".into()),
        Vec::new(),
    )
    .expect("extractBetween");

    let out = match outcome {
        Value::Cell(out) => out,
        other => panic!("expected cell array, got {other:?}"),
    };
    let first = out.get(0, 0).unwrap();
    let second = out.get(0, 1).unwrap();
    assert_eq!(first, Value::CharArray(CharArray::new_row("B")));
    assert_eq!(second, Value::String(String::new()));
}
1077
// A "<missing>" element in the input text comes back unchanged as a 1x1
// string array holding "<missing>".
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn extractBetween_missing_string_propagates() {
    // Input and expected output are the same 1x1 array, built independently.
    let missing_array = || StringArray::new(vec!["<missing>".into()], vec![1, 1]).unwrap();

    let outcome = extract_between_builtin(
        Value::StringArray(missing_array()),
        Value::String("[".into()),
        Value::String("]".into()),
        Vec::new(),
    )
    .expect("extractBetween");

    let expected = Value::StringArray(missing_array());
    assert_eq!(outcome, expected);
}
1094
// A fractional start position (0.5) is rejected with the position-type error,
// checked by both message and identifier.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn extractBetween_position_type_error() {
    let outcome = extract_between_builtin(
        Value::String("abc".into()),
        Value::Num(0.5),
        Value::Num(2.0),
        Vec::new(),
    );

    let err = outcome.unwrap_err();
    let expected = &EXTRACT_BETWEEN_ERROR_POSITION_TYPE;
    assert_eq!(err.to_string(), expected.message);
    assert_eq!(err.identifier(), expected.identifier);
}
1111
// Mixing a text start boundary with a numeric stop boundary is rejected with
// the boundary-type error, checked by both message and identifier.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn extractBetween_mixed_boundary_error() {
    let outcome = extract_between_builtin(
        Value::String("abc".into()),
        Value::String("a".into()),
        Value::Num(3.0),
        Vec::new(),
    );

    let err = outcome.unwrap_err();
    let expected = &EXTRACT_BETWEEN_ERROR_BOUNDARY_TYPE;
    assert_eq!(err.to_string(), expected.message);
    assert_eq!(err.identifier(), expected.identifier);
}
1128
// Per-element numeric start/stop tensors (2x1) are paired with the matching
// elements of a 2x1 string array; the output keeps that shape.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn extractBetween_numeric_tensor_broadcast() {
    let shape = vec![2, 1];
    let text = StringArray::new(vec!["abcd".into(), "wxyz".into()], shape.clone()).unwrap();
    let start = Tensor::new(vec![1.0, 2.0], shape.clone()).unwrap();
    let stop = Tensor::new(vec![3.0, 4.0], shape.clone()).unwrap();

    let outcome = extract_between_builtin(
        Value::StringArray(text),
        Value::Tensor(start),
        Value::Tensor(stop),
        Vec::new(),
    )
    .expect("extractBetween");

    let sa = match outcome {
        Value::StringArray(sa) => sa,
        other => panic!("expected string array, got {other:?}"),
    };
    assert_eq!(sa.data, vec!["abc".to_string(), "xyz".to_string()]);
    assert_eq!(sa.shape, vec![2, 1]);
}
1150
// An unrecognised value ("middle") for the "Boundaries" option is rejected
// with the option-value error, checked by both message and identifier.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn extractBetween_option_invalid_value() {
    let options = vec![
        Value::String("Boundaries".into()),
        Value::String("middle".into()),
    ];

    let outcome = extract_between_builtin(
        Value::String("abc".into()),
        Value::String("a".into()),
        Value::String("c".into()),
        options,
    );

    let err = outcome.unwrap_err();
    let expected = &EXTRACT_BETWEEN_ERROR_OPTION_VALUE;
    assert_eq!(err.to_string(), expected.message);
    assert_eq!(err.identifier(), expected.identifier);
}
1170
// An unknown option name ("Padding") is rejected with the option-name error,
// checked by both message and identifier.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn extractBetween_option_name_error() {
    let options = vec![
        Value::String("Padding".into()),
        Value::String("inclusive".into()),
    ];

    let outcome = extract_between_builtin(
        Value::String("abc".into()),
        Value::String("a".into()),
        Value::String("c".into()),
        options,
    );

    let err = outcome.unwrap_err();
    let expected = &EXTRACT_BETWEEN_ERROR_OPTION_NAME;
    assert_eq!(err.to_string(), expected.message);
    assert_eq!(err.identifier(), expected.identifier);
}
1190
// An option name supplied without a value is rejected with the
// name-value-pair error, checked by both message and identifier.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn extractBetween_option_pair_error() {
    let dangling_name = vec![Value::String("Boundaries".into())];

    let outcome = extract_between_builtin(
        Value::String("abc".into()),
        Value::String("a".into()),
        Value::String("b".into()),
        dangling_name,
    );

    let err = outcome.unwrap_err();
    let expected = &EXTRACT_BETWEEN_ERROR_NAME_VALUE_PAIR;
    assert_eq!(err.to_string(), expected.message);
    assert_eq!(err.identifier(), expected.identifier);
}
1210
// A "<missing>" start boundary makes the whole result "<missing>", even
// though the text and the stop boundary are ordinary strings.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn extractBetween_missing_boundary_propagates() {
    let text = Value::String("Planner<GPU>".into());
    let start = Value::String("<missing>".into());
    let stop = Value::String(">".into());

    let outcome =
        extract_between_builtin(text, start, stop, Vec::new()).expect("extractBetween");

    let expected = Value::String("<missing>".into());
    assert_eq!(outcome, expected);
}
1223
// Boundaries may themselves be 1x1 cell arrays wrapping char rows; the result
// for a cell text input is a cell whose element is the extracted string.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn extractBetween_cell_boundary_arguments() {
    // Wraps a single value in a 1x1 cell array.
    let scalar_cell = |element: Value| CellArray::new(vec![element], 1, 1).unwrap();

    let text = scalar_cell(Value::String("A<GPU>".into()));
    let start = scalar_cell(Value::CharArray(CharArray::new_row("<")));
    let stop = scalar_cell(Value::CharArray(CharArray::new_row(">")));

    let outcome = extract_between_builtin(
        Value::Cell(text),
        Value::Cell(start),
        Value::Cell(stop),
        Vec::new(),
    )
    .expect("extractBetween");

    let out = match outcome {
        Value::Cell(out) => out,
        other => panic!("expected cell array, got {other:?}"),
    };
    let value = out.get(0, 0).unwrap();
    assert_eq!(value, Value::String("GPU".into()));
}
1245
// The type resolver maps a single string input type to a string output type.
#[test]
fn extract_between_type_preserves_text() {
    let context = ResolveContext::new(Vec::new());
    let resolved = text_preserve_type(&[Type::String], &context);
    assert_eq!(resolved, Type::String);
}
1253}