1use std::cmp::min;
4
5use crate::builtins::common::broadcast::{broadcast_index, broadcast_shapes, compute_strides};
6use crate::builtins::common::map_control_flow_with_builtin;
7use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
8use crate::builtins::strings::type_resolvers::text_preserve_type;
9use crate::{
10 build_runtime_error, gather_if_needed_async, make_cell_with_shape, BuiltinResult, RuntimeError,
11};
12use runmat_builtins::{
13 BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
14 BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
15 CharArray, IntValue, StringArray, Value,
16};
17use runmat_macros::runtime_builtin;
18
19use crate::builtins::common::spec::{
20 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
21 ReductionNaN, ResidencyPolicy, ShapeRequirements,
22};
23
// GPU spec for `eraseBetween`, registered via `register_gpu_spec` under this
// module's builtin path. It lists no supported precisions and no provider
// hooks, and selects the `GatherImmediately` residency policy; the `notes`
// field records that the builtin runs on the CPU.
#[runmat_macros::register_gpu_spec(
    builtin_path = "crate::builtins::strings::transform::erasebetween"
)]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "eraseBetween",
    op_kind: GpuOpKind::Custom("string-transform"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::Matlab,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Runs on the CPU; GPU-resident inputs are gathered before deletion and outputs remain on the host.",
};
41
// Fusion spec for `eraseBetween`, registered via `register_fusion_spec` under
// this module's builtin path. No elementwise or reduction kernel is provided
// (`None` for both); the `notes` field states the builtin is excluded from
// fusion plans.
#[runmat_macros::register_fusion_spec(
    builtin_path = "crate::builtins::strings::transform::erasebetween"
)]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "eraseBetween",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Pure string manipulation builtin; excluded from fusion plans and gathers GPU inputs immediately.",
};
54
/// Builtin name attached to runtime errors and passed to `broadcast_shapes`
/// and `map_control_flow_with_builtin` in this module.
const BUILTIN_NAME: &str = "eraseBetween";
56
/// Descriptor of the single output (`newText`) shared by both signatures.
const ERASE_BETWEEN_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "newText",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Text with between-boundary content erased, preserving text container semantics.",
}];
64
/// Input descriptors for the three-argument form `eraseBetween(str, start, end)`.
/// All three parameters are required and typed as `Any`.
const ERASE_BETWEEN_INPUTS_BASE: [BuiltinParamDescriptor; 3] = [
    BuiltinParamDescriptor {
        name: "str",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Input text scalar/array/cell.",
    },
    BuiltinParamDescriptor {
        name: "start",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Start boundary marker text or positive integer position(s).",
    },
    BuiltinParamDescriptor {
        name: "end",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "End boundary marker text or positive integer position(s).",
    },
];
88
/// Input descriptors for the name/value form
/// `eraseBetween(str, start, end, Name, Value, ...)`.
///
/// The first three entries repeat the base inputs; they are followed by a
/// required string-scalar `Name` and a variadic `Value`.
const ERASE_BETWEEN_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 5] = [
    BuiltinParamDescriptor {
        name: "str",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Input text scalar/array/cell.",
    },
    BuiltinParamDescriptor {
        name: "start",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Start boundary marker text or positive integer position(s).",
    },
    BuiltinParamDescriptor {
        name: "end",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "End boundary marker text or positive integer position(s).",
    },
    BuiltinParamDescriptor {
        name: "Name",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Option name (`Boundaries`).",
    },
    BuiltinParamDescriptor {
        name: "Value",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Variadic,
        default: None,
        description: "Option value and additional Name/Value pairs.",
    },
];
126
/// The two advertised call signatures: the plain three-argument form and the
/// form with trailing name/value options. Both share the same output descriptor.
const ERASE_BETWEEN_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
    BuiltinSignatureDescriptor {
        label: "newText = eraseBetween(str, start, end)",
        inputs: &ERASE_BETWEEN_INPUTS_BASE,
        outputs: &ERASE_BETWEEN_OUTPUT,
    },
    BuiltinSignatureDescriptor {
        label: "newText = eraseBetween(str, start, end, Name, Value, ...)",
        inputs: &ERASE_BETWEEN_INPUTS_NAME_VALUE,
        outputs: &ERASE_BETWEEN_OUTPUT,
    },
];
139
/// Raised by `NormalizedText::from_value` when the first argument is not one
/// of the accepted text containers.
const ERASE_BETWEEN_ERROR_INVALID_INPUT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.ERASE_BETWEEN.INVALID_INPUT",
    identifier: Some("RunMat:eraseBetween:InvalidInput"),
    when: "First argument is not a string array, character array, or cell array of text scalars.",
    message:
        "eraseBetween: first argument must be a string array, character array, or cell array of character vectors",
};

/// Raised when the start/end boundaries disagree in kind (text vs. position)
/// or a boundary value has an unsupported type.
const ERASE_BETWEEN_ERROR_BOUNDARY_TYPE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.ERASE_BETWEEN.BOUNDARY_TYPE",
    identifier: Some("RunMat:eraseBetween:BoundaryType"),
    when: "Start/end boundaries are mixed text/numeric domains or use unsupported boundary types.",
    message: "eraseBetween: start and end arguments must both be text or both be numeric positions",
};

/// Raised by `parse_position` / `parse_position_int` for invalid numeric positions.
const ERASE_BETWEEN_ERROR_POSITION_TYPE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.ERASE_BETWEEN.POSITION_TYPE",
    identifier: Some("RunMat:eraseBetween:PositionType"),
    when: "Numeric boundary positions are not positive finite integers.",
    message: "eraseBetween: position arguments must be positive integers",
};

/// Raised by `parse_boundaries_option` when the trailing arguments have odd length.
const ERASE_BETWEEN_ERROR_NAME_VALUE_PAIR: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.ERASE_BETWEEN.NAME_VALUE_PAIR",
    identifier: Some("RunMat:eraseBetween:NameValuePair"),
    when: "Name/value options are not supplied in complete pairs.",
    message: "eraseBetween: name-value arguments must appear in pairs",
};

/// Raised by `parse_boundaries_option` when an option name is not text or is
/// not `Boundaries` (compared ASCII case-insensitively).
const ERASE_BETWEEN_ERROR_OPTION_NAME: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.ERASE_BETWEEN.OPTION_NAME",
    identifier: Some("RunMat:eraseBetween:OptionName"),
    when: "An option name other than `Boundaries` was supplied.",
    message: "eraseBetween: unrecognized parameter name",
};

/// Raised by `parse_boundaries_option` when the `Boundaries` value is not
/// `inclusive` or `exclusive`.
const ERASE_BETWEEN_ERROR_OPTION_VALUE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.ERASE_BETWEEN.OPTION_VALUE",
    identifier: Some("RunMat:eraseBetween:OptionValue"),
    when: "`Boundaries` option value is not `inclusive` or `exclusive`.",
    message: "eraseBetween: 'Boundaries' must be either 'inclusive' or 'exclusive'",
};

/// Raised when a cell element (in the text input or a text boundary) is not a
/// string scalar or a char array with at most one row.
const ERASE_BETWEEN_ERROR_CELL_ELEMENT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.ERASE_BETWEEN.CELL_ELEMENT",
    identifier: Some("RunMat:eraseBetween:CellElement"),
    when: "Cell text input/boundary contains non-text values or non-row char arrays.",
    message: "eraseBetween: cell array elements must be string scalars or character vectors",
};

/// Raised when broadcasting fails or the broadcast shape is not supported by
/// the text container.
const ERASE_BETWEEN_ERROR_SIZE_MISMATCH: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.ERASE_BETWEEN.SIZE_MISMATCH",
    identifier: Some("RunMat:eraseBetween:SizeMismatch"),
    when: "Text/boundary inputs are not broadcast-compatible for erase semantics.",
    message: "eraseBetween: boundary sizes must be compatible with the text input",
};

/// Raised when constructing the output value (string array, char array, or
/// cell) fails.
const ERASE_BETWEEN_ERROR_INTERNAL: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.ERASE_BETWEEN.INTERNAL",
    identifier: Some("RunMat:eraseBetween:InternalError"),
    when: "Internal output construction failed.",
    message: "eraseBetween: internal error",
};
203
/// Every error descriptor declared for `eraseBetween`, exposed through
/// `ERASE_BETWEEN_DESCRIPTOR.errors`.
const ERASE_BETWEEN_ERRORS: [BuiltinErrorDescriptor; 9] = [
    ERASE_BETWEEN_ERROR_INVALID_INPUT,
    ERASE_BETWEEN_ERROR_BOUNDARY_TYPE,
    ERASE_BETWEEN_ERROR_POSITION_TYPE,
    ERASE_BETWEEN_ERROR_NAME_VALUE_PAIR,
    ERASE_BETWEEN_ERROR_OPTION_NAME,
    ERASE_BETWEEN_ERROR_OPTION_VALUE,
    ERASE_BETWEEN_ERROR_CELL_ELEMENT,
    ERASE_BETWEEN_ERROR_SIZE_MISMATCH,
    ERASE_BETWEEN_ERROR_INTERNAL,
];
215
/// Public descriptor bundling the signatures and error table; it is the value
/// referenced by the `descriptor(...)` argument of the `runtime_builtin`
/// attribute on `erase_between_builtin`.
pub const ERASE_BETWEEN_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &ERASE_BETWEEN_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &ERASE_BETWEEN_ERRORS,
};
222
/// Builds a runtime error from `error`, using the descriptor's own default
/// `message` as the error text.
fn erase_between_error(error: &'static BuiltinErrorDescriptor) -> RuntimeError {
    erase_between_error_with_message(error.message, error)
}
226
227fn erase_between_error_with_message(
228 message: impl Into<String>,
229 error: &'static BuiltinErrorDescriptor,
230) -> RuntimeError {
231 let mut builder = build_runtime_error(message).with_builtin(BUILTIN_NAME);
232 if let Some(identifier) = error.identifier {
233 builder = builder.with_identifier(identifier);
234 }
235 builder.build()
236}
237
/// Passes an error from a gather step through
/// `map_control_flow_with_builtin`, supplying this builtin's name.
fn map_flow(err: RuntimeError) -> RuntimeError {
    map_control_flow_with_builtin(err, BUILTIN_NAME)
}
241
/// Whether the boundaries themselves are kept or removed by the erase.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum BoundariesMode {
    /// Only the content strictly between the boundaries is erased.
    Exclusive,
    /// The boundaries are erased together with the content between them.
    Inclusive,
}
247
248#[runtime_builtin(
249 name = "eraseBetween",
250 category = "strings/transform",
251 summary = "Delete text between boundary markers.",
252 keywords = "eraseBetween,delete,boundaries,strings",
253 accel = "sink",
254 type_resolver(text_preserve_type),
255 descriptor(crate::builtins::strings::transform::erasebetween::ERASE_BETWEEN_DESCRIPTOR),
256 builtin_path = "crate::builtins::strings::transform::erasebetween"
257)]
258async fn erase_between_builtin(
259 text: Value,
260 start: Value,
261 stop: Value,
262 rest: Vec<Value>,
263) -> BuiltinResult<Value> {
264 let text = gather_if_needed_async(&text).await.map_err(map_flow)?;
265 let start = gather_if_needed_async(&start).await.map_err(map_flow)?;
266 let stop = gather_if_needed_async(&stop).await.map_err(map_flow)?;
267
268 let mode_override = parse_boundaries_option(&rest).await?;
269
270 let normalized_text = NormalizedText::from_value(text)?;
271 let start_boundary = BoundaryArg::from_value(start)?;
272 let stop_boundary = BoundaryArg::from_value(stop)?;
273
274 if start_boundary.kind() != stop_boundary.kind() {
275 return Err(erase_between_error(&ERASE_BETWEEN_ERROR_BOUNDARY_TYPE));
276 }
277 let boundary_kind = start_boundary.kind();
278 let effective_mode = mode_override.unwrap_or(match boundary_kind {
279 BoundaryKind::Text => BoundariesMode::Exclusive,
280 BoundaryKind::Position => BoundariesMode::Inclusive,
281 });
282
283 let start_shape = start_boundary.shape();
284 let stop_shape = stop_boundary.shape();
285 let text_shape = normalized_text.shape();
286
287 let shape_ts = broadcast_shapes(BUILTIN_NAME, text_shape, start_shape).map_err(|err| {
288 erase_between_error_with_message(
289 format!("{}: {err}", ERASE_BETWEEN_ERROR_SIZE_MISMATCH.message),
290 &ERASE_BETWEEN_ERROR_SIZE_MISMATCH,
291 )
292 })?;
293 let output_shape = broadcast_shapes(BUILTIN_NAME, &shape_ts, stop_shape).map_err(|err| {
294 erase_between_error_with_message(
295 format!("{}: {err}", ERASE_BETWEEN_ERROR_SIZE_MISMATCH.message),
296 &ERASE_BETWEEN_ERROR_SIZE_MISMATCH,
297 )
298 })?;
299 if !normalized_text.supports_shape(&output_shape) {
300 return Err(erase_between_error(&ERASE_BETWEEN_ERROR_SIZE_MISMATCH));
301 }
302
303 let total: usize = output_shape.iter().copied().product();
304 if total == 0 {
305 return normalized_text.into_value(Vec::new(), output_shape);
306 }
307
308 let text_strides = compute_strides(text_shape);
309 let start_strides = compute_strides(start_shape);
310 let stop_strides = compute_strides(stop_shape);
311
312 let mut results = Vec::with_capacity(total);
313
314 for idx in 0..total {
315 let text_idx = broadcast_index(idx, &output_shape, text_shape, &text_strides);
316 let start_idx = broadcast_index(idx, &output_shape, start_shape, &start_strides);
317 let stop_idx = broadcast_index(idx, &output_shape, stop_shape, &stop_strides);
318
319 let result = match boundary_kind {
320 BoundaryKind::Text => {
321 let text_value = normalized_text.data(text_idx);
322 let start_value = start_boundary.text(start_idx);
323 let stop_value = stop_boundary.text(stop_idx);
324 erase_with_text_boundaries(text_value, start_value, stop_value, effective_mode)
325 }
326 BoundaryKind::Position => {
327 let text_value = normalized_text.data(text_idx);
328 let start_value = start_boundary.position(start_idx);
329 let stop_value = stop_boundary.position(stop_idx);
330 erase_with_positions(text_value, start_value, stop_value, effective_mode)
331 }
332 };
333 results.push(result);
334 }
335
336 normalized_text.into_value(results, output_shape)
337}
338
339async fn parse_boundaries_option(args: &[Value]) -> BuiltinResult<Option<BoundariesMode>> {
340 if args.is_empty() {
341 return Ok(None);
342 }
343 if !args.len().is_multiple_of(2) {
344 return Err(erase_between_error(&ERASE_BETWEEN_ERROR_NAME_VALUE_PAIR));
345 }
346
347 let mut mode: Option<BoundariesMode> = None;
348 let mut idx = 0;
349 while idx < args.len() {
350 let name_value = gather_if_needed_async(&args[idx]).await.map_err(map_flow)?;
351 let name = value_to_string(&name_value)
352 .ok_or_else(|| erase_between_error(&ERASE_BETWEEN_ERROR_OPTION_NAME))?;
353 if !name.eq_ignore_ascii_case("boundaries") {
354 return Err(erase_between_error(&ERASE_BETWEEN_ERROR_OPTION_NAME));
355 }
356 let value = gather_if_needed_async(&args[idx + 1])
357 .await
358 .map_err(map_flow)?;
359 let value_str = value_to_string(&value)
360 .ok_or_else(|| erase_between_error(&ERASE_BETWEEN_ERROR_OPTION_VALUE))?;
361 let parsed_mode = if value_str.eq_ignore_ascii_case("inclusive") {
362 BoundariesMode::Inclusive
363 } else if value_str.eq_ignore_ascii_case("exclusive") {
364 BoundariesMode::Exclusive
365 } else {
366 return Err(erase_between_error(&ERASE_BETWEEN_ERROR_OPTION_VALUE));
367 };
368 mode = Some(parsed_mode);
369 idx += 2;
370 }
371 Ok(mode)
372}
373
374fn value_to_string(value: &Value) -> Option<String> {
375 match value {
376 Value::String(s) => Some(s.clone()),
377 Value::StringArray(sa) if sa.data.len() == 1 => Some(sa.data[0].clone()),
378 Value::CharArray(ca) if ca.rows <= 1 => {
379 if ca.rows == 0 {
380 Some(String::new())
381 } else {
382 Some(char_row_to_string_slice(&ca.data, ca.cols, 0))
383 }
384 }
385 Value::CharArray(_) => None,
386 Value::Cell(cell) if cell.data.len() == 1 => {
387 let element = &cell.data[0];
388 value_to_string(element)
389 }
390 _ => None,
391 }
392}
393
/// Outcome of erasing from one text element.
#[derive(Clone)]
struct EraseResult {
    /// Resulting text; the literal `<missing>` stands for a missing string.
    text: String,
}

impl EraseResult {
    /// Result representing a missing string.
    fn missing() -> Self {
        Self::text(String::from("<missing>"))
    }

    /// Result wrapping the given text.
    fn text(text: String) -> Self {
        EraseResult { text }
    }
}
410
411fn erase_with_text_boundaries(
412 text: &str,
413 start: &str,
414 stop: &str,
415 mode: BoundariesMode,
416) -> EraseResult {
417 if is_missing_string(text) || is_missing_string(start) || is_missing_string(stop) {
418 return EraseResult::missing();
419 }
420
421 if let Some(start_idx) = text.find(start) {
422 let search_start = start_idx + start.len();
423 if search_start > text.len() {
424 return EraseResult::text(text.to_string());
425 }
426 if let Some(relative_end) = text[search_start..].find(stop) {
427 let end_idx = search_start + relative_end;
428 match mode {
429 BoundariesMode::Inclusive => {
430 let end_capture = min(text.len(), end_idx + stop.len());
431 let mut result = String::with_capacity(text.len());
432 result.push_str(&text[..start_idx]);
433 result.push_str(&text[end_capture..]);
434 EraseResult::text(result)
435 }
436 BoundariesMode::Exclusive => {
437 let mut result = String::with_capacity(text.len());
438 result.push_str(&text[..search_start]);
439 result.push_str(&text[end_idx..]);
440 EraseResult::text(result)
441 }
442 }
443 } else {
444 EraseResult::text(text.to_string())
445 }
446 } else {
447 EraseResult::text(text.to_string())
448 }
449}
450
451fn erase_with_positions(
452 text: &str,
453 start: usize,
454 stop: usize,
455 mode: BoundariesMode,
456) -> EraseResult {
457 if is_missing_string(text) {
458 return EraseResult::missing();
459 }
460 if text.is_empty() {
461 return EraseResult::text(String::new());
462 }
463 let chars: Vec<char> = text.chars().collect();
464 let len = chars.len();
465 if len == 0 {
466 return EraseResult::text(String::new());
467 }
468
469 if start == 0 || stop == 0 {
470 return EraseResult::text(text.to_string());
471 }
472
473 if start > len {
474 return EraseResult::text(text.to_string());
475 }
476 let stop_clamped = stop.min(len);
477
478 match mode {
479 BoundariesMode::Inclusive => {
480 if stop_clamped < start {
481 return EraseResult::text(text.to_string());
482 }
483 let start_idx = start - 1;
484 let end_idx = stop_clamped - 1;
485 if start_idx >= len || end_idx >= len || start_idx > end_idx {
486 EraseResult::text(text.to_string())
487 } else {
488 let mut result = String::with_capacity(len);
489 for (idx, ch) in chars.iter().enumerate() {
490 if idx < start_idx || idx > end_idx {
491 result.push(*ch);
492 }
493 }
494 EraseResult::text(result)
495 }
496 }
497 BoundariesMode::Exclusive => {
498 if start + 1 >= stop_clamped {
499 return EraseResult::text(text.to_string());
500 }
501 let start_idx = start;
502 let end_idx = stop_clamped - 2;
503 if start_idx >= len || end_idx >= len || start_idx > end_idx {
504 EraseResult::text(text.to_string())
505 } else {
506 let mut result = String::with_capacity(len);
507 for (idx, ch) in chars.iter().enumerate() {
508 if idx >= start_idx && idx <= end_idx {
509 continue;
510 }
511 result.push(*ch);
512 }
513 EraseResult::text(result)
514 }
515 }
516 }
517}
518
/// Layout of a cell-array text input, retained so the output cell can be
/// rebuilt with the same shape and per-element types.
#[derive(Clone, Debug)]
struct CellInfo {
    /// Shape of the original cell array.
    shape: Vec<usize>,
    /// Kind of each cell element, in the same order as the flattened data.
    element_kinds: Vec<CellElementKind>,
}
524
/// Type of an individual cell element; decides whether the rewritten element
/// is emitted as `Value::String` or as a row `Value::CharArray`.
#[derive(Clone, Debug)]
enum CellElementKind {
    String,
    Char,
}
530
/// Container kind of the text input, used by `NormalizedText::into_value` to
/// rebuild an output of the matching kind.
#[derive(Clone, Debug)]
enum TextKind {
    StringScalar,
    StringArray,
    /// Char array; `rows` is the row count of the input.
    CharArray { rows: usize },
    CellArray(CellInfo),
}
538
539#[derive(Clone, Debug)]
540struct NormalizedText {
541 data: Vec<String>,
542 shape: Vec<usize>,
543 kind: TextKind,
544}
545
546impl NormalizedText {
547 fn from_value(value: Value) -> BuiltinResult<Self> {
548 match value {
549 Value::String(s) => Ok(Self {
550 data: vec![s],
551 shape: vec![1, 1],
552 kind: TextKind::StringScalar,
553 }),
554 Value::StringArray(sa) => Ok(Self {
555 data: sa.data.clone(),
556 shape: sa.shape.clone(),
557 kind: TextKind::StringArray,
558 }),
559 Value::CharArray(ca) => {
560 let rows = ca.rows;
561 let mut data = Vec::with_capacity(rows);
562 for row in 0..rows {
563 data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
564 }
565 Ok(Self {
566 data,
567 shape: vec![rows, 1],
568 kind: TextKind::CharArray { rows },
569 })
570 }
571 Value::Cell(cell) => {
572 let shape = cell.shape.clone();
573 let mut data = Vec::with_capacity(cell.data.len());
574 let mut kinds = Vec::with_capacity(cell.data.len());
575 for element in &cell.data {
576 match &**element {
577 Value::String(s) => {
578 data.push(s.clone());
579 kinds.push(CellElementKind::String);
580 }
581 Value::StringArray(sa) if sa.data.len() == 1 => {
582 data.push(sa.data[0].clone());
583 kinds.push(CellElementKind::String);
584 }
585 Value::CharArray(ca) if ca.rows <= 1 => {
586 if ca.rows == 0 {
587 data.push(String::new());
588 } else {
589 data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
590 }
591 kinds.push(CellElementKind::Char);
592 }
593 Value::CharArray(_) => {
594 return Err(erase_between_error(&ERASE_BETWEEN_ERROR_CELL_ELEMENT))
595 }
596 _ => return Err(erase_between_error(&ERASE_BETWEEN_ERROR_CELL_ELEMENT)),
597 }
598 }
599 Ok(Self {
600 data,
601 shape: shape.clone(),
602 kind: TextKind::CellArray(CellInfo {
603 shape,
604 element_kinds: kinds,
605 }),
606 })
607 }
608 _ => Err(erase_between_error(&ERASE_BETWEEN_ERROR_INVALID_INPUT)),
609 }
610 }
611
612 fn shape(&self) -> &[usize] {
613 &self.shape
614 }
615
616 fn data(&self, idx: usize) -> &str {
617 &self.data[idx]
618 }
619
620 fn supports_shape(&self, output_shape: &[usize]) -> bool {
621 match &self.kind {
622 TextKind::StringScalar => true,
623 TextKind::StringArray => true,
624 TextKind::CharArray { .. } => output_shape == self.shape,
625 TextKind::CellArray(info) => output_shape == info.shape,
626 }
627 }
628
629 fn into_value(
630 self,
631 results: Vec<EraseResult>,
632 output_shape: Vec<usize>,
633 ) -> BuiltinResult<Value> {
634 match self.kind {
635 TextKind::StringScalar => {
636 let total: usize = output_shape.iter().product();
637 if total == 0 {
638 let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
639 let array = StringArray::new(data, output_shape).map_err(|e| {
640 erase_between_error_with_message(
641 format!("{BUILTIN_NAME}: {e}"),
642 &ERASE_BETWEEN_ERROR_INTERNAL,
643 )
644 })?;
645 return Ok(Value::StringArray(array));
646 }
647
648 if results.len() <= 1 {
649 let value = results
650 .into_iter()
651 .next()
652 .unwrap_or_else(|| EraseResult::text(String::new()));
653 Ok(Value::String(value.text))
654 } else {
655 let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
656 let array = StringArray::new(data, output_shape).map_err(|e| {
657 erase_between_error_with_message(
658 format!("{BUILTIN_NAME}: {e}"),
659 &ERASE_BETWEEN_ERROR_INTERNAL,
660 )
661 })?;
662 Ok(Value::StringArray(array))
663 }
664 }
665 TextKind::StringArray => {
666 let data = results.into_iter().map(|r| r.text).collect::<Vec<_>>();
667 let array = StringArray::new(data, output_shape).map_err(|e| {
668 erase_between_error_with_message(
669 format!("{BUILTIN_NAME}: {e}"),
670 &ERASE_BETWEEN_ERROR_INTERNAL,
671 )
672 })?;
673 Ok(Value::StringArray(array))
674 }
675 TextKind::CharArray { rows } => {
676 if rows == 0 {
677 return CharArray::new(Vec::new(), 0, 0)
678 .map(Value::CharArray)
679 .map_err(|e| {
680 erase_between_error_with_message(
681 format!("{BUILTIN_NAME}: {e}"),
682 &ERASE_BETWEEN_ERROR_INTERNAL,
683 )
684 });
685 }
686 if results.len() != rows {
687 return Err(erase_between_error(&ERASE_BETWEEN_ERROR_SIZE_MISMATCH));
688 }
689 let mut max_width = 0usize;
690 let mut row_strings = Vec::with_capacity(rows);
691 for result in &results {
692 let width = result.text.chars().count();
693 max_width = max_width.max(width);
694 row_strings.push(result.text.clone());
695 }
696 let mut flattened = Vec::with_capacity(rows * max_width);
697 for row in row_strings {
698 let mut chars: Vec<char> = row.chars().collect();
699 if chars.len() < max_width {
700 chars.resize(max_width, ' ');
701 }
702 flattened.extend(chars);
703 }
704 CharArray::new(flattened, rows, max_width)
705 .map(Value::CharArray)
706 .map_err(|e| {
707 erase_between_error_with_message(
708 format!("{BUILTIN_NAME}: {e}"),
709 &ERASE_BETWEEN_ERROR_INTERNAL,
710 )
711 })
712 }
713 TextKind::CellArray(info) => {
714 if results.len() != info.element_kinds.len() {
715 return Err(erase_between_error(&ERASE_BETWEEN_ERROR_SIZE_MISMATCH));
716 }
717 let mut values = Vec::with_capacity(results.len());
718 for (idx, result) in results.into_iter().enumerate() {
719 match info.element_kinds[idx] {
720 CellElementKind::String => values.push(Value::String(result.text)),
721 CellElementKind::Char => {
722 let ca = CharArray::new_row(&result.text);
723 values.push(Value::CharArray(ca));
724 }
725 }
726 }
727 make_cell_with_shape(values, info.shape).map_err(|e| {
728 erase_between_error_with_message(
729 format!("{BUILTIN_NAME}: {e}"),
730 &ERASE_BETWEEN_ERROR_INTERNAL,
731 )
732 })
733 }
734 }
735 }
736}
737
/// Domain of a boundary argument: marker text or numeric positions. The
/// builtin requires the start and stop boundaries to share the same kind.
#[derive(Clone, Debug, PartialEq, Eq)]
enum BoundaryKind {
    Text,
    Position,
}
743
744#[derive(Clone, Debug)]
745enum BoundaryArg {
746 Text(BoundaryText),
747 Position(BoundaryPositions),
748}
749
750impl BoundaryArg {
751 fn from_value(value: Value) -> BuiltinResult<Self> {
752 match value {
753 Value::String(_) | Value::StringArray(_) | Value::CharArray(_) | Value::Cell(_) => {
754 BoundaryText::from_value(value).map(BoundaryArg::Text)
755 }
756 Value::Num(_) | Value::Int(_) | Value::Tensor(_) => {
757 BoundaryPositions::from_value(value).map(BoundaryArg::Position)
758 }
759 other => Err(erase_between_error_with_message(
760 format!(
761 "{}: unsupported argument {other:?}",
762 ERASE_BETWEEN_ERROR_BOUNDARY_TYPE.message
763 ),
764 &ERASE_BETWEEN_ERROR_BOUNDARY_TYPE,
765 )),
766 }
767 }
768
769 fn kind(&self) -> BoundaryKind {
770 match self {
771 BoundaryArg::Text(_) => BoundaryKind::Text,
772 BoundaryArg::Position(_) => BoundaryKind::Position,
773 }
774 }
775
776 fn shape(&self) -> &[usize] {
777 match self {
778 BoundaryArg::Text(text) => &text.shape,
779 BoundaryArg::Position(pos) => &pos.shape,
780 }
781 }
782
783 fn text(&self, idx: usize) -> &str {
784 match self {
785 BoundaryArg::Text(text) => &text.data[idx],
786 BoundaryArg::Position(_) => unreachable!(),
787 }
788 }
789
790 fn position(&self, idx: usize) -> usize {
791 match self {
792 BoundaryArg::Position(pos) => pos.data[idx],
793 BoundaryArg::Text(_) => unreachable!(),
794 }
795 }
796}
797
798#[derive(Clone, Debug)]
799struct BoundaryText {
800 data: Vec<String>,
801 shape: Vec<usize>,
802}
803
804impl BoundaryText {
805 fn from_value(value: Value) -> BuiltinResult<Self> {
806 match value {
807 Value::String(s) => Ok(Self {
808 data: vec![s],
809 shape: vec![1, 1],
810 }),
811 Value::StringArray(sa) => Ok(Self {
812 data: sa.data.clone(),
813 shape: sa.shape.clone(),
814 }),
815 Value::CharArray(ca) => {
816 let mut data = Vec::with_capacity(ca.rows);
817 for row in 0..ca.rows {
818 data.push(char_row_to_string_slice(&ca.data, ca.cols, row));
819 }
820 Ok(Self {
821 data,
822 shape: vec![ca.rows, 1],
823 })
824 }
825 Value::Cell(cell) => {
826 let shape = cell.shape.clone();
827 let mut data = Vec::with_capacity(cell.data.len());
828 for element in &cell.data {
829 match &**element {
830 Value::String(s) => data.push(s.clone()),
831 Value::StringArray(sa) if sa.data.len() == 1 => {
832 data.push(sa.data[0].clone());
833 }
834 Value::CharArray(ca) if ca.rows <= 1 => {
835 if ca.rows == 0 {
836 data.push(String::new());
837 } else {
838 data.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
839 }
840 }
841 Value::CharArray(_) => {
842 return Err(erase_between_error(&ERASE_BETWEEN_ERROR_CELL_ELEMENT))
843 }
844 _ => return Err(erase_between_error(&ERASE_BETWEEN_ERROR_CELL_ELEMENT)),
845 }
846 }
847 Ok(Self { data, shape })
848 }
849 _ => Err(erase_between_error(&ERASE_BETWEEN_ERROR_BOUNDARY_TYPE)),
850 }
851 }
852}
853
854#[derive(Clone, Debug)]
855struct BoundaryPositions {
856 data: Vec<usize>,
857 shape: Vec<usize>,
858}
859
860impl BoundaryPositions {
861 fn from_value(value: Value) -> BuiltinResult<Self> {
862 match value {
863 Value::Num(n) => Ok(Self {
864 data: vec![parse_position(n)?],
865 shape: vec![1, 1],
866 }),
867 Value::Int(i) => Ok(Self {
868 data: vec![parse_position_int(i)?],
869 shape: vec![1, 1],
870 }),
871 Value::Tensor(t) => {
872 let mut data = Vec::with_capacity(t.data.len());
873 for &entry in &t.data {
874 data.push(parse_position(entry)?);
875 }
876 Ok(Self {
877 data,
878 shape: if t.shape.is_empty() {
879 vec![t.rows, t.cols.max(1)]
880 } else {
881 t.shape
882 },
883 })
884 }
885 _ => Err(erase_between_error(&ERASE_BETWEEN_ERROR_BOUNDARY_TYPE)),
886 }
887 }
888}
889
890fn parse_position(value: f64) -> BuiltinResult<usize> {
891 if !value.is_finite() || value < 1.0 {
892 return Err(erase_between_error(&ERASE_BETWEEN_ERROR_POSITION_TYPE));
893 }
894 if (value.fract()).abs() > f64::EPSILON {
895 return Err(erase_between_error(&ERASE_BETWEEN_ERROR_POSITION_TYPE));
896 }
897 if value > (usize::MAX as f64) {
898 return Err(erase_between_error(&ERASE_BETWEEN_ERROR_POSITION_TYPE));
899 }
900 Ok(value as usize)
901}
902
903fn parse_position_int(value: IntValue) -> BuiltinResult<usize> {
904 let val = value.to_i64();
905 if val <= 0 {
906 return Err(erase_between_error(&ERASE_BETWEEN_ERROR_POSITION_TYPE));
907 }
908 Ok(val as usize)
909}
910
911#[cfg(test)]
912pub(crate) mod tests {
913 #![allow(non_snake_case)]
914
915 use super::*;
916 use runmat_builtins::{CellArray, CharArray, ResolveContext, StringArray, Tensor, Type};
917
918 fn erase_between_builtin(
919 text: Value,
920 start: Value,
921 stop: Value,
922 rest: Vec<Value>,
923 ) -> BuiltinResult<Value> {
924 futures::executor::block_on(super::erase_between_builtin(text, start, stop, rest))
925 }
926
927 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
928 #[test]
929 fn eraseBetween_text_default_exclusive() {
930 let result = erase_between_builtin(
931 Value::String("The quick brown fox".into()),
932 Value::String("quick".into()),
933 Value::String(" fox".into()),
934 Vec::new(),
935 )
936 .expect("eraseBetween");
937 assert_eq!(result, Value::String("The quick fox".into()));
938 }
939
940 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
941 #[test]
942 fn eraseBetween_text_inclusive_option() {
943 let result = erase_between_builtin(
944 Value::String("The quick brown fox jumps over the lazy dog".into()),
945 Value::String(" brown".into()),
946 Value::String("lazy".into()),
947 vec![
948 Value::String("Boundaries".into()),
949 Value::String("inclusive".into()),
950 ],
951 )
952 .expect("eraseBetween");
953 assert_eq!(result, Value::String("The quick dog".into()));
954 }
955
956 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
957 #[test]
958 fn eraseBetween_numeric_positions_default_inclusive() {
959 let result = erase_between_builtin(
960 Value::String("Edgar Allen Poe".into()),
961 Value::Num(6.0),
962 Value::Num(11.0),
963 Vec::new(),
964 )
965 .expect("eraseBetween");
966 assert_eq!(result, Value::String("Edgar Poe".into()));
967 }
968
969 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
970 #[test]
971 fn eraseBetween_numeric_positions_int_inputs() {
972 let result = erase_between_builtin(
973 Value::String("abcdef".into()),
974 Value::Int(IntValue::I32(2)),
975 Value::Int(IntValue::I32(5)),
976 Vec::new(),
977 )
978 .expect("eraseBetween");
979 assert_eq!(result, Value::String("af".into()));
980 }
981
982 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
983 #[test]
984 fn eraseBetween_numeric_positions_exclusive_option() {
985 let result = erase_between_builtin(
986 Value::String("small|medium|large".into()),
987 Value::Num(6.0),
988 Value::Num(13.0),
989 vec![
990 Value::String("Boundaries".into()),
991 Value::String("exclusive".into()),
992 ],
993 )
994 .expect("eraseBetween");
995 assert_eq!(result, Value::String("small||large".into()));
996 }
997
998 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
999 #[test]
1000 fn eraseBetween_start_not_found_returns_original() {
1001 let result = erase_between_builtin(
1002 Value::String("RunMat Accelerate".into()),
1003 Value::String("<".into()),
1004 Value::String(">".into()),
1005 Vec::new(),
1006 )
1007 .expect("eraseBetween");
1008 assert_eq!(result, Value::String("RunMat Accelerate".into()));
1009 }
1010
1011 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
1012 #[test]
1013 fn eraseBetween_stop_not_found_returns_original() {
1014 let result = erase_between_builtin(
1015 Value::String("Device<GPU>".into()),
1016 Value::String("<".into()),
1017 Value::String(")".into()),
1018 Vec::new(),
1019 )
1020 .expect("eraseBetween");
1021 assert_eq!(result, Value::String("Device<GPU>".into()));
1022 }
1023
// A 1x1 string array holding the "<missing>" sentinel is expected to pass through
// untouched, even though the sentinel text itself contains both delimiters.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eraseBetween_missing_string_propagates() {
    // Builds a fresh 1x1 array each time so input and expectation are independent values.
    let missing_scalar = || StringArray::new(vec!["<missing>".into()], vec![1, 1]).unwrap();

    let outcome = erase_between_builtin(
        Value::StringArray(missing_scalar()),
        Value::String("<".into()),
        Value::String(">".into()),
        Vec::new(),
    )
    .expect("eraseBetween");

    assert_eq!(outcome, Value::StringArray(missing_scalar()));
}
1040
// Broadcasting a scalar text against 0x1 start/stop arrays is expected to yield
// an empty 0x1 string array rather than an error or a scalar.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eraseBetween_zero_sized_broadcast_produces_empty_array() {
    let empty_column = || StringArray::new(Vec::new(), vec![0, 1]).unwrap();
    let start = Value::StringArray(empty_column());
    let stop = Value::StringArray(empty_column());

    let outcome = erase_between_builtin(Value::String("abc".into()), start, stop, Vec::new())
        .expect("eraseBetween");

    let sa = match outcome {
        Value::StringArray(sa) => sa,
        other => panic!("expected string array, got {other:?}"),
    };
    assert_eq!(sa.data.len(), 0);
    assert_eq!(sa.shape, vec![0, 1]);
}
1061
// Element-wise numeric positions: each row of the 2x1 text array is paired with
// the matching row of the 2x1 start/stop tensors ("abcd" with 1..3, "wxyz" with 2..4).
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eraseBetween_numeric_positions_array() {
    let column = vec![2, 1];
    let text = StringArray::new(vec!["abcd".into(), "wxyz".into()], column.clone()).unwrap();
    let start = Tensor::new(vec![1.0, 2.0], column.clone()).unwrap();
    let stop = Tensor::new(vec![3.0, 4.0], column).unwrap();

    let outcome = erase_between_builtin(
        Value::StringArray(text),
        Value::Tensor(start),
        Value::Tensor(stop),
        Vec::new(),
    )
    .expect("eraseBetween");

    let sa = match outcome {
        Value::StringArray(sa) => sa,
        other => panic!("expected string array, got {other:?}"),
    };
    assert_eq!(sa.data, vec!["d".to_string(), "w".to_string()]);
    assert_eq!(sa.shape, vec![2, 1]);
}
1083
// A 1x2 cell holding a char row and a string scalar is processed element-wise with
// inclusive boundaries; each output element is expected to keep its input's type.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eraseBetween_cell_array_preserves_types() {
    // Every operand in this test is a 1x2 cell array.
    let row_cell = |items: Vec<Value>| CellArray::new(items, 1, 2).unwrap();

    let text = row_cell(vec![
        Value::CharArray(CharArray::new_row("A[B]C")),
        Value::String("Planner<GPU>".into()),
    ]);
    let start = row_cell(vec![Value::String("[".into()), Value::String("<".into())]);
    let stop = row_cell(vec![Value::String("]".into()), Value::String(">".into())]);
    let options = vec![
        Value::String("Boundaries".into()),
        Value::String("inclusive".into()),
    ];

    let outcome = erase_between_builtin(
        Value::Cell(text),
        Value::Cell(start),
        Value::Cell(stop),
        options,
    )
    .expect("eraseBetween");

    let out = match outcome {
        Value::Cell(out) => out,
        other => panic!("expected cell array, got {other:?}"),
    };
    let first = out.get(0, 0).unwrap();
    let second = out.get(0, 1).unwrap();
    assert_eq!(first, Value::CharArray(CharArray::new_row("AC")));
    assert_eq!(second, Value::String("Planner".into()));
}
1128
// Char-array input yields char-array output. With no options the "<" and ">"
// delimiters are expected to remain; with `"Boundaries", "inclusive"` they are
// removed as well. Trailing whitespace is trimmed before comparing.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eraseBetween_char_array_default_and_inclusive() {
    let source = "Device<GPU>";
    let chars = CharArray::new(source.chars().collect(), 1, source.len()).unwrap();

    // Runs eraseBetween on the char row with the given options and flattens the
    // resulting char array into a `String`.
    let erase_angle_span = |options: Vec<Value>| -> String {
        let erased = erase_between_builtin(
            Value::CharArray(chars.clone()),
            Value::String("<".into()),
            Value::String(">".into()),
            options,
        )
        .expect("eraseBetween");
        match erased {
            Value::CharArray(out) => out.data.iter().collect(),
            other => panic!("expected char array, got {other:?}"),
        }
    };

    let default_text = erase_angle_span(Vec::new());
    assert_eq!(default_text.trim_end(), "Device<>");

    let inclusive_text = erase_angle_span(vec![
        Value::String("Boundaries".into()),
        Value::String("inclusive".into()),
    ]);
    assert_eq!(inclusive_text.trim_end(), "Device");
}
1167
// The name/value pair may be given as char arrays, and the value is matched
// case-insensitively: "INCLUSIVE" is expected to behave like "inclusive".
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eraseBetween_option_with_char_arrays_case_insensitive() {
    let option_name = Value::CharArray(CharArray::new_row("Boundaries"));
    let option_value = Value::CharArray(CharArray::new_row("INCLUSIVE"));

    let outcome = erase_between_builtin(
        Value::String("A<mid>B".into()),
        Value::String("<".into()),
        Value::String(">".into()),
        vec![option_name, option_value],
    )
    .expect("eraseBetween");

    assert_eq!(outcome, Value::String("AB".into()));
}
1183
// Scalar start/stop delimiters are applied to every element of a 2x1 string array;
// with default boundaries the "[" and "]" delimiters are expected to remain.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eraseBetween_text_scalar_broadcast() {
    let elements = vec!["alpha[GPU]".into(), "beta[GPU]".into()];
    let text = StringArray::new(elements, vec![2, 1]).unwrap();

    let outcome = erase_between_builtin(
        Value::StringArray(text),
        Value::String("[".into()),
        Value::String("]".into()),
        Vec::new(),
    )
    .expect("eraseBetween");

    let sa = match outcome {
        Value::StringArray(sa) => sa,
        other => panic!("expected string array, got {other:?}"),
    };
    assert_eq!(sa.data, vec!["alpha[]".to_string(), "beta[]".to_string()]);
}
1203
// An unrecognised `Boundaries` value ("middle") must fail with the message and
// identifier of `ERASE_BETWEEN_ERROR_OPTION_VALUE`.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eraseBetween_option_invalid_value() {
    let expected = &ERASE_BETWEEN_ERROR_OPTION_VALUE;
    let options = vec![
        Value::String("Boundaries".into()),
        Value::String("middle".into()),
    ];

    let err = erase_between_builtin(
        Value::String("abc".into()),
        Value::String("a".into()),
        Value::String("c".into()),
        options,
    )
    .unwrap_err();

    assert_eq!(err.to_string(), expected.message);
    assert_eq!(err.identifier(), expected.identifier);
}
1223
// An unknown option name ("Padding") must fail with the message and identifier
// of `ERASE_BETWEEN_ERROR_OPTION_NAME`.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eraseBetween_option_name_error() {
    let expected = &ERASE_BETWEEN_ERROR_OPTION_NAME;
    let options = vec![
        Value::String("Padding".into()),
        Value::String("inclusive".into()),
    ];

    let err = erase_between_builtin(
        Value::String("abc".into()),
        Value::String("a".into()),
        Value::String("c".into()),
        options,
    )
    .unwrap_err();

    assert_eq!(err.to_string(), expected.message);
    assert_eq!(err.identifier(), expected.identifier);
}
1240
// An option name without a matching value must fail with the message and
// identifier of `ERASE_BETWEEN_ERROR_NAME_VALUE_PAIR`.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eraseBetween_option_pair_error() {
    let expected = &ERASE_BETWEEN_ERROR_NAME_VALUE_PAIR;
    let dangling_name = vec![Value::String("Boundaries".into())];

    let err = erase_between_builtin(
        Value::String("abc".into()),
        Value::String("a".into()),
        Value::String("b".into()),
        dangling_name,
    )
    .unwrap_err();

    assert_eq!(err.to_string(), expected.message);
    assert_eq!(err.identifier(), expected.identifier);
}
1257
// A fractional numeric start position (0.5) must fail with the message and
// identifier of `ERASE_BETWEEN_ERROR_POSITION_TYPE`.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eraseBetween_position_type_error() {
    let expected = &ERASE_BETWEEN_ERROR_POSITION_TYPE;
    let text = Value::String("abc".into());

    let err = erase_between_builtin(text, Value::Num(0.5), Value::Num(2.0), Vec::new())
        .unwrap_err();

    assert_eq!(err.to_string(), expected.message);
    assert_eq!(err.identifier(), expected.identifier);
}
1274
// Mixing a text start boundary with a numeric stop boundary must fail with the
// message and identifier of `ERASE_BETWEEN_ERROR_BOUNDARY_TYPE`.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn eraseBetween_mixed_boundary_error() {
    let expected = &ERASE_BETWEEN_ERROR_BOUNDARY_TYPE;
    let text_start = Value::String("a".into());
    let numeric_stop = Value::Num(3.0);

    let err = erase_between_builtin(
        Value::String("abc".into()),
        text_start,
        numeric_stop,
        Vec::new(),
    )
    .unwrap_err();

    assert_eq!(err.to_string(), expected.message);
    assert_eq!(err.identifier(), expected.identifier);
}
1291
// The `text_preserve_type` resolver is expected to map a single `Type::String`
// input to a `Type::String` output.
//
// Carries the same wasm32 `wasm_bindgen_test` attribute as the other tests in this
// module so that it is also registered with the wasm test harness.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn erase_between_type_preserves_text() {
    assert_eq!(
        text_preserve_type(&[Type::String], &ResolveContext::new(Vec::new())),
        Type::String
    );
}
1299}