1use itertools::Itertools;
2use nu_engine::command_prelude::*;
3use nu_protocol::{Config, Range};
4use std::{io::Cursor, iter::Peekable, str::CharIndices, sync::Arc};
5
/// Peekable stream of `(byte offset, char)` pairs over a piece of text; this is the cursor type
/// that `find_columns` creates and `baseline` consumes.
type Input<'t> = Peekable<CharIndices<'t>>;
7
/// Returns `true` when `c` is one of the characters this command treats as table-border
/// decoration: horizontal rules, vertical rules, or corner/junction pieces (including the ASCII
/// stand-ins `-`, `=`, `|` and `+`).
fn is_box_char(c: char) -> bool {
    // Horizontal rule characters.
    const HORIZONTAL: &[char] = &['─', '━', '┄', '┅', '┈', '┉', '-', '='];
    // Vertical rule characters.
    const VERTICAL: &[char] = &['│', '┃', '┆', '┇', '┊', '┋', '|'];
    // Corners and junctions.
    const JUNCTIONS: &[char] = &['+', '├', '┤', '┬', '┴', '┼', '┌', '┐', '└', '┘'];

    [HORIZONTAL, VERTICAL, JUNCTIONS]
        .iter()
        .any(|group| group.contains(&c))
}
22
/// The `detect columns` command: attempts to split text input into a table of columns.
#[derive(Clone)]
pub struct DetectColumns;
29
30impl Command for DetectColumns {
31 fn name(&self) -> &str {
32 "detect columns"
33 }
34
35 fn signature(&self) -> Signature {
36 Signature::build("detect columns")
37 .named(
38 "skip",
39 SyntaxShape::Int,
40 "Number of rows to skip before detecting.",
41 Some('s'),
42 )
43 .input_output_types(vec![
44 (Type::String, Type::table()),
45 (Type::table(), Type::table()),
46 ])
47 .switch("no-headers", "Don't detect headers.", Some('n'))
48 .switch(
49 "ignore-box-chars",
50 "Ignore lines consisting entirely of box drawing characters and clean box characters from tokens.",
51 Some('i'),
52 )
53 .named(
54 "combine-columns",
55 SyntaxShape::Range,
56 "Columns to be combined; listed as a range.",
57 Some('c'),
58 )
59 .switch(
60 "guess",
61 "Detect columns by guessing width, it may be useful if default one doesn't work.",
62 None,
63 )
64 .category(Category::Strings)
65 }
66
67 fn description(&self) -> &str {
68 "Attempt to automatically split text into multiple columns."
69 }
70
71 fn search_terms(&self) -> Vec<&str> {
72 vec!["split", "tabular"]
73 }
74
75 fn examples(&self) -> Vec<Example<'_>> {
76 vec![
77 Example {
78 description: "use --guess if you find default algorithm not working",
79 example: r"
80'Filesystem 1K-blocks Used Available Use% Mounted on
81none 8150224 4 8150220 1% /mnt/c' | detect columns --guess",
82 result: Some(Value::test_list(vec![Value::test_record(record! {
83 "Filesystem" => Value::test_string("none"),
84 "1K-blocks" => Value::test_string("8150224"),
85 "Used" => Value::test_string("4"),
86 "Available" => Value::test_string("8150220"),
87 "Use%" => Value::test_string("1%"),
88 "Mounted on" => Value::test_string("/mnt/c")
89 })])),
90 },
91 Example {
92 description: "detect columns with no headers",
93 example: "'a b c' | detect columns --no-headers",
94 result: Some(Value::test_list(vec![Value::test_record(record! {
95 "column0" => Value::test_string("a"),
96 "column1" => Value::test_string("b"),
97 "column2" => Value::test_string("c"),
98 })])),
99 },
100 Example {
101 description: "",
102 example: "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns 0..1 ",
103 result: None,
104 },
105 Example {
106 description: "Splits a multi-line string into columns with headers detected",
107 example: "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns -2..-1 ",
108 result: None,
109 },
110 Example {
111 description: "Splits a multi-line string into columns with headers detected",
112 example: "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns 2.. ",
113 result: None,
114 },
115 Example {
116 description: "Parse external ls command and combine columns for datetime",
117 example: "^ls -lh | detect columns --no-headers --skip 1 --combine-columns 5..7",
118 result: None,
119 },
120 Example {
121 description: "Table literal input is passed through unchanged",
122 example: "[[name, age]; [Alice, 25]] | detect columns",
123 result: Some(Value::test_list(vec![Value::test_record(record! {
124 "name" => Value::test_string("Alice"),
125 "age" => Value::test_int(25)
126 })])),
127 },
128 Example {
129 description: "List of records input is passed through unchanged",
130 example: "[{name: Alice, age: 25}, {name: Bob, age: 30}] | detect columns",
131 result: Some(Value::test_list(vec![
132 Value::test_record(record! {
133 "name" => Value::test_string("Alice"),
134 "age" => Value::test_int(25)
135 }),
136 Value::test_record(record! {
137 "name" => Value::test_string("Bob"),
138 "age" => Value::test_int(30)
139 }),
140 ])),
141 },
142 Example {
143 description: "Parse a box-bordered table by ignoring separator lines and using header positions",
144 example: r#""+-------+-------+
145| col1 | col2 |
146+-------+-------+
147| a | b |
148+-------+-------+" | detect columns --ignore-box-chars"#,
149 result: Some(Value::test_list(vec![Value::test_record(record! {
150 "col1" => Value::test_string("a"),
151 "col2" => Value::test_string("b"),
152 })])),
153 },
154 ]
155 }
156
157 fn is_const(&self) -> bool {
158 true
159 }
160
161 fn run(
162 &self,
163 engine_state: &EngineState,
164 stack: &mut Stack,
165 call: &Call,
166 input: PipelineData,
167 ) -> Result<PipelineData, ShellError> {
168 let num_rows_to_skip: Option<usize> = call.get_flag(engine_state, stack, "skip")?;
170 let noheader = call.has_flag(engine_state, stack, "no-headers")?;
171 let range: Option<Range> = call.get_flag(engine_state, stack, "combine-columns")?;
172 let ignore_box_chars = call.has_flag(engine_state, stack, "ignore-box-chars")?;
173 let config = stack.get_config(engine_state);
174
175 let args = Arguments {
176 noheader,
177 num_rows_to_skip,
178 range,
179 config,
180 ignore_box_chars,
181 };
182
183 if call.has_flag(engine_state, stack, "guess")? {
185 guess_width(engine_state, call, input, args)
186 } else {
187 detect_columns(engine_state, call, input, args)
188 }
189 }
190
191 fn run_const(
192 &self,
193 working_set: &StateWorkingSet,
194 call: &Call,
195 input: PipelineData,
196 ) -> Result<PipelineData, ShellError> {
197 let num_rows_to_skip: Option<usize> = call.get_flag_const(working_set, "skip")?;
198 let noheader = call.has_flag_const(working_set, "no-headers")?;
199 let range: Option<Range> = call.get_flag_const(working_set, "combine-columns")?;
200 let ignore_box_chars = call.has_flag_const(working_set, "ignore-box-chars")?;
201 let config = working_set.get_config().clone();
202
203 let args = Arguments {
204 noheader,
205 num_rows_to_skip,
206 range,
207 config,
208 ignore_box_chars,
209 };
210
211 if call.has_flag_const(working_set, "guess")? {
212 guess_width(working_set.permanent(), call, input, args)
213 } else {
214 detect_columns(working_set.permanent(), call, input, args)
215 }
216 }
217}
218
/// Flag values collected in `run`/`run_const` and handed to the detection routines.
struct Arguments {
    /// Value of `--skip`: number of leading input lines dropped before detection.
    num_rows_to_skip: Option<usize>,
    /// Set by `--no-headers`: columns are named `column0`, `column1`, ... and the first line is
    /// kept as data.
    noheader: bool,
    /// Value of `--combine-columns`: the column range that `merge_record` joins into one column.
    range: Option<Range>,
    /// Configuration used when collecting the pipeline input into a string.
    config: Arc<Config>,
    /// Set by `--ignore-box-chars`: lines are passed through `filter_box_chars` first.
    ignore_box_chars: bool,
}
226
227fn guess_width(
228 engine_state: &EngineState,
229 call: &Call,
230 input: PipelineData,
231 args: Arguments,
232) -> Result<PipelineData, ShellError> {
233 use super::guess_width::GuessWidth;
234 let input_span = input.span().unwrap_or(call.head);
235
236 let mut input = input.collect_string("", &args.config)?;
237 if let Some(rows) = args.num_rows_to_skip {
238 input = input.lines().skip(rows).map(|x| x.to_string()).join("\n");
239 }
240
241 if args.ignore_box_chars {
243 let filtered_lines = filter_box_chars(input.lines().map(|s| s.to_string()));
244 input = filtered_lines.join("\n");
245 }
246
247 let mut guess_width = GuessWidth::new_reader(Box::new(Cursor::new(input)));
248
249 let result = guess_width.read_all();
250
251 if result.is_empty() {
252 return Ok(Value::nothing(input_span).into_pipeline_data());
253 }
254 if !args.noheader {
255 let columns = result[0].clone();
256 Ok(result
257 .into_iter()
258 .skip(1)
259 .map(move |s| {
260 let mut values: Vec<Value> = s
261 .into_iter()
262 .map(|v| Value::string(v, input_span))
263 .collect();
264 for _ in values.len()..columns.len() {
266 values.push(Value::string("", input_span));
267 }
268 let record =
269 Record::from_raw_cols_vals(columns.clone(), values, input_span, input_span);
270 match record {
271 Ok(r) => match &args.range {
272 Some(range) => merge_record(r, range, input_span),
273 None => Value::record(r, input_span),
274 },
275 Err(e) => Value::error(e, input_span),
276 }
277 })
278 .into_pipeline_data(input_span, engine_state.signals().clone()))
279 } else {
280 let length = result[0].len();
281 let columns: Vec<String> = (0..length).map(|n| format!("column{n}")).collect();
282 Ok(result
283 .into_iter()
284 .map(move |s| {
285 let mut values: Vec<Value> = s
286 .into_iter()
287 .map(|v| Value::string(v, input_span))
288 .collect();
289 for _ in values.len()..columns.len() {
291 values.push(Value::string("", input_span));
292 }
293 let record =
294 Record::from_raw_cols_vals(columns.clone(), values, input_span, input_span);
295 match record {
296 Ok(r) => match &args.range {
297 Some(range) => merge_record(r, range, input_span),
298 None => Value::record(r, input_span),
299 },
300 Err(e) => Value::error(e, input_span),
301 }
302 })
303 .into_pipeline_data(input_span, engine_state.signals().clone()))
304 }
305}
306
307fn detect_columns(
311 _engine_state: &EngineState,
312 call: &Call,
313 input: PipelineData,
314 args: Arguments,
315) -> Result<PipelineData, ShellError> {
316 let name_span = call.head;
317 let input_span = input.span().unwrap_or(Span::unknown());
318
319 match input {
321 PipelineData::Value(val, _) => {
323 if let Value::List { vals, .. } = &val
324 && vals.iter().all(|v| matches!(v, Value::Record { .. }))
325 {
326 return Ok(val.into_pipeline_data());
327 }
328 let input_str = val.coerce_str()?.to_string();
330 process_string_input(input_str, args, name_span, input_span)
331 }
332 PipelineData::ListStream(_, _) => Ok(input),
334 PipelineData::ByteStream(_, _) => {
336 let input_str = input.collect_string("", &args.config)?;
337 process_string_input(input_str, args, name_span, input_span)
338 }
339 PipelineData::Empty => Ok(PipelineData::empty()),
341 }
342}
343
344fn process_string_input(
346 input_str: String,
347 args: Arguments,
348 name_span: Span,
349 input_span: Span,
350) -> Result<PipelineData, ShellError> {
351 let lines_iter = input_str
353 .lines()
354 .skip(args.num_rows_to_skip.unwrap_or_default());
355
356 let filtered_lines: Vec<_> = if args.ignore_box_chars {
360 filter_box_chars(lines_iter.map(|s| s.to_string()))
361 } else {
362 lines_iter.map(|x| x.to_string()).collect()
364 };
365
366 let mut lines = filtered_lines.into_iter();
367 let header_line = lines.next();
368
369 if let Some(header_line) = header_line {
370 if args.ignore_box_chars {
371 process_with_box_filter(header_line, lines, args, name_span, input_span)
372 } else {
373 process_standard(header_line, lines, args, name_span, input_span)
374 }
375 } else {
376 Ok(PipelineData::empty())
377 }
378}
379
380fn process_with_box_filter(
383 header_line: String,
384 lines: impl Iterator<Item = String>,
385 args: Arguments,
386 name_span: Span,
387 input_span: Span,
388) -> Result<PipelineData, ShellError> {
389 let has_internal_separators = header_line.contains('|') || header_line.contains('│');
392
393 let (processed_headers, processed_lines): (String, Vec<String>) = if has_internal_separators {
394 let replace_separators = |s: &str| {
396 s.chars()
397 .map(|c| if c == '|' || c == '│' { ' ' } else { c })
398 .collect::<String>()
399 };
400 (
401 replace_separators(&header_line),
402 lines.map(|line| replace_separators(&line)).collect(),
403 )
404 } else {
405 (header_line.clone(), lines.collect())
407 };
408
409 if !has_internal_separators {
411 let header_positions = find_header_positions(&header_line);
412
413 if header_positions.is_empty() {
414 return Ok(PipelineData::empty());
415 }
416
417 let mut header_names: Vec<String> = header_positions
419 .iter()
420 .map(|(_, name)| name.clone())
421 .collect();
422
423 if args.noheader {
424 for (i, name) in header_names.iter_mut().enumerate() {
425 *name = format!("column{i}");
426 }
427 }
428
429 check_duplicate_string_headers(&header_names, input_span, name_span)?;
431
432 let all_lines: Vec<_> = args
434 .noheader
435 .then_some(header_line.clone())
436 .into_iter()
437 .chain(processed_lines)
438 .collect();
439
440 return Ok(Value::list(
441 all_lines
442 .into_iter()
443 .map(|line| {
444 let values = split_line_by_positions(&line, &header_positions);
445 let mut record = Record::new();
446
447 for (header, val) in header_names.iter().zip(values.iter()) {
448 record.push(header, Value::string(val, name_span));
449 }
450
451 for header in header_names.iter().skip(values.len()) {
453 record.push(header, Value::string("", name_span));
454 }
455
456 Ok::<Value, ShellError>(match &args.range {
457 Some(range) => merge_record(record, range, name_span),
458 None => Value::record(record, name_span),
459 })
460 })
461 .collect::<Result<Vec<_>, _>>()?,
462 name_span,
463 )
464 .into_pipeline_data());
465 }
466
467 let mut headers = find_columns(&processed_headers);
469
470 if args.noheader {
471 for header in headers.iter_mut().enumerate() {
472 header.1.item = format!("column{}", header.0);
473 }
474 }
475
476 check_duplicate_headers(&headers, input_span, name_span)?;
478
479 let all_lines: Vec<_> = args
481 .noheader
482 .then_some(processed_headers.clone())
483 .into_iter()
484 .chain(processed_lines)
485 .collect();
486
487 Ok(Value::list(
488 all_lines
489 .into_iter()
490 .map(|line| {
491 let row = find_columns(&line);
492 let mut record = Record::new();
493
494 for (header, val) in headers.iter().zip(row.iter()) {
495 record.push(&header.item, Value::string(&val.item, name_span));
496 }
497
498 for header in headers.iter().skip(row.len()) {
500 record.push(&header.item, Value::string("", name_span));
501 }
502
503 Ok::<Value, ShellError>(match &args.range {
504 Some(range) => merge_record(record, range, name_span),
505 None => Value::record(record, name_span),
506 })
507 })
508 .collect::<Result<Vec<_>, _>>()?,
509 name_span,
510 )
511 .into_pipeline_data())
512}
513
514fn process_standard(
516 header_line: String,
517 lines: impl Iterator<Item = String>,
518 args: Arguments,
519 name_span: Span,
520 input_span: Span,
521) -> Result<PipelineData, ShellError> {
522 let mut headers = find_columns(&header_line);
524
525 if args.noheader {
526 for header in headers.iter_mut().enumerate() {
527 header.1.item = format!("column{}", header.0);
528 }
529 }
530
531 check_duplicate_headers(&headers, input_span, name_span)?;
533
534 let remaining_lines: Vec<_> = lines.collect();
536
537 let detection_failed = remaining_lines
541 .first()
542 .is_some_and(|first_line| find_columns(first_line).len() != headers.len());
543
544 let all_lines: Vec<_> = if detection_failed {
547 std::iter::once(header_line.clone())
549 .chain(remaining_lines)
550 .collect()
551 } else {
552 args.noheader
554 .then_some(header_line.clone())
555 .into_iter()
556 .chain(remaining_lines)
557 .collect()
558 };
559
560 Ok(Value::list(
561 all_lines
562 .into_iter()
563 .map(move |x| {
564 let row = find_columns(&x);
565
566 let mut record = Record::new();
567
568 if !detection_failed && headers.len() == row.len() {
569 for (header, val) in headers.iter().zip(row.iter()) {
570 record.push(&header.item, Value::string(&val.item, name_span));
571 }
572 } else {
573 record.push("data", Value::string(&x, name_span));
575 }
576
577 Ok::<Value, ShellError>(match &args.range {
578 Some(range) => merge_record(record, range, name_span),
579 None => Value::record(record, name_span),
580 })
581 })
582 .collect::<Result<Vec<_>, _>>()?,
583 name_span,
584 )
585 .into_pipeline_data())
586}
587
588pub fn find_columns(input: &str) -> Vec<Spanned<String>> {
589 let mut chars = input.char_indices().peekable();
591 let mut output = vec![];
592
593 while let Some((_, c)) = chars.peek() {
594 if c.is_whitespace() {
595 let _ = chars.next();
597 } else {
598 let result = baseline(&mut chars);
600 output.push(result);
601 }
602 }
603
604 output
605}
606
/// Returns `true` as soon as a name yielded by `iter` equals one yielded earlier (exact,
/// case-sensitive comparison); `false` if all names are distinct or `iter` is empty.
fn has_duplicate_names<I, S>(iter: I) -> bool
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    let mut seen = std::collections::HashSet::new();
    // `insert` returns false when the value was already present.
    iter.into_iter()
        .any(|name| !seen.insert(name.as_ref().to_string()))
}
628
629fn check_duplicate_headers(
631 headers: &[Spanned<String>],
632 input_span: Span,
633 name_span: Span,
634) -> Result<(), ShellError> {
635 if has_duplicate_names(headers.iter().map(|h| &h.item)) {
636 Err(ShellError::ColumnDetectionFailure {
637 bad_value: input_span,
638 failure_site: name_span,
639 })
640 } else {
641 Ok(())
642 }
643}
644
645fn check_duplicate_string_headers(
647 headers: &[String],
648 input_span: Span,
649 name_span: Span,
650) -> Result<(), ShellError> {
651 if has_duplicate_names(headers.iter().map(|s| s.as_str())) {
652 Err(ShellError::ColumnDetectionFailure {
653 bad_value: input_span,
654 failure_site: name_span,
655 })
656 } else {
657 Ok(())
658 }
659}
660
661fn filter_box_chars<I>(lines_iter: I) -> Vec<String>
664where
665 I: Iterator<Item = String>,
666{
667 lines_iter
668 .filter(|r| !r.trim().chars().all(is_box_char))
670 .map(|line| {
672 let trimmed = line.trim();
673 let cleaned = trimmed
675 .strip_prefix('|')
676 .or_else(|| trimmed.strip_prefix('│'))
677 .unwrap_or(trimmed);
678 let cleaned = cleaned.strip_prefix(' ').unwrap_or(cleaned);
679 let cleaned = cleaned
681 .strip_suffix('|')
682 .or_else(|| cleaned.strip_suffix('│'))
683 .unwrap_or(cleaned);
684 let cleaned = cleaned.strip_suffix(' ').unwrap_or(cleaned);
685 cleaned.to_string()
686 })
687 .collect()
688}
689
/// Locates every whitespace-separated word in `header_line`.
///
/// Returns one `(byte offset of the word's first character, word)` pair per word, in order of
/// appearance. An empty or all-whitespace line yields an empty vector.
fn find_header_positions(header_line: &str) -> Vec<(usize, String)> {
    let mut positions = Vec::new();
    // Byte offset where the word currently being scanned began, if we are inside one.
    let mut word_start: Option<usize> = None;

    for (idx, c) in header_line.char_indices() {
        match (c.is_whitespace(), word_start) {
            // Whitespace right after a word: the word spans `start..idx`.
            (true, Some(start)) => {
                positions.push((start, header_line[start..idx].to_string()));
                word_start = None;
            }
            // First character of a new word.
            (false, None) => word_start = Some(idx),
            // Still inside a word, or still inside a run of whitespace.
            _ => {}
        }
    }

    // A word that runs up to the end of the line has no terminating whitespace.
    if let Some(start) = word_start {
        positions.push((start, header_line[start..].to_string()));
    }

    positions
}
723
/// Snaps byte index `idx` to a UTF-8 character boundary of `s`.
///
/// An index that already is a boundary is returned untouched. Otherwise the nearest boundary
/// below `idx` is returned when `backward` is true, and the nearest boundary at or above `idx`
/// (at most `s.len()`) when it is false.
#[inline]
fn adjust_char_boundary(s: &str, idx: usize, backward: bool) -> usize {
    if s.is_char_boundary(idx) {
        return idx;
    }

    let mut candidate = idx;
    if backward {
        while candidate > 0 {
            candidate -= 1;
            if s.is_char_boundary(candidate) {
                return candidate;
            }
        }
        0
    } else {
        while candidate <= s.len() {
            if s.is_char_boundary(candidate) {
                return candidate;
            }
            candidate += 1;
        }
        // `idx` was already past the end of the string.
        s.len()
    }
}
746
747fn safe_slice_range(line: &str, start: usize, end: usize, prev_end: usize) -> (usize, usize) {
751 let line_len = line.len();
752 let actual_end = end.min(line_len);
753
754 let mut safe_start = adjust_char_boundary(line, start, true);
755 if safe_start < prev_end {
756 safe_start = prev_end;
757 }
758
759 let mut safe_end = adjust_char_boundary(line, actual_end, false);
760 if safe_end < safe_start {
761 safe_end = safe_start;
762 }
763
764 (safe_start, safe_end)
765}
766
767fn split_line_by_positions(line: &str, positions: &[(usize, String)]) -> Vec<String> {
776 if positions.is_empty() {
777 return vec![line.to_string()];
778 }
779
780 let mut values = vec![];
781 let line_len = line.len();
782
783 let mut prev_end = 0;
784 for (i, (start, _)) in positions.iter().enumerate() {
785 let start = *start;
786 let end = if i + 1 < positions.len() {
787 positions[i + 1].0
788 } else {
789 line_len
790 };
791
792 if start < line_len {
793 let (safe_start, safe_end) = safe_slice_range(line, start, end, prev_end);
794 let value = &line[safe_start..safe_end];
795 values.push(value.trim().to_string());
796 prev_end = safe_end;
797 } else {
798 values.push(String::new());
799 }
800 }
801
802 values
803}
804
/// Kind of bracket that `baseline` has seen opened and not yet closed while reading a token.
#[derive(Clone, Copy)]
enum BlockKind {
    /// Opened by `(`, closed by `)`.
    Parenthesis,
    /// Opened by `{`, closed by `}`.
    Brace,
    /// Opened by `[`, closed by `]`.
    Bracket,
}
811
812fn baseline(src: &mut Input) -> Spanned<String> {
818 let mut token_contents = String::new();
819
820 let start_offset = if let Some((pos, _)) = src.peek() {
821 *pos
822 } else {
823 0
824 };
825
826 let mut quote_start: Option<char> = None;
830
831 let mut block_level: Vec<BlockKind> = vec![];
833
834 fn is_termination(block_level: &[BlockKind], c: char) -> bool {
838 block_level.is_empty() && (c.is_whitespace())
839 }
840
841 while let Some((_, c)) = src.peek() {
853 let c = *c;
854
855 if quote_start.is_some() {
856 if Some(c) == quote_start {
859 quote_start = None;
860 }
861 } else if c == '\n' {
862 if is_termination(&block_level, c) {
863 break;
864 }
865 } else if c == '\'' || c == '"' || c == '`' {
866 quote_start = Some(c);
868 } else if c == '[' {
869 block_level.push(BlockKind::Bracket);
871 } else if c == ']' {
872 if let Some(BlockKind::Bracket) = block_level.last() {
875 let _ = block_level.pop();
876 }
877 } else if c == '{' {
878 block_level.push(BlockKind::Brace);
880 } else if c == '}' {
881 if let Some(BlockKind::Brace) = block_level.last() {
883 let _ = block_level.pop();
884 }
885 } else if c == '(' {
886 block_level.push(BlockKind::Parenthesis);
888 } else if c == ')' {
889 if let Some(BlockKind::Parenthesis) = block_level.last() {
891 let _ = block_level.pop();
892 }
893 } else if is_termination(&block_level, c) {
894 break;
895 }
896
897 token_contents.push(c);
899
900 let _ = src.next();
902 }
903
904 let span = Span::new(start_offset, start_offset + token_contents.len());
905
906 if block_level.last().is_some() {
909 return Spanned {
917 item: token_contents,
918 span,
919 };
920 }
921
922 if quote_start.is_some() {
923 return Spanned {
933 item: token_contents,
934 span,
935 };
936 }
937
938 Spanned {
939 item: token_contents,
940 span,
941 }
942}
943
944fn merge_record(record: Record, range: &Range, input_span: Span) -> Value {
945 let (start_index, end_index) = match process_range(range, record.len(), input_span) {
946 Ok(Some((l_idx, r_idx))) => (l_idx, r_idx),
947 Ok(None) => return Value::record(record, input_span),
948 Err(e) => return Value::error(e, input_span),
949 };
950
951 match merge_record_impl(record, start_index, end_index, input_span) {
952 Ok(rec) => Value::record(rec, input_span),
953 Err(err) => Value::error(err, input_span),
954 }
955}
956
957fn process_range(
958 range: &Range,
959 length: usize,
960 input_span: Span,
961) -> Result<Option<(usize, usize)>, ShellError> {
962 match nu_cmd_base::util::process_range(range) {
963 Ok((l_idx, r_idx)) => {
964 let l_idx = if l_idx < 0 {
965 length as isize + l_idx
966 } else {
967 l_idx
968 };
969
970 let r_idx = if r_idx < 0 {
971 length as isize + r_idx
972 } else {
973 r_idx
974 };
975
976 if !(l_idx <= r_idx && (r_idx >= 0 || l_idx < (length as isize))) {
977 return Ok(None);
978 }
979
980 Ok(Some((
981 l_idx.max(0) as usize,
982 (r_idx as usize + 1).min(length),
983 )))
984 }
985 Err(processing_error) => Err(processing_error("could not find range index", input_span)),
986 }
987}
988
989fn merge_record_impl(
990 record: Record,
991 start_index: usize,
992 end_index: usize,
993 input_span: Span,
994) -> Result<Record, ShellError> {
995 let (mut cols, mut vals): (Vec<_>, Vec<_>) = record.into_iter().unzip();
996 ((start_index + 1)..(cols.len() - end_index + start_index + 1)).for_each(|idx| {
998 cols.swap(idx, end_index - start_index - 1 + idx);
999 });
1000 cols.truncate(cols.len() - end_index + start_index + 1);
1001
1002 let combined = vals
1004 .iter()
1005 .take(end_index)
1006 .skip(start_index)
1007 .map(|v| v.coerce_str().unwrap_or_default())
1008 .join(" ");
1009 let binding = Value::string(combined, Span::unknown());
1010 let last_seg = vals.split_off(end_index);
1011 vals.truncate(start_index);
1012 vals.push(binding);
1013 vals.extend(last_seg);
1014
1015 Record::from_raw_cols_vals(cols, vals, Span::unknown(), input_span)
1016}
1017
/// Unit tests for the command examples and the position-based line splitting.
#[cfg(test)]
mod test {
    use super::*;

    /// Hands the command to the crate's `test_examples` helper.
    #[test]
    fn test_examples() {
        crate::test_examples(DetectColumns)
    }

    /// A column offset that falls inside a multi-byte character must not panic; the character
    /// stays with the column on its left.
    #[test]
    fn split_line_by_positions_multibyte_boundary() {
        let line = "a…b";
        // Byte 2 is in the middle of the ellipsis character.
        assert!(!line.is_char_boundary(2));

        let positions = vec![(0, "a".to_string()), (2, "b".to_string())];

        let cols = split_line_by_positions(line, &positions);
        assert_eq!(cols, vec!["a…".to_string(), "b".to_string()]);
    }

    /// Lines containing assorted non-ASCII text are split into one value per header position
    /// without panicking.
    #[test]
    fn split_line_with_various_unicode() {
        let positions = find_header_positions("a b c");

        let examples = [
            "x é y", "x 😄 y", "x 👨👩👧👦 y", "x 中 y", "x a\u{0301} y", ];

        for &line in examples.iter() {
            let cols = split_line_by_positions(line, &positions);
            assert_eq!(cols.len(), 3, "line produced wrong column count: {}", line);
        }
    }
}