nu_command/strings/
detect_columns.rs

1use itertools::Itertools;
2use nu_engine::command_prelude::*;
3use nu_protocol::{Config, Range};
4use std::{io::Cursor, iter::Peekable, str::CharIndices, sync::Arc};
5
6type Input<'t> = Peekable<CharIndices<'t>>;
7
/// Returns `true` when `c` is a character used to draw table borders.
///
/// Recognised characters are:
/// - every code point of the Unicode "Box Drawing" block (U+2500..=U+257F),
///   which covers light, heavy, dashed, double-line and rounded-corner
///   variants of horizontal/vertical lines, corners and intersections;
/// - the ASCII stand-ins `-`, `=`, `|` and `+`.
fn is_box_char(c: char) -> bool {
    matches!(
        c,
        // Unicode Box Drawing block: lines, corners, tees and crosses in all weights
        '\u{2500}'..='\u{257F}'
        // ASCII equivalents used by plain-text table renderers
        | '-' | '=' | '|' | '+'
    )
}
22
/// Attempts to automatically split text into multiple columns.
///
/// This is the `detect columns` command. String input is parsed as tabular
/// text; on the default (non `--guess`) path, input that is already a table
/// is passed through unchanged.
/// When `--ignore-box-chars` is used, lines made up only of box drawing
/// characters are dropped and border characters are stripped from the
/// remaining lines before columns are detected.
#[derive(Clone)]
pub struct DetectColumns;
29
30impl Command for DetectColumns {
31    fn name(&self) -> &str {
32        "detect columns"
33    }
34
35    fn signature(&self) -> Signature {
36        Signature::build("detect columns")
37            .named(
38                "skip",
39                SyntaxShape::Int,
40                "number of rows to skip before detecting",
41                Some('s'),
42            )
43            .input_output_types(vec![
44                (Type::String, Type::table()),
45                (Type::table(), Type::table()),
46            ])
47            .switch("no-headers", "don't detect headers", Some('n'))
48            .switch(
49                "ignore-box-chars",
50                "ignore lines consisting entirely of box drawing characters and clean box characters from tokens",
51                Some('i'),
52            )
53            .named(
54                "combine-columns",
55                SyntaxShape::Range,
56                "columns to be combined; listed as a range",
57                Some('c'),
58            )
59            .switch(
60                "guess",
61                "detect columns by guessing width, it may be useful if default one doesn't work",
62                None,
63            )
64            .category(Category::Strings)
65    }
66
67    fn description(&self) -> &str {
68        "Attempt to automatically split text into multiple columns."
69    }
70
71    fn search_terms(&self) -> Vec<&str> {
72        vec!["split", "tabular"]
73    }
74
75    fn examples(&self) -> Vec<Example<'_>> {
76        vec![
77            Example {
78                description: "use --guess if you find default algorithm not working",
79                example: r"
80'Filesystem     1K-blocks      Used Available Use% Mounted on
81none             8150224         4   8150220   1% /mnt/c' | detect columns --guess",
82                result: Some(Value::test_list(vec![Value::test_record(record! {
83                    "Filesystem" => Value::test_string("none"),
84                    "1K-blocks" => Value::test_string("8150224"),
85                    "Used" => Value::test_string("4"),
86                    "Available" => Value::test_string("8150220"),
87                    "Use%" => Value::test_string("1%"),
88                    "Mounted on" => Value::test_string("/mnt/c")
89                })])),
90            },
91            Example {
92                description: "detect columns with no headers",
93                example: "'a b c' | detect columns  --no-headers",
94                result: Some(Value::test_list(vec![Value::test_record(record! {
95                        "column0" => Value::test_string("a"),
96                        "column1" => Value::test_string("b"),
97                        "column2" => Value::test_string("c"),
98                })])),
99            },
100            Example {
101                description: "",
102                example: "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns 0..1 ",
103                result: None,
104            },
105            Example {
106                description: "Splits a multi-line string into columns with headers detected",
107                example: "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns -2..-1 ",
108                result: None,
109            },
110            Example {
111                description: "Splits a multi-line string into columns with headers detected",
112                example: "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns 2.. ",
113                result: None,
114            },
115            Example {
116                description: "Parse external ls command and combine columns for datetime",
117                example: "^ls -lh | detect columns --no-headers --skip 1 --combine-columns 5..7",
118                result: None,
119            },
120            Example {
121                description: "Table literal input is passed through unchanged",
122                example: "[[name, age]; [Alice, 25]] | detect columns",
123                result: Some(Value::test_list(vec![Value::test_record(record! {
124                    "name" => Value::test_string("Alice"),
125                    "age" => Value::test_int(25)
126                })])),
127            },
128            Example {
129                description: "List of records input is passed through unchanged",
130                example: "[{name: Alice, age: 25}, {name: Bob, age: 30}] | detect columns",
131                result: Some(Value::test_list(vec![
132                    Value::test_record(record! {
133                        "name" => Value::test_string("Alice"),
134                        "age" => Value::test_int(25)
135                    }),
136                    Value::test_record(record! {
137                        "name" => Value::test_string("Bob"),
138                        "age" => Value::test_int(30)
139                    }),
140                ])),
141            },
142            Example {
143                description: "Parse a box-bordered table by ignoring separator lines and using header positions",
144                example: r#""+-------+-------+
145| col1  | col2  |
146+-------+-------+
147| a     | b     |
148+-------+-------+" | detect columns --ignore-box-chars"#,
149                result: Some(Value::test_list(vec![Value::test_record(record! {
150                    "col1" => Value::test_string("a"),
151                    "col2" => Value::test_string("b"),
152                })])),
153            },
154        ]
155    }
156
157    fn is_const(&self) -> bool {
158        true
159    }
160
161    fn run(
162        &self,
163        engine_state: &EngineState,
164        stack: &mut Stack,
165        call: &Call,
166        input: PipelineData,
167    ) -> Result<PipelineData, ShellError> {
168        // Extract command arguments
169        let num_rows_to_skip: Option<usize> = call.get_flag(engine_state, stack, "skip")?;
170        let noheader = call.has_flag(engine_state, stack, "no-headers")?;
171        let range: Option<Range> = call.get_flag(engine_state, stack, "combine-columns")?;
172        let ignore_box_chars = call.has_flag(engine_state, stack, "ignore-box-chars")?;
173        let config = stack.get_config(engine_state);
174
175        let args = Arguments {
176            noheader,
177            num_rows_to_skip,
178            range,
179            config,
180            ignore_box_chars,
181        };
182
183        // Dispatch to appropriate implementation based on guess flag
184        if call.has_flag(engine_state, stack, "guess")? {
185            guess_width(engine_state, call, input, args)
186        } else {
187            detect_columns(engine_state, call, input, args)
188        }
189    }
190
191    fn run_const(
192        &self,
193        working_set: &StateWorkingSet,
194        call: &Call,
195        input: PipelineData,
196    ) -> Result<PipelineData, ShellError> {
197        let num_rows_to_skip: Option<usize> = call.get_flag_const(working_set, "skip")?;
198        let noheader = call.has_flag_const(working_set, "no-headers")?;
199        let range: Option<Range> = call.get_flag_const(working_set, "combine-columns")?;
200        let ignore_box_chars = call.has_flag_const(working_set, "ignore-box-chars")?;
201        let config = working_set.get_config().clone();
202
203        let args = Arguments {
204            noheader,
205            num_rows_to_skip,
206            range,
207            config,
208            ignore_box_chars,
209        };
210
211        if call.has_flag_const(working_set, "guess")? {
212            guess_width(working_set.permanent(), call, input, args)
213        } else {
214            detect_columns(working_set.permanent(), call, input, args)
215        }
216    }
217}
218
/// Flag values of a `detect columns` call, gathered once by `run`/`run_const`
/// and handed to the detection functions.
struct Arguments {
    /// Value of `--skip`: number of leading input lines to drop before detecting.
    num_rows_to_skip: Option<usize>,
    /// `--no-headers`: name columns `column0`, `column1`, ... and treat the
    /// first line as data.
    noheader: bool,
    /// Value of `--combine-columns`: range of columns handed to `merge_record`.
    range: Option<Range>,
    /// Configuration used when collecting the input into a string.
    config: Arc<Config>,
    /// `--ignore-box-chars`: drop box-only lines and strip border characters.
    ignore_box_chars: bool,
}
226
227fn guess_width(
228    engine_state: &EngineState,
229    call: &Call,
230    input: PipelineData,
231    args: Arguments,
232) -> Result<PipelineData, ShellError> {
233    use super::guess_width::GuessWidth;
234    let input_span = input.span().unwrap_or(call.head);
235
236    let mut input = input.collect_string("", &args.config)?;
237    if let Some(rows) = args.num_rows_to_skip {
238        input = input.lines().skip(rows).map(|x| x.to_string()).join("\n");
239    }
240
241    // Apply box character filtering if requested
242    if args.ignore_box_chars {
243        let filtered_lines = filter_box_chars(input.lines().map(|s| s.to_string()));
244        input = filtered_lines.join("\n");
245    }
246
247    let mut guess_width = GuessWidth::new_reader(Box::new(Cursor::new(input)));
248
249    let result = guess_width.read_all();
250
251    if result.is_empty() {
252        return Ok(Value::nothing(input_span).into_pipeline_data());
253    }
254    if !args.noheader {
255        let columns = result[0].clone();
256        Ok(result
257            .into_iter()
258            .skip(1)
259            .map(move |s| {
260                let mut values: Vec<Value> = s
261                    .into_iter()
262                    .map(|v| Value::string(v, input_span))
263                    .collect();
264                // some rows may has less columns, fill it with ""
265                for _ in values.len()..columns.len() {
266                    values.push(Value::string("", input_span));
267                }
268                let record =
269                    Record::from_raw_cols_vals(columns.clone(), values, input_span, input_span);
270                match record {
271                    Ok(r) => match &args.range {
272                        Some(range) => merge_record(r, range, input_span),
273                        None => Value::record(r, input_span),
274                    },
275                    Err(e) => Value::error(e, input_span),
276                }
277            })
278            .into_pipeline_data(input_span, engine_state.signals().clone()))
279    } else {
280        let length = result[0].len();
281        let columns: Vec<String> = (0..length).map(|n| format!("column{n}")).collect();
282        Ok(result
283            .into_iter()
284            .map(move |s| {
285                let mut values: Vec<Value> = s
286                    .into_iter()
287                    .map(|v| Value::string(v, input_span))
288                    .collect();
289                // some rows may has less columns, fill it with ""
290                for _ in values.len()..columns.len() {
291                    values.push(Value::string("", input_span));
292                }
293                let record =
294                    Record::from_raw_cols_vals(columns.clone(), values, input_span, input_span);
295                match record {
296                    Ok(r) => match &args.range {
297                        Some(range) => merge_record(r, range, input_span),
298                        None => Value::record(r, input_span),
299                    },
300                    Err(e) => Value::error(e, input_span),
301                }
302            })
303            .into_pipeline_data(input_span, engine_state.signals().clone()))
304    }
305}
306
307/// Core function to detect columns from input data.
308/// Handles different input types: passes through tables, parses strings.
309/// Applies filtering and cleaning based on the ignore_box_chars flag.
310fn detect_columns(
311    _engine_state: &EngineState,
312    call: &Call,
313    input: PipelineData,
314    args: Arguments,
315) -> Result<PipelineData, ShellError> {
316    let name_span = call.head;
317    let input_span = input.span().unwrap_or(Span::unknown());
318
319    // Handle different input types
320    match input {
321        // If input is already a table (list of records), pass it through unchanged
322        PipelineData::Value(val, _) => {
323            if let Value::List { vals, .. } = &val
324                && vals.iter().all(|v| matches!(v, Value::Record { .. }))
325            {
326                return Ok(val.into_pipeline_data());
327            }
328            // Otherwise, coerce to string for parsing
329            let input_str = val.coerce_str()?.to_string();
330            process_string_input(input_str, args, name_span, input_span)
331        }
332        // Table streams are passed through directly
333        PipelineData::ListStream(_, _) => Ok(input),
334        // External command output is collected as string
335        PipelineData::ByteStream(_, _) => {
336            let input_str = input.collect_string("", &args.config)?;
337            process_string_input(input_str, args, name_span, input_span)
338        }
339        // Empty input yields empty string
340        PipelineData::Empty => Ok(PipelineData::empty()),
341    }
342}
343
344/// Process string input for column detection.
345fn process_string_input(
346    input_str: String,
347    args: Arguments,
348    name_span: Span,
349    input_span: Span,
350) -> Result<PipelineData, ShellError> {
351    // Split input string into lines and skip the specified number of rows
352    let lines_iter = input_str
353        .lines()
354        .skip(args.num_rows_to_skip.unwrap_or_default());
355
356    // Conditionally filter out lines consisting entirely of box drawing characters
357    // and clean box characters from the remaining lines
358    // This helps clean up tabular output from commands like `iptab` that use box drawings
359    let filtered_lines: Vec<_> = if args.ignore_box_chars {
360        filter_box_chars(lines_iter.map(|s| s.to_string()))
361    } else {
362        // No filtering: pass through all lines as-is
363        lines_iter.map(|x| x.to_string()).collect()
364    };
365
366    let mut lines = filtered_lines.into_iter();
367    let header_line = lines.next();
368
369    if let Some(header_line) = header_line {
370        if args.ignore_box_chars {
371            process_with_box_filter(header_line, lines, args, name_span, input_span)
372        } else {
373            process_standard(header_line, lines, args, name_span, input_span)
374        }
375    } else {
376        Ok(PipelineData::empty())
377    }
378}
379
380/// Process input when ignore_box_chars is enabled.
381/// Handles both position-based and whitespace-based splitting depending on table format.
382fn process_with_box_filter(
383    header_line: String,
384    lines: impl Iterator<Item = String>,
385    args: Arguments,
386    name_span: Span,
387    input_span: Span,
388) -> Result<PipelineData, ShellError> {
389    // Check if the header line contains internal | separators
390    // If so, replace them with spaces so whitespace-based detection works
391    let has_internal_separators = header_line.contains('|') || header_line.contains('│');
392
393    let (processed_headers, processed_lines): (String, Vec<String>) = if has_internal_separators {
394        // Replace internal | with spaces for whitespace-based splitting
395        let replace_separators = |s: &str| {
396            s.chars()
397                .map(|c| if c == '|' || c == '│' { ' ' } else { c })
398                .collect::<String>()
399        };
400        (
401            replace_separators(&header_line),
402            lines.map(|line| replace_separators(&line)).collect(),
403        )
404    } else {
405        // No internal separators - use position-based splitting
406        (header_line.clone(), lines.collect())
407    };
408
409    // Use position-based splitting for tables without internal separators (like iptab)
410    if !has_internal_separators {
411        let header_positions = find_header_positions(&header_line);
412
413        if header_positions.is_empty() {
414            return Ok(PipelineData::empty());
415        }
416
417        // Extract header names
418        let mut header_names: Vec<String> = header_positions
419            .iter()
420            .map(|(_, name)| name.clone())
421            .collect();
422
423        if args.noheader {
424            for (i, name) in header_names.iter_mut().enumerate() {
425                *name = format!("column{i}");
426            }
427        }
428
429        // Check for duplicate column names
430        check_duplicate_string_headers(&header_names, input_span, name_span)?;
431
432        // Collect all lines for processing
433        let all_lines: Vec<_> = args
434            .noheader
435            .then_some(header_line.clone())
436            .into_iter()
437            .chain(processed_lines)
438            .collect();
439
440        return Ok(Value::list(
441            all_lines
442                .into_iter()
443                .map(|line| {
444                    let values = split_line_by_positions(&line, &header_positions);
445                    let mut record = Record::new();
446
447                    for (header, val) in header_names.iter().zip(values.iter()) {
448                        record.push(header, Value::string(val, name_span));
449                    }
450
451                    // Fill in missing columns with empty strings
452                    for header in header_names.iter().skip(values.len()) {
453                        record.push(header, Value::string("", name_span));
454                    }
455
456                    Ok::<Value, ShellError>(match &args.range {
457                        Some(range) => merge_record(record, range, name_span),
458                        None => Value::record(record, name_span),
459                    })
460                })
461                .collect::<Result<Vec<_>, _>>()?,
462            name_span,
463        )
464        .into_pipeline_data());
465    }
466
467    // Tables with internal separators: use whitespace-based splitting on processed data
468    let mut headers = find_columns(&processed_headers);
469
470    if args.noheader {
471        for header in headers.iter_mut().enumerate() {
472            header.1.item = format!("column{}", header.0);
473        }
474    }
475
476    // Check for duplicate column names
477    check_duplicate_headers(&headers, input_span, name_span)?;
478
479    // Collect all lines for processing
480    let all_lines: Vec<_> = args
481        .noheader
482        .then_some(processed_headers.clone())
483        .into_iter()
484        .chain(processed_lines)
485        .collect();
486
487    Ok(Value::list(
488        all_lines
489            .into_iter()
490            .map(|line| {
491                let row = find_columns(&line);
492                let mut record = Record::new();
493
494                for (header, val) in headers.iter().zip(row.iter()) {
495                    record.push(&header.item, Value::string(&val.item, name_span));
496                }
497
498                // Fill in missing columns with empty strings
499                for header in headers.iter().skip(row.len()) {
500                    record.push(&header.item, Value::string("", name_span));
501                }
502
503                Ok::<Value, ShellError>(match &args.range {
504                    Some(range) => merge_record(record, range, name_span),
505                    None => Value::record(record, name_span),
506                })
507            })
508            .collect::<Result<Vec<_>, _>>()?,
509        name_span,
510    )
511    .into_pipeline_data())
512}
513
514/// Process input with standard whitespace-based column detection.
515fn process_standard(
516    header_line: String,
517    lines: impl Iterator<Item = String>,
518    args: Arguments,
519    name_span: Span,
520    input_span: Span,
521) -> Result<PipelineData, ShellError> {
522    // Standard whitespace-based column detection
523    let mut headers = find_columns(&header_line);
524
525    if args.noheader {
526        for header in headers.iter_mut().enumerate() {
527            header.1.item = format!("column{}", header.0);
528        }
529    }
530
531    // Check for duplicate column names - this would create an invalid record
532    check_duplicate_headers(&headers, input_span, name_span)?;
533
534    // Collect remaining lines
535    let remaining_lines: Vec<_> = lines.collect();
536
537    // Check if column detection is working: if the first data row doesn't match
538    // the header structure, detection has failed and we should output all lines
539    // in a consistent "data" column to preserve the original data.
540    let detection_failed = remaining_lines
541        .first()
542        .is_some_and(|first_line| find_columns(first_line).len() != headers.len());
543
544    // When detection fails, include ALL original lines (including the first "header" line)
545    // When detection succeeds, only include header line if --no-headers was specified
546    let all_lines: Vec<_> = if detection_failed {
547        // Include the original first line since detection failed
548        std::iter::once(header_line.clone())
549            .chain(remaining_lines)
550            .collect()
551    } else {
552        // Detection succeeded - only include first line if --no-headers
553        args.noheader
554            .then_some(header_line.clone())
555            .into_iter()
556            .chain(remaining_lines)
557            .collect()
558    };
559
560    Ok(Value::list(
561        all_lines
562            .into_iter()
563            .map(move |x| {
564                let row = find_columns(&x);
565
566                let mut record = Record::new();
567
568                if !detection_failed && headers.len() == row.len() {
569                    for (header, val) in headers.iter().zip(row.iter()) {
570                        record.push(&header.item, Value::string(&val.item, name_span));
571                    }
572                } else {
573                    // Output the raw data - either detection failed or row doesn't match
574                    record.push("data", Value::string(&x, name_span));
575                }
576
577                Ok::<Value, ShellError>(match &args.range {
578                    Some(range) => merge_record(record, range, name_span),
579                    None => Value::record(record, name_span),
580                })
581            })
582            .collect::<Result<Vec<_>, _>>()?,
583        name_span,
584    )
585    .into_pipeline_data())
586}
587
588pub fn find_columns(input: &str) -> Vec<Spanned<String>> {
589    // For space-separated format, use the original baseline method
590    let mut chars = input.char_indices().peekable();
591    let mut output = vec![];
592
593    while let Some((_, c)) = chars.peek() {
594        if c.is_whitespace() {
595            // If the next character is non-newline whitespace, skip it.
596            let _ = chars.next();
597        } else {
598            // Otherwise, try to consume an unclassified token.
599            let result = baseline(&mut chars);
600            output.push(result);
601        }
602    }
603
604    output
605}
606
607/// Check for duplicate column names and return an error if found.
608fn check_duplicate_headers(
609    headers: &[Spanned<String>],
610    input_span: Span,
611    name_span: Span,
612) -> Result<(), ShellError> {
613    let has_duplicate_headers = headers
614        .iter()
615        .map(|h| &h.item)
616        .collect::<std::collections::HashSet<_>>()
617        .len()
618        != headers.len();
619
620    if has_duplicate_headers {
621        Err(ShellError::ColumnDetectionFailure {
622            bad_value: input_span,
623            failure_site: name_span,
624        })
625    } else {
626        Ok(())
627    }
628}
629
630/// Check for duplicate column names in string headers and return an error if found.
631fn check_duplicate_string_headers(
632    headers: &[String],
633    input_span: Span,
634    name_span: Span,
635) -> Result<(), ShellError> {
636    let has_duplicate_headers = headers
637        .iter()
638        .collect::<std::collections::HashSet<_>>()
639        .len()
640        != headers.len();
641
642    if has_duplicate_headers {
643        Err(ShellError::ColumnDetectionFailure {
644            bad_value: input_span,
645            failure_site: name_span,
646        })
647    } else {
648        Ok(())
649    }
650}
651
652/// Filter and clean box drawing characters from lines.
653/// Returns filtered lines with box-only lines removed and border characters stripped.
654fn filter_box_chars<I>(lines_iter: I) -> Vec<String>
655where
656    I: Iterator<Item = String>,
657{
658    lines_iter
659        // Filter out lines where all non-whitespace characters are box drawing characters
660        .filter(|r| !r.trim().chars().all(is_box_char))
661        // Clean border characters from each line
662        .map(|line| {
663            let trimmed = line.trim();
664            // Strip only leading border character (| or │) and one optional space
665            let cleaned = trimmed
666                .strip_prefix('|')
667                .or_else(|| trimmed.strip_prefix('│'))
668                .unwrap_or(trimmed);
669            let cleaned = cleaned.strip_prefix(' ').unwrap_or(cleaned);
670            // Strip only trailing border character and one optional space
671            let cleaned = cleaned
672                .strip_suffix('|')
673                .or_else(|| cleaned.strip_suffix('│'))
674                .unwrap_or(cleaned);
675            let cleaned = cleaned.strip_suffix(' ').unwrap_or(cleaned);
676            cleaned.to_string()
677        })
678        .collect()
679}
680
/// Locates the whitespace-separated words of a header line.
///
/// Returns one `(start, word)` pair per word, in order, where `start` is the
/// byte offset of the word's first character within `header_line`.
fn find_header_positions(header_line: &str) -> Vec<(usize, String)> {
    let mut positions = Vec::new();
    // `rest` is the unscanned tail of the line; `consumed` is its byte offset
    let mut rest = header_line;
    let mut consumed = 0;

    loop {
        let word_and_tail = rest.trim_start();
        if word_and_tail.is_empty() {
            break;
        }

        let start = consumed + (rest.len() - word_and_tail.len());
        // A word runs up to the next whitespace character or the end of the line
        let word_len = word_and_tail
            .find(char::is_whitespace)
            .unwrap_or(word_and_tail.len());
        let (word, tail) = word_and_tail.split_at(word_len);

        positions.push((start, word.to_string()));
        consumed = start + word_len;
        rest = tail;
    }

    positions
}
714
/// Splits a data line into cells using the start offsets of the header words.
///
/// `positions` holds `(start, name)` pairs with `start` being a byte offset
/// measured on the header line. Cell `i` spans from `positions[i].0` to
/// `positions[i + 1].0` (or the end of the line for the last column) and is
/// returned trimmed. A column that starts at or beyond the end of the line
/// yields an empty string. If `positions` is empty, the whole line is
/// returned as a single cell.
///
/// The offsets come from a different string, so they may land in the middle
/// of a multi-byte character of `line`. Such offsets are moved forward to the
/// next character boundary, which keeps the straddling character in the
/// column where it begins and avoids the panic that slicing at a
/// non-boundary would cause.
fn split_line_by_positions(line: &str, positions: &[(usize, String)]) -> Vec<String> {
    if positions.is_empty() {
        return vec![line.to_string()];
    }

    let line_len = line.len();

    // Clamp to the line and advance to a character boundary; `line_len` is
    // always a boundary, so the loop terminates.
    let snap = |offset: usize| -> usize {
        let mut idx = offset.min(line_len);
        while !line.is_char_boundary(idx) {
            idx += 1;
        }
        idx
    };

    positions
        .iter()
        .enumerate()
        .map(|(i, (start, _))| {
            if *start >= line_len {
                // Line is shorter than this column's start
                return String::new();
            }
            let end = positions
                .get(i + 1)
                .map_or(line_len, |(next_start, _)| *next_start);
            // `get` rather than indexing: an unordered `positions` slice
            // yields an empty cell instead of a panic
            line.get(snap(*start)..snap(end))
                .unwrap_or("")
                .trim()
                .to_string()
        })
        .collect()
}
746
/// Kind of paired delimiter that is currently open while `baseline` scans a
/// token; `baseline` keeps these on a stack and does not end a token while
/// the stack is non-empty.
#[derive(Clone, Copy)]
enum BlockKind {
    /// Opened by `(`, closed by `)`.
    Parenthesis,
    /// Opened by `{`, closed by `}`.
    Brace,
    /// Opened by `[`, closed by `]`.
    Bracket,
}
753
754/// Tokenizes a single "baseline" token from the input stream.
755/// A baseline token is a sequence of characters that can span multiple lines,
756/// but is bounded by whitespace, pipes, semicolons, or other shell syntax elements.
757/// It handles string literals, nested delimiters (parentheses, braces, brackets),
758/// and stops at terminating characters.
759fn baseline(src: &mut Input) -> Spanned<String> {
760    let mut token_contents = String::new();
761
762    let start_offset = if let Some((pos, _)) = src.peek() {
763        *pos
764    } else {
765        0
766    };
767
768    // This variable tracks the starting character of a string literal, so that
769    // we remain inside the string literal lexer mode until we encounter the
770    // closing quote.
771    let mut quote_start: Option<char> = None;
772
773    // This Vec tracks paired delimiters
774    let mut block_level: Vec<BlockKind> = vec![];
775
776    // A baseline token is terminated if it's not nested inside of a paired
777    // delimiter and the next character is one of: `|`, `;`, `#` or any
778    // whitespace.
779    fn is_termination(block_level: &[BlockKind], c: char) -> bool {
780        block_level.is_empty() && (c.is_whitespace())
781    }
782
783    // The process of slurping up a baseline token repeats:
784    //
785    // - String literal, which begins with `'`, `"` or `\``, and continues until
786    //   the same character is encountered again.
787    // - Delimiter pair, which begins with `[`, `(`, or `{`, and continues until
788    //   the matching closing delimiter is found, skipping comments and string
789    //   literals.
790    // - When not nested inside of a delimiter pair, when a terminating
791    //   character (whitespace, `|`, `;` or `#`) is encountered, the baseline
792    //   token is done.
793    // - Otherwise, accumulate the character into the current baseline token.
794    while let Some((_, c)) = src.peek() {
795        let c = *c;
796
797        if quote_start.is_some() {
798            // If we encountered the closing quote character for the current
799            // string, we're done with the current string.
800            if Some(c) == quote_start {
801                quote_start = None;
802            }
803        } else if c == '\n' {
804            if is_termination(&block_level, c) {
805                break;
806            }
807        } else if c == '\'' || c == '"' || c == '`' {
808            // We encountered the opening quote of a string literal.
809            quote_start = Some(c);
810        } else if c == '[' {
811            // We encountered an opening `[` delimiter.
812            block_level.push(BlockKind::Bracket);
813        } else if c == ']' {
814            // We encountered a closing `]` delimiter. Pop off the opening `[`
815            // delimiter.
816            if let Some(BlockKind::Bracket) = block_level.last() {
817                let _ = block_level.pop();
818            }
819        } else if c == '{' {
820            // We encountered an opening `{` delimiter.
821            block_level.push(BlockKind::Brace);
822        } else if c == '}' {
823            // We encountered a closing `}` delimiter. Pop off the opening `{`.
824            if let Some(BlockKind::Brace) = block_level.last() {
825                let _ = block_level.pop();
826            }
827        } else if c == '(' {
828            // We encountered an opening `(` delimiter.
829            block_level.push(BlockKind::Parenthesis);
830        } else if c == ')' {
831            // We encountered a closing `)` delimiter. Pop off the opening `(`.
832            if let Some(BlockKind::Parenthesis) = block_level.last() {
833                let _ = block_level.pop();
834            }
835        } else if is_termination(&block_level, c) {
836            break;
837        }
838
839        // Otherwise, accumulate the character into the current token.
840        token_contents.push(c);
841
842        // Consume the character.
843        let _ = src.next();
844    }
845
846    let span = Span::new(start_offset, start_offset + token_contents.len());
847
848    // If there is still unclosed opening delimiters, close them and add
849    // synthetic closing characters to the accumulated token.
850    if block_level.last().is_some() {
851        // let delim: char = (*block).closing();
852        // let cause = ParseError::unexpected_eof(delim.to_string(), span);
853
854        // while let Some(bk) = block_level.pop() {
855        //     token_contents.push(bk.closing());
856        // }
857
858        return Spanned {
859            item: token_contents,
860            span,
861        };
862    }
863
864    if quote_start.is_some() {
865        // The non-lite parse trims quotes on both sides, so we add the expected quote so that
866        // anyone wanting to consume this partial parse (e.g., completions) will be able to get
867        // correct information from the non-lite parse.
868        // token_contents.push(delimiter);
869
870        // return (
871        //     token_contents.spanned(span),
872        //     Some(ParseError::unexpected_eof(delimiter.to_string(), span)),
873        // );
874        return Spanned {
875            item: token_contents,
876            span,
877        };
878    }
879
880    Spanned {
881        item: token_contents,
882        span,
883    }
884}
885
886fn merge_record(record: Record, range: &Range, input_span: Span) -> Value {
887    let (start_index, end_index) = match process_range(range, record.len(), input_span) {
888        Ok(Some((l_idx, r_idx))) => (l_idx, r_idx),
889        Ok(None) => return Value::record(record, input_span),
890        Err(e) => return Value::error(e, input_span),
891    };
892
893    match merge_record_impl(record, start_index, end_index, input_span) {
894        Ok(rec) => Value::record(rec, input_span),
895        Err(err) => Value::error(err, input_span),
896    }
897}
898
899fn process_range(
900    range: &Range,
901    length: usize,
902    input_span: Span,
903) -> Result<Option<(usize, usize)>, ShellError> {
904    match nu_cmd_base::util::process_range(range) {
905        Ok((l_idx, r_idx)) => {
906            let l_idx = if l_idx < 0 {
907                length as isize + l_idx
908            } else {
909                l_idx
910            };
911
912            let r_idx = if r_idx < 0 {
913                length as isize + r_idx
914            } else {
915                r_idx
916            };
917
918            if !(l_idx <= r_idx && (r_idx >= 0 || l_idx < (length as isize))) {
919                return Ok(None);
920            }
921
922            Ok(Some((
923                l_idx.max(0) as usize,
924                (r_idx as usize + 1).min(length),
925            )))
926        }
927        Err(processing_error) => Err(processing_error("could not find range index", input_span)),
928    }
929}
930
931fn merge_record_impl(
932    record: Record,
933    start_index: usize,
934    end_index: usize,
935    input_span: Span,
936) -> Result<Record, ShellError> {
937    let (mut cols, mut vals): (Vec<_>, Vec<_>) = record.into_iter().unzip();
938    // Merge Columns
939    ((start_index + 1)..(cols.len() - end_index + start_index + 1)).for_each(|idx| {
940        cols.swap(idx, end_index - start_index - 1 + idx);
941    });
942    cols.truncate(cols.len() - end_index + start_index + 1);
943
944    // Merge Values
945    let combined = vals
946        .iter()
947        .take(end_index)
948        .skip(start_index)
949        .map(|v| v.coerce_str().unwrap_or_default())
950        .join(" ");
951    let binding = Value::string(combined, Span::unknown());
952    let last_seg = vals.split_off(end_index);
953    vals.truncate(start_index);
954    vals.push(binding);
955    vals.extend(last_seg);
956
957    Record::from_raw_cols_vals(cols, vals, Span::unknown(), input_span)
958}
959
#[cfg(test)]
mod test {
    use super::DetectColumns;

    /// Runs the examples declared by `DetectColumns` through the crate-level
    /// `test_examples` checker.
    #[test]
    fn test_examples() {
        crate::test_examples(DetectColumns);
    }
}
nu_command/strings/detect_columns.rs

nu_command/strings/
detect_columns.rs