1use itertools::Itertools;
2use nu_engine::command_prelude::*;
3use nu_protocol::{Config, Range};
4use std::{io::Cursor, iter::Peekable, str::CharIndices, sync::Arc};
5
6type Input<'t> = Peekable<CharIndices<'t>>;
7
/// The `detect columns` command: splits plain text into a table of columns.
#[derive(Clone)]
pub struct DetectColumns;
10
11impl Command for DetectColumns {
12 fn name(&self) -> &str {
13 "detect columns"
14 }
15
16 fn signature(&self) -> Signature {
17 Signature::build("detect columns")
18 .named(
19 "skip",
20 SyntaxShape::Int,
21 "number of rows to skip before detecting",
22 Some('s'),
23 )
24 .input_output_types(vec![(Type::String, Type::table())])
25 .switch("no-headers", "don't detect headers", Some('n'))
26 .named(
27 "combine-columns",
28 SyntaxShape::Range,
29 "columns to be combined; listed as a range",
30 Some('c'),
31 )
32 .switch(
33 "guess",
34 "detect columns by guessing width, it may be useful if default one doesn't work",
35 None,
36 )
37 .category(Category::Strings)
38 }
39
40 fn description(&self) -> &str {
41 "Attempt to automatically split text into multiple columns."
42 }
43
44 fn search_terms(&self) -> Vec<&str> {
45 vec!["split", "tabular"]
46 }
47
48 fn examples(&self) -> Vec<Example> {
49 vec![
50 Example {
51 description: "use --guess if you find default algorithm not working",
52 example: r"
53'Filesystem 1K-blocks Used Available Use% Mounted on
54none 8150224 4 8150220 1% /mnt/c' | detect columns --guess",
55 result: Some(Value::test_list(vec![Value::test_record(record! {
56 "Filesystem" => Value::test_string("none"),
57 "1K-blocks" => Value::test_string("8150224"),
58 "Used" => Value::test_string("4"),
59 "Available" => Value::test_string("8150220"),
60 "Use%" => Value::test_string("1%"),
61 "Mounted on" => Value::test_string("/mnt/c")
62 })])),
63 },
64 Example {
65 description: "detect columns with no headers",
66 example: "'a b c' | detect columns --no-headers",
67 result: Some(Value::test_list(vec![Value::test_record(record! {
68 "column0" => Value::test_string("a"),
69 "column1" => Value::test_string("b"),
70 "column2" => Value::test_string("c"),
71 })])),
72 },
73 Example {
74 description: "",
75 example:
76 "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns 0..1 ",
77 result: None,
78 },
79 Example {
80 description: "Splits a multi-line string into columns with headers detected",
81 example:
82 "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns -2..-1 ",
83 result: None,
84 },
85 Example {
86 description: "Splits a multi-line string into columns with headers detected",
87 example:
88 "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns 2.. ",
89 result: None,
90 },
91 Example {
92 description: "Parse external ls command and combine columns for datetime",
93 example: "^ls -lh | detect columns --no-headers --skip 1 --combine-columns 5..7",
94 result: None,
95 },
96 ]
97 }
98
99 fn is_const(&self) -> bool {
100 true
101 }
102
103 fn run(
104 &self,
105 engine_state: &EngineState,
106 stack: &mut Stack,
107 call: &Call,
108 input: PipelineData,
109 ) -> Result<PipelineData, ShellError> {
110 let num_rows_to_skip: Option<usize> = call.get_flag(engine_state, stack, "skip")?;
111 let noheader = call.has_flag(engine_state, stack, "no-headers")?;
112 let range: Option<Range> = call.get_flag(engine_state, stack, "combine-columns")?;
113 let config = stack.get_config(engine_state);
114
115 let args = Arguments {
116 noheader,
117 num_rows_to_skip,
118 range,
119 config,
120 };
121
122 if call.has_flag(engine_state, stack, "guess")? {
123 guess_width(engine_state, call, input, args)
124 } else {
125 detect_columns(engine_state, call, input, args)
126 }
127 }
128
129 fn run_const(
130 &self,
131 working_set: &StateWorkingSet,
132 call: &Call,
133 input: PipelineData,
134 ) -> Result<PipelineData, ShellError> {
135 let num_rows_to_skip: Option<usize> = call.get_flag_const(working_set, "skip")?;
136 let noheader = call.has_flag_const(working_set, "no-headers")?;
137 let range: Option<Range> = call.get_flag_const(working_set, "combine-columns")?;
138 let config = working_set.get_config().clone();
139
140 let args = Arguments {
141 noheader,
142 num_rows_to_skip,
143 range,
144 config,
145 };
146
147 if call.has_flag_const(working_set, "guess")? {
148 guess_width(working_set.permanent(), call, input, args)
149 } else {
150 detect_columns(working_set.permanent(), call, input, args)
151 }
152 }
153}
154
/// Flag values collected by `run` / `run_const` and handed to the detection
/// functions.
struct Arguments {
    /// Value of `--skip`: number of leading input lines to drop, if given.
    num_rows_to_skip: Option<usize>,
    /// Set by `--no-headers`: columns are named `column0`, `column1`, ... and
    /// the first line is kept as data.
    noheader: bool,
    /// Value of `--combine-columns`: the columns to merge into one, if given.
    range: Option<Range>,
    /// Configuration passed to `collect_string` when reading the input as text.
    config: Arc<Config>,
}
161
162fn guess_width(
163 engine_state: &EngineState,
164 call: &Call,
165 input: PipelineData,
166 args: Arguments,
167) -> Result<PipelineData, ShellError> {
168 use super::guess_width::GuessWidth;
169 let input_span = input.span().unwrap_or(call.head);
170
171 let mut input = input.collect_string("", &args.config)?;
172 if let Some(rows) = args.num_rows_to_skip {
173 input = input.lines().skip(rows).map(|x| x.to_string()).join("\n");
174 }
175
176 let mut guess_width = GuessWidth::new_reader(Box::new(Cursor::new(input)));
177
178 let result = guess_width.read_all();
179
180 if result.is_empty() {
181 return Ok(Value::nothing(input_span).into_pipeline_data());
182 }
183 if !args.noheader {
184 let columns = result[0].clone();
185 Ok(result
186 .into_iter()
187 .skip(1)
188 .map(move |s| {
189 let mut values: Vec<Value> = s
190 .into_iter()
191 .map(|v| Value::string(v, input_span))
192 .collect();
193 for _ in values.len()..columns.len() {
195 values.push(Value::string("", input_span));
196 }
197 let record =
198 Record::from_raw_cols_vals(columns.clone(), values, input_span, input_span);
199 match record {
200 Ok(r) => match &args.range {
201 Some(range) => merge_record(r, range, input_span),
202 None => Value::record(r, input_span),
203 },
204 Err(e) => Value::error(e, input_span),
205 }
206 })
207 .into_pipeline_data(input_span, engine_state.signals().clone()))
208 } else {
209 let length = result[0].len();
210 let columns: Vec<String> = (0..length).map(|n| format!("column{n}")).collect();
211 Ok(result
212 .into_iter()
213 .map(move |s| {
214 let mut values: Vec<Value> = s
215 .into_iter()
216 .map(|v| Value::string(v, input_span))
217 .collect();
218 for _ in values.len()..columns.len() {
220 values.push(Value::string("", input_span));
221 }
222 let record =
223 Record::from_raw_cols_vals(columns.clone(), values, input_span, input_span);
224 match record {
225 Ok(r) => match &args.range {
226 Some(range) => merge_record(r, range, input_span),
227 None => Value::record(r, input_span),
228 },
229 Err(e) => Value::error(e, input_span),
230 }
231 })
232 .into_pipeline_data(input_span, engine_state.signals().clone()))
233 }
234}
235
236fn detect_columns(
237 engine_state: &EngineState,
238 call: &Call,
239 input: PipelineData,
240 args: Arguments,
241) -> Result<PipelineData, ShellError> {
242 let name_span = call.head;
243 let input = input.collect_string("", &args.config)?;
244
245 let input: Vec<_> = input
246 .lines()
247 .skip(args.num_rows_to_skip.unwrap_or_default())
248 .map(|x| x.to_string())
249 .collect();
250
251 let mut input = input.into_iter();
252 let headers = input.next();
253
254 if let Some(orig_headers) = headers {
255 let mut headers = find_columns(&orig_headers);
256
257 if args.noheader {
258 for header in headers.iter_mut().enumerate() {
259 header.1.item = format!("column{}", header.0);
260 }
261 }
262
263 Ok(args
264 .noheader
265 .then_some(orig_headers)
266 .into_iter()
267 .chain(input)
268 .map(move |x| {
269 let row = find_columns(&x);
270
271 let mut record = Record::new();
272
273 if headers.len() == row.len() {
274 for (header, val) in headers.iter().zip(row.iter()) {
275 record.push(&header.item, Value::string(&val.item, name_span));
276 }
277 } else {
278 let mut pre_output = vec![];
279
280 for cell in row {
282 for header in &headers {
283 if cell.span.start <= header.span.end
284 && cell.span.end > header.span.start
285 {
286 pre_output.push((
287 header.item.to_string(),
288 Value::string(&cell.item, name_span),
289 ));
290 }
291 }
292 }
293
294 for header in &headers {
295 let mut found = false;
296 for pre_o in &pre_output {
297 if pre_o.0 == header.item {
298 found = true;
299 break;
300 }
301 }
302
303 if !found {
304 pre_output.push((header.item.to_string(), Value::nothing(name_span)));
305 }
306 }
307
308 for header in &headers {
309 for pre_o in &pre_output {
310 if pre_o.0 == header.item {
311 record.push(&header.item, pre_o.1.clone());
312 }
313 }
314 }
315 }
316
317 match &args.range {
318 Some(range) => merge_record(record, range, name_span),
319 None => Value::record(record, name_span),
320 }
321 })
322 .into_pipeline_data(call.head, engine_state.signals().clone()))
323 } else {
324 Ok(PipelineData::empty())
325 }
326}
327
328pub fn find_columns(input: &str) -> Vec<Spanned<String>> {
329 let mut chars = input.char_indices().peekable();
330 let mut output = vec![];
331
332 while let Some((_, c)) = chars.peek() {
333 if c.is_whitespace() {
334 let _ = chars.next();
337 } else {
338 let result = baseline(&mut chars);
341
342 output.push(result);
343 }
344 }
345
346 output
347}
348
/// Kind of bracket pair that `baseline` is currently inside of.
#[derive(Clone, Copy)]
enum BlockKind {
    /// Opened by `(`, closed by `)`.
    Parenthesis,
    /// Opened by `{`, closed by `}`.
    Brace,
    /// Opened by `[`, closed by `]`.
    Bracket,
}
355
356fn baseline(src: &mut Input) -> Spanned<String> {
357 let mut token_contents = String::new();
358
359 let start_offset = if let Some((pos, _)) = src.peek() {
360 *pos
361 } else {
362 0
363 };
364
365 let mut quote_start: Option<char> = None;
369
370 let mut block_level: Vec<BlockKind> = vec![];
372
373 fn is_termination(block_level: &[BlockKind], c: char) -> bool {
377 block_level.is_empty() && (c.is_whitespace())
378 }
379
380 while let Some((_, c)) = src.peek() {
392 let c = *c;
393
394 if quote_start.is_some() {
395 if Some(c) == quote_start {
398 quote_start = None;
399 }
400 } else if c == '\n' {
401 if is_termination(&block_level, c) {
402 break;
403 }
404 } else if c == '\'' || c == '"' || c == '`' {
405 quote_start = Some(c);
407 } else if c == '[' {
408 block_level.push(BlockKind::Bracket);
410 } else if c == ']' {
411 if let Some(BlockKind::Bracket) = block_level.last() {
414 let _ = block_level.pop();
415 }
416 } else if c == '{' {
417 block_level.push(BlockKind::Brace);
419 } else if c == '}' {
420 if let Some(BlockKind::Brace) = block_level.last() {
422 let _ = block_level.pop();
423 }
424 } else if c == '(' {
425 block_level.push(BlockKind::Parenthesis);
427 } else if c == ')' {
428 if let Some(BlockKind::Parenthesis) = block_level.last() {
430 let _ = block_level.pop();
431 }
432 } else if is_termination(&block_level, c) {
433 break;
434 }
435
436 token_contents.push(c);
438
439 let _ = src.next();
441 }
442
443 let span = Span::new(start_offset, start_offset + token_contents.len());
444
445 if block_level.last().is_some() {
448 return Spanned {
456 item: token_contents,
457 span,
458 };
459 }
460
461 if quote_start.is_some() {
462 return Spanned {
472 item: token_contents,
473 span,
474 };
475 }
476
477 Spanned {
478 item: token_contents,
479 span,
480 }
481}
482
483fn merge_record(record: Record, range: &Range, input_span: Span) -> Value {
484 let (start_index, end_index) = match process_range(range, record.len(), input_span) {
485 Ok(Some((l_idx, r_idx))) => (l_idx, r_idx),
486 Ok(None) => return Value::record(record, input_span),
487 Err(e) => return Value::error(e, input_span),
488 };
489
490 match merge_record_impl(record, start_index, end_index, input_span) {
491 Ok(rec) => Value::record(rec, input_span),
492 Err(err) => Value::error(err, input_span),
493 }
494}
495
496fn process_range(
497 range: &Range,
498 length: usize,
499 input_span: Span,
500) -> Result<Option<(usize, usize)>, ShellError> {
501 match nu_cmd_base::util::process_range(range) {
502 Ok((l_idx, r_idx)) => {
503 let l_idx = if l_idx < 0 {
504 length as isize + l_idx
505 } else {
506 l_idx
507 };
508
509 let r_idx = if r_idx < 0 {
510 length as isize + r_idx
511 } else {
512 r_idx
513 };
514
515 if !(l_idx <= r_idx && (r_idx >= 0 || l_idx < (length as isize))) {
516 return Ok(None);
517 }
518
519 Ok(Some((
520 l_idx.max(0) as usize,
521 (r_idx as usize + 1).min(length),
522 )))
523 }
524 Err(processing_error) => Err(processing_error("could not find range index", input_span)),
525 }
526}
527
528fn merge_record_impl(
529 record: Record,
530 start_index: usize,
531 end_index: usize,
532 input_span: Span,
533) -> Result<Record, ShellError> {
534 let (mut cols, mut vals): (Vec<_>, Vec<_>) = record.into_iter().unzip();
535 ((start_index + 1)..(cols.len() - end_index + start_index + 1)).for_each(|idx| {
537 cols.swap(idx, end_index - start_index - 1 + idx);
538 });
539 cols.truncate(cols.len() - end_index + start_index + 1);
540
541 let combined = vals
543 .iter()
544 .take(end_index)
545 .skip(start_index)
546 .map(|v| v.coerce_str().unwrap_or_default())
547 .join(" ");
548 let binding = Value::string(combined, Span::unknown());
549 let last_seg = vals.split_off(end_index);
550 vals.truncate(start_index);
551 vals.push(binding);
552 vals.extend(last_seg);
553
554 Record::from_raw_cols_vals(cols, vals, Span::unknown(), input_span)
555}
556
#[cfg(test)]
mod test {
    use super::*;

    /// Passes the command to the crate-level `test_examples` helper.
    #[test]
    fn test_examples() {
        crate::test_examples(DetectColumns)
    }
}
565}