1use itertools::Itertools;
2use nu_engine::command_prelude::*;
3use nu_protocol::{Config, Range};
4use std::{io::Cursor, iter::Peekable, str::CharIndices, sync::Arc};
5
/// Character stream consumed by the tokenizer: a peekable iterator over the
/// `(byte_offset, char)` pairs of one input line.
type Input<'t> = Peekable<CharIndices<'t>>;
7
/// The `detect columns` command: splits plain text into a table of columns.
#[derive(Clone)]
pub struct DetectColumns;
10
11impl Command for DetectColumns {
12 fn name(&self) -> &str {
13 "detect columns"
14 }
15
16 fn signature(&self) -> Signature {
17 Signature::build("detect columns")
18 .named(
19 "skip",
20 SyntaxShape::Int,
21 "number of rows to skip before detecting",
22 Some('s'),
23 )
24 .input_output_types(vec![(Type::String, Type::table())])
25 .switch("no-headers", "don't detect headers", Some('n'))
26 .named(
27 "combine-columns",
28 SyntaxShape::Range,
29 "columns to be combined; listed as a range",
30 Some('c'),
31 )
32 .switch(
33 "guess",
34 "detect columns by guessing width, it may be useful if default one doesn't work",
35 None,
36 )
37 .category(Category::Strings)
38 }
39
40 fn description(&self) -> &str {
41 "Attempt to automatically split text into multiple columns."
42 }
43
44 fn search_terms(&self) -> Vec<&str> {
45 vec!["split", "tabular"]
46 }
47
48 fn examples(&self) -> Vec<Example> {
49 vec![
50 Example {
51 description: "use --guess if you find default algorithm not working",
52 example: r"
53'Filesystem 1K-blocks Used Available Use% Mounted on
54none 8150224 4 8150220 1% /mnt/c' | detect columns --guess",
55 result: Some(Value::test_list(vec![Value::test_record(record! {
56 "Filesystem" => Value::test_string("none"),
57 "1K-blocks" => Value::test_string("8150224"),
58 "Used" => Value::test_string("4"),
59 "Available" => Value::test_string("8150220"),
60 "Use%" => Value::test_string("1%"),
61 "Mounted on" => Value::test_string("/mnt/c")
62 })])),
63 },
64 Example {
65 description: "detect columns with no headers",
66 example: "'a b c' | detect columns --no-headers",
67 result: Some(Value::test_list(vec![Value::test_record(record! {
68 "column0" => Value::test_string("a"),
69 "column1" => Value::test_string("b"),
70 "column2" => Value::test_string("c"),
71 })])),
72 },
73 Example {
74 description: "",
75 example: "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns 0..1 ",
76 result: None,
77 },
78 Example {
79 description: "Splits a multi-line string into columns with headers detected",
80 example: "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns -2..-1 ",
81 result: None,
82 },
83 Example {
84 description: "Splits a multi-line string into columns with headers detected",
85 example: "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns 2.. ",
86 result: None,
87 },
88 Example {
89 description: "Parse external ls command and combine columns for datetime",
90 example: "^ls -lh | detect columns --no-headers --skip 1 --combine-columns 5..7",
91 result: None,
92 },
93 ]
94 }
95
96 fn is_const(&self) -> bool {
97 true
98 }
99
100 fn run(
101 &self,
102 engine_state: &EngineState,
103 stack: &mut Stack,
104 call: &Call,
105 input: PipelineData,
106 ) -> Result<PipelineData, ShellError> {
107 let num_rows_to_skip: Option<usize> = call.get_flag(engine_state, stack, "skip")?;
108 let noheader = call.has_flag(engine_state, stack, "no-headers")?;
109 let range: Option<Range> = call.get_flag(engine_state, stack, "combine-columns")?;
110 let config = stack.get_config(engine_state);
111
112 let args = Arguments {
113 noheader,
114 num_rows_to_skip,
115 range,
116 config,
117 };
118
119 if call.has_flag(engine_state, stack, "guess")? {
120 guess_width(engine_state, call, input, args)
121 } else {
122 detect_columns(engine_state, call, input, args)
123 }
124 }
125
126 fn run_const(
127 &self,
128 working_set: &StateWorkingSet,
129 call: &Call,
130 input: PipelineData,
131 ) -> Result<PipelineData, ShellError> {
132 let num_rows_to_skip: Option<usize> = call.get_flag_const(working_set, "skip")?;
133 let noheader = call.has_flag_const(working_set, "no-headers")?;
134 let range: Option<Range> = call.get_flag_const(working_set, "combine-columns")?;
135 let config = working_set.get_config().clone();
136
137 let args = Arguments {
138 noheader,
139 num_rows_to_skip,
140 range,
141 config,
142 };
143
144 if call.has_flag_const(working_set, "guess")? {
145 guess_width(working_set.permanent(), call, input, args)
146 } else {
147 detect_columns(working_set.permanent(), call, input, args)
148 }
149 }
150}
151
/// Flag values gathered by `run` / `run_const` and handed to the detection
/// functions.
struct Arguments {
    /// Value of `--skip`: number of leading input lines to drop, if given.
    num_rows_to_skip: Option<usize>,
    /// Set by `--no-headers`: the first line is data and columns are named
    /// `column0`, `column1`, ...
    noheader: bool,
    /// Value of `--combine-columns`: range of columns to merge into one.
    range: Option<Range>,
    /// Configuration passed to `collect_string` when reading the input.
    config: Arc<Config>,
}
158
159fn guess_width(
160 engine_state: &EngineState,
161 call: &Call,
162 input: PipelineData,
163 args: Arguments,
164) -> Result<PipelineData, ShellError> {
165 use super::guess_width::GuessWidth;
166 let input_span = input.span().unwrap_or(call.head);
167
168 let mut input = input.collect_string("", &args.config)?;
169 if let Some(rows) = args.num_rows_to_skip {
170 input = input.lines().skip(rows).map(|x| x.to_string()).join("\n");
171 }
172
173 let mut guess_width = GuessWidth::new_reader(Box::new(Cursor::new(input)));
174
175 let result = guess_width.read_all();
176
177 if result.is_empty() {
178 return Ok(Value::nothing(input_span).into_pipeline_data());
179 }
180 if !args.noheader {
181 let columns = result[0].clone();
182 Ok(result
183 .into_iter()
184 .skip(1)
185 .map(move |s| {
186 let mut values: Vec<Value> = s
187 .into_iter()
188 .map(|v| Value::string(v, input_span))
189 .collect();
190 for _ in values.len()..columns.len() {
192 values.push(Value::string("", input_span));
193 }
194 let record =
195 Record::from_raw_cols_vals(columns.clone(), values, input_span, input_span);
196 match record {
197 Ok(r) => match &args.range {
198 Some(range) => merge_record(r, range, input_span),
199 None => Value::record(r, input_span),
200 },
201 Err(e) => Value::error(e, input_span),
202 }
203 })
204 .into_pipeline_data(input_span, engine_state.signals().clone()))
205 } else {
206 let length = result[0].len();
207 let columns: Vec<String> = (0..length).map(|n| format!("column{n}")).collect();
208 Ok(result
209 .into_iter()
210 .map(move |s| {
211 let mut values: Vec<Value> = s
212 .into_iter()
213 .map(|v| Value::string(v, input_span))
214 .collect();
215 for _ in values.len()..columns.len() {
217 values.push(Value::string("", input_span));
218 }
219 let record =
220 Record::from_raw_cols_vals(columns.clone(), values, input_span, input_span);
221 match record {
222 Ok(r) => match &args.range {
223 Some(range) => merge_record(r, range, input_span),
224 None => Value::record(r, input_span),
225 },
226 Err(e) => Value::error(e, input_span),
227 }
228 })
229 .into_pipeline_data(input_span, engine_state.signals().clone()))
230 }
231}
232
233fn detect_columns(
234 engine_state: &EngineState,
235 call: &Call,
236 input: PipelineData,
237 args: Arguments,
238) -> Result<PipelineData, ShellError> {
239 let name_span = call.head;
240 let input = input.collect_string("", &args.config)?;
241
242 let input: Vec<_> = input
243 .lines()
244 .skip(args.num_rows_to_skip.unwrap_or_default())
245 .map(|x| x.to_string())
246 .collect();
247
248 let mut input = input.into_iter();
249 let headers = input.next();
250
251 if let Some(orig_headers) = headers {
252 let mut headers = find_columns(&orig_headers);
253
254 if args.noheader {
255 for header in headers.iter_mut().enumerate() {
256 header.1.item = format!("column{}", header.0);
257 }
258 }
259
260 Ok(args
261 .noheader
262 .then_some(orig_headers)
263 .into_iter()
264 .chain(input)
265 .map(move |x| {
266 let row = find_columns(&x);
267
268 let mut record = Record::new();
269
270 if headers.len() == row.len() {
271 for (header, val) in headers.iter().zip(row.iter()) {
272 record.push(&header.item, Value::string(&val.item, name_span));
273 }
274 } else {
275 let mut pre_output = vec![];
276
277 for cell in row {
279 for header in &headers {
280 if cell.span.start <= header.span.end
281 && cell.span.end > header.span.start
282 {
283 pre_output.push((
284 header.item.to_string(),
285 Value::string(&cell.item, name_span),
286 ));
287 }
288 }
289 }
290
291 for header in &headers {
292 let mut found = false;
293 for pre_o in &pre_output {
294 if pre_o.0 == header.item {
295 found = true;
296 break;
297 }
298 }
299
300 if !found {
301 pre_output.push((header.item.to_string(), Value::nothing(name_span)));
302 }
303 }
304
305 for header in &headers {
306 for pre_o in &pre_output {
307 if pre_o.0 == header.item {
308 record.push(&header.item, pre_o.1.clone());
309 }
310 }
311 }
312 }
313
314 match &args.range {
315 Some(range) => merge_record(record, range, name_span),
316 None => Value::record(record, name_span),
317 }
318 })
319 .into_pipeline_data(call.head, engine_state.signals().clone()))
320 } else {
321 Ok(PipelineData::empty())
322 }
323}
324
325pub fn find_columns(input: &str) -> Vec<Spanned<String>> {
326 let mut chars = input.char_indices().peekable();
327 let mut output = vec![];
328
329 while let Some((_, c)) = chars.peek() {
330 if c.is_whitespace() {
331 let _ = chars.next();
334 } else {
335 let result = baseline(&mut chars);
338
339 output.push(result);
340 }
341 }
342
343 output
344}
345
/// Kind of bracket-like block that `baseline` currently has open; while any
/// block is open, whitespace does not end the token.
#[derive(Clone, Copy)]
enum BlockKind {
    /// Opened by `(`, closed by `)`.
    Parenthesis,
    /// Opened by `{`, closed by `}`.
    Brace,
    /// Opened by `[`, closed by `]`.
    Bracket,
}
352
353fn baseline(src: &mut Input) -> Spanned<String> {
354 let mut token_contents = String::new();
355
356 let start_offset = if let Some((pos, _)) = src.peek() {
357 *pos
358 } else {
359 0
360 };
361
362 let mut quote_start: Option<char> = None;
366
367 let mut block_level: Vec<BlockKind> = vec![];
369
370 fn is_termination(block_level: &[BlockKind], c: char) -> bool {
374 block_level.is_empty() && (c.is_whitespace())
375 }
376
377 while let Some((_, c)) = src.peek() {
389 let c = *c;
390
391 if quote_start.is_some() {
392 if Some(c) == quote_start {
395 quote_start = None;
396 }
397 } else if c == '\n' {
398 if is_termination(&block_level, c) {
399 break;
400 }
401 } else if c == '\'' || c == '"' || c == '`' {
402 quote_start = Some(c);
404 } else if c == '[' {
405 block_level.push(BlockKind::Bracket);
407 } else if c == ']' {
408 if let Some(BlockKind::Bracket) = block_level.last() {
411 let _ = block_level.pop();
412 }
413 } else if c == '{' {
414 block_level.push(BlockKind::Brace);
416 } else if c == '}' {
417 if let Some(BlockKind::Brace) = block_level.last() {
419 let _ = block_level.pop();
420 }
421 } else if c == '(' {
422 block_level.push(BlockKind::Parenthesis);
424 } else if c == ')' {
425 if let Some(BlockKind::Parenthesis) = block_level.last() {
427 let _ = block_level.pop();
428 }
429 } else if is_termination(&block_level, c) {
430 break;
431 }
432
433 token_contents.push(c);
435
436 let _ = src.next();
438 }
439
440 let span = Span::new(start_offset, start_offset + token_contents.len());
441
442 if block_level.last().is_some() {
445 return Spanned {
453 item: token_contents,
454 span,
455 };
456 }
457
458 if quote_start.is_some() {
459 return Spanned {
469 item: token_contents,
470 span,
471 };
472 }
473
474 Spanned {
475 item: token_contents,
476 span,
477 }
478}
479
480fn merge_record(record: Record, range: &Range, input_span: Span) -> Value {
481 let (start_index, end_index) = match process_range(range, record.len(), input_span) {
482 Ok(Some((l_idx, r_idx))) => (l_idx, r_idx),
483 Ok(None) => return Value::record(record, input_span),
484 Err(e) => return Value::error(e, input_span),
485 };
486
487 match merge_record_impl(record, start_index, end_index, input_span) {
488 Ok(rec) => Value::record(rec, input_span),
489 Err(err) => Value::error(err, input_span),
490 }
491}
492
493fn process_range(
494 range: &Range,
495 length: usize,
496 input_span: Span,
497) -> Result<Option<(usize, usize)>, ShellError> {
498 match nu_cmd_base::util::process_range(range) {
499 Ok((l_idx, r_idx)) => {
500 let l_idx = if l_idx < 0 {
501 length as isize + l_idx
502 } else {
503 l_idx
504 };
505
506 let r_idx = if r_idx < 0 {
507 length as isize + r_idx
508 } else {
509 r_idx
510 };
511
512 if !(l_idx <= r_idx && (r_idx >= 0 || l_idx < (length as isize))) {
513 return Ok(None);
514 }
515
516 Ok(Some((
517 l_idx.max(0) as usize,
518 (r_idx as usize + 1).min(length),
519 )))
520 }
521 Err(processing_error) => Err(processing_error("could not find range index", input_span)),
522 }
523}
524
525fn merge_record_impl(
526 record: Record,
527 start_index: usize,
528 end_index: usize,
529 input_span: Span,
530) -> Result<Record, ShellError> {
531 let (mut cols, mut vals): (Vec<_>, Vec<_>) = record.into_iter().unzip();
532 ((start_index + 1)..(cols.len() - end_index + start_index + 1)).for_each(|idx| {
534 cols.swap(idx, end_index - start_index - 1 + idx);
535 });
536 cols.truncate(cols.len() - end_index + start_index + 1);
537
538 let combined = vals
540 .iter()
541 .take(end_index)
542 .skip(start_index)
543 .map(|v| v.coerce_str().unwrap_or_default())
544 .join(" ");
545 let binding = Value::string(combined, Span::unknown());
546 let last_seg = vals.split_off(end_index);
547 vals.truncate(start_index);
548 vals.push(binding);
549 vals.extend(last_seg);
550
551 Record::from_raw_cols_vals(cols, vals, Span::unknown(), input_span)
552}
553
#[cfg(test)]
mod test {
    use super::*;

    /// Passes `DetectColumns` to the crate-level `test_examples` helper
    /// (presumably it runs the entries from `examples()` — confirm in the
    /// helper).
    #[test]
    fn test_examples() {
        crate::test_examples(DetectColumns)
    }
}