nu_command/formats/from/
ssv.rs

1use indexmap::IndexMap;
2use nu_engine::command_prelude::*;
3
/// The `from ssv` command: parses text as space-separated values and produces a table.
#[derive(Clone)]
pub struct FromSsv;

/// Number of consecutive spaces treated as a column separator when the
/// `--minimum-spaces` flag is not supplied.
const DEFAULT_MINIMUM_SPACES: usize = 2;
8
9impl Command for FromSsv {
10    fn name(&self) -> &str {
11        "from ssv"
12    }
13
14    fn signature(&self) -> Signature {
15        Signature::build("from ssv")
16            .input_output_types(vec![(Type::String, Type::table())])
17            .switch(
18                "noheaders",
19                "don't treat the first row as column names",
20                Some('n'),
21            )
22            .switch("aligned-columns", "assume columns are aligned", Some('a'))
23            .named(
24                "minimum-spaces",
25                SyntaxShape::Int,
26                "the minimum spaces to separate columns",
27                Some('m'),
28            )
29            .category(Category::Formats)
30    }
31
32    fn description(&self) -> &str {
33        "Parse text as space-separated values and create a table. The default minimum number of spaces counted as a separator is 2."
34    }
35
36    fn examples(&self) -> Vec<Example<'_>> {
37        vec![
38            Example {
39                example: r#"'FOO   BAR
401   2' | from ssv"#,
41                description: "Converts ssv formatted string to table",
42                result: Some(Value::test_list(vec![Value::test_record(record! {
43                    "FOO" => Value::test_string("1"),
44                    "BAR" => Value::test_string("2"),
45                })])),
46            },
47            Example {
48                example: r#"'FOO   BAR
491   2' | from ssv --noheaders"#,
50                description: "Converts ssv formatted string to table but not treating the first row as column names",
51                result: Some(Value::test_list(vec![
52                    Value::test_record(record! {
53                        "column0" => Value::test_string("FOO"),
54                        "column1" => Value::test_string("BAR"),
55                    }),
56                    Value::test_record(record! {
57                        "column0" => Value::test_string("1"),
58                        "column1" => Value::test_string("2"),
59                    }),
60                ])),
61            },
62        ]
63    }
64
65    fn run(
66        &self,
67        engine_state: &EngineState,
68        stack: &mut Stack,
69        call: &Call,
70        input: PipelineData,
71    ) -> Result<PipelineData, ShellError> {
72        from_ssv(engine_state, stack, call, input)
73    }
74}
75
/// How the column names of the table are obtained.
enum HeaderOptions<'a> {
    /// The contained line holds the column names.
    WithHeaders(&'a str),
    /// There is no header line; columns are named `column0`, `column1`, ...
    WithoutHeaders,
}

/// Parses rows whose columns are aligned underneath each other.
///
/// Column boundaries are derived from where values start: in the header line
/// when `headers` is [`HeaderOptions::WithHeaders`], otherwise from the union
/// of the value start positions found in every line. Each row is then cut at
/// those positions, so a row may contain empty cells, and the last column
/// extends to the end of the line.
///
/// Positions are counted in characters, not bytes, so that multi-byte UTF-8
/// text in headers or rows does not shift or split neighbouring cells.
///
/// * `lines` - the data rows (the header line, if any, is passed in `headers`).
/// * `headers` - where the column names come from.
/// * `separator` - the run of spaces that separates two values.
///
/// Returns one `Vec` of `(column name, trimmed value)` pairs per input line.
fn parse_aligned_columns<'a>(
    lines: impl Iterator<Item = &'a str>,
    headers: HeaderOptions,
    separator: &str,
) -> Vec<Vec<(String, String)>> {
    /// Splits `line` on `separator`, yielding the trimmed, non-empty pieces.
    fn fields<'s>(line: &'s str, separator: &'s str) -> impl Iterator<Item = &'s str> {
        line.split(separator)
            .map(str::trim)
            .filter(|piece| !piece.is_empty())
    }

    /// Character position at which each value of `line` starts.
    fn find_indices(line: &str, separator: &str) -> Vec<usize> {
        let mut byte_pos = 0;
        let mut char_pos = 0;
        let mut positions = Vec::new();

        for value in fields(line, separator) {
            // Search only past the previous value so repeated values map to
            // successive occurrences.
            if let Some(found) = line[byte_pos..].find(value) {
                char_pos += line[byte_pos..byte_pos + found].chars().count();
                positions.push(char_pos);
                char_pos += value.chars().count();
                byte_pos += found + value.len();
            }
        }

        positions
    }

    /// Byte offset of the `char_pos`-th character of `line`, or `line.len()`
    /// when the line has fewer characters than that.
    fn byte_offset(line: &str, char_pos: usize) -> usize {
        line.char_indices()
            .nth(char_pos)
            .map_or(line.len(), |(offset, _)| offset)
    }

    /// Cuts every line at the column start positions given in `headers`.
    fn construct<'a>(
        lines: impl Iterator<Item = &'a str>,
        headers: Vec<(String, usize)>,
    ) -> Vec<Vec<(String, String)>> {
        lines
            .map(|l| {
                headers
                    .iter()
                    .enumerate()
                    .map(|(i, (header_name, start_position))| {
                        let start = byte_offset(l, *start_position);
                        // A cell ends where the next column starts; the last
                        // column runs to the end of the line.
                        let end = headers
                            .get(i + 1)
                            .map_or(l.len(), |(_, next_start)| byte_offset(l, *next_start));
                        let val = l.get(start..end).unwrap_or("").trim().to_owned();
                        (header_name.clone(), val)
                    })
                    .collect()
            })
            .collect()
    }

    match headers {
        HeaderOptions::WithHeaders(headers_raw) => {
            let columns = fields(headers_raw, separator)
                .map(String::from)
                .zip(find_indices(headers_raw, separator))
                .collect();

            construct(lines, columns)
        }
        HeaderOptions::WithoutHeaders => {
            let ls: Vec<&str> = lines.collect();

            // Every position at which any line starts a value becomes a column.
            let mut indices: Vec<usize> = ls
                .iter()
                .flat_map(|&line| find_indices(line, separator))
                .collect();
            indices.sort_unstable();
            indices.dedup();

            let columns = indices
                .into_iter()
                .enumerate()
                .map(|(i, position)| (format!("column{i}"), position))
                .collect();

            construct(ls.into_iter(), columns)
        }
    }
}
181
182fn parse_separated_columns<'a>(
183    lines: impl Iterator<Item = &'a str>,
184    headers: HeaderOptions,
185    separator: &str,
186) -> Vec<Vec<(String, String)>> {
187    fn collect<'a>(
188        headers: Vec<String>,
189        rows: impl Iterator<Item = &'a str>,
190        separator: &str,
191    ) -> Vec<Vec<(String, String)>> {
192        rows.map(|r| {
193            headers
194                .iter()
195                .zip(r.split(separator).map(str::trim).filter(|s| !s.is_empty()))
196                .map(|(a, b)| (a.to_owned(), b.to_owned()))
197                .collect()
198        })
199        .collect()
200    }
201
202    let parse_with_headers = |lines, headers_raw: &str| {
203        let headers = headers_raw
204            .split(&separator)
205            .map(str::trim)
206            .map(str::to_owned)
207            .filter(|s| !s.is_empty())
208            .collect();
209        collect(headers, lines, separator)
210    };
211
212    let parse_without_headers = |ls: Vec<&str>| {
213        let num_columns = ls.iter().map(|r| r.len()).max().unwrap_or(0);
214
215        let headers = (0..=num_columns)
216            .map(|i| format!("column{i}"))
217            .collect::<Vec<String>>();
218        collect(headers, ls.into_iter(), separator)
219    };
220
221    match headers {
222        HeaderOptions::WithHeaders(headers_raw) => parse_with_headers(lines, headers_raw),
223        HeaderOptions::WithoutHeaders => parse_without_headers(lines.collect()),
224    }
225}
226
227fn string_to_table(
228    s: &str,
229    noheaders: bool,
230    aligned_columns: bool,
231    split_at: usize,
232) -> Vec<Vec<(String, String)>> {
233    let mut lines = s
234        .lines()
235        .filter(|l| !l.trim().is_empty() && !l.trim().starts_with('#'));
236    let separator = " ".repeat(std::cmp::max(split_at, 1));
237
238    let (ls, header_options) = if noheaders {
239        (lines, HeaderOptions::WithoutHeaders)
240    } else {
241        match lines.next() {
242            Some(header) => (lines, HeaderOptions::WithHeaders(header)),
243            None => return vec![],
244        }
245    };
246
247    let f = if aligned_columns {
248        parse_aligned_columns
249    } else {
250        parse_separated_columns
251    };
252
253    f(ls, header_options, &separator)
254}
255
256fn from_ssv_string_to_value(
257    s: &str,
258    noheaders: bool,
259    aligned_columns: bool,
260    split_at: usize,
261    span: Span,
262) -> Value {
263    let rows = string_to_table(s, noheaders, aligned_columns, split_at)
264        .into_iter()
265        .map(|row| {
266            let mut dict = IndexMap::new();
267            for (col, entry) in row {
268                dict.insert(col, Value::string(entry, span));
269            }
270            Value::record(dict.into_iter().collect(), span)
271        })
272        .collect();
273
274    Value::list(rows, span)
275}
276
277fn from_ssv(
278    engine_state: &EngineState,
279    stack: &mut Stack,
280    call: &Call,
281    input: PipelineData,
282) -> Result<PipelineData, ShellError> {
283    let name = call.head;
284
285    let noheaders = call.has_flag(engine_state, stack, "noheaders")?;
286    let aligned_columns = call.has_flag(engine_state, stack, "aligned-columns")?;
287    let minimum_spaces: Option<Spanned<usize>> =
288        call.get_flag(engine_state, stack, "minimum-spaces")?;
289
290    let (concat_string, _span, metadata) = input.collect_string_strict(name)?;
291    let split_at = match minimum_spaces {
292        Some(number) => number.item,
293        None => DEFAULT_MINIMUM_SPACES,
294    };
295
296    Ok(
297        from_ssv_string_to_value(&concat_string, noheaders, aligned_columns, split_at, name)
298            .into_pipeline_data_with_metadata(metadata),
299    )
300}
301
// Unit tests for `string_to_table` plus a run of the command's own examples.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned `(column, value)` pair for use in expected results.
    fn owned(x: &str, y: &str) -> (String, String) {
        (String::from(x), String::from(y))
    }

    /// Lines starting with `#` must not show up as rows.
    #[test]
    fn it_filters_comment_lines() {
        let input = r#"
            a       b
            1       2
            3       4
            #comment       line
        "#;
        let result = string_to_table(input, false, true, 1);
        assert_eq!(
            result,
            vec![
                vec![owned("a", "1"), owned("b", "2")],
                vec![owned("a", "3"), owned("b", "4")]
            ]
        );
    }

    /// Empty and whitespace-only lines are skipped, including before the header.
    #[test]
    fn it_trims_empty_and_whitespace_only_lines() {
        let input = r#"

            a       b

            1       2

            3       4
        "#;
        let result = string_to_table(input, false, true, 1);
        assert_eq!(
            result,
            vec![
                vec![owned("a", "1"), owned("b", "2")],
                vec![owned("a", "3"), owned("b", "4")]
            ]
        );
    }

    /// A single header yields single-cell rows.
    #[test]
    fn it_deals_with_single_column_input() {
        let input = r#"
            a
            1
            2
        "#;
        let result = string_to_table(input, false, true, 1);
        assert_eq!(result, vec![vec![owned("a", "1")], vec![owned("a", "2")]]);
    }

    /// With `noheaders` the first line is data and columns are named `columnN`.
    #[test]
    fn it_uses_first_row_as_data_when_noheaders() {
        let input = r#"
            a b
            1 2
            3 4
        "#;
        let result = string_to_table(input, true, true, 1);
        assert_eq!(
            result,
            vec![
                vec![owned("column0", "a"), owned("column1", "b")],
                vec![owned("column0", "1"), owned("column1", "2")],
                vec![owned("column0", "3"), owned("column1", "4")]
            ]
        );
    }

    /// With a separator width of 3, runs of one or two spaces stay inside a value.
    #[test]
    fn it_allows_a_predefined_number_of_spaces() {
        let input = r#"
            column a   column b
            entry 1    entry number  2
            3          four
        "#;

        let result = string_to_table(input, false, true, 3);
        assert_eq!(
            result,
            vec![
                vec![
                    owned("column a", "entry 1"),
                    owned("column b", "entry number  2")
                ],
                vec![owned("column a", "3"), owned("column b", "four")]
            ]
        );
    }

    /// Spaces beyond the separator width must not leak into names or values.
    #[test]
    fn it_trims_remaining_separator_space() {
        let input = r#"
            colA   colB     colC
            val1   val2     val3
        "#;

        let trimmed = |s: &str| s.trim() == s;

        let result = string_to_table(input, false, true, 2);
        assert!(
            result
                .iter()
                .all(|row| row.iter().all(|(a, b)| trimmed(a) && trimmed(b)))
        );
    }

    /// In aligned mode a missing value becomes an empty cell instead of shifting the row.
    #[test]
    fn it_keeps_empty_columns() {
        let input = r#"
            colA   col B     col C
                   val2      val3
            val4   val 5     val 6
            val7             val8
        "#;

        let result = string_to_table(input, false, true, 2);
        assert_eq!(
            result,
            vec![
                vec![
                    owned("colA", ""),
                    owned("col B", "val2"),
                    owned("col C", "val3")
                ],
                vec![
                    owned("colA", "val4"),
                    owned("col B", "val 5"),
                    owned("col C", "val 6")
                ],
                vec![
                    owned("colA", "val7"),
                    owned("col B", ""),
                    owned("col C", "val8")
                ],
            ]
        );
    }

    /// A header line without any data lines produces no rows.
    #[test]
    fn it_can_produce_an_empty_stream_for_header_only_input() {
        let input = "colA   col B";

        let result = string_to_table(input, false, true, 2);
        let expected: Vec<Vec<(String, String)>> = vec![];
        assert_eq!(expected, result);
    }

    /// The last column takes everything up to the end of the line, separators included.
    #[test]
    fn it_uses_the_full_final_column() {
        let input = r#"
            colA   col B
            val1   val2   trailing value that should be included
        "#;

        let result = string_to_table(input, false, true, 2);
        assert_eq!(
            result,
            vec![vec![
                owned("colA", "val1"),
                owned("col B", "val2   trailing value that should be included"),
            ]]
        );
    }

    /// Without headers, columns come from the value start positions of all lines combined.
    #[test]
    fn it_handles_empty_values_when_noheaders_and_aligned_columns() {
        let input = r#"
            a multi-word value  b           d
            1                        3-3    4
                                                       last
        "#;

        let result = string_to_table(input, true, true, 2);
        assert_eq!(
            result,
            vec![
                vec![
                    owned("column0", "a multi-word value"),
                    owned("column1", "b"),
                    owned("column2", ""),
                    owned("column3", "d"),
                    owned("column4", "")
                ],
                vec![
                    owned("column0", "1"),
                    owned("column1", ""),
                    owned("column2", "3-3"),
                    owned("column3", "4"),
                    owned("column4", "")
                ],
                vec![
                    owned("column0", ""),
                    owned("column1", ""),
                    owned("column2", ""),
                    owned("column3", ""),
                    owned("column4", "last")
                ],
            ]
        );
    }

    /// For fully populated, aligned input both parsing strategies must agree.
    #[test]
    fn input_is_parsed_correctly_if_either_option_works() {
        let input = r#"
                docker-registry   docker-registry=default                   docker-registry=default   172.30.78.158   5000/TCP
                kubernetes        component=apiserver,provider=kubernetes   <none>                    172.30.0.2      443/TCP
                kubernetes-ro     component=apiserver,provider=kubernetes   <none>                    172.30.0.1      80/TCP
            "#;

        let aligned_columns_noheaders = string_to_table(input, true, true, 2);
        let separator_noheaders = string_to_table(input, true, false, 2);
        let aligned_columns_with_headers = string_to_table(input, false, true, 2);
        let separator_with_headers = string_to_table(input, false, false, 2);
        assert_eq!(aligned_columns_noheaders, separator_noheaders);
        assert_eq!(aligned_columns_with_headers, separator_with_headers);
    }

    /// Passes the command to the crate's `test_examples` helper.
    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(FromSsv {})
    }
}