1use indexmap::IndexMap;
2use nu_engine::command_prelude::*;
3
/// Command type backing `from ssv`, which turns space-separated text into a table.
#[derive(Clone)]
pub struct FromSsv;
6
/// Number of consecutive spaces that separate two columns when the
/// `--minimum-spaces` flag is not supplied.
const DEFAULT_MINIMUM_SPACES: usize = 2;
8
9impl Command for FromSsv {
10 fn name(&self) -> &str {
11 "from ssv"
12 }
13
14 fn signature(&self) -> Signature {
15 Signature::build("from ssv")
16 .input_output_types(vec![(Type::String, Type::table())])
17 .switch(
18 "noheaders",
19 "don't treat the first row as column names",
20 Some('n'),
21 )
22 .switch("aligned-columns", "assume columns are aligned", Some('a'))
23 .named(
24 "minimum-spaces",
25 SyntaxShape::Int,
26 "the minimum spaces to separate columns",
27 Some('m'),
28 )
29 .category(Category::Formats)
30 }
31
32 fn description(&self) -> &str {
33 "Parse text as space-separated values and create a table. The default minimum number of spaces counted as a separator is 2."
34 }
35
36 fn examples(&self) -> Vec<Example<'_>> {
37 vec![
38 Example {
39 example: r#"'FOO BAR
401 2' | from ssv"#,
41 description: "Converts ssv formatted string to table",
42 result: Some(Value::test_list(vec![Value::test_record(record! {
43 "FOO" => Value::test_string("1"),
44 "BAR" => Value::test_string("2"),
45 })])),
46 },
47 Example {
48 example: r#"'FOO BAR
491 2' | from ssv --noheaders"#,
50 description: "Converts ssv formatted string to table but not treating the first row as column names",
51 result: Some(Value::test_list(vec![
52 Value::test_record(record! {
53 "column0" => Value::test_string("FOO"),
54 "column1" => Value::test_string("BAR"),
55 }),
56 Value::test_record(record! {
57 "column0" => Value::test_string("1"),
58 "column1" => Value::test_string("2"),
59 }),
60 ])),
61 },
62 ]
63 }
64
65 fn run(
66 &self,
67 engine_state: &EngineState,
68 stack: &mut Stack,
69 call: &Call,
70 input: PipelineData,
71 ) -> Result<PipelineData, ShellError> {
72 from_ssv(engine_state, stack, call, input)
73 }
74}
75
/// Tells the row parsers where the column names come from.
enum HeaderOptions<'a> {
    /// The wrapped line is the header row; its fields become the column names.
    WithHeaders(&'a str),
    /// There is no header row; columns are named `column0`, `column1`, ...
    WithoutHeaders,
}

/// Parses rows whose columns line up vertically.
///
/// Column boundaries are the positions at which fields start: in the header
/// line when one is given, otherwise in any of the rows (sorted and
/// de-duplicated). Positions are counted in characters, not bytes, so
/// multi-byte text in a header or in a row does not shift the columns that
/// follow it. Each cell is the text between its column start and the start of
/// the next column (the last column runs to the end of the line), trimmed.
/// A cell may therefore be empty.
///
/// `separator` is the run of spaces that must separate two fields.
/// Returns one `Vec` of `(column name, cell)` pairs per input line.
fn parse_aligned_columns<'a>(
    lines: impl Iterator<Item = &'a str>,
    headers: HeaderOptions,
    separator: &str,
) -> Vec<Vec<(String, String)>> {
    /// Byte offset of the `char_pos`-th character of `line`, clamped to the
    /// end of the line when the line has fewer characters than that.
    fn byte_offset(line: &str, char_pos: usize) -> usize {
        line.char_indices()
            .nth(char_pos)
            .map_or(line.len(), |(offset, _)| offset)
    }

    /// Splits `line` on `separator` and pairs every non-empty, trimmed field
    /// with the character position at which it starts.
    fn fields_with_starts<'l>(line: &'l str, separator: &str) -> Vec<(&'l str, usize)> {
        let mut fields = Vec::new();
        // Both cursors point just past the previously located field.
        let mut byte_cursor = 0;
        let mut char_cursor = 0;

        for value in line
            .split(separator)
            .map(str::trim)
            .filter(|s| !s.is_empty())
        {
            let found = line.get(byte_cursor..).and_then(|rest| rest.find(value));
            if let Some(relative) = found {
                let byte_start = byte_cursor + relative;
                // Only the gap since the previous field has to be counted.
                char_cursor += line
                    .get(byte_cursor..byte_start)
                    .map_or(0, |gap| gap.chars().count());
                fields.push((value, char_cursor));
                char_cursor += value.chars().count();
                byte_cursor = byte_start + value.len();
            }
        }

        fields
    }

    /// Cuts every line at the given column starts and labels the pieces.
    fn construct<'r>(
        lines: impl Iterator<Item = &'r str>,
        columns: &[(String, usize)],
    ) -> Vec<Vec<(String, String)>> {
        lines
            .map(|line| {
                columns
                    .iter()
                    .enumerate()
                    .map(|(i, (name, start))| {
                        let from = byte_offset(line, *start);
                        let to = columns
                            .get(i + 1)
                            .map_or(line.len(), |(_, next)| byte_offset(line, *next));
                        let cell = line.get(from..to).unwrap_or("").trim();
                        (name.clone(), cell.to_owned())
                    })
                    .collect()
            })
            .collect()
    }

    match headers {
        HeaderOptions::WithHeaders(headers_raw) => {
            let columns: Vec<(String, usize)> = fields_with_starts(headers_raw, separator)
                .into_iter()
                .map(|(name, start)| (name.to_owned(), start))
                .collect();
            construct(lines, &columns)
        }
        HeaderOptions::WithoutHeaders => {
            // Every row is scanned before slicing, so the rows are buffered.
            let rows: Vec<&str> = lines.collect();

            let mut starts: Vec<usize> = rows
                .iter()
                .flat_map(|row| fields_with_starts(row, separator))
                .map(|(_, start)| start)
                .collect();
            starts.sort_unstable();
            starts.dedup();

            let columns: Vec<(String, usize)> = starts
                .into_iter()
                .enumerate()
                .map(|(i, start)| (format!("column{i}"), start))
                .collect();
            construct(rows.into_iter(), &columns)
        }
    }
}
181
182fn parse_separated_columns<'a>(
183 lines: impl Iterator<Item = &'a str>,
184 headers: HeaderOptions,
185 separator: &str,
186) -> Vec<Vec<(String, String)>> {
187 fn collect<'a>(
188 headers: Vec<String>,
189 rows: impl Iterator<Item = &'a str>,
190 separator: &str,
191 ) -> Vec<Vec<(String, String)>> {
192 rows.map(|r| {
193 headers
194 .iter()
195 .zip(r.split(separator).map(str::trim).filter(|s| !s.is_empty()))
196 .map(|(a, b)| (a.to_owned(), b.to_owned()))
197 .collect()
198 })
199 .collect()
200 }
201
202 let parse_with_headers = |lines, headers_raw: &str| {
203 let headers = headers_raw
204 .split(&separator)
205 .map(str::trim)
206 .map(str::to_owned)
207 .filter(|s| !s.is_empty())
208 .collect();
209 collect(headers, lines, separator)
210 };
211
212 let parse_without_headers = |ls: Vec<&str>| {
213 let num_columns = ls.iter().map(|r| r.len()).max().unwrap_or(0);
214
215 let headers = (0..=num_columns)
216 .map(|i| format!("column{i}"))
217 .collect::<Vec<String>>();
218 collect(headers, ls.into_iter(), separator)
219 };
220
221 match headers {
222 HeaderOptions::WithHeaders(headers_raw) => parse_with_headers(lines, headers_raw),
223 HeaderOptions::WithoutHeaders => parse_without_headers(lines.collect()),
224 }
225}
226
227fn string_to_table(
228 s: &str,
229 noheaders: bool,
230 aligned_columns: bool,
231 split_at: usize,
232) -> Vec<Vec<(String, String)>> {
233 let mut lines = s
234 .lines()
235 .filter(|l| !l.trim().is_empty() && !l.trim().starts_with('#'));
236 let separator = " ".repeat(std::cmp::max(split_at, 1));
237
238 let (ls, header_options) = if noheaders {
239 (lines, HeaderOptions::WithoutHeaders)
240 } else {
241 match lines.next() {
242 Some(header) => (lines, HeaderOptions::WithHeaders(header)),
243 None => return vec![],
244 }
245 };
246
247 let f = if aligned_columns {
248 parse_aligned_columns
249 } else {
250 parse_separated_columns
251 };
252
253 f(ls, header_options, &separator)
254}
255
256fn from_ssv_string_to_value(
257 s: &str,
258 noheaders: bool,
259 aligned_columns: bool,
260 split_at: usize,
261 span: Span,
262) -> Value {
263 let rows = string_to_table(s, noheaders, aligned_columns, split_at)
264 .into_iter()
265 .map(|row| {
266 let mut dict = IndexMap::new();
267 for (col, entry) in row {
268 dict.insert(col, Value::string(entry, span));
269 }
270 Value::record(dict.into_iter().collect(), span)
271 })
272 .collect();
273
274 Value::list(rows, span)
275}
276
277fn from_ssv(
278 engine_state: &EngineState,
279 stack: &mut Stack,
280 call: &Call,
281 input: PipelineData,
282) -> Result<PipelineData, ShellError> {
283 let name = call.head;
284
285 let noheaders = call.has_flag(engine_state, stack, "noheaders")?;
286 let aligned_columns = call.has_flag(engine_state, stack, "aligned-columns")?;
287 let minimum_spaces: Option<Spanned<usize>> =
288 call.get_flag(engine_state, stack, "minimum-spaces")?;
289
290 let (concat_string, _span, metadata) = input.collect_string_strict(name)?;
291 let split_at = match minimum_spaces {
292 Some(number) => number.item,
293 None => DEFAULT_MINIMUM_SPACES,
294 };
295
296 Ok(
297 from_ssv_string_to_value(&concat_string, noheaders, aligned_columns, split_at, name)
298 .into_pipeline_data_with_metadata(metadata),
299 )
300}
301
// Unit tests for the SSV parsers.
//
// The amount of whitespace inside the fixture strings is significant: runs of
// spaces mark the column separators and, for the aligned parser, the column
// positions. Keep the runs intact when editing.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned `(column, value)` pair for expected results.
    fn owned(x: &str, y: &str) -> (String, String) {
        (String::from(x), String::from(y))
    }

    #[test]
    fn it_filters_comment_lines() {
        let input = r#"
            a b
            1 2
            3 4
            #comment line
        "#;
        let result = string_to_table(input, false, true, 1);
        assert_eq!(
            result,
            vec![
                vec![owned("a", "1"), owned("b", "2")],
                vec![owned("a", "3"), owned("b", "4")]
            ]
        );
    }

    #[test]
    fn it_trims_empty_and_whitespace_only_lines() {
        let input = r#"

            a b

            1 2

            3 4
        "#;
        let result = string_to_table(input, false, true, 1);
        assert_eq!(
            result,
            vec![
                vec![owned("a", "1"), owned("b", "2")],
                vec![owned("a", "3"), owned("b", "4")]
            ]
        );
    }

    #[test]
    fn it_deals_with_single_column_input() {
        let input = r#"
            a
            1
            2
        "#;
        let result = string_to_table(input, false, true, 1);
        assert_eq!(result, vec![vec![owned("a", "1")], vec![owned("a", "2")]]);
    }

    #[test]
    fn it_uses_first_row_as_data_when_noheaders() {
        let input = r#"
            a b
            1 2
            3 4
        "#;
        let result = string_to_table(input, true, true, 1);
        assert_eq!(
            result,
            vec![
                vec![owned("column0", "a"), owned("column1", "b")],
                vec![owned("column0", "1"), owned("column1", "2")],
                vec![owned("column0", "3"), owned("column1", "4")]
            ]
        );
    }

    #[test]
    fn it_allows_a_predefined_number_of_spaces() {
        // Columns are separated by at least three spaces; the two spaces
        // inside "entry number  2" must not split the value.
        let input = r#"
            column a   column b
            entry 1    entry number  2
            3          four
        "#;

        let result = string_to_table(input, false, true, 3);
        assert_eq!(
            result,
            vec![
                vec![
                    owned("column a", "entry 1"),
                    owned("column b", "entry number  2")
                ],
                vec![owned("column a", "3"), owned("column b", "four")]
            ]
        );
    }

    #[test]
    fn it_trims_remaining_separator_space() {
        let input = r#"
            colA   colB     colC
            val1   val2     val3
        "#;

        let trimmed = |s: &str| s.trim() == s;

        let result = string_to_table(input, false, true, 2);
        assert!(
            result
                .iter()
                .all(|row| row.iter().all(|(a, b)| trimmed(a) && trimmed(b)))
        );
    }

    #[test]
    fn it_keeps_empty_columns() {
        let input = r#"
            colA   col B     col C
                   val2      val3
            val4   val 5     val 6
            val7             val8
        "#;

        let result = string_to_table(input, false, true, 2);
        assert_eq!(
            result,
            vec![
                vec![
                    owned("colA", ""),
                    owned("col B", "val2"),
                    owned("col C", "val3")
                ],
                vec![
                    owned("colA", "val4"),
                    owned("col B", "val 5"),
                    owned("col C", "val 6")
                ],
                vec![
                    owned("colA", "val7"),
                    owned("col B", ""),
                    owned("col C", "val8")
                ],
            ]
        );
    }

    #[test]
    fn it_can_produce_an_empty_stream_for_header_only_input() {
        let input = "colA   col B";

        let result = string_to_table(input, false, true, 2);
        let expected: Vec<Vec<(String, String)>> = vec![];
        assert_eq!(expected, result);
    }

    #[test]
    fn it_uses_the_full_final_column() {
        let input = r#"
            colA   col B
            val1   val2   trailing value that should be included
        "#;

        let result = string_to_table(input, false, true, 2);
        assert_eq!(
            result,
            vec![vec![
                owned("colA", "val1"),
                owned("col B", "val2   trailing value that should be included"),
            ]]
        );
    }

    #[test]
    fn it_handles_empty_values_when_noheaders_and_aligned_columns() {
        // Five distinct start positions across the three rows yield five
        // columns; a row has an empty cell wherever it has no text.
        let input = r#"
            a multi-word value  b           d
            1                        3-3    4
                                                       last
        "#;

        let result = string_to_table(input, true, true, 2);
        assert_eq!(
            result,
            vec![
                vec![
                    owned("column0", "a multi-word value"),
                    owned("column1", "b"),
                    owned("column2", ""),
                    owned("column3", "d"),
                    owned("column4", "")
                ],
                vec![
                    owned("column0", "1"),
                    owned("column1", ""),
                    owned("column2", "3-3"),
                    owned("column3", "4"),
                    owned("column4", "")
                ],
                vec![
                    owned("column0", ""),
                    owned("column1", ""),
                    owned("column2", ""),
                    owned("column3", ""),
                    owned("column4", "last")
                ],
            ]
        );
    }

    #[test]
    fn input_is_parsed_correctly_if_either_option_works() {
        // Fully populated, perfectly aligned input: both parsers must agree.
        let input = r#"
            docker-registry   docker-registry=default                   docker-registry=default   172.30.78.158   5000/TCP
            kubernetes        component=apiserver,provider=kubernetes   <none>                    172.30.0.2      443/TCP
            kubernetes-ro     component=apiserver,provider=kubernetes   <none>                    172.30.0.1      80/TCP
        "#;

        let aligned_columns_noheaders = string_to_table(input, true, true, 2);
        let separator_noheaders = string_to_table(input, true, false, 2);
        let aligned_columns_with_headers = string_to_table(input, false, true, 2);
        let separator_with_headers = string_to_table(input, false, false, 2);
        assert_eq!(aligned_columns_noheaders, separator_noheaders);
        assert_eq!(aligned_columns_with_headers, separator_with_headers);
    }

    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(FromSsv {})
    }
}