Skip to main content

nu_command/strings/split/
column.rs

1use fancy_regex::{Regex, escape};
2use nu_engine::command_prelude::*;
3use nu_protocol::shell_error::generic::GenericError;
4
5use super::split;
6
/// Command type behind `split column`.
///
/// Carries no state; all behavior lives in its `Command` implementation.
#[derive(Clone)]
pub struct SplitColumn;
9
10impl Command for SplitColumn {
11    fn name(&self) -> &str {
12        "split column"
13    }
14
15    fn signature(&self) -> Signature {
16        Signature::build("split column")
17            .input_output_types(vec![
18                (Type::String, Type::table()),
19                (
20                    // TODO: no test coverage (is this behavior a bug or a feature?)
21                    Type::List(Box::new(Type::String)),
22                    Type::table(),
23                ),
24            ])
25            .required(
26                "separator",
27                SyntaxShape::String,
28                "The character or string that denotes what separates columns.",
29            )
30            .switch("collapse-empty", "Remove empty columns.", Some('c'))
31            .named(
32                "number",
33                SyntaxShape::Int,
34                "Split into maximum number of columns.",
35                Some('n'),
36            )
37            .switch(
38                "right",
39                "When `--number` is used, collect the remainder in the leftmost column.",
40                None,
41            )
42            .switch("regex", "Separator is a regular expression.", Some('r'))
43            .rest(
44                "rest",
45                SyntaxShape::String,
46                "Column names to give the new columns.",
47            )
48            .category(Category::Strings)
49    }
50
51    fn description(&self) -> &str {
52        "Split a string into multiple columns using a separator."
53    }
54
55    fn search_terms(&self) -> Vec<&str> {
56        vec!["separate", "divide", "regex"]
57    }
58
59    fn examples(&self) -> Vec<Example<'_>> {
60        vec![
61            Example {
62                description: "Split a string into columns by the specified separator.",
63                example: "'a--b--c' | split column '--'",
64                result: Some(Value::test_list(vec![Value::test_record(record! {
65                        "column0" => Value::test_string("a"),
66                        "column1" => Value::test_string("b"),
67                        "column2" => Value::test_string("c"),
68                })])),
69            },
70            Example {
71                description: "Split a string into columns of char and remove the empty columns.",
72                example: "'abc' | split column --collapse-empty ''",
73                result: Some(Value::test_list(vec![Value::test_record(record! {
74                        "column0" => Value::test_string("a"),
75                        "column1" => Value::test_string("b"),
76                        "column2" => Value::test_string("c"),
77                })])),
78            },
79            Example {
80                description: "Split a list of strings into a table.",
81                example: "['a-b' 'c-d'] | split column -",
82                result: Some(Value::test_list(vec![
83                    Value::test_record(record! {
84                        "column0" => Value::test_string("a"),
85                        "column1" => Value::test_string("b"),
86                    }),
87                    Value::test_record(record! {
88                        "column0" => Value::test_string("c"),
89                        "column1" => Value::test_string("d"),
90                    }),
91                ])),
92            },
93            Example {
94                description: "Split a list of strings into a table, ignoring padding.",
95                example: r"['a -  b' 'c  -    d'] | split column --regex '\s*-\s*'",
96                result: Some(Value::test_list(vec![
97                    Value::test_record(record! {
98                        "column0" => Value::test_string("a"),
99                        "column1" => Value::test_string("b"),
100                    }),
101                    Value::test_record(record! {
102                        "column0" => Value::test_string("c"),
103                        "column1" => Value::test_string("d"),
104                    }),
105                ])),
106            },
107            Example {
108                description: "Split into columns, last column may contain the delimiter.",
109                example: "['author: Salina Yoon' r#'title: Where's Ellie?: A Hide-and-Seek Book'#] | split column --number 2 ': ' key value",
110                result: Some(Value::test_list(vec![
111                    Value::test_record(record! {
112                        "key" => Value::test_string("author"),
113                        "value" => Value::test_string("Salina Yoon"),
114                    }),
115                    Value::test_record(record! {
116                        "key" => Value::test_string("title"),
117                        "value" => Value::test_string("Where's Ellie?: A Hide-and-Seek Book"),
118                    }),
119                ])),
120            },
121            Example {
122                description: "Split into columns, first column may contain the delimiter.",
123                example: "['some-package-1.2.3' 'pkg2-1.0' 'do-smart-things-0.9.1'] | split column --number 2 --right '-' name version",
124                result: Some(Value::test_list(vec![
125                    Value::test_record(record! {
126                        "name" => Value::test_string("some-package"),
127                        "version" => Value::test_string("1.2.3"),
128                    }),
129                    Value::test_record(record! {
130                        "name" => Value::test_string("pkg2"),
131                        "version" => Value::test_string("1.0"),
132                    }),
133                    Value::test_record(record! {
134                        "name" => Value::test_string("do-smart-things"),
135                        "version" => Value::test_string("0.9.1"),
136                    }),
137                ])),
138            },
139        ]
140    }
141
142    fn is_const(&self) -> bool {
143        true
144    }
145
146    fn run(
147        &self,
148        engine_state: &EngineState,
149        stack: &mut Stack,
150        call: &Call,
151        input: PipelineData,
152    ) -> Result<PipelineData, ShellError> {
153        let separator: Spanned<String> = call.req(engine_state, stack, 0)?;
154        let rest: Vec<Spanned<String>> = call.rest(engine_state, stack, 1)?;
155        let collapse_empty = call.has_flag(engine_state, stack, "collapse-empty")?;
156        let max_split: Option<usize> = call.get_flag(engine_state, stack, "number")?;
157        let split_from_right = call.has_flag(engine_state, stack, "right")?;
158        let has_regex = call.has_flag(engine_state, stack, "regex")?;
159
160        let args = Arguments {
161            separator,
162            rest,
163            collapse_empty,
164            max_split,
165            split_from_right,
166            has_regex,
167        };
168        split_column(engine_state, call, input, args)
169    }
170
171    fn run_const(
172        &self,
173        working_set: &StateWorkingSet,
174        call: &Call,
175        input: PipelineData,
176    ) -> Result<PipelineData, ShellError> {
177        let separator: Spanned<String> = call.req_const(working_set, 0)?;
178        let rest: Vec<Spanned<String>> = call.rest_const(working_set, 1)?;
179        let collapse_empty = call.has_flag_const(working_set, "collapse-empty")?;
180        let max_split: Option<usize> = call.get_flag_const(working_set, "number")?;
181        let split_from_right = call.has_flag_const(working_set, "right")?;
182        let has_regex = call.has_flag_const(working_set, "regex")?;
183
184        let args = Arguments {
185            separator,
186            rest,
187            collapse_empty,
188            max_split,
189            split_from_right,
190            has_regex,
191        };
192        split_column(working_set.permanent(), call, input, args)
193    }
194}
195
196struct Arguments {
197    separator: Spanned<String>,
198    rest: Vec<Spanned<String>>,
199    collapse_empty: bool,
200    max_split: Option<usize>,
201    split_from_right: bool,
202    has_regex: bool,
203}
204
205fn split_column(
206    engine_state: &EngineState,
207    call: &Call,
208    input: PipelineData,
209    args: Arguments,
210) -> Result<PipelineData, ShellError> {
211    let name_span = call.head;
212    let regex = if args.has_regex {
213        Regex::new(&args.separator.item)
214    } else {
215        let escaped = escape(&args.separator.item);
216        Regex::new(&escaped)
217    }
218    .map_err(|e| {
219        ShellError::Generic(GenericError::new(
220            "Error with regular expression",
221            e.to_string(),
222            args.separator.span,
223        ))
224    })?;
225
226    input.flat_map(
227        move |x| {
228            split_column_helper(
229                &x,
230                &regex,
231                &args.rest,
232                args.collapse_empty,
233                args.max_split,
234                args.split_from_right,
235                name_span,
236            )
237        },
238        engine_state.signals(),
239    )
240}
241
242fn split_column_helper(
243    v: &Value,
244    separator: &Regex,
245    rest: &[Spanned<String>],
246    collapse_empty: bool,
247    max_split: Option<usize>,
248    split_from_right: bool,
249    head: Span,
250) -> Vec<Value> {
251    if let Ok(s) = v.as_str() {
252        let split_result: Vec<_> = match (max_split, split_from_right) {
253            (Some(0), _) => vec![],
254            (Some(max_split), true) => {
255                let sep_bounds: Vec<_> = separator
256                    .find_iter(s)
257                    .filter_map(|x| x.ok())
258                    .map(|x| (x.start(), x.end()))
259                    .collect();
260                // get the last `max_split` separators and split `s` with them
261                split(s, sep_bounds.into_iter().rev().take(max_split - 1).rev()).collect()
262            }
263            (Some(max_split), false) => separator
264                .splitn(s, max_split)
265                .filter_map(|x| x.ok())
266                .filter(|x| !(collapse_empty && x.is_empty()))
267                .collect(),
268            (None, _) => separator
269                .split(s)
270                .filter_map(|x| x.ok())
271                .filter(|x| !(collapse_empty && x.is_empty()))
272                .collect(),
273        };
274        let positional: Vec<_> = rest.iter().map(|f| f.item.clone()).collect();
275
276        // If they didn't provide column names, make up our own
277        let mut record = Record::new();
278        if positional.is_empty() {
279            let mut gen_columns = vec![];
280            for i in 0..split_result.len() {
281                gen_columns.push(format!("column{}", i));
282            }
283
284            for (&k, v) in split_result.iter().zip(&gen_columns) {
285                record.push(v, Value::string(k, head));
286            }
287        } else {
288            for (&k, v) in split_result.iter().zip(&positional) {
289                record.push(v, Value::string(k, head));
290            }
291        }
292        vec![Value::record(record, head)]
293    } else {
294        match v {
295            Value::Error { error, .. } => {
296                vec![Value::error(*error.clone(), head)]
297            }
298            v => {
299                let span = v.span();
300                vec![Value::error(
301                    ShellError::OnlySupportsThisInputType {
302                        exp_input_type: "string".into(),
303                        wrong_type: v.get_type().to_string(),
304                        dst_span: head,
305                        src_span: span,
306                    },
307                    span,
308                )]
309            }
310        }
311    }
312}
313
#[cfg(test)]
mod test {
    use super::*;

    /// Runs every example declared by `SplitColumn::examples` through the test harness.
    #[test]
    fn test_examples() -> nu_test_support::Result {
        let command = SplitColumn;
        nu_test_support::test().examples(command)
    }
}
322}