nu_command/strings/split/
column.rs

1use fancy_regex::{Regex, escape};
2use nu_engine::command_prelude::*;
3
/// The `split column` command: splits string input into table columns
/// using a literal or regular-expression separator.
#[derive(Clone)]
pub struct SplitColumn;
6
7impl Command for SplitColumn {
8    fn name(&self) -> &str {
9        "split column"
10    }
11
12    fn signature(&self) -> Signature {
13        Signature::build("split column")
14            .input_output_types(vec![
15                (Type::String, Type::table()),
16                (
17                    // TODO: no test coverage (is this behavior a bug or a feature?)
18                    Type::List(Box::new(Type::String)),
19                    Type::table(),
20                ),
21            ])
22            .required(
23                "separator",
24                SyntaxShape::String,
25                "The character or string that denotes what separates columns.",
26            )
27            .switch("collapse-empty", "remove empty columns", Some('c'))
28            .named(
29                "number",
30                SyntaxShape::Int,
31                "Split into maximum number of items",
32                Some('n'),
33            )
34            .switch("regex", "separator is a regular expression", Some('r'))
35            .rest(
36                "rest",
37                SyntaxShape::String,
38                "Column names to give the new columns.",
39            )
40            .category(Category::Strings)
41    }
42
43    fn description(&self) -> &str {
44        "Split a string into multiple columns using a separator."
45    }
46
47    fn search_terms(&self) -> Vec<&str> {
48        vec!["separate", "divide", "regex"]
49    }
50
51    fn examples(&self) -> Vec<Example> {
52        vec![
53            Example {
54                description: "Split a string into columns by the specified separator",
55                example: "'a--b--c' | split column '--'",
56                result: Some(Value::test_list(vec![Value::test_record(record! {
57                        "column1" => Value::test_string("a"),
58                        "column2" => Value::test_string("b"),
59                        "column3" => Value::test_string("c"),
60                })])),
61            },
62            Example {
63                description: "Split a string into columns of char and remove the empty columns",
64                example: "'abc' | split column --collapse-empty ''",
65                result: Some(Value::test_list(vec![Value::test_record(record! {
66                        "column1" => Value::test_string("a"),
67                        "column2" => Value::test_string("b"),
68                        "column3" => Value::test_string("c"),
69                })])),
70            },
71            Example {
72                description: "Split a list of strings into a table",
73                example: "['a-b' 'c-d'] | split column -",
74                result: Some(Value::test_list(vec![
75                    Value::test_record(record! {
76                        "column1" => Value::test_string("a"),
77                        "column2" => Value::test_string("b"),
78                    }),
79                    Value::test_record(record! {
80                        "column1" => Value::test_string("c"),
81                        "column2" => Value::test_string("d"),
82                    }),
83                ])),
84            },
85            Example {
86                description: "Split a list of strings into a table, ignoring padding",
87                example: r"['a -  b' 'c  -    d'] | split column --regex '\s*-\s*'",
88                result: Some(Value::test_list(vec![
89                    Value::test_record(record! {
90                        "column1" => Value::test_string("a"),
91                        "column2" => Value::test_string("b"),
92                    }),
93                    Value::test_record(record! {
94                        "column1" => Value::test_string("c"),
95                        "column2" => Value::test_string("d"),
96                    }),
97                ])),
98            },
99            Example {
100                description: "Split into columns, last column may contain the delimiter",
101                example: r"['author: Salina Yoon' r#'title: Where's Ellie?: A Hide-and-Seek Book'#] | split column --number 2 ': ' key value",
102                result: Some(Value::test_list(vec![
103                    Value::test_record(record! {
104                        "key" => Value::test_string("author"),
105                        "value" => Value::test_string("Salina Yoon"),
106                    }),
107                    Value::test_record(record! {
108                        "key" => Value::test_string("title"),
109                        "value" => Value::test_string("Where's Ellie?: A Hide-and-Seek Book"),
110                    }),
111                ])),
112            },
113        ]
114    }
115
116    fn is_const(&self) -> bool {
117        true
118    }
119
120    fn run(
121        &self,
122        engine_state: &EngineState,
123        stack: &mut Stack,
124        call: &Call,
125        input: PipelineData,
126    ) -> Result<PipelineData, ShellError> {
127        let separator: Spanned<String> = call.req(engine_state, stack, 0)?;
128        let rest: Vec<Spanned<String>> = call.rest(engine_state, stack, 1)?;
129        let collapse_empty = call.has_flag(engine_state, stack, "collapse-empty")?;
130        let max_split: Option<usize> = call.get_flag(engine_state, stack, "number")?;
131        let has_regex = call.has_flag(engine_state, stack, "regex")?;
132
133        let args = Arguments {
134            separator,
135            rest,
136            collapse_empty,
137            max_split,
138            has_regex,
139        };
140        split_column(engine_state, call, input, args)
141    }
142
143    fn run_const(
144        &self,
145        working_set: &StateWorkingSet,
146        call: &Call,
147        input: PipelineData,
148    ) -> Result<PipelineData, ShellError> {
149        let separator: Spanned<String> = call.req_const(working_set, 0)?;
150        let rest: Vec<Spanned<String>> = call.rest_const(working_set, 1)?;
151        let collapse_empty = call.has_flag_const(working_set, "collapse-empty")?;
152        let max_split: Option<usize> = call.get_flag_const(working_set, "number")?;
153        let has_regex = call.has_flag_const(working_set, "regex")?;
154
155        let args = Arguments {
156            separator,
157            rest,
158            collapse_empty,
159            max_split,
160            has_regex,
161        };
162        split_column(working_set.permanent(), call, input, args)
163    }
164}
165
166struct Arguments {
167    separator: Spanned<String>,
168    rest: Vec<Spanned<String>>,
169    collapse_empty: bool,
170    max_split: Option<usize>,
171    has_regex: bool,
172}
173
174fn split_column(
175    engine_state: &EngineState,
176    call: &Call,
177    input: PipelineData,
178    args: Arguments,
179) -> Result<PipelineData, ShellError> {
180    let name_span = call.head;
181    let regex = if args.has_regex {
182        Regex::new(&args.separator.item)
183    } else {
184        let escaped = escape(&args.separator.item);
185        Regex::new(&escaped)
186    }
187    .map_err(|e| ShellError::GenericError {
188        error: "Error with regular expression".into(),
189        msg: e.to_string(),
190        span: Some(args.separator.span),
191        help: None,
192        inner: vec![],
193    })?;
194
195    input.flat_map(
196        move |x| {
197            split_column_helper(
198                &x,
199                &regex,
200                &args.rest,
201                args.collapse_empty,
202                args.max_split,
203                name_span,
204            )
205        },
206        engine_state.signals(),
207    )
208}
209
210fn split_column_helper(
211    v: &Value,
212    separator: &Regex,
213    rest: &[Spanned<String>],
214    collapse_empty: bool,
215    max_split: Option<usize>,
216    head: Span,
217) -> Vec<Value> {
218    if let Ok(s) = v.as_str() {
219        let split_result: Vec<_> = match max_split {
220            Some(max_split) => separator
221                .splitn(s, max_split)
222                .filter_map(|x| x.ok())
223                .filter(|x| !(collapse_empty && x.is_empty()))
224                .collect(),
225            None => separator
226                .split(s)
227                .filter_map(|x| x.ok())
228                .filter(|x| !(collapse_empty && x.is_empty()))
229                .collect(),
230        };
231        let positional: Vec<_> = rest.iter().map(|f| f.item.clone()).collect();
232
233        // If they didn't provide column names, make up our own
234        let mut record = Record::new();
235        if positional.is_empty() {
236            let mut gen_columns = vec![];
237            for i in 0..split_result.len() {
238                gen_columns.push(format!("column{}", i + 1));
239            }
240
241            for (&k, v) in split_result.iter().zip(&gen_columns) {
242                record.push(v, Value::string(k, head));
243            }
244        } else {
245            for (&k, v) in split_result.iter().zip(&positional) {
246                record.push(v, Value::string(k, head));
247            }
248        }
249        vec![Value::record(record, head)]
250    } else {
251        match v {
252            Value::Error { error, .. } => {
253                vec![Value::error(*error.clone(), head)]
254            }
255            v => {
256                let span = v.span();
257                vec![Value::error(
258                    ShellError::OnlySupportsThisInputType {
259                        exp_input_type: "string".into(),
260                        wrong_type: v.get_type().to_string(),
261                        dst_span: head,
262                        src_span: span,
263                    },
264                    span,
265                )]
266            }
267        }
268    }
269}
270
#[cfg(test)]
mod test {
    use super::*;

    // Hands the command to the crate-level `test_examples` helper (defined
    // elsewhere in the crate).
    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(SplitColumn {})
    }
}
281}