Skip to main content

nu_command/strings/split/
column.rs

1use fancy_regex::{Regex, escape};
2use nu_engine::command_prelude::*;
3use nu_protocol::shell_error::generic::GenericError;
4
/// Command type backing the `split column` command (see its `Command` impl).
#[derive(Clone)]
pub struct SplitColumn;
7
8impl Command for SplitColumn {
9    fn name(&self) -> &str {
10        "split column"
11    }
12
13    fn signature(&self) -> Signature {
14        Signature::build("split column")
15            .input_output_types(vec![
16                (Type::String, Type::table()),
17                (
18                    // TODO: no test coverage (is this behavior a bug or a feature?)
19                    Type::List(Box::new(Type::String)),
20                    Type::table(),
21                ),
22            ])
23            .required(
24                "separator",
25                SyntaxShape::String,
26                "The character or string that denotes what separates columns.",
27            )
28            .switch("collapse-empty", "Remove empty columns.", Some('c'))
29            .named(
30                "number",
31                SyntaxShape::Int,
32                "Split into maximum number of items.",
33                Some('n'),
34            )
35            .switch("regex", "Separator is a regular expression.", Some('r'))
36            .rest(
37                "rest",
38                SyntaxShape::String,
39                "Column names to give the new columns.",
40            )
41            .category(Category::Strings)
42    }
43
44    fn description(&self) -> &str {
45        "Split a string into multiple columns using a separator."
46    }
47
48    fn search_terms(&self) -> Vec<&str> {
49        vec!["separate", "divide", "regex"]
50    }
51
52    fn examples(&self) -> Vec<Example<'_>> {
53        vec![
54            Example {
55                description: "Split a string into columns by the specified separator.",
56                example: "'a--b--c' | split column '--'",
57                result: Some(Value::test_list(vec![Value::test_record(record! {
58                        "column0" => Value::test_string("a"),
59                        "column1" => Value::test_string("b"),
60                        "column2" => Value::test_string("c"),
61                })])),
62            },
63            Example {
64                description: "Split a string into columns of char and remove the empty columns.",
65                example: "'abc' | split column --collapse-empty ''",
66                result: Some(Value::test_list(vec![Value::test_record(record! {
67                        "column0" => Value::test_string("a"),
68                        "column1" => Value::test_string("b"),
69                        "column2" => Value::test_string("c"),
70                })])),
71            },
72            Example {
73                description: "Split a list of strings into a table.",
74                example: "['a-b' 'c-d'] | split column -",
75                result: Some(Value::test_list(vec![
76                    Value::test_record(record! {
77                        "column0" => Value::test_string("a"),
78                        "column1" => Value::test_string("b"),
79                    }),
80                    Value::test_record(record! {
81                        "column0" => Value::test_string("c"),
82                        "column1" => Value::test_string("d"),
83                    }),
84                ])),
85            },
86            Example {
87                description: "Split a list of strings into a table, ignoring padding.",
88                example: r"['a -  b' 'c  -    d'] | split column --regex '\s*-\s*'",
89                result: Some(Value::test_list(vec![
90                    Value::test_record(record! {
91                        "column0" => Value::test_string("a"),
92                        "column1" => Value::test_string("b"),
93                    }),
94                    Value::test_record(record! {
95                        "column0" => Value::test_string("c"),
96                        "column1" => Value::test_string("d"),
97                    }),
98                ])),
99            },
100            Example {
101                description: "Split into columns, last column may contain the delimiter.",
102                example: "['author: Salina Yoon' r#'title: Where's Ellie?: A Hide-and-Seek Book'#] | split column --number 2 ': ' key value",
103                result: Some(Value::test_list(vec![
104                    Value::test_record(record! {
105                        "key" => Value::test_string("author"),
106                        "value" => Value::test_string("Salina Yoon"),
107                    }),
108                    Value::test_record(record! {
109                        "key" => Value::test_string("title"),
110                        "value" => Value::test_string("Where's Ellie?: A Hide-and-Seek Book"),
111                    }),
112                ])),
113            },
114        ]
115    }
116
117    fn is_const(&self) -> bool {
118        true
119    }
120
121    fn run(
122        &self,
123        engine_state: &EngineState,
124        stack: &mut Stack,
125        call: &Call,
126        input: PipelineData,
127    ) -> Result<PipelineData, ShellError> {
128        let separator: Spanned<String> = call.req(engine_state, stack, 0)?;
129        let rest: Vec<Spanned<String>> = call.rest(engine_state, stack, 1)?;
130        let collapse_empty = call.has_flag(engine_state, stack, "collapse-empty")?;
131        let max_split: Option<usize> = call.get_flag(engine_state, stack, "number")?;
132        let has_regex = call.has_flag(engine_state, stack, "regex")?;
133
134        let args = Arguments {
135            separator,
136            rest,
137            collapse_empty,
138            max_split,
139            has_regex,
140        };
141        split_column(engine_state, call, input, args)
142    }
143
144    fn run_const(
145        &self,
146        working_set: &StateWorkingSet,
147        call: &Call,
148        input: PipelineData,
149    ) -> Result<PipelineData, ShellError> {
150        let separator: Spanned<String> = call.req_const(working_set, 0)?;
151        let rest: Vec<Spanned<String>> = call.rest_const(working_set, 1)?;
152        let collapse_empty = call.has_flag_const(working_set, "collapse-empty")?;
153        let max_split: Option<usize> = call.get_flag_const(working_set, "number")?;
154        let has_regex = call.has_flag_const(working_set, "regex")?;
155
156        let args = Arguments {
157            separator,
158            rest,
159            collapse_empty,
160            max_split,
161            has_regex,
162        };
163        split_column(working_set.permanent(), call, input, args)
164    }
165}
166
/// Arguments of a `split column` call, gathered by `run` / `run_const`.
struct Arguments {
    /// Separator text with its source span; compiled as a regex when
    /// `has_regex` is set, otherwise escaped and matched literally.
    separator: Spanned<String>,
    /// Column names to give the new columns (may be empty).
    rest: Vec<Spanned<String>>,
    /// `--collapse-empty`: remove empty columns.
    collapse_empty: bool,
    /// `--number`: split into at most this many items, if given.
    max_split: Option<usize>,
    /// `--regex`: the separator is a regular expression.
    has_regex: bool,
}
174
175fn split_column(
176    engine_state: &EngineState,
177    call: &Call,
178    input: PipelineData,
179    args: Arguments,
180) -> Result<PipelineData, ShellError> {
181    let name_span = call.head;
182    let regex = if args.has_regex {
183        Regex::new(&args.separator.item)
184    } else {
185        let escaped = escape(&args.separator.item);
186        Regex::new(&escaped)
187    }
188    .map_err(|e| {
189        ShellError::Generic(GenericError::new(
190            "Error with regular expression",
191            e.to_string(),
192            args.separator.span,
193        ))
194    })?;
195
196    input.flat_map(
197        move |x| {
198            split_column_helper(
199                &x,
200                &regex,
201                &args.rest,
202                args.collapse_empty,
203                args.max_split,
204                name_span,
205            )
206        },
207        engine_state.signals(),
208    )
209}
210
211fn split_column_helper(
212    v: &Value,
213    separator: &Regex,
214    rest: &[Spanned<String>],
215    collapse_empty: bool,
216    max_split: Option<usize>,
217    head: Span,
218) -> Vec<Value> {
219    if let Ok(s) = v.as_str() {
220        let split_result: Vec<_> = match max_split {
221            Some(max_split) => separator
222                .splitn(s, max_split)
223                .filter_map(|x| x.ok())
224                .filter(|x| !(collapse_empty && x.is_empty()))
225                .collect(),
226            None => separator
227                .split(s)
228                .filter_map(|x| x.ok())
229                .filter(|x| !(collapse_empty && x.is_empty()))
230                .collect(),
231        };
232        let positional: Vec<_> = rest.iter().map(|f| f.item.clone()).collect();
233
234        // If they didn't provide column names, make up our own
235        let mut record = Record::new();
236        if positional.is_empty() {
237            let mut gen_columns = vec![];
238            for i in 0..split_result.len() {
239                gen_columns.push(format!("column{}", i));
240            }
241
242            for (&k, v) in split_result.iter().zip(&gen_columns) {
243                record.push(v, Value::string(k, head));
244            }
245        } else {
246            for (&k, v) in split_result.iter().zip(&positional) {
247                record.push(v, Value::string(k, head));
248            }
249        }
250        vec![Value::record(record, head)]
251    } else {
252        match v {
253            Value::Error { error, .. } => {
254                vec![Value::error(*error.clone(), head)]
255            }
256            v => {
257                let span = v.span();
258                vec![Value::error(
259                    ShellError::OnlySupportsThisInputType {
260                        exp_input_type: "string".into(),
261                        wrong_type: v.get_type().to_string(),
262                        dst_span: head,
263                        src_span: span,
264                    },
265                    span,
266                )]
267            }
268        }
269    }
270}
271
#[cfg(test)]
mod test {
    use super::*;

    // Hands `SplitColumn` to the `nu_test_support` example runner; presumably
    // this checks each entry returned by `examples()` against its declared
    // result (confirm against the harness).
    #[test]
    fn test_examples() -> nu_test_support::Result {
        nu_test_support::test().examples(SplitColumn)
    }
}