Skip to main content

nu_command/strings/split/
list.rs

1use fancy_regex::Regex;
2use nu_engine::{ClosureEval, command_prelude::*};
3use nu_protocol::{FromValue, Signals};
4
/// The `split list` command: splits a list into multiple lists using a separator.
#[derive(Clone)]
pub struct SubCommand;
7
8impl Command for SubCommand {
9    fn name(&self) -> &str {
10        "split list"
11    }
12
13    fn signature(&self) -> Signature {
14        Signature::build("split list")
15            .input_output_types(vec![(
16                Type::List(Box::new(Type::Any)),
17                Type::List(Box::new(Type::List(Box::new(Type::Any)))),
18            )])
19            .required(
20                "separator",
21                SyntaxShape::Any,
22                "The value that denotes what separates the list.",
23            )
24            .switch(
25                "regex",
26                "Separator is a regular expression, matching values that can be coerced into a string.",
27                Some('r'),
28            )
29            .param(
30                Flag::new("split")
31                    .arg(SyntaxShape::String)
32                    .desc("Whether to split lists before, after, or on (default) the separator.")
33                    .completion(Completion::new_list(&["before", "after", "on"])),
34            )
35            .category(Category::Filters)
36    }
37
38    fn description(&self) -> &str {
39        "Split a list into multiple lists using a separator."
40    }
41
42    fn search_terms(&self) -> Vec<&str> {
43        vec!["separate", "divide", "regex"]
44    }
45
46    fn examples(&self) -> Vec<Example<'_>> {
47        vec![
48            Example {
49                description: "Split a list of chars into two lists.",
50                example: "[a, b, c, d, e, f, g] | split list d",
51                result: Some(Value::list(
52                    vec![
53                        Value::list(
54                            vec![
55                                Value::test_string("a"),
56                                Value::test_string("b"),
57                                Value::test_string("c"),
58                            ],
59                            Span::test_data(),
60                        ),
61                        Value::list(
62                            vec![
63                                Value::test_string("e"),
64                                Value::test_string("f"),
65                                Value::test_string("g"),
66                            ],
67                            Span::test_data(),
68                        ),
69                    ],
70                    Span::test_data(),
71                )),
72            },
73            Example {
74                description: "Split a list of lists into two lists of lists.",
75                example: "[[1,2], [2,3], [3,4]] | split list [2,3]",
76                result: Some(Value::list(
77                    vec![
78                        Value::list(
79                            vec![Value::list(
80                                vec![Value::test_int(1), Value::test_int(2)],
81                                Span::test_data(),
82                            )],
83                            Span::test_data(),
84                        ),
85                        Value::list(
86                            vec![Value::list(
87                                vec![Value::test_int(3), Value::test_int(4)],
88                                Span::test_data(),
89                            )],
90                            Span::test_data(),
91                        ),
92                    ],
93                    Span::test_data(),
94                )),
95            },
96            Example {
97                description: "Split a list of chars into two lists.",
98                example: "[a, b, c, d, a, e, f, g] | split list a",
99                result: Some(Value::list(
100                    vec![
101                        Value::list(vec![], Span::test_data()),
102                        Value::list(
103                            vec![
104                                Value::test_string("b"),
105                                Value::test_string("c"),
106                                Value::test_string("d"),
107                            ],
108                            Span::test_data(),
109                        ),
110                        Value::list(
111                            vec![
112                                Value::test_string("e"),
113                                Value::test_string("f"),
114                                Value::test_string("g"),
115                            ],
116                            Span::test_data(),
117                        ),
118                    ],
119                    Span::test_data(),
120                )),
121            },
122            Example {
123                description: "Split a list of chars into lists based on multiple characters.",
124                example: r"[a, b, c, d, a, e, f, g] | split list --regex '(b|e)'",
125                result: Some(Value::list(
126                    vec![
127                        Value::list(vec![Value::test_string("a")], Span::test_data()),
128                        Value::list(
129                            vec![
130                                Value::test_string("c"),
131                                Value::test_string("d"),
132                                Value::test_string("a"),
133                            ],
134                            Span::test_data(),
135                        ),
136                        Value::list(
137                            vec![Value::test_string("f"), Value::test_string("g")],
138                            Span::test_data(),
139                        ),
140                    ],
141                    Span::test_data(),
142                )),
143            },
144            Example {
145                description: "Split a list of numbers on multiples of 3.",
146                example: r"[1 2 3 4 5 6 7 8 9 10] | split list {|e| $e mod 3 == 0 }",
147                result: Some(Value::test_list(vec![
148                    Value::test_list(vec![Value::test_int(1), Value::test_int(2)]),
149                    Value::test_list(vec![Value::test_int(4), Value::test_int(5)]),
150                    Value::test_list(vec![Value::test_int(7), Value::test_int(8)]),
151                    Value::test_list(vec![Value::test_int(10)]),
152                ])),
153            },
154            Example {
155                description: "Split a list of numbers into lists ending with 0.",
156                example: r"[1 2 0 3 4 5 0 6 0 0 7] | split list --split after 0",
157                result: Some(Value::test_list(vec![
158                    Value::test_list(vec![
159                        Value::test_int(1),
160                        Value::test_int(2),
161                        Value::test_int(0),
162                    ]),
163                    Value::test_list(vec![
164                        Value::test_int(3),
165                        Value::test_int(4),
166                        Value::test_int(5),
167                        Value::test_int(0),
168                    ]),
169                    Value::test_list(vec![Value::test_int(6), Value::test_int(0)]),
170                    Value::test_list(vec![Value::test_int(0)]),
171                    Value::test_list(vec![Value::test_int(7)]),
172                ])),
173            },
174        ]
175    }
176
177    fn is_const(&self) -> bool {
178        true
179    }
180
181    fn run(
182        &self,
183        engine_state: &EngineState,
184        stack: &mut Stack,
185        call: &Call,
186        input: PipelineData,
187    ) -> Result<PipelineData, ShellError> {
188        let has_regex = call.has_flag(engine_state, stack, "regex")?;
189        let separator: Value = call.req(engine_state, stack, 0)?;
190        let split: Option<Split> = call.get_flag(engine_state, stack, "split")?;
191        let split = split.unwrap_or(Split::On);
192        let matcher = match separator {
193            Value::Closure { val, .. } => {
194                Matcher::from_closure(ClosureEval::new(engine_state, stack, *val))
195            }
196            _ => Matcher::new(has_regex, separator)?,
197        };
198        split_list(engine_state, call, input, matcher, split)
199    }
200
201    fn run_const(
202        &self,
203        working_set: &StateWorkingSet,
204        call: &Call,
205        input: PipelineData,
206    ) -> Result<PipelineData, ShellError> {
207        let has_regex = call.has_flag_const(working_set, "regex")?;
208        let separator: Value = call.req_const(working_set, 0)?;
209        let split: Option<Split> = call.get_flag_const(working_set, "split")?;
210        let split = split.unwrap_or(Split::On);
211        let matcher = Matcher::new(has_regex, separator)?;
212        split_list(working_set.permanent(), call, input, matcher, split)
213    }
214}
215
/// Strategy used to decide whether a list element is a separator.
enum Matcher {
    /// Elements whose string coercion matches this regular expression are separators.
    Regex(Regex),
    /// Elements equal to this value are separators.
    Direct(Value),
    /// Elements for which this closure's result is true are separators.
    Closure(Box<ClosureEval>),
}
221
/// Where the list is cut relative to a matched separator (the `--split` flag).
enum Split {
    /// Cut on the separator: the separator itself is dropped.
    On,
    /// Cut before the separator: the separator starts the next list.
    Before,
    /// Cut after the separator: the separator ends the current list.
    After,
}
227
228impl FromValue for Split {
229    fn from_value(v: Value) -> Result<Self, ShellError> {
230        let span = v.span();
231        let s = <String>::from_value(v)?;
232        match s.as_str() {
233            "on" => Ok(Split::On),
234            "before" => Ok(Split::Before),
235            "after" => Ok(Split::After),
236            _ => Err(ShellError::InvalidValue {
237                valid: "one of: on, before, after".into(),
238                actual: s,
239                span,
240            }),
241        }
242    }
243}
244
245impl Matcher {
246    pub fn new(regex: bool, lhs: Value) -> Result<Self, ShellError> {
247        if regex {
248            Ok(Matcher::Regex(Regex::new(&lhs.coerce_str()?).map_err(
249                |e| ShellError::GenericError {
250                    error: "Error with regular expression".into(),
251                    msg: e.to_string(),
252                    span: match lhs {
253                        Value::Error { .. } => None,
254                        _ => Some(lhs.span()),
255                    },
256                    help: None,
257                    inner: vec![],
258                },
259            )?))
260        } else {
261            Ok(Matcher::Direct(lhs))
262        }
263    }
264
265    pub fn from_closure(closure: ClosureEval) -> Self {
266        Self::Closure(Box::new(closure))
267    }
268
269    pub fn compare(&mut self, rhs: &Value) -> Result<bool, ShellError> {
270        Ok(match self {
271            Matcher::Regex(regex) => {
272                if let Ok(rhs_str) = rhs.coerce_str() {
273                    regex.is_match(&rhs_str).unwrap_or(false)
274                } else {
275                    false
276                }
277            }
278            Matcher::Direct(lhs) => rhs == lhs,
279            Matcher::Closure(closure) => closure
280                .run_with_value(rhs.clone())
281                .and_then(|data| data.into_value(Span::unknown()))
282                .map(|value| value.is_true())
283                .unwrap_or(false),
284        })
285    }
286}
287
288fn split_list(
289    engine_state: &EngineState,
290    call: &Call,
291    input: PipelineData,
292    mut matcher: Matcher,
293    split: Split,
294) -> Result<PipelineData, ShellError> {
295    let head = call.head;
296    Ok(SplitList::new(
297        input.into_iter(),
298        engine_state.signals().clone(),
299        split,
300        move |x| matcher.compare(x).unwrap_or(false),
301    )
302    .map(move |x| Value::list(x, head))
303    .into_pipeline_data(head, engine_state.signals().clone()))
304}
305
/// Iterator adapter that groups the items of an inner iterator into `Vec`s,
/// cutting at every item for which `closure` returns true.
struct SplitList<I, T, F> {
    /// Source of the items being grouped.
    iterator: I,
    /// Predicate deciding whether an item is a separator.
    closure: F,
    /// Set once the final chunk has been produced or an interrupt was seen.
    done: bool,
    /// Checked for interruption before each item is pulled from `iterator`.
    signals: Signals,
    /// Where to cut relative to a separator.
    split: Split,
    /// Separator held back by `Split::Before` so it can start the next chunk.
    last_item: Option<T>,
}
314
315impl<I, T, F> SplitList<I, T, F>
316where
317    I: Iterator<Item = T>,
318    F: FnMut(&I::Item) -> bool,
319{
320    fn new(iterator: I, signals: Signals, split: Split, closure: F) -> Self {
321        Self {
322            iterator,
323            closure,
324            done: false,
325            signals,
326            split,
327            last_item: None,
328        }
329    }
330
331    fn inner_iterator_next(&mut self) -> Option<I::Item> {
332        if self.signals.interrupted() {
333            self.done = true;
334            return None;
335        }
336        self.iterator.next()
337    }
338}
339
340impl<I, T, F> Iterator for SplitList<I, T, F>
341where
342    I: Iterator<Item = T>,
343    F: FnMut(&I::Item) -> bool,
344{
345    type Item = Vec<I::Item>;
346
347    fn next(&mut self) -> Option<Self::Item> {
348        if self.done {
349            return None;
350        }
351
352        let mut items = vec![];
353        if let Some(item) = self.last_item.take() {
354            items.push(item);
355        }
356
357        loop {
358            match self.inner_iterator_next() {
359                None => {
360                    self.done = true;
361                    return Some(items);
362                }
363                Some(value) => {
364                    if (self.closure)(&value) {
365                        match self.split {
366                            Split::On => {}
367                            Split::Before => {
368                                self.last_item = Some(value);
369                            }
370                            Split::After => {
371                                items.push(value);
372                            }
373                        }
374                        return Some(items);
375                    } else {
376                        items.push(value);
377                    }
378                }
379            }
380        }
381    }
382}
383
#[cfg(test)]
mod test {
    use super::*;

    /// Runs the command's declared examples through the crate's example checker.
    #[test]
    fn test_examples() {
        // Called by full path: importing the helper at module level would clash
        // with this test function's own name.
        crate::test_examples(SubCommand)
    }
}
394}