Skip to main content

nu_command/strings/split/
list.rs

1use fancy_regex::Regex;
2use nu_engine::{ClosureEval, command_prelude::*};
3use nu_protocol::shell_error::generic::GenericError;
4use nu_protocol::{FromValue, Signals};
5
/// Command type for `split list`; it carries no state of its own.
///
/// All behaviour lives in the `Command` implementation for this type.
#[derive(Clone)]
pub struct SubCommand;
8
9impl Command for SubCommand {
10    fn name(&self) -> &str {
11        "split list"
12    }
13
14    fn signature(&self) -> Signature {
15        Signature::build("split list")
16            .input_output_types(vec![(
17                Type::List(Box::new(Type::Any)),
18                Type::List(Box::new(Type::List(Box::new(Type::Any)))),
19            )])
20            .required(
21                "separator",
22                SyntaxShape::Any,
23                "The value that denotes what separates the list.",
24            )
25            .switch(
26                "regex",
27                "Separator is a regular expression, matching values that can be coerced into a string.",
28                Some('r'),
29            )
30            .param(
31                Flag::new("split")
32                    .arg(SyntaxShape::String)
33                    .desc("Whether to split lists before, after, or on (default) the separator.")
34                    .completion(Completion::new_list(&["before", "after", "on"])),
35            )
36            .category(Category::Filters)
37    }
38
39    fn description(&self) -> &str {
40        "Split a list into multiple lists using a separator."
41    }
42
43    fn search_terms(&self) -> Vec<&str> {
44        vec!["separate", "divide", "regex"]
45    }
46
47    fn examples(&self) -> Vec<Example<'_>> {
48        vec![
49            Example {
50                description: "Split a list of chars into two lists.",
51                example: "[a, b, c, d, e, f, g] | split list d",
52                result: Some(Value::list(
53                    vec![
54                        Value::list(
55                            vec![
56                                Value::test_string("a"),
57                                Value::test_string("b"),
58                                Value::test_string("c"),
59                            ],
60                            Span::test_data(),
61                        ),
62                        Value::list(
63                            vec![
64                                Value::test_string("e"),
65                                Value::test_string("f"),
66                                Value::test_string("g"),
67                            ],
68                            Span::test_data(),
69                        ),
70                    ],
71                    Span::test_data(),
72                )),
73            },
74            Example {
75                description: "Split a list of lists into two lists of lists.",
76                example: "[[1,2], [2,3], [3,4]] | split list [2,3]",
77                result: Some(Value::list(
78                    vec![
79                        Value::list(
80                            vec![Value::list(
81                                vec![Value::test_int(1), Value::test_int(2)],
82                                Span::test_data(),
83                            )],
84                            Span::test_data(),
85                        ),
86                        Value::list(
87                            vec![Value::list(
88                                vec![Value::test_int(3), Value::test_int(4)],
89                                Span::test_data(),
90                            )],
91                            Span::test_data(),
92                        ),
93                    ],
94                    Span::test_data(),
95                )),
96            },
97            Example {
98                description: "Split a list of chars into two lists.",
99                example: "[a, b, c, d, a, e, f, g] | split list a",
100                result: Some(Value::list(
101                    vec![
102                        Value::list(vec![], Span::test_data()),
103                        Value::list(
104                            vec![
105                                Value::test_string("b"),
106                                Value::test_string("c"),
107                                Value::test_string("d"),
108                            ],
109                            Span::test_data(),
110                        ),
111                        Value::list(
112                            vec![
113                                Value::test_string("e"),
114                                Value::test_string("f"),
115                                Value::test_string("g"),
116                            ],
117                            Span::test_data(),
118                        ),
119                    ],
120                    Span::test_data(),
121                )),
122            },
123            Example {
124                description: "Split a list of chars into lists based on multiple characters.",
125                example: "[a, b, c, d, a, e, f, g] | split list --regex '(b|e)'",
126                result: Some(Value::list(
127                    vec![
128                        Value::list(vec![Value::test_string("a")], Span::test_data()),
129                        Value::list(
130                            vec![
131                                Value::test_string("c"),
132                                Value::test_string("d"),
133                                Value::test_string("a"),
134                            ],
135                            Span::test_data(),
136                        ),
137                        Value::list(
138                            vec![Value::test_string("f"), Value::test_string("g")],
139                            Span::test_data(),
140                        ),
141                    ],
142                    Span::test_data(),
143                )),
144            },
145            Example {
146                description: "Split a list of numbers on multiples of 3.",
147                example: "[1 2 3 4 5 6 7 8 9 10] | split list {|e| $e mod 3 == 0 }",
148                result: Some(Value::test_list(vec![
149                    Value::test_list(vec![Value::test_int(1), Value::test_int(2)]),
150                    Value::test_list(vec![Value::test_int(4), Value::test_int(5)]),
151                    Value::test_list(vec![Value::test_int(7), Value::test_int(8)]),
152                    Value::test_list(vec![Value::test_int(10)]),
153                ])),
154            },
155            Example {
156                description: "Split a list of numbers into lists ending with 0.",
157                example: "[1 2 0 3 4 5 0 6 0 0 7] | split list --split after 0",
158                result: Some(Value::test_list(vec![
159                    Value::test_list(vec![
160                        Value::test_int(1),
161                        Value::test_int(2),
162                        Value::test_int(0),
163                    ]),
164                    Value::test_list(vec![
165                        Value::test_int(3),
166                        Value::test_int(4),
167                        Value::test_int(5),
168                        Value::test_int(0),
169                    ]),
170                    Value::test_list(vec![Value::test_int(6), Value::test_int(0)]),
171                    Value::test_list(vec![Value::test_int(0)]),
172                    Value::test_list(vec![Value::test_int(7)]),
173                ])),
174            },
175        ]
176    }
177
178    fn is_const(&self) -> bool {
179        true
180    }
181
182    fn run(
183        &self,
184        engine_state: &EngineState,
185        stack: &mut Stack,
186        call: &Call,
187        input: PipelineData,
188    ) -> Result<PipelineData, ShellError> {
189        let has_regex = call.has_flag(engine_state, stack, "regex")?;
190        let separator: Value = call.req(engine_state, stack, 0)?;
191        let split: Option<Split> = call.get_flag(engine_state, stack, "split")?;
192        let split = split.unwrap_or(Split::On);
193        let matcher = match separator {
194            Value::Closure { val, .. } => {
195                Matcher::from_closure(ClosureEval::new(engine_state, stack, *val))
196            }
197            _ => Matcher::new(has_regex, separator)?,
198        };
199        split_list(engine_state, call, input, matcher, split)
200    }
201
202    fn run_const(
203        &self,
204        working_set: &StateWorkingSet,
205        call: &Call,
206        input: PipelineData,
207    ) -> Result<PipelineData, ShellError> {
208        let has_regex = call.has_flag_const(working_set, "regex")?;
209        let separator: Value = call.req_const(working_set, 0)?;
210        let split: Option<Split> = call.get_flag_const(working_set, "split")?;
211        let split = split.unwrap_or(Split::On);
212        let matcher = Matcher::new(has_regex, separator)?;
213        split_list(working_set.permanent(), call, input, matcher, split)
214    }
215}
216
217enum Matcher {
218    Regex(Regex),
219    Direct(Value),
220    Closure(Box<ClosureEval>),
221}
222
/// Where a matched separator ends up relative to the produced sub-lists.
enum Split {
    /// The separator is dropped.
    On,
    /// The separator becomes the first element of the following sub-list.
    Before,
    /// The separator becomes the last element of the current sub-list.
    After,
}
228
229impl FromValue for Split {
230    fn from_value(v: Value) -> Result<Self, ShellError> {
231        let span = v.span();
232        let s = <String>::from_value(v)?;
233        match s.as_str() {
234            "on" => Ok(Split::On),
235            "before" => Ok(Split::Before),
236            "after" => Ok(Split::After),
237            _ => Err(ShellError::InvalidValue {
238                valid: "one of: on, before, after".into(),
239                actual: s,
240                span,
241            }),
242        }
243    }
244}
245
246impl Matcher {
247    pub fn new(regex: bool, lhs: Value) -> Result<Self, ShellError> {
248        if regex {
249            Ok(Matcher::Regex(Regex::new(&lhs.coerce_str()?).map_err(
250                |e| {
251                    let span = match lhs {
252                        Value::Error { .. } => Span::unknown(),
253                        _ => lhs.span(),
254                    };
255                    ShellError::Generic(GenericError::new(
256                        "Error with regular expression",
257                        e.to_string(),
258                        span,
259                    ))
260                },
261            )?))
262        } else {
263            Ok(Matcher::Direct(lhs))
264        }
265    }
266
267    pub fn from_closure(closure: ClosureEval) -> Self {
268        Self::Closure(Box::new(closure))
269    }
270
271    pub fn compare(&mut self, rhs: &Value) -> Result<bool, ShellError> {
272        Ok(match self {
273            Matcher::Regex(regex) => {
274                if let Ok(rhs_str) = rhs.coerce_str() {
275                    regex.is_match(&rhs_str).unwrap_or(false)
276                } else {
277                    false
278                }
279            }
280            Matcher::Direct(lhs) => rhs == lhs,
281            Matcher::Closure(closure) => closure
282                .run_with_value(rhs.clone())
283                .and_then(|data| data.into_value(rhs.span()))
284                .map(|value| value.is_true())
285                .unwrap_or(false),
286        })
287    }
288}
289
290fn split_list(
291    engine_state: &EngineState,
292    call: &Call,
293    input: PipelineData,
294    mut matcher: Matcher,
295    split: Split,
296) -> Result<PipelineData, ShellError> {
297    let head = call.head;
298    Ok(SplitList::new(
299        input.into_iter(),
300        engine_state.signals().clone(),
301        split,
302        move |x| matcher.compare(x).unwrap_or(false),
303    )
304    .map(move |x| Value::list(x, head))
305    .into_pipeline_data(head, engine_state.signals().clone()))
306}
307
308struct SplitList<I, T, F> {
309    iterator: I,
310    closure: F,
311    done: bool,
312    signals: Signals,
313    split: Split,
314    last_item: Option<T>,
315}
316
317impl<I, T, F> SplitList<I, T, F>
318where
319    I: Iterator<Item = T>,
320    F: FnMut(&I::Item) -> bool,
321{
322    fn new(iterator: I, signals: Signals, split: Split, closure: F) -> Self {
323        Self {
324            iterator,
325            closure,
326            done: false,
327            signals,
328            split,
329            last_item: None,
330        }
331    }
332
333    fn inner_iterator_next(&mut self) -> Option<I::Item> {
334        if self.signals.interrupted() {
335            self.done = true;
336            return None;
337        }
338        self.iterator.next()
339    }
340}
341
342impl<I, T, F> Iterator for SplitList<I, T, F>
343where
344    I: Iterator<Item = T>,
345    F: FnMut(&I::Item) -> bool,
346{
347    type Item = Vec<I::Item>;
348
349    fn next(&mut self) -> Option<Self::Item> {
350        if self.done {
351            return None;
352        }
353
354        let mut items = vec![];
355        if let Some(item) = self.last_item.take() {
356            items.push(item);
357        }
358
359        loop {
360            match self.inner_iterator_next() {
361                None => {
362                    self.done = true;
363                    return Some(items);
364                }
365                Some(value) => {
366                    if (self.closure)(&value) {
367                        match self.split {
368                            Split::On => {}
369                            Split::Before => {
370                                self.last_item = Some(value);
371                            }
372                            Split::After => {
373                                items.push(value);
374                            }
375                        }
376                        return Some(items);
377                    } else {
378                        items.push(value);
379                    }
380                }
381            }
382        }
383    }
384}
385
#[cfg(test)]
mod test {
    use super::SubCommand;

    /// Runs the command's documented examples through the
    /// `nu_test_support` example checker.
    #[test]
    fn test_examples() -> nu_test_support::Result {
        nu_test_support::test().examples(SubCommand)
    }
}