nu_command/strings/split/
list.rs

1use fancy_regex::Regex;
2use nu_engine::{ClosureEval, command_prelude::*};
3use nu_protocol::{FromValue, Signals};
4
/// Command type implementing the `split list` filter.
#[derive(Clone)]
pub struct SubCommand;
7
8impl Command for SubCommand {
9    fn name(&self) -> &str {
10        "split list"
11    }
12
13    fn signature(&self) -> Signature {
14        Signature::build("split list")
15            .input_output_types(vec![(
16                Type::List(Box::new(Type::Any)),
17                Type::List(Box::new(Type::List(Box::new(Type::Any)))),
18            )])
19            .required(
20                "separator",
21                SyntaxShape::Any,
22                "The value that denotes what separates the list.",
23            )
24            .switch(
25                "regex",
26                "separator is a regular expression, matching values that can be coerced into a string",
27                Some('r'))
28            .named("split", SyntaxShape::String, "Whether to split lists before, after, or on (default) the separator", None)
29            .category(Category::Filters)
30    }
31
32    fn description(&self) -> &str {
33        "Split a list into multiple lists using a separator."
34    }
35
36    fn search_terms(&self) -> Vec<&str> {
37        vec!["separate", "divide", "regex"]
38    }
39
40    fn examples(&self) -> Vec<Example> {
41        vec![
42            Example {
43                description: "Split a list of chars into two lists",
44                example: "[a, b, c, d, e, f, g] | split list d",
45                result: Some(Value::list(
46                    vec![
47                        Value::list(
48                            vec![
49                                Value::test_string("a"),
50                                Value::test_string("b"),
51                                Value::test_string("c"),
52                            ],
53                            Span::test_data(),
54                        ),
55                        Value::list(
56                            vec![
57                                Value::test_string("e"),
58                                Value::test_string("f"),
59                                Value::test_string("g"),
60                            ],
61                            Span::test_data(),
62                        ),
63                    ],
64                    Span::test_data(),
65                )),
66            },
67            Example {
68                description: "Split a list of lists into two lists of lists",
69                example: "[[1,2], [2,3], [3,4]] | split list [2,3]",
70                result: Some(Value::list(
71                    vec![
72                        Value::list(
73                            vec![Value::list(
74                                vec![Value::test_int(1), Value::test_int(2)],
75                                Span::test_data(),
76                            )],
77                            Span::test_data(),
78                        ),
79                        Value::list(
80                            vec![Value::list(
81                                vec![Value::test_int(3), Value::test_int(4)],
82                                Span::test_data(),
83                            )],
84                            Span::test_data(),
85                        ),
86                    ],
87                    Span::test_data(),
88                )),
89            },
90            Example {
91                description: "Split a list of chars into two lists",
92                example: "[a, b, c, d, a, e, f, g] | split list a",
93                result: Some(Value::list(
94                    vec![
95                        Value::list(vec![], Span::test_data()),
96                        Value::list(
97                            vec![
98                                Value::test_string("b"),
99                                Value::test_string("c"),
100                                Value::test_string("d"),
101                            ],
102                            Span::test_data(),
103                        ),
104                        Value::list(
105                            vec![
106                                Value::test_string("e"),
107                                Value::test_string("f"),
108                                Value::test_string("g"),
109                            ],
110                            Span::test_data(),
111                        ),
112                    ],
113                    Span::test_data(),
114                )),
115            },
116            Example {
117                description: "Split a list of chars into lists based on multiple characters",
118                example: r"[a, b, c, d, a, e, f, g] | split list --regex '(b|e)'",
119                result: Some(Value::list(
120                    vec![
121                        Value::list(vec![Value::test_string("a")], Span::test_data()),
122                        Value::list(
123                            vec![
124                                Value::test_string("c"),
125                                Value::test_string("d"),
126                                Value::test_string("a"),
127                            ],
128                            Span::test_data(),
129                        ),
130                        Value::list(
131                            vec![Value::test_string("f"), Value::test_string("g")],
132                            Span::test_data(),
133                        ),
134                    ],
135                    Span::test_data(),
136                )),
137            },
138            Example {
139                description: "Split a list of numbers on multiples of 3",
140                example: r"[1 2 3 4 5 6 7 8 9 10] | split list {|e| $e mod 3 == 0 }",
141                result: Some(Value::test_list(vec![
142                    Value::test_list(vec![Value::test_int(1), Value::test_int(2)]),
143                    Value::test_list(vec![Value::test_int(4), Value::test_int(5)]),
144                    Value::test_list(vec![Value::test_int(7), Value::test_int(8)]),
145                    Value::test_list(vec![Value::test_int(10)]),
146                ])),
147            },
148            Example {
149                description: "Split a list of numbers into lists ending with 0",
150                example: r"[1 2 0 3 4 5 0 6 0 0 7] | split list --split after 0",
151                result: Some(Value::test_list(vec![
152                    Value::test_list(vec![
153                        Value::test_int(1),
154                        Value::test_int(2),
155                        Value::test_int(0),
156                    ]),
157                    Value::test_list(vec![
158                        Value::test_int(3),
159                        Value::test_int(4),
160                        Value::test_int(5),
161                        Value::test_int(0),
162                    ]),
163                    Value::test_list(vec![Value::test_int(6), Value::test_int(0)]),
164                    Value::test_list(vec![Value::test_int(0)]),
165                    Value::test_list(vec![Value::test_int(7)]),
166                ])),
167            },
168        ]
169    }
170
171    fn is_const(&self) -> bool {
172        true
173    }
174
175    fn run(
176        &self,
177        engine_state: &EngineState,
178        stack: &mut Stack,
179        call: &Call,
180        input: PipelineData,
181    ) -> Result<PipelineData, ShellError> {
182        let has_regex = call.has_flag(engine_state, stack, "regex")?;
183        let separator: Value = call.req(engine_state, stack, 0)?;
184        let split: Option<Split> = call.get_flag(engine_state, stack, "split")?;
185        let split = split.unwrap_or(Split::On);
186        let matcher = match separator {
187            Value::Closure { val, .. } => {
188                Matcher::from_closure(ClosureEval::new(engine_state, stack, *val))
189            }
190            _ => Matcher::new(has_regex, separator)?,
191        };
192        split_list(engine_state, call, input, matcher, split)
193    }
194
195    fn run_const(
196        &self,
197        working_set: &StateWorkingSet,
198        call: &Call,
199        input: PipelineData,
200    ) -> Result<PipelineData, ShellError> {
201        let has_regex = call.has_flag_const(working_set, "regex")?;
202        let separator: Value = call.req_const(working_set, 0)?;
203        let split: Option<Split> = call.get_flag_const(working_set, "split")?;
204        let split = split.unwrap_or(Split::On);
205        let matcher = Matcher::new(has_regex, separator)?;
206        split_list(working_set.permanent(), call, input, matcher, split)
207    }
208}
209
210enum Matcher {
211    Regex(Regex),
212    Direct(Value),
213    Closure(Box<ClosureEval>),
214}
215
/// Where a matched separator ends up relative to the produced sub-lists.
enum Split {
    /// The separator is dropped from the output.
    On,
    /// The separator becomes the first element of the following sub-list.
    Before,
    /// The separator becomes the last element of the current sub-list.
    After,
}
221
222impl FromValue for Split {
223    fn from_value(v: Value) -> Result<Self, ShellError> {
224        let span = v.span();
225        let s = <String>::from_value(v)?;
226        match s.as_str() {
227            "on" => Ok(Split::On),
228            "before" => Ok(Split::Before),
229            "after" => Ok(Split::After),
230            _ => Err(ShellError::InvalidValue {
231                valid: "one of: on, before, after".into(),
232                actual: s,
233                span,
234            }),
235        }
236    }
237}
238
239impl Matcher {
240    pub fn new(regex: bool, lhs: Value) -> Result<Self, ShellError> {
241        if regex {
242            Ok(Matcher::Regex(Regex::new(&lhs.coerce_str()?).map_err(
243                |e| ShellError::GenericError {
244                    error: "Error with regular expression".into(),
245                    msg: e.to_string(),
246                    span: match lhs {
247                        Value::Error { .. } => None,
248                        _ => Some(lhs.span()),
249                    },
250                    help: None,
251                    inner: vec![],
252                },
253            )?))
254        } else {
255            Ok(Matcher::Direct(lhs))
256        }
257    }
258
259    pub fn from_closure(closure: ClosureEval) -> Self {
260        Self::Closure(Box::new(closure))
261    }
262
263    pub fn compare(&mut self, rhs: &Value) -> Result<bool, ShellError> {
264        Ok(match self {
265            Matcher::Regex(regex) => {
266                if let Ok(rhs_str) = rhs.coerce_str() {
267                    regex.is_match(&rhs_str).unwrap_or(false)
268                } else {
269                    false
270                }
271            }
272            Matcher::Direct(lhs) => rhs == lhs,
273            Matcher::Closure(closure) => closure
274                .run_with_value(rhs.clone())
275                .and_then(|data| data.into_value(Span::unknown()))
276                .map(|value| value.is_true())
277                .unwrap_or(false),
278        })
279    }
280}
281
282fn split_list(
283    engine_state: &EngineState,
284    call: &Call,
285    input: PipelineData,
286    mut matcher: Matcher,
287    split: Split,
288) -> Result<PipelineData, ShellError> {
289    let head = call.head;
290    Ok(SplitList::new(
291        input.into_iter(),
292        engine_state.signals().clone(),
293        split,
294        move |x| matcher.compare(x).unwrap_or(false),
295    )
296    .map(move |x| Value::list(x, head))
297    .into_pipeline_data(head, engine_state.signals().clone()))
298}
299
300struct SplitList<I, T, F> {
301    iterator: I,
302    closure: F,
303    done: bool,
304    signals: Signals,
305    split: Split,
306    last_item: Option<T>,
307}
308
309impl<I, T, F> SplitList<I, T, F>
310where
311    I: Iterator<Item = T>,
312    F: FnMut(&I::Item) -> bool,
313{
314    fn new(iterator: I, signals: Signals, split: Split, closure: F) -> Self {
315        Self {
316            iterator,
317            closure,
318            done: false,
319            signals,
320            split,
321            last_item: None,
322        }
323    }
324
325    fn inner_iterator_next(&mut self) -> Option<I::Item> {
326        if self.signals.interrupted() {
327            self.done = true;
328            return None;
329        }
330        self.iterator.next()
331    }
332}
333
334impl<I, T, F> Iterator for SplitList<I, T, F>
335where
336    I: Iterator<Item = T>,
337    F: FnMut(&I::Item) -> bool,
338{
339    type Item = Vec<I::Item>;
340
341    fn next(&mut self) -> Option<Self::Item> {
342        if self.done {
343            return None;
344        }
345
346        let mut items = vec![];
347        if let Some(item) = self.last_item.take() {
348            items.push(item);
349        }
350
351        loop {
352            match self.inner_iterator_next() {
353                None => {
354                    self.done = true;
355                    return Some(items);
356                }
357                Some(value) => {
358                    if (self.closure)(&value) {
359                        match self.split {
360                            Split::On => {}
361                            Split::Before => {
362                                self.last_item = Some(value);
363                            }
364                            Split::After => {
365                                items.push(value);
366                            }
367                        }
368                        return Some(items);
369                    } else {
370                        items.push(value);
371                    }
372                }
373            }
374        }
375    }
376}
377
#[cfg(test)]
mod test {
    use super::*;
    use crate::test_examples;

    /// Runs every entry of `SubCommand::examples` through the shared example checker.
    #[test]
    fn test_examples() {
        test_examples(SubCommand)
    }
}
388}