nu_command/strings/split/
list.rs

1use fancy_regex::Regex;
2use nu_engine::{ClosureEval, command_prelude::*};
3use nu_protocol::{FromValue, Signals};
4
/// Unit type that implements the `split list` command.
#[derive(Clone)]
pub struct SubCommand;
7
8impl Command for SubCommand {
9    fn name(&self) -> &str {
10        "split list"
11    }
12
13    fn signature(&self) -> Signature {
14        Signature::build("split list")
15            .input_output_types(vec![(
16                Type::List(Box::new(Type::Any)),
17                Type::List(Box::new(Type::List(Box::new(Type::Any)))),
18            )])
19            .required(
20                "separator",
21                SyntaxShape::Any,
22                "The value that denotes what separates the list.",
23            )
24            .switch(
25                "regex",
26                "separator is a regular expression, matching values that can be coerced into a \
27                 string",
28                Some('r'),
29            )
30            .param(
31                Flag::new("split")
32                    .arg(SyntaxShape::String)
33                    .desc("Whether to split lists before, after, or on (default) the separator")
34                    .completion(Completion::new_list(&["before", "after", "on"])),
35            )
36            .category(Category::Filters)
37    }
38
39    fn description(&self) -> &str {
40        "Split a list into multiple lists using a separator."
41    }
42
43    fn search_terms(&self) -> Vec<&str> {
44        vec!["separate", "divide", "regex"]
45    }
46
47    fn examples(&self) -> Vec<Example<'_>> {
48        vec![
49            Example {
50                description: "Split a list of chars into two lists",
51                example: "[a, b, c, d, e, f, g] | split list d",
52                result: Some(Value::list(
53                    vec![
54                        Value::list(
55                            vec![
56                                Value::test_string("a"),
57                                Value::test_string("b"),
58                                Value::test_string("c"),
59                            ],
60                            Span::test_data(),
61                        ),
62                        Value::list(
63                            vec![
64                                Value::test_string("e"),
65                                Value::test_string("f"),
66                                Value::test_string("g"),
67                            ],
68                            Span::test_data(),
69                        ),
70                    ],
71                    Span::test_data(),
72                )),
73            },
74            Example {
75                description: "Split a list of lists into two lists of lists",
76                example: "[[1,2], [2,3], [3,4]] | split list [2,3]",
77                result: Some(Value::list(
78                    vec![
79                        Value::list(
80                            vec![Value::list(
81                                vec![Value::test_int(1), Value::test_int(2)],
82                                Span::test_data(),
83                            )],
84                            Span::test_data(),
85                        ),
86                        Value::list(
87                            vec![Value::list(
88                                vec![Value::test_int(3), Value::test_int(4)],
89                                Span::test_data(),
90                            )],
91                            Span::test_data(),
92                        ),
93                    ],
94                    Span::test_data(),
95                )),
96            },
97            Example {
98                description: "Split a list of chars into two lists",
99                example: "[a, b, c, d, a, e, f, g] | split list a",
100                result: Some(Value::list(
101                    vec![
102                        Value::list(vec![], Span::test_data()),
103                        Value::list(
104                            vec![
105                                Value::test_string("b"),
106                                Value::test_string("c"),
107                                Value::test_string("d"),
108                            ],
109                            Span::test_data(),
110                        ),
111                        Value::list(
112                            vec![
113                                Value::test_string("e"),
114                                Value::test_string("f"),
115                                Value::test_string("g"),
116                            ],
117                            Span::test_data(),
118                        ),
119                    ],
120                    Span::test_data(),
121                )),
122            },
123            Example {
124                description: "Split a list of chars into lists based on multiple characters",
125                example: r"[a, b, c, d, a, e, f, g] | split list --regex '(b|e)'",
126                result: Some(Value::list(
127                    vec![
128                        Value::list(vec![Value::test_string("a")], Span::test_data()),
129                        Value::list(
130                            vec![
131                                Value::test_string("c"),
132                                Value::test_string("d"),
133                                Value::test_string("a"),
134                            ],
135                            Span::test_data(),
136                        ),
137                        Value::list(
138                            vec![Value::test_string("f"), Value::test_string("g")],
139                            Span::test_data(),
140                        ),
141                    ],
142                    Span::test_data(),
143                )),
144            },
145            Example {
146                description: "Split a list of numbers on multiples of 3",
147                example: r"[1 2 3 4 5 6 7 8 9 10] | split list {|e| $e mod 3 == 0 }",
148                result: Some(Value::test_list(vec![
149                    Value::test_list(vec![Value::test_int(1), Value::test_int(2)]),
150                    Value::test_list(vec![Value::test_int(4), Value::test_int(5)]),
151                    Value::test_list(vec![Value::test_int(7), Value::test_int(8)]),
152                    Value::test_list(vec![Value::test_int(10)]),
153                ])),
154            },
155            Example {
156                description: "Split a list of numbers into lists ending with 0",
157                example: r"[1 2 0 3 4 5 0 6 0 0 7] | split list --split after 0",
158                result: Some(Value::test_list(vec![
159                    Value::test_list(vec![
160                        Value::test_int(1),
161                        Value::test_int(2),
162                        Value::test_int(0),
163                    ]),
164                    Value::test_list(vec![
165                        Value::test_int(3),
166                        Value::test_int(4),
167                        Value::test_int(5),
168                        Value::test_int(0),
169                    ]),
170                    Value::test_list(vec![Value::test_int(6), Value::test_int(0)]),
171                    Value::test_list(vec![Value::test_int(0)]),
172                    Value::test_list(vec![Value::test_int(7)]),
173                ])),
174            },
175        ]
176    }
177
178    fn is_const(&self) -> bool {
179        true
180    }
181
182    fn run(
183        &self,
184        engine_state: &EngineState,
185        stack: &mut Stack,
186        call: &Call,
187        input: PipelineData,
188    ) -> Result<PipelineData, ShellError> {
189        let has_regex = call.has_flag(engine_state, stack, "regex")?;
190        let separator: Value = call.req(engine_state, stack, 0)?;
191        let split: Option<Split> = call.get_flag(engine_state, stack, "split")?;
192        let split = split.unwrap_or(Split::On);
193        let matcher = match separator {
194            Value::Closure { val, .. } => {
195                Matcher::from_closure(ClosureEval::new(engine_state, stack, *val))
196            }
197            _ => Matcher::new(has_regex, separator)?,
198        };
199        split_list(engine_state, call, input, matcher, split)
200    }
201
202    fn run_const(
203        &self,
204        working_set: &StateWorkingSet,
205        call: &Call,
206        input: PipelineData,
207    ) -> Result<PipelineData, ShellError> {
208        let has_regex = call.has_flag_const(working_set, "regex")?;
209        let separator: Value = call.req_const(working_set, 0)?;
210        let split: Option<Split> = call.get_flag_const(working_set, "split")?;
211        let split = split.unwrap_or(Split::On);
212        let matcher = Matcher::new(has_regex, separator)?;
213        split_list(working_set.permanent(), call, input, matcher, split)
214    }
215}
216
217enum Matcher {
218    Regex(Regex),
219    Direct(Value),
220    Closure(Box<ClosureEval>),
221}
222
/// Where a matched separator ends up relative to the groups it delimits.
enum Split {
    /// The separator is left out of the output.
    On,
    /// The separator becomes the first element of the following group.
    Before,
    /// The separator becomes the last element of the current group.
    After,
}
228
229impl FromValue for Split {
230    fn from_value(v: Value) -> Result<Self, ShellError> {
231        let span = v.span();
232        let s = <String>::from_value(v)?;
233        match s.as_str() {
234            "on" => Ok(Split::On),
235            "before" => Ok(Split::Before),
236            "after" => Ok(Split::After),
237            _ => Err(ShellError::InvalidValue {
238                valid: "one of: on, before, after".into(),
239                actual: s,
240                span,
241            }),
242        }
243    }
244}
245
246impl Matcher {
247    pub fn new(regex: bool, lhs: Value) -> Result<Self, ShellError> {
248        if regex {
249            Ok(Matcher::Regex(Regex::new(&lhs.coerce_str()?).map_err(
250                |e| ShellError::GenericError {
251                    error: "Error with regular expression".into(),
252                    msg: e.to_string(),
253                    span: match lhs {
254                        Value::Error { .. } => None,
255                        _ => Some(lhs.span()),
256                    },
257                    help: None,
258                    inner: vec![],
259                },
260            )?))
261        } else {
262            Ok(Matcher::Direct(lhs))
263        }
264    }
265
266    pub fn from_closure(closure: ClosureEval) -> Self {
267        Self::Closure(Box::new(closure))
268    }
269
270    pub fn compare(&mut self, rhs: &Value) -> Result<bool, ShellError> {
271        Ok(match self {
272            Matcher::Regex(regex) => {
273                if let Ok(rhs_str) = rhs.coerce_str() {
274                    regex.is_match(&rhs_str).unwrap_or(false)
275                } else {
276                    false
277                }
278            }
279            Matcher::Direct(lhs) => rhs == lhs,
280            Matcher::Closure(closure) => closure
281                .run_with_value(rhs.clone())
282                .and_then(|data| data.into_value(Span::unknown()))
283                .map(|value| value.is_true())
284                .unwrap_or(false),
285        })
286    }
287}
288
289fn split_list(
290    engine_state: &EngineState,
291    call: &Call,
292    input: PipelineData,
293    mut matcher: Matcher,
294    split: Split,
295) -> Result<PipelineData, ShellError> {
296    let head = call.head;
297    Ok(SplitList::new(
298        input.into_iter(),
299        engine_state.signals().clone(),
300        split,
301        move |x| matcher.compare(x).unwrap_or(false),
302    )
303    .map(move |x| Value::list(x, head))
304    .into_pipeline_data(head, engine_state.signals().clone()))
305}
306
307struct SplitList<I, T, F> {
308    iterator: I,
309    closure: F,
310    done: bool,
311    signals: Signals,
312    split: Split,
313    last_item: Option<T>,
314}
315
316impl<I, T, F> SplitList<I, T, F>
317where
318    I: Iterator<Item = T>,
319    F: FnMut(&I::Item) -> bool,
320{
321    fn new(iterator: I, signals: Signals, split: Split, closure: F) -> Self {
322        Self {
323            iterator,
324            closure,
325            done: false,
326            signals,
327            split,
328            last_item: None,
329        }
330    }
331
332    fn inner_iterator_next(&mut self) -> Option<I::Item> {
333        if self.signals.interrupted() {
334            self.done = true;
335            return None;
336        }
337        self.iterator.next()
338    }
339}
340
341impl<I, T, F> Iterator for SplitList<I, T, F>
342where
343    I: Iterator<Item = T>,
344    F: FnMut(&I::Item) -> bool,
345{
346    type Item = Vec<I::Item>;
347
348    fn next(&mut self) -> Option<Self::Item> {
349        if self.done {
350            return None;
351        }
352
353        let mut items = vec![];
354        if let Some(item) = self.last_item.take() {
355            items.push(item);
356        }
357
358        loop {
359            match self.inner_iterator_next() {
360                None => {
361                    self.done = true;
362                    return Some(items);
363                }
364                Some(value) => {
365                    if (self.closure)(&value) {
366                        match self.split {
367                            Split::On => {}
368                            Split::Before => {
369                                self.last_item = Some(value);
370                            }
371                            Split::After => {
372                                items.push(value);
373                            }
374                        }
375                        return Some(items);
376                    } else {
377                        items.push(value);
378                    }
379                }
380            }
381        }
382    }
383}
384
#[cfg(test)]
mod test {
    use super::SubCommand;

    /// Runs the command's declared examples through the crate's `test_examples` helper.
    #[test]
    fn test_examples() {
        // Called by full path: a module-level `use` would clash with this fn's name.
        crate::test_examples(SubCommand)
    }
}
395}