1use fancy_regex::Regex;
2use nu_engine::{ClosureEval, command_prelude::*};
3use nu_protocol::shell_error::generic::GenericError;
4use nu_protocol::{FromValue, Signals};
5
/// Zero-sized marker type that carries the `split list` command implementation.
#[derive(Clone)]
pub struct SubCommand;
8
9impl Command for SubCommand {
10 fn name(&self) -> &str {
11 "split list"
12 }
13
14 fn signature(&self) -> Signature {
15 Signature::build("split list")
16 .input_output_types(vec![(
17 Type::List(Box::new(Type::Any)),
18 Type::List(Box::new(Type::List(Box::new(Type::Any)))),
19 )])
20 .required(
21 "separator",
22 SyntaxShape::Any,
23 "The value that denotes what separates the list.",
24 )
25 .switch(
26 "regex",
27 "Separator is a regular expression, matching values that can be coerced into a string.",
28 Some('r'),
29 )
30 .param(
31 Flag::new("split")
32 .arg(SyntaxShape::String)
33 .desc("Whether to split lists before, after, or on (default) the separator.")
34 .completion(Completion::new_list(&["before", "after", "on"])),
35 )
36 .category(Category::Filters)
37 }
38
39 fn description(&self) -> &str {
40 "Split a list into multiple lists using a separator."
41 }
42
43 fn search_terms(&self) -> Vec<&str> {
44 vec!["separate", "divide", "regex"]
45 }
46
47 fn examples(&self) -> Vec<Example<'_>> {
48 vec![
49 Example {
50 description: "Split a list of chars into two lists.",
51 example: "[a, b, c, d, e, f, g] | split list d",
52 result: Some(Value::list(
53 vec![
54 Value::list(
55 vec![
56 Value::test_string("a"),
57 Value::test_string("b"),
58 Value::test_string("c"),
59 ],
60 Span::test_data(),
61 ),
62 Value::list(
63 vec![
64 Value::test_string("e"),
65 Value::test_string("f"),
66 Value::test_string("g"),
67 ],
68 Span::test_data(),
69 ),
70 ],
71 Span::test_data(),
72 )),
73 },
74 Example {
75 description: "Split a list of lists into two lists of lists.",
76 example: "[[1,2], [2,3], [3,4]] | split list [2,3]",
77 result: Some(Value::list(
78 vec![
79 Value::list(
80 vec![Value::list(
81 vec![Value::test_int(1), Value::test_int(2)],
82 Span::test_data(),
83 )],
84 Span::test_data(),
85 ),
86 Value::list(
87 vec![Value::list(
88 vec![Value::test_int(3), Value::test_int(4)],
89 Span::test_data(),
90 )],
91 Span::test_data(),
92 ),
93 ],
94 Span::test_data(),
95 )),
96 },
97 Example {
98 description: "Split a list of chars into two lists.",
99 example: "[a, b, c, d, a, e, f, g] | split list a",
100 result: Some(Value::list(
101 vec![
102 Value::list(vec![], Span::test_data()),
103 Value::list(
104 vec![
105 Value::test_string("b"),
106 Value::test_string("c"),
107 Value::test_string("d"),
108 ],
109 Span::test_data(),
110 ),
111 Value::list(
112 vec![
113 Value::test_string("e"),
114 Value::test_string("f"),
115 Value::test_string("g"),
116 ],
117 Span::test_data(),
118 ),
119 ],
120 Span::test_data(),
121 )),
122 },
123 Example {
124 description: "Split a list of chars into lists based on multiple characters.",
125 example: "[a, b, c, d, a, e, f, g] | split list --regex '(b|e)'",
126 result: Some(Value::list(
127 vec![
128 Value::list(vec![Value::test_string("a")], Span::test_data()),
129 Value::list(
130 vec![
131 Value::test_string("c"),
132 Value::test_string("d"),
133 Value::test_string("a"),
134 ],
135 Span::test_data(),
136 ),
137 Value::list(
138 vec![Value::test_string("f"), Value::test_string("g")],
139 Span::test_data(),
140 ),
141 ],
142 Span::test_data(),
143 )),
144 },
145 Example {
146 description: "Split a list of numbers on multiples of 3.",
147 example: "[1 2 3 4 5 6 7 8 9 10] | split list {|e| $e mod 3 == 0 }",
148 result: Some(Value::test_list(vec![
149 Value::test_list(vec![Value::test_int(1), Value::test_int(2)]),
150 Value::test_list(vec![Value::test_int(4), Value::test_int(5)]),
151 Value::test_list(vec![Value::test_int(7), Value::test_int(8)]),
152 Value::test_list(vec![Value::test_int(10)]),
153 ])),
154 },
155 Example {
156 description: "Split a list of numbers into lists ending with 0.",
157 example: "[1 2 0 3 4 5 0 6 0 0 7] | split list --split after 0",
158 result: Some(Value::test_list(vec![
159 Value::test_list(vec![
160 Value::test_int(1),
161 Value::test_int(2),
162 Value::test_int(0),
163 ]),
164 Value::test_list(vec![
165 Value::test_int(3),
166 Value::test_int(4),
167 Value::test_int(5),
168 Value::test_int(0),
169 ]),
170 Value::test_list(vec![Value::test_int(6), Value::test_int(0)]),
171 Value::test_list(vec![Value::test_int(0)]),
172 Value::test_list(vec![Value::test_int(7)]),
173 ])),
174 },
175 ]
176 }
177
178 fn is_const(&self) -> bool {
179 true
180 }
181
182 fn run(
183 &self,
184 engine_state: &EngineState,
185 stack: &mut Stack,
186 call: &Call,
187 input: PipelineData,
188 ) -> Result<PipelineData, ShellError> {
189 let has_regex = call.has_flag(engine_state, stack, "regex")?;
190 let separator: Value = call.req(engine_state, stack, 0)?;
191 let split: Option<Split> = call.get_flag(engine_state, stack, "split")?;
192 let split = split.unwrap_or(Split::On);
193 let matcher = match separator {
194 Value::Closure { val, .. } => {
195 Matcher::from_closure(ClosureEval::new(engine_state, stack, *val))
196 }
197 _ => Matcher::new(has_regex, separator)?,
198 };
199 split_list(engine_state, call, input, matcher, split)
200 }
201
202 fn run_const(
203 &self,
204 working_set: &StateWorkingSet,
205 call: &Call,
206 input: PipelineData,
207 ) -> Result<PipelineData, ShellError> {
208 let has_regex = call.has_flag_const(working_set, "regex")?;
209 let separator: Value = call.req_const(working_set, 0)?;
210 let split: Option<Split> = call.get_flag_const(working_set, "split")?;
211 let split = split.unwrap_or(Split::On);
212 let matcher = Matcher::new(has_regex, separator)?;
213 split_list(working_set.permanent(), call, input, matcher, split)
214 }
215}
216
217enum Matcher {
218 Regex(Regex),
219 Direct(Value),
220 Closure(Box<ClosureEval>),
221}
222
/// Where a chunk boundary falls relative to a matching separator element.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Split {
    /// The separator is dropped; chunks contain only the elements between separators.
    On,
    /// The boundary is placed before the separator, so it starts the next chunk.
    Before,
    /// The boundary is placed after the separator, so it ends the current chunk.
    After,
}
228
229impl FromValue for Split {
230 fn from_value(v: Value) -> Result<Self, ShellError> {
231 let span = v.span();
232 let s = <String>::from_value(v)?;
233 match s.as_str() {
234 "on" => Ok(Split::On),
235 "before" => Ok(Split::Before),
236 "after" => Ok(Split::After),
237 _ => Err(ShellError::InvalidValue {
238 valid: "one of: on, before, after".into(),
239 actual: s,
240 span,
241 }),
242 }
243 }
244}
245
246impl Matcher {
247 pub fn new(regex: bool, lhs: Value) -> Result<Self, ShellError> {
248 if regex {
249 Ok(Matcher::Regex(Regex::new(&lhs.coerce_str()?).map_err(
250 |e| {
251 let span = match lhs {
252 Value::Error { .. } => Span::unknown(),
253 _ => lhs.span(),
254 };
255 ShellError::Generic(GenericError::new(
256 "Error with regular expression",
257 e.to_string(),
258 span,
259 ))
260 },
261 )?))
262 } else {
263 Ok(Matcher::Direct(lhs))
264 }
265 }
266
267 pub fn from_closure(closure: ClosureEval) -> Self {
268 Self::Closure(Box::new(closure))
269 }
270
271 pub fn compare(&mut self, rhs: &Value) -> Result<bool, ShellError> {
272 Ok(match self {
273 Matcher::Regex(regex) => {
274 if let Ok(rhs_str) = rhs.coerce_str() {
275 regex.is_match(&rhs_str).unwrap_or(false)
276 } else {
277 false
278 }
279 }
280 Matcher::Direct(lhs) => rhs == lhs,
281 Matcher::Closure(closure) => closure
282 .run_with_value(rhs.clone())
283 .and_then(|data| data.into_value(rhs.span()))
284 .map(|value| value.is_true())
285 .unwrap_or(false),
286 })
287 }
288}
289
290fn split_list(
291 engine_state: &EngineState,
292 call: &Call,
293 input: PipelineData,
294 mut matcher: Matcher,
295 split: Split,
296) -> Result<PipelineData, ShellError> {
297 let head = call.head;
298 Ok(SplitList::new(
299 input.into_iter(),
300 engine_state.signals().clone(),
301 split,
302 move |x| matcher.compare(x).unwrap_or(false),
303 )
304 .map(move |x| Value::list(x, head))
305 .into_pipeline_data(head, engine_state.signals().clone()))
306}
307
308struct SplitList<I, T, F> {
309 iterator: I,
310 closure: F,
311 done: bool,
312 signals: Signals,
313 split: Split,
314 last_item: Option<T>,
315}
316
317impl<I, T, F> SplitList<I, T, F>
318where
319 I: Iterator<Item = T>,
320 F: FnMut(&I::Item) -> bool,
321{
322 fn new(iterator: I, signals: Signals, split: Split, closure: F) -> Self {
323 Self {
324 iterator,
325 closure,
326 done: false,
327 signals,
328 split,
329 last_item: None,
330 }
331 }
332
333 fn inner_iterator_next(&mut self) -> Option<I::Item> {
334 if self.signals.interrupted() {
335 self.done = true;
336 return None;
337 }
338 self.iterator.next()
339 }
340}
341
342impl<I, T, F> Iterator for SplitList<I, T, F>
343where
344 I: Iterator<Item = T>,
345 F: FnMut(&I::Item) -> bool,
346{
347 type Item = Vec<I::Item>;
348
349 fn next(&mut self) -> Option<Self::Item> {
350 if self.done {
351 return None;
352 }
353
354 let mut items = vec![];
355 if let Some(item) = self.last_item.take() {
356 items.push(item);
357 }
358
359 loop {
360 match self.inner_iterator_next() {
361 None => {
362 self.done = true;
363 return Some(items);
364 }
365 Some(value) => {
366 if (self.closure)(&value) {
367 match self.split {
368 Split::On => {}
369 Split::Before => {
370 self.last_item = Some(value);
371 }
372 Split::After => {
373 items.push(value);
374 }
375 }
376 return Some(items);
377 } else {
378 items.push(value);
379 }
380 }
381 }
382 }
383 }
384}
385
#[cfg(test)]
mod test {
    use super::SubCommand;

    /// Hands the command to the `nu_test_support` example harness
    /// (presumably this runs each declared example and checks its result).
    #[test]
    fn test_examples() -> nu_test_support::Result {
        nu_test_support::test().examples(SubCommand)
    }
}