1use fancy_regex::Regex;
2use nu_engine::{ClosureEval, command_prelude::*};
3use nu_protocol::{FromValue, Signals};
4
/// The `split list` command: splits a list into several lists around a separator, which may
/// be a plain value, a regular expression, or a closure.
#[derive(Clone)]
pub struct SubCommand;
7
8impl Command for SubCommand {
    /// Name of the command, `split list`.
    fn name(&self) -> &str {
        "split list"
    }
12
13 fn signature(&self) -> Signature {
14 Signature::build("split list")
15 .input_output_types(vec![(
16 Type::List(Box::new(Type::Any)),
17 Type::List(Box::new(Type::List(Box::new(Type::Any)))),
18 )])
19 .required(
20 "separator",
21 SyntaxShape::Any,
22 "The value that denotes what separates the list.",
23 )
24 .switch(
25 "regex",
26 "separator is a regular expression, matching values that can be coerced into a string",
27 Some('r'))
28 .named("split", SyntaxShape::String, "Whether to split lists before, after, or on (default) the separator", None)
29 .category(Category::Filters)
30 }
31
    /// Short human-readable summary of what the command does.
    fn description(&self) -> &str {
        "Split a list into multiple lists using a separator."
    }
35
36 fn search_terms(&self) -> Vec<&str> {
37 vec!["separate", "divide", "regex"]
38 }
39
40 fn examples(&self) -> Vec<Example> {
41 vec![
42 Example {
43 description: "Split a list of chars into two lists",
44 example: "[a, b, c, d, e, f, g] | split list d",
45 result: Some(Value::list(
46 vec![
47 Value::list(
48 vec![
49 Value::test_string("a"),
50 Value::test_string("b"),
51 Value::test_string("c"),
52 ],
53 Span::test_data(),
54 ),
55 Value::list(
56 vec![
57 Value::test_string("e"),
58 Value::test_string("f"),
59 Value::test_string("g"),
60 ],
61 Span::test_data(),
62 ),
63 ],
64 Span::test_data(),
65 )),
66 },
67 Example {
68 description: "Split a list of lists into two lists of lists",
69 example: "[[1,2], [2,3], [3,4]] | split list [2,3]",
70 result: Some(Value::list(
71 vec![
72 Value::list(
73 vec![Value::list(
74 vec![Value::test_int(1), Value::test_int(2)],
75 Span::test_data(),
76 )],
77 Span::test_data(),
78 ),
79 Value::list(
80 vec![Value::list(
81 vec![Value::test_int(3), Value::test_int(4)],
82 Span::test_data(),
83 )],
84 Span::test_data(),
85 ),
86 ],
87 Span::test_data(),
88 )),
89 },
90 Example {
91 description: "Split a list of chars into two lists",
92 example: "[a, b, c, d, a, e, f, g] | split list a",
93 result: Some(Value::list(
94 vec![
95 Value::list(vec![], Span::test_data()),
96 Value::list(
97 vec![
98 Value::test_string("b"),
99 Value::test_string("c"),
100 Value::test_string("d"),
101 ],
102 Span::test_data(),
103 ),
104 Value::list(
105 vec![
106 Value::test_string("e"),
107 Value::test_string("f"),
108 Value::test_string("g"),
109 ],
110 Span::test_data(),
111 ),
112 ],
113 Span::test_data(),
114 )),
115 },
116 Example {
117 description: "Split a list of chars into lists based on multiple characters",
118 example: r"[a, b, c, d, a, e, f, g] | split list --regex '(b|e)'",
119 result: Some(Value::list(
120 vec![
121 Value::list(vec![Value::test_string("a")], Span::test_data()),
122 Value::list(
123 vec![
124 Value::test_string("c"),
125 Value::test_string("d"),
126 Value::test_string("a"),
127 ],
128 Span::test_data(),
129 ),
130 Value::list(
131 vec![Value::test_string("f"), Value::test_string("g")],
132 Span::test_data(),
133 ),
134 ],
135 Span::test_data(),
136 )),
137 },
138 Example {
139 description: "Split a list of numbers on multiples of 3",
140 example: r"[1 2 3 4 5 6 7 8 9 10] | split list {|e| $e mod 3 == 0 }",
141 result: Some(Value::test_list(vec![
142 Value::test_list(vec![Value::test_int(1), Value::test_int(2)]),
143 Value::test_list(vec![Value::test_int(4), Value::test_int(5)]),
144 Value::test_list(vec![Value::test_int(7), Value::test_int(8)]),
145 Value::test_list(vec![Value::test_int(10)]),
146 ])),
147 },
148 Example {
149 description: "Split a list of numbers into lists ending with 0",
150 example: r"[1 2 0 3 4 5 0 6 0 0 7] | split list --split after 0",
151 result: Some(Value::test_list(vec![
152 Value::test_list(vec![
153 Value::test_int(1),
154 Value::test_int(2),
155 Value::test_int(0),
156 ]),
157 Value::test_list(vec![
158 Value::test_int(3),
159 Value::test_int(4),
160 Value::test_int(5),
161 Value::test_int(0),
162 ]),
163 Value::test_list(vec![Value::test_int(6), Value::test_int(0)]),
164 Value::test_list(vec![Value::test_int(0)]),
165 Value::test_list(vec![Value::test_int(7)]),
166 ])),
167 },
168 ]
169 }
170
    /// Marks the command as usable in constant evaluation; that path is implemented by
    /// `run_const`.
    fn is_const(&self) -> bool {
        true
    }
174
175 fn run(
176 &self,
177 engine_state: &EngineState,
178 stack: &mut Stack,
179 call: &Call,
180 input: PipelineData,
181 ) -> Result<PipelineData, ShellError> {
182 let has_regex = call.has_flag(engine_state, stack, "regex")?;
183 let separator: Value = call.req(engine_state, stack, 0)?;
184 let split: Option<Split> = call.get_flag(engine_state, stack, "split")?;
185 let split = split.unwrap_or(Split::On);
186 let matcher = match separator {
187 Value::Closure { val, .. } => {
188 Matcher::from_closure(ClosureEval::new(engine_state, stack, *val))
189 }
190 _ => Matcher::new(has_regex, separator)?,
191 };
192 split_list(engine_state, call, input, matcher, split)
193 }
194
195 fn run_const(
196 &self,
197 working_set: &StateWorkingSet,
198 call: &Call,
199 input: PipelineData,
200 ) -> Result<PipelineData, ShellError> {
201 let has_regex = call.has_flag_const(working_set, "regex")?;
202 let separator: Value = call.req_const(working_set, 0)?;
203 let split: Option<Split> = call.get_flag_const(working_set, "split")?;
204 let split = split.unwrap_or(Split::On);
205 let matcher = Matcher::new(has_regex, separator)?;
206 split_list(working_set.permanent(), call, input, matcher, split)
207 }
208}
209
/// Strategy used to decide whether a list element is a separator.
enum Matcher {
    /// Elements whose string coercion matches this regular expression are separators.
    Regex(Regex),
    /// Elements equal to this value are separators.
    Direct(Value),
    /// Elements for which this closure evaluates to a true value are separators.
    Closure(Box<ClosureEval>),
}
215
/// Where a separator element ends up when the list is split.
///
/// A plain fieldless mode flag, so it derives the cheap standard traits that let it be
/// copied, compared, and printed in diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Split {
    /// Drop the separator; it belongs to neither neighbouring list.
    On,
    /// Split before the separator; it becomes the first element of the next list.
    Before,
    /// Split after the separator; it becomes the last element of the current list.
    After,
}
221
222impl FromValue for Split {
223 fn from_value(v: Value) -> Result<Self, ShellError> {
224 let span = v.span();
225 let s = <String>::from_value(v)?;
226 match s.as_str() {
227 "on" => Ok(Split::On),
228 "before" => Ok(Split::Before),
229 "after" => Ok(Split::After),
230 _ => Err(ShellError::InvalidValue {
231 valid: "one of: on, before, after".into(),
232 actual: s,
233 span,
234 }),
235 }
236 }
237}
238
239impl Matcher {
240 pub fn new(regex: bool, lhs: Value) -> Result<Self, ShellError> {
241 if regex {
242 Ok(Matcher::Regex(Regex::new(&lhs.coerce_str()?).map_err(
243 |e| ShellError::GenericError {
244 error: "Error with regular expression".into(),
245 msg: e.to_string(),
246 span: match lhs {
247 Value::Error { .. } => None,
248 _ => Some(lhs.span()),
249 },
250 help: None,
251 inner: vec![],
252 },
253 )?))
254 } else {
255 Ok(Matcher::Direct(lhs))
256 }
257 }
258
259 pub fn from_closure(closure: ClosureEval) -> Self {
260 Self::Closure(Box::new(closure))
261 }
262
263 pub fn compare(&mut self, rhs: &Value) -> Result<bool, ShellError> {
264 Ok(match self {
265 Matcher::Regex(regex) => {
266 if let Ok(rhs_str) = rhs.coerce_str() {
267 regex.is_match(&rhs_str).unwrap_or(false)
268 } else {
269 false
270 }
271 }
272 Matcher::Direct(lhs) => rhs == lhs,
273 Matcher::Closure(closure) => closure
274 .run_with_value(rhs.clone())
275 .and_then(|data| data.into_value(Span::unknown()))
276 .map(|value| value.is_true())
277 .unwrap_or(false),
278 })
279 }
280}
281
282fn split_list(
283 engine_state: &EngineState,
284 call: &Call,
285 input: PipelineData,
286 mut matcher: Matcher,
287 split: Split,
288) -> Result<PipelineData, ShellError> {
289 let head = call.head;
290 Ok(SplitList::new(
291 input.into_iter(),
292 engine_state.signals().clone(),
293 split,
294 move |x| matcher.compare(x).unwrap_or(false),
295 )
296 .map(move |x| Value::list(x, head))
297 .into_pipeline_data(head, engine_state.signals().clone()))
298}
299
/// Iterator adapter that groups the items of `iterator` into `Vec`s, closing the current
/// group whenever `closure` reports a separator.
struct SplitList<I, T, F> {
    /// Source of the items to group.
    iterator: I,
    /// Predicate returning `true` for separator items.
    closure: F,
    /// Set once the source returned `None` or an interrupt was observed; afterwards no more
    /// groups are produced.
    done: bool,
    /// Checked before every pull from `iterator` so iteration can be interrupted.
    signals: Signals,
    /// Decides whether a separator is dropped, starts the next group, or ends the current one.
    split: Split,
    /// Separator carried over to start the next group (set in `Split::Before` mode).
    last_item: Option<T>,
}
308
309impl<I, T, F> SplitList<I, T, F>
310where
311 I: Iterator<Item = T>,
312 F: FnMut(&I::Item) -> bool,
313{
314 fn new(iterator: I, signals: Signals, split: Split, closure: F) -> Self {
315 Self {
316 iterator,
317 closure,
318 done: false,
319 signals,
320 split,
321 last_item: None,
322 }
323 }
324
325 fn inner_iterator_next(&mut self) -> Option<I::Item> {
326 if self.signals.interrupted() {
327 self.done = true;
328 return None;
329 }
330 self.iterator.next()
331 }
332}
333
334impl<I, T, F> Iterator for SplitList<I, T, F>
335where
336 I: Iterator<Item = T>,
337 F: FnMut(&I::Item) -> bool,
338{
339 type Item = Vec<I::Item>;
340
341 fn next(&mut self) -> Option<Self::Item> {
342 if self.done {
343 return None;
344 }
345
346 let mut items = vec![];
347 if let Some(item) = self.last_item.take() {
348 items.push(item);
349 }
350
351 loop {
352 match self.inner_iterator_next() {
353 None => {
354 self.done = true;
355 return Some(items);
356 }
357 Some(value) => {
358 if (self.closure)(&value) {
359 match self.split {
360 Split::On => {}
361 Split::Before => {
362 self.last_item = Some(value);
363 }
364 Split::After => {
365 items.push(value);
366 }
367 }
368 return Some(items);
369 } else {
370 items.push(value);
371 }
372 }
373 }
374 }
375 }
376}
377
#[cfg(test)]
mod test {
    use super::*;

    /// Runs every example declared by the command through the crate's example checker.
    #[test]
    fn test_examples() {
        crate::test_examples(SubCommand)
    }
}