1use fancy_regex::{Captures, Regex};
2use nu_engine::command_prelude::*;
3use nu_protocol::{ListStream, Signals, engine::StateWorkingSet};
4use std::collections::VecDeque;
5
/// The `parse` command.
///
/// A field-less marker type; all behavior lives in its `Command` implementation.
#[derive(Clone)]
pub struct Parse;
8
9impl Command for Parse {
10 fn name(&self) -> &str {
11 "parse"
12 }
13
14 fn description(&self) -> &str {
15 "Parse columns from string data using a simple pattern or a supplied regular expression."
16 }
17
18 fn search_terms(&self) -> Vec<&str> {
19 vec!["pattern", "match", "regex", "str extract"]
20 }
21
22 fn extra_description(&self) -> &str {
23 "The parse command always uses regular expressions even when you use a simple pattern. If a simple pattern is supplied, parse will transform that pattern into a regular expression."
24 }
25
26 fn signature(&self) -> nu_protocol::Signature {
27 Signature::build("parse")
28 .required("pattern", SyntaxShape::String, "The pattern to match.")
29 .input_output_types(vec![
30 (Type::String, Type::table()),
31 (Type::List(Box::new(Type::Any)), Type::table()),
32 ])
33 .switch("regex", "use full regex syntax for patterns", Some('r'))
34 .allow_variants_without_examples(true)
35 .category(Category::Strings)
36 }
37
38 fn examples(&self) -> Vec<Example> {
39 vec![
40 Example {
41 description: "Parse a string into two named columns",
42 example: "\"hi there\" | parse \"{foo} {bar}\"",
43 result: Some(Value::test_list(vec![Value::test_record(record! {
44 "foo" => Value::test_string("hi"),
45 "bar" => Value::test_string("there"),
46 })])),
47 },
48 Example {
49 description: "Parse a string, ignoring a column with _",
50 example: "\"hello world\" | parse \"{foo} {_}\"",
51 result: Some(Value::test_list(vec![Value::test_record(record! {
52 "foo" => Value::test_string("hello"),
53 })])),
54 },
55 Example {
56 description: "This is how the first example is interpreted in the source code",
57 example: "\"hi there\" | parse --regex '(?s)\\A(?P<foo>.*?) (?P<bar>.*?)\\z'",
58 result: Some(Value::test_list(vec![Value::test_record(record! {
59 "foo" => Value::test_string("hi"),
60 "bar" => Value::test_string("there"),
61 })])),
62 },
63 Example {
64 description: "Parse a string using fancy-regex named capture group pattern",
65 example: "\"foo bar.\" | parse --regex '\\s*(?<name>\\w+)(?=\\.)'",
66 result: Some(Value::test_list(vec![Value::test_record(record! {
67 "name" => Value::test_string("bar"),
68 })])),
69 },
70 Example {
71 description: "Parse a string using fancy-regex capture group pattern",
72 example: "\"foo! bar.\" | parse --regex '(\\w+)(?=\\.)|(\\w+)(?=!)'",
73 result: Some(Value::test_list(vec![
74 Value::test_record(record! {
75 "capture0" => Value::test_string(""),
76 "capture1" => Value::test_string("foo"),
77 }),
78 Value::test_record(record! {
79 "capture0" => Value::test_string("bar"),
80 "capture1" => Value::test_string(""),
81 }),
82 ])),
83 },
84 Example {
85 description: "Parse a string using fancy-regex look behind pattern",
86 example: "\" @another(foo bar) \" | parse --regex '\\s*(?<=[() ])(@\\w+)(\\([^)]*\\))?\\s*'",
87 result: Some(Value::test_list(vec![Value::test_record(record! {
88 "capture0" => Value::test_string("@another"),
89 "capture1" => Value::test_string("(foo bar)"),
90 })])),
91 },
92 Example {
93 description: "Parse a string using fancy-regex look ahead atomic group pattern",
94 example: "\"abcd\" | parse --regex '^a(bc(?=d)|b)cd$'",
95 result: Some(Value::test_list(vec![Value::test_record(record! {
96 "capture0" => Value::test_string("b"),
97 })])),
98 },
99 ]
100 }
101
102 fn is_const(&self) -> bool {
103 true
104 }
105
106 fn run(
107 &self,
108 engine_state: &EngineState,
109 stack: &mut Stack,
110 call: &Call,
111 input: PipelineData,
112 ) -> Result<PipelineData, ShellError> {
113 let pattern: Spanned<String> = call.req(engine_state, stack, 0)?;
114 let regex: bool = call.has_flag(engine_state, stack, "regex")?;
115 operate(engine_state, pattern, regex, call, input)
116 }
117
118 fn run_const(
119 &self,
120 working_set: &StateWorkingSet,
121 call: &Call,
122 input: PipelineData,
123 ) -> Result<PipelineData, ShellError> {
124 let pattern: Spanned<String> = call.req_const(working_set, 0)?;
125 let regex: bool = call.has_flag_const(working_set, "regex")?;
126 operate(working_set.permanent(), pattern, regex, call, input)
127 }
128}
129
130fn operate(
131 engine_state: &EngineState,
132 pattern: Spanned<String>,
133 regex: bool,
134 call: &Call,
135 input: PipelineData,
136) -> Result<PipelineData, ShellError> {
137 let head = call.head;
138
139 let pattern_item = pattern.item;
140 let pattern_span = pattern.span;
141
142 let item_to_parse = if regex {
143 pattern_item
144 } else {
145 build_regex(&pattern_item, pattern_span)?
146 };
147
148 let regex = Regex::new(&item_to_parse).map_err(|e| ShellError::GenericError {
149 error: "Error with regular expression".into(),
150 msg: e.to_string(),
151 span: Some(pattern_span),
152 help: None,
153 inner: vec![],
154 })?;
155
156 let columns = regex
157 .capture_names()
158 .skip(1)
159 .enumerate()
160 .map(|(i, name)| {
161 name.map(String::from)
162 .unwrap_or_else(|| format!("capture{i}"))
163 })
164 .collect::<Vec<_>>();
165
166 match input {
167 PipelineData::Empty => Ok(PipelineData::Empty),
168 PipelineData::Value(value, ..) => match value {
169 Value::String { val, .. } => {
170 let captures = regex
171 .captures_iter(&val)
172 .map(|captures| captures_to_value(captures, &columns, head))
173 .collect::<Result<_, _>>()?;
174
175 Ok(Value::list(captures, head).into_pipeline_data())
176 }
177 Value::List { vals, .. } => {
178 let iter = vals.into_iter().map(move |val| {
179 let span = val.span();
180 let type_ = val.get_type();
181 val.into_string()
182 .map_err(|_| ShellError::OnlySupportsThisInputType {
183 exp_input_type: "string".into(),
184 wrong_type: type_.to_string(),
185 dst_span: head,
186 src_span: span,
187 })
188 });
189
190 let iter = ParseIter {
191 captures: VecDeque::new(),
192 regex,
193 columns,
194 iter,
195 span: head,
196 signals: engine_state.signals().clone(),
197 };
198
199 Ok(ListStream::new(iter, head, Signals::empty()).into())
200 }
201 value => Err(ShellError::OnlySupportsThisInputType {
202 exp_input_type: "string".into(),
203 wrong_type: value.get_type().to_string(),
204 dst_span: head,
205 src_span: value.span(),
206 }),
207 },
208 PipelineData::ListStream(stream, ..) => Ok(stream
209 .modify(|stream| {
210 let iter = stream.map(move |val| {
211 let span = val.span();
212 val.into_string().map_err(|_| ShellError::PipelineMismatch {
213 exp_input_type: "string".into(),
214 dst_span: head,
215 src_span: span,
216 })
217 });
218
219 ParseIter {
220 captures: VecDeque::new(),
221 regex,
222 columns,
223 iter,
224 span: head,
225 signals: engine_state.signals().clone(),
226 }
227 })
228 .into()),
229 PipelineData::ByteStream(stream, ..) => {
230 if let Some(lines) = stream.lines() {
231 let iter = ParseIter {
232 captures: VecDeque::new(),
233 regex,
234 columns,
235 iter: lines,
236 span: head,
237 signals: engine_state.signals().clone(),
238 };
239
240 Ok(ListStream::new(iter, head, Signals::empty()).into())
241 } else {
242 Ok(PipelineData::Empty)
243 }
244 }
245 }
246}
247
248fn build_regex(input: &str, span: Span) -> Result<String, ShellError> {
249 let mut output = "(?s)\\A".to_string();
250
251 let mut loop_input = input.chars().peekable();
252 loop {
253 let mut before = String::new();
254 while let Some(c) = loop_input.next() {
255 if c == '{' {
256 if loop_input.peek() == Some(&'{') {
258 let _ = loop_input.next();
259 } else {
260 break;
261 }
262 }
263 before.push(c);
264 }
265
266 if !before.is_empty() {
267 output.push_str(&fancy_regex::escape(&before));
268 }
269
270 let mut column = String::new();
272 while let Some(c) = loop_input.next() {
273 if c == '}' {
274 break;
275 }
276 column.push(c);
277
278 if loop_input.peek().is_none() {
279 return Err(ShellError::DelimiterError {
280 msg: "Found opening `{` without an associated closing `}`".to_owned(),
281 span,
282 });
283 }
284 }
285
286 if !column.is_empty() {
287 output.push_str("(?");
288 if column == "_" {
289 output.push(':');
291 } else {
292 output.push_str("P<");
294 output.push_str(&column);
295 output.push('>');
296 }
297 output.push_str(".*?)");
298 }
299
300 if before.is_empty() && column.is_empty() {
301 break;
302 }
303 }
304
305 output.push_str("\\z");
306 Ok(output)
307}
308
309struct ParseIter<I: Iterator<Item = Result<String, ShellError>>> {
310 captures: VecDeque<Value>,
311 regex: Regex,
312 columns: Vec<String>,
313 iter: I,
314 span: Span,
315 signals: Signals,
316}
317
318impl<I: Iterator<Item = Result<String, ShellError>>> ParseIter<I> {
319 fn populate_captures(&mut self, str: &str) -> Result<(), ShellError> {
320 for captures in self.regex.captures_iter(str) {
321 self.captures
322 .push_back(captures_to_value(captures, &self.columns, self.span)?);
323 }
324 Ok(())
325 }
326}
327
328impl<I: Iterator<Item = Result<String, ShellError>>> Iterator for ParseIter<I> {
329 type Item = Value;
330
331 fn next(&mut self) -> Option<Value> {
332 loop {
333 if self.signals.interrupted() {
334 return None;
335 }
336
337 if let Some(val) = self.captures.pop_front() {
338 return Some(val);
339 }
340
341 let result = self
342 .iter
343 .next()?
344 .and_then(|str| self.populate_captures(&str));
345
346 if let Err(err) = result {
347 return Some(Value::error(err, self.span));
348 }
349 }
350 }
351}
352
353fn captures_to_value(
354 captures: Result<Captures, fancy_regex::Error>,
355 columns: &[String],
356 span: Span,
357) -> Result<Value, ShellError> {
358 let captures = captures.map_err(|err| ShellError::GenericError {
359 error: "Error with regular expression captures".into(),
360 msg: err.to_string(),
361 span: Some(span),
362 help: None,
363 inner: vec![],
364 })?;
365
366 let record = columns
367 .iter()
368 .zip(captures.iter().skip(1))
369 .map(|(column, match_)| {
370 let match_str = match_.map(|m| m.as_str()).unwrap_or("");
371 (column.clone(), Value::string(match_str, span))
372 })
373 .collect();
374
375 Ok(Value::record(record, span))
376}
377
#[cfg(test)]
mod test {
    use super::*;

    /// Hands `Parse` to the crate-level `test_examples` helper.
    // NOTE(review): presumably this checks each entry of `Parse::examples` against its declared
    // result — confirm in `crate::test_examples`.
    #[test]
    fn test_examples() {
        crate::test_examples(Parse)
    }
}