1use fancy_regex::{Captures, Regex};
2use nu_engine::command_prelude::*;
3use nu_protocol::{engine::StateWorkingSet, ListStream, Signals};
4use std::collections::VecDeque;
5
/// The `parse` command: extracts columns from string input using either a
/// simple `{name}` pattern or, with `--regex`, a full regular expression.
#[derive(Clone)]
pub struct Parse;
8
9impl Command for Parse {
10 fn name(&self) -> &str {
11 "parse"
12 }
13
14 fn description(&self) -> &str {
15 "Parse columns from string data using a simple pattern or a supplied regular expression."
16 }
17
18 fn search_terms(&self) -> Vec<&str> {
19 vec!["pattern", "match", "regex", "str extract"]
20 }
21
22 fn extra_description(&self) -> &str {
23 "The parse command always uses regular expressions even when you use a simple pattern. If a simple pattern is supplied, parse will transform that pattern into a regular expression."
24 }
25
26 fn signature(&self) -> nu_protocol::Signature {
27 Signature::build("parse")
28 .required("pattern", SyntaxShape::String, "The pattern to match.")
29 .input_output_types(vec![
30 (Type::String, Type::table()),
31 (Type::List(Box::new(Type::Any)), Type::table()),
32 ])
33 .switch("regex", "use full regex syntax for patterns", Some('r'))
34 .allow_variants_without_examples(true)
35 .category(Category::Strings)
36 }
37
38 fn examples(&self) -> Vec<Example> {
39 vec![
40 Example {
41 description: "Parse a string into two named columns",
42 example: "\"hi there\" | parse \"{foo} {bar}\"",
43 result: Some(Value::test_list(
44 vec![Value::test_record(record! {
45 "foo" => Value::test_string("hi"),
46 "bar" => Value::test_string("there"),
47 })])),
48 },
49 Example {
50 description: "This is how the first example is interpreted in the source code",
51 example: "\"hi there\" | parse --regex '(?s)\\A(?P<foo>.*?) (?P<bar>.*?)\\z'",
52 result: Some(Value::test_list(
53 vec![Value::test_record(record! {
54 "foo" => Value::test_string("hi"),
55 "bar" => Value::test_string("there"),
56 })])),
57 },
58 Example {
59 description: "Parse a string using fancy-regex named capture group pattern",
60 example: "\"foo bar.\" | parse --regex '\\s*(?<name>\\w+)(?=\\.)'",
61 result: Some(Value::test_list(
62 vec![Value::test_record(record! {
63 "name" => Value::test_string("bar"),
64 })],
65 )),
66 },
67 Example {
68 description: "Parse a string using fancy-regex capture group pattern",
69 example: "\"foo! bar.\" | parse --regex '(\\w+)(?=\\.)|(\\w+)(?=!)'",
70 result: Some(Value::test_list(
71 vec![
72 Value::test_record(record! {
73 "capture0" => Value::test_string(""),
74 "capture1" => Value::test_string("foo"),
75 }),
76 Value::test_record(record! {
77 "capture0" => Value::test_string("bar"),
78 "capture1" => Value::test_string(""),
79 }),
80 ],
81 )),
82 },
83 Example {
84 description: "Parse a string using fancy-regex look behind pattern",
85 example:
86 "\" @another(foo bar) \" | parse --regex '\\s*(?<=[() ])(@\\w+)(\\([^)]*\\))?\\s*'",
87 result: Some(Value::test_list(
88 vec![Value::test_record(record! {
89 "capture0" => Value::test_string("@another"),
90 "capture1" => Value::test_string("(foo bar)"),
91 })],
92 )),
93 },
94 Example {
95 description: "Parse a string using fancy-regex look ahead atomic group pattern",
96 example: "\"abcd\" | parse --regex '^a(bc(?=d)|b)cd$'",
97 result: Some(Value::test_list(
98 vec![Value::test_record(record! {
99 "capture0" => Value::test_string("b"),
100 })],
101 )),
102 },
103 ]
104 }
105
106 fn is_const(&self) -> bool {
107 true
108 }
109
110 fn run(
111 &self,
112 engine_state: &EngineState,
113 stack: &mut Stack,
114 call: &Call,
115 input: PipelineData,
116 ) -> Result<PipelineData, ShellError> {
117 let pattern: Spanned<String> = call.req(engine_state, stack, 0)?;
118 let regex: bool = call.has_flag(engine_state, stack, "regex")?;
119 operate(engine_state, pattern, regex, call, input)
120 }
121
122 fn run_const(
123 &self,
124 working_set: &StateWorkingSet,
125 call: &Call,
126 input: PipelineData,
127 ) -> Result<PipelineData, ShellError> {
128 let pattern: Spanned<String> = call.req_const(working_set, 0)?;
129 let regex: bool = call.has_flag_const(working_set, "regex")?;
130 operate(working_set.permanent(), pattern, regex, call, input)
131 }
132}
133
134fn operate(
135 engine_state: &EngineState,
136 pattern: Spanned<String>,
137 regex: bool,
138 call: &Call,
139 input: PipelineData,
140) -> Result<PipelineData, ShellError> {
141 let head = call.head;
142
143 let pattern_item = pattern.item;
144 let pattern_span = pattern.span;
145
146 let item_to_parse = if regex {
147 pattern_item
148 } else {
149 build_regex(&pattern_item, pattern_span)?
150 };
151
152 let regex = Regex::new(&item_to_parse).map_err(|e| ShellError::GenericError {
153 error: "Error with regular expression".into(),
154 msg: e.to_string(),
155 span: Some(pattern_span),
156 help: None,
157 inner: vec![],
158 })?;
159
160 let columns = regex
161 .capture_names()
162 .skip(1)
163 .enumerate()
164 .map(|(i, name)| {
165 name.map(String::from)
166 .unwrap_or_else(|| format!("capture{i}"))
167 })
168 .collect::<Vec<_>>();
169
170 match input {
171 PipelineData::Empty => Ok(PipelineData::Empty),
172 PipelineData::Value(value, ..) => match value {
173 Value::String { val, .. } => {
174 let captures = regex
175 .captures_iter(&val)
176 .map(|captures| captures_to_value(captures, &columns, head))
177 .collect::<Result<_, _>>()?;
178
179 Ok(Value::list(captures, head).into_pipeline_data())
180 }
181 Value::List { vals, .. } => {
182 let iter = vals.into_iter().map(move |val| {
183 let span = val.span();
184 val.into_string().map_err(|_| ShellError::PipelineMismatch {
185 exp_input_type: "string".into(),
186 dst_span: head,
187 src_span: span,
188 })
189 });
190
191 let iter = ParseIter {
192 captures: VecDeque::new(),
193 regex,
194 columns,
195 iter,
196 span: head,
197 signals: engine_state.signals().clone(),
198 };
199
200 Ok(ListStream::new(iter, head, Signals::empty()).into())
201 }
202 value => Err(ShellError::PipelineMismatch {
203 exp_input_type: "string".into(),
204 dst_span: head,
205 src_span: value.span(),
206 }),
207 },
208 PipelineData::ListStream(stream, ..) => Ok(stream
209 .modify(|stream| {
210 let iter = stream.map(move |val| {
211 let span = val.span();
212 val.into_string().map_err(|_| ShellError::PipelineMismatch {
213 exp_input_type: "string".into(),
214 dst_span: head,
215 src_span: span,
216 })
217 });
218
219 ParseIter {
220 captures: VecDeque::new(),
221 regex,
222 columns,
223 iter,
224 span: head,
225 signals: engine_state.signals().clone(),
226 }
227 })
228 .into()),
229 PipelineData::ByteStream(stream, ..) => {
230 if let Some(lines) = stream.lines() {
231 let iter = ParseIter {
232 captures: VecDeque::new(),
233 regex,
234 columns,
235 iter: lines,
236 span: head,
237 signals: engine_state.signals().clone(),
238 };
239
240 Ok(ListStream::new(iter, head, Signals::empty()).into())
241 } else {
242 Ok(PipelineData::Empty)
243 }
244 }
245 }
246}
247
248fn build_regex(input: &str, span: Span) -> Result<String, ShellError> {
249 let mut output = "(?s)\\A".to_string();
250
251 let mut loop_input = input.chars().peekable();
252 loop {
253 let mut before = String::new();
254 while let Some(c) = loop_input.next() {
255 if c == '{' {
256 if loop_input.peek() == Some(&'{') {
258 let _ = loop_input.next();
259 } else {
260 break;
261 }
262 }
263 before.push(c);
264 }
265
266 if !before.is_empty() {
267 output.push_str(&fancy_regex::escape(&before));
268 }
269
270 let mut column = String::new();
272 while let Some(c) = loop_input.next() {
273 if c == '}' {
274 break;
275 }
276 column.push(c);
277
278 if loop_input.peek().is_none() {
279 return Err(ShellError::DelimiterError {
280 msg: "Found opening `{` without an associated closing `}`".to_owned(),
281 span,
282 });
283 }
284 }
285
286 if !column.is_empty() {
287 output.push_str("(?P<");
288 output.push_str(&column);
289 output.push_str(">.*?)");
290 }
291
292 if before.is_empty() && column.is_empty() {
293 break;
294 }
295 }
296
297 output.push_str("\\z");
298 Ok(output)
299}
300
301struct ParseIter<I: Iterator<Item = Result<String, ShellError>>> {
302 captures: VecDeque<Value>,
303 regex: Regex,
304 columns: Vec<String>,
305 iter: I,
306 span: Span,
307 signals: Signals,
308}
309
310impl<I: Iterator<Item = Result<String, ShellError>>> ParseIter<I> {
311 fn populate_captures(&mut self, str: &str) -> Result<(), ShellError> {
312 for captures in self.regex.captures_iter(str) {
313 self.captures
314 .push_back(captures_to_value(captures, &self.columns, self.span)?);
315 }
316 Ok(())
317 }
318}
319
320impl<I: Iterator<Item = Result<String, ShellError>>> Iterator for ParseIter<I> {
321 type Item = Value;
322
323 fn next(&mut self) -> Option<Value> {
324 loop {
325 if self.signals.interrupted() {
326 return None;
327 }
328
329 if let Some(val) = self.captures.pop_front() {
330 return Some(val);
331 }
332
333 let result = self
334 .iter
335 .next()?
336 .and_then(|str| self.populate_captures(&str));
337
338 if let Err(err) = result {
339 return Some(Value::error(err, self.span));
340 }
341 }
342 }
343}
344
345fn captures_to_value(
346 captures: Result<Captures, fancy_regex::Error>,
347 columns: &[String],
348 span: Span,
349) -> Result<Value, ShellError> {
350 let captures = captures.map_err(|err| ShellError::GenericError {
351 error: "Error with regular expression captures".into(),
352 msg: err.to_string(),
353 span: Some(span),
354 help: None,
355 inner: vec![],
356 })?;
357
358 let record = columns
359 .iter()
360 .zip(captures.iter().skip(1))
361 .map(|(column, match_)| {
362 let match_str = match_.map(|m| m.as_str()).unwrap_or("");
363 (column.clone(), Value::string(match_str, span))
364 })
365 .collect();
366
367 Ok(Value::record(record, span))
368}
369
// Unit tests for the `parse` command.
#[cfg(test)]
mod test {
    use super::*;

    // Passes `Parse` to the crate-level `test_examples` helper.
    // NOTE(review): presumably this runs each entry from `Parse::examples`
    // and compares it with its declared `result` — confirm against the helper.
    #[test]
    fn test_examples() {
        crate::test_examples(Parse)
    }
}