1use fancy_regex::{Captures, Regex};
2use nu_engine::command_prelude::*;
3use nu_protocol::{engine::StateWorkingSet, ListStream, Signals};
4use std::collections::VecDeque;
5
6#[derive(Clone)]
7pub struct Parse;
8
9impl Command for Parse {
10 fn name(&self) -> &str {
11 "parse"
12 }
13
14 fn description(&self) -> &str {
15 "Parse columns from string data using a simple pattern or a supplied regular expression."
16 }
17
18 fn search_terms(&self) -> Vec<&str> {
19 vec!["pattern", "match", "regex", "str extract"]
20 }
21
22 fn extra_description(&self) -> &str {
23 "The parse command always uses regular expressions even when you use a simple pattern. If a simple pattern is supplied, parse will transform that pattern into a regular expression."
24 }
25
26 fn signature(&self) -> nu_protocol::Signature {
27 Signature::build("parse")
28 .required("pattern", SyntaxShape::String, "The pattern to match.")
29 .input_output_types(vec![
30 (Type::String, Type::table()),
31 (Type::List(Box::new(Type::Any)), Type::table()),
32 ])
33 .switch("regex", "use full regex syntax for patterns", Some('r'))
34 .allow_variants_without_examples(true)
35 .category(Category::Strings)
36 }
37
38 fn examples(&self) -> Vec<Example> {
39 vec![
40 Example {
41 description: "Parse a string into two named columns",
42 example: "\"hi there\" | parse \"{foo} {bar}\"",
43 result: Some(Value::test_list(
44 vec![Value::test_record(record! {
45 "foo" => Value::test_string("hi"),
46 "bar" => Value::test_string("there"),
47 })])),
48 },
49 Example {
50 description: "This is how the first example is interpreted in the source code",
51 example: "\"hi there\" | parse --regex '(?s)\\A(?P<foo>.*?) (?P<bar>.*?)\\z'",
52 result: Some(Value::test_list(
53 vec![Value::test_record(record! {
54 "foo" => Value::test_string("hi"),
55 "bar" => Value::test_string("there"),
56 })])),
57 },
58 Example {
59 description: "Parse a string using fancy-regex named capture group pattern",
60 example: "\"foo bar.\" | parse --regex '\\s*(?<name>\\w+)(?=\\.)'",
61 result: Some(Value::test_list(
62 vec![Value::test_record(record! {
63 "name" => Value::test_string("bar"),
64 })],
65 )),
66 },
67 Example {
68 description: "Parse a string using fancy-regex capture group pattern",
69 example: "\"foo! bar.\" | parse --regex '(\\w+)(?=\\.)|(\\w+)(?=!)'",
70 result: Some(Value::test_list(
71 vec![
72 Value::test_record(record! {
73 "capture0" => Value::test_string(""),
74 "capture1" => Value::test_string("foo"),
75 }),
76 Value::test_record(record! {
77 "capture0" => Value::test_string("bar"),
78 "capture1" => Value::test_string(""),
79 }),
80 ],
81 )),
82 },
83 Example {
84 description: "Parse a string using fancy-regex look behind pattern",
85 example:
86 "\" @another(foo bar) \" | parse --regex '\\s*(?<=[() ])(@\\w+)(\\([^)]*\\))?\\s*'",
87 result: Some(Value::test_list(
88 vec![Value::test_record(record! {
89 "capture0" => Value::test_string("@another"),
90 "capture1" => Value::test_string("(foo bar)"),
91 })],
92 )),
93 },
94 Example {
95 description: "Parse a string using fancy-regex look ahead atomic group pattern",
96 example: "\"abcd\" | parse --regex '^a(bc(?=d)|b)cd$'",
97 result: Some(Value::test_list(
98 vec![Value::test_record(record! {
99 "capture0" => Value::test_string("b"),
100 })],
101 )),
102 },
103 ]
104 }
105
106 fn is_const(&self) -> bool {
107 true
108 }
109
110 fn run(
111 &self,
112 engine_state: &EngineState,
113 stack: &mut Stack,
114 call: &Call,
115 input: PipelineData,
116 ) -> Result<PipelineData, ShellError> {
117 let pattern: Spanned<String> = call.req(engine_state, stack, 0)?;
118 let regex: bool = call.has_flag(engine_state, stack, "regex")?;
119 operate(engine_state, pattern, regex, call, input)
120 }
121
122 fn run_const(
123 &self,
124 working_set: &StateWorkingSet,
125 call: &Call,
126 input: PipelineData,
127 ) -> Result<PipelineData, ShellError> {
128 let pattern: Spanned<String> = call.req_const(working_set, 0)?;
129 let regex: bool = call.has_flag_const(working_set, "regex")?;
130 operate(working_set.permanent(), pattern, regex, call, input)
131 }
132}
133
134fn operate(
135 engine_state: &EngineState,
136 pattern: Spanned<String>,
137 regex: bool,
138 call: &Call,
139 input: PipelineData,
140) -> Result<PipelineData, ShellError> {
141 let head = call.head;
142
143 let pattern_item = pattern.item;
144 let pattern_span = pattern.span;
145
146 let item_to_parse = if regex {
147 pattern_item
148 } else {
149 build_regex(&pattern_item, pattern_span)?
150 };
151
152 let regex = Regex::new(&item_to_parse).map_err(|e| ShellError::GenericError {
153 error: "Error with regular expression".into(),
154 msg: e.to_string(),
155 span: Some(pattern_span),
156 help: None,
157 inner: vec![],
158 })?;
159
160 let columns = regex
161 .capture_names()
162 .skip(1)
163 .enumerate()
164 .map(|(i, name)| {
165 name.map(String::from)
166 .unwrap_or_else(|| format!("capture{i}"))
167 })
168 .collect::<Vec<_>>();
169
170 match input {
171 PipelineData::Empty => Ok(PipelineData::Empty),
172 PipelineData::Value(value, ..) => match value {
173 Value::String { val, .. } => {
174 let captures = regex
175 .captures_iter(&val)
176 .map(|captures| captures_to_value(captures, &columns, head))
177 .collect::<Result<_, _>>()?;
178
179 Ok(Value::list(captures, head).into_pipeline_data())
180 }
181 Value::List { vals, .. } => {
182 let iter = vals.into_iter().map(move |val| {
183 let span = val.span();
184 let type_ = val.get_type();
185 val.into_string()
186 .map_err(|_| ShellError::OnlySupportsThisInputType {
187 exp_input_type: "string".into(),
188 wrong_type: type_.to_string(),
189 dst_span: head,
190 src_span: span,
191 })
192 });
193
194 let iter = ParseIter {
195 captures: VecDeque::new(),
196 regex,
197 columns,
198 iter,
199 span: head,
200 signals: engine_state.signals().clone(),
201 };
202
203 Ok(ListStream::new(iter, head, Signals::empty()).into())
204 }
205 value => Err(ShellError::OnlySupportsThisInputType {
206 exp_input_type: "string".into(),
207 wrong_type: value.get_type().to_string(),
208 dst_span: head,
209 src_span: value.span(),
210 }),
211 },
212 PipelineData::ListStream(stream, ..) => Ok(stream
213 .modify(|stream| {
214 let iter = stream.map(move |val| {
215 let span = val.span();
216 val.into_string().map_err(|_| ShellError::PipelineMismatch {
217 exp_input_type: "string".into(),
218 dst_span: head,
219 src_span: span,
220 })
221 });
222
223 ParseIter {
224 captures: VecDeque::new(),
225 regex,
226 columns,
227 iter,
228 span: head,
229 signals: engine_state.signals().clone(),
230 }
231 })
232 .into()),
233 PipelineData::ByteStream(stream, ..) => {
234 if let Some(lines) = stream.lines() {
235 let iter = ParseIter {
236 captures: VecDeque::new(),
237 regex,
238 columns,
239 iter: lines,
240 span: head,
241 signals: engine_state.signals().clone(),
242 };
243
244 Ok(ListStream::new(iter, head, Signals::empty()).into())
245 } else {
246 Ok(PipelineData::Empty)
247 }
248 }
249 }
250}
251
252fn build_regex(input: &str, span: Span) -> Result<String, ShellError> {
253 let mut output = "(?s)\\A".to_string();
254
255 let mut loop_input = input.chars().peekable();
256 loop {
257 let mut before = String::new();
258 while let Some(c) = loop_input.next() {
259 if c == '{' {
260 if loop_input.peek() == Some(&'{') {
262 let _ = loop_input.next();
263 } else {
264 break;
265 }
266 }
267 before.push(c);
268 }
269
270 if !before.is_empty() {
271 output.push_str(&fancy_regex::escape(&before));
272 }
273
274 let mut column = String::new();
276 while let Some(c) = loop_input.next() {
277 if c == '}' {
278 break;
279 }
280 column.push(c);
281
282 if loop_input.peek().is_none() {
283 return Err(ShellError::DelimiterError {
284 msg: "Found opening `{` without an associated closing `}`".to_owned(),
285 span,
286 });
287 }
288 }
289
290 if !column.is_empty() {
291 output.push_str("(?P<");
292 output.push_str(&column);
293 output.push_str(">.*?)");
294 }
295
296 if before.is_empty() && column.is_empty() {
297 break;
298 }
299 }
300
301 output.push_str("\\z");
302 Ok(output)
303}
304
305struct ParseIter<I: Iterator<Item = Result<String, ShellError>>> {
306 captures: VecDeque<Value>,
307 regex: Regex,
308 columns: Vec<String>,
309 iter: I,
310 span: Span,
311 signals: Signals,
312}
313
314impl<I: Iterator<Item = Result<String, ShellError>>> ParseIter<I> {
315 fn populate_captures(&mut self, str: &str) -> Result<(), ShellError> {
316 for captures in self.regex.captures_iter(str) {
317 self.captures
318 .push_back(captures_to_value(captures, &self.columns, self.span)?);
319 }
320 Ok(())
321 }
322}
323
324impl<I: Iterator<Item = Result<String, ShellError>>> Iterator for ParseIter<I> {
325 type Item = Value;
326
327 fn next(&mut self) -> Option<Value> {
328 loop {
329 if self.signals.interrupted() {
330 return None;
331 }
332
333 if let Some(val) = self.captures.pop_front() {
334 return Some(val);
335 }
336
337 let result = self
338 .iter
339 .next()?
340 .and_then(|str| self.populate_captures(&str));
341
342 if let Err(err) = result {
343 return Some(Value::error(err, self.span));
344 }
345 }
346 }
347}
348
349fn captures_to_value(
350 captures: Result<Captures, fancy_regex::Error>,
351 columns: &[String],
352 span: Span,
353) -> Result<Value, ShellError> {
354 let captures = captures.map_err(|err| ShellError::GenericError {
355 error: "Error with regular expression captures".into(),
356 msg: err.to_string(),
357 span: Some(span),
358 help: None,
359 inner: vec![],
360 })?;
361
362 let record = columns
363 .iter()
364 .zip(captures.iter().skip(1))
365 .map(|(column, match_)| {
366 let match_str = match_.map(|m| m.as_str()).unwrap_or("");
367 (column.clone(), Value::string(match_str, span))
368 })
369 .collect();
370
371 Ok(Value::record(record, span))
372}
373
#[cfg(test)]
mod test {
    use super::Parse;

    /// Hands `Parse` to the crate-level `test_examples` helper (presumably it
    /// checks the command's documented examples — confirm against that helper).
    #[test]
    fn test_examples() {
        crate::test_examples(Parse)
    }
}