nu_command/filters/
find.rs

1use fancy_regex::{Regex, escape};
2use nu_ansi_term::Style;
3use nu_color_config::StyleComputer;
4use nu_engine::command_prelude::*;
5use nu_protocol::Config;
6
7#[derive(Clone)]
8pub struct Find;
9
10impl Command for Find {
11    fn name(&self) -> &str {
12        "find"
13    }
14
15    fn signature(&self) -> Signature {
16        Signature::build(self.name())
17            .input_output_types(vec![
18                (
19                    // TODO: This is too permissive; if we could express this
20                    // using a type parameter it would be List<T> -> List<T>.
21                    Type::List(Box::new(Type::Any)),
22                    Type::List(Box::new(Type::Any)),
23                ),
24                (Type::String, Type::Any),
25            ])
26            .named(
27                "regex",
28                SyntaxShape::String,
29                "regex to match with",
30                Some('r'),
31            )
32            .switch(
33                "ignore-case",
34                "case-insensitive regex mode; equivalent to (?i)",
35                Some('i'),
36            )
37            .switch(
38                "multiline",
39                "multi-line regex mode: ^ and $ match begin/end of line; equivalent to (?m)",
40                Some('m'),
41            )
42            .switch(
43                "dotall",
44                "dotall regex mode: allow a dot . to match newlines \\n; equivalent to (?s)",
45                Some('s'),
46            )
47            .named(
48                "columns",
49                SyntaxShape::List(Box::new(SyntaxShape::String)),
50                "column names to be searched (with rest parameter, not regex yet)",
51                Some('c'),
52            )
53            .switch(
54                "no-highlight",
55                "no-highlight mode: find without marking with ansi code",
56                Some('n'),
57            )
58            .switch("invert", "invert the match", Some('v'))
59            .rest("rest", SyntaxShape::Any, "Terms to search.")
60            .category(Category::Filters)
61    }
62
63    fn description(&self) -> &str {
64        "Searches terms in the input."
65    }
66
67    fn examples(&self) -> Vec<Example> {
68        vec![
69            Example {
70                description: "Search for multiple terms in a command output",
71                example: r#"ls | find toml md sh"#,
72                result: None,
73            },
74            Example {
75                description: "Search and highlight text for a term in a string. Note that regular search is case insensitive",
76                example: r#"'Cargo.toml' | find cargo"#,
77                result: Some(Value::test_string(
78                    "\u{1b}[37m\u{1b}[0m\u{1b}[41;37mCargo\u{1b}[0m\u{1b}[37m.toml\u{1b}[0m"
79                        .to_owned(),
80                )),
81            },
82            Example {
83                description: "Search a number or a file size in a list of numbers",
84                example: r#"[1 5 3kb 4 3Mb] | find 5 3kb"#,
85                result: Some(Value::list(
86                    vec![Value::test_int(5), Value::test_filesize(3000)],
87                    Span::test_data(),
88                )),
89            },
90            Example {
91                description: "Search a char in a list of string",
92                example: r#"[moe larry curly] | find l"#,
93                result: Some(Value::list(
94                    vec![
95                        Value::test_string(
96                            "\u{1b}[37m\u{1b}[0m\u{1b}[41;37ml\u{1b}[0m\u{1b}[37marry\u{1b}[0m",
97                        ),
98                        Value::test_string(
99                            "\u{1b}[37mcur\u{1b}[0m\u{1b}[41;37ml\u{1b}[0m\u{1b}[37my\u{1b}[0m",
100                        ),
101                    ],
102                    Span::test_data(),
103                )),
104            },
105            Example {
106                description: "Find using regex",
107                example: r#"[abc bde arc abf] | find --regex "ab""#,
108                result: Some(Value::list(
109                    vec![
110                        Value::test_string(
111                            "\u{1b}[37m\u{1b}[0m\u{1b}[41;37mab\u{1b}[0m\u{1b}[37mc\u{1b}[0m"
112                                .to_string(),
113                        ),
114                        Value::test_string(
115                            "\u{1b}[37m\u{1b}[0m\u{1b}[41;37mab\u{1b}[0m\u{1b}[37mf\u{1b}[0m"
116                                .to_string(),
117                        ),
118                    ],
119                    Span::test_data(),
120                )),
121            },
122            Example {
123                description: "Find using regex case insensitive",
124                example: r#"[aBc bde Arc abf] | find --regex "ab" -i"#,
125                result: Some(Value::list(
126                    vec![
127                        Value::test_string(
128                            "\u{1b}[37m\u{1b}[0m\u{1b}[41;37maB\u{1b}[0m\u{1b}[37mc\u{1b}[0m"
129                                .to_string(),
130                        ),
131                        Value::test_string(
132                            "\u{1b}[37m\u{1b}[0m\u{1b}[41;37mab\u{1b}[0m\u{1b}[37mf\u{1b}[0m"
133                                .to_string(),
134                        ),
135                    ],
136                    Span::test_data(),
137                )),
138            },
139            Example {
140                description: "Find value in records using regex",
141                example: r#"[[version name]; ['0.1.0' nushell] ['0.1.1' fish] ['0.2.0' zsh]] | find --regex "nu""#,
142                result: Some(Value::test_list(vec![Value::test_record(record! {
143                        "version" => Value::test_string("0.1.0"),
144                        "name" => Value::test_string("\u{1b}[37m\u{1b}[0m\u{1b}[41;37mnu\u{1b}[0m\u{1b}[37mshell\u{1b}[0m".to_string()),
145                })])),
146            },
147            Example {
148                description: "Find inverted values in records using regex",
149                example: r#"[[version name]; ['0.1.0' nushell] ['0.1.1' fish] ['0.2.0' zsh]] | find --regex "nu" --invert"#,
150                result: Some(Value::test_list(vec![
151                    Value::test_record(record! {
152                            "version" => Value::test_string("0.1.1"),
153                            "name" => Value::test_string("fish".to_string()),
154                    }),
155                    Value::test_record(record! {
156                            "version" => Value::test_string("0.2.0"),
157                            "name" =>Value::test_string("zsh".to_string()),
158                    }),
159                ])),
160            },
161            Example {
162                description: "Find value in list using regex",
163                example: r#"[["Larry", "Moe"], ["Victor", "Marina"]] | find --regex "rr""#,
164                result: Some(Value::list(
165                    vec![Value::list(
166                        vec![Value::test_string("Larry"), Value::test_string("Moe")],
167                        Span::test_data(),
168                    )],
169                    Span::test_data(),
170                )),
171            },
172            Example {
173                description: "Find inverted values in records using regex",
174                example: r#"[["Larry", "Moe"], ["Victor", "Marina"]] | find --regex "rr" --invert"#,
175                result: Some(Value::list(
176                    vec![Value::list(
177                        vec![Value::test_string("Victor"), Value::test_string("Marina")],
178                        Span::test_data(),
179                    )],
180                    Span::test_data(),
181                )),
182            },
183            Example {
184                description: "Remove ANSI sequences from result",
185                example: "[[foo bar]; [abc 123] [def 456]] | find --no-highlight 123",
186                result: Some(Value::list(
187                    vec![Value::test_record(record! {
188                        "foo" => Value::test_string("abc"),
189                        "bar" => Value::test_int(123)
190                    })],
191                    Span::test_data(),
192                )),
193            },
194            Example {
195                description: "Find and highlight text in specific columns",
196                example: "[[col1 col2 col3]; [moe larry curly] [larry curly moe]] | find moe --columns [col1]",
197                result: Some(Value::list(
198                    vec![Value::test_record(record! {
199                            "col1" => Value::test_string(
200                                "\u{1b}[37m\u{1b}[0m\u{1b}[41;37mmoe\u{1b}[0m\u{1b}[37m\u{1b}[0m"
201                                    .to_string(),
202                            ),
203                            "col2" => Value::test_string("larry".to_string()),
204                            "col3" => Value::test_string("curly".to_string()),
205                    })],
206                    Span::test_data(),
207                )),
208            },
209        ]
210    }
211
212    fn search_terms(&self) -> Vec<&str> {
213        vec!["filter", "regex", "search", "condition"]
214    }
215
216    fn run(
217        &self,
218        engine_state: &EngineState,
219        stack: &mut Stack,
220        call: &Call,
221        input: PipelineData,
222    ) -> Result<PipelineData, ShellError> {
223        let pattern = get_match_pattern_from_arguments(engine_state, stack, call)?;
224
225        let columns_to_search: Vec<_> = call
226            .get_flag(engine_state, stack, "columns")?
227            .unwrap_or_default();
228
229        let input = split_string_if_multiline(input, call.head);
230
231        find_in_pipelinedata(pattern, columns_to_search, engine_state, stack, input)
232    }
233}
234
235#[derive(Clone)]
236struct MatchPattern {
237    /// the regex to be used for matching in text
238    regex: Regex,
239
240    /// the list of match terms converted to lowercase strings, or empty if a regex was provided
241    lower_terms: Vec<String>,
242
243    /// return a modified version of the value where matching parts are highlighted
244    highlight: bool,
245
246    /// return the values that aren't a match instead
247    invert: bool,
248
249    /// style of the non-highlighted string sections
250    string_style: Style,
251
252    /// style of the highlighted string sections
253    highlight_style: Style,
254}
255
256fn get_match_pattern_from_arguments(
257    engine_state: &EngineState,
258    stack: &mut Stack,
259    call: &Call,
260) -> Result<MatchPattern, ShellError> {
261    let config = stack.get_config(engine_state);
262
263    let span = call.head;
264    let regex = call.get_flag::<String>(engine_state, stack, "regex")?;
265    let terms = call.rest::<Value>(engine_state, stack, 0)?;
266
267    let invert = call.has_flag(engine_state, stack, "invert")?;
268    let highlight = !call.has_flag(engine_state, stack, "no-highlight")?;
269
270    let style_computer = StyleComputer::from_config(engine_state, stack);
271    // Currently, search results all use the same style.
272    // Also note that this sample string is passed into user-written code (the closure that may or may not be
273    // defined for "string").
274    let string_style = style_computer.compute("string", &Value::string("search result", span));
275    let highlight_style =
276        style_computer.compute("search_result", &Value::string("search result", span));
277
278    let (regex_str, lower_terms) = if let Some(regex) = regex {
279        if !terms.is_empty() {
280            return Err(ShellError::IncompatibleParametersSingle {
281                msg: "Cannot use a `--regex` parameter with additional search terms".into(),
282                span: call.get_flag_span(stack, "regex").expect("has flag"),
283            });
284        }
285
286        let insensitive = call.has_flag(engine_state, stack, "ignore-case")?;
287        let multiline = call.has_flag(engine_state, stack, "multiline")?;
288        let dotall = call.has_flag(engine_state, stack, "dotall")?;
289
290        let flags = match (insensitive, multiline, dotall) {
291            (false, false, false) => "",
292            (true, false, false) => "(?i)", // case insensitive
293            (false, true, false) => "(?m)", // multi-line mode
294            (false, false, true) => "(?s)", // allow . to match \n
295            (true, true, false) => "(?im)", // case insensitive and multi-line mode
296            (true, false, true) => "(?is)", // case insensitive and allow . to match \n
297            (false, true, true) => "(?ms)", // multi-line mode and allow . to match \n
298            (true, true, true) => "(?ims)", // case insensitive, multi-line mode and allow . to match \n
299        };
300
301        (flags.to_string() + regex.as_str(), Vec::new())
302    } else {
303        let mut regex = String::new();
304
305        regex += "(?i)";
306
307        let lower_terms = terms
308            .iter()
309            .map(|v| escape(&v.to_expanded_string("", &config).to_lowercase()).into())
310            .collect::<Vec<String>>();
311
312        if let Some(term) = lower_terms.first() {
313            regex += term;
314        }
315
316        for term in lower_terms.iter().skip(1) {
317            regex += "|";
318            regex += term;
319        }
320
321        let lower_terms = terms
322            .iter()
323            .map(|v| v.to_expanded_string("", &config).to_lowercase())
324            .collect::<Vec<String>>();
325
326        (regex, lower_terms)
327    };
328
329    let regex = Regex::new(regex_str.as_str()).map_err(|e| ShellError::TypeMismatch {
330        err_message: format!("invalid regex: {e}"),
331        span,
332    })?;
333
334    Ok(MatchPattern {
335        regex,
336        lower_terms,
337        invert,
338        highlight,
339        string_style,
340        highlight_style,
341    })
342}
343
344// map functions
345
346fn highlight_matches_in_string(pattern: &MatchPattern, val: String) -> String {
347    // strip haystack to remove existing ansi style
348    let stripped_val = nu_utils::strip_ansi_string_unlikely(val);
349    let mut last_match_end = 0;
350    let mut highlighted = String::new();
351
352    for cap in pattern.regex.captures_iter(stripped_val.as_ref()) {
353        match cap {
354            Ok(capture) => {
355                let start = match capture.get(0) {
356                    Some(acap) => acap.start(),
357                    None => 0,
358                };
359                let end = match capture.get(0) {
360                    Some(acap) => acap.end(),
361                    None => 0,
362                };
363                highlighted.push_str(
364                    &pattern
365                        .string_style
366                        .paint(&stripped_val[last_match_end..start])
367                        .to_string(),
368                );
369                highlighted.push_str(
370                    &pattern
371                        .highlight_style
372                        .paint(&stripped_val[start..end])
373                        .to_string(),
374                );
375                last_match_end = end;
376            }
377            Err(_e) => {
378                // in case of error, return the string with no highlight
379                return pattern.string_style.paint(&stripped_val).to_string();
380            }
381        }
382    }
383
384    highlighted.push_str(
385        &pattern
386            .string_style
387            .paint(&stripped_val[last_match_end..])
388            .to_string(),
389    );
390    highlighted
391}
392
393fn highlight_matches_in_record_or_value(
394    pattern: &MatchPattern,
395    value: Value,
396    columns_to_search: &[String],
397) -> Value {
398    if !pattern.highlight || pattern.invert {
399        return value;
400    }
401    let span = value.span();
402
403    match value {
404        Value::Record { val: record, .. } => {
405            let col_select = !columns_to_search.is_empty();
406
407            // TODO: change API to mutate in place
408            let mut record = record.into_owned();
409
410            for (col, val) in record.iter_mut() {
411                if col_select && !columns_to_search.contains(col) {
412                    continue;
413                }
414
415                if let Value::String { val: val_str, .. } = val {
416                    if pattern.regex.is_match(val_str).unwrap_or(false) {
417                        let val_str = std::mem::take(val_str);
418                        *val = highlight_matches_in_string(pattern, val_str).into_value(span)
419                    }
420                }
421            }
422
423            Value::record(record, span)
424        }
425        Value::String { val, .. } => highlight_matches_in_string(pattern, val).into_value(span),
426        _ => value,
427    }
428}
429
430fn find_in_pipelinedata(
431    pattern: MatchPattern,
432    columns_to_search: Vec<String>,
433    engine_state: &EngineState,
434    stack: &mut Stack,
435    input: PipelineData,
436) -> Result<PipelineData, ShellError> {
437    let config = stack.get_config(engine_state);
438
439    let map_pattern = pattern.clone();
440    let map_columns_to_search = columns_to_search.clone();
441
442    match input {
443        PipelineData::Empty => Ok(PipelineData::Empty),
444        PipelineData::Value(_, _) => input
445            .filter(
446                move |value| {
447                    record_or_value_should_be_printed(&pattern, value, &columns_to_search, &config)
448                },
449                engine_state.signals(),
450            )?
451            .map(
452                move |x| {
453                    highlight_matches_in_record_or_value(&map_pattern, x, &map_columns_to_search)
454                },
455                engine_state.signals(),
456            ),
457        PipelineData::ListStream(stream, metadata) => {
458            let stream = stream.modify(|iter| {
459                iter.filter(move |value| {
460                    record_or_value_should_be_printed(&pattern, value, &columns_to_search, &config)
461                })
462                .map(move |x| {
463                    highlight_matches_in_record_or_value(&map_pattern, x, &map_columns_to_search)
464                })
465            });
466
467            Ok(PipelineData::ListStream(stream, metadata))
468        }
469        PipelineData::ByteStream(stream, ..) => {
470            let span = stream.span();
471            if let Some(lines) = stream.lines() {
472                let mut output: Vec<Value> = vec![];
473                for line in lines {
474                    let line = line?;
475                    if string_should_be_printed(&pattern, &line) != pattern.invert {
476                        if pattern.highlight && !pattern.invert {
477                            output
478                                .push(highlight_matches_in_string(&pattern, line).into_value(span))
479                        } else {
480                            output.push(line.into_value(span))
481                        }
482                    }
483                }
484                Ok(Value::list(output, span).into_pipeline_data())
485            } else {
486                Ok(PipelineData::Empty)
487            }
488        }
489    }
490}
491
492// filter functions
493
494fn string_should_be_printed(pattern: &MatchPattern, value: &str) -> bool {
495    pattern.regex.is_match(value).unwrap_or(false)
496}
497
498fn value_should_be_printed(pattern: &MatchPattern, value: &Value, config: &Config) -> bool {
499    let lower_value = value.to_expanded_string("", config).to_lowercase();
500
501    match value {
502        Value::Bool { .. }
503        | Value::Int { .. }
504        | Value::Filesize { .. }
505        | Value::Duration { .. }
506        | Value::Date { .. }
507        | Value::Range { .. }
508        | Value::Float { .. }
509        | Value::Closure { .. }
510        | Value::Nothing { .. }
511        | Value::Error { .. } => {
512            if !pattern.lower_terms.is_empty() {
513                // look for exact match when searching with terms
514                pattern
515                    .lower_terms
516                    .iter()
517                    .any(|term: &String| term == &lower_value)
518            } else {
519                string_should_be_printed(pattern, &lower_value)
520            }
521        }
522        Value::Glob { .. }
523        | Value::List { .. }
524        | Value::CellPath { .. }
525        | Value::Record { .. }
526        | Value::Custom { .. } => string_should_be_printed(pattern, &lower_value),
527        Value::String { val, .. } => string_should_be_printed(pattern, val),
528        Value::Binary { .. } => false,
529    }
530}
531
532fn record_or_value_should_be_printed(
533    pattern: &MatchPattern,
534    value: &Value,
535    columns_to_search: &[String],
536    config: &Config,
537) -> bool {
538    let match_found = match value {
539        Value::Record { val: record, .. } => {
540            // Only perform column selection if given columns.
541            let col_select = !columns_to_search.is_empty();
542            record.iter().any(|(col, val)| {
543                if col_select && !columns_to_search.contains(col) {
544                    return false;
545                }
546                value_should_be_printed(pattern, val, config)
547            })
548        }
549        _ => value_should_be_printed(pattern, value, config),
550    };
551
552    match_found != pattern.invert
553}
554
555// utility
556
557fn split_string_if_multiline(input: PipelineData, head_span: Span) -> PipelineData {
558    let span = input.span().unwrap_or(head_span);
559    match input {
560        PipelineData::Value(Value::String { ref val, .. }, _) => {
561            if val.contains('\n') {
562                Value::list(
563                    val.lines()
564                        .map(|s| Value::string(s.to_string(), span))
565                        .collect(),
566                    span,
567                )
568                .into_pipeline_data_with_metadata(input.metadata())
569            } else {
570                input
571            }
572        }
573        _ => input,
574    }
575}
576
#[cfg(test)]
mod tests {
    use super::Find;

    /// Passes `Find` to the crate-level `test_examples` helper (presumably it runs the
    /// examples declared above and compares them to their expected results; see that
    /// helper for the details).
    #[test]
    fn test_examples() {
        // Called by full path: importing it at module level would clash with this test's name.
        crate::test_examples(Find)
    }
}
587}