nu_command/filters/
find.rs

1use fancy_regex::{Regex, escape};
2use nu_ansi_term::Style;
3use nu_color_config::StyleComputer;
4use nu_engine::command_prelude::*;
5use nu_protocol::Config;
6
7#[derive(Clone)]
8pub struct Find;
9
10impl Command for Find {
11    fn name(&self) -> &str {
12        "find"
13    }
14
15    fn signature(&self) -> Signature {
16        Signature::build(self.name())
17            .input_output_types(vec![
18                (
19                    // TODO: This is too permissive; if we could express this
20                    // using a type parameter it would be List<T> -> List<T>.
21                    Type::List(Box::new(Type::Any)),
22                    Type::List(Box::new(Type::Any)),
23                ),
24                (Type::String, Type::Any),
25            ])
26            .named(
27                "regex",
28                SyntaxShape::String,
29                "regex to match with",
30                Some('r'),
31            )
32            .switch(
33                "ignore-case",
34                "case-insensitive; when in regex mode, this is equivalent to (?i)",
35                Some('i'),
36            )
37            .switch(
38                "multiline",
39                "don't split multi-line strings into lists of lines. you should use this option when using the (?m) or (?s) flags in regex mode",
40                Some('m'),
41            )
42            .switch(
43                "dotall",
44                "dotall regex mode: allow a dot . to match newlines \\n; equivalent to (?s)",
45                Some('s'),
46            )
47            .named(
48                "columns",
49                SyntaxShape::List(Box::new(SyntaxShape::String)),
50                "column names to be searched",
51                Some('c'),
52            )
53            .switch(
54                "no-highlight",
55                "no-highlight mode: find without marking with ansi code",
56                Some('n'),
57            )
58            .switch("invert", "invert the match", Some('v'))
59            .rest("rest", SyntaxShape::Any, "Terms to search.")
60            .category(Category::Filters)
61    }
62
63    fn description(&self) -> &str {
64        "Searches terms in the input."
65    }
66
67    fn examples(&self) -> Vec<Example<'_>> {
68        vec![
69            Example {
70                description: "Search for multiple terms in a command output",
71                example: r#"ls | find toml md sh"#,
72                result: None,
73            },
74            Example {
75                description: "Search and highlight text for a term in a string.",
76                example: r#"'Cargo.toml' | find Cargo"#,
77                result: Some(Value::test_string(
78                    "\u{1b}[39m\u{1b}[0m\u{1b}[41;39mCargo\u{1b}[0m\u{1b}[39m.toml\u{1b}[0m"
79                        .to_owned(),
80                )),
81            },
82            Example {
83                description: "Search a number or a file size in a list of numbers",
84                example: r#"[1 5 3kb 4 35 3Mb] | find 5 3kb"#,
85                result: Some(Value::list(
86                    vec![Value::test_int(5), Value::test_filesize(3000)],
87                    Span::test_data(),
88                )),
89            },
90            Example {
91                description: "Search a char in a list of string",
92                example: r#"[moe larry curly] | find l"#,
93                result: Some(Value::list(
94                    vec![
95                        Value::test_string(
96                            "\u{1b}[39m\u{1b}[0m\u{1b}[41;39ml\u{1b}[0m\u{1b}[39marry\u{1b}[0m",
97                        ),
98                        Value::test_string(
99                            "\u{1b}[39mcur\u{1b}[0m\u{1b}[41;39ml\u{1b}[0m\u{1b}[39my\u{1b}[0m",
100                        ),
101                    ],
102                    Span::test_data(),
103                )),
104            },
105            Example {
106                description: "Search using regex",
107                example: r#"[abc odb arc abf] | find --regex "b.""#,
108                result: Some(Value::list(
109                    vec![
110                        Value::test_string(
111                            "\u{1b}[39ma\u{1b}[0m\u{1b}[41;39mbc\u{1b}[0m\u{1b}[39m\u{1b}[0m"
112                                .to_string(),
113                        ),
114                        Value::test_string(
115                            "\u{1b}[39ma\u{1b}[0m\u{1b}[41;39mbf\u{1b}[0m\u{1b}[39m\u{1b}[0m"
116                                .to_string(),
117                        ),
118                    ],
119                    Span::test_data(),
120                )),
121            },
122            Example {
123                description: "Case insensitive search",
124                example: r#"[aBc bde Arc abf] | find "ab" -i"#,
125                result: Some(Value::list(
126                    vec![
127                        Value::test_string(
128                            "\u{1b}[39m\u{1b}[0m\u{1b}[41;39maB\u{1b}[0m\u{1b}[39mc\u{1b}[0m"
129                                .to_string(),
130                        ),
131                        Value::test_string(
132                            "\u{1b}[39m\u{1b}[0m\u{1b}[41;39mab\u{1b}[0m\u{1b}[39mf\u{1b}[0m"
133                                .to_string(),
134                        ),
135                    ],
136                    Span::test_data(),
137                )),
138            },
139            Example {
140                description: "Find value in records using regex",
141                example: r#"[[version name]; ['0.1.0' nushell] ['0.1.1' fish] ['0.2.0' zsh]] | find --regex "nu""#,
142                result: Some(Value::test_list(vec![Value::test_record(record! {
143                        "version" => Value::test_string("0.1.0"),
144                        "name" => Value::test_string("\u{1b}[39m\u{1b}[0m\u{1b}[41;39mnu\u{1b}[0m\u{1b}[39mshell\u{1b}[0m".to_string()),
145                })])),
146            },
147            Example {
148                description: "Find inverted values in records using regex",
149                example: r#"[[version name]; ['0.1.0' nushell] ['0.1.1' fish] ['0.2.0' zsh]] | find --regex "nu" --invert"#,
150                result: Some(Value::test_list(vec![
151                    Value::test_record(record! {
152                            "version" => Value::test_string("0.1.1"),
153                            "name" => Value::test_string("fish".to_string()),
154                    }),
155                    Value::test_record(record! {
156                            "version" => Value::test_string("0.2.0"),
157                            "name" =>Value::test_string("zsh".to_string()),
158                    }),
159                ])),
160            },
161            Example {
162                description: "Find value in list using regex",
163                example: r#"[["Larry", "Moe"], ["Victor", "Marina"]] | find --regex "rr""#,
164                result: Some(Value::list(
165                    vec![Value::list(
166                        vec![
167                            Value::test_string(
168                                "\u{1b}[39mLa\u{1b}[0m\u{1b}[41;39mrr\u{1b}[0m\u{1b}[39my\u{1b}[0m",
169                            ),
170                            Value::test_string("Moe"),
171                        ],
172                        Span::test_data(),
173                    )],
174                    Span::test_data(),
175                )),
176            },
177            Example {
178                description: "Find inverted values in records using regex",
179                example: r#"[["Larry", "Moe"], ["Victor", "Marina"]] | find --regex "rr" --invert"#,
180                result: Some(Value::list(
181                    vec![Value::list(
182                        vec![Value::test_string("Victor"), Value::test_string("Marina")],
183                        Span::test_data(),
184                    )],
185                    Span::test_data(),
186                )),
187            },
188            Example {
189                description: "Remove ANSI sequences from result",
190                example: "[[foo bar]; [abc 123] [def 456]] | find --no-highlight 123",
191                result: Some(Value::list(
192                    vec![Value::test_record(record! {
193                        "foo" => Value::test_string("abc"),
194                        "bar" => Value::test_int(123)
195                    })],
196                    Span::test_data(),
197                )),
198            },
199            Example {
200                description: "Find and highlight text in specific columns",
201                example: "[[col1 col2 col3]; [moe larry curly] [larry curly moe]] | find moe --columns [col1]",
202                result: Some(Value::list(
203                    vec![Value::test_record(record! {
204                            "col1" => Value::test_string(
205                                "\u{1b}[39m\u{1b}[0m\u{1b}[41;39mmoe\u{1b}[0m\u{1b}[39m\u{1b}[0m"
206                                    .to_string(),
207                            ),
208                            "col2" => Value::test_string("larry".to_string()),
209                            "col3" => Value::test_string("curly".to_string()),
210                    })],
211                    Span::test_data(),
212                )),
213            },
214            Example {
215                description: "Find in a multi-line string",
216                example: r#""Violets are red\nAnd roses are blue\nWhen metamaterials\nAlter their hue" | find "ue""#,
217                result: Some(Value::list(
218                    vec![
219                        Value::test_string(
220                            "\u{1b}[39mAnd roses are bl\u{1b}[0m\u{1b}[41;39mue\u{1b}[0m\u{1b}[39m\u{1b}[0m",
221                        ),
222                        Value::test_string(
223                            "\u{1b}[39mAlter their h\u{1b}[0m\u{1b}[41;39mue\u{1b}[0m\u{1b}[39m\u{1b}[0m",
224                        ),
225                    ],
226                    Span::test_data(),
227                )),
228            },
229            Example {
230                description: "Find in a multi-line string without splitting the input into a list of lines",
231                example: r#""Violets are red\nAnd roses are blue\nWhen metamaterials\nAlter their hue" | find --multiline "ue""#,
232                result: Some(Value::test_string(
233                    "\u{1b}[39mViolets are red\nAnd roses are bl\u{1b}[0m\u{1b}[41;39mue\u{1b}[0m\u{1b}[39m\nWhen metamaterials\nAlter their h\u{1b}[0m\u{1b}[41;39mue\u{1b}[0m\u{1b}[39m\u{1b}[0m",
234                )),
235            },
236        ]
237    }
238
239    fn search_terms(&self) -> Vec<&str> {
240        vec!["filter", "regex", "search", "condition", "grep"]
241    }
242
243    fn run(
244        &self,
245        engine_state: &EngineState,
246        stack: &mut Stack,
247        call: &Call,
248        input: PipelineData,
249    ) -> Result<PipelineData, ShellError> {
250        let pattern = get_match_pattern_from_arguments(engine_state, stack, call)?;
251
252        let multiline = call.has_flag(engine_state, stack, "multiline")?;
253
254        let columns_to_search: Vec<_> = call
255            .get_flag(engine_state, stack, "columns")?
256            .unwrap_or_default();
257
258        let input = if multiline {
259            if let PipelineData::ByteStream(..) = input {
260                // ByteStream inputs are processed by iterating over the lines, which necessarily
261                // breaks the multi-line text being streamed into a list of lines.
262                return Err(ShellError::IncompatibleParametersSingle {
263                    msg: "Flag `--multiline` currently doesn't work for byte stream inputs. Consider using `collect`".into(),
264                    span: call.get_flag_span(stack, "multiline").expect("has flag"),
265                });
266            };
267            input
268        } else {
269            split_string_if_multiline(input, call.head)
270        };
271
272        find_in_pipelinedata(pattern, columns_to_search, engine_state, stack, input)
273    }
274}
275
276#[derive(Clone)]
277struct MatchPattern {
278    /// the regex to be used for matching in text
279    regex: Regex,
280
281    /// the list of match terms (converted to lowercase if needed), or empty if a regex was provided
282    search_terms: Vec<String>,
283
284    /// case-insensitive match
285    ignore_case: bool,
286
287    /// return a modified version of the value where matching parts are highlighted
288    highlight: bool,
289
290    /// return the values that aren't a match instead
291    invert: bool,
292
293    /// style of the non-highlighted string sections
294    string_style: Style,
295
296    /// style of the highlighted string sections
297    highlight_style: Style,
298}
299
300fn get_match_pattern_from_arguments(
301    engine_state: &EngineState,
302    stack: &mut Stack,
303    call: &Call,
304) -> Result<MatchPattern, ShellError> {
305    let config = stack.get_config(engine_state);
306
307    let span = call.head;
308    let regex = call.get_flag::<String>(engine_state, stack, "regex")?;
309    let terms = call.rest::<Value>(engine_state, stack, 0)?;
310
311    let invert = call.has_flag(engine_state, stack, "invert")?;
312    let highlight = !call.has_flag(engine_state, stack, "no-highlight")?;
313
314    let ignore_case = call.has_flag(engine_state, stack, "ignore-case")?;
315
316    let dotall = call.has_flag(engine_state, stack, "dotall")?;
317
318    let style_computer = StyleComputer::from_config(engine_state, stack);
319    // Currently, search results all use the same style.
320    // Also note that this sample string is passed into user-written code (the closure that may or may not be
321    // defined for "string").
322    let string_style = style_computer.compute("string", &Value::string("search result", span));
323    let highlight_style =
324        style_computer.compute("search_result", &Value::string("search result", span));
325
326    let (regex_str, search_terms) = if let Some(regex) = regex {
327        if !terms.is_empty() {
328            return Err(ShellError::IncompatibleParametersSingle {
329                msg: "Cannot use a `--regex` parameter with additional search terms".into(),
330                span: call.get_flag_span(stack, "regex").expect("has flag"),
331            });
332        }
333
334        let flags = match (ignore_case, dotall) {
335            (false, false) => "",
336            (true, false) => "(?i)", // case insensitive
337            (false, true) => "(?s)", // allow . to match \n
338            (true, true) => "(?is)", // case insensitive and allow . to match \n
339        };
340
341        (flags.to_string() + regex.as_str(), Vec::new())
342    } else {
343        if dotall {
344            return Err(ShellError::IncompatibleParametersSingle {
345                msg: "Flag --dotall only works for regex search".into(),
346                span: call.get_flag_span(stack, "dotall").expect("has flag"),
347            });
348        }
349
350        let mut regex = String::new();
351
352        if ignore_case {
353            regex += "(?i)";
354        }
355
356        let search_terms = terms
357            .iter()
358            .map(|v| {
359                if ignore_case {
360                    v.to_expanded_string("", &config).to_lowercase()
361                } else {
362                    v.to_expanded_string("", &config)
363                }
364            })
365            .collect::<Vec<String>>();
366
367        let escaped_terms = search_terms
368            .iter()
369            .map(|v| escape(v).into())
370            .collect::<Vec<String>>();
371
372        if let Some(term) = escaped_terms.first() {
373            regex += term;
374        }
375
376        for term in escaped_terms.iter().skip(1) {
377            regex += "|";
378            regex += term;
379        }
380
381        (regex, search_terms)
382    };
383
384    let regex = Regex::new(regex_str.as_str()).map_err(|e| ShellError::TypeMismatch {
385        err_message: format!("invalid regex: {e}"),
386        span,
387    })?;
388
389    Ok(MatchPattern {
390        regex,
391        search_terms,
392        ignore_case,
393        invert,
394        highlight,
395        string_style,
396        highlight_style,
397    })
398}
399
400// map functions
401
402fn highlight_matches_in_string(pattern: &MatchPattern, val: String) -> String {
403    if !pattern.regex.is_match(&val).unwrap_or(false) {
404        return val;
405    }
406
407    let stripped_val = nu_utils::strip_ansi_string_unlikely(val);
408    let mut last_match_end = 0;
409    let mut highlighted = String::new();
410
411    for cap in pattern.regex.captures_iter(stripped_val.as_ref()) {
412        match cap {
413            Ok(capture) => {
414                let start = match capture.get(0) {
415                    Some(acap) => acap.start(),
416                    None => 0,
417                };
418                let end = match capture.get(0) {
419                    Some(acap) => acap.end(),
420                    None => 0,
421                };
422                highlighted.push_str(
423                    &pattern
424                        .string_style
425                        .paint(&stripped_val[last_match_end..start])
426                        .to_string(),
427                );
428                highlighted.push_str(
429                    &pattern
430                        .highlight_style
431                        .paint(&stripped_val[start..end])
432                        .to_string(),
433                );
434                last_match_end = end;
435            }
436            Err(_e) => {
437                // in case of error, return the string with no highlight
438                return pattern.string_style.paint(&stripped_val).to_string();
439            }
440        }
441    }
442
443    highlighted.push_str(
444        &pattern
445            .string_style
446            .paint(&stripped_val[last_match_end..])
447            .to_string(),
448    );
449    highlighted
450}
451
452fn highlight_matches_in_value(
453    pattern: &MatchPattern,
454    value: Value,
455    columns_to_search: &[String],
456) -> Value {
457    if !pattern.highlight || pattern.invert {
458        return value;
459    }
460    let span = value.span();
461
462    match value {
463        Value::Record { val: record, .. } => {
464            let col_select = !columns_to_search.is_empty();
465
466            // TODO: change API to mutate in place
467            let mut record = record.into_owned();
468
469            for (col, val) in record.iter_mut() {
470                if col_select && !columns_to_search.contains(col) {
471                    continue;
472                }
473
474                *val = highlight_matches_in_value(pattern, std::mem::take(val), &[]);
475            }
476
477            Value::record(record, span)
478        }
479        Value::List { vals, .. } => vals
480            .into_iter()
481            .map(|item| highlight_matches_in_value(pattern, item, &[]))
482            .collect::<Vec<Value>>()
483            .into_value(span),
484        Value::String { val, .. } => highlight_matches_in_string(pattern, val).into_value(span),
485        _ => value,
486    }
487}
488
489fn find_in_pipelinedata(
490    pattern: MatchPattern,
491    columns_to_search: Vec<String>,
492    engine_state: &EngineState,
493    stack: &mut Stack,
494    input: PipelineData,
495) -> Result<PipelineData, ShellError> {
496    let config = stack.get_config(engine_state);
497
498    let map_pattern = pattern.clone();
499    let map_columns_to_search = columns_to_search.clone();
500
501    match input {
502        PipelineData::Empty => Ok(PipelineData::empty()),
503        PipelineData::Value(_, _) => input
504            .filter(
505                move |value| {
506                    value_should_be_printed(&pattern, value, &columns_to_search, &config)
507                        != pattern.invert
508                },
509                engine_state.signals(),
510            )?
511            .map(
512                move |x| highlight_matches_in_value(&map_pattern, x, &map_columns_to_search),
513                engine_state.signals(),
514            ),
515        PipelineData::ListStream(stream, metadata) => {
516            let stream = stream.modify(|iter| {
517                iter.filter(move |value| {
518                    value_should_be_printed(&pattern, value, &columns_to_search, &config)
519                        != pattern.invert
520                })
521                .map(move |x| highlight_matches_in_value(&map_pattern, x, &map_columns_to_search))
522            });
523
524            Ok(PipelineData::list_stream(stream, metadata))
525        }
526        PipelineData::ByteStream(stream, ..) => {
527            let span = stream.span();
528            if let Some(lines) = stream.lines() {
529                let mut output: Vec<Value> = vec![];
530                for line in lines {
531                    let line = line?;
532                    if string_should_be_printed(&pattern, &line) != pattern.invert {
533                        if pattern.highlight && !pattern.invert {
534                            output
535                                .push(highlight_matches_in_string(&pattern, line).into_value(span))
536                        } else {
537                            output.push(line.into_value(span))
538                        }
539                    }
540                }
541                Ok(Value::list(output, span).into_pipeline_data())
542            } else {
543                Ok(PipelineData::empty())
544            }
545        }
546    }
547}
548
549// filter functions
550
551fn string_should_be_printed(pattern: &MatchPattern, value: &str) -> bool {
552    pattern.regex.is_match(value).unwrap_or(false)
553}
554
555fn value_should_be_printed(
556    pattern: &MatchPattern,
557    value: &Value,
558    columns_to_search: &[String],
559    config: &Config,
560) -> bool {
561    let value_as_string = if pattern.ignore_case {
562        value.to_expanded_string("", config).to_lowercase()
563    } else {
564        value.to_expanded_string("", config)
565    };
566
567    match value {
568        Value::Bool { .. }
569        | Value::Int { .. }
570        | Value::Filesize { .. }
571        | Value::Duration { .. }
572        | Value::Date { .. }
573        | Value::Range { .. }
574        | Value::Float { .. }
575        | Value::Closure { .. }
576        | Value::Nothing { .. } => {
577            if !pattern.search_terms.is_empty() {
578                // look for exact match when searching with terms
579                pattern
580                    .search_terms
581                    .iter()
582                    .any(|term: &String| term == &value_as_string)
583            } else {
584                string_should_be_printed(pattern, &value_as_string)
585            }
586        }
587        Value::Glob { .. } | Value::CellPath { .. } | Value::Custom { .. } => {
588            string_should_be_printed(pattern, &value_as_string)
589        }
590        Value::String { val, .. } => string_should_be_printed(pattern, val),
591        Value::List { vals, .. } => vals
592            .iter()
593            .any(|item| value_should_be_printed(pattern, item, &[], config)),
594        Value::Record { val: record, .. } => {
595            let col_select = !columns_to_search.is_empty();
596            record.iter().any(|(col, val)| {
597                if col_select && !columns_to_search.contains(col) {
598                    return false;
599                }
600                value_should_be_printed(pattern, val, &[], config)
601            })
602        }
603        Value::Binary { .. } => false,
604        Value::Error { .. } => true,
605    }
606}
607
608// utility
609
610fn split_string_if_multiline(input: PipelineData, head_span: Span) -> PipelineData {
611    let span = input.span().unwrap_or(head_span);
612    match input {
613        PipelineData::Value(Value::String { ref val, .. }, _) => {
614            if val.contains('\n') {
615                Value::list(
616                    val.lines()
617                        .map(|s| Value::string(s.to_string(), span))
618                        .collect(),
619                    span,
620                )
621                .into_pipeline_data_with_metadata(input.metadata())
622            } else {
623                input
624            }
625        }
626        _ => input,
627    }
628}
629
630/// function for using find from other commands
631pub fn find_internal(
632    input: PipelineData,
633    engine_state: &EngineState,
634    stack: &mut Stack,
635    search_term: &str,
636    columns_to_search: &[&str],
637    highlight: bool,
638) -> Result<PipelineData, ShellError> {
639    let span = input.span().unwrap_or(Span::unknown());
640
641    let style_computer = StyleComputer::from_config(engine_state, stack);
642    let string_style = style_computer.compute("string", &Value::string("search result", span));
643    let highlight_style =
644        style_computer.compute("search_result", &Value::string("search result", span));
645
646    let regex_str = format!("(?i){}", escape(search_term));
647
648    let regex = Regex::new(regex_str.as_str()).map_err(|e| ShellError::TypeMismatch {
649        err_message: format!("invalid regex: {e}"),
650        span: Span::unknown(),
651    })?;
652
653    let pattern = MatchPattern {
654        regex,
655        search_terms: vec![search_term.to_lowercase()],
656        ignore_case: true,
657        highlight,
658        invert: false,
659        string_style,
660        highlight_style,
661    };
662
663    let columns_to_search = columns_to_search
664        .iter()
665        .map(|str| String::from(*str))
666        .collect();
667
668    find_in_pipelinedata(pattern, columns_to_search, engine_state, stack, input)
669}
670
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks every example declared by `Find::examples` with the crate's
    /// shared example runner.
    #[test]
    fn test_examples() {
        // Called through its full path: importing it at module level would
        // clash with this test function's own name.
        crate::test_examples(Find)
    }
}