nu_command/filters/
find.rs

1use fancy_regex::{Regex, escape};
2use nu_ansi_term::Style;
3use nu_color_config::StyleComputer;
4use nu_engine::command_prelude::*;
5use nu_protocol::Config;
6
7#[derive(Clone)]
8pub struct Find;
9
10impl Command for Find {
11    fn name(&self) -> &str {
12        "find"
13    }
14
15    fn signature(&self) -> Signature {
16        Signature::build(self.name())
17            .input_output_types(vec![
18                (
19                    // TODO: This is too permissive; if we could express this
20                    // using a type parameter it would be List<T> -> List<T>.
21                    Type::List(Box::new(Type::Any)),
22                    Type::List(Box::new(Type::Any)),
23                ),
24                (Type::String, Type::Any),
25            ])
26            .named(
27                "regex",
28                SyntaxShape::String,
29                "regex to match with",
30                Some('r'),
31            )
32            .switch(
33                "ignore-case",
34                "case-insensitive regex mode; equivalent to (?i)",
35                Some('i'),
36            )
37            .switch(
38                "multiline",
39                "multi-line regex mode: ^ and $ match begin/end of line; equivalent to (?m)",
40                Some('m'),
41            )
42            .switch(
43                "dotall",
44                "dotall regex mode: allow a dot . to match newlines \\n; equivalent to (?s)",
45                Some('s'),
46            )
47            .named(
48                "columns",
49                SyntaxShape::List(Box::new(SyntaxShape::String)),
50                "column names to be searched",
51                Some('c'),
52            )
53            .switch(
54                "no-highlight",
55                "no-highlight mode: find without marking with ansi code",
56                Some('n'),
57            )
58            .switch("invert", "invert the match", Some('v'))
59            .rest("rest", SyntaxShape::Any, "Terms to search.")
60            .category(Category::Filters)
61    }
62
63    fn description(&self) -> &str {
64        "Searches terms in the input."
65    }
66
67    fn examples(&self) -> Vec<Example> {
68        vec![
69            Example {
70                description: "Search for multiple terms in a command output",
71                example: r#"ls | find toml md sh"#,
72                result: None,
73            },
74            Example {
75                description: "Search and highlight text for a term in a string. Note that regular search is case insensitive",
76                example: r#"'Cargo.toml' | find cargo"#,
77                result: Some(Value::test_string(
78                    "\u{1b}[37m\u{1b}[0m\u{1b}[41;37mCargo\u{1b}[0m\u{1b}[37m.toml\u{1b}[0m"
79                        .to_owned(),
80                )),
81            },
82            Example {
83                description: "Search a number or a file size in a list of numbers",
84                example: r#"[1 5 3kb 4 3Mb] | find 5 3kb"#,
85                result: Some(Value::list(
86                    vec![Value::test_int(5), Value::test_filesize(3000)],
87                    Span::test_data(),
88                )),
89            },
90            Example {
91                description: "Search a char in a list of string",
92                example: r#"[moe larry curly] | find l"#,
93                result: Some(Value::list(
94                    vec![
95                        Value::test_string(
96                            "\u{1b}[37m\u{1b}[0m\u{1b}[41;37ml\u{1b}[0m\u{1b}[37marry\u{1b}[0m",
97                        ),
98                        Value::test_string(
99                            "\u{1b}[37mcur\u{1b}[0m\u{1b}[41;37ml\u{1b}[0m\u{1b}[37my\u{1b}[0m",
100                        ),
101                    ],
102                    Span::test_data(),
103                )),
104            },
105            Example {
106                description: "Find using regex",
107                example: r#"[abc bde arc abf] | find --regex "ab""#,
108                result: Some(Value::list(
109                    vec![
110                        Value::test_string(
111                            "\u{1b}[37m\u{1b}[0m\u{1b}[41;37mab\u{1b}[0m\u{1b}[37mc\u{1b}[0m"
112                                .to_string(),
113                        ),
114                        Value::test_string(
115                            "\u{1b}[37m\u{1b}[0m\u{1b}[41;37mab\u{1b}[0m\u{1b}[37mf\u{1b}[0m"
116                                .to_string(),
117                        ),
118                    ],
119                    Span::test_data(),
120                )),
121            },
122            Example {
123                description: "Find using regex case insensitive",
124                example: r#"[aBc bde Arc abf] | find --regex "ab" -i"#,
125                result: Some(Value::list(
126                    vec![
127                        Value::test_string(
128                            "\u{1b}[37m\u{1b}[0m\u{1b}[41;37maB\u{1b}[0m\u{1b}[37mc\u{1b}[0m"
129                                .to_string(),
130                        ),
131                        Value::test_string(
132                            "\u{1b}[37m\u{1b}[0m\u{1b}[41;37mab\u{1b}[0m\u{1b}[37mf\u{1b}[0m"
133                                .to_string(),
134                        ),
135                    ],
136                    Span::test_data(),
137                )),
138            },
139            Example {
140                description: "Find value in records using regex",
141                example: r#"[[version name]; ['0.1.0' nushell] ['0.1.1' fish] ['0.2.0' zsh]] | find --regex "nu""#,
142                result: Some(Value::test_list(vec![Value::test_record(record! {
143                        "version" => Value::test_string("0.1.0"),
144                        "name" => Value::test_string("\u{1b}[37m\u{1b}[0m\u{1b}[41;37mnu\u{1b}[0m\u{1b}[37mshell\u{1b}[0m".to_string()),
145                })])),
146            },
147            Example {
148                description: "Find inverted values in records using regex",
149                example: r#"[[version name]; ['0.1.0' nushell] ['0.1.1' fish] ['0.2.0' zsh]] | find --regex "nu" --invert"#,
150                result: Some(Value::test_list(vec![
151                    Value::test_record(record! {
152                            "version" => Value::test_string("0.1.1"),
153                            "name" => Value::test_string("fish".to_string()),
154                    }),
155                    Value::test_record(record! {
156                            "version" => Value::test_string("0.2.0"),
157                            "name" =>Value::test_string("zsh".to_string()),
158                    }),
159                ])),
160            },
161            Example {
162                description: "Find value in list using regex",
163                example: r#"[["Larry", "Moe"], ["Victor", "Marina"]] | find --regex "rr""#,
164                result: Some(Value::list(
165                    vec![Value::list(
166                        vec![
167                            Value::test_string(
168                                "\u{1b}[37mLa\u{1b}[0m\u{1b}[41;37mrr\u{1b}[0m\u{1b}[37my\u{1b}[0m",
169                            ),
170                            Value::test_string("Moe"),
171                        ],
172                        Span::test_data(),
173                    )],
174                    Span::test_data(),
175                )),
176            },
177            Example {
178                description: "Find inverted values in records using regex",
179                example: r#"[["Larry", "Moe"], ["Victor", "Marina"]] | find --regex "rr" --invert"#,
180                result: Some(Value::list(
181                    vec![Value::list(
182                        vec![Value::test_string("Victor"), Value::test_string("Marina")],
183                        Span::test_data(),
184                    )],
185                    Span::test_data(),
186                )),
187            },
188            Example {
189                description: "Remove ANSI sequences from result",
190                example: "[[foo bar]; [abc 123] [def 456]] | find --no-highlight 123",
191                result: Some(Value::list(
192                    vec![Value::test_record(record! {
193                        "foo" => Value::test_string("abc"),
194                        "bar" => Value::test_int(123)
195                    })],
196                    Span::test_data(),
197                )),
198            },
199            Example {
200                description: "Find and highlight text in specific columns",
201                example: "[[col1 col2 col3]; [moe larry curly] [larry curly moe]] | find moe --columns [col1]",
202                result: Some(Value::list(
203                    vec![Value::test_record(record! {
204                            "col1" => Value::test_string(
205                                "\u{1b}[37m\u{1b}[0m\u{1b}[41;37mmoe\u{1b}[0m\u{1b}[37m\u{1b}[0m"
206                                    .to_string(),
207                            ),
208                            "col2" => Value::test_string("larry".to_string()),
209                            "col3" => Value::test_string("curly".to_string()),
210                    })],
211                    Span::test_data(),
212                )),
213            },
214        ]
215    }
216
217    fn search_terms(&self) -> Vec<&str> {
218        vec!["filter", "regex", "search", "condition"]
219    }
220
221    fn run(
222        &self,
223        engine_state: &EngineState,
224        stack: &mut Stack,
225        call: &Call,
226        input: PipelineData,
227    ) -> Result<PipelineData, ShellError> {
228        let pattern = get_match_pattern_from_arguments(engine_state, stack, call)?;
229
230        let columns_to_search: Vec<_> = call
231            .get_flag(engine_state, stack, "columns")?
232            .unwrap_or_default();
233
234        let input = split_string_if_multiline(input, call.head);
235
236        find_in_pipelinedata(pattern, columns_to_search, engine_state, stack, input)
237    }
238}
239
240#[derive(Clone)]
241struct MatchPattern {
242    /// the regex to be used for matching in text
243    regex: Regex,
244
245    /// the list of match terms converted to lowercase strings, or empty if a regex was provided
246    lower_terms: Vec<String>,
247
248    /// return a modified version of the value where matching parts are highlighted
249    highlight: bool,
250
251    /// return the values that aren't a match instead
252    invert: bool,
253
254    /// style of the non-highlighted string sections
255    string_style: Style,
256
257    /// style of the highlighted string sections
258    highlight_style: Style,
259}
260
261fn get_match_pattern_from_arguments(
262    engine_state: &EngineState,
263    stack: &mut Stack,
264    call: &Call,
265) -> Result<MatchPattern, ShellError> {
266    let config = stack.get_config(engine_state);
267
268    let span = call.head;
269    let regex = call.get_flag::<String>(engine_state, stack, "regex")?;
270    let terms = call.rest::<Value>(engine_state, stack, 0)?;
271
272    let invert = call.has_flag(engine_state, stack, "invert")?;
273    let highlight = !call.has_flag(engine_state, stack, "no-highlight")?;
274
275    let style_computer = StyleComputer::from_config(engine_state, stack);
276    // Currently, search results all use the same style.
277    // Also note that this sample string is passed into user-written code (the closure that may or may not be
278    // defined for "string").
279    let string_style = style_computer.compute("string", &Value::string("search result", span));
280    let highlight_style =
281        style_computer.compute("search_result", &Value::string("search result", span));
282
283    let (regex_str, lower_terms) = if let Some(regex) = regex {
284        if !terms.is_empty() {
285            return Err(ShellError::IncompatibleParametersSingle {
286                msg: "Cannot use a `--regex` parameter with additional search terms".into(),
287                span: call.get_flag_span(stack, "regex").expect("has flag"),
288            });
289        }
290
291        let insensitive = call.has_flag(engine_state, stack, "ignore-case")?;
292        let multiline = call.has_flag(engine_state, stack, "multiline")?;
293        let dotall = call.has_flag(engine_state, stack, "dotall")?;
294
295        let flags = match (insensitive, multiline, dotall) {
296            (false, false, false) => "",
297            (true, false, false) => "(?i)", // case insensitive
298            (false, true, false) => "(?m)", // multi-line mode
299            (false, false, true) => "(?s)", // allow . to match \n
300            (true, true, false) => "(?im)", // case insensitive and multi-line mode
301            (true, false, true) => "(?is)", // case insensitive and allow . to match \n
302            (false, true, true) => "(?ms)", // multi-line mode and allow . to match \n
303            (true, true, true) => "(?ims)", // case insensitive, multi-line mode and allow . to match \n
304        };
305
306        (flags.to_string() + regex.as_str(), Vec::new())
307    } else {
308        let mut regex = String::new();
309
310        regex += "(?i)";
311
312        let lower_terms = terms
313            .iter()
314            .map(|v| escape(&v.to_expanded_string("", &config).to_lowercase()).into())
315            .collect::<Vec<String>>();
316
317        if let Some(term) = lower_terms.first() {
318            regex += term;
319        }
320
321        for term in lower_terms.iter().skip(1) {
322            regex += "|";
323            regex += term;
324        }
325
326        let lower_terms = terms
327            .iter()
328            .map(|v| v.to_expanded_string("", &config).to_lowercase())
329            .collect::<Vec<String>>();
330
331        (regex, lower_terms)
332    };
333
334    let regex = Regex::new(regex_str.as_str()).map_err(|e| ShellError::TypeMismatch {
335        err_message: format!("invalid regex: {e}"),
336        span,
337    })?;
338
339    Ok(MatchPattern {
340        regex,
341        lower_terms,
342        invert,
343        highlight,
344        string_style,
345        highlight_style,
346    })
347}
348
349// map functions
350
351fn highlight_matches_in_string(pattern: &MatchPattern, val: String) -> String {
352    if !pattern.regex.is_match(&val).unwrap_or(false) {
353        return val;
354    }
355
356    let stripped_val = nu_utils::strip_ansi_string_unlikely(val);
357    let mut last_match_end = 0;
358    let mut highlighted = String::new();
359
360    for cap in pattern.regex.captures_iter(stripped_val.as_ref()) {
361        match cap {
362            Ok(capture) => {
363                let start = match capture.get(0) {
364                    Some(acap) => acap.start(),
365                    None => 0,
366                };
367                let end = match capture.get(0) {
368                    Some(acap) => acap.end(),
369                    None => 0,
370                };
371                highlighted.push_str(
372                    &pattern
373                        .string_style
374                        .paint(&stripped_val[last_match_end..start])
375                        .to_string(),
376                );
377                highlighted.push_str(
378                    &pattern
379                        .highlight_style
380                        .paint(&stripped_val[start..end])
381                        .to_string(),
382                );
383                last_match_end = end;
384            }
385            Err(_e) => {
386                // in case of error, return the string with no highlight
387                return pattern.string_style.paint(&stripped_val).to_string();
388            }
389        }
390    }
391
392    highlighted.push_str(
393        &pattern
394            .string_style
395            .paint(&stripped_val[last_match_end..])
396            .to_string(),
397    );
398    highlighted
399}
400
401fn highlight_matches_in_value(
402    pattern: &MatchPattern,
403    value: Value,
404    columns_to_search: &[String],
405) -> Value {
406    if !pattern.highlight || pattern.invert {
407        return value;
408    }
409    let span = value.span();
410
411    match value {
412        Value::Record { val: record, .. } => {
413            let col_select = !columns_to_search.is_empty();
414
415            // TODO: change API to mutate in place
416            let mut record = record.into_owned();
417
418            for (col, val) in record.iter_mut() {
419                if col_select && !columns_to_search.contains(col) {
420                    continue;
421                }
422
423                *val = highlight_matches_in_value(pattern, std::mem::take(val), &[]);
424            }
425
426            Value::record(record, span)
427        }
428        Value::List { vals, .. } => vals
429            .into_iter()
430            .map(|item| highlight_matches_in_value(pattern, item, &[]))
431            .collect::<Vec<Value>>()
432            .into_value(span),
433        Value::String { val, .. } => highlight_matches_in_string(pattern, val).into_value(span),
434        _ => value,
435    }
436}
437
438fn find_in_pipelinedata(
439    pattern: MatchPattern,
440    columns_to_search: Vec<String>,
441    engine_state: &EngineState,
442    stack: &mut Stack,
443    input: PipelineData,
444) -> Result<PipelineData, ShellError> {
445    let config = stack.get_config(engine_state);
446
447    let map_pattern = pattern.clone();
448    let map_columns_to_search = columns_to_search.clone();
449
450    match input {
451        PipelineData::Empty => Ok(PipelineData::Empty),
452        PipelineData::Value(_, _) => input
453            .filter(
454                move |value| {
455                    value_should_be_printed(&pattern, value, &columns_to_search, &config)
456                        != pattern.invert
457                },
458                engine_state.signals(),
459            )?
460            .map(
461                move |x| highlight_matches_in_value(&map_pattern, x, &map_columns_to_search),
462                engine_state.signals(),
463            ),
464        PipelineData::ListStream(stream, metadata) => {
465            let stream = stream.modify(|iter| {
466                iter.filter(move |value| {
467                    value_should_be_printed(&pattern, value, &columns_to_search, &config)
468                        != pattern.invert
469                })
470                .map(move |x| highlight_matches_in_value(&map_pattern, x, &map_columns_to_search))
471            });
472
473            Ok(PipelineData::ListStream(stream, metadata))
474        }
475        PipelineData::ByteStream(stream, ..) => {
476            let span = stream.span();
477            if let Some(lines) = stream.lines() {
478                let mut output: Vec<Value> = vec![];
479                for line in lines {
480                    let line = line?;
481                    if string_should_be_printed(&pattern, &line) != pattern.invert {
482                        if pattern.highlight && !pattern.invert {
483                            output
484                                .push(highlight_matches_in_string(&pattern, line).into_value(span))
485                        } else {
486                            output.push(line.into_value(span))
487                        }
488                    }
489                }
490                Ok(Value::list(output, span).into_pipeline_data())
491            } else {
492                Ok(PipelineData::Empty)
493            }
494        }
495    }
496}
497
498// filter functions
499
500fn string_should_be_printed(pattern: &MatchPattern, value: &str) -> bool {
501    pattern.regex.is_match(value).unwrap_or(false)
502}
503
504fn value_should_be_printed(
505    pattern: &MatchPattern,
506    value: &Value,
507    columns_to_search: &[String],
508    config: &Config,
509) -> bool {
510    let lower_value = value.to_expanded_string("", config).to_lowercase();
511
512    match value {
513        Value::Bool { .. }
514        | Value::Int { .. }
515        | Value::Filesize { .. }
516        | Value::Duration { .. }
517        | Value::Date { .. }
518        | Value::Range { .. }
519        | Value::Float { .. }
520        | Value::Closure { .. }
521        | Value::Nothing { .. } => {
522            if !pattern.lower_terms.is_empty() {
523                // look for exact match when searching with terms
524                pattern
525                    .lower_terms
526                    .iter()
527                    .any(|term: &String| term == &lower_value)
528            } else {
529                string_should_be_printed(pattern, &lower_value)
530            }
531        }
532        Value::Glob { .. } | Value::CellPath { .. } | Value::Custom { .. } => {
533            string_should_be_printed(pattern, &lower_value)
534        }
535        Value::String { val, .. } => string_should_be_printed(pattern, val),
536        Value::List { vals, .. } => vals
537            .iter()
538            .any(|item| value_should_be_printed(pattern, item, &[], config)),
539        Value::Record { val: record, .. } => {
540            let col_select = !columns_to_search.is_empty();
541            record.iter().any(|(col, val)| {
542                if col_select && !columns_to_search.contains(col) {
543                    return false;
544                }
545                value_should_be_printed(pattern, val, &[], config)
546            })
547        }
548        Value::Binary { .. } => false,
549        Value::Error { .. } => true,
550    }
551}
552
553// utility
554
555fn split_string_if_multiline(input: PipelineData, head_span: Span) -> PipelineData {
556    let span = input.span().unwrap_or(head_span);
557    match input {
558        PipelineData::Value(Value::String { ref val, .. }, _) => {
559            if val.contains('\n') {
560                Value::list(
561                    val.lines()
562                        .map(|s| Value::string(s.to_string(), span))
563                        .collect(),
564                    span,
565                )
566                .into_pipeline_data_with_metadata(input.metadata())
567            } else {
568                input
569            }
570        }
571        _ => input,
572    }
573}
574
575/// function for using find from other commands
576pub fn find_internal(
577    input: PipelineData,
578    engine_state: &EngineState,
579    stack: &mut Stack,
580    search_term: &str,
581    columns_to_search: &[&str],
582    highlight: bool,
583) -> Result<PipelineData, ShellError> {
584    let span = input.span().unwrap_or(Span::unknown());
585
586    let style_computer = StyleComputer::from_config(engine_state, stack);
587    let string_style = style_computer.compute("string", &Value::string("search result", span));
588    let highlight_style =
589        style_computer.compute("search_result", &Value::string("search result", span));
590
591    let regex_str = format!("(?i){}", escape(search_term));
592
593    let regex = Regex::new(regex_str.as_str()).map_err(|e| ShellError::TypeMismatch {
594        err_message: format!("invalid regex: {e}"),
595        span: Span::unknown(),
596    })?;
597
598    let pattern = MatchPattern {
599        regex,
600        lower_terms: vec![search_term.to_lowercase()],
601        highlight,
602        invert: false,
603        string_style,
604        highlight_style,
605    };
606
607    let columns_to_search = columns_to_search
608        .iter()
609        .map(|str| String::from(*str))
610        .collect();
611
612    find_in_pipelinedata(pattern, columns_to_search, engine_state, stack, input)
613}
614
#[cfg(test)]
mod tests {
    use super::Find;

    /// Runs every example declared by `Find::examples` through the crate's
    /// shared example checker.
    #[test]
    fn test_examples() {
        // Imported here rather than at module level: a module-level import
        // would clash with this test function's own name.
        use crate::test_examples;

        test_examples(Find {})
    }
}