Skip to main content

grit_lib/
userdiff.rs

1//! User-defined and built-in diff function-name matching.
2//!
3//! This module implements the subset of Git's `userdiff` behavior needed for
4//! hunk-header function context extraction.
5
6use crate::attributes::{collect_attrs_for_path, AttrValue, MacroTable};
7use crate::config::ConfigSet;
8use crate::crlf::{get_file_attrs, AttrRule, DiffAttr};
9use regex::{Regex, RegexBuilder};
10use std::collections::BTreeMap;
11use std::io::Write;
12use std::process::{Command, Stdio};
13use std::sync::OnceLock;
14
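/// Built-in diff driver funcname patterns (same strings as Git's userdiff builtin drivers).
///
/// Each entry is `(driver name, patterns, ignore_case)`. `patterns` holds one extended regex
/// per line; a line starting with `!` is a negated rule.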
16const BUILTIN_PATTERN_DEFS: &[(&str, &str, bool)] = &[
17    (
18        "ada",
19        r"!^(.*[ 	])?(is[ 	]+new|renames|is[ 	]+separate)([ 	].*)?$
20!^[ 	]*with[ 	].*$
21^[ 	]*((procedure|function)[ 	]+.*)$
22^[ 	]*((package|protected|task)[ 	]+.*)$",
23        true,
24    ),
25    (
26        "bash",
27        r"^[ 	]*((([a-zA-Z_][a-zA-Z0-9_]*[ 	]*\([ 	]*\))|(function[ 	]+[a-zA-Z_][a-zA-Z0-9_]*(([ 	]*\([ 	]*\))|([ 	]+)))).*$)",
28        false,
29    ),
30    (
31        "bibtex",
32        r#"(@[a-zA-Z]{1,}[ 	]*\{{0,1}[ 	]*[^ 	"@',\#}{~%]*).*$"#,
33        false,
34    ),
35    (
36        "cpp",
37        r"!^[ 	]*[A-Za-z_][A-Za-z_0-9]*:[[:space:]]*($|/[/*])
38^((::[[:space:]]*)?[A-Za-z_].*)$",
39        false,
40    ),
41    (
42        "csharp",
43        r"!(^|[ 	]+)(do|while|for|foreach|if|else|new|default|return|switch|case|throw|catch|using|lock|fixed)([ 	(]+|$)
44^[ 	]*(([][[:alnum:]@_.](<[][[:alnum:]@_, 	<>]+>)?)+([ 	]+([][[:alnum:]@_.](<[][[:alnum:]@_, 	<>]+>)?)+)+[ 	]*\([^;]*)$
45^[ 	]*(([][[:alnum:]@_.](<[][[:alnum:]@_, 	<>]+>)?)+([ 	]+([][[:alnum:]@_.](<[][[:alnum:]@_, 	<>]+>)?)+)+[^;=:,()]*)$
46^[ 	]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ 	]+)*(class|enum|interface|struct|record)[ 	]+.*)$
47^[ 	]*(namespace[ 	]+.*)$",
48        false,
49    ),
50    (
51        "css",
52        r"![:;][[:space:]]*$
53^[:[@.#]?[_a-z0-9].*$",
54        true,
55    ),
56    (
57        "dts",
58        r"!;
59!=
60^[ 	]*((/[ 	]*\{|&?[a-zA-Z_]).*)",
61        false,
62    ),
63    (
64        "elixir",
65        r"^[ 	]*((def(macro|module|impl|protocol|p)?|test)[ 	].*)$",
66        false,
67    ),
68    (
69        "fortran",
70        r#"!^([C*]|[ 	]*!)
71!^[ 	]*MODULE[ 	]+PROCEDURE[ 	]
72^[ 	]*((END[ 	]+)?(PROGRAM|MODULE|BLOCK[ 	]+DATA|([^!'" 	]+[ 	]+)*(SUBROUTINE|FUNCTION))[ 	]+[A-Z].*)$"#,
73        true,
74    ),
75    (
76        "fountain",
77        r"^((\.[^.]|(int|ext|est|int\.?/ext|i/e)[. ]).*)$",
78        true,
79    ),
80    (
81        "golang",
82        r"^[ 	]*(func[ 	]*.*(\{[ 	]*)?)
83^[ 	]*(type[ 	].*(struct|interface)[ 	]*(\{[ 	]*)?)",
84        false,
85    ),
86    ("html", r"^[ 	]*(<[Hh][1-6]([ 	].*)?>.*)$", false),
87    ("ini", r"^[ 	]*\[[^]]+\]", false),
88    (
89        "java",
90        r"!^[ 	]*(catch|do|for|if|instanceof|new|return|switch|throw|while)
91^[ 	]*(([a-z-]+[ 	]+)*(class|enum|interface|record)[ 	]+.*)$
92^[ 	]*(([A-Za-z_<>&][][?&<>.,A-Za-z_0-9]*[ 	]+)+[A-Za-z_][A-Za-z_0-9]*[ 	]*\([^;]*)$",
93        false,
94    ),
95    (
96        "kotlin",
97        r"^[ 	]*(([a-z]+[ 	]+)*(fun|class|interface)[ 	]+.*)$",
98        false,
99    ),
100    ("markdown", r"^ {0,3}#{1,6}[ 	].*", false),
101    (
102        "matlab",
103        r"^[[:space:]]*((classdef|function)[[:space:]].*)$|^(%%%?|##)[[:space:]].*$",
104        false,
105    ),
106    (
107        "objc",
108        r"!^[ 	]*(do|for|if|else|return|switch|while)
109^[ 	]*([-+][ 	]*\([ 	]*[A-Za-z_][A-Za-z_0-9* 	]*\)[ 	]*[A-Za-z_].*)$
110^[ 	]*(([A-Za-z_][A-Za-z_0-9]*[ 	]+)+[A-Za-z_][A-Za-z_0-9]*[ 	]*\([^;]*)$
111^(@(implementation|interface|protocol)[ 	].*)$",
112        false,
113    ),
114    (
115        "pascal",
116        r"^(((class[ 	]+)?(procedure|function)|constructor|destructor|interface|implementation|initialization|finalization)[ 	]*.*)$
117^(.*=[ 	]*(class|record).*)$",
118        false,
119    ),
120    (
121        "perl",
122        r"^package .*
123^sub [[:alnum:]_':]+[ 	]*(\([^)]*\)[ 	]*)?(:[^;#]*)?(\{[ 	]*)?(#.*)?$
124^(BEGIN|END|INIT|CHECK|UNITCHECK|AUTOLOAD|DESTROY)[ 	]*(\{[ 	]*)?(#.*)?$
125^=head[0-9] .*",
126        false,
127    ),
128    (
129        "php",
130        r"^[	 ]*(((public|protected|private|static|abstract|final)[	 ]+)*function.*)$
131^[	 ]*((((final|abstract)[	 ]+)?class|enum|interface|trait).*)$",
132        false,
133    ),
134    ("python", r"^[ 	]*((class|(async[ 	]+)?def)[ 	].*)$", false),
135    (
136        "r",
137        r"^[ 	]*([a-zA-z][a-zA-Z0-9_.]*[ 	]*(<-|=)[ 	]*function.*)$",
138        false,
139    ),
140    ("ruby", r"^[ 	]*((class|module|def)[ 	].*)$", false),
141    (
142        "rust",
143        r#"^[	 ]*((pub(\([^\)]+\))?[	 ]+)?((async|const|unsafe|extern([	 ]+"[^"]+"))[	 ]+)?(struct|enum|union|mod|trait|fn|impl|macro_rules!)[< 	]+[^;]*)$"#,
144        false,
145    ),
146    (
147        "scheme",
148        r"^[	 ]*(\(((define|def(struct|syntax|class|method|rules|record|proto|alias)?)[-*/ 	]|(library|module|struct|class)[*+ 	]).*)$",
149        false,
150    ),
151    (
152        "tex",
153        r"^(\\((sub)*section|chapter|part)\*{0,1}\{.*)$",
154        false,
155    ),
156];
157
/// POSIX extended word-regex fragments from Git's `userdiff.c` `PATTERNS`/`IPATTERN` macros
/// (only the driver-specific part; the `|[^[:space:]]|...` suffix that Git adds is kept
/// separately in [`GIT_WORD_REGEX_DEFAULT_SUFFIX`]).
160///
161/// When `ignore_case` is true, the regex should be compiled with `case_insensitive(true)` like
162/// Git's `REG_ICASE` for `IPATTERN` drivers.
163const BUILTIN_WORD_REGEX: &[(&str, &str, bool)] = &[
164    (
165        "ada",
166        "[a-zA-Z][a-zA-Z0-9_]*\
167         |[-+]?[0-9][0-9#_.aAbBcCdDeEfF]*([eE][+-]?[0-9_]+)?\
168         |=>|\\.\\.|\\*\\*|:=|/=|>=|<=|<<|>>|<>",
169        true,
170    ),
171    (
172        "bash",
173        "[a-zA-Z_][a-zA-Z0-9_]*\
174         |\\$[a-zA-Z0-9_]+|\\$\\{\
175         |\\|\\||&&|<<|>>\
176         |==|!=|<=|>=|[-+*/%&|^]=\
177         |:=|:-|:\\+|:\\?|##|%%|\\^\\^|,,\
178         |[-a-zA-Z0-9_]+\
179         |\\(|\\)|\\{|\\}|\\[|\\]",
180        false,
181    ),
182    (
183        "bibtex",
184        "[={}\"]|[^={}\" \t]+",
185        false,
186    ),
187    (
188        "cpp",
189        "[a-zA-Z_][a-zA-Z0-9_]*\
190         |[0-9][0-9.]*([Ee][-+]?[0-9]+)?[fFlLuU]*\
191         |0[xXbB][0-9a-fA-F]+[lLuU]*\
192         |\\.[0-9][0-9]*([Ee][-+]?[0-9]+)?[fFlL]?\
193         |[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*|<=>",
194        false,
195    ),
196    (
197        "csharp",
198        "[a-zA-Z_][a-zA-Z0-9_]*\
199         |[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?\
200         |[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->",
201        false,
202    ),
203    (
204        "css",
205        "-?[_a-zA-Z][-_a-zA-Z0-9]*\
206         |-?[0-9]+|\\#[0-9a-fA-F]+",
207        true,
208    ),
209    (
210        "dts",
211        "[a-zA-Z0-9,._+?#-]+\
212         |[-+*/%&^|!~]|>>|<<|&&|\\|\\|",
213        false,
214    ),
215    (
216        "elixir",
217        "[@:]?[a-zA-Z0-9@_?!]+\
218         |[-+]?0[xob][0-9a-fA-F]+\
219         |[-+]?[0-9][0-9_.]*([eE][-+]?[0-9_]+)?\
220         |:?(\\+\\+|--|\\.\\.|~~~|<>|\\^\\^\\^|<?\\|>|<<<?|>?>>|<<?~|~>?>|<~>|<=|>=|===?|!==?|=~|&&&?|\\|\\|\\|?|=>|<-|\\\\\\\\|->)\
221         |:?%[A-Za-z0-9_.]\\{\\}?",
222        false,
223    ),
224    (
225        "fortran",
226        "[a-zA-Z][a-zA-Z0-9_]*\
227         |\\.([Ee][Qq]|[Nn][Ee]|[Gg][TtEe]|[Ll][TtEe]|[Tt][Rr][Uu][Ee]|[Ff][Aa][Ll][Ss][Ee]|[Aa][Nn][Dd]|[Oo][Rr]|[Nn]?[Ee][Qq][Vv]|[Nn][Oo][Tt])\\.\
228         |[-+]?[0-9.]+([AaIiDdEeFfLlTtXx][Ss]?[-+]?[0-9.]*)?(_[a-zA-Z0-9][a-zA-Z0-9_]*)?\
229         |//|\\*\\*|::|[/<>=]=",
230        true,
231    ),
232    ("fountain", "[^ \t-]+", true),
233    (
234        "golang",
235        "[a-zA-Z_][a-zA-Z0-9_]*\
236         |[-+0-9.eE]+i?|0[xX]?[0-9a-fA-F]+i?\
237         |[-+*/<>%&^|=!:]=|--|\\+\\+|<<=?|>>=?|&\\^=?|&&|\\|\\||<-|\\.{3}",
238        false,
239    ),
240    ("html", "[^<>= \t]+", false),
241    ("ini", "[^ \t]+", false),
242    (
243        "java",
244        "[a-zA-Z_][a-zA-Z0-9_]*\
245         |[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?\
246         |[-+*/<>%&^|=!]=\
247         |--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|",
248        false,
249    ),
250    (
251        "kotlin",
252        "[a-zA-Z_][a-zA-Z0-9_]*\
253         |0[xXbB][0-9a-fA-F_]+[lLuU]*\
254         |[0-9][0-9_]*([.][0-9_]*)?([Ee][-+]?[0-9]+)?[fFlLuU]*\
255         |[.][0-9][0-9_]*([Ee][-+]?[0-9]+)?[fFlLuU]?\
256         |[-+*/<>%&^|=!]==?|--|\\+\\+|<<=|>>=|&&|\\|\\||->|\\.\\*|!!|[?:.][.:]",
257        false,
258    ),
259    ("markdown", "[^<>= \t]+", false),
260    (
261        "matlab",
262        "[a-zA-Z_][a-zA-Z0-9_]*|[-+0-9.e]+|[=~<>]=|\\.[*/\\^']|\\|\\||&&",
263        false,
264    ),
265    (
266        "objc",
267        "[a-zA-Z_][a-zA-Z0-9_]*\
268         |[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?\
269         |[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->",
270        false,
271    ),
272    (
273        "pascal",
274        "[a-zA-Z_][a-zA-Z0-9_]*\
275         |[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+\
276         |<>|<=|>=|:=|\\.\\.",
277        false,
278    ),
279    (
280        "perl",
281        "[[:alpha:]_'][[:alnum:]_']*\
282         |0[xb]?[0-9a-fA-F_]*\
283         |[0-9a-fA-F_]+(\\.[0-9a-fA-F_]+)?([eE][-+]?[0-9_]+)?\
284         |=>|-[rwxoRWXOezsfdlpSugkbctTBMAC>]|~~|::\
285         |&&=|\\|\\|=|//=|\\*\\*=\
286         |&&|\\|\\||//|\\+\\+|--|\\*\\*|\\.\\.\\.?\
287         |[-+*/%.^&<>=!|]=\
288         |=~|!~\
289         |<<|<>|<=>|>>",
290        false,
291    ),
292    (
293        "php",
294        "[a-zA-Z_][a-zA-Z0-9_]*\
295         |[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+\
296         |[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->",
297        false,
298    ),
299    (
300        "python",
301        "[a-zA-Z_][a-zA-Z0-9_]*\
302         |[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?\
303         |[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?",
304        false,
305    ),
306    ("r", "[^ \t]+", false),
307    (
308        "ruby",
309        "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*\
310         |[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?.\
311         |//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~",
312        false,
313    ),
314    (
315        "rust",
316        "[a-zA-Z_][a-zA-Z0-9_]*\
317         |[0-9][0-9_a-fA-Fiosuxz]*(\\.([0-9]*[eE][+-]?)?[0-9_fF]*)?\
318         |[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::",
319        false,
320    ),
321    (
322        "scheme",
323        "\\|([^\\\\]*)\\||([^][)(}{[ \t])+",
324        false,
325    ),
326    (
327        "tex",
328        "\\\\[a-zA-Z@]+|\\\\.|([a-zA-Z0-9]|[^\\x01-\\x7f])+",
329        false,
330    ),
331];
332
333/// Default word-regex suffix Git appends for built-in drivers (`userdiff.c` `PATTERNS` macro).
334pub const GIT_WORD_REGEX_DEFAULT_SUFFIX: &str = "|[^[:space:]]|[\\xc0-\\xff][\\x80-\\xbf]+";
335
336/// Built-in extended word regex when no driver matches (Git `userdiff` "default" driver).
337pub const GIT_WORD_REGEX_FALLBACK: &str = "[^[:space:]]|[\\xc0-\\xff][\\x80-\\xbf]+";
338
339/// Returns the driver-specific word-regex pattern fragment plus whether it uses `REG_ICASE`.
340#[must_use]
341pub fn builtin_word_regex(driver: &str) -> Option<(&'static str, bool)> {
342    BUILTIN_WORD_REGEX
343        .iter()
344        .find(|(name, _, _)| *name == driver)
345        .map(|(_, pat, ic)| (*pat, *ic))
346}
347
/// One funcname rule: a matcher plus a flag saying whether a hit rejects the line.
#[derive(Debug, Clone)]
struct FuncRule {
    /// Matcher tested against each candidate line.
    matcher: RuleMatcher,
    /// When `true`, a line matching this rule produces no hunk-header text.
    negate: bool,
}
353
/// Backend used to test a line against a single funcname pattern.
#[derive(Debug, Clone)]
enum RuleMatcher {
    /// Pattern compiled with the `regex` crate.
    Rust(Regex),
    /// Pattern kept as text and matched by spawning `grep -E`; used when the `regex`
    /// crate could not compile the pattern.
    Posix { pattern: String, ignore_case: bool },
}
359
/// Owned copy of one `BUILTIN_PATTERN_DEFS` entry, stored under its driver name.
#[derive(Debug, Clone)]
struct BuiltinPattern {
    /// Newline-separated funcname patterns for the driver.
    pattern: String,
    /// Whether the patterns are compiled case-insensitively.
    ignore_case: bool,
}
365
/// Compiled function-name matcher used for diff hunk headers.
#[derive(Debug, Clone)]
pub struct FuncnameMatcher {
    /// Rules in evaluation order; the first rule that matches a line decides the outcome.
    rules: Vec<FuncRule>,
}
371
372impl FuncnameMatcher {
373    /// Match a source line against configured funcname rules.
374    ///
375    /// Returns the text to show after the hunk header when matched.
376    #[must_use]
377    pub fn match_line(&self, line: &str) -> Option<String> {
378        let mut text = line;
379        if let Some(stripped) = text.strip_suffix('\n') {
380            text = stripped;
381            if let Some(stripped_cr) = text.strip_suffix('\r') {
382                text = stripped_cr;
383            }
384        }
385
386        for rule in &self.rules {
387            let matched_text = match &rule.matcher {
388                RuleMatcher::Rust(regex) => {
389                    let Some(caps) = regex.captures(text) else {
390                        continue;
391                    };
392                    caps.get(1)
393                        .or_else(|| caps.get(0))
394                        .map(|m| m.as_str())
395                        .unwrap_or_default()
396                        .trim_end_matches(char::is_whitespace)
397                        .to_owned()
398                }
399                RuleMatcher::Posix {
400                    pattern,
401                    ignore_case,
402                } => {
403                    if !posix_line_matches(pattern, *ignore_case, text) {
404                        continue;
405                    }
406                    text.trim_end_matches(char::is_whitespace).to_owned()
407                }
408            };
409            if rule.negate {
410                return None;
411            }
412            return Some(matched_text);
413        }
414        None
415    }
416}
417
418/// Resolve a function-name matcher for `rel_path` from attributes + config.
419///
420/// Returns `Ok(None)` when no diff driver is configured for the path.
421pub fn matcher_for_path(
422    config: &ConfigSet,
423    rules: &[AttrRule],
424    rel_path: &str,
425) -> Result<Option<FuncnameMatcher>, String> {
426    let attrs = get_file_attrs(rules, rel_path, false, config);
427    let DiffAttr::Driver(ref driver) = attrs.diff_attr else {
428        return Ok(None);
429    };
430    matcher_for_driver(config, driver)
431}
432
433/// Like [`matcher_for_path`] but uses parsed `.gitattributes` rules from [`crate::attributes`].
434pub fn matcher_for_path_parsed(
435    config: &ConfigSet,
436    rules: &[crate::attributes::AttrRule],
437    macros: &MacroTable,
438    rel_path: &str,
439    ignore_case: bool,
440) -> Result<Option<FuncnameMatcher>, String> {
441    let map = collect_attrs_for_path(rules, macros, rel_path, ignore_case);
442    let Some(AttrValue::Value(driver)) = map.get("diff") else {
443        return Ok(None);
444    };
445    matcher_for_driver(config, driver.as_str())
446}
447
448/// Effective word-diff regular expression for `rel_path` (Git `diff.wordRegex` + driver + builtins).
449///
450/// Returns `None` when Git would use no `word_regex` (`regcomp`): tokenization is maximal runs of
451/// non-whitespace (`find_word_boundaries` fallback in `diff.c`).
452///
453/// Otherwise returns the full extended-regex pattern and whether Git compiles it with `REG_ICASE`.
454#[must_use]
455pub fn word_regex_pattern_for_path_parsed(
456    config: &ConfigSet,
457    rules: &[crate::attributes::AttrRule],
458    macros: &MacroTable,
459    rel_path: &str,
460    ignore_case_attrs: bool,
461) -> Option<(String, bool)> {
462    let map = collect_attrs_for_path(rules, macros, rel_path, ignore_case_attrs);
463    let driver = match map.get("diff") {
464        Some(AttrValue::Value(d)) => Some(d.as_str()),
465        _ => None,
466    };
467
468    if let Some(d) = driver {
469        for key in [format!("diff.{d}.wordregex"), format!("diff.{d}.wordRegex")] {
470            if let Some(raw) = config.get(&key) {
471                if !raw.is_empty() {
472                    return Some((raw, false));
473                }
474            }
475        }
476    }
477
478    for key in ["diff.wordregex", "diff.wordRegex"] {
479        if let Some(raw) = config.get(key) {
480            if !raw.is_empty() {
481                return Some((raw, false));
482            }
483        }
484    }
485
486    if let Some(d) = driver {
487        if let Some((frag, ic)) = builtin_word_regex(d) {
488            return Some((format!("{frag}{GIT_WORD_REGEX_DEFAULT_SUFFIX}"), ic));
489        }
490    }
491
492    None
493}
494
495/// Resolve a function-name matcher for a named diff driver.
496///
497/// Returns `Ok(None)` when the driver has no built-in or configured funcname
498/// pattern.
499pub fn matcher_for_driver(
500    config: &ConfigSet,
501    driver: &str,
502) -> Result<Option<FuncnameMatcher>, String> {
503    if let Some(pattern) = config.get(&format!("diff.{driver}.xfuncname")) {
504        return compile_matcher(&pattern, true, false).map(Some);
505    }
506    if let Some(pattern) = config.get(&format!("diff.{driver}.funcname")) {
507        return compile_matcher(&pattern, false, false).map(Some);
508    }
509    if let Some(builtin) = builtin_patterns().get(driver) {
510        return compile_matcher(&builtin.pattern, true, builtin.ignore_case).map(Some);
511    }
512    Ok(None)
513}
514
515fn compile_matcher(
516    pattern: &str,
517    extended: bool,
518    ignore_case: bool,
519) -> Result<FuncnameMatcher, String> {
520    let lines: Vec<&str> = pattern.split('\n').collect();
521    if lines.is_empty() {
522        return Ok(FuncnameMatcher { rules: Vec::new() });
523    }
524
525    let mut rules = Vec::with_capacity(lines.len());
526    for (idx, raw) in lines.iter().enumerate() {
527        let mut line = *raw;
528        let negate = line.starts_with('!');
529        if negate {
530            if idx == lines.len() - 1 {
531                return Err(format!("Last expression must not be negated: {line}"));
532            }
533            line = &line[1..];
534        }
535
536        let rust_pattern = if extended {
537            fix_charclass_escapes(line)
538        } else {
539            bre_to_ere(line)
540        };
541        let posix_pattern = if extended {
542            line.to_owned()
543        } else {
544            bre_to_ere(line)
545        };
546
547        validate_posix_regex_via_grep(&posix_pattern, ignore_case)
548            .map_err(|_| format!("Invalid regexp to look for hunk header: {line}"))?;
549
550        let matcher = RegexBuilder::new(&rust_pattern)
551            .case_insensitive(ignore_case)
552            .build()
553            .map(RuleMatcher::Rust)
554            .unwrap_or_else(|_| RuleMatcher::Posix {
555                pattern: posix_pattern,
556                ignore_case,
557            });
558        rules.push(FuncRule { matcher, negate });
559    }
560
561    Ok(FuncnameMatcher { rules })
562}
563
564fn builtin_patterns() -> &'static BTreeMap<String, BuiltinPattern> {
565    static BUILTIN_PATTERNS: OnceLock<BTreeMap<String, BuiltinPattern>> = OnceLock::new();
566    BUILTIN_PATTERNS.get_or_init(parse_builtin_patterns)
567}
568
569fn parse_builtin_patterns() -> BTreeMap<String, BuiltinPattern> {
570    BUILTIN_PATTERN_DEFS
571        .iter()
572        .filter(|(name, _, _)| !name.is_empty() && *name != "default")
573        .map(|(name, pattern, ignore_case)| {
574            (
575                (*name).to_owned(),
576                BuiltinPattern {
577                    pattern: (*pattern).to_owned(),
578                    ignore_case: *ignore_case,
579                },
580            )
581        })
582        .collect()
583}
584
/// Rewrites a POSIX basic regular expression (BRE) into extended (ERE) syntax.
///
/// Outside bracket expressions the escaped operators `\+ \? \{ \} \( \) \|` lose their
/// backslash, while the bare characters `+ ? { } ( ) |` (literals in BRE) gain one.
/// Inside a bracket expression nothing is converted. The only adjustment there is for
/// backslashes: `\` followed by an ASCII letter is emitted as `\\` plus the letter so the
/// `regex` crate reads a literal backslash instead of an escape such as `\d`.
///
/// Nested POSIX items (`[:class:]`, `[.coll.]`, `[=equiv=]`) are copied verbatim and do
/// not end the enclosing bracket expression, so `[[:alnum:]+]` is returned unchanged.
fn bre_to_ere(pattern: &str) -> String {
    // If `chars[start]` is a `[` that opens a POSIX bracket item, returns the index of the
    // item's closing `]`. The item must end at the first `]` that follows.
    fn bracket_item_end(chars: &[char], start: usize) -> Option<usize> {
        let delim = *chars.get(start + 1)?;
        if !matches!(delim, ':' | '.' | '=') {
            return None;
        }
        let close = (start + 2..chars.len()).find(|&j| chars[j] == ']')?;
        (close > start + 2 && chars[close - 1] == delim).then_some(close)
    }

    let chars: Vec<char> = pattern.chars().collect();
    let mut result = String::with_capacity(pattern.len());
    let mut in_bracket = false;
    let mut i = 0usize;

    while i < chars.len() {
        let c = chars[i];
        if in_bracket {
            match c {
                ']' => {
                    result.push(']');
                    in_bracket = false;
                    i += 1;
                }
                '[' => {
                    if let Some(end) = bracket_item_end(&chars, i) {
                        // Copy `[:name:]` and friends as one unit, staying in the bracket.
                        result.extend(&chars[i..=end]);
                        i = end + 1;
                    } else {
                        result.push('[');
                        i += 1;
                    }
                }
                '\\' => match chars.get(i + 1) {
                    Some(&next) => {
                        result.push('\\');
                        if next.is_ascii_alphabetic() {
                            // Keep the backslash literal rather than forming `\d`, `\w`, ...
                            result.push('\\');
                        }
                        result.push(next);
                        i += 2;
                    }
                    None => {
                        result.push('\\');
                        i += 1;
                    }
                },
                _ => {
                    result.push(c);
                    i += 1;
                }
            }
        } else if c == '[' {
            result.push('[');
            in_bracket = true;
            i += 1;
            if i < chars.len() && (chars[i] == '^' || chars[i] == '!') {
                result.push(chars[i]);
                i += 1;
            }
            // A `]` in first position is a literal member, not the end of the bracket.
            if i < chars.len() && chars[i] == ']' {
                result.push(']');
                i += 1;
            }
        } else if c == '\\' && i + 1 < chars.len() {
            let next = chars[i + 1];
            // BRE `\(` etc. are operators: in ERE they are written without the backslash.
            if !matches!(next, '+' | '?' | '{' | '}' | '(' | ')' | '|') {
                result.push('\\');
            }
            result.push(next);
            i += 2;
        } else {
            // BRE treats these characters as literals, so escape them for ERE.
            if matches!(c, '+' | '?' | '{' | '}' | '(' | ')' | '|') {
                result.push('\\');
            }
            result.push(c);
            i += 1;
        }
    }

    result
}
660
/// Adapts backslashes inside bracket expressions of a POSIX ERE for the `regex` crate.
///
/// Inside a bracket expression, `\` followed by an ASCII letter is emitted as `\\` plus the
/// letter, so the `regex` crate reads a literal backslash instead of an escape such as
/// `\d`. Everything else, including text outside brackets, is copied unchanged.
///
/// Nested POSIX items (`[:class:]`, `[.coll.]`, `[=equiv=]`) are copied verbatim and do
/// not end the enclosing bracket expression, so a backslash that follows one (as in
/// `[[:alpha:]\d]`) is still handled as bracket content.
fn fix_charclass_escapes(pattern: &str) -> String {
    // If `chars[start]` is a `[` that opens a POSIX bracket item, returns the index of the
    // item's closing `]`. The item must end at the first `]` that follows.
    fn bracket_item_end(chars: &[char], start: usize) -> Option<usize> {
        let delim = *chars.get(start + 1)?;
        if !matches!(delim, ':' | '.' | '=') {
            return None;
        }
        let close = (start + 2..chars.len()).find(|&j| chars[j] == ']')?;
        (close > start + 2 && chars[close - 1] == delim).then_some(close)
    }

    let chars: Vec<char> = pattern.chars().collect();
    let mut result = String::with_capacity(pattern.len());
    let mut in_bracket = false;
    let mut i = 0usize;

    while i < chars.len() {
        let c = chars[i];
        if in_bracket {
            match c {
                ']' => {
                    result.push(']');
                    in_bracket = false;
                    i += 1;
                }
                '[' => {
                    if let Some(end) = bracket_item_end(&chars, i) {
                        // Copy `[:name:]` and friends as one unit, staying in the bracket.
                        result.extend(&chars[i..=end]);
                        i = end + 1;
                    } else {
                        result.push('[');
                        i += 1;
                    }
                }
                '\\' => match chars.get(i + 1) {
                    Some(&next) => {
                        result.push('\\');
                        if next.is_ascii_alphabetic() {
                            // Keep the backslash literal rather than forming `\d`, `\w`, ...
                            result.push('\\');
                        }
                        result.push(next);
                        i += 2;
                    }
                    None => {
                        result.push('\\');
                        i += 1;
                    }
                },
                _ => {
                    result.push(c);
                    i += 1;
                }
            }
        } else if c == '[' {
            result.push('[');
            in_bracket = true;
            i += 1;
            if i < chars.len() && (chars[i] == '^' || chars[i] == '!') {
                result.push(chars[i]);
                i += 1;
            }
            // A `]` in first position is a literal member, not the end of the bracket.
            if i < chars.len() && chars[i] == ']' {
                result.push(']');
                i += 1;
            }
        } else if c == '\\' && i + 1 < chars.len() {
            // Escapes outside brackets pass through untouched, as a pair.
            result.push(c);
            result.push(chars[i + 1]);
            i += 2;
        } else {
            result.push(c);
            i += 1;
        }
    }

    result
}
715
/// Checks that `pattern` is a valid POSIX extended regex by running `grep -E -q` on
/// `/dev/null`.
///
/// Exit status 0 (match) or 1 (no match) means grep accepted the pattern. The child's
/// stdin, stdout and stderr are all redirected to null, so grep's own diagnostics never
/// reach the caller's terminal.
///
/// # Errors
///
/// Returns the spawn error when `grep` cannot be run, and an
/// [`std::io::ErrorKind::InvalidInput`] error when grep exits with any other status.
fn validate_posix_regex_via_grep(pattern: &str, ignore_case: bool) -> std::io::Result<()> {
    let mut cmd = Command::new("grep");
    cmd.arg("-E").arg("-q");
    if ignore_case {
        cmd.arg("-i");
    }
    cmd.arg("--")
        .arg(pattern)
        .arg("/dev/null")
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::null());

    match cmd.status()?.code() {
        Some(0 | 1) => Ok(()),
        _ => Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "invalid regex",
        )),
    }
}
733
/// Tests `line` against the POSIX extended regex `pattern` by piping it to `grep -E -q`.
///
/// The line is written to grep's stdin followed by a newline; grep's output is discarded.
/// Yields `true` only when grep exits successfully. A grep that cannot be spawned or
/// waited on counts as "no match".
fn posix_line_matches(pattern: &str, ignore_case: bool, line: &str) -> bool {
    let mut grep = Command::new("grep");
    grep.arg("-E").arg("-q");
    if ignore_case {
        grep.arg("-i");
    }
    grep.arg("--")
        .arg(pattern)
        .stdin(Stdio::piped())
        .stdout(Stdio::null())
        .stderr(Stdio::null());

    let mut child = match grep.spawn() {
        Ok(child) => child,
        Err(_) => return false,
    };

    if let Some(mut input) = child.stdin.take() {
        // Write errors are ignored on purpose: the exit status below decides the result.
        let _ = input.write_all(line.as_bytes());
        let _ = input.write_all(b"\n");
        // `input` is dropped here, closing the pipe so grep sees end of input.
    }

    matches!(child.wait(), Ok(status) if status.success())
}