1use crate::attributes::{collect_attrs_for_path, AttrValue, MacroTable};
7use crate::config::ConfigSet;
8use crate::crlf::{get_file_attrs, AttrRule, DiffAttr};
9use regex::{Regex, RegexBuilder};
10use std::collections::BTreeMap;
11use std::io::Write;
12use std::process::{Command, Stdio};
13use std::sync::OnceLock;
14
/// Built-in hunk-header ("funcname") patterns, one entry per diff driver.
///
/// Each entry is `(driver name, pattern, ignore_case)`. A pattern is one or
/// more regex rules separated by newlines; a rule starting with `!` is a
/// negated rule, and the last rule of a pattern must not be negated (see
/// `compile_matcher`). Built-in patterns are compiled with the extended
/// syntax flag by `matcher_for_driver`.
///
/// The continuation lines of every raw string must start in column 0: anything
/// placed before the rule text becomes part of the regex.
///
/// NOTE(review): upstream git's userdiff patterns use a space-plus-tab class
/// where this table shows `[ ]`; confirm that no literal tab characters were
/// lost from these raw strings.
const BUILTIN_PATTERN_DEFS: &[(&str, &str, bool)] = &[
    (
        "ada",
        r"!^(.*[ ])?(is[ ]+new|renames|is[ ]+separate)([ ].*)?$
!^[ ]*with[ ].*$
^[ ]*((procedure|function)[ ]+.*)$
^[ ]*((package|protected|task)[ ]+.*)$",
        true,
    ),
    (
        "bash",
        r"^[ ]*((([a-zA-Z_][a-zA-Z0-9_]*[ ]*\([ ]*\))|(function[ ]+[a-zA-Z_][a-zA-Z0-9_]*(([ ]*\([ ]*\))|([ ]+)))).*$)",
        false,
    ),
    (
        "bibtex",
        r#"(@[a-zA-Z]{1,}[ ]*\{{0,1}[ ]*[^ "@',\#}{~%]*).*$"#,
        false,
    ),
    (
        "cpp",
        r"!^[ ]*[A-Za-z_][A-Za-z_0-9]*:[[:space:]]*($|/[/*])
^((::[[:space:]]*)?[A-Za-z_].*)$",
        false,
    ),
    (
        "csharp",
        r"!(^|[ ]+)(do|while|for|foreach|if|else|new|default|return|switch|case|throw|catch|using|lock|fixed)([ (]+|$)
^[ ]*(([][[:alnum:]@_.](<[][[:alnum:]@_, <>]+>)?)+([ ]+([][[:alnum:]@_.](<[][[:alnum:]@_, <>]+>)?)+)+[ ]*\([^;]*)$
^[ ]*(([][[:alnum:]@_.](<[][[:alnum:]@_, <>]+>)?)+([ ]+([][[:alnum:]@_.](<[][[:alnum:]@_, <>]+>)?)+)+[^;=:,()]*)$
^[ ]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ ]+)*(class|enum|interface|struct|record)[ ]+.*)$
^[ ]*(namespace[ ]+.*)$",
        false,
    ),
    (
        "css",
        r"![:;][[:space:]]*$
^[:[@.#]?[_a-z0-9].*$",
        true,
    ),
    (
        "dts",
        r"!;
!=
^[ ]*((/[ ]*\{|&?[a-zA-Z_]).*)",
        false,
    ),
    (
        "elixir",
        r"^[ ]*((def(macro|module|impl|protocol|p)?|test)[ ].*)$",
        false,
    ),
    (
        "fortran",
        r#"!^([C*]|[ ]*!)
!^[ ]*MODULE[ ]+PROCEDURE[ ]
^[ ]*((END[ ]+)?(PROGRAM|MODULE|BLOCK[ ]+DATA|([^!'" ]+[ ]+)*(SUBROUTINE|FUNCTION))[ ]+[A-Z].*)$"#,
        true,
    ),
    (
        "fountain",
        r"^((\.[^.]|(int|ext|est|int\.?/ext|i/e)[. ]).*)$",
        true,
    ),
    (
        "golang",
        r"^[ ]*(func[ ]*.*(\{[ ]*)?)
^[ ]*(type[ ].*(struct|interface)[ ]*(\{[ ]*)?)",
        false,
    ),
    ("html", r"^[ ]*(<[Hh][1-6]([ ].*)?>.*)$", false),
    ("ini", r"^[ ]*\[[^]]+\]", false),
    (
        "java",
        r"!^[ ]*(catch|do|for|if|instanceof|new|return|switch|throw|while)
^[ ]*(([a-z-]+[ ]+)*(class|enum|interface|record)[ ]+.*)$
^[ ]*(([A-Za-z_<>&][][?&<>.,A-Za-z_0-9]*[ ]+)+[A-Za-z_][A-Za-z_0-9]*[ ]*\([^;]*)$",
        false,
    ),
    (
        "kotlin",
        r"^[ ]*(([a-z]+[ ]+)*(fun|class|interface)[ ]+.*)$",
        false,
    ),
    ("markdown", r"^ {0,3}#{1,6}[ ].*", false),
    (
        "matlab",
        r"^[[:space:]]*((classdef|function)[[:space:]].*)$|^(%%%?|##)[[:space:]].*$",
        false,
    ),
    (
        "objc",
        r"!^[ ]*(do|for|if|else|return|switch|while)
^[ ]*([-+][ ]*\([ ]*[A-Za-z_][A-Za-z_0-9* ]*\)[ ]*[A-Za-z_].*)$
^[ ]*(([A-Za-z_][A-Za-z_0-9]*[ ]+)+[A-Za-z_][A-Za-z_0-9]*[ ]*\([^;]*)$
^(@(implementation|interface|protocol)[ ].*)$",
        false,
    ),
    (
        "pascal",
        r"^(((class[ ]+)?(procedure|function)|constructor|destructor|interface|implementation|initialization|finalization)[ ]*.*)$
^(.*=[ ]*(class|record).*)$",
        false,
    ),
    (
        "perl",
        r"^package .*
^sub [[:alnum:]_':]+[ ]*(\([^)]*\)[ ]*)?(:[^;#]*)?(\{[ ]*)?(#.*)?$
^(BEGIN|END|INIT|CHECK|UNITCHECK|AUTOLOAD|DESTROY)[ ]*(\{[ ]*)?(#.*)?$
^=head[0-9] .*",
        false,
    ),
    (
        "php",
        r"^[ ]*(((public|protected|private|static|abstract|final)[ ]+)*function.*)$
^[ ]*((((final|abstract)[ ]+)?class|enum|interface|trait).*)$",
        false,
    ),
    ("python", r"^[ ]*((class|(async[ ]+)?def)[ ].*)$", false),
    (
        "r",
        r"^[ ]*([a-zA-z][a-zA-Z0-9_.]*[ ]*(<-|=)[ ]*function.*)$",
        false,
    ),
    ("ruby", r"^[ ]*((class|module|def)[ ].*)$", false),
    (
        "rust",
        r#"^[ ]*((pub(\([^\)]+\))?[ ]+)?((async|const|unsafe|extern([ ]+"[^"]+"))[ ]+)?(struct|enum|union|mod|trait|fn|impl|macro_rules!)[< ]+[^;]*)$"#,
        false,
    ),
    (
        "scheme",
        r"^[ ]*(\(((define|def(struct|syntax|class|method|rules|record|proto|alias)?)[-*/ ]|(library|module|struct|class)[*+ ]).*)$",
        false,
    ),
    (
        "tex",
        r"^(\\((sub)*section|chapter|part)\*{0,1}\{.*)$",
        false,
    ),
];
157
/// Built-in word-diff regex fragments, one entry per diff driver.
///
/// Each entry is `(driver name, regex fragment, ignore_case)`. The fragment is
/// not a complete word regex on its own: `word_regex_pattern_for_path_parsed`
/// appends `GIT_WORD_REGEX_DEFAULT_SUFFIX` to it before returning it.
///
/// Fragments are written as ordinary (non-raw) string literals, so every regex
/// backslash is doubled and `\t` is a real tab. A trailing `\` joins the next
/// source line and skips its leading whitespace, so each continuation line
/// must begin directly with the regex text (normally `|`).
const BUILTIN_WORD_REGEX: &[(&str, &str, bool)] = &[
    (
        "ada",
        "[a-zA-Z][a-zA-Z0-9_]*\
         |[-+]?[0-9][0-9#_.aAbBcCdDeEfF]*([eE][+-]?[0-9_]+)?\
         |=>|\\.\\.|\\*\\*|:=|/=|>=|<=|<<|>>|<>",
        true,
    ),
    (
        "bash",
        "[a-zA-Z_][a-zA-Z0-9_]*\
         |\\$[a-zA-Z0-9_]+|\\$\\{\
         |\\|\\||&&|<<|>>\
         |==|!=|<=|>=|[-+*/%&|^]=\
         |:=|:-|:\\+|:\\?|##|%%|\\^\\^|,,\
         |[-a-zA-Z0-9_]+\
         |\\(|\\)|\\{|\\}|\\[|\\]",
        false,
    ),
    (
        "bibtex",
        "[={}\"]|[^={}\" \t]+",
        false,
    ),
    (
        "cpp",
        "[a-zA-Z_][a-zA-Z0-9_]*\
         |[0-9][0-9.]*([Ee][-+]?[0-9]+)?[fFlLuU]*\
         |0[xXbB][0-9a-fA-F]+[lLuU]*\
         |\\.[0-9][0-9]*([Ee][-+]?[0-9]+)?[fFlL]?\
         |[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*|<=>",
        false,
    ),
    (
        "csharp",
        "[a-zA-Z_][a-zA-Z0-9_]*\
         |[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?\
         |[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->",
        false,
    ),
    (
        "css",
        "-?[_a-zA-Z][-_a-zA-Z0-9]*\
         |-?[0-9]+|\\#[0-9a-fA-F]+",
        true,
    ),
    (
        "dts",
        "[a-zA-Z0-9,._+?#-]+\
         |[-+*/%&^|!~]|>>|<<|&&|\\|\\|",
        false,
    ),
    (
        "elixir",
        "[@:]?[a-zA-Z0-9@_?!]+\
         |[-+]?0[xob][0-9a-fA-F]+\
         |[-+]?[0-9][0-9_.]*([eE][-+]?[0-9_]+)?\
         |:?(\\+\\+|--|\\.\\.|~~~|<>|\\^\\^\\^|<?\\|>|<<<?|>?>>|<<?~|~>?>|<~>|<=|>=|===?|!==?|=~|&&&?|\\|\\|\\|?|=>|<-|\\\\\\\\|->)\
         |:?%[A-Za-z0-9_.]\\{\\}?",
        false,
    ),
    (
        "fortran",
        "[a-zA-Z][a-zA-Z0-9_]*\
         |\\.([Ee][Qq]|[Nn][Ee]|[Gg][TtEe]|[Ll][TtEe]|[Tt][Rr][Uu][Ee]|[Ff][Aa][Ll][Ss][Ee]|[Aa][Nn][Dd]|[Oo][Rr]|[Nn]?[Ee][Qq][Vv]|[Nn][Oo][Tt])\\.\
         |[-+]?[0-9.]+([AaIiDdEeFfLlTtXx][Ss]?[-+]?[0-9.]*)?(_[a-zA-Z0-9][a-zA-Z0-9_]*)?\
         |//|\\*\\*|::|[/<>=]=",
        true,
    ),
    ("fountain", "[^ \t-]+", true),
    (
        "golang",
        "[a-zA-Z_][a-zA-Z0-9_]*\
         |[-+0-9.eE]+i?|0[xX]?[0-9a-fA-F]+i?\
         |[-+*/<>%&^|=!:]=|--|\\+\\+|<<=?|>>=?|&\\^=?|&&|\\|\\||<-|\\.{3}",
        false,
    ),
    ("html", "[^<>= \t]+", false),
    ("ini", "[^ \t]+", false),
    (
        "java",
        "[a-zA-Z_][a-zA-Z0-9_]*\
         |[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?\
         |[-+*/<>%&^|=!]=\
         |--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|",
        false,
    ),
    (
        "kotlin",
        "[a-zA-Z_][a-zA-Z0-9_]*\
         |0[xXbB][0-9a-fA-F_]+[lLuU]*\
         |[0-9][0-9_]*([.][0-9_]*)?([Ee][-+]?[0-9]+)?[fFlLuU]*\
         |[.][0-9][0-9_]*([Ee][-+]?[0-9]+)?[fFlLuU]?\
         |[-+*/<>%&^|=!]==?|--|\\+\\+|<<=|>>=|&&|\\|\\||->|\\.\\*|!!|[?:.][.:]",
        false,
    ),
    ("markdown", "[^<>= \t]+", false),
    (
        "matlab",
        "[a-zA-Z_][a-zA-Z0-9_]*|[-+0-9.e]+|[=~<>]=|\\.[*/\\^']|\\|\\||&&",
        false,
    ),
    (
        "objc",
        "[a-zA-Z_][a-zA-Z0-9_]*\
         |[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?\
         |[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->",
        false,
    ),
    (
        "pascal",
        "[a-zA-Z_][a-zA-Z0-9_]*\
         |[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+\
         |<>|<=|>=|:=|\\.\\.",
        false,
    ),
    (
        "perl",
        "[[:alpha:]_'][[:alnum:]_']*\
         |0[xb]?[0-9a-fA-F_]*\
         |[0-9a-fA-F_]+(\\.[0-9a-fA-F_]+)?([eE][-+]?[0-9_]+)?\
         |=>|-[rwxoRWXOezsfdlpSugkbctTBMAC>]|~~|::\
         |&&=|\\|\\|=|//=|\\*\\*=\
         |&&|\\|\\||//|\\+\\+|--|\\*\\*|\\.\\.\\.?\
         |[-+*/%.^&<>=!|]=\
         |=~|!~\
         |<<|<>|<=>|>>",
        false,
    ),
    (
        "php",
        "[a-zA-Z_][a-zA-Z0-9_]*\
         |[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+\
         |[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->",
        false,
    ),
    (
        "python",
        "[a-zA-Z_][a-zA-Z0-9_]*\
         |[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?\
         |[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?",
        false,
    ),
    ("r", "[^ \t]+", false),
    (
        "ruby",
        "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*\
         |[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?.\
         |//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~",
        false,
    ),
    (
        "rust",
        "[a-zA-Z_][a-zA-Z0-9_]*\
         |[0-9][0-9_a-fA-Fiosuxz]*(\\.([0-9]*[eE][+-]?)?[0-9_fF]*)?\
         |[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::",
        false,
    ),
    (
        "scheme",
        "\\|([^\\\\]*)\\||([^][)(}{[ \t])+",
        false,
    ),
    (
        "tex",
        "\\\\[a-zA-Z@]+|\\\\.|([a-zA-Z0-9]|[^\\x01-\\x7f])+",
        false,
    ),
];
332
/// Single source for the catch-all word regex: any one non-whitespace
/// character, or a lead byte in `\xc0-\xff` followed by continuation bytes in
/// `\x80-\xbf`. Both public constants below are derived from this literal so
/// they cannot drift apart.
macro_rules! git_word_regex_fallback {
    () => {
        "[^[:space:]]|[\\xc0-\\xff][\\x80-\\xbf]+"
    };
}

/// Alternation suffix (note the leading `|`) appended to a built-in word-regex
/// fragment to form the complete pattern.
pub const GIT_WORD_REGEX_DEFAULT_SUFFIX: &str = concat!("|", git_word_regex_fallback!());

/// The same catch-all pattern without the leading `|`, usable as a regex on
/// its own.
pub const GIT_WORD_REGEX_FALLBACK: &str = git_word_regex_fallback!();
338
339#[must_use]
341pub fn builtin_word_regex(driver: &str) -> Option<(&'static str, bool)> {
342 BUILTIN_WORD_REGEX
343 .iter()
344 .find(|(name, _, _)| *name == driver)
345 .map(|(_, pat, ic)| (*pat, *ic))
346}
347
348#[derive(Debug, Clone)]
349struct FuncRule {
350 matcher: RuleMatcher,
351 negate: bool,
352}
353
354#[derive(Debug, Clone)]
355enum RuleMatcher {
356 Rust(Regex),
357 Posix { pattern: String, ignore_case: bool },
358}
359
360#[derive(Debug, Clone)]
361struct BuiltinPattern {
362 pattern: String,
363 ignore_case: bool,
364}
365
366#[derive(Debug, Clone)]
368pub struct FuncnameMatcher {
369 rules: Vec<FuncRule>,
370}
371
372impl FuncnameMatcher {
373 #[must_use]
377 pub fn match_line(&self, line: &str) -> Option<String> {
378 let mut text = line;
379 if let Some(stripped) = text.strip_suffix('\n') {
380 text = stripped;
381 if let Some(stripped_cr) = text.strip_suffix('\r') {
382 text = stripped_cr;
383 }
384 }
385
386 for rule in &self.rules {
387 let matched_text = match &rule.matcher {
388 RuleMatcher::Rust(regex) => {
389 let Some(caps) = regex.captures(text) else {
390 continue;
391 };
392 caps.get(1)
393 .or_else(|| caps.get(0))
394 .map(|m| m.as_str())
395 .unwrap_or_default()
396 .trim_end_matches(char::is_whitespace)
397 .to_owned()
398 }
399 RuleMatcher::Posix {
400 pattern,
401 ignore_case,
402 } => {
403 if !posix_line_matches(pattern, *ignore_case, text) {
404 continue;
405 }
406 text.trim_end_matches(char::is_whitespace).to_owned()
407 }
408 };
409 if rule.negate {
410 return None;
411 }
412 return Some(matched_text);
413 }
414 None
415 }
416}
417
418pub fn matcher_for_path(
422 config: &ConfigSet,
423 rules: &[AttrRule],
424 rel_path: &str,
425) -> Result<Option<FuncnameMatcher>, String> {
426 let attrs = get_file_attrs(rules, rel_path, false, config);
427 let DiffAttr::Driver(ref driver) = attrs.diff_attr else {
428 return Ok(None);
429 };
430 matcher_for_driver(config, driver)
431}
432
433pub fn matcher_for_path_parsed(
435 config: &ConfigSet,
436 rules: &[crate::attributes::AttrRule],
437 macros: &MacroTable,
438 rel_path: &str,
439 ignore_case: bool,
440) -> Result<Option<FuncnameMatcher>, String> {
441 let map = collect_attrs_for_path(rules, macros, rel_path, ignore_case);
442 let Some(AttrValue::Value(driver)) = map.get("diff") else {
443 return Ok(None);
444 };
445 matcher_for_driver(config, driver.as_str())
446}
447
448#[must_use]
455pub fn word_regex_pattern_for_path_parsed(
456 config: &ConfigSet,
457 rules: &[crate::attributes::AttrRule],
458 macros: &MacroTable,
459 rel_path: &str,
460 ignore_case_attrs: bool,
461) -> Option<(String, bool)> {
462 let map = collect_attrs_for_path(rules, macros, rel_path, ignore_case_attrs);
463 let driver = match map.get("diff") {
464 Some(AttrValue::Value(d)) => Some(d.as_str()),
465 _ => None,
466 };
467
468 if let Some(d) = driver {
469 for key in [format!("diff.{d}.wordregex"), format!("diff.{d}.wordRegex")] {
470 if let Some(raw) = config.get(&key) {
471 if !raw.is_empty() {
472 return Some((raw, false));
473 }
474 }
475 }
476 }
477
478 for key in ["diff.wordregex", "diff.wordRegex"] {
479 if let Some(raw) = config.get(key) {
480 if !raw.is_empty() {
481 return Some((raw, false));
482 }
483 }
484 }
485
486 if let Some(d) = driver {
487 if let Some((frag, ic)) = builtin_word_regex(d) {
488 return Some((format!("{frag}{GIT_WORD_REGEX_DEFAULT_SUFFIX}"), ic));
489 }
490 }
491
492 None
493}
494
495pub fn matcher_for_driver(
500 config: &ConfigSet,
501 driver: &str,
502) -> Result<Option<FuncnameMatcher>, String> {
503 if let Some(pattern) = config.get(&format!("diff.{driver}.xfuncname")) {
504 return compile_matcher(&pattern, true, false).map(Some);
505 }
506 if let Some(pattern) = config.get(&format!("diff.{driver}.funcname")) {
507 return compile_matcher(&pattern, false, false).map(Some);
508 }
509 if let Some(builtin) = builtin_patterns().get(driver) {
510 return compile_matcher(&builtin.pattern, true, builtin.ignore_case).map(Some);
511 }
512 Ok(None)
513}
514
515fn compile_matcher(
516 pattern: &str,
517 extended: bool,
518 ignore_case: bool,
519) -> Result<FuncnameMatcher, String> {
520 let lines: Vec<&str> = pattern.split('\n').collect();
521 if lines.is_empty() {
522 return Ok(FuncnameMatcher { rules: Vec::new() });
523 }
524
525 let mut rules = Vec::with_capacity(lines.len());
526 for (idx, raw) in lines.iter().enumerate() {
527 let mut line = *raw;
528 let negate = line.starts_with('!');
529 if negate {
530 if idx == lines.len() - 1 {
531 return Err(format!("Last expression must not be negated: {line}"));
532 }
533 line = &line[1..];
534 }
535
536 let rust_pattern = if extended {
537 fix_charclass_escapes(line)
538 } else {
539 bre_to_ere(line)
540 };
541 let posix_pattern = if extended {
542 line.to_owned()
543 } else {
544 bre_to_ere(line)
545 };
546
547 validate_posix_regex_via_grep(&posix_pattern, ignore_case)
548 .map_err(|_| format!("Invalid regexp to look for hunk header: {line}"))?;
549
550 let matcher = RegexBuilder::new(&rust_pattern)
551 .case_insensitive(ignore_case)
552 .build()
553 .map(RuleMatcher::Rust)
554 .unwrap_or_else(|_| RuleMatcher::Posix {
555 pattern: posix_pattern,
556 ignore_case,
557 });
558 rules.push(FuncRule { matcher, negate });
559 }
560
561 Ok(FuncnameMatcher { rules })
562}
563
564fn builtin_patterns() -> &'static BTreeMap<String, BuiltinPattern> {
565 static BUILTIN_PATTERNS: OnceLock<BTreeMap<String, BuiltinPattern>> = OnceLock::new();
566 BUILTIN_PATTERNS.get_or_init(parse_builtin_patterns)
567}
568
569fn parse_builtin_patterns() -> BTreeMap<String, BuiltinPattern> {
570 BUILTIN_PATTERN_DEFS
571 .iter()
572 .filter(|(name, _, _)| !name.is_empty() && *name != "default")
573 .map(|(name, pattern, ignore_case)| {
574 (
575 (*name).to_owned(),
576 BuiltinPattern {
577 pattern: (*pattern).to_owned(),
578 ignore_case: *ignore_case,
579 },
580 )
581 })
582 .collect()
583}
584
/// Rewrites a basic regular expression (BRE) as an extended one (ERE).
///
/// Outside bracket expressions the meaning of `+ ? { } ( ) |` is swapped: a
/// backslash-escaped one becomes the bare operator, and a bare one becomes an
/// escaped literal. Any other backslash pair is copied unchanged.
///
/// Inside a bracket expression nothing is swapped. A `]` directly after `[`
/// or `[^` / `[!` is a literal member, and a backslash followed by an ASCII
/// letter gets its backslash doubled. POSIX gives backslash no special meaning
/// inside brackets; the doubling presumably keeps the `regex` crate from
/// reading e.g. `\d` as an escape.
///
/// Nested POSIX constructs — `[:class:]`, `[=equiv=]`, `[.coll.]` — are copied
/// through as a unit. Their closing `]` therefore no longer ends the enclosing
/// bracket, so in `[[:alpha:]+]` the `+` is correctly left alone as a class
/// member.
fn bre_to_ere(pattern: &str) -> String {
    let chars: Vec<char> = pattern.chars().collect();
    let mut result = String::with_capacity(pattern.len());
    let mut in_bracket = false;
    let mut i = 0usize;

    while i < chars.len() {
        let c = chars[i];
        if in_bracket {
            match c {
                ']' => {
                    result.push(']');
                    in_bracket = false;
                    i += 1;
                }
                '[' => {
                    // `[:`, `[.` or `[=` opens a nested construct that runs to
                    // the matching `:]`, `.]` or `=]`. `end` is the index of
                    // the closing delimiter. The range cannot underflow: a
                    // successful `get(i + 1)` implies at least two chars.
                    let nested_end = match chars.get(i + 1) {
                        Some(&delim @ (':' | '.' | '=')) => (i + 2..chars.len() - 1)
                            .find(|&j| chars[j] == delim && chars[j + 1] == ']'),
                        _ => None,
                    };
                    match nested_end {
                        Some(end) => {
                            result.extend(&chars[i..=end + 1]);
                            i = end + 2;
                        }
                        // Unterminated or plain `[`: an ordinary member.
                        None => {
                            result.push('[');
                            i += 1;
                        }
                    }
                }
                '\\' => match chars.get(i + 1) {
                    Some(&next) => {
                        if next.is_ascii_alphabetic() {
                            // Extra backslash so `\x` stays "backslash, x".
                            result.push('\\');
                        }
                        result.push('\\');
                        result.push(next);
                        i += 2;
                    }
                    None => {
                        result.push('\\');
                        i += 1;
                    }
                },
                _ => {
                    result.push(c);
                    i += 1;
                }
            }
        } else if c == '[' {
            result.push('[');
            in_bracket = true;
            i += 1;
            // Optional negation marker, then an optional literal `]`.
            if i < chars.len() && (chars[i] == '^' || chars[i] == '!') {
                result.push(chars[i]);
                i += 1;
            }
            if i < chars.len() && chars[i] == ']' {
                result.push(']');
                i += 1;
            }
        } else if c == '\\' && i + 1 < chars.len() {
            let next = chars[i + 1];
            if !matches!(next, '+' | '?' | '{' | '}' | '(' | ')' | '|') {
                // Not a BRE operator escape: keep the backslash.
                result.push('\\');
            }
            result.push(next);
            i += 2;
        } else {
            if matches!(c, '+' | '?' | '{' | '}' | '(' | ')' | '|') {
                // Literal in BRE, operator in ERE: escape it.
                result.push('\\');
            }
            result.push(c);
            i += 1;
        }
    }

    result
}
660
/// Doubles the backslash of `\<letter>` sequences inside bracket expressions.
///
/// Everything outside brackets is copied unchanged, including backslash
/// pairs. Inside a bracket, a backslash followed by an ASCII letter is
/// written with its backslash doubled. POSIX gives backslash no special
/// meaning there; the doubling presumably keeps the `regex` crate from reading
/// e.g. `\d` as an escape. A `]` directly after `[` or `[^` / `[!` is a
/// literal member.
///
/// Nested POSIX constructs — `[:class:]`, `[=equiv=]`, `[.coll.]` — are copied
/// through as a unit. Their closing `]` therefore no longer ends the enclosing
/// bracket, so in `[[:alpha:]\d]` the `\d` is still treated as bracket
/// content.
fn fix_charclass_escapes(pattern: &str) -> String {
    let chars: Vec<char> = pattern.chars().collect();
    let mut result = String::with_capacity(pattern.len());
    let mut in_bracket = false;
    let mut i = 0usize;

    while i < chars.len() {
        let c = chars[i];
        if in_bracket {
            match c {
                ']' => {
                    result.push(']');
                    in_bracket = false;
                    i += 1;
                }
                '[' => {
                    // `[:`, `[.` or `[=` opens a nested construct that runs to
                    // the matching `:]`, `.]` or `=]`. `end` is the index of
                    // the closing delimiter. The range cannot underflow: a
                    // successful `get(i + 1)` implies at least two chars.
                    let nested_end = match chars.get(i + 1) {
                        Some(&delim @ (':' | '.' | '=')) => (i + 2..chars.len() - 1)
                            .find(|&j| chars[j] == delim && chars[j + 1] == ']'),
                        _ => None,
                    };
                    match nested_end {
                        Some(end) => {
                            result.extend(&chars[i..=end + 1]);
                            i = end + 2;
                        }
                        // Unterminated or plain `[`: an ordinary member.
                        None => {
                            result.push('[');
                            i += 1;
                        }
                    }
                }
                '\\' if i + 1 < chars.len() => {
                    let next = chars[i + 1];
                    if next.is_ascii_alphabetic() {
                        // Extra backslash so `\x` stays "backslash, x".
                        result.push('\\');
                    }
                    result.push('\\');
                    result.push(next);
                    i += 2;
                }
                _ => {
                    result.push(c);
                    i += 1;
                }
            }
        } else if c == '[' {
            result.push('[');
            in_bracket = true;
            i += 1;
            // Optional negation marker, then an optional literal `]`.
            if i < chars.len() && (chars[i] == '^' || chars[i] == '!') {
                result.push(chars[i]);
                i += 1;
            }
            if i < chars.len() && chars[i] == ']' {
                result.push(']');
                i += 1;
            }
        } else if c == '\\' && i + 1 < chars.len() {
            // Escapes outside brackets are passed through as a pair so an
            // escaped `[` does not open a bracket.
            result.push(c);
            result.push(chars[i + 1]);
            i += 2;
        } else {
            result.push(c);
            i += 1;
        }
    }

    result
}
715
/// Checks that `pattern` is accepted as an extended regex by the system `grep`.
///
/// Runs `grep -E -q [-i] -- <pattern> /dev/null`. Exit status 0 (match) or
/// 1 (no match) means the pattern compiled, so the result is `Ok(())`.
///
/// All three standard streams of the child are detached. Without that, grep's
/// own diagnostics for a bad pattern would be printed on this process's
/// stderr in addition to the error the caller reports.
///
/// # Errors
///
/// * The spawn error, when `grep` cannot be started.
/// * `InvalidInput` ("invalid regex") for any other exit status, or when the
///   child ended without an exit code.
fn validate_posix_regex_via_grep(pattern: &str, ignore_case: bool) -> std::io::Result<()> {
    let mut cmd = Command::new("grep");
    cmd.arg("-E").arg("-q");
    if ignore_case {
        cmd.arg("-i");
    }
    cmd.arg("--")
        .arg(pattern)
        .arg("/dev/null")
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::null());

    match cmd.status()?.code() {
        Some(0 | 1) => Ok(()),
        _ => Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "invalid regex",
        )),
    }
}
733
/// Reports whether `line` matches the extended regex `pattern` according to
/// the system `grep`.
///
/// Runs `grep -E -q [-i] -- <pattern>` with `line` plus a newline written to
/// its stdin and both output streams discarded. Returns `true` only when grep
/// exits successfully. A spawn or wait failure, or any non-zero exit status
/// (no match, or a pattern grep rejects), gives `false`.
fn posix_line_matches(pattern: &str, ignore_case: bool, line: &str) -> bool {
    let mut grep = Command::new("grep");
    grep.args(["-E", "-q"]);
    if ignore_case {
        grep.arg("-i");
    }
    grep.arg("--")
        .arg(pattern)
        .stdin(Stdio::piped())
        .stdout(Stdio::null())
        .stderr(Stdio::null());

    let mut child = match grep.spawn() {
        Ok(child) => child,
        Err(_) => return false,
    };

    if let Some(mut pipe) = child.stdin.take() {
        // Write errors are ignored on purpose: grep may exit before reading
        // (e.g. on a bad pattern), and the exit status below decides anyway.
        for chunk in [line.as_bytes(), b"\n".as_slice()] {
            let _ = pipe.write_all(chunk);
        }
        // `pipe` is dropped here, closing stdin so grep sees end of input.
    }

    matches!(child.wait(), Ok(status) if status.success())
}
754}