e_utils/system/
regex.rs

1pub use memchr;
2use memchr::memmem;
3
4pub use regex::Regex;
5
/// Tokenizes `pattern` into literal runs and single-character markers.
///
/// The marker characters are `*`, `~`, `$` and `!`. Each marker becomes its own
/// one-character token; the text between markers is emitted unchanged. Empty
/// literal runs (adjacent markers, or a marker at either end) produce no token,
/// so the returned tokens are never empty strings.
fn split_pattern(pattern: &str) -> Vec<&str> {
  let mut tokens = Vec::new();
  // Byte offset where the literal run currently being scanned begins.
  let mut literal_from = 0;

  let is_marker = |c: char| matches!(c, '*' | '~' | '$' | '!');
  for (at, marker) in pattern.match_indices(is_marker) {
    if at > literal_from {
      tokens.push(&pattern[literal_from..at]);
    }
    tokens.push(marker);
    literal_from = at + marker.len();
  }

  // Trailing literal text after the last marker (or the whole pattern if it has none).
  if literal_from < pattern.len() {
    tokens.push(&pattern[literal_from..]);
  }

  tokens
}
26/// 在输入字符串中搜索匹配给定模式的行
27///
28/// # 参数
29///
30/// * `input` - 要搜索的输入字符串
31/// * `pattern` - 搜索模式
32///
33/// # 返回值
34///
35/// 返回一个元组 `(bool, Option<&str>)`,其中:
36/// - 布尔值表示是否找到匹配
37/// - `Option<&str>` 包含匹配的子字符串(如果找到)
38///
39/// # 示例
40///
41/// ```
42///  let test_str = "test\nexe \"Number 123 321-\"\n更多测试2\n12";
43/// let inputs = [
44///   (test_str, "!ABC123", true, Some("test\nexe \"Number 123 321-\"\n更多测试2\n12\n1xxx Done W  \"1a2\"\nNNNWD Done R   TEST1")),
45///   (test_str, "!Done R   ", false, Some("NNNWD Done R   TEST1")),
46///   (test_str, "$ Done R   TEST1", true, Some("NNNWD")),
47///   (test_str, "Done W  \"$\"", true, Some("1a2")),
48///   (test_str, "Done R   $", true, Some("TEST1")),
49///   (test_str, "tes~", true, Some("tes")),
50///   (test_str, "tes*", true, Some("test")),
51///   (test_str, "*123*", true, Some("exe \"Number 123 321-\"")),
52///   (test_str, "更多测试2", true, Some("更多测试2")),
53///   (test_str, "*123~", true, Some("exe \"Number 123")),
54///   (test_str, "~123*", true, Some("123 321-\"")),
55///   (test_str, "123*", false, Some(test_str)),
56///   (test_str, "123", false, Some(test_str)),
57///   (test_str, "~123", false, Some(test_str)),
58/// ];
59/// for (input, pattern, expected_match, expected_capture) in &inputs {
60///   let result = regex2(input, pattern);
61///   println!(
62///     "Input: '{}', Pattern: '{}', Result: {:?}",
63///     input, pattern, result
64///   );
65///   assert_eq!(result, (*expected_match, *expected_capture));
66/// }
67/// ```
68pub fn regex2<'a>(input: &'a str, pattern: &'a str) -> (bool, Option<&'a str>) {
69  let parts = split_pattern(pattern);
70  match parts.as_slice() {
71    ["!", suffix] => {
72      for line in input.lines() {
73        if line.contains(suffix) {
74          return (false, Some(line));
75        }
76      }
77      return (true, None);
78    }
79    [_, "!"] => {}
80    _ => {
81      for line in input.lines() {
82        match parts.as_slice() {
83          [] | [_] if line == pattern => return (true, Some(line)),
84          ["$", suffix, "*"] => {
85            if line.contains(suffix) {
86              if let Some((res, _)) = line.split_once(suffix) {
87                return (true, Some(res));
88              }
89            }
90          }
91          ["*", prefix, "$"] => {
92            if line.contains(prefix) {
93              if let Some((_left, right)) = line.split_once(prefix) {
94                return (true, Some(right));
95              }
96            }
97          }
98          [prefix, "$"] => {
99            if let Some((left, right)) = line.split_once(prefix) {
100              if left.is_empty() {
101                return (true, Some(right));
102              }
103            }
104          }
105          ["$", suffix] => {
106            if let Some((left, right)) = line.split_once(suffix) {
107              if right.is_empty() {
108                return (true, Some(left));
109              }
110            }
111          }
112          [prefix, suffix] => {
113            if prefix.contains('*') || prefix.contains('~') {
114              if let Some(pos) = memmem::find(line.as_bytes(), suffix.as_bytes()) {
115                let res = &line[..pos + suffix.len()];
116                if res == line {
117                  return (true, Some(if prefix.contains('*') { res } else { suffix }));
118                }
119              }
120            } else if let Some(pos) = memmem::find(line.as_bytes(), prefix.as_bytes()) {
121              if pos == 0 {
122                let res = &line[..prefix.len()
123                  + if suffix.contains('*') {
124                    line.len() - prefix.len()
125                  } else {
126                    0
127                  }];
128                return (true, Some(res));
129              }
130            }
131          }
132          [prefix, "$", suffix] => {
133            if line.contains(prefix) || line.contains(suffix) {
134              if let Some((_, v)) = line.split_once(prefix) {
135                if let Some((res, _)) = v.split_once(suffix) {
136                  return (true, Some(res));
137                }
138              }
139            }
140          }
141          [prefix, mid, suffix] => {
142            if let Some(pos) = memmem::find(line.as_bytes(), mid.as_bytes()) {
143              let start = if prefix.contains('*') { 0 } else { pos };
144              let end = if suffix.contains('*') {
145                line.len()
146              } else {
147                pos + mid.len()
148              };
149              return (true, Some(&line[start..end]));
150            }
151          }
152          _ => {}
153        }
154      }
155    }
156  }
157  (false, Some(input))
158}
159
#[cfg(test)]
mod tests {
  use super::*;

  /// Multi-line fixture searched by every case in the table below.
  const TEST_STR: &str =
    "test\nexe \"Number 123 321-\"\n更多测试2\n12\n1xxx Done W  \"1a2\"\nNNNWD Done R   TEST1\ntest_1_.txt";

  /// Table-driven check of `regex2`: each row is `(pattern, expected flag, expected capture)`.
  #[test]
  fn test_regex2_various_inputs() {
    let cases = [
      ("!ABC123", true, None),
      ("tes~", true, Some("tes")),
      ("tes*", true, Some("test")),
      ("*123*", true, Some("exe \"Number 123 321-\"")),
      ("更多测试2", true, Some("更多测试2")),
      ("*123~", true, Some("exe \"Number 123")),
      ("~123*", true, Some("123 321-\"")),
      ("123*", false, Some(TEST_STR)),
      ("*.txt", true, Some("test_1_.txt")),
      ("$ Done R   TEST1", true, Some("NNNWD")),
      ("Done W  \"$\"", true, Some("1a2")),
      ("*WD Done R   $", true, Some("TEST1")),
      ("123", false, Some(TEST_STR)),
      ("~123", false, Some(TEST_STR)),
      ("!Done R   ", false, Some("NNNWD Done R   TEST1")),
    ];

    for (pattern, expected_match, expected_capture) in &cases {
      // Name the pattern in the failure message so a broken row is identifiable
      // without dumping the whole fixture.
      assert_eq!(
        regex2(TEST_STR, pattern),
        (*expected_match, *expected_capture),
        "unexpected result for pattern {:?}",
        pattern
      );
    }
  }
}