e_utils/system/
regex.rs

use memchr::memmem;
pub use regex::Regex;

/// Tokenises `pattern` into literal runs and single-character operators.
///
/// The operator characters are `*`, `~`, `$` and `!`. Each one becomes its own
/// one-character token; the text between operators is kept as a literal token.
/// Empty literals are never emitted, so adjacent operators yield adjacent
/// operator tokens and an empty pattern yields an empty vector.
///
/// Every returned slice borrows from `pattern`, in left-to-right order.
fn split_pattern(pattern: &str) -> Vec<&str> {
  let is_operator = |ch: char| matches!(ch, '*' | '~' | '$' | '!');
  let mut tokens = Vec::new();
  // Byte offset at which the literal currently being scanned begins.
  let mut literal_from = 0;

  for (at, operator) in pattern.match_indices(is_operator) {
    // Flush the literal that precedes this operator, if there is one.
    if literal_from < at {
      tokens.push(&pattern[literal_from..at]);
    }
    tokens.push(operator);
    literal_from = at + operator.len();
  }

  // Trailing literal after the last operator (or the whole pattern when it
  // contains no operator at all).
  if literal_from < pattern.len() {
    tokens.push(&pattern[literal_from..]);
  }

  tokens
}
/// Searches `input` line by line and returns the first line that matches
/// `pattern`, together with the captured slice.
///
/// The pattern is tokenised by `split_pattern` into literal text and the
/// one-character operators `*`, `~`, `$` and `!`. In the list below `text`, `a`
/// and `b` stand for literal text that contains no operator character.
///
/// * `text` – the line equals the whole pattern; captures the line. An empty
///   pattern therefore matches the first empty line.
/// * `text*` – the line starts with `text`; captures the whole line.
/// * `text~` – the line starts with `text`; captures only that prefix.
/// * `*text` – the line ends with `text`; captures the whole line.
/// * `~text` – the line ends with `text`; captures `text` (borrowed from
///   `pattern`, which is why both arguments share one lifetime).
/// * `$text` – the line ends with `text`; captures everything before it.
/// * `text$` – the line starts with `text`; captures everything after it.
/// * `$text*` – the line contains `text`; captures everything before its first
///   occurrence.
/// * `*text$` – the line contains `text`; captures everything after its first
///   occurrence.
/// * `a$b` – captures the text between the first `a` and the next `b`.
/// * any other three-token pattern (`*text*`, `*text~`, `~text*`, …) – the
///   middle token is searched for; the capture starts at the line start when
///   the first token is `*` (otherwise at the match) and ends at the line end
///   when the last token is `*` (otherwise at the end of the match).
/// * `!text` – negative search, see the return value below.
///
/// Patterns ending in `!` and patterns with more than three tokens never match.
///
/// # Arguments
///
/// * `input` - text to search; it is split with `str::lines`
/// * `pattern` - search pattern in the syntax described above
///
/// # Returns
///
/// * `(true, Some(capture))` for the first matching line.
/// * `(false, Some(input))` when no line matches.
/// * For `!text`: `(true, None)` when no line contains `text`, otherwise
///   `(false, Some(line))` with the first line that contains it.
///
/// # Examples
///
/// ```ignore
/// // Bring `regex2` into scope first; the module path depends on the crate layout.
/// let text = "test\nexe \"Number 123 321-\"\n12\nNNNWD Done R   TEST1";
/// assert_eq!(regex2(text, "12"), (true, Some("12")));
/// assert_eq!(regex2(text, "tes~"), (true, Some("tes")));
/// assert_eq!(regex2(text, "tes*"), (true, Some("test")));
/// assert_eq!(regex2(text, "*123~"), (true, Some("exe \"Number 123")));
/// assert_eq!(regex2(text, "$ Done R   TEST1"), (true, Some("NNNWD")));
/// assert_eq!(regex2(text, "*WD Done R   $"), (true, Some("TEST1")));
/// assert_eq!(regex2(text, "!ABC"), (true, None));
/// assert_eq!(regex2(text, "!Done"), (false, Some("NNNWD Done R   TEST1")));
/// assert_eq!(regex2(text, "123"), (false, Some(text)));
/// ```
pub fn regex2<'a>(input: &'a str, pattern: &'a str) -> (bool, Option<&'a str>) {
  let parts = split_pattern(pattern);
  match parts.as_slice() {
    // Negative search: succeed only when no line contains the text.
    ["!", needle] => {
      return match input.lines().find(|line| line.contains(*needle)) {
        Some(line) => (false, Some(line)),
        None => (true, None),
      };
    }
    // A trailing `!` is not a supported form; fall through to "no match".
    [_, "!"] => {}
    _ => {
      for line in input.lines() {
        match parts.as_slice() {
          // Zero or one token: the line must equal the pattern verbatim.
          [] | [_] if line == pattern => return (true, Some(line)),
          // `$text*`: capture what precedes the first occurrence.
          ["$", text, "*"] => {
            if let Some((before, _)) = line.split_once(*text) {
              return (true, Some(before));
            }
          }
          // `*text$`: capture what follows the first occurrence.
          ["*", text, "$"] => {
            if let Some((_, after)) = line.split_once(*text) {
              return (true, Some(after));
            }
          }
          // `text$`: anchored at the line start, capture the remainder.
          [prefix, "$"] => {
            if let Some(after) = line.strip_prefix(*prefix) {
              return (true, Some(after));
            }
          }
          // `$text`: anchored at the line end, capture what precedes it.
          // `strip_suffix` looks at the end of the line directly; testing the
          // first occurrence instead would reject lines in which the suffix
          // text also appears earlier (e.g. `abab` against `$ab`).
          ["$", suffix] => {
            if let Some(before) = line.strip_suffix(*suffix) {
              return (true, Some(before));
            }
          }
          [first, second] => {
            if first.contains('*') || first.contains('~') {
              // `*text` / `~text`: anchored at the line end. As above, check
              // the end itself rather than the first occurrence so that
              // `a.txt.txt` still matches `*.txt`.
              if line.ends_with(*second) {
                return (true, Some(if first.contains('*') { line } else { *second }));
              }
            } else if line.starts_with(*first) {
              // `text*` keeps the whole line, `text~` only the prefix.
              let capture = if second.contains('*') {
                line
              } else {
                &line[..first.len()]
              };
              return (true, Some(capture));
            }
          }
          // `a$b`: capture the text between the first `a` and the next `b`.
          [prefix, "$", suffix] => {
            if let Some((_, rest)) = line.split_once(*prefix) {
              if let Some((between, _)) = rest.split_once(*suffix) {
                return (true, Some(between));
              }
            }
          }
          // Remaining three-token forms: locate the middle token, then let a
          // `*` on either side stretch the capture to that edge of the line.
          [first, mid, last] => {
            if let Some(pos) = memmem::find(line.as_bytes(), mid.as_bytes()) {
              let start = if first.contains('*') { 0 } else { pos };
              let end = if last.contains('*') {
                line.len()
              } else {
                pos + mid.len()
              };
              return (true, Some(&line[start..end]));
            }
          }
          _ => {}
        }
      }
    }
  }
  (false, Some(input))
}

#[cfg(test)]
mod tests {
  use super::*;

  /// Runs `regex2` over one multi-line fixture with a table of patterns and
  /// checks both the match flag and the captured slice for each of them.
  #[test]
  fn test_regex2_various_inputs() {
    let test_str =
      "test\nexe \"Number 123 321-\"\n更多测试2\n12\n1xxx Done W  \"1a2\"\nNNNWD Done R   TEST1\ntest_1_.txt";
    // (pattern, expected match flag, expected capture)
    let cases = [
      // Negative search.
      ("!ABC123", true, None),
      ("!Done R   ", false, Some("NNNWD Done R   TEST1")),
      // Exact line match.
      ("更多测试2", true, Some("更多测试2")),
      ("12", true, Some("12")),
      ("123", false, Some(test_str)),
      // Prefix-anchored forms.
      ("tes~", true, Some("tes")),
      ("tes*", true, Some("test")),
      ("123*", false, Some(test_str)),
      ("1xxx $", true, Some("Done W  \"1a2\"")),
      // Suffix-anchored forms.
      ("*.txt", true, Some("test_1_.txt")),
      ("~123", false, Some(test_str)),
      ("$ Done R   TEST1", true, Some("NNNWD")),
      // Substring forms.
      ("*123*", true, Some("exe \"Number 123 321-\"")),
      ("*123~", true, Some("exe \"Number 123")),
      ("~123*", true, Some("123 321-\"")),
      // Capture forms around a located substring.
      ("Done W  \"$\"", true, Some("1a2")),
      ("*WD Done R   $", true, Some("TEST1")),
      ("$ Done W*", true, Some("1xxx")),
      // A trailing `!` is not a supported pattern form.
      ("test!", false, Some(test_str)),
    ];

    for (pattern, expected_match, expected_capture) in &cases {
      let result = regex2(test_str, pattern);
      // Name the pattern in the failure message so a broken row is easy to find.
      assert_eq!(
        result,
        (*expected_match, *expected_capture),
        "pattern: {:?}",
        pattern
      );
    }
  }
}
e_utils/system/regex.rs

e_utils/system/
regex.rs