// oxihuman_core/pattern_match.rs
1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3#![allow(dead_code)]
4
5/// Simple glob-style pattern matching (supports `*` and `?`).
6#[allow(dead_code)]
7pub fn glob_match(pattern: &str, text: &str) -> bool {
8    let p: Vec<char> = pattern.chars().collect();
9    let t: Vec<char> = text.chars().collect();
10    glob_match_inner(&p, &t)
11}
12
/// Core matcher over decoded `char` slices (`*` = any run, `?` = one char).
///
/// Iterative greedy algorithm with single-star backtracking: on a
/// mismatch we return to the most recent `*` and let it absorb one more
/// text character. Worst case is O(|p| * |t|), unlike the naive
/// recursive formulation, which is exponential on adversarial patterns
/// such as `a*a*a*a*b` and can also overflow the stack on long inputs.
fn glob_match_inner(p: &[char], t: &[char]) -> bool {
    let mut pi = 0; // current index into the pattern
    let mut ti = 0; // current index into the text
    // Most recent `*`: (pattern index just past it, text index where its
    // current expansion ends). `None` until a `*` has been seen.
    let mut backtrack: Option<(usize, usize)> = None;

    while ti < t.len() {
        match p.get(pi) {
            Some(&'*') => {
                // Tentatively let `*` match the empty string; remember
                // where to resume if that choice proves wrong.
                backtrack = Some((pi + 1, ti));
                pi += 1;
            }
            Some(&pc) if pc == '?' || pc == t[ti] => {
                // Literal match or single-char wildcard: advance both.
                pi += 1;
                ti += 1;
            }
            _ => match backtrack {
                Some((resume_p, star_end)) => {
                    // Widen the last `*` by one more character and retry.
                    pi = resume_p;
                    ti = star_end + 1;
                    backtrack = Some((resume_p, star_end + 1));
                }
                // No `*` available to widen: definitive mismatch.
                None => return false,
            },
        }
    }
    // Text exhausted: remaining pattern chars must all be `*`
    // (each matching the empty string).
    p[pi..].iter().all(|&c| c == '*')
}
26
27/// Case-insensitive glob match.
28#[allow(dead_code)]
29pub fn glob_match_ci(pattern: &str, text: &str) -> bool {
30    glob_match(&pattern.to_lowercase(), &text.to_lowercase())
31}
32
/// Check if text has prefix.
///
/// Equivalent to `str::starts_with`; the empty prefix matches any text.
#[allow(dead_code)]
pub fn has_prefix(text: &str, prefix: &str) -> bool {
    text.strip_prefix(prefix).is_some()
}
38
/// Check if text has suffix.
///
/// Equivalent to `str::ends_with`; the empty suffix matches any text.
#[allow(dead_code)]
pub fn has_suffix(text: &str, suffix: &str) -> bool {
    text.strip_suffix(suffix).is_some()
}
44
/// Count matches of a fixed literal in text.
///
/// Matches are counted left-to-right and do not overlap (each match
/// resumes the scan after its own end). An empty needle yields 0
/// rather than the infinite/degenerate count a naive search would give.
#[allow(dead_code)]
pub fn count_occurrences(text: &str, needle: &str) -> usize {
    if needle.is_empty() {
        return 0;
    }
    // `match_indices` yields exactly the non-overlapping occurrences.
    text.match_indices(needle).count()
}
59
/// Extract the portion of text matching between two delimiters.
///
/// Finds the first occurrence of `open`, then the first occurrence of
/// `close` after it, and returns the slice between them. Returns `None`
/// when either delimiter is missing (or `close` only appears before
/// `open`).
#[allow(dead_code)]
pub fn extract_between<'a>(text: &'a str, open: &str, close: &str) -> Option<&'a str> {
    let content_start = text.find(open)? + open.len();
    let content_len = text[content_start..].find(close)?;
    Some(&text[content_start..content_start + content_len])
}
67
/// Simple tokenizer splitting on whitespace.
///
/// Runs of whitespace are treated as a single separator and leading or
/// trailing whitespace produces no empty tokens.
#[allow(dead_code)]
pub fn tokenize(text: &str) -> Vec<&str> {
    let mut tokens = Vec::new();
    for word in text.split_whitespace() {
        tokens.push(word);
    }
    tokens
}
73
/// Replace all occurrences of `from` with `to`.
///
/// Thin wrapper over the standard library's `str::replace`; returns a
/// freshly allocated `String` and leaves the input untouched.
#[allow(dead_code)]
pub fn replace_all(text: &str, from: &str, to: &str) -> String {
    str::replace(text, from, to)
}
79
80/// Filter lines matching a glob pattern.
81#[allow(dead_code)]
82pub fn grep_lines<'a>(lines: &[&'a str], pattern: &str) -> Vec<&'a str> {
83    lines
84        .iter()
85        .copied()
86        .filter(|l| glob_match(pattern, l))
87        .collect()
88}
89
90#[allow(dead_code)]
91pub struct PatternMatcher {
92    patterns: Vec<String>,
93}
94
95#[allow(dead_code)]
96impl PatternMatcher {
97    pub fn new() -> Self {
98        Self {
99            patterns: Vec::new(),
100        }
101    }
102    pub fn add(&mut self, pat: &str) {
103        self.patterns.push(pat.to_string());
104    }
105    pub fn matches_any(&self, text: &str) -> bool {
106        self.patterns.iter().any(|p| glob_match(p, text))
107    }
108    pub fn matches_all(&self, text: &str) -> bool {
109        self.patterns.iter().all(|p| glob_match(p, text))
110    }
111    pub fn count(&self) -> usize {
112        self.patterns.len()
113    }
114    pub fn is_empty(&self) -> bool {
115        self.patterns.is_empty()
116    }
117    pub fn clear(&mut self) {
118        self.patterns.clear();
119    }
120}
121
122impl Default for PatternMatcher {
123    fn default() -> Self {
124        Self::new()
125    }
126}
127
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_star_matches_any() {
        // `*` absorbs any run of characters, but the literal tail must match.
        for (pat, txt, want) in [("*.rs", "main.rs", true), ("*.rs", "main.txt", false)] {
            assert_eq!(glob_match(pat, txt), want, "{pat} vs {txt}");
        }
    }

    #[test]
    fn test_question_mark() {
        // `?` matches exactly one character, never zero or two.
        for (pat, txt, want) in [("te?t", "test", true), ("te?t", "teat2", false)] {
            assert_eq!(glob_match(pat, txt), want, "{pat} vs {txt}");
        }
    }

    #[test]
    fn test_exact_match() {
        // Without metacharacters the match is a plain equality check.
        assert!(glob_match("hello", "hello"));
        assert!(!glob_match("hello", "world"));
    }

    #[test]
    fn test_glob_ci() {
        // Case differences in either side are ignored.
        assert!(glob_match_ci("*.RS", "main.rs"));
    }

    #[test]
    fn test_count_occurrences() {
        assert_eq!(count_occurrences("abcabcabc", "abc"), 3);
        assert_eq!(count_occurrences("aaa", "b"), 0);
    }

    #[test]
    fn test_extract_between() {
        assert_eq!(extract_between("foo(bar)baz", "(", ")"), Some("bar"));
        assert_eq!(extract_between("no delimiters", "(", ")"), None);
    }

    #[test]
    fn test_tokenize() {
        // Surrounding and interior whitespace runs collapse away.
        assert_eq!(tokenize("  hello   world  "), vec!["hello", "world"]);
    }

    #[test]
    fn test_replace_all() {
        assert_eq!(replace_all("aXbXc", "X", "-"), "a-b-c");
    }

    #[test]
    fn test_pattern_matcher_any() {
        let mut matcher = PatternMatcher::new();
        matcher.add("*.rs");
        matcher.add("*.toml");
        assert!(matcher.matches_any("Cargo.toml"));
        assert!(!matcher.matches_any("image.png"));
    }

    #[test]
    fn test_has_prefix_suffix() {
        assert!(has_prefix("hello_world", "hello"));
        assert!(has_suffix("hello_world", "world"));
    }
}