libsubconverter/utils/
regexp.rs

1//! Regular expression utilities
2//!
3//! This module provides utility functions for working with regular expressions,
4//! similar to the C++ implementation in subconverter.
5
6use regex::{Regex, RegexBuilder};
7
8/// Checks if a regular expression pattern is valid
9///
10/// # Arguments
11///
12/// * `reg` - The regular expression pattern to validate
13///
14/// # Returns
15///
16/// `true` if the pattern is valid, `false` otherwise
17pub fn reg_valid(reg: &str) -> bool {
18    Regex::new(reg).is_ok()
19}
20
21/// Finds if a pattern matches anywhere in the string
22///
23/// # Arguments
24///
25/// * `src` - The source string to search in
26/// * `match_pattern` - The pattern to search for
27///
28/// # Returns
29///
30/// `true` if the pattern is found, `false` otherwise
31pub fn reg_find(src: &str, match_pattern: &str) -> bool {
32    let (pattern, case_insensitive) = if match_pattern.starts_with("(?i)") {
33        (&match_pattern[4..], true)
34    } else {
35        (match_pattern, false)
36    };
37
38    if let Ok(regex) = RegexBuilder::new(pattern)
39        .case_insensitive(case_insensitive)
40        .multi_line(true)
41        .build()
42    {
43        regex.is_match(src)
44    } else {
45        false
46    }
47}
48
49/// Replaces matches of a pattern with a replacement string
50///
51/// # Arguments
52///
53/// * `src` - The source string
54/// * `match_pattern` - The pattern to match
55/// * `rep` - The replacement string
56/// * `global` - Whether to replace all occurrences or just the first one
57/// * `multiline` - Whether to enable multiline mode
58///
59/// # Returns
60///
61/// The string with replacements made
62pub fn reg_replace(
63    src: &str,
64    match_pattern: &str,
65    rep: &str,
66    global: bool,
67    multiline: bool,
68) -> String {
69    let (pattern, case_insensitive) = if match_pattern.starts_with("(?i)") {
70        (&match_pattern[4..], true)
71    } else {
72        (match_pattern, false)
73    };
74
75    if let Ok(regex) = RegexBuilder::new(pattern)
76        .case_insensitive(case_insensitive)
77        .multi_line(multiline)
78        .build()
79    {
80        if global {
81            regex.replace_all(src, rep).to_string()
82        } else {
83            regex.replace(src, rep).to_string()
84        }
85    } else {
86        src.to_string()
87    }
88}
89
90/// Checks if a string fully matches a pattern
91///
92/// # Arguments
93///
94/// * `src` - The source string
95/// * `match_pattern` - The pattern to match
96///
97/// # Returns
98///
99/// `true` if the string fully matches the pattern, `false` otherwise
100pub fn reg_match(src: &str, match_pattern: &str) -> bool {
101    let (pattern, case_insensitive) = if match_pattern.starts_with("(?i)") {
102        (&match_pattern[4..], true)
103    } else {
104        (match_pattern, false)
105    };
106
107    if let Ok(regex) = RegexBuilder::new(&format!("^{}$", pattern))
108        .case_insensitive(case_insensitive)
109        .build()
110    {
111        regex.is_match(src)
112    } else {
113        false
114    }
115}
116
117/// Gets the capturing groups from a regex match
118///
119/// # Arguments
120///
121/// * `src` - The source string
122/// * `match_pattern` - The pattern to match with capturing groups
123///
124/// # Returns
125///
126/// A vector of matched capturing groups, or an empty vector if no match
127pub fn reg_get_match(src: &str, match_pattern: &str) -> Vec<String> {
128    let (pattern, case_insensitive) = if match_pattern.starts_with("(?i)") {
129        (&match_pattern[4..], true)
130    } else {
131        (match_pattern, false)
132    };
133
134    if let Ok(regex) = RegexBuilder::new(pattern)
135        .case_insensitive(case_insensitive)
136        .multi_line(true)
137        .build()
138    {
139        if let Some(caps) = regex.captures(src) {
140            let mut results = Vec::new();
141            for i in 0..caps.len() {
142                if let Some(m) = caps.get(i) {
143                    results.push(m.as_str().to_string());
144                }
145            }
146            results
147        } else {
148            Vec::new()
149        }
150    } else {
151        Vec::new()
152    }
153}
154
155/// Gets all matches for a regex pattern
156///
157/// # Arguments
158///
159/// * `src` - The source string
160/// * `match_pattern` - The pattern to match
161/// * `group_only` - Whether to return only capturing groups
162///
163/// # Returns
164///
165/// A vector of matched strings
166pub fn reg_get_all_match(src: &str, match_pattern: &str, group_only: bool) -> Vec<String> {
167    let (pattern, case_insensitive) = if match_pattern.starts_with("(?i)") {
168        (&match_pattern[4..], true)
169    } else {
170        (match_pattern, false)
171    };
172
173    let mut results = Vec::new();
174
175    if let Ok(regex) = RegexBuilder::new(pattern)
176        .case_insensitive(case_insensitive)
177        .multi_line(true)
178        .build()
179    {
180        if group_only {
181            for caps in regex.captures_iter(src) {
182                // Skip the 0th capture (the full match) when group_only is true
183                for i in 1..caps.len() {
184                    if let Some(m) = caps.get(i) {
185                        results.push(m.as_str().to_string());
186                    }
187                }
188            }
189        } else {
190            for caps in regex.captures_iter(src) {
191                for i in 0..caps.len() {
192                    if let Some(m) = caps.get(i) {
193                        results.push(m.as_str().to_string());
194                    }
195                }
196            }
197        }
198    }
199
200    results
201}
202
/// Returns a copy of the input without leading or trailing whitespace.
///
/// # Arguments
///
/// * `src` - The text to trim
///
/// # Returns
///
/// A newly allocated string holding the trimmed text
pub fn reg_trim(src: &str) -> String {
    let trimmed = src.trim();
    String::from(trimmed)
}
215
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed pattern is accepted; an unterminated class is rejected.
    #[test]
    fn test_reg_valid() {
        let well_formed = r"^\d+$";
        let unterminated_class = r"[\d+";

        assert!(reg_valid(well_formed));
        assert!(!reg_valid(unterminated_class));
    }

    /// Substring search, including the `(?i)` prefix, plus one miss.
    #[test]
    fn test_reg_find() {
        let hits = [("hello world", r"world"), ("HELLO world", r"(?i)hello")];
        for &(text, pattern) in hits.iter() {
            assert!(reg_find(text, pattern));
        }

        assert!(!reg_find("hello world", r"universe"));
    }

    /// First-only versus global replacement of the same pattern.
    #[test]
    fn test_reg_replace() {
        // (input, global, expected)
        let cases = [
            ("hello world", false, "hello universe"),
            ("hello world world", true, "hello universe universe"),
            ("hello world world", false, "hello universe world"),
        ];

        for &(input, global, expected) in cases.iter() {
            assert_eq!(
                reg_replace(input, r"world", "universe", global, false),
                expected
            );
        }
    }

    /// Whole-string matching, with and without the `(?i)` prefix.
    #[test]
    fn test_reg_match() {
        let digits_only = r"^\d+$";

        assert!(reg_match("12345", digits_only));
        assert!(!reg_match("12345a", digits_only));
        assert!(reg_match("HELLO", r"(?i)hello"));
    }

    /// The first match yields the whole match followed by its group.
    #[test]
    fn test_reg_get_match() {
        let result = reg_get_match("hello 12345 world", r"(\d+)");

        assert_eq!(result, vec!["12345", "12345"]);
    }

    /// All matches are reported; `group_only` drops the whole-match entries.
    #[test]
    fn test_reg_get_all_match() {
        let haystack = "hello 123 world 456";

        // 2 matches, each contributing the full match and one group
        let result = reg_get_all_match(haystack, r"(\d+)", false);
        assert_eq!(result.len(), 4);

        let group_only = reg_get_all_match(haystack, r"(\d+)", true);
        assert_eq!(group_only, vec!["123", "456"]);
    }

    /// Surrounding spaces are removed; inner spaces are kept.
    #[test]
    fn test_reg_trim() {
        assert_eq!(reg_trim("  hello world  "), "hello world");
    }
}