// llguidance/regex_rewrite.rs
/// Returns the body of the ASCII bracket class for a regex shorthand letter.
///
/// `c` is the lowercase shorthand letter (`d`, `w`, or `s`). The returned
/// text is what goes *between* `[` and `]`; the whitespace class spells its
/// control characters as two-character backslash escapes (`\t`, `\n`, ...),
/// not as the raw characters. Any other letter yields `None`.
fn class_for(c: char) -> Option<&'static str> {
    // Shorthand letter -> class body, kept as a small lookup table.
    const ASCII_CLASSES: [(char, &str); 3] = [
        ('d', "0-9"),
        ('w', "0-9a-zA-Z_"),
        ('s', " \\t\\n\\r\\f\\v"),
    ];

    ASCII_CLASSES
        .iter()
        .find(|(shorthand, _)| *shorthand == c)
        .map(|&(_, body)| body)
}
9
10pub fn regex_to_lark(rx: &str, use_ascii: &str) -> String {
18 let mut is_q = false;
19 let mut res = String::new();
20 for c in rx.chars() {
21 let prev_q = is_q;
22 is_q = false;
23 match c {
24 '/' => res.push_str("\\/"),
26
27 '\n' => res.push_str("\\n"),
29 '\r' => res.push_str("\\r"),
30 '\t' => res.push_str("\\t"),
31
32 '\\' if !prev_q => {
33 is_q = true;
34 }
35
36 'd' | 'w' | 's' | 'D' | 'W' | 'S' if prev_q => {
37 let c2 = c.to_ascii_lowercase();
38 if use_ascii.contains(c2) {
39 let class = class_for(c2).unwrap();
40 res.push('[');
41 if c != c2 {
42 res.push('^');
43 }
44 res.push_str(class);
45 res.push(']');
46 } else {
47 res.push('\\');
48 res.push(c);
49 }
50 }
51
52 _ => {
53 if prev_q {
54 res.push('\\');
55 }
56 res.push(c);
57 }
58 }
59 }
60 res
61}
62
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `regex_to_lark(rx, use_ascii)` equals `expected` for every
    /// `(rx, use_ascii, expected)` triple, reporting the offending input on failure.
    #[track_caller]
    fn check_all(cases: &[(&str, &str, &str)]) {
        for &(rx, use_ascii, expected) in cases {
            assert_eq!(
                regex_to_lark(rx, use_ascii),
                expected,
                "rx = {:?}, use_ascii = {:?}",
                rx,
                use_ascii
            );
        }
    }

    #[test]
    fn test_digit_conversion_with_ascii() {
        // `\d` and its negation expand when `d` is requested.
        check_all(&[(r"\d", "d", "[0-9]"), (r"\D", "d", "[^0-9]")]);
    }

    #[test]
    fn test_word_conversion_with_ascii() {
        // `\w` and its negation expand when `w` is requested.
        check_all(&[
            (r"\w", "w", "[0-9a-zA-Z_]"),
            (r"\W", "w", "[^0-9a-zA-Z_]"),
        ]);
    }

    #[test]
    fn test_space_conversion_with_ascii() {
        // `\s` and its negation expand when `s` is requested.
        check_all(&[
            (r"\s", "s", "[ \\t\\n\\r\\f\\v]"),
            (r"\S", "s", "[^ \\t\\n\\r\\f\\v]"),
        ]);
    }

    #[test]
    fn test_no_conversion_when_missing_in_use_ascii() {
        // A shorthand is left untouched unless its letter is in `use_ascii`.
        check_all(&[(r"\d", "", r"\d"), (r"\w", "d", r"\w")]);
    }

    #[test]
    fn test_escaped_slashes_and_whitespace() {
        // Raw control characters and `/` come out as backslash escapes;
        // the unescaped trailing `d` is not treated as a shorthand.
        let input = "/a\nb\rc\td";
        let expected = r"\/a\nb\rc\td";
        check_all(&[(input, "dws", expected)]);
    }

    #[test]
    fn test_combined_conversions() {
        // All six shorthands in sequence, all requested.
        let input = r"\d\w\s\D\W\S";
        let expected = "[0-9][0-9a-zA-Z_][ \\t\\n\\r\\f\\v][^0-9][^0-9a-zA-Z_][^ \\t\\n\\r\\f\\v]";
        check_all(&[(input, "dws", expected)]);
    }

    #[test]
    fn test_miscellaneous_escapes() {
        check_all(&[
            // Unknown escapes pass through unchanged.
            (r"\X", "", r"\X"),
            (r"\@", "", r"\@"),
            // Slashes end up escaped exactly once, escaped or not on input.
            (r"/", "", r"\/"),
            (r"\/", "", r"\/"),
            (r"\//", "", r"\/\/"),
            (r"/\//", "", r"\/\/\/"),
            // An escaped backslash stays an escaped backslash.
            (r"\\", "", r"\\"),
            // Double quotes are not special.
            ("\"", "", "\""),
            (r#"a"b"#, "", r#"a"b"#),
        ]);
    }
}