Skip to main content

rgx/
codegen.rs

1use std::fmt;
2use std::fmt::Write as _;
3
4use crate::engine::EngineFlags;
5
/// Target programming languages for which a regex usage snippet can be
/// generated.
///
/// The enum is a plain, field-less tag: it is `Copy`, comparable, and
/// hashable so it can be used directly as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// Rust.
    Rust,
    /// Python.
    Python,
    /// JavaScript.
    JavaScript,
    /// Go.
    Go,
    /// Java.
    Java,
    /// C#.
    CSharp,
    /// PHP.
    Php,
    /// Ruby.
    Ruby,
}
17
18pub const ALL_LANGUAGES: &[Language] = &[
19    Language::Rust,
20    Language::Python,
21    Language::JavaScript,
22    Language::Go,
23    Language::Java,
24    Language::CSharp,
25    Language::Php,
26    Language::Ruby,
27];
28
29impl Language {
30    pub const fn all() -> &'static [Self] {
31        ALL_LANGUAGES
32    }
33}
34
35impl fmt::Display for Language {
36    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37        match self {
38            Self::Rust => write!(f, "Rust"),
39            Self::Python => write!(f, "Python"),
40            Self::JavaScript => write!(f, "JavaScript"),
41            Self::Go => write!(f, "Go"),
42            Self::Java => write!(f, "Java"),
43            Self::CSharp => write!(f, "C#"),
44            Self::Php => write!(f, "PHP"),
45            Self::Ruby => write!(f, "Ruby"),
46        }
47    }
48}
49
50pub fn generate_code(lang: &Language, pattern: &str, flags: &EngineFlags) -> String {
51    match lang {
52        Language::Rust => generate_rust(pattern, flags),
53        Language::Python => generate_python(pattern, flags),
54        Language::JavaScript => generate_javascript(pattern, flags),
55        Language::Go => generate_go(pattern, flags),
56        Language::Java => generate_java(pattern, flags),
57        Language::CSharp => generate_csharp(pattern, flags),
58        Language::Php => generate_php(pattern, flags),
59        Language::Ruby => generate_ruby(pattern, flags),
60    }
61}
62
/// Escape a pattern so it can be embedded in a double-quoted string literal.
///
/// Every backslash and every double quote is prefixed with a backslash; all
/// other characters are copied through unchanged.
fn escape_double_quoted(pattern: &str) -> String {
    let mut out = String::with_capacity(pattern.len());
    for ch in pattern.chars() {
        if matches!(ch, '\\' | '"') {
            out.push('\\');
        }
        out.push(ch);
    }
    out
}
67
/// Pick the names of the enabled flags out of a `(name, enabled)` table.
///
/// The relative order of the table is preserved; entries whose boolean is
/// `false` are skipped.
fn collect_flags<'a>(mapping: &[(&'a str, bool)]) -> Vec<&'a str> {
    let mut enabled_names = Vec::new();
    for &(name, enabled) in mapping {
        if enabled {
            enabled_names.push(name);
        }
    }
    enabled_names
}
76
77fn generate_rust(pattern: &str, flags: &EngineFlags) -> String {
78    let escaped = escape_double_quoted(pattern);
79    let has_flags = flags.case_insensitive
80        || flags.multi_line
81        || flags.dot_matches_newline
82        || flags.unicode
83        || flags.extended;
84
85    if has_flags {
86        let mut lines = String::from("use regex::RegexBuilder;\n\n");
87        let _ = writeln!(lines, "let re = RegexBuilder::new(r\"{escaped}\")");
88        if flags.case_insensitive {
89            lines.push_str("    .case_insensitive(true)\n");
90        }
91        if flags.multi_line {
92            lines.push_str("    .multi_line(true)\n");
93        }
94        if flags.dot_matches_newline {
95            lines.push_str("    .dot_matches_new_line(true)\n");
96        }
97        if flags.unicode {
98            lines.push_str("    .unicode(true)\n");
99        }
100        if flags.extended {
101            lines.push_str("    .ignore_whitespace(true)\n");
102        }
103        lines.push_str("    .build()\n    .unwrap();\n");
104        lines.push_str(
105            "let matches: Vec<&str> = re.find_iter(text).map(|m| m.as_str()).collect();\n",
106        );
107        lines
108    } else {
109        format!(
110            "use regex::Regex;\n\n\
111             let re = Regex::new(r\"{escaped}\").unwrap();\n\
112             let matches: Vec<&str> = re.find_iter(text).map(|m| m.as_str()).collect();\n"
113        )
114    }
115}
116
117fn generate_python(pattern: &str, flags: &EngineFlags) -> String {
118    let escaped = escape_double_quoted(pattern);
119    let flag_parts = collect_flags(&[
120        ("re.IGNORECASE", flags.case_insensitive),
121        ("re.MULTILINE", flags.multi_line),
122        ("re.DOTALL", flags.dot_matches_newline),
123        ("re.UNICODE", flags.unicode),
124        ("re.VERBOSE", flags.extended),
125    ]);
126
127    if flag_parts.is_empty() {
128        format!(
129            "import re\n\n\
130             pattern = re.compile(r\"{escaped}\")\n\
131             matches = pattern.findall(text)\n"
132        )
133    } else {
134        format!(
135            "import re\n\n\
136             pattern = re.compile(r\"{}\", {})\n\
137             matches = pattern.findall(text)\n",
138            escaped,
139            flag_parts.join(" | ")
140        )
141    }
142}
143
144fn generate_javascript(pattern: &str, flags: &EngineFlags) -> String {
145    let escaped = pattern.replace('/', "\\/");
146    let mut js_flags = String::from("g");
147    if flags.case_insensitive {
148        js_flags.push('i');
149    }
150    if flags.multi_line {
151        js_flags.push('m');
152    }
153    if flags.dot_matches_newline {
154        js_flags.push('s');
155    }
156    if flags.unicode {
157        js_flags.push('u');
158    }
159
160    format!(
161        "const regex = /{escaped}/{js_flags};\n\
162         const matches = [...text.matchAll(regex)];\n"
163    )
164}
165
166fn generate_go(pattern: &str, flags: &EngineFlags) -> String {
167    let escaped = pattern.replace('`', "`+\"`\"+`");
168    let mut inline_flags = String::new();
169    if flags.case_insensitive {
170        inline_flags.push('i');
171    }
172    if flags.multi_line {
173        inline_flags.push('m');
174    }
175    if flags.dot_matches_newline {
176        inline_flags.push('s');
177    }
178    if flags.unicode {
179        inline_flags.push('U');
180    }
181
182    let pattern_str = if inline_flags.is_empty() {
183        format!("`{escaped}`")
184    } else {
185        format!("`(?{inline_flags}){escaped}`")
186    };
187
188    format!(
189        "import \"regexp\"\n\n\
190         re := regexp.MustCompile({pattern_str})\n\
191         matches := re.FindAllString(text, -1)\n"
192    )
193}
194
195fn generate_java(pattern: &str, flags: &EngineFlags) -> String {
196    let escaped = escape_double_quoted(pattern);
197    let flag_parts = collect_flags(&[
198        ("Pattern.CASE_INSENSITIVE", flags.case_insensitive),
199        ("Pattern.MULTILINE", flags.multi_line),
200        ("Pattern.DOTALL", flags.dot_matches_newline),
201        ("Pattern.UNICODE_CHARACTER_CLASS", flags.unicode),
202        ("Pattern.COMMENTS", flags.extended),
203    ]);
204
205    if flag_parts.is_empty() {
206        format!(
207            "import java.util.regex.*;\n\n\
208             Pattern pattern = Pattern.compile(\"{escaped}\");\n\
209             Matcher matcher = pattern.matcher(text);\n\
210             while (matcher.find()) {{\n\
211             \x20   System.out.println(matcher.group());\n\
212             }}\n"
213        )
214    } else {
215        format!(
216            "import java.util.regex.*;\n\n\
217             Pattern pattern = Pattern.compile(\"{}\", {});\n\
218             Matcher matcher = pattern.matcher(text);\n\
219             while (matcher.find()) {{\n\
220             \x20   System.out.println(matcher.group());\n\
221             }}\n",
222            escaped,
223            flag_parts.join(" | ")
224        )
225    }
226}
227
228fn generate_csharp(pattern: &str, flags: &EngineFlags) -> String {
229    let escaped = pattern.replace('"', "\"\"");
230    let flag_parts = collect_flags(&[
231        ("RegexOptions.IgnoreCase", flags.case_insensitive),
232        ("RegexOptions.Multiline", flags.multi_line),
233        ("RegexOptions.Singleline", flags.dot_matches_newline),
234        ("RegexOptions.IgnorePatternWhitespace", flags.extended),
235    ]);
236
237    if flag_parts.is_empty() {
238        format!(
239            "using System.Text.RegularExpressions;\n\n\
240             var regex = new Regex(@\"{escaped}\");\n\
241             var matches = regex.Matches(text);\n"
242        )
243    } else {
244        format!(
245            "using System.Text.RegularExpressions;\n\n\
246             var regex = new Regex(@\"{}\", {});\n\
247             var matches = regex.Matches(text);\n",
248            escaped,
249            flag_parts.join(" | ")
250        )
251    }
252}
253
254fn generate_php(pattern: &str, flags: &EngineFlags) -> String {
255    let escaped = pattern.replace('\'', "\\'").replace('/', "\\/");
256    let php_flags = flags.to_inline_prefix();
257
258    format!(
259        "$pattern = '/{escaped}/{php_flags}';\n\
260         preg_match_all($pattern, $text, $matches);\n"
261    )
262}
263
264fn generate_ruby(pattern: &str, flags: &EngineFlags) -> String {
265    let escaped = pattern.replace('/', "\\/");
266    let mut ruby_flags = String::new();
267    if flags.case_insensitive {
268        ruby_flags.push('i');
269    }
270    if flags.multi_line {
271        ruby_flags.push('m');
272    }
273    if flags.extended {
274        ruby_flags.push('x');
275    }
276
277    format!(
278        "pattern = /{escaped}/{ruby_flags}\n\
279         matches = text.scan(pattern)\n"
280    )
281}