rust_rule_engine/parser/
grl_helpers.rs

//! GRL parser helpers that use literal search instead of regex.
//! Provides fast parsing of GRL syntax without regex overhead.
3use super::literal_search;
4
5/// Parse "rule Name" or "rule "Quoted Name"" and extract the name
6pub fn parse_rule_header(text: &str) -> Option<(String, usize)> {
7    let trimmed = text.trim_start();
8
9    if !trimmed.starts_with("rule") {
10        return None;
11    }
12
13    let skip = text.len() - trimmed.len(); // whitespace before "rule"
14    let after_rule = trimmed[4..].trim_start();
15
16    // Try quoted name first
17    if after_rule.starts_with('"') {
18        if let Some(end_quote) = memchr::memchr(b'"', &after_rule.as_bytes()[1..]) {
19            let name = after_rule[1..end_quote + 1].to_string();
20            let consumed = skip + 4 + (trimmed[4..].len() - after_rule.len()) + end_quote + 2;
21            return Some((name, consumed));
22        }
23    }
24
25    // Try identifier
26    if let Some(ident) = literal_search::extract_identifier(after_rule) {
27        let consumed = skip + 4 + (trimmed[4..].len() - after_rule.len()) + ident.len();
28        return Some((ident, consumed));
29    }
30
31    None
32}
33
34/// Split GRL text into individual rules using literal "rule" and brace matching
35pub fn split_into_rules(grl_text: &str) -> Vec<String> {
36    let mut rules = Vec::new();
37    let bytes = grl_text.as_bytes();
38    let mut i = 0;
39
40    while i < bytes.len() {
41        // Find "rule "
42        if let Some(rule_pos) = memchr::memmem::find(&bytes[i..], b"rule ") {
43            let abs_pos = i + rule_pos;
44
45            // Find the opening brace
46            if let Some(brace_pos) = memchr::memchr(b'{', &bytes[abs_pos..]) {
47                let brace_abs = abs_pos + brace_pos;
48
49                // Find matching closing brace
50                if let Some(close_pos) = literal_search::find_matching_brace(grl_text, brace_abs) {
51                    let rule_text = &grl_text[abs_pos..=close_pos];
52                    rules.push(rule_text.to_string());
53                    i = close_pos + 1;
54                    continue;
55                }
56            }
57        }
58        i += 1;
59    }
60
61    rules
62}
63
64/// Parse "when ... then ..." and extract condition and action parts
65pub fn parse_when_then(body: &str) -> Option<(String, String)> {
66    let trimmed = body.trim();
67
68    // Find "when"
69    let when_pos = literal_search::find_literal(trimmed, "when")?;
70    let after_when = trimmed[when_pos + 4..].trim_start();
71
72    // Find "then" (need to be careful with nested structures)
73    let then_pos = find_then_keyword(after_when)?;
74
75    let condition = after_when[..then_pos].trim().to_string();
76    let action = after_when[then_pos + 4..].trim().to_string();
77
78    Some((condition, action))
79}
80
/// Finds the byte offset of the `then` keyword that terminates a condition.
///
/// The scan skips anything inside double-quoted strings (a backslash inside a
/// string escapes the next byte) and anything nested inside `(...)` or
/// `{...}`. A candidate only counts when it is a whole word: the bytes on
/// either side must not be ASCII alphanumerics or `_`, so identifiers such as
/// `x_then` or `then_value` are not mistaken for the keyword.
///
/// # Returns
///
/// The byte index of the `t` of the first qualifying `then`, or `None`.
fn find_then_keyword(text: &str) -> Option<usize> {
    const KEYWORD: &[u8] = b"then";

    /// Bytes that can be part of an identifier adjacent to the keyword.
    fn is_word_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || b == b'_'
    }

    let bytes = text.as_bytes();
    let mut in_string = false;
    let mut escape_next = false;
    let mut paren_depth: i32 = 0;
    let mut brace_depth: i32 = 0;

    for (i, &byte) in bytes.iter().enumerate() {
        // The byte after a backslash inside a string is taken literally.
        if escape_next {
            escape_next = false;
            continue;
        }

        match byte {
            b'\\' if in_string => escape_next = true,
            b'"' => in_string = !in_string,
            // Everything else inside a string literal is ignored.
            _ if in_string => {}
            b'(' => paren_depth += 1,
            b')' => paren_depth -= 1,
            b'{' => brace_depth += 1,
            b'}' => brace_depth -= 1,
            b't' if paren_depth == 0 && brace_depth == 0 && bytes[i..].starts_with(KEYWORD) => {
                let before_ok = i == 0 || !is_word_byte(bytes[i - 1]);
                let after_ok = bytes
                    .get(i + KEYWORD.len())
                    .map_or(true, |&next| !is_word_byte(next));
                if before_ok && after_ok {
                    return Some(i);
                }
            }
            _ => {}
        }
    }

    None
}
122
123/// Extract salience value from attributes section
124pub fn extract_salience(attributes: &str) -> Option<i32> {
125    // Find "salience"
126    let salience_pos = literal_search::find_literal(attributes, "salience")?;
127    let after_salience = attributes[salience_pos + 8..].trim_start();
128
129    // Extract digits
130    let digits: String = after_salience
131        .chars()
132        .take_while(|c| c.is_ascii_digit())
133        .collect();
134
135    digits.parse().ok()
136}
137
138/// Parse defmodule declaration
139pub fn parse_defmodule(text: &str) -> Option<(String, String, usize)> {
140    let trimmed = text.trim_start();
141
142    if !trimmed.starts_with("defmodule") {
143        return None;
144    }
145
146    let after_defmodule = trimmed[9..].trim_start();
147
148    // Extract module name (must start with uppercase)
149    let name_end = after_defmodule
150        .chars()
151        .position(|c| !c.is_alphanumeric() && c != '_')?;
152
153    let name = after_defmodule[..name_end].to_string();
154
155    // Check if first char is uppercase
156    if !name.chars().next()?.is_uppercase() {
157        return None;
158    }
159
160    // Find opening brace
161    let rest = after_defmodule[name_end..].trim_start();
162    if !rest.starts_with('{') {
163        return None;
164    }
165
166    let brace_pos = trimmed.len() - rest.len();
167
168    // Find matching closing brace
169    let close_pos = literal_search::find_matching_brace(trimmed, brace_pos)?;
170
171    let body = trimmed[brace_pos + 1..close_pos].to_string();
172    let consumed = close_pos + 1;
173
174    Some((name, body, consumed))
175}
176
177/// Split defmodule declarations from rules
178pub fn split_modules_and_rules(grl_text: &str) -> (Vec<String>, String) {
179    let mut modules = Vec::new();
180    let mut rules_text = String::new();
181    let bytes = grl_text.as_bytes();
182    let mut i = 0;
183    let mut last_copy = 0;
184
185    while i < bytes.len() {
186        // Find "defmodule "
187        if let Some(defmodule_pos) = memchr::memmem::find(&bytes[i..], b"defmodule ") {
188            let abs_pos = i + defmodule_pos;
189
190            // Copy text before defmodule to rules
191            if abs_pos > last_copy {
192                rules_text.push_str(&grl_text[last_copy..abs_pos]);
193            }
194
195            // Find the opening brace
196            if let Some(brace_pos) = memchr::memchr(b'{', &bytes[abs_pos..]) {
197                let brace_abs = abs_pos + brace_pos;
198
199                // Find matching closing brace
200                if let Some(close_pos) = literal_search::find_matching_brace(grl_text, brace_abs) {
201                    let module_text = &grl_text[abs_pos..=close_pos];
202                    modules.push(module_text.to_string());
203                    i = close_pos + 1;
204                    last_copy = i;
205                    continue;
206                }
207            }
208        }
209        i += 1;
210    }
211
212    // Copy remaining text
213    if last_copy < grl_text.len() {
214        rules_text.push_str(&grl_text[last_copy..]);
215    }
216
217    (modules, rules_text)
218}
219
/// Parses a comparison operator at the start of `text`.
///
/// Leading whitespace is skipped. Recognised operators are `>=`, `<=`, `==`,
/// `!=`, `>`, `<`, `contains` and `matches`; two-character operators take
/// precedence over their one-character prefixes.
///
/// # Returns
///
/// `Some((operator, len))` where `len` is the byte length of the operator
/// itself (skipped leading whitespace is not counted), or `None` when `text`
/// does not begin with a known operator.
pub fn parse_operator(text: &str) -> Option<(&str, usize)> {
    // Order matters: longer symbolic operators must be tried before `>` / `<`.
    const OPERATORS: &[&str] = &[">=", "<=", "==", "!=", ">", "<", "contains", "matches"];

    let trimmed = text.trim_start();

    // `starts_with` never slices inside a multi-byte character, unlike
    // indexing with a fixed `[..2]`, which panics on input such as "€5".
    OPERATORS
        .iter()
        .copied()
        .find(|op| trimmed.starts_with(*op))
        .map(|op| (op, op.len()))
}
254
/// Reports whether `text` contains the attribute keyword `attr` as a whole word.
///
/// Every occurrence of `attr` is examined, not just the first: an occurrence
/// counts when the bytes immediately before and after it are not ASCII
/// alphanumerics (or the occurrence touches the start/end of `text`). So
/// `"no-loops no-loop"` contains `no-loop`, while `"no-loops"` alone does not.
///
/// An empty `attr` is never considered present.
pub fn has_attribute(text: &str, attr: &str) -> bool {
    if attr.is_empty() {
        return false;
    }

    let bytes = text.as_bytes();

    text.match_indices(attr).any(|(pos, _)| {
        let before_ok = pos == 0 || !bytes[pos - 1].is_ascii_alphanumeric();
        let after_ok = bytes
            .get(pos + attr.len())
            .map_or(true, |next| !next.is_ascii_alphanumeric());
        before_ok && after_ok
    })
}
272
273/// Extract date-effective or date-expires value
274pub fn extract_date_attribute(text: &str, attr_name: &str) -> Option<String> {
275    let attr_pos = literal_search::find_literal(text, attr_name)?;
276    let after_attr = text[attr_pos + attr_name.len()..].trim_start();
277
278    // Expect format: "YYYY-MM-DD HH:MM:SS" or similar
279    // Find the quoted string
280    if after_attr.starts_with('"') {
281        if let Some(end_quote) = memchr::memchr(b'"', &after_attr.as_bytes()[1..]) {
282            return Some(after_attr[1..end_quote + 1].to_string());
283        }
284    }
285
286    None
287}
288
/// Unit tests for the GRL helper functions defined in this module.
#[cfg(test)]
mod tests {
    use super::*;

    /// Both quoted and bare rule names are extracted, and the consumed length
    /// covers exactly the keyword, the separating space and the name.
    #[test]
    fn test_parse_rule_header() {
        // `rule ` (5 bytes) + `"MyRule"` (8 bytes)
        assert_eq!(
            parse_rule_header(r#"rule "MyRule" {"#),
            Some(("MyRule".to_string(), 13))
        );

        // `rule ` (5 bytes) + `SimpleRule` (10 bytes)
        assert_eq!(
            parse_rule_header("rule SimpleRule {"),
            Some(("SimpleRule".to_string(), 15))
        );
    }

    /// Condition and action are split at the `then` keyword and trimmed.
    #[test]
    fn test_parse_when_then() {
        let (cond, action) =
            parse_when_then("when X > 5 then Y = 10").expect("Failed to parse when-then");
        assert_eq!(cond, "X > 5");
        assert_eq!(action, "Y = 10");
    }

    /// Salience is read regardless of surrounding whitespace.
    #[test]
    fn test_extract_salience() {
        assert_eq!(extract_salience("salience 10"), Some(10));
        assert_eq!(extract_salience("salience  42  "), Some(42));
        assert_eq!(extract_salience("no salience here"), None);
    }

    /// Symbolic and keyword operators are recognised with their byte length.
    #[test]
    fn test_parse_operator() {
        assert_eq!(parse_operator(">="), Some((">=", 2)));
        assert_eq!(parse_operator("  <= "), Some(("<=", 2)));
        assert_eq!(parse_operator("contains"), Some(("contains", 8)));
        assert_eq!(parse_operator("> 5"), Some((">", 1)));
    }

    /// Attributes match as whole words only.
    #[test]
    fn test_has_attribute() {
        assert!(has_attribute("no-loop lock-on-active", "no-loop"));
        assert!(has_attribute("salience 10 no-loop", "no-loop"));
        assert!(!has_attribute("no-loops", "no-loop")); // Should not match partial
    }

    /// Consecutive rules are returned as separate entries in source order.
    #[test]
    fn test_split_into_rules() {
        let grl = r#"
rule "Rule1" { when X > 5 then Y = 10 }
rule "Rule2" { when A < 3 then B = 7 }
        "#;
        let rules = split_into_rules(grl);
        assert_eq!(rules.len(), 2);
        assert!(rules[0].contains("Rule1"));
        assert!(rules[1].contains("Rule2"));
    }

    /// The module name and the brace-delimited body are extracted.
    #[test]
    fn test_parse_defmodule() {
        let (name, body, _) = parse_defmodule("defmodule MYMODULE { export: all }")
            .expect("Failed to parse defmodule");
        assert_eq!(name, "MYMODULE");
        assert!(body.contains("export"));
    }
}