seq_runtime/
regex.rs

1//! Regular expression operations for Seq
2//!
3//! These functions are exported with C ABI for LLVM codegen to call.
4//! Uses Rust's regex crate - fast, safe, no catastrophic backtracking.
5//!
6//! # API
7//!
8//! ```seq
9//! # Match check
10//! "hello world" "wo.ld" regex.match?      # ( String String -- Bool )
11//!
12//! # Find first match
13//! "a1 b2 c3" "[a-z][0-9]" regex.find      # ( String String -- String Bool )
14//!
15//! # Find all matches
16//! "a1 b2 c3" "[a-z][0-9]" regex.find-all  # ( String String -- List )
17//!
18//! # Replace first occurrence
19//! "hello world" "world" "Seq" regex.replace
20//! # ( String pattern replacement -- String )
21//!
22//! # Replace all occurrences
23//! "a1 b2 c3" "[0-9]" "X" regex.replace-all
24//! # ( String pattern replacement -- String )
25//!
26//! # Capture groups
27//! "2024-01-15" "(\d+)-(\d+)-(\d+)" regex.captures
28//! # ( String pattern -- List Bool ) returns ["2024", "01", "15"] true on match
29//!
30//! # Split by pattern
31//! "a1b2c3" "[0-9]" regex.split            # ( String pattern -- List )
32//! ```
33
34use seq_core::seqstring::global_string;
35use seq_core::stack::{Stack, pop, push};
36use seq_core::value::{Value, VariantData};
37
38use regex::Regex;
39use std::sync::Arc;
40
41/// Helper to create a List variant from a vector of values
42fn make_list(items: Vec<Value>) -> Value {
43    Value::Variant(Arc::new(VariantData::new(
44        global_string("List".to_string()),
45        items,
46    )))
47}
48
49/// Check if a pattern matches anywhere in the string
50///
51/// Stack effect: ( String pattern -- Bool )
52///
53/// # Safety
54/// Stack must have two String values on top
55#[unsafe(no_mangle)]
56pub unsafe extern "C" fn patch_seq_regex_match(stack: Stack) -> Stack {
57    assert!(!stack.is_null(), "regex.match?: stack is empty");
58
59    let (stack, pattern_val) = unsafe { pop(stack) };
60    let (stack, text_val) = unsafe { pop(stack) };
61
62    match (text_val, pattern_val) {
63        (Value::String(text), Value::String(pattern)) => {
64            let result = match Regex::new(pattern.as_str()) {
65                Ok(re) => re.is_match(text.as_str()),
66                Err(_) => false, // Invalid regex returns false
67            };
68            unsafe { push(stack, Value::Bool(result)) }
69        }
70        _ => panic!("regex.match?: expected two Strings on stack"),
71    }
72}
73
74/// Find the first match of a pattern in the string
75///
76/// Stack effect: ( String pattern -- String Bool )
77///
78/// Returns the matched text and true on success, empty string and false on no match.
79///
80/// # Safety
81/// Stack must have two String values on top
82#[unsafe(no_mangle)]
83pub unsafe extern "C" fn patch_seq_regex_find(stack: Stack) -> Stack {
84    assert!(!stack.is_null(), "regex.find: stack is empty");
85
86    let (stack, pattern_val) = unsafe { pop(stack) };
87    let (stack, text_val) = unsafe { pop(stack) };
88
89    match (text_val, pattern_val) {
90        (Value::String(text), Value::String(pattern)) => {
91            match Regex::new(pattern.as_str()) {
92                Ok(re) => match re.find(text.as_str()) {
93                    Some(m) => {
94                        let stack = unsafe {
95                            push(stack, Value::String(global_string(m.as_str().to_string())))
96                        };
97                        unsafe { push(stack, Value::Bool(true)) }
98                    }
99                    None => {
100                        let stack =
101                            unsafe { push(stack, Value::String(global_string(String::new()))) };
102                        unsafe { push(stack, Value::Bool(false)) }
103                    }
104                },
105                Err(_) => {
106                    // Invalid regex
107                    let stack = unsafe { push(stack, Value::String(global_string(String::new()))) };
108                    unsafe { push(stack, Value::Bool(false)) }
109                }
110            }
111        }
112        _ => panic!("regex.find: expected two Strings on stack"),
113    }
114}
115
116/// Find all matches of a pattern in the string
117///
118/// Stack effect: ( String pattern -- List )
119///
120/// Returns a list of all matched substrings. Empty list if no matches or invalid regex.
121///
122/// # Safety
123/// Stack must have two String values on top
124#[unsafe(no_mangle)]
125pub unsafe extern "C" fn patch_seq_regex_find_all(stack: Stack) -> Stack {
126    assert!(!stack.is_null(), "regex.find-all: stack is empty");
127
128    let (stack, pattern_val) = unsafe { pop(stack) };
129    let (stack, text_val) = unsafe { pop(stack) };
130
131    match (text_val, pattern_val) {
132        (Value::String(text), Value::String(pattern)) => {
133            let matches: Vec<Value> = match Regex::new(pattern.as_str()) {
134                Ok(re) => re
135                    .find_iter(text.as_str())
136                    .map(|m| Value::String(global_string(m.as_str().to_string())))
137                    .collect(),
138                Err(_) => vec![], // Invalid regex returns empty list
139            };
140            unsafe { push(stack, make_list(matches)) }
141        }
142        _ => panic!("regex.find-all: expected two Strings on stack"),
143    }
144}
145
146/// Replace the first occurrence of a pattern
147///
148/// Stack effect: ( String pattern replacement -- String )
149///
150/// Returns the string with the first match replaced.
151/// If no match or invalid regex, returns the original string.
152///
153/// # Safety
154/// Stack must have three String values on top
155#[unsafe(no_mangle)]
156pub unsafe extern "C" fn patch_seq_regex_replace(stack: Stack) -> Stack {
157    assert!(!stack.is_null(), "regex.replace: stack is empty");
158
159    let (stack, replacement_val) = unsafe { pop(stack) };
160    let (stack, pattern_val) = unsafe { pop(stack) };
161    let (stack, text_val) = unsafe { pop(stack) };
162
163    match (text_val, pattern_val, replacement_val) {
164        (Value::String(text), Value::String(pattern), Value::String(replacement)) => {
165            let result = match Regex::new(pattern.as_str()) {
166                Ok(re) => re.replace(text.as_str(), replacement.as_str()).into_owned(),
167                Err(_) => text.as_str().to_string(), // Invalid regex returns original
168            };
169            unsafe { push(stack, Value::String(global_string(result))) }
170        }
171        _ => panic!("regex.replace: expected three Strings on stack"),
172    }
173}
174
175/// Replace all occurrences of a pattern
176///
177/// Stack effect: ( String pattern replacement -- String )
178///
179/// Returns the string with all matches replaced.
180/// If no match or invalid regex, returns the original string.
181///
182/// # Safety
183/// Stack must have three String values on top
184#[unsafe(no_mangle)]
185pub unsafe extern "C" fn patch_seq_regex_replace_all(stack: Stack) -> Stack {
186    assert!(!stack.is_null(), "regex.replace-all: stack is empty");
187
188    let (stack, replacement_val) = unsafe { pop(stack) };
189    let (stack, pattern_val) = unsafe { pop(stack) };
190    let (stack, text_val) = unsafe { pop(stack) };
191
192    match (text_val, pattern_val, replacement_val) {
193        (Value::String(text), Value::String(pattern), Value::String(replacement)) => {
194            let result = match Regex::new(pattern.as_str()) {
195                Ok(re) => re
196                    .replace_all(text.as_str(), replacement.as_str())
197                    .into_owned(),
198                Err(_) => text.as_str().to_string(), // Invalid regex returns original
199            };
200            unsafe { push(stack, Value::String(global_string(result))) }
201        }
202        _ => panic!("regex.replace-all: expected three Strings on stack"),
203    }
204}
205
206/// Extract capture groups from a pattern match
207///
208/// Stack effect: ( String pattern -- List Bool )
209///
210/// Returns a list of captured groups (excluding the full match) and true on success.
211/// Returns empty list and false if no match or invalid regex.
212///
213/// # Safety
214/// Stack must have two String values on top
215#[unsafe(no_mangle)]
216pub unsafe extern "C" fn patch_seq_regex_captures(stack: Stack) -> Stack {
217    assert!(!stack.is_null(), "regex.captures: stack is empty");
218
219    let (stack, pattern_val) = unsafe { pop(stack) };
220    let (stack, text_val) = unsafe { pop(stack) };
221
222    match (text_val, pattern_val) {
223        (Value::String(text), Value::String(pattern)) => {
224            match Regex::new(pattern.as_str()) {
225                Ok(re) => match re.captures(text.as_str()) {
226                    Some(caps) => {
227                        // Skip group 0 (full match), collect groups 1..n
228                        let groups: Vec<Value> = caps
229                            .iter()
230                            .skip(1)
231                            .map(|m| match m {
232                                Some(m) => Value::String(global_string(m.as_str().to_string())),
233                                None => Value::String(global_string(String::new())),
234                            })
235                            .collect();
236                        let stack = unsafe { push(stack, make_list(groups)) };
237                        unsafe { push(stack, Value::Bool(true)) }
238                    }
239                    None => {
240                        let stack = unsafe { push(stack, make_list(vec![])) };
241                        unsafe { push(stack, Value::Bool(false)) }
242                    }
243                },
244                Err(_) => {
245                    // Invalid regex
246                    let stack = unsafe { push(stack, make_list(vec![])) };
247                    unsafe { push(stack, Value::Bool(false)) }
248                }
249            }
250        }
251        _ => panic!("regex.captures: expected two Strings on stack"),
252    }
253}
254
255/// Split a string by a pattern
256///
257/// Stack effect: ( String pattern -- List )
258///
259/// Returns a list of substrings split by the pattern.
260/// If invalid regex, returns a single-element list with the original string.
261///
262/// # Safety
263/// Stack must have two String values on top
264#[unsafe(no_mangle)]
265pub unsafe extern "C" fn patch_seq_regex_split(stack: Stack) -> Stack {
266    assert!(!stack.is_null(), "regex.split: stack is empty");
267
268    let (stack, pattern_val) = unsafe { pop(stack) };
269    let (stack, text_val) = unsafe { pop(stack) };
270
271    match (text_val, pattern_val) {
272        (Value::String(text), Value::String(pattern)) => {
273            let parts: Vec<Value> = match Regex::new(pattern.as_str()) {
274                Ok(re) => re
275                    .split(text.as_str())
276                    .map(|s| Value::String(global_string(s.to_string())))
277                    .collect(),
278                Err(_) => {
279                    // Invalid regex returns original as single element
280                    vec![Value::String(global_string(text.as_str().to_string()))]
281                }
282            };
283            unsafe { push(stack, make_list(parts)) }
284        }
285        _ => panic!("regex.split: expected two Strings on stack"),
286    }
287}
288
289/// Check if a pattern is a valid regex
290///
291/// Stack effect: ( String -- Bool )
292///
293/// Returns true if the pattern compiles successfully, false otherwise.
294///
295/// # Safety
296/// Stack must have a String value on top
297#[unsafe(no_mangle)]
298pub unsafe extern "C" fn patch_seq_regex_valid(stack: Stack) -> Stack {
299    assert!(!stack.is_null(), "regex.valid?: stack is empty");
300
301    let (stack, pattern_val) = unsafe { pop(stack) };
302
303    match pattern_val {
304        Value::String(pattern) => {
305            let is_valid = Regex::new(pattern.as_str()).is_ok();
306            unsafe { push(stack, Value::Bool(is_valid)) }
307        }
308        _ => panic!("regex.valid?: expected String on stack"),
309    }
310}
311
#[cfg(test)]
mod tests {
    use super::*;
    use seq_core::stack::alloc_stack;

    /// Allocates a fresh stack and pushes each entry of `items` as a String,
    /// first entry first (so the last entry ends up on top).
    fn stack_of(items: &[&str]) -> Stack {
        items.iter().fold(alloc_stack(), |stack, item| unsafe {
            push(stack, Value::String(global_string((*item).to_string())))
        })
    }

    /// Asserts that `value` is a String equal to `expected`.
    ///
    /// Any other variant is a test failure, so a wrong element type can never
    /// slip through unnoticed.
    fn assert_string(value: &Value, expected: &str) {
        match value {
            Value::String(s) => assert_eq!(s.as_str(), expected),
            other => panic!("expected String {expected:?}, got {other:?}"),
        }
    }

    /// Asserts that `value` is a Variant whose fields are exactly the Strings
    /// listed in `expected`, in order.
    fn assert_string_list(value: &Value, expected: &[&str]) {
        let Value::Variant(v) = value else {
            panic!("expected Variant (List), got {value:?}");
        };
        assert_eq!(v.fields.len(), expected.len());
        for (field, want) in v.fields.iter().zip(expected) {
            assert_string(field, want);
        }
    }

    #[test]
    fn test_regex_match() {
        let stack = stack_of(&["hello world", "wo.ld"]);

        let stack = unsafe { patch_seq_regex_match(stack) };
        let (_, value) = unsafe { pop(stack) };
        assert_eq!(value, Value::Bool(true));
    }

    #[test]
    fn test_regex_match_no_match() {
        let stack = stack_of(&["hello", "xyz"]);

        let stack = unsafe { patch_seq_regex_match(stack) };
        let (_, value) = unsafe { pop(stack) };
        assert_eq!(value, Value::Bool(false));
    }

    #[test]
    fn test_regex_find() {
        let stack = stack_of(&["a1 b2 c3", "[a-z][0-9]"]);

        let stack = unsafe { patch_seq_regex_find(stack) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, matched) = unsafe { pop(stack) };

        assert_eq!(success, Value::Bool(true));
        assert_string(&matched, "a1");
    }

    #[test]
    fn test_regex_find_all() {
        let stack = stack_of(&["a1 b2 c3", "[a-z][0-9]"]);

        let stack = unsafe { patch_seq_regex_find_all(stack) };
        let (_, list_val) = unsafe { pop(stack) };

        assert_string_list(&list_val, &["a1", "b2", "c3"]);
    }

    #[test]
    fn test_regex_replace() {
        let stack = stack_of(&["hello world", "world", "Seq"]);

        let stack = unsafe { patch_seq_regex_replace(stack) };
        let (_, result) = unsafe { pop(stack) };

        assert_string(&result, "hello Seq");
    }

    #[test]
    fn test_regex_replace_all() {
        let stack = stack_of(&["a1 b2 c3", "[0-9]", "X"]);

        let stack = unsafe { patch_seq_regex_replace_all(stack) };
        let (_, result) = unsafe { pop(stack) };

        assert_string(&result, "aX bX cX");
    }

    #[test]
    fn test_regex_captures() {
        let stack = stack_of(&["2024-01-15", r"(\d+)-(\d+)-(\d+)"]);

        let stack = unsafe { patch_seq_regex_captures(stack) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, groups) = unsafe { pop(stack) };

        assert_eq!(success, Value::Bool(true));
        assert_string_list(&groups, &["2024", "01", "15"]);
    }

    #[test]
    fn test_regex_split() {
        let stack = stack_of(&["a1b2c3", "[0-9]"]);

        let stack = unsafe { patch_seq_regex_split(stack) };
        let (_, result) = unsafe { pop(stack) };

        // The delimiter at the very end of the input yields a trailing empty piece.
        assert_string_list(&result, &["a", "b", "c", ""]);
    }

    #[test]
    fn test_regex_valid() {
        let stack = stack_of(&["[a-z]+"]);

        let stack = unsafe { patch_seq_regex_valid(stack) };
        let (_, result) = unsafe { pop(stack) };
        assert_eq!(result, Value::Bool(true));

        // Invalid regex
        let stack = stack_of(&["[invalid"]);

        let stack = unsafe { patch_seq_regex_valid(stack) };
        let (_, result) = unsafe { pop(stack) };
        assert_eq!(result, Value::Bool(false));
    }

    #[test]
    fn test_invalid_regex_graceful() {
        // Invalid regex should return false, not panic
        let stack = stack_of(&["test", "[invalid"]);

        let stack = unsafe { patch_seq_regex_match(stack) };
        let (_, result) = unsafe { pop(stack) };
        assert_eq!(result, Value::Bool(false));
    }
}