seq_runtime/
regex.rs

//! Regular expression operations for Seq
//!
//! These functions are exported with C ABI for LLVM codegen to call.
//! Uses Rust's regex crate - fast, safe, no catastrophic backtracking.
//!
//! # API
//!
//! ```seq
//! # Match check
//! "hello world" "wo.ld" regex.match?      # ( String String -- Bool )
//!
//! # Find first match
//! "a1 b2 c3" "[a-z][0-9]" regex.find      # ( String String -- String Bool )
//!
//! # Find all matches
//! "a1 b2 c3" "[a-z][0-9]" regex.find-all  # ( String String -- List Bool )
//!
//! # Replace first occurrence
//! "hello world" "world" "Seq" regex.replace
//! # ( String pattern replacement -- String Bool )
//!
//! # Replace all occurrences
//! "a1 b2 c3" "[0-9]" "X" regex.replace-all
//! # ( String pattern replacement -- String Bool )
//!
//! # Capture groups
//! "2024-01-15" "(\d+)-(\d+)-(\d+)" regex.captures
//! # ( String pattern -- List Bool ) returns ["2024", "01", "15"] true on match
//!
//! # Split by pattern
//! "a1b2c3" "[0-9]" regex.split            # ( String pattern -- List Bool )
//!
//! # Check that a pattern compiles
//! "[a-z]+" regex.valid?                   # ( String -- Bool )
//! ```
33
34use seq_core::seqstring::global_string;
35use seq_core::stack::{Stack, pop, push};
36use seq_core::value::{Value, VariantData};
37
38use regex::Regex;
39use std::sync::Arc;
40
41/// Helper to create a List variant from a vector of values
42fn make_list(items: Vec<Value>) -> Value {
43    Value::Variant(Arc::new(VariantData::new(
44        global_string("List".to_string()),
45        items,
46    )))
47}
48
49/// Check if a pattern matches anywhere in the string
50///
51/// Stack effect: ( String pattern -- Bool )
52///
53/// # Safety
54/// Stack must have two String values on top
55#[unsafe(no_mangle)]
56pub unsafe extern "C" fn patch_seq_regex_match(stack: Stack) -> Stack {
57    assert!(!stack.is_null(), "regex.match?: stack is empty");
58
59    let (stack, pattern_val) = unsafe { pop(stack) };
60    let (stack, text_val) = unsafe { pop(stack) };
61
62    match (text_val, pattern_val) {
63        (Value::String(text), Value::String(pattern)) => {
64            let result = match Regex::new(pattern.as_str()) {
65                Ok(re) => re.is_match(text.as_str()),
66                Err(_) => false, // Invalid regex returns false
67            };
68            unsafe { push(stack, Value::Bool(result)) }
69        }
70        _ => panic!("regex.match?: expected two Strings on stack"),
71    }
72}
73
74/// Find the first match of a pattern in the string
75///
76/// Stack effect: ( String pattern -- String Bool )
77///
78/// Returns the matched text and true on success, empty string and false on no match.
79///
80/// # Safety
81/// Stack must have two String values on top
82#[unsafe(no_mangle)]
83pub unsafe extern "C" fn patch_seq_regex_find(stack: Stack) -> Stack {
84    assert!(!stack.is_null(), "regex.find: stack is empty");
85
86    let (stack, pattern_val) = unsafe { pop(stack) };
87    let (stack, text_val) = unsafe { pop(stack) };
88
89    match (text_val, pattern_val) {
90        (Value::String(text), Value::String(pattern)) => {
91            match Regex::new(pattern.as_str()) {
92                Ok(re) => match re.find(text.as_str()) {
93                    Some(m) => {
94                        let stack = unsafe {
95                            push(stack, Value::String(global_string(m.as_str().to_string())))
96                        };
97                        unsafe { push(stack, Value::Bool(true)) }
98                    }
99                    None => {
100                        let stack =
101                            unsafe { push(stack, Value::String(global_string(String::new()))) };
102                        unsafe { push(stack, Value::Bool(false)) }
103                    }
104                },
105                Err(_) => {
106                    // Invalid regex
107                    let stack = unsafe { push(stack, Value::String(global_string(String::new()))) };
108                    unsafe { push(stack, Value::Bool(false)) }
109                }
110            }
111        }
112        _ => panic!("regex.find: expected two Strings on stack"),
113    }
114}
115
116/// Find all matches of a pattern in the string
117///
118/// Stack effect: ( String pattern -- List Bool )
119///
120/// Returns a list of all matched substrings and true on success.
121/// Returns empty list and false on invalid regex.
122///
123/// # Safety
124/// Stack must have two String values on top
125#[unsafe(no_mangle)]
126pub unsafe extern "C" fn patch_seq_regex_find_all(stack: Stack) -> Stack {
127    assert!(!stack.is_null(), "regex.find-all: stack is empty");
128
129    let (stack, pattern_val) = unsafe { pop(stack) };
130    let (stack, text_val) = unsafe { pop(stack) };
131
132    match (text_val, pattern_val) {
133        (Value::String(text), Value::String(pattern)) => match Regex::new(pattern.as_str()) {
134            Ok(re) => {
135                let matches: Vec<Value> = re
136                    .find_iter(text.as_str())
137                    .map(|m| Value::String(global_string(m.as_str().to_string())))
138                    .collect();
139                let stack = unsafe { push(stack, make_list(matches)) };
140                unsafe { push(stack, Value::Bool(true)) }
141            }
142            Err(_) => {
143                // Invalid regex
144                let stack = unsafe { push(stack, make_list(vec![])) };
145                unsafe { push(stack, Value::Bool(false)) }
146            }
147        },
148        _ => panic!("regex.find-all: expected two Strings on stack"),
149    }
150}
151
152/// Replace the first occurrence of a pattern
153///
154/// Stack effect: ( String pattern replacement -- String Bool )
155///
156/// Returns the string with the first match replaced and true on success.
157/// Returns original string and false on invalid regex.
158///
159/// # Safety
160/// Stack must have three String values on top
161#[unsafe(no_mangle)]
162pub unsafe extern "C" fn patch_seq_regex_replace(stack: Stack) -> Stack {
163    assert!(!stack.is_null(), "regex.replace: stack is empty");
164
165    let (stack, replacement_val) = unsafe { pop(stack) };
166    let (stack, pattern_val) = unsafe { pop(stack) };
167    let (stack, text_val) = unsafe { pop(stack) };
168
169    match (text_val, pattern_val, replacement_val) {
170        (Value::String(text), Value::String(pattern), Value::String(replacement)) => {
171            match Regex::new(pattern.as_str()) {
172                Ok(re) => {
173                    let result = re.replace(text.as_str(), replacement.as_str()).into_owned();
174                    let stack = unsafe { push(stack, Value::String(global_string(result))) };
175                    unsafe { push(stack, Value::Bool(true)) }
176                }
177                Err(_) => {
178                    // Invalid regex returns original
179                    let stack = unsafe {
180                        push(
181                            stack,
182                            Value::String(global_string(text.as_str().to_string())),
183                        )
184                    };
185                    unsafe { push(stack, Value::Bool(false)) }
186                }
187            }
188        }
189        _ => panic!("regex.replace: expected three Strings on stack"),
190    }
191}
192
193/// Replace all occurrences of a pattern
194///
195/// Stack effect: ( String pattern replacement -- String Bool )
196///
197/// Returns the string with all matches replaced and true on success.
198/// Returns original string and false on invalid regex.
199///
200/// # Safety
201/// Stack must have three String values on top
202#[unsafe(no_mangle)]
203pub unsafe extern "C" fn patch_seq_regex_replace_all(stack: Stack) -> Stack {
204    assert!(!stack.is_null(), "regex.replace-all: stack is empty");
205
206    let (stack, replacement_val) = unsafe { pop(stack) };
207    let (stack, pattern_val) = unsafe { pop(stack) };
208    let (stack, text_val) = unsafe { pop(stack) };
209
210    match (text_val, pattern_val, replacement_val) {
211        (Value::String(text), Value::String(pattern), Value::String(replacement)) => {
212            match Regex::new(pattern.as_str()) {
213                Ok(re) => {
214                    let result = re
215                        .replace_all(text.as_str(), replacement.as_str())
216                        .into_owned();
217                    let stack = unsafe { push(stack, Value::String(global_string(result))) };
218                    unsafe { push(stack, Value::Bool(true)) }
219                }
220                Err(_) => {
221                    // Invalid regex returns original
222                    let stack = unsafe {
223                        push(
224                            stack,
225                            Value::String(global_string(text.as_str().to_string())),
226                        )
227                    };
228                    unsafe { push(stack, Value::Bool(false)) }
229                }
230            }
231        }
232        _ => panic!("regex.replace-all: expected three Strings on stack"),
233    }
234}
235
236/// Extract capture groups from a pattern match
237///
238/// Stack effect: ( String pattern -- List Bool )
239///
240/// Returns a list of captured groups (excluding the full match) and true on success.
241/// Returns empty list and false if no match or invalid regex.
242///
243/// # Safety
244/// Stack must have two String values on top
245#[unsafe(no_mangle)]
246pub unsafe extern "C" fn patch_seq_regex_captures(stack: Stack) -> Stack {
247    assert!(!stack.is_null(), "regex.captures: stack is empty");
248
249    let (stack, pattern_val) = unsafe { pop(stack) };
250    let (stack, text_val) = unsafe { pop(stack) };
251
252    match (text_val, pattern_val) {
253        (Value::String(text), Value::String(pattern)) => {
254            match Regex::new(pattern.as_str()) {
255                Ok(re) => match re.captures(text.as_str()) {
256                    Some(caps) => {
257                        // Skip group 0 (full match), collect groups 1..n
258                        let groups: Vec<Value> = caps
259                            .iter()
260                            .skip(1)
261                            .map(|m| match m {
262                                Some(m) => Value::String(global_string(m.as_str().to_string())),
263                                None => Value::String(global_string(String::new())),
264                            })
265                            .collect();
266                        let stack = unsafe { push(stack, make_list(groups)) };
267                        unsafe { push(stack, Value::Bool(true)) }
268                    }
269                    None => {
270                        let stack = unsafe { push(stack, make_list(vec![])) };
271                        unsafe { push(stack, Value::Bool(false)) }
272                    }
273                },
274                Err(_) => {
275                    // Invalid regex
276                    let stack = unsafe { push(stack, make_list(vec![])) };
277                    unsafe { push(stack, Value::Bool(false)) }
278                }
279            }
280        }
281        _ => panic!("regex.captures: expected two Strings on stack"),
282    }
283}
284
285/// Split a string by a pattern
286///
287/// Stack effect: ( String pattern -- List Bool )
288///
289/// Returns a list of substrings split by the pattern and true on success.
290/// Returns single-element list with original string and false on invalid regex.
291///
292/// # Safety
293/// Stack must have two String values on top
294#[unsafe(no_mangle)]
295pub unsafe extern "C" fn patch_seq_regex_split(stack: Stack) -> Stack {
296    assert!(!stack.is_null(), "regex.split: stack is empty");
297
298    let (stack, pattern_val) = unsafe { pop(stack) };
299    let (stack, text_val) = unsafe { pop(stack) };
300
301    match (text_val, pattern_val) {
302        (Value::String(text), Value::String(pattern)) => match Regex::new(pattern.as_str()) {
303            Ok(re) => {
304                let parts: Vec<Value> = re
305                    .split(text.as_str())
306                    .map(|s| Value::String(global_string(s.to_string())))
307                    .collect();
308                let stack = unsafe { push(stack, make_list(parts)) };
309                unsafe { push(stack, Value::Bool(true)) }
310            }
311            Err(_) => {
312                // Invalid regex returns original as single element
313                let parts = vec![Value::String(global_string(text.as_str().to_string()))];
314                let stack = unsafe { push(stack, make_list(parts)) };
315                unsafe { push(stack, Value::Bool(false)) }
316            }
317        },
318        _ => panic!("regex.split: expected two Strings on stack"),
319    }
320}
321
322/// Check if a pattern is a valid regex
323///
324/// Stack effect: ( String -- Bool )
325///
326/// Returns true if the pattern compiles successfully, false otherwise.
327///
328/// # Safety
329/// Stack must have a String value on top
330#[unsafe(no_mangle)]
331pub unsafe extern "C" fn patch_seq_regex_valid(stack: Stack) -> Stack {
332    assert!(!stack.is_null(), "regex.valid?: stack is empty");
333
334    let (stack, pattern_val) = unsafe { pop(stack) };
335
336    match pattern_val {
337        Value::String(pattern) => {
338            let is_valid = Regex::new(pattern.as_str()).is_ok();
339            unsafe { push(stack, Value::Bool(is_valid)) }
340        }
341        _ => panic!("regex.valid?: expected String on stack"),
342    }
343}
344
#[cfg(test)]
mod tests {
    use super::*;
    use seq_core::stack::alloc_stack;

    /// Allocate a fresh stack and push each string onto it in order, so the
    /// last element of `strings` ends up on top.
    fn stack_of(strings: &[&str]) -> Stack {
        let mut stack = alloc_stack();
        for s in strings {
            stack = unsafe { push(stack, Value::String(global_string((*s).to_string()))) };
        }
        stack
    }

    /// Borrow the text of a `Value::String`, panicking on any other variant so
    /// a wrong value type fails the test instead of being skipped.
    fn expect_str(value: &Value) -> &str {
        match value {
            Value::String(s) => s.as_str(),
            other => panic!("expected String, got {:?}", other),
        }
    }

    /// Borrow every field of a list variant as `&str`, panicking if the value
    /// is not a variant or if any field is not a String.
    fn expect_list(value: &Value) -> Vec<&str> {
        match value {
            Value::Variant(v) => v.fields.iter().map(|field| expect_str(field)).collect(),
            other => panic!("expected Variant (List), got {:?}", other),
        }
    }

    #[test]
    fn test_regex_match() {
        let stack = unsafe { patch_seq_regex_match(stack_of(&["hello world", "wo.ld"])) };
        let (_, value) = unsafe { pop(stack) };
        assert_eq!(value, Value::Bool(true));
    }

    #[test]
    fn test_regex_match_no_match() {
        let stack = unsafe { patch_seq_regex_match(stack_of(&["hello", "xyz"])) };
        let (_, value) = unsafe { pop(stack) };
        assert_eq!(value, Value::Bool(false));
    }

    #[test]
    fn test_regex_find() {
        let stack = unsafe { patch_seq_regex_find(stack_of(&["a1 b2 c3", "[a-z][0-9]"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, matched) = unsafe { pop(stack) };

        assert_eq!(success, Value::Bool(true));
        assert_eq!(expect_str(&matched), "a1");
    }

    #[test]
    fn test_regex_find_no_match() {
        let stack = unsafe { patch_seq_regex_find(stack_of(&["hello", "[0-9]"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, matched) = unsafe { pop(stack) };

        assert_eq!(success, Value::Bool(false));
        assert_eq!(expect_str(&matched), "");
    }

    #[test]
    fn test_regex_find_all() {
        let stack = unsafe { patch_seq_regex_find_all(stack_of(&["a1 b2 c3", "[a-z][0-9]"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, list_val) = unsafe { pop(stack) };

        assert_eq!(success, Value::Bool(true));
        assert_eq!(expect_list(&list_val), vec!["a1", "b2", "c3"]);
    }

    #[test]
    fn test_regex_replace() {
        let stack = unsafe { patch_seq_regex_replace(stack_of(&["hello world", "world", "Seq"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, result) = unsafe { pop(stack) };

        assert_eq!(success, Value::Bool(true));
        assert_eq!(expect_str(&result), "hello Seq");
    }

    #[test]
    fn test_regex_replace_all() {
        let stack = unsafe { patch_seq_regex_replace_all(stack_of(&["a1 b2 c3", "[0-9]", "X"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, result) = unsafe { pop(stack) };

        assert_eq!(success, Value::Bool(true));
        assert_eq!(expect_str(&result), "aX bX cX");
    }

    #[test]
    fn test_regex_captures() {
        let stack =
            unsafe { patch_seq_regex_captures(stack_of(&["2024-01-15", r"(\d+)-(\d+)-(\d+)"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, groups) = unsafe { pop(stack) };

        assert_eq!(success, Value::Bool(true));
        assert_eq!(expect_list(&groups), vec!["2024", "01", "15"]);
    }

    #[test]
    fn test_regex_captures_unmatched_group() {
        // An optional group that takes no part in the match is reported as "".
        let stack = unsafe { patch_seq_regex_captures(stack_of(&["a", "(a)(b)?"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, groups) = unsafe { pop(stack) };

        assert_eq!(success, Value::Bool(true));
        assert_eq!(expect_list(&groups), vec!["a", ""]);
    }

    #[test]
    fn test_regex_captures_no_match() {
        let stack = unsafe { patch_seq_regex_captures(stack_of(&["hello", r"(\d+)"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, groups) = unsafe { pop(stack) };

        assert_eq!(success, Value::Bool(false));
        assert!(expect_list(&groups).is_empty());
    }

    #[test]
    fn test_regex_split() {
        let stack = unsafe { patch_seq_regex_split(stack_of(&["a1b2c3", "[0-9]"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, result) = unsafe { pop(stack) };

        assert_eq!(success, Value::Bool(true));
        // The trailing delimiter yields a final empty piece.
        assert_eq!(expect_list(&result), vec!["a", "b", "c", ""]);
    }

    #[test]
    fn test_regex_valid() {
        let stack = unsafe { patch_seq_regex_valid(stack_of(&["[a-z]+"])) };
        let (_, result) = unsafe { pop(stack) };
        assert_eq!(result, Value::Bool(true));

        // Invalid regex
        let stack = unsafe { patch_seq_regex_valid(stack_of(&["[invalid"])) };
        let (_, result) = unsafe { pop(stack) };
        assert_eq!(result, Value::Bool(false));
    }

    #[test]
    fn test_invalid_regex_graceful() {
        // Invalid regex should return false, not panic
        let stack = unsafe { patch_seq_regex_match(stack_of(&["test", "[invalid"])) };
        let (_, result) = unsafe { pop(stack) };
        assert_eq!(result, Value::Bool(false));
    }

    #[test]
    fn test_invalid_regex_find_fallback() {
        let stack = unsafe { patch_seq_regex_find(stack_of(&["test", "[invalid"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, matched) = unsafe { pop(stack) };

        assert_eq!(success, Value::Bool(false));
        assert_eq!(expect_str(&matched), "");
    }

    #[test]
    fn test_invalid_regex_list_fallbacks() {
        // find-all and captures fall back to an empty list.
        let stack = unsafe { patch_seq_regex_find_all(stack_of(&["test", "[invalid"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, list_val) = unsafe { pop(stack) };
        assert_eq!(success, Value::Bool(false));
        assert!(expect_list(&list_val).is_empty());

        let stack = unsafe { patch_seq_regex_captures(stack_of(&["test", "[invalid"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, list_val) = unsafe { pop(stack) };
        assert_eq!(success, Value::Bool(false));
        assert!(expect_list(&list_val).is_empty());

        // split falls back to the original text as the only element.
        let stack = unsafe { patch_seq_regex_split(stack_of(&["test", "[invalid"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, list_val) = unsafe { pop(stack) };
        assert_eq!(success, Value::Bool(false));
        assert_eq!(expect_list(&list_val), vec!["test"]);
    }

    #[test]
    fn test_invalid_regex_replace_fallbacks() {
        // Both replace variants hand back the original text unchanged.
        let stack = unsafe { patch_seq_regex_replace(stack_of(&["test", "[invalid", "X"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, result) = unsafe { pop(stack) };
        assert_eq!(success, Value::Bool(false));
        assert_eq!(expect_str(&result), "test");

        let stack = unsafe { patch_seq_regex_replace_all(stack_of(&["test", "[invalid", "X"])) };
        let (stack, success) = unsafe { pop(stack) };
        let (_, result) = unsafe { pop(stack) };
        assert_eq!(success, Value::Bool(false));
        assert_eq!(expect_str(&result), "test");
    }
}