Skip to main content

seq_runtime/
regex.rs

1//! Regular expression operations for Seq
2//!
3//! These functions are exported with C ABI for LLVM codegen to call.
4//! Uses Rust's regex crate - fast, safe, no catastrophic backtracking.
5//!
6//! # API
7//!
8//! ```seq
9//! # Match check
10//! "hello world" "wo.ld" regex.match?      # ( String String -- Bool )
11//!
12//! # Find first match
13//! "a1 b2 c3" "[a-z][0-9]" regex.find      # ( String String -- String Bool )
14//!
15//! # Find all matches
16//! "a1 b2 c3" "[a-z][0-9]" regex.find-all  # ( String String -- List Bool )
17//!
18//! # Replace first occurrence
19//! "hello world" "world" "Seq" regex.replace
20//! # ( String pattern replacement -- String Bool )
21//!
22//! # Replace all occurrences
23//! "a1 b2 c3" "[0-9]" "X" regex.replace-all
24//! # ( String pattern replacement -- String Bool )
25//!
26//! # Capture groups
27//! "2024-01-15" "(\d+)-(\d+)-(\d+)" regex.captures
28//! # ( String pattern -- List Bool ) returns ["2024", "01", "15"] true on match
29//!
30//! # Split by pattern
31//! "a1b2c3" "[0-9]" regex.split            # ( String pattern -- List Bool )
32//! ```
33
34use seq_core::seqstring::global_string;
35use seq_core::stack::{Stack, pop, push};
36use seq_core::value::{Value, VariantData};
37
38use regex::Regex;
39use std::sync::Arc;
40
41/// Helper to create a List variant from a vector of values
42fn make_list(items: Vec<Value>) -> Value {
43    Value::Variant(Arc::new(VariantData::new(
44        global_string("List".to_string()),
45        items,
46    )))
47}
48
49/// Check if a pattern matches anywhere in the string
50///
51/// Stack effect: ( String pattern -- Bool )
52///
53/// # Safety
54/// Stack must have two String values on top
55#[unsafe(no_mangle)]
56pub unsafe extern "C" fn patch_seq_regex_match(stack: Stack) -> Stack {
57    assert!(!stack.is_null(), "regex.match?: stack is empty");
58
59    let (stack, pattern_val) = unsafe { pop(stack) };
60    let (stack, text_val) = unsafe { pop(stack) };
61
62    match (text_val, pattern_val) {
63        (Value::String(text), Value::String(pattern)) => {
64            let result = match Regex::new(pattern.as_str_or_empty()) {
65                Ok(re) => re.is_match(text.as_str_or_empty()),
66                Err(_) => false, // Invalid regex returns false
67            };
68            unsafe { push(stack, Value::Bool(result)) }
69        }
70        _ => panic!("regex.match?: expected two Strings on stack"),
71    }
72}
73
74/// Find the first match of a pattern in the string
75///
76/// Stack effect: ( String pattern -- String Bool )
77///
78/// Returns the matched text and true on success, empty string and false on no match.
79///
80/// # Safety
81/// Stack must have two String values on top
82#[unsafe(no_mangle)]
83pub unsafe extern "C" fn patch_seq_regex_find(stack: Stack) -> Stack {
84    assert!(!stack.is_null(), "regex.find: stack is empty");
85
86    let (stack, pattern_val) = unsafe { pop(stack) };
87    let (stack, text_val) = unsafe { pop(stack) };
88
89    match (text_val, pattern_val) {
90        (Value::String(text), Value::String(pattern)) => {
91            match Regex::new(pattern.as_str_or_empty()) {
92                Ok(re) => match re.find(text.as_str_or_empty()) {
93                    Some(m) => {
94                        let stack = unsafe {
95                            push(stack, Value::String(global_string(m.as_str().to_string())))
96                        };
97                        unsafe { push(stack, Value::Bool(true)) }
98                    }
99                    None => {
100                        let stack =
101                            unsafe { push(stack, Value::String(global_string(String::new()))) };
102                        unsafe { push(stack, Value::Bool(false)) }
103                    }
104                },
105                Err(_) => {
106                    // Invalid regex
107                    let stack = unsafe { push(stack, Value::String(global_string(String::new()))) };
108                    unsafe { push(stack, Value::Bool(false)) }
109                }
110            }
111        }
112        _ => panic!("regex.find: expected two Strings on stack"),
113    }
114}
115
116/// Find all matches of a pattern in the string
117///
118/// Stack effect: ( String pattern -- List Bool )
119///
120/// Returns a list of all matched substrings and true on success.
121/// Returns empty list and false on invalid regex.
122///
123/// # Safety
124/// Stack must have two String values on top
125#[unsafe(no_mangle)]
126pub unsafe extern "C" fn patch_seq_regex_find_all(stack: Stack) -> Stack {
127    assert!(!stack.is_null(), "regex.find-all: stack is empty");
128
129    let (stack, pattern_val) = unsafe { pop(stack) };
130    let (stack, text_val) = unsafe { pop(stack) };
131
132    match (text_val, pattern_val) {
133        (Value::String(text), Value::String(pattern)) => {
134            match Regex::new(pattern.as_str_or_empty()) {
135                Ok(re) => {
136                    let matches: Vec<Value> = re
137                        .find_iter(text.as_str_or_empty())
138                        .map(|m| Value::String(global_string(m.as_str().to_string())))
139                        .collect();
140                    let stack = unsafe { push(stack, make_list(matches)) };
141                    unsafe { push(stack, Value::Bool(true)) }
142                }
143                Err(_) => {
144                    // Invalid regex
145                    let stack = unsafe { push(stack, make_list(vec![])) };
146                    unsafe { push(stack, Value::Bool(false)) }
147                }
148            }
149        }
150        _ => panic!("regex.find-all: expected two Strings on stack"),
151    }
152}
153
154/// Replace the first occurrence of a pattern
155///
156/// Stack effect: ( String pattern replacement -- String Bool )
157///
158/// Returns the string with the first match replaced and true on success.
159/// Returns original string and false on invalid regex.
160///
161/// # Safety
162/// Stack must have three String values on top
163#[unsafe(no_mangle)]
164pub unsafe extern "C" fn patch_seq_regex_replace(stack: Stack) -> Stack {
165    assert!(!stack.is_null(), "regex.replace: stack is empty");
166
167    let (stack, replacement_val) = unsafe { pop(stack) };
168    let (stack, pattern_val) = unsafe { pop(stack) };
169    let (stack, text_val) = unsafe { pop(stack) };
170
171    match (text_val, pattern_val, replacement_val) {
172        (Value::String(text), Value::String(pattern), Value::String(replacement)) => {
173            match Regex::new(pattern.as_str_or_empty()) {
174                Ok(re) => {
175                    let result = re
176                        .replace(text.as_str_or_empty(), replacement.as_str_or_empty())
177                        .into_owned();
178                    let stack = unsafe { push(stack, Value::String(global_string(result))) };
179                    unsafe { push(stack, Value::Bool(true)) }
180                }
181                Err(_) => {
182                    // Invalid regex returns original
183                    let stack = unsafe {
184                        push(
185                            stack,
186                            Value::String(global_string(text.as_str_or_empty().to_string())),
187                        )
188                    };
189                    unsafe { push(stack, Value::Bool(false)) }
190                }
191            }
192        }
193        _ => panic!("regex.replace: expected three Strings on stack"),
194    }
195}
196
197/// Replace all occurrences of a pattern
198///
199/// Stack effect: ( String pattern replacement -- String Bool )
200///
201/// Returns the string with all matches replaced and true on success.
202/// Returns original string and false on invalid regex.
203///
204/// # Safety
205/// Stack must have three String values on top
206#[unsafe(no_mangle)]
207pub unsafe extern "C" fn patch_seq_regex_replace_all(stack: Stack) -> Stack {
208    assert!(!stack.is_null(), "regex.replace-all: stack is empty");
209
210    let (stack, replacement_val) = unsafe { pop(stack) };
211    let (stack, pattern_val) = unsafe { pop(stack) };
212    let (stack, text_val) = unsafe { pop(stack) };
213
214    match (text_val, pattern_val, replacement_val) {
215        (Value::String(text), Value::String(pattern), Value::String(replacement)) => {
216            match Regex::new(pattern.as_str_or_empty()) {
217                Ok(re) => {
218                    let result = re
219                        .replace_all(text.as_str_or_empty(), replacement.as_str_or_empty())
220                        .into_owned();
221                    let stack = unsafe { push(stack, Value::String(global_string(result))) };
222                    unsafe { push(stack, Value::Bool(true)) }
223                }
224                Err(_) => {
225                    // Invalid regex returns original
226                    let stack = unsafe {
227                        push(
228                            stack,
229                            Value::String(global_string(text.as_str_or_empty().to_string())),
230                        )
231                    };
232                    unsafe { push(stack, Value::Bool(false)) }
233                }
234            }
235        }
236        _ => panic!("regex.replace-all: expected three Strings on stack"),
237    }
238}
239
240/// Extract capture groups from a pattern match
241///
242/// Stack effect: ( String pattern -- List Bool )
243///
244/// Returns a list of captured groups (excluding the full match) and true on success.
245/// Returns empty list and false if no match or invalid regex.
246///
247/// # Safety
248/// Stack must have two String values on top
249#[unsafe(no_mangle)]
250pub unsafe extern "C" fn patch_seq_regex_captures(stack: Stack) -> Stack {
251    assert!(!stack.is_null(), "regex.captures: stack is empty");
252
253    let (stack, pattern_val) = unsafe { pop(stack) };
254    let (stack, text_val) = unsafe { pop(stack) };
255
256    match (text_val, pattern_val) {
257        (Value::String(text), Value::String(pattern)) => {
258            match Regex::new(pattern.as_str_or_empty()) {
259                Ok(re) => match re.captures(text.as_str_or_empty()) {
260                    Some(caps) => {
261                        // Skip group 0 (full match), collect groups 1..n
262                        let groups: Vec<Value> = caps
263                            .iter()
264                            .skip(1)
265                            .map(|m| match m {
266                                Some(m) => Value::String(global_string(m.as_str().to_string())),
267                                None => Value::String(global_string(String::new())),
268                            })
269                            .collect();
270                        let stack = unsafe { push(stack, make_list(groups)) };
271                        unsafe { push(stack, Value::Bool(true)) }
272                    }
273                    None => {
274                        let stack = unsafe { push(stack, make_list(vec![])) };
275                        unsafe { push(stack, Value::Bool(false)) }
276                    }
277                },
278                Err(_) => {
279                    // Invalid regex
280                    let stack = unsafe { push(stack, make_list(vec![])) };
281                    unsafe { push(stack, Value::Bool(false)) }
282                }
283            }
284        }
285        _ => panic!("regex.captures: expected two Strings on stack"),
286    }
287}
288
289/// Split a string by a pattern
290///
291/// Stack effect: ( String pattern -- List Bool )
292///
293/// Returns a list of substrings split by the pattern and true on success.
294/// Returns single-element list with original string and false on invalid regex.
295///
296/// # Safety
297/// Stack must have two String values on top
298#[unsafe(no_mangle)]
299pub unsafe extern "C" fn patch_seq_regex_split(stack: Stack) -> Stack {
300    assert!(!stack.is_null(), "regex.split: stack is empty");
301
302    let (stack, pattern_val) = unsafe { pop(stack) };
303    let (stack, text_val) = unsafe { pop(stack) };
304
305    match (text_val, pattern_val) {
306        (Value::String(text), Value::String(pattern)) => {
307            match Regex::new(pattern.as_str_or_empty()) {
308                Ok(re) => {
309                    let parts: Vec<Value> = re
310                        .split(text.as_str_or_empty())
311                        .map(|s| Value::String(global_string(s.to_string())))
312                        .collect();
313                    let stack = unsafe { push(stack, make_list(parts)) };
314                    unsafe { push(stack, Value::Bool(true)) }
315                }
316                Err(_) => {
317                    // Invalid regex returns original as single element
318                    let parts = vec![Value::String(global_string(
319                        text.as_str_or_empty().to_string(),
320                    ))];
321                    let stack = unsafe { push(stack, make_list(parts)) };
322                    unsafe { push(stack, Value::Bool(false)) }
323                }
324            }
325        }
326        _ => panic!("regex.split: expected two Strings on stack"),
327    }
328}
329
330/// Check if a pattern is a valid regex
331///
332/// Stack effect: ( String -- Bool )
333///
334/// Returns true if the pattern compiles successfully, false otherwise.
335///
336/// # Safety
337/// Stack must have a String value on top
338#[unsafe(no_mangle)]
339pub unsafe extern "C" fn patch_seq_regex_valid(stack: Stack) -> Stack {
340    assert!(!stack.is_null(), "regex.valid?: stack is empty");
341
342    let (stack, pattern_val) = unsafe { pop(stack) };
343
344    match pattern_val {
345        Value::String(pattern) => {
346            let is_valid = Regex::new(pattern.as_str_or_empty()).is_ok();
347            unsafe { push(stack, Value::Bool(is_valid)) }
348        }
349        _ => panic!("regex.valid?: expected String on stack"),
350    }
351}
352
353#[cfg(test)]
354mod tests;