Skip to main content

seq_runtime/
regex.rs

1//! Regular expression operations for Seq
2//!
3//! These functions are exported with C ABI for LLVM codegen to call.
4//! Uses Rust's regex crate - fast, safe, no catastrophic backtracking.
5//!
6//! # API
7//!
8//! ```seq
9//! # Match check
10//! "hello world" "wo.ld" regex.match?      # ( String String -- Bool )
11//!
12//! # Find first match
13//! "a1 b2 c3" "[a-z][0-9]" regex.find      # ( String String -- String Bool )
14//!
15//! # Find all matches
//! "a1 b2 c3" "[a-z][0-9]" regex.find-all  # ( String String -- List Bool )
17//!
18//! # Replace first occurrence
19//! "hello world" "world" "Seq" regex.replace
//! # ( String pattern replacement -- String Bool )
21//!
22//! # Replace all occurrences
23//! "a1 b2 c3" "[0-9]" "X" regex.replace-all
//! # ( String pattern replacement -- String Bool )
25//!
26//! # Capture groups
27//! "2024-01-15" "(\d+)-(\d+)-(\d+)" regex.captures
28//! # ( String pattern -- List Bool ) returns ["2024", "01", "15"] true on match
29//!
30//! # Split by pattern
//! "a1b2c3" "[0-9]" regex.split            # ( String pattern -- List Bool )
32//! ```
33
34use seq_core::seqstring::global_string;
35use seq_core::stack::{Stack, pop, push};
36use seq_core::value::{Value, VariantData};
37
38use regex::Regex;
39use std::sync::Arc;
40
41/// Helper to create a List variant from a vector of values
42fn make_list(items: Vec<Value>) -> Value {
43    Value::Variant(Arc::new(VariantData::new(
44        global_string("List".to_string()),
45        items,
46    )))
47}
48
49/// Check if a pattern matches anywhere in the string
50///
51/// Stack effect: ( String pattern -- Bool )
52///
53/// # Safety
54/// Stack must have two String values on top
55#[unsafe(no_mangle)]
56pub unsafe extern "C" fn patch_seq_regex_match(stack: Stack) -> Stack {
57    assert!(!stack.is_null(), "regex.match?: stack is empty");
58
59    let (stack, pattern_val) = unsafe { pop(stack) };
60    let (stack, text_val) = unsafe { pop(stack) };
61
62    match (text_val, pattern_val) {
63        (Value::String(text), Value::String(pattern)) => {
64            let result = match Regex::new(pattern.as_str()) {
65                Ok(re) => re.is_match(text.as_str()),
66                Err(_) => false, // Invalid regex returns false
67            };
68            unsafe { push(stack, Value::Bool(result)) }
69        }
70        _ => panic!("regex.match?: expected two Strings on stack"),
71    }
72}
73
74/// Find the first match of a pattern in the string
75///
76/// Stack effect: ( String pattern -- String Bool )
77///
78/// Returns the matched text and true on success, empty string and false on no match.
79///
80/// # Safety
81/// Stack must have two String values on top
82#[unsafe(no_mangle)]
83pub unsafe extern "C" fn patch_seq_regex_find(stack: Stack) -> Stack {
84    assert!(!stack.is_null(), "regex.find: stack is empty");
85
86    let (stack, pattern_val) = unsafe { pop(stack) };
87    let (stack, text_val) = unsafe { pop(stack) };
88
89    match (text_val, pattern_val) {
90        (Value::String(text), Value::String(pattern)) => {
91            match Regex::new(pattern.as_str()) {
92                Ok(re) => match re.find(text.as_str()) {
93                    Some(m) => {
94                        let stack = unsafe {
95                            push(stack, Value::String(global_string(m.as_str().to_string())))
96                        };
97                        unsafe { push(stack, Value::Bool(true)) }
98                    }
99                    None => {
100                        let stack =
101                            unsafe { push(stack, Value::String(global_string(String::new()))) };
102                        unsafe { push(stack, Value::Bool(false)) }
103                    }
104                },
105                Err(_) => {
106                    // Invalid regex
107                    let stack = unsafe { push(stack, Value::String(global_string(String::new()))) };
108                    unsafe { push(stack, Value::Bool(false)) }
109                }
110            }
111        }
112        _ => panic!("regex.find: expected two Strings on stack"),
113    }
114}
115
116/// Find all matches of a pattern in the string
117///
118/// Stack effect: ( String pattern -- List Bool )
119///
120/// Returns a list of all matched substrings and true on success.
121/// Returns empty list and false on invalid regex.
122///
123/// # Safety
124/// Stack must have two String values on top
125#[unsafe(no_mangle)]
126pub unsafe extern "C" fn patch_seq_regex_find_all(stack: Stack) -> Stack {
127    assert!(!stack.is_null(), "regex.find-all: stack is empty");
128
129    let (stack, pattern_val) = unsafe { pop(stack) };
130    let (stack, text_val) = unsafe { pop(stack) };
131
132    match (text_val, pattern_val) {
133        (Value::String(text), Value::String(pattern)) => match Regex::new(pattern.as_str()) {
134            Ok(re) => {
135                let matches: Vec<Value> = re
136                    .find_iter(text.as_str())
137                    .map(|m| Value::String(global_string(m.as_str().to_string())))
138                    .collect();
139                let stack = unsafe { push(stack, make_list(matches)) };
140                unsafe { push(stack, Value::Bool(true)) }
141            }
142            Err(_) => {
143                // Invalid regex
144                let stack = unsafe { push(stack, make_list(vec![])) };
145                unsafe { push(stack, Value::Bool(false)) }
146            }
147        },
148        _ => panic!("regex.find-all: expected two Strings on stack"),
149    }
150}
151
152/// Replace the first occurrence of a pattern
153///
154/// Stack effect: ( String pattern replacement -- String Bool )
155///
156/// Returns the string with the first match replaced and true on success.
157/// Returns original string and false on invalid regex.
158///
159/// # Safety
160/// Stack must have three String values on top
161#[unsafe(no_mangle)]
162pub unsafe extern "C" fn patch_seq_regex_replace(stack: Stack) -> Stack {
163    assert!(!stack.is_null(), "regex.replace: stack is empty");
164
165    let (stack, replacement_val) = unsafe { pop(stack) };
166    let (stack, pattern_val) = unsafe { pop(stack) };
167    let (stack, text_val) = unsafe { pop(stack) };
168
169    match (text_val, pattern_val, replacement_val) {
170        (Value::String(text), Value::String(pattern), Value::String(replacement)) => {
171            match Regex::new(pattern.as_str()) {
172                Ok(re) => {
173                    let result = re.replace(text.as_str(), replacement.as_str()).into_owned();
174                    let stack = unsafe { push(stack, Value::String(global_string(result))) };
175                    unsafe { push(stack, Value::Bool(true)) }
176                }
177                Err(_) => {
178                    // Invalid regex returns original
179                    let stack = unsafe {
180                        push(
181                            stack,
182                            Value::String(global_string(text.as_str().to_string())),
183                        )
184                    };
185                    unsafe { push(stack, Value::Bool(false)) }
186                }
187            }
188        }
189        _ => panic!("regex.replace: expected three Strings on stack"),
190    }
191}
192
193/// Replace all occurrences of a pattern
194///
195/// Stack effect: ( String pattern replacement -- String Bool )
196///
197/// Returns the string with all matches replaced and true on success.
198/// Returns original string and false on invalid regex.
199///
200/// # Safety
201/// Stack must have three String values on top
202#[unsafe(no_mangle)]
203pub unsafe extern "C" fn patch_seq_regex_replace_all(stack: Stack) -> Stack {
204    assert!(!stack.is_null(), "regex.replace-all: stack is empty");
205
206    let (stack, replacement_val) = unsafe { pop(stack) };
207    let (stack, pattern_val) = unsafe { pop(stack) };
208    let (stack, text_val) = unsafe { pop(stack) };
209
210    match (text_val, pattern_val, replacement_val) {
211        (Value::String(text), Value::String(pattern), Value::String(replacement)) => {
212            match Regex::new(pattern.as_str()) {
213                Ok(re) => {
214                    let result = re
215                        .replace_all(text.as_str(), replacement.as_str())
216                        .into_owned();
217                    let stack = unsafe { push(stack, Value::String(global_string(result))) };
218                    unsafe { push(stack, Value::Bool(true)) }
219                }
220                Err(_) => {
221                    // Invalid regex returns original
222                    let stack = unsafe {
223                        push(
224                            stack,
225                            Value::String(global_string(text.as_str().to_string())),
226                        )
227                    };
228                    unsafe { push(stack, Value::Bool(false)) }
229                }
230            }
231        }
232        _ => panic!("regex.replace-all: expected three Strings on stack"),
233    }
234}
235
236/// Extract capture groups from a pattern match
237///
238/// Stack effect: ( String pattern -- List Bool )
239///
240/// Returns a list of captured groups (excluding the full match) and true on success.
241/// Returns empty list and false if no match or invalid regex.
242///
243/// # Safety
244/// Stack must have two String values on top
245#[unsafe(no_mangle)]
246pub unsafe extern "C" fn patch_seq_regex_captures(stack: Stack) -> Stack {
247    assert!(!stack.is_null(), "regex.captures: stack is empty");
248
249    let (stack, pattern_val) = unsafe { pop(stack) };
250    let (stack, text_val) = unsafe { pop(stack) };
251
252    match (text_val, pattern_val) {
253        (Value::String(text), Value::String(pattern)) => {
254            match Regex::new(pattern.as_str()) {
255                Ok(re) => match re.captures(text.as_str()) {
256                    Some(caps) => {
257                        // Skip group 0 (full match), collect groups 1..n
258                        let groups: Vec<Value> = caps
259                            .iter()
260                            .skip(1)
261                            .map(|m| match m {
262                                Some(m) => Value::String(global_string(m.as_str().to_string())),
263                                None => Value::String(global_string(String::new())),
264                            })
265                            .collect();
266                        let stack = unsafe { push(stack, make_list(groups)) };
267                        unsafe { push(stack, Value::Bool(true)) }
268                    }
269                    None => {
270                        let stack = unsafe { push(stack, make_list(vec![])) };
271                        unsafe { push(stack, Value::Bool(false)) }
272                    }
273                },
274                Err(_) => {
275                    // Invalid regex
276                    let stack = unsafe { push(stack, make_list(vec![])) };
277                    unsafe { push(stack, Value::Bool(false)) }
278                }
279            }
280        }
281        _ => panic!("regex.captures: expected two Strings on stack"),
282    }
283}
284
285/// Split a string by a pattern
286///
287/// Stack effect: ( String pattern -- List Bool )
288///
289/// Returns a list of substrings split by the pattern and true on success.
290/// Returns single-element list with original string and false on invalid regex.
291///
292/// # Safety
293/// Stack must have two String values on top
294#[unsafe(no_mangle)]
295pub unsafe extern "C" fn patch_seq_regex_split(stack: Stack) -> Stack {
296    assert!(!stack.is_null(), "regex.split: stack is empty");
297
298    let (stack, pattern_val) = unsafe { pop(stack) };
299    let (stack, text_val) = unsafe { pop(stack) };
300
301    match (text_val, pattern_val) {
302        (Value::String(text), Value::String(pattern)) => match Regex::new(pattern.as_str()) {
303            Ok(re) => {
304                let parts: Vec<Value> = re
305                    .split(text.as_str())
306                    .map(|s| Value::String(global_string(s.to_string())))
307                    .collect();
308                let stack = unsafe { push(stack, make_list(parts)) };
309                unsafe { push(stack, Value::Bool(true)) }
310            }
311            Err(_) => {
312                // Invalid regex returns original as single element
313                let parts = vec![Value::String(global_string(text.as_str().to_string()))];
314                let stack = unsafe { push(stack, make_list(parts)) };
315                unsafe { push(stack, Value::Bool(false)) }
316            }
317        },
318        _ => panic!("regex.split: expected two Strings on stack"),
319    }
320}
321
322/// Check if a pattern is a valid regex
323///
324/// Stack effect: ( String -- Bool )
325///
326/// Returns true if the pattern compiles successfully, false otherwise.
327///
328/// # Safety
329/// Stack must have a String value on top
330#[unsafe(no_mangle)]
331pub unsafe extern "C" fn patch_seq_regex_valid(stack: Stack) -> Stack {
332    assert!(!stack.is_null(), "regex.valid?: stack is empty");
333
334    let (stack, pattern_val) = unsafe { pop(stack) };
335
336    match pattern_val {
337        Value::String(pattern) => {
338            let is_valid = Regex::new(pattern.as_str()).is_ok();
339            unsafe { push(stack, Value::Bool(is_valid)) }
340        }
341        _ => panic!("regex.valid?: expected String on stack"),
342    }
343}
344
345#[cfg(test)]
346mod tests;