Skip to main content

jpx_core/extensions/
string.rs

1//! String manipulation functions.
2
3use std::collections::HashSet;
4
5use heck::{
6    ToKebabCase, ToLowerCamelCase, ToShoutyKebabCase, ToShoutySnakeCase, ToSnakeCase, ToTitleCase,
7    ToTrainCase, ToUpperCamelCase,
8};
9use regex::Regex;
10use serde_json::{Number, Value};
11
12use crate::functions::{Function, custom_error};
13use crate::interpreter::SearchResult;
14use crate::registry::register_if_enabled;
15use crate::{Context, Runtime, arg, defn};
16
/// Hard cap (64 MiB) on the byte length of any string built through
/// user-controlled repetition (`repeat`, `center`, ...). Without it a single
/// call such as `repeat('x', `1e15`)` could request a petabyte-sized
/// allocation.
const MAX_GENERATED_STRING_BYTES: usize = 64 << 20;
21
22// =============================================================================
23// lower(string) -> string
24// =============================================================================
25
26defn!(LowerFn, vec![arg!(string)], None);
27
28impl Function for LowerFn {
29    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
30        self.signature.validate(args, ctx)?;
31        let s = args[0]
32            .as_str()
33            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
34        Ok(Value::String(s.to_lowercase()))
35    }
36}
37
38// =============================================================================
39// upper(string) -> string
40// =============================================================================
41
42defn!(UpperFn, vec![arg!(string)], None);
43
44impl Function for UpperFn {
45    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
46        self.signature.validate(args, ctx)?;
47        let s = args[0]
48            .as_str()
49            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
50        Ok(Value::String(s.to_uppercase()))
51    }
52}
53
54// =============================================================================
55// trim(string) -> string
56// =============================================================================
57
58defn!(TrimFn, vec![arg!(string)], None);
59
60impl Function for TrimFn {
61    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
62        self.signature.validate(args, ctx)?;
63        let s = args[0]
64            .as_str()
65            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
66        Ok(Value::String(s.trim().to_string()))
67    }
68}
69
70// =============================================================================
71// trim_start(string) -> string
72// =============================================================================
73
74defn!(TrimStartFn, vec![arg!(string)], None);
75
76impl Function for TrimStartFn {
77    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
78        self.signature.validate(args, ctx)?;
79        let s = args[0]
80            .as_str()
81            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
82        Ok(Value::String(s.trim_start().to_string()))
83    }
84}
85
86// =============================================================================
87// trim_end(string) -> string
88// =============================================================================
89
90defn!(TrimEndFn, vec![arg!(string)], None);
91
92impl Function for TrimEndFn {
93    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
94        self.signature.validate(args, ctx)?;
95        let s = args[0]
96            .as_str()
97            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
98        Ok(Value::String(s.trim_end().to_string()))
99    }
100}
101
102// =============================================================================
103// split(string, delimiter) -> array
104// =============================================================================
105
106defn!(SplitFn, vec![arg!(string), arg!(string)], None);
107
108impl Function for SplitFn {
109    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
110        self.signature.validate(args, ctx)?;
111        let s = args[0]
112            .as_str()
113            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
114        let delimiter = args[1]
115            .as_str()
116            .ok_or_else(|| custom_error(ctx, "Expected string delimiter"))?;
117
118        let parts: Vec<Value> = s
119            .split(delimiter)
120            .map(|part| Value::String(part.to_string()))
121            .collect();
122
123        Ok(Value::Array(parts))
124    }
125}
126
127// =============================================================================
128// replace(string, old, new) -> string
129// =============================================================================
130
131defn!(
132    ReplaceFn,
133    vec![arg!(string), arg!(string), arg!(string)],
134    None
135);
136
137impl Function for ReplaceFn {
138    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
139        self.signature.validate(args, ctx)?;
140        let s = args[0]
141            .as_str()
142            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
143        let old = args[1]
144            .as_str()
145            .ok_or_else(|| custom_error(ctx, "Expected old string argument"))?;
146        let new = args[2]
147            .as_str()
148            .ok_or_else(|| custom_error(ctx, "Expected new string argument"))?;
149
150        Ok(Value::String(s.replace(old, new)))
151    }
152}
153
154// =============================================================================
155// pad_left(string, width, char) -> string
156// =============================================================================
157
158defn!(
159    PadLeftFn,
160    vec![arg!(string), arg!(number), arg!(string)],
161    None
162);
163
164impl Function for PadLeftFn {
165    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
166        self.signature.validate(args, ctx)?;
167        let s = args[0]
168            .as_str()
169            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
170        let width = args[1]
171            .as_f64()
172            .map(|n| n as usize)
173            .ok_or_else(|| custom_error(ctx, "Expected positive number for width"))?;
174        let pad_char = args[2]
175            .as_str()
176            .ok_or_else(|| custom_error(ctx, "Expected string for pad character"))?;
177
178        let pad = pad_char.chars().next().unwrap_or(' ');
179        let result = if s.len() >= width {
180            s.to_string()
181        } else {
182            format!("{}{}", pad.to_string().repeat(width - s.len()), s)
183        };
184
185        Ok(Value::String(result))
186    }
187}
188
189// =============================================================================
190// pad_right(string, width, char) -> string
191// =============================================================================
192
193defn!(
194    PadRightFn,
195    vec![arg!(string), arg!(number), arg!(string)],
196    None
197);
198
199impl Function for PadRightFn {
200    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
201        self.signature.validate(args, ctx)?;
202        let s = args[0]
203            .as_str()
204            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
205        let width = args[1]
206            .as_f64()
207            .map(|n| n as usize)
208            .ok_or_else(|| custom_error(ctx, "Expected positive number for width"))?;
209        let pad_char = args[2]
210            .as_str()
211            .ok_or_else(|| custom_error(ctx, "Expected string for pad character"))?;
212
213        let pad = pad_char.chars().next().unwrap_or(' ');
214        let result = if s.len() >= width {
215            s.to_string()
216        } else {
217            format!("{}{}", s, pad.to_string().repeat(width - s.len()))
218        };
219
220        Ok(Value::String(result))
221    }
222}
223
224// =============================================================================
225// substr(string, start, length?) -> string
226// =============================================================================
227
228defn!(
229    SubstrFn,
230    vec![arg!(string), arg!(number)],
231    Some(arg!(number))
232);
233
234impl Function for SubstrFn {
235    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
236        self.signature.validate(args, ctx)?;
237        let s = args[0]
238            .as_str()
239            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
240        let start = args[1]
241            .as_f64()
242            .map(|n| n as i64)
243            .ok_or_else(|| custom_error(ctx, "Expected number for start"))?;
244
245        // Handle negative start (from end)
246        let start_idx = if start < 0 {
247            (s.len() as i64 + start).max(0) as usize
248        } else {
249            start as usize
250        };
251
252        let result = if args.len() > 2 {
253            let length = args[2]
254                .as_f64()
255                .map(|n| n as usize)
256                .ok_or_else(|| custom_error(ctx, "Expected positive number for length"))?;
257            s.chars().skip(start_idx).take(length).collect()
258        } else {
259            s.chars().skip(start_idx).collect()
260        };
261
262        Ok(Value::String(result))
263    }
264}
265
266// =============================================================================
267// capitalize(string) -> string (first letter uppercase)
268// =============================================================================
269
270defn!(CapitalizeFn, vec![arg!(string)], None);
271
272impl Function for CapitalizeFn {
273    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
274        self.signature.validate(args, ctx)?;
275        let s = args[0]
276            .as_str()
277            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
278
279        let result = if s.is_empty() {
280            String::new()
281        } else {
282            let mut chars = s.chars();
283            match chars.next() {
284                None => String::new(),
285                Some(first) => first.to_uppercase().to_string() + chars.as_str(),
286            }
287        };
288
289        Ok(Value::String(result))
290    }
291}
292
293// =============================================================================
294// title(string) -> string (capitalize each word)
295// =============================================================================
296
297defn!(TitleFn, vec![arg!(string)], None);
298
299impl Function for TitleFn {
300    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
301        self.signature.validate(args, ctx)?;
302        let s = args[0]
303            .as_str()
304            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
305
306        let result = s
307            .split_whitespace()
308            .map(|word| {
309                let mut chars = word.chars();
310                match chars.next() {
311                    None => String::new(),
312                    Some(first) => {
313                        first.to_uppercase().to_string() + &chars.as_str().to_lowercase()
314                    }
315                }
316            })
317            .collect::<Vec<_>>()
318            .join(" ");
319
320        Ok(Value::String(result))
321    }
322}
323
324// =============================================================================
325// repeat(string, count) -> string
326// =============================================================================
327
328defn!(RepeatFn, vec![arg!(string), arg!(number)], None);
329
330impl Function for RepeatFn {
331    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
332        self.signature.validate(args, ctx)?;
333        let s = args[0]
334            .as_str()
335            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
336        let count = args[1]
337            .as_f64()
338            .map(|n| n as usize)
339            .ok_or_else(|| custom_error(ctx, "Expected positive number for count"))?;
340
341        if count.saturating_mul(s.len()) > MAX_GENERATED_STRING_BYTES {
342            return Err(custom_error(
343                ctx,
344                "repeat result exceeds the maximum allowed size",
345            ));
346        }
347        Ok(Value::String(s.repeat(count)))
348    }
349}
350
351// =============================================================================
352// index_of(string, search, start?, end?) -> number | null (JEP-014)
353// =============================================================================
354
355defn!(
356    IndexOfFn,
357    vec![arg!(string), arg!(string)],
358    Some(arg!(number))
359);
360
/// Clamps a possibly negative index into `0..=len`.
///
/// Negative values count back from the end (`-1` is `len - 1`) and are
/// floored at `0`; non-negative values are capped at `len`.
///
/// The arithmetic is done in `u64` so that an `i64` wider than `usize`
/// (32-bit targets) is clamped rather than silently truncated by an `as`
/// cast.
fn normalize_index(idx: i64, len: usize) -> usize {
    let len_wide = len as u64;
    let clamped = if idx < 0 {
        len_wide.saturating_sub(idx.unsigned_abs())
    } else {
        (idx as u64).min(len_wide)
    };
    // `clamped <= len`, so narrowing back to usize is lossless.
    clamped as usize
}
369
370impl Function for IndexOfFn {
371    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
372        self.signature.validate(args, ctx)?;
373        let s = args[0]
374            .as_str()
375            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
376        let search = args[1]
377            .as_str()
378            .ok_or_else(|| custom_error(ctx, "Expected search string"))?;
379
380        // Operate on character indices so multibyte input is handled correctly
381        // and never slices through a UTF-8 code point boundary.
382        let chars: Vec<char> = s.chars().collect();
383        let len = chars.len();
384
385        // Get optional start parameter (default: 0)
386        let start = if args.len() > 2 {
387            let start_val = args[2]
388                .as_f64()
389                .ok_or_else(|| custom_error(ctx, "Expected number for start"))?
390                as i64;
391            normalize_index(start_val, len)
392        } else {
393            0
394        };
395
396        // Get optional end parameter (default: string length)
397        let end = if args.len() > 3 {
398            let end_val = args[3]
399                .as_f64()
400                .ok_or_else(|| custom_error(ctx, "Expected number for end"))?
401                as i64;
402            normalize_index(end_val, len)
403        } else {
404            len
405        };
406
407        // Search within the [start, end) character window.
408        if start >= end || start >= len {
409            return Ok(Value::Null);
410        }
411
412        let window: String = chars[start..end].iter().collect();
413        match window.find(search) {
414            Some(byte_idx) => {
415                let char_idx = window[..byte_idx].chars().count();
416                Ok(Value::Number(Number::from((start + char_idx) as i64)))
417            }
418            None => Ok(Value::Null),
419        }
420    }
421}
422
423// =============================================================================
424// last_index_of(string, search, start?, end?) -> number | null (JEP-014)
425// =============================================================================
426
427defn!(
428    LastIndexOfFn,
429    vec![arg!(string), arg!(string)],
430    Some(arg!(number))
431);
432
433impl Function for LastIndexOfFn {
434    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
435        self.signature.validate(args, ctx)?;
436        let s = args[0]
437            .as_str()
438            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
439        let search = args[1]
440            .as_str()
441            .ok_or_else(|| custom_error(ctx, "Expected search string"))?;
442
443        // Operate on character indices so multibyte input is handled correctly
444        // and never slices through a UTF-8 code point boundary.
445        let chars: Vec<char> = s.chars().collect();
446        let len = chars.len();
447
448        // Get optional start parameter (default: 0)
449        let start = if args.len() > 2 {
450            let start_val = args[2]
451                .as_f64()
452                .ok_or_else(|| custom_error(ctx, "Expected number for start"))?
453                as i64;
454            normalize_index(start_val, len)
455        } else {
456            0
457        };
458
459        // Get optional end parameter (default: string length)
460        let end = if args.len() > 3 {
461            let end_val = args[3]
462                .as_f64()
463                .ok_or_else(|| custom_error(ctx, "Expected number for end"))?
464                as i64;
465            normalize_index(end_val, len)
466        } else {
467            len
468        };
469
470        // Search within the [start, end) character window.
471        if start >= end || start >= len {
472            return Ok(Value::Null);
473        }
474
475        let window: String = chars[start..end].iter().collect();
476        match window.rfind(search) {
477            Some(byte_idx) => {
478                let char_idx = window[..byte_idx].chars().count();
479                Ok(Value::Number(Number::from((start + char_idx) as i64)))
480            }
481            None => Ok(Value::Null),
482        }
483    }
484}
485
486// =============================================================================
487// slice(string, start, end?) -> string
488// =============================================================================
489
490defn!(
491    SliceFn,
492    vec![arg!(string), arg!(number)],
493    Some(arg!(number))
494);
495
496impl Function for SliceFn {
497    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
498        self.signature.validate(args, ctx)?;
499        let s = args[0]
500            .as_str()
501            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
502
503        let len = s.len() as i64;
504
505        let start = args[1]
506            .as_f64()
507            .map(|n| n as i64)
508            .ok_or_else(|| custom_error(ctx, "Expected number for start"))?;
509
510        // Handle negative indices
511        let start_idx = if start < 0 {
512            (len + start).max(0) as usize
513        } else {
514            start.min(len) as usize
515        };
516
517        let end_idx = if args.len() > 2 {
518            let end = args[2]
519                .as_f64()
520                .map(|n| n as i64)
521                .ok_or_else(|| custom_error(ctx, "Expected number for end"))?;
522            if end < 0 {
523                (len + end).max(0) as usize
524            } else {
525                end.min(len) as usize
526            }
527        } else {
528            len as usize
529        };
530
531        let result: String = s
532            .chars()
533            .skip(start_idx)
534            .take(end_idx.saturating_sub(start_idx))
535            .collect();
536
537        Ok(Value::String(result))
538    }
539}
540
541// =============================================================================
542// concat(array_of_strings, separator?) -> string
543// =============================================================================
544
545defn!(ConcatFn, vec![arg!(array)], Some(arg!(string)));
546
547impl Function for ConcatFn {
548    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
549        self.signature.validate(args, ctx)?;
550        let arr = args[0]
551            .as_array()
552            .ok_or_else(|| custom_error(ctx, "Expected array argument"))?;
553
554        let separator = if args.len() > 1 {
555            args[1].as_str().map(|s| s.to_string()).unwrap_or_default()
556        } else {
557            String::new()
558        };
559
560        let strings: Vec<String> = arr
561            .iter()
562            .filter_map(|v| v.as_str().map(|s| s.to_string()))
563            .collect();
564
565        Ok(Value::String(strings.join(&separator)))
566    }
567}
568
569// =============================================================================
570// upper_case(string) -> string (alias for upper, snake_case style)
571// =============================================================================
572
573defn!(UpperCaseFn, vec![arg!(string)], None);
574
575impl Function for UpperCaseFn {
576    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
577        self.signature.validate(args, ctx)?;
578        let s = args[0]
579            .as_str()
580            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
581        Ok(Value::String(s.to_uppercase()))
582    }
583}
584
585// =============================================================================
586// lower_case(string) -> string (alias for lower, snake_case style)
587// =============================================================================
588
589defn!(LowerCaseFn, vec![arg!(string)], None);
590
591impl Function for LowerCaseFn {
592    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
593        self.signature.validate(args, ctx)?;
594        let s = args[0]
595            .as_str()
596            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
597        Ok(Value::String(s.to_lowercase()))
598    }
599}
600
601// =============================================================================
602// title_case(string) -> string (alias for title, snake_case style)
603// Uses heck crate for proper case conversion
604// =============================================================================
605
606defn!(TitleCaseFn, vec![arg!(string)], None);
607
608impl Function for TitleCaseFn {
609    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
610        self.signature.validate(args, ctx)?;
611        let s = args[0]
612            .as_str()
613            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
614        Ok(Value::String(s.to_title_case()))
615    }
616}
617
618// =============================================================================
619// camel_case(string) -> string (helloWorld)
620// Uses heck crate for proper case conversion
621// =============================================================================
622
623defn!(CamelCaseFn, vec![arg!(string)], None);
624
625impl Function for CamelCaseFn {
626    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
627        self.signature.validate(args, ctx)?;
628        let s = args[0]
629            .as_str()
630            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
631        Ok(Value::String(s.to_lower_camel_case()))
632    }
633}
634
635// =============================================================================
636// snake_case(string) -> string (hello_world)
637// Uses heck crate for proper case conversion
638// =============================================================================
639
640defn!(SnakeCaseFn, vec![arg!(string)], None);
641
642impl Function for SnakeCaseFn {
643    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
644        self.signature.validate(args, ctx)?;
645        let s = args[0]
646            .as_str()
647            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
648        Ok(Value::String(s.to_snake_case()))
649    }
650}
651
652// =============================================================================
653// kebab_case(string) -> string (hello-world)
654// Uses heck crate for proper case conversion
655// =============================================================================
656
657defn!(KebabCaseFn, vec![arg!(string)], None);
658
659impl Function for KebabCaseFn {
660    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
661        self.signature.validate(args, ctx)?;
662        let s = args[0]
663            .as_str()
664            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
665        Ok(Value::String(s.to_kebab_case()))
666    }
667}
668
669// =============================================================================
670// pascal_case(string) -> string (HelloWorld)
671// Uses heck crate - also known as UpperCamelCase
672// =============================================================================
673
674defn!(PascalCaseFn, vec![arg!(string)], None);
675
676impl Function for PascalCaseFn {
677    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
678        self.signature.validate(args, ctx)?;
679        let s = args[0]
680            .as_str()
681            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
682        Ok(Value::String(s.to_upper_camel_case()))
683    }
684}
685
686// =============================================================================
687// shouty_snake_case(string) -> string (HELLO_WORLD)
688// Uses heck crate - useful for constants
689// =============================================================================
690
691defn!(ShoutySnakeCaseFn, vec![arg!(string)], None);
692
693impl Function for ShoutySnakeCaseFn {
694    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
695        self.signature.validate(args, ctx)?;
696        let s = args[0]
697            .as_str()
698            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
699        Ok(Value::String(s.to_shouty_snake_case()))
700    }
701}
702
703// =============================================================================
704// shouty_kebab_case(string) -> string (HELLO-WORLD)
705// Uses heck crate
706// =============================================================================
707
708defn!(ShoutyKebabCaseFn, vec![arg!(string)], None);
709
710impl Function for ShoutyKebabCaseFn {
711    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
712        self.signature.validate(args, ctx)?;
713        let s = args[0]
714            .as_str()
715            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
716        Ok(Value::String(s.to_shouty_kebab_case()))
717    }
718}
719
720// =============================================================================
721// train_case(string) -> string (Hello-World)
722// Uses heck crate - like HTTP headers (Content-Type)
723// =============================================================================
724
725defn!(TrainCaseFn, vec![arg!(string)], None);
726
727impl Function for TrainCaseFn {
728    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
729        self.signature.validate(args, ctx)?;
730        let s = args[0]
731            .as_str()
732            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
733        Ok(Value::String(s.to_train_case()))
734    }
735}
736
737// =============================================================================
738// truncate(string, length, suffix?) -> string
739// =============================================================================
740
741defn!(
742    TruncateFn,
743    vec![arg!(string), arg!(number)],
744    Some(arg!(string))
745);
746
747impl Function for TruncateFn {
748    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
749        self.signature.validate(args, ctx)?;
750        let s = args[0]
751            .as_str()
752            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
753        let max_len = args[1]
754            .as_f64()
755            .ok_or_else(|| custom_error(ctx, "Expected number for length"))?
756            as usize;
757
758        let suffix = args
759            .get(2)
760            .and_then(|v| v.as_str())
761            .map(|s| s.to_string())
762            .unwrap_or_else(|| "...".to_string());
763
764        if s.len() <= max_len {
765            Ok(Value::String(s.to_string()))
766        } else {
767            let truncate_at = max_len.saturating_sub(suffix.len());
768            let truncated: String = s.chars().take(truncate_at).collect();
769            Ok(Value::String(format!("{}{}", truncated, suffix)))
770        }
771    }
772}
773
774// =============================================================================
775// wrap(string, width) -> string with newlines
776// =============================================================================
777
778defn!(WrapFn, vec![arg!(string), arg!(number)], None);
779
780impl Function for WrapFn {
781    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
782        self.signature.validate(args, ctx)?;
783        let s = args[0]
784            .as_str()
785            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
786        let width = args[1]
787            .as_f64()
788            .ok_or_else(|| custom_error(ctx, "Expected number for width"))?
789            as usize;
790
791        if width == 0 {
792            return Ok(Value::String(s.to_string()));
793        }
794
795        let mut lines: Vec<String> = Vec::new();
796
797        // Process each paragraph (separated by newlines) separately
798        for paragraph in s.split('\n') {
799            let mut current_line = String::new();
800
801            for word in paragraph.split_whitespace() {
802                if current_line.is_empty() {
803                    current_line = word.to_string();
804                } else if current_line.len() + 1 + word.len() <= width {
805                    current_line.push(' ');
806                    current_line.push_str(word);
807                } else {
808                    lines.push(current_line);
809                    current_line = word.to_string();
810                }
811            }
812
813            // Push the last line of this paragraph (even if empty to preserve blank lines)
814            lines.push(current_line);
815        }
816
817        // Remove trailing empty line if the input didn't end with a newline
818        if !s.ends_with('\n') && lines.last().is_some_and(|l| l.is_empty()) {
819            lines.pop();
820        }
821
822        // If we have no lines but had input, return the original
823        if lines.is_empty() && !s.is_empty() {
824            return Ok(Value::String(s.to_string()));
825        }
826
827        // Join lines with newlines and return as string
828        Ok(Value::String(lines.join("\n")))
829    }
830}
831
832// =============================================================================
833// format(template, args) -> string
834// Supports:
835//   - Positional with array: format('Hello {0}', ['World'])
836//   - Named with object: format('Hello {name}', {name: 'World'})
837//   - Variadic: format('Hello {0}', 'World')
838// =============================================================================
839
840defn!(FormatFn, vec![arg!(string)], Some(arg!(any)));
841
842/// Convert a Value to its string representation for formatting
843fn var_to_format_string(v: &Value) -> String {
844    match v {
845        Value::String(s) => s.clone(),
846        Value::Number(n) => n.to_string(),
847        Value::Bool(b) => b.to_string(),
848        Value::Null => "null".to_string(),
849        _ => serde_json::to_string(v).unwrap_or_else(|_| "null".to_string()),
850    }
851}
852
853impl Function for FormatFn {
854    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
855        self.signature.validate(args, ctx)?;
856        let template = args[0]
857            .as_str()
858            .ok_or_else(|| custom_error(ctx, "Expected template string"))?;
859
860        let mut result = template.to_string();
861
862        // Check if second arg is an array or object for unified formatting
863        if args.len() == 2 {
864            if let Some(arr) = args[1].as_array() {
865                // Array-based positional: format('Hello {0}', ['World'])
866                for (i, item) in arr.iter().enumerate() {
867                    let placeholder = format!("{{{}}}", i);
868                    let value = var_to_format_string(item);
869                    result = result.replace(&placeholder, &value);
870                }
871                return Ok(Value::String(result));
872            } else if let Some(obj) = args[1].as_object() {
873                // Object-based named: format('Hello {name}', {name: 'World'})
874                for (key, val) in obj.iter() {
875                    let placeholder = format!("{{{}}}", key);
876                    let value = var_to_format_string(val);
877                    result = result.replace(&placeholder, &value);
878                }
879                return Ok(Value::String(result));
880            }
881        }
882
883        // Fallback: variadic arguments format('Hello {0}', 'World')
884        for (i, arg) in args.iter().skip(1).enumerate() {
885            let placeholder = format!("{{{}}}", i);
886            let value = var_to_format_string(arg);
887            result = result.replace(&placeholder, &value);
888        }
889
890        Ok(Value::String(result))
891    }
892}
893
894// =============================================================================
895// sprintf(format_string, ...args) -> string
896// Printf-style formatting with %s, %d, %f, etc.
897// =============================================================================
898
899defn!(SprintfFn, vec![arg!(string)], Some(arg!(any)));
900
901impl Function for SprintfFn {
902    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
903        self.signature.validate(args, ctx)?;
904        let format_str = args[0]
905            .as_str()
906            .ok_or_else(|| custom_error(ctx, "Expected format string"))?;
907
908        // Get arguments - either from array or variadic
909        let format_args: Vec<&Value> = if args.len() == 2 {
910            if let Some(arr) = args[1].as_array() {
911                arr.iter().collect()
912            } else {
913                args.iter().skip(1).collect()
914            }
915        } else {
916            args.iter().skip(1).collect()
917        };
918
919        let mut result = String::new();
920        let mut arg_index = 0;
921        let mut chars = format_str.chars().peekable();
922
923        while let Some(c) = chars.next() {
924            if c == '%' {
925                if let Some(&next) = chars.peek() {
926                    if next == '%' {
927                        // Escaped %
928                        result.push('%');
929                        chars.next();
930                        continue;
931                    }
932
933                    // Parse format specifier
934                    let mut width = String::new();
935                    let mut precision = String::new();
936                    let mut in_precision = false;
937
938                    // Parse width and precision
939                    while let Some(&ch) = chars.peek() {
940                        if ch == '.' {
941                            in_precision = true;
942                            chars.next();
943                        } else if ch.is_ascii_digit() || ch == '-' || ch == '+' {
944                            if in_precision {
945                                precision.push(ch);
946                            } else {
947                                width.push(ch);
948                            }
949                            chars.next();
950                        } else {
951                            break;
952                        }
953                    }
954
955                    // Get the format type
956                    if let Some(fmt_type) = chars.next() {
957                        if arg_index < format_args.len() {
958                            let arg = format_args[arg_index];
959                            arg_index += 1;
960
961                            let formatted = match fmt_type {
962                                's' => var_to_format_string(arg),
963                                'd' | 'i' => {
964                                    if let Some(n) = arg.as_f64() {
965                                        format!("{}", n as i64)
966                                    } else {
967                                        "0".to_string()
968                                    }
969                                }
970                                'f' => {
971                                    if let Some(n) = arg.as_f64() {
972                                        let prec: usize = precision.parse().unwrap_or(6);
973                                        format!("{:.prec$}", n, prec = prec)
974                                    } else {
975                                        "0.0".to_string()
976                                    }
977                                }
978                                'e' => {
979                                    if let Some(n) = arg.as_f64() {
980                                        let prec: usize = precision.parse().unwrap_or(6);
981                                        format!("{:.prec$e}", n, prec = prec)
982                                    } else {
983                                        "0e0".to_string()
984                                    }
985                                }
986                                'x' => {
987                                    if let Some(n) = arg.as_f64() {
988                                        format!("{:x}", n as i64)
989                                    } else {
990                                        "0".to_string()
991                                    }
992                                }
993                                'X' => {
994                                    if let Some(n) = arg.as_f64() {
995                                        format!("{:X}", n as i64)
996                                    } else {
997                                        "0".to_string()
998                                    }
999                                }
1000                                'o' => {
1001                                    if let Some(n) = arg.as_f64() {
1002                                        format!("{:o}", n as i64)
1003                                    } else {
1004                                        "0".to_string()
1005                                    }
1006                                }
1007                                'b' => {
1008                                    if let Some(n) = arg.as_f64() {
1009                                        format!("{:b}", n as i64)
1010                                    } else {
1011                                        "0".to_string()
1012                                    }
1013                                }
1014                                'c' => {
1015                                    if let Some(n) = arg.as_f64() {
1016                                        char::from_u32(n as u32)
1017                                            .map(|c| c.to_string())
1018                                            .unwrap_or_default()
1019                                    } else if let Some(s) = arg.as_str() {
1020                                        s.chars().next().map(|c| c.to_string()).unwrap_or_default()
1021                                    } else {
1022                                        String::new()
1023                                    }
1024                                }
1025                                _ => {
1026                                    // Unknown format, just output as-is
1027                                    format!("%{}{}", width, fmt_type)
1028                                }
1029                            };
1030
1031                            // Apply width if specified
1032                            if !width.is_empty() {
1033                                let w: i32 = width.parse().unwrap_or(0);
1034                                if w < 0 {
1035                                    // Left-align
1036                                    result.push_str(&format!(
1037                                        "{:<width$}",
1038                                        formatted,
1039                                        width = w.unsigned_abs() as usize
1040                                    ));
1041                                } else {
1042                                    // Right-align
1043                                    result.push_str(&format!(
1044                                        "{:>width$}",
1045                                        formatted,
1046                                        width = w as usize
1047                                    ));
1048                                }
1049                            } else {
1050                                result.push_str(&formatted);
1051                            }
1052                        } else {
1053                            // Not enough arguments, output placeholder
1054                            result.push('%');
1055                            result.push_str(&width);
1056                            if !precision.is_empty() {
1057                                result.push('.');
1058                                result.push_str(&precision);
1059                            }
1060                            result.push(fmt_type);
1061                        }
1062                    }
1063                } else {
1064                    // % at end of string
1065                    result.push('%');
1066                }
1067            } else {
1068                result.push(c);
1069            }
1070        }
1071
1072        Ok(Value::String(result))
1073    }
1074}
1075
1076// =============================================================================
1077// ltrimstr(string, prefix) -> string
1078// =============================================================================
1079
1080defn!(LtrimstrFn, vec![arg!(string), arg!(string)], None);
1081
1082impl Function for LtrimstrFn {
1083    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1084        self.signature.validate(args, ctx)?;
1085        let s = args[0]
1086            .as_str()
1087            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1088        let prefix = args[1]
1089            .as_str()
1090            .ok_or_else(|| custom_error(ctx, "Expected prefix string"))?;
1091
1092        let result = s.strip_prefix(prefix).unwrap_or(s).to_string();
1093        Ok(Value::String(result))
1094    }
1095}
1096
1097// =============================================================================
1098// rtrimstr(string, suffix) -> string
1099// =============================================================================
1100
1101defn!(RtrimstrFn, vec![arg!(string), arg!(string)], None);
1102
1103impl Function for RtrimstrFn {
1104    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1105        self.signature.validate(args, ctx)?;
1106        let s = args[0]
1107            .as_str()
1108            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1109        let suffix = args[1]
1110            .as_str()
1111            .ok_or_else(|| custom_error(ctx, "Expected suffix string"))?;
1112
1113        let result = s.strip_suffix(suffix).unwrap_or(s).to_string();
1114        Ok(Value::String(result))
1115    }
1116}
1117
1118// =============================================================================
1119// indices(string, search) -> array of indices
1120// =============================================================================
1121
1122defn!(IndicesFn, vec![arg!(string), arg!(string)], None);
1123
1124impl Function for IndicesFn {
1125    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1126        self.signature.validate(args, ctx)?;
1127        let s = args[0]
1128            .as_str()
1129            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1130        let search = args[1]
1131            .as_str()
1132            .ok_or_else(|| custom_error(ctx, "Expected search string"))?;
1133
1134        // Find all indices (including overlapping matches)
1135        let mut indices: Vec<Value> = Vec::new();
1136        if !search.is_empty() {
1137            let mut start = 0;
1138            while let Some(pos) = s[start..].find(search) {
1139                let actual_pos = start + pos;
1140                indices.push(Value::Number(Number::from(actual_pos as i64)));
1141                start = actual_pos + 1; // Move by 1 to find overlapping matches
1142            }
1143        }
1144
1145        Ok(Value::Array(indices))
1146    }
1147}
1148
1149// =============================================================================
1150// inside(search, string) -> boolean
1151// =============================================================================
1152
1153defn!(InsideFn, vec![arg!(string), arg!(string)], None);
1154
1155impl Function for InsideFn {
1156    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1157        self.signature.validate(args, ctx)?;
1158        let search = args[0]
1159            .as_str()
1160            .ok_or_else(|| custom_error(ctx, "Expected search string"))?;
1161        let s = args[1]
1162            .as_str()
1163            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1164
1165        Ok(Value::Bool(s.contains(search)))
1166    }
1167}
1168
1169// =============================================================================
1170// humanize(string) -> string
1171// Converts a camelCase, snake_case, or kebab-case string to a human-readable form
1172// =============================================================================
1173
1174defn!(HumanizeFn, vec![arg!(string)], None);
1175
1176impl Function for HumanizeFn {
1177    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1178        self.signature.validate(args, ctx)?;
1179        let s = args[0]
1180            .as_str()
1181            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1182
1183        // Split on underscores, hyphens, and camelCase boundaries
1184        let mut result = String::new();
1185        let mut prev_was_lower = false;
1186        let mut word_start = true;
1187
1188        for c in s.chars() {
1189            if c == '_' || c == '-' {
1190                if !result.is_empty() && !result.ends_with(' ') {
1191                    result.push(' ');
1192                }
1193                word_start = true;
1194                prev_was_lower = false;
1195            } else if c.is_uppercase() && prev_was_lower {
1196                // camelCase boundary
1197                result.push(' ');
1198                if word_start {
1199                    result.push(c); // Keep first letter of sentence uppercase
1200                } else {
1201                    result.push(c.to_lowercase().next().unwrap_or(c));
1202                }
1203                word_start = false;
1204                prev_was_lower = false;
1205            } else {
1206                if word_start && result.is_empty() {
1207                    // First character of the string - capitalize it
1208                    result.push(c.to_uppercase().next().unwrap_or(c));
1209                } else {
1210                    result.push(c.to_lowercase().next().unwrap_or(c));
1211                }
1212                prev_was_lower = c.is_lowercase();
1213                word_start = false;
1214            }
1215        }
1216
1217        Ok(Value::String(result))
1218    }
1219}
1220
1221// =============================================================================
1222// deburr(string) -> string
1223// Removes diacritical marks (accents) from characters
1224// =============================================================================
1225
1226defn!(DeburrrFn, vec![arg!(string)], None);
1227
1228impl Function for DeburrrFn {
1229    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1230        self.signature.validate(args, ctx)?;
1231        let s = args[0]
1232            .as_str()
1233            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1234
1235        // Remove diacritical marks by mapping accented characters to ASCII
1236        let result: String = s
1237            .chars()
1238            .map(|c| match c {
1239                '\u{00C0}' | '\u{00C1}' | '\u{00C2}' | '\u{00C3}' | '\u{00C4}' | '\u{00C5}' => 'A',
1240                '\u{00C6}' => 'A', // Could be "AE" but keeping single char
1241                '\u{00C7}' => 'C',
1242                '\u{00C8}' | '\u{00C9}' | '\u{00CA}' | '\u{00CB}' => 'E',
1243                '\u{00CC}' | '\u{00CD}' | '\u{00CE}' | '\u{00CF}' => 'I',
1244                '\u{00D0}' => 'D',
1245                '\u{00D1}' => 'N',
1246                '\u{00D2}' | '\u{00D3}' | '\u{00D4}' | '\u{00D5}' | '\u{00D6}' | '\u{00D8}' => 'O',
1247                '\u{00D9}' | '\u{00DA}' | '\u{00DB}' | '\u{00DC}' => 'U',
1248                '\u{00DD}' => 'Y',
1249                '\u{00DE}' => 'T', // Thorn
1250                '\u{00DF}' => 's', // German sharp s
1251                '\u{00E0}' | '\u{00E1}' | '\u{00E2}' | '\u{00E3}' | '\u{00E4}' | '\u{00E5}' => 'a',
1252                '\u{00E6}' => 'a', // Could be "ae" but keeping single char
1253                '\u{00E7}' => 'c',
1254                '\u{00E8}' | '\u{00E9}' | '\u{00EA}' | '\u{00EB}' => 'e',
1255                '\u{00EC}' | '\u{00ED}' | '\u{00EE}' | '\u{00EF}' => 'i',
1256                '\u{00F0}' => 'd',
1257                '\u{00F1}' => 'n',
1258                '\u{00F2}' | '\u{00F3}' | '\u{00F4}' | '\u{00F5}' | '\u{00F6}' | '\u{00F8}' => 'o',
1259                '\u{00F9}' | '\u{00FA}' | '\u{00FB}' | '\u{00FC}' => 'u',
1260                '\u{00FD}' | '\u{00FF}' => 'y',
1261                '\u{00FE}' => 't', // Thorn
1262                _ => c,
1263            })
1264            .collect();
1265
1266        Ok(Value::String(result))
1267    }
1268}
1269
1270// =============================================================================
1271// words(string) -> array
1272// Splits string into an array of words
1273// =============================================================================
1274
1275defn!(WordsFn, vec![arg!(string)], None);
1276
1277impl Function for WordsFn {
1278    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1279        self.signature.validate(args, ctx)?;
1280        let s = args[0]
1281            .as_str()
1282            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1283
1284        // Split on word boundaries: spaces, underscores, hyphens, and camelCase
1285        let mut words = Vec::new();
1286        let mut current_word = String::new();
1287        let mut prev_was_lower = false;
1288
1289        for c in s.chars() {
1290            if c.is_whitespace() || c == '_' || c == '-' {
1291                if !current_word.is_empty() {
1292                    words.push(Value::String(current_word.clone()));
1293                    current_word.clear();
1294                }
1295                prev_was_lower = false;
1296            } else if c.is_uppercase() && prev_was_lower {
1297                // camelCase boundary
1298                if !current_word.is_empty() {
1299                    words.push(Value::String(current_word.clone()));
1300                    current_word.clear();
1301                }
1302                current_word.push(c);
1303                prev_was_lower = false;
1304            } else {
1305                current_word.push(c);
1306                prev_was_lower = c.is_lowercase();
1307            }
1308        }
1309
1310        if !current_word.is_empty() {
1311            words.push(Value::String(current_word));
1312        }
1313
1314        Ok(Value::Array(words))
1315    }
1316}
1317
1318// =============================================================================
1319// escape(string) -> string
1320// Escapes HTML entities: &, <, >, ", '
1321// =============================================================================
1322
1323defn!(EscapeFn, vec![arg!(string)], None);
1324
1325impl Function for EscapeFn {
1326    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1327        self.signature.validate(args, ctx)?;
1328        let s = args[0]
1329            .as_str()
1330            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1331
1332        let result = s
1333            .replace('&', "&amp;")
1334            .replace('<', "&lt;")
1335            .replace('>', "&gt;")
1336            .replace('"', "&quot;")
1337            .replace('\'', "&#39;");
1338
1339        Ok(Value::String(result))
1340    }
1341}
1342
1343// =============================================================================
1344// unescape(string) -> string
1345// Unescapes HTML entities: &amp;, &lt;, &gt;, &quot;, &#39;
1346// =============================================================================
1347
1348defn!(UnescapeFn, vec![arg!(string)], None);
1349
1350impl Function for UnescapeFn {
1351    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1352        self.signature.validate(args, ctx)?;
1353        let s = args[0]
1354            .as_str()
1355            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1356
1357        let result = s
1358            .replace("&amp;", "&")
1359            .replace("&lt;", "<")
1360            .replace("&gt;", ">")
1361            .replace("&quot;", "\"")
1362            .replace("&#39;", "'");
1363
1364        Ok(Value::String(result))
1365    }
1366}
1367
1368// =============================================================================
1369// escape_regex(string) -> string
1370// Escapes special regex characters
1371// =============================================================================
1372
1373defn!(EscapeRegexFn, vec![arg!(string)], None);
1374
1375impl Function for EscapeRegexFn {
1376    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1377        self.signature.validate(args, ctx)?;
1378        let s = args[0]
1379            .as_str()
1380            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1381
1382        // Escape regex special characters: \ ^ $ . | ? * + ( ) [ ] { }
1383        let mut result = String::with_capacity(s.len() * 2);
1384        for c in s.chars() {
1385            match c {
1386                '\\' | '^' | '$' | '.' | '|' | '?' | '*' | '+' | '(' | ')' | '[' | ']' | '{'
1387                | '}' => {
1388                    result.push('\\');
1389                    result.push(c);
1390                }
1391                _ => result.push(c),
1392            }
1393        }
1394
1395        Ok(Value::String(result))
1396    }
1397}
1398
1399// =============================================================================
1400// start_case(string) -> string
1401// Converts string to Start Case (capitalize first letter of each word)
1402// =============================================================================
1403
1404defn!(StartCaseFn, vec![arg!(string)], None);
1405
1406impl Function for StartCaseFn {
1407    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1408        self.signature.validate(args, ctx)?;
1409        let s = args[0]
1410            .as_str()
1411            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1412
1413        // Split on word boundaries and capitalize each word
1414        let mut result = String::new();
1415        let mut prev_was_lower = false;
1416        let mut word_start = true;
1417
1418        for c in s.chars() {
1419            if c.is_whitespace() || c == '_' || c == '-' {
1420                if !result.is_empty() && !result.ends_with(' ') {
1421                    result.push(' ');
1422                }
1423                word_start = true;
1424                prev_was_lower = false;
1425            } else if c.is_uppercase() && prev_was_lower {
1426                // camelCase boundary - start new word
1427                result.push(' ');
1428                result.push(c); // Keep uppercase
1429                word_start = false;
1430                prev_was_lower = false;
1431            } else {
1432                if word_start {
1433                    result.push(c.to_uppercase().next().unwrap_or(c));
1434                } else {
1435                    result.push(c.to_lowercase().next().unwrap_or(c));
1436                }
1437                prev_was_lower = c.is_lowercase();
1438                word_start = false;
1439            }
1440        }
1441
1442        Ok(Value::String(result))
1443    }
1444}
1445
1446// =============================================================================
1447// mask(string, visible?, char?) -> string
1448// Mask a string, optionally keeping the last N characters visible
1449// =============================================================================
1450
1451defn!(MaskFn, vec![arg!(string)], Some(arg!(any)));
1452
1453impl Function for MaskFn {
1454    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1455        self.signature.validate(args, ctx)?;
1456        let s = args[0]
1457            .as_str()
1458            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1459
1460        // Number of characters to keep visible at the end (default: 0)
1461        let visible = if args.len() > 1 && !args[1].is_null() {
1462            args[1].as_f64().unwrap_or(0.0) as usize
1463        } else {
1464            0
1465        };
1466
1467        // Mask character (default: '*')
1468        let mask_char = if args.len() > 2 && !args[2].is_null() {
1469            args[2]
1470                .as_str()
1471                .and_then(|s| s.chars().next())
1472                .unwrap_or('*')
1473        } else {
1474            '*'
1475        };
1476
1477        let char_count = s.chars().count();
1478
1479        if visible >= char_count {
1480            // If visible >= length, return original string
1481            return Ok(Value::String(s.to_string()));
1482        }
1483
1484        let mask_count = char_count - visible;
1485        let masked: String = std::iter::repeat_n(mask_char, mask_count)
1486            .chain(s.chars().skip(mask_count))
1487            .collect();
1488
1489        Ok(Value::String(masked))
1490    }
1491}
1492
1493// =============================================================================
1494// redact_pattern(string, pattern, replacement?) -> string
1495// Replace all matches of a regex pattern with a replacement string.
1496// Named `redact_pattern` to avoid colliding with the object-category `redact`,
1497// which redacts named fields in a structure.
1498// =============================================================================
1499
1500defn!(
1501    RedactPatternFn,
1502    vec![arg!(string), arg!(string)],
1503    Some(arg!(any))
1504);
1505
1506impl Function for RedactPatternFn {
1507    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1508        self.signature.validate(args, ctx)?;
1509        let s = args[0]
1510            .as_str()
1511            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1512        let pattern = args[1]
1513            .as_str()
1514            .ok_or_else(|| custom_error(ctx, "Expected string pattern"))?;
1515
1516        // Replacement string (default: "[REDACTED]")
1517        let replacement = if args.len() > 2 && !args[2].is_null() {
1518            args[2]
1519                .as_str()
1520                .map(|s| s.to_string())
1521                .unwrap_or_else(|| "[REDACTED]".to_string())
1522        } else {
1523            "[REDACTED]".to_string()
1524        };
1525
1526        let re = Regex::new(pattern)
1527            .map_err(|e| custom_error(ctx, &format!("Invalid regex pattern: {}", e)))?;
1528
1529        let result = re.replace_all(s, replacement.as_str());
1530        Ok(Value::String(result.into_owned()))
1531    }
1532}
1533
1534// =============================================================================
1535// normalize_whitespace(string) -> string
1536// Collapse multiple whitespace characters into a single space
1537// =============================================================================
1538
1539defn!(NormalizeWhitespaceFn, vec![arg!(string)], None);
1540
1541impl Function for NormalizeWhitespaceFn {
1542    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1543        self.signature.validate(args, ctx)?;
1544        let s = args[0]
1545            .as_str()
1546            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1547
1548        // Split on whitespace and rejoin with single spaces
1549        let result: String = s.split_whitespace().collect::<Vec<_>>().join(" ");
1550
1551        Ok(Value::String(result))
1552    }
1553}
1554
1555// =============================================================================
1556// is_blank(string) -> boolean
1557// Check if string is empty or contains only whitespace
1558// =============================================================================
1559
1560defn!(IsBlankFn, vec![arg!(string)], None);
1561
1562impl Function for IsBlankFn {
1563    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1564        self.signature.validate(args, ctx)?;
1565        let s = args[0]
1566            .as_str()
1567            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1568
1569        let is_blank = s.trim().is_empty();
1570        Ok(Value::Bool(is_blank))
1571    }
1572}
1573
1574// =============================================================================
1575// abbreviate(string, max_length, suffix?) -> string
1576// Truncate string to max length with ellipsis suffix
1577// =============================================================================
1578
1579defn!(
1580    AbbreviateFn,
1581    vec![arg!(string), arg!(number)],
1582    Some(arg!(any))
1583);
1584
1585impl Function for AbbreviateFn {
1586    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1587        self.signature.validate(args, ctx)?;
1588        let s = args[0]
1589            .as_str()
1590            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1591        let max_length = args[1]
1592            .as_f64()
1593            .ok_or_else(|| custom_error(ctx, "Expected number for max_length"))?
1594            as usize;
1595
1596        // Suffix (default: "...")
1597        let suffix = if args.len() > 2 && !args[2].is_null() {
1598            args[2]
1599                .as_str()
1600                .map(|s| s.to_string())
1601                .unwrap_or_else(|| "...".to_string())
1602        } else {
1603            "...".to_string()
1604        };
1605
1606        let char_count = s.chars().count();
1607        let suffix_len = suffix.chars().count();
1608
1609        if char_count <= max_length {
1610            return Ok(Value::String(s.to_string()));
1611        }
1612
1613        if max_length <= suffix_len {
1614            // If max_length is too small for suffix, just truncate
1615            let result: String = s.chars().take(max_length).collect();
1616            return Ok(Value::String(result));
1617        }
1618
1619        let truncate_at = max_length - suffix_len;
1620        let mut result: String = s.chars().take(truncate_at).collect();
1621        result.push_str(&suffix);
1622
1623        Ok(Value::String(result))
1624    }
1625}
1626
1627// =============================================================================
1628// center(string, width, char?) -> string
1629// Center-pad a string to the given width
1630// =============================================================================
1631
1632defn!(CenterFn, vec![arg!(string), arg!(number)], Some(arg!(any)));
1633
1634impl Function for CenterFn {
1635    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1636        self.signature.validate(args, ctx)?;
1637        let s = args[0]
1638            .as_str()
1639            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1640        let width = args[1]
1641            .as_f64()
1642            .ok_or_else(|| custom_error(ctx, "Expected number for width"))?
1643            as usize;
1644        if width > MAX_GENERATED_STRING_BYTES {
1645            return Err(custom_error(
1646                ctx,
1647                "center width exceeds the maximum allowed size",
1648            ));
1649        }
1650
1651        // Padding character (default: ' ')
1652        let pad_char = if args.len() > 2 && !args[2].is_null() {
1653            args[2]
1654                .as_str()
1655                .and_then(|s| s.chars().next())
1656                .unwrap_or(' ')
1657        } else {
1658            ' '
1659        };
1660
1661        let char_count = s.chars().count();
1662
1663        if char_count >= width {
1664            return Ok(Value::String(s.to_string()));
1665        }
1666
1667        let total_padding = width - char_count;
1668        let left_padding = total_padding / 2;
1669        let right_padding = total_padding - left_padding;
1670
1671        let mut result = String::with_capacity(width);
1672        for _ in 0..left_padding {
1673            result.push(pad_char);
1674        }
1675        result.push_str(s);
1676        for _ in 0..right_padding {
1677            result.push(pad_char);
1678        }
1679
1680        Ok(Value::String(result))
1681    }
1682}
1683
1684// =============================================================================
1685// reverse_string(string) -> string
// Reverse a string by Unicode scalar value (`char`); grapheme clusters are not kept intact
1687// =============================================================================
1688
1689defn!(ReverseStringFn, vec![arg!(string)], None);
1690
1691impl Function for ReverseStringFn {
1692    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1693        self.signature.validate(args, ctx)?;
1694        let s = args[0]
1695            .as_str()
1696            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1697
1698        let result: String = s.chars().rev().collect();
1699        Ok(Value::String(result))
1700    }
1701}
1702
1703// =============================================================================
1704// explode(string) -> array
1705// Convert a string to an array of Unicode codepoints (integers)
1706// =============================================================================
1707
1708defn!(ExplodeFn, vec![arg!(string)], None);
1709
1710impl Function for ExplodeFn {
1711    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1712        self.signature.validate(args, ctx)?;
1713        let s = args[0]
1714            .as_str()
1715            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1716
1717        let codepoints: Vec<Value> = s
1718            .chars()
1719            .map(|c| Value::Number(Number::from(c as u32)))
1720            .collect();
1721
1722        Ok(Value::Array(codepoints))
1723    }
1724}
1725
1726// =============================================================================
1727// implode(array) -> string
1728// Convert an array of Unicode codepoints (integers) back to a string
1729// =============================================================================
1730
1731defn!(ImplodeFn, vec![arg!(array)], None);
1732
1733impl Function for ImplodeFn {
1734    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1735        self.signature.validate(args, ctx)?;
1736        let arr = args[0]
1737            .as_array()
1738            .ok_or_else(|| custom_error(ctx, "Expected array argument"))?;
1739
1740        let mut result = String::new();
1741        for item in arr.iter() {
1742            let codepoint = item
1743                .as_f64()
1744                .ok_or_else(|| custom_error(ctx, "Expected array of numbers (codepoints)"))?
1745                as u32;
1746
1747            let c = char::from_u32(codepoint).ok_or_else(|| {
1748                custom_error(ctx, &format!("Invalid Unicode codepoint: {}", codepoint))
1749            })?;
1750            result.push(c);
1751        }
1752
1753        Ok(Value::String(result))
1754    }
1755}
1756
1757// =============================================================================
1758// shell_escape(string) -> string
1759// Escape a string for safe use in shell commands (POSIX sh compatible)
1760// =============================================================================
1761
1762defn!(ShellEscapeFn, vec![arg!(string)], None);
1763
1764impl Function for ShellEscapeFn {
1765    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
1766        self.signature.validate(args, ctx)?;
1767        let s = args[0]
1768            .as_str()
1769            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
1770
1771        // Use single quotes for shell escaping (POSIX compatible)
1772        // If the string contains single quotes, we need to handle them specially:
1773        // 'it'\''s' becomes: 'it' + \' + 's' (end quote, escaped quote, start quote)
1774        let escaped = if s.is_empty() {
1775            "''".to_string()
1776        } else if s
1777            .chars()
1778            .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '/')
1779        {
1780            // Safe characters that don't need quoting
1781            s.to_string()
1782        } else if !s.contains('\'') {
1783            // No single quotes, just wrap in single quotes
1784            format!("'{}'", s)
1785        } else {
1786            // Contains single quotes - use the '\'' technique
1787            let mut result = String::with_capacity(s.len() + 10);
1788            result.push('\'');
1789            for c in s.chars() {
1790                if c == '\'' {
1791                    result.push_str("'\\''");
1792                } else {
1793                    result.push(c);
1794                }
1795            }
1796            result.push('\'');
1797            result
1798        };
1799
1800        Ok(Value::String(escaped))
1801    }
1802}
1803
1804// =============================================================================
1805// register_filtered
1806// =============================================================================
1807
1808/// Register only the string functions that are in the enabled set.
1809pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
1810    register_if_enabled(runtime, "lower", enabled, Box::new(LowerFn::new()));
1811    register_if_enabled(runtime, "upper", enabled, Box::new(UpperFn::new()));
1812    register_if_enabled(runtime, "trim", enabled, Box::new(TrimFn::new()));
1813    register_if_enabled(runtime, "trim_left", enabled, Box::new(TrimStartFn::new()));
1814    register_if_enabled(runtime, "trim_right", enabled, Box::new(TrimEndFn::new()));
1815    register_if_enabled(runtime, "split", enabled, Box::new(SplitFn::new()));
1816    register_if_enabled(runtime, "replace", enabled, Box::new(ReplaceFn::new()));
1817    register_if_enabled(runtime, "pad_left", enabled, Box::new(PadLeftFn::new()));
1818    register_if_enabled(runtime, "pad_right", enabled, Box::new(PadRightFn::new()));
1819    register_if_enabled(runtime, "substr", enabled, Box::new(SubstrFn::new()));
1820    register_if_enabled(
1821        runtime,
1822        "capitalize",
1823        enabled,
1824        Box::new(CapitalizeFn::new()),
1825    );
1826    register_if_enabled(runtime, "title", enabled, Box::new(TitleFn::new()));
1827    register_if_enabled(runtime, "repeat", enabled, Box::new(RepeatFn::new()));
1828    register_if_enabled(runtime, "find_first", enabled, Box::new(IndexOfFn::new()));
1829    register_if_enabled(
1830        runtime,
1831        "find_last",
1832        enabled,
1833        Box::new(LastIndexOfFn::new()),
1834    );
1835    register_if_enabled(runtime, "slice", enabled, Box::new(SliceFn::new()));
1836    register_if_enabled(runtime, "concat", enabled, Box::new(ConcatFn::new()));
1837    register_if_enabled(runtime, "upper_case", enabled, Box::new(UpperCaseFn::new()));
1838    register_if_enabled(runtime, "lower_case", enabled, Box::new(LowerCaseFn::new()));
1839    register_if_enabled(runtime, "title_case", enabled, Box::new(TitleCaseFn::new()));
1840    register_if_enabled(runtime, "camel_case", enabled, Box::new(CamelCaseFn::new()));
1841    register_if_enabled(runtime, "snake_case", enabled, Box::new(SnakeCaseFn::new()));
1842    register_if_enabled(runtime, "kebab_case", enabled, Box::new(KebabCaseFn::new()));
1843    register_if_enabled(
1844        runtime,
1845        "pascal_case",
1846        enabled,
1847        Box::new(PascalCaseFn::new()),
1848    );
1849    register_if_enabled(
1850        runtime,
1851        "shouty_snake_case",
1852        enabled,
1853        Box::new(ShoutySnakeCaseFn::new()),
1854    );
1855    register_if_enabled(
1856        runtime,
1857        "shouty_kebab_case",
1858        enabled,
1859        Box::new(ShoutyKebabCaseFn::new()),
1860    );
1861    register_if_enabled(runtime, "train_case", enabled, Box::new(TrainCaseFn::new()));
1862    register_if_enabled(runtime, "truncate", enabled, Box::new(TruncateFn::new()));
1863    register_if_enabled(runtime, "wrap", enabled, Box::new(WrapFn::new()));
1864    register_if_enabled(runtime, "format", enabled, Box::new(FormatFn::new()));
1865    register_if_enabled(runtime, "sprintf", enabled, Box::new(SprintfFn::new()));
1866    register_if_enabled(runtime, "ltrimstr", enabled, Box::new(LtrimstrFn::new()));
1867    register_if_enabled(runtime, "rtrimstr", enabled, Box::new(RtrimstrFn::new()));
1868    register_if_enabled(runtime, "indices", enabled, Box::new(IndicesFn::new()));
1869    register_if_enabled(runtime, "inside", enabled, Box::new(InsideFn::new()));
1870    register_if_enabled(runtime, "humanize", enabled, Box::new(HumanizeFn::new()));
1871    register_if_enabled(runtime, "deburr", enabled, Box::new(DeburrrFn::new()));
1872    register_if_enabled(runtime, "words", enabled, Box::new(WordsFn::new()));
1873    register_if_enabled(runtime, "escape", enabled, Box::new(EscapeFn::new()));
1874    register_if_enabled(runtime, "unescape", enabled, Box::new(UnescapeFn::new()));
1875    register_if_enabled(
1876        runtime,
1877        "escape_regex",
1878        enabled,
1879        Box::new(EscapeRegexFn::new()),
1880    );
1881    register_if_enabled(runtime, "start_case", enabled, Box::new(StartCaseFn::new()));
1882    // Registered as `obscure`: the canonical `mask` is the object-module impl;
1883    // this custom-mask-character variant lives under a distinct name to avoid a
1884    // nondeterministic collision.
1885    register_if_enabled(runtime, "obscure", enabled, Box::new(MaskFn::new()));
1886    register_if_enabled(
1887        runtime,
1888        "redact_pattern",
1889        enabled,
1890        Box::new(RedactPatternFn::new()),
1891    );
1892    register_if_enabled(
1893        runtime,
1894        "normalize_whitespace",
1895        enabled,
1896        Box::new(NormalizeWhitespaceFn::new()),
1897    );
1898    register_if_enabled(runtime, "is_blank", enabled, Box::new(IsBlankFn::new()));
1899    register_if_enabled(
1900        runtime,
1901        "abbreviate",
1902        enabled,
1903        Box::new(AbbreviateFn::new()),
1904    );
1905    register_if_enabled(runtime, "center", enabled, Box::new(CenterFn::new()));
1906    register_if_enabled(
1907        runtime,
1908        "reverse_string",
1909        enabled,
1910        Box::new(ReverseStringFn::new()),
1911    );
1912    register_if_enabled(runtime, "explode", enabled, Box::new(ExplodeFn::new()));
1913    register_if_enabled(runtime, "implode", enabled, Box::new(ImplodeFn::new()));
1914    register_if_enabled(
1915        runtime,
1916        "shell_escape",
1917        enabled,
1918        Box::new(ShellEscapeFn::new()),
1919    );
1920}
1921
1922#[cfg(test)]
1923mod tests {
1924    use crate::Runtime;
1925    use serde_json::json;
1926
    /// Test helper: builds a `Runtime` through `Runtime::builder()` with
    /// `with_standard()` and `with_all_extensions()` applied, so each test
    /// can compile expressions against the full function set.
    fn setup_runtime() -> Runtime {
        Runtime::builder()
            .with_standard()
            .with_all_extensions()
            .build()
    }
1933
1934    #[test]
1935    fn test_lower() {
1936        let runtime = setup_runtime();
1937        let expr = runtime.compile("lower(@)").unwrap();
1938        let result = expr.search(&json!("HELLO")).unwrap();
1939        assert_eq!(result.as_str().unwrap(), "hello");
1940    }
1941
1942    #[test]
1943    fn test_repeat_rejects_excessive_output() {
1944        // A petabyte-sized repeat must error instead of attempting the alloc.
1945        let runtime = setup_runtime();
1946        let expr = runtime.compile("repeat('x', `1000000000000`)").unwrap();
1947        assert!(expr.search(&json!(null)).is_err());
1948        // A normal repeat still works.
1949        let expr = runtime.compile("repeat('ab', `3`)").unwrap();
1950        assert_eq!(expr.search(&json!(null)).unwrap(), json!("ababab"));
1951    }
1952
1953    #[test]
1954    fn test_upper() {
1955        let runtime = setup_runtime();
1956        let expr = runtime.compile("upper(@)").unwrap();
1957        let result = expr.search(&json!("hello")).unwrap();
1958        assert_eq!(result.as_str().unwrap(), "HELLO");
1959    }
1960
1961    #[test]
1962    fn test_trim() {
1963        let runtime = setup_runtime();
1964        let expr = runtime.compile("trim(@)").unwrap();
1965        let result = expr.search(&json!("  hello  ")).unwrap();
1966        assert_eq!(result.as_str().unwrap(), "hello");
1967    }
1968
1969    #[test]
1970    fn test_split() {
1971        let runtime = setup_runtime();
1972        let expr = runtime.compile("split(@, ',')").unwrap();
1973        let result = expr.search(&json!("a,b,c")).unwrap();
1974        let arr = result.as_array().unwrap();
1975        assert_eq!(arr.len(), 3);
1976        assert_eq!(arr[0].as_str().unwrap(), "a");
1977    }
1978
1979    #[test]
1980    fn test_camel_case() {
1981        let runtime = setup_runtime();
1982        let expr = runtime.compile("camel_case(@)").unwrap();
1983        let result = expr.search(&json!("hello_world")).unwrap();
1984        assert_eq!(result.as_str().unwrap(), "helloWorld");
1985    }
1986
1987    #[test]
1988    fn test_snake_case() {
1989        let runtime = setup_runtime();
1990        let expr = runtime.compile("snake_case(@)").unwrap();
1991        let result = expr.search(&json!("helloWorld")).unwrap();
1992        assert_eq!(result.as_str().unwrap(), "hello_world");
1993    }
1994
1995    #[test]
1996    fn test_wrap_basic() {
1997        let runtime = setup_runtime();
1998        let expr = runtime.compile("wrap(@, `5`)").unwrap();
1999        let result = expr.search(&json!("hello world")).unwrap();
2000        assert_eq!(result.as_str().unwrap(), "hello\nworld");
2001    }
2002
2003    #[test]
2004    fn test_wrap_preserves_newlines() {
2005        let runtime = setup_runtime();
2006        let expr = runtime.compile("wrap(@, `100`)").unwrap();
2007        let result = expr.search(&json!("hello\nworld")).unwrap();
2008        assert_eq!(result.as_str().unwrap(), "hello\nworld");
2009    }
2010
2011    #[test]
2012    fn test_wrap_wide_width() {
2013        let runtime = setup_runtime();
2014        let expr = runtime.compile("wrap(@, `100`)").unwrap();
2015        let result = expr.search(&json!("hello world")).unwrap();
2016        assert_eq!(result.as_str().unwrap(), "hello world");
2017    }
2018
2019    #[test]
2020    fn test_ltrimstr() {
2021        let runtime = setup_runtime();
2022        let expr = runtime.compile("ltrimstr(@, 'hello ')").unwrap();
2023        let result = expr.search(&json!("hello world")).unwrap();
2024        assert_eq!(result.as_str().unwrap(), "world");
2025    }
2026
2027    #[test]
2028    fn test_ltrimstr_no_match() {
2029        let runtime = setup_runtime();
2030        let expr = runtime.compile("ltrimstr(@, 'foo')").unwrap();
2031        let result = expr.search(&json!("hello world")).unwrap();
2032        assert_eq!(result.as_str().unwrap(), "hello world");
2033    }
2034
2035    #[test]
2036    fn test_rtrimstr() {
2037        let runtime = setup_runtime();
2038        let expr = runtime.compile("rtrimstr(@, ' world')").unwrap();
2039        let result = expr.search(&json!("hello world")).unwrap();
2040        assert_eq!(result.as_str().unwrap(), "hello");
2041    }
2042
2043    #[test]
2044    fn test_rtrimstr_no_match() {
2045        let runtime = setup_runtime();
2046        let expr = runtime.compile("rtrimstr(@, 'foo')").unwrap();
2047        let result = expr.search(&json!("hello world")).unwrap();
2048        assert_eq!(result.as_str().unwrap(), "hello world");
2049    }
2050
2051    #[test]
2052    fn test_indices() {
2053        let runtime = setup_runtime();
2054        let expr = runtime.compile("indices(@, 'l')").unwrap();
2055        let result = expr.search(&json!("hello")).unwrap();
2056        let arr = result.as_array().unwrap();
2057        assert_eq!(arr.len(), 2);
2058        assert_eq!(arr[0].as_f64().unwrap() as i64, 2);
2059        assert_eq!(arr[1].as_f64().unwrap() as i64, 3);
2060    }
2061
2062    #[test]
2063    fn test_indices_no_match() {
2064        let runtime = setup_runtime();
2065        let expr = runtime.compile("indices(@, 'x')").unwrap();
2066        let result = expr.search(&json!("hello")).unwrap();
2067        let arr = result.as_array().unwrap();
2068        assert_eq!(arr.len(), 0);
2069    }
2070
2071    #[test]
2072    fn test_indices_overlapping() {
2073        let runtime = setup_runtime();
2074        let expr = runtime.compile("indices(@, 'aa')").unwrap();
2075        let result = expr.search(&json!("aaa")).unwrap();
2076        let arr = result.as_array().unwrap();
2077        assert_eq!(arr.len(), 2);
2078        assert_eq!(arr[0].as_f64().unwrap() as i64, 0);
2079        assert_eq!(arr[1].as_f64().unwrap() as i64, 1);
2080    }
2081
2082    #[test]
2083    fn test_inside() {
2084        let runtime = setup_runtime();
2085        let expr = runtime.compile("inside('world', @)").unwrap();
2086        let result = expr.search(&json!("hello world")).unwrap();
2087        assert!(result.as_bool().unwrap());
2088    }
2089
2090    #[test]
2091    fn test_inside_not_found() {
2092        let runtime = setup_runtime();
2093        let expr = runtime.compile("inside('foo', @)").unwrap();
2094        let result = expr.search(&json!("hello world")).unwrap();
2095        assert!(!result.as_bool().unwrap());
2096    }
2097
2098    #[test]
2099    fn test_sprintf_string() {
2100        let runtime = setup_runtime();
2101        let expr = runtime.compile("sprintf('Hello, %s!', @)").unwrap();
2102        let result = expr.search(&json!("World")).unwrap();
2103        assert_eq!(result.as_str().unwrap(), "Hello, World!");
2104    }
2105
2106    #[test]
2107    fn test_sprintf_integer() {
2108        let runtime = setup_runtime();
2109        let expr = runtime.compile("sprintf('%d + %d = %d', @)").unwrap();
2110        let data = json!([1, 2, 3]);
2111        let result = expr.search(&data).unwrap();
2112        assert_eq!(result.as_str().unwrap(), "1 + 2 = 3");
2113    }
2114
2115    #[test]
2116    #[allow(clippy::approx_constant)]
2117    fn test_sprintf_float_precision() {
2118        let runtime = setup_runtime();
2119        let expr = runtime.compile("sprintf('Pi is %.2f', @)").unwrap();
2120        let data = json!(3.14159);
2121        let result = expr.search(&data).unwrap();
2122        assert_eq!(result.as_str().unwrap(), "Pi is 3.14");
2123    }
2124
2125    #[test]
2126    fn test_sprintf_hex() {
2127        let runtime = setup_runtime();
2128        let expr = runtime.compile("sprintf('Hex: %x', @)").unwrap();
2129        let data = json!(255);
2130        let result = expr.search(&data).unwrap();
2131        assert_eq!(result.as_str().unwrap(), "Hex: ff");
2132    }
2133
2134    #[test]
2135    fn test_sprintf_width() {
2136        let runtime = setup_runtime();
2137        let expr = runtime.compile("sprintf('%10s', @)").unwrap();
2138        let result = expr.search(&json!("hi")).unwrap();
2139        assert_eq!(result.as_str().unwrap(), "        hi");
2140    }
2141
2142    #[test]
2143    fn test_sprintf_escaped_percent() {
2144        let runtime = setup_runtime();
2145        let expr = runtime.compile("sprintf('100%% done', @)").unwrap();
2146        let result = expr.search(&json!(null)).unwrap();
2147        assert_eq!(result.as_str().unwrap(), "100% done");
2148    }
2149
2150    // JEP-014 find_first tests
2151    #[test]
2152    fn test_find_first_basic() {
2153        let runtime = setup_runtime();
2154        let expr = runtime.compile("find_first(@, 'world')").unwrap();
2155        let result = expr.search(&json!("hello world")).unwrap();
2156        assert_eq!(result.as_f64().unwrap() as i64, 6);
2157    }
2158
2159    #[test]
2160    fn test_find_first_not_found() {
2161        let runtime = setup_runtime();
2162        let expr = runtime.compile("find_first(@, 'xyz')").unwrap();
2163        let result = expr.search(&json!("hello world")).unwrap();
2164        assert!(result.is_null());
2165    }
2166
2167    #[test]
2168    fn test_find_first_with_start() {
2169        let runtime = setup_runtime();
2170        let expr = runtime.compile("find_first(@, 'o', `5`)").unwrap();
2171        let result = expr.search(&json!("hello world")).unwrap();
2172        assert_eq!(result.as_f64().unwrap() as i64, 7);
2173    }
2174
2175    #[test]
2176    fn test_find_first_with_start_and_end() {
2177        let runtime = setup_runtime();
2178        let expr = runtime.compile("find_first(@, 'o', `0`, `5`)").unwrap();
2179        let result = expr.search(&json!("hello world")).unwrap();
2180        assert_eq!(result.as_f64().unwrap() as i64, 4);
2181    }
2182
2183    #[test]
2184    fn test_find_first_with_negative_start() {
2185        let runtime = setup_runtime();
2186        let expr = runtime.compile("find_first(@, 'o', `-5`)").unwrap();
2187        let result = expr.search(&json!("hello world")).unwrap();
2188        assert_eq!(result.as_f64().unwrap() as i64, 7);
2189    }
2190
2191    // JEP-014 find_last tests
2192    #[test]
2193    fn test_find_last_basic() {
2194        let runtime = setup_runtime();
2195        let expr = runtime.compile("find_last(@, 'o')").unwrap();
2196        let result = expr.search(&json!("hello world")).unwrap();
2197        assert_eq!(result.as_f64().unwrap() as i64, 7);
2198    }
2199
2200    #[test]
2201    fn test_find_last_not_found() {
2202        let runtime = setup_runtime();
2203        let expr = runtime.compile("find_last(@, 'xyz')").unwrap();
2204        let result = expr.search(&json!("hello world")).unwrap();
2205        assert!(result.is_null());
2206    }
2207
2208    #[test]
2209    fn test_find_last_with_start_and_end() {
2210        let runtime = setup_runtime();
2211        let expr = runtime.compile("find_last(@, 'o', `0`, `6`)").unwrap();
2212        let result = expr.search(&json!("hello world")).unwrap();
2213        assert_eq!(result.as_f64().unwrap() as i64, 4);
2214    }
2215
2216    #[test]
2217    fn test_find_last_with_negative_end() {
2218        let runtime = setup_runtime();
2219        let expr = runtime.compile("find_last(@, 'l', `0`, `-1`)").unwrap();
2220        let result = expr.search(&json!("hello world")).unwrap();
2221        assert_eq!(result.as_f64().unwrap() as i64, 9);
2222    }
2223
2224    // =========================================================================
2225    // mask tests
    // Note: `mask` resolves to the object module's implementation (default
    // show_last=4); the string module's variant is registered as `obscure`.
2228    // =========================================================================
2229
2230    #[test]
2231    fn test_mask_default() {
2232        let runtime = setup_runtime();
2233        let expr = runtime.compile("mask(@)").unwrap();
2234        // Default shows last 4; "secret" (6 chars) -> mask 2, show 4
2235        let result = expr.search(&json!("secret")).unwrap();
2236        assert_eq!(result.as_str().unwrap(), "**cret");
2237    }
2238
2239    #[test]
2240    fn test_mask_keep_last_4() {
2241        let runtime = setup_runtime();
2242        let expr = runtime.compile("mask(@, `4`)").unwrap();
2243        let result = expr.search(&json!("4111111111111111")).unwrap();
2244        assert_eq!(result.as_str().unwrap(), "************1111");
2245    }
2246
2247    #[test]
2248    fn test_mask_visible_exceeds_length() {
2249        let runtime = setup_runtime();
2250        let expr = runtime.compile("mask(@, `10`)").unwrap();
2251        // When show_last >= length, object mask returns all stars
2252        let result = expr.search(&json!("short")).unwrap();
2253        assert_eq!(result.as_str().unwrap(), "*****");
2254    }
2255
2256    #[test]
2257    fn test_mask_multibyte() {
2258        let runtime = setup_runtime();
2259        // "héllo" is 5 chars / 6 bytes. show_last=4 -> mask 1 char. A byte-based
2260        // slice at byte 2 would land inside "é" and panic; char-based is safe.
2261        let expr = runtime.compile("mask(@, `4`)").unwrap();
2262        let result = expr.search(&json!("héllo")).unwrap();
2263        assert_eq!(result.as_str().unwrap(), "*éllo");
2264    }
2265
2266    #[test]
2267    fn test_obscure_custom_char_and_default() {
2268        // `obscure` is the renamed string-module mask: defaults to masking the
2269        // whole string and supports a custom mask character.
2270        let runtime = setup_runtime();
2271        let expr = runtime.compile("obscure('password', `4`, '#')").unwrap();
2272        assert_eq!(expr.search(&json!(null)).unwrap(), json!("####word"));
2273        let expr = runtime.compile("obscure('secret')").unwrap();
2274        assert_eq!(expr.search(&json!(null)).unwrap(), json!("******"));
2275    }
2276
2277    #[test]
2278    fn test_find_first_multibyte() {
2279        // IndexOfFn is registered as `find_first`.
2280        let runtime = setup_runtime();
2281        // Character indices: h=0, é=1, l=2, l=3, o=4.
2282        let expr = runtime.compile("find_first('héllo', 'l')").unwrap();
2283        assert_eq!(expr.search(&json!(null)).unwrap(), json!(2));
2284        // start/end window that previously sliced mid-"é" and panicked.
2285        let expr = runtime
2286            .compile("find_first('héllo', 'l', `1`, `5`)")
2287            .unwrap();
2288        assert_eq!(expr.search(&json!(null)).unwrap(), json!(2));
2289    }
2290
2291    #[test]
2292    fn test_find_last_multibyte() {
2293        // LastIndexOfFn is registered as `find_last`.
2294        let runtime = setup_runtime();
2295        let expr = runtime.compile("find_last('héllo', 'l')").unwrap();
2296        assert_eq!(expr.search(&json!(null)).unwrap(), json!(3));
2297    }
2298
2299    // =========================================================================
2300    // normalize_whitespace tests
2301    // =========================================================================
2302
2303    #[test]
2304    fn test_normalize_whitespace_basic() {
2305        let runtime = setup_runtime();
2306        let expr = runtime.compile("normalize_whitespace(@)").unwrap();
2307        let result = expr.search(&json!("hello    world")).unwrap();
2308        assert_eq!(result.as_str().unwrap(), "hello world");
2309    }
2310
2311    #[test]
2312    fn test_normalize_whitespace_mixed() {
2313        let runtime = setup_runtime();
2314        let expr = runtime.compile("normalize_whitespace(@)").unwrap();
2315        let result = expr.search(&json!("hello\t\n  world\n\nfoo")).unwrap();
2316        assert_eq!(result.as_str().unwrap(), "hello world foo");
2317    }
2318
2319    #[test]
2320    fn test_normalize_whitespace_leading_trailing() {
2321        let runtime = setup_runtime();
2322        let expr = runtime.compile("normalize_whitespace(@)").unwrap();
2323        let result = expr.search(&json!("  hello world  ")).unwrap();
2324        assert_eq!(result.as_str().unwrap(), "hello world");
2325    }
2326
2327    // =========================================================================
2328    // is_blank tests
2329    // =========================================================================
2330
2331    #[test]
2332    fn test_is_blank_empty() {
2333        let runtime = setup_runtime();
2334        let expr = runtime.compile("is_blank(@)").unwrap();
2335        let result = expr.search(&json!("")).unwrap();
2336        assert!(result.as_bool().unwrap());
2337    }
2338
2339    #[test]
2340    fn test_is_blank_whitespace() {
2341        let runtime = setup_runtime();
2342        let expr = runtime.compile("is_blank(@)").unwrap();
2343        let result = expr.search(&json!("   \t\n  ")).unwrap();
2344        assert!(result.as_bool().unwrap());
2345    }
2346
2347    #[test]
2348    fn test_is_blank_not_blank() {
2349        let runtime = setup_runtime();
2350        let expr = runtime.compile("is_blank(@)").unwrap();
2351        let result = expr.search(&json!("  a  ")).unwrap();
2352        assert!(!result.as_bool().unwrap());
2353    }
2354
2355    // =========================================================================
2356    // abbreviate tests
2357    // =========================================================================
2358
2359    #[test]
2360    fn test_abbreviate_basic() {
2361        let runtime = setup_runtime();
2362        let expr = runtime.compile("abbreviate(@, `10`)").unwrap();
2363        let result = expr.search(&json!("This is a very long string")).unwrap();
2364        assert_eq!(result.as_str().unwrap(), "This is...");
2365    }
2366
2367    #[test]
2368    fn test_abbreviate_no_truncation() {
2369        let runtime = setup_runtime();
2370        let expr = runtime.compile("abbreviate(@, `20`)").unwrap();
2371        let result = expr.search(&json!("short")).unwrap();
2372        assert_eq!(result.as_str().unwrap(), "short");
2373    }
2374
2375    #[test]
2376    fn test_abbreviate_custom_suffix() {
2377        let runtime = setup_runtime();
2378        let expr = runtime.compile("abbreviate(@, `8`, `\"~\"`)").unwrap();
2379        let result = expr.search(&json!("Hello World")).unwrap();
2380        assert_eq!(result.as_str().unwrap(), "Hello W~");
2381    }
2382
2383    // =========================================================================
2384    // center tests
2385    // =========================================================================
2386
2387    #[test]
2388    fn test_center_basic() {
2389        let runtime = setup_runtime();
2390        let expr = runtime.compile("center(@, `10`)").unwrap();
2391        let result = expr.search(&json!("hi")).unwrap();
2392        assert_eq!(result.as_str().unwrap(), "    hi    ");
2393    }
2394
2395    #[test]
2396    fn test_center_custom_char() {
2397        let runtime = setup_runtime();
2398        let expr = runtime.compile("center(@, `10`, `\"-\"`)").unwrap();
2399        let result = expr.search(&json!("hi")).unwrap();
2400        assert_eq!(result.as_str().unwrap(), "----hi----");
2401    }
2402
2403    #[test]
2404    fn test_center_already_wide() {
2405        let runtime = setup_runtime();
2406        let expr = runtime.compile("center(@, `3`)").unwrap();
2407        let result = expr.search(&json!("hello")).unwrap();
2408        assert_eq!(result.as_str().unwrap(), "hello");
2409    }
2410
2411    #[test]
2412    fn test_center_odd_padding() {
2413        let runtime = setup_runtime();
2414        let expr = runtime.compile("center(@, `7`)").unwrap();
2415        let result = expr.search(&json!("hi")).unwrap();
2416        assert_eq!(result.as_str().unwrap(), "  hi   ");
2417    }
2418
2419    // =========================================================================
2420    // reverse_string tests
2421    // =========================================================================
2422
2423    #[test]
2424    fn test_reverse_string_basic() {
2425        let runtime = setup_runtime();
2426        let expr = runtime.compile("reverse_string(@)").unwrap();
2427        let result = expr.search(&json!("hello")).unwrap();
2428        assert_eq!(result.as_str().unwrap(), "olleh");
2429    }
2430
2431    #[test]
2432    fn test_reverse_string_empty() {
2433        let runtime = setup_runtime();
2434        let expr = runtime.compile("reverse_string(@)").unwrap();
2435        let result = expr.search(&json!("")).unwrap();
2436        assert_eq!(result.as_str().unwrap(), "");
2437    }
2438
2439    #[test]
2440    fn test_reverse_string_palindrome() {
2441        let runtime = setup_runtime();
2442        let expr = runtime.compile("reverse_string(@)").unwrap();
2443        let result = expr.search(&json!("racecar")).unwrap();
2444        assert_eq!(result.as_str().unwrap(), "racecar");
2445    }
2446
2447    // =========================================================================
2448    // explode tests
2449    // =========================================================================
2450
2451    #[test]
2452    fn test_explode_basic() {
2453        let runtime = setup_runtime();
2454        let expr = runtime.compile("explode(@)").unwrap();
2455        let result = expr.search(&json!("abc")).unwrap();
2456        let arr = result.as_array().unwrap();
2457        assert_eq!(arr.len(), 3);
2458        assert_eq!(arr[0].as_f64().unwrap() as u32, 97); // 'a'
2459        assert_eq!(arr[1].as_f64().unwrap() as u32, 98); // 'b'
2460        assert_eq!(arr[2].as_f64().unwrap() as u32, 99); // 'c'
2461    }
2462
2463    #[test]
2464    fn test_explode_empty() {
2465        let runtime = setup_runtime();
2466        let expr = runtime.compile("explode(@)").unwrap();
2467        let result = expr.search(&json!("")).unwrap();
2468        let arr = result.as_array().unwrap();
2469        assert_eq!(arr.len(), 0);
2470    }
2471
2472    #[test]
2473    fn test_explode_unicode() {
2474        let runtime = setup_runtime();
2475        let expr = runtime.compile("explode(@)").unwrap();
2476        let result = expr.search(&json!("A\u{263a}")).unwrap();
2477        let arr = result.as_array().unwrap();
2478        assert_eq!(arr.len(), 2);
2479        assert_eq!(arr[0].as_f64().unwrap() as u32, 65); // 'A'
2480        assert_eq!(arr[1].as_f64().unwrap() as u32, 9786); // smiley
2481    }
2482
2483    // =========================================================================
2484    // implode tests
2485    // =========================================================================
2486
2487    #[test]
2488    fn test_implode_basic() {
2489        let runtime = setup_runtime();
2490        let expr = runtime.compile("implode(@)").unwrap();
2491        let data = json!([97, 98, 99]);
2492        let result = expr.search(&data).unwrap();
2493        assert_eq!(result.as_str().unwrap(), "abc");
2494    }
2495
2496    #[test]
2497    fn test_implode_empty() {
2498        let runtime = setup_runtime();
2499        let expr = runtime.compile("implode(@)").unwrap();
2500        let data: serde_json::Value = serde_json::from_str("[]").unwrap();
2501        let result = expr.search(&data).unwrap();
2502        assert_eq!(result.as_str().unwrap(), "");
2503    }
2504
2505    #[test]
2506    fn test_implode_unicode() {
2507        let runtime = setup_runtime();
2508        let expr = runtime.compile("implode(@)").unwrap();
2509        let data = json!([65, 9786]);
2510        let result = expr.search(&data).unwrap();
2511        assert_eq!(result.as_str().unwrap(), "A\u{263a}");
2512    }
2513
2514    #[test]
2515    fn test_explode_implode_roundtrip() {
2516        let runtime = setup_runtime();
2517        let expr = runtime.compile("implode(explode(@))").unwrap();
2518        let result = expr.search(&json!("Hello, \u{4e16}\u{754c}!")).unwrap();
2519        assert_eq!(result.as_str().unwrap(), "Hello, \u{4e16}\u{754c}!");
2520    }
2521
2522    // =========================================================================
2523    // shell_escape tests
2524    // =========================================================================
2525
2526    #[test]
2527    fn test_shell_escape_simple() {
2528        let runtime = setup_runtime();
2529        let expr = runtime.compile("shell_escape(@)").unwrap();
2530        let result = expr.search(&json!("hello")).unwrap();
2531        // Simple alphanumeric doesn't need quoting
2532        assert_eq!(result.as_str().unwrap(), "hello");
2533    }
2534
2535    #[test]
2536    fn test_shell_escape_with_spaces() {
2537        let runtime = setup_runtime();
2538        let expr = runtime.compile("shell_escape(@)").unwrap();
2539        let result = expr.search(&json!("hello world")).unwrap();
2540        assert_eq!(result.as_str().unwrap(), "'hello world'");
2541    }
2542
2543    #[test]
2544    fn test_shell_escape_with_special_chars() {
2545        let runtime = setup_runtime();
2546        let expr = runtime.compile("shell_escape(@)").unwrap();
2547        let result = expr.search(&json!("$PATH; rm -rf /")).unwrap();
2548        assert_eq!(result.as_str().unwrap(), "'$PATH; rm -rf /'");
2549    }
2550
2551    #[test]
2552    fn test_shell_escape_with_single_quote() {
2553        let runtime = setup_runtime();
2554        let expr = runtime.compile("shell_escape(@)").unwrap();
2555        let result = expr.search(&json!("it's")).unwrap();
2556        // Single quotes are escaped as '\''
2557        assert_eq!(result.as_str().unwrap(), "'it'\\''s'");
2558    }
2559
2560    #[test]
2561    fn test_shell_escape_empty() {
2562        let runtime = setup_runtime();
2563        let expr = runtime.compile("shell_escape(@)").unwrap();
2564        let result = expr.search(&json!("")).unwrap();
2565        assert_eq!(result.as_str().unwrap(), "''");
2566    }
2567
2568    #[test]
2569    fn test_shell_escape_path() {
2570        let runtime = setup_runtime();
2571        let expr = runtime.compile("shell_escape(@)").unwrap();
2572        let result = expr.search(&json!("/usr/local/bin")).unwrap();
2573        // Paths with only safe chars don't need quoting
2574        assert_eq!(result.as_str().unwrap(), "/usr/local/bin");
2575    }
2576
2577    #[test]
2578    fn test_shell_escape_backticks() {
2579        let runtime = setup_runtime();
2580        let expr = runtime.compile("shell_escape(@)").unwrap();
2581        let result = expr.search(&json!("`whoami`")).unwrap();
2582        assert_eq!(result.as_str().unwrap(), "'`whoami`'");
2583    }
2584}