Skip to main content

cel_core/ext/
string_ext.rs

1//! String extension library for CEL.
2//!
3//! This module provides additional string manipulation functions beyond the
4//! CEL standard library, matching the cel-go strings extension.
5//!
6//! # Functions
7//!
8//! - `charAt(index)` - Returns character at index as a string
9//! - `indexOf(substring)` / `indexOf(substring, offset)` - Find first occurrence
10//! - `lastIndexOf(substring)` / `lastIndexOf(substring, offset)` - Find last occurrence
11//! - `lowerAscii()` - Convert ASCII characters to lowercase
12//! - `upperAscii()` - Convert ASCII characters to uppercase
13//! - `replace(old, new)` / `replace(old, new, count)` - Replace occurrences
14//! - `split(separator)` / `split(separator, limit)` - Split string into list
15//! - `substring(start)` / `substring(start, end)` - Extract substring
16//! - `trim()` - Remove leading/trailing whitespace
17//! - `reverse()` - Reverse the string (Unicode-aware)
18//! - `format(args)` - Format string with arguments
19//! - `join()` / `join(separator)` - Join list of strings (method on list<string>)
20//! - `strings.quote(string)` - Quote a string with escapes
21
22use std::sync::Arc;
23
24use crate::eval::time::{format_duration, format_timestamp};
25use crate::eval::{EvalError, Value};
26use crate::types::{CelType, FunctionDecl, OverloadDecl};
27
28// ==================== Unicode Helpers ====================
29
/// Returns the length of `s` counted in Unicode scalar values (code points)
/// instead of UTF-8 bytes.
fn codepoint_len(s: &str) -> usize {
    // Each `char` yielded by `chars()` is one code point, regardless of how
    // many bytes it occupies in the UTF-8 encoding.
    s.chars().fold(0, |seen, _| seen + 1)
}
33
/// Converts a code-point index into the corresponding byte offset within `s`.
///
/// Valid indices are `0..=n`, where `n` is the number of code points in `s`:
/// index `n` means "end of string" and maps to `s.len()`. Any larger index
/// yields `None`.
///
/// The lookup is a single pass over the string: the byte offsets of every
/// character boundary are followed by one extra entry for the end of the
/// string, and the `cp_index`-th entry of that sequence is returned.
fn codepoint_to_byte_offset(s: &str, cp_index: usize) -> Option<usize> {
    s.char_indices()
        .map(|(byte_offset, _)| byte_offset)
        // One-past-the-last code point is a legal position (end of string).
        .chain(std::iter::once(s.len()))
        .nth(cp_index)
}
50
51// ==================== Format Helpers ====================
52
53fn value_type_name(v: &Value) -> &str {
54    match v {
55        Value::Null => "null_type",
56        Value::Bool(_) => "bool",
57        Value::Int(_) => "int",
58        Value::UInt(_) => "uint",
59        Value::Double(_) => "double",
60        Value::String(_) => "string",
61        Value::Bytes(_) => "bytes",
62        Value::List(_) => "list",
63        Value::Map(_) => "map",
64        Value::Timestamp(_) => "google.protobuf.Timestamp",
65        Value::Duration(_) => "google.protobuf.Duration",
66        Value::Type(_) => "type",
67        Value::Optional(_) => "optional_type",
68        Value::Message(m) => m.type_name(),
69        Value::Enum(e) => &e.type_name,
70        Value::Error(_) => "error",
71    }
72}
73
/// One parsed piece of a `format` string.
///
/// Deriving `Debug`/`Clone`/`PartialEq`/`Eq` lets parsed clauses be printed in
/// diagnostics and compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
enum FormatClause {
    /// Text that is emitted verbatim.
    Literal(String),
    /// A `%`-clause that consumes one argument.
    Verb {
        /// The formatting verb character (for example `s`, `d` or `f`).
        verb: char,
        /// Precision given as `.N` between `%` and the verb, if any.
        precision: Option<usize>,
    },
}
81
82fn parse_format_string(fmt: &str) -> Result<Vec<FormatClause>, String> {
83    let mut clauses = Vec::new();
84    let mut chars = fmt.chars().peekable();
85    let mut literal = String::new();
86
87    while let Some(c) = chars.next() {
88        if c == '%' {
89            match chars.peek() {
90                Some('%') => {
91                    chars.next();
92                    literal.push('%');
93                }
94                Some(_) => {
95                    if !literal.is_empty() {
96                        clauses.push(FormatClause::Literal(std::mem::take(&mut literal)));
97                    }
98                    // Parse optional precision: .N
99                    let precision = if chars.peek() == Some(&'.') {
100                        chars.next(); // consume '.'
101                        let mut num_str = String::new();
102                        while let Some(&d) = chars.peek() {
103                            if d.is_ascii_digit() {
104                                num_str.push(d);
105                                chars.next();
106                            } else {
107                                break;
108                            }
109                        }
110                        Some(
111                            num_str
112                                .parse::<usize>()
113                                .map_err(|_| "invalid precision".to_string())?,
114                        )
115                    } else {
116                        None
117                    };
118                    // Next char is the verb
119                    let verb = chars.next().ok_or("unexpected end of format string")?;
120                    match verb {
121                        's' | 'd' | 'f' | 'e' | 'b' | 'o' | 'x' | 'X' => {
122                            clauses.push(FormatClause::Verb { verb, precision });
123                        }
124                        other => {
125                            return Err(format!(
126                                "could not parse formatting clause: unrecognized formatting clause {:?}",
127                                other.to_string()
128                            ));
129                        }
130                    }
131                }
132                None => {
133                    return Err("unexpected end of format string after %".to_string());
134                }
135            }
136        } else {
137            literal.push(c);
138        }
139    }
140
141    if !literal.is_empty() {
142        clauses.push(FormatClause::Literal(literal));
143    }
144
145    Ok(clauses)
146}
147
148fn format_value_as_string(v: &Value) -> Result<String, String> {
149    match v {
150        Value::Null => Ok("null".to_string()),
151        Value::Bool(b) => Ok(b.to_string()),
152        Value::Int(i) => Ok(i.to_string()),
153        Value::UInt(u) => Ok(u.to_string()),
154        Value::Double(d) => {
155            if d.is_nan() {
156                Ok("NaN".to_string())
157            } else if d.is_infinite() {
158                if d.is_sign_positive() {
159                    Ok("Infinity".to_string())
160                } else {
161                    Ok("-Infinity".to_string())
162                }
163            } else {
164                // Use default float formatting (no trailing .0 normalization for %s)
165                let s = format!("{}", d);
166                Ok(s)
167            }
168        }
169        Value::String(s) => Ok(s.to_string()),
170        Value::Bytes(b) => Ok(String::from_utf8_lossy(b).to_string()),
171        Value::Timestamp(t) => Ok(format_timestamp(t)),
172        Value::Duration(d) => Ok(format_duration(d)),
173        Value::Type(t) => Ok(t.name.to_string()),
174        Value::List(items) => {
175            let mut parts = Vec::new();
176            for item in items.iter() {
177                parts.push(format_value_as_string(item)?);
178            }
179            Ok(format!("[{}]", parts.join(", ")))
180        }
181        Value::Map(m) => {
182            // Collect entries and sort by CEL ordering: int/uint (numeric), string (alpha), bool
183            let mut entries: Vec<_> = m.iter().collect();
184            entries.sort_by(|(a, _), (b, _)| {
185                use crate::eval::MapKey;
186                fn key_order(k: &MapKey) -> u8 {
187                    match k {
188                        MapKey::Int(_) | MapKey::UInt(_) => 0,
189                        MapKey::String(_) => 1,
190                        MapKey::Bool(_) => 2,
191                    }
192                }
193                let ord = key_order(a).cmp(&key_order(b));
194                if ord != std::cmp::Ordering::Equal {
195                    return ord;
196                }
197                // Within same category, use natural ordering
198                a.cmp(b)
199            });
200            let mut parts = Vec::new();
201            for (k, v) in entries {
202                let key_str = match k {
203                    crate::eval::MapKey::Bool(b) => b.to_string(),
204                    crate::eval::MapKey::Int(i) => i.to_string(),
205                    crate::eval::MapKey::UInt(u) => u.to_string(),
206                    crate::eval::MapKey::String(s) => s.to_string(),
207                };
208                let val_str = format_value_as_string(v)?;
209                parts.push(format!("{}: {}", key_str, val_str));
210            }
211            Ok(format!("{{{}}}", parts.join(", ")))
212        }
213        _ => Err(format!(
214            "string clause can only be used on strings, bools, bytes, ints, doubles, maps, lists, types, durations, and timestamps, was given {}",
215            value_type_name(v)
216        )),
217    }
218}
219
220fn format_value(v: &Value, verb: char, precision: Option<usize>) -> Result<String, String> {
221    match verb {
222        's' => format_value_as_string(v),
223        'd' => {
224            // Decimal: integers only (+ doubles that are NaN/Inf)
225            match v {
226                Value::Int(i) => Ok(i.to_string()),
227                Value::UInt(u) => Ok(u.to_string()),
228                Value::Double(d) if d.is_nan() => Ok("NaN".to_string()),
229                Value::Double(d) if d.is_infinite() => {
230                    if d.is_sign_positive() {
231                        Ok("Infinity".to_string())
232                    } else {
233                        Ok("-Infinity".to_string())
234                    }
235                }
236                _ => Err(format!(
237                    "decimal clause can only be used on integers, was given {}",
238                    value_type_name(v)
239                )),
240            }
241        }
242        'f' => {
243            let prec = precision.unwrap_or(6);
244            let d = match v {
245                Value::Double(d) => *d,
246                Value::Int(i) => *i as f64,
247                Value::UInt(u) => *u as f64,
248                _ => {
249                    return Err(format!(
250                        "fixed-point clause can only be used on doubles, was given {}",
251                        value_type_name(v)
252                    ))
253                }
254            };
255            if d.is_nan() {
256                return Ok("NaN".to_string());
257            }
258            if d.is_infinite() {
259                return if d.is_sign_positive() {
260                    Ok("Infinity".to_string())
261                } else {
262                    Ok("-Infinity".to_string())
263                };
264            }
265            Ok(format!("{:.prec$}", d, prec = prec))
266        }
267        'e' => {
268            let prec = precision.unwrap_or(6);
269            let d = match v {
270                Value::Double(d) => *d,
271                Value::Int(i) => *i as f64,
272                Value::UInt(u) => *u as f64,
273                _ => {
274                    return Err(format!(
275                        "scientific clause can only be used on doubles, was given {}",
276                        value_type_name(v)
277                    ))
278                }
279            };
280            if d.is_nan() {
281                return Ok("NaN".to_string());
282            }
283            if d.is_infinite() {
284                return if d.is_sign_positive() {
285                    Ok("Infinity".to_string())
286                } else {
287                    Ok("-Infinity".to_string())
288                };
289            }
290            // Rust doesn't have %e built-in, implement manually
291            Ok(format_scientific(d, prec))
292        }
293        'b' => match v {
294            Value::Int(i) => Ok(format!("{:b}", i)),
295            Value::UInt(u) => Ok(format!("{:b}", u)),
296            Value::Bool(b) => Ok(if *b { "1".to_string() } else { "0".to_string() }),
297            _ => Err(format!(
298                "only integers and bools can be formatted as binary, was given {}",
299                value_type_name(v)
300            )),
301        },
302        'o' => match v {
303            Value::Int(i) => Ok(format!("{:o}", i)),
304            Value::UInt(u) => Ok(format!("{:o}", u)),
305            _ => Err(format!(
306                "octal clause can only be used on integers, was given {}",
307                value_type_name(v)
308            )),
309        },
310        'x' => match v {
311            Value::Int(i) => Ok(format!("{:x}", i)),
312            Value::UInt(u) => Ok(format!("{:x}", u)),
313            Value::String(s) => Ok(s.bytes().map(|b| format!("{:02x}", b)).collect()),
314            Value::Bytes(b) => Ok(b.iter().map(|b| format!("{:02x}", b)).collect()),
315            _ => Err(format!(
316                "only integers, byte buffers, and strings can be formatted as hex, was given {}",
317                value_type_name(v)
318            )),
319        },
320        'X' => match v {
321            Value::Int(i) => Ok(format!("{:X}", i)),
322            Value::UInt(u) => Ok(format!("{:X}", u)),
323            Value::String(s) => Ok(s.bytes().map(|b| format!("{:02X}", b)).collect()),
324            Value::Bytes(b) => Ok(b.iter().map(|b| format!("{:02X}", b)).collect()),
325            _ => Err(format!(
326                "only integers, byte buffers, and strings can be formatted as hex, was given {}",
327                value_type_name(v)
328            )),
329        },
330        _ => Err(format!("unrecognized verb: {}", verb)),
331    }
332}
333
/// Formats `d` in C/Go-style scientific notation: `precision` digits after the
/// decimal point, then `e`, an explicit exponent sign and at least two
/// exponent digits (for example `1.23e+03`, `1.230e-04`, `0e+00`).
///
/// The digits come from Rust's own `{:e}` formatter, so the mantissa is
/// rounded correctly and always normalised to a single leading digit
/// (`9.9999999` at precision 2 is `1.00e+01`, not `10.00e+00`), and very small
/// subnormal values are handled. Only the exponent is re-laid-out, because
/// Rust prints it without a `+` sign or zero padding (`1.23e3`).
///
/// The sign is taken from the sign bit, so negative zero renders as `-0…`.
/// Callers are expected to handle NaN and the infinities beforehand; if one is
/// passed anyway it is returned in Rust's default spelling.
fn format_scientific(d: f64, precision: usize) -> String {
    // Format the magnitude and attach the sign ourselves so that negative
    // zero is signed regardless of how the formatter treats it.
    let sign = if d.is_sign_negative() { "-" } else { "" };
    let rendered = format!("{:.prec$e}", d.abs(), prec = precision);

    let (mantissa, exponent) = match rendered.split_once('e') {
        Some(parts) => parts,
        // Non-finite values have no exponent part.
        None => return d.to_string(),
    };
    let (exp_sign, exp_digits) = match exponent.strip_prefix('-') {
        Some(digits) => ('-', digits),
        None => ('+', exponent),
    };
    format!("{}{}e{}{:0>2}", sign, mantissa, exp_sign, exp_digits)
}
359
360// ==================== Extension Declaration ====================
361
362/// Returns the string extension library function declarations.
363pub fn string_extension() -> Vec<FunctionDecl> {
364    vec![
365        // charAt: (string).charAt(int) -> string
366        FunctionDecl::new("charAt").with_overload(
367            OverloadDecl::method(
368                "string_char_at_int",
369                vec![CelType::String, CelType::Int],
370                CelType::String,
371            )
372            .with_impl(|args| {
373                let s = match &args[0] {
374                    Value::String(s) => s,
375                    _ => return Value::error(EvalError::invalid_argument("expected string")),
376                };
377                let idx = match &args[1] {
378                    Value::Int(i) => *i,
379                    _ => return Value::error(EvalError::invalid_argument("expected int")),
380                };
381                let len = codepoint_len(s) as i64;
382                if idx == len {
383                    return Value::String(Arc::from(""));
384                }
385                if idx < 0 || idx > len {
386                    return Value::error(EvalError::range_error(format!(
387                        "index out of range: {}",
388                        idx
389                    )));
390                }
391                match s.chars().nth(idx as usize) {
392                    Some(c) => Value::String(Arc::from(c.to_string())),
393                    None => Value::String(Arc::from("")),
394                }
395            }),
396        ),
397        // indexOf: two overloads
398        FunctionDecl::new("indexOf")
399            .with_overload(
400                OverloadDecl::method(
401                    "string_index_of_string",
402                    vec![CelType::String, CelType::String],
403                    CelType::Int,
404                )
405                .with_impl(|args| {
406                    let s = match &args[0] {
407                        Value::String(s) => s,
408                        _ => return Value::error(EvalError::invalid_argument("expected string")),
409                    };
410                    let substr = match &args[1] {
411                        Value::String(s) => s,
412                        _ => return Value::error(EvalError::invalid_argument("expected string")),
413                    };
414                    if substr.is_empty() {
415                        return Value::Int(0);
416                    }
417                    match s.find(substr.as_ref()) {
418                        Some(byte_offset) => {
419                            let cp_index = s[..byte_offset].chars().count() as i64;
420                            Value::Int(cp_index)
421                        }
422                        None => Value::Int(-1),
423                    }
424                }),
425            )
426            .with_overload(
427                OverloadDecl::method(
428                    "string_index_of_string_int",
429                    vec![CelType::String, CelType::String, CelType::Int],
430                    CelType::Int,
431                )
432                .with_impl(|args| {
433                    let s = match &args[0] {
434                        Value::String(s) => s,
435                        _ => return Value::error(EvalError::invalid_argument("expected string")),
436                    };
437                    let substr = match &args[1] {
438                        Value::String(s) => s,
439                        _ => return Value::error(EvalError::invalid_argument("expected string")),
440                    };
441                    let offset = match &args[2] {
442                        Value::Int(i) => *i,
443                        _ => return Value::error(EvalError::invalid_argument("expected int")),
444                    };
445                    let len = codepoint_len(s) as i64;
446                    if offset < 0 || offset > len {
447                        return Value::error(EvalError::range_error(format!(
448                            "index out of range: {}",
449                            offset
450                        )));
451                    }
452                    if substr.is_empty() {
453                        return Value::Int(offset);
454                    }
455                    let byte_start = match codepoint_to_byte_offset(s, offset as usize) {
456                        Some(b) => b,
457                        None => return Value::Int(-1),
458                    };
459                    match s[byte_start..].find(substr.as_ref()) {
460                        Some(byte_offset) => {
461                            let cp_index = s[..byte_start + byte_offset].chars().count() as i64;
462                            Value::Int(cp_index)
463                        }
464                        None => Value::Int(-1),
465                    }
466                }),
467            ),
468        // lastIndexOf
469        FunctionDecl::new("lastIndexOf")
470            .with_overload(
471                OverloadDecl::method(
472                    "string_last_index_of_string",
473                    vec![CelType::String, CelType::String],
474                    CelType::Int,
475                )
476                .with_impl(|args| {
477                    let s = match &args[0] {
478                        Value::String(s) => s,
479                        _ => return Value::error(EvalError::invalid_argument("expected string")),
480                    };
481                    let substr = match &args[1] {
482                        Value::String(s) => s,
483                        _ => return Value::error(EvalError::invalid_argument("expected string")),
484                    };
485                    if substr.is_empty() {
486                        return Value::Int(codepoint_len(s) as i64);
487                    }
488                    match s.rfind(substr.as_ref()) {
489                        Some(byte_offset) => {
490                            let cp_index = s[..byte_offset].chars().count() as i64;
491                            Value::Int(cp_index)
492                        }
493                        None => Value::Int(-1),
494                    }
495                }),
496            )
497            .with_overload(
498                OverloadDecl::method(
499                    "string_last_index_of_string_int",
500                    vec![CelType::String, CelType::String, CelType::Int],
501                    CelType::Int,
502                )
503                .with_impl(|args| {
504                    let s = match &args[0] {
505                        Value::String(s) => s,
506                        _ => return Value::error(EvalError::invalid_argument("expected string")),
507                    };
508                    let substr = match &args[1] {
509                        Value::String(s) => s,
510                        _ => return Value::error(EvalError::invalid_argument("expected string")),
511                    };
512                    let offset = match &args[2] {
513                        Value::Int(i) => *i,
514                        _ => return Value::error(EvalError::invalid_argument("expected int")),
515                    };
516                    let len = codepoint_len(s) as i64;
517                    if offset < 0 || offset > len {
518                        return Value::error(EvalError::range_error(format!(
519                            "index out of range: {}",
520                            offset
521                        )));
522                    }
523                    if substr.is_empty() {
524                        return Value::Int(offset);
525                    }
526                    // Search only in s[0..end_byte] where end_byte is the byte offset of
527                    // offset + len(substr) code points (so we can find matches starting at offset)
528                    let substr_cp_len = codepoint_len(substr);
529                    let search_end_cp = (offset as usize) + substr_cp_len;
530                    let search_end_byte = if search_end_cp >= codepoint_len(s) {
531                        s.len()
532                    } else {
533                        codepoint_to_byte_offset(s, search_end_cp).unwrap_or(s.len())
534                    };
535                    let search_slice = &s[..search_end_byte];
536                    match search_slice.rfind(substr.as_ref()) {
537                        Some(byte_offset) => {
538                            let cp_index = s[..byte_offset].chars().count() as i64;
539                            Value::Int(cp_index)
540                        }
541                        None => Value::Int(-1),
542                    }
543                }),
544            ),
545        // lowerAscii
546        FunctionDecl::new("lowerAscii").with_overload(
547            OverloadDecl::method("string_lower_ascii", vec![CelType::String], CelType::String)
548                .with_impl(|args| {
549                    let s = match &args[0] {
550                        Value::String(s) => s,
551                        _ => return Value::error(EvalError::invalid_argument("expected string")),
552                    };
553                    let result: String = s
554                        .chars()
555                        .map(|c| {
556                            if c.is_ascii() {
557                                c.to_ascii_lowercase()
558                            } else {
559                                c
560                            }
561                        })
562                        .collect();
563                    Value::String(Arc::from(result))
564                }),
565        ),
566        // upperAscii
567        FunctionDecl::new("upperAscii").with_overload(
568            OverloadDecl::method("string_upper_ascii", vec![CelType::String], CelType::String)
569                .with_impl(|args| {
570                    let s = match &args[0] {
571                        Value::String(s) => s,
572                        _ => return Value::error(EvalError::invalid_argument("expected string")),
573                    };
574                    let result: String = s
575                        .chars()
576                        .map(|c| {
577                            if c.is_ascii() {
578                                c.to_ascii_uppercase()
579                            } else {
580                                c
581                            }
582                        })
583                        .collect();
584                    Value::String(Arc::from(result))
585                }),
586        ),
587        // replace
588        FunctionDecl::new("replace")
589            .with_overload(
590                OverloadDecl::method(
591                    "string_replace_string_string",
592                    vec![CelType::String, CelType::String, CelType::String],
593                    CelType::String,
594                )
595                .with_impl(|args| {
596                    let s = match &args[0] {
597                        Value::String(s) => s,
598                        _ => return Value::error(EvalError::invalid_argument("expected string")),
599                    };
600                    let old = match &args[1] {
601                        Value::String(s) => s,
602                        _ => return Value::error(EvalError::invalid_argument("expected string")),
603                    };
604                    let new = match &args[2] {
605                        Value::String(s) => s,
606                        _ => return Value::error(EvalError::invalid_argument("expected string")),
607                    };
608                    Value::String(Arc::from(s.replace(old.as_ref(), new.as_ref())))
609                }),
610            )
611            .with_overload(
612                OverloadDecl::method(
613                    "string_replace_string_string_int",
614                    vec![
615                        CelType::String,
616                        CelType::String,
617                        CelType::String,
618                        CelType::Int,
619                    ],
620                    CelType::String,
621                )
622                .with_impl(|args| {
623                    let s = match &args[0] {
624                        Value::String(s) => s,
625                        _ => return Value::error(EvalError::invalid_argument("expected string")),
626                    };
627                    let old = match &args[1] {
628                        Value::String(s) => s,
629                        _ => return Value::error(EvalError::invalid_argument("expected string")),
630                    };
631                    let new = match &args[2] {
632                        Value::String(s) => s,
633                        _ => return Value::error(EvalError::invalid_argument("expected string")),
634                    };
635                    let limit = match &args[3] {
636                        Value::Int(i) => *i,
637                        _ => return Value::error(EvalError::invalid_argument("expected int")),
638                    };
639                    if limit == 0 {
640                        return Value::String(Arc::clone(s));
641                    }
642                    if limit < 0 {
643                        return Value::String(Arc::from(s.replace(old.as_ref(), new.as_ref())));
644                    }
645                    Value::String(Arc::from(s.replacen(
646                        old.as_ref(),
647                        new.as_ref(),
648                        limit as usize,
649                    )))
650                }),
651            ),
652        // split
653        FunctionDecl::new("split")
654            .with_overload(
655                OverloadDecl::method(
656                    "string_split_string",
657                    vec![CelType::String, CelType::String],
658                    CelType::list(CelType::String),
659                )
660                .with_impl(|args| {
661                    let s = match &args[0] {
662                        Value::String(s) => s,
663                        _ => return Value::error(EvalError::invalid_argument("expected string")),
664                    };
665                    let sep = match &args[1] {
666                        Value::String(s) => s,
667                        _ => return Value::error(EvalError::invalid_argument("expected string")),
668                    };
669                    let parts: Vec<Value> = s
670                        .split(sep.as_ref())
671                        .map(|p| Value::String(Arc::from(p)))
672                        .collect();
673                    Value::List(Arc::from(parts))
674                }),
675            )
676            .with_overload(
677                OverloadDecl::method(
678                    "string_split_string_int",
679                    vec![CelType::String, CelType::String, CelType::Int],
680                    CelType::list(CelType::String),
681                )
682                .with_impl(|args| {
683                    let s = match &args[0] {
684                        Value::String(s) => s,
685                        _ => return Value::error(EvalError::invalid_argument("expected string")),
686                    };
687                    let sep = match &args[1] {
688                        Value::String(s) => s,
689                        _ => return Value::error(EvalError::invalid_argument("expected string")),
690                    };
691                    let limit = match &args[2] {
692                        Value::Int(i) => *i,
693                        _ => return Value::error(EvalError::invalid_argument("expected int")),
694                    };
695                    if limit == 0 {
696                        return Value::List(Arc::from(Vec::<Value>::new()));
697                    }
698                    if limit < 0 {
699                        let parts: Vec<Value> = s
700                            .split(sep.as_ref())
701                            .map(|p| Value::String(Arc::from(p)))
702                            .collect();
703                        return Value::List(Arc::from(parts));
704                    }
705                    let parts: Vec<Value> = s
706                        .splitn(limit as usize, sep.as_ref())
707                        .map(|p| Value::String(Arc::from(p)))
708                        .collect();
709                    Value::List(Arc::from(parts))
710                }),
711            ),
712        // substring
713        FunctionDecl::new("substring")
714            .with_overload(
715                OverloadDecl::method(
716                    "string_substring_int",
717                    vec![CelType::String, CelType::Int],
718                    CelType::String,
719                )
720                .with_impl(|args| {
721                    let s = match &args[0] {
722                        Value::String(s) => s,
723                        _ => return Value::error(EvalError::invalid_argument("expected string")),
724                    };
725                    let start = match &args[1] {
726                        Value::Int(i) => *i,
727                        _ => return Value::error(EvalError::invalid_argument("expected int")),
728                    };
729                    let len = codepoint_len(s) as i64;
730                    if start < 0 || start > len {
731                        return Value::error(EvalError::range_error(format!(
732                            "index out of range: {}",
733                            start
734                        )));
735                    }
736                    let byte_start = codepoint_to_byte_offset(s, start as usize).unwrap_or(s.len());
737                    Value::String(Arc::from(&s[byte_start..]))
738                }),
739            )
740            .with_overload(
741                OverloadDecl::method(
742                    "string_substring_int_int",
743                    vec![CelType::String, CelType::Int, CelType::Int],
744                    CelType::String,
745                )
746                .with_impl(|args| {
747                    let s = match &args[0] {
748                        Value::String(s) => s,
749                        _ => return Value::error(EvalError::invalid_argument("expected string")),
750                    };
751                    let start = match &args[1] {
752                        Value::Int(i) => *i,
753                        _ => return Value::error(EvalError::invalid_argument("expected int")),
754                    };
755                    let end = match &args[2] {
756                        Value::Int(i) => *i,
757                        _ => return Value::error(EvalError::invalid_argument("expected int")),
758                    };
759                    let len = codepoint_len(s) as i64;
760                    if start < 0 || start > len {
761                        return Value::error(EvalError::range_error(format!(
762                            "index out of range: {}",
763                            start
764                        )));
765                    }
766                    if end < 0 || end > len {
767                        return Value::error(EvalError::range_error(format!(
768                            "index out of range: {}",
769                            end
770                        )));
771                    }
772                    if start > end {
773                        return Value::error(EvalError::range_error(format!(
774                            "invalid substring range. start: {}, end: {}",
775                            start, end
776                        )));
777                    }
778                    let byte_start = codepoint_to_byte_offset(s, start as usize).unwrap_or(s.len());
779                    let byte_end = codepoint_to_byte_offset(s, end as usize).unwrap_or(s.len());
780                    Value::String(Arc::from(&s[byte_start..byte_end]))
781                }),
782            ),
783        // trim
784        FunctionDecl::new("trim").with_overload(
785            OverloadDecl::method("string_trim", vec![CelType::String], CelType::String).with_impl(
786                |args| {
787                    let s = match &args[0] {
788                        Value::String(s) => s,
789                        _ => return Value::error(EvalError::invalid_argument("expected string")),
790                    };
791                    Value::String(Arc::from(s.trim()))
792                },
793            ),
794        ),
795        // reverse
796        FunctionDecl::new("reverse").with_overload(
797            OverloadDecl::method("string_reverse", vec![CelType::String], CelType::String)
798                .with_impl(|args| {
799                    let s = match &args[0] {
800                        Value::String(s) => s,
801                        _ => return Value::error(EvalError::invalid_argument("expected string")),
802                    };
803                    let reversed: String = s.chars().rev().collect();
804                    Value::String(Arc::from(reversed))
805                }),
806        ),
807        // format
808        FunctionDecl::new("format").with_overload(
809            OverloadDecl::method(
810                "string_format",
811                vec![CelType::String, CelType::list(CelType::Dyn)],
812                CelType::String,
813            )
814            .with_impl(|args| {
815                let fmt_str = match &args[0] {
816                    Value::String(s) => s,
817                    _ => return Value::error(EvalError::invalid_argument("expected string")),
818                };
819                let list = match &args[1] {
820                    Value::List(l) => l,
821                    _ => return Value::error(EvalError::invalid_argument("expected list")),
822                };
823
824                let clauses = match parse_format_string(fmt_str) {
825                    Ok(c) => c,
826                    Err(e) => return Value::error(EvalError::invalid_argument(e)),
827                };
828
829                let mut result = String::new();
830                let mut arg_index = 0usize;
831
832                for clause in &clauses {
833                    match clause {
834                        FormatClause::Literal(s) => result.push_str(s),
835                        FormatClause::Verb { verb, precision } => {
836                            if arg_index >= list.len() {
837                                return Value::error(EvalError::invalid_argument(format!(
838                                    "index {} out of range",
839                                    arg_index
840                                )));
841                            }
842                            match format_value(&list[arg_index], *verb, *precision) {
843                                Ok(s) => result.push_str(&s),
844                                Err(e) => {
845                                    return Value::error(EvalError::invalid_argument(format!(
846                                        "error during formatting: {}",
847                                        e
848                                    )))
849                                }
850                            }
851                            arg_index += 1;
852                        }
853                    }
854                }
855
856                Value::String(Arc::from(result))
857            }),
858        ),
859        // join - method on list<string>
860        FunctionDecl::new("join")
861            .with_overload(
862                OverloadDecl::method(
863                    "list_string_join",
864                    vec![CelType::list(CelType::String)],
865                    CelType::String,
866                )
867                .with_impl(|args| {
868                    let list = match &args[0] {
869                        Value::List(l) => l,
870                        _ => return Value::error(EvalError::invalid_argument("expected list")),
871                    };
872                    let parts: Vec<&str> = list
873                        .iter()
874                        .map(|v| match v {
875                            Value::String(s) => s.as_ref(),
876                            _ => "",
877                        })
878                        .collect();
879                    Value::String(Arc::from(parts.join("")))
880                }),
881            )
882            .with_overload(
883                OverloadDecl::method(
884                    "list_string_join_string",
885                    vec![CelType::list(CelType::String), CelType::String],
886                    CelType::String,
887                )
888                .with_impl(|args| {
889                    let list = match &args[0] {
890                        Value::List(l) => l,
891                        _ => return Value::error(EvalError::invalid_argument("expected list")),
892                    };
893                    let sep = match &args[1] {
894                        Value::String(s) => s,
895                        _ => return Value::error(EvalError::invalid_argument("expected string")),
896                    };
897                    let parts: Vec<&str> = list
898                        .iter()
899                        .map(|v| match v {
900                            Value::String(s) => s.as_ref(),
901                            _ => "",
902                        })
903                        .collect();
904                    Value::String(Arc::from(parts.join(sep.as_ref())))
905                }),
906            ),
907        // strings.quote - namespaced standalone function
908        FunctionDecl::new("strings.quote").with_overload(
909            OverloadDecl::function(
910                "strings_quote_string",
911                vec![CelType::String],
912                CelType::String,
913            )
914            .with_impl(|args| {
915                let s = match &args[0] {
916                    Value::String(s) => s,
917                    _ => return Value::error(EvalError::invalid_argument("expected string")),
918                };
919                let mut result = String::with_capacity(s.len() + 2);
920                result.push('"');
921                for c in s.chars() {
922                    match c {
923                        '\x07' => result.push_str("\\a"),
924                        '\x08' => result.push_str("\\b"),
925                        '\x0C' => result.push_str("\\f"),
926                        '\n' => result.push_str("\\n"),
927                        '\r' => result.push_str("\\r"),
928                        '\t' => result.push_str("\\t"),
929                        '\x0B' => result.push_str("\\v"),
930                        '\\' => result.push_str("\\\\"),
931                        '"' => result.push_str("\\\""),
932                        _ => result.push(c),
933                    }
934                }
935                result.push('"');
936                Value::String(Arc::from(result))
937            }),
938        ),
939    ]
940}
941
942#[cfg(test)]
943mod tests {
944    use super::*;
945
946    #[test]
947    fn test_string_extension_count() {
948        let funcs = string_extension();
949        // 13 functions defined
950        assert_eq!(funcs.len(), 13);
951    }
952
953    #[test]
954    fn test_char_at() {
955        let funcs = string_extension();
956        let char_at = funcs.iter().find(|f| f.name == "charAt").unwrap();
957        assert_eq!(char_at.overloads.len(), 1);
958        assert!(char_at.overloads[0].is_member);
959    }
960
961    #[test]
962    fn test_index_of_overloads() {
963        let funcs = string_extension();
964        let index_of = funcs.iter().find(|f| f.name == "indexOf").unwrap();
965        assert_eq!(index_of.overloads.len(), 2);
966    }
967
968    #[test]
969    fn test_join_is_member_on_list() {
970        let funcs = string_extension();
971        let join = funcs.iter().find(|f| f.name == "join").unwrap();
972        assert_eq!(join.overloads.len(), 2);
973        for overload in &join.overloads {
974            assert!(overload.is_member);
975            assert_eq!(
976                overload.receiver_type(),
977                Some(&CelType::list(CelType::String))
978            );
979        }
980    }
981
982    #[test]
983    fn test_strings_quote_is_standalone() {
984        let funcs = string_extension();
985        let quote = funcs.iter().find(|f| f.name == "strings.quote").unwrap();
986        assert_eq!(quote.overloads.len(), 1);
987        assert!(!quote.overloads[0].is_member);
988    }
989
990    // ==================== Runtime Implementation Tests ====================
991
992    fn call_overload(
993        funcs: &[FunctionDecl],
994        name: &str,
995        overload_idx: usize,
996        args: &[Value],
997    ) -> Value {
998        let func = funcs.iter().find(|f| f.name == name).unwrap();
999        let overload = &func.overloads[overload_idx];
1000        let imp = overload.implementation.as_ref().expect("no implementation");
1001        imp(args)
1002    }
1003
1004    fn s(val: &str) -> Value {
1005        Value::String(Arc::from(val))
1006    }
1007
1008    #[test]
1009    fn test_char_at_impl() {
1010        let funcs = string_extension();
1011        assert_eq!(
1012            call_overload(&funcs, "charAt", 0, &[s("tacocat"), Value::Int(3)]),
1013            s("o")
1014        );
1015        assert_eq!(
1016            call_overload(&funcs, "charAt", 0, &[s("tacocat"), Value::Int(7)]),
1017            s("")
1018        );
1019        // Unicode
1020        assert_eq!(
1021            call_overload(&funcs, "charAt", 0, &[s("©αT"), Value::Int(0)]),
1022            s("©")
1023        );
1024        assert_eq!(
1025            call_overload(&funcs, "charAt", 0, &[s("©αT"), Value::Int(1)]),
1026            s("α")
1027        );
1028    }
1029
1030    #[test]
1031    fn test_lower_ascii_impl() {
1032        let funcs = string_extension();
1033        assert_eq!(
1034            call_overload(&funcs, "lowerAscii", 0, &[s("TacoCat")]),
1035            s("tacocat")
1036        );
1037        assert_eq!(
1038            call_overload(&funcs, "lowerAscii", 0, &[s("TacoCÆt")]),
1039            s("tacocÆt")
1040        );
1041    }
1042
1043    #[test]
1044    fn test_upper_ascii_impl() {
1045        let funcs = string_extension();
1046        assert_eq!(
1047            call_overload(&funcs, "upperAscii", 0, &[s("tacoCat")]),
1048            s("TACOCAT")
1049        );
1050        assert_eq!(
1051            call_overload(&funcs, "upperAscii", 0, &[s("tacoCαt")]),
1052            s("TACOCαT")
1053        );
1054    }
1055
1056    #[test]
1057    fn test_trim_impl() {
1058        let funcs = string_extension();
1059        assert_eq!(
1060            call_overload(&funcs, "trim", 0, &[s("  hello  ")]),
1061            s("hello")
1062        );
1063        assert_eq!(
1064            call_overload(&funcs, "trim", 0, &[s(" \t\n text \r ")]),
1065            s("text")
1066        );
1067    }
1068
1069    #[test]
1070    fn test_reverse_impl() {
1071        let funcs = string_extension();
1072        assert_eq!(call_overload(&funcs, "reverse", 0, &[s("")]), s(""));
1073        assert_eq!(call_overload(&funcs, "reverse", 0, &[s("☺")]), s("☺"));
1074        assert_eq!(
1075            call_overload(&funcs, "reverse", 0, &[s("Ta©oCαt")]),
1076            s("tαCo©aT")
1077        );
1078    }
1079
1080    #[test]
1081    fn test_index_of_impl() {
1082        let funcs = string_extension();
1083        // Basic
1084        assert_eq!(
1085            call_overload(&funcs, "indexOf", 0, &[s("tacocat"), s("")]),
1086            Value::Int(0)
1087        );
1088        assert_eq!(
1089            call_overload(&funcs, "indexOf", 0, &[s("tacocat"), s("ac")]),
1090            Value::Int(1)
1091        );
1092        assert_eq!(
1093            call_overload(&funcs, "indexOf", 0, &[s("tacocat"), s("none")]),
1094            Value::Int(-1)
1095        );
1096        // With offset
1097        assert_eq!(
1098            call_overload(&funcs, "indexOf", 1, &[s("tacocat"), s("a"), Value::Int(3)]),
1099            Value::Int(5)
1100        );
1101        // Unicode
1102        assert_eq!(
1103            call_overload(&funcs, "indexOf", 0, &[s("ta©o©αT"), s("©")]),
1104            Value::Int(2)
1105        );
1106        assert_eq!(
1107            call_overload(&funcs, "indexOf", 1, &[s("ta©o©αT"), s("©"), Value::Int(3)]),
1108            Value::Int(4)
1109        );
1110    }
1111
1112    #[test]
1113    fn test_last_index_of_impl() {
1114        let funcs = string_extension();
1115        assert_eq!(
1116            call_overload(&funcs, "lastIndexOf", 0, &[s("tacocat"), s("")]),
1117            Value::Int(7)
1118        );
1119        assert_eq!(
1120            call_overload(&funcs, "lastIndexOf", 0, &[s("tacocat"), s("at")]),
1121            Value::Int(5)
1122        );
1123        assert_eq!(
1124            call_overload(&funcs, "lastIndexOf", 0, &[s("tacocat"), s("none")]),
1125            Value::Int(-1)
1126        );
1127        // With offset
1128        assert_eq!(
1129            call_overload(
1130                &funcs,
1131                "lastIndexOf",
1132                1,
1133                &[s("tacocat"), s("a"), Value::Int(3)]
1134            ),
1135            Value::Int(1)
1136        );
1137        // Unicode
1138        assert_eq!(
1139            call_overload(&funcs, "lastIndexOf", 0, &[s("ta©o©αT"), s("©")]),
1140            Value::Int(4)
1141        );
1142        assert_eq!(
1143            call_overload(
1144                &funcs,
1145                "lastIndexOf",
1146                1,
1147                &[s("ta©o©αT"), s("©"), Value::Int(3)]
1148            ),
1149            Value::Int(2)
1150        );
1151    }
1152
1153    #[test]
1154    fn test_substring_impl() {
1155        let funcs = string_extension();
1156        assert_eq!(
1157            call_overload(&funcs, "substring", 0, &[s("tacocat"), Value::Int(4)]),
1158            s("cat")
1159        );
1160        assert_eq!(
1161            call_overload(&funcs, "substring", 0, &[s("tacocat"), Value::Int(7)]),
1162            s("")
1163        );
1164        assert_eq!(
1165            call_overload(
1166                &funcs,
1167                "substring",
1168                1,
1169                &[s("tacocat"), Value::Int(0), Value::Int(4)]
1170            ),
1171            s("taco")
1172        );
1173        assert_eq!(
1174            call_overload(
1175                &funcs,
1176                "substring",
1177                1,
1178                &[s("tacocat"), Value::Int(4), Value::Int(4)]
1179            ),
1180            s("")
1181        );
1182        // Unicode
1183        assert_eq!(
1184            call_overload(
1185                &funcs,
1186                "substring",
1187                1,
1188                &[s("ta©o©αT"), Value::Int(2), Value::Int(6)]
1189            ),
1190            s("©o©α")
1191        );
1192    }
1193
1194    #[test]
1195    fn test_replace_impl() {
1196        let funcs = string_extension();
1197        assert_eq!(
1198            call_overload(
1199                &funcs,
1200                "replace",
1201                0,
1202                &[s("{0} days {0} hours"), s("{0}"), s("2")]
1203            ),
1204            s("2 days 2 hours")
1205        );
1206        // With limit
1207        assert_eq!(
1208            call_overload(
1209                &funcs,
1210                "replace",
1211                1,
1212                &[s("{0} days {0} hours"), s("{0}"), s("2"), Value::Int(1)]
1213            ),
1214            s("2 days {0} hours")
1215        );
1216        // Limit 0 = no change
1217        assert_eq!(
1218            call_overload(
1219                &funcs,
1220                "replace",
1221                1,
1222                &[s("{0} days {0} hours"), s("{0}"), s("2"), Value::Int(0)]
1223            ),
1224            s("{0} days {0} hours")
1225        );
1226    }
1227
1228    #[test]
1229    fn test_split_impl() {
1230        let funcs = string_extension();
1231        let result = call_overload(&funcs, "split", 0, &[s("hello world"), s(" ")]);
1232        assert_eq!(result, Value::List(Arc::from(vec![s("hello"), s("world")])));
1233        // With limit 0
1234        let result = call_overload(
1235            &funcs,
1236            "split",
1237            1,
1238            &[s("hello world"), s(" "), Value::Int(0)],
1239        );
1240        assert_eq!(result, Value::List(Arc::from(Vec::<Value>::new())));
1241        // With limit 1
1242        let result = call_overload(
1243            &funcs,
1244            "split",
1245            1,
1246            &[s("hello world events!"), s(" "), Value::Int(1)],
1247        );
1248        assert_eq!(
1249            result,
1250            Value::List(Arc::from(vec![s("hello world events!")]))
1251        );
1252    }
1253
1254    #[test]
1255    fn test_join_impl() {
1256        let funcs = string_extension();
1257        let list = Value::List(Arc::from(vec![s("x"), s("y")]));
1258        assert_eq!(
1259            call_overload(&funcs, "join", 0, std::slice::from_ref(&list)),
1260            s("xy")
1261        );
1262        assert_eq!(call_overload(&funcs, "join", 1, &[list, s("-")]), s("x-y"));
1263        // Empty list
1264        let empty = Value::List(Arc::from(Vec::<Value>::new()));
1265        assert_eq!(
1266            call_overload(&funcs, "join", 0, std::slice::from_ref(&empty)),
1267            s("")
1268        );
1269        assert_eq!(call_overload(&funcs, "join", 1, &[empty, s("-")]), s(""));
1270    }
1271
1272    #[test]
1273    fn test_strings_quote_impl() {
1274        let funcs = string_extension();
1275        assert_eq!(
1276            call_overload(&funcs, "strings.quote", 0, &[s("verbatim")]),
1277            s("\"verbatim\"")
1278        );
1279        assert_eq!(
1280            call_overload(&funcs, "strings.quote", 0, &[s("first\nsecond")]),
1281            s("\"first\\nsecond\"")
1282        );
1283        assert_eq!(
1284            call_overload(&funcs, "strings.quote", 0, &[s("bell\x07")]),
1285            s("\"bell\\a\"")
1286        );
1287        assert_eq!(
1288            call_overload(&funcs, "strings.quote", 0, &[s("")]),
1289            s("\"\"")
1290        );
1291    }
1292
1293    #[test]
1294    fn test_format_basic() {
1295        let funcs = string_extension();
1296        let fmt = |f: &str, args: Vec<Value>| {
1297            call_overload(&funcs, "format", 0, &[s(f), Value::List(Arc::from(args))])
1298        };
1299
1300        assert_eq!(fmt("no substitution", vec![]), s("no substitution"));
1301        assert_eq!(fmt("%s", vec![s("filler")]), s("filler"));
1302        assert_eq!(fmt("%% and also %%", vec![]), s("% and also %"));
1303        assert_eq!(fmt("%%%s%%", vec![s("text")]), s("%text%"));
1304        assert_eq!(fmt("%d", vec![Value::Int(42)]), s("42"));
1305        assert_eq!(fmt("%d", vec![Value::UInt(64)]), s("64"));
1306        assert_eq!(fmt("%b", vec![Value::Int(5)]), s("101"));
1307        assert_eq!(fmt("%o", vec![Value::Int(11)]), s("13"));
1308        assert_eq!(fmt("%x", vec![Value::Int(30)]), s("1e"));
1309        assert_eq!(fmt("%X", vec![Value::Int(30)]), s("1E"));
1310    }
1311
1312    #[test]
1313    #[allow(clippy::approx_constant)]
1314    fn test_format_float() {
1315        let funcs = string_extension();
1316        let fmt = |f: &str, args: Vec<Value>| {
1317            call_overload(&funcs, "format", 0, &[s(f), Value::List(Arc::from(args))])
1318        };
1319
1320        assert_eq!(fmt("%.3f", vec![Value::Double(1.2345)]), s("1.234"));
1321        assert_eq!(fmt("%f", vec![Value::Double(2.71828)]), s("2.718280"));
1322        assert_eq!(fmt("%f", vec![Value::Int(2)]), s("2.000000"));
1323        assert_eq!(fmt("%f", vec![Value::Double(f64::NAN)]), s("NaN"));
1324        assert_eq!(fmt("%f", vec![Value::Double(f64::INFINITY)]), s("Infinity"));
1325    }
1326
1327    #[test]
1328    #[allow(clippy::approx_constant)]
1329    fn test_format_scientific() {
1330        let funcs = string_extension();
1331        let fmt = |f: &str, args: Vec<Value>| {
1332            call_overload(&funcs, "format", 0, &[s(f), Value::List(Arc::from(args))])
1333        };
1334
1335        assert_eq!(fmt("%e", vec![Value::Double(2.71828)]), s("2.718280e+00"));
1336        assert_eq!(
1337            fmt("%.6e", vec![Value::Double(1052.032911275)]),
1338            s("1.052033e+03")
1339        );
1340        assert_eq!(fmt("%e", vec![Value::Double(f64::NAN)]), s("NaN"));
1341        assert_eq!(fmt("%e", vec![Value::Double(f64::INFINITY)]), s("Infinity"));
1342    }
1343
1344    #[test]
1345    fn test_format_hex_on_strings() {
1346        let funcs = string_extension();
1347        let fmt = |f: &str, args: Vec<Value>| {
1348            call_overload(&funcs, "format", 0, &[s(f), Value::List(Arc::from(args))])
1349        };
1350
1351        assert_eq!(
1352            fmt("%x", vec![s("Hello world!")]),
1353            s("48656c6c6f20776f726c6421")
1354        );
1355        assert_eq!(
1356            fmt("%X", vec![s("Hello world!")]),
1357            s("48656C6C6F20776F726C6421")
1358        );
1359    }
1360}