sql_cli/sql/functions/
string_methods.rs

1use anyhow::{anyhow, Result};
2
3use super::{ArgCount, FunctionCategory, FunctionSignature, SqlFunction};
4use crate::data::datatable::DataValue;
5
6/// Trait for method-style functions that operate on a column/value
7/// These are called with dot notation: column.Method(args)
8pub trait MethodFunction: SqlFunction {
9    /// Check if this method function handles the given method name
10    fn handles_method(&self, method_name: &str) -> bool;
11
12    /// Get the method name this function handles
13    fn method_name(&self) -> &'static str;
14
15    /// Evaluate as a method (first arg is implicit 'self')
16    fn evaluate_method(&self, receiver: &DataValue, args: &[DataValue]) -> Result<DataValue> {
17        // Default implementation: prepend receiver to args and call evaluate
18        let mut full_args = vec![receiver.clone()];
19        full_args.extend_from_slice(args);
20        self.evaluate(&full_args)
21    }
22}
23
24/// `ToUpper` method function
25pub struct ToUpperMethod;
26
27impl SqlFunction for ToUpperMethod {
28    fn signature(&self) -> FunctionSignature {
29        FunctionSignature {
30            name: "TOUPPER",
31            category: FunctionCategory::String,
32            arg_count: ArgCount::Fixed(1),
33            description: "Converts string to uppercase",
34            returns: "STRING",
35            examples: vec![
36                "SELECT name.ToUpper() FROM users",
37                "SELECT TOUPPER(name) FROM users",
38            ],
39        }
40    }
41
42    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
43        self.validate_args(args)?;
44
45        match &args[0] {
46            DataValue::String(s) => Ok(DataValue::String(s.to_uppercase())),
47            DataValue::InternedString(s) => Ok(DataValue::String(s.to_uppercase())),
48            DataValue::Null => Ok(DataValue::Null),
49            _ => Err(anyhow!("ToUpper expects a string argument")),
50        }
51    }
52}
53
54impl MethodFunction for ToUpperMethod {
55    fn handles_method(&self, method_name: &str) -> bool {
56        method_name.eq_ignore_ascii_case("ToUpper")
57            || method_name.eq_ignore_ascii_case("ToUpperCase")
58    }
59
60    fn method_name(&self) -> &'static str {
61        "ToUpper"
62    }
63}
64
65/// `ToLower` method function
66pub struct ToLowerMethod;
67
68impl SqlFunction for ToLowerMethod {
69    fn signature(&self) -> FunctionSignature {
70        FunctionSignature {
71            name: "TOLOWER",
72            category: FunctionCategory::String,
73            arg_count: ArgCount::Fixed(1),
74            description: "Converts string to lowercase",
75            returns: "STRING",
76            examples: vec![
77                "SELECT name.ToLower() FROM users",
78                "SELECT TOLOWER(name) FROM users",
79            ],
80        }
81    }
82
83    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
84        self.validate_args(args)?;
85
86        match &args[0] {
87            DataValue::String(s) => Ok(DataValue::String(s.to_lowercase())),
88            DataValue::InternedString(s) => Ok(DataValue::String(s.to_lowercase())),
89            DataValue::Null => Ok(DataValue::Null),
90            _ => Err(anyhow!("ToLower expects a string argument")),
91        }
92    }
93}
94
95impl MethodFunction for ToLowerMethod {
96    fn handles_method(&self, method_name: &str) -> bool {
97        method_name.eq_ignore_ascii_case("ToLower")
98            || method_name.eq_ignore_ascii_case("ToLowerCase")
99    }
100
101    fn method_name(&self) -> &'static str {
102        "ToLower"
103    }
104}
105
106/// Trim method function
107pub struct TrimMethod;
108
109impl SqlFunction for TrimMethod {
110    fn signature(&self) -> FunctionSignature {
111        FunctionSignature {
112            name: "TRIM",
113            category: FunctionCategory::String,
114            arg_count: ArgCount::Fixed(1),
115            description: "Removes leading and trailing whitespace",
116            returns: "STRING",
117            examples: vec![
118                "SELECT name.Trim() FROM users",
119                "SELECT TRIM(name) FROM users",
120            ],
121        }
122    }
123
124    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
125        self.validate_args(args)?;
126
127        match &args[0] {
128            DataValue::String(s) => Ok(DataValue::String(s.trim().to_string())),
129            DataValue::InternedString(s) => Ok(DataValue::String(s.trim().to_string())),
130            DataValue::Null => Ok(DataValue::Null),
131            _ => Err(anyhow!("Trim expects a string argument")),
132        }
133    }
134}
135
136impl MethodFunction for TrimMethod {
137    fn handles_method(&self, method_name: &str) -> bool {
138        method_name.eq_ignore_ascii_case("Trim")
139    }
140
141    fn method_name(&self) -> &'static str {
142        "Trim"
143    }
144}
145
146/// TrimStart method function
147pub struct TrimStartMethod;
148
149impl SqlFunction for TrimStartMethod {
150    fn signature(&self) -> FunctionSignature {
151        FunctionSignature {
152            name: "TRIMSTART",
153            category: FunctionCategory::String,
154            arg_count: ArgCount::Fixed(1),
155            description: "Removes leading whitespace",
156            returns: "STRING",
157            examples: vec![
158                "SELECT name.TrimStart() FROM users",
159                "SELECT TRIMSTART(name) FROM users",
160            ],
161        }
162    }
163
164    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
165        self.validate_args(args)?;
166
167        match &args[0] {
168            DataValue::String(s) => Ok(DataValue::String(s.trim_start().to_string())),
169            DataValue::InternedString(s) => Ok(DataValue::String(s.trim_start().to_string())),
170            DataValue::Null => Ok(DataValue::Null),
171            _ => Err(anyhow!("TrimStart expects a string argument")),
172        }
173    }
174}
175
176impl MethodFunction for TrimStartMethod {
177    fn handles_method(&self, method_name: &str) -> bool {
178        method_name.eq_ignore_ascii_case("TrimStart")
179    }
180
181    fn method_name(&self) -> &'static str {
182        "TrimStart"
183    }
184}
185
186/// TrimEnd method function
187pub struct TrimEndMethod;
188
189impl SqlFunction for TrimEndMethod {
190    fn signature(&self) -> FunctionSignature {
191        FunctionSignature {
192            name: "TRIMEND",
193            category: FunctionCategory::String,
194            arg_count: ArgCount::Fixed(1),
195            description: "Removes trailing whitespace",
196            returns: "STRING",
197            examples: vec![
198                "SELECT name.TrimEnd() FROM users",
199                "SELECT TRIMEND(name) FROM users",
200            ],
201        }
202    }
203
204    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
205        self.validate_args(args)?;
206
207        match &args[0] {
208            DataValue::String(s) => Ok(DataValue::String(s.trim_end().to_string())),
209            DataValue::InternedString(s) => Ok(DataValue::String(s.trim_end().to_string())),
210            DataValue::Null => Ok(DataValue::Null),
211            _ => Err(anyhow!("TrimEnd expects a string argument")),
212        }
213    }
214}
215
216impl MethodFunction for TrimEndMethod {
217    fn handles_method(&self, method_name: &str) -> bool {
218        method_name.eq_ignore_ascii_case("TrimEnd")
219    }
220
221    fn method_name(&self) -> &'static str {
222        "TrimEnd"
223    }
224}
225
226/// Length method function (returns integer)
227pub struct LengthMethod;
228
229impl SqlFunction for LengthMethod {
230    fn signature(&self) -> FunctionSignature {
231        FunctionSignature {
232            name: "LENGTH",
233            category: FunctionCategory::String,
234            arg_count: ArgCount::Fixed(1),
235            description: "Returns the length of a string",
236            returns: "INTEGER",
237            examples: vec![
238                "SELECT name.Length() FROM users",
239                "SELECT LENGTH(name) FROM users",
240            ],
241        }
242    }
243
244    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
245        self.validate_args(args)?;
246
247        match &args[0] {
248            DataValue::String(s) => Ok(DataValue::Integer(s.len() as i64)),
249            DataValue::InternedString(s) => Ok(DataValue::Integer(s.len() as i64)),
250            DataValue::Null => Ok(DataValue::Null),
251            _ => Err(anyhow!("Length expects a string argument")),
252        }
253    }
254}
255
256impl MethodFunction for LengthMethod {
257    fn handles_method(&self, method_name: &str) -> bool {
258        method_name.eq_ignore_ascii_case("Length") || method_name.eq_ignore_ascii_case("Len")
259    }
260
261    fn method_name(&self) -> &'static str {
262        "Length"
263    }
264}
265
266/// Contains method function (returns boolean)
267pub struct ContainsMethod;
268
269impl SqlFunction for ContainsMethod {
270    fn signature(&self) -> FunctionSignature {
271        FunctionSignature {
272            name: "CONTAINS",
273            category: FunctionCategory::String,
274            arg_count: ArgCount::Fixed(2),
275            description: "Checks if string contains substring",
276            returns: "BOOLEAN",
277            examples: vec![
278                "SELECT * FROM users WHERE name.Contains('john')",
279                "SELECT CONTAINS(name, 'john') FROM users",
280            ],
281        }
282    }
283
284    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
285        self.validate_args(args)?;
286
287        let haystack = match &args[0] {
288            DataValue::String(s) => s.as_str(),
289            DataValue::InternedString(s) => s.as_str(),
290            DataValue::Null => return Ok(DataValue::Boolean(false)),
291            _ => return Err(anyhow!("Contains expects string arguments")),
292        };
293
294        let needle = match &args[1] {
295            DataValue::String(s) => s.as_str(),
296            DataValue::InternedString(s) => s.as_str(),
297            DataValue::Null => return Ok(DataValue::Boolean(false)),
298            _ => return Err(anyhow!("Contains expects string arguments")),
299        };
300
301        Ok(DataValue::Boolean(haystack.contains(needle)))
302    }
303}
304
305impl MethodFunction for ContainsMethod {
306    fn handles_method(&self, method_name: &str) -> bool {
307        method_name.eq_ignore_ascii_case("Contains")
308    }
309
310    fn method_name(&self) -> &'static str {
311        "Contains"
312    }
313}
314
315/// `StartsWith` method function
316pub struct StartsWithMethod;
317
318impl SqlFunction for StartsWithMethod {
319    fn signature(&self) -> FunctionSignature {
320        FunctionSignature {
321            name: "STARTSWITH",
322            category: FunctionCategory::String,
323            arg_count: ArgCount::Fixed(2),
324            description: "Checks if string starts with prefix",
325            returns: "BOOLEAN",
326            examples: vec![
327                "SELECT * FROM users WHERE name.StartsWith('John')",
328                "SELECT STARTSWITH(name, 'John') FROM users",
329            ],
330        }
331    }
332
333    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
334        self.validate_args(args)?;
335
336        let string = match &args[0] {
337            DataValue::String(s) => s.as_str(),
338            DataValue::InternedString(s) => s.as_str(),
339            DataValue::Null => return Ok(DataValue::Boolean(false)),
340            _ => return Err(anyhow!("StartsWith expects string arguments")),
341        };
342
343        let prefix = match &args[1] {
344            DataValue::String(s) => s.as_str(),
345            DataValue::InternedString(s) => s.as_str(),
346            DataValue::Null => return Ok(DataValue::Boolean(false)),
347            _ => return Err(anyhow!("StartsWith expects string arguments")),
348        };
349
350        Ok(DataValue::Boolean(string.starts_with(prefix)))
351    }
352}
353
354impl MethodFunction for StartsWithMethod {
355    fn handles_method(&self, method_name: &str) -> bool {
356        method_name.eq_ignore_ascii_case("StartsWith")
357    }
358
359    fn method_name(&self) -> &'static str {
360        "StartsWith"
361    }
362}
363
364/// `EndsWith` method function
365pub struct EndsWithMethod;
366
367impl SqlFunction for EndsWithMethod {
368    fn signature(&self) -> FunctionSignature {
369        FunctionSignature {
370            name: "ENDSWITH",
371            category: FunctionCategory::String,
372            arg_count: ArgCount::Fixed(2),
373            description: "Checks if string ends with suffix",
374            returns: "BOOLEAN",
375            examples: vec![
376                "SELECT * FROM users WHERE email.EndsWith('.com')",
377                "SELECT ENDSWITH(email, '.com') FROM users",
378            ],
379        }
380    }
381
382    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
383        self.validate_args(args)?;
384
385        let string = match &args[0] {
386            DataValue::String(s) => s.as_str(),
387            DataValue::InternedString(s) => s.as_str(),
388            DataValue::Null => return Ok(DataValue::Boolean(false)),
389            _ => return Err(anyhow!("EndsWith expects string arguments")),
390        };
391
392        let suffix = match &args[1] {
393            DataValue::String(s) => s.as_str(),
394            DataValue::InternedString(s) => s.as_str(),
395            DataValue::Null => return Ok(DataValue::Boolean(false)),
396            _ => return Err(anyhow!("EndsWith expects string arguments")),
397        };
398
399        Ok(DataValue::Boolean(string.ends_with(suffix)))
400    }
401}
402
403impl MethodFunction for EndsWithMethod {
404    fn handles_method(&self, method_name: &str) -> bool {
405        method_name.eq_ignore_ascii_case("EndsWith")
406    }
407
408    fn method_name(&self) -> &'static str {
409        "EndsWith"
410    }
411}
412
413/// Substring method function
414pub struct SubstringMethod;
415
416impl SqlFunction for SubstringMethod {
417    fn signature(&self) -> FunctionSignature {
418        FunctionSignature {
419            name: "SUBSTRING",
420            category: FunctionCategory::String,
421            arg_count: ArgCount::Range(2, 3),
422            description: "Extracts substring from string",
423            returns: "STRING",
424            examples: vec![
425                "SELECT name.Substring(0, 5) FROM users",
426                "SELECT SUBSTRING(name, 0, 5) FROM users",
427            ],
428        }
429    }
430
431    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
432        if args.len() < 2 || args.len() > 3 {
433            return Err(anyhow!("Substring expects 2 or 3 arguments"));
434        }
435
436        let string = match &args[0] {
437            DataValue::String(s) => s.as_str(),
438            DataValue::InternedString(s) => s.as_str(),
439            DataValue::Null => return Ok(DataValue::Null),
440            _ => return Err(anyhow!("Substring expects a string as first argument")),
441        };
442
443        let start = match &args[1] {
444            DataValue::Integer(i) => *i as usize,
445            _ => return Err(anyhow!("Substring expects integer start position")),
446        };
447
448        let result = if args.len() == 3 {
449            let length = match &args[2] {
450                DataValue::Integer(i) => *i as usize,
451                _ => return Err(anyhow!("Substring expects integer length")),
452            };
453
454            let end = (start + length).min(string.len());
455            string.chars().skip(start).take(end - start).collect()
456        } else {
457            string.chars().skip(start).collect()
458        };
459
460        Ok(DataValue::String(result))
461    }
462}
463
464impl MethodFunction for SubstringMethod {
465    fn handles_method(&self, method_name: &str) -> bool {
466        method_name.eq_ignore_ascii_case("Substring") || method_name.eq_ignore_ascii_case("Substr")
467    }
468
469    fn method_name(&self) -> &'static str {
470        "Substring"
471    }
472}
473
474/// Replace method function
475pub struct ReplaceMethod;
476
477impl SqlFunction for ReplaceMethod {
478    fn signature(&self) -> FunctionSignature {
479        FunctionSignature {
480            name: "REPLACE",
481            category: FunctionCategory::String,
482            arg_count: ArgCount::Fixed(3),
483            description: "Replaces all occurrences of a substring",
484            returns: "STRING",
485            examples: vec![
486                "SELECT name.Replace('John', 'Jane') FROM users",
487                "SELECT REPLACE(name, 'John', 'Jane') FROM users",
488            ],
489        }
490    }
491
492    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
493        self.validate_args(args)?;
494
495        let string = match &args[0] {
496            DataValue::String(s) => s.as_str(),
497            DataValue::InternedString(s) => s.as_str(),
498            DataValue::Null => return Ok(DataValue::Null),
499            _ => return Err(anyhow!("Replace expects string arguments")),
500        };
501
502        let from = match &args[1] {
503            DataValue::String(s) => s.as_str(),
504            DataValue::InternedString(s) => s.as_str(),
505            _ => return Err(anyhow!("Replace expects string arguments")),
506        };
507
508        let to = match &args[2] {
509            DataValue::String(s) => s.as_str(),
510            DataValue::InternedString(s) => s.as_str(),
511            _ => return Err(anyhow!("Replace expects string arguments")),
512        };
513
514        Ok(DataValue::String(string.replace(from, to)))
515    }
516}
517
518impl MethodFunction for ReplaceMethod {
519    fn handles_method(&self, method_name: &str) -> bool {
520        method_name.eq_ignore_ascii_case("Replace")
521    }
522
523    fn method_name(&self) -> &'static str {
524        "Replace"
525    }
526}
527
528/// MID function - Extract substring (SQL/Excel compatible, 1-based indexing)
529pub struct MidFunction;
530
531impl SqlFunction for MidFunction {
532    fn signature(&self) -> FunctionSignature {
533        FunctionSignature {
534            name: "MID",
535            category: FunctionCategory::String,
536            arg_count: ArgCount::Fixed(3),
537            description: "Extract substring from text (1-based indexing)",
538            returns: "STRING",
539            examples: vec![
540                "SELECT MID('Hello', 1, 3)", // Returns 'Hel'
541                "SELECT MID('World', 2, 3)", // Returns 'orl'
542                "SELECT MID(name, 1, 5) FROM table",
543            ],
544        }
545    }
546
547    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
548        self.validate_args(args)?;
549
550        // Get the string
551        let text = match &args[0] {
552            DataValue::String(s) => s.clone(),
553            DataValue::InternedString(s) => s.to_string(),
554            DataValue::Integer(n) => n.to_string(),
555            DataValue::Float(f) => f.to_string(),
556            DataValue::Null => String::new(),
557            _ => return Err(anyhow!("MID first argument must be convertible to text")),
558        };
559
560        // Get start position (1-based)
561        let start_pos = match &args[1] {
562            DataValue::Integer(n) => *n,
563            DataValue::Float(f) => *f as i64,
564            _ => return Err(anyhow!("MID start position must be a number")),
565        };
566
567        // Get length
568        let length = match &args[2] {
569            DataValue::Integer(n) => *n,
570            DataValue::Float(f) => *f as i64,
571            _ => return Err(anyhow!("MID length must be a number")),
572        };
573
574        // Validate arguments
575        if start_pos < 1 {
576            return Err(anyhow!("MID start position must be >= 1"));
577        }
578        if length < 0 {
579            return Err(anyhow!("MID length must be >= 0"));
580        }
581
582        // Convert to 0-based index
583        let start_idx = (start_pos - 1) as usize;
584        let chars: Vec<char> = text.chars().collect();
585
586        // If start position is beyond string length, return empty string
587        if start_idx >= chars.len() {
588            return Ok(DataValue::String(String::new()));
589        }
590
591        // Extract substring
592        let end_idx = std::cmp::min(start_idx + length as usize, chars.len());
593        let result: String = chars[start_idx..end_idx].iter().collect();
594
595        Ok(DataValue::String(result))
596    }
597}
598
599/// UPPER function - Convert string to uppercase
600pub struct UpperFunction;
601
602impl SqlFunction for UpperFunction {
603    fn signature(&self) -> FunctionSignature {
604        FunctionSignature {
605            name: "UPPER",
606            category: FunctionCategory::String,
607            arg_count: ArgCount::Fixed(1),
608            description: "Convert string to uppercase",
609            returns: "STRING",
610            examples: vec![
611                "SELECT UPPER('hello')", // Returns 'HELLO'
612                "SELECT UPPER(name) FROM table",
613            ],
614        }
615    }
616
617    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
618        self.validate_args(args)?;
619
620        match &args[0] {
621            DataValue::String(s) => Ok(DataValue::String(s.to_uppercase())),
622            DataValue::InternedString(s) => Ok(DataValue::String(s.to_uppercase())),
623            DataValue::Null => Ok(DataValue::Null),
624            _ => Err(anyhow!("UPPER expects a string argument")),
625        }
626    }
627}
628
629/// LOWER function - Convert string to lowercase
630pub struct LowerFunction;
631
632impl SqlFunction for LowerFunction {
633    fn signature(&self) -> FunctionSignature {
634        FunctionSignature {
635            name: "LOWER",
636            category: FunctionCategory::String,
637            arg_count: ArgCount::Fixed(1),
638            description: "Convert string to lowercase",
639            returns: "STRING",
640            examples: vec![
641                "SELECT LOWER('HELLO')", // Returns 'hello'
642                "SELECT LOWER(name) FROM table",
643            ],
644        }
645    }
646
647    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
648        self.validate_args(args)?;
649
650        match &args[0] {
651            DataValue::String(s) => Ok(DataValue::String(s.to_lowercase())),
652            DataValue::InternedString(s) => Ok(DataValue::String(s.to_lowercase())),
653            DataValue::Null => Ok(DataValue::Null),
654            _ => Err(anyhow!("LOWER expects a string argument")),
655        }
656    }
657}
658
659/// TRIM function - Remove leading and trailing whitespace
660pub struct TrimFunction;
661
662impl SqlFunction for TrimFunction {
663    fn signature(&self) -> FunctionSignature {
664        FunctionSignature {
665            name: "TRIM",
666            category: FunctionCategory::String,
667            arg_count: ArgCount::Fixed(1),
668            description: "Remove leading and trailing whitespace",
669            returns: "STRING",
670            examples: vec![
671                "SELECT TRIM('  hello  ')", // Returns 'hello'
672                "SELECT TRIM(description) FROM table",
673            ],
674        }
675    }
676
677    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
678        self.validate_args(args)?;
679
680        match &args[0] {
681            DataValue::String(s) => Ok(DataValue::String(s.trim().to_string())),
682            DataValue::InternedString(s) => Ok(DataValue::String(s.trim().to_string())),
683            DataValue::Null => Ok(DataValue::Null),
684            _ => Err(anyhow!("TRIM expects a string argument")),
685        }
686    }
687}
688
689/// TEXTJOIN function - Join multiple text values with a delimiter
690pub struct TextJoinFunction;
691
692impl SqlFunction for TextJoinFunction {
693    fn signature(&self) -> FunctionSignature {
694        FunctionSignature {
695            name: "TEXTJOIN",
696            category: FunctionCategory::String,
697            arg_count: ArgCount::Variadic,
698            description: "Join multiple text values with a delimiter",
699            returns: "STRING",
700            examples: vec![
701                "SELECT TEXTJOIN(',', 1, 'a', 'b', 'c')", // Returns 'a,b,c'
702                "SELECT TEXTJOIN(' - ', 1, name, city) FROM table",
703                "SELECT TEXTJOIN('|', 0, col1, col2, col3) FROM table",
704            ],
705        }
706    }
707
708    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
709        if args.len() < 3 {
710            return Err(anyhow!("TEXTJOIN requires at least 3 arguments: delimiter, ignore_empty, text1, [text2, ...]"));
711        }
712
713        // First argument: delimiter
714        let delimiter = match &args[0] {
715            DataValue::String(s) => s.clone(),
716            DataValue::InternedString(s) => s.to_string(),
717            DataValue::Integer(n) => n.to_string(),
718            DataValue::Float(f) => f.to_string(),
719            DataValue::Boolean(b) => b.to_string(),
720            DataValue::Null => String::new(),
721            _ => String::new(),
722        };
723
724        // Second argument: ignore_empty (treat as boolean - 0 is false, anything else is true)
725        let ignore_empty = match &args[1] {
726            DataValue::Integer(n) => *n != 0,
727            DataValue::Float(f) => *f != 0.0,
728            DataValue::Boolean(b) => *b,
729            DataValue::String(s) => !s.is_empty() && s != "0" && s.to_lowercase() != "false",
730            DataValue::InternedString(s) => {
731                !s.is_empty() && s.as_str() != "0" && s.to_lowercase() != "false"
732            }
733            DataValue::Null => false,
734            _ => true,
735        };
736
737        // Remaining arguments: values to join
738        let mut values = Vec::new();
739        for i in 2..args.len() {
740            let string_value = match &args[i] {
741                DataValue::String(s) => Some(s.clone()),
742                DataValue::InternedString(s) => Some(s.to_string()),
743                DataValue::Integer(n) => Some(n.to_string()),
744                DataValue::Float(f) => Some(f.to_string()),
745                DataValue::Boolean(b) => Some(b.to_string()),
746                DataValue::DateTime(dt) => Some(dt.clone()),
747                DataValue::Vector(v) => {
748                    let components: Vec<String> = v.iter().map(|f| f.to_string()).collect();
749                    Some(format!("[{}]", components.join(",")))
750                }
751                DataValue::Null => {
752                    if ignore_empty {
753                        None
754                    } else {
755                        Some(String::new())
756                    }
757                }
758            };
759
760            if let Some(s) = string_value {
761                if !ignore_empty || !s.is_empty() {
762                    values.push(s);
763                }
764            }
765        }
766
767        Ok(DataValue::String(values.join(&delimiter)))
768    }
769}
770
/// Edit distance (Levenshtein distance) function
pub struct EditDistanceFunction;

impl EditDistanceFunction {
    /// Calculate the Levenshtein distance between two strings.
    ///
    /// The distance is the minimum number of single-character insertions,
    /// deletions and substitutions needed to turn `s1` into `s2`.
    /// Characters are Unicode scalar values, so multi-byte text such as
    /// `"é"` counts as one character rather than as its UTF-8 bytes.
    #[must_use]
    pub fn calculate_edit_distance(s1: &str, s2: &str) -> usize {
        // Work on characters throughout: sizing the table by byte length
        // while filling it by character index leaves the final cell
        // uncomputed for non-ASCII input.
        let source: Vec<char> = s1.chars().collect();
        let target: Vec<char> = s2.chars().collect();

        // Only two rows of the DP table are ever needed.
        // `previous[j]` is the distance between the already processed
        // prefix of `source` and the first `j` characters of `target`.
        let mut previous: Vec<usize> = (0..=target.len()).collect();
        let mut current: Vec<usize> = vec![0; target.len() + 1];

        for (i, &c1) in source.iter().enumerate() {
            // Turning i + 1 source characters into nothing takes i + 1 deletions.
            current[0] = i + 1;

            for (j, &c2) in target.iter().enumerate() {
                let deletion = previous[j + 1] + 1;
                let insertion = current[j] + 1;
                let substitution = previous[j] + usize::from(c1 != c2);
                current[j + 1] = deletion.min(insertion).min(substitution);
            }

            std::mem::swap(&mut previous, &mut current);
        }

        // After the final swap `previous` holds the last completed row
        // (or the initial row when `source` is empty).
        previous[target.len()]
    }
}
805
806impl SqlFunction for EditDistanceFunction {
807    fn signature(&self) -> FunctionSignature {
808        FunctionSignature {
809            name: "EDIT_DISTANCE",
810            category: FunctionCategory::String,
811            arg_count: ArgCount::Fixed(2),
812            description: "Calculate the Levenshtein edit distance between two strings",
813            returns: "INTEGER",
814            examples: vec![
815                "SELECT EDIT_DISTANCE('kitten', 'sitting')",
816                "SELECT EDIT_DISTANCE(name, 'John') FROM users",
817                "SELECT * FROM users WHERE EDIT_DISTANCE(name, 'Smith') <= 2",
818            ],
819        }
820    }
821
822    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
823        self.validate_args(args)?;
824
825        let s1 = match &args[0] {
826            DataValue::String(s) => s.clone(),
827            DataValue::InternedString(s) => s.to_string(),
828            DataValue::Null => return Ok(DataValue::Null),
829            _ => return Err(anyhow!("EDIT_DISTANCE expects string arguments")),
830        };
831
832        let s2 = match &args[1] {
833            DataValue::String(s) => s.clone(),
834            DataValue::InternedString(s) => s.to_string(),
835            DataValue::Null => return Ok(DataValue::Null),
836            _ => return Err(anyhow!("EDIT_DISTANCE expects string arguments")),
837        };
838
839        let distance = Self::calculate_edit_distance(&s1, &s2);
840        Ok(DataValue::Integer(distance as i64))
841    }
842}
843
844/// FREQUENCY function - Count occurrences of a substring in a string
845pub struct FrequencyFunction;
846
847impl SqlFunction for FrequencyFunction {
848    fn signature(&self) -> FunctionSignature {
849        FunctionSignature {
850            name: "FREQUENCY",
851            category: FunctionCategory::String,
852            arg_count: ArgCount::Fixed(2),
853            description: "Count occurrences of a substring within a string",
854            returns: "INTEGER",
855            examples: vec![
856                "SELECT FREQUENCY('hello world', 'o')",  // Returns 2
857                "SELECT FREQUENCY('mississippi', 'ss')", // Returns 2
858                "SELECT FREQUENCY(text_column, 'error') FROM logs",
859                "SELECT name, FREQUENCY(name, 'a') as a_count FROM users",
860            ],
861        }
862    }
863
864    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
865        self.validate_args(args)?;
866
867        // Get the string to search in
868        let text = match &args[0] {
869            DataValue::String(s) => s.clone(),
870            DataValue::InternedString(s) => s.to_string(),
871            DataValue::Null => return Ok(DataValue::Integer(0)),
872            _ => return Err(anyhow!("FREQUENCY expects string as first argument")),
873        };
874
875        // Get the substring to search for
876        let search = match &args[1] {
877            DataValue::String(s) => s.clone(),
878            DataValue::InternedString(s) => s.to_string(),
879            DataValue::Null => return Ok(DataValue::Integer(0)),
880            _ => return Err(anyhow!("FREQUENCY expects string as second argument")),
881        };
882
883        // Empty search string returns 0
884        if search.is_empty() {
885            return Ok(DataValue::Integer(0));
886        }
887
888        // Count occurrences
889        let count = text.matches(&search).count();
890        Ok(DataValue::Integer(count as i64))
891    }
892}
893
894/// IndexOf method function - finds the position of a substring
895pub struct IndexOfMethod;
896
897impl SqlFunction for IndexOfMethod {
898    fn signature(&self) -> FunctionSignature {
899        FunctionSignature {
900            name: "INDEXOF",
901            category: FunctionCategory::String,
902            arg_count: ArgCount::Fixed(2),
903            description: "Returns the position of the first occurrence of a substring (0-based)",
904            returns: "INTEGER",
905            examples: vec![
906                "SELECT email.IndexOf('@') FROM users",
907                "SELECT INDEXOF(email, '@') FROM users",
908            ],
909        }
910    }
911
912    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
913        self.validate_args(args)?;
914
915        let string = match &args[0] {
916            DataValue::String(s) => s.as_str(),
917            DataValue::InternedString(s) => s.as_str(),
918            DataValue::Null => return Ok(DataValue::Null),
919            _ => return Err(anyhow!("IndexOf expects string arguments")),
920        };
921
922        let substring = match &args[1] {
923            DataValue::String(s) => s.as_str(),
924            DataValue::InternedString(s) => s.as_str(),
925            DataValue::Null => return Ok(DataValue::Null),
926            _ => return Err(anyhow!("IndexOf expects string arguments")),
927        };
928
929        match string.find(substring) {
930            Some(pos) => Ok(DataValue::Integer(pos as i64)),
931            None => Ok(DataValue::Integer(-1)), // Return -1 if not found
932        }
933    }
934}
935
936impl MethodFunction for IndexOfMethod {
937    fn handles_method(&self, method_name: &str) -> bool {
938        method_name.eq_ignore_ascii_case("IndexOf")
939    }
940
941    fn method_name(&self) -> &'static str {
942        "IndexOf"
943    }
944}
945
946/// INSTR function - SQL standard function for finding substring position
947/// Returns 1-based position for SQL compatibility
948pub struct InstrFunction;
949
950impl SqlFunction for InstrFunction {
951    fn signature(&self) -> FunctionSignature {
952        FunctionSignature {
953            name: "INSTR",
954            category: FunctionCategory::String,
955            arg_count: ArgCount::Fixed(2),
956            description: "Returns the position of the first occurrence of a substring (1-based, SQL standard)",
957            returns: "INTEGER",
958            examples: vec![
959                "SELECT INSTR(email, '@') FROM users",
960                "SELECT SUBSTRING(email, INSTR(email, '@') + 1) FROM users",
961            ],
962        }
963    }
964
965    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
966        self.validate_args(args)?;
967
968        let string = match &args[0] {
969            DataValue::String(s) => s.as_str(),
970            DataValue::InternedString(s) => s.as_str(),
971            DataValue::Null => return Ok(DataValue::Null),
972            _ => return Err(anyhow!("INSTR expects string arguments")),
973        };
974
975        let substring = match &args[1] {
976            DataValue::String(s) => s.as_str(),
977            DataValue::InternedString(s) => s.as_str(),
978            DataValue::Null => return Ok(DataValue::Null),
979            _ => return Err(anyhow!("INSTR expects string arguments")),
980        };
981
982        match string.find(substring) {
983            Some(pos) => Ok(DataValue::Integer((pos + 1) as i64)), // 1-based for SQL
984            None => Ok(DataValue::Integer(0)), // Return 0 if not found (SQL standard)
985        }
986    }
987}
988
989/// LEFT function - extracts leftmost n characters or up to a delimiter
990pub struct LeftFunction;
991
992impl SqlFunction for LeftFunction {
993    fn signature(&self) -> FunctionSignature {
994        FunctionSignature {
995            name: "LEFT",
996            category: FunctionCategory::String,
997            arg_count: ArgCount::Fixed(2),
998            description: "Returns leftmost n characters from string",
999            returns: "STRING",
1000            examples: vec![
1001                "SELECT LEFT(email, 5) FROM users",
1002                "SELECT LEFT('hello@world', INSTR('hello@world', '@') - 1)",
1003            ],
1004        }
1005    }
1006
1007    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1008        self.validate_args(args)?;
1009
1010        let string = match &args[0] {
1011            DataValue::String(s) => s.as_str(),
1012            DataValue::InternedString(s) => s.as_str(),
1013            DataValue::Null => return Ok(DataValue::Null),
1014            _ => return Err(anyhow!("LEFT expects a string as first argument")),
1015        };
1016
1017        let length = match &args[1] {
1018            DataValue::Integer(n) => *n as usize,
1019            DataValue::Float(f) => *f as usize,
1020            DataValue::Null => return Ok(DataValue::Null),
1021            _ => return Err(anyhow!("LEFT expects a number as second argument")),
1022        };
1023
1024        let result = if length >= string.len() {
1025            string.to_string()
1026        } else {
1027            string.chars().take(length).collect()
1028        };
1029
1030        Ok(DataValue::String(result))
1031    }
1032}
1033
1034/// RIGHT function - extracts rightmost n characters
1035pub struct RightFunction;
1036
1037impl SqlFunction for RightFunction {
1038    fn signature(&self) -> FunctionSignature {
1039        FunctionSignature {
1040            name: "RIGHT",
1041            category: FunctionCategory::String,
1042            arg_count: ArgCount::Fixed(2),
1043            description: "Returns rightmost n characters from string",
1044            returns: "STRING",
1045            examples: vec![
1046                "SELECT RIGHT(filename, 4) FROM files", // Get file extension
1047                "SELECT RIGHT(email, LENGTH(email) - INSTR(email, '@'))", // Get domain
1048            ],
1049        }
1050    }
1051
1052    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1053        self.validate_args(args)?;
1054
1055        let string = match &args[0] {
1056            DataValue::String(s) => s.as_str(),
1057            DataValue::InternedString(s) => s.as_str(),
1058            DataValue::Null => return Ok(DataValue::Null),
1059            _ => return Err(anyhow!("RIGHT expects a string as first argument")),
1060        };
1061
1062        let length = match &args[1] {
1063            DataValue::Integer(n) => *n as usize,
1064            DataValue::Float(f) => *f as usize,
1065            DataValue::Null => return Ok(DataValue::Null),
1066            _ => return Err(anyhow!("RIGHT expects a number as second argument")),
1067        };
1068
1069        let chars: Vec<char> = string.chars().collect();
1070        let start = if length >= chars.len() {
1071            0
1072        } else {
1073            chars.len() - length
1074        };
1075
1076        let result: String = chars[start..].iter().collect();
1077        Ok(DataValue::String(result))
1078    }
1079}
1080
1081/// SUBSTRING_BEFORE - returns substring before first/nth occurrence of delimiter
1082pub struct SubstringBeforeFunction;
1083
1084impl SqlFunction for SubstringBeforeFunction {
1085    fn signature(&self) -> FunctionSignature {
1086        FunctionSignature {
1087            name: "SUBSTRING_BEFORE",
1088            category: FunctionCategory::String,
1089            arg_count: ArgCount::Range(2, 3),
1090            description: "Returns substring before the first (or nth) occurrence of delimiter",
1091            returns: "STRING",
1092            examples: vec![
1093                "SELECT SUBSTRING_BEFORE(email, '@') FROM users", // Get username
1094                "SELECT SUBSTRING_BEFORE('a.b.c.d', '.', 2)",     // Get 'a.b' (before 2nd dot)
1095            ],
1096        }
1097    }
1098
1099    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1100        if args.len() < 2 || args.len() > 3 {
1101            return Err(anyhow!("SUBSTRING_BEFORE expects 2 or 3 arguments"));
1102        }
1103
1104        let string = match &args[0] {
1105            DataValue::String(s) => s.as_str(),
1106            DataValue::InternedString(s) => s.as_str(),
1107            DataValue::Null => return Ok(DataValue::Null),
1108            _ => {
1109                return Err(anyhow!(
1110                    "SUBSTRING_BEFORE expects a string as first argument"
1111                ))
1112            }
1113        };
1114
1115        let delimiter = match &args[1] {
1116            DataValue::String(s) => s.as_str(),
1117            DataValue::InternedString(s) => s.as_str(),
1118            DataValue::Null => return Ok(DataValue::Null),
1119            _ => return Err(anyhow!("SUBSTRING_BEFORE expects a string delimiter")),
1120        };
1121
1122        let occurrence = if args.len() == 3 {
1123            match &args[2] {
1124                DataValue::Integer(n) => *n as usize,
1125                DataValue::Float(f) => *f as usize,
1126                DataValue::Null => 1,
1127                _ => return Err(anyhow!("SUBSTRING_BEFORE expects a number for occurrence")),
1128            }
1129        } else {
1130            1
1131        };
1132
1133        if occurrence == 0 {
1134            return Ok(DataValue::String(String::new()));
1135        }
1136
1137        // Find the nth occurrence
1138        let mut count = 0;
1139        for (i, _) in string.match_indices(delimiter) {
1140            count += 1;
1141            if count == occurrence {
1142                return Ok(DataValue::String(string[..i].to_string()));
1143            }
1144        }
1145
1146        // If we didn't find enough occurrences, return empty string
1147        Ok(DataValue::String(String::new()))
1148    }
1149}
1150
1151/// SUBSTRING_AFTER - returns substring after first/nth occurrence of delimiter
1152pub struct SubstringAfterFunction;
1153
1154impl SqlFunction for SubstringAfterFunction {
1155    fn signature(&self) -> FunctionSignature {
1156        FunctionSignature {
1157            name: "SUBSTRING_AFTER",
1158            category: FunctionCategory::String,
1159            arg_count: ArgCount::Range(2, 3),
1160            description: "Returns substring after the first (or nth) occurrence of delimiter",
1161            returns: "STRING",
1162            examples: vec![
1163                "SELECT SUBSTRING_AFTER(email, '@') FROM users", // Get domain
1164                "SELECT SUBSTRING_AFTER('a.b.c.d', '.', 2)",     // Get 'c.d' (after 2nd dot)
1165            ],
1166        }
1167    }
1168
1169    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1170        if args.len() < 2 || args.len() > 3 {
1171            return Err(anyhow!("SUBSTRING_AFTER expects 2 or 3 arguments"));
1172        }
1173
1174        let string = match &args[0] {
1175            DataValue::String(s) => s.as_str(),
1176            DataValue::InternedString(s) => s.as_str(),
1177            DataValue::Null => return Ok(DataValue::Null),
1178            _ => {
1179                return Err(anyhow!(
1180                    "SUBSTRING_AFTER expects a string as first argument"
1181                ))
1182            }
1183        };
1184
1185        let delimiter = match &args[1] {
1186            DataValue::String(s) => s.as_str(),
1187            DataValue::InternedString(s) => s.as_str(),
1188            DataValue::Null => return Ok(DataValue::Null),
1189            _ => return Err(anyhow!("SUBSTRING_AFTER expects a string delimiter")),
1190        };
1191
1192        let occurrence = if args.len() == 3 {
1193            match &args[2] {
1194                DataValue::Integer(n) => *n as usize,
1195                DataValue::Float(f) => *f as usize,
1196                DataValue::Null => 1,
1197                _ => return Err(anyhow!("SUBSTRING_AFTER expects a number for occurrence")),
1198            }
1199        } else {
1200            1
1201        };
1202
1203        if occurrence == 0 {
1204            return Ok(DataValue::String(string.to_string()));
1205        }
1206
1207        // Find the nth occurrence
1208        let mut count = 0;
1209        for (i, _) in string.match_indices(delimiter) {
1210            count += 1;
1211            if count == occurrence {
1212                let start = i + delimiter.len();
1213                if start < string.len() {
1214                    return Ok(DataValue::String(string[start..].to_string()));
1215                } else {
1216                    return Ok(DataValue::String(String::new()));
1217                }
1218            }
1219        }
1220
1221        // If we didn't find enough occurrences, return empty string
1222        Ok(DataValue::String(String::new()))
1223    }
1224}
1225
1226/// SPLIT_PART - returns the nth part of a string split by delimiter (1-based)
1227pub struct SplitPartFunction;
1228
1229impl SqlFunction for SplitPartFunction {
1230    fn signature(&self) -> FunctionSignature {
1231        FunctionSignature {
1232            name: "SPLIT_PART",
1233            category: FunctionCategory::String,
1234            arg_count: ArgCount::Fixed(3),
1235            description: "Returns the nth part of a string split by delimiter (1-based index)",
1236            returns: "STRING",
1237            examples: vec![
1238                "SELECT SPLIT_PART('a.b.c.d', '.', 2)",        // Returns 'b'
1239                "SELECT SPLIT_PART(email, '@', 1) FROM users", // Get username
1240            ],
1241        }
1242    }
1243
1244    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1245        self.validate_args(args)?;
1246
1247        let string = match &args[0] {
1248            DataValue::String(s) => s.as_str(),
1249            DataValue::InternedString(s) => s.as_str(),
1250            DataValue::Null => return Ok(DataValue::Null),
1251            _ => return Err(anyhow!("SPLIT_PART expects a string as first argument")),
1252        };
1253
1254        let delimiter = match &args[1] {
1255            DataValue::String(s) => s.as_str(),
1256            DataValue::InternedString(s) => s.as_str(),
1257            DataValue::Null => return Ok(DataValue::Null),
1258            _ => return Err(anyhow!("SPLIT_PART expects a string delimiter")),
1259        };
1260
1261        let part_num = match &args[2] {
1262            DataValue::Integer(n) => *n as usize,
1263            DataValue::Float(f) => *f as usize,
1264            DataValue::Null => return Ok(DataValue::Null),
1265            _ => return Err(anyhow!("SPLIT_PART expects a number for part index")),
1266        };
1267
1268        if part_num == 0 {
1269            return Err(anyhow!("SPLIT_PART part index must be >= 1"));
1270        }
1271
1272        let parts: Vec<&str> = string.split(delimiter).collect();
1273
1274        if part_num <= parts.len() {
1275            Ok(DataValue::String(parts[part_num - 1].to_string()))
1276        } else {
1277            Ok(DataValue::String(String::new()))
1278        }
1279    }
1280}
1281
1282/// CHR function - Convert ASCII code to character
1283pub struct ChrFunction;
1284
1285impl SqlFunction for ChrFunction {
1286    fn signature(&self) -> FunctionSignature {
1287        FunctionSignature {
1288            name: "CHR",
1289            category: FunctionCategory::String,
1290            arg_count: ArgCount::Fixed(1),
1291            description: "Convert ASCII code to character",
1292            returns: "STRING",
1293            examples: vec![
1294                "SELECT CHR(65)", // Returns 'A'
1295                "SELECT CHR(97)", // Returns 'a'
1296                "SELECT CHR(48)", // Returns '0'
1297            ],
1298        }
1299    }
1300
1301    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1302        if args.len() != 1 {
1303            return Err(anyhow!("CHR expects exactly 1 argument"));
1304        }
1305
1306        let ascii_code = match &args[0] {
1307            DataValue::Integer(n) => *n,
1308            DataValue::Float(f) => *f as i64,
1309            DataValue::String(s) => s
1310                .parse::<i64>()
1311                .map_err(|_| anyhow!("Invalid number for CHR: {}", s))?,
1312            DataValue::InternedString(s) => s
1313                .parse::<i64>()
1314                .map_err(|_| anyhow!("Invalid number for CHR: {}", s))?,
1315            DataValue::Null => return Ok(DataValue::Null),
1316            _ => return Err(anyhow!("CHR expects a numeric argument")),
1317        };
1318
1319        // ASCII printable range is 32-126, but we'll allow 0-255
1320        if ascii_code < 0 || ascii_code > 255 {
1321            return Err(anyhow!(
1322                "CHR argument must be between 0 and 255, got {}",
1323                ascii_code
1324            ));
1325        }
1326
1327        let ch = ascii_code as u8 as char;
1328        Ok(DataValue::String(ch.to_string()))
1329    }
1330}
1331
1332/// LOREM_IPSUM function - generates Lorem Ipsum placeholder text
1333pub struct LoremIpsumFunction;
1334
1335impl SqlFunction for LoremIpsumFunction {
1336    fn signature(&self) -> FunctionSignature {
1337        FunctionSignature {
1338            name: "LOREM_IPSUM",
1339            category: FunctionCategory::String,
1340            arg_count: ArgCount::Range(1, 3),
1341            description: "Generate Lorem Ipsum placeholder text with specified number of words",
1342            returns: "STRING",
1343            examples: vec![
1344                "SELECT LOREM_IPSUM(10)",        // 10 random Lorem Ipsum words
1345                "SELECT LOREM_IPSUM(50)",        // 50 words
1346                "SELECT LOREM_IPSUM(20, 1)",     // 20 words, starting with 'Lorem ipsum...'
1347                "SELECT LOREM_IPSUM(15, 0, id)", // 15 words, use id as seed for variation
1348            ],
1349        }
1350    }
1351
1352    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1353        self.validate_args(args)?;
1354
1355        let num_words = match &args[0] {
1356            DataValue::Integer(n) if *n > 0 => *n as usize,
1357            DataValue::Float(f) if *f > 0.0 => *f as usize,
1358            DataValue::Null => return Ok(DataValue::Null),
1359            _ => return Err(anyhow!("LOREM_IPSUM requires a positive number of words")),
1360        };
1361
1362        // Check if we should start with traditional "Lorem ipsum..." opening
1363        let start_traditional = if args.len() > 1 {
1364            match &args[1] {
1365                DataValue::Integer(n) => *n != 0,
1366                DataValue::Boolean(b) => *b,
1367                _ => false,
1368            }
1369        } else {
1370            false
1371        };
1372
1373        // Get optional seed for reproducible but varied results
1374        let seed_value = if args.len() > 2 {
1375            match &args[2] {
1376                DataValue::Integer(n) => *n as u64,
1377                DataValue::Float(f) => *f as u64,
1378                DataValue::String(s) => {
1379                    // Hash the string to get a numeric seed
1380                    let mut hash = 0u64;
1381                    for byte in s.bytes() {
1382                        hash = hash.wrapping_mul(31).wrapping_add(byte as u64);
1383                    }
1384                    hash
1385                }
1386                DataValue::Null => 0,
1387                _ => 0,
1388            }
1389        } else {
1390            0
1391        };
1392
1393        // Lorem Ipsum word bank - traditional Latin placeholder text words
1394        const LOREM_WORDS: &[&str] = &[
1395            "lorem",
1396            "ipsum",
1397            "dolor",
1398            "sit",
1399            "amet",
1400            "consectetur",
1401            "adipiscing",
1402            "elit",
1403            "sed",
1404            "do",
1405            "eiusmod",
1406            "tempor",
1407            "incididunt",
1408            "ut",
1409            "labore",
1410            "et",
1411            "dolore",
1412            "magna",
1413            "aliqua",
1414            "enim",
1415            "ad",
1416            "minim",
1417            "veniam",
1418            "quis",
1419            "nostrud",
1420            "exercitation",
1421            "ullamco",
1422            "laboris",
1423            "nisi",
1424            "aliquip",
1425            "ex",
1426            "ea",
1427            "commodo",
1428            "consequat",
1429            "duis",
1430            "aute",
1431            "irure",
1432            "in",
1433            "reprehenderit",
1434            "voluptate",
1435            "velit",
1436            "esse",
1437            "cillum",
1438            "fugiat",
1439            "nulla",
1440            "pariatur",
1441            "excepteur",
1442            "sint",
1443            "occaecat",
1444            "cupidatat",
1445            "non",
1446            "proident",
1447            "sunt",
1448            "culpa",
1449            "qui",
1450            "officia",
1451            "deserunt",
1452            "mollit",
1453            "anim",
1454            "id",
1455            "est",
1456            "laborum",
1457            "perspiciatis",
1458            "unde",
1459            "omnis",
1460            "iste",
1461            "natus",
1462            "error",
1463            "voluptatem",
1464            "accusantium",
1465            "doloremque",
1466            "laudantium",
1467            "totam",
1468            "rem",
1469            "aperiam",
1470            "eaque",
1471            "ipsa",
1472            "quae",
1473            "ab",
1474            "illo",
1475            "inventore",
1476            "veritatis",
1477            "quasi",
1478            "architecto",
1479            "beatae",
1480            "vitae",
1481            "dicta",
1482            "explicabo",
1483            "nemo",
1484            "enim",
1485            "ipsam",
1486            "quia",
1487            "voluptas",
1488            "aspernatur",
1489            "aut",
1490            "odit",
1491            "fugit",
1492            "consequuntur",
1493            "magni",
1494            "dolores",
1495            "eos",
1496            "ratione",
1497            "sequi",
1498            "nesciunt",
1499            "neque",
1500            "porro",
1501            "quisquam",
1502            "dolorem",
1503            "adipisci",
1504            "numquam",
1505            "eius",
1506            "modi",
1507            "tempora",
1508            "incidunt",
1509            "magnam",
1510            "quaerat",
1511            "etiam",
1512            "minus",
1513            "soluta",
1514            "nobis",
1515            "eligendi",
1516            "optio",
1517            "cumque",
1518            "nihil",
1519            "impedit",
1520            "quo",
1521            "possimus",
1522            "suscipit",
1523            "laboriosam",
1524            "aliquid",
1525            "fuga",
1526            "distinctio",
1527            "libero",
1528            "tempore",
1529            "cum",
1530            "assumenda",
1531            "est",
1532            "omnis",
1533            "dolor",
1534            "repellendus",
1535            "temporibus",
1536            "autem",
1537            "quibusdam",
1538            "officiis",
1539            "debitis",
1540            "rerum",
1541            "necessitatibus",
1542            "saepe",
1543            "eveniet",
1544            "voluptates",
1545            "repudiandae",
1546            "molestiae",
1547            "recusandae",
1548            "itaque",
1549            "earum",
1550            "hic",
1551            "tenetur",
1552            "sapiente",
1553            "delectus",
1554            "reiciendis",
1555            "voluptatibus",
1556            "maiores",
1557            "alias",
1558            "consequatur",
1559            "perferendis",
1560            "doloribus",
1561            "asperiores",
1562            "repellat",
1563            "iusto",
1564            "odio",
1565            "dignissimos",
1566            "ducimus",
1567            "blanditiis",
1568            "praesentium",
1569            "voluptatum",
1570            "deleniti",
1571            "atque",
1572            "corrupti",
1573            "quos",
1574            "quas",
1575            "molestias",
1576            "excepturi",
1577            "occaecati",
1578            "provident",
1579            "similique",
1580            "mollitia",
1581            "animi",
1582            "illum",
1583            "dolorum",
1584            "fuga",
1585            "harum",
1586            "quidem",
1587            "rerum",
1588            "facilis",
1589            "expedita",
1590            "distinctio",
1591            "nam",
1592            "libero",
1593            "tempore",
1594            "cum",
1595            "soluta",
1596            "nobis",
1597            "eligendi",
1598            "optio",
1599            "cumque",
1600            "nihil",
1601            "impedit",
1602            "minus",
1603            "quod",
1604            "maxime",
1605            "placeat",
1606            "facere",
1607            "possimus",
1608            "omnis",
1609            "voluptas",
1610            "assumenda",
1611        ];
1612
1613        let mut result = Vec::with_capacity(num_words);
1614
1615        if start_traditional && num_words > 0 {
1616            // Start with traditional "Lorem ipsum dolor sit amet..."
1617            let traditional_start = ["lorem", "ipsum", "dolor", "sit", "amet"];
1618            let take_count = num_words.min(traditional_start.len());
1619            for i in 0..take_count {
1620                result.push(traditional_start[i]);
1621            }
1622
1623            // Fill remaining with random words
1624            let seed = if seed_value != 0 {
1625                seed_value
1626            } else {
1627                use std::time::{SystemTime, UNIX_EPOCH};
1628                SystemTime::now()
1629                    .duration_since(UNIX_EPOCH)
1630                    .unwrap_or_default()
1631                    .as_nanos() as u64
1632            };
1633
1634            let mut rng = seed.wrapping_mul(num_words as u64); // Combine seed with word count
1635            for i in take_count..num_words {
1636                // Simple pseudo-random selection
1637                rng = (rng.wrapping_mul(1664525).wrapping_add(1013904223)) ^ (i as u64);
1638                let idx = (rng as usize) % LOREM_WORDS.len();
1639                result.push(LOREM_WORDS[idx]);
1640            }
1641        } else {
1642            // Generate random Lorem words
1643            let seed = if seed_value != 0 {
1644                seed_value
1645            } else {
1646                use std::time::{SystemTime, UNIX_EPOCH};
1647                SystemTime::now()
1648                    .duration_since(UNIX_EPOCH)
1649                    .unwrap_or_default()
1650                    .as_nanos() as u64
1651            };
1652
1653            let mut rng = seed.wrapping_mul(num_words as u64).wrapping_add(12345); // Combine seed with word count
1654            for i in 0..num_words {
1655                // Simple pseudo-random selection with better entropy
1656                rng = (rng.wrapping_mul(1664525).wrapping_add(1013904223)) ^ (i as u64);
1657                let idx = (rng as usize) % LOREM_WORDS.len();
1658                result.push(LOREM_WORDS[idx]);
1659            }
1660        }
1661
1662        // Capitalize first word and add periods for readability
1663        let mut text = String::new();
1664        for (i, word) in result.iter().enumerate() {
1665            if i == 0 {
1666                // Capitalize first word
1667                text.push_str(&word.chars().next().unwrap().to_uppercase().to_string());
1668                text.push_str(&word[1..]);
1669            } else {
1670                text.push(' ');
1671                // Occasionally start a new sentence (roughly every 10-15 words)
1672                if i > 0 && ((i * 7) % 13 == 0) && i < num_words - 1 {
1673                    text.pop(); // Remove the space
1674                    text.push_str(". ");
1675                    // Capitalize next word
1676                    text.push_str(&word.chars().next().unwrap().to_uppercase().to_string());
1677                    text.push_str(&word[1..]);
1678                } else {
1679                    text.push_str(word);
1680                }
1681            }
1682        }
1683
1684        // Add final period if we generated text
1685        if !text.is_empty() {
1686            text.push('.');
1687        }
1688
1689        Ok(DataValue::String(text))
1690    }
1691}
1692
1693/// Register all string method functions
1694pub fn register_string_methods(registry: &mut super::FunctionRegistry) {
1695    use std::sync::Arc;
1696
1697    // Register new string functions (non-method versions)
1698    registry.register(Box::new(MidFunction));
1699    registry.register(Box::new(UpperFunction));
1700    registry.register(Box::new(LowerFunction));
1701    registry.register(Box::new(TrimFunction));
1702    registry.register(Box::new(TextJoinFunction));
1703    registry.register(Box::new(EditDistanceFunction));
1704    registry.register(Box::new(FrequencyFunction));
1705
1706    // Register new convenient string extraction functions
1707    registry.register(Box::new(LeftFunction));
1708    registry.register(Box::new(RightFunction));
1709    registry.register(Box::new(SubstringBeforeFunction));
1710    registry.register(Box::new(SubstringAfterFunction));
1711    registry.register(Box::new(SplitPartFunction));
1712
1713    // Register ToUpper
1714    let to_upper = Arc::new(ToUpperMethod);
1715    registry.register(Box::new(ToUpperMethod));
1716    registry.register_method(to_upper);
1717
1718    // Register ToLower
1719    let to_lower = Arc::new(ToLowerMethod);
1720    registry.register(Box::new(ToLowerMethod));
1721    registry.register_method(to_lower);
1722
1723    // Register Trim
1724    let trim = Arc::new(TrimMethod);
1725    registry.register(Box::new(TrimMethod));
1726    registry.register_method(trim);
1727
1728    // Register TrimStart
1729    let trim_start = Arc::new(TrimStartMethod);
1730    registry.register(Box::new(TrimStartMethod));
1731    registry.register_method(trim_start);
1732
1733    // Register TrimEnd
1734    let trim_end = Arc::new(TrimEndMethod);
1735    registry.register(Box::new(TrimEndMethod));
1736    registry.register_method(trim_end);
1737
1738    // Register Length
1739    let length = Arc::new(LengthMethod);
1740    registry.register(Box::new(LengthMethod));
1741    registry.register_method(length);
1742
1743    // Register Contains
1744    let contains = Arc::new(ContainsMethod);
1745    registry.register(Box::new(ContainsMethod));
1746    registry.register_method(contains);
1747
1748    // Register StartsWith
1749    let starts_with = Arc::new(StartsWithMethod);
1750    registry.register(Box::new(StartsWithMethod));
1751    registry.register_method(starts_with);
1752
1753    // Register EndsWith
1754    let ends_with = Arc::new(EndsWithMethod);
1755    registry.register(Box::new(EndsWithMethod));
1756    registry.register_method(ends_with);
1757
1758    // Register Substring
1759    let substring = Arc::new(SubstringMethod);
1760    registry.register(Box::new(SubstringMethod));
1761    registry.register_method(substring);
1762
1763    // Register Replace
1764    let replace = Arc::new(ReplaceMethod);
1765    registry.register(Box::new(ReplaceMethod));
1766    registry.register_method(replace);
1767
1768    // Register IndexOf/INSTR
1769    let indexof = Arc::new(IndexOfMethod);
1770    registry.register(Box::new(IndexOfMethod));
1771    registry.register_method(indexof.clone());
1772    // Also register as INSTR for SQL compatibility
1773    registry.register(Box::new(InstrFunction));
1774
1775    // Register CHR function
1776    registry.register(Box::new(ChrFunction));
1777
1778    // Register LOREM_IPSUM function
1779    registry.register(Box::new(LoremIpsumFunction));
1780}