sql_cli/sql/functions/
string_methods.rs

1use anyhow::{anyhow, Result};
2
3use super::{ArgCount, FunctionCategory, FunctionSignature, SqlFunction};
4use crate::data::datatable::DataValue;
5
6/// Trait for method-style functions that operate on a column/value
7/// These are called with dot notation: column.Method(args)
8pub trait MethodFunction: SqlFunction {
9    /// Check if this method function handles the given method name
10    fn handles_method(&self, method_name: &str) -> bool;
11
12    /// Get the method name this function handles
13    fn method_name(&self) -> &'static str;
14
15    /// Evaluate as a method (first arg is implicit 'self')
16    fn evaluate_method(&self, receiver: &DataValue, args: &[DataValue]) -> Result<DataValue> {
17        // Default implementation: prepend receiver to args and call evaluate
18        let mut full_args = vec![receiver.clone()];
19        full_args.extend_from_slice(args);
20        self.evaluate(&full_args)
21    }
22}
23
24/// `ToUpper` method function
25pub struct ToUpperMethod;
26
27impl SqlFunction for ToUpperMethod {
28    fn signature(&self) -> FunctionSignature {
29        FunctionSignature {
30            name: "TOUPPER",
31            category: FunctionCategory::String,
32            arg_count: ArgCount::Fixed(1),
33            description: "Converts string to uppercase",
34            returns: "STRING",
35            examples: vec![
36                "SELECT name.ToUpper() FROM users",
37                "SELECT TOUPPER(name) FROM users",
38            ],
39        }
40    }
41
42    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
43        self.validate_args(args)?;
44
45        match &args[0] {
46            DataValue::String(s) => Ok(DataValue::String(s.to_uppercase())),
47            DataValue::InternedString(s) => Ok(DataValue::String(s.to_uppercase())),
48            DataValue::Null => Ok(DataValue::Null),
49            _ => Err(anyhow!("ToUpper expects a string argument")),
50        }
51    }
52}
53
54impl MethodFunction for ToUpperMethod {
55    fn handles_method(&self, method_name: &str) -> bool {
56        method_name.eq_ignore_ascii_case("ToUpper")
57            || method_name.eq_ignore_ascii_case("ToUpperCase")
58    }
59
60    fn method_name(&self) -> &'static str {
61        "ToUpper"
62    }
63}
64
65/// `ToLower` method function
66pub struct ToLowerMethod;
67
68impl SqlFunction for ToLowerMethod {
69    fn signature(&self) -> FunctionSignature {
70        FunctionSignature {
71            name: "TOLOWER",
72            category: FunctionCategory::String,
73            arg_count: ArgCount::Fixed(1),
74            description: "Converts string to lowercase",
75            returns: "STRING",
76            examples: vec![
77                "SELECT name.ToLower() FROM users",
78                "SELECT TOLOWER(name) FROM users",
79            ],
80        }
81    }
82
83    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
84        self.validate_args(args)?;
85
86        match &args[0] {
87            DataValue::String(s) => Ok(DataValue::String(s.to_lowercase())),
88            DataValue::InternedString(s) => Ok(DataValue::String(s.to_lowercase())),
89            DataValue::Null => Ok(DataValue::Null),
90            _ => Err(anyhow!("ToLower expects a string argument")),
91        }
92    }
93}
94
95impl MethodFunction for ToLowerMethod {
96    fn handles_method(&self, method_name: &str) -> bool {
97        method_name.eq_ignore_ascii_case("ToLower")
98            || method_name.eq_ignore_ascii_case("ToLowerCase")
99    }
100
101    fn method_name(&self) -> &'static str {
102        "ToLower"
103    }
104}
105
106/// Trim method function
107pub struct TrimMethod;
108
109impl SqlFunction for TrimMethod {
110    fn signature(&self) -> FunctionSignature {
111        FunctionSignature {
112            name: "TRIM",
113            category: FunctionCategory::String,
114            arg_count: ArgCount::Fixed(1),
115            description: "Removes leading and trailing whitespace",
116            returns: "STRING",
117            examples: vec![
118                "SELECT name.Trim() FROM users",
119                "SELECT TRIM(name) FROM users",
120            ],
121        }
122    }
123
124    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
125        self.validate_args(args)?;
126
127        match &args[0] {
128            DataValue::String(s) => Ok(DataValue::String(s.trim().to_string())),
129            DataValue::InternedString(s) => Ok(DataValue::String(s.trim().to_string())),
130            DataValue::Null => Ok(DataValue::Null),
131            _ => Err(anyhow!("Trim expects a string argument")),
132        }
133    }
134}
135
136impl MethodFunction for TrimMethod {
137    fn handles_method(&self, method_name: &str) -> bool {
138        method_name.eq_ignore_ascii_case("Trim")
139    }
140
141    fn method_name(&self) -> &'static str {
142        "Trim"
143    }
144}
145
146/// TrimStart method function
147pub struct TrimStartMethod;
148
149impl SqlFunction for TrimStartMethod {
150    fn signature(&self) -> FunctionSignature {
151        FunctionSignature {
152            name: "TRIMSTART",
153            category: FunctionCategory::String,
154            arg_count: ArgCount::Fixed(1),
155            description: "Removes leading whitespace",
156            returns: "STRING",
157            examples: vec![
158                "SELECT name.TrimStart() FROM users",
159                "SELECT TRIMSTART(name) FROM users",
160            ],
161        }
162    }
163
164    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
165        self.validate_args(args)?;
166
167        match &args[0] {
168            DataValue::String(s) => Ok(DataValue::String(s.trim_start().to_string())),
169            DataValue::InternedString(s) => Ok(DataValue::String(s.trim_start().to_string())),
170            DataValue::Null => Ok(DataValue::Null),
171            _ => Err(anyhow!("TrimStart expects a string argument")),
172        }
173    }
174}
175
176impl MethodFunction for TrimStartMethod {
177    fn handles_method(&self, method_name: &str) -> bool {
178        method_name.eq_ignore_ascii_case("TrimStart")
179    }
180
181    fn method_name(&self) -> &'static str {
182        "TrimStart"
183    }
184}
185
186/// TrimEnd method function
187pub struct TrimEndMethod;
188
189impl SqlFunction for TrimEndMethod {
190    fn signature(&self) -> FunctionSignature {
191        FunctionSignature {
192            name: "TRIMEND",
193            category: FunctionCategory::String,
194            arg_count: ArgCount::Fixed(1),
195            description: "Removes trailing whitespace",
196            returns: "STRING",
197            examples: vec![
198                "SELECT name.TrimEnd() FROM users",
199                "SELECT TRIMEND(name) FROM users",
200            ],
201        }
202    }
203
204    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
205        self.validate_args(args)?;
206
207        match &args[0] {
208            DataValue::String(s) => Ok(DataValue::String(s.trim_end().to_string())),
209            DataValue::InternedString(s) => Ok(DataValue::String(s.trim_end().to_string())),
210            DataValue::Null => Ok(DataValue::Null),
211            _ => Err(anyhow!("TrimEnd expects a string argument")),
212        }
213    }
214}
215
216impl MethodFunction for TrimEndMethod {
217    fn handles_method(&self, method_name: &str) -> bool {
218        method_name.eq_ignore_ascii_case("TrimEnd")
219    }
220
221    fn method_name(&self) -> &'static str {
222        "TrimEnd"
223    }
224}
225
226/// Length method function (returns integer)
227pub struct LengthMethod;
228
229impl SqlFunction for LengthMethod {
230    fn signature(&self) -> FunctionSignature {
231        FunctionSignature {
232            name: "LENGTH",
233            category: FunctionCategory::String,
234            arg_count: ArgCount::Fixed(1),
235            description: "Returns the length of a string",
236            returns: "INTEGER",
237            examples: vec![
238                "SELECT name.Length() FROM users",
239                "SELECT LENGTH(name) FROM users",
240            ],
241        }
242    }
243
244    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
245        self.validate_args(args)?;
246
247        match &args[0] {
248            DataValue::String(s) => Ok(DataValue::Integer(s.len() as i64)),
249            DataValue::InternedString(s) => Ok(DataValue::Integer(s.len() as i64)),
250            DataValue::Null => Ok(DataValue::Null),
251            _ => Err(anyhow!("Length expects a string argument")),
252        }
253    }
254}
255
256impl MethodFunction for LengthMethod {
257    fn handles_method(&self, method_name: &str) -> bool {
258        method_name.eq_ignore_ascii_case("Length") || method_name.eq_ignore_ascii_case("Len")
259    }
260
261    fn method_name(&self) -> &'static str {
262        "Length"
263    }
264}
265
266/// Contains method function (returns boolean)
267pub struct ContainsMethod;
268
269impl SqlFunction for ContainsMethod {
270    fn signature(&self) -> FunctionSignature {
271        FunctionSignature {
272            name: "CONTAINS",
273            category: FunctionCategory::String,
274            arg_count: ArgCount::Fixed(2),
275            description: "Checks if string contains substring",
276            returns: "BOOLEAN",
277            examples: vec![
278                "SELECT * FROM users WHERE name.Contains('john')",
279                "SELECT CONTAINS(name, 'john') FROM users",
280            ],
281        }
282    }
283
284    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
285        self.validate_args(args)?;
286
287        let haystack = match &args[0] {
288            DataValue::String(s) => s.as_str(),
289            DataValue::InternedString(s) => s.as_str(),
290            DataValue::Null => return Ok(DataValue::Boolean(false)),
291            _ => return Err(anyhow!("Contains expects string arguments")),
292        };
293
294        let needle = match &args[1] {
295            DataValue::String(s) => s.as_str(),
296            DataValue::InternedString(s) => s.as_str(),
297            DataValue::Null => return Ok(DataValue::Boolean(false)),
298            _ => return Err(anyhow!("Contains expects string arguments")),
299        };
300
301        Ok(DataValue::Boolean(haystack.contains(needle)))
302    }
303}
304
305impl MethodFunction for ContainsMethod {
306    fn handles_method(&self, method_name: &str) -> bool {
307        method_name.eq_ignore_ascii_case("Contains")
308    }
309
310    fn method_name(&self) -> &'static str {
311        "Contains"
312    }
313}
314
315/// `StartsWith` method function
316pub struct StartsWithMethod;
317
318impl SqlFunction for StartsWithMethod {
319    fn signature(&self) -> FunctionSignature {
320        FunctionSignature {
321            name: "STARTSWITH",
322            category: FunctionCategory::String,
323            arg_count: ArgCount::Fixed(2),
324            description: "Checks if string starts with prefix",
325            returns: "BOOLEAN",
326            examples: vec![
327                "SELECT * FROM users WHERE name.StartsWith('John')",
328                "SELECT STARTSWITH(name, 'John') FROM users",
329            ],
330        }
331    }
332
333    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
334        self.validate_args(args)?;
335
336        let string = match &args[0] {
337            DataValue::String(s) => s.as_str(),
338            DataValue::InternedString(s) => s.as_str(),
339            DataValue::Null => return Ok(DataValue::Boolean(false)),
340            _ => return Err(anyhow!("StartsWith expects string arguments")),
341        };
342
343        let prefix = match &args[1] {
344            DataValue::String(s) => s.as_str(),
345            DataValue::InternedString(s) => s.as_str(),
346            DataValue::Null => return Ok(DataValue::Boolean(false)),
347            _ => return Err(anyhow!("StartsWith expects string arguments")),
348        };
349
350        Ok(DataValue::Boolean(string.starts_with(prefix)))
351    }
352}
353
354impl MethodFunction for StartsWithMethod {
355    fn handles_method(&self, method_name: &str) -> bool {
356        method_name.eq_ignore_ascii_case("StartsWith")
357    }
358
359    fn method_name(&self) -> &'static str {
360        "StartsWith"
361    }
362}
363
364/// `EndsWith` method function
365pub struct EndsWithMethod;
366
367impl SqlFunction for EndsWithMethod {
368    fn signature(&self) -> FunctionSignature {
369        FunctionSignature {
370            name: "ENDSWITH",
371            category: FunctionCategory::String,
372            arg_count: ArgCount::Fixed(2),
373            description: "Checks if string ends with suffix",
374            returns: "BOOLEAN",
375            examples: vec![
376                "SELECT * FROM users WHERE email.EndsWith('.com')",
377                "SELECT ENDSWITH(email, '.com') FROM users",
378            ],
379        }
380    }
381
382    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
383        self.validate_args(args)?;
384
385        let string = match &args[0] {
386            DataValue::String(s) => s.as_str(),
387            DataValue::InternedString(s) => s.as_str(),
388            DataValue::Null => return Ok(DataValue::Boolean(false)),
389            _ => return Err(anyhow!("EndsWith expects string arguments")),
390        };
391
392        let suffix = match &args[1] {
393            DataValue::String(s) => s.as_str(),
394            DataValue::InternedString(s) => s.as_str(),
395            DataValue::Null => return Ok(DataValue::Boolean(false)),
396            _ => return Err(anyhow!("EndsWith expects string arguments")),
397        };
398
399        Ok(DataValue::Boolean(string.ends_with(suffix)))
400    }
401}
402
403impl MethodFunction for EndsWithMethod {
404    fn handles_method(&self, method_name: &str) -> bool {
405        method_name.eq_ignore_ascii_case("EndsWith")
406    }
407
408    fn method_name(&self) -> &'static str {
409        "EndsWith"
410    }
411}
412
413/// Substring method function
414pub struct SubstringMethod;
415
416impl SqlFunction for SubstringMethod {
417    fn signature(&self) -> FunctionSignature {
418        FunctionSignature {
419            name: "SUBSTRING",
420            category: FunctionCategory::String,
421            arg_count: ArgCount::Range(2, 3),
422            description: "Extracts substring from string",
423            returns: "STRING",
424            examples: vec![
425                "SELECT name.Substring(0, 5) FROM users",
426                "SELECT SUBSTRING(name, 0, 5) FROM users",
427            ],
428        }
429    }
430
431    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
432        if args.len() < 2 || args.len() > 3 {
433            return Err(anyhow!("Substring expects 2 or 3 arguments"));
434        }
435
436        let string = match &args[0] {
437            DataValue::String(s) => s.as_str(),
438            DataValue::InternedString(s) => s.as_str(),
439            DataValue::Null => return Ok(DataValue::Null),
440            _ => return Err(anyhow!("Substring expects a string as first argument")),
441        };
442
443        let start = match &args[1] {
444            DataValue::Integer(i) => *i as usize,
445            _ => return Err(anyhow!("Substring expects integer start position")),
446        };
447
448        let result = if args.len() == 3 {
449            let length = match &args[2] {
450                DataValue::Integer(i) => *i as usize,
451                _ => return Err(anyhow!("Substring expects integer length")),
452            };
453
454            let end = (start + length).min(string.len());
455            string.chars().skip(start).take(end - start).collect()
456        } else {
457            string.chars().skip(start).collect()
458        };
459
460        Ok(DataValue::String(result))
461    }
462}
463
464impl MethodFunction for SubstringMethod {
465    fn handles_method(&self, method_name: &str) -> bool {
466        method_name.eq_ignore_ascii_case("Substring") || method_name.eq_ignore_ascii_case("Substr")
467    }
468
469    fn method_name(&self) -> &'static str {
470        "Substring"
471    }
472}
473
474/// Replace method function
475pub struct ReplaceMethod;
476
477impl SqlFunction for ReplaceMethod {
478    fn signature(&self) -> FunctionSignature {
479        FunctionSignature {
480            name: "REPLACE",
481            category: FunctionCategory::String,
482            arg_count: ArgCount::Fixed(3),
483            description: "Replaces all occurrences of a substring",
484            returns: "STRING",
485            examples: vec![
486                "SELECT name.Replace('John', 'Jane') FROM users",
487                "SELECT REPLACE(name, 'John', 'Jane') FROM users",
488            ],
489        }
490    }
491
492    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
493        self.validate_args(args)?;
494
495        let string = match &args[0] {
496            DataValue::String(s) => s.as_str(),
497            DataValue::InternedString(s) => s.as_str(),
498            DataValue::Null => return Ok(DataValue::Null),
499            _ => return Err(anyhow!("Replace expects string arguments")),
500        };
501
502        let from = match &args[1] {
503            DataValue::String(s) => s.as_str(),
504            DataValue::InternedString(s) => s.as_str(),
505            _ => return Err(anyhow!("Replace expects string arguments")),
506        };
507
508        let to = match &args[2] {
509            DataValue::String(s) => s.as_str(),
510            DataValue::InternedString(s) => s.as_str(),
511            _ => return Err(anyhow!("Replace expects string arguments")),
512        };
513
514        Ok(DataValue::String(string.replace(from, to)))
515    }
516}
517
518impl MethodFunction for ReplaceMethod {
519    fn handles_method(&self, method_name: &str) -> bool {
520        method_name.eq_ignore_ascii_case("Replace")
521    }
522
523    fn method_name(&self) -> &'static str {
524        "Replace"
525    }
526}
527
528/// MID function - Extract substring (SQL/Excel compatible, 1-based indexing)
529pub struct MidFunction;
530
531impl SqlFunction for MidFunction {
532    fn signature(&self) -> FunctionSignature {
533        FunctionSignature {
534            name: "MID",
535            category: FunctionCategory::String,
536            arg_count: ArgCount::Fixed(3),
537            description: "Extract substring from text (1-based indexing)",
538            returns: "STRING",
539            examples: vec![
540                "SELECT MID('Hello', 1, 3)", // Returns 'Hel'
541                "SELECT MID('World', 2, 3)", // Returns 'orl'
542                "SELECT MID(name, 1, 5) FROM table",
543            ],
544        }
545    }
546
547    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
548        self.validate_args(args)?;
549
550        // Get the string
551        let text = match &args[0] {
552            DataValue::String(s) => s.clone(),
553            DataValue::InternedString(s) => s.to_string(),
554            DataValue::Integer(n) => n.to_string(),
555            DataValue::Float(f) => f.to_string(),
556            DataValue::Null => String::new(),
557            _ => return Err(anyhow!("MID first argument must be convertible to text")),
558        };
559
560        // Get start position (1-based)
561        let start_pos = match &args[1] {
562            DataValue::Integer(n) => *n,
563            DataValue::Float(f) => *f as i64,
564            _ => return Err(anyhow!("MID start position must be a number")),
565        };
566
567        // Get length
568        let length = match &args[2] {
569            DataValue::Integer(n) => *n,
570            DataValue::Float(f) => *f as i64,
571            _ => return Err(anyhow!("MID length must be a number")),
572        };
573
574        // Validate arguments
575        if start_pos < 1 {
576            return Err(anyhow!("MID start position must be >= 1"));
577        }
578        if length < 0 {
579            return Err(anyhow!("MID length must be >= 0"));
580        }
581
582        // Convert to 0-based index
583        let start_idx = (start_pos - 1) as usize;
584        let chars: Vec<char> = text.chars().collect();
585
586        // If start position is beyond string length, return empty string
587        if start_idx >= chars.len() {
588            return Ok(DataValue::String(String::new()));
589        }
590
591        // Extract substring
592        let end_idx = std::cmp::min(start_idx + length as usize, chars.len());
593        let result: String = chars[start_idx..end_idx].iter().collect();
594
595        Ok(DataValue::String(result))
596    }
597}
598
599/// UPPER function - Convert string to uppercase
600pub struct UpperFunction;
601
602impl SqlFunction for UpperFunction {
603    fn signature(&self) -> FunctionSignature {
604        FunctionSignature {
605            name: "UPPER",
606            category: FunctionCategory::String,
607            arg_count: ArgCount::Fixed(1),
608            description: "Convert string to uppercase",
609            returns: "STRING",
610            examples: vec![
611                "SELECT UPPER('hello')", // Returns 'HELLO'
612                "SELECT UPPER(name) FROM table",
613            ],
614        }
615    }
616
617    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
618        self.validate_args(args)?;
619
620        match &args[0] {
621            DataValue::String(s) => Ok(DataValue::String(s.to_uppercase())),
622            DataValue::InternedString(s) => Ok(DataValue::String(s.to_uppercase())),
623            DataValue::Null => Ok(DataValue::Null),
624            _ => Err(anyhow!("UPPER expects a string argument")),
625        }
626    }
627}
628
629/// LOWER function - Convert string to lowercase
630pub struct LowerFunction;
631
632impl SqlFunction for LowerFunction {
633    fn signature(&self) -> FunctionSignature {
634        FunctionSignature {
635            name: "LOWER",
636            category: FunctionCategory::String,
637            arg_count: ArgCount::Fixed(1),
638            description: "Convert string to lowercase",
639            returns: "STRING",
640            examples: vec![
641                "SELECT LOWER('HELLO')", // Returns 'hello'
642                "SELECT LOWER(name) FROM table",
643            ],
644        }
645    }
646
647    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
648        self.validate_args(args)?;
649
650        match &args[0] {
651            DataValue::String(s) => Ok(DataValue::String(s.to_lowercase())),
652            DataValue::InternedString(s) => Ok(DataValue::String(s.to_lowercase())),
653            DataValue::Null => Ok(DataValue::Null),
654            _ => Err(anyhow!("LOWER expects a string argument")),
655        }
656    }
657}
658
659/// TRIM function - Remove leading and trailing whitespace
660pub struct TrimFunction;
661
662impl SqlFunction for TrimFunction {
663    fn signature(&self) -> FunctionSignature {
664        FunctionSignature {
665            name: "TRIM",
666            category: FunctionCategory::String,
667            arg_count: ArgCount::Fixed(1),
668            description: "Remove leading and trailing whitespace",
669            returns: "STRING",
670            examples: vec![
671                "SELECT TRIM('  hello  ')", // Returns 'hello'
672                "SELECT TRIM(description) FROM table",
673            ],
674        }
675    }
676
677    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
678        self.validate_args(args)?;
679
680        match &args[0] {
681            DataValue::String(s) => Ok(DataValue::String(s.trim().to_string())),
682            DataValue::InternedString(s) => Ok(DataValue::String(s.trim().to_string())),
683            DataValue::Null => Ok(DataValue::Null),
684            _ => Err(anyhow!("TRIM expects a string argument")),
685        }
686    }
687}
688
689/// TEXTJOIN function - Join multiple text values with a delimiter
690pub struct TextJoinFunction;
691
692impl SqlFunction for TextJoinFunction {
693    fn signature(&self) -> FunctionSignature {
694        FunctionSignature {
695            name: "TEXTJOIN",
696            category: FunctionCategory::String,
697            arg_count: ArgCount::Variadic,
698            description: "Join multiple text values with a delimiter",
699            returns: "STRING",
700            examples: vec![
701                "SELECT TEXTJOIN(',', 1, 'a', 'b', 'c')", // Returns 'a,b,c'
702                "SELECT TEXTJOIN(' - ', 1, name, city) FROM table",
703                "SELECT TEXTJOIN('|', 0, col1, col2, col3) FROM table",
704            ],
705        }
706    }
707
708    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
709        if args.len() < 3 {
710            return Err(anyhow!("TEXTJOIN requires at least 3 arguments: delimiter, ignore_empty, text1, [text2, ...]"));
711        }
712
713        // First argument: delimiter
714        let delimiter = match &args[0] {
715            DataValue::String(s) => s.clone(),
716            DataValue::InternedString(s) => s.to_string(),
717            DataValue::Integer(n) => n.to_string(),
718            DataValue::Float(f) => f.to_string(),
719            DataValue::Boolean(b) => b.to_string(),
720            DataValue::Null => String::new(),
721            _ => String::new(),
722        };
723
724        // Second argument: ignore_empty (treat as boolean - 0 is false, anything else is true)
725        let ignore_empty = match &args[1] {
726            DataValue::Integer(n) => *n != 0,
727            DataValue::Float(f) => *f != 0.0,
728            DataValue::Boolean(b) => *b,
729            DataValue::String(s) => !s.is_empty() && s != "0" && s.to_lowercase() != "false",
730            DataValue::InternedString(s) => {
731                !s.is_empty() && s.as_str() != "0" && s.to_lowercase() != "false"
732            }
733            DataValue::Null => false,
734            _ => true,
735        };
736
737        // Remaining arguments: values to join
738        let mut values = Vec::new();
739        for i in 2..args.len() {
740            let string_value = match &args[i] {
741                DataValue::String(s) => Some(s.clone()),
742                DataValue::InternedString(s) => Some(s.to_string()),
743                DataValue::Integer(n) => Some(n.to_string()),
744                DataValue::Float(f) => Some(f.to_string()),
745                DataValue::Boolean(b) => Some(b.to_string()),
746                DataValue::DateTime(dt) => Some(dt.clone()),
747                DataValue::Null => {
748                    if ignore_empty {
749                        None
750                    } else {
751                        Some(String::new())
752                    }
753                }
754            };
755
756            if let Some(s) = string_value {
757                if !ignore_empty || !s.is_empty() {
758                    values.push(s);
759                }
760            }
761        }
762
763        Ok(DataValue::String(values.join(&delimiter)))
764    }
765}
766
767/// Edit distance (Levenshtein distance) function
768pub struct EditDistanceFunction;
769
770impl EditDistanceFunction {
771    /// Calculate the Levenshtein distance between two strings
772    #[must_use]
773    pub fn calculate_edit_distance(s1: &str, s2: &str) -> usize {
774        let len1 = s1.len();
775        let len2 = s2.len();
776        let mut matrix = vec![vec![0; len2 + 1]; len1 + 1];
777
778        for i in 0..=len1 {
779            matrix[i][0] = i;
780        }
781        for j in 0..=len2 {
782            matrix[0][j] = j;
783        }
784
785        for (i, c1) in s1.chars().enumerate() {
786            for (j, c2) in s2.chars().enumerate() {
787                let cost = usize::from(c1 != c2);
788                matrix[i + 1][j + 1] = std::cmp::min(
789                    matrix[i][j + 1] + 1, // deletion
790                    std::cmp::min(
791                        matrix[i + 1][j] + 1, // insertion
792                        matrix[i][j] + cost,  // substitution
793                    ),
794                );
795            }
796        }
797
798        matrix[len1][len2]
799    }
800}
801
802impl SqlFunction for EditDistanceFunction {
803    fn signature(&self) -> FunctionSignature {
804        FunctionSignature {
805            name: "EDIT_DISTANCE",
806            category: FunctionCategory::String,
807            arg_count: ArgCount::Fixed(2),
808            description: "Calculate the Levenshtein edit distance between two strings",
809            returns: "INTEGER",
810            examples: vec![
811                "SELECT EDIT_DISTANCE('kitten', 'sitting')",
812                "SELECT EDIT_DISTANCE(name, 'John') FROM users",
813                "SELECT * FROM users WHERE EDIT_DISTANCE(name, 'Smith') <= 2",
814            ],
815        }
816    }
817
818    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
819        self.validate_args(args)?;
820
821        let s1 = match &args[0] {
822            DataValue::String(s) => s.clone(),
823            DataValue::InternedString(s) => s.to_string(),
824            DataValue::Null => return Ok(DataValue::Null),
825            _ => return Err(anyhow!("EDIT_DISTANCE expects string arguments")),
826        };
827
828        let s2 = match &args[1] {
829            DataValue::String(s) => s.clone(),
830            DataValue::InternedString(s) => s.to_string(),
831            DataValue::Null => return Ok(DataValue::Null),
832            _ => return Err(anyhow!("EDIT_DISTANCE expects string arguments")),
833        };
834
835        let distance = Self::calculate_edit_distance(&s1, &s2);
836        Ok(DataValue::Integer(distance as i64))
837    }
838}
839
840/// FREQUENCY function - Count occurrences of a substring in a string
841pub struct FrequencyFunction;
842
843impl SqlFunction for FrequencyFunction {
844    fn signature(&self) -> FunctionSignature {
845        FunctionSignature {
846            name: "FREQUENCY",
847            category: FunctionCategory::String,
848            arg_count: ArgCount::Fixed(2),
849            description: "Count occurrences of a substring within a string",
850            returns: "INTEGER",
851            examples: vec![
852                "SELECT FREQUENCY('hello world', 'o')",  // Returns 2
853                "SELECT FREQUENCY('mississippi', 'ss')", // Returns 2
854                "SELECT FREQUENCY(text_column, 'error') FROM logs",
855                "SELECT name, FREQUENCY(name, 'a') as a_count FROM users",
856            ],
857        }
858    }
859
860    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
861        self.validate_args(args)?;
862
863        // Get the string to search in
864        let text = match &args[0] {
865            DataValue::String(s) => s.clone(),
866            DataValue::InternedString(s) => s.to_string(),
867            DataValue::Null => return Ok(DataValue::Integer(0)),
868            _ => return Err(anyhow!("FREQUENCY expects string as first argument")),
869        };
870
871        // Get the substring to search for
872        let search = match &args[1] {
873            DataValue::String(s) => s.clone(),
874            DataValue::InternedString(s) => s.to_string(),
875            DataValue::Null => return Ok(DataValue::Integer(0)),
876            _ => return Err(anyhow!("FREQUENCY expects string as second argument")),
877        };
878
879        // Empty search string returns 0
880        if search.is_empty() {
881            return Ok(DataValue::Integer(0));
882        }
883
884        // Count occurrences
885        let count = text.matches(&search).count();
886        Ok(DataValue::Integer(count as i64))
887    }
888}
889
890/// IndexOf method function - finds the position of a substring
891pub struct IndexOfMethod;
892
893impl SqlFunction for IndexOfMethod {
894    fn signature(&self) -> FunctionSignature {
895        FunctionSignature {
896            name: "INDEXOF",
897            category: FunctionCategory::String,
898            arg_count: ArgCount::Fixed(2),
899            description: "Returns the position of the first occurrence of a substring (0-based)",
900            returns: "INTEGER",
901            examples: vec![
902                "SELECT email.IndexOf('@') FROM users",
903                "SELECT INDEXOF(email, '@') FROM users",
904            ],
905        }
906    }
907
908    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
909        self.validate_args(args)?;
910
911        let string = match &args[0] {
912            DataValue::String(s) => s.as_str(),
913            DataValue::InternedString(s) => s.as_str(),
914            DataValue::Null => return Ok(DataValue::Null),
915            _ => return Err(anyhow!("IndexOf expects string arguments")),
916        };
917
918        let substring = match &args[1] {
919            DataValue::String(s) => s.as_str(),
920            DataValue::InternedString(s) => s.as_str(),
921            DataValue::Null => return Ok(DataValue::Null),
922            _ => return Err(anyhow!("IndexOf expects string arguments")),
923        };
924
925        match string.find(substring) {
926            Some(pos) => Ok(DataValue::Integer(pos as i64)),
927            None => Ok(DataValue::Integer(-1)), // Return -1 if not found
928        }
929    }
930}
931
932impl MethodFunction for IndexOfMethod {
933    fn handles_method(&self, method_name: &str) -> bool {
934        method_name.eq_ignore_ascii_case("IndexOf")
935    }
936
937    fn method_name(&self) -> &'static str {
938        "IndexOf"
939    }
940}
941
942/// INSTR function - SQL standard function for finding substring position
943/// Returns 1-based position for SQL compatibility
944pub struct InstrFunction;
945
946impl SqlFunction for InstrFunction {
947    fn signature(&self) -> FunctionSignature {
948        FunctionSignature {
949            name: "INSTR",
950            category: FunctionCategory::String,
951            arg_count: ArgCount::Fixed(2),
952            description: "Returns the position of the first occurrence of a substring (1-based, SQL standard)",
953            returns: "INTEGER",
954            examples: vec![
955                "SELECT INSTR(email, '@') FROM users",
956                "SELECT SUBSTRING(email, INSTR(email, '@') + 1) FROM users",
957            ],
958        }
959    }
960
961    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
962        self.validate_args(args)?;
963
964        let string = match &args[0] {
965            DataValue::String(s) => s.as_str(),
966            DataValue::InternedString(s) => s.as_str(),
967            DataValue::Null => return Ok(DataValue::Null),
968            _ => return Err(anyhow!("INSTR expects string arguments")),
969        };
970
971        let substring = match &args[1] {
972            DataValue::String(s) => s.as_str(),
973            DataValue::InternedString(s) => s.as_str(),
974            DataValue::Null => return Ok(DataValue::Null),
975            _ => return Err(anyhow!("INSTR expects string arguments")),
976        };
977
978        match string.find(substring) {
979            Some(pos) => Ok(DataValue::Integer((pos + 1) as i64)), // 1-based for SQL
980            None => Ok(DataValue::Integer(0)), // Return 0 if not found (SQL standard)
981        }
982    }
983}
984
985/// LEFT function - extracts leftmost n characters or up to a delimiter
986pub struct LeftFunction;
987
988impl SqlFunction for LeftFunction {
989    fn signature(&self) -> FunctionSignature {
990        FunctionSignature {
991            name: "LEFT",
992            category: FunctionCategory::String,
993            arg_count: ArgCount::Fixed(2),
994            description: "Returns leftmost n characters from string",
995            returns: "STRING",
996            examples: vec![
997                "SELECT LEFT(email, 5) FROM users",
998                "SELECT LEFT('hello@world', INSTR('hello@world', '@') - 1)",
999            ],
1000        }
1001    }
1002
1003    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1004        self.validate_args(args)?;
1005
1006        let string = match &args[0] {
1007            DataValue::String(s) => s.as_str(),
1008            DataValue::InternedString(s) => s.as_str(),
1009            DataValue::Null => return Ok(DataValue::Null),
1010            _ => return Err(anyhow!("LEFT expects a string as first argument")),
1011        };
1012
1013        let length = match &args[1] {
1014            DataValue::Integer(n) => *n as usize,
1015            DataValue::Float(f) => *f as usize,
1016            DataValue::Null => return Ok(DataValue::Null),
1017            _ => return Err(anyhow!("LEFT expects a number as second argument")),
1018        };
1019
1020        let result = if length >= string.len() {
1021            string.to_string()
1022        } else {
1023            string.chars().take(length).collect()
1024        };
1025
1026        Ok(DataValue::String(result))
1027    }
1028}
1029
1030/// RIGHT function - extracts rightmost n characters
1031pub struct RightFunction;
1032
1033impl SqlFunction for RightFunction {
1034    fn signature(&self) -> FunctionSignature {
1035        FunctionSignature {
1036            name: "RIGHT",
1037            category: FunctionCategory::String,
1038            arg_count: ArgCount::Fixed(2),
1039            description: "Returns rightmost n characters from string",
1040            returns: "STRING",
1041            examples: vec![
1042                "SELECT RIGHT(filename, 4) FROM files", // Get file extension
1043                "SELECT RIGHT(email, LENGTH(email) - INSTR(email, '@'))", // Get domain
1044            ],
1045        }
1046    }
1047
1048    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1049        self.validate_args(args)?;
1050
1051        let string = match &args[0] {
1052            DataValue::String(s) => s.as_str(),
1053            DataValue::InternedString(s) => s.as_str(),
1054            DataValue::Null => return Ok(DataValue::Null),
1055            _ => return Err(anyhow!("RIGHT expects a string as first argument")),
1056        };
1057
1058        let length = match &args[1] {
1059            DataValue::Integer(n) => *n as usize,
1060            DataValue::Float(f) => *f as usize,
1061            DataValue::Null => return Ok(DataValue::Null),
1062            _ => return Err(anyhow!("RIGHT expects a number as second argument")),
1063        };
1064
1065        let chars: Vec<char> = string.chars().collect();
1066        let start = if length >= chars.len() {
1067            0
1068        } else {
1069            chars.len() - length
1070        };
1071
1072        let result: String = chars[start..].iter().collect();
1073        Ok(DataValue::String(result))
1074    }
1075}
1076
1077/// SUBSTRING_BEFORE - returns substring before first/nth occurrence of delimiter
1078pub struct SubstringBeforeFunction;
1079
1080impl SqlFunction for SubstringBeforeFunction {
1081    fn signature(&self) -> FunctionSignature {
1082        FunctionSignature {
1083            name: "SUBSTRING_BEFORE",
1084            category: FunctionCategory::String,
1085            arg_count: ArgCount::Range(2, 3),
1086            description: "Returns substring before the first (or nth) occurrence of delimiter",
1087            returns: "STRING",
1088            examples: vec![
1089                "SELECT SUBSTRING_BEFORE(email, '@') FROM users", // Get username
1090                "SELECT SUBSTRING_BEFORE('a.b.c.d', '.', 2)",     // Get 'a.b' (before 2nd dot)
1091            ],
1092        }
1093    }
1094
1095    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1096        if args.len() < 2 || args.len() > 3 {
1097            return Err(anyhow!("SUBSTRING_BEFORE expects 2 or 3 arguments"));
1098        }
1099
1100        let string = match &args[0] {
1101            DataValue::String(s) => s.as_str(),
1102            DataValue::InternedString(s) => s.as_str(),
1103            DataValue::Null => return Ok(DataValue::Null),
1104            _ => {
1105                return Err(anyhow!(
1106                    "SUBSTRING_BEFORE expects a string as first argument"
1107                ))
1108            }
1109        };
1110
1111        let delimiter = match &args[1] {
1112            DataValue::String(s) => s.as_str(),
1113            DataValue::InternedString(s) => s.as_str(),
1114            DataValue::Null => return Ok(DataValue::Null),
1115            _ => return Err(anyhow!("SUBSTRING_BEFORE expects a string delimiter")),
1116        };
1117
1118        let occurrence = if args.len() == 3 {
1119            match &args[2] {
1120                DataValue::Integer(n) => *n as usize,
1121                DataValue::Float(f) => *f as usize,
1122                DataValue::Null => 1,
1123                _ => return Err(anyhow!("SUBSTRING_BEFORE expects a number for occurrence")),
1124            }
1125        } else {
1126            1
1127        };
1128
1129        if occurrence == 0 {
1130            return Ok(DataValue::String(String::new()));
1131        }
1132
1133        // Find the nth occurrence
1134        let mut count = 0;
1135        for (i, _) in string.match_indices(delimiter) {
1136            count += 1;
1137            if count == occurrence {
1138                return Ok(DataValue::String(string[..i].to_string()));
1139            }
1140        }
1141
1142        // If we didn't find enough occurrences, return empty string
1143        Ok(DataValue::String(String::new()))
1144    }
1145}
1146
1147/// SUBSTRING_AFTER - returns substring after first/nth occurrence of delimiter
1148pub struct SubstringAfterFunction;
1149
1150impl SqlFunction for SubstringAfterFunction {
1151    fn signature(&self) -> FunctionSignature {
1152        FunctionSignature {
1153            name: "SUBSTRING_AFTER",
1154            category: FunctionCategory::String,
1155            arg_count: ArgCount::Range(2, 3),
1156            description: "Returns substring after the first (or nth) occurrence of delimiter",
1157            returns: "STRING",
1158            examples: vec![
1159                "SELECT SUBSTRING_AFTER(email, '@') FROM users", // Get domain
1160                "SELECT SUBSTRING_AFTER('a.b.c.d', '.', 2)",     // Get 'c.d' (after 2nd dot)
1161            ],
1162        }
1163    }
1164
1165    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1166        if args.len() < 2 || args.len() > 3 {
1167            return Err(anyhow!("SUBSTRING_AFTER expects 2 or 3 arguments"));
1168        }
1169
1170        let string = match &args[0] {
1171            DataValue::String(s) => s.as_str(),
1172            DataValue::InternedString(s) => s.as_str(),
1173            DataValue::Null => return Ok(DataValue::Null),
1174            _ => {
1175                return Err(anyhow!(
1176                    "SUBSTRING_AFTER expects a string as first argument"
1177                ))
1178            }
1179        };
1180
1181        let delimiter = match &args[1] {
1182            DataValue::String(s) => s.as_str(),
1183            DataValue::InternedString(s) => s.as_str(),
1184            DataValue::Null => return Ok(DataValue::Null),
1185            _ => return Err(anyhow!("SUBSTRING_AFTER expects a string delimiter")),
1186        };
1187
1188        let occurrence = if args.len() == 3 {
1189            match &args[2] {
1190                DataValue::Integer(n) => *n as usize,
1191                DataValue::Float(f) => *f as usize,
1192                DataValue::Null => 1,
1193                _ => return Err(anyhow!("SUBSTRING_AFTER expects a number for occurrence")),
1194            }
1195        } else {
1196            1
1197        };
1198
1199        if occurrence == 0 {
1200            return Ok(DataValue::String(string.to_string()));
1201        }
1202
1203        // Find the nth occurrence
1204        let mut count = 0;
1205        for (i, _) in string.match_indices(delimiter) {
1206            count += 1;
1207            if count == occurrence {
1208                let start = i + delimiter.len();
1209                if start < string.len() {
1210                    return Ok(DataValue::String(string[start..].to_string()));
1211                } else {
1212                    return Ok(DataValue::String(String::new()));
1213                }
1214            }
1215        }
1216
1217        // If we didn't find enough occurrences, return empty string
1218        Ok(DataValue::String(String::new()))
1219    }
1220}
1221
1222/// SPLIT_PART - returns the nth part of a string split by delimiter (1-based)
1223pub struct SplitPartFunction;
1224
1225impl SqlFunction for SplitPartFunction {
1226    fn signature(&self) -> FunctionSignature {
1227        FunctionSignature {
1228            name: "SPLIT_PART",
1229            category: FunctionCategory::String,
1230            arg_count: ArgCount::Fixed(3),
1231            description: "Returns the nth part of a string split by delimiter (1-based index)",
1232            returns: "STRING",
1233            examples: vec![
1234                "SELECT SPLIT_PART('a.b.c.d', '.', 2)",        // Returns 'b'
1235                "SELECT SPLIT_PART(email, '@', 1) FROM users", // Get username
1236            ],
1237        }
1238    }
1239
1240    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1241        self.validate_args(args)?;
1242
1243        let string = match &args[0] {
1244            DataValue::String(s) => s.as_str(),
1245            DataValue::InternedString(s) => s.as_str(),
1246            DataValue::Null => return Ok(DataValue::Null),
1247            _ => return Err(anyhow!("SPLIT_PART expects a string as first argument")),
1248        };
1249
1250        let delimiter = match &args[1] {
1251            DataValue::String(s) => s.as_str(),
1252            DataValue::InternedString(s) => s.as_str(),
1253            DataValue::Null => return Ok(DataValue::Null),
1254            _ => return Err(anyhow!("SPLIT_PART expects a string delimiter")),
1255        };
1256
1257        let part_num = match &args[2] {
1258            DataValue::Integer(n) => *n as usize,
1259            DataValue::Float(f) => *f as usize,
1260            DataValue::Null => return Ok(DataValue::Null),
1261            _ => return Err(anyhow!("SPLIT_PART expects a number for part index")),
1262        };
1263
1264        if part_num == 0 {
1265            return Err(anyhow!("SPLIT_PART part index must be >= 1"));
1266        }
1267
1268        let parts: Vec<&str> = string.split(delimiter).collect();
1269
1270        if part_num <= parts.len() {
1271            Ok(DataValue::String(parts[part_num - 1].to_string()))
1272        } else {
1273            Ok(DataValue::String(String::new()))
1274        }
1275    }
1276}
1277
1278/// CHR function - Convert ASCII code to character
1279pub struct ChrFunction;
1280
1281impl SqlFunction for ChrFunction {
1282    fn signature(&self) -> FunctionSignature {
1283        FunctionSignature {
1284            name: "CHR",
1285            category: FunctionCategory::String,
1286            arg_count: ArgCount::Fixed(1),
1287            description: "Convert ASCII code to character",
1288            returns: "STRING",
1289            examples: vec![
1290                "SELECT CHR(65)", // Returns 'A'
1291                "SELECT CHR(97)", // Returns 'a'
1292                "SELECT CHR(48)", // Returns '0'
1293            ],
1294        }
1295    }
1296
1297    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1298        if args.len() != 1 {
1299            return Err(anyhow!("CHR expects exactly 1 argument"));
1300        }
1301
1302        let ascii_code = match &args[0] {
1303            DataValue::Integer(n) => *n,
1304            DataValue::Float(f) => *f as i64,
1305            DataValue::String(s) => s
1306                .parse::<i64>()
1307                .map_err(|_| anyhow!("Invalid number for CHR: {}", s))?,
1308            DataValue::InternedString(s) => s
1309                .parse::<i64>()
1310                .map_err(|_| anyhow!("Invalid number for CHR: {}", s))?,
1311            DataValue::Null => return Ok(DataValue::Null),
1312            _ => return Err(anyhow!("CHR expects a numeric argument")),
1313        };
1314
1315        // ASCII printable range is 32-126, but we'll allow 0-255
1316        if ascii_code < 0 || ascii_code > 255 {
1317            return Err(anyhow!(
1318                "CHR argument must be between 0 and 255, got {}",
1319                ascii_code
1320            ));
1321        }
1322
1323        let ch = ascii_code as u8 as char;
1324        Ok(DataValue::String(ch.to_string()))
1325    }
1326}
1327
1328/// LOREM_IPSUM function - generates Lorem Ipsum placeholder text
1329pub struct LoremIpsumFunction;
1330
1331impl SqlFunction for LoremIpsumFunction {
1332    fn signature(&self) -> FunctionSignature {
1333        FunctionSignature {
1334            name: "LOREM_IPSUM",
1335            category: FunctionCategory::String,
1336            arg_count: ArgCount::Range(1, 3),
1337            description: "Generate Lorem Ipsum placeholder text with specified number of words",
1338            returns: "STRING",
1339            examples: vec![
1340                "SELECT LOREM_IPSUM(10)",        // 10 random Lorem Ipsum words
1341                "SELECT LOREM_IPSUM(50)",        // 50 words
1342                "SELECT LOREM_IPSUM(20, 1)",     // 20 words, starting with 'Lorem ipsum...'
1343                "SELECT LOREM_IPSUM(15, 0, id)", // 15 words, use id as seed for variation
1344            ],
1345        }
1346    }
1347
1348    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
1349        self.validate_args(args)?;
1350
1351        let num_words = match &args[0] {
1352            DataValue::Integer(n) if *n > 0 => *n as usize,
1353            DataValue::Float(f) if *f > 0.0 => *f as usize,
1354            DataValue::Null => return Ok(DataValue::Null),
1355            _ => return Err(anyhow!("LOREM_IPSUM requires a positive number of words")),
1356        };
1357
1358        // Check if we should start with traditional "Lorem ipsum..." opening
1359        let start_traditional = if args.len() > 1 {
1360            match &args[1] {
1361                DataValue::Integer(n) => *n != 0,
1362                DataValue::Boolean(b) => *b,
1363                _ => false,
1364            }
1365        } else {
1366            false
1367        };
1368
1369        // Get optional seed for reproducible but varied results
1370        let seed_value = if args.len() > 2 {
1371            match &args[2] {
1372                DataValue::Integer(n) => *n as u64,
1373                DataValue::Float(f) => *f as u64,
1374                DataValue::String(s) => {
1375                    // Hash the string to get a numeric seed
1376                    let mut hash = 0u64;
1377                    for byte in s.bytes() {
1378                        hash = hash.wrapping_mul(31).wrapping_add(byte as u64);
1379                    }
1380                    hash
1381                }
1382                DataValue::Null => 0,
1383                _ => 0,
1384            }
1385        } else {
1386            0
1387        };
1388
1389        // Lorem Ipsum word bank - traditional Latin placeholder text words
1390        const LOREM_WORDS: &[&str] = &[
1391            "lorem",
1392            "ipsum",
1393            "dolor",
1394            "sit",
1395            "amet",
1396            "consectetur",
1397            "adipiscing",
1398            "elit",
1399            "sed",
1400            "do",
1401            "eiusmod",
1402            "tempor",
1403            "incididunt",
1404            "ut",
1405            "labore",
1406            "et",
1407            "dolore",
1408            "magna",
1409            "aliqua",
1410            "enim",
1411            "ad",
1412            "minim",
1413            "veniam",
1414            "quis",
1415            "nostrud",
1416            "exercitation",
1417            "ullamco",
1418            "laboris",
1419            "nisi",
1420            "aliquip",
1421            "ex",
1422            "ea",
1423            "commodo",
1424            "consequat",
1425            "duis",
1426            "aute",
1427            "irure",
1428            "in",
1429            "reprehenderit",
1430            "voluptate",
1431            "velit",
1432            "esse",
1433            "cillum",
1434            "fugiat",
1435            "nulla",
1436            "pariatur",
1437            "excepteur",
1438            "sint",
1439            "occaecat",
1440            "cupidatat",
1441            "non",
1442            "proident",
1443            "sunt",
1444            "culpa",
1445            "qui",
1446            "officia",
1447            "deserunt",
1448            "mollit",
1449            "anim",
1450            "id",
1451            "est",
1452            "laborum",
1453            "perspiciatis",
1454            "unde",
1455            "omnis",
1456            "iste",
1457            "natus",
1458            "error",
1459            "voluptatem",
1460            "accusantium",
1461            "doloremque",
1462            "laudantium",
1463            "totam",
1464            "rem",
1465            "aperiam",
1466            "eaque",
1467            "ipsa",
1468            "quae",
1469            "ab",
1470            "illo",
1471            "inventore",
1472            "veritatis",
1473            "quasi",
1474            "architecto",
1475            "beatae",
1476            "vitae",
1477            "dicta",
1478            "explicabo",
1479            "nemo",
1480            "enim",
1481            "ipsam",
1482            "quia",
1483            "voluptas",
1484            "aspernatur",
1485            "aut",
1486            "odit",
1487            "fugit",
1488            "consequuntur",
1489            "magni",
1490            "dolores",
1491            "eos",
1492            "ratione",
1493            "sequi",
1494            "nesciunt",
1495            "neque",
1496            "porro",
1497            "quisquam",
1498            "dolorem",
1499            "adipisci",
1500            "numquam",
1501            "eius",
1502            "modi",
1503            "tempora",
1504            "incidunt",
1505            "magnam",
1506            "quaerat",
1507            "etiam",
1508            "minus",
1509            "soluta",
1510            "nobis",
1511            "eligendi",
1512            "optio",
1513            "cumque",
1514            "nihil",
1515            "impedit",
1516            "quo",
1517            "possimus",
1518            "suscipit",
1519            "laboriosam",
1520            "aliquid",
1521            "fuga",
1522            "distinctio",
1523            "libero",
1524            "tempore",
1525            "cum",
1526            "assumenda",
1527            "est",
1528            "omnis",
1529            "dolor",
1530            "repellendus",
1531            "temporibus",
1532            "autem",
1533            "quibusdam",
1534            "officiis",
1535            "debitis",
1536            "rerum",
1537            "necessitatibus",
1538            "saepe",
1539            "eveniet",
1540            "voluptates",
1541            "repudiandae",
1542            "molestiae",
1543            "recusandae",
1544            "itaque",
1545            "earum",
1546            "hic",
1547            "tenetur",
1548            "sapiente",
1549            "delectus",
1550            "reiciendis",
1551            "voluptatibus",
1552            "maiores",
1553            "alias",
1554            "consequatur",
1555            "perferendis",
1556            "doloribus",
1557            "asperiores",
1558            "repellat",
1559            "iusto",
1560            "odio",
1561            "dignissimos",
1562            "ducimus",
1563            "blanditiis",
1564            "praesentium",
1565            "voluptatum",
1566            "deleniti",
1567            "atque",
1568            "corrupti",
1569            "quos",
1570            "quas",
1571            "molestias",
1572            "excepturi",
1573            "occaecati",
1574            "provident",
1575            "similique",
1576            "mollitia",
1577            "animi",
1578            "illum",
1579            "dolorum",
1580            "fuga",
1581            "harum",
1582            "quidem",
1583            "rerum",
1584            "facilis",
1585            "expedita",
1586            "distinctio",
1587            "nam",
1588            "libero",
1589            "tempore",
1590            "cum",
1591            "soluta",
1592            "nobis",
1593            "eligendi",
1594            "optio",
1595            "cumque",
1596            "nihil",
1597            "impedit",
1598            "minus",
1599            "quod",
1600            "maxime",
1601            "placeat",
1602            "facere",
1603            "possimus",
1604            "omnis",
1605            "voluptas",
1606            "assumenda",
1607        ];
1608
1609        let mut result = Vec::with_capacity(num_words);
1610
1611        if start_traditional && num_words > 0 {
1612            // Start with traditional "Lorem ipsum dolor sit amet..."
1613            let traditional_start = ["lorem", "ipsum", "dolor", "sit", "amet"];
1614            let take_count = num_words.min(traditional_start.len());
1615            for i in 0..take_count {
1616                result.push(traditional_start[i]);
1617            }
1618
1619            // Fill remaining with random words
1620            let seed = if seed_value != 0 {
1621                seed_value
1622            } else {
1623                use std::time::{SystemTime, UNIX_EPOCH};
1624                SystemTime::now()
1625                    .duration_since(UNIX_EPOCH)
1626                    .unwrap_or_default()
1627                    .as_nanos() as u64
1628            };
1629
1630            let mut rng = seed.wrapping_mul(num_words as u64); // Combine seed with word count
1631            for i in take_count..num_words {
1632                // Simple pseudo-random selection
1633                rng = (rng.wrapping_mul(1664525).wrapping_add(1013904223)) ^ (i as u64);
1634                let idx = (rng as usize) % LOREM_WORDS.len();
1635                result.push(LOREM_WORDS[idx]);
1636            }
1637        } else {
1638            // Generate random Lorem words
1639            let seed = if seed_value != 0 {
1640                seed_value
1641            } else {
1642                use std::time::{SystemTime, UNIX_EPOCH};
1643                SystemTime::now()
1644                    .duration_since(UNIX_EPOCH)
1645                    .unwrap_or_default()
1646                    .as_nanos() as u64
1647            };
1648
1649            let mut rng = seed.wrapping_mul(num_words as u64).wrapping_add(12345); // Combine seed with word count
1650            for i in 0..num_words {
1651                // Simple pseudo-random selection with better entropy
1652                rng = (rng.wrapping_mul(1664525).wrapping_add(1013904223)) ^ (i as u64);
1653                let idx = (rng as usize) % LOREM_WORDS.len();
1654                result.push(LOREM_WORDS[idx]);
1655            }
1656        }
1657
1658        // Capitalize first word and add periods for readability
1659        let mut text = String::new();
1660        for (i, word) in result.iter().enumerate() {
1661            if i == 0 {
1662                // Capitalize first word
1663                text.push_str(&word.chars().next().unwrap().to_uppercase().to_string());
1664                text.push_str(&word[1..]);
1665            } else {
1666                text.push(' ');
1667                // Occasionally start a new sentence (roughly every 10-15 words)
1668                if i > 0 && ((i * 7) % 13 == 0) && i < num_words - 1 {
1669                    text.pop(); // Remove the space
1670                    text.push_str(". ");
1671                    // Capitalize next word
1672                    text.push_str(&word.chars().next().unwrap().to_uppercase().to_string());
1673                    text.push_str(&word[1..]);
1674                } else {
1675                    text.push_str(word);
1676                }
1677            }
1678        }
1679
1680        // Add final period if we generated text
1681        if !text.is_empty() {
1682            text.push('.');
1683        }
1684
1685        Ok(DataValue::String(text))
1686    }
1687}
1688
1689/// Register all string method functions
1690pub fn register_string_methods(registry: &mut super::FunctionRegistry) {
1691    use std::sync::Arc;
1692
1693    // Register new string functions (non-method versions)
1694    registry.register(Box::new(MidFunction));
1695    registry.register(Box::new(UpperFunction));
1696    registry.register(Box::new(LowerFunction));
1697    registry.register(Box::new(TrimFunction));
1698    registry.register(Box::new(TextJoinFunction));
1699    registry.register(Box::new(EditDistanceFunction));
1700    registry.register(Box::new(FrequencyFunction));
1701
1702    // Register new convenient string extraction functions
1703    registry.register(Box::new(LeftFunction));
1704    registry.register(Box::new(RightFunction));
1705    registry.register(Box::new(SubstringBeforeFunction));
1706    registry.register(Box::new(SubstringAfterFunction));
1707    registry.register(Box::new(SplitPartFunction));
1708
1709    // Register ToUpper
1710    let to_upper = Arc::new(ToUpperMethod);
1711    registry.register(Box::new(ToUpperMethod));
1712    registry.register_method(to_upper);
1713
1714    // Register ToLower
1715    let to_lower = Arc::new(ToLowerMethod);
1716    registry.register(Box::new(ToLowerMethod));
1717    registry.register_method(to_lower);
1718
1719    // Register Trim
1720    let trim = Arc::new(TrimMethod);
1721    registry.register(Box::new(TrimMethod));
1722    registry.register_method(trim);
1723
1724    // Register TrimStart
1725    let trim_start = Arc::new(TrimStartMethod);
1726    registry.register(Box::new(TrimStartMethod));
1727    registry.register_method(trim_start);
1728
1729    // Register TrimEnd
1730    let trim_end = Arc::new(TrimEndMethod);
1731    registry.register(Box::new(TrimEndMethod));
1732    registry.register_method(trim_end);
1733
1734    // Register Length
1735    let length = Arc::new(LengthMethod);
1736    registry.register(Box::new(LengthMethod));
1737    registry.register_method(length);
1738
1739    // Register Contains
1740    let contains = Arc::new(ContainsMethod);
1741    registry.register(Box::new(ContainsMethod));
1742    registry.register_method(contains);
1743
1744    // Register StartsWith
1745    let starts_with = Arc::new(StartsWithMethod);
1746    registry.register(Box::new(StartsWithMethod));
1747    registry.register_method(starts_with);
1748
1749    // Register EndsWith
1750    let ends_with = Arc::new(EndsWithMethod);
1751    registry.register(Box::new(EndsWithMethod));
1752    registry.register_method(ends_with);
1753
1754    // Register Substring
1755    let substring = Arc::new(SubstringMethod);
1756    registry.register(Box::new(SubstringMethod));
1757    registry.register_method(substring);
1758
1759    // Register Replace
1760    let replace = Arc::new(ReplaceMethod);
1761    registry.register(Box::new(ReplaceMethod));
1762    registry.register_method(replace);
1763
1764    // Register IndexOf/INSTR
1765    let indexof = Arc::new(IndexOfMethod);
1766    registry.register(Box::new(IndexOfMethod));
1767    registry.register_method(indexof.clone());
1768    // Also register as INSTR for SQL compatibility
1769    registry.register(Box::new(InstrFunction));
1770
1771    // Register CHR function
1772    registry.register(Box::new(ChrFunction));
1773
1774    // Register LOREM_IPSUM function
1775    registry.register(Box::new(LoremIpsumFunction));
1776}