Skip to main content

elusion/custom_error/
cust_error.rs

1use crate::prelude::*;
2use crate::normalizers::normalize::STRING_LITERAL_PATTERN;
3use crate::normalizers::normalize::AS_PATTERN;
4use crate::normalizers::normalize::FUNCTION_PATTERN;
5use crate::normalizers::normalize::SQL_KEYWORDS;
6use crate::normalizers::normalize::TABLE_COLUMN_PATTERN;
7use crate::normalizers::normalize::AGGREGATE_FUNCTIONS;
8use crate::normalizers::normalize::SIMPLE_COLUMN_PATTERN;
9use crate::normalizers::normalize::POSTGRES_CAST_PATTERN;
10use crate::normalizers::normalize::DATETIME_FUNCTIONS;
11use crate::normalizers::normalize::STRING_FUNCTIONS;
12
/// Error type used across the crate.
///
/// Most variants carry the pieces that the `Display` impl stitches into a
/// single-line message (problem, context, and a suggested fix).
#[derive(Debug)]
pub enum ElusionError {

    /// A column could not be found; carries the location/context where it was
    /// referenced plus a caller-supplied suggestion. `Display` additionally
    /// proposes the closest name from `available_columns`.
    MissingColumnWithContext {
        column: String,
        available_columns: Vec<String>,
        context: String,
        location: String,
        suggestion: String,
    },
    /// A column could not be found. `Display` lists `available_columns` and
    /// proposes the closest match among them.
    MissingColumn {
        column: String,
        available_columns: Vec<String>,
    },
    /// A column has type `found` where `expected` was required.
    InvalidDataType {
        column: String,
        expected: String,
        found: String,
    },
    /// The same column name appears in several places (`locations`).
    DuplicateColumn {
        column: String,
        locations: Vec<String>,
    },
    /// An operation that cannot be performed; also used when converting
    /// `DataFusionError::NotImplemented`.
    InvalidOperation {
        operation: String,
        reason: String,
        suggestion: String,
    },
    /// A schema-level problem; `schema`, when present, is printed as the
    /// current schema.
    SchemaError {
        message: String,
        schema: Option<String>,
        suggestion: String,
    },
    /// A join between `left_table` and `right_table` failed.
    JoinError {
        message: String,
        left_table: String,
        right_table: String,
        suggestion: String,
    },
    /// A GROUP BY problem; `function_context`, when present, is printed as the
    /// function context of the offending column.
    GroupByError {
        message: String,
        invalid_columns: Vec<String>,
        suggestion: String,
        function_context: Option<String>,
    },
    /// Writing to `path` failed during `operation`.
    WriteError {
        path: String,
        operation: String,
        reason: String,
        suggestion: String,
    },
    /// A problem involving the listed partition columns.
    PartitionError {
        message: String,
        partition_columns: Vec<String>,
        suggestion: String,
    },
    /// An aggregate `function` failed on `column`.
    AggregationError {
        message: String,
        function: String,
        column: String,
        suggestion: String,
    },
    /// An ORDER BY problem involving `columns`.
    OrderByError {
        message: String,
        columns: Vec<String>,
        suggestion: String,
    },
    /// A window `function` failed; `details` is printed after the message.
    WindowFunctionError {
        message: String,
        function: String,
        details: String,
        suggestion: String,
    },
    /// A LIMIT problem; `value` is printed as the invalid limit value.
    LimitError {
        message: String,
        value: u64,
        suggestion: String,
    },
    /// A set operation failed for the given `reason`.
    SetOperationError {
        operation: String,
        reason: String,
        suggestion: String,
    },
    /// `group_by_all()` cannot be used because columns are missing from SELECT.
    /// Each dependency pair is printed as "<function> needs '<column>'".
    GroupByAllCompatibilityError {
        missing_columns: Vec<String>,
        window_function_dependencies: Vec<(String, String)>,
        suggestion: String,
    },

    /// `group_by_all()` combined with a window function that needs a column
    /// missing from the SELECT clause.
    GroupByAllWindowError {
        missing_column: String,
        window_function_context: String,
        suggestion: String,
    },

    /// `group_by_all()` combined with a column dependency that is missing from
    /// the SELECT clause.
    GroupByAllDependencyError {
        missing_column: String,
        dependency_context: String,
        suggestion: String,
    },
    /// A DataFusion error that was not mapped to a more specific variant.
    DataFusion(DataFusionError),
    /// A wrapped I/O error.
    Io(std::io::Error),
    /// A OneLake-related failure described by a free-form message.
    OneLakeError(String),
    /// A free-form error message.
    Custom(String),
}
118
119impl fmt::Display for ElusionError {
120    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
121        match self {
122            ElusionError::MissingColumn { column, available_columns } => {
123                let suggestion = suggest_similar_column(column, available_columns);
124                write!(
125                    f,
126                    "🔍 Column Not Found: '{}' 📋 Available columns are: {} 💡 Did you mean '{}'? 🔧 Check for typos or use .display_schema() to see all available columns.",
127                    column,
128                    available_columns.join(", "),
129                    suggestion
130                )
131            },
132            ElusionError::MissingColumnWithContext { column, available_columns, context, location, suggestion } => {
133                let similar_suggestion = suggest_similar_column(column, available_columns);
134                write!(
135                    f,
136                    "🔍 Column Not Found: '{}' in {} 📍 Location: {} 🔍 Context: {} 📋 Available columns: {} 💡 Did you mean '{}'? 🔧 Suggestion: {}",
137                    column,
138                    location,
139                    location,
140                    context,
141                    available_columns.join(", "),
142                    similar_suggestion,
143                    suggestion
144                )
145            },
146            ElusionError::InvalidDataType { column, expected, found } => write!(
147                f,
148                "📊 Type Mismatch in column '{}' ❌ Found: {} ✅ Expected: {} 💡 Try: .with_column(\"{}\", cast(\"{}\", {}));",
149                column, found, expected, column, column, expected
150            ),
151            ElusionError::DuplicateColumn { column, locations } => write!(
152                f,
153                "🔄 Duplicate Column: '{}' 📍 Found in: {} 💡 Try using table aliases or renaming columns: .select([\"table1.{} as table1_{}\", \"table2.{} as table2_{}\"])",
154                column,
155                locations.join(", "),
156                column, column, column, column
157            ),
158            ElusionError::InvalidOperation { operation, reason, suggestion } => write!(
159                f,
160                "⚠️ Invalid Operation: {} ❌ Problem: {} 💡 Suggestion: {}",
161                operation, reason, suggestion
162            ),
163            ElusionError::SchemaError { message, schema, suggestion } => {
164                let schema_info = schema.as_ref().map_or(
165                    String::new(),
166                    |s| format!("📋 Current Schema:{}", s)
167                );
168                write!(
169                    f,
170                    "🏗️ Schema Error: {}{} 💡 Suggestion: {}",
171                    message, schema_info, suggestion
172                )
173            },
174            ElusionError::JoinError { message, left_table, right_table, suggestion } => write!(
175                f,
176                "🤝 Join Error: ❌ {} 📌 Left Table: {} 📌 Right Table: {} 💡 Suggestion: {}",
177                message, left_table, right_table, suggestion
178            ),
179            ElusionError::GroupByError { message, invalid_columns, suggestion, function_context } => {
180                let function_info = if let Some(context) = function_context {
181                    format!("🔧 Function Context: {}", context)
182                } else {
183                    String::new()
184                };
185                
186                write!(
187                    f,
188                    "📊 Group By Error: {} ❌ Invalid columns: {}{} 💡 Suggestion: {}",
189                    message,
190                    invalid_columns.join(", "),
191                    function_info,
192                    suggestion
193                )
194            },
195            ElusionError::GroupByAllCompatibilityError { missing_columns, window_function_dependencies, suggestion } => {
196                let deps_info = if !window_function_dependencies.is_empty() {
197                    let deps = window_function_dependencies.iter()
198                        .map(|(func, col)| format!("  • {} needs '{}'", func, col))
199                        .collect::<Vec<_>>()
200                        .join("\n");
201                    format!("🪟 Window Function Dependencies:{}", deps)
202                } else {
203                    String::new()
204                };
205                
206                write!(
207                    f,
208                    "🔧 group_by_all() Compatibility Issue. ❌ Missing columns from SELECT: {}{}/ 💡 {}",
209                    missing_columns.join(", "),
210                    deps_info,
211                    suggestion
212                )
213            },
214            
215            ElusionError::GroupByAllWindowError { missing_column, window_function_context, suggestion } => {
216                write!(
217                    f,
218                    "🪟 group_by_all() + Window Function Error. ❌ Missing column '{}' from SELECT clause. 🔍 Context: {} {}", 
219                    missing_column, window_function_context, suggestion
220                )
221            },
222            
223            ElusionError::GroupByAllDependencyError { missing_column, dependency_context, suggestion } => {
224                write!(
225                    f,
226                    "🔗 group_by_all() + Column Dependency Error. ❌ Missing column '{}' from SELECT clause. 🔍 Context: {} {}",
227                    missing_column, dependency_context, suggestion
228                )
229            },
230
231            ElusionError::WriteError { path, operation, reason, suggestion } => write!(
232                f,
233                "💾 Write Error during {} operation 📍 Path: {} ❌ Problem: {} 💡 Suggestion: {}",
234                operation, path, reason, suggestion
235            ),
236            // FIXED: Removed numbered lists from DataFusion error
237            ElusionError::DataFusion(err) => write!(
238                f,
239                "⚡ DataFusion Error: {} 💡 Don't worry! Here's what you can try: • Check your column names and types • Verify your SQL syntax • Use .df_schema() to see available columns • Try breaking down complex operations into smaller steps",
240                err
241            ),
242            // FIXED: Removed numbered lists from I/O error
243            ElusionError::Io(err) => write!(
244                f,
245                "📁 I/O Error: {} 💡 Quick fixes to try: • Check if the file/directory exists • Verify your permissions • Ensure the path is correct • Close any programs using the file",
246                err
247            ),
248            ElusionError::PartitionError { message, partition_columns, suggestion } => write!(
249                f,
250                "📦 Partition Error: {} ❌ Affected partition columns: {} 💡 Suggestion: {}",
251                message,
252                partition_columns.join(", "),
253                suggestion
254            ),
255            ElusionError::AggregationError { message, function, column, suggestion } => write!(
256                f,
257                "📊 Aggregation Error in function '{}' ❌ Problem with column '{}': {} 💡 Suggestion: {}",
258                function, column, message, suggestion
259            ),
260            ElusionError::OrderByError { message, columns, suggestion } => write!(
261                f,
262                "🔄 Order By Error: {} ❌ Problem with columns: {} 💡 Suggestion: {}",
263                message,
264                columns.join(", "),
265                suggestion
266            ),
267            ElusionError::WindowFunctionError { message, function, details, suggestion } => write!(
268                f,
269                "🪟 Window Function Error in '{}' ❌ Problem: {} 📝 Details: {} 💡 Suggestion: {}",
270                function, message, details, suggestion
271            ),
272            ElusionError::LimitError { message, value, suggestion } => write!(
273                f,
274                "🔢 Limit Error: {} ❌ Invalid limit value: {} 💡 Suggestion: {}",
275                message, value, suggestion
276            ),
277            ElusionError::SetOperationError { operation, reason, suggestion } => write!(
278                f,
279                "🔄 Set Operation Error in '{}' ❌ Problem: {} 💡 Suggestion: {}",
280                operation, reason, suggestion
281            ),
282            ElusionError::OneLakeError(msg) => write!(
283                f,
284                "🏢 OneLake Error: {} 💡 Check your OneLake URL format, authentication, and permissions",
285                msg
286            ),
287            ElusionError::Custom(err) => write!(f, "💫 {}", err),
288        }
289    }
290}
291
292impl From<DataFusionError> for ElusionError {
293    fn from(err: DataFusionError) -> Self {
294        match &err {
295            DataFusionError::SchemaError(schema_err, _context) => {
296                let error_msg = schema_err.to_string();
297                
298                if error_msg.contains("Column") && error_msg.contains("not found") {
299                    if let Some(col_name) = extract_column_name_from_error(&error_msg) {
300                        return ElusionError::MissingColumn {
301                            column: col_name,
302                            available_columns: extract_available_columns_from_error(&error_msg),
303                        };
304                    }
305                }
306                
307                if error_msg.contains("Cannot cast") {
308                    if let Some((col, expected, found)) = extract_type_info_from_error(&error_msg) {
309                        return ElusionError::InvalidDataType {
310                            column: col,
311                            expected,
312                            found,
313                        };
314                    }
315                }
316
317                if error_msg.contains("Schema") {
318                    return ElusionError::SchemaError {
319                        message: error_msg,
320                        schema: None,
321                        suggestion: "💡 Check column names and data types in your schema".to_string(),
322                    };
323                }
324
325                ElusionError::DataFusion(err)
326            },
327            DataFusionError::Plan(plan_err) => {
328                let error_msg = plan_err.to_string();
329                
330                if error_msg.contains("Duplicate column") {
331                    if let Some((col, locs)) = extract_duplicate_column_info(&error_msg) {
332                        return ElusionError::DuplicateColumn {
333                            column: col,
334                            locations: locs,
335                        };
336                    }
337                }
338
339                if error_msg.contains("JOIN") {
340                    return ElusionError::JoinError {
341                        message: error_msg.clone(),
342                        left_table: "unknown".to_string(),
343                        right_table: "unknown".to_string(),
344                        suggestion: "💡 Check join conditions and table names".to_string(),
345                    };
346                }
347
348                ElusionError::DataFusion(err)
349            },
350            DataFusionError::Execution(exec_err) => {
351                let error_msg = exec_err.to_string();
352
353                if error_msg.contains("aggregate") || error_msg.contains("SUM") || 
354                error_msg.contains("AVG") || error_msg.contains("COUNT") {
355                 if let Some((func, col)) = extract_aggregation_error(&error_msg) {
356                     return ElusionError::AggregationError {
357                         message: error_msg.clone(),
358                         function: func,
359                         column: col,
360                         suggestion: "💡 Verify aggregation function syntax and column data types".to_string(),
361                     };
362                 }
363             }
364                if error_msg.contains("GROUP BY") {
365                    let missing_col = extract_missing_column(&error_msg).unwrap_or("unknown".to_string());
366                    let function_context = detect_function_usage_in_error(&error_msg, &missing_col);
367                    
368                    return ElusionError::GroupByError {
369                        message: error_msg.clone(),
370                        invalid_columns: if missing_col != "unknown" { vec![missing_col.clone()] } else { Vec::new() },
371                        function_context: function_context.clone(),
372                        suggestion: generate_enhanced_groupby_suggestion(&missing_col, function_context.as_deref()),
373                    };
374                }
375
376                if error_msg.contains("PARTITION BY") {
377                    return ElusionError::PartitionError {
378                        message: error_msg.clone(),
379                        partition_columns: Vec::new(),
380                        suggestion: "💡 Check partition column names and data types".to_string(),
381                    };
382                }
383
384                if error_msg.contains("ORDER BY") {
385                    return ElusionError::OrderByError {
386                        message: error_msg.clone(),
387                        columns: Vec::new(),
388                        suggestion: "💡 Verify column names and sort directions".to_string(),
389                    };
390                }
391
392                if error_msg.contains("OVER") || error_msg.contains("window") {
393                    if let Some((func, details)) = extract_window_function_error(&error_msg) {
394                        return ElusionError::WindowFunctionError {
395                            message: error_msg.clone(),
396                            function: func,
397                            details,
398                            suggestion: "💡 Check window function syntax and parameters".to_string(),
399                        };
400                    }
401                }
402
403                if error_msg.contains("LIMIT") {
404                    return ElusionError::LimitError {
405                        message: error_msg.clone(),
406                        value: 0,
407                        suggestion: "💡 Ensure limit value is a positive integer".to_string(),
408                    };
409                }
410
411                if error_msg.contains("UNION") || error_msg.contains("INTERSECT") || error_msg.contains("EXCEPT") {
412                    return ElusionError::SetOperationError {
413                        operation: "Set Operation".to_string(),
414                        reason: error_msg.clone(),
415                        suggestion: "💡 Ensure both sides of the operation have compatible schemas".to_string(),
416                    };
417                }
418
419                ElusionError::DataFusion(err)
420            },
421            DataFusionError::NotImplemented(msg) => {
422                ElusionError::InvalidOperation {
423                    operation: "Operation not supported".to_string(),
424                    reason: msg.clone(),
425                    suggestion: "💡 Try using an alternative approach or check documentation for supported features".to_string(),
426                }
427            },
428            DataFusionError::Internal(msg) => {
429                ElusionError::Custom(format!("Internal error: {}. Please report this issue.", msg))
430            },
431            _ => ElusionError::DataFusion(err)
432        }
433    }
434}
435
436fn extract_window_function_error(err: &str) -> Option<(String, String)> {
437    let re = Regex::new(r"Window function '([^']+)' error: (.+)").ok()?;
438    let caps = re.captures(err)?;
439    Some((
440        caps.get(1)?.as_str().to_string(),
441        caps.get(2)?.as_str().to_string(),
442    ))
443}
444
445fn extract_aggregation_error(err: &str) -> Option<(String, String)> {
446    let re = Regex::new(r"Aggregate function '([^']+)' error on column '([^']+)'").ok()?;
447    let caps = re.captures(err)?;
448    Some((
449        caps.get(1)?.as_str().to_string(),
450        caps.get(2)?.as_str().to_string(),
451    ))
452}
453// Helper functions for error parsing
454fn extract_column_name_from_error(err: &str) -> Option<String> {
455    let re = Regex::new(r"Column '([^']+)'").ok()?;
456    re.captures(err)?.get(1).map(|m| m.as_str().to_string())
457}
458
459fn extract_available_columns_from_error(err: &str) -> Vec<String> {
460    if let Some(re) = Regex::new(r"Available fields are: \[(.*?)\]").ok() {
461        if let Some(caps) = re.captures(err) {
462            if let Some(fields) = caps.get(1) {
463                return fields.as_str()
464                    .split(',')
465                    .map(|s| s.trim().trim_matches('\'').to_string())
466                    .collect();
467            }
468        }
469    }
470    Vec::new()
471}
472
473fn extract_type_info_from_error(err: &str) -> Option<(String, String, String)> {
474    let re = Regex::new(r"Cannot cast column '([^']+)' from ([^ ]+) to ([^ ]+)").ok()?;
475    let caps = re.captures(err)?;
476    Some((
477        caps.get(1)?.as_str().to_string(),
478        caps.get(3)?.as_str().to_string(),
479        caps.get(2)?.as_str().to_string(),
480    ))
481}
482
483fn extract_duplicate_column_info(err: &str) -> Option<(String, Vec<String>)> {
484    let re = Regex::new(r"Duplicate column '([^']+)' in schema: \[(.*?)\]").ok()?;
485    let caps = re.captures(err)?;
486    Some((
487        caps.get(1)?.as_str().to_string(),
488        caps.get(2)?
489            .as_str()
490            .split(',')
491            .map(|s| s.trim().to_string())
492            .collect()
493    ))
494}
495
496// Helper function to suggest similar column names using basic string similarity
497fn suggest_similar_column(target: &str, available: &[String]) -> String {
498    available
499        .iter()
500        .map(|col| (col, string_similarity(target, col)))
501        .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
502        .map(|(col, _)| col.clone())
503        .unwrap_or_else(|| "".to_string())
504}
505
/// Case-insensitive similarity heuristic for two names.
///
/// * `0.9` when one lowercased string is a prefix of the other (this includes
///   equal strings and the case where either string is empty);
/// * otherwise, the length of the common prefix divided by the longer length,
///   when the strings share at least their first character;
/// * otherwise, the fraction of characters of `s1` that occur anywhere in `s2`,
///   relative to the longer length.
///
/// All lengths are measured in characters, not bytes, so names containing
/// non-ASCII characters are scored on the same scale as ASCII ones.
/// (A dedicated crate such as `strsim` would give better-quality scores.)
fn string_similarity(s1: &str, s2: &str) -> f64 {
    let s1_lower = s1.to_lowercase();
    let s2_lower = s2.to_lowercase();

    // Prefix (or exact) match.
    if s1_lower.starts_with(&s2_lower) || s2_lower.starts_with(&s1_lower) {
        return 0.9;
    }

    // Neither string is empty here (an empty string is a prefix of anything),
    // so `max_len` is at least 1 and the divisions below are well defined.
    let max_len = s1_lower.chars().count().max(s2_lower.chars().count()) as f64;

    // Length of the shared leading run of characters.
    let common_len = s1_lower
        .chars()
        .zip(s2_lower.chars())
        .take_while(|(c1, c2)| c1 == c2)
        .count();

    if common_len > 0 {
        return common_len as f64 / max_len;
    }

    // Fall back to counting characters of `s1` that appear anywhere in `s2`.
    let common_chars = s1_lower
        .chars()
        .filter(|c| s2_lower.contains(*c))
        .count() as f64;

    common_chars / max_len
}
534
// Marks `ElusionError` as a standard error type; the trait's default methods
// are used, with `Debug` derived on the enum and `Display` implemented above.
impl Error for ElusionError {}
536
537impl From<std::io::Error> for ElusionError {
538    fn from(err: std::io::Error) -> Self {
539        ElusionError::Io(err)
540    }
541}
542
/// Shorthand for a `Result` whose error type is [`ElusionError`].
pub type ElusionResult<T> = Result<T, ElusionError>;
544
545pub fn extract_table_from_join_error(error: &str) -> Option<String> {
546
547    for cap in STRING_LITERAL_PATTERN.captures_iter(error) {
548        if let Some(quoted_text) = cap.get(1) {
549            let text = quoted_text.as_str();
550            // Filter out common non-table words
551            if !SQL_KEYWORDS.contains(&text.to_uppercase().as_str()) && 
552               !text.chars().all(|c| c.is_numeric()) {
553                return Some(text.to_string());
554            }
555        }
556    }
557    
558    if let Some(cap) = TABLE_COLUMN_PATTERN.captures(error) {
559        if let Some(table_part) = cap.get(1) {
560            let table = table_part.as_str();
561            // Return if it's not a SQL keyword
562            if !SQL_KEYWORDS.contains(&table.to_uppercase().as_str()) {
563                return Some(table.to_string());
564            }
565        }
566    }
567    
568    let error_lower = error.to_lowercase();
569    if error_lower.contains("table") && error_lower.contains("not found") {
570        // Try to find the table name near "not found"
571        if let Some(start) = error_lower.find("table") {
572            let remaining = &error[start..];
573            if let Some(cap) = STRING_LITERAL_PATTERN.captures(remaining) {
574                if let Some(table_name) = cap.get(1) {
575                    return Some(table_name.as_str().to_string());
576                }
577            }
578        }
579    }
580    
581    None
582}
583
584pub fn extract_column_from_agg_error(error: &str) -> Option<String> {
585    
586    if let Some(cap) = FUNCTION_PATTERN.captures(error) {
587        if let Some(func_name) = cap.get(1) {
588            if AGGREGATE_FUNCTIONS.contains(&func_name.as_str().to_uppercase().as_str()) {
589                if let Some(args) = cap.get(2) {
590                    let arg_str = args.as_str().trim();
591                    
592                    // Check if it's a table.column reference
593                    if let Some(table_col_cap) = TABLE_COLUMN_PATTERN.captures(arg_str) {
594                        if let Some(column_part) = table_col_cap.get(2) {
595                            return Some(column_part.as_str().to_string());
596                        }
597                    }
598                    
599                    // Check if it's a simple column
600                    if SIMPLE_COLUMN_PATTERN.is_match(arg_str) && 
601                       !SQL_KEYWORDS.contains(&arg_str.to_uppercase().as_str()) {
602                        return Some(arg_str.to_string());
603                    }
604                }
605            }
606        }
607    }
608    
609    for cap in STRING_LITERAL_PATTERN.captures_iter(error) {
610        if let Some(quoted_text) = cap.get(1) {
611            let text = quoted_text.as_str();
612            // Check if it looks like a column name (not a SQL keyword or pure number)
613            if SIMPLE_COLUMN_PATTERN.is_match(text) && 
614               !SQL_KEYWORDS.contains(&text.to_uppercase().as_str()) &&
615               !text.chars().all(|c| c.is_numeric()) {
616                return Some(text.to_string());
617            }
618        }
619    }
620    
621    if let Some(cap) = TABLE_COLUMN_PATTERN.captures(error) {
622        if let Some(column_part) = cap.get(2) {
623            return Some(column_part.as_str().to_string());
624        }
625    }
626    
627    if let Some(cap) = POSTGRES_CAST_PATTERN.captures(error) {
628        if let Some(column_expr) = cap.get(1) {
629            let expr = column_expr.as_str();
630            // If it's table.column, extract just the column part
631            if let Some(dot_pos) = expr.rfind('.') {
632                return Some(expr[dot_pos + 1..].to_string());
633            } else {
634                return Some(expr.to_string());
635            }
636        }
637    }
638    
639    None
640}
641
642pub fn extract_function_from_error(error: &str) -> Option<String> {
643
644    if let Some(cap) = FUNCTION_PATTERN.captures(error) {
645        if let Some(func_name) = cap.get(1) {
646            let func = func_name.as_str().to_uppercase();
647            
648            // Check if it's an aggregate function
649            if AGGREGATE_FUNCTIONS.contains(&func.as_str()) {
650                return Some(func);
651            }
652            
653            // Check if it's a datetime function
654            if DATETIME_FUNCTIONS.contains(&func.as_str()) {
655                return Some(func);
656            }
657        }
658    }
659    
660    // Fallback: look for any aggregate function names in the error text
661    for &func in AGGREGATE_FUNCTIONS.iter() {
662        if error.to_uppercase().contains(func) {
663            return Some(func.to_string());
664        }
665    }
666    
667    // Check datetime functions too
668    for &func in DATETIME_FUNCTIONS.iter() {
669        if error.to_uppercase().contains(func) {
670            return Some(func.to_string());
671        }
672    }
673    
674    None
675}
676
677
678    pub fn extract_missing_column(error: &str) -> Option<String> {
679        let error_lower = error.to_lowercase();
680        
681        // Pattern 1: "Expression X could not be resolved"
682        if error_lower.contains("expression") && error_lower.contains("could not be resolved") {
683            if let Some(start) = error_lower.find("expression ") {
684                let remaining = &error[start + 11..];
685                if let Some(end) = remaining.find(" could not be resolved") {
686                    let expr = remaining[..end].trim();
687                    
688                    if let Some(cap) = TABLE_COLUMN_PATTERN.captures(expr) {
689                        if let Some(column_part) = cap.get(2) {
690                            return Some(column_part.as_str().to_string());
691                        }
692                    }
693                    
694                    if SIMPLE_COLUMN_PATTERN.is_match(expr) {
695                        return Some(expr.to_string());
696                    }
697                }
698            }
699        }
700        
701        if error_lower.contains("no field named") {
702            if let Some(start) = error_lower.find("no field named") {
703                let remaining = &error[start..];
704                // Look for quoted field name
705                if let Some(cap) = regex::Regex::new(r"'([^']+)'").unwrap().captures(remaining) {
706                    return Some(cap.get(1)?.as_str().to_string());
707                }
708            }
709        }
710        
711        if error_lower.contains("over") && error_lower.contains("could not be resolved") {
712            // Look for pattern like "PARTITION BY region" or "ORDER BY mesto"
713            if let Some(cap) = regex::Regex::new(r"(partition by|order by)\s+([a-zA-Z_][a-zA-Z0-9_]*)").unwrap().captures(&error_lower) {
714                return Some(cap.get(2)?.as_str().to_string());
715            }
716        }
717        
718        None
719    }
720
721    
722
723    pub fn extract_column_from_duplicate_error(error: &str) -> Option<String> {
724
725        if error.to_lowercase().contains("duplicate") && error.to_lowercase().contains("field name") {
726            if let Some(start) = error.to_lowercase().find("field name") {
727                let remaining = &error[start + 10..]; 
728                
729                if let Some(cap) = TABLE_COLUMN_PATTERN.captures(remaining) {
730                    if let Some(column_part) = cap.get(2) {
731                        return Some(column_part.as_str().to_string());
732                    }
733                }
734                
735                if let Some(cap) = SIMPLE_COLUMN_PATTERN.captures(remaining) {
736                    let potential_column = cap.get(0)?.as_str();
737                    if !SQL_KEYWORDS.contains(&potential_column.to_uppercase().as_str()) {
738                        return Some(potential_column.to_string());
739                    }
740                }
741            }
742        }
743        
744        None
745    }
746
747    pub fn extract_column_from_projection_error(error: &str) -> Option<String> {
748        //  "expression \"table.column AS alias\" at position X"
749        if error.contains("expression") && error.contains("at position") {
750            // Look for quoted expressions
751            for cap in STRING_LITERAL_PATTERN.captures_iter(error) {
752                if let Some(quoted_expr) = cap.get(1) {
753                    let expr = quoted_expr.as_str();
754                    
755                    // Check if it contains AS clause using your AS_PATTERN
756                    if AS_PATTERN.is_match(expr) {
757                        // Split by AS and get the alias part after AS
758                        if let Some(as_match) = AS_PATTERN.find(expr) {
759                            let alias_part = expr[as_match.end()..].trim();
760                            if SIMPLE_COLUMN_PATTERN.is_match(alias_part) {
761                                return Some(alias_part.to_string());
762                            }
763                        }
764                    } else {
765                        // No AS clause, extract column from table.column
766                        if let Some(table_col_cap) = TABLE_COLUMN_PATTERN.captures(expr) {
767                            if let Some(column_part) = table_col_cap.get(2) {
768                                return Some(column_part.as_str().to_string());
769                            }
770                        }
771                    }
772                }
773            }
774        }
775        
776        None
777    }
778
779    pub fn generate_enhanced_groupby_suggestion(missing_column: &str, function_context: Option<&str>) -> String {
780        if let Some(context) = function_context {
781            let function_type = if context.contains("string function") {
782                "string function"
783            } else if context.contains("datetime function") {
784                "datetime function"  
785            } else if context.contains("CASE expression") {
786                "CASE expression"
787            } else {
788                "function"
789            };
790            
791            format!(
792                "Column '{}' is referenced in a {} but missing from GROUP BY.
793                🔧 Solutions:
794                [1] Add '{}' to .select([...]) then use .group_by_all() Example: .select([\"existing_cols\", \"{}\"]).group_by_all()
795                [2] Add '{}' manually to .group_by([...])
796                [3] Use manual GROUP BY for complex function dependencies  Example: .group_by([\"col1\", \"col2\", \"{}\"])",
797                missing_column, function_type, missing_column, missing_column, missing_column, missing_column
798            )
799        } else {
800            "💡 Use .group_by_all() to automatically include all SELECT columns in GROUP BY, or manually add missing columns to .group_by([...])".to_string()
801        }
802    }
803
804    pub fn detect_function_usage_in_error(error: &str, missing_column: &str) -> Option<String> {
805        let error_upper = error.to_uppercase();
806        let column_upper = missing_column.to_uppercase();
807
808        if error_upper.contains("PROJECTION REFERENCES NON-AGGREGATE VALUES") {
809            return Some(format!("Column '{}' is used in a window function but not selected", missing_column));
810        }
811        
812        if error_upper.contains("ORDER BY") && error_upper.contains(&column_upper) {
813            return Some(format!("Column '{}' is used in ORDER BY clause of window function", missing_column));
814        }
815        
816        if error_upper.contains("PARTITION BY") && error_upper.contains(&column_upper) {
817            return Some(format!("Column '{}' is used in PARTITION BY clause of window function", missing_column));
818        }
819        
820        if error_upper.contains("OVER") {
821            return Some(format!("Column '{}' is used in window function", missing_column));
822        }
823        
824        for &func in STRING_FUNCTIONS.iter() {
825            let patterns = [
826                format!("{}({})", func, column_upper),
827                format!("{}({}", func, column_upper),  
828                format!("{}(.*{}.*)", func, column_upper), 
829            ];
830            
831            for pattern in &patterns {
832                if error_upper.contains(pattern) {
833                    return Some(format!("Column '{}' is used in {}() string function", missing_column, func));
834                }
835            }
836        }
837        
838        for &func in DATETIME_FUNCTIONS.iter() {
839            let patterns = [
840                format!("{}({})", func, column_upper),
841                format!("{}({}", func, column_upper),
842                format!("{}(.*{}.*)", func, column_upper),
843            ];
844            
845            for pattern in &patterns {
846                if error_upper.contains(pattern) {
847                    return Some(format!("Column '{}' is used in {}() datetime function", missing_column, func));
848                }
849            }
850        }
851        
852        if error_upper.contains("CASE") && error_upper.contains(&column_upper) {
853            return Some(format!("Column '{}' is used in CASE expression", missing_column));
854        }
855        
856        for &func in AGGREGATE_FUNCTIONS.iter() {
857            let patterns = [
858                format!("{}({})", func, column_upper),
859                format!("{}({}", func, column_upper),
860            ];
861            
862            for pattern in &patterns {
863                if error_upper.contains(pattern) {
864                    return Some(format!("Column '{}' is used in {}() aggregate function", missing_column, func));
865                }
866            }
867        }
868        
869        None
870    }
871
872    pub fn extract_window_function_columns(error: &str) -> Vec<String> {
873        let mut columns = Vec::new();
874        let error_upper = error.to_uppercase();
875        
876        if let Some(cap) = regex::Regex::new(r"PARTITION BY\s+([a-zA-Z_][a-zA-Z0-9_]*)").unwrap().captures(&error_upper) {
877            if let Some(col) = cap.get(1) {
878                columns.push(col.as_str().to_lowercase());
879            }
880        }
881        
882        if let Some(cap) = regex::Regex::new(r"ORDER BY\s+([a-zA-Z_][a-zA-Z0-9_]*)").unwrap().captures(&error_upper) {
883            if let Some(col) = cap.get(1) {
884                let col_name = col.as_str().to_lowercase();
885                if !columns.contains(&col_name) {
886                    columns.push(col_name);
887                }
888            }
889        }
890        
891        if columns.is_empty() {
892            if let Some(col) = extract_missing_column(error) {
893                columns.push(col);
894            }
895        }
896        
897        columns
898    }
899
900    pub fn extract_window_function_name(error: &str) -> Option<String> {
901        let error_upper = error.to_uppercase();
902        
903        // Common window functions
904        let window_functions = [
905            "ROW_NUMBER", "RANK", "DENSE_RANK", "NTILE", "PERCENT_RANK", "CUME_DIST",
906            "LAG", "LEAD", "FIRST_VALUE", "LAST_VALUE", "NTH_VALUE"
907        ];
908        
909        for func in &window_functions {
910            if error_upper.contains(&format!("{}(", func)) {
911                return Some(func.to_string());
912            }
913        }
914        
915        if error_upper.contains("OVER") {
916            for func in ["SUM", "AVG", "COUNT", "MIN", "MAX"] {
917                if error_upper.contains(&format!("{}(", func)) {
918                    return Some(format!("{} (window)", func));
919                }
920            }
921        }
922        
923        Some("WINDOW_FUNCTION".to_string())
924    }
925
// SPECIFIC ERROR
927
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks each extraction helper against one representative error message.
    #[test]
    fn test_extract_with_existing_patterns() {
        // Function name preceding the opening parenthesis.
        let function = extract_function_from_error("SUM(s.orderquantity) failed");
        assert_eq!(function.as_deref(), Some("SUM"));

        // Column part of a qualified aggregate argument.
        let agg_column = extract_column_from_agg_error("SUM(customer.total_amount) type error");
        assert_eq!(agg_column.as_deref(), Some("total_amount"));

        // Quoted table name in a join failure.
        let table = extract_table_from_join_error("Table 'customers' not found in join");
        assert_eq!(table.as_deref(), Some("customers"));

        // Column part of an unresolved qualified expression.
        let missing = extract_missing_column(
            "Expression s.customerkey could not be resolved from available columns",
        );
        assert_eq!(missing.as_deref(), Some("customerkey"));
    }
}