1use crate::prelude::*;
2use crate::normalizers::normalize::STRING_LITERAL_PATTERN;
3use crate::normalizers::normalize::AS_PATTERN;
4use crate::normalizers::normalize::FUNCTION_PATTERN;
5use crate::normalizers::normalize::SQL_KEYWORDS;
6use crate::normalizers::normalize::TABLE_COLUMN_PATTERN;
7use crate::normalizers::normalize::AGGREGATE_FUNCTIONS;
8use crate::normalizers::normalize::SIMPLE_COLUMN_PATTERN;
9use crate::normalizers::normalize::POSTGRES_CAST_PATTERN;
10use crate::normalizers::normalize::DATETIME_FUNCTIONS;
11use crate::normalizers::normalize::STRING_FUNCTIONS;
12
/// Error type returned through `ElusionResult`.
///
/// Most variants carry the pieces of context (column names, table names, a
/// `suggestion` string, ...) that the `Display` implementation stitches into a
/// single user-facing message.
#[derive(Debug)]
pub enum ElusionError {

    /// A column could not be found; additionally records where (`location`)
    /// and in what `context` it was referenced, plus a caller-supplied
    /// `suggestion`.
    MissingColumnWithContext {
        column: String,
        available_columns: Vec<String>,
        context: String,
        location: String,
        suggestion: String,
    },
    /// A column could not be found. `available_columns` is used by `Display`
    /// to list the alternatives and to propose the most similar name.
    MissingColumn {
        column: String,
        available_columns: Vec<String>,
    },
    /// A column has type `found` where `expected` was required.
    InvalidDataType {
        column: String,
        expected: String,
        found: String,
    },
    /// The same column name appears in several `locations`.
    DuplicateColumn {
        column: String,
        locations: Vec<String>,
    },
    /// An operation was rejected; `reason` explains why.
    InvalidOperation {
        operation: String,
        reason: String,
        suggestion: String,
    },
    /// A schema-related failure; `schema` optionally holds a rendering of the
    /// current schema that `Display` appends to the message.
    SchemaError {
        message: String,
        schema: Option<String>,
        suggestion: String,
    },
    /// A join between `left_table` and `right_table` failed.
    JoinError {
        message: String,
        left_table: String,
        right_table: String,
        suggestion: String,
    },
    /// A GROUP BY failure; `function_context` optionally describes the
    /// function/clause in which an invalid column was used.
    GroupByError {
        message: String,
        invalid_columns: Vec<String>,
        suggestion: String,
        function_context: Option<String>,
    },
    /// A write of `operation` kind to `path` failed.
    WriteError {
        path: String,
        operation: String,
        reason: String,
        suggestion: String,
    },
    /// A failure involving the given partition columns.
    PartitionError {
        message: String,
        partition_columns: Vec<String>,
        suggestion: String,
    },
    /// An aggregate `function` failed on `column`.
    AggregationError {
        message: String,
        function: String,
        column: String,
        suggestion: String,
    },
    /// An ORDER BY failure involving `columns`.
    OrderByError {
        message: String,
        columns: Vec<String>,
        suggestion: String,
    },
    /// A window `function` failed; `details` carries extra diagnostics.
    WindowFunctionError {
        message: String,
        function: String,
        details: String,
        suggestion: String,
    },
    /// A LIMIT failure; `value` is the offending limit.
    LimitError {
        message: String,
        value: u64,
        suggestion: String,
    },
    /// A set operation (the `From<DataFusionError>` conversion in this file
    /// produces it for UNION / INTERSECT / EXCEPT messages) failed.
    SetOperationError {
        operation: String,
        reason: String,
        suggestion: String,
    },
    /// `group_by_all()` cannot be applied because columns are missing from the
    /// SELECT list. Each `window_function_dependencies` entry is rendered by
    /// `Display` as "<first> needs '<second>'".
    GroupByAllCompatibilityError {
        missing_columns: Vec<String>,
        window_function_dependencies: Vec<(String, String)>,
        suggestion: String,
    },

    /// `group_by_all()` combined with a window function is missing a column
    /// from the SELECT clause.
    GroupByAllWindowError {
        missing_column: String,
        window_function_context: String,
        suggestion: String,
    },

    /// `group_by_all()` is missing a column that another expression depends on.
    GroupByAllDependencyError {
        missing_column: String,
        dependency_context: String,
        suggestion: String,
    },
    /// A DataFusion error that was not mapped to a more specific variant.
    DataFusion(DataFusionError),
    /// A wrapped standard I/O error.
    Io(std::io::Error),
    /// A OneLake failure described by a free-form message.
    OneLakeError(String),
    /// Any other failure described by a free-form message.
    Custom(String),
}
118
119impl fmt::Display for ElusionError {
120 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
121 match self {
122 ElusionError::MissingColumn { column, available_columns } => {
123 let suggestion = suggest_similar_column(column, available_columns);
124 write!(
125 f,
126 "🔍 Column Not Found: '{}' 📋 Available columns are: {} 💡 Did you mean '{}'? 🔧 Check for typos or use .display_schema() to see all available columns.",
127 column,
128 available_columns.join(", "),
129 suggestion
130 )
131 },
132 ElusionError::MissingColumnWithContext { column, available_columns, context, location, suggestion } => {
133 let similar_suggestion = suggest_similar_column(column, available_columns);
134 write!(
135 f,
136 "🔍 Column Not Found: '{}' in {} 📍 Location: {} 🔍 Context: {} 📋 Available columns: {} 💡 Did you mean '{}'? 🔧 Suggestion: {}",
137 column,
138 location,
139 location,
140 context,
141 available_columns.join(", "),
142 similar_suggestion,
143 suggestion
144 )
145 },
146 ElusionError::InvalidDataType { column, expected, found } => write!(
147 f,
148 "📊 Type Mismatch in column '{}' ❌ Found: {} ✅ Expected: {} 💡 Try: .with_column(\"{}\", cast(\"{}\", {}));",
149 column, found, expected, column, column, expected
150 ),
151 ElusionError::DuplicateColumn { column, locations } => write!(
152 f,
153 "🔄 Duplicate Column: '{}' 📍 Found in: {} 💡 Try using table aliases or renaming columns: .select([\"table1.{} as table1_{}\", \"table2.{} as table2_{}\"])",
154 column,
155 locations.join(", "),
156 column, column, column, column
157 ),
158 ElusionError::InvalidOperation { operation, reason, suggestion } => write!(
159 f,
160 "⚠️ Invalid Operation: {} ❌ Problem: {} 💡 Suggestion: {}",
161 operation, reason, suggestion
162 ),
163 ElusionError::SchemaError { message, schema, suggestion } => {
164 let schema_info = schema.as_ref().map_or(
165 String::new(),
166 |s| format!("📋 Current Schema:{}", s)
167 );
168 write!(
169 f,
170 "🏗️ Schema Error: {}{} 💡 Suggestion: {}",
171 message, schema_info, suggestion
172 )
173 },
174 ElusionError::JoinError { message, left_table, right_table, suggestion } => write!(
175 f,
176 "🤝 Join Error: ❌ {} 📌 Left Table: {} 📌 Right Table: {} 💡 Suggestion: {}",
177 message, left_table, right_table, suggestion
178 ),
179 ElusionError::GroupByError { message, invalid_columns, suggestion, function_context } => {
180 let function_info = if let Some(context) = function_context {
181 format!("🔧 Function Context: {}", context)
182 } else {
183 String::new()
184 };
185
186 write!(
187 f,
188 "📊 Group By Error: {} ❌ Invalid columns: {}{} 💡 Suggestion: {}",
189 message,
190 invalid_columns.join(", "),
191 function_info,
192 suggestion
193 )
194 },
195 ElusionError::GroupByAllCompatibilityError { missing_columns, window_function_dependencies, suggestion } => {
196 let deps_info = if !window_function_dependencies.is_empty() {
197 let deps = window_function_dependencies.iter()
198 .map(|(func, col)| format!(" • {} needs '{}'", func, col))
199 .collect::<Vec<_>>()
200 .join("\n");
201 format!("🪟 Window Function Dependencies:{}", deps)
202 } else {
203 String::new()
204 };
205
206 write!(
207 f,
208 "🔧 group_by_all() Compatibility Issue. ❌ Missing columns from SELECT: {}{}/ 💡 {}",
209 missing_columns.join(", "),
210 deps_info,
211 suggestion
212 )
213 },
214
215 ElusionError::GroupByAllWindowError { missing_column, window_function_context, suggestion } => {
216 write!(
217 f,
218 "🪟 group_by_all() + Window Function Error. ❌ Missing column '{}' from SELECT clause. 🔍 Context: {} {}",
219 missing_column, window_function_context, suggestion
220 )
221 },
222
223 ElusionError::GroupByAllDependencyError { missing_column, dependency_context, suggestion } => {
224 write!(
225 f,
226 "🔗 group_by_all() + Column Dependency Error. ❌ Missing column '{}' from SELECT clause. 🔍 Context: {} {}",
227 missing_column, dependency_context, suggestion
228 )
229 },
230
231 ElusionError::WriteError { path, operation, reason, suggestion } => write!(
232 f,
233 "💾 Write Error during {} operation 📍 Path: {} ❌ Problem: {} 💡 Suggestion: {}",
234 operation, path, reason, suggestion
235 ),
236 ElusionError::DataFusion(err) => write!(
238 f,
239 "⚡ DataFusion Error: {} 💡 Don't worry! Here's what you can try: • Check your column names and types • Verify your SQL syntax • Use .df_schema() to see available columns • Try breaking down complex operations into smaller steps",
240 err
241 ),
242 ElusionError::Io(err) => write!(
244 f,
245 "📁 I/O Error: {} 💡 Quick fixes to try: • Check if the file/directory exists • Verify your permissions • Ensure the path is correct • Close any programs using the file",
246 err
247 ),
248 ElusionError::PartitionError { message, partition_columns, suggestion } => write!(
249 f,
250 "📦 Partition Error: {} ❌ Affected partition columns: {} 💡 Suggestion: {}",
251 message,
252 partition_columns.join(", "),
253 suggestion
254 ),
255 ElusionError::AggregationError { message, function, column, suggestion } => write!(
256 f,
257 "📊 Aggregation Error in function '{}' ❌ Problem with column '{}': {} 💡 Suggestion: {}",
258 function, column, message, suggestion
259 ),
260 ElusionError::OrderByError { message, columns, suggestion } => write!(
261 f,
262 "🔄 Order By Error: {} ❌ Problem with columns: {} 💡 Suggestion: {}",
263 message,
264 columns.join(", "),
265 suggestion
266 ),
267 ElusionError::WindowFunctionError { message, function, details, suggestion } => write!(
268 f,
269 "🪟 Window Function Error in '{}' ❌ Problem: {} 📝 Details: {} 💡 Suggestion: {}",
270 function, message, details, suggestion
271 ),
272 ElusionError::LimitError { message, value, suggestion } => write!(
273 f,
274 "🔢 Limit Error: {} ❌ Invalid limit value: {} 💡 Suggestion: {}",
275 message, value, suggestion
276 ),
277 ElusionError::SetOperationError { operation, reason, suggestion } => write!(
278 f,
279 "🔄 Set Operation Error in '{}' ❌ Problem: {} 💡 Suggestion: {}",
280 operation, reason, suggestion
281 ),
282 ElusionError::OneLakeError(msg) => write!(
283 f,
284 "🏢 OneLake Error: {} 💡 Check your OneLake URL format, authentication, and permissions",
285 msg
286 ),
287 ElusionError::Custom(err) => write!(f, "💫 {}", err),
288 }
289 }
290}
291
292impl From<DataFusionError> for ElusionError {
293 fn from(err: DataFusionError) -> Self {
294 match &err {
295 DataFusionError::SchemaError(schema_err, _context) => {
296 let error_msg = schema_err.to_string();
297
298 if error_msg.contains("Column") && error_msg.contains("not found") {
299 if let Some(col_name) = extract_column_name_from_error(&error_msg) {
300 return ElusionError::MissingColumn {
301 column: col_name,
302 available_columns: extract_available_columns_from_error(&error_msg),
303 };
304 }
305 }
306
307 if error_msg.contains("Cannot cast") {
308 if let Some((col, expected, found)) = extract_type_info_from_error(&error_msg) {
309 return ElusionError::InvalidDataType {
310 column: col,
311 expected,
312 found,
313 };
314 }
315 }
316
317 if error_msg.contains("Schema") {
318 return ElusionError::SchemaError {
319 message: error_msg,
320 schema: None,
321 suggestion: "💡 Check column names and data types in your schema".to_string(),
322 };
323 }
324
325 ElusionError::DataFusion(err)
326 },
327 DataFusionError::Plan(plan_err) => {
328 let error_msg = plan_err.to_string();
329
330 if error_msg.contains("Duplicate column") {
331 if let Some((col, locs)) = extract_duplicate_column_info(&error_msg) {
332 return ElusionError::DuplicateColumn {
333 column: col,
334 locations: locs,
335 };
336 }
337 }
338
339 if error_msg.contains("JOIN") {
340 return ElusionError::JoinError {
341 message: error_msg.clone(),
342 left_table: "unknown".to_string(),
343 right_table: "unknown".to_string(),
344 suggestion: "💡 Check join conditions and table names".to_string(),
345 };
346 }
347
348 ElusionError::DataFusion(err)
349 },
350 DataFusionError::Execution(exec_err) => {
351 let error_msg = exec_err.to_string();
352
353 if error_msg.contains("aggregate") || error_msg.contains("SUM") ||
354 error_msg.contains("AVG") || error_msg.contains("COUNT") {
355 if let Some((func, col)) = extract_aggregation_error(&error_msg) {
356 return ElusionError::AggregationError {
357 message: error_msg.clone(),
358 function: func,
359 column: col,
360 suggestion: "💡 Verify aggregation function syntax and column data types".to_string(),
361 };
362 }
363 }
364 if error_msg.contains("GROUP BY") {
365 let missing_col = extract_missing_column(&error_msg).unwrap_or("unknown".to_string());
366 let function_context = detect_function_usage_in_error(&error_msg, &missing_col);
367
368 return ElusionError::GroupByError {
369 message: error_msg.clone(),
370 invalid_columns: if missing_col != "unknown" { vec![missing_col.clone()] } else { Vec::new() },
371 function_context: function_context.clone(),
372 suggestion: generate_enhanced_groupby_suggestion(&missing_col, function_context.as_deref()),
373 };
374 }
375
376 if error_msg.contains("PARTITION BY") {
377 return ElusionError::PartitionError {
378 message: error_msg.clone(),
379 partition_columns: Vec::new(),
380 suggestion: "💡 Check partition column names and data types".to_string(),
381 };
382 }
383
384 if error_msg.contains("ORDER BY") {
385 return ElusionError::OrderByError {
386 message: error_msg.clone(),
387 columns: Vec::new(),
388 suggestion: "💡 Verify column names and sort directions".to_string(),
389 };
390 }
391
392 if error_msg.contains("OVER") || error_msg.contains("window") {
393 if let Some((func, details)) = extract_window_function_error(&error_msg) {
394 return ElusionError::WindowFunctionError {
395 message: error_msg.clone(),
396 function: func,
397 details,
398 suggestion: "💡 Check window function syntax and parameters".to_string(),
399 };
400 }
401 }
402
403 if error_msg.contains("LIMIT") {
404 return ElusionError::LimitError {
405 message: error_msg.clone(),
406 value: 0,
407 suggestion: "💡 Ensure limit value is a positive integer".to_string(),
408 };
409 }
410
411 if error_msg.contains("UNION") || error_msg.contains("INTERSECT") || error_msg.contains("EXCEPT") {
412 return ElusionError::SetOperationError {
413 operation: "Set Operation".to_string(),
414 reason: error_msg.clone(),
415 suggestion: "💡 Ensure both sides of the operation have compatible schemas".to_string(),
416 };
417 }
418
419 ElusionError::DataFusion(err)
420 },
421 DataFusionError::NotImplemented(msg) => {
422 ElusionError::InvalidOperation {
423 operation: "Operation not supported".to_string(),
424 reason: msg.clone(),
425 suggestion: "💡 Try using an alternative approach or check documentation for supported features".to_string(),
426 }
427 },
428 DataFusionError::Internal(msg) => {
429 ElusionError::Custom(format!("Internal error: {}. Please report this issue.", msg))
430 },
431 _ => ElusionError::DataFusion(err)
432 }
433 }
434}
435
436fn extract_window_function_error(err: &str) -> Option<(String, String)> {
437 let re = Regex::new(r"Window function '([^']+)' error: (.+)").ok()?;
438 let caps = re.captures(err)?;
439 Some((
440 caps.get(1)?.as_str().to_string(),
441 caps.get(2)?.as_str().to_string(),
442 ))
443}
444
445fn extract_aggregation_error(err: &str) -> Option<(String, String)> {
446 let re = Regex::new(r"Aggregate function '([^']+)' error on column '([^']+)'").ok()?;
447 let caps = re.captures(err)?;
448 Some((
449 caps.get(1)?.as_str().to_string(),
450 caps.get(2)?.as_str().to_string(),
451 ))
452}
453fn extract_column_name_from_error(err: &str) -> Option<String> {
455 let re = Regex::new(r"Column '([^']+)'").ok()?;
456 re.captures(err)?.get(1).map(|m| m.as_str().to_string())
457}
458
459fn extract_available_columns_from_error(err: &str) -> Vec<String> {
460 if let Some(re) = Regex::new(r"Available fields are: \[(.*?)\]").ok() {
461 if let Some(caps) = re.captures(err) {
462 if let Some(fields) = caps.get(1) {
463 return fields.as_str()
464 .split(',')
465 .map(|s| s.trim().trim_matches('\'').to_string())
466 .collect();
467 }
468 }
469 }
470 Vec::new()
471}
472
473fn extract_type_info_from_error(err: &str) -> Option<(String, String, String)> {
474 let re = Regex::new(r"Cannot cast column '([^']+)' from ([^ ]+) to ([^ ]+)").ok()?;
475 let caps = re.captures(err)?;
476 Some((
477 caps.get(1)?.as_str().to_string(),
478 caps.get(3)?.as_str().to_string(),
479 caps.get(2)?.as_str().to_string(),
480 ))
481}
482
483fn extract_duplicate_column_info(err: &str) -> Option<(String, Vec<String>)> {
484 let re = Regex::new(r"Duplicate column '([^']+)' in schema: \[(.*?)\]").ok()?;
485 let caps = re.captures(err)?;
486 Some((
487 caps.get(1)?.as_str().to_string(),
488 caps.get(2)?
489 .as_str()
490 .split(',')
491 .map(|s| s.trim().to_string())
492 .collect()
493 ))
494}
495
496fn suggest_similar_column(target: &str, available: &[String]) -> String {
498 available
499 .iter()
500 .map(|col| (col, string_similarity(target, col)))
501 .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
502 .map(|(col, _)| col.clone())
503 .unwrap_or_else(|| "".to_string())
504}
505
/// Scores how alike two names are, case-insensitively, as a value in
/// `[0.0, 1.0]`.
///
/// * `1.0` — the names are equal ignoring case.
/// * `0.9` — one name is a prefix of the other.
/// * otherwise, if they share a leading run of characters: the length of that
///   run divided by the length of the longer name;
/// * otherwise: the fraction of `s1`'s characters that occur anywhere in `s2`,
///   relative to the length of the longer name.
///
/// All lengths are measured in characters (not bytes), so non-ASCII names are
/// scored on the same scale as ASCII ones.
fn string_similarity(s1: &str, s2: &str) -> f64 {
    let a = s1.to_lowercase();
    let b = s2.to_lowercase();

    // An exact (case-insensitive) match must outrank a mere prefix match.
    if a == b {
        return 1.0;
    }

    if a.starts_with(&b) || b.starts_with(&a) {
        return 0.9;
    }

    // Cannot be zero here: two empty strings were handled by the equality check.
    let longest = a.chars().count().max(b.chars().count()) as f64;

    let shared_prefix = a
        .chars()
        .zip(b.chars())
        .take_while(|(x, y)| x == y)
        .count() as f64;
    if shared_prefix > 0.0 {
        return shared_prefix / longest;
    }

    let shared_chars = a.chars().filter(|c| b.contains(*c)).count() as f64;
    shared_chars / longest
}
534
// Marks `ElusionError` as a standard error type; the trait's default methods
// are used as-is (no `source()` override is provided).
impl Error for ElusionError {}
536
537impl From<std::io::Error> for ElusionError {
538 fn from(err: std::io::Error) -> Self {
539 ElusionError::Io(err)
540 }
541}
542
/// Shorthand for a `Result` whose error type is [`ElusionError`].
pub type ElusionResult<T> = Result<T, ElusionError>;
544
545pub fn extract_table_from_join_error(error: &str) -> Option<String> {
546
547 for cap in STRING_LITERAL_PATTERN.captures_iter(error) {
548 if let Some(quoted_text) = cap.get(1) {
549 let text = quoted_text.as_str();
550 if !SQL_KEYWORDS.contains(&text.to_uppercase().as_str()) &&
552 !text.chars().all(|c| c.is_numeric()) {
553 return Some(text.to_string());
554 }
555 }
556 }
557
558 if let Some(cap) = TABLE_COLUMN_PATTERN.captures(error) {
559 if let Some(table_part) = cap.get(1) {
560 let table = table_part.as_str();
561 if !SQL_KEYWORDS.contains(&table.to_uppercase().as_str()) {
563 return Some(table.to_string());
564 }
565 }
566 }
567
568 let error_lower = error.to_lowercase();
569 if error_lower.contains("table") && error_lower.contains("not found") {
570 if let Some(start) = error_lower.find("table") {
572 let remaining = &error[start..];
573 if let Some(cap) = STRING_LITERAL_PATTERN.captures(remaining) {
574 if let Some(table_name) = cap.get(1) {
575 return Some(table_name.as_str().to_string());
576 }
577 }
578 }
579 }
580
581 None
582}
583
584pub fn extract_column_from_agg_error(error: &str) -> Option<String> {
585
586 if let Some(cap) = FUNCTION_PATTERN.captures(error) {
587 if let Some(func_name) = cap.get(1) {
588 if AGGREGATE_FUNCTIONS.contains(&func_name.as_str().to_uppercase().as_str()) {
589 if let Some(args) = cap.get(2) {
590 let arg_str = args.as_str().trim();
591
592 if let Some(table_col_cap) = TABLE_COLUMN_PATTERN.captures(arg_str) {
594 if let Some(column_part) = table_col_cap.get(2) {
595 return Some(column_part.as_str().to_string());
596 }
597 }
598
599 if SIMPLE_COLUMN_PATTERN.is_match(arg_str) &&
601 !SQL_KEYWORDS.contains(&arg_str.to_uppercase().as_str()) {
602 return Some(arg_str.to_string());
603 }
604 }
605 }
606 }
607 }
608
609 for cap in STRING_LITERAL_PATTERN.captures_iter(error) {
610 if let Some(quoted_text) = cap.get(1) {
611 let text = quoted_text.as_str();
612 if SIMPLE_COLUMN_PATTERN.is_match(text) &&
614 !SQL_KEYWORDS.contains(&text.to_uppercase().as_str()) &&
615 !text.chars().all(|c| c.is_numeric()) {
616 return Some(text.to_string());
617 }
618 }
619 }
620
621 if let Some(cap) = TABLE_COLUMN_PATTERN.captures(error) {
622 if let Some(column_part) = cap.get(2) {
623 return Some(column_part.as_str().to_string());
624 }
625 }
626
627 if let Some(cap) = POSTGRES_CAST_PATTERN.captures(error) {
628 if let Some(column_expr) = cap.get(1) {
629 let expr = column_expr.as_str();
630 if let Some(dot_pos) = expr.rfind('.') {
632 return Some(expr[dot_pos + 1..].to_string());
633 } else {
634 return Some(expr.to_string());
635 }
636 }
637 }
638
639 None
640}
641
642pub fn extract_function_from_error(error: &str) -> Option<String> {
643
644 if let Some(cap) = FUNCTION_PATTERN.captures(error) {
645 if let Some(func_name) = cap.get(1) {
646 let func = func_name.as_str().to_uppercase();
647
648 if AGGREGATE_FUNCTIONS.contains(&func.as_str()) {
650 return Some(func);
651 }
652
653 if DATETIME_FUNCTIONS.contains(&func.as_str()) {
655 return Some(func);
656 }
657 }
658 }
659
660 for &func in AGGREGATE_FUNCTIONS.iter() {
662 if error.to_uppercase().contains(func) {
663 return Some(func.to_string());
664 }
665 }
666
667 for &func in DATETIME_FUNCTIONS.iter() {
669 if error.to_uppercase().contains(func) {
670 return Some(func.to_string());
671 }
672 }
673
674 None
675}
676
677
678 pub fn extract_missing_column(error: &str) -> Option<String> {
679 let error_lower = error.to_lowercase();
680
681 if error_lower.contains("expression") && error_lower.contains("could not be resolved") {
683 if let Some(start) = error_lower.find("expression ") {
684 let remaining = &error[start + 11..];
685 if let Some(end) = remaining.find(" could not be resolved") {
686 let expr = remaining[..end].trim();
687
688 if let Some(cap) = TABLE_COLUMN_PATTERN.captures(expr) {
689 if let Some(column_part) = cap.get(2) {
690 return Some(column_part.as_str().to_string());
691 }
692 }
693
694 if SIMPLE_COLUMN_PATTERN.is_match(expr) {
695 return Some(expr.to_string());
696 }
697 }
698 }
699 }
700
701 if error_lower.contains("no field named") {
702 if let Some(start) = error_lower.find("no field named") {
703 let remaining = &error[start..];
704 if let Some(cap) = regex::Regex::new(r"'([^']+)'").unwrap().captures(remaining) {
706 return Some(cap.get(1)?.as_str().to_string());
707 }
708 }
709 }
710
711 if error_lower.contains("over") && error_lower.contains("could not be resolved") {
712 if let Some(cap) = regex::Regex::new(r"(partition by|order by)\s+([a-zA-Z_][a-zA-Z0-9_]*)").unwrap().captures(&error_lower) {
714 return Some(cap.get(2)?.as_str().to_string());
715 }
716 }
717
718 None
719 }
720
721
722
723 pub fn extract_column_from_duplicate_error(error: &str) -> Option<String> {
724
725 if error.to_lowercase().contains("duplicate") && error.to_lowercase().contains("field name") {
726 if let Some(start) = error.to_lowercase().find("field name") {
727 let remaining = &error[start + 10..];
728
729 if let Some(cap) = TABLE_COLUMN_PATTERN.captures(remaining) {
730 if let Some(column_part) = cap.get(2) {
731 return Some(column_part.as_str().to_string());
732 }
733 }
734
735 if let Some(cap) = SIMPLE_COLUMN_PATTERN.captures(remaining) {
736 let potential_column = cap.get(0)?.as_str();
737 if !SQL_KEYWORDS.contains(&potential_column.to_uppercase().as_str()) {
738 return Some(potential_column.to_string());
739 }
740 }
741 }
742 }
743
744 None
745 }
746
747 pub fn extract_column_from_projection_error(error: &str) -> Option<String> {
748 if error.contains("expression") && error.contains("at position") {
750 for cap in STRING_LITERAL_PATTERN.captures_iter(error) {
752 if let Some(quoted_expr) = cap.get(1) {
753 let expr = quoted_expr.as_str();
754
755 if AS_PATTERN.is_match(expr) {
757 if let Some(as_match) = AS_PATTERN.find(expr) {
759 let alias_part = expr[as_match.end()..].trim();
760 if SIMPLE_COLUMN_PATTERN.is_match(alias_part) {
761 return Some(alias_part.to_string());
762 }
763 }
764 } else {
765 if let Some(table_col_cap) = TABLE_COLUMN_PATTERN.captures(expr) {
767 if let Some(column_part) = table_col_cap.get(2) {
768 return Some(column_part.as_str().to_string());
769 }
770 }
771 }
772 }
773 }
774 }
775
776 None
777 }
778
779 pub fn generate_enhanced_groupby_suggestion(missing_column: &str, function_context: Option<&str>) -> String {
780 if let Some(context) = function_context {
781 let function_type = if context.contains("string function") {
782 "string function"
783 } else if context.contains("datetime function") {
784 "datetime function"
785 } else if context.contains("CASE expression") {
786 "CASE expression"
787 } else {
788 "function"
789 };
790
791 format!(
792 "Column '{}' is referenced in a {} but missing from GROUP BY.
793 🔧 Solutions:
794 [1] Add '{}' to .select([...]) then use .group_by_all() Example: .select([\"existing_cols\", \"{}\"]).group_by_all()
795 [2] Add '{}' manually to .group_by([...])
796 [3] Use manual GROUP BY for complex function dependencies Example: .group_by([\"col1\", \"col2\", \"{}\"])",
797 missing_column, function_type, missing_column, missing_column, missing_column, missing_column
798 )
799 } else {
800 "💡 Use .group_by_all() to automatically include all SELECT columns in GROUP BY, or manually add missing columns to .group_by([...])".to_string()
801 }
802 }
803
804 pub fn detect_function_usage_in_error(error: &str, missing_column: &str) -> Option<String> {
805 let error_upper = error.to_uppercase();
806 let column_upper = missing_column.to_uppercase();
807
808 if error_upper.contains("PROJECTION REFERENCES NON-AGGREGATE VALUES") {
809 return Some(format!("Column '{}' is used in a window function but not selected", missing_column));
810 }
811
812 if error_upper.contains("ORDER BY") && error_upper.contains(&column_upper) {
813 return Some(format!("Column '{}' is used in ORDER BY clause of window function", missing_column));
814 }
815
816 if error_upper.contains("PARTITION BY") && error_upper.contains(&column_upper) {
817 return Some(format!("Column '{}' is used in PARTITION BY clause of window function", missing_column));
818 }
819
820 if error_upper.contains("OVER") {
821 return Some(format!("Column '{}' is used in window function", missing_column));
822 }
823
824 for &func in STRING_FUNCTIONS.iter() {
825 let patterns = [
826 format!("{}({})", func, column_upper),
827 format!("{}({}", func, column_upper),
828 format!("{}(.*{}.*)", func, column_upper),
829 ];
830
831 for pattern in &patterns {
832 if error_upper.contains(pattern) {
833 return Some(format!("Column '{}' is used in {}() string function", missing_column, func));
834 }
835 }
836 }
837
838 for &func in DATETIME_FUNCTIONS.iter() {
839 let patterns = [
840 format!("{}({})", func, column_upper),
841 format!("{}({}", func, column_upper),
842 format!("{}(.*{}.*)", func, column_upper),
843 ];
844
845 for pattern in &patterns {
846 if error_upper.contains(pattern) {
847 return Some(format!("Column '{}' is used in {}() datetime function", missing_column, func));
848 }
849 }
850 }
851
852 if error_upper.contains("CASE") && error_upper.contains(&column_upper) {
853 return Some(format!("Column '{}' is used in CASE expression", missing_column));
854 }
855
856 for &func in AGGREGATE_FUNCTIONS.iter() {
857 let patterns = [
858 format!("{}({})", func, column_upper),
859 format!("{}({}", func, column_upper),
860 ];
861
862 for pattern in &patterns {
863 if error_upper.contains(pattern) {
864 return Some(format!("Column '{}' is used in {}() aggregate function", missing_column, func));
865 }
866 }
867 }
868
869 None
870 }
871
872 pub fn extract_window_function_columns(error: &str) -> Vec<String> {
873 let mut columns = Vec::new();
874 let error_upper = error.to_uppercase();
875
876 if let Some(cap) = regex::Regex::new(r"PARTITION BY\s+([a-zA-Z_][a-zA-Z0-9_]*)").unwrap().captures(&error_upper) {
877 if let Some(col) = cap.get(1) {
878 columns.push(col.as_str().to_lowercase());
879 }
880 }
881
882 if let Some(cap) = regex::Regex::new(r"ORDER BY\s+([a-zA-Z_][a-zA-Z0-9_]*)").unwrap().captures(&error_upper) {
883 if let Some(col) = cap.get(1) {
884 let col_name = col.as_str().to_lowercase();
885 if !columns.contains(&col_name) {
886 columns.push(col_name);
887 }
888 }
889 }
890
891 if columns.is_empty() {
892 if let Some(col) = extract_missing_column(error) {
893 columns.push(col);
894 }
895 }
896
897 columns
898 }
899
900 pub fn extract_window_function_name(error: &str) -> Option<String> {
901 let error_upper = error.to_uppercase();
902
903 let window_functions = [
905 "ROW_NUMBER", "RANK", "DENSE_RANK", "NTILE", "PERCENT_RANK", "CUME_DIST",
906 "LAG", "LEAD", "FIRST_VALUE", "LAST_VALUE", "NTH_VALUE"
907 ];
908
909 for func in &window_functions {
910 if error_upper.contains(&format!("{}(", func)) {
911 return Some(func.to_string());
912 }
913 }
914
915 if error_upper.contains("OVER") {
916 for func in ["SUM", "AVG", "COUNT", "MIN", "MAX"] {
917 if error_upper.contains(&format!("{}(", func)) {
918 return Some(format!("{} (window)", func));
919 }
920 }
921 }
922
923 Some("WINDOW_FUNCTION".to_string())
924 }
925
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: each extractor recovers the expected name from a
    /// representative error message using the shared normalizer patterns.
    #[test]
    fn test_extract_with_existing_patterns() {
        // Function name from an aggregate call.
        let function = extract_function_from_error("SUM(s.orderquantity) failed");
        assert_eq!(function.as_deref(), Some("SUM"));

        // Column part of a qualified aggregate argument.
        let agg_column = extract_column_from_agg_error("SUM(customer.total_amount) type error");
        assert_eq!(agg_column.as_deref(), Some("total_amount"));

        // Quoted table name in a join error.
        let table = extract_table_from_join_error("Table 'customers' not found in join");
        assert_eq!(table.as_deref(), Some("customers"));

        // Column part of an unresolved qualified expression.
        let missing = extract_missing_column(
            "Expression s.customerkey could not be resolved from available columns",
        );
        assert_eq!(missing.as_deref(), Some("customerkey"));
    }
}