use std::collections::HashMap;
use std::time::SystemTime;
/// Structured diagnostic information attached to a failed operation.
///
/// A context is created with [`ErrorContext::new`], enriched through the
/// chained `with_*` methods, and rendered for humans with
/// [`ErrorContext::summary`].
#[derive(Debug, Clone)]
pub struct ErrorContext {
    /// Name of the operation that produced the error.
    pub operation: &'static str,
    /// Shape of the DataFrame involved as `(rows, columns)`, when known.
    pub dataframe_shape: Option<(usize, usize)>,
    /// Names of the columns involved in the operation.
    pub column_names: Vec<String>,
    /// Suggested fixes, in the order they were added.
    pub suggested_fixes: Vec<String>,
    /// Optional performance hint shown in the summary.
    pub performance_hint: Option<String>,
    /// Entries appended with `with_stack_entry`; not rendered by `summary`.
    pub stack_trace: Vec<String>,
    /// Free-form key/value pairs giving additional context.
    pub metadata: HashMap<String, String>,
    /// Moment at which the context was created.
    pub timestamp: SystemTime,
    /// Severity attached to the error; `new` initialises it to `Error`.
    pub severity: ErrorSeverity,
}

/// Severity level recorded on an [`ErrorContext`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorSeverity {
    Info,
    Warning,
    Error,
    Critical,
}

impl ErrorContext {
    /// Creates an empty context for `operation`, stamped with the current
    /// system time and a severity of [`ErrorSeverity::Error`].
    pub fn new(operation: &'static str) -> Self {
        Self {
            operation,
            dataframe_shape: None,
            column_names: Vec::new(),
            suggested_fixes: Vec::new(),
            performance_hint: None,
            stack_trace: Vec::new(),
            metadata: HashMap::new(),
            timestamp: SystemTime::now(),
            severity: ErrorSeverity::Error,
        }
    }

    /// Records the DataFrame shape, replacing any previous value.
    pub fn with_shape(mut self, shape: (usize, usize)) -> Self {
        self.dataframe_shape = Some(shape);
        self
    }

    /// Replaces the list of involved column names.
    pub fn with_columns(mut self, columns: Vec<String>) -> Self {
        self.column_names = columns;
        self
    }

    /// Appends a single suggested fix.
    pub fn with_suggestion(mut self, suggestion: String) -> Self {
        self.suggested_fixes.push(suggestion);
        self
    }

    /// Appends several suggested fixes, keeping their order.
    pub fn with_suggestions(mut self, suggestions: Vec<String>) -> Self {
        self.suggested_fixes.extend(suggestions);
        self
    }

    /// Sets the performance hint, replacing any previous value.
    pub fn with_performance_hint(mut self, hint: String) -> Self {
        self.performance_hint = Some(hint);
        self
    }

    /// Inserts a metadata entry; an existing entry with the same key is overwritten.
    pub fn with_metadata(mut self, key: String, value: String) -> Self {
        self.metadata.insert(key, value);
        self
    }

    /// Sets the severity.
    pub fn with_severity(mut self, severity: ErrorSeverity) -> Self {
        self.severity = severity;
        self
    }

    /// Appends an entry to the stack trace.
    pub fn with_stack_entry(mut self, entry: String) -> Self {
        self.stack_trace.push(entry);
        self
    }

    /// Renders a multi-line, human-readable description of the context.
    ///
    /// Sections for the shape, columns, suggested fixes, performance hint and
    /// metadata are emitted only when the corresponding field is populated.
    /// Metadata entries are listed in ascending key order so that the output
    /// is stable regardless of `HashMap` iteration order.
    pub fn summary(&self) -> String {
        let mut summary = format!(
            "Error in operation '{}' at {:?}\n",
            self.operation, self.timestamp
        );

        if let Some(shape) = self.dataframe_shape {
            summary.push_str(&format!("DataFrame shape: {:?}\n", shape));
        }

        if !self.column_names.is_empty() {
            summary.push_str(&format!("Columns involved: {:?}\n", self.column_names));
        }

        if !self.suggested_fixes.is_empty() {
            summary.push_str("\nSuggested fixes:\n");
            for (i, fix) in self.suggested_fixes.iter().enumerate() {
                summary.push_str(&format!(" {}. {}\n", i + 1, fix));
            }
        }

        if let Some(hint) = &self.performance_hint {
            summary.push_str(&format!("\nPerformance hint: {}\n", hint));
        }

        if !self.metadata.is_empty() {
            summary.push_str("\nAdditional context:\n");
            // HashMap iteration order is unspecified; sort by key so the same
            // context always produces the same text.
            let mut entries: Vec<(&String, &String)> = self.metadata.iter().collect();
            entries.sort_unstable_by(|a, b| a.0.cmp(b.0));
            for (key, value) in entries {
                summary.push_str(&format!(" {}: {}\n", key, value));
            }
        }

        summary
    }
}

impl Default for ErrorContext {
    /// Equivalent to `ErrorContext::new("unknown")`.
    fn default() -> Self {
        Self::new("unknown")
    }
}
/// Interface for error values that can describe, and possibly perform, their own recovery.
///
/// NOTE(review): only the signatures are visible here; the per-method contracts
/// below are inferred from the names and return types and should be confirmed
/// against the implementors.
pub trait ErrorRecovery {
/// Returns suggestion strings for this error (presumably human-readable fixes - confirm).
fn suggest_fixes(&self) -> Vec<String>;
/// Returns a flag that presumably tells callers whether `attempt_recovery` is worth calling - confirm.
fn can_auto_recover(&self) -> bool;
/// Attempts recovery, yielding an optional type-erased value on success or the
/// crate's error type on failure. What `Ok(None)` means is not shown here - TODO confirm.
fn attempt_recovery(
&self,
) -> std::result::Result<Option<Box<dyn std::any::Any>>, crate::core::error::Error>;
/// Returns a borrowed `ErrorContext` if this error carries one.
fn error_context(&self) -> Option<&ErrorContext>;
}
/// Stateless collection of helpers that build human-readable suggestions and
/// hints for common failure modes.
pub struct ErrorRecoveryHelper;

impl ErrorRecoveryHelper {
    /// Builds suggestions for a lookup of `missing_column` that failed.
    ///
    /// The result always states the missing name and lists `available_columns`.
    /// When any available column has a similarity above `0.5` to the missing
    /// name (see `string_similarity`), a "Did you mean" entry is added. Two
    /// generic tips close the list.
    pub fn column_not_found_suggestions(
        missing_column: &str,
        available_columns: &[String],
    ) -> Vec<String> {
        let mut suggestions = vec![
            format!("Column '{}' not found", missing_column),
            format!("Available columns: {:?}", available_columns),
        ];

        let similar_columns: Vec<&String> = available_columns
            .iter()
            .filter(|col| Self::string_similarity(missing_column, col) > 0.5)
            .collect();
        if !similar_columns.is_empty() {
            suggestions.push(format!("Did you mean one of: {:?}", similar_columns));
        }

        suggestions.push("Use .columns() to list all available columns".to_string());
        suggestions.push("Check for typos in column name".to_string());
        suggestions
    }

    /// Builds suggestions for a shape mismatch.
    ///
    /// Both shapes are read as `(rows, columns)`: element `0` drives the row
    /// advice and element `1` drives the column advice.
    pub fn shape_mismatch_suggestions(
        expected: (usize, usize),
        actual: (usize, usize),
    ) -> Vec<String> {
        let mut suggestions = vec![format!("Expected shape {:?}, got {:?}", expected, actual)];

        if expected.0 != actual.0 {
            suggestions.push("Row count mismatch - check data alignment".to_string());
            let advice = if actual.0 > expected.0 {
                "Consider using .head() to limit rows"
            } else {
                "Data may be incomplete - check data source"
            };
            suggestions.push(advice.to_string());
        }

        if expected.1 != actual.1 {
            suggestions.push("Column count mismatch - check column selection".to_string());
            suggestions.push("Use .select() to choose specific columns".to_string());
        }

        suggestions.push("Use .shape() to check DataFrame dimensions".to_string());
        suggestions
    }

    /// Builds suggestions for a column whose type is `found` where `expected`
    /// was required.
    ///
    /// Three generic entries are always produced; one extra entry is added for
    /// the specific `int64`/`float64`/`string` combinations matched below.
    pub fn type_mismatch_suggestions(column: &str, expected: &str, found: &str) -> Vec<String> {
        let mut suggestions = vec![
            format!(
                "Column '{}' type mismatch: expected {}, found {}",
                column, expected, found
            ),
            "Use .dtypes() to check column types".to_string(),
            "Consider type conversion with .astype()".to_string(),
        ];

        // Arm order matters: ("string", _) wins over (_, "string") when both match.
        match (expected, found) {
            ("int64", "float64") => {
                suggestions.push("Data contains floating point values - consider using .round() or .astype('int64')".to_string());
            }
            ("float64", "int64") => {
                suggestions.push("Converting to float - this is usually safe".to_string());
            }
            ("string", _) => {
                suggestions.push("Use .astype('string') to convert to string type".to_string());
            }
            (_, "string") => {
                suggestions
                    .push("Parse string data with appropriate conversion functions".to_string());
            }
            _ => {}
        }

        suggestions
    }

    /// Returns a performance hint for `operation`, or `None` when no threshold
    /// is exceeded or the operation is not one of `"groupby"`, `"join"`, `"sort"`.
    ///
    /// For `"groupby"` the data-size check takes precedence over the
    /// column-count check. All thresholds are strict (`>`).
    pub fn performance_hints(
        operation: &str,
        data_size: Option<usize>,
        column_count: Option<usize>,
    ) -> Option<String> {
        // A missing measurement never exceeds a limit.
        let exceeds = |value: Option<usize>, limit: usize| matches!(value, Some(v) if v > limit);

        let hint = match operation {
            "groupby" if exceeds(data_size, 1_000_000) => {
                "Large dataset detected - consider using .sample() for testing or distributed processing"
            }
            "groupby" if exceeds(column_count, 50) => {
                "Many columns detected - consider selecting only needed columns before groupby"
            }
            "join" if exceeds(data_size, 10_000_000) => {
                "Large join detected - ensure join columns are indexed for better performance"
            }
            "sort" if exceeds(data_size, 1_000_000) => {
                "Large sort operation - consider using partial sorting with .nlargest() or .nsmallest()"
            }
            _ => return None,
        };
        Some(hint.to_string())
    }

    /// Normalised similarity of two strings: `1.0 - distance / max_len`, where
    /// `distance` is the Levenshtein distance and lengths are counted in
    /// `char`s. Returns `0.0` when either string is empty.
    fn string_similarity(s1: &str, s2: &str) -> f64 {
        // Count chars, not bytes, so the ratio matches the unit used by
        // `levenshtein_distance`.
        let len1 = s1.chars().count();
        let len2 = s2.chars().count();
        if len1 == 0 || len2 == 0 {
            return 0.0;
        }
        let max_len = len1.max(len2);
        let distance = Self::levenshtein_distance(s1, s2);
        1.0 - (distance as f64 / max_len as f64)
    }

    /// Levenshtein edit distance between `s1` and `s2`, measured in `char`s.
    ///
    /// All sizes are derived from the collected `char` vectors; using the byte
    /// length of the `&str` here would index past the end of those vectors for
    /// any non-ASCII input.
    fn levenshtein_distance(s1: &str, s2: &str) -> usize {
        let s1_chars: Vec<char> = s1.chars().collect();
        let s2_chars: Vec<char> = s2.chars().collect();
        if s1_chars.is_empty() {
            return s2_chars.len();
        }
        if s2_chars.is_empty() {
            return s1_chars.len();
        }

        // Only the previous and current rows of the DP table are needed.
        let mut previous: Vec<usize> = (0..=s2_chars.len()).collect();
        let mut current = vec![0; s2_chars.len() + 1];
        for (i, c1) in s1_chars.iter().enumerate() {
            current[0] = i + 1;
            for (j, c2) in s2_chars.iter().enumerate() {
                let cost = usize::from(c1 != c2);
                current[j + 1] = (previous[j + 1] + 1)
                    .min(current[j] + 1)
                    .min(previous[j] + cost);
            }
            std::mem::swap(&mut previous, &mut current);
        }
        previous[s2_chars.len()]
    }
}
/// Fluent builder that assembles an [`ErrorContext`] from borrowed arguments.
///
/// Each setter consumes the builder and returns it, so calls can be chained
/// and finished with [`ErrorContextBuilder::build`].
pub struct ErrorContextBuilder {
    context: ErrorContext,
}

impl ErrorContextBuilder {
    /// Starts a builder wrapping a fresh context for `operation`.
    pub fn new(operation: &'static str) -> Self {
        let context = ErrorContext::new(operation);
        Self { context }
    }

    /// Records the DataFrame shape as `(rows, cols)`.
    pub fn shape(self, rows: usize, cols: usize) -> Self {
        Self {
            context: self.context.with_shape((rows, cols)),
        }
    }

    /// Replaces the list of involved column names.
    pub fn columns(self, columns: Vec<String>) -> Self {
        Self {
            context: self.context.with_columns(columns),
        }
    }

    /// Appends one suggested fix.
    pub fn suggest(self, suggestion: &str) -> Self {
        Self {
            context: self.context.with_suggestion(String::from(suggestion)),
        }
    }

    /// Sets the performance hint, replacing any earlier one.
    pub fn hint(self, hint: &str) -> Self {
        Self {
            context: self.context.with_performance_hint(String::from(hint)),
        }
    }

    /// Inserts a metadata entry; an existing entry with the same key is overwritten.
    pub fn meta(self, key: &str, value: &str) -> Self {
        Self {
            context: self
                .context
                .with_metadata(String::from(key), String::from(value)),
        }
    }

    /// Sets the severity.
    pub fn severity(self, severity: ErrorSeverity) -> Self {
        Self {
            context: self.context.with_severity(severity),
        }
    }

    /// Finishes the builder and hands back the assembled context.
    pub fn build(self) -> ErrorContext {
        let Self { context } = self;
        context
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every value passed to the builder must be present on the built context.
    #[test]
    fn test_error_context_creation() {
        let column_names = vec![String::from("col1"), String::from("col2")];
        let builder = ErrorContextBuilder::new("test_operation")
            .shape(100, 5)
            .columns(column_names)
            .suggest("Try using .select() to choose columns")
            .hint("Large dataset - consider sampling")
            .meta("data_type", "numeric")
            .severity(ErrorSeverity::Warning);
        let built = builder.build();

        assert_eq!(built.operation, "test_operation");
        assert_eq!(built.dataframe_shape, Some((100, 5)));
        assert_eq!(built.column_names.len(), 2);
        assert_eq!(built.suggested_fixes.len(), 1);
        assert!(built.performance_hint.is_some());
        assert_eq!(built.severity, ErrorSeverity::Warning);
    }

    /// A near-miss column name should be echoed back and matched to a close candidate.
    #[test]
    fn test_column_not_found_suggestions() {
        let available: Vec<String> = ["name", "age", "salary"]
            .iter()
            .map(|column| column.to_string())
            .collect();
        let suggestions = ErrorRecoveryHelper::column_not_found_suggestions("nam", &available);
        let mentions = |needle: &str| suggestions.iter().any(|line| line.contains(needle));

        assert!(!suggestions.is_empty());
        assert!(mentions("nam"));
        assert!(mentions("name"));
    }

    /// Similarity is high for a one-letter edit, low for unrelated words, and 1.0 for equal strings.
    #[test]
    fn test_string_similarity() {
        let similarity = ErrorRecoveryHelper::string_similarity;

        assert!(similarity("hello", "helo") >= 0.8);
        assert!(similarity("hello", "world") < 0.5);
        assert_eq!(similarity("same", "same"), 1.0);
    }
}