sql_cli/
history.rs

1use crate::app_paths::AppPaths;
2use crate::history_protection::HistoryProtection;
3use anyhow::Result;
4use chrono::{DateTime, Utc};
5use fuzzy_matcher::skim::SkimMatcherV2;
6use fuzzy_matcher::FuzzyMatcher;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::fs;
10use std::path::PathBuf;
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct QueryMetadata {
14    #[serde(default)]
15    pub tables: Vec<String>, // Tables referenced (FROM clause)
16    #[serde(default)]
17    pub select_columns: Vec<String>, // Columns in SELECT clause
18    #[serde(default)]
19    pub where_columns: Vec<String>, // Columns in WHERE clause
20    #[serde(default)]
21    pub order_by_columns: Vec<String>, // Columns in ORDER BY clause
22    #[serde(default)]
23    pub functions_used: Vec<String>, // Functions/methods used (Contains, StartsWith, etc.)
24    #[serde(default)]
25    pub query_type: String, // SELECT, INSERT, UPDATE, DELETE, etc.
26}
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct HistoryEntry {
30    pub command: String,
31    pub timestamp: DateTime<Utc>,
32    pub execution_count: u32,
33    pub success: bool,
34    pub duration_ms: Option<u64>,
35    #[serde(default)]
36    pub schema_columns: Vec<String>, // Column names from the data source
37    #[serde(default)]
38    pub data_source: Option<String>, // e.g., "customers.csv", "trades_api", etc.
39    #[serde(default)]
40    pub metadata: Option<QueryMetadata>, // Parsed query metadata
41    #[serde(default)]
42    pub is_starred: bool, // User marked as important
43    #[serde(default)]
44    pub session_id: Option<String>, // Session this was created in
45}
46
47#[derive(Debug, Clone)]
48pub struct HistoryMatch {
49    pub entry: HistoryEntry,
50    pub score: i64,
51    pub indices: Vec<usize>,
52}
53
54pub struct CommandHistory {
55    entries: Vec<HistoryEntry>,
56    history_file: PathBuf,
57    matcher: SkimMatcherV2,
58    command_counts: HashMap<String, u32>,
59    session_id: String,
60    session_entries: Vec<HistoryEntry>, // Entries from current session only
61    protection: HistoryProtection,
62}
63
64impl CommandHistory {
65    pub fn new() -> Result<Self> {
66        let history_file = AppPaths::history_file()
67            .map_err(|e| anyhow::anyhow!("Failed to get history file path: {}", e))?;
68
69        // Create backup directory
70        let backup_dir = history_file
71            .parent()
72            .unwrap_or(std::path::Path::new("."))
73            .join("history_backups");
74
75        // Generate a unique session ID
76        let session_id = format!("session_{}", Utc::now().timestamp_millis());
77
78        let mut history = Self {
79            entries: Vec::new(),
80            history_file,
81            matcher: SkimMatcherV2::default(),
82            command_counts: HashMap::new(),
83            session_id,
84            session_entries: Vec::new(),
85            protection: HistoryProtection::new(backup_dir),
86        };
87
88        history.load_from_file()?;
89
90        // Clean the history file on startup to remove any duplicates
91        // This ensures the file stays clean over time
92        if !history.entries.is_empty() {
93            history.clean_and_save()?;
94        }
95
96        Ok(history)
97    }
98
99    pub fn add_entry(
100        &mut self,
101        command: String,
102        success: bool,
103        duration_ms: Option<u64>,
104    ) -> Result<()> {
105        self.add_entry_with_schema(command, success, duration_ms, Vec::new(), None)
106    }
107
108    pub fn add_entry_with_schema(
109        &mut self,
110        command: String,
111        success: bool,
112        duration_ms: Option<u64>,
113        schema_columns: Vec<String>,
114        data_source: Option<String>,
115    ) -> Result<()> {
116        // Don't add empty commands or duplicates of the last command
117        if command.trim().is_empty() {
118            return Ok(());
119        }
120
121        // Check if this is the same as the last command
122        if let Some(last_entry) = self.entries.last() {
123            if last_entry.command == command {
124                return Ok(());
125            }
126        }
127
128        // Extract metadata from the query
129        let metadata = self.extract_query_metadata(&command);
130
131        let entry = HistoryEntry {
132            command: command.clone(),
133            timestamp: Utc::now(),
134            execution_count: *self.command_counts.get(&command).unwrap_or(&0) + 1,
135            success,
136            duration_ms,
137            schema_columns,
138            data_source,
139            metadata,
140            is_starred: false,
141            session_id: Some(self.session_id.clone()),
142        };
143
144        // Add to session entries
145        self.session_entries.push(entry.clone());
146
147        // Update command count
148        *self.command_counts.entry(command.clone()).or_insert(0) += 1;
149
150        // Remove any existing entry with the same command to avoid duplicates
151        // This moves the command to the end of the history with updated timestamp
152        self.entries.retain(|e| e.command != command);
153
154        self.entries.push(entry);
155
156        // Keep only the last 1000 entries
157        if self.entries.len() > 1000 {
158            self.entries.drain(0..self.entries.len() - 1000);
159        }
160
161        self.save_to_file()?;
162        Ok(())
163    }
164
165    pub fn search(&self, query: &str) -> Vec<HistoryMatch> {
166        self.search_with_schema(query, &[], None)
167    }
168
169    pub fn search_with_schema(
170        &self,
171        query: &str,
172        current_columns: &[String],
173        current_source: Option<&str>,
174    ) -> Vec<HistoryMatch> {
175        if query.is_empty() {
176            // Return recent entries when no query, prioritizing schema matches
177            let mut entries: Vec<_> = self
178                .entries
179                .iter()
180                .rev()
181                .take(100)
182                .map(|entry| {
183                    let schema_score =
184                        self.calculate_schema_match_score(entry, current_columns, current_source);
185                    HistoryMatch {
186                        entry: entry.clone(),
187                        score: 100 + schema_score,
188                        indices: Vec::new(),
189                    }
190                })
191                .collect();
192
193            entries.sort_by(|a, b| b.score.cmp(&a.score));
194            entries.truncate(50);
195            return entries;
196        }
197
198        let mut matches: Vec<HistoryMatch> = self
199            .entries
200            .iter()
201            .filter_map(|entry| {
202                if let Some((score, indices)) = self.matcher.fuzzy_indices(&entry.command, query) {
203                    let schema_score =
204                        self.calculate_schema_match_score(entry, current_columns, current_source);
205                    Some(HistoryMatch {
206                        entry: entry.clone(),
207                        score: score + schema_score,
208                        indices,
209                    })
210                } else {
211                    None
212                }
213            })
214            .collect();
215
216        // Sort by score (descending), then by recency and frequency
217        matches.sort_by(|a, b| {
218            // Primary sort: fuzzy match score (including schema bonus)
219            let score_cmp = b.score.cmp(&a.score);
220            if score_cmp != std::cmp::Ordering::Equal {
221                return score_cmp;
222            }
223
224            // Secondary sort: execution count (more frequently used commands rank higher)
225            let count_cmp = b.entry.execution_count.cmp(&a.entry.execution_count);
226            if count_cmp != std::cmp::Ordering::Equal {
227                return count_cmp;
228            }
229
230            // Tertiary sort: recency (more recent commands rank higher)
231            b.entry.timestamp.cmp(&a.entry.timestamp)
232        });
233
234        matches.truncate(20); // Limit to top 20 matches
235        matches
236    }
237
238    fn calculate_schema_match_score(
239        &self,
240        entry: &HistoryEntry,
241        current_columns: &[String],
242        current_source: Option<&str>,
243    ) -> i64 {
244        let mut score = 0i64;
245
246        // Bonus for matching data source
247        if let (Some(entry_source), Some(current)) = (&entry.data_source, current_source) {
248            if entry_source == current {
249                score += 50; // High bonus for same data source
250            }
251        }
252
253        // Bonus for matching columns in schema
254        if !current_columns.is_empty() && !entry.schema_columns.is_empty() {
255            let matching_columns = entry
256                .schema_columns
257                .iter()
258                .filter(|col| current_columns.contains(col))
259                .count();
260
261            let total_columns = entry.schema_columns.len().max(current_columns.len());
262            if total_columns > 0 {
263                // Scale bonus based on percentage of matching columns
264                let match_percentage = (matching_columns * 100) / total_columns;
265                score += (match_percentage as i64) / 2; // Up to 50 points for perfect match
266            }
267        }
268
269        // Additional bonus for matching columns in query metadata
270        if let Some(metadata) = &entry.metadata {
271            let metadata_columns: Vec<&String> = metadata
272                .select_columns
273                .iter()
274                .chain(metadata.where_columns.iter())
275                .chain(metadata.order_by_columns.iter())
276                .collect();
277
278            let matching_metadata = metadata_columns
279                .iter()
280                .filter(|col| current_columns.contains(col))
281                .count();
282
283            if matching_metadata > 0 {
284                score += (matching_metadata as i64) * 5; // 5 points per matching column
285            }
286        }
287
288        score
289    }
290
291    fn extract_query_metadata(&self, query: &str) -> Option<QueryMetadata> {
292        let query_upper = query.to_uppercase();
293
294        // Determine query type
295        let query_type = if query_upper.starts_with("SELECT") {
296            "SELECT"
297        } else if query_upper.starts_with("INSERT") {
298            "INSERT"
299        } else if query_upper.starts_with("UPDATE") {
300            "UPDATE"
301        } else if query_upper.starts_with("DELETE") {
302            "DELETE"
303        } else {
304            "OTHER"
305        }
306        .to_string();
307
308        // Extract table names (simple regex-based approach)
309        let mut tables = Vec::new();
310        if let Some(from_idx) = query_upper.find(" FROM ") {
311            let after_from = &query[from_idx + 6..];
312            if let Some(end_idx) = after_from.find([' ', '(', ';']) {
313                let table_name = after_from[..end_idx].trim().to_string();
314                if !table_name.is_empty() {
315                    tables.push(table_name);
316                }
317            }
318        }
319
320        // Extract columns from SELECT clause
321        let mut select_columns = Vec::new();
322        if query_type == "SELECT" {
323            if let Some(select_idx) = query_upper.find("SELECT ") {
324                let after_select = &query[select_idx + 7..];
325                if let Some(from_idx) = after_select.to_uppercase().find(" FROM") {
326                    let select_clause = &after_select[..from_idx];
327                    if !select_clause.trim().eq("*") {
328                        // Parse column names (simplified)
329                        for col in select_clause.split(',') {
330                            let col_name = col
331                                .split_whitespace()
332                                .next()
333                                .unwrap_or("")
334                                .trim_matches('"')
335                                .to_string();
336                            if !col_name.is_empty() {
337                                select_columns.push(col_name);
338                            }
339                        }
340                    }
341                }
342            }
343        }
344
345        // Extract columns from WHERE clause and functions used
346        let mut where_columns = Vec::new();
347        let mut functions_used = Vec::new();
348        if let Some(where_idx) = query_upper.find(" WHERE ") {
349            let after_where = &query[where_idx + 7..];
350
351            // Look for LINQ methods
352            let linq_methods = [
353                "Contains",
354                "StartsWith",
355                "EndsWith",
356                "Length",
357                "ToUpper",
358                "ToLower",
359                "IsNullOrEmpty",
360            ];
361            for method in &linq_methods {
362                if after_where.contains(method) {
363                    functions_used.push((*method).to_string());
364                }
365            }
366
367            // Extract column names before operators or methods
368            // This is simplified - a proper parser would be better
369            let words: Vec<&str> = after_where
370                .split(|c: char| !c.is_alphanumeric() && c != '_')
371                .filter(|s| !s.is_empty())
372                .collect();
373
374            for (i, word) in words.iter().enumerate() {
375                // If next word is an operator or method, this might be a column
376                if i + 1 < words.len() {
377                    let next = words[i + 1];
378                    if linq_methods.contains(&next)
379                        || ["IS", "NOT", "LIKE", "BETWEEN"].contains(&next.to_uppercase().as_str())
380                    {
381                        where_columns.push((*word).to_string());
382                    }
383                }
384            }
385        }
386
387        // Extract ORDER BY columns
388        let mut order_by_columns = Vec::new();
389        if let Some(order_idx) = query_upper.find(" ORDER BY ") {
390            let after_order = &query[order_idx + 10..];
391            let end_idx = after_order.find([';', ')']).unwrap_or(after_order.len());
392            let order_clause = &after_order[..end_idx];
393
394            for col in order_clause.split(',') {
395                let col_name = col
396                    .split_whitespace()
397                    .next()
398                    .unwrap_or("")
399                    .trim_matches('"')
400                    .to_string();
401                if !col_name.is_empty()
402                    && col_name.to_uppercase() != "ASC"
403                    && col_name.to_uppercase() != "DESC"
404                {
405                    order_by_columns.push(col_name);
406                }
407            }
408        }
409
410        Some(QueryMetadata {
411            tables,
412            select_columns,
413            where_columns,
414            order_by_columns,
415            functions_used,
416            query_type,
417        })
418    }
419
420    pub fn get_recent(&self, limit: usize) -> Vec<&HistoryEntry> {
421        self.entries.iter().rev().take(limit).collect()
422    }
423
424    pub fn get_all(&self) -> &[HistoryEntry] {
425        &self.entries
426    }
427
428    pub fn get_last_entry(&self) -> Option<&HistoryEntry> {
429        self.entries.last()
430    }
431
432    /// Get session-only entries (from current run)
433    pub fn get_session_entries(&self) -> &[HistoryEntry] {
434        &self.session_entries
435    }
436
437    /// Get entries for navigation (session + starred from persistent)
438    pub fn get_navigation_entries(&self) -> Vec<HistoryEntry> {
439        let mut entries = self.session_entries.clone();
440
441        // Add starred entries from persistent history that aren't in session
442        for entry in &self.entries {
443            if entry.is_starred
444                && !self
445                    .session_entries
446                    .iter()
447                    .any(|e| e.command == entry.command)
448            {
449                entries.push(entry.clone());
450            }
451        }
452
453        // Sort by timestamp, most recent first
454        entries.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
455
456        // Deduplicate keeping most recent
457        let mut seen = std::collections::HashSet::new();
458        entries.retain(|e| seen.insert(e.command.clone()));
459
460        entries
461    }
462
463    /// Star/unstar a command
464    pub fn toggle_star(&mut self, command: &str) -> Result<()> {
465        // Find in entries and toggle
466        for entry in &mut self.entries {
467            if entry.command == command {
468                entry.is_starred = !entry.is_starred;
469                break;
470            }
471        }
472
473        // Also update session entries
474        for entry in &mut self.session_entries {
475            if entry.command == command {
476                entry.is_starred = !entry.is_starred;
477                break;
478            }
479        }
480
481        self.save_to_file()
482    }
483
484    pub fn clear(&mut self) -> Result<()> {
485        // SAFETY: Create backup before clearing
486        let current_count = self.entries.len();
487        if current_count > 0 {
488            eprintln!("[HISTORY WARNING] Clearing {current_count} entries - creating backup");
489            if let Ok(content) = serde_json::to_string_pretty(&self.entries) {
490                self.protection.backup_before_write(&content, current_count);
491            }
492        }
493        self.entries.clear();
494        self.command_counts.clear();
495        self.save_to_file()?;
496        Ok(())
497    }
498
499    fn load_from_file(&mut self) -> Result<()> {
500        if !self.history_file.exists() {
501            eprintln!("[History] No history file found at {:?}", self.history_file);
502            return Ok(());
503        }
504
505        eprintln!("[History] Loading history from {:?}", self.history_file);
506        let content = fs::read_to_string(&self.history_file)?;
507        if content.trim().is_empty() {
508            eprintln!("[History] History file is empty");
509            return Ok(());
510        }
511
512        // Try to parse the history file
513        let entries: Vec<HistoryEntry> = match serde_json::from_str(&content) {
514            Ok(entries) => entries,
515            Err(e) => {
516                eprintln!("[History] ERROR: Failed to parse history file: {e}");
517                eprintln!("[History] Attempting recovery from backup...");
518
519                // Try to recover from backup
520                if let Some(backup_content) = self.protection.recover_from_backup() {
521                    eprintln!("[History] Found backup, attempting to restore...");
522
523                    // Try to parse the backup
524                    match serde_json::from_str::<Vec<HistoryEntry>>(&backup_content) {
525                        Ok(backup_entries) => {
526                            eprintln!(
527                                "[History] Successfully recovered {} entries from backup",
528                                backup_entries.len()
529                            );
530
531                            // Save the recovered content to the main history file
532                            fs::write(&self.history_file, &backup_content)?;
533
534                            // Move the corrupted file for investigation
535                            let corrupted_path = self.history_file.with_extension("json.corrupted");
536                            let _ = fs::rename(
537                                self.history_file.with_extension("json"),
538                                &corrupted_path,
539                            );
540                            eprintln!("[History] Corrupted file moved to {corrupted_path:?}");
541
542                            backup_entries
543                        }
544                        Err(backup_err) => {
545                            eprintln!("[History] Backup also corrupted: {backup_err}");
546                            eprintln!("[History] Starting with empty history");
547                            Vec::new()
548                        }
549                    }
550                } else {
551                    eprintln!("[History] No backup available, starting with empty history");
552
553                    // Move the corrupted file for investigation
554                    let corrupted_path = self.history_file.with_extension("json.corrupted");
555                    let _ = fs::copy(&self.history_file, &corrupted_path);
556                    eprintln!("[History] Corrupted file copied to {corrupted_path:?}");
557
558                    Vec::new()
559                }
560            }
561        };
562        eprintln!(
563            "[History] Loaded {} entries from history file",
564            entries.len()
565        );
566        let original_count = entries.len();
567
568        // Deduplicate entries, keeping only the most recent of each command
569        // This cleans up any existing duplicates in the history file
570        let mut seen_commands = std::collections::HashSet::new();
571        let mut deduplicated = Vec::new();
572
573        // Process in reverse to keep the most recent version of each command
574        for entry in entries.into_iter().rev() {
575            if seen_commands.insert(entry.command.clone()) {
576                deduplicated.push(entry);
577            }
578        }
579
580        // Reverse back to chronological order
581        deduplicated.reverse();
582
583        // Sort by timestamp to ensure chronological order (oldest first)
584        deduplicated.sort_by(|a, b| a.timestamp.cmp(&b.timestamp));
585
586        // Log if we removed duplicates (only on first load, not every save)
587        let removed_count = original_count - deduplicated.len();
588        if removed_count > 0 {
589            eprintln!("[sql-cli] Cleaned {removed_count} duplicate commands from history");
590        }
591
592        // Rebuild command counts
593        self.command_counts.clear();
594        for entry in &deduplicated {
595            *self
596                .command_counts
597                .entry(entry.command.clone())
598                .or_insert(0) = entry.execution_count;
599        }
600
601        self.entries = deduplicated;
602        eprintln!(
603            "[History] Final history contains {} unique entries",
604            self.entries.len()
605        );
606        Ok(())
607    }
608
609    fn save_to_file(&self) -> Result<()> {
610        // SAFETY: Validate before writing
611        let new_content = serde_json::to_string_pretty(&self.entries)?;
612        let new_count = self.entries.len();
613
614        // Get current file entry count for comparison
615        let old_count = if self.history_file.exists() {
616            if let Ok(existing) = fs::read_to_string(&self.history_file) {
617                existing.matches("\"command\":").count()
618            } else {
619                0
620            }
621        } else {
622            0
623        };
624
625        // Validate the write
626        if !self
627            .protection
628            .validate_write(old_count, new_count, &new_content)
629        {
630            eprintln!("[HISTORY PROTECTION] Write blocked! Attempting recovery from backup...");
631            if let Some(backup_content) = self.protection.recover_from_backup() {
632                fs::write(&self.history_file, backup_content)?;
633                return Ok(());
634            }
635            return Err(anyhow::anyhow!("History write validation failed"));
636        }
637
638        // Create backup before significant changes
639        if old_count > 0 && (old_count != new_count || old_count > 10) {
640            self.protection.backup_before_write(&new_content, new_count);
641        }
642
643        // Use atomic write to prevent corruption from partial writes
644        // Write to a temp file first, then rename it
645        let temp_file = self.history_file.with_extension("json.tmp");
646        fs::write(&temp_file, new_content)?;
647
648        // Atomic rename (on Unix, rename is atomic)
649        fs::rename(temp_file, &self.history_file)?;
650        Ok(())
651    }
652
653    /// Clean the history file by removing duplicates and rewriting it
654    /// This is called after loading to ensure the file stays clean
655    pub fn clean_and_save(&mut self) -> Result<()> {
656        // The entries are already deduplicated in memory after loading
657        // Just save them back to clean the file
658        self.save_to_file()?;
659        Ok(())
660    }
661
662    pub fn stats(&self) -> HistoryStats {
663        let total_commands = self.entries.len();
664        let unique_commands = self.command_counts.len();
665        let successful_commands = self.entries.iter().filter(|e| e.success).count();
666        let failed_commands = total_commands - successful_commands;
667
668        let most_used = self
669            .command_counts
670            .iter()
671            .max_by_key(|(_, &count)| count)
672            .map(|(cmd, &count)| (cmd.clone(), count));
673
674        HistoryStats {
675            total_commands,
676            unique_commands,
677            successful_commands,
678            failed_commands,
679            most_used_command: most_used,
680        }
681    }
682}
683
684#[derive(Debug)]
685pub struct HistoryStats {
686    pub total_commands: usize,
687    pub unique_commands: usize,
688    pub successful_commands: usize,
689    pub failed_commands: usize,
690    pub most_used_command: Option<(String, u32)>,
691}
692
693impl Clone for CommandHistory {
694    fn clone(&self) -> Self {
695        Self {
696            entries: self.entries.clone(),
697            history_file: self.history_file.clone(),
698            matcher: SkimMatcherV2::default(), // Create new matcher
699            command_counts: self.command_counts.clone(),
700            session_id: self.session_id.clone(),
701            session_entries: self.session_entries.clone(),
702            protection: HistoryProtection::new(
703                self.history_file
704                    .parent()
705                    .unwrap_or(std::path::Path::new("."))
706                    .join("history_backups"),
707            ),
708        }
709    }
710}
711
712impl Default for CommandHistory {
713    fn default() -> Self {
714        let session_id = format!("session_{}", Utc::now().timestamp_millis());
715        Self::new().unwrap_or_else(|_| {
716            let history_file =
717                AppPaths::history_file().unwrap_or_else(|_| PathBuf::from(".sql_cli_history.json"));
718            let backup_dir = history_file
719                .parent()
720                .unwrap_or(std::path::Path::new("."))
721                .join("history_backups");
722            Self {
723                entries: Vec::new(),
724                history_file,
725                matcher: SkimMatcherV2::default(),
726                command_counts: HashMap::new(),
727                session_id,
728                session_entries: Vec::new(),
729                protection: HistoryProtection::new(backup_dir),
730            }
731        })
732    }
733}