sql_cli/
history.rs

1use crate::app_paths::AppPaths;
2use crate::history_protection::HistoryProtection;
3use anyhow::Result;
4use chrono::{DateTime, Utc};
5use fuzzy_matcher::skim::SkimMatcherV2;
6use fuzzy_matcher::FuzzyMatcher;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::fs;
10use std::path::PathBuf;
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct QueryMetadata {
14    #[serde(default)]
15    pub tables: Vec<String>, // Tables referenced (FROM clause)
16    #[serde(default)]
17    pub select_columns: Vec<String>, // Columns in SELECT clause
18    #[serde(default)]
19    pub where_columns: Vec<String>, // Columns in WHERE clause
20    #[serde(default)]
21    pub order_by_columns: Vec<String>, // Columns in ORDER BY clause
22    #[serde(default)]
23    pub functions_used: Vec<String>, // Functions/methods used (Contains, StartsWith, etc.)
24    #[serde(default)]
25    pub query_type: String, // SELECT, INSERT, UPDATE, DELETE, etc.
26}
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct HistoryEntry {
30    pub command: String,
31    pub timestamp: DateTime<Utc>,
32    pub execution_count: u32,
33    pub success: bool,
34    pub duration_ms: Option<u64>,
35    #[serde(default)]
36    pub schema_columns: Vec<String>, // Column names from the data source
37    #[serde(default)]
38    pub data_source: Option<String>, // e.g., "customers.csv", "trades_api", etc.
39    #[serde(default)]
40    pub metadata: Option<QueryMetadata>, // Parsed query metadata
41    #[serde(default)]
42    pub is_starred: bool, // User marked as important
43    #[serde(default)]
44    pub session_id: Option<String>, // Session this was created in
45}
46
47#[derive(Debug, Clone)]
48pub struct HistoryMatch {
49    pub entry: HistoryEntry,
50    pub score: i64,
51    pub indices: Vec<usize>,
52}
53
54pub struct CommandHistory {
55    entries: Vec<HistoryEntry>,
56    history_file: PathBuf,
57    matcher: SkimMatcherV2,
58    command_counts: HashMap<String, u32>,
59    session_id: String,
60    session_entries: Vec<HistoryEntry>, // Entries from current session only
61    protection: HistoryProtection,
62}
63
64impl CommandHistory {
65    pub fn new() -> Result<Self> {
66        let history_file = AppPaths::history_file()
67            .map_err(|e| anyhow::anyhow!("Failed to get history file path: {}", e))?;
68
69        // Create backup directory
70        let backup_dir = history_file
71            .parent()
72            .unwrap_or(std::path::Path::new("."))
73            .join("history_backups");
74
75        // Generate a unique session ID
76        let session_id = format!("session_{}", Utc::now().timestamp_millis());
77
78        let mut history = Self {
79            entries: Vec::new(),
80            history_file,
81            matcher: SkimMatcherV2::default(),
82            command_counts: HashMap::new(),
83            session_id,
84            session_entries: Vec::new(),
85            protection: HistoryProtection::new(backup_dir),
86        };
87
88        history.load_from_file()?;
89
90        // Clean the history file on startup to remove any duplicates
91        // This ensures the file stays clean over time
92        if !history.entries.is_empty() {
93            history.clean_and_save()?;
94        }
95
96        Ok(history)
97    }
98
99    pub fn add_entry(
100        &mut self,
101        command: String,
102        success: bool,
103        duration_ms: Option<u64>,
104    ) -> Result<()> {
105        self.add_entry_with_schema(command, success, duration_ms, Vec::new(), None)
106    }
107
108    pub fn add_entry_with_schema(
109        &mut self,
110        command: String,
111        success: bool,
112        duration_ms: Option<u64>,
113        schema_columns: Vec<String>,
114        data_source: Option<String>,
115    ) -> Result<()> {
116        // Don't add empty commands or duplicates of the last command
117        if command.trim().is_empty() {
118            return Ok(());
119        }
120
121        // Check if this is the same as the last command
122        if let Some(last_entry) = self.entries.last() {
123            if last_entry.command == command {
124                return Ok(());
125            }
126        }
127
128        // Extract metadata from the query
129        let metadata = self.extract_query_metadata(&command);
130
131        let entry = HistoryEntry {
132            command: command.clone(),
133            timestamp: Utc::now(),
134            execution_count: *self.command_counts.get(&command).unwrap_or(&0) + 1,
135            success,
136            duration_ms,
137            schema_columns,
138            data_source,
139            metadata,
140            is_starred: false,
141            session_id: Some(self.session_id.clone()),
142        };
143
144        // Add to session entries
145        self.session_entries.push(entry.clone());
146
147        // Update command count
148        *self.command_counts.entry(command.clone()).or_insert(0) += 1;
149
150        // Remove any existing entry with the same command to avoid duplicates
151        // This moves the command to the end of the history with updated timestamp
152        self.entries.retain(|e| e.command != command);
153
154        self.entries.push(entry);
155
156        // Keep only the last 1000 entries
157        if self.entries.len() > 1000 {
158            self.entries.drain(0..self.entries.len() - 1000);
159        }
160
161        self.save_to_file()?;
162        Ok(())
163    }
164
165    pub fn search(&self, query: &str) -> Vec<HistoryMatch> {
166        self.search_with_schema(query, &[], None)
167    }
168
169    pub fn search_with_schema(
170        &self,
171        query: &str,
172        current_columns: &[String],
173        current_source: Option<&str>,
174    ) -> Vec<HistoryMatch> {
175        if query.is_empty() {
176            // Return recent entries when no query, prioritizing schema matches
177            let mut entries: Vec<_> = self
178                .entries
179                .iter()
180                .rev()
181                .take(100)
182                .map(|entry| {
183                    let schema_score =
184                        self.calculate_schema_match_score(entry, current_columns, current_source);
185                    HistoryMatch {
186                        entry: entry.clone(),
187                        score: 100 + schema_score,
188                        indices: Vec::new(),
189                    }
190                })
191                .collect();
192
193            entries.sort_by(|a, b| b.score.cmp(&a.score));
194            entries.truncate(50);
195            return entries;
196        }
197
198        let mut matches: Vec<HistoryMatch> = self
199            .entries
200            .iter()
201            .filter_map(|entry| {
202                if let Some((score, indices)) = self.matcher.fuzzy_indices(&entry.command, query) {
203                    let schema_score =
204                        self.calculate_schema_match_score(entry, current_columns, current_source);
205                    Some(HistoryMatch {
206                        entry: entry.clone(),
207                        score: score + schema_score,
208                        indices,
209                    })
210                } else {
211                    None
212                }
213            })
214            .collect();
215
216        // Sort by score (descending), then by recency and frequency
217        matches.sort_by(|a, b| {
218            // Primary sort: fuzzy match score (including schema bonus)
219            let score_cmp = b.score.cmp(&a.score);
220            if score_cmp != std::cmp::Ordering::Equal {
221                return score_cmp;
222            }
223
224            // Secondary sort: execution count (more frequently used commands rank higher)
225            let count_cmp = b.entry.execution_count.cmp(&a.entry.execution_count);
226            if count_cmp != std::cmp::Ordering::Equal {
227                return count_cmp;
228            }
229
230            // Tertiary sort: recency (more recent commands rank higher)
231            b.entry.timestamp.cmp(&a.entry.timestamp)
232        });
233
234        matches.truncate(20); // Limit to top 20 matches
235        matches
236    }
237
238    fn calculate_schema_match_score(
239        &self,
240        entry: &HistoryEntry,
241        current_columns: &[String],
242        current_source: Option<&str>,
243    ) -> i64 {
244        let mut score = 0i64;
245
246        // Bonus for matching data source
247        if let (Some(entry_source), Some(current)) = (&entry.data_source, current_source) {
248            if entry_source == current {
249                score += 50; // High bonus for same data source
250            }
251        }
252
253        // Bonus for matching columns in schema
254        if !current_columns.is_empty() && !entry.schema_columns.is_empty() {
255            let matching_columns = entry
256                .schema_columns
257                .iter()
258                .filter(|col| current_columns.contains(col))
259                .count();
260
261            let total_columns = entry.schema_columns.len().max(current_columns.len());
262            if total_columns > 0 {
263                // Scale bonus based on percentage of matching columns
264                let match_percentage = (matching_columns * 100) / total_columns;
265                score += (match_percentage as i64) / 2; // Up to 50 points for perfect match
266            }
267        }
268
269        // Additional bonus for matching columns in query metadata
270        if let Some(metadata) = &entry.metadata {
271            let metadata_columns: Vec<&String> = metadata
272                .select_columns
273                .iter()
274                .chain(metadata.where_columns.iter())
275                .chain(metadata.order_by_columns.iter())
276                .collect();
277
278            let matching_metadata = metadata_columns
279                .iter()
280                .filter(|col| current_columns.contains(col))
281                .count();
282
283            if matching_metadata > 0 {
284                score += (matching_metadata as i64) * 5; // 5 points per matching column
285            }
286        }
287
288        score
289    }
290
291    fn extract_query_metadata(&self, query: &str) -> Option<QueryMetadata> {
292        let query_upper = query.to_uppercase();
293
294        // Determine query type
295        let query_type = if query_upper.starts_with("SELECT") {
296            "SELECT"
297        } else if query_upper.starts_with("INSERT") {
298            "INSERT"
299        } else if query_upper.starts_with("UPDATE") {
300            "UPDATE"
301        } else if query_upper.starts_with("DELETE") {
302            "DELETE"
303        } else {
304            "OTHER"
305        }
306        .to_string();
307
308        // Extract table names (simple regex-based approach)
309        let mut tables = Vec::new();
310        if let Some(from_idx) = query_upper.find(" FROM ") {
311            let after_from = &query[from_idx + 6..];
312            if let Some(end_idx) = after_from.find(|c: char| c == ' ' || c == '(' || c == ';') {
313                let table_name = after_from[..end_idx].trim().to_string();
314                if !table_name.is_empty() {
315                    tables.push(table_name);
316                }
317            }
318        }
319
320        // Extract columns from SELECT clause
321        let mut select_columns = Vec::new();
322        if query_type == "SELECT" {
323            if let Some(select_idx) = query_upper.find("SELECT ") {
324                let after_select = &query[select_idx + 7..];
325                if let Some(from_idx) = after_select.to_uppercase().find(" FROM") {
326                    let select_clause = &after_select[..from_idx];
327                    if !select_clause.trim().eq("*") {
328                        // Parse column names (simplified)
329                        for col in select_clause.split(',') {
330                            let col_name = col
331                                .trim()
332                                .split_whitespace()
333                                .next()
334                                .unwrap_or("")
335                                .trim_matches('"')
336                                .to_string();
337                            if !col_name.is_empty() {
338                                select_columns.push(col_name);
339                            }
340                        }
341                    }
342                }
343            }
344        }
345
346        // Extract columns from WHERE clause and functions used
347        let mut where_columns = Vec::new();
348        let mut functions_used = Vec::new();
349        if let Some(where_idx) = query_upper.find(" WHERE ") {
350            let after_where = &query[where_idx + 7..];
351
352            // Look for LINQ methods
353            let linq_methods = [
354                "Contains",
355                "StartsWith",
356                "EndsWith",
357                "Length",
358                "ToUpper",
359                "ToLower",
360                "IsNullOrEmpty",
361            ];
362            for method in &linq_methods {
363                if after_where.contains(method) {
364                    functions_used.push(method.to_string());
365                }
366            }
367
368            // Extract column names before operators or methods
369            // This is simplified - a proper parser would be better
370            let words: Vec<&str> = after_where
371                .split(|c: char| !c.is_alphanumeric() && c != '_')
372                .filter(|s| !s.is_empty())
373                .collect();
374
375            for (i, word) in words.iter().enumerate() {
376                // If next word is an operator or method, this might be a column
377                if i + 1 < words.len() {
378                    let next = words[i + 1];
379                    if linq_methods.contains(&next)
380                        || ["IS", "NOT", "LIKE", "BETWEEN"].contains(&next.to_uppercase().as_str())
381                    {
382                        where_columns.push(word.to_string());
383                    }
384                }
385            }
386        }
387
388        // Extract ORDER BY columns
389        let mut order_by_columns = Vec::new();
390        if let Some(order_idx) = query_upper.find(" ORDER BY ") {
391            let after_order = &query[order_idx + 10..];
392            let end_idx = after_order
393                .find(|c: char| c == ';' || c == ')')
394                .unwrap_or(after_order.len());
395            let order_clause = &after_order[..end_idx];
396
397            for col in order_clause.split(',') {
398                let col_name = col
399                    .trim()
400                    .split_whitespace()
401                    .next()
402                    .unwrap_or("")
403                    .trim_matches('"')
404                    .to_string();
405                if !col_name.is_empty()
406                    && col_name.to_uppercase() != "ASC"
407                    && col_name.to_uppercase() != "DESC"
408                {
409                    order_by_columns.push(col_name);
410                }
411            }
412        }
413
414        Some(QueryMetadata {
415            tables,
416            select_columns,
417            where_columns,
418            order_by_columns,
419            functions_used,
420            query_type,
421        })
422    }
423
424    pub fn get_recent(&self, limit: usize) -> Vec<&HistoryEntry> {
425        self.entries.iter().rev().take(limit).collect()
426    }
427
428    pub fn get_all(&self) -> &[HistoryEntry] {
429        &self.entries
430    }
431
432    pub fn get_last_entry(&self) -> Option<&HistoryEntry> {
433        self.entries.last()
434    }
435
436    /// Get session-only entries (from current run)
437    pub fn get_session_entries(&self) -> &[HistoryEntry] {
438        &self.session_entries
439    }
440
441    /// Get entries for navigation (session + starred from persistent)
442    pub fn get_navigation_entries(&self) -> Vec<HistoryEntry> {
443        let mut entries = self.session_entries.clone();
444
445        // Add starred entries from persistent history that aren't in session
446        for entry in &self.entries {
447            if entry.is_starred
448                && !self
449                    .session_entries
450                    .iter()
451                    .any(|e| e.command == entry.command)
452            {
453                entries.push(entry.clone());
454            }
455        }
456
457        // Sort by timestamp, most recent first
458        entries.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
459
460        // Deduplicate keeping most recent
461        let mut seen = std::collections::HashSet::new();
462        entries.retain(|e| seen.insert(e.command.clone()));
463
464        entries
465    }
466
467    /// Star/unstar a command
468    pub fn toggle_star(&mut self, command: &str) -> Result<()> {
469        // Find in entries and toggle
470        for entry in &mut self.entries {
471            if entry.command == command {
472                entry.is_starred = !entry.is_starred;
473                break;
474            }
475        }
476
477        // Also update session entries
478        for entry in &mut self.session_entries {
479            if entry.command == command {
480                entry.is_starred = !entry.is_starred;
481                break;
482            }
483        }
484
485        self.save_to_file()
486    }
487
488    pub fn clear(&mut self) -> Result<()> {
489        // SAFETY: Create backup before clearing
490        let current_count = self.entries.len();
491        if current_count > 0 {
492            eprintln!(
493                "[HISTORY WARNING] Clearing {} entries - creating backup",
494                current_count
495            );
496            if let Ok(content) = serde_json::to_string_pretty(&self.entries) {
497                self.protection.backup_before_write(&content, current_count);
498            }
499        }
500        self.entries.clear();
501        self.command_counts.clear();
502        self.save_to_file()?;
503        Ok(())
504    }
505
506    fn load_from_file(&mut self) -> Result<()> {
507        if !self.history_file.exists() {
508            eprintln!("[History] No history file found at {:?}", self.history_file);
509            return Ok(());
510        }
511
512        eprintln!("[History] Loading history from {:?}", self.history_file);
513        let content = fs::read_to_string(&self.history_file)?;
514        if content.trim().is_empty() {
515            eprintln!("[History] History file is empty");
516            return Ok(());
517        }
518
519        // Try to parse the history file
520        let entries: Vec<HistoryEntry> = match serde_json::from_str(&content) {
521            Ok(entries) => entries,
522            Err(e) => {
523                eprintln!("[History] ERROR: Failed to parse history file: {}", e);
524                eprintln!("[History] Attempting recovery from backup...");
525
526                // Try to recover from backup
527                if let Some(backup_content) = self.protection.recover_from_backup() {
528                    eprintln!("[History] Found backup, attempting to restore...");
529
530                    // Try to parse the backup
531                    match serde_json::from_str::<Vec<HistoryEntry>>(&backup_content) {
532                        Ok(backup_entries) => {
533                            eprintln!(
534                                "[History] Successfully recovered {} entries from backup",
535                                backup_entries.len()
536                            );
537
538                            // Save the recovered content to the main history file
539                            fs::write(&self.history_file, &backup_content)?;
540
541                            // Move the corrupted file for investigation
542                            let corrupted_path = self.history_file.with_extension("json.corrupted");
543                            let _ = fs::rename(
544                                &self.history_file.with_extension("json"),
545                                &corrupted_path,
546                            );
547                            eprintln!("[History] Corrupted file moved to {:?}", corrupted_path);
548
549                            backup_entries
550                        }
551                        Err(backup_err) => {
552                            eprintln!("[History] Backup also corrupted: {}", backup_err);
553                            eprintln!("[History] Starting with empty history");
554                            Vec::new()
555                        }
556                    }
557                } else {
558                    eprintln!("[History] No backup available, starting with empty history");
559
560                    // Move the corrupted file for investigation
561                    let corrupted_path = self.history_file.with_extension("json.corrupted");
562                    let _ = fs::copy(&self.history_file, &corrupted_path);
563                    eprintln!("[History] Corrupted file copied to {:?}", corrupted_path);
564
565                    Vec::new()
566                }
567            }
568        };
569        eprintln!(
570            "[History] Loaded {} entries from history file",
571            entries.len()
572        );
573        let original_count = entries.len();
574
575        // Deduplicate entries, keeping only the most recent of each command
576        // This cleans up any existing duplicates in the history file
577        let mut seen_commands = std::collections::HashSet::new();
578        let mut deduplicated = Vec::new();
579
580        // Process in reverse to keep the most recent version of each command
581        for entry in entries.into_iter().rev() {
582            if seen_commands.insert(entry.command.clone()) {
583                deduplicated.push(entry);
584            }
585        }
586
587        // Reverse back to chronological order
588        deduplicated.reverse();
589
590        // Sort by timestamp to ensure chronological order (oldest first)
591        deduplicated.sort_by(|a, b| a.timestamp.cmp(&b.timestamp));
592
593        // Log if we removed duplicates (only on first load, not every save)
594        let removed_count = original_count - deduplicated.len();
595        if removed_count > 0 {
596            eprintln!(
597                "[sql-cli] Cleaned {} duplicate commands from history",
598                removed_count
599            );
600        }
601
602        // Rebuild command counts
603        self.command_counts.clear();
604        for entry in &deduplicated {
605            *self
606                .command_counts
607                .entry(entry.command.clone())
608                .or_insert(0) = entry.execution_count;
609        }
610
611        self.entries = deduplicated;
612        eprintln!(
613            "[History] Final history contains {} unique entries",
614            self.entries.len()
615        );
616        Ok(())
617    }
618
619    fn save_to_file(&self) -> Result<()> {
620        // SAFETY: Validate before writing
621        let new_content = serde_json::to_string_pretty(&self.entries)?;
622        let new_count = self.entries.len();
623
624        // Get current file entry count for comparison
625        let old_count = if self.history_file.exists() {
626            if let Ok(existing) = fs::read_to_string(&self.history_file) {
627                existing.matches("\"command\":").count()
628            } else {
629                0
630            }
631        } else {
632            0
633        };
634
635        // Validate the write
636        if !self
637            .protection
638            .validate_write(old_count, new_count, &new_content)
639        {
640            eprintln!("[HISTORY PROTECTION] Write blocked! Attempting recovery from backup...");
641            if let Some(backup_content) = self.protection.recover_from_backup() {
642                fs::write(&self.history_file, backup_content)?;
643                return Ok(());
644            }
645            return Err(anyhow::anyhow!("History write validation failed"));
646        }
647
648        // Create backup before significant changes
649        if old_count > 0 && (old_count != new_count || old_count > 10) {
650            self.protection.backup_before_write(&new_content, new_count);
651        }
652
653        // Use atomic write to prevent corruption from partial writes
654        // Write to a temp file first, then rename it
655        let temp_file = self.history_file.with_extension("json.tmp");
656        fs::write(&temp_file, new_content)?;
657
658        // Atomic rename (on Unix, rename is atomic)
659        fs::rename(temp_file, &self.history_file)?;
660        Ok(())
661    }
662
663    /// Clean the history file by removing duplicates and rewriting it
664    /// This is called after loading to ensure the file stays clean
665    pub fn clean_and_save(&mut self) -> Result<()> {
666        // The entries are already deduplicated in memory after loading
667        // Just save them back to clean the file
668        self.save_to_file()?;
669        Ok(())
670    }
671
672    pub fn stats(&self) -> HistoryStats {
673        let total_commands = self.entries.len();
674        let unique_commands = self.command_counts.len();
675        let successful_commands = self.entries.iter().filter(|e| e.success).count();
676        let failed_commands = total_commands - successful_commands;
677
678        let most_used = self
679            .command_counts
680            .iter()
681            .max_by_key(|(_, &count)| count)
682            .map(|(cmd, &count)| (cmd.clone(), count));
683
684        HistoryStats {
685            total_commands,
686            unique_commands,
687            successful_commands,
688            failed_commands,
689            most_used_command: most_used,
690        }
691    }
692}
693
694#[derive(Debug)]
695pub struct HistoryStats {
696    pub total_commands: usize,
697    pub unique_commands: usize,
698    pub successful_commands: usize,
699    pub failed_commands: usize,
700    pub most_used_command: Option<(String, u32)>,
701}
702
703impl Clone for CommandHistory {
704    fn clone(&self) -> Self {
705        Self {
706            entries: self.entries.clone(),
707            history_file: self.history_file.clone(),
708            matcher: SkimMatcherV2::default(), // Create new matcher
709            command_counts: self.command_counts.clone(),
710            session_id: self.session_id.clone(),
711            session_entries: self.session_entries.clone(),
712            protection: HistoryProtection::new(
713                self.history_file
714                    .parent()
715                    .unwrap_or(std::path::Path::new("."))
716                    .join("history_backups"),
717            ),
718        }
719    }
720}
721
722impl Default for CommandHistory {
723    fn default() -> Self {
724        let session_id = format!("session_{}", Utc::now().timestamp_millis());
725        Self::new().unwrap_or_else(|_| {
726            let history_file =
727                AppPaths::history_file().unwrap_or_else(|_| PathBuf::from(".sql_cli_history.json"));
728            let backup_dir = history_file
729                .parent()
730                .unwrap_or(std::path::Path::new("."))
731                .join("history_backups");
732            Self {
733                entries: Vec::new(),
734                history_file,
735                matcher: SkimMatcherV2::default(),
736                command_counts: HashMap::new(),
737                session_id,
738                session_entries: Vec::new(),
739                protection: HistoryProtection::new(backup_dir),
740            }
741        })
742    }
743}