chasm_cli/commands/
harvest.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: Apache-2.0
3//! Harvest commands for collecting chat sessions from multiple providers
4//!
5//! The harvester scans for local and remote LLM providers, downloads all chat
6//! sessions into a single SQLite database, and allows users to version-track
7//! the database with git.
8
9use anyhow::{Context, Result};
10use chrono::{DateTime, Utc};
11use colored::*;
12use rusqlite::{params, Connection};
13use serde::{Deserialize, Serialize};
14use std::fs;
15use std::path::{Path, PathBuf};
16use std::process::Command;
17use std::time::Duration;
18
19use crate::browser::{get_installed_browsers, scan_browser_auth, BrowserType};
20use crate::database::{ChatDatabase, ShareLinkParser};
21use crate::models::ChatSession;
22use crate::providers::{ProviderRegistry, ProviderType};
23use crate::storage::parse_session_json;
24use crate::workspace::{discover_workspaces, get_chat_sessions_from_workspace};
25
26/// Check if a string is an empty code block marker (just ``` with no content)
27fn is_empty_code_block(s: &str) -> bool {
28    // Match patterns like "```", "```\n", "```language", "```\n```", "```\n\n```"
29    let s = s.trim();
30    if s == "```" {
31        return true;
32    }
33    // Check for code block with just a language identifier and no content
34    if s.starts_with("```") && !s.contains('\n') {
35        return true;
36    }
37    // Check for empty code block with opening and closing (possibly with whitespace-only lines)
38    let lines: Vec<&str> = s.lines().collect();
39    if lines.len() >= 2 && lines[0].starts_with("```") && lines.last() == Some(&"```") {
40        // Check if all lines between opening and closing are empty or whitespace
41        let content_lines = &lines[1..lines.len() - 1];
42        if content_lines.iter().all(|line| line.trim().is_empty()) {
43            return true;
44        }
45    }
46    false
47}
48
49/// Type alias for harvested session query result (id, provider, title, msg_count, created, last_msg, workspace)
50type HarvestQueryResult = (String, String, String, i64, i64, i64, Option<String>);
51
52/// Web-based LLM provider endpoint configuration
53#[derive(Debug, Clone)]
54struct WebProviderEndpoint {
55    name: &'static str,
56    url: &'static str,
57    description: &'static str,
58}
59
60/// List of known web-based LLM provider endpoints to probe
61const WEB_PROVIDERS: &[WebProviderEndpoint] = &[
62    WebProviderEndpoint {
63        name: "ChatGPT",
64        url: "https://chat.openai.com",
65        description: "OpenAI ChatGPT Web Interface",
66    },
67    WebProviderEndpoint {
68        name: "Claude",
69        url: "https://claude.ai",
70        description: "Anthropic Claude Web Interface",
71    },
72    WebProviderEndpoint {
73        name: "Gemini",
74        url: "https://gemini.google.com",
75        description: "Google Gemini Web Interface",
76    },
77    WebProviderEndpoint {
78        name: "Perplexity",
79        url: "https://www.perplexity.ai",
80        description: "Perplexity AI Search Interface",
81    },
82    WebProviderEndpoint {
83        name: "DeepSeek",
84        url: "https://chat.deepseek.com",
85        description: "DeepSeek Chat Interface",
86    },
87    WebProviderEndpoint {
88        name: "Poe",
89        url: "https://poe.com",
90        description: "Quora Poe Multi-model Chat",
91    },
92    WebProviderEndpoint {
93        name: "You.com",
94        url: "https://you.com/chat",
95        description: "You.com AI Chat",
96    },
97    WebProviderEndpoint {
98        name: "HuggingChat",
99        url: "https://huggingface.co/chat",
100        description: "HuggingFace Chat Interface",
101    },
102    WebProviderEndpoint {
103        name: "Copilot",
104        url: "https://copilot.microsoft.com",
105        description: "Microsoft Copilot Web Interface",
106    },
107    WebProviderEndpoint {
108        name: "Mistral",
109        url: "https://chat.mistral.ai",
110        description: "Mistral AI Le Chat Interface",
111    },
112    WebProviderEndpoint {
113        name: "Cohere",
114        url: "https://coral.cohere.com",
115        description: "Cohere Coral Chat Interface",
116    },
117    WebProviderEndpoint {
118        name: "Groq",
119        url: "https://groq.com",
120        description: "Groq Fast Inference Interface",
121    },
122    WebProviderEndpoint {
123        name: "Phind",
124        url: "https://www.phind.com",
125        description: "Phind AI Code Search",
126    },
127    WebProviderEndpoint {
128        name: "Character.AI",
129        url: "https://character.ai",
130        description: "Character.AI Chat Interface",
131    },
132    WebProviderEndpoint {
133        name: "Pi",
134        url: "https://pi.ai",
135        description: "Inflection Pi Personal AI",
136    },
137];
138
139/// Scan for reachable web-based LLM providers (parallel)
140fn scan_web_providers(timeout_secs: u64) -> Vec<String> {
141    use std::sync::{Arc, Mutex};
142    use std::thread;
143
144    let reachable = Arc::new(Mutex::new(Vec::new()));
145    let results = Arc::new(Mutex::new(Vec::new()));
146
147    // Create threads for parallel scanning
148    let handles: Vec<_> = WEB_PROVIDERS
149        .iter()
150        .map(|provider| {
151            let reachable = Arc::clone(&reachable);
152            let results = Arc::clone(&results);
153            let timeout = timeout_secs;
154            let name = provider.name;
155            let url = provider.url;
156            let desc = provider.description;
157
158            thread::spawn(move || {
159                let client = match reqwest::blocking::Client::builder()
160                    .timeout(Duration::from_secs(timeout))
161                    .connect_timeout(Duration::from_secs(timeout.min(3))) // Short connect timeout
162                    .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
163                    .build()
164                {
165                    Ok(c) => c,
166                    Err(_) => return,
167                };
168
169                let result: (&str, bool, String, &str) = match client.head(url).send() {
170                    Ok(response) => {
171                        let status = response.status();
172                        if status.is_success() || status.is_redirection() {
173                            reachable.lock().unwrap().push(name.to_string());
174                            (name, true, desc.to_string(), url)
175                        } else {
176                            (name, false, format!("HTTP {}", status.as_u16()), url)
177                        }
178                    }
179                    Err(e) => {
180                        let reason = if e.is_timeout() {
181                            "timeout".to_string()
182                        } else if e.is_connect() {
183                            "connection failed".to_string()
184                        } else {
185                            "unreachable".to_string()
186                        };
187                        (name, false, reason, url)
188                    }
189                };
190
191                results.lock().unwrap().push((
192                    result.0.to_string(),
193                    result.1,
194                    result.2,
195                    result.3.to_string(),
196                ));
197            })
198        })
199        .collect();
200
201    // Wait for all threads to complete
202    for handle in handles {
203        let _ = handle.join();
204    }
205
206    // Print results in order
207    let results = results.lock().unwrap();
208    let mut sorted_results: Vec<_> = results.iter().collect();
209    sorted_results.sort_by_key(|(name, _, _, _)| name.as_str());
210
211    for (name, success, info, url) in sorted_results {
212        if *success {
213            println!(
214                "   {} {}: {} ({})",
215                "[+]".green(),
216                name.bold(),
217                "reachable".green(),
218                info.dimmed()
219            );
220            println!("      {} {}", "`".dimmed(), url.dimmed());
221        } else {
222            println!(
223                "   {} {}: {} ({})",
224                "[-]".dimmed(),
225                name,
226                "blocked or unavailable".dimmed(),
227                info.dimmed()
228            );
229        }
230    }
231
232    let result = reachable.lock().unwrap().clone();
233    result
234}
235
236/// Scan browser cookies for authenticated web LLM providers
237fn scan_browser_authentication(
238    verbose: bool,
239) -> (std::collections::HashMap<String, Vec<BrowserType>>, usize) {
240    use crate::browser::scan_browser_auth_verbose;
241    use std::collections::HashMap;
242
243    let installed = get_installed_browsers();
244    if installed.is_empty() {
245        println!("   {} No supported browsers found", "[-]".dimmed());
246        return (HashMap::new(), 0);
247    }
248
249    println!(
250        "   {} Checking {} browser(s): {}",
251        "[*]".blue(),
252        installed.len(),
253        installed
254            .iter()
255            .map(|b| b.name())
256            .collect::<Vec<_>>()
257            .join(", ")
258    );
259
260    let results = if verbose {
261        scan_browser_auth_verbose()
262    } else {
263        scan_browser_auth()
264    };
265
266    // Group results by provider
267    let mut authenticated: HashMap<String, Vec<BrowserType>> = HashMap::new();
268
269    for result in results {
270        if result.authenticated {
271            authenticated
272                .entry(result.provider.clone())
273                .or_default()
274                .push(result.browser);
275        }
276    }
277
278    let count = authenticated.len();
279
280    if authenticated.is_empty() {
281        println!(
282            "   {} No authenticated web LLM providers found",
283            "[-]".dimmed()
284        );
285        println!(
286            "      {} Log into ChatGPT, Claude, etc. in your browser to enable harvesting",
287            "`".dimmed()
288        );
289    } else {
290        for (provider, browsers) in &authenticated {
291            let browser_names: Vec<_> = browsers.iter().map(|b| b.name()).collect();
292            println!(
293                "   {} {}: {} in {}",
294                "[+]".green(),
295                provider.bold(),
296                "authenticated".green(),
297                browser_names.join(", ")
298            );
299        }
300    }
301
302    (authenticated, count)
303}
304
305/// Configuration for the harvest database
306#[allow(dead_code)]
307#[derive(Debug, Clone, Serialize, Deserialize)]
308pub struct HarvestConfig {
309    /// Path to the harvest database
310    pub db_path: PathBuf,
311    /// Auto-commit changes to git
312    pub auto_commit: bool,
313    /// Providers to include (empty = all)
314    pub include_providers: Vec<String>,
315    /// Providers to exclude
316    pub exclude_providers: Vec<String>,
317    /// Include VS Code workspaces
318    pub include_workspaces: bool,
319    /// Last harvest timestamp
320    pub last_harvest: Option<i64>,
321}
322
323#[allow(dead_code)]
324impl Default for HarvestConfig {
325    fn default() -> Self {
326        Self {
327            db_path: PathBuf::from("chat_sessions.db"),
328            auto_commit: false,
329            include_providers: Vec::new(),
330            exclude_providers: Vec::new(),
331            include_workspaces: true,
332            last_harvest: None,
333        }
334    }
335}
336
337/// Statistics from a harvest operation
338#[derive(Debug, Default)]
339pub struct HarvestStats {
340    pub providers_scanned: usize,
341    pub workspaces_scanned: usize,
342    pub sessions_found: usize,
343    pub sessions_added: usize,
344    pub sessions_updated: usize,
345    pub sessions_skipped: usize,
346    pub errors: Vec<String>,
347}
348
349/// A harvested session record
350#[allow(dead_code)]
351#[derive(Debug, Clone, Serialize, Deserialize)]
352pub struct HarvestedSession {
353    pub id: String,
354    pub provider: String,
355    pub provider_type: String,
356    pub workspace_id: Option<String>,
357    pub workspace_name: Option<String>,
358    pub title: String,
359    pub message_count: usize,
360    pub created_at: i64,
361    pub updated_at: i64,
362    pub harvested_at: i64,
363    pub session_json: String,
364}
365
366/// Initialize a harvest database at the specified path
367pub fn harvest_init(path: Option<&str>, git_init: bool) -> Result<()> {
368    let db_path = get_db_path(path)?;
369    let db_dir = db_path.parent().unwrap_or(Path::new("."));
370
371    println!("\n{} Initializing Harvest Database", "[H]".magenta().bold());
372    println!("{}", "=".repeat(60));
373
374    // Create directory if needed
375    if !db_dir.exists() {
376        fs::create_dir_all(db_dir)?;
377        println!("{} Created directory: {}", "[+]".green(), db_dir.display());
378    }
379
380    // Check if database already exists
381    if db_path.exists() {
382        println!(
383            "{} Database already exists: {}",
384            "[!]".yellow(),
385            db_path.display()
386        );
387        println!("   Use 'csm harvest run' to update it");
388        return Ok(());
389    }
390
391    // Create the database
392    create_harvest_database(&db_path)?;
393    println!("{} Created database: {}", "[+]".green(), db_path.display());
394
395    // Initialize git if requested
396    if git_init {
397        init_git_tracking(&db_path)?;
398    }
399
400    println!("\n{} Harvest database initialized!", "[+]".green().bold());
401    println!("\nNext steps:");
402    println!("  1. Run 'csm harvest scan' to see available providers");
403    println!("  2. Run 'csm harvest run' to collect sessions");
404    if !git_init {
405        println!("  3. Run 'csm harvest git init' to enable version tracking");
406    }
407
408    Ok(())
409}
410
411/// Scan for available providers and workspaces
412pub fn harvest_scan(
413    show_sessions: bool,
414    scan_web: bool,
415    timeout_secs: u64,
416    verbose: bool,
417) -> Result<()> {
418    println!("\n{} Scanning for Providers", "[H]".magenta().bold());
419    println!("{}", "=".repeat(60));
420
421    let registry = ProviderRegistry::new();
422    let mut total_sessions = 0;
423    let mut available_providers = Vec::new();
424
425    // Scan LLM providers
426    println!("\n{} LLM Providers:", "[*]".blue().bold());
427
428    let provider_types = vec![
429        ProviderType::Copilot,
430        ProviderType::Cursor,
431        ProviderType::Ollama,
432        ProviderType::Vllm,
433        ProviderType::Foundry,
434        ProviderType::LmStudio,
435        ProviderType::LocalAI,
436        ProviderType::TextGenWebUI,
437        ProviderType::Jan,
438        ProviderType::Gpt4All,
439        ProviderType::Llamafile,
440        ProviderType::OpenAI,
441        ProviderType::ChatGPT,
442        ProviderType::Anthropic,
443        ProviderType::Perplexity,
444        ProviderType::DeepSeek,
445        ProviderType::Gemini,
446    ];
447
448    for pt in &provider_types {
449        if let Some(provider) = registry.get_provider(*pt) {
450            let available = provider.is_available();
451            let session_count = if available {
452                provider.list_sessions().map(|s| s.len()).unwrap_or(0)
453            } else {
454                0
455            };
456
457            if available {
458                available_providers.push((*pt, session_count));
459                total_sessions += session_count;
460
461                let status = if session_count > 0 {
462                    format!(
463                        "{} {} sessions",
464                        "+".green(),
465                        session_count.to_string().cyan()
466                    )
467                } else {
468                    format!("{} no sessions", "+".green())
469                };
470
471                println!(
472                    "   {} {}: {}",
473                    "[+]".green(),
474                    provider.name().bold(),
475                    status
476                );
477
478                if show_sessions && session_count > 0 {
479                    if let Ok(sessions) = provider.list_sessions() {
480                        for session in sessions.iter().take(3) {
481                            println!("      {} {}", "`".dimmed(), session.title().dimmed());
482                        }
483                        if sessions.len() > 3 {
484                            println!("      {} ... and {} more", "`".dimmed(), sessions.len() - 3);
485                        }
486                    }
487                }
488
489                if let Some(path) = provider.sessions_path() {
490                    println!(
491                        "      {} {}",
492                        "`".dimmed(),
493                        path.display().to_string().dimmed()
494                    );
495                }
496            }
497        }
498    }
499
500    // Scan VS Code workspaces
501    println!("\n{} VS Code Workspaces:", "[*]".blue().bold());
502
503    match discover_workspaces() {
504        Ok(workspaces) => {
505            let workspaces_with_sessions: Vec<_> = workspaces
506                .iter()
507                .filter(|ws| ws.chat_session_count > 0)
508                .collect();
509
510            let ws_sessions: usize = workspaces_with_sessions
511                .iter()
512                .map(|ws| ws.chat_session_count)
513                .sum();
514
515            println!(
516                "   {} {} workspaces with {} sessions",
517                "[+]".green(),
518                workspaces_with_sessions.len().to_string().cyan(),
519                ws_sessions.to_string().cyan()
520            );
521
522            if show_sessions {
523                for ws in workspaces_with_sessions.iter().take(5) {
524                    let name = ws
525                        .project_path
526                        .clone()
527                        .unwrap_or_else(|| ws.hash[..8.min(ws.hash.len())].to_string());
528                    println!(
529                        "      {} {} ({} sessions)",
530                        "`".dimmed(),
531                        name.dimmed(),
532                        ws.chat_session_count
533                    );
534                }
535                if workspaces_with_sessions.len() > 5 {
536                    println!(
537                        "      {} ... and {} more workspaces",
538                        "`".dimmed(),
539                        workspaces_with_sessions.len() - 5
540                    );
541                }
542            }
543
544            total_sessions += ws_sessions;
545        }
546        Err(e) => {
547            println!("   {} Failed to scan workspaces: {}", "[!]".yellow(), e);
548        }
549    }
550
551    // Scan web-based LLM providers if requested
552    let mut web_providers_found = Vec::new();
553    let mut authenticated_count = 0;
554    if scan_web {
555        // First check browser authentication (no network requests)
556        println!("\n{} Browser Authentication:", "[*]".blue().bold());
557        let (auth_results, auth_count) = scan_browser_authentication(verbose);
558        authenticated_count = auth_count;
559
560        // Then probe web endpoints
561        println!("\n{} Web LLM Provider Endpoints:", "[*]".blue().bold());
562        web_providers_found = scan_web_providers(timeout_secs);
563
564        // Show which authenticated providers are reachable
565        if !auth_results.is_empty() {
566            println!("\n{} Authenticated Provider Sessions:", "[*]".blue().bold());
567            for (provider, browsers) in &auth_results {
568                let browser_list: Vec<_> = browsers.iter().map(|b| b.name()).collect();
569                let reachable = web_providers_found.iter().any(|p| p == provider);
570                let status = if reachable {
571                    format!("{} (reachable)", "ready to harvest".green())
572                } else {
573                    format!("{}", "authenticated but endpoint blocked".yellow())
574                };
575                println!(
576                    "   {} {}: {} via {}",
577                    "[+]".green(),
578                    provider.bold(),
579                    status,
580                    browser_list.join(", ").dimmed()
581                );
582            }
583        }
584    }
585
586    // Summary
587    println!("\n{} Summary:", "[*]".green().bold());
588    println!(
589        "   {} local providers available",
590        available_providers.len().to_string().cyan()
591    );
592    if scan_web {
593        println!(
594            "   {} web providers reachable",
595            web_providers_found.len().to_string().cyan()
596        );
597        println!(
598            "   {} web providers authenticated",
599            authenticated_count.to_string().cyan()
600        );
601    }
602    println!(
603        "   {} total sessions to harvest",
604        total_sessions.to_string().cyan()
605    );
606
607    Ok(())
608}
609
610/// Run the harvest operation
611pub fn harvest_run(
612    path: Option<&str>,
613    providers: Option<&[String]>,
614    exclude: Option<&[String]>,
615    incremental: bool,
616    auto_commit: bool,
617    message: Option<&str>,
618) -> Result<()> {
619    let db_path = get_db_path(path)?;
620
621    println!("\n{} Running Harvest", "[H]".magenta().bold());
622    println!("{}", "=".repeat(60));
623
624    // Ensure database exists
625    if !db_path.exists() {
626        println!("{} Database not found, creating...", "[*]".blue());
627        create_harvest_database(&db_path)?;
628    }
629
630    let conn = Connection::open(&db_path)?;
631    let mut stats = HarvestStats::default();
632
633    // Get last harvest time for incremental updates
634    let last_harvest: Option<i64> = if incremental {
635        conn.query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
636            row.get(0)
637        })
638        .ok()
639    } else {
640        None
641    };
642
643    if let Some(ts) = last_harvest {
644        let dt = DateTime::from_timestamp_millis(ts)
645            .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
646            .unwrap_or_default();
647        println!("{} Incremental harvest since: {}", "[*]".blue(), dt);
648    }
649
650    let registry = ProviderRegistry::new();
651    let include_providers = providers.map(|p| p.to_vec());
652    let exclude_providers = exclude.map(|p| p.to_vec()).unwrap_or_default();
653
654    // Harvest from LLM providers
655    println!("\n{} Harvesting from providers...", "[*]".blue());
656
657    let provider_types = vec![
658        ProviderType::Copilot,
659        ProviderType::Cursor,
660        ProviderType::Ollama,
661        ProviderType::Vllm,
662        ProviderType::Foundry,
663        ProviderType::LmStudio,
664        ProviderType::LocalAI,
665        ProviderType::TextGenWebUI,
666        ProviderType::Jan,
667        ProviderType::Gpt4All,
668        ProviderType::Llamafile,
669    ];
670
671    for pt in &provider_types {
672        let provider_name = pt.display_name().to_lowercase();
673
674        // Check include/exclude filters
675        if let Some(ref include) = include_providers {
676            if !include
677                .iter()
678                .any(|p| provider_name.contains(&p.to_lowercase()))
679            {
680                continue;
681            }
682        }
683        if exclude_providers
684            .iter()
685            .any(|p| provider_name.contains(&p.to_lowercase()))
686        {
687            continue;
688        }
689
690        if let Some(provider) = registry.get_provider(*pt) {
691            if !provider.is_available() {
692                continue;
693            }
694
695            stats.providers_scanned += 1;
696
697            match provider.list_sessions() {
698                Ok(sessions) => {
699                    for session in sessions {
700                        stats.sessions_found += 1;
701
702                        // Check if session should be skipped (incremental)
703                        if let Some(last) = last_harvest {
704                            if session.last_message_date <= last {
705                                stats.sessions_skipped += 1;
706                                continue;
707                            }
708                        }
709
710                        match insert_or_update_session(
711                            &conn,
712                            &session,
713                            pt.display_name(),
714                            None,
715                            None,
716                        ) {
717                            Ok(updated) => {
718                                if updated {
719                                    stats.sessions_updated += 1;
720                                } else {
721                                    stats.sessions_added += 1;
722                                }
723                            }
724                            Err(e) => {
725                                stats.errors.push(format!("{}: {}", session.title(), e));
726                            }
727                        }
728                    }
729
730                    let session_count = stats.sessions_added + stats.sessions_updated;
731                    if session_count > 0 {
732                        println!(
733                            "   {} {}: {} sessions",
734                            "[+]".green(),
735                            provider.name(),
736                            session_count.to_string().cyan()
737                        );
738                    }
739                }
740                Err(e) => {
741                    stats.errors.push(format!("{}: {}", provider.name(), e));
742                }
743            }
744        }
745    }
746
747    // Harvest from VS Code workspaces
748    if include_providers.is_none()
749        || include_providers
750            .as_ref()
751            .map(|p| {
752                p.iter()
753                    .any(|x| x == "copilot" || x == "vscode" || x == "workspace")
754            })
755            .unwrap_or(false)
756    {
757        println!("\n{} Harvesting from VS Code workspaces...", "[*]".blue());
758
759        if let Ok(workspaces) = discover_workspaces() {
760            for ws in &workspaces {
761                if ws.chat_session_count == 0 {
762                    continue;
763                }
764
765                stats.workspaces_scanned += 1;
766
767                if let Ok(sessions) = get_chat_sessions_from_workspace(&ws.workspace_path) {
768                    for swp in sessions {
769                        stats.sessions_found += 1;
770
771                        // Check if session should be skipped (incremental)
772                        if let Some(last) = last_harvest {
773                            if swp.session.last_message_date <= last {
774                                stats.sessions_skipped += 1;
775                                continue;
776                            }
777                        }
778
779                        let ws_name = ws.project_path.clone();
780
781                        match insert_or_update_session(
782                            &conn,
783                            &swp.session,
784                            "GitHub Copilot",
785                            Some(&ws.hash),
786                            ws_name.as_deref(),
787                        ) {
788                            Ok(updated) => {
789                                if updated {
790                                    stats.sessions_updated += 1;
791                                } else {
792                                    stats.sessions_added += 1;
793                                }
794                            }
795                            Err(e) => {
796                                stats.errors.push(format!("{}: {}", swp.session.title(), e));
797                            }
798                        }
799                    }
800                }
801            }
802
803            println!(
804                "   {} Workspaces: {} scanned",
805                "[+]".green(),
806                stats.workspaces_scanned.to_string().cyan()
807            );
808        }
809    }
810
811    // Harvest from web-based cloud providers (ChatGPT, Claude, etc.)
812    let include_list: Vec<String> = include_providers.clone().unwrap_or_default();
813    harvest_web_providers(&conn, &mut stats, &include_list, &exclude_providers)?;
814
815    // Update metadata
816    update_harvest_metadata(&conn)?;
817
818    // Print summary
819    println!("\n{} Harvest Complete:", "[+]".green().bold());
820    println!(
821        "   {} providers scanned",
822        stats.providers_scanned.to_string().cyan()
823    );
824    println!(
825        "   {} workspaces scanned",
826        stats.workspaces_scanned.to_string().cyan()
827    );
828    println!(
829        "   {} sessions found",
830        stats.sessions_found.to_string().cyan()
831    );
832    println!(
833        "   {} sessions added",
834        stats.sessions_added.to_string().green()
835    );
836    println!(
837        "   {} sessions updated",
838        stats.sessions_updated.to_string().yellow()
839    );
840    if stats.sessions_skipped > 0 {
841        println!(
842            "   {} sessions skipped (unchanged)",
843            stats.sessions_skipped.to_string().dimmed()
844        );
845    }
846
847    if !stats.errors.is_empty() {
848        println!("\n{} Errors ({}):", "[!]".red(), stats.errors.len());
849        for (i, err) in stats.errors.iter().take(5).enumerate() {
850            println!("   {}. {}", i + 1, err);
851        }
852        if stats.errors.len() > 5 {
853            println!("   ... and {} more errors", stats.errors.len() - 5);
854        }
855    }
856
857    // Auto-commit if requested
858    if auto_commit && (stats.sessions_added > 0 || stats.sessions_updated > 0) {
859        println!("\n{} Auto-committing changes...", "[*]".blue());
860        let commit_msg = message.unwrap_or("Harvest: update chat sessions");
861        if let Err(e) = git_commit_harvest(&db_path, commit_msg) {
862            println!("{} Git commit failed: {}", "[!]".yellow(), e);
863        } else {
864            println!("{} Changes committed", "[+]".green());
865        }
866    }
867
868    println!("\nDatabase: {}", db_path.display());
869
870    Ok(())
871}
872
873/// Show harvest database status
874pub fn harvest_status(path: Option<&str>) -> Result<()> {
875    let db_path = get_db_path(path)?;
876
877    println!("\n{} Harvest Database Status", "[H]".magenta().bold());
878    println!("{}", "=".repeat(60));
879
880    if !db_path.exists() {
881        println!(
882            "{} Database not found: {}",
883            "[!]".yellow(),
884            db_path.display()
885        );
886        println!("   Run 'csm harvest init' to create one");
887        return Ok(());
888    }
889
890    let conn = Connection::open(&db_path)?;
891
892    // Get session counts by provider
893    let mut stmt = conn.prepare(
894        "SELECT provider, COUNT(*) as count, SUM(message_count) as messages 
895         FROM sessions 
896         GROUP BY provider 
897         ORDER BY count DESC",
898    )?;
899
900    let provider_stats: Vec<(String, i64, i64)> = stmt
901        .query_map([], |row| {
902            Ok((
903                row.get(0)?,
904                row.get(1)?,
905                row.get::<_, Option<i64>>(2)?.unwrap_or(0),
906            ))
907        })?
908        .filter_map(|r| r.ok())
909        .collect();
910
911    // Get total counts
912    let total_sessions: i64 = conn
913        .query_row("SELECT COUNT(*) FROM sessions", [], |row| row.get(0))
914        .unwrap_or(0);
915
916    let total_messages: i64 = conn
917        .query_row(
918            "SELECT COALESCE(SUM(message_count), 0) FROM sessions",
919            [],
920            |row| row.get(0),
921        )
922        .unwrap_or(0);
923
924    // Get last harvest time
925    let last_harvest: Option<i64> = conn
926        .query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
927            row.get(0)
928        })
929        .ok()
930        .flatten();
931
932    // Get oldest and newest sessions
933    let oldest: Option<i64> = conn
934        .query_row("SELECT MIN(created_at) FROM sessions", [], |row| row.get(0))
935        .ok()
936        .flatten();
937
938    let newest: Option<i64> = conn
939        .query_row("SELECT MAX(updated_at) FROM sessions", [], |row| row.get(0))
940        .ok()
941        .flatten();
942
943    println!("{} Database: {}", "[*]".blue(), db_path.display());
944    println!(
945        "{} Total Sessions: {}",
946        "[*]".blue(),
947        total_sessions.to_string().cyan()
948    );
949    println!(
950        "{} Total Messages: {}",
951        "[*]".blue(),
952        total_messages.to_string().cyan()
953    );
954
955    if let Some(ts) = last_harvest {
956        let dt = DateTime::from_timestamp_millis(ts)
957            .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
958            .unwrap_or_else(|| "Unknown".to_string());
959        println!("{} Last Harvest: {}", "[*]".blue(), dt);
960    }
961
962    if let (Some(old), Some(new)) = (oldest, newest) {
963        let old_dt = DateTime::from_timestamp_millis(old)
964            .map(|d| d.format("%Y-%m-%d").to_string())
965            .unwrap_or_default();
966        let new_dt = DateTime::from_timestamp_millis(new)
967            .map(|d| d.format("%Y-%m-%d").to_string())
968            .unwrap_or_default();
969        println!("{} Date Range: {} to {}", "[*]".blue(), old_dt, new_dt);
970    }
971
972    if !provider_stats.is_empty() {
973        println!("\n{} Sessions by Provider:", "[*]".blue());
974        for (provider, count, messages) in &provider_stats {
975            println!(
976                "   {} {}: {} sessions, {} messages",
977                "[+]".green(),
978                provider.bold(),
979                count.to_string().cyan(),
980                messages.to_string().dimmed()
981            );
982        }
983    }
984
985    // Check git status
986    let db_dir = db_path.parent().unwrap_or(Path::new("."));
987    if db_dir.join(".git").exists()
988        || db_path
989            .parent()
990            .map(|p| p.join(".git").exists())
991            .unwrap_or(false)
992    {
993        println!("\n{} Git Status:", "[*]".blue());
994
995        let output = Command::new("git")
996            .current_dir(db_dir)
997            .args([
998                "status",
999                "--porcelain",
1000                db_path.file_name().unwrap().to_str().unwrap(),
1001            ])
1002            .output();
1003
1004        match output {
1005            Ok(out) => {
1006                let status = String::from_utf8_lossy(&out.stdout);
1007                if status.is_empty() {
1008                    println!("   {} No uncommitted changes", "[+]".green());
1009                } else {
1010                    println!("   {} Uncommitted changes detected", "[!]".yellow());
1011                    println!("   Run 'csm harvest git commit' to save changes");
1012                }
1013            }
1014            Err(_) => {
1015                println!("   {} Unable to check git status", "[!]".yellow());
1016            }
1017        }
1018    } else {
1019        println!("\n{} Git tracking not enabled", "[i]".dimmed());
1020        println!("   Run 'csm harvest git init' to enable version tracking");
1021    }
1022
1023    Ok(())
1024}
1025
1026/// List sessions in the harvest database
1027pub fn harvest_list(
1028    path: Option<&str>,
1029    provider: Option<&str>,
1030    limit: usize,
1031    search: Option<&str>,
1032) -> Result<()> {
1033    let db_path = get_db_path(path)?;
1034
1035    if !db_path.exists() {
1036        println!(
1037            "{} Database not found: {}",
1038            "[!]".yellow(),
1039            db_path.display()
1040        );
1041        return Ok(());
1042    }
1043
1044    let conn = Connection::open(&db_path)?;
1045
1046    let mut query = String::from(
1047        "SELECT id, provider, title, message_count, created_at, updated_at, workspace_name 
1048         FROM sessions WHERE 1=1",
1049    );
1050    let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1051
1052    if let Some(p) = provider {
1053        query.push_str(" AND LOWER(provider) LIKE ?");
1054        params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1055    }
1056
1057    if let Some(s) = search {
1058        query.push_str(" AND (LOWER(title) LIKE ? OR LOWER(id) LIKE ?)");
1059        let pattern = format!("%{}%", s.to_lowercase());
1060        params_vec.push(Box::new(pattern.clone()));
1061        params_vec.push(Box::new(pattern));
1062    }
1063
1064    query.push_str(" ORDER BY updated_at DESC LIMIT ?");
1065    params_vec.push(Box::new(limit as i64));
1066
1067    let mut stmt = conn.prepare(&query)?;
1068
1069    // Build params slice
1070    let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1071
1072    let sessions: Vec<HarvestQueryResult> = stmt
1073        .query_map(params_slice.as_slice(), |row| {
1074            Ok((
1075                row.get(0)?,
1076                row.get(1)?,
1077                row.get(2)?,
1078                row.get(3)?,
1079                row.get(4)?,
1080                row.get(5)?,
1081                row.get(6)?,
1082            ))
1083        })?
1084        .filter_map(|r| r.ok())
1085        .collect();
1086
1087    println!("\n{} Harvested Sessions", "[H]".magenta().bold());
1088    println!("{}", "=".repeat(60));
1089
1090    if sessions.is_empty() {
1091        println!("{} No sessions found", "[i]".dimmed());
1092        return Ok(());
1093    }
1094
1095    for (id, prov, title, msg_count, _created, updated, ws_name) in &sessions {
1096        let date = DateTime::from_timestamp_millis(*updated)
1097            .map(|d| d.format("%Y-%m-%d %H:%M").to_string())
1098            .unwrap_or_default();
1099
1100        println!("\n{} {}", "[S]".cyan(), title.bold());
1101        println!("   ID: {}", &id[..16.min(id.len())].dimmed());
1102        println!(
1103            "   Provider: {} | Messages: {} | Updated: {}",
1104            prov.cyan(),
1105            msg_count.to_string().green(),
1106            date.dimmed()
1107        );
1108        if let Some(ws) = ws_name {
1109            println!("   Workspace: {}", ws.dimmed());
1110        }
1111    }
1112
1113    println!(
1114        "\n{} Showing {} of available sessions",
1115        "[i]".dimmed(),
1116        sessions.len()
1117    );
1118
1119    Ok(())
1120}
1121
1122/// Export sessions from the harvest database
1123pub fn harvest_export(
1124    path: Option<&str>,
1125    output: &str,
1126    format: &str,
1127    provider: Option<&str>,
1128    session_ids: Option<&[String]>,
1129) -> Result<()> {
1130    let db_path = get_db_path(path)?;
1131    let output_path = PathBuf::from(output);
1132
1133    if !db_path.exists() {
1134        anyhow::bail!("Database not found: {}", db_path.display());
1135    }
1136
1137    let conn = Connection::open(&db_path)?;
1138
1139    println!("\n{} Exporting Sessions", "[H]".magenta().bold());
1140    println!("{}", "=".repeat(60));
1141
1142    // Build query
1143    let mut query = String::from("SELECT session_json FROM sessions WHERE 1=1");
1144    let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1145
1146    if let Some(p) = provider {
1147        query.push_str(" AND LOWER(provider) LIKE ?");
1148        params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1149    }
1150
1151    if let Some(ids) = session_ids {
1152        let placeholders: Vec<&str> = ids.iter().map(|_| "?").collect();
1153        query.push_str(&format!(" AND id IN ({})", placeholders.join(",")));
1154        for id in ids {
1155            params_vec.push(Box::new(id.clone()));
1156        }
1157    }
1158
1159    let mut stmt = conn.prepare(&query)?;
1160    let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1161
1162    let sessions: Vec<String> = stmt
1163        .query_map(params_slice.as_slice(), |row| row.get(0))?
1164        .filter_map(|r| r.ok())
1165        .collect();
1166
1167    if sessions.is_empty() {
1168        println!("{} No sessions to export", "[i]".dimmed());
1169        return Ok(());
1170    }
1171
1172    // Create output directory if needed
1173    if let Some(parent) = output_path.parent() {
1174        if !parent.exists() {
1175            fs::create_dir_all(parent)?;
1176        }
1177    }
1178
1179    match format.to_lowercase().as_str() {
1180        "json" => {
1181            // Export as JSON array
1182            let parsed: Vec<serde_json::Value> = sessions
1183                .iter()
1184                .filter_map(|s| serde_json::from_str(s).ok())
1185                .collect();
1186            let json_output = serde_json::to_string_pretty(&parsed)?;
1187            fs::write(&output_path, json_output)?;
1188        }
1189        "jsonl" => {
1190            // Export as JSON Lines
1191            let content: String = sessions
1192                .iter()
1193                .filter_map(|s| serde_json::from_str::<serde_json::Value>(s).ok())
1194                .map(|v| serde_json::to_string(&v).unwrap_or_default())
1195                .collect::<Vec<_>>()
1196                .join("\n");
1197            fs::write(&output_path, content)?;
1198        }
1199        "md" | "markdown" => {
1200            // Export as Markdown
1201            let mut md_content = String::from("# Chat Sessions Export\n\n");
1202            md_content.push_str(&format!(
1203                "Exported: {}\n\n",
1204                Utc::now().format("%Y-%m-%d %H:%M:%S")
1205            ));
1206
1207            for session_json in &sessions {
1208                if let Ok(session) = parse_session_json(session_json) {
1209                    md_content.push_str(&format!("## {}\n\n", session.title()));
1210                    md_content.push_str(&format!("Messages: {}\n\n", session.request_count()));
1211
1212                    for request in &session.requests {
1213                        if let Some(msg) = &request.message {
1214                            if let Some(text) = &msg.text {
1215                                md_content.push_str(&format!("### User\n\n{}\n\n", text));
1216                            }
1217                        }
1218                        // Extract response text from the JSON value
1219                        if let Some(response) = &request.response {
1220                            let response_text = response
1221                                .get("text")
1222                                .and_then(|v| v.as_str())
1223                                .or_else(|| response.get("content").and_then(|v| v.as_str()))
1224                                .or_else(|| {
1225                                    // Try to get from value array (older format)
1226                                    response
1227                                        .get("value")
1228                                        .and_then(|v| v.as_array())
1229                                        .and_then(|arr| arr.first())
1230                                        .and_then(|v| v.get("value"))
1231                                        .and_then(|v| v.as_str())
1232                                });
1233
1234                            if let Some(text) = response_text {
1235                                md_content.push_str(&format!("### Assistant\n\n{}\n\n", text));
1236                            }
1237                        }
1238                    }
1239                    md_content.push_str("---\n\n");
1240                }
1241            }
1242            fs::write(&output_path, md_content)?;
1243        }
1244        _ => {
1245            anyhow::bail!("Unknown format: {}. Supported: json, jsonl, md", format);
1246        }
1247    }
1248
1249    println!(
1250        "{} Exported {} sessions to {}",
1251        "[+]".green(),
1252        sessions.len().to_string().cyan(),
1253        output_path.display()
1254    );
1255
1256    Ok(())
1257}
1258
1259/// Git operations for harvest database
1260pub fn harvest_git_init(path: Option<&str>) -> Result<()> {
1261    let db_path = get_db_path(path)?;
1262    init_git_tracking(&db_path)
1263}
1264
1265pub fn harvest_git_commit(path: Option<&str>, message: Option<&str>) -> Result<()> {
1266    let db_path = get_db_path(path)?;
1267    let msg = message.unwrap_or("Update harvest database");
1268    git_commit_harvest(&db_path, msg)
1269}
1270
1271pub fn harvest_git_log(path: Option<&str>, count: usize) -> Result<()> {
1272    let db_path = get_db_path(path)?;
1273    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1274    let db_name = db_path.file_name().unwrap().to_str().unwrap();
1275
1276    println!("\n{} Harvest Git History", "[H]".magenta().bold());
1277    println!("{}", "=".repeat(60));
1278
1279    let output = Command::new("git")
1280        .current_dir(db_dir)
1281        .args([
1282            "log",
1283            "--oneline",
1284            &format!("-{}", count),
1285            "--follow",
1286            "--",
1287            db_name,
1288        ])
1289        .output()
1290        .context("Failed to run git log")?;
1291
1292    if !output.status.success() {
1293        let stderr = String::from_utf8_lossy(&output.stderr);
1294        if stderr.contains("does not have any commits") {
1295            println!("{} No commits yet", "[i]".dimmed());
1296        } else {
1297            anyhow::bail!("Git log failed: {}", stderr);
1298        }
1299        return Ok(());
1300    }
1301
1302    let log = String::from_utf8_lossy(&output.stdout);
1303    if log.trim().is_empty() {
1304        println!("{} No commits found for {}", "[i]".dimmed(), db_name);
1305    } else {
1306        for line in log.lines() {
1307            let parts: Vec<&str> = line.splitn(2, ' ').collect();
1308            if parts.len() == 2 {
1309                println!("{} {} {}", "[C]".yellow(), parts[0].cyan(), parts[1]);
1310            } else {
1311                println!("{}", line);
1312            }
1313        }
1314    }
1315
1316    Ok(())
1317}
1318
1319pub fn harvest_git_diff(path: Option<&str>, commit: Option<&str>) -> Result<()> {
1320    let db_path = get_db_path(path)?;
1321    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1322    let db_name = db_path.file_name().unwrap().to_str().unwrap();
1323
1324    println!("\n{} Harvest Database Changes", "[H]".magenta().bold());
1325    println!("{}", "=".repeat(60));
1326
1327    let mut args = vec!["diff", "--stat"];
1328    if let Some(c) = commit {
1329        args.push(c);
1330    }
1331    args.push("--");
1332    args.push(db_name);
1333
1334    let output = Command::new("git")
1335        .current_dir(db_dir)
1336        .args(&args)
1337        .output()
1338        .context("Failed to run git diff")?;
1339
1340    let diff = String::from_utf8_lossy(&output.stdout);
1341    if diff.trim().is_empty() {
1342        println!("{} No changes", "[+]".green());
1343    } else {
1344        println!("{}", diff);
1345    }
1346
1347    Ok(())
1348}
1349
1350pub fn harvest_git_restore(path: Option<&str>, commit: &str) -> Result<()> {
1351    let db_path = get_db_path(path)?;
1352    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1353    let db_name = db_path.file_name().unwrap().to_str().unwrap();
1354
1355    println!("\n{} Restoring Harvest Database", "[H]".magenta().bold());
1356    println!("{}", "=".repeat(60));
1357
1358    // Create backup first
1359    let backup_path = db_path.with_extension("db.backup");
1360    if db_path.exists() {
1361        fs::copy(&db_path, &backup_path)?;
1362        println!(
1363            "{} Created backup: {}",
1364            "[+]".green(),
1365            backup_path.display()
1366        );
1367    }
1368
1369    // Restore from commit
1370    let output = Command::new("git")
1371        .current_dir(db_dir)
1372        .args(["checkout", commit, "--", db_name])
1373        .output()
1374        .context("Failed to run git checkout")?;
1375
1376    if !output.status.success() {
1377        let stderr = String::from_utf8_lossy(&output.stderr);
1378        anyhow::bail!("Git restore failed: {}", stderr);
1379    }
1380
1381    println!(
1382        "{} Restored database from commit: {}",
1383        "[+]".green(),
1384        commit
1385    );
1386
1387    Ok(())
1388}
1389
1390// ============================================================================
1391// Helper Functions
1392// ============================================================================
1393
1394fn get_db_path(path: Option<&str>) -> Result<PathBuf> {
1395    if let Some(p) = path {
1396        return Ok(PathBuf::from(p));
1397    }
1398
1399    // Check environment variable
1400    if let Ok(p) = std::env::var("CSM_HARVEST_DB") {
1401        return Ok(PathBuf::from(p));
1402    }
1403
1404    // Default to current directory
1405    Ok(std::env::current_dir()?.join("chat_sessions.db"))
1406}
1407
1408fn create_harvest_database(path: &Path) -> Result<()> {
1409    let conn = Connection::open(path)?;
1410
1411    conn.execute_batch(
1412        r#"
1413        -- Sessions table (original harvest format)
1414        CREATE TABLE IF NOT EXISTS sessions (
1415            id TEXT PRIMARY KEY,
1416            provider TEXT NOT NULL,
1417            provider_type TEXT,
1418            workspace_id TEXT,
1419            workspace_name TEXT,
1420            title TEXT NOT NULL,
1421            message_count INTEGER DEFAULT 0,
1422            created_at INTEGER NOT NULL,
1423            updated_at INTEGER NOT NULL,
1424            harvested_at INTEGER NOT NULL,
1425            session_json TEXT NOT NULL
1426        );
1427
1428        CREATE INDEX IF NOT EXISTS idx_sessions_provider ON sessions(provider);
1429        CREATE INDEX IF NOT EXISTS idx_sessions_workspace ON sessions(workspace_id);
1430        CREATE INDEX IF NOT EXISTS idx_sessions_updated ON sessions(updated_at);
1431        
1432        -- Enhanced messages table with raw markdown and metadata
1433        CREATE TABLE IF NOT EXISTS messages_v2 (
1434            id INTEGER PRIMARY KEY AUTOINCREMENT,
1435            session_id TEXT NOT NULL,
1436            message_index INTEGER NOT NULL,
1437            request_id TEXT,
1438            response_id TEXT,
1439            role TEXT NOT NULL,
1440            content_raw TEXT NOT NULL,
1441            content_markdown TEXT,
1442            model_id TEXT,
1443            timestamp INTEGER,
1444            is_canceled INTEGER DEFAULT 0,
1445            metadata_json TEXT,
1446            created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1447            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1448            UNIQUE(session_id, message_index, role)
1449        );
1450        
1451        CREATE INDEX IF NOT EXISTS idx_messages_v2_session ON messages_v2(session_id);
1452        CREATE INDEX IF NOT EXISTS idx_messages_v2_role ON messages_v2(role);
1453        CREATE INDEX IF NOT EXISTS idx_messages_v2_timestamp ON messages_v2(timestamp);
1454        
1455        -- Tool invocations within messages (file edits, terminal commands, etc.)
1456        CREATE TABLE IF NOT EXISTS tool_invocations (
1457            id INTEGER PRIMARY KEY AUTOINCREMENT,
1458            message_id INTEGER NOT NULL,
1459            session_id TEXT NOT NULL,
1460            tool_name TEXT NOT NULL,
1461            tool_call_id TEXT,
1462            invocation_index INTEGER DEFAULT 0,
1463            input_json TEXT,
1464            output_json TEXT,
1465            status TEXT DEFAULT 'pending',
1466            is_confirmed INTEGER DEFAULT 0,
1467            timestamp INTEGER,
1468            FOREIGN KEY (message_id) REFERENCES messages_v2(id) ON DELETE CASCADE,
1469            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1470        );
1471        
1472        CREATE INDEX IF NOT EXISTS idx_tool_invocations_message ON tool_invocations(message_id);
1473        CREATE INDEX IF NOT EXISTS idx_tool_invocations_session ON tool_invocations(session_id);
1474        CREATE INDEX IF NOT EXISTS idx_tool_invocations_tool ON tool_invocations(tool_name);
1475        
1476        -- File changes/diffs associated with tool invocations
1477        CREATE TABLE IF NOT EXISTS file_changes (
1478            id INTEGER PRIMARY KEY AUTOINCREMENT,
1479            tool_invocation_id INTEGER,
1480            session_id TEXT NOT NULL,
1481            message_index INTEGER,
1482            file_path TEXT NOT NULL,
1483            change_type TEXT NOT NULL,
1484            old_content TEXT,
1485            new_content TEXT,
1486            diff_unified TEXT,
1487            line_start INTEGER,
1488            line_end INTEGER,
1489            timestamp INTEGER,
1490            FOREIGN KEY (tool_invocation_id) REFERENCES tool_invocations(id) ON DELETE CASCADE,
1491            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1492        );
1493        
1494        CREATE INDEX IF NOT EXISTS idx_file_changes_tool ON file_changes(tool_invocation_id);
1495        CREATE INDEX IF NOT EXISTS idx_file_changes_session ON file_changes(session_id);
1496        CREATE INDEX IF NOT EXISTS idx_file_changes_path ON file_changes(file_path);
1497        
1498        -- Message-level checkpoints for versioning
1499        CREATE TABLE IF NOT EXISTS message_checkpoints (
1500            id INTEGER PRIMARY KEY AUTOINCREMENT,
1501            session_id TEXT NOT NULL,
1502            message_index INTEGER NOT NULL,
1503            checkpoint_number INTEGER NOT NULL,
1504            content_hash TEXT NOT NULL,
1505            snapshot_json TEXT,
1506            file_state_json TEXT,
1507            created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1508            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1509            UNIQUE(session_id, message_index, checkpoint_number)
1510        );
1511        
1512        CREATE INDEX IF NOT EXISTS idx_message_checkpoints_session ON message_checkpoints(session_id);
1513        CREATE INDEX IF NOT EXISTS idx_sessions_title ON sessions(title);
1514
1515        -- Messages table for full-text search
1516        CREATE TABLE IF NOT EXISTS messages (
1517            id INTEGER PRIMARY KEY AUTOINCREMENT,
1518            session_id TEXT NOT NULL,
1519            message_index INTEGER NOT NULL,
1520            role TEXT NOT NULL,
1521            content TEXT NOT NULL,
1522            created_at INTEGER,
1523            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1524            UNIQUE(session_id, message_index)
1525        );
1526        
1527        CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id);
1528        CREATE INDEX IF NOT EXISTS idx_messages_role ON messages(role);
1529        
1530        -- Checkpoints for version tracking
1531        CREATE TABLE IF NOT EXISTS checkpoints (
1532            id INTEGER PRIMARY KEY AUTOINCREMENT,
1533            session_id TEXT NOT NULL,
1534            checkpoint_number INTEGER NOT NULL,
1535            message TEXT,
1536            message_count INTEGER NOT NULL,
1537            content_hash TEXT NOT NULL,
1538            snapshot TEXT,
1539            created_at TEXT NOT NULL DEFAULT (datetime('now')),
1540            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1541            UNIQUE(session_id, checkpoint_number)
1542        );
1543        
1544        CREATE INDEX IF NOT EXISTS idx_checkpoints_session ON checkpoints(session_id);
1545        
1546        -- Share links for importing shared conversations
1547        CREATE TABLE IF NOT EXISTS share_links (
1548            id TEXT PRIMARY KEY,
1549            session_id TEXT,
1550            provider TEXT NOT NULL,
1551            url TEXT NOT NULL UNIQUE,
1552            share_id TEXT NOT NULL,
1553            title TEXT,
1554            imported INTEGER DEFAULT 0,
1555            imported_at INTEGER,
1556            created_at INTEGER NOT NULL,
1557            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE SET NULL
1558        );
1559        
1560        CREATE INDEX IF NOT EXISTS idx_share_links_provider ON share_links(provider);
1561        CREATE INDEX IF NOT EXISTS idx_share_links_imported ON share_links(imported);
1562
1563        -- Harvest metadata
1564        CREATE TABLE IF NOT EXISTS harvest_metadata (
1565            key TEXT PRIMARY KEY,
1566            value TEXT NOT NULL
1567        );
1568
1569        INSERT OR REPLACE INTO harvest_metadata (key, value) 
1570        VALUES ('version', '2.1'),
1571               ('created_at', datetime('now'));
1572               
1573        -- Full-text search for messages (standalone FTS table)
1574        CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
1575            content_raw
1576        );
1577        
1578        -- Triggers to keep FTS index in sync with messages_v2
1579        CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
1580            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1581        END;
1582        
1583        CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
1584            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
1585        END;
1586        
1587        CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
1588            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
1589            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1590        END;
1591        "#,
1592    )?;
1593
1594    Ok(())
1595}
1596
1597fn insert_or_update_session(
1598    conn: &Connection,
1599    session: &ChatSession,
1600    provider: &str,
1601    workspace_id: Option<&str>,
1602    workspace_name: Option<&str>,
1603) -> Result<bool> {
1604    let session_id = session
1605        .session_id
1606        .clone()
1607        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
1608
1609    let now = Utc::now().timestamp_millis();
1610    let session_json = serde_json::to_string(session)?;
1611
1612    // Check if session exists
1613    let existing: Option<i64> = conn
1614        .query_row(
1615            "SELECT updated_at FROM sessions WHERE id = ?",
1616            [&session_id],
1617            |row| row.get(0),
1618        )
1619        .ok();
1620
1621    let updated = existing.is_some();
1622
1623    conn.execute(
1624        r#"
1625        INSERT OR REPLACE INTO sessions 
1626        (id, provider, provider_type, workspace_id, workspace_name, title, 
1627         message_count, created_at, updated_at, harvested_at, session_json)
1628        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1629        "#,
1630        params![
1631            session_id,
1632            provider,
1633            provider,
1634            workspace_id,
1635            workspace_name,
1636            session.title(),
1637            session.request_count() as i64,
1638            session.creation_date,
1639            session.last_message_date,
1640            now,
1641            session_json,
1642        ],
1643    )?;
1644
1645    // Populate enhanced message tables
1646    populate_enhanced_messages(conn, &session_id, session)?;
1647
1648    Ok(updated)
1649}
1650
1651/// Populate the enhanced messages_v2, tool_invocations, and file_changes tables
1652fn populate_enhanced_messages(
1653    conn: &Connection,
1654    session_id: &str,
1655    session: &ChatSession,
1656) -> Result<()> {
1657    // Delete existing messages for this session to avoid duplicates
1658    conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
1659    conn.execute(
1660        "DELETE FROM tool_invocations WHERE session_id = ?",
1661        [session_id],
1662    )?;
1663    conn.execute(
1664        "DELETE FROM file_changes WHERE session_id = ?",
1665        [session_id],
1666    )?;
1667
1668    for (idx, request) in session.requests.iter().enumerate() {
1669        let timestamp = request.timestamp;
1670        let request_id = request.request_id.as_deref();
1671        let response_id = request.response_id.as_deref();
1672        let model_id = request.model_id.as_deref();
1673        let is_canceled = request.is_canceled.unwrap_or(false);
1674
1675        // Insert user message
1676        if let Some(ref message) = request.message {
1677            let content = message.text.clone().unwrap_or_default();
1678            if !content.is_empty() {
1679                let metadata = serde_json::json!({
1680                    "variable_data": request.variable_data,
1681                });
1682
1683                conn.execute(
1684                    r#"
1685                    INSERT OR REPLACE INTO messages_v2 
1686                    (session_id, message_index, request_id, response_id, role, 
1687                     content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1688                    VALUES (?, ?, ?, ?, 'user', ?, ?, ?, ?, 0, ?)
1689                    "#,
1690                    params![
1691                        session_id,
1692                        (idx * 2) as i64,
1693                        request_id,
1694                        response_id,
1695                        &content,
1696                        &content, // content_markdown same as raw for user messages
1697                        model_id,
1698                        timestamp,
1699                        serde_json::to_string(&metadata).ok(),
1700                    ],
1701                )?;
1702            }
1703        }
1704
1705        // Insert assistant response with tool invocations
1706        if let Some(ref response) = request.response {
1707            let (content, tool_invocations) = extract_response_content_and_tools(response);
1708
1709            if !content.is_empty() || !tool_invocations.is_empty() {
1710                let metadata = serde_json::json!({
1711                    "content_references": request.content_references,
1712                    "code_citations": request.code_citations,
1713                    "response_markdown_info": request.response_markdown_info,
1714                });
1715
1716                conn.execute(
1717                    r#"
1718                    INSERT OR REPLACE INTO messages_v2 
1719                    (session_id, message_index, request_id, response_id, role, 
1720                     content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1721                    VALUES (?, ?, ?, ?, 'assistant', ?, ?, ?, ?, ?, ?)
1722                    "#,
1723                    params![
1724                        session_id,
1725                        (idx * 2 + 1) as i64,
1726                        request_id,
1727                        response_id,
1728                        &content,
1729                        &content,
1730                        model_id,
1731                        timestamp,
1732                        is_canceled as i64,
1733                        serde_json::to_string(&metadata).ok(),
1734                    ],
1735                )?;
1736
1737                // Get the message_id we just inserted
1738                let message_id: i64 = conn.last_insert_rowid();
1739
1740                // Insert tool invocations and file changes
1741                for (inv_idx, invocation) in tool_invocations.iter().enumerate() {
1742                    insert_tool_invocation(
1743                        conn, message_id, session_id, inv_idx, invocation, timestamp,
1744                    )?;
1745                }
1746            }
1747        }
1748    }
1749
1750    Ok(())
1751}
1752
1753/// Extract response content and tool invocations from the response JSON
1754fn extract_response_content_and_tools(
1755    response: &serde_json::Value,
1756) -> (String, Vec<serde_json::Value>) {
1757    let mut text_parts = Vec::new();
1758    let mut tool_invocations = Vec::new();
1759
1760    if let Some(items) = response.as_array() {
1761        for item in items {
1762            let kind = item.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1763
1764            match kind {
1765                "toolInvocationSerialized" => {
1766                    tool_invocations.push(item.clone());
1767                }
1768                "thinking" => {
1769                    // Skip thinking blocks
1770                    continue;
1771                }
1772                _ => {
1773                    if let Some(value) = item.get("value").and_then(|v| v.as_str()) {
1774                        // Filter out empty code block markers (```\n or just ```)
1775                        let trimmed = value.trim();
1776                        if !trimmed.is_empty() && !is_empty_code_block(trimmed) {
1777                            text_parts.push(value.to_string());
1778                        }
1779                    }
1780                }
1781            }
1782        }
1783    }
1784
1785    (text_parts.join("\n\n"), tool_invocations)
1786}
1787
1788/// Insert a tool invocation and any associated file changes
1789fn insert_tool_invocation(
1790    conn: &Connection,
1791    message_id: i64,
1792    session_id: &str,
1793    inv_idx: usize,
1794    invocation: &serde_json::Value,
1795    timestamp: Option<i64>,
1796) -> Result<()> {
1797    let tool_name = invocation
1798        .get("toolId")
1799        .and_then(|t| t.as_str())
1800        .unwrap_or("unknown");
1801    let tool_call_id = invocation.get("toolCallId").and_then(|t| t.as_str());
1802    let is_complete = invocation
1803        .get("isComplete")
1804        .and_then(|c| c.as_bool())
1805        .unwrap_or(false);
1806    let is_confirmed = invocation.get("isConfirmed");
1807    let tool_data = invocation.get("toolSpecificData");
1808
1809    let input_json = tool_data.map(|d| serde_json::to_string(d).unwrap_or_default());
1810    let status = if is_complete { "complete" } else { "pending" };
1811    let confirmed = match is_confirmed {
1812        Some(v) => v
1813            .get("type")
1814            .and_then(|t| t.as_i64())
1815            .map(|t| t > 0)
1816            .unwrap_or(false),
1817        None => false,
1818    };
1819
1820    conn.execute(
1821        r#"
1822        INSERT INTO tool_invocations 
1823        (message_id, session_id, tool_name, tool_call_id, invocation_index, 
1824         input_json, status, is_confirmed, timestamp)
1825        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
1826        "#,
1827        params![
1828            message_id,
1829            session_id,
1830            tool_name,
1831            tool_call_id,
1832            inv_idx as i64,
1833            input_json,
1834            status,
1835            confirmed as i64,
1836            timestamp,
1837        ],
1838    )?;
1839
1840    let tool_invocation_id = conn.last_insert_rowid();
1841
1842    // Extract and insert file changes based on tool type
1843    if let Some(data) = tool_data {
1844        insert_file_changes(conn, tool_invocation_id, session_id, data, timestamp)?;
1845    }
1846
1847    Ok(())
1848}
1849
1850/// Insert file changes from tool-specific data
1851fn insert_file_changes(
1852    conn: &Connection,
1853    tool_invocation_id: i64,
1854    session_id: &str,
1855    tool_data: &serde_json::Value,
1856    timestamp: Option<i64>,
1857) -> Result<()> {
1858    let kind = tool_data.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1859
1860    match kind {
1861        "terminal" => {
1862            // Terminal command execution
1863            if let Some(cmd_line) = tool_data.get("commandLine") {
1864                let original = cmd_line.get("original").and_then(|o| o.as_str());
1865                let edited = cmd_line.get("toolEdited").and_then(|e| e.as_str());
1866                let output = tool_data
1867                    .get("terminalCommandOutput")
1868                    .map(|o| serde_json::to_string(o).unwrap_or_default());
1869
1870                conn.execute(
1871                    r#"
1872                    INSERT INTO file_changes 
1873                    (tool_invocation_id, session_id, file_path, change_type, 
1874                     old_content, new_content, diff_unified, timestamp)
1875                    VALUES (?, ?, '[terminal]', 'command', ?, ?, ?, ?)
1876                    "#,
1877                    params![
1878                        tool_invocation_id,
1879                        session_id,
1880                        original,
1881                        edited.or(original),
1882                        output,
1883                        timestamp,
1884                    ],
1885                )?;
1886            }
1887        }
1888        "replaceFile" | "editFile" => {
1889            // File edit with old/new strings
1890            let file_path = tool_data
1891                .get("uri")
1892                .or_else(|| tool_data.get("filePath"))
1893                .and_then(|p| p.as_str())
1894                .unwrap_or("[unknown]");
1895            let old_string = tool_data.get("oldString").and_then(|s| s.as_str());
1896            let new_string = tool_data.get("newString").and_then(|s| s.as_str());
1897
1898            // Generate unified diff if we have both old and new content
1899            let diff = if let (Some(old), Some(new)) = (old_string, new_string) {
1900                Some(generate_unified_diff(old, new, file_path))
1901            } else {
1902                None
1903            };
1904
1905            conn.execute(
1906                r#"
1907                INSERT INTO file_changes 
1908                (tool_invocation_id, session_id, file_path, change_type, 
1909                 old_content, new_content, diff_unified, timestamp)
1910                VALUES (?, ?, ?, 'edit', ?, ?, ?, ?)
1911                "#,
1912                params![
1913                    tool_invocation_id,
1914                    session_id,
1915                    file_path,
1916                    old_string,
1917                    new_string,
1918                    diff,
1919                    timestamp,
1920                ],
1921            )?;
1922        }
1923        "createFile" => {
1924            let file_path = tool_data
1925                .get("uri")
1926                .or_else(|| tool_data.get("filePath"))
1927                .and_then(|p| p.as_str())
1928                .unwrap_or("[unknown]");
1929            let content = tool_data.get("content").and_then(|c| c.as_str());
1930
1931            conn.execute(
1932                r#"
1933                INSERT INTO file_changes 
1934                (tool_invocation_id, session_id, file_path, change_type, 
1935                 new_content, timestamp)
1936                VALUES (?, ?, ?, 'create', ?, ?)
1937                "#,
1938                params![
1939                    tool_invocation_id,
1940                    session_id,
1941                    file_path,
1942                    content,
1943                    timestamp,
1944                ],
1945            )?;
1946        }
1947        "readFile" => {
1948            let file_path = tool_data
1949                .get("uri")
1950                .or_else(|| tool_data.get("filePath"))
1951                .and_then(|p| p.as_str())
1952                .unwrap_or("[unknown]");
1953
1954            conn.execute(
1955                r#"
1956                INSERT INTO file_changes 
1957                (tool_invocation_id, session_id, file_path, change_type, timestamp)
1958                VALUES (?, ?, ?, 'read', ?)
1959                "#,
1960                params![tool_invocation_id, session_id, file_path, timestamp,],
1961            )?;
1962        }
1963        _ => {
1964            // Other tool types - store as generic change
1965            if !kind.is_empty() {
1966                let data_json = serde_json::to_string(tool_data).ok();
1967                conn.execute(
1968                    r#"
1969                    INSERT INTO file_changes 
1970                    (tool_invocation_id, session_id, file_path, change_type, 
1971                     diff_unified, timestamp)
1972                    VALUES (?, ?, ?, ?, ?, ?)
1973                    "#,
1974                    params![
1975                        tool_invocation_id,
1976                        session_id,
1977                        format!("[{}]", kind),
1978                        kind,
1979                        data_json,
1980                        timestamp,
1981                    ],
1982                )?;
1983            }
1984        }
1985    }
1986
1987    Ok(())
1988}
1989
1990/// Generate a simple unified diff between two strings
1991fn generate_unified_diff(old: &str, new: &str, file_path: &str) -> String {
1992    let old_lines: Vec<&str> = old.lines().collect();
1993    let new_lines: Vec<&str> = new.lines().collect();
1994
1995    let mut diff = format!("--- a/{}\n+++ b/{}\n", file_path, file_path);
1996
1997    // Simple line-by-line diff (not a full Myers diff, but good enough for storage)
1998    let max_lines = old_lines.len().max(new_lines.len());
1999    let mut in_hunk = false;
2000    let mut hunk_start = 0;
2001    let mut hunk_lines = Vec::new();
2002
2003    for i in 0..max_lines {
2004        let old_line = old_lines.get(i).copied();
2005        let new_line = new_lines.get(i).copied();
2006
2007        match (old_line, new_line) {
2008            (Some(o), Some(n)) if o == n => {
2009                if in_hunk {
2010                    hunk_lines.push(format!(" {}", o));
2011                }
2012            }
2013            (Some(o), Some(n)) => {
2014                if !in_hunk {
2015                    in_hunk = true;
2016                    hunk_start = i + 1;
2017                }
2018                hunk_lines.push(format!("-{}", o));
2019                hunk_lines.push(format!("+{}", n));
2020            }
2021            (Some(o), None) => {
2022                if !in_hunk {
2023                    in_hunk = true;
2024                    hunk_start = i + 1;
2025                }
2026                hunk_lines.push(format!("-{}", o));
2027            }
2028            (None, Some(n)) => {
2029                if !in_hunk {
2030                    in_hunk = true;
2031                    hunk_start = i + 1;
2032                }
2033                hunk_lines.push(format!("+{}", n));
2034            }
2035            (None, None) => break,
2036        }
2037    }
2038
2039    if !hunk_lines.is_empty() {
2040        diff.push_str(&format!(
2041            "@@ -{},{} +{},{} @@\n",
2042            hunk_start,
2043            old_lines.len(),
2044            hunk_start,
2045            new_lines.len()
2046        ));
2047        for line in hunk_lines {
2048            diff.push_str(&line);
2049            diff.push('\n');
2050        }
2051    }
2052
2053    diff
2054}
2055
2056/// Harvest sessions from web-based cloud providers (ChatGPT, Claude, etc.)
2057fn harvest_web_providers(
2058    conn: &Connection,
2059    stats: &mut HarvestStats,
2060    include_providers: &[String],
2061    exclude_providers: &[String],
2062) -> Result<()> {
2063    use crate::browser::extract_provider_cookies;
2064
2065    println!("\n{} Harvesting from web providers...", "[*]".blue());
2066
2067    // Define web providers to harvest from
2068    let web_provider_configs: Vec<(&str, &str, &str)> = vec![
2069        ("ChatGPT", "chatgpt", "__Secure-next-auth.session-token"),
2070        ("Claude", "claude", "sessionKey"),
2071    ];
2072
2073    let mut web_sessions_harvested = 0;
2074
2075    for (display_name, provider_key, _cookie_name) in &web_provider_configs {
2076        // Check provider filters
2077        if !include_providers.is_empty()
2078            && !include_providers
2079                .iter()
2080                .any(|p| p.eq_ignore_ascii_case(provider_key))
2081        {
2082            continue;
2083        }
2084        if exclude_providers
2085            .iter()
2086            .any(|p| p.eq_ignore_ascii_case(provider_key))
2087        {
2088            continue;
2089        }
2090
2091        print!("   {} Checking {} ... ", "[-]".yellow(), display_name);
2092
2093        // Try to extract cookies for this provider
2094        if let Some(creds) = extract_provider_cookies(provider_key) {
2095            if let Some(session_token) = &creds.session_token {
2096                println!("{}", "authenticated".green());
2097
2098                // Create provider and fetch conversations
2099                let result = match *provider_key {
2100                    "chatgpt" => harvest_chatgpt_sessions(conn, session_token, stats),
2101                    "claude" => harvest_claude_sessions(conn, session_token, stats),
2102                    _ => Ok(0),
2103                };
2104
2105                match result {
2106                    Ok(count) => {
2107                        if count > 0 {
2108                            println!(
2109                                "      {} Harvested {} sessions from {}",
2110                                "[+]".green(),
2111                                count.to_string().cyan(),
2112                                display_name
2113                            );
2114                            web_sessions_harvested += count;
2115                        }
2116                    }
2117                    Err(e) => {
2118                        println!(
2119                            "      {} Failed to harvest {}: {:?}",
2120                            "[!]".red(),
2121                            display_name,
2122                            e
2123                        );
2124                    }
2125                }
2126            } else {
2127                println!("{}", "no session token".yellow());
2128            }
2129        } else {
2130            println!("{}", "not authenticated".yellow());
2131        }
2132    }
2133
2134    if web_sessions_harvested > 0 {
2135        println!(
2136            "   {} Total web sessions harvested: {}",
2137            "[+]".green(),
2138            web_sessions_harvested.to_string().cyan()
2139        );
2140    }
2141
2142    Ok(())
2143}
2144
2145/// Harvest sessions from ChatGPT web interface
2146fn harvest_chatgpt_sessions(
2147    conn: &Connection,
2148    session_token: &str,
2149    stats: &mut HarvestStats,
2150) -> Result<usize> {
2151    use crate::providers::cloud::chatgpt::ChatGPTProvider;
2152    use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2153
2154    let provider = ChatGPTProvider::with_session_token(session_token.to_string());
2155
2156    // List all conversations
2157    let options = FetchOptions {
2158        limit: Some(100),
2159        include_archived: false,
2160        after: None,
2161        before: None,
2162        session_token: Some(session_token.to_string()),
2163    };
2164
2165    let conversations = provider
2166        .list_conversations(&options)
2167        .context("Failed to list ChatGPT conversations")?;
2168
2169    let mut harvested = 0;
2170
2171    for conv_summary in conversations {
2172        // Check if we already have this conversation
2173        let exists: bool = conn
2174            .query_row(
2175                "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'chatgpt')",
2176                params![&conv_summary.id],
2177                |row| row.get(0),
2178            )
2179            .unwrap_or(false);
2180
2181        if exists {
2182            // Could add update logic here later
2183            continue;
2184        }
2185
2186        // Fetch full conversation
2187        match provider.fetch_conversation(&conv_summary.id) {
2188            Ok(conv) => {
2189                // Insert into database using existing function
2190                if let Err(e) =
2191                    insert_cloud_conversation_to_harvest_db(conn, &conv, "chatgpt", None)
2192                {
2193                    eprintln!("Failed to insert ChatGPT session: {}", e);
2194                    continue;
2195                }
2196                harvested += 1;
2197                stats.sessions_added += 1;
2198            }
2199            Err(e) => {
2200                eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2201            }
2202        }
2203
2204        // Small delay to avoid rate limiting
2205        std::thread::sleep(std::time::Duration::from_millis(100));
2206    }
2207
2208    Ok(harvested)
2209}
2210
2211/// Harvest sessions from Claude web interface
2212fn harvest_claude_sessions(
2213    conn: &Connection,
2214    session_token: &str,
2215    stats: &mut HarvestStats,
2216) -> Result<usize> {
2217    use crate::providers::cloud::anthropic::AnthropicProvider;
2218    use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2219
2220    let provider = AnthropicProvider::with_session_token(session_token.to_string());
2221
2222    // List all conversations
2223    let options = FetchOptions {
2224        limit: Some(100),
2225        include_archived: false,
2226        after: None,
2227        before: None,
2228        session_token: Some(session_token.to_string()),
2229    };
2230
2231    let conversations = provider
2232        .list_conversations(&options)
2233        .context("Failed to list Claude conversations")?;
2234
2235    let mut harvested = 0;
2236
2237    for conv_summary in conversations {
2238        // Check if we already have this conversation
2239        let exists: bool = conn
2240            .query_row(
2241                "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'claude')",
2242                params![&conv_summary.id],
2243                |row| row.get(0),
2244            )
2245            .unwrap_or(false);
2246
2247        if exists {
2248            continue;
2249        }
2250
2251        // Fetch full conversation
2252        match provider.fetch_conversation(&conv_summary.id) {
2253            Ok(conv) => {
2254                if let Err(e) = insert_cloud_conversation_to_harvest_db(conn, &conv, "claude", None)
2255                {
2256                    eprintln!("Failed to insert Claude session: {}", e);
2257                    continue;
2258                }
2259                harvested += 1;
2260                stats.sessions_added += 1;
2261            }
2262            Err(e) => {
2263                eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2264            }
2265        }
2266
2267        std::thread::sleep(std::time::Duration::from_millis(100));
2268    }
2269
2270    Ok(harvested)
2271}
2272
2273/// Insert a CloudConversation into the harvest database
2274fn insert_cloud_conversation_to_harvest_db(
2275    conn: &Connection,
2276    conv: &crate::providers::cloud::common::CloudConversation,
2277    provider: &str,
2278    workspace_name: Option<&str>,
2279) -> Result<()> {
2280    let now = Utc::now().timestamp_millis();
2281    let created_at = conv.created_at.timestamp_millis();
2282    let updated_at = conv
2283        .updated_at
2284        .map(|dt| dt.timestamp_millis())
2285        .unwrap_or(created_at);
2286
2287    // Build a session-like JSON structure for compatibility
2288    let session_json = serde_json::json!({
2289        "id": conv.id,
2290        "title": conv.title,
2291        "model": conv.model,
2292        "created_at": conv.created_at.to_rfc3339(),
2293        "updated_at": conv.updated_at.map(|dt| dt.to_rfc3339()),
2294        "messages": conv.messages.iter().map(|m| {
2295            serde_json::json!({
2296                "id": m.id,
2297                "role": m.role,
2298                "content": m.content,
2299                "timestamp": m.timestamp.map(|dt| dt.to_rfc3339()),
2300                "model": m.model,
2301            })
2302        }).collect::<Vec<_>>(),
2303    });
2304
2305    conn.execute(
2306        r#"
2307        INSERT OR REPLACE INTO sessions 
2308        (id, provider, provider_type, workspace_id, workspace_name, title, 
2309         message_count, created_at, updated_at, harvested_at, session_json)
2310        VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
2311        "#,
2312        params![
2313            conv.id,
2314            provider,
2315            provider,
2316            workspace_name,
2317            conv.title.clone().unwrap_or_else(|| "Untitled".to_string()),
2318            conv.messages.len() as i64,
2319            created_at,
2320            updated_at,
2321            now,
2322            session_json.to_string(),
2323        ],
2324    )?;
2325
2326    // Also populate messages_v2 table for detailed message storage
2327    populate_cloud_messages(conn, &conv.id, conv)?;
2328
2329    Ok(())
2330}
2331
2332/// Populate messages_v2 table from a CloudConversation
2333fn populate_cloud_messages(
2334    conn: &Connection,
2335    session_id: &str,
2336    conv: &crate::providers::cloud::common::CloudConversation,
2337) -> Result<()> {
2338    // Delete existing messages for this session to avoid duplicates
2339    conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
2340
2341    for (idx, message) in conv.messages.iter().enumerate() {
2342        let timestamp = message.timestamp.map(|dt| dt.timestamp_millis());
2343        let role = match message.role.as_str() {
2344            "user" | "human" => "user",
2345            "assistant" => "assistant",
2346            "system" => "system",
2347            other => other,
2348        };
2349
2350        conn.execute(
2351            r#"
2352            INSERT INTO messages_v2 
2353            (session_id, message_index, request_id, response_id, role, 
2354             content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
2355            VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, 0, NULL)
2356            "#,
2357            params![
2358                session_id,
2359                idx as i64,
2360                message.id,
2361                role,
2362                &message.content,
2363                &message.content,
2364                message.model.as_deref(),
2365                timestamp,
2366            ],
2367        )?;
2368    }
2369
2370    Ok(())
2371}
2372
2373fn update_harvest_metadata(conn: &Connection) -> Result<()> {
2374    conn.execute(
2375        "INSERT OR REPLACE INTO harvest_metadata (key, value) VALUES ('last_harvest', datetime('now'))",
2376        [],
2377    )?;
2378    Ok(())
2379}
2380
2381fn init_git_tracking(db_path: &Path) -> Result<()> {
2382    let db_dir = db_path.parent().unwrap_or(Path::new("."));
2383    let db_name = db_path.file_name().unwrap().to_str().unwrap();
2384
2385    println!("\n{} Initializing Git Tracking", "[G]".green().bold());
2386
2387    // Check if already a git repo
2388    if !db_dir.join(".git").exists() {
2389        let output = Command::new("git")
2390            .current_dir(db_dir)
2391            .args(["init"])
2392            .output()
2393            .context("Failed to run git init")?;
2394
2395        if !output.status.success() {
2396            anyhow::bail!(
2397                "Git init failed: {}",
2398                String::from_utf8_lossy(&output.stderr)
2399            );
2400        }
2401        println!("{} Initialized git repository", "[+]".green());
2402    } else {
2403        println!("{} Git repository already exists", "[i]".blue());
2404    }
2405
2406    // Create .gitignore if needed
2407    let gitignore_path = db_dir.join(".gitignore");
2408    if !gitignore_path.exists() {
2409        fs::write(
2410            &gitignore_path,
2411            "*.db-journal\n*.db-wal\n*.db-shm\n*.backup\n",
2412        )?;
2413        println!("{} Created .gitignore", "[+]".green());
2414    }
2415
2416    // Add database to git
2417    let output = Command::new("git")
2418        .current_dir(db_dir)
2419        .args(["add", db_name])
2420        .output()?;
2421
2422    if output.status.success() {
2423        println!("{} Added {} to git", "[+]".green(), db_name);
2424    }
2425
2426    // Initial commit if no commits yet
2427    let output = Command::new("git")
2428        .current_dir(db_dir)
2429        .args(["rev-parse", "HEAD"])
2430        .output()?;
2431
2432    if !output.status.success() {
2433        let output = Command::new("git")
2434            .current_dir(db_dir)
2435            .args(["commit", "-m", "Initialize harvest database"])
2436            .output()?;
2437
2438        if output.status.success() {
2439            println!("{} Created initial commit", "[+]".green());
2440        }
2441    }
2442
2443    println!("\n{} Git tracking enabled!", "[+]".green().bold());
2444    println!("   Run 'csm harvest git commit -m \"message\"' to save changes");
2445    println!("   Run 'csm harvest git log' to view history");
2446
2447    Ok(())
2448}
2449
2450fn git_commit_harvest(db_path: &Path, message: &str) -> Result<()> {
2451    let db_dir = db_path.parent().unwrap_or(Path::new("."));
2452    let db_name = db_path.file_name().unwrap().to_str().unwrap();
2453
2454    // Stage the database
2455    let output = Command::new("git")
2456        .current_dir(db_dir)
2457        .args(["add", db_name])
2458        .output()
2459        .context("Failed to stage database")?;
2460
2461    if !output.status.success() {
2462        anyhow::bail!(
2463            "Git add failed: {}",
2464            String::from_utf8_lossy(&output.stderr)
2465        );
2466    }
2467
2468    // Commit
2469    let output = Command::new("git")
2470        .current_dir(db_dir)
2471        .args(["commit", "-m", message])
2472        .output()
2473        .context("Failed to commit")?;
2474
2475    if !output.status.success() {
2476        let stderr = String::from_utf8_lossy(&output.stderr);
2477        if stderr.contains("nothing to commit") {
2478            println!("{} Nothing to commit", "[i]".blue());
2479            return Ok(());
2480        }
2481        anyhow::bail!("Git commit failed: {}", stderr);
2482    }
2483
2484    // Get commit hash
2485    let output = Command::new("git")
2486        .current_dir(db_dir)
2487        .args(["rev-parse", "--short", "HEAD"])
2488        .output()?;
2489
2490    let hash = String::from_utf8_lossy(&output.stdout).trim().to_string();
2491    println!("{} Committed: {} - {}", "[+]".green(), hash.cyan(), message);
2492
2493    Ok(())
2494}
2495
2496// ============================================================================
2497// Share Link Commands
2498// ============================================================================
2499
2500/// Import a chat session from a share link URL
2501pub fn harvest_share(
2502    db_path: Option<&str>,
2503    url: &str,
2504    name: Option<&str>,
2505    _workspace: Option<&str>,
2506) -> Result<()> {
2507    let db_path = get_db_path(db_path)?;
2508
2509    // Parse the share link
2510    let share_info = match ShareLinkParser::parse(url) {
2511        Some(info) => info,
2512        None => {
2513            println!("{} Unrecognized share link format", "[!]".yellow());
2514            println!("   Supported providers: ChatGPT, Claude, Gemini, Perplexity");
2515            println!("   Example URLs:");
2516            println!("   - https://chat.openai.com/share/abc123...");
2517            println!("   - https://claude.ai/share/xyz789...");
2518            anyhow::bail!("Could not parse share link URL");
2519        }
2520    };
2521
2522    println!("{}", "=".repeat(60).cyan());
2523    println!("{}", " Share Link Import ".bold().cyan());
2524    println!("{}", "=".repeat(60).cyan());
2525    println!();
2526
2527    println!(
2528        "{} Detected provider: {}",
2529        "[i]".blue(),
2530        share_info.provider.bold()
2531    );
2532    println!("{} Share ID: {}", "[i]".blue(), share_info.share_id);
2533
2534    // Open or create the database with new schema
2535    let db = ChatDatabase::open(&db_path)?;
2536
2537    // Check if already imported
2538    let conn = db.connection();
2539    let existing: Option<(String, i64)> = conn
2540        .query_row(
2541            "SELECT id, imported FROM share_links WHERE url = ?",
2542            [url],
2543            |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
2544        )
2545        .ok();
2546
2547    if let Some((id, imported)) = existing {
2548        println!();
2549        println!(
2550            "{} This share link has already been registered (ID: {})",
2551            "[!]".yellow(),
2552            id
2553        );
2554
2555        let status = if imported == 1 { "imported" } else { "pending" };
2556        println!("{} Current status: {}", "[i]".blue(), status);
2557
2558        if imported == 1 {
2559            println!(
2560                "{} Session already imported - no action needed",
2561                "[+]".green()
2562            );
2563            return Ok(());
2564        }
2565    } else {
2566        // Insert the share link as pending
2567        let link_id = uuid::Uuid::new_v4().to_string();
2568        let now = chrono::Utc::now().timestamp();
2569        conn.execute(
2570            "INSERT INTO share_links (id, url, provider, share_id, title, imported, created_at) VALUES (?, ?, ?, ?, ?, 0, ?)",
2571            params![link_id, url, share_info.provider, share_info.share_id, name, now],
2572        )?;
2573
2574        println!(
2575            "{} Registered share link (ID: {})",
2576            "[+]".green(),
2577            &link_id[..8]
2578        );
2579    }
2580
2581    println!();
2582    println!("{}", "-".repeat(60).dimmed());
2583    println!();
2584
2585    // TODO: Implement actual content fetching
2586    // For now, we just register the share link and mark it as pending
2587    // Actual fetching would require:
2588    // 1. Browser cookie extraction (already implemented in browser.rs)
2589    // 2. HTTP request with auth cookies
2590    // 3. HTML/JSON parsing of the share page
2591    // 4. Conversion to our universal format
2592
2593    println!("{} Share link registered as pending", "[i]".blue());
2594    println!();
2595    println!("{}", "Note:".bold().yellow());
2596    println!("   Automatic content fetching from share links is not yet implemented.");
2597    println!("   For now, you can:");
2598    println!("   1. Open the share link in your browser");
2599    println!("   2. Export the conversation manually");
2600    println!("   3. Import with: csm import <file>");
2601    println!();
2602    println!("   Or use 'csm harvest shares' to view pending links.");
2603
2604    Ok(())
2605}
2606
2607/// List share links in the harvest database
2608pub fn harvest_shares(
2609    db_path: Option<&str>,
2610    status_filter: Option<&str>,
2611    limit: usize,
2612) -> Result<()> {
2613    let db_path = get_db_path(db_path)?;
2614
2615    if !db_path.exists() {
2616        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2617    }
2618
2619    let db = ChatDatabase::open(&db_path)?;
2620    let conn = db.connection();
2621
2622    println!("{}", "=".repeat(70).cyan());
2623    println!("{}", " Share Links ".bold().cyan());
2624    println!("{}", "=".repeat(70).cyan());
2625    println!();
2626
2627    let query = match status_filter {
2628        Some("pending") => format!(
2629            "SELECT id, url, provider, share_id, title, imported, created_at 
2630             FROM share_links WHERE imported = 0 ORDER BY created_at DESC LIMIT {}",
2631            limit
2632        ),
2633        Some("imported") => format!(
2634            "SELECT id, url, provider, share_id, title, imported, created_at 
2635             FROM share_links WHERE imported = 1 ORDER BY created_at DESC LIMIT {}",
2636            limit
2637        ),
2638        Some(_) | None => format!(
2639            "SELECT id, url, provider, share_id, title, imported, created_at 
2640             FROM share_links ORDER BY created_at DESC LIMIT {}",
2641            limit
2642        ),
2643    };
2644
2645    let mut stmt = conn.prepare(&query)?;
2646
2647    let rows: Vec<_> = stmt
2648        .query_map([], |row| {
2649            Ok((
2650                row.get::<_, String>(0)?,
2651                row.get::<_, String>(1)?,
2652                row.get::<_, String>(2)?,
2653                row.get::<_, Option<String>>(3)?,
2654                row.get::<_, Option<String>>(4)?,
2655                row.get::<_, i64>(5)?,
2656                row.get::<_, i64>(6)?,
2657            ))
2658        })?
2659        .collect::<Result<Vec<_>, _>>()?;
2660
2661    if rows.is_empty() {
2662        if let Some(status) = status_filter {
2663            println!("{} No share links with status '{}'", "[i]".blue(), status);
2664        } else {
2665            println!("{} No share links found", "[i]".blue());
2666        }
2667        println!("   Use 'csm harvest share <url>' to add a share link");
2668        return Ok(());
2669    }
2670
2671    // Count by status
2672    let pending_count: i64 = conn
2673        .query_row(
2674            "SELECT COUNT(*) FROM share_links WHERE imported = 0",
2675            [],
2676            |row| row.get(0),
2677        )
2678        .unwrap_or(0);
2679
2680    let imported_count: i64 = conn
2681        .query_row(
2682            "SELECT COUNT(*) FROM share_links WHERE imported = 1",
2683            [],
2684            |row| row.get(0),
2685        )
2686        .unwrap_or(0);
2687
2688    println!(
2689        "{} {} pending, {} imported",
2690        "[i]".blue(),
2691        pending_count.to_string().yellow(),
2692        imported_count.to_string().green()
2693    );
2694    println!();
2695
2696    for (id, url, provider, _share_id, title, imported, created_at) in rows {
2697        let status = if imported == 1 { "imported" } else { "pending" };
2698        let status_colored = if imported == 1 {
2699            status.green()
2700        } else {
2701            status.yellow()
2702        };
2703
2704        println!(
2705            "{} [{}] {} - {}",
2706            format!("#{}", &id[..8]).dimmed(),
2707            status_colored,
2708            provider.bold(),
2709            title.as_deref().unwrap_or("(untitled)")
2710        );
2711
2712        // Truncate URL for display
2713        let display_url = if url.len() > 60 {
2714            format!("{}...", &url[..57])
2715        } else {
2716            url.clone()
2717        };
2718        println!("   {} {}", "URL:".dimmed(), display_url.dimmed());
2719
2720        // Format timestamp
2721        let timestamp = chrono::DateTime::from_timestamp(created_at, 0)
2722            .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
2723            .unwrap_or_else(|| created_at.to_string());
2724        println!("   {} {}", "Added:".dimmed(), timestamp.dimmed());
2725        println!();
2726    }
2727
2728    Ok(())
2729}
2730
2731// ============================================================================
2732// Checkpoint Commands
2733// ============================================================================
2734
2735/// Create a checkpoint (version snapshot) of a session
2736pub fn harvest_checkpoint(
2737    db_path: Option<&str>,
2738    session_id: &str,
2739    message: Option<&str>,
2740) -> Result<()> {
2741    let db_path = get_db_path(db_path)?;
2742
2743    if !db_path.exists() {
2744        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2745    }
2746
2747    let db = ChatDatabase::open(&db_path)?;
2748    let conn = db.connection();
2749
2750    // Find the session
2751    let session: Option<(i64, String, i64)> = conn
2752        .query_row(
2753            "SELECT id, session_id, message_count FROM sessions WHERE session_id = ? OR id = ?",
2754            params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2755            |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2756        )
2757        .ok();
2758
2759    let (internal_id, actual_session_id, msg_count) = match session {
2760        Some(s) => s,
2761        None => {
2762            println!("{} Session not found: {}", "[!]".red(), session_id);
2763            println!("   Use 'csm harvest list' to see available sessions");
2764            anyhow::bail!("Session not found");
2765        }
2766    };
2767
2768    // Get current checkpoint number
2769    let checkpoint_num: i64 = conn.query_row(
2770        "SELECT COALESCE(MAX(checkpoint_number), 0) + 1 FROM checkpoints WHERE session_id = ?",
2771        [internal_id],
2772        |row| row.get(0),
2773    )?;
2774
2775    // Calculate content hash (simple hash of message count + session_id for now)
2776    let content_hash = format!(
2777        "{:x}",
2778        md5_hash(&format!("{}:{}", actual_session_id, msg_count))
2779    );
2780
2781    // Get message snapshot
2782    let messages: Vec<String> = {
2783        let mut stmt = conn
2784            .prepare("SELECT content FROM messages WHERE session_id = ? ORDER BY message_index")?;
2785        let rows = stmt.query_map([internal_id], |row| row.get::<_, String>(0))?;
2786        rows.filter_map(|r| r.ok()).collect()
2787    };
2788
2789    let snapshot = serde_json::json!({
2790        "message_count": msg_count,
2791        "messages": messages,
2792    });
2793
2794    let default_message = format!("Checkpoint {}", checkpoint_num);
2795    let message_text = message.unwrap_or(&default_message);
2796
2797    // Create checkpoint
2798    conn.execute(
2799        "INSERT INTO checkpoints (session_id, checkpoint_number, message, message_count, content_hash, snapshot)
2800         VALUES (?, ?, ?, ?, ?, ?)",
2801        params![
2802            internal_id,
2803            checkpoint_num,
2804            message_text,
2805            msg_count,
2806            content_hash,
2807            snapshot.to_string()
2808        ],
2809    )?;
2810
2811    println!("{}", "=".repeat(60).cyan());
2812    println!("{}", " Checkpoint Created ".bold().cyan());
2813    println!("{}", "=".repeat(60).cyan());
2814    println!();
2815    println!("{} Session: {}", "[+]".green(), actual_session_id);
2816    println!(
2817        "{} Checkpoint #{}: {}",
2818        "[+]".green(),
2819        checkpoint_num,
2820        message_text
2821    );
2822    println!("{} Messages: {}", "[i]".blue(), msg_count);
2823    println!("{} Hash: {}", "[i]".blue(), &content_hash[..16]);
2824    println!();
2825    println!(
2826        "   Use 'csm harvest checkpoints {}' to view history",
2827        session_id
2828    );
2829
2830    Ok(())
2831}
2832
2833/// List checkpoints for a session
2834pub fn harvest_checkpoints(db_path: Option<&str>, session_id: &str) -> Result<()> {
2835    let db_path = get_db_path(db_path)?;
2836
2837    if !db_path.exists() {
2838        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2839    }
2840
2841    let db = ChatDatabase::open(&db_path)?;
2842    let conn = db.connection();
2843
2844    // Find the session
2845    let session: Option<(i64, String, String)> = conn
2846        .query_row(
2847            "SELECT id, session_id, name FROM sessions WHERE session_id = ? OR id = ?",
2848            params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2849            |row| {
2850                Ok((
2851                    row.get(0)?,
2852                    row.get(1)?,
2853                    row.get::<_, Option<String>>(2)?.unwrap_or_default(),
2854                ))
2855            },
2856        )
2857        .ok();
2858
2859    let (internal_id, actual_session_id, name) = match session {
2860        Some(s) => s,
2861        None => {
2862            println!("{} Session not found: {}", "[!]".red(), session_id);
2863            anyhow::bail!("Session not found");
2864        }
2865    };
2866
2867    println!("{}", "=".repeat(70).cyan());
2868    println!("{}", " Session Checkpoints ".bold().cyan());
2869    println!("{}", "=".repeat(70).cyan());
2870    println!();
2871    println!(
2872        "{} Session: {} {}",
2873        "[i]".blue(),
2874        actual_session_id,
2875        if !name.is_empty() {
2876            format!("({})", name)
2877        } else {
2878            String::new()
2879        }
2880    );
2881    println!();
2882
2883    let mut stmt = conn.prepare(
2884        "SELECT checkpoint_number, message, message_count, content_hash, created_at
2885         FROM checkpoints WHERE session_id = ? ORDER BY checkpoint_number DESC",
2886    )?;
2887
2888    let checkpoints: Vec<_> = stmt
2889        .query_map([internal_id], |row| {
2890            Ok((
2891                row.get::<_, i64>(0)?,
2892                row.get::<_, String>(1)?,
2893                row.get::<_, i64>(2)?,
2894                row.get::<_, String>(3)?,
2895                row.get::<_, String>(4)?,
2896            ))
2897        })?
2898        .collect::<Result<Vec<_>, _>>()?;
2899
2900    if checkpoints.is_empty() {
2901        println!("{} No checkpoints found for this session", "[i]".blue());
2902        println!(
2903            "   Use 'csm harvest checkpoint {} -m \"message\"' to create one",
2904            session_id
2905        );
2906        return Ok(());
2907    }
2908
2909    println!("{} {} checkpoints found:", "[i]".blue(), checkpoints.len());
2910    println!();
2911
2912    for (num, msg, msg_count, hash, created_at) in checkpoints {
2913        println!("  {} #{} - {}", "*".cyan(), num.to_string().bold(), msg);
2914        println!(
2915            "       {} messages | {} | {}",
2916            msg_count,
2917            &hash[..12],
2918            created_at.dimmed()
2919        );
2920    }
2921
2922    println!();
2923    println!(
2924        "   Use 'csm harvest restore {} <checkpoint>' to restore",
2925        session_id
2926    );
2927
2928    Ok(())
2929}
2930
2931/// Restore a session to a previous checkpoint
2932pub fn harvest_restore_checkpoint(
2933    db_path: Option<&str>,
2934    session_id: &str,
2935    checkpoint_number: i64,
2936) -> Result<()> {
2937    let db_path = get_db_path(db_path)?;
2938
2939    if !db_path.exists() {
2940        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2941    }
2942
2943    let db = ChatDatabase::open(&db_path)?;
2944    let conn = db.connection();
2945
2946    // Find the session
2947    let session: Option<(i64, String)> = conn
2948        .query_row(
2949            "SELECT id, session_id FROM sessions WHERE session_id = ? OR id = ?",
2950            params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2951            |row| Ok((row.get(0)?, row.get(1)?)),
2952        )
2953        .ok();
2954
2955    let (internal_id, actual_session_id) = match session {
2956        Some(s) => s,
2957        None => {
2958            println!("{} Session not found: {}", "[!]".red(), session_id);
2959            anyhow::bail!("Session not found");
2960        }
2961    };
2962
2963    // Find the checkpoint
2964    let checkpoint: Option<(String, i64)> = conn
2965        .query_row(
2966            "SELECT snapshot, message_count FROM checkpoints 
2967         WHERE session_id = ? AND checkpoint_number = ?",
2968            params![internal_id, checkpoint_number],
2969            |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
2970        )
2971        .ok();
2972
2973    let (snapshot_json, original_msg_count) = match checkpoint {
2974        Some(c) => c,
2975        None => {
2976            println!(
2977                "{} Checkpoint #{} not found for session {}",
2978                "[!]".red(),
2979                checkpoint_number,
2980                session_id
2981            );
2982            println!(
2983                "   Use 'csm harvest checkpoints {}' to see available checkpoints",
2984                session_id
2985            );
2986            anyhow::bail!("Checkpoint not found");
2987        }
2988    };
2989
2990    // Parse snapshot
2991    let snapshot: serde_json::Value =
2992        serde_json::from_str(&snapshot_json).context("Failed to parse checkpoint snapshot")?;
2993
2994    let messages = snapshot["messages"]
2995        .as_array()
2996        .context("Invalid snapshot format")?;
2997
2998    println!("{}", "=".repeat(60).cyan());
2999    println!("{}", " Restore Checkpoint ".bold().yellow());
3000    println!("{}", "=".repeat(60).cyan());
3001    println!();
3002    println!("{} Session: {}", "[i]".blue(), actual_session_id);
3003    println!(
3004        "{} Restoring to checkpoint #{}",
3005        "[!]".yellow(),
3006        checkpoint_number
3007    );
3008    println!(
3009        "{} Messages to restore: {}",
3010        "[i]".blue(),
3011        original_msg_count
3012    );
3013    println!();
3014
3015    // Delete current messages
3016    let deleted = conn.execute("DELETE FROM messages WHERE session_id = ?", [internal_id])?;
3017
3018    println!("{} Removed {} current messages", "[-]".red(), deleted);
3019
3020    // Restore messages from snapshot
3021    for (idx, msg) in messages.iter().enumerate() {
3022        if let Some(content) = msg.as_str() {
3023            conn.execute(
3024                "INSERT INTO messages (session_id, message_index, role, content) VALUES (?, ?, 'unknown', ?)",
3025                params![internal_id, idx as i64, content],
3026            )?;
3027        }
3028    }
3029
3030    // Update session message count
3031    conn.execute(
3032        "UPDATE sessions SET message_count = ?, updated_at = datetime('now') WHERE id = ?",
3033        params![original_msg_count, internal_id],
3034    )?;
3035
3036    println!(
3037        "{} Restored {} messages from checkpoint",
3038        "[+]".green(),
3039        messages.len()
3040    );
3041    println!();
3042    println!(
3043        "{} Session restored to checkpoint #{}",
3044        "[+]".green().bold(),
3045        checkpoint_number
3046    );
3047
3048    Ok(())
3049}
3050
3051// ============================================================================
3052// Search Commands
3053// ============================================================================
3054
3055/// Rebuild the FTS index from messages_v2 table
3056pub fn harvest_rebuild_fts(db_path: Option<&str>) -> Result<()> {
3057    let db_path = get_db_path(db_path)?;
3058
3059    if !db_path.exists() {
3060        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3061    }
3062
3063    let conn = Connection::open(&db_path)?;
3064
3065    println!("{}", "=".repeat(70).cyan());
3066    println!("{} Rebuilding Full-Text Search Index", "[*]".bold());
3067    println!("{}", "=".repeat(70).cyan());
3068    println!();
3069
3070    // Drop existing FTS table and triggers
3071    println!("{} Dropping old FTS index...", "[*]".blue());
3072    conn.execute_batch(
3073        r#"
3074        DROP TRIGGER IF EXISTS messages_v2_ai;
3075        DROP TRIGGER IF EXISTS messages_v2_ad;
3076        DROP TRIGGER IF EXISTS messages_v2_au;
3077        DROP TABLE IF EXISTS messages_fts;
3078        "#,
3079    )?;
3080
3081    // Create new FTS table with triggers
3082    println!("{} Creating new FTS index...", "[*]".blue());
3083    conn.execute_batch(
3084        r#"
3085        CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
3086            content_raw
3087        );
3088        
3089        CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
3090            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3091        END;
3092        
3093        CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
3094            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3095        END;
3096        
3097        CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
3098            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3099            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3100        END;
3101        "#,
3102    )?;
3103
3104    // Count total messages
3105    let total_messages: i64 =
3106        conn.query_row("SELECT COUNT(*) FROM messages_v2", [], |row| row.get(0))?;
3107
3108    // Populate FTS from existing messages
3109    println!("{} Indexing {} messages...", "[*]".blue(), total_messages);
3110
3111    conn.execute(
3112        "INSERT INTO messages_fts(rowid, content_raw) SELECT id, content_raw FROM messages_v2",
3113        [],
3114    )?;
3115
3116    // Verify the index
3117    let indexed: i64 = conn.query_row("SELECT COUNT(*) FROM messages_fts", [], |row| row.get(0))?;
3118
3119    println!();
3120    println!("{} FTS index rebuilt successfully!", "[+]".green().bold());
3121    println!("   {} messages indexed", indexed);
3122
3123    Ok(())
3124}
3125
3126/// Full-text search across all sessions
3127pub fn harvest_search(
3128    db_path: Option<&str>,
3129    query: &str,
3130    provider_filter: Option<&str>,
3131    limit: usize,
3132) -> Result<()> {
3133    let db_path = get_db_path(db_path)?;
3134
3135    if !db_path.exists() {
3136        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3137    }
3138
3139    let db = ChatDatabase::open(&db_path)?;
3140    let conn = db.connection();
3141
3142    println!("{}", "=".repeat(70).cyan());
3143    println!("{} Search: {}", "[?]".bold(), query.bold());
3144    println!("{}", "=".repeat(70).cyan());
3145    println!();
3146
3147    // Results: (session_id, provider, title, content)
3148    let results: Vec<(String, String, String, String)> = {
3149        // Check if FTS table exists
3150        let fts_exists: bool = conn
3151            .query_row(
3152                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='messages_fts'",
3153                [],
3154                |_| Ok(true),
3155            )
3156            .unwrap_or(false);
3157
3158        if fts_exists {
3159            // Use FTS search - query the FTS table and join with messages/sessions
3160            let sql = format!(
3161                "SELECT s.id, s.provider, s.title, m.content_raw
3162                 FROM messages_fts fts
3163                 JOIN messages_v2 m ON m.id = fts.rowid
3164                 JOIN sessions s ON m.session_id = s.id
3165                 WHERE messages_fts MATCH ?
3166                 {}
3167                 LIMIT {}",
3168                if provider_filter.is_some() {
3169                    "AND s.provider = ?"
3170                } else {
3171                    ""
3172                },
3173                limit
3174            );
3175
3176            let mut stmt = conn.prepare(&sql)?;
3177
3178            if let Some(provider) = provider_filter {
3179                stmt.query_map(params![query, provider], |row| {
3180                    Ok((
3181                        row.get::<_, String>(0)?,
3182                        row.get::<_, String>(1)?,
3183                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3184                        row.get::<_, String>(3)?,
3185                    ))
3186                })?
3187                .collect::<Result<Vec<_>, _>>()?
3188            } else {
3189                stmt.query_map([query], |row| {
3190                    Ok((
3191                        row.get::<_, String>(0)?,
3192                        row.get::<_, String>(1)?,
3193                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3194                        row.get::<_, String>(3)?,
3195                    ))
3196                })?
3197                .collect::<Result<Vec<_>, _>>()?
3198            }
3199        } else {
3200            // Fall back to LIKE search
3201            let search_pattern = format!("%{}%", query);
3202            let sql = format!(
3203                "SELECT s.id, s.provider, s.title, m.content_raw
3204                 FROM messages_v2 m
3205                 JOIN sessions s ON m.session_id = s.id
3206                 WHERE m.content_raw LIKE ?
3207                 {}
3208                 LIMIT {}",
3209                if provider_filter.is_some() {
3210                    "AND s.provider = ?"
3211                } else {
3212                    ""
3213                },
3214                limit
3215            );
3216
3217            let mut stmt = conn.prepare(&sql)?;
3218
3219            if let Some(provider) = provider_filter {
3220                stmt.query_map(params![search_pattern, provider], |row| {
3221                    Ok((
3222                        row.get::<_, String>(0)?,
3223                        row.get::<_, String>(1)?,
3224                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3225                        row.get::<_, String>(3)?,
3226                    ))
3227                })?
3228                .collect::<Result<Vec<_>, _>>()?
3229            } else {
3230                stmt.query_map([search_pattern], |row| {
3231                    Ok((
3232                        row.get::<_, String>(0)?,
3233                        row.get::<_, String>(1)?,
3234                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3235                        row.get::<_, String>(3)?,
3236                    ))
3237                })?
3238                .collect::<Result<Vec<_>, _>>()?
3239            }
3240        }
3241    };
3242
3243    if results.is_empty() {
3244        println!("{} No results found for '{}'", "[i]".blue(), query);
3245        return Ok(());
3246    }
3247
3248    println!("{} Found {} result(s):", "[i]".blue(), results.len());
3249    println!();
3250
3251    for (session_id, provider, title, content) in results {
3252        // Highlight the search term in content
3253        let display_name = if title.is_empty() {
3254            session_id.clone()
3255        } else {
3256            format!("{} ({})", title, &session_id[..8.min(session_id.len())])
3257        };
3258
3259        println!(
3260            "{} {} [{}]",
3261            "*".cyan(),
3262            display_name.bold(),
3263            provider.dimmed()
3264        );
3265
3266        // Show snippet around the match
3267        let snippet = create_search_snippet(&content, query, 100);
3268        println!("   {}", snippet.dimmed());
3269        println!();
3270    }
3271
3272    Ok(())
3273}
3274
3275/// Create a search result snippet with the query highlighted
3276fn create_search_snippet(content: &str, query: &str, max_len: usize) -> String {
3277    let content_lower = content.to_lowercase();
3278    let query_lower = query.to_lowercase();
3279
3280    if let Some(pos) = content_lower.find(&query_lower) {
3281        let start = pos.saturating_sub(max_len / 2);
3282        let end = (pos + query.len() + max_len / 2).min(content.len());
3283
3284        let mut snippet = String::new();
3285        if start > 0 {
3286            snippet.push_str("...");
3287        }
3288        snippet.push_str(&content[start..end]);
3289        if end < content.len() {
3290            snippet.push_str("...");
3291        }
3292
3293        // Replace newlines with spaces for display
3294        snippet.replace('\n', " ").replace('\r', "")
3295    } else {
3296        // Fallback: just show first max_len chars
3297        if content.len() > max_len {
3298            format!("{}...", &content[..max_len])
3299        } else {
3300            content.to_string()
3301        }
3302    }
3303}
3304
3305/// Simple MD5 hash for content checksums
3306fn md5_hash(data: &str) -> u128 {
3307    // Simple hash implementation (not cryptographically secure, just for checksums)
3308    let mut hash: u128 = 0;
3309    for (i, byte) in data.bytes().enumerate() {
3310        hash = hash.wrapping_add((byte as u128).wrapping_mul((i as u128).wrapping_add(1)));
3311        hash = hash.rotate_left(7);
3312    }
3313    hash
3314}