chasm_cli/commands/
harvest.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: Apache-2.0
3//! Harvest commands for collecting chat sessions from multiple providers
4//!
5//! The harvester scans for local and remote LLM providers, downloads all chat
6//! sessions into a single SQLite database, and allows users to version-track
7//! the database with git.
8
9use anyhow::{Context, Result};
10use chrono::{DateTime, Utc};
11use colored::*;
12use rusqlite::{params, Connection};
13use serde::{Deserialize, Serialize};
14use std::fs;
15use std::path::{Path, PathBuf};
16use std::process::Command;
17use std::time::Duration;
18
19use crate::browser::{get_installed_browsers, scan_browser_auth, BrowserType};
20use crate::database::{ChatDatabase, ShareLinkParser};
21use crate::models::ChatSession;
22use crate::providers::{ProviderRegistry, ProviderType};
23use crate::workspace::{discover_workspaces, get_chat_sessions_from_workspace};
24
25/// Check if a string is an empty code block marker (just ``` with no content)
26fn is_empty_code_block(s: &str) -> bool {
27    // Match patterns like "```", "```\n", "```language", "```\n```", "```\n\n```"
28    let s = s.trim();
29    if s == "```" {
30        return true;
31    }
32    // Check for code block with just a language identifier and no content
33    if s.starts_with("```") && !s.contains('\n') {
34        return true;
35    }
36    // Check for empty code block with opening and closing (possibly with whitespace-only lines)
37    let lines: Vec<&str> = s.lines().collect();
38    if lines.len() >= 2 && lines[0].starts_with("```") && lines.last() == Some(&"```") {
39        // Check if all lines between opening and closing are empty or whitespace
40        let content_lines = &lines[1..lines.len() - 1];
41        if content_lines.iter().all(|line| line.trim().is_empty()) {
42            return true;
43        }
44    }
45    false
46}
47
48/// Type alias for harvested session query result (id, provider, title, msg_count, created, last_msg, workspace)
49type HarvestQueryResult = (String, String, String, i64, i64, i64, Option<String>);
50
51/// Web-based LLM provider endpoint configuration
52#[derive(Debug, Clone)]
53struct WebProviderEndpoint {
54    name: &'static str,
55    url: &'static str,
56    description: &'static str,
57}
58
59/// List of known web-based LLM provider endpoints to probe
60const WEB_PROVIDERS: &[WebProviderEndpoint] = &[
61    WebProviderEndpoint {
62        name: "ChatGPT",
63        url: "https://chat.openai.com",
64        description: "OpenAI ChatGPT Web Interface",
65    },
66    WebProviderEndpoint {
67        name: "Claude",
68        url: "https://claude.ai",
69        description: "Anthropic Claude Web Interface",
70    },
71    WebProviderEndpoint {
72        name: "Gemini",
73        url: "https://gemini.google.com",
74        description: "Google Gemini Web Interface",
75    },
76    WebProviderEndpoint {
77        name: "Perplexity",
78        url: "https://www.perplexity.ai",
79        description: "Perplexity AI Search Interface",
80    },
81    WebProviderEndpoint {
82        name: "DeepSeek",
83        url: "https://chat.deepseek.com",
84        description: "DeepSeek Chat Interface",
85    },
86    WebProviderEndpoint {
87        name: "Poe",
88        url: "https://poe.com",
89        description: "Quora Poe Multi-model Chat",
90    },
91    WebProviderEndpoint {
92        name: "You.com",
93        url: "https://you.com/chat",
94        description: "You.com AI Chat",
95    },
96    WebProviderEndpoint {
97        name: "HuggingChat",
98        url: "https://huggingface.co/chat",
99        description: "HuggingFace Chat Interface",
100    },
101    WebProviderEndpoint {
102        name: "Copilot",
103        url: "https://copilot.microsoft.com",
104        description: "Microsoft Copilot Web Interface",
105    },
106    WebProviderEndpoint {
107        name: "Mistral",
108        url: "https://chat.mistral.ai",
109        description: "Mistral AI Le Chat Interface",
110    },
111    WebProviderEndpoint {
112        name: "Cohere",
113        url: "https://coral.cohere.com",
114        description: "Cohere Coral Chat Interface",
115    },
116    WebProviderEndpoint {
117        name: "Groq",
118        url: "https://groq.com",
119        description: "Groq Fast Inference Interface",
120    },
121    WebProviderEndpoint {
122        name: "Phind",
123        url: "https://www.phind.com",
124        description: "Phind AI Code Search",
125    },
126    WebProviderEndpoint {
127        name: "Character.AI",
128        url: "https://character.ai",
129        description: "Character.AI Chat Interface",
130    },
131    WebProviderEndpoint {
132        name: "Pi",
133        url: "https://pi.ai",
134        description: "Inflection Pi Personal AI",
135    },
136];
137
138/// Scan for reachable web-based LLM providers (parallel)
139fn scan_web_providers(timeout_secs: u64) -> Vec<String> {
140    use std::sync::{Arc, Mutex};
141    use std::thread;
142
143    let reachable = Arc::new(Mutex::new(Vec::new()));
144    let results = Arc::new(Mutex::new(Vec::new()));
145
146    // Create threads for parallel scanning
147    let handles: Vec<_> = WEB_PROVIDERS
148        .iter()
149        .map(|provider| {
150            let reachable = Arc::clone(&reachable);
151            let results = Arc::clone(&results);
152            let timeout = timeout_secs;
153            let name = provider.name;
154            let url = provider.url;
155            let desc = provider.description;
156
157            thread::spawn(move || {
158                let client = match reqwest::blocking::Client::builder()
159                    .timeout(Duration::from_secs(timeout))
160                    .connect_timeout(Duration::from_secs(timeout.min(3))) // Short connect timeout
161                    .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
162                    .build()
163                {
164                    Ok(c) => c,
165                    Err(_) => return,
166                };
167
168                let result: (&str, bool, String, &str) = match client.head(url).send() {
169                    Ok(response) => {
170                        let status = response.status();
171                        if status.is_success() || status.is_redirection() {
172                            reachable.lock().unwrap().push(name.to_string());
173                            (name, true, desc.to_string(), url)
174                        } else {
175                            (name, false, format!("HTTP {}", status.as_u16()), url)
176                        }
177                    }
178                    Err(e) => {
179                        let reason = if e.is_timeout() {
180                            "timeout".to_string()
181                        } else if e.is_connect() {
182                            "connection failed".to_string()
183                        } else {
184                            "unreachable".to_string()
185                        };
186                        (name, false, reason, url)
187                    }
188                };
189
190                results.lock().unwrap().push((
191                    result.0.to_string(),
192                    result.1,
193                    result.2,
194                    result.3.to_string(),
195                ));
196            })
197        })
198        .collect();
199
200    // Wait for all threads to complete
201    for handle in handles {
202        let _ = handle.join();
203    }
204
205    // Print results in order
206    let results = results.lock().unwrap();
207    let mut sorted_results: Vec<_> = results.iter().collect();
208    sorted_results.sort_by_key(|(name, _, _, _)| name.as_str());
209
210    for (name, success, info, url) in sorted_results {
211        if *success {
212            println!(
213                "   {} {}: {} ({})",
214                "[+]".green(),
215                name.bold(),
216                "reachable".green(),
217                info.dimmed()
218            );
219            println!("      {} {}", "`".dimmed(), url.dimmed());
220        } else {
221            println!(
222                "   {} {}: {} ({})",
223                "[-]".dimmed(),
224                name,
225                "blocked or unavailable".dimmed(),
226                info.dimmed()
227            );
228        }
229    }
230
231    let result = reachable.lock().unwrap().clone();
232    result
233}
234
235/// Scan browser cookies for authenticated web LLM providers
236fn scan_browser_authentication(
237    verbose: bool,
238) -> (std::collections::HashMap<String, Vec<BrowserType>>, usize) {
239    use crate::browser::scan_browser_auth_verbose;
240    use std::collections::HashMap;
241
242    let installed = get_installed_browsers();
243    if installed.is_empty() {
244        println!("   {} No supported browsers found", "[-]".dimmed());
245        return (HashMap::new(), 0);
246    }
247
248    println!(
249        "   {} Checking {} browser(s): {}",
250        "[*]".blue(),
251        installed.len(),
252        installed
253            .iter()
254            .map(|b| b.name())
255            .collect::<Vec<_>>()
256            .join(", ")
257    );
258
259    let results = if verbose {
260        scan_browser_auth_verbose()
261    } else {
262        scan_browser_auth()
263    };
264
265    // Group results by provider
266    let mut authenticated: HashMap<String, Vec<BrowserType>> = HashMap::new();
267
268    for result in results {
269        if result.authenticated {
270            authenticated
271                .entry(result.provider.clone())
272                .or_default()
273                .push(result.browser);
274        }
275    }
276
277    let count = authenticated.len();
278
279    if authenticated.is_empty() {
280        println!(
281            "   {} No authenticated web LLM providers found",
282            "[-]".dimmed()
283        );
284        println!(
285            "      {} Log into ChatGPT, Claude, etc. in your browser to enable harvesting",
286            "`".dimmed()
287        );
288    } else {
289        for (provider, browsers) in &authenticated {
290            let browser_names: Vec<_> = browsers.iter().map(|b| b.name()).collect();
291            println!(
292                "   {} {}: {} in {}",
293                "[+]".green(),
294                provider.bold(),
295                "authenticated".green(),
296                browser_names.join(", ")
297            );
298        }
299    }
300
301    (authenticated, count)
302}
303
304/// Configuration for the harvest database
305#[allow(dead_code)]
306#[derive(Debug, Clone, Serialize, Deserialize)]
307pub struct HarvestConfig {
308    /// Path to the harvest database
309    pub db_path: PathBuf,
310    /// Auto-commit changes to git
311    pub auto_commit: bool,
312    /// Providers to include (empty = all)
313    pub include_providers: Vec<String>,
314    /// Providers to exclude
315    pub exclude_providers: Vec<String>,
316    /// Include VS Code workspaces
317    pub include_workspaces: bool,
318    /// Last harvest timestamp
319    pub last_harvest: Option<i64>,
320}
321
322#[allow(dead_code)]
323impl Default for HarvestConfig {
324    fn default() -> Self {
325        Self {
326            db_path: PathBuf::from("chat_sessions.db"),
327            auto_commit: false,
328            include_providers: Vec::new(),
329            exclude_providers: Vec::new(),
330            include_workspaces: true,
331            last_harvest: None,
332        }
333    }
334}
335
336/// Statistics from a harvest operation
337#[derive(Debug, Default)]
338pub struct HarvestStats {
339    pub providers_scanned: usize,
340    pub workspaces_scanned: usize,
341    pub sessions_found: usize,
342    pub sessions_added: usize,
343    pub sessions_updated: usize,
344    pub sessions_skipped: usize,
345    pub errors: Vec<String>,
346}
347
348/// A harvested session record
349#[allow(dead_code)]
350#[derive(Debug, Clone, Serialize, Deserialize)]
351pub struct HarvestedSession {
352    pub id: String,
353    pub provider: String,
354    pub provider_type: String,
355    pub workspace_id: Option<String>,
356    pub workspace_name: Option<String>,
357    pub title: String,
358    pub message_count: usize,
359    pub created_at: i64,
360    pub updated_at: i64,
361    pub harvested_at: i64,
362    pub session_json: String,
363}
364
365/// Initialize a harvest database at the specified path
366pub fn harvest_init(path: Option<&str>, git_init: bool) -> Result<()> {
367    let db_path = get_db_path(path)?;
368    let db_dir = db_path.parent().unwrap_or(Path::new("."));
369
370    println!("\n{} Initializing Harvest Database", "[H]".magenta().bold());
371    println!("{}", "=".repeat(60));
372
373    // Create directory if needed
374    if !db_dir.exists() {
375        fs::create_dir_all(db_dir)?;
376        println!("{} Created directory: {}", "[+]".green(), db_dir.display());
377    }
378
379    // Check if database already exists
380    if db_path.exists() {
381        println!(
382            "{} Database already exists: {}",
383            "[!]".yellow(),
384            db_path.display()
385        );
386        println!("   Use 'csm harvest run' to update it");
387        return Ok(());
388    }
389
390    // Create the database
391    create_harvest_database(&db_path)?;
392    println!("{} Created database: {}", "[+]".green(), db_path.display());
393
394    // Initialize git if requested
395    if git_init {
396        init_git_tracking(&db_path)?;
397    }
398
399    println!("\n{} Harvest database initialized!", "[+]".green().bold());
400    println!("\nNext steps:");
401    println!("  1. Run 'csm harvest scan' to see available providers");
402    println!("  2. Run 'csm harvest run' to collect sessions");
403    if !git_init {
404        println!("  3. Run 'csm harvest git init' to enable version tracking");
405    }
406
407    Ok(())
408}
409
410/// Scan for available providers and workspaces
411pub fn harvest_scan(
412    show_sessions: bool,
413    scan_web: bool,
414    timeout_secs: u64,
415    verbose: bool,
416) -> Result<()> {
417    println!("\n{} Scanning for Providers", "[H]".magenta().bold());
418    println!("{}", "=".repeat(60));
419
420    let registry = ProviderRegistry::new();
421    let mut total_sessions = 0;
422    let mut available_providers = Vec::new();
423
424    // Scan LLM providers
425    println!("\n{} LLM Providers:", "[*]".blue().bold());
426
427    let provider_types = vec![
428        ProviderType::Copilot,
429        ProviderType::Cursor,
430        ProviderType::Ollama,
431        ProviderType::Vllm,
432        ProviderType::Foundry,
433        ProviderType::LmStudio,
434        ProviderType::LocalAI,
435        ProviderType::TextGenWebUI,
436        ProviderType::Jan,
437        ProviderType::Gpt4All,
438        ProviderType::Llamafile,
439        ProviderType::OpenAI,
440        ProviderType::ChatGPT,
441        ProviderType::Anthropic,
442        ProviderType::Perplexity,
443        ProviderType::DeepSeek,
444        ProviderType::Gemini,
445    ];
446
447    for pt in &provider_types {
448        if let Some(provider) = registry.get_provider(*pt) {
449            let available = provider.is_available();
450            let session_count = if available {
451                provider.list_sessions().map(|s| s.len()).unwrap_or(0)
452            } else {
453                0
454            };
455
456            if available {
457                available_providers.push((*pt, session_count));
458                total_sessions += session_count;
459
460                let status = if session_count > 0 {
461                    format!(
462                        "{} {} sessions",
463                        "+".green(),
464                        session_count.to_string().cyan()
465                    )
466                } else {
467                    format!("{} no sessions", "+".green())
468                };
469
470                println!(
471                    "   {} {}: {}",
472                    "[+]".green(),
473                    provider.name().bold(),
474                    status
475                );
476
477                if show_sessions && session_count > 0 {
478                    if let Ok(sessions) = provider.list_sessions() {
479                        for session in sessions.iter().take(3) {
480                            println!("      {} {}", "`".dimmed(), session.title().dimmed());
481                        }
482                        if sessions.len() > 3 {
483                            println!("      {} ... and {} more", "`".dimmed(), sessions.len() - 3);
484                        }
485                    }
486                }
487
488                if let Some(path) = provider.sessions_path() {
489                    println!(
490                        "      {} {}",
491                        "`".dimmed(),
492                        path.display().to_string().dimmed()
493                    );
494                }
495            }
496        }
497    }
498
499    // Scan VS Code workspaces
500    println!("\n{} VS Code Workspaces:", "[*]".blue().bold());
501
502    match discover_workspaces() {
503        Ok(workspaces) => {
504            let workspaces_with_sessions: Vec<_> = workspaces
505                .iter()
506                .filter(|ws| ws.chat_session_count > 0)
507                .collect();
508
509            let ws_sessions: usize = workspaces_with_sessions
510                .iter()
511                .map(|ws| ws.chat_session_count)
512                .sum();
513
514            println!(
515                "   {} {} workspaces with {} sessions",
516                "[+]".green(),
517                workspaces_with_sessions.len().to_string().cyan(),
518                ws_sessions.to_string().cyan()
519            );
520
521            if show_sessions {
522                for ws in workspaces_with_sessions.iter().take(5) {
523                    let name = ws
524                        .project_path
525                        .clone()
526                        .unwrap_or_else(|| ws.hash[..8.min(ws.hash.len())].to_string());
527                    println!(
528                        "      {} {} ({} sessions)",
529                        "`".dimmed(),
530                        name.dimmed(),
531                        ws.chat_session_count
532                    );
533                }
534                if workspaces_with_sessions.len() > 5 {
535                    println!(
536                        "      {} ... and {} more workspaces",
537                        "`".dimmed(),
538                        workspaces_with_sessions.len() - 5
539                    );
540                }
541            }
542
543            total_sessions += ws_sessions;
544        }
545        Err(e) => {
546            println!("   {} Failed to scan workspaces: {}", "[!]".yellow(), e);
547        }
548    }
549
550    // Scan web-based LLM providers if requested
551    let mut web_providers_found = Vec::new();
552    let mut authenticated_count = 0;
553    if scan_web {
554        // First check browser authentication (no network requests)
555        println!("\n{} Browser Authentication:", "[*]".blue().bold());
556        let (auth_results, auth_count) = scan_browser_authentication(verbose);
557        authenticated_count = auth_count;
558
559        // Then probe web endpoints
560        println!("\n{} Web LLM Provider Endpoints:", "[*]".blue().bold());
561        web_providers_found = scan_web_providers(timeout_secs);
562
563        // Show which authenticated providers are reachable
564        if !auth_results.is_empty() {
565            println!("\n{} Authenticated Provider Sessions:", "[*]".blue().bold());
566            for (provider, browsers) in &auth_results {
567                let browser_list: Vec<_> = browsers.iter().map(|b| b.name()).collect();
568                let reachable = web_providers_found.iter().any(|p| p == provider);
569                let status = if reachable {
570                    format!("{} (reachable)", "ready to harvest".green())
571                } else {
572                    format!("{}", "authenticated but endpoint blocked".yellow())
573                };
574                println!(
575                    "   {} {}: {} via {}",
576                    "[+]".green(),
577                    provider.bold(),
578                    status,
579                    browser_list.join(", ").dimmed()
580                );
581            }
582        }
583    }
584
585    // Summary
586    println!("\n{} Summary:", "[*]".green().bold());
587    println!(
588        "   {} local providers available",
589        available_providers.len().to_string().cyan()
590    );
591    if scan_web {
592        println!(
593            "   {} web providers reachable",
594            web_providers_found.len().to_string().cyan()
595        );
596        println!(
597            "   {} web providers authenticated",
598            authenticated_count.to_string().cyan()
599        );
600    }
601    println!(
602        "   {} total sessions to harvest",
603        total_sessions.to_string().cyan()
604    );
605
606    Ok(())
607}
608
609/// Run the harvest operation
610pub fn harvest_run(
611    path: Option<&str>,
612    providers: Option<&[String]>,
613    exclude: Option<&[String]>,
614    incremental: bool,
615    auto_commit: bool,
616    message: Option<&str>,
617) -> Result<()> {
618    let db_path = get_db_path(path)?;
619
620    println!("\n{} Running Harvest", "[H]".magenta().bold());
621    println!("{}", "=".repeat(60));
622
623    // Ensure database exists
624    if !db_path.exists() {
625        println!("{} Database not found, creating...", "[*]".blue());
626        create_harvest_database(&db_path)?;
627    }
628
629    let conn = Connection::open(&db_path)?;
630    let mut stats = HarvestStats::default();
631
632    // Get last harvest time for incremental updates
633    let last_harvest: Option<i64> = if incremental {
634        conn.query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
635            row.get(0)
636        })
637        .ok()
638    } else {
639        None
640    };
641
642    if let Some(ts) = last_harvest {
643        let dt = DateTime::from_timestamp_millis(ts)
644            .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
645            .unwrap_or_default();
646        println!("{} Incremental harvest since: {}", "[*]".blue(), dt);
647    }
648
649    let registry = ProviderRegistry::new();
650    let include_providers = providers.map(|p| p.to_vec());
651    let exclude_providers = exclude.map(|p| p.to_vec()).unwrap_or_default();
652
653    // Harvest from LLM providers
654    println!("\n{} Harvesting from providers...", "[*]".blue());
655
656    let provider_types = vec![
657        ProviderType::Copilot,
658        ProviderType::Cursor,
659        ProviderType::Ollama,
660        ProviderType::Vllm,
661        ProviderType::Foundry,
662        ProviderType::LmStudio,
663        ProviderType::LocalAI,
664        ProviderType::TextGenWebUI,
665        ProviderType::Jan,
666        ProviderType::Gpt4All,
667        ProviderType::Llamafile,
668    ];
669
670    for pt in &provider_types {
671        let provider_name = pt.display_name().to_lowercase();
672
673        // Check include/exclude filters
674        if let Some(ref include) = include_providers {
675            if !include
676                .iter()
677                .any(|p| provider_name.contains(&p.to_lowercase()))
678            {
679                continue;
680            }
681        }
682        if exclude_providers
683            .iter()
684            .any(|p| provider_name.contains(&p.to_lowercase()))
685        {
686            continue;
687        }
688
689        if let Some(provider) = registry.get_provider(*pt) {
690            if !provider.is_available() {
691                continue;
692            }
693
694            stats.providers_scanned += 1;
695
696            match provider.list_sessions() {
697                Ok(sessions) => {
698                    for session in sessions {
699                        stats.sessions_found += 1;
700
701                        // Check if session should be skipped (incremental)
702                        if let Some(last) = last_harvest {
703                            if session.last_message_date <= last {
704                                stats.sessions_skipped += 1;
705                                continue;
706                            }
707                        }
708
709                        match insert_or_update_session(
710                            &conn,
711                            &session,
712                            pt.display_name(),
713                            None,
714                            None,
715                        ) {
716                            Ok(updated) => {
717                                if updated {
718                                    stats.sessions_updated += 1;
719                                } else {
720                                    stats.sessions_added += 1;
721                                }
722                            }
723                            Err(e) => {
724                                stats.errors.push(format!("{}: {}", session.title(), e));
725                            }
726                        }
727                    }
728
729                    let session_count = stats.sessions_added + stats.sessions_updated;
730                    if session_count > 0 {
731                        println!(
732                            "   {} {}: {} sessions",
733                            "[+]".green(),
734                            provider.name(),
735                            session_count.to_string().cyan()
736                        );
737                    }
738                }
739                Err(e) => {
740                    stats.errors.push(format!("{}: {}", provider.name(), e));
741                }
742            }
743        }
744    }
745
746    // Harvest from VS Code workspaces
747    if include_providers.is_none()
748        || include_providers
749            .as_ref()
750            .map(|p| {
751                p.iter()
752                    .any(|x| x == "copilot" || x == "vscode" || x == "workspace")
753            })
754            .unwrap_or(false)
755    {
756        println!("\n{} Harvesting from VS Code workspaces...", "[*]".blue());
757
758        if let Ok(workspaces) = discover_workspaces() {
759            for ws in &workspaces {
760                if ws.chat_session_count == 0 {
761                    continue;
762                }
763
764                stats.workspaces_scanned += 1;
765
766                if let Ok(sessions) = get_chat_sessions_from_workspace(&ws.workspace_path) {
767                    for swp in sessions {
768                        stats.sessions_found += 1;
769
770                        // Check if session should be skipped (incremental)
771                        if let Some(last) = last_harvest {
772                            if swp.session.last_message_date <= last {
773                                stats.sessions_skipped += 1;
774                                continue;
775                            }
776                        }
777
778                        let ws_name = ws.project_path.clone();
779
780                        match insert_or_update_session(
781                            &conn,
782                            &swp.session,
783                            "GitHub Copilot",
784                            Some(&ws.hash),
785                            ws_name.as_deref(),
786                        ) {
787                            Ok(updated) => {
788                                if updated {
789                                    stats.sessions_updated += 1;
790                                } else {
791                                    stats.sessions_added += 1;
792                                }
793                            }
794                            Err(e) => {
795                                stats.errors.push(format!("{}: {}", swp.session.title(), e));
796                            }
797                        }
798                    }
799                }
800            }
801
802            println!(
803                "   {} Workspaces: {} scanned",
804                "[+]".green(),
805                stats.workspaces_scanned.to_string().cyan()
806            );
807        }
808    }
809
810    // Harvest from web-based cloud providers (ChatGPT, Claude, etc.)
811    let include_list: Vec<String> = include_providers.clone().unwrap_or_default();
812    harvest_web_providers(&conn, &mut stats, &include_list, &exclude_providers)?;
813
814    // Update metadata
815    update_harvest_metadata(&conn)?;
816
817    // Print summary
818    println!("\n{} Harvest Complete:", "[+]".green().bold());
819    println!(
820        "   {} providers scanned",
821        stats.providers_scanned.to_string().cyan()
822    );
823    println!(
824        "   {} workspaces scanned",
825        stats.workspaces_scanned.to_string().cyan()
826    );
827    println!(
828        "   {} sessions found",
829        stats.sessions_found.to_string().cyan()
830    );
831    println!(
832        "   {} sessions added",
833        stats.sessions_added.to_string().green()
834    );
835    println!(
836        "   {} sessions updated",
837        stats.sessions_updated.to_string().yellow()
838    );
839    if stats.sessions_skipped > 0 {
840        println!(
841            "   {} sessions skipped (unchanged)",
842            stats.sessions_skipped.to_string().dimmed()
843        );
844    }
845
846    if !stats.errors.is_empty() {
847        println!("\n{} Errors ({}):", "[!]".red(), stats.errors.len());
848        for (i, err) in stats.errors.iter().take(5).enumerate() {
849            println!("   {}. {}", i + 1, err);
850        }
851        if stats.errors.len() > 5 {
852            println!("   ... and {} more errors", stats.errors.len() - 5);
853        }
854    }
855
856    // Auto-commit if requested
857    if auto_commit && (stats.sessions_added > 0 || stats.sessions_updated > 0) {
858        println!("\n{} Auto-committing changes...", "[*]".blue());
859        let commit_msg = message.unwrap_or("Harvest: update chat sessions");
860        if let Err(e) = git_commit_harvest(&db_path, commit_msg) {
861            println!("{} Git commit failed: {}", "[!]".yellow(), e);
862        } else {
863            println!("{} Changes committed", "[+]".green());
864        }
865    }
866
867    println!("\nDatabase: {}", db_path.display());
868
869    Ok(())
870}
871
872/// Show harvest database status
873pub fn harvest_status(path: Option<&str>) -> Result<()> {
874    let db_path = get_db_path(path)?;
875
876    println!("\n{} Harvest Database Status", "[H]".magenta().bold());
877    println!("{}", "=".repeat(60));
878
879    if !db_path.exists() {
880        println!(
881            "{} Database not found: {}",
882            "[!]".yellow(),
883            db_path.display()
884        );
885        println!("   Run 'csm harvest init' to create one");
886        return Ok(());
887    }
888
889    let conn = Connection::open(&db_path)?;
890
891    // Get session counts by provider
892    let mut stmt = conn.prepare(
893        "SELECT provider, COUNT(*) as count, SUM(message_count) as messages 
894         FROM sessions 
895         GROUP BY provider 
896         ORDER BY count DESC",
897    )?;
898
899    let provider_stats: Vec<(String, i64, i64)> = stmt
900        .query_map([], |row| {
901            Ok((
902                row.get(0)?,
903                row.get(1)?,
904                row.get::<_, Option<i64>>(2)?.unwrap_or(0),
905            ))
906        })?
907        .filter_map(|r| r.ok())
908        .collect();
909
910    // Get total counts
911    let total_sessions: i64 = conn
912        .query_row("SELECT COUNT(*) FROM sessions", [], |row| row.get(0))
913        .unwrap_or(0);
914
915    let total_messages: i64 = conn
916        .query_row(
917            "SELECT COALESCE(SUM(message_count), 0) FROM sessions",
918            [],
919            |row| row.get(0),
920        )
921        .unwrap_or(0);
922
923    // Get last harvest time
924    let last_harvest: Option<i64> = conn
925        .query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
926            row.get(0)
927        })
928        .ok()
929        .flatten();
930
931    // Get oldest and newest sessions
932    let oldest: Option<i64> = conn
933        .query_row("SELECT MIN(created_at) FROM sessions", [], |row| row.get(0))
934        .ok()
935        .flatten();
936
937    let newest: Option<i64> = conn
938        .query_row("SELECT MAX(updated_at) FROM sessions", [], |row| row.get(0))
939        .ok()
940        .flatten();
941
942    println!("{} Database: {}", "[*]".blue(), db_path.display());
943    println!(
944        "{} Total Sessions: {}",
945        "[*]".blue(),
946        total_sessions.to_string().cyan()
947    );
948    println!(
949        "{} Total Messages: {}",
950        "[*]".blue(),
951        total_messages.to_string().cyan()
952    );
953
954    if let Some(ts) = last_harvest {
955        let dt = DateTime::from_timestamp_millis(ts)
956            .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
957            .unwrap_or_else(|| "Unknown".to_string());
958        println!("{} Last Harvest: {}", "[*]".blue(), dt);
959    }
960
961    if let (Some(old), Some(new)) = (oldest, newest) {
962        let old_dt = DateTime::from_timestamp_millis(old)
963            .map(|d| d.format("%Y-%m-%d").to_string())
964            .unwrap_or_default();
965        let new_dt = DateTime::from_timestamp_millis(new)
966            .map(|d| d.format("%Y-%m-%d").to_string())
967            .unwrap_or_default();
968        println!("{} Date Range: {} to {}", "[*]".blue(), old_dt, new_dt);
969    }
970
971    if !provider_stats.is_empty() {
972        println!("\n{} Sessions by Provider:", "[*]".blue());
973        for (provider, count, messages) in &provider_stats {
974            println!(
975                "   {} {}: {} sessions, {} messages",
976                "[+]".green(),
977                provider.bold(),
978                count.to_string().cyan(),
979                messages.to_string().dimmed()
980            );
981        }
982    }
983
984    // Check git status
985    let db_dir = db_path.parent().unwrap_or(Path::new("."));
986    if db_dir.join(".git").exists()
987        || db_path
988            .parent()
989            .map(|p| p.join(".git").exists())
990            .unwrap_or(false)
991    {
992        println!("\n{} Git Status:", "[*]".blue());
993
994        let output = Command::new("git")
995            .current_dir(db_dir)
996            .args([
997                "status",
998                "--porcelain",
999                db_path.file_name().unwrap().to_str().unwrap(),
1000            ])
1001            .output();
1002
1003        match output {
1004            Ok(out) => {
1005                let status = String::from_utf8_lossy(&out.stdout);
1006                if status.is_empty() {
1007                    println!("   {} No uncommitted changes", "[+]".green());
1008                } else {
1009                    println!("   {} Uncommitted changes detected", "[!]".yellow());
1010                    println!("   Run 'csm harvest git commit' to save changes");
1011                }
1012            }
1013            Err(_) => {
1014                println!("   {} Unable to check git status", "[!]".yellow());
1015            }
1016        }
1017    } else {
1018        println!("\n{} Git tracking not enabled", "[i]".dimmed());
1019        println!("   Run 'csm harvest git init' to enable version tracking");
1020    }
1021
1022    Ok(())
1023}
1024
1025/// List sessions in the harvest database
1026pub fn harvest_list(
1027    path: Option<&str>,
1028    provider: Option<&str>,
1029    limit: usize,
1030    search: Option<&str>,
1031) -> Result<()> {
1032    let db_path = get_db_path(path)?;
1033
1034    if !db_path.exists() {
1035        println!(
1036            "{} Database not found: {}",
1037            "[!]".yellow(),
1038            db_path.display()
1039        );
1040        return Ok(());
1041    }
1042
1043    let conn = Connection::open(&db_path)?;
1044
1045    let mut query = String::from(
1046        "SELECT id, provider, title, message_count, created_at, updated_at, workspace_name 
1047         FROM sessions WHERE 1=1",
1048    );
1049    let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1050
1051    if let Some(p) = provider {
1052        query.push_str(" AND LOWER(provider) LIKE ?");
1053        params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1054    }
1055
1056    if let Some(s) = search {
1057        query.push_str(" AND (LOWER(title) LIKE ? OR LOWER(id) LIKE ?)");
1058        let pattern = format!("%{}%", s.to_lowercase());
1059        params_vec.push(Box::new(pattern.clone()));
1060        params_vec.push(Box::new(pattern));
1061    }
1062
1063    query.push_str(" ORDER BY updated_at DESC LIMIT ?");
1064    params_vec.push(Box::new(limit as i64));
1065
1066    let mut stmt = conn.prepare(&query)?;
1067
1068    // Build params slice
1069    let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1070
1071    let sessions: Vec<HarvestQueryResult> = stmt
1072        .query_map(params_slice.as_slice(), |row| {
1073            Ok((
1074                row.get(0)?,
1075                row.get(1)?,
1076                row.get(2)?,
1077                row.get(3)?,
1078                row.get(4)?,
1079                row.get(5)?,
1080                row.get(6)?,
1081            ))
1082        })?
1083        .filter_map(|r| r.ok())
1084        .collect();
1085
1086    println!("\n{} Harvested Sessions", "[H]".magenta().bold());
1087    println!("{}", "=".repeat(60));
1088
1089    if sessions.is_empty() {
1090        println!("{} No sessions found", "[i]".dimmed());
1091        return Ok(());
1092    }
1093
1094    for (id, prov, title, msg_count, _created, updated, ws_name) in &sessions {
1095        let date = DateTime::from_timestamp_millis(*updated)
1096            .map(|d| d.format("%Y-%m-%d %H:%M").to_string())
1097            .unwrap_or_default();
1098
1099        println!("\n{} {}", "[S]".cyan(), title.bold());
1100        println!("   ID: {}", &id[..16.min(id.len())].dimmed());
1101        println!(
1102            "   Provider: {} | Messages: {} | Updated: {}",
1103            prov.cyan(),
1104            msg_count.to_string().green(),
1105            date.dimmed()
1106        );
1107        if let Some(ws) = ws_name {
1108            println!("   Workspace: {}", ws.dimmed());
1109        }
1110    }
1111
1112    println!(
1113        "\n{} Showing {} of available sessions",
1114        "[i]".dimmed(),
1115        sessions.len()
1116    );
1117
1118    Ok(())
1119}
1120
1121/// Export sessions from the harvest database
1122pub fn harvest_export(
1123    path: Option<&str>,
1124    output: &str,
1125    format: &str,
1126    provider: Option<&str>,
1127    session_ids: Option<&[String]>,
1128) -> Result<()> {
1129    let db_path = get_db_path(path)?;
1130    let output_path = PathBuf::from(output);
1131
1132    if !db_path.exists() {
1133        anyhow::bail!("Database not found: {}", db_path.display());
1134    }
1135
1136    let conn = Connection::open(&db_path)?;
1137
1138    println!("\n{} Exporting Sessions", "[H]".magenta().bold());
1139    println!("{}", "=".repeat(60));
1140
1141    // Build query
1142    let mut query = String::from("SELECT session_json FROM sessions WHERE 1=1");
1143    let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1144
1145    if let Some(p) = provider {
1146        query.push_str(" AND LOWER(provider) LIKE ?");
1147        params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1148    }
1149
1150    if let Some(ids) = session_ids {
1151        let placeholders: Vec<&str> = ids.iter().map(|_| "?").collect();
1152        query.push_str(&format!(" AND id IN ({})", placeholders.join(",")));
1153        for id in ids {
1154            params_vec.push(Box::new(id.clone()));
1155        }
1156    }
1157
1158    let mut stmt = conn.prepare(&query)?;
1159    let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1160
1161    let sessions: Vec<String> = stmt
1162        .query_map(params_slice.as_slice(), |row| row.get(0))?
1163        .filter_map(|r| r.ok())
1164        .collect();
1165
1166    if sessions.is_empty() {
1167        println!("{} No sessions to export", "[i]".dimmed());
1168        return Ok(());
1169    }
1170
1171    // Create output directory if needed
1172    if let Some(parent) = output_path.parent() {
1173        if !parent.exists() {
1174            fs::create_dir_all(parent)?;
1175        }
1176    }
1177
1178    match format.to_lowercase().as_str() {
1179        "json" => {
1180            // Export as JSON array
1181            let parsed: Vec<serde_json::Value> = sessions
1182                .iter()
1183                .filter_map(|s| serde_json::from_str(s).ok())
1184                .collect();
1185            let json_output = serde_json::to_string_pretty(&parsed)?;
1186            fs::write(&output_path, json_output)?;
1187        }
1188        "jsonl" => {
1189            // Export as JSON Lines
1190            let content: String = sessions
1191                .iter()
1192                .filter_map(|s| serde_json::from_str::<serde_json::Value>(s).ok())
1193                .map(|v| serde_json::to_string(&v).unwrap_or_default())
1194                .collect::<Vec<_>>()
1195                .join("\n");
1196            fs::write(&output_path, content)?;
1197        }
1198        "md" | "markdown" => {
1199            // Export as Markdown
1200            let mut md_content = String::from("# Chat Sessions Export\n\n");
1201            md_content.push_str(&format!(
1202                "Exported: {}\n\n",
1203                Utc::now().format("%Y-%m-%d %H:%M:%S")
1204            ));
1205
1206            for session_json in &sessions {
1207                if let Ok(session) = serde_json::from_str::<ChatSession>(session_json) {
1208                    md_content.push_str(&format!("## {}\n\n", session.title()));
1209                    md_content.push_str(&format!("Messages: {}\n\n", session.request_count()));
1210
1211                    for request in &session.requests {
1212                        if let Some(msg) = &request.message {
1213                            if let Some(text) = &msg.text {
1214                                md_content.push_str(&format!("### User\n\n{}\n\n", text));
1215                            }
1216                        }
1217                        // Extract response text from the JSON value
1218                        if let Some(response) = &request.response {
1219                            let response_text = response
1220                                .get("text")
1221                                .and_then(|v| v.as_str())
1222                                .or_else(|| response.get("content").and_then(|v| v.as_str()))
1223                                .or_else(|| {
1224                                    // Try to get from value array (older format)
1225                                    response
1226                                        .get("value")
1227                                        .and_then(|v| v.as_array())
1228                                        .and_then(|arr| arr.first())
1229                                        .and_then(|v| v.get("value"))
1230                                        .and_then(|v| v.as_str())
1231                                });
1232
1233                            if let Some(text) = response_text {
1234                                md_content.push_str(&format!("### Assistant\n\n{}\n\n", text));
1235                            }
1236                        }
1237                    }
1238                    md_content.push_str("---\n\n");
1239                }
1240            }
1241            fs::write(&output_path, md_content)?;
1242        }
1243        _ => {
1244            anyhow::bail!("Unknown format: {}. Supported: json, jsonl, md", format);
1245        }
1246    }
1247
1248    println!(
1249        "{} Exported {} sessions to {}",
1250        "[+]".green(),
1251        sessions.len().to_string().cyan(),
1252        output_path.display()
1253    );
1254
1255    Ok(())
1256}
1257
1258/// Git operations for harvest database
1259pub fn harvest_git_init(path: Option<&str>) -> Result<()> {
1260    let db_path = get_db_path(path)?;
1261    init_git_tracking(&db_path)
1262}
1263
1264pub fn harvest_git_commit(path: Option<&str>, message: Option<&str>) -> Result<()> {
1265    let db_path = get_db_path(path)?;
1266    let msg = message.unwrap_or("Update harvest database");
1267    git_commit_harvest(&db_path, msg)
1268}
1269
1270pub fn harvest_git_log(path: Option<&str>, count: usize) -> Result<()> {
1271    let db_path = get_db_path(path)?;
1272    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1273    let db_name = db_path.file_name().unwrap().to_str().unwrap();
1274
1275    println!("\n{} Harvest Git History", "[H]".magenta().bold());
1276    println!("{}", "=".repeat(60));
1277
1278    let output = Command::new("git")
1279        .current_dir(db_dir)
1280        .args([
1281            "log",
1282            "--oneline",
1283            &format!("-{}", count),
1284            "--follow",
1285            "--",
1286            db_name,
1287        ])
1288        .output()
1289        .context("Failed to run git log")?;
1290
1291    if !output.status.success() {
1292        let stderr = String::from_utf8_lossy(&output.stderr);
1293        if stderr.contains("does not have any commits") {
1294            println!("{} No commits yet", "[i]".dimmed());
1295        } else {
1296            anyhow::bail!("Git log failed: {}", stderr);
1297        }
1298        return Ok(());
1299    }
1300
1301    let log = String::from_utf8_lossy(&output.stdout);
1302    if log.trim().is_empty() {
1303        println!("{} No commits found for {}", "[i]".dimmed(), db_name);
1304    } else {
1305        for line in log.lines() {
1306            let parts: Vec<&str> = line.splitn(2, ' ').collect();
1307            if parts.len() == 2 {
1308                println!("{} {} {}", "[C]".yellow(), parts[0].cyan(), parts[1]);
1309            } else {
1310                println!("{}", line);
1311            }
1312        }
1313    }
1314
1315    Ok(())
1316}
1317
1318pub fn harvest_git_diff(path: Option<&str>, commit: Option<&str>) -> Result<()> {
1319    let db_path = get_db_path(path)?;
1320    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1321    let db_name = db_path.file_name().unwrap().to_str().unwrap();
1322
1323    println!("\n{} Harvest Database Changes", "[H]".magenta().bold());
1324    println!("{}", "=".repeat(60));
1325
1326    let mut args = vec!["diff", "--stat"];
1327    if let Some(c) = commit {
1328        args.push(c);
1329    }
1330    args.push("--");
1331    args.push(db_name);
1332
1333    let output = Command::new("git")
1334        .current_dir(db_dir)
1335        .args(&args)
1336        .output()
1337        .context("Failed to run git diff")?;
1338
1339    let diff = String::from_utf8_lossy(&output.stdout);
1340    if diff.trim().is_empty() {
1341        println!("{} No changes", "[+]".green());
1342    } else {
1343        println!("{}", diff);
1344    }
1345
1346    Ok(())
1347}
1348
1349pub fn harvest_git_restore(path: Option<&str>, commit: &str) -> Result<()> {
1350    let db_path = get_db_path(path)?;
1351    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1352    let db_name = db_path.file_name().unwrap().to_str().unwrap();
1353
1354    println!("\n{} Restoring Harvest Database", "[H]".magenta().bold());
1355    println!("{}", "=".repeat(60));
1356
1357    // Create backup first
1358    let backup_path = db_path.with_extension("db.backup");
1359    if db_path.exists() {
1360        fs::copy(&db_path, &backup_path)?;
1361        println!(
1362            "{} Created backup: {}",
1363            "[+]".green(),
1364            backup_path.display()
1365        );
1366    }
1367
1368    // Restore from commit
1369    let output = Command::new("git")
1370        .current_dir(db_dir)
1371        .args(["checkout", commit, "--", db_name])
1372        .output()
1373        .context("Failed to run git checkout")?;
1374
1375    if !output.status.success() {
1376        let stderr = String::from_utf8_lossy(&output.stderr);
1377        anyhow::bail!("Git restore failed: {}", stderr);
1378    }
1379
1380    println!(
1381        "{} Restored database from commit: {}",
1382        "[+]".green(),
1383        commit
1384    );
1385
1386    Ok(())
1387}
1388
1389// ============================================================================
1390// Helper Functions
1391// ============================================================================
1392
1393fn get_db_path(path: Option<&str>) -> Result<PathBuf> {
1394    if let Some(p) = path {
1395        return Ok(PathBuf::from(p));
1396    }
1397
1398    // Check environment variable
1399    if let Ok(p) = std::env::var("CSM_HARVEST_DB") {
1400        return Ok(PathBuf::from(p));
1401    }
1402
1403    // Default to current directory
1404    Ok(std::env::current_dir()?.join("chat_sessions.db"))
1405}
1406
1407fn create_harvest_database(path: &Path) -> Result<()> {
1408    let conn = Connection::open(path)?;
1409
1410    conn.execute_batch(
1411        r#"
1412        -- Sessions table (original harvest format)
1413        CREATE TABLE IF NOT EXISTS sessions (
1414            id TEXT PRIMARY KEY,
1415            provider TEXT NOT NULL,
1416            provider_type TEXT,
1417            workspace_id TEXT,
1418            workspace_name TEXT,
1419            title TEXT NOT NULL,
1420            message_count INTEGER DEFAULT 0,
1421            created_at INTEGER NOT NULL,
1422            updated_at INTEGER NOT NULL,
1423            harvested_at INTEGER NOT NULL,
1424            session_json TEXT NOT NULL
1425        );
1426
1427        CREATE INDEX IF NOT EXISTS idx_sessions_provider ON sessions(provider);
1428        CREATE INDEX IF NOT EXISTS idx_sessions_workspace ON sessions(workspace_id);
1429        CREATE INDEX IF NOT EXISTS idx_sessions_updated ON sessions(updated_at);
1430        
1431        -- Enhanced messages table with raw markdown and metadata
1432        CREATE TABLE IF NOT EXISTS messages_v2 (
1433            id INTEGER PRIMARY KEY AUTOINCREMENT,
1434            session_id TEXT NOT NULL,
1435            message_index INTEGER NOT NULL,
1436            request_id TEXT,
1437            response_id TEXT,
1438            role TEXT NOT NULL,
1439            content_raw TEXT NOT NULL,
1440            content_markdown TEXT,
1441            model_id TEXT,
1442            timestamp INTEGER,
1443            is_canceled INTEGER DEFAULT 0,
1444            metadata_json TEXT,
1445            created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1446            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1447            UNIQUE(session_id, message_index, role)
1448        );
1449        
1450        CREATE INDEX IF NOT EXISTS idx_messages_v2_session ON messages_v2(session_id);
1451        CREATE INDEX IF NOT EXISTS idx_messages_v2_role ON messages_v2(role);
1452        CREATE INDEX IF NOT EXISTS idx_messages_v2_timestamp ON messages_v2(timestamp);
1453        
1454        -- Tool invocations within messages (file edits, terminal commands, etc.)
1455        CREATE TABLE IF NOT EXISTS tool_invocations (
1456            id INTEGER PRIMARY KEY AUTOINCREMENT,
1457            message_id INTEGER NOT NULL,
1458            session_id TEXT NOT NULL,
1459            tool_name TEXT NOT NULL,
1460            tool_call_id TEXT,
1461            invocation_index INTEGER DEFAULT 0,
1462            input_json TEXT,
1463            output_json TEXT,
1464            status TEXT DEFAULT 'pending',
1465            is_confirmed INTEGER DEFAULT 0,
1466            timestamp INTEGER,
1467            FOREIGN KEY (message_id) REFERENCES messages_v2(id) ON DELETE CASCADE,
1468            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1469        );
1470        
1471        CREATE INDEX IF NOT EXISTS idx_tool_invocations_message ON tool_invocations(message_id);
1472        CREATE INDEX IF NOT EXISTS idx_tool_invocations_session ON tool_invocations(session_id);
1473        CREATE INDEX IF NOT EXISTS idx_tool_invocations_tool ON tool_invocations(tool_name);
1474        
1475        -- File changes/diffs associated with tool invocations
1476        CREATE TABLE IF NOT EXISTS file_changes (
1477            id INTEGER PRIMARY KEY AUTOINCREMENT,
1478            tool_invocation_id INTEGER,
1479            session_id TEXT NOT NULL,
1480            message_index INTEGER,
1481            file_path TEXT NOT NULL,
1482            change_type TEXT NOT NULL,
1483            old_content TEXT,
1484            new_content TEXT,
1485            diff_unified TEXT,
1486            line_start INTEGER,
1487            line_end INTEGER,
1488            timestamp INTEGER,
1489            FOREIGN KEY (tool_invocation_id) REFERENCES tool_invocations(id) ON DELETE CASCADE,
1490            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1491        );
1492        
1493        CREATE INDEX IF NOT EXISTS idx_file_changes_tool ON file_changes(tool_invocation_id);
1494        CREATE INDEX IF NOT EXISTS idx_file_changes_session ON file_changes(session_id);
1495        CREATE INDEX IF NOT EXISTS idx_file_changes_path ON file_changes(file_path);
1496        
1497        -- Message-level checkpoints for versioning
1498        CREATE TABLE IF NOT EXISTS message_checkpoints (
1499            id INTEGER PRIMARY KEY AUTOINCREMENT,
1500            session_id TEXT NOT NULL,
1501            message_index INTEGER NOT NULL,
1502            checkpoint_number INTEGER NOT NULL,
1503            content_hash TEXT NOT NULL,
1504            snapshot_json TEXT,
1505            file_state_json TEXT,
1506            created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1507            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1508            UNIQUE(session_id, message_index, checkpoint_number)
1509        );
1510        
1511        CREATE INDEX IF NOT EXISTS idx_message_checkpoints_session ON message_checkpoints(session_id);
1512        CREATE INDEX IF NOT EXISTS idx_sessions_title ON sessions(title);
1513
1514        -- Messages table for full-text search
1515        CREATE TABLE IF NOT EXISTS messages (
1516            id INTEGER PRIMARY KEY AUTOINCREMENT,
1517            session_id TEXT NOT NULL,
1518            message_index INTEGER NOT NULL,
1519            role TEXT NOT NULL,
1520            content TEXT NOT NULL,
1521            created_at INTEGER,
1522            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1523            UNIQUE(session_id, message_index)
1524        );
1525        
1526        CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id);
1527        CREATE INDEX IF NOT EXISTS idx_messages_role ON messages(role);
1528        
1529        -- Checkpoints for version tracking
1530        CREATE TABLE IF NOT EXISTS checkpoints (
1531            id INTEGER PRIMARY KEY AUTOINCREMENT,
1532            session_id TEXT NOT NULL,
1533            checkpoint_number INTEGER NOT NULL,
1534            message TEXT,
1535            message_count INTEGER NOT NULL,
1536            content_hash TEXT NOT NULL,
1537            snapshot TEXT,
1538            created_at TEXT NOT NULL DEFAULT (datetime('now')),
1539            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1540            UNIQUE(session_id, checkpoint_number)
1541        );
1542        
1543        CREATE INDEX IF NOT EXISTS idx_checkpoints_session ON checkpoints(session_id);
1544        
1545        -- Share links for importing shared conversations
1546        CREATE TABLE IF NOT EXISTS share_links (
1547            id TEXT PRIMARY KEY,
1548            session_id TEXT,
1549            provider TEXT NOT NULL,
1550            url TEXT NOT NULL UNIQUE,
1551            share_id TEXT NOT NULL,
1552            title TEXT,
1553            imported INTEGER DEFAULT 0,
1554            imported_at INTEGER,
1555            created_at INTEGER NOT NULL,
1556            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE SET NULL
1557        );
1558        
1559        CREATE INDEX IF NOT EXISTS idx_share_links_provider ON share_links(provider);
1560        CREATE INDEX IF NOT EXISTS idx_share_links_imported ON share_links(imported);
1561
1562        -- Harvest metadata
1563        CREATE TABLE IF NOT EXISTS harvest_metadata (
1564            key TEXT PRIMARY KEY,
1565            value TEXT NOT NULL
1566        );
1567
1568        INSERT OR REPLACE INTO harvest_metadata (key, value) 
1569        VALUES ('version', '2.1'),
1570               ('created_at', datetime('now'));
1571               
1572        -- Full-text search for messages (standalone FTS table)
1573        CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
1574            content_raw
1575        );
1576        
1577        -- Triggers to keep FTS index in sync with messages_v2
1578        CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
1579            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1580        END;
1581        
1582        CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
1583            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
1584        END;
1585        
1586        CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
1587            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
1588            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1589        END;
1590        "#,
1591    )?;
1592
1593    Ok(())
1594}
1595
1596fn insert_or_update_session(
1597    conn: &Connection,
1598    session: &ChatSession,
1599    provider: &str,
1600    workspace_id: Option<&str>,
1601    workspace_name: Option<&str>,
1602) -> Result<bool> {
1603    let session_id = session
1604        .session_id
1605        .clone()
1606        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
1607
1608    let now = Utc::now().timestamp_millis();
1609    let session_json = serde_json::to_string(session)?;
1610
1611    // Check if session exists
1612    let existing: Option<i64> = conn
1613        .query_row(
1614            "SELECT updated_at FROM sessions WHERE id = ?",
1615            [&session_id],
1616            |row| row.get(0),
1617        )
1618        .ok();
1619
1620    let updated = existing.is_some();
1621
1622    conn.execute(
1623        r#"
1624        INSERT OR REPLACE INTO sessions 
1625        (id, provider, provider_type, workspace_id, workspace_name, title, 
1626         message_count, created_at, updated_at, harvested_at, session_json)
1627        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1628        "#,
1629        params![
1630            session_id,
1631            provider,
1632            provider,
1633            workspace_id,
1634            workspace_name,
1635            session.title(),
1636            session.request_count() as i64,
1637            session.creation_date,
1638            session.last_message_date,
1639            now,
1640            session_json,
1641        ],
1642    )?;
1643
1644    // Populate enhanced message tables
1645    populate_enhanced_messages(conn, &session_id, session)?;
1646
1647    Ok(updated)
1648}
1649
1650/// Populate the enhanced messages_v2, tool_invocations, and file_changes tables
1651fn populate_enhanced_messages(
1652    conn: &Connection,
1653    session_id: &str,
1654    session: &ChatSession,
1655) -> Result<()> {
1656    // Delete existing messages for this session to avoid duplicates
1657    conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
1658    conn.execute(
1659        "DELETE FROM tool_invocations WHERE session_id = ?",
1660        [session_id],
1661    )?;
1662    conn.execute(
1663        "DELETE FROM file_changes WHERE session_id = ?",
1664        [session_id],
1665    )?;
1666
1667    for (idx, request) in session.requests.iter().enumerate() {
1668        let timestamp = request.timestamp;
1669        let request_id = request.request_id.as_deref();
1670        let response_id = request.response_id.as_deref();
1671        let model_id = request.model_id.as_deref();
1672        let is_canceled = request.is_canceled.unwrap_or(false);
1673
1674        // Insert user message
1675        if let Some(ref message) = request.message {
1676            let content = message.text.clone().unwrap_or_default();
1677            if !content.is_empty() {
1678                let metadata = serde_json::json!({
1679                    "variable_data": request.variable_data,
1680                });
1681
1682                conn.execute(
1683                    r#"
1684                    INSERT OR REPLACE INTO messages_v2 
1685                    (session_id, message_index, request_id, response_id, role, 
1686                     content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1687                    VALUES (?, ?, ?, ?, 'user', ?, ?, ?, ?, 0, ?)
1688                    "#,
1689                    params![
1690                        session_id,
1691                        (idx * 2) as i64,
1692                        request_id,
1693                        response_id,
1694                        &content,
1695                        &content, // content_markdown same as raw for user messages
1696                        model_id,
1697                        timestamp,
1698                        serde_json::to_string(&metadata).ok(),
1699                    ],
1700                )?;
1701            }
1702        }
1703
1704        // Insert assistant response with tool invocations
1705        if let Some(ref response) = request.response {
1706            let (content, tool_invocations) = extract_response_content_and_tools(response);
1707
1708            if !content.is_empty() || !tool_invocations.is_empty() {
1709                let metadata = serde_json::json!({
1710                    "content_references": request.content_references,
1711                    "code_citations": request.code_citations,
1712                    "response_markdown_info": request.response_markdown_info,
1713                });
1714
1715                conn.execute(
1716                    r#"
1717                    INSERT OR REPLACE INTO messages_v2 
1718                    (session_id, message_index, request_id, response_id, role, 
1719                     content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1720                    VALUES (?, ?, ?, ?, 'assistant', ?, ?, ?, ?, ?, ?)
1721                    "#,
1722                    params![
1723                        session_id,
1724                        (idx * 2 + 1) as i64,
1725                        request_id,
1726                        response_id,
1727                        &content,
1728                        &content,
1729                        model_id,
1730                        timestamp,
1731                        is_canceled as i64,
1732                        serde_json::to_string(&metadata).ok(),
1733                    ],
1734                )?;
1735
1736                // Get the message_id we just inserted
1737                let message_id: i64 = conn.last_insert_rowid();
1738
1739                // Insert tool invocations and file changes
1740                for (inv_idx, invocation) in tool_invocations.iter().enumerate() {
1741                    insert_tool_invocation(
1742                        conn, message_id, session_id, inv_idx, invocation, timestamp,
1743                    )?;
1744                }
1745            }
1746        }
1747    }
1748
1749    Ok(())
1750}
1751
1752/// Extract response content and tool invocations from the response JSON
1753fn extract_response_content_and_tools(
1754    response: &serde_json::Value,
1755) -> (String, Vec<serde_json::Value>) {
1756    let mut text_parts = Vec::new();
1757    let mut tool_invocations = Vec::new();
1758
1759    if let Some(items) = response.as_array() {
1760        for item in items {
1761            let kind = item.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1762
1763            match kind {
1764                "toolInvocationSerialized" => {
1765                    tool_invocations.push(item.clone());
1766                }
1767                "thinking" => {
1768                    // Skip thinking blocks
1769                    continue;
1770                }
1771                _ => {
1772                    if let Some(value) = item.get("value").and_then(|v| v.as_str()) {
1773                        // Filter out empty code block markers (```\n or just ```)
1774                        let trimmed = value.trim();
1775                        if !trimmed.is_empty() && !is_empty_code_block(trimmed) {
1776                            text_parts.push(value.to_string());
1777                        }
1778                    }
1779                }
1780            }
1781        }
1782    }
1783
1784    (text_parts.join("\n\n"), tool_invocations)
1785}
1786
1787/// Insert a tool invocation and any associated file changes
1788fn insert_tool_invocation(
1789    conn: &Connection,
1790    message_id: i64,
1791    session_id: &str,
1792    inv_idx: usize,
1793    invocation: &serde_json::Value,
1794    timestamp: Option<i64>,
1795) -> Result<()> {
1796    let tool_name = invocation
1797        .get("toolId")
1798        .and_then(|t| t.as_str())
1799        .unwrap_or("unknown");
1800    let tool_call_id = invocation.get("toolCallId").and_then(|t| t.as_str());
1801    let is_complete = invocation
1802        .get("isComplete")
1803        .and_then(|c| c.as_bool())
1804        .unwrap_or(false);
1805    let is_confirmed = invocation.get("isConfirmed");
1806    let tool_data = invocation.get("toolSpecificData");
1807
1808    let input_json = tool_data.map(|d| serde_json::to_string(d).unwrap_or_default());
1809    let status = if is_complete { "complete" } else { "pending" };
1810    let confirmed = match is_confirmed {
1811        Some(v) => v
1812            .get("type")
1813            .and_then(|t| t.as_i64())
1814            .map(|t| t > 0)
1815            .unwrap_or(false),
1816        None => false,
1817    };
1818
1819    conn.execute(
1820        r#"
1821        INSERT INTO tool_invocations 
1822        (message_id, session_id, tool_name, tool_call_id, invocation_index, 
1823         input_json, status, is_confirmed, timestamp)
1824        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
1825        "#,
1826        params![
1827            message_id,
1828            session_id,
1829            tool_name,
1830            tool_call_id,
1831            inv_idx as i64,
1832            input_json,
1833            status,
1834            confirmed as i64,
1835            timestamp,
1836        ],
1837    )?;
1838
1839    let tool_invocation_id = conn.last_insert_rowid();
1840
1841    // Extract and insert file changes based on tool type
1842    if let Some(data) = tool_data {
1843        insert_file_changes(conn, tool_invocation_id, session_id, data, timestamp)?;
1844    }
1845
1846    Ok(())
1847}
1848
1849/// Insert file changes from tool-specific data
1850fn insert_file_changes(
1851    conn: &Connection,
1852    tool_invocation_id: i64,
1853    session_id: &str,
1854    tool_data: &serde_json::Value,
1855    timestamp: Option<i64>,
1856) -> Result<()> {
1857    let kind = tool_data.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1858
1859    match kind {
1860        "terminal" => {
1861            // Terminal command execution
1862            if let Some(cmd_line) = tool_data.get("commandLine") {
1863                let original = cmd_line.get("original").and_then(|o| o.as_str());
1864                let edited = cmd_line.get("toolEdited").and_then(|e| e.as_str());
1865                let output = tool_data
1866                    .get("terminalCommandOutput")
1867                    .map(|o| serde_json::to_string(o).unwrap_or_default());
1868
1869                conn.execute(
1870                    r#"
1871                    INSERT INTO file_changes 
1872                    (tool_invocation_id, session_id, file_path, change_type, 
1873                     old_content, new_content, diff_unified, timestamp)
1874                    VALUES (?, ?, '[terminal]', 'command', ?, ?, ?, ?)
1875                    "#,
1876                    params![
1877                        tool_invocation_id,
1878                        session_id,
1879                        original,
1880                        edited.or(original),
1881                        output,
1882                        timestamp,
1883                    ],
1884                )?;
1885            }
1886        }
1887        "replaceFile" | "editFile" => {
1888            // File edit with old/new strings
1889            let file_path = tool_data
1890                .get("uri")
1891                .or_else(|| tool_data.get("filePath"))
1892                .and_then(|p| p.as_str())
1893                .unwrap_or("[unknown]");
1894            let old_string = tool_data.get("oldString").and_then(|s| s.as_str());
1895            let new_string = tool_data.get("newString").and_then(|s| s.as_str());
1896
1897            // Generate unified diff if we have both old and new content
1898            let diff = if let (Some(old), Some(new)) = (old_string, new_string) {
1899                Some(generate_unified_diff(old, new, file_path))
1900            } else {
1901                None
1902            };
1903
1904            conn.execute(
1905                r#"
1906                INSERT INTO file_changes 
1907                (tool_invocation_id, session_id, file_path, change_type, 
1908                 old_content, new_content, diff_unified, timestamp)
1909                VALUES (?, ?, ?, 'edit', ?, ?, ?, ?)
1910                "#,
1911                params![
1912                    tool_invocation_id,
1913                    session_id,
1914                    file_path,
1915                    old_string,
1916                    new_string,
1917                    diff,
1918                    timestamp,
1919                ],
1920            )?;
1921        }
1922        "createFile" => {
1923            let file_path = tool_data
1924                .get("uri")
1925                .or_else(|| tool_data.get("filePath"))
1926                .and_then(|p| p.as_str())
1927                .unwrap_or("[unknown]");
1928            let content = tool_data.get("content").and_then(|c| c.as_str());
1929
1930            conn.execute(
1931                r#"
1932                INSERT INTO file_changes 
1933                (tool_invocation_id, session_id, file_path, change_type, 
1934                 new_content, timestamp)
1935                VALUES (?, ?, ?, 'create', ?, ?)
1936                "#,
1937                params![
1938                    tool_invocation_id,
1939                    session_id,
1940                    file_path,
1941                    content,
1942                    timestamp,
1943                ],
1944            )?;
1945        }
1946        "readFile" => {
1947            let file_path = tool_data
1948                .get("uri")
1949                .or_else(|| tool_data.get("filePath"))
1950                .and_then(|p| p.as_str())
1951                .unwrap_or("[unknown]");
1952
1953            conn.execute(
1954                r#"
1955                INSERT INTO file_changes 
1956                (tool_invocation_id, session_id, file_path, change_type, timestamp)
1957                VALUES (?, ?, ?, 'read', ?)
1958                "#,
1959                params![tool_invocation_id, session_id, file_path, timestamp,],
1960            )?;
1961        }
1962        _ => {
1963            // Other tool types - store as generic change
1964            if !kind.is_empty() {
1965                let data_json = serde_json::to_string(tool_data).ok();
1966                conn.execute(
1967                    r#"
1968                    INSERT INTO file_changes 
1969                    (tool_invocation_id, session_id, file_path, change_type, 
1970                     diff_unified, timestamp)
1971                    VALUES (?, ?, ?, ?, ?, ?)
1972                    "#,
1973                    params![
1974                        tool_invocation_id,
1975                        session_id,
1976                        format!("[{}]", kind),
1977                        kind,
1978                        data_json,
1979                        timestamp,
1980                    ],
1981                )?;
1982            }
1983        }
1984    }
1985
1986    Ok(())
1987}
1988
1989/// Generate a simple unified diff between two strings
1990fn generate_unified_diff(old: &str, new: &str, file_path: &str) -> String {
1991    let old_lines: Vec<&str> = old.lines().collect();
1992    let new_lines: Vec<&str> = new.lines().collect();
1993
1994    let mut diff = format!("--- a/{}\n+++ b/{}\n", file_path, file_path);
1995
1996    // Simple line-by-line diff (not a full Myers diff, but good enough for storage)
1997    let max_lines = old_lines.len().max(new_lines.len());
1998    let mut in_hunk = false;
1999    let mut hunk_start = 0;
2000    let mut hunk_lines = Vec::new();
2001
2002    for i in 0..max_lines {
2003        let old_line = old_lines.get(i).copied();
2004        let new_line = new_lines.get(i).copied();
2005
2006        match (old_line, new_line) {
2007            (Some(o), Some(n)) if o == n => {
2008                if in_hunk {
2009                    hunk_lines.push(format!(" {}", o));
2010                }
2011            }
2012            (Some(o), Some(n)) => {
2013                if !in_hunk {
2014                    in_hunk = true;
2015                    hunk_start = i + 1;
2016                }
2017                hunk_lines.push(format!("-{}", o));
2018                hunk_lines.push(format!("+{}", n));
2019            }
2020            (Some(o), None) => {
2021                if !in_hunk {
2022                    in_hunk = true;
2023                    hunk_start = i + 1;
2024                }
2025                hunk_lines.push(format!("-{}", o));
2026            }
2027            (None, Some(n)) => {
2028                if !in_hunk {
2029                    in_hunk = true;
2030                    hunk_start = i + 1;
2031                }
2032                hunk_lines.push(format!("+{}", n));
2033            }
2034            (None, None) => break,
2035        }
2036    }
2037
2038    if !hunk_lines.is_empty() {
2039        diff.push_str(&format!(
2040            "@@ -{},{} +{},{} @@\n",
2041            hunk_start,
2042            old_lines.len(),
2043            hunk_start,
2044            new_lines.len()
2045        ));
2046        for line in hunk_lines {
2047            diff.push_str(&line);
2048            diff.push('\n');
2049        }
2050    }
2051
2052    diff
2053}
2054
2055/// Harvest sessions from web-based cloud providers (ChatGPT, Claude, etc.)
2056fn harvest_web_providers(
2057    conn: &Connection,
2058    stats: &mut HarvestStats,
2059    include_providers: &Vec<String>,
2060    exclude_providers: &Vec<String>,
2061) -> Result<()> {
2062    use crate::browser::extract_provider_cookies;
2063
2064    println!("\n{} Harvesting from web providers...", "[*]".blue());
2065
2066    // Define web providers to harvest from
2067    let web_provider_configs: Vec<(&str, &str, &str)> = vec![
2068        ("ChatGPT", "chatgpt", "__Secure-next-auth.session-token"),
2069        ("Claude", "claude", "sessionKey"),
2070    ];
2071
2072    let mut web_sessions_harvested = 0;
2073
2074    for (display_name, provider_key, _cookie_name) in &web_provider_configs {
2075        // Check provider filters
2076        if !include_providers.is_empty()
2077            && !include_providers
2078                .iter()
2079                .any(|p| p.eq_ignore_ascii_case(provider_key))
2080        {
2081            continue;
2082        }
2083        if exclude_providers
2084            .iter()
2085            .any(|p| p.eq_ignore_ascii_case(provider_key))
2086        {
2087            continue;
2088        }
2089
2090        print!("   {} Checking {} ... ", "[-]".yellow(), display_name);
2091
2092        // Try to extract cookies for this provider
2093        if let Some(creds) = extract_provider_cookies(provider_key) {
2094            if let Some(session_token) = &creds.session_token {
2095                println!("{}", "authenticated".green());
2096
2097                // Create provider and fetch conversations
2098                let result = match *provider_key {
2099                    "chatgpt" => harvest_chatgpt_sessions(conn, session_token, stats),
2100                    "claude" => harvest_claude_sessions(conn, session_token, stats),
2101                    _ => Ok(0),
2102                };
2103
2104                match result {
2105                    Ok(count) => {
2106                        if count > 0 {
2107                            println!(
2108                                "      {} Harvested {} sessions from {}",
2109                                "[+]".green(),
2110                                count.to_string().cyan(),
2111                                display_name
2112                            );
2113                            web_sessions_harvested += count;
2114                        }
2115                    }
2116                    Err(e) => {
2117                        println!(
2118                            "      {} Failed to harvest {}: {:?}",
2119                            "[!]".red(),
2120                            display_name,
2121                            e
2122                        );
2123                    }
2124                }
2125            } else {
2126                println!("{}", "no session token".yellow());
2127            }
2128        } else {
2129            println!("{}", "not authenticated".yellow());
2130        }
2131    }
2132
2133    if web_sessions_harvested > 0 {
2134        println!(
2135            "   {} Total web sessions harvested: {}",
2136            "[+]".green(),
2137            web_sessions_harvested.to_string().cyan()
2138        );
2139    }
2140
2141    Ok(())
2142}
2143
2144/// Harvest sessions from ChatGPT web interface
2145fn harvest_chatgpt_sessions(
2146    conn: &Connection,
2147    session_token: &str,
2148    stats: &mut HarvestStats,
2149) -> Result<usize> {
2150    use crate::providers::cloud::chatgpt::ChatGPTProvider;
2151    use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2152
2153    let provider = ChatGPTProvider::with_session_token(session_token.to_string());
2154
2155    // List all conversations
2156    let options = FetchOptions {
2157        limit: Some(100),
2158        include_archived: false,
2159        after: None,
2160        before: None,
2161        session_token: Some(session_token.to_string()),
2162    };
2163
2164    let conversations = provider
2165        .list_conversations(&options)
2166        .context("Failed to list ChatGPT conversations")?;
2167
2168    let mut harvested = 0;
2169
2170    for conv_summary in conversations {
2171        // Check if we already have this conversation
2172        let exists: bool = conn
2173            .query_row(
2174                "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'chatgpt')",
2175                params![&conv_summary.id],
2176                |row| row.get(0),
2177            )
2178            .unwrap_or(false);
2179
2180        if exists {
2181            // Could add update logic here later
2182            continue;
2183        }
2184
2185        // Fetch full conversation
2186        match provider.fetch_conversation(&conv_summary.id) {
2187            Ok(conv) => {
2188                // Insert into database using existing function
2189                if let Err(e) =
2190                    insert_cloud_conversation_to_harvest_db(conn, &conv, "chatgpt", None)
2191                {
2192                    eprintln!("Failed to insert ChatGPT session: {}", e);
2193                    continue;
2194                }
2195                harvested += 1;
2196                stats.sessions_added += 1;
2197            }
2198            Err(e) => {
2199                eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2200            }
2201        }
2202
2203        // Small delay to avoid rate limiting
2204        std::thread::sleep(std::time::Duration::from_millis(100));
2205    }
2206
2207    Ok(harvested)
2208}
2209
2210/// Harvest sessions from Claude web interface
2211fn harvest_claude_sessions(
2212    conn: &Connection,
2213    session_token: &str,
2214    stats: &mut HarvestStats,
2215) -> Result<usize> {
2216    use crate::providers::cloud::anthropic::AnthropicProvider;
2217    use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2218
2219    let provider = AnthropicProvider::with_session_token(session_token.to_string());
2220
2221    // List all conversations
2222    let options = FetchOptions {
2223        limit: Some(100),
2224        include_archived: false,
2225        after: None,
2226        before: None,
2227        session_token: Some(session_token.to_string()),
2228    };
2229
2230    let conversations = provider
2231        .list_conversations(&options)
2232        .context("Failed to list Claude conversations")?;
2233
2234    let mut harvested = 0;
2235
2236    for conv_summary in conversations {
2237        // Check if we already have this conversation
2238        let exists: bool = conn
2239            .query_row(
2240                "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'claude')",
2241                params![&conv_summary.id],
2242                |row| row.get(0),
2243            )
2244            .unwrap_or(false);
2245
2246        if exists {
2247            continue;
2248        }
2249
2250        // Fetch full conversation
2251        match provider.fetch_conversation(&conv_summary.id) {
2252            Ok(conv) => {
2253                if let Err(e) = insert_cloud_conversation_to_harvest_db(conn, &conv, "claude", None)
2254                {
2255                    eprintln!("Failed to insert Claude session: {}", e);
2256                    continue;
2257                }
2258                harvested += 1;
2259                stats.sessions_added += 1;
2260            }
2261            Err(e) => {
2262                eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2263            }
2264        }
2265
2266        std::thread::sleep(std::time::Duration::from_millis(100));
2267    }
2268
2269    Ok(harvested)
2270}
2271
2272/// Insert a CloudConversation into the harvest database
2273fn insert_cloud_conversation_to_harvest_db(
2274    conn: &Connection,
2275    conv: &crate::providers::cloud::common::CloudConversation,
2276    provider: &str,
2277    workspace_name: Option<&str>,
2278) -> Result<()> {
2279    let now = Utc::now().timestamp_millis();
2280    let created_at = conv.created_at.timestamp_millis();
2281    let updated_at = conv
2282        .updated_at
2283        .map(|dt| dt.timestamp_millis())
2284        .unwrap_or(created_at);
2285
2286    // Build a session-like JSON structure for compatibility
2287    let session_json = serde_json::json!({
2288        "id": conv.id,
2289        "title": conv.title,
2290        "model": conv.model,
2291        "created_at": conv.created_at.to_rfc3339(),
2292        "updated_at": conv.updated_at.map(|dt| dt.to_rfc3339()),
2293        "messages": conv.messages.iter().map(|m| {
2294            serde_json::json!({
2295                "id": m.id,
2296                "role": m.role,
2297                "content": m.content,
2298                "timestamp": m.timestamp.map(|dt| dt.to_rfc3339()),
2299                "model": m.model,
2300            })
2301        }).collect::<Vec<_>>(),
2302    });
2303
2304    conn.execute(
2305        r#"
2306        INSERT OR REPLACE INTO sessions 
2307        (id, provider, provider_type, workspace_id, workspace_name, title, 
2308         message_count, created_at, updated_at, harvested_at, session_json)
2309        VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
2310        "#,
2311        params![
2312            conv.id,
2313            provider,
2314            provider,
2315            workspace_name,
2316            conv.title.clone().unwrap_or_else(|| "Untitled".to_string()),
2317            conv.messages.len() as i64,
2318            created_at,
2319            updated_at,
2320            now,
2321            session_json.to_string(),
2322        ],
2323    )?;
2324
2325    // Also populate messages_v2 table for detailed message storage
2326    populate_cloud_messages(conn, &conv.id, conv)?;
2327
2328    Ok(())
2329}
2330
2331/// Populate messages_v2 table from a CloudConversation
2332fn populate_cloud_messages(
2333    conn: &Connection,
2334    session_id: &str,
2335    conv: &crate::providers::cloud::common::CloudConversation,
2336) -> Result<()> {
2337    // Delete existing messages for this session to avoid duplicates
2338    conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
2339
2340    for (idx, message) in conv.messages.iter().enumerate() {
2341        let timestamp = message.timestamp.map(|dt| dt.timestamp_millis());
2342        let role = match message.role.as_str() {
2343            "user" | "human" => "user",
2344            "assistant" => "assistant",
2345            "system" => "system",
2346            other => other,
2347        };
2348
2349        conn.execute(
2350            r#"
2351            INSERT INTO messages_v2 
2352            (session_id, message_index, request_id, response_id, role, 
2353             content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
2354            VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, 0, NULL)
2355            "#,
2356            params![
2357                session_id,
2358                idx as i64,
2359                message.id,
2360                role,
2361                &message.content,
2362                &message.content,
2363                message.model.as_deref(),
2364                timestamp,
2365            ],
2366        )?;
2367    }
2368
2369    Ok(())
2370}
2371
2372fn update_harvest_metadata(conn: &Connection) -> Result<()> {
2373    conn.execute(
2374        "INSERT OR REPLACE INTO harvest_metadata (key, value) VALUES ('last_harvest', datetime('now'))",
2375        [],
2376    )?;
2377    Ok(())
2378}
2379
2380fn init_git_tracking(db_path: &Path) -> Result<()> {
2381    let db_dir = db_path.parent().unwrap_or(Path::new("."));
2382    let db_name = db_path.file_name().unwrap().to_str().unwrap();
2383
2384    println!("\n{} Initializing Git Tracking", "[G]".green().bold());
2385
2386    // Check if already a git repo
2387    if !db_dir.join(".git").exists() {
2388        let output = Command::new("git")
2389            .current_dir(db_dir)
2390            .args(["init"])
2391            .output()
2392            .context("Failed to run git init")?;
2393
2394        if !output.status.success() {
2395            anyhow::bail!(
2396                "Git init failed: {}",
2397                String::from_utf8_lossy(&output.stderr)
2398            );
2399        }
2400        println!("{} Initialized git repository", "[+]".green());
2401    } else {
2402        println!("{} Git repository already exists", "[i]".blue());
2403    }
2404
2405    // Create .gitignore if needed
2406    let gitignore_path = db_dir.join(".gitignore");
2407    if !gitignore_path.exists() {
2408        fs::write(
2409            &gitignore_path,
2410            "*.db-journal\n*.db-wal\n*.db-shm\n*.backup\n",
2411        )?;
2412        println!("{} Created .gitignore", "[+]".green());
2413    }
2414
2415    // Add database to git
2416    let output = Command::new("git")
2417        .current_dir(db_dir)
2418        .args(["add", db_name])
2419        .output()?;
2420
2421    if output.status.success() {
2422        println!("{} Added {} to git", "[+]".green(), db_name);
2423    }
2424
2425    // Initial commit if no commits yet
2426    let output = Command::new("git")
2427        .current_dir(db_dir)
2428        .args(["rev-parse", "HEAD"])
2429        .output()?;
2430
2431    if !output.status.success() {
2432        let output = Command::new("git")
2433            .current_dir(db_dir)
2434            .args(["commit", "-m", "Initialize harvest database"])
2435            .output()?;
2436
2437        if output.status.success() {
2438            println!("{} Created initial commit", "[+]".green());
2439        }
2440    }
2441
2442    println!("\n{} Git tracking enabled!", "[+]".green().bold());
2443    println!("   Run 'csm harvest git commit -m \"message\"' to save changes");
2444    println!("   Run 'csm harvest git log' to view history");
2445
2446    Ok(())
2447}
2448
2449fn git_commit_harvest(db_path: &Path, message: &str) -> Result<()> {
2450    let db_dir = db_path.parent().unwrap_or(Path::new("."));
2451    let db_name = db_path.file_name().unwrap().to_str().unwrap();
2452
2453    // Stage the database
2454    let output = Command::new("git")
2455        .current_dir(db_dir)
2456        .args(["add", db_name])
2457        .output()
2458        .context("Failed to stage database")?;
2459
2460    if !output.status.success() {
2461        anyhow::bail!(
2462            "Git add failed: {}",
2463            String::from_utf8_lossy(&output.stderr)
2464        );
2465    }
2466
2467    // Commit
2468    let output = Command::new("git")
2469        .current_dir(db_dir)
2470        .args(["commit", "-m", message])
2471        .output()
2472        .context("Failed to commit")?;
2473
2474    if !output.status.success() {
2475        let stderr = String::from_utf8_lossy(&output.stderr);
2476        if stderr.contains("nothing to commit") {
2477            println!("{} Nothing to commit", "[i]".blue());
2478            return Ok(());
2479        }
2480        anyhow::bail!("Git commit failed: {}", stderr);
2481    }
2482
2483    // Get commit hash
2484    let output = Command::new("git")
2485        .current_dir(db_dir)
2486        .args(["rev-parse", "--short", "HEAD"])
2487        .output()?;
2488
2489    let hash = String::from_utf8_lossy(&output.stdout).trim().to_string();
2490    println!("{} Committed: {} - {}", "[+]".green(), hash.cyan(), message);
2491
2492    Ok(())
2493}
2494
2495// ============================================================================
2496// Share Link Commands
2497// ============================================================================
2498
2499/// Import a chat session from a share link URL
2500pub fn harvest_share(
2501    db_path: Option<&str>,
2502    url: &str,
2503    name: Option<&str>,
2504    _workspace: Option<&str>,
2505) -> Result<()> {
2506    let db_path = get_db_path(db_path)?;
2507
2508    // Parse the share link
2509    let share_info = match ShareLinkParser::parse(url) {
2510        Some(info) => info,
2511        None => {
2512            println!("{} Unrecognized share link format", "[!]".yellow());
2513            println!("   Supported providers: ChatGPT, Claude, Gemini, Perplexity");
2514            println!("   Example URLs:");
2515            println!("   - https://chat.openai.com/share/abc123...");
2516            println!("   - https://claude.ai/share/xyz789...");
2517            anyhow::bail!("Could not parse share link URL");
2518        }
2519    };
2520
2521    println!("{}", "=".repeat(60).cyan());
2522    println!("{}", " Share Link Import ".bold().cyan());
2523    println!("{}", "=".repeat(60).cyan());
2524    println!();
2525
2526    println!(
2527        "{} Detected provider: {}",
2528        "[i]".blue(),
2529        share_info.provider.bold()
2530    );
2531    println!("{} Share ID: {}", "[i]".blue(), share_info.share_id);
2532
2533    // Open or create the database with new schema
2534    let db = ChatDatabase::open(&db_path)?;
2535
2536    // Check if already imported
2537    let conn = db.connection();
2538    let existing: Option<(String, i64)> = conn
2539        .query_row(
2540            "SELECT id, imported FROM share_links WHERE url = ?",
2541            [url],
2542            |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
2543        )
2544        .ok();
2545
2546    if let Some((id, imported)) = existing {
2547        println!();
2548        println!(
2549            "{} This share link has already been registered (ID: {})",
2550            "[!]".yellow(),
2551            id
2552        );
2553
2554        let status = if imported == 1 { "imported" } else { "pending" };
2555        println!("{} Current status: {}", "[i]".blue(), status);
2556
2557        if imported == 1 {
2558            println!(
2559                "{} Session already imported - no action needed",
2560                "[+]".green()
2561            );
2562            return Ok(());
2563        }
2564    } else {
2565        // Insert the share link as pending
2566        let link_id = uuid::Uuid::new_v4().to_string();
2567        let now = chrono::Utc::now().timestamp();
2568        conn.execute(
2569            "INSERT INTO share_links (id, url, provider, share_id, title, imported, created_at) VALUES (?, ?, ?, ?, ?, 0, ?)",
2570            params![link_id, url, share_info.provider, share_info.share_id, name, now],
2571        )?;
2572
2573        println!(
2574            "{} Registered share link (ID: {})",
2575            "[+]".green(),
2576            &link_id[..8]
2577        );
2578    }
2579
2580    println!();
2581    println!("{}", "-".repeat(60).dimmed());
2582    println!();
2583
2584    // TODO: Implement actual content fetching
2585    // For now, we just register the share link and mark it as pending
2586    // Actual fetching would require:
2587    // 1. Browser cookie extraction (already implemented in browser.rs)
2588    // 2. HTTP request with auth cookies
2589    // 3. HTML/JSON parsing of the share page
2590    // 4. Conversion to our universal format
2591
2592    println!("{} Share link registered as pending", "[i]".blue());
2593    println!();
2594    println!("{}", "Note:".bold().yellow());
2595    println!("   Automatic content fetching from share links is not yet implemented.");
2596    println!("   For now, you can:");
2597    println!("   1. Open the share link in your browser");
2598    println!("   2. Export the conversation manually");
2599    println!("   3. Import with: csm import <file>");
2600    println!();
2601    println!("   Or use 'csm harvest shares' to view pending links.");
2602
2603    Ok(())
2604}
2605
2606/// List share links in the harvest database
2607pub fn harvest_shares(
2608    db_path: Option<&str>,
2609    status_filter: Option<&str>,
2610    limit: usize,
2611) -> Result<()> {
2612    let db_path = get_db_path(db_path)?;
2613
2614    if !db_path.exists() {
2615        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2616    }
2617
2618    let db = ChatDatabase::open(&db_path)?;
2619    let conn = db.connection();
2620
2621    println!("{}", "=".repeat(70).cyan());
2622    println!("{}", " Share Links ".bold().cyan());
2623    println!("{}", "=".repeat(70).cyan());
2624    println!();
2625
2626    let query = match status_filter {
2627        Some("pending") => format!(
2628            "SELECT id, url, provider, share_id, title, imported, created_at 
2629             FROM share_links WHERE imported = 0 ORDER BY created_at DESC LIMIT {}",
2630            limit
2631        ),
2632        Some("imported") => format!(
2633            "SELECT id, url, provider, share_id, title, imported, created_at 
2634             FROM share_links WHERE imported = 1 ORDER BY created_at DESC LIMIT {}",
2635            limit
2636        ),
2637        Some(_) | None => format!(
2638            "SELECT id, url, provider, share_id, title, imported, created_at 
2639             FROM share_links ORDER BY created_at DESC LIMIT {}",
2640            limit
2641        ),
2642    };
2643
2644    let mut stmt = conn.prepare(&query)?;
2645
2646    let rows: Vec<_> = stmt
2647        .query_map([], |row| {
2648            Ok((
2649                row.get::<_, String>(0)?,
2650                row.get::<_, String>(1)?,
2651                row.get::<_, String>(2)?,
2652                row.get::<_, Option<String>>(3)?,
2653                row.get::<_, Option<String>>(4)?,
2654                row.get::<_, i64>(5)?,
2655                row.get::<_, i64>(6)?,
2656            ))
2657        })?
2658        .collect::<Result<Vec<_>, _>>()?;
2659
2660    if rows.is_empty() {
2661        if let Some(status) = status_filter {
2662            println!("{} No share links with status '{}'", "[i]".blue(), status);
2663        } else {
2664            println!("{} No share links found", "[i]".blue());
2665        }
2666        println!("   Use 'csm harvest share <url>' to add a share link");
2667        return Ok(());
2668    }
2669
2670    // Count by status
2671    let pending_count: i64 = conn
2672        .query_row(
2673            "SELECT COUNT(*) FROM share_links WHERE imported = 0",
2674            [],
2675            |row| row.get(0),
2676        )
2677        .unwrap_or(0);
2678
2679    let imported_count: i64 = conn
2680        .query_row(
2681            "SELECT COUNT(*) FROM share_links WHERE imported = 1",
2682            [],
2683            |row| row.get(0),
2684        )
2685        .unwrap_or(0);
2686
2687    println!(
2688        "{} {} pending, {} imported",
2689        "[i]".blue(),
2690        pending_count.to_string().yellow(),
2691        imported_count.to_string().green()
2692    );
2693    println!();
2694
2695    for (id, url, provider, _share_id, title, imported, created_at) in rows {
2696        let status = if imported == 1 { "imported" } else { "pending" };
2697        let status_colored = if imported == 1 {
2698            status.green()
2699        } else {
2700            status.yellow()
2701        };
2702
2703        println!(
2704            "{} [{}] {} - {}",
2705            format!("#{}", &id[..8]).dimmed(),
2706            status_colored,
2707            provider.bold(),
2708            title.as_deref().unwrap_or("(untitled)")
2709        );
2710
2711        // Truncate URL for display
2712        let display_url = if url.len() > 60 {
2713            format!("{}...", &url[..57])
2714        } else {
2715            url.clone()
2716        };
2717        println!("   {} {}", "URL:".dimmed(), display_url.dimmed());
2718
2719        // Format timestamp
2720        let timestamp = chrono::DateTime::from_timestamp(created_at, 0)
2721            .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
2722            .unwrap_or_else(|| created_at.to_string());
2723        println!("   {} {}", "Added:".dimmed(), timestamp.dimmed());
2724        println!();
2725    }
2726
2727    Ok(())
2728}
2729
2730// ============================================================================
2731// Checkpoint Commands
2732// ============================================================================
2733
2734/// Create a checkpoint (version snapshot) of a session
2735pub fn harvest_checkpoint(
2736    db_path: Option<&str>,
2737    session_id: &str,
2738    message: Option<&str>,
2739) -> Result<()> {
2740    let db_path = get_db_path(db_path)?;
2741
2742    if !db_path.exists() {
2743        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2744    }
2745
2746    let db = ChatDatabase::open(&db_path)?;
2747    let conn = db.connection();
2748
2749    // Find the session
2750    let session: Option<(i64, String, i64)> = conn
2751        .query_row(
2752            "SELECT id, session_id, message_count FROM sessions WHERE session_id = ? OR id = ?",
2753            params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2754            |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2755        )
2756        .ok();
2757
2758    let (internal_id, actual_session_id, msg_count) = match session {
2759        Some(s) => s,
2760        None => {
2761            println!("{} Session not found: {}", "[!]".red(), session_id);
2762            println!("   Use 'csm harvest list' to see available sessions");
2763            anyhow::bail!("Session not found");
2764        }
2765    };
2766
2767    // Get current checkpoint number
2768    let checkpoint_num: i64 = conn.query_row(
2769        "SELECT COALESCE(MAX(checkpoint_number), 0) + 1 FROM checkpoints WHERE session_id = ?",
2770        [internal_id],
2771        |row| row.get(0),
2772    )?;
2773
2774    // Calculate content hash (simple hash of message count + session_id for now)
2775    let content_hash = format!(
2776        "{:x}",
2777        md5_hash(&format!("{}:{}", actual_session_id, msg_count))
2778    );
2779
2780    // Get message snapshot
2781    let messages: Vec<String> = {
2782        let mut stmt = conn
2783            .prepare("SELECT content FROM messages WHERE session_id = ? ORDER BY message_index")?;
2784        let rows = stmt.query_map([internal_id], |row| row.get::<_, String>(0))?;
2785        rows.filter_map(|r| r.ok()).collect()
2786    };
2787
2788    let snapshot = serde_json::json!({
2789        "message_count": msg_count,
2790        "messages": messages,
2791    });
2792
2793    let default_message = format!("Checkpoint {}", checkpoint_num);
2794    let message_text = message.unwrap_or(&default_message);
2795
2796    // Create checkpoint
2797    conn.execute(
2798        "INSERT INTO checkpoints (session_id, checkpoint_number, message, message_count, content_hash, snapshot)
2799         VALUES (?, ?, ?, ?, ?, ?)",
2800        params![
2801            internal_id,
2802            checkpoint_num,
2803            message_text,
2804            msg_count,
2805            content_hash,
2806            snapshot.to_string()
2807        ],
2808    )?;
2809
2810    println!("{}", "=".repeat(60).cyan());
2811    println!("{}", " Checkpoint Created ".bold().cyan());
2812    println!("{}", "=".repeat(60).cyan());
2813    println!();
2814    println!("{} Session: {}", "[+]".green(), actual_session_id);
2815    println!(
2816        "{} Checkpoint #{}: {}",
2817        "[+]".green(),
2818        checkpoint_num,
2819        message_text
2820    );
2821    println!("{} Messages: {}", "[i]".blue(), msg_count);
2822    println!("{} Hash: {}", "[i]".blue(), &content_hash[..16]);
2823    println!();
2824    println!(
2825        "   Use 'csm harvest checkpoints {}' to view history",
2826        session_id
2827    );
2828
2829    Ok(())
2830}
2831
2832/// List checkpoints for a session
2833pub fn harvest_checkpoints(db_path: Option<&str>, session_id: &str) -> Result<()> {
2834    let db_path = get_db_path(db_path)?;
2835
2836    if !db_path.exists() {
2837        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2838    }
2839
2840    let db = ChatDatabase::open(&db_path)?;
2841    let conn = db.connection();
2842
2843    // Find the session
2844    let session: Option<(i64, String, String)> = conn
2845        .query_row(
2846            "SELECT id, session_id, name FROM sessions WHERE session_id = ? OR id = ?",
2847            params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2848            |row| {
2849                Ok((
2850                    row.get(0)?,
2851                    row.get(1)?,
2852                    row.get::<_, Option<String>>(2)?.unwrap_or_default(),
2853                ))
2854            },
2855        )
2856        .ok();
2857
2858    let (internal_id, actual_session_id, name) = match session {
2859        Some(s) => s,
2860        None => {
2861            println!("{} Session not found: {}", "[!]".red(), session_id);
2862            anyhow::bail!("Session not found");
2863        }
2864    };
2865
2866    println!("{}", "=".repeat(70).cyan());
2867    println!("{}", " Session Checkpoints ".bold().cyan());
2868    println!("{}", "=".repeat(70).cyan());
2869    println!();
2870    println!(
2871        "{} Session: {} {}",
2872        "[i]".blue(),
2873        actual_session_id,
2874        if !name.is_empty() {
2875            format!("({})", name)
2876        } else {
2877            String::new()
2878        }
2879    );
2880    println!();
2881
2882    let mut stmt = conn.prepare(
2883        "SELECT checkpoint_number, message, message_count, content_hash, created_at
2884         FROM checkpoints WHERE session_id = ? ORDER BY checkpoint_number DESC",
2885    )?;
2886
2887    let checkpoints: Vec<_> = stmt
2888        .query_map([internal_id], |row| {
2889            Ok((
2890                row.get::<_, i64>(0)?,
2891                row.get::<_, String>(1)?,
2892                row.get::<_, i64>(2)?,
2893                row.get::<_, String>(3)?,
2894                row.get::<_, String>(4)?,
2895            ))
2896        })?
2897        .collect::<Result<Vec<_>, _>>()?;
2898
2899    if checkpoints.is_empty() {
2900        println!("{} No checkpoints found for this session", "[i]".blue());
2901        println!(
2902            "   Use 'csm harvest checkpoint {} -m \"message\"' to create one",
2903            session_id
2904        );
2905        return Ok(());
2906    }
2907
2908    println!("{} {} checkpoints found:", "[i]".blue(), checkpoints.len());
2909    println!();
2910
2911    for (num, msg, msg_count, hash, created_at) in checkpoints {
2912        println!("  {} #{} - {}", "*".cyan(), num.to_string().bold(), msg);
2913        println!(
2914            "       {} messages | {} | {}",
2915            msg_count,
2916            &hash[..12],
2917            created_at.dimmed()
2918        );
2919    }
2920
2921    println!();
2922    println!(
2923        "   Use 'csm harvest restore {} <checkpoint>' to restore",
2924        session_id
2925    );
2926
2927    Ok(())
2928}
2929
2930/// Restore a session to a previous checkpoint
2931pub fn harvest_restore_checkpoint(
2932    db_path: Option<&str>,
2933    session_id: &str,
2934    checkpoint_number: i64,
2935) -> Result<()> {
2936    let db_path = get_db_path(db_path)?;
2937
2938    if !db_path.exists() {
2939        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2940    }
2941
2942    let db = ChatDatabase::open(&db_path)?;
2943    let conn = db.connection();
2944
2945    // Find the session
2946    let session: Option<(i64, String)> = conn
2947        .query_row(
2948            "SELECT id, session_id FROM sessions WHERE session_id = ? OR id = ?",
2949            params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2950            |row| Ok((row.get(0)?, row.get(1)?)),
2951        )
2952        .ok();
2953
2954    let (internal_id, actual_session_id) = match session {
2955        Some(s) => s,
2956        None => {
2957            println!("{} Session not found: {}", "[!]".red(), session_id);
2958            anyhow::bail!("Session not found");
2959        }
2960    };
2961
2962    // Find the checkpoint
2963    let checkpoint: Option<(String, i64)> = conn
2964        .query_row(
2965            "SELECT snapshot, message_count FROM checkpoints 
2966         WHERE session_id = ? AND checkpoint_number = ?",
2967            params![internal_id, checkpoint_number],
2968            |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
2969        )
2970        .ok();
2971
2972    let (snapshot_json, original_msg_count) = match checkpoint {
2973        Some(c) => c,
2974        None => {
2975            println!(
2976                "{} Checkpoint #{} not found for session {}",
2977                "[!]".red(),
2978                checkpoint_number,
2979                session_id
2980            );
2981            println!(
2982                "   Use 'csm harvest checkpoints {}' to see available checkpoints",
2983                session_id
2984            );
2985            anyhow::bail!("Checkpoint not found");
2986        }
2987    };
2988
2989    // Parse snapshot
2990    let snapshot: serde_json::Value =
2991        serde_json::from_str(&snapshot_json).context("Failed to parse checkpoint snapshot")?;
2992
2993    let messages = snapshot["messages"]
2994        .as_array()
2995        .context("Invalid snapshot format")?;
2996
2997    println!("{}", "=".repeat(60).cyan());
2998    println!("{}", " Restore Checkpoint ".bold().yellow());
2999    println!("{}", "=".repeat(60).cyan());
3000    println!();
3001    println!("{} Session: {}", "[i]".blue(), actual_session_id);
3002    println!(
3003        "{} Restoring to checkpoint #{}",
3004        "[!]".yellow(),
3005        checkpoint_number
3006    );
3007    println!(
3008        "{} Messages to restore: {}",
3009        "[i]".blue(),
3010        original_msg_count
3011    );
3012    println!();
3013
3014    // Delete current messages
3015    let deleted = conn.execute("DELETE FROM messages WHERE session_id = ?", [internal_id])?;
3016
3017    println!("{} Removed {} current messages", "[-]".red(), deleted);
3018
3019    // Restore messages from snapshot
3020    for (idx, msg) in messages.iter().enumerate() {
3021        if let Some(content) = msg.as_str() {
3022            conn.execute(
3023                "INSERT INTO messages (session_id, message_index, role, content) VALUES (?, ?, 'unknown', ?)",
3024                params![internal_id, idx as i64, content],
3025            )?;
3026        }
3027    }
3028
3029    // Update session message count
3030    conn.execute(
3031        "UPDATE sessions SET message_count = ?, updated_at = datetime('now') WHERE id = ?",
3032        params![original_msg_count, internal_id],
3033    )?;
3034
3035    println!(
3036        "{} Restored {} messages from checkpoint",
3037        "[+]".green(),
3038        messages.len()
3039    );
3040    println!();
3041    println!(
3042        "{} Session restored to checkpoint #{}",
3043        "[+]".green().bold(),
3044        checkpoint_number
3045    );
3046
3047    Ok(())
3048}
3049
3050// ============================================================================
3051// Search Commands
3052// ============================================================================
3053
3054/// Rebuild the FTS index from messages_v2 table
3055pub fn harvest_rebuild_fts(db_path: Option<&str>) -> Result<()> {
3056    let db_path = get_db_path(db_path)?;
3057
3058    if !db_path.exists() {
3059        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3060    }
3061
3062    let conn = Connection::open(&db_path)?;
3063
3064    println!("{}", "=".repeat(70).cyan());
3065    println!("{} Rebuilding Full-Text Search Index", "[*]".bold());
3066    println!("{}", "=".repeat(70).cyan());
3067    println!();
3068
3069    // Drop existing FTS table and triggers
3070    println!("{} Dropping old FTS index...", "[*]".blue());
3071    conn.execute_batch(
3072        r#"
3073        DROP TRIGGER IF EXISTS messages_v2_ai;
3074        DROP TRIGGER IF EXISTS messages_v2_ad;
3075        DROP TRIGGER IF EXISTS messages_v2_au;
3076        DROP TABLE IF EXISTS messages_fts;
3077        "#,
3078    )?;
3079
3080    // Create new FTS table with triggers
3081    println!("{} Creating new FTS index...", "[*]".blue());
3082    conn.execute_batch(
3083        r#"
3084        CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
3085            content_raw
3086        );
3087        
3088        CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
3089            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3090        END;
3091        
3092        CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
3093            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3094        END;
3095        
3096        CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
3097            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3098            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3099        END;
3100        "#,
3101    )?;
3102
3103    // Count total messages
3104    let total_messages: i64 =
3105        conn.query_row("SELECT COUNT(*) FROM messages_v2", [], |row| row.get(0))?;
3106
3107    // Populate FTS from existing messages
3108    println!("{} Indexing {} messages...", "[*]".blue(), total_messages);
3109
3110    conn.execute(
3111        "INSERT INTO messages_fts(rowid, content_raw) SELECT id, content_raw FROM messages_v2",
3112        [],
3113    )?;
3114
3115    // Verify the index
3116    let indexed: i64 = conn.query_row("SELECT COUNT(*) FROM messages_fts", [], |row| row.get(0))?;
3117
3118    println!();
3119    println!("{} FTS index rebuilt successfully!", "[+]".green().bold());
3120    println!("   {} messages indexed", indexed);
3121
3122    Ok(())
3123}
3124
3125/// Full-text search across all sessions
3126pub fn harvest_search(
3127    db_path: Option<&str>,
3128    query: &str,
3129    provider_filter: Option<&str>,
3130    limit: usize,
3131) -> Result<()> {
3132    let db_path = get_db_path(db_path)?;
3133
3134    if !db_path.exists() {
3135        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3136    }
3137
3138    let db = ChatDatabase::open(&db_path)?;
3139    let conn = db.connection();
3140
3141    println!("{}", "=".repeat(70).cyan());
3142    println!("{} Search: {}", "[?]".bold(), query.bold());
3143    println!("{}", "=".repeat(70).cyan());
3144    println!();
3145
3146    // Results: (session_id, provider, title, content)
3147    let results: Vec<(String, String, String, String)> = {
3148        // Check if FTS table exists
3149        let fts_exists: bool = conn
3150            .query_row(
3151                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='messages_fts'",
3152                [],
3153                |_| Ok(true),
3154            )
3155            .unwrap_or(false);
3156
3157        if fts_exists {
3158            // Use FTS search - query the FTS table and join with messages/sessions
3159            let sql = format!(
3160                "SELECT s.id, s.provider, s.title, m.content_raw
3161                 FROM messages_fts fts
3162                 JOIN messages_v2 m ON m.id = fts.rowid
3163                 JOIN sessions s ON m.session_id = s.id
3164                 WHERE messages_fts MATCH ?
3165                 {}
3166                 LIMIT {}",
3167                if provider_filter.is_some() {
3168                    "AND s.provider = ?"
3169                } else {
3170                    ""
3171                },
3172                limit
3173            );
3174
3175            let mut stmt = conn.prepare(&sql)?;
3176
3177            if let Some(provider) = provider_filter {
3178                stmt.query_map(params![query, provider], |row| {
3179                    Ok((
3180                        row.get::<_, String>(0)?,
3181                        row.get::<_, String>(1)?,
3182                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3183                        row.get::<_, String>(3)?,
3184                    ))
3185                })?
3186                .collect::<Result<Vec<_>, _>>()?
3187            } else {
3188                stmt.query_map([query], |row| {
3189                    Ok((
3190                        row.get::<_, String>(0)?,
3191                        row.get::<_, String>(1)?,
3192                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3193                        row.get::<_, String>(3)?,
3194                    ))
3195                })?
3196                .collect::<Result<Vec<_>, _>>()?
3197            }
3198        } else {
3199            // Fall back to LIKE search
3200            let search_pattern = format!("%{}%", query);
3201            let sql = format!(
3202                "SELECT s.id, s.provider, s.title, m.content_raw
3203                 FROM messages_v2 m
3204                 JOIN sessions s ON m.session_id = s.id
3205                 WHERE m.content_raw LIKE ?
3206                 {}
3207                 LIMIT {}",
3208                if provider_filter.is_some() {
3209                    "AND s.provider = ?"
3210                } else {
3211                    ""
3212                },
3213                limit
3214            );
3215
3216            let mut stmt = conn.prepare(&sql)?;
3217
3218            if let Some(provider) = provider_filter {
3219                stmt.query_map(params![search_pattern, provider], |row| {
3220                    Ok((
3221                        row.get::<_, String>(0)?,
3222                        row.get::<_, String>(1)?,
3223                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3224                        row.get::<_, String>(3)?,
3225                    ))
3226                })?
3227                .collect::<Result<Vec<_>, _>>()?
3228            } else {
3229                stmt.query_map([search_pattern], |row| {
3230                    Ok((
3231                        row.get::<_, String>(0)?,
3232                        row.get::<_, String>(1)?,
3233                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3234                        row.get::<_, String>(3)?,
3235                    ))
3236                })?
3237                .collect::<Result<Vec<_>, _>>()?
3238            }
3239        }
3240    };
3241
3242    if results.is_empty() {
3243        println!("{} No results found for '{}'", "[i]".blue(), query);
3244        return Ok(());
3245    }
3246
3247    println!("{} Found {} result(s):", "[i]".blue(), results.len());
3248    println!();
3249
3250    for (session_id, provider, title, content) in results {
3251        // Highlight the search term in content
3252        let display_name = if title.is_empty() {
3253            session_id.clone()
3254        } else {
3255            format!("{} ({})", title, &session_id[..8.min(session_id.len())])
3256        };
3257
3258        println!(
3259            "{} {} [{}]",
3260            "*".cyan(),
3261            display_name.bold(),
3262            provider.dimmed()
3263        );
3264
3265        // Show snippet around the match
3266        let snippet = create_search_snippet(&content, query, 100);
3267        println!("   {}", snippet.dimmed());
3268        println!();
3269    }
3270
3271    Ok(())
3272}
3273
3274/// Create a search result snippet with the query highlighted
3275fn create_search_snippet(content: &str, query: &str, max_len: usize) -> String {
3276    let content_lower = content.to_lowercase();
3277    let query_lower = query.to_lowercase();
3278
3279    if let Some(pos) = content_lower.find(&query_lower) {
3280        let start = pos.saturating_sub(max_len / 2);
3281        let end = (pos + query.len() + max_len / 2).min(content.len());
3282
3283        let mut snippet = String::new();
3284        if start > 0 {
3285            snippet.push_str("...");
3286        }
3287        snippet.push_str(&content[start..end]);
3288        if end < content.len() {
3289            snippet.push_str("...");
3290        }
3291
3292        // Replace newlines with spaces for display
3293        snippet.replace('\n', " ").replace('\r', "")
3294    } else {
3295        // Fallback: just show first max_len chars
3296        if content.len() > max_len {
3297            format!("{}...", &content[..max_len])
3298        } else {
3299            content.to_string()
3300        }
3301    }
3302}
3303
3304/// Simple MD5 hash for content checksums
3305fn md5_hash(data: &str) -> u128 {
3306    // Simple hash implementation (not cryptographically secure, just for checksums)
3307    let mut hash: u128 = 0;
3308    for (i, byte) in data.bytes().enumerate() {
3309        hash = hash.wrapping_add((byte as u128).wrapping_mul((i as u128).wrapping_add(1)));
3310        hash = hash.rotate_left(7);
3311    }
3312    hash
3313}