Skip to main content

chasm/commands/
harvest.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: AGPL-3.0-only
3//! Harvest commands for collecting chat sessions from multiple providers
4//!
5//! The harvester scans for local and remote LLM providers, downloads all chat
6//! sessions into a single SQLite database, and allows users to version-track
7//! the database with git.
8
9use anyhow::{Context, Result};
10use chrono::{DateTime, Utc};
11use colored::*;
12use rusqlite::{params, Connection};
13use serde::{Deserialize, Serialize};
14use std::fs;
15use std::path::{Path, PathBuf};
16use std::process::Command;
17use std::time::Duration;
18
19use crate::browser::{get_installed_browsers, scan_browser_auth, BrowserType};
20use crate::database::{ChatDatabase, ShareLinkParser};
21use crate::models::ChatSession;
22use crate::providers::{ProviderRegistry, ProviderType};
23use crate::storage::parse_session_json;
24use crate::workspace::{discover_workspaces, get_chat_sessions_from_workspace};
25
26/// Check if a string is an empty code block marker (just ``` with no content)
27fn is_empty_code_block(s: &str) -> bool {
28    // Match patterns like "```", "```\n", "```language", "```\n```", "```\n\n```"
29    let s = s.trim();
30    if s == "```" {
31        return true;
32    }
33    // Check for code block with just a language identifier and no content
34    if s.starts_with("```") && !s.contains('\n') {
35        return true;
36    }
37    // Check for empty code block with opening and closing (possibly with whitespace-only lines)
38    let lines: Vec<&str> = s.lines().collect();
39    if lines.len() >= 2 && lines[0].starts_with("```") && lines.last() == Some(&"```") {
40        // Check if all lines between opening and closing are empty or whitespace
41        let content_lines = &lines[1..lines.len() - 1];
42        if content_lines.iter().all(|line| line.trim().is_empty()) {
43            return true;
44        }
45    }
46    false
47}
48
49/// Type alias for harvested session query result (id, provider, title, msg_count, created, last_msg, workspace)
50type HarvestQueryResult = (String, String, String, i64, i64, i64, Option<String>);
51
52/// Web-based LLM provider endpoint configuration
53#[derive(Debug, Clone)]
54struct WebProviderEndpoint {
55    name: &'static str,
56    url: &'static str,
57    description: &'static str,
58}
59
60/// List of known web-based LLM provider endpoints to probe
61const WEB_PROVIDERS: &[WebProviderEndpoint] = &[
62    WebProviderEndpoint {
63        name: "ChatGPT",
64        url: "https://chat.openai.com",
65        description: "OpenAI ChatGPT Web Interface",
66    },
67    WebProviderEndpoint {
68        name: "Claude",
69        url: "https://claude.ai",
70        description: "Anthropic Claude Web Interface",
71    },
72    WebProviderEndpoint {
73        name: "Gemini",
74        url: "https://gemini.google.com",
75        description: "Google Gemini Web Interface",
76    },
77    WebProviderEndpoint {
78        name: "Perplexity",
79        url: "https://www.perplexity.ai",
80        description: "Perplexity AI Search Interface",
81    },
82    WebProviderEndpoint {
83        name: "DeepSeek",
84        url: "https://chat.deepseek.com",
85        description: "DeepSeek Chat Interface",
86    },
87    WebProviderEndpoint {
88        name: "Poe",
89        url: "https://poe.com",
90        description: "Quora Poe Multi-model Chat",
91    },
92    WebProviderEndpoint {
93        name: "You.com",
94        url: "https://you.com/chat",
95        description: "You.com AI Chat",
96    },
97    WebProviderEndpoint {
98        name: "HuggingChat",
99        url: "https://huggingface.co/chat",
100        description: "HuggingFace Chat Interface",
101    },
102    WebProviderEndpoint {
103        name: "Copilot",
104        url: "https://copilot.microsoft.com",
105        description: "Microsoft Copilot Web Interface",
106    },
107    WebProviderEndpoint {
108        name: "Mistral",
109        url: "https://chat.mistral.ai",
110        description: "Mistral AI Le Chat Interface",
111    },
112    WebProviderEndpoint {
113        name: "Cohere",
114        url: "https://coral.cohere.com",
115        description: "Cohere Coral Chat Interface",
116    },
117    WebProviderEndpoint {
118        name: "Groq",
119        url: "https://groq.com",
120        description: "Groq Fast Inference Interface",
121    },
122    WebProviderEndpoint {
123        name: "Phind",
124        url: "https://www.phind.com",
125        description: "Phind AI Code Search",
126    },
127    WebProviderEndpoint {
128        name: "Character.AI",
129        url: "https://character.ai",
130        description: "Character.AI Chat Interface",
131    },
132    WebProviderEndpoint {
133        name: "Pi",
134        url: "https://pi.ai",
135        description: "Inflection Pi Personal AI",
136    },
137];
138
139/// Scan for reachable web-based LLM providers (parallel)
140fn scan_web_providers(timeout_secs: u64) -> Vec<String> {
141    use std::sync::{Arc, Mutex};
142    use std::thread;
143
144    let reachable = Arc::new(Mutex::new(Vec::new()));
145    let results = Arc::new(Mutex::new(Vec::new()));
146
147    // Create threads for parallel scanning
148    let handles: Vec<_> = WEB_PROVIDERS
149        .iter()
150        .map(|provider| {
151            let reachable = Arc::clone(&reachable);
152            let results = Arc::clone(&results);
153            let timeout = timeout_secs;
154            let name = provider.name;
155            let url = provider.url;
156            let desc = provider.description;
157
158            thread::spawn(move || {
159                let client = match reqwest::blocking::Client::builder()
160                    .timeout(Duration::from_secs(timeout))
161                    .connect_timeout(Duration::from_secs(timeout.min(3))) // Short connect timeout
162                    .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
163                    .build()
164                {
165                    Ok(c) => c,
166                    Err(_) => return,
167                };
168
169                let result: (&str, bool, String, &str) = match client.head(url).send() {
170                    Ok(response) => {
171                        let status = response.status();
172                        if status.is_success() || status.is_redirection() {
173                            reachable.lock().unwrap().push(name.to_string());
174                            (name, true, desc.to_string(), url)
175                        } else {
176                            (name, false, format!("HTTP {}", status.as_u16()), url)
177                        }
178                    }
179                    Err(e) => {
180                        let reason = if e.is_timeout() {
181                            "timeout".to_string()
182                        } else if e.is_connect() {
183                            "connection failed".to_string()
184                        } else {
185                            "unreachable".to_string()
186                        };
187                        (name, false, reason, url)
188                    }
189                };
190
191                results.lock().unwrap().push((
192                    result.0.to_string(),
193                    result.1,
194                    result.2,
195                    result.3.to_string(),
196                ));
197            })
198        })
199        .collect();
200
201    // Wait for all threads to complete
202    for handle in handles {
203        let _ = handle.join();
204    }
205
206    // Print results in order
207    let results = results.lock().unwrap();
208    let mut sorted_results: Vec<_> = results.iter().collect();
209    sorted_results.sort_by_key(|(name, _, _, _)| name.as_str());
210
211    for (name, success, info, url) in sorted_results {
212        if *success {
213            println!(
214                "   {} {}: {} ({})",
215                "[+]".green(),
216                name.bold(),
217                "reachable".green(),
218                info.dimmed()
219            );
220            println!("      {} {}", "`".dimmed(), url.dimmed());
221        } else {
222            println!(
223                "   {} {}: {} ({})",
224                "[-]".dimmed(),
225                name,
226                "blocked or unavailable".dimmed(),
227                info.dimmed()
228            );
229        }
230    }
231
232    let result = reachable.lock().unwrap().clone();
233    result
234}
235
236/// Scan browser cookies for authenticated web LLM providers
237fn scan_browser_authentication(
238    verbose: bool,
239) -> (std::collections::HashMap<String, Vec<BrowserType>>, usize) {
240    use crate::browser::scan_browser_auth_verbose;
241    use std::collections::HashMap;
242
243    let installed = get_installed_browsers();
244    if installed.is_empty() {
245        println!("   {} No supported browsers found", "[-]".dimmed());
246        return (HashMap::new(), 0);
247    }
248
249    println!(
250        "   {} Checking {} browser(s): {}",
251        "[*]".blue(),
252        installed.len(),
253        installed
254            .iter()
255            .map(|b| b.name())
256            .collect::<Vec<_>>()
257            .join(", ")
258    );
259
260    let results = if verbose {
261        scan_browser_auth_verbose()
262    } else {
263        scan_browser_auth()
264    };
265
266    // Group results by provider
267    let mut authenticated: HashMap<String, Vec<BrowserType>> = HashMap::new();
268
269    for result in results {
270        if result.authenticated {
271            authenticated
272                .entry(result.provider.clone())
273                .or_default()
274                .push(result.browser);
275        }
276    }
277
278    let count = authenticated.len();
279
280    if authenticated.is_empty() {
281        println!(
282            "   {} No authenticated web LLM providers found",
283            "[-]".dimmed()
284        );
285        println!(
286            "      {} Log into ChatGPT, Claude, etc. in your browser to enable harvesting",
287            "`".dimmed()
288        );
289    } else {
290        for (provider, browsers) in &authenticated {
291            let browser_names: Vec<_> = browsers.iter().map(|b| b.name()).collect();
292            println!(
293                "   {} {}: {} in {}",
294                "[+]".green(),
295                provider.bold(),
296                "authenticated".green(),
297                browser_names.join(", ")
298            );
299        }
300    }
301
302    (authenticated, count)
303}
304
305/// Configuration for the harvest database
306#[allow(dead_code)]
307#[derive(Debug, Clone, Serialize, Deserialize)]
308pub struct HarvestConfig {
309    /// Path to the harvest database
310    pub db_path: PathBuf,
311    /// Auto-commit changes to git
312    pub auto_commit: bool,
313    /// Providers to include (empty = all)
314    pub include_providers: Vec<String>,
315    /// Providers to exclude
316    pub exclude_providers: Vec<String>,
317    /// Include VS Code workspaces
318    pub include_workspaces: bool,
319    /// Last harvest timestamp
320    pub last_harvest: Option<i64>,
321}
322
323#[allow(dead_code)]
324impl Default for HarvestConfig {
325    fn default() -> Self {
326        Self {
327            db_path: PathBuf::from("chat_sessions.db"),
328            auto_commit: false,
329            include_providers: Vec::new(),
330            exclude_providers: Vec::new(),
331            include_workspaces: true,
332            last_harvest: None,
333        }
334    }
335}
336
337/// Statistics from a harvest operation
338#[derive(Debug, Default)]
339pub struct HarvestStats {
340    pub providers_scanned: usize,
341    pub workspaces_scanned: usize,
342    pub sessions_found: usize,
343    pub sessions_added: usize,
344    pub sessions_updated: usize,
345    pub sessions_skipped: usize,
346    pub errors: Vec<String>,
347}
348
349/// A harvested session record
350#[allow(dead_code)]
351#[derive(Debug, Clone, Serialize, Deserialize)]
352pub struct HarvestedSession {
353    pub id: String,
354    pub provider: String,
355    pub provider_type: String,
356    pub workspace_id: Option<String>,
357    pub workspace_name: Option<String>,
358    pub title: String,
359    pub message_count: usize,
360    pub created_at: i64,
361    pub updated_at: i64,
362    pub harvested_at: i64,
363    pub session_json: String,
364}
365
366/// Initialize a harvest database at the specified path
367pub fn harvest_init(path: Option<&str>, git_init: bool) -> Result<()> {
368    let db_path = get_db_path(path)?;
369    let db_dir = db_path.parent().unwrap_or(Path::new("."));
370
371    println!("\n{} Initializing Harvest Database", "[H]".magenta().bold());
372    println!("{}", "=".repeat(60));
373
374    // Create directory if needed
375    if !db_dir.exists() {
376        fs::create_dir_all(db_dir)?;
377        println!("{} Created directory: {}", "[+]".green(), db_dir.display());
378    }
379
380    // Check if database already exists
381    if db_path.exists() {
382        println!(
383            "{} Database already exists: {}",
384            "[!]".yellow(),
385            db_path.display()
386        );
387        println!("   Use 'csm harvest run' to update it");
388        return Ok(());
389    }
390
391    // Create the database
392    create_harvest_database(&db_path)?;
393    println!("{} Created database: {}", "[+]".green(), db_path.display());
394
395    // Initialize git if requested
396    if git_init {
397        init_git_tracking(&db_path)?;
398    }
399
400    println!("\n{} Harvest database initialized!", "[+]".green().bold());
401    println!("\nNext steps:");
402    println!("  1. Run 'csm harvest scan' to see available providers");
403    println!("  2. Run 'csm harvest run' to collect sessions");
404    if !git_init {
405        println!("  3. Run 'csm harvest git init' to enable version tracking");
406    }
407
408    Ok(())
409}
410
411/// Scan for available providers and workspaces
412pub fn harvest_scan(
413    show_sessions: bool,
414    scan_web: bool,
415    timeout_secs: u64,
416    verbose: bool,
417) -> Result<()> {
418    println!("\n{} Scanning for Providers", "[H]".magenta().bold());
419    println!("{}", "=".repeat(60));
420
421    let registry = ProviderRegistry::new();
422    let mut total_sessions = 0;
423    let mut available_providers = Vec::new();
424
425    // Scan LLM providers
426    println!("\n{} LLM Providers:", "[*]".blue().bold());
427
428    let provider_types = vec![
429        ProviderType::Copilot,
430        ProviderType::Cursor,
431        ProviderType::CodexCli,
432        ProviderType::DroidCli,
433        ProviderType::GeminiCli,
434        ProviderType::Ollama,
435        ProviderType::Vllm,
436        ProviderType::Foundry,
437        ProviderType::LmStudio,
438        ProviderType::LocalAI,
439        ProviderType::TextGenWebUI,
440        ProviderType::Jan,
441        ProviderType::Gpt4All,
442        ProviderType::Llamafile,
443        ProviderType::OpenAI,
444        ProviderType::ChatGPT,
445        ProviderType::Anthropic,
446        ProviderType::Perplexity,
447        ProviderType::DeepSeek,
448        ProviderType::Gemini,
449    ];
450
451    for pt in &provider_types {
452        if let Some(provider) = registry.get_provider(*pt) {
453            let available = provider.is_available();
454            let session_count = if available {
455                provider.list_sessions().map(|s| s.len()).unwrap_or(0)
456            } else {
457                0
458            };
459
460            if available {
461                available_providers.push((*pt, session_count));
462                total_sessions += session_count;
463
464                let status = if session_count > 0 {
465                    format!(
466                        "{} {} sessions",
467                        "+".green(),
468                        session_count.to_string().cyan()
469                    )
470                } else {
471                    format!("{} no sessions", "+".green())
472                };
473
474                println!(
475                    "   {} {}: {}",
476                    "[+]".green(),
477                    provider.name().bold(),
478                    status
479                );
480
481                if show_sessions && session_count > 0 {
482                    if let Ok(sessions) = provider.list_sessions() {
483                        for session in sessions.iter().take(3) {
484                            println!("      {} {}", "`".dimmed(), session.title().dimmed());
485                        }
486                        if sessions.len() > 3 {
487                            println!("      {} ... and {} more", "`".dimmed(), sessions.len() - 3);
488                        }
489                    }
490                }
491
492                if let Some(path) = provider.sessions_path() {
493                    println!(
494                        "      {} {}",
495                        "`".dimmed(),
496                        path.display().to_string().dimmed()
497                    );
498                }
499            }
500        }
501    }
502
503    // Scan VS Code workspaces
504    println!("\n{} VS Code Workspaces:", "[*]".blue().bold());
505
506    match discover_workspaces() {
507        Ok(workspaces) => {
508            let workspaces_with_sessions: Vec<_> = workspaces
509                .iter()
510                .filter(|ws| ws.chat_session_count > 0)
511                .collect();
512
513            let ws_sessions: usize = workspaces_with_sessions
514                .iter()
515                .map(|ws| ws.chat_session_count)
516                .sum();
517
518            println!(
519                "   {} {} workspaces with {} sessions",
520                "[+]".green(),
521                workspaces_with_sessions.len().to_string().cyan(),
522                ws_sessions.to_string().cyan()
523            );
524
525            if show_sessions {
526                for ws in workspaces_with_sessions.iter().take(5) {
527                    let name = ws
528                        .project_path
529                        .clone()
530                        .unwrap_or_else(|| ws.hash[..8.min(ws.hash.len())].to_string());
531                    println!(
532                        "      {} {} ({} sessions)",
533                        "`".dimmed(),
534                        name.dimmed(),
535                        ws.chat_session_count
536                    );
537                }
538                if workspaces_with_sessions.len() > 5 {
539                    println!(
540                        "      {} ... and {} more workspaces",
541                        "`".dimmed(),
542                        workspaces_with_sessions.len() - 5
543                    );
544                }
545            }
546
547            total_sessions += ws_sessions;
548        }
549        Err(e) => {
550            println!("   {} Failed to scan workspaces: {}", "[!]".yellow(), e);
551        }
552    }
553
554    // Scan web-based LLM providers if requested
555    let mut web_providers_found = Vec::new();
556    let mut authenticated_count = 0;
557    if scan_web {
558        // First check browser authentication (no network requests)
559        println!("\n{} Browser Authentication:", "[*]".blue().bold());
560        let (auth_results, auth_count) = scan_browser_authentication(verbose);
561        authenticated_count = auth_count;
562
563        // Then probe web endpoints
564        println!("\n{} Web LLM Provider Endpoints:", "[*]".blue().bold());
565        web_providers_found = scan_web_providers(timeout_secs);
566
567        // Show which authenticated providers are reachable
568        if !auth_results.is_empty() {
569            println!("\n{} Authenticated Provider Sessions:", "[*]".blue().bold());
570            for (provider, browsers) in &auth_results {
571                let browser_list: Vec<_> = browsers.iter().map(|b| b.name()).collect();
572                let reachable = web_providers_found.iter().any(|p| p == provider);
573                let status = if reachable {
574                    format!("{} (reachable)", "ready to harvest".green())
575                } else {
576                    format!("{}", "authenticated but endpoint blocked".yellow())
577                };
578                println!(
579                    "   {} {}: {} via {}",
580                    "[+]".green(),
581                    provider.bold(),
582                    status,
583                    browser_list.join(", ").dimmed()
584                );
585            }
586        }
587    }
588
589    // Summary
590    println!("\n{} Summary:", "[*]".green().bold());
591    println!(
592        "   {} local providers available",
593        available_providers.len().to_string().cyan()
594    );
595    if scan_web {
596        println!(
597            "   {} web providers reachable",
598            web_providers_found.len().to_string().cyan()
599        );
600        println!(
601            "   {} web providers authenticated",
602            authenticated_count.to_string().cyan()
603        );
604    }
605    println!(
606        "   {} total sessions to harvest",
607        total_sessions.to_string().truecolor(252, 152, 103)
608    );
609
610    Ok(())
611}
612
613/// Run the harvest operation
614pub fn harvest_run(
615    path: Option<&str>,
616    providers: Option<&[String]>,
617    exclude: Option<&[String]>,
618    incremental: bool,
619    auto_commit: bool,
620    message: Option<&str>,
621) -> Result<()> {
622    let db_path = get_db_path(path)?;
623
624    println!("\n{} Running Harvest", "[H]".magenta().bold());
625    println!("{}", "=".repeat(60));
626
627    // Ensure database exists
628    if !db_path.exists() {
629        println!("{} Database not found, creating...", "[*]".blue());
630        create_harvest_database(&db_path)?;
631    }
632
633    let conn = Connection::open(&db_path)?;
634
635    // Enable WAL mode and set pragmas for large databases
636    conn.execute_batch(
637        "PRAGMA journal_mode = WAL;
638         PRAGMA synchronous = NORMAL;
639         PRAGMA cache_size = -64000;",
640    )?;
641
642    let mut stats = HarvestStats::default();
643
644    // Get last harvest time for incremental updates
645    let last_harvest: Option<i64> = if incremental {
646        conn.query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
647            row.get(0)
648        })
649        .ok()
650    } else {
651        None
652    };
653
654    if let Some(ts) = last_harvest {
655        let dt = DateTime::from_timestamp_millis(ts)
656            .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
657            .unwrap_or_default();
658        println!("{} Incremental harvest since: {}", "[*]".blue(), dt);
659    }
660
661    let registry = ProviderRegistry::new();
662    let include_providers = providers.map(|p| p.to_vec());
663    let exclude_providers = exclude.map(|p| p.to_vec()).unwrap_or_default();
664
665    // Harvest from LLM providers
666    println!("\n{} Harvesting from providers...", "[*]".blue());
667
668    let provider_types = vec![
669        ProviderType::Copilot,
670        ProviderType::Cursor,
671        ProviderType::ClaudeCode,
672        ProviderType::CodexCli,
673        ProviderType::DroidCli,
674        ProviderType::GeminiCli,
675        ProviderType::OpenCode,
676        ProviderType::OpenClaw,
677        ProviderType::Antigravity,
678        ProviderType::Ollama,
679        ProviderType::Vllm,
680        ProviderType::Foundry,
681        ProviderType::LmStudio,
682        ProviderType::LocalAI,
683        ProviderType::TextGenWebUI,
684        ProviderType::Jan,
685        ProviderType::Gpt4All,
686        ProviderType::Llamafile,
687    ];
688
689    for pt in &provider_types {
690        let provider_name = pt.display_name().to_lowercase();
691
692        // Check include/exclude filters
693        if let Some(ref include) = include_providers {
694            if !include
695                .iter()
696                .any(|p| provider_name.contains(&p.to_lowercase()))
697            {
698                continue;
699            }
700        }
701        if exclude_providers
702            .iter()
703            .any(|p| provider_name.contains(&p.to_lowercase()))
704        {
705            continue;
706        }
707
708        if let Some(provider) = registry.get_provider(*pt) {
709            if !provider.is_available() {
710                continue;
711            }
712
713            stats.providers_scanned += 1;
714
715            match provider.list_sessions() {
716                Ok(sessions) => {
717                    for session in sessions {
718                        stats.sessions_found += 1;
719
720                        // Check if session should be skipped (incremental)
721                        if let Some(last) = last_harvest {
722                            if session.last_message_date <= last {
723                                stats.sessions_skipped += 1;
724                                continue;
725                            }
726                        }
727
728                        match insert_or_update_session(
729                            &conn,
730                            &session,
731                            pt.display_name(),
732                            None,
733                            None,
734                            None,   // provider_version
735                            3,      // schema_version (V3)
736                            "json", // file_format
737                            None,   // workspace_path
738                        ) {
739                            Ok(updated) => {
740                                if updated {
741                                    stats.sessions_updated += 1;
742                                } else {
743                                    stats.sessions_added += 1;
744                                }
745                            }
746                            Err(e) => {
747                                stats.errors.push(format!("{}: {}", session.title(), e));
748                            }
749                        }
750                    }
751
752                    let session_count = stats.sessions_added + stats.sessions_updated;
753                    if session_count > 0 {
754                        println!(
755                            "   {} {}: {} sessions",
756                            "[+]".green(),
757                            provider.name(),
758                            session_count.to_string().cyan()
759                        );
760                    }
761                }
762                Err(e) => {
763                    stats.errors.push(format!("{}: {}", provider.name(), e));
764                }
765            }
766        }
767    }
768
769    // Harvest from VS Code workspaces
770    if include_providers.is_none()
771        || include_providers
772            .as_ref()
773            .map(|p| {
774                p.iter()
775                    .any(|x| x == "copilot" || x == "vscode" || x == "workspace")
776            })
777            .unwrap_or(false)
778    {
779        println!("\n{} Harvesting from VS Code workspaces...", "[*]".blue());
780
781        if let Ok(workspaces) = discover_workspaces() {
782            for ws in &workspaces {
783                if ws.chat_session_count == 0 {
784                    continue;
785                }
786
787                stats.workspaces_scanned += 1;
788
789                if let Ok(sessions) = get_chat_sessions_from_workspace(&ws.workspace_path) {
790                    for swp in sessions {
791                        stats.sessions_found += 1;
792
793                        // Check if session should be skipped (incremental)
794                        if let Some(last) = last_harvest {
795                            if swp.session.last_message_date <= last {
796                                stats.sessions_skipped += 1;
797                                continue;
798                            }
799                        }
800
801                        let ws_name = ws.project_path.clone();
802                        let ws_path = ws.workspace_path.to_string_lossy().to_string();
803
804                        match insert_or_update_session(
805                            &conn,
806                            &swp.session,
807                            "GitHub Copilot",
808                            Some(&ws.hash),
809                            ws_name.as_deref(),
810                            None,           // provider_version
811                            3,              // schema_version (V3)
812                            "json",         // file_format
813                            Some(&ws_path), // workspace_path
814                        ) {
815                            Ok(updated) => {
816                                if updated {
817                                    stats.sessions_updated += 1;
818                                } else {
819                                    stats.sessions_added += 1;
820                                }
821                            }
822                            Err(e) => {
823                                stats.errors.push(format!("{}: {}", swp.session.title(), e));
824                            }
825                        }
826                    }
827                }
828            }
829
830            println!(
831                "   {} Workspaces: {} scanned",
832                "[+]".green(),
833                stats.workspaces_scanned.to_string().cyan()
834            );
835        }
836    }
837
838    // Harvest from web-based cloud providers (ChatGPT, Claude, etc.)
839    let include_list: Vec<String> = include_providers.clone().unwrap_or_default();
840    harvest_web_providers(&conn, &mut stats, &include_list, &exclude_providers)?;
841
842    // Update metadata
843    update_harvest_metadata(&conn)?;
844
845    // Print summary
846    println!("\n{} Harvest Complete:", "[+]".green().bold());
847    println!(
848        "   {} providers scanned",
849        stats.providers_scanned.to_string().cyan()
850    );
851    println!(
852        "   {} workspaces scanned",
853        stats.workspaces_scanned.to_string().cyan()
854    );
855    println!(
856        "   {} sessions found",
857        stats.sessions_found.to_string().cyan()
858    );
859    println!(
860        "   {} sessions added",
861        stats.sessions_added.to_string().green()
862    );
863    println!(
864        "   {} sessions updated",
865        stats.sessions_updated.to_string().yellow()
866    );
867    if stats.sessions_skipped > 0 {
868        println!(
869            "   {} sessions skipped (unchanged)",
870            stats.sessions_skipped.to_string().dimmed()
871        );
872    }
873
874    if !stats.errors.is_empty() {
875        println!("\n{} Errors ({}):", "[!]".red(), stats.errors.len());
876        for (i, err) in stats.errors.iter().take(5).enumerate() {
877            println!("   {}. {}", i + 1, err);
878        }
879        if stats.errors.len() > 5 {
880            println!("   ... and {} more errors", stats.errors.len() - 5);
881        }
882    }
883
884    // Auto-commit if requested
885    if auto_commit && (stats.sessions_added > 0 || stats.sessions_updated > 0) {
886        println!("\n{} Auto-committing changes...", "[*]".blue());
887        let commit_msg = message.unwrap_or("Harvest: update chat sessions");
888        if let Err(e) = git_commit_harvest(&db_path, commit_msg) {
889            println!("{} Git commit failed: {}", "[!]".yellow(), e);
890        } else {
891            println!("{} Changes committed", "[+]".green());
892        }
893    }
894
895    println!("\nDatabase: {}", db_path.display());
896
897    Ok(())
898}
899
900/// Show harvest database status
901pub fn harvest_status(path: Option<&str>) -> Result<()> {
902    let db_path = get_db_path(path)?;
903
904    println!("\n{} Harvest Database Status", "[H]".magenta().bold());
905    println!("{}", "=".repeat(60));
906
907    if !db_path.exists() {
908        println!(
909            "{} Database not found: {}",
910            "[!]".yellow(),
911            db_path.display()
912        );
913        println!("   Run 'csm harvest init' to create one");
914        return Ok(());
915    }
916
917    let conn = Connection::open(&db_path)?;
918
919    // Get session counts by provider
920    let mut stmt = conn.prepare(
921        "SELECT provider, COUNT(*) as count, SUM(message_count) as messages 
922         FROM sessions 
923         GROUP BY provider 
924         ORDER BY count DESC",
925    )?;
926
927    let provider_stats: Vec<(String, i64, i64)> = stmt
928        .query_map([], |row| {
929            Ok((
930                row.get(0)?,
931                row.get(1)?,
932                row.get::<_, Option<i64>>(2)?.unwrap_or(0),
933            ))
934        })?
935        .filter_map(|r| r.ok())
936        .collect();
937
938    // Get total counts
939    let total_sessions: i64 = conn
940        .query_row("SELECT COUNT(*) FROM sessions", [], |row| row.get(0))
941        .unwrap_or(0);
942
943    let total_messages: i64 = conn
944        .query_row(
945            "SELECT COALESCE(SUM(message_count), 0) FROM sessions",
946            [],
947            |row| row.get(0),
948        )
949        .unwrap_or(0);
950
951    // Get last harvest time
952    let last_harvest: Option<i64> = conn
953        .query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
954            row.get(0)
955        })
956        .ok()
957        .flatten();
958
959    // Get oldest and newest sessions
960    let oldest: Option<i64> = conn
961        .query_row("SELECT MIN(created_at) FROM sessions", [], |row| row.get(0))
962        .ok()
963        .flatten();
964
965    let newest: Option<i64> = conn
966        .query_row("SELECT MAX(updated_at) FROM sessions", [], |row| row.get(0))
967        .ok()
968        .flatten();
969
970    println!("{} Database: {}", "[*]".blue(), db_path.display());
971    println!(
972        "{} {}: {}",
973        "[*]".blue(),
974        "Total Sessions".truecolor(171, 157, 242),
975        total_sessions.to_string().truecolor(252, 152, 103)
976    );
977    println!(
978        "{} {}: {}",
979        "[*]".blue(),
980        "Total Messages".truecolor(171, 157, 242),
981        total_messages.to_string().truecolor(252, 152, 103)
982    );
983
984    if let Some(ts) = last_harvest {
985        let dt = DateTime::from_timestamp_millis(ts)
986            .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
987            .unwrap_or_else(|| "Unknown".to_string());
988        println!("{} Last Harvest: {}", "[*]".blue(), dt);
989    }
990
991    if let (Some(old), Some(new)) = (oldest, newest) {
992        let old_dt = DateTime::from_timestamp_millis(old)
993            .map(|d| d.format("%Y-%m-%d").to_string())
994            .unwrap_or_default();
995        let new_dt = DateTime::from_timestamp_millis(new)
996            .map(|d| d.format("%Y-%m-%d").to_string())
997            .unwrap_or_default();
998        println!("{} Date Range: {} to {}", "[*]".blue(), old_dt, new_dt);
999    }
1000
1001    if !provider_stats.is_empty() {
1002        println!("\n{} Sessions by Provider:", "[*]".blue());
1003        for (provider, count, messages) in &provider_stats {
1004            println!(
1005                "   {} {}: {} sessions, {} messages",
1006                "[+]".green(),
1007                provider.bold(),
1008                count.to_string().cyan(),
1009                messages.to_string().dimmed()
1010            );
1011        }
1012    }
1013
1014    // Check git status
1015    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1016    if db_dir.join(".git").exists()
1017        || db_path
1018            .parent()
1019            .map(|p| p.join(".git").exists())
1020            .unwrap_or(false)
1021    {
1022        println!("\n{} Git Status:", "[*]".blue());
1023
1024        let output = Command::new("git")
1025            .current_dir(db_dir)
1026            .args([
1027                "status",
1028                "--porcelain",
1029                db_path.file_name().unwrap().to_str().unwrap(),
1030            ])
1031            .output();
1032
1033        match output {
1034            Ok(out) => {
1035                let status = String::from_utf8_lossy(&out.stdout);
1036                if status.is_empty() {
1037                    println!("   {} No uncommitted changes", "[+]".green());
1038                } else {
1039                    println!("   {} Uncommitted changes detected", "[!]".yellow());
1040                    println!("   Run 'csm harvest git commit' to save changes");
1041                }
1042            }
1043            Err(_) => {
1044                println!("   {} Unable to check git status", "[!]".yellow());
1045            }
1046        }
1047    } else {
1048        println!("\n{} Git tracking not enabled", "[i]".dimmed());
1049        println!("   Run 'csm harvest git init' to enable version tracking");
1050    }
1051
1052    Ok(())
1053}
1054
1055/// List sessions in the harvest database
1056pub fn harvest_list(
1057    path: Option<&str>,
1058    provider: Option<&str>,
1059    limit: usize,
1060    search: Option<&str>,
1061) -> Result<()> {
1062    let db_path = get_db_path(path)?;
1063
1064    if !db_path.exists() {
1065        println!(
1066            "{} Database not found: {}",
1067            "[!]".yellow(),
1068            db_path.display()
1069        );
1070        return Ok(());
1071    }
1072
1073    let conn = Connection::open(&db_path)?;
1074
1075    let mut query = String::from(
1076        "SELECT id, provider, title, message_count, created_at, updated_at, workspace_name 
1077         FROM sessions WHERE 1=1",
1078    );
1079    let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1080
1081    if let Some(p) = provider {
1082        query.push_str(" AND LOWER(provider) LIKE ?");
1083        params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1084    }
1085
1086    if let Some(s) = search {
1087        query.push_str(" AND (LOWER(title) LIKE ? OR LOWER(id) LIKE ?)");
1088        let pattern = format!("%{}%", s.to_lowercase());
1089        params_vec.push(Box::new(pattern.clone()));
1090        params_vec.push(Box::new(pattern));
1091    }
1092
1093    query.push_str(" ORDER BY updated_at DESC LIMIT ?");
1094    params_vec.push(Box::new(limit as i64));
1095
1096    let mut stmt = conn.prepare(&query)?;
1097
1098    // Build params slice
1099    let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1100
1101    let sessions: Vec<HarvestQueryResult> = stmt
1102        .query_map(params_slice.as_slice(), |row| {
1103            Ok((
1104                row.get(0)?,
1105                row.get(1)?,
1106                row.get(2)?,
1107                row.get(3)?,
1108                row.get(4)?,
1109                row.get(5)?,
1110                row.get(6)?,
1111            ))
1112        })?
1113        .filter_map(|r| r.ok())
1114        .collect();
1115
1116    println!("\n{} Harvested Sessions", "[H]".magenta().bold());
1117    println!("{}", "=".repeat(60));
1118
1119    if sessions.is_empty() {
1120        println!("{} No sessions found", "[i]".dimmed());
1121        return Ok(());
1122    }
1123
1124    for (id, prov, title, msg_count, _created, updated, ws_name) in &sessions {
1125        let date = DateTime::from_timestamp_millis(*updated)
1126            .map(|d| d.format("%Y-%m-%d %H:%M").to_string())
1127            .unwrap_or_default();
1128
1129        println!("\n{} {}", "[S]".cyan(), title.bold());
1130        println!("   ID: {}", &id[..16.min(id.len())].dimmed());
1131        println!(
1132            "   Provider: {} | Messages: {} | Updated: {}",
1133            prov.cyan(),
1134            msg_count.to_string().green(),
1135            date.dimmed()
1136        );
1137        if let Some(ws) = ws_name {
1138            println!("   Workspace: {}", ws.dimmed());
1139        }
1140    }
1141
1142    println!(
1143        "\n{} Showing {} of available sessions",
1144        "[i]".dimmed(),
1145        sessions.len()
1146    );
1147
1148    Ok(())
1149}
1150
1151/// Export sessions from the harvest database
1152pub fn harvest_export(
1153    path: Option<&str>,
1154    output: &str,
1155    format: &str,
1156    provider: Option<&str>,
1157    session_ids: Option<&[String]>,
1158) -> Result<()> {
1159    let db_path = get_db_path(path)?;
1160    let output_path = PathBuf::from(output);
1161
1162    if !db_path.exists() {
1163        anyhow::bail!("Database not found: {}", db_path.display());
1164    }
1165
1166    let conn = Connection::open(&db_path)?;
1167
1168    println!("\n{} Exporting Sessions", "[H]".magenta().bold());
1169    println!("{}", "=".repeat(60));
1170
1171    // Build query
1172    let mut query = String::from("SELECT session_json FROM sessions WHERE 1=1");
1173    let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1174
1175    if let Some(p) = provider {
1176        query.push_str(" AND LOWER(provider) LIKE ?");
1177        params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1178    }
1179
1180    if let Some(ids) = session_ids {
1181        let placeholders: Vec<&str> = ids.iter().map(|_| "?").collect();
1182        query.push_str(&format!(" AND id IN ({})", placeholders.join(",")));
1183        for id in ids {
1184            params_vec.push(Box::new(id.clone()));
1185        }
1186    }
1187
1188    let mut stmt = conn.prepare(&query)?;
1189    let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1190
1191    let sessions: Vec<String> = stmt
1192        .query_map(params_slice.as_slice(), |row| row.get(0))?
1193        .filter_map(|r| r.ok())
1194        .collect();
1195
1196    if sessions.is_empty() {
1197        println!("{} No sessions to export", "[i]".dimmed());
1198        return Ok(());
1199    }
1200
1201    // Create output directory if needed
1202    if let Some(parent) = output_path.parent() {
1203        if !parent.exists() {
1204            fs::create_dir_all(parent)?;
1205        }
1206    }
1207
1208    match format.to_lowercase().as_str() {
1209        "json" => {
1210            // Export as JSON array
1211            let parsed: Vec<serde_json::Value> = sessions
1212                .iter()
1213                .filter_map(|s| serde_json::from_str(s).ok())
1214                .collect();
1215            let json_output = serde_json::to_string_pretty(&parsed)?;
1216            fs::write(&output_path, json_output)?;
1217        }
1218        "jsonl" => {
1219            // Export as JSON Lines
1220            let content: String = sessions
1221                .iter()
1222                .filter_map(|s| serde_json::from_str::<serde_json::Value>(s).ok())
1223                .map(|v| serde_json::to_string(&v).unwrap_or_default())
1224                .collect::<Vec<_>>()
1225                .join("\n");
1226            fs::write(&output_path, content)?;
1227        }
1228        "md" | "markdown" => {
1229            // Export as Markdown
1230            let mut md_content = String::from("# Chat Sessions Export\n\n");
1231            md_content.push_str(&format!(
1232                "Exported: {}\n\n",
1233                Utc::now().format("%Y-%m-%d %H:%M:%S")
1234            ));
1235
1236            for session_json in &sessions {
1237                if let Ok(session) = parse_session_json(session_json) {
1238                    md_content.push_str(&format!("## {}\n\n", session.title()));
1239                    md_content.push_str(&format!("Messages: {}\n\n", session.request_count()));
1240
1241                    for request in &session.requests {
1242                        if let Some(msg) = &request.message {
1243                            if let Some(text) = &msg.text {
1244                                md_content.push_str(&format!("### User\n\n{}\n\n", text));
1245                            }
1246                        }
1247                        // Extract response text from the JSON value
1248                        if let Some(response) = &request.response {
1249                            let response_text = response
1250                                .get("text")
1251                                .and_then(|v| v.as_str())
1252                                .or_else(|| response.get("content").and_then(|v| v.as_str()))
1253                                .or_else(|| {
1254                                    // Try to get from value array (older format)
1255                                    response
1256                                        .get("value")
1257                                        .and_then(|v| v.as_array())
1258                                        .and_then(|arr| arr.first())
1259                                        .and_then(|v| v.get("value"))
1260                                        .and_then(|v| v.as_str())
1261                                });
1262
1263                            if let Some(text) = response_text {
1264                                md_content.push_str(&format!("### Assistant\n\n{}\n\n", text));
1265                            }
1266                        }
1267                    }
1268                    md_content.push_str("---\n\n");
1269                }
1270            }
1271            fs::write(&output_path, md_content)?;
1272        }
1273        _ => {
1274            anyhow::bail!("Unknown format: {}. Supported: json, jsonl, md", format);
1275        }
1276    }
1277
1278    println!(
1279        "{} Exported {} sessions to {}",
1280        "[+]".green(),
1281        sessions.len().to_string().cyan(),
1282        output_path.display()
1283    );
1284
1285    Ok(())
1286}
1287
1288/// Git operations for harvest database
1289pub fn harvest_git_init(path: Option<&str>) -> Result<()> {
1290    let db_path = get_db_path(path)?;
1291    init_git_tracking(&db_path)
1292}
1293
1294pub fn harvest_git_commit(path: Option<&str>, message: Option<&str>) -> Result<()> {
1295    let db_path = get_db_path(path)?;
1296    let msg = message.unwrap_or("Update harvest database");
1297    git_commit_harvest(&db_path, msg)
1298}
1299
1300pub fn harvest_git_log(path: Option<&str>, count: usize) -> Result<()> {
1301    let db_path = get_db_path(path)?;
1302    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1303    let db_name = db_path.file_name().unwrap().to_str().unwrap();
1304
1305    println!("\n{} Harvest Git History", "[H]".magenta().bold());
1306    println!("{}", "=".repeat(60));
1307
1308    let output = Command::new("git")
1309        .current_dir(db_dir)
1310        .args([
1311            "log",
1312            "--oneline",
1313            &format!("-{}", count),
1314            "--follow",
1315            "--",
1316            db_name,
1317        ])
1318        .output()
1319        .context("Failed to run git log")?;
1320
1321    if !output.status.success() {
1322        let stderr = String::from_utf8_lossy(&output.stderr);
1323        if stderr.contains("does not have any commits") {
1324            println!("{} No commits yet", "[i]".dimmed());
1325        } else {
1326            anyhow::bail!("Git log failed: {}", stderr);
1327        }
1328        return Ok(());
1329    }
1330
1331    let log = String::from_utf8_lossy(&output.stdout);
1332    if log.trim().is_empty() {
1333        println!("{} No commits found for {}", "[i]".dimmed(), db_name);
1334    } else {
1335        for line in log.lines() {
1336            let parts: Vec<&str> = line.splitn(2, ' ').collect();
1337            if parts.len() == 2 {
1338                println!("{} {} {}", "[C]".yellow(), parts[0].cyan(), parts[1]);
1339            } else {
1340                println!("{}", line);
1341            }
1342        }
1343    }
1344
1345    Ok(())
1346}
1347
1348pub fn harvest_git_diff(path: Option<&str>, commit: Option<&str>) -> Result<()> {
1349    let db_path = get_db_path(path)?;
1350    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1351    let db_name = db_path.file_name().unwrap().to_str().unwrap();
1352
1353    println!("\n{} Harvest Database Changes", "[H]".magenta().bold());
1354    println!("{}", "=".repeat(60));
1355
1356    let mut args = vec!["diff", "--stat"];
1357    if let Some(c) = commit {
1358        args.push(c);
1359    }
1360    args.push("--");
1361    args.push(db_name);
1362
1363    let output = Command::new("git")
1364        .current_dir(db_dir)
1365        .args(&args)
1366        .output()
1367        .context("Failed to run git diff")?;
1368
1369    let diff = String::from_utf8_lossy(&output.stdout);
1370    if diff.trim().is_empty() {
1371        println!("{} No changes", "[+]".green());
1372    } else {
1373        println!("{}", diff);
1374    }
1375
1376    Ok(())
1377}
1378
1379pub fn harvest_git_restore(path: Option<&str>, commit: &str) -> Result<()> {
1380    let db_path = get_db_path(path)?;
1381    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1382    let db_name = db_path.file_name().unwrap().to_str().unwrap();
1383
1384    println!("\n{} Restoring Harvest Database", "[H]".magenta().bold());
1385    println!("{}", "=".repeat(60));
1386
1387    // Create backup first
1388    let backup_path = db_path.with_extension("db.backup");
1389    if db_path.exists() {
1390        fs::copy(&db_path, &backup_path)?;
1391        println!(
1392            "{} Created backup: {}",
1393            "[+]".green(),
1394            backup_path.display()
1395        );
1396    }
1397
1398    // Restore from commit
1399    let output = Command::new("git")
1400        .current_dir(db_dir)
1401        .args(["checkout", commit, "--", db_name])
1402        .output()
1403        .context("Failed to run git checkout")?;
1404
1405    if !output.status.success() {
1406        let stderr = String::from_utf8_lossy(&output.stderr);
1407        anyhow::bail!("Git restore failed: {}", stderr);
1408    }
1409
1410    println!(
1411        "{} Restored database from commit: {}",
1412        "[+]".green(),
1413        commit
1414    );
1415
1416    Ok(())
1417}
1418
1419// ============================================================================
1420// Helper Functions
1421// ============================================================================
1422
1423fn get_db_path(path: Option<&str>) -> Result<PathBuf> {
1424    if let Some(p) = path {
1425        return Ok(PathBuf::from(p));
1426    }
1427
1428    // Check environment variable
1429    if let Ok(p) = std::env::var("CSM_HARVEST_DB") {
1430        return Ok(PathBuf::from(p));
1431    }
1432
1433    // Default to current directory
1434    Ok(std::env::current_dir()?.join("chat_sessions.db"))
1435}
1436
1437fn create_harvest_database(path: &Path) -> Result<()> {
1438    let conn = Connection::open(path)?;
1439
1440    // Use larger page size and WAL mode for better performance with large session data
1441    conn.execute_batch(
1442        "PRAGMA page_size = 8192;
1443         PRAGMA journal_mode = WAL;
1444         PRAGMA synchronous = NORMAL;",
1445    )?;
1446
1447    conn.execute_batch(
1448        r#"
1449        -- Sessions table (original harvest format)
1450        CREATE TABLE IF NOT EXISTS sessions (
1451            id TEXT PRIMARY KEY,
1452            provider TEXT NOT NULL,
1453            provider_type TEXT,
1454            provider_version TEXT,
1455            schema_version INTEGER DEFAULT 3,
1456            file_format TEXT DEFAULT 'json',
1457            workspace_id TEXT,
1458            workspace_name TEXT,
1459            workspace_path TEXT,
1460            title TEXT NOT NULL,
1461            message_count INTEGER DEFAULT 0,
1462            created_at INTEGER NOT NULL,
1463            updated_at INTEGER NOT NULL,
1464            harvested_at INTEGER NOT NULL,
1465            session_json TEXT NOT NULL
1466        );
1467
1468        CREATE INDEX IF NOT EXISTS idx_sessions_provider ON sessions(provider);
1469        CREATE INDEX IF NOT EXISTS idx_sessions_workspace ON sessions(workspace_id);
1470        CREATE INDEX IF NOT EXISTS idx_sessions_updated ON sessions(updated_at);
1471        CREATE INDEX IF NOT EXISTS idx_sessions_workspace_path ON sessions(workspace_path);
1472        
1473        -- Enhanced messages table with raw markdown and metadata
1474        CREATE TABLE IF NOT EXISTS messages_v2 (
1475            id INTEGER PRIMARY KEY AUTOINCREMENT,
1476            session_id TEXT NOT NULL,
1477            message_index INTEGER NOT NULL,
1478            request_id TEXT,
1479            response_id TEXT,
1480            role TEXT NOT NULL,
1481            content_raw TEXT NOT NULL,
1482            content_markdown TEXT,
1483            model_id TEXT,
1484            timestamp INTEGER,
1485            is_canceled INTEGER DEFAULT 0,
1486            metadata_json TEXT,
1487            created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1488            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1489            UNIQUE(session_id, message_index, role)
1490        );
1491        
1492        CREATE INDEX IF NOT EXISTS idx_messages_v2_session ON messages_v2(session_id);
1493        CREATE INDEX IF NOT EXISTS idx_messages_v2_role ON messages_v2(role);
1494        CREATE INDEX IF NOT EXISTS idx_messages_v2_timestamp ON messages_v2(timestamp);
1495        
1496        -- Tool invocations within messages (file edits, terminal commands, etc.)
1497        CREATE TABLE IF NOT EXISTS tool_invocations (
1498            id INTEGER PRIMARY KEY AUTOINCREMENT,
1499            message_id INTEGER NOT NULL,
1500            session_id TEXT NOT NULL,
1501            tool_name TEXT NOT NULL,
1502            tool_call_id TEXT,
1503            invocation_index INTEGER DEFAULT 0,
1504            input_json TEXT,
1505            output_json TEXT,
1506            status TEXT DEFAULT 'pending',
1507            is_confirmed INTEGER DEFAULT 0,
1508            timestamp INTEGER,
1509            FOREIGN KEY (message_id) REFERENCES messages_v2(id) ON DELETE CASCADE,
1510            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1511        );
1512        
1513        CREATE INDEX IF NOT EXISTS idx_tool_invocations_message ON tool_invocations(message_id);
1514        CREATE INDEX IF NOT EXISTS idx_tool_invocations_session ON tool_invocations(session_id);
1515        CREATE INDEX IF NOT EXISTS idx_tool_invocations_tool ON tool_invocations(tool_name);
1516        
1517        -- File changes/diffs associated with tool invocations
1518        CREATE TABLE IF NOT EXISTS file_changes (
1519            id INTEGER PRIMARY KEY AUTOINCREMENT,
1520            tool_invocation_id INTEGER,
1521            session_id TEXT NOT NULL,
1522            message_index INTEGER,
1523            file_path TEXT NOT NULL,
1524            change_type TEXT NOT NULL,
1525            old_content TEXT,
1526            new_content TEXT,
1527            diff_unified TEXT,
1528            line_start INTEGER,
1529            line_end INTEGER,
1530            timestamp INTEGER,
1531            FOREIGN KEY (tool_invocation_id) REFERENCES tool_invocations(id) ON DELETE CASCADE,
1532            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1533        );
1534        
1535        CREATE INDEX IF NOT EXISTS idx_file_changes_tool ON file_changes(tool_invocation_id);
1536        CREATE INDEX IF NOT EXISTS idx_file_changes_session ON file_changes(session_id);
1537        CREATE INDEX IF NOT EXISTS idx_file_changes_path ON file_changes(file_path);
1538        
1539        -- Message-level checkpoints for versioning
1540        CREATE TABLE IF NOT EXISTS message_checkpoints (
1541            id INTEGER PRIMARY KEY AUTOINCREMENT,
1542            session_id TEXT NOT NULL,
1543            message_index INTEGER NOT NULL,
1544            checkpoint_number INTEGER NOT NULL,
1545            content_hash TEXT NOT NULL,
1546            snapshot_json TEXT,
1547            file_state_json TEXT,
1548            created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1549            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1550            UNIQUE(session_id, message_index, checkpoint_number)
1551        );
1552        
1553        CREATE INDEX IF NOT EXISTS idx_message_checkpoints_session ON message_checkpoints(session_id);
1554        CREATE INDEX IF NOT EXISTS idx_sessions_title ON sessions(title);
1555
1556        -- Messages table for full-text search
1557        CREATE TABLE IF NOT EXISTS messages (
1558            id INTEGER PRIMARY KEY AUTOINCREMENT,
1559            session_id TEXT NOT NULL,
1560            message_index INTEGER NOT NULL,
1561            role TEXT NOT NULL,
1562            content TEXT NOT NULL,
1563            created_at INTEGER,
1564            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1565            UNIQUE(session_id, message_index)
1566        );
1567        
1568        CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id);
1569        CREATE INDEX IF NOT EXISTS idx_messages_role ON messages(role);
1570        
1571        -- Checkpoints for version tracking
1572        CREATE TABLE IF NOT EXISTS checkpoints (
1573            id INTEGER PRIMARY KEY AUTOINCREMENT,
1574            session_id TEXT NOT NULL,
1575            checkpoint_number INTEGER NOT NULL,
1576            message TEXT,
1577            message_count INTEGER NOT NULL,
1578            content_hash TEXT NOT NULL,
1579            snapshot TEXT,
1580            created_at TEXT NOT NULL DEFAULT (datetime('now')),
1581            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1582            UNIQUE(session_id, checkpoint_number)
1583        );
1584        
1585        CREATE INDEX IF NOT EXISTS idx_checkpoints_session ON checkpoints(session_id);
1586        
1587        -- Share links for importing shared conversations
1588        CREATE TABLE IF NOT EXISTS share_links (
1589            id TEXT PRIMARY KEY,
1590            session_id TEXT,
1591            provider TEXT NOT NULL,
1592            url TEXT NOT NULL UNIQUE,
1593            share_id TEXT NOT NULL,
1594            title TEXT,
1595            imported INTEGER DEFAULT 0,
1596            imported_at INTEGER,
1597            created_at INTEGER NOT NULL,
1598            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE SET NULL
1599        );
1600        
1601        CREATE INDEX IF NOT EXISTS idx_share_links_provider ON share_links(provider);
1602        CREATE INDEX IF NOT EXISTS idx_share_links_imported ON share_links(imported);
1603
1604        -- Harvest metadata
1605        CREATE TABLE IF NOT EXISTS harvest_metadata (
1606            key TEXT PRIMARY KEY,
1607            value TEXT NOT NULL
1608        );
1609
1610        INSERT OR REPLACE INTO harvest_metadata (key, value) 
1611        VALUES ('version', '2.1'),
1612               ('created_at', datetime('now'));
1613               
1614        -- Full-text search for messages (standalone FTS table)
1615        CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
1616            content_raw
1617        );
1618        
1619        -- Triggers to keep FTS index in sync with messages_v2
1620        -- Note: messages_fts is a standalone FTS5 table, so use regular DELETE (not the
1621        -- external-content 'delete' command which causes SQL logic errors on standalone tables)
1622        CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
1623            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1624        END;
1625        
1626        CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
1627            DELETE FROM messages_fts WHERE rowid = old.id;
1628        END;
1629        
1630        CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
1631            DELETE FROM messages_fts WHERE rowid = old.id;
1632            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1633        END;
1634        "#,
1635    )?;
1636
1637    Ok(())
1638}
1639
1640fn insert_or_update_session(
1641    conn: &Connection,
1642    session: &ChatSession,
1643    provider: &str,
1644    workspace_id: Option<&str>,
1645    workspace_name: Option<&str>,
1646    provider_version: Option<&str>,
1647    schema_version: u32,
1648    file_format: &str,
1649    workspace_path: Option<&str>,
1650) -> Result<bool> {
1651    let session_id = session
1652        .session_id
1653        .clone()
1654        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
1655
1656    let now = Utc::now().timestamp_millis();
1657    let session_json = serde_json::to_string(session)?;
1658
1659    // Check if session exists
1660    let existing: Option<i64> = conn
1661        .query_row(
1662            "SELECT updated_at FROM sessions WHERE id = ?",
1663            [&session_id],
1664            |row| row.get(0),
1665        )
1666        .ok();
1667
1668    let updated = existing.is_some();
1669
1670    conn.execute(
1671        r#"
1672        INSERT OR REPLACE INTO sessions 
1673        (id, provider, provider_type, workspace_id, workspace_name, title, 
1674         message_count, created_at, updated_at, harvested_at, session_json,
1675         provider_version, schema_version, file_format, workspace_path)
1676        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1677        "#,
1678        params![
1679            session_id,
1680            provider,
1681            provider,
1682            workspace_id,
1683            workspace_name,
1684            session.title(),
1685            session.request_count() as i64,
1686            session.creation_date,
1687            session.last_message_date,
1688            now,
1689            session_json,
1690            provider_version,
1691            schema_version as i64,
1692            file_format,
1693            workspace_path,
1694        ],
1695    )?;
1696
1697    // Populate enhanced message tables
1698    populate_enhanced_messages(conn, &session_id, session)?;
1699
1700    Ok(updated)
1701}
1702
1703/// Populate the enhanced messages_v2, tool_invocations, and file_changes tables
1704fn populate_enhanced_messages(
1705    conn: &Connection,
1706    session_id: &str,
1707    session: &ChatSession,
1708) -> Result<()> {
1709    // Delete existing messages for this session to avoid duplicates
1710    conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
1711    conn.execute(
1712        "DELETE FROM tool_invocations WHERE session_id = ?",
1713        [session_id],
1714    )?;
1715    conn.execute(
1716        "DELETE FROM file_changes WHERE session_id = ?",
1717        [session_id],
1718    )?;
1719
1720    for (idx, request) in session.requests.iter().enumerate() {
1721        let timestamp = request.timestamp;
1722        let request_id = request.request_id.as_deref();
1723        let response_id = request.response_id.as_deref();
1724        let model_id = request.model_id.as_deref();
1725        let is_canceled = request.is_canceled.unwrap_or(false);
1726
1727        // Insert user message
1728        if let Some(ref message) = request.message {
1729            let content = message.text.clone().unwrap_or_default();
1730            if !content.is_empty() {
1731                let metadata = serde_json::json!({
1732                    "variable_data": request.variable_data,
1733                });
1734
1735                conn.execute(
1736                    r#"
1737                    INSERT OR REPLACE INTO messages_v2 
1738                    (session_id, message_index, request_id, response_id, role, 
1739                     content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1740                    VALUES (?, ?, ?, ?, 'user', ?, ?, ?, ?, 0, ?)
1741                    "#,
1742                    params![
1743                        session_id,
1744                        (idx * 2) as i64,
1745                        request_id,
1746                        response_id,
1747                        &content,
1748                        &content, // content_markdown same as raw for user messages
1749                        model_id,
1750                        timestamp,
1751                        serde_json::to_string(&metadata).ok(),
1752                    ],
1753                )?;
1754            }
1755        }
1756
1757        // Insert assistant response with tool invocations
1758        if let Some(ref response) = request.response {
1759            let (content, tool_invocations) = extract_response_content_and_tools(response);
1760
1761            if !content.is_empty() || !tool_invocations.is_empty() {
1762                let metadata = serde_json::json!({
1763                    "content_references": request.content_references,
1764                    "code_citations": request.code_citations,
1765                    "response_markdown_info": request.response_markdown_info,
1766                });
1767
1768                conn.execute(
1769                    r#"
1770                    INSERT OR REPLACE INTO messages_v2 
1771                    (session_id, message_index, request_id, response_id, role, 
1772                     content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1773                    VALUES (?, ?, ?, ?, 'assistant', ?, ?, ?, ?, ?, ?)
1774                    "#,
1775                    params![
1776                        session_id,
1777                        (idx * 2 + 1) as i64,
1778                        request_id,
1779                        response_id,
1780                        &content,
1781                        &content,
1782                        model_id,
1783                        timestamp,
1784                        is_canceled as i64,
1785                        serde_json::to_string(&metadata).ok(),
1786                    ],
1787                )?;
1788
1789                // Get the message_id we just inserted
1790                let message_id: i64 = conn.last_insert_rowid();
1791
1792                // Insert tool invocations and file changes
1793                for (inv_idx, invocation) in tool_invocations.iter().enumerate() {
1794                    insert_tool_invocation(
1795                        conn, message_id, session_id, inv_idx, invocation, timestamp,
1796                    )?;
1797                }
1798            }
1799        }
1800    }
1801
1802    Ok(())
1803}
1804
1805/// Extract response content and tool invocations from the response JSON
1806fn extract_response_content_and_tools(
1807    response: &serde_json::Value,
1808) -> (String, Vec<serde_json::Value>) {
1809    let mut text_parts = Vec::new();
1810    let mut tool_invocations = Vec::new();
1811
1812    if let Some(items) = response.as_array() {
1813        for item in items {
1814            let kind = item.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1815
1816            match kind {
1817                "toolInvocationSerialized" => {
1818                    tool_invocations.push(item.clone());
1819                }
1820                "thinking" => {
1821                    // Skip thinking blocks
1822                    continue;
1823                }
1824                _ => {
1825                    if let Some(value) = item.get("value").and_then(|v| v.as_str()) {
1826                        // Filter out empty code block markers (```\n or just ```)
1827                        let trimmed = value.trim();
1828                        if !trimmed.is_empty() && !is_empty_code_block(trimmed) {
1829                            text_parts.push(value.to_string());
1830                        }
1831                    }
1832                }
1833            }
1834        }
1835    }
1836
1837    (text_parts.join("\n\n"), tool_invocations)
1838}
1839
1840/// Insert a tool invocation and any associated file changes
1841fn insert_tool_invocation(
1842    conn: &Connection,
1843    message_id: i64,
1844    session_id: &str,
1845    inv_idx: usize,
1846    invocation: &serde_json::Value,
1847    timestamp: Option<i64>,
1848) -> Result<()> {
1849    let tool_name = invocation
1850        .get("toolId")
1851        .and_then(|t| t.as_str())
1852        .unwrap_or("unknown");
1853    let tool_call_id = invocation.get("toolCallId").and_then(|t| t.as_str());
1854    let is_complete = invocation
1855        .get("isComplete")
1856        .and_then(|c| c.as_bool())
1857        .unwrap_or(false);
1858    let is_confirmed = invocation.get("isConfirmed");
1859    let tool_data = invocation.get("toolSpecificData");
1860
1861    let input_json = tool_data.map(|d| serde_json::to_string(d).unwrap_or_default());
1862    let status = if is_complete { "complete" } else { "pending" };
1863    let confirmed = match is_confirmed {
1864        Some(v) => v
1865            .get("type")
1866            .and_then(|t| t.as_i64())
1867            .map(|t| t > 0)
1868            .unwrap_or(false),
1869        None => false,
1870    };
1871
1872    conn.execute(
1873        r#"
1874        INSERT INTO tool_invocations 
1875        (message_id, session_id, tool_name, tool_call_id, invocation_index, 
1876         input_json, status, is_confirmed, timestamp)
1877        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
1878        "#,
1879        params![
1880            message_id,
1881            session_id,
1882            tool_name,
1883            tool_call_id,
1884            inv_idx as i64,
1885            input_json,
1886            status,
1887            confirmed as i64,
1888            timestamp,
1889        ],
1890    )?;
1891
1892    let tool_invocation_id = conn.last_insert_rowid();
1893
1894    // Extract and insert file changes based on tool type
1895    if let Some(data) = tool_data {
1896        insert_file_changes(conn, tool_invocation_id, session_id, data, timestamp)?;
1897    }
1898
1899    Ok(())
1900}
1901
1902/// Insert file changes from tool-specific data
1903fn insert_file_changes(
1904    conn: &Connection,
1905    tool_invocation_id: i64,
1906    session_id: &str,
1907    tool_data: &serde_json::Value,
1908    timestamp: Option<i64>,
1909) -> Result<()> {
1910    let kind = tool_data.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1911
1912    match kind {
1913        "terminal" => {
1914            // Terminal command execution
1915            if let Some(cmd_line) = tool_data.get("commandLine") {
1916                let original = cmd_line.get("original").and_then(|o| o.as_str());
1917                let edited = cmd_line.get("toolEdited").and_then(|e| e.as_str());
1918                let output = tool_data
1919                    .get("terminalCommandOutput")
1920                    .map(|o| serde_json::to_string(o).unwrap_or_default());
1921
1922                conn.execute(
1923                    r#"
1924                    INSERT INTO file_changes 
1925                    (tool_invocation_id, session_id, file_path, change_type, 
1926                     old_content, new_content, diff_unified, timestamp)
1927                    VALUES (?, ?, '[terminal]', 'command', ?, ?, ?, ?)
1928                    "#,
1929                    params![
1930                        tool_invocation_id,
1931                        session_id,
1932                        original,
1933                        edited.or(original),
1934                        output,
1935                        timestamp,
1936                    ],
1937                )?;
1938            }
1939        }
1940        "replaceFile" | "editFile" => {
1941            // File edit with old/new strings
1942            let file_path = tool_data
1943                .get("uri")
1944                .or_else(|| tool_data.get("filePath"))
1945                .and_then(|p| p.as_str())
1946                .unwrap_or("[unknown]");
1947            let old_string = tool_data.get("oldString").and_then(|s| s.as_str());
1948            let new_string = tool_data.get("newString").and_then(|s| s.as_str());
1949
1950            // Generate unified diff if we have both old and new content
1951            let diff = if let (Some(old), Some(new)) = (old_string, new_string) {
1952                Some(generate_unified_diff(old, new, file_path))
1953            } else {
1954                None
1955            };
1956
1957            conn.execute(
1958                r#"
1959                INSERT INTO file_changes 
1960                (tool_invocation_id, session_id, file_path, change_type, 
1961                 old_content, new_content, diff_unified, timestamp)
1962                VALUES (?, ?, ?, 'edit', ?, ?, ?, ?)
1963                "#,
1964                params![
1965                    tool_invocation_id,
1966                    session_id,
1967                    file_path,
1968                    old_string,
1969                    new_string,
1970                    diff,
1971                    timestamp,
1972                ],
1973            )?;
1974        }
1975        "createFile" => {
1976            let file_path = tool_data
1977                .get("uri")
1978                .or_else(|| tool_data.get("filePath"))
1979                .and_then(|p| p.as_str())
1980                .unwrap_or("[unknown]");
1981            let content = tool_data.get("content").and_then(|c| c.as_str());
1982
1983            conn.execute(
1984                r#"
1985                INSERT INTO file_changes 
1986                (tool_invocation_id, session_id, file_path, change_type, 
1987                 new_content, timestamp)
1988                VALUES (?, ?, ?, 'create', ?, ?)
1989                "#,
1990                params![
1991                    tool_invocation_id,
1992                    session_id,
1993                    file_path,
1994                    content,
1995                    timestamp,
1996                ],
1997            )?;
1998        }
1999        "readFile" => {
2000            let file_path = tool_data
2001                .get("uri")
2002                .or_else(|| tool_data.get("filePath"))
2003                .and_then(|p| p.as_str())
2004                .unwrap_or("[unknown]");
2005
2006            conn.execute(
2007                r#"
2008                INSERT INTO file_changes 
2009                (tool_invocation_id, session_id, file_path, change_type, timestamp)
2010                VALUES (?, ?, ?, 'read', ?)
2011                "#,
2012                params![tool_invocation_id, session_id, file_path, timestamp,],
2013            )?;
2014        }
2015        _ => {
2016            // Other tool types - store as generic change
2017            if !kind.is_empty() {
2018                let data_json = serde_json::to_string(tool_data).ok();
2019                conn.execute(
2020                    r#"
2021                    INSERT INTO file_changes 
2022                    (tool_invocation_id, session_id, file_path, change_type, 
2023                     diff_unified, timestamp)
2024                    VALUES (?, ?, ?, ?, ?, ?)
2025                    "#,
2026                    params![
2027                        tool_invocation_id,
2028                        session_id,
2029                        format!("[{}]", kind),
2030                        kind,
2031                        data_json,
2032                        timestamp,
2033                    ],
2034                )?;
2035            }
2036        }
2037    }
2038
2039    Ok(())
2040}
2041
2042/// Generate a simple unified diff between two strings
2043fn generate_unified_diff(old: &str, new: &str, file_path: &str) -> String {
2044    let old_lines: Vec<&str> = old.lines().collect();
2045    let new_lines: Vec<&str> = new.lines().collect();
2046
2047    let mut diff = format!("--- a/{}\n+++ b/{}\n", file_path, file_path);
2048
2049    // Simple line-by-line diff (not a full Myers diff, but good enough for storage)
2050    let max_lines = old_lines.len().max(new_lines.len());
2051    let mut in_hunk = false;
2052    let mut hunk_start = 0;
2053    let mut hunk_lines = Vec::new();
2054
2055    for i in 0..max_lines {
2056        let old_line = old_lines.get(i).copied();
2057        let new_line = new_lines.get(i).copied();
2058
2059        match (old_line, new_line) {
2060            (Some(o), Some(n)) if o == n => {
2061                if in_hunk {
2062                    hunk_lines.push(format!(" {}", o));
2063                }
2064            }
2065            (Some(o), Some(n)) => {
2066                if !in_hunk {
2067                    in_hunk = true;
2068                    hunk_start = i + 1;
2069                }
2070                hunk_lines.push(format!("-{}", o));
2071                hunk_lines.push(format!("+{}", n));
2072            }
2073            (Some(o), None) => {
2074                if !in_hunk {
2075                    in_hunk = true;
2076                    hunk_start = i + 1;
2077                }
2078                hunk_lines.push(format!("-{}", o));
2079            }
2080            (None, Some(n)) => {
2081                if !in_hunk {
2082                    in_hunk = true;
2083                    hunk_start = i + 1;
2084                }
2085                hunk_lines.push(format!("+{}", n));
2086            }
2087            (None, None) => break,
2088        }
2089    }
2090
2091    if !hunk_lines.is_empty() {
2092        diff.push_str(&format!(
2093            "@@ -{},{} +{},{} @@\n",
2094            hunk_start,
2095            old_lines.len(),
2096            hunk_start,
2097            new_lines.len()
2098        ));
2099        for line in hunk_lines {
2100            diff.push_str(&line);
2101            diff.push('\n');
2102        }
2103    }
2104
2105    diff
2106}
2107
2108/// Harvest sessions from web-based cloud providers (ChatGPT, Claude, etc.)
2109fn harvest_web_providers(
2110    conn: &Connection,
2111    stats: &mut HarvestStats,
2112    include_providers: &[String],
2113    exclude_providers: &[String],
2114) -> Result<()> {
2115    use crate::browser::extract_provider_cookies;
2116
2117    println!("\n{} Harvesting from web providers...", "[*]".blue());
2118
2119    // Define web providers to harvest from
2120    let web_provider_configs: Vec<(&str, &str, &str)> = vec![
2121        ("ChatGPT", "chatgpt", "__Secure-next-auth.session-token"),
2122        ("Claude", "claude", "sessionKey"),
2123    ];
2124
2125    let mut web_sessions_harvested = 0;
2126
2127    for (display_name, provider_key, _cookie_name) in &web_provider_configs {
2128        // Check provider filters
2129        if !include_providers.is_empty()
2130            && !include_providers
2131                .iter()
2132                .any(|p| p.eq_ignore_ascii_case(provider_key))
2133        {
2134            continue;
2135        }
2136        if exclude_providers
2137            .iter()
2138            .any(|p| p.eq_ignore_ascii_case(provider_key))
2139        {
2140            continue;
2141        }
2142
2143        print!("   {} Checking {} ... ", "[-]".yellow(), display_name);
2144
2145        // Try to extract cookies for this provider
2146        if let Some(creds) = extract_provider_cookies(provider_key) {
2147            if let Some(session_token) = &creds.session_token {
2148                println!("{}", "authenticated".green());
2149
2150                // Create provider and fetch conversations
2151                let result = match *provider_key {
2152                    "chatgpt" => harvest_chatgpt_sessions(conn, session_token, stats),
2153                    "claude" => harvest_claude_sessions(conn, session_token, stats),
2154                    _ => Ok(0),
2155                };
2156
2157                match result {
2158                    Ok(count) => {
2159                        if count > 0 {
2160                            println!(
2161                                "      {} Harvested {} sessions from {}",
2162                                "[+]".green(),
2163                                count.to_string().cyan(),
2164                                display_name
2165                            );
2166                            web_sessions_harvested += count;
2167                        }
2168                    }
2169                    Err(e) => {
2170                        println!(
2171                            "      {} Failed to harvest {}: {:?}",
2172                            "[!]".red(),
2173                            display_name,
2174                            e
2175                        );
2176                    }
2177                }
2178            } else {
2179                println!("{}", "no session token".yellow());
2180            }
2181        } else {
2182            println!("{}", "not authenticated".yellow());
2183        }
2184    }
2185
2186    if web_sessions_harvested > 0 {
2187        println!(
2188            "   {} Total web sessions harvested: {}",
2189            "[+]".green(),
2190            web_sessions_harvested.to_string().cyan()
2191        );
2192    }
2193
2194    Ok(())
2195}
2196
2197/// Harvest sessions from ChatGPT web interface
2198fn harvest_chatgpt_sessions(
2199    conn: &Connection,
2200    session_token: &str,
2201    stats: &mut HarvestStats,
2202) -> Result<usize> {
2203    use crate::providers::cloud::chatgpt::ChatGPTProvider;
2204    use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2205
2206    let provider = ChatGPTProvider::with_session_token(session_token.to_string());
2207
2208    // List all conversations
2209    let options = FetchOptions {
2210        limit: Some(100),
2211        include_archived: false,
2212        after: None,
2213        before: None,
2214        session_token: Some(session_token.to_string()),
2215    };
2216
2217    let conversations = provider
2218        .list_conversations(&options)
2219        .context("Failed to list ChatGPT conversations")?;
2220
2221    let mut harvested = 0;
2222
2223    for conv_summary in conversations {
2224        // Check if we already have this conversation
2225        let exists: bool = conn
2226            .query_row(
2227                "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'chatgpt')",
2228                params![&conv_summary.id],
2229                |row| row.get(0),
2230            )
2231            .unwrap_or(false);
2232
2233        if exists {
2234            // Could add update logic here later
2235            continue;
2236        }
2237
2238        // Fetch full conversation
2239        match provider.fetch_conversation(&conv_summary.id) {
2240            Ok(conv) => {
2241                // Insert into database using existing function
2242                if let Err(e) =
2243                    insert_cloud_conversation_to_harvest_db(conn, &conv, "chatgpt", None)
2244                {
2245                    eprintln!("Failed to insert ChatGPT session: {}", e);
2246                    continue;
2247                }
2248                harvested += 1;
2249                stats.sessions_added += 1;
2250            }
2251            Err(e) => {
2252                eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2253            }
2254        }
2255
2256        // Small delay to avoid rate limiting
2257        std::thread::sleep(std::time::Duration::from_millis(100));
2258    }
2259
2260    Ok(harvested)
2261}
2262
2263/// Harvest sessions from Claude web interface
2264fn harvest_claude_sessions(
2265    conn: &Connection,
2266    session_token: &str,
2267    stats: &mut HarvestStats,
2268) -> Result<usize> {
2269    use crate::providers::cloud::anthropic::AnthropicProvider;
2270    use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2271
2272    let provider = AnthropicProvider::with_session_token(session_token.to_string());
2273
2274    // List all conversations
2275    let options = FetchOptions {
2276        limit: Some(100),
2277        include_archived: false,
2278        after: None,
2279        before: None,
2280        session_token: Some(session_token.to_string()),
2281    };
2282
2283    let conversations = provider
2284        .list_conversations(&options)
2285        .context("Failed to list Claude conversations")?;
2286
2287    let mut harvested = 0;
2288
2289    for conv_summary in conversations {
2290        // Check if we already have this conversation
2291        let exists: bool = conn
2292            .query_row(
2293                "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'claude')",
2294                params![&conv_summary.id],
2295                |row| row.get(0),
2296            )
2297            .unwrap_or(false);
2298
2299        if exists {
2300            continue;
2301        }
2302
2303        // Fetch full conversation
2304        match provider.fetch_conversation(&conv_summary.id) {
2305            Ok(conv) => {
2306                if let Err(e) = insert_cloud_conversation_to_harvest_db(conn, &conv, "claude", None)
2307                {
2308                    eprintln!("Failed to insert Claude session: {}", e);
2309                    continue;
2310                }
2311                harvested += 1;
2312                stats.sessions_added += 1;
2313            }
2314            Err(e) => {
2315                eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2316            }
2317        }
2318
2319        std::thread::sleep(std::time::Duration::from_millis(100));
2320    }
2321
2322    Ok(harvested)
2323}
2324
2325/// Insert a CloudConversation into the harvest database
2326fn insert_cloud_conversation_to_harvest_db(
2327    conn: &Connection,
2328    conv: &crate::providers::cloud::common::CloudConversation,
2329    provider: &str,
2330    workspace_name: Option<&str>,
2331) -> Result<()> {
2332    let now = Utc::now().timestamp_millis();
2333    let created_at = conv.created_at.timestamp_millis();
2334    let updated_at = conv
2335        .updated_at
2336        .map(|dt| dt.timestamp_millis())
2337        .unwrap_or(created_at);
2338
2339    // Build a session-like JSON structure for compatibility
2340    let session_json = serde_json::json!({
2341        "id": conv.id,
2342        "title": conv.title,
2343        "model": conv.model,
2344        "created_at": conv.created_at.to_rfc3339(),
2345        "updated_at": conv.updated_at.map(|dt| dt.to_rfc3339()),
2346        "messages": conv.messages.iter().map(|m| {
2347            serde_json::json!({
2348                "id": m.id,
2349                "role": m.role,
2350                "content": m.content,
2351                "timestamp": m.timestamp.map(|dt| dt.to_rfc3339()),
2352                "model": m.model,
2353            })
2354        }).collect::<Vec<_>>(),
2355    });
2356
2357    conn.execute(
2358        r#"
2359        INSERT OR REPLACE INTO sessions 
2360        (id, provider, provider_type, workspace_id, workspace_name, title, 
2361         message_count, created_at, updated_at, harvested_at, session_json)
2362        VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
2363        "#,
2364        params![
2365            conv.id,
2366            provider,
2367            provider,
2368            workspace_name,
2369            conv.title.clone().unwrap_or_else(|| "Untitled".to_string()),
2370            conv.messages.len() as i64,
2371            created_at,
2372            updated_at,
2373            now,
2374            session_json.to_string(),
2375        ],
2376    )?;
2377
2378    // Also populate messages_v2 table for detailed message storage
2379    populate_cloud_messages(conn, &conv.id, conv)?;
2380
2381    Ok(())
2382}
2383
2384/// Populate messages_v2 table from a CloudConversation
2385fn populate_cloud_messages(
2386    conn: &Connection,
2387    session_id: &str,
2388    conv: &crate::providers::cloud::common::CloudConversation,
2389) -> Result<()> {
2390    // Delete existing messages for this session to avoid duplicates
2391    conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
2392
2393    for (idx, message) in conv.messages.iter().enumerate() {
2394        let timestamp = message.timestamp.map(|dt| dt.timestamp_millis());
2395        let role = match message.role.as_str() {
2396            "user" | "human" => "user",
2397            "assistant" => "assistant",
2398            "system" => "system",
2399            other => other,
2400        };
2401
2402        conn.execute(
2403            r#"
2404            INSERT INTO messages_v2 
2405            (session_id, message_index, request_id, response_id, role, 
2406             content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
2407            VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, 0, NULL)
2408            "#,
2409            params![
2410                session_id,
2411                idx as i64,
2412                message.id,
2413                role,
2414                &message.content,
2415                &message.content,
2416                message.model.as_deref(),
2417                timestamp,
2418            ],
2419        )?;
2420    }
2421
2422    Ok(())
2423}
2424
2425fn update_harvest_metadata(conn: &Connection) -> Result<()> {
2426    conn.execute(
2427        "INSERT OR REPLACE INTO harvest_metadata (key, value) VALUES ('last_harvest', datetime('now'))",
2428        [],
2429    )?;
2430    Ok(())
2431}
2432
2433fn init_git_tracking(db_path: &Path) -> Result<()> {
2434    let db_dir = db_path.parent().unwrap_or(Path::new("."));
2435    let db_name = db_path.file_name().unwrap().to_str().unwrap();
2436
2437    println!("\n{} Initializing Git Tracking", "[G]".green().bold());
2438
2439    // Check if already a git repo
2440    if !db_dir.join(".git").exists() {
2441        let output = Command::new("git")
2442            .current_dir(db_dir)
2443            .args(["init"])
2444            .output()
2445            .context("Failed to run git init")?;
2446
2447        if !output.status.success() {
2448            anyhow::bail!(
2449                "Git init failed: {}",
2450                String::from_utf8_lossy(&output.stderr)
2451            );
2452        }
2453        println!("{} Initialized git repository", "[+]".green());
2454    } else {
2455        println!("{} Git repository already exists", "[i]".blue());
2456    }
2457
2458    // Create .gitignore if needed
2459    let gitignore_path = db_dir.join(".gitignore");
2460    if !gitignore_path.exists() {
2461        fs::write(
2462            &gitignore_path,
2463            "*.db-journal\n*.db-wal\n*.db-shm\n*.backup\n",
2464        )?;
2465        println!("{} Created .gitignore", "[+]".green());
2466    }
2467
2468    // Add database to git
2469    let output = Command::new("git")
2470        .current_dir(db_dir)
2471        .args(["add", db_name])
2472        .output()?;
2473
2474    if output.status.success() {
2475        println!("{} Added {} to git", "[+]".green(), db_name);
2476    }
2477
2478    // Initial commit if no commits yet
2479    let output = Command::new("git")
2480        .current_dir(db_dir)
2481        .args(["rev-parse", "HEAD"])
2482        .output()?;
2483
2484    if !output.status.success() {
2485        let output = Command::new("git")
2486            .current_dir(db_dir)
2487            .args(["commit", "-m", "Initialize harvest database"])
2488            .output()?;
2489
2490        if output.status.success() {
2491            println!("{} Created initial commit", "[+]".green());
2492        }
2493    }
2494
2495    println!("\n{} Git tracking enabled!", "[+]".green().bold());
2496    println!("   Run 'csm harvest git commit -m \"message\"' to save changes");
2497    println!("   Run 'csm harvest git log' to view history");
2498
2499    Ok(())
2500}
2501
2502fn git_commit_harvest(db_path: &Path, message: &str) -> Result<()> {
2503    let db_dir = db_path.parent().unwrap_or(Path::new("."));
2504    let db_name = db_path.file_name().unwrap().to_str().unwrap();
2505
2506    // Stage the database
2507    let output = Command::new("git")
2508        .current_dir(db_dir)
2509        .args(["add", db_name])
2510        .output()
2511        .context("Failed to stage database")?;
2512
2513    if !output.status.success() {
2514        anyhow::bail!(
2515            "Git add failed: {}",
2516            String::from_utf8_lossy(&output.stderr)
2517        );
2518    }
2519
2520    // Commit
2521    let output = Command::new("git")
2522        .current_dir(db_dir)
2523        .args(["commit", "-m", message])
2524        .output()
2525        .context("Failed to commit")?;
2526
2527    if !output.status.success() {
2528        let stderr = String::from_utf8_lossy(&output.stderr);
2529        if stderr.contains("nothing to commit") {
2530            println!("{} Nothing to commit", "[i]".blue());
2531            return Ok(());
2532        }
2533        anyhow::bail!("Git commit failed: {}", stderr);
2534    }
2535
2536    // Get commit hash
2537    let output = Command::new("git")
2538        .current_dir(db_dir)
2539        .args(["rev-parse", "--short", "HEAD"])
2540        .output()?;
2541
2542    let hash = String::from_utf8_lossy(&output.stdout).trim().to_string();
2543    println!("{} Committed: {} - {}", "[+]".green(), hash.cyan(), message);
2544
2545    Ok(())
2546}
2547
2548// ============================================================================
2549// Share Link Commands
2550// ============================================================================
2551
2552/// Import a chat session from a share link URL
2553pub fn harvest_share(
2554    db_path: Option<&str>,
2555    url: &str,
2556    name: Option<&str>,
2557    _workspace: Option<&str>,
2558) -> Result<()> {
2559    let db_path = get_db_path(db_path)?;
2560
2561    // Parse the share link
2562    let share_info = match ShareLinkParser::parse(url) {
2563        Some(info) => info,
2564        None => {
2565            println!("{} Unrecognized share link format", "[!]".yellow());
2566            println!("   Supported providers: ChatGPT, Claude, Gemini, Perplexity");
2567            println!("   Example URLs:");
2568            println!("   - https://chat.openai.com/share/abc123...");
2569            println!("   - https://claude.ai/share/xyz789...");
2570            anyhow::bail!("Could not parse share link URL");
2571        }
2572    };
2573
2574    println!("{}", "=".repeat(60).cyan());
2575    println!("{}", " Share Link Import ".bold().cyan());
2576    println!("{}", "=".repeat(60).cyan());
2577    println!();
2578
2579    println!(
2580        "{} Detected provider: {}",
2581        "[i]".blue(),
2582        share_info.provider.bold()
2583    );
2584    println!("{} Share ID: {}", "[i]".blue(), share_info.share_id);
2585
2586    // Open or create the database with new schema
2587    let db = ChatDatabase::open(&db_path)?;
2588
2589    // Check if already imported
2590    let conn = db.connection();
2591    let existing: Option<(String, i64)> = conn
2592        .query_row(
2593            "SELECT id, imported FROM share_links WHERE url = ?",
2594            [url],
2595            |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
2596        )
2597        .ok();
2598
2599    if let Some((id, imported)) = existing {
2600        println!();
2601        println!(
2602            "{} This share link has already been registered (ID: {})",
2603            "[!]".yellow(),
2604            id
2605        );
2606
2607        let status = if imported == 1 { "imported" } else { "pending" };
2608        println!("{} Current status: {}", "[i]".blue(), status);
2609
2610        if imported == 1 {
2611            println!(
2612                "{} Session already imported - no action needed",
2613                "[+]".green()
2614            );
2615            return Ok(());
2616        }
2617    } else {
2618        // Insert the share link as pending
2619        let link_id = uuid::Uuid::new_v4().to_string();
2620        let now = chrono::Utc::now().timestamp();
2621        conn.execute(
2622            "INSERT INTO share_links (id, url, provider, share_id, title, imported, created_at) VALUES (?, ?, ?, ?, ?, 0, ?)",
2623            params![link_id, url, share_info.provider, share_info.share_id, name, now],
2624        )?;
2625
2626        println!(
2627            "{} Registered share link (ID: {})",
2628            "[+]".green(),
2629            &link_id[..8]
2630        );
2631    }
2632
2633    println!();
2634    println!("{}", "-".repeat(60).dimmed());
2635    println!();
2636
2637    // TODO: Implement actual content fetching
2638    // For now, we just register the share link and mark it as pending
2639    // Actual fetching would require:
2640    // 1. Browser cookie extraction (already implemented in browser.rs)
2641    // 2. HTTP request with auth cookies
2642    // 3. HTML/JSON parsing of the share page
2643    // 4. Conversion to our universal format
2644
2645    println!("{} Share link registered as pending", "[i]".blue());
2646    println!();
2647    println!("{}", "Note:".bold().yellow());
2648    println!("   Automatic content fetching from share links is not yet implemented.");
2649    println!("   For now, you can:");
2650    println!("   1. Open the share link in your browser");
2651    println!("   2. Export the conversation manually");
2652    println!("   3. Import with: csm import <file>");
2653    println!();
2654    println!("   Or use 'csm harvest shares' to view pending links.");
2655
2656    Ok(())
2657}
2658
2659/// List share links in the harvest database
2660pub fn harvest_shares(
2661    db_path: Option<&str>,
2662    status_filter: Option<&str>,
2663    limit: usize,
2664) -> Result<()> {
2665    let db_path = get_db_path(db_path)?;
2666
2667    if !db_path.exists() {
2668        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2669    }
2670
2671    let db = ChatDatabase::open(&db_path)?;
2672    let conn = db.connection();
2673
2674    println!("{}", "=".repeat(70).cyan());
2675    println!("{}", " Share Links ".bold().cyan());
2676    println!("{}", "=".repeat(70).cyan());
2677    println!();
2678
2679    let query = match status_filter {
2680        Some("pending") => format!(
2681            "SELECT id, url, provider, share_id, title, imported, created_at 
2682             FROM share_links WHERE imported = 0 ORDER BY created_at DESC LIMIT {}",
2683            limit
2684        ),
2685        Some("imported") => format!(
2686            "SELECT id, url, provider, share_id, title, imported, created_at 
2687             FROM share_links WHERE imported = 1 ORDER BY created_at DESC LIMIT {}",
2688            limit
2689        ),
2690        Some(_) | None => format!(
2691            "SELECT id, url, provider, share_id, title, imported, created_at 
2692             FROM share_links ORDER BY created_at DESC LIMIT {}",
2693            limit
2694        ),
2695    };
2696
2697    let mut stmt = conn.prepare(&query)?;
2698
2699    let rows: Vec<_> = stmt
2700        .query_map([], |row| {
2701            Ok((
2702                row.get::<_, String>(0)?,
2703                row.get::<_, String>(1)?,
2704                row.get::<_, String>(2)?,
2705                row.get::<_, Option<String>>(3)?,
2706                row.get::<_, Option<String>>(4)?,
2707                row.get::<_, i64>(5)?,
2708                row.get::<_, i64>(6)?,
2709            ))
2710        })?
2711        .collect::<Result<Vec<_>, _>>()?;
2712
2713    if rows.is_empty() {
2714        if let Some(status) = status_filter {
2715            println!("{} No share links with status '{}'", "[i]".blue(), status);
2716        } else {
2717            println!("{} No share links found", "[i]".blue());
2718        }
2719        println!("   Use 'csm harvest share <url>' to add a share link");
2720        return Ok(());
2721    }
2722
2723    // Count by status
2724    let pending_count: i64 = conn
2725        .query_row(
2726            "SELECT COUNT(*) FROM share_links WHERE imported = 0",
2727            [],
2728            |row| row.get(0),
2729        )
2730        .unwrap_or(0);
2731
2732    let imported_count: i64 = conn
2733        .query_row(
2734            "SELECT COUNT(*) FROM share_links WHERE imported = 1",
2735            [],
2736            |row| row.get(0),
2737        )
2738        .unwrap_or(0);
2739
2740    println!(
2741        "{} {} pending, {} imported",
2742        "[i]".blue(),
2743        pending_count.to_string().yellow(),
2744        imported_count.to_string().green()
2745    );
2746    println!();
2747
2748    for (id, url, provider, _share_id, title, imported, created_at) in rows {
2749        let status = if imported == 1 { "imported" } else { "pending" };
2750        let status_colored = if imported == 1 {
2751            status.green()
2752        } else {
2753            status.yellow()
2754        };
2755
2756        println!(
2757            "{} [{}] {} - {}",
2758            format!("#{}", &id[..8]).dimmed(),
2759            status_colored,
2760            provider.bold(),
2761            title.as_deref().unwrap_or("(untitled)")
2762        );
2763
2764        // Truncate URL for display
2765        let display_url = if url.len() > 60 {
2766            format!("{}...", &url[..57])
2767        } else {
2768            url.clone()
2769        };
2770        println!("   {} {}", "URL:".dimmed(), display_url.dimmed());
2771
2772        // Format timestamp
2773        let timestamp = chrono::DateTime::from_timestamp(created_at, 0)
2774            .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
2775            .unwrap_or_else(|| created_at.to_string());
2776        println!("   {} {}", "Added:".dimmed(), timestamp.dimmed());
2777        println!();
2778    }
2779
2780    Ok(())
2781}
2782
2783// ============================================================================
2784// Checkpoint Commands
2785// ============================================================================
2786
2787/// Create a checkpoint (version snapshot) of a session
2788pub fn harvest_checkpoint(
2789    db_path: Option<&str>,
2790    session_id: &str,
2791    message: Option<&str>,
2792) -> Result<()> {
2793    let db_path = get_db_path(db_path)?;
2794
2795    if !db_path.exists() {
2796        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2797    }
2798
2799    let db = ChatDatabase::open(&db_path)?;
2800    let conn = db.connection();
2801
2802    // Find the session
2803    let session: Option<(i64, String, i64)> = conn
2804        .query_row(
2805            "SELECT id, session_id, message_count FROM sessions WHERE session_id = ? OR id = ?",
2806            params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2807            |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2808        )
2809        .ok();
2810
2811    let (internal_id, actual_session_id, msg_count) = match session {
2812        Some(s) => s,
2813        None => {
2814            println!("{} Session not found: {}", "[!]".red(), session_id);
2815            println!("   Use 'csm harvest list' to see available sessions");
2816            anyhow::bail!("Session not found");
2817        }
2818    };
2819
2820    // Get current checkpoint number
2821    let checkpoint_num: i64 = conn.query_row(
2822        "SELECT COALESCE(MAX(checkpoint_number), 0) + 1 FROM checkpoints WHERE session_id = ?",
2823        [internal_id],
2824        |row| row.get(0),
2825    )?;
2826
2827    // Calculate content hash (simple hash of message count + session_id for now)
2828    let content_hash = format!(
2829        "{:x}",
2830        md5_hash(&format!("{}:{}", actual_session_id, msg_count))
2831    );
2832
2833    // Get message snapshot
2834    let messages: Vec<String> = {
2835        let mut stmt = conn
2836            .prepare("SELECT content FROM messages WHERE session_id = ? ORDER BY message_index")?;
2837        let rows = stmt.query_map([internal_id], |row| row.get::<_, String>(0))?;
2838        rows.filter_map(|r| r.ok()).collect()
2839    };
2840
2841    let snapshot = serde_json::json!({
2842        "message_count": msg_count,
2843        "messages": messages,
2844    });
2845
2846    let default_message = format!("Checkpoint {}", checkpoint_num);
2847    let message_text = message.unwrap_or(&default_message);
2848
2849    // Create checkpoint
2850    conn.execute(
2851        "INSERT INTO checkpoints (session_id, checkpoint_number, message, message_count, content_hash, snapshot)
2852         VALUES (?, ?, ?, ?, ?, ?)",
2853        params![
2854            internal_id,
2855            checkpoint_num,
2856            message_text,
2857            msg_count,
2858            content_hash,
2859            snapshot.to_string()
2860        ],
2861    )?;
2862
2863    println!("{}", "=".repeat(60).cyan());
2864    println!("{}", " Checkpoint Created ".bold().cyan());
2865    println!("{}", "=".repeat(60).cyan());
2866    println!();
2867    println!("{} Session: {}", "[+]".green(), actual_session_id);
2868    println!(
2869        "{} Checkpoint #{}: {}",
2870        "[+]".green(),
2871        checkpoint_num,
2872        message_text
2873    );
2874    println!("{} Messages: {}", "[i]".blue(), msg_count);
2875    println!("{} Hash: {}", "[i]".blue(), &content_hash[..16]);
2876    println!();
2877    println!(
2878        "   Use 'csm harvest checkpoints {}' to view history",
2879        session_id
2880    );
2881
2882    Ok(())
2883}
2884
2885/// List checkpoints for a session
2886pub fn harvest_checkpoints(db_path: Option<&str>, session_id: &str) -> Result<()> {
2887    let db_path = get_db_path(db_path)?;
2888
2889    if !db_path.exists() {
2890        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2891    }
2892
2893    let db = ChatDatabase::open(&db_path)?;
2894    let conn = db.connection();
2895
2896    // Find the session
2897    let session: Option<(i64, String, String)> = conn
2898        .query_row(
2899            "SELECT id, session_id, name FROM sessions WHERE session_id = ? OR id = ?",
2900            params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2901            |row| {
2902                Ok((
2903                    row.get(0)?,
2904                    row.get(1)?,
2905                    row.get::<_, Option<String>>(2)?.unwrap_or_default(),
2906                ))
2907            },
2908        )
2909        .ok();
2910
2911    let (internal_id, actual_session_id, name) = match session {
2912        Some(s) => s,
2913        None => {
2914            println!("{} Session not found: {}", "[!]".red(), session_id);
2915            anyhow::bail!("Session not found");
2916        }
2917    };
2918
2919    println!("{}", "=".repeat(70).cyan());
2920    println!("{}", " Session Checkpoints ".bold().cyan());
2921    println!("{}", "=".repeat(70).cyan());
2922    println!();
2923    println!(
2924        "{} Session: {} {}",
2925        "[i]".blue(),
2926        actual_session_id,
2927        if !name.is_empty() {
2928            format!("({})", name)
2929        } else {
2930            String::new()
2931        }
2932    );
2933    println!();
2934
2935    let mut stmt = conn.prepare(
2936        "SELECT checkpoint_number, message, message_count, content_hash, created_at
2937         FROM checkpoints WHERE session_id = ? ORDER BY checkpoint_number DESC",
2938    )?;
2939
2940    let checkpoints: Vec<_> = stmt
2941        .query_map([internal_id], |row| {
2942            Ok((
2943                row.get::<_, i64>(0)?,
2944                row.get::<_, String>(1)?,
2945                row.get::<_, i64>(2)?,
2946                row.get::<_, String>(3)?,
2947                row.get::<_, String>(4)?,
2948            ))
2949        })?
2950        .collect::<Result<Vec<_>, _>>()?;
2951
2952    if checkpoints.is_empty() {
2953        println!("{} No checkpoints found for this session", "[i]".blue());
2954        println!(
2955            "   Use 'csm harvest checkpoint {} -m \"message\"' to create one",
2956            session_id
2957        );
2958        return Ok(());
2959    }
2960
2961    println!("{} {} checkpoints found:", "[i]".blue(), checkpoints.len());
2962    println!();
2963
2964    for (num, msg, msg_count, hash, created_at) in checkpoints {
2965        println!("  {} #{} - {}", "*".cyan(), num.to_string().bold(), msg);
2966        println!(
2967            "       {} messages | {} | {}",
2968            msg_count,
2969            &hash[..12],
2970            created_at.dimmed()
2971        );
2972    }
2973
2974    println!();
2975    println!(
2976        "   Use 'csm harvest restore {} <checkpoint>' to restore",
2977        session_id
2978    );
2979
2980    Ok(())
2981}
2982
2983/// Restore a session to a previous checkpoint
2984pub fn harvest_restore_checkpoint(
2985    db_path: Option<&str>,
2986    session_id: &str,
2987    checkpoint_number: i64,
2988) -> Result<()> {
2989    let db_path = get_db_path(db_path)?;
2990
2991    if !db_path.exists() {
2992        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2993    }
2994
2995    let db = ChatDatabase::open(&db_path)?;
2996    let conn = db.connection();
2997
2998    // Find the session
2999    let session: Option<(i64, String)> = conn
3000        .query_row(
3001            "SELECT id, session_id FROM sessions WHERE session_id = ? OR id = ?",
3002            params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
3003            |row| Ok((row.get(0)?, row.get(1)?)),
3004        )
3005        .ok();
3006
3007    let (internal_id, actual_session_id) = match session {
3008        Some(s) => s,
3009        None => {
3010            println!("{} Session not found: {}", "[!]".red(), session_id);
3011            anyhow::bail!("Session not found");
3012        }
3013    };
3014
3015    // Find the checkpoint
3016    let checkpoint: Option<(String, i64)> = conn
3017        .query_row(
3018            "SELECT snapshot, message_count FROM checkpoints 
3019         WHERE session_id = ? AND checkpoint_number = ?",
3020            params![internal_id, checkpoint_number],
3021            |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
3022        )
3023        .ok();
3024
3025    let (snapshot_json, original_msg_count) = match checkpoint {
3026        Some(c) => c,
3027        None => {
3028            println!(
3029                "{} Checkpoint #{} not found for session {}",
3030                "[!]".red(),
3031                checkpoint_number,
3032                session_id
3033            );
3034            println!(
3035                "   Use 'csm harvest checkpoints {}' to see available checkpoints",
3036                session_id
3037            );
3038            anyhow::bail!("Checkpoint not found");
3039        }
3040    };
3041
3042    // Parse snapshot
3043    let snapshot: serde_json::Value =
3044        serde_json::from_str(&snapshot_json).context("Failed to parse checkpoint snapshot")?;
3045
3046    let messages = snapshot["messages"]
3047        .as_array()
3048        .context("Invalid snapshot format")?;
3049
3050    println!("{}", "=".repeat(60).cyan());
3051    println!("{}", " Restore Checkpoint ".bold().yellow());
3052    println!("{}", "=".repeat(60).cyan());
3053    println!();
3054    println!("{} Session: {}", "[i]".blue(), actual_session_id);
3055    println!(
3056        "{} Restoring to checkpoint #{}",
3057        "[!]".yellow(),
3058        checkpoint_number
3059    );
3060    println!(
3061        "{} Messages to restore: {}",
3062        "[i]".blue(),
3063        original_msg_count
3064    );
3065    println!();
3066
3067    // Delete current messages
3068    let deleted = conn.execute("DELETE FROM messages WHERE session_id = ?", [internal_id])?;
3069
3070    println!("{} Removed {} current messages", "[-]".red(), deleted);
3071
3072    // Restore messages from snapshot
3073    for (idx, msg) in messages.iter().enumerate() {
3074        if let Some(content) = msg.as_str() {
3075            conn.execute(
3076                "INSERT INTO messages (session_id, message_index, role, content) VALUES (?, ?, 'unknown', ?)",
3077                params![internal_id, idx as i64, content],
3078            )?;
3079        }
3080    }
3081
3082    // Update session message count
3083    conn.execute(
3084        "UPDATE sessions SET message_count = ?, updated_at = datetime('now') WHERE id = ?",
3085        params![original_msg_count, internal_id],
3086    )?;
3087
3088    println!(
3089        "{} Restored {} messages from checkpoint",
3090        "[+]".green(),
3091        messages.len()
3092    );
3093    println!();
3094    println!(
3095        "{} Session restored to checkpoint #{}",
3096        "[+]".green().bold(),
3097        checkpoint_number
3098    );
3099
3100    Ok(())
3101}
3102
3103// ============================================================================
3104// Compact Command
3105// ============================================================================
3106
3107/// Compact the harvest database by stripping session_json request blobs.
3108///
3109/// Sessions that have been expanded into messages_v2 have their data duplicated:
3110/// once in the raw session_json TEXT blob and again in the normalized messages_v2,
3111/// tool_invocations, and file_changes tables. This command replaces session_json
3112/// with a minimal metadata stub (keeping session_id, title, dates, etc.) and then
3113/// runs VACUUM to reclaim disk space.
3114pub fn harvest_compact(db_path: Option<&str>, dry_run: bool) -> Result<()> {
3115    let db_path = get_db_path(db_path)?;
3116
3117    if !db_path.exists() {
3118        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3119    }
3120
3121    println!("\n{} Harvest Database Compact", "[H]".magenta().bold());
3122    println!("{}", "=".repeat(60));
3123
3124    let file_size_before = std::fs::metadata(&db_path)?.len();
3125    println!(
3126        "{} Database: {} ({:.2} MB)",
3127        "[*]".blue(),
3128        db_path.display(),
3129        file_size_before as f64 / 1_048_576.0
3130    );
3131
3132    let conn = Connection::open(&db_path)?;
3133
3134    // Count sessions that have been expanded into messages_v2
3135    let expanded_count: i64 = conn.query_row(
3136        "SELECT COUNT(DISTINCT s.id) FROM sessions s
3137         INNER JOIN messages_v2 m ON m.session_id = s.id",
3138        [],
3139        |row| row.get(0),
3140    )?;
3141
3142    let total_sessions: i64 =
3143        conn.query_row("SELECT COUNT(*) FROM sessions", [], |row| row.get(0))?;
3144
3145    // Measure current session_json size
3146    let json_size: i64 = conn
3147        .query_row(
3148            "SELECT COALESCE(SUM(LENGTH(session_json)), 0) FROM sessions",
3149            [],
3150            |row| row.get(0),
3151        )
3152        .unwrap_or(0);
3153
3154    // Measure expanded session_json size (those with messages_v2 data)
3155    let expanded_json_size: i64 = conn
3156        .query_row(
3157            "SELECT COALESCE(SUM(LENGTH(s.session_json)), 0) FROM sessions s
3158             WHERE s.id IN (SELECT DISTINCT session_id FROM messages_v2)",
3159            [],
3160            |row| row.get(0),
3161        )
3162        .unwrap_or(0);
3163
3164    println!(
3165        "{} Total sessions: {}",
3166        "[*]".blue(),
3167        total_sessions.to_string().truecolor(252, 152, 103)
3168    );
3169    println!(
3170        "{} Sessions with messages_v2 data: {}",
3171        "[*]".blue(),
3172        expanded_count.to_string().truecolor(252, 152, 103)
3173    );
3174    println!(
3175        "{} Current session_json size: {:.2} MB",
3176        "[*]".blue(),
3177        json_size as f64 / 1_048_576.0
3178    );
3179    println!(
3180        "{} Compactable session_json size: {:.2} MB",
3181        "[*]".blue(),
3182        expanded_json_size as f64 / 1_048_576.0
3183    );
3184
3185    if expanded_count == 0 {
3186        println!(
3187            "\n{} Nothing to compact — no sessions have messages_v2 data.",
3188            "[i]".dimmed()
3189        );
3190        return Ok(());
3191    }
3192
3193    if dry_run {
3194        println!(
3195            "\n{} Dry run — would compact {} sessions, freeing ~{:.2} MB of JSON blobs.",
3196            "[i]".yellow(),
3197            expanded_count,
3198            expanded_json_size as f64 / 1_048_576.0
3199        );
3200        return Ok(());
3201    }
3202
3203    // Strip requests from session_json, keeping only metadata
3204    println!("\n{} Compacting session_json blobs...", "[*]".blue());
3205
3206    let compacted = conn.execute(
3207        r#"UPDATE sessions SET session_json = json_object(
3208            'session_id', json_extract(session_json, '$.session_id'),
3209            'version', json_extract(session_json, '$.version'),
3210            'creation_date', json_extract(session_json, '$.creation_date'),
3211            'last_message_date', json_extract(session_json, '$.last_message_date'),
3212            'custom_title', json_extract(session_json, '$.custom_title'),
3213            'initial_location', json_extract(session_json, '$.initial_location'),
3214            'is_imported', json_extract(session_json, '$.is_imported'),
3215            'requester_username', json_extract(session_json, '$.requester_username'),
3216            'responder_username', json_extract(session_json, '$.responder_username'),
3217            'request_count', message_count,
3218            '_compacted', 1
3219        ) WHERE id IN (SELECT DISTINCT session_id FROM messages_v2)"#,
3220        [],
3221    )?;
3222
3223    println!(
3224        "   {} Compacted {} session JSON blobs",
3225        "[+]".green(),
3226        compacted
3227    );
3228
3229    // VACUUM to reclaim space
3230    println!("{} Running VACUUM to reclaim disk space...", "[*]".blue());
3231    conn.execute_batch("VACUUM")?;
3232
3233    let file_size_after = std::fs::metadata(&db_path)?.len();
3234    let saved = if file_size_before > file_size_after {
3235        file_size_before - file_size_after
3236    } else {
3237        0
3238    };
3239
3240    println!();
3241    println!("{} Compact complete!", "[+]".green().bold());
3242    println!("   Before: {:.2} MB", file_size_before as f64 / 1_048_576.0);
3243    println!("   After:  {:.2} MB", file_size_after as f64 / 1_048_576.0);
3244    println!("   Saved:  {:.2} MB", saved as f64 / 1_048_576.0);
3245
3246    Ok(())
3247}
3248
3249// ============================================================================
3250// Search Commands
3251// ============================================================================
3252
3253/// Rebuild the FTS index from messages_v2 table
3254pub fn harvest_rebuild_fts(db_path: Option<&str>) -> Result<()> {
3255    let db_path = get_db_path(db_path)?;
3256
3257    if !db_path.exists() {
3258        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3259    }
3260
3261    let conn = Connection::open(&db_path)?;
3262
3263    println!("{}", "=".repeat(70).cyan());
3264    println!("{} Rebuilding Full-Text Search Index", "[*]".bold());
3265    println!("{}", "=".repeat(70).cyan());
3266    println!();
3267
3268    // Drop existing FTS table and triggers
3269    println!("{} Dropping old FTS index...", "[*]".blue());
3270    conn.execute_batch(
3271        r#"
3272        DROP TRIGGER IF EXISTS messages_v2_ai;
3273        DROP TRIGGER IF EXISTS messages_v2_ad;
3274        DROP TRIGGER IF EXISTS messages_v2_au;
3275        DROP TABLE IF EXISTS messages_fts;
3276        "#,
3277    )?;
3278
3279    // Create new FTS table with content_raw and session_id for filtering
3280    println!("{} Creating new FTS index...", "[*]".blue());
3281    conn.execute_batch(
3282        r#"
3283        CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
3284            content_raw,
3285            content='messages_v2',
3286            content_rowid='id'
3287        );
3288        
3289        CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
3290            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3291        END;
3292        
3293        CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
3294            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3295        END;
3296        
3297        CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
3298            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3299            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3300        END;
3301        "#,
3302    )?;
3303
3304    // Count total messages
3305    let total_messages: i64 =
3306        conn.query_row("SELECT COUNT(*) FROM messages_v2", [], |row| row.get(0))?;
3307
3308    // Populate FTS from existing messages
3309    println!("{} Indexing {} messages...", "[*]".blue(), total_messages);
3310
3311    conn.execute(
3312        "INSERT INTO messages_fts(rowid, content_raw) SELECT id, content_raw FROM messages_v2",
3313        [],
3314    )?;
3315
3316    // Verify the index
3317    let indexed: i64 = conn.query_row("SELECT COUNT(*) FROM messages_fts", [], |row| row.get(0))?;
3318
3319    println!();
3320    println!("{} FTS index rebuilt successfully!", "[+]".green().bold());
3321    println!("   {} messages indexed", indexed);
3322
3323    Ok(())
3324}
3325
3326/// Full-text search across all sessions
3327pub fn harvest_search(
3328    db_path: Option<&str>,
3329    query: &str,
3330    provider_filter: Option<&str>,
3331    limit: usize,
3332) -> Result<()> {
3333    let db_path = get_db_path(db_path)?;
3334
3335    if !db_path.exists() {
3336        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3337    }
3338
3339    let db = ChatDatabase::open(&db_path)?;
3340    let conn = db.connection();
3341
3342    println!("{}", "=".repeat(70).cyan());
3343    println!("{} Search: {}", "[?]".bold(), query.bold());
3344    println!("{}", "=".repeat(70).cyan());
3345    println!();
3346
3347    // Results: (session_id, provider, title, snippet)
3348    let results: Vec<(String, String, String, String)> = {
3349        // Check if FTS table exists
3350        let fts_exists: bool = conn
3351            .query_row(
3352                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='messages_fts'",
3353                [],
3354                |_| Ok(true),
3355            )
3356            .unwrap_or(false);
3357
3358        if fts_exists {
3359            // Use FTS5 search with built-in snippet() for fast highlighting
3360            // and rank for relevance ordering, deduplicated per session
3361            let sql = format!(
3362                "SELECT s.id, s.provider, s.title,
3363                        snippet(messages_fts, 0, '>>>', '<<<', '...', 32) as snip
3364                 FROM messages_fts fts
3365                 JOIN messages_v2 m ON m.id = fts.rowid
3366                 JOIN sessions s ON m.session_id = s.id
3367                 WHERE messages_fts MATCH ?
3368                 {}
3369                 GROUP BY s.id
3370                 ORDER BY rank
3371                 LIMIT {}",
3372                if provider_filter.is_some() {
3373                    "AND s.provider = ?"
3374                } else {
3375                    ""
3376                },
3377                limit
3378            );
3379
3380            let mut stmt = conn.prepare(&sql)?;
3381
3382            if let Some(provider) = provider_filter {
3383                stmt.query_map(params![query, provider], |row| {
3384                    Ok((
3385                        row.get::<_, String>(0)?,
3386                        row.get::<_, String>(1)?,
3387                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3388                        row.get::<_, String>(3)?,
3389                    ))
3390                })?
3391                .collect::<Result<Vec<_>, _>>()?
3392            } else {
3393                stmt.query_map([query], |row| {
3394                    Ok((
3395                        row.get::<_, String>(0)?,
3396                        row.get::<_, String>(1)?,
3397                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3398                        row.get::<_, String>(3)?,
3399                    ))
3400                })?
3401                .collect::<Result<Vec<_>, _>>()?
3402            }
3403        } else {
3404            // Fall back to LIKE search with session deduplication
3405            let search_pattern = format!("%{}%", query);
3406            let sql = format!(
3407                "SELECT s.id, s.provider, s.title,
3408                        SUBSTR(m.content_raw,
3409                            MAX(1, INSTR(LOWER(m.content_raw), LOWER(?)) - 50),
3410                            150) as snip
3411                 FROM messages_v2 m
3412                 JOIN sessions s ON m.session_id = s.id
3413                 WHERE m.content_raw LIKE ?
3414                 {}
3415                 GROUP BY s.id
3416                 ORDER BY s.updated_at DESC
3417                 LIMIT {}",
3418                if provider_filter.is_some() {
3419                    "AND s.provider = ?"
3420                } else {
3421                    ""
3422                },
3423                limit
3424            );
3425
3426            let mut stmt = conn.prepare(&sql)?;
3427
3428            if let Some(provider) = provider_filter {
3429                stmt.query_map(params![query, search_pattern, provider], |row| {
3430                    Ok((
3431                        row.get::<_, String>(0)?,
3432                        row.get::<_, String>(1)?,
3433                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3434                        row.get::<_, String>(3)?,
3435                    ))
3436                })?
3437                .collect::<Result<Vec<_>, _>>()?
3438            } else {
3439                stmt.query_map(params![query, search_pattern], |row| {
3440                    Ok((
3441                        row.get::<_, String>(0)?,
3442                        row.get::<_, String>(1)?,
3443                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3444                        row.get::<_, String>(3)?,
3445                    ))
3446                })?
3447                .collect::<Result<Vec<_>, _>>()?
3448            }
3449        }
3450    };
3451
3452    if results.is_empty() {
3453        println!("{} No results found for '{}'", "[i]".blue(), query);
3454        return Ok(());
3455    }
3456
3457    println!("{} Found {} result(s):", "[i]".blue(), results.len());
3458    println!();
3459
3460    for (session_id, provider, title, snippet) in results {
3461        let display_name = if title.is_empty() {
3462            session_id.clone()
3463        } else {
3464            format!("{} ({})", title, &session_id[..8.min(session_id.len())])
3465        };
3466
3467        println!(
3468            "{} {} [{}]",
3469            "*".cyan(),
3470            display_name.bold(),
3471            provider.dimmed()
3472        );
3473
3474        // Display the snippet (already extracted by SQL or FTS5 snippet())
3475        let clean_snippet = snippet.replace('\n', " ").replace('\r', "");
3476        println!("   {}", clean_snippet.dimmed());
3477        println!();
3478    }
3479
3480    Ok(())
3481}
3482
3483/// Create a search result snippet with the query highlighted
3484fn create_search_snippet(content: &str, query: &str, max_len: usize) -> String {
3485    let content_lower = content.to_lowercase();
3486    let query_lower = query.to_lowercase();
3487
3488    if let Some(pos) = content_lower.find(&query_lower) {
3489        let start = pos.saturating_sub(max_len / 2);
3490        let end = (pos + query.len() + max_len / 2).min(content.len());
3491
3492        let mut snippet = String::new();
3493        if start > 0 {
3494            snippet.push_str("...");
3495        }
3496        snippet.push_str(&content[start..end]);
3497        if end < content.len() {
3498            snippet.push_str("...");
3499        }
3500
3501        // Replace newlines with spaces for display
3502        snippet.replace('\n', " ").replace('\r', "")
3503    } else {
3504        // Fallback: just show first max_len chars
3505        if content.len() > max_len {
3506            format!("{}...", &content[..max_len])
3507        } else {
3508            content.to_string()
3509        }
3510    }
3511}
3512
3513/// Simple MD5 hash for content checksums
3514fn md5_hash(data: &str) -> u128 {
3515    // Simple hash implementation (not cryptographically secure, just for checksums)
3516    let mut hash: u128 = 0;
3517    for (i, byte) in data.bytes().enumerate() {
3518        hash = hash.wrapping_add((byte as u128).wrapping_mul((i as u128).wrapping_add(1)));
3519        hash = hash.rotate_left(7);
3520    }
3521    hash
3522}
3523
3524/// Sync sessions between the harvest database and provider workspaces
3525///
3526/// This command provides bidirectional sync capabilities:
3527/// - `--push`: Write sessions from the database to provider workspace directories
3528/// - `--pull`: Import sessions from provider workspaces into the database (similar to harvest run)
3529///
3530/// Sync supports filtering by provider, workspace, or specific session IDs.
3531pub fn harvest_sync(
3532    path: Option<&str>,
3533    push: bool,
3534    pull: bool,
3535    provider: Option<&str>,
3536    workspace: Option<&str>,
3537    sessions: Option<&[String]>,
3538    format: Option<&str>,
3539    force: bool,
3540    dry_run: bool,
3541) -> Result<()> {
3542    // Validate arguments
3543    if !push && !pull {
3544        anyhow::bail!("Must specify either --push or --pull (or both)");
3545    }
3546
3547    // Determine database path
3548    let db_path = get_db_path(path)?;
3549
3550    if !db_path.exists() {
3551        anyhow::bail!(
3552            "Harvest database not found at {:?}. Run 'harvest run' first.",
3553            db_path
3554        );
3555    }
3556
3557    println!(
3558        "{} Sync Operation",
3559        if dry_run {
3560            "[DRY RUN]".yellow()
3561        } else {
3562            "[SYNC]".green()
3563        }
3564    );
3565    println!("   Database: {}", db_path.display().to_string().cyan());
3566
3567    if push {
3568        println!("\n{} Push: Database → Provider Workspaces", "[→]".blue());
3569        sync_push(
3570            &db_path, provider, workspace, sessions, format, force, dry_run,
3571        )?;
3572    }
3573
3574    if pull {
3575        println!("\n{} Pull: Provider Workspaces → Database", "[←]".blue());
3576        sync_pull(&db_path, provider, workspace, sessions, force, dry_run)?;
3577    }
3578
3579    if dry_run {
3580        println!(
3581            "\n{} Dry run complete. No changes were made.",
3582            "[!]".yellow()
3583        );
3584    } else {
3585        println!("\n{} Sync complete.", "[✓]".green());
3586    }
3587
3588    Ok(())
3589}
3590
3591/// Push sessions from the database to provider workspace directories
3592fn sync_push(
3593    db_path: &Path,
3594    provider: Option<&str>,
3595    workspace: Option<&str>,
3596    sessions: Option<&[String]>,
3597    format: Option<&str>,
3598    force: bool,
3599    dry_run: bool,
3600) -> Result<()> {
3601    let conn = Connection::open(db_path)?;
3602
3603    // Build query with filters
3604    let mut sql = String::from(
3605        r#"SELECT id, provider, workspace_path, workspace_name, session_json, file_format 
3606           FROM sessions WHERE workspace_path IS NOT NULL"#,
3607    );
3608    let mut params_vec: Vec<String> = Vec::new();
3609
3610    if let Some(p) = provider {
3611        sql.push_str(" AND provider = ?");
3612        params_vec.push(p.to_string());
3613    }
3614
3615    if let Some(w) = workspace {
3616        sql.push_str(" AND (workspace_path LIKE ? OR workspace_name LIKE ?)");
3617        params_vec.push(format!("%{}%", w));
3618        params_vec.push(format!("%{}%", w));
3619    }
3620
3621    if let Some(sess_ids) = sessions {
3622        if !sess_ids.is_empty() {
3623            let placeholders: Vec<&str> = sess_ids.iter().map(|_| "?").collect();
3624            sql.push_str(&format!(" AND id IN ({})", placeholders.join(",")));
3625            for id in sess_ids {
3626                params_vec.push(id.clone());
3627            }
3628        }
3629    }
3630
3631    let mut stmt = conn.prepare(&sql)?;
3632
3633    // Build params array for rusqlite
3634    let params_refs: Vec<&dyn rusqlite::ToSql> = params_vec
3635        .iter()
3636        .map(|s| s as &dyn rusqlite::ToSql)
3637        .collect();
3638
3639    let rows = stmt.query_map(params_refs.as_slice(), |row| {
3640        Ok((
3641            row.get::<_, String>(0)?,         // id
3642            row.get::<_, String>(1)?,         // provider
3643            row.get::<_, String>(2)?,         // workspace_path
3644            row.get::<_, Option<String>>(3)?, // workspace_name
3645            row.get::<_, String>(4)?,         // session_json
3646            row.get::<_, Option<String>>(5)?, // file_format
3647        ))
3648    })?;
3649
3650    let target_format = format.unwrap_or("jsonl");
3651    let mut pushed = 0;
3652    let mut skipped = 0;
3653    let mut errors = 0;
3654
3655    for row_result in rows {
3656        match row_result {
3657            Ok((
3658                session_id,
3659                provider_name,
3660                workspace_path,
3661                workspace_name,
3662                session_json,
3663                stored_format,
3664            )) => {
3665                // Determine target directory
3666                let ws_path = PathBuf::from(&workspace_path);
3667                let chat_dir = ws_path.join(".vscode").join("chat");
3668
3669                // Skip if directory doesn't exist and we're not creating it
3670                if !chat_dir.exists() {
3671                    if !force {
3672                        println!(
3673                            "   {} Skipped {} - workspace chat dir not found: {}",
3674                            "[~]".yellow(),
3675                            session_id.dimmed(),
3676                            chat_dir.display()
3677                        );
3678                        skipped += 1;
3679                        continue;
3680                    }
3681                    // Create directory if force is set
3682                    if !dry_run {
3683                        fs::create_dir_all(&chat_dir)?;
3684                    }
3685                }
3686
3687                // Determine output format and filename
3688                let (output_ext, needs_conversion) = match target_format {
3689                    "jsonl" => ("jsonl", stored_format.as_deref() != Some("jsonl")),
3690                    "json" => ("json", stored_format.as_deref() == Some("jsonl")),
3691                    _ => ("jsonl", true), // Default to JSONL for modern VS Code
3692                };
3693
3694                let session_file = chat_dir.join(format!("{}.{}", session_id, output_ext));
3695
3696                // Check if file exists
3697                if session_file.exists() && !force {
3698                    println!(
3699                        "   {} Skipped {} - file exists (use --force to overwrite)",
3700                        "[~]".yellow(),
3701                        session_id.dimmed()
3702                    );
3703                    skipped += 1;
3704                    continue;
3705                }
3706
3707                // Parse and potentially convert the session
3708                let output_content = if needs_conversion && target_format == "jsonl" {
3709                    // Convert JSON to JSONL format
3710                    match serde_json::from_str::<ChatSession>(&session_json) {
3711                        Ok(session) => convert_session_to_jsonl(&session)?,
3712                        Err(e) => {
3713                            println!(
3714                                "   {} Error parsing session {}: {}",
3715                                "[!]".red(),
3716                                session_id,
3717                                e
3718                            );
3719                            errors += 1;
3720                            continue;
3721                        }
3722                    }
3723                } else {
3724                    // Use as-is or simple format
3725                    session_json.clone()
3726                };
3727
3728                if dry_run {
3729                    println!(
3730                        "   {} Would write {} ({}) → {}",
3731                        "[+]".green(),
3732                        workspace_name.as_deref().unwrap_or(&provider_name),
3733                        session_id.dimmed(),
3734                        session_file.display()
3735                    );
3736                } else {
3737                    fs::write(&session_file, &output_content)?;
3738                    println!(
3739                        "   {} Pushed {} ({}) → {}",
3740                        "[+]".green(),
3741                        workspace_name.as_deref().unwrap_or(&provider_name),
3742                        session_id.dimmed(),
3743                        session_file.display()
3744                    );
3745                }
3746                pushed += 1;
3747            }
3748            Err(e) => {
3749                println!("   {} Database error: {}", "[!]".red(), e);
3750                errors += 1;
3751            }
3752        }
3753    }
3754
3755    println!(
3756        "\n   {} Push summary: {} pushed, {} skipped, {} errors",
3757        "[=]".cyan(),
3758        pushed.to_string().green(),
3759        skipped.to_string().yellow(),
3760        errors.to_string().red()
3761    );
3762
3763    Ok(())
3764}
3765
3766/// Pull sessions from provider workspaces into the database
3767fn sync_pull(
3768    db_path: &Path,
3769    provider: Option<&str>,
3770    workspace: Option<&str>,
3771    sessions: Option<&[String]>,
3772    force: bool,
3773    dry_run: bool,
3774) -> Result<()> {
3775    let conn = if dry_run {
3776        // For dry run, still open the DB to check existing sessions
3777        Connection::open(db_path)?
3778    } else {
3779        Connection::open(db_path)?
3780    };
3781
3782    // Discover workspaces
3783    let workspaces = discover_workspaces()?;
3784    let mut pulled = 0;
3785    let mut skipped = 0;
3786    let mut errors = 0;
3787
3788    for ws in workspaces {
3789        // Filter by workspace if specified
3790        if let Some(ws_filter) = workspace {
3791            let ws_path_str = ws.workspace_path.to_string_lossy();
3792            let ws_name = ws.project_path.as_deref().unwrap_or("");
3793            if !ws_path_str.contains(ws_filter) && !ws_name.contains(ws_filter) {
3794                continue;
3795            }
3796        }
3797
3798        // Filter by provider if specified (for Copilot workspaces, it's "GitHub Copilot")
3799        if let Some(p) = provider {
3800            if !p.eq_ignore_ascii_case("copilot")
3801                && !p.eq_ignore_ascii_case("github copilot")
3802                && !p.eq_ignore_ascii_case("vscode")
3803            {
3804                continue;
3805            }
3806        }
3807
3808        // Get sessions from this workspace
3809        match get_chat_sessions_from_workspace(&ws.workspace_path) {
3810            Ok(ws_sessions) => {
3811                for swp in ws_sessions {
3812                    // Filter by session ID if specified
3813                    if let Some(sess_ids) = sessions {
3814                        let sid = swp.session.session_id.as_deref().unwrap_or("");
3815                        if !sess_ids.iter().any(|id| id == sid) {
3816                            continue;
3817                        }
3818                    }
3819
3820                    let session_id = swp
3821                        .session
3822                        .session_id
3823                        .clone()
3824                        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
3825
3826                    // Check if session already exists
3827                    let exists: bool = conn
3828                        .query_row("SELECT 1 FROM sessions WHERE id = ?", [&session_id], |_| {
3829                            Ok(true)
3830                        })
3831                        .unwrap_or(false);
3832
3833                    if exists && !force {
3834                        skipped += 1;
3835                        continue;
3836                    }
3837
3838                    let ws_name = ws.project_path.clone();
3839                    let ws_path = ws.workspace_path.to_string_lossy().to_string();
3840
3841                    if dry_run {
3842                        println!(
3843                            "   {} Would pull {} from {}",
3844                            "[+]".green(),
3845                            session_id.dimmed(),
3846                            ws_name.as_deref().unwrap_or(&ws_path)
3847                        );
3848                        pulled += 1;
3849                    } else {
3850                        match insert_or_update_session(
3851                            &conn,
3852                            &swp.session,
3853                            "GitHub Copilot",
3854                            Some(&ws.hash),
3855                            ws_name.as_deref(),
3856                            None,           // provider_version
3857                            3,              // schema_version (V3)
3858                            "json",         // file_format (detect from swp.path extension)
3859                            Some(&ws_path), // workspace_path
3860                        ) {
3861                            Ok(_) => {
3862                                println!(
3863                                    "   {} Pulled {} from {}",
3864                                    "[+]".green(),
3865                                    session_id.dimmed(),
3866                                    ws_name.as_deref().unwrap_or(&ws_path)
3867                                );
3868                                pulled += 1;
3869                            }
3870                            Err(e) => {
3871                                println!("   {} Error pulling {}: {}", "[!]".red(), session_id, e);
3872                                errors += 1;
3873                            }
3874                        }
3875                    }
3876                }
3877            }
3878            Err(e) => {
3879                println!(
3880                    "   {} Error reading workspace {}: {}",
3881                    "[!]".red(),
3882                    ws.workspace_path.display(),
3883                    e
3884                );
3885                errors += 1;
3886            }
3887        }
3888    }
3889
3890    println!(
3891        "\n   {} Pull summary: {} pulled, {} skipped, {} errors",
3892        "[=]".cyan(),
3893        pulled.to_string().green(),
3894        skipped.to_string().yellow(),
3895        errors.to_string().red()
3896    );
3897
3898    Ok(())
3899}
3900
3901/// Convert a ChatSession to JSONL format (VS Code 1.109+)
3902fn convert_session_to_jsonl(session: &ChatSession) -> Result<String> {
3903    use serde_json::json;
3904
3905    let mut lines = Vec::new();
3906
3907    // First line: session metadata
3908    let session_meta = json!({
3909        "version": 3,
3910        "sessionId": session.session_id,
3911        "title": session.title(),
3912        "createdAt": session.creation_date,
3913        "lastInteractionAt": session.last_message_date,
3914        "customTitle": session.custom_title.as_deref(),
3915    });
3916    lines.push(serde_json::to_string(&session_meta)?);
3917
3918    // Subsequent lines: each request as a separate JSON object
3919    for request in &session.requests {
3920        let request_json = serde_json::to_string(request)?;
3921        lines.push(request_json);
3922    }
3923
3924    Ok(lines.join("\n"))
3925}