Skip to main content

chasm/commands/
harvest.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: AGPL-3.0-only
3//! Harvest commands for collecting chat sessions from multiple providers
4//!
5//! The harvester scans for local and remote LLM providers, downloads all chat
6//! sessions into a single SQLite database, and allows users to version-track
7//! the database with git.
8
9use anyhow::{Context, Result};
10use chrono::{DateTime, Utc};
11use colored::*;
12use rusqlite::{params, Connection};
13use serde::{Deserialize, Serialize};
14use std::fs;
15use std::path::{Path, PathBuf};
16use std::process::Command;
17use std::time::Duration;
18
19use crate::browser::{get_installed_browsers, scan_browser_auth, BrowserType};
20use crate::database::{ChatDatabase, ShareLinkParser};
21use crate::models::ChatSession;
22use crate::providers::{ProviderRegistry, ProviderType};
23use crate::storage::parse_session_json;
24use crate::workspace::{discover_workspaces, get_chat_sessions_from_workspace};
25
26/// Check if a string is an empty code block marker (just ``` with no content)
27fn is_empty_code_block(s: &str) -> bool {
28    // Match patterns like "```", "```\n", "```language", "```\n```", "```\n\n```"
29    let s = s.trim();
30    if s == "```" {
31        return true;
32    }
33    // Check for code block with just a language identifier and no content
34    if s.starts_with("```") && !s.contains('\n') {
35        return true;
36    }
37    // Check for empty code block with opening and closing (possibly with whitespace-only lines)
38    let lines: Vec<&str> = s.lines().collect();
39    if lines.len() >= 2 && lines[0].starts_with("```") && lines.last() == Some(&"```") {
40        // Check if all lines between opening and closing are empty or whitespace
41        let content_lines = &lines[1..lines.len() - 1];
42        if content_lines.iter().all(|line| line.trim().is_empty()) {
43            return true;
44        }
45    }
46    false
47}
48
49/// Type alias for harvested session query result (id, provider, title, msg_count, created, last_msg, workspace)
50type HarvestQueryResult = (String, String, String, i64, i64, i64, Option<String>);
51
52/// Web-based LLM provider endpoint configuration
53#[derive(Debug, Clone)]
54struct WebProviderEndpoint {
55    name: &'static str,
56    url: &'static str,
57    description: &'static str,
58}
59
60/// List of known web-based LLM provider endpoints to probe
61const WEB_PROVIDERS: &[WebProviderEndpoint] = &[
62    WebProviderEndpoint {
63        name: "ChatGPT",
64        url: "https://chat.openai.com",
65        description: "OpenAI ChatGPT Web Interface",
66    },
67    WebProviderEndpoint {
68        name: "Claude",
69        url: "https://claude.ai",
70        description: "Anthropic Claude Web Interface",
71    },
72    WebProviderEndpoint {
73        name: "Gemini",
74        url: "https://gemini.google.com",
75        description: "Google Gemini Web Interface",
76    },
77    WebProviderEndpoint {
78        name: "Perplexity",
79        url: "https://www.perplexity.ai",
80        description: "Perplexity AI Search Interface",
81    },
82    WebProviderEndpoint {
83        name: "DeepSeek",
84        url: "https://chat.deepseek.com",
85        description: "DeepSeek Chat Interface",
86    },
87    WebProviderEndpoint {
88        name: "Poe",
89        url: "https://poe.com",
90        description: "Quora Poe Multi-model Chat",
91    },
92    WebProviderEndpoint {
93        name: "You.com",
94        url: "https://you.com/chat",
95        description: "You.com AI Chat",
96    },
97    WebProviderEndpoint {
98        name: "HuggingChat",
99        url: "https://huggingface.co/chat",
100        description: "HuggingFace Chat Interface",
101    },
102    WebProviderEndpoint {
103        name: "Copilot",
104        url: "https://copilot.microsoft.com",
105        description: "Microsoft Copilot Web Interface",
106    },
107    WebProviderEndpoint {
108        name: "Mistral",
109        url: "https://chat.mistral.ai",
110        description: "Mistral AI Le Chat Interface",
111    },
112    WebProviderEndpoint {
113        name: "Cohere",
114        url: "https://coral.cohere.com",
115        description: "Cohere Coral Chat Interface",
116    },
117    WebProviderEndpoint {
118        name: "Groq",
119        url: "https://groq.com",
120        description: "Groq Fast Inference Interface",
121    },
122    WebProviderEndpoint {
123        name: "Phind",
124        url: "https://www.phind.com",
125        description: "Phind AI Code Search",
126    },
127    WebProviderEndpoint {
128        name: "Character.AI",
129        url: "https://character.ai",
130        description: "Character.AI Chat Interface",
131    },
132    WebProviderEndpoint {
133        name: "Pi",
134        url: "https://pi.ai",
135        description: "Inflection Pi Personal AI",
136    },
137];
138
139/// Scan for reachable web-based LLM providers (parallel)
140fn scan_web_providers(timeout_secs: u64) -> Vec<String> {
141    use std::sync::{Arc, Mutex};
142    use std::thread;
143
144    let reachable = Arc::new(Mutex::new(Vec::new()));
145    let results = Arc::new(Mutex::new(Vec::new()));
146
147    // Create threads for parallel scanning
148    let handles: Vec<_> = WEB_PROVIDERS
149        .iter()
150        .map(|provider| {
151            let reachable = Arc::clone(&reachable);
152            let results = Arc::clone(&results);
153            let timeout = timeout_secs;
154            let name = provider.name;
155            let url = provider.url;
156            let desc = provider.description;
157
158            thread::spawn(move || {
159                let client = match reqwest::blocking::Client::builder()
160                    .timeout(Duration::from_secs(timeout))
161                    .connect_timeout(Duration::from_secs(timeout.min(3))) // Short connect timeout
162                    .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
163                    .build()
164                {
165                    Ok(c) => c,
166                    Err(_) => return,
167                };
168
169                let result: (&str, bool, String, &str) = match client.head(url).send() {
170                    Ok(response) => {
171                        let status = response.status();
172                        if status.is_success() || status.is_redirection() {
173                            reachable.lock().unwrap().push(name.to_string());
174                            (name, true, desc.to_string(), url)
175                        } else {
176                            (name, false, format!("HTTP {}", status.as_u16()), url)
177                        }
178                    }
179                    Err(e) => {
180                        let reason = if e.is_timeout() {
181                            "timeout".to_string()
182                        } else if e.is_connect() {
183                            "connection failed".to_string()
184                        } else {
185                            "unreachable".to_string()
186                        };
187                        (name, false, reason, url)
188                    }
189                };
190
191                results.lock().unwrap().push((
192                    result.0.to_string(),
193                    result.1,
194                    result.2,
195                    result.3.to_string(),
196                ));
197            })
198        })
199        .collect();
200
201    // Wait for all threads to complete
202    for handle in handles {
203        let _ = handle.join();
204    }
205
206    // Print results in order
207    let results = results.lock().unwrap();
208    let mut sorted_results: Vec<_> = results.iter().collect();
209    sorted_results.sort_by_key(|(name, _, _, _)| name.as_str());
210
211    for (name, success, info, url) in sorted_results {
212        if *success {
213            println!(
214                "   {} {}: {} ({})",
215                "[+]".green(),
216                name.bold(),
217                "reachable".green(),
218                info.dimmed()
219            );
220            println!("      {} {}", "`".dimmed(), url.dimmed());
221        } else {
222            println!(
223                "   {} {}: {} ({})",
224                "[-]".dimmed(),
225                name,
226                "blocked or unavailable".dimmed(),
227                info.dimmed()
228            );
229        }
230    }
231
232    let result = reachable.lock().unwrap().clone();
233    result
234}
235
236/// Scan browser cookies for authenticated web LLM providers
237fn scan_browser_authentication(
238    verbose: bool,
239) -> (std::collections::HashMap<String, Vec<BrowserType>>, usize) {
240    use crate::browser::scan_browser_auth_verbose;
241    use std::collections::HashMap;
242
243    let installed = get_installed_browsers();
244    if installed.is_empty() {
245        println!("   {} No supported browsers found", "[-]".dimmed());
246        return (HashMap::new(), 0);
247    }
248
249    println!(
250        "   {} Checking {} browser(s): {}",
251        "[*]".blue(),
252        installed.len(),
253        installed
254            .iter()
255            .map(|b| b.name())
256            .collect::<Vec<_>>()
257            .join(", ")
258    );
259
260    let results = if verbose {
261        scan_browser_auth_verbose()
262    } else {
263        scan_browser_auth()
264    };
265
266    // Group results by provider
267    let mut authenticated: HashMap<String, Vec<BrowserType>> = HashMap::new();
268
269    for result in results {
270        if result.authenticated {
271            authenticated
272                .entry(result.provider.clone())
273                .or_default()
274                .push(result.browser);
275        }
276    }
277
278    let count = authenticated.len();
279
280    if authenticated.is_empty() {
281        println!(
282            "   {} No authenticated web LLM providers found",
283            "[-]".dimmed()
284        );
285        println!(
286            "      {} Log into ChatGPT, Claude, etc. in your browser to enable harvesting",
287            "`".dimmed()
288        );
289    } else {
290        for (provider, browsers) in &authenticated {
291            let browser_names: Vec<_> = browsers.iter().map(|b| b.name()).collect();
292            println!(
293                "   {} {}: {} in {}",
294                "[+]".green(),
295                provider.bold(),
296                "authenticated".green(),
297                browser_names.join(", ")
298            );
299        }
300    }
301
302    (authenticated, count)
303}
304
305/// Configuration for the harvest database
306#[allow(dead_code)]
307#[derive(Debug, Clone, Serialize, Deserialize)]
308pub struct HarvestConfig {
309    /// Path to the harvest database
310    pub db_path: PathBuf,
311    /// Auto-commit changes to git
312    pub auto_commit: bool,
313    /// Providers to include (empty = all)
314    pub include_providers: Vec<String>,
315    /// Providers to exclude
316    pub exclude_providers: Vec<String>,
317    /// Include VS Code workspaces
318    pub include_workspaces: bool,
319    /// Last harvest timestamp
320    pub last_harvest: Option<i64>,
321}
322
323#[allow(dead_code)]
324impl Default for HarvestConfig {
325    fn default() -> Self {
326        Self {
327            db_path: PathBuf::from("chat_sessions.db"),
328            auto_commit: false,
329            include_providers: Vec::new(),
330            exclude_providers: Vec::new(),
331            include_workspaces: true,
332            last_harvest: None,
333        }
334    }
335}
336
337/// Statistics from a harvest operation
338#[derive(Debug, Default)]
339pub struct HarvestStats {
340    pub providers_scanned: usize,
341    pub workspaces_scanned: usize,
342    pub sessions_found: usize,
343    pub sessions_added: usize,
344    pub sessions_updated: usize,
345    pub sessions_skipped: usize,
346    pub errors: Vec<String>,
347}
348
349/// A harvested session record
350#[allow(dead_code)]
351#[derive(Debug, Clone, Serialize, Deserialize)]
352pub struct HarvestedSession {
353    pub id: String,
354    pub provider: String,
355    pub provider_type: String,
356    pub workspace_id: Option<String>,
357    pub workspace_name: Option<String>,
358    pub title: String,
359    pub message_count: usize,
360    pub created_at: i64,
361    pub updated_at: i64,
362    pub harvested_at: i64,
363    pub session_json: String,
364}
365
366/// Initialize a harvest database at the specified path
367pub fn harvest_init(path: Option<&str>, git_init: bool) -> Result<()> {
368    let db_path = get_db_path(path)?;
369    let db_dir = db_path.parent().unwrap_or(Path::new("."));
370
371    println!("\n{} Initializing Harvest Database", "[H]".magenta().bold());
372    println!("{}", "=".repeat(60));
373
374    // Create directory if needed
375    if !db_dir.exists() {
376        fs::create_dir_all(db_dir)?;
377        println!("{} Created directory: {}", "[+]".green(), db_dir.display());
378    }
379
380    // Check if database already exists
381    if db_path.exists() {
382        println!(
383            "{} Database already exists: {}",
384            "[!]".yellow(),
385            db_path.display()
386        );
387        println!("   Use 'csm harvest run' to update it");
388        return Ok(());
389    }
390
391    // Create the database
392    create_harvest_database(&db_path)?;
393    println!("{} Created database: {}", "[+]".green(), db_path.display());
394
395    // Initialize git if requested
396    if git_init {
397        init_git_tracking(&db_path)?;
398    }
399
400    println!("\n{} Harvest database initialized!", "[+]".green().bold());
401    println!("\nNext steps:");
402    println!("  1. Run 'csm harvest scan' to see available providers");
403    println!("  2. Run 'csm harvest run' to collect sessions");
404    if !git_init {
405        println!("  3. Run 'csm harvest git init' to enable version tracking");
406    }
407
408    Ok(())
409}
410
411/// Scan for available providers and workspaces
412pub fn harvest_scan(
413    show_sessions: bool,
414    scan_web: bool,
415    timeout_secs: u64,
416    verbose: bool,
417) -> Result<()> {
418    println!("\n{} Scanning for Providers", "[H]".magenta().bold());
419    println!("{}", "=".repeat(60));
420
421    let registry = ProviderRegistry::new();
422    let mut total_sessions = 0;
423    let mut available_providers = Vec::new();
424
425    // Scan LLM providers
426    println!("\n{} LLM Providers:", "[*]".blue().bold());
427
428    let provider_types = vec![
429        ProviderType::Copilot,
430        ProviderType::Cursor,
431        ProviderType::CodexCli,
432        ProviderType::DroidCli,
433        ProviderType::GeminiCli,
434        ProviderType::Ollama,
435        ProviderType::Vllm,
436        ProviderType::Foundry,
437        ProviderType::LmStudio,
438        ProviderType::LocalAI,
439        ProviderType::TextGenWebUI,
440        ProviderType::Jan,
441        ProviderType::Gpt4All,
442        ProviderType::Llamafile,
443        ProviderType::OpenAI,
444        ProviderType::ChatGPT,
445        ProviderType::Anthropic,
446        ProviderType::Perplexity,
447        ProviderType::DeepSeek,
448        ProviderType::Gemini,
449    ];
450
451    for pt in &provider_types {
452        if let Some(provider) = registry.get_provider(*pt) {
453            let available = provider.is_available();
454            let session_count = if available {
455                provider.list_sessions().map(|s| s.len()).unwrap_or(0)
456            } else {
457                0
458            };
459
460            if available {
461                available_providers.push((*pt, session_count));
462                total_sessions += session_count;
463
464                let status = if session_count > 0 {
465                    format!(
466                        "{} {} sessions",
467                        "+".green(),
468                        session_count.to_string().cyan()
469                    )
470                } else {
471                    format!("{} no sessions", "+".green())
472                };
473
474                println!(
475                    "   {} {}: {}",
476                    "[+]".green(),
477                    provider.name().bold(),
478                    status
479                );
480
481                if show_sessions && session_count > 0 {
482                    if let Ok(sessions) = provider.list_sessions() {
483                        for session in sessions.iter().take(3) {
484                            println!("      {} {}", "`".dimmed(), session.title().dimmed());
485                        }
486                        if sessions.len() > 3 {
487                            println!("      {} ... and {} more", "`".dimmed(), sessions.len() - 3);
488                        }
489                    }
490                }
491
492                if let Some(path) = provider.sessions_path() {
493                    println!(
494                        "      {} {}",
495                        "`".dimmed(),
496                        path.display().to_string().dimmed()
497                    );
498                }
499            }
500        }
501    }
502
503    // Scan VS Code workspaces
504    println!("\n{} VS Code Workspaces:", "[*]".blue().bold());
505
506    match discover_workspaces() {
507        Ok(workspaces) => {
508            let workspaces_with_sessions: Vec<_> = workspaces
509                .iter()
510                .filter(|ws| ws.chat_session_count > 0)
511                .collect();
512
513            let ws_sessions: usize = workspaces_with_sessions
514                .iter()
515                .map(|ws| ws.chat_session_count)
516                .sum();
517
518            println!(
519                "   {} {} workspaces with {} sessions",
520                "[+]".green(),
521                workspaces_with_sessions.len().to_string().cyan(),
522                ws_sessions.to_string().cyan()
523            );
524
525            if show_sessions {
526                for ws in workspaces_with_sessions.iter().take(5) {
527                    let name = ws
528                        .project_path
529                        .clone()
530                        .unwrap_or_else(|| ws.hash[..8.min(ws.hash.len())].to_string());
531                    println!(
532                        "      {} {} ({} sessions)",
533                        "`".dimmed(),
534                        name.dimmed(),
535                        ws.chat_session_count
536                    );
537                }
538                if workspaces_with_sessions.len() > 5 {
539                    println!(
540                        "      {} ... and {} more workspaces",
541                        "`".dimmed(),
542                        workspaces_with_sessions.len() - 5
543                    );
544                }
545            }
546
547            total_sessions += ws_sessions;
548        }
549        Err(e) => {
550            println!("   {} Failed to scan workspaces: {}", "[!]".yellow(), e);
551        }
552    }
553
554    // Scan web-based LLM providers if requested
555    let mut web_providers_found = Vec::new();
556    let mut authenticated_count = 0;
557    if scan_web {
558        // First check browser authentication (no network requests)
559        println!("\n{} Browser Authentication:", "[*]".blue().bold());
560        let (auth_results, auth_count) = scan_browser_authentication(verbose);
561        authenticated_count = auth_count;
562
563        // Then probe web endpoints
564        println!("\n{} Web LLM Provider Endpoints:", "[*]".blue().bold());
565        web_providers_found = scan_web_providers(timeout_secs);
566
567        // Show which authenticated providers are reachable
568        if !auth_results.is_empty() {
569            println!("\n{} Authenticated Provider Sessions:", "[*]".blue().bold());
570            for (provider, browsers) in &auth_results {
571                let browser_list: Vec<_> = browsers.iter().map(|b| b.name()).collect();
572                let reachable = web_providers_found.iter().any(|p| p == provider);
573                let status = if reachable {
574                    format!("{} (reachable)", "ready to harvest".green())
575                } else {
576                    format!("{}", "authenticated but endpoint blocked".yellow())
577                };
578                println!(
579                    "   {} {}: {} via {}",
580                    "[+]".green(),
581                    provider.bold(),
582                    status,
583                    browser_list.join(", ").dimmed()
584                );
585            }
586        }
587    }
588
589    // Summary
590    println!("\n{} Summary:", "[*]".green().bold());
591    println!(
592        "   {} local providers available",
593        available_providers.len().to_string().cyan()
594    );
595    if scan_web {
596        println!(
597            "   {} web providers reachable",
598            web_providers_found.len().to_string().cyan()
599        );
600        println!(
601            "   {} web providers authenticated",
602            authenticated_count.to_string().cyan()
603        );
604    }
605    println!(
606        "   {} total sessions to harvest",
607        total_sessions.to_string().truecolor(252, 152, 103)
608    );
609
610    Ok(())
611}
612
613/// Run the harvest operation
614pub fn harvest_run(
615    path: Option<&str>,
616    providers: Option<&[String]>,
617    exclude: Option<&[String]>,
618    incremental: bool,
619    auto_commit: bool,
620    message: Option<&str>,
621) -> Result<()> {
622    let db_path = get_db_path(path)?;
623
624    println!("\n{} Running Harvest", "[H]".magenta().bold());
625    println!("{}", "=".repeat(60));
626
627    // Ensure database exists
628    if !db_path.exists() {
629        println!("{} Database not found, creating...", "[*]".blue());
630        create_harvest_database(&db_path)?;
631    }
632
633    let conn = Connection::open(&db_path)?;
634    let mut stats = HarvestStats::default();
635
636    // Get last harvest time for incremental updates
637    let last_harvest: Option<i64> = if incremental {
638        conn.query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
639            row.get(0)
640        })
641        .ok()
642    } else {
643        None
644    };
645
646    if let Some(ts) = last_harvest {
647        let dt = DateTime::from_timestamp_millis(ts)
648            .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
649            .unwrap_or_default();
650        println!("{} Incremental harvest since: {}", "[*]".blue(), dt);
651    }
652
653    let registry = ProviderRegistry::new();
654    let include_providers = providers.map(|p| p.to_vec());
655    let exclude_providers = exclude.map(|p| p.to_vec()).unwrap_or_default();
656
657    // Harvest from LLM providers
658    println!("\n{} Harvesting from providers...", "[*]".blue());
659
660    let provider_types = vec![
661        ProviderType::Copilot,
662        ProviderType::Cursor,
663        ProviderType::ClaudeCode,
664        ProviderType::CodexCli,
665        ProviderType::DroidCli,
666        ProviderType::GeminiCli,
667        ProviderType::OpenCode,
668        ProviderType::OpenClaw,
669        ProviderType::Antigravity,
670        ProviderType::Ollama,
671        ProviderType::Vllm,
672        ProviderType::Foundry,
673        ProviderType::LmStudio,
674        ProviderType::LocalAI,
675        ProviderType::TextGenWebUI,
676        ProviderType::Jan,
677        ProviderType::Gpt4All,
678        ProviderType::Llamafile,
679    ];
680
681    for pt in &provider_types {
682        let provider_name = pt.display_name().to_lowercase();
683
684        // Check include/exclude filters
685        if let Some(ref include) = include_providers {
686            if !include
687                .iter()
688                .any(|p| provider_name.contains(&p.to_lowercase()))
689            {
690                continue;
691            }
692        }
693        if exclude_providers
694            .iter()
695            .any(|p| provider_name.contains(&p.to_lowercase()))
696        {
697            continue;
698        }
699
700        if let Some(provider) = registry.get_provider(*pt) {
701            if !provider.is_available() {
702                continue;
703            }
704
705            stats.providers_scanned += 1;
706
707            match provider.list_sessions() {
708                Ok(sessions) => {
709                    for session in sessions {
710                        stats.sessions_found += 1;
711
712                        // Check if session should be skipped (incremental)
713                        if let Some(last) = last_harvest {
714                            if session.last_message_date <= last {
715                                stats.sessions_skipped += 1;
716                                continue;
717                            }
718                        }
719
720                        match insert_or_update_session(
721                            &conn,
722                            &session,
723                            pt.display_name(),
724                            None,
725                            None,
726                            None,           // provider_version
727                            3,              // schema_version (V3)
728                            "json",         // file_format
729                            None,           // workspace_path
730                        ) {
731                            Ok(updated) => {
732                                if updated {
733                                    stats.sessions_updated += 1;
734                                } else {
735                                    stats.sessions_added += 1;
736                                }
737                            }
738                            Err(e) => {
739                                stats.errors.push(format!("{}: {}", session.title(), e));
740                            }
741                        }
742                    }
743
744                    let session_count = stats.sessions_added + stats.sessions_updated;
745                    if session_count > 0 {
746                        println!(
747                            "   {} {}: {} sessions",
748                            "[+]".green(),
749                            provider.name(),
750                            session_count.to_string().cyan()
751                        );
752                    }
753                }
754                Err(e) => {
755                    stats.errors.push(format!("{}: {}", provider.name(), e));
756                }
757            }
758        }
759    }
760
761    // Harvest from VS Code workspaces
762    if include_providers.is_none()
763        || include_providers
764            .as_ref()
765            .map(|p| {
766                p.iter()
767                    .any(|x| x == "copilot" || x == "vscode" || x == "workspace")
768            })
769            .unwrap_or(false)
770    {
771        println!("\n{} Harvesting from VS Code workspaces...", "[*]".blue());
772
773        if let Ok(workspaces) = discover_workspaces() {
774            for ws in &workspaces {
775                if ws.chat_session_count == 0 {
776                    continue;
777                }
778
779                stats.workspaces_scanned += 1;
780
781                if let Ok(sessions) = get_chat_sessions_from_workspace(&ws.workspace_path) {
782                    for swp in sessions {
783                        stats.sessions_found += 1;
784
785                        // Check if session should be skipped (incremental)
786                        if let Some(last) = last_harvest {
787                            if swp.session.last_message_date <= last {
788                                stats.sessions_skipped += 1;
789                                continue;
790                            }
791                        }
792
793                        let ws_name = ws.project_path.clone();
794                        let ws_path = ws.workspace_path.to_string_lossy().to_string();
795
796                        match insert_or_update_session(
797                            &conn,
798                            &swp.session,
799                            "GitHub Copilot",
800                            Some(&ws.hash),
801                            ws_name.as_deref(),
802                            None,           // provider_version
803                            3,              // schema_version (V3)
804                            "json",         // file_format
805                            Some(&ws_path), // workspace_path
806                        ) {
807                            Ok(updated) => {
808                                if updated {
809                                    stats.sessions_updated += 1;
810                                } else {
811                                    stats.sessions_added += 1;
812                                }
813                            }
814                            Err(e) => {
815                                stats.errors.push(format!("{}: {}", swp.session.title(), e));
816                            }
817                        }
818                    }
819                }
820            }
821
822            println!(
823                "   {} Workspaces: {} scanned",
824                "[+]".green(),
825                stats.workspaces_scanned.to_string().cyan()
826            );
827        }
828    }
829
830    // Harvest from web-based cloud providers (ChatGPT, Claude, etc.)
831    let include_list: Vec<String> = include_providers.clone().unwrap_or_default();
832    harvest_web_providers(&conn, &mut stats, &include_list, &exclude_providers)?;
833
834    // Update metadata
835    update_harvest_metadata(&conn)?;
836
837    // Print summary
838    println!("\n{} Harvest Complete:", "[+]".green().bold());
839    println!(
840        "   {} providers scanned",
841        stats.providers_scanned.to_string().cyan()
842    );
843    println!(
844        "   {} workspaces scanned",
845        stats.workspaces_scanned.to_string().cyan()
846    );
847    println!(
848        "   {} sessions found",
849        stats.sessions_found.to_string().cyan()
850    );
851    println!(
852        "   {} sessions added",
853        stats.sessions_added.to_string().green()
854    );
855    println!(
856        "   {} sessions updated",
857        stats.sessions_updated.to_string().yellow()
858    );
859    if stats.sessions_skipped > 0 {
860        println!(
861            "   {} sessions skipped (unchanged)",
862            stats.sessions_skipped.to_string().dimmed()
863        );
864    }
865
866    if !stats.errors.is_empty() {
867        println!("\n{} Errors ({}):", "[!]".red(), stats.errors.len());
868        for (i, err) in stats.errors.iter().take(5).enumerate() {
869            println!("   {}. {}", i + 1, err);
870        }
871        if stats.errors.len() > 5 {
872            println!("   ... and {} more errors", stats.errors.len() - 5);
873        }
874    }
875
876    // Auto-commit if requested
877    if auto_commit && (stats.sessions_added > 0 || stats.sessions_updated > 0) {
878        println!("\n{} Auto-committing changes...", "[*]".blue());
879        let commit_msg = message.unwrap_or("Harvest: update chat sessions");
880        if let Err(e) = git_commit_harvest(&db_path, commit_msg) {
881            println!("{} Git commit failed: {}", "[!]".yellow(), e);
882        } else {
883            println!("{} Changes committed", "[+]".green());
884        }
885    }
886
887    println!("\nDatabase: {}", db_path.display());
888
889    Ok(())
890}
891
892/// Show harvest database status
893pub fn harvest_status(path: Option<&str>) -> Result<()> {
894    let db_path = get_db_path(path)?;
895
896    println!("\n{} Harvest Database Status", "[H]".magenta().bold());
897    println!("{}", "=".repeat(60));
898
899    if !db_path.exists() {
900        println!(
901            "{} Database not found: {}",
902            "[!]".yellow(),
903            db_path.display()
904        );
905        println!("   Run 'csm harvest init' to create one");
906        return Ok(());
907    }
908
909    let conn = Connection::open(&db_path)?;
910
911    // Get session counts by provider
912    let mut stmt = conn.prepare(
913        "SELECT provider, COUNT(*) as count, SUM(message_count) as messages 
914         FROM sessions 
915         GROUP BY provider 
916         ORDER BY count DESC",
917    )?;
918
919    let provider_stats: Vec<(String, i64, i64)> = stmt
920        .query_map([], |row| {
921            Ok((
922                row.get(0)?,
923                row.get(1)?,
924                row.get::<_, Option<i64>>(2)?.unwrap_or(0),
925            ))
926        })?
927        .filter_map(|r| r.ok())
928        .collect();
929
930    // Get total counts
931    let total_sessions: i64 = conn
932        .query_row("SELECT COUNT(*) FROM sessions", [], |row| row.get(0))
933        .unwrap_or(0);
934
935    let total_messages: i64 = conn
936        .query_row(
937            "SELECT COALESCE(SUM(message_count), 0) FROM sessions",
938            [],
939            |row| row.get(0),
940        )
941        .unwrap_or(0);
942
943    // Get last harvest time
944    let last_harvest: Option<i64> = conn
945        .query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
946            row.get(0)
947        })
948        .ok()
949        .flatten();
950
951    // Get oldest and newest sessions
952    let oldest: Option<i64> = conn
953        .query_row("SELECT MIN(created_at) FROM sessions", [], |row| row.get(0))
954        .ok()
955        .flatten();
956
957    let newest: Option<i64> = conn
958        .query_row("SELECT MAX(updated_at) FROM sessions", [], |row| row.get(0))
959        .ok()
960        .flatten();
961
962    println!("{} Database: {}", "[*]".blue(), db_path.display());
963    println!(
964        "{} {}: {}",
965        "[*]".blue(),
966        "Total Sessions".truecolor(171, 157, 242),
967        total_sessions.to_string().truecolor(252, 152, 103)
968    );
969    println!(
970        "{} {}: {}",
971        "[*]".blue(),
972        "Total Messages".truecolor(171, 157, 242),
973        total_messages.to_string().truecolor(252, 152, 103)
974    );
975
976    if let Some(ts) = last_harvest {
977        let dt = DateTime::from_timestamp_millis(ts)
978            .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
979            .unwrap_or_else(|| "Unknown".to_string());
980        println!("{} Last Harvest: {}", "[*]".blue(), dt);
981    }
982
983    if let (Some(old), Some(new)) = (oldest, newest) {
984        let old_dt = DateTime::from_timestamp_millis(old)
985            .map(|d| d.format("%Y-%m-%d").to_string())
986            .unwrap_or_default();
987        let new_dt = DateTime::from_timestamp_millis(new)
988            .map(|d| d.format("%Y-%m-%d").to_string())
989            .unwrap_or_default();
990        println!("{} Date Range: {} to {}", "[*]".blue(), old_dt, new_dt);
991    }
992
993    if !provider_stats.is_empty() {
994        println!("\n{} Sessions by Provider:", "[*]".blue());
995        for (provider, count, messages) in &provider_stats {
996            println!(
997                "   {} {}: {} sessions, {} messages",
998                "[+]".green(),
999                provider.bold(),
1000                count.to_string().cyan(),
1001                messages.to_string().dimmed()
1002            );
1003        }
1004    }
1005
1006    // Check git status
1007    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1008    if db_dir.join(".git").exists()
1009        || db_path
1010            .parent()
1011            .map(|p| p.join(".git").exists())
1012            .unwrap_or(false)
1013    {
1014        println!("\n{} Git Status:", "[*]".blue());
1015
1016        let output = Command::new("git")
1017            .current_dir(db_dir)
1018            .args([
1019                "status",
1020                "--porcelain",
1021                db_path.file_name().unwrap().to_str().unwrap(),
1022            ])
1023            .output();
1024
1025        match output {
1026            Ok(out) => {
1027                let status = String::from_utf8_lossy(&out.stdout);
1028                if status.is_empty() {
1029                    println!("   {} No uncommitted changes", "[+]".green());
1030                } else {
1031                    println!("   {} Uncommitted changes detected", "[!]".yellow());
1032                    println!("   Run 'csm harvest git commit' to save changes");
1033                }
1034            }
1035            Err(_) => {
1036                println!("   {} Unable to check git status", "[!]".yellow());
1037            }
1038        }
1039    } else {
1040        println!("\n{} Git tracking not enabled", "[i]".dimmed());
1041        println!("   Run 'csm harvest git init' to enable version tracking");
1042    }
1043
1044    Ok(())
1045}
1046
1047/// List sessions in the harvest database
1048pub fn harvest_list(
1049    path: Option<&str>,
1050    provider: Option<&str>,
1051    limit: usize,
1052    search: Option<&str>,
1053) -> Result<()> {
1054    let db_path = get_db_path(path)?;
1055
1056    if !db_path.exists() {
1057        println!(
1058            "{} Database not found: {}",
1059            "[!]".yellow(),
1060            db_path.display()
1061        );
1062        return Ok(());
1063    }
1064
1065    let conn = Connection::open(&db_path)?;
1066
1067    let mut query = String::from(
1068        "SELECT id, provider, title, message_count, created_at, updated_at, workspace_name 
1069         FROM sessions WHERE 1=1",
1070    );
1071    let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1072
1073    if let Some(p) = provider {
1074        query.push_str(" AND LOWER(provider) LIKE ?");
1075        params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1076    }
1077
1078    if let Some(s) = search {
1079        query.push_str(" AND (LOWER(title) LIKE ? OR LOWER(id) LIKE ?)");
1080        let pattern = format!("%{}%", s.to_lowercase());
1081        params_vec.push(Box::new(pattern.clone()));
1082        params_vec.push(Box::new(pattern));
1083    }
1084
1085    query.push_str(" ORDER BY updated_at DESC LIMIT ?");
1086    params_vec.push(Box::new(limit as i64));
1087
1088    let mut stmt = conn.prepare(&query)?;
1089
1090    // Build params slice
1091    let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1092
1093    let sessions: Vec<HarvestQueryResult> = stmt
1094        .query_map(params_slice.as_slice(), |row| {
1095            Ok((
1096                row.get(0)?,
1097                row.get(1)?,
1098                row.get(2)?,
1099                row.get(3)?,
1100                row.get(4)?,
1101                row.get(5)?,
1102                row.get(6)?,
1103            ))
1104        })?
1105        .filter_map(|r| r.ok())
1106        .collect();
1107
1108    println!("\n{} Harvested Sessions", "[H]".magenta().bold());
1109    println!("{}", "=".repeat(60));
1110
1111    if sessions.is_empty() {
1112        println!("{} No sessions found", "[i]".dimmed());
1113        return Ok(());
1114    }
1115
1116    for (id, prov, title, msg_count, _created, updated, ws_name) in &sessions {
1117        let date = DateTime::from_timestamp_millis(*updated)
1118            .map(|d| d.format("%Y-%m-%d %H:%M").to_string())
1119            .unwrap_or_default();
1120
1121        println!("\n{} {}", "[S]".cyan(), title.bold());
1122        println!("   ID: {}", &id[..16.min(id.len())].dimmed());
1123        println!(
1124            "   Provider: {} | Messages: {} | Updated: {}",
1125            prov.cyan(),
1126            msg_count.to_string().green(),
1127            date.dimmed()
1128        );
1129        if let Some(ws) = ws_name {
1130            println!("   Workspace: {}", ws.dimmed());
1131        }
1132    }
1133
1134    println!(
1135        "\n{} Showing {} of available sessions",
1136        "[i]".dimmed(),
1137        sessions.len()
1138    );
1139
1140    Ok(())
1141}
1142
1143/// Export sessions from the harvest database
1144pub fn harvest_export(
1145    path: Option<&str>,
1146    output: &str,
1147    format: &str,
1148    provider: Option<&str>,
1149    session_ids: Option<&[String]>,
1150) -> Result<()> {
1151    let db_path = get_db_path(path)?;
1152    let output_path = PathBuf::from(output);
1153
1154    if !db_path.exists() {
1155        anyhow::bail!("Database not found: {}", db_path.display());
1156    }
1157
1158    let conn = Connection::open(&db_path)?;
1159
1160    println!("\n{} Exporting Sessions", "[H]".magenta().bold());
1161    println!("{}", "=".repeat(60));
1162
1163    // Build query
1164    let mut query = String::from("SELECT session_json FROM sessions WHERE 1=1");
1165    let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1166
1167    if let Some(p) = provider {
1168        query.push_str(" AND LOWER(provider) LIKE ?");
1169        params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1170    }
1171
1172    if let Some(ids) = session_ids {
1173        let placeholders: Vec<&str> = ids.iter().map(|_| "?").collect();
1174        query.push_str(&format!(" AND id IN ({})", placeholders.join(",")));
1175        for id in ids {
1176            params_vec.push(Box::new(id.clone()));
1177        }
1178    }
1179
1180    let mut stmt = conn.prepare(&query)?;
1181    let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1182
1183    let sessions: Vec<String> = stmt
1184        .query_map(params_slice.as_slice(), |row| row.get(0))?
1185        .filter_map(|r| r.ok())
1186        .collect();
1187
1188    if sessions.is_empty() {
1189        println!("{} No sessions to export", "[i]".dimmed());
1190        return Ok(());
1191    }
1192
1193    // Create output directory if needed
1194    if let Some(parent) = output_path.parent() {
1195        if !parent.exists() {
1196            fs::create_dir_all(parent)?;
1197        }
1198    }
1199
1200    match format.to_lowercase().as_str() {
1201        "json" => {
1202            // Export as JSON array
1203            let parsed: Vec<serde_json::Value> = sessions
1204                .iter()
1205                .filter_map(|s| serde_json::from_str(s).ok())
1206                .collect();
1207            let json_output = serde_json::to_string_pretty(&parsed)?;
1208            fs::write(&output_path, json_output)?;
1209        }
1210        "jsonl" => {
1211            // Export as JSON Lines
1212            let content: String = sessions
1213                .iter()
1214                .filter_map(|s| serde_json::from_str::<serde_json::Value>(s).ok())
1215                .map(|v| serde_json::to_string(&v).unwrap_or_default())
1216                .collect::<Vec<_>>()
1217                .join("\n");
1218            fs::write(&output_path, content)?;
1219        }
1220        "md" | "markdown" => {
1221            // Export as Markdown
1222            let mut md_content = String::from("# Chat Sessions Export\n\n");
1223            md_content.push_str(&format!(
1224                "Exported: {}\n\n",
1225                Utc::now().format("%Y-%m-%d %H:%M:%S")
1226            ));
1227
1228            for session_json in &sessions {
1229                if let Ok(session) = parse_session_json(session_json) {
1230                    md_content.push_str(&format!("## {}\n\n", session.title()));
1231                    md_content.push_str(&format!("Messages: {}\n\n", session.request_count()));
1232
1233                    for request in &session.requests {
1234                        if let Some(msg) = &request.message {
1235                            if let Some(text) = &msg.text {
1236                                md_content.push_str(&format!("### User\n\n{}\n\n", text));
1237                            }
1238                        }
1239                        // Extract response text from the JSON value
1240                        if let Some(response) = &request.response {
1241                            let response_text = response
1242                                .get("text")
1243                                .and_then(|v| v.as_str())
1244                                .or_else(|| response.get("content").and_then(|v| v.as_str()))
1245                                .or_else(|| {
1246                                    // Try to get from value array (older format)
1247                                    response
1248                                        .get("value")
1249                                        .and_then(|v| v.as_array())
1250                                        .and_then(|arr| arr.first())
1251                                        .and_then(|v| v.get("value"))
1252                                        .and_then(|v| v.as_str())
1253                                });
1254
1255                            if let Some(text) = response_text {
1256                                md_content.push_str(&format!("### Assistant\n\n{}\n\n", text));
1257                            }
1258                        }
1259                    }
1260                    md_content.push_str("---\n\n");
1261                }
1262            }
1263            fs::write(&output_path, md_content)?;
1264        }
1265        _ => {
1266            anyhow::bail!("Unknown format: {}. Supported: json, jsonl, md", format);
1267        }
1268    }
1269
1270    println!(
1271        "{} Exported {} sessions to {}",
1272        "[+]".green(),
1273        sessions.len().to_string().cyan(),
1274        output_path.display()
1275    );
1276
1277    Ok(())
1278}
1279
1280/// Git operations for harvest database
1281pub fn harvest_git_init(path: Option<&str>) -> Result<()> {
1282    let db_path = get_db_path(path)?;
1283    init_git_tracking(&db_path)
1284}
1285
1286pub fn harvest_git_commit(path: Option<&str>, message: Option<&str>) -> Result<()> {
1287    let db_path = get_db_path(path)?;
1288    let msg = message.unwrap_or("Update harvest database");
1289    git_commit_harvest(&db_path, msg)
1290}
1291
1292pub fn harvest_git_log(path: Option<&str>, count: usize) -> Result<()> {
1293    let db_path = get_db_path(path)?;
1294    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1295    let db_name = db_path.file_name().unwrap().to_str().unwrap();
1296
1297    println!("\n{} Harvest Git History", "[H]".magenta().bold());
1298    println!("{}", "=".repeat(60));
1299
1300    let output = Command::new("git")
1301        .current_dir(db_dir)
1302        .args([
1303            "log",
1304            "--oneline",
1305            &format!("-{}", count),
1306            "--follow",
1307            "--",
1308            db_name,
1309        ])
1310        .output()
1311        .context("Failed to run git log")?;
1312
1313    if !output.status.success() {
1314        let stderr = String::from_utf8_lossy(&output.stderr);
1315        if stderr.contains("does not have any commits") {
1316            println!("{} No commits yet", "[i]".dimmed());
1317        } else {
1318            anyhow::bail!("Git log failed: {}", stderr);
1319        }
1320        return Ok(());
1321    }
1322
1323    let log = String::from_utf8_lossy(&output.stdout);
1324    if log.trim().is_empty() {
1325        println!("{} No commits found for {}", "[i]".dimmed(), db_name);
1326    } else {
1327        for line in log.lines() {
1328            let parts: Vec<&str> = line.splitn(2, ' ').collect();
1329            if parts.len() == 2 {
1330                println!("{} {} {}", "[C]".yellow(), parts[0].cyan(), parts[1]);
1331            } else {
1332                println!("{}", line);
1333            }
1334        }
1335    }
1336
1337    Ok(())
1338}
1339
1340pub fn harvest_git_diff(path: Option<&str>, commit: Option<&str>) -> Result<()> {
1341    let db_path = get_db_path(path)?;
1342    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1343    let db_name = db_path.file_name().unwrap().to_str().unwrap();
1344
1345    println!("\n{} Harvest Database Changes", "[H]".magenta().bold());
1346    println!("{}", "=".repeat(60));
1347
1348    let mut args = vec!["diff", "--stat"];
1349    if let Some(c) = commit {
1350        args.push(c);
1351    }
1352    args.push("--");
1353    args.push(db_name);
1354
1355    let output = Command::new("git")
1356        .current_dir(db_dir)
1357        .args(&args)
1358        .output()
1359        .context("Failed to run git diff")?;
1360
1361    let diff = String::from_utf8_lossy(&output.stdout);
1362    if diff.trim().is_empty() {
1363        println!("{} No changes", "[+]".green());
1364    } else {
1365        println!("{}", diff);
1366    }
1367
1368    Ok(())
1369}
1370
1371pub fn harvest_git_restore(path: Option<&str>, commit: &str) -> Result<()> {
1372    let db_path = get_db_path(path)?;
1373    let db_dir = db_path.parent().unwrap_or(Path::new("."));
1374    let db_name = db_path.file_name().unwrap().to_str().unwrap();
1375
1376    println!("\n{} Restoring Harvest Database", "[H]".magenta().bold());
1377    println!("{}", "=".repeat(60));
1378
1379    // Create backup first
1380    let backup_path = db_path.with_extension("db.backup");
1381    if db_path.exists() {
1382        fs::copy(&db_path, &backup_path)?;
1383        println!(
1384            "{} Created backup: {}",
1385            "[+]".green(),
1386            backup_path.display()
1387        );
1388    }
1389
1390    // Restore from commit
1391    let output = Command::new("git")
1392        .current_dir(db_dir)
1393        .args(["checkout", commit, "--", db_name])
1394        .output()
1395        .context("Failed to run git checkout")?;
1396
1397    if !output.status.success() {
1398        let stderr = String::from_utf8_lossy(&output.stderr);
1399        anyhow::bail!("Git restore failed: {}", stderr);
1400    }
1401
1402    println!(
1403        "{} Restored database from commit: {}",
1404        "[+]".green(),
1405        commit
1406    );
1407
1408    Ok(())
1409}
1410
1411// ============================================================================
1412// Helper Functions
1413// ============================================================================
1414
1415fn get_db_path(path: Option<&str>) -> Result<PathBuf> {
1416    if let Some(p) = path {
1417        return Ok(PathBuf::from(p));
1418    }
1419
1420    // Check environment variable
1421    if let Ok(p) = std::env::var("CSM_HARVEST_DB") {
1422        return Ok(PathBuf::from(p));
1423    }
1424
1425    // Default to current directory
1426    Ok(std::env::current_dir()?.join("chat_sessions.db"))
1427}
1428
1429fn create_harvest_database(path: &Path) -> Result<()> {
1430    let conn = Connection::open(path)?;
1431
1432    conn.execute_batch(
1433        r#"
1434        -- Sessions table (original harvest format)
1435        CREATE TABLE IF NOT EXISTS sessions (
1436            id TEXT PRIMARY KEY,
1437            provider TEXT NOT NULL,
1438            provider_type TEXT,
1439            provider_version TEXT,
1440            schema_version INTEGER DEFAULT 3,
1441            file_format TEXT DEFAULT 'json',
1442            workspace_id TEXT,
1443            workspace_name TEXT,
1444            workspace_path TEXT,
1445            title TEXT NOT NULL,
1446            message_count INTEGER DEFAULT 0,
1447            created_at INTEGER NOT NULL,
1448            updated_at INTEGER NOT NULL,
1449            harvested_at INTEGER NOT NULL,
1450            session_json TEXT NOT NULL
1451        );
1452
1453        CREATE INDEX IF NOT EXISTS idx_sessions_provider ON sessions(provider);
1454        CREATE INDEX IF NOT EXISTS idx_sessions_workspace ON sessions(workspace_id);
1455        CREATE INDEX IF NOT EXISTS idx_sessions_updated ON sessions(updated_at);
1456        CREATE INDEX IF NOT EXISTS idx_sessions_workspace_path ON sessions(workspace_path);
1457        
1458        -- Enhanced messages table with raw markdown and metadata
1459        CREATE TABLE IF NOT EXISTS messages_v2 (
1460            id INTEGER PRIMARY KEY AUTOINCREMENT,
1461            session_id TEXT NOT NULL,
1462            message_index INTEGER NOT NULL,
1463            request_id TEXT,
1464            response_id TEXT,
1465            role TEXT NOT NULL,
1466            content_raw TEXT NOT NULL,
1467            content_markdown TEXT,
1468            model_id TEXT,
1469            timestamp INTEGER,
1470            is_canceled INTEGER DEFAULT 0,
1471            metadata_json TEXT,
1472            created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1473            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1474            UNIQUE(session_id, message_index, role)
1475        );
1476        
1477        CREATE INDEX IF NOT EXISTS idx_messages_v2_session ON messages_v2(session_id);
1478        CREATE INDEX IF NOT EXISTS idx_messages_v2_role ON messages_v2(role);
1479        CREATE INDEX IF NOT EXISTS idx_messages_v2_timestamp ON messages_v2(timestamp);
1480        
1481        -- Tool invocations within messages (file edits, terminal commands, etc.)
1482        CREATE TABLE IF NOT EXISTS tool_invocations (
1483            id INTEGER PRIMARY KEY AUTOINCREMENT,
1484            message_id INTEGER NOT NULL,
1485            session_id TEXT NOT NULL,
1486            tool_name TEXT NOT NULL,
1487            tool_call_id TEXT,
1488            invocation_index INTEGER DEFAULT 0,
1489            input_json TEXT,
1490            output_json TEXT,
1491            status TEXT DEFAULT 'pending',
1492            is_confirmed INTEGER DEFAULT 0,
1493            timestamp INTEGER,
1494            FOREIGN KEY (message_id) REFERENCES messages_v2(id) ON DELETE CASCADE,
1495            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1496        );
1497        
1498        CREATE INDEX IF NOT EXISTS idx_tool_invocations_message ON tool_invocations(message_id);
1499        CREATE INDEX IF NOT EXISTS idx_tool_invocations_session ON tool_invocations(session_id);
1500        CREATE INDEX IF NOT EXISTS idx_tool_invocations_tool ON tool_invocations(tool_name);
1501        
1502        -- File changes/diffs associated with tool invocations
1503        CREATE TABLE IF NOT EXISTS file_changes (
1504            id INTEGER PRIMARY KEY AUTOINCREMENT,
1505            tool_invocation_id INTEGER,
1506            session_id TEXT NOT NULL,
1507            message_index INTEGER,
1508            file_path TEXT NOT NULL,
1509            change_type TEXT NOT NULL,
1510            old_content TEXT,
1511            new_content TEXT,
1512            diff_unified TEXT,
1513            line_start INTEGER,
1514            line_end INTEGER,
1515            timestamp INTEGER,
1516            FOREIGN KEY (tool_invocation_id) REFERENCES tool_invocations(id) ON DELETE CASCADE,
1517            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1518        );
1519        
1520        CREATE INDEX IF NOT EXISTS idx_file_changes_tool ON file_changes(tool_invocation_id);
1521        CREATE INDEX IF NOT EXISTS idx_file_changes_session ON file_changes(session_id);
1522        CREATE INDEX IF NOT EXISTS idx_file_changes_path ON file_changes(file_path);
1523        
1524        -- Message-level checkpoints for versioning
1525        CREATE TABLE IF NOT EXISTS message_checkpoints (
1526            id INTEGER PRIMARY KEY AUTOINCREMENT,
1527            session_id TEXT NOT NULL,
1528            message_index INTEGER NOT NULL,
1529            checkpoint_number INTEGER NOT NULL,
1530            content_hash TEXT NOT NULL,
1531            snapshot_json TEXT,
1532            file_state_json TEXT,
1533            created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1534            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1535            UNIQUE(session_id, message_index, checkpoint_number)
1536        );
1537        
1538        CREATE INDEX IF NOT EXISTS idx_message_checkpoints_session ON message_checkpoints(session_id);
1539        CREATE INDEX IF NOT EXISTS idx_sessions_title ON sessions(title);
1540
1541        -- Messages table for full-text search
1542        CREATE TABLE IF NOT EXISTS messages (
1543            id INTEGER PRIMARY KEY AUTOINCREMENT,
1544            session_id TEXT NOT NULL,
1545            message_index INTEGER NOT NULL,
1546            role TEXT NOT NULL,
1547            content TEXT NOT NULL,
1548            created_at INTEGER,
1549            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1550            UNIQUE(session_id, message_index)
1551        );
1552        
1553        CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id);
1554        CREATE INDEX IF NOT EXISTS idx_messages_role ON messages(role);
1555        
1556        -- Checkpoints for version tracking
1557        CREATE TABLE IF NOT EXISTS checkpoints (
1558            id INTEGER PRIMARY KEY AUTOINCREMENT,
1559            session_id TEXT NOT NULL,
1560            checkpoint_number INTEGER NOT NULL,
1561            message TEXT,
1562            message_count INTEGER NOT NULL,
1563            content_hash TEXT NOT NULL,
1564            snapshot TEXT,
1565            created_at TEXT NOT NULL DEFAULT (datetime('now')),
1566            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1567            UNIQUE(session_id, checkpoint_number)
1568        );
1569        
1570        CREATE INDEX IF NOT EXISTS idx_checkpoints_session ON checkpoints(session_id);
1571        
1572        -- Share links for importing shared conversations
1573        CREATE TABLE IF NOT EXISTS share_links (
1574            id TEXT PRIMARY KEY,
1575            session_id TEXT,
1576            provider TEXT NOT NULL,
1577            url TEXT NOT NULL UNIQUE,
1578            share_id TEXT NOT NULL,
1579            title TEXT,
1580            imported INTEGER DEFAULT 0,
1581            imported_at INTEGER,
1582            created_at INTEGER NOT NULL,
1583            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE SET NULL
1584        );
1585        
1586        CREATE INDEX IF NOT EXISTS idx_share_links_provider ON share_links(provider);
1587        CREATE INDEX IF NOT EXISTS idx_share_links_imported ON share_links(imported);
1588
1589        -- Harvest metadata
1590        CREATE TABLE IF NOT EXISTS harvest_metadata (
1591            key TEXT PRIMARY KEY,
1592            value TEXT NOT NULL
1593        );
1594
1595        INSERT OR REPLACE INTO harvest_metadata (key, value) 
1596        VALUES ('version', '2.1'),
1597               ('created_at', datetime('now'));
1598               
1599        -- Full-text search for messages (standalone FTS table)
1600        CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
1601            content_raw
1602        );
1603        
1604        -- Triggers to keep FTS index in sync with messages_v2
1605        CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
1606            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1607        END;
1608        
1609        CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
1610            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
1611        END;
1612        
1613        CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
1614            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
1615            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1616        END;
1617        "#,
1618    )?;
1619
1620    Ok(())
1621}
1622
1623fn insert_or_update_session(
1624    conn: &Connection,
1625    session: &ChatSession,
1626    provider: &str,
1627    workspace_id: Option<&str>,
1628    workspace_name: Option<&str>,
1629    provider_version: Option<&str>,
1630    schema_version: u32,
1631    file_format: &str,
1632    workspace_path: Option<&str>,
1633) -> Result<bool> {
1634    let session_id = session
1635        .session_id
1636        .clone()
1637        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
1638
1639    let now = Utc::now().timestamp_millis();
1640    let session_json = serde_json::to_string(session)?;
1641
1642    // Check if session exists
1643    let existing: Option<i64> = conn
1644        .query_row(
1645            "SELECT updated_at FROM sessions WHERE id = ?",
1646            [&session_id],
1647            |row| row.get(0),
1648        )
1649        .ok();
1650
1651    let updated = existing.is_some();
1652
1653    conn.execute(
1654        r#"
1655        INSERT OR REPLACE INTO sessions 
1656        (id, provider, provider_type, workspace_id, workspace_name, title, 
1657         message_count, created_at, updated_at, harvested_at, session_json,
1658         provider_version, schema_version, file_format, workspace_path)
1659        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1660        "#,
1661        params![
1662            session_id,
1663            provider,
1664            provider,
1665            workspace_id,
1666            workspace_name,
1667            session.title(),
1668            session.request_count() as i64,
1669            session.creation_date,
1670            session.last_message_date,
1671            now,
1672            session_json,
1673            provider_version,
1674            schema_version as i64,
1675            file_format,
1676            workspace_path,
1677        ],
1678    )?;
1679
1680    // Populate enhanced message tables
1681    populate_enhanced_messages(conn, &session_id, session)?;
1682
1683    Ok(updated)
1684}
1685
1686/// Populate the enhanced messages_v2, tool_invocations, and file_changes tables
1687fn populate_enhanced_messages(
1688    conn: &Connection,
1689    session_id: &str,
1690    session: &ChatSession,
1691) -> Result<()> {
1692    // Delete existing messages for this session to avoid duplicates
1693    conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
1694    conn.execute(
1695        "DELETE FROM tool_invocations WHERE session_id = ?",
1696        [session_id],
1697    )?;
1698    conn.execute(
1699        "DELETE FROM file_changes WHERE session_id = ?",
1700        [session_id],
1701    )?;
1702
1703    for (idx, request) in session.requests.iter().enumerate() {
1704        let timestamp = request.timestamp;
1705        let request_id = request.request_id.as_deref();
1706        let response_id = request.response_id.as_deref();
1707        let model_id = request.model_id.as_deref();
1708        let is_canceled = request.is_canceled.unwrap_or(false);
1709
1710        // Insert user message
1711        if let Some(ref message) = request.message {
1712            let content = message.text.clone().unwrap_or_default();
1713            if !content.is_empty() {
1714                let metadata = serde_json::json!({
1715                    "variable_data": request.variable_data,
1716                });
1717
1718                conn.execute(
1719                    r#"
1720                    INSERT OR REPLACE INTO messages_v2 
1721                    (session_id, message_index, request_id, response_id, role, 
1722                     content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1723                    VALUES (?, ?, ?, ?, 'user', ?, ?, ?, ?, 0, ?)
1724                    "#,
1725                    params![
1726                        session_id,
1727                        (idx * 2) as i64,
1728                        request_id,
1729                        response_id,
1730                        &content,
1731                        &content, // content_markdown same as raw for user messages
1732                        model_id,
1733                        timestamp,
1734                        serde_json::to_string(&metadata).ok(),
1735                    ],
1736                )?;
1737            }
1738        }
1739
1740        // Insert assistant response with tool invocations
1741        if let Some(ref response) = request.response {
1742            let (content, tool_invocations) = extract_response_content_and_tools(response);
1743
1744            if !content.is_empty() || !tool_invocations.is_empty() {
1745                let metadata = serde_json::json!({
1746                    "content_references": request.content_references,
1747                    "code_citations": request.code_citations,
1748                    "response_markdown_info": request.response_markdown_info,
1749                });
1750
1751                conn.execute(
1752                    r#"
1753                    INSERT OR REPLACE INTO messages_v2 
1754                    (session_id, message_index, request_id, response_id, role, 
1755                     content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1756                    VALUES (?, ?, ?, ?, 'assistant', ?, ?, ?, ?, ?, ?)
1757                    "#,
1758                    params![
1759                        session_id,
1760                        (idx * 2 + 1) as i64,
1761                        request_id,
1762                        response_id,
1763                        &content,
1764                        &content,
1765                        model_id,
1766                        timestamp,
1767                        is_canceled as i64,
1768                        serde_json::to_string(&metadata).ok(),
1769                    ],
1770                )?;
1771
1772                // Get the message_id we just inserted
1773                let message_id: i64 = conn.last_insert_rowid();
1774
1775                // Insert tool invocations and file changes
1776                for (inv_idx, invocation) in tool_invocations.iter().enumerate() {
1777                    insert_tool_invocation(
1778                        conn, message_id, session_id, inv_idx, invocation, timestamp,
1779                    )?;
1780                }
1781            }
1782        }
1783    }
1784
1785    Ok(())
1786}
1787
1788/// Extract response content and tool invocations from the response JSON
1789fn extract_response_content_and_tools(
1790    response: &serde_json::Value,
1791) -> (String, Vec<serde_json::Value>) {
1792    let mut text_parts = Vec::new();
1793    let mut tool_invocations = Vec::new();
1794
1795    if let Some(items) = response.as_array() {
1796        for item in items {
1797            let kind = item.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1798
1799            match kind {
1800                "toolInvocationSerialized" => {
1801                    tool_invocations.push(item.clone());
1802                }
1803                "thinking" => {
1804                    // Skip thinking blocks
1805                    continue;
1806                }
1807                _ => {
1808                    if let Some(value) = item.get("value").and_then(|v| v.as_str()) {
1809                        // Filter out empty code block markers (```\n or just ```)
1810                        let trimmed = value.trim();
1811                        if !trimmed.is_empty() && !is_empty_code_block(trimmed) {
1812                            text_parts.push(value.to_string());
1813                        }
1814                    }
1815                }
1816            }
1817        }
1818    }
1819
1820    (text_parts.join("\n\n"), tool_invocations)
1821}
1822
1823/// Insert a tool invocation and any associated file changes
1824fn insert_tool_invocation(
1825    conn: &Connection,
1826    message_id: i64,
1827    session_id: &str,
1828    inv_idx: usize,
1829    invocation: &serde_json::Value,
1830    timestamp: Option<i64>,
1831) -> Result<()> {
1832    let tool_name = invocation
1833        .get("toolId")
1834        .and_then(|t| t.as_str())
1835        .unwrap_or("unknown");
1836    let tool_call_id = invocation.get("toolCallId").and_then(|t| t.as_str());
1837    let is_complete = invocation
1838        .get("isComplete")
1839        .and_then(|c| c.as_bool())
1840        .unwrap_or(false);
1841    let is_confirmed = invocation.get("isConfirmed");
1842    let tool_data = invocation.get("toolSpecificData");
1843
1844    let input_json = tool_data.map(|d| serde_json::to_string(d).unwrap_or_default());
1845    let status = if is_complete { "complete" } else { "pending" };
1846    let confirmed = match is_confirmed {
1847        Some(v) => v
1848            .get("type")
1849            .and_then(|t| t.as_i64())
1850            .map(|t| t > 0)
1851            .unwrap_or(false),
1852        None => false,
1853    };
1854
1855    conn.execute(
1856        r#"
1857        INSERT INTO tool_invocations 
1858        (message_id, session_id, tool_name, tool_call_id, invocation_index, 
1859         input_json, status, is_confirmed, timestamp)
1860        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
1861        "#,
1862        params![
1863            message_id,
1864            session_id,
1865            tool_name,
1866            tool_call_id,
1867            inv_idx as i64,
1868            input_json,
1869            status,
1870            confirmed as i64,
1871            timestamp,
1872        ],
1873    )?;
1874
1875    let tool_invocation_id = conn.last_insert_rowid();
1876
1877    // Extract and insert file changes based on tool type
1878    if let Some(data) = tool_data {
1879        insert_file_changes(conn, tool_invocation_id, session_id, data, timestamp)?;
1880    }
1881
1882    Ok(())
1883}
1884
1885/// Insert file changes from tool-specific data
1886fn insert_file_changes(
1887    conn: &Connection,
1888    tool_invocation_id: i64,
1889    session_id: &str,
1890    tool_data: &serde_json::Value,
1891    timestamp: Option<i64>,
1892) -> Result<()> {
1893    let kind = tool_data.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1894
1895    match kind {
1896        "terminal" => {
1897            // Terminal command execution
1898            if let Some(cmd_line) = tool_data.get("commandLine") {
1899                let original = cmd_line.get("original").and_then(|o| o.as_str());
1900                let edited = cmd_line.get("toolEdited").and_then(|e| e.as_str());
1901                let output = tool_data
1902                    .get("terminalCommandOutput")
1903                    .map(|o| serde_json::to_string(o).unwrap_or_default());
1904
1905                conn.execute(
1906                    r#"
1907                    INSERT INTO file_changes 
1908                    (tool_invocation_id, session_id, file_path, change_type, 
1909                     old_content, new_content, diff_unified, timestamp)
1910                    VALUES (?, ?, '[terminal]', 'command', ?, ?, ?, ?)
1911                    "#,
1912                    params![
1913                        tool_invocation_id,
1914                        session_id,
1915                        original,
1916                        edited.or(original),
1917                        output,
1918                        timestamp,
1919                    ],
1920                )?;
1921            }
1922        }
1923        "replaceFile" | "editFile" => {
1924            // File edit with old/new strings
1925            let file_path = tool_data
1926                .get("uri")
1927                .or_else(|| tool_data.get("filePath"))
1928                .and_then(|p| p.as_str())
1929                .unwrap_or("[unknown]");
1930            let old_string = tool_data.get("oldString").and_then(|s| s.as_str());
1931            let new_string = tool_data.get("newString").and_then(|s| s.as_str());
1932
1933            // Generate unified diff if we have both old and new content
1934            let diff = if let (Some(old), Some(new)) = (old_string, new_string) {
1935                Some(generate_unified_diff(old, new, file_path))
1936            } else {
1937                None
1938            };
1939
1940            conn.execute(
1941                r#"
1942                INSERT INTO file_changes 
1943                (tool_invocation_id, session_id, file_path, change_type, 
1944                 old_content, new_content, diff_unified, timestamp)
1945                VALUES (?, ?, ?, 'edit', ?, ?, ?, ?)
1946                "#,
1947                params![
1948                    tool_invocation_id,
1949                    session_id,
1950                    file_path,
1951                    old_string,
1952                    new_string,
1953                    diff,
1954                    timestamp,
1955                ],
1956            )?;
1957        }
1958        "createFile" => {
1959            let file_path = tool_data
1960                .get("uri")
1961                .or_else(|| tool_data.get("filePath"))
1962                .and_then(|p| p.as_str())
1963                .unwrap_or("[unknown]");
1964            let content = tool_data.get("content").and_then(|c| c.as_str());
1965
1966            conn.execute(
1967                r#"
1968                INSERT INTO file_changes 
1969                (tool_invocation_id, session_id, file_path, change_type, 
1970                 new_content, timestamp)
1971                VALUES (?, ?, ?, 'create', ?, ?)
1972                "#,
1973                params![
1974                    tool_invocation_id,
1975                    session_id,
1976                    file_path,
1977                    content,
1978                    timestamp,
1979                ],
1980            )?;
1981        }
1982        "readFile" => {
1983            let file_path = tool_data
1984                .get("uri")
1985                .or_else(|| tool_data.get("filePath"))
1986                .and_then(|p| p.as_str())
1987                .unwrap_or("[unknown]");
1988
1989            conn.execute(
1990                r#"
1991                INSERT INTO file_changes 
1992                (tool_invocation_id, session_id, file_path, change_type, timestamp)
1993                VALUES (?, ?, ?, 'read', ?)
1994                "#,
1995                params![tool_invocation_id, session_id, file_path, timestamp,],
1996            )?;
1997        }
1998        _ => {
1999            // Other tool types - store as generic change
2000            if !kind.is_empty() {
2001                let data_json = serde_json::to_string(tool_data).ok();
2002                conn.execute(
2003                    r#"
2004                    INSERT INTO file_changes 
2005                    (tool_invocation_id, session_id, file_path, change_type, 
2006                     diff_unified, timestamp)
2007                    VALUES (?, ?, ?, ?, ?, ?)
2008                    "#,
2009                    params![
2010                        tool_invocation_id,
2011                        session_id,
2012                        format!("[{}]", kind),
2013                        kind,
2014                        data_json,
2015                        timestamp,
2016                    ],
2017                )?;
2018            }
2019        }
2020    }
2021
2022    Ok(())
2023}
2024
2025/// Generate a simple unified diff between two strings
2026fn generate_unified_diff(old: &str, new: &str, file_path: &str) -> String {
2027    let old_lines: Vec<&str> = old.lines().collect();
2028    let new_lines: Vec<&str> = new.lines().collect();
2029
2030    let mut diff = format!("--- a/{}\n+++ b/{}\n", file_path, file_path);
2031
2032    // Simple line-by-line diff (not a full Myers diff, but good enough for storage)
2033    let max_lines = old_lines.len().max(new_lines.len());
2034    let mut in_hunk = false;
2035    let mut hunk_start = 0;
2036    let mut hunk_lines = Vec::new();
2037
2038    for i in 0..max_lines {
2039        let old_line = old_lines.get(i).copied();
2040        let new_line = new_lines.get(i).copied();
2041
2042        match (old_line, new_line) {
2043            (Some(o), Some(n)) if o == n => {
2044                if in_hunk {
2045                    hunk_lines.push(format!(" {}", o));
2046                }
2047            }
2048            (Some(o), Some(n)) => {
2049                if !in_hunk {
2050                    in_hunk = true;
2051                    hunk_start = i + 1;
2052                }
2053                hunk_lines.push(format!("-{}", o));
2054                hunk_lines.push(format!("+{}", n));
2055            }
2056            (Some(o), None) => {
2057                if !in_hunk {
2058                    in_hunk = true;
2059                    hunk_start = i + 1;
2060                }
2061                hunk_lines.push(format!("-{}", o));
2062            }
2063            (None, Some(n)) => {
2064                if !in_hunk {
2065                    in_hunk = true;
2066                    hunk_start = i + 1;
2067                }
2068                hunk_lines.push(format!("+{}", n));
2069            }
2070            (None, None) => break,
2071        }
2072    }
2073
2074    if !hunk_lines.is_empty() {
2075        diff.push_str(&format!(
2076            "@@ -{},{} +{},{} @@\n",
2077            hunk_start,
2078            old_lines.len(),
2079            hunk_start,
2080            new_lines.len()
2081        ));
2082        for line in hunk_lines {
2083            diff.push_str(&line);
2084            diff.push('\n');
2085        }
2086    }
2087
2088    diff
2089}
2090
2091/// Harvest sessions from web-based cloud providers (ChatGPT, Claude, etc.)
2092fn harvest_web_providers(
2093    conn: &Connection,
2094    stats: &mut HarvestStats,
2095    include_providers: &[String],
2096    exclude_providers: &[String],
2097) -> Result<()> {
2098    use crate::browser::extract_provider_cookies;
2099
2100    println!("\n{} Harvesting from web providers...", "[*]".blue());
2101
2102    // Define web providers to harvest from
2103    let web_provider_configs: Vec<(&str, &str, &str)> = vec![
2104        ("ChatGPT", "chatgpt", "__Secure-next-auth.session-token"),
2105        ("Claude", "claude", "sessionKey"),
2106    ];
2107
2108    let mut web_sessions_harvested = 0;
2109
2110    for (display_name, provider_key, _cookie_name) in &web_provider_configs {
2111        // Check provider filters
2112        if !include_providers.is_empty()
2113            && !include_providers
2114                .iter()
2115                .any(|p| p.eq_ignore_ascii_case(provider_key))
2116        {
2117            continue;
2118        }
2119        if exclude_providers
2120            .iter()
2121            .any(|p| p.eq_ignore_ascii_case(provider_key))
2122        {
2123            continue;
2124        }
2125
2126        print!("   {} Checking {} ... ", "[-]".yellow(), display_name);
2127
2128        // Try to extract cookies for this provider
2129        if let Some(creds) = extract_provider_cookies(provider_key) {
2130            if let Some(session_token) = &creds.session_token {
2131                println!("{}", "authenticated".green());
2132
2133                // Create provider and fetch conversations
2134                let result = match *provider_key {
2135                    "chatgpt" => harvest_chatgpt_sessions(conn, session_token, stats),
2136                    "claude" => harvest_claude_sessions(conn, session_token, stats),
2137                    _ => Ok(0),
2138                };
2139
2140                match result {
2141                    Ok(count) => {
2142                        if count > 0 {
2143                            println!(
2144                                "      {} Harvested {} sessions from {}",
2145                                "[+]".green(),
2146                                count.to_string().cyan(),
2147                                display_name
2148                            );
2149                            web_sessions_harvested += count;
2150                        }
2151                    }
2152                    Err(e) => {
2153                        println!(
2154                            "      {} Failed to harvest {}: {:?}",
2155                            "[!]".red(),
2156                            display_name,
2157                            e
2158                        );
2159                    }
2160                }
2161            } else {
2162                println!("{}", "no session token".yellow());
2163            }
2164        } else {
2165            println!("{}", "not authenticated".yellow());
2166        }
2167    }
2168
2169    if web_sessions_harvested > 0 {
2170        println!(
2171            "   {} Total web sessions harvested: {}",
2172            "[+]".green(),
2173            web_sessions_harvested.to_string().cyan()
2174        );
2175    }
2176
2177    Ok(())
2178}
2179
2180/// Harvest sessions from ChatGPT web interface
2181fn harvest_chatgpt_sessions(
2182    conn: &Connection,
2183    session_token: &str,
2184    stats: &mut HarvestStats,
2185) -> Result<usize> {
2186    use crate::providers::cloud::chatgpt::ChatGPTProvider;
2187    use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2188
2189    let provider = ChatGPTProvider::with_session_token(session_token.to_string());
2190
2191    // List all conversations
2192    let options = FetchOptions {
2193        limit: Some(100),
2194        include_archived: false,
2195        after: None,
2196        before: None,
2197        session_token: Some(session_token.to_string()),
2198    };
2199
2200    let conversations = provider
2201        .list_conversations(&options)
2202        .context("Failed to list ChatGPT conversations")?;
2203
2204    let mut harvested = 0;
2205
2206    for conv_summary in conversations {
2207        // Check if we already have this conversation
2208        let exists: bool = conn
2209            .query_row(
2210                "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'chatgpt')",
2211                params![&conv_summary.id],
2212                |row| row.get(0),
2213            )
2214            .unwrap_or(false);
2215
2216        if exists {
2217            // Could add update logic here later
2218            continue;
2219        }
2220
2221        // Fetch full conversation
2222        match provider.fetch_conversation(&conv_summary.id) {
2223            Ok(conv) => {
2224                // Insert into database using existing function
2225                if let Err(e) =
2226                    insert_cloud_conversation_to_harvest_db(conn, &conv, "chatgpt", None)
2227                {
2228                    eprintln!("Failed to insert ChatGPT session: {}", e);
2229                    continue;
2230                }
2231                harvested += 1;
2232                stats.sessions_added += 1;
2233            }
2234            Err(e) => {
2235                eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2236            }
2237        }
2238
2239        // Small delay to avoid rate limiting
2240        std::thread::sleep(std::time::Duration::from_millis(100));
2241    }
2242
2243    Ok(harvested)
2244}
2245
2246/// Harvest sessions from Claude web interface
2247fn harvest_claude_sessions(
2248    conn: &Connection,
2249    session_token: &str,
2250    stats: &mut HarvestStats,
2251) -> Result<usize> {
2252    use crate::providers::cloud::anthropic::AnthropicProvider;
2253    use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2254
2255    let provider = AnthropicProvider::with_session_token(session_token.to_string());
2256
2257    // List all conversations
2258    let options = FetchOptions {
2259        limit: Some(100),
2260        include_archived: false,
2261        after: None,
2262        before: None,
2263        session_token: Some(session_token.to_string()),
2264    };
2265
2266    let conversations = provider
2267        .list_conversations(&options)
2268        .context("Failed to list Claude conversations")?;
2269
2270    let mut harvested = 0;
2271
2272    for conv_summary in conversations {
2273        // Check if we already have this conversation
2274        let exists: bool = conn
2275            .query_row(
2276                "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'claude')",
2277                params![&conv_summary.id],
2278                |row| row.get(0),
2279            )
2280            .unwrap_or(false);
2281
2282        if exists {
2283            continue;
2284        }
2285
2286        // Fetch full conversation
2287        match provider.fetch_conversation(&conv_summary.id) {
2288            Ok(conv) => {
2289                if let Err(e) = insert_cloud_conversation_to_harvest_db(conn, &conv, "claude", None)
2290                {
2291                    eprintln!("Failed to insert Claude session: {}", e);
2292                    continue;
2293                }
2294                harvested += 1;
2295                stats.sessions_added += 1;
2296            }
2297            Err(e) => {
2298                eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2299            }
2300        }
2301
2302        std::thread::sleep(std::time::Duration::from_millis(100));
2303    }
2304
2305    Ok(harvested)
2306}
2307
2308/// Insert a CloudConversation into the harvest database
2309fn insert_cloud_conversation_to_harvest_db(
2310    conn: &Connection,
2311    conv: &crate::providers::cloud::common::CloudConversation,
2312    provider: &str,
2313    workspace_name: Option<&str>,
2314) -> Result<()> {
2315    let now = Utc::now().timestamp_millis();
2316    let created_at = conv.created_at.timestamp_millis();
2317    let updated_at = conv
2318        .updated_at
2319        .map(|dt| dt.timestamp_millis())
2320        .unwrap_or(created_at);
2321
2322    // Build a session-like JSON structure for compatibility
2323    let session_json = serde_json::json!({
2324        "id": conv.id,
2325        "title": conv.title,
2326        "model": conv.model,
2327        "created_at": conv.created_at.to_rfc3339(),
2328        "updated_at": conv.updated_at.map(|dt| dt.to_rfc3339()),
2329        "messages": conv.messages.iter().map(|m| {
2330            serde_json::json!({
2331                "id": m.id,
2332                "role": m.role,
2333                "content": m.content,
2334                "timestamp": m.timestamp.map(|dt| dt.to_rfc3339()),
2335                "model": m.model,
2336            })
2337        }).collect::<Vec<_>>(),
2338    });
2339
2340    conn.execute(
2341        r#"
2342        INSERT OR REPLACE INTO sessions 
2343        (id, provider, provider_type, workspace_id, workspace_name, title, 
2344         message_count, created_at, updated_at, harvested_at, session_json)
2345        VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
2346        "#,
2347        params![
2348            conv.id,
2349            provider,
2350            provider,
2351            workspace_name,
2352            conv.title.clone().unwrap_or_else(|| "Untitled".to_string()),
2353            conv.messages.len() as i64,
2354            created_at,
2355            updated_at,
2356            now,
2357            session_json.to_string(),
2358        ],
2359    )?;
2360
2361    // Also populate messages_v2 table for detailed message storage
2362    populate_cloud_messages(conn, &conv.id, conv)?;
2363
2364    Ok(())
2365}
2366
2367/// Populate messages_v2 table from a CloudConversation
2368fn populate_cloud_messages(
2369    conn: &Connection,
2370    session_id: &str,
2371    conv: &crate::providers::cloud::common::CloudConversation,
2372) -> Result<()> {
2373    // Delete existing messages for this session to avoid duplicates
2374    conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
2375
2376    for (idx, message) in conv.messages.iter().enumerate() {
2377        let timestamp = message.timestamp.map(|dt| dt.timestamp_millis());
2378        let role = match message.role.as_str() {
2379            "user" | "human" => "user",
2380            "assistant" => "assistant",
2381            "system" => "system",
2382            other => other,
2383        };
2384
2385        conn.execute(
2386            r#"
2387            INSERT INTO messages_v2 
2388            (session_id, message_index, request_id, response_id, role, 
2389             content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
2390            VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, 0, NULL)
2391            "#,
2392            params![
2393                session_id,
2394                idx as i64,
2395                message.id,
2396                role,
2397                &message.content,
2398                &message.content,
2399                message.model.as_deref(),
2400                timestamp,
2401            ],
2402        )?;
2403    }
2404
2405    Ok(())
2406}
2407
2408fn update_harvest_metadata(conn: &Connection) -> Result<()> {
2409    conn.execute(
2410        "INSERT OR REPLACE INTO harvest_metadata (key, value) VALUES ('last_harvest', datetime('now'))",
2411        [],
2412    )?;
2413    Ok(())
2414}
2415
2416fn init_git_tracking(db_path: &Path) -> Result<()> {
2417    let db_dir = db_path.parent().unwrap_or(Path::new("."));
2418    let db_name = db_path.file_name().unwrap().to_str().unwrap();
2419
2420    println!("\n{} Initializing Git Tracking", "[G]".green().bold());
2421
2422    // Check if already a git repo
2423    if !db_dir.join(".git").exists() {
2424        let output = Command::new("git")
2425            .current_dir(db_dir)
2426            .args(["init"])
2427            .output()
2428            .context("Failed to run git init")?;
2429
2430        if !output.status.success() {
2431            anyhow::bail!(
2432                "Git init failed: {}",
2433                String::from_utf8_lossy(&output.stderr)
2434            );
2435        }
2436        println!("{} Initialized git repository", "[+]".green());
2437    } else {
2438        println!("{} Git repository already exists", "[i]".blue());
2439    }
2440
2441    // Create .gitignore if needed
2442    let gitignore_path = db_dir.join(".gitignore");
2443    if !gitignore_path.exists() {
2444        fs::write(
2445            &gitignore_path,
2446            "*.db-journal\n*.db-wal\n*.db-shm\n*.backup\n",
2447        )?;
2448        println!("{} Created .gitignore", "[+]".green());
2449    }
2450
2451    // Add database to git
2452    let output = Command::new("git")
2453        .current_dir(db_dir)
2454        .args(["add", db_name])
2455        .output()?;
2456
2457    if output.status.success() {
2458        println!("{} Added {} to git", "[+]".green(), db_name);
2459    }
2460
2461    // Initial commit if no commits yet
2462    let output = Command::new("git")
2463        .current_dir(db_dir)
2464        .args(["rev-parse", "HEAD"])
2465        .output()?;
2466
2467    if !output.status.success() {
2468        let output = Command::new("git")
2469            .current_dir(db_dir)
2470            .args(["commit", "-m", "Initialize harvest database"])
2471            .output()?;
2472
2473        if output.status.success() {
2474            println!("{} Created initial commit", "[+]".green());
2475        }
2476    }
2477
2478    println!("\n{} Git tracking enabled!", "[+]".green().bold());
2479    println!("   Run 'csm harvest git commit -m \"message\"' to save changes");
2480    println!("   Run 'csm harvest git log' to view history");
2481
2482    Ok(())
2483}
2484
2485fn git_commit_harvest(db_path: &Path, message: &str) -> Result<()> {
2486    let db_dir = db_path.parent().unwrap_or(Path::new("."));
2487    let db_name = db_path.file_name().unwrap().to_str().unwrap();
2488
2489    // Stage the database
2490    let output = Command::new("git")
2491        .current_dir(db_dir)
2492        .args(["add", db_name])
2493        .output()
2494        .context("Failed to stage database")?;
2495
2496    if !output.status.success() {
2497        anyhow::bail!(
2498            "Git add failed: {}",
2499            String::from_utf8_lossy(&output.stderr)
2500        );
2501    }
2502
2503    // Commit
2504    let output = Command::new("git")
2505        .current_dir(db_dir)
2506        .args(["commit", "-m", message])
2507        .output()
2508        .context("Failed to commit")?;
2509
2510    if !output.status.success() {
2511        let stderr = String::from_utf8_lossy(&output.stderr);
2512        if stderr.contains("nothing to commit") {
2513            println!("{} Nothing to commit", "[i]".blue());
2514            return Ok(());
2515        }
2516        anyhow::bail!("Git commit failed: {}", stderr);
2517    }
2518
2519    // Get commit hash
2520    let output = Command::new("git")
2521        .current_dir(db_dir)
2522        .args(["rev-parse", "--short", "HEAD"])
2523        .output()?;
2524
2525    let hash = String::from_utf8_lossy(&output.stdout).trim().to_string();
2526    println!("{} Committed: {} - {}", "[+]".green(), hash.cyan(), message);
2527
2528    Ok(())
2529}
2530
2531// ============================================================================
2532// Share Link Commands
2533// ============================================================================
2534
2535/// Import a chat session from a share link URL
2536pub fn harvest_share(
2537    db_path: Option<&str>,
2538    url: &str,
2539    name: Option<&str>,
2540    _workspace: Option<&str>,
2541) -> Result<()> {
2542    let db_path = get_db_path(db_path)?;
2543
2544    // Parse the share link
2545    let share_info = match ShareLinkParser::parse(url) {
2546        Some(info) => info,
2547        None => {
2548            println!("{} Unrecognized share link format", "[!]".yellow());
2549            println!("   Supported providers: ChatGPT, Claude, Gemini, Perplexity");
2550            println!("   Example URLs:");
2551            println!("   - https://chat.openai.com/share/abc123...");
2552            println!("   - https://claude.ai/share/xyz789...");
2553            anyhow::bail!("Could not parse share link URL");
2554        }
2555    };
2556
2557    println!("{}", "=".repeat(60).cyan());
2558    println!("{}", " Share Link Import ".bold().cyan());
2559    println!("{}", "=".repeat(60).cyan());
2560    println!();
2561
2562    println!(
2563        "{} Detected provider: {}",
2564        "[i]".blue(),
2565        share_info.provider.bold()
2566    );
2567    println!("{} Share ID: {}", "[i]".blue(), share_info.share_id);
2568
2569    // Open or create the database with new schema
2570    let db = ChatDatabase::open(&db_path)?;
2571
2572    // Check if already imported
2573    let conn = db.connection();
2574    let existing: Option<(String, i64)> = conn
2575        .query_row(
2576            "SELECT id, imported FROM share_links WHERE url = ?",
2577            [url],
2578            |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
2579        )
2580        .ok();
2581
2582    if let Some((id, imported)) = existing {
2583        println!();
2584        println!(
2585            "{} This share link has already been registered (ID: {})",
2586            "[!]".yellow(),
2587            id
2588        );
2589
2590        let status = if imported == 1 { "imported" } else { "pending" };
2591        println!("{} Current status: {}", "[i]".blue(), status);
2592
2593        if imported == 1 {
2594            println!(
2595                "{} Session already imported - no action needed",
2596                "[+]".green()
2597            );
2598            return Ok(());
2599        }
2600    } else {
2601        // Insert the share link as pending
2602        let link_id = uuid::Uuid::new_v4().to_string();
2603        let now = chrono::Utc::now().timestamp();
2604        conn.execute(
2605            "INSERT INTO share_links (id, url, provider, share_id, title, imported, created_at) VALUES (?, ?, ?, ?, ?, 0, ?)",
2606            params![link_id, url, share_info.provider, share_info.share_id, name, now],
2607        )?;
2608
2609        println!(
2610            "{} Registered share link (ID: {})",
2611            "[+]".green(),
2612            &link_id[..8]
2613        );
2614    }
2615
2616    println!();
2617    println!("{}", "-".repeat(60).dimmed());
2618    println!();
2619
2620    // TODO: Implement actual content fetching
2621    // For now, we just register the share link and mark it as pending
2622    // Actual fetching would require:
2623    // 1. Browser cookie extraction (already implemented in browser.rs)
2624    // 2. HTTP request with auth cookies
2625    // 3. HTML/JSON parsing of the share page
2626    // 4. Conversion to our universal format
2627
2628    println!("{} Share link registered as pending", "[i]".blue());
2629    println!();
2630    println!("{}", "Note:".bold().yellow());
2631    println!("   Automatic content fetching from share links is not yet implemented.");
2632    println!("   For now, you can:");
2633    println!("   1. Open the share link in your browser");
2634    println!("   2. Export the conversation manually");
2635    println!("   3. Import with: csm import <file>");
2636    println!();
2637    println!("   Or use 'csm harvest shares' to view pending links.");
2638
2639    Ok(())
2640}
2641
2642/// List share links in the harvest database
2643pub fn harvest_shares(
2644    db_path: Option<&str>,
2645    status_filter: Option<&str>,
2646    limit: usize,
2647) -> Result<()> {
2648    let db_path = get_db_path(db_path)?;
2649
2650    if !db_path.exists() {
2651        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2652    }
2653
2654    let db = ChatDatabase::open(&db_path)?;
2655    let conn = db.connection();
2656
2657    println!("{}", "=".repeat(70).cyan());
2658    println!("{}", " Share Links ".bold().cyan());
2659    println!("{}", "=".repeat(70).cyan());
2660    println!();
2661
2662    let query = match status_filter {
2663        Some("pending") => format!(
2664            "SELECT id, url, provider, share_id, title, imported, created_at 
2665             FROM share_links WHERE imported = 0 ORDER BY created_at DESC LIMIT {}",
2666            limit
2667        ),
2668        Some("imported") => format!(
2669            "SELECT id, url, provider, share_id, title, imported, created_at 
2670             FROM share_links WHERE imported = 1 ORDER BY created_at DESC LIMIT {}",
2671            limit
2672        ),
2673        Some(_) | None => format!(
2674            "SELECT id, url, provider, share_id, title, imported, created_at 
2675             FROM share_links ORDER BY created_at DESC LIMIT {}",
2676            limit
2677        ),
2678    };
2679
2680    let mut stmt = conn.prepare(&query)?;
2681
2682    let rows: Vec<_> = stmt
2683        .query_map([], |row| {
2684            Ok((
2685                row.get::<_, String>(0)?,
2686                row.get::<_, String>(1)?,
2687                row.get::<_, String>(2)?,
2688                row.get::<_, Option<String>>(3)?,
2689                row.get::<_, Option<String>>(4)?,
2690                row.get::<_, i64>(5)?,
2691                row.get::<_, i64>(6)?,
2692            ))
2693        })?
2694        .collect::<Result<Vec<_>, _>>()?;
2695
2696    if rows.is_empty() {
2697        if let Some(status) = status_filter {
2698            println!("{} No share links with status '{}'", "[i]".blue(), status);
2699        } else {
2700            println!("{} No share links found", "[i]".blue());
2701        }
2702        println!("   Use 'csm harvest share <url>' to add a share link");
2703        return Ok(());
2704    }
2705
2706    // Count by status
2707    let pending_count: i64 = conn
2708        .query_row(
2709            "SELECT COUNT(*) FROM share_links WHERE imported = 0",
2710            [],
2711            |row| row.get(0),
2712        )
2713        .unwrap_or(0);
2714
2715    let imported_count: i64 = conn
2716        .query_row(
2717            "SELECT COUNT(*) FROM share_links WHERE imported = 1",
2718            [],
2719            |row| row.get(0),
2720        )
2721        .unwrap_or(0);
2722
2723    println!(
2724        "{} {} pending, {} imported",
2725        "[i]".blue(),
2726        pending_count.to_string().yellow(),
2727        imported_count.to_string().green()
2728    );
2729    println!();
2730
2731    for (id, url, provider, _share_id, title, imported, created_at) in rows {
2732        let status = if imported == 1 { "imported" } else { "pending" };
2733        let status_colored = if imported == 1 {
2734            status.green()
2735        } else {
2736            status.yellow()
2737        };
2738
2739        println!(
2740            "{} [{}] {} - {}",
2741            format!("#{}", &id[..8]).dimmed(),
2742            status_colored,
2743            provider.bold(),
2744            title.as_deref().unwrap_or("(untitled)")
2745        );
2746
2747        // Truncate URL for display
2748        let display_url = if url.len() > 60 {
2749            format!("{}...", &url[..57])
2750        } else {
2751            url.clone()
2752        };
2753        println!("   {} {}", "URL:".dimmed(), display_url.dimmed());
2754
2755        // Format timestamp
2756        let timestamp = chrono::DateTime::from_timestamp(created_at, 0)
2757            .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
2758            .unwrap_or_else(|| created_at.to_string());
2759        println!("   {} {}", "Added:".dimmed(), timestamp.dimmed());
2760        println!();
2761    }
2762
2763    Ok(())
2764}
2765
2766// ============================================================================
2767// Checkpoint Commands
2768// ============================================================================
2769
2770/// Create a checkpoint (version snapshot) of a session
2771pub fn harvest_checkpoint(
2772    db_path: Option<&str>,
2773    session_id: &str,
2774    message: Option<&str>,
2775) -> Result<()> {
2776    let db_path = get_db_path(db_path)?;
2777
2778    if !db_path.exists() {
2779        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2780    }
2781
2782    let db = ChatDatabase::open(&db_path)?;
2783    let conn = db.connection();
2784
2785    // Find the session
2786    let session: Option<(i64, String, i64)> = conn
2787        .query_row(
2788            "SELECT id, session_id, message_count FROM sessions WHERE session_id = ? OR id = ?",
2789            params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2790            |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2791        )
2792        .ok();
2793
2794    let (internal_id, actual_session_id, msg_count) = match session {
2795        Some(s) => s,
2796        None => {
2797            println!("{} Session not found: {}", "[!]".red(), session_id);
2798            println!("   Use 'csm harvest list' to see available sessions");
2799            anyhow::bail!("Session not found");
2800        }
2801    };
2802
2803    // Get current checkpoint number
2804    let checkpoint_num: i64 = conn.query_row(
2805        "SELECT COALESCE(MAX(checkpoint_number), 0) + 1 FROM checkpoints WHERE session_id = ?",
2806        [internal_id],
2807        |row| row.get(0),
2808    )?;
2809
2810    // Calculate content hash (simple hash of message count + session_id for now)
2811    let content_hash = format!(
2812        "{:x}",
2813        md5_hash(&format!("{}:{}", actual_session_id, msg_count))
2814    );
2815
2816    // Get message snapshot
2817    let messages: Vec<String> = {
2818        let mut stmt = conn
2819            .prepare("SELECT content FROM messages WHERE session_id = ? ORDER BY message_index")?;
2820        let rows = stmt.query_map([internal_id], |row| row.get::<_, String>(0))?;
2821        rows.filter_map(|r| r.ok()).collect()
2822    };
2823
2824    let snapshot = serde_json::json!({
2825        "message_count": msg_count,
2826        "messages": messages,
2827    });
2828
2829    let default_message = format!("Checkpoint {}", checkpoint_num);
2830    let message_text = message.unwrap_or(&default_message);
2831
2832    // Create checkpoint
2833    conn.execute(
2834        "INSERT INTO checkpoints (session_id, checkpoint_number, message, message_count, content_hash, snapshot)
2835         VALUES (?, ?, ?, ?, ?, ?)",
2836        params![
2837            internal_id,
2838            checkpoint_num,
2839            message_text,
2840            msg_count,
2841            content_hash,
2842            snapshot.to_string()
2843        ],
2844    )?;
2845
2846    println!("{}", "=".repeat(60).cyan());
2847    println!("{}", " Checkpoint Created ".bold().cyan());
2848    println!("{}", "=".repeat(60).cyan());
2849    println!();
2850    println!("{} Session: {}", "[+]".green(), actual_session_id);
2851    println!(
2852        "{} Checkpoint #{}: {}",
2853        "[+]".green(),
2854        checkpoint_num,
2855        message_text
2856    );
2857    println!("{} Messages: {}", "[i]".blue(), msg_count);
2858    println!("{} Hash: {}", "[i]".blue(), &content_hash[..16]);
2859    println!();
2860    println!(
2861        "   Use 'csm harvest checkpoints {}' to view history",
2862        session_id
2863    );
2864
2865    Ok(())
2866}
2867
2868/// List checkpoints for a session
2869pub fn harvest_checkpoints(db_path: Option<&str>, session_id: &str) -> Result<()> {
2870    let db_path = get_db_path(db_path)?;
2871
2872    if !db_path.exists() {
2873        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2874    }
2875
2876    let db = ChatDatabase::open(&db_path)?;
2877    let conn = db.connection();
2878
2879    // Find the session
2880    let session: Option<(i64, String, String)> = conn
2881        .query_row(
2882            "SELECT id, session_id, name FROM sessions WHERE session_id = ? OR id = ?",
2883            params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2884            |row| {
2885                Ok((
2886                    row.get(0)?,
2887                    row.get(1)?,
2888                    row.get::<_, Option<String>>(2)?.unwrap_or_default(),
2889                ))
2890            },
2891        )
2892        .ok();
2893
2894    let (internal_id, actual_session_id, name) = match session {
2895        Some(s) => s,
2896        None => {
2897            println!("{} Session not found: {}", "[!]".red(), session_id);
2898            anyhow::bail!("Session not found");
2899        }
2900    };
2901
2902    println!("{}", "=".repeat(70).cyan());
2903    println!("{}", " Session Checkpoints ".bold().cyan());
2904    println!("{}", "=".repeat(70).cyan());
2905    println!();
2906    println!(
2907        "{} Session: {} {}",
2908        "[i]".blue(),
2909        actual_session_id,
2910        if !name.is_empty() {
2911            format!("({})", name)
2912        } else {
2913            String::new()
2914        }
2915    );
2916    println!();
2917
2918    let mut stmt = conn.prepare(
2919        "SELECT checkpoint_number, message, message_count, content_hash, created_at
2920         FROM checkpoints WHERE session_id = ? ORDER BY checkpoint_number DESC",
2921    )?;
2922
2923    let checkpoints: Vec<_> = stmt
2924        .query_map([internal_id], |row| {
2925            Ok((
2926                row.get::<_, i64>(0)?,
2927                row.get::<_, String>(1)?,
2928                row.get::<_, i64>(2)?,
2929                row.get::<_, String>(3)?,
2930                row.get::<_, String>(4)?,
2931            ))
2932        })?
2933        .collect::<Result<Vec<_>, _>>()?;
2934
2935    if checkpoints.is_empty() {
2936        println!("{} No checkpoints found for this session", "[i]".blue());
2937        println!(
2938            "   Use 'csm harvest checkpoint {} -m \"message\"' to create one",
2939            session_id
2940        );
2941        return Ok(());
2942    }
2943
2944    println!("{} {} checkpoints found:", "[i]".blue(), checkpoints.len());
2945    println!();
2946
2947    for (num, msg, msg_count, hash, created_at) in checkpoints {
2948        println!("  {} #{} - {}", "*".cyan(), num.to_string().bold(), msg);
2949        println!(
2950            "       {} messages | {} | {}",
2951            msg_count,
2952            &hash[..12],
2953            created_at.dimmed()
2954        );
2955    }
2956
2957    println!();
2958    println!(
2959        "   Use 'csm harvest restore {} <checkpoint>' to restore",
2960        session_id
2961    );
2962
2963    Ok(())
2964}
2965
2966/// Restore a session to a previous checkpoint
2967pub fn harvest_restore_checkpoint(
2968    db_path: Option<&str>,
2969    session_id: &str,
2970    checkpoint_number: i64,
2971) -> Result<()> {
2972    let db_path = get_db_path(db_path)?;
2973
2974    if !db_path.exists() {
2975        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2976    }
2977
2978    let db = ChatDatabase::open(&db_path)?;
2979    let conn = db.connection();
2980
2981    // Find the session
2982    let session: Option<(i64, String)> = conn
2983        .query_row(
2984            "SELECT id, session_id FROM sessions WHERE session_id = ? OR id = ?",
2985            params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2986            |row| Ok((row.get(0)?, row.get(1)?)),
2987        )
2988        .ok();
2989
2990    let (internal_id, actual_session_id) = match session {
2991        Some(s) => s,
2992        None => {
2993            println!("{} Session not found: {}", "[!]".red(), session_id);
2994            anyhow::bail!("Session not found");
2995        }
2996    };
2997
2998    // Find the checkpoint
2999    let checkpoint: Option<(String, i64)> = conn
3000        .query_row(
3001            "SELECT snapshot, message_count FROM checkpoints 
3002         WHERE session_id = ? AND checkpoint_number = ?",
3003            params![internal_id, checkpoint_number],
3004            |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
3005        )
3006        .ok();
3007
3008    let (snapshot_json, original_msg_count) = match checkpoint {
3009        Some(c) => c,
3010        None => {
3011            println!(
3012                "{} Checkpoint #{} not found for session {}",
3013                "[!]".red(),
3014                checkpoint_number,
3015                session_id
3016            );
3017            println!(
3018                "   Use 'csm harvest checkpoints {}' to see available checkpoints",
3019                session_id
3020            );
3021            anyhow::bail!("Checkpoint not found");
3022        }
3023    };
3024
3025    // Parse snapshot
3026    let snapshot: serde_json::Value =
3027        serde_json::from_str(&snapshot_json).context("Failed to parse checkpoint snapshot")?;
3028
3029    let messages = snapshot["messages"]
3030        .as_array()
3031        .context("Invalid snapshot format")?;
3032
3033    println!("{}", "=".repeat(60).cyan());
3034    println!("{}", " Restore Checkpoint ".bold().yellow());
3035    println!("{}", "=".repeat(60).cyan());
3036    println!();
3037    println!("{} Session: {}", "[i]".blue(), actual_session_id);
3038    println!(
3039        "{} Restoring to checkpoint #{}",
3040        "[!]".yellow(),
3041        checkpoint_number
3042    );
3043    println!(
3044        "{} Messages to restore: {}",
3045        "[i]".blue(),
3046        original_msg_count
3047    );
3048    println!();
3049
3050    // Delete current messages
3051    let deleted = conn.execute("DELETE FROM messages WHERE session_id = ?", [internal_id])?;
3052
3053    println!("{} Removed {} current messages", "[-]".red(), deleted);
3054
3055    // Restore messages from snapshot
3056    for (idx, msg) in messages.iter().enumerate() {
3057        if let Some(content) = msg.as_str() {
3058            conn.execute(
3059                "INSERT INTO messages (session_id, message_index, role, content) VALUES (?, ?, 'unknown', ?)",
3060                params![internal_id, idx as i64, content],
3061            )?;
3062        }
3063    }
3064
3065    // Update session message count
3066    conn.execute(
3067        "UPDATE sessions SET message_count = ?, updated_at = datetime('now') WHERE id = ?",
3068        params![original_msg_count, internal_id],
3069    )?;
3070
3071    println!(
3072        "{} Restored {} messages from checkpoint",
3073        "[+]".green(),
3074        messages.len()
3075    );
3076    println!();
3077    println!(
3078        "{} Session restored to checkpoint #{}",
3079        "[+]".green().bold(),
3080        checkpoint_number
3081    );
3082
3083    Ok(())
3084}
3085
3086// ============================================================================
3087// Search Commands
3088// ============================================================================
3089
3090/// Rebuild the FTS index from messages_v2 table
3091pub fn harvest_rebuild_fts(db_path: Option<&str>) -> Result<()> {
3092    let db_path = get_db_path(db_path)?;
3093
3094    if !db_path.exists() {
3095        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3096    }
3097
3098    let conn = Connection::open(&db_path)?;
3099
3100    println!("{}", "=".repeat(70).cyan());
3101    println!("{} Rebuilding Full-Text Search Index", "[*]".bold());
3102    println!("{}", "=".repeat(70).cyan());
3103    println!();
3104
3105    // Drop existing FTS table and triggers
3106    println!("{} Dropping old FTS index...", "[*]".blue());
3107    conn.execute_batch(
3108        r#"
3109        DROP TRIGGER IF EXISTS messages_v2_ai;
3110        DROP TRIGGER IF EXISTS messages_v2_ad;
3111        DROP TRIGGER IF EXISTS messages_v2_au;
3112        DROP TABLE IF EXISTS messages_fts;
3113        "#,
3114    )?;
3115
3116    // Create new FTS table with content_raw and session_id for filtering
3117    println!("{} Creating new FTS index...", "[*]".blue());
3118    conn.execute_batch(
3119        r#"
3120        CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
3121            content_raw,
3122            content='messages_v2',
3123            content_rowid='id'
3124        );
3125        
3126        CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
3127            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3128        END;
3129        
3130        CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
3131            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3132        END;
3133        
3134        CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
3135            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3136            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3137        END;
3138        "#,
3139    )?;
3140
3141    // Count total messages
3142    let total_messages: i64 =
3143        conn.query_row("SELECT COUNT(*) FROM messages_v2", [], |row| row.get(0))?;
3144
3145    // Populate FTS from existing messages
3146    println!("{} Indexing {} messages...", "[*]".blue(), total_messages);
3147
3148    conn.execute(
3149        "INSERT INTO messages_fts(rowid, content_raw) SELECT id, content_raw FROM messages_v2",
3150        [],
3151    )?;
3152
3153    // Verify the index
3154    let indexed: i64 = conn.query_row("SELECT COUNT(*) FROM messages_fts", [], |row| row.get(0))?;
3155
3156    println!();
3157    println!("{} FTS index rebuilt successfully!", "[+]".green().bold());
3158    println!("   {} messages indexed", indexed);
3159
3160    Ok(())
3161}
3162
3163/// Full-text search across all sessions
3164pub fn harvest_search(
3165    db_path: Option<&str>,
3166    query: &str,
3167    provider_filter: Option<&str>,
3168    limit: usize,
3169) -> Result<()> {
3170    let db_path = get_db_path(db_path)?;
3171
3172    if !db_path.exists() {
3173        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3174    }
3175
3176    let db = ChatDatabase::open(&db_path)?;
3177    let conn = db.connection();
3178
3179    println!("{}", "=".repeat(70).cyan());
3180    println!("{} Search: {}", "[?]".bold(), query.bold());
3181    println!("{}", "=".repeat(70).cyan());
3182    println!();
3183
3184    // Results: (session_id, provider, title, snippet)
3185    let results: Vec<(String, String, String, String)> = {
3186        // Check if FTS table exists
3187        let fts_exists: bool = conn
3188            .query_row(
3189                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='messages_fts'",
3190                [],
3191                |_| Ok(true),
3192            )
3193            .unwrap_or(false);
3194
3195        if fts_exists {
3196            // Use FTS5 search with built-in snippet() for fast highlighting
3197            // and rank for relevance ordering, deduplicated per session
3198            let sql = format!(
3199                "SELECT s.id, s.provider, s.title,
3200                        snippet(messages_fts, 0, '>>>', '<<<', '...', 32) as snip
3201                 FROM messages_fts fts
3202                 JOIN messages_v2 m ON m.id = fts.rowid
3203                 JOIN sessions s ON m.session_id = s.id
3204                 WHERE messages_fts MATCH ?
3205                 {}
3206                 GROUP BY s.id
3207                 ORDER BY rank
3208                 LIMIT {}",
3209                if provider_filter.is_some() {
3210                    "AND s.provider = ?"
3211                } else {
3212                    ""
3213                },
3214                limit
3215            );
3216
3217            let mut stmt = conn.prepare(&sql)?;
3218
3219            if let Some(provider) = provider_filter {
3220                stmt.query_map(params![query, provider], |row| {
3221                    Ok((
3222                        row.get::<_, String>(0)?,
3223                        row.get::<_, String>(1)?,
3224                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3225                        row.get::<_, String>(3)?,
3226                    ))
3227                })?
3228                .collect::<Result<Vec<_>, _>>()?
3229            } else {
3230                stmt.query_map([query], |row| {
3231                    Ok((
3232                        row.get::<_, String>(0)?,
3233                        row.get::<_, String>(1)?,
3234                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3235                        row.get::<_, String>(3)?,
3236                    ))
3237                })?
3238                .collect::<Result<Vec<_>, _>>()?
3239            }
3240        } else {
3241            // Fall back to LIKE search with session deduplication
3242            let search_pattern = format!("%{}%", query);
3243            let sql = format!(
3244                "SELECT s.id, s.provider, s.title,
3245                        SUBSTR(m.content_raw,
3246                            MAX(1, INSTR(LOWER(m.content_raw), LOWER(?)) - 50),
3247                            150) as snip
3248                 FROM messages_v2 m
3249                 JOIN sessions s ON m.session_id = s.id
3250                 WHERE m.content_raw LIKE ?
3251                 {}
3252                 GROUP BY s.id
3253                 ORDER BY s.updated_at DESC
3254                 LIMIT {}",
3255                if provider_filter.is_some() {
3256                    "AND s.provider = ?"
3257                } else {
3258                    ""
3259                },
3260                limit
3261            );
3262
3263            let mut stmt = conn.prepare(&sql)?;
3264
3265            if let Some(provider) = provider_filter {
3266                stmt.query_map(params![query, search_pattern, provider], |row| {
3267                    Ok((
3268                        row.get::<_, String>(0)?,
3269                        row.get::<_, String>(1)?,
3270                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3271                        row.get::<_, String>(3)?,
3272                    ))
3273                })?
3274                .collect::<Result<Vec<_>, _>>()?
3275            } else {
3276                stmt.query_map(params![query, search_pattern], |row| {
3277                    Ok((
3278                        row.get::<_, String>(0)?,
3279                        row.get::<_, String>(1)?,
3280                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3281                        row.get::<_, String>(3)?,
3282                    ))
3283                })?
3284                .collect::<Result<Vec<_>, _>>()?
3285            }
3286        }
3287    };
3288
3289    if results.is_empty() {
3290        println!("{} No results found for '{}'", "[i]".blue(), query);
3291        return Ok(());
3292    }
3293
3294    println!("{} Found {} result(s):", "[i]".blue(), results.len());
3295    println!();
3296
3297    for (session_id, provider, title, snippet) in results {
3298        let display_name = if title.is_empty() {
3299            session_id.clone()
3300        } else {
3301            format!("{} ({})", title, &session_id[..8.min(session_id.len())])
3302        };
3303
3304        println!(
3305            "{} {} [{}]",
3306            "*".cyan(),
3307            display_name.bold(),
3308            provider.dimmed()
3309        );
3310
3311        // Display the snippet (already extracted by SQL or FTS5 snippet())
3312        let clean_snippet = snippet
3313            .replace('\n', " ")
3314            .replace('\r', "");
3315        println!("   {}", clean_snippet.dimmed());
3316        println!();
3317    }
3318
3319    Ok(())
3320}
3321
3322/// Create a search result snippet with the query highlighted
3323fn create_search_snippet(content: &str, query: &str, max_len: usize) -> String {
3324    let content_lower = content.to_lowercase();
3325    let query_lower = query.to_lowercase();
3326
3327    if let Some(pos) = content_lower.find(&query_lower) {
3328        let start = pos.saturating_sub(max_len / 2);
3329        let end = (pos + query.len() + max_len / 2).min(content.len());
3330
3331        let mut snippet = String::new();
3332        if start > 0 {
3333            snippet.push_str("...");
3334        }
3335        snippet.push_str(&content[start..end]);
3336        if end < content.len() {
3337            snippet.push_str("...");
3338        }
3339
3340        // Replace newlines with spaces for display
3341        snippet.replace('\n', " ").replace('\r', "")
3342    } else {
3343        // Fallback: just show first max_len chars
3344        if content.len() > max_len {
3345            format!("{}...", &content[..max_len])
3346        } else {
3347            content.to_string()
3348        }
3349    }
3350}
3351
3352/// Simple MD5 hash for content checksums
3353fn md5_hash(data: &str) -> u128 {
3354    // Simple hash implementation (not cryptographically secure, just for checksums)
3355    let mut hash: u128 = 0;
3356    for (i, byte) in data.bytes().enumerate() {
3357        hash = hash.wrapping_add((byte as u128).wrapping_mul((i as u128).wrapping_add(1)));
3358        hash = hash.rotate_left(7);
3359    }
3360    hash
3361}
3362
3363/// Sync sessions between the harvest database and provider workspaces
3364///
3365/// This command provides bidirectional sync capabilities:
3366/// - `--push`: Write sessions from the database to provider workspace directories
3367/// - `--pull`: Import sessions from provider workspaces into the database (similar to harvest run)
3368///
3369/// Sync supports filtering by provider, workspace, or specific session IDs.
3370pub fn harvest_sync(
3371    path: Option<&str>,
3372    push: bool,
3373    pull: bool,
3374    provider: Option<&str>,
3375    workspace: Option<&str>,
3376    sessions: Option<&[String]>,
3377    format: Option<&str>,
3378    force: bool,
3379    dry_run: bool,
3380) -> Result<()> {
3381    // Validate arguments
3382    if !push && !pull {
3383        anyhow::bail!("Must specify either --push or --pull (or both)");
3384    }
3385
3386    // Determine database path
3387    let db_path = get_db_path(path)?;
3388
3389    if !db_path.exists() {
3390        anyhow::bail!(
3391            "Harvest database not found at {:?}. Run 'harvest run' first.",
3392            db_path
3393        );
3394    }
3395
3396    println!(
3397        "{} Sync Operation",
3398        if dry_run {
3399            "[DRY RUN]".yellow()
3400        } else {
3401            "[SYNC]".green()
3402        }
3403    );
3404    println!("   Database: {}", db_path.display().to_string().cyan());
3405
3406    if push {
3407        println!("\n{} Push: Database → Provider Workspaces", "[→]".blue());
3408        sync_push(
3409            &db_path, provider, workspace, sessions, format, force, dry_run,
3410        )?;
3411    }
3412
3413    if pull {
3414        println!("\n{} Pull: Provider Workspaces → Database", "[←]".blue());
3415        sync_pull(&db_path, provider, workspace, sessions, force, dry_run)?;
3416    }
3417
3418    if dry_run {
3419        println!(
3420            "\n{} Dry run complete. No changes were made.",
3421            "[!]".yellow()
3422        );
3423    } else {
3424        println!("\n{} Sync complete.", "[✓]".green());
3425    }
3426
3427    Ok(())
3428}
3429
3430/// Push sessions from the database to provider workspace directories
3431fn sync_push(
3432    db_path: &Path,
3433    provider: Option<&str>,
3434    workspace: Option<&str>,
3435    sessions: Option<&[String]>,
3436    format: Option<&str>,
3437    force: bool,
3438    dry_run: bool,
3439) -> Result<()> {
3440    let conn = Connection::open(db_path)?;
3441
3442    // Build query with filters
3443    let mut sql = String::from(
3444        r#"SELECT id, provider, workspace_path, workspace_name, session_json, file_format 
3445           FROM sessions WHERE workspace_path IS NOT NULL"#,
3446    );
3447    let mut params_vec: Vec<String> = Vec::new();
3448
3449    if let Some(p) = provider {
3450        sql.push_str(" AND provider = ?");
3451        params_vec.push(p.to_string());
3452    }
3453
3454    if let Some(w) = workspace {
3455        sql.push_str(" AND (workspace_path LIKE ? OR workspace_name LIKE ?)");
3456        params_vec.push(format!("%{}%", w));
3457        params_vec.push(format!("%{}%", w));
3458    }
3459
3460    if let Some(sess_ids) = sessions {
3461        if !sess_ids.is_empty() {
3462            let placeholders: Vec<&str> = sess_ids.iter().map(|_| "?").collect();
3463            sql.push_str(&format!(" AND id IN ({})", placeholders.join(",")));
3464            for id in sess_ids {
3465                params_vec.push(id.clone());
3466            }
3467        }
3468    }
3469
3470    let mut stmt = conn.prepare(&sql)?;
3471
3472    // Build params array for rusqlite
3473    let params_refs: Vec<&dyn rusqlite::ToSql> = params_vec
3474        .iter()
3475        .map(|s| s as &dyn rusqlite::ToSql)
3476        .collect();
3477
3478    let rows = stmt.query_map(params_refs.as_slice(), |row| {
3479        Ok((
3480            row.get::<_, String>(0)?,      // id
3481            row.get::<_, String>(1)?,      // provider
3482            row.get::<_, String>(2)?,      // workspace_path
3483            row.get::<_, Option<String>>(3)?, // workspace_name
3484            row.get::<_, String>(4)?,      // session_json
3485            row.get::<_, Option<String>>(5)?, // file_format
3486        ))
3487    })?;
3488
3489    let target_format = format.unwrap_or("jsonl");
3490    let mut pushed = 0;
3491    let mut skipped = 0;
3492    let mut errors = 0;
3493
3494    for row_result in rows {
3495        match row_result {
3496            Ok((session_id, provider_name, workspace_path, workspace_name, session_json, stored_format)) => {
3497                // Determine target directory
3498                let ws_path = PathBuf::from(&workspace_path);
3499                let chat_dir = ws_path.join(".vscode").join("chat");
3500
3501                // Skip if directory doesn't exist and we're not creating it
3502                if !chat_dir.exists() {
3503                    if !force {
3504                        println!(
3505                            "   {} Skipped {} - workspace chat dir not found: {}",
3506                            "[~]".yellow(),
3507                            session_id.dimmed(),
3508                            chat_dir.display()
3509                        );
3510                        skipped += 1;
3511                        continue;
3512                    }
3513                    // Create directory if force is set
3514                    if !dry_run {
3515                        fs::create_dir_all(&chat_dir)?;
3516                    }
3517                }
3518
3519                // Determine output format and filename
3520                let (output_ext, needs_conversion) = match target_format {
3521                    "jsonl" => ("jsonl", stored_format.as_deref() != Some("jsonl")),
3522                    "json" => ("json", stored_format.as_deref() == Some("jsonl")),
3523                    _ => ("jsonl", true), // Default to JSONL for modern VS Code
3524                };
3525
3526                let session_file = chat_dir.join(format!("{}.{}", session_id, output_ext));
3527
3528                // Check if file exists
3529                if session_file.exists() && !force {
3530                    println!(
3531                        "   {} Skipped {} - file exists (use --force to overwrite)",
3532                        "[~]".yellow(),
3533                        session_id.dimmed()
3534                    );
3535                    skipped += 1;
3536                    continue;
3537                }
3538
3539                // Parse and potentially convert the session
3540                let output_content = if needs_conversion && target_format == "jsonl" {
3541                    // Convert JSON to JSONL format
3542                    match serde_json::from_str::<ChatSession>(&session_json) {
3543                        Ok(session) => convert_session_to_jsonl(&session)?,
3544                        Err(e) => {
3545                            println!(
3546                                "   {} Error parsing session {}: {}",
3547                                "[!]".red(),
3548                                session_id,
3549                                e
3550                            );
3551                            errors += 1;
3552                            continue;
3553                        }
3554                    }
3555                } else {
3556                    // Use as-is or simple format
3557                    session_json.clone()
3558                };
3559
3560                if dry_run {
3561                    println!(
3562                        "   {} Would write {} ({}) → {}",
3563                        "[+]".green(),
3564                        workspace_name.as_deref().unwrap_or(&provider_name),
3565                        session_id.dimmed(),
3566                        session_file.display()
3567                    );
3568                } else {
3569                    fs::write(&session_file, &output_content)?;
3570                    println!(
3571                        "   {} Pushed {} ({}) → {}",
3572                        "[+]".green(),
3573                        workspace_name.as_deref().unwrap_or(&provider_name),
3574                        session_id.dimmed(),
3575                        session_file.display()
3576                    );
3577                }
3578                pushed += 1;
3579            }
3580            Err(e) => {
3581                println!("   {} Database error: {}", "[!]".red(), e);
3582                errors += 1;
3583            }
3584        }
3585    }
3586
3587    println!(
3588        "\n   {} Push summary: {} pushed, {} skipped, {} errors",
3589        "[=]".cyan(),
3590        pushed.to_string().green(),
3591        skipped.to_string().yellow(),
3592        errors.to_string().red()
3593    );
3594
3595    Ok(())
3596}
3597
3598/// Pull sessions from provider workspaces into the database
3599fn sync_pull(
3600    db_path: &Path,
3601    provider: Option<&str>,
3602    workspace: Option<&str>,
3603    sessions: Option<&[String]>,
3604    force: bool,
3605    dry_run: bool,
3606) -> Result<()> {
3607    let conn = if dry_run {
3608        // For dry run, still open the DB to check existing sessions
3609        Connection::open(db_path)?
3610    } else {
3611        Connection::open(db_path)?
3612    };
3613
3614    // Discover workspaces
3615    let workspaces = discover_workspaces()?;
3616    let mut pulled = 0;
3617    let mut skipped = 0;
3618    let mut errors = 0;
3619
3620    for ws in workspaces {
3621        // Filter by workspace if specified
3622        if let Some(ws_filter) = workspace {
3623            let ws_path_str = ws.workspace_path.to_string_lossy();
3624            let ws_name = ws.project_path.as_deref().unwrap_or("");
3625            if !ws_path_str.contains(ws_filter) && !ws_name.contains(ws_filter) {
3626                continue;
3627            }
3628        }
3629
3630        // Filter by provider if specified (for Copilot workspaces, it's "GitHub Copilot")
3631        if let Some(p) = provider {
3632            if !p.eq_ignore_ascii_case("copilot")
3633                && !p.eq_ignore_ascii_case("github copilot")
3634                && !p.eq_ignore_ascii_case("vscode")
3635            {
3636                continue;
3637            }
3638        }
3639
3640        // Get sessions from this workspace
3641        match get_chat_sessions_from_workspace(&ws.workspace_path) {
3642            Ok(ws_sessions) => {
3643                for swp in ws_sessions {
3644                    // Filter by session ID if specified
3645                    if let Some(sess_ids) = sessions {
3646                        let sid = swp.session.session_id.as_deref().unwrap_or("");
3647                        if !sess_ids.iter().any(|id| id == sid) {
3648                            continue;
3649                        }
3650                    }
3651
3652                    let session_id = swp
3653                        .session
3654                        .session_id
3655                        .clone()
3656                        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
3657
3658                    // Check if session already exists
3659                    let exists: bool = conn
3660                        .query_row(
3661                            "SELECT 1 FROM sessions WHERE id = ?",
3662                            [&session_id],
3663                            |_| Ok(true),
3664                        )
3665                        .unwrap_or(false);
3666
3667                    if exists && !force {
3668                        skipped += 1;
3669                        continue;
3670                    }
3671
3672                    let ws_name = ws.project_path.clone();
3673                    let ws_path = ws.workspace_path.to_string_lossy().to_string();
3674
3675                    if dry_run {
3676                        println!(
3677                            "   {} Would pull {} from {}",
3678                            "[+]".green(),
3679                            session_id.dimmed(),
3680                            ws_name.as_deref().unwrap_or(&ws_path)
3681                        );
3682                        pulled += 1;
3683                    } else {
3684                        match insert_or_update_session(
3685                            &conn,
3686                            &swp.session,
3687                            "GitHub Copilot",
3688                            Some(&ws.hash),
3689                            ws_name.as_deref(),
3690                            None,           // provider_version
3691                            3,              // schema_version (V3)
3692                            "json",         // file_format (detect from swp.path extension)
3693                            Some(&ws_path), // workspace_path
3694                        ) {
3695                            Ok(_) => {
3696                                println!(
3697                                    "   {} Pulled {} from {}",
3698                                    "[+]".green(),
3699                                    session_id.dimmed(),
3700                                    ws_name.as_deref().unwrap_or(&ws_path)
3701                                );
3702                                pulled += 1;
3703                            }
3704                            Err(e) => {
3705                                println!(
3706                                    "   {} Error pulling {}: {}",
3707                                    "[!]".red(),
3708                                    session_id,
3709                                    e
3710                                );
3711                                errors += 1;
3712                            }
3713                        }
3714                    }
3715                }
3716            }
3717            Err(e) => {
3718                println!(
3719                    "   {} Error reading workspace {}: {}",
3720                    "[!]".red(),
3721                    ws.workspace_path.display(),
3722                    e
3723                );
3724                errors += 1;
3725            }
3726        }
3727    }
3728
3729    println!(
3730        "\n   {} Pull summary: {} pulled, {} skipped, {} errors",
3731        "[=]".cyan(),
3732        pulled.to_string().green(),
3733        skipped.to_string().yellow(),
3734        errors.to_string().red()
3735    );
3736
3737    Ok(())
3738}
3739
3740/// Convert a ChatSession to JSONL format (VS Code 1.109+)
3741fn convert_session_to_jsonl(session: &ChatSession) -> Result<String> {
3742    use serde_json::json;
3743
3744    let mut lines = Vec::new();
3745
3746    // First line: session metadata
3747    let session_meta = json!({
3748        "version": 3,
3749        "sessionId": session.session_id,
3750        "title": session.title(),
3751        "createdAt": session.creation_date,
3752        "lastInteractionAt": session.last_message_date,
3753        "customTitle": session.custom_title.as_deref(),
3754    });
3755    lines.push(serde_json::to_string(&session_meta)?);
3756
3757    // Subsequent lines: each request as a separate JSON object
3758    for request in &session.requests {
3759        let request_json = serde_json::to_string(request)?;
3760        lines.push(request_json);
3761    }
3762
3763    Ok(lines.join("\n"))
3764}