1use anyhow::{Context, Result};
10use chrono::{DateTime, Utc};
11use colored::*;
12use rusqlite::{params, Connection};
13use serde::{Deserialize, Serialize};
14use std::fs;
15use std::path::{Path, PathBuf};
16use std::process::Command;
17use std::time::Duration;
18
19use crate::browser::{get_installed_browsers, scan_browser_auth, BrowserType};
20use crate::database::{ChatDatabase, ShareLinkParser};
21use crate::models::ChatSession;
22use crate::providers::{ProviderRegistry, ProviderType};
23use crate::storage::parse_session_json;
24use crate::workspace::{discover_workspaces, get_chat_sessions_from_workspace};
25
/// Returns `true` when `s` is a fenced markdown code block with no visible
/// content.
///
/// After trimming surrounding whitespace, three shapes match: a bare "```"
/// fence, a single-line opening fence such as "```rust", and a fence pair
/// whose interior lines are all blank.
fn is_empty_code_block(s: &str) -> bool {
    let trimmed = s.trim();

    // A lone fence, or a one-line string that starts with a fence.
    if trimmed == "```" || (trimmed.starts_with("```") && !trimmed.contains('\n')) {
        return true;
    }

    // Multi-line case: opening fence, all-blank interior, closing fence.
    let lines: Vec<&str> = trimmed.lines().collect();
    match lines.as_slice() {
        [first, middle @ .., last] if first.starts_with("```") && *last == "```" => {
            middle.iter().all(|line| line.trim().is_empty())
        }
        _ => false,
    }
}
48
/// Row shape produced by the session-listing query in `harvest_list`:
/// (id, provider, title, message_count, created_at, updated_at, workspace_name).
type HarvestQueryResult = (String, String, String, i64, i64, i64, Option<String>);
51
/// A well-known web LLM chat endpoint that can be probed for reachability.
#[derive(Debug, Clone)]
struct WebProviderEndpoint {
    /// Short display name, e.g. "ChatGPT".
    name: &'static str,
    /// Base URL probed with an HTTP HEAD request.
    url: &'static str,
    /// Human-readable description shown in scan output.
    description: &'static str,
}
59
/// Web LLM chat endpoints probed by `scan_web_providers`. The `name` values
/// are compared for equality against browser-auth provider names in
/// `harvest_scan`, so the two sets of names must stay in sync.
const WEB_PROVIDERS: &[WebProviderEndpoint] = &[
    WebProviderEndpoint {
        name: "ChatGPT",
        url: "https://chat.openai.com",
        description: "OpenAI ChatGPT Web Interface",
    },
    WebProviderEndpoint {
        name: "Claude",
        url: "https://claude.ai",
        description: "Anthropic Claude Web Interface",
    },
    WebProviderEndpoint {
        name: "Gemini",
        url: "https://gemini.google.com",
        description: "Google Gemini Web Interface",
    },
    WebProviderEndpoint {
        name: "Perplexity",
        url: "https://www.perplexity.ai",
        description: "Perplexity AI Search Interface",
    },
    WebProviderEndpoint {
        name: "DeepSeek",
        url: "https://chat.deepseek.com",
        description: "DeepSeek Chat Interface",
    },
    WebProviderEndpoint {
        name: "Poe",
        url: "https://poe.com",
        description: "Quora Poe Multi-model Chat",
    },
    WebProviderEndpoint {
        name: "You.com",
        url: "https://you.com/chat",
        description: "You.com AI Chat",
    },
    WebProviderEndpoint {
        name: "HuggingChat",
        url: "https://huggingface.co/chat",
        description: "HuggingFace Chat Interface",
    },
    WebProviderEndpoint {
        name: "Copilot",
        url: "https://copilot.microsoft.com",
        description: "Microsoft Copilot Web Interface",
    },
    WebProviderEndpoint {
        name: "Mistral",
        url: "https://chat.mistral.ai",
        description: "Mistral AI Le Chat Interface",
    },
    WebProviderEndpoint {
        name: "Cohere",
        url: "https://coral.cohere.com",
        description: "Cohere Coral Chat Interface",
    },
    WebProviderEndpoint {
        name: "Groq",
        url: "https://groq.com",
        description: "Groq Fast Inference Interface",
    },
    WebProviderEndpoint {
        name: "Phind",
        url: "https://www.phind.com",
        description: "Phind AI Code Search",
    },
    WebProviderEndpoint {
        name: "Character.AI",
        url: "https://character.ai",
        description: "Character.AI Chat Interface",
    },
    WebProviderEndpoint {
        name: "Pi",
        url: "https://pi.ai",
        description: "Inflection Pi Personal AI",
    },
];
138
139fn scan_web_providers(timeout_secs: u64) -> Vec<String> {
141 use std::sync::{Arc, Mutex};
142 use std::thread;
143
144 let reachable = Arc::new(Mutex::new(Vec::new()));
145 let results = Arc::new(Mutex::new(Vec::new()));
146
147 let handles: Vec<_> = WEB_PROVIDERS
149 .iter()
150 .map(|provider| {
151 let reachable = Arc::clone(&reachable);
152 let results = Arc::clone(&results);
153 let timeout = timeout_secs;
154 let name = provider.name;
155 let url = provider.url;
156 let desc = provider.description;
157
158 thread::spawn(move || {
159 let client = match reqwest::blocking::Client::builder()
160 .timeout(Duration::from_secs(timeout))
161 .connect_timeout(Duration::from_secs(timeout.min(3))) .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
163 .build()
164 {
165 Ok(c) => c,
166 Err(_) => return,
167 };
168
169 let result: (&str, bool, String, &str) = match client.head(url).send() {
170 Ok(response) => {
171 let status = response.status();
172 if status.is_success() || status.is_redirection() {
173 reachable.lock().unwrap().push(name.to_string());
174 (name, true, desc.to_string(), url)
175 } else {
176 (name, false, format!("HTTP {}", status.as_u16()), url)
177 }
178 }
179 Err(e) => {
180 let reason = if e.is_timeout() {
181 "timeout".to_string()
182 } else if e.is_connect() {
183 "connection failed".to_string()
184 } else {
185 "unreachable".to_string()
186 };
187 (name, false, reason, url)
188 }
189 };
190
191 results.lock().unwrap().push((
192 result.0.to_string(),
193 result.1,
194 result.2,
195 result.3.to_string(),
196 ));
197 })
198 })
199 .collect();
200
201 for handle in handles {
203 let _ = handle.join();
204 }
205
206 let results = results.lock().unwrap();
208 let mut sorted_results: Vec<_> = results.iter().collect();
209 sorted_results.sort_by_key(|(name, _, _, _)| name.as_str());
210
211 for (name, success, info, url) in sorted_results {
212 if *success {
213 println!(
214 " {} {}: {} ({})",
215 "[+]".green(),
216 name.bold(),
217 "reachable".green(),
218 info.dimmed()
219 );
220 println!(" {} {}", "`".dimmed(), url.dimmed());
221 } else {
222 println!(
223 " {} {}: {} ({})",
224 "[-]".dimmed(),
225 name,
226 "blocked or unavailable".dimmed(),
227 info.dimmed()
228 );
229 }
230 }
231
232 let result = reachable.lock().unwrap().clone();
233 result
234}
235
236fn scan_browser_authentication(
238 verbose: bool,
239) -> (std::collections::HashMap<String, Vec<BrowserType>>, usize) {
240 use crate::browser::scan_browser_auth_verbose;
241 use std::collections::HashMap;
242
243 let installed = get_installed_browsers();
244 if installed.is_empty() {
245 println!(" {} No supported browsers found", "[-]".dimmed());
246 return (HashMap::new(), 0);
247 }
248
249 println!(
250 " {} Checking {} browser(s): {}",
251 "[*]".blue(),
252 installed.len(),
253 installed
254 .iter()
255 .map(|b| b.name())
256 .collect::<Vec<_>>()
257 .join(", ")
258 );
259
260 let results = if verbose {
261 scan_browser_auth_verbose()
262 } else {
263 scan_browser_auth()
264 };
265
266 let mut authenticated: HashMap<String, Vec<BrowserType>> = HashMap::new();
268
269 for result in results {
270 if result.authenticated {
271 authenticated
272 .entry(result.provider.clone())
273 .or_default()
274 .push(result.browser);
275 }
276 }
277
278 let count = authenticated.len();
279
280 if authenticated.is_empty() {
281 println!(
282 " {} No authenticated web LLM providers found",
283 "[-]".dimmed()
284 );
285 println!(
286 " {} Log into ChatGPT, Claude, etc. in your browser to enable harvesting",
287 "`".dimmed()
288 );
289 } else {
290 for (provider, browsers) in &authenticated {
291 let browser_names: Vec<_> = browsers.iter().map(|b| b.name()).collect();
292 println!(
293 " {} {}: {} in {}",
294 "[+]".green(),
295 provider.bold(),
296 "authenticated".green(),
297 browser_names.join(", ")
298 );
299 }
300 }
301
302 (authenticated, count)
303}
304
/// Serializable configuration for harvest runs (marked `#[allow(dead_code)]`,
/// so it may not yet be wired into the CLI paths visible here).
#[allow(dead_code)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HarvestConfig {
    /// Location of the SQLite harvest database.
    pub db_path: PathBuf,
    /// Commit database changes to git after each harvest.
    pub auto_commit: bool,
    /// Provider-name substrings to include (empty = all providers).
    pub include_providers: Vec<String>,
    /// Provider-name substrings to exclude.
    pub exclude_providers: Vec<String>,
    /// Also harvest VS Code workspace session storage.
    pub include_workspaces: bool,
    /// Timestamp of the most recent harvest, if any (epoch millis,
    /// presumably matching the `harvested_at` column — confirm).
    pub last_harvest: Option<i64>,
}
322
323#[allow(dead_code)]
324impl Default for HarvestConfig {
325 fn default() -> Self {
326 Self {
327 db_path: PathBuf::from("chat_sessions.db"),
328 auto_commit: false,
329 include_providers: Vec::new(),
330 exclude_providers: Vec::new(),
331 include_workspaces: true,
332 last_harvest: None,
333 }
334 }
335}
336
/// Counters accumulated over a single `harvest_run` invocation.
#[derive(Debug, Default)]
pub struct HarvestStats {
    /// Providers that were available and scanned.
    pub providers_scanned: usize,
    /// VS Code workspaces (with sessions) that were scanned.
    pub workspaces_scanned: usize,
    /// Sessions seen across all sources, before incremental filtering.
    pub sessions_found: usize,
    /// Sessions newly inserted into the database.
    pub sessions_added: usize,
    /// Sessions that already existed and were updated.
    pub sessions_updated: usize,
    /// Sessions skipped as unchanged during incremental harvests.
    pub sessions_skipped: usize,
    /// Human-readable error messages collected along the way.
    pub errors: Vec<String>,
}
348
/// A chat session as stored in the harvest database (one row of the
/// `sessions` table). Marked `#[allow(dead_code)]`; rows in this file are
/// read back as tuples rather than through this struct.
#[allow(dead_code)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HarvestedSession {
    /// Unique session id.
    pub id: String,
    /// Display name of the source provider (e.g. "GitHub Copilot").
    pub provider: String,
    /// Machine-readable provider type.
    pub provider_type: String,
    /// Hash of the originating VS Code workspace, when applicable.
    pub workspace_id: Option<String>,
    /// Project path/name of the originating workspace, when applicable.
    pub workspace_name: Option<String>,
    /// Session title shown in listings.
    pub title: String,
    /// Number of messages in the session.
    pub message_count: usize,
    /// Creation timestamp (epoch millis, per `from_timestamp_millis` usage).
    pub created_at: i64,
    /// Last-update timestamp (epoch millis).
    pub updated_at: i64,
    /// When this row was harvested (epoch millis).
    pub harvested_at: i64,
    /// Full session payload as a JSON string.
    pub session_json: String,
}
365
366pub fn harvest_init(path: Option<&str>, git_init: bool) -> Result<()> {
368 let db_path = get_db_path(path)?;
369 let db_dir = db_path.parent().unwrap_or(Path::new("."));
370
371 println!("\n{} Initializing Harvest Database", "[H]".magenta().bold());
372 println!("{}", "=".repeat(60));
373
374 if !db_dir.exists() {
376 fs::create_dir_all(db_dir)?;
377 println!("{} Created directory: {}", "[+]".green(), db_dir.display());
378 }
379
380 if db_path.exists() {
382 println!(
383 "{} Database already exists: {}",
384 "[!]".yellow(),
385 db_path.display()
386 );
387 println!(" Use 'csm harvest run' to update it");
388 return Ok(());
389 }
390
391 create_harvest_database(&db_path)?;
393 println!("{} Created database: {}", "[+]".green(), db_path.display());
394
395 if git_init {
397 init_git_tracking(&db_path)?;
398 }
399
400 println!("\n{} Harvest database initialized!", "[+]".green().bold());
401 println!("\nNext steps:");
402 println!(" 1. Run 'csm harvest scan' to see available providers");
403 println!(" 2. Run 'csm harvest run' to collect sessions");
404 if !git_init {
405 println!(" 3. Run 'csm harvest git init' to enable version tracking");
406 }
407
408 Ok(())
409}
410
/// Scans for harvestable chat sessions and prints a report.
///
/// Covers three sources: local LLM provider installations, VS Code
/// workspaces, and — when `scan_web` is set — web provider endpoints plus
/// browser authentication state. Ends with a summary of totals.
///
/// * `show_sessions` — also list a few sample session titles per source.
/// * `scan_web` — enable the network endpoint and browser-auth checks.
/// * `timeout_secs` — per-endpoint HTTP timeout for the web scan.
/// * `verbose` — use the verbose browser-auth scan.
pub fn harvest_scan(
    show_sessions: bool,
    scan_web: bool,
    timeout_secs: u64,
    verbose: bool,
) -> Result<()> {
    println!("\n{} Scanning for Providers", "[H]".magenta().bold());
    println!("{}", "=".repeat(60));

    let registry = ProviderRegistry::new();
    let mut total_sessions = 0;
    let mut available_providers = Vec::new();

    println!("\n{} LLM Providers:", "[*]".blue().bold());

    // Provider types probed by the scan.
    // NOTE(review): this list differs from the one in `harvest_run` (no
    // ClaudeCode/OpenCode/OpenClaw/Antigravity here, but it adds
    // OpenAI/ChatGPT/Anthropic/Perplexity/DeepSeek/Gemini) — confirm the
    // divergence is intentional.
    let provider_types = vec![
        ProviderType::Copilot,
        ProviderType::Cursor,
        ProviderType::CodexCli,
        ProviderType::DroidCli,
        ProviderType::GeminiCli,
        ProviderType::Ollama,
        ProviderType::Vllm,
        ProviderType::Foundry,
        ProviderType::LmStudio,
        ProviderType::LocalAI,
        ProviderType::TextGenWebUI,
        ProviderType::Jan,
        ProviderType::Gpt4All,
        ProviderType::Llamafile,
        ProviderType::OpenAI,
        ProviderType::ChatGPT,
        ProviderType::Anthropic,
        ProviderType::Perplexity,
        ProviderType::DeepSeek,
        ProviderType::Gemini,
    ];

    for pt in &provider_types {
        if let Some(provider) = registry.get_provider(*pt) {
            let available = provider.is_available();
            // Only query the session count for available providers.
            let session_count = if available {
                provider.list_sessions().map(|s| s.len()).unwrap_or(0)
            } else {
                0
            };

            if available {
                available_providers.push((*pt, session_count));
                total_sessions += session_count;

                let status = if session_count > 0 {
                    format!(
                        "{} {} sessions",
                        "+".green(),
                        session_count.to_string().cyan()
                    )
                } else {
                    format!("{} no sessions", "+".green())
                };

                println!(
                    " {} {}: {}",
                    "[+]".green(),
                    provider.name().bold(),
                    status
                );

                // Show up to three sample titles per provider.
                if show_sessions && session_count > 0 {
                    if let Ok(sessions) = provider.list_sessions() {
                        for session in sessions.iter().take(3) {
                            println!(" {} {}", "`".dimmed(), session.title().dimmed());
                        }
                        if sessions.len() > 3 {
                            println!(" {} ... and {} more", "`".dimmed(), sessions.len() - 3);
                        }
                    }
                }

                if let Some(path) = provider.sessions_path() {
                    println!(
                        " {} {}",
                        "`".dimmed(),
                        path.display().to_string().dimmed()
                    );
                }
            }
        }
    }

    println!("\n{} VS Code Workspaces:", "[*]".blue().bold());

    match discover_workspaces() {
        Ok(workspaces) => {
            // Only report workspaces that actually contain chat sessions.
            let workspaces_with_sessions: Vec<_> = workspaces
                .iter()
                .filter(|ws| ws.chat_session_count > 0)
                .collect();

            let ws_sessions: usize = workspaces_with_sessions
                .iter()
                .map(|ws| ws.chat_session_count)
                .sum();

            println!(
                " {} {} workspaces with {} sessions",
                "[+]".green(),
                workspaces_with_sessions.len().to_string().cyan(),
                ws_sessions.to_string().cyan()
            );

            if show_sessions {
                for ws in workspaces_with_sessions.iter().take(5) {
                    // Fall back to an 8-char hash prefix when no project
                    // path is known for the workspace.
                    let name = ws
                        .project_path
                        .clone()
                        .unwrap_or_else(|| ws.hash[..8.min(ws.hash.len())].to_string());
                    println!(
                        " {} {} ({} sessions)",
                        "`".dimmed(),
                        name.dimmed(),
                        ws.chat_session_count
                    );
                }
                if workspaces_with_sessions.len() > 5 {
                    println!(
                        " {} ... and {} more workspaces",
                        "`".dimmed(),
                        workspaces_with_sessions.len() - 5
                    );
                }
            }

            total_sessions += ws_sessions;
        }
        Err(e) => {
            println!(" {} Failed to scan workspaces: {}", "[!]".yellow(), e);
        }
    }

    // Optional network-facing checks (browser cookies + endpoint probes).
    let mut web_providers_found = Vec::new();
    let mut authenticated_count = 0;
    if scan_web {
        println!("\n{} Browser Authentication:", "[*]".blue().bold());
        let (auth_results, auth_count) = scan_browser_authentication(verbose);
        authenticated_count = auth_count;

        println!("\n{} Web LLM Provider Endpoints:", "[*]".blue().bold());
        web_providers_found = scan_web_providers(timeout_secs);

        // Cross-reference: authenticated providers that are also reachable.
        if !auth_results.is_empty() {
            println!("\n{} Authenticated Provider Sessions:", "[*]".blue().bold());
            for (provider, browsers) in &auth_results {
                let browser_list: Vec<_> = browsers.iter().map(|b| b.name()).collect();
                // NOTE(review): assumes browser-auth provider names match
                // `WEB_PROVIDERS` endpoint names exactly — confirm.
                let reachable = web_providers_found.iter().any(|p| p == provider);
                let status = if reachable {
                    format!("{} (reachable)", "ready to harvest".green())
                } else {
                    format!("{}", "authenticated but endpoint blocked".yellow())
                };
                println!(
                    " {} {}: {} via {}",
                    "[+]".green(),
                    provider.bold(),
                    status,
                    browser_list.join(", ").dimmed()
                );
            }
        }
    }

    println!("\n{} Summary:", "[*]".green().bold());
    println!(
        " {} local providers available",
        available_providers.len().to_string().cyan()
    );
    if scan_web {
        println!(
            " {} web providers reachable",
            web_providers_found.len().to_string().cyan()
        );
        println!(
            " {} web providers authenticated",
            authenticated_count.to_string().cyan()
        );
    }
    println!(
        " {} total sessions to harvest",
        total_sessions.to_string().truecolor(252, 152, 103)
    );

    Ok(())
}
612
613pub fn harvest_run(
615 path: Option<&str>,
616 providers: Option<&[String]>,
617 exclude: Option<&[String]>,
618 incremental: bool,
619 auto_commit: bool,
620 message: Option<&str>,
621) -> Result<()> {
622 let db_path = get_db_path(path)?;
623
624 println!("\n{} Running Harvest", "[H]".magenta().bold());
625 println!("{}", "=".repeat(60));
626
627 if !db_path.exists() {
629 println!("{} Database not found, creating...", "[*]".blue());
630 create_harvest_database(&db_path)?;
631 }
632
633 let conn = Connection::open(&db_path)?;
634
635 conn.execute_batch(
637 "PRAGMA journal_mode = WAL;
638 PRAGMA synchronous = NORMAL;
639 PRAGMA cache_size = -64000;",
640 )?;
641
642 let mut stats = HarvestStats::default();
643
644 let last_harvest: Option<i64> = if incremental {
646 conn.query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
647 row.get(0)
648 })
649 .ok()
650 } else {
651 None
652 };
653
654 if let Some(ts) = last_harvest {
655 let dt = DateTime::from_timestamp_millis(ts)
656 .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
657 .unwrap_or_default();
658 println!("{} Incremental harvest since: {}", "[*]".blue(), dt);
659 }
660
661 let registry = ProviderRegistry::new();
662 let include_providers = providers.map(|p| p.to_vec());
663 let exclude_providers = exclude.map(|p| p.to_vec()).unwrap_or_default();
664
665 println!("\n{} Harvesting from providers...", "[*]".blue());
667
668 let provider_types = vec![
669 ProviderType::Copilot,
670 ProviderType::Cursor,
671 ProviderType::ClaudeCode,
672 ProviderType::CodexCli,
673 ProviderType::DroidCli,
674 ProviderType::GeminiCli,
675 ProviderType::OpenCode,
676 ProviderType::OpenClaw,
677 ProviderType::Antigravity,
678 ProviderType::Ollama,
679 ProviderType::Vllm,
680 ProviderType::Foundry,
681 ProviderType::LmStudio,
682 ProviderType::LocalAI,
683 ProviderType::TextGenWebUI,
684 ProviderType::Jan,
685 ProviderType::Gpt4All,
686 ProviderType::Llamafile,
687 ];
688
689 for pt in &provider_types {
690 let provider_name = pt.display_name().to_lowercase();
691
692 if let Some(ref include) = include_providers {
694 if !include
695 .iter()
696 .any(|p| provider_name.contains(&p.to_lowercase()))
697 {
698 continue;
699 }
700 }
701 if exclude_providers
702 .iter()
703 .any(|p| provider_name.contains(&p.to_lowercase()))
704 {
705 continue;
706 }
707
708 if let Some(provider) = registry.get_provider(*pt) {
709 if !provider.is_available() {
710 continue;
711 }
712
713 stats.providers_scanned += 1;
714
715 match provider.list_sessions() {
716 Ok(sessions) => {
717 for session in sessions {
718 stats.sessions_found += 1;
719
720 if let Some(last) = last_harvest {
722 if session.last_message_date <= last {
723 stats.sessions_skipped += 1;
724 continue;
725 }
726 }
727
728 match insert_or_update_session(
729 &conn,
730 &session,
731 pt.display_name(),
732 None,
733 None,
734 None, 3, "json", None, ) {
739 Ok(updated) => {
740 if updated {
741 stats.sessions_updated += 1;
742 } else {
743 stats.sessions_added += 1;
744 }
745 }
746 Err(e) => {
747 stats.errors.push(format!("{}: {}", session.title(), e));
748 }
749 }
750 }
751
752 let session_count = stats.sessions_added + stats.sessions_updated;
753 if session_count > 0 {
754 println!(
755 " {} {}: {} sessions",
756 "[+]".green(),
757 provider.name(),
758 session_count.to_string().cyan()
759 );
760 }
761 }
762 Err(e) => {
763 stats.errors.push(format!("{}: {}", provider.name(), e));
764 }
765 }
766 }
767 }
768
769 if include_providers.is_none()
771 || include_providers
772 .as_ref()
773 .map(|p| {
774 p.iter()
775 .any(|x| x == "copilot" || x == "vscode" || x == "workspace")
776 })
777 .unwrap_or(false)
778 {
779 println!("\n{} Harvesting from VS Code workspaces...", "[*]".blue());
780
781 if let Ok(workspaces) = discover_workspaces() {
782 for ws in &workspaces {
783 if ws.chat_session_count == 0 {
784 continue;
785 }
786
787 stats.workspaces_scanned += 1;
788
789 if let Ok(sessions) = get_chat_sessions_from_workspace(&ws.workspace_path) {
790 for swp in sessions {
791 stats.sessions_found += 1;
792
793 if let Some(last) = last_harvest {
795 if swp.session.last_message_date <= last {
796 stats.sessions_skipped += 1;
797 continue;
798 }
799 }
800
801 let ws_name = ws.project_path.clone();
802 let ws_path = ws.workspace_path.to_string_lossy().to_string();
803
804 match insert_or_update_session(
805 &conn,
806 &swp.session,
807 "GitHub Copilot",
808 Some(&ws.hash),
809 ws_name.as_deref(),
810 None, 3, "json", Some(&ws_path), ) {
815 Ok(updated) => {
816 if updated {
817 stats.sessions_updated += 1;
818 } else {
819 stats.sessions_added += 1;
820 }
821 }
822 Err(e) => {
823 stats.errors.push(format!("{}: {}", swp.session.title(), e));
824 }
825 }
826 }
827 }
828 }
829
830 println!(
831 " {} Workspaces: {} scanned",
832 "[+]".green(),
833 stats.workspaces_scanned.to_string().cyan()
834 );
835 }
836 }
837
838 let include_list: Vec<String> = include_providers.clone().unwrap_or_default();
840 harvest_web_providers(&conn, &mut stats, &include_list, &exclude_providers)?;
841
842 update_harvest_metadata(&conn)?;
844
845 println!("\n{} Harvest Complete:", "[+]".green().bold());
847 println!(
848 " {} providers scanned",
849 stats.providers_scanned.to_string().cyan()
850 );
851 println!(
852 " {} workspaces scanned",
853 stats.workspaces_scanned.to_string().cyan()
854 );
855 println!(
856 " {} sessions found",
857 stats.sessions_found.to_string().cyan()
858 );
859 println!(
860 " {} sessions added",
861 stats.sessions_added.to_string().green()
862 );
863 println!(
864 " {} sessions updated",
865 stats.sessions_updated.to_string().yellow()
866 );
867 if stats.sessions_skipped > 0 {
868 println!(
869 " {} sessions skipped (unchanged)",
870 stats.sessions_skipped.to_string().dimmed()
871 );
872 }
873
874 if !stats.errors.is_empty() {
875 println!("\n{} Errors ({}):", "[!]".red(), stats.errors.len());
876 for (i, err) in stats.errors.iter().take(5).enumerate() {
877 println!(" {}. {}", i + 1, err);
878 }
879 if stats.errors.len() > 5 {
880 println!(" ... and {} more errors", stats.errors.len() - 5);
881 }
882 }
883
884 if auto_commit && (stats.sessions_added > 0 || stats.sessions_updated > 0) {
886 println!("\n{} Auto-committing changes...", "[*]".blue());
887 let commit_msg = message.unwrap_or("Harvest: update chat sessions");
888 if let Err(e) = git_commit_harvest(&db_path, commit_msg) {
889 println!("{} Git commit failed: {}", "[!]".yellow(), e);
890 } else {
891 println!("{} Changes committed", "[+]".green());
892 }
893 }
894
895 println!("\nDatabase: {}", db_path.display());
896
897 Ok(())
898}
899
/// Prints a status report for the harvest database: session/message totals,
/// per-provider breakdown, date range, last harvest time, and git tracking
/// state for the database file.
pub fn harvest_status(path: Option<&str>) -> Result<()> {
    let db_path = get_db_path(path)?;

    println!("\n{} Harvest Database Status", "[H]".magenta().bold());
    println!("{}", "=".repeat(60));

    if !db_path.exists() {
        println!(
            "{} Database not found: {}",
            "[!]".yellow(),
            db_path.display()
        );
        println!(" Run 'csm harvest init' to create one");
        return Ok(());
    }

    let conn = Connection::open(&db_path)?;

    // Per-provider session and message counts, most sessions first.
    let mut stmt = conn.prepare(
        "SELECT provider, COUNT(*) as count, SUM(message_count) as messages
         FROM sessions
         GROUP BY provider
         ORDER BY count DESC",
    )?;

    let provider_stats: Vec<(String, i64, i64)> = stmt
        .query_map([], |row| {
            Ok((
                row.get(0)?,
                row.get(1)?,
                // SUM() is NULL when there are no message counts; show 0.
                row.get::<_, Option<i64>>(2)?.unwrap_or(0),
            ))
        })?
        .filter_map(|r| r.ok())
        .collect();

    let total_sessions: i64 = conn
        .query_row("SELECT COUNT(*) FROM sessions", [], |row| row.get(0))
        .unwrap_or(0);

    let total_messages: i64 = conn
        .query_row(
            "SELECT COALESCE(SUM(message_count), 0) FROM sessions",
            [],
            |row| row.get(0),
        )
        .unwrap_or(0);

    // MAX/MIN return NULL on an empty table, hence Option + flatten.
    let last_harvest: Option<i64> = conn
        .query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
            row.get(0)
        })
        .ok()
        .flatten();

    let oldest: Option<i64> = conn
        .query_row("SELECT MIN(created_at) FROM sessions", [], |row| row.get(0))
        .ok()
        .flatten();

    let newest: Option<i64> = conn
        .query_row("SELECT MAX(updated_at) FROM sessions", [], |row| row.get(0))
        .ok()
        .flatten();

    println!("{} Database: {}", "[*]".blue(), db_path.display());
    println!(
        "{} {}: {}",
        "[*]".blue(),
        "Total Sessions".truecolor(171, 157, 242),
        total_sessions.to_string().truecolor(252, 152, 103)
    );
    println!(
        "{} {}: {}",
        "[*]".blue(),
        "Total Messages".truecolor(171, 157, 242),
        total_messages.to_string().truecolor(252, 152, 103)
    );

    // Timestamps are stored as epoch milliseconds.
    if let Some(ts) = last_harvest {
        let dt = DateTime::from_timestamp_millis(ts)
            .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
            .unwrap_or_else(|| "Unknown".to_string());
        println!("{} Last Harvest: {}", "[*]".blue(), dt);
    }

    if let (Some(old), Some(new)) = (oldest, newest) {
        let old_dt = DateTime::from_timestamp_millis(old)
            .map(|d| d.format("%Y-%m-%d").to_string())
            .unwrap_or_default();
        let new_dt = DateTime::from_timestamp_millis(new)
            .map(|d| d.format("%Y-%m-%d").to_string())
            .unwrap_or_default();
        println!("{} Date Range: {} to {}", "[*]".blue(), old_dt, new_dt);
    }

    if !provider_stats.is_empty() {
        println!("\n{} Sessions by Provider:", "[*]".blue());
        for (provider, count, messages) in &provider_stats {
            println!(
                " {} {}: {} sessions, {} messages",
                "[+]".green(),
                provider.bold(),
                count.to_string().cyan(),
                messages.to_string().dimmed()
            );
        }
    }

    let db_dir = db_path.parent().unwrap_or(Path::new("."));
    // NOTE(review): the second operand of this `||` re-tests the same
    // directory as the first (`db_dir` is `db_path.parent()` with a "."
    // fallback), so it is effectively redundant.
    if db_dir.join(".git").exists()
        || db_path
            .parent()
            .map(|p| p.join(".git").exists())
            .unwrap_or(false)
    {
        println!("\n{} Git Status:", "[*]".blue());

        // NOTE(review): `file_name()`/`to_str()` unwraps panic on a path
        // without a file name or with a non-UTF-8 name — presumably
        // unreachable for paths from `get_db_path`; confirm.
        let output = Command::new("git")
            .current_dir(db_dir)
            .args([
                "status",
                "--porcelain",
                db_path.file_name().unwrap().to_str().unwrap(),
            ])
            .output();

        match output {
            Ok(out) => {
                let status = String::from_utf8_lossy(&out.stdout);
                // `git status --porcelain` prints nothing when clean.
                if status.is_empty() {
                    println!(" {} No uncommitted changes", "[+]".green());
                } else {
                    println!(" {} Uncommitted changes detected", "[!]".yellow());
                    println!(" Run 'csm harvest git commit' to save changes");
                }
            }
            Err(_) => {
                println!(" {} Unable to check git status", "[!]".yellow());
            }
        }
    } else {
        println!("\n{} Git tracking not enabled", "[i]".dimmed());
        println!(" Run 'csm harvest git init' to enable version tracking");
    }

    Ok(())
}
1054
1055pub fn harvest_list(
1057 path: Option<&str>,
1058 provider: Option<&str>,
1059 limit: usize,
1060 search: Option<&str>,
1061) -> Result<()> {
1062 let db_path = get_db_path(path)?;
1063
1064 if !db_path.exists() {
1065 println!(
1066 "{} Database not found: {}",
1067 "[!]".yellow(),
1068 db_path.display()
1069 );
1070 return Ok(());
1071 }
1072
1073 let conn = Connection::open(&db_path)?;
1074
1075 let mut query = String::from(
1076 "SELECT id, provider, title, message_count, created_at, updated_at, workspace_name
1077 FROM sessions WHERE 1=1",
1078 );
1079 let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1080
1081 if let Some(p) = provider {
1082 query.push_str(" AND LOWER(provider) LIKE ?");
1083 params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1084 }
1085
1086 if let Some(s) = search {
1087 query.push_str(" AND (LOWER(title) LIKE ? OR LOWER(id) LIKE ?)");
1088 let pattern = format!("%{}%", s.to_lowercase());
1089 params_vec.push(Box::new(pattern.clone()));
1090 params_vec.push(Box::new(pattern));
1091 }
1092
1093 query.push_str(" ORDER BY updated_at DESC LIMIT ?");
1094 params_vec.push(Box::new(limit as i64));
1095
1096 let mut stmt = conn.prepare(&query)?;
1097
1098 let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1100
1101 let sessions: Vec<HarvestQueryResult> = stmt
1102 .query_map(params_slice.as_slice(), |row| {
1103 Ok((
1104 row.get(0)?,
1105 row.get(1)?,
1106 row.get(2)?,
1107 row.get(3)?,
1108 row.get(4)?,
1109 row.get(5)?,
1110 row.get(6)?,
1111 ))
1112 })?
1113 .filter_map(|r| r.ok())
1114 .collect();
1115
1116 println!("\n{} Harvested Sessions", "[H]".magenta().bold());
1117 println!("{}", "=".repeat(60));
1118
1119 if sessions.is_empty() {
1120 println!("{} No sessions found", "[i]".dimmed());
1121 return Ok(());
1122 }
1123
1124 for (id, prov, title, msg_count, _created, updated, ws_name) in &sessions {
1125 let date = DateTime::from_timestamp_millis(*updated)
1126 .map(|d| d.format("%Y-%m-%d %H:%M").to_string())
1127 .unwrap_or_default();
1128
1129 println!("\n{} {}", "[S]".cyan(), title.bold());
1130 println!(" ID: {}", &id[..16.min(id.len())].dimmed());
1131 println!(
1132 " Provider: {} | Messages: {} | Updated: {}",
1133 prov.cyan(),
1134 msg_count.to_string().green(),
1135 date.dimmed()
1136 );
1137 if let Some(ws) = ws_name {
1138 println!(" Workspace: {}", ws.dimmed());
1139 }
1140 }
1141
1142 println!(
1143 "\n{} Showing {} of available sessions",
1144 "[i]".dimmed(),
1145 sessions.len()
1146 );
1147
1148 Ok(())
1149}
1150
/// Exports harvested sessions to `output` in the given `format` ("json",
/// "jsonl", or "md"/"markdown"), optionally filtered by a provider
/// substring and/or an explicit list of session ids.
///
/// Fails when the database is missing or the format is unknown; creates the
/// output file's parent directory on demand. Rows whose stored JSON fails
/// to parse are silently dropped from JSON/JSONL output.
pub fn harvest_export(
    path: Option<&str>,
    output: &str,
    format: &str,
    provider: Option<&str>,
    session_ids: Option<&[String]>,
) -> Result<()> {
    let db_path = get_db_path(path)?;
    let output_path = PathBuf::from(output);

    if !db_path.exists() {
        anyhow::bail!("Database not found: {}", db_path.display());
    }

    let conn = Connection::open(&db_path)?;

    println!("\n{} Exporting Sessions", "[H]".magenta().bold());
    println!("{}", "=".repeat(60));

    // Query is assembled dynamically, but user input only ever flows in
    // through bound parameters.
    let mut query = String::from("SELECT session_json FROM sessions WHERE 1=1");
    let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();

    if let Some(p) = provider {
        query.push_str(" AND LOWER(provider) LIKE ?");
        params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
    }

    if let Some(ids) = session_ids {
        // One placeholder per requested id.
        let placeholders: Vec<&str> = ids.iter().map(|_| "?").collect();
        query.push_str(&format!(" AND id IN ({})", placeholders.join(",")));
        for id in ids {
            params_vec.push(Box::new(id.clone()));
        }
    }

    let mut stmt = conn.prepare(&query)?;
    let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();

    let sessions: Vec<String> = stmt
        .query_map(params_slice.as_slice(), |row| row.get(0))?
        .filter_map(|r| r.ok())
        .collect();

    if sessions.is_empty() {
        println!("{} No sessions to export", "[i]".dimmed());
        return Ok(());
    }

    // Ensure the destination directory exists before writing.
    if let Some(parent) = output_path.parent() {
        if !parent.exists() {
            fs::create_dir_all(parent)?;
        }
    }

    match format.to_lowercase().as_str() {
        "json" => {
            // Pretty-printed JSON array of re-parsed session payloads.
            let parsed: Vec<serde_json::Value> = sessions
                .iter()
                .filter_map(|s| serde_json::from_str(s).ok())
                .collect();
            let json_output = serde_json::to_string_pretty(&parsed)?;
            fs::write(&output_path, json_output)?;
        }
        "jsonl" => {
            // One compact JSON document per line.
            let content: String = sessions
                .iter()
                .filter_map(|s| serde_json::from_str::<serde_json::Value>(s).ok())
                .map(|v| serde_json::to_string(&v).unwrap_or_default())
                .collect::<Vec<_>>()
                .join("\n");
            fs::write(&output_path, content)?;
        }
        "md" | "markdown" => {
            let mut md_content = String::from("# Chat Sessions Export\n\n");
            md_content.push_str(&format!(
                "Exported: {}\n\n",
                Utc::now().format("%Y-%m-%d %H:%M:%S")
            ));

            for session_json in &sessions {
                if let Ok(session) = parse_session_json(session_json) {
                    md_content.push_str(&format!("## {}\n\n", session.title()));
                    md_content.push_str(&format!("Messages: {}\n\n", session.request_count()));

                    for request in &session.requests {
                        if let Some(msg) = &request.message {
                            if let Some(text) = &msg.text {
                                md_content.push_str(&format!("### User\n\n{}\n\n", text));
                            }
                        }
                        if let Some(response) = &request.response {
                            // Try several known payload shapes for the
                            // assistant text: "text", then "content", then
                            // value[0].value.
                            let response_text = response
                                .get("text")
                                .and_then(|v| v.as_str())
                                .or_else(|| response.get("content").and_then(|v| v.as_str()))
                                .or_else(|| {
                                    response
                                        .get("value")
                                        .and_then(|v| v.as_array())
                                        .and_then(|arr| arr.first())
                                        .and_then(|v| v.get("value"))
                                        .and_then(|v| v.as_str())
                                });

                            if let Some(text) = response_text {
                                md_content.push_str(&format!("### Assistant\n\n{}\n\n", text));
                            }
                        }
                    }
                    md_content.push_str("---\n\n");
                }
            }
            fs::write(&output_path, md_content)?;
        }
        _ => {
            anyhow::bail!("Unknown format: {}. Supported: json, jsonl, md", format);
        }
    }

    println!(
        "{} Exported {} sessions to {}",
        "[+]".green(),
        sessions.len().to_string().cyan(),
        output_path.display()
    );

    Ok(())
}
1287
1288pub fn harvest_git_init(path: Option<&str>) -> Result<()> {
1290 let db_path = get_db_path(path)?;
1291 init_git_tracking(&db_path)
1292}
1293
1294pub fn harvest_git_commit(path: Option<&str>, message: Option<&str>) -> Result<()> {
1295 let db_path = get_db_path(path)?;
1296 let msg = message.unwrap_or("Update harvest database");
1297 git_commit_harvest(&db_path, msg)
1298}
1299
1300pub fn harvest_git_log(path: Option<&str>, count: usize) -> Result<()> {
1301 let db_path = get_db_path(path)?;
1302 let db_dir = db_path.parent().unwrap_or(Path::new("."));
1303 let db_name = db_path.file_name().unwrap().to_str().unwrap();
1304
1305 println!("\n{} Harvest Git History", "[H]".magenta().bold());
1306 println!("{}", "=".repeat(60));
1307
1308 let output = Command::new("git")
1309 .current_dir(db_dir)
1310 .args([
1311 "log",
1312 "--oneline",
1313 &format!("-{}", count),
1314 "--follow",
1315 "--",
1316 db_name,
1317 ])
1318 .output()
1319 .context("Failed to run git log")?;
1320
1321 if !output.status.success() {
1322 let stderr = String::from_utf8_lossy(&output.stderr);
1323 if stderr.contains("does not have any commits") {
1324 println!("{} No commits yet", "[i]".dimmed());
1325 } else {
1326 anyhow::bail!("Git log failed: {}", stderr);
1327 }
1328 return Ok(());
1329 }
1330
1331 let log = String::from_utf8_lossy(&output.stdout);
1332 if log.trim().is_empty() {
1333 println!("{} No commits found for {}", "[i]".dimmed(), db_name);
1334 } else {
1335 for line in log.lines() {
1336 let parts: Vec<&str> = line.splitn(2, ' ').collect();
1337 if parts.len() == 2 {
1338 println!("{} {} {}", "[C]".yellow(), parts[0].cyan(), parts[1]);
1339 } else {
1340 println!("{}", line);
1341 }
1342 }
1343 }
1344
1345 Ok(())
1346}
1347
1348pub fn harvest_git_diff(path: Option<&str>, commit: Option<&str>) -> Result<()> {
1349 let db_path = get_db_path(path)?;
1350 let db_dir = db_path.parent().unwrap_or(Path::new("."));
1351 let db_name = db_path.file_name().unwrap().to_str().unwrap();
1352
1353 println!("\n{} Harvest Database Changes", "[H]".magenta().bold());
1354 println!("{}", "=".repeat(60));
1355
1356 let mut args = vec!["diff", "--stat"];
1357 if let Some(c) = commit {
1358 args.push(c);
1359 }
1360 args.push("--");
1361 args.push(db_name);
1362
1363 let output = Command::new("git")
1364 .current_dir(db_dir)
1365 .args(&args)
1366 .output()
1367 .context("Failed to run git diff")?;
1368
1369 let diff = String::from_utf8_lossy(&output.stdout);
1370 if diff.trim().is_empty() {
1371 println!("{} No changes", "[+]".green());
1372 } else {
1373 println!("{}", diff);
1374 }
1375
1376 Ok(())
1377}
1378
1379pub fn harvest_git_restore(path: Option<&str>, commit: &str) -> Result<()> {
1380 let db_path = get_db_path(path)?;
1381 let db_dir = db_path.parent().unwrap_or(Path::new("."));
1382 let db_name = db_path.file_name().unwrap().to_str().unwrap();
1383
1384 println!("\n{} Restoring Harvest Database", "[H]".magenta().bold());
1385 println!("{}", "=".repeat(60));
1386
1387 let backup_path = db_path.with_extension("db.backup");
1389 if db_path.exists() {
1390 fs::copy(&db_path, &backup_path)?;
1391 println!(
1392 "{} Created backup: {}",
1393 "[+]".green(),
1394 backup_path.display()
1395 );
1396 }
1397
1398 let output = Command::new("git")
1400 .current_dir(db_dir)
1401 .args(["checkout", commit, "--", db_name])
1402 .output()
1403 .context("Failed to run git checkout")?;
1404
1405 if !output.status.success() {
1406 let stderr = String::from_utf8_lossy(&output.stderr);
1407 anyhow::bail!("Git restore failed: {}", stderr);
1408 }
1409
1410 println!(
1411 "{} Restored database from commit: {}",
1412 "[+]".green(),
1413 commit
1414 );
1415
1416 Ok(())
1417}
1418
1419fn get_db_path(path: Option<&str>) -> Result<PathBuf> {
1424 if let Some(p) = path {
1425 return Ok(PathBuf::from(p));
1426 }
1427
1428 if let Ok(p) = std::env::var("CSM_HARVEST_DB") {
1430 return Ok(PathBuf::from(p));
1431 }
1432
1433 Ok(std::env::current_dir()?.join("chat_sessions.db"))
1435}
1436
/// Create (or bring up to date) the harvest SQLite database schema at `path`.
///
/// Idempotent: every table, index, virtual table and trigger is created with
/// IF NOT EXISTS, so calling this on an existing database is safe. The schema
/// has two generations living side by side: the original `sessions`/
/// `messages`/`checkpoints` tables and the richer `messages_v2`/
/// `tool_invocations`/`file_changes`/`message_checkpoints` tables.
///
/// # Errors
/// Returns an error if the database cannot be opened or any DDL statement fails.
fn create_harvest_database(path: &Path) -> Result<()> {
    let conn = Connection::open(path)?;

    // Tuning pragmas; page_size only takes effect on a freshly created
    // database, WAL + NORMAL trade a little durability for concurrency.
    conn.execute_batch(
        "PRAGMA page_size = 8192;
         PRAGMA journal_mode = WAL;
         PRAGMA synchronous = NORMAL;",
    )?;

    // One batch so the whole schema is applied in a single implicit transaction.
    conn.execute_batch(
        r#"
        -- Sessions table (original harvest format)
        CREATE TABLE IF NOT EXISTS sessions (
            id TEXT PRIMARY KEY,
            provider TEXT NOT NULL,
            provider_type TEXT,
            provider_version TEXT,
            schema_version INTEGER DEFAULT 3,
            file_format TEXT DEFAULT 'json',
            workspace_id TEXT,
            workspace_name TEXT,
            workspace_path TEXT,
            title TEXT NOT NULL,
            message_count INTEGER DEFAULT 0,
            created_at INTEGER NOT NULL,
            updated_at INTEGER NOT NULL,
            harvested_at INTEGER NOT NULL,
            session_json TEXT NOT NULL
        );

        CREATE INDEX IF NOT EXISTS idx_sessions_provider ON sessions(provider);
        CREATE INDEX IF NOT EXISTS idx_sessions_workspace ON sessions(workspace_id);
        CREATE INDEX IF NOT EXISTS idx_sessions_updated ON sessions(updated_at);
        CREATE INDEX IF NOT EXISTS idx_sessions_workspace_path ON sessions(workspace_path);

        -- Enhanced messages table with raw markdown and metadata
        CREATE TABLE IF NOT EXISTS messages_v2 (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT NOT NULL,
            message_index INTEGER NOT NULL,
            request_id TEXT,
            response_id TEXT,
            role TEXT NOT NULL,
            content_raw TEXT NOT NULL,
            content_markdown TEXT,
            model_id TEXT,
            timestamp INTEGER,
            is_canceled INTEGER DEFAULT 0,
            metadata_json TEXT,
            created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
            UNIQUE(session_id, message_index, role)
        );

        CREATE INDEX IF NOT EXISTS idx_messages_v2_session ON messages_v2(session_id);
        CREATE INDEX IF NOT EXISTS idx_messages_v2_role ON messages_v2(role);
        CREATE INDEX IF NOT EXISTS idx_messages_v2_timestamp ON messages_v2(timestamp);

        -- Tool invocations within messages (file edits, terminal commands, etc.)
        CREATE TABLE IF NOT EXISTS tool_invocations (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_id INTEGER NOT NULL,
            session_id TEXT NOT NULL,
            tool_name TEXT NOT NULL,
            tool_call_id TEXT,
            invocation_index INTEGER DEFAULT 0,
            input_json TEXT,
            output_json TEXT,
            status TEXT DEFAULT 'pending',
            is_confirmed INTEGER DEFAULT 0,
            timestamp INTEGER,
            FOREIGN KEY (message_id) REFERENCES messages_v2(id) ON DELETE CASCADE,
            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
        );

        CREATE INDEX IF NOT EXISTS idx_tool_invocations_message ON tool_invocations(message_id);
        CREATE INDEX IF NOT EXISTS idx_tool_invocations_session ON tool_invocations(session_id);
        CREATE INDEX IF NOT EXISTS idx_tool_invocations_tool ON tool_invocations(tool_name);

        -- File changes/diffs associated with tool invocations
        CREATE TABLE IF NOT EXISTS file_changes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            tool_invocation_id INTEGER,
            session_id TEXT NOT NULL,
            message_index INTEGER,
            file_path TEXT NOT NULL,
            change_type TEXT NOT NULL,
            old_content TEXT,
            new_content TEXT,
            diff_unified TEXT,
            line_start INTEGER,
            line_end INTEGER,
            timestamp INTEGER,
            FOREIGN KEY (tool_invocation_id) REFERENCES tool_invocations(id) ON DELETE CASCADE,
            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
        );

        CREATE INDEX IF NOT EXISTS idx_file_changes_tool ON file_changes(tool_invocation_id);
        CREATE INDEX IF NOT EXISTS idx_file_changes_session ON file_changes(session_id);
        CREATE INDEX IF NOT EXISTS idx_file_changes_path ON file_changes(file_path);

        -- Message-level checkpoints for versioning
        CREATE TABLE IF NOT EXISTS message_checkpoints (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT NOT NULL,
            message_index INTEGER NOT NULL,
            checkpoint_number INTEGER NOT NULL,
            content_hash TEXT NOT NULL,
            snapshot_json TEXT,
            file_state_json TEXT,
            created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
            UNIQUE(session_id, message_index, checkpoint_number)
        );

        CREATE INDEX IF NOT EXISTS idx_message_checkpoints_session ON message_checkpoints(session_id);
        CREATE INDEX IF NOT EXISTS idx_sessions_title ON sessions(title);

        -- Messages table for full-text search
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT NOT NULL,
            message_index INTEGER NOT NULL,
            role TEXT NOT NULL,
            content TEXT NOT NULL,
            created_at INTEGER,
            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
            UNIQUE(session_id, message_index)
        );

        CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id);
        CREATE INDEX IF NOT EXISTS idx_messages_role ON messages(role);

        -- Checkpoints for version tracking
        CREATE TABLE IF NOT EXISTS checkpoints (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT NOT NULL,
            checkpoint_number INTEGER NOT NULL,
            message TEXT,
            message_count INTEGER NOT NULL,
            content_hash TEXT NOT NULL,
            snapshot TEXT,
            created_at TEXT NOT NULL DEFAULT (datetime('now')),
            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
            UNIQUE(session_id, checkpoint_number)
        );

        CREATE INDEX IF NOT EXISTS idx_checkpoints_session ON checkpoints(session_id);

        -- Share links for importing shared conversations
        CREATE TABLE IF NOT EXISTS share_links (
            id TEXT PRIMARY KEY,
            session_id TEXT,
            provider TEXT NOT NULL,
            url TEXT NOT NULL UNIQUE,
            share_id TEXT NOT NULL,
            title TEXT,
            imported INTEGER DEFAULT 0,
            imported_at INTEGER,
            created_at INTEGER NOT NULL,
            FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE SET NULL
        );

        CREATE INDEX IF NOT EXISTS idx_share_links_provider ON share_links(provider);
        CREATE INDEX IF NOT EXISTS idx_share_links_imported ON share_links(imported);

        -- Harvest metadata
        CREATE TABLE IF NOT EXISTS harvest_metadata (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        );

        INSERT OR REPLACE INTO harvest_metadata (key, value)
        VALUES ('version', '2.1'),
               ('created_at', datetime('now'));

        -- Full-text search for messages (standalone FTS table)
        CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
            content_raw
        );

        -- Triggers to keep FTS index in sync with messages_v2
        -- Note: messages_fts is a standalone FTS5 table, so use regular DELETE (not the
        -- external-content 'delete' command which causes SQL logic errors on standalone tables)
        CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
        END;

        CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
            DELETE FROM messages_fts WHERE rowid = old.id;
        END;

        CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
            DELETE FROM messages_fts WHERE rowid = old.id;
            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
        END;
        "#,
    )?;

    Ok(())
}
1639
1640fn insert_or_update_session(
1641 conn: &Connection,
1642 session: &ChatSession,
1643 provider: &str,
1644 workspace_id: Option<&str>,
1645 workspace_name: Option<&str>,
1646 provider_version: Option<&str>,
1647 schema_version: u32,
1648 file_format: &str,
1649 workspace_path: Option<&str>,
1650) -> Result<bool> {
1651 let session_id = session
1652 .session_id
1653 .clone()
1654 .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
1655
1656 let now = Utc::now().timestamp_millis();
1657 let session_json = serde_json::to_string(session)?;
1658
1659 let existing: Option<i64> = conn
1661 .query_row(
1662 "SELECT updated_at FROM sessions WHERE id = ?",
1663 [&session_id],
1664 |row| row.get(0),
1665 )
1666 .ok();
1667
1668 let updated = existing.is_some();
1669
1670 conn.execute(
1671 r#"
1672 INSERT OR REPLACE INTO sessions
1673 (id, provider, provider_type, workspace_id, workspace_name, title,
1674 message_count, created_at, updated_at, harvested_at, session_json,
1675 provider_version, schema_version, file_format, workspace_path)
1676 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1677 "#,
1678 params![
1679 session_id,
1680 provider,
1681 provider,
1682 workspace_id,
1683 workspace_name,
1684 session.title(),
1685 session.request_count() as i64,
1686 session.creation_date,
1687 session.last_message_date,
1688 now,
1689 session_json,
1690 provider_version,
1691 schema_version as i64,
1692 file_format,
1693 workspace_path,
1694 ],
1695 )?;
1696
1697 populate_enhanced_messages(conn, &session_id, session)?;
1699
1700 Ok(updated)
1701}
1702
/// Rebuild the enhanced message tables (`messages_v2`, `tool_invocations`,
/// `file_changes`) for one session from its parsed `ChatSession`.
///
/// Performs a full delete-then-reinsert for the session so these tables
/// always mirror the latest harvested JSON. Message indices interleave the
/// turn: the user message sits at `idx * 2` and the assistant response at
/// `idx * 2 + 1`, keeping a stable ordering per request.
fn populate_enhanced_messages(
    conn: &Connection,
    session_id: &str,
    session: &ChatSession,
) -> Result<()> {
    // Wipe any previously stored rows for this session before re-inserting.
    conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
    conn.execute(
        "DELETE FROM tool_invocations WHERE session_id = ?",
        [session_id],
    )?;
    conn.execute(
        "DELETE FROM file_changes WHERE session_id = ?",
        [session_id],
    )?;

    for (idx, request) in session.requests.iter().enumerate() {
        let timestamp = request.timestamp;
        let request_id = request.request_id.as_deref();
        let response_id = request.response_id.as_deref();
        let model_id = request.model_id.as_deref();
        let is_canceled = request.is_canceled.unwrap_or(false);

        // User half of the turn: only persisted when there is non-empty text.
        if let Some(ref message) = request.message {
            let content = message.text.clone().unwrap_or_default();
            if !content.is_empty() {
                let metadata = serde_json::json!({
                    "variable_data": request.variable_data,
                });

                conn.execute(
                    r#"
                    INSERT OR REPLACE INTO messages_v2
                    (session_id, message_index, request_id, response_id, role,
                     content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
                    VALUES (?, ?, ?, ?, 'user', ?, ?, ?, ?, 0, ?)
                    "#,
                    params![
                        session_id,
                        (idx * 2) as i64,
                        request_id,
                        response_id,
                        &content,
                        &content, model_id,
                        timestamp,
                        serde_json::to_string(&metadata).ok(),
                    ],
                )?;
            }
        }

        // Assistant half: persisted when it has text OR tool activity
        // (a tool-only response still matters for the file_changes history).
        if let Some(ref response) = request.response {
            let (content, tool_invocations) = extract_response_content_and_tools(response);

            if !content.is_empty() || !tool_invocations.is_empty() {
                let metadata = serde_json::json!({
                    "content_references": request.content_references,
                    "code_citations": request.code_citations,
                    "response_markdown_info": request.response_markdown_info,
                });

                conn.execute(
                    r#"
                    INSERT OR REPLACE INTO messages_v2
                    (session_id, message_index, request_id, response_id, role,
                     content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
                    VALUES (?, ?, ?, ?, 'assistant', ?, ?, ?, ?, ?, ?)
                    "#,
                    params![
                        session_id,
                        (idx * 2 + 1) as i64,
                        request_id,
                        response_id,
                        &content,
                        &content,
                        model_id,
                        timestamp,
                        is_canceled as i64,
                        serde_json::to_string(&metadata).ok(),
                    ],
                )?;

                // Row id of the assistant message just inserted; tool rows
                // reference it via message_id.
                let message_id: i64 = conn.last_insert_rowid();

                for (inv_idx, invocation) in tool_invocations.iter().enumerate() {
                    insert_tool_invocation(
                        conn, message_id, session_id, inv_idx, invocation, timestamp,
                    )?;
                }
            }
        }
    }

    Ok(())
}
1804
1805fn extract_response_content_and_tools(
1807 response: &serde_json::Value,
1808) -> (String, Vec<serde_json::Value>) {
1809 let mut text_parts = Vec::new();
1810 let mut tool_invocations = Vec::new();
1811
1812 if let Some(items) = response.as_array() {
1813 for item in items {
1814 let kind = item.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1815
1816 match kind {
1817 "toolInvocationSerialized" => {
1818 tool_invocations.push(item.clone());
1819 }
1820 "thinking" => {
1821 continue;
1823 }
1824 _ => {
1825 if let Some(value) = item.get("value").and_then(|v| v.as_str()) {
1826 let trimmed = value.trim();
1828 if !trimmed.is_empty() && !is_empty_code_block(trimmed) {
1829 text_parts.push(value.to_string());
1830 }
1831 }
1832 }
1833 }
1834 }
1835 }
1836
1837 (text_parts.join("\n\n"), tool_invocations)
1838}
1839
1840fn insert_tool_invocation(
1842 conn: &Connection,
1843 message_id: i64,
1844 session_id: &str,
1845 inv_idx: usize,
1846 invocation: &serde_json::Value,
1847 timestamp: Option<i64>,
1848) -> Result<()> {
1849 let tool_name = invocation
1850 .get("toolId")
1851 .and_then(|t| t.as_str())
1852 .unwrap_or("unknown");
1853 let tool_call_id = invocation.get("toolCallId").and_then(|t| t.as_str());
1854 let is_complete = invocation
1855 .get("isComplete")
1856 .and_then(|c| c.as_bool())
1857 .unwrap_or(false);
1858 let is_confirmed = invocation.get("isConfirmed");
1859 let tool_data = invocation.get("toolSpecificData");
1860
1861 let input_json = tool_data.map(|d| serde_json::to_string(d).unwrap_or_default());
1862 let status = if is_complete { "complete" } else { "pending" };
1863 let confirmed = match is_confirmed {
1864 Some(v) => v
1865 .get("type")
1866 .and_then(|t| t.as_i64())
1867 .map(|t| t > 0)
1868 .unwrap_or(false),
1869 None => false,
1870 };
1871
1872 conn.execute(
1873 r#"
1874 INSERT INTO tool_invocations
1875 (message_id, session_id, tool_name, tool_call_id, invocation_index,
1876 input_json, status, is_confirmed, timestamp)
1877 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
1878 "#,
1879 params![
1880 message_id,
1881 session_id,
1882 tool_name,
1883 tool_call_id,
1884 inv_idx as i64,
1885 input_json,
1886 status,
1887 confirmed as i64,
1888 timestamp,
1889 ],
1890 )?;
1891
1892 let tool_invocation_id = conn.last_insert_rowid();
1893
1894 if let Some(data) = tool_data {
1896 insert_file_changes(conn, tool_invocation_id, session_id, data, timestamp)?;
1897 }
1898
1899 Ok(())
1900}
1901
/// Record the concrete effect of a tool invocation in `file_changes`,
/// dispatching on the payload's "kind" discriminator.
///
/// - "terminal": stores the command (original vs tool-edited) and its output
///   under the sentinel path "[terminal]".
/// - "replaceFile"/"editFile": stores old/new content plus a synthesized
///   unified diff when both sides are present.
/// - "createFile": stores the new file content.
/// - "readFile": records the read with no content.
/// - any other non-empty kind: stored generically with the raw JSON in
///   diff_unified; an empty kind is silently ignored.
fn insert_file_changes(
    conn: &Connection,
    tool_invocation_id: i64,
    session_id: &str,
    tool_data: &serde_json::Value,
    timestamp: Option<i64>,
) -> Result<()> {
    let kind = tool_data.get("kind").and_then(|k| k.as_str()).unwrap_or("");

    match kind {
        "terminal" => {
            // Terminal runs are modeled as a 'command' change on a sentinel path.
            if let Some(cmd_line) = tool_data.get("commandLine") {
                let original = cmd_line.get("original").and_then(|o| o.as_str());
                let edited = cmd_line.get("toolEdited").and_then(|e| e.as_str());
                let output = tool_data
                    .get("terminalCommandOutput")
                    .map(|o| serde_json::to_string(o).unwrap_or_default());

                conn.execute(
                    r#"
                    INSERT INTO file_changes
                    (tool_invocation_id, session_id, file_path, change_type,
                     old_content, new_content, diff_unified, timestamp)
                    VALUES (?, ?, '[terminal]', 'command', ?, ?, ?, ?)
                    "#,
                    params![
                        tool_invocation_id,
                        session_id,
                        original,
                        // new_content is the command actually run: the tool's
                        // edit when present, otherwise the original.
                        edited.or(original),
                        output,
                        timestamp,
                    ],
                )?;
            }
        }
        "replaceFile" | "editFile" => {
            let file_path = tool_data
                .get("uri")
                .or_else(|| tool_data.get("filePath"))
                .and_then(|p| p.as_str())
                .unwrap_or("[unknown]");
            let old_string = tool_data.get("oldString").and_then(|s| s.as_str());
            let new_string = tool_data.get("newString").and_then(|s| s.as_str());

            // A diff can only be synthesized when both sides are available.
            let diff = if let (Some(old), Some(new)) = (old_string, new_string) {
                Some(generate_unified_diff(old, new, file_path))
            } else {
                None
            };

            conn.execute(
                r#"
                INSERT INTO file_changes
                (tool_invocation_id, session_id, file_path, change_type,
                 old_content, new_content, diff_unified, timestamp)
                VALUES (?, ?, ?, 'edit', ?, ?, ?, ?)
                "#,
                params![
                    tool_invocation_id,
                    session_id,
                    file_path,
                    old_string,
                    new_string,
                    diff,
                    timestamp,
                ],
            )?;
        }
        "createFile" => {
            let file_path = tool_data
                .get("uri")
                .or_else(|| tool_data.get("filePath"))
                .and_then(|p| p.as_str())
                .unwrap_or("[unknown]");
            let content = tool_data.get("content").and_then(|c| c.as_str());

            conn.execute(
                r#"
                INSERT INTO file_changes
                (tool_invocation_id, session_id, file_path, change_type,
                 new_content, timestamp)
                VALUES (?, ?, ?, 'create', ?, ?)
                "#,
                params![
                    tool_invocation_id,
                    session_id,
                    file_path,
                    content,
                    timestamp,
                ],
            )?;
        }
        "readFile" => {
            let file_path = tool_data
                .get("uri")
                .or_else(|| tool_data.get("filePath"))
                .and_then(|p| p.as_str())
                .unwrap_or("[unknown]");

            conn.execute(
                r#"
                INSERT INTO file_changes
                (tool_invocation_id, session_id, file_path, change_type, timestamp)
                VALUES (?, ?, ?, 'read', ?)
                "#,
                params![tool_invocation_id, session_id, file_path, timestamp,],
            )?;
        }
        _ => {
            // Unknown but named kinds are preserved generically; the full
            // payload goes into diff_unified for later inspection.
            if !kind.is_empty() {
                let data_json = serde_json::to_string(tool_data).ok();
                conn.execute(
                    r#"
                    INSERT INTO file_changes
                    (tool_invocation_id, session_id, file_path, change_type,
                     diff_unified, timestamp)
                    VALUES (?, ?, ?, ?, ?, ?)
                    "#,
                    params![
                        tool_invocation_id,
                        session_id,
                        format!("[{}]", kind),
                        kind,
                        data_json,
                        timestamp,
                    ],
                )?;
            }
        }
    }

    Ok(())
}
2041
/// Render a simple unified-diff-style view of `old` vs `new`.
///
/// Lines are compared positionally (no LCS alignment): equal lines at the
/// same index become context lines once the hunk has opened, differing lines
/// become a `-`/`+` pair, and trailing lines present on only one side are
/// emitted as pure removals/additions. At most one hunk header is written;
/// it reports the first differing 1-based line and each side's total line
/// count. Identical inputs produce only the `---`/`+++` file headers.
fn generate_unified_diff(old: &str, new: &str, file_path: &str) -> String {
    let lhs: Vec<&str> = old.lines().collect();
    let rhs: Vec<&str> = new.lines().collect();

    let mut rendered = format!("--- a/{}\n+++ b/{}\n", file_path, file_path);

    let mut body: Vec<String> = Vec::new();
    let mut first_change: Option<usize> = None;

    for i in 0..lhs.len().max(rhs.len()) {
        match (lhs.get(i), rhs.get(i)) {
            (Some(o), Some(n)) if o == n => {
                // Context lines only appear after the hunk has opened.
                if first_change.is_some() {
                    body.push(format!(" {}", o));
                }
            }
            (Some(o), Some(n)) => {
                first_change.get_or_insert(i + 1);
                body.push(format!("-{}", o));
                body.push(format!("+{}", n));
            }
            (Some(o), None) => {
                first_change.get_or_insert(i + 1);
                body.push(format!("-{}", o));
            }
            (None, Some(n)) => {
                first_change.get_or_insert(i + 1);
                body.push(format!("+{}", n));
            }
            (None, None) => break,
        }
    }

    if !body.is_empty() {
        rendered.push_str(&format!(
            "@@ -{},{} +{},{} @@\n",
            first_change.unwrap_or(0),
            lhs.len(),
            first_change.unwrap_or(0),
            rhs.len()
        ));
        for line in body {
            rendered.push_str(&line);
            rendered.push('\n');
        }
    }

    rendered
}
2107
/// Harvest conversations from authenticated web providers (ChatGPT, Claude)
/// using session cookies extracted from installed browsers.
///
/// Filtering: when `include_providers` is non-empty, only listed providers
/// (case-insensitive) are tried; `exclude_providers` always wins. Failures
/// per provider are reported but do not abort the run; `stats` is updated by
/// the per-provider harvest helpers.
fn harvest_web_providers(
    conn: &Connection,
    stats: &mut HarvestStats,
    include_providers: &[String],
    exclude_providers: &[String],
) -> Result<()> {
    use crate::browser::extract_provider_cookies;

    println!("\n{} Harvesting from web providers...", "[*]".blue());

    // (display name, provider key, auth cookie name). The cookie name is
    // currently unused here — presumably extract_provider_cookies knows it
    // internally; TODO confirm and drop the third tuple field if so.
    let web_provider_configs: Vec<(&str, &str, &str)> = vec![
        ("ChatGPT", "chatgpt", "__Secure-next-auth.session-token"),
        ("Claude", "claude", "sessionKey"),
    ];

    let mut web_sessions_harvested = 0;

    for (display_name, provider_key, _cookie_name) in &web_provider_configs {
        // Include-list: when present, the provider must be on it.
        if !include_providers.is_empty()
            && !include_providers
                .iter()
                .any(|p| p.eq_ignore_ascii_case(provider_key))
        {
            continue;
        }
        // Exclude-list always wins.
        if exclude_providers
            .iter()
            .any(|p| p.eq_ignore_ascii_case(provider_key))
        {
            continue;
        }

        print!("  {} Checking {} ... ", "[-]".yellow(), display_name);

        if let Some(creds) = extract_provider_cookies(provider_key) {
            if let Some(session_token) = &creds.session_token {
                println!("{}", "authenticated".green());

                // Dispatch to the provider-specific harvester.
                let result = match *provider_key {
                    "chatgpt" => harvest_chatgpt_sessions(conn, session_token, stats),
                    "claude" => harvest_claude_sessions(conn, session_token, stats),
                    _ => Ok(0),
                };

                match result {
                    Ok(count) => {
                        if count > 0 {
                            println!(
                                "  {} Harvested {} sessions from {}",
                                "[+]".green(),
                                count.to_string().cyan(),
                                display_name
                            );
                            web_sessions_harvested += count;
                        }
                    }
                    Err(e) => {
                        // Per-provider failures are non-fatal; keep going.
                        println!(
                            "  {} Failed to harvest {}: {:?}",
                            "[!]".red(),
                            display_name,
                            e
                        );
                    }
                }
            } else {
                println!("{}", "no session token".yellow());
            }
        } else {
            println!("{}", "not authenticated".yellow());
        }
    }

    if web_sessions_harvested > 0 {
        println!(
            "  {} Total web sessions harvested: {}",
            "[+]".green(),
            web_sessions_harvested.to_string().cyan()
        );
    }

    Ok(())
}
2196
/// Harvest ChatGPT web conversations into the harvest database.
///
/// Lists up to 100 non-archived conversations with the browser session
/// token, skips ids already stored for provider 'chatgpt', fetches and
/// inserts the remainder, and updates `stats.sessions_added`. Individual
/// fetch/insert failures are logged to stderr and skipped.
///
/// Returns the number of newly harvested sessions.
fn harvest_chatgpt_sessions(
    conn: &Connection,
    session_token: &str,
    stats: &mut HarvestStats,
) -> Result<usize> {
    use crate::providers::cloud::chatgpt::ChatGPTProvider;
    use crate::providers::cloud::common::{CloudProvider, FetchOptions};

    let provider = ChatGPTProvider::with_session_token(session_token.to_string());

    let options = FetchOptions {
        limit: Some(100),
        include_archived: false,
        after: None,
        before: None,
        session_token: Some(session_token.to_string()),
    };

    let conversations = provider
        .list_conversations(&options)
        .context("Failed to list ChatGPT conversations")?;

    let mut harvested = 0;

    for conv_summary in conversations {
        // Skip conversations already harvested to avoid refetching them.
        let exists: bool = conn
            .query_row(
                "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'chatgpt')",
                params![&conv_summary.id],
                |row| row.get(0),
            )
            .unwrap_or(false);

        if exists {
            continue;
        }

        match provider.fetch_conversation(&conv_summary.id) {
            Ok(conv) => {
                if let Err(e) =
                    insert_cloud_conversation_to_harvest_db(conn, &conv, "chatgpt", None)
                {
                    eprintln!("Failed to insert ChatGPT session: {}", e);
                    continue;
                }
                harvested += 1;
                stats.sessions_added += 1;
            }
            Err(e) => {
                eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
            }
        }

        // Light rate limiting between per-conversation fetches.
        std::thread::sleep(std::time::Duration::from_millis(100));
    }

    Ok(harvested)
}
2262
/// Harvest Claude (claude.ai) web conversations into the harvest database.
///
/// Mirrors `harvest_chatgpt_sessions` with the Anthropic provider: lists up
/// to 100 non-archived conversations, skips ids already stored for provider
/// 'claude', inserts the rest, updates `stats.sessions_added`, and logs
/// individual failures without aborting.
///
/// Returns the number of newly harvested sessions.
fn harvest_claude_sessions(
    conn: &Connection,
    session_token: &str,
    stats: &mut HarvestStats,
) -> Result<usize> {
    use crate::providers::cloud::anthropic::AnthropicProvider;
    use crate::providers::cloud::common::{CloudProvider, FetchOptions};

    let provider = AnthropicProvider::with_session_token(session_token.to_string());

    let options = FetchOptions {
        limit: Some(100),
        include_archived: false,
        after: None,
        before: None,
        session_token: Some(session_token.to_string()),
    };

    let conversations = provider
        .list_conversations(&options)
        .context("Failed to list Claude conversations")?;

    let mut harvested = 0;

    for conv_summary in conversations {
        // Skip conversations already harvested to avoid refetching them.
        let exists: bool = conn
            .query_row(
                "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'claude')",
                params![&conv_summary.id],
                |row| row.get(0),
            )
            .unwrap_or(false);

        if exists {
            continue;
        }

        match provider.fetch_conversation(&conv_summary.id) {
            Ok(conv) => {
                if let Err(e) = insert_cloud_conversation_to_harvest_db(conn, &conv, "claude", None)
                {
                    eprintln!("Failed to insert Claude session: {}", e);
                    continue;
                }
                harvested += 1;
                stats.sessions_added += 1;
            }
            Err(e) => {
                eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
            }
        }

        // Light rate limiting between per-conversation fetches.
        std::thread::sleep(std::time::Duration::from_millis(100));
    }

    Ok(harvested)
}
2324
/// Upsert a cloud-fetched conversation into `sessions` and rebuild its
/// `messages_v2` rows.
///
/// The conversation is re-serialized into a compact JSON document for the
/// `session_json` column (id, title, model, RFC 3339 timestamps, and a
/// flattened message list). A missing `updated_at` falls back to
/// `created_at`, and a missing title is stored as "Untitled".
fn insert_cloud_conversation_to_harvest_db(
    conn: &Connection,
    conv: &crate::providers::cloud::common::CloudConversation,
    provider: &str,
    workspace_name: Option<&str>,
) -> Result<()> {
    let now = Utc::now().timestamp_millis();
    let created_at = conv.created_at.timestamp_millis();
    let updated_at = conv
        .updated_at
        .map(|dt| dt.timestamp_millis())
        .unwrap_or(created_at);

    // Canonical JSON snapshot of the conversation for the session_json column.
    let session_json = serde_json::json!({
        "id": conv.id,
        "title": conv.title,
        "model": conv.model,
        "created_at": conv.created_at.to_rfc3339(),
        "updated_at": conv.updated_at.map(|dt| dt.to_rfc3339()),
        "messages": conv.messages.iter().map(|m| {
            serde_json::json!({
                "id": m.id,
                "role": m.role,
                "content": m.content,
                "timestamp": m.timestamp.map(|dt| dt.to_rfc3339()),
                "model": m.model,
            })
        }).collect::<Vec<_>>(),
    });

    conn.execute(
        r#"
        INSERT OR REPLACE INTO sessions
        (id, provider, provider_type, workspace_id, workspace_name, title,
         message_count, created_at, updated_at, harvested_at, session_json)
        VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
        "#,
        params![
            conv.id,
            provider,
            provider,
            workspace_name,
            conv.title.clone().unwrap_or_else(|| "Untitled".to_string()),
            conv.messages.len() as i64,
            created_at,
            updated_at,
            now,
            session_json.to_string(),
        ],
    )?;

    // Keep the normalized message table in sync with the stored JSON.
    populate_cloud_messages(conn, &conv.id, conv)?;

    Ok(())
}
2383
2384fn populate_cloud_messages(
2386 conn: &Connection,
2387 session_id: &str,
2388 conv: &crate::providers::cloud::common::CloudConversation,
2389) -> Result<()> {
2390 conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
2392
2393 for (idx, message) in conv.messages.iter().enumerate() {
2394 let timestamp = message.timestamp.map(|dt| dt.timestamp_millis());
2395 let role = match message.role.as_str() {
2396 "user" | "human" => "user",
2397 "assistant" => "assistant",
2398 "system" => "system",
2399 other => other,
2400 };
2401
2402 conn.execute(
2403 r#"
2404 INSERT INTO messages_v2
2405 (session_id, message_index, request_id, response_id, role,
2406 content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
2407 VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, 0, NULL)
2408 "#,
2409 params![
2410 session_id,
2411 idx as i64,
2412 message.id,
2413 role,
2414 &message.content,
2415 &message.content,
2416 message.model.as_deref(),
2417 timestamp,
2418 ],
2419 )?;
2420 }
2421
2422 Ok(())
2423}
2424
2425fn update_harvest_metadata(conn: &Connection) -> Result<()> {
2426 conn.execute(
2427 "INSERT OR REPLACE INTO harvest_metadata (key, value) VALUES ('last_harvest', datetime('now'))",
2428 [],
2429 )?;
2430 Ok(())
2431}
2432
2433fn init_git_tracking(db_path: &Path) -> Result<()> {
2434 let db_dir = db_path.parent().unwrap_or(Path::new("."));
2435 let db_name = db_path.file_name().unwrap().to_str().unwrap();
2436
2437 println!("\n{} Initializing Git Tracking", "[G]".green().bold());
2438
2439 if !db_dir.join(".git").exists() {
2441 let output = Command::new("git")
2442 .current_dir(db_dir)
2443 .args(["init"])
2444 .output()
2445 .context("Failed to run git init")?;
2446
2447 if !output.status.success() {
2448 anyhow::bail!(
2449 "Git init failed: {}",
2450 String::from_utf8_lossy(&output.stderr)
2451 );
2452 }
2453 println!("{} Initialized git repository", "[+]".green());
2454 } else {
2455 println!("{} Git repository already exists", "[i]".blue());
2456 }
2457
2458 let gitignore_path = db_dir.join(".gitignore");
2460 if !gitignore_path.exists() {
2461 fs::write(
2462 &gitignore_path,
2463 "*.db-journal\n*.db-wal\n*.db-shm\n*.backup\n",
2464 )?;
2465 println!("{} Created .gitignore", "[+]".green());
2466 }
2467
2468 let output = Command::new("git")
2470 .current_dir(db_dir)
2471 .args(["add", db_name])
2472 .output()?;
2473
2474 if output.status.success() {
2475 println!("{} Added {} to git", "[+]".green(), db_name);
2476 }
2477
2478 let output = Command::new("git")
2480 .current_dir(db_dir)
2481 .args(["rev-parse", "HEAD"])
2482 .output()?;
2483
2484 if !output.status.success() {
2485 let output = Command::new("git")
2486 .current_dir(db_dir)
2487 .args(["commit", "-m", "Initialize harvest database"])
2488 .output()?;
2489
2490 if output.status.success() {
2491 println!("{} Created initial commit", "[+]".green());
2492 }
2493 }
2494
2495 println!("\n{} Git tracking enabled!", "[+]".green().bold());
2496 println!(" Run 'csm harvest git commit -m \"message\"' to save changes");
2497 println!(" Run 'csm harvest git log' to view history");
2498
2499 Ok(())
2500}
2501
2502fn git_commit_harvest(db_path: &Path, message: &str) -> Result<()> {
2503 let db_dir = db_path.parent().unwrap_or(Path::new("."));
2504 let db_name = db_path.file_name().unwrap().to_str().unwrap();
2505
2506 let output = Command::new("git")
2508 .current_dir(db_dir)
2509 .args(["add", db_name])
2510 .output()
2511 .context("Failed to stage database")?;
2512
2513 if !output.status.success() {
2514 anyhow::bail!(
2515 "Git add failed: {}",
2516 String::from_utf8_lossy(&output.stderr)
2517 );
2518 }
2519
2520 let output = Command::new("git")
2522 .current_dir(db_dir)
2523 .args(["commit", "-m", message])
2524 .output()
2525 .context("Failed to commit")?;
2526
2527 if !output.status.success() {
2528 let stderr = String::from_utf8_lossy(&output.stderr);
2529 if stderr.contains("nothing to commit") {
2530 println!("{} Nothing to commit", "[i]".blue());
2531 return Ok(());
2532 }
2533 anyhow::bail!("Git commit failed: {}", stderr);
2534 }
2535
2536 let output = Command::new("git")
2538 .current_dir(db_dir)
2539 .args(["rev-parse", "--short", "HEAD"])
2540 .output()?;
2541
2542 let hash = String::from_utf8_lossy(&output.stdout).trim().to_string();
2543 println!("{} Committed: {} - {}", "[+]".green(), hash.cyan(), message);
2544
2545 Ok(())
2546}
2547
/// Register a provider share link (ChatGPT/Claude/Gemini/Perplexity URL) in
/// the harvest database as a pending import.
///
/// The URL is parsed to detect the provider and share id; re-registering an
/// already-imported link is a no-op. Content fetching is not implemented
/// yet — the link is only recorded for later manual import.
pub fn harvest_share(
    db_path: Option<&str>,
    url: &str,
    name: Option<&str>,
    _workspace: Option<&str>,
) -> Result<()> {
    let db_path = get_db_path(db_path)?;

    // Reject URLs we cannot classify before touching the database.
    let share_info = match ShareLinkParser::parse(url) {
        Some(info) => info,
        None => {
            println!("{} Unrecognized share link format", "[!]".yellow());
            println!(" Supported providers: ChatGPT, Claude, Gemini, Perplexity");
            println!(" Example URLs:");
            println!(" - https://chat.openai.com/share/abc123...");
            println!(" - https://claude.ai/share/xyz789...");
            anyhow::bail!("Could not parse share link URL");
        }
    };

    println!("{}", "=".repeat(60).cyan());
    println!("{}", " Share Link Import ".bold().cyan());
    println!("{}", "=".repeat(60).cyan());
    println!();

    println!(
        "{} Detected provider: {}",
        "[i]".blue(),
        share_info.provider.bold()
    );
    println!("{} Share ID: {}", "[i]".blue(), share_info.share_id);

    let db = ChatDatabase::open(&db_path)?;

    let conn = db.connection();
    // Dedupe on the exact URL: a prior registration (pending or imported)
    // short-circuits re-insertion below.
    let existing: Option<(String, i64)> = conn
        .query_row(
            "SELECT id, imported FROM share_links WHERE url = ?",
            [url],
            |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
        )
        .ok();

    if let Some((id, imported)) = existing {
        println!();
        println!(
            "{} This share link has already been registered (ID: {})",
            "[!]".yellow(),
            id
        );

        // imported is stored as 0/1 in SQLite.
        let status = if imported == 1 { "imported" } else { "pending" };
        println!("{} Current status: {}", "[i]".blue(), status);

        if imported == 1 {
            println!(
                "{} Session already imported - no action needed",
                "[+]".green()
            );
            return Ok(());
        }
    } else {
        // First sighting: record the link as pending (imported = 0).
        let link_id = uuid::Uuid::new_v4().to_string();
        let now = chrono::Utc::now().timestamp();
        conn.execute(
            "INSERT INTO share_links (id, url, provider, share_id, title, imported, created_at) VALUES (?, ?, ?, ?, ?, 0, ?)",
            params![link_id, url, share_info.provider, share_info.share_id, name, now],
        )?;

        println!(
            "{} Registered share link (ID: {})",
            "[+]".green(),
            // UUID text is ASCII, so the 8-byte slice cannot split a char.
            &link_id[..8]
        );
    }

    println!();
    println!("{}", "-".repeat(60).dimmed());
    println!();

    println!("{} Share link registered as pending", "[i]".blue());
    println!();
    println!("{}", "Note:".bold().yellow());
    println!(" Automatic content fetching from share links is not yet implemented.");
    println!(" For now, you can:");
    println!(" 1. Open the share link in your browser");
    println!(" 2. Export the conversation manually");
    println!(" 3. Import with: csm import <file>");
    println!();
    println!(" Or use 'csm harvest shares' to view pending links.");

    Ok(())
}
2658
2659pub fn harvest_shares(
2661 db_path: Option<&str>,
2662 status_filter: Option<&str>,
2663 limit: usize,
2664) -> Result<()> {
2665 let db_path = get_db_path(db_path)?;
2666
2667 if !db_path.exists() {
2668 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2669 }
2670
2671 let db = ChatDatabase::open(&db_path)?;
2672 let conn = db.connection();
2673
2674 println!("{}", "=".repeat(70).cyan());
2675 println!("{}", " Share Links ".bold().cyan());
2676 println!("{}", "=".repeat(70).cyan());
2677 println!();
2678
2679 let query = match status_filter {
2680 Some("pending") => format!(
2681 "SELECT id, url, provider, share_id, title, imported, created_at
2682 FROM share_links WHERE imported = 0 ORDER BY created_at DESC LIMIT {}",
2683 limit
2684 ),
2685 Some("imported") => format!(
2686 "SELECT id, url, provider, share_id, title, imported, created_at
2687 FROM share_links WHERE imported = 1 ORDER BY created_at DESC LIMIT {}",
2688 limit
2689 ),
2690 Some(_) | None => format!(
2691 "SELECT id, url, provider, share_id, title, imported, created_at
2692 FROM share_links ORDER BY created_at DESC LIMIT {}",
2693 limit
2694 ),
2695 };
2696
2697 let mut stmt = conn.prepare(&query)?;
2698
2699 let rows: Vec<_> = stmt
2700 .query_map([], |row| {
2701 Ok((
2702 row.get::<_, String>(0)?,
2703 row.get::<_, String>(1)?,
2704 row.get::<_, String>(2)?,
2705 row.get::<_, Option<String>>(3)?,
2706 row.get::<_, Option<String>>(4)?,
2707 row.get::<_, i64>(5)?,
2708 row.get::<_, i64>(6)?,
2709 ))
2710 })?
2711 .collect::<Result<Vec<_>, _>>()?;
2712
2713 if rows.is_empty() {
2714 if let Some(status) = status_filter {
2715 println!("{} No share links with status '{}'", "[i]".blue(), status);
2716 } else {
2717 println!("{} No share links found", "[i]".blue());
2718 }
2719 println!(" Use 'csm harvest share <url>' to add a share link");
2720 return Ok(());
2721 }
2722
2723 let pending_count: i64 = conn
2725 .query_row(
2726 "SELECT COUNT(*) FROM share_links WHERE imported = 0",
2727 [],
2728 |row| row.get(0),
2729 )
2730 .unwrap_or(0);
2731
2732 let imported_count: i64 = conn
2733 .query_row(
2734 "SELECT COUNT(*) FROM share_links WHERE imported = 1",
2735 [],
2736 |row| row.get(0),
2737 )
2738 .unwrap_or(0);
2739
2740 println!(
2741 "{} {} pending, {} imported",
2742 "[i]".blue(),
2743 pending_count.to_string().yellow(),
2744 imported_count.to_string().green()
2745 );
2746 println!();
2747
2748 for (id, url, provider, _share_id, title, imported, created_at) in rows {
2749 let status = if imported == 1 { "imported" } else { "pending" };
2750 let status_colored = if imported == 1 {
2751 status.green()
2752 } else {
2753 status.yellow()
2754 };
2755
2756 println!(
2757 "{} [{}] {} - {}",
2758 format!("#{}", &id[..8]).dimmed(),
2759 status_colored,
2760 provider.bold(),
2761 title.as_deref().unwrap_or("(untitled)")
2762 );
2763
2764 let display_url = if url.len() > 60 {
2766 format!("{}...", &url[..57])
2767 } else {
2768 url.clone()
2769 };
2770 println!(" {} {}", "URL:".dimmed(), display_url.dimmed());
2771
2772 let timestamp = chrono::DateTime::from_timestamp(created_at, 0)
2774 .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
2775 .unwrap_or_else(|| created_at.to_string());
2776 println!(" {} {}", "Added:".dimmed(), timestamp.dimmed());
2777 println!();
2778 }
2779
2780 Ok(())
2781}
2782
2783pub fn harvest_checkpoint(
2789 db_path: Option<&str>,
2790 session_id: &str,
2791 message: Option<&str>,
2792) -> Result<()> {
2793 let db_path = get_db_path(db_path)?;
2794
2795 if !db_path.exists() {
2796 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2797 }
2798
2799 let db = ChatDatabase::open(&db_path)?;
2800 let conn = db.connection();
2801
2802 let session: Option<(i64, String, i64)> = conn
2804 .query_row(
2805 "SELECT id, session_id, message_count FROM sessions WHERE session_id = ? OR id = ?",
2806 params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2807 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2808 )
2809 .ok();
2810
2811 let (internal_id, actual_session_id, msg_count) = match session {
2812 Some(s) => s,
2813 None => {
2814 println!("{} Session not found: {}", "[!]".red(), session_id);
2815 println!(" Use 'csm harvest list' to see available sessions");
2816 anyhow::bail!("Session not found");
2817 }
2818 };
2819
2820 let checkpoint_num: i64 = conn.query_row(
2822 "SELECT COALESCE(MAX(checkpoint_number), 0) + 1 FROM checkpoints WHERE session_id = ?",
2823 [internal_id],
2824 |row| row.get(0),
2825 )?;
2826
2827 let content_hash = format!(
2829 "{:x}",
2830 md5_hash(&format!("{}:{}", actual_session_id, msg_count))
2831 );
2832
2833 let messages: Vec<String> = {
2835 let mut stmt = conn
2836 .prepare("SELECT content FROM messages WHERE session_id = ? ORDER BY message_index")?;
2837 let rows = stmt.query_map([internal_id], |row| row.get::<_, String>(0))?;
2838 rows.filter_map(|r| r.ok()).collect()
2839 };
2840
2841 let snapshot = serde_json::json!({
2842 "message_count": msg_count,
2843 "messages": messages,
2844 });
2845
2846 let default_message = format!("Checkpoint {}", checkpoint_num);
2847 let message_text = message.unwrap_or(&default_message);
2848
2849 conn.execute(
2851 "INSERT INTO checkpoints (session_id, checkpoint_number, message, message_count, content_hash, snapshot)
2852 VALUES (?, ?, ?, ?, ?, ?)",
2853 params![
2854 internal_id,
2855 checkpoint_num,
2856 message_text,
2857 msg_count,
2858 content_hash,
2859 snapshot.to_string()
2860 ],
2861 )?;
2862
2863 println!("{}", "=".repeat(60).cyan());
2864 println!("{}", " Checkpoint Created ".bold().cyan());
2865 println!("{}", "=".repeat(60).cyan());
2866 println!();
2867 println!("{} Session: {}", "[+]".green(), actual_session_id);
2868 println!(
2869 "{} Checkpoint #{}: {}",
2870 "[+]".green(),
2871 checkpoint_num,
2872 message_text
2873 );
2874 println!("{} Messages: {}", "[i]".blue(), msg_count);
2875 println!("{} Hash: {}", "[i]".blue(), &content_hash[..16]);
2876 println!();
2877 println!(
2878 " Use 'csm harvest checkpoints {}' to view history",
2879 session_id
2880 );
2881
2882 Ok(())
2883}
2884
2885pub fn harvest_checkpoints(db_path: Option<&str>, session_id: &str) -> Result<()> {
2887 let db_path = get_db_path(db_path)?;
2888
2889 if !db_path.exists() {
2890 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2891 }
2892
2893 let db = ChatDatabase::open(&db_path)?;
2894 let conn = db.connection();
2895
2896 let session: Option<(i64, String, String)> = conn
2898 .query_row(
2899 "SELECT id, session_id, name FROM sessions WHERE session_id = ? OR id = ?",
2900 params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2901 |row| {
2902 Ok((
2903 row.get(0)?,
2904 row.get(1)?,
2905 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
2906 ))
2907 },
2908 )
2909 .ok();
2910
2911 let (internal_id, actual_session_id, name) = match session {
2912 Some(s) => s,
2913 None => {
2914 println!("{} Session not found: {}", "[!]".red(), session_id);
2915 anyhow::bail!("Session not found");
2916 }
2917 };
2918
2919 println!("{}", "=".repeat(70).cyan());
2920 println!("{}", " Session Checkpoints ".bold().cyan());
2921 println!("{}", "=".repeat(70).cyan());
2922 println!();
2923 println!(
2924 "{} Session: {} {}",
2925 "[i]".blue(),
2926 actual_session_id,
2927 if !name.is_empty() {
2928 format!("({})", name)
2929 } else {
2930 String::new()
2931 }
2932 );
2933 println!();
2934
2935 let mut stmt = conn.prepare(
2936 "SELECT checkpoint_number, message, message_count, content_hash, created_at
2937 FROM checkpoints WHERE session_id = ? ORDER BY checkpoint_number DESC",
2938 )?;
2939
2940 let checkpoints: Vec<_> = stmt
2941 .query_map([internal_id], |row| {
2942 Ok((
2943 row.get::<_, i64>(0)?,
2944 row.get::<_, String>(1)?,
2945 row.get::<_, i64>(2)?,
2946 row.get::<_, String>(3)?,
2947 row.get::<_, String>(4)?,
2948 ))
2949 })?
2950 .collect::<Result<Vec<_>, _>>()?;
2951
2952 if checkpoints.is_empty() {
2953 println!("{} No checkpoints found for this session", "[i]".blue());
2954 println!(
2955 " Use 'csm harvest checkpoint {} -m \"message\"' to create one",
2956 session_id
2957 );
2958 return Ok(());
2959 }
2960
2961 println!("{} {} checkpoints found:", "[i]".blue(), checkpoints.len());
2962 println!();
2963
2964 for (num, msg, msg_count, hash, created_at) in checkpoints {
2965 println!(" {} #{} - {}", "*".cyan(), num.to_string().bold(), msg);
2966 println!(
2967 " {} messages | {} | {}",
2968 msg_count,
2969 &hash[..12],
2970 created_at.dimmed()
2971 );
2972 }
2973
2974 println!();
2975 println!(
2976 " Use 'csm harvest restore {} <checkpoint>' to restore",
2977 session_id
2978 );
2979
2980 Ok(())
2981}
2982
/// Restore a session's messages to the state captured in a checkpoint.
///
/// Destructive: the session's current rows in `messages` are deleted and
/// replaced by the checkpoint snapshot, and `sessions.message_count` is
/// rewound. `session_id` may be the external id or the numeric row id.
pub fn harvest_restore_checkpoint(
    db_path: Option<&str>,
    session_id: &str,
    checkpoint_number: i64,
) -> Result<()> {
    let db_path = get_db_path(db_path)?;

    if !db_path.exists() {
        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
    }

    let db = ChatDatabase::open(&db_path)?;
    let conn = db.connection();

    // Resolve by external id or numeric internal id (-1 never matches).
    let session: Option<(i64, String)> = conn
        .query_row(
            "SELECT id, session_id FROM sessions WHERE session_id = ? OR id = ?",
            params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
            |row| Ok((row.get(0)?, row.get(1)?)),
        )
        .ok();

    let (internal_id, actual_session_id) = match session {
        Some(s) => s,
        None => {
            println!("{} Session not found: {}", "[!]".red(), session_id);
            anyhow::bail!("Session not found");
        }
    };

    // Load the requested checkpoint's JSON snapshot and recorded count.
    let checkpoint: Option<(String, i64)> = conn
        .query_row(
            "SELECT snapshot, message_count FROM checkpoints
             WHERE session_id = ? AND checkpoint_number = ?",
            params![internal_id, checkpoint_number],
            |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
        )
        .ok();

    let (snapshot_json, original_msg_count) = match checkpoint {
        Some(c) => c,
        None => {
            println!(
                "{} Checkpoint #{} not found for session {}",
                "[!]".red(),
                checkpoint_number,
                session_id
            );
            println!(
                " Use 'csm harvest checkpoints {}' to see available checkpoints",
                session_id
            );
            anyhow::bail!("Checkpoint not found");
        }
    };

    let snapshot: serde_json::Value =
        serde_json::from_str(&snapshot_json).context("Failed to parse checkpoint snapshot")?;

    // Snapshot layout matches harvest_checkpoint: {"message_count", "messages"}.
    let messages = snapshot["messages"]
        .as_array()
        .context("Invalid snapshot format")?;

    println!("{}", "=".repeat(60).cyan());
    println!("{}", " Restore Checkpoint ".bold().yellow());
    println!("{}", "=".repeat(60).cyan());
    println!();
    println!("{} Session: {}", "[i]".blue(), actual_session_id);
    println!(
        "{} Restoring to checkpoint #{}",
        "[!]".yellow(),
        checkpoint_number
    );
    println!(
        "{} Messages to restore: {}",
        "[i]".blue(),
        original_msg_count
    );
    println!();

    // Destructive step: drop the session's current messages before refill.
    let deleted = conn.execute("DELETE FROM messages WHERE session_id = ?", [internal_id])?;

    println!("{} Removed {} current messages", "[-]".red(), deleted);

    // Snapshots only store message text, so original roles are lost; every
    // restored row is written with role 'unknown'. Non-string entries are
    // skipped silently.
    for (idx, msg) in messages.iter().enumerate() {
        if let Some(content) = msg.as_str() {
            conn.execute(
                "INSERT INTO messages (session_id, message_index, role, content) VALUES (?, ?, 'unknown', ?)",
                params![internal_id, idx as i64, content],
            )?;
        }
    }

    // Keep the session row consistent with the restored message set.
    conn.execute(
        "UPDATE sessions SET message_count = ?, updated_at = datetime('now') WHERE id = ?",
        params![original_msg_count, internal_id],
    )?;

    println!(
        "{} Restored {} messages from checkpoint",
        "[+]".green(),
        messages.len()
    );
    println!();
    println!(
        "{} Session restored to checkpoint #{}",
        "[+]".green().bold(),
        checkpoint_number
    );

    Ok(())
}
3102
/// Shrink the harvest database by rewriting `session_json` blobs.
///
/// Sessions whose messages were already expanded into `messages_v2` no longer
/// need their full JSON transcript; for those rows the blob is rewritten to a
/// small metadata-only object (marked `_compacted: 1`), then VACUUM reclaims
/// the freed pages. With `dry_run` the function only reports what it would do.
pub fn harvest_compact(db_path: Option<&str>, dry_run: bool) -> Result<()> {
    let db_path = get_db_path(db_path)?;

    if !db_path.exists() {
        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
    }

    println!("\n{} Harvest Database Compact", "[H]".magenta().bold());
    println!("{}", "=".repeat(60));

    // File size before compaction, for the savings report at the end.
    let file_size_before = std::fs::metadata(&db_path)?.len();
    println!(
        "{} Database: {} ({:.2} MB)",
        "[*]".blue(),
        db_path.display(),
        file_size_before as f64 / 1_048_576.0
    );

    let conn = Connection::open(&db_path)?;

    // Sessions eligible for compaction: those with expanded messages_v2 rows.
    let expanded_count: i64 = conn.query_row(
        "SELECT COUNT(DISTINCT s.id) FROM sessions s
         INNER JOIN messages_v2 m ON m.session_id = s.id",
        [],
        |row| row.get(0),
    )?;

    let total_sessions: i64 =
        conn.query_row("SELECT COUNT(*) FROM sessions", [], |row| row.get(0))?;

    // Total bytes currently held in session_json across all sessions.
    let json_size: i64 = conn
        .query_row(
            "SELECT COALESCE(SUM(LENGTH(session_json)), 0) FROM sessions",
            [],
            |row| row.get(0),
        )
        .unwrap_or(0);

    // Portion of that total belonging to compactable sessions.
    let expanded_json_size: i64 = conn
        .query_row(
            "SELECT COALESCE(SUM(LENGTH(s.session_json)), 0) FROM sessions s
             WHERE s.id IN (SELECT DISTINCT session_id FROM messages_v2)",
            [],
            |row| row.get(0),
        )
        .unwrap_or(0);

    println!(
        "{} Total sessions: {}",
        "[*]".blue(),
        total_sessions.to_string().truecolor(252, 152, 103)
    );
    println!(
        "{} Sessions with messages_v2 data: {}",
        "[*]".blue(),
        expanded_count.to_string().truecolor(252, 152, 103)
    );
    println!(
        "{} Current session_json size: {:.2} MB",
        "[*]".blue(),
        json_size as f64 / 1_048_576.0
    );
    println!(
        "{} Compactable session_json size: {:.2} MB",
        "[*]".blue(),
        expanded_json_size as f64 / 1_048_576.0
    );

    if expanded_count == 0 {
        println!(
            "\n{} Nothing to compact — no sessions have messages_v2 data.",
            "[i]".dimmed()
        );
        return Ok(());
    }

    if dry_run {
        println!(
            "\n{} Dry run — would compact {} sessions, freeing ~{:.2} MB of JSON blobs.",
            "[i]".yellow(),
            expanded_count,
            expanded_json_size as f64 / 1_048_576.0
        );
        return Ok(());
    }

    println!("\n{} Compacting session_json blobs...", "[*]".blue());

    // Rewrite each eligible blob to metadata-only JSON. The kept keys are
    // copied from the existing blob; request_count is refreshed from the
    // live message_count column, and _compacted marks the row as processed.
    let compacted = conn.execute(
        r#"UPDATE sessions SET session_json = json_object(
            'session_id', json_extract(session_json, '$.session_id'),
            'version', json_extract(session_json, '$.version'),
            'creation_date', json_extract(session_json, '$.creation_date'),
            'last_message_date', json_extract(session_json, '$.last_message_date'),
            'custom_title', json_extract(session_json, '$.custom_title'),
            'initial_location', json_extract(session_json, '$.initial_location'),
            'is_imported', json_extract(session_json, '$.is_imported'),
            'requester_username', json_extract(session_json, '$.requester_username'),
            'responder_username', json_extract(session_json, '$.responder_username'),
            'request_count', message_count,
            '_compacted', 1
        ) WHERE id IN (SELECT DISTINCT session_id FROM messages_v2)"#,
        [],
    )?;

    println!(
        " {} Compacted {} session JSON blobs",
        "[+]".green(),
        compacted
    );

    // VACUUM cannot run inside a transaction; execute_batch issues it directly.
    println!("{} Running VACUUM to reclaim disk space...", "[*]".blue());
    conn.execute_batch("VACUUM")?;

    let file_size_after = std::fs::metadata(&db_path)?.len();
    // Guard against underflow: VACUUM can occasionally grow the file.
    let saved = if file_size_before > file_size_after {
        file_size_before - file_size_after
    } else {
        0
    };

    println!();
    println!("{} Compact complete!", "[+]".green().bold());
    println!(" Before: {:.2} MB", file_size_before as f64 / 1_048_576.0);
    println!(" After: {:.2} MB", file_size_after as f64 / 1_048_576.0);
    println!(" Saved: {:.2} MB", saved as f64 / 1_048_576.0);

    Ok(())
}
3248
/// Drop and rebuild the FTS5 full-text index over `messages_v2`.
///
/// Recreates the `messages_fts` external-content table plus the three
/// triggers that keep it in sync on insert/delete/update, then bulk-indexes
/// every existing message.
pub fn harvest_rebuild_fts(db_path: Option<&str>) -> Result<()> {
    let db_path = get_db_path(db_path)?;

    if !db_path.exists() {
        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
    }

    let conn = Connection::open(&db_path)?;

    println!("{}", "=".repeat(70).cyan());
    println!("{} Rebuilding Full-Text Search Index", "[*]".bold());
    println!("{}", "=".repeat(70).cyan());
    println!();

    // Triggers must be dropped before the table they reference.
    println!("{} Dropping old FTS index...", "[*]".blue());
    conn.execute_batch(
        r#"
        DROP TRIGGER IF EXISTS messages_v2_ai;
        DROP TRIGGER IF EXISTS messages_v2_ad;
        DROP TRIGGER IF EXISTS messages_v2_au;
        DROP TABLE IF EXISTS messages_fts;
        "#,
    )?;

    // External-content FTS5 table (content= points at messages_v2, rowid
    // mapped to messages_v2.id). Deletions/updates must go through the
    // special 'delete' command rows, which the triggers below emit.
    println!("{} Creating new FTS index...", "[*]".blue());
    conn.execute_batch(
        r#"
        CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
            content_raw,
            content='messages_v2',
            content_rowid='id'
        );

        CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
        END;

        CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
        END;

        CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
            INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
            INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
        END;
        "#,
    )?;

    let total_messages: i64 =
        conn.query_row("SELECT COUNT(*) FROM messages_v2", [], |row| row.get(0))?;

    println!("{} Indexing {} messages...", "[*]".blue(), total_messages);

    // Bulk backfill of the freshly created (empty) index.
    conn.execute(
        "INSERT INTO messages_fts(rowid, content_raw) SELECT id, content_raw FROM messages_v2",
        [],
    )?;

    let indexed: i64 = conn.query_row("SELECT COUNT(*) FROM messages_fts", [], |row| row.get(0))?;

    println!();
    println!("{} FTS index rebuilt successfully!", "[+]".green().bold());
    println!(" {} messages indexed", indexed);

    Ok(())
}
3325
/// Search harvested message content, printing up to `limit` matching sessions.
///
/// Prefers the FTS5 index (`messages_fts`) with rank ordering and FTS-built
/// snippets; when the index has not been built, falls back to a slower LIKE
/// scan with a hand-assembled SUBSTR snippet. Results collapse to one row per
/// session via GROUP BY.
pub fn harvest_search(
    db_path: Option<&str>,
    query: &str,
    provider_filter: Option<&str>,
    limit: usize,
) -> Result<()> {
    let db_path = get_db_path(db_path)?;

    if !db_path.exists() {
        anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
    }

    let db = ChatDatabase::open(&db_path)?;
    let conn = db.connection();

    println!("{}", "=".repeat(70).cyan());
    println!("{} Search: {}", "[?]".bold(), query.bold());
    println!("{}", "=".repeat(70).cyan());
    println!();

    // Each tuple: (session_id, provider, title, snippet)
    let results: Vec<(String, String, String, String)> = {
        // Probe sqlite_master for the FTS table built by harvest_rebuild_fts.
        let fts_exists: bool = conn
            .query_row(
                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='messages_fts'",
                [],
                |_| Ok(true),
            )
            .unwrap_or(false);

        if fts_exists {
            // Only the (optional) provider clause and the usize LIMIT are
            // interpolated; the user query itself is always a bound `?`.
            let sql = format!(
                "SELECT s.id, s.provider, s.title,
                        snippet(messages_fts, 0, '>>>', '<<<', '...', 32) as snip
                 FROM messages_fts fts
                 JOIN messages_v2 m ON m.id = fts.rowid
                 JOIN sessions s ON m.session_id = s.id
                 WHERE messages_fts MATCH ?
                 {}
                 GROUP BY s.id
                 ORDER BY rank
                 LIMIT {}",
                if provider_filter.is_some() {
                    "AND s.provider = ?"
                } else {
                    ""
                },
                limit
            );

            let mut stmt = conn.prepare(&sql)?;

            // Two arms because the bound-parameter arity differs.
            if let Some(provider) = provider_filter {
                stmt.query_map(params![query, provider], |row| {
                    Ok((
                        row.get::<_, String>(0)?,
                        row.get::<_, String>(1)?,
                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
                        row.get::<_, String>(3)?,
                    ))
                })?
                .collect::<Result<Vec<_>, _>>()?
            } else {
                stmt.query_map([query], |row| {
                    Ok((
                        row.get::<_, String>(0)?,
                        row.get::<_, String>(1)?,
                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
                        row.get::<_, String>(3)?,
                    ))
                })?
                .collect::<Result<Vec<_>, _>>()?
            }
        } else {
            // LIKE fallback. Parameter order matters: the 1st `?` feeds the
            // INSTR used to center the snippet, the 2nd the LIKE pattern,
            // and the optional 3rd the provider filter.
            let search_pattern = format!("%{}%", query);
            let sql = format!(
                "SELECT s.id, s.provider, s.title,
                        SUBSTR(m.content_raw,
                               MAX(1, INSTR(LOWER(m.content_raw), LOWER(?)) - 50),
                               150) as snip
                 FROM messages_v2 m
                 JOIN sessions s ON m.session_id = s.id
                 WHERE m.content_raw LIKE ?
                 {}
                 GROUP BY s.id
                 ORDER BY s.updated_at DESC
                 LIMIT {}",
                if provider_filter.is_some() {
                    "AND s.provider = ?"
                } else {
                    ""
                },
                limit
            );

            let mut stmt = conn.prepare(&sql)?;

            if let Some(provider) = provider_filter {
                stmt.query_map(params![query, search_pattern, provider], |row| {
                    Ok((
                        row.get::<_, String>(0)?,
                        row.get::<_, String>(1)?,
                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
                        row.get::<_, String>(3)?,
                    ))
                })?
                .collect::<Result<Vec<_>, _>>()?
            } else {
                stmt.query_map(params![query, search_pattern], |row| {
                    Ok((
                        row.get::<_, String>(0)?,
                        row.get::<_, String>(1)?,
                        row.get::<_, Option<String>>(2)?.unwrap_or_default(),
                        row.get::<_, String>(3)?,
                    ))
                })?
                .collect::<Result<Vec<_>, _>>()?
            }
        }
    };

    if results.is_empty() {
        println!("{} No results found for '{}'", "[i]".blue(), query);
        return Ok(());
    }

    println!("{} Found {} result(s):", "[i]".blue(), results.len());
    println!();

    for (session_id, provider, title, snippet) in results {
        let display_name = if title.is_empty() {
            session_id.clone()
        } else {
            // NOTE(review): byte slice assumes session ids are ASCII (uuid-like);
            // a multi-byte id shorter than 8 chars would still be safe via min,
            // but a multi-byte boundary at 8 would panic — confirm id charset.
            format!("{} ({})", title, &session_id[..8.min(session_id.len())])
        };

        println!(
            "{} {} [{}]",
            "*".cyan(),
            display_name.bold(),
            provider.dimmed()
        );

        // Flatten newlines so each snippet renders on a single line.
        let clean_snippet = snippet.replace('\n', " ").replace('\r', "");
        println!(" {}", clean_snippet.dimmed());
        println!();
    }

    Ok(())
}
3482
/// Build a short display snippet of `content` centered on the first
/// case-insensitive occurrence of `query`, with `...` ellipses marking
/// truncation; when `query` is absent the head of `content` is returned.
///
/// All slice indices are clamped to UTF-8 char boundaries: the previous
/// version sliced `content` at raw byte offsets (derived from the lowercased
/// copy) and panicked on multi-byte characters.
fn create_search_snippet(content: &str, query: &str, max_len: usize) -> String {
    // Clamp `idx` down to the nearest char boundary so slicing cannot panic.
    fn floor_boundary(s: &str, idx: usize) -> usize {
        let mut i = idx.min(s.len());
        while !s.is_char_boundary(i) {
            i -= 1;
        }
        i
    }
    // Clamp `idx` up to the nearest char boundary (never past the end).
    fn ceil_boundary(s: &str, idx: usize) -> usize {
        let mut i = idx.min(s.len());
        while i < s.len() && !s.is_char_boundary(i) {
            i += 1;
        }
        i
    }

    let content_lower = content.to_lowercase();
    let query_lower = query.to_lowercase();

    if let Some(pos) = content_lower.find(&query_lower) {
        // `pos` indexes the lowercased copy; lowercasing can shift byte
        // offsets for some non-ASCII text, so treat it as approximate and
        // clamp defensively against `content` itself.
        let start = floor_boundary(content, pos.saturating_sub(max_len / 2));
        let end = ceil_boundary(content, pos + query.len() + max_len / 2);

        let mut snippet = String::new();
        if start > 0 {
            snippet.push_str("...");
        }
        snippet.push_str(&content[start..end]);
        if end < content.len() {
            snippet.push_str("...");
        }

        // Flatten line breaks so the snippet renders on one line.
        snippet.replace('\n', " ").replace('\r', "")
    } else if content.len() > max_len {
        // No match: show the head of the content, truncated safely.
        format!("{}...", &content[..floor_boundary(content, max_len)])
    } else {
        content.to_string()
    }
}
3512
/// Cheap 128-bit rolling fingerprint of `data`.
///
/// NOTE: despite the name this is NOT the MD5 algorithm — it is a simple
/// add/rotate accumulator used only for change-detection fingerprints, not
/// for any cryptographic purpose.
fn md5_hash(data: &str) -> u128 {
    data.bytes().enumerate().fold(0u128, |acc, (i, byte)| {
        // Weight each byte by its 1-based position, then diffuse by rotating.
        acc.wrapping_add((byte as u128).wrapping_mul((i as u128).wrapping_add(1)))
            .rotate_left(7)
    })
}
3523
3524pub fn harvest_sync(
3532 path: Option<&str>,
3533 push: bool,
3534 pull: bool,
3535 provider: Option<&str>,
3536 workspace: Option<&str>,
3537 sessions: Option<&[String]>,
3538 format: Option<&str>,
3539 force: bool,
3540 dry_run: bool,
3541) -> Result<()> {
3542 if !push && !pull {
3544 anyhow::bail!("Must specify either --push or --pull (or both)");
3545 }
3546
3547 let db_path = get_db_path(path)?;
3549
3550 if !db_path.exists() {
3551 anyhow::bail!(
3552 "Harvest database not found at {:?}. Run 'harvest run' first.",
3553 db_path
3554 );
3555 }
3556
3557 println!(
3558 "{} Sync Operation",
3559 if dry_run {
3560 "[DRY RUN]".yellow()
3561 } else {
3562 "[SYNC]".green()
3563 }
3564 );
3565 println!(" Database: {}", db_path.display().to_string().cyan());
3566
3567 if push {
3568 println!("\n{} Push: Database → Provider Workspaces", "[→]".blue());
3569 sync_push(
3570 &db_path, provider, workspace, sessions, format, force, dry_run,
3571 )?;
3572 }
3573
3574 if pull {
3575 println!("\n{} Pull: Provider Workspaces → Database", "[←]".blue());
3576 sync_pull(&db_path, provider, workspace, sessions, force, dry_run)?;
3577 }
3578
3579 if dry_run {
3580 println!(
3581 "\n{} Dry run complete. No changes were made.",
3582 "[!]".yellow()
3583 );
3584 } else {
3585 println!("\n{} Sync complete.", "[✓]".green());
3586 }
3587
3588 Ok(())
3589}
3590
/// Push sessions from the harvest database back out to their recorded
/// workspace directories (`<workspace>/.vscode/chat/<id>.<ext>`).
///
/// Filters stack: provider equality, workspace path/name substring, and an
/// explicit session-id list. Existing files are skipped unless `force`;
/// `dry_run` reports what would be written without touching disk.
fn sync_push(
    db_path: &Path,
    provider: Option<&str>,
    workspace: Option<&str>,
    sessions: Option<&[String]>,
    format: Option<&str>,
    force: bool,
    dry_run: bool,
) -> Result<()> {
    let conn = Connection::open(db_path)?;

    // Build the SELECT incrementally; only `?` placeholders are appended, so
    // every user-supplied value is bound, never interpolated.
    let mut sql = String::from(
        r#"SELECT id, provider, workspace_path, workspace_name, session_json, file_format
           FROM sessions WHERE workspace_path IS NOT NULL"#,
    );
    let mut params_vec: Vec<String> = Vec::new();

    if let Some(p) = provider {
        sql.push_str(" AND provider = ?");
        params_vec.push(p.to_string());
    }

    if let Some(w) = workspace {
        sql.push_str(" AND (workspace_path LIKE ? OR workspace_name LIKE ?)");
        params_vec.push(format!("%{}%", w));
        params_vec.push(format!("%{}%", w));
    }

    if let Some(sess_ids) = sessions {
        if !sess_ids.is_empty() {
            // One placeholder per requested session id.
            let placeholders: Vec<&str> = sess_ids.iter().map(|_| "?").collect();
            sql.push_str(&format!(" AND id IN ({})", placeholders.join(",")));
            for id in sess_ids {
                params_vec.push(id.clone());
            }
        }
    }

    let mut stmt = conn.prepare(&sql)?;

    // rusqlite needs &[&dyn ToSql] for a dynamic parameter list.
    let params_refs: Vec<&dyn rusqlite::ToSql> = params_vec
        .iter()
        .map(|s| s as &dyn rusqlite::ToSql)
        .collect();

    let rows = stmt.query_map(params_refs.as_slice(), |row| {
        Ok((
            row.get::<_, String>(0)?,
            row.get::<_, String>(1)?,
            row.get::<_, String>(2)?,
            row.get::<_, Option<String>>(3)?,
            row.get::<_, String>(4)?,
            row.get::<_, Option<String>>(5)?,
        ))
    })?;

    let target_format = format.unwrap_or("jsonl");
    let mut pushed = 0;
    let mut skipped = 0;
    let mut errors = 0;

    for row_result in rows {
        match row_result {
            Ok((
                session_id,
                provider_name,
                workspace_path,
                workspace_name,
                session_json,
                stored_format,
            )) => {
                let ws_path = PathBuf::from(&workspace_path);
                let chat_dir = ws_path.join(".vscode").join("chat");

                // Missing chat dir: skip unless forced, in which case create it.
                if !chat_dir.exists() {
                    if !force {
                        println!(
                            " {} Skipped {} - workspace chat dir not found: {}",
                            "[~]".yellow(),
                            session_id.dimmed(),
                            chat_dir.display()
                        );
                        skipped += 1;
                        continue;
                    }
                    if !dry_run {
                        fs::create_dir_all(&chat_dir)?;
                    }
                }

                // Decide output extension and whether the stored blob must be
                // converted.
                // NOTE(review): for target "json" with jsonl-stored data this
                // sets needs_conversion but the conversion branch below only
                // runs when target_format == "jsonl", so raw jsonl would be
                // written to a .json file — confirm intended behavior.
                let (output_ext, needs_conversion) = match target_format {
                    "jsonl" => ("jsonl", stored_format.as_deref() != Some("jsonl")),
                    "json" => ("json", stored_format.as_deref() == Some("jsonl")),
                    _ => ("jsonl", true), // unknown formats default to jsonl
                };

                let session_file = chat_dir.join(format!("{}.{}", session_id, output_ext));

                // Never clobber an existing file unless --force was given.
                if session_file.exists() && !force {
                    println!(
                        " {} Skipped {} - file exists (use --force to overwrite)",
                        "[~]".yellow(),
                        session_id.dimmed()
                    );
                    skipped += 1;
                    continue;
                }

                // Convert the stored JSON blob to JSONL when required; a parse
                // failure counts as an error for this session only.
                let output_content = if needs_conversion && target_format == "jsonl" {
                    match serde_json::from_str::<ChatSession>(&session_json) {
                        Ok(session) => convert_session_to_jsonl(&session)?,
                        Err(e) => {
                            println!(
                                " {} Error parsing session {}: {}",
                                "[!]".red(),
                                session_id,
                                e
                            );
                            errors += 1;
                            continue;
                        }
                    }
                } else {
                    session_json.clone()
                };

                if dry_run {
                    println!(
                        " {} Would write {} ({}) → {}",
                        "[+]".green(),
                        workspace_name.as_deref().unwrap_or(&provider_name),
                        session_id.dimmed(),
                        session_file.display()
                    );
                } else {
                    fs::write(&session_file, &output_content)?;
                    println!(
                        " {} Pushed {} ({}) → {}",
                        "[+]".green(),
                        workspace_name.as_deref().unwrap_or(&provider_name),
                        session_id.dimmed(),
                        session_file.display()
                    );
                }
                pushed += 1;
            }
            Err(e) => {
                println!(" {} Database error: {}", "[!]".red(), e);
                errors += 1;
            }
        }
    }

    println!(
        "\n {} Push summary: {} pushed, {} skipped, {} errors",
        "[=]".cyan(),
        pushed.to_string().green(),
        skipped.to_string().yellow(),
        errors.to_string().red()
    );

    Ok(())
}
3765
3766fn sync_pull(
3768 db_path: &Path,
3769 provider: Option<&str>,
3770 workspace: Option<&str>,
3771 sessions: Option<&[String]>,
3772 force: bool,
3773 dry_run: bool,
3774) -> Result<()> {
3775 let conn = if dry_run {
3776 Connection::open(db_path)?
3778 } else {
3779 Connection::open(db_path)?
3780 };
3781
3782 let workspaces = discover_workspaces()?;
3784 let mut pulled = 0;
3785 let mut skipped = 0;
3786 let mut errors = 0;
3787
3788 for ws in workspaces {
3789 if let Some(ws_filter) = workspace {
3791 let ws_path_str = ws.workspace_path.to_string_lossy();
3792 let ws_name = ws.project_path.as_deref().unwrap_or("");
3793 if !ws_path_str.contains(ws_filter) && !ws_name.contains(ws_filter) {
3794 continue;
3795 }
3796 }
3797
3798 if let Some(p) = provider {
3800 if !p.eq_ignore_ascii_case("copilot")
3801 && !p.eq_ignore_ascii_case("github copilot")
3802 && !p.eq_ignore_ascii_case("vscode")
3803 {
3804 continue;
3805 }
3806 }
3807
3808 match get_chat_sessions_from_workspace(&ws.workspace_path) {
3810 Ok(ws_sessions) => {
3811 for swp in ws_sessions {
3812 if let Some(sess_ids) = sessions {
3814 let sid = swp.session.session_id.as_deref().unwrap_or("");
3815 if !sess_ids.iter().any(|id| id == sid) {
3816 continue;
3817 }
3818 }
3819
3820 let session_id = swp
3821 .session
3822 .session_id
3823 .clone()
3824 .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
3825
3826 let exists: bool = conn
3828 .query_row("SELECT 1 FROM sessions WHERE id = ?", [&session_id], |_| {
3829 Ok(true)
3830 })
3831 .unwrap_or(false);
3832
3833 if exists && !force {
3834 skipped += 1;
3835 continue;
3836 }
3837
3838 let ws_name = ws.project_path.clone();
3839 let ws_path = ws.workspace_path.to_string_lossy().to_string();
3840
3841 if dry_run {
3842 println!(
3843 " {} Would pull {} from {}",
3844 "[+]".green(),
3845 session_id.dimmed(),
3846 ws_name.as_deref().unwrap_or(&ws_path)
3847 );
3848 pulled += 1;
3849 } else {
3850 match insert_or_update_session(
3851 &conn,
3852 &swp.session,
3853 "GitHub Copilot",
3854 Some(&ws.hash),
3855 ws_name.as_deref(),
3856 None, 3, "json", Some(&ws_path), ) {
3861 Ok(_) => {
3862 println!(
3863 " {} Pulled {} from {}",
3864 "[+]".green(),
3865 session_id.dimmed(),
3866 ws_name.as_deref().unwrap_or(&ws_path)
3867 );
3868 pulled += 1;
3869 }
3870 Err(e) => {
3871 println!(" {} Error pulling {}: {}", "[!]".red(), session_id, e);
3872 errors += 1;
3873 }
3874 }
3875 }
3876 }
3877 }
3878 Err(e) => {
3879 println!(
3880 " {} Error reading workspace {}: {}",
3881 "[!]".red(),
3882 ws.workspace_path.display(),
3883 e
3884 );
3885 errors += 1;
3886 }
3887 }
3888 }
3889
3890 println!(
3891 "\n {} Pull summary: {} pulled, {} skipped, {} errors",
3892 "[=]".cyan(),
3893 pulled.to_string().green(),
3894 skipped.to_string().yellow(),
3895 errors.to_string().red()
3896 );
3897
3898 Ok(())
3899}
3900
3901fn convert_session_to_jsonl(session: &ChatSession) -> Result<String> {
3903 use serde_json::json;
3904
3905 let mut lines = Vec::new();
3906
3907 let session_meta = json!({
3909 "version": 3,
3910 "sessionId": session.session_id,
3911 "title": session.title(),
3912 "createdAt": session.creation_date,
3913 "lastInteractionAt": session.last_message_date,
3914 "customTitle": session.custom_title.as_deref(),
3915 });
3916 lines.push(serde_json::to_string(&session_meta)?);
3917
3918 for request in &session.requests {
3920 let request_json = serde_json::to_string(request)?;
3921 lines.push(request_json);
3922 }
3923
3924 Ok(lines.join("\n"))
3925}