1use anyhow::{Context, Result};
10use chrono::{DateTime, Utc};
11use colored::*;
12use rusqlite::{params, Connection};
13use serde::{Deserialize, Serialize};
14use std::fs;
15use std::path::{Path, PathBuf};
16use std::process::Command;
17use std::time::Duration;
18
19use crate::browser::{get_installed_browsers, scan_browser_auth, BrowserType};
20use crate::database::{ChatDatabase, ShareLinkParser};
21use crate::models::ChatSession;
22use crate::providers::{ProviderRegistry, ProviderType};
23use crate::storage::parse_session_json;
24use crate::workspace::{discover_workspaces, get_chat_sessions_from_workspace};
25
26fn is_empty_code_block(s: &str) -> bool {
28 let s = s.trim();
30 if s == "```" {
31 return true;
32 }
33 if s.starts_with("```") && !s.contains('\n') {
35 return true;
36 }
37 let lines: Vec<&str> = s.lines().collect();
39 if lines.len() >= 2 && lines[0].starts_with("```") && lines.last() == Some(&"```") {
40 let content_lines = &lines[1..lines.len() - 1];
42 if content_lines.iter().all(|line| line.trim().is_empty()) {
43 return true;
44 }
45 }
46 false
47}
48
49type HarvestQueryResult = (String, String, String, i64, i64, i64, Option<String>);
51
52#[derive(Debug, Clone)]
54struct WebProviderEndpoint {
55 name: &'static str,
56 url: &'static str,
57 description: &'static str,
58}
59
60const WEB_PROVIDERS: &[WebProviderEndpoint] = &[
62 WebProviderEndpoint {
63 name: "ChatGPT",
64 url: "https://chat.openai.com",
65 description: "OpenAI ChatGPT Web Interface",
66 },
67 WebProviderEndpoint {
68 name: "Claude",
69 url: "https://claude.ai",
70 description: "Anthropic Claude Web Interface",
71 },
72 WebProviderEndpoint {
73 name: "Gemini",
74 url: "https://gemini.google.com",
75 description: "Google Gemini Web Interface",
76 },
77 WebProviderEndpoint {
78 name: "Perplexity",
79 url: "https://www.perplexity.ai",
80 description: "Perplexity AI Search Interface",
81 },
82 WebProviderEndpoint {
83 name: "DeepSeek",
84 url: "https://chat.deepseek.com",
85 description: "DeepSeek Chat Interface",
86 },
87 WebProviderEndpoint {
88 name: "Poe",
89 url: "https://poe.com",
90 description: "Quora Poe Multi-model Chat",
91 },
92 WebProviderEndpoint {
93 name: "You.com",
94 url: "https://you.com/chat",
95 description: "You.com AI Chat",
96 },
97 WebProviderEndpoint {
98 name: "HuggingChat",
99 url: "https://huggingface.co/chat",
100 description: "HuggingFace Chat Interface",
101 },
102 WebProviderEndpoint {
103 name: "Copilot",
104 url: "https://copilot.microsoft.com",
105 description: "Microsoft Copilot Web Interface",
106 },
107 WebProviderEndpoint {
108 name: "Mistral",
109 url: "https://chat.mistral.ai",
110 description: "Mistral AI Le Chat Interface",
111 },
112 WebProviderEndpoint {
113 name: "Cohere",
114 url: "https://coral.cohere.com",
115 description: "Cohere Coral Chat Interface",
116 },
117 WebProviderEndpoint {
118 name: "Groq",
119 url: "https://groq.com",
120 description: "Groq Fast Inference Interface",
121 },
122 WebProviderEndpoint {
123 name: "Phind",
124 url: "https://www.phind.com",
125 description: "Phind AI Code Search",
126 },
127 WebProviderEndpoint {
128 name: "Character.AI",
129 url: "https://character.ai",
130 description: "Character.AI Chat Interface",
131 },
132 WebProviderEndpoint {
133 name: "Pi",
134 url: "https://pi.ai",
135 description: "Inflection Pi Personal AI",
136 },
137];
138
139fn scan_web_providers(timeout_secs: u64) -> Vec<String> {
141 use std::sync::{Arc, Mutex};
142 use std::thread;
143
144 let reachable = Arc::new(Mutex::new(Vec::new()));
145 let results = Arc::new(Mutex::new(Vec::new()));
146
147 let handles: Vec<_> = WEB_PROVIDERS
149 .iter()
150 .map(|provider| {
151 let reachable = Arc::clone(&reachable);
152 let results = Arc::clone(&results);
153 let timeout = timeout_secs;
154 let name = provider.name;
155 let url = provider.url;
156 let desc = provider.description;
157
158 thread::spawn(move || {
159 let client = match reqwest::blocking::Client::builder()
160 .timeout(Duration::from_secs(timeout))
161 .connect_timeout(Duration::from_secs(timeout.min(3))) .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
163 .build()
164 {
165 Ok(c) => c,
166 Err(_) => return,
167 };
168
169 let result: (&str, bool, String, &str) = match client.head(url).send() {
170 Ok(response) => {
171 let status = response.status();
172 if status.is_success() || status.is_redirection() {
173 reachable.lock().unwrap().push(name.to_string());
174 (name, true, desc.to_string(), url)
175 } else {
176 (name, false, format!("HTTP {}", status.as_u16()), url)
177 }
178 }
179 Err(e) => {
180 let reason = if e.is_timeout() {
181 "timeout".to_string()
182 } else if e.is_connect() {
183 "connection failed".to_string()
184 } else {
185 "unreachable".to_string()
186 };
187 (name, false, reason, url)
188 }
189 };
190
191 results.lock().unwrap().push((
192 result.0.to_string(),
193 result.1,
194 result.2,
195 result.3.to_string(),
196 ));
197 })
198 })
199 .collect();
200
201 for handle in handles {
203 let _ = handle.join();
204 }
205
206 let results = results.lock().unwrap();
208 let mut sorted_results: Vec<_> = results.iter().collect();
209 sorted_results.sort_by_key(|(name, _, _, _)| name.as_str());
210
211 for (name, success, info, url) in sorted_results {
212 if *success {
213 println!(
214 " {} {}: {} ({})",
215 "[+]".green(),
216 name.bold(),
217 "reachable".green(),
218 info.dimmed()
219 );
220 println!(" {} {}", "`".dimmed(), url.dimmed());
221 } else {
222 println!(
223 " {} {}: {} ({})",
224 "[-]".dimmed(),
225 name,
226 "blocked or unavailable".dimmed(),
227 info.dimmed()
228 );
229 }
230 }
231
232 let result = reachable.lock().unwrap().clone();
233 result
234}
235
236fn scan_browser_authentication(
238 verbose: bool,
239) -> (std::collections::HashMap<String, Vec<BrowserType>>, usize) {
240 use crate::browser::scan_browser_auth_verbose;
241 use std::collections::HashMap;
242
243 let installed = get_installed_browsers();
244 if installed.is_empty() {
245 println!(" {} No supported browsers found", "[-]".dimmed());
246 return (HashMap::new(), 0);
247 }
248
249 println!(
250 " {} Checking {} browser(s): {}",
251 "[*]".blue(),
252 installed.len(),
253 installed
254 .iter()
255 .map(|b| b.name())
256 .collect::<Vec<_>>()
257 .join(", ")
258 );
259
260 let results = if verbose {
261 scan_browser_auth_verbose()
262 } else {
263 scan_browser_auth()
264 };
265
266 let mut authenticated: HashMap<String, Vec<BrowserType>> = HashMap::new();
268
269 for result in results {
270 if result.authenticated {
271 authenticated
272 .entry(result.provider.clone())
273 .or_default()
274 .push(result.browser);
275 }
276 }
277
278 let count = authenticated.len();
279
280 if authenticated.is_empty() {
281 println!(
282 " {} No authenticated web LLM providers found",
283 "[-]".dimmed()
284 );
285 println!(
286 " {} Log into ChatGPT, Claude, etc. in your browser to enable harvesting",
287 "`".dimmed()
288 );
289 } else {
290 for (provider, browsers) in &authenticated {
291 let browser_names: Vec<_> = browsers.iter().map(|b| b.name()).collect();
292 println!(
293 " {} {}: {} in {}",
294 "[+]".green(),
295 provider.bold(),
296 "authenticated".green(),
297 browser_names.join(", ")
298 );
299 }
300 }
301
302 (authenticated, count)
303}
304
305#[allow(dead_code)]
307#[derive(Debug, Clone, Serialize, Deserialize)]
308pub struct HarvestConfig {
309 pub db_path: PathBuf,
311 pub auto_commit: bool,
313 pub include_providers: Vec<String>,
315 pub exclude_providers: Vec<String>,
317 pub include_workspaces: bool,
319 pub last_harvest: Option<i64>,
321}
322
323#[allow(dead_code)]
324impl Default for HarvestConfig {
325 fn default() -> Self {
326 Self {
327 db_path: PathBuf::from("chat_sessions.db"),
328 auto_commit: false,
329 include_providers: Vec::new(),
330 exclude_providers: Vec::new(),
331 include_workspaces: true,
332 last_harvest: None,
333 }
334 }
335}
336
337#[derive(Debug, Default)]
339pub struct HarvestStats {
340 pub providers_scanned: usize,
341 pub workspaces_scanned: usize,
342 pub sessions_found: usize,
343 pub sessions_added: usize,
344 pub sessions_updated: usize,
345 pub sessions_skipped: usize,
346 pub errors: Vec<String>,
347}
348
349#[allow(dead_code)]
351#[derive(Debug, Clone, Serialize, Deserialize)]
352pub struct HarvestedSession {
353 pub id: String,
354 pub provider: String,
355 pub provider_type: String,
356 pub workspace_id: Option<String>,
357 pub workspace_name: Option<String>,
358 pub title: String,
359 pub message_count: usize,
360 pub created_at: i64,
361 pub updated_at: i64,
362 pub harvested_at: i64,
363 pub session_json: String,
364}
365
366pub fn harvest_init(path: Option<&str>, git_init: bool) -> Result<()> {
368 let db_path = get_db_path(path)?;
369 let db_dir = db_path.parent().unwrap_or(Path::new("."));
370
371 println!("\n{} Initializing Harvest Database", "[H]".magenta().bold());
372 println!("{}", "=".repeat(60));
373
374 if !db_dir.exists() {
376 fs::create_dir_all(db_dir)?;
377 println!("{} Created directory: {}", "[+]".green(), db_dir.display());
378 }
379
380 if db_path.exists() {
382 println!(
383 "{} Database already exists: {}",
384 "[!]".yellow(),
385 db_path.display()
386 );
387 println!(" Use 'csm harvest run' to update it");
388 return Ok(());
389 }
390
391 create_harvest_database(&db_path)?;
393 println!("{} Created database: {}", "[+]".green(), db_path.display());
394
395 if git_init {
397 init_git_tracking(&db_path)?;
398 }
399
400 println!("\n{} Harvest database initialized!", "[+]".green().bold());
401 println!("\nNext steps:");
402 println!(" 1. Run 'csm harvest scan' to see available providers");
403 println!(" 2. Run 'csm harvest run' to collect sessions");
404 if !git_init {
405 println!(" 3. Run 'csm harvest git init' to enable version tracking");
406 }
407
408 Ok(())
409}
410
411pub fn harvest_scan(
413 show_sessions: bool,
414 scan_web: bool,
415 timeout_secs: u64,
416 verbose: bool,
417) -> Result<()> {
418 println!("\n{} Scanning for Providers", "[H]".magenta().bold());
419 println!("{}", "=".repeat(60));
420
421 let registry = ProviderRegistry::new();
422 let mut total_sessions = 0;
423 let mut available_providers = Vec::new();
424
425 println!("\n{} LLM Providers:", "[*]".blue().bold());
427
428 let provider_types = vec![
429 ProviderType::Copilot,
430 ProviderType::Cursor,
431 ProviderType::CodexCli,
432 ProviderType::DroidCli,
433 ProviderType::GeminiCli,
434 ProviderType::Ollama,
435 ProviderType::Vllm,
436 ProviderType::Foundry,
437 ProviderType::LmStudio,
438 ProviderType::LocalAI,
439 ProviderType::TextGenWebUI,
440 ProviderType::Jan,
441 ProviderType::Gpt4All,
442 ProviderType::Llamafile,
443 ProviderType::OpenAI,
444 ProviderType::ChatGPT,
445 ProviderType::Anthropic,
446 ProviderType::Perplexity,
447 ProviderType::DeepSeek,
448 ProviderType::Gemini,
449 ];
450
451 for pt in &provider_types {
452 if let Some(provider) = registry.get_provider(*pt) {
453 let available = provider.is_available();
454 let session_count = if available {
455 provider.list_sessions().map(|s| s.len()).unwrap_or(0)
456 } else {
457 0
458 };
459
460 if available {
461 available_providers.push((*pt, session_count));
462 total_sessions += session_count;
463
464 let status = if session_count > 0 {
465 format!(
466 "{} {} sessions",
467 "+".green(),
468 session_count.to_string().cyan()
469 )
470 } else {
471 format!("{} no sessions", "+".green())
472 };
473
474 println!(
475 " {} {}: {}",
476 "[+]".green(),
477 provider.name().bold(),
478 status
479 );
480
481 if show_sessions && session_count > 0 {
482 if let Ok(sessions) = provider.list_sessions() {
483 for session in sessions.iter().take(3) {
484 println!(" {} {}", "`".dimmed(), session.title().dimmed());
485 }
486 if sessions.len() > 3 {
487 println!(" {} ... and {} more", "`".dimmed(), sessions.len() - 3);
488 }
489 }
490 }
491
492 if let Some(path) = provider.sessions_path() {
493 println!(
494 " {} {}",
495 "`".dimmed(),
496 path.display().to_string().dimmed()
497 );
498 }
499 }
500 }
501 }
502
503 println!("\n{} VS Code Workspaces:", "[*]".blue().bold());
505
506 match discover_workspaces() {
507 Ok(workspaces) => {
508 let workspaces_with_sessions: Vec<_> = workspaces
509 .iter()
510 .filter(|ws| ws.chat_session_count > 0)
511 .collect();
512
513 let ws_sessions: usize = workspaces_with_sessions
514 .iter()
515 .map(|ws| ws.chat_session_count)
516 .sum();
517
518 println!(
519 " {} {} workspaces with {} sessions",
520 "[+]".green(),
521 workspaces_with_sessions.len().to_string().cyan(),
522 ws_sessions.to_string().cyan()
523 );
524
525 if show_sessions {
526 for ws in workspaces_with_sessions.iter().take(5) {
527 let name = ws
528 .project_path
529 .clone()
530 .unwrap_or_else(|| ws.hash[..8.min(ws.hash.len())].to_string());
531 println!(
532 " {} {} ({} sessions)",
533 "`".dimmed(),
534 name.dimmed(),
535 ws.chat_session_count
536 );
537 }
538 if workspaces_with_sessions.len() > 5 {
539 println!(
540 " {} ... and {} more workspaces",
541 "`".dimmed(),
542 workspaces_with_sessions.len() - 5
543 );
544 }
545 }
546
547 total_sessions += ws_sessions;
548 }
549 Err(e) => {
550 println!(" {} Failed to scan workspaces: {}", "[!]".yellow(), e);
551 }
552 }
553
554 let mut web_providers_found = Vec::new();
556 let mut authenticated_count = 0;
557 if scan_web {
558 println!("\n{} Browser Authentication:", "[*]".blue().bold());
560 let (auth_results, auth_count) = scan_browser_authentication(verbose);
561 authenticated_count = auth_count;
562
563 println!("\n{} Web LLM Provider Endpoints:", "[*]".blue().bold());
565 web_providers_found = scan_web_providers(timeout_secs);
566
567 if !auth_results.is_empty() {
569 println!("\n{} Authenticated Provider Sessions:", "[*]".blue().bold());
570 for (provider, browsers) in &auth_results {
571 let browser_list: Vec<_> = browsers.iter().map(|b| b.name()).collect();
572 let reachable = web_providers_found.iter().any(|p| p == provider);
573 let status = if reachable {
574 format!("{} (reachable)", "ready to harvest".green())
575 } else {
576 format!("{}", "authenticated but endpoint blocked".yellow())
577 };
578 println!(
579 " {} {}: {} via {}",
580 "[+]".green(),
581 provider.bold(),
582 status,
583 browser_list.join(", ").dimmed()
584 );
585 }
586 }
587 }
588
589 println!("\n{} Summary:", "[*]".green().bold());
591 println!(
592 " {} local providers available",
593 available_providers.len().to_string().cyan()
594 );
595 if scan_web {
596 println!(
597 " {} web providers reachable",
598 web_providers_found.len().to_string().cyan()
599 );
600 println!(
601 " {} web providers authenticated",
602 authenticated_count.to_string().cyan()
603 );
604 }
605 println!(
606 " {} total sessions to harvest",
607 total_sessions.to_string().truecolor(252, 152, 103)
608 );
609
610 Ok(())
611}
612
613pub fn harvest_run(
615 path: Option<&str>,
616 providers: Option<&[String]>,
617 exclude: Option<&[String]>,
618 incremental: bool,
619 auto_commit: bool,
620 message: Option<&str>,
621) -> Result<()> {
622 let db_path = get_db_path(path)?;
623
624 println!("\n{} Running Harvest", "[H]".magenta().bold());
625 println!("{}", "=".repeat(60));
626
627 if !db_path.exists() {
629 println!("{} Database not found, creating...", "[*]".blue());
630 create_harvest_database(&db_path)?;
631 }
632
633 let conn = Connection::open(&db_path)?;
634 let mut stats = HarvestStats::default();
635
636 let last_harvest: Option<i64> = if incremental {
638 conn.query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
639 row.get(0)
640 })
641 .ok()
642 } else {
643 None
644 };
645
646 if let Some(ts) = last_harvest {
647 let dt = DateTime::from_timestamp_millis(ts)
648 .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
649 .unwrap_or_default();
650 println!("{} Incremental harvest since: {}", "[*]".blue(), dt);
651 }
652
653 let registry = ProviderRegistry::new();
654 let include_providers = providers.map(|p| p.to_vec());
655 let exclude_providers = exclude.map(|p| p.to_vec()).unwrap_or_default();
656
657 println!("\n{} Harvesting from providers...", "[*]".blue());
659
660 let provider_types = vec![
661 ProviderType::Copilot,
662 ProviderType::Cursor,
663 ProviderType::ClaudeCode,
664 ProviderType::CodexCli,
665 ProviderType::DroidCli,
666 ProviderType::GeminiCli,
667 ProviderType::OpenCode,
668 ProviderType::OpenClaw,
669 ProviderType::Antigravity,
670 ProviderType::Ollama,
671 ProviderType::Vllm,
672 ProviderType::Foundry,
673 ProviderType::LmStudio,
674 ProviderType::LocalAI,
675 ProviderType::TextGenWebUI,
676 ProviderType::Jan,
677 ProviderType::Gpt4All,
678 ProviderType::Llamafile,
679 ];
680
681 for pt in &provider_types {
682 let provider_name = pt.display_name().to_lowercase();
683
684 if let Some(ref include) = include_providers {
686 if !include
687 .iter()
688 .any(|p| provider_name.contains(&p.to_lowercase()))
689 {
690 continue;
691 }
692 }
693 if exclude_providers
694 .iter()
695 .any(|p| provider_name.contains(&p.to_lowercase()))
696 {
697 continue;
698 }
699
700 if let Some(provider) = registry.get_provider(*pt) {
701 if !provider.is_available() {
702 continue;
703 }
704
705 stats.providers_scanned += 1;
706
707 match provider.list_sessions() {
708 Ok(sessions) => {
709 for session in sessions {
710 stats.sessions_found += 1;
711
712 if let Some(last) = last_harvest {
714 if session.last_message_date <= last {
715 stats.sessions_skipped += 1;
716 continue;
717 }
718 }
719
720 match insert_or_update_session(
721 &conn,
722 &session,
723 pt.display_name(),
724 None,
725 None,
726 None, 3, "json", None, ) {
731 Ok(updated) => {
732 if updated {
733 stats.sessions_updated += 1;
734 } else {
735 stats.sessions_added += 1;
736 }
737 }
738 Err(e) => {
739 stats.errors.push(format!("{}: {}", session.title(), e));
740 }
741 }
742 }
743
744 let session_count = stats.sessions_added + stats.sessions_updated;
745 if session_count > 0 {
746 println!(
747 " {} {}: {} sessions",
748 "[+]".green(),
749 provider.name(),
750 session_count.to_string().cyan()
751 );
752 }
753 }
754 Err(e) => {
755 stats.errors.push(format!("{}: {}", provider.name(), e));
756 }
757 }
758 }
759 }
760
761 if include_providers.is_none()
763 || include_providers
764 .as_ref()
765 .map(|p| {
766 p.iter()
767 .any(|x| x == "copilot" || x == "vscode" || x == "workspace")
768 })
769 .unwrap_or(false)
770 {
771 println!("\n{} Harvesting from VS Code workspaces...", "[*]".blue());
772
773 if let Ok(workspaces) = discover_workspaces() {
774 for ws in &workspaces {
775 if ws.chat_session_count == 0 {
776 continue;
777 }
778
779 stats.workspaces_scanned += 1;
780
781 if let Ok(sessions) = get_chat_sessions_from_workspace(&ws.workspace_path) {
782 for swp in sessions {
783 stats.sessions_found += 1;
784
785 if let Some(last) = last_harvest {
787 if swp.session.last_message_date <= last {
788 stats.sessions_skipped += 1;
789 continue;
790 }
791 }
792
793 let ws_name = ws.project_path.clone();
794 let ws_path = ws.workspace_path.to_string_lossy().to_string();
795
796 match insert_or_update_session(
797 &conn,
798 &swp.session,
799 "GitHub Copilot",
800 Some(&ws.hash),
801 ws_name.as_deref(),
802 None, 3, "json", Some(&ws_path), ) {
807 Ok(updated) => {
808 if updated {
809 stats.sessions_updated += 1;
810 } else {
811 stats.sessions_added += 1;
812 }
813 }
814 Err(e) => {
815 stats.errors.push(format!("{}: {}", swp.session.title(), e));
816 }
817 }
818 }
819 }
820 }
821
822 println!(
823 " {} Workspaces: {} scanned",
824 "[+]".green(),
825 stats.workspaces_scanned.to_string().cyan()
826 );
827 }
828 }
829
830 let include_list: Vec<String> = include_providers.clone().unwrap_or_default();
832 harvest_web_providers(&conn, &mut stats, &include_list, &exclude_providers)?;
833
834 update_harvest_metadata(&conn)?;
836
837 println!("\n{} Harvest Complete:", "[+]".green().bold());
839 println!(
840 " {} providers scanned",
841 stats.providers_scanned.to_string().cyan()
842 );
843 println!(
844 " {} workspaces scanned",
845 stats.workspaces_scanned.to_string().cyan()
846 );
847 println!(
848 " {} sessions found",
849 stats.sessions_found.to_string().cyan()
850 );
851 println!(
852 " {} sessions added",
853 stats.sessions_added.to_string().green()
854 );
855 println!(
856 " {} sessions updated",
857 stats.sessions_updated.to_string().yellow()
858 );
859 if stats.sessions_skipped > 0 {
860 println!(
861 " {} sessions skipped (unchanged)",
862 stats.sessions_skipped.to_string().dimmed()
863 );
864 }
865
866 if !stats.errors.is_empty() {
867 println!("\n{} Errors ({}):", "[!]".red(), stats.errors.len());
868 for (i, err) in stats.errors.iter().take(5).enumerate() {
869 println!(" {}. {}", i + 1, err);
870 }
871 if stats.errors.len() > 5 {
872 println!(" ... and {} more errors", stats.errors.len() - 5);
873 }
874 }
875
876 if auto_commit && (stats.sessions_added > 0 || stats.sessions_updated > 0) {
878 println!("\n{} Auto-committing changes...", "[*]".blue());
879 let commit_msg = message.unwrap_or("Harvest: update chat sessions");
880 if let Err(e) = git_commit_harvest(&db_path, commit_msg) {
881 println!("{} Git commit failed: {}", "[!]".yellow(), e);
882 } else {
883 println!("{} Changes committed", "[+]".green());
884 }
885 }
886
887 println!("\nDatabase: {}", db_path.display());
888
889 Ok(())
890}
891
892pub fn harvest_status(path: Option<&str>) -> Result<()> {
894 let db_path = get_db_path(path)?;
895
896 println!("\n{} Harvest Database Status", "[H]".magenta().bold());
897 println!("{}", "=".repeat(60));
898
899 if !db_path.exists() {
900 println!(
901 "{} Database not found: {}",
902 "[!]".yellow(),
903 db_path.display()
904 );
905 println!(" Run 'csm harvest init' to create one");
906 return Ok(());
907 }
908
909 let conn = Connection::open(&db_path)?;
910
911 let mut stmt = conn.prepare(
913 "SELECT provider, COUNT(*) as count, SUM(message_count) as messages
914 FROM sessions
915 GROUP BY provider
916 ORDER BY count DESC",
917 )?;
918
919 let provider_stats: Vec<(String, i64, i64)> = stmt
920 .query_map([], |row| {
921 Ok((
922 row.get(0)?,
923 row.get(1)?,
924 row.get::<_, Option<i64>>(2)?.unwrap_or(0),
925 ))
926 })?
927 .filter_map(|r| r.ok())
928 .collect();
929
930 let total_sessions: i64 = conn
932 .query_row("SELECT COUNT(*) FROM sessions", [], |row| row.get(0))
933 .unwrap_or(0);
934
935 let total_messages: i64 = conn
936 .query_row(
937 "SELECT COALESCE(SUM(message_count), 0) FROM sessions",
938 [],
939 |row| row.get(0),
940 )
941 .unwrap_or(0);
942
943 let last_harvest: Option<i64> = conn
945 .query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
946 row.get(0)
947 })
948 .ok()
949 .flatten();
950
951 let oldest: Option<i64> = conn
953 .query_row("SELECT MIN(created_at) FROM sessions", [], |row| row.get(0))
954 .ok()
955 .flatten();
956
957 let newest: Option<i64> = conn
958 .query_row("SELECT MAX(updated_at) FROM sessions", [], |row| row.get(0))
959 .ok()
960 .flatten();
961
962 println!("{} Database: {}", "[*]".blue(), db_path.display());
963 println!(
964 "{} {}: {}",
965 "[*]".blue(),
966 "Total Sessions".truecolor(171, 157, 242),
967 total_sessions.to_string().truecolor(252, 152, 103)
968 );
969 println!(
970 "{} {}: {}",
971 "[*]".blue(),
972 "Total Messages".truecolor(171, 157, 242),
973 total_messages.to_string().truecolor(252, 152, 103)
974 );
975
976 if let Some(ts) = last_harvest {
977 let dt = DateTime::from_timestamp_millis(ts)
978 .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
979 .unwrap_or_else(|| "Unknown".to_string());
980 println!("{} Last Harvest: {}", "[*]".blue(), dt);
981 }
982
983 if let (Some(old), Some(new)) = (oldest, newest) {
984 let old_dt = DateTime::from_timestamp_millis(old)
985 .map(|d| d.format("%Y-%m-%d").to_string())
986 .unwrap_or_default();
987 let new_dt = DateTime::from_timestamp_millis(new)
988 .map(|d| d.format("%Y-%m-%d").to_string())
989 .unwrap_or_default();
990 println!("{} Date Range: {} to {}", "[*]".blue(), old_dt, new_dt);
991 }
992
993 if !provider_stats.is_empty() {
994 println!("\n{} Sessions by Provider:", "[*]".blue());
995 for (provider, count, messages) in &provider_stats {
996 println!(
997 " {} {}: {} sessions, {} messages",
998 "[+]".green(),
999 provider.bold(),
1000 count.to_string().cyan(),
1001 messages.to_string().dimmed()
1002 );
1003 }
1004 }
1005
1006 let db_dir = db_path.parent().unwrap_or(Path::new("."));
1008 if db_dir.join(".git").exists()
1009 || db_path
1010 .parent()
1011 .map(|p| p.join(".git").exists())
1012 .unwrap_or(false)
1013 {
1014 println!("\n{} Git Status:", "[*]".blue());
1015
1016 let output = Command::new("git")
1017 .current_dir(db_dir)
1018 .args([
1019 "status",
1020 "--porcelain",
1021 db_path.file_name().unwrap().to_str().unwrap(),
1022 ])
1023 .output();
1024
1025 match output {
1026 Ok(out) => {
1027 let status = String::from_utf8_lossy(&out.stdout);
1028 if status.is_empty() {
1029 println!(" {} No uncommitted changes", "[+]".green());
1030 } else {
1031 println!(" {} Uncommitted changes detected", "[!]".yellow());
1032 println!(" Run 'csm harvest git commit' to save changes");
1033 }
1034 }
1035 Err(_) => {
1036 println!(" {} Unable to check git status", "[!]".yellow());
1037 }
1038 }
1039 } else {
1040 println!("\n{} Git tracking not enabled", "[i]".dimmed());
1041 println!(" Run 'csm harvest git init' to enable version tracking");
1042 }
1043
1044 Ok(())
1045}
1046
1047pub fn harvest_list(
1049 path: Option<&str>,
1050 provider: Option<&str>,
1051 limit: usize,
1052 search: Option<&str>,
1053) -> Result<()> {
1054 let db_path = get_db_path(path)?;
1055
1056 if !db_path.exists() {
1057 println!(
1058 "{} Database not found: {}",
1059 "[!]".yellow(),
1060 db_path.display()
1061 );
1062 return Ok(());
1063 }
1064
1065 let conn = Connection::open(&db_path)?;
1066
1067 let mut query = String::from(
1068 "SELECT id, provider, title, message_count, created_at, updated_at, workspace_name
1069 FROM sessions WHERE 1=1",
1070 );
1071 let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1072
1073 if let Some(p) = provider {
1074 query.push_str(" AND LOWER(provider) LIKE ?");
1075 params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1076 }
1077
1078 if let Some(s) = search {
1079 query.push_str(" AND (LOWER(title) LIKE ? OR LOWER(id) LIKE ?)");
1080 let pattern = format!("%{}%", s.to_lowercase());
1081 params_vec.push(Box::new(pattern.clone()));
1082 params_vec.push(Box::new(pattern));
1083 }
1084
1085 query.push_str(" ORDER BY updated_at DESC LIMIT ?");
1086 params_vec.push(Box::new(limit as i64));
1087
1088 let mut stmt = conn.prepare(&query)?;
1089
1090 let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1092
1093 let sessions: Vec<HarvestQueryResult> = stmt
1094 .query_map(params_slice.as_slice(), |row| {
1095 Ok((
1096 row.get(0)?,
1097 row.get(1)?,
1098 row.get(2)?,
1099 row.get(3)?,
1100 row.get(4)?,
1101 row.get(5)?,
1102 row.get(6)?,
1103 ))
1104 })?
1105 .filter_map(|r| r.ok())
1106 .collect();
1107
1108 println!("\n{} Harvested Sessions", "[H]".magenta().bold());
1109 println!("{}", "=".repeat(60));
1110
1111 if sessions.is_empty() {
1112 println!("{} No sessions found", "[i]".dimmed());
1113 return Ok(());
1114 }
1115
1116 for (id, prov, title, msg_count, _created, updated, ws_name) in &sessions {
1117 let date = DateTime::from_timestamp_millis(*updated)
1118 .map(|d| d.format("%Y-%m-%d %H:%M").to_string())
1119 .unwrap_or_default();
1120
1121 println!("\n{} {}", "[S]".cyan(), title.bold());
1122 println!(" ID: {}", &id[..16.min(id.len())].dimmed());
1123 println!(
1124 " Provider: {} | Messages: {} | Updated: {}",
1125 prov.cyan(),
1126 msg_count.to_string().green(),
1127 date.dimmed()
1128 );
1129 if let Some(ws) = ws_name {
1130 println!(" Workspace: {}", ws.dimmed());
1131 }
1132 }
1133
1134 println!(
1135 "\n{} Showing {} of available sessions",
1136 "[i]".dimmed(),
1137 sessions.len()
1138 );
1139
1140 Ok(())
1141}
1142
1143pub fn harvest_export(
1145 path: Option<&str>,
1146 output: &str,
1147 format: &str,
1148 provider: Option<&str>,
1149 session_ids: Option<&[String]>,
1150) -> Result<()> {
1151 let db_path = get_db_path(path)?;
1152 let output_path = PathBuf::from(output);
1153
1154 if !db_path.exists() {
1155 anyhow::bail!("Database not found: {}", db_path.display());
1156 }
1157
1158 let conn = Connection::open(&db_path)?;
1159
1160 println!("\n{} Exporting Sessions", "[H]".magenta().bold());
1161 println!("{}", "=".repeat(60));
1162
1163 let mut query = String::from("SELECT session_json FROM sessions WHERE 1=1");
1165 let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1166
1167 if let Some(p) = provider {
1168 query.push_str(" AND LOWER(provider) LIKE ?");
1169 params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1170 }
1171
1172 if let Some(ids) = session_ids {
1173 let placeholders: Vec<&str> = ids.iter().map(|_| "?").collect();
1174 query.push_str(&format!(" AND id IN ({})", placeholders.join(",")));
1175 for id in ids {
1176 params_vec.push(Box::new(id.clone()));
1177 }
1178 }
1179
1180 let mut stmt = conn.prepare(&query)?;
1181 let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1182
1183 let sessions: Vec<String> = stmt
1184 .query_map(params_slice.as_slice(), |row| row.get(0))?
1185 .filter_map(|r| r.ok())
1186 .collect();
1187
1188 if sessions.is_empty() {
1189 println!("{} No sessions to export", "[i]".dimmed());
1190 return Ok(());
1191 }
1192
1193 if let Some(parent) = output_path.parent() {
1195 if !parent.exists() {
1196 fs::create_dir_all(parent)?;
1197 }
1198 }
1199
1200 match format.to_lowercase().as_str() {
1201 "json" => {
1202 let parsed: Vec<serde_json::Value> = sessions
1204 .iter()
1205 .filter_map(|s| serde_json::from_str(s).ok())
1206 .collect();
1207 let json_output = serde_json::to_string_pretty(&parsed)?;
1208 fs::write(&output_path, json_output)?;
1209 }
1210 "jsonl" => {
1211 let content: String = sessions
1213 .iter()
1214 .filter_map(|s| serde_json::from_str::<serde_json::Value>(s).ok())
1215 .map(|v| serde_json::to_string(&v).unwrap_or_default())
1216 .collect::<Vec<_>>()
1217 .join("\n");
1218 fs::write(&output_path, content)?;
1219 }
1220 "md" | "markdown" => {
1221 let mut md_content = String::from("# Chat Sessions Export\n\n");
1223 md_content.push_str(&format!(
1224 "Exported: {}\n\n",
1225 Utc::now().format("%Y-%m-%d %H:%M:%S")
1226 ));
1227
1228 for session_json in &sessions {
1229 if let Ok(session) = parse_session_json(session_json) {
1230 md_content.push_str(&format!("## {}\n\n", session.title()));
1231 md_content.push_str(&format!("Messages: {}\n\n", session.request_count()));
1232
1233 for request in &session.requests {
1234 if let Some(msg) = &request.message {
1235 if let Some(text) = &msg.text {
1236 md_content.push_str(&format!("### User\n\n{}\n\n", text));
1237 }
1238 }
1239 if let Some(response) = &request.response {
1241 let response_text = response
1242 .get("text")
1243 .and_then(|v| v.as_str())
1244 .or_else(|| response.get("content").and_then(|v| v.as_str()))
1245 .or_else(|| {
1246 response
1248 .get("value")
1249 .and_then(|v| v.as_array())
1250 .and_then(|arr| arr.first())
1251 .and_then(|v| v.get("value"))
1252 .and_then(|v| v.as_str())
1253 });
1254
1255 if let Some(text) = response_text {
1256 md_content.push_str(&format!("### Assistant\n\n{}\n\n", text));
1257 }
1258 }
1259 }
1260 md_content.push_str("---\n\n");
1261 }
1262 }
1263 fs::write(&output_path, md_content)?;
1264 }
1265 _ => {
1266 anyhow::bail!("Unknown format: {}. Supported: json, jsonl, md", format);
1267 }
1268 }
1269
1270 println!(
1271 "{} Exported {} sessions to {}",
1272 "[+]".green(),
1273 sessions.len().to_string().cyan(),
1274 output_path.display()
1275 );
1276
1277 Ok(())
1278}
1279
1280pub fn harvest_git_init(path: Option<&str>) -> Result<()> {
1282 let db_path = get_db_path(path)?;
1283 init_git_tracking(&db_path)
1284}
1285
1286pub fn harvest_git_commit(path: Option<&str>, message: Option<&str>) -> Result<()> {
1287 let db_path = get_db_path(path)?;
1288 let msg = message.unwrap_or("Update harvest database");
1289 git_commit_harvest(&db_path, msg)
1290}
1291
1292pub fn harvest_git_log(path: Option<&str>, count: usize) -> Result<()> {
1293 let db_path = get_db_path(path)?;
1294 let db_dir = db_path.parent().unwrap_or(Path::new("."));
1295 let db_name = db_path.file_name().unwrap().to_str().unwrap();
1296
1297 println!("\n{} Harvest Git History", "[H]".magenta().bold());
1298 println!("{}", "=".repeat(60));
1299
1300 let output = Command::new("git")
1301 .current_dir(db_dir)
1302 .args([
1303 "log",
1304 "--oneline",
1305 &format!("-{}", count),
1306 "--follow",
1307 "--",
1308 db_name,
1309 ])
1310 .output()
1311 .context("Failed to run git log")?;
1312
1313 if !output.status.success() {
1314 let stderr = String::from_utf8_lossy(&output.stderr);
1315 if stderr.contains("does not have any commits") {
1316 println!("{} No commits yet", "[i]".dimmed());
1317 } else {
1318 anyhow::bail!("Git log failed: {}", stderr);
1319 }
1320 return Ok(());
1321 }
1322
1323 let log = String::from_utf8_lossy(&output.stdout);
1324 if log.trim().is_empty() {
1325 println!("{} No commits found for {}", "[i]".dimmed(), db_name);
1326 } else {
1327 for line in log.lines() {
1328 let parts: Vec<&str> = line.splitn(2, ' ').collect();
1329 if parts.len() == 2 {
1330 println!("{} {} {}", "[C]".yellow(), parts[0].cyan(), parts[1]);
1331 } else {
1332 println!("{}", line);
1333 }
1334 }
1335 }
1336
1337 Ok(())
1338}
1339
1340pub fn harvest_git_diff(path: Option<&str>, commit: Option<&str>) -> Result<()> {
1341 let db_path = get_db_path(path)?;
1342 let db_dir = db_path.parent().unwrap_or(Path::new("."));
1343 let db_name = db_path.file_name().unwrap().to_str().unwrap();
1344
1345 println!("\n{} Harvest Database Changes", "[H]".magenta().bold());
1346 println!("{}", "=".repeat(60));
1347
1348 let mut args = vec!["diff", "--stat"];
1349 if let Some(c) = commit {
1350 args.push(c);
1351 }
1352 args.push("--");
1353 args.push(db_name);
1354
1355 let output = Command::new("git")
1356 .current_dir(db_dir)
1357 .args(&args)
1358 .output()
1359 .context("Failed to run git diff")?;
1360
1361 let diff = String::from_utf8_lossy(&output.stdout);
1362 if diff.trim().is_empty() {
1363 println!("{} No changes", "[+]".green());
1364 } else {
1365 println!("{}", diff);
1366 }
1367
1368 Ok(())
1369}
1370
1371pub fn harvest_git_restore(path: Option<&str>, commit: &str) -> Result<()> {
1372 let db_path = get_db_path(path)?;
1373 let db_dir = db_path.parent().unwrap_or(Path::new("."));
1374 let db_name = db_path.file_name().unwrap().to_str().unwrap();
1375
1376 println!("\n{} Restoring Harvest Database", "[H]".magenta().bold());
1377 println!("{}", "=".repeat(60));
1378
1379 let backup_path = db_path.with_extension("db.backup");
1381 if db_path.exists() {
1382 fs::copy(&db_path, &backup_path)?;
1383 println!(
1384 "{} Created backup: {}",
1385 "[+]".green(),
1386 backup_path.display()
1387 );
1388 }
1389
1390 let output = Command::new("git")
1392 .current_dir(db_dir)
1393 .args(["checkout", commit, "--", db_name])
1394 .output()
1395 .context("Failed to run git checkout")?;
1396
1397 if !output.status.success() {
1398 let stderr = String::from_utf8_lossy(&output.stderr);
1399 anyhow::bail!("Git restore failed: {}", stderr);
1400 }
1401
1402 println!(
1403 "{} Restored database from commit: {}",
1404 "[+]".green(),
1405 commit
1406 );
1407
1408 Ok(())
1409}
1410
1411fn get_db_path(path: Option<&str>) -> Result<PathBuf> {
1416 if let Some(p) = path {
1417 return Ok(PathBuf::from(p));
1418 }
1419
1420 if let Ok(p) = std::env::var("CSM_HARVEST_DB") {
1422 return Ok(PathBuf::from(p));
1423 }
1424
1425 Ok(std::env::current_dir()?.join("chat_sessions.db"))
1427}
1428
1429fn create_harvest_database(path: &Path) -> Result<()> {
1430 let conn = Connection::open(path)?;
1431
1432 conn.execute_batch(
1433 r#"
1434 -- Sessions table (original harvest format)
1435 CREATE TABLE IF NOT EXISTS sessions (
1436 id TEXT PRIMARY KEY,
1437 provider TEXT NOT NULL,
1438 provider_type TEXT,
1439 provider_version TEXT,
1440 schema_version INTEGER DEFAULT 3,
1441 file_format TEXT DEFAULT 'json',
1442 workspace_id TEXT,
1443 workspace_name TEXT,
1444 workspace_path TEXT,
1445 title TEXT NOT NULL,
1446 message_count INTEGER DEFAULT 0,
1447 created_at INTEGER NOT NULL,
1448 updated_at INTEGER NOT NULL,
1449 harvested_at INTEGER NOT NULL,
1450 session_json TEXT NOT NULL
1451 );
1452
1453 CREATE INDEX IF NOT EXISTS idx_sessions_provider ON sessions(provider);
1454 CREATE INDEX IF NOT EXISTS idx_sessions_workspace ON sessions(workspace_id);
1455 CREATE INDEX IF NOT EXISTS idx_sessions_updated ON sessions(updated_at);
1456 CREATE INDEX IF NOT EXISTS idx_sessions_workspace_path ON sessions(workspace_path);
1457
1458 -- Enhanced messages table with raw markdown and metadata
1459 CREATE TABLE IF NOT EXISTS messages_v2 (
1460 id INTEGER PRIMARY KEY AUTOINCREMENT,
1461 session_id TEXT NOT NULL,
1462 message_index INTEGER NOT NULL,
1463 request_id TEXT,
1464 response_id TEXT,
1465 role TEXT NOT NULL,
1466 content_raw TEXT NOT NULL,
1467 content_markdown TEXT,
1468 model_id TEXT,
1469 timestamp INTEGER,
1470 is_canceled INTEGER DEFAULT 0,
1471 metadata_json TEXT,
1472 created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1473 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1474 UNIQUE(session_id, message_index, role)
1475 );
1476
1477 CREATE INDEX IF NOT EXISTS idx_messages_v2_session ON messages_v2(session_id);
1478 CREATE INDEX IF NOT EXISTS idx_messages_v2_role ON messages_v2(role);
1479 CREATE INDEX IF NOT EXISTS idx_messages_v2_timestamp ON messages_v2(timestamp);
1480
1481 -- Tool invocations within messages (file edits, terminal commands, etc.)
1482 CREATE TABLE IF NOT EXISTS tool_invocations (
1483 id INTEGER PRIMARY KEY AUTOINCREMENT,
1484 message_id INTEGER NOT NULL,
1485 session_id TEXT NOT NULL,
1486 tool_name TEXT NOT NULL,
1487 tool_call_id TEXT,
1488 invocation_index INTEGER DEFAULT 0,
1489 input_json TEXT,
1490 output_json TEXT,
1491 status TEXT DEFAULT 'pending',
1492 is_confirmed INTEGER DEFAULT 0,
1493 timestamp INTEGER,
1494 FOREIGN KEY (message_id) REFERENCES messages_v2(id) ON DELETE CASCADE,
1495 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1496 );
1497
1498 CREATE INDEX IF NOT EXISTS idx_tool_invocations_message ON tool_invocations(message_id);
1499 CREATE INDEX IF NOT EXISTS idx_tool_invocations_session ON tool_invocations(session_id);
1500 CREATE INDEX IF NOT EXISTS idx_tool_invocations_tool ON tool_invocations(tool_name);
1501
1502 -- File changes/diffs associated with tool invocations
1503 CREATE TABLE IF NOT EXISTS file_changes (
1504 id INTEGER PRIMARY KEY AUTOINCREMENT,
1505 tool_invocation_id INTEGER,
1506 session_id TEXT NOT NULL,
1507 message_index INTEGER,
1508 file_path TEXT NOT NULL,
1509 change_type TEXT NOT NULL,
1510 old_content TEXT,
1511 new_content TEXT,
1512 diff_unified TEXT,
1513 line_start INTEGER,
1514 line_end INTEGER,
1515 timestamp INTEGER,
1516 FOREIGN KEY (tool_invocation_id) REFERENCES tool_invocations(id) ON DELETE CASCADE,
1517 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1518 );
1519
1520 CREATE INDEX IF NOT EXISTS idx_file_changes_tool ON file_changes(tool_invocation_id);
1521 CREATE INDEX IF NOT EXISTS idx_file_changes_session ON file_changes(session_id);
1522 CREATE INDEX IF NOT EXISTS idx_file_changes_path ON file_changes(file_path);
1523
1524 -- Message-level checkpoints for versioning
1525 CREATE TABLE IF NOT EXISTS message_checkpoints (
1526 id INTEGER PRIMARY KEY AUTOINCREMENT,
1527 session_id TEXT NOT NULL,
1528 message_index INTEGER NOT NULL,
1529 checkpoint_number INTEGER NOT NULL,
1530 content_hash TEXT NOT NULL,
1531 snapshot_json TEXT,
1532 file_state_json TEXT,
1533 created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1534 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1535 UNIQUE(session_id, message_index, checkpoint_number)
1536 );
1537
1538 CREATE INDEX IF NOT EXISTS idx_message_checkpoints_session ON message_checkpoints(session_id);
1539 CREATE INDEX IF NOT EXISTS idx_sessions_title ON sessions(title);
1540
1541 -- Messages table for full-text search
1542 CREATE TABLE IF NOT EXISTS messages (
1543 id INTEGER PRIMARY KEY AUTOINCREMENT,
1544 session_id TEXT NOT NULL,
1545 message_index INTEGER NOT NULL,
1546 role TEXT NOT NULL,
1547 content TEXT NOT NULL,
1548 created_at INTEGER,
1549 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1550 UNIQUE(session_id, message_index)
1551 );
1552
1553 CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id);
1554 CREATE INDEX IF NOT EXISTS idx_messages_role ON messages(role);
1555
1556 -- Checkpoints for version tracking
1557 CREATE TABLE IF NOT EXISTS checkpoints (
1558 id INTEGER PRIMARY KEY AUTOINCREMENT,
1559 session_id TEXT NOT NULL,
1560 checkpoint_number INTEGER NOT NULL,
1561 message TEXT,
1562 message_count INTEGER NOT NULL,
1563 content_hash TEXT NOT NULL,
1564 snapshot TEXT,
1565 created_at TEXT NOT NULL DEFAULT (datetime('now')),
1566 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1567 UNIQUE(session_id, checkpoint_number)
1568 );
1569
1570 CREATE INDEX IF NOT EXISTS idx_checkpoints_session ON checkpoints(session_id);
1571
1572 -- Share links for importing shared conversations
1573 CREATE TABLE IF NOT EXISTS share_links (
1574 id TEXT PRIMARY KEY,
1575 session_id TEXT,
1576 provider TEXT NOT NULL,
1577 url TEXT NOT NULL UNIQUE,
1578 share_id TEXT NOT NULL,
1579 title TEXT,
1580 imported INTEGER DEFAULT 0,
1581 imported_at INTEGER,
1582 created_at INTEGER NOT NULL,
1583 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE SET NULL
1584 );
1585
1586 CREATE INDEX IF NOT EXISTS idx_share_links_provider ON share_links(provider);
1587 CREATE INDEX IF NOT EXISTS idx_share_links_imported ON share_links(imported);
1588
1589 -- Harvest metadata
1590 CREATE TABLE IF NOT EXISTS harvest_metadata (
1591 key TEXT PRIMARY KEY,
1592 value TEXT NOT NULL
1593 );
1594
1595 INSERT OR REPLACE INTO harvest_metadata (key, value)
1596 VALUES ('version', '2.1'),
1597 ('created_at', datetime('now'));
1598
1599 -- Full-text search for messages (standalone FTS table)
1600 CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
1601 content_raw
1602 );
1603
1604 -- Triggers to keep FTS index in sync with messages_v2
1605 CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
1606 INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1607 END;
1608
1609 CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
1610 INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
1611 END;
1612
1613 CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
1614 INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
1615 INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1616 END;
1617 "#,
1618 )?;
1619
1620 Ok(())
1621}
1622
1623fn insert_or_update_session(
1624 conn: &Connection,
1625 session: &ChatSession,
1626 provider: &str,
1627 workspace_id: Option<&str>,
1628 workspace_name: Option<&str>,
1629 provider_version: Option<&str>,
1630 schema_version: u32,
1631 file_format: &str,
1632 workspace_path: Option<&str>,
1633) -> Result<bool> {
1634 let session_id = session
1635 .session_id
1636 .clone()
1637 .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
1638
1639 let now = Utc::now().timestamp_millis();
1640 let session_json = serde_json::to_string(session)?;
1641
1642 let existing: Option<i64> = conn
1644 .query_row(
1645 "SELECT updated_at FROM sessions WHERE id = ?",
1646 [&session_id],
1647 |row| row.get(0),
1648 )
1649 .ok();
1650
1651 let updated = existing.is_some();
1652
1653 conn.execute(
1654 r#"
1655 INSERT OR REPLACE INTO sessions
1656 (id, provider, provider_type, workspace_id, workspace_name, title,
1657 message_count, created_at, updated_at, harvested_at, session_json,
1658 provider_version, schema_version, file_format, workspace_path)
1659 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1660 "#,
1661 params![
1662 session_id,
1663 provider,
1664 provider,
1665 workspace_id,
1666 workspace_name,
1667 session.title(),
1668 session.request_count() as i64,
1669 session.creation_date,
1670 session.last_message_date,
1671 now,
1672 session_json,
1673 provider_version,
1674 schema_version as i64,
1675 file_format,
1676 workspace_path,
1677 ],
1678 )?;
1679
1680 populate_enhanced_messages(conn, &session_id, session)?;
1682
1683 Ok(updated)
1684}
1685
1686fn populate_enhanced_messages(
1688 conn: &Connection,
1689 session_id: &str,
1690 session: &ChatSession,
1691) -> Result<()> {
1692 conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
1694 conn.execute(
1695 "DELETE FROM tool_invocations WHERE session_id = ?",
1696 [session_id],
1697 )?;
1698 conn.execute(
1699 "DELETE FROM file_changes WHERE session_id = ?",
1700 [session_id],
1701 )?;
1702
1703 for (idx, request) in session.requests.iter().enumerate() {
1704 let timestamp = request.timestamp;
1705 let request_id = request.request_id.as_deref();
1706 let response_id = request.response_id.as_deref();
1707 let model_id = request.model_id.as_deref();
1708 let is_canceled = request.is_canceled.unwrap_or(false);
1709
1710 if let Some(ref message) = request.message {
1712 let content = message.text.clone().unwrap_or_default();
1713 if !content.is_empty() {
1714 let metadata = serde_json::json!({
1715 "variable_data": request.variable_data,
1716 });
1717
1718 conn.execute(
1719 r#"
1720 INSERT OR REPLACE INTO messages_v2
1721 (session_id, message_index, request_id, response_id, role,
1722 content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1723 VALUES (?, ?, ?, ?, 'user', ?, ?, ?, ?, 0, ?)
1724 "#,
1725 params![
1726 session_id,
1727 (idx * 2) as i64,
1728 request_id,
1729 response_id,
1730 &content,
1731 &content, model_id,
1733 timestamp,
1734 serde_json::to_string(&metadata).ok(),
1735 ],
1736 )?;
1737 }
1738 }
1739
1740 if let Some(ref response) = request.response {
1742 let (content, tool_invocations) = extract_response_content_and_tools(response);
1743
1744 if !content.is_empty() || !tool_invocations.is_empty() {
1745 let metadata = serde_json::json!({
1746 "content_references": request.content_references,
1747 "code_citations": request.code_citations,
1748 "response_markdown_info": request.response_markdown_info,
1749 });
1750
1751 conn.execute(
1752 r#"
1753 INSERT OR REPLACE INTO messages_v2
1754 (session_id, message_index, request_id, response_id, role,
1755 content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1756 VALUES (?, ?, ?, ?, 'assistant', ?, ?, ?, ?, ?, ?)
1757 "#,
1758 params![
1759 session_id,
1760 (idx * 2 + 1) as i64,
1761 request_id,
1762 response_id,
1763 &content,
1764 &content,
1765 model_id,
1766 timestamp,
1767 is_canceled as i64,
1768 serde_json::to_string(&metadata).ok(),
1769 ],
1770 )?;
1771
1772 let message_id: i64 = conn.last_insert_rowid();
1774
1775 for (inv_idx, invocation) in tool_invocations.iter().enumerate() {
1777 insert_tool_invocation(
1778 conn, message_id, session_id, inv_idx, invocation, timestamp,
1779 )?;
1780 }
1781 }
1782 }
1783 }
1784
1785 Ok(())
1786}
1787
1788fn extract_response_content_and_tools(
1790 response: &serde_json::Value,
1791) -> (String, Vec<serde_json::Value>) {
1792 let mut text_parts = Vec::new();
1793 let mut tool_invocations = Vec::new();
1794
1795 if let Some(items) = response.as_array() {
1796 for item in items {
1797 let kind = item.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1798
1799 match kind {
1800 "toolInvocationSerialized" => {
1801 tool_invocations.push(item.clone());
1802 }
1803 "thinking" => {
1804 continue;
1806 }
1807 _ => {
1808 if let Some(value) = item.get("value").and_then(|v| v.as_str()) {
1809 let trimmed = value.trim();
1811 if !trimmed.is_empty() && !is_empty_code_block(trimmed) {
1812 text_parts.push(value.to_string());
1813 }
1814 }
1815 }
1816 }
1817 }
1818 }
1819
1820 (text_parts.join("\n\n"), tool_invocations)
1821}
1822
1823fn insert_tool_invocation(
1825 conn: &Connection,
1826 message_id: i64,
1827 session_id: &str,
1828 inv_idx: usize,
1829 invocation: &serde_json::Value,
1830 timestamp: Option<i64>,
1831) -> Result<()> {
1832 let tool_name = invocation
1833 .get("toolId")
1834 .and_then(|t| t.as_str())
1835 .unwrap_or("unknown");
1836 let tool_call_id = invocation.get("toolCallId").and_then(|t| t.as_str());
1837 let is_complete = invocation
1838 .get("isComplete")
1839 .and_then(|c| c.as_bool())
1840 .unwrap_or(false);
1841 let is_confirmed = invocation.get("isConfirmed");
1842 let tool_data = invocation.get("toolSpecificData");
1843
1844 let input_json = tool_data.map(|d| serde_json::to_string(d).unwrap_or_default());
1845 let status = if is_complete { "complete" } else { "pending" };
1846 let confirmed = match is_confirmed {
1847 Some(v) => v
1848 .get("type")
1849 .and_then(|t| t.as_i64())
1850 .map(|t| t > 0)
1851 .unwrap_or(false),
1852 None => false,
1853 };
1854
1855 conn.execute(
1856 r#"
1857 INSERT INTO tool_invocations
1858 (message_id, session_id, tool_name, tool_call_id, invocation_index,
1859 input_json, status, is_confirmed, timestamp)
1860 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
1861 "#,
1862 params![
1863 message_id,
1864 session_id,
1865 tool_name,
1866 tool_call_id,
1867 inv_idx as i64,
1868 input_json,
1869 status,
1870 confirmed as i64,
1871 timestamp,
1872 ],
1873 )?;
1874
1875 let tool_invocation_id = conn.last_insert_rowid();
1876
1877 if let Some(data) = tool_data {
1879 insert_file_changes(conn, tool_invocation_id, session_id, data, timestamp)?;
1880 }
1881
1882 Ok(())
1883}
1884
1885fn insert_file_changes(
1887 conn: &Connection,
1888 tool_invocation_id: i64,
1889 session_id: &str,
1890 tool_data: &serde_json::Value,
1891 timestamp: Option<i64>,
1892) -> Result<()> {
1893 let kind = tool_data.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1894
1895 match kind {
1896 "terminal" => {
1897 if let Some(cmd_line) = tool_data.get("commandLine") {
1899 let original = cmd_line.get("original").and_then(|o| o.as_str());
1900 let edited = cmd_line.get("toolEdited").and_then(|e| e.as_str());
1901 let output = tool_data
1902 .get("terminalCommandOutput")
1903 .map(|o| serde_json::to_string(o).unwrap_or_default());
1904
1905 conn.execute(
1906 r#"
1907 INSERT INTO file_changes
1908 (tool_invocation_id, session_id, file_path, change_type,
1909 old_content, new_content, diff_unified, timestamp)
1910 VALUES (?, ?, '[terminal]', 'command', ?, ?, ?, ?)
1911 "#,
1912 params![
1913 tool_invocation_id,
1914 session_id,
1915 original,
1916 edited.or(original),
1917 output,
1918 timestamp,
1919 ],
1920 )?;
1921 }
1922 }
1923 "replaceFile" | "editFile" => {
1924 let file_path = tool_data
1926 .get("uri")
1927 .or_else(|| tool_data.get("filePath"))
1928 .and_then(|p| p.as_str())
1929 .unwrap_or("[unknown]");
1930 let old_string = tool_data.get("oldString").and_then(|s| s.as_str());
1931 let new_string = tool_data.get("newString").and_then(|s| s.as_str());
1932
1933 let diff = if let (Some(old), Some(new)) = (old_string, new_string) {
1935 Some(generate_unified_diff(old, new, file_path))
1936 } else {
1937 None
1938 };
1939
1940 conn.execute(
1941 r#"
1942 INSERT INTO file_changes
1943 (tool_invocation_id, session_id, file_path, change_type,
1944 old_content, new_content, diff_unified, timestamp)
1945 VALUES (?, ?, ?, 'edit', ?, ?, ?, ?)
1946 "#,
1947 params![
1948 tool_invocation_id,
1949 session_id,
1950 file_path,
1951 old_string,
1952 new_string,
1953 diff,
1954 timestamp,
1955 ],
1956 )?;
1957 }
1958 "createFile" => {
1959 let file_path = tool_data
1960 .get("uri")
1961 .or_else(|| tool_data.get("filePath"))
1962 .and_then(|p| p.as_str())
1963 .unwrap_or("[unknown]");
1964 let content = tool_data.get("content").and_then(|c| c.as_str());
1965
1966 conn.execute(
1967 r#"
1968 INSERT INTO file_changes
1969 (tool_invocation_id, session_id, file_path, change_type,
1970 new_content, timestamp)
1971 VALUES (?, ?, ?, 'create', ?, ?)
1972 "#,
1973 params![
1974 tool_invocation_id,
1975 session_id,
1976 file_path,
1977 content,
1978 timestamp,
1979 ],
1980 )?;
1981 }
1982 "readFile" => {
1983 let file_path = tool_data
1984 .get("uri")
1985 .or_else(|| tool_data.get("filePath"))
1986 .and_then(|p| p.as_str())
1987 .unwrap_or("[unknown]");
1988
1989 conn.execute(
1990 r#"
1991 INSERT INTO file_changes
1992 (tool_invocation_id, session_id, file_path, change_type, timestamp)
1993 VALUES (?, ?, ?, 'read', ?)
1994 "#,
1995 params![tool_invocation_id, session_id, file_path, timestamp,],
1996 )?;
1997 }
1998 _ => {
1999 if !kind.is_empty() {
2001 let data_json = serde_json::to_string(tool_data).ok();
2002 conn.execute(
2003 r#"
2004 INSERT INTO file_changes
2005 (tool_invocation_id, session_id, file_path, change_type,
2006 diff_unified, timestamp)
2007 VALUES (?, ?, ?, ?, ?, ?)
2008 "#,
2009 params![
2010 tool_invocation_id,
2011 session_id,
2012 format!("[{}]", kind),
2013 kind,
2014 data_json,
2015 timestamp,
2016 ],
2017 )?;
2018 }
2019 }
2020 }
2021
2022 Ok(())
2023}
2024
2025fn generate_unified_diff(old: &str, new: &str, file_path: &str) -> String {
2027 let old_lines: Vec<&str> = old.lines().collect();
2028 let new_lines: Vec<&str> = new.lines().collect();
2029
2030 let mut diff = format!("--- a/{}\n+++ b/{}\n", file_path, file_path);
2031
2032 let max_lines = old_lines.len().max(new_lines.len());
2034 let mut in_hunk = false;
2035 let mut hunk_start = 0;
2036 let mut hunk_lines = Vec::new();
2037
2038 for i in 0..max_lines {
2039 let old_line = old_lines.get(i).copied();
2040 let new_line = new_lines.get(i).copied();
2041
2042 match (old_line, new_line) {
2043 (Some(o), Some(n)) if o == n => {
2044 if in_hunk {
2045 hunk_lines.push(format!(" {}", o));
2046 }
2047 }
2048 (Some(o), Some(n)) => {
2049 if !in_hunk {
2050 in_hunk = true;
2051 hunk_start = i + 1;
2052 }
2053 hunk_lines.push(format!("-{}", o));
2054 hunk_lines.push(format!("+{}", n));
2055 }
2056 (Some(o), None) => {
2057 if !in_hunk {
2058 in_hunk = true;
2059 hunk_start = i + 1;
2060 }
2061 hunk_lines.push(format!("-{}", o));
2062 }
2063 (None, Some(n)) => {
2064 if !in_hunk {
2065 in_hunk = true;
2066 hunk_start = i + 1;
2067 }
2068 hunk_lines.push(format!("+{}", n));
2069 }
2070 (None, None) => break,
2071 }
2072 }
2073
2074 if !hunk_lines.is_empty() {
2075 diff.push_str(&format!(
2076 "@@ -{},{} +{},{} @@\n",
2077 hunk_start,
2078 old_lines.len(),
2079 hunk_start,
2080 new_lines.len()
2081 ));
2082 for line in hunk_lines {
2083 diff.push_str(&line);
2084 diff.push('\n');
2085 }
2086 }
2087
2088 diff
2089}
2090
2091fn harvest_web_providers(
2093 conn: &Connection,
2094 stats: &mut HarvestStats,
2095 include_providers: &[String],
2096 exclude_providers: &[String],
2097) -> Result<()> {
2098 use crate::browser::extract_provider_cookies;
2099
2100 println!("\n{} Harvesting from web providers...", "[*]".blue());
2101
2102 let web_provider_configs: Vec<(&str, &str, &str)> = vec![
2104 ("ChatGPT", "chatgpt", "__Secure-next-auth.session-token"),
2105 ("Claude", "claude", "sessionKey"),
2106 ];
2107
2108 let mut web_sessions_harvested = 0;
2109
2110 for (display_name, provider_key, _cookie_name) in &web_provider_configs {
2111 if !include_providers.is_empty()
2113 && !include_providers
2114 .iter()
2115 .any(|p| p.eq_ignore_ascii_case(provider_key))
2116 {
2117 continue;
2118 }
2119 if exclude_providers
2120 .iter()
2121 .any(|p| p.eq_ignore_ascii_case(provider_key))
2122 {
2123 continue;
2124 }
2125
2126 print!(" {} Checking {} ... ", "[-]".yellow(), display_name);
2127
2128 if let Some(creds) = extract_provider_cookies(provider_key) {
2130 if let Some(session_token) = &creds.session_token {
2131 println!("{}", "authenticated".green());
2132
2133 let result = match *provider_key {
2135 "chatgpt" => harvest_chatgpt_sessions(conn, session_token, stats),
2136 "claude" => harvest_claude_sessions(conn, session_token, stats),
2137 _ => Ok(0),
2138 };
2139
2140 match result {
2141 Ok(count) => {
2142 if count > 0 {
2143 println!(
2144 " {} Harvested {} sessions from {}",
2145 "[+]".green(),
2146 count.to_string().cyan(),
2147 display_name
2148 );
2149 web_sessions_harvested += count;
2150 }
2151 }
2152 Err(e) => {
2153 println!(
2154 " {} Failed to harvest {}: {:?}",
2155 "[!]".red(),
2156 display_name,
2157 e
2158 );
2159 }
2160 }
2161 } else {
2162 println!("{}", "no session token".yellow());
2163 }
2164 } else {
2165 println!("{}", "not authenticated".yellow());
2166 }
2167 }
2168
2169 if web_sessions_harvested > 0 {
2170 println!(
2171 " {} Total web sessions harvested: {}",
2172 "[+]".green(),
2173 web_sessions_harvested.to_string().cyan()
2174 );
2175 }
2176
2177 Ok(())
2178}
2179
2180fn harvest_chatgpt_sessions(
2182 conn: &Connection,
2183 session_token: &str,
2184 stats: &mut HarvestStats,
2185) -> Result<usize> {
2186 use crate::providers::cloud::chatgpt::ChatGPTProvider;
2187 use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2188
2189 let provider = ChatGPTProvider::with_session_token(session_token.to_string());
2190
2191 let options = FetchOptions {
2193 limit: Some(100),
2194 include_archived: false,
2195 after: None,
2196 before: None,
2197 session_token: Some(session_token.to_string()),
2198 };
2199
2200 let conversations = provider
2201 .list_conversations(&options)
2202 .context("Failed to list ChatGPT conversations")?;
2203
2204 let mut harvested = 0;
2205
2206 for conv_summary in conversations {
2207 let exists: bool = conn
2209 .query_row(
2210 "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'chatgpt')",
2211 params![&conv_summary.id],
2212 |row| row.get(0),
2213 )
2214 .unwrap_or(false);
2215
2216 if exists {
2217 continue;
2219 }
2220
2221 match provider.fetch_conversation(&conv_summary.id) {
2223 Ok(conv) => {
2224 if let Err(e) =
2226 insert_cloud_conversation_to_harvest_db(conn, &conv, "chatgpt", None)
2227 {
2228 eprintln!("Failed to insert ChatGPT session: {}", e);
2229 continue;
2230 }
2231 harvested += 1;
2232 stats.sessions_added += 1;
2233 }
2234 Err(e) => {
2235 eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2236 }
2237 }
2238
2239 std::thread::sleep(std::time::Duration::from_millis(100));
2241 }
2242
2243 Ok(harvested)
2244}
2245
2246fn harvest_claude_sessions(
2248 conn: &Connection,
2249 session_token: &str,
2250 stats: &mut HarvestStats,
2251) -> Result<usize> {
2252 use crate::providers::cloud::anthropic::AnthropicProvider;
2253 use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2254
2255 let provider = AnthropicProvider::with_session_token(session_token.to_string());
2256
2257 let options = FetchOptions {
2259 limit: Some(100),
2260 include_archived: false,
2261 after: None,
2262 before: None,
2263 session_token: Some(session_token.to_string()),
2264 };
2265
2266 let conversations = provider
2267 .list_conversations(&options)
2268 .context("Failed to list Claude conversations")?;
2269
2270 let mut harvested = 0;
2271
2272 for conv_summary in conversations {
2273 let exists: bool = conn
2275 .query_row(
2276 "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'claude')",
2277 params![&conv_summary.id],
2278 |row| row.get(0),
2279 )
2280 .unwrap_or(false);
2281
2282 if exists {
2283 continue;
2284 }
2285
2286 match provider.fetch_conversation(&conv_summary.id) {
2288 Ok(conv) => {
2289 if let Err(e) = insert_cloud_conversation_to_harvest_db(conn, &conv, "claude", None)
2290 {
2291 eprintln!("Failed to insert Claude session: {}", e);
2292 continue;
2293 }
2294 harvested += 1;
2295 stats.sessions_added += 1;
2296 }
2297 Err(e) => {
2298 eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2299 }
2300 }
2301
2302 std::thread::sleep(std::time::Duration::from_millis(100));
2303 }
2304
2305 Ok(harvested)
2306}
2307
2308fn insert_cloud_conversation_to_harvest_db(
2310 conn: &Connection,
2311 conv: &crate::providers::cloud::common::CloudConversation,
2312 provider: &str,
2313 workspace_name: Option<&str>,
2314) -> Result<()> {
2315 let now = Utc::now().timestamp_millis();
2316 let created_at = conv.created_at.timestamp_millis();
2317 let updated_at = conv
2318 .updated_at
2319 .map(|dt| dt.timestamp_millis())
2320 .unwrap_or(created_at);
2321
2322 let session_json = serde_json::json!({
2324 "id": conv.id,
2325 "title": conv.title,
2326 "model": conv.model,
2327 "created_at": conv.created_at.to_rfc3339(),
2328 "updated_at": conv.updated_at.map(|dt| dt.to_rfc3339()),
2329 "messages": conv.messages.iter().map(|m| {
2330 serde_json::json!({
2331 "id": m.id,
2332 "role": m.role,
2333 "content": m.content,
2334 "timestamp": m.timestamp.map(|dt| dt.to_rfc3339()),
2335 "model": m.model,
2336 })
2337 }).collect::<Vec<_>>(),
2338 });
2339
2340 conn.execute(
2341 r#"
2342 INSERT OR REPLACE INTO sessions
2343 (id, provider, provider_type, workspace_id, workspace_name, title,
2344 message_count, created_at, updated_at, harvested_at, session_json)
2345 VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
2346 "#,
2347 params![
2348 conv.id,
2349 provider,
2350 provider,
2351 workspace_name,
2352 conv.title.clone().unwrap_or_else(|| "Untitled".to_string()),
2353 conv.messages.len() as i64,
2354 created_at,
2355 updated_at,
2356 now,
2357 session_json.to_string(),
2358 ],
2359 )?;
2360
2361 populate_cloud_messages(conn, &conv.id, conv)?;
2363
2364 Ok(())
2365}
2366
2367fn populate_cloud_messages(
2369 conn: &Connection,
2370 session_id: &str,
2371 conv: &crate::providers::cloud::common::CloudConversation,
2372) -> Result<()> {
2373 conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
2375
2376 for (idx, message) in conv.messages.iter().enumerate() {
2377 let timestamp = message.timestamp.map(|dt| dt.timestamp_millis());
2378 let role = match message.role.as_str() {
2379 "user" | "human" => "user",
2380 "assistant" => "assistant",
2381 "system" => "system",
2382 other => other,
2383 };
2384
2385 conn.execute(
2386 r#"
2387 INSERT INTO messages_v2
2388 (session_id, message_index, request_id, response_id, role,
2389 content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
2390 VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, 0, NULL)
2391 "#,
2392 params![
2393 session_id,
2394 idx as i64,
2395 message.id,
2396 role,
2397 &message.content,
2398 &message.content,
2399 message.model.as_deref(),
2400 timestamp,
2401 ],
2402 )?;
2403 }
2404
2405 Ok(())
2406}
2407
2408fn update_harvest_metadata(conn: &Connection) -> Result<()> {
2409 conn.execute(
2410 "INSERT OR REPLACE INTO harvest_metadata (key, value) VALUES ('last_harvest', datetime('now'))",
2411 [],
2412 )?;
2413 Ok(())
2414}
2415
2416fn init_git_tracking(db_path: &Path) -> Result<()> {
2417 let db_dir = db_path.parent().unwrap_or(Path::new("."));
2418 let db_name = db_path.file_name().unwrap().to_str().unwrap();
2419
2420 println!("\n{} Initializing Git Tracking", "[G]".green().bold());
2421
2422 if !db_dir.join(".git").exists() {
2424 let output = Command::new("git")
2425 .current_dir(db_dir)
2426 .args(["init"])
2427 .output()
2428 .context("Failed to run git init")?;
2429
2430 if !output.status.success() {
2431 anyhow::bail!(
2432 "Git init failed: {}",
2433 String::from_utf8_lossy(&output.stderr)
2434 );
2435 }
2436 println!("{} Initialized git repository", "[+]".green());
2437 } else {
2438 println!("{} Git repository already exists", "[i]".blue());
2439 }
2440
2441 let gitignore_path = db_dir.join(".gitignore");
2443 if !gitignore_path.exists() {
2444 fs::write(
2445 &gitignore_path,
2446 "*.db-journal\n*.db-wal\n*.db-shm\n*.backup\n",
2447 )?;
2448 println!("{} Created .gitignore", "[+]".green());
2449 }
2450
2451 let output = Command::new("git")
2453 .current_dir(db_dir)
2454 .args(["add", db_name])
2455 .output()?;
2456
2457 if output.status.success() {
2458 println!("{} Added {} to git", "[+]".green(), db_name);
2459 }
2460
2461 let output = Command::new("git")
2463 .current_dir(db_dir)
2464 .args(["rev-parse", "HEAD"])
2465 .output()?;
2466
2467 if !output.status.success() {
2468 let output = Command::new("git")
2469 .current_dir(db_dir)
2470 .args(["commit", "-m", "Initialize harvest database"])
2471 .output()?;
2472
2473 if output.status.success() {
2474 println!("{} Created initial commit", "[+]".green());
2475 }
2476 }
2477
2478 println!("\n{} Git tracking enabled!", "[+]".green().bold());
2479 println!(" Run 'csm harvest git commit -m \"message\"' to save changes");
2480 println!(" Run 'csm harvest git log' to view history");
2481
2482 Ok(())
2483}
2484
2485fn git_commit_harvest(db_path: &Path, message: &str) -> Result<()> {
2486 let db_dir = db_path.parent().unwrap_or(Path::new("."));
2487 let db_name = db_path.file_name().unwrap().to_str().unwrap();
2488
2489 let output = Command::new("git")
2491 .current_dir(db_dir)
2492 .args(["add", db_name])
2493 .output()
2494 .context("Failed to stage database")?;
2495
2496 if !output.status.success() {
2497 anyhow::bail!(
2498 "Git add failed: {}",
2499 String::from_utf8_lossy(&output.stderr)
2500 );
2501 }
2502
2503 let output = Command::new("git")
2505 .current_dir(db_dir)
2506 .args(["commit", "-m", message])
2507 .output()
2508 .context("Failed to commit")?;
2509
2510 if !output.status.success() {
2511 let stderr = String::from_utf8_lossy(&output.stderr);
2512 if stderr.contains("nothing to commit") {
2513 println!("{} Nothing to commit", "[i]".blue());
2514 return Ok(());
2515 }
2516 anyhow::bail!("Git commit failed: {}", stderr);
2517 }
2518
2519 let output = Command::new("git")
2521 .current_dir(db_dir)
2522 .args(["rev-parse", "--short", "HEAD"])
2523 .output()?;
2524
2525 let hash = String::from_utf8_lossy(&output.stdout).trim().to_string();
2526 println!("{} Committed: {} - {}", "[+]".green(), hash.cyan(), message);
2527
2528 Ok(())
2529}
2530
2531pub fn harvest_share(
2537 db_path: Option<&str>,
2538 url: &str,
2539 name: Option<&str>,
2540 _workspace: Option<&str>,
2541) -> Result<()> {
2542 let db_path = get_db_path(db_path)?;
2543
2544 let share_info = match ShareLinkParser::parse(url) {
2546 Some(info) => info,
2547 None => {
2548 println!("{} Unrecognized share link format", "[!]".yellow());
2549 println!(" Supported providers: ChatGPT, Claude, Gemini, Perplexity");
2550 println!(" Example URLs:");
2551 println!(" - https://chat.openai.com/share/abc123...");
2552 println!(" - https://claude.ai/share/xyz789...");
2553 anyhow::bail!("Could not parse share link URL");
2554 }
2555 };
2556
2557 println!("{}", "=".repeat(60).cyan());
2558 println!("{}", " Share Link Import ".bold().cyan());
2559 println!("{}", "=".repeat(60).cyan());
2560 println!();
2561
2562 println!(
2563 "{} Detected provider: {}",
2564 "[i]".blue(),
2565 share_info.provider.bold()
2566 );
2567 println!("{} Share ID: {}", "[i]".blue(), share_info.share_id);
2568
2569 let db = ChatDatabase::open(&db_path)?;
2571
2572 let conn = db.connection();
2574 let existing: Option<(String, i64)> = conn
2575 .query_row(
2576 "SELECT id, imported FROM share_links WHERE url = ?",
2577 [url],
2578 |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
2579 )
2580 .ok();
2581
2582 if let Some((id, imported)) = existing {
2583 println!();
2584 println!(
2585 "{} This share link has already been registered (ID: {})",
2586 "[!]".yellow(),
2587 id
2588 );
2589
2590 let status = if imported == 1 { "imported" } else { "pending" };
2591 println!("{} Current status: {}", "[i]".blue(), status);
2592
2593 if imported == 1 {
2594 println!(
2595 "{} Session already imported - no action needed",
2596 "[+]".green()
2597 );
2598 return Ok(());
2599 }
2600 } else {
2601 let link_id = uuid::Uuid::new_v4().to_string();
2603 let now = chrono::Utc::now().timestamp();
2604 conn.execute(
2605 "INSERT INTO share_links (id, url, provider, share_id, title, imported, created_at) VALUES (?, ?, ?, ?, ?, 0, ?)",
2606 params![link_id, url, share_info.provider, share_info.share_id, name, now],
2607 )?;
2608
2609 println!(
2610 "{} Registered share link (ID: {})",
2611 "[+]".green(),
2612 &link_id[..8]
2613 );
2614 }
2615
2616 println!();
2617 println!("{}", "-".repeat(60).dimmed());
2618 println!();
2619
2620 println!("{} Share link registered as pending", "[i]".blue());
2629 println!();
2630 println!("{}", "Note:".bold().yellow());
2631 println!(" Automatic content fetching from share links is not yet implemented.");
2632 println!(" For now, you can:");
2633 println!(" 1. Open the share link in your browser");
2634 println!(" 2. Export the conversation manually");
2635 println!(" 3. Import with: csm import <file>");
2636 println!();
2637 println!(" Or use 'csm harvest shares' to view pending links.");
2638
2639 Ok(())
2640}
2641
2642pub fn harvest_shares(
2644 db_path: Option<&str>,
2645 status_filter: Option<&str>,
2646 limit: usize,
2647) -> Result<()> {
2648 let db_path = get_db_path(db_path)?;
2649
2650 if !db_path.exists() {
2651 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2652 }
2653
2654 let db = ChatDatabase::open(&db_path)?;
2655 let conn = db.connection();
2656
2657 println!("{}", "=".repeat(70).cyan());
2658 println!("{}", " Share Links ".bold().cyan());
2659 println!("{}", "=".repeat(70).cyan());
2660 println!();
2661
2662 let query = match status_filter {
2663 Some("pending") => format!(
2664 "SELECT id, url, provider, share_id, title, imported, created_at
2665 FROM share_links WHERE imported = 0 ORDER BY created_at DESC LIMIT {}",
2666 limit
2667 ),
2668 Some("imported") => format!(
2669 "SELECT id, url, provider, share_id, title, imported, created_at
2670 FROM share_links WHERE imported = 1 ORDER BY created_at DESC LIMIT {}",
2671 limit
2672 ),
2673 Some(_) | None => format!(
2674 "SELECT id, url, provider, share_id, title, imported, created_at
2675 FROM share_links ORDER BY created_at DESC LIMIT {}",
2676 limit
2677 ),
2678 };
2679
2680 let mut stmt = conn.prepare(&query)?;
2681
2682 let rows: Vec<_> = stmt
2683 .query_map([], |row| {
2684 Ok((
2685 row.get::<_, String>(0)?,
2686 row.get::<_, String>(1)?,
2687 row.get::<_, String>(2)?,
2688 row.get::<_, Option<String>>(3)?,
2689 row.get::<_, Option<String>>(4)?,
2690 row.get::<_, i64>(5)?,
2691 row.get::<_, i64>(6)?,
2692 ))
2693 })?
2694 .collect::<Result<Vec<_>, _>>()?;
2695
2696 if rows.is_empty() {
2697 if let Some(status) = status_filter {
2698 println!("{} No share links with status '{}'", "[i]".blue(), status);
2699 } else {
2700 println!("{} No share links found", "[i]".blue());
2701 }
2702 println!(" Use 'csm harvest share <url>' to add a share link");
2703 return Ok(());
2704 }
2705
2706 let pending_count: i64 = conn
2708 .query_row(
2709 "SELECT COUNT(*) FROM share_links WHERE imported = 0",
2710 [],
2711 |row| row.get(0),
2712 )
2713 .unwrap_or(0);
2714
2715 let imported_count: i64 = conn
2716 .query_row(
2717 "SELECT COUNT(*) FROM share_links WHERE imported = 1",
2718 [],
2719 |row| row.get(0),
2720 )
2721 .unwrap_or(0);
2722
2723 println!(
2724 "{} {} pending, {} imported",
2725 "[i]".blue(),
2726 pending_count.to_string().yellow(),
2727 imported_count.to_string().green()
2728 );
2729 println!();
2730
2731 for (id, url, provider, _share_id, title, imported, created_at) in rows {
2732 let status = if imported == 1 { "imported" } else { "pending" };
2733 let status_colored = if imported == 1 {
2734 status.green()
2735 } else {
2736 status.yellow()
2737 };
2738
2739 println!(
2740 "{} [{}] {} - {}",
2741 format!("#{}", &id[..8]).dimmed(),
2742 status_colored,
2743 provider.bold(),
2744 title.as_deref().unwrap_or("(untitled)")
2745 );
2746
2747 let display_url = if url.len() > 60 {
2749 format!("{}...", &url[..57])
2750 } else {
2751 url.clone()
2752 };
2753 println!(" {} {}", "URL:".dimmed(), display_url.dimmed());
2754
2755 let timestamp = chrono::DateTime::from_timestamp(created_at, 0)
2757 .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
2758 .unwrap_or_else(|| created_at.to_string());
2759 println!(" {} {}", "Added:".dimmed(), timestamp.dimmed());
2760 println!();
2761 }
2762
2763 Ok(())
2764}
2765
2766pub fn harvest_checkpoint(
2772 db_path: Option<&str>,
2773 session_id: &str,
2774 message: Option<&str>,
2775) -> Result<()> {
2776 let db_path = get_db_path(db_path)?;
2777
2778 if !db_path.exists() {
2779 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2780 }
2781
2782 let db = ChatDatabase::open(&db_path)?;
2783 let conn = db.connection();
2784
2785 let session: Option<(i64, String, i64)> = conn
2787 .query_row(
2788 "SELECT id, session_id, message_count FROM sessions WHERE session_id = ? OR id = ?",
2789 params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2790 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2791 )
2792 .ok();
2793
2794 let (internal_id, actual_session_id, msg_count) = match session {
2795 Some(s) => s,
2796 None => {
2797 println!("{} Session not found: {}", "[!]".red(), session_id);
2798 println!(" Use 'csm harvest list' to see available sessions");
2799 anyhow::bail!("Session not found");
2800 }
2801 };
2802
2803 let checkpoint_num: i64 = conn.query_row(
2805 "SELECT COALESCE(MAX(checkpoint_number), 0) + 1 FROM checkpoints WHERE session_id = ?",
2806 [internal_id],
2807 |row| row.get(0),
2808 )?;
2809
2810 let content_hash = format!(
2812 "{:x}",
2813 md5_hash(&format!("{}:{}", actual_session_id, msg_count))
2814 );
2815
2816 let messages: Vec<String> = {
2818 let mut stmt = conn
2819 .prepare("SELECT content FROM messages WHERE session_id = ? ORDER BY message_index")?;
2820 let rows = stmt.query_map([internal_id], |row| row.get::<_, String>(0))?;
2821 rows.filter_map(|r| r.ok()).collect()
2822 };
2823
2824 let snapshot = serde_json::json!({
2825 "message_count": msg_count,
2826 "messages": messages,
2827 });
2828
2829 let default_message = format!("Checkpoint {}", checkpoint_num);
2830 let message_text = message.unwrap_or(&default_message);
2831
2832 conn.execute(
2834 "INSERT INTO checkpoints (session_id, checkpoint_number, message, message_count, content_hash, snapshot)
2835 VALUES (?, ?, ?, ?, ?, ?)",
2836 params![
2837 internal_id,
2838 checkpoint_num,
2839 message_text,
2840 msg_count,
2841 content_hash,
2842 snapshot.to_string()
2843 ],
2844 )?;
2845
2846 println!("{}", "=".repeat(60).cyan());
2847 println!("{}", " Checkpoint Created ".bold().cyan());
2848 println!("{}", "=".repeat(60).cyan());
2849 println!();
2850 println!("{} Session: {}", "[+]".green(), actual_session_id);
2851 println!(
2852 "{} Checkpoint #{}: {}",
2853 "[+]".green(),
2854 checkpoint_num,
2855 message_text
2856 );
2857 println!("{} Messages: {}", "[i]".blue(), msg_count);
2858 println!("{} Hash: {}", "[i]".blue(), &content_hash[..16]);
2859 println!();
2860 println!(
2861 " Use 'csm harvest checkpoints {}' to view history",
2862 session_id
2863 );
2864
2865 Ok(())
2866}
2867
2868pub fn harvest_checkpoints(db_path: Option<&str>, session_id: &str) -> Result<()> {
2870 let db_path = get_db_path(db_path)?;
2871
2872 if !db_path.exists() {
2873 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2874 }
2875
2876 let db = ChatDatabase::open(&db_path)?;
2877 let conn = db.connection();
2878
2879 let session: Option<(i64, String, String)> = conn
2881 .query_row(
2882 "SELECT id, session_id, name FROM sessions WHERE session_id = ? OR id = ?",
2883 params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2884 |row| {
2885 Ok((
2886 row.get(0)?,
2887 row.get(1)?,
2888 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
2889 ))
2890 },
2891 )
2892 .ok();
2893
2894 let (internal_id, actual_session_id, name) = match session {
2895 Some(s) => s,
2896 None => {
2897 println!("{} Session not found: {}", "[!]".red(), session_id);
2898 anyhow::bail!("Session not found");
2899 }
2900 };
2901
2902 println!("{}", "=".repeat(70).cyan());
2903 println!("{}", " Session Checkpoints ".bold().cyan());
2904 println!("{}", "=".repeat(70).cyan());
2905 println!();
2906 println!(
2907 "{} Session: {} {}",
2908 "[i]".blue(),
2909 actual_session_id,
2910 if !name.is_empty() {
2911 format!("({})", name)
2912 } else {
2913 String::new()
2914 }
2915 );
2916 println!();
2917
2918 let mut stmt = conn.prepare(
2919 "SELECT checkpoint_number, message, message_count, content_hash, created_at
2920 FROM checkpoints WHERE session_id = ? ORDER BY checkpoint_number DESC",
2921 )?;
2922
2923 let checkpoints: Vec<_> = stmt
2924 .query_map([internal_id], |row| {
2925 Ok((
2926 row.get::<_, i64>(0)?,
2927 row.get::<_, String>(1)?,
2928 row.get::<_, i64>(2)?,
2929 row.get::<_, String>(3)?,
2930 row.get::<_, String>(4)?,
2931 ))
2932 })?
2933 .collect::<Result<Vec<_>, _>>()?;
2934
2935 if checkpoints.is_empty() {
2936 println!("{} No checkpoints found for this session", "[i]".blue());
2937 println!(
2938 " Use 'csm harvest checkpoint {} -m \"message\"' to create one",
2939 session_id
2940 );
2941 return Ok(());
2942 }
2943
2944 println!("{} {} checkpoints found:", "[i]".blue(), checkpoints.len());
2945 println!();
2946
2947 for (num, msg, msg_count, hash, created_at) in checkpoints {
2948 println!(" {} #{} - {}", "*".cyan(), num.to_string().bold(), msg);
2949 println!(
2950 " {} messages | {} | {}",
2951 msg_count,
2952 &hash[..12],
2953 created_at.dimmed()
2954 );
2955 }
2956
2957 println!();
2958 println!(
2959 " Use 'csm harvest restore {} <checkpoint>' to restore",
2960 session_id
2961 );
2962
2963 Ok(())
2964}
2965
2966pub fn harvest_restore_checkpoint(
2968 db_path: Option<&str>,
2969 session_id: &str,
2970 checkpoint_number: i64,
2971) -> Result<()> {
2972 let db_path = get_db_path(db_path)?;
2973
2974 if !db_path.exists() {
2975 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2976 }
2977
2978 let db = ChatDatabase::open(&db_path)?;
2979 let conn = db.connection();
2980
2981 let session: Option<(i64, String)> = conn
2983 .query_row(
2984 "SELECT id, session_id FROM sessions WHERE session_id = ? OR id = ?",
2985 params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2986 |row| Ok((row.get(0)?, row.get(1)?)),
2987 )
2988 .ok();
2989
2990 let (internal_id, actual_session_id) = match session {
2991 Some(s) => s,
2992 None => {
2993 println!("{} Session not found: {}", "[!]".red(), session_id);
2994 anyhow::bail!("Session not found");
2995 }
2996 };
2997
2998 let checkpoint: Option<(String, i64)> = conn
3000 .query_row(
3001 "SELECT snapshot, message_count FROM checkpoints
3002 WHERE session_id = ? AND checkpoint_number = ?",
3003 params![internal_id, checkpoint_number],
3004 |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
3005 )
3006 .ok();
3007
3008 let (snapshot_json, original_msg_count) = match checkpoint {
3009 Some(c) => c,
3010 None => {
3011 println!(
3012 "{} Checkpoint #{} not found for session {}",
3013 "[!]".red(),
3014 checkpoint_number,
3015 session_id
3016 );
3017 println!(
3018 " Use 'csm harvest checkpoints {}' to see available checkpoints",
3019 session_id
3020 );
3021 anyhow::bail!("Checkpoint not found");
3022 }
3023 };
3024
3025 let snapshot: serde_json::Value =
3027 serde_json::from_str(&snapshot_json).context("Failed to parse checkpoint snapshot")?;
3028
3029 let messages = snapshot["messages"]
3030 .as_array()
3031 .context("Invalid snapshot format")?;
3032
3033 println!("{}", "=".repeat(60).cyan());
3034 println!("{}", " Restore Checkpoint ".bold().yellow());
3035 println!("{}", "=".repeat(60).cyan());
3036 println!();
3037 println!("{} Session: {}", "[i]".blue(), actual_session_id);
3038 println!(
3039 "{} Restoring to checkpoint #{}",
3040 "[!]".yellow(),
3041 checkpoint_number
3042 );
3043 println!(
3044 "{} Messages to restore: {}",
3045 "[i]".blue(),
3046 original_msg_count
3047 );
3048 println!();
3049
3050 let deleted = conn.execute("DELETE FROM messages WHERE session_id = ?", [internal_id])?;
3052
3053 println!("{} Removed {} current messages", "[-]".red(), deleted);
3054
3055 for (idx, msg) in messages.iter().enumerate() {
3057 if let Some(content) = msg.as_str() {
3058 conn.execute(
3059 "INSERT INTO messages (session_id, message_index, role, content) VALUES (?, ?, 'unknown', ?)",
3060 params![internal_id, idx as i64, content],
3061 )?;
3062 }
3063 }
3064
3065 conn.execute(
3067 "UPDATE sessions SET message_count = ?, updated_at = datetime('now') WHERE id = ?",
3068 params![original_msg_count, internal_id],
3069 )?;
3070
3071 println!(
3072 "{} Restored {} messages from checkpoint",
3073 "[+]".green(),
3074 messages.len()
3075 );
3076 println!();
3077 println!(
3078 "{} Session restored to checkpoint #{}",
3079 "[+]".green().bold(),
3080 checkpoint_number
3081 );
3082
3083 Ok(())
3084}
3085
3086pub fn harvest_rebuild_fts(db_path: Option<&str>) -> Result<()> {
3092 let db_path = get_db_path(db_path)?;
3093
3094 if !db_path.exists() {
3095 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3096 }
3097
3098 let conn = Connection::open(&db_path)?;
3099
3100 println!("{}", "=".repeat(70).cyan());
3101 println!("{} Rebuilding Full-Text Search Index", "[*]".bold());
3102 println!("{}", "=".repeat(70).cyan());
3103 println!();
3104
3105 println!("{} Dropping old FTS index...", "[*]".blue());
3107 conn.execute_batch(
3108 r#"
3109 DROP TRIGGER IF EXISTS messages_v2_ai;
3110 DROP TRIGGER IF EXISTS messages_v2_ad;
3111 DROP TRIGGER IF EXISTS messages_v2_au;
3112 DROP TABLE IF EXISTS messages_fts;
3113 "#,
3114 )?;
3115
3116 println!("{} Creating new FTS index...", "[*]".blue());
3118 conn.execute_batch(
3119 r#"
3120 CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
3121 content_raw,
3122 content='messages_v2',
3123 content_rowid='id'
3124 );
3125
3126 CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
3127 INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3128 END;
3129
3130 CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
3131 INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3132 END;
3133
3134 CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
3135 INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3136 INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3137 END;
3138 "#,
3139 )?;
3140
3141 let total_messages: i64 =
3143 conn.query_row("SELECT COUNT(*) FROM messages_v2", [], |row| row.get(0))?;
3144
3145 println!("{} Indexing {} messages...", "[*]".blue(), total_messages);
3147
3148 conn.execute(
3149 "INSERT INTO messages_fts(rowid, content_raw) SELECT id, content_raw FROM messages_v2",
3150 [],
3151 )?;
3152
3153 let indexed: i64 = conn.query_row("SELECT COUNT(*) FROM messages_fts", [], |row| row.get(0))?;
3155
3156 println!();
3157 println!("{} FTS index rebuilt successfully!", "[+]".green().bold());
3158 println!(" {} messages indexed", indexed);
3159
3160 Ok(())
3161}
3162
3163pub fn harvest_search(
3165 db_path: Option<&str>,
3166 query: &str,
3167 provider_filter: Option<&str>,
3168 limit: usize,
3169) -> Result<()> {
3170 let db_path = get_db_path(db_path)?;
3171
3172 if !db_path.exists() {
3173 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3174 }
3175
3176 let db = ChatDatabase::open(&db_path)?;
3177 let conn = db.connection();
3178
3179 println!("{}", "=".repeat(70).cyan());
3180 println!("{} Search: {}", "[?]".bold(), query.bold());
3181 println!("{}", "=".repeat(70).cyan());
3182 println!();
3183
3184 let results: Vec<(String, String, String, String)> = {
3186 let fts_exists: bool = conn
3188 .query_row(
3189 "SELECT 1 FROM sqlite_master WHERE type='table' AND name='messages_fts'",
3190 [],
3191 |_| Ok(true),
3192 )
3193 .unwrap_or(false);
3194
3195 if fts_exists {
3196 let sql = format!(
3199 "SELECT s.id, s.provider, s.title,
3200 snippet(messages_fts, 0, '>>>', '<<<', '...', 32) as snip
3201 FROM messages_fts fts
3202 JOIN messages_v2 m ON m.id = fts.rowid
3203 JOIN sessions s ON m.session_id = s.id
3204 WHERE messages_fts MATCH ?
3205 {}
3206 GROUP BY s.id
3207 ORDER BY rank
3208 LIMIT {}",
3209 if provider_filter.is_some() {
3210 "AND s.provider = ?"
3211 } else {
3212 ""
3213 },
3214 limit
3215 );
3216
3217 let mut stmt = conn.prepare(&sql)?;
3218
3219 if let Some(provider) = provider_filter {
3220 stmt.query_map(params![query, provider], |row| {
3221 Ok((
3222 row.get::<_, String>(0)?,
3223 row.get::<_, String>(1)?,
3224 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3225 row.get::<_, String>(3)?,
3226 ))
3227 })?
3228 .collect::<Result<Vec<_>, _>>()?
3229 } else {
3230 stmt.query_map([query], |row| {
3231 Ok((
3232 row.get::<_, String>(0)?,
3233 row.get::<_, String>(1)?,
3234 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3235 row.get::<_, String>(3)?,
3236 ))
3237 })?
3238 .collect::<Result<Vec<_>, _>>()?
3239 }
3240 } else {
3241 let search_pattern = format!("%{}%", query);
3243 let sql = format!(
3244 "SELECT s.id, s.provider, s.title,
3245 SUBSTR(m.content_raw,
3246 MAX(1, INSTR(LOWER(m.content_raw), LOWER(?)) - 50),
3247 150) as snip
3248 FROM messages_v2 m
3249 JOIN sessions s ON m.session_id = s.id
3250 WHERE m.content_raw LIKE ?
3251 {}
3252 GROUP BY s.id
3253 ORDER BY s.updated_at DESC
3254 LIMIT {}",
3255 if provider_filter.is_some() {
3256 "AND s.provider = ?"
3257 } else {
3258 ""
3259 },
3260 limit
3261 );
3262
3263 let mut stmt = conn.prepare(&sql)?;
3264
3265 if let Some(provider) = provider_filter {
3266 stmt.query_map(params![query, search_pattern, provider], |row| {
3267 Ok((
3268 row.get::<_, String>(0)?,
3269 row.get::<_, String>(1)?,
3270 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3271 row.get::<_, String>(3)?,
3272 ))
3273 })?
3274 .collect::<Result<Vec<_>, _>>()?
3275 } else {
3276 stmt.query_map(params![query, search_pattern], |row| {
3277 Ok((
3278 row.get::<_, String>(0)?,
3279 row.get::<_, String>(1)?,
3280 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3281 row.get::<_, String>(3)?,
3282 ))
3283 })?
3284 .collect::<Result<Vec<_>, _>>()?
3285 }
3286 }
3287 };
3288
3289 if results.is_empty() {
3290 println!("{} No results found for '{}'", "[i]".blue(), query);
3291 return Ok(());
3292 }
3293
3294 println!("{} Found {} result(s):", "[i]".blue(), results.len());
3295 println!();
3296
3297 for (session_id, provider, title, snippet) in results {
3298 let display_name = if title.is_empty() {
3299 session_id.clone()
3300 } else {
3301 format!("{} ({})", title, &session_id[..8.min(session_id.len())])
3302 };
3303
3304 println!(
3305 "{} {} [{}]",
3306 "*".cyan(),
3307 display_name.bold(),
3308 provider.dimmed()
3309 );
3310
3311 let clean_snippet = snippet
3313 .replace('\n', " ")
3314 .replace('\r', "");
3315 println!(" {}", clean_snippet.dimmed());
3316 println!();
3317 }
3318
3319 Ok(())
3320}
3321
3322fn create_search_snippet(content: &str, query: &str, max_len: usize) -> String {
3324 let content_lower = content.to_lowercase();
3325 let query_lower = query.to_lowercase();
3326
3327 if let Some(pos) = content_lower.find(&query_lower) {
3328 let start = pos.saturating_sub(max_len / 2);
3329 let end = (pos + query.len() + max_len / 2).min(content.len());
3330
3331 let mut snippet = String::new();
3332 if start > 0 {
3333 snippet.push_str("...");
3334 }
3335 snippet.push_str(&content[start..end]);
3336 if end < content.len() {
3337 snippet.push_str("...");
3338 }
3339
3340 snippet.replace('\n', " ").replace('\r', "")
3342 } else {
3343 if content.len() > max_len {
3345 format!("{}...", &content[..max_len])
3346 } else {
3347 content.to_string()
3348 }
3349 }
3350}
3351
3352fn md5_hash(data: &str) -> u128 {
3354 let mut hash: u128 = 0;
3356 for (i, byte) in data.bytes().enumerate() {
3357 hash = hash.wrapping_add((byte as u128).wrapping_mul((i as u128).wrapping_add(1)));
3358 hash = hash.rotate_left(7);
3359 }
3360 hash
3361}
3362
3363pub fn harvest_sync(
3371 path: Option<&str>,
3372 push: bool,
3373 pull: bool,
3374 provider: Option<&str>,
3375 workspace: Option<&str>,
3376 sessions: Option<&[String]>,
3377 format: Option<&str>,
3378 force: bool,
3379 dry_run: bool,
3380) -> Result<()> {
3381 if !push && !pull {
3383 anyhow::bail!("Must specify either --push or --pull (or both)");
3384 }
3385
3386 let db_path = get_db_path(path)?;
3388
3389 if !db_path.exists() {
3390 anyhow::bail!(
3391 "Harvest database not found at {:?}. Run 'harvest run' first.",
3392 db_path
3393 );
3394 }
3395
3396 println!(
3397 "{} Sync Operation",
3398 if dry_run {
3399 "[DRY RUN]".yellow()
3400 } else {
3401 "[SYNC]".green()
3402 }
3403 );
3404 println!(" Database: {}", db_path.display().to_string().cyan());
3405
3406 if push {
3407 println!("\n{} Push: Database → Provider Workspaces", "[→]".blue());
3408 sync_push(
3409 &db_path, provider, workspace, sessions, format, force, dry_run,
3410 )?;
3411 }
3412
3413 if pull {
3414 println!("\n{} Pull: Provider Workspaces → Database", "[←]".blue());
3415 sync_pull(&db_path, provider, workspace, sessions, force, dry_run)?;
3416 }
3417
3418 if dry_run {
3419 println!(
3420 "\n{} Dry run complete. No changes were made.",
3421 "[!]".yellow()
3422 );
3423 } else {
3424 println!("\n{} Sync complete.", "[✓]".green());
3425 }
3426
3427 Ok(())
3428}
3429
3430fn sync_push(
3432 db_path: &Path,
3433 provider: Option<&str>,
3434 workspace: Option<&str>,
3435 sessions: Option<&[String]>,
3436 format: Option<&str>,
3437 force: bool,
3438 dry_run: bool,
3439) -> Result<()> {
3440 let conn = Connection::open(db_path)?;
3441
3442 let mut sql = String::from(
3444 r#"SELECT id, provider, workspace_path, workspace_name, session_json, file_format
3445 FROM sessions WHERE workspace_path IS NOT NULL"#,
3446 );
3447 let mut params_vec: Vec<String> = Vec::new();
3448
3449 if let Some(p) = provider {
3450 sql.push_str(" AND provider = ?");
3451 params_vec.push(p.to_string());
3452 }
3453
3454 if let Some(w) = workspace {
3455 sql.push_str(" AND (workspace_path LIKE ? OR workspace_name LIKE ?)");
3456 params_vec.push(format!("%{}%", w));
3457 params_vec.push(format!("%{}%", w));
3458 }
3459
3460 if let Some(sess_ids) = sessions {
3461 if !sess_ids.is_empty() {
3462 let placeholders: Vec<&str> = sess_ids.iter().map(|_| "?").collect();
3463 sql.push_str(&format!(" AND id IN ({})", placeholders.join(",")));
3464 for id in sess_ids {
3465 params_vec.push(id.clone());
3466 }
3467 }
3468 }
3469
3470 let mut stmt = conn.prepare(&sql)?;
3471
3472 let params_refs: Vec<&dyn rusqlite::ToSql> = params_vec
3474 .iter()
3475 .map(|s| s as &dyn rusqlite::ToSql)
3476 .collect();
3477
3478 let rows = stmt.query_map(params_refs.as_slice(), |row| {
3479 Ok((
3480 row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, String>(2)?, row.get::<_, Option<String>>(3)?, row.get::<_, String>(4)?, row.get::<_, Option<String>>(5)?, ))
3487 })?;
3488
3489 let target_format = format.unwrap_or("jsonl");
3490 let mut pushed = 0;
3491 let mut skipped = 0;
3492 let mut errors = 0;
3493
3494 for row_result in rows {
3495 match row_result {
3496 Ok((session_id, provider_name, workspace_path, workspace_name, session_json, stored_format)) => {
3497 let ws_path = PathBuf::from(&workspace_path);
3499 let chat_dir = ws_path.join(".vscode").join("chat");
3500
3501 if !chat_dir.exists() {
3503 if !force {
3504 println!(
3505 " {} Skipped {} - workspace chat dir not found: {}",
3506 "[~]".yellow(),
3507 session_id.dimmed(),
3508 chat_dir.display()
3509 );
3510 skipped += 1;
3511 continue;
3512 }
3513 if !dry_run {
3515 fs::create_dir_all(&chat_dir)?;
3516 }
3517 }
3518
3519 let (output_ext, needs_conversion) = match target_format {
3521 "jsonl" => ("jsonl", stored_format.as_deref() != Some("jsonl")),
3522 "json" => ("json", stored_format.as_deref() == Some("jsonl")),
3523 _ => ("jsonl", true), };
3525
3526 let session_file = chat_dir.join(format!("{}.{}", session_id, output_ext));
3527
3528 if session_file.exists() && !force {
3530 println!(
3531 " {} Skipped {} - file exists (use --force to overwrite)",
3532 "[~]".yellow(),
3533 session_id.dimmed()
3534 );
3535 skipped += 1;
3536 continue;
3537 }
3538
3539 let output_content = if needs_conversion && target_format == "jsonl" {
3541 match serde_json::from_str::<ChatSession>(&session_json) {
3543 Ok(session) => convert_session_to_jsonl(&session)?,
3544 Err(e) => {
3545 println!(
3546 " {} Error parsing session {}: {}",
3547 "[!]".red(),
3548 session_id,
3549 e
3550 );
3551 errors += 1;
3552 continue;
3553 }
3554 }
3555 } else {
3556 session_json.clone()
3558 };
3559
3560 if dry_run {
3561 println!(
3562 " {} Would write {} ({}) → {}",
3563 "[+]".green(),
3564 workspace_name.as_deref().unwrap_or(&provider_name),
3565 session_id.dimmed(),
3566 session_file.display()
3567 );
3568 } else {
3569 fs::write(&session_file, &output_content)?;
3570 println!(
3571 " {} Pushed {} ({}) → {}",
3572 "[+]".green(),
3573 workspace_name.as_deref().unwrap_or(&provider_name),
3574 session_id.dimmed(),
3575 session_file.display()
3576 );
3577 }
3578 pushed += 1;
3579 }
3580 Err(e) => {
3581 println!(" {} Database error: {}", "[!]".red(), e);
3582 errors += 1;
3583 }
3584 }
3585 }
3586
3587 println!(
3588 "\n {} Push summary: {} pushed, {} skipped, {} errors",
3589 "[=]".cyan(),
3590 pushed.to_string().green(),
3591 skipped.to_string().yellow(),
3592 errors.to_string().red()
3593 );
3594
3595 Ok(())
3596}
3597
3598fn sync_pull(
3600 db_path: &Path,
3601 provider: Option<&str>,
3602 workspace: Option<&str>,
3603 sessions: Option<&[String]>,
3604 force: bool,
3605 dry_run: bool,
3606) -> Result<()> {
3607 let conn = if dry_run {
3608 Connection::open(db_path)?
3610 } else {
3611 Connection::open(db_path)?
3612 };
3613
3614 let workspaces = discover_workspaces()?;
3616 let mut pulled = 0;
3617 let mut skipped = 0;
3618 let mut errors = 0;
3619
3620 for ws in workspaces {
3621 if let Some(ws_filter) = workspace {
3623 let ws_path_str = ws.workspace_path.to_string_lossy();
3624 let ws_name = ws.project_path.as_deref().unwrap_or("");
3625 if !ws_path_str.contains(ws_filter) && !ws_name.contains(ws_filter) {
3626 continue;
3627 }
3628 }
3629
3630 if let Some(p) = provider {
3632 if !p.eq_ignore_ascii_case("copilot")
3633 && !p.eq_ignore_ascii_case("github copilot")
3634 && !p.eq_ignore_ascii_case("vscode")
3635 {
3636 continue;
3637 }
3638 }
3639
3640 match get_chat_sessions_from_workspace(&ws.workspace_path) {
3642 Ok(ws_sessions) => {
3643 for swp in ws_sessions {
3644 if let Some(sess_ids) = sessions {
3646 let sid = swp.session.session_id.as_deref().unwrap_or("");
3647 if !sess_ids.iter().any(|id| id == sid) {
3648 continue;
3649 }
3650 }
3651
3652 let session_id = swp
3653 .session
3654 .session_id
3655 .clone()
3656 .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
3657
3658 let exists: bool = conn
3660 .query_row(
3661 "SELECT 1 FROM sessions WHERE id = ?",
3662 [&session_id],
3663 |_| Ok(true),
3664 )
3665 .unwrap_or(false);
3666
3667 if exists && !force {
3668 skipped += 1;
3669 continue;
3670 }
3671
3672 let ws_name = ws.project_path.clone();
3673 let ws_path = ws.workspace_path.to_string_lossy().to_string();
3674
3675 if dry_run {
3676 println!(
3677 " {} Would pull {} from {}",
3678 "[+]".green(),
3679 session_id.dimmed(),
3680 ws_name.as_deref().unwrap_or(&ws_path)
3681 );
3682 pulled += 1;
3683 } else {
3684 match insert_or_update_session(
3685 &conn,
3686 &swp.session,
3687 "GitHub Copilot",
3688 Some(&ws.hash),
3689 ws_name.as_deref(),
3690 None, 3, "json", Some(&ws_path), ) {
3695 Ok(_) => {
3696 println!(
3697 " {} Pulled {} from {}",
3698 "[+]".green(),
3699 session_id.dimmed(),
3700 ws_name.as_deref().unwrap_or(&ws_path)
3701 );
3702 pulled += 1;
3703 }
3704 Err(e) => {
3705 println!(
3706 " {} Error pulling {}: {}",
3707 "[!]".red(),
3708 session_id,
3709 e
3710 );
3711 errors += 1;
3712 }
3713 }
3714 }
3715 }
3716 }
3717 Err(e) => {
3718 println!(
3719 " {} Error reading workspace {}: {}",
3720 "[!]".red(),
3721 ws.workspace_path.display(),
3722 e
3723 );
3724 errors += 1;
3725 }
3726 }
3727 }
3728
3729 println!(
3730 "\n {} Pull summary: {} pulled, {} skipped, {} errors",
3731 "[=]".cyan(),
3732 pulled.to_string().green(),
3733 skipped.to_string().yellow(),
3734 errors.to_string().red()
3735 );
3736
3737 Ok(())
3738}
3739
3740fn convert_session_to_jsonl(session: &ChatSession) -> Result<String> {
3742 use serde_json::json;
3743
3744 let mut lines = Vec::new();
3745
3746 let session_meta = json!({
3748 "version": 3,
3749 "sessionId": session.session_id,
3750 "title": session.title(),
3751 "createdAt": session.creation_date,
3752 "lastInteractionAt": session.last_message_date,
3753 "customTitle": session.custom_title.as_deref(),
3754 });
3755 lines.push(serde_json::to_string(&session_meta)?);
3756
3757 for request in &session.requests {
3759 let request_json = serde_json::to_string(request)?;
3760 lines.push(request_json);
3761 }
3762
3763 Ok(lines.join("\n"))
3764}