1use anyhow::{Context, Result};
10use chrono::{DateTime, Utc};
11use colored::*;
12use rusqlite::{params, Connection};
13use serde::{Deserialize, Serialize};
14use std::fs;
15use std::path::{Path, PathBuf};
16use std::process::Command;
17use std::time::Duration;
18
19use crate::browser::{get_installed_browsers, scan_browser_auth, BrowserType};
20use crate::database::{ChatDatabase, ShareLinkParser};
21use crate::models::ChatSession;
22use crate::providers::{ProviderRegistry, ProviderType};
23use crate::storage::parse_session_json;
24use crate::workspace::{discover_workspaces, get_chat_sessions_from_workspace};
25
26fn is_empty_code_block(s: &str) -> bool {
28 let s = s.trim();
30 if s == "```" {
31 return true;
32 }
33 if s.starts_with("```") && !s.contains('\n') {
35 return true;
36 }
37 let lines: Vec<&str> = s.lines().collect();
39 if lines.len() >= 2 && lines[0].starts_with("```") && lines.last() == Some(&"```") {
40 let content_lines = &lines[1..lines.len() - 1];
42 if content_lines.iter().all(|line| line.trim().is_empty()) {
43 return true;
44 }
45 }
46 false
47}
48
49type HarvestQueryResult = (String, String, String, i64, i64, i64, Option<String>);
51
52#[derive(Debug, Clone)]
54struct WebProviderEndpoint {
55 name: &'static str,
56 url: &'static str,
57 description: &'static str,
58}
59
60const WEB_PROVIDERS: &[WebProviderEndpoint] = &[
62 WebProviderEndpoint {
63 name: "ChatGPT",
64 url: "https://chat.openai.com",
65 description: "OpenAI ChatGPT Web Interface",
66 },
67 WebProviderEndpoint {
68 name: "Claude",
69 url: "https://claude.ai",
70 description: "Anthropic Claude Web Interface",
71 },
72 WebProviderEndpoint {
73 name: "Gemini",
74 url: "https://gemini.google.com",
75 description: "Google Gemini Web Interface",
76 },
77 WebProviderEndpoint {
78 name: "Perplexity",
79 url: "https://www.perplexity.ai",
80 description: "Perplexity AI Search Interface",
81 },
82 WebProviderEndpoint {
83 name: "DeepSeek",
84 url: "https://chat.deepseek.com",
85 description: "DeepSeek Chat Interface",
86 },
87 WebProviderEndpoint {
88 name: "Poe",
89 url: "https://poe.com",
90 description: "Quora Poe Multi-model Chat",
91 },
92 WebProviderEndpoint {
93 name: "You.com",
94 url: "https://you.com/chat",
95 description: "You.com AI Chat",
96 },
97 WebProviderEndpoint {
98 name: "HuggingChat",
99 url: "https://huggingface.co/chat",
100 description: "HuggingFace Chat Interface",
101 },
102 WebProviderEndpoint {
103 name: "Copilot",
104 url: "https://copilot.microsoft.com",
105 description: "Microsoft Copilot Web Interface",
106 },
107 WebProviderEndpoint {
108 name: "Mistral",
109 url: "https://chat.mistral.ai",
110 description: "Mistral AI Le Chat Interface",
111 },
112 WebProviderEndpoint {
113 name: "Cohere",
114 url: "https://coral.cohere.com",
115 description: "Cohere Coral Chat Interface",
116 },
117 WebProviderEndpoint {
118 name: "Groq",
119 url: "https://groq.com",
120 description: "Groq Fast Inference Interface",
121 },
122 WebProviderEndpoint {
123 name: "Phind",
124 url: "https://www.phind.com",
125 description: "Phind AI Code Search",
126 },
127 WebProviderEndpoint {
128 name: "Character.AI",
129 url: "https://character.ai",
130 description: "Character.AI Chat Interface",
131 },
132 WebProviderEndpoint {
133 name: "Pi",
134 url: "https://pi.ai",
135 description: "Inflection Pi Personal AI",
136 },
137];
138
139fn scan_web_providers(timeout_secs: u64) -> Vec<String> {
141 use std::sync::{Arc, Mutex};
142 use std::thread;
143
144 let reachable = Arc::new(Mutex::new(Vec::new()));
145 let results = Arc::new(Mutex::new(Vec::new()));
146
147 let handles: Vec<_> = WEB_PROVIDERS
149 .iter()
150 .map(|provider| {
151 let reachable = Arc::clone(&reachable);
152 let results = Arc::clone(&results);
153 let timeout = timeout_secs;
154 let name = provider.name;
155 let url = provider.url;
156 let desc = provider.description;
157
158 thread::spawn(move || {
159 let client = match reqwest::blocking::Client::builder()
160 .timeout(Duration::from_secs(timeout))
161 .connect_timeout(Duration::from_secs(timeout.min(3))) .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
163 .build()
164 {
165 Ok(c) => c,
166 Err(_) => return,
167 };
168
169 let result: (&str, bool, String, &str) = match client.head(url).send() {
170 Ok(response) => {
171 let status = response.status();
172 if status.is_success() || status.is_redirection() {
173 reachable.lock().unwrap().push(name.to_string());
174 (name, true, desc.to_string(), url)
175 } else {
176 (name, false, format!("HTTP {}", status.as_u16()), url)
177 }
178 }
179 Err(e) => {
180 let reason = if e.is_timeout() {
181 "timeout".to_string()
182 } else if e.is_connect() {
183 "connection failed".to_string()
184 } else {
185 "unreachable".to_string()
186 };
187 (name, false, reason, url)
188 }
189 };
190
191 results.lock().unwrap().push((
192 result.0.to_string(),
193 result.1,
194 result.2,
195 result.3.to_string(),
196 ));
197 })
198 })
199 .collect();
200
201 for handle in handles {
203 let _ = handle.join();
204 }
205
206 let results = results.lock().unwrap();
208 let mut sorted_results: Vec<_> = results.iter().collect();
209 sorted_results.sort_by_key(|(name, _, _, _)| name.as_str());
210
211 for (name, success, info, url) in sorted_results {
212 if *success {
213 println!(
214 " {} {}: {} ({})",
215 "[+]".green(),
216 name.bold(),
217 "reachable".green(),
218 info.dimmed()
219 );
220 println!(" {} {}", "`".dimmed(), url.dimmed());
221 } else {
222 println!(
223 " {} {}: {} ({})",
224 "[-]".dimmed(),
225 name,
226 "blocked or unavailable".dimmed(),
227 info.dimmed()
228 );
229 }
230 }
231
232 let result = reachable.lock().unwrap().clone();
233 result
234}
235
236fn scan_browser_authentication(
238 verbose: bool,
239) -> (std::collections::HashMap<String, Vec<BrowserType>>, usize) {
240 use crate::browser::scan_browser_auth_verbose;
241 use std::collections::HashMap;
242
243 let installed = get_installed_browsers();
244 if installed.is_empty() {
245 println!(" {} No supported browsers found", "[-]".dimmed());
246 return (HashMap::new(), 0);
247 }
248
249 println!(
250 " {} Checking {} browser(s): {}",
251 "[*]".blue(),
252 installed.len(),
253 installed
254 .iter()
255 .map(|b| b.name())
256 .collect::<Vec<_>>()
257 .join(", ")
258 );
259
260 let results = if verbose {
261 scan_browser_auth_verbose()
262 } else {
263 scan_browser_auth()
264 };
265
266 let mut authenticated: HashMap<String, Vec<BrowserType>> = HashMap::new();
268
269 for result in results {
270 if result.authenticated {
271 authenticated
272 .entry(result.provider.clone())
273 .or_default()
274 .push(result.browser);
275 }
276 }
277
278 let count = authenticated.len();
279
280 if authenticated.is_empty() {
281 println!(
282 " {} No authenticated web LLM providers found",
283 "[-]".dimmed()
284 );
285 println!(
286 " {} Log into ChatGPT, Claude, etc. in your browser to enable harvesting",
287 "`".dimmed()
288 );
289 } else {
290 for (provider, browsers) in &authenticated {
291 let browser_names: Vec<_> = browsers.iter().map(|b| b.name()).collect();
292 println!(
293 " {} {}: {} in {}",
294 "[+]".green(),
295 provider.bold(),
296 "authenticated".green(),
297 browser_names.join(", ")
298 );
299 }
300 }
301
302 (authenticated, count)
303}
304
305#[allow(dead_code)]
307#[derive(Debug, Clone, Serialize, Deserialize)]
308pub struct HarvestConfig {
309 pub db_path: PathBuf,
311 pub auto_commit: bool,
313 pub include_providers: Vec<String>,
315 pub exclude_providers: Vec<String>,
317 pub include_workspaces: bool,
319 pub last_harvest: Option<i64>,
321}
322
323#[allow(dead_code)]
324impl Default for HarvestConfig {
325 fn default() -> Self {
326 Self {
327 db_path: PathBuf::from("chat_sessions.db"),
328 auto_commit: false,
329 include_providers: Vec::new(),
330 exclude_providers: Vec::new(),
331 include_workspaces: true,
332 last_harvest: None,
333 }
334 }
335}
336
337#[derive(Debug, Default)]
339pub struct HarvestStats {
340 pub providers_scanned: usize,
341 pub workspaces_scanned: usize,
342 pub sessions_found: usize,
343 pub sessions_added: usize,
344 pub sessions_updated: usize,
345 pub sessions_skipped: usize,
346 pub errors: Vec<String>,
347}
348
349#[allow(dead_code)]
351#[derive(Debug, Clone, Serialize, Deserialize)]
352pub struct HarvestedSession {
353 pub id: String,
354 pub provider: String,
355 pub provider_type: String,
356 pub workspace_id: Option<String>,
357 pub workspace_name: Option<String>,
358 pub title: String,
359 pub message_count: usize,
360 pub created_at: i64,
361 pub updated_at: i64,
362 pub harvested_at: i64,
363 pub session_json: String,
364}
365
366pub fn harvest_init(path: Option<&str>, git_init: bool) -> Result<()> {
368 let db_path = get_db_path(path)?;
369 let db_dir = db_path.parent().unwrap_or(Path::new("."));
370
371 println!("\n{} Initializing Harvest Database", "[H]".magenta().bold());
372 println!("{}", "=".repeat(60));
373
374 if !db_dir.exists() {
376 fs::create_dir_all(db_dir)?;
377 println!("{} Created directory: {}", "[+]".green(), db_dir.display());
378 }
379
380 if db_path.exists() {
382 println!(
383 "{} Database already exists: {}",
384 "[!]".yellow(),
385 db_path.display()
386 );
387 println!(" Use 'csm harvest run' to update it");
388 return Ok(());
389 }
390
391 create_harvest_database(&db_path)?;
393 println!("{} Created database: {}", "[+]".green(), db_path.display());
394
395 if git_init {
397 init_git_tracking(&db_path)?;
398 }
399
400 println!("\n{} Harvest database initialized!", "[+]".green().bold());
401 println!("\nNext steps:");
402 println!(" 1. Run 'csm harvest scan' to see available providers");
403 println!(" 2. Run 'csm harvest run' to collect sessions");
404 if !git_init {
405 println!(" 3. Run 'csm harvest git init' to enable version tracking");
406 }
407
408 Ok(())
409}
410
411pub fn harvest_scan(
413 show_sessions: bool,
414 scan_web: bool,
415 timeout_secs: u64,
416 verbose: bool,
417) -> Result<()> {
418 println!("\n{} Scanning for Providers", "[H]".magenta().bold());
419 println!("{}", "=".repeat(60));
420
421 let registry = ProviderRegistry::new();
422 let mut total_sessions = 0;
423 let mut available_providers = Vec::new();
424
425 println!("\n{} LLM Providers:", "[*]".blue().bold());
427
428 let provider_types = vec![
429 ProviderType::Copilot,
430 ProviderType::Cursor,
431 ProviderType::Ollama,
432 ProviderType::Vllm,
433 ProviderType::Foundry,
434 ProviderType::LmStudio,
435 ProviderType::LocalAI,
436 ProviderType::TextGenWebUI,
437 ProviderType::Jan,
438 ProviderType::Gpt4All,
439 ProviderType::Llamafile,
440 ProviderType::OpenAI,
441 ProviderType::ChatGPT,
442 ProviderType::Anthropic,
443 ProviderType::Perplexity,
444 ProviderType::DeepSeek,
445 ProviderType::Gemini,
446 ];
447
448 for pt in &provider_types {
449 if let Some(provider) = registry.get_provider(*pt) {
450 let available = provider.is_available();
451 let session_count = if available {
452 provider.list_sessions().map(|s| s.len()).unwrap_or(0)
453 } else {
454 0
455 };
456
457 if available {
458 available_providers.push((*pt, session_count));
459 total_sessions += session_count;
460
461 let status = if session_count > 0 {
462 format!(
463 "{} {} sessions",
464 "+".green(),
465 session_count.to_string().cyan()
466 )
467 } else {
468 format!("{} no sessions", "+".green())
469 };
470
471 println!(
472 " {} {}: {}",
473 "[+]".green(),
474 provider.name().bold(),
475 status
476 );
477
478 if show_sessions && session_count > 0 {
479 if let Ok(sessions) = provider.list_sessions() {
480 for session in sessions.iter().take(3) {
481 println!(" {} {}", "`".dimmed(), session.title().dimmed());
482 }
483 if sessions.len() > 3 {
484 println!(" {} ... and {} more", "`".dimmed(), sessions.len() - 3);
485 }
486 }
487 }
488
489 if let Some(path) = provider.sessions_path() {
490 println!(
491 " {} {}",
492 "`".dimmed(),
493 path.display().to_string().dimmed()
494 );
495 }
496 }
497 }
498 }
499
500 println!("\n{} VS Code Workspaces:", "[*]".blue().bold());
502
503 match discover_workspaces() {
504 Ok(workspaces) => {
505 let workspaces_with_sessions: Vec<_> = workspaces
506 .iter()
507 .filter(|ws| ws.chat_session_count > 0)
508 .collect();
509
510 let ws_sessions: usize = workspaces_with_sessions
511 .iter()
512 .map(|ws| ws.chat_session_count)
513 .sum();
514
515 println!(
516 " {} {} workspaces with {} sessions",
517 "[+]".green(),
518 workspaces_with_sessions.len().to_string().cyan(),
519 ws_sessions.to_string().cyan()
520 );
521
522 if show_sessions {
523 for ws in workspaces_with_sessions.iter().take(5) {
524 let name = ws
525 .project_path
526 .clone()
527 .unwrap_or_else(|| ws.hash[..8.min(ws.hash.len())].to_string());
528 println!(
529 " {} {} ({} sessions)",
530 "`".dimmed(),
531 name.dimmed(),
532 ws.chat_session_count
533 );
534 }
535 if workspaces_with_sessions.len() > 5 {
536 println!(
537 " {} ... and {} more workspaces",
538 "`".dimmed(),
539 workspaces_with_sessions.len() - 5
540 );
541 }
542 }
543
544 total_sessions += ws_sessions;
545 }
546 Err(e) => {
547 println!(" {} Failed to scan workspaces: {}", "[!]".yellow(), e);
548 }
549 }
550
551 let mut web_providers_found = Vec::new();
553 let mut authenticated_count = 0;
554 if scan_web {
555 println!("\n{} Browser Authentication:", "[*]".blue().bold());
557 let (auth_results, auth_count) = scan_browser_authentication(verbose);
558 authenticated_count = auth_count;
559
560 println!("\n{} Web LLM Provider Endpoints:", "[*]".blue().bold());
562 web_providers_found = scan_web_providers(timeout_secs);
563
564 if !auth_results.is_empty() {
566 println!("\n{} Authenticated Provider Sessions:", "[*]".blue().bold());
567 for (provider, browsers) in &auth_results {
568 let browser_list: Vec<_> = browsers.iter().map(|b| b.name()).collect();
569 let reachable = web_providers_found.iter().any(|p| p == provider);
570 let status = if reachable {
571 format!("{} (reachable)", "ready to harvest".green())
572 } else {
573 format!("{}", "authenticated but endpoint blocked".yellow())
574 };
575 println!(
576 " {} {}: {} via {}",
577 "[+]".green(),
578 provider.bold(),
579 status,
580 browser_list.join(", ").dimmed()
581 );
582 }
583 }
584 }
585
586 println!("\n{} Summary:", "[*]".green().bold());
588 println!(
589 " {} local providers available",
590 available_providers.len().to_string().cyan()
591 );
592 if scan_web {
593 println!(
594 " {} web providers reachable",
595 web_providers_found.len().to_string().cyan()
596 );
597 println!(
598 " {} web providers authenticated",
599 authenticated_count.to_string().cyan()
600 );
601 }
602 println!(
603 " {} total sessions to harvest",
604 total_sessions.to_string().cyan()
605 );
606
607 Ok(())
608}
609
610pub fn harvest_run(
612 path: Option<&str>,
613 providers: Option<&[String]>,
614 exclude: Option<&[String]>,
615 incremental: bool,
616 auto_commit: bool,
617 message: Option<&str>,
618) -> Result<()> {
619 let db_path = get_db_path(path)?;
620
621 println!("\n{} Running Harvest", "[H]".magenta().bold());
622 println!("{}", "=".repeat(60));
623
624 if !db_path.exists() {
626 println!("{} Database not found, creating...", "[*]".blue());
627 create_harvest_database(&db_path)?;
628 }
629
630 let conn = Connection::open(&db_path)?;
631 let mut stats = HarvestStats::default();
632
633 let last_harvest: Option<i64> = if incremental {
635 conn.query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
636 row.get(0)
637 })
638 .ok()
639 } else {
640 None
641 };
642
643 if let Some(ts) = last_harvest {
644 let dt = DateTime::from_timestamp_millis(ts)
645 .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
646 .unwrap_or_default();
647 println!("{} Incremental harvest since: {}", "[*]".blue(), dt);
648 }
649
650 let registry = ProviderRegistry::new();
651 let include_providers = providers.map(|p| p.to_vec());
652 let exclude_providers = exclude.map(|p| p.to_vec()).unwrap_or_default();
653
654 println!("\n{} Harvesting from providers...", "[*]".blue());
656
657 let provider_types = vec![
658 ProviderType::Copilot,
659 ProviderType::Cursor,
660 ProviderType::Ollama,
661 ProviderType::Vllm,
662 ProviderType::Foundry,
663 ProviderType::LmStudio,
664 ProviderType::LocalAI,
665 ProviderType::TextGenWebUI,
666 ProviderType::Jan,
667 ProviderType::Gpt4All,
668 ProviderType::Llamafile,
669 ];
670
671 for pt in &provider_types {
672 let provider_name = pt.display_name().to_lowercase();
673
674 if let Some(ref include) = include_providers {
676 if !include
677 .iter()
678 .any(|p| provider_name.contains(&p.to_lowercase()))
679 {
680 continue;
681 }
682 }
683 if exclude_providers
684 .iter()
685 .any(|p| provider_name.contains(&p.to_lowercase()))
686 {
687 continue;
688 }
689
690 if let Some(provider) = registry.get_provider(*pt) {
691 if !provider.is_available() {
692 continue;
693 }
694
695 stats.providers_scanned += 1;
696
697 match provider.list_sessions() {
698 Ok(sessions) => {
699 for session in sessions {
700 stats.sessions_found += 1;
701
702 if let Some(last) = last_harvest {
704 if session.last_message_date <= last {
705 stats.sessions_skipped += 1;
706 continue;
707 }
708 }
709
710 match insert_or_update_session(
711 &conn,
712 &session,
713 pt.display_name(),
714 None,
715 None,
716 ) {
717 Ok(updated) => {
718 if updated {
719 stats.sessions_updated += 1;
720 } else {
721 stats.sessions_added += 1;
722 }
723 }
724 Err(e) => {
725 stats.errors.push(format!("{}: {}", session.title(), e));
726 }
727 }
728 }
729
730 let session_count = stats.sessions_added + stats.sessions_updated;
731 if session_count > 0 {
732 println!(
733 " {} {}: {} sessions",
734 "[+]".green(),
735 provider.name(),
736 session_count.to_string().cyan()
737 );
738 }
739 }
740 Err(e) => {
741 stats.errors.push(format!("{}: {}", provider.name(), e));
742 }
743 }
744 }
745 }
746
747 if include_providers.is_none()
749 || include_providers
750 .as_ref()
751 .map(|p| {
752 p.iter()
753 .any(|x| x == "copilot" || x == "vscode" || x == "workspace")
754 })
755 .unwrap_or(false)
756 {
757 println!("\n{} Harvesting from VS Code workspaces...", "[*]".blue());
758
759 if let Ok(workspaces) = discover_workspaces() {
760 for ws in &workspaces {
761 if ws.chat_session_count == 0 {
762 continue;
763 }
764
765 stats.workspaces_scanned += 1;
766
767 if let Ok(sessions) = get_chat_sessions_from_workspace(&ws.workspace_path) {
768 for swp in sessions {
769 stats.sessions_found += 1;
770
771 if let Some(last) = last_harvest {
773 if swp.session.last_message_date <= last {
774 stats.sessions_skipped += 1;
775 continue;
776 }
777 }
778
779 let ws_name = ws.project_path.clone();
780
781 match insert_or_update_session(
782 &conn,
783 &swp.session,
784 "GitHub Copilot",
785 Some(&ws.hash),
786 ws_name.as_deref(),
787 ) {
788 Ok(updated) => {
789 if updated {
790 stats.sessions_updated += 1;
791 } else {
792 stats.sessions_added += 1;
793 }
794 }
795 Err(e) => {
796 stats.errors.push(format!("{}: {}", swp.session.title(), e));
797 }
798 }
799 }
800 }
801 }
802
803 println!(
804 " {} Workspaces: {} scanned",
805 "[+]".green(),
806 stats.workspaces_scanned.to_string().cyan()
807 );
808 }
809 }
810
811 let include_list: Vec<String> = include_providers.clone().unwrap_or_default();
813 harvest_web_providers(&conn, &mut stats, &include_list, &exclude_providers)?;
814
815 update_harvest_metadata(&conn)?;
817
818 println!("\n{} Harvest Complete:", "[+]".green().bold());
820 println!(
821 " {} providers scanned",
822 stats.providers_scanned.to_string().cyan()
823 );
824 println!(
825 " {} workspaces scanned",
826 stats.workspaces_scanned.to_string().cyan()
827 );
828 println!(
829 " {} sessions found",
830 stats.sessions_found.to_string().cyan()
831 );
832 println!(
833 " {} sessions added",
834 stats.sessions_added.to_string().green()
835 );
836 println!(
837 " {} sessions updated",
838 stats.sessions_updated.to_string().yellow()
839 );
840 if stats.sessions_skipped > 0 {
841 println!(
842 " {} sessions skipped (unchanged)",
843 stats.sessions_skipped.to_string().dimmed()
844 );
845 }
846
847 if !stats.errors.is_empty() {
848 println!("\n{} Errors ({}):", "[!]".red(), stats.errors.len());
849 for (i, err) in stats.errors.iter().take(5).enumerate() {
850 println!(" {}. {}", i + 1, err);
851 }
852 if stats.errors.len() > 5 {
853 println!(" ... and {} more errors", stats.errors.len() - 5);
854 }
855 }
856
857 if auto_commit && (stats.sessions_added > 0 || stats.sessions_updated > 0) {
859 println!("\n{} Auto-committing changes...", "[*]".blue());
860 let commit_msg = message.unwrap_or("Harvest: update chat sessions");
861 if let Err(e) = git_commit_harvest(&db_path, commit_msg) {
862 println!("{} Git commit failed: {}", "[!]".yellow(), e);
863 } else {
864 println!("{} Changes committed", "[+]".green());
865 }
866 }
867
868 println!("\nDatabase: {}", db_path.display());
869
870 Ok(())
871}
872
873pub fn harvest_status(path: Option<&str>) -> Result<()> {
875 let db_path = get_db_path(path)?;
876
877 println!("\n{} Harvest Database Status", "[H]".magenta().bold());
878 println!("{}", "=".repeat(60));
879
880 if !db_path.exists() {
881 println!(
882 "{} Database not found: {}",
883 "[!]".yellow(),
884 db_path.display()
885 );
886 println!(" Run 'csm harvest init' to create one");
887 return Ok(());
888 }
889
890 let conn = Connection::open(&db_path)?;
891
892 let mut stmt = conn.prepare(
894 "SELECT provider, COUNT(*) as count, SUM(message_count) as messages
895 FROM sessions
896 GROUP BY provider
897 ORDER BY count DESC",
898 )?;
899
900 let provider_stats: Vec<(String, i64, i64)> = stmt
901 .query_map([], |row| {
902 Ok((
903 row.get(0)?,
904 row.get(1)?,
905 row.get::<_, Option<i64>>(2)?.unwrap_or(0),
906 ))
907 })?
908 .filter_map(|r| r.ok())
909 .collect();
910
911 let total_sessions: i64 = conn
913 .query_row("SELECT COUNT(*) FROM sessions", [], |row| row.get(0))
914 .unwrap_or(0);
915
916 let total_messages: i64 = conn
917 .query_row(
918 "SELECT COALESCE(SUM(message_count), 0) FROM sessions",
919 [],
920 |row| row.get(0),
921 )
922 .unwrap_or(0);
923
924 let last_harvest: Option<i64> = conn
926 .query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
927 row.get(0)
928 })
929 .ok()
930 .flatten();
931
932 let oldest: Option<i64> = conn
934 .query_row("SELECT MIN(created_at) FROM sessions", [], |row| row.get(0))
935 .ok()
936 .flatten();
937
938 let newest: Option<i64> = conn
939 .query_row("SELECT MAX(updated_at) FROM sessions", [], |row| row.get(0))
940 .ok()
941 .flatten();
942
943 println!("{} Database: {}", "[*]".blue(), db_path.display());
944 println!(
945 "{} Total Sessions: {}",
946 "[*]".blue(),
947 total_sessions.to_string().cyan()
948 );
949 println!(
950 "{} Total Messages: {}",
951 "[*]".blue(),
952 total_messages.to_string().cyan()
953 );
954
955 if let Some(ts) = last_harvest {
956 let dt = DateTime::from_timestamp_millis(ts)
957 .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
958 .unwrap_or_else(|| "Unknown".to_string());
959 println!("{} Last Harvest: {}", "[*]".blue(), dt);
960 }
961
962 if let (Some(old), Some(new)) = (oldest, newest) {
963 let old_dt = DateTime::from_timestamp_millis(old)
964 .map(|d| d.format("%Y-%m-%d").to_string())
965 .unwrap_or_default();
966 let new_dt = DateTime::from_timestamp_millis(new)
967 .map(|d| d.format("%Y-%m-%d").to_string())
968 .unwrap_or_default();
969 println!("{} Date Range: {} to {}", "[*]".blue(), old_dt, new_dt);
970 }
971
972 if !provider_stats.is_empty() {
973 println!("\n{} Sessions by Provider:", "[*]".blue());
974 for (provider, count, messages) in &provider_stats {
975 println!(
976 " {} {}: {} sessions, {} messages",
977 "[+]".green(),
978 provider.bold(),
979 count.to_string().cyan(),
980 messages.to_string().dimmed()
981 );
982 }
983 }
984
985 let db_dir = db_path.parent().unwrap_or(Path::new("."));
987 if db_dir.join(".git").exists()
988 || db_path
989 .parent()
990 .map(|p| p.join(".git").exists())
991 .unwrap_or(false)
992 {
993 println!("\n{} Git Status:", "[*]".blue());
994
995 let output = Command::new("git")
996 .current_dir(db_dir)
997 .args([
998 "status",
999 "--porcelain",
1000 db_path.file_name().unwrap().to_str().unwrap(),
1001 ])
1002 .output();
1003
1004 match output {
1005 Ok(out) => {
1006 let status = String::from_utf8_lossy(&out.stdout);
1007 if status.is_empty() {
1008 println!(" {} No uncommitted changes", "[+]".green());
1009 } else {
1010 println!(" {} Uncommitted changes detected", "[!]".yellow());
1011 println!(" Run 'csm harvest git commit' to save changes");
1012 }
1013 }
1014 Err(_) => {
1015 println!(" {} Unable to check git status", "[!]".yellow());
1016 }
1017 }
1018 } else {
1019 println!("\n{} Git tracking not enabled", "[i]".dimmed());
1020 println!(" Run 'csm harvest git init' to enable version tracking");
1021 }
1022
1023 Ok(())
1024}
1025
1026pub fn harvest_list(
1028 path: Option<&str>,
1029 provider: Option<&str>,
1030 limit: usize,
1031 search: Option<&str>,
1032) -> Result<()> {
1033 let db_path = get_db_path(path)?;
1034
1035 if !db_path.exists() {
1036 println!(
1037 "{} Database not found: {}",
1038 "[!]".yellow(),
1039 db_path.display()
1040 );
1041 return Ok(());
1042 }
1043
1044 let conn = Connection::open(&db_path)?;
1045
1046 let mut query = String::from(
1047 "SELECT id, provider, title, message_count, created_at, updated_at, workspace_name
1048 FROM sessions WHERE 1=1",
1049 );
1050 let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1051
1052 if let Some(p) = provider {
1053 query.push_str(" AND LOWER(provider) LIKE ?");
1054 params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1055 }
1056
1057 if let Some(s) = search {
1058 query.push_str(" AND (LOWER(title) LIKE ? OR LOWER(id) LIKE ?)");
1059 let pattern = format!("%{}%", s.to_lowercase());
1060 params_vec.push(Box::new(pattern.clone()));
1061 params_vec.push(Box::new(pattern));
1062 }
1063
1064 query.push_str(" ORDER BY updated_at DESC LIMIT ?");
1065 params_vec.push(Box::new(limit as i64));
1066
1067 let mut stmt = conn.prepare(&query)?;
1068
1069 let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1071
1072 let sessions: Vec<HarvestQueryResult> = stmt
1073 .query_map(params_slice.as_slice(), |row| {
1074 Ok((
1075 row.get(0)?,
1076 row.get(1)?,
1077 row.get(2)?,
1078 row.get(3)?,
1079 row.get(4)?,
1080 row.get(5)?,
1081 row.get(6)?,
1082 ))
1083 })?
1084 .filter_map(|r| r.ok())
1085 .collect();
1086
1087 println!("\n{} Harvested Sessions", "[H]".magenta().bold());
1088 println!("{}", "=".repeat(60));
1089
1090 if sessions.is_empty() {
1091 println!("{} No sessions found", "[i]".dimmed());
1092 return Ok(());
1093 }
1094
1095 for (id, prov, title, msg_count, _created, updated, ws_name) in &sessions {
1096 let date = DateTime::from_timestamp_millis(*updated)
1097 .map(|d| d.format("%Y-%m-%d %H:%M").to_string())
1098 .unwrap_or_default();
1099
1100 println!("\n{} {}", "[S]".cyan(), title.bold());
1101 println!(" ID: {}", &id[..16.min(id.len())].dimmed());
1102 println!(
1103 " Provider: {} | Messages: {} | Updated: {}",
1104 prov.cyan(),
1105 msg_count.to_string().green(),
1106 date.dimmed()
1107 );
1108 if let Some(ws) = ws_name {
1109 println!(" Workspace: {}", ws.dimmed());
1110 }
1111 }
1112
1113 println!(
1114 "\n{} Showing {} of available sessions",
1115 "[i]".dimmed(),
1116 sessions.len()
1117 );
1118
1119 Ok(())
1120}
1121
1122pub fn harvest_export(
1124 path: Option<&str>,
1125 output: &str,
1126 format: &str,
1127 provider: Option<&str>,
1128 session_ids: Option<&[String]>,
1129) -> Result<()> {
1130 let db_path = get_db_path(path)?;
1131 let output_path = PathBuf::from(output);
1132
1133 if !db_path.exists() {
1134 anyhow::bail!("Database not found: {}", db_path.display());
1135 }
1136
1137 let conn = Connection::open(&db_path)?;
1138
1139 println!("\n{} Exporting Sessions", "[H]".magenta().bold());
1140 println!("{}", "=".repeat(60));
1141
1142 let mut query = String::from("SELECT session_json FROM sessions WHERE 1=1");
1144 let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1145
1146 if let Some(p) = provider {
1147 query.push_str(" AND LOWER(provider) LIKE ?");
1148 params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1149 }
1150
1151 if let Some(ids) = session_ids {
1152 let placeholders: Vec<&str> = ids.iter().map(|_| "?").collect();
1153 query.push_str(&format!(" AND id IN ({})", placeholders.join(",")));
1154 for id in ids {
1155 params_vec.push(Box::new(id.clone()));
1156 }
1157 }
1158
1159 let mut stmt = conn.prepare(&query)?;
1160 let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1161
1162 let sessions: Vec<String> = stmt
1163 .query_map(params_slice.as_slice(), |row| row.get(0))?
1164 .filter_map(|r| r.ok())
1165 .collect();
1166
1167 if sessions.is_empty() {
1168 println!("{} No sessions to export", "[i]".dimmed());
1169 return Ok(());
1170 }
1171
1172 if let Some(parent) = output_path.parent() {
1174 if !parent.exists() {
1175 fs::create_dir_all(parent)?;
1176 }
1177 }
1178
1179 match format.to_lowercase().as_str() {
1180 "json" => {
1181 let parsed: Vec<serde_json::Value> = sessions
1183 .iter()
1184 .filter_map(|s| serde_json::from_str(s).ok())
1185 .collect();
1186 let json_output = serde_json::to_string_pretty(&parsed)?;
1187 fs::write(&output_path, json_output)?;
1188 }
1189 "jsonl" => {
1190 let content: String = sessions
1192 .iter()
1193 .filter_map(|s| serde_json::from_str::<serde_json::Value>(s).ok())
1194 .map(|v| serde_json::to_string(&v).unwrap_or_default())
1195 .collect::<Vec<_>>()
1196 .join("\n");
1197 fs::write(&output_path, content)?;
1198 }
1199 "md" | "markdown" => {
1200 let mut md_content = String::from("# Chat Sessions Export\n\n");
1202 md_content.push_str(&format!(
1203 "Exported: {}\n\n",
1204 Utc::now().format("%Y-%m-%d %H:%M:%S")
1205 ));
1206
1207 for session_json in &sessions {
1208 if let Ok(session) = parse_session_json(session_json) {
1209 md_content.push_str(&format!("## {}\n\n", session.title()));
1210 md_content.push_str(&format!("Messages: {}\n\n", session.request_count()));
1211
1212 for request in &session.requests {
1213 if let Some(msg) = &request.message {
1214 if let Some(text) = &msg.text {
1215 md_content.push_str(&format!("### User\n\n{}\n\n", text));
1216 }
1217 }
1218 if let Some(response) = &request.response {
1220 let response_text = response
1221 .get("text")
1222 .and_then(|v| v.as_str())
1223 .or_else(|| response.get("content").and_then(|v| v.as_str()))
1224 .or_else(|| {
1225 response
1227 .get("value")
1228 .and_then(|v| v.as_array())
1229 .and_then(|arr| arr.first())
1230 .and_then(|v| v.get("value"))
1231 .and_then(|v| v.as_str())
1232 });
1233
1234 if let Some(text) = response_text {
1235 md_content.push_str(&format!("### Assistant\n\n{}\n\n", text));
1236 }
1237 }
1238 }
1239 md_content.push_str("---\n\n");
1240 }
1241 }
1242 fs::write(&output_path, md_content)?;
1243 }
1244 _ => {
1245 anyhow::bail!("Unknown format: {}. Supported: json, jsonl, md", format);
1246 }
1247 }
1248
1249 println!(
1250 "{} Exported {} sessions to {}",
1251 "[+]".green(),
1252 sessions.len().to_string().cyan(),
1253 output_path.display()
1254 );
1255
1256 Ok(())
1257}
1258
1259pub fn harvest_git_init(path: Option<&str>) -> Result<()> {
1261 let db_path = get_db_path(path)?;
1262 init_git_tracking(&db_path)
1263}
1264
1265pub fn harvest_git_commit(path: Option<&str>, message: Option<&str>) -> Result<()> {
1266 let db_path = get_db_path(path)?;
1267 let msg = message.unwrap_or("Update harvest database");
1268 git_commit_harvest(&db_path, msg)
1269}
1270
1271pub fn harvest_git_log(path: Option<&str>, count: usize) -> Result<()> {
1272 let db_path = get_db_path(path)?;
1273 let db_dir = db_path.parent().unwrap_or(Path::new("."));
1274 let db_name = db_path.file_name().unwrap().to_str().unwrap();
1275
1276 println!("\n{} Harvest Git History", "[H]".magenta().bold());
1277 println!("{}", "=".repeat(60));
1278
1279 let output = Command::new("git")
1280 .current_dir(db_dir)
1281 .args([
1282 "log",
1283 "--oneline",
1284 &format!("-{}", count),
1285 "--follow",
1286 "--",
1287 db_name,
1288 ])
1289 .output()
1290 .context("Failed to run git log")?;
1291
1292 if !output.status.success() {
1293 let stderr = String::from_utf8_lossy(&output.stderr);
1294 if stderr.contains("does not have any commits") {
1295 println!("{} No commits yet", "[i]".dimmed());
1296 } else {
1297 anyhow::bail!("Git log failed: {}", stderr);
1298 }
1299 return Ok(());
1300 }
1301
1302 let log = String::from_utf8_lossy(&output.stdout);
1303 if log.trim().is_empty() {
1304 println!("{} No commits found for {}", "[i]".dimmed(), db_name);
1305 } else {
1306 for line in log.lines() {
1307 let parts: Vec<&str> = line.splitn(2, ' ').collect();
1308 if parts.len() == 2 {
1309 println!("{} {} {}", "[C]".yellow(), parts[0].cyan(), parts[1]);
1310 } else {
1311 println!("{}", line);
1312 }
1313 }
1314 }
1315
1316 Ok(())
1317}
1318
1319pub fn harvest_git_diff(path: Option<&str>, commit: Option<&str>) -> Result<()> {
1320 let db_path = get_db_path(path)?;
1321 let db_dir = db_path.parent().unwrap_or(Path::new("."));
1322 let db_name = db_path.file_name().unwrap().to_str().unwrap();
1323
1324 println!("\n{} Harvest Database Changes", "[H]".magenta().bold());
1325 println!("{}", "=".repeat(60));
1326
1327 let mut args = vec!["diff", "--stat"];
1328 if let Some(c) = commit {
1329 args.push(c);
1330 }
1331 args.push("--");
1332 args.push(db_name);
1333
1334 let output = Command::new("git")
1335 .current_dir(db_dir)
1336 .args(&args)
1337 .output()
1338 .context("Failed to run git diff")?;
1339
1340 let diff = String::from_utf8_lossy(&output.stdout);
1341 if diff.trim().is_empty() {
1342 println!("{} No changes", "[+]".green());
1343 } else {
1344 println!("{}", diff);
1345 }
1346
1347 Ok(())
1348}
1349
1350pub fn harvest_git_restore(path: Option<&str>, commit: &str) -> Result<()> {
1351 let db_path = get_db_path(path)?;
1352 let db_dir = db_path.parent().unwrap_or(Path::new("."));
1353 let db_name = db_path.file_name().unwrap().to_str().unwrap();
1354
1355 println!("\n{} Restoring Harvest Database", "[H]".magenta().bold());
1356 println!("{}", "=".repeat(60));
1357
1358 let backup_path = db_path.with_extension("db.backup");
1360 if db_path.exists() {
1361 fs::copy(&db_path, &backup_path)?;
1362 println!(
1363 "{} Created backup: {}",
1364 "[+]".green(),
1365 backup_path.display()
1366 );
1367 }
1368
1369 let output = Command::new("git")
1371 .current_dir(db_dir)
1372 .args(["checkout", commit, "--", db_name])
1373 .output()
1374 .context("Failed to run git checkout")?;
1375
1376 if !output.status.success() {
1377 let stderr = String::from_utf8_lossy(&output.stderr);
1378 anyhow::bail!("Git restore failed: {}", stderr);
1379 }
1380
1381 println!(
1382 "{} Restored database from commit: {}",
1383 "[+]".green(),
1384 commit
1385 );
1386
1387 Ok(())
1388}
1389
1390fn get_db_path(path: Option<&str>) -> Result<PathBuf> {
1395 if let Some(p) = path {
1396 return Ok(PathBuf::from(p));
1397 }
1398
1399 if let Ok(p) = std::env::var("CSM_HARVEST_DB") {
1401 return Ok(PathBuf::from(p));
1402 }
1403
1404 Ok(std::env::current_dir()?.join("chat_sessions.db"))
1406}
1407
1408fn create_harvest_database(path: &Path) -> Result<()> {
1409 let conn = Connection::open(path)?;
1410
1411 conn.execute_batch(
1412 r#"
1413 -- Sessions table (original harvest format)
1414 CREATE TABLE IF NOT EXISTS sessions (
1415 id TEXT PRIMARY KEY,
1416 provider TEXT NOT NULL,
1417 provider_type TEXT,
1418 workspace_id TEXT,
1419 workspace_name TEXT,
1420 title TEXT NOT NULL,
1421 message_count INTEGER DEFAULT 0,
1422 created_at INTEGER NOT NULL,
1423 updated_at INTEGER NOT NULL,
1424 harvested_at INTEGER NOT NULL,
1425 session_json TEXT NOT NULL
1426 );
1427
1428 CREATE INDEX IF NOT EXISTS idx_sessions_provider ON sessions(provider);
1429 CREATE INDEX IF NOT EXISTS idx_sessions_workspace ON sessions(workspace_id);
1430 CREATE INDEX IF NOT EXISTS idx_sessions_updated ON sessions(updated_at);
1431
1432 -- Enhanced messages table with raw markdown and metadata
1433 CREATE TABLE IF NOT EXISTS messages_v2 (
1434 id INTEGER PRIMARY KEY AUTOINCREMENT,
1435 session_id TEXT NOT NULL,
1436 message_index INTEGER NOT NULL,
1437 request_id TEXT,
1438 response_id TEXT,
1439 role TEXT NOT NULL,
1440 content_raw TEXT NOT NULL,
1441 content_markdown TEXT,
1442 model_id TEXT,
1443 timestamp INTEGER,
1444 is_canceled INTEGER DEFAULT 0,
1445 metadata_json TEXT,
1446 created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1447 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1448 UNIQUE(session_id, message_index, role)
1449 );
1450
1451 CREATE INDEX IF NOT EXISTS idx_messages_v2_session ON messages_v2(session_id);
1452 CREATE INDEX IF NOT EXISTS idx_messages_v2_role ON messages_v2(role);
1453 CREATE INDEX IF NOT EXISTS idx_messages_v2_timestamp ON messages_v2(timestamp);
1454
1455 -- Tool invocations within messages (file edits, terminal commands, etc.)
1456 CREATE TABLE IF NOT EXISTS tool_invocations (
1457 id INTEGER PRIMARY KEY AUTOINCREMENT,
1458 message_id INTEGER NOT NULL,
1459 session_id TEXT NOT NULL,
1460 tool_name TEXT NOT NULL,
1461 tool_call_id TEXT,
1462 invocation_index INTEGER DEFAULT 0,
1463 input_json TEXT,
1464 output_json TEXT,
1465 status TEXT DEFAULT 'pending',
1466 is_confirmed INTEGER DEFAULT 0,
1467 timestamp INTEGER,
1468 FOREIGN KEY (message_id) REFERENCES messages_v2(id) ON DELETE CASCADE,
1469 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1470 );
1471
1472 CREATE INDEX IF NOT EXISTS idx_tool_invocations_message ON tool_invocations(message_id);
1473 CREATE INDEX IF NOT EXISTS idx_tool_invocations_session ON tool_invocations(session_id);
1474 CREATE INDEX IF NOT EXISTS idx_tool_invocations_tool ON tool_invocations(tool_name);
1475
1476 -- File changes/diffs associated with tool invocations
1477 CREATE TABLE IF NOT EXISTS file_changes (
1478 id INTEGER PRIMARY KEY AUTOINCREMENT,
1479 tool_invocation_id INTEGER,
1480 session_id TEXT NOT NULL,
1481 message_index INTEGER,
1482 file_path TEXT NOT NULL,
1483 change_type TEXT NOT NULL,
1484 old_content TEXT,
1485 new_content TEXT,
1486 diff_unified TEXT,
1487 line_start INTEGER,
1488 line_end INTEGER,
1489 timestamp INTEGER,
1490 FOREIGN KEY (tool_invocation_id) REFERENCES tool_invocations(id) ON DELETE CASCADE,
1491 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1492 );
1493
1494 CREATE INDEX IF NOT EXISTS idx_file_changes_tool ON file_changes(tool_invocation_id);
1495 CREATE INDEX IF NOT EXISTS idx_file_changes_session ON file_changes(session_id);
1496 CREATE INDEX IF NOT EXISTS idx_file_changes_path ON file_changes(file_path);
1497
1498 -- Message-level checkpoints for versioning
1499 CREATE TABLE IF NOT EXISTS message_checkpoints (
1500 id INTEGER PRIMARY KEY AUTOINCREMENT,
1501 session_id TEXT NOT NULL,
1502 message_index INTEGER NOT NULL,
1503 checkpoint_number INTEGER NOT NULL,
1504 content_hash TEXT NOT NULL,
1505 snapshot_json TEXT,
1506 file_state_json TEXT,
1507 created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1508 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1509 UNIQUE(session_id, message_index, checkpoint_number)
1510 );
1511
1512 CREATE INDEX IF NOT EXISTS idx_message_checkpoints_session ON message_checkpoints(session_id);
1513 CREATE INDEX IF NOT EXISTS idx_sessions_title ON sessions(title);
1514
1515 -- Messages table for full-text search
1516 CREATE TABLE IF NOT EXISTS messages (
1517 id INTEGER PRIMARY KEY AUTOINCREMENT,
1518 session_id TEXT NOT NULL,
1519 message_index INTEGER NOT NULL,
1520 role TEXT NOT NULL,
1521 content TEXT NOT NULL,
1522 created_at INTEGER,
1523 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1524 UNIQUE(session_id, message_index)
1525 );
1526
1527 CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id);
1528 CREATE INDEX IF NOT EXISTS idx_messages_role ON messages(role);
1529
1530 -- Checkpoints for version tracking
1531 CREATE TABLE IF NOT EXISTS checkpoints (
1532 id INTEGER PRIMARY KEY AUTOINCREMENT,
1533 session_id TEXT NOT NULL,
1534 checkpoint_number INTEGER NOT NULL,
1535 message TEXT,
1536 message_count INTEGER NOT NULL,
1537 content_hash TEXT NOT NULL,
1538 snapshot TEXT,
1539 created_at TEXT NOT NULL DEFAULT (datetime('now')),
1540 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1541 UNIQUE(session_id, checkpoint_number)
1542 );
1543
1544 CREATE INDEX IF NOT EXISTS idx_checkpoints_session ON checkpoints(session_id);
1545
1546 -- Share links for importing shared conversations
1547 CREATE TABLE IF NOT EXISTS share_links (
1548 id TEXT PRIMARY KEY,
1549 session_id TEXT,
1550 provider TEXT NOT NULL,
1551 url TEXT NOT NULL UNIQUE,
1552 share_id TEXT NOT NULL,
1553 title TEXT,
1554 imported INTEGER DEFAULT 0,
1555 imported_at INTEGER,
1556 created_at INTEGER NOT NULL,
1557 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE SET NULL
1558 );
1559
1560 CREATE INDEX IF NOT EXISTS idx_share_links_provider ON share_links(provider);
1561 CREATE INDEX IF NOT EXISTS idx_share_links_imported ON share_links(imported);
1562
1563 -- Harvest metadata
1564 CREATE TABLE IF NOT EXISTS harvest_metadata (
1565 key TEXT PRIMARY KEY,
1566 value TEXT NOT NULL
1567 );
1568
1569 INSERT OR REPLACE INTO harvest_metadata (key, value)
1570 VALUES ('version', '2.1'),
1571 ('created_at', datetime('now'));
1572
1573 -- Full-text search for messages (standalone FTS table)
1574 CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
1575 content_raw
1576 );
1577
1578 -- Triggers to keep FTS index in sync with messages_v2
1579 CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
1580 INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1581 END;
1582
1583 CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
1584 INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
1585 END;
1586
1587 CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
1588 INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
1589 INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1590 END;
1591 "#,
1592 )?;
1593
1594 Ok(())
1595}
1596
1597fn insert_or_update_session(
1598 conn: &Connection,
1599 session: &ChatSession,
1600 provider: &str,
1601 workspace_id: Option<&str>,
1602 workspace_name: Option<&str>,
1603) -> Result<bool> {
1604 let session_id = session
1605 .session_id
1606 .clone()
1607 .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
1608
1609 let now = Utc::now().timestamp_millis();
1610 let session_json = serde_json::to_string(session)?;
1611
1612 let existing: Option<i64> = conn
1614 .query_row(
1615 "SELECT updated_at FROM sessions WHERE id = ?",
1616 [&session_id],
1617 |row| row.get(0),
1618 )
1619 .ok();
1620
1621 let updated = existing.is_some();
1622
1623 conn.execute(
1624 r#"
1625 INSERT OR REPLACE INTO sessions
1626 (id, provider, provider_type, workspace_id, workspace_name, title,
1627 message_count, created_at, updated_at, harvested_at, session_json)
1628 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1629 "#,
1630 params![
1631 session_id,
1632 provider,
1633 provider,
1634 workspace_id,
1635 workspace_name,
1636 session.title(),
1637 session.request_count() as i64,
1638 session.creation_date,
1639 session.last_message_date,
1640 now,
1641 session_json,
1642 ],
1643 )?;
1644
1645 populate_enhanced_messages(conn, &session_id, session)?;
1647
1648 Ok(updated)
1649}
1650
1651fn populate_enhanced_messages(
1653 conn: &Connection,
1654 session_id: &str,
1655 session: &ChatSession,
1656) -> Result<()> {
1657 conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
1659 conn.execute(
1660 "DELETE FROM tool_invocations WHERE session_id = ?",
1661 [session_id],
1662 )?;
1663 conn.execute(
1664 "DELETE FROM file_changes WHERE session_id = ?",
1665 [session_id],
1666 )?;
1667
1668 for (idx, request) in session.requests.iter().enumerate() {
1669 let timestamp = request.timestamp;
1670 let request_id = request.request_id.as_deref();
1671 let response_id = request.response_id.as_deref();
1672 let model_id = request.model_id.as_deref();
1673 let is_canceled = request.is_canceled.unwrap_or(false);
1674
1675 if let Some(ref message) = request.message {
1677 let content = message.text.clone().unwrap_or_default();
1678 if !content.is_empty() {
1679 let metadata = serde_json::json!({
1680 "variable_data": request.variable_data,
1681 });
1682
1683 conn.execute(
1684 r#"
1685 INSERT OR REPLACE INTO messages_v2
1686 (session_id, message_index, request_id, response_id, role,
1687 content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1688 VALUES (?, ?, ?, ?, 'user', ?, ?, ?, ?, 0, ?)
1689 "#,
1690 params![
1691 session_id,
1692 (idx * 2) as i64,
1693 request_id,
1694 response_id,
1695 &content,
1696 &content, model_id,
1698 timestamp,
1699 serde_json::to_string(&metadata).ok(),
1700 ],
1701 )?;
1702 }
1703 }
1704
1705 if let Some(ref response) = request.response {
1707 let (content, tool_invocations) = extract_response_content_and_tools(response);
1708
1709 if !content.is_empty() || !tool_invocations.is_empty() {
1710 let metadata = serde_json::json!({
1711 "content_references": request.content_references,
1712 "code_citations": request.code_citations,
1713 "response_markdown_info": request.response_markdown_info,
1714 });
1715
1716 conn.execute(
1717 r#"
1718 INSERT OR REPLACE INTO messages_v2
1719 (session_id, message_index, request_id, response_id, role,
1720 content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1721 VALUES (?, ?, ?, ?, 'assistant', ?, ?, ?, ?, ?, ?)
1722 "#,
1723 params![
1724 session_id,
1725 (idx * 2 + 1) as i64,
1726 request_id,
1727 response_id,
1728 &content,
1729 &content,
1730 model_id,
1731 timestamp,
1732 is_canceled as i64,
1733 serde_json::to_string(&metadata).ok(),
1734 ],
1735 )?;
1736
1737 let message_id: i64 = conn.last_insert_rowid();
1739
1740 for (inv_idx, invocation) in tool_invocations.iter().enumerate() {
1742 insert_tool_invocation(
1743 conn, message_id, session_id, inv_idx, invocation, timestamp,
1744 )?;
1745 }
1746 }
1747 }
1748 }
1749
1750 Ok(())
1751}
1752
1753fn extract_response_content_and_tools(
1755 response: &serde_json::Value,
1756) -> (String, Vec<serde_json::Value>) {
1757 let mut text_parts = Vec::new();
1758 let mut tool_invocations = Vec::new();
1759
1760 if let Some(items) = response.as_array() {
1761 for item in items {
1762 let kind = item.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1763
1764 match kind {
1765 "toolInvocationSerialized" => {
1766 tool_invocations.push(item.clone());
1767 }
1768 "thinking" => {
1769 continue;
1771 }
1772 _ => {
1773 if let Some(value) = item.get("value").and_then(|v| v.as_str()) {
1774 let trimmed = value.trim();
1776 if !trimmed.is_empty() && !is_empty_code_block(trimmed) {
1777 text_parts.push(value.to_string());
1778 }
1779 }
1780 }
1781 }
1782 }
1783 }
1784
1785 (text_parts.join("\n\n"), tool_invocations)
1786}
1787
1788fn insert_tool_invocation(
1790 conn: &Connection,
1791 message_id: i64,
1792 session_id: &str,
1793 inv_idx: usize,
1794 invocation: &serde_json::Value,
1795 timestamp: Option<i64>,
1796) -> Result<()> {
1797 let tool_name = invocation
1798 .get("toolId")
1799 .and_then(|t| t.as_str())
1800 .unwrap_or("unknown");
1801 let tool_call_id = invocation.get("toolCallId").and_then(|t| t.as_str());
1802 let is_complete = invocation
1803 .get("isComplete")
1804 .and_then(|c| c.as_bool())
1805 .unwrap_or(false);
1806 let is_confirmed = invocation.get("isConfirmed");
1807 let tool_data = invocation.get("toolSpecificData");
1808
1809 let input_json = tool_data.map(|d| serde_json::to_string(d).unwrap_or_default());
1810 let status = if is_complete { "complete" } else { "pending" };
1811 let confirmed = match is_confirmed {
1812 Some(v) => v
1813 .get("type")
1814 .and_then(|t| t.as_i64())
1815 .map(|t| t > 0)
1816 .unwrap_or(false),
1817 None => false,
1818 };
1819
1820 conn.execute(
1821 r#"
1822 INSERT INTO tool_invocations
1823 (message_id, session_id, tool_name, tool_call_id, invocation_index,
1824 input_json, status, is_confirmed, timestamp)
1825 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
1826 "#,
1827 params![
1828 message_id,
1829 session_id,
1830 tool_name,
1831 tool_call_id,
1832 inv_idx as i64,
1833 input_json,
1834 status,
1835 confirmed as i64,
1836 timestamp,
1837 ],
1838 )?;
1839
1840 let tool_invocation_id = conn.last_insert_rowid();
1841
1842 if let Some(data) = tool_data {
1844 insert_file_changes(conn, tool_invocation_id, session_id, data, timestamp)?;
1845 }
1846
1847 Ok(())
1848}
1849
1850fn insert_file_changes(
1852 conn: &Connection,
1853 tool_invocation_id: i64,
1854 session_id: &str,
1855 tool_data: &serde_json::Value,
1856 timestamp: Option<i64>,
1857) -> Result<()> {
1858 let kind = tool_data.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1859
1860 match kind {
1861 "terminal" => {
1862 if let Some(cmd_line) = tool_data.get("commandLine") {
1864 let original = cmd_line.get("original").and_then(|o| o.as_str());
1865 let edited = cmd_line.get("toolEdited").and_then(|e| e.as_str());
1866 let output = tool_data
1867 .get("terminalCommandOutput")
1868 .map(|o| serde_json::to_string(o).unwrap_or_default());
1869
1870 conn.execute(
1871 r#"
1872 INSERT INTO file_changes
1873 (tool_invocation_id, session_id, file_path, change_type,
1874 old_content, new_content, diff_unified, timestamp)
1875 VALUES (?, ?, '[terminal]', 'command', ?, ?, ?, ?)
1876 "#,
1877 params![
1878 tool_invocation_id,
1879 session_id,
1880 original,
1881 edited.or(original),
1882 output,
1883 timestamp,
1884 ],
1885 )?;
1886 }
1887 }
1888 "replaceFile" | "editFile" => {
1889 let file_path = tool_data
1891 .get("uri")
1892 .or_else(|| tool_data.get("filePath"))
1893 .and_then(|p| p.as_str())
1894 .unwrap_or("[unknown]");
1895 let old_string = tool_data.get("oldString").and_then(|s| s.as_str());
1896 let new_string = tool_data.get("newString").and_then(|s| s.as_str());
1897
1898 let diff = if let (Some(old), Some(new)) = (old_string, new_string) {
1900 Some(generate_unified_diff(old, new, file_path))
1901 } else {
1902 None
1903 };
1904
1905 conn.execute(
1906 r#"
1907 INSERT INTO file_changes
1908 (tool_invocation_id, session_id, file_path, change_type,
1909 old_content, new_content, diff_unified, timestamp)
1910 VALUES (?, ?, ?, 'edit', ?, ?, ?, ?)
1911 "#,
1912 params![
1913 tool_invocation_id,
1914 session_id,
1915 file_path,
1916 old_string,
1917 new_string,
1918 diff,
1919 timestamp,
1920 ],
1921 )?;
1922 }
1923 "createFile" => {
1924 let file_path = tool_data
1925 .get("uri")
1926 .or_else(|| tool_data.get("filePath"))
1927 .and_then(|p| p.as_str())
1928 .unwrap_or("[unknown]");
1929 let content = tool_data.get("content").and_then(|c| c.as_str());
1930
1931 conn.execute(
1932 r#"
1933 INSERT INTO file_changes
1934 (tool_invocation_id, session_id, file_path, change_type,
1935 new_content, timestamp)
1936 VALUES (?, ?, ?, 'create', ?, ?)
1937 "#,
1938 params![
1939 tool_invocation_id,
1940 session_id,
1941 file_path,
1942 content,
1943 timestamp,
1944 ],
1945 )?;
1946 }
1947 "readFile" => {
1948 let file_path = tool_data
1949 .get("uri")
1950 .or_else(|| tool_data.get("filePath"))
1951 .and_then(|p| p.as_str())
1952 .unwrap_or("[unknown]");
1953
1954 conn.execute(
1955 r#"
1956 INSERT INTO file_changes
1957 (tool_invocation_id, session_id, file_path, change_type, timestamp)
1958 VALUES (?, ?, ?, 'read', ?)
1959 "#,
1960 params![tool_invocation_id, session_id, file_path, timestamp,],
1961 )?;
1962 }
1963 _ => {
1964 if !kind.is_empty() {
1966 let data_json = serde_json::to_string(tool_data).ok();
1967 conn.execute(
1968 r#"
1969 INSERT INTO file_changes
1970 (tool_invocation_id, session_id, file_path, change_type,
1971 diff_unified, timestamp)
1972 VALUES (?, ?, ?, ?, ?, ?)
1973 "#,
1974 params![
1975 tool_invocation_id,
1976 session_id,
1977 format!("[{}]", kind),
1978 kind,
1979 data_json,
1980 timestamp,
1981 ],
1982 )?;
1983 }
1984 }
1985 }
1986
1987 Ok(())
1988}
1989
1990fn generate_unified_diff(old: &str, new: &str, file_path: &str) -> String {
1992 let old_lines: Vec<&str> = old.lines().collect();
1993 let new_lines: Vec<&str> = new.lines().collect();
1994
1995 let mut diff = format!("--- a/{}\n+++ b/{}\n", file_path, file_path);
1996
1997 let max_lines = old_lines.len().max(new_lines.len());
1999 let mut in_hunk = false;
2000 let mut hunk_start = 0;
2001 let mut hunk_lines = Vec::new();
2002
2003 for i in 0..max_lines {
2004 let old_line = old_lines.get(i).copied();
2005 let new_line = new_lines.get(i).copied();
2006
2007 match (old_line, new_line) {
2008 (Some(o), Some(n)) if o == n => {
2009 if in_hunk {
2010 hunk_lines.push(format!(" {}", o));
2011 }
2012 }
2013 (Some(o), Some(n)) => {
2014 if !in_hunk {
2015 in_hunk = true;
2016 hunk_start = i + 1;
2017 }
2018 hunk_lines.push(format!("-{}", o));
2019 hunk_lines.push(format!("+{}", n));
2020 }
2021 (Some(o), None) => {
2022 if !in_hunk {
2023 in_hunk = true;
2024 hunk_start = i + 1;
2025 }
2026 hunk_lines.push(format!("-{}", o));
2027 }
2028 (None, Some(n)) => {
2029 if !in_hunk {
2030 in_hunk = true;
2031 hunk_start = i + 1;
2032 }
2033 hunk_lines.push(format!("+{}", n));
2034 }
2035 (None, None) => break,
2036 }
2037 }
2038
2039 if !hunk_lines.is_empty() {
2040 diff.push_str(&format!(
2041 "@@ -{},{} +{},{} @@\n",
2042 hunk_start,
2043 old_lines.len(),
2044 hunk_start,
2045 new_lines.len()
2046 ));
2047 for line in hunk_lines {
2048 diff.push_str(&line);
2049 diff.push('\n');
2050 }
2051 }
2052
2053 diff
2054}
2055
2056fn harvest_web_providers(
2058 conn: &Connection,
2059 stats: &mut HarvestStats,
2060 include_providers: &[String],
2061 exclude_providers: &[String],
2062) -> Result<()> {
2063 use crate::browser::extract_provider_cookies;
2064
2065 println!("\n{} Harvesting from web providers...", "[*]".blue());
2066
2067 let web_provider_configs: Vec<(&str, &str, &str)> = vec![
2069 ("ChatGPT", "chatgpt", "__Secure-next-auth.session-token"),
2070 ("Claude", "claude", "sessionKey"),
2071 ];
2072
2073 let mut web_sessions_harvested = 0;
2074
2075 for (display_name, provider_key, _cookie_name) in &web_provider_configs {
2076 if !include_providers.is_empty()
2078 && !include_providers
2079 .iter()
2080 .any(|p| p.eq_ignore_ascii_case(provider_key))
2081 {
2082 continue;
2083 }
2084 if exclude_providers
2085 .iter()
2086 .any(|p| p.eq_ignore_ascii_case(provider_key))
2087 {
2088 continue;
2089 }
2090
2091 print!(" {} Checking {} ... ", "[-]".yellow(), display_name);
2092
2093 if let Some(creds) = extract_provider_cookies(provider_key) {
2095 if let Some(session_token) = &creds.session_token {
2096 println!("{}", "authenticated".green());
2097
2098 let result = match *provider_key {
2100 "chatgpt" => harvest_chatgpt_sessions(conn, session_token, stats),
2101 "claude" => harvest_claude_sessions(conn, session_token, stats),
2102 _ => Ok(0),
2103 };
2104
2105 match result {
2106 Ok(count) => {
2107 if count > 0 {
2108 println!(
2109 " {} Harvested {} sessions from {}",
2110 "[+]".green(),
2111 count.to_string().cyan(),
2112 display_name
2113 );
2114 web_sessions_harvested += count;
2115 }
2116 }
2117 Err(e) => {
2118 println!(
2119 " {} Failed to harvest {}: {:?}",
2120 "[!]".red(),
2121 display_name,
2122 e
2123 );
2124 }
2125 }
2126 } else {
2127 println!("{}", "no session token".yellow());
2128 }
2129 } else {
2130 println!("{}", "not authenticated".yellow());
2131 }
2132 }
2133
2134 if web_sessions_harvested > 0 {
2135 println!(
2136 " {} Total web sessions harvested: {}",
2137 "[+]".green(),
2138 web_sessions_harvested.to_string().cyan()
2139 );
2140 }
2141
2142 Ok(())
2143}
2144
2145fn harvest_chatgpt_sessions(
2147 conn: &Connection,
2148 session_token: &str,
2149 stats: &mut HarvestStats,
2150) -> Result<usize> {
2151 use crate::providers::cloud::chatgpt::ChatGPTProvider;
2152 use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2153
2154 let provider = ChatGPTProvider::with_session_token(session_token.to_string());
2155
2156 let options = FetchOptions {
2158 limit: Some(100),
2159 include_archived: false,
2160 after: None,
2161 before: None,
2162 session_token: Some(session_token.to_string()),
2163 };
2164
2165 let conversations = provider
2166 .list_conversations(&options)
2167 .context("Failed to list ChatGPT conversations")?;
2168
2169 let mut harvested = 0;
2170
2171 for conv_summary in conversations {
2172 let exists: bool = conn
2174 .query_row(
2175 "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'chatgpt')",
2176 params![&conv_summary.id],
2177 |row| row.get(0),
2178 )
2179 .unwrap_or(false);
2180
2181 if exists {
2182 continue;
2184 }
2185
2186 match provider.fetch_conversation(&conv_summary.id) {
2188 Ok(conv) => {
2189 if let Err(e) =
2191 insert_cloud_conversation_to_harvest_db(conn, &conv, "chatgpt", None)
2192 {
2193 eprintln!("Failed to insert ChatGPT session: {}", e);
2194 continue;
2195 }
2196 harvested += 1;
2197 stats.sessions_added += 1;
2198 }
2199 Err(e) => {
2200 eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2201 }
2202 }
2203
2204 std::thread::sleep(std::time::Duration::from_millis(100));
2206 }
2207
2208 Ok(harvested)
2209}
2210
2211fn harvest_claude_sessions(
2213 conn: &Connection,
2214 session_token: &str,
2215 stats: &mut HarvestStats,
2216) -> Result<usize> {
2217 use crate::providers::cloud::anthropic::AnthropicProvider;
2218 use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2219
2220 let provider = AnthropicProvider::with_session_token(session_token.to_string());
2221
2222 let options = FetchOptions {
2224 limit: Some(100),
2225 include_archived: false,
2226 after: None,
2227 before: None,
2228 session_token: Some(session_token.to_string()),
2229 };
2230
2231 let conversations = provider
2232 .list_conversations(&options)
2233 .context("Failed to list Claude conversations")?;
2234
2235 let mut harvested = 0;
2236
2237 for conv_summary in conversations {
2238 let exists: bool = conn
2240 .query_row(
2241 "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'claude')",
2242 params![&conv_summary.id],
2243 |row| row.get(0),
2244 )
2245 .unwrap_or(false);
2246
2247 if exists {
2248 continue;
2249 }
2250
2251 match provider.fetch_conversation(&conv_summary.id) {
2253 Ok(conv) => {
2254 if let Err(e) = insert_cloud_conversation_to_harvest_db(conn, &conv, "claude", None)
2255 {
2256 eprintln!("Failed to insert Claude session: {}", e);
2257 continue;
2258 }
2259 harvested += 1;
2260 stats.sessions_added += 1;
2261 }
2262 Err(e) => {
2263 eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2264 }
2265 }
2266
2267 std::thread::sleep(std::time::Duration::from_millis(100));
2268 }
2269
2270 Ok(harvested)
2271}
2272
2273fn insert_cloud_conversation_to_harvest_db(
2275 conn: &Connection,
2276 conv: &crate::providers::cloud::common::CloudConversation,
2277 provider: &str,
2278 workspace_name: Option<&str>,
2279) -> Result<()> {
2280 let now = Utc::now().timestamp_millis();
2281 let created_at = conv.created_at.timestamp_millis();
2282 let updated_at = conv
2283 .updated_at
2284 .map(|dt| dt.timestamp_millis())
2285 .unwrap_or(created_at);
2286
2287 let session_json = serde_json::json!({
2289 "id": conv.id,
2290 "title": conv.title,
2291 "model": conv.model,
2292 "created_at": conv.created_at.to_rfc3339(),
2293 "updated_at": conv.updated_at.map(|dt| dt.to_rfc3339()),
2294 "messages": conv.messages.iter().map(|m| {
2295 serde_json::json!({
2296 "id": m.id,
2297 "role": m.role,
2298 "content": m.content,
2299 "timestamp": m.timestamp.map(|dt| dt.to_rfc3339()),
2300 "model": m.model,
2301 })
2302 }).collect::<Vec<_>>(),
2303 });
2304
2305 conn.execute(
2306 r#"
2307 INSERT OR REPLACE INTO sessions
2308 (id, provider, provider_type, workspace_id, workspace_name, title,
2309 message_count, created_at, updated_at, harvested_at, session_json)
2310 VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
2311 "#,
2312 params![
2313 conv.id,
2314 provider,
2315 provider,
2316 workspace_name,
2317 conv.title.clone().unwrap_or_else(|| "Untitled".to_string()),
2318 conv.messages.len() as i64,
2319 created_at,
2320 updated_at,
2321 now,
2322 session_json.to_string(),
2323 ],
2324 )?;
2325
2326 populate_cloud_messages(conn, &conv.id, conv)?;
2328
2329 Ok(())
2330}
2331
2332fn populate_cloud_messages(
2334 conn: &Connection,
2335 session_id: &str,
2336 conv: &crate::providers::cloud::common::CloudConversation,
2337) -> Result<()> {
2338 conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
2340
2341 for (idx, message) in conv.messages.iter().enumerate() {
2342 let timestamp = message.timestamp.map(|dt| dt.timestamp_millis());
2343 let role = match message.role.as_str() {
2344 "user" | "human" => "user",
2345 "assistant" => "assistant",
2346 "system" => "system",
2347 other => other,
2348 };
2349
2350 conn.execute(
2351 r#"
2352 INSERT INTO messages_v2
2353 (session_id, message_index, request_id, response_id, role,
2354 content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
2355 VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, 0, NULL)
2356 "#,
2357 params![
2358 session_id,
2359 idx as i64,
2360 message.id,
2361 role,
2362 &message.content,
2363 &message.content,
2364 message.model.as_deref(),
2365 timestamp,
2366 ],
2367 )?;
2368 }
2369
2370 Ok(())
2371}
2372
2373fn update_harvest_metadata(conn: &Connection) -> Result<()> {
2374 conn.execute(
2375 "INSERT OR REPLACE INTO harvest_metadata (key, value) VALUES ('last_harvest', datetime('now'))",
2376 [],
2377 )?;
2378 Ok(())
2379}
2380
2381fn init_git_tracking(db_path: &Path) -> Result<()> {
2382 let db_dir = db_path.parent().unwrap_or(Path::new("."));
2383 let db_name = db_path.file_name().unwrap().to_str().unwrap();
2384
2385 println!("\n{} Initializing Git Tracking", "[G]".green().bold());
2386
2387 if !db_dir.join(".git").exists() {
2389 let output = Command::new("git")
2390 .current_dir(db_dir)
2391 .args(["init"])
2392 .output()
2393 .context("Failed to run git init")?;
2394
2395 if !output.status.success() {
2396 anyhow::bail!(
2397 "Git init failed: {}",
2398 String::from_utf8_lossy(&output.stderr)
2399 );
2400 }
2401 println!("{} Initialized git repository", "[+]".green());
2402 } else {
2403 println!("{} Git repository already exists", "[i]".blue());
2404 }
2405
2406 let gitignore_path = db_dir.join(".gitignore");
2408 if !gitignore_path.exists() {
2409 fs::write(
2410 &gitignore_path,
2411 "*.db-journal\n*.db-wal\n*.db-shm\n*.backup\n",
2412 )?;
2413 println!("{} Created .gitignore", "[+]".green());
2414 }
2415
2416 let output = Command::new("git")
2418 .current_dir(db_dir)
2419 .args(["add", db_name])
2420 .output()?;
2421
2422 if output.status.success() {
2423 println!("{} Added {} to git", "[+]".green(), db_name);
2424 }
2425
2426 let output = Command::new("git")
2428 .current_dir(db_dir)
2429 .args(["rev-parse", "HEAD"])
2430 .output()?;
2431
2432 if !output.status.success() {
2433 let output = Command::new("git")
2434 .current_dir(db_dir)
2435 .args(["commit", "-m", "Initialize harvest database"])
2436 .output()?;
2437
2438 if output.status.success() {
2439 println!("{} Created initial commit", "[+]".green());
2440 }
2441 }
2442
2443 println!("\n{} Git tracking enabled!", "[+]".green().bold());
2444 println!(" Run 'csm harvest git commit -m \"message\"' to save changes");
2445 println!(" Run 'csm harvest git log' to view history");
2446
2447 Ok(())
2448}
2449
2450fn git_commit_harvest(db_path: &Path, message: &str) -> Result<()> {
2451 let db_dir = db_path.parent().unwrap_or(Path::new("."));
2452 let db_name = db_path.file_name().unwrap().to_str().unwrap();
2453
2454 let output = Command::new("git")
2456 .current_dir(db_dir)
2457 .args(["add", db_name])
2458 .output()
2459 .context("Failed to stage database")?;
2460
2461 if !output.status.success() {
2462 anyhow::bail!(
2463 "Git add failed: {}",
2464 String::from_utf8_lossy(&output.stderr)
2465 );
2466 }
2467
2468 let output = Command::new("git")
2470 .current_dir(db_dir)
2471 .args(["commit", "-m", message])
2472 .output()
2473 .context("Failed to commit")?;
2474
2475 if !output.status.success() {
2476 let stderr = String::from_utf8_lossy(&output.stderr);
2477 if stderr.contains("nothing to commit") {
2478 println!("{} Nothing to commit", "[i]".blue());
2479 return Ok(());
2480 }
2481 anyhow::bail!("Git commit failed: {}", stderr);
2482 }
2483
2484 let output = Command::new("git")
2486 .current_dir(db_dir)
2487 .args(["rev-parse", "--short", "HEAD"])
2488 .output()?;
2489
2490 let hash = String::from_utf8_lossy(&output.stdout).trim().to_string();
2491 println!("{} Committed: {} - {}", "[+]".green(), hash.cyan(), message);
2492
2493 Ok(())
2494}
2495
2496pub fn harvest_share(
2502 db_path: Option<&str>,
2503 url: &str,
2504 name: Option<&str>,
2505 _workspace: Option<&str>,
2506) -> Result<()> {
2507 let db_path = get_db_path(db_path)?;
2508
2509 let share_info = match ShareLinkParser::parse(url) {
2511 Some(info) => info,
2512 None => {
2513 println!("{} Unrecognized share link format", "[!]".yellow());
2514 println!(" Supported providers: ChatGPT, Claude, Gemini, Perplexity");
2515 println!(" Example URLs:");
2516 println!(" - https://chat.openai.com/share/abc123...");
2517 println!(" - https://claude.ai/share/xyz789...");
2518 anyhow::bail!("Could not parse share link URL");
2519 }
2520 };
2521
2522 println!("{}", "=".repeat(60).cyan());
2523 println!("{}", " Share Link Import ".bold().cyan());
2524 println!("{}", "=".repeat(60).cyan());
2525 println!();
2526
2527 println!(
2528 "{} Detected provider: {}",
2529 "[i]".blue(),
2530 share_info.provider.bold()
2531 );
2532 println!("{} Share ID: {}", "[i]".blue(), share_info.share_id);
2533
2534 let db = ChatDatabase::open(&db_path)?;
2536
2537 let conn = db.connection();
2539 let existing: Option<(String, i64)> = conn
2540 .query_row(
2541 "SELECT id, imported FROM share_links WHERE url = ?",
2542 [url],
2543 |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
2544 )
2545 .ok();
2546
2547 if let Some((id, imported)) = existing {
2548 println!();
2549 println!(
2550 "{} This share link has already been registered (ID: {})",
2551 "[!]".yellow(),
2552 id
2553 );
2554
2555 let status = if imported == 1 { "imported" } else { "pending" };
2556 println!("{} Current status: {}", "[i]".blue(), status);
2557
2558 if imported == 1 {
2559 println!(
2560 "{} Session already imported - no action needed",
2561 "[+]".green()
2562 );
2563 return Ok(());
2564 }
2565 } else {
2566 let link_id = uuid::Uuid::new_v4().to_string();
2568 let now = chrono::Utc::now().timestamp();
2569 conn.execute(
2570 "INSERT INTO share_links (id, url, provider, share_id, title, imported, created_at) VALUES (?, ?, ?, ?, ?, 0, ?)",
2571 params![link_id, url, share_info.provider, share_info.share_id, name, now],
2572 )?;
2573
2574 println!(
2575 "{} Registered share link (ID: {})",
2576 "[+]".green(),
2577 &link_id[..8]
2578 );
2579 }
2580
2581 println!();
2582 println!("{}", "-".repeat(60).dimmed());
2583 println!();
2584
2585 println!("{} Share link registered as pending", "[i]".blue());
2594 println!();
2595 println!("{}", "Note:".bold().yellow());
2596 println!(" Automatic content fetching from share links is not yet implemented.");
2597 println!(" For now, you can:");
2598 println!(" 1. Open the share link in your browser");
2599 println!(" 2. Export the conversation manually");
2600 println!(" 3. Import with: csm import <file>");
2601 println!();
2602 println!(" Or use 'csm harvest shares' to view pending links.");
2603
2604 Ok(())
2605}
2606
2607pub fn harvest_shares(
2609 db_path: Option<&str>,
2610 status_filter: Option<&str>,
2611 limit: usize,
2612) -> Result<()> {
2613 let db_path = get_db_path(db_path)?;
2614
2615 if !db_path.exists() {
2616 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2617 }
2618
2619 let db = ChatDatabase::open(&db_path)?;
2620 let conn = db.connection();
2621
2622 println!("{}", "=".repeat(70).cyan());
2623 println!("{}", " Share Links ".bold().cyan());
2624 println!("{}", "=".repeat(70).cyan());
2625 println!();
2626
2627 let query = match status_filter {
2628 Some("pending") => format!(
2629 "SELECT id, url, provider, share_id, title, imported, created_at
2630 FROM share_links WHERE imported = 0 ORDER BY created_at DESC LIMIT {}",
2631 limit
2632 ),
2633 Some("imported") => format!(
2634 "SELECT id, url, provider, share_id, title, imported, created_at
2635 FROM share_links WHERE imported = 1 ORDER BY created_at DESC LIMIT {}",
2636 limit
2637 ),
2638 Some(_) | None => format!(
2639 "SELECT id, url, provider, share_id, title, imported, created_at
2640 FROM share_links ORDER BY created_at DESC LIMIT {}",
2641 limit
2642 ),
2643 };
2644
2645 let mut stmt = conn.prepare(&query)?;
2646
2647 let rows: Vec<_> = stmt
2648 .query_map([], |row| {
2649 Ok((
2650 row.get::<_, String>(0)?,
2651 row.get::<_, String>(1)?,
2652 row.get::<_, String>(2)?,
2653 row.get::<_, Option<String>>(3)?,
2654 row.get::<_, Option<String>>(4)?,
2655 row.get::<_, i64>(5)?,
2656 row.get::<_, i64>(6)?,
2657 ))
2658 })?
2659 .collect::<Result<Vec<_>, _>>()?;
2660
2661 if rows.is_empty() {
2662 if let Some(status) = status_filter {
2663 println!("{} No share links with status '{}'", "[i]".blue(), status);
2664 } else {
2665 println!("{} No share links found", "[i]".blue());
2666 }
2667 println!(" Use 'csm harvest share <url>' to add a share link");
2668 return Ok(());
2669 }
2670
2671 let pending_count: i64 = conn
2673 .query_row(
2674 "SELECT COUNT(*) FROM share_links WHERE imported = 0",
2675 [],
2676 |row| row.get(0),
2677 )
2678 .unwrap_or(0);
2679
2680 let imported_count: i64 = conn
2681 .query_row(
2682 "SELECT COUNT(*) FROM share_links WHERE imported = 1",
2683 [],
2684 |row| row.get(0),
2685 )
2686 .unwrap_or(0);
2687
2688 println!(
2689 "{} {} pending, {} imported",
2690 "[i]".blue(),
2691 pending_count.to_string().yellow(),
2692 imported_count.to_string().green()
2693 );
2694 println!();
2695
2696 for (id, url, provider, _share_id, title, imported, created_at) in rows {
2697 let status = if imported == 1 { "imported" } else { "pending" };
2698 let status_colored = if imported == 1 {
2699 status.green()
2700 } else {
2701 status.yellow()
2702 };
2703
2704 println!(
2705 "{} [{}] {} - {}",
2706 format!("#{}", &id[..8]).dimmed(),
2707 status_colored,
2708 provider.bold(),
2709 title.as_deref().unwrap_or("(untitled)")
2710 );
2711
2712 let display_url = if url.len() > 60 {
2714 format!("{}...", &url[..57])
2715 } else {
2716 url.clone()
2717 };
2718 println!(" {} {}", "URL:".dimmed(), display_url.dimmed());
2719
2720 let timestamp = chrono::DateTime::from_timestamp(created_at, 0)
2722 .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
2723 .unwrap_or_else(|| created_at.to_string());
2724 println!(" {} {}", "Added:".dimmed(), timestamp.dimmed());
2725 println!();
2726 }
2727
2728 Ok(())
2729}
2730
2731pub fn harvest_checkpoint(
2737 db_path: Option<&str>,
2738 session_id: &str,
2739 message: Option<&str>,
2740) -> Result<()> {
2741 let db_path = get_db_path(db_path)?;
2742
2743 if !db_path.exists() {
2744 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2745 }
2746
2747 let db = ChatDatabase::open(&db_path)?;
2748 let conn = db.connection();
2749
2750 let session: Option<(i64, String, i64)> = conn
2752 .query_row(
2753 "SELECT id, session_id, message_count FROM sessions WHERE session_id = ? OR id = ?",
2754 params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2755 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2756 )
2757 .ok();
2758
2759 let (internal_id, actual_session_id, msg_count) = match session {
2760 Some(s) => s,
2761 None => {
2762 println!("{} Session not found: {}", "[!]".red(), session_id);
2763 println!(" Use 'csm harvest list' to see available sessions");
2764 anyhow::bail!("Session not found");
2765 }
2766 };
2767
2768 let checkpoint_num: i64 = conn.query_row(
2770 "SELECT COALESCE(MAX(checkpoint_number), 0) + 1 FROM checkpoints WHERE session_id = ?",
2771 [internal_id],
2772 |row| row.get(0),
2773 )?;
2774
2775 let content_hash = format!(
2777 "{:x}",
2778 md5_hash(&format!("{}:{}", actual_session_id, msg_count))
2779 );
2780
2781 let messages: Vec<String> = {
2783 let mut stmt = conn
2784 .prepare("SELECT content FROM messages WHERE session_id = ? ORDER BY message_index")?;
2785 let rows = stmt.query_map([internal_id], |row| row.get::<_, String>(0))?;
2786 rows.filter_map(|r| r.ok()).collect()
2787 };
2788
2789 let snapshot = serde_json::json!({
2790 "message_count": msg_count,
2791 "messages": messages,
2792 });
2793
2794 let default_message = format!("Checkpoint {}", checkpoint_num);
2795 let message_text = message.unwrap_or(&default_message);
2796
2797 conn.execute(
2799 "INSERT INTO checkpoints (session_id, checkpoint_number, message, message_count, content_hash, snapshot)
2800 VALUES (?, ?, ?, ?, ?, ?)",
2801 params![
2802 internal_id,
2803 checkpoint_num,
2804 message_text,
2805 msg_count,
2806 content_hash,
2807 snapshot.to_string()
2808 ],
2809 )?;
2810
2811 println!("{}", "=".repeat(60).cyan());
2812 println!("{}", " Checkpoint Created ".bold().cyan());
2813 println!("{}", "=".repeat(60).cyan());
2814 println!();
2815 println!("{} Session: {}", "[+]".green(), actual_session_id);
2816 println!(
2817 "{} Checkpoint #{}: {}",
2818 "[+]".green(),
2819 checkpoint_num,
2820 message_text
2821 );
2822 println!("{} Messages: {}", "[i]".blue(), msg_count);
2823 println!("{} Hash: {}", "[i]".blue(), &content_hash[..16]);
2824 println!();
2825 println!(
2826 " Use 'csm harvest checkpoints {}' to view history",
2827 session_id
2828 );
2829
2830 Ok(())
2831}
2832
2833pub fn harvest_checkpoints(db_path: Option<&str>, session_id: &str) -> Result<()> {
2835 let db_path = get_db_path(db_path)?;
2836
2837 if !db_path.exists() {
2838 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2839 }
2840
2841 let db = ChatDatabase::open(&db_path)?;
2842 let conn = db.connection();
2843
2844 let session: Option<(i64, String, String)> = conn
2846 .query_row(
2847 "SELECT id, session_id, name FROM sessions WHERE session_id = ? OR id = ?",
2848 params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2849 |row| {
2850 Ok((
2851 row.get(0)?,
2852 row.get(1)?,
2853 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
2854 ))
2855 },
2856 )
2857 .ok();
2858
2859 let (internal_id, actual_session_id, name) = match session {
2860 Some(s) => s,
2861 None => {
2862 println!("{} Session not found: {}", "[!]".red(), session_id);
2863 anyhow::bail!("Session not found");
2864 }
2865 };
2866
2867 println!("{}", "=".repeat(70).cyan());
2868 println!("{}", " Session Checkpoints ".bold().cyan());
2869 println!("{}", "=".repeat(70).cyan());
2870 println!();
2871 println!(
2872 "{} Session: {} {}",
2873 "[i]".blue(),
2874 actual_session_id,
2875 if !name.is_empty() {
2876 format!("({})", name)
2877 } else {
2878 String::new()
2879 }
2880 );
2881 println!();
2882
2883 let mut stmt = conn.prepare(
2884 "SELECT checkpoint_number, message, message_count, content_hash, created_at
2885 FROM checkpoints WHERE session_id = ? ORDER BY checkpoint_number DESC",
2886 )?;
2887
2888 let checkpoints: Vec<_> = stmt
2889 .query_map([internal_id], |row| {
2890 Ok((
2891 row.get::<_, i64>(0)?,
2892 row.get::<_, String>(1)?,
2893 row.get::<_, i64>(2)?,
2894 row.get::<_, String>(3)?,
2895 row.get::<_, String>(4)?,
2896 ))
2897 })?
2898 .collect::<Result<Vec<_>, _>>()?;
2899
2900 if checkpoints.is_empty() {
2901 println!("{} No checkpoints found for this session", "[i]".blue());
2902 println!(
2903 " Use 'csm harvest checkpoint {} -m \"message\"' to create one",
2904 session_id
2905 );
2906 return Ok(());
2907 }
2908
2909 println!("{} {} checkpoints found:", "[i]".blue(), checkpoints.len());
2910 println!();
2911
2912 for (num, msg, msg_count, hash, created_at) in checkpoints {
2913 println!(" {} #{} - {}", "*".cyan(), num.to_string().bold(), msg);
2914 println!(
2915 " {} messages | {} | {}",
2916 msg_count,
2917 &hash[..12],
2918 created_at.dimmed()
2919 );
2920 }
2921
2922 println!();
2923 println!(
2924 " Use 'csm harvest restore {} <checkpoint>' to restore",
2925 session_id
2926 );
2927
2928 Ok(())
2929}
2930
2931pub fn harvest_restore_checkpoint(
2933 db_path: Option<&str>,
2934 session_id: &str,
2935 checkpoint_number: i64,
2936) -> Result<()> {
2937 let db_path = get_db_path(db_path)?;
2938
2939 if !db_path.exists() {
2940 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2941 }
2942
2943 let db = ChatDatabase::open(&db_path)?;
2944 let conn = db.connection();
2945
2946 let session: Option<(i64, String)> = conn
2948 .query_row(
2949 "SELECT id, session_id FROM sessions WHERE session_id = ? OR id = ?",
2950 params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2951 |row| Ok((row.get(0)?, row.get(1)?)),
2952 )
2953 .ok();
2954
2955 let (internal_id, actual_session_id) = match session {
2956 Some(s) => s,
2957 None => {
2958 println!("{} Session not found: {}", "[!]".red(), session_id);
2959 anyhow::bail!("Session not found");
2960 }
2961 };
2962
2963 let checkpoint: Option<(String, i64)> = conn
2965 .query_row(
2966 "SELECT snapshot, message_count FROM checkpoints
2967 WHERE session_id = ? AND checkpoint_number = ?",
2968 params![internal_id, checkpoint_number],
2969 |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
2970 )
2971 .ok();
2972
2973 let (snapshot_json, original_msg_count) = match checkpoint {
2974 Some(c) => c,
2975 None => {
2976 println!(
2977 "{} Checkpoint #{} not found for session {}",
2978 "[!]".red(),
2979 checkpoint_number,
2980 session_id
2981 );
2982 println!(
2983 " Use 'csm harvest checkpoints {}' to see available checkpoints",
2984 session_id
2985 );
2986 anyhow::bail!("Checkpoint not found");
2987 }
2988 };
2989
2990 let snapshot: serde_json::Value =
2992 serde_json::from_str(&snapshot_json).context("Failed to parse checkpoint snapshot")?;
2993
2994 let messages = snapshot["messages"]
2995 .as_array()
2996 .context("Invalid snapshot format")?;
2997
2998 println!("{}", "=".repeat(60).cyan());
2999 println!("{}", " Restore Checkpoint ".bold().yellow());
3000 println!("{}", "=".repeat(60).cyan());
3001 println!();
3002 println!("{} Session: {}", "[i]".blue(), actual_session_id);
3003 println!(
3004 "{} Restoring to checkpoint #{}",
3005 "[!]".yellow(),
3006 checkpoint_number
3007 );
3008 println!(
3009 "{} Messages to restore: {}",
3010 "[i]".blue(),
3011 original_msg_count
3012 );
3013 println!();
3014
3015 let deleted = conn.execute("DELETE FROM messages WHERE session_id = ?", [internal_id])?;
3017
3018 println!("{} Removed {} current messages", "[-]".red(), deleted);
3019
3020 for (idx, msg) in messages.iter().enumerate() {
3022 if let Some(content) = msg.as_str() {
3023 conn.execute(
3024 "INSERT INTO messages (session_id, message_index, role, content) VALUES (?, ?, 'unknown', ?)",
3025 params![internal_id, idx as i64, content],
3026 )?;
3027 }
3028 }
3029
3030 conn.execute(
3032 "UPDATE sessions SET message_count = ?, updated_at = datetime('now') WHERE id = ?",
3033 params![original_msg_count, internal_id],
3034 )?;
3035
3036 println!(
3037 "{} Restored {} messages from checkpoint",
3038 "[+]".green(),
3039 messages.len()
3040 );
3041 println!();
3042 println!(
3043 "{} Session restored to checkpoint #{}",
3044 "[+]".green().bold(),
3045 checkpoint_number
3046 );
3047
3048 Ok(())
3049}
3050
3051pub fn harvest_rebuild_fts(db_path: Option<&str>) -> Result<()> {
3057 let db_path = get_db_path(db_path)?;
3058
3059 if !db_path.exists() {
3060 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3061 }
3062
3063 let conn = Connection::open(&db_path)?;
3064
3065 println!("{}", "=".repeat(70).cyan());
3066 println!("{} Rebuilding Full-Text Search Index", "[*]".bold());
3067 println!("{}", "=".repeat(70).cyan());
3068 println!();
3069
3070 println!("{} Dropping old FTS index...", "[*]".blue());
3072 conn.execute_batch(
3073 r#"
3074 DROP TRIGGER IF EXISTS messages_v2_ai;
3075 DROP TRIGGER IF EXISTS messages_v2_ad;
3076 DROP TRIGGER IF EXISTS messages_v2_au;
3077 DROP TABLE IF EXISTS messages_fts;
3078 "#,
3079 )?;
3080
3081 println!("{} Creating new FTS index...", "[*]".blue());
3083 conn.execute_batch(
3084 r#"
3085 CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
3086 content_raw
3087 );
3088
3089 CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
3090 INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3091 END;
3092
3093 CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
3094 INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3095 END;
3096
3097 CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
3098 INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3099 INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3100 END;
3101 "#,
3102 )?;
3103
3104 let total_messages: i64 =
3106 conn.query_row("SELECT COUNT(*) FROM messages_v2", [], |row| row.get(0))?;
3107
3108 println!("{} Indexing {} messages...", "[*]".blue(), total_messages);
3110
3111 conn.execute(
3112 "INSERT INTO messages_fts(rowid, content_raw) SELECT id, content_raw FROM messages_v2",
3113 [],
3114 )?;
3115
3116 let indexed: i64 = conn.query_row("SELECT COUNT(*) FROM messages_fts", [], |row| row.get(0))?;
3118
3119 println!();
3120 println!("{} FTS index rebuilt successfully!", "[+]".green().bold());
3121 println!(" {} messages indexed", indexed);
3122
3123 Ok(())
3124}
3125
3126pub fn harvest_search(
3128 db_path: Option<&str>,
3129 query: &str,
3130 provider_filter: Option<&str>,
3131 limit: usize,
3132) -> Result<()> {
3133 let db_path = get_db_path(db_path)?;
3134
3135 if !db_path.exists() {
3136 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3137 }
3138
3139 let db = ChatDatabase::open(&db_path)?;
3140 let conn = db.connection();
3141
3142 println!("{}", "=".repeat(70).cyan());
3143 println!("{} Search: {}", "[?]".bold(), query.bold());
3144 println!("{}", "=".repeat(70).cyan());
3145 println!();
3146
3147 let results: Vec<(String, String, String, String)> = {
3149 let fts_exists: bool = conn
3151 .query_row(
3152 "SELECT 1 FROM sqlite_master WHERE type='table' AND name='messages_fts'",
3153 [],
3154 |_| Ok(true),
3155 )
3156 .unwrap_or(false);
3157
3158 if fts_exists {
3159 let sql = format!(
3161 "SELECT s.id, s.provider, s.title, m.content_raw
3162 FROM messages_fts fts
3163 JOIN messages_v2 m ON m.id = fts.rowid
3164 JOIN sessions s ON m.session_id = s.id
3165 WHERE messages_fts MATCH ?
3166 {}
3167 LIMIT {}",
3168 if provider_filter.is_some() {
3169 "AND s.provider = ?"
3170 } else {
3171 ""
3172 },
3173 limit
3174 );
3175
3176 let mut stmt = conn.prepare(&sql)?;
3177
3178 if let Some(provider) = provider_filter {
3179 stmt.query_map(params![query, provider], |row| {
3180 Ok((
3181 row.get::<_, String>(0)?,
3182 row.get::<_, String>(1)?,
3183 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3184 row.get::<_, String>(3)?,
3185 ))
3186 })?
3187 .collect::<Result<Vec<_>, _>>()?
3188 } else {
3189 stmt.query_map([query], |row| {
3190 Ok((
3191 row.get::<_, String>(0)?,
3192 row.get::<_, String>(1)?,
3193 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3194 row.get::<_, String>(3)?,
3195 ))
3196 })?
3197 .collect::<Result<Vec<_>, _>>()?
3198 }
3199 } else {
3200 let search_pattern = format!("%{}%", query);
3202 let sql = format!(
3203 "SELECT s.id, s.provider, s.title, m.content_raw
3204 FROM messages_v2 m
3205 JOIN sessions s ON m.session_id = s.id
3206 WHERE m.content_raw LIKE ?
3207 {}
3208 LIMIT {}",
3209 if provider_filter.is_some() {
3210 "AND s.provider = ?"
3211 } else {
3212 ""
3213 },
3214 limit
3215 );
3216
3217 let mut stmt = conn.prepare(&sql)?;
3218
3219 if let Some(provider) = provider_filter {
3220 stmt.query_map(params![search_pattern, provider], |row| {
3221 Ok((
3222 row.get::<_, String>(0)?,
3223 row.get::<_, String>(1)?,
3224 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3225 row.get::<_, String>(3)?,
3226 ))
3227 })?
3228 .collect::<Result<Vec<_>, _>>()?
3229 } else {
3230 stmt.query_map([search_pattern], |row| {
3231 Ok((
3232 row.get::<_, String>(0)?,
3233 row.get::<_, String>(1)?,
3234 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3235 row.get::<_, String>(3)?,
3236 ))
3237 })?
3238 .collect::<Result<Vec<_>, _>>()?
3239 }
3240 }
3241 };
3242
3243 if results.is_empty() {
3244 println!("{} No results found for '{}'", "[i]".blue(), query);
3245 return Ok(());
3246 }
3247
3248 println!("{} Found {} result(s):", "[i]".blue(), results.len());
3249 println!();
3250
3251 for (session_id, provider, title, content) in results {
3252 let display_name = if title.is_empty() {
3254 session_id.clone()
3255 } else {
3256 format!("{} ({})", title, &session_id[..8.min(session_id.len())])
3257 };
3258
3259 println!(
3260 "{} {} [{}]",
3261 "*".cyan(),
3262 display_name.bold(),
3263 provider.dimmed()
3264 );
3265
3266 let snippet = create_search_snippet(&content, query, 100);
3268 println!(" {}", snippet.dimmed());
3269 println!();
3270 }
3271
3272 Ok(())
3273}
3274
3275fn create_search_snippet(content: &str, query: &str, max_len: usize) -> String {
3277 let content_lower = content.to_lowercase();
3278 let query_lower = query.to_lowercase();
3279
3280 if let Some(pos) = content_lower.find(&query_lower) {
3281 let start = pos.saturating_sub(max_len / 2);
3282 let end = (pos + query.len() + max_len / 2).min(content.len());
3283
3284 let mut snippet = String::new();
3285 if start > 0 {
3286 snippet.push_str("...");
3287 }
3288 snippet.push_str(&content[start..end]);
3289 if end < content.len() {
3290 snippet.push_str("...");
3291 }
3292
3293 snippet.replace('\n', " ").replace('\r', "")
3295 } else {
3296 if content.len() > max_len {
3298 format!("{}...", &content[..max_len])
3299 } else {
3300 content.to_string()
3301 }
3302 }
3303}
3304
3305fn md5_hash(data: &str) -> u128 {
3307 let mut hash: u128 = 0;
3309 for (i, byte) in data.bytes().enumerate() {
3310 hash = hash.wrapping_add((byte as u128).wrapping_mul((i as u128).wrapping_add(1)));
3311 hash = hash.rotate_left(7);
3312 }
3313 hash
3314}