1use anyhow::{Context, Result};
10use chrono::{DateTime, Utc};
11use colored::*;
12use rusqlite::{params, Connection};
13use serde::{Deserialize, Serialize};
14use std::fs;
15use std::path::{Path, PathBuf};
16use std::process::Command;
17use std::time::Duration;
18
19use crate::browser::{get_installed_browsers, scan_browser_auth, BrowserType};
20use crate::database::{ChatDatabase, ShareLinkParser};
21use crate::models::ChatSession;
22use crate::providers::{ProviderRegistry, ProviderType};
23use crate::workspace::{discover_workspaces, get_chat_sessions_from_workspace};
24
25fn is_empty_code_block(s: &str) -> bool {
27 let s = s.trim();
29 if s == "```" {
30 return true;
31 }
32 if s.starts_with("```") && !s.contains('\n') {
34 return true;
35 }
36 let lines: Vec<&str> = s.lines().collect();
38 if lines.len() >= 2 && lines[0].starts_with("```") && lines.last() == Some(&"```") {
39 let content_lines = &lines[1..lines.len() - 1];
41 if content_lines.iter().all(|line| line.trim().is_empty()) {
42 return true;
43 }
44 }
45 false
46}
47
48type HarvestQueryResult = (String, String, String, i64, i64, i64, Option<String>);
50
51#[derive(Debug, Clone)]
53struct WebProviderEndpoint {
54 name: &'static str,
55 url: &'static str,
56 description: &'static str,
57}
58
59const WEB_PROVIDERS: &[WebProviderEndpoint] = &[
61 WebProviderEndpoint {
62 name: "ChatGPT",
63 url: "https://chat.openai.com",
64 description: "OpenAI ChatGPT Web Interface",
65 },
66 WebProviderEndpoint {
67 name: "Claude",
68 url: "https://claude.ai",
69 description: "Anthropic Claude Web Interface",
70 },
71 WebProviderEndpoint {
72 name: "Gemini",
73 url: "https://gemini.google.com",
74 description: "Google Gemini Web Interface",
75 },
76 WebProviderEndpoint {
77 name: "Perplexity",
78 url: "https://www.perplexity.ai",
79 description: "Perplexity AI Search Interface",
80 },
81 WebProviderEndpoint {
82 name: "DeepSeek",
83 url: "https://chat.deepseek.com",
84 description: "DeepSeek Chat Interface",
85 },
86 WebProviderEndpoint {
87 name: "Poe",
88 url: "https://poe.com",
89 description: "Quora Poe Multi-model Chat",
90 },
91 WebProviderEndpoint {
92 name: "You.com",
93 url: "https://you.com/chat",
94 description: "You.com AI Chat",
95 },
96 WebProviderEndpoint {
97 name: "HuggingChat",
98 url: "https://huggingface.co/chat",
99 description: "HuggingFace Chat Interface",
100 },
101 WebProviderEndpoint {
102 name: "Copilot",
103 url: "https://copilot.microsoft.com",
104 description: "Microsoft Copilot Web Interface",
105 },
106 WebProviderEndpoint {
107 name: "Mistral",
108 url: "https://chat.mistral.ai",
109 description: "Mistral AI Le Chat Interface",
110 },
111 WebProviderEndpoint {
112 name: "Cohere",
113 url: "https://coral.cohere.com",
114 description: "Cohere Coral Chat Interface",
115 },
116 WebProviderEndpoint {
117 name: "Groq",
118 url: "https://groq.com",
119 description: "Groq Fast Inference Interface",
120 },
121 WebProviderEndpoint {
122 name: "Phind",
123 url: "https://www.phind.com",
124 description: "Phind AI Code Search",
125 },
126 WebProviderEndpoint {
127 name: "Character.AI",
128 url: "https://character.ai",
129 description: "Character.AI Chat Interface",
130 },
131 WebProviderEndpoint {
132 name: "Pi",
133 url: "https://pi.ai",
134 description: "Inflection Pi Personal AI",
135 },
136];
137
138fn scan_web_providers(timeout_secs: u64) -> Vec<String> {
140 use std::sync::{Arc, Mutex};
141 use std::thread;
142
143 let reachable = Arc::new(Mutex::new(Vec::new()));
144 let results = Arc::new(Mutex::new(Vec::new()));
145
146 let handles: Vec<_> = WEB_PROVIDERS
148 .iter()
149 .map(|provider| {
150 let reachable = Arc::clone(&reachable);
151 let results = Arc::clone(&results);
152 let timeout = timeout_secs;
153 let name = provider.name;
154 let url = provider.url;
155 let desc = provider.description;
156
157 thread::spawn(move || {
158 let client = match reqwest::blocking::Client::builder()
159 .timeout(Duration::from_secs(timeout))
160 .connect_timeout(Duration::from_secs(timeout.min(3))) .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
162 .build()
163 {
164 Ok(c) => c,
165 Err(_) => return,
166 };
167
168 let result: (&str, bool, String, &str) = match client.head(url).send() {
169 Ok(response) => {
170 let status = response.status();
171 if status.is_success() || status.is_redirection() {
172 reachable.lock().unwrap().push(name.to_string());
173 (name, true, desc.to_string(), url)
174 } else {
175 (name, false, format!("HTTP {}", status.as_u16()), url)
176 }
177 }
178 Err(e) => {
179 let reason = if e.is_timeout() {
180 "timeout".to_string()
181 } else if e.is_connect() {
182 "connection failed".to_string()
183 } else {
184 "unreachable".to_string()
185 };
186 (name, false, reason, url)
187 }
188 };
189
190 results.lock().unwrap().push((
191 result.0.to_string(),
192 result.1,
193 result.2,
194 result.3.to_string(),
195 ));
196 })
197 })
198 .collect();
199
200 for handle in handles {
202 let _ = handle.join();
203 }
204
205 let results = results.lock().unwrap();
207 let mut sorted_results: Vec<_> = results.iter().collect();
208 sorted_results.sort_by_key(|(name, _, _, _)| name.as_str());
209
210 for (name, success, info, url) in sorted_results {
211 if *success {
212 println!(
213 " {} {}: {} ({})",
214 "[+]".green(),
215 name.bold(),
216 "reachable".green(),
217 info.dimmed()
218 );
219 println!(" {} {}", "`".dimmed(), url.dimmed());
220 } else {
221 println!(
222 " {} {}: {} ({})",
223 "[-]".dimmed(),
224 name,
225 "blocked or unavailable".dimmed(),
226 info.dimmed()
227 );
228 }
229 }
230
231 let result = reachable.lock().unwrap().clone();
232 result
233}
234
235fn scan_browser_authentication(
237 verbose: bool,
238) -> (std::collections::HashMap<String, Vec<BrowserType>>, usize) {
239 use crate::browser::scan_browser_auth_verbose;
240 use std::collections::HashMap;
241
242 let installed = get_installed_browsers();
243 if installed.is_empty() {
244 println!(" {} No supported browsers found", "[-]".dimmed());
245 return (HashMap::new(), 0);
246 }
247
248 println!(
249 " {} Checking {} browser(s): {}",
250 "[*]".blue(),
251 installed.len(),
252 installed
253 .iter()
254 .map(|b| b.name())
255 .collect::<Vec<_>>()
256 .join(", ")
257 );
258
259 let results = if verbose {
260 scan_browser_auth_verbose()
261 } else {
262 scan_browser_auth()
263 };
264
265 let mut authenticated: HashMap<String, Vec<BrowserType>> = HashMap::new();
267
268 for result in results {
269 if result.authenticated {
270 authenticated
271 .entry(result.provider.clone())
272 .or_default()
273 .push(result.browser);
274 }
275 }
276
277 let count = authenticated.len();
278
279 if authenticated.is_empty() {
280 println!(
281 " {} No authenticated web LLM providers found",
282 "[-]".dimmed()
283 );
284 println!(
285 " {} Log into ChatGPT, Claude, etc. in your browser to enable harvesting",
286 "`".dimmed()
287 );
288 } else {
289 for (provider, browsers) in &authenticated {
290 let browser_names: Vec<_> = browsers.iter().map(|b| b.name()).collect();
291 println!(
292 " {} {}: {} in {}",
293 "[+]".green(),
294 provider.bold(),
295 "authenticated".green(),
296 browser_names.join(", ")
297 );
298 }
299 }
300
301 (authenticated, count)
302}
303
304#[allow(dead_code)]
306#[derive(Debug, Clone, Serialize, Deserialize)]
307pub struct HarvestConfig {
308 pub db_path: PathBuf,
310 pub auto_commit: bool,
312 pub include_providers: Vec<String>,
314 pub exclude_providers: Vec<String>,
316 pub include_workspaces: bool,
318 pub last_harvest: Option<i64>,
320}
321
322#[allow(dead_code)]
323impl Default for HarvestConfig {
324 fn default() -> Self {
325 Self {
326 db_path: PathBuf::from("chat_sessions.db"),
327 auto_commit: false,
328 include_providers: Vec::new(),
329 exclude_providers: Vec::new(),
330 include_workspaces: true,
331 last_harvest: None,
332 }
333 }
334}
335
336#[derive(Debug, Default)]
338pub struct HarvestStats {
339 pub providers_scanned: usize,
340 pub workspaces_scanned: usize,
341 pub sessions_found: usize,
342 pub sessions_added: usize,
343 pub sessions_updated: usize,
344 pub sessions_skipped: usize,
345 pub errors: Vec<String>,
346}
347
348#[allow(dead_code)]
350#[derive(Debug, Clone, Serialize, Deserialize)]
351pub struct HarvestedSession {
352 pub id: String,
353 pub provider: String,
354 pub provider_type: String,
355 pub workspace_id: Option<String>,
356 pub workspace_name: Option<String>,
357 pub title: String,
358 pub message_count: usize,
359 pub created_at: i64,
360 pub updated_at: i64,
361 pub harvested_at: i64,
362 pub session_json: String,
363}
364
365pub fn harvest_init(path: Option<&str>, git_init: bool) -> Result<()> {
367 let db_path = get_db_path(path)?;
368 let db_dir = db_path.parent().unwrap_or(Path::new("."));
369
370 println!("\n{} Initializing Harvest Database", "[H]".magenta().bold());
371 println!("{}", "=".repeat(60));
372
373 if !db_dir.exists() {
375 fs::create_dir_all(db_dir)?;
376 println!("{} Created directory: {}", "[+]".green(), db_dir.display());
377 }
378
379 if db_path.exists() {
381 println!(
382 "{} Database already exists: {}",
383 "[!]".yellow(),
384 db_path.display()
385 );
386 println!(" Use 'csm harvest run' to update it");
387 return Ok(());
388 }
389
390 create_harvest_database(&db_path)?;
392 println!("{} Created database: {}", "[+]".green(), db_path.display());
393
394 if git_init {
396 init_git_tracking(&db_path)?;
397 }
398
399 println!("\n{} Harvest database initialized!", "[+]".green().bold());
400 println!("\nNext steps:");
401 println!(" 1. Run 'csm harvest scan' to see available providers");
402 println!(" 2. Run 'csm harvest run' to collect sessions");
403 if !git_init {
404 println!(" 3. Run 'csm harvest git init' to enable version tracking");
405 }
406
407 Ok(())
408}
409
410pub fn harvest_scan(
412 show_sessions: bool,
413 scan_web: bool,
414 timeout_secs: u64,
415 verbose: bool,
416) -> Result<()> {
417 println!("\n{} Scanning for Providers", "[H]".magenta().bold());
418 println!("{}", "=".repeat(60));
419
420 let registry = ProviderRegistry::new();
421 let mut total_sessions = 0;
422 let mut available_providers = Vec::new();
423
424 println!("\n{} LLM Providers:", "[*]".blue().bold());
426
427 let provider_types = vec![
428 ProviderType::Copilot,
429 ProviderType::Cursor,
430 ProviderType::Ollama,
431 ProviderType::Vllm,
432 ProviderType::Foundry,
433 ProviderType::LmStudio,
434 ProviderType::LocalAI,
435 ProviderType::TextGenWebUI,
436 ProviderType::Jan,
437 ProviderType::Gpt4All,
438 ProviderType::Llamafile,
439 ProviderType::OpenAI,
440 ProviderType::ChatGPT,
441 ProviderType::Anthropic,
442 ProviderType::Perplexity,
443 ProviderType::DeepSeek,
444 ProviderType::Gemini,
445 ];
446
447 for pt in &provider_types {
448 if let Some(provider) = registry.get_provider(*pt) {
449 let available = provider.is_available();
450 let session_count = if available {
451 provider.list_sessions().map(|s| s.len()).unwrap_or(0)
452 } else {
453 0
454 };
455
456 if available {
457 available_providers.push((*pt, session_count));
458 total_sessions += session_count;
459
460 let status = if session_count > 0 {
461 format!(
462 "{} {} sessions",
463 "+".green(),
464 session_count.to_string().cyan()
465 )
466 } else {
467 format!("{} no sessions", "+".green())
468 };
469
470 println!(
471 " {} {}: {}",
472 "[+]".green(),
473 provider.name().bold(),
474 status
475 );
476
477 if show_sessions && session_count > 0 {
478 if let Ok(sessions) = provider.list_sessions() {
479 for session in sessions.iter().take(3) {
480 println!(" {} {}", "`".dimmed(), session.title().dimmed());
481 }
482 if sessions.len() > 3 {
483 println!(" {} ... and {} more", "`".dimmed(), sessions.len() - 3);
484 }
485 }
486 }
487
488 if let Some(path) = provider.sessions_path() {
489 println!(
490 " {} {}",
491 "`".dimmed(),
492 path.display().to_string().dimmed()
493 );
494 }
495 }
496 }
497 }
498
499 println!("\n{} VS Code Workspaces:", "[*]".blue().bold());
501
502 match discover_workspaces() {
503 Ok(workspaces) => {
504 let workspaces_with_sessions: Vec<_> = workspaces
505 .iter()
506 .filter(|ws| ws.chat_session_count > 0)
507 .collect();
508
509 let ws_sessions: usize = workspaces_with_sessions
510 .iter()
511 .map(|ws| ws.chat_session_count)
512 .sum();
513
514 println!(
515 " {} {} workspaces with {} sessions",
516 "[+]".green(),
517 workspaces_with_sessions.len().to_string().cyan(),
518 ws_sessions.to_string().cyan()
519 );
520
521 if show_sessions {
522 for ws in workspaces_with_sessions.iter().take(5) {
523 let name = ws
524 .project_path
525 .clone()
526 .unwrap_or_else(|| ws.hash[..8.min(ws.hash.len())].to_string());
527 println!(
528 " {} {} ({} sessions)",
529 "`".dimmed(),
530 name.dimmed(),
531 ws.chat_session_count
532 );
533 }
534 if workspaces_with_sessions.len() > 5 {
535 println!(
536 " {} ... and {} more workspaces",
537 "`".dimmed(),
538 workspaces_with_sessions.len() - 5
539 );
540 }
541 }
542
543 total_sessions += ws_sessions;
544 }
545 Err(e) => {
546 println!(" {} Failed to scan workspaces: {}", "[!]".yellow(), e);
547 }
548 }
549
550 let mut web_providers_found = Vec::new();
552 let mut authenticated_count = 0;
553 if scan_web {
554 println!("\n{} Browser Authentication:", "[*]".blue().bold());
556 let (auth_results, auth_count) = scan_browser_authentication(verbose);
557 authenticated_count = auth_count;
558
559 println!("\n{} Web LLM Provider Endpoints:", "[*]".blue().bold());
561 web_providers_found = scan_web_providers(timeout_secs);
562
563 if !auth_results.is_empty() {
565 println!("\n{} Authenticated Provider Sessions:", "[*]".blue().bold());
566 for (provider, browsers) in &auth_results {
567 let browser_list: Vec<_> = browsers.iter().map(|b| b.name()).collect();
568 let reachable = web_providers_found.iter().any(|p| p == provider);
569 let status = if reachable {
570 format!("{} (reachable)", "ready to harvest".green())
571 } else {
572 format!("{}", "authenticated but endpoint blocked".yellow())
573 };
574 println!(
575 " {} {}: {} via {}",
576 "[+]".green(),
577 provider.bold(),
578 status,
579 browser_list.join(", ").dimmed()
580 );
581 }
582 }
583 }
584
585 println!("\n{} Summary:", "[*]".green().bold());
587 println!(
588 " {} local providers available",
589 available_providers.len().to_string().cyan()
590 );
591 if scan_web {
592 println!(
593 " {} web providers reachable",
594 web_providers_found.len().to_string().cyan()
595 );
596 println!(
597 " {} web providers authenticated",
598 authenticated_count.to_string().cyan()
599 );
600 }
601 println!(
602 " {} total sessions to harvest",
603 total_sessions.to_string().cyan()
604 );
605
606 Ok(())
607}
608
609pub fn harvest_run(
611 path: Option<&str>,
612 providers: Option<&[String]>,
613 exclude: Option<&[String]>,
614 incremental: bool,
615 auto_commit: bool,
616 message: Option<&str>,
617) -> Result<()> {
618 let db_path = get_db_path(path)?;
619
620 println!("\n{} Running Harvest", "[H]".magenta().bold());
621 println!("{}", "=".repeat(60));
622
623 if !db_path.exists() {
625 println!("{} Database not found, creating...", "[*]".blue());
626 create_harvest_database(&db_path)?;
627 }
628
629 let conn = Connection::open(&db_path)?;
630 let mut stats = HarvestStats::default();
631
632 let last_harvest: Option<i64> = if incremental {
634 conn.query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
635 row.get(0)
636 })
637 .ok()
638 } else {
639 None
640 };
641
642 if let Some(ts) = last_harvest {
643 let dt = DateTime::from_timestamp_millis(ts)
644 .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
645 .unwrap_or_default();
646 println!("{} Incremental harvest since: {}", "[*]".blue(), dt);
647 }
648
649 let registry = ProviderRegistry::new();
650 let include_providers = providers.map(|p| p.to_vec());
651 let exclude_providers = exclude.map(|p| p.to_vec()).unwrap_or_default();
652
653 println!("\n{} Harvesting from providers...", "[*]".blue());
655
656 let provider_types = vec![
657 ProviderType::Copilot,
658 ProviderType::Cursor,
659 ProviderType::Ollama,
660 ProviderType::Vllm,
661 ProviderType::Foundry,
662 ProviderType::LmStudio,
663 ProviderType::LocalAI,
664 ProviderType::TextGenWebUI,
665 ProviderType::Jan,
666 ProviderType::Gpt4All,
667 ProviderType::Llamafile,
668 ];
669
670 for pt in &provider_types {
671 let provider_name = pt.display_name().to_lowercase();
672
673 if let Some(ref include) = include_providers {
675 if !include
676 .iter()
677 .any(|p| provider_name.contains(&p.to_lowercase()))
678 {
679 continue;
680 }
681 }
682 if exclude_providers
683 .iter()
684 .any(|p| provider_name.contains(&p.to_lowercase()))
685 {
686 continue;
687 }
688
689 if let Some(provider) = registry.get_provider(*pt) {
690 if !provider.is_available() {
691 continue;
692 }
693
694 stats.providers_scanned += 1;
695
696 match provider.list_sessions() {
697 Ok(sessions) => {
698 for session in sessions {
699 stats.sessions_found += 1;
700
701 if let Some(last) = last_harvest {
703 if session.last_message_date <= last {
704 stats.sessions_skipped += 1;
705 continue;
706 }
707 }
708
709 match insert_or_update_session(
710 &conn,
711 &session,
712 pt.display_name(),
713 None,
714 None,
715 ) {
716 Ok(updated) => {
717 if updated {
718 stats.sessions_updated += 1;
719 } else {
720 stats.sessions_added += 1;
721 }
722 }
723 Err(e) => {
724 stats.errors.push(format!("{}: {}", session.title(), e));
725 }
726 }
727 }
728
729 let session_count = stats.sessions_added + stats.sessions_updated;
730 if session_count > 0 {
731 println!(
732 " {} {}: {} sessions",
733 "[+]".green(),
734 provider.name(),
735 session_count.to_string().cyan()
736 );
737 }
738 }
739 Err(e) => {
740 stats.errors.push(format!("{}: {}", provider.name(), e));
741 }
742 }
743 }
744 }
745
746 if include_providers.is_none()
748 || include_providers
749 .as_ref()
750 .map(|p| {
751 p.iter()
752 .any(|x| x == "copilot" || x == "vscode" || x == "workspace")
753 })
754 .unwrap_or(false)
755 {
756 println!("\n{} Harvesting from VS Code workspaces...", "[*]".blue());
757
758 if let Ok(workspaces) = discover_workspaces() {
759 for ws in &workspaces {
760 if ws.chat_session_count == 0 {
761 continue;
762 }
763
764 stats.workspaces_scanned += 1;
765
766 if let Ok(sessions) = get_chat_sessions_from_workspace(&ws.workspace_path) {
767 for swp in sessions {
768 stats.sessions_found += 1;
769
770 if let Some(last) = last_harvest {
772 if swp.session.last_message_date <= last {
773 stats.sessions_skipped += 1;
774 continue;
775 }
776 }
777
778 let ws_name = ws.project_path.clone();
779
780 match insert_or_update_session(
781 &conn,
782 &swp.session,
783 "GitHub Copilot",
784 Some(&ws.hash),
785 ws_name.as_deref(),
786 ) {
787 Ok(updated) => {
788 if updated {
789 stats.sessions_updated += 1;
790 } else {
791 stats.sessions_added += 1;
792 }
793 }
794 Err(e) => {
795 stats.errors.push(format!("{}: {}", swp.session.title(), e));
796 }
797 }
798 }
799 }
800 }
801
802 println!(
803 " {} Workspaces: {} scanned",
804 "[+]".green(),
805 stats.workspaces_scanned.to_string().cyan()
806 );
807 }
808 }
809
810 let include_list: Vec<String> = include_providers.clone().unwrap_or_default();
812 harvest_web_providers(&conn, &mut stats, &include_list, &exclude_providers)?;
813
814 update_harvest_metadata(&conn)?;
816
817 println!("\n{} Harvest Complete:", "[+]".green().bold());
819 println!(
820 " {} providers scanned",
821 stats.providers_scanned.to_string().cyan()
822 );
823 println!(
824 " {} workspaces scanned",
825 stats.workspaces_scanned.to_string().cyan()
826 );
827 println!(
828 " {} sessions found",
829 stats.sessions_found.to_string().cyan()
830 );
831 println!(
832 " {} sessions added",
833 stats.sessions_added.to_string().green()
834 );
835 println!(
836 " {} sessions updated",
837 stats.sessions_updated.to_string().yellow()
838 );
839 if stats.sessions_skipped > 0 {
840 println!(
841 " {} sessions skipped (unchanged)",
842 stats.sessions_skipped.to_string().dimmed()
843 );
844 }
845
846 if !stats.errors.is_empty() {
847 println!("\n{} Errors ({}):", "[!]".red(), stats.errors.len());
848 for (i, err) in stats.errors.iter().take(5).enumerate() {
849 println!(" {}. {}", i + 1, err);
850 }
851 if stats.errors.len() > 5 {
852 println!(" ... and {} more errors", stats.errors.len() - 5);
853 }
854 }
855
856 if auto_commit && (stats.sessions_added > 0 || stats.sessions_updated > 0) {
858 println!("\n{} Auto-committing changes...", "[*]".blue());
859 let commit_msg = message.unwrap_or("Harvest: update chat sessions");
860 if let Err(e) = git_commit_harvest(&db_path, commit_msg) {
861 println!("{} Git commit failed: {}", "[!]".yellow(), e);
862 } else {
863 println!("{} Changes committed", "[+]".green());
864 }
865 }
866
867 println!("\nDatabase: {}", db_path.display());
868
869 Ok(())
870}
871
872pub fn harvest_status(path: Option<&str>) -> Result<()> {
874 let db_path = get_db_path(path)?;
875
876 println!("\n{} Harvest Database Status", "[H]".magenta().bold());
877 println!("{}", "=".repeat(60));
878
879 if !db_path.exists() {
880 println!(
881 "{} Database not found: {}",
882 "[!]".yellow(),
883 db_path.display()
884 );
885 println!(" Run 'csm harvest init' to create one");
886 return Ok(());
887 }
888
889 let conn = Connection::open(&db_path)?;
890
891 let mut stmt = conn.prepare(
893 "SELECT provider, COUNT(*) as count, SUM(message_count) as messages
894 FROM sessions
895 GROUP BY provider
896 ORDER BY count DESC",
897 )?;
898
899 let provider_stats: Vec<(String, i64, i64)> = stmt
900 .query_map([], |row| {
901 Ok((
902 row.get(0)?,
903 row.get(1)?,
904 row.get::<_, Option<i64>>(2)?.unwrap_or(0),
905 ))
906 })?
907 .filter_map(|r| r.ok())
908 .collect();
909
910 let total_sessions: i64 = conn
912 .query_row("SELECT COUNT(*) FROM sessions", [], |row| row.get(0))
913 .unwrap_or(0);
914
915 let total_messages: i64 = conn
916 .query_row(
917 "SELECT COALESCE(SUM(message_count), 0) FROM sessions",
918 [],
919 |row| row.get(0),
920 )
921 .unwrap_or(0);
922
923 let last_harvest: Option<i64> = conn
925 .query_row("SELECT MAX(harvested_at) FROM sessions", [], |row| {
926 row.get(0)
927 })
928 .ok()
929 .flatten();
930
931 let oldest: Option<i64> = conn
933 .query_row("SELECT MIN(created_at) FROM sessions", [], |row| row.get(0))
934 .ok()
935 .flatten();
936
937 let newest: Option<i64> = conn
938 .query_row("SELECT MAX(updated_at) FROM sessions", [], |row| row.get(0))
939 .ok()
940 .flatten();
941
942 println!("{} Database: {}", "[*]".blue(), db_path.display());
943 println!(
944 "{} Total Sessions: {}",
945 "[*]".blue(),
946 total_sessions.to_string().cyan()
947 );
948 println!(
949 "{} Total Messages: {}",
950 "[*]".blue(),
951 total_messages.to_string().cyan()
952 );
953
954 if let Some(ts) = last_harvest {
955 let dt = DateTime::from_timestamp_millis(ts)
956 .map(|d| d.format("%Y-%m-%d %H:%M:%S").to_string())
957 .unwrap_or_else(|| "Unknown".to_string());
958 println!("{} Last Harvest: {}", "[*]".blue(), dt);
959 }
960
961 if let (Some(old), Some(new)) = (oldest, newest) {
962 let old_dt = DateTime::from_timestamp_millis(old)
963 .map(|d| d.format("%Y-%m-%d").to_string())
964 .unwrap_or_default();
965 let new_dt = DateTime::from_timestamp_millis(new)
966 .map(|d| d.format("%Y-%m-%d").to_string())
967 .unwrap_or_default();
968 println!("{} Date Range: {} to {}", "[*]".blue(), old_dt, new_dt);
969 }
970
971 if !provider_stats.is_empty() {
972 println!("\n{} Sessions by Provider:", "[*]".blue());
973 for (provider, count, messages) in &provider_stats {
974 println!(
975 " {} {}: {} sessions, {} messages",
976 "[+]".green(),
977 provider.bold(),
978 count.to_string().cyan(),
979 messages.to_string().dimmed()
980 );
981 }
982 }
983
984 let db_dir = db_path.parent().unwrap_or(Path::new("."));
986 if db_dir.join(".git").exists()
987 || db_path
988 .parent()
989 .map(|p| p.join(".git").exists())
990 .unwrap_or(false)
991 {
992 println!("\n{} Git Status:", "[*]".blue());
993
994 let output = Command::new("git")
995 .current_dir(db_dir)
996 .args([
997 "status",
998 "--porcelain",
999 db_path.file_name().unwrap().to_str().unwrap(),
1000 ])
1001 .output();
1002
1003 match output {
1004 Ok(out) => {
1005 let status = String::from_utf8_lossy(&out.stdout);
1006 if status.is_empty() {
1007 println!(" {} No uncommitted changes", "[+]".green());
1008 } else {
1009 println!(" {} Uncommitted changes detected", "[!]".yellow());
1010 println!(" Run 'csm harvest git commit' to save changes");
1011 }
1012 }
1013 Err(_) => {
1014 println!(" {} Unable to check git status", "[!]".yellow());
1015 }
1016 }
1017 } else {
1018 println!("\n{} Git tracking not enabled", "[i]".dimmed());
1019 println!(" Run 'csm harvest git init' to enable version tracking");
1020 }
1021
1022 Ok(())
1023}
1024
1025pub fn harvest_list(
1027 path: Option<&str>,
1028 provider: Option<&str>,
1029 limit: usize,
1030 search: Option<&str>,
1031) -> Result<()> {
1032 let db_path = get_db_path(path)?;
1033
1034 if !db_path.exists() {
1035 println!(
1036 "{} Database not found: {}",
1037 "[!]".yellow(),
1038 db_path.display()
1039 );
1040 return Ok(());
1041 }
1042
1043 let conn = Connection::open(&db_path)?;
1044
1045 let mut query = String::from(
1046 "SELECT id, provider, title, message_count, created_at, updated_at, workspace_name
1047 FROM sessions WHERE 1=1",
1048 );
1049 let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1050
1051 if let Some(p) = provider {
1052 query.push_str(" AND LOWER(provider) LIKE ?");
1053 params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1054 }
1055
1056 if let Some(s) = search {
1057 query.push_str(" AND (LOWER(title) LIKE ? OR LOWER(id) LIKE ?)");
1058 let pattern = format!("%{}%", s.to_lowercase());
1059 params_vec.push(Box::new(pattern.clone()));
1060 params_vec.push(Box::new(pattern));
1061 }
1062
1063 query.push_str(" ORDER BY updated_at DESC LIMIT ?");
1064 params_vec.push(Box::new(limit as i64));
1065
1066 let mut stmt = conn.prepare(&query)?;
1067
1068 let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1070
1071 let sessions: Vec<HarvestQueryResult> = stmt
1072 .query_map(params_slice.as_slice(), |row| {
1073 Ok((
1074 row.get(0)?,
1075 row.get(1)?,
1076 row.get(2)?,
1077 row.get(3)?,
1078 row.get(4)?,
1079 row.get(5)?,
1080 row.get(6)?,
1081 ))
1082 })?
1083 .filter_map(|r| r.ok())
1084 .collect();
1085
1086 println!("\n{} Harvested Sessions", "[H]".magenta().bold());
1087 println!("{}", "=".repeat(60));
1088
1089 if sessions.is_empty() {
1090 println!("{} No sessions found", "[i]".dimmed());
1091 return Ok(());
1092 }
1093
1094 for (id, prov, title, msg_count, _created, updated, ws_name) in &sessions {
1095 let date = DateTime::from_timestamp_millis(*updated)
1096 .map(|d| d.format("%Y-%m-%d %H:%M").to_string())
1097 .unwrap_or_default();
1098
1099 println!("\n{} {}", "[S]".cyan(), title.bold());
1100 println!(" ID: {}", &id[..16.min(id.len())].dimmed());
1101 println!(
1102 " Provider: {} | Messages: {} | Updated: {}",
1103 prov.cyan(),
1104 msg_count.to_string().green(),
1105 date.dimmed()
1106 );
1107 if let Some(ws) = ws_name {
1108 println!(" Workspace: {}", ws.dimmed());
1109 }
1110 }
1111
1112 println!(
1113 "\n{} Showing {} of available sessions",
1114 "[i]".dimmed(),
1115 sessions.len()
1116 );
1117
1118 Ok(())
1119}
1120
1121pub fn harvest_export(
1123 path: Option<&str>,
1124 output: &str,
1125 format: &str,
1126 provider: Option<&str>,
1127 session_ids: Option<&[String]>,
1128) -> Result<()> {
1129 let db_path = get_db_path(path)?;
1130 let output_path = PathBuf::from(output);
1131
1132 if !db_path.exists() {
1133 anyhow::bail!("Database not found: {}", db_path.display());
1134 }
1135
1136 let conn = Connection::open(&db_path)?;
1137
1138 println!("\n{} Exporting Sessions", "[H]".magenta().bold());
1139 println!("{}", "=".repeat(60));
1140
1141 let mut query = String::from("SELECT session_json FROM sessions WHERE 1=1");
1143 let mut params_vec: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
1144
1145 if let Some(p) = provider {
1146 query.push_str(" AND LOWER(provider) LIKE ?");
1147 params_vec.push(Box::new(format!("%{}%", p.to_lowercase())));
1148 }
1149
1150 if let Some(ids) = session_ids {
1151 let placeholders: Vec<&str> = ids.iter().map(|_| "?").collect();
1152 query.push_str(&format!(" AND id IN ({})", placeholders.join(",")));
1153 for id in ids {
1154 params_vec.push(Box::new(id.clone()));
1155 }
1156 }
1157
1158 let mut stmt = conn.prepare(&query)?;
1159 let params_slice: Vec<&dyn rusqlite::ToSql> = params_vec.iter().map(|b| b.as_ref()).collect();
1160
1161 let sessions: Vec<String> = stmt
1162 .query_map(params_slice.as_slice(), |row| row.get(0))?
1163 .filter_map(|r| r.ok())
1164 .collect();
1165
1166 if sessions.is_empty() {
1167 println!("{} No sessions to export", "[i]".dimmed());
1168 return Ok(());
1169 }
1170
1171 if let Some(parent) = output_path.parent() {
1173 if !parent.exists() {
1174 fs::create_dir_all(parent)?;
1175 }
1176 }
1177
1178 match format.to_lowercase().as_str() {
1179 "json" => {
1180 let parsed: Vec<serde_json::Value> = sessions
1182 .iter()
1183 .filter_map(|s| serde_json::from_str(s).ok())
1184 .collect();
1185 let json_output = serde_json::to_string_pretty(&parsed)?;
1186 fs::write(&output_path, json_output)?;
1187 }
1188 "jsonl" => {
1189 let content: String = sessions
1191 .iter()
1192 .filter_map(|s| serde_json::from_str::<serde_json::Value>(s).ok())
1193 .map(|v| serde_json::to_string(&v).unwrap_or_default())
1194 .collect::<Vec<_>>()
1195 .join("\n");
1196 fs::write(&output_path, content)?;
1197 }
1198 "md" | "markdown" => {
1199 let mut md_content = String::from("# Chat Sessions Export\n\n");
1201 md_content.push_str(&format!(
1202 "Exported: {}\n\n",
1203 Utc::now().format("%Y-%m-%d %H:%M:%S")
1204 ));
1205
1206 for session_json in &sessions {
1207 if let Ok(session) = serde_json::from_str::<ChatSession>(session_json) {
1208 md_content.push_str(&format!("## {}\n\n", session.title()));
1209 md_content.push_str(&format!("Messages: {}\n\n", session.request_count()));
1210
1211 for request in &session.requests {
1212 if let Some(msg) = &request.message {
1213 if let Some(text) = &msg.text {
1214 md_content.push_str(&format!("### User\n\n{}\n\n", text));
1215 }
1216 }
1217 if let Some(response) = &request.response {
1219 let response_text = response
1220 .get("text")
1221 .and_then(|v| v.as_str())
1222 .or_else(|| response.get("content").and_then(|v| v.as_str()))
1223 .or_else(|| {
1224 response
1226 .get("value")
1227 .and_then(|v| v.as_array())
1228 .and_then(|arr| arr.first())
1229 .and_then(|v| v.get("value"))
1230 .and_then(|v| v.as_str())
1231 });
1232
1233 if let Some(text) = response_text {
1234 md_content.push_str(&format!("### Assistant\n\n{}\n\n", text));
1235 }
1236 }
1237 }
1238 md_content.push_str("---\n\n");
1239 }
1240 }
1241 fs::write(&output_path, md_content)?;
1242 }
1243 _ => {
1244 anyhow::bail!("Unknown format: {}. Supported: json, jsonl, md", format);
1245 }
1246 }
1247
1248 println!(
1249 "{} Exported {} sessions to {}",
1250 "[+]".green(),
1251 sessions.len().to_string().cyan(),
1252 output_path.display()
1253 );
1254
1255 Ok(())
1256}
1257
1258pub fn harvest_git_init(path: Option<&str>) -> Result<()> {
1260 let db_path = get_db_path(path)?;
1261 init_git_tracking(&db_path)
1262}
1263
1264pub fn harvest_git_commit(path: Option<&str>, message: Option<&str>) -> Result<()> {
1265 let db_path = get_db_path(path)?;
1266 let msg = message.unwrap_or("Update harvest database");
1267 git_commit_harvest(&db_path, msg)
1268}
1269
1270pub fn harvest_git_log(path: Option<&str>, count: usize) -> Result<()> {
1271 let db_path = get_db_path(path)?;
1272 let db_dir = db_path.parent().unwrap_or(Path::new("."));
1273 let db_name = db_path.file_name().unwrap().to_str().unwrap();
1274
1275 println!("\n{} Harvest Git History", "[H]".magenta().bold());
1276 println!("{}", "=".repeat(60));
1277
1278 let output = Command::new("git")
1279 .current_dir(db_dir)
1280 .args([
1281 "log",
1282 "--oneline",
1283 &format!("-{}", count),
1284 "--follow",
1285 "--",
1286 db_name,
1287 ])
1288 .output()
1289 .context("Failed to run git log")?;
1290
1291 if !output.status.success() {
1292 let stderr = String::from_utf8_lossy(&output.stderr);
1293 if stderr.contains("does not have any commits") {
1294 println!("{} No commits yet", "[i]".dimmed());
1295 } else {
1296 anyhow::bail!("Git log failed: {}", stderr);
1297 }
1298 return Ok(());
1299 }
1300
1301 let log = String::from_utf8_lossy(&output.stdout);
1302 if log.trim().is_empty() {
1303 println!("{} No commits found for {}", "[i]".dimmed(), db_name);
1304 } else {
1305 for line in log.lines() {
1306 let parts: Vec<&str> = line.splitn(2, ' ').collect();
1307 if parts.len() == 2 {
1308 println!("{} {} {}", "[C]".yellow(), parts[0].cyan(), parts[1]);
1309 } else {
1310 println!("{}", line);
1311 }
1312 }
1313 }
1314
1315 Ok(())
1316}
1317
1318pub fn harvest_git_diff(path: Option<&str>, commit: Option<&str>) -> Result<()> {
1319 let db_path = get_db_path(path)?;
1320 let db_dir = db_path.parent().unwrap_or(Path::new("."));
1321 let db_name = db_path.file_name().unwrap().to_str().unwrap();
1322
1323 println!("\n{} Harvest Database Changes", "[H]".magenta().bold());
1324 println!("{}", "=".repeat(60));
1325
1326 let mut args = vec!["diff", "--stat"];
1327 if let Some(c) = commit {
1328 args.push(c);
1329 }
1330 args.push("--");
1331 args.push(db_name);
1332
1333 let output = Command::new("git")
1334 .current_dir(db_dir)
1335 .args(&args)
1336 .output()
1337 .context("Failed to run git diff")?;
1338
1339 let diff = String::from_utf8_lossy(&output.stdout);
1340 if diff.trim().is_empty() {
1341 println!("{} No changes", "[+]".green());
1342 } else {
1343 println!("{}", diff);
1344 }
1345
1346 Ok(())
1347}
1348
1349pub fn harvest_git_restore(path: Option<&str>, commit: &str) -> Result<()> {
1350 let db_path = get_db_path(path)?;
1351 let db_dir = db_path.parent().unwrap_or(Path::new("."));
1352 let db_name = db_path.file_name().unwrap().to_str().unwrap();
1353
1354 println!("\n{} Restoring Harvest Database", "[H]".magenta().bold());
1355 println!("{}", "=".repeat(60));
1356
1357 let backup_path = db_path.with_extension("db.backup");
1359 if db_path.exists() {
1360 fs::copy(&db_path, &backup_path)?;
1361 println!(
1362 "{} Created backup: {}",
1363 "[+]".green(),
1364 backup_path.display()
1365 );
1366 }
1367
1368 let output = Command::new("git")
1370 .current_dir(db_dir)
1371 .args(["checkout", commit, "--", db_name])
1372 .output()
1373 .context("Failed to run git checkout")?;
1374
1375 if !output.status.success() {
1376 let stderr = String::from_utf8_lossy(&output.stderr);
1377 anyhow::bail!("Git restore failed: {}", stderr);
1378 }
1379
1380 println!(
1381 "{} Restored database from commit: {}",
1382 "[+]".green(),
1383 commit
1384 );
1385
1386 Ok(())
1387}
1388
1389fn get_db_path(path: Option<&str>) -> Result<PathBuf> {
1394 if let Some(p) = path {
1395 return Ok(PathBuf::from(p));
1396 }
1397
1398 if let Ok(p) = std::env::var("CSM_HARVEST_DB") {
1400 return Ok(PathBuf::from(p));
1401 }
1402
1403 Ok(std::env::current_dir()?.join("chat_sessions.db"))
1405}
1406
1407fn create_harvest_database(path: &Path) -> Result<()> {
1408 let conn = Connection::open(path)?;
1409
1410 conn.execute_batch(
1411 r#"
1412 -- Sessions table (original harvest format)
1413 CREATE TABLE IF NOT EXISTS sessions (
1414 id TEXT PRIMARY KEY,
1415 provider TEXT NOT NULL,
1416 provider_type TEXT,
1417 workspace_id TEXT,
1418 workspace_name TEXT,
1419 title TEXT NOT NULL,
1420 message_count INTEGER DEFAULT 0,
1421 created_at INTEGER NOT NULL,
1422 updated_at INTEGER NOT NULL,
1423 harvested_at INTEGER NOT NULL,
1424 session_json TEXT NOT NULL
1425 );
1426
1427 CREATE INDEX IF NOT EXISTS idx_sessions_provider ON sessions(provider);
1428 CREATE INDEX IF NOT EXISTS idx_sessions_workspace ON sessions(workspace_id);
1429 CREATE INDEX IF NOT EXISTS idx_sessions_updated ON sessions(updated_at);
1430
1431 -- Enhanced messages table with raw markdown and metadata
1432 CREATE TABLE IF NOT EXISTS messages_v2 (
1433 id INTEGER PRIMARY KEY AUTOINCREMENT,
1434 session_id TEXT NOT NULL,
1435 message_index INTEGER NOT NULL,
1436 request_id TEXT,
1437 response_id TEXT,
1438 role TEXT NOT NULL,
1439 content_raw TEXT NOT NULL,
1440 content_markdown TEXT,
1441 model_id TEXT,
1442 timestamp INTEGER,
1443 is_canceled INTEGER DEFAULT 0,
1444 metadata_json TEXT,
1445 created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1446 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1447 UNIQUE(session_id, message_index, role)
1448 );
1449
1450 CREATE INDEX IF NOT EXISTS idx_messages_v2_session ON messages_v2(session_id);
1451 CREATE INDEX IF NOT EXISTS idx_messages_v2_role ON messages_v2(role);
1452 CREATE INDEX IF NOT EXISTS idx_messages_v2_timestamp ON messages_v2(timestamp);
1453
1454 -- Tool invocations within messages (file edits, terminal commands, etc.)
1455 CREATE TABLE IF NOT EXISTS tool_invocations (
1456 id INTEGER PRIMARY KEY AUTOINCREMENT,
1457 message_id INTEGER NOT NULL,
1458 session_id TEXT NOT NULL,
1459 tool_name TEXT NOT NULL,
1460 tool_call_id TEXT,
1461 invocation_index INTEGER DEFAULT 0,
1462 input_json TEXT,
1463 output_json TEXT,
1464 status TEXT DEFAULT 'pending',
1465 is_confirmed INTEGER DEFAULT 0,
1466 timestamp INTEGER,
1467 FOREIGN KEY (message_id) REFERENCES messages_v2(id) ON DELETE CASCADE,
1468 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1469 );
1470
1471 CREATE INDEX IF NOT EXISTS idx_tool_invocations_message ON tool_invocations(message_id);
1472 CREATE INDEX IF NOT EXISTS idx_tool_invocations_session ON tool_invocations(session_id);
1473 CREATE INDEX IF NOT EXISTS idx_tool_invocations_tool ON tool_invocations(tool_name);
1474
1475 -- File changes/diffs associated with tool invocations
1476 CREATE TABLE IF NOT EXISTS file_changes (
1477 id INTEGER PRIMARY KEY AUTOINCREMENT,
1478 tool_invocation_id INTEGER,
1479 session_id TEXT NOT NULL,
1480 message_index INTEGER,
1481 file_path TEXT NOT NULL,
1482 change_type TEXT NOT NULL,
1483 old_content TEXT,
1484 new_content TEXT,
1485 diff_unified TEXT,
1486 line_start INTEGER,
1487 line_end INTEGER,
1488 timestamp INTEGER,
1489 FOREIGN KEY (tool_invocation_id) REFERENCES tool_invocations(id) ON DELETE CASCADE,
1490 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
1491 );
1492
1493 CREATE INDEX IF NOT EXISTS idx_file_changes_tool ON file_changes(tool_invocation_id);
1494 CREATE INDEX IF NOT EXISTS idx_file_changes_session ON file_changes(session_id);
1495 CREATE INDEX IF NOT EXISTS idx_file_changes_path ON file_changes(file_path);
1496
1497 -- Message-level checkpoints for versioning
1498 CREATE TABLE IF NOT EXISTS message_checkpoints (
1499 id INTEGER PRIMARY KEY AUTOINCREMENT,
1500 session_id TEXT NOT NULL,
1501 message_index INTEGER NOT NULL,
1502 checkpoint_number INTEGER NOT NULL,
1503 content_hash TEXT NOT NULL,
1504 snapshot_json TEXT,
1505 file_state_json TEXT,
1506 created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
1507 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1508 UNIQUE(session_id, message_index, checkpoint_number)
1509 );
1510
1511 CREATE INDEX IF NOT EXISTS idx_message_checkpoints_session ON message_checkpoints(session_id);
1512 CREATE INDEX IF NOT EXISTS idx_sessions_title ON sessions(title);
1513
1514 -- Messages table for full-text search
1515 CREATE TABLE IF NOT EXISTS messages (
1516 id INTEGER PRIMARY KEY AUTOINCREMENT,
1517 session_id TEXT NOT NULL,
1518 message_index INTEGER NOT NULL,
1519 role TEXT NOT NULL,
1520 content TEXT NOT NULL,
1521 created_at INTEGER,
1522 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1523 UNIQUE(session_id, message_index)
1524 );
1525
1526 CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id);
1527 CREATE INDEX IF NOT EXISTS idx_messages_role ON messages(role);
1528
1529 -- Checkpoints for version tracking
1530 CREATE TABLE IF NOT EXISTS checkpoints (
1531 id INTEGER PRIMARY KEY AUTOINCREMENT,
1532 session_id TEXT NOT NULL,
1533 checkpoint_number INTEGER NOT NULL,
1534 message TEXT,
1535 message_count INTEGER NOT NULL,
1536 content_hash TEXT NOT NULL,
1537 snapshot TEXT,
1538 created_at TEXT NOT NULL DEFAULT (datetime('now')),
1539 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE,
1540 UNIQUE(session_id, checkpoint_number)
1541 );
1542
1543 CREATE INDEX IF NOT EXISTS idx_checkpoints_session ON checkpoints(session_id);
1544
1545 -- Share links for importing shared conversations
1546 CREATE TABLE IF NOT EXISTS share_links (
1547 id TEXT PRIMARY KEY,
1548 session_id TEXT,
1549 provider TEXT NOT NULL,
1550 url TEXT NOT NULL UNIQUE,
1551 share_id TEXT NOT NULL,
1552 title TEXT,
1553 imported INTEGER DEFAULT 0,
1554 imported_at INTEGER,
1555 created_at INTEGER NOT NULL,
1556 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE SET NULL
1557 );
1558
1559 CREATE INDEX IF NOT EXISTS idx_share_links_provider ON share_links(provider);
1560 CREATE INDEX IF NOT EXISTS idx_share_links_imported ON share_links(imported);
1561
1562 -- Harvest metadata
1563 CREATE TABLE IF NOT EXISTS harvest_metadata (
1564 key TEXT PRIMARY KEY,
1565 value TEXT NOT NULL
1566 );
1567
1568 INSERT OR REPLACE INTO harvest_metadata (key, value)
1569 VALUES ('version', '2.1'),
1570 ('created_at', datetime('now'));
1571
1572 -- Full-text search for messages (standalone FTS table)
1573 CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
1574 content_raw
1575 );
1576
1577 -- Triggers to keep FTS index in sync with messages_v2
1578 CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
1579 INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1580 END;
1581
1582 CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
1583 INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
1584 END;
1585
1586 CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
1587 INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
1588 INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
1589 END;
1590 "#,
1591 )?;
1592
1593 Ok(())
1594}
1595
1596fn insert_or_update_session(
1597 conn: &Connection,
1598 session: &ChatSession,
1599 provider: &str,
1600 workspace_id: Option<&str>,
1601 workspace_name: Option<&str>,
1602) -> Result<bool> {
1603 let session_id = session
1604 .session_id
1605 .clone()
1606 .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
1607
1608 let now = Utc::now().timestamp_millis();
1609 let session_json = serde_json::to_string(session)?;
1610
1611 let existing: Option<i64> = conn
1613 .query_row(
1614 "SELECT updated_at FROM sessions WHERE id = ?",
1615 [&session_id],
1616 |row| row.get(0),
1617 )
1618 .ok();
1619
1620 let updated = existing.is_some();
1621
1622 conn.execute(
1623 r#"
1624 INSERT OR REPLACE INTO sessions
1625 (id, provider, provider_type, workspace_id, workspace_name, title,
1626 message_count, created_at, updated_at, harvested_at, session_json)
1627 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1628 "#,
1629 params![
1630 session_id,
1631 provider,
1632 provider,
1633 workspace_id,
1634 workspace_name,
1635 session.title(),
1636 session.request_count() as i64,
1637 session.creation_date,
1638 session.last_message_date,
1639 now,
1640 session_json,
1641 ],
1642 )?;
1643
1644 populate_enhanced_messages(conn, &session_id, session)?;
1646
1647 Ok(updated)
1648}
1649
1650fn populate_enhanced_messages(
1652 conn: &Connection,
1653 session_id: &str,
1654 session: &ChatSession,
1655) -> Result<()> {
1656 conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
1658 conn.execute(
1659 "DELETE FROM tool_invocations WHERE session_id = ?",
1660 [session_id],
1661 )?;
1662 conn.execute(
1663 "DELETE FROM file_changes WHERE session_id = ?",
1664 [session_id],
1665 )?;
1666
1667 for (idx, request) in session.requests.iter().enumerate() {
1668 let timestamp = request.timestamp;
1669 let request_id = request.request_id.as_deref();
1670 let response_id = request.response_id.as_deref();
1671 let model_id = request.model_id.as_deref();
1672 let is_canceled = request.is_canceled.unwrap_or(false);
1673
1674 if let Some(ref message) = request.message {
1676 let content = message.text.clone().unwrap_or_default();
1677 if !content.is_empty() {
1678 let metadata = serde_json::json!({
1679 "variable_data": request.variable_data,
1680 });
1681
1682 conn.execute(
1683 r#"
1684 INSERT OR REPLACE INTO messages_v2
1685 (session_id, message_index, request_id, response_id, role,
1686 content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1687 VALUES (?, ?, ?, ?, 'user', ?, ?, ?, ?, 0, ?)
1688 "#,
1689 params![
1690 session_id,
1691 (idx * 2) as i64,
1692 request_id,
1693 response_id,
1694 &content,
1695 &content, model_id,
1697 timestamp,
1698 serde_json::to_string(&metadata).ok(),
1699 ],
1700 )?;
1701 }
1702 }
1703
1704 if let Some(ref response) = request.response {
1706 let (content, tool_invocations) = extract_response_content_and_tools(response);
1707
1708 if !content.is_empty() || !tool_invocations.is_empty() {
1709 let metadata = serde_json::json!({
1710 "content_references": request.content_references,
1711 "code_citations": request.code_citations,
1712 "response_markdown_info": request.response_markdown_info,
1713 });
1714
1715 conn.execute(
1716 r#"
1717 INSERT OR REPLACE INTO messages_v2
1718 (session_id, message_index, request_id, response_id, role,
1719 content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
1720 VALUES (?, ?, ?, ?, 'assistant', ?, ?, ?, ?, ?, ?)
1721 "#,
1722 params![
1723 session_id,
1724 (idx * 2 + 1) as i64,
1725 request_id,
1726 response_id,
1727 &content,
1728 &content,
1729 model_id,
1730 timestamp,
1731 is_canceled as i64,
1732 serde_json::to_string(&metadata).ok(),
1733 ],
1734 )?;
1735
1736 let message_id: i64 = conn.last_insert_rowid();
1738
1739 for (inv_idx, invocation) in tool_invocations.iter().enumerate() {
1741 insert_tool_invocation(
1742 conn, message_id, session_id, inv_idx, invocation, timestamp,
1743 )?;
1744 }
1745 }
1746 }
1747 }
1748
1749 Ok(())
1750}
1751
1752fn extract_response_content_and_tools(
1754 response: &serde_json::Value,
1755) -> (String, Vec<serde_json::Value>) {
1756 let mut text_parts = Vec::new();
1757 let mut tool_invocations = Vec::new();
1758
1759 if let Some(items) = response.as_array() {
1760 for item in items {
1761 let kind = item.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1762
1763 match kind {
1764 "toolInvocationSerialized" => {
1765 tool_invocations.push(item.clone());
1766 }
1767 "thinking" => {
1768 continue;
1770 }
1771 _ => {
1772 if let Some(value) = item.get("value").and_then(|v| v.as_str()) {
1773 let trimmed = value.trim();
1775 if !trimmed.is_empty() && !is_empty_code_block(trimmed) {
1776 text_parts.push(value.to_string());
1777 }
1778 }
1779 }
1780 }
1781 }
1782 }
1783
1784 (text_parts.join("\n\n"), tool_invocations)
1785}
1786
1787fn insert_tool_invocation(
1789 conn: &Connection,
1790 message_id: i64,
1791 session_id: &str,
1792 inv_idx: usize,
1793 invocation: &serde_json::Value,
1794 timestamp: Option<i64>,
1795) -> Result<()> {
1796 let tool_name = invocation
1797 .get("toolId")
1798 .and_then(|t| t.as_str())
1799 .unwrap_or("unknown");
1800 let tool_call_id = invocation.get("toolCallId").and_then(|t| t.as_str());
1801 let is_complete = invocation
1802 .get("isComplete")
1803 .and_then(|c| c.as_bool())
1804 .unwrap_or(false);
1805 let is_confirmed = invocation.get("isConfirmed");
1806 let tool_data = invocation.get("toolSpecificData");
1807
1808 let input_json = tool_data.map(|d| serde_json::to_string(d).unwrap_or_default());
1809 let status = if is_complete { "complete" } else { "pending" };
1810 let confirmed = match is_confirmed {
1811 Some(v) => v
1812 .get("type")
1813 .and_then(|t| t.as_i64())
1814 .map(|t| t > 0)
1815 .unwrap_or(false),
1816 None => false,
1817 };
1818
1819 conn.execute(
1820 r#"
1821 INSERT INTO tool_invocations
1822 (message_id, session_id, tool_name, tool_call_id, invocation_index,
1823 input_json, status, is_confirmed, timestamp)
1824 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
1825 "#,
1826 params![
1827 message_id,
1828 session_id,
1829 tool_name,
1830 tool_call_id,
1831 inv_idx as i64,
1832 input_json,
1833 status,
1834 confirmed as i64,
1835 timestamp,
1836 ],
1837 )?;
1838
1839 let tool_invocation_id = conn.last_insert_rowid();
1840
1841 if let Some(data) = tool_data {
1843 insert_file_changes(conn, tool_invocation_id, session_id, data, timestamp)?;
1844 }
1845
1846 Ok(())
1847}
1848
1849fn insert_file_changes(
1851 conn: &Connection,
1852 tool_invocation_id: i64,
1853 session_id: &str,
1854 tool_data: &serde_json::Value,
1855 timestamp: Option<i64>,
1856) -> Result<()> {
1857 let kind = tool_data.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1858
1859 match kind {
1860 "terminal" => {
1861 if let Some(cmd_line) = tool_data.get("commandLine") {
1863 let original = cmd_line.get("original").and_then(|o| o.as_str());
1864 let edited = cmd_line.get("toolEdited").and_then(|e| e.as_str());
1865 let output = tool_data
1866 .get("terminalCommandOutput")
1867 .map(|o| serde_json::to_string(o).unwrap_or_default());
1868
1869 conn.execute(
1870 r#"
1871 INSERT INTO file_changes
1872 (tool_invocation_id, session_id, file_path, change_type,
1873 old_content, new_content, diff_unified, timestamp)
1874 VALUES (?, ?, '[terminal]', 'command', ?, ?, ?, ?)
1875 "#,
1876 params![
1877 tool_invocation_id,
1878 session_id,
1879 original,
1880 edited.or(original),
1881 output,
1882 timestamp,
1883 ],
1884 )?;
1885 }
1886 }
1887 "replaceFile" | "editFile" => {
1888 let file_path = tool_data
1890 .get("uri")
1891 .or_else(|| tool_data.get("filePath"))
1892 .and_then(|p| p.as_str())
1893 .unwrap_or("[unknown]");
1894 let old_string = tool_data.get("oldString").and_then(|s| s.as_str());
1895 let new_string = tool_data.get("newString").and_then(|s| s.as_str());
1896
1897 let diff = if let (Some(old), Some(new)) = (old_string, new_string) {
1899 Some(generate_unified_diff(old, new, file_path))
1900 } else {
1901 None
1902 };
1903
1904 conn.execute(
1905 r#"
1906 INSERT INTO file_changes
1907 (tool_invocation_id, session_id, file_path, change_type,
1908 old_content, new_content, diff_unified, timestamp)
1909 VALUES (?, ?, ?, 'edit', ?, ?, ?, ?)
1910 "#,
1911 params![
1912 tool_invocation_id,
1913 session_id,
1914 file_path,
1915 old_string,
1916 new_string,
1917 diff,
1918 timestamp,
1919 ],
1920 )?;
1921 }
1922 "createFile" => {
1923 let file_path = tool_data
1924 .get("uri")
1925 .or_else(|| tool_data.get("filePath"))
1926 .and_then(|p| p.as_str())
1927 .unwrap_or("[unknown]");
1928 let content = tool_data.get("content").and_then(|c| c.as_str());
1929
1930 conn.execute(
1931 r#"
1932 INSERT INTO file_changes
1933 (tool_invocation_id, session_id, file_path, change_type,
1934 new_content, timestamp)
1935 VALUES (?, ?, ?, 'create', ?, ?)
1936 "#,
1937 params![
1938 tool_invocation_id,
1939 session_id,
1940 file_path,
1941 content,
1942 timestamp,
1943 ],
1944 )?;
1945 }
1946 "readFile" => {
1947 let file_path = tool_data
1948 .get("uri")
1949 .or_else(|| tool_data.get("filePath"))
1950 .and_then(|p| p.as_str())
1951 .unwrap_or("[unknown]");
1952
1953 conn.execute(
1954 r#"
1955 INSERT INTO file_changes
1956 (tool_invocation_id, session_id, file_path, change_type, timestamp)
1957 VALUES (?, ?, ?, 'read', ?)
1958 "#,
1959 params![tool_invocation_id, session_id, file_path, timestamp,],
1960 )?;
1961 }
1962 _ => {
1963 if !kind.is_empty() {
1965 let data_json = serde_json::to_string(tool_data).ok();
1966 conn.execute(
1967 r#"
1968 INSERT INTO file_changes
1969 (tool_invocation_id, session_id, file_path, change_type,
1970 diff_unified, timestamp)
1971 VALUES (?, ?, ?, ?, ?, ?)
1972 "#,
1973 params![
1974 tool_invocation_id,
1975 session_id,
1976 format!("[{}]", kind),
1977 kind,
1978 data_json,
1979 timestamp,
1980 ],
1981 )?;
1982 }
1983 }
1984 }
1985
1986 Ok(())
1987}
1988
1989fn generate_unified_diff(old: &str, new: &str, file_path: &str) -> String {
1991 let old_lines: Vec<&str> = old.lines().collect();
1992 let new_lines: Vec<&str> = new.lines().collect();
1993
1994 let mut diff = format!("--- a/{}\n+++ b/{}\n", file_path, file_path);
1995
1996 let max_lines = old_lines.len().max(new_lines.len());
1998 let mut in_hunk = false;
1999 let mut hunk_start = 0;
2000 let mut hunk_lines = Vec::new();
2001
2002 for i in 0..max_lines {
2003 let old_line = old_lines.get(i).copied();
2004 let new_line = new_lines.get(i).copied();
2005
2006 match (old_line, new_line) {
2007 (Some(o), Some(n)) if o == n => {
2008 if in_hunk {
2009 hunk_lines.push(format!(" {}", o));
2010 }
2011 }
2012 (Some(o), Some(n)) => {
2013 if !in_hunk {
2014 in_hunk = true;
2015 hunk_start = i + 1;
2016 }
2017 hunk_lines.push(format!("-{}", o));
2018 hunk_lines.push(format!("+{}", n));
2019 }
2020 (Some(o), None) => {
2021 if !in_hunk {
2022 in_hunk = true;
2023 hunk_start = i + 1;
2024 }
2025 hunk_lines.push(format!("-{}", o));
2026 }
2027 (None, Some(n)) => {
2028 if !in_hunk {
2029 in_hunk = true;
2030 hunk_start = i + 1;
2031 }
2032 hunk_lines.push(format!("+{}", n));
2033 }
2034 (None, None) => break,
2035 }
2036 }
2037
2038 if !hunk_lines.is_empty() {
2039 diff.push_str(&format!(
2040 "@@ -{},{} +{},{} @@\n",
2041 hunk_start,
2042 old_lines.len(),
2043 hunk_start,
2044 new_lines.len()
2045 ));
2046 for line in hunk_lines {
2047 diff.push_str(&line);
2048 diff.push('\n');
2049 }
2050 }
2051
2052 diff
2053}
2054
2055fn harvest_web_providers(
2057 conn: &Connection,
2058 stats: &mut HarvestStats,
2059 include_providers: &[String],
2060 exclude_providers: &[String],
2061) -> Result<()> {
2062 use crate::browser::extract_provider_cookies;
2063
2064 println!("\n{} Harvesting from web providers...", "[*]".blue());
2065
2066 let web_provider_configs: Vec<(&str, &str, &str)> = vec![
2068 ("ChatGPT", "chatgpt", "__Secure-next-auth.session-token"),
2069 ("Claude", "claude", "sessionKey"),
2070 ];
2071
2072 let mut web_sessions_harvested = 0;
2073
2074 for (display_name, provider_key, _cookie_name) in &web_provider_configs {
2075 if !include_providers.is_empty()
2077 && !include_providers
2078 .iter()
2079 .any(|p| p.eq_ignore_ascii_case(provider_key))
2080 {
2081 continue;
2082 }
2083 if exclude_providers
2084 .iter()
2085 .any(|p| p.eq_ignore_ascii_case(provider_key))
2086 {
2087 continue;
2088 }
2089
2090 print!(" {} Checking {} ... ", "[-]".yellow(), display_name);
2091
2092 if let Some(creds) = extract_provider_cookies(provider_key) {
2094 if let Some(session_token) = &creds.session_token {
2095 println!("{}", "authenticated".green());
2096
2097 let result = match *provider_key {
2099 "chatgpt" => harvest_chatgpt_sessions(conn, session_token, stats),
2100 "claude" => harvest_claude_sessions(conn, session_token, stats),
2101 _ => Ok(0),
2102 };
2103
2104 match result {
2105 Ok(count) => {
2106 if count > 0 {
2107 println!(
2108 " {} Harvested {} sessions from {}",
2109 "[+]".green(),
2110 count.to_string().cyan(),
2111 display_name
2112 );
2113 web_sessions_harvested += count;
2114 }
2115 }
2116 Err(e) => {
2117 println!(
2118 " {} Failed to harvest {}: {:?}",
2119 "[!]".red(),
2120 display_name,
2121 e
2122 );
2123 }
2124 }
2125 } else {
2126 println!("{}", "no session token".yellow());
2127 }
2128 } else {
2129 println!("{}", "not authenticated".yellow());
2130 }
2131 }
2132
2133 if web_sessions_harvested > 0 {
2134 println!(
2135 " {} Total web sessions harvested: {}",
2136 "[+]".green(),
2137 web_sessions_harvested.to_string().cyan()
2138 );
2139 }
2140
2141 Ok(())
2142}
2143
2144fn harvest_chatgpt_sessions(
2146 conn: &Connection,
2147 session_token: &str,
2148 stats: &mut HarvestStats,
2149) -> Result<usize> {
2150 use crate::providers::cloud::chatgpt::ChatGPTProvider;
2151 use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2152
2153 let provider = ChatGPTProvider::with_session_token(session_token.to_string());
2154
2155 let options = FetchOptions {
2157 limit: Some(100),
2158 include_archived: false,
2159 after: None,
2160 before: None,
2161 session_token: Some(session_token.to_string()),
2162 };
2163
2164 let conversations = provider
2165 .list_conversations(&options)
2166 .context("Failed to list ChatGPT conversations")?;
2167
2168 let mut harvested = 0;
2169
2170 for conv_summary in conversations {
2171 let exists: bool = conn
2173 .query_row(
2174 "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'chatgpt')",
2175 params![&conv_summary.id],
2176 |row| row.get(0),
2177 )
2178 .unwrap_or(false);
2179
2180 if exists {
2181 continue;
2183 }
2184
2185 match provider.fetch_conversation(&conv_summary.id) {
2187 Ok(conv) => {
2188 if let Err(e) =
2190 insert_cloud_conversation_to_harvest_db(conn, &conv, "chatgpt", None)
2191 {
2192 eprintln!("Failed to insert ChatGPT session: {}", e);
2193 continue;
2194 }
2195 harvested += 1;
2196 stats.sessions_added += 1;
2197 }
2198 Err(e) => {
2199 eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2200 }
2201 }
2202
2203 std::thread::sleep(std::time::Duration::from_millis(100));
2205 }
2206
2207 Ok(harvested)
2208}
2209
2210fn harvest_claude_sessions(
2212 conn: &Connection,
2213 session_token: &str,
2214 stats: &mut HarvestStats,
2215) -> Result<usize> {
2216 use crate::providers::cloud::anthropic::AnthropicProvider;
2217 use crate::providers::cloud::common::{CloudProvider, FetchOptions};
2218
2219 let provider = AnthropicProvider::with_session_token(session_token.to_string());
2220
2221 let options = FetchOptions {
2223 limit: Some(100),
2224 include_archived: false,
2225 after: None,
2226 before: None,
2227 session_token: Some(session_token.to_string()),
2228 };
2229
2230 let conversations = provider
2231 .list_conversations(&options)
2232 .context("Failed to list Claude conversations")?;
2233
2234 let mut harvested = 0;
2235
2236 for conv_summary in conversations {
2237 let exists: bool = conn
2239 .query_row(
2240 "SELECT EXISTS(SELECT 1 FROM sessions WHERE id = ? AND provider = 'claude')",
2241 params![&conv_summary.id],
2242 |row| row.get(0),
2243 )
2244 .unwrap_or(false);
2245
2246 if exists {
2247 continue;
2248 }
2249
2250 match provider.fetch_conversation(&conv_summary.id) {
2252 Ok(conv) => {
2253 if let Err(e) = insert_cloud_conversation_to_harvest_db(conn, &conv, "claude", None)
2254 {
2255 eprintln!("Failed to insert Claude session: {}", e);
2256 continue;
2257 }
2258 harvested += 1;
2259 stats.sessions_added += 1;
2260 }
2261 Err(e) => {
2262 eprintln!("Failed to fetch conversation {}: {}", conv_summary.id, e);
2263 }
2264 }
2265
2266 std::thread::sleep(std::time::Duration::from_millis(100));
2267 }
2268
2269 Ok(harvested)
2270}
2271
2272fn insert_cloud_conversation_to_harvest_db(
2274 conn: &Connection,
2275 conv: &crate::providers::cloud::common::CloudConversation,
2276 provider: &str,
2277 workspace_name: Option<&str>,
2278) -> Result<()> {
2279 let now = Utc::now().timestamp_millis();
2280 let created_at = conv.created_at.timestamp_millis();
2281 let updated_at = conv
2282 .updated_at
2283 .map(|dt| dt.timestamp_millis())
2284 .unwrap_or(created_at);
2285
2286 let session_json = serde_json::json!({
2288 "id": conv.id,
2289 "title": conv.title,
2290 "model": conv.model,
2291 "created_at": conv.created_at.to_rfc3339(),
2292 "updated_at": conv.updated_at.map(|dt| dt.to_rfc3339()),
2293 "messages": conv.messages.iter().map(|m| {
2294 serde_json::json!({
2295 "id": m.id,
2296 "role": m.role,
2297 "content": m.content,
2298 "timestamp": m.timestamp.map(|dt| dt.to_rfc3339()),
2299 "model": m.model,
2300 })
2301 }).collect::<Vec<_>>(),
2302 });
2303
2304 conn.execute(
2305 r#"
2306 INSERT OR REPLACE INTO sessions
2307 (id, provider, provider_type, workspace_id, workspace_name, title,
2308 message_count, created_at, updated_at, harvested_at, session_json)
2309 VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
2310 "#,
2311 params![
2312 conv.id,
2313 provider,
2314 provider,
2315 workspace_name,
2316 conv.title.clone().unwrap_or_else(|| "Untitled".to_string()),
2317 conv.messages.len() as i64,
2318 created_at,
2319 updated_at,
2320 now,
2321 session_json.to_string(),
2322 ],
2323 )?;
2324
2325 populate_cloud_messages(conn, &conv.id, conv)?;
2327
2328 Ok(())
2329}
2330
2331fn populate_cloud_messages(
2333 conn: &Connection,
2334 session_id: &str,
2335 conv: &crate::providers::cloud::common::CloudConversation,
2336) -> Result<()> {
2337 conn.execute("DELETE FROM messages_v2 WHERE session_id = ?", [session_id])?;
2339
2340 for (idx, message) in conv.messages.iter().enumerate() {
2341 let timestamp = message.timestamp.map(|dt| dt.timestamp_millis());
2342 let role = match message.role.as_str() {
2343 "user" | "human" => "user",
2344 "assistant" => "assistant",
2345 "system" => "system",
2346 other => other,
2347 };
2348
2349 conn.execute(
2350 r#"
2351 INSERT INTO messages_v2
2352 (session_id, message_index, request_id, response_id, role,
2353 content_raw, content_markdown, model_id, timestamp, is_canceled, metadata_json)
2354 VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, 0, NULL)
2355 "#,
2356 params![
2357 session_id,
2358 idx as i64,
2359 message.id,
2360 role,
2361 &message.content,
2362 &message.content,
2363 message.model.as_deref(),
2364 timestamp,
2365 ],
2366 )?;
2367 }
2368
2369 Ok(())
2370}
2371
2372fn update_harvest_metadata(conn: &Connection) -> Result<()> {
2373 conn.execute(
2374 "INSERT OR REPLACE INTO harvest_metadata (key, value) VALUES ('last_harvest', datetime('now'))",
2375 [],
2376 )?;
2377 Ok(())
2378}
2379
2380fn init_git_tracking(db_path: &Path) -> Result<()> {
2381 let db_dir = db_path.parent().unwrap_or(Path::new("."));
2382 let db_name = db_path.file_name().unwrap().to_str().unwrap();
2383
2384 println!("\n{} Initializing Git Tracking", "[G]".green().bold());
2385
2386 if !db_dir.join(".git").exists() {
2388 let output = Command::new("git")
2389 .current_dir(db_dir)
2390 .args(["init"])
2391 .output()
2392 .context("Failed to run git init")?;
2393
2394 if !output.status.success() {
2395 anyhow::bail!(
2396 "Git init failed: {}",
2397 String::from_utf8_lossy(&output.stderr)
2398 );
2399 }
2400 println!("{} Initialized git repository", "[+]".green());
2401 } else {
2402 println!("{} Git repository already exists", "[i]".blue());
2403 }
2404
2405 let gitignore_path = db_dir.join(".gitignore");
2407 if !gitignore_path.exists() {
2408 fs::write(
2409 &gitignore_path,
2410 "*.db-journal\n*.db-wal\n*.db-shm\n*.backup\n",
2411 )?;
2412 println!("{} Created .gitignore", "[+]".green());
2413 }
2414
2415 let output = Command::new("git")
2417 .current_dir(db_dir)
2418 .args(["add", db_name])
2419 .output()?;
2420
2421 if output.status.success() {
2422 println!("{} Added {} to git", "[+]".green(), db_name);
2423 }
2424
2425 let output = Command::new("git")
2427 .current_dir(db_dir)
2428 .args(["rev-parse", "HEAD"])
2429 .output()?;
2430
2431 if !output.status.success() {
2432 let output = Command::new("git")
2433 .current_dir(db_dir)
2434 .args(["commit", "-m", "Initialize harvest database"])
2435 .output()?;
2436
2437 if output.status.success() {
2438 println!("{} Created initial commit", "[+]".green());
2439 }
2440 }
2441
2442 println!("\n{} Git tracking enabled!", "[+]".green().bold());
2443 println!(" Run 'csm harvest git commit -m \"message\"' to save changes");
2444 println!(" Run 'csm harvest git log' to view history");
2445
2446 Ok(())
2447}
2448
2449fn git_commit_harvest(db_path: &Path, message: &str) -> Result<()> {
2450 let db_dir = db_path.parent().unwrap_or(Path::new("."));
2451 let db_name = db_path.file_name().unwrap().to_str().unwrap();
2452
2453 let output = Command::new("git")
2455 .current_dir(db_dir)
2456 .args(["add", db_name])
2457 .output()
2458 .context("Failed to stage database")?;
2459
2460 if !output.status.success() {
2461 anyhow::bail!(
2462 "Git add failed: {}",
2463 String::from_utf8_lossy(&output.stderr)
2464 );
2465 }
2466
2467 let output = Command::new("git")
2469 .current_dir(db_dir)
2470 .args(["commit", "-m", message])
2471 .output()
2472 .context("Failed to commit")?;
2473
2474 if !output.status.success() {
2475 let stderr = String::from_utf8_lossy(&output.stderr);
2476 if stderr.contains("nothing to commit") {
2477 println!("{} Nothing to commit", "[i]".blue());
2478 return Ok(());
2479 }
2480 anyhow::bail!("Git commit failed: {}", stderr);
2481 }
2482
2483 let output = Command::new("git")
2485 .current_dir(db_dir)
2486 .args(["rev-parse", "--short", "HEAD"])
2487 .output()?;
2488
2489 let hash = String::from_utf8_lossy(&output.stdout).trim().to_string();
2490 println!("{} Committed: {} - {}", "[+]".green(), hash.cyan(), message);
2491
2492 Ok(())
2493}
2494
2495pub fn harvest_share(
2501 db_path: Option<&str>,
2502 url: &str,
2503 name: Option<&str>,
2504 _workspace: Option<&str>,
2505) -> Result<()> {
2506 let db_path = get_db_path(db_path)?;
2507
2508 let share_info = match ShareLinkParser::parse(url) {
2510 Some(info) => info,
2511 None => {
2512 println!("{} Unrecognized share link format", "[!]".yellow());
2513 println!(" Supported providers: ChatGPT, Claude, Gemini, Perplexity");
2514 println!(" Example URLs:");
2515 println!(" - https://chat.openai.com/share/abc123...");
2516 println!(" - https://claude.ai/share/xyz789...");
2517 anyhow::bail!("Could not parse share link URL");
2518 }
2519 };
2520
2521 println!("{}", "=".repeat(60).cyan());
2522 println!("{}", " Share Link Import ".bold().cyan());
2523 println!("{}", "=".repeat(60).cyan());
2524 println!();
2525
2526 println!(
2527 "{} Detected provider: {}",
2528 "[i]".blue(),
2529 share_info.provider.bold()
2530 );
2531 println!("{} Share ID: {}", "[i]".blue(), share_info.share_id);
2532
2533 let db = ChatDatabase::open(&db_path)?;
2535
2536 let conn = db.connection();
2538 let existing: Option<(String, i64)> = conn
2539 .query_row(
2540 "SELECT id, imported FROM share_links WHERE url = ?",
2541 [url],
2542 |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
2543 )
2544 .ok();
2545
2546 if let Some((id, imported)) = existing {
2547 println!();
2548 println!(
2549 "{} This share link has already been registered (ID: {})",
2550 "[!]".yellow(),
2551 id
2552 );
2553
2554 let status = if imported == 1 { "imported" } else { "pending" };
2555 println!("{} Current status: {}", "[i]".blue(), status);
2556
2557 if imported == 1 {
2558 println!(
2559 "{} Session already imported - no action needed",
2560 "[+]".green()
2561 );
2562 return Ok(());
2563 }
2564 } else {
2565 let link_id = uuid::Uuid::new_v4().to_string();
2567 let now = chrono::Utc::now().timestamp();
2568 conn.execute(
2569 "INSERT INTO share_links (id, url, provider, share_id, title, imported, created_at) VALUES (?, ?, ?, ?, ?, 0, ?)",
2570 params![link_id, url, share_info.provider, share_info.share_id, name, now],
2571 )?;
2572
2573 println!(
2574 "{} Registered share link (ID: {})",
2575 "[+]".green(),
2576 &link_id[..8]
2577 );
2578 }
2579
2580 println!();
2581 println!("{}", "-".repeat(60).dimmed());
2582 println!();
2583
2584 println!("{} Share link registered as pending", "[i]".blue());
2593 println!();
2594 println!("{}", "Note:".bold().yellow());
2595 println!(" Automatic content fetching from share links is not yet implemented.");
2596 println!(" For now, you can:");
2597 println!(" 1. Open the share link in your browser");
2598 println!(" 2. Export the conversation manually");
2599 println!(" 3. Import with: csm import <file>");
2600 println!();
2601 println!(" Or use 'csm harvest shares' to view pending links.");
2602
2603 Ok(())
2604}
2605
2606pub fn harvest_shares(
2608 db_path: Option<&str>,
2609 status_filter: Option<&str>,
2610 limit: usize,
2611) -> Result<()> {
2612 let db_path = get_db_path(db_path)?;
2613
2614 if !db_path.exists() {
2615 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2616 }
2617
2618 let db = ChatDatabase::open(&db_path)?;
2619 let conn = db.connection();
2620
2621 println!("{}", "=".repeat(70).cyan());
2622 println!("{}", " Share Links ".bold().cyan());
2623 println!("{}", "=".repeat(70).cyan());
2624 println!();
2625
2626 let query = match status_filter {
2627 Some("pending") => format!(
2628 "SELECT id, url, provider, share_id, title, imported, created_at
2629 FROM share_links WHERE imported = 0 ORDER BY created_at DESC LIMIT {}",
2630 limit
2631 ),
2632 Some("imported") => format!(
2633 "SELECT id, url, provider, share_id, title, imported, created_at
2634 FROM share_links WHERE imported = 1 ORDER BY created_at DESC LIMIT {}",
2635 limit
2636 ),
2637 Some(_) | None => format!(
2638 "SELECT id, url, provider, share_id, title, imported, created_at
2639 FROM share_links ORDER BY created_at DESC LIMIT {}",
2640 limit
2641 ),
2642 };
2643
2644 let mut stmt = conn.prepare(&query)?;
2645
2646 let rows: Vec<_> = stmt
2647 .query_map([], |row| {
2648 Ok((
2649 row.get::<_, String>(0)?,
2650 row.get::<_, String>(1)?,
2651 row.get::<_, String>(2)?,
2652 row.get::<_, Option<String>>(3)?,
2653 row.get::<_, Option<String>>(4)?,
2654 row.get::<_, i64>(5)?,
2655 row.get::<_, i64>(6)?,
2656 ))
2657 })?
2658 .collect::<Result<Vec<_>, _>>()?;
2659
2660 if rows.is_empty() {
2661 if let Some(status) = status_filter {
2662 println!("{} No share links with status '{}'", "[i]".blue(), status);
2663 } else {
2664 println!("{} No share links found", "[i]".blue());
2665 }
2666 println!(" Use 'csm harvest share <url>' to add a share link");
2667 return Ok(());
2668 }
2669
2670 let pending_count: i64 = conn
2672 .query_row(
2673 "SELECT COUNT(*) FROM share_links WHERE imported = 0",
2674 [],
2675 |row| row.get(0),
2676 )
2677 .unwrap_or(0);
2678
2679 let imported_count: i64 = conn
2680 .query_row(
2681 "SELECT COUNT(*) FROM share_links WHERE imported = 1",
2682 [],
2683 |row| row.get(0),
2684 )
2685 .unwrap_or(0);
2686
2687 println!(
2688 "{} {} pending, {} imported",
2689 "[i]".blue(),
2690 pending_count.to_string().yellow(),
2691 imported_count.to_string().green()
2692 );
2693 println!();
2694
2695 for (id, url, provider, _share_id, title, imported, created_at) in rows {
2696 let status = if imported == 1 { "imported" } else { "pending" };
2697 let status_colored = if imported == 1 {
2698 status.green()
2699 } else {
2700 status.yellow()
2701 };
2702
2703 println!(
2704 "{} [{}] {} - {}",
2705 format!("#{}", &id[..8]).dimmed(),
2706 status_colored,
2707 provider.bold(),
2708 title.as_deref().unwrap_or("(untitled)")
2709 );
2710
2711 let display_url = if url.len() > 60 {
2713 format!("{}...", &url[..57])
2714 } else {
2715 url.clone()
2716 };
2717 println!(" {} {}", "URL:".dimmed(), display_url.dimmed());
2718
2719 let timestamp = chrono::DateTime::from_timestamp(created_at, 0)
2721 .map(|dt| dt.format("%Y-%m-%d %H:%M").to_string())
2722 .unwrap_or_else(|| created_at.to_string());
2723 println!(" {} {}", "Added:".dimmed(), timestamp.dimmed());
2724 println!();
2725 }
2726
2727 Ok(())
2728}
2729
2730pub fn harvest_checkpoint(
2736 db_path: Option<&str>,
2737 session_id: &str,
2738 message: Option<&str>,
2739) -> Result<()> {
2740 let db_path = get_db_path(db_path)?;
2741
2742 if !db_path.exists() {
2743 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2744 }
2745
2746 let db = ChatDatabase::open(&db_path)?;
2747 let conn = db.connection();
2748
2749 let session: Option<(i64, String, i64)> = conn
2751 .query_row(
2752 "SELECT id, session_id, message_count FROM sessions WHERE session_id = ? OR id = ?",
2753 params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2754 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2755 )
2756 .ok();
2757
2758 let (internal_id, actual_session_id, msg_count) = match session {
2759 Some(s) => s,
2760 None => {
2761 println!("{} Session not found: {}", "[!]".red(), session_id);
2762 println!(" Use 'csm harvest list' to see available sessions");
2763 anyhow::bail!("Session not found");
2764 }
2765 };
2766
2767 let checkpoint_num: i64 = conn.query_row(
2769 "SELECT COALESCE(MAX(checkpoint_number), 0) + 1 FROM checkpoints WHERE session_id = ?",
2770 [internal_id],
2771 |row| row.get(0),
2772 )?;
2773
2774 let content_hash = format!(
2776 "{:x}",
2777 md5_hash(&format!("{}:{}", actual_session_id, msg_count))
2778 );
2779
2780 let messages: Vec<String> = {
2782 let mut stmt = conn
2783 .prepare("SELECT content FROM messages WHERE session_id = ? ORDER BY message_index")?;
2784 let rows = stmt.query_map([internal_id], |row| row.get::<_, String>(0))?;
2785 rows.filter_map(|r| r.ok()).collect()
2786 };
2787
2788 let snapshot = serde_json::json!({
2789 "message_count": msg_count,
2790 "messages": messages,
2791 });
2792
2793 let default_message = format!("Checkpoint {}", checkpoint_num);
2794 let message_text = message.unwrap_or(&default_message);
2795
2796 conn.execute(
2798 "INSERT INTO checkpoints (session_id, checkpoint_number, message, message_count, content_hash, snapshot)
2799 VALUES (?, ?, ?, ?, ?, ?)",
2800 params![
2801 internal_id,
2802 checkpoint_num,
2803 message_text,
2804 msg_count,
2805 content_hash,
2806 snapshot.to_string()
2807 ],
2808 )?;
2809
2810 println!("{}", "=".repeat(60).cyan());
2811 println!("{}", " Checkpoint Created ".bold().cyan());
2812 println!("{}", "=".repeat(60).cyan());
2813 println!();
2814 println!("{} Session: {}", "[+]".green(), actual_session_id);
2815 println!(
2816 "{} Checkpoint #{}: {}",
2817 "[+]".green(),
2818 checkpoint_num,
2819 message_text
2820 );
2821 println!("{} Messages: {}", "[i]".blue(), msg_count);
2822 println!("{} Hash: {}", "[i]".blue(), &content_hash[..16]);
2823 println!();
2824 println!(
2825 " Use 'csm harvest checkpoints {}' to view history",
2826 session_id
2827 );
2828
2829 Ok(())
2830}
2831
2832pub fn harvest_checkpoints(db_path: Option<&str>, session_id: &str) -> Result<()> {
2834 let db_path = get_db_path(db_path)?;
2835
2836 if !db_path.exists() {
2837 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2838 }
2839
2840 let db = ChatDatabase::open(&db_path)?;
2841 let conn = db.connection();
2842
2843 let session: Option<(i64, String, String)> = conn
2845 .query_row(
2846 "SELECT id, session_id, name FROM sessions WHERE session_id = ? OR id = ?",
2847 params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2848 |row| {
2849 Ok((
2850 row.get(0)?,
2851 row.get(1)?,
2852 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
2853 ))
2854 },
2855 )
2856 .ok();
2857
2858 let (internal_id, actual_session_id, name) = match session {
2859 Some(s) => s,
2860 None => {
2861 println!("{} Session not found: {}", "[!]".red(), session_id);
2862 anyhow::bail!("Session not found");
2863 }
2864 };
2865
2866 println!("{}", "=".repeat(70).cyan());
2867 println!("{}", " Session Checkpoints ".bold().cyan());
2868 println!("{}", "=".repeat(70).cyan());
2869 println!();
2870 println!(
2871 "{} Session: {} {}",
2872 "[i]".blue(),
2873 actual_session_id,
2874 if !name.is_empty() {
2875 format!("({})", name)
2876 } else {
2877 String::new()
2878 }
2879 );
2880 println!();
2881
2882 let mut stmt = conn.prepare(
2883 "SELECT checkpoint_number, message, message_count, content_hash, created_at
2884 FROM checkpoints WHERE session_id = ? ORDER BY checkpoint_number DESC",
2885 )?;
2886
2887 let checkpoints: Vec<_> = stmt
2888 .query_map([internal_id], |row| {
2889 Ok((
2890 row.get::<_, i64>(0)?,
2891 row.get::<_, String>(1)?,
2892 row.get::<_, i64>(2)?,
2893 row.get::<_, String>(3)?,
2894 row.get::<_, String>(4)?,
2895 ))
2896 })?
2897 .collect::<Result<Vec<_>, _>>()?;
2898
2899 if checkpoints.is_empty() {
2900 println!("{} No checkpoints found for this session", "[i]".blue());
2901 println!(
2902 " Use 'csm harvest checkpoint {} -m \"message\"' to create one",
2903 session_id
2904 );
2905 return Ok(());
2906 }
2907
2908 println!("{} {} checkpoints found:", "[i]".blue(), checkpoints.len());
2909 println!();
2910
2911 for (num, msg, msg_count, hash, created_at) in checkpoints {
2912 println!(" {} #{} - {}", "*".cyan(), num.to_string().bold(), msg);
2913 println!(
2914 " {} messages | {} | {}",
2915 msg_count,
2916 &hash[..12],
2917 created_at.dimmed()
2918 );
2919 }
2920
2921 println!();
2922 println!(
2923 " Use 'csm harvest restore {} <checkpoint>' to restore",
2924 session_id
2925 );
2926
2927 Ok(())
2928}
2929
2930pub fn harvest_restore_checkpoint(
2932 db_path: Option<&str>,
2933 session_id: &str,
2934 checkpoint_number: i64,
2935) -> Result<()> {
2936 let db_path = get_db_path(db_path)?;
2937
2938 if !db_path.exists() {
2939 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
2940 }
2941
2942 let db = ChatDatabase::open(&db_path)?;
2943 let conn = db.connection();
2944
2945 let session: Option<(i64, String)> = conn
2947 .query_row(
2948 "SELECT id, session_id FROM sessions WHERE session_id = ? OR id = ?",
2949 params![session_id, session_id.parse::<i64>().unwrap_or(-1)],
2950 |row| Ok((row.get(0)?, row.get(1)?)),
2951 )
2952 .ok();
2953
2954 let (internal_id, actual_session_id) = match session {
2955 Some(s) => s,
2956 None => {
2957 println!("{} Session not found: {}", "[!]".red(), session_id);
2958 anyhow::bail!("Session not found");
2959 }
2960 };
2961
2962 let checkpoint: Option<(String, i64)> = conn
2964 .query_row(
2965 "SELECT snapshot, message_count FROM checkpoints
2966 WHERE session_id = ? AND checkpoint_number = ?",
2967 params![internal_id, checkpoint_number],
2968 |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
2969 )
2970 .ok();
2971
2972 let (snapshot_json, original_msg_count) = match checkpoint {
2973 Some(c) => c,
2974 None => {
2975 println!(
2976 "{} Checkpoint #{} not found for session {}",
2977 "[!]".red(),
2978 checkpoint_number,
2979 session_id
2980 );
2981 println!(
2982 " Use 'csm harvest checkpoints {}' to see available checkpoints",
2983 session_id
2984 );
2985 anyhow::bail!("Checkpoint not found");
2986 }
2987 };
2988
2989 let snapshot: serde_json::Value =
2991 serde_json::from_str(&snapshot_json).context("Failed to parse checkpoint snapshot")?;
2992
2993 let messages = snapshot["messages"]
2994 .as_array()
2995 .context("Invalid snapshot format")?;
2996
2997 println!("{}", "=".repeat(60).cyan());
2998 println!("{}", " Restore Checkpoint ".bold().yellow());
2999 println!("{}", "=".repeat(60).cyan());
3000 println!();
3001 println!("{} Session: {}", "[i]".blue(), actual_session_id);
3002 println!(
3003 "{} Restoring to checkpoint #{}",
3004 "[!]".yellow(),
3005 checkpoint_number
3006 );
3007 println!(
3008 "{} Messages to restore: {}",
3009 "[i]".blue(),
3010 original_msg_count
3011 );
3012 println!();
3013
3014 let deleted = conn.execute("DELETE FROM messages WHERE session_id = ?", [internal_id])?;
3016
3017 println!("{} Removed {} current messages", "[-]".red(), deleted);
3018
3019 for (idx, msg) in messages.iter().enumerate() {
3021 if let Some(content) = msg.as_str() {
3022 conn.execute(
3023 "INSERT INTO messages (session_id, message_index, role, content) VALUES (?, ?, 'unknown', ?)",
3024 params![internal_id, idx as i64, content],
3025 )?;
3026 }
3027 }
3028
3029 conn.execute(
3031 "UPDATE sessions SET message_count = ?, updated_at = datetime('now') WHERE id = ?",
3032 params![original_msg_count, internal_id],
3033 )?;
3034
3035 println!(
3036 "{} Restored {} messages from checkpoint",
3037 "[+]".green(),
3038 messages.len()
3039 );
3040 println!();
3041 println!(
3042 "{} Session restored to checkpoint #{}",
3043 "[+]".green().bold(),
3044 checkpoint_number
3045 );
3046
3047 Ok(())
3048}
3049
3050pub fn harvest_rebuild_fts(db_path: Option<&str>) -> Result<()> {
3056 let db_path = get_db_path(db_path)?;
3057
3058 if !db_path.exists() {
3059 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3060 }
3061
3062 let conn = Connection::open(&db_path)?;
3063
3064 println!("{}", "=".repeat(70).cyan());
3065 println!("{} Rebuilding Full-Text Search Index", "[*]".bold());
3066 println!("{}", "=".repeat(70).cyan());
3067 println!();
3068
3069 println!("{} Dropping old FTS index...", "[*]".blue());
3071 conn.execute_batch(
3072 r#"
3073 DROP TRIGGER IF EXISTS messages_v2_ai;
3074 DROP TRIGGER IF EXISTS messages_v2_ad;
3075 DROP TRIGGER IF EXISTS messages_v2_au;
3076 DROP TABLE IF EXISTS messages_fts;
3077 "#,
3078 )?;
3079
3080 println!("{} Creating new FTS index...", "[*]".blue());
3082 conn.execute_batch(
3083 r#"
3084 CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
3085 content_raw
3086 );
3087
3088 CREATE TRIGGER IF NOT EXISTS messages_v2_ai AFTER INSERT ON messages_v2 BEGIN
3089 INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3090 END;
3091
3092 CREATE TRIGGER IF NOT EXISTS messages_v2_ad AFTER DELETE ON messages_v2 BEGIN
3093 INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3094 END;
3095
3096 CREATE TRIGGER IF NOT EXISTS messages_v2_au AFTER UPDATE ON messages_v2 BEGIN
3097 INSERT INTO messages_fts(messages_fts, rowid, content_raw) VALUES ('delete', old.id, old.content_raw);
3098 INSERT INTO messages_fts(rowid, content_raw) VALUES (new.id, new.content_raw);
3099 END;
3100 "#,
3101 )?;
3102
3103 let total_messages: i64 =
3105 conn.query_row("SELECT COUNT(*) FROM messages_v2", [], |row| row.get(0))?;
3106
3107 println!("{} Indexing {} messages...", "[*]".blue(), total_messages);
3109
3110 conn.execute(
3111 "INSERT INTO messages_fts(rowid, content_raw) SELECT id, content_raw FROM messages_v2",
3112 [],
3113 )?;
3114
3115 let indexed: i64 = conn.query_row("SELECT COUNT(*) FROM messages_fts", [], |row| row.get(0))?;
3117
3118 println!();
3119 println!("{} FTS index rebuilt successfully!", "[+]".green().bold());
3120 println!(" {} messages indexed", indexed);
3121
3122 Ok(())
3123}
3124
3125pub fn harvest_search(
3127 db_path: Option<&str>,
3128 query: &str,
3129 provider_filter: Option<&str>,
3130 limit: usize,
3131) -> Result<()> {
3132 let db_path = get_db_path(db_path)?;
3133
3134 if !db_path.exists() {
3135 anyhow::bail!("Harvest database not found. Run 'csm harvest init' first.");
3136 }
3137
3138 let db = ChatDatabase::open(&db_path)?;
3139 let conn = db.connection();
3140
3141 println!("{}", "=".repeat(70).cyan());
3142 println!("{} Search: {}", "[?]".bold(), query.bold());
3143 println!("{}", "=".repeat(70).cyan());
3144 println!();
3145
3146 let results: Vec<(String, String, String, String)> = {
3148 let fts_exists: bool = conn
3150 .query_row(
3151 "SELECT 1 FROM sqlite_master WHERE type='table' AND name='messages_fts'",
3152 [],
3153 |_| Ok(true),
3154 )
3155 .unwrap_or(false);
3156
3157 if fts_exists {
3158 let sql = format!(
3160 "SELECT s.id, s.provider, s.title, m.content_raw
3161 FROM messages_fts fts
3162 JOIN messages_v2 m ON m.id = fts.rowid
3163 JOIN sessions s ON m.session_id = s.id
3164 WHERE messages_fts MATCH ?
3165 {}
3166 LIMIT {}",
3167 if provider_filter.is_some() {
3168 "AND s.provider = ?"
3169 } else {
3170 ""
3171 },
3172 limit
3173 );
3174
3175 let mut stmt = conn.prepare(&sql)?;
3176
3177 if let Some(provider) = provider_filter {
3178 stmt.query_map(params![query, provider], |row| {
3179 Ok((
3180 row.get::<_, String>(0)?,
3181 row.get::<_, String>(1)?,
3182 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3183 row.get::<_, String>(3)?,
3184 ))
3185 })?
3186 .collect::<Result<Vec<_>, _>>()?
3187 } else {
3188 stmt.query_map([query], |row| {
3189 Ok((
3190 row.get::<_, String>(0)?,
3191 row.get::<_, String>(1)?,
3192 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3193 row.get::<_, String>(3)?,
3194 ))
3195 })?
3196 .collect::<Result<Vec<_>, _>>()?
3197 }
3198 } else {
3199 let search_pattern = format!("%{}%", query);
3201 let sql = format!(
3202 "SELECT s.id, s.provider, s.title, m.content_raw
3203 FROM messages_v2 m
3204 JOIN sessions s ON m.session_id = s.id
3205 WHERE m.content_raw LIKE ?
3206 {}
3207 LIMIT {}",
3208 if provider_filter.is_some() {
3209 "AND s.provider = ?"
3210 } else {
3211 ""
3212 },
3213 limit
3214 );
3215
3216 let mut stmt = conn.prepare(&sql)?;
3217
3218 if let Some(provider) = provider_filter {
3219 stmt.query_map(params![search_pattern, provider], |row| {
3220 Ok((
3221 row.get::<_, String>(0)?,
3222 row.get::<_, String>(1)?,
3223 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3224 row.get::<_, String>(3)?,
3225 ))
3226 })?
3227 .collect::<Result<Vec<_>, _>>()?
3228 } else {
3229 stmt.query_map([search_pattern], |row| {
3230 Ok((
3231 row.get::<_, String>(0)?,
3232 row.get::<_, String>(1)?,
3233 row.get::<_, Option<String>>(2)?.unwrap_or_default(),
3234 row.get::<_, String>(3)?,
3235 ))
3236 })?
3237 .collect::<Result<Vec<_>, _>>()?
3238 }
3239 }
3240 };
3241
3242 if results.is_empty() {
3243 println!("{} No results found for '{}'", "[i]".blue(), query);
3244 return Ok(());
3245 }
3246
3247 println!("{} Found {} result(s):", "[i]".blue(), results.len());
3248 println!();
3249
3250 for (session_id, provider, title, content) in results {
3251 let display_name = if title.is_empty() {
3253 session_id.clone()
3254 } else {
3255 format!("{} ({})", title, &session_id[..8.min(session_id.len())])
3256 };
3257
3258 println!(
3259 "{} {} [{}]",
3260 "*".cyan(),
3261 display_name.bold(),
3262 provider.dimmed()
3263 );
3264
3265 let snippet = create_search_snippet(&content, query, 100);
3267 println!(" {}", snippet.dimmed());
3268 println!();
3269 }
3270
3271 Ok(())
3272}
3273
3274fn create_search_snippet(content: &str, query: &str, max_len: usize) -> String {
3276 let content_lower = content.to_lowercase();
3277 let query_lower = query.to_lowercase();
3278
3279 if let Some(pos) = content_lower.find(&query_lower) {
3280 let start = pos.saturating_sub(max_len / 2);
3281 let end = (pos + query.len() + max_len / 2).min(content.len());
3282
3283 let mut snippet = String::new();
3284 if start > 0 {
3285 snippet.push_str("...");
3286 }
3287 snippet.push_str(&content[start..end]);
3288 if end < content.len() {
3289 snippet.push_str("...");
3290 }
3291
3292 snippet.replace('\n', " ").replace('\r', "")
3294 } else {
3295 if content.len() > max_len {
3297 format!("{}...", &content[..max_len])
3298 } else {
3299 content.to_string()
3300 }
3301 }
3302}
3303
3304fn md5_hash(data: &str) -> u128 {
3306 let mut hash: u128 = 0;
3308 for (i, byte) in data.bytes().enumerate() {
3309 hash = hash.wrapping_add((byte as u128).wrapping_mul((i as u128).wrapping_add(1)));
3310 hash = hash.rotate_left(7);
3311 }
3312 hash
3313}
3314