Skip to main content

chasm_cli/
storage.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: Apache-2.0
3//! VS Code storage (SQLite database) operations
4
5use crate::error::{CsmError, Result};
6use crate::models::{ChatSession, ChatSessionIndex, ChatSessionIndexEntry};
7use crate::workspace::{get_empty_window_sessions_path, get_workspace_storage_path};
8use once_cell::sync::Lazy;
9use regex::Regex;
10use rusqlite::Connection;
11use std::path::{Path, PathBuf};
12use sysinfo::System;
13
14/// Regex to match any Unicode escape sequence (valid or not)
15static UNICODE_ESCAPE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\\u[0-9a-fA-F]{4}").unwrap());
16
17/// Sanitize JSON content by replacing lone surrogates with replacement character.
18/// VS Code sometimes writes invalid JSON with lone Unicode surrogates (e.g., \udde0).
19fn sanitize_json_unicode(content: &str) -> String {
20    // Process all \uXXXX sequences and fix lone surrogates
21    let mut result = String::with_capacity(content.len());
22    let mut last_end = 0;
23
24    // Collect all matches first to avoid borrowing issues
25    let matches: Vec<_> = UNICODE_ESCAPE_RE.find_iter(content).collect();
26
27    for (i, mat) in matches.iter().enumerate() {
28        let start = mat.start();
29        let end = mat.end();
30
31        // Add content before this match
32        result.push_str(&content[last_end..start]);
33
34        // Parse the hex value from the match itself (always ASCII \uXXXX)
35        let hex_str = &mat.as_str()[2..]; // Skip the \u prefix
36        if let Ok(code_point) = u16::from_str_radix(hex_str, 16) {
37            // Check if it's a high surrogate (D800-DBFF)
38            if (0xD800..=0xDBFF).contains(&code_point) {
39                // Check if next match is immediately following and is a low surrogate
40                let is_valid_pair = if let Some(next_mat) = matches.get(i + 1) {
41                    // Must be immediately adjacent (no gap)
42                    if next_mat.start() == end {
43                        let next_hex = &next_mat.as_str()[2..];
44                        if let Ok(next_cp) = u16::from_str_radix(next_hex, 16) {
45                            (0xDC00..=0xDFFF).contains(&next_cp)
46                        } else {
47                            false
48                        }
49                    } else {
50                        false
51                    }
52                } else {
53                    false
54                };
55
56                if is_valid_pair {
57                    // Valid surrogate pair, keep the high surrogate
58                    result.push_str(mat.as_str());
59                } else {
60                    // Lone high surrogate - replace with replacement char
61                    result.push_str("\\uFFFD");
62                }
63            }
64            // Check if it's a low surrogate (DC00-DFFF)
65            else if (0xDC00..=0xDFFF).contains(&code_point) {
66                // Check if previous match was immediately before and was a high surrogate
67                let is_valid_pair = if i > 0 {
68                    if let Some(prev_mat) = matches.get(i - 1) {
69                        // Must be immediately adjacent (no gap)
70                        if prev_mat.end() == start {
71                            let prev_hex = &prev_mat.as_str()[2..];
72                            if let Ok(prev_cp) = u16::from_str_radix(prev_hex, 16) {
73                                (0xD800..=0xDBFF).contains(&prev_cp)
74                            } else {
75                                false
76                            }
77                        } else {
78                            false
79                        }
80                    } else {
81                        false
82                    }
83                } else {
84                    false
85                };
86
87                if is_valid_pair {
88                    // Part of valid surrogate pair, keep it
89                    result.push_str(mat.as_str());
90                } else {
91                    // Lone low surrogate - replace with replacement char
92                    result.push_str("\\uFFFD");
93                }
94            }
95            // Normal code point
96            else {
97                result.push_str(mat.as_str());
98            }
99        } else {
100            // Invalid hex - keep as is
101            result.push_str(mat.as_str());
102        }
103        last_end = end;
104    }
105
106    // Add remaining content
107    result.push_str(&content[last_end..]);
108    result
109}
110
111/// Try to parse JSON, sanitizing invalid Unicode if needed
112pub fn parse_session_json(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
113    match serde_json::from_str::<ChatSession>(content) {
114        Ok(session) => Ok(session),
115        Err(e) => {
116            // If parsing fails due to Unicode issue, try sanitizing
117            if e.to_string().contains("surrogate") || e.to_string().contains("escape") {
118                let sanitized = sanitize_json_unicode(content);
119                serde_json::from_str::<ChatSession>(&sanitized)
120            } else {
121                Err(e)
122            }
123        }
124    }
125}
126
127/// Get the path to the workspace storage database
128pub fn get_workspace_storage_db(workspace_id: &str) -> Result<PathBuf> {
129    let storage_path = get_workspace_storage_path()?;
130    Ok(storage_path.join(workspace_id).join("state.vscdb"))
131}
132
133/// Read the chat session index from VS Code storage
134pub fn read_chat_session_index(db_path: &Path) -> Result<ChatSessionIndex> {
135    let conn = Connection::open(db_path)?;
136
137    let result: std::result::Result<String, rusqlite::Error> = conn.query_row(
138        "SELECT value FROM ItemTable WHERE key = ?",
139        ["chat.ChatSessionStore.index"],
140        |row| row.get(0),
141    );
142
143    match result {
144        Ok(json_str) => serde_json::from_str(&json_str)
145            .map_err(|e| CsmError::InvalidSessionFormat(e.to_string())),
146        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(ChatSessionIndex::default()),
147        Err(e) => Err(CsmError::SqliteError(e)),
148    }
149}
150
151/// Write the chat session index to VS Code storage
152pub fn write_chat_session_index(db_path: &Path, index: &ChatSessionIndex) -> Result<()> {
153    let conn = Connection::open(db_path)?;
154    let json_str = serde_json::to_string(index)?;
155
156    // Check if the key exists
157    let exists: bool = conn.query_row(
158        "SELECT COUNT(*) > 0 FROM ItemTable WHERE key = ?",
159        ["chat.ChatSessionStore.index"],
160        |row| row.get(0),
161    )?;
162
163    if exists {
164        conn.execute(
165            "UPDATE ItemTable SET value = ? WHERE key = ?",
166            [&json_str, "chat.ChatSessionStore.index"],
167        )?;
168    } else {
169        conn.execute(
170            "INSERT INTO ItemTable (key, value) VALUES (?, ?)",
171            ["chat.ChatSessionStore.index", &json_str],
172        )?;
173    }
174
175    Ok(())
176}
177
178/// Add a session to the VS Code index
179pub fn add_session_to_index(
180    db_path: &Path,
181    session_id: &str,
182    title: &str,
183    last_message_date_ms: i64,
184    is_imported: bool,
185    initial_location: &str,
186    is_empty: bool,
187) -> Result<()> {
188    let mut index = read_chat_session_index(db_path)?;
189
190    index.entries.insert(
191        session_id.to_string(),
192        ChatSessionIndexEntry {
193            session_id: session_id.to_string(),
194            title: title.to_string(),
195            last_message_date: last_message_date_ms,
196            is_imported,
197            initial_location: initial_location.to_string(),
198            is_empty,
199        },
200    );
201
202    write_chat_session_index(db_path, &index)
203}
204
205/// Remove a session from the VS Code index
206#[allow(dead_code)]
207pub fn remove_session_from_index(db_path: &Path, session_id: &str) -> Result<bool> {
208    let mut index = read_chat_session_index(db_path)?;
209    let removed = index.entries.remove(session_id).is_some();
210    if removed {
211        write_chat_session_index(db_path, &index)?;
212    }
213    Ok(removed)
214}
215
216/// Sync the VS Code index with sessions on disk (remove stale entries, add missing ones)
217pub fn sync_session_index(
218    workspace_id: &str,
219    chat_sessions_dir: &Path,
220    force: bool,
221) -> Result<(usize, usize)> {
222    let db_path = get_workspace_storage_db(workspace_id)?;
223
224    if !db_path.exists() {
225        return Err(CsmError::WorkspaceNotFound(format!(
226            "Database not found: {}",
227            db_path.display()
228        )));
229    }
230
231    // Check if VS Code is running
232    if !force && is_vscode_running() {
233        return Err(CsmError::VSCodeRunning);
234    }
235
236    // Get current index
237    let mut index = read_chat_session_index(&db_path)?;
238
239    // Get session files on disk
240    let mut files_on_disk: std::collections::HashSet<String> = std::collections::HashSet::new();
241    if chat_sessions_dir.exists() {
242        for entry in std::fs::read_dir(chat_sessions_dir)? {
243            let entry = entry?;
244            let path = entry.path();
245            if path.extension().map(|e| e == "json").unwrap_or(false) {
246                if let Some(stem) = path.file_stem() {
247                    files_on_disk.insert(stem.to_string_lossy().to_string());
248                }
249            }
250        }
251    }
252
253    // Remove stale entries (in index but not on disk)
254    let stale_ids: Vec<String> = index
255        .entries
256        .keys()
257        .filter(|id| !files_on_disk.contains(*id))
258        .cloned()
259        .collect();
260
261    let removed = stale_ids.len();
262    for id in &stale_ids {
263        index.entries.remove(id);
264    }
265
266    // Add/update sessions from disk
267    let mut added = 0;
268    for entry in std::fs::read_dir(chat_sessions_dir)? {
269        let entry = entry?;
270        let path = entry.path();
271
272        if path.extension().map(|e| e == "json").unwrap_or(false) {
273            if let Ok(content) = std::fs::read_to_string(&path) {
274                if let Ok(session) = parse_session_json(&content) {
275                    let session_id = session.session_id.clone().unwrap_or_else(|| {
276                        path.file_stem()
277                            .map(|s| s.to_string_lossy().to_string())
278                            .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
279                    });
280
281                    let title = session.title();
282                    let is_empty = session.is_empty();
283                    let last_message_date = session.last_message_date;
284                    let initial_location = session.initial_location.clone();
285
286                    index.entries.insert(
287                        session_id.clone(),
288                        ChatSessionIndexEntry {
289                            session_id,
290                            title,
291                            last_message_date,
292                            is_imported: session.is_imported,
293                            initial_location,
294                            is_empty,
295                        },
296                    );
297                    added += 1;
298                }
299            }
300        }
301    }
302
303    // Write the synced index
304    write_chat_session_index(&db_path, &index)?;
305
306    Ok((added, removed))
307}
308
309/// Register all sessions from a directory into the VS Code index
310pub fn register_all_sessions_from_directory(
311    workspace_id: &str,
312    chat_sessions_dir: &Path,
313    force: bool,
314) -> Result<usize> {
315    let db_path = get_workspace_storage_db(workspace_id)?;
316
317    if !db_path.exists() {
318        return Err(CsmError::WorkspaceNotFound(format!(
319            "Database not found: {}",
320            db_path.display()
321        )));
322    }
323
324    // Check if VS Code is running
325    if !force && is_vscode_running() {
326        return Err(CsmError::VSCodeRunning);
327    }
328
329    // Use sync to ensure index matches disk
330    let (added, removed) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
331
332    // Print individual session info
333    for entry in std::fs::read_dir(chat_sessions_dir)? {
334        let entry = entry?;
335        let path = entry.path();
336
337        if path.extension().map(|e| e == "json").unwrap_or(false) {
338            if let Ok(content) = std::fs::read_to_string(&path) {
339                if let Ok(session) = parse_session_json(&content) {
340                    let session_id = session.session_id.clone().unwrap_or_else(|| {
341                        path.file_stem()
342                            .map(|s| s.to_string_lossy().to_string())
343                            .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
344                    });
345
346                    let title = session.title();
347
348                    println!(
349                        "[OK] Registered: {} ({}...)",
350                        title,
351                        &session_id[..12.min(session_id.len())]
352                    );
353                }
354            }
355        }
356    }
357
358    if removed > 0 {
359        println!("[OK] Removed {} stale index entries", removed);
360    }
361
362    Ok(added)
363}
364
365/// Check if VS Code is currently running
366pub fn is_vscode_running() -> bool {
367    let mut sys = System::new();
368    sys.refresh_processes();
369
370    for process in sys.processes().values() {
371        let name = process.name().to_lowercase();
372        if name.contains("code") && !name.contains("codec") {
373            return true;
374        }
375    }
376
377    false
378}
379
380/// Backup workspace sessions to a timestamped directory
381pub fn backup_workspace_sessions(workspace_dir: &Path) -> Result<Option<PathBuf>> {
382    let chat_sessions_dir = workspace_dir.join("chatSessions");
383
384    if !chat_sessions_dir.exists() {
385        return Ok(None);
386    }
387
388    let timestamp = std::time::SystemTime::now()
389        .duration_since(std::time::UNIX_EPOCH)
390        .unwrap()
391        .as_secs();
392
393    let backup_dir = workspace_dir.join(format!("chatSessions-backup-{}", timestamp));
394
395    // Copy directory recursively
396    copy_dir_all(&chat_sessions_dir, &backup_dir)?;
397
398    Ok(Some(backup_dir))
399}
400
401/// Recursively copy a directory
402fn copy_dir_all(src: &Path, dst: &Path) -> Result<()> {
403    std::fs::create_dir_all(dst)?;
404
405    for entry in std::fs::read_dir(src)? {
406        let entry = entry?;
407        let src_path = entry.path();
408        let dst_path = dst.join(entry.file_name());
409
410        if src_path.is_dir() {
411            copy_dir_all(&src_path, &dst_path)?;
412        } else {
413            std::fs::copy(&src_path, &dst_path)?;
414        }
415    }
416
417    Ok(())
418}
419
420// =============================================================================
421// Empty Window Sessions (ALL SESSIONS)
422// =============================================================================
423
424/// Read all empty window chat sessions (not tied to any workspace)
425/// These appear in VS Code's "ALL SESSIONS" panel
426pub fn read_empty_window_sessions() -> Result<Vec<ChatSession>> {
427    let sessions_path = get_empty_window_sessions_path()?;
428
429    if !sessions_path.exists() {
430        return Ok(Vec::new());
431    }
432
433    let mut sessions = Vec::new();
434
435    for entry in std::fs::read_dir(&sessions_path)? {
436        let entry = entry?;
437        let path = entry.path();
438
439        if path.extension().is_some_and(|e| e == "json") {
440            if let Ok(content) = std::fs::read_to_string(&path) {
441                if let Ok(session) = parse_session_json(&content) {
442                    sessions.push(session);
443                }
444            }
445        }
446    }
447
448    // Sort by last message date (most recent first)
449    sessions.sort_by(|a, b| b.last_message_date.cmp(&a.last_message_date));
450
451    Ok(sessions)
452}
453
454/// Get a specific empty window session by ID
455#[allow(dead_code)]
456pub fn get_empty_window_session(session_id: &str) -> Result<Option<ChatSession>> {
457    let sessions_path = get_empty_window_sessions_path()?;
458    let session_path = sessions_path.join(format!("{}.json", session_id));
459
460    if !session_path.exists() {
461        return Ok(None);
462    }
463
464    let content = std::fs::read_to_string(&session_path)?;
465    let session: ChatSession = serde_json::from_str(&content)
466        .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
467
468    Ok(Some(session))
469}
470
471/// Write an empty window session
472#[allow(dead_code)]
473pub fn write_empty_window_session(session: &ChatSession) -> Result<PathBuf> {
474    let sessions_path = get_empty_window_sessions_path()?;
475
476    // Create directory if it doesn't exist
477    std::fs::create_dir_all(&sessions_path)?;
478
479    let session_id = session.session_id.as_deref().unwrap_or("unknown");
480    let session_path = sessions_path.join(format!("{}.json", session_id));
481    let content = serde_json::to_string_pretty(session)?;
482    std::fs::write(&session_path, content)?;
483
484    Ok(session_path)
485}
486
487/// Delete an empty window session
488#[allow(dead_code)]
489pub fn delete_empty_window_session(session_id: &str) -> Result<bool> {
490    let sessions_path = get_empty_window_sessions_path()?;
491    let session_path = sessions_path.join(format!("{}.json", session_id));
492
493    if session_path.exists() {
494        std::fs::remove_file(&session_path)?;
495        Ok(true)
496    } else {
497        Ok(false)
498    }
499}
500
501/// Count empty window sessions
502pub fn count_empty_window_sessions() -> Result<usize> {
503    let sessions_path = get_empty_window_sessions_path()?;
504
505    if !sessions_path.exists() {
506        return Ok(0);
507    }
508
509    let count = std::fs::read_dir(&sessions_path)?
510        .filter_map(|e| e.ok())
511        .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
512        .count();
513
514    Ok(count)
515}