Skip to main content

chasm_cli/
storage.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: Apache-2.0
3//! VS Code storage (SQLite database) operations
4
5use crate::error::{CsmError, Result};
6use crate::models::{ChatRequest, ChatSession, ChatSessionIndex, ChatSessionIndexEntry};
7use crate::workspace::{get_empty_window_sessions_path, get_workspace_storage_path};
8use once_cell::sync::Lazy;
9use regex::Regex;
10use rusqlite::Connection;
11use std::path::{Path, PathBuf};
12use sysinfo::System;
13
14/// Regex to match any Unicode escape sequence (valid or not)
15static UNICODE_ESCAPE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\\u[0-9a-fA-F]{4}").unwrap());
16
/// On-disk encoding of a VS Code chat session file; tells callers which parsing
/// strategy applies.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VsCodeSessionFormat {
    /// Legacy JSON format (VS Code < 1.109.0): a single JSON object with the
    /// `ChatSession` structure.
    LegacyJson,
    /// JSON Lines format (VS Code >= 1.109.0, January 2026+): event-sourced entries,
    /// kind 0 (initial), kind 1 (delta), kind 2 (requests).
    JsonLines,
}

impl VsCodeSessionFormat {
    /// Pick the format from the file extension: exactly `jsonl` selects
    /// [`Self::JsonLines`]; anything else (including no extension) is
    /// [`Self::LegacyJson`].
    pub fn from_path(path: &Path) -> Self {
        let extension = path.extension().and_then(|ext| ext.to_str());
        if extension == Some("jsonl") {
            Self::JsonLines
        } else {
            Self::LegacyJson
        }
    }

    /// Lowest VS Code version that writes this format.
    pub fn min_vscode_version(&self) -> &'static str {
        match *self {
            Self::JsonLines => "1.109.0",
            Self::LegacyJson => "1.0.0",
        }
    }

    /// Short human-readable label for the format.
    pub fn description(&self) -> &'static str {
        match *self {
            Self::JsonLines => "JSON Lines (event-sourced, VS Code 1.109.0+)",
            Self::LegacyJson => "Legacy JSON (single object)",
        }
    }
}

impl std::fmt::Display for VsCodeSessionFormat {
    /// Writes the same text as [`VsCodeSessionFormat::description`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.description())
    }
}
59
60/// Sanitize JSON content by replacing lone surrogates with replacement character.
61/// VS Code sometimes writes invalid JSON with lone Unicode surrogates (e.g., \udde0).
62fn sanitize_json_unicode(content: &str) -> String {
63    // Process all \uXXXX sequences and fix lone surrogates
64    let mut result = String::with_capacity(content.len());
65    let mut last_end = 0;
66
67    // Collect all matches first to avoid borrowing issues
68    let matches: Vec<_> = UNICODE_ESCAPE_RE.find_iter(content).collect();
69
70    for (i, mat) in matches.iter().enumerate() {
71        let start = mat.start();
72        let end = mat.end();
73
74        // Add content before this match
75        result.push_str(&content[last_end..start]);
76
77        // Parse the hex value from the match itself (always ASCII \uXXXX)
78        let hex_str = &mat.as_str()[2..]; // Skip the \u prefix
79        if let Ok(code_point) = u16::from_str_radix(hex_str, 16) {
80            // Check if it's a high surrogate (D800-DBFF)
81            if (0xD800..=0xDBFF).contains(&code_point) {
82                // Check if next match is immediately following and is a low surrogate
83                let is_valid_pair = if let Some(next_mat) = matches.get(i + 1) {
84                    // Must be immediately adjacent (no gap)
85                    if next_mat.start() == end {
86                        let next_hex = &next_mat.as_str()[2..];
87                        if let Ok(next_cp) = u16::from_str_radix(next_hex, 16) {
88                            (0xDC00..=0xDFFF).contains(&next_cp)
89                        } else {
90                            false
91                        }
92                    } else {
93                        false
94                    }
95                } else {
96                    false
97                };
98
99                if is_valid_pair {
100                    // Valid surrogate pair, keep the high surrogate
101                    result.push_str(mat.as_str());
102                } else {
103                    // Lone high surrogate - replace with replacement char
104                    result.push_str("\\uFFFD");
105                }
106            }
107            // Check if it's a low surrogate (DC00-DFFF)
108            else if (0xDC00..=0xDFFF).contains(&code_point) {
109                // Check if previous match was immediately before and was a high surrogate
110                let is_valid_pair = if i > 0 {
111                    if let Some(prev_mat) = matches.get(i - 1) {
112                        // Must be immediately adjacent (no gap)
113                        if prev_mat.end() == start {
114                            let prev_hex = &prev_mat.as_str()[2..];
115                            if let Ok(prev_cp) = u16::from_str_radix(prev_hex, 16) {
116                                (0xD800..=0xDBFF).contains(&prev_cp)
117                            } else {
118                                false
119                            }
120                        } else {
121                            false
122                        }
123                    } else {
124                        false
125                    }
126                } else {
127                    false
128                };
129
130                if is_valid_pair {
131                    // Part of valid surrogate pair, keep it
132                    result.push_str(mat.as_str());
133                } else {
134                    // Lone low surrogate - replace with replacement char
135                    result.push_str("\\uFFFD");
136                }
137            }
138            // Normal code point
139            else {
140                result.push_str(mat.as_str());
141            }
142        } else {
143            // Invalid hex - keep as is
144            result.push_str(mat.as_str());
145        }
146        last_end = end;
147    }
148
149    // Add remaining content
150    result.push_str(&content[last_end..]);
151    result
152}
153
154/// Try to parse JSON, sanitizing invalid Unicode if needed
155pub fn parse_session_json(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
156    match serde_json::from_str::<ChatSession>(content) {
157        Ok(session) => Ok(session),
158        Err(e) => {
159            // If parsing fails due to Unicode issue, try sanitizing
160            if e.to_string().contains("surrogate") || e.to_string().contains("escape") {
161                let sanitized = sanitize_json_unicode(content);
162                serde_json::from_str::<ChatSession>(&sanitized)
163            } else {
164                Err(e)
165            }
166        }
167    }
168}
169
/// Value of the `kind` field on each line of a JSONL session file (VS Code 1.109.0+).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum JsonlKind {
    /// `kind: 0` - initial session state.
    Initial = 0,
    /// `kind: 1` - delta update to specific keys.
    Delta = 1,
    /// `kind: 2` - full requests array update.
    RequestsUpdate = 2,
}
180
181/// Parse a JSONL (JSON Lines) session file (VS Code 1.109.0+ format)
182/// Each line is a JSON object with 'kind' field indicating the type:
183/// - kind 0: Initial session metadata with 'v' containing ChatSession-like structure
184/// - kind 1: Delta update with 'k' (keys path) and 'v' (value)
185/// - kind 2: Full requests array update with 'k' and 'v'
186pub fn parse_session_jsonl(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
187    let mut session = ChatSession {
188        version: 3,
189        session_id: None,
190        creation_date: 0,
191        last_message_date: 0,
192        is_imported: false,
193        initial_location: "panel".to_string(),
194        custom_title: None,
195        requester_username: None,
196        requester_avatar_icon_uri: None,
197        responder_username: None,
198        responder_avatar_icon_uri: None,
199        requests: Vec::new(),
200    };
201
202    for line in content.lines() {
203        let line = line.trim();
204        if line.is_empty() {
205            continue;
206        }
207
208        // Parse each line as a JSON object
209        let entry: serde_json::Value = match serde_json::from_str(line) {
210            Ok(v) => v,
211            Err(_) => {
212                // Try sanitizing Unicode
213                let sanitized = sanitize_json_unicode(line);
214                serde_json::from_str(&sanitized)?
215            }
216        };
217
218        let kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(0);
219
220        match kind {
221            0 => {
222                // Initial state - 'v' contains the session metadata
223                if let Some(v) = entry.get("v") {
224                    // Parse version
225                    if let Some(version) = v.get("version").and_then(|x| x.as_u64()) {
226                        session.version = version as u32;
227                    }
228                    // Parse session ID
229                    if let Some(sid) = v.get("sessionId").and_then(|x| x.as_str()) {
230                        session.session_id = Some(sid.to_string());
231                    }
232                    // Parse creation date
233                    if let Some(cd) = v.get("creationDate").and_then(|x| x.as_i64()) {
234                        session.creation_date = cd;
235                    }
236                    // Parse initial location
237                    if let Some(loc) = v.get("initialLocation").and_then(|x| x.as_str()) {
238                        session.initial_location = loc.to_string();
239                    }
240                    // Parse responder username
241                    if let Some(ru) = v.get("responderUsername").and_then(|x| x.as_str()) {
242                        session.responder_username = Some(ru.to_string());
243                    }
244                    // Parse requests array if present
245                    if let Some(requests) = v.get("requests") {
246                        if let Ok(reqs) =
247                            serde_json::from_value::<Vec<ChatRequest>>(requests.clone())
248                        {
249                            session.requests = reqs;
250                        }
251                    }
252                }
253            }
254            1 => {
255                // Delta update - 'k' is array of key path, 'v' is the value
256                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
257                    if let Some(keys_arr) = keys.as_array() {
258                        // Handle known keys
259                        if keys_arr.len() == 1 {
260                            if let Some(key) = keys_arr[0].as_str() {
261                                match key {
262                                    "customTitle" => {
263                                        if let Some(title) = value.as_str() {
264                                            session.custom_title = Some(title.to_string());
265                                        }
266                                    }
267                                    "lastMessageDate" => {
268                                        if let Some(date) = value.as_i64() {
269                                            session.last_message_date = date;
270                                        }
271                                    }
272                                    _ => {} // Ignore unknown keys
273                                }
274                            }
275                        }
276                    }
277                }
278            }
279            2 => {
280                // Full requests array update - 'k' contains ["requests"], 'v' is the array
281                if let Some(value) = entry.get("v") {
282                    if let Ok(reqs) = serde_json::from_value::<Vec<ChatRequest>>(value.clone()) {
283                        session.requests = reqs;
284                        // Update last message date from last request
285                        if let Some(last_req) = session.requests.last() {
286                            if let Some(ts) = last_req.timestamp {
287                                session.last_message_date = ts;
288                            }
289                        }
290                    }
291                }
292            }
293            _ => {} // Unknown kind, skip
294        }
295    }
296
297    Ok(session)
298}
299
/// Report whether `ext` (a file extension without the leading dot) is one of the
/// session file extensions: exactly `json` or `jsonl`, case-sensitive.
pub fn is_session_file_extension(ext: &std::ffi::OsStr) -> bool {
    matches!(ext.to_str(), Some("json" | "jsonl"))
}
304
305/// Parse a session file, automatically detecting format (.json or .jsonl)
306pub fn parse_session_file(path: &Path) -> std::result::Result<ChatSession, serde_json::Error> {
307    let content = std::fs::read_to_string(path).map_err(|e| {
308        serde_json::Error::io(std::io::Error::new(
309            std::io::ErrorKind::Other,
310            e.to_string(),
311        ))
312    })?;
313
314    match path.extension().and_then(|e| e.to_str()) {
315        Some("jsonl") => parse_session_jsonl(&content),
316        _ => parse_session_json(&content),
317    }
318}
319
320/// Get the path to the workspace storage database
321pub fn get_workspace_storage_db(workspace_id: &str) -> Result<PathBuf> {
322    let storage_path = get_workspace_storage_path()?;
323    Ok(storage_path.join(workspace_id).join("state.vscdb"))
324}
325
326/// Read the chat session index from VS Code storage
327pub fn read_chat_session_index(db_path: &Path) -> Result<ChatSessionIndex> {
328    let conn = Connection::open(db_path)?;
329
330    let result: std::result::Result<String, rusqlite::Error> = conn.query_row(
331        "SELECT value FROM ItemTable WHERE key = ?",
332        ["chat.ChatSessionStore.index"],
333        |row| row.get(0),
334    );
335
336    match result {
337        Ok(json_str) => serde_json::from_str(&json_str)
338            .map_err(|e| CsmError::InvalidSessionFormat(e.to_string())),
339        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(ChatSessionIndex::default()),
340        Err(e) => Err(CsmError::SqliteError(e)),
341    }
342}
343
344/// Write the chat session index to VS Code storage
345pub fn write_chat_session_index(db_path: &Path, index: &ChatSessionIndex) -> Result<()> {
346    let conn = Connection::open(db_path)?;
347    let json_str = serde_json::to_string(index)?;
348
349    // Check if the key exists
350    let exists: bool = conn.query_row(
351        "SELECT COUNT(*) > 0 FROM ItemTable WHERE key = ?",
352        ["chat.ChatSessionStore.index"],
353        |row| row.get(0),
354    )?;
355
356    if exists {
357        conn.execute(
358            "UPDATE ItemTable SET value = ? WHERE key = ?",
359            [&json_str, "chat.ChatSessionStore.index"],
360        )?;
361    } else {
362        conn.execute(
363            "INSERT INTO ItemTable (key, value) VALUES (?, ?)",
364            ["chat.ChatSessionStore.index", &json_str],
365        )?;
366    }
367
368    Ok(())
369}
370
371/// Add a session to the VS Code index
372pub fn add_session_to_index(
373    db_path: &Path,
374    session_id: &str,
375    title: &str,
376    last_message_date_ms: i64,
377    is_imported: bool,
378    initial_location: &str,
379    is_empty: bool,
380) -> Result<()> {
381    let mut index = read_chat_session_index(db_path)?;
382
383    index.entries.insert(
384        session_id.to_string(),
385        ChatSessionIndexEntry {
386            session_id: session_id.to_string(),
387            title: title.to_string(),
388            last_message_date: last_message_date_ms,
389            is_imported,
390            initial_location: initial_location.to_string(),
391            is_empty,
392        },
393    );
394
395    write_chat_session_index(db_path, &index)
396}
397
398/// Remove a session from the VS Code index
399#[allow(dead_code)]
400pub fn remove_session_from_index(db_path: &Path, session_id: &str) -> Result<bool> {
401    let mut index = read_chat_session_index(db_path)?;
402    let removed = index.entries.remove(session_id).is_some();
403    if removed {
404        write_chat_session_index(db_path, &index)?;
405    }
406    Ok(removed)
407}
408
409/// Sync the VS Code index with sessions on disk (remove stale entries, add missing ones)
410pub fn sync_session_index(
411    workspace_id: &str,
412    chat_sessions_dir: &Path,
413    force: bool,
414) -> Result<(usize, usize)> {
415    let db_path = get_workspace_storage_db(workspace_id)?;
416
417    if !db_path.exists() {
418        return Err(CsmError::WorkspaceNotFound(format!(
419            "Database not found: {}",
420            db_path.display()
421        )));
422    }
423
424    // Check if VS Code is running
425    if !force && is_vscode_running() {
426        return Err(CsmError::VSCodeRunning);
427    }
428
429    // Get current index
430    let mut index = read_chat_session_index(&db_path)?;
431
432    // Get session files on disk
433    let mut files_on_disk: std::collections::HashSet<String> = std::collections::HashSet::new();
434    if chat_sessions_dir.exists() {
435        for entry in std::fs::read_dir(chat_sessions_dir)? {
436            let entry = entry?;
437            let path = entry.path();
438            if path
439                .extension()
440                .map(|e| is_session_file_extension(e))
441                .unwrap_or(false)
442            {
443                if let Some(stem) = path.file_stem() {
444                    files_on_disk.insert(stem.to_string_lossy().to_string());
445                }
446            }
447        }
448    }
449
450    // Remove stale entries (in index but not on disk)
451    let stale_ids: Vec<String> = index
452        .entries
453        .keys()
454        .filter(|id| !files_on_disk.contains(*id))
455        .cloned()
456        .collect();
457
458    let removed = stale_ids.len();
459    for id in &stale_ids {
460        index.entries.remove(id);
461    }
462
463    // Add/update sessions from disk
464    let mut added = 0;
465    for entry in std::fs::read_dir(chat_sessions_dir)? {
466        let entry = entry?;
467        let path = entry.path();
468
469        if path
470            .extension()
471            .map(|e| is_session_file_extension(e))
472            .unwrap_or(false)
473        {
474            if let Ok(session) = parse_session_file(&path) {
475                let session_id = session.session_id.clone().unwrap_or_else(|| {
476                    path.file_stem()
477                        .map(|s| s.to_string_lossy().to_string())
478                        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
479                });
480
481                let title = session.title();
482                let is_empty = session.is_empty();
483                let last_message_date = session.last_message_date;
484                let initial_location = session.initial_location.clone();
485
486                index.entries.insert(
487                    session_id.clone(),
488                    ChatSessionIndexEntry {
489                        session_id,
490                        title,
491                        last_message_date,
492                        is_imported: session.is_imported,
493                        initial_location,
494                        is_empty,
495                    },
496                );
497                added += 1;
498            }
499        }
500    }
501
502    // Write the synced index
503    write_chat_session_index(&db_path, &index)?;
504
505    Ok((added, removed))
506}
507
508/// Register all sessions from a directory into the VS Code index
509pub fn register_all_sessions_from_directory(
510    workspace_id: &str,
511    chat_sessions_dir: &Path,
512    force: bool,
513) -> Result<usize> {
514    let db_path = get_workspace_storage_db(workspace_id)?;
515
516    if !db_path.exists() {
517        return Err(CsmError::WorkspaceNotFound(format!(
518            "Database not found: {}",
519            db_path.display()
520        )));
521    }
522
523    // Check if VS Code is running
524    if !force && is_vscode_running() {
525        return Err(CsmError::VSCodeRunning);
526    }
527
528    // Use sync to ensure index matches disk
529    let (added, removed) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
530
531    // Print individual session info
532    for entry in std::fs::read_dir(chat_sessions_dir)? {
533        let entry = entry?;
534        let path = entry.path();
535
536        if path
537            .extension()
538            .map(|e| is_session_file_extension(e))
539            .unwrap_or(false)
540        {
541            if let Ok(session) = parse_session_file(&path) {
542                let session_id = session.session_id.clone().unwrap_or_else(|| {
543                    path.file_stem()
544                        .map(|s| s.to_string_lossy().to_string())
545                        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
546                });
547
548                let title = session.title();
549
550                println!(
551                    "[OK] Registered: {} ({}...)",
552                    title,
553                    &session_id[..12.min(session_id.len())]
554                );
555            }
556        }
557    }
558
559    if removed > 0 {
560        println!("[OK] Removed {} stale index entries", removed);
561    }
562
563    Ok(added)
564}
565
566/// Check if VS Code is currently running
567pub fn is_vscode_running() -> bool {
568    let mut sys = System::new();
569    sys.refresh_processes();
570
571    for process in sys.processes().values() {
572        let name = process.name().to_lowercase();
573        if name.contains("code") && !name.contains("codec") {
574            return true;
575        }
576    }
577
578    false
579}
580
581/// Backup workspace sessions to a timestamped directory
582pub fn backup_workspace_sessions(workspace_dir: &Path) -> Result<Option<PathBuf>> {
583    let chat_sessions_dir = workspace_dir.join("chatSessions");
584
585    if !chat_sessions_dir.exists() {
586        return Ok(None);
587    }
588
589    let timestamp = std::time::SystemTime::now()
590        .duration_since(std::time::UNIX_EPOCH)
591        .unwrap()
592        .as_secs();
593
594    let backup_dir = workspace_dir.join(format!("chatSessions-backup-{}", timestamp));
595
596    // Copy directory recursively
597    copy_dir_all(&chat_sessions_dir, &backup_dir)?;
598
599    Ok(Some(backup_dir))
600}
601
602/// Recursively copy a directory
603fn copy_dir_all(src: &Path, dst: &Path) -> Result<()> {
604    std::fs::create_dir_all(dst)?;
605
606    for entry in std::fs::read_dir(src)? {
607        let entry = entry?;
608        let src_path = entry.path();
609        let dst_path = dst.join(entry.file_name());
610
611        if src_path.is_dir() {
612            copy_dir_all(&src_path, &dst_path)?;
613        } else {
614            std::fs::copy(&src_path, &dst_path)?;
615        }
616    }
617
618    Ok(())
619}
620
621// =============================================================================
622// Empty Window Sessions (ALL SESSIONS)
623// =============================================================================
624
625/// Read all empty window chat sessions (not tied to any workspace)
626/// These appear in VS Code's "ALL SESSIONS" panel
627pub fn read_empty_window_sessions() -> Result<Vec<ChatSession>> {
628    let sessions_path = get_empty_window_sessions_path()?;
629
630    if !sessions_path.exists() {
631        return Ok(Vec::new());
632    }
633
634    let mut sessions = Vec::new();
635
636    for entry in std::fs::read_dir(&sessions_path)? {
637        let entry = entry?;
638        let path = entry.path();
639
640        if path
641            .extension()
642            .is_some_and(|e| is_session_file_extension(e))
643        {
644            if let Ok(session) = parse_session_file(&path) {
645                sessions.push(session);
646            }
647        }
648    }
649
650    // Sort by last message date (most recent first)
651    sessions.sort_by(|a, b| b.last_message_date.cmp(&a.last_message_date));
652
653    Ok(sessions)
654}
655
656/// Get a specific empty window session by ID
657#[allow(dead_code)]
658pub fn get_empty_window_session(session_id: &str) -> Result<Option<ChatSession>> {
659    let sessions_path = get_empty_window_sessions_path()?;
660    let session_path = sessions_path.join(format!("{}.json", session_id));
661
662    if !session_path.exists() {
663        return Ok(None);
664    }
665
666    let content = std::fs::read_to_string(&session_path)?;
667    let session: ChatSession = serde_json::from_str(&content)
668        .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
669
670    Ok(Some(session))
671}
672
673/// Write an empty window session
674#[allow(dead_code)]
675pub fn write_empty_window_session(session: &ChatSession) -> Result<PathBuf> {
676    let sessions_path = get_empty_window_sessions_path()?;
677
678    // Create directory if it doesn't exist
679    std::fs::create_dir_all(&sessions_path)?;
680
681    let session_id = session.session_id.as_deref().unwrap_or("unknown");
682    let session_path = sessions_path.join(format!("{}.json", session_id));
683    let content = serde_json::to_string_pretty(session)?;
684    std::fs::write(&session_path, content)?;
685
686    Ok(session_path)
687}
688
689/// Delete an empty window session
690#[allow(dead_code)]
691pub fn delete_empty_window_session(session_id: &str) -> Result<bool> {
692    let sessions_path = get_empty_window_sessions_path()?;
693    let session_path = sessions_path.join(format!("{}.json", session_id));
694
695    if session_path.exists() {
696        std::fs::remove_file(&session_path)?;
697        Ok(true)
698    } else {
699        Ok(false)
700    }
701}
702
703/// Count empty window sessions
704pub fn count_empty_window_sessions() -> Result<usize> {
705    let sessions_path = get_empty_window_sessions_path()?;
706
707    if !sessions_path.exists() {
708        return Ok(0);
709    }
710
711    let count = std::fs::read_dir(&sessions_path)?
712        .filter_map(|e| e.ok())
713        .filter(|e| {
714            e.path()
715                .extension()
716                .is_some_and(|ext| is_session_file_extension(ext))
717        })
718        .count();
719
720    Ok(count)
721}