Skip to main content

chasm/
storage.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: AGPL-3.0-only
3//! VS Code storage (SQLite database) operations
4
5use crate::error::{CsmError, Result};
6use crate::models::{
7    ChatRequest, ChatSession, ChatSessionIndex, ChatSessionIndexEntry, ChatSessionTiming,
8};
9use crate::workspace::{get_empty_window_sessions_path, get_workspace_storage_path};
10use once_cell::sync::Lazy;
11use regex::Regex;
12use rusqlite::Connection;
13use std::collections::HashSet;
14use std::path::{Path, PathBuf};
15use sysinfo::System;
16
/// A single issue detected during workspace session diagnostics.
///
/// `diagnose_workspace_sessions` pushes one of these per detected problem, so a
/// session with several problems appears several times with different `kind`s.
#[derive(Debug, Clone)]
pub struct SessionIssue {
    /// Session identifier: the session file stem (UUID) for file-based issues,
    /// or the index key for issues found only in the index
    pub session_id: String,
    /// Category of issue
    pub kind: SessionIssueKind,
    /// Human-readable description
    pub detail: String,
}
27
/// Categories of session issues that can be detected and auto-fixed.
///
/// The enum is fieldless, so besides `PartialEq` it derives `Eq` and `Hash`;
/// this lets a kind be used as a `HashMap`/`HashSet` key (for example to tally
/// issues per kind in a single pass).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SessionIssueKind {
    /// JSONL file has multiple lines (operations not compacted)
    MultiLineJsonl,
    /// JSONL first line contains concatenated JSON objects (missing newlines)
    ConcatenatedJsonl,
    /// Index entry has lastResponseState = 2 (Cancelled), blocks VS Code loading
    CancelledState,
    /// File exists on disk but is not in the VS Code index
    OrphanedSession,
    /// Index entry references a file that no longer exists on disk
    StaleIndexEntry,
    /// Session is missing required VS Code compat fields
    MissingCompatFields,
    /// Both .json and .jsonl exist for the same session ID
    DuplicateFormat,
}
46
47impl std::fmt::Display for SessionIssueKind {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        match self {
50            SessionIssueKind::MultiLineJsonl => write!(f, "multi-line JSONL"),
51            SessionIssueKind::ConcatenatedJsonl => write!(f, "concatenated JSONL"),
52            SessionIssueKind::CancelledState => write!(f, "cancelled state"),
53            SessionIssueKind::OrphanedSession => write!(f, "orphaned session"),
54            SessionIssueKind::StaleIndexEntry => write!(f, "stale index entry"),
55            SessionIssueKind::MissingCompatFields => write!(f, "missing compat fields"),
56            SessionIssueKind::DuplicateFormat => write!(f, "duplicate .json/.jsonl"),
57        }
58    }
59}
60
/// Summary of issues found in a single workspace.
///
/// Built by `diagnose_workspace_sessions`; `Default` yields an empty, healthy
/// report.
#[derive(Debug, Clone, Default)]
pub struct WorkspaceDiagnosis {
    /// Project path (if known); `diagnose_workspace_sessions` leaves this as `None`
    pub project_path: Option<String>,
    /// Workspace hash (the workspace ID the diagnosis was requested for)
    pub workspace_hash: String,
    /// Total sessions on disk, counted as distinct session IDs across
    /// `.json` and `.jsonl` files
    pub sessions_on_disk: usize,
    /// Total sessions in index; stays 0 when the index could not be read
    pub sessions_in_index: usize,
    /// All detected issues
    pub issues: Vec<SessionIssue>,
}
75
76impl WorkspaceDiagnosis {
77    pub fn is_healthy(&self) -> bool {
78        self.issues.is_empty()
79    }
80
81    pub fn issue_count_by_kind(&self, kind: &SessionIssueKind) -> usize {
82        self.issues.iter().filter(|i| &i.kind == kind).count()
83    }
84}
85
86/// Diagnose a workspace for session issues without modifying anything.
87/// Returns a structured report of all detected problems.
88pub fn diagnose_workspace_sessions(
89    workspace_id: &str,
90    chat_sessions_dir: &Path,
91) -> Result<WorkspaceDiagnosis> {
92    let mut diagnosis = WorkspaceDiagnosis {
93        workspace_hash: workspace_id.to_string(),
94        ..Default::default()
95    };
96
97    if !chat_sessions_dir.exists() {
98        return Ok(diagnosis);
99    }
100
101    // Collect session files on disk
102    let mut jsonl_sessions: HashSet<String> = HashSet::new();
103    let mut json_sessions: HashSet<String> = HashSet::new();
104    let mut all_session_ids: HashSet<String> = HashSet::new();
105
106    for entry in std::fs::read_dir(chat_sessions_dir)? {
107        let entry = entry?;
108        let path = entry.path();
109        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
110        let stem = path
111            .file_stem()
112            .map(|s| s.to_string_lossy().to_string())
113            .unwrap_or_default();
114
115        match ext {
116            "jsonl" => {
117                jsonl_sessions.insert(stem.clone());
118                all_session_ids.insert(stem);
119            }
120            "json" if !path.to_string_lossy().ends_with(".bak") => {
121                json_sessions.insert(stem.clone());
122                all_session_ids.insert(stem);
123            }
124            _ => {}
125        }
126    }
127    diagnosis.sessions_on_disk = all_session_ids.len();
128
129    // Check for duplicate .json/.jsonl files
130    for id in &jsonl_sessions {
131        if json_sessions.contains(id) {
132            diagnosis.issues.push(SessionIssue {
133                session_id: id.clone(),
134                kind: SessionIssueKind::DuplicateFormat,
135                detail: format!("Both {id}.json and {id}.jsonl exist"),
136            });
137        }
138    }
139
140    // Check JSONL files for content issues
141    for id in &jsonl_sessions {
142        let path = chat_sessions_dir.join(format!("{id}.jsonl"));
143        if let Ok(content) = std::fs::read_to_string(&path) {
144            let line_count = content.lines().count();
145
146            if line_count > 1 {
147                let size_mb = content.len() / (1024 * 1024);
148                diagnosis.issues.push(SessionIssue {
149                    session_id: id.clone(),
150                    kind: SessionIssueKind::MultiLineJsonl,
151                    detail: format!("{line_count} lines, ~{size_mb} MB — needs compaction"),
152                });
153            }
154
155            // Check first line for concatenation
156            if let Some(first_line) = content.lines().next() {
157                if first_line.contains("}{\"kind\":") {
158                    diagnosis.issues.push(SessionIssue {
159                        session_id: id.clone(),
160                        kind: SessionIssueKind::ConcatenatedJsonl,
161                        detail: "First line has concatenated JSON objects".to_string(),
162                    });
163                }
164            }
165
166            // Check for missing compat fields (only single-line files worth checking)
167            if line_count == 1 {
168                if let Some(first_line) = content.lines().next() {
169                    if let Ok(obj) = serde_json::from_str::<serde_json::Value>(first_line) {
170                        let is_kind_0 = obj
171                            .get("kind")
172                            .and_then(|k| k.as_u64())
173                            .map(|k| k == 0)
174                            .unwrap_or(false);
175
176                        if is_kind_0 {
177                            if let Some(v) = obj.get("v") {
178                                let missing_fields: Vec<&str> = [
179                                    "hasPendingEdits",
180                                    "pendingRequests",
181                                    "inputState",
182                                    "sessionId",
183                                    "version",
184                                ]
185                                .iter()
186                                .filter(|f| v.get(**f).is_none())
187                                .copied()
188                                .collect();
189
190                                if !missing_fields.is_empty() {
191                                    diagnosis.issues.push(SessionIssue {
192                                        session_id: id.clone(),
193                                        kind: SessionIssueKind::MissingCompatFields,
194                                        detail: format!("Missing: {}", missing_fields.join(", ")),
195                                    });
196                                }
197                            }
198                        }
199                    }
200                }
201            }
202        }
203    }
204
205    // Check index for stale entries, orphans, and cancelled state
206    let db_path = get_workspace_storage_db(workspace_id)?;
207    if db_path.exists() {
208        if let Ok(index) = read_chat_session_index(&db_path) {
209            diagnosis.sessions_in_index = index.entries.len();
210
211            // Stale index entries (in index but no file on disk)
212            for (id, _entry) in &index.entries {
213                if !all_session_ids.contains(id) {
214                    diagnosis.issues.push(SessionIssue {
215                        session_id: id.clone(),
216                        kind: SessionIssueKind::StaleIndexEntry,
217                        detail: "In index but no file on disk".to_string(),
218                    });
219                }
220            }
221
222            // Cancelled state entries
223            for (id, entry) in &index.entries {
224                if entry.last_response_state == 2 {
225                    diagnosis.issues.push(SessionIssue {
226                        session_id: id.clone(),
227                        kind: SessionIssueKind::CancelledState,
228                        detail: "lastResponseState=2 (Cancelled) — blocks VS Code loading"
229                            .to_string(),
230                    });
231                }
232            }
233
234            // Orphaned sessions (on disk but not in index)
235            let indexed_ids: HashSet<&String> = index.entries.keys().collect();
236            for id in &all_session_ids {
237                if !indexed_ids.contains(id) {
238                    diagnosis.issues.push(SessionIssue {
239                        session_id: id.clone(),
240                        kind: SessionIssueKind::OrphanedSession,
241                        detail: "File on disk but not in VS Code index".to_string(),
242                    });
243                }
244            }
245        }
246    }
247
248    Ok(diagnosis)
249}
250
/// Regex matching any `\uXXXX` sequence: a backslash, `u`, and exactly four hex
/// digits. Surrogate halves match too (valid or not). The pattern does not look
/// at what precedes the backslash, so it also matches when that backslash is
/// itself escaped (`\\uXXXX`).
static UNICODE_ESCAPE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\\u[0-9a-fA-F]{4}").unwrap());
253
/// VS Code session file format - identifies which parsing strategy to use
/// (`parse_session_json` for legacy JSON, `parse_session_jsonl` for JSON Lines).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VsCodeSessionFormat {
    /// Legacy JSON format (VS Code < 1.109.0)
    /// Single JSON object with ChatSession structure
    LegacyJson,
    /// JSONL format (VS Code >= 1.109.0, January 2026+)
    /// JSON Lines with event sourcing: one object per line, each tagged with a
    /// numeric `kind` (0 = initial state, 1 = delta, 2 = array update)
    JsonLines,
}
264
/// Session schema version - tracks the internal structure version.
///
/// The explicit discriminants equal the numeric version. The derived ordering
/// compares discriminants, so `Unknown` (0) sorts below `V1`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SessionSchemaVersion {
    /// Version 1 - Original format (basic fields)
    V1 = 1,
    /// Version 2 - Added more metadata fields
    V2 = 2,
    /// Version 3 - Current format with full request/response structure
    V3 = 3,
    /// Unknown version (any number other than 1, 2 or 3)
    Unknown = 0,
}
277
278impl SessionSchemaVersion {
279    /// Create from version number
280    pub fn from_version(v: u32) -> Self {
281        match v {
282            1 => Self::V1,
283            2 => Self::V2,
284            3 => Self::V3,
285            _ => Self::Unknown,
286        }
287    }
288
289    /// Get version number
290    pub fn version_number(&self) -> u32 {
291        match self {
292            Self::V1 => 1,
293            Self::V2 => 2,
294            Self::V3 => 3,
295            Self::Unknown => 0,
296        }
297    }
298
299    /// Get description
300    pub fn description(&self) -> &'static str {
301        match self {
302            Self::V1 => "v1 (basic)",
303            Self::V2 => "v2 (extended metadata)",
304            Self::V3 => "v3 (full structure)",
305            Self::Unknown => "unknown",
306        }
307    }
308}
309
310impl std::fmt::Display for SessionSchemaVersion {
311    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
312        write!(f, "{}", self.description())
313    }
314}
315
/// Result of session format detection, as returned by `detect_session_format`.
#[derive(Debug, Clone)]
pub struct SessionFormatInfo {
    /// File format (JSON or JSONL)
    pub format: VsCodeSessionFormat,
    /// Schema version detected from content
    pub schema_version: SessionSchemaVersion,
    /// Confidence level (0.0 - 1.0); higher means the detection relied on
    /// stronger evidence such as an explicit version field
    pub confidence: f32,
    /// Detection method used, as a short static tag (e.g. "jsonl-version-field")
    pub detection_method: &'static str,
}
328
329impl VsCodeSessionFormat {
330    /// Detect format from file path (by extension)
331    pub fn from_path(path: &Path) -> Self {
332        match path.extension().and_then(|e| e.to_str()) {
333            Some("jsonl") => Self::JsonLines,
334            _ => Self::LegacyJson,
335        }
336    }
337
338    /// Detect format from content by analyzing structure
339    pub fn from_content(content: &str) -> Self {
340        let trimmed = content.trim();
341
342        // JSONL: Multiple lines starting with { or first line has {"kind":
343        if trimmed.starts_with("{\"kind\":") || trimmed.starts_with("{ \"kind\":") {
344            return Self::JsonLines;
345        }
346
347        // Count lines that look like JSON objects
348        let mut json_object_lines = 0;
349        let mut total_non_empty_lines = 0;
350
351        for line in trimmed.lines().take(10) {
352            let line = line.trim();
353            if line.is_empty() {
354                continue;
355            }
356            total_non_empty_lines += 1;
357
358            // Check if line is a JSON object with "kind" field (JSONL marker)
359            if line.starts_with('{') && line.contains("\"kind\"") {
360                json_object_lines += 1;
361            }
362        }
363
364        // If multiple lines look like JSONL entries, it's JSONL
365        if json_object_lines >= 2
366            || (json_object_lines == 1 && total_non_empty_lines == 1 && trimmed.contains("\n{"))
367        {
368            return Self::JsonLines;
369        }
370
371        // Check if it's a single JSON object (legacy format)
372        if trimmed.starts_with('{') && trimmed.ends_with('}') {
373            // Look for ChatSession structure markers
374            if trimmed.contains("\"sessionId\"")
375                || trimmed.contains("\"creationDate\"")
376                || trimmed.contains("\"requests\"")
377            {
378                return Self::LegacyJson;
379            }
380        }
381
382        // Default to legacy JSON if unclear
383        Self::LegacyJson
384    }
385
386    /// Get minimum VS Code version that uses this format
387    pub fn min_vscode_version(&self) -> &'static str {
388        match self {
389            Self::LegacyJson => "1.0.0",
390            Self::JsonLines => "1.109.0",
391        }
392    }
393
394    /// Get human-readable format description
395    pub fn description(&self) -> &'static str {
396        match self {
397            Self::LegacyJson => "Legacy JSON (single object)",
398            Self::JsonLines => "JSON Lines (event-sourced, VS Code 1.109.0+)",
399        }
400    }
401
402    /// Get short format name
403    pub fn short_name(&self) -> &'static str {
404        match self {
405            Self::LegacyJson => "json",
406            Self::JsonLines => "jsonl",
407        }
408    }
409}
410
411impl std::fmt::Display for VsCodeSessionFormat {
412    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
413        write!(f, "{}", self.description())
414    }
415}
416
417/// Sanitize JSON content by replacing lone surrogates with replacement character.
418/// VS Code sometimes writes invalid JSON with lone Unicode surrogates (e.g., \udde0).
419fn sanitize_json_unicode(content: &str) -> String {
420    // Process all \uXXXX sequences and fix lone surrogates
421    let mut result = String::with_capacity(content.len());
422    let mut last_end = 0;
423
424    // Collect all matches first to avoid borrowing issues
425    let matches: Vec<_> = UNICODE_ESCAPE_RE.find_iter(content).collect();
426
427    for (i, mat) in matches.iter().enumerate() {
428        let start = mat.start();
429        let end = mat.end();
430
431        // Add content before this match
432        result.push_str(&content[last_end..start]);
433
434        // Parse the hex value from the match itself (always ASCII \uXXXX)
435        let hex_str = &mat.as_str()[2..]; // Skip the \u prefix
436        if let Ok(code_point) = u16::from_str_radix(hex_str, 16) {
437            // Check if it's a high surrogate (D800-DBFF)
438            if (0xD800..=0xDBFF).contains(&code_point) {
439                // Check if next match is immediately following and is a low surrogate
440                let is_valid_pair = if let Some(next_mat) = matches.get(i + 1) {
441                    // Must be immediately adjacent (no gap)
442                    if next_mat.start() == end {
443                        let next_hex = &next_mat.as_str()[2..];
444                        if let Ok(next_cp) = u16::from_str_radix(next_hex, 16) {
445                            (0xDC00..=0xDFFF).contains(&next_cp)
446                        } else {
447                            false
448                        }
449                    } else {
450                        false
451                    }
452                } else {
453                    false
454                };
455
456                if is_valid_pair {
457                    // Valid surrogate pair, keep the high surrogate
458                    result.push_str(mat.as_str());
459                } else {
460                    // Lone high surrogate - replace with replacement char
461                    result.push_str("\\uFFFD");
462                }
463            }
464            // Check if it's a low surrogate (DC00-DFFF)
465            else if (0xDC00..=0xDFFF).contains(&code_point) {
466                // Check if previous match was immediately before and was a high surrogate
467                let is_valid_pair = if i > 0 {
468                    if let Some(prev_mat) = matches.get(i - 1) {
469                        // Must be immediately adjacent (no gap)
470                        if prev_mat.end() == start {
471                            let prev_hex = &prev_mat.as_str()[2..];
472                            if let Ok(prev_cp) = u16::from_str_radix(prev_hex, 16) {
473                                (0xD800..=0xDBFF).contains(&prev_cp)
474                            } else {
475                                false
476                            }
477                        } else {
478                            false
479                        }
480                    } else {
481                        false
482                    }
483                } else {
484                    false
485                };
486
487                if is_valid_pair {
488                    // Part of valid surrogate pair, keep it
489                    result.push_str(mat.as_str());
490                } else {
491                    // Lone low surrogate - replace with replacement char
492                    result.push_str("\\uFFFD");
493                }
494            }
495            // Normal code point
496            else {
497                result.push_str(mat.as_str());
498            }
499        } else {
500            // Invalid hex - keep as is
501            result.push_str(mat.as_str());
502        }
503        last_end = end;
504    }
505
506    // Add remaining content
507    result.push_str(&content[last_end..]);
508    result
509}
510
511/// Try to parse JSON, sanitizing invalid Unicode if needed
512pub fn parse_session_json(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
513    match serde_json::from_str::<ChatSession>(content) {
514        Ok(session) => Ok(session),
515        Err(e) => {
516            // If parsing fails due to Unicode issue, try sanitizing
517            if e.to_string().contains("surrogate") || e.to_string().contains("escape") {
518                let sanitized = sanitize_json_unicode(content);
519                serde_json::from_str::<ChatSession>(&sanitized)
520            } else {
521                Err(e)
522            }
523        }
524    }
525}
526
/// JSONL entry kinds for VS Code 1.109.0+ session format.
///
/// The discriminants are the numeric `kind` values found on each JSONL line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonlKind {
    /// Initial session state (kind: 0)
    Initial = 0,
    /// Delta update to specific keys (kind: 1)
    Delta = 1,
    /// Array update addressed by a key path (kind: 2); `parse_session_jsonl`
    /// handles it as an append to the `requests` array
    RequestsUpdate = 2,
}
537
538/// Parse a JSONL (JSON Lines) session file (VS Code 1.109.0+ format)
539/// Each line is a JSON object with 'kind' field indicating the type:
540/// - kind 0: Initial session metadata with 'v' containing ChatSession-like structure
541/// - kind 1: Delta update with 'k' (keys path) and 'v' (value)
542/// - kind 2: Full requests array update with 'k' and 'v'
543pub fn parse_session_jsonl(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
544    // Pre-process: split concatenated JSON objects that lack newline separators
545    let content = split_concatenated_jsonl(content);
546
547    let mut session = ChatSession {
548        version: 3,
549        session_id: None,
550        creation_date: 0,
551        last_message_date: 0,
552        is_imported: false,
553        initial_location: "panel".to_string(),
554        custom_title: None,
555        requester_username: None,
556        requester_avatar_icon_uri: None,
557        responder_username: None,
558        responder_avatar_icon_uri: None,
559        requests: Vec::new(),
560    };
561
562    for line in content.lines() {
563        let line = line.trim();
564        if line.is_empty() {
565            continue;
566        }
567
568        // Parse each line as a JSON object
569        let entry: serde_json::Value = match serde_json::from_str(line) {
570            Ok(v) => v,
571            Err(_) => {
572                // Try sanitizing Unicode
573                let sanitized = sanitize_json_unicode(line);
574                serde_json::from_str(&sanitized)?
575            }
576        };
577
578        let kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(0);
579
580        match kind {
581            0 => {
582                // Initial state - 'v' contains the session metadata
583                if let Some(v) = entry.get("v") {
584                    // Parse version
585                    if let Some(version) = v.get("version").and_then(|x| x.as_u64()) {
586                        session.version = version as u32;
587                    }
588                    // Parse session ID
589                    if let Some(sid) = v.get("sessionId").and_then(|x| x.as_str()) {
590                        session.session_id = Some(sid.to_string());
591                    }
592                    // Parse creation date
593                    if let Some(cd) = v.get("creationDate").and_then(|x| x.as_i64()) {
594                        session.creation_date = cd;
595                    }
596                    // Parse initial location
597                    if let Some(loc) = v.get("initialLocation").and_then(|x| x.as_str()) {
598                        session.initial_location = loc.to_string();
599                    }
600                    // Parse responder username
601                    if let Some(ru) = v.get("responderUsername").and_then(|x| x.as_str()) {
602                        session.responder_username = Some(ru.to_string());
603                    }
604                    // Parse custom title
605                    if let Some(title) = v.get("customTitle").and_then(|x| x.as_str()) {
606                        session.custom_title = Some(title.to_string());
607                    }
608                    // Parse hasPendingEdits as imported marker
609                    if let Some(imported) = v.get("isImported").and_then(|x| x.as_bool()) {
610                        session.is_imported = imported;
611                    }
612                    // Parse requests array if present
613                    if let Some(requests) = v.get("requests") {
614                        if let Ok(reqs) =
615                            serde_json::from_value::<Vec<ChatRequest>>(requests.clone())
616                        {
617                            session.requests = reqs;
618                            // Compute last_message_date from the latest request timestamp
619                            if let Some(latest_ts) =
620                                session.requests.iter().filter_map(|r| r.timestamp).max()
621                            {
622                                session.last_message_date = latest_ts;
623                            }
624                        }
625                    }
626                    // Fall back to creationDate if no request timestamps found
627                    if session.last_message_date == 0 {
628                        session.last_message_date = session.creation_date;
629                    }
630                }
631            }
632            1 => {
633                // Delta update - 'k' is array of key path, 'v' is the value
634                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
635                    if let Some(keys_arr) = keys.as_array() {
636                        // Handle top-level session keys
637                        if keys_arr.len() == 1 {
638                            if let Some(key) = keys_arr[0].as_str() {
639                                match key {
640                                    "customTitle" => {
641                                        if let Some(title) = value.as_str() {
642                                            session.custom_title = Some(title.to_string());
643                                        }
644                                    }
645                                    "lastMessageDate" => {
646                                        if let Some(date) = value.as_i64() {
647                                            session.last_message_date = date;
648                                        }
649                                    }
650                                    "hasPendingEdits" | "isImported" => {
651                                        // Session-level boolean updates, safe to ignore for now
652                                    }
653                                    _ => {} // Ignore unknown keys
654                                }
655                            }
656                        }
657                        // Handle nested request field updates: ["requests", idx, field]
658                        else if keys_arr.len() == 3 {
659                            if let (Some("requests"), Some(idx), Some(field)) = (
660                                keys_arr[0].as_str(),
661                                keys_arr[1].as_u64().map(|i| i as usize),
662                                keys_arr[2].as_str(),
663                            ) {
664                                if idx < session.requests.len() {
665                                    match field {
666                                        "response" => {
667                                            session.requests[idx].response = Some(value.clone());
668                                        }
669                                        "result" => {
670                                            session.requests[idx].result = Some(value.clone());
671                                        }
672                                        "followups" => {
673                                            session.requests[idx].followups =
674                                                serde_json::from_value(value.clone()).ok();
675                                        }
676                                        "isCanceled" => {
677                                            session.requests[idx].is_canceled = value.as_bool();
678                                        }
679                                        "contentReferences" => {
680                                            session.requests[idx].content_references =
681                                                serde_json::from_value(value.clone()).ok();
682                                        }
683                                        "codeCitations" => {
684                                            session.requests[idx].code_citations =
685                                                serde_json::from_value(value.clone()).ok();
686                                        }
687                                        "modelState" | "modelId" | "agent" | "variableData" => {
688                                            // Known request fields - update as generic Value
689                                            // modelState tracks the request lifecycle
690                                        }
691                                        _ => {} // Ignore unknown request fields
692                                    }
693                                }
694                            }
695                        }
696                    }
697                }
698            }
699            2 => {
700                // Array append operation - 'k' is the key path, 'v' is array of items to append
701                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
702                    if let Some(keys_arr) = keys.as_array() {
703                        // Top-level requests append: k=["requests"], v=[new_request]
704                        if keys_arr.len() == 1 {
705                            if let Some("requests") = keys_arr[0].as_str() {
706                                if let Some(items) = value.as_array() {
707                                    for item in items {
708                                        if let Ok(req) =
709                                            serde_json::from_value::<ChatRequest>(item.clone())
710                                        {
711                                            session.requests.push(req);
712                                        }
713                                    }
714                                    // Update last message date from latest request
715                                    if let Some(last_req) = session.requests.last() {
716                                        if let Some(ts) = last_req.timestamp {
717                                            session.last_message_date = ts;
718                                        }
719                                    }
720                                }
721                            }
722                        }
723                        // Nested array append: k=["requests", idx, "response"], v=[parts]
724                        // These are response streaming chunks - we can safely ignore them
725                        // since the final response is captured via kind:1 updates
726                    }
727                }
728            }
729            _ => {} // Unknown kind, skip
730        }
731    }
732
733    Ok(session)
734}
735
/// Returns `true` when `ext` is exactly `json` or `jsonl` (case-sensitive),
/// i.e. the extension of a session file.
pub fn is_session_file_extension(ext: &std::ffi::OsStr) -> bool {
    matches!(ext.to_str(), Some("json") | Some("jsonl"))
}
740
741/// Detect session format and version from content
742pub fn detect_session_format(content: &str) -> SessionFormatInfo {
743    let format = VsCodeSessionFormat::from_content(content);
744    let trimmed = content.trim();
745
746    // Detect schema version based on format
747    let (schema_version, confidence, method) = match format {
748        VsCodeSessionFormat::JsonLines => {
749            // For JSONL, check the first line's "v" object for version
750            if let Some(first_line) = trimmed.lines().next() {
751                if let Ok(entry) = serde_json::from_str::<serde_json::Value>(first_line) {
752                    if let Some(v) = entry.get("v") {
753                        if let Some(ver) = v.get("version").and_then(|x| x.as_u64()) {
754                            (
755                                SessionSchemaVersion::from_version(ver as u32),
756                                0.95,
757                                "jsonl-version-field",
758                            )
759                        } else {
760                            // No version field, likely v3 (current default)
761                            (SessionSchemaVersion::V3, 0.7, "jsonl-default")
762                        }
763                    } else {
764                        (SessionSchemaVersion::V3, 0.6, "jsonl-no-v-field")
765                    }
766                } else {
767                    (SessionSchemaVersion::Unknown, 0.3, "jsonl-parse-error")
768                }
769            } else {
770                (SessionSchemaVersion::Unknown, 0.2, "jsonl-empty")
771            }
772        }
773        VsCodeSessionFormat::LegacyJson => {
774            // For JSON, directly check the version field
775            if let Ok(json) = serde_json::from_str::<serde_json::Value>(trimmed) {
776                if let Some(ver) = json.get("version").and_then(|x| x.as_u64()) {
777                    (
778                        SessionSchemaVersion::from_version(ver as u32),
779                        0.95,
780                        "json-version-field",
781                    )
782                } else {
783                    // Infer from structure
784                    if json.get("requests").is_some() && json.get("sessionId").is_some() {
785                        (SessionSchemaVersion::V3, 0.8, "json-structure-inference")
786                    } else if json.get("messages").is_some() {
787                        (SessionSchemaVersion::V1, 0.7, "json-legacy-structure")
788                    } else {
789                        (SessionSchemaVersion::Unknown, 0.4, "json-unknown-structure")
790                    }
791                }
792            } else {
793                // Try sanitizing and parsing again
794                let sanitized = sanitize_json_unicode(trimmed);
795                if let Ok(json) = serde_json::from_str::<serde_json::Value>(&sanitized) {
796                    if let Some(ver) = json.get("version").and_then(|x| x.as_u64()) {
797                        (
798                            SessionSchemaVersion::from_version(ver as u32),
799                            0.9,
800                            "json-version-after-sanitize",
801                        )
802                    } else {
803                        (SessionSchemaVersion::V3, 0.6, "json-default-after-sanitize")
804                    }
805                } else {
806                    (SessionSchemaVersion::Unknown, 0.2, "json-parse-error")
807                }
808            }
809        }
810    };
811
812    SessionFormatInfo {
813        format,
814        schema_version,
815        confidence,
816        detection_method: method,
817    }
818}
819
820/// Parse session content with automatic format detection
821pub fn parse_session_auto(
822    content: &str,
823) -> std::result::Result<(ChatSession, SessionFormatInfo), serde_json::Error> {
824    let format_info = detect_session_format(content);
825
826    let session = match format_info.format {
827        VsCodeSessionFormat::JsonLines => parse_session_jsonl(content)?,
828        VsCodeSessionFormat::LegacyJson => parse_session_json(content)?,
829    };
830
831    Ok((session, format_info))
832}
833
834/// Parse a session file, automatically detecting format from content (not just extension)
835pub fn parse_session_file(path: &Path) -> std::result::Result<ChatSession, serde_json::Error> {
836    let content = std::fs::read_to_string(path)
837        .map_err(|e| serde_json::Error::io(std::io::Error::other(e.to_string())))?;
838
839    // Use content-based auto-detection
840    let (session, _format_info) = parse_session_auto(&content)?;
841    Ok(session)
842}
843
844/// Get the path to the workspace storage database
845pub fn get_workspace_storage_db(workspace_id: &str) -> Result<PathBuf> {
846    let storage_path = get_workspace_storage_path()?;
847    Ok(storage_path.join(workspace_id).join("state.vscdb"))
848}
849
850/// Read the chat session index from VS Code storage
851pub fn read_chat_session_index(db_path: &Path) -> Result<ChatSessionIndex> {
852    let conn = Connection::open(db_path)?;
853
854    let result: std::result::Result<String, rusqlite::Error> = conn.query_row(
855        "SELECT value FROM ItemTable WHERE key = ?",
856        ["chat.ChatSessionStore.index"],
857        |row| row.get(0),
858    );
859
860    match result {
861        Ok(json_str) => serde_json::from_str(&json_str)
862            .map_err(|e| CsmError::InvalidSessionFormat(e.to_string())),
863        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(ChatSessionIndex::default()),
864        Err(e) => Err(CsmError::SqliteError(e)),
865    }
866}
867
868/// Write the chat session index to VS Code storage
869pub fn write_chat_session_index(db_path: &Path, index: &ChatSessionIndex) -> Result<()> {
870    let conn = Connection::open(db_path)?;
871    let json_str = serde_json::to_string(index)?;
872
873    // Check if the key exists
874    let exists: bool = conn.query_row(
875        "SELECT COUNT(*) > 0 FROM ItemTable WHERE key = ?",
876        ["chat.ChatSessionStore.index"],
877        |row| row.get(0),
878    )?;
879
880    if exists {
881        conn.execute(
882            "UPDATE ItemTable SET value = ? WHERE key = ?",
883            [&json_str, "chat.ChatSessionStore.index"],
884        )?;
885    } else {
886        conn.execute(
887            "INSERT INTO ItemTable (key, value) VALUES (?, ?)",
888            ["chat.ChatSessionStore.index", &json_str],
889        )?;
890    }
891
892    Ok(())
893}
894
895/// Add a session to the VS Code index
896pub fn add_session_to_index(
897    db_path: &Path,
898    session_id: &str,
899    title: &str,
900    last_message_date_ms: i64,
901    _is_imported: bool,
902    initial_location: &str,
903    is_empty: bool,
904) -> Result<()> {
905    let mut index = read_chat_session_index(db_path)?;
906
907    index.entries.insert(
908        session_id.to_string(),
909        ChatSessionIndexEntry {
910            session_id: session_id.to_string(),
911            title: title.to_string(),
912            last_message_date: last_message_date_ms,
913            timing: Some(ChatSessionTiming {
914                created: last_message_date_ms,
915                last_request_started: Some(last_message_date_ms),
916                last_request_ended: Some(last_message_date_ms),
917            }),
918            last_response_state: 1, // ResponseModelState.Complete
919            initial_location: initial_location.to_string(),
920            is_empty,
921        },
922    );
923
924    write_chat_session_index(db_path, &index)
925}
926
927/// Remove a session from the VS Code index
928#[allow(dead_code)]
929pub fn remove_session_from_index(db_path: &Path, session_id: &str) -> Result<bool> {
930    let mut index = read_chat_session_index(db_path)?;
931    let removed = index.entries.remove(session_id).is_some();
932    if removed {
933        write_chat_session_index(db_path, &index)?;
934    }
935    Ok(removed)
936}
937
938/// Sync the VS Code index with sessions on disk (remove stale entries, add missing ones)
939/// When both .json and .jsonl exist for the same session ID, prefers .jsonl.
940pub fn sync_session_index(
941    workspace_id: &str,
942    chat_sessions_dir: &Path,
943    force: bool,
944) -> Result<(usize, usize)> {
945    let db_path = get_workspace_storage_db(workspace_id)?;
946
947    if !db_path.exists() {
948        return Err(CsmError::WorkspaceNotFound(format!(
949            "Database not found: {}",
950            db_path.display()
951        )));
952    }
953
954    // Check if VS Code is running
955    if !force && is_vscode_running() {
956        return Err(CsmError::VSCodeRunning);
957    }
958
959    // Get current index
960    let mut index = read_chat_session_index(&db_path)?;
961
962    // Get session files on disk
963    let mut files_on_disk: std::collections::HashSet<String> = std::collections::HashSet::new();
964    if chat_sessions_dir.exists() {
965        for entry in std::fs::read_dir(chat_sessions_dir)? {
966            let entry = entry?;
967            let path = entry.path();
968            if path
969                .extension()
970                .map(is_session_file_extension)
971                .unwrap_or(false)
972            {
973                if let Some(stem) = path.file_stem() {
974                    files_on_disk.insert(stem.to_string_lossy().to_string());
975                }
976            }
977        }
978    }
979
980    // Remove stale entries (in index but not on disk)
981    let stale_ids: Vec<String> = index
982        .entries
983        .keys()
984        .filter(|id| !files_on_disk.contains(*id))
985        .cloned()
986        .collect();
987
988    let removed = stale_ids.len();
989    for id in &stale_ids {
990        index.entries.remove(id);
991    }
992
993    // Add/update sessions from disk
994    // Collect files, preferring .jsonl over .json for the same session ID
995    let mut session_files: std::collections::HashMap<String, PathBuf> =
996        std::collections::HashMap::new();
997    for entry in std::fs::read_dir(chat_sessions_dir)? {
998        let entry = entry?;
999        let path = entry.path();
1000        if path
1001            .extension()
1002            .map(is_session_file_extension)
1003            .unwrap_or(false)
1004        {
1005            if let Some(stem) = path.file_stem() {
1006                let stem_str = stem.to_string_lossy().to_string();
1007                let is_jsonl = path.extension().is_some_and(|e| e == "jsonl");
1008                // Insert if no entry yet, or if this is .jsonl (preferred over .json)
1009                if !session_files.contains_key(&stem_str) || is_jsonl {
1010                    session_files.insert(stem_str, path);
1011                }
1012            }
1013        }
1014    }
1015
1016    let mut added = 0;
1017    for (_, path) in &session_files {
1018        if let Ok(session) = parse_session_file(path) {
1019            let session_id = session.session_id.clone().unwrap_or_else(|| {
1020                path.file_stem()
1021                    .map(|s| s.to_string_lossy().to_string())
1022                    .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
1023            });
1024
1025            let title = session.title();
1026            let is_empty = session.is_empty();
1027            let last_message_date = session.last_message_date;
1028            let initial_location = session.initial_location.clone();
1029
1030            index.entries.insert(
1031                session_id.clone(),
1032                ChatSessionIndexEntry {
1033                    session_id,
1034                    title,
1035                    last_message_date,
1036                    timing: Some(ChatSessionTiming {
1037                        created: session.creation_date,
1038                        last_request_started: Some(last_message_date),
1039                        last_request_ended: Some(last_message_date),
1040                    }),
1041                    last_response_state: 1, // ResponseModelState.Complete
1042                    initial_location,
1043                    is_empty,
1044                },
1045            );
1046            added += 1;
1047        }
1048    }
1049
1050    // Write the synced index
1051    write_chat_session_index(&db_path, &index)?;
1052
1053    Ok((added, removed))
1054}
1055
1056/// Register all sessions from a directory into the VS Code index
1057pub fn register_all_sessions_from_directory(
1058    workspace_id: &str,
1059    chat_sessions_dir: &Path,
1060    force: bool,
1061) -> Result<usize> {
1062    let db_path = get_workspace_storage_db(workspace_id)?;
1063
1064    if !db_path.exists() {
1065        return Err(CsmError::WorkspaceNotFound(format!(
1066            "Database not found: {}",
1067            db_path.display()
1068        )));
1069    }
1070
1071    // Check if VS Code is running
1072    if !force && is_vscode_running() {
1073        return Err(CsmError::VSCodeRunning);
1074    }
1075
1076    // Use sync to ensure index matches disk
1077    let (added, removed) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
1078
1079    // Print individual session info
1080    for entry in std::fs::read_dir(chat_sessions_dir)? {
1081        let entry = entry?;
1082        let path = entry.path();
1083
1084        if path
1085            .extension()
1086            .map(is_session_file_extension)
1087            .unwrap_or(false)
1088        {
1089            if let Ok(session) = parse_session_file(&path) {
1090                let session_id = session.session_id.clone().unwrap_or_else(|| {
1091                    path.file_stem()
1092                        .map(|s| s.to_string_lossy().to_string())
1093                        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
1094                });
1095
1096                let title = session.title();
1097
1098                println!(
1099                    "[OK] Registered: {} ({}...)",
1100                    title,
1101                    &session_id[..12.min(session_id.len())]
1102                );
1103            }
1104        }
1105    }
1106
1107    if removed > 0 {
1108        println!("[OK] Removed {} stale index entries", removed);
1109    }
1110
1111    Ok(added)
1112}
1113
1114/// Check if VS Code is currently running
1115pub fn is_vscode_running() -> bool {
1116    let mut sys = System::new();
1117    sys.refresh_processes();
1118
1119    for process in sys.processes().values() {
1120        let name = process.name().to_lowercase();
1121        if name.contains("code") && !name.contains("codec") {
1122            return true;
1123        }
1124    }
1125
1126    false
1127}
1128
1129/// Close VS Code gracefully and wait for it to exit.
1130/// Returns the list of workspace folders that were open (for reopening).
1131pub fn close_vscode_and_wait(timeout_secs: u64) -> Result<()> {
1132    use sysinfo::{ProcessRefreshKind, RefreshKind, Signal};
1133
1134    if !is_vscode_running() {
1135        return Ok(());
1136    }
1137
1138    // Send SIGTERM (graceful close) to all Code processes
1139    let mut sys = System::new_with_specifics(
1140        RefreshKind::new().with_processes(ProcessRefreshKind::everything()),
1141    );
1142    sys.refresh_processes();
1143
1144    let mut signaled = 0u32;
1145    for (pid, process) in sys.processes() {
1146        let name = process.name().to_lowercase();
1147        if name.contains("code") && !name.contains("codec") {
1148            // On Windows, kill() sends TerminateProcess; there's no graceful
1149            // SIGTERM equivalent via sysinfo. But the main electron process
1150            // handles WM_CLOSE. We use the `taskkill` approach on Windows for
1151            // a graceful close.
1152            #[cfg(windows)]
1153            {
1154                let _ = std::process::Command::new("taskkill")
1155                    .args(["/PID", &pid.as_u32().to_string()])
1156                    .stdout(std::process::Stdio::null())
1157                    .stderr(std::process::Stdio::null())
1158                    .status();
1159                signaled += 1;
1160            }
1161            #[cfg(not(windows))]
1162            {
1163                if process.kill_with(Signal::Term).unwrap_or(false) {
1164                    signaled += 1;
1165                }
1166            }
1167        }
1168    }
1169
1170    if signaled == 0 {
1171        return Ok(());
1172    }
1173
1174    // Wait for all Code processes to exit
1175    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout_secs);
1176    loop {
1177        std::thread::sleep(std::time::Duration::from_millis(500));
1178        if !is_vscode_running() {
1179            // Extra wait for file locks to release
1180            std::thread::sleep(std::time::Duration::from_secs(1));
1181            return Ok(());
1182        }
1183        if std::time::Instant::now() >= deadline {
1184            // Force kill remaining processes
1185            let mut sys2 = System::new_with_specifics(
1186                RefreshKind::new().with_processes(ProcessRefreshKind::everything()),
1187            );
1188            sys2.refresh_processes();
1189            for (_pid, process) in sys2.processes() {
1190                let name = process.name().to_lowercase();
1191                if name.contains("code") && !name.contains("codec") {
1192                    process.kill();
1193                }
1194            }
1195            std::thread::sleep(std::time::Duration::from_secs(1));
1196            return Ok(());
1197        }
1198    }
1199}
1200
1201/// Reopen VS Code, optionally at a specific path.
1202pub fn reopen_vscode(project_path: Option<&str>) -> Result<()> {
1203    let mut cmd = std::process::Command::new("code");
1204    if let Some(path) = project_path {
1205        cmd.arg(path);
1206    }
1207    cmd.stdout(std::process::Stdio::null())
1208        .stderr(std::process::Stdio::null())
1209        .spawn()?;
1210    Ok(())
1211}
1212
1213/// Backup workspace sessions to a timestamped directory
1214pub fn backup_workspace_sessions(workspace_dir: &Path) -> Result<Option<PathBuf>> {
1215    let chat_sessions_dir = workspace_dir.join("chatSessions");
1216
1217    if !chat_sessions_dir.exists() {
1218        return Ok(None);
1219    }
1220
1221    let timestamp = std::time::SystemTime::now()
1222        .duration_since(std::time::UNIX_EPOCH)
1223        .unwrap()
1224        .as_secs();
1225
1226    let backup_dir = workspace_dir.join(format!("chatSessions-backup-{}", timestamp));
1227
1228    // Copy directory recursively
1229    copy_dir_all(&chat_sessions_dir, &backup_dir)?;
1230
1231    Ok(Some(backup_dir))
1232}
1233
1234/// Recursively copy a directory
1235fn copy_dir_all(src: &Path, dst: &Path) -> Result<()> {
1236    std::fs::create_dir_all(dst)?;
1237
1238    for entry in std::fs::read_dir(src)? {
1239        let entry = entry?;
1240        let src_path = entry.path();
1241        let dst_path = dst.join(entry.file_name());
1242
1243        if src_path.is_dir() {
1244            copy_dir_all(&src_path, &dst_path)?;
1245        } else {
1246            std::fs::copy(&src_path, &dst_path)?;
1247        }
1248    }
1249
1250    Ok(())
1251}
1252
1253// =============================================================================
1254// Empty Window Sessions (ALL SESSIONS)
1255// =============================================================================
1256
1257/// Read all empty window chat sessions (not tied to any workspace)
1258/// These appear in VS Code's "ALL SESSIONS" panel
1259pub fn read_empty_window_sessions() -> Result<Vec<ChatSession>> {
1260    let sessions_path = get_empty_window_sessions_path()?;
1261
1262    if !sessions_path.exists() {
1263        return Ok(Vec::new());
1264    }
1265
1266    let mut sessions = Vec::new();
1267
1268    for entry in std::fs::read_dir(&sessions_path)? {
1269        let entry = entry?;
1270        let path = entry.path();
1271
1272        if path.extension().is_some_and(is_session_file_extension) {
1273            if let Ok(session) = parse_session_file(&path) {
1274                sessions.push(session);
1275            }
1276        }
1277    }
1278
1279    // Sort by last message date (most recent first)
1280    sessions.sort_by(|a, b| b.last_message_date.cmp(&a.last_message_date));
1281
1282    Ok(sessions)
1283}
1284
1285/// Get a specific empty window session by ID
1286#[allow(dead_code)]
1287pub fn get_empty_window_session(session_id: &str) -> Result<Option<ChatSession>> {
1288    let sessions_path = get_empty_window_sessions_path()?;
1289    let session_path = sessions_path.join(format!("{}.json", session_id));
1290
1291    if !session_path.exists() {
1292        return Ok(None);
1293    }
1294
1295    let content = std::fs::read_to_string(&session_path)?;
1296    let session: ChatSession = serde_json::from_str(&content)
1297        .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
1298
1299    Ok(Some(session))
1300}
1301
1302/// Write an empty window session
1303#[allow(dead_code)]
1304pub fn write_empty_window_session(session: &ChatSession) -> Result<PathBuf> {
1305    let sessions_path = get_empty_window_sessions_path()?;
1306
1307    // Create directory if it doesn't exist
1308    std::fs::create_dir_all(&sessions_path)?;
1309
1310    let session_id = session.session_id.as_deref().unwrap_or("unknown");
1311    let session_path = sessions_path.join(format!("{}.json", session_id));
1312    let content = serde_json::to_string_pretty(session)?;
1313    std::fs::write(&session_path, content)?;
1314
1315    Ok(session_path)
1316}
1317
1318/// Delete an empty window session
1319#[allow(dead_code)]
1320pub fn delete_empty_window_session(session_id: &str) -> Result<bool> {
1321    let sessions_path = get_empty_window_sessions_path()?;
1322    let session_path = sessions_path.join(format!("{}.json", session_id));
1323
1324    if session_path.exists() {
1325        std::fs::remove_file(&session_path)?;
1326        Ok(true)
1327    } else {
1328        Ok(false)
1329    }
1330}
1331
1332/// Count empty window sessions
1333pub fn count_empty_window_sessions() -> Result<usize> {
1334    let sessions_path = get_empty_window_sessions_path()?;
1335
1336    if !sessions_path.exists() {
1337        return Ok(0);
1338    }
1339
1340    let count = std::fs::read_dir(&sessions_path)?
1341        .filter_map(|e| e.ok())
1342        .filter(|e| e.path().extension().is_some_and(is_session_file_extension))
1343        .count();
1344
1345    Ok(count)
1346}
1347
1348/// Compact a JSONL session file by replaying all operations into a single kind:0 snapshot.
1349/// This works at the raw JSON level, preserving all fields VS Code expects.
1350/// Returns the path to the compacted file.
1351///
1352/// Handles a common corruption pattern where VS Code appends delta operations
1353/// to line 0 without newline separators (e.g., `}{"kind":1,...}{"kind":2,...}`).
1354pub fn compact_session_jsonl(path: &Path) -> Result<PathBuf> {
1355    let content = std::fs::read_to_string(path).map_err(|e| {
1356        CsmError::InvalidSessionFormat(format!("Failed to read {}: {}", path.display(), e))
1357    })?;
1358
1359    // Pre-process: split concatenated JSON objects that lack newline separators.
1360    // VS Code sometimes appends delta ops to line 0 without a \n, producing:
1361    //   {"kind":0,"v":{...}}{"kind":1,...}{"kind":2,...}\n{"kind":1,...}\n...
1362    // We fix this by inserting newlines at every `}{"kind":` boundary.
1363    let content = split_concatenated_jsonl(&content);
1364
1365    let mut lines = content.lines();
1366
1367    // First line must be kind:0 (initial snapshot)
1368    let first_line = lines
1369        .next()
1370        .ok_or_else(|| CsmError::InvalidSessionFormat("Empty JSONL file".to_string()))?;
1371
1372    let first_entry: serde_json::Value = match serde_json::from_str(first_line.trim()) {
1373        Ok(v) => v,
1374        Err(_) => {
1375            // Try sanitizing Unicode (lone surrogates, etc.)
1376            let sanitized = sanitize_json_unicode(first_line.trim());
1377            serde_json::from_str(&sanitized).map_err(|e| {
1378                CsmError::InvalidSessionFormat(format!("Invalid JSON on line 1: {}", e))
1379            })?
1380        }
1381    };
1382
1383    let kind = first_entry
1384        .get("kind")
1385        .and_then(|k| k.as_u64())
1386        .unwrap_or(99);
1387    if kind != 0 {
1388        return Err(CsmError::InvalidSessionFormat(
1389            "First JSONL line must be kind:0".to_string(),
1390        ));
1391    }
1392
1393    // Extract the session state from the "v" field
1394    let mut state = first_entry
1395        .get("v")
1396        .cloned()
1397        .ok_or_else(|| CsmError::InvalidSessionFormat("kind:0 missing 'v' field".to_string()))?;
1398
1399    // Replay all subsequent operations
1400    for line in lines {
1401        let line = line.trim();
1402        if line.is_empty() {
1403            continue;
1404        }
1405
1406        let entry: serde_json::Value = match serde_json::from_str(line) {
1407            Ok(v) => v,
1408            Err(_) => continue, // Skip malformed lines
1409        };
1410
1411        let op_kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(99);
1412
1413        match op_kind {
1414            1 => {
1415                // Delta update: k=["path","to","field"], v=value
1416                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
1417                    if let Some(keys_arr) = keys.as_array() {
1418                        apply_delta(&mut state, keys_arr, value.clone());
1419                    }
1420                }
1421            }
1422            2 => {
1423                // Array append: k=["path","to","array"], v=[items]
1424                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
1425                    if let Some(keys_arr) = keys.as_array() {
1426                        apply_append(&mut state, keys_arr, value.clone());
1427                    }
1428                }
1429            }
1430            _ => {} // Skip unknown kinds
1431        }
1432    }
1433
1434    // Inject any missing fields that VS Code's latest format requires
1435    let session_id = path
1436        .file_stem()
1437        .and_then(|s| s.to_str())
1438        .map(|s| s.to_string());
1439    ensure_vscode_compat_fields(&mut state, session_id.as_deref());
1440
1441    // Write the compacted file: single kind:0 line with the final state
1442    let compact_entry = serde_json::json!({"kind": 0, "v": state});
1443    let compact_content = serde_json::to_string(&compact_entry)
1444        .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
1445
1446    // Backup the original file
1447    let backup_path = path.with_extension("jsonl.bak");
1448    std::fs::rename(path, &backup_path)?;
1449
1450    // Write the compacted file
1451    std::fs::write(path, &compact_content)?;
1452
1453    Ok(backup_path)
1454}
1455
/// Restore line breaks between JSON objects that were written back-to-back.
///
/// VS Code sometimes appends delta operations (kind:1, kind:2) to a JSONL
/// line without a newline in between, producing invalid JSONL such as
///   `{"kind":0,"v":{...}}{"kind":1,...}{"kind":2,...}`
///
/// Every `}{"kind":` boundary gets a newline inserted after the `}`. Content
/// without such a boundary is returned unchanged. The boundary cannot occur
/// inside a JSON string value, because the quote in `{"kind":` would have to
/// be escaped there (`{\"kind\":`).
pub fn split_concatenated_jsonl(content: &str) -> String {
    const FUSED: &str = "}{\"kind\":";
    const SEPARATED: &str = "}\n{\"kind\":";

    // `replace` already yields an unchanged copy when nothing matches.
    content.replace(FUSED, SEPARATED)
}
1473
1474/// Apply a delta update (kind:1) to a JSON value at the given key path.
1475fn apply_delta(root: &mut serde_json::Value, keys: &[serde_json::Value], value: serde_json::Value) {
1476    if keys.is_empty() {
1477        return;
1478    }
1479
1480    // Navigate to the parent
1481    let mut current = root;
1482    for key in &keys[..keys.len() - 1] {
1483        if let Some(k) = key.as_str() {
1484            if !current.get(k).is_some() {
1485                current[k] = serde_json::Value::Object(serde_json::Map::new());
1486            }
1487            current = &mut current[k];
1488        } else if let Some(idx) = key.as_u64() {
1489            if let Some(arr) = current.as_array_mut() {
1490                if (idx as usize) < arr.len() {
1491                    current = &mut arr[idx as usize];
1492                } else {
1493                    return; // Index out of bounds
1494                }
1495            } else {
1496                return;
1497            }
1498        }
1499    }
1500
1501    // Set the final key
1502    if let Some(last_key) = keys.last() {
1503        if let Some(k) = last_key.as_str() {
1504            current[k] = value;
1505        } else if let Some(idx) = last_key.as_u64() {
1506            if let Some(arr) = current.as_array_mut() {
1507                if (idx as usize) < arr.len() {
1508                    arr[idx as usize] = value;
1509                }
1510            }
1511        }
1512    }
1513}
1514
1515/// Apply an array append operation (kind:2) to a JSON value at the given key path.
1516fn apply_append(
1517    root: &mut serde_json::Value,
1518    keys: &[serde_json::Value],
1519    items: serde_json::Value,
1520) {
1521    if keys.is_empty() {
1522        return;
1523    }
1524
1525    // Navigate to the target array
1526    let mut current = root;
1527    for key in keys {
1528        if let Some(k) = key.as_str() {
1529            if !current.get(k).is_some() {
1530                current[k] = serde_json::json!([]);
1531            }
1532            current = &mut current[k];
1533        } else if let Some(idx) = key.as_u64() {
1534            if let Some(arr) = current.as_array_mut() {
1535                if (idx as usize) < arr.len() {
1536                    current = &mut arr[idx as usize];
1537                } else {
1538                    return;
1539                }
1540            } else {
1541                return;
1542            }
1543        }
1544    }
1545
1546    // Append items to the target array
1547    if let (Some(target_arr), Some(new_items)) = (current.as_array_mut(), items.as_array()) {
1548        target_arr.extend(new_items.iter().cloned());
1549    }
1550}
1551
1552/// Ensure a JSONL `kind:0` snapshot's `v` object has all fields required by
1553/// VS Code's latest session format (1.109.0+ / version 3). Missing fields are
1554/// injected with sensible defaults so sessions load reliably after recovery,
1555/// conversion, or compaction.
1556///
1557/// Required fields that VS Code now expects:
1558/// - `version` (u32, default 3)
1559/// - `sessionId` (string, extracted from filename or generated)
1560/// - `responderUsername` (string, default "GitHub Copilot")
1561/// - `hasPendingEdits` (bool, default false)
1562/// - `pendingRequests` (array, default [])
1563/// - `inputState` (object with mode, attachments, etc.)
1564pub fn ensure_vscode_compat_fields(state: &mut serde_json::Value, session_id: Option<&str>) {
1565    if let Some(obj) = state.as_object_mut() {
1566        // version
1567        if !obj.contains_key("version") {
1568            obj.insert("version".to_string(), serde_json::json!(3));
1569        }
1570
1571        // sessionId — use provided ID, or try to read from existing field
1572        if !obj.contains_key("sessionId") {
1573            if let Some(id) = session_id {
1574                obj.insert("sessionId".to_string(), serde_json::json!(id));
1575            }
1576        }
1577
1578        // responderUsername
1579        if !obj.contains_key("responderUsername") {
1580            obj.insert(
1581                "responderUsername".to_string(),
1582                serde_json::json!("GitHub Copilot"),
1583            );
1584        }
1585
1586        // hasPendingEdits — always false for recovered/compacted sessions
1587        if !obj.contains_key("hasPendingEdits") {
1588            obj.insert("hasPendingEdits".to_string(), serde_json::json!(false));
1589        }
1590
1591        // pendingRequests — always empty for recovered/compacted sessions
1592        if !obj.contains_key("pendingRequests") {
1593            obj.insert("pendingRequests".to_string(), serde_json::json!([]));
1594        }
1595
1596        // inputState — VS Code expects this to exist with at least mode + attachments
1597        if !obj.contains_key("inputState") {
1598            obj.insert(
1599                "inputState".to_string(),
1600                serde_json::json!({
1601                    "attachments": [],
1602                    "mode": { "id": "agent", "kind": "agent" },
1603                    "inputText": "",
1604                    "selections": [],
1605                    "contrib": { "chatDynamicVariableModel": [] }
1606                }),
1607            );
1608        }
1609    }
1610}
1611
1612/// Repair workspace sessions: compact large JSONL files and fix the index.
1613/// Returns (compacted_count, index_fixed_count).
1614pub fn repair_workspace_sessions(
1615    workspace_id: &str,
1616    chat_sessions_dir: &Path,
1617    force: bool,
1618) -> Result<(usize, usize)> {
1619    let db_path = get_workspace_storage_db(workspace_id)?;
1620
1621    if !db_path.exists() {
1622        return Err(CsmError::WorkspaceNotFound(format!(
1623            "Database not found: {}",
1624            db_path.display()
1625        )));
1626    }
1627
1628    if !force && is_vscode_running() {
1629        return Err(CsmError::VSCodeRunning);
1630    }
1631
1632    let mut compacted = 0;
1633    let mut fields_fixed = 0;
1634
1635    if chat_sessions_dir.exists() {
1636        // Pass 1: Compact large JSONL files and fix missing fields
1637        for entry in std::fs::read_dir(chat_sessions_dir)? {
1638            let entry = entry?;
1639            let path = entry.path();
1640            if path.extension().is_some_and(|e| e == "jsonl") {
1641                let metadata = std::fs::metadata(&path)?;
1642                let size_mb = metadata.len() / (1024 * 1024);
1643
1644                let content = std::fs::read_to_string(&path)
1645                    .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
1646                let line_count = content.lines().count();
1647
1648                if line_count > 1 {
1649                    // Compact multi-line JSONL (has operations to replay)
1650                    let stem = path
1651                        .file_stem()
1652                        .map(|s| s.to_string_lossy().to_string())
1653                        .unwrap_or_default();
1654                    println!(
1655                        "   Compacting {} ({} lines, {}MB)...",
1656                        stem, line_count, size_mb
1657                    );
1658
1659                    match compact_session_jsonl(&path) {
1660                        Ok(backup_path) => {
1661                            let new_size = std::fs::metadata(&path)
1662                                .map(|m| m.len() / (1024 * 1024))
1663                                .unwrap_or(0);
1664                            println!(
1665                                "   [OK] Compacted: {}MB -> {}MB (backup: {})",
1666                                size_mb,
1667                                new_size,
1668                                backup_path
1669                                    .file_name()
1670                                    .unwrap_or_default()
1671                                    .to_string_lossy()
1672                            );
1673                            compacted += 1;
1674                        }
1675                        Err(e) => {
1676                            println!("   [WARN] Failed to compact {}: {}", stem, e);
1677                        }
1678                    }
1679                } else {
1680                    // Single-line JSONL — check for missing VS Code fields
1681                    if let Some(first_line) = content.lines().next() {
1682                        if let Ok(mut obj) = serde_json::from_str::<serde_json::Value>(first_line) {
1683                            let is_kind_0 = obj
1684                                .get("kind")
1685                                .and_then(|k| k.as_u64())
1686                                .map(|k| k == 0)
1687                                .unwrap_or(false);
1688
1689                            if is_kind_0 {
1690                                if let Some(v) = obj.get("v") {
1691                                    let missing = !v.get("hasPendingEdits").is_some()
1692                                        || !v.get("pendingRequests").is_some()
1693                                        || !v.get("inputState").is_some()
1694                                        || !v.get("sessionId").is_some();
1695
1696                                    if missing {
1697                                        let session_id = path
1698                                            .file_stem()
1699                                            .and_then(|s| s.to_str())
1700                                            .map(|s| s.to_string());
1701                                        if let Some(v_mut) = obj.get_mut("v") {
1702                                            ensure_vscode_compat_fields(
1703                                                v_mut,
1704                                                session_id.as_deref(),
1705                                            );
1706                                        }
1707                                        let patched = serde_json::to_string(&obj).map_err(|e| {
1708                                            CsmError::InvalidSessionFormat(format!(
1709                                                "Failed to serialize: {}",
1710                                                e
1711                                            ))
1712                                        })?;
1713                                        std::fs::write(&path, &patched)?;
1714                                        let stem = path
1715                                            .file_stem()
1716                                            .map(|s| s.to_string_lossy().to_string())
1717                                            .unwrap_or_default();
1718                                        println!("   [OK] Fixed missing VS Code fields: {}", stem);
1719                                        fields_fixed += 1;
1720                                    }
1721                                }
1722                            }
1723                        }
1724                    }
1725                }
1726            }
1727        }
1728    }
1729
1730    // Pass 2: Rebuild the index with correct metadata
1731    let (index_fixed, _) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
1732
1733    if fields_fixed > 0 {
1734        println!(
1735            "   [OK] Injected missing VS Code fields into {} session(s)",
1736            fields_fixed
1737        );
1738    }
1739
1740    Ok((compacted, index_fixed))
1741}