Skip to main content

chasm/
storage.rs

// Copyright (c) 2024-2026 Nervosys LLC
// SPDX-License-Identifier: AGPL-3.0-only
//! VS Code storage (SQLite database) operations
4
use crate::error::{CsmError, Result};
use crate::models::{
    ChatRequest, ChatSession, ChatSessionIndex, ChatSessionIndexEntry, ChatSessionTiming,
};
use crate::workspace::{get_empty_window_sessions_path, get_workspace_storage_path};
use once_cell::sync::Lazy;
use regex::Regex;
use rusqlite::Connection;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use sysinfo::System;
16
17/// A single issue detected during workspace session diagnostics
18#[derive(Debug, Clone)]
19pub struct SessionIssue {
20    /// The session file stem (UUID)
21    pub session_id: String,
22    /// Category of issue
23    pub kind: SessionIssueKind,
24    /// Human-readable description
25    pub detail: String,
26}
27
/// Categories of session issues that can be detected and auto-fixed.
///
/// The type is `Eq + Hash` so kinds can be used directly as set members or
/// map keys (for example when tallying issues per category).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SessionIssueKind {
    /// JSONL file has multiple lines (operations not compacted)
    MultiLineJsonl,
    /// JSONL first line contains concatenated JSON objects (missing newlines)
    ConcatenatedJsonl,
    /// Index entry has lastResponseState = 2 (Cancelled), blocks VS Code loading
    CancelledState,
    /// Last request's modelState.value is 2 (Cancelled) or missing in file content
    CancelledModelState,
    /// File exists on disk but is not in the VS Code index
    OrphanedSession,
    /// Index entry references a file that no longer exists on disk
    StaleIndexEntry,
    /// Session is missing required VS Code compat fields
    MissingCompatFields,
    /// Both .json and .jsonl exist for the same session ID
    DuplicateFormat,
    /// Legacy .json file is corrupted — contains only structural chars ({}, whitespace)
    SkeletonJson,
}
50
51impl std::fmt::Display for SessionIssueKind {
52    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53        match self {
54            SessionIssueKind::MultiLineJsonl => write!(f, "multi-line JSONL"),
55            SessionIssueKind::ConcatenatedJsonl => write!(f, "concatenated JSONL"),
56            SessionIssueKind::CancelledState => write!(f, "cancelled state"),
57            SessionIssueKind::CancelledModelState => write!(f, "cancelled modelState in file"),
58            SessionIssueKind::OrphanedSession => write!(f, "orphaned session"),
59            SessionIssueKind::StaleIndexEntry => write!(f, "stale index entry"),
60            SessionIssueKind::MissingCompatFields => write!(f, "missing compat fields"),
61            SessionIssueKind::DuplicateFormat => write!(f, "duplicate .json/.jsonl"),
62            SessionIssueKind::SkeletonJson => write!(f, "skeleton .json (corrupt)"),
63        }
64    }
65}
66
67/// Summary of issues found in a single workspace
68#[derive(Debug, Clone, Default)]
69pub struct WorkspaceDiagnosis {
70    /// Project path (if known)
71    pub project_path: Option<String>,
72    /// Workspace hash
73    pub workspace_hash: String,
74    /// Total sessions on disk
75    pub sessions_on_disk: usize,
76    /// Total sessions in index
77    pub sessions_in_index: usize,
78    /// All detected issues
79    pub issues: Vec<SessionIssue>,
80}
81
82impl WorkspaceDiagnosis {
83    pub fn is_healthy(&self) -> bool {
84        self.issues.is_empty()
85    }
86
87    pub fn issue_count_by_kind(&self, kind: &SessionIssueKind) -> usize {
88        self.issues.iter().filter(|i| &i.kind == kind).count()
89    }
90}
91
92/// Diagnose a workspace for session issues without modifying anything.
93/// Returns a structured report of all detected problems.
94pub fn diagnose_workspace_sessions(
95    workspace_id: &str,
96    chat_sessions_dir: &Path,
97) -> Result<WorkspaceDiagnosis> {
98    let mut diagnosis = WorkspaceDiagnosis {
99        workspace_hash: workspace_id.to_string(),
100        ..Default::default()
101    };
102
103    if !chat_sessions_dir.exists() {
104        return Ok(diagnosis);
105    }
106
107    // Collect session files on disk
108    let mut jsonl_sessions: HashSet<String> = HashSet::new();
109    let mut json_sessions: HashSet<String> = HashSet::new();
110    let mut all_session_ids: HashSet<String> = HashSet::new();
111
112    for entry in std::fs::read_dir(chat_sessions_dir)? {
113        let entry = entry?;
114        let path = entry.path();
115        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
116        let stem = path
117            .file_stem()
118            .map(|s| s.to_string_lossy().to_string())
119            .unwrap_or_default();
120
121        match ext {
122            "jsonl" => {
123                jsonl_sessions.insert(stem.clone());
124                all_session_ids.insert(stem);
125            }
126            "json" if !path.to_string_lossy().ends_with(".bak") => {
127                json_sessions.insert(stem.clone());
128                all_session_ids.insert(stem);
129            }
130            _ => {}
131        }
132    }
133    diagnosis.sessions_on_disk = all_session_ids.len();
134
135    // Check for duplicate .json/.jsonl files
136    for id in &jsonl_sessions {
137        if json_sessions.contains(id) {
138            diagnosis.issues.push(SessionIssue {
139                session_id: id.clone(),
140                kind: SessionIssueKind::DuplicateFormat,
141                detail: format!("Both {id}.json and {id}.jsonl exist"),
142            });
143        }
144    }
145
146    // Check JSONL files for content issues
147    for id in &jsonl_sessions {
148        let path = chat_sessions_dir.join(format!("{id}.jsonl"));
149        if let Ok(content) = std::fs::read_to_string(&path) {
150            let line_count = content.lines().count();
151
152            if line_count > 1 {
153                let size_mb = content.len() / (1024 * 1024);
154                diagnosis.issues.push(SessionIssue {
155                    session_id: id.clone(),
156                    kind: SessionIssueKind::MultiLineJsonl,
157                    detail: format!("{line_count} lines, ~{size_mb} MB — needs compaction"),
158                });
159            }
160
161            // Check first line for concatenation
162            if let Some(first_line) = content.lines().next() {
163                if first_line.contains("}{\"kind\":") {
164                    diagnosis.issues.push(SessionIssue {
165                        session_id: id.clone(),
166                        kind: SessionIssueKind::ConcatenatedJsonl,
167                        detail: "First line has concatenated JSON objects".to_string(),
168                    });
169                }
170            }
171
172            // Check for missing compat fields (only single-line files worth checking)
173            if line_count == 1 {
174                if let Some(first_line) = content.lines().next() {
175                    if let Ok(obj) = serde_json::from_str::<serde_json::Value>(first_line) {
176                        let is_kind_0 = obj
177                            .get("kind")
178                            .and_then(|k| k.as_u64())
179                            .map(|k| k == 0)
180                            .unwrap_or(false);
181
182                        if is_kind_0 {
183                            if let Some(v) = obj.get("v") {
184                                let missing_fields: Vec<&str> = [
185                                    "hasPendingEdits",
186                                    "pendingRequests",
187                                    "inputState",
188                                    "sessionId",
189                                    "version",
190                                ]
191                                .iter()
192                                .filter(|f| v.get(**f).is_none())
193                                .copied()
194                                .collect();
195
196                                if !missing_fields.is_empty() {
197                                    diagnosis.issues.push(SessionIssue {
198                                        session_id: id.clone(),
199                                        kind: SessionIssueKind::MissingCompatFields,
200                                        detail: format!("Missing: {}", missing_fields.join(", ")),
201                                    });
202                                }
203
204                                // Check for cancelled modelState in file content
205                                if let Some(requests) = v.get("requests").and_then(|r| r.as_array())
206                                {
207                                    if let Some(last_req) = requests.last() {
208                                        let model_state_value = last_req
209                                            .get("modelState")
210                                            .and_then(|ms| ms.get("value"))
211                                            .and_then(|v| v.as_u64());
212                                        match model_state_value {
213                                            Some(2) => {
214                                                diagnosis.issues.push(SessionIssue {
215                                                    session_id: id.clone(),
216                                                    kind: SessionIssueKind::CancelledModelState,
217                                                    detail: "Last request modelState.value=2 (Cancelled) in file content".to_string(),
218                                                });
219                                            }
220                                            None => {
221                                                diagnosis.issues.push(SessionIssue {
222                                                    session_id: id.clone(),
223                                                    kind: SessionIssueKind::CancelledModelState,
224                                                    detail: "Last request missing modelState in file content".to_string(),
225                                                });
226                                            }
227                                            _ => {} // Valid state
228                                        }
229                                    }
230                                }
231                            }
232                        }
233                    }
234                }
235            }
236        }
237    }
238
239    // Check .json files for skeleton corruption
240    for id in &json_sessions {
241        // Skip if a .jsonl already exists (it takes precedence)
242        if jsonl_sessions.contains(id) {
243            continue;
244        }
245        let path = chat_sessions_dir.join(format!("{id}.json"));
246        if let Ok(content) = std::fs::read_to_string(&path) {
247            if is_skeleton_json(&content) {
248                diagnosis.issues.push(SessionIssue {
249                    session_id: id.clone(),
250                    kind: SessionIssueKind::SkeletonJson,
251                    detail: format!(
252                        "Legacy .json is corrupt — only structural chars remain ({} bytes)",
253                        content.len()
254                    ),
255                });
256            }
257        }
258    }
259
260    // Check index for stale entries, orphans, and cancelled state
261    let db_path = get_workspace_storage_db(workspace_id)?;
262    if db_path.exists() {
263        if let Ok(index) = read_chat_session_index(&db_path) {
264            diagnosis.sessions_in_index = index.entries.len();
265
266            // Stale index entries (in index but no file on disk)
267            for (id, _entry) in &index.entries {
268                if !all_session_ids.contains(id) {
269                    diagnosis.issues.push(SessionIssue {
270                        session_id: id.clone(),
271                        kind: SessionIssueKind::StaleIndexEntry,
272                        detail: "In index but no file on disk".to_string(),
273                    });
274                }
275            }
276
277            // Cancelled state entries
278            for (id, entry) in &index.entries {
279                if entry.last_response_state == 2 {
280                    diagnosis.issues.push(SessionIssue {
281                        session_id: id.clone(),
282                        kind: SessionIssueKind::CancelledState,
283                        detail: "lastResponseState=2 (Cancelled) — blocks VS Code loading"
284                            .to_string(),
285                    });
286                }
287            }
288
289            // Orphaned sessions (on disk but not in index)
290            let indexed_ids: HashSet<&String> = index.entries.keys().collect();
291            for id in &all_session_ids {
292                if !indexed_ids.contains(id) {
293                    diagnosis.issues.push(SessionIssue {
294                        session_id: id.clone(),
295                        kind: SessionIssueKind::OrphanedSession,
296                        detail: "File on disk but not in VS Code index".to_string(),
297                    });
298                }
299            }
300        }
301    }
302
303    Ok(diagnosis)
304}
305
306/// Regex to match any Unicode escape sequence (valid or not)
307static UNICODE_ESCAPE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\\u[0-9a-fA-F]{4}").unwrap());
308
/// VS Code session format version - helps identify which parsing strategy to use
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VsCodeSessionFormat {
    /// Legacy JSON format (VS Code < 1.109.0)
    /// Single JSON object with ChatSession structure
    LegacyJson,
    /// JSONL format (VS Code >= 1.109.0, January 2026+)
    /// JSON Lines with event sourcing: kind 0 (initial), kind 1 (delta), kind 2 (replace/splice)
    JsonLines,
}
319
/// Session schema version - tracks the internal structure version
///
/// Variants are declared in ascending discriminant order so the derived
/// ordering (`Unknown < V1 < V2 < V3`) reads the same as the declaration.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SessionSchemaVersion {
    /// Unknown version
    Unknown = 0,
    /// Version 1 - Original format (basic fields)
    V1 = 1,
    /// Version 2 - Added more metadata fields
    V2 = 2,
    /// Version 3 - Current format with full request/response structure
    V3 = 3,
}
332
333impl SessionSchemaVersion {
334    /// Create from version number
335    pub fn from_version(v: u32) -> Self {
336        match v {
337            1 => Self::V1,
338            2 => Self::V2,
339            3 => Self::V3,
340            _ => Self::Unknown,
341        }
342    }
343
344    /// Get version number
345    pub fn version_number(&self) -> u32 {
346        match self {
347            Self::V1 => 1,
348            Self::V2 => 2,
349            Self::V3 => 3,
350            Self::Unknown => 0,
351        }
352    }
353
354    /// Get description
355    pub fn description(&self) -> &'static str {
356        match self {
357            Self::V1 => "v1 (basic)",
358            Self::V2 => "v2 (extended metadata)",
359            Self::V3 => "v3 (full structure)",
360            Self::Unknown => "unknown",
361        }
362    }
363}
364
365impl std::fmt::Display for SessionSchemaVersion {
366    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
367        write!(f, "{}", self.description())
368    }
369}
370
371/// Result of session format detection
372#[derive(Debug, Clone)]
373pub struct SessionFormatInfo {
374    /// File format (JSON or JSONL)
375    pub format: VsCodeSessionFormat,
376    /// Schema version detected from content
377    pub schema_version: SessionSchemaVersion,
378    /// Confidence level (0.0 - 1.0)
379    pub confidence: f32,
380    /// Detection method used
381    pub detection_method: &'static str,
382}
383
384impl VsCodeSessionFormat {
385    /// Detect format from file path (by extension)
386    pub fn from_path(path: &Path) -> Self {
387        match path.extension().and_then(|e| e.to_str()) {
388            Some("jsonl") => Self::JsonLines,
389            _ => Self::LegacyJson,
390        }
391    }
392
393    /// Detect format from content by analyzing structure
394    pub fn from_content(content: &str) -> Self {
395        let trimmed = content.trim();
396
397        // JSONL: Multiple lines starting with { or first line has {"kind":
398        if trimmed.starts_with("{\"kind\":") || trimmed.starts_with("{ \"kind\":") {
399            return Self::JsonLines;
400        }
401
402        // Count lines that look like JSON objects
403        let mut json_object_lines = 0;
404        let mut total_non_empty_lines = 0;
405
406        for line in trimmed.lines().take(10) {
407            let line = line.trim();
408            if line.is_empty() {
409                continue;
410            }
411            total_non_empty_lines += 1;
412
413            // Check if line is a JSON object with "kind" field (JSONL marker)
414            if line.starts_with('{') && line.contains("\"kind\"") {
415                json_object_lines += 1;
416            }
417        }
418
419        // If multiple lines look like JSONL entries, it's JSONL
420        if json_object_lines >= 2
421            || (json_object_lines == 1 && total_non_empty_lines == 1 && trimmed.contains("\n{"))
422        {
423            return Self::JsonLines;
424        }
425
426        // Check if it's a single JSON object (legacy format)
427        if trimmed.starts_with('{') && trimmed.ends_with('}') {
428            // Look for ChatSession structure markers
429            if trimmed.contains("\"sessionId\"")
430                || trimmed.contains("\"creationDate\"")
431                || trimmed.contains("\"requests\"")
432            {
433                return Self::LegacyJson;
434            }
435        }
436
437        // Default to legacy JSON if unclear
438        Self::LegacyJson
439    }
440
441    /// Get minimum VS Code version that uses this format
442    pub fn min_vscode_version(&self) -> &'static str {
443        match self {
444            Self::LegacyJson => "1.0.0",
445            Self::JsonLines => "1.109.0",
446        }
447    }
448
449    /// Get human-readable format description
450    pub fn description(&self) -> &'static str {
451        match self {
452            Self::LegacyJson => "Legacy JSON (single object)",
453            Self::JsonLines => "JSON Lines (event-sourced, VS Code 1.109.0+)",
454        }
455    }
456
457    /// Get short format name
458    pub fn short_name(&self) -> &'static str {
459        match self {
460            Self::LegacyJson => "json",
461            Self::JsonLines => "jsonl",
462        }
463    }
464}
465
466impl std::fmt::Display for VsCodeSessionFormat {
467    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
468        write!(f, "{}", self.description())
469    }
470}
471
472/// Sanitize JSON content by replacing lone surrogates with replacement character.
473/// VS Code sometimes writes invalid JSON with lone Unicode surrogates (e.g., \udde0).
474fn sanitize_json_unicode(content: &str) -> String {
475    // Process all \uXXXX sequences and fix lone surrogates
476    let mut result = String::with_capacity(content.len());
477    let mut last_end = 0;
478
479    // Collect all matches first to avoid borrowing issues
480    let matches: Vec<_> = UNICODE_ESCAPE_RE.find_iter(content).collect();
481
482    for (i, mat) in matches.iter().enumerate() {
483        let start = mat.start();
484        let end = mat.end();
485
486        // Add content before this match
487        result.push_str(&content[last_end..start]);
488
489        // Parse the hex value from the match itself (always ASCII \uXXXX)
490        let hex_str = &mat.as_str()[2..]; // Skip the \u prefix
491        if let Ok(code_point) = u16::from_str_radix(hex_str, 16) {
492            // Check if it's a high surrogate (D800-DBFF)
493            if (0xD800..=0xDBFF).contains(&code_point) {
494                // Check if next match is immediately following and is a low surrogate
495                let is_valid_pair = if let Some(next_mat) = matches.get(i + 1) {
496                    // Must be immediately adjacent (no gap)
497                    if next_mat.start() == end {
498                        let next_hex = &next_mat.as_str()[2..];
499                        if let Ok(next_cp) = u16::from_str_radix(next_hex, 16) {
500                            (0xDC00..=0xDFFF).contains(&next_cp)
501                        } else {
502                            false
503                        }
504                    } else {
505                        false
506                    }
507                } else {
508                    false
509                };
510
511                if is_valid_pair {
512                    // Valid surrogate pair, keep the high surrogate
513                    result.push_str(mat.as_str());
514                } else {
515                    // Lone high surrogate - replace with replacement char
516                    result.push_str("\\uFFFD");
517                }
518            }
519            // Check if it's a low surrogate (DC00-DFFF)
520            else if (0xDC00..=0xDFFF).contains(&code_point) {
521                // Check if previous match was immediately before and was a high surrogate
522                let is_valid_pair = if i > 0 {
523                    if let Some(prev_mat) = matches.get(i - 1) {
524                        // Must be immediately adjacent (no gap)
525                        if prev_mat.end() == start {
526                            let prev_hex = &prev_mat.as_str()[2..];
527                            if let Ok(prev_cp) = u16::from_str_radix(prev_hex, 16) {
528                                (0xD800..=0xDBFF).contains(&prev_cp)
529                            } else {
530                                false
531                            }
532                        } else {
533                            false
534                        }
535                    } else {
536                        false
537                    }
538                } else {
539                    false
540                };
541
542                if is_valid_pair {
543                    // Part of valid surrogate pair, keep it
544                    result.push_str(mat.as_str());
545                } else {
546                    // Lone low surrogate - replace with replacement char
547                    result.push_str("\\uFFFD");
548                }
549            }
550            // Normal code point
551            else {
552                result.push_str(mat.as_str());
553            }
554        } else {
555            // Invalid hex - keep as is
556            result.push_str(mat.as_str());
557        }
558        last_end = end;
559    }
560
561    // Add remaining content
562    result.push_str(&content[last_end..]);
563    result
564}
565
566/// Try to parse JSON, sanitizing invalid Unicode if needed
567pub fn parse_session_json(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
568    match serde_json::from_str::<ChatSession>(content) {
569        Ok(session) => Ok(session),
570        Err(e) => {
571            // If parsing fails due to Unicode issue, try sanitizing
572            if e.to_string().contains("surrogate") || e.to_string().contains("escape") {
573                let sanitized = sanitize_json_unicode(content);
574                serde_json::from_str::<ChatSession>(&sanitized)
575            } else {
576                Err(e)
577            }
578        }
579    }
580}
581
/// JSONL entry kinds for VS Code 1.109.0+ session format
///
/// The discriminants are the numeric values of the `kind` field in each line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonlKind {
    /// Initial session state (kind: 0)
    Initial = 0,
    /// Delta update to specific keys (kind: 1)
    Delta = 1,
    /// Array replace/splice operation (kind: 2)
    /// Optional 'i' field specifies splice index (truncate at i, then extend)
    ArraySplice = 2,
}
593
594/// Parse a JSONL (JSON Lines) session file (VS Code 1.109.0+ format)
595/// Each line is a JSON object with 'kind' field indicating the type:
596/// - kind 0: Initial session metadata with 'v' containing ChatSession-like structure
597/// - kind 1: Delta update with 'k' (keys path) and 'v' (value)
598/// - kind 2: Array replace/splice with 'k' (path), 'v' (items), optional 'i' (splice index)
599pub fn parse_session_jsonl(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
600    // Pre-process: split concatenated JSON objects that lack newline separators
601    let content = split_concatenated_jsonl(content);
602
603    let mut session = ChatSession {
604        version: 3,
605        session_id: None,
606        creation_date: 0,
607        last_message_date: 0,
608        is_imported: false,
609        initial_location: "panel".to_string(),
610        custom_title: None,
611        requester_username: None,
612        requester_avatar_icon_uri: None,
613        responder_username: None,
614        responder_avatar_icon_uri: None,
615        requests: Vec::new(),
616    };
617
618    for line in content.lines() {
619        let line = line.trim();
620        if line.is_empty() {
621            continue;
622        }
623
624        // Parse each line as a JSON object
625        let entry: serde_json::Value = match serde_json::from_str(line) {
626            Ok(v) => v,
627            Err(_) => {
628                // Try sanitizing Unicode
629                let sanitized = sanitize_json_unicode(line);
630                serde_json::from_str(&sanitized)?
631            }
632        };
633
634        let kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(0);
635
636        match kind {
637            0 => {
638                // Initial state - 'v' contains the session metadata
639                if let Some(v) = entry.get("v") {
640                    // Parse version
641                    if let Some(version) = v.get("version").and_then(|x| x.as_u64()) {
642                        session.version = version as u32;
643                    }
644                    // Parse session ID
645                    if let Some(sid) = v.get("sessionId").and_then(|x| x.as_str()) {
646                        session.session_id = Some(sid.to_string());
647                    }
648                    // Parse creation date
649                    if let Some(cd) = v.get("creationDate").and_then(|x| x.as_i64()) {
650                        session.creation_date = cd;
651                    }
652                    // Parse initial location
653                    if let Some(loc) = v.get("initialLocation").and_then(|x| x.as_str()) {
654                        session.initial_location = loc.to_string();
655                    }
656                    // Parse responder username
657                    if let Some(ru) = v.get("responderUsername").and_then(|x| x.as_str()) {
658                        session.responder_username = Some(ru.to_string());
659                    }
660                    // Parse custom title
661                    if let Some(title) = v.get("customTitle").and_then(|x| x.as_str()) {
662                        session.custom_title = Some(title.to_string());
663                    }
664                    // Parse hasPendingEdits as imported marker
665                    if let Some(imported) = v.get("isImported").and_then(|x| x.as_bool()) {
666                        session.is_imported = imported;
667                    }
668                    // Parse requests array if present
669                    if let Some(requests) = v.get("requests") {
670                        if let Ok(reqs) =
671                            serde_json::from_value::<Vec<ChatRequest>>(requests.clone())
672                        {
673                            session.requests = reqs;
674                            // Compute last_message_date from the latest request timestamp
675                            if let Some(latest_ts) =
676                                session.requests.iter().filter_map(|r| r.timestamp).max()
677                            {
678                                session.last_message_date = latest_ts;
679                            }
680                        }
681                    }
682                    // Fall back to creationDate if no request timestamps found
683                    if session.last_message_date == 0 {
684                        session.last_message_date = session.creation_date;
685                    }
686                }
687            }
688            1 => {
689                // Delta update - 'k' is array of key path, 'v' is the value
690                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
691                    if let Some(keys_arr) = keys.as_array() {
692                        // Handle top-level session keys
693                        if keys_arr.len() == 1 {
694                            if let Some(key) = keys_arr[0].as_str() {
695                                match key {
696                                    "customTitle" => {
697                                        if let Some(title) = value.as_str() {
698                                            session.custom_title = Some(title.to_string());
699                                        }
700                                    }
701                                    "lastMessageDate" => {
702                                        if let Some(date) = value.as_i64() {
703                                            session.last_message_date = date;
704                                        }
705                                    }
706                                    "hasPendingEdits" | "isImported" => {
707                                        // Session-level boolean updates, safe to ignore for now
708                                    }
709                                    _ => {} // Ignore unknown keys
710                                }
711                            }
712                        }
713                        // Handle nested request field updates: ["requests", idx, field]
714                        else if keys_arr.len() == 3 {
715                            if let (Some("requests"), Some(idx), Some(field)) = (
716                                keys_arr[0].as_str(),
717                                keys_arr[1].as_u64().map(|i| i as usize),
718                                keys_arr[2].as_str(),
719                            ) {
720                                if idx < session.requests.len() {
721                                    match field {
722                                        "response" => {
723                                            session.requests[idx].response = Some(value.clone());
724                                        }
725                                        "result" => {
726                                            session.requests[idx].result = Some(value.clone());
727                                        }
728                                        "followups" => {
729                                            session.requests[idx].followups =
730                                                serde_json::from_value(value.clone()).ok();
731                                        }
732                                        "isCanceled" => {
733                                            session.requests[idx].is_canceled = value.as_bool();
734                                        }
735                                        "contentReferences" => {
736                                            session.requests[idx].content_references =
737                                                serde_json::from_value(value.clone()).ok();
738                                        }
739                                        "codeCitations" => {
740                                            session.requests[idx].code_citations =
741                                                serde_json::from_value(value.clone()).ok();
742                                        }
743                                        "modelState" | "modelId" | "agent" | "variableData" => {
744                                            // Known request fields - update as generic Value
745                                            // modelState tracks the request lifecycle
746                                        }
747                                        _ => {} // Ignore unknown request fields
748                                    }
749                                }
750                            }
751                        }
752                    }
753                }
754            }
755            2 => {
756                // Array replace/splice operation - 'k' is the key path, 'v' is the new array contents
757                // Optional 'i' field is the splice index: replace from index i onward
758                // Without 'i', this is a full replacement of the array at the key path
759                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
760                    let splice_index = entry.get("i").and_then(|i| i.as_u64()).map(|i| i as usize);
761                    if let Some(keys_arr) = keys.as_array() {
762                        // Top-level requests: k=["requests"], v=[requests_array]
763                        if keys_arr.len() == 1 {
764                            if let Some("requests") = keys_arr[0].as_str() {
765                                if let Some(items) = value.as_array() {
766                                    if let Some(idx) = splice_index {
767                                        // Splice: truncate at index i, then extend with new items
768                                        session.requests.truncate(idx);
769                                    } else {
770                                        // Full replacement: clear existing requests
771                                        session.requests.clear();
772                                    }
773                                    for item in items {
774                                        if let Ok(req) =
775                                            serde_json::from_value::<ChatRequest>(item.clone())
776                                        {
777                                            session.requests.push(req);
778                                        }
779                                    }
780                                    // Update last message date from latest request
781                                    if let Some(last_req) = session.requests.last() {
782                                        if let Some(ts) = last_req.timestamp {
783                                            session.last_message_date = ts;
784                                        }
785                                    }
786                                }
787                            }
788                        }
789                        // Nested array replace/splice: k=["requests", idx, "response"], v=[parts]
790                        else if keys_arr.len() == 3 {
791                            if let (Some("requests"), Some(req_idx), Some(field)) = (
792                                keys_arr[0].as_str(),
793                                keys_arr[1].as_u64().map(|i| i as usize),
794                                keys_arr[2].as_str(),
795                            ) {
796                                if req_idx < session.requests.len() {
797                                    match field {
798                                        "response" => {
799                                            // Response is stored as a JSON Value (array)
800                                            if let Some(idx) = splice_index {
801                                                // Splice: keep items before index i, replace rest
802                                                if let Some(existing) =
803                                                    session.requests[req_idx].response.as_ref()
804                                                {
805                                                    if let Some(existing_arr) = existing.as_array()
806                                                    {
807                                                        let mut new_arr: Vec<serde_json::Value> =
808                                                            existing_arr
809                                                                [..idx.min(existing_arr.len())]
810                                                                .to_vec();
811                                                        if let Some(new_items) = value.as_array() {
812                                                            new_arr
813                                                                .extend(new_items.iter().cloned());
814                                                        }
815                                                        session.requests[req_idx].response =
816                                                            Some(serde_json::Value::Array(new_arr));
817                                                    } else {
818                                                        session.requests[req_idx].response =
819                                                            Some(value.clone());
820                                                    }
821                                                } else {
822                                                    session.requests[req_idx].response =
823                                                        Some(value.clone());
824                                                }
825                                            } else {
826                                                // Full replacement
827                                                session.requests[req_idx].response =
828                                                    Some(value.clone());
829                                            }
830                                        }
831                                        "contentReferences" => {
832                                            session.requests[req_idx].content_references =
833                                                serde_json::from_value(value.clone()).ok();
834                                        }
835                                        _ => {} // Ignore unknown fields
836                                    }
837                                }
838                            }
839                        }
840                    }
841                }
842            }
843            _ => {} // Unknown kind, skip
844        }
845    }
846
847    Ok(session)
848}
849
/// Report whether `ext` is one of the session file extensions.
///
/// Only the exact, case-sensitive extensions `json` and `jsonl` match;
/// an extension that is not valid UTF-8 never matches.
pub fn is_session_file_extension(ext: &std::ffi::OsStr) -> bool {
    matches!(ext.to_str(), Some("json" | "jsonl"))
}
854
855/// Detect session format and version from content
856pub fn detect_session_format(content: &str) -> SessionFormatInfo {
857    let format = VsCodeSessionFormat::from_content(content);
858    let trimmed = content.trim();
859
860    // Detect schema version based on format
861    let (schema_version, confidence, method) = match format {
862        VsCodeSessionFormat::JsonLines => {
863            // For JSONL, check the first line's "v" object for version
864            if let Some(first_line) = trimmed.lines().next() {
865                if let Ok(entry) = serde_json::from_str::<serde_json::Value>(first_line) {
866                    if let Some(v) = entry.get("v") {
867                        if let Some(ver) = v.get("version").and_then(|x| x.as_u64()) {
868                            (
869                                SessionSchemaVersion::from_version(ver as u32),
870                                0.95,
871                                "jsonl-version-field",
872                            )
873                        } else {
874                            // No version field, likely v3 (current default)
875                            (SessionSchemaVersion::V3, 0.7, "jsonl-default")
876                        }
877                    } else {
878                        (SessionSchemaVersion::V3, 0.6, "jsonl-no-v-field")
879                    }
880                } else {
881                    (SessionSchemaVersion::Unknown, 0.3, "jsonl-parse-error")
882                }
883            } else {
884                (SessionSchemaVersion::Unknown, 0.2, "jsonl-empty")
885            }
886        }
887        VsCodeSessionFormat::LegacyJson => {
888            // For JSON, directly check the version field
889            if let Ok(json) = serde_json::from_str::<serde_json::Value>(trimmed) {
890                if let Some(ver) = json.get("version").and_then(|x| x.as_u64()) {
891                    (
892                        SessionSchemaVersion::from_version(ver as u32),
893                        0.95,
894                        "json-version-field",
895                    )
896                } else {
897                    // Infer from structure
898                    if json.get("requests").is_some() && json.get("sessionId").is_some() {
899                        (SessionSchemaVersion::V3, 0.8, "json-structure-inference")
900                    } else if json.get("messages").is_some() {
901                        (SessionSchemaVersion::V1, 0.7, "json-legacy-structure")
902                    } else {
903                        (SessionSchemaVersion::Unknown, 0.4, "json-unknown-structure")
904                    }
905                }
906            } else {
907                // Try sanitizing and parsing again
908                let sanitized = sanitize_json_unicode(trimmed);
909                if let Ok(json) = serde_json::from_str::<serde_json::Value>(&sanitized) {
910                    if let Some(ver) = json.get("version").and_then(|x| x.as_u64()) {
911                        (
912                            SessionSchemaVersion::from_version(ver as u32),
913                            0.9,
914                            "json-version-after-sanitize",
915                        )
916                    } else {
917                        (SessionSchemaVersion::V3, 0.6, "json-default-after-sanitize")
918                    }
919                } else {
920                    (SessionSchemaVersion::Unknown, 0.2, "json-parse-error")
921                }
922            }
923        }
924    };
925
926    SessionFormatInfo {
927        format,
928        schema_version,
929        confidence,
930        detection_method: method,
931    }
932}
933
934/// Parse session content with automatic format detection
935pub fn parse_session_auto(
936    content: &str,
937) -> std::result::Result<(ChatSession, SessionFormatInfo), serde_json::Error> {
938    let format_info = detect_session_format(content);
939
940    let session = match format_info.format {
941        VsCodeSessionFormat::JsonLines => parse_session_jsonl(content)?,
942        VsCodeSessionFormat::LegacyJson => parse_session_json(content)?,
943    };
944
945    Ok((session, format_info))
946}
947
948/// Parse a session file, automatically detecting format from content (not just extension)
949pub fn parse_session_file(path: &Path) -> std::result::Result<ChatSession, serde_json::Error> {
950    let content = std::fs::read_to_string(path)
951        .map_err(|e| serde_json::Error::io(std::io::Error::other(e.to_string())))?;
952
953    // Use content-based auto-detection
954    let (session, _format_info) = parse_session_auto(&content)?;
955    Ok(session)
956}
957
958/// Get the path to the workspace storage database
959pub fn get_workspace_storage_db(workspace_id: &str) -> Result<PathBuf> {
960    let storage_path = get_workspace_storage_path()?;
961    Ok(storage_path.join(workspace_id).join("state.vscdb"))
962}
963
964/// Read the chat session index from VS Code storage
965pub fn read_chat_session_index(db_path: &Path) -> Result<ChatSessionIndex> {
966    let conn = Connection::open(db_path)?;
967
968    let result: std::result::Result<String, rusqlite::Error> = conn.query_row(
969        "SELECT value FROM ItemTable WHERE key = ?",
970        ["chat.ChatSessionStore.index"],
971        |row| row.get(0),
972    );
973
974    match result {
975        Ok(json_str) => serde_json::from_str(&json_str)
976            .map_err(|e| CsmError::InvalidSessionFormat(e.to_string())),
977        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(ChatSessionIndex::default()),
978        Err(e) => Err(CsmError::SqliteError(e)),
979    }
980}
981
982/// Write the chat session index to VS Code storage
983pub fn write_chat_session_index(db_path: &Path, index: &ChatSessionIndex) -> Result<()> {
984    let conn = Connection::open(db_path)?;
985    let json_str = serde_json::to_string(index)?;
986
987    // Check if the key exists
988    let exists: bool = conn.query_row(
989        "SELECT COUNT(*) > 0 FROM ItemTable WHERE key = ?",
990        ["chat.ChatSessionStore.index"],
991        |row| row.get(0),
992    )?;
993
994    if exists {
995        conn.execute(
996            "UPDATE ItemTable SET value = ? WHERE key = ?",
997            [&json_str, "chat.ChatSessionStore.index"],
998        )?;
999    } else {
1000        conn.execute(
1001            "INSERT INTO ItemTable (key, value) VALUES (?, ?)",
1002            ["chat.ChatSessionStore.index", &json_str],
1003        )?;
1004    }
1005
1006    Ok(())
1007}
1008
1009/// Add a session to the VS Code index
1010pub fn add_session_to_index(
1011    db_path: &Path,
1012    session_id: &str,
1013    title: &str,
1014    last_message_date_ms: i64,
1015    _is_imported: bool,
1016    initial_location: &str,
1017    is_empty: bool,
1018) -> Result<()> {
1019    let mut index = read_chat_session_index(db_path)?;
1020
1021    index.entries.insert(
1022        session_id.to_string(),
1023        ChatSessionIndexEntry {
1024            session_id: session_id.to_string(),
1025            title: title.to_string(),
1026            last_message_date: last_message_date_ms,
1027            timing: Some(ChatSessionTiming {
1028                created: last_message_date_ms,
1029                last_request_started: Some(last_message_date_ms),
1030                last_request_ended: Some(last_message_date_ms),
1031            }),
1032            last_response_state: 1, // ResponseModelState.Complete
1033            initial_location: initial_location.to_string(),
1034            is_empty,
1035            is_imported: Some(_is_imported),
1036            has_pending_edits: Some(false),
1037            is_external: Some(false),
1038        },
1039    );
1040
1041    write_chat_session_index(db_path, &index)
1042}
1043
1044/// Remove a session from the VS Code index
1045#[allow(dead_code)]
1046pub fn remove_session_from_index(db_path: &Path, session_id: &str) -> Result<bool> {
1047    let mut index = read_chat_session_index(db_path)?;
1048    let removed = index.entries.remove(session_id).is_some();
1049    if removed {
1050        write_chat_session_index(db_path, &index)?;
1051    }
1052    Ok(removed)
1053}
1054
1055/// Sync the VS Code index with sessions on disk (remove stale entries, add missing ones)
1056/// When both .json and .jsonl exist for the same session ID, prefers .jsonl.
1057pub fn sync_session_index(
1058    workspace_id: &str,
1059    chat_sessions_dir: &Path,
1060    force: bool,
1061) -> Result<(usize, usize)> {
1062    let db_path = get_workspace_storage_db(workspace_id)?;
1063
1064    if !db_path.exists() {
1065        return Err(CsmError::WorkspaceNotFound(format!(
1066            "Database not found: {}",
1067            db_path.display()
1068        )));
1069    }
1070
1071    // Check if VS Code is running
1072    if !force && is_vscode_running() {
1073        return Err(CsmError::VSCodeRunning);
1074    }
1075
1076    // Get current index
1077    let mut index = read_chat_session_index(&db_path)?;
1078
1079    // Get session files on disk
1080    let mut files_on_disk: std::collections::HashSet<String> = std::collections::HashSet::new();
1081    if chat_sessions_dir.exists() {
1082        for entry in std::fs::read_dir(chat_sessions_dir)? {
1083            let entry = entry?;
1084            let path = entry.path();
1085            if path
1086                .extension()
1087                .map(is_session_file_extension)
1088                .unwrap_or(false)
1089            {
1090                if let Some(stem) = path.file_stem() {
1091                    files_on_disk.insert(stem.to_string_lossy().to_string());
1092                }
1093            }
1094        }
1095    }
1096
1097    // Remove stale entries (in index but not on disk)
1098    let stale_ids: Vec<String> = index
1099        .entries
1100        .keys()
1101        .filter(|id| !files_on_disk.contains(*id))
1102        .cloned()
1103        .collect();
1104
1105    let removed = stale_ids.len();
1106    for id in &stale_ids {
1107        index.entries.remove(id);
1108    }
1109
1110    // Add/update sessions from disk
1111    // Collect files, preferring .jsonl over .json for the same session ID
1112    let mut session_files: std::collections::HashMap<String, PathBuf> =
1113        std::collections::HashMap::new();
1114    for entry in std::fs::read_dir(chat_sessions_dir)? {
1115        let entry = entry?;
1116        let path = entry.path();
1117        if path
1118            .extension()
1119            .map(is_session_file_extension)
1120            .unwrap_or(false)
1121        {
1122            if let Some(stem) = path.file_stem() {
1123                let stem_str = stem.to_string_lossy().to_string();
1124                let is_jsonl = path.extension().is_some_and(|e| e == "jsonl");
1125                // Insert if no entry yet, or if this is .jsonl (preferred over .json)
1126                if !session_files.contains_key(&stem_str) || is_jsonl {
1127                    session_files.insert(stem_str, path);
1128                }
1129            }
1130        }
1131    }
1132
1133    let mut added = 0;
1134    for (_, path) in &session_files {
1135        if let Ok(session) = parse_session_file(path) {
1136            let session_id = session.session_id.clone().unwrap_or_else(|| {
1137                path.file_stem()
1138                    .map(|s| s.to_string_lossy().to_string())
1139                    .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
1140            });
1141
1142            let title = session.title();
1143            let is_empty = session.is_empty();
1144            let last_message_date = session.last_message_date;
1145            let initial_location = session.initial_location.clone();
1146
1147            index.entries.insert(
1148                session_id.clone(),
1149                ChatSessionIndexEntry {
1150                    session_id,
1151                    title,
1152                    last_message_date,
1153                    timing: Some(ChatSessionTiming {
1154                        created: session.creation_date,
1155                        last_request_started: Some(last_message_date),
1156                        last_request_ended: Some(last_message_date),
1157                    }),
1158                    last_response_state: 1, // ResponseModelState.Complete
1159                    initial_location,
1160                    is_empty,
1161                    is_imported: Some(false),
1162                    has_pending_edits: Some(false),
1163                    is_external: Some(false),
1164                },
1165            );
1166            added += 1;
1167        }
1168    }
1169
1170    // Write the synced index
1171    write_chat_session_index(&db_path, &index)?;
1172
1173    Ok((added, removed))
1174}
1175
1176/// Register all sessions from a directory into the VS Code index
1177pub fn register_all_sessions_from_directory(
1178    workspace_id: &str,
1179    chat_sessions_dir: &Path,
1180    force: bool,
1181) -> Result<usize> {
1182    let db_path = get_workspace_storage_db(workspace_id)?;
1183
1184    if !db_path.exists() {
1185        return Err(CsmError::WorkspaceNotFound(format!(
1186            "Database not found: {}",
1187            db_path.display()
1188        )));
1189    }
1190
1191    // Check if VS Code is running
1192    if !force && is_vscode_running() {
1193        return Err(CsmError::VSCodeRunning);
1194    }
1195
1196    // Use sync to ensure index matches disk
1197    let (added, removed) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
1198
1199    // Print individual session info
1200    for entry in std::fs::read_dir(chat_sessions_dir)? {
1201        let entry = entry?;
1202        let path = entry.path();
1203
1204        if path
1205            .extension()
1206            .map(is_session_file_extension)
1207            .unwrap_or(false)
1208        {
1209            if let Ok(session) = parse_session_file(&path) {
1210                let session_id = session.session_id.clone().unwrap_or_else(|| {
1211                    path.file_stem()
1212                        .map(|s| s.to_string_lossy().to_string())
1213                        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
1214                });
1215
1216                let title = session.title();
1217
1218                println!(
1219                    "[OK] Registered: {} ({}...)",
1220                    title,
1221                    &session_id[..12.min(session_id.len())]
1222                );
1223            }
1224        }
1225    }
1226
1227    if removed > 0 {
1228        println!("[OK] Removed {} stale index entries", removed);
1229    }
1230
1231    Ok(added)
1232}
1233
1234/// Check if VS Code is currently running
1235pub fn is_vscode_running() -> bool {
1236    let mut sys = System::new();
1237    sys.refresh_processes();
1238
1239    for process in sys.processes().values() {
1240        let name = process.name().to_lowercase();
1241        if name.contains("code") && !name.contains("codec") {
1242            return true;
1243        }
1244    }
1245
1246    false
1247}
1248
1249/// Close VS Code gracefully and wait for it to exit.
1250/// Returns the list of workspace folders that were open (for reopening).
1251pub fn close_vscode_and_wait(timeout_secs: u64) -> Result<()> {
1252    use sysinfo::{ProcessRefreshKind, RefreshKind, Signal};
1253
1254    if !is_vscode_running() {
1255        return Ok(());
1256    }
1257
1258    // Send SIGTERM (graceful close) to all Code processes
1259    let mut sys = System::new_with_specifics(
1260        RefreshKind::new().with_processes(ProcessRefreshKind::everything()),
1261    );
1262    sys.refresh_processes();
1263
1264    let mut signaled = 0u32;
1265    for (pid, process) in sys.processes() {
1266        let name = process.name().to_lowercase();
1267        if name.contains("code") && !name.contains("codec") {
1268            // On Windows, kill() sends TerminateProcess; there's no graceful
1269            // SIGTERM equivalent via sysinfo. But the main electron process
1270            // handles WM_CLOSE. We use the `taskkill` approach on Windows for
1271            // a graceful close.
1272            #[cfg(windows)]
1273            {
1274                let _ = std::process::Command::new("taskkill")
1275                    .args(["/PID", &pid.as_u32().to_string()])
1276                    .stdout(std::process::Stdio::null())
1277                    .stderr(std::process::Stdio::null())
1278                    .status();
1279                signaled += 1;
1280            }
1281            #[cfg(not(windows))]
1282            {
1283                if process.kill_with(Signal::Term).unwrap_or(false) {
1284                    signaled += 1;
1285                }
1286            }
1287        }
1288    }
1289
1290    if signaled == 0 {
1291        return Ok(());
1292    }
1293
1294    // Wait for all Code processes to exit
1295    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout_secs);
1296    loop {
1297        std::thread::sleep(std::time::Duration::from_millis(500));
1298        if !is_vscode_running() {
1299            // Extra wait for file locks to release
1300            std::thread::sleep(std::time::Duration::from_secs(1));
1301            return Ok(());
1302        }
1303        if std::time::Instant::now() >= deadline {
1304            // Force kill remaining processes
1305            let mut sys2 = System::new_with_specifics(
1306                RefreshKind::new().with_processes(ProcessRefreshKind::everything()),
1307            );
1308            sys2.refresh_processes();
1309            for (_pid, process) in sys2.processes() {
1310                let name = process.name().to_lowercase();
1311                if name.contains("code") && !name.contains("codec") {
1312                    process.kill();
1313                }
1314            }
1315            std::thread::sleep(std::time::Duration::from_secs(1));
1316            return Ok(());
1317        }
1318    }
1319}
1320
1321/// Reopen VS Code, optionally at a specific path.
1322pub fn reopen_vscode(project_path: Option<&str>) -> Result<()> {
1323    let mut cmd = std::process::Command::new("code");
1324    if let Some(path) = project_path {
1325        cmd.arg(path);
1326    }
1327    cmd.stdout(std::process::Stdio::null())
1328        .stderr(std::process::Stdio::null())
1329        .spawn()?;
1330    Ok(())
1331}
1332
1333/// Backup workspace sessions to a timestamped directory
1334pub fn backup_workspace_sessions(workspace_dir: &Path) -> Result<Option<PathBuf>> {
1335    let chat_sessions_dir = workspace_dir.join("chatSessions");
1336
1337    if !chat_sessions_dir.exists() {
1338        return Ok(None);
1339    }
1340
1341    let timestamp = std::time::SystemTime::now()
1342        .duration_since(std::time::UNIX_EPOCH)
1343        .unwrap()
1344        .as_secs();
1345
1346    let backup_dir = workspace_dir.join(format!("chatSessions-backup-{}", timestamp));
1347
1348    // Copy directory recursively
1349    copy_dir_all(&chat_sessions_dir, &backup_dir)?;
1350
1351    Ok(Some(backup_dir))
1352}
1353
1354/// Recursively copy a directory
1355fn copy_dir_all(src: &Path, dst: &Path) -> Result<()> {
1356    std::fs::create_dir_all(dst)?;
1357
1358    for entry in std::fs::read_dir(src)? {
1359        let entry = entry?;
1360        let src_path = entry.path();
1361        let dst_path = dst.join(entry.file_name());
1362
1363        if src_path.is_dir() {
1364            copy_dir_all(&src_path, &dst_path)?;
1365        } else {
1366            std::fs::copy(&src_path, &dst_path)?;
1367        }
1368    }
1369
1370    Ok(())
1371}
1372
1373// =============================================================================
1374// Empty Window Sessions (ALL SESSIONS)
1375// =============================================================================
1376
1377/// Read all empty window chat sessions (not tied to any workspace)
1378/// These appear in VS Code's "ALL SESSIONS" panel
1379pub fn read_empty_window_sessions() -> Result<Vec<ChatSession>> {
1380    let sessions_path = get_empty_window_sessions_path()?;
1381
1382    if !sessions_path.exists() {
1383        return Ok(Vec::new());
1384    }
1385
1386    let mut sessions = Vec::new();
1387
1388    for entry in std::fs::read_dir(&sessions_path)? {
1389        let entry = entry?;
1390        let path = entry.path();
1391
1392        if path.extension().is_some_and(is_session_file_extension) {
1393            if let Ok(session) = parse_session_file(&path) {
1394                sessions.push(session);
1395            }
1396        }
1397    }
1398
1399    // Sort by last message date (most recent first)
1400    sessions.sort_by(|a, b| b.last_message_date.cmp(&a.last_message_date));
1401
1402    Ok(sessions)
1403}
1404
1405/// Get a specific empty window session by ID
1406#[allow(dead_code)]
1407pub fn get_empty_window_session(session_id: &str) -> Result<Option<ChatSession>> {
1408    let sessions_path = get_empty_window_sessions_path()?;
1409    let session_path = sessions_path.join(format!("{}.json", session_id));
1410
1411    if !session_path.exists() {
1412        return Ok(None);
1413    }
1414
1415    let content = std::fs::read_to_string(&session_path)?;
1416    let session: ChatSession = serde_json::from_str(&content)
1417        .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
1418
1419    Ok(Some(session))
1420}
1421
1422/// Write an empty window session
1423#[allow(dead_code)]
1424pub fn write_empty_window_session(session: &ChatSession) -> Result<PathBuf> {
1425    let sessions_path = get_empty_window_sessions_path()?;
1426
1427    // Create directory if it doesn't exist
1428    std::fs::create_dir_all(&sessions_path)?;
1429
1430    let session_id = session.session_id.as_deref().unwrap_or("unknown");
1431    let session_path = sessions_path.join(format!("{}.json", session_id));
1432    let content = serde_json::to_string_pretty(session)?;
1433    std::fs::write(&session_path, content)?;
1434
1435    Ok(session_path)
1436}
1437
1438/// Delete an empty window session
1439#[allow(dead_code)]
1440pub fn delete_empty_window_session(session_id: &str) -> Result<bool> {
1441    let sessions_path = get_empty_window_sessions_path()?;
1442    let session_path = sessions_path.join(format!("{}.json", session_id));
1443
1444    if session_path.exists() {
1445        std::fs::remove_file(&session_path)?;
1446        Ok(true)
1447    } else {
1448        Ok(false)
1449    }
1450}
1451
1452/// Count empty window sessions
1453pub fn count_empty_window_sessions() -> Result<usize> {
1454    let sessions_path = get_empty_window_sessions_path()?;
1455
1456    if !sessions_path.exists() {
1457        return Ok(0);
1458    }
1459
1460    let count = std::fs::read_dir(&sessions_path)?
1461        .filter_map(|e| e.ok())
1462        .filter(|e| e.path().extension().is_some_and(is_session_file_extension))
1463        .count();
1464
1465    Ok(count)
1466}
1467
1468/// Compact a JSONL session file by replaying all operations into a single kind:0 snapshot.
1469/// This works at the raw JSON level, preserving all fields VS Code expects.
1470/// Returns the path to the compacted file.
1471///
1472/// Handles a common corruption pattern where VS Code appends delta operations
1473/// to line 0 without newline separators (e.g., `}{"kind":1,...}{"kind":2,...}`).
1474pub fn compact_session_jsonl(path: &Path) -> Result<PathBuf> {
1475    let content = std::fs::read_to_string(path).map_err(|e| {
1476        CsmError::InvalidSessionFormat(format!("Failed to read {}: {}", path.display(), e))
1477    })?;
1478
1479    // Pre-process: split concatenated JSON objects that lack newline separators.
1480    // VS Code sometimes appends delta ops to line 0 without a \n, producing:
1481    //   {"kind":0,"v":{...}}{"kind":1,...}{"kind":2,...}\n{"kind":1,...}\n...
1482    // We fix this by inserting newlines at every `}{"kind":` boundary.
1483    let content = split_concatenated_jsonl(&content);
1484
1485    let mut lines = content.lines();
1486
1487    // First line must be kind:0 (initial snapshot)
1488    let first_line = lines
1489        .next()
1490        .ok_or_else(|| CsmError::InvalidSessionFormat("Empty JSONL file".to_string()))?;
1491
1492    let first_entry: serde_json::Value = match serde_json::from_str(first_line.trim()) {
1493        Ok(v) => v,
1494        Err(_) => {
1495            // Try sanitizing Unicode (lone surrogates, etc.)
1496            let sanitized = sanitize_json_unicode(first_line.trim());
1497            serde_json::from_str(&sanitized).map_err(|e| {
1498                CsmError::InvalidSessionFormat(format!("Invalid JSON on line 1: {}", e))
1499            })?
1500        }
1501    };
1502
1503    let kind = first_entry
1504        .get("kind")
1505        .and_then(|k| k.as_u64())
1506        .unwrap_or(99);
1507    if kind != 0 {
1508        return Err(CsmError::InvalidSessionFormat(
1509            "First JSONL line must be kind:0".to_string(),
1510        ));
1511    }
1512
1513    // Extract the session state from the "v" field
1514    let mut state = first_entry
1515        .get("v")
1516        .cloned()
1517        .ok_or_else(|| CsmError::InvalidSessionFormat("kind:0 missing 'v' field".to_string()))?;
1518
1519    // Replay all subsequent operations
1520    for line in lines {
1521        let line = line.trim();
1522        if line.is_empty() {
1523            continue;
1524        }
1525
1526        let entry: serde_json::Value = match serde_json::from_str(line) {
1527            Ok(v) => v,
1528            Err(_) => continue, // Skip malformed lines
1529        };
1530
1531        let op_kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(99);
1532
1533        match op_kind {
1534            1 => {
1535                // Delta update: k=["path","to","field"], v=value
1536                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
1537                    if let Some(keys_arr) = keys.as_array() {
1538                        apply_delta(&mut state, keys_arr, value.clone());
1539                    }
1540                }
1541            }
1542            2 => {
1543                // Array replace/splice: k=["path","to","array"], v=[items], i=splice_index
1544                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
1545                    let splice_index = entry.get("i").and_then(|i| i.as_u64()).map(|i| i as usize);
1546                    if let Some(keys_arr) = keys.as_array() {
1547                        apply_splice(&mut state, keys_arr, value.clone(), splice_index);
1548                    }
1549                }
1550            }
1551            _ => {} // Skip unknown kinds
1552        }
1553    }
1554
1555    // Inject any missing fields that VS Code's latest format requires
1556    let session_id = path
1557        .file_stem()
1558        .and_then(|s| s.to_str())
1559        .map(|s| s.to_string());
1560    ensure_vscode_compat_fields(&mut state, session_id.as_deref());
1561
1562    // Write the compacted file: single kind:0 line with the final state
1563    let compact_entry = serde_json::json!({"kind": 0, "v": state});
1564    let compact_content = serde_json::to_string(&compact_entry)
1565        .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
1566
1567    // Backup the original file
1568    let backup_path = path.with_extension("jsonl.bak");
1569    std::fs::rename(path, &backup_path)?;
1570
1571    // Write the compacted file
1572    std::fs::write(path, &compact_content)?;
1573
1574    Ok(backup_path)
1575}
1576
1577/// Trim a session JSONL file by keeping only the last `keep` requests.
1578///
1579/// Very long chat sessions (100+ requests) can grow to 50-100+ MB, causing VS Code
1580/// to fail loading them. This function compacts the session first (if needed), then
1581/// removes old requests from the `requests` array, keeping only the most recent ones.
1582///
1583/// The full session is preserved as a `.jsonl.bak` backup. A trimmed summary is
1584/// injected as the first request message so the user knows context was archived.
1585///
1586/// Returns `(original_count, kept_count, original_mb, new_mb)`.
1587pub fn trim_session_jsonl(path: &Path, keep: usize) -> Result<(usize, usize, f64, f64)> {
1588    let content = std::fs::read_to_string(path).map_err(|e| {
1589        CsmError::InvalidSessionFormat(format!("Failed to read {}: {}", path.display(), e))
1590    })?;
1591
1592    let original_size = content.len() as f64 / (1024.0 * 1024.0);
1593
1594    // Always handle concatenated JSON objects first, then check line count
1595    let content = split_concatenated_jsonl(&content);
1596    let line_count = content.lines().filter(|l| !l.trim().is_empty()).count();
1597
1598    // If multi-line (concatenated objects or delta ops), compact first
1599    let content = if line_count > 1 {
1600        // Write the split content so compact can process it
1601        std::fs::write(path, &content)?;
1602        compact_session_jsonl(path)?;
1603        std::fs::read_to_string(path).map_err(|e| {
1604            CsmError::InvalidSessionFormat(format!("Failed to read compacted file: {}", e))
1605        })?
1606    } else {
1607        content
1608    };
1609
1610    let first_line = content
1611        .lines()
1612        .next()
1613        .ok_or_else(|| CsmError::InvalidSessionFormat("Empty JSONL file".to_string()))?;
1614
1615    let mut entry: serde_json::Value = serde_json::from_str(first_line.trim())
1616        .map_err(|_| {
1617            let sanitized = sanitize_json_unicode(first_line.trim());
1618            serde_json::from_str::<serde_json::Value>(&sanitized)
1619                .map_err(|e| CsmError::InvalidSessionFormat(format!("Invalid JSON: {}", e)))
1620        })
1621        .unwrap_or_else(|e| e.unwrap());
1622
1623    let kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(99);
1624    if kind != 0 {
1625        return Err(
1626            CsmError::InvalidSessionFormat("First JSONL line must be kind:0".to_string()).into(),
1627        );
1628    }
1629
1630    // Get the requests array
1631    let requests = match entry
1632        .get("v")
1633        .and_then(|v| v.get("requests"))
1634        .and_then(|r| r.as_array())
1635    {
1636        Some(r) => r.clone(),
1637        None => {
1638            return Err(CsmError::InvalidSessionFormat(
1639                "Session has no requests array".to_string(),
1640            )
1641            .into());
1642        }
1643    };
1644
1645    let original_count = requests.len();
1646
1647    if original_count <= keep {
1648        // Still strip bloated content even if not reducing request count
1649        strip_bloated_content(&mut entry);
1650
1651        let trimmed_content = serde_json::to_string(&entry)
1652            .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
1653        let new_size = trimmed_content.len() as f64 / (1024.0 * 1024.0);
1654
1655        // Only rewrite if we actually reduced size
1656        if new_size < original_size * 0.9 {
1657            let backup_path = path.with_extension("jsonl.bak");
1658            if !backup_path.exists() {
1659                std::fs::copy(path, &backup_path)?;
1660            }
1661            std::fs::write(path, &trimmed_content)?;
1662        }
1663
1664        return Ok((original_count, original_count, original_size, new_size));
1665    }
1666
1667    // Keep only the last `keep` requests
1668    let kept_requests: Vec<serde_json::Value> = requests[original_count - keep..].to_vec();
1669
1670    // Use only the kept requests — no injected trim notice.
1671    // Injecting synthetic requests with non-standard agent/structure fields
1672    // can cause VS Code's session deserializer to reject the entire session.
1673    let final_requests = kept_requests;
1674
1675    // Replace the requests array in the entry
1676    if let Some(v) = entry.get_mut("v") {
1677        if let Some(obj) = v.as_object_mut() {
1678            obj.insert("requests".to_string(), serde_json::json!(final_requests));
1679        }
1680    }
1681
1682    // Strip bloated metadata, tool invocations, textEditGroups, thinking tokens
1683    strip_bloated_content(&mut entry);
1684
1685    // Ensure compat fields
1686    let session_id = path
1687        .file_stem()
1688        .and_then(|s| s.to_str())
1689        .map(|s| s.to_string());
1690    if let Some(v) = entry.get_mut("v") {
1691        ensure_vscode_compat_fields(v, session_id.as_deref());
1692    }
1693
1694    let trimmed_content = serde_json::to_string(&entry)
1695        .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
1696
1697    let new_size = trimmed_content.len() as f64 / (1024.0 * 1024.0);
1698
1699    // Backup original (if not already backed up by compact)
1700    let backup_path = path.with_extension("jsonl.bak");
1701    if !backup_path.exists() {
1702        std::fs::copy(path, &backup_path)?;
1703    }
1704
1705    // Write the trimmed file
1706    std::fs::write(path, &trimmed_content)?;
1707
1708    Ok((original_count, keep, original_size, new_size))
1709}
1710
1711/// Strip bloated content from a session entry to reduce file size.
1712///
1713/// VS Code sessions accumulate large metadata that isn't needed for session display:
1714/// - `result.metadata`: Can be 100KB-1.5MB per request (Copilot internal state)
1715/// - `editedFileEvents`: Redundant file edit tracking
1716/// - `chatEdits`: File edit diffs
1717/// - `textEditGroup` response items: 80-120KB each with full file diffs
1718/// - `thinking` response items: Model thinking tokens (can be 400+ per request)
1719/// - `toolInvocationSerialized`: Tool call metadata (usually already stripped by compact)
1720/// - `toolSpecificData`: Duplicate data in tool invocations
1721///
1722/// This function strips or truncates all of these while preserving the conversation
1723/// content (markdownContent responses and user messages).
1724fn strip_bloated_content(entry: &mut serde_json::Value) {
1725    let requests = match entry
1726        .get_mut("v")
1727        .and_then(|v| v.get_mut("requests"))
1728        .and_then(|r| r.as_array_mut())
1729    {
1730        Some(r) => r,
1731        None => return,
1732    };
1733
1734    for req in requests.iter_mut() {
1735        let obj = match req.as_object_mut() {
1736            Some(o) => o,
1737            None => continue,
1738        };
1739
1740        // Strip result.metadata (100KB-1.5MB per request)
1741        if let Some(result) = obj.get_mut("result") {
1742            if let Some(result_obj) = result.as_object_mut() {
1743                if let Some(meta) = result_obj.get("metadata") {
1744                    let meta_str = serde_json::to_string(meta).unwrap_or_default();
1745                    if meta_str.len() > 1000 {
1746                        result_obj.insert(
1747                            "metadata".to_string(),
1748                            serde_json::Value::Object(serde_json::Map::new()),
1749                        );
1750                    }
1751                }
1752            }
1753        }
1754
1755        // Strip editedFileEvents
1756        obj.remove("editedFileEvents");
1757
1758        // Strip chatEdits
1759        obj.remove("chatEdits");
1760
1761        // Truncate contentReferences to max 3
1762        if let Some(refs) = obj.get_mut("contentReferences") {
1763            if let Some(arr) = refs.as_array_mut() {
1764                if arr.len() > 3 {
1765                    arr.truncate(3);
1766                }
1767            }
1768        }
1769
1770        // Process response items
1771        if let Some(response) = obj.get_mut("response") {
1772            if let Some(resp_arr) = response.as_array_mut() {
1773                // Remove non-essential response kinds
1774                resp_arr.retain(|r| {
1775                    let kind = r.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1776                    !matches!(
1777                        kind,
1778                        "toolInvocationSerialized"
1779                            | "progressMessage"
1780                            | "confirmationWidget"
1781                            | "codeblockUri"
1782                            | "progressTaskSerialized"
1783                            | "undoStop"
1784                            | "mcpServersStarting"
1785                            | "confirmation"
1786                    )
1787                });
1788
1789                // Truncate textEditGroup items (strip edit diffs, keep URI ref)
1790                for r in resp_arr.iter_mut() {
1791                    let kind = r
1792                        .get("kind")
1793                        .and_then(|k| k.as_str())
1794                        .unwrap_or("")
1795                        .to_string();
1796
1797                    if kind == "textEditGroup" {
1798                        if let Some(edits) = r.get_mut("edits") {
1799                            if let Some(arr) = edits.as_array_mut() {
1800                                if serde_json::to_string(arr).unwrap_or_default().len() > 2000 {
1801                                    arr.clear();
1802                                }
1803                            }
1804                        }
1805                    }
1806
1807                    // Truncate thinking tokens
1808                    if kind == "thinking" {
1809                        if let Some(val) = r.get_mut("value") {
1810                            if let Some(s) = val.as_str() {
1811                                if s.len() > 500 {
1812                                    *val = serde_json::Value::String(format!(
1813                                        "{}... [truncated]",
1814                                        &s[..500]
1815                                    ));
1816                                }
1817                            }
1818                        }
1819                        if let Some(thought) = r.get_mut("thought") {
1820                            if let Some(thought_val) = thought.get_mut("value") {
1821                                if let Some(s) = thought_val.as_str() {
1822                                    if s.len() > 500 {
1823                                        *thought_val = serde_json::Value::String(format!(
1824                                            "{}... [truncated]",
1825                                            &s[..500]
1826                                        ));
1827                                    }
1828                                }
1829                            }
1830                        }
1831                    }
1832
1833                    // Truncate large markdownContent
1834                    if kind == "markdownContent" {
1835                        if let Some(content) = r.get_mut("content") {
1836                            if let Some(val) = content.get_mut("value") {
1837                                if let Some(s) = val.as_str() {
1838                                    if s.len() > 20000 {
1839                                        *val = serde_json::Value::String(format!(
1840                                            "{}\n\n---\n*[Chasm: Content truncated for loading performance]*",
1841                                            &s[..20000]
1842                                        ));
1843                                    }
1844                                }
1845                            }
1846                        }
1847                    }
1848                }
1849
1850                // Limit thinking items to last 5 per request
1851                let mut thinking_count = 0;
1852                let mut indices_to_remove = Vec::new();
1853                for (i, r) in resp_arr.iter().enumerate().rev() {
1854                    let kind = r.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1855                    if kind == "thinking" {
1856                        thinking_count += 1;
1857                        if thinking_count > 5 {
1858                            indices_to_remove.push(i);
1859                        }
1860                    }
1861                }
1862                for idx in indices_to_remove {
1863                    resp_arr.remove(idx);
1864                }
1865
1866                // Strip toolSpecificData from any remaining tool invocations
1867                for r in resp_arr.iter_mut() {
1868                    if let Some(obj) = r.as_object_mut() {
1869                        obj.remove("toolSpecificData");
1870                    }
1871                }
1872
1873                // Fix response items missing `kind` field — wrap raw MarkdownString
1874                // objects as proper markdownContent response items.
1875                // VS Code sometimes serializes MarkdownString directly instead of
1876                // wrapping it in { kind: "markdownContent", content: MarkdownString }.
1877                // Without the `kind` discriminator, VS Code's deserializer fails.
1878                let fixed: Vec<serde_json::Value> = resp_arr
1879                    .drain(..)
1880                    .map(|item| {
1881                        if item.get("kind").is_none() {
1882                            // Check if it looks like a MarkdownString (has `value` or `supportHtml`)
1883                            if item.get("value").is_some() || item.get("supportHtml").is_some() {
1884                                serde_json::json!({
1885                                    "kind": "markdownContent",
1886                                    "content": item
1887                                })
1888                            } else {
1889                                item
1890                            }
1891                        } else {
1892                            item
1893                        }
1894                    })
1895                    .collect();
1896                *resp_arr = fixed;
1897            }
1898        }
1899    }
1900}
1901
/// Restore line separation between JSON objects that were glued together.
///
/// VS Code occasionally appends delta operations (kind:1, kind:2) directly after
/// the previous object with no newline in between, yielding invalid JSONL such as
///   `{"kind":0,"v":{...}}{"kind":1,...}{"kind":2,...}`
///
/// Every occurrence of `}{"kind":` is rewritten to `}` + newline + `{"kind":`.
/// That byte sequence cannot occur inside a JSON string value, because the
/// quotes would have to appear there in escaped form (`{\"kind\":`).
///
/// Content without any such boundary is returned as an unchanged copy.
pub fn split_concatenated_jsonl(content: &str) -> String {
    const GLUED: &str = "}{\"kind\":";
    const SEPARATED: &str = "}\n{\"kind\":";

    // Splitting on the glued boundary and re-joining with the separated form
    // handles both the "nothing to do" case (a single segment) and any number
    // of boundaries in one pass.
    let mut segments = content.split(GLUED);
    let mut rebuilt = String::with_capacity(content.len());

    if let Some(head) = segments.next() {
        rebuilt.push_str(head);
    }
    for segment in segments {
        rebuilt.push_str(SEPARATED);
        rebuilt.push_str(segment);
    }

    rebuilt
}
1919
1920/// Apply a delta update (kind:1) to a JSON value at the given key path.
1921fn apply_delta(root: &mut serde_json::Value, keys: &[serde_json::Value], value: serde_json::Value) {
1922    if keys.is_empty() {
1923        return;
1924    }
1925
1926    // Navigate to the parent
1927    let mut current = root;
1928    for key in &keys[..keys.len() - 1] {
1929        if let Some(k) = key.as_str() {
1930            if !current.get(k).is_some() {
1931                current[k] = serde_json::Value::Object(serde_json::Map::new());
1932            }
1933            current = &mut current[k];
1934        } else if let Some(idx) = key.as_u64() {
1935            if let Some(arr) = current.as_array_mut() {
1936                if (idx as usize) < arr.len() {
1937                    current = &mut arr[idx as usize];
1938                } else {
1939                    return; // Index out of bounds
1940                }
1941            } else {
1942                return;
1943            }
1944        }
1945    }
1946
1947    // Set the final key
1948    if let Some(last_key) = keys.last() {
1949        if let Some(k) = last_key.as_str() {
1950            current[k] = value;
1951        } else if let Some(idx) = last_key.as_u64() {
1952            if let Some(arr) = current.as_array_mut() {
1953                if (idx as usize) < arr.len() {
1954                    arr[idx as usize] = value;
1955                }
1956            }
1957        }
1958    }
1959}
1960
1961/// Apply an array replace/splice operation (kind:2) to a JSON value at the given key path.
1962/// When `splice_index` is `Some(i)`, truncates the target array at index `i` before extending.
1963/// When `splice_index` is `None`, replaces the entire array with the new items.
1964fn apply_splice(
1965    root: &mut serde_json::Value,
1966    keys: &[serde_json::Value],
1967    items: serde_json::Value,
1968    splice_index: Option<usize>,
1969) {
1970    if keys.is_empty() {
1971        return;
1972    }
1973
1974    // Navigate to the target array
1975    let mut current = root;
1976    for key in keys {
1977        if let Some(k) = key.as_str() {
1978            if !current.get(k).is_some() {
1979                current[k] = serde_json::json!([]);
1980            }
1981            current = &mut current[k];
1982        } else if let Some(idx) = key.as_u64() {
1983            if let Some(arr) = current.as_array_mut() {
1984                if (idx as usize) < arr.len() {
1985                    current = &mut arr[idx as usize];
1986                } else {
1987                    return;
1988                }
1989            } else {
1990                return;
1991            }
1992        }
1993    }
1994
1995    // Splice or replace items in the target array
1996    if let Some(target_arr) = current.as_array_mut() {
1997        if let Some(idx) = splice_index {
1998            // Splice: truncate at index, then extend with new items
1999            target_arr.truncate(idx);
2000        } else {
2001            // Full replacement: clear the array
2002            target_arr.clear();
2003        }
2004        if let Some(new_items) = items.as_array() {
2005            target_arr.extend(new_items.iter().cloned());
2006        }
2007    }
2008}
2009
2010/// Ensure a JSONL `kind:0` snapshot's `v` object has all fields required by
2011/// VS Code's latest session format (1.109.0+ / version 3). Missing fields are
2012/// injected with sensible defaults so sessions load reliably after recovery,
2013/// conversion, or compaction.
2014///
2015/// Required fields that VS Code now expects:
2016/// - `version` (u32, default 3)
2017/// - `sessionId` (string, extracted from filename or generated)
2018/// - `responderUsername` (string, default "GitHub Copilot")
2019/// - `hasPendingEdits` (bool, default false)
2020/// - `pendingRequests` (array, default [])
2021/// - `inputState` (object with mode, attachments, etc.)
2022pub fn ensure_vscode_compat_fields(state: &mut serde_json::Value, session_id: Option<&str>) {
2023    if let Some(obj) = state.as_object_mut() {
2024        // version
2025        if !obj.contains_key("version") {
2026            obj.insert("version".to_string(), serde_json::json!(3));
2027        }
2028
2029        // sessionId — use provided ID, or try to read from existing field
2030        if !obj.contains_key("sessionId") {
2031            if let Some(id) = session_id {
2032                obj.insert("sessionId".to_string(), serde_json::json!(id));
2033            }
2034        }
2035
2036        // responderUsername
2037        if !obj.contains_key("responderUsername") {
2038            obj.insert(
2039                "responderUsername".to_string(),
2040                serde_json::json!("GitHub Copilot"),
2041            );
2042        }
2043
2044        // hasPendingEdits — always false for recovered/compacted sessions
2045        if !obj.contains_key("hasPendingEdits") {
2046            obj.insert("hasPendingEdits".to_string(), serde_json::json!(false));
2047        }
2048
2049        // pendingRequests — always empty for recovered/compacted sessions
2050        if !obj.contains_key("pendingRequests") {
2051            obj.insert("pendingRequests".to_string(), serde_json::json!([]));
2052        }
2053
2054        // inputState — VS Code expects this to exist with at least mode + attachments
2055        if !obj.contains_key("inputState") {
2056            obj.insert(
2057                "inputState".to_string(),
2058                serde_json::json!({
2059                    "attachments": [],
2060                    "mode": { "id": "agent", "kind": "agent" },
2061                    "inputText": "",
2062                    "selections": [],
2063                    "contrib": { "chatDynamicVariableModel": [] }
2064                }),
2065            );
2066        }
2067    }
2068}
2069
2070/// Detect whether a legacy .json file is a "skeleton" — corrupted to contain only
2071/// structural characters ({}, [], commas, colons, whitespace) with all actual data stripped.
2072/// These files parse as valid JSON but contain no useful session content.
2073pub fn is_skeleton_json(content: &str) -> bool {
2074    // Must be non-trivial size to be a skeleton (tiny files might just be empty sessions)
2075    if content.len() < 100 {
2076        return false;
2077    }
2078
2079    // Count structural vs data characters
2080    let structural_chars: usize = content
2081        .chars()
2082        .filter(|c| {
2083            matches!(
2084                c,
2085                '{' | '}' | '[' | ']' | ',' | ':' | ' ' | '\n' | '\r' | '\t' | '"'
2086            )
2087        })
2088        .count();
2089
2090    let total_chars = content.len();
2091    let structural_ratio = structural_chars as f64 / total_chars as f64;
2092
2093    // A skeleton file is >80% structural characters. Normal sessions have lots of
2094    // text content (messages, code, etc.) so the ratio is much lower.
2095    if structural_ratio < 0.80 {
2096        return false;
2097    }
2098
2099    // Additionally verify: parse as JSON and check that requests array is empty or
2100    // contains only empty objects
2101    if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(content) {
2102        // Check if requests exist and are all empty
2103        if let Some(requests) = parsed.get("requests").and_then(|r| r.as_array()) {
2104            let all_empty = requests.iter().all(|req| {
2105                // A skeleton request has no "message" text or empty message content
2106                let msg = req
2107                    .get("message")
2108                    .and_then(|m| m.get("text"))
2109                    .and_then(|t| t.as_str());
2110                msg.map_or(true, |s| s.is_empty())
2111            });
2112            return all_empty;
2113        }
2114        // No requests array at all — also skeleton-like
2115        return true;
2116    }
2117
2118    // Couldn't parse but high structural ratio — still likely skeleton
2119    structural_ratio > 0.85
2120}
2121
2122/// Convert a skeleton .json file to a valid minimal .jsonl file.
2123/// Preserves title and timestamp from the index entry if available.
2124/// The original .json file is renamed to `.json.corrupt` (non-destructive).
2125/// Returns the path to the new .jsonl file, or None if conversion was skipped.
2126pub fn convert_skeleton_json_to_jsonl(
2127    json_path: &Path,
2128    title: Option<&str>,
2129    last_message_date: Option<i64>,
2130) -> Result<Option<PathBuf>> {
2131    let content = std::fs::read_to_string(json_path)
2132        .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
2133
2134    if !is_skeleton_json(&content) {
2135        return Ok(None);
2136    }
2137
2138    let session_id = json_path
2139        .file_stem()
2140        .and_then(|s| s.to_str())
2141        .unwrap_or("unknown")
2142        .to_string();
2143
2144    let title = title.unwrap_or("Recovered Session");
2145    let now = std::time::SystemTime::now()
2146        .duration_since(std::time::UNIX_EPOCH)
2147        .unwrap_or_default()
2148        .as_millis() as i64;
2149    let timestamp = last_message_date.unwrap_or(now);
2150
2151    // Build a valid minimal kind:0 JSONL entry
2152    let jsonl_entry = serde_json::json!({
2153        "kind": 0,
2154        "v": {
2155            "sessionId": session_id,
2156            "title": title,
2157            "lastMessageDate": timestamp,
2158            "requests": [],
2159            "version": 4,
2160            "hasPendingEdits": false,
2161            "pendingRequests": [],
2162            "inputState": {
2163                "attachments": [],
2164                "mode": { "id": "agent", "kind": "agent" },
2165                "inputText": "",
2166                "selections": [],
2167                "contrib": { "chatDynamicVariableModel": [] }
2168            },
2169            "responderUsername": "GitHub Copilot",
2170            "isImported": false,
2171            "initialLocation": "panel"
2172        }
2173    });
2174
2175    let jsonl_path = json_path.with_extension("jsonl");
2176    let corrupt_path = json_path.with_extension("json.corrupt");
2177
2178    // Don't overwrite an existing .jsonl
2179    if jsonl_path.exists() {
2180        // Just rename the skeleton to .corrupt
2181        std::fs::rename(json_path, &corrupt_path)?;
2182        return Ok(None);
2183    }
2184
2185    // Write the new .jsonl file
2186    std::fs::write(
2187        &jsonl_path,
2188        serde_json::to_string(&jsonl_entry)
2189            .map_err(|e| CsmError::InvalidSessionFormat(format!("Serialize error: {}", e)))?,
2190    )?;
2191
2192    // Rename original to .json.corrupt (non-destructive)
2193    std::fs::rename(json_path, &corrupt_path)?;
2194
2195    Ok(Some(jsonl_path))
2196}
2197
2198/// Fix cancelled `modelState` values in a compacted (single-line) JSONL session file.
2199///
2200/// VS Code determines `lastResponseState` from the file content, not the index.
2201/// If the last request's `modelState.value` is `2` (Cancelled) or missing entirely,
2202/// VS Code refuses to load the session. This function:
2203/// 1. Finds the last request in the `requests` array
2204/// 2. If `modelState.value` is `2` (Cancelled), changes it to `1` (Complete)
2205/// 3. If `modelState` is missing entirely, adds `{"value":1,"completedAt":<now>}`
2206///
2207/// Returns `true` if the file was modified.
2208pub fn fix_cancelled_model_state(path: &Path) -> Result<bool> {
2209    let content = std::fs::read_to_string(path)
2210        .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
2211
2212    let lines: Vec<&str> = content.lines().collect();
2213
2214    // For multi-line JSONL, we need to scan all lines to find the LAST modelState
2215    // delta for the highest request index. For single-line (compacted), we modify
2216    // the kind:0 snapshot directly.
2217    if lines.len() == 1 {
2218        // Compacted single-line JSONL: modify the kind:0 snapshot
2219        let mut entry: serde_json::Value = serde_json::from_str(lines[0].trim())
2220            .map_err(|e| CsmError::InvalidSessionFormat(format!("Invalid JSON: {}", e)))?;
2221
2222        let is_kind_0 = entry
2223            .get("kind")
2224            .and_then(|k| k.as_u64())
2225            .map(|k| k == 0)
2226            .unwrap_or(false);
2227
2228        if !is_kind_0 {
2229            return Ok(false);
2230        }
2231
2232        let requests = match entry
2233            .get_mut("v")
2234            .and_then(|v| v.get_mut("requests"))
2235            .and_then(|r| r.as_array_mut())
2236        {
2237            Some(r) if !r.is_empty() => r,
2238            _ => return Ok(false),
2239        };
2240
2241        let last_req = requests.last_mut().unwrap();
2242        let model_state = last_req.get("modelState");
2243
2244        let needs_fix = match model_state {
2245            Some(ms) => ms.get("value").and_then(|v| v.as_u64()) == Some(2),
2246            None => true, // Missing modelState = never completed
2247        };
2248
2249        if !needs_fix {
2250            return Ok(false);
2251        }
2252
2253        let now = std::time::SystemTime::now()
2254            .duration_since(std::time::UNIX_EPOCH)
2255            .unwrap_or_default()
2256            .as_millis() as u64;
2257
2258        last_req.as_object_mut().unwrap().insert(
2259            "modelState".to_string(),
2260            serde_json::json!({"value": 1, "completedAt": now}),
2261        );
2262
2263        let patched = serde_json::to_string(&entry)
2264            .map_err(|e| CsmError::InvalidSessionFormat(format!("Serialize error: {}", e)))?;
2265        std::fs::write(path, patched)?;
2266        return Ok(true);
2267    }
2268
2269    // Multi-line JSONL: find the highest request index referenced across all lines,
2270    // then check if the last modelState delta for that index has value=2 or is missing.
2271    // If so, append a corrective delta.
2272    let mut highest_req_idx: Option<usize> = None;
2273    let mut last_model_state_value: Option<u64> = None;
2274
2275    // Check kind:0 snapshot for request count
2276    if let Ok(first_entry) = serde_json::from_str::<serde_json::Value>(lines[0].trim()) {
2277        if let Some(requests) = first_entry
2278            .get("v")
2279            .and_then(|v| v.get("requests"))
2280            .and_then(|r| r.as_array())
2281        {
2282            if !requests.is_empty() {
2283                let last_idx = requests.len() - 1;
2284                highest_req_idx = Some(last_idx);
2285                // Check modelState in the snapshot's last request
2286                if let Some(ms) = requests[last_idx].get("modelState") {
2287                    last_model_state_value = ms.get("value").and_then(|v| v.as_u64());
2288                }
2289            }
2290        }
2291    }
2292
2293    // Scan deltas for higher request indices and modelState updates
2294    static REQ_IDX_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r#""k":\["requests",(\d+)"#).unwrap());
2295
2296    for line in &lines[1..] {
2297        if let Some(caps) = REQ_IDX_RE.captures(line) {
2298            if let Ok(idx) = caps[1].parse::<usize>() {
2299                if highest_req_idx.is_none() || idx > highest_req_idx.unwrap() {
2300                    highest_req_idx = Some(idx);
2301                    last_model_state_value = None; // Reset for new highest
2302                }
2303                // Track modelState for the highest request index
2304                if Some(idx) == highest_req_idx && line.contains("\"modelState\"") {
2305                    if let Ok(entry) = serde_json::from_str::<serde_json::Value>(line.trim()) {
2306                        last_model_state_value = entry
2307                            .get("v")
2308                            .and_then(|v| v.get("value"))
2309                            .and_then(|v| v.as_u64());
2310                    }
2311                }
2312            }
2313        }
2314    }
2315
2316    let req_idx = match highest_req_idx {
2317        Some(idx) => idx,
2318        None => return Ok(false),
2319    };
2320
2321    let needs_fix = match last_model_state_value {
2322        Some(2) => true, // Cancelled
2323        None => true,    // Missing (never completed)
2324        _ => false,      // Already complete or other valid state
2325    };
2326
2327    if !needs_fix {
2328        return Ok(false);
2329    }
2330
2331    let now = std::time::SystemTime::now()
2332        .duration_since(std::time::UNIX_EPOCH)
2333        .unwrap_or_default()
2334        .as_millis() as u64;
2335
2336    let fix_delta = format!(
2337        "\n{{\"kind\":1,\"k\":[\"requests\",{},\"modelState\"],\"v\":{{\"value\":1,\"completedAt\":{}}}}}",
2338        req_idx, now
2339    );
2340
2341    use std::io::Write;
2342    let mut file = std::fs::OpenOptions::new().append(true).open(path)?;
2343    file.write_all(fix_delta.as_bytes())?;
2344
2345    Ok(true)
2346}
2347
2348/// Repair workspace sessions: compact large JSONL files and fix the index.
2349/// Returns (compacted_count, index_fixed_count).
2350pub fn repair_workspace_sessions(
2351    workspace_id: &str,
2352    chat_sessions_dir: &Path,
2353    force: bool,
2354) -> Result<(usize, usize)> {
2355    let db_path = get_workspace_storage_db(workspace_id)?;
2356
2357    if !db_path.exists() {
2358        return Err(CsmError::WorkspaceNotFound(format!(
2359            "Database not found: {}",
2360            db_path.display()
2361        )));
2362    }
2363
2364    if !force && is_vscode_running() {
2365        return Err(CsmError::VSCodeRunning);
2366    }
2367
2368    let mut compacted = 0;
2369    let mut fields_fixed = 0;
2370
2371    if chat_sessions_dir.exists() {
2372        // Pass 1: Compact large JSONL files and fix missing fields
2373        for entry in std::fs::read_dir(chat_sessions_dir)? {
2374            let entry = entry?;
2375            let path = entry.path();
2376            if path.extension().is_some_and(|e| e == "jsonl") {
2377                let metadata = std::fs::metadata(&path)?;
2378                let size_mb = metadata.len() / (1024 * 1024);
2379
2380                let content = std::fs::read_to_string(&path)
2381                    .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
2382                let line_count = content.lines().count();
2383
2384                if line_count > 1 {
2385                    // Compact multi-line JSONL (has operations to replay)
2386                    let stem = path
2387                        .file_stem()
2388                        .map(|s| s.to_string_lossy().to_string())
2389                        .unwrap_or_default();
2390                    println!(
2391                        "   Compacting {} ({} lines, {}MB)...",
2392                        stem, line_count, size_mb
2393                    );
2394
2395                    match compact_session_jsonl(&path) {
2396                        Ok(backup_path) => {
2397                            let new_size = std::fs::metadata(&path)
2398                                .map(|m| m.len() / (1024 * 1024))
2399                                .unwrap_or(0);
2400                            println!(
2401                                "   [OK] Compacted: {}MB -> {}MB (backup: {})",
2402                                size_mb,
2403                                new_size,
2404                                backup_path
2405                                    .file_name()
2406                                    .unwrap_or_default()
2407                                    .to_string_lossy()
2408                            );
2409                            compacted += 1;
2410                        }
2411                        Err(e) => {
2412                            println!("   [WARN] Failed to compact {}: {}", stem, e);
2413                        }
2414                    }
2415                } else {
2416                    // Single-line JSONL — check for missing VS Code fields
2417                    if let Some(first_line) = content.lines().next() {
2418                        if let Ok(mut obj) = serde_json::from_str::<serde_json::Value>(first_line) {
2419                            let is_kind_0 = obj
2420                                .get("kind")
2421                                .and_then(|k| k.as_u64())
2422                                .map(|k| k == 0)
2423                                .unwrap_or(false);
2424
2425                            if is_kind_0 {
2426                                if let Some(v) = obj.get("v") {
2427                                    let missing = !v.get("hasPendingEdits").is_some()
2428                                        || !v.get("pendingRequests").is_some()
2429                                        || !v.get("inputState").is_some()
2430                                        || !v.get("sessionId").is_some();
2431
2432                                    if missing {
2433                                        let session_id = path
2434                                            .file_stem()
2435                                            .and_then(|s| s.to_str())
2436                                            .map(|s| s.to_string());
2437                                        if let Some(v_mut) = obj.get_mut("v") {
2438                                            ensure_vscode_compat_fields(
2439                                                v_mut,
2440                                                session_id.as_deref(),
2441                                            );
2442                                        }
2443                                        let patched = serde_json::to_string(&obj).map_err(|e| {
2444                                            CsmError::InvalidSessionFormat(format!(
2445                                                "Failed to serialize: {}",
2446                                                e
2447                                            ))
2448                                        })?;
2449                                        std::fs::write(&path, &patched)?;
2450                                        let stem = path
2451                                            .file_stem()
2452                                            .map(|s| s.to_string_lossy().to_string())
2453                                            .unwrap_or_default();
2454                                        println!("   [OK] Fixed missing VS Code fields: {}", stem);
2455                                        fields_fixed += 1;
2456                                    }
2457                                }
2458                            }
2459                        }
2460                    }
2461                }
2462            }
2463        }
2464    }
2465
2466    // Pass 1.5: Convert skeleton .json files to valid .jsonl.
2467    // Skeleton files are legacy .json files where all data has been stripped,
2468    // leaving only structural characters ({}, [], whitespace). We convert them
2469    // to valid minimal .jsonl, preserving title/timestamp from the index,
2470    // and rename the original to .json.corrupt (non-destructive).
2471    let mut skeletons_converted = 0;
2472    if chat_sessions_dir.exists() {
2473        // Read current index to get titles/timestamps for converted sessions
2474        let index_entries: std::collections::HashMap<String, (String, Option<i64>)> =
2475            if let Ok(index) = read_chat_session_index(&db_path) {
2476                index
2477                    .entries
2478                    .iter()
2479                    .map(|(id, e)| (id.clone(), (e.title.clone(), Some(e.last_message_date))))
2480                    .collect()
2481            } else {
2482                std::collections::HashMap::new()
2483            };
2484
2485        // Collect .json files that don't have a corresponding .jsonl
2486        let mut jsonl_stems: HashSet<String> = HashSet::new();
2487        for entry in std::fs::read_dir(chat_sessions_dir)? {
2488            let entry = entry?;
2489            let path = entry.path();
2490            if path.extension().is_some_and(|e| e == "jsonl") {
2491                if let Some(stem) = path.file_stem() {
2492                    jsonl_stems.insert(stem.to_string_lossy().to_string());
2493                }
2494            }
2495        }
2496
2497        for entry in std::fs::read_dir(chat_sessions_dir)? {
2498            let entry = entry?;
2499            let path = entry.path();
2500            if path.extension().is_some_and(|e| e == "json")
2501                && !path.to_string_lossy().ends_with(".bak")
2502                && !path.to_string_lossy().ends_with(".corrupt")
2503            {
2504                let stem = path
2505                    .file_stem()
2506                    .map(|s| s.to_string_lossy().to_string())
2507                    .unwrap_or_default();
2508
2509                // Skip if .jsonl already exists
2510                if jsonl_stems.contains(&stem) {
2511                    continue;
2512                }
2513
2514                let (title, timestamp) = index_entries
2515                    .get(&stem)
2516                    .map(|(t, ts)| (t.as_str(), *ts))
2517                    .unwrap_or(("Recovered Session", None));
2518
2519                match convert_skeleton_json_to_jsonl(&path, Some(title), timestamp) {
2520                    Ok(Some(jsonl_path)) => {
2521                        println!(
2522                            "   [OK] Converted skeleton .json → .jsonl: {} (\"{}\")",
2523                            stem, title
2524                        );
2525                        // Track the new .jsonl so subsequent passes process it
2526                        jsonl_stems.insert(stem);
2527                        skeletons_converted += 1;
2528                        let _ = jsonl_path; // used implicitly via jsonl_stems
2529                    }
2530                    Ok(None) => {} // Not a skeleton or skipped
2531                    Err(e) => {
2532                        println!("   [WARN] Failed to convert skeleton {}: {}", stem, e);
2533                    }
2534                }
2535            }
2536        }
2537    }
2538
2539    // Pass 2: Fix cancelled modelState in all JSONL files.
2540    // VS Code reads modelState from file content (not the index) to determine
2541    // lastResponseState. If the last request has modelState.value=2 (Cancelled)
2542    // or is missing entirely, VS Code refuses to load the session.
2543    let mut cancelled_fixed = 0;
2544    if chat_sessions_dir.exists() {
2545        for entry in std::fs::read_dir(chat_sessions_dir)? {
2546            let entry = entry?;
2547            let path = entry.path();
2548            if path.extension().is_some_and(|e| e == "jsonl") {
2549                match fix_cancelled_model_state(&path) {
2550                    Ok(true) => {
2551                        let stem = path
2552                            .file_stem()
2553                            .map(|s| s.to_string_lossy().to_string())
2554                            .unwrap_or_default();
2555                        println!("   [OK] Fixed cancelled modelState: {}", stem);
2556                        cancelled_fixed += 1;
2557                    }
2558                    Ok(false) => {} // No fix needed
2559                    Err(e) => {
2560                        let stem = path
2561                            .file_stem()
2562                            .map(|s| s.to_string_lossy().to_string())
2563                            .unwrap_or_default();
2564                        println!("   [WARN] Failed to fix modelState for {}: {}", stem, e);
2565                    }
2566                }
2567            }
2568        }
2569    }
2570
2571    // Pass 3: Rebuild the index with correct metadata
2572    let (index_fixed, _) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
2573
2574    if fields_fixed > 0 {
2575        println!(
2576            "   [OK] Injected missing VS Code fields into {} session(s)",
2577            fields_fixed
2578        );
2579    }
2580    if skeletons_converted > 0 {
2581        println!(
2582            "   [OK] Converted {} skeleton .json file(s) to .jsonl",
2583            skeletons_converted
2584        );
2585    }
2586    if cancelled_fixed > 0 {
2587        println!(
2588            "   [OK] Fixed cancelled modelState in {} session(s)",
2589            cancelled_fixed
2590        );
2591    }
2592
2593    Ok((compacted, index_fixed))
2594}