Skip to main content

chasm/
storage.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: AGPL-3.0-only
3//! VS Code storage (SQLite database) operations
4
5use crate::error::{CsmError, Result};
6use crate::models::{
7    ChatRequest, ChatSession, ChatSessionIndex, ChatSessionIndexEntry, ChatSessionTiming,
8    ModelCacheEntry, StateCacheEntry,
9};
10use crate::workspace::{get_empty_window_sessions_path, get_workspace_storage_path};
11use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
12use once_cell::sync::Lazy;
13use regex::Regex;
14use rusqlite::Connection;
15use std::collections::HashSet;
16use std::path::{Path, PathBuf};
17use sysinfo::System;
18
19/// A single issue detected during workspace session diagnostics
20#[derive(Debug, Clone)]
21pub struct SessionIssue {
22    /// The session file stem (UUID)
23    pub session_id: String,
24    /// Category of issue
25    pub kind: SessionIssueKind,
26    /// Human-readable description
27    pub detail: String,
28}
29
/// The kinds of session problems that diagnostics can detect and auto-fix.
#[derive(Debug, Clone, PartialEq)]
pub enum SessionIssueKind {
    /// The JSONL file spans several lines, i.e. its operations were never compacted
    MultiLineJsonl,
    /// The first JSONL line holds several JSON objects glued together without newlines
    ConcatenatedJsonl,
    /// The index entry has lastResponseState = 2 (Cancelled), which blocks VS Code loading
    CancelledState,
    /// In the file content, the last request's modelState.value is 2 (Cancelled) or absent
    CancelledModelState,
    /// A session file is on disk but the VS Code index does not list it
    OrphanedSession,
    /// The index lists a session whose file is no longer on disk
    StaleIndexEntry,
    /// The session lacks fields VS Code requires for compatibility
    MissingCompatFields,
    /// The same session ID exists as both a .json and a .jsonl file
    DuplicateFormat,
    /// A legacy .json file is corrupted: only structural chars ({}, whitespace) remain
    SkeletonJson,
}
52
53impl std::fmt::Display for SessionIssueKind {
54    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55        match self {
56            SessionIssueKind::MultiLineJsonl => write!(f, "multi-line JSONL"),
57            SessionIssueKind::ConcatenatedJsonl => write!(f, "concatenated JSONL"),
58            SessionIssueKind::CancelledState => write!(f, "cancelled state"),
59            SessionIssueKind::CancelledModelState => write!(f, "cancelled modelState in file"),
60            SessionIssueKind::OrphanedSession => write!(f, "orphaned session"),
61            SessionIssueKind::StaleIndexEntry => write!(f, "stale index entry"),
62            SessionIssueKind::MissingCompatFields => write!(f, "missing compat fields"),
63            SessionIssueKind::DuplicateFormat => write!(f, "duplicate .json/.jsonl"),
64            SessionIssueKind::SkeletonJson => write!(f, "skeleton .json (corrupt)"),
65        }
66    }
67}
68
69/// Summary of issues found in a single workspace
70#[derive(Debug, Clone, Default)]
71pub struct WorkspaceDiagnosis {
72    /// Project path (if known)
73    pub project_path: Option<String>,
74    /// Workspace hash
75    pub workspace_hash: String,
76    /// Total sessions on disk
77    pub sessions_on_disk: usize,
78    /// Total sessions in index
79    pub sessions_in_index: usize,
80    /// All detected issues
81    pub issues: Vec<SessionIssue>,
82}
83
84impl WorkspaceDiagnosis {
85    pub fn is_healthy(&self) -> bool {
86        self.issues.is_empty()
87    }
88
89    pub fn issue_count_by_kind(&self, kind: &SessionIssueKind) -> usize {
90        self.issues.iter().filter(|i| &i.kind == kind).count()
91    }
92}
93
94/// Diagnose a workspace for session issues without modifying anything.
95/// Returns a structured report of all detected problems.
96pub fn diagnose_workspace_sessions(
97    workspace_id: &str,
98    chat_sessions_dir: &Path,
99) -> Result<WorkspaceDiagnosis> {
100    let mut diagnosis = WorkspaceDiagnosis {
101        workspace_hash: workspace_id.to_string(),
102        ..Default::default()
103    };
104
105    if !chat_sessions_dir.exists() {
106        return Ok(diagnosis);
107    }
108
109    // Collect session files on disk
110    let mut jsonl_sessions: HashSet<String> = HashSet::new();
111    let mut json_sessions: HashSet<String> = HashSet::new();
112    let mut all_session_ids: HashSet<String> = HashSet::new();
113
114    for entry in std::fs::read_dir(chat_sessions_dir)? {
115        let entry = entry?;
116        let path = entry.path();
117        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
118        let stem = path
119            .file_stem()
120            .map(|s| s.to_string_lossy().to_string())
121            .unwrap_or_default();
122
123        match ext {
124            "jsonl" => {
125                jsonl_sessions.insert(stem.clone());
126                all_session_ids.insert(stem);
127            }
128            "json" if !path.to_string_lossy().ends_with(".bak") => {
129                json_sessions.insert(stem.clone());
130                all_session_ids.insert(stem);
131            }
132            _ => {}
133        }
134    }
135    diagnosis.sessions_on_disk = all_session_ids.len();
136
137    // Check for duplicate .json/.jsonl files
138    for id in &jsonl_sessions {
139        if json_sessions.contains(id) {
140            diagnosis.issues.push(SessionIssue {
141                session_id: id.clone(),
142                kind: SessionIssueKind::DuplicateFormat,
143                detail: format!("Both {id}.json and {id}.jsonl exist"),
144            });
145        }
146    }
147
148    // Check JSONL files for content issues
149    for id in &jsonl_sessions {
150        let path = chat_sessions_dir.join(format!("{id}.jsonl"));
151        if let Ok(content) = std::fs::read_to_string(&path) {
152            let line_count = content.lines().count();
153
154            if line_count > 1 {
155                let size_mb = content.len() / (1024 * 1024);
156                diagnosis.issues.push(SessionIssue {
157                    session_id: id.clone(),
158                    kind: SessionIssueKind::MultiLineJsonl,
159                    detail: format!("{line_count} lines, ~{size_mb} MB — needs compaction"),
160                });
161            }
162
163            // Check first line for concatenation
164            if let Some(first_line) = content.lines().next() {
165                if first_line.contains("}{\"kind\":") {
166                    diagnosis.issues.push(SessionIssue {
167                        session_id: id.clone(),
168                        kind: SessionIssueKind::ConcatenatedJsonl,
169                        detail: "First line has concatenated JSON objects".to_string(),
170                    });
171                }
172            }
173
174            // Check for missing compat fields (only single-line files worth checking)
175            if line_count == 1 {
176                if let Some(first_line) = content.lines().next() {
177                    if let Ok(obj) = serde_json::from_str::<serde_json::Value>(first_line) {
178                        let is_kind_0 = obj
179                            .get("kind")
180                            .and_then(|k| k.as_u64())
181                            .map(|k| k == 0)
182                            .unwrap_or(false);
183
184                        if is_kind_0 {
185                            if let Some(v) = obj.get("v") {
186                                let missing_fields: Vec<&str> = [
187                                    "hasPendingEdits",
188                                    "pendingRequests",
189                                    "inputState",
190                                    "sessionId",
191                                    "version",
192                                ]
193                                .iter()
194                                .filter(|f| v.get(**f).is_none())
195                                .copied()
196                                .collect();
197
198                                if !missing_fields.is_empty() {
199                                    diagnosis.issues.push(SessionIssue {
200                                        session_id: id.clone(),
201                                        kind: SessionIssueKind::MissingCompatFields,
202                                        detail: format!("Missing: {}", missing_fields.join(", ")),
203                                    });
204                                }
205
206                                // Check for cancelled modelState in file content
207                                if let Some(requests) = v.get("requests").and_then(|r| r.as_array())
208                                {
209                                    if let Some(last_req) = requests.last() {
210                                        let model_state_value = last_req
211                                            .get("modelState")
212                                            .and_then(|ms| ms.get("value"))
213                                            .and_then(|v| v.as_u64());
214                                        match model_state_value {
215                                            Some(1) => {} // Complete — valid
216                                            Some(v) => {
217                                                diagnosis.issues.push(SessionIssue {
218                                                    session_id: id.clone(),
219                                                    kind: SessionIssueKind::CancelledModelState,
220                                                    detail: format!("Last request modelState.value={} (not Complete) in file content", v),
221                                                });
222                                            }
223                                            None => {
224                                                diagnosis.issues.push(SessionIssue {
225                                                    session_id: id.clone(),
226                                                    kind: SessionIssueKind::CancelledModelState,
227                                                    detail: "Last request missing modelState in file content".to_string(),
228                                                });
229                                            }
230                                        }
231                                    }
232                                }
233
234                                // Check hasPendingEdits — true blocks session loading
235                                if v.get("hasPendingEdits")
236                                    .and_then(|v| v.as_bool())
237                                    .unwrap_or(false)
238                                    == true
239                                {
240                                    diagnosis.issues.push(SessionIssue {
241                                        session_id: id.clone(),
242                                        kind: SessionIssueKind::MissingCompatFields,
243                                        detail: "hasPendingEdits is true (blocks session loading)"
244                                            .to_string(),
245                                    });
246                                }
247                            }
248                        }
249                    }
250                }
251            }
252        }
253    }
254
255    // Check .json files for skeleton corruption
256    for id in &json_sessions {
257        // Skip if a .jsonl already exists (it takes precedence)
258        if jsonl_sessions.contains(id) {
259            continue;
260        }
261        let path = chat_sessions_dir.join(format!("{id}.json"));
262        if let Ok(content) = std::fs::read_to_string(&path) {
263            if is_skeleton_json(&content) {
264                diagnosis.issues.push(SessionIssue {
265                    session_id: id.clone(),
266                    kind: SessionIssueKind::SkeletonJson,
267                    detail: format!(
268                        "Legacy .json is corrupt — only structural chars remain ({} bytes)",
269                        content.len()
270                    ),
271                });
272            }
273        }
274    }
275
276    // Check index for stale entries, orphans, and cancelled state
277    let db_path = get_workspace_storage_db(workspace_id)?;
278    if db_path.exists() {
279        if let Ok(index) = read_chat_session_index(&db_path) {
280            diagnosis.sessions_in_index = index.entries.len();
281
282            // Stale index entries (in index but no file on disk)
283            for (id, _entry) in &index.entries {
284                if !all_session_ids.contains(id) {
285                    diagnosis.issues.push(SessionIssue {
286                        session_id: id.clone(),
287                        kind: SessionIssueKind::StaleIndexEntry,
288                        detail: "In index but no file on disk".to_string(),
289                    });
290                }
291            }
292
293            // Cancelled state entries
294            for (id, entry) in &index.entries {
295                if entry.last_response_state == 2 {
296                    diagnosis.issues.push(SessionIssue {
297                        session_id: id.clone(),
298                        kind: SessionIssueKind::CancelledState,
299                        detail: "lastResponseState=2 (Cancelled) — blocks VS Code loading"
300                            .to_string(),
301                    });
302                }
303            }
304
305            // Orphaned sessions (on disk but not in index)
306            let indexed_ids: HashSet<&String> = index.entries.keys().collect();
307            for id in &all_session_ids {
308                if !indexed_ids.contains(id) {
309                    diagnosis.issues.push(SessionIssue {
310                        session_id: id.clone(),
311                        kind: SessionIssueKind::OrphanedSession,
312                        detail: "File on disk but not in VS Code index".to_string(),
313                    });
314                }
315            }
316        }
317    }
318
319    Ok(diagnosis)
320}
321
322/// Regex to match any Unicode escape sequence (valid or not)
323static UNICODE_ESCAPE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\\u[0-9a-fA-F]{4}").unwrap());
324
/// On-disk format of a VS Code session file; it determines the parsing strategy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VsCodeSessionFormat {
    /// Legacy JSON (VS Code < 1.109.0):
    /// one JSON object holding the ChatSession structure
    LegacyJson,
    /// JSON Lines (VS Code >= 1.109.0, January 2026+):
    /// event-sourced entries of kind 0 (initial), kind 1 (delta), kind 2 (replace/splice)
    JsonLines,
}
335
/// Version of a session's internal schema.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SessionSchemaVersion {
    /// Version 1: the original format with basic fields
    V1 = 1,
    /// Version 2: adds more metadata fields
    V2 = 2,
    /// Version 3: the current format with the full request/response structure
    V3 = 3,
    /// A version that is not recognized
    Unknown = 0,
}
348
349impl SessionSchemaVersion {
350    /// Create from version number
351    pub fn from_version(v: u32) -> Self {
352        match v {
353            1 => Self::V1,
354            2 => Self::V2,
355            3 => Self::V3,
356            _ => Self::Unknown,
357        }
358    }
359
360    /// Get version number
361    pub fn version_number(&self) -> u32 {
362        match self {
363            Self::V1 => 1,
364            Self::V2 => 2,
365            Self::V3 => 3,
366            Self::Unknown => 0,
367        }
368    }
369
370    /// Get description
371    pub fn description(&self) -> &'static str {
372        match self {
373            Self::V1 => "v1 (basic)",
374            Self::V2 => "v2 (extended metadata)",
375            Self::V3 => "v3 (full structure)",
376            Self::Unknown => "unknown",
377        }
378    }
379}
380
381impl std::fmt::Display for SessionSchemaVersion {
382    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
383        write!(f, "{}", self.description())
384    }
385}
386
387/// Result of session format detection
388#[derive(Debug, Clone)]
389pub struct SessionFormatInfo {
390    /// File format (JSON or JSONL)
391    pub format: VsCodeSessionFormat,
392    /// Schema version detected from content
393    pub schema_version: SessionSchemaVersion,
394    /// Confidence level (0.0 - 1.0)
395    pub confidence: f32,
396    /// Detection method used
397    pub detection_method: &'static str,
398}
399
400impl VsCodeSessionFormat {
401    /// Detect format from file path (by extension)
402    pub fn from_path(path: &Path) -> Self {
403        match path.extension().and_then(|e| e.to_str()) {
404            Some("jsonl") => Self::JsonLines,
405            _ => Self::LegacyJson,
406        }
407    }
408
409    /// Detect format from content by analyzing structure
410    pub fn from_content(content: &str) -> Self {
411        let trimmed = content.trim();
412
413        // JSONL: Multiple lines starting with { or first line has {"kind":
414        if trimmed.starts_with("{\"kind\":") || trimmed.starts_with("{ \"kind\":") {
415            return Self::JsonLines;
416        }
417
418        // Count lines that look like JSON objects
419        let mut json_object_lines = 0;
420        let mut total_non_empty_lines = 0;
421
422        for line in trimmed.lines().take(10) {
423            let line = line.trim();
424            if line.is_empty() {
425                continue;
426            }
427            total_non_empty_lines += 1;
428
429            // Check if line is a JSON object with "kind" field (JSONL marker)
430            if line.starts_with('{') && line.contains("\"kind\"") {
431                json_object_lines += 1;
432            }
433        }
434
435        // If multiple lines look like JSONL entries, it's JSONL
436        if json_object_lines >= 2
437            || (json_object_lines == 1 && total_non_empty_lines == 1 && trimmed.contains("\n{"))
438        {
439            return Self::JsonLines;
440        }
441
442        // Check if it's a single JSON object (legacy format)
443        if trimmed.starts_with('{') && trimmed.ends_with('}') {
444            // Look for ChatSession structure markers
445            if trimmed.contains("\"sessionId\"")
446                || trimmed.contains("\"creationDate\"")
447                || trimmed.contains("\"requests\"")
448            {
449                return Self::LegacyJson;
450            }
451        }
452
453        // Default to legacy JSON if unclear
454        Self::LegacyJson
455    }
456
457    /// Get minimum VS Code version that uses this format
458    pub fn min_vscode_version(&self) -> &'static str {
459        match self {
460            Self::LegacyJson => "1.0.0",
461            Self::JsonLines => "1.109.0",
462        }
463    }
464
465    /// Get human-readable format description
466    pub fn description(&self) -> &'static str {
467        match self {
468            Self::LegacyJson => "Legacy JSON (single object)",
469            Self::JsonLines => "JSON Lines (event-sourced, VS Code 1.109.0+)",
470        }
471    }
472
473    /// Get short format name
474    pub fn short_name(&self) -> &'static str {
475        match self {
476            Self::LegacyJson => "json",
477            Self::JsonLines => "jsonl",
478        }
479    }
480}
481
482impl std::fmt::Display for VsCodeSessionFormat {
483    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
484        write!(f, "{}", self.description())
485    }
486}
487
/// Sanitize JSON content by replacing lone surrogate escapes with `\uFFFD`.
///
/// VS Code sometimes writes invalid JSON with lone Unicode surrogates (e.g., `\udde0`).
/// A high surrogate escape (`\uD800`–`\uDBFF`) is kept only when a low surrogate
/// escape (`\uDC00`–`\uDFFF`) follows it immediately; any other surrogate escape is
/// replaced. All other text is copied through unchanged.
///
/// The scan is escape-aware: an escaped backslash followed by `uXXXX` (for example
/// `\\udde0`, which is the literal text `\udde0`) is not a Unicode escape and is
/// left untouched.
fn sanitize_json_unicode(content: &str) -> String {
    /// Byte length of a `\uXXXX` escape.
    const ESCAPE_LEN: usize = 6;
    /// Escape written in place of a lone surrogate.
    const REPLACEMENT: &str = "\\uFFFD";

    /// Decodes the `\uXXXX` escape that starts at `pos`, if there is one.
    fn unicode_escape_at(bytes: &[u8], pos: usize) -> Option<u16> {
        let escape = bytes.get(pos..pos.checked_add(ESCAPE_LEN)?)?;
        if escape[0] != b'\\' || escape[1] != b'u' {
            return None;
        }
        escape[2..].iter().try_fold(0u16, |acc, &byte| {
            let digit = (byte as char).to_digit(16)?;
            // `digit` is at most 15, so the cast cannot truncate.
            Some((acc << 4) | digit as u16)
        })
    }

    fn is_high_surrogate(unit: u16) -> bool {
        (0xD800..=0xDBFF).contains(&unit)
    }

    fn is_low_surrogate(unit: u16) -> bool {
        (0xDC00..=0xDFFF).contains(&unit)
    }

    let bytes = content.as_bytes();
    let mut result = String::with_capacity(content.len());
    // Everything before this offset has already been copied into `result`.
    let mut copied_to = 0;
    let mut pos = 0;

    while pos < bytes.len() {
        if bytes[pos] != b'\\' {
            pos += 1;
            continue;
        }

        let unit = match unicode_escape_at(bytes, pos) {
            Some(unit) => unit,
            None => {
                // Some other escape (`\\`, `\"`, `\n`, ...): step over the backslash
                // and the byte it escapes, so an escaped backslash is never mistaken
                // for the start of a `\u` escape.
                pos += 2;
                continue;
            }
        };

        if is_high_surrogate(unit) {
            let followed_by_low = unicode_escape_at(bytes, pos + ESCAPE_LEN)
                .map(is_low_surrogate)
                .unwrap_or(false);
            if followed_by_low {
                // Valid surrogate pair: keep both halves.
                pos += 2 * ESCAPE_LEN;
                continue;
            }
        } else if !is_low_surrogate(unit) {
            // Ordinary code unit: keep as is.
            pos += ESCAPE_LEN;
            continue;
        }

        // Lone high or low surrogate. `copied_to` and `pos` both sit on ASCII bytes,
        // so slicing here cannot split a multi-byte character.
        result.push_str(&content[copied_to..pos]);
        result.push_str(REPLACEMENT);
        pos += ESCAPE_LEN;
        copied_to = pos;
    }

    // Add remaining content
    result.push_str(&content[copied_to..]);
    result
}
581
582/// Try to parse JSON, sanitizing invalid Unicode if needed
583pub fn parse_session_json(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
584    match serde_json::from_str::<ChatSession>(content) {
585        Ok(session) => Ok(session),
586        Err(e) => {
587            // If parsing fails due to Unicode issue, try sanitizing
588            if e.to_string().contains("surrogate") || e.to_string().contains("escape") {
589                let sanitized = sanitize_json_unicode(content);
590                serde_json::from_str::<ChatSession>(&sanitized)
591            } else {
592                Err(e)
593            }
594        }
595    }
596}
597
/// Entry kinds found in the JSONL session format of VS Code 1.109.0+.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonlKind {
    /// kind 0: the initial session state
    Initial = 0,
    /// kind 1: a delta update to specific keys
    Delta = 1,
    /// kind 2: an array replace/splice operation.
    /// An optional 'i' field gives the splice index (truncate at i, then extend)
    ArraySplice = 2,
}
609
610/// Parse a JSONL (JSON Lines) session file (VS Code 1.109.0+ format)
611/// Each line is a JSON object with 'kind' field indicating the type:
612/// - kind 0: Initial session metadata with 'v' containing ChatSession-like structure
613/// - kind 1: Delta update with 'k' (keys path) and 'v' (value)
614/// - kind 2: Array replace/splice with 'k' (path), 'v' (items), optional 'i' (splice index)
615pub fn parse_session_jsonl(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
616    // Pre-process: split concatenated JSON objects that lack newline separators
617    let content = split_concatenated_jsonl(content);
618
619    let mut session = ChatSession {
620        version: 3,
621        session_id: None,
622        creation_date: 0,
623        last_message_date: 0,
624        is_imported: false,
625        initial_location: "panel".to_string(),
626        custom_title: None,
627        requester_username: None,
628        requester_avatar_icon_uri: None,
629        responder_username: None,
630        responder_avatar_icon_uri: None,
631        requests: Vec::new(),
632    };
633
634    for line in content.lines() {
635        let line = line.trim();
636        if line.is_empty() {
637            continue;
638        }
639
640        // Parse each line as a JSON object
641        let entry: serde_json::Value = match serde_json::from_str(line) {
642            Ok(v) => v,
643            Err(_) => {
644                // Try sanitizing Unicode
645                let sanitized = sanitize_json_unicode(line);
646                serde_json::from_str(&sanitized)?
647            }
648        };
649
650        let kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(0);
651
652        match kind {
653            0 => {
654                // Initial state - 'v' contains the session metadata
655                if let Some(v) = entry.get("v") {
656                    // Parse version
657                    if let Some(version) = v.get("version").and_then(|x| x.as_u64()) {
658                        session.version = version as u32;
659                    }
660                    // Parse session ID
661                    if let Some(sid) = v.get("sessionId").and_then(|x| x.as_str()) {
662                        session.session_id = Some(sid.to_string());
663                    }
664                    // Parse creation date
665                    if let Some(cd) = v.get("creationDate").and_then(|x| x.as_i64()) {
666                        session.creation_date = cd;
667                    }
668                    // Parse initial location
669                    if let Some(loc) = v.get("initialLocation").and_then(|x| x.as_str()) {
670                        session.initial_location = loc.to_string();
671                    }
672                    // Parse responder username
673                    if let Some(ru) = v.get("responderUsername").and_then(|x| x.as_str()) {
674                        session.responder_username = Some(ru.to_string());
675                    }
676                    // Parse custom title
677                    if let Some(title) = v.get("customTitle").and_then(|x| x.as_str()) {
678                        session.custom_title = Some(title.to_string());
679                    }
680                    // Parse hasPendingEdits as imported marker
681                    if let Some(imported) = v.get("isImported").and_then(|x| x.as_bool()) {
682                        session.is_imported = imported;
683                    }
684                    // Parse requests array if present
685                    if let Some(requests) = v.get("requests") {
686                        if let Ok(reqs) =
687                            serde_json::from_value::<Vec<ChatRequest>>(requests.clone())
688                        {
689                            session.requests = reqs;
690                            // Compute last_message_date from the latest request timestamp
691                            if let Some(latest_ts) =
692                                session.requests.iter().filter_map(|r| r.timestamp).max()
693                            {
694                                session.last_message_date = latest_ts;
695                            }
696                        }
697                    }
698                    // Fall back to creationDate if no request timestamps found
699                    if session.last_message_date == 0 {
700                        session.last_message_date = session.creation_date;
701                    }
702                }
703            }
704            1 => {
705                // Delta update - 'k' is array of key path, 'v' is the value
706                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
707                    if let Some(keys_arr) = keys.as_array() {
708                        // Handle top-level session keys
709                        if keys_arr.len() == 1 {
710                            if let Some(key) = keys_arr[0].as_str() {
711                                match key {
712                                    "customTitle" => {
713                                        if let Some(title) = value.as_str() {
714                                            session.custom_title = Some(title.to_string());
715                                        }
716                                    }
717                                    "lastMessageDate" => {
718                                        if let Some(date) = value.as_i64() {
719                                            session.last_message_date = date;
720                                        }
721                                    }
722                                    "hasPendingEdits" | "isImported" => {
723                                        // Session-level boolean updates, safe to ignore for now
724                                    }
725                                    _ => {} // Ignore unknown keys
726                                }
727                            }
728                        }
729                        // Handle nested request field updates: ["requests", idx, field]
730                        else if keys_arr.len() == 3 {
731                            if let (Some("requests"), Some(idx), Some(field)) = (
732                                keys_arr[0].as_str(),
733                                keys_arr[1].as_u64().map(|i| i as usize),
734                                keys_arr[2].as_str(),
735                            ) {
736                                // Auto-grow requests array to accommodate the referenced index.
737                                // VS Code emits events for request indices before a formal
738                                // kind:2 k=["requests"] append, so we must create placeholder
739                                // requests as needed.
740                                while idx >= session.requests.len() {
741                                    session.requests.push(ChatRequest::default());
742                                }
743                                match field {
744                                    "response" => {
745                                        session.requests[idx].response = Some(value.clone());
746                                    }
747                                    "result" => {
748                                        session.requests[idx].result = Some(value.clone());
749                                    }
750                                    "followups" => {
751                                        session.requests[idx].followups =
752                                            serde_json::from_value(value.clone()).ok();
753                                    }
754                                    "isCanceled" => {
755                                        session.requests[idx].is_canceled = value.as_bool();
756                                    }
757                                    "contentReferences" => {
758                                        session.requests[idx].content_references =
759                                            serde_json::from_value(value.clone()).ok();
760                                    }
761                                    "codeCitations" => {
762                                        session.requests[idx].code_citations =
763                                            serde_json::from_value(value.clone()).ok();
764                                    }
765                                    "modelState" => {
766                                        session.requests[idx].model_state = Some(value.clone());
767                                    }
768                                    "modelId" => {
769                                        session.requests[idx].model_id =
770                                            value.as_str().map(|s| s.to_string());
771                                    }
772                                    "agent" => {
773                                        session.requests[idx].agent = Some(value.clone());
774                                    }
775                                    "variableData" => {
776                                        session.requests[idx].variable_data = Some(value.clone());
777                                    }
778                                    _ => {} // Ignore unknown request fields
779                                }
780                            }
781                        }
782                    }
783                }
784            }
785            2 => {
786                // Array splice operation - 'k' is the key path, 'v' is the new array items
787                // Optional 'i' field is the splice start index (truncate at i, then extend)
788                // Without 'i', items are appended to the end of the array
789                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
790                    let splice_index = entry.get("i").and_then(|i| i.as_u64()).map(|i| i as usize);
791                    if let Some(keys_arr) = keys.as_array() {
792                        // Top-level requests: k=["requests"], v=[requests_array]
793                        if keys_arr.len() == 1 {
794                            if let Some("requests") = keys_arr[0].as_str() {
795                                if let Some(items) = value.as_array() {
796                                    if let Some(idx) = splice_index {
797                                        // Splice: truncate at index i, then extend with new items
798                                        session.requests.truncate(idx);
799                                    }
800                                    // Without 'i': append to end (no truncation)
801                                    for item in items {
802                                        if let Ok(req) =
803                                            serde_json::from_value::<ChatRequest>(item.clone())
804                                        {
805                                            session.requests.push(req);
806                                        }
807                                    }
808                                    // Update last message date from latest request
809                                    if let Some(last_req) = session.requests.last() {
810                                        if let Some(ts) = last_req.timestamp {
811                                            session.last_message_date = ts;
812                                        }
813                                    }
814                                }
815                            }
816                        }
817                        // Nested array replace/splice: k=["requests", idx, "response"], v=[parts]
818                        else if keys_arr.len() == 3 {
819                            if let (Some("requests"), Some(req_idx), Some(field)) = (
820                                keys_arr[0].as_str(),
821                                keys_arr[1].as_u64().map(|i| i as usize),
822                                keys_arr[2].as_str(),
823                            ) {
824                                // Auto-grow requests array for the referenced index
825                                while req_idx >= session.requests.len() {
826                                    session.requests.push(ChatRequest::default());
827                                }
828                                match field {
829                                    "response" => {
830                                        // Response is stored as a JSON Value (array)
831                                        if let Some(idx) = splice_index {
832                                            // Splice: keep items before index i, replace rest
833                                            if let Some(existing) =
834                                                session.requests[req_idx].response.as_ref()
835                                            {
836                                                if let Some(existing_arr) = existing.as_array() {
837                                                    let mut new_arr: Vec<serde_json::Value> =
838                                                        existing_arr[..idx.min(existing_arr.len())]
839                                                            .to_vec();
840                                                    if let Some(new_items) = value.as_array() {
841                                                        new_arr.extend(new_items.iter().cloned());
842                                                    }
843                                                    session.requests[req_idx].response =
844                                                        Some(serde_json::Value::Array(new_arr));
845                                                } else {
846                                                    session.requests[req_idx].response =
847                                                        Some(value.clone());
848                                                }
849                                            } else {
850                                                session.requests[req_idx].response =
851                                                    Some(value.clone());
852                                            }
853                                        } else {
854                                            // No splice index: append to existing response array
855                                            if let Some(existing) =
856                                                session.requests[req_idx].response.as_ref()
857                                            {
858                                                if let Some(existing_arr) = existing.as_array() {
859                                                    let mut new_arr = existing_arr.clone();
860                                                    if let Some(new_items) = value.as_array() {
861                                                        new_arr.extend(new_items.iter().cloned());
862                                                    }
863                                                    session.requests[req_idx].response =
864                                                        Some(serde_json::Value::Array(new_arr));
865                                                } else {
866                                                    session.requests[req_idx].response =
867                                                        Some(value.clone());
868                                                }
869                                            } else {
870                                                session.requests[req_idx].response =
871                                                    Some(value.clone());
872                                            }
873                                        }
874                                    }
875                                    "contentReferences" => {
876                                        session.requests[req_idx].content_references =
877                                            serde_json::from_value(value.clone()).ok();
878                                    }
879                                    _ => {} // Ignore unknown fields
880                                }
881                            }
882                        }
883                    }
884                }
885            }
886            _ => {} // Unknown kind, skip
887        }
888    }
889
890    Ok(session)
891}
892
/// Report whether `ext` is one of the extensions used for session files.
///
/// Recognized extensions are exactly `json`, `jsonl`, and `backup`. The
/// comparison is case-sensitive, and an extension that is not valid UTF-8
/// never matches.
pub fn is_session_file_extension(ext: &std::ffi::OsStr) -> bool {
    matches!(ext.to_str(), Some("json" | "jsonl" | "backup"))
}
897
898/// Detect session format and version from content
899pub fn detect_session_format(content: &str) -> SessionFormatInfo {
900    let format = VsCodeSessionFormat::from_content(content);
901    let trimmed = content.trim();
902
903    // Detect schema version based on format
904    let (schema_version, confidence, method) = match format {
905        VsCodeSessionFormat::JsonLines => {
906            // For JSONL, check the first line's "v" object for version
907            if let Some(first_line) = trimmed.lines().next() {
908                if let Ok(entry) = serde_json::from_str::<serde_json::Value>(first_line) {
909                    if let Some(v) = entry.get("v") {
910                        if let Some(ver) = v.get("version").and_then(|x| x.as_u64()) {
911                            (
912                                SessionSchemaVersion::from_version(ver as u32),
913                                0.95,
914                                "jsonl-version-field",
915                            )
916                        } else {
917                            // No version field, likely v3 (current default)
918                            (SessionSchemaVersion::V3, 0.7, "jsonl-default")
919                        }
920                    } else {
921                        (SessionSchemaVersion::V3, 0.6, "jsonl-no-v-field")
922                    }
923                } else {
924                    (SessionSchemaVersion::Unknown, 0.3, "jsonl-parse-error")
925                }
926            } else {
927                (SessionSchemaVersion::Unknown, 0.2, "jsonl-empty")
928            }
929        }
930        VsCodeSessionFormat::LegacyJson => {
931            // For JSON, directly check the version field
932            if let Ok(json) = serde_json::from_str::<serde_json::Value>(trimmed) {
933                if let Some(ver) = json.get("version").and_then(|x| x.as_u64()) {
934                    (
935                        SessionSchemaVersion::from_version(ver as u32),
936                        0.95,
937                        "json-version-field",
938                    )
939                } else {
940                    // Infer from structure
941                    if json.get("requests").is_some() && json.get("sessionId").is_some() {
942                        (SessionSchemaVersion::V3, 0.8, "json-structure-inference")
943                    } else if json.get("messages").is_some() {
944                        (SessionSchemaVersion::V1, 0.7, "json-legacy-structure")
945                    } else {
946                        (SessionSchemaVersion::Unknown, 0.4, "json-unknown-structure")
947                    }
948                }
949            } else {
950                // Try sanitizing and parsing again
951                let sanitized = sanitize_json_unicode(trimmed);
952                if let Ok(json) = serde_json::from_str::<serde_json::Value>(&sanitized) {
953                    if let Some(ver) = json.get("version").and_then(|x| x.as_u64()) {
954                        (
955                            SessionSchemaVersion::from_version(ver as u32),
956                            0.9,
957                            "json-version-after-sanitize",
958                        )
959                    } else {
960                        (SessionSchemaVersion::V3, 0.6, "json-default-after-sanitize")
961                    }
962                } else {
963                    (SessionSchemaVersion::Unknown, 0.2, "json-parse-error")
964                }
965            }
966        }
967    };
968
969    SessionFormatInfo {
970        format,
971        schema_version,
972        confidence,
973        detection_method: method,
974    }
975}
976
977/// Parse session content with automatic format detection
978pub fn parse_session_auto(
979    content: &str,
980) -> std::result::Result<(ChatSession, SessionFormatInfo), serde_json::Error> {
981    let format_info = detect_session_format(content);
982
983    let session = match format_info.format {
984        VsCodeSessionFormat::JsonLines => parse_session_jsonl(content)?,
985        VsCodeSessionFormat::LegacyJson => parse_session_json(content)?,
986    };
987
988    Ok((session, format_info))
989}
990
991/// Parse a session file, automatically detecting format from content (not just extension)
992pub fn parse_session_file(path: &Path) -> std::result::Result<ChatSession, serde_json::Error> {
993    let content = std::fs::read_to_string(path)
994        .map_err(|e| serde_json::Error::io(std::io::Error::other(e.to_string())))?;
995
996    // Use content-based auto-detection
997    let (session, _format_info) = parse_session_auto(&content)?;
998    Ok(session)
999}
1000
1001/// Get the path to the workspace storage database
1002pub fn get_workspace_storage_db(workspace_id: &str) -> Result<PathBuf> {
1003    let storage_path = get_workspace_storage_path()?;
1004    Ok(storage_path.join(workspace_id).join("state.vscdb"))
1005}
1006
1007/// Read the chat session index from VS Code storage
1008pub fn read_chat_session_index(db_path: &Path) -> Result<ChatSessionIndex> {
1009    let conn = Connection::open(db_path)?;
1010
1011    let result: std::result::Result<String, rusqlite::Error> = conn.query_row(
1012        "SELECT value FROM ItemTable WHERE key = ?",
1013        ["chat.ChatSessionStore.index"],
1014        |row| row.get(0),
1015    );
1016
1017    match result {
1018        Ok(json_str) => serde_json::from_str(&json_str)
1019            .map_err(|e| CsmError::InvalidSessionFormat(e.to_string())),
1020        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(ChatSessionIndex::default()),
1021        Err(e) => Err(CsmError::SqliteError(e)),
1022    }
1023}
1024
1025/// Write the chat session index to VS Code storage
1026pub fn write_chat_session_index(db_path: &Path, index: &ChatSessionIndex) -> Result<()> {
1027    let conn = Connection::open(db_path)?;
1028    let json_str = serde_json::to_string(index)?;
1029
1030    // Check if the key exists
1031    let exists: bool = conn.query_row(
1032        "SELECT COUNT(*) > 0 FROM ItemTable WHERE key = ?",
1033        ["chat.ChatSessionStore.index"],
1034        |row| row.get(0),
1035    )?;
1036
1037    if exists {
1038        conn.execute(
1039            "UPDATE ItemTable SET value = ? WHERE key = ?",
1040            [&json_str, "chat.ChatSessionStore.index"],
1041        )?;
1042    } else {
1043        conn.execute(
1044            "INSERT INTO ItemTable (key, value) VALUES (?, ?)",
1045            ["chat.ChatSessionStore.index", &json_str],
1046        )?;
1047    }
1048
1049    Ok(())
1050}
1051
1052// ── Generic DB key read/write ──────────────────────────────────────────────
1053
1054/// Read a JSON value from the VS Code state DB by key
1055pub fn read_db_json(db_path: &Path, key: &str) -> Result<Option<serde_json::Value>> {
1056    let conn = Connection::open(db_path)?;
1057    let result: std::result::Result<String, rusqlite::Error> =
1058        conn.query_row("SELECT value FROM ItemTable WHERE key = ?", [key], |row| {
1059            row.get(0)
1060        });
1061    match result {
1062        Ok(json_str) => {
1063            let v = serde_json::from_str(&json_str)
1064                .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
1065            Ok(Some(v))
1066        }
1067        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
1068        Err(e) => Err(CsmError::SqliteError(e)),
1069    }
1070}
1071
1072/// Write a JSON value to the VS Code state DB (upsert)
1073fn write_db_json(db_path: &Path, key: &str, value: &serde_json::Value) -> Result<()> {
1074    let conn = Connection::open(db_path)?;
1075    let json_str = serde_json::to_string(value)?;
1076    conn.execute(
1077        "INSERT OR REPLACE INTO ItemTable (key, value) VALUES (?, ?)",
1078        rusqlite::params![key, json_str],
1079    )?;
1080    Ok(())
1081}
1082
1083// ── Session resource URI helpers ───────────────────────────────────────────
1084
1085/// Build the `vscode-chat-session://local/{base64(sessionId)}` resource URI
1086/// that VS Code uses to identify sessions in model cache and state cache.
1087pub fn session_resource_uri(session_id: &str) -> String {
1088    let b64 = BASE64.encode(session_id.as_bytes());
1089    format!("vscode-chat-session://local/{}", b64)
1090}
1091
1092/// Extract a session ID from a `vscode-chat-session://` resource URI.
1093/// Returns `None` if the URI doesn't match the expected format.
1094pub fn session_id_from_resource_uri(uri: &str) -> Option<String> {
1095    let prefix = "vscode-chat-session://local/";
1096    if let Some(b64) = uri.strip_prefix(prefix) {
1097        BASE64
1098            .decode(b64)
1099            .ok()
1100            .and_then(|bytes| String::from_utf8(bytes).ok())
1101    } else {
1102        None
1103    }
1104}
1105
1106// ── Model cache (agentSessions.model.cache) ────────────────────────────────
1107
1108const MODEL_CACHE_KEY: &str = "agentSessions.model.cache";
1109
1110/// Read the `agentSessions.model.cache` from VS Code storage.
1111/// Returns an empty Vec if the key doesn't exist.
1112pub fn read_model_cache(db_path: &Path) -> Result<Vec<ModelCacheEntry>> {
1113    match read_db_json(db_path, MODEL_CACHE_KEY)? {
1114        Some(v) => serde_json::from_value(v)
1115            .map_err(|e| CsmError::InvalidSessionFormat(format!("model cache: {}", e))),
1116        None => Ok(Vec::new()),
1117    }
1118}
1119
1120/// Write the `agentSessions.model.cache` to VS Code storage.
1121pub fn write_model_cache(db_path: &Path, cache: &[ModelCacheEntry]) -> Result<()> {
1122    let v = serde_json::to_value(cache)?;
1123    write_db_json(db_path, MODEL_CACHE_KEY, &v)
1124}
1125
1126/// Rebuild the model cache from the session index. This makes sessions visible
1127/// in the Chat panel sidebar. Only non-empty sessions get entries (VS Code
1128/// hides empty ones).
1129pub fn rebuild_model_cache(db_path: &Path, index: &ChatSessionIndex) -> Result<usize> {
1130    let mut cache: Vec<ModelCacheEntry> = Vec::new();
1131
1132    for (session_id, entry) in &index.entries {
1133        // Only include non-empty sessions — empty ones are hidden in the sidebar
1134        if entry.is_empty {
1135            continue;
1136        }
1137
1138        let timing = entry.timing.clone().unwrap_or(ChatSessionTiming {
1139            created: entry.last_message_date,
1140            last_request_started: Some(entry.last_message_date),
1141            last_request_ended: Some(entry.last_message_date),
1142        });
1143
1144        cache.push(ModelCacheEntry {
1145            provider_type: "local".to_string(),
1146            provider_label: "Local".to_string(),
1147            resource: session_resource_uri(session_id),
1148            icon: "vm".to_string(),
1149            label: entry.title.clone(),
1150            status: 1,
1151            timing,
1152            initial_location: entry.initial_location.clone(),
1153            has_pending_edits: false,
1154            is_empty: false,
1155            is_external: entry.is_external.unwrap_or(false),
1156            last_response_state: 1, // Complete
1157        });
1158    }
1159
1160    let count = cache.len();
1161    write_model_cache(db_path, &cache)?;
1162    Ok(count)
1163}
1164
1165// ── State cache (agentSessions.state.cache) ────────────────────────────────
1166
1167const STATE_CACHE_KEY: &str = "agentSessions.state.cache";
1168
1169/// Read the `agentSessions.state.cache` from VS Code storage.
1170pub fn read_state_cache(db_path: &Path) -> Result<Vec<StateCacheEntry>> {
1171    match read_db_json(db_path, STATE_CACHE_KEY)? {
1172        Some(v) => serde_json::from_value(v)
1173            .map_err(|e| CsmError::InvalidSessionFormat(format!("state cache: {}", e))),
1174        None => Ok(Vec::new()),
1175    }
1176}
1177
1178/// Write the `agentSessions.state.cache` to VS Code storage.
1179pub fn write_state_cache(db_path: &Path, cache: &[StateCacheEntry]) -> Result<()> {
1180    let v = serde_json::to_value(cache)?;
1181    write_db_json(db_path, STATE_CACHE_KEY, &v)
1182}
1183
1184/// Remove state cache entries whose resource URIs reference sessions that no
1185/// longer exist on disk. Returns the number of stale entries removed.
1186pub fn cleanup_state_cache(db_path: &Path, valid_session_ids: &HashSet<String>) -> Result<usize> {
1187    let entries = read_state_cache(db_path)?;
1188    let valid_resources: HashSet<String> = valid_session_ids
1189        .iter()
1190        .map(|id| session_resource_uri(id))
1191        .collect();
1192
1193    let before = entries.len();
1194    let cleaned: Vec<StateCacheEntry> = entries
1195        .into_iter()
1196        .filter(|e| valid_resources.contains(&e.resource))
1197        .collect();
1198    let removed = before - cleaned.len();
1199
1200    if removed > 0 {
1201        write_state_cache(db_path, &cleaned)?;
1202    }
1203
1204    Ok(removed)
1205}
1206
1207// ── Memento (memento/interactive-session-view-copilot) ──────────────────────
1208
1209const MEMENTO_KEY: &str = "memento/interactive-session-view-copilot";
1210
1211/// Read the Copilot Chat memento (tracks the last-active session).
1212pub fn read_session_memento(db_path: &Path) -> Result<Option<serde_json::Value>> {
1213    read_db_json(db_path, MEMENTO_KEY)
1214}
1215
1216/// Write the Copilot Chat memento.
1217pub fn write_session_memento(db_path: &Path, value: &serde_json::Value) -> Result<()> {
1218    write_db_json(db_path, MEMENTO_KEY, value)
1219}
1220
1221/// Fix the memento so it points to a session that actually exists.
1222/// If the current memento references a deleted/non-existent session, update it
1223/// to the most recently active valid session. Returns `true` if the memento was
1224/// changed.
1225pub fn fix_session_memento(
1226    db_path: &Path,
1227    valid_session_ids: &HashSet<String>,
1228    preferred_session_id: Option<&str>,
1229) -> Result<bool> {
1230    let memento = read_session_memento(db_path)?;
1231
1232    let current_sid = memento
1233        .as_ref()
1234        .and_then(|v| v.get("sessionId"))
1235        .and_then(|v| v.as_str())
1236        .map(|s| s.to_string());
1237
1238    // Check if current memento already points to a valid session
1239    if let Some(ref sid) = current_sid {
1240        if valid_session_ids.contains(sid) {
1241            return Ok(false); // Already valid
1242        }
1243    }
1244
1245    // Pick a session to point to: prefer the explicit choice, otherwise pick any valid one
1246    let target = preferred_session_id
1247        .filter(|id| valid_session_ids.contains(*id))
1248        .or_else(|| valid_session_ids.iter().next().map(|s| s.as_str()));
1249
1250    if let Some(target_id) = target {
1251        let mut new_memento = memento.unwrap_or(serde_json::json!({}));
1252        if let Some(obj) = new_memento.as_object_mut() {
1253            obj.insert(
1254                "sessionId".to_string(),
1255                serde_json::Value::String(target_id.to_string()),
1256            );
1257        }
1258        write_session_memento(db_path, &new_memento)?;
1259        Ok(true)
1260    } else {
1261        Ok(false) // No valid sessions to point to
1262    }
1263}
1264
1265// ── .json.bak recovery ─────────────────────────────────────────────────────
1266
1267/// Count the number of requests in a session's `v.requests` array from a JSONL
1268/// file (reads only the first kind:0 line).
1269fn count_jsonl_requests(path: &Path) -> Result<usize> {
1270    let content = std::fs::read_to_string(path)
1271        .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
1272    let first_line = content.lines().next().unwrap_or("");
1273    let parsed: serde_json::Value = serde_json::from_str(first_line)
1274        .map_err(|e| CsmError::InvalidSessionFormat(format!("Parse error: {}", e)))?;
1275
1276    let count = parsed
1277        .get("v")
1278        .or_else(|| Some(&parsed)) // bare JSON (non-JSONL) may not have "v" wrapper
1279        .and_then(|v| v.get("requests"))
1280        .and_then(|r| r.as_array())
1281        .map(|a| a.len())
1282        .unwrap_or(0);
1283
1284    Ok(count)
1285}
1286
1287/// Count the number of requests in a `.json.bak` (or `.json`) file.
1288fn count_json_bak_requests(path: &Path) -> Result<usize> {
1289    let content = std::fs::read_to_string(path)
1290        .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
1291    let parsed: serde_json::Value = serde_json::from_str(&content)
1292        .map_err(|e| CsmError::InvalidSessionFormat(format!("Parse error: {}", e)))?;
1293
1294    let count = parsed
1295        .get("requests")
1296        .and_then(|r| r.as_array())
1297        .map(|a| a.len())
1298        .unwrap_or(0);
1299
1300    Ok(count)
1301}
1302
1303/// Migrate old-format inputState fields from top-level to a nested `inputState`
1304/// object. VS Code version 3 expects `inputState` as a sub-object with keys
1305/// `attachments`, `mode`, `inputText`, `selections`, `contrib`.
1306///
1307/// Old format (pre-v3): `{ "attachments": [...], "mode": {...}, "inputText": "...", ... }`
1308/// New format (v3):     `{ "inputState": { "attachments": [...], "mode": {...}, ... } }`
1309pub fn migrate_old_input_state(state: &mut serde_json::Value) {
1310    if let Some(obj) = state.as_object_mut() {
1311        // Only migrate if inputState doesn't already exist AND old top-level fields do
1312        if obj.contains_key("inputState") {
1313            return;
1314        }
1315
1316        let old_keys = [
1317            "attachments",
1318            "mode",
1319            "inputText",
1320            "selections",
1321            "contrib",
1322            "selectedModel",
1323        ];
1324        let has_old = old_keys.iter().any(|k| obj.contains_key(*k));
1325
1326        if has_old {
1327            let mut input_state = serde_json::Map::new();
1328
1329            // Move each old key into the nested object (with defaults)
1330            input_state.insert(
1331                "attachments".to_string(),
1332                obj.remove("attachments").unwrap_or(serde_json::json!([])),
1333            );
1334            input_state.insert(
1335                "mode".to_string(),
1336                obj.remove("mode")
1337                    .unwrap_or(serde_json::json!({"id": "agent", "kind": "agent"})),
1338            );
1339            input_state.insert(
1340                "inputText".to_string(),
1341                obj.remove("inputText").unwrap_or(serde_json::json!("")),
1342            );
1343            input_state.insert(
1344                "selections".to_string(),
1345                obj.remove("selections").unwrap_or(serde_json::json!([])),
1346            );
1347            input_state.insert(
1348                "contrib".to_string(),
1349                obj.remove("contrib").unwrap_or(serde_json::json!({})),
1350            );
1351
1352            // selectedModel is optional, only include if present
1353            if let Some(model) = obj.remove("selectedModel") {
1354                input_state.insert("selectedModel".to_string(), model);
1355            }
1356
1357            obj.insert(
1358                "inputState".to_string(),
1359                serde_json::Value::Object(input_state),
1360            );
1361        }
1362    }
1363}
1364
1365/// Recover sessions from `.json.bak` files when the corresponding `.jsonl` has
1366/// fewer requests (indicating a truncated migration/compaction). For each .jsonl
1367/// that has a co-located .json.bak with more requests, rebuilds the .jsonl from
1368/// the backup data.
1369///
1370/// Returns the number of sessions recovered from backups.
1371pub fn recover_from_json_bak(chat_sessions_dir: &Path) -> Result<usize> {
1372    if !chat_sessions_dir.exists() {
1373        return Ok(0);
1374    }
1375
1376    let mut recovered = 0;
1377
1378    // Collect all .json.bak files
1379    let mut bak_files: Vec<PathBuf> = Vec::new();
1380    for entry in std::fs::read_dir(chat_sessions_dir)? {
1381        let entry = entry?;
1382        let path = entry.path();
1383        if path.to_string_lossy().ends_with(".json.bak") {
1384            bak_files.push(path);
1385        }
1386    }
1387
1388    for bak_path in &bak_files {
1389        // Derive session ID and .jsonl path
1390        let bak_name = bak_path
1391            .file_name()
1392            .unwrap_or_default()
1393            .to_string_lossy()
1394            .to_string();
1395        let session_id = bak_name.trim_end_matches(".json.bak");
1396        let jsonl_path = chat_sessions_dir.join(format!("{}.jsonl", session_id));
1397
1398        // Get request counts
1399        let bak_count = match count_json_bak_requests(bak_path) {
1400            Ok(c) => c,
1401            Err(_) => continue, // Skip unparseable backups
1402        };
1403
1404        if bak_count == 0 {
1405            continue; // Backup has no data, skip
1406        }
1407
1408        let jsonl_count = if jsonl_path.exists() {
1409            count_jsonl_requests(&jsonl_path).unwrap_or(0)
1410        } else {
1411            0 // No .jsonl at all — definitely recover from backup
1412        };
1413
1414        if bak_count <= jsonl_count {
1415            continue; // .jsonl already has equal or more data
1416        }
1417
1418        // .json.bak has more requests — recover from it
1419        println!(
1420            "   [*] .json.bak has {} requests vs .jsonl has {} for {}",
1421            bak_count, jsonl_count, session_id
1422        );
1423
1424        // Read the full backup
1425        let bak_content = match std::fs::read_to_string(bak_path) {
1426            Ok(c) => c,
1427            Err(e) => {
1428                println!("   [WARN] Failed to read .json.bak {}: {}", session_id, e);
1429                continue;
1430            }
1431        };
1432        let mut full_data: serde_json::Value = match serde_json::from_str(&bak_content) {
1433            Ok(v) => v,
1434            Err(e) => {
1435                println!("   [WARN] Failed to parse .json.bak {}: {}", session_id, e);
1436                continue;
1437            }
1438        };
1439
1440        // Clean up: build ISerializableChatData3 format
1441        if let Some(obj) = full_data.as_object_mut() {
1442            // Ensure version 3
1443            obj.insert("version".to_string(), serde_json::json!(3));
1444
1445            // Ensure sessionId
1446            if !obj.contains_key("sessionId") {
1447                obj.insert("sessionId".to_string(), serde_json::json!(session_id));
1448            }
1449
1450            // Force safe values
1451            obj.insert("hasPendingEdits".to_string(), serde_json::json!(false));
1452            obj.insert("pendingRequests".to_string(), serde_json::json!([]));
1453
1454            // Ensure responderUsername
1455            if !obj.contains_key("responderUsername") {
1456                obj.insert(
1457                    "responderUsername".to_string(),
1458                    serde_json::json!("GitHub Copilot"),
1459                );
1460            }
1461
1462            // Migrate old inputState format
1463            migrate_old_input_state(&mut full_data);
1464
1465            // Fix modelState values in requests
1466            fix_request_model_states(&mut full_data);
1467        }
1468
1469        // Backup existing .jsonl if present
1470        if jsonl_path.exists() {
1471            let pre_fix_bak = jsonl_path.with_extension("jsonl.pre_bak_recovery");
1472            if let Err(e) = std::fs::copy(&jsonl_path, &pre_fix_bak) {
1473                println!(
1474                    "   [WARN] Failed to backup .jsonl before recovery {}: {}",
1475                    session_id, e
1476                );
1477                continue;
1478            }
1479        }
1480
1481        // Write new JSONL kind:0
1482        let jsonl_obj = serde_json::json!({"kind": 0, "v": full_data});
1483        let jsonl_str = serde_json::to_string(&jsonl_obj).map_err(|e| {
1484            CsmError::InvalidSessionFormat(format!("Failed to serialize recovered session: {}", e))
1485        })?;
1486        std::fs::write(&jsonl_path, format!("{}\n", jsonl_str))?;
1487
1488        println!(
1489            "   [OK] Recovered {} from .json.bak ({} → {} requests)",
1490            session_id, jsonl_count, bak_count
1491        );
1492        recovered += 1;
1493    }
1494
1495    Ok(recovered)
1496}
1497
/// Recover sessions from `.jsonl.bak` files when the backup is larger than the
/// corresponding `.jsonl` file, which suggests the live session was truncated.
///
/// For each `<id>.jsonl.bak` whose byte size strictly exceeds the active
/// `<id>.jsonl`:
/// 1. the active file is copied to `<id>.jsonl.pre-restore` (safety backup);
/// 2. the `.jsonl.bak` is copied over the active `.jsonl`.
///
/// Backups without a matching `.jsonl`, and files whose metadata cannot be
/// read, are skipped. A failed copy is reported on stdout and the session is
/// skipped; if the restore copy fails, a rollback from the safety backup is
/// attempted and a failed rollback is reported as well.
///
/// When `dry_run` is `true` nothing is written; the restores that would happen
/// are printed and still counted in the returned totals.
///
/// # Errors
///
/// Returns an error only when `chat_sessions_dir` cannot be listed.
///
/// Returns `(restored_count, total_bytes_recovered)`; a missing directory
/// yields `(0, 0)`.
pub fn recover_from_jsonl_bak(chat_sessions_dir: &Path, dry_run: bool) -> Result<(usize, u64)> {
    const BAK_SUFFIX: &str = ".jsonl.bak";

    if !chat_sessions_dir.exists() {
        return Ok((0, 0));
    }

    let mut restored = 0usize;
    let mut bytes_recovered = 0u64;

    // Snapshot the candidates first: restoring creates new files in this directory.
    let mut bak_files: Vec<PathBuf> = Vec::new();
    for entry in std::fs::read_dir(chat_sessions_dir)? {
        let path = entry?.path();
        if path.to_string_lossy().ends_with(BAK_SUFFIX) {
            bak_files.push(path);
        }
    }

    for bak_path in &bak_files {
        let bak_name = bak_path.file_name().unwrap_or_default().to_string_lossy();
        // Strip the suffix exactly once. `trim_end_matches` would strip it
        // repeatedly and map `x.jsonl.bak.jsonl.bak` onto session `x`.
        let Some(session_id) = bak_name.strip_suffix(BAK_SUFFIX) else {
            continue;
        };
        let jsonl_path = chat_sessions_dir.join(format!("{}.jsonl", session_id));

        // Only act when the .jsonl exists AND the backup is strictly larger
        if !jsonl_path.exists() {
            continue;
        }

        let orig_size = match std::fs::metadata(&jsonl_path) {
            Ok(m) => m.len(),
            Err(_) => continue,
        };
        let bak_size = match std::fs::metadata(bak_path) {
            Ok(m) => m.len(),
            Err(_) => continue,
        };

        if bak_size <= orig_size {
            continue; // Backup is not larger, nothing to recover
        }

        let delta = bak_size - orig_size;
        let orig_kb = orig_size as f64 / 1024.0;
        let bak_kb = bak_size as f64 / 1024.0;
        let delta_kb = delta as f64 / 1024.0;

        if dry_run {
            println!(
                "   [*] Would restore {} ({:.1}KB → {:.1}KB, +{:.1}KB)",
                session_id, orig_kb, bak_kb, delta_kb
            );
        } else {
            // Safety backup of the current file before it is overwritten
            let pre_restore = jsonl_path.with_extension("jsonl.pre-restore");
            if let Err(e) = std::fs::copy(&jsonl_path, &pre_restore) {
                println!(
                    "   [WARN] Failed to create safety backup for {}: {}",
                    session_id, e
                );
                continue;
            }

            // Restore from backup
            if let Err(e) = std::fs::copy(bak_path, &jsonl_path) {
                println!(
                    "   [WARN] Failed to restore {} from .jsonl.bak: {}",
                    session_id, e
                );
                // A failed copy may leave a partial file behind, so try to put
                // the previous content back and say so if that fails too.
                if let Err(rollback_err) = std::fs::copy(&pre_restore, &jsonl_path) {
                    println!(
                        "   [WARN] Rollback of {} failed, previous content is kept in {}: {}",
                        session_id,
                        pre_restore.display(),
                        rollback_err
                    );
                }
                continue;
            }

            println!(
                "   [OK] Restored {} from .jsonl.bak ({:.1}KB → {:.1}KB, +{:.1}KB recovered)",
                session_id, orig_kb, bak_kb, delta_kb
            );
        }

        restored += 1;
        bytes_recovered += delta;
    }

    Ok((restored, bytes_recovered))
}
1603
/// Outcome record for one session that was (or, in a dry run, would be)
/// restored from a backup file.
#[derive(Debug, Clone)]
pub struct BackupRecoveryAction {
    /// Identifier of the recovered session (the UUID prefix of its file name).
    pub session_id: String,
    /// File name of the backup that was chosen as the recovery source.
    pub source_file: String,
    /// Request count found in the active `.jsonl` before recovery.
    pub current_requests: usize,
    /// Request count found in the chosen backup.
    pub recovered_requests: usize,
    /// Byte size of the active `.jsonl` before recovery.
    pub current_size: u64,
    /// Byte size of the chosen backup.
    pub recovered_size: u64,
    /// `true` when the backup was in the legacy JSON format and had to be
    /// converted to JSONL rather than copied verbatim.
    pub converted: bool,
}
1622
1623/// Comprehensive session recovery from ALL backup file variants.
1624///
1625/// For each session ID found in `chat_sessions_dir`, examines:
1626/// - `.jsonl.bak` (VS Code JSONL backup)
1627/// - `.jsonl.pre-restore` (chasm safety backup)
1628/// - `.jsonl.pre_bak_recovery` (earlier chasm recovery backup)
1629/// - `.json` (old JSON format — may contain more requests than current JSONL)
1630/// - `.json.bak` (old JSON format backup)
1631///
1632/// Selects the file with the **most requests** (not just largest size) and
1633/// restores it as the active `.jsonl`, converting from JSON format if needed.
1634///
1635/// Returns a list of recovery actions taken (or that would be taken in dry-run).
1636pub fn recover_from_all_backups(
1637    chat_sessions_dir: &Path,
1638    dry_run: bool,
1639) -> Result<Vec<BackupRecoveryAction>> {
1640    use std::collections::HashMap;
1641
1642    if !chat_sessions_dir.exists() {
1643        return Ok(Vec::new());
1644    }
1645
1646    // Collect all files grouped by session ID (first 36 chars = UUID)
1647    let mut session_files: HashMap<String, Vec<(String, PathBuf)>> = HashMap::new();
1648    for entry in std::fs::read_dir(chat_sessions_dir)? {
1649        let entry = entry?;
1650        let path = entry.path();
1651        if !path.is_file() {
1652            continue;
1653        }
1654        let fname = path
1655            .file_name()
1656            .unwrap_or_default()
1657            .to_string_lossy()
1658            .to_string();
1659        // Skip markdown, non-session files
1660        if fname.ends_with(".md") || fname.len() < 36 {
1661            continue;
1662        }
1663        // Session ID is the first 36 characters (UUID format)
1664        let sid = fname[..36].to_string();
1665        // Only include JSON/JSONL files (including .bak, .pre-restore variants)
1666        if fname.contains(".json") {
1667            session_files.entry(sid).or_default().push((fname, path));
1668        }
1669    }
1670
1671    let mut actions = Vec::new();
1672
1673    for (sid, files) in &session_files {
1674        // Find the current active .jsonl file
1675        let current_jsonl_name = format!("{}.jsonl", sid);
1676        let current_jsonl_path = chat_sessions_dir.join(&current_jsonl_name);
1677
1678        // Parse the current .jsonl to get its request count
1679        let current_requests = if current_jsonl_path.exists() {
1680            match parse_session_file(&current_jsonl_path) {
1681                Ok(session) => session.requests.len(),
1682                Err(_) => 0,
1683            }
1684        } else {
1685            0
1686        };
1687        let current_size = if current_jsonl_path.exists() {
1688            std::fs::metadata(&current_jsonl_path)
1689                .map(|m| m.len())
1690                .unwrap_or(0)
1691        } else {
1692            0
1693        };
1694
1695        // Find the backup with the most requests
1696        let mut best_requests = current_requests;
1697        let mut best_file: Option<(&str, &Path)> = None;
1698
1699        for (fname, fpath) in files {
1700            // Skip the current active file
1701            if fname == &current_jsonl_name {
1702                continue;
1703            }
1704            // Skip files that are clearly recovery markers (tiny files)
1705            let size = std::fs::metadata(fpath).map(|m| m.len()).unwrap_or(0);
1706            if size < 100 {
1707                continue;
1708            }
1709            // Parse to count requests
1710            match parse_session_file(fpath) {
1711                Ok(session) => {
1712                    let req_count = session.requests.len();
1713                    if req_count > best_requests {
1714                        best_requests = req_count;
1715                        best_file = Some((fname.as_str(), fpath.as_path()));
1716                    }
1717                }
1718                Err(_) => {
1719                    // Can't parse — skip
1720                }
1721            }
1722        }
1723
1724        if let Some((best_name, best_path)) = best_file {
1725            let best_size = std::fs::metadata(best_path).map(|m| m.len()).unwrap_or(0);
1726            let is_json_source = !best_name.contains(".jsonl");
1727
1728            if !dry_run {
1729                // Safety backup of current file
1730                if current_jsonl_path.exists() {
1731                    let pre_restore = current_jsonl_path.with_extension("jsonl.pre-restore");
1732                    // Don't overwrite existing pre-restore (keep earliest backup)
1733                    if !pre_restore.exists() {
1734                        if let Err(e) = std::fs::copy(&current_jsonl_path, &pre_restore) {
1735                            eprintln!(
1736                                "   [WARN] Failed to create safety backup for {}: {}",
1737                                sid, e
1738                            );
1739                            continue;
1740                        }
1741                    }
1742                }
1743
1744                if is_json_source {
1745                    // Convert JSON → JSONL: parse and re-serialize as kind:0 JSONL entry
1746                    match parse_session_file(best_path) {
1747                        Ok(session) => {
1748                            // Read the raw JSON to preserve all fields
1749                            let raw_content =
1750                                std::fs::read_to_string(best_path).unwrap_or_default();
1751                            let raw_value: serde_json::Value =
1752                                serde_json::from_str(&raw_content).unwrap_or_default();
1753                            let jsonl_entry = serde_json::json!({"kind": 0, "v": raw_value});
1754                            if let Err(e) = std::fs::write(
1755                                &current_jsonl_path,
1756                                serde_json::to_string(&jsonl_entry).unwrap_or_default() + "\n",
1757                            ) {
1758                                eprintln!(
1759                                    "   [WARN] Failed to write converted JSONL for {}: {}",
1760                                    sid, e
1761                                );
1762                                continue;
1763                            }
1764                            // Update the session_id if missing in the converted file
1765                            let _ = session;
1766                        }
1767                        Err(e) => {
1768                            eprintln!("   [WARN] Failed to parse JSON backup for {}: {}", sid, e);
1769                            continue;
1770                        }
1771                    }
1772                } else {
1773                    // JSONL → JSONL: direct copy
1774                    if let Err(e) = std::fs::copy(best_path, &current_jsonl_path) {
1775                        eprintln!(
1776                            "   [WARN] Failed to restore {} from {}: {}",
1777                            sid, best_name, e
1778                        );
1779                        continue;
1780                    }
1781                }
1782            }
1783
1784            actions.push(BackupRecoveryAction {
1785                session_id: sid.clone(),
1786                source_file: best_name.to_string(),
1787                current_requests,
1788                recovered_requests: best_requests,
1789                current_size,
1790                recovered_size: best_size,
1791                converted: is_json_source,
1792            });
1793        }
1794    }
1795
1796    // Sort by session ID for deterministic output
1797    actions.sort_by(|a, b| a.session_id.cmp(&b.session_id));
1798
1799    Ok(actions)
1800}
1801
1802/// Fix modelState values in a session's requests array.
1803/// - Pending (value=0) or Cancelled (value=2) → set to Cancelled (3) with completedAt
1804/// - Terminal states (1, 3, 4) without completedAt → add completedAt from request timestamp
1805fn fix_request_model_states(session_data: &mut serde_json::Value) {
1806    let requests = match session_data
1807        .get_mut("requests")
1808        .and_then(|r| r.as_array_mut())
1809    {
1810        Some(r) => r,
1811        None => return,
1812    };
1813
1814    for req in requests.iter_mut() {
1815        let timestamp = req
1816            .get("timestamp")
1817            .and_then(|t| t.as_i64())
1818            .unwrap_or_else(|| {
1819                std::time::SystemTime::now()
1820                    .duration_since(std::time::UNIX_EPOCH)
1821                    .unwrap_or_default()
1822                    .as_millis() as i64
1823            });
1824
1825        if let Some(ms) = req.get_mut("modelState") {
1826            if let Some(val) = ms.get("value").and_then(|v| v.as_u64()) {
1827                match val {
1828                    0 | 2 => {
1829                        // Pending or Cancelled → force to Cancelled with completedAt
1830                        *ms = serde_json::json!({
1831                            "value": 3,
1832                            "completedAt": timestamp
1833                        });
1834                    }
1835                    1 | 3 | 4 => {
1836                        // Terminal states — ensure completedAt exists
1837                        if ms.get("completedAt").is_none() {
1838                            if let Some(ms_obj) = ms.as_object_mut() {
1839                                ms_obj.insert(
1840                                    "completedAt".to_string(),
1841                                    serde_json::json!(timestamp),
1842                                );
1843                            }
1844                        }
1845                    }
1846                    _ => {}
1847                }
1848            }
1849        }
1850    }
1851}
1852
1853/// Add a session to the VS Code index
1854pub fn add_session_to_index(
1855    db_path: &Path,
1856    session_id: &str,
1857    title: &str,
1858    last_message_date_ms: i64,
1859    _is_imported: bool,
1860    initial_location: &str,
1861    is_empty: bool,
1862) -> Result<()> {
1863    let mut index = read_chat_session_index(db_path)?;
1864
1865    index.entries.insert(
1866        session_id.to_string(),
1867        ChatSessionIndexEntry {
1868            session_id: session_id.to_string(),
1869            title: title.to_string(),
1870            last_message_date: last_message_date_ms,
1871            timing: Some(ChatSessionTiming {
1872                created: last_message_date_ms,
1873                last_request_started: Some(last_message_date_ms),
1874                last_request_ended: Some(last_message_date_ms),
1875            }),
1876            last_response_state: 1, // ResponseModelState.Complete
1877            initial_location: initial_location.to_string(),
1878            is_empty,
1879            is_imported: Some(_is_imported),
1880            has_pending_edits: Some(false),
1881            is_external: Some(false),
1882        },
1883    );
1884
1885    write_chat_session_index(db_path, &index)
1886}
1887
1888/// Remove a session from the VS Code index
1889#[allow(dead_code)]
1890pub fn remove_session_from_index(db_path: &Path, session_id: &str) -> Result<bool> {
1891    let mut index = read_chat_session_index(db_path)?;
1892    let removed = index.entries.remove(session_id).is_some();
1893    if removed {
1894        write_chat_session_index(db_path, &index)?;
1895    }
1896    Ok(removed)
1897}
1898
1899/// Sync the VS Code index with sessions on disk (remove stale entries, add missing ones)
1900/// When both .json and .jsonl exist for the same session ID, prefers .jsonl.
1901pub fn sync_session_index(
1902    workspace_id: &str,
1903    chat_sessions_dir: &Path,
1904    force: bool,
1905) -> Result<(usize, usize)> {
1906    let db_path = get_workspace_storage_db(workspace_id)?;
1907
1908    if !db_path.exists() {
1909        return Err(CsmError::WorkspaceNotFound(format!(
1910            "Database not found: {}",
1911            db_path.display()
1912        )));
1913    }
1914
1915    // Check if VS Code is running
1916    if !force && is_vscode_running() {
1917        return Err(CsmError::VSCodeRunning);
1918    }
1919
1920    // Get current index
1921    let mut index = read_chat_session_index(&db_path)?;
1922
1923    // Get session files on disk
1924    let mut files_on_disk: std::collections::HashSet<String> = std::collections::HashSet::new();
1925    if chat_sessions_dir.exists() {
1926        for entry in std::fs::read_dir(chat_sessions_dir)? {
1927            let entry = entry?;
1928            let path = entry.path();
1929            if path
1930                .extension()
1931                .map(is_session_file_extension)
1932                .unwrap_or(false)
1933            {
1934                if let Some(stem) = path.file_stem() {
1935                    files_on_disk.insert(stem.to_string_lossy().to_string());
1936                }
1937            }
1938        }
1939    }
1940
1941    // Remove stale entries (in index but not on disk)
1942    let stale_ids: Vec<String> = index
1943        .entries
1944        .keys()
1945        .filter(|id| !files_on_disk.contains(*id))
1946        .cloned()
1947        .collect();
1948
1949    let removed = stale_ids.len();
1950    for id in &stale_ids {
1951        index.entries.remove(id);
1952    }
1953
1954    // Add/update sessions from disk
1955    // Collect files, preferring .jsonl over .json for the same session ID
1956    let mut session_files: std::collections::HashMap<String, PathBuf> =
1957        std::collections::HashMap::new();
1958    for entry in std::fs::read_dir(chat_sessions_dir)? {
1959        let entry = entry?;
1960        let path = entry.path();
1961        if path
1962            .extension()
1963            .map(is_session_file_extension)
1964            .unwrap_or(false)
1965        {
1966            if let Some(stem) = path.file_stem() {
1967                let stem_str = stem.to_string_lossy().to_string();
1968                let is_jsonl = path.extension().is_some_and(|e| e == "jsonl");
1969                // Insert if no entry yet, or if this is .jsonl (preferred over .json)
1970                if !session_files.contains_key(&stem_str) || is_jsonl {
1971                    session_files.insert(stem_str, path);
1972                }
1973            }
1974        }
1975    }
1976
1977    let mut added = 0;
1978    for (_, path) in &session_files {
1979        if let Ok(session) = parse_session_file(path) {
1980            let session_id = session.session_id.clone().unwrap_or_else(|| {
1981                path.file_stem()
1982                    .map(|s| s.to_string_lossy().to_string())
1983                    .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
1984            });
1985
1986            let title = session.title();
1987            let is_empty = session.is_empty();
1988            let last_message_date = session.last_message_date;
1989            let initial_location = session.initial_location.clone();
1990
1991            index.entries.insert(
1992                session_id.clone(),
1993                ChatSessionIndexEntry {
1994                    session_id,
1995                    title,
1996                    last_message_date,
1997                    timing: Some(ChatSessionTiming {
1998                        created: session.creation_date,
1999                        last_request_started: Some(last_message_date),
2000                        last_request_ended: Some(last_message_date),
2001                    }),
2002                    last_response_state: 1, // ResponseModelState.Complete
2003                    initial_location,
2004                    is_empty,
2005                    is_imported: Some(false),
2006                    has_pending_edits: Some(false),
2007                    is_external: Some(false),
2008                },
2009            );
2010            added += 1;
2011        }
2012    }
2013
2014    // Write the synced index
2015    write_chat_session_index(&db_path, &index)?;
2016
2017    Ok((added, removed))
2018}
2019
2020/// Register all sessions from a directory into the VS Code index
2021pub fn register_all_sessions_from_directory(
2022    workspace_id: &str,
2023    chat_sessions_dir: &Path,
2024    force: bool,
2025) -> Result<usize> {
2026    let db_path = get_workspace_storage_db(workspace_id)?;
2027
2028    if !db_path.exists() {
2029        return Err(CsmError::WorkspaceNotFound(format!(
2030            "Database not found: {}",
2031            db_path.display()
2032        )));
2033    }
2034
2035    // Check if VS Code is running
2036    if !force && is_vscode_running() {
2037        return Err(CsmError::VSCodeRunning);
2038    }
2039
2040    // Use sync to ensure index matches disk
2041    let (added, removed) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
2042
2043    // Print individual session info
2044    for entry in std::fs::read_dir(chat_sessions_dir)? {
2045        let entry = entry?;
2046        let path = entry.path();
2047
2048        if path
2049            .extension()
2050            .map(is_session_file_extension)
2051            .unwrap_or(false)
2052        {
2053            if let Ok(session) = parse_session_file(&path) {
2054                let session_id = session.session_id.clone().unwrap_or_else(|| {
2055                    path.file_stem()
2056                        .map(|s| s.to_string_lossy().to_string())
2057                        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
2058                });
2059
2060                let title = session.title();
2061
2062                println!(
2063                    "[OK] Registered: {} ({}...)",
2064                    title,
2065                    &session_id[..12.min(session_id.len())]
2066                );
2067            }
2068        }
2069    }
2070
2071    if removed > 0 {
2072        println!("[OK] Removed {} stale index entries", removed);
2073    }
2074
2075    Ok(added)
2076}
2077
2078/// Check if VS Code is currently running
2079pub fn is_vscode_running() -> bool {
2080    let mut sys = System::new();
2081    sys.refresh_processes();
2082
2083    for process in sys.processes().values() {
2084        let name = process.name().to_lowercase();
2085        if name.contains("code") && !name.contains("codec") {
2086            return true;
2087        }
2088    }
2089
2090    false
2091}
2092
2093/// Close VS Code gracefully and wait for it to exit.
2094/// Returns the list of workspace folders that were open (for reopening).
2095pub fn close_vscode_and_wait(timeout_secs: u64) -> Result<()> {
2096    use sysinfo::{ProcessRefreshKind, RefreshKind, Signal};
2097
2098    if !is_vscode_running() {
2099        return Ok(());
2100    }
2101
2102    // Send SIGTERM (graceful close) to all Code processes
2103    let mut sys = System::new_with_specifics(
2104        RefreshKind::new().with_processes(ProcessRefreshKind::everything()),
2105    );
2106    sys.refresh_processes();
2107
2108    let mut signaled = 0u32;
2109    for (pid, process) in sys.processes() {
2110        let name = process.name().to_lowercase();
2111        if name.contains("code") && !name.contains("codec") {
2112            // On Windows, kill() sends TerminateProcess; there's no graceful
2113            // SIGTERM equivalent via sysinfo. But the main electron process
2114            // handles WM_CLOSE. We use the `taskkill` approach on Windows for
2115            // a graceful close.
2116            #[cfg(windows)]
2117            {
2118                let _ = std::process::Command::new("taskkill")
2119                    .args(["/PID", &pid.as_u32().to_string()])
2120                    .stdout(std::process::Stdio::null())
2121                    .stderr(std::process::Stdio::null())
2122                    .status();
2123                signaled += 1;
2124            }
2125            #[cfg(not(windows))]
2126            {
2127                if process.kill_with(Signal::Term).unwrap_or(false) {
2128                    signaled += 1;
2129                }
2130            }
2131        }
2132    }
2133
2134    if signaled == 0 {
2135        return Ok(());
2136    }
2137
2138    // Wait for all Code processes to exit
2139    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout_secs);
2140    loop {
2141        std::thread::sleep(std::time::Duration::from_millis(500));
2142        if !is_vscode_running() {
2143            // Extra wait for file locks to release
2144            std::thread::sleep(std::time::Duration::from_secs(1));
2145            return Ok(());
2146        }
2147        if std::time::Instant::now() >= deadline {
2148            // Force kill remaining processes
2149            let mut sys2 = System::new_with_specifics(
2150                RefreshKind::new().with_processes(ProcessRefreshKind::everything()),
2151            );
2152            sys2.refresh_processes();
2153            for (_pid, process) in sys2.processes() {
2154                let name = process.name().to_lowercase();
2155                if name.contains("code") && !name.contains("codec") {
2156                    process.kill();
2157                }
2158            }
2159            std::thread::sleep(std::time::Duration::from_secs(1));
2160            return Ok(());
2161        }
2162    }
2163}
2164
2165/// Reopen VS Code, optionally at a specific path.
2166pub fn reopen_vscode(project_path: Option<&str>) -> Result<()> {
2167    let mut cmd = std::process::Command::new("code");
2168    if let Some(path) = project_path {
2169        cmd.arg(path);
2170    }
2171    cmd.stdout(std::process::Stdio::null())
2172        .stderr(std::process::Stdio::null())
2173        .spawn()?;
2174    Ok(())
2175}
2176
2177/// Backup workspace sessions to a timestamped directory
2178pub fn backup_workspace_sessions(workspace_dir: &Path) -> Result<Option<PathBuf>> {
2179    let chat_sessions_dir = workspace_dir.join("chatSessions");
2180
2181    if !chat_sessions_dir.exists() {
2182        return Ok(None);
2183    }
2184
2185    let timestamp = std::time::SystemTime::now()
2186        .duration_since(std::time::UNIX_EPOCH)
2187        .unwrap()
2188        .as_secs();
2189
2190    let backup_dir = workspace_dir.join(format!("chatSessions-backup-{}", timestamp));
2191
2192    // Copy directory recursively
2193    copy_dir_all(&chat_sessions_dir, &backup_dir)?;
2194
2195    Ok(Some(backup_dir))
2196}
2197
/// Copy the directory tree rooted at `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Sub-directories are
/// copied recursively and every other entry is copied with `std::fs::copy`.
/// The first I/O failure aborts the copy and is returned.
fn copy_dir_all(src: &Path, dst: &Path) -> Result<()> {
    std::fs::create_dir_all(dst)?;

    for item in std::fs::read_dir(src)? {
        let item = item?;
        let source_path = item.path();
        let target_path = dst.join(item.file_name());

        if !source_path.is_dir() {
            std::fs::copy(&source_path, &target_path)?;
            continue;
        }
        copy_dir_all(&source_path, &target_path)?;
    }

    Ok(())
}
2216
2217// =============================================================================
2218// Empty Window Sessions (ALL SESSIONS)
2219// =============================================================================
2220
2221/// Read all empty window chat sessions (not tied to any workspace)
2222/// These appear in VS Code's "ALL SESSIONS" panel
2223pub fn read_empty_window_sessions() -> Result<Vec<ChatSession>> {
2224    let sessions_path = get_empty_window_sessions_path()?;
2225
2226    if !sessions_path.exists() {
2227        return Ok(Vec::new());
2228    }
2229
2230    let mut sessions = Vec::new();
2231
2232    for entry in std::fs::read_dir(&sessions_path)? {
2233        let entry = entry?;
2234        let path = entry.path();
2235
2236        if path.extension().is_some_and(is_session_file_extension) {
2237            if let Ok(session) = parse_session_file(&path) {
2238                sessions.push(session);
2239            }
2240        }
2241    }
2242
2243    // Sort by last message date (most recent first)
2244    sessions.sort_by(|a, b| b.last_message_date.cmp(&a.last_message_date));
2245
2246    Ok(sessions)
2247}
2248
2249/// Get a specific empty window session by ID
2250#[allow(dead_code)]
2251pub fn get_empty_window_session(session_id: &str) -> Result<Option<ChatSession>> {
2252    let sessions_path = get_empty_window_sessions_path()?;
2253    let session_path = sessions_path.join(format!("{}.json", session_id));
2254
2255    if !session_path.exists() {
2256        return Ok(None);
2257    }
2258
2259    let content = std::fs::read_to_string(&session_path)?;
2260    let session: ChatSession = serde_json::from_str(&content)
2261        .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
2262
2263    Ok(Some(session))
2264}
2265
2266/// Write an empty window session
2267#[allow(dead_code)]
2268pub fn write_empty_window_session(session: &ChatSession) -> Result<PathBuf> {
2269    let sessions_path = get_empty_window_sessions_path()?;
2270
2271    // Create directory if it doesn't exist
2272    std::fs::create_dir_all(&sessions_path)?;
2273
2274    let session_id = session.session_id.as_deref().unwrap_or("unknown");
2275    let session_path = sessions_path.join(format!("{}.json", session_id));
2276    let content = serde_json::to_string_pretty(session)?;
2277    std::fs::write(&session_path, content)?;
2278
2279    Ok(session_path)
2280}
2281
2282/// Delete an empty window session
2283#[allow(dead_code)]
2284pub fn delete_empty_window_session(session_id: &str) -> Result<bool> {
2285    let sessions_path = get_empty_window_sessions_path()?;
2286    let session_path = sessions_path.join(format!("{}.json", session_id));
2287
2288    if session_path.exists() {
2289        std::fs::remove_file(&session_path)?;
2290        Ok(true)
2291    } else {
2292        Ok(false)
2293    }
2294}
2295
2296/// Count empty window sessions
2297pub fn count_empty_window_sessions() -> Result<usize> {
2298    let sessions_path = get_empty_window_sessions_path()?;
2299
2300    if !sessions_path.exists() {
2301        return Ok(0);
2302    }
2303
2304    let count = std::fs::read_dir(&sessions_path)?
2305        .filter_map(|e| e.ok())
2306        .filter(|e| e.path().extension().is_some_and(is_session_file_extension))
2307        .count();
2308
2309    Ok(count)
2310}
2311
2312/// Compact a JSONL session file by replaying all operations into a single kind:0 snapshot.
2313/// This works at the raw JSON level, preserving all fields VS Code expects.
2314/// Returns the path to the compacted file.
2315///
2316/// Handles a common corruption pattern where VS Code appends delta operations
2317/// to line 0 without newline separators (e.g., `}{"kind":1,...}{"kind":2,...}`).
2318pub fn compact_session_jsonl(path: &Path) -> Result<PathBuf> {
2319    let content = std::fs::read_to_string(path).map_err(|e| {
2320        CsmError::InvalidSessionFormat(format!("Failed to read {}: {}", path.display(), e))
2321    })?;
2322
2323    // Pre-process: split concatenated JSON objects that lack newline separators.
2324    // VS Code sometimes appends delta ops to line 0 without a \n, producing:
2325    //   {"kind":0,"v":{...}}{"kind":1,...}{"kind":2,...}\n{"kind":1,...}\n...
2326    // We fix this by inserting newlines at every `}{"kind":` boundary.
2327    let content = split_concatenated_jsonl(&content);
2328
2329    let mut lines = content.lines();
2330
2331    // First line must be kind:0 (initial snapshot)
2332    let first_line = lines
2333        .next()
2334        .ok_or_else(|| CsmError::InvalidSessionFormat("Empty JSONL file".to_string()))?;
2335
2336    let first_entry: serde_json::Value = match serde_json::from_str(first_line.trim()) {
2337        Ok(v) => v,
2338        Err(_) => {
2339            // Try sanitizing Unicode (lone surrogates, etc.)
2340            let sanitized = sanitize_json_unicode(first_line.trim());
2341            serde_json::from_str(&sanitized).map_err(|e| {
2342                CsmError::InvalidSessionFormat(format!("Invalid JSON on line 1: {}", e))
2343            })?
2344        }
2345    };
2346
2347    let kind = first_entry
2348        .get("kind")
2349        .and_then(|k| k.as_u64())
2350        .unwrap_or(99);
2351    if kind != 0 {
2352        return Err(CsmError::InvalidSessionFormat(
2353            "First JSONL line must be kind:0".to_string(),
2354        ));
2355    }
2356
2357    // Extract the session state from the "v" field
2358    let mut state = first_entry
2359        .get("v")
2360        .cloned()
2361        .ok_or_else(|| CsmError::InvalidSessionFormat("kind:0 missing 'v' field".to_string()))?;
2362
2363    // Replay all subsequent operations
2364    for line in lines {
2365        let line = line.trim();
2366        if line.is_empty() {
2367            continue;
2368        }
2369
2370        let entry: serde_json::Value = match serde_json::from_str(line) {
2371            Ok(v) => v,
2372            Err(_) => continue, // Skip malformed lines
2373        };
2374
2375        let op_kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(99);
2376
2377        match op_kind {
2378            1 => {
2379                // Delta update: k=["path","to","field"], v=value
2380                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
2381                    if let Some(keys_arr) = keys.as_array() {
2382                        apply_delta(&mut state, keys_arr, value.clone());
2383                    }
2384                }
2385            }
2386            2 => {
2387                // Array replace/splice: k=["path","to","array"], v=[items], i=splice_index
2388                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
2389                    let splice_index = entry.get("i").and_then(|i| i.as_u64()).map(|i| i as usize);
2390                    if let Some(keys_arr) = keys.as_array() {
2391                        apply_splice(&mut state, keys_arr, value.clone(), splice_index);
2392                    }
2393                }
2394            }
2395            _ => {} // Skip unknown kinds
2396        }
2397    }
2398
2399    // Inject any missing fields that VS Code's latest format requires
2400    let session_id = path
2401        .file_stem()
2402        .and_then(|s| s.to_str())
2403        .map(|s| s.to_string());
2404    ensure_vscode_compat_fields(&mut state, session_id.as_deref());
2405
2406    // Write the compacted file: single kind:0 line with the final state
2407    let compact_entry = serde_json::json!({"kind": 0, "v": state});
2408    let compact_content = serde_json::to_string(&compact_entry)
2409        .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
2410
2411    // Backup the original file
2412    let backup_path = path.with_extension("jsonl.bak");
2413    std::fs::rename(path, &backup_path)?;
2414
2415    // Write the compacted file (trailing newline prevents concatenation
2416    // if VS Code later appends delta operations)
2417    std::fs::write(path, format!("{}\n", compact_content))?;
2418
2419    Ok(backup_path)
2420}
2421
2422/// Trim a session JSONL file by keeping only the last `keep` requests.
2423///
2424/// Very long chat sessions (100+ requests) can grow to 50-100+ MB, causing VS Code
2425/// to fail loading them. This function compacts the session first (if needed), then
2426/// removes old requests from the `requests` array, keeping only the most recent ones.
2427///
2428/// The full session is preserved as a `.jsonl.bak` backup. A trimmed summary is
2429/// injected as the first request message so the user knows context was archived.
2430///
2431/// Returns `(original_count, kept_count, original_mb, new_mb)`.
2432pub fn trim_session_jsonl(path: &Path, keep: usize) -> Result<(usize, usize, f64, f64)> {
2433    let content = std::fs::read_to_string(path).map_err(|e| {
2434        CsmError::InvalidSessionFormat(format!("Failed to read {}: {}", path.display(), e))
2435    })?;
2436
2437    let original_size = content.len() as f64 / (1024.0 * 1024.0);
2438
2439    // Always handle concatenated JSON objects first, then check line count
2440    let content = split_concatenated_jsonl(&content);
2441    let line_count = content.lines().filter(|l| !l.trim().is_empty()).count();
2442
2443    // If multi-line (concatenated objects or delta ops), compact first
2444    let content = if line_count > 1 {
2445        // Write the split content so compact can process it
2446        std::fs::write(path, &content)?;
2447        compact_session_jsonl(path)?;
2448        std::fs::read_to_string(path).map_err(|e| {
2449            CsmError::InvalidSessionFormat(format!("Failed to read compacted file: {}", e))
2450        })?
2451    } else {
2452        content
2453    };
2454
2455    let first_line = content
2456        .lines()
2457        .next()
2458        .ok_or_else(|| CsmError::InvalidSessionFormat("Empty JSONL file".to_string()))?;
2459
2460    let mut entry: serde_json::Value = serde_json::from_str(first_line.trim())
2461        .map_err(|_| {
2462            let sanitized = sanitize_json_unicode(first_line.trim());
2463            serde_json::from_str::<serde_json::Value>(&sanitized)
2464                .map_err(|e| CsmError::InvalidSessionFormat(format!("Invalid JSON: {}", e)))
2465        })
2466        .unwrap_or_else(|e| e.unwrap());
2467
2468    let kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(99);
2469    if kind != 0 {
2470        return Err(
2471            CsmError::InvalidSessionFormat("First JSONL line must be kind:0".to_string()).into(),
2472        );
2473    }
2474
2475    // Get the requests array
2476    let requests = match entry
2477        .get("v")
2478        .and_then(|v| v.get("requests"))
2479        .and_then(|r| r.as_array())
2480    {
2481        Some(r) => r.clone(),
2482        None => {
2483            return Err(CsmError::InvalidSessionFormat(
2484                "Session has no requests array".to_string(),
2485            )
2486            .into());
2487        }
2488    };
2489
2490    let original_count = requests.len();
2491
2492    if original_count <= keep {
2493        // Still strip bloated content even if not reducing request count
2494        strip_bloated_content(&mut entry);
2495
2496        let trimmed_content = serde_json::to_string(&entry)
2497            .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
2498        let new_size = trimmed_content.len() as f64 / (1024.0 * 1024.0);
2499
2500        // Only rewrite if we actually reduced size
2501        if new_size < original_size * 0.9 {
2502            let backup_path = path.with_extension("jsonl.bak");
2503            if !backup_path.exists() {
2504                std::fs::copy(path, &backup_path)?;
2505            }
2506            std::fs::write(path, format!("{}\n", trimmed_content))?;
2507        }
2508
2509        return Ok((original_count, original_count, original_size, new_size));
2510    }
2511
2512    // Keep only the last `keep` requests
2513    let kept_requests: Vec<serde_json::Value> = requests[original_count - keep..].to_vec();
2514
2515    // Use only the kept requests — no injected trim notice.
2516    // Injecting synthetic requests with non-standard agent/structure fields
2517    // can cause VS Code's session deserializer to reject the entire session.
2518    let final_requests = kept_requests;
2519
2520    // Replace the requests array in the entry
2521    if let Some(v) = entry.get_mut("v") {
2522        if let Some(obj) = v.as_object_mut() {
2523            obj.insert("requests".to_string(), serde_json::json!(final_requests));
2524        }
2525    }
2526
2527    // Strip bloated metadata, tool invocations, textEditGroups, thinking tokens
2528    strip_bloated_content(&mut entry);
2529
2530    // Ensure compat fields
2531    let session_id = path
2532        .file_stem()
2533        .and_then(|s| s.to_str())
2534        .map(|s| s.to_string());
2535    if let Some(v) = entry.get_mut("v") {
2536        ensure_vscode_compat_fields(v, session_id.as_deref());
2537    }
2538
2539    let trimmed_content = serde_json::to_string(&entry)
2540        .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
2541
2542    let new_size = trimmed_content.len() as f64 / (1024.0 * 1024.0);
2543
2544    // Backup original (if not already backed up by compact)
2545    let backup_path = path.with_extension("jsonl.bak");
2546    if !backup_path.exists() {
2547        std::fs::copy(path, &backup_path)?;
2548    }
2549
2550    // Write the trimmed file (trailing newline prevents concatenation)
2551    std::fs::write(path, format!("{}\n", trimmed_content))?;
2552
2553    Ok((original_count, keep, original_size, new_size))
2554}
2555
2556/// Strip bloated content from a session entry to reduce file size.
2557///
2558/// VS Code sessions accumulate large metadata that isn't needed for session display:
2559/// - `result.metadata`: Can be 100KB-1.5MB per request (Copilot internal state)
2560/// - `editedFileEvents`: Redundant file edit tracking
2561/// - `chatEdits`: File edit diffs
2562/// - `textEditGroup` response items: 80-120KB each with full file diffs
2563/// - `thinking` response items: Model thinking tokens (can be 400+ per request)
2564/// - `toolInvocationSerialized`: Tool call metadata (usually already stripped by compact)
2565/// - `toolSpecificData`: Duplicate data in tool invocations
2566///
2567/// This function strips or truncates all of these while preserving the conversation
2568/// content (markdownContent responses and user messages).
2569fn strip_bloated_content(entry: &mut serde_json::Value) {
2570    let requests = match entry
2571        .get_mut("v")
2572        .and_then(|v| v.get_mut("requests"))
2573        .and_then(|r| r.as_array_mut())
2574    {
2575        Some(r) => r,
2576        None => return,
2577    };
2578
2579    for req in requests.iter_mut() {
2580        let obj = match req.as_object_mut() {
2581            Some(o) => o,
2582            None => continue,
2583        };
2584
2585        // Strip result.metadata (100KB-1.5MB per request)
2586        if let Some(result) = obj.get_mut("result") {
2587            if let Some(result_obj) = result.as_object_mut() {
2588                if let Some(meta) = result_obj.get("metadata") {
2589                    let meta_str = serde_json::to_string(meta).unwrap_or_default();
2590                    if meta_str.len() > 1000 {
2591                        result_obj.insert(
2592                            "metadata".to_string(),
2593                            serde_json::Value::Object(serde_json::Map::new()),
2594                        );
2595                    }
2596                }
2597            }
2598        }
2599
2600        // Strip editedFileEvents
2601        obj.remove("editedFileEvents");
2602
2603        // Strip chatEdits
2604        obj.remove("chatEdits");
2605
2606        // Truncate contentReferences to max 3
2607        if let Some(refs) = obj.get_mut("contentReferences") {
2608            if let Some(arr) = refs.as_array_mut() {
2609                if arr.len() > 3 {
2610                    arr.truncate(3);
2611                }
2612            }
2613        }
2614
2615        // Process response items
2616        if let Some(response) = obj.get_mut("response") {
2617            if let Some(resp_arr) = response.as_array_mut() {
2618                // Remove non-essential response kinds
2619                resp_arr.retain(|r| {
2620                    let kind = r.get("kind").and_then(|k| k.as_str()).unwrap_or("");
2621                    !matches!(
2622                        kind,
2623                        "toolInvocationSerialized"
2624                            | "progressMessage"
2625                            | "confirmationWidget"
2626                            | "codeblockUri"
2627                            | "progressTaskSerialized"
2628                            | "undoStop"
2629                            | "mcpServersStarting"
2630                            | "confirmation"
2631                    )
2632                });
2633
2634                // Truncate textEditGroup items (strip edit diffs, keep URI ref)
2635                for r in resp_arr.iter_mut() {
2636                    let kind = r
2637                        .get("kind")
2638                        .and_then(|k| k.as_str())
2639                        .unwrap_or("")
2640                        .to_string();
2641
2642                    if kind == "textEditGroup" {
2643                        if let Some(edits) = r.get_mut("edits") {
2644                            if let Some(arr) = edits.as_array_mut() {
2645                                if serde_json::to_string(arr).unwrap_or_default().len() > 2000 {
2646                                    arr.clear();
2647                                }
2648                            }
2649                        }
2650                    }
2651
2652                    // Truncate thinking tokens
2653                    if kind == "thinking" {
2654                        if let Some(val) = r.get_mut("value") {
2655                            if let Some(s) = val.as_str() {
2656                                if s.len() > 500 {
2657                                    *val = serde_json::Value::String(format!(
2658                                        "{}... [truncated]",
2659                                        &s[..500]
2660                                    ));
2661                                }
2662                            }
2663                        }
2664                        if let Some(thought) = r.get_mut("thought") {
2665                            if let Some(thought_val) = thought.get_mut("value") {
2666                                if let Some(s) = thought_val.as_str() {
2667                                    if s.len() > 500 {
2668                                        *thought_val = serde_json::Value::String(format!(
2669                                            "{}... [truncated]",
2670                                            &s[..500]
2671                                        ));
2672                                    }
2673                                }
2674                            }
2675                        }
2676                    }
2677
2678                    // Truncate large markdownContent
2679                    if kind == "markdownContent" {
2680                        if let Some(content) = r.get_mut("content") {
2681                            if let Some(val) = content.get_mut("value") {
2682                                if let Some(s) = val.as_str() {
2683                                    if s.len() > 20000 {
2684                                        *val = serde_json::Value::String(format!(
2685                                            "{}\n\n---\n*[Chasm: Content truncated for loading performance]*",
2686                                            &s[..20000]
2687                                        ));
2688                                    }
2689                                }
2690                            }
2691                        }
2692                    }
2693                }
2694
2695                // Limit thinking items to last 5 per request
2696                let mut thinking_count = 0;
2697                let mut indices_to_remove = Vec::new();
2698                for (i, r) in resp_arr.iter().enumerate().rev() {
2699                    let kind = r.get("kind").and_then(|k| k.as_str()).unwrap_or("");
2700                    if kind == "thinking" {
2701                        thinking_count += 1;
2702                        if thinking_count > 5 {
2703                            indices_to_remove.push(i);
2704                        }
2705                    }
2706                }
2707                for idx in indices_to_remove {
2708                    resp_arr.remove(idx);
2709                }
2710
2711                // Strip toolSpecificData from any remaining tool invocations
2712                for r in resp_arr.iter_mut() {
2713                    if let Some(obj) = r.as_object_mut() {
2714                        obj.remove("toolSpecificData");
2715                    }
2716                }
2717
2718                // Fix response items missing `kind` field — wrap raw MarkdownString
2719                // objects as proper markdownContent response items.
2720                // VS Code sometimes serializes MarkdownString directly instead of
2721                // wrapping it in { kind: "markdownContent", content: MarkdownString }.
2722                // Without the `kind` discriminator, VS Code's deserializer fails.
2723                let fixed: Vec<serde_json::Value> = resp_arr
2724                    .drain(..)
2725                    .map(|item| {
2726                        if item.get("kind").is_none() {
2727                            // Check if it looks like a MarkdownString (has `value` or `supportHtml`)
2728                            if item.get("value").is_some() || item.get("supportHtml").is_some() {
2729                                serde_json::json!({
2730                                    "kind": "markdownContent",
2731                                    "content": item
2732                                })
2733                            } else {
2734                                item
2735                            }
2736                        } else {
2737                            item
2738                        }
2739                    })
2740                    .collect();
2741                *resp_arr = fixed;
2742            }
2743        }
2744    }
2745}
2746
/// Restore valid JSONL by separating JSON objects that were written back-to-back.
///
/// VS Code sometimes appends delta operations (kind:1, kind:2) to an existing
/// line without emitting a newline first, yielding content such as
///   `{"kind":0,"v":{...}}{"kind":1,...}{"kind":2,...}`
///
/// Every `}{"kind":` boundary gets a newline inserted between the two braces.
/// Inside a JSON string value the quote in `{"kind":` would have to be escaped
/// (`{\"kind\":`), so the unescaped pattern only occurs between objects.
/// Content without such a boundary is returned unchanged (as a new `String`).
pub fn split_concatenated_jsonl(content: &str) -> String {
    const BOUNDARY: &str = "}{\"kind\":";
    const REPAIRED_BOUNDARY: &str = "}\n{\"kind\":";

    let mut segments = content.split(BOUNDARY);
    let mut repaired = String::with_capacity(content.len());

    // `split` always yields at least one segment, even for empty input.
    if let Some(head) = segments.next() {
        repaired.push_str(head);
    }
    for segment in segments {
        repaired.push_str(REPAIRED_BOUNDARY);
        repaired.push_str(segment);
    }

    repaired
}
2764
2765/// Apply a delta update (kind:1) to a JSON value at the given key path.
2766fn apply_delta(root: &mut serde_json::Value, keys: &[serde_json::Value], value: serde_json::Value) {
2767    if keys.is_empty() {
2768        return;
2769    }
2770
2771    // Navigate to the parent
2772    let mut current = root;
2773    for key in &keys[..keys.len() - 1] {
2774        if let Some(k) = key.as_str() {
2775            if !current.get(k).is_some() {
2776                current[k] = serde_json::Value::Object(serde_json::Map::new());
2777            }
2778            current = &mut current[k];
2779        } else if let Some(idx) = key.as_u64() {
2780            if let Some(arr) = current.as_array_mut() {
2781                // Auto-grow array with null placeholders if index is beyond current length.
2782                // VS Code's event sourcing may reference indices before they are formally
2783                // added via a kind:2 splice.
2784                while (idx as usize) >= arr.len() {
2785                    arr.push(serde_json::Value::Object(serde_json::Map::new()));
2786                }
2787                current = &mut arr[idx as usize];
2788            } else {
2789                return;
2790            }
2791        }
2792    }
2793
2794    // Set the final key
2795    if let Some(last_key) = keys.last() {
2796        if let Some(k) = last_key.as_str() {
2797            current[k] = value;
2798        } else if let Some(idx) = last_key.as_u64() {
2799            if let Some(arr) = current.as_array_mut() {
2800                while (idx as usize) >= arr.len() {
2801                    arr.push(serde_json::Value::Null);
2802                }
2803                arr[idx as usize] = value;
2804            }
2805        }
2806    }
2807}
2808
2809/// Apply an array replace/splice operation (kind:2) to a JSON value at the given key path.
2810/// When `splice_index` is `Some(i)`, truncates the target array at index `i` before extending.
2811/// When `splice_index` is `None`, replaces the entire array with the new items.
2812fn apply_splice(
2813    root: &mut serde_json::Value,
2814    keys: &[serde_json::Value],
2815    items: serde_json::Value,
2816    splice_index: Option<usize>,
2817) {
2818    if keys.is_empty() {
2819        return;
2820    }
2821
2822    // Navigate to the target array
2823    let mut current = root;
2824    for key in keys {
2825        if let Some(k) = key.as_str() {
2826            if !current.get(k).is_some() {
2827                current[k] = serde_json::json!([]);
2828            }
2829            current = &mut current[k];
2830        } else if let Some(idx) = key.as_u64() {
2831            if let Some(arr) = current.as_array_mut() {
2832                // Auto-grow array if index is beyond current length
2833                while (idx as usize) >= arr.len() {
2834                    arr.push(serde_json::Value::Object(serde_json::Map::new()));
2835                }
2836                current = &mut arr[idx as usize];
2837            } else {
2838                return;
2839            }
2840        }
2841    }
2842
2843    // Splice or replace items in the target array
2844    if let Some(target_arr) = current.as_array_mut() {
2845        if let Some(idx) = splice_index {
2846            // Splice: truncate at index, then extend with new items
2847            target_arr.truncate(idx);
2848        } else {
2849            // Full replacement: clear the array
2850            target_arr.clear();
2851        }
2852        if let Some(new_items) = items.as_array() {
2853            target_arr.extend(new_items.iter().cloned());
2854        }
2855    }
2856}
2857
2858/// Ensure a JSONL `kind:0` snapshot's `v` object has all fields required by
2859/// VS Code's latest session format (1.109.0+ / version 3). Missing fields are
2860/// injected with sensible defaults so sessions load reliably after recovery,
2861/// conversion, or compaction.
2862///
2863/// Required fields that VS Code now expects:
2864/// - `version` (u32, default 3)
2865/// - `sessionId` (string, extracted from filename or generated)
2866/// - `responderUsername` (string, default "GitHub Copilot")
2867/// - `hasPendingEdits` (bool, default false)
2868/// - `pendingRequests` (array, default [])
2869/// - `inputState` (object with mode, attachments, etc.)
2870pub fn ensure_vscode_compat_fields(state: &mut serde_json::Value, session_id: Option<&str>) {
2871    // Migrate old-format inputState (top-level attachments/mode/etc.) to nested object.
2872    // Must run BEFORE the inputState existence check below.
2873    migrate_old_input_state(state);
2874
2875    if let Some(obj) = state.as_object_mut() {
2876        // version
2877        if !obj.contains_key("version") {
2878            obj.insert("version".to_string(), serde_json::json!(3));
2879        }
2880
2881        // sessionId — use provided ID, or try to read from existing field
2882        if !obj.contains_key("sessionId") {
2883            if let Some(id) = session_id {
2884                obj.insert("sessionId".to_string(), serde_json::json!(id));
2885            }
2886        }
2887
2888        // responderUsername
2889        if !obj.contains_key("responderUsername") {
2890            obj.insert(
2891                "responderUsername".to_string(),
2892                serde_json::json!("GitHub Copilot"),
2893            );
2894        }
2895
2896        // hasPendingEdits — ALWAYS force to false for recovered/compacted sessions.
2897        // Sessions with hasPendingEdits:true cause VS Code to attempt restoring
2898        // stale file edits on load, which fails if files have changed since the
2899        // original session, preventing the session from loading entirely.
2900        obj.insert("hasPendingEdits".to_string(), serde_json::json!(false));
2901
2902        // pendingRequests — ALWAYS force to empty for recovered/compacted sessions.
2903        // Stale pending requests can also block session loading.
2904        obj.insert("pendingRequests".to_string(), serde_json::json!([]));
2905
2906        // inputState — VS Code expects this to exist with at least mode + attachments
2907        if !obj.contains_key("inputState") {
2908            obj.insert(
2909                "inputState".to_string(),
2910                serde_json::json!({
2911                    "attachments": [],
2912                    "mode": { "id": "agent", "kind": "agent" },
2913                    "inputText": "",
2914                    "selections": [],
2915                    "contrib": { "chatDynamicVariableModel": [] }
2916                }),
2917            );
2918        }
2919    }
2920}
2921
2922/// Detect whether a legacy .json file is a "skeleton" — corrupted to contain only
2923/// structural characters ({}, [], commas, colons, whitespace) with all actual data stripped.
2924/// These files parse as valid JSON but contain no useful session content.
2925pub fn is_skeleton_json(content: &str) -> bool {
2926    // Must be non-trivial size to be a skeleton (tiny files might just be empty sessions)
2927    if content.len() < 100 {
2928        return false;
2929    }
2930
2931    // Count structural vs data characters
2932    let structural_chars: usize = content
2933        .chars()
2934        .filter(|c| {
2935            matches!(
2936                c,
2937                '{' | '}' | '[' | ']' | ',' | ':' | ' ' | '\n' | '\r' | '\t' | '"'
2938            )
2939        })
2940        .count();
2941
2942    let total_chars = content.len();
2943    let structural_ratio = structural_chars as f64 / total_chars as f64;
2944
2945    // A skeleton file is >80% structural characters. Normal sessions have lots of
2946    // text content (messages, code, etc.) so the ratio is much lower.
2947    if structural_ratio < 0.80 {
2948        return false;
2949    }
2950
2951    // Additionally verify: parse as JSON and check that requests array is empty or
2952    // contains only empty objects
2953    if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(content) {
2954        // Check if requests exist and are all empty
2955        if let Some(requests) = parsed.get("requests").and_then(|r| r.as_array()) {
2956            let all_empty = requests.iter().all(|req| {
2957                // A skeleton request has no "message" text or empty message content
2958                let msg = req
2959                    .get("message")
2960                    .and_then(|m| m.get("text"))
2961                    .and_then(|t| t.as_str());
2962                msg.map_or(true, |s| s.is_empty())
2963            });
2964            return all_empty;
2965        }
2966        // No requests array at all — also skeleton-like
2967        return true;
2968    }
2969
2970    // Couldn't parse but high structural ratio — still likely skeleton
2971    structural_ratio > 0.85
2972}
2973
2974/// Convert a skeleton .json file to a valid minimal .jsonl file.
2975/// Preserves title and timestamp from the index entry if available.
2976/// The original .json file is renamed to `.json.corrupt` (non-destructive).
2977/// Returns the path to the new .jsonl file, or None if conversion was skipped.
2978pub fn convert_skeleton_json_to_jsonl(
2979    json_path: &Path,
2980    title: Option<&str>,
2981    last_message_date: Option<i64>,
2982) -> Result<Option<PathBuf>> {
2983    let content = std::fs::read_to_string(json_path)
2984        .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
2985
2986    if !is_skeleton_json(&content) {
2987        return Ok(None);
2988    }
2989
2990    let session_id = json_path
2991        .file_stem()
2992        .and_then(|s| s.to_str())
2993        .unwrap_or("unknown")
2994        .to_string();
2995
2996    let title = title.unwrap_or("Recovered Session");
2997    let now = std::time::SystemTime::now()
2998        .duration_since(std::time::UNIX_EPOCH)
2999        .unwrap_or_default()
3000        .as_millis() as i64;
3001    let timestamp = last_message_date.unwrap_or(now);
3002
3003    // Build a valid minimal kind:0 JSONL entry
3004    let jsonl_entry = serde_json::json!({
3005        "kind": 0,
3006        "v": {
3007            "sessionId": session_id,
3008            "title": title,
3009            "lastMessageDate": timestamp,
3010            "requests": [],
3011            "version": 4,
3012            "hasPendingEdits": false,
3013            "pendingRequests": [],
3014            "inputState": {
3015                "attachments": [],
3016                "mode": { "id": "agent", "kind": "agent" },
3017                "inputText": "",
3018                "selections": [],
3019                "contrib": { "chatDynamicVariableModel": [] }
3020            },
3021            "responderUsername": "GitHub Copilot",
3022            "isImported": false,
3023            "initialLocation": "panel"
3024        }
3025    });
3026
3027    let jsonl_path = json_path.with_extension("jsonl");
3028    let corrupt_path = json_path.with_extension("json.corrupt");
3029
3030    // Don't overwrite an existing .jsonl
3031    if jsonl_path.exists() {
3032        // Just rename the skeleton to .corrupt
3033        std::fs::rename(json_path, &corrupt_path)?;
3034        return Ok(None);
3035    }
3036
3037    // Write the new .jsonl file
3038    std::fs::write(
3039        &jsonl_path,
3040        serde_json::to_string(&jsonl_entry)
3041            .map_err(|e| CsmError::InvalidSessionFormat(format!("Serialize error: {}", e)))?,
3042    )?;
3043
3044    // Rename original to .json.corrupt (non-destructive)
3045    std::fs::rename(json_path, &corrupt_path)?;
3046
3047    Ok(Some(jsonl_path))
3048}
3049
3050/// Fix cancelled `modelState` values in a compacted (single-line) JSONL session file.
3051///
3052/// VS Code determines `lastResponseState` from the file content, not the index.
3053/// If the last request's `modelState.value` is `2` (Cancelled) or missing entirely,
3054/// VS Code refuses to load the session. This function:
3055/// 1. Finds the last request in the `requests` array
3056/// 2. If `modelState.value` is `2` (Cancelled), changes it to `1` (Complete)
3057/// 3. If `modelState` is missing entirely, adds `{"value":1,"completedAt":<now>}`
3058///
3059/// Returns `true` if the file was modified.
3060pub fn fix_cancelled_model_state(path: &Path) -> Result<bool> {
3061    let content = std::fs::read_to_string(path)
3062        .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
3063
3064    let lines: Vec<&str> = content.lines().collect();
3065
3066    if lines.is_empty() {
3067        return Ok(false);
3068    }
3069
3070    // For multi-line JSONL, we need to scan all lines to find the LAST modelState
3071    // delta for the highest request index. For single-line (compacted), we modify
3072    // the kind:0 snapshot directly.
3073    if lines.len() == 1 {
3074        // Compacted single-line JSONL: modify the kind:0 snapshot
3075        let mut entry: serde_json::Value = serde_json::from_str(lines[0].trim())
3076            .map_err(|e| CsmError::InvalidSessionFormat(format!("Invalid JSON: {}", e)))?;
3077
3078        let is_kind_0 = entry
3079            .get("kind")
3080            .and_then(|k| k.as_u64())
3081            .map(|k| k == 0)
3082            .unwrap_or(false);
3083
3084        if !is_kind_0 {
3085            return Ok(false);
3086        }
3087
3088        let requests = match entry
3089            .get_mut("v")
3090            .and_then(|v| v.get_mut("requests"))
3091            .and_then(|r| r.as_array_mut())
3092        {
3093            Some(r) if !r.is_empty() => r,
3094            _ => return Ok(false),
3095        };
3096
3097        let last_req = requests.last_mut().unwrap();
3098        let model_state = last_req.get("modelState");
3099
3100        let needs_fix = match model_state {
3101            Some(ms) => {
3102                // Any value other than 1 (Complete) needs repair:
3103                // 0 = NotStarted/Unknown, 2 = Cancelled, 4 = InProgress
3104                ms.get("value").and_then(|v| v.as_u64()) != Some(1)
3105            }
3106            None => true, // Missing modelState = never completed
3107        };
3108
3109        if !needs_fix {
3110            return Ok(false);
3111        }
3112
3113        let now = std::time::SystemTime::now()
3114            .duration_since(std::time::UNIX_EPOCH)
3115            .unwrap_or_default()
3116            .as_millis() as u64;
3117
3118        last_req.as_object_mut().unwrap().insert(
3119            "modelState".to_string(),
3120            serde_json::json!({"value": 1, "completedAt": now}),
3121        );
3122
3123        let patched = serde_json::to_string(&entry)
3124            .map_err(|e| CsmError::InvalidSessionFormat(format!("Serialize error: {}", e)))?;
3125        // Trailing newline prevents concatenation if VS Code appends deltas
3126        std::fs::write(path, format!("{}\n", patched))?;
3127        return Ok(true);
3128    }
3129
3130    // Multi-line JSONL: find the highest request index referenced across all lines,
3131    // then check if the last modelState delta for that index has value=2 or is missing.
3132    // If so, append a corrective delta.
3133    let mut highest_req_idx: Option<usize> = None;
3134    let mut last_model_state_value: Option<u64> = None;
3135
3136    // Check kind:0 snapshot for request count
3137    if let Ok(first_entry) = serde_json::from_str::<serde_json::Value>(lines[0].trim()) {
3138        if let Some(requests) = first_entry
3139            .get("v")
3140            .and_then(|v| v.get("requests"))
3141            .and_then(|r| r.as_array())
3142        {
3143            if !requests.is_empty() {
3144                let last_idx = requests.len() - 1;
3145                highest_req_idx = Some(last_idx);
3146                // Check modelState in the snapshot's last request
3147                if let Some(ms) = requests[last_idx].get("modelState") {
3148                    last_model_state_value = ms.get("value").and_then(|v| v.as_u64());
3149                }
3150            }
3151        }
3152    }
3153
3154    // Scan deltas for higher request indices and modelState updates
3155    static REQ_IDX_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r#""k":\["requests",(\d+)"#).unwrap());
3156
3157    for line in &lines[1..] {
3158        if let Some(caps) = REQ_IDX_RE.captures(line) {
3159            if let Ok(idx) = caps[1].parse::<usize>() {
3160                if highest_req_idx.is_none() || idx > highest_req_idx.unwrap() {
3161                    highest_req_idx = Some(idx);
3162                    last_model_state_value = None; // Reset for new highest
3163                }
3164                // Track modelState for the highest request index
3165                if Some(idx) == highest_req_idx && line.contains("\"modelState\"") {
3166                    if let Ok(entry) = serde_json::from_str::<serde_json::Value>(line.trim()) {
3167                        last_model_state_value = entry
3168                            .get("v")
3169                            .and_then(|v| v.get("value"))
3170                            .and_then(|v| v.as_u64());
3171                    }
3172                }
3173            }
3174        }
3175    }
3176
3177    let req_idx = match highest_req_idx {
3178        Some(idx) => idx,
3179        None => return Ok(false),
3180    };
3181
3182    let needs_fix = match last_model_state_value {
3183        Some(1) => false, // Already complete
3184        _ => true,        // 0=NotStarted, 2=Cancelled, 4=InProgress, None=missing
3185    };
3186
3187    if !needs_fix {
3188        return Ok(false);
3189    }
3190
3191    let now = std::time::SystemTime::now()
3192        .duration_since(std::time::UNIX_EPOCH)
3193        .unwrap_or_default()
3194        .as_millis() as u64;
3195
3196    let fix_delta = format!(
3197        "\n{{\"kind\":1,\"k\":[\"requests\",{},\"modelState\"],\"v\":{{\"value\":1,\"completedAt\":{}}}}}",
3198        req_idx, now
3199    );
3200
3201    use std::io::Write;
3202    let mut file = std::fs::OpenOptions::new().append(true).open(path)?;
3203    file.write_all(fix_delta.as_bytes())?;
3204
3205    Ok(true)
3206}
3207
3208/// Repair workspace sessions: compact large JSONL files and fix the index.
3209/// Returns (compacted_count, index_fixed_count).
3210pub fn repair_workspace_sessions(
3211    workspace_id: &str,
3212    chat_sessions_dir: &Path,
3213    force: bool,
3214) -> Result<(usize, usize)> {
3215    let db_path = get_workspace_storage_db(workspace_id)?;
3216
3217    if !db_path.exists() {
3218        return Err(CsmError::WorkspaceNotFound(format!(
3219            "Database not found: {}",
3220            db_path.display()
3221        )));
3222    }
3223
3224    if !force && is_vscode_running() {
3225        return Err(CsmError::VSCodeRunning);
3226    }
3227
3228    let mut compacted = 0;
3229    let mut fields_fixed = 0;
3230
3231    if chat_sessions_dir.exists() {
3232        // Pass 0.5a: Recover from .json.bak when .jsonl has fewer requests
3233        match recover_from_json_bak(chat_sessions_dir) {
3234            Ok(n) if n > 0 => {
3235                println!("   [OK] Recovered {} session(s) from .json.bak backups", n);
3236            }
3237            _ => {}
3238        }
3239
3240        // Pass 0.5b: Recover from .jsonl.bak when backup is larger than active file
3241        match recover_from_jsonl_bak(chat_sessions_dir, false) {
3242            Ok((n, bytes)) if n > 0 => {
3243                println!(
3244                    "   [OK] Restored {} session(s) from .jsonl.bak ({:.1}MB recovered)",
3245                    n,
3246                    bytes as f64 / (1024.0 * 1024.0)
3247                );
3248            }
3249            _ => {}
3250        }
3251
3252        // Pass 1: Compact large JSONL files and fix missing fields
3253        for entry in std::fs::read_dir(chat_sessions_dir)? {
3254            let entry = entry?;
3255            let path = entry.path();
3256            if path.extension().is_some_and(|e| e == "jsonl") {
3257                let metadata = std::fs::metadata(&path)?;
3258                let size_mb = metadata.len() / (1024 * 1024);
3259
3260                let raw_content = std::fs::read_to_string(&path)
3261                    .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
3262
3263                // Pre-process: split concatenated JSON objects that lack newline
3264                // separators. VS Code sometimes appends delta ops to line 0 without
3265                // a \n, producing: {"kind":0,...}{"kind":1,...}
3266                // If splitting changes the content, rewrite the file first.
3267                let content = split_concatenated_jsonl(&raw_content);
3268                if content != raw_content {
3269                    std::fs::write(&path, content.as_bytes())?;
3270                    let stem = path
3271                        .file_stem()
3272                        .map(|s| s.to_string_lossy().to_string())
3273                        .unwrap_or_default();
3274                    println!("   [OK] Fixed concatenated JSONL objects: {}", stem);
3275                }
3276                let line_count = content.lines().count();
3277
3278                if line_count > 1 {
3279                    // Compact multi-line JSONL (has operations to replay)
3280                    let stem = path
3281                        .file_stem()
3282                        .map(|s| s.to_string_lossy().to_string())
3283                        .unwrap_or_default();
3284                    println!(
3285                        "   Compacting {} ({} lines, {}MB)...",
3286                        stem, line_count, size_mb
3287                    );
3288
3289                    match compact_session_jsonl(&path) {
3290                        Ok(backup_path) => {
3291                            let new_size = std::fs::metadata(&path)
3292                                .map(|m| m.len() / (1024 * 1024))
3293                                .unwrap_or(0);
3294                            println!(
3295                                "   [OK] Compacted: {}MB -> {}MB (backup: {})",
3296                                size_mb,
3297                                new_size,
3298                                backup_path
3299                                    .file_name()
3300                                    .unwrap_or_default()
3301                                    .to_string_lossy()
3302                            );
3303                            compacted += 1;
3304                        }
3305                        Err(e) => {
3306                            println!("   [WARN] Failed to compact {}: {}", stem, e);
3307                        }
3308                    }
3309                } else {
3310                    // Single-line JSONL — check for missing VS Code fields
3311                    if let Some(first_line) = content.lines().next() {
3312                        if let Ok(mut obj) = serde_json::from_str::<serde_json::Value>(first_line) {
3313                            let is_kind_0 = obj
3314                                .get("kind")
3315                                .and_then(|k| k.as_u64())
3316                                .map(|k| k == 0)
3317                                .unwrap_or(false);
3318
3319                            if is_kind_0 {
3320                                if let Some(v) = obj.get("v") {
3321                                    // Check if fields are missing OR have wrong values.
3322                                    // hasPendingEdits must be false — true prevents session loading
3323                                    // because VS Code tries to restore stale file edits that fail.
3324                                    let needs_fix = !v.get("inputState").is_some()
3325                                        || !v.get("sessionId").is_some()
3326                                        || v.get("hasPendingEdits")
3327                                            .and_then(|v| v.as_bool())
3328                                            .unwrap_or(true)
3329                                            != false
3330                                        || v.get("pendingRequests")
3331                                            .and_then(|v| v.as_array())
3332                                            .map(|a| !a.is_empty())
3333                                            .unwrap_or(true);
3334
3335                                    if needs_fix {
3336                                        let session_id = path
3337                                            .file_stem()
3338                                            .and_then(|s| s.to_str())
3339                                            .map(|s| s.to_string());
3340                                        if let Some(v_mut) = obj.get_mut("v") {
3341                                            ensure_vscode_compat_fields(
3342                                                v_mut,
3343                                                session_id.as_deref(),
3344                                            );
3345                                        }
3346                                        let patched = serde_json::to_string(&obj).map_err(|e| {
3347                                            CsmError::InvalidSessionFormat(format!(
3348                                                "Failed to serialize: {}",
3349                                                e
3350                                            ))
3351                                        })?;
3352                                        // Trailing newline prevents concatenation
3353                                        std::fs::write(&path, format!("{}\n", patched))?;
3354                                        let stem = path
3355                                            .file_stem()
3356                                            .map(|s| s.to_string_lossy().to_string())
3357                                            .unwrap_or_default();
3358                                        println!("   [OK] Fixed VS Code compat fields: {}", stem);
3359                                        fields_fixed += 1;
3360                                    } else if !content.ends_with('\n') {
3361                                        // All compat fields correct but missing trailing newline
3362                                        std::fs::write(&path, format!("{}\n", first_line))?;
3363                                        let stem = path
3364                                            .file_stem()
3365                                            .map(|s| s.to_string_lossy().to_string())
3366                                            .unwrap_or_default();
3367                                        println!(
3368                                            "   [OK] Fixed missing trailing newline: {}",
3369                                            stem
3370                                        );
3371                                    }
3372                                }
3373                            }
3374                        }
3375                    }
3376                }
3377            }
3378        }
3379    }
3380
3381    // Pass 1.5: Convert skeleton .json files to valid .jsonl.
3382    // Skeleton files are legacy .json files where all data has been stripped,
3383    // leaving only structural characters ({}, [], whitespace). We convert them
3384    // to valid minimal .jsonl, preserving title/timestamp from the index,
3385    // and rename the original to .json.corrupt (non-destructive).
3386    let mut skeletons_converted = 0;
3387    if chat_sessions_dir.exists() {
3388        // Read current index to get titles/timestamps for converted sessions
3389        let index_entries: std::collections::HashMap<String, (String, Option<i64>)> =
3390            if let Ok(index) = read_chat_session_index(&db_path) {
3391                index
3392                    .entries
3393                    .iter()
3394                    .map(|(id, e)| (id.clone(), (e.title.clone(), Some(e.last_message_date))))
3395                    .collect()
3396            } else {
3397                std::collections::HashMap::new()
3398            };
3399
3400        // Collect .json files that don't have a corresponding .jsonl
3401        let mut jsonl_stems: HashSet<String> = HashSet::new();
3402        for entry in std::fs::read_dir(chat_sessions_dir)? {
3403            let entry = entry?;
3404            let path = entry.path();
3405            if path.extension().is_some_and(|e| e == "jsonl") {
3406                if let Some(stem) = path.file_stem() {
3407                    jsonl_stems.insert(stem.to_string_lossy().to_string());
3408                }
3409            }
3410        }
3411
3412        for entry in std::fs::read_dir(chat_sessions_dir)? {
3413            let entry = entry?;
3414            let path = entry.path();
3415            if path.extension().is_some_and(|e| e == "json")
3416                && !path.to_string_lossy().ends_with(".bak")
3417                && !path.to_string_lossy().ends_with(".corrupt")
3418            {
3419                let stem = path
3420                    .file_stem()
3421                    .map(|s| s.to_string_lossy().to_string())
3422                    .unwrap_or_default();
3423
3424                // Skip if .jsonl already exists
3425                if jsonl_stems.contains(&stem) {
3426                    continue;
3427                }
3428
3429                let (title, timestamp) = index_entries
3430                    .get(&stem)
3431                    .map(|(t, ts)| (t.as_str(), *ts))
3432                    .unwrap_or(("Recovered Session", None));
3433
3434                match convert_skeleton_json_to_jsonl(&path, Some(title), timestamp) {
3435                    Ok(Some(jsonl_path)) => {
3436                        println!(
3437                            "   [OK] Converted skeleton .json → .jsonl: {} (\"{}\")",
3438                            stem, title
3439                        );
3440                        // Track the new .jsonl so subsequent passes process it
3441                        jsonl_stems.insert(stem);
3442                        skeletons_converted += 1;
3443                        let _ = jsonl_path; // used implicitly via jsonl_stems
3444                    }
3445                    Ok(None) => {} // Not a skeleton or skipped
3446                    Err(e) => {
3447                        println!("   [WARN] Failed to convert skeleton {}: {}", stem, e);
3448                    }
3449                }
3450            }
3451        }
3452    }
3453
3454    // Pass 2: Fix cancelled modelState in all JSONL files.
3455    // VS Code reads modelState from file content (not the index) to determine
3456    // lastResponseState. If the last request has modelState.value=2 (Cancelled)
3457    // or is missing entirely, VS Code refuses to load the session.
3458    let mut cancelled_fixed = 0;
3459    if chat_sessions_dir.exists() {
3460        for entry in std::fs::read_dir(chat_sessions_dir)? {
3461            let entry = entry?;
3462            let path = entry.path();
3463            if path.extension().is_some_and(|e| e == "jsonl") {
3464                match fix_cancelled_model_state(&path) {
3465                    Ok(true) => {
3466                        let stem = path
3467                            .file_stem()
3468                            .map(|s| s.to_string_lossy().to_string())
3469                            .unwrap_or_default();
3470                        println!("   [OK] Fixed cancelled modelState: {}", stem);
3471                        cancelled_fixed += 1;
3472                    }
3473                    Ok(false) => {} // No fix needed
3474                    Err(e) => {
3475                        let stem = path
3476                            .file_stem()
3477                            .map(|s| s.to_string_lossy().to_string())
3478                            .unwrap_or_default();
3479                        println!("   [WARN] Failed to fix modelState for {}: {}", stem, e);
3480                    }
3481                }
3482            }
3483        }
3484    }
3485
3486    // Pass 3: Rebuild the index with correct metadata
3487    let (index_fixed, _) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
3488
3489    if fields_fixed > 0 {
3490        println!(
3491            "   [OK] Injected missing VS Code fields into {} session(s)",
3492            fields_fixed
3493        );
3494    }
3495    if skeletons_converted > 0 {
3496        println!(
3497            "   [OK] Converted {} skeleton .json file(s) to .jsonl",
3498            skeletons_converted
3499        );
3500    }
3501    if cancelled_fixed > 0 {
3502        println!(
3503            "   [OK] Fixed cancelled modelState in {} session(s)",
3504            cancelled_fixed
3505        );
3506    }
3507
3508    Ok((compacted, index_fixed))
3509}