Skip to main content

chasm/
storage.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: AGPL-3.0-only
3//! VS Code storage (SQLite database) operations
4
5use crate::error::{CsmError, Result};
6use crate::models::{
7    ChatRequest, ChatSession, ChatSessionIndex, ChatSessionIndexEntry, ChatSessionTiming,
8    ModelCacheEntry, StateCacheEntry,
9};
10use crate::workspace::{get_empty_window_sessions_path, get_workspace_storage_path};
11use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
12use once_cell::sync::Lazy;
13use regex::Regex;
14use rusqlite::Connection;
15use std::collections::HashSet;
16use std::path::{Path, PathBuf};
17use sysinfo::System;
18
19/// A single issue detected during workspace session diagnostics
20#[derive(Debug, Clone)]
21pub struct SessionIssue {
22    /// The session file stem (UUID)
23    pub session_id: String,
24    /// Category of issue
25    pub kind: SessionIssueKind,
26    /// Human-readable description
27    pub detail: String,
28}
29
/// Categories of session issues that can be detected and auto-fixed.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so that a kind can be used
/// as a `HashMap` / `HashSet` key (for example when tallying issues per kind).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SessionIssueKind {
    /// JSONL file has multiple lines (operations not compacted)
    MultiLineJsonl,
    /// JSONL first line contains concatenated JSON objects (missing newlines)
    ConcatenatedJsonl,
    /// Index entry has lastResponseState = 2 (Cancelled), blocks VS Code loading
    CancelledState,
    /// Last request's `modelState.value` in the file content is missing or is
    /// anything other than 1 (Complete), e.g. 2 (Cancelled)
    CancelledModelState,
    /// File exists on disk but is not in the VS Code index
    OrphanedSession,
    /// Index entry references a file that no longer exists on disk
    StaleIndexEntry,
    /// Session is missing required VS Code compat fields
    MissingCompatFields,
    /// Both .json and .jsonl exist for the same session ID
    DuplicateFormat,
    /// Legacy .json file is corrupted — contains only structural chars ({}, whitespace)
    SkeletonJson,
}
52
53impl std::fmt::Display for SessionIssueKind {
54    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55        match self {
56            SessionIssueKind::MultiLineJsonl => write!(f, "multi-line JSONL"),
57            SessionIssueKind::ConcatenatedJsonl => write!(f, "concatenated JSONL"),
58            SessionIssueKind::CancelledState => write!(f, "cancelled state"),
59            SessionIssueKind::CancelledModelState => write!(f, "cancelled modelState in file"),
60            SessionIssueKind::OrphanedSession => write!(f, "orphaned session"),
61            SessionIssueKind::StaleIndexEntry => write!(f, "stale index entry"),
62            SessionIssueKind::MissingCompatFields => write!(f, "missing compat fields"),
63            SessionIssueKind::DuplicateFormat => write!(f, "duplicate .json/.jsonl"),
64            SessionIssueKind::SkeletonJson => write!(f, "skeleton .json (corrupt)"),
65        }
66    }
67}
68
69/// Summary of issues found in a single workspace
70#[derive(Debug, Clone, Default)]
71pub struct WorkspaceDiagnosis {
72    /// Project path (if known)
73    pub project_path: Option<String>,
74    /// Workspace hash
75    pub workspace_hash: String,
76    /// Total sessions on disk
77    pub sessions_on_disk: usize,
78    /// Total sessions in index
79    pub sessions_in_index: usize,
80    /// All detected issues
81    pub issues: Vec<SessionIssue>,
82}
83
84impl WorkspaceDiagnosis {
85    pub fn is_healthy(&self) -> bool {
86        self.issues.is_empty()
87    }
88
89    pub fn issue_count_by_kind(&self, kind: &SessionIssueKind) -> usize {
90        self.issues.iter().filter(|i| &i.kind == kind).count()
91    }
92}
93
94/// Diagnose a workspace for session issues without modifying anything.
95/// Returns a structured report of all detected problems.
96pub fn diagnose_workspace_sessions(
97    workspace_id: &str,
98    chat_sessions_dir: &Path,
99) -> Result<WorkspaceDiagnosis> {
100    let mut diagnosis = WorkspaceDiagnosis {
101        workspace_hash: workspace_id.to_string(),
102        ..Default::default()
103    };
104
105    if !chat_sessions_dir.exists() {
106        return Ok(diagnosis);
107    }
108
109    // Collect session files on disk
110    let mut jsonl_sessions: HashSet<String> = HashSet::new();
111    let mut json_sessions: HashSet<String> = HashSet::new();
112    let mut all_session_ids: HashSet<String> = HashSet::new();
113
114    for entry in std::fs::read_dir(chat_sessions_dir)? {
115        let entry = entry?;
116        let path = entry.path();
117        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
118        let stem = path
119            .file_stem()
120            .map(|s| s.to_string_lossy().to_string())
121            .unwrap_or_default();
122
123        match ext {
124            "jsonl" => {
125                jsonl_sessions.insert(stem.clone());
126                all_session_ids.insert(stem);
127            }
128            "json" if !path.to_string_lossy().ends_with(".bak") => {
129                json_sessions.insert(stem.clone());
130                all_session_ids.insert(stem);
131            }
132            _ => {}
133        }
134    }
135    diagnosis.sessions_on_disk = all_session_ids.len();
136
137    // Check for duplicate .json/.jsonl files
138    for id in &jsonl_sessions {
139        if json_sessions.contains(id) {
140            diagnosis.issues.push(SessionIssue {
141                session_id: id.clone(),
142                kind: SessionIssueKind::DuplicateFormat,
143                detail: format!("Both {id}.json and {id}.jsonl exist"),
144            });
145        }
146    }
147
148    // Check JSONL files for content issues
149    for id in &jsonl_sessions {
150        let path = chat_sessions_dir.join(format!("{id}.jsonl"));
151        if let Ok(content) = std::fs::read_to_string(&path) {
152            let line_count = content.lines().count();
153
154            if line_count > 1 {
155                let size_mb = content.len() / (1024 * 1024);
156                diagnosis.issues.push(SessionIssue {
157                    session_id: id.clone(),
158                    kind: SessionIssueKind::MultiLineJsonl,
159                    detail: format!("{line_count} lines, ~{size_mb} MB — needs compaction"),
160                });
161            }
162
163            // Check first line for concatenation
164            if let Some(first_line) = content.lines().next() {
165                if first_line.contains("}{\"kind\":") {
166                    diagnosis.issues.push(SessionIssue {
167                        session_id: id.clone(),
168                        kind: SessionIssueKind::ConcatenatedJsonl,
169                        detail: "First line has concatenated JSON objects".to_string(),
170                    });
171                }
172            }
173
174            // Check for missing compat fields (only single-line files worth checking)
175            if line_count == 1 {
176                if let Some(first_line) = content.lines().next() {
177                    if let Ok(obj) = serde_json::from_str::<serde_json::Value>(first_line) {
178                        let is_kind_0 = obj
179                            .get("kind")
180                            .and_then(|k| k.as_u64())
181                            .map(|k| k == 0)
182                            .unwrap_or(false);
183
184                        if is_kind_0 {
185                            if let Some(v) = obj.get("v") {
186                                let missing_fields: Vec<&str> = [
187                                    "hasPendingEdits",
188                                    "pendingRequests",
189                                    "inputState",
190                                    "sessionId",
191                                    "version",
192                                ]
193                                .iter()
194                                .filter(|f| v.get(**f).is_none())
195                                .copied()
196                                .collect();
197
198                                if !missing_fields.is_empty() {
199                                    diagnosis.issues.push(SessionIssue {
200                                        session_id: id.clone(),
201                                        kind: SessionIssueKind::MissingCompatFields,
202                                        detail: format!("Missing: {}", missing_fields.join(", ")),
203                                    });
204                                }
205
206                                // Check for cancelled modelState in file content
207                                if let Some(requests) = v.get("requests").and_then(|r| r.as_array())
208                                {
209                                    if let Some(last_req) = requests.last() {
210                                        let model_state_value = last_req
211                                            .get("modelState")
212                                            .and_then(|ms| ms.get("value"))
213                                            .and_then(|v| v.as_u64());
214                                        match model_state_value {
215                                            Some(1) => {} // Complete — valid
216                                            Some(v) => {
217                                                diagnosis.issues.push(SessionIssue {
218                                                    session_id: id.clone(),
219                                                    kind: SessionIssueKind::CancelledModelState,
220                                                    detail: format!("Last request modelState.value={} (not Complete) in file content", v),
221                                                });
222                                            }
223                                            None => {
224                                                diagnosis.issues.push(SessionIssue {
225                                                    session_id: id.clone(),
226                                                    kind: SessionIssueKind::CancelledModelState,
227                                                    detail: "Last request missing modelState in file content".to_string(),
228                                                });
229                                            }
230                                        }
231                                    }
232                                }
233
234                                // Check hasPendingEdits — true blocks session loading
235                                if v.get("hasPendingEdits")
236                                    .and_then(|v| v.as_bool())
237                                    .unwrap_or(false)
238                                    == true
239                                {
240                                    diagnosis.issues.push(SessionIssue {
241                                        session_id: id.clone(),
242                                        kind: SessionIssueKind::MissingCompatFields,
243                                        detail: "hasPendingEdits is true (blocks session loading)"
244                                            .to_string(),
245                                    });
246                                }
247                            }
248                        }
249                    }
250                }
251            }
252        }
253    }
254
255    // Check .json files for skeleton corruption
256    for id in &json_sessions {
257        // Skip if a .jsonl already exists (it takes precedence)
258        if jsonl_sessions.contains(id) {
259            continue;
260        }
261        let path = chat_sessions_dir.join(format!("{id}.json"));
262        if let Ok(content) = std::fs::read_to_string(&path) {
263            if is_skeleton_json(&content) {
264                diagnosis.issues.push(SessionIssue {
265                    session_id: id.clone(),
266                    kind: SessionIssueKind::SkeletonJson,
267                    detail: format!(
268                        "Legacy .json is corrupt — only structural chars remain ({} bytes)",
269                        content.len()
270                    ),
271                });
272            }
273        }
274    }
275
276    // Check index for stale entries, orphans, and cancelled state
277    let db_path = get_workspace_storage_db(workspace_id)?;
278    if db_path.exists() {
279        if let Ok(index) = read_chat_session_index(&db_path) {
280            diagnosis.sessions_in_index = index.entries.len();
281
282            // Stale index entries (in index but no file on disk)
283            for (id, _entry) in &index.entries {
284                if !all_session_ids.contains(id) {
285                    diagnosis.issues.push(SessionIssue {
286                        session_id: id.clone(),
287                        kind: SessionIssueKind::StaleIndexEntry,
288                        detail: "In index but no file on disk".to_string(),
289                    });
290                }
291            }
292
293            // Cancelled state entries
294            for (id, entry) in &index.entries {
295                if entry.last_response_state == 2 {
296                    diagnosis.issues.push(SessionIssue {
297                        session_id: id.clone(),
298                        kind: SessionIssueKind::CancelledState,
299                        detail: "lastResponseState=2 (Cancelled) — blocks VS Code loading"
300                            .to_string(),
301                    });
302                }
303            }
304
305            // Orphaned sessions (on disk but not in index)
306            let indexed_ids: HashSet<&String> = index.entries.keys().collect();
307            for id in &all_session_ids {
308                if !indexed_ids.contains(id) {
309                    diagnosis.issues.push(SessionIssue {
310                        session_id: id.clone(),
311                        kind: SessionIssueKind::OrphanedSession,
312                        detail: "File on disk but not in VS Code index".to_string(),
313                    });
314                }
315            }
316        }
317    }
318
319    Ok(diagnosis)
320}
321
322/// Regex to match any Unicode escape sequence (valid or not)
323static UNICODE_ESCAPE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\\u[0-9a-fA-F]{4}").unwrap());
324
/// On-disk format of a VS Code session file; tells callers which parsing
/// strategy applies.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VsCodeSessionFormat {
    /// Legacy JSON format (VS Code < 1.109.0):
    /// a single JSON object with the ChatSession structure.
    LegacyJson,
    /// JSONL format (VS Code >= 1.109.0, January 2026+):
    /// JSON Lines with event sourcing — kind 0 (initial), kind 1 (delta),
    /// kind 2 (replace/splice).
    JsonLines,
}
335
/// Version of the internal session structure (the schema), independent of the
/// on-disk file format.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SessionSchemaVersion {
    /// Version 1 — original format (basic fields)
    V1 = 1,
    /// Version 2 — added more metadata fields
    V2 = 2,
    /// Version 3 — current format with full request/response structure
    V3 = 3,
    /// Version could not be determined
    Unknown = 0,
}
348
349impl SessionSchemaVersion {
350    /// Create from version number
351    pub fn from_version(v: u32) -> Self {
352        match v {
353            1 => Self::V1,
354            2 => Self::V2,
355            3 => Self::V3,
356            _ => Self::Unknown,
357        }
358    }
359
360    /// Get version number
361    pub fn version_number(&self) -> u32 {
362        match self {
363            Self::V1 => 1,
364            Self::V2 => 2,
365            Self::V3 => 3,
366            Self::Unknown => 0,
367        }
368    }
369
370    /// Get description
371    pub fn description(&self) -> &'static str {
372        match self {
373            Self::V1 => "v1 (basic)",
374            Self::V2 => "v2 (extended metadata)",
375            Self::V3 => "v3 (full structure)",
376            Self::Unknown => "unknown",
377        }
378    }
379}
380
381impl std::fmt::Display for SessionSchemaVersion {
382    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
383        write!(f, "{}", self.description())
384    }
385}
386
387/// Result of session format detection
388#[derive(Debug, Clone)]
389pub struct SessionFormatInfo {
390    /// File format (JSON or JSONL)
391    pub format: VsCodeSessionFormat,
392    /// Schema version detected from content
393    pub schema_version: SessionSchemaVersion,
394    /// Confidence level (0.0 - 1.0)
395    pub confidence: f32,
396    /// Detection method used
397    pub detection_method: &'static str,
398}
399
400impl VsCodeSessionFormat {
401    /// Detect format from file path (by extension)
402    pub fn from_path(path: &Path) -> Self {
403        match path.extension().and_then(|e| e.to_str()) {
404            Some("jsonl") => Self::JsonLines,
405            _ => Self::LegacyJson,
406        }
407    }
408
409    /// Detect format from content by analyzing structure
410    pub fn from_content(content: &str) -> Self {
411        let trimmed = content.trim();
412
413        // JSONL: Multiple lines starting with { or first line has {"kind":
414        if trimmed.starts_with("{\"kind\":") || trimmed.starts_with("{ \"kind\":") {
415            return Self::JsonLines;
416        }
417
418        // Count lines that look like JSON objects
419        let mut json_object_lines = 0;
420        let mut total_non_empty_lines = 0;
421
422        for line in trimmed.lines().take(10) {
423            let line = line.trim();
424            if line.is_empty() {
425                continue;
426            }
427            total_non_empty_lines += 1;
428
429            // Check if line is a JSON object with "kind" field (JSONL marker)
430            if line.starts_with('{') && line.contains("\"kind\"") {
431                json_object_lines += 1;
432            }
433        }
434
435        // If multiple lines look like JSONL entries, it's JSONL
436        if json_object_lines >= 2
437            || (json_object_lines == 1 && total_non_empty_lines == 1 && trimmed.contains("\n{"))
438        {
439            return Self::JsonLines;
440        }
441
442        // Check if it's a single JSON object (legacy format)
443        if trimmed.starts_with('{') && trimmed.ends_with('}') {
444            // Look for ChatSession structure markers
445            if trimmed.contains("\"sessionId\"")
446                || trimmed.contains("\"creationDate\"")
447                || trimmed.contains("\"requests\"")
448            {
449                return Self::LegacyJson;
450            }
451        }
452
453        // Default to legacy JSON if unclear
454        Self::LegacyJson
455    }
456
457    /// Get minimum VS Code version that uses this format
458    pub fn min_vscode_version(&self) -> &'static str {
459        match self {
460            Self::LegacyJson => "1.0.0",
461            Self::JsonLines => "1.109.0",
462        }
463    }
464
465    /// Get human-readable format description
466    pub fn description(&self) -> &'static str {
467        match self {
468            Self::LegacyJson => "Legacy JSON (single object)",
469            Self::JsonLines => "JSON Lines (event-sourced, VS Code 1.109.0+)",
470        }
471    }
472
473    /// Get short format name
474    pub fn short_name(&self) -> &'static str {
475        match self {
476            Self::LegacyJson => "json",
477            Self::JsonLines => "jsonl",
478        }
479    }
480}
481
482impl std::fmt::Display for VsCodeSessionFormat {
483    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
484        write!(f, "{}", self.description())
485    }
486}
487
488/// Sanitize JSON content by replacing lone surrogates with replacement character.
489/// VS Code sometimes writes invalid JSON with lone Unicode surrogates (e.g., \udde0).
490fn sanitize_json_unicode(content: &str) -> String {
491    // Process all \uXXXX sequences and fix lone surrogates
492    let mut result = String::with_capacity(content.len());
493    let mut last_end = 0;
494
495    // Collect all matches first to avoid borrowing issues
496    let matches: Vec<_> = UNICODE_ESCAPE_RE.find_iter(content).collect();
497
498    for (i, mat) in matches.iter().enumerate() {
499        let start = mat.start();
500        let end = mat.end();
501
502        // Add content before this match
503        result.push_str(&content[last_end..start]);
504
505        // Parse the hex value from the match itself (always ASCII \uXXXX)
506        let hex_str = &mat.as_str()[2..]; // Skip the \u prefix
507        if let Ok(code_point) = u16::from_str_radix(hex_str, 16) {
508            // Check if it's a high surrogate (D800-DBFF)
509            if (0xD800..=0xDBFF).contains(&code_point) {
510                // Check if next match is immediately following and is a low surrogate
511                let is_valid_pair = if let Some(next_mat) = matches.get(i + 1) {
512                    // Must be immediately adjacent (no gap)
513                    if next_mat.start() == end {
514                        let next_hex = &next_mat.as_str()[2..];
515                        if let Ok(next_cp) = u16::from_str_radix(next_hex, 16) {
516                            (0xDC00..=0xDFFF).contains(&next_cp)
517                        } else {
518                            false
519                        }
520                    } else {
521                        false
522                    }
523                } else {
524                    false
525                };
526
527                if is_valid_pair {
528                    // Valid surrogate pair, keep the high surrogate
529                    result.push_str(mat.as_str());
530                } else {
531                    // Lone high surrogate - replace with replacement char
532                    result.push_str("\\uFFFD");
533                }
534            }
535            // Check if it's a low surrogate (DC00-DFFF)
536            else if (0xDC00..=0xDFFF).contains(&code_point) {
537                // Check if previous match was immediately before and was a high surrogate
538                let is_valid_pair = if i > 0 {
539                    if let Some(prev_mat) = matches.get(i - 1) {
540                        // Must be immediately adjacent (no gap)
541                        if prev_mat.end() == start {
542                            let prev_hex = &prev_mat.as_str()[2..];
543                            if let Ok(prev_cp) = u16::from_str_radix(prev_hex, 16) {
544                                (0xD800..=0xDBFF).contains(&prev_cp)
545                            } else {
546                                false
547                            }
548                        } else {
549                            false
550                        }
551                    } else {
552                        false
553                    }
554                } else {
555                    false
556                };
557
558                if is_valid_pair {
559                    // Part of valid surrogate pair, keep it
560                    result.push_str(mat.as_str());
561                } else {
562                    // Lone low surrogate - replace with replacement char
563                    result.push_str("\\uFFFD");
564                }
565            }
566            // Normal code point
567            else {
568                result.push_str(mat.as_str());
569            }
570        } else {
571            // Invalid hex - keep as is
572            result.push_str(mat.as_str());
573        }
574        last_end = end;
575    }
576
577    // Add remaining content
578    result.push_str(&content[last_end..]);
579    result
580}
581
582/// Try to parse JSON, sanitizing invalid Unicode if needed
583pub fn parse_session_json(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
584    match serde_json::from_str::<ChatSession>(content) {
585        Ok(session) => Ok(session),
586        Err(e) => {
587            // If parsing fails due to Unicode issue, try sanitizing
588            if e.to_string().contains("surrogate") || e.to_string().contains("escape") {
589                let sanitized = sanitize_json_unicode(content);
590                serde_json::from_str::<ChatSession>(&sanitized)
591            } else {
592                Err(e)
593            }
594        }
595    }
596}
597
/// Kinds of entry found in the JSONL session format (VS Code 1.109.0+).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonlKind {
    /// Initial session state (kind: 0)
    Initial = 0,
    /// Delta update to specific keys (kind: 1)
    Delta = 1,
    /// Array replace/splice operation (kind: 2).
    /// An optional 'i' field specifies the splice index (truncate at i, then extend).
    ArraySplice = 2,
}
609
610/// Parse a JSONL (JSON Lines) session file (VS Code 1.109.0+ format)
611/// Each line is a JSON object with 'kind' field indicating the type:
612/// - kind 0: Initial session metadata with 'v' containing ChatSession-like structure
613/// - kind 1: Delta update with 'k' (keys path) and 'v' (value)
614/// - kind 2: Array replace/splice with 'k' (path), 'v' (items), optional 'i' (splice index)
615pub fn parse_session_jsonl(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
616    // Pre-process: split concatenated JSON objects that lack newline separators
617    let content = split_concatenated_jsonl(content);
618
619    let mut session = ChatSession {
620        version: 3,
621        session_id: None,
622        creation_date: 0,
623        last_message_date: 0,
624        is_imported: false,
625        initial_location: "panel".to_string(),
626        custom_title: None,
627        requester_username: None,
628        requester_avatar_icon_uri: None,
629        responder_username: None,
630        responder_avatar_icon_uri: None,
631        requests: Vec::new(),
632    };
633
634    for line in content.lines() {
635        let line = line.trim();
636        if line.is_empty() {
637            continue;
638        }
639
640        // Parse each line as a JSON object
641        let entry: serde_json::Value = match serde_json::from_str(line) {
642            Ok(v) => v,
643            Err(_) => {
644                // Try sanitizing Unicode
645                let sanitized = sanitize_json_unicode(line);
646                serde_json::from_str(&sanitized)?
647            }
648        };
649
650        let kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(0);
651
652        match kind {
653            0 => {
654                // Initial state - 'v' contains the session metadata
655                if let Some(v) = entry.get("v") {
656                    // Parse version
657                    if let Some(version) = v.get("version").and_then(|x| x.as_u64()) {
658                        session.version = version as u32;
659                    }
660                    // Parse session ID
661                    if let Some(sid) = v.get("sessionId").and_then(|x| x.as_str()) {
662                        session.session_id = Some(sid.to_string());
663                    }
664                    // Parse creation date
665                    if let Some(cd) = v.get("creationDate").and_then(|x| x.as_i64()) {
666                        session.creation_date = cd;
667                    }
668                    // Parse initial location
669                    if let Some(loc) = v.get("initialLocation").and_then(|x| x.as_str()) {
670                        session.initial_location = loc.to_string();
671                    }
672                    // Parse responder username
673                    if let Some(ru) = v.get("responderUsername").and_then(|x| x.as_str()) {
674                        session.responder_username = Some(ru.to_string());
675                    }
676                    // Parse custom title
677                    if let Some(title) = v.get("customTitle").and_then(|x| x.as_str()) {
678                        session.custom_title = Some(title.to_string());
679                    }
680                    // Parse hasPendingEdits as imported marker
681                    if let Some(imported) = v.get("isImported").and_then(|x| x.as_bool()) {
682                        session.is_imported = imported;
683                    }
684                    // Parse requests array if present
685                    if let Some(requests) = v.get("requests") {
686                        if let Ok(reqs) =
687                            serde_json::from_value::<Vec<ChatRequest>>(requests.clone())
688                        {
689                            session.requests = reqs;
690                            // Compute last_message_date from the latest request timestamp
691                            if let Some(latest_ts) =
692                                session.requests.iter().filter_map(|r| r.timestamp).max()
693                            {
694                                session.last_message_date = latest_ts;
695                            }
696                        }
697                    }
698                    // Fall back to creationDate if no request timestamps found
699                    if session.last_message_date == 0 {
700                        session.last_message_date = session.creation_date;
701                    }
702                }
703            }
704            1 => {
705                // Delta update - 'k' is array of key path, 'v' is the value
706                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
707                    if let Some(keys_arr) = keys.as_array() {
708                        // Handle top-level session keys
709                        if keys_arr.len() == 1 {
710                            if let Some(key) = keys_arr[0].as_str() {
711                                match key {
712                                    "customTitle" => {
713                                        if let Some(title) = value.as_str() {
714                                            session.custom_title = Some(title.to_string());
715                                        }
716                                    }
717                                    "lastMessageDate" => {
718                                        if let Some(date) = value.as_i64() {
719                                            session.last_message_date = date;
720                                        }
721                                    }
722                                    "hasPendingEdits" | "isImported" => {
723                                        // Session-level boolean updates, safe to ignore for now
724                                    }
725                                    _ => {} // Ignore unknown keys
726                                }
727                            }
728                        }
729                        // Handle nested request field updates: ["requests", idx, field]
730                        else if keys_arr.len() == 3 {
731                            if let (Some("requests"), Some(idx), Some(field)) = (
732                                keys_arr[0].as_str(),
733                                keys_arr[1].as_u64().map(|i| i as usize),
734                                keys_arr[2].as_str(),
735                            ) {
736                                if idx < session.requests.len() {
737                                    match field {
738                                        "response" => {
739                                            session.requests[idx].response = Some(value.clone());
740                                        }
741                                        "result" => {
742                                            session.requests[idx].result = Some(value.clone());
743                                        }
744                                        "followups" => {
745                                            session.requests[idx].followups =
746                                                serde_json::from_value(value.clone()).ok();
747                                        }
748                                        "isCanceled" => {
749                                            session.requests[idx].is_canceled = value.as_bool();
750                                        }
751                                        "contentReferences" => {
752                                            session.requests[idx].content_references =
753                                                serde_json::from_value(value.clone()).ok();
754                                        }
755                                        "codeCitations" => {
756                                            session.requests[idx].code_citations =
757                                                serde_json::from_value(value.clone()).ok();
758                                        }
759                                        "modelState" | "modelId" | "agent" | "variableData" => {
760                                            // Known request fields - update as generic Value
761                                            // modelState tracks the request lifecycle
762                                        }
763                                        _ => {} // Ignore unknown request fields
764                                    }
765                                }
766                            }
767                        }
768                    }
769                }
770            }
771            2 => {
772                // Array splice operation - 'k' is the key path, 'v' is the new array items
773                // Optional 'i' field is the splice start index (truncate at i, then extend)
774                // Without 'i', items are appended to the end of the array
775                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
776                    let splice_index = entry.get("i").and_then(|i| i.as_u64()).map(|i| i as usize);
777                    if let Some(keys_arr) = keys.as_array() {
778                        // Top-level requests: k=["requests"], v=[requests_array]
779                        if keys_arr.len() == 1 {
780                            if let Some("requests") = keys_arr[0].as_str() {
781                                if let Some(items) = value.as_array() {
782                                    if let Some(idx) = splice_index {
783                                        // Splice: truncate at index i, then extend with new items
784                                        session.requests.truncate(idx);
785                                    }
786                                    // Without 'i': append to end (no truncation)
787                                    for item in items {
788                                        if let Ok(req) =
789                                            serde_json::from_value::<ChatRequest>(item.clone())
790                                        {
791                                            session.requests.push(req);
792                                        }
793                                    }
794                                    // Update last message date from latest request
795                                    if let Some(last_req) = session.requests.last() {
796                                        if let Some(ts) = last_req.timestamp {
797                                            session.last_message_date = ts;
798                                        }
799                                    }
800                                }
801                            }
802                        }
803                        // Nested array replace/splice: k=["requests", idx, "response"], v=[parts]
804                        else if keys_arr.len() == 3 {
805                            if let (Some("requests"), Some(req_idx), Some(field)) = (
806                                keys_arr[0].as_str(),
807                                keys_arr[1].as_u64().map(|i| i as usize),
808                                keys_arr[2].as_str(),
809                            ) {
810                                if req_idx < session.requests.len() {
811                                    match field {
812                                        "response" => {
813                                            // Response is stored as a JSON Value (array)
814                                            if let Some(idx) = splice_index {
815                                                // Splice: keep items before index i, replace rest
816                                                if let Some(existing) =
817                                                    session.requests[req_idx].response.as_ref()
818                                                {
819                                                    if let Some(existing_arr) = existing.as_array()
820                                                    {
821                                                        let mut new_arr: Vec<serde_json::Value> =
822                                                            existing_arr
823                                                                [..idx.min(existing_arr.len())]
824                                                                .to_vec();
825                                                        if let Some(new_items) = value.as_array() {
826                                                            new_arr
827                                                                .extend(new_items.iter().cloned());
828                                                        }
829                                                        session.requests[req_idx].response =
830                                                            Some(serde_json::Value::Array(new_arr));
831                                                    } else {
832                                                        session.requests[req_idx].response =
833                                                            Some(value.clone());
834                                                    }
835                                                } else {
836                                                    session.requests[req_idx].response =
837                                                        Some(value.clone());
838                                                }
839                                            } else {
840                                                // No splice index: append to existing response array
841                                                if let Some(existing) =
842                                                    session.requests[req_idx].response.as_ref()
843                                                {
844                                                    if let Some(existing_arr) = existing.as_array()
845                                                    {
846                                                        let mut new_arr = existing_arr.clone();
847                                                        if let Some(new_items) = value.as_array() {
848                                                            new_arr
849                                                                .extend(new_items.iter().cloned());
850                                                        }
851                                                        session.requests[req_idx].response =
852                                                            Some(serde_json::Value::Array(new_arr));
853                                                    } else {
854                                                        session.requests[req_idx].response =
855                                                            Some(value.clone());
856                                                    }
857                                                } else {
858                                                    session.requests[req_idx].response =
859                                                        Some(value.clone());
860                                                }
861                                            }
862                                        }
863                                        "contentReferences" => {
864                                            session.requests[req_idx].content_references =
865                                                serde_json::from_value(value.clone()).ok();
866                                        }
867                                        _ => {} // Ignore unknown fields
868                                    }
869                                }
870                            }
871                        }
872                    }
873                }
874            }
875            _ => {} // Unknown kind, skip
876        }
877    }
878
879    Ok(session)
880}
881
/// Report whether `ext` is one of the extensions used for session files.
///
/// The recognised extensions are `json`, `jsonl` and `backup`. The comparison
/// is exact (case-sensitive) and expects the extension without a leading dot.
pub fn is_session_file_extension(ext: &std::ffi::OsStr) -> bool {
    // An extension that is not valid UTF-8 can never equal one of these names.
    matches!(ext.to_str(), Some("json" | "jsonl" | "backup"))
}
886
887/// Detect session format and version from content
888pub fn detect_session_format(content: &str) -> SessionFormatInfo {
889    let format = VsCodeSessionFormat::from_content(content);
890    let trimmed = content.trim();
891
892    // Detect schema version based on format
893    let (schema_version, confidence, method) = match format {
894        VsCodeSessionFormat::JsonLines => {
895            // For JSONL, check the first line's "v" object for version
896            if let Some(first_line) = trimmed.lines().next() {
897                if let Ok(entry) = serde_json::from_str::<serde_json::Value>(first_line) {
898                    if let Some(v) = entry.get("v") {
899                        if let Some(ver) = v.get("version").and_then(|x| x.as_u64()) {
900                            (
901                                SessionSchemaVersion::from_version(ver as u32),
902                                0.95,
903                                "jsonl-version-field",
904                            )
905                        } else {
906                            // No version field, likely v3 (current default)
907                            (SessionSchemaVersion::V3, 0.7, "jsonl-default")
908                        }
909                    } else {
910                        (SessionSchemaVersion::V3, 0.6, "jsonl-no-v-field")
911                    }
912                } else {
913                    (SessionSchemaVersion::Unknown, 0.3, "jsonl-parse-error")
914                }
915            } else {
916                (SessionSchemaVersion::Unknown, 0.2, "jsonl-empty")
917            }
918        }
919        VsCodeSessionFormat::LegacyJson => {
920            // For JSON, directly check the version field
921            if let Ok(json) = serde_json::from_str::<serde_json::Value>(trimmed) {
922                if let Some(ver) = json.get("version").and_then(|x| x.as_u64()) {
923                    (
924                        SessionSchemaVersion::from_version(ver as u32),
925                        0.95,
926                        "json-version-field",
927                    )
928                } else {
929                    // Infer from structure
930                    if json.get("requests").is_some() && json.get("sessionId").is_some() {
931                        (SessionSchemaVersion::V3, 0.8, "json-structure-inference")
932                    } else if json.get("messages").is_some() {
933                        (SessionSchemaVersion::V1, 0.7, "json-legacy-structure")
934                    } else {
935                        (SessionSchemaVersion::Unknown, 0.4, "json-unknown-structure")
936                    }
937                }
938            } else {
939                // Try sanitizing and parsing again
940                let sanitized = sanitize_json_unicode(trimmed);
941                if let Ok(json) = serde_json::from_str::<serde_json::Value>(&sanitized) {
942                    if let Some(ver) = json.get("version").and_then(|x| x.as_u64()) {
943                        (
944                            SessionSchemaVersion::from_version(ver as u32),
945                            0.9,
946                            "json-version-after-sanitize",
947                        )
948                    } else {
949                        (SessionSchemaVersion::V3, 0.6, "json-default-after-sanitize")
950                    }
951                } else {
952                    (SessionSchemaVersion::Unknown, 0.2, "json-parse-error")
953                }
954            }
955        }
956    };
957
958    SessionFormatInfo {
959        format,
960        schema_version,
961        confidence,
962        detection_method: method,
963    }
964}
965
966/// Parse session content with automatic format detection
967pub fn parse_session_auto(
968    content: &str,
969) -> std::result::Result<(ChatSession, SessionFormatInfo), serde_json::Error> {
970    let format_info = detect_session_format(content);
971
972    let session = match format_info.format {
973        VsCodeSessionFormat::JsonLines => parse_session_jsonl(content)?,
974        VsCodeSessionFormat::LegacyJson => parse_session_json(content)?,
975    };
976
977    Ok((session, format_info))
978}
979
980/// Parse a session file, automatically detecting format from content (not just extension)
981pub fn parse_session_file(path: &Path) -> std::result::Result<ChatSession, serde_json::Error> {
982    let content = std::fs::read_to_string(path)
983        .map_err(|e| serde_json::Error::io(std::io::Error::other(e.to_string())))?;
984
985    // Use content-based auto-detection
986    let (session, _format_info) = parse_session_auto(&content)?;
987    Ok(session)
988}
989
990/// Get the path to the workspace storage database
991pub fn get_workspace_storage_db(workspace_id: &str) -> Result<PathBuf> {
992    let storage_path = get_workspace_storage_path()?;
993    Ok(storage_path.join(workspace_id).join("state.vscdb"))
994}
995
996/// Read the chat session index from VS Code storage
997pub fn read_chat_session_index(db_path: &Path) -> Result<ChatSessionIndex> {
998    let conn = Connection::open(db_path)?;
999
1000    let result: std::result::Result<String, rusqlite::Error> = conn.query_row(
1001        "SELECT value FROM ItemTable WHERE key = ?",
1002        ["chat.ChatSessionStore.index"],
1003        |row| row.get(0),
1004    );
1005
1006    match result {
1007        Ok(json_str) => serde_json::from_str(&json_str)
1008            .map_err(|e| CsmError::InvalidSessionFormat(e.to_string())),
1009        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(ChatSessionIndex::default()),
1010        Err(e) => Err(CsmError::SqliteError(e)),
1011    }
1012}
1013
1014/// Write the chat session index to VS Code storage
1015pub fn write_chat_session_index(db_path: &Path, index: &ChatSessionIndex) -> Result<()> {
1016    let conn = Connection::open(db_path)?;
1017    let json_str = serde_json::to_string(index)?;
1018
1019    // Check if the key exists
1020    let exists: bool = conn.query_row(
1021        "SELECT COUNT(*) > 0 FROM ItemTable WHERE key = ?",
1022        ["chat.ChatSessionStore.index"],
1023        |row| row.get(0),
1024    )?;
1025
1026    if exists {
1027        conn.execute(
1028            "UPDATE ItemTable SET value = ? WHERE key = ?",
1029            [&json_str, "chat.ChatSessionStore.index"],
1030        )?;
1031    } else {
1032        conn.execute(
1033            "INSERT INTO ItemTable (key, value) VALUES (?, ?)",
1034            ["chat.ChatSessionStore.index", &json_str],
1035        )?;
1036    }
1037
1038    Ok(())
1039}
1040
1041// ── Generic DB key read/write ──────────────────────────────────────────────
1042
1043/// Read a JSON value from the VS Code state DB by key
1044fn read_db_json(db_path: &Path, key: &str) -> Result<Option<serde_json::Value>> {
1045    let conn = Connection::open(db_path)?;
1046    let result: std::result::Result<String, rusqlite::Error> =
1047        conn.query_row("SELECT value FROM ItemTable WHERE key = ?", [key], |row| {
1048            row.get(0)
1049        });
1050    match result {
1051        Ok(json_str) => {
1052            let v = serde_json::from_str(&json_str)
1053                .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
1054            Ok(Some(v))
1055        }
1056        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
1057        Err(e) => Err(CsmError::SqliteError(e)),
1058    }
1059}
1060
1061/// Write a JSON value to the VS Code state DB (upsert)
1062fn write_db_json(db_path: &Path, key: &str, value: &serde_json::Value) -> Result<()> {
1063    let conn = Connection::open(db_path)?;
1064    let json_str = serde_json::to_string(value)?;
1065    conn.execute(
1066        "INSERT OR REPLACE INTO ItemTable (key, value) VALUES (?, ?)",
1067        rusqlite::params![key, json_str],
1068    )?;
1069    Ok(())
1070}
1071
1072// ── Session resource URI helpers ───────────────────────────────────────────
1073
1074/// Build the `vscode-chat-session://local/{base64(sessionId)}` resource URI
1075/// that VS Code uses to identify sessions in model cache and state cache.
1076pub fn session_resource_uri(session_id: &str) -> String {
1077    let b64 = BASE64.encode(session_id.as_bytes());
1078    format!("vscode-chat-session://local/{}", b64)
1079}
1080
1081/// Extract a session ID from a `vscode-chat-session://` resource URI.
1082/// Returns `None` if the URI doesn't match the expected format.
1083pub fn session_id_from_resource_uri(uri: &str) -> Option<String> {
1084    let prefix = "vscode-chat-session://local/";
1085    if let Some(b64) = uri.strip_prefix(prefix) {
1086        BASE64
1087            .decode(b64)
1088            .ok()
1089            .and_then(|bytes| String::from_utf8(bytes).ok())
1090    } else {
1091        None
1092    }
1093}
1094
1095// ── Model cache (agentSessions.model.cache) ────────────────────────────────
1096
1097const MODEL_CACHE_KEY: &str = "agentSessions.model.cache";
1098
1099/// Read the `agentSessions.model.cache` from VS Code storage.
1100/// Returns an empty Vec if the key doesn't exist.
1101pub fn read_model_cache(db_path: &Path) -> Result<Vec<ModelCacheEntry>> {
1102    match read_db_json(db_path, MODEL_CACHE_KEY)? {
1103        Some(v) => serde_json::from_value(v)
1104            .map_err(|e| CsmError::InvalidSessionFormat(format!("model cache: {}", e))),
1105        None => Ok(Vec::new()),
1106    }
1107}
1108
1109/// Write the `agentSessions.model.cache` to VS Code storage.
1110pub fn write_model_cache(db_path: &Path, cache: &[ModelCacheEntry]) -> Result<()> {
1111    let v = serde_json::to_value(cache)?;
1112    write_db_json(db_path, MODEL_CACHE_KEY, &v)
1113}
1114
1115/// Rebuild the model cache from the session index. This makes sessions visible
1116/// in the Chat panel sidebar. Only non-empty sessions get entries (VS Code
1117/// hides empty ones).
1118pub fn rebuild_model_cache(db_path: &Path, index: &ChatSessionIndex) -> Result<usize> {
1119    let mut cache: Vec<ModelCacheEntry> = Vec::new();
1120
1121    for (session_id, entry) in &index.entries {
1122        // Only include non-empty sessions — empty ones are hidden in the sidebar
1123        if entry.is_empty {
1124            continue;
1125        }
1126
1127        let timing = entry.timing.clone().unwrap_or(ChatSessionTiming {
1128            created: entry.last_message_date,
1129            last_request_started: Some(entry.last_message_date),
1130            last_request_ended: Some(entry.last_message_date),
1131        });
1132
1133        cache.push(ModelCacheEntry {
1134            provider_type: "local".to_string(),
1135            provider_label: "Local".to_string(),
1136            resource: session_resource_uri(session_id),
1137            icon: "vm".to_string(),
1138            label: entry.title.clone(),
1139            status: 1,
1140            timing,
1141            initial_location: entry.initial_location.clone(),
1142            has_pending_edits: false,
1143            is_empty: false,
1144            is_external: entry.is_external.unwrap_or(false),
1145            last_response_state: 1, // Complete
1146        });
1147    }
1148
1149    let count = cache.len();
1150    write_model_cache(db_path, &cache)?;
1151    Ok(count)
1152}
1153
1154// ── State cache (agentSessions.state.cache) ────────────────────────────────
1155
1156const STATE_CACHE_KEY: &str = "agentSessions.state.cache";
1157
1158/// Read the `agentSessions.state.cache` from VS Code storage.
1159pub fn read_state_cache(db_path: &Path) -> Result<Vec<StateCacheEntry>> {
1160    match read_db_json(db_path, STATE_CACHE_KEY)? {
1161        Some(v) => serde_json::from_value(v)
1162            .map_err(|e| CsmError::InvalidSessionFormat(format!("state cache: {}", e))),
1163        None => Ok(Vec::new()),
1164    }
1165}
1166
1167/// Write the `agentSessions.state.cache` to VS Code storage.
1168pub fn write_state_cache(db_path: &Path, cache: &[StateCacheEntry]) -> Result<()> {
1169    let v = serde_json::to_value(cache)?;
1170    write_db_json(db_path, STATE_CACHE_KEY, &v)
1171}
1172
1173/// Remove state cache entries whose resource URIs reference sessions that no
1174/// longer exist on disk. Returns the number of stale entries removed.
1175pub fn cleanup_state_cache(db_path: &Path, valid_session_ids: &HashSet<String>) -> Result<usize> {
1176    let entries = read_state_cache(db_path)?;
1177    let valid_resources: HashSet<String> = valid_session_ids
1178        .iter()
1179        .map(|id| session_resource_uri(id))
1180        .collect();
1181
1182    let before = entries.len();
1183    let cleaned: Vec<StateCacheEntry> = entries
1184        .into_iter()
1185        .filter(|e| valid_resources.contains(&e.resource))
1186        .collect();
1187    let removed = before - cleaned.len();
1188
1189    if removed > 0 {
1190        write_state_cache(db_path, &cleaned)?;
1191    }
1192
1193    Ok(removed)
1194}
1195
1196// ── Memento (memento/interactive-session-view-copilot) ──────────────────────
1197
1198const MEMENTO_KEY: &str = "memento/interactive-session-view-copilot";
1199
1200/// Read the Copilot Chat memento (tracks the last-active session).
1201pub fn read_session_memento(db_path: &Path) -> Result<Option<serde_json::Value>> {
1202    read_db_json(db_path, MEMENTO_KEY)
1203}
1204
1205/// Write the Copilot Chat memento.
1206pub fn write_session_memento(db_path: &Path, value: &serde_json::Value) -> Result<()> {
1207    write_db_json(db_path, MEMENTO_KEY, value)
1208}
1209
1210/// Fix the memento so it points to a session that actually exists.
1211/// If the current memento references a deleted/non-existent session, update it
1212/// to the most recently active valid session. Returns `true` if the memento was
1213/// changed.
1214pub fn fix_session_memento(
1215    db_path: &Path,
1216    valid_session_ids: &HashSet<String>,
1217    preferred_session_id: Option<&str>,
1218) -> Result<bool> {
1219    let memento = read_session_memento(db_path)?;
1220
1221    let current_sid = memento
1222        .as_ref()
1223        .and_then(|v| v.get("sessionId"))
1224        .and_then(|v| v.as_str())
1225        .map(|s| s.to_string());
1226
1227    // Check if current memento already points to a valid session
1228    if let Some(ref sid) = current_sid {
1229        if valid_session_ids.contains(sid) {
1230            return Ok(false); // Already valid
1231        }
1232    }
1233
1234    // Pick a session to point to: prefer the explicit choice, otherwise pick any valid one
1235    let target = preferred_session_id
1236        .filter(|id| valid_session_ids.contains(*id))
1237        .or_else(|| valid_session_ids.iter().next().map(|s| s.as_str()));
1238
1239    if let Some(target_id) = target {
1240        let mut new_memento = memento.unwrap_or(serde_json::json!({}));
1241        if let Some(obj) = new_memento.as_object_mut() {
1242            obj.insert(
1243                "sessionId".to_string(),
1244                serde_json::Value::String(target_id.to_string()),
1245            );
1246        }
1247        write_session_memento(db_path, &new_memento)?;
1248        Ok(true)
1249    } else {
1250        Ok(false) // No valid sessions to point to
1251    }
1252}
1253
1254// ── .json.bak recovery ─────────────────────────────────────────────────────
1255
1256/// Count the number of requests in a session's `v.requests` array from a JSONL
1257/// file (reads only the first kind:0 line).
1258fn count_jsonl_requests(path: &Path) -> Result<usize> {
1259    let content = std::fs::read_to_string(path)
1260        .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
1261    let first_line = content.lines().next().unwrap_or("");
1262    let parsed: serde_json::Value = serde_json::from_str(first_line)
1263        .map_err(|e| CsmError::InvalidSessionFormat(format!("Parse error: {}", e)))?;
1264
1265    let count = parsed
1266        .get("v")
1267        .or_else(|| Some(&parsed)) // bare JSON (non-JSONL) may not have "v" wrapper
1268        .and_then(|v| v.get("requests"))
1269        .and_then(|r| r.as_array())
1270        .map(|a| a.len())
1271        .unwrap_or(0);
1272
1273    Ok(count)
1274}
1275
1276/// Count the number of requests in a `.json.bak` (or `.json`) file.
1277fn count_json_bak_requests(path: &Path) -> Result<usize> {
1278    let content = std::fs::read_to_string(path)
1279        .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
1280    let parsed: serde_json::Value = serde_json::from_str(&content)
1281        .map_err(|e| CsmError::InvalidSessionFormat(format!("Parse error: {}", e)))?;
1282
1283    let count = parsed
1284        .get("requests")
1285        .and_then(|r| r.as_array())
1286        .map(|a| a.len())
1287        .unwrap_or(0);
1288
1289    Ok(count)
1290}
1291
1292/// Migrate old-format inputState fields from top-level to a nested `inputState`
1293/// object. VS Code version 3 expects `inputState` as a sub-object with keys
1294/// `attachments`, `mode`, `inputText`, `selections`, `contrib`.
1295///
1296/// Old format (pre-v3): `{ "attachments": [...], "mode": {...}, "inputText": "...", ... }`
1297/// New format (v3):     `{ "inputState": { "attachments": [...], "mode": {...}, ... } }`
1298pub fn migrate_old_input_state(state: &mut serde_json::Value) {
1299    if let Some(obj) = state.as_object_mut() {
1300        // Only migrate if inputState doesn't already exist AND old top-level fields do
1301        if obj.contains_key("inputState") {
1302            return;
1303        }
1304
1305        let old_keys = [
1306            "attachments",
1307            "mode",
1308            "inputText",
1309            "selections",
1310            "contrib",
1311            "selectedModel",
1312        ];
1313        let has_old = old_keys.iter().any(|k| obj.contains_key(*k));
1314
1315        if has_old {
1316            let mut input_state = serde_json::Map::new();
1317
1318            // Move each old key into the nested object (with defaults)
1319            input_state.insert(
1320                "attachments".to_string(),
1321                obj.remove("attachments").unwrap_or(serde_json::json!([])),
1322            );
1323            input_state.insert(
1324                "mode".to_string(),
1325                obj.remove("mode")
1326                    .unwrap_or(serde_json::json!({"id": "agent", "kind": "agent"})),
1327            );
1328            input_state.insert(
1329                "inputText".to_string(),
1330                obj.remove("inputText").unwrap_or(serde_json::json!("")),
1331            );
1332            input_state.insert(
1333                "selections".to_string(),
1334                obj.remove("selections").unwrap_or(serde_json::json!([])),
1335            );
1336            input_state.insert(
1337                "contrib".to_string(),
1338                obj.remove("contrib").unwrap_or(serde_json::json!({})),
1339            );
1340
1341            // selectedModel is optional, only include if present
1342            if let Some(model) = obj.remove("selectedModel") {
1343                input_state.insert("selectedModel".to_string(), model);
1344            }
1345
1346            obj.insert(
1347                "inputState".to_string(),
1348                serde_json::Value::Object(input_state),
1349            );
1350        }
1351    }
1352}
1353
1354/// Recover sessions from `.json.bak` files when the corresponding `.jsonl` has
1355/// fewer requests (indicating a truncated migration/compaction). For each .jsonl
1356/// that has a co-located .json.bak with more requests, rebuilds the .jsonl from
1357/// the backup data.
1358///
1359/// Returns the number of sessions recovered from backups.
1360pub fn recover_from_json_bak(chat_sessions_dir: &Path) -> Result<usize> {
1361    if !chat_sessions_dir.exists() {
1362        return Ok(0);
1363    }
1364
1365    let mut recovered = 0;
1366
1367    // Collect all .json.bak files
1368    let mut bak_files: Vec<PathBuf> = Vec::new();
1369    for entry in std::fs::read_dir(chat_sessions_dir)? {
1370        let entry = entry?;
1371        let path = entry.path();
1372        if path.to_string_lossy().ends_with(".json.bak") {
1373            bak_files.push(path);
1374        }
1375    }
1376
1377    for bak_path in &bak_files {
1378        // Derive session ID and .jsonl path
1379        let bak_name = bak_path
1380            .file_name()
1381            .unwrap_or_default()
1382            .to_string_lossy()
1383            .to_string();
1384        let session_id = bak_name.trim_end_matches(".json.bak");
1385        let jsonl_path = chat_sessions_dir.join(format!("{}.jsonl", session_id));
1386
1387        // Get request counts
1388        let bak_count = match count_json_bak_requests(bak_path) {
1389            Ok(c) => c,
1390            Err(_) => continue, // Skip unparseable backups
1391        };
1392
1393        if bak_count == 0 {
1394            continue; // Backup has no data, skip
1395        }
1396
1397        let jsonl_count = if jsonl_path.exists() {
1398            count_jsonl_requests(&jsonl_path).unwrap_or(0)
1399        } else {
1400            0 // No .jsonl at all — definitely recover from backup
1401        };
1402
1403        if bak_count <= jsonl_count {
1404            continue; // .jsonl already has equal or more data
1405        }
1406
1407        // .json.bak has more requests — recover from it
1408        println!(
1409            "   [*] .json.bak has {} requests vs .jsonl has {} for {}",
1410            bak_count, jsonl_count, session_id
1411        );
1412
1413        // Read the full backup
1414        let bak_content = match std::fs::read_to_string(bak_path) {
1415            Ok(c) => c,
1416            Err(e) => {
1417                println!("   [WARN] Failed to read .json.bak {}: {}", session_id, e);
1418                continue;
1419            }
1420        };
1421        let mut full_data: serde_json::Value = match serde_json::from_str(&bak_content) {
1422            Ok(v) => v,
1423            Err(e) => {
1424                println!("   [WARN] Failed to parse .json.bak {}: {}", session_id, e);
1425                continue;
1426            }
1427        };
1428
1429        // Clean up: build ISerializableChatData3 format
1430        if let Some(obj) = full_data.as_object_mut() {
1431            // Ensure version 3
1432            obj.insert("version".to_string(), serde_json::json!(3));
1433
1434            // Ensure sessionId
1435            if !obj.contains_key("sessionId") {
1436                obj.insert("sessionId".to_string(), serde_json::json!(session_id));
1437            }
1438
1439            // Force safe values
1440            obj.insert("hasPendingEdits".to_string(), serde_json::json!(false));
1441            obj.insert("pendingRequests".to_string(), serde_json::json!([]));
1442
1443            // Ensure responderUsername
1444            if !obj.contains_key("responderUsername") {
1445                obj.insert(
1446                    "responderUsername".to_string(),
1447                    serde_json::json!("GitHub Copilot"),
1448                );
1449            }
1450
1451            // Migrate old inputState format
1452            migrate_old_input_state(&mut full_data);
1453
1454            // Fix modelState values in requests
1455            fix_request_model_states(&mut full_data);
1456        }
1457
1458        // Backup existing .jsonl if present
1459        if jsonl_path.exists() {
1460            let pre_fix_bak = jsonl_path.with_extension("jsonl.pre_bak_recovery");
1461            if let Err(e) = std::fs::copy(&jsonl_path, &pre_fix_bak) {
1462                println!(
1463                    "   [WARN] Failed to backup .jsonl before recovery {}: {}",
1464                    session_id, e
1465                );
1466                continue;
1467            }
1468        }
1469
1470        // Write new JSONL kind:0
1471        let jsonl_obj = serde_json::json!({"kind": 0, "v": full_data});
1472        let jsonl_str = serde_json::to_string(&jsonl_obj).map_err(|e| {
1473            CsmError::InvalidSessionFormat(format!("Failed to serialize recovered session: {}", e))
1474        })?;
1475        std::fs::write(&jsonl_path, format!("{}\n", jsonl_str))?;
1476
1477        println!(
1478            "   [OK] Recovered {} from .json.bak ({} → {} requests)",
1479            session_id, jsonl_count, bak_count
1480        );
1481        recovered += 1;
1482    }
1483
1484    Ok(recovered)
1485}
1486
1487/// Fix modelState values in a session's requests array.
1488/// - Pending (value=0) or Cancelled (value=2) → set to Cancelled (3) with completedAt
1489/// - Terminal states (1, 3, 4) without completedAt → add completedAt from request timestamp
1490fn fix_request_model_states(session_data: &mut serde_json::Value) {
1491    let requests = match session_data
1492        .get_mut("requests")
1493        .and_then(|r| r.as_array_mut())
1494    {
1495        Some(r) => r,
1496        None => return,
1497    };
1498
1499    for req in requests.iter_mut() {
1500        let timestamp = req
1501            .get("timestamp")
1502            .and_then(|t| t.as_i64())
1503            .unwrap_or_else(|| {
1504                std::time::SystemTime::now()
1505                    .duration_since(std::time::UNIX_EPOCH)
1506                    .unwrap_or_default()
1507                    .as_millis() as i64
1508            });
1509
1510        if let Some(ms) = req.get_mut("modelState") {
1511            if let Some(val) = ms.get("value").and_then(|v| v.as_u64()) {
1512                match val {
1513                    0 | 2 => {
1514                        // Pending or Cancelled → force to Cancelled with completedAt
1515                        *ms = serde_json::json!({
1516                            "value": 3,
1517                            "completedAt": timestamp
1518                        });
1519                    }
1520                    1 | 3 | 4 => {
1521                        // Terminal states — ensure completedAt exists
1522                        if ms.get("completedAt").is_none() {
1523                            if let Some(ms_obj) = ms.as_object_mut() {
1524                                ms_obj.insert(
1525                                    "completedAt".to_string(),
1526                                    serde_json::json!(timestamp),
1527                                );
1528                            }
1529                        }
1530                    }
1531                    _ => {}
1532                }
1533            }
1534        }
1535    }
1536}
1537
1538/// Add a session to the VS Code index
1539pub fn add_session_to_index(
1540    db_path: &Path,
1541    session_id: &str,
1542    title: &str,
1543    last_message_date_ms: i64,
1544    _is_imported: bool,
1545    initial_location: &str,
1546    is_empty: bool,
1547) -> Result<()> {
1548    let mut index = read_chat_session_index(db_path)?;
1549
1550    index.entries.insert(
1551        session_id.to_string(),
1552        ChatSessionIndexEntry {
1553            session_id: session_id.to_string(),
1554            title: title.to_string(),
1555            last_message_date: last_message_date_ms,
1556            timing: Some(ChatSessionTiming {
1557                created: last_message_date_ms,
1558                last_request_started: Some(last_message_date_ms),
1559                last_request_ended: Some(last_message_date_ms),
1560            }),
1561            last_response_state: 1, // ResponseModelState.Complete
1562            initial_location: initial_location.to_string(),
1563            is_empty,
1564            is_imported: Some(_is_imported),
1565            has_pending_edits: Some(false),
1566            is_external: Some(false),
1567        },
1568    );
1569
1570    write_chat_session_index(db_path, &index)
1571}
1572
1573/// Remove a session from the VS Code index
1574#[allow(dead_code)]
1575pub fn remove_session_from_index(db_path: &Path, session_id: &str) -> Result<bool> {
1576    let mut index = read_chat_session_index(db_path)?;
1577    let removed = index.entries.remove(session_id).is_some();
1578    if removed {
1579        write_chat_session_index(db_path, &index)?;
1580    }
1581    Ok(removed)
1582}
1583
1584/// Sync the VS Code index with sessions on disk (remove stale entries, add missing ones)
1585/// When both .json and .jsonl exist for the same session ID, prefers .jsonl.
1586pub fn sync_session_index(
1587    workspace_id: &str,
1588    chat_sessions_dir: &Path,
1589    force: bool,
1590) -> Result<(usize, usize)> {
1591    let db_path = get_workspace_storage_db(workspace_id)?;
1592
1593    if !db_path.exists() {
1594        return Err(CsmError::WorkspaceNotFound(format!(
1595            "Database not found: {}",
1596            db_path.display()
1597        )));
1598    }
1599
1600    // Check if VS Code is running
1601    if !force && is_vscode_running() {
1602        return Err(CsmError::VSCodeRunning);
1603    }
1604
1605    // Get current index
1606    let mut index = read_chat_session_index(&db_path)?;
1607
1608    // Get session files on disk
1609    let mut files_on_disk: std::collections::HashSet<String> = std::collections::HashSet::new();
1610    if chat_sessions_dir.exists() {
1611        for entry in std::fs::read_dir(chat_sessions_dir)? {
1612            let entry = entry?;
1613            let path = entry.path();
1614            if path
1615                .extension()
1616                .map(is_session_file_extension)
1617                .unwrap_or(false)
1618            {
1619                if let Some(stem) = path.file_stem() {
1620                    files_on_disk.insert(stem.to_string_lossy().to_string());
1621                }
1622            }
1623        }
1624    }
1625
1626    // Remove stale entries (in index but not on disk)
1627    let stale_ids: Vec<String> = index
1628        .entries
1629        .keys()
1630        .filter(|id| !files_on_disk.contains(*id))
1631        .cloned()
1632        .collect();
1633
1634    let removed = stale_ids.len();
1635    for id in &stale_ids {
1636        index.entries.remove(id);
1637    }
1638
1639    // Add/update sessions from disk
1640    // Collect files, preferring .jsonl over .json for the same session ID
1641    let mut session_files: std::collections::HashMap<String, PathBuf> =
1642        std::collections::HashMap::new();
1643    for entry in std::fs::read_dir(chat_sessions_dir)? {
1644        let entry = entry?;
1645        let path = entry.path();
1646        if path
1647            .extension()
1648            .map(is_session_file_extension)
1649            .unwrap_or(false)
1650        {
1651            if let Some(stem) = path.file_stem() {
1652                let stem_str = stem.to_string_lossy().to_string();
1653                let is_jsonl = path.extension().is_some_and(|e| e == "jsonl");
1654                // Insert if no entry yet, or if this is .jsonl (preferred over .json)
1655                if !session_files.contains_key(&stem_str) || is_jsonl {
1656                    session_files.insert(stem_str, path);
1657                }
1658            }
1659        }
1660    }
1661
1662    let mut added = 0;
1663    for (_, path) in &session_files {
1664        if let Ok(session) = parse_session_file(path) {
1665            let session_id = session.session_id.clone().unwrap_or_else(|| {
1666                path.file_stem()
1667                    .map(|s| s.to_string_lossy().to_string())
1668                    .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
1669            });
1670
1671            let title = session.title();
1672            let is_empty = session.is_empty();
1673            let last_message_date = session.last_message_date;
1674            let initial_location = session.initial_location.clone();
1675
1676            index.entries.insert(
1677                session_id.clone(),
1678                ChatSessionIndexEntry {
1679                    session_id,
1680                    title,
1681                    last_message_date,
1682                    timing: Some(ChatSessionTiming {
1683                        created: session.creation_date,
1684                        last_request_started: Some(last_message_date),
1685                        last_request_ended: Some(last_message_date),
1686                    }),
1687                    last_response_state: 1, // ResponseModelState.Complete
1688                    initial_location,
1689                    is_empty,
1690                    is_imported: Some(false),
1691                    has_pending_edits: Some(false),
1692                    is_external: Some(false),
1693                },
1694            );
1695            added += 1;
1696        }
1697    }
1698
1699    // Write the synced index
1700    write_chat_session_index(&db_path, &index)?;
1701
1702    Ok((added, removed))
1703}
1704
1705/// Register all sessions from a directory into the VS Code index
1706pub fn register_all_sessions_from_directory(
1707    workspace_id: &str,
1708    chat_sessions_dir: &Path,
1709    force: bool,
1710) -> Result<usize> {
1711    let db_path = get_workspace_storage_db(workspace_id)?;
1712
1713    if !db_path.exists() {
1714        return Err(CsmError::WorkspaceNotFound(format!(
1715            "Database not found: {}",
1716            db_path.display()
1717        )));
1718    }
1719
1720    // Check if VS Code is running
1721    if !force && is_vscode_running() {
1722        return Err(CsmError::VSCodeRunning);
1723    }
1724
1725    // Use sync to ensure index matches disk
1726    let (added, removed) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
1727
1728    // Print individual session info
1729    for entry in std::fs::read_dir(chat_sessions_dir)? {
1730        let entry = entry?;
1731        let path = entry.path();
1732
1733        if path
1734            .extension()
1735            .map(is_session_file_extension)
1736            .unwrap_or(false)
1737        {
1738            if let Ok(session) = parse_session_file(&path) {
1739                let session_id = session.session_id.clone().unwrap_or_else(|| {
1740                    path.file_stem()
1741                        .map(|s| s.to_string_lossy().to_string())
1742                        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
1743                });
1744
1745                let title = session.title();
1746
1747                println!(
1748                    "[OK] Registered: {} ({}...)",
1749                    title,
1750                    &session_id[..12.min(session_id.len())]
1751                );
1752            }
1753        }
1754    }
1755
1756    if removed > 0 {
1757        println!("[OK] Removed {} stale index entries", removed);
1758    }
1759
1760    Ok(added)
1761}
1762
1763/// Check if VS Code is currently running
1764pub fn is_vscode_running() -> bool {
1765    let mut sys = System::new();
1766    sys.refresh_processes();
1767
1768    for process in sys.processes().values() {
1769        let name = process.name().to_lowercase();
1770        if name.contains("code") && !name.contains("codec") {
1771            return true;
1772        }
1773    }
1774
1775    false
1776}
1777
1778/// Close VS Code gracefully and wait for it to exit.
1779/// Returns the list of workspace folders that were open (for reopening).
1780pub fn close_vscode_and_wait(timeout_secs: u64) -> Result<()> {
1781    use sysinfo::{ProcessRefreshKind, RefreshKind, Signal};
1782
1783    if !is_vscode_running() {
1784        return Ok(());
1785    }
1786
1787    // Send SIGTERM (graceful close) to all Code processes
1788    let mut sys = System::new_with_specifics(
1789        RefreshKind::new().with_processes(ProcessRefreshKind::everything()),
1790    );
1791    sys.refresh_processes();
1792
1793    let mut signaled = 0u32;
1794    for (pid, process) in sys.processes() {
1795        let name = process.name().to_lowercase();
1796        if name.contains("code") && !name.contains("codec") {
1797            // On Windows, kill() sends TerminateProcess; there's no graceful
1798            // SIGTERM equivalent via sysinfo. But the main electron process
1799            // handles WM_CLOSE. We use the `taskkill` approach on Windows for
1800            // a graceful close.
1801            #[cfg(windows)]
1802            {
1803                let _ = std::process::Command::new("taskkill")
1804                    .args(["/PID", &pid.as_u32().to_string()])
1805                    .stdout(std::process::Stdio::null())
1806                    .stderr(std::process::Stdio::null())
1807                    .status();
1808                signaled += 1;
1809            }
1810            #[cfg(not(windows))]
1811            {
1812                if process.kill_with(Signal::Term).unwrap_or(false) {
1813                    signaled += 1;
1814                }
1815            }
1816        }
1817    }
1818
1819    if signaled == 0 {
1820        return Ok(());
1821    }
1822
1823    // Wait for all Code processes to exit
1824    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout_secs);
1825    loop {
1826        std::thread::sleep(std::time::Duration::from_millis(500));
1827        if !is_vscode_running() {
1828            // Extra wait for file locks to release
1829            std::thread::sleep(std::time::Duration::from_secs(1));
1830            return Ok(());
1831        }
1832        if std::time::Instant::now() >= deadline {
1833            // Force kill remaining processes
1834            let mut sys2 = System::new_with_specifics(
1835                RefreshKind::new().with_processes(ProcessRefreshKind::everything()),
1836            );
1837            sys2.refresh_processes();
1838            for (_pid, process) in sys2.processes() {
1839                let name = process.name().to_lowercase();
1840                if name.contains("code") && !name.contains("codec") {
1841                    process.kill();
1842                }
1843            }
1844            std::thread::sleep(std::time::Duration::from_secs(1));
1845            return Ok(());
1846        }
1847    }
1848}
1849
1850/// Reopen VS Code, optionally at a specific path.
1851pub fn reopen_vscode(project_path: Option<&str>) -> Result<()> {
1852    let mut cmd = std::process::Command::new("code");
1853    if let Some(path) = project_path {
1854        cmd.arg(path);
1855    }
1856    cmd.stdout(std::process::Stdio::null())
1857        .stderr(std::process::Stdio::null())
1858        .spawn()?;
1859    Ok(())
1860}
1861
1862/// Backup workspace sessions to a timestamped directory
1863pub fn backup_workspace_sessions(workspace_dir: &Path) -> Result<Option<PathBuf>> {
1864    let chat_sessions_dir = workspace_dir.join("chatSessions");
1865
1866    if !chat_sessions_dir.exists() {
1867        return Ok(None);
1868    }
1869
1870    let timestamp = std::time::SystemTime::now()
1871        .duration_since(std::time::UNIX_EPOCH)
1872        .unwrap()
1873        .as_secs();
1874
1875    let backup_dir = workspace_dir.join(format!("chatSessions-backup-{}", timestamp));
1876
1877    // Copy directory recursively
1878    copy_dir_all(&chat_sessions_dir, &backup_dir)?;
1879
1880    Ok(Some(backup_dir))
1881}
1882
1883/// Recursively copy a directory
1884fn copy_dir_all(src: &Path, dst: &Path) -> Result<()> {
1885    std::fs::create_dir_all(dst)?;
1886
1887    for entry in std::fs::read_dir(src)? {
1888        let entry = entry?;
1889        let src_path = entry.path();
1890        let dst_path = dst.join(entry.file_name());
1891
1892        if src_path.is_dir() {
1893            copy_dir_all(&src_path, &dst_path)?;
1894        } else {
1895            std::fs::copy(&src_path, &dst_path)?;
1896        }
1897    }
1898
1899    Ok(())
1900}
1901
1902// =============================================================================
1903// Empty Window Sessions (ALL SESSIONS)
1904// =============================================================================
1905
1906/// Read all empty window chat sessions (not tied to any workspace)
1907/// These appear in VS Code's "ALL SESSIONS" panel
1908pub fn read_empty_window_sessions() -> Result<Vec<ChatSession>> {
1909    let sessions_path = get_empty_window_sessions_path()?;
1910
1911    if !sessions_path.exists() {
1912        return Ok(Vec::new());
1913    }
1914
1915    let mut sessions = Vec::new();
1916
1917    for entry in std::fs::read_dir(&sessions_path)? {
1918        let entry = entry?;
1919        let path = entry.path();
1920
1921        if path.extension().is_some_and(is_session_file_extension) {
1922            if let Ok(session) = parse_session_file(&path) {
1923                sessions.push(session);
1924            }
1925        }
1926    }
1927
1928    // Sort by last message date (most recent first)
1929    sessions.sort_by(|a, b| b.last_message_date.cmp(&a.last_message_date));
1930
1931    Ok(sessions)
1932}
1933
1934/// Get a specific empty window session by ID
1935#[allow(dead_code)]
1936pub fn get_empty_window_session(session_id: &str) -> Result<Option<ChatSession>> {
1937    let sessions_path = get_empty_window_sessions_path()?;
1938    let session_path = sessions_path.join(format!("{}.json", session_id));
1939
1940    if !session_path.exists() {
1941        return Ok(None);
1942    }
1943
1944    let content = std::fs::read_to_string(&session_path)?;
1945    let session: ChatSession = serde_json::from_str(&content)
1946        .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
1947
1948    Ok(Some(session))
1949}
1950
1951/// Write an empty window session
1952#[allow(dead_code)]
1953pub fn write_empty_window_session(session: &ChatSession) -> Result<PathBuf> {
1954    let sessions_path = get_empty_window_sessions_path()?;
1955
1956    // Create directory if it doesn't exist
1957    std::fs::create_dir_all(&sessions_path)?;
1958
1959    let session_id = session.session_id.as_deref().unwrap_or("unknown");
1960    let session_path = sessions_path.join(format!("{}.json", session_id));
1961    let content = serde_json::to_string_pretty(session)?;
1962    std::fs::write(&session_path, content)?;
1963
1964    Ok(session_path)
1965}
1966
1967/// Delete an empty window session
1968#[allow(dead_code)]
1969pub fn delete_empty_window_session(session_id: &str) -> Result<bool> {
1970    let sessions_path = get_empty_window_sessions_path()?;
1971    let session_path = sessions_path.join(format!("{}.json", session_id));
1972
1973    if session_path.exists() {
1974        std::fs::remove_file(&session_path)?;
1975        Ok(true)
1976    } else {
1977        Ok(false)
1978    }
1979}
1980
1981/// Count empty window sessions
1982pub fn count_empty_window_sessions() -> Result<usize> {
1983    let sessions_path = get_empty_window_sessions_path()?;
1984
1985    if !sessions_path.exists() {
1986        return Ok(0);
1987    }
1988
1989    let count = std::fs::read_dir(&sessions_path)?
1990        .filter_map(|e| e.ok())
1991        .filter(|e| e.path().extension().is_some_and(is_session_file_extension))
1992        .count();
1993
1994    Ok(count)
1995}
1996
1997/// Compact a JSONL session file by replaying all operations into a single kind:0 snapshot.
1998/// This works at the raw JSON level, preserving all fields VS Code expects.
1999/// Returns the path to the compacted file.
2000///
2001/// Handles a common corruption pattern where VS Code appends delta operations
2002/// to line 0 without newline separators (e.g., `}{"kind":1,...}{"kind":2,...}`).
2003pub fn compact_session_jsonl(path: &Path) -> Result<PathBuf> {
2004    let content = std::fs::read_to_string(path).map_err(|e| {
2005        CsmError::InvalidSessionFormat(format!("Failed to read {}: {}", path.display(), e))
2006    })?;
2007
2008    // Pre-process: split concatenated JSON objects that lack newline separators.
2009    // VS Code sometimes appends delta ops to line 0 without a \n, producing:
2010    //   {"kind":0,"v":{...}}{"kind":1,...}{"kind":2,...}\n{"kind":1,...}\n...
2011    // We fix this by inserting newlines at every `}{"kind":` boundary.
2012    let content = split_concatenated_jsonl(&content);
2013
2014    let mut lines = content.lines();
2015
2016    // First line must be kind:0 (initial snapshot)
2017    let first_line = lines
2018        .next()
2019        .ok_or_else(|| CsmError::InvalidSessionFormat("Empty JSONL file".to_string()))?;
2020
2021    let first_entry: serde_json::Value = match serde_json::from_str(first_line.trim()) {
2022        Ok(v) => v,
2023        Err(_) => {
2024            // Try sanitizing Unicode (lone surrogates, etc.)
2025            let sanitized = sanitize_json_unicode(first_line.trim());
2026            serde_json::from_str(&sanitized).map_err(|e| {
2027                CsmError::InvalidSessionFormat(format!("Invalid JSON on line 1: {}", e))
2028            })?
2029        }
2030    };
2031
2032    let kind = first_entry
2033        .get("kind")
2034        .and_then(|k| k.as_u64())
2035        .unwrap_or(99);
2036    if kind != 0 {
2037        return Err(CsmError::InvalidSessionFormat(
2038            "First JSONL line must be kind:0".to_string(),
2039        ));
2040    }
2041
2042    // Extract the session state from the "v" field
2043    let mut state = first_entry
2044        .get("v")
2045        .cloned()
2046        .ok_or_else(|| CsmError::InvalidSessionFormat("kind:0 missing 'v' field".to_string()))?;
2047
2048    // Replay all subsequent operations
2049    for line in lines {
2050        let line = line.trim();
2051        if line.is_empty() {
2052            continue;
2053        }
2054
2055        let entry: serde_json::Value = match serde_json::from_str(line) {
2056            Ok(v) => v,
2057            Err(_) => continue, // Skip malformed lines
2058        };
2059
2060        let op_kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(99);
2061
2062        match op_kind {
2063            1 => {
2064                // Delta update: k=["path","to","field"], v=value
2065                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
2066                    if let Some(keys_arr) = keys.as_array() {
2067                        apply_delta(&mut state, keys_arr, value.clone());
2068                    }
2069                }
2070            }
2071            2 => {
2072                // Array replace/splice: k=["path","to","array"], v=[items], i=splice_index
2073                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
2074                    let splice_index = entry.get("i").and_then(|i| i.as_u64()).map(|i| i as usize);
2075                    if let Some(keys_arr) = keys.as_array() {
2076                        apply_splice(&mut state, keys_arr, value.clone(), splice_index);
2077                    }
2078                }
2079            }
2080            _ => {} // Skip unknown kinds
2081        }
2082    }
2083
2084    // Inject any missing fields that VS Code's latest format requires
2085    let session_id = path
2086        .file_stem()
2087        .and_then(|s| s.to_str())
2088        .map(|s| s.to_string());
2089    ensure_vscode_compat_fields(&mut state, session_id.as_deref());
2090
2091    // Write the compacted file: single kind:0 line with the final state
2092    let compact_entry = serde_json::json!({"kind": 0, "v": state});
2093    let compact_content = serde_json::to_string(&compact_entry)
2094        .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
2095
2096    // Backup the original file
2097    let backup_path = path.with_extension("jsonl.bak");
2098    std::fs::rename(path, &backup_path)?;
2099
2100    // Write the compacted file (trailing newline prevents concatenation
2101    // if VS Code later appends delta operations)
2102    std::fs::write(path, format!("{}\n", compact_content))?;
2103
2104    Ok(backup_path)
2105}
2106
2107/// Trim a session JSONL file by keeping only the last `keep` requests.
2108///
2109/// Very long chat sessions (100+ requests) can grow to 50-100+ MB, causing VS Code
2110/// to fail loading them. This function compacts the session first (if needed), then
2111/// removes old requests from the `requests` array, keeping only the most recent ones.
2112///
2113/// The full session is preserved as a `.jsonl.bak` backup. A trimmed summary is
2114/// injected as the first request message so the user knows context was archived.
2115///
2116/// Returns `(original_count, kept_count, original_mb, new_mb)`.
2117pub fn trim_session_jsonl(path: &Path, keep: usize) -> Result<(usize, usize, f64, f64)> {
2118    let content = std::fs::read_to_string(path).map_err(|e| {
2119        CsmError::InvalidSessionFormat(format!("Failed to read {}: {}", path.display(), e))
2120    })?;
2121
2122    let original_size = content.len() as f64 / (1024.0 * 1024.0);
2123
2124    // Always handle concatenated JSON objects first, then check line count
2125    let content = split_concatenated_jsonl(&content);
2126    let line_count = content.lines().filter(|l| !l.trim().is_empty()).count();
2127
2128    // If multi-line (concatenated objects or delta ops), compact first
2129    let content = if line_count > 1 {
2130        // Write the split content so compact can process it
2131        std::fs::write(path, &content)?;
2132        compact_session_jsonl(path)?;
2133        std::fs::read_to_string(path).map_err(|e| {
2134            CsmError::InvalidSessionFormat(format!("Failed to read compacted file: {}", e))
2135        })?
2136    } else {
2137        content
2138    };
2139
2140    let first_line = content
2141        .lines()
2142        .next()
2143        .ok_or_else(|| CsmError::InvalidSessionFormat("Empty JSONL file".to_string()))?;
2144
2145    let mut entry: serde_json::Value = serde_json::from_str(first_line.trim())
2146        .map_err(|_| {
2147            let sanitized = sanitize_json_unicode(first_line.trim());
2148            serde_json::from_str::<serde_json::Value>(&sanitized)
2149                .map_err(|e| CsmError::InvalidSessionFormat(format!("Invalid JSON: {}", e)))
2150        })
2151        .unwrap_or_else(|e| e.unwrap());
2152
2153    let kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(99);
2154    if kind != 0 {
2155        return Err(
2156            CsmError::InvalidSessionFormat("First JSONL line must be kind:0".to_string()).into(),
2157        );
2158    }
2159
2160    // Get the requests array
2161    let requests = match entry
2162        .get("v")
2163        .and_then(|v| v.get("requests"))
2164        .and_then(|r| r.as_array())
2165    {
2166        Some(r) => r.clone(),
2167        None => {
2168            return Err(CsmError::InvalidSessionFormat(
2169                "Session has no requests array".to_string(),
2170            )
2171            .into());
2172        }
2173    };
2174
2175    let original_count = requests.len();
2176
2177    if original_count <= keep {
2178        // Still strip bloated content even if not reducing request count
2179        strip_bloated_content(&mut entry);
2180
2181        let trimmed_content = serde_json::to_string(&entry)
2182            .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
2183        let new_size = trimmed_content.len() as f64 / (1024.0 * 1024.0);
2184
2185        // Only rewrite if we actually reduced size
2186        if new_size < original_size * 0.9 {
2187            let backup_path = path.with_extension("jsonl.bak");
2188            if !backup_path.exists() {
2189                std::fs::copy(path, &backup_path)?;
2190            }
2191            std::fs::write(path, format!("{}\n", trimmed_content))?;
2192        }
2193
2194        return Ok((original_count, original_count, original_size, new_size));
2195    }
2196
2197    // Keep only the last `keep` requests
2198    let kept_requests: Vec<serde_json::Value> = requests[original_count - keep..].to_vec();
2199
2200    // Use only the kept requests — no injected trim notice.
2201    // Injecting synthetic requests with non-standard agent/structure fields
2202    // can cause VS Code's session deserializer to reject the entire session.
2203    let final_requests = kept_requests;
2204
2205    // Replace the requests array in the entry
2206    if let Some(v) = entry.get_mut("v") {
2207        if let Some(obj) = v.as_object_mut() {
2208            obj.insert("requests".to_string(), serde_json::json!(final_requests));
2209        }
2210    }
2211
2212    // Strip bloated metadata, tool invocations, textEditGroups, thinking tokens
2213    strip_bloated_content(&mut entry);
2214
2215    // Ensure compat fields
2216    let session_id = path
2217        .file_stem()
2218        .and_then(|s| s.to_str())
2219        .map(|s| s.to_string());
2220    if let Some(v) = entry.get_mut("v") {
2221        ensure_vscode_compat_fields(v, session_id.as_deref());
2222    }
2223
2224    let trimmed_content = serde_json::to_string(&entry)
2225        .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
2226
2227    let new_size = trimmed_content.len() as f64 / (1024.0 * 1024.0);
2228
2229    // Backup original (if not already backed up by compact)
2230    let backup_path = path.with_extension("jsonl.bak");
2231    if !backup_path.exists() {
2232        std::fs::copy(path, &backup_path)?;
2233    }
2234
2235    // Write the trimmed file (trailing newline prevents concatenation)
2236    std::fs::write(path, format!("{}\n", trimmed_content))?;
2237
2238    Ok((original_count, keep, original_size, new_size))
2239}
2240
2241/// Strip bloated content from a session entry to reduce file size.
2242///
2243/// VS Code sessions accumulate large metadata that isn't needed for session display:
2244/// - `result.metadata`: Can be 100KB-1.5MB per request (Copilot internal state)
2245/// - `editedFileEvents`: Redundant file edit tracking
2246/// - `chatEdits`: File edit diffs
2247/// - `textEditGroup` response items: 80-120KB each with full file diffs
2248/// - `thinking` response items: Model thinking tokens (can be 400+ per request)
2249/// - `toolInvocationSerialized`: Tool call metadata (usually already stripped by compact)
2250/// - `toolSpecificData`: Duplicate data in tool invocations
2251///
2252/// This function strips or truncates all of these while preserving the conversation
2253/// content (markdownContent responses and user messages).
2254fn strip_bloated_content(entry: &mut serde_json::Value) {
2255    let requests = match entry
2256        .get_mut("v")
2257        .and_then(|v| v.get_mut("requests"))
2258        .and_then(|r| r.as_array_mut())
2259    {
2260        Some(r) => r,
2261        None => return,
2262    };
2263
2264    for req in requests.iter_mut() {
2265        let obj = match req.as_object_mut() {
2266            Some(o) => o,
2267            None => continue,
2268        };
2269
2270        // Strip result.metadata (100KB-1.5MB per request)
2271        if let Some(result) = obj.get_mut("result") {
2272            if let Some(result_obj) = result.as_object_mut() {
2273                if let Some(meta) = result_obj.get("metadata") {
2274                    let meta_str = serde_json::to_string(meta).unwrap_or_default();
2275                    if meta_str.len() > 1000 {
2276                        result_obj.insert(
2277                            "metadata".to_string(),
2278                            serde_json::Value::Object(serde_json::Map::new()),
2279                        );
2280                    }
2281                }
2282            }
2283        }
2284
2285        // Strip editedFileEvents
2286        obj.remove("editedFileEvents");
2287
2288        // Strip chatEdits
2289        obj.remove("chatEdits");
2290
2291        // Truncate contentReferences to max 3
2292        if let Some(refs) = obj.get_mut("contentReferences") {
2293            if let Some(arr) = refs.as_array_mut() {
2294                if arr.len() > 3 {
2295                    arr.truncate(3);
2296                }
2297            }
2298        }
2299
2300        // Process response items
2301        if let Some(response) = obj.get_mut("response") {
2302            if let Some(resp_arr) = response.as_array_mut() {
2303                // Remove non-essential response kinds
2304                resp_arr.retain(|r| {
2305                    let kind = r.get("kind").and_then(|k| k.as_str()).unwrap_or("");
2306                    !matches!(
2307                        kind,
2308                        "toolInvocationSerialized"
2309                            | "progressMessage"
2310                            | "confirmationWidget"
2311                            | "codeblockUri"
2312                            | "progressTaskSerialized"
2313                            | "undoStop"
2314                            | "mcpServersStarting"
2315                            | "confirmation"
2316                    )
2317                });
2318
2319                // Truncate textEditGroup items (strip edit diffs, keep URI ref)
2320                for r in resp_arr.iter_mut() {
2321                    let kind = r
2322                        .get("kind")
2323                        .and_then(|k| k.as_str())
2324                        .unwrap_or("")
2325                        .to_string();
2326
2327                    if kind == "textEditGroup" {
2328                        if let Some(edits) = r.get_mut("edits") {
2329                            if let Some(arr) = edits.as_array_mut() {
2330                                if serde_json::to_string(arr).unwrap_or_default().len() > 2000 {
2331                                    arr.clear();
2332                                }
2333                            }
2334                        }
2335                    }
2336
2337                    // Truncate thinking tokens
2338                    if kind == "thinking" {
2339                        if let Some(val) = r.get_mut("value") {
2340                            if let Some(s) = val.as_str() {
2341                                if s.len() > 500 {
2342                                    *val = serde_json::Value::String(format!(
2343                                        "{}... [truncated]",
2344                                        &s[..500]
2345                                    ));
2346                                }
2347                            }
2348                        }
2349                        if let Some(thought) = r.get_mut("thought") {
2350                            if let Some(thought_val) = thought.get_mut("value") {
2351                                if let Some(s) = thought_val.as_str() {
2352                                    if s.len() > 500 {
2353                                        *thought_val = serde_json::Value::String(format!(
2354                                            "{}... [truncated]",
2355                                            &s[..500]
2356                                        ));
2357                                    }
2358                                }
2359                            }
2360                        }
2361                    }
2362
2363                    // Truncate large markdownContent
2364                    if kind == "markdownContent" {
2365                        if let Some(content) = r.get_mut("content") {
2366                            if let Some(val) = content.get_mut("value") {
2367                                if let Some(s) = val.as_str() {
2368                                    if s.len() > 20000 {
2369                                        *val = serde_json::Value::String(format!(
2370                                            "{}\n\n---\n*[Chasm: Content truncated for loading performance]*",
2371                                            &s[..20000]
2372                                        ));
2373                                    }
2374                                }
2375                            }
2376                        }
2377                    }
2378                }
2379
2380                // Limit thinking items to last 5 per request
2381                let mut thinking_count = 0;
2382                let mut indices_to_remove = Vec::new();
2383                for (i, r) in resp_arr.iter().enumerate().rev() {
2384                    let kind = r.get("kind").and_then(|k| k.as_str()).unwrap_or("");
2385                    if kind == "thinking" {
2386                        thinking_count += 1;
2387                        if thinking_count > 5 {
2388                            indices_to_remove.push(i);
2389                        }
2390                    }
2391                }
2392                for idx in indices_to_remove {
2393                    resp_arr.remove(idx);
2394                }
2395
2396                // Strip toolSpecificData from any remaining tool invocations
2397                for r in resp_arr.iter_mut() {
2398                    if let Some(obj) = r.as_object_mut() {
2399                        obj.remove("toolSpecificData");
2400                    }
2401                }
2402
2403                // Fix response items missing `kind` field — wrap raw MarkdownString
2404                // objects as proper markdownContent response items.
2405                // VS Code sometimes serializes MarkdownString directly instead of
2406                // wrapping it in { kind: "markdownContent", content: MarkdownString }.
2407                // Without the `kind` discriminator, VS Code's deserializer fails.
2408                let fixed: Vec<serde_json::Value> = resp_arr
2409                    .drain(..)
2410                    .map(|item| {
2411                        if item.get("kind").is_none() {
2412                            // Check if it looks like a MarkdownString (has `value` or `supportHtml`)
2413                            if item.get("value").is_some() || item.get("supportHtml").is_some() {
2414                                serde_json::json!({
2415                                    "kind": "markdownContent",
2416                                    "content": item
2417                                })
2418                            } else {
2419                                item
2420                            }
2421                        } else {
2422                            item
2423                        }
2424                    })
2425                    .collect();
2426                *resp_arr = fixed;
2427            }
2428        }
2429    }
2430}
2431
/// Restore line separators between JSON objects that were glued together in JSONL content.
///
/// VS Code sometimes appends delta operations (kind:1, kind:2) directly after the
/// previous object without writing a newline first, yielding invalid JSONL such as:
///   `{"kind":0,"v":{...}}{"kind":1,...}{"kind":2,...}`
///
/// Every `}{"kind":` boundary is rewritten as `}` + newline + `{"kind":`. Content
/// without such a boundary is returned unchanged (as an owned copy).
///
/// The boundary text cannot occur inside a JSON string value, because the quote in
/// `{"kind":` would have to be escaped there as `{\"kind\":`.
pub fn split_concatenated_jsonl(content: &str) -> String {
    const GLUED_BOUNDARY: &str = "}{\"kind\":";
    const SPLIT_BOUNDARY: &str = "}\n{\"kind\":";

    let mut segments = content.split(GLUED_BOUNDARY);
    let mut repaired = String::with_capacity(content.len());

    // `split` always yields at least one segment, even for empty input.
    if let Some(head) = segments.next() {
        repaired.push_str(head);
    }
    for segment in segments {
        repaired.push_str(SPLIT_BOUNDARY);
        repaired.push_str(segment);
    }

    repaired
}
2449
2450/// Apply a delta update (kind:1) to a JSON value at the given key path.
2451fn apply_delta(root: &mut serde_json::Value, keys: &[serde_json::Value], value: serde_json::Value) {
2452    if keys.is_empty() {
2453        return;
2454    }
2455
2456    // Navigate to the parent
2457    let mut current = root;
2458    for key in &keys[..keys.len() - 1] {
2459        if let Some(k) = key.as_str() {
2460            if !current.get(k).is_some() {
2461                current[k] = serde_json::Value::Object(serde_json::Map::new());
2462            }
2463            current = &mut current[k];
2464        } else if let Some(idx) = key.as_u64() {
2465            if let Some(arr) = current.as_array_mut() {
2466                if (idx as usize) < arr.len() {
2467                    current = &mut arr[idx as usize];
2468                } else {
2469                    return; // Index out of bounds
2470                }
2471            } else {
2472                return;
2473            }
2474        }
2475    }
2476
2477    // Set the final key
2478    if let Some(last_key) = keys.last() {
2479        if let Some(k) = last_key.as_str() {
2480            current[k] = value;
2481        } else if let Some(idx) = last_key.as_u64() {
2482            if let Some(arr) = current.as_array_mut() {
2483                if (idx as usize) < arr.len() {
2484                    arr[idx as usize] = value;
2485                }
2486            }
2487        }
2488    }
2489}
2490
2491/// Apply an array replace/splice operation (kind:2) to a JSON value at the given key path.
2492/// When `splice_index` is `Some(i)`, truncates the target array at index `i` before extending.
2493/// When `splice_index` is `None`, replaces the entire array with the new items.
2494fn apply_splice(
2495    root: &mut serde_json::Value,
2496    keys: &[serde_json::Value],
2497    items: serde_json::Value,
2498    splice_index: Option<usize>,
2499) {
2500    if keys.is_empty() {
2501        return;
2502    }
2503
2504    // Navigate to the target array
2505    let mut current = root;
2506    for key in keys {
2507        if let Some(k) = key.as_str() {
2508            if !current.get(k).is_some() {
2509                current[k] = serde_json::json!([]);
2510            }
2511            current = &mut current[k];
2512        } else if let Some(idx) = key.as_u64() {
2513            if let Some(arr) = current.as_array_mut() {
2514                if (idx as usize) < arr.len() {
2515                    current = &mut arr[idx as usize];
2516                } else {
2517                    return;
2518                }
2519            } else {
2520                return;
2521            }
2522        }
2523    }
2524
2525    // Splice or replace items in the target array
2526    if let Some(target_arr) = current.as_array_mut() {
2527        if let Some(idx) = splice_index {
2528            // Splice: truncate at index, then extend with new items
2529            target_arr.truncate(idx);
2530        } else {
2531            // Full replacement: clear the array
2532            target_arr.clear();
2533        }
2534        if let Some(new_items) = items.as_array() {
2535            target_arr.extend(new_items.iter().cloned());
2536        }
2537    }
2538}
2539
2540/// Ensure a JSONL `kind:0` snapshot's `v` object has all fields required by
2541/// VS Code's latest session format (1.109.0+ / version 3). Missing fields are
2542/// injected with sensible defaults so sessions load reliably after recovery,
2543/// conversion, or compaction.
2544///
2545/// Required fields that VS Code now expects:
2546/// - `version` (u32, default 3)
2547/// - `sessionId` (string, extracted from filename or generated)
2548/// - `responderUsername` (string, default "GitHub Copilot")
2549/// - `hasPendingEdits` (bool, default false)
2550/// - `pendingRequests` (array, default [])
2551/// - `inputState` (object with mode, attachments, etc.)
2552pub fn ensure_vscode_compat_fields(state: &mut serde_json::Value, session_id: Option<&str>) {
2553    // Migrate old-format inputState (top-level attachments/mode/etc.) to nested object.
2554    // Must run BEFORE the inputState existence check below.
2555    migrate_old_input_state(state);
2556
2557    if let Some(obj) = state.as_object_mut() {
2558        // version
2559        if !obj.contains_key("version") {
2560            obj.insert("version".to_string(), serde_json::json!(3));
2561        }
2562
2563        // sessionId — use provided ID, or try to read from existing field
2564        if !obj.contains_key("sessionId") {
2565            if let Some(id) = session_id {
2566                obj.insert("sessionId".to_string(), serde_json::json!(id));
2567            }
2568        }
2569
2570        // responderUsername
2571        if !obj.contains_key("responderUsername") {
2572            obj.insert(
2573                "responderUsername".to_string(),
2574                serde_json::json!("GitHub Copilot"),
2575            );
2576        }
2577
2578        // hasPendingEdits — ALWAYS force to false for recovered/compacted sessions.
2579        // Sessions with hasPendingEdits:true cause VS Code to attempt restoring
2580        // stale file edits on load, which fails if files have changed since the
2581        // original session, preventing the session from loading entirely.
2582        obj.insert("hasPendingEdits".to_string(), serde_json::json!(false));
2583
2584        // pendingRequests — ALWAYS force to empty for recovered/compacted sessions.
2585        // Stale pending requests can also block session loading.
2586        obj.insert("pendingRequests".to_string(), serde_json::json!([]));
2587
2588        // inputState — VS Code expects this to exist with at least mode + attachments
2589        if !obj.contains_key("inputState") {
2590            obj.insert(
2591                "inputState".to_string(),
2592                serde_json::json!({
2593                    "attachments": [],
2594                    "mode": { "id": "agent", "kind": "agent" },
2595                    "inputText": "",
2596                    "selections": [],
2597                    "contrib": { "chatDynamicVariableModel": [] }
2598                }),
2599            );
2600        }
2601    }
2602}
2603
2604/// Detect whether a legacy .json file is a "skeleton" — corrupted to contain only
2605/// structural characters ({}, [], commas, colons, whitespace) with all actual data stripped.
2606/// These files parse as valid JSON but contain no useful session content.
2607pub fn is_skeleton_json(content: &str) -> bool {
2608    // Must be non-trivial size to be a skeleton (tiny files might just be empty sessions)
2609    if content.len() < 100 {
2610        return false;
2611    }
2612
2613    // Count structural vs data characters
2614    let structural_chars: usize = content
2615        .chars()
2616        .filter(|c| {
2617            matches!(
2618                c,
2619                '{' | '}' | '[' | ']' | ',' | ':' | ' ' | '\n' | '\r' | '\t' | '"'
2620            )
2621        })
2622        .count();
2623
2624    let total_chars = content.len();
2625    let structural_ratio = structural_chars as f64 / total_chars as f64;
2626
2627    // A skeleton file is >80% structural characters. Normal sessions have lots of
2628    // text content (messages, code, etc.) so the ratio is much lower.
2629    if structural_ratio < 0.80 {
2630        return false;
2631    }
2632
2633    // Additionally verify: parse as JSON and check that requests array is empty or
2634    // contains only empty objects
2635    if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(content) {
2636        // Check if requests exist and are all empty
2637        if let Some(requests) = parsed.get("requests").and_then(|r| r.as_array()) {
2638            let all_empty = requests.iter().all(|req| {
2639                // A skeleton request has no "message" text or empty message content
2640                let msg = req
2641                    .get("message")
2642                    .and_then(|m| m.get("text"))
2643                    .and_then(|t| t.as_str());
2644                msg.map_or(true, |s| s.is_empty())
2645            });
2646            return all_empty;
2647        }
2648        // No requests array at all — also skeleton-like
2649        return true;
2650    }
2651
2652    // Couldn't parse but high structural ratio — still likely skeleton
2653    structural_ratio > 0.85
2654}
2655
2656/// Convert a skeleton .json file to a valid minimal .jsonl file.
2657/// Preserves title and timestamp from the index entry if available.
2658/// The original .json file is renamed to `.json.corrupt` (non-destructive).
2659/// Returns the path to the new .jsonl file, or None if conversion was skipped.
2660pub fn convert_skeleton_json_to_jsonl(
2661    json_path: &Path,
2662    title: Option<&str>,
2663    last_message_date: Option<i64>,
2664) -> Result<Option<PathBuf>> {
2665    let content = std::fs::read_to_string(json_path)
2666        .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
2667
2668    if !is_skeleton_json(&content) {
2669        return Ok(None);
2670    }
2671
2672    let session_id = json_path
2673        .file_stem()
2674        .and_then(|s| s.to_str())
2675        .unwrap_or("unknown")
2676        .to_string();
2677
2678    let title = title.unwrap_or("Recovered Session");
2679    let now = std::time::SystemTime::now()
2680        .duration_since(std::time::UNIX_EPOCH)
2681        .unwrap_or_default()
2682        .as_millis() as i64;
2683    let timestamp = last_message_date.unwrap_or(now);
2684
2685    // Build a valid minimal kind:0 JSONL entry
2686    let jsonl_entry = serde_json::json!({
2687        "kind": 0,
2688        "v": {
2689            "sessionId": session_id,
2690            "title": title,
2691            "lastMessageDate": timestamp,
2692            "requests": [],
2693            "version": 4,
2694            "hasPendingEdits": false,
2695            "pendingRequests": [],
2696            "inputState": {
2697                "attachments": [],
2698                "mode": { "id": "agent", "kind": "agent" },
2699                "inputText": "",
2700                "selections": [],
2701                "contrib": { "chatDynamicVariableModel": [] }
2702            },
2703            "responderUsername": "GitHub Copilot",
2704            "isImported": false,
2705            "initialLocation": "panel"
2706        }
2707    });
2708
2709    let jsonl_path = json_path.with_extension("jsonl");
2710    let corrupt_path = json_path.with_extension("json.corrupt");
2711
2712    // Don't overwrite an existing .jsonl
2713    if jsonl_path.exists() {
2714        // Just rename the skeleton to .corrupt
2715        std::fs::rename(json_path, &corrupt_path)?;
2716        return Ok(None);
2717    }
2718
2719    // Write the new .jsonl file
2720    std::fs::write(
2721        &jsonl_path,
2722        serde_json::to_string(&jsonl_entry)
2723            .map_err(|e| CsmError::InvalidSessionFormat(format!("Serialize error: {}", e)))?,
2724    )?;
2725
2726    // Rename original to .json.corrupt (non-destructive)
2727    std::fs::rename(json_path, &corrupt_path)?;
2728
2729    Ok(Some(jsonl_path))
2730}
2731
2732/// Fix cancelled `modelState` values in a compacted (single-line) JSONL session file.
2733///
2734/// VS Code determines `lastResponseState` from the file content, not the index.
2735/// If the last request's `modelState.value` is `2` (Cancelled) or missing entirely,
2736/// VS Code refuses to load the session. This function:
2737/// 1. Finds the last request in the `requests` array
2738/// 2. If `modelState.value` is `2` (Cancelled), changes it to `1` (Complete)
2739/// 3. If `modelState` is missing entirely, adds `{"value":1,"completedAt":<now>}`
2740///
2741/// Returns `true` if the file was modified.
2742pub fn fix_cancelled_model_state(path: &Path) -> Result<bool> {
2743    let content = std::fs::read_to_string(path)
2744        .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
2745
2746    let lines: Vec<&str> = content.lines().collect();
2747
2748    if lines.is_empty() {
2749        return Ok(false);
2750    }
2751
2752    // For multi-line JSONL, we need to scan all lines to find the LAST modelState
2753    // delta for the highest request index. For single-line (compacted), we modify
2754    // the kind:0 snapshot directly.
2755    if lines.len() == 1 {
2756        // Compacted single-line JSONL: modify the kind:0 snapshot
2757        let mut entry: serde_json::Value = serde_json::from_str(lines[0].trim())
2758            .map_err(|e| CsmError::InvalidSessionFormat(format!("Invalid JSON: {}", e)))?;
2759
2760        let is_kind_0 = entry
2761            .get("kind")
2762            .and_then(|k| k.as_u64())
2763            .map(|k| k == 0)
2764            .unwrap_or(false);
2765
2766        if !is_kind_0 {
2767            return Ok(false);
2768        }
2769
2770        let requests = match entry
2771            .get_mut("v")
2772            .and_then(|v| v.get_mut("requests"))
2773            .and_then(|r| r.as_array_mut())
2774        {
2775            Some(r) if !r.is_empty() => r,
2776            _ => return Ok(false),
2777        };
2778
2779        let last_req = requests.last_mut().unwrap();
2780        let model_state = last_req.get("modelState");
2781
2782        let needs_fix = match model_state {
2783            Some(ms) => {
2784                // Any value other than 1 (Complete) needs repair:
2785                // 0 = NotStarted/Unknown, 2 = Cancelled, 4 = InProgress
2786                ms.get("value").and_then(|v| v.as_u64()) != Some(1)
2787            }
2788            None => true, // Missing modelState = never completed
2789        };
2790
2791        if !needs_fix {
2792            return Ok(false);
2793        }
2794
2795        let now = std::time::SystemTime::now()
2796            .duration_since(std::time::UNIX_EPOCH)
2797            .unwrap_or_default()
2798            .as_millis() as u64;
2799
2800        last_req.as_object_mut().unwrap().insert(
2801            "modelState".to_string(),
2802            serde_json::json!({"value": 1, "completedAt": now}),
2803        );
2804
2805        let patched = serde_json::to_string(&entry)
2806            .map_err(|e| CsmError::InvalidSessionFormat(format!("Serialize error: {}", e)))?;
2807        // Trailing newline prevents concatenation if VS Code appends deltas
2808        std::fs::write(path, format!("{}\n", patched))?;
2809        return Ok(true);
2810    }
2811
2812    // Multi-line JSONL: find the highest request index referenced across all lines,
2813    // then check if the last modelState delta for that index has value=2 or is missing.
2814    // If so, append a corrective delta.
2815    let mut highest_req_idx: Option<usize> = None;
2816    let mut last_model_state_value: Option<u64> = None;
2817
2818    // Check kind:0 snapshot for request count
2819    if let Ok(first_entry) = serde_json::from_str::<serde_json::Value>(lines[0].trim()) {
2820        if let Some(requests) = first_entry
2821            .get("v")
2822            .and_then(|v| v.get("requests"))
2823            .and_then(|r| r.as_array())
2824        {
2825            if !requests.is_empty() {
2826                let last_idx = requests.len() - 1;
2827                highest_req_idx = Some(last_idx);
2828                // Check modelState in the snapshot's last request
2829                if let Some(ms) = requests[last_idx].get("modelState") {
2830                    last_model_state_value = ms.get("value").and_then(|v| v.as_u64());
2831                }
2832            }
2833        }
2834    }
2835
2836    // Scan deltas for higher request indices and modelState updates
2837    static REQ_IDX_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r#""k":\["requests",(\d+)"#).unwrap());
2838
2839    for line in &lines[1..] {
2840        if let Some(caps) = REQ_IDX_RE.captures(line) {
2841            if let Ok(idx) = caps[1].parse::<usize>() {
2842                if highest_req_idx.is_none() || idx > highest_req_idx.unwrap() {
2843                    highest_req_idx = Some(idx);
2844                    last_model_state_value = None; // Reset for new highest
2845                }
2846                // Track modelState for the highest request index
2847                if Some(idx) == highest_req_idx && line.contains("\"modelState\"") {
2848                    if let Ok(entry) = serde_json::from_str::<serde_json::Value>(line.trim()) {
2849                        last_model_state_value = entry
2850                            .get("v")
2851                            .and_then(|v| v.get("value"))
2852                            .and_then(|v| v.as_u64());
2853                    }
2854                }
2855            }
2856        }
2857    }
2858
2859    let req_idx = match highest_req_idx {
2860        Some(idx) => idx,
2861        None => return Ok(false),
2862    };
2863
2864    let needs_fix = match last_model_state_value {
2865        Some(1) => false, // Already complete
2866        _ => true,        // 0=NotStarted, 2=Cancelled, 4=InProgress, None=missing
2867    };
2868
2869    if !needs_fix {
2870        return Ok(false);
2871    }
2872
2873    let now = std::time::SystemTime::now()
2874        .duration_since(std::time::UNIX_EPOCH)
2875        .unwrap_or_default()
2876        .as_millis() as u64;
2877
2878    let fix_delta = format!(
2879        "\n{{\"kind\":1,\"k\":[\"requests\",{},\"modelState\"],\"v\":{{\"value\":1,\"completedAt\":{}}}}}",
2880        req_idx, now
2881    );
2882
2883    use std::io::Write;
2884    let mut file = std::fs::OpenOptions::new().append(true).open(path)?;
2885    file.write_all(fix_delta.as_bytes())?;
2886
2887    Ok(true)
2888}
2889
2890/// Repair workspace sessions: compact large JSONL files and fix the index.
2891/// Returns (compacted_count, index_fixed_count).
2892pub fn repair_workspace_sessions(
2893    workspace_id: &str,
2894    chat_sessions_dir: &Path,
2895    force: bool,
2896) -> Result<(usize, usize)> {
2897    let db_path = get_workspace_storage_db(workspace_id)?;
2898
2899    if !db_path.exists() {
2900        return Err(CsmError::WorkspaceNotFound(format!(
2901            "Database not found: {}",
2902            db_path.display()
2903        )));
2904    }
2905
2906    if !force && is_vscode_running() {
2907        return Err(CsmError::VSCodeRunning);
2908    }
2909
2910    let mut compacted = 0;
2911    let mut fields_fixed = 0;
2912
2913    if chat_sessions_dir.exists() {
2914        // Pass 0.5: Recover from .json.bak when .jsonl has fewer requests
2915        match recover_from_json_bak(chat_sessions_dir) {
2916            Ok(n) if n > 0 => {
2917                println!("   [OK] Recovered {} session(s) from .json.bak backups", n);
2918            }
2919            _ => {}
2920        }
2921
2922        // Pass 1: Compact large JSONL files and fix missing fields
2923        for entry in std::fs::read_dir(chat_sessions_dir)? {
2924            let entry = entry?;
2925            let path = entry.path();
2926            if path.extension().is_some_and(|e| e == "jsonl") {
2927                let metadata = std::fs::metadata(&path)?;
2928                let size_mb = metadata.len() / (1024 * 1024);
2929
2930                let raw_content = std::fs::read_to_string(&path)
2931                    .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
2932
2933                // Pre-process: split concatenated JSON objects that lack newline
2934                // separators. VS Code sometimes appends delta ops to line 0 without
2935                // a \n, producing: {"kind":0,...}{"kind":1,...}
2936                // If splitting changes the content, rewrite the file first.
2937                let content = split_concatenated_jsonl(&raw_content);
2938                if content != raw_content {
2939                    std::fs::write(&path, content.as_bytes())?;
2940                    let stem = path
2941                        .file_stem()
2942                        .map(|s| s.to_string_lossy().to_string())
2943                        .unwrap_or_default();
2944                    println!("   [OK] Fixed concatenated JSONL objects: {}", stem);
2945                }
2946                let line_count = content.lines().count();
2947
2948                if line_count > 1 {
2949                    // Compact multi-line JSONL (has operations to replay)
2950                    let stem = path
2951                        .file_stem()
2952                        .map(|s| s.to_string_lossy().to_string())
2953                        .unwrap_or_default();
2954                    println!(
2955                        "   Compacting {} ({} lines, {}MB)...",
2956                        stem, line_count, size_mb
2957                    );
2958
2959                    match compact_session_jsonl(&path) {
2960                        Ok(backup_path) => {
2961                            let new_size = std::fs::metadata(&path)
2962                                .map(|m| m.len() / (1024 * 1024))
2963                                .unwrap_or(0);
2964                            println!(
2965                                "   [OK] Compacted: {}MB -> {}MB (backup: {})",
2966                                size_mb,
2967                                new_size,
2968                                backup_path
2969                                    .file_name()
2970                                    .unwrap_or_default()
2971                                    .to_string_lossy()
2972                            );
2973                            compacted += 1;
2974                        }
2975                        Err(e) => {
2976                            println!("   [WARN] Failed to compact {}: {}", stem, e);
2977                        }
2978                    }
2979                } else {
2980                    // Single-line JSONL — check for missing VS Code fields
2981                    if let Some(first_line) = content.lines().next() {
2982                        if let Ok(mut obj) = serde_json::from_str::<serde_json::Value>(first_line) {
2983                            let is_kind_0 = obj
2984                                .get("kind")
2985                                .and_then(|k| k.as_u64())
2986                                .map(|k| k == 0)
2987                                .unwrap_or(false);
2988
2989                            if is_kind_0 {
2990                                if let Some(v) = obj.get("v") {
2991                                    // Check if fields are missing OR have wrong values.
2992                                    // hasPendingEdits must be false — true prevents session loading
2993                                    // because VS Code tries to restore stale file edits that fail.
2994                                    let needs_fix = !v.get("inputState").is_some()
2995                                        || !v.get("sessionId").is_some()
2996                                        || v.get("hasPendingEdits")
2997                                            .and_then(|v| v.as_bool())
2998                                            .unwrap_or(true)
2999                                            != false
3000                                        || v.get("pendingRequests")
3001                                            .and_then(|v| v.as_array())
3002                                            .map(|a| !a.is_empty())
3003                                            .unwrap_or(true);
3004
3005                                    if needs_fix {
3006                                        let session_id = path
3007                                            .file_stem()
3008                                            .and_then(|s| s.to_str())
3009                                            .map(|s| s.to_string());
3010                                        if let Some(v_mut) = obj.get_mut("v") {
3011                                            ensure_vscode_compat_fields(
3012                                                v_mut,
3013                                                session_id.as_deref(),
3014                                            );
3015                                        }
3016                                        let patched = serde_json::to_string(&obj).map_err(|e| {
3017                                            CsmError::InvalidSessionFormat(format!(
3018                                                "Failed to serialize: {}",
3019                                                e
3020                                            ))
3021                                        })?;
3022                                        // Trailing newline prevents concatenation
3023                                        std::fs::write(&path, format!("{}\n", patched))?;
3024                                        let stem = path
3025                                            .file_stem()
3026                                            .map(|s| s.to_string_lossy().to_string())
3027                                            .unwrap_or_default();
3028                                        println!("   [OK] Fixed VS Code compat fields: {}", stem);
3029                                        fields_fixed += 1;
3030                                    } else if !content.ends_with('\n') {
3031                                        // All compat fields correct but missing trailing newline
3032                                        std::fs::write(&path, format!("{}\n", first_line))?;
3033                                        let stem = path
3034                                            .file_stem()
3035                                            .map(|s| s.to_string_lossy().to_string())
3036                                            .unwrap_or_default();
3037                                        println!(
3038                                            "   [OK] Fixed missing trailing newline: {}",
3039                                            stem
3040                                        );
3041                                    }
3042                                }
3043                            }
3044                        }
3045                    }
3046                }
3047            }
3048        }
3049    }
3050
3051    // Pass 1.5: Convert skeleton .json files to valid .jsonl.
3052    // Skeleton files are legacy .json files where all data has been stripped,
3053    // leaving only structural characters ({}, [], whitespace). We convert them
3054    // to valid minimal .jsonl, preserving title/timestamp from the index,
3055    // and rename the original to .json.corrupt (non-destructive).
3056    let mut skeletons_converted = 0;
3057    if chat_sessions_dir.exists() {
3058        // Read current index to get titles/timestamps for converted sessions
3059        let index_entries: std::collections::HashMap<String, (String, Option<i64>)> =
3060            if let Ok(index) = read_chat_session_index(&db_path) {
3061                index
3062                    .entries
3063                    .iter()
3064                    .map(|(id, e)| (id.clone(), (e.title.clone(), Some(e.last_message_date))))
3065                    .collect()
3066            } else {
3067                std::collections::HashMap::new()
3068            };
3069
3070        // Collect .json files that don't have a corresponding .jsonl
3071        let mut jsonl_stems: HashSet<String> = HashSet::new();
3072        for entry in std::fs::read_dir(chat_sessions_dir)? {
3073            let entry = entry?;
3074            let path = entry.path();
3075            if path.extension().is_some_and(|e| e == "jsonl") {
3076                if let Some(stem) = path.file_stem() {
3077                    jsonl_stems.insert(stem.to_string_lossy().to_string());
3078                }
3079            }
3080        }
3081
3082        for entry in std::fs::read_dir(chat_sessions_dir)? {
3083            let entry = entry?;
3084            let path = entry.path();
3085            if path.extension().is_some_and(|e| e == "json")
3086                && !path.to_string_lossy().ends_with(".bak")
3087                && !path.to_string_lossy().ends_with(".corrupt")
3088            {
3089                let stem = path
3090                    .file_stem()
3091                    .map(|s| s.to_string_lossy().to_string())
3092                    .unwrap_or_default();
3093
3094                // Skip if .jsonl already exists
3095                if jsonl_stems.contains(&stem) {
3096                    continue;
3097                }
3098
3099                let (title, timestamp) = index_entries
3100                    .get(&stem)
3101                    .map(|(t, ts)| (t.as_str(), *ts))
3102                    .unwrap_or(("Recovered Session", None));
3103
3104                match convert_skeleton_json_to_jsonl(&path, Some(title), timestamp) {
3105                    Ok(Some(jsonl_path)) => {
3106                        println!(
3107                            "   [OK] Converted skeleton .json → .jsonl: {} (\"{}\")",
3108                            stem, title
3109                        );
3110                        // Track the new .jsonl so subsequent passes process it
3111                        jsonl_stems.insert(stem);
3112                        skeletons_converted += 1;
3113                        let _ = jsonl_path; // used implicitly via jsonl_stems
3114                    }
3115                    Ok(None) => {} // Not a skeleton or skipped
3116                    Err(e) => {
3117                        println!("   [WARN] Failed to convert skeleton {}: {}", stem, e);
3118                    }
3119                }
3120            }
3121        }
3122    }
3123
3124    // Pass 2: Fix cancelled modelState in all JSONL files.
3125    // VS Code reads modelState from file content (not the index) to determine
3126    // lastResponseState. If the last request's modelState.value is 2 (Cancelled),
3127    // or its modelState is missing entirely, VS Code refuses to load the session.
3128    let mut cancelled_fixed = 0;
3129    if chat_sessions_dir.exists() {
3130        for entry in std::fs::read_dir(chat_sessions_dir)? {
3131            let entry = entry?;
3132            let path = entry.path();
3133            if path.extension().is_some_and(|e| e == "jsonl") {
3134                match fix_cancelled_model_state(&path) {
3135                    Ok(true) => {
3136                        let stem = path
3137                            .file_stem()
3138                            .map(|s| s.to_string_lossy().to_string())
3139                            .unwrap_or_default();
3140                        println!("   [OK] Fixed cancelled modelState: {}", stem);
3141                        cancelled_fixed += 1;
3142                    }
3143                    Ok(false) => {} // No fix needed
3144                    Err(e) => {
3145                        let stem = path
3146                            .file_stem()
3147                            .map(|s| s.to_string_lossy().to_string())
3148                            .unwrap_or_default();
3149                        println!("   [WARN] Failed to fix modelState for {}: {}", stem, e);
3150                    }
3151                }
3152            }
3153        }
3154    }
3155
3156    // Pass 3: Rebuild the index with correct metadata
3157    let (index_fixed, _) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
3158
3159    if fields_fixed > 0 {
3160        println!(
3161            "   [OK] Injected missing VS Code fields into {} session(s)",
3162            fields_fixed
3163        );
3164    }
3165    if skeletons_converted > 0 {
3166        println!(
3167            "   [OK] Converted {} skeleton .json file(s) to .jsonl",
3168            skeletons_converted
3169        );
3170    }
3171    if cancelled_fixed > 0 {
3172        println!(
3173            "   [OK] Fixed cancelled modelState in {} session(s)",
3174            cancelled_fixed
3175        );
3176    }
3177
3178    Ok((compacted, index_fixed))
3179}