Skip to main content

chasm/
storage.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: AGPL-3.0-only
3//! VS Code storage (SQLite database) operations
4
5use crate::error::{CsmError, Result};
6use crate::models::{
7    ChatRequest, ChatSession, ChatSessionIndex, ChatSessionIndexEntry, ChatSessionTiming,
8};
9use crate::workspace::{get_empty_window_sessions_path, get_workspace_storage_path};
10use once_cell::sync::Lazy;
11use regex::Regex;
12use rusqlite::Connection;
13use std::collections::HashSet;
14use std::path::{Path, PathBuf};
15use sysinfo::System;
16
17/// A single issue detected during workspace session diagnostics
18#[derive(Debug, Clone)]
19pub struct SessionIssue {
20    /// The session file stem (UUID)
21    pub session_id: String,
22    /// Category of issue
23    pub kind: SessionIssueKind,
24    /// Human-readable description
25    pub detail: String,
26}
27
/// Categories of session issues that can be detected and auto-fixed.
///
/// The enum is fieldless, so it derives `Eq` and `Hash` in addition to
/// `PartialEq`; this lets a kind be used as a `HashMap`/`HashSet` key
/// (for example to tally issues per category).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SessionIssueKind {
    /// JSONL file has multiple lines (operations not compacted)
    MultiLineJsonl,
    /// JSONL first line contains concatenated JSON objects (missing newlines)
    ConcatenatedJsonl,
    /// Index entry has lastResponseState = 2 (Cancelled), blocks VS Code loading
    CancelledState,
    /// Last request's modelState.value is 2 (Cancelled) or missing in file content
    CancelledModelState,
    /// File exists on disk but is not in the VS Code index
    OrphanedSession,
    /// Index entry references a file that no longer exists on disk
    StaleIndexEntry,
    /// Session is missing required VS Code compat fields
    MissingCompatFields,
    /// Both .json and .jsonl exist for the same session ID
    DuplicateFormat,
    /// Legacy .json file is corrupted — contains only structural chars ({}, whitespace)
    SkeletonJson,
}
50
51impl std::fmt::Display for SessionIssueKind {
52    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53        match self {
54            SessionIssueKind::MultiLineJsonl => write!(f, "multi-line JSONL"),
55            SessionIssueKind::ConcatenatedJsonl => write!(f, "concatenated JSONL"),
56            SessionIssueKind::CancelledState => write!(f, "cancelled state"),
57            SessionIssueKind::CancelledModelState => write!(f, "cancelled modelState in file"),
58            SessionIssueKind::OrphanedSession => write!(f, "orphaned session"),
59            SessionIssueKind::StaleIndexEntry => write!(f, "stale index entry"),
60            SessionIssueKind::MissingCompatFields => write!(f, "missing compat fields"),
61            SessionIssueKind::DuplicateFormat => write!(f, "duplicate .json/.jsonl"),
62            SessionIssueKind::SkeletonJson => write!(f, "skeleton .json (corrupt)"),
63        }
64    }
65}
66
67/// Summary of issues found in a single workspace
68#[derive(Debug, Clone, Default)]
69pub struct WorkspaceDiagnosis {
70    /// Project path (if known)
71    pub project_path: Option<String>,
72    /// Workspace hash
73    pub workspace_hash: String,
74    /// Total sessions on disk
75    pub sessions_on_disk: usize,
76    /// Total sessions in index
77    pub sessions_in_index: usize,
78    /// All detected issues
79    pub issues: Vec<SessionIssue>,
80}
81
82impl WorkspaceDiagnosis {
83    pub fn is_healthy(&self) -> bool {
84        self.issues.is_empty()
85    }
86
87    pub fn issue_count_by_kind(&self, kind: &SessionIssueKind) -> usize {
88        self.issues.iter().filter(|i| &i.kind == kind).count()
89    }
90}
91
92/// Diagnose a workspace for session issues without modifying anything.
93/// Returns a structured report of all detected problems.
94pub fn diagnose_workspace_sessions(
95    workspace_id: &str,
96    chat_sessions_dir: &Path,
97) -> Result<WorkspaceDiagnosis> {
98    let mut diagnosis = WorkspaceDiagnosis {
99        workspace_hash: workspace_id.to_string(),
100        ..Default::default()
101    };
102
103    if !chat_sessions_dir.exists() {
104        return Ok(diagnosis);
105    }
106
107    // Collect session files on disk
108    let mut jsonl_sessions: HashSet<String> = HashSet::new();
109    let mut json_sessions: HashSet<String> = HashSet::new();
110    let mut all_session_ids: HashSet<String> = HashSet::new();
111
112    for entry in std::fs::read_dir(chat_sessions_dir)? {
113        let entry = entry?;
114        let path = entry.path();
115        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
116        let stem = path
117            .file_stem()
118            .map(|s| s.to_string_lossy().to_string())
119            .unwrap_or_default();
120
121        match ext {
122            "jsonl" => {
123                jsonl_sessions.insert(stem.clone());
124                all_session_ids.insert(stem);
125            }
126            "json" if !path.to_string_lossy().ends_with(".bak") => {
127                json_sessions.insert(stem.clone());
128                all_session_ids.insert(stem);
129            }
130            _ => {}
131        }
132    }
133    diagnosis.sessions_on_disk = all_session_ids.len();
134
135    // Check for duplicate .json/.jsonl files
136    for id in &jsonl_sessions {
137        if json_sessions.contains(id) {
138            diagnosis.issues.push(SessionIssue {
139                session_id: id.clone(),
140                kind: SessionIssueKind::DuplicateFormat,
141                detail: format!("Both {id}.json and {id}.jsonl exist"),
142            });
143        }
144    }
145
146    // Check JSONL files for content issues
147    for id in &jsonl_sessions {
148        let path = chat_sessions_dir.join(format!("{id}.jsonl"));
149        if let Ok(content) = std::fs::read_to_string(&path) {
150            let line_count = content.lines().count();
151
152            if line_count > 1 {
153                let size_mb = content.len() / (1024 * 1024);
154                diagnosis.issues.push(SessionIssue {
155                    session_id: id.clone(),
156                    kind: SessionIssueKind::MultiLineJsonl,
157                    detail: format!("{line_count} lines, ~{size_mb} MB — needs compaction"),
158                });
159            }
160
161            // Check first line for concatenation
162            if let Some(first_line) = content.lines().next() {
163                if first_line.contains("}{\"kind\":") {
164                    diagnosis.issues.push(SessionIssue {
165                        session_id: id.clone(),
166                        kind: SessionIssueKind::ConcatenatedJsonl,
167                        detail: "First line has concatenated JSON objects".to_string(),
168                    });
169                }
170            }
171
172            // Check for missing compat fields (only single-line files worth checking)
173            if line_count == 1 {
174                if let Some(first_line) = content.lines().next() {
175                    if let Ok(obj) = serde_json::from_str::<serde_json::Value>(first_line) {
176                        let is_kind_0 = obj
177                            .get("kind")
178                            .and_then(|k| k.as_u64())
179                            .map(|k| k == 0)
180                            .unwrap_or(false);
181
182                        if is_kind_0 {
183                            if let Some(v) = obj.get("v") {
184                                let missing_fields: Vec<&str> = [
185                                    "hasPendingEdits",
186                                    "pendingRequests",
187                                    "inputState",
188                                    "sessionId",
189                                    "version",
190                                ]
191                                .iter()
192                                .filter(|f| v.get(**f).is_none())
193                                .copied()
194                                .collect();
195
196                                if !missing_fields.is_empty() {
197                                    diagnosis.issues.push(SessionIssue {
198                                        session_id: id.clone(),
199                                        kind: SessionIssueKind::MissingCompatFields,
200                                        detail: format!("Missing: {}", missing_fields.join(", ")),
201                                    });
202                                }
203
204                                // Check for cancelled modelState in file content
205                                if let Some(requests) = v.get("requests").and_then(|r| r.as_array())
206                                {
207                                    if let Some(last_req) = requests.last() {
208                                        let model_state_value = last_req
209                                            .get("modelState")
210                                            .and_then(|ms| ms.get("value"))
211                                            .and_then(|v| v.as_u64());
212                                        match model_state_value {
213                                            Some(2) => {
214                                                diagnosis.issues.push(SessionIssue {
215                                                    session_id: id.clone(),
216                                                    kind: SessionIssueKind::CancelledModelState,
217                                                    detail: "Last request modelState.value=2 (Cancelled) in file content".to_string(),
218                                                });
219                                            }
220                                            None => {
221                                                diagnosis.issues.push(SessionIssue {
222                                                    session_id: id.clone(),
223                                                    kind: SessionIssueKind::CancelledModelState,
224                                                    detail: "Last request missing modelState in file content".to_string(),
225                                                });
226                                            }
227                                            _ => {} // Valid state
228                                        }
229                                    }
230                                }
231                            }
232                        }
233                    }
234                }
235            }
236        }
237    }
238
239    // Check .json files for skeleton corruption
240    for id in &json_sessions {
241        // Skip if a .jsonl already exists (it takes precedence)
242        if jsonl_sessions.contains(id) {
243            continue;
244        }
245        let path = chat_sessions_dir.join(format!("{id}.json"));
246        if let Ok(content) = std::fs::read_to_string(&path) {
247            if is_skeleton_json(&content) {
248                diagnosis.issues.push(SessionIssue {
249                    session_id: id.clone(),
250                    kind: SessionIssueKind::SkeletonJson,
251                    detail: format!(
252                        "Legacy .json is corrupt — only structural chars remain ({} bytes)",
253                        content.len()
254                    ),
255                });
256            }
257        }
258    }
259
260    // Check index for stale entries, orphans, and cancelled state
261    let db_path = get_workspace_storage_db(workspace_id)?;
262    if db_path.exists() {
263        if let Ok(index) = read_chat_session_index(&db_path) {
264            diagnosis.sessions_in_index = index.entries.len();
265
266            // Stale index entries (in index but no file on disk)
267            for (id, _entry) in &index.entries {
268                if !all_session_ids.contains(id) {
269                    diagnosis.issues.push(SessionIssue {
270                        session_id: id.clone(),
271                        kind: SessionIssueKind::StaleIndexEntry,
272                        detail: "In index but no file on disk".to_string(),
273                    });
274                }
275            }
276
277            // Cancelled state entries
278            for (id, entry) in &index.entries {
279                if entry.last_response_state == 2 {
280                    diagnosis.issues.push(SessionIssue {
281                        session_id: id.clone(),
282                        kind: SessionIssueKind::CancelledState,
283                        detail: "lastResponseState=2 (Cancelled) — blocks VS Code loading"
284                            .to_string(),
285                    });
286                }
287            }
288
289            // Orphaned sessions (on disk but not in index)
290            let indexed_ids: HashSet<&String> = index.entries.keys().collect();
291            for id in &all_session_ids {
292                if !indexed_ids.contains(id) {
293                    diagnosis.issues.push(SessionIssue {
294                        session_id: id.clone(),
295                        kind: SessionIssueKind::OrphanedSession,
296                        detail: "File on disk but not in VS Code index".to_string(),
297                    });
298                }
299            }
300        }
301    }
302
303    Ok(diagnosis)
304}
305
306/// Regex to match any Unicode escape sequence (valid or not)
307static UNICODE_ESCAPE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\\u[0-9a-fA-F]{4}").unwrap());
308
/// On-disk format of a VS Code session file; selects the parsing strategy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VsCodeSessionFormat {
    /// Legacy format (VS Code < 1.109.0): the file is one JSON object
    /// with the ChatSession structure
    LegacyJson,
    /// JSON Lines format (VS Code >= 1.109.0, January 2026+): event-sourced
    /// entries of kind 0 (initial), kind 1 (delta) and kind 2 (replace/splice)
    JsonLines,
}
319
/// Version of the internal session schema.
///
/// Each variant's discriminant equals its version number, with `Unknown`
/// fixed at 0.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SessionSchemaVersion {
    /// Schema version 1: the original format with basic fields
    V1 = 1,
    /// Schema version 2: adds more metadata fields
    V2 = 2,
    /// Schema version 3: current format with full request/response structure
    V3 = 3,
    /// Any version that is not recognised
    Unknown = 0,
}
332
333impl SessionSchemaVersion {
334    /// Create from version number
335    pub fn from_version(v: u32) -> Self {
336        match v {
337            1 => Self::V1,
338            2 => Self::V2,
339            3 => Self::V3,
340            _ => Self::Unknown,
341        }
342    }
343
344    /// Get version number
345    pub fn version_number(&self) -> u32 {
346        match self {
347            Self::V1 => 1,
348            Self::V2 => 2,
349            Self::V3 => 3,
350            Self::Unknown => 0,
351        }
352    }
353
354    /// Get description
355    pub fn description(&self) -> &'static str {
356        match self {
357            Self::V1 => "v1 (basic)",
358            Self::V2 => "v2 (extended metadata)",
359            Self::V3 => "v3 (full structure)",
360            Self::Unknown => "unknown",
361        }
362    }
363}
364
365impl std::fmt::Display for SessionSchemaVersion {
366    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
367        write!(f, "{}", self.description())
368    }
369}
370
371/// Result of session format detection
372#[derive(Debug, Clone)]
373pub struct SessionFormatInfo {
374    /// File format (JSON or JSONL)
375    pub format: VsCodeSessionFormat,
376    /// Schema version detected from content
377    pub schema_version: SessionSchemaVersion,
378    /// Confidence level (0.0 - 1.0)
379    pub confidence: f32,
380    /// Detection method used
381    pub detection_method: &'static str,
382}
383
384impl VsCodeSessionFormat {
385    /// Detect format from file path (by extension)
386    pub fn from_path(path: &Path) -> Self {
387        match path.extension().and_then(|e| e.to_str()) {
388            Some("jsonl") => Self::JsonLines,
389            _ => Self::LegacyJson,
390        }
391    }
392
393    /// Detect format from content by analyzing structure
394    pub fn from_content(content: &str) -> Self {
395        let trimmed = content.trim();
396
397        // JSONL: Multiple lines starting with { or first line has {"kind":
398        if trimmed.starts_with("{\"kind\":") || trimmed.starts_with("{ \"kind\":") {
399            return Self::JsonLines;
400        }
401
402        // Count lines that look like JSON objects
403        let mut json_object_lines = 0;
404        let mut total_non_empty_lines = 0;
405
406        for line in trimmed.lines().take(10) {
407            let line = line.trim();
408            if line.is_empty() {
409                continue;
410            }
411            total_non_empty_lines += 1;
412
413            // Check if line is a JSON object with "kind" field (JSONL marker)
414            if line.starts_with('{') && line.contains("\"kind\"") {
415                json_object_lines += 1;
416            }
417        }
418
419        // If multiple lines look like JSONL entries, it's JSONL
420        if json_object_lines >= 2
421            || (json_object_lines == 1 && total_non_empty_lines == 1 && trimmed.contains("\n{"))
422        {
423            return Self::JsonLines;
424        }
425
426        // Check if it's a single JSON object (legacy format)
427        if trimmed.starts_with('{') && trimmed.ends_with('}') {
428            // Look for ChatSession structure markers
429            if trimmed.contains("\"sessionId\"")
430                || trimmed.contains("\"creationDate\"")
431                || trimmed.contains("\"requests\"")
432            {
433                return Self::LegacyJson;
434            }
435        }
436
437        // Default to legacy JSON if unclear
438        Self::LegacyJson
439    }
440
441    /// Get minimum VS Code version that uses this format
442    pub fn min_vscode_version(&self) -> &'static str {
443        match self {
444            Self::LegacyJson => "1.0.0",
445            Self::JsonLines => "1.109.0",
446        }
447    }
448
449    /// Get human-readable format description
450    pub fn description(&self) -> &'static str {
451        match self {
452            Self::LegacyJson => "Legacy JSON (single object)",
453            Self::JsonLines => "JSON Lines (event-sourced, VS Code 1.109.0+)",
454        }
455    }
456
457    /// Get short format name
458    pub fn short_name(&self) -> &'static str {
459        match self {
460            Self::LegacyJson => "json",
461            Self::JsonLines => "jsonl",
462        }
463    }
464}
465
466impl std::fmt::Display for VsCodeSessionFormat {
467    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
468        write!(f, "{}", self.description())
469    }
470}
471
472/// Sanitize JSON content by replacing lone surrogates with replacement character.
473/// VS Code sometimes writes invalid JSON with lone Unicode surrogates (e.g., \udde0).
474fn sanitize_json_unicode(content: &str) -> String {
475    // Process all \uXXXX sequences and fix lone surrogates
476    let mut result = String::with_capacity(content.len());
477    let mut last_end = 0;
478
479    // Collect all matches first to avoid borrowing issues
480    let matches: Vec<_> = UNICODE_ESCAPE_RE.find_iter(content).collect();
481
482    for (i, mat) in matches.iter().enumerate() {
483        let start = mat.start();
484        let end = mat.end();
485
486        // Add content before this match
487        result.push_str(&content[last_end..start]);
488
489        // Parse the hex value from the match itself (always ASCII \uXXXX)
490        let hex_str = &mat.as_str()[2..]; // Skip the \u prefix
491        if let Ok(code_point) = u16::from_str_radix(hex_str, 16) {
492            // Check if it's a high surrogate (D800-DBFF)
493            if (0xD800..=0xDBFF).contains(&code_point) {
494                // Check if next match is immediately following and is a low surrogate
495                let is_valid_pair = if let Some(next_mat) = matches.get(i + 1) {
496                    // Must be immediately adjacent (no gap)
497                    if next_mat.start() == end {
498                        let next_hex = &next_mat.as_str()[2..];
499                        if let Ok(next_cp) = u16::from_str_radix(next_hex, 16) {
500                            (0xDC00..=0xDFFF).contains(&next_cp)
501                        } else {
502                            false
503                        }
504                    } else {
505                        false
506                    }
507                } else {
508                    false
509                };
510
511                if is_valid_pair {
512                    // Valid surrogate pair, keep the high surrogate
513                    result.push_str(mat.as_str());
514                } else {
515                    // Lone high surrogate - replace with replacement char
516                    result.push_str("\\uFFFD");
517                }
518            }
519            // Check if it's a low surrogate (DC00-DFFF)
520            else if (0xDC00..=0xDFFF).contains(&code_point) {
521                // Check if previous match was immediately before and was a high surrogate
522                let is_valid_pair = if i > 0 {
523                    if let Some(prev_mat) = matches.get(i - 1) {
524                        // Must be immediately adjacent (no gap)
525                        if prev_mat.end() == start {
526                            let prev_hex = &prev_mat.as_str()[2..];
527                            if let Ok(prev_cp) = u16::from_str_radix(prev_hex, 16) {
528                                (0xD800..=0xDBFF).contains(&prev_cp)
529                            } else {
530                                false
531                            }
532                        } else {
533                            false
534                        }
535                    } else {
536                        false
537                    }
538                } else {
539                    false
540                };
541
542                if is_valid_pair {
543                    // Part of valid surrogate pair, keep it
544                    result.push_str(mat.as_str());
545                } else {
546                    // Lone low surrogate - replace with replacement char
547                    result.push_str("\\uFFFD");
548                }
549            }
550            // Normal code point
551            else {
552                result.push_str(mat.as_str());
553            }
554        } else {
555            // Invalid hex - keep as is
556            result.push_str(mat.as_str());
557        }
558        last_end = end;
559    }
560
561    // Add remaining content
562    result.push_str(&content[last_end..]);
563    result
564}
565
566/// Try to parse JSON, sanitizing invalid Unicode if needed
567pub fn parse_session_json(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
568    match serde_json::from_str::<ChatSession>(content) {
569        Ok(session) => Ok(session),
570        Err(e) => {
571            // If parsing fails due to Unicode issue, try sanitizing
572            if e.to_string().contains("surrogate") || e.to_string().contains("escape") {
573                let sanitized = sanitize_json_unicode(content);
574                serde_json::from_str::<ChatSession>(&sanitized)
575            } else {
576                Err(e)
577            }
578        }
579    }
580}
581
/// Kinds of entry found in a JSONL session file (VS Code 1.109.0+ format).
/// The discriminant of each variant is the value of the entry's `kind` field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonlKind {
    /// kind 0: the initial session state
    Initial = 0,
    /// kind 1: a delta update to specific keys
    Delta = 1,
    /// kind 2: an array replace/splice operation; an optional 'i' field
    /// gives the splice index (truncate at i, then extend)
    ArraySplice = 2,
}
593
594/// Parse a JSONL (JSON Lines) session file (VS Code 1.109.0+ format)
595/// Each line is a JSON object with 'kind' field indicating the type:
596/// - kind 0: Initial session metadata with 'v' containing ChatSession-like structure
597/// - kind 1: Delta update with 'k' (keys path) and 'v' (value)
598/// - kind 2: Array replace/splice with 'k' (path), 'v' (items), optional 'i' (splice index)
599pub fn parse_session_jsonl(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
600    // Pre-process: split concatenated JSON objects that lack newline separators
601    let content = split_concatenated_jsonl(content);
602
603    let mut session = ChatSession {
604        version: 3,
605        session_id: None,
606        creation_date: 0,
607        last_message_date: 0,
608        is_imported: false,
609        initial_location: "panel".to_string(),
610        custom_title: None,
611        requester_username: None,
612        requester_avatar_icon_uri: None,
613        responder_username: None,
614        responder_avatar_icon_uri: None,
615        requests: Vec::new(),
616    };
617
618    for line in content.lines() {
619        let line = line.trim();
620        if line.is_empty() {
621            continue;
622        }
623
624        // Parse each line as a JSON object
625        let entry: serde_json::Value = match serde_json::from_str(line) {
626            Ok(v) => v,
627            Err(_) => {
628                // Try sanitizing Unicode
629                let sanitized = sanitize_json_unicode(line);
630                serde_json::from_str(&sanitized)?
631            }
632        };
633
634        let kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(0);
635
636        match kind {
637            0 => {
638                // Initial state - 'v' contains the session metadata
639                if let Some(v) = entry.get("v") {
640                    // Parse version
641                    if let Some(version) = v.get("version").and_then(|x| x.as_u64()) {
642                        session.version = version as u32;
643                    }
644                    // Parse session ID
645                    if let Some(sid) = v.get("sessionId").and_then(|x| x.as_str()) {
646                        session.session_id = Some(sid.to_string());
647                    }
648                    // Parse creation date
649                    if let Some(cd) = v.get("creationDate").and_then(|x| x.as_i64()) {
650                        session.creation_date = cd;
651                    }
652                    // Parse initial location
653                    if let Some(loc) = v.get("initialLocation").and_then(|x| x.as_str()) {
654                        session.initial_location = loc.to_string();
655                    }
656                    // Parse responder username
657                    if let Some(ru) = v.get("responderUsername").and_then(|x| x.as_str()) {
658                        session.responder_username = Some(ru.to_string());
659                    }
660                    // Parse custom title
661                    if let Some(title) = v.get("customTitle").and_then(|x| x.as_str()) {
662                        session.custom_title = Some(title.to_string());
663                    }
664                    // Parse hasPendingEdits as imported marker
665                    if let Some(imported) = v.get("isImported").and_then(|x| x.as_bool()) {
666                        session.is_imported = imported;
667                    }
668                    // Parse requests array if present
669                    if let Some(requests) = v.get("requests") {
670                        if let Ok(reqs) =
671                            serde_json::from_value::<Vec<ChatRequest>>(requests.clone())
672                        {
673                            session.requests = reqs;
674                            // Compute last_message_date from the latest request timestamp
675                            if let Some(latest_ts) =
676                                session.requests.iter().filter_map(|r| r.timestamp).max()
677                            {
678                                session.last_message_date = latest_ts;
679                            }
680                        }
681                    }
682                    // Fall back to creationDate if no request timestamps found
683                    if session.last_message_date == 0 {
684                        session.last_message_date = session.creation_date;
685                    }
686                }
687            }
688            1 => {
689                // Delta update - 'k' is array of key path, 'v' is the value
690                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
691                    if let Some(keys_arr) = keys.as_array() {
692                        // Handle top-level session keys
693                        if keys_arr.len() == 1 {
694                            if let Some(key) = keys_arr[0].as_str() {
695                                match key {
696                                    "customTitle" => {
697                                        if let Some(title) = value.as_str() {
698                                            session.custom_title = Some(title.to_string());
699                                        }
700                                    }
701                                    "lastMessageDate" => {
702                                        if let Some(date) = value.as_i64() {
703                                            session.last_message_date = date;
704                                        }
705                                    }
706                                    "hasPendingEdits" | "isImported" => {
707                                        // Session-level boolean updates, safe to ignore for now
708                                    }
709                                    _ => {} // Ignore unknown keys
710                                }
711                            }
712                        }
713                        // Handle nested request field updates: ["requests", idx, field]
714                        else if keys_arr.len() == 3 {
715                            if let (Some("requests"), Some(idx), Some(field)) = (
716                                keys_arr[0].as_str(),
717                                keys_arr[1].as_u64().map(|i| i as usize),
718                                keys_arr[2].as_str(),
719                            ) {
720                                if idx < session.requests.len() {
721                                    match field {
722                                        "response" => {
723                                            session.requests[idx].response = Some(value.clone());
724                                        }
725                                        "result" => {
726                                            session.requests[idx].result = Some(value.clone());
727                                        }
728                                        "followups" => {
729                                            session.requests[idx].followups =
730                                                serde_json::from_value(value.clone()).ok();
731                                        }
732                                        "isCanceled" => {
733                                            session.requests[idx].is_canceled = value.as_bool();
734                                        }
735                                        "contentReferences" => {
736                                            session.requests[idx].content_references =
737                                                serde_json::from_value(value.clone()).ok();
738                                        }
739                                        "codeCitations" => {
740                                            session.requests[idx].code_citations =
741                                                serde_json::from_value(value.clone()).ok();
742                                        }
743                                        "modelState" | "modelId" | "agent" | "variableData" => {
744                                            // Known request fields - update as generic Value
745                                            // modelState tracks the request lifecycle
746                                        }
747                                        _ => {} // Ignore unknown request fields
748                                    }
749                                }
750                            }
751                        }
752                    }
753                }
754            }
755            2 => {
756                // Array splice operation - 'k' is the key path, 'v' is the new array items
757                // Optional 'i' field is the splice start index (truncate at i, then extend)
758                // Without 'i', items are appended to the end of the array
759                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
760                    let splice_index = entry.get("i").and_then(|i| i.as_u64()).map(|i| i as usize);
761                    if let Some(keys_arr) = keys.as_array() {
762                        // Top-level requests: k=["requests"], v=[requests_array]
763                        if keys_arr.len() == 1 {
764                            if let Some("requests") = keys_arr[0].as_str() {
765                                if let Some(items) = value.as_array() {
766                                    if let Some(idx) = splice_index {
767                                        // Splice: truncate at index i, then extend with new items
768                                        session.requests.truncate(idx);
769                                    }
770                                    // Without 'i': append to end (no truncation)
771                                    for item in items {
772                                        if let Ok(req) =
773                                            serde_json::from_value::<ChatRequest>(item.clone())
774                                        {
775                                            session.requests.push(req);
776                                        }
777                                    }
778                                    // Update last message date from latest request
779                                    if let Some(last_req) = session.requests.last() {
780                                        if let Some(ts) = last_req.timestamp {
781                                            session.last_message_date = ts;
782                                        }
783                                    }
784                                }
785                            }
786                        }
787                        // Nested array replace/splice: k=["requests", idx, "response"], v=[parts]
788                        else if keys_arr.len() == 3 {
789                            if let (Some("requests"), Some(req_idx), Some(field)) = (
790                                keys_arr[0].as_str(),
791                                keys_arr[1].as_u64().map(|i| i as usize),
792                                keys_arr[2].as_str(),
793                            ) {
794                                if req_idx < session.requests.len() {
795                                    match field {
796                                        "response" => {
797                                            // Response is stored as a JSON Value (array)
798                                            if let Some(idx) = splice_index {
799                                                // Splice: keep items before index i, replace rest
800                                                if let Some(existing) =
801                                                    session.requests[req_idx].response.as_ref()
802                                                {
803                                                    if let Some(existing_arr) = existing.as_array()
804                                                    {
805                                                        let mut new_arr: Vec<serde_json::Value> =
806                                                            existing_arr
807                                                                [..idx.min(existing_arr.len())]
808                                                                .to_vec();
809                                                        if let Some(new_items) = value.as_array() {
810                                                            new_arr
811                                                                .extend(new_items.iter().cloned());
812                                                        }
813                                                        session.requests[req_idx].response =
814                                                            Some(serde_json::Value::Array(new_arr));
815                                                    } else {
816                                                        session.requests[req_idx].response =
817                                                            Some(value.clone());
818                                                    }
819                                                } else {
820                                                    session.requests[req_idx].response =
821                                                        Some(value.clone());
822                                                }
823                                            } else {
824                                                // No splice index: append to existing response array
825                                                if let Some(existing) =
826                                                    session.requests[req_idx].response.as_ref()
827                                                {
828                                                    if let Some(existing_arr) = existing.as_array()
829                                                    {
830                                                        let mut new_arr = existing_arr.clone();
831                                                        if let Some(new_items) = value.as_array() {
832                                                            new_arr
833                                                                .extend(new_items.iter().cloned());
834                                                        }
835                                                        session.requests[req_idx].response =
836                                                            Some(serde_json::Value::Array(new_arr));
837                                                    } else {
838                                                        session.requests[req_idx].response =
839                                                            Some(value.clone());
840                                                    }
841                                                } else {
842                                                    session.requests[req_idx].response =
843                                                        Some(value.clone());
844                                                }
845                                            }
846                                        }
847                                        "contentReferences" => {
848                                            session.requests[req_idx].content_references =
849                                                serde_json::from_value(value.clone()).ok();
850                                        }
851                                        _ => {} // Ignore unknown fields
852                                    }
853                                }
854                            }
855                        }
856                    }
857                }
858            }
859            _ => {} // Unknown kind, skip
860        }
861    }
862
863    Ok(session)
864}
865
/// Report whether `ext` is one of the extensions used for session files.
///
/// The accepted values are exactly `json`, `jsonl` and `backup`. The match is
/// case-sensitive and `ext` is expected without a leading dot (the form
/// returned by `Path::extension`). Extensions that are not valid UTF-8 never
/// match.
pub fn is_session_file_extension(ext: &std::ffi::OsStr) -> bool {
    matches!(ext.to_str(), Some("json" | "jsonl" | "backup"))
}
870
871/// Detect session format and version from content
872pub fn detect_session_format(content: &str) -> SessionFormatInfo {
873    let format = VsCodeSessionFormat::from_content(content);
874    let trimmed = content.trim();
875
876    // Detect schema version based on format
877    let (schema_version, confidence, method) = match format {
878        VsCodeSessionFormat::JsonLines => {
879            // For JSONL, check the first line's "v" object for version
880            if let Some(first_line) = trimmed.lines().next() {
881                if let Ok(entry) = serde_json::from_str::<serde_json::Value>(first_line) {
882                    if let Some(v) = entry.get("v") {
883                        if let Some(ver) = v.get("version").and_then(|x| x.as_u64()) {
884                            (
885                                SessionSchemaVersion::from_version(ver as u32),
886                                0.95,
887                                "jsonl-version-field",
888                            )
889                        } else {
890                            // No version field, likely v3 (current default)
891                            (SessionSchemaVersion::V3, 0.7, "jsonl-default")
892                        }
893                    } else {
894                        (SessionSchemaVersion::V3, 0.6, "jsonl-no-v-field")
895                    }
896                } else {
897                    (SessionSchemaVersion::Unknown, 0.3, "jsonl-parse-error")
898                }
899            } else {
900                (SessionSchemaVersion::Unknown, 0.2, "jsonl-empty")
901            }
902        }
903        VsCodeSessionFormat::LegacyJson => {
904            // For JSON, directly check the version field
905            if let Ok(json) = serde_json::from_str::<serde_json::Value>(trimmed) {
906                if let Some(ver) = json.get("version").and_then(|x| x.as_u64()) {
907                    (
908                        SessionSchemaVersion::from_version(ver as u32),
909                        0.95,
910                        "json-version-field",
911                    )
912                } else {
913                    // Infer from structure
914                    if json.get("requests").is_some() && json.get("sessionId").is_some() {
915                        (SessionSchemaVersion::V3, 0.8, "json-structure-inference")
916                    } else if json.get("messages").is_some() {
917                        (SessionSchemaVersion::V1, 0.7, "json-legacy-structure")
918                    } else {
919                        (SessionSchemaVersion::Unknown, 0.4, "json-unknown-structure")
920                    }
921                }
922            } else {
923                // Try sanitizing and parsing again
924                let sanitized = sanitize_json_unicode(trimmed);
925                if let Ok(json) = serde_json::from_str::<serde_json::Value>(&sanitized) {
926                    if let Some(ver) = json.get("version").and_then(|x| x.as_u64()) {
927                        (
928                            SessionSchemaVersion::from_version(ver as u32),
929                            0.9,
930                            "json-version-after-sanitize",
931                        )
932                    } else {
933                        (SessionSchemaVersion::V3, 0.6, "json-default-after-sanitize")
934                    }
935                } else {
936                    (SessionSchemaVersion::Unknown, 0.2, "json-parse-error")
937                }
938            }
939        }
940    };
941
942    SessionFormatInfo {
943        format,
944        schema_version,
945        confidence,
946        detection_method: method,
947    }
948}
949
950/// Parse session content with automatic format detection
951pub fn parse_session_auto(
952    content: &str,
953) -> std::result::Result<(ChatSession, SessionFormatInfo), serde_json::Error> {
954    let format_info = detect_session_format(content);
955
956    let session = match format_info.format {
957        VsCodeSessionFormat::JsonLines => parse_session_jsonl(content)?,
958        VsCodeSessionFormat::LegacyJson => parse_session_json(content)?,
959    };
960
961    Ok((session, format_info))
962}
963
964/// Parse a session file, automatically detecting format from content (not just extension)
965pub fn parse_session_file(path: &Path) -> std::result::Result<ChatSession, serde_json::Error> {
966    let content = std::fs::read_to_string(path)
967        .map_err(|e| serde_json::Error::io(std::io::Error::other(e.to_string())))?;
968
969    // Use content-based auto-detection
970    let (session, _format_info) = parse_session_auto(&content)?;
971    Ok(session)
972}
973
974/// Get the path to the workspace storage database
975pub fn get_workspace_storage_db(workspace_id: &str) -> Result<PathBuf> {
976    let storage_path = get_workspace_storage_path()?;
977    Ok(storage_path.join(workspace_id).join("state.vscdb"))
978}
979
980/// Read the chat session index from VS Code storage
981pub fn read_chat_session_index(db_path: &Path) -> Result<ChatSessionIndex> {
982    let conn = Connection::open(db_path)?;
983
984    let result: std::result::Result<String, rusqlite::Error> = conn.query_row(
985        "SELECT value FROM ItemTable WHERE key = ?",
986        ["chat.ChatSessionStore.index"],
987        |row| row.get(0),
988    );
989
990    match result {
991        Ok(json_str) => serde_json::from_str(&json_str)
992            .map_err(|e| CsmError::InvalidSessionFormat(e.to_string())),
993        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(ChatSessionIndex::default()),
994        Err(e) => Err(CsmError::SqliteError(e)),
995    }
996}
997
998/// Write the chat session index to VS Code storage
999pub fn write_chat_session_index(db_path: &Path, index: &ChatSessionIndex) -> Result<()> {
1000    let conn = Connection::open(db_path)?;
1001    let json_str = serde_json::to_string(index)?;
1002
1003    // Check if the key exists
1004    let exists: bool = conn.query_row(
1005        "SELECT COUNT(*) > 0 FROM ItemTable WHERE key = ?",
1006        ["chat.ChatSessionStore.index"],
1007        |row| row.get(0),
1008    )?;
1009
1010    if exists {
1011        conn.execute(
1012            "UPDATE ItemTable SET value = ? WHERE key = ?",
1013            [&json_str, "chat.ChatSessionStore.index"],
1014        )?;
1015    } else {
1016        conn.execute(
1017            "INSERT INTO ItemTable (key, value) VALUES (?, ?)",
1018            ["chat.ChatSessionStore.index", &json_str],
1019        )?;
1020    }
1021
1022    Ok(())
1023}
1024
1025/// Add a session to the VS Code index
1026pub fn add_session_to_index(
1027    db_path: &Path,
1028    session_id: &str,
1029    title: &str,
1030    last_message_date_ms: i64,
1031    _is_imported: bool,
1032    initial_location: &str,
1033    is_empty: bool,
1034) -> Result<()> {
1035    let mut index = read_chat_session_index(db_path)?;
1036
1037    index.entries.insert(
1038        session_id.to_string(),
1039        ChatSessionIndexEntry {
1040            session_id: session_id.to_string(),
1041            title: title.to_string(),
1042            last_message_date: last_message_date_ms,
1043            timing: Some(ChatSessionTiming {
1044                created: last_message_date_ms,
1045                last_request_started: Some(last_message_date_ms),
1046                last_request_ended: Some(last_message_date_ms),
1047            }),
1048            last_response_state: 1, // ResponseModelState.Complete
1049            initial_location: initial_location.to_string(),
1050            is_empty,
1051            is_imported: Some(_is_imported),
1052            has_pending_edits: Some(false),
1053            is_external: Some(false),
1054        },
1055    );
1056
1057    write_chat_session_index(db_path, &index)
1058}
1059
1060/// Remove a session from the VS Code index
1061#[allow(dead_code)]
1062pub fn remove_session_from_index(db_path: &Path, session_id: &str) -> Result<bool> {
1063    let mut index = read_chat_session_index(db_path)?;
1064    let removed = index.entries.remove(session_id).is_some();
1065    if removed {
1066        write_chat_session_index(db_path, &index)?;
1067    }
1068    Ok(removed)
1069}
1070
1071/// Sync the VS Code index with sessions on disk (remove stale entries, add missing ones)
1072/// When both .json and .jsonl exist for the same session ID, prefers .jsonl.
1073pub fn sync_session_index(
1074    workspace_id: &str,
1075    chat_sessions_dir: &Path,
1076    force: bool,
1077) -> Result<(usize, usize)> {
1078    let db_path = get_workspace_storage_db(workspace_id)?;
1079
1080    if !db_path.exists() {
1081        return Err(CsmError::WorkspaceNotFound(format!(
1082            "Database not found: {}",
1083            db_path.display()
1084        )));
1085    }
1086
1087    // Check if VS Code is running
1088    if !force && is_vscode_running() {
1089        return Err(CsmError::VSCodeRunning);
1090    }
1091
1092    // Get current index
1093    let mut index = read_chat_session_index(&db_path)?;
1094
1095    // Get session files on disk
1096    let mut files_on_disk: std::collections::HashSet<String> = std::collections::HashSet::new();
1097    if chat_sessions_dir.exists() {
1098        for entry in std::fs::read_dir(chat_sessions_dir)? {
1099            let entry = entry?;
1100            let path = entry.path();
1101            if path
1102                .extension()
1103                .map(is_session_file_extension)
1104                .unwrap_or(false)
1105            {
1106                if let Some(stem) = path.file_stem() {
1107                    files_on_disk.insert(stem.to_string_lossy().to_string());
1108                }
1109            }
1110        }
1111    }
1112
1113    // Remove stale entries (in index but not on disk)
1114    let stale_ids: Vec<String> = index
1115        .entries
1116        .keys()
1117        .filter(|id| !files_on_disk.contains(*id))
1118        .cloned()
1119        .collect();
1120
1121    let removed = stale_ids.len();
1122    for id in &stale_ids {
1123        index.entries.remove(id);
1124    }
1125
1126    // Add/update sessions from disk
1127    // Collect files, preferring .jsonl over .json for the same session ID
1128    let mut session_files: std::collections::HashMap<String, PathBuf> =
1129        std::collections::HashMap::new();
1130    for entry in std::fs::read_dir(chat_sessions_dir)? {
1131        let entry = entry?;
1132        let path = entry.path();
1133        if path
1134            .extension()
1135            .map(is_session_file_extension)
1136            .unwrap_or(false)
1137        {
1138            if let Some(stem) = path.file_stem() {
1139                let stem_str = stem.to_string_lossy().to_string();
1140                let is_jsonl = path.extension().is_some_and(|e| e == "jsonl");
1141                // Insert if no entry yet, or if this is .jsonl (preferred over .json)
1142                if !session_files.contains_key(&stem_str) || is_jsonl {
1143                    session_files.insert(stem_str, path);
1144                }
1145            }
1146        }
1147    }
1148
1149    let mut added = 0;
1150    for (_, path) in &session_files {
1151        if let Ok(session) = parse_session_file(path) {
1152            let session_id = session.session_id.clone().unwrap_or_else(|| {
1153                path.file_stem()
1154                    .map(|s| s.to_string_lossy().to_string())
1155                    .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
1156            });
1157
1158            let title = session.title();
1159            let is_empty = session.is_empty();
1160            let last_message_date = session.last_message_date;
1161            let initial_location = session.initial_location.clone();
1162
1163            index.entries.insert(
1164                session_id.clone(),
1165                ChatSessionIndexEntry {
1166                    session_id,
1167                    title,
1168                    last_message_date,
1169                    timing: Some(ChatSessionTiming {
1170                        created: session.creation_date,
1171                        last_request_started: Some(last_message_date),
1172                        last_request_ended: Some(last_message_date),
1173                    }),
1174                    last_response_state: 1, // ResponseModelState.Complete
1175                    initial_location,
1176                    is_empty,
1177                    is_imported: Some(false),
1178                    has_pending_edits: Some(false),
1179                    is_external: Some(false),
1180                },
1181            );
1182            added += 1;
1183        }
1184    }
1185
1186    // Write the synced index
1187    write_chat_session_index(&db_path, &index)?;
1188
1189    Ok((added, removed))
1190}
1191
1192/// Register all sessions from a directory into the VS Code index
1193pub fn register_all_sessions_from_directory(
1194    workspace_id: &str,
1195    chat_sessions_dir: &Path,
1196    force: bool,
1197) -> Result<usize> {
1198    let db_path = get_workspace_storage_db(workspace_id)?;
1199
1200    if !db_path.exists() {
1201        return Err(CsmError::WorkspaceNotFound(format!(
1202            "Database not found: {}",
1203            db_path.display()
1204        )));
1205    }
1206
1207    // Check if VS Code is running
1208    if !force && is_vscode_running() {
1209        return Err(CsmError::VSCodeRunning);
1210    }
1211
1212    // Use sync to ensure index matches disk
1213    let (added, removed) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
1214
1215    // Print individual session info
1216    for entry in std::fs::read_dir(chat_sessions_dir)? {
1217        let entry = entry?;
1218        let path = entry.path();
1219
1220        if path
1221            .extension()
1222            .map(is_session_file_extension)
1223            .unwrap_or(false)
1224        {
1225            if let Ok(session) = parse_session_file(&path) {
1226                let session_id = session.session_id.clone().unwrap_or_else(|| {
1227                    path.file_stem()
1228                        .map(|s| s.to_string_lossy().to_string())
1229                        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
1230                });
1231
1232                let title = session.title();
1233
1234                println!(
1235                    "[OK] Registered: {} ({}...)",
1236                    title,
1237                    &session_id[..12.min(session_id.len())]
1238                );
1239            }
1240        }
1241    }
1242
1243    if removed > 0 {
1244        println!("[OK] Removed {} stale index entries", removed);
1245    }
1246
1247    Ok(added)
1248}
1249
1250/// Check if VS Code is currently running
1251pub fn is_vscode_running() -> bool {
1252    let mut sys = System::new();
1253    sys.refresh_processes();
1254
1255    for process in sys.processes().values() {
1256        let name = process.name().to_lowercase();
1257        if name.contains("code") && !name.contains("codec") {
1258            return true;
1259        }
1260    }
1261
1262    false
1263}
1264
1265/// Close VS Code gracefully and wait for it to exit.
1266/// Returns the list of workspace folders that were open (for reopening).
1267pub fn close_vscode_and_wait(timeout_secs: u64) -> Result<()> {
1268    use sysinfo::{ProcessRefreshKind, RefreshKind, Signal};
1269
1270    if !is_vscode_running() {
1271        return Ok(());
1272    }
1273
1274    // Send SIGTERM (graceful close) to all Code processes
1275    let mut sys = System::new_with_specifics(
1276        RefreshKind::new().with_processes(ProcessRefreshKind::everything()),
1277    );
1278    sys.refresh_processes();
1279
1280    let mut signaled = 0u32;
1281    for (pid, process) in sys.processes() {
1282        let name = process.name().to_lowercase();
1283        if name.contains("code") && !name.contains("codec") {
1284            // On Windows, kill() sends TerminateProcess; there's no graceful
1285            // SIGTERM equivalent via sysinfo. But the main electron process
1286            // handles WM_CLOSE. We use the `taskkill` approach on Windows for
1287            // a graceful close.
1288            #[cfg(windows)]
1289            {
1290                let _ = std::process::Command::new("taskkill")
1291                    .args(["/PID", &pid.as_u32().to_string()])
1292                    .stdout(std::process::Stdio::null())
1293                    .stderr(std::process::Stdio::null())
1294                    .status();
1295                signaled += 1;
1296            }
1297            #[cfg(not(windows))]
1298            {
1299                if process.kill_with(Signal::Term).unwrap_or(false) {
1300                    signaled += 1;
1301                }
1302            }
1303        }
1304    }
1305
1306    if signaled == 0 {
1307        return Ok(());
1308    }
1309
1310    // Wait for all Code processes to exit
1311    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout_secs);
1312    loop {
1313        std::thread::sleep(std::time::Duration::from_millis(500));
1314        if !is_vscode_running() {
1315            // Extra wait for file locks to release
1316            std::thread::sleep(std::time::Duration::from_secs(1));
1317            return Ok(());
1318        }
1319        if std::time::Instant::now() >= deadline {
1320            // Force kill remaining processes
1321            let mut sys2 = System::new_with_specifics(
1322                RefreshKind::new().with_processes(ProcessRefreshKind::everything()),
1323            );
1324            sys2.refresh_processes();
1325            for (_pid, process) in sys2.processes() {
1326                let name = process.name().to_lowercase();
1327                if name.contains("code") && !name.contains("codec") {
1328                    process.kill();
1329                }
1330            }
1331            std::thread::sleep(std::time::Duration::from_secs(1));
1332            return Ok(());
1333        }
1334    }
1335}
1336
1337/// Reopen VS Code, optionally at a specific path.
1338pub fn reopen_vscode(project_path: Option<&str>) -> Result<()> {
1339    let mut cmd = std::process::Command::new("code");
1340    if let Some(path) = project_path {
1341        cmd.arg(path);
1342    }
1343    cmd.stdout(std::process::Stdio::null())
1344        .stderr(std::process::Stdio::null())
1345        .spawn()?;
1346    Ok(())
1347}
1348
1349/// Backup workspace sessions to a timestamped directory
1350pub fn backup_workspace_sessions(workspace_dir: &Path) -> Result<Option<PathBuf>> {
1351    let chat_sessions_dir = workspace_dir.join("chatSessions");
1352
1353    if !chat_sessions_dir.exists() {
1354        return Ok(None);
1355    }
1356
1357    let timestamp = std::time::SystemTime::now()
1358        .duration_since(std::time::UNIX_EPOCH)
1359        .unwrap()
1360        .as_secs();
1361
1362    let backup_dir = workspace_dir.join(format!("chatSessions-backup-{}", timestamp));
1363
1364    // Copy directory recursively
1365    copy_dir_all(&chat_sessions_dir, &backup_dir)?;
1366
1367    Ok(Some(backup_dir))
1368}
1369
1370/// Recursively copy a directory
1371fn copy_dir_all(src: &Path, dst: &Path) -> Result<()> {
1372    std::fs::create_dir_all(dst)?;
1373
1374    for entry in std::fs::read_dir(src)? {
1375        let entry = entry?;
1376        let src_path = entry.path();
1377        let dst_path = dst.join(entry.file_name());
1378
1379        if src_path.is_dir() {
1380            copy_dir_all(&src_path, &dst_path)?;
1381        } else {
1382            std::fs::copy(&src_path, &dst_path)?;
1383        }
1384    }
1385
1386    Ok(())
1387}
1388
1389// =============================================================================
1390// Empty Window Sessions (ALL SESSIONS)
1391// =============================================================================
1392
1393/// Read all empty window chat sessions (not tied to any workspace)
1394/// These appear in VS Code's "ALL SESSIONS" panel
1395pub fn read_empty_window_sessions() -> Result<Vec<ChatSession>> {
1396    let sessions_path = get_empty_window_sessions_path()?;
1397
1398    if !sessions_path.exists() {
1399        return Ok(Vec::new());
1400    }
1401
1402    let mut sessions = Vec::new();
1403
1404    for entry in std::fs::read_dir(&sessions_path)? {
1405        let entry = entry?;
1406        let path = entry.path();
1407
1408        if path.extension().is_some_and(is_session_file_extension) {
1409            if let Ok(session) = parse_session_file(&path) {
1410                sessions.push(session);
1411            }
1412        }
1413    }
1414
1415    // Sort by last message date (most recent first)
1416    sessions.sort_by(|a, b| b.last_message_date.cmp(&a.last_message_date));
1417
1418    Ok(sessions)
1419}
1420
1421/// Get a specific empty window session by ID
1422#[allow(dead_code)]
1423pub fn get_empty_window_session(session_id: &str) -> Result<Option<ChatSession>> {
1424    let sessions_path = get_empty_window_sessions_path()?;
1425    let session_path = sessions_path.join(format!("{}.json", session_id));
1426
1427    if !session_path.exists() {
1428        return Ok(None);
1429    }
1430
1431    let content = std::fs::read_to_string(&session_path)?;
1432    let session: ChatSession = serde_json::from_str(&content)
1433        .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
1434
1435    Ok(Some(session))
1436}
1437
1438/// Write an empty window session
1439#[allow(dead_code)]
1440pub fn write_empty_window_session(session: &ChatSession) -> Result<PathBuf> {
1441    let sessions_path = get_empty_window_sessions_path()?;
1442
1443    // Create directory if it doesn't exist
1444    std::fs::create_dir_all(&sessions_path)?;
1445
1446    let session_id = session.session_id.as_deref().unwrap_or("unknown");
1447    let session_path = sessions_path.join(format!("{}.json", session_id));
1448    let content = serde_json::to_string_pretty(session)?;
1449    std::fs::write(&session_path, content)?;
1450
1451    Ok(session_path)
1452}
1453
1454/// Delete an empty window session
1455#[allow(dead_code)]
1456pub fn delete_empty_window_session(session_id: &str) -> Result<bool> {
1457    let sessions_path = get_empty_window_sessions_path()?;
1458    let session_path = sessions_path.join(format!("{}.json", session_id));
1459
1460    if session_path.exists() {
1461        std::fs::remove_file(&session_path)?;
1462        Ok(true)
1463    } else {
1464        Ok(false)
1465    }
1466}
1467
1468/// Count empty window sessions
1469pub fn count_empty_window_sessions() -> Result<usize> {
1470    let sessions_path = get_empty_window_sessions_path()?;
1471
1472    if !sessions_path.exists() {
1473        return Ok(0);
1474    }
1475
1476    let count = std::fs::read_dir(&sessions_path)?
1477        .filter_map(|e| e.ok())
1478        .filter(|e| e.path().extension().is_some_and(is_session_file_extension))
1479        .count();
1480
1481    Ok(count)
1482}
1483
1484/// Compact a JSONL session file by replaying all operations into a single kind:0 snapshot.
1485/// This works at the raw JSON level, preserving all fields VS Code expects.
1486/// Returns the path to the compacted file.
1487///
1488/// Handles a common corruption pattern where VS Code appends delta operations
1489/// to line 0 without newline separators (e.g., `}{"kind":1,...}{"kind":2,...}`).
1490pub fn compact_session_jsonl(path: &Path) -> Result<PathBuf> {
1491    let content = std::fs::read_to_string(path).map_err(|e| {
1492        CsmError::InvalidSessionFormat(format!("Failed to read {}: {}", path.display(), e))
1493    })?;
1494
1495    // Pre-process: split concatenated JSON objects that lack newline separators.
1496    // VS Code sometimes appends delta ops to line 0 without a \n, producing:
1497    //   {"kind":0,"v":{...}}{"kind":1,...}{"kind":2,...}\n{"kind":1,...}\n...
1498    // We fix this by inserting newlines at every `}{"kind":` boundary.
1499    let content = split_concatenated_jsonl(&content);
1500
1501    let mut lines = content.lines();
1502
1503    // First line must be kind:0 (initial snapshot)
1504    let first_line = lines
1505        .next()
1506        .ok_or_else(|| CsmError::InvalidSessionFormat("Empty JSONL file".to_string()))?;
1507
1508    let first_entry: serde_json::Value = match serde_json::from_str(first_line.trim()) {
1509        Ok(v) => v,
1510        Err(_) => {
1511            // Try sanitizing Unicode (lone surrogates, etc.)
1512            let sanitized = sanitize_json_unicode(first_line.trim());
1513            serde_json::from_str(&sanitized).map_err(|e| {
1514                CsmError::InvalidSessionFormat(format!("Invalid JSON on line 1: {}", e))
1515            })?
1516        }
1517    };
1518
1519    let kind = first_entry
1520        .get("kind")
1521        .and_then(|k| k.as_u64())
1522        .unwrap_or(99);
1523    if kind != 0 {
1524        return Err(CsmError::InvalidSessionFormat(
1525            "First JSONL line must be kind:0".to_string(),
1526        ));
1527    }
1528
1529    // Extract the session state from the "v" field
1530    let mut state = first_entry
1531        .get("v")
1532        .cloned()
1533        .ok_or_else(|| CsmError::InvalidSessionFormat("kind:0 missing 'v' field".to_string()))?;
1534
1535    // Replay all subsequent operations
1536    for line in lines {
1537        let line = line.trim();
1538        if line.is_empty() {
1539            continue;
1540        }
1541
1542        let entry: serde_json::Value = match serde_json::from_str(line) {
1543            Ok(v) => v,
1544            Err(_) => continue, // Skip malformed lines
1545        };
1546
1547        let op_kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(99);
1548
1549        match op_kind {
1550            1 => {
1551                // Delta update: k=["path","to","field"], v=value
1552                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
1553                    if let Some(keys_arr) = keys.as_array() {
1554                        apply_delta(&mut state, keys_arr, value.clone());
1555                    }
1556                }
1557            }
1558            2 => {
1559                // Array replace/splice: k=["path","to","array"], v=[items], i=splice_index
1560                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
1561                    let splice_index = entry.get("i").and_then(|i| i.as_u64()).map(|i| i as usize);
1562                    if let Some(keys_arr) = keys.as_array() {
1563                        apply_splice(&mut state, keys_arr, value.clone(), splice_index);
1564                    }
1565                }
1566            }
1567            _ => {} // Skip unknown kinds
1568        }
1569    }
1570
1571    // Inject any missing fields that VS Code's latest format requires
1572    let session_id = path
1573        .file_stem()
1574        .and_then(|s| s.to_str())
1575        .map(|s| s.to_string());
1576    ensure_vscode_compat_fields(&mut state, session_id.as_deref());
1577
1578    // Write the compacted file: single kind:0 line with the final state
1579    let compact_entry = serde_json::json!({"kind": 0, "v": state});
1580    let compact_content = serde_json::to_string(&compact_entry)
1581        .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
1582
1583    // Backup the original file
1584    let backup_path = path.with_extension("jsonl.bak");
1585    std::fs::rename(path, &backup_path)?;
1586
1587    // Write the compacted file
1588    std::fs::write(path, &compact_content)?;
1589
1590    Ok(backup_path)
1591}
1592
1593/// Trim a session JSONL file by keeping only the last `keep` requests.
1594///
1595/// Very long chat sessions (100+ requests) can grow to 50-100+ MB, causing VS Code
1596/// to fail loading them. This function compacts the session first (if needed), then
1597/// removes old requests from the `requests` array, keeping only the most recent ones.
1598///
1599/// The full session is preserved as a `.jsonl.bak` backup. A trimmed summary is
1600/// injected as the first request message so the user knows context was archived.
1601///
1602/// Returns `(original_count, kept_count, original_mb, new_mb)`.
1603pub fn trim_session_jsonl(path: &Path, keep: usize) -> Result<(usize, usize, f64, f64)> {
1604    let content = std::fs::read_to_string(path).map_err(|e| {
1605        CsmError::InvalidSessionFormat(format!("Failed to read {}: {}", path.display(), e))
1606    })?;
1607
1608    let original_size = content.len() as f64 / (1024.0 * 1024.0);
1609
1610    // Always handle concatenated JSON objects first, then check line count
1611    let content = split_concatenated_jsonl(&content);
1612    let line_count = content.lines().filter(|l| !l.trim().is_empty()).count();
1613
1614    // If multi-line (concatenated objects or delta ops), compact first
1615    let content = if line_count > 1 {
1616        // Write the split content so compact can process it
1617        std::fs::write(path, &content)?;
1618        compact_session_jsonl(path)?;
1619        std::fs::read_to_string(path).map_err(|e| {
1620            CsmError::InvalidSessionFormat(format!("Failed to read compacted file: {}", e))
1621        })?
1622    } else {
1623        content
1624    };
1625
1626    let first_line = content
1627        .lines()
1628        .next()
1629        .ok_or_else(|| CsmError::InvalidSessionFormat("Empty JSONL file".to_string()))?;
1630
1631    let mut entry: serde_json::Value = serde_json::from_str(first_line.trim())
1632        .map_err(|_| {
1633            let sanitized = sanitize_json_unicode(first_line.trim());
1634            serde_json::from_str::<serde_json::Value>(&sanitized)
1635                .map_err(|e| CsmError::InvalidSessionFormat(format!("Invalid JSON: {}", e)))
1636        })
1637        .unwrap_or_else(|e| e.unwrap());
1638
1639    let kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(99);
1640    if kind != 0 {
1641        return Err(
1642            CsmError::InvalidSessionFormat("First JSONL line must be kind:0".to_string()).into(),
1643        );
1644    }
1645
1646    // Get the requests array
1647    let requests = match entry
1648        .get("v")
1649        .and_then(|v| v.get("requests"))
1650        .and_then(|r| r.as_array())
1651    {
1652        Some(r) => r.clone(),
1653        None => {
1654            return Err(CsmError::InvalidSessionFormat(
1655                "Session has no requests array".to_string(),
1656            )
1657            .into());
1658        }
1659    };
1660
1661    let original_count = requests.len();
1662
1663    if original_count <= keep {
1664        // Still strip bloated content even if not reducing request count
1665        strip_bloated_content(&mut entry);
1666
1667        let trimmed_content = serde_json::to_string(&entry)
1668            .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
1669        let new_size = trimmed_content.len() as f64 / (1024.0 * 1024.0);
1670
1671        // Only rewrite if we actually reduced size
1672        if new_size < original_size * 0.9 {
1673            let backup_path = path.with_extension("jsonl.bak");
1674            if !backup_path.exists() {
1675                std::fs::copy(path, &backup_path)?;
1676            }
1677            std::fs::write(path, &trimmed_content)?;
1678        }
1679
1680        return Ok((original_count, original_count, original_size, new_size));
1681    }
1682
1683    // Keep only the last `keep` requests
1684    let kept_requests: Vec<serde_json::Value> = requests[original_count - keep..].to_vec();
1685
1686    // Use only the kept requests — no injected trim notice.
1687    // Injecting synthetic requests with non-standard agent/structure fields
1688    // can cause VS Code's session deserializer to reject the entire session.
1689    let final_requests = kept_requests;
1690
1691    // Replace the requests array in the entry
1692    if let Some(v) = entry.get_mut("v") {
1693        if let Some(obj) = v.as_object_mut() {
1694            obj.insert("requests".to_string(), serde_json::json!(final_requests));
1695        }
1696    }
1697
1698    // Strip bloated metadata, tool invocations, textEditGroups, thinking tokens
1699    strip_bloated_content(&mut entry);
1700
1701    // Ensure compat fields
1702    let session_id = path
1703        .file_stem()
1704        .and_then(|s| s.to_str())
1705        .map(|s| s.to_string());
1706    if let Some(v) = entry.get_mut("v") {
1707        ensure_vscode_compat_fields(v, session_id.as_deref());
1708    }
1709
1710    let trimmed_content = serde_json::to_string(&entry)
1711        .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
1712
1713    let new_size = trimmed_content.len() as f64 / (1024.0 * 1024.0);
1714
1715    // Backup original (if not already backed up by compact)
1716    let backup_path = path.with_extension("jsonl.bak");
1717    if !backup_path.exists() {
1718        std::fs::copy(path, &backup_path)?;
1719    }
1720
1721    // Write the trimmed file
1722    std::fs::write(path, &trimmed_content)?;
1723
1724    Ok((original_count, keep, original_size, new_size))
1725}
1726
1727/// Strip bloated content from a session entry to reduce file size.
1728///
1729/// VS Code sessions accumulate large metadata that isn't needed for session display:
1730/// - `result.metadata`: Can be 100KB-1.5MB per request (Copilot internal state)
1731/// - `editedFileEvents`: Redundant file edit tracking
1732/// - `chatEdits`: File edit diffs
1733/// - `textEditGroup` response items: 80-120KB each with full file diffs
1734/// - `thinking` response items: Model thinking tokens (can be 400+ per request)
1735/// - `toolInvocationSerialized`: Tool call metadata (usually already stripped by compact)
1736/// - `toolSpecificData`: Duplicate data in tool invocations
1737///
1738/// This function strips or truncates all of these while preserving the conversation
1739/// content (markdownContent responses and user messages).
1740fn strip_bloated_content(entry: &mut serde_json::Value) {
1741    let requests = match entry
1742        .get_mut("v")
1743        .and_then(|v| v.get_mut("requests"))
1744        .and_then(|r| r.as_array_mut())
1745    {
1746        Some(r) => r,
1747        None => return,
1748    };
1749
1750    for req in requests.iter_mut() {
1751        let obj = match req.as_object_mut() {
1752            Some(o) => o,
1753            None => continue,
1754        };
1755
1756        // Strip result.metadata (100KB-1.5MB per request)
1757        if let Some(result) = obj.get_mut("result") {
1758            if let Some(result_obj) = result.as_object_mut() {
1759                if let Some(meta) = result_obj.get("metadata") {
1760                    let meta_str = serde_json::to_string(meta).unwrap_or_default();
1761                    if meta_str.len() > 1000 {
1762                        result_obj.insert(
1763                            "metadata".to_string(),
1764                            serde_json::Value::Object(serde_json::Map::new()),
1765                        );
1766                    }
1767                }
1768            }
1769        }
1770
1771        // Strip editedFileEvents
1772        obj.remove("editedFileEvents");
1773
1774        // Strip chatEdits
1775        obj.remove("chatEdits");
1776
1777        // Truncate contentReferences to max 3
1778        if let Some(refs) = obj.get_mut("contentReferences") {
1779            if let Some(arr) = refs.as_array_mut() {
1780                if arr.len() > 3 {
1781                    arr.truncate(3);
1782                }
1783            }
1784        }
1785
1786        // Process response items
1787        if let Some(response) = obj.get_mut("response") {
1788            if let Some(resp_arr) = response.as_array_mut() {
1789                // Remove non-essential response kinds
1790                resp_arr.retain(|r| {
1791                    let kind = r.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1792                    !matches!(
1793                        kind,
1794                        "toolInvocationSerialized"
1795                            | "progressMessage"
1796                            | "confirmationWidget"
1797                            | "codeblockUri"
1798                            | "progressTaskSerialized"
1799                            | "undoStop"
1800                            | "mcpServersStarting"
1801                            | "confirmation"
1802                    )
1803                });
1804
1805                // Truncate textEditGroup items (strip edit diffs, keep URI ref)
1806                for r in resp_arr.iter_mut() {
1807                    let kind = r
1808                        .get("kind")
1809                        .and_then(|k| k.as_str())
1810                        .unwrap_or("")
1811                        .to_string();
1812
1813                    if kind == "textEditGroup" {
1814                        if let Some(edits) = r.get_mut("edits") {
1815                            if let Some(arr) = edits.as_array_mut() {
1816                                if serde_json::to_string(arr).unwrap_or_default().len() > 2000 {
1817                                    arr.clear();
1818                                }
1819                            }
1820                        }
1821                    }
1822
1823                    // Truncate thinking tokens
1824                    if kind == "thinking" {
1825                        if let Some(val) = r.get_mut("value") {
1826                            if let Some(s) = val.as_str() {
1827                                if s.len() > 500 {
1828                                    *val = serde_json::Value::String(format!(
1829                                        "{}... [truncated]",
1830                                        &s[..500]
1831                                    ));
1832                                }
1833                            }
1834                        }
1835                        if let Some(thought) = r.get_mut("thought") {
1836                            if let Some(thought_val) = thought.get_mut("value") {
1837                                if let Some(s) = thought_val.as_str() {
1838                                    if s.len() > 500 {
1839                                        *thought_val = serde_json::Value::String(format!(
1840                                            "{}... [truncated]",
1841                                            &s[..500]
1842                                        ));
1843                                    }
1844                                }
1845                            }
1846                        }
1847                    }
1848
1849                    // Truncate large markdownContent
1850                    if kind == "markdownContent" {
1851                        if let Some(content) = r.get_mut("content") {
1852                            if let Some(val) = content.get_mut("value") {
1853                                if let Some(s) = val.as_str() {
1854                                    if s.len() > 20000 {
1855                                        *val = serde_json::Value::String(format!(
1856                                            "{}\n\n---\n*[Chasm: Content truncated for loading performance]*",
1857                                            &s[..20000]
1858                                        ));
1859                                    }
1860                                }
1861                            }
1862                        }
1863                    }
1864                }
1865
1866                // Limit thinking items to last 5 per request
1867                let mut thinking_count = 0;
1868                let mut indices_to_remove = Vec::new();
1869                for (i, r) in resp_arr.iter().enumerate().rev() {
1870                    let kind = r.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1871                    if kind == "thinking" {
1872                        thinking_count += 1;
1873                        if thinking_count > 5 {
1874                            indices_to_remove.push(i);
1875                        }
1876                    }
1877                }
1878                for idx in indices_to_remove {
1879                    resp_arr.remove(idx);
1880                }
1881
1882                // Strip toolSpecificData from any remaining tool invocations
1883                for r in resp_arr.iter_mut() {
1884                    if let Some(obj) = r.as_object_mut() {
1885                        obj.remove("toolSpecificData");
1886                    }
1887                }
1888
1889                // Fix response items missing `kind` field — wrap raw MarkdownString
1890                // objects as proper markdownContent response items.
1891                // VS Code sometimes serializes MarkdownString directly instead of
1892                // wrapping it in { kind: "markdownContent", content: MarkdownString }.
1893                // Without the `kind` discriminator, VS Code's deserializer fails.
1894                let fixed: Vec<serde_json::Value> = resp_arr
1895                    .drain(..)
1896                    .map(|item| {
1897                        if item.get("kind").is_none() {
1898                            // Check if it looks like a MarkdownString (has `value` or `supportHtml`)
1899                            if item.get("value").is_some() || item.get("supportHtml").is_some() {
1900                                serde_json::json!({
1901                                    "kind": "markdownContent",
1902                                    "content": item
1903                                })
1904                            } else {
1905                                item
1906                            }
1907                        } else {
1908                            item
1909                        }
1910                    })
1911                    .collect();
1912                *resp_arr = fixed;
1913            }
1914        }
1915    }
1916}
1917
/// Restore line separation between JSON objects that were glued together in JSONL content.
///
/// VS Code sometimes appends delta operations (kind:1, kind:2) directly after the
/// previous object without a newline, yielding invalid JSONL such as:
///   `{"kind":0,"v":{...}}{"kind":1,...}{"kind":2,...}`
///
/// Every `}{"kind":` boundary gets a newline inserted between the `}` and the `{`.
/// That byte sequence cannot occur inside a JSON string value, because the quotes
/// in `{"kind":` would have to be escaped there (`{\"kind\":`).
///
/// Content without such a boundary is returned unchanged (as an owned copy).
pub fn split_concatenated_jsonl(content: &str) -> String {
    const GLUED: &str = "}{\"kind\":";
    const SEPARATED: &str = "}\n{\"kind\":";

    // `replace` already yields a plain copy when nothing matches, so no
    // separate "contains" pre-check is needed.
    content.replace(GLUED, SEPARATED)
}
1935
1936/// Apply a delta update (kind:1) to a JSON value at the given key path.
1937fn apply_delta(root: &mut serde_json::Value, keys: &[serde_json::Value], value: serde_json::Value) {
1938    if keys.is_empty() {
1939        return;
1940    }
1941
1942    // Navigate to the parent
1943    let mut current = root;
1944    for key in &keys[..keys.len() - 1] {
1945        if let Some(k) = key.as_str() {
1946            if !current.get(k).is_some() {
1947                current[k] = serde_json::Value::Object(serde_json::Map::new());
1948            }
1949            current = &mut current[k];
1950        } else if let Some(idx) = key.as_u64() {
1951            if let Some(arr) = current.as_array_mut() {
1952                if (idx as usize) < arr.len() {
1953                    current = &mut arr[idx as usize];
1954                } else {
1955                    return; // Index out of bounds
1956                }
1957            } else {
1958                return;
1959            }
1960        }
1961    }
1962
1963    // Set the final key
1964    if let Some(last_key) = keys.last() {
1965        if let Some(k) = last_key.as_str() {
1966            current[k] = value;
1967        } else if let Some(idx) = last_key.as_u64() {
1968            if let Some(arr) = current.as_array_mut() {
1969                if (idx as usize) < arr.len() {
1970                    arr[idx as usize] = value;
1971                }
1972            }
1973        }
1974    }
1975}
1976
1977/// Apply an array replace/splice operation (kind:2) to a JSON value at the given key path.
1978/// When `splice_index` is `Some(i)`, truncates the target array at index `i` before extending.
1979/// When `splice_index` is `None`, replaces the entire array with the new items.
1980fn apply_splice(
1981    root: &mut serde_json::Value,
1982    keys: &[serde_json::Value],
1983    items: serde_json::Value,
1984    splice_index: Option<usize>,
1985) {
1986    if keys.is_empty() {
1987        return;
1988    }
1989
1990    // Navigate to the target array
1991    let mut current = root;
1992    for key in keys {
1993        if let Some(k) = key.as_str() {
1994            if !current.get(k).is_some() {
1995                current[k] = serde_json::json!([]);
1996            }
1997            current = &mut current[k];
1998        } else if let Some(idx) = key.as_u64() {
1999            if let Some(arr) = current.as_array_mut() {
2000                if (idx as usize) < arr.len() {
2001                    current = &mut arr[idx as usize];
2002                } else {
2003                    return;
2004                }
2005            } else {
2006                return;
2007            }
2008        }
2009    }
2010
2011    // Splice or replace items in the target array
2012    if let Some(target_arr) = current.as_array_mut() {
2013        if let Some(idx) = splice_index {
2014            // Splice: truncate at index, then extend with new items
2015            target_arr.truncate(idx);
2016        } else {
2017            // Full replacement: clear the array
2018            target_arr.clear();
2019        }
2020        if let Some(new_items) = items.as_array() {
2021            target_arr.extend(new_items.iter().cloned());
2022        }
2023    }
2024}
2025
2026/// Ensure a JSONL `kind:0` snapshot's `v` object has all fields required by
2027/// VS Code's latest session format (1.109.0+ / version 3). Missing fields are
2028/// injected with sensible defaults so sessions load reliably after recovery,
2029/// conversion, or compaction.
2030///
2031/// Required fields that VS Code now expects:
2032/// - `version` (u32, default 3)
2033/// - `sessionId` (string, extracted from filename or generated)
2034/// - `responderUsername` (string, default "GitHub Copilot")
2035/// - `hasPendingEdits` (bool, default false)
2036/// - `pendingRequests` (array, default [])
2037/// - `inputState` (object with mode, attachments, etc.)
2038pub fn ensure_vscode_compat_fields(state: &mut serde_json::Value, session_id: Option<&str>) {
2039    if let Some(obj) = state.as_object_mut() {
2040        // version
2041        if !obj.contains_key("version") {
2042            obj.insert("version".to_string(), serde_json::json!(3));
2043        }
2044
2045        // sessionId — use provided ID, or try to read from existing field
2046        if !obj.contains_key("sessionId") {
2047            if let Some(id) = session_id {
2048                obj.insert("sessionId".to_string(), serde_json::json!(id));
2049            }
2050        }
2051
2052        // responderUsername
2053        if !obj.contains_key("responderUsername") {
2054            obj.insert(
2055                "responderUsername".to_string(),
2056                serde_json::json!("GitHub Copilot"),
2057            );
2058        }
2059
2060        // hasPendingEdits — always false for recovered/compacted sessions
2061        if !obj.contains_key("hasPendingEdits") {
2062            obj.insert("hasPendingEdits".to_string(), serde_json::json!(false));
2063        }
2064
2065        // pendingRequests — always empty for recovered/compacted sessions
2066        if !obj.contains_key("pendingRequests") {
2067            obj.insert("pendingRequests".to_string(), serde_json::json!([]));
2068        }
2069
2070        // inputState — VS Code expects this to exist with at least mode + attachments
2071        if !obj.contains_key("inputState") {
2072            obj.insert(
2073                "inputState".to_string(),
2074                serde_json::json!({
2075                    "attachments": [],
2076                    "mode": { "id": "agent", "kind": "agent" },
2077                    "inputText": "",
2078                    "selections": [],
2079                    "contrib": { "chatDynamicVariableModel": [] }
2080                }),
2081            );
2082        }
2083    }
2084}
2085
2086/// Detect whether a legacy .json file is a "skeleton" — corrupted to contain only
2087/// structural characters ({}, [], commas, colons, whitespace) with all actual data stripped.
2088/// These files parse as valid JSON but contain no useful session content.
2089pub fn is_skeleton_json(content: &str) -> bool {
2090    // Must be non-trivial size to be a skeleton (tiny files might just be empty sessions)
2091    if content.len() < 100 {
2092        return false;
2093    }
2094
2095    // Count structural vs data characters
2096    let structural_chars: usize = content
2097        .chars()
2098        .filter(|c| {
2099            matches!(
2100                c,
2101                '{' | '}' | '[' | ']' | ',' | ':' | ' ' | '\n' | '\r' | '\t' | '"'
2102            )
2103        })
2104        .count();
2105
2106    let total_chars = content.len();
2107    let structural_ratio = structural_chars as f64 / total_chars as f64;
2108
2109    // A skeleton file is >80% structural characters. Normal sessions have lots of
2110    // text content (messages, code, etc.) so the ratio is much lower.
2111    if structural_ratio < 0.80 {
2112        return false;
2113    }
2114
2115    // Additionally verify: parse as JSON and check that requests array is empty or
2116    // contains only empty objects
2117    if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(content) {
2118        // Check if requests exist and are all empty
2119        if let Some(requests) = parsed.get("requests").and_then(|r| r.as_array()) {
2120            let all_empty = requests.iter().all(|req| {
2121                // A skeleton request has no "message" text or empty message content
2122                let msg = req
2123                    .get("message")
2124                    .and_then(|m| m.get("text"))
2125                    .and_then(|t| t.as_str());
2126                msg.map_or(true, |s| s.is_empty())
2127            });
2128            return all_empty;
2129        }
2130        // No requests array at all — also skeleton-like
2131        return true;
2132    }
2133
2134    // Couldn't parse but high structural ratio — still likely skeleton
2135    structural_ratio > 0.85
2136}
2137
2138/// Convert a skeleton .json file to a valid minimal .jsonl file.
2139/// Preserves title and timestamp from the index entry if available.
2140/// The original .json file is renamed to `.json.corrupt` (non-destructive).
2141/// Returns the path to the new .jsonl file, or None if conversion was skipped.
2142pub fn convert_skeleton_json_to_jsonl(
2143    json_path: &Path,
2144    title: Option<&str>,
2145    last_message_date: Option<i64>,
2146) -> Result<Option<PathBuf>> {
2147    let content = std::fs::read_to_string(json_path)
2148        .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
2149
2150    if !is_skeleton_json(&content) {
2151        return Ok(None);
2152    }
2153
2154    let session_id = json_path
2155        .file_stem()
2156        .and_then(|s| s.to_str())
2157        .unwrap_or("unknown")
2158        .to_string();
2159
2160    let title = title.unwrap_or("Recovered Session");
2161    let now = std::time::SystemTime::now()
2162        .duration_since(std::time::UNIX_EPOCH)
2163        .unwrap_or_default()
2164        .as_millis() as i64;
2165    let timestamp = last_message_date.unwrap_or(now);
2166
2167    // Build a valid minimal kind:0 JSONL entry
2168    let jsonl_entry = serde_json::json!({
2169        "kind": 0,
2170        "v": {
2171            "sessionId": session_id,
2172            "title": title,
2173            "lastMessageDate": timestamp,
2174            "requests": [],
2175            "version": 4,
2176            "hasPendingEdits": false,
2177            "pendingRequests": [],
2178            "inputState": {
2179                "attachments": [],
2180                "mode": { "id": "agent", "kind": "agent" },
2181                "inputText": "",
2182                "selections": [],
2183                "contrib": { "chatDynamicVariableModel": [] }
2184            },
2185            "responderUsername": "GitHub Copilot",
2186            "isImported": false,
2187            "initialLocation": "panel"
2188        }
2189    });
2190
2191    let jsonl_path = json_path.with_extension("jsonl");
2192    let corrupt_path = json_path.with_extension("json.corrupt");
2193
2194    // Don't overwrite an existing .jsonl
2195    if jsonl_path.exists() {
2196        // Just rename the skeleton to .corrupt
2197        std::fs::rename(json_path, &corrupt_path)?;
2198        return Ok(None);
2199    }
2200
2201    // Write the new .jsonl file
2202    std::fs::write(
2203        &jsonl_path,
2204        serde_json::to_string(&jsonl_entry)
2205            .map_err(|e| CsmError::InvalidSessionFormat(format!("Serialize error: {}", e)))?,
2206    )?;
2207
2208    // Rename original to .json.corrupt (non-destructive)
2209    std::fs::rename(json_path, &corrupt_path)?;
2210
2211    Ok(Some(jsonl_path))
2212}
2213
2214/// Fix cancelled `modelState` values in a compacted (single-line) JSONL session file.
2215///
2216/// VS Code determines `lastResponseState` from the file content, not the index.
2217/// If the last request's `modelState.value` is `2` (Cancelled) or missing entirely,
2218/// VS Code refuses to load the session. This function:
2219/// 1. Finds the last request in the `requests` array
2220/// 2. If `modelState.value` is `2` (Cancelled), changes it to `1` (Complete)
2221/// 3. If `modelState` is missing entirely, adds `{"value":1,"completedAt":<now>}`
2222///
2223/// Returns `true` if the file was modified.
2224pub fn fix_cancelled_model_state(path: &Path) -> Result<bool> {
2225    let content = std::fs::read_to_string(path)
2226        .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
2227
2228    let lines: Vec<&str> = content.lines().collect();
2229
2230    if lines.is_empty() {
2231        return Ok(false);
2232    }
2233
2234    // For multi-line JSONL, we need to scan all lines to find the LAST modelState
2235    // delta for the highest request index. For single-line (compacted), we modify
2236    // the kind:0 snapshot directly.
2237    if lines.len() == 1 {
2238        // Compacted single-line JSONL: modify the kind:0 snapshot
2239        let mut entry: serde_json::Value = serde_json::from_str(lines[0].trim())
2240            .map_err(|e| CsmError::InvalidSessionFormat(format!("Invalid JSON: {}", e)))?;
2241
2242        let is_kind_0 = entry
2243            .get("kind")
2244            .and_then(|k| k.as_u64())
2245            .map(|k| k == 0)
2246            .unwrap_or(false);
2247
2248        if !is_kind_0 {
2249            return Ok(false);
2250        }
2251
2252        let requests = match entry
2253            .get_mut("v")
2254            .and_then(|v| v.get_mut("requests"))
2255            .and_then(|r| r.as_array_mut())
2256        {
2257            Some(r) if !r.is_empty() => r,
2258            _ => return Ok(false),
2259        };
2260
2261        let last_req = requests.last_mut().unwrap();
2262        let model_state = last_req.get("modelState");
2263
2264        let needs_fix = match model_state {
2265            Some(ms) => ms.get("value").and_then(|v| v.as_u64()) == Some(2),
2266            None => true, // Missing modelState = never completed
2267        };
2268
2269        if !needs_fix {
2270            return Ok(false);
2271        }
2272
2273        let now = std::time::SystemTime::now()
2274            .duration_since(std::time::UNIX_EPOCH)
2275            .unwrap_or_default()
2276            .as_millis() as u64;
2277
2278        last_req.as_object_mut().unwrap().insert(
2279            "modelState".to_string(),
2280            serde_json::json!({"value": 1, "completedAt": now}),
2281        );
2282
2283        let patched = serde_json::to_string(&entry)
2284            .map_err(|e| CsmError::InvalidSessionFormat(format!("Serialize error: {}", e)))?;
2285        std::fs::write(path, patched)?;
2286        return Ok(true);
2287    }
2288
2289    // Multi-line JSONL: find the highest request index referenced across all lines,
2290    // then check if the last modelState delta for that index has value=2 or is missing.
2291    // If so, append a corrective delta.
2292    let mut highest_req_idx: Option<usize> = None;
2293    let mut last_model_state_value: Option<u64> = None;
2294
2295    // Check kind:0 snapshot for request count
2296    if let Ok(first_entry) = serde_json::from_str::<serde_json::Value>(lines[0].trim()) {
2297        if let Some(requests) = first_entry
2298            .get("v")
2299            .and_then(|v| v.get("requests"))
2300            .and_then(|r| r.as_array())
2301        {
2302            if !requests.is_empty() {
2303                let last_idx = requests.len() - 1;
2304                highest_req_idx = Some(last_idx);
2305                // Check modelState in the snapshot's last request
2306                if let Some(ms) = requests[last_idx].get("modelState") {
2307                    last_model_state_value = ms.get("value").and_then(|v| v.as_u64());
2308                }
2309            }
2310        }
2311    }
2312
2313    // Scan deltas for higher request indices and modelState updates
2314    static REQ_IDX_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r#""k":\["requests",(\d+)"#).unwrap());
2315
2316    for line in &lines[1..] {
2317        if let Some(caps) = REQ_IDX_RE.captures(line) {
2318            if let Ok(idx) = caps[1].parse::<usize>() {
2319                if highest_req_idx.is_none() || idx > highest_req_idx.unwrap() {
2320                    highest_req_idx = Some(idx);
2321                    last_model_state_value = None; // Reset for new highest
2322                }
2323                // Track modelState for the highest request index
2324                if Some(idx) == highest_req_idx && line.contains("\"modelState\"") {
2325                    if let Ok(entry) = serde_json::from_str::<serde_json::Value>(line.trim()) {
2326                        last_model_state_value = entry
2327                            .get("v")
2328                            .and_then(|v| v.get("value"))
2329                            .and_then(|v| v.as_u64());
2330                    }
2331                }
2332            }
2333        }
2334    }
2335
2336    let req_idx = match highest_req_idx {
2337        Some(idx) => idx,
2338        None => return Ok(false),
2339    };
2340
2341    let needs_fix = match last_model_state_value {
2342        Some(2) => true, // Cancelled
2343        None => true,    // Missing (never completed)
2344        _ => false,      // Already complete or other valid state
2345    };
2346
2347    if !needs_fix {
2348        return Ok(false);
2349    }
2350
2351    let now = std::time::SystemTime::now()
2352        .duration_since(std::time::UNIX_EPOCH)
2353        .unwrap_or_default()
2354        .as_millis() as u64;
2355
2356    let fix_delta = format!(
2357        "\n{{\"kind\":1,\"k\":[\"requests\",{},\"modelState\"],\"v\":{{\"value\":1,\"completedAt\":{}}}}}",
2358        req_idx, now
2359    );
2360
2361    use std::io::Write;
2362    let mut file = std::fs::OpenOptions::new().append(true).open(path)?;
2363    file.write_all(fix_delta.as_bytes())?;
2364
2365    Ok(true)
2366}
2367
2368/// Repair workspace sessions: compact large JSONL files and fix the index.
2369/// Returns (compacted_count, index_fixed_count).
2370pub fn repair_workspace_sessions(
2371    workspace_id: &str,
2372    chat_sessions_dir: &Path,
2373    force: bool,
2374) -> Result<(usize, usize)> {
2375    let db_path = get_workspace_storage_db(workspace_id)?;
2376
2377    if !db_path.exists() {
2378        return Err(CsmError::WorkspaceNotFound(format!(
2379            "Database not found: {}",
2380            db_path.display()
2381        )));
2382    }
2383
2384    if !force && is_vscode_running() {
2385        return Err(CsmError::VSCodeRunning);
2386    }
2387
2388    let mut compacted = 0;
2389    let mut fields_fixed = 0;
2390
2391    if chat_sessions_dir.exists() {
2392        // Pass 1: Compact large JSONL files and fix missing fields
2393        for entry in std::fs::read_dir(chat_sessions_dir)? {
2394            let entry = entry?;
2395            let path = entry.path();
2396            if path.extension().is_some_and(|e| e == "jsonl") {
2397                let metadata = std::fs::metadata(&path)?;
2398                let size_mb = metadata.len() / (1024 * 1024);
2399
2400                let content = std::fs::read_to_string(&path)
2401                    .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
2402                let line_count = content.lines().count();
2403
2404                if line_count > 1 {
2405                    // Compact multi-line JSONL (has operations to replay)
2406                    let stem = path
2407                        .file_stem()
2408                        .map(|s| s.to_string_lossy().to_string())
2409                        .unwrap_or_default();
2410                    println!(
2411                        "   Compacting {} ({} lines, {}MB)...",
2412                        stem, line_count, size_mb
2413                    );
2414
2415                    match compact_session_jsonl(&path) {
2416                        Ok(backup_path) => {
2417                            let new_size = std::fs::metadata(&path)
2418                                .map(|m| m.len() / (1024 * 1024))
2419                                .unwrap_or(0);
2420                            println!(
2421                                "   [OK] Compacted: {}MB -> {}MB (backup: {})",
2422                                size_mb,
2423                                new_size,
2424                                backup_path
2425                                    .file_name()
2426                                    .unwrap_or_default()
2427                                    .to_string_lossy()
2428                            );
2429                            compacted += 1;
2430                        }
2431                        Err(e) => {
2432                            println!("   [WARN] Failed to compact {}: {}", stem, e);
2433                        }
2434                    }
2435                } else {
2436                    // Single-line JSONL — check for missing VS Code fields
2437                    if let Some(first_line) = content.lines().next() {
2438                        if let Ok(mut obj) = serde_json::from_str::<serde_json::Value>(first_line) {
2439                            let is_kind_0 = obj
2440                                .get("kind")
2441                                .and_then(|k| k.as_u64())
2442                                .map(|k| k == 0)
2443                                .unwrap_or(false);
2444
2445                            if is_kind_0 {
2446                                if let Some(v) = obj.get("v") {
2447                                    let missing = !v.get("hasPendingEdits").is_some()
2448                                        || !v.get("pendingRequests").is_some()
2449                                        || !v.get("inputState").is_some()
2450                                        || !v.get("sessionId").is_some();
2451
2452                                    if missing {
2453                                        let session_id = path
2454                                            .file_stem()
2455                                            .and_then(|s| s.to_str())
2456                                            .map(|s| s.to_string());
2457                                        if let Some(v_mut) = obj.get_mut("v") {
2458                                            ensure_vscode_compat_fields(
2459                                                v_mut,
2460                                                session_id.as_deref(),
2461                                            );
2462                                        }
2463                                        let patched = serde_json::to_string(&obj).map_err(|e| {
2464                                            CsmError::InvalidSessionFormat(format!(
2465                                                "Failed to serialize: {}",
2466                                                e
2467                                            ))
2468                                        })?;
2469                                        std::fs::write(&path, &patched)?;
2470                                        let stem = path
2471                                            .file_stem()
2472                                            .map(|s| s.to_string_lossy().to_string())
2473                                            .unwrap_or_default();
2474                                        println!("   [OK] Fixed missing VS Code fields: {}", stem);
2475                                        fields_fixed += 1;
2476                                    }
2477                                }
2478                            }
2479                        }
2480                    }
2481                }
2482            }
2483        }
2484    }
2485
2486    // Pass 1.5: Convert skeleton .json files to valid .jsonl.
2487    // Skeleton files are legacy .json files where all data has been stripped,
2488    // leaving only structural characters ({}, [], whitespace). We convert them
2489    // to valid minimal .jsonl, preserving title/timestamp from the index,
2490    // and rename the original to .json.corrupt (non-destructive).
2491    let mut skeletons_converted = 0;
2492    if chat_sessions_dir.exists() {
2493        // Read current index to get titles/timestamps for converted sessions
2494        let index_entries: std::collections::HashMap<String, (String, Option<i64>)> =
2495            if let Ok(index) = read_chat_session_index(&db_path) {
2496                index
2497                    .entries
2498                    .iter()
2499                    .map(|(id, e)| (id.clone(), (e.title.clone(), Some(e.last_message_date))))
2500                    .collect()
2501            } else {
2502                std::collections::HashMap::new()
2503            };
2504
2505        // Collect .json files that don't have a corresponding .jsonl
2506        let mut jsonl_stems: HashSet<String> = HashSet::new();
2507        for entry in std::fs::read_dir(chat_sessions_dir)? {
2508            let entry = entry?;
2509            let path = entry.path();
2510            if path.extension().is_some_and(|e| e == "jsonl") {
2511                if let Some(stem) = path.file_stem() {
2512                    jsonl_stems.insert(stem.to_string_lossy().to_string());
2513                }
2514            }
2515        }
2516
2517        for entry in std::fs::read_dir(chat_sessions_dir)? {
2518            let entry = entry?;
2519            let path = entry.path();
2520            if path.extension().is_some_and(|e| e == "json")
2521                && !path.to_string_lossy().ends_with(".bak")
2522                && !path.to_string_lossy().ends_with(".corrupt")
2523            {
2524                let stem = path
2525                    .file_stem()
2526                    .map(|s| s.to_string_lossy().to_string())
2527                    .unwrap_or_default();
2528
2529                // Skip if .jsonl already exists
2530                if jsonl_stems.contains(&stem) {
2531                    continue;
2532                }
2533
2534                let (title, timestamp) = index_entries
2535                    .get(&stem)
2536                    .map(|(t, ts)| (t.as_str(), *ts))
2537                    .unwrap_or(("Recovered Session", None));
2538
2539                match convert_skeleton_json_to_jsonl(&path, Some(title), timestamp) {
2540                    Ok(Some(jsonl_path)) => {
2541                        println!(
2542                            "   [OK] Converted skeleton .json → .jsonl: {} (\"{}\")",
2543                            stem, title
2544                        );
2545                        // Track the new .jsonl so subsequent passes process it
2546                        jsonl_stems.insert(stem);
2547                        skeletons_converted += 1;
2548                        let _ = jsonl_path; // used implicitly via jsonl_stems
2549                    }
2550                    Ok(None) => {} // Not a skeleton or skipped
2551                    Err(e) => {
2552                        println!("   [WARN] Failed to convert skeleton {}: {}", stem, e);
2553                    }
2554                }
2555            }
2556        }
2557    }
2558
2559    // Pass 2: Fix cancelled modelState in all JSONL files.
2560    // VS Code reads modelState from file content (not the index) to determine
2561    // lastResponseState. If the last request has modelState.value=2 (Cancelled)
2562    // or is missing entirely, VS Code refuses to load the session.
2563    let mut cancelled_fixed = 0;
2564    if chat_sessions_dir.exists() {
2565        for entry in std::fs::read_dir(chat_sessions_dir)? {
2566            let entry = entry?;
2567            let path = entry.path();
2568            if path.extension().is_some_and(|e| e == "jsonl") {
2569                match fix_cancelled_model_state(&path) {
2570                    Ok(true) => {
2571                        let stem = path
2572                            .file_stem()
2573                            .map(|s| s.to_string_lossy().to_string())
2574                            .unwrap_or_default();
2575                        println!("   [OK] Fixed cancelled modelState: {}", stem);
2576                        cancelled_fixed += 1;
2577                    }
2578                    Ok(false) => {} // No fix needed
2579                    Err(e) => {
2580                        let stem = path
2581                            .file_stem()
2582                            .map(|s| s.to_string_lossy().to_string())
2583                            .unwrap_or_default();
2584                        println!("   [WARN] Failed to fix modelState for {}: {}", stem, e);
2585                    }
2586                }
2587            }
2588        }
2589    }
2590
2591    // Pass 3: Rebuild the index with correct metadata
2592    let (index_fixed, _) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
2593
2594    if fields_fixed > 0 {
2595        println!(
2596            "   [OK] Injected missing VS Code fields into {} session(s)",
2597            fields_fixed
2598        );
2599    }
2600    if skeletons_converted > 0 {
2601        println!(
2602            "   [OK] Converted {} skeleton .json file(s) to .jsonl",
2603            skeletons_converted
2604        );
2605    }
2606    if cancelled_fixed > 0 {
2607        println!(
2608            "   [OK] Fixed cancelled modelState in {} session(s)",
2609            cancelled_fixed
2610        );
2611    }
2612
2613    Ok((compacted, index_fixed))
2614}