1use crate::error::{CsmError, Result};
6use crate::models::{
7 ChatRequest, ChatSession, ChatSessionIndex, ChatSessionIndexEntry, ChatSessionTiming,
8};
9use crate::workspace::{get_empty_window_sessions_path, get_workspace_storage_path};
10use once_cell::sync::Lazy;
11use regex::Regex;
12use rusqlite::Connection;
13use std::collections::HashSet;
14use std::path::{Path, PathBuf};
15use sysinfo::System;
16
/// One problem found for a single chat session while diagnosing a workspace.
#[derive(Debug, Clone)]
pub struct SessionIssue {
    /// Identifier of the affected session (a file stem on disk or a key of the session index).
    pub session_id: String,
    /// Category of the problem.
    pub kind: SessionIssueKind,
    /// Human-readable explanation of what was detected.
    pub detail: String,
}
27
/// Categories of problems reported by `diagnose_workspace_sessions`.
#[derive(Debug, Clone, PartialEq)]
pub enum SessionIssueKind {
    /// A `.jsonl` session file contains more than one line.
    MultiLineJsonl,
    /// The first line of a `.jsonl` file contains `}{"kind":`, i.e. objects glued together.
    ConcatenatedJsonl,
    /// The index entry has `lastResponseState == 2`.
    CancelledState,
    /// The last request inside the session file has `modelState.value == 2` or no such value.
    CancelledModelState,
    /// A session file exists on disk but has no entry in the index.
    OrphanedSession,
    /// The index lists a session for which no file exists on disk.
    StaleIndexEntry,
    /// The initial (`kind: 0`) JSONL object lacks one or more expected compatibility fields.
    MissingCompatFields,
    /// Both `<id>.json` and `<id>.jsonl` exist for the same session.
    DuplicateFormat,
    /// A legacy `.json` file was classified as a skeleton (corrupt) by `is_skeleton_json`.
    SkeletonJson,
}
50
51impl std::fmt::Display for SessionIssueKind {
52 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53 match self {
54 SessionIssueKind::MultiLineJsonl => write!(f, "multi-line JSONL"),
55 SessionIssueKind::ConcatenatedJsonl => write!(f, "concatenated JSONL"),
56 SessionIssueKind::CancelledState => write!(f, "cancelled state"),
57 SessionIssueKind::CancelledModelState => write!(f, "cancelled modelState in file"),
58 SessionIssueKind::OrphanedSession => write!(f, "orphaned session"),
59 SessionIssueKind::StaleIndexEntry => write!(f, "stale index entry"),
60 SessionIssueKind::MissingCompatFields => write!(f, "missing compat fields"),
61 SessionIssueKind::DuplicateFormat => write!(f, "duplicate .json/.jsonl"),
62 SessionIssueKind::SkeletonJson => write!(f, "skeleton .json (corrupt)"),
63 }
64 }
65}
66
/// Result of diagnosing the chat sessions of one workspace.
#[derive(Debug, Clone, Default)]
pub struct WorkspaceDiagnosis {
    /// Project path associated with the workspace, if known.
    /// NOTE(review): `diagnose_workspace_sessions` leaves this at its default (`None`);
    /// presumably callers fill it in — confirm.
    pub project_path: Option<String>,
    /// Workspace identifier the diagnosis was run for.
    pub workspace_hash: String,
    /// Number of distinct session ids (file stems) found on disk.
    pub sessions_on_disk: usize,
    /// Number of entries found in the session index.
    pub sessions_in_index: usize,
    /// Every issue detected, in the order the checks ran.
    pub issues: Vec<SessionIssue>,
}
81
82impl WorkspaceDiagnosis {
83 pub fn is_healthy(&self) -> bool {
84 self.issues.is_empty()
85 }
86
87 pub fn issue_count_by_kind(&self, kind: &SessionIssueKind) -> usize {
88 self.issues.iter().filter(|i| &i.kind == kind).count()
89 }
90}
91
/// Inspects the chat-session files of a workspace and its session index for known problems.
///
/// The checks run in a fixed order and append to `issues` in that order:
/// duplicate `.json`/`.jsonl` pairs, per-file `.jsonl` checks, skeleton `.json` files, and
/// finally — when the workspace database exists and its index can be read — stale index
/// entries, cancelled index entries and orphaned files.
///
/// # Arguments
/// * `workspace_id` - workspace identifier; stored as `workspace_hash` and used to locate the
///   workspace database.
/// * `chat_sessions_dir` - directory holding the session files.
///
/// # Errors
/// Propagates directory-listing failures and any error from `get_workspace_storage_db`.
/// Unreadable session files and an unreadable index are skipped silently.
pub fn diagnose_workspace_sessions(
    workspace_id: &str,
    chat_sessions_dir: &Path,
) -> Result<WorkspaceDiagnosis> {
    let mut diagnosis = WorkspaceDiagnosis {
        workspace_hash: workspace_id.to_string(),
        ..Default::default()
    };

    // Without a sessions directory there is nothing to check; report an empty diagnosis.
    if !chat_sessions_dir.exists() {
        return Ok(diagnosis);
    }

    // Session ids (file stems) grouped by the extension they were found with.
    let mut jsonl_sessions: HashSet<String> = HashSet::new();
    let mut json_sessions: HashSet<String> = HashSet::new();
    let mut all_session_ids: HashSet<String> = HashSet::new();

    for entry in std::fs::read_dir(chat_sessions_dir)? {
        let entry = entry?;
        let path = entry.path();
        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
        let stem = path
            .file_stem()
            .map(|s| s.to_string_lossy().to_string())
            .unwrap_or_default();

        match ext {
            "jsonl" => {
                jsonl_sessions.insert(stem.clone());
                all_session_ids.insert(stem);
            }
            "json" if !path.to_string_lossy().ends_with(".bak") => {
                json_sessions.insert(stem.clone());
                all_session_ids.insert(stem);
            }
            // Any other extension is not a session file.
            _ => {}
        }
    }
    diagnosis.sessions_on_disk = all_session_ids.len();

    // Check: the same session id exists in both formats.
    for id in &jsonl_sessions {
        if json_sessions.contains(id) {
            diagnosis.issues.push(SessionIssue {
                session_id: id.clone(),
                kind: SessionIssueKind::DuplicateFormat,
                detail: format!("Both {id}.json and {id}.jsonl exist"),
            });
        }
    }

    // Checks on the content of each .jsonl file; files that cannot be read are skipped.
    for id in &jsonl_sessions {
        let path = chat_sessions_dir.join(format!("{id}.jsonl"));
        if let Ok(content) = std::fs::read_to_string(&path) {
            let line_count = content.lines().count();

            if line_count > 1 {
                // Integer division: anything below 1 MiB is reported as 0 MB.
                let size_mb = content.len() / (1024 * 1024);
                diagnosis.issues.push(SessionIssue {
                    session_id: id.clone(),
                    kind: SessionIssueKind::MultiLineJsonl,
                    detail: format!("{line_count} lines, ~{size_mb} MB — needs compaction"),
                });
            }

            if let Some(first_line) = content.lines().next() {
                // `}{"kind":` on one line means two objects were written without a newline.
                if first_line.contains("}{\"kind\":") {
                    diagnosis.issues.push(SessionIssue {
                        session_id: id.clone(),
                        kind: SessionIssueKind::ConcatenatedJsonl,
                        detail: "First line has concatenated JSON objects".to_string(),
                    });
                }
            }

            // The structural checks below only apply to single-line files whose line parses
            // as JSON with `kind == 0` and a `v` payload.
            if line_count == 1 {
                if let Some(first_line) = content.lines().next() {
                    if let Ok(obj) = serde_json::from_str::<serde_json::Value>(first_line) {
                        let is_kind_0 = obj
                            .get("kind")
                            .and_then(|k| k.as_u64())
                            .map(|k| k == 0)
                            .unwrap_or(false);

                        if is_kind_0 {
                            if let Some(v) = obj.get("v") {
                                // Fields expected on the payload; every absent one is reported.
                                let missing_fields: Vec<&str> = [
                                    "hasPendingEdits",
                                    "pendingRequests",
                                    "inputState",
                                    "sessionId",
                                    "version",
                                ]
                                .iter()
                                .filter(|f| v.get(**f).is_none())
                                .copied()
                                .collect();

                                if !missing_fields.is_empty() {
                                    diagnosis.issues.push(SessionIssue {
                                        session_id: id.clone(),
                                        kind: SessionIssueKind::MissingCompatFields,
                                        detail: format!("Missing: {}", missing_fields.join(", ")),
                                    });
                                }

                                if let Some(requests) = v.get("requests").and_then(|r| r.as_array())
                                {
                                    // Only the last request's modelState is examined.
                                    if let Some(last_req) = requests.last() {
                                        let model_state_value = last_req
                                            .get("modelState")
                                            .and_then(|ms| ms.get("value"))
                                            .and_then(|v| v.as_u64());
                                        match model_state_value {
                                            Some(2) => {
                                                diagnosis.issues.push(SessionIssue {
                                                    session_id: id.clone(),
                                                    kind: SessionIssueKind::CancelledModelState,
                                                    detail: "Last request modelState.value=2 (Cancelled) in file content".to_string(),
                                                });
                                            }
                                            // A missing (or non-integer) value is reported under
                                            // the same kind, with a different detail message.
                                            None => {
                                                diagnosis.issues.push(SessionIssue {
                                                    session_id: id.clone(),
                                                    kind: SessionIssueKind::CancelledModelState,
                                                    detail: "Last request missing modelState in file content".to_string(),
                                                });
                                            }
                                            _ => {}
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    // Check: legacy .json files that are only a skeleton.
    for id in &json_sessions {
        // Sessions that also have a .jsonl file were already reported as DuplicateFormat.
        if jsonl_sessions.contains(id) {
            continue;
        }
        let path = chat_sessions_dir.join(format!("{id}.json"));
        if let Ok(content) = std::fs::read_to_string(&path) {
            // `is_skeleton_json` is defined outside this view; its exact criteria are not
            // visible here.
            if is_skeleton_json(&content) {
                diagnosis.issues.push(SessionIssue {
                    session_id: id.clone(),
                    kind: SessionIssueKind::SkeletonJson,
                    detail: format!(
                        "Legacy .json is corrupt — only structural chars remain ({} bytes)",
                        content.len()
                    ),
                });
            }
        }
    }

    // Index checks run only when the database exists and the index can be read.
    let db_path = get_workspace_storage_db(workspace_id)?;
    if db_path.exists() {
        if let Ok(index) = read_chat_session_index(&db_path) {
            diagnosis.sessions_in_index = index.entries.len();

            // Check: index entries without any file on disk.
            for (id, _entry) in &index.entries {
                if !all_session_ids.contains(id) {
                    diagnosis.issues.push(SessionIssue {
                        session_id: id.clone(),
                        kind: SessionIssueKind::StaleIndexEntry,
                        detail: "In index but no file on disk".to_string(),
                    });
                }
            }

            // Check: index entries marked as cancelled.
            for (id, entry) in &index.entries {
                if entry.last_response_state == 2 {
                    diagnosis.issues.push(SessionIssue {
                        session_id: id.clone(),
                        kind: SessionIssueKind::CancelledState,
                        detail: "lastResponseState=2 (Cancelled) — blocks VS Code loading"
                            .to_string(),
                    });
                }
            }

            // Check: files on disk that the index does not know about.
            let indexed_ids: HashSet<&String> = index.entries.keys().collect();
            for id in &all_session_ids {
                if !indexed_ids.contains(id) {
                    diagnosis.issues.push(SessionIssue {
                        session_id: id.clone(),
                        kind: SessionIssueKind::OrphanedSession,
                        detail: "File on disk but not in VS Code index".to_string(),
                    });
                }
            }
        }
    }

    Ok(diagnosis)
}
305
/// Lazily compiled regex matching a JSON-style `\uXXXX` escape: a backslash, `u`, and exactly
/// four hexadecimal digits. The pattern is a constant, so the `unwrap` only fails on a typo here.
static UNICODE_ESCAPE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\\u[0-9a-fA-F]{4}").unwrap());
308
/// On-disk format of a chat session file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VsCodeSessionFormat {
    /// A single JSON object (`.json`).
    LegacyJson,
    /// JSON Lines (`.jsonl`); described by this module as event-sourced, VS Code 1.109.0+.
    JsonLines,
}
319
/// Schema version read from a session's `version` field.
///
/// The explicit discriminants equal the version numbers, with `Unknown` as `0`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SessionSchemaVersion {
    /// Version 1.
    V1 = 1,
    /// Version 2.
    V2 = 2,
    /// Version 3.
    V3 = 3,
    /// Any version number other than 1, 2 or 3, or a version that could not be determined.
    Unknown = 0,
}
332
333impl SessionSchemaVersion {
334 pub fn from_version(v: u32) -> Self {
336 match v {
337 1 => Self::V1,
338 2 => Self::V2,
339 3 => Self::V3,
340 _ => Self::Unknown,
341 }
342 }
343
344 pub fn version_number(&self) -> u32 {
346 match self {
347 Self::V1 => 1,
348 Self::V2 => 2,
349 Self::V3 => 3,
350 Self::Unknown => 0,
351 }
352 }
353
354 pub fn description(&self) -> &'static str {
356 match self {
357 Self::V1 => "v1 (basic)",
358 Self::V2 => "v2 (extended metadata)",
359 Self::V3 => "v3 (full structure)",
360 Self::Unknown => "unknown",
361 }
362 }
363}
364
365impl std::fmt::Display for SessionSchemaVersion {
366 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
367 write!(f, "{}", self.description())
368 }
369}
370
/// Outcome of session format detection, as produced by `detect_session_format`.
#[derive(Debug, Clone)]
pub struct SessionFormatInfo {
    /// Detected file format.
    pub format: VsCodeSessionFormat,
    /// Detected (or assumed) schema version.
    pub schema_version: SessionSchemaVersion,
    /// Heuristic confidence score; `detect_session_format` assigns values between 0.2 and 0.95.
    pub confidence: f32,
    /// Short tag naming the rule that produced the result (e.g. `"json-version-field"`).
    pub detection_method: &'static str,
}
383
384impl VsCodeSessionFormat {
385 pub fn from_path(path: &Path) -> Self {
387 match path.extension().and_then(|e| e.to_str()) {
388 Some("jsonl") => Self::JsonLines,
389 _ => Self::LegacyJson,
390 }
391 }
392
393 pub fn from_content(content: &str) -> Self {
395 let trimmed = content.trim();
396
397 if trimmed.starts_with("{\"kind\":") || trimmed.starts_with("{ \"kind\":") {
399 return Self::JsonLines;
400 }
401
402 let mut json_object_lines = 0;
404 let mut total_non_empty_lines = 0;
405
406 for line in trimmed.lines().take(10) {
407 let line = line.trim();
408 if line.is_empty() {
409 continue;
410 }
411 total_non_empty_lines += 1;
412
413 if line.starts_with('{') && line.contains("\"kind\"") {
415 json_object_lines += 1;
416 }
417 }
418
419 if json_object_lines >= 2
421 || (json_object_lines == 1 && total_non_empty_lines == 1 && trimmed.contains("\n{"))
422 {
423 return Self::JsonLines;
424 }
425
426 if trimmed.starts_with('{') && trimmed.ends_with('}') {
428 if trimmed.contains("\"sessionId\"")
430 || trimmed.contains("\"creationDate\"")
431 || trimmed.contains("\"requests\"")
432 {
433 return Self::LegacyJson;
434 }
435 }
436
437 Self::LegacyJson
439 }
440
441 pub fn min_vscode_version(&self) -> &'static str {
443 match self {
444 Self::LegacyJson => "1.0.0",
445 Self::JsonLines => "1.109.0",
446 }
447 }
448
449 pub fn description(&self) -> &'static str {
451 match self {
452 Self::LegacyJson => "Legacy JSON (single object)",
453 Self::JsonLines => "JSON Lines (event-sourced, VS Code 1.109.0+)",
454 }
455 }
456
457 pub fn short_name(&self) -> &'static str {
459 match self {
460 Self::LegacyJson => "json",
461 Self::JsonLines => "jsonl",
462 }
463 }
464}
465
466impl std::fmt::Display for VsCodeSessionFormat {
467 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
468 write!(f, "{}", self.description())
469 }
470}
471
472fn sanitize_json_unicode(content: &str) -> String {
475 let mut result = String::with_capacity(content.len());
477 let mut last_end = 0;
478
479 let matches: Vec<_> = UNICODE_ESCAPE_RE.find_iter(content).collect();
481
482 for (i, mat) in matches.iter().enumerate() {
483 let start = mat.start();
484 let end = mat.end();
485
486 result.push_str(&content[last_end..start]);
488
489 let hex_str = &mat.as_str()[2..]; if let Ok(code_point) = u16::from_str_radix(hex_str, 16) {
492 if (0xD800..=0xDBFF).contains(&code_point) {
494 let is_valid_pair = if let Some(next_mat) = matches.get(i + 1) {
496 if next_mat.start() == end {
498 let next_hex = &next_mat.as_str()[2..];
499 if let Ok(next_cp) = u16::from_str_radix(next_hex, 16) {
500 (0xDC00..=0xDFFF).contains(&next_cp)
501 } else {
502 false
503 }
504 } else {
505 false
506 }
507 } else {
508 false
509 };
510
511 if is_valid_pair {
512 result.push_str(mat.as_str());
514 } else {
515 result.push_str("\\uFFFD");
517 }
518 }
519 else if (0xDC00..=0xDFFF).contains(&code_point) {
521 let is_valid_pair = if i > 0 {
523 if let Some(prev_mat) = matches.get(i - 1) {
524 if prev_mat.end() == start {
526 let prev_hex = &prev_mat.as_str()[2..];
527 if let Ok(prev_cp) = u16::from_str_radix(prev_hex, 16) {
528 (0xD800..=0xDBFF).contains(&prev_cp)
529 } else {
530 false
531 }
532 } else {
533 false
534 }
535 } else {
536 false
537 }
538 } else {
539 false
540 };
541
542 if is_valid_pair {
543 result.push_str(mat.as_str());
545 } else {
546 result.push_str("\\uFFFD");
548 }
549 }
550 else {
552 result.push_str(mat.as_str());
553 }
554 } else {
555 result.push_str(mat.as_str());
557 }
558 last_end = end;
559 }
560
561 result.push_str(&content[last_end..]);
563 result
564}
565
566pub fn parse_session_json(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
568 match serde_json::from_str::<ChatSession>(content) {
569 Ok(session) => Ok(session),
570 Err(e) => {
571 if e.to_string().contains("surrogate") || e.to_string().contains("escape") {
573 let sanitized = sanitize_json_unicode(content);
574 serde_json::from_str::<ChatSession>(&sanitized)
575 } else {
576 Err(e)
577 }
578 }
579 }
580}
581
/// Numeric `kind` values of JSONL session entries.
///
/// NOTE(review): `parse_session_jsonl` matches on the raw numbers 0, 1 and 2 rather than on
/// this enum; no use of the enum is visible in this part of the file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonlKind {
    /// `kind: 0` — the initial full session object.
    Initial = 0,
    /// `kind: 1` — replaces the value at a key path.
    Delta = 1,
    /// `kind: 2` — array update at a key path, optionally starting at index `i`.
    ArraySplice = 2,
}
593
/// Rebuilds a `ChatSession` by replaying the entries of a JSON Lines session.
///
/// The content is first passed through `split_concatenated_jsonl` (defined outside this view;
/// presumably it separates objects that were written on one line — confirm). Each non-empty
/// line is then parsed as JSON and applied according to its numeric `kind` (a missing `kind`
/// is treated as `0`):
///
/// * `0` — copies session fields and the request list from the `v` payload.
/// * `1` — sets a single field addressed by the key path `k`.
/// * `2` — array update at key path `k`, optionally starting at index `i`.
///
/// Unknown kinds, unknown key paths and request indices that are out of range are ignored.
///
/// # Errors
/// Returns the JSON error if a line fails to parse even after `sanitize_json_unicode`.
pub fn parse_session_jsonl(content: &str) -> std::result::Result<ChatSession, serde_json::Error> {
    let content = split_concatenated_jsonl(content);

    // Defaults used for every field that no entry sets.
    let mut session = ChatSession {
        version: 3,
        session_id: None,
        creation_date: 0,
        last_message_date: 0,
        is_imported: false,
        initial_location: "panel".to_string(),
        custom_title: None,
        requester_username: None,
        requester_avatar_icon_uri: None,
        responder_username: None,
        responder_avatar_icon_uri: None,
        requests: Vec::new(),
    };

    for line in content.lines() {
        let line = line.trim();
        if line.is_empty() {
            continue;
        }

        let entry: serde_json::Value = match serde_json::from_str(line) {
            Ok(v) => v,
            Err(_) => {
                // Retry once with lone surrogate escapes replaced; a second failure aborts.
                let sanitized = sanitize_json_unicode(line);
                serde_json::from_str(&sanitized)?
            }
        };

        let kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(0);

        match kind {
            // kind 0: initial state — copy whichever known fields are present in `v`.
            0 => {
                if let Some(v) = entry.get("v") {
                    if let Some(version) = v.get("version").and_then(|x| x.as_u64()) {
                        session.version = version as u32;
                    }
                    if let Some(sid) = v.get("sessionId").and_then(|x| x.as_str()) {
                        session.session_id = Some(sid.to_string());
                    }
                    if let Some(cd) = v.get("creationDate").and_then(|x| x.as_i64()) {
                        session.creation_date = cd;
                    }
                    if let Some(loc) = v.get("initialLocation").and_then(|x| x.as_str()) {
                        session.initial_location = loc.to_string();
                    }
                    if let Some(ru) = v.get("responderUsername").and_then(|x| x.as_str()) {
                        session.responder_username = Some(ru.to_string());
                    }
                    if let Some(title) = v.get("customTitle").and_then(|x| x.as_str()) {
                        session.custom_title = Some(title.to_string());
                    }
                    if let Some(imported) = v.get("isImported").and_then(|x| x.as_bool()) {
                        session.is_imported = imported;
                    }
                    if let Some(requests) = v.get("requests") {
                        // A request list that fails to deserialize is ignored as a whole.
                        if let Ok(reqs) =
                            serde_json::from_value::<Vec<ChatRequest>>(requests.clone())
                        {
                            session.requests = reqs;
                            // The newest request timestamp becomes the last message date.
                            if let Some(latest_ts) =
                                session.requests.iter().filter_map(|r| r.timestamp).max()
                            {
                                session.last_message_date = latest_ts;
                            }
                        }
                    }
                    // Fall back to the creation date when no request carried a timestamp.
                    if session.last_message_date == 0 {
                        session.last_message_date = session.creation_date;
                    }
                }
            }
            // kind 1: set the value addressed by key path `k`.
            1 => {
                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
                    if let Some(keys_arr) = keys.as_array() {
                        // One key: a top-level session field.
                        if keys_arr.len() == 1 {
                            if let Some(key) = keys_arr[0].as_str() {
                                match key {
                                    "customTitle" => {
                                        if let Some(title) = value.as_str() {
                                            session.custom_title = Some(title.to_string());
                                        }
                                    }
                                    "lastMessageDate" => {
                                        if let Some(date) = value.as_i64() {
                                            session.last_message_date = date;
                                        }
                                    }
                                    "hasPendingEdits" | "isImported" => {
                                        // Recognised but deliberately not applied.
                                    }
                                    _ => {}
                                }
                            }
                        }
                        // Three keys: ["requests", <index>, <field>] addresses one request field.
                        else if keys_arr.len() == 3 {
                            if let (Some("requests"), Some(idx), Some(field)) = (
                                keys_arr[0].as_str(),
                                keys_arr[1].as_u64().map(|i| i as usize),
                                keys_arr[2].as_str(),
                            ) {
                                if idx < session.requests.len() {
                                    match field {
                                        "response" => {
                                            session.requests[idx].response = Some(value.clone());
                                        }
                                        "result" => {
                                            session.requests[idx].result = Some(value.clone());
                                        }
                                        "followups" => {
                                            session.requests[idx].followups =
                                                serde_json::from_value(value.clone()).ok();
                                        }
                                        "isCanceled" => {
                                            session.requests[idx].is_canceled = value.as_bool();
                                        }
                                        "contentReferences" => {
                                            session.requests[idx].content_references =
                                                serde_json::from_value(value.clone()).ok();
                                        }
                                        "codeCitations" => {
                                            session.requests[idx].code_citations =
                                                serde_json::from_value(value.clone()).ok();
                                        }
                                        "modelState" | "modelId" | "agent" | "variableData" => {
                                            // Recognised but deliberately not applied.
                                        }
                                        _ => {}
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // kind 2: array update; `i`, when present, is the index at which new items start.
            2 => {
                if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
                    let splice_index = entry.get("i").and_then(|i| i.as_u64()).map(|i| i as usize);
                    if let Some(keys_arr) = keys.as_array() {
                        // ["requests"]: update the request list itself.
                        if keys_arr.len() == 1 {
                            if let Some("requests") = keys_arr[0].as_str() {
                                if let Some(items) = value.as_array() {
                                    if let Some(idx) = splice_index {
                                        // Keep the first `idx` requests, then append.
                                        session.requests.truncate(idx);
                                    } else {
                                        // Without an index the whole list is replaced.
                                        session.requests.clear();
                                    }
                                    // Items that do not deserialize as a request are skipped.
                                    for item in items {
                                        if let Ok(req) =
                                            serde_json::from_value::<ChatRequest>(item.clone())
                                        {
                                            session.requests.push(req);
                                        }
                                    }
                                    // The last request's timestamp becomes the last message date.
                                    if let Some(last_req) = session.requests.last() {
                                        if let Some(ts) = last_req.timestamp {
                                            session.last_message_date = ts;
                                        }
                                    }
                                }
                            }
                        }
                        // ["requests", <index>, <field>]: update an array field of one request.
                        else if keys_arr.len() == 3 {
                            if let (Some("requests"), Some(req_idx), Some(field)) = (
                                keys_arr[0].as_str(),
                                keys_arr[1].as_u64().map(|i| i as usize),
                                keys_arr[2].as_str(),
                            ) {
                                if req_idx < session.requests.len() {
                                    match field {
                                        "response" => {
                                            if let Some(idx) = splice_index {
                                                if let Some(existing) =
                                                    session.requests[req_idx].response.as_ref()
                                                {
                                                    if let Some(existing_arr) = existing.as_array()
                                                    {
                                                        // Keep the first `idx` parts (clamped to
                                                        // the current length), then append.
                                                        let mut new_arr: Vec<serde_json::Value> =
                                                            existing_arr
                                                                [..idx.min(existing_arr.len())]
                                                                .to_vec();
                                                        if let Some(new_items) = value.as_array() {
                                                            new_arr
                                                                .extend(new_items.iter().cloned());
                                                        }
                                                        session.requests[req_idx].response =
                                                            Some(serde_json::Value::Array(new_arr));
                                                    } else {
                                                        // Existing response is not an array:
                                                        // replace it.
                                                        session.requests[req_idx].response =
                                                            Some(value.clone());
                                                    }
                                                } else {
                                                    // No response yet: take the value as is.
                                                    session.requests[req_idx].response =
                                                        Some(value.clone());
                                                }
                                            } else {
                                                // No index: replace the whole response.
                                                session.requests[req_idx].response =
                                                    Some(value.clone());
                                            }
                                        }
                                        "contentReferences" => {
                                            session.requests[req_idx].content_references =
                                                serde_json::from_value(value.clone()).ok();
                                        }
                                        _ => {}
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // Unknown entry kinds are ignored.
            _ => {}
        }
    }

    Ok(session)
}
849
/// Returns `true` when `ext` is exactly `json` or `jsonl` (case-sensitive, no leading dot).
pub fn is_session_file_extension(ext: &std::ffi::OsStr) -> bool {
    matches!(ext.to_str(), Some("json" | "jsonl"))
}
854
855pub fn detect_session_format(content: &str) -> SessionFormatInfo {
857 let format = VsCodeSessionFormat::from_content(content);
858 let trimmed = content.trim();
859
860 let (schema_version, confidence, method) = match format {
862 VsCodeSessionFormat::JsonLines => {
863 if let Some(first_line) = trimmed.lines().next() {
865 if let Ok(entry) = serde_json::from_str::<serde_json::Value>(first_line) {
866 if let Some(v) = entry.get("v") {
867 if let Some(ver) = v.get("version").and_then(|x| x.as_u64()) {
868 (
869 SessionSchemaVersion::from_version(ver as u32),
870 0.95,
871 "jsonl-version-field",
872 )
873 } else {
874 (SessionSchemaVersion::V3, 0.7, "jsonl-default")
876 }
877 } else {
878 (SessionSchemaVersion::V3, 0.6, "jsonl-no-v-field")
879 }
880 } else {
881 (SessionSchemaVersion::Unknown, 0.3, "jsonl-parse-error")
882 }
883 } else {
884 (SessionSchemaVersion::Unknown, 0.2, "jsonl-empty")
885 }
886 }
887 VsCodeSessionFormat::LegacyJson => {
888 if let Ok(json) = serde_json::from_str::<serde_json::Value>(trimmed) {
890 if let Some(ver) = json.get("version").and_then(|x| x.as_u64()) {
891 (
892 SessionSchemaVersion::from_version(ver as u32),
893 0.95,
894 "json-version-field",
895 )
896 } else {
897 if json.get("requests").is_some() && json.get("sessionId").is_some() {
899 (SessionSchemaVersion::V3, 0.8, "json-structure-inference")
900 } else if json.get("messages").is_some() {
901 (SessionSchemaVersion::V1, 0.7, "json-legacy-structure")
902 } else {
903 (SessionSchemaVersion::Unknown, 0.4, "json-unknown-structure")
904 }
905 }
906 } else {
907 let sanitized = sanitize_json_unicode(trimmed);
909 if let Ok(json) = serde_json::from_str::<serde_json::Value>(&sanitized) {
910 if let Some(ver) = json.get("version").and_then(|x| x.as_u64()) {
911 (
912 SessionSchemaVersion::from_version(ver as u32),
913 0.9,
914 "json-version-after-sanitize",
915 )
916 } else {
917 (SessionSchemaVersion::V3, 0.6, "json-default-after-sanitize")
918 }
919 } else {
920 (SessionSchemaVersion::Unknown, 0.2, "json-parse-error")
921 }
922 }
923 }
924 };
925
926 SessionFormatInfo {
927 format,
928 schema_version,
929 confidence,
930 detection_method: method,
931 }
932}
933
934pub fn parse_session_auto(
936 content: &str,
937) -> std::result::Result<(ChatSession, SessionFormatInfo), serde_json::Error> {
938 let format_info = detect_session_format(content);
939
940 let session = match format_info.format {
941 VsCodeSessionFormat::JsonLines => parse_session_jsonl(content)?,
942 VsCodeSessionFormat::LegacyJson => parse_session_json(content)?,
943 };
944
945 Ok((session, format_info))
946}
947
948pub fn parse_session_file(path: &Path) -> std::result::Result<ChatSession, serde_json::Error> {
950 let content = std::fs::read_to_string(path)
951 .map_err(|e| serde_json::Error::io(std::io::Error::other(e.to_string())))?;
952
953 let (session, _format_info) = parse_session_auto(&content)?;
955 Ok(session)
956}
957
958pub fn get_workspace_storage_db(workspace_id: &str) -> Result<PathBuf> {
960 let storage_path = get_workspace_storage_path()?;
961 Ok(storage_path.join(workspace_id).join("state.vscdb"))
962}
963
964pub fn read_chat_session_index(db_path: &Path) -> Result<ChatSessionIndex> {
966 let conn = Connection::open(db_path)?;
967
968 let result: std::result::Result<String, rusqlite::Error> = conn.query_row(
969 "SELECT value FROM ItemTable WHERE key = ?",
970 ["chat.ChatSessionStore.index"],
971 |row| row.get(0),
972 );
973
974 match result {
975 Ok(json_str) => serde_json::from_str(&json_str)
976 .map_err(|e| CsmError::InvalidSessionFormat(e.to_string())),
977 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(ChatSessionIndex::default()),
978 Err(e) => Err(CsmError::SqliteError(e)),
979 }
980}
981
982pub fn write_chat_session_index(db_path: &Path, index: &ChatSessionIndex) -> Result<()> {
984 let conn = Connection::open(db_path)?;
985 let json_str = serde_json::to_string(index)?;
986
987 let exists: bool = conn.query_row(
989 "SELECT COUNT(*) > 0 FROM ItemTable WHERE key = ?",
990 ["chat.ChatSessionStore.index"],
991 |row| row.get(0),
992 )?;
993
994 if exists {
995 conn.execute(
996 "UPDATE ItemTable SET value = ? WHERE key = ?",
997 [&json_str, "chat.ChatSessionStore.index"],
998 )?;
999 } else {
1000 conn.execute(
1001 "INSERT INTO ItemTable (key, value) VALUES (?, ?)",
1002 ["chat.ChatSessionStore.index", &json_str],
1003 )?;
1004 }
1005
1006 Ok(())
1007}
1008
1009pub fn add_session_to_index(
1011 db_path: &Path,
1012 session_id: &str,
1013 title: &str,
1014 last_message_date_ms: i64,
1015 _is_imported: bool,
1016 initial_location: &str,
1017 is_empty: bool,
1018) -> Result<()> {
1019 let mut index = read_chat_session_index(db_path)?;
1020
1021 index.entries.insert(
1022 session_id.to_string(),
1023 ChatSessionIndexEntry {
1024 session_id: session_id.to_string(),
1025 title: title.to_string(),
1026 last_message_date: last_message_date_ms,
1027 timing: Some(ChatSessionTiming {
1028 created: last_message_date_ms,
1029 last_request_started: Some(last_message_date_ms),
1030 last_request_ended: Some(last_message_date_ms),
1031 }),
1032 last_response_state: 1, initial_location: initial_location.to_string(),
1034 is_empty,
1035 is_imported: Some(_is_imported),
1036 has_pending_edits: Some(false),
1037 is_external: Some(false),
1038 },
1039 );
1040
1041 write_chat_session_index(db_path, &index)
1042}
1043
1044#[allow(dead_code)]
1046pub fn remove_session_from_index(db_path: &Path, session_id: &str) -> Result<bool> {
1047 let mut index = read_chat_session_index(db_path)?;
1048 let removed = index.entries.remove(session_id).is_some();
1049 if removed {
1050 write_chat_session_index(db_path, &index)?;
1051 }
1052 Ok(removed)
1053}
1054
1055pub fn sync_session_index(
1058 workspace_id: &str,
1059 chat_sessions_dir: &Path,
1060 force: bool,
1061) -> Result<(usize, usize)> {
1062 let db_path = get_workspace_storage_db(workspace_id)?;
1063
1064 if !db_path.exists() {
1065 return Err(CsmError::WorkspaceNotFound(format!(
1066 "Database not found: {}",
1067 db_path.display()
1068 )));
1069 }
1070
1071 if !force && is_vscode_running() {
1073 return Err(CsmError::VSCodeRunning);
1074 }
1075
1076 let mut index = read_chat_session_index(&db_path)?;
1078
1079 let mut files_on_disk: std::collections::HashSet<String> = std::collections::HashSet::new();
1081 if chat_sessions_dir.exists() {
1082 for entry in std::fs::read_dir(chat_sessions_dir)? {
1083 let entry = entry?;
1084 let path = entry.path();
1085 if path
1086 .extension()
1087 .map(is_session_file_extension)
1088 .unwrap_or(false)
1089 {
1090 if let Some(stem) = path.file_stem() {
1091 files_on_disk.insert(stem.to_string_lossy().to_string());
1092 }
1093 }
1094 }
1095 }
1096
1097 let stale_ids: Vec<String> = index
1099 .entries
1100 .keys()
1101 .filter(|id| !files_on_disk.contains(*id))
1102 .cloned()
1103 .collect();
1104
1105 let removed = stale_ids.len();
1106 for id in &stale_ids {
1107 index.entries.remove(id);
1108 }
1109
1110 let mut session_files: std::collections::HashMap<String, PathBuf> =
1113 std::collections::HashMap::new();
1114 for entry in std::fs::read_dir(chat_sessions_dir)? {
1115 let entry = entry?;
1116 let path = entry.path();
1117 if path
1118 .extension()
1119 .map(is_session_file_extension)
1120 .unwrap_or(false)
1121 {
1122 if let Some(stem) = path.file_stem() {
1123 let stem_str = stem.to_string_lossy().to_string();
1124 let is_jsonl = path.extension().is_some_and(|e| e == "jsonl");
1125 if !session_files.contains_key(&stem_str) || is_jsonl {
1127 session_files.insert(stem_str, path);
1128 }
1129 }
1130 }
1131 }
1132
1133 let mut added = 0;
1134 for (_, path) in &session_files {
1135 if let Ok(session) = parse_session_file(path) {
1136 let session_id = session.session_id.clone().unwrap_or_else(|| {
1137 path.file_stem()
1138 .map(|s| s.to_string_lossy().to_string())
1139 .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
1140 });
1141
1142 let title = session.title();
1143 let is_empty = session.is_empty();
1144 let last_message_date = session.last_message_date;
1145 let initial_location = session.initial_location.clone();
1146
1147 index.entries.insert(
1148 session_id.clone(),
1149 ChatSessionIndexEntry {
1150 session_id,
1151 title,
1152 last_message_date,
1153 timing: Some(ChatSessionTiming {
1154 created: session.creation_date,
1155 last_request_started: Some(last_message_date),
1156 last_request_ended: Some(last_message_date),
1157 }),
1158 last_response_state: 1, initial_location,
1160 is_empty,
1161 is_imported: Some(false),
1162 has_pending_edits: Some(false),
1163 is_external: Some(false),
1164 },
1165 );
1166 added += 1;
1167 }
1168 }
1169
1170 write_chat_session_index(&db_path, &index)?;
1172
1173 Ok((added, removed))
1174}
1175
1176pub fn register_all_sessions_from_directory(
1178 workspace_id: &str,
1179 chat_sessions_dir: &Path,
1180 force: bool,
1181) -> Result<usize> {
1182 let db_path = get_workspace_storage_db(workspace_id)?;
1183
1184 if !db_path.exists() {
1185 return Err(CsmError::WorkspaceNotFound(format!(
1186 "Database not found: {}",
1187 db_path.display()
1188 )));
1189 }
1190
1191 if !force && is_vscode_running() {
1193 return Err(CsmError::VSCodeRunning);
1194 }
1195
1196 let (added, removed) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
1198
1199 for entry in std::fs::read_dir(chat_sessions_dir)? {
1201 let entry = entry?;
1202 let path = entry.path();
1203
1204 if path
1205 .extension()
1206 .map(is_session_file_extension)
1207 .unwrap_or(false)
1208 {
1209 if let Ok(session) = parse_session_file(&path) {
1210 let session_id = session.session_id.clone().unwrap_or_else(|| {
1211 path.file_stem()
1212 .map(|s| s.to_string_lossy().to_string())
1213 .unwrap_or_else(|| uuid::Uuid::new_v4().to_string())
1214 });
1215
1216 let title = session.title();
1217
1218 println!(
1219 "[OK] Registered: {} ({}...)",
1220 title,
1221 &session_id[..12.min(session_id.len())]
1222 );
1223 }
1224 }
1225 }
1226
1227 if removed > 0 {
1228 println!("[OK] Removed {} stale index entries", removed);
1229 }
1230
1231 Ok(added)
1232}
1233
1234pub fn is_vscode_running() -> bool {
1236 let mut sys = System::new();
1237 sys.refresh_processes();
1238
1239 for process in sys.processes().values() {
1240 let name = process.name().to_lowercase();
1241 if name.contains("code") && !name.contains("codec") {
1242 return true;
1243 }
1244 }
1245
1246 false
1247}
1248
/// Asks every process that looks like VS Code to terminate and waits for them to exit.
///
/// Processes are matched with the same name heuristic as `is_vscode_running` (lower-cased
/// name contains `code` but not `codec`). On Windows each match is passed to
/// `taskkill /PID <pid>` (without `/F`); elsewhere it receives `Signal::Term`.
///
/// The function then polls every 500 ms. Once no matching process remains it waits one more
/// second and returns. When `timeout_secs` elapses first, the remaining matches are killed
/// with `process.kill()`, followed by a one-second wait.
///
/// # Returns
/// Always `Ok(())` in the visible code paths — including when nothing was running, when no
/// process could be signalled, and after the forced kill (whose outcome is not checked).
pub fn close_vscode_and_wait(timeout_secs: u64) -> Result<()> {
    use sysinfo::{ProcessRefreshKind, RefreshKind, Signal};

    if !is_vscode_running() {
        return Ok(());
    }

    let mut sys = System::new_with_specifics(
        RefreshKind::new().with_processes(ProcessRefreshKind::everything()),
    );
    sys.refresh_processes();

    // Number of processes that were asked to terminate.
    let mut signaled = 0u32;
    for (pid, process) in sys.processes() {
        let name = process.name().to_lowercase();
        if name.contains("code") && !name.contains("codec") {
            #[cfg(windows)]
            {
                // The exit status of taskkill is ignored; the process is counted regardless.
                let _ = std::process::Command::new("taskkill")
                    .args(["/PID", &pid.as_u32().to_string()])
                    .stdout(std::process::Stdio::null())
                    .stderr(std::process::Stdio::null())
                    .status();
                signaled += 1;
            }
            #[cfg(not(windows))]
            {
                // Count the process only if the signal was actually delivered.
                if process.kill_with(Signal::Term).unwrap_or(false) {
                    signaled += 1;
                }
            }
        }
    }

    // Nothing was signalled, so there is nothing to wait for.
    if signaled == 0 {
        return Ok(());
    }

    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout_secs);
    loop {
        std::thread::sleep(std::time::Duration::from_millis(500));
        if !is_vscode_running() {
            // Extra one-second pause after the last process disappeared.
            std::thread::sleep(std::time::Duration::from_secs(1));
            return Ok(());
        }
        if std::time::Instant::now() >= deadline {
            // Timed out: take a fresh process snapshot and kill whatever still matches.
            let mut sys2 = System::new_with_specifics(
                RefreshKind::new().with_processes(ProcessRefreshKind::everything()),
            );
            sys2.refresh_processes();
            for (_pid, process) in sys2.processes() {
                let name = process.name().to_lowercase();
                if name.contains("code") && !name.contains("codec") {
                    process.kill();
                }
            }
            std::thread::sleep(std::time::Duration::from_secs(1));
            return Ok(());
        }
    }
}
1320
1321pub fn reopen_vscode(project_path: Option<&str>) -> Result<()> {
1323 let mut cmd = std::process::Command::new("code");
1324 if let Some(path) = project_path {
1325 cmd.arg(path);
1326 }
1327 cmd.stdout(std::process::Stdio::null())
1328 .stderr(std::process::Stdio::null())
1329 .spawn()?;
1330 Ok(())
1331}
1332
1333pub fn backup_workspace_sessions(workspace_dir: &Path) -> Result<Option<PathBuf>> {
1335 let chat_sessions_dir = workspace_dir.join("chatSessions");
1336
1337 if !chat_sessions_dir.exists() {
1338 return Ok(None);
1339 }
1340
1341 let timestamp = std::time::SystemTime::now()
1342 .duration_since(std::time::UNIX_EPOCH)
1343 .unwrap()
1344 .as_secs();
1345
1346 let backup_dir = workspace_dir.join(format!("chatSessions-backup-{}", timestamp));
1347
1348 copy_dir_all(&chat_sessions_dir, &backup_dir)?;
1350
1351 Ok(Some(backup_dir))
1352}
1353
1354fn copy_dir_all(src: &Path, dst: &Path) -> Result<()> {
1356 std::fs::create_dir_all(dst)?;
1357
1358 for entry in std::fs::read_dir(src)? {
1359 let entry = entry?;
1360 let src_path = entry.path();
1361 let dst_path = dst.join(entry.file_name());
1362
1363 if src_path.is_dir() {
1364 copy_dir_all(&src_path, &dst_path)?;
1365 } else {
1366 std::fs::copy(&src_path, &dst_path)?;
1367 }
1368 }
1369
1370 Ok(())
1371}
1372
1373pub fn read_empty_window_sessions() -> Result<Vec<ChatSession>> {
1380 let sessions_path = get_empty_window_sessions_path()?;
1381
1382 if !sessions_path.exists() {
1383 return Ok(Vec::new());
1384 }
1385
1386 let mut sessions = Vec::new();
1387
1388 for entry in std::fs::read_dir(&sessions_path)? {
1389 let entry = entry?;
1390 let path = entry.path();
1391
1392 if path.extension().is_some_and(is_session_file_extension) {
1393 if let Ok(session) = parse_session_file(&path) {
1394 sessions.push(session);
1395 }
1396 }
1397 }
1398
1399 sessions.sort_by(|a, b| b.last_message_date.cmp(&a.last_message_date));
1401
1402 Ok(sessions)
1403}
1404
1405#[allow(dead_code)]
1407pub fn get_empty_window_session(session_id: &str) -> Result<Option<ChatSession>> {
1408 let sessions_path = get_empty_window_sessions_path()?;
1409 let session_path = sessions_path.join(format!("{}.json", session_id));
1410
1411 if !session_path.exists() {
1412 return Ok(None);
1413 }
1414
1415 let content = std::fs::read_to_string(&session_path)?;
1416 let session: ChatSession = serde_json::from_str(&content)
1417 .map_err(|e| CsmError::InvalidSessionFormat(e.to_string()))?;
1418
1419 Ok(Some(session))
1420}
1421
1422#[allow(dead_code)]
1424pub fn write_empty_window_session(session: &ChatSession) -> Result<PathBuf> {
1425 let sessions_path = get_empty_window_sessions_path()?;
1426
1427 std::fs::create_dir_all(&sessions_path)?;
1429
1430 let session_id = session.session_id.as_deref().unwrap_or("unknown");
1431 let session_path = sessions_path.join(format!("{}.json", session_id));
1432 let content = serde_json::to_string_pretty(session)?;
1433 std::fs::write(&session_path, content)?;
1434
1435 Ok(session_path)
1436}
1437
1438#[allow(dead_code)]
1440pub fn delete_empty_window_session(session_id: &str) -> Result<bool> {
1441 let sessions_path = get_empty_window_sessions_path()?;
1442 let session_path = sessions_path.join(format!("{}.json", session_id));
1443
1444 if session_path.exists() {
1445 std::fs::remove_file(&session_path)?;
1446 Ok(true)
1447 } else {
1448 Ok(false)
1449 }
1450}
1451
1452pub fn count_empty_window_sessions() -> Result<usize> {
1454 let sessions_path = get_empty_window_sessions_path()?;
1455
1456 if !sessions_path.exists() {
1457 return Ok(0);
1458 }
1459
1460 let count = std::fs::read_dir(&sessions_path)?
1461 .filter_map(|e| e.ok())
1462 .filter(|e| e.path().extension().is_some_and(is_session_file_extension))
1463 .count();
1464
1465 Ok(count)
1466}
1467
1468pub fn compact_session_jsonl(path: &Path) -> Result<PathBuf> {
1475 let content = std::fs::read_to_string(path).map_err(|e| {
1476 CsmError::InvalidSessionFormat(format!("Failed to read {}: {}", path.display(), e))
1477 })?;
1478
1479 let content = split_concatenated_jsonl(&content);
1484
1485 let mut lines = content.lines();
1486
1487 let first_line = lines
1489 .next()
1490 .ok_or_else(|| CsmError::InvalidSessionFormat("Empty JSONL file".to_string()))?;
1491
1492 let first_entry: serde_json::Value = match serde_json::from_str(first_line.trim()) {
1493 Ok(v) => v,
1494 Err(_) => {
1495 let sanitized = sanitize_json_unicode(first_line.trim());
1497 serde_json::from_str(&sanitized).map_err(|e| {
1498 CsmError::InvalidSessionFormat(format!("Invalid JSON on line 1: {}", e))
1499 })?
1500 }
1501 };
1502
1503 let kind = first_entry
1504 .get("kind")
1505 .and_then(|k| k.as_u64())
1506 .unwrap_or(99);
1507 if kind != 0 {
1508 return Err(CsmError::InvalidSessionFormat(
1509 "First JSONL line must be kind:0".to_string(),
1510 ));
1511 }
1512
1513 let mut state = first_entry
1515 .get("v")
1516 .cloned()
1517 .ok_or_else(|| CsmError::InvalidSessionFormat("kind:0 missing 'v' field".to_string()))?;
1518
1519 for line in lines {
1521 let line = line.trim();
1522 if line.is_empty() {
1523 continue;
1524 }
1525
1526 let entry: serde_json::Value = match serde_json::from_str(line) {
1527 Ok(v) => v,
1528 Err(_) => continue, };
1530
1531 let op_kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(99);
1532
1533 match op_kind {
1534 1 => {
1535 if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
1537 if let Some(keys_arr) = keys.as_array() {
1538 apply_delta(&mut state, keys_arr, value.clone());
1539 }
1540 }
1541 }
1542 2 => {
1543 if let (Some(keys), Some(value)) = (entry.get("k"), entry.get("v")) {
1545 let splice_index = entry.get("i").and_then(|i| i.as_u64()).map(|i| i as usize);
1546 if let Some(keys_arr) = keys.as_array() {
1547 apply_splice(&mut state, keys_arr, value.clone(), splice_index);
1548 }
1549 }
1550 }
1551 _ => {} }
1553 }
1554
1555 let session_id = path
1557 .file_stem()
1558 .and_then(|s| s.to_str())
1559 .map(|s| s.to_string());
1560 ensure_vscode_compat_fields(&mut state, session_id.as_deref());
1561
1562 let compact_entry = serde_json::json!({"kind": 0, "v": state});
1564 let compact_content = serde_json::to_string(&compact_entry)
1565 .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
1566
1567 let backup_path = path.with_extension("jsonl.bak");
1569 std::fs::rename(path, &backup_path)?;
1570
1571 std::fs::write(path, &compact_content)?;
1573
1574 Ok(backup_path)
1575}
1576
1577pub fn trim_session_jsonl(path: &Path, keep: usize) -> Result<(usize, usize, f64, f64)> {
1588 let content = std::fs::read_to_string(path).map_err(|e| {
1589 CsmError::InvalidSessionFormat(format!("Failed to read {}: {}", path.display(), e))
1590 })?;
1591
1592 let original_size = content.len() as f64 / (1024.0 * 1024.0);
1593
1594 let content = split_concatenated_jsonl(&content);
1596 let line_count = content.lines().filter(|l| !l.trim().is_empty()).count();
1597
1598 let content = if line_count > 1 {
1600 std::fs::write(path, &content)?;
1602 compact_session_jsonl(path)?;
1603 std::fs::read_to_string(path).map_err(|e| {
1604 CsmError::InvalidSessionFormat(format!("Failed to read compacted file: {}", e))
1605 })?
1606 } else {
1607 content
1608 };
1609
1610 let first_line = content
1611 .lines()
1612 .next()
1613 .ok_or_else(|| CsmError::InvalidSessionFormat("Empty JSONL file".to_string()))?;
1614
1615 let mut entry: serde_json::Value = serde_json::from_str(first_line.trim())
1616 .map_err(|_| {
1617 let sanitized = sanitize_json_unicode(first_line.trim());
1618 serde_json::from_str::<serde_json::Value>(&sanitized)
1619 .map_err(|e| CsmError::InvalidSessionFormat(format!("Invalid JSON: {}", e)))
1620 })
1621 .unwrap_or_else(|e| e.unwrap());
1622
1623 let kind = entry.get("kind").and_then(|k| k.as_u64()).unwrap_or(99);
1624 if kind != 0 {
1625 return Err(
1626 CsmError::InvalidSessionFormat("First JSONL line must be kind:0".to_string()).into(),
1627 );
1628 }
1629
1630 let requests = match entry
1632 .get("v")
1633 .and_then(|v| v.get("requests"))
1634 .and_then(|r| r.as_array())
1635 {
1636 Some(r) => r.clone(),
1637 None => {
1638 return Err(CsmError::InvalidSessionFormat(
1639 "Session has no requests array".to_string(),
1640 )
1641 .into());
1642 }
1643 };
1644
1645 let original_count = requests.len();
1646
1647 if original_count <= keep {
1648 strip_bloated_content(&mut entry);
1650
1651 let trimmed_content = serde_json::to_string(&entry)
1652 .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
1653 let new_size = trimmed_content.len() as f64 / (1024.0 * 1024.0);
1654
1655 if new_size < original_size * 0.9 {
1657 let backup_path = path.with_extension("jsonl.bak");
1658 if !backup_path.exists() {
1659 std::fs::copy(path, &backup_path)?;
1660 }
1661 std::fs::write(path, &trimmed_content)?;
1662 }
1663
1664 return Ok((original_count, original_count, original_size, new_size));
1665 }
1666
1667 let kept_requests: Vec<serde_json::Value> = requests[original_count - keep..].to_vec();
1669
1670 let final_requests = kept_requests;
1674
1675 if let Some(v) = entry.get_mut("v") {
1677 if let Some(obj) = v.as_object_mut() {
1678 obj.insert("requests".to_string(), serde_json::json!(final_requests));
1679 }
1680 }
1681
1682 strip_bloated_content(&mut entry);
1684
1685 let session_id = path
1687 .file_stem()
1688 .and_then(|s| s.to_str())
1689 .map(|s| s.to_string());
1690 if let Some(v) = entry.get_mut("v") {
1691 ensure_vscode_compat_fields(v, session_id.as_deref());
1692 }
1693
1694 let trimmed_content = serde_json::to_string(&entry)
1695 .map_err(|e| CsmError::InvalidSessionFormat(format!("Failed to serialize: {}", e)))?;
1696
1697 let new_size = trimmed_content.len() as f64 / (1024.0 * 1024.0);
1698
1699 let backup_path = path.with_extension("jsonl.bak");
1701 if !backup_path.exists() {
1702 std::fs::copy(path, &backup_path)?;
1703 }
1704
1705 std::fs::write(path, &trimmed_content)?;
1707
1708 Ok((original_count, keep, original_size, new_size))
1709}
1710
1711fn strip_bloated_content(entry: &mut serde_json::Value) {
1725 let requests = match entry
1726 .get_mut("v")
1727 .and_then(|v| v.get_mut("requests"))
1728 .and_then(|r| r.as_array_mut())
1729 {
1730 Some(r) => r,
1731 None => return,
1732 };
1733
1734 for req in requests.iter_mut() {
1735 let obj = match req.as_object_mut() {
1736 Some(o) => o,
1737 None => continue,
1738 };
1739
1740 if let Some(result) = obj.get_mut("result") {
1742 if let Some(result_obj) = result.as_object_mut() {
1743 if let Some(meta) = result_obj.get("metadata") {
1744 let meta_str = serde_json::to_string(meta).unwrap_or_default();
1745 if meta_str.len() > 1000 {
1746 result_obj.insert(
1747 "metadata".to_string(),
1748 serde_json::Value::Object(serde_json::Map::new()),
1749 );
1750 }
1751 }
1752 }
1753 }
1754
1755 obj.remove("editedFileEvents");
1757
1758 obj.remove("chatEdits");
1760
1761 if let Some(refs) = obj.get_mut("contentReferences") {
1763 if let Some(arr) = refs.as_array_mut() {
1764 if arr.len() > 3 {
1765 arr.truncate(3);
1766 }
1767 }
1768 }
1769
1770 if let Some(response) = obj.get_mut("response") {
1772 if let Some(resp_arr) = response.as_array_mut() {
1773 resp_arr.retain(|r| {
1775 let kind = r.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1776 !matches!(
1777 kind,
1778 "toolInvocationSerialized"
1779 | "progressMessage"
1780 | "confirmationWidget"
1781 | "codeblockUri"
1782 | "progressTaskSerialized"
1783 | "undoStop"
1784 | "mcpServersStarting"
1785 | "confirmation"
1786 )
1787 });
1788
1789 for r in resp_arr.iter_mut() {
1791 let kind = r
1792 .get("kind")
1793 .and_then(|k| k.as_str())
1794 .unwrap_or("")
1795 .to_string();
1796
1797 if kind == "textEditGroup" {
1798 if let Some(edits) = r.get_mut("edits") {
1799 if let Some(arr) = edits.as_array_mut() {
1800 if serde_json::to_string(arr).unwrap_or_default().len() > 2000 {
1801 arr.clear();
1802 }
1803 }
1804 }
1805 }
1806
1807 if kind == "thinking" {
1809 if let Some(val) = r.get_mut("value") {
1810 if let Some(s) = val.as_str() {
1811 if s.len() > 500 {
1812 *val = serde_json::Value::String(format!(
1813 "{}... [truncated]",
1814 &s[..500]
1815 ));
1816 }
1817 }
1818 }
1819 if let Some(thought) = r.get_mut("thought") {
1820 if let Some(thought_val) = thought.get_mut("value") {
1821 if let Some(s) = thought_val.as_str() {
1822 if s.len() > 500 {
1823 *thought_val = serde_json::Value::String(format!(
1824 "{}... [truncated]",
1825 &s[..500]
1826 ));
1827 }
1828 }
1829 }
1830 }
1831 }
1832
1833 if kind == "markdownContent" {
1835 if let Some(content) = r.get_mut("content") {
1836 if let Some(val) = content.get_mut("value") {
1837 if let Some(s) = val.as_str() {
1838 if s.len() > 20000 {
1839 *val = serde_json::Value::String(format!(
1840 "{}\n\n---\n*[Chasm: Content truncated for loading performance]*",
1841 &s[..20000]
1842 ));
1843 }
1844 }
1845 }
1846 }
1847 }
1848 }
1849
1850 let mut thinking_count = 0;
1852 let mut indices_to_remove = Vec::new();
1853 for (i, r) in resp_arr.iter().enumerate().rev() {
1854 let kind = r.get("kind").and_then(|k| k.as_str()).unwrap_or("");
1855 if kind == "thinking" {
1856 thinking_count += 1;
1857 if thinking_count > 5 {
1858 indices_to_remove.push(i);
1859 }
1860 }
1861 }
1862 for idx in indices_to_remove {
1863 resp_arr.remove(idx);
1864 }
1865
1866 for r in resp_arr.iter_mut() {
1868 if let Some(obj) = r.as_object_mut() {
1869 obj.remove("toolSpecificData");
1870 }
1871 }
1872
1873 let fixed: Vec<serde_json::Value> = resp_arr
1879 .drain(..)
1880 .map(|item| {
1881 if item.get("kind").is_none() {
1882 if item.get("value").is_some() || item.get("supportHtml").is_some() {
1884 serde_json::json!({
1885 "kind": "markdownContent",
1886 "content": item
1887 })
1888 } else {
1889 item
1890 }
1891 } else {
1892 item
1893 }
1894 })
1895 .collect();
1896 *resp_arr = fixed;
1897 }
1898 }
1899 }
1900}
1901
/// Inserts a newline between JSONL entries that were written back-to-back.
///
/// Every occurrence of `}{"kind":` is rewritten to `}` + newline +
/// `{"kind":`. Input without that sequence is returned as an unchanged copy.
pub fn split_concatenated_jsonl(content: &str) -> String {
    const GLUED: &str = "}{\"kind\":";
    const SEPARATED: &str = "}\n{\"kind\":";

    // Splitting on the glued boundary and re-joining with the separated form
    // rewrites every occurrence; with no occurrence the single piece is
    // returned as is.
    content.split(GLUED).collect::<Vec<_>>().join(SEPARATED)
}
1919
1920fn apply_delta(root: &mut serde_json::Value, keys: &[serde_json::Value], value: serde_json::Value) {
1922 if keys.is_empty() {
1923 return;
1924 }
1925
1926 let mut current = root;
1928 for key in &keys[..keys.len() - 1] {
1929 if let Some(k) = key.as_str() {
1930 if !current.get(k).is_some() {
1931 current[k] = serde_json::Value::Object(serde_json::Map::new());
1932 }
1933 current = &mut current[k];
1934 } else if let Some(idx) = key.as_u64() {
1935 if let Some(arr) = current.as_array_mut() {
1936 if (idx as usize) < arr.len() {
1937 current = &mut arr[idx as usize];
1938 } else {
1939 return; }
1941 } else {
1942 return;
1943 }
1944 }
1945 }
1946
1947 if let Some(last_key) = keys.last() {
1949 if let Some(k) = last_key.as_str() {
1950 current[k] = value;
1951 } else if let Some(idx) = last_key.as_u64() {
1952 if let Some(arr) = current.as_array_mut() {
1953 if (idx as usize) < arr.len() {
1954 arr[idx as usize] = value;
1955 }
1956 }
1957 }
1958 }
1959}
1960
1961fn apply_splice(
1965 root: &mut serde_json::Value,
1966 keys: &[serde_json::Value],
1967 items: serde_json::Value,
1968 splice_index: Option<usize>,
1969) {
1970 if keys.is_empty() {
1971 return;
1972 }
1973
1974 let mut current = root;
1976 for key in keys {
1977 if let Some(k) = key.as_str() {
1978 if !current.get(k).is_some() {
1979 current[k] = serde_json::json!([]);
1980 }
1981 current = &mut current[k];
1982 } else if let Some(idx) = key.as_u64() {
1983 if let Some(arr) = current.as_array_mut() {
1984 if (idx as usize) < arr.len() {
1985 current = &mut arr[idx as usize];
1986 } else {
1987 return;
1988 }
1989 } else {
1990 return;
1991 }
1992 }
1993 }
1994
1995 if let Some(target_arr) = current.as_array_mut() {
1997 if let Some(idx) = splice_index {
1998 target_arr.truncate(idx);
2000 } else {
2001 target_arr.clear();
2003 }
2004 if let Some(new_items) = items.as_array() {
2005 target_arr.extend(new_items.iter().cloned());
2006 }
2007 }
2008}
2009
2010pub fn ensure_vscode_compat_fields(state: &mut serde_json::Value, session_id: Option<&str>) {
2023 if let Some(obj) = state.as_object_mut() {
2024 if !obj.contains_key("version") {
2026 obj.insert("version".to_string(), serde_json::json!(3));
2027 }
2028
2029 if !obj.contains_key("sessionId") {
2031 if let Some(id) = session_id {
2032 obj.insert("sessionId".to_string(), serde_json::json!(id));
2033 }
2034 }
2035
2036 if !obj.contains_key("responderUsername") {
2038 obj.insert(
2039 "responderUsername".to_string(),
2040 serde_json::json!("GitHub Copilot"),
2041 );
2042 }
2043
2044 if !obj.contains_key("hasPendingEdits") {
2046 obj.insert("hasPendingEdits".to_string(), serde_json::json!(false));
2047 }
2048
2049 if !obj.contains_key("pendingRequests") {
2051 obj.insert("pendingRequests".to_string(), serde_json::json!([]));
2052 }
2053
2054 if !obj.contains_key("inputState") {
2056 obj.insert(
2057 "inputState".to_string(),
2058 serde_json::json!({
2059 "attachments": [],
2060 "mode": { "id": "agent", "kind": "agent" },
2061 "inputText": "",
2062 "selections": [],
2063 "contrib": { "chatDynamicVariableModel": [] }
2064 }),
2065 );
2066 }
2067 }
2068}
2069
2070pub fn is_skeleton_json(content: &str) -> bool {
2074 if content.len() < 100 {
2076 return false;
2077 }
2078
2079 let structural_chars: usize = content
2081 .chars()
2082 .filter(|c| {
2083 matches!(
2084 c,
2085 '{' | '}' | '[' | ']' | ',' | ':' | ' ' | '\n' | '\r' | '\t' | '"'
2086 )
2087 })
2088 .count();
2089
2090 let total_chars = content.len();
2091 let structural_ratio = structural_chars as f64 / total_chars as f64;
2092
2093 if structural_ratio < 0.80 {
2096 return false;
2097 }
2098
2099 if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(content) {
2102 if let Some(requests) = parsed.get("requests").and_then(|r| r.as_array()) {
2104 let all_empty = requests.iter().all(|req| {
2105 let msg = req
2107 .get("message")
2108 .and_then(|m| m.get("text"))
2109 .and_then(|t| t.as_str());
2110 msg.map_or(true, |s| s.is_empty())
2111 });
2112 return all_empty;
2113 }
2114 return true;
2116 }
2117
2118 structural_ratio > 0.85
2120}
2121
2122pub fn convert_skeleton_json_to_jsonl(
2127 json_path: &Path,
2128 title: Option<&str>,
2129 last_message_date: Option<i64>,
2130) -> Result<Option<PathBuf>> {
2131 let content = std::fs::read_to_string(json_path)
2132 .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
2133
2134 if !is_skeleton_json(&content) {
2135 return Ok(None);
2136 }
2137
2138 let session_id = json_path
2139 .file_stem()
2140 .and_then(|s| s.to_str())
2141 .unwrap_or("unknown")
2142 .to_string();
2143
2144 let title = title.unwrap_or("Recovered Session");
2145 let now = std::time::SystemTime::now()
2146 .duration_since(std::time::UNIX_EPOCH)
2147 .unwrap_or_default()
2148 .as_millis() as i64;
2149 let timestamp = last_message_date.unwrap_or(now);
2150
2151 let jsonl_entry = serde_json::json!({
2153 "kind": 0,
2154 "v": {
2155 "sessionId": session_id,
2156 "title": title,
2157 "lastMessageDate": timestamp,
2158 "requests": [],
2159 "version": 4,
2160 "hasPendingEdits": false,
2161 "pendingRequests": [],
2162 "inputState": {
2163 "attachments": [],
2164 "mode": { "id": "agent", "kind": "agent" },
2165 "inputText": "",
2166 "selections": [],
2167 "contrib": { "chatDynamicVariableModel": [] }
2168 },
2169 "responderUsername": "GitHub Copilot",
2170 "isImported": false,
2171 "initialLocation": "panel"
2172 }
2173 });
2174
2175 let jsonl_path = json_path.with_extension("jsonl");
2176 let corrupt_path = json_path.with_extension("json.corrupt");
2177
2178 if jsonl_path.exists() {
2180 std::fs::rename(json_path, &corrupt_path)?;
2182 return Ok(None);
2183 }
2184
2185 std::fs::write(
2187 &jsonl_path,
2188 serde_json::to_string(&jsonl_entry)
2189 .map_err(|e| CsmError::InvalidSessionFormat(format!("Serialize error: {}", e)))?,
2190 )?;
2191
2192 std::fs::rename(json_path, &corrupt_path)?;
2194
2195 Ok(Some(jsonl_path))
2196}
2197
2198pub fn fix_cancelled_model_state(path: &Path) -> Result<bool> {
2209 let content = std::fs::read_to_string(path)
2210 .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
2211
2212 let lines: Vec<&str> = content.lines().collect();
2213
2214 if lines.is_empty() {
2215 return Ok(false);
2216 }
2217
2218 if lines.len() == 1 {
2222 let mut entry: serde_json::Value = serde_json::from_str(lines[0].trim())
2224 .map_err(|e| CsmError::InvalidSessionFormat(format!("Invalid JSON: {}", e)))?;
2225
2226 let is_kind_0 = entry
2227 .get("kind")
2228 .and_then(|k| k.as_u64())
2229 .map(|k| k == 0)
2230 .unwrap_or(false);
2231
2232 if !is_kind_0 {
2233 return Ok(false);
2234 }
2235
2236 let requests = match entry
2237 .get_mut("v")
2238 .and_then(|v| v.get_mut("requests"))
2239 .and_then(|r| r.as_array_mut())
2240 {
2241 Some(r) if !r.is_empty() => r,
2242 _ => return Ok(false),
2243 };
2244
2245 let last_req = requests.last_mut().unwrap();
2246 let model_state = last_req.get("modelState");
2247
2248 let needs_fix = match model_state {
2249 Some(ms) => ms.get("value").and_then(|v| v.as_u64()) == Some(2),
2250 None => true, };
2252
2253 if !needs_fix {
2254 return Ok(false);
2255 }
2256
2257 let now = std::time::SystemTime::now()
2258 .duration_since(std::time::UNIX_EPOCH)
2259 .unwrap_or_default()
2260 .as_millis() as u64;
2261
2262 last_req.as_object_mut().unwrap().insert(
2263 "modelState".to_string(),
2264 serde_json::json!({"value": 1, "completedAt": now}),
2265 );
2266
2267 let patched = serde_json::to_string(&entry)
2268 .map_err(|e| CsmError::InvalidSessionFormat(format!("Serialize error: {}", e)))?;
2269 std::fs::write(path, patched)?;
2270 return Ok(true);
2271 }
2272
2273 let mut highest_req_idx: Option<usize> = None;
2277 let mut last_model_state_value: Option<u64> = None;
2278
2279 if let Ok(first_entry) = serde_json::from_str::<serde_json::Value>(lines[0].trim()) {
2281 if let Some(requests) = first_entry
2282 .get("v")
2283 .and_then(|v| v.get("requests"))
2284 .and_then(|r| r.as_array())
2285 {
2286 if !requests.is_empty() {
2287 let last_idx = requests.len() - 1;
2288 highest_req_idx = Some(last_idx);
2289 if let Some(ms) = requests[last_idx].get("modelState") {
2291 last_model_state_value = ms.get("value").and_then(|v| v.as_u64());
2292 }
2293 }
2294 }
2295 }
2296
2297 static REQ_IDX_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r#""k":\["requests",(\d+)"#).unwrap());
2299
2300 for line in &lines[1..] {
2301 if let Some(caps) = REQ_IDX_RE.captures(line) {
2302 if let Ok(idx) = caps[1].parse::<usize>() {
2303 if highest_req_idx.is_none() || idx > highest_req_idx.unwrap() {
2304 highest_req_idx = Some(idx);
2305 last_model_state_value = None; }
2307 if Some(idx) == highest_req_idx && line.contains("\"modelState\"") {
2309 if let Ok(entry) = serde_json::from_str::<serde_json::Value>(line.trim()) {
2310 last_model_state_value = entry
2311 .get("v")
2312 .and_then(|v| v.get("value"))
2313 .and_then(|v| v.as_u64());
2314 }
2315 }
2316 }
2317 }
2318 }
2319
2320 let req_idx = match highest_req_idx {
2321 Some(idx) => idx,
2322 None => return Ok(false),
2323 };
2324
2325 let needs_fix = match last_model_state_value {
2326 Some(2) => true, None => true, _ => false, };
2330
2331 if !needs_fix {
2332 return Ok(false);
2333 }
2334
2335 let now = std::time::SystemTime::now()
2336 .duration_since(std::time::UNIX_EPOCH)
2337 .unwrap_or_default()
2338 .as_millis() as u64;
2339
2340 let fix_delta = format!(
2341 "\n{{\"kind\":1,\"k\":[\"requests\",{},\"modelState\"],\"v\":{{\"value\":1,\"completedAt\":{}}}}}",
2342 req_idx, now
2343 );
2344
2345 use std::io::Write;
2346 let mut file = std::fs::OpenOptions::new().append(true).open(path)?;
2347 file.write_all(fix_delta.as_bytes())?;
2348
2349 Ok(true)
2350}
2351
2352pub fn repair_workspace_sessions(
2355 workspace_id: &str,
2356 chat_sessions_dir: &Path,
2357 force: bool,
2358) -> Result<(usize, usize)> {
2359 let db_path = get_workspace_storage_db(workspace_id)?;
2360
2361 if !db_path.exists() {
2362 return Err(CsmError::WorkspaceNotFound(format!(
2363 "Database not found: {}",
2364 db_path.display()
2365 )));
2366 }
2367
2368 if !force && is_vscode_running() {
2369 return Err(CsmError::VSCodeRunning);
2370 }
2371
2372 let mut compacted = 0;
2373 let mut fields_fixed = 0;
2374
2375 if chat_sessions_dir.exists() {
2376 for entry in std::fs::read_dir(chat_sessions_dir)? {
2378 let entry = entry?;
2379 let path = entry.path();
2380 if path.extension().is_some_and(|e| e == "jsonl") {
2381 let metadata = std::fs::metadata(&path)?;
2382 let size_mb = metadata.len() / (1024 * 1024);
2383
2384 let content = std::fs::read_to_string(&path)
2385 .map_err(|e| CsmError::InvalidSessionFormat(format!("Read error: {}", e)))?;
2386 let line_count = content.lines().count();
2387
2388 if line_count > 1 {
2389 let stem = path
2391 .file_stem()
2392 .map(|s| s.to_string_lossy().to_string())
2393 .unwrap_or_default();
2394 println!(
2395 " Compacting {} ({} lines, {}MB)...",
2396 stem, line_count, size_mb
2397 );
2398
2399 match compact_session_jsonl(&path) {
2400 Ok(backup_path) => {
2401 let new_size = std::fs::metadata(&path)
2402 .map(|m| m.len() / (1024 * 1024))
2403 .unwrap_or(0);
2404 println!(
2405 " [OK] Compacted: {}MB -> {}MB (backup: {})",
2406 size_mb,
2407 new_size,
2408 backup_path
2409 .file_name()
2410 .unwrap_or_default()
2411 .to_string_lossy()
2412 );
2413 compacted += 1;
2414 }
2415 Err(e) => {
2416 println!(" [WARN] Failed to compact {}: {}", stem, e);
2417 }
2418 }
2419 } else {
2420 if let Some(first_line) = content.lines().next() {
2422 if let Ok(mut obj) = serde_json::from_str::<serde_json::Value>(first_line) {
2423 let is_kind_0 = obj
2424 .get("kind")
2425 .and_then(|k| k.as_u64())
2426 .map(|k| k == 0)
2427 .unwrap_or(false);
2428
2429 if is_kind_0 {
2430 if let Some(v) = obj.get("v") {
2431 let missing = !v.get("hasPendingEdits").is_some()
2432 || !v.get("pendingRequests").is_some()
2433 || !v.get("inputState").is_some()
2434 || !v.get("sessionId").is_some();
2435
2436 if missing {
2437 let session_id = path
2438 .file_stem()
2439 .and_then(|s| s.to_str())
2440 .map(|s| s.to_string());
2441 if let Some(v_mut) = obj.get_mut("v") {
2442 ensure_vscode_compat_fields(
2443 v_mut,
2444 session_id.as_deref(),
2445 );
2446 }
2447 let patched = serde_json::to_string(&obj).map_err(|e| {
2448 CsmError::InvalidSessionFormat(format!(
2449 "Failed to serialize: {}",
2450 e
2451 ))
2452 })?;
2453 std::fs::write(&path, &patched)?;
2454 let stem = path
2455 .file_stem()
2456 .map(|s| s.to_string_lossy().to_string())
2457 .unwrap_or_default();
2458 println!(" [OK] Fixed missing VS Code fields: {}", stem);
2459 fields_fixed += 1;
2460 }
2461 }
2462 }
2463 }
2464 }
2465 }
2466 }
2467 }
2468 }
2469
2470 let mut skeletons_converted = 0;
2476 if chat_sessions_dir.exists() {
2477 let index_entries: std::collections::HashMap<String, (String, Option<i64>)> =
2479 if let Ok(index) = read_chat_session_index(&db_path) {
2480 index
2481 .entries
2482 .iter()
2483 .map(|(id, e)| (id.clone(), (e.title.clone(), Some(e.last_message_date))))
2484 .collect()
2485 } else {
2486 std::collections::HashMap::new()
2487 };
2488
2489 let mut jsonl_stems: HashSet<String> = HashSet::new();
2491 for entry in std::fs::read_dir(chat_sessions_dir)? {
2492 let entry = entry?;
2493 let path = entry.path();
2494 if path.extension().is_some_and(|e| e == "jsonl") {
2495 if let Some(stem) = path.file_stem() {
2496 jsonl_stems.insert(stem.to_string_lossy().to_string());
2497 }
2498 }
2499 }
2500
2501 for entry in std::fs::read_dir(chat_sessions_dir)? {
2502 let entry = entry?;
2503 let path = entry.path();
2504 if path.extension().is_some_and(|e| e == "json")
2505 && !path.to_string_lossy().ends_with(".bak")
2506 && !path.to_string_lossy().ends_with(".corrupt")
2507 {
2508 let stem = path
2509 .file_stem()
2510 .map(|s| s.to_string_lossy().to_string())
2511 .unwrap_or_default();
2512
2513 if jsonl_stems.contains(&stem) {
2515 continue;
2516 }
2517
2518 let (title, timestamp) = index_entries
2519 .get(&stem)
2520 .map(|(t, ts)| (t.as_str(), *ts))
2521 .unwrap_or(("Recovered Session", None));
2522
2523 match convert_skeleton_json_to_jsonl(&path, Some(title), timestamp) {
2524 Ok(Some(jsonl_path)) => {
2525 println!(
2526 " [OK] Converted skeleton .json → .jsonl: {} (\"{}\")",
2527 stem, title
2528 );
2529 jsonl_stems.insert(stem);
2531 skeletons_converted += 1;
2532 let _ = jsonl_path; }
2534 Ok(None) => {} Err(e) => {
2536 println!(" [WARN] Failed to convert skeleton {}: {}", stem, e);
2537 }
2538 }
2539 }
2540 }
2541 }
2542
2543 let mut cancelled_fixed = 0;
2548 if chat_sessions_dir.exists() {
2549 for entry in std::fs::read_dir(chat_sessions_dir)? {
2550 let entry = entry?;
2551 let path = entry.path();
2552 if path.extension().is_some_and(|e| e == "jsonl") {
2553 match fix_cancelled_model_state(&path) {
2554 Ok(true) => {
2555 let stem = path
2556 .file_stem()
2557 .map(|s| s.to_string_lossy().to_string())
2558 .unwrap_or_default();
2559 println!(" [OK] Fixed cancelled modelState: {}", stem);
2560 cancelled_fixed += 1;
2561 }
2562 Ok(false) => {} Err(e) => {
2564 let stem = path
2565 .file_stem()
2566 .map(|s| s.to_string_lossy().to_string())
2567 .unwrap_or_default();
2568 println!(" [WARN] Failed to fix modelState for {}: {}", stem, e);
2569 }
2570 }
2571 }
2572 }
2573 }
2574
2575 let (index_fixed, _) = sync_session_index(workspace_id, chat_sessions_dir, force)?;
2577
2578 if fields_fixed > 0 {
2579 println!(
2580 " [OK] Injected missing VS Code fields into {} session(s)",
2581 fields_fixed
2582 );
2583 }
2584 if skeletons_converted > 0 {
2585 println!(
2586 " [OK] Converted {} skeleton .json file(s) to .jsonl",
2587 skeletons_converted
2588 );
2589 }
2590 if cancelled_fixed > 0 {
2591 println!(
2592 " [OK] Fixed cancelled modelState in {} session(s)",
2593 cancelled_fixed
2594 );
2595 }
2596
2597 Ok((compacted, index_fixed))
2598}