1use anyhow::{Context, Result};
11use colored::Colorize;
12use std::path::{Path, PathBuf};
13
14use crate::models::{ChatSession, ChatSessionTiming};
15use crate::storage::{
16 add_session_to_index, ensure_vscode_compat_fields, get_workspace_storage_db,
17 parse_session_auto, read_chat_session_index, write_chat_session_index,
18};
19
20pub fn shard_session(
29 file: &str,
30 max_requests: Option<usize>,
31 max_size: Option<String>,
32 output_dir: Option<&str>,
33 update_index: bool,
34 workspace: Option<&str>,
35 dry_run: bool,
36 no_backup: bool,
37) -> Result<()> {
38 let file_path = PathBuf::from(file);
39 if !file_path.exists() {
40 anyhow::bail!("Session file not found: {}", file_path.display());
41 }
42
43 let strategy = parse_strategy(max_requests, max_size)?;
45
46 let content = std::fs::read_to_string(&file_path)
48 .context(format!("Failed to read {}", file_path.display()))?;
49 let (session, _format_info) =
50 parse_session_auto(&content).context("Failed to parse session file")?;
51
52 let request_count = session.requests.len();
53 if request_count == 0 {
54 println!("{}", "Session has no requests, nothing to shard.".yellow());
55 return Ok(());
56 }
57
58 let session_id = file_path
60 .file_stem()
61 .unwrap_or_default()
62 .to_string_lossy()
63 .to_string();
64
65 let shards = split_session(&session, &strategy, &content);
67 if shards.len() <= 1 {
68 println!(
69 "{}",
70 format!(
71 "Session has {} requests and does not exceed the shard threshold — no sharding needed.",
72 request_count
73 )
74 .yellow()
75 );
76 return Ok(());
77 }
78
79 let title = session.title();
80 println!(
81 "{}",
82 format!(
83 "Sharding \"{}\" ({} requests) into {} parts",
84 title,
85 request_count,
86 shards.len()
87 )
88 .cyan()
89 .bold()
90 );
91
92 let shard_uuids: Vec<String> = (0..shards.len())
94 .map(|i| deterministic_uuid(&session_id, i))
95 .collect();
96
97 let out_dir = match output_dir {
99 Some(d) => PathBuf::from(d),
100 None => file_path
101 .parent()
102 .unwrap_or_else(|| Path::new("."))
103 .to_path_buf(),
104 };
105 if !out_dir.exists() {
106 if dry_run {
107 println!(" Would create directory: {}", out_dir.display());
108 } else {
109 std::fs::create_dir_all(&out_dir)?;
110 }
111 }
112
113 for (i, shard) in shards.iter().enumerate() {
115 let uuid = &shard_uuids[i];
116 let shard_title = if shards.len() > 1 {
117 format!("{} (Part {}/{})", title, i + 1, shards.len())
118 } else {
119 title.clone()
120 };
121
122 let prev_shard_id = if i > 0 {
123 Some(shard_uuids[i - 1].as_str())
124 } else {
125 None
126 };
127 let next_shard_id = if i < shards.len() - 1 {
128 Some(shard_uuids[i + 1].as_str())
129 } else {
130 None
131 };
132
133 let jsonl = build_shard_jsonl(
134 &session,
135 &shard.requests,
136 uuid,
137 &shard_title,
138 &session_id,
139 i,
140 shards.len(),
141 prev_shard_id,
142 next_shard_id,
143 shard.start_idx,
144 shard.end_idx,
145 )?;
146
147 let shard_path = out_dir.join(format!("{}.jsonl", uuid));
148 let size_mb = jsonl.len() as f64 / 1024.0 / 1024.0;
149
150 if dry_run {
151 println!(
152 " {} Part {}/{}: {} — {} requests ({:.1} MB)",
153 "[dry-run]".bright_black(),
154 i + 1,
155 shards.len(),
156 &uuid[..8],
157 shard.requests.len(),
158 size_mb
159 );
160 } else {
161 std::fs::write(&shard_path, &jsonl)
162 .context(format!("Failed to write shard {}", shard_path.display()))?;
163 println!(
164 " Part {}/{}: {} — {} requests ({:.1} MB)",
165 i + 1,
166 shards.len(),
167 &uuid[..8],
168 shard.requests.len(),
169 size_mb
170 );
171 }
172 }
173
174 if !no_backup {
176 let backup_path = PathBuf::from(format!("{}.oversized", file_path.display()));
177 if dry_run {
178 println!(
179 " {} Would backup original → {}",
180 "[dry-run]".bright_black(),
181 backup_path
182 .file_name()
183 .unwrap_or_default()
184 .to_string_lossy()
185 );
186 } else {
187 std::fs::copy(&file_path, &backup_path).context("Failed to create backup")?;
188 println!(
189 " Backed up original → {}",
190 backup_path
191 .file_name()
192 .unwrap_or_default()
193 .to_string_lossy()
194 );
195 }
196 }
197
198 {
200 let last_shard = shards.last().unwrap();
201 let latest_title = if shards.len() > 1 {
202 format!(
203 "{} (Latest — Part {}/{})",
204 title,
205 shards.len(),
206 shards.len()
207 )
208 } else {
209 title.clone()
210 };
211
212 let prev_shard_id = if shards.len() > 1 {
213 Some(shard_uuids[shards.len() - 2].as_str())
214 } else {
215 None
216 };
217
218 let latest_jsonl = build_shard_jsonl(
219 &session,
220 &last_shard.requests,
221 &session_id, &latest_title,
223 &session_id,
224 shards.len() - 1,
225 shards.len(),
226 prev_shard_id,
227 None,
228 last_shard.start_idx,
229 last_shard.end_idx,
230 )?;
231
232 let target_path = if file_path.extension().map_or(false, |e| e == "json") {
234 file_path.with_extension("jsonl")
235 } else {
236 file_path.clone()
237 };
238
239 if dry_run {
240 let size_mb = latest_jsonl.len() as f64 / 1024.0 / 1024.0;
241 println!(
242 " {} Would replace original with latest shard ({} requests, {:.1} MB)",
243 "[dry-run]".bright_black(),
244 last_shard.requests.len(),
245 size_mb
246 );
247 } else {
248 std::fs::write(&target_path, &latest_jsonl)?;
249 let size_mb = latest_jsonl.len() as f64 / 1024.0 / 1024.0;
250 println!(
251 " Replaced original with latest shard ({} requests, {:.1} MB)",
252 last_shard.requests.len(),
253 size_mb
254 );
255 }
256 }
257
258 if update_index {
260 let ws_hash = match workspace {
261 Some(w) => w.to_string(),
262 None => infer_workspace_hash(&file_path)?,
263 };
264
265 if dry_run {
266 println!(
267 " {} Would update session index for workspace {}",
268 "[dry-run]".bright_black(),
269 &ws_hash[..8]
270 );
271 } else {
272 update_shard_index(&ws_hash, &session_id, &shards, &shard_uuids, &session)?;
273 println!(" Updated session index ({} shard entries)", shards.len());
274 }
275 }
276
277 if dry_run {
278 println!(
279 "\n{}",
280 "Dry run complete — no files were modified.".bright_black()
281 );
282 } else {
283 println!(
284 "\n{}",
285 format!(
286 "Done — {} shards created with linked-list chain.",
287 shards.len()
288 )
289 .green()
290 .bold()
291 );
292 }
293
294 Ok(())
295}
296
297pub fn shard_workspace(
299 workspace: Option<&str>,
300 max_requests: Option<usize>,
301 max_size: Option<String>,
302 dry_run: bool,
303 no_backup: bool,
304) -> Result<()> {
305 let strategy = parse_strategy(max_requests, max_size.clone())?;
306
307 let (ws_hash, chat_sessions_dir) = resolve_workspace(workspace)?;
309
310 if !chat_sessions_dir.exists() {
311 println!("{}", "No chatSessions directory found.".yellow());
312 return Ok(());
313 }
314
315 let mut candidates: Vec<PathBuf> = Vec::new();
317 for entry in std::fs::read_dir(&chat_sessions_dir)? {
318 let entry = entry?;
319 let path = entry.path();
320 let ext = path
321 .extension()
322 .map(|e| e.to_string_lossy().to_string())
323 .unwrap_or_default();
324
325 if ext != "json" && ext != "jsonl" {
326 continue;
327 }
328
329 let name = path
331 .file_name()
332 .unwrap_or_default()
333 .to_string_lossy()
334 .to_string();
335 if name.contains(".oversized") || name.contains(".bak") || name.contains(".backup") {
336 continue;
337 }
338
339 candidates.push(path);
340 }
341
342 if candidates.is_empty() {
343 println!("{}", "No session files found.".yellow());
344 return Ok(());
345 }
346
347 println!(
348 "Scanning {} session files in workspace {}...",
349 candidates.len(),
350 &ws_hash[..8]
351 );
352
353 let mut sharded_count = 0;
354 for candidate in &candidates {
355 let content = match std::fs::read_to_string(candidate) {
356 Ok(c) => c,
357 Err(_) => continue,
358 };
359 let (session, _) = match parse_session_auto(&content) {
360 Ok(s) => s,
361 Err(_) => continue,
362 };
363
364 let needs_shard = match &strategy {
365 ShardStrategy::ByRequests(max) => session.requests.len() > *max,
366 ShardStrategy::BySize(max_bytes) => content.len() > *max_bytes,
367 };
368
369 if !needs_shard {
370 continue;
371 }
372
373 let file_str = candidate.to_string_lossy().to_string();
374 println!();
375 shard_session(
376 &file_str,
377 max_requests,
378 max_size.clone(),
379 None,
380 true,
381 Some(&ws_hash),
382 dry_run,
383 no_backup,
384 )?;
385 sharded_count += 1;
386 }
387
388 if sharded_count == 0 {
389 println!(
390 "{}",
391 "No sessions exceed the shard threshold.".bright_black()
392 );
393 } else {
394 println!(
395 "\n{}",
396 format!("Sharded {} session(s).", sharded_count)
397 .green()
398 .bold()
399 );
400 }
401
402 Ok(())
403}
404
405pub fn shard_info(file: &str) -> Result<()> {
407 let file_path = PathBuf::from(file);
408 if !file_path.exists() {
409 anyhow::bail!("Session file not found: {}", file_path.display());
410 }
411
412 let content = std::fs::read_to_string(&file_path)?;
413 let (session, _) = parse_session_auto(&content)?;
414
415 let raw: serde_json::Value = if content.trim_start().starts_with('{') {
417 if let Some(first_line) = content.lines().next() {
419 let parsed: serde_json::Value = serde_json::from_str(first_line)?;
420 if let Some(v) = parsed.get("v") {
421 v.clone()
422 } else {
423 parsed
424 }
425 } else {
426 serde_json::Value::Null
427 }
428 } else {
429 serde_json::Value::Null
430 };
431
432 let title = session.title();
433 let req_count = session.requests.len();
434
435 println!("{}", format!("Session: {}", title).cyan().bold());
436 println!(" Requests: {}", req_count);
437 println!(
438 " File: {}",
439 file_path.file_name().unwrap_or_default().to_string_lossy()
440 );
441
442 if let Some(shard_info) = raw.get("_shardInfo") {
443 println!();
444 println!("{}", "Shard Info:".cyan());
445 if let Some(orig) = shard_info.get("originalSessionId") {
446 println!(" Original Session: {}", orig);
447 }
448 if let Some(idx) = shard_info.get("shardIndex") {
449 let total = shard_info
450 .get("totalShards")
451 .and_then(|v| v.as_u64())
452 .unwrap_or(0);
453 println!(
454 " Position: Part {}/{}",
455 idx.as_u64().unwrap_or(0) + 1,
456 total
457 );
458 }
459 if let Some(prev) = shard_info.get("prevShardId") {
460 if !prev.is_null() {
461 println!(" Previous Shard: {}", prev);
462 } else {
463 println!(" Previous Shard: (none — this is the first shard)");
464 }
465 }
466 if let Some(next) = shard_info.get("nextShardId") {
467 if !next.is_null() {
468 println!(" Next Shard: {}", next);
469 } else {
470 println!(" Next Shard: (none — this is the last shard)");
471 }
472 }
473 if let Some(range) = shard_info.get("requestRange") {
474 let start = range.get("start").and_then(|v| v.as_u64()).unwrap_or(0);
475 let end = range.get("end").and_then(|v| v.as_u64()).unwrap_or(0);
476 println!(" Request Range: {}-{}", start, end);
477 }
478 } else {
479 println!(
480 "\n{}",
481 "This session is not a shard (no _shardInfo metadata).".bright_black()
482 );
483 }
484
485 Ok(())
486}
487
/// How a session is divided into shards.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ShardStrategy {
    /// Limit each shard to at most this many requests.
    ByRequests(usize),
    /// Limit each shard to roughly this many bytes of serialized requests.
    BySize(usize),
}
497
498struct Shard {
500 requests: Vec<serde_json::Value>,
501 start_idx: usize,
502 end_idx: usize,
503}
504
505fn parse_strategy(max_requests: Option<usize>, max_size: Option<String>) -> Result<ShardStrategy> {
507 match (max_requests, max_size) {
508 (Some(_), Some(_)) => {
509 anyhow::bail!("Cannot specify both --max-requests and --max-size");
510 }
511 (Some(n), None) => {
512 if n == 0 {
513 anyhow::bail!("--max-requests must be > 0");
514 }
515 Ok(ShardStrategy::ByRequests(n))
516 }
517 (None, Some(s)) => {
518 let bytes = parse_size_string(&s)?;
519 Ok(ShardStrategy::BySize(bytes))
520 }
521 (None, None) => {
522 Ok(ShardStrategy::ByRequests(50))
524 }
525 }
526}
527
528fn parse_size_string(s: &str) -> Result<usize> {
530 let s = s.trim().to_uppercase();
531
532 let (num_str, multiplier) = if s.ends_with("GB") {
534 (&s[..s.len() - 2], 1024 * 1024 * 1024)
535 } else if s.ends_with("MB") {
536 (&s[..s.len() - 2], 1024 * 1024)
537 } else if s.ends_with("KB") {
538 (&s[..s.len() - 2], 1024)
539 } else if s.ends_with('B') {
540 (&s[..s.len() - 1], 1)
541 } else {
542 (s.as_str(), 1024 * 1024)
544 };
545
546 let num: f64 = num_str
547 .trim()
548 .parse()
549 .context(format!("Invalid size: {}", s))?;
550
551 if num <= 0.0 {
552 anyhow::bail!("--max-size must be > 0");
553 }
554
555 Ok((num * multiplier as f64) as usize)
556}
557
558fn split_session(session: &ChatSession, strategy: &ShardStrategy, raw_content: &str) -> Vec<Shard> {
563 let raw_requests = extract_raw_requests(raw_content);
565 let requests = if raw_requests.len() == session.requests.len() {
566 raw_requests
567 } else {
568 session
570 .requests
571 .iter()
572 .map(|r| serde_json::to_value(r).unwrap_or(serde_json::Value::Null))
573 .collect()
574 };
575
576 match strategy {
577 ShardStrategy::ByRequests(max) => split_by_requests(&requests, *max),
578 ShardStrategy::BySize(max_bytes) => split_by_size(&requests, *max_bytes),
579 }
580}
581
582fn split_by_requests(requests: &[serde_json::Value], max: usize) -> Vec<Shard> {
583 let mut shards = Vec::new();
584 let mut start = 0;
585
586 while start < requests.len() {
587 let end = std::cmp::min(start + max, requests.len());
588 shards.push(Shard {
589 requests: requests[start..end].to_vec(),
590 start_idx: start,
591 end_idx: end - 1,
592 });
593 start = end;
594 }
595
596 shards
597}
598
599fn split_by_size(requests: &[serde_json::Value], max_bytes: usize) -> Vec<Shard> {
600 let mut shards = Vec::new();
601 let mut current: Vec<serde_json::Value> = Vec::new();
602 let mut current_size: usize = 0;
603 let mut start_idx: usize = 0;
604
605 for (i, req) in requests.iter().enumerate() {
606 let req_size = serde_json::to_string(req).map(|s| s.len()).unwrap_or(0);
607
608 if !current.is_empty() && current_size + req_size > max_bytes {
610 shards.push(Shard {
611 requests: std::mem::take(&mut current),
612 start_idx,
613 end_idx: i - 1,
614 });
615 current_size = 0;
616 start_idx = i;
617 }
618
619 current.push(req.clone());
620 current_size += req_size;
621 }
622
623 if !current.is_empty() {
624 shards.push(Shard {
625 requests: current,
626 start_idx,
627 end_idx: requests.len() - 1,
628 });
629 }
630
631 shards
632}
633
634fn extract_raw_requests(content: &str) -> Vec<serde_json::Value> {
636 let trimmed = content.trim();
637
638 if let Some(first_line) = trimmed.lines().next() {
640 if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(first_line) {
641 if parsed.get("kind").and_then(|k| k.as_u64()) == Some(0) {
642 if let Some(requests) = parsed
643 .get("v")
644 .and_then(|v| v.get("requests"))
645 .and_then(|r| r.as_array())
646 {
647 let line_count = trimmed.lines().count();
652 if line_count == 1 {
653 return requests.clone();
654 }
655 }
656 }
657 }
658 }
659
660 if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(trimmed) {
662 if let Some(requests) = parsed.get("requests").and_then(|r| r.as_array()) {
663 return requests.clone();
664 }
665 }
666
667 Vec::new()
668}
669
670fn deterministic_uuid(session_id: &str, shard_index: usize) -> String {
672 let input = format!("{}-shard-{}", session_id, shard_index);
673 let digest = md5::compute(input.as_bytes());
674 let hex = format!("{:x}", digest);
675 format!(
676 "{}-{}-{}-{}-{}",
677 &hex[0..8],
678 &hex[8..12],
679 &hex[12..16],
680 &hex[16..20],
681 &hex[20..32]
682 )
683}
684
685fn build_shard_jsonl(
687 session: &ChatSession,
688 requests: &[serde_json::Value],
689 shard_session_id: &str,
690 shard_title: &str,
691 original_session_id: &str,
692 shard_index: usize,
693 total_shards: usize,
694 prev_shard_id: Option<&str>,
695 next_shard_id: Option<&str>,
696 start_idx: usize,
697 end_idx: usize,
698) -> Result<String> {
699 let mut initial = serde_json::json!({
700 "kind": 0,
701 "v": {
702 "version": session.version,
703 "sessionId": shard_session_id,
704 "creationDate": session.creation_date,
705 "customTitle": shard_title,
706 "initialLocation": session.initial_location,
707 "responderUsername": session.responder_username,
708 "requests": requests,
709 "_shardInfo": {
710 "originalSessionId": original_session_id,
711 "shardIndex": shard_index,
712 "totalShards": total_shards,
713 "prevShardId": prev_shard_id,
714 "nextShardId": next_shard_id,
715 "requestRange": {
716 "start": start_idx,
717 "end": end_idx,
718 },
719 }
720 }
721 });
722
723 if let Some(v) = initial.get_mut("v") {
725 ensure_vscode_compat_fields(v, Some(shard_session_id));
726 }
727
728 let line = serde_json::to_string(&initial)?;
729 Ok(format!("{}\n", line))
730}
731
732fn infer_workspace_hash(file_path: &Path) -> Result<String> {
737 let mut current = file_path.parent();
739 while let Some(dir) = current {
740 if dir.file_name().map_or(false, |n| n == "chatSessions") {
741 if let Some(ws_dir) = dir.parent() {
742 if let Some(hash) = ws_dir.file_name() {
743 return Ok(hash.to_string_lossy().to_string());
744 }
745 }
746 }
747 current = dir.parent();
748 }
749 anyhow::bail!(
750 "Cannot infer workspace hash from path: {}. Use --workspace to specify it.",
751 file_path.display()
752 );
753}
754
755fn resolve_workspace(workspace: Option<&str>) -> Result<(String, PathBuf)> {
758 if let Some(ws) = workspace {
759 if ws.len() == 32 && ws.chars().all(|c| c.is_ascii_hexdigit()) {
761 let storage = crate::workspace::get_workspace_storage_path()?;
763 let chat_dir = storage.join(ws).join("chatSessions");
764 return Ok((ws.to_string(), chat_dir));
765 }
766
767 match crate::workspace::find_workspace_by_path(ws) {
769 Ok(Some((hash, ws_dir, _folder))) => {
770 let chat_dir = ws_dir.join("chatSessions");
771 Ok((hash, chat_dir))
772 }
773 Ok(None) => anyhow::bail!("No workspace found for path: {}", ws),
774 Err(e) => anyhow::bail!("Error finding workspace: {}", e),
775 }
776 } else {
777 let cwd = std::env::current_dir()?;
779 let cwd_str = cwd.to_string_lossy().to_string();
780 match crate::workspace::find_workspace_by_path(&cwd_str) {
781 Ok(Some((hash, ws_dir, _folder))) => {
782 let chat_dir = ws_dir.join("chatSessions");
783 Ok((hash, chat_dir))
784 }
785 Ok(None) => anyhow::bail!(
786 "No VS Code workspace found for current directory. Use --workspace to specify one."
787 ),
788 Err(e) => anyhow::bail!("Error finding workspace: {}", e),
789 }
790 }
791}
792
793fn update_shard_index(
795 workspace_hash: &str,
796 original_session_id: &str,
797 shards: &[Shard],
798 shard_uuids: &[String],
799 session: &ChatSession,
800) -> Result<()> {
801 let db_path = get_workspace_storage_db(workspace_hash)?;
802 if !db_path.exists() {
803 anyhow::bail!("Workspace database not found: {}", db_path.display());
804 }
805
806 let mut index = read_chat_session_index(&db_path)?;
807 let title = session.title();
808
809 for (i, shard) in shards.iter().enumerate() {
811 if i == shards.len() - 1 {
813 if let Some(entry) = index.entries.get_mut(original_session_id) {
815 entry.title = if shards.len() > 1 {
816 format!(
817 "{} (Latest — Part {}/{})",
818 title,
819 shards.len(),
820 shards.len()
821 )
822 } else {
823 title.clone()
824 };
825 }
826 continue;
827 }
828
829 let uuid = &shard_uuids[i];
830 let shard_title = format!("{} (Part {}/{})", title, i + 1, shards.len());
831
832 let last_req = shard.requests.last();
834 let first_req = shard.requests.first();
835 let last_ts = last_req
836 .and_then(|r| r.get("timestamp"))
837 .and_then(|t| t.as_i64())
838 .unwrap_or(session.last_message_date);
839 let first_ts = first_req
840 .and_then(|r| r.get("timestamp"))
841 .and_then(|t| t.as_i64())
842 .unwrap_or(session.creation_date);
843
844 index.entries.insert(
845 uuid.clone(),
846 crate::models::ChatSessionIndexEntry {
847 session_id: uuid.clone(),
848 title: shard_title,
849 last_message_date: last_ts,
850 timing: Some(ChatSessionTiming {
851 created: first_ts,
852 last_request_started: Some(last_ts),
853 last_request_ended: Some(last_ts),
854 }),
855 last_response_state: 1,
856 initial_location: "panel".to_string(),
857 is_empty: false,
858 is_imported: Some(false),
859 has_pending_edits: Some(false),
860 is_external: Some(false),
861 },
862 );
863 }
864
865 write_chat_session_index(&db_path, &index)?;
866 Ok(())
867}