1use anyhow::Result;
9use serde::{Deserialize, Serialize};
10use std::borrow::Cow;
11use std::collections::{BTreeMap, HashMap, HashSet};
12use std::fs;
13use std::path::{Path, PathBuf};
14
15use crate::timeline::{FrameKind, Kind, TimelineEntry};
16
/// A single retrieval-sized unit of timeline text plus the metadata needed to
/// index, file, and attribute it.
#[derive(Debug, Clone)]
pub struct Chunk {
    /// Stable identifier of the form `{project}_{agent}_{date}_{seq:03}`.
    pub id: String,
    pub project: String,
    pub agent: String,
    /// Calendar day (`YYYY-MM-DD`) the chunk's entries belong to.
    pub date: String,
    /// Session id taken from the first entry in the chunk window.
    pub session_id: String,
    /// Working directory of the first entry in the window, when recorded.
    pub cwd: Option<String>,
    /// Coarse content classification (plans / reports / conversations / other).
    pub kind: Kind,
    /// Frame kind shared by every entry in the window; `None` if mixed or unset.
    pub frame_kind: Option<FrameKind>,
    // The telemetry/steering fields below start as `None` and are back-filled
    // from report frontmatter (see `apply_frontmatter`) when present.
    pub run_id: Option<String>,
    pub prompt_id: Option<String>,
    pub agent_model: Option<String>,
    pub started_at: Option<String>,
    pub completed_at: Option<String>,
    pub token_usage: Option<u64>,
    pub findings_count: Option<u32>,
    pub workflow_phase: Option<String>,
    pub mode: Option<String>,
    pub skill_code: Option<String>,
    pub framework_version: Option<String>,
    /// Half-open `[start, end)` index range into the original entry slice.
    pub msg_range: (usize, usize),
    /// Fully rendered chunk text (header + optional signals block + messages).
    pub text: String,
    /// Rough token count of `text` (bytes / 4, rounded up).
    pub token_estimate: usize,
    /// Up to three notable first-lines pulled from highlight-worthy messages.
    pub highlights: Vec<String>,
}
69
/// Serializable metadata sidecar written as `<chunk id>.meta.json` next to
/// each chunk's text file; mirrors `Chunk`'s metadata without the rendered
/// text or message range.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ChunkMetadataSidecar {
    pub id: String,
    pub project: String,
    pub agent: String,
    pub date: String,
    pub session_id: String,
    // Optional fields are omitted from the JSON entirely when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cwd: Option<String>,
    pub kind: Kind,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frame_kind: Option<FrameKind>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub run_id: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_id: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub agent_model: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub started_at: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub completed_at: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub token_usage: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub findings_count: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub workflow_phase: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mode: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub skill_code: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub framework_version: Option<String>,
    // Presumably filled by a later enrichment pass — `From<&Chunk>` always
    // leaves this empty. NOTE(review): confirm against callers.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub intent_entries: Vec<crate::types::IntentEntry>,
}
108
impl From<&Chunk> for ChunkMetadataSidecar {
    /// Copies every metadata field from the chunk; `intent_entries` starts
    /// empty here.
    fn from(chunk: &Chunk) -> Self {
        Self {
            id: chunk.id.clone(),
            project: chunk.project.clone(),
            agent: chunk.agent.clone(),
            date: chunk.date.clone(),
            session_id: chunk.session_id.clone(),
            cwd: chunk.cwd.clone(),
            kind: chunk.kind,
            frame_kind: chunk.frame_kind,
            run_id: chunk.run_id.clone(),
            prompt_id: chunk.prompt_id.clone(),
            agent_model: chunk.agent_model.clone(),
            started_at: chunk.started_at.clone(),
            completed_at: chunk.completed_at.clone(),
            token_usage: chunk.token_usage,
            findings_count: chunk.findings_count,
            workflow_phase: chunk.workflow_phase.clone(),
            mode: chunk.mode.clone(),
            skill_code: chunk.skill_code.clone(),
            framework_version: chunk.framework_version.clone(),
            intent_entries: Vec::new(),
        }
    }
}
135
/// Tunables controlling how timeline entries are packed into chunks.
#[derive(Debug, Clone)]
pub struct ChunkerConfig {
    /// Preferred chunk size; window growth stops once this is reached.
    pub target_tokens: usize,
    /// Soft lower bound. NOTE(review): not consulted by the chunking loop
    /// visible in this file — confirm intended use.
    pub min_tokens: usize,
    /// Hard cap; a window never grows past this unless a single message alone
    /// already exceeds it.
    pub max_tokens: usize,
    /// Number of trailing messages repeated at the start of the next chunk.
    pub overlap_messages: usize,
}
148
149impl Default for ChunkerConfig {
150 fn default() -> Self {
151 Self {
152 target_tokens: 1500,
153 min_tokens: 500,
154 max_tokens: 2500,
155 overlap_messages: 2,
156 }
157 }
158}
159
/// Rough token estimate for a piece of text.
///
/// Heuristic: roughly four bytes per token, rounded up so any non-empty
/// string counts as at least one token.
pub fn estimate_tokens(text: &str) -> usize {
    const BYTES_PER_TOKEN: usize = 4;
    text.len().div_ceil(BYTES_PER_TOKEN)
}
171
/// Substrings (matched against lowercased assistant messages) that score a
/// window toward `Kind::Plans` in `classify_kind`.
const PLAN_KEYWORDS: &[&str] = &[
    "implementation plan",
    "plan:",
    "## plan",
    "step 1:",
    "step 2:",
    "step 3:",
    "action items",
    "milestones",
    "roadmap",
    "todo list",
    "acceptance criteria",
    "## steps",
    "## phases",
];
189
/// Substrings (matched against lowercased assistant messages) that score a
/// window toward `Kind::Reports` in `classify_kind`.
const REPORT_KEYWORDS: &[&str] = &[
    "## findings",
    "## summary",
    "## report",
    "audit report",
    "coverage report",
    "test results",
    "## metrics",
    "## recommendations",
    "## conclusion",
    "status report",
    "incident report",
    "pr review",
    "code review",
];
205
206pub fn classify_kind(entries: &[TimelineEntry]) -> Kind {
216 if entries.is_empty() {
217 return Kind::Other;
218 }
219
220 let mut plan_score: u32 = 0;
221 let mut report_score: u32 = 0;
222 let mut has_conversation = false;
223
224 for entry in entries {
225 let lower = entry.message.to_lowercase();
226
227 if entry.role == "assistant" {
229 for kw in PLAN_KEYWORDS {
230 if lower.contains(kw) {
231 plan_score += 1;
232 }
233 }
234 for kw in REPORT_KEYWORDS {
235 if lower.contains(kw) {
236 report_score += 1;
237 }
238 }
239 }
240
241 if entry.role == "user" || entry.role == "assistant" {
242 has_conversation = true;
243 }
244 }
245
246 let threshold = 3;
247
248 if plan_score >= threshold && plan_score > report_score {
249 Kind::Plans
250 } else if report_score >= threshold && report_score > plan_score {
251 Kind::Reports
252 } else if has_conversation {
253 Kind::Conversations
254 } else {
255 Kind::Other
256 }
257}
258
259fn prepare_entries_for_chunking<'a>(
260 entries: &'a [TimelineEntry],
261) -> (
262 Option<crate::frontmatter::ReportFrontmatter>,
263 Cow<'a, [TimelineEntry]>,
264) {
265 let Some(first) = entries.first() else {
266 return (None, Cow::Borrowed(entries));
267 };
268
269 if !first.message.trim_start().starts_with("---") {
270 return (None, Cow::Borrowed(entries));
271 }
272
273 let (frontmatter, body) = crate::frontmatter::parse(&first.message);
274 if body == first.message {
275 return (None, Cow::Borrowed(entries));
276 }
277
278 let mut stripped_entries = entries.to_vec();
279 if let Some(stripped_first) = stripped_entries.first_mut() {
280 stripped_first.message = body.to_string();
281 }
282
283 (frontmatter, Cow::Owned(stripped_entries))
284}
285
/// Back-fills a chunk's telemetry/steering fields from parsed report
/// frontmatter.
///
/// `frame_kind` is only taken from frontmatter when the entries themselves did
/// not establish one; every other field is overwritten unconditionally (chunks
/// arrive here freshly built with `None` in all of these fields).
fn apply_frontmatter(chunk: &mut Chunk, frontmatter: &crate::frontmatter::ReportFrontmatter) {
    if chunk.frame_kind.is_none() {
        chunk.frame_kind = frontmatter.telemetry.frame_kind;
    }
    chunk.run_id = frontmatter.telemetry.run_id.clone();
    chunk.prompt_id = frontmatter.telemetry.prompt_id.clone();
    chunk.agent_model = frontmatter.telemetry.model.clone();
    chunk.started_at = frontmatter.telemetry.started_at.clone();
    chunk.completed_at = frontmatter.telemetry.completed_at.clone();
    chunk.token_usage = frontmatter.telemetry.token_usage;
    chunk.findings_count = frontmatter.telemetry.findings_count;
    chunk.workflow_phase = frontmatter.steering.workflow_phase.clone();
    chunk.mode = frontmatter.steering.mode.clone();
    chunk.skill_code = frontmatter.steering.skill_code.clone();
    chunk.framework_version = frontmatter.steering.framework_version.clone();
}
302
303fn split_day_entries_by_frame_kind<'a>(
304 entries: &'a [(usize, &'a TimelineEntry)],
305) -> Vec<&'a [(usize, &'a TimelineEntry)]> {
306 if entries.is_empty() {
307 return Vec::new();
308 }
309
310 let mut groups = Vec::new();
311 let mut start = 0usize;
312
313 for idx in 1..entries.len() {
314 let previous = entries[idx - 1].1.frame_kind;
315 let current = entries[idx].1.frame_kind;
316 if previous != current {
317 groups.push(&entries[start..idx]);
318 start = idx;
319 }
320 }
321
322 groups.push(&entries[start..]);
323 groups
324}
325
326fn frame_kind_for_window(entries: &[&TimelineEntry]) -> Option<FrameKind> {
327 let first = entries.first().and_then(|entry| entry.frame_kind)?;
328 entries
329 .iter()
330 .all(|entry| entry.frame_kind == Some(first))
331 .then_some(first)
332}
333
/// Splits a timeline into retrieval-sized chunks grouped by calendar day.
///
/// Pipeline: strip optional report frontmatter from the first entry, bucket
/// entries by date, split each day at frame-kind boundaries, size the windows
/// with `chunk_day_entries` (sequence numbers restart at 1 per day but are
/// continuous across a day's frame groups), then back-fill frontmatter
/// metadata onto every produced chunk.
pub fn chunk_entries(
    entries: &[TimelineEntry],
    project: &str,
    agent: &str,
    config: &ChunkerConfig,
) -> Vec<Chunk> {
    if entries.is_empty() {
        return vec![];
    }

    let (frontmatter, prepared_entries) = prepare_entries_for_chunking(entries);
    let prepared_entries = prepared_entries.as_ref();

    // Bucket entries by day; BTreeMap keeps dates in ascending order, and the
    // original index travels with each entry for `msg_range` bookkeeping.
    let mut by_date: BTreeMap<String, Vec<(usize, &TimelineEntry)>> = BTreeMap::new();
    for (idx, entry) in prepared_entries.iter().enumerate() {
        let date = entry.timestamp.format("%Y-%m-%d").to_string();
        by_date.entry(date).or_default().push((idx, entry));
    }

    let mut chunks = Vec::new();

    for (date, day_entries) in &by_date {
        let mut day_chunks = Vec::new();
        let mut next_seq = 1usize;
        for frame_group in split_day_entries_by_frame_kind(day_entries) {
            let (mut group_chunks, updated_seq) =
                chunk_day_entries(frame_group, project, agent, date, config, next_seq);
            next_seq = updated_seq;
            day_chunks.append(&mut group_chunks);
        }
        // Frontmatter metadata (if any) applies to every chunk of the timeline.
        if let Some(frontmatter) = frontmatter.as_ref() {
            for chunk in &mut day_chunks {
                apply_frontmatter(chunk, frontmatter);
            }
        }
        chunks.extend(day_chunks);
    }

    chunks
}
383
384fn chunk_day_entries(
386 entries: &[(usize, &TimelineEntry)],
387 project: &str,
388 agent: &str,
389 date: &str,
390 config: &ChunkerConfig,
391 start_seq: usize,
392) -> (Vec<Chunk>, usize) {
393 if entries.is_empty() {
394 return (vec![], start_seq);
395 }
396
397 let mut chunks = Vec::new();
398 let mut seq = start_seq;
399 let mut start = 0usize;
400
401 while start < entries.len() {
402 let mut end = start;
404 let mut accumulated_tokens = 0usize;
405
406 while end < entries.len() {
407 let msg_tokens = estimate_tokens(&entries[end].1.message);
408 let next_total = accumulated_tokens + msg_tokens + 20; if next_total > config.max_tokens && end > start {
411 break;
412 }
413
414 accumulated_tokens = next_total;
415 end += 1;
416
417 if accumulated_tokens >= config.target_tokens {
418 break;
419 }
420 }
421
422 let window: Vec<&TimelineEntry> = entries[start..end].iter().map(|(_, e)| *e).collect();
424 let highlights = extract_highlights(&window);
425 let signals = extract_signals(&window);
426 let frame_kind = frame_kind_for_window(&window);
427 let text = format_chunk_text_inner(
428 &window,
429 project,
430 agent,
431 date,
432 frame_kind,
433 &signals,
434 &highlights,
435 );
436 let token_estimate = estimate_tokens(&text);
437
438 let session_id = window
439 .first()
440 .map(|e| e.session_id.clone())
441 .unwrap_or_default();
442 let cwd = window.first().and_then(|entry| entry.cwd.clone());
443
444 let global_start = entries[start].0;
445 let global_end = entries[end - 1].0 + 1;
446
447 let kind = classify_kind(&window.iter().map(|e| (*e).clone()).collect::<Vec<_>>());
448
449 chunks.push(Chunk {
450 id: format!("{}_{}_{}_{{:03}}", project, agent, date)
451 .replace("{:03}", &format!("{:03}", seq)),
452 project: project.to_string(),
453 agent: agent.to_string(),
454 date: date.to_string(),
455 session_id,
456 cwd,
457 kind,
458 frame_kind,
459 run_id: None,
460 prompt_id: None,
461 agent_model: None,
462 started_at: None,
463 completed_at: None,
464 token_usage: None,
465 findings_count: None,
466 workflow_phase: None,
467 mode: None,
468 skill_code: None,
469 framework_version: None,
470 msg_range: (global_start, global_end),
471 text,
472 token_estimate,
473 highlights,
474 });
475
476 seq += 1;
477
478 let overlap = config.overlap_messages.min(end - start);
480 let next_start = if end >= entries.len() {
481 entries.len() } else if end - overlap > start {
483 end - overlap
484 } else {
485 end };
487
488 start = next_start;
489 }
490
491 (chunks, seq)
492}
493
494pub fn format_chunk_text(
496 entries: &[&TimelineEntry],
497 project: &str,
498 agent: &str,
499 date: &str,
500) -> String {
501 let highlights = extract_highlights(entries);
502 let signals = extract_signals(entries);
503 format_chunk_text_inner(
504 entries,
505 project,
506 agent,
507 date,
508 frame_kind_for_window(entries),
509 &signals,
510 &highlights,
511 )
512}
513
514fn format_chunk_text_inner(
515 entries: &[&TimelineEntry],
516 project: &str,
517 agent: &str,
518 date: &str,
519 frame_kind: Option<FrameKind>,
520 signals: &ChunkSignals,
521 highlights: &[String],
522) -> String {
523 let mut text = if let Some(frame_kind) = frame_kind {
524 format!(
525 "[project: {} | agent: {} | date: {} | frame_kind: {}]\n\n",
526 project, agent, date, frame_kind
527 )
528 } else {
529 format!(
530 "[project: {} | agent: {} | date: {}]\n\n",
531 project, agent, date
532 )
533 };
534
535 if let Some(block) = format_signals_block(signals, highlights) {
536 text.push_str(&block);
537 text.push('\n');
538 }
539
540 for entry in entries {
541 let time = entry.timestamp.format("%H:%M:%S");
542 let msg = if entry.message.len() > 4000 {
544 truncate_message_bytes(&entry.message, 4000)
545 } else {
546 entry.message.clone()
547 };
548 text.push_str(&format!("[{}] {}: {}\n", time, entry.role, msg));
549 }
550
551 text
552}
553
/// Substrings (matched case-insensitively) that flag a message as
/// highlight-worthy for `extract_highlights`.
const HIGHLIGHT_KEYWORDS: &[&str] = &[
    "decision:",
    "plan:",
    "architecture",
    "breaking",
    "todo:",
    "fixme:",
];

/// Markers matched with exact case ("WAŻNE" is Polish for "IMPORTANT").
const HIGHLIGHT_KEYWORDS_CASE_SENSITIVE: &[&str] = &["WAŻNE", "KEY"];
564
565fn extract_highlights(entries: &[&TimelineEntry]) -> Vec<String> {
566 let mut highlights = Vec::new();
567 for entry in entries {
568 if highlights.len() >= 3 {
569 break;
570 }
571 if !is_highlight_message(&entry.message) {
572 continue;
573 }
574
575 if let Some(line) = entry.message.lines().map(str::trim).find(|l| !l.is_empty())
576 && highlights.last().map(String::as_str) != Some(line)
577 {
578 highlights.push(line.to_string());
579 }
580 }
581 highlights
582}
583
584fn is_highlight_message(message: &str) -> bool {
585 let lower = message.to_lowercase();
586 HIGHLIGHT_KEYWORDS.iter().any(|kw| lower.contains(kw))
587 || HIGHLIGHT_KEYWORDS_CASE_SENSITIVE
588 .iter()
589 .any(|kw| message.contains(kw))
590}
591
/// Lightweight signals mined from a chunk window, rendered into the
/// `[signals]` preamble by `format_signals_block`.
#[derive(Debug, Clone, Default)]
struct ChunkSignals {
    // Unchecked checklist items (`- [ ]`), in first-seen order.
    todo_open: Vec<String>,
    // Checked checklist items (`- [x]` / `- [X]`).
    todo_done: Vec<String>,
    // Blocks anchored at "ultrathink" marker lines.
    ultrathink: Vec<String>,
    // Blocks anchored at insight markers.
    insights: Vec<String>,
    // Plan-mode lifecycle lines (accept / bypass-permissions events).
    plan_mode: Vec<String>,
    // User lines expressing intent (see `INTENT_KEYWORDS`).
    intents: Vec<String>,
    // Lines reporting completion/results (see `RESULT_KEYWORDS`).
    results: Vec<String>,
    // Skill-enter marker blocks.
    skills: Vec<String>,
    // Decision marker blocks.
    decisions: Vec<String>,
    // Outcome/validation marker blocks.
    outcomes: Vec<String>,
}
609
// Per-category caps that keep the rendered [signals] block compact.
const MAX_TODO_ITEMS: usize = 8;
const MAX_ULTRATHINK_BLOCKS: usize = 4;
const MAX_INSIGHT_BLOCKS: usize = 6;
const MAX_PLAN_MODE_EVENTS: usize = 8;
const MAX_INTENT_LINES: usize = 6;
const MAX_RESULT_LINES: usize = 6;
// Max lines folded into one tag block (counting the tag line itself).
const MAX_TAG_BLOCK_LINES: usize = 4;
617
/// Phrases (Polish and English, listed with and without diacritics) that mark
/// a user line as expressing an intent or a proposed next step.
/// Matched case-insensitively via `is_intent_line`.
/// Fix: the list previously contained the literal `"ustalmy"` twice.
pub const INTENT_KEYWORDS: &[&str] = &[
    "mam pomysl",
    "mam pomysł",
    "mam taki pomysl",
    "mam taki pomysł",
    "pomysl",
    "pomysł",
    "proponuje",
    "proponuję",
    "zrobmy",
    "zróbmy",
    "ustalmy",
    "chce",
    "chcę",
    "chcialbym",
    "chciałbym",
    "potrzebuje",
    "potrzebuję",
    "następny krok",
    "nastepny krok",
    "kolejny krok",
    "i want",
    "i'd like",
    "let's",
    "next step",
];
647
/// Phrases (Polish and English) signalling a completed/verified result;
/// matched case-insensitively via `is_result_line`.
const RESULT_KEYWORDS: &[&str] = &[
    "smoke test",
    "passed",
    "all checks passed",
    "0 failed",
    "completed",
    "done",
    "zrobione",
    "dowiezione",
    "gotowe",
    "dziala",
    "działa",
];
661
/// Runs every signal extractor over the window and bundles the results:
/// checklist state, tagged blocks (ultrathink / insight / plan-mode / skill /
/// decision / outcome), user intent lines, and result lines.
fn extract_signals(entries: &[&TimelineEntry]) -> ChunkSignals {
    let (todo_open, todo_done) = extract_checklist_items(entries);
    let ultrathink = extract_tag_blocks(entries, is_ultrathink_tag, MAX_ULTRATHINK_BLOCKS);
    let insights = extract_tag_blocks(entries, is_insight_tag, MAX_INSIGHT_BLOCKS);
    let plan_mode = extract_tag_blocks(entries, is_plan_mode_tag, MAX_PLAN_MODE_EVENTS);
    let intents = extract_intent_lines(entries);
    let results = extract_result_lines(entries);
    // Skill/decision/outcome blocks share a fixed cap of 4.
    let skills = extract_tag_blocks(entries, is_skill_tag, 4);
    let decisions = extract_tag_blocks(entries, is_decision_tag, 4);
    let outcomes = extract_tag_blocks(entries, is_outcome_tag, 4);

    ChunkSignals {
        todo_open,
        todo_done,
        ultrathink,
        insights,
        plan_mode,
        intents,
        results,
        skills,
        decisions,
        outcomes,
    }
}
686
687fn extract_checklist_items(entries: &[&TimelineEntry]) -> (Vec<String>, Vec<String>) {
688 #[derive(Debug, Clone, Copy)]
689 enum TaskState {
690 Open,
691 Done,
692 }
693
694 let mut state_by_key: HashMap<String, TaskState> = HashMap::new();
695 let mut display_by_key: HashMap<String, String> = HashMap::new();
696 let mut order: Vec<String> = Vec::new();
697
698 for entry in entries {
699 for line in entry.message.lines() {
700 if let Some((is_done, task)) = parse_checklist_task(line) {
701 let key = normalize_key(&task);
702 if !state_by_key.contains_key(&key) {
703 order.push(key.clone());
704 display_by_key.insert(key.clone(), task);
705 state_by_key.insert(key.clone(), TaskState::Open);
706 }
707
708 if is_done {
710 state_by_key.insert(key, TaskState::Done);
711 }
712 }
713 }
714 }
715
716 let mut open = Vec::new();
717 let mut done = Vec::new();
718 for key in order {
719 let Some(task) = display_by_key.get(&key) else {
720 continue;
721 };
722 match state_by_key.get(&key) {
723 Some(TaskState::Done) => done.push(task.clone()),
724 Some(TaskState::Open) => open.push(task.clone()),
725 None => {}
726 }
727 }
728
729 (open, done)
730}
731
/// Parses a Markdown checklist line of the form `- [ ] task` / `- [x] task`
/// (bullet may be `-`, `*`, or `+`; `X` is accepted for checked).
///
/// Returns `(is_done, task_text)` or `None` when the line is not a checklist
/// item, uses an unknown state character, or has an empty task.
pub fn parse_checklist_task(line: &str) -> Option<(bool, String)> {
    // Bullet marker after any leading indentation.
    let rest = line.trim_start().strip_prefix(['-', '*', '+'])?;
    // Checkbox: "[" <state char> "]" with optional space before it.
    let rest = rest.trim_start().strip_prefix('[')?;
    let mut chars = rest.chars();
    let state = chars.next()?;
    let task = chars.as_str().strip_prefix(']')?.trim();
    if task.is_empty() {
        return None;
    }

    match state {
        'x' | 'X' => Some((true, task.to_string())),
        ' ' => Some((false, task.to_string())),
        _ => None,
    }
}
756
757fn extract_intent_lines(entries: &[&TimelineEntry]) -> Vec<String> {
758 let mut out = Vec::new();
759 let mut seen = HashSet::new();
760
761 for entry in entries {
762 if entry.role.to_lowercase() != "user" {
763 continue;
764 }
765 for line in entry.message.lines().map(str::trim) {
766 if line.is_empty() {
767 continue;
768 }
769 if !is_intent_line(line) {
770 continue;
771 }
772
773 let key = normalize_key(line);
774 if !seen.insert(key) {
775 continue;
776 }
777
778 out.push(truncate_signal_line(line));
779 if out.len() >= MAX_INTENT_LINES {
780 return out;
781 }
782 }
783 }
784
785 out
786}
787
788pub(crate) fn is_intent_line(line: &str) -> bool {
789 let lower = line.to_lowercase();
790 INTENT_KEYWORDS.iter().any(|kw| lower.contains(kw))
791}
792
793fn extract_result_lines(entries: &[&TimelineEntry]) -> Vec<String> {
794 let mut out = Vec::new();
795 let mut seen = HashSet::new();
796
797 for entry in entries {
798 for line in entry.message.lines().map(str::trim) {
799 if line.is_empty() {
800 continue;
801 }
802 if !is_result_line(line) {
803 continue;
804 }
805 let key = normalize_key(line);
806 if !seen.insert(key) {
807 continue;
808 }
809 out.push(truncate_signal_line(line));
810 if out.len() >= MAX_RESULT_LINES {
811 return out;
812 }
813 }
814 }
815
816 out
817}
818
819pub fn is_result_line(line: &str) -> bool {
820 let lower = line.to_lowercase();
821 RESULT_KEYWORDS.iter().any(|kw| lower.contains(kw))
822}
823
/// Normalizes text for dedup keys: collapses all whitespace runs to single
/// spaces, trims the ends, and lowercases the result.
pub fn normalize_key(s: &str) -> String {
    let mut collapsed = String::with_capacity(s.len());
    for (i, word) in s.split_whitespace().enumerate() {
        if i > 0 {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_lowercase()
}
830
831pub fn truncate_signal_line(line: &str) -> String {
832 const MAX_BYTES: usize = 240;
833 if line.len() <= MAX_BYTES {
834 return line.to_string();
835 }
836 truncate_message_bytes(line, MAX_BYTES)
837}
838
/// True when the line mentions "ultrathink" in any casing.
fn is_ultrathink_tag(line: &str) -> bool {
    let lowered = line.to_lowercase();
    lowered.contains("ultrathink")
}
842
/// True when the line is an insight marker: starts with "insight" or contains
/// one of the decorated variants (all matched case-insensitively).
fn is_insight_tag(line: &str) -> bool {
    let lowered = line.to_lowercase();
    if lowered.starts_with("insight") {
        return true;
    }
    ["★ insight", "insight ─", "insight -"]
        .iter()
        .any(|marker| lowered.contains(marker))
}
851
/// True when the line is a plan-mode lifecycle event (entering plan mode,
/// accepting the plan, or a bypass-permissions prompt), case-insensitively.
fn is_plan_mode_tag(line: &str) -> bool {
    let lowered = line.to_lowercase();
    [
        "plan mode",
        "accept plan",
        "user accepted the plan",
        "approve and bypass permissions",
        "bypass permissions",
    ]
    .iter()
    .any(|marker| lowered.contains(marker))
}
861
/// True when the line marks a skill activation: the `[skill_enter]` tag or a
/// known vetcoders skill name (case-insensitive).
fn is_skill_tag(line: &str) -> bool {
    let lowered = line.to_lowercase();
    [
        "[skill_enter]",
        "vetcoders-partner",
        "vetcoders-spawn",
        "vetcoders-ownership",
        "vetcoders-workflow",
    ]
    .iter()
    .any(|marker| lowered.contains(marker))
}
870
/// True when the line is a decision marker: contains `[decision]` anywhere or
/// begins with `decision:` (case-insensitive).
pub fn is_decision_tag(line: &str) -> bool {
    let lowered = line.to_lowercase();
    lowered.starts_with("decision:") || lowered.contains("[decision]")
}
875
/// True when the line is an outcome marker: contains `[skill_outcome]` or
/// begins with `outcome:` / `validation:` (case-insensitive).
pub fn is_outcome_tag(line: &str) -> bool {
    let lowered = line.to_lowercase();
    if lowered.contains("[skill_outcome]") {
        return true;
    }
    lowered.starts_with("outcome:") || lowered.starts_with("validation:")
}
882
/// Collects short text blocks anchored at lines matching `is_tag`.
///
/// Each block starts at a matching (trimmed, non-empty) line and absorbs the
/// following non-empty, non-tag lines until the block holds
/// `MAX_TAG_BLOCK_LINES` lines total. Blocks are joined into a single line,
/// deduplicated by normalized text, truncated for display, and capped at
/// `max_blocks` overall.
fn extract_tag_blocks(
    entries: &[&TimelineEntry],
    is_tag: fn(&str) -> bool,
    max_blocks: usize,
) -> Vec<String> {
    let mut out = Vec::new();
    let mut seen = HashSet::new();

    for entry in entries {
        let lines: Vec<&str> = entry.message.lines().collect();
        for (i, raw) in lines.iter().enumerate() {
            let line = raw.trim();
            if line.is_empty() || !is_tag(line) {
                continue;
            }

            // Absorb continuation lines following the tag line.
            let mut block = Vec::new();
            block.push(line);

            for raw_next in lines.iter().skip(i + 1) {
                let next = raw_next.trim();
                if next.is_empty() {
                    break;
                }
                if is_tag(next) {
                    // The next tag line anchors its own block.
                    break;
                }
                block.push(next);
                if block.len() >= MAX_TAG_BLOCK_LINES {
                    break;
                }
            }

            let joined = block.join(" ");
            let key = normalize_key(&joined);
            if !seen.insert(key) {
                continue;
            }

            out.push(truncate_signal_line(&joined));
            if out.len() >= max_blocks {
                return out;
            }
        }
    }

    out
}
931
/// Renders the `[signals] ... [/signals]` preamble summarizing checklist
/// state, tagged blocks, intents, results, and highlight notes.
///
/// Returns `None` when every category is empty so callers can skip the block.
fn format_signals_block(signals: &ChunkSignals, highlights: &[String]) -> Option<String> {
    // Emit nothing unless at least one category has content.
    let has_any = !signals.todo_open.is_empty()
        || !signals.todo_done.is_empty()
        || !signals.ultrathink.is_empty()
        || !signals.insights.is_empty()
        || !signals.plan_mode.is_empty()
        || !signals.intents.is_empty()
        || !signals.results.is_empty()
        || !signals.skills.is_empty()
        || !signals.decisions.is_empty()
        || !signals.outcomes.is_empty()
        || !highlights.is_empty();
    if !has_any {
        return None;
    }

    let mut out = String::new();
    out.push_str("[signals]\n");

    // Skill activations are rendered first, in a fenced section.
    if !signals.skills.is_empty() {
        out.push_str("=== SKILL ENTER ===\n");
        for line in &signals.skills {
            out.push_str(&format!("{}\n", line));
        }
        out.push_str("===================\n");
    }

    // Checklist summary: "RED LIGHT" header when open items remain.
    if !signals.todo_open.is_empty() || !signals.todo_done.is_empty() {
        if !signals.todo_open.is_empty() {
            out.push_str(&format!(
                "RED LIGHT: checklist detected (open: {}, done: {})\n",
                signals.todo_open.len(),
                signals.todo_done.len()
            ));
        } else {
            out.push_str(&format!(
                "Checklist detected (open: 0, done: {})\n",
                signals.todo_done.len()
            ));
        }

        // Items beyond MAX_TODO_ITEMS per state are elided with a count.
        for task in signals.todo_open.iter().take(MAX_TODO_ITEMS) {
            out.push_str(&format!("- [ ] {}\n", task));
        }
        if signals.todo_open.len() > MAX_TODO_ITEMS {
            out.push_str(&format!(
                "... (+{} more open)\n",
                signals.todo_open.len() - MAX_TODO_ITEMS
            ));
        }

        for task in signals.todo_done.iter().take(MAX_TODO_ITEMS) {
            out.push_str(&format!("- [x] {}\n", task));
        }
        if signals.todo_done.len() > MAX_TODO_ITEMS {
            out.push_str(&format!(
                "... (+{} more done)\n",
                signals.todo_done.len() - MAX_TODO_ITEMS
            ));
        }
    }

    // Remaining categories each render as a titled bullet list.
    if !signals.ultrathink.is_empty() {
        out.push_str("Ultrathink:\n");
        for line in &signals.ultrathink {
            out.push_str(&format!("- {}\n", line));
        }
    }

    if !signals.insights.is_empty() {
        out.push_str("Insight:\n");
        for line in &signals.insights {
            out.push_str(&format!("- {}\n", line));
        }
    }

    if !signals.plan_mode.is_empty() {
        out.push_str("Plan mode:\n");
        for line in &signals.plan_mode {
            out.push_str(&format!("- {}\n", line));
        }
    }

    if !signals.intents.is_empty() {
        out.push_str("Intent:\n");
        for line in &signals.intents {
            out.push_str(&format!("- {}\n", line));
        }
    }

    if !signals.decisions.is_empty() {
        out.push_str("Decision:\n");
        for line in &signals.decisions {
            out.push_str(&format!("- {}\n", line));
        }
    }

    if !signals.results.is_empty() {
        out.push_str("Results:\n");
        for line in &signals.results {
            out.push_str(&format!("- {}\n", line));
        }
    }

    if !signals.outcomes.is_empty() {
        out.push_str("Outcome:\n");
        for line in &signals.outcomes {
            out.push_str(&format!("- {}\n", line));
        }
    }

    // Highlights are re-truncated here; the other lists arrive pre-truncated.
    if !highlights.is_empty() {
        out.push_str("Notes:\n");
        for line in highlights {
            out.push_str(&format!("- {}\n", truncate_signal_line(line)));
        }
    }

    out.push_str("[/signals]\n");
    Some(out)
}
1053
/// Truncates `message` to at most `max_bytes` bytes — backing up to the
/// nearest UTF-8 character boundary — and appends a `...[truncated]` marker.
///
/// Fix: the previous version appended the marker even when `message` already
/// fit within `max_bytes`, mislabeling untruncated text. Text that fits is now
/// returned unchanged. (All in-file callers pre-check the length, so their
/// behavior is unaffected.)
fn truncate_message_bytes(message: &str, max_bytes: usize) -> String {
    if message.len() <= max_bytes {
        return message.to_string();
    }
    // Back up until the cut lands on a char boundary so we never split a
    // multi-byte UTF-8 sequence.
    let mut cutoff = max_bytes;
    while cutoff > 0 && !message.is_char_boundary(cutoff) {
        cutoff -= 1;
    }
    let marker = "...[truncated]";
    let mut out = String::with_capacity(cutoff + marker.len());
    out.push_str(&message[..cutoff]);
    out.push_str(marker);
    out
}
1064
/// Persists each chunk into `dir` (created if missing) as a text file
/// (`<id>.txt`) plus a pretty-printed JSON metadata sidecar
/// (`<id>.meta.json`).
///
/// Returns the `.txt` paths only; sidecar paths are derivable from chunk ids.
///
/// # Errors
/// Propagates filesystem errors from directory creation or writes, and
/// `serde_json` errors from sidecar serialization.
pub fn write_chunks_to_dir(chunks: &[Chunk], dir: &Path) -> Result<Vec<PathBuf>> {
    fs::create_dir_all(dir)?;

    let mut paths = Vec::new();

    for chunk in chunks {
        let filename = format!("{}.txt", chunk.id);
        let path = dir.join(&filename);
        fs::write(&path, &chunk.text)?;
        // Sidecar carries the chunk's metadata without the rendered text.
        let sidecar_path = dir.join(format!("{}.meta.json", chunk.id));
        let sidecar = ChunkMetadataSidecar::from(chunk);
        fs::write(&sidecar_path, serde_json::to_vec_pretty(&sidecar)?)?;
        paths.push(path);
    }

    Ok(paths)
}
1089
1090pub fn chunk_summary(chunks: &[Chunk]) -> String {
1092 if chunks.is_empty() {
1093 return "No chunks generated.".to_string();
1094 }
1095
1096 let total_tokens: usize = chunks.iter().map(|c| c.token_estimate).sum();
1097 let avg_tokens = total_tokens / chunks.len();
1098 let dates: Vec<&str> = chunks
1099 .iter()
1100 .map(|c| c.date.as_str())
1101 .collect::<std::collections::HashSet<_>>()
1102 .into_iter()
1103 .collect();
1104
1105 format!(
1106 "{} chunks, {} total tokens (avg {}), {} days",
1107 chunks.len(),
1108 total_tokens,
1109 avg_tokens,
1110 dates.len(),
1111 )
1112}
1113
1114#[cfg(test)]
1119mod tests {
1120 use super::*;
1121 use chrono::{TimeZone, Utc};
1122
1123 fn make_entry(hour: u32, min: u32, role: &str, msg: &str) -> TimelineEntry {
1124 TimelineEntry {
1125 timestamp: Utc.with_ymd_and_hms(2026, 1, 22, hour, min, 0).unwrap(),
1126 agent: "claude".to_string(),
1127 session_id: "sess-1".to_string(),
1128 role: role.to_string(),
1129 message: msg.to_string(),
1130 frame_kind: None,
1131 branch: None,
1132 cwd: None,
1133 }
1134 }
1135
1136 #[test]
1137 fn test_estimate_tokens() {
1138 assert_eq!(estimate_tokens(""), 0);
1139 assert_eq!(estimate_tokens("hi"), 1); assert_eq!(estimate_tokens("hello world"), 3); assert_eq!(estimate_tokens("1234"), 1); assert_eq!(estimate_tokens("12345"), 2); }
1144
1145 #[test]
1146 fn test_chunk_entries_empty() {
1147 let config = ChunkerConfig::default();
1148 let chunks = chunk_entries(&[], "proj", "claude", &config);
1149 assert!(chunks.is_empty());
1150 }
1151
1152 #[test]
1153 fn test_chunk_entries_single_message() {
1154 let entries = vec![make_entry(14, 0, "user", "short message")];
1155 let config = ChunkerConfig::default();
1156 let chunks = chunk_entries(&entries, "proj", "claude", &config);
1157
1158 assert_eq!(chunks.len(), 1);
1159 assert_eq!(chunks[0].project, "proj");
1160 assert_eq!(chunks[0].agent, "claude");
1161 assert_eq!(chunks[0].date, "2026-01-22");
1162 assert!(chunks[0].text.contains("short message"));
1163 }
1164
1165 #[test]
1166 fn test_chunk_entries_basic() {
1167 let entries: Vec<TimelineEntry> = (0..10)
1170 .map(|i| make_entry(14, i as u32, "user", &"x".repeat(200)))
1171 .collect();
1172
1173 let config = ChunkerConfig {
1174 target_tokens: 150,
1175 min_tokens: 50,
1176 max_tokens: 300,
1177 overlap_messages: 2,
1178 };
1179
1180 let chunks = chunk_entries(&entries, "proj", "claude", &config);
1181 assert!(
1182 chunks.len() > 1,
1183 "Expected multiple chunks, got {}",
1184 chunks.len()
1185 );
1186
1187 for (i, chunk) in chunks.iter().enumerate() {
1189 assert!(chunk.id.contains(&format!("{:03}", i + 1)));
1190 }
1191 }
1192
1193 #[test]
1194 fn test_chunk_entries_respects_max_tokens() {
1195 let entries = vec![make_entry(14, 0, "user", &"x".repeat(20000))];
1197 let config = ChunkerConfig {
1198 target_tokens: 1500,
1199 min_tokens: 500,
1200 max_tokens: 2500,
1201 overlap_messages: 2,
1202 };
1203
1204 let chunks = chunk_entries(&entries, "proj", "claude", &config);
1205 assert_eq!(chunks.len(), 1);
1208 assert!(chunks[0].text.contains("[truncated]"));
1209 }
1210
1211 #[test]
1212 fn test_chunk_entries_groups_by_date() {
1213 let entries = vec![
1214 TimelineEntry {
1215 timestamp: Utc.with_ymd_and_hms(2026, 1, 20, 10, 0, 0).unwrap(),
1216 agent: "claude".to_string(),
1217 session_id: "s1".to_string(),
1218 role: "user".to_string(),
1219 message: "day one".to_string(),
1220 frame_kind: None,
1221 branch: None,
1222 cwd: None,
1223 },
1224 TimelineEntry {
1225 timestamp: Utc.with_ymd_and_hms(2026, 1, 21, 10, 0, 0).unwrap(),
1226 agent: "claude".to_string(),
1227 session_id: "s2".to_string(),
1228 role: "user".to_string(),
1229 message: "day two".to_string(),
1230 frame_kind: None,
1231 branch: None,
1232 cwd: None,
1233 },
1234 ];
1235
1236 let config = ChunkerConfig::default();
1237 let chunks = chunk_entries(&entries, "proj", "claude", &config);
1238
1239 assert_eq!(chunks.len(), 2);
1240 assert_eq!(chunks[0].date, "2026-01-20");
1241 assert_eq!(chunks[1].date, "2026-01-21");
1242 }
1243
1244 #[test]
1245 fn test_format_chunk_text() {
1246 let entries = [
1247 make_entry(14, 30, "user", "hello"),
1248 make_entry(14, 31, "assistant", "hi there"),
1249 ];
1250 let refs: Vec<&TimelineEntry> = entries.iter().collect();
1251
1252 let text = format_chunk_text(&refs, "TestProj", "claude", "2026-01-22");
1253
1254 assert!(text.starts_with("[project: TestProj | agent: claude | date: 2026-01-22]"));
1255 assert!(text.contains("[14:30:00] user: hello"));
1256 assert!(text.contains("[14:31:00] assistant: hi there"));
1257 }
1258
1259 #[test]
1260 fn test_format_chunk_text_truncates_utf8_safely() {
1261 let mut msg = "a".repeat(3999);
1262 msg.push('é'); let entries = [make_entry(14, 30, "user", &msg)];
1264 let refs: Vec<&TimelineEntry> = entries.iter().collect();
1265
1266 let text = format_chunk_text(&refs, "TestProj", "claude", "2026-01-22");
1267
1268 assert!(text.contains("[truncated]"));
1269 assert!(!text.contains('é'));
1270 }
1271
1272 #[test]
1273 fn test_chunk_entries_extracts_frontmatter_telemetry() {
1274 let entries = vec![make_entry(
1275 14,
1276 30,
1277 "assistant",
1278 "---\nrun_id: mrbl-001\nprompt_id: api-redesign_20260327\nmodel: gpt-5.4\nstarted_at: 2026-03-27T10:00:00Z\ncompleted_at: 2026-03-27T10:01:00Z\ntoken_usage: 1234\nfindings_count: 4\nframe_kind: agent_reply\nphase: implement\nmode: session-first\nskill_code: vc-workflow\nframework_version: 2026-03\n---\n## Report\nContent here",
1279 )];
1280
1281 let chunks = chunk_entries(&entries, "proj", "claude", &ChunkerConfig::default());
1282 assert_eq!(chunks.len(), 1);
1283
1284 let chunk = &chunks[0];
1285 assert_eq!(chunk.run_id.as_deref(), Some("mrbl-001"));
1286 assert_eq!(chunk.prompt_id.as_deref(), Some("api-redesign_20260327"));
1287 assert_eq!(chunk.agent_model.as_deref(), Some("gpt-5.4"));
1288 assert_eq!(chunk.started_at.as_deref(), Some("2026-03-27T10:00:00Z"));
1289 assert_eq!(chunk.completed_at.as_deref(), Some("2026-03-27T10:01:00Z"));
1290 assert_eq!(chunk.token_usage, Some(1234));
1291 assert_eq!(chunk.findings_count, Some(4));
1292 assert_eq!(chunk.frame_kind, Some(FrameKind::AgentReply));
1293 assert_eq!(chunk.workflow_phase.as_deref(), Some("implement"));
1294 assert_eq!(chunk.mode.as_deref(), Some("session-first"));
1295 assert_eq!(chunk.skill_code.as_deref(), Some("vc-workflow"));
1296 assert_eq!(chunk.framework_version.as_deref(), Some("2026-03"));
1297 assert!(chunk.text.contains("## Report"));
1298 assert!(!chunk.text.contains("run_id: mrbl-001"));
1299 assert!(!chunk.text.contains("phase: implement"));
1300 }
1301
1302 #[test]
1303 fn test_chunk_entries_strip_malformed_frontmatter_without_metadata() {
1304 let entries = vec![make_entry(
1305 14,
1306 30,
1307 "assistant",
1308 "---\nrun_id: [nope\nmode: session-first\n---\n## Report\nBody survives",
1309 )];
1310
1311 let chunks = chunk_entries(&entries, "proj", "claude", &ChunkerConfig::default());
1312 assert_eq!(chunks.len(), 1);
1313
1314 let chunk = &chunks[0];
1315 assert_eq!(chunk.run_id, None);
1316 assert_eq!(chunk.mode, None);
1317 assert!(chunk.text.contains("## Report"));
1318 assert!(chunk.text.contains("Body survives"));
1319 assert!(!chunk.text.contains("mode: session-first"));
1320 }
1321
1322 #[test]
1323 fn test_write_chunks_to_dir() {
1324 let tmp = std::env::temp_dir().join("ai-ctx-chunker-test");
1325 let _ = fs::remove_dir_all(&tmp);
1326
1327 let chunks = vec![
1328 Chunk {
1329 id: "proj_claude_2026-01-22_001".to_string(),
1330 project: "proj".to_string(),
1331 agent: "claude".to_string(),
1332 date: "2026-01-22".to_string(),
1333 session_id: "s1".to_string(),
1334 cwd: Some("/Users/tester/workspaces/proj".to_string()),
1335 kind: Kind::Conversations,
1336 frame_kind: Some(FrameKind::UserMsg),
1337 run_id: None,
1338 prompt_id: None,
1339 agent_model: None,
1340 started_at: None,
1341 completed_at: None,
1342 token_usage: None,
1343 findings_count: None,
1344 workflow_phase: Some("implement".to_string()),
1345 mode: Some("session-first".to_string()),
1346 skill_code: Some("vc-workflow".to_string()),
1347 framework_version: Some("2026-03".to_string()),
1348 msg_range: (0, 5),
1349 text: "chunk one content".to_string(),
1350 token_estimate: 4,
1351 highlights: vec![],
1352 },
1353 Chunk {
1354 id: "proj_claude_2026-01-22_002".to_string(),
1355 project: "proj".to_string(),
1356 agent: "claude".to_string(),
1357 date: "2026-01-22".to_string(),
1358 session_id: "s1".to_string(),
1359 cwd: None,
1360 kind: Kind::Conversations,
1361 frame_kind: None,
1362 run_id: None,
1363 prompt_id: None,
1364 agent_model: None,
1365 started_at: None,
1366 completed_at: None,
1367 token_usage: None,
1368 findings_count: None,
1369 workflow_phase: None,
1370 mode: None,
1371 skill_code: None,
1372 framework_version: None,
1373 msg_range: (3, 8),
1374 text: "chunk two content".to_string(),
1375 token_estimate: 4,
1376 highlights: vec![],
1377 },
1378 ];
1379
1380 let paths = write_chunks_to_dir(&chunks, &tmp).unwrap();
1381 assert_eq!(paths.len(), 2);
1382 assert!(paths[0].exists());
1383 assert!(paths[1].exists());
1384
1385 let content = fs::read_to_string(&paths[0]).unwrap();
1386 assert_eq!(content, "chunk one content");
1387
1388 let sidecar = fs::read_to_string(tmp.join("proj_claude_2026-01-22_001.meta.json")).unwrap();
1389 let metadata: ChunkMetadataSidecar = serde_json::from_str(&sidecar).unwrap();
1390 assert_eq!(metadata.project, "proj");
1391 assert_eq!(metadata.agent, "claude");
1392 assert_eq!(metadata.date, "2026-01-22");
1393 assert_eq!(
1394 metadata.cwd.as_deref(),
1395 Some("/Users/tester/workspaces/proj")
1396 );
1397 assert_eq!(metadata.kind, Kind::Conversations);
1398 assert_eq!(metadata.frame_kind, Some(FrameKind::UserMsg));
1399 assert_eq!(metadata.workflow_phase.as_deref(), Some("implement"));
1400 assert_eq!(metadata.mode.as_deref(), Some("session-first"));
1401 assert_eq!(metadata.skill_code.as_deref(), Some("vc-workflow"));
1402 assert_eq!(metadata.framework_version.as_deref(), Some("2026-03"));
1403
1404 let legacy: ChunkMetadataSidecar = serde_json::from_value(serde_json::json!({
1405 "id": "legacy",
1406 "project": "proj",
1407 "agent": "claude",
1408 "date": "2026-01-22",
1409 "session_id": "s1",
1410 "kind": "conversations",
1411 }))
1412 .unwrap();
1413 assert_eq!(legacy.cwd, None);
1414 assert_eq!(legacy.frame_kind, None);
1415 assert_eq!(legacy.workflow_phase, None);
1416 assert_eq!(legacy.mode, None);
1417 assert_eq!(legacy.skill_code, None);
1418 assert_eq!(legacy.framework_version, None);
1419
1420 let _ = fs::remove_dir_all(&tmp);
1421 }
1422
1423 #[test]
1424 fn test_overlap_messages() {
1425 let entries: Vec<TimelineEntry> = (0..8)
1428 .map(|i| make_entry(14, i as u32, "user", &format!("msg_{}", i)))
1429 .collect();
1430
1431 let config = ChunkerConfig {
1432 target_tokens: 80,
1433 min_tokens: 20,
1434 max_tokens: 200,
1435 overlap_messages: 2,
1436 };
1437
1438 let chunks = chunk_entries(&entries, "p", "c", &config);
1439
1440 if chunks.len() >= 2 {
1442 let (_, end1) = chunks[0].msg_range;
1444 let (start2, _) = chunks[1].msg_range;
1445 assert!(
1446 start2 < end1,
1447 "Expected overlap: chunk1 ends at {}, chunk2 starts at {}",
1448 end1,
1449 start2
1450 );
1451 }
1452 }
1453
1454 #[test]
1455 fn test_chunk_id_format() {
1456 let entries = vec![make_entry(10, 0, "user", "test")];
1457 let config = ChunkerConfig::default();
1458 let chunks = chunk_entries(&entries, "MyProject", "gemini", &config);
1459
1460 assert_eq!(chunks[0].id, "MyProject_gemini_2026-01-22_001");
1461 }
1462
1463 #[test]
1464 fn test_chunk_summary() {
1465 let chunks = vec![
1466 Chunk {
1467 id: "a".to_string(),
1468 project: "p".to_string(),
1469 agent: "c".to_string(),
1470 date: "2026-01-20".to_string(),
1471 session_id: "s".to_string(),
1472 cwd: None,
1473 kind: Kind::Conversations,
1474 frame_kind: None,
1475 run_id: None,
1476 prompt_id: None,
1477 agent_model: None,
1478 started_at: None,
1479 completed_at: None,
1480 token_usage: None,
1481 findings_count: None,
1482 workflow_phase: None,
1483 mode: None,
1484 skill_code: None,
1485 framework_version: None,
1486 msg_range: (0, 5),
1487 text: "x".repeat(100),
1488 token_estimate: 25,
1489 highlights: vec![],
1490 },
1491 Chunk {
1492 id: "b".to_string(),
1493 project: "p".to_string(),
1494 agent: "c".to_string(),
1495 date: "2026-01-21".to_string(),
1496 session_id: "s".to_string(),
1497 cwd: None,
1498 kind: Kind::Conversations,
1499 frame_kind: None,
1500 run_id: None,
1501 prompt_id: None,
1502 agent_model: None,
1503 started_at: None,
1504 completed_at: None,
1505 token_usage: None,
1506 findings_count: None,
1507 workflow_phase: None,
1508 mode: None,
1509 skill_code: None,
1510 framework_version: None,
1511 msg_range: (5, 10),
1512 text: "y".repeat(200),
1513 token_estimate: 50,
1514 highlights: vec![],
1515 },
1516 ];
1517
1518 let summary = chunk_summary(&chunks);
1519 assert!(summary.contains("2 chunks"));
1520 assert!(summary.contains("75 total tokens"));
1521 assert!(summary.contains("2 days"));
1522 }
1523
1524 #[test]
1525 fn test_extract_highlights_filters_keywords() {
1526 let entries = [
1527 make_entry(10, 0, "user", "Decision: lock chunking heuristics"),
1528 make_entry(10, 1, "assistant", "Just chatting"),
1529 make_entry(10, 2, "user", "TODO: add summarization notes"),
1530 make_entry(10, 3, "user", "KEY architectural choice"),
1531 ];
1532 let refs: Vec<&TimelineEntry> = entries.iter().collect();
1533
1534 let highlights = extract_highlights(&refs);
1535 assert_eq!(
1536 highlights,
1537 vec![
1538 "Decision: lock chunking heuristics",
1539 "TODO: add summarization notes",
1540 "KEY architectural choice"
1541 ]
1542 );
1543 }
1544
1545 #[test]
1546 fn test_format_chunk_text_includes_signals_for_checklist_and_intent() {
1547 let entries = [make_entry(
1548 14,
1549 30,
1550 "user",
1551 "No i tutaj mam taki pomysł, żeby to zrobić\nPlan mode: enabled\nUser accepted the plan\nUltrathink:\n- [ ] pierwsza rzecz\n- [x] druga rzecz\n\n★ Insight ─ to działa",
1552 )];
1553 let refs: Vec<&TimelineEntry> = entries.iter().collect();
1554
1555 let text = format_chunk_text(&refs, "TestProj", "claude", "2026-01-22");
1556
1557 assert!(text.contains("[signals]"));
1558 assert!(text.contains("RED LIGHT: checklist detected (open: 1, done: 1)"));
1559 assert!(text.contains("- [ ] pierwsza rzecz"));
1560 assert!(text.contains("- [x] druga rzecz"));
1561 assert!(text.contains("Ultrathink:"));
1562 assert!(text.contains("- Ultrathink:"));
1563 assert!(text.contains("Insight:"));
1564 assert!(text.contains("- ★ Insight ─ to działa"));
1565 assert!(text.contains("Plan mode:"));
1566 assert!(text.contains("- Plan mode: enabled"));
1567 assert!(text.contains("- User accepted the plan"));
1568 assert!(text.contains("Intent:"));
1569 assert!(text.contains("No i tutaj mam taki pomysł, żeby to zrobić"));
1570 assert!(text.contains("[/signals]"));
1571 }
1572}