use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use crate::timeline::{FrameKind, Kind, TimelineEntry};
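/// A contiguous window of timeline entries rendered as a single searchable
/// text block, together with the metadata used for indexing and filtering.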
#[derive(Debug, Clone)]
pub struct Chunk {
pub id: String,
pub project: String,
pub agent: String,
pub date: String,
pub session_id: String,
pub cwd: Option<String>,
pub kind: Kind,
pub frame_kind: Option<FrameKind>,
pub run_id: Option<String>,
pub prompt_id: Option<String>,
pub agent_model: Option<String>,
pub started_at: Option<String>,
pub completed_at: Option<String>,
pub token_usage: Option<u64>,
pub findings_count: Option<u32>,
pub workflow_phase: Option<String>,
pub mode: Option<String>,
pub skill_code: Option<String>,
pub framework_version: Option<String>,
pub msg_range: (usize, usize),
pub text: String,
pub token_estimate: usize,
pub highlights: Vec<String>,
}
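/// Serialized companion to [`Chunk`], written as `<chunk_id>.meta.json` next
/// to the chunk text. Optional fields are skipped when absent and default to
/// `None` on read, so sidecars written by older versions still deserialize.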
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ChunkMetadataSidecar {
pub id: String,
pub project: String,
pub agent: String,
pub date: String,
pub session_id: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub cwd: Option<String>,
pub kind: Kind,
#[serde(skip_serializing_if = "Option::is_none")]
pub frame_kind: Option<FrameKind>,
#[serde(skip_serializing_if = "Option::is_none")]
pub run_id: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_id: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub agent_model: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub started_at: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub completed_at: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub token_usage: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub findings_count: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub workflow_phase: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub mode: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub skill_code: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub framework_version: Option<String>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub intent_entries: Vec<crate::types::IntentEntry>,
}
impl From<&Chunk> for ChunkMetadataSidecar {
fn from(chunk: &Chunk) -> Self {
Self {
id: chunk.id.clone(),
project: chunk.project.clone(),
agent: chunk.agent.clone(),
date: chunk.date.clone(),
session_id: chunk.session_id.clone(),
cwd: chunk.cwd.clone(),
kind: chunk.kind,
frame_kind: chunk.frame_kind,
run_id: chunk.run_id.clone(),
prompt_id: chunk.prompt_id.clone(),
agent_model: chunk.agent_model.clone(),
started_at: chunk.started_at.clone(),
completed_at: chunk.completed_at.clone(),
token_usage: chunk.token_usage,
findings_count: chunk.findings_count,
workflow_phase: chunk.workflow_phase.clone(),
mode: chunk.mode.clone(),
skill_code: chunk.skill_code.clone(),
framework_version: chunk.framework_version.clone(),
intent_entries: Vec::new(),
}
}
}
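/// Token budgets for chunking: windows grow toward `target_tokens` and never
/// exceed `max_tokens` (except for a single oversized message), while adjacent
/// chunks share `overlap_messages` trailing messages for context. Note that
/// `min_tokens` is carried in the config but not enforced by the windowing
/// loop in this module.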
#[derive(Debug, Clone)]
pub struct ChunkerConfig {
pub target_tokens: usize,
pub min_tokens: usize,
pub max_tokens: usize,
pub overlap_messages: usize,
}
impl Default for ChunkerConfig {
fn default() -> Self {
Self {
target_tokens: 1500,
min_tokens: 500,
max_tokens: 2500,
overlap_messages: 2,
}
}
}
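/// Rough token estimate using the common ~4-bytes-per-token heuristic,
/// rounded up: `estimate_tokens("hello world")` is `3`.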
pub fn estimate_tokens(text: &str) -> usize {
text.len().div_ceil(4)
}
const PLAN_KEYWORDS: &[&str] = &[
"implementation plan",
"plan:",
"## plan",
"step 1:",
"step 2:",
"step 3:",
"action items",
"milestones",
"roadmap",
"todo list",
"acceptance criteria",
"## steps",
"## phases",
];
const REPORT_KEYWORDS: &[&str] = &[
"## findings",
"## summary",
"## report",
"audit report",
"coverage report",
"test results",
"## metrics",
"## recommendations",
"## conclusion",
"status report",
"incident report",
"pr review",
"code review",
];
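/// Classifies a group of entries by counting plan/report keyword hits in
/// assistant messages. A kind wins when it scores at least 3 and strictly
/// beats the other; otherwise any user/assistant traffic yields
/// `Conversations`, and anything else `Other`.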
pub fn classify_kind(entries: &[TimelineEntry]) -> Kind {
if entries.is_empty() {
return Kind::Other;
}
let mut plan_score: u32 = 0;
let mut report_score: u32 = 0;
let mut has_conversation = false;
for entry in entries {
let lower = entry.message.to_lowercase();
if entry.role == "assistant" {
for kw in PLAN_KEYWORDS {
if lower.contains(kw) {
plan_score += 1;
}
}
for kw in REPORT_KEYWORDS {
if lower.contains(kw) {
report_score += 1;
}
}
}
if entry.role == "user" || entry.role == "assistant" {
has_conversation = true;
}
}
let threshold = 3;
if plan_score >= threshold && plan_score > report_score {
Kind::Plans
} else if report_score >= threshold && report_score > plan_score {
Kind::Reports
} else if has_conversation {
Kind::Conversations
} else {
Kind::Other
}
}
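/// If the first entry opens with a `---` frontmatter block, parse it and
/// return the entries with the block stripped from the first message. A
/// malformed block is still stripped, just without yielding metadata.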
fn prepare_entries_for_chunking<'a>(
entries: &'a [TimelineEntry],
) -> (
Option<crate::frontmatter::ReportFrontmatter>,
Cow<'a, [TimelineEntry]>,
) {
let Some(first) = entries.first() else {
return (None, Cow::Borrowed(entries));
};
if !first.message.trim_start().starts_with("---") {
return (None, Cow::Borrowed(entries));
}
let (frontmatter, body) = crate::frontmatter::parse(&first.message);
if body == first.message {
return (None, Cow::Borrowed(entries));
}
let mut stripped_entries = entries.to_vec();
if let Some(stripped_first) = stripped_entries.first_mut() {
stripped_first.message = body.to_string();
}
(frontmatter, Cow::Owned(stripped_entries))
}
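/// Copies parsed frontmatter telemetry/steering fields onto a chunk. Note
/// that `frame_kind` is only filled in when missing, while the remaining
/// fields are overwritten unconditionally.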
fn apply_frontmatter(chunk: &mut Chunk, frontmatter: &crate::frontmatter::ReportFrontmatter) {
if chunk.frame_kind.is_none() {
chunk.frame_kind = frontmatter.telemetry.frame_kind;
}
chunk.run_id = frontmatter.telemetry.run_id.clone();
chunk.prompt_id = frontmatter.telemetry.prompt_id.clone();
chunk.agent_model = frontmatter.telemetry.model.clone();
chunk.started_at = frontmatter.telemetry.started_at.clone();
chunk.completed_at = frontmatter.telemetry.completed_at.clone();
chunk.token_usage = frontmatter.telemetry.token_usage;
chunk.findings_count = frontmatter.telemetry.findings_count;
chunk.workflow_phase = frontmatter.steering.workflow_phase.clone();
chunk.mode = frontmatter.steering.mode.clone();
chunk.skill_code = frontmatter.steering.skill_code.clone();
chunk.framework_version = frontmatter.steering.framework_version.clone();
}
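/// Splits a day's entries into maximal runs that share the same `frame_kind`,
/// so chunks never straddle a frame-kind boundary.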
fn split_day_entries_by_frame_kind<'a>(
entries: &'a [(usize, &'a TimelineEntry)],
) -> Vec<&'a [(usize, &'a TimelineEntry)]> {
if entries.is_empty() {
return Vec::new();
}
let mut groups = Vec::new();
let mut start = 0usize;
for idx in 1..entries.len() {
let previous = entries[idx - 1].1.frame_kind;
let current = entries[idx].1.frame_kind;
if previous != current {
groups.push(&entries[start..idx]);
start = idx;
}
}
groups.push(&entries[start..]);
groups
}
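/// Returns the window's frame kind if every entry agrees on `Some(kind)`;
/// `None` if the window is empty, mixed, or starts without a frame kind.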
fn frame_kind_for_window(entries: &[&TimelineEntry]) -> Option<FrameKind> {
let first = entries.first().and_then(|entry| entry.frame_kind)?;
entries
.iter()
.all(|entry| entry.frame_kind == Some(first))
.then_some(first)
}
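/// Top-level chunking pipeline: strip optional frontmatter, group entries by
/// calendar date, split each day on frame-kind boundaries, window the result
/// by token budget, and finally stamp frontmatter metadata onto every chunk.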
pub fn chunk_entries(
entries: &[TimelineEntry],
project: &str,
agent: &str,
config: &ChunkerConfig,
) -> Vec<Chunk> {
if entries.is_empty() {
return vec![];
}
let (frontmatter, prepared_entries) = prepare_entries_for_chunking(entries);
let prepared_entries = prepared_entries.as_ref();
let mut by_date: BTreeMap<String, Vec<(usize, &TimelineEntry)>> = BTreeMap::new();
for (idx, entry) in prepared_entries.iter().enumerate() {
let date = entry.timestamp.format("%Y-%m-%d").to_string();
by_date.entry(date).or_default().push((idx, entry));
}
let mut chunks = Vec::new();
for (date, day_entries) in &by_date {
let mut day_chunks = Vec::new();
let mut next_seq = 1usize;
for frame_group in split_day_entries_by_frame_kind(day_entries) {
let (mut group_chunks, updated_seq) =
chunk_day_entries(frame_group, project, agent, date, config, next_seq);
next_seq = updated_seq;
day_chunks.append(&mut group_chunks);
}
if let Some(frontmatter) = frontmatter.as_ref() {
for chunk in &mut day_chunks {
apply_frontmatter(chunk, frontmatter);
}
}
chunks.extend(day_chunks);
}
chunks
}
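/// Windows one frame-kind group into chunks. Returns the chunks plus the next
/// sequence number so chunk ids stay consecutive across groups within a day.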
fn chunk_day_entries(
entries: &[(usize, &TimelineEntry)],
project: &str,
agent: &str,
date: &str,
config: &ChunkerConfig,
start_seq: usize,
) -> (Vec<Chunk>, usize) {
if entries.is_empty() {
return (vec![], start_seq);
}
let mut chunks = Vec::new();
let mut seq = start_seq;
let mut start = 0usize;
while start < entries.len() {
let mut end = start;
let mut accumulated_tokens = 0usize;
while end < entries.len() {
let msg_tokens = estimate_tokens(&entries[end].1.message);
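// The +20 is a rough per-message allowance for the "[HH:MM:SS] role:" framing.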
let next_total = accumulated_tokens + msg_tokens + 20;
if next_total > config.max_tokens && end > start {
break;
}
accumulated_tokens = next_total;
end += 1;
if accumulated_tokens >= config.target_tokens {
break;
}
}
let window: Vec<&TimelineEntry> = entries[start..end].iter().map(|(_, e)| *e).collect();
let highlights = extract_highlights(&window);
let signals = extract_signals(&window);
let frame_kind = frame_kind_for_window(&window);
let text = format_chunk_text_inner(
&window,
project,
agent,
date,
frame_kind,
&signals,
&highlights,
);
let token_estimate = estimate_tokens(&text);
let session_id = window
.first()
.map(|e| e.session_id.clone())
.unwrap_or_default();
let cwd = window.first().and_then(|entry| entry.cwd.clone());
let global_start = entries[start].0;
let global_end = entries[end - 1].0 + 1;
let kind = classify_kind(&window.iter().map(|e| (*e).clone()).collect::<Vec<_>>());
chunks.push(Chunk {
id: format!("{}_{}_{}_{:03}", project, agent, date, seq),
project: project.to_string(),
agent: agent.to_string(),
date: date.to_string(),
session_id,
cwd,
kind,
frame_kind,
run_id: None,
prompt_id: None,
agent_model: None,
started_at: None,
completed_at: None,
token_usage: None,
findings_count: None,
workflow_phase: None,
mode: None,
skill_code: None,
framework_version: None,
msg_range: (global_start, global_end),
text,
token_estimate,
highlights,
});
seq += 1;
let overlap = config.overlap_messages.min(end - start);
start = if end >= entries.len() {
entries.len()
} else if end - overlap > start {
end - overlap
} else {
end
};
}
(chunks, seq)
}
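/// Renders a window of entries as chunk text: a bracketed header line, an
/// optional `[signals]` block, then one `[HH:MM:SS] role: message` line per
/// entry, with messages truncated at 4000 bytes.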
pub fn format_chunk_text(
entries: &[&TimelineEntry],
project: &str,
agent: &str,
date: &str,
) -> String {
let highlights = extract_highlights(entries);
let signals = extract_signals(entries);
format_chunk_text_inner(
entries,
project,
agent,
date,
frame_kind_for_window(entries),
&signals,
&highlights,
)
}
fn format_chunk_text_inner(
entries: &[&TimelineEntry],
project: &str,
agent: &str,
date: &str,
frame_kind: Option<FrameKind>,
signals: &ChunkSignals,
highlights: &[String],
) -> String {
let mut text = if let Some(frame_kind) = frame_kind {
format!(
"[project: {} | agent: {} | date: {} | frame_kind: {}]\n\n",
project, agent, date, frame_kind
)
} else {
format!(
"[project: {} | agent: {} | date: {}]\n\n",
project, agent, date
)
};
if let Some(block) = format_signals_block(signals, highlights) {
text.push_str(&block);
text.push('\n');
}
for entry in entries {
let time = entry.timestamp.format("%H:%M:%S");
let msg = if entry.message.len() > 4000 {
truncate_message_bytes(&entry.message, 4000)
} else {
entry.message.clone()
};
text.push_str(&format!("[{}] {}: {}\n", time, entry.role, msg));
}
text
}
const HIGHLIGHT_KEYWORDS: &[&str] = &[
"decision:",
"plan:",
"architecture",
"breaking",
"todo:",
"fixme:",
];
const HIGHLIGHT_KEYWORDS_CASE_SENSITIVE: &[&str] = &["WAŻNE", "KEY"];
fn extract_highlights(entries: &[&TimelineEntry]) -> Vec<String> {
let mut highlights = Vec::new();
for entry in entries {
if highlights.len() >= 3 {
break;
}
if !is_highlight_message(&entry.message) {
continue;
}
if let Some(line) = entry.message.lines().map(str::trim).find(|l| !l.is_empty())
&& highlights.last().map(String::as_str) != Some(line)
{
highlights.push(line.to_string());
}
}
highlights
}
fn is_highlight_message(message: &str) -> bool {
let lower = message.to_lowercase();
HIGHLIGHT_KEYWORDS.iter().any(|kw| lower.contains(kw))
|| HIGHLIGHT_KEYWORDS_CASE_SENSITIVE
.iter()
.any(|kw| message.contains(kw))
}
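/// Lightweight signals mined from a window: checklist state, tagged blocks
/// (ultrathink/insight/plan-mode/skill/decision/outcome), and keyword-matched
/// intent and result lines. Rendered by `format_signals_block`.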
#[derive(Debug, Clone, Default)]
struct ChunkSignals {
todo_open: Vec<String>,
todo_done: Vec<String>,
ultrathink: Vec<String>,
insights: Vec<String>,
plan_mode: Vec<String>,
intents: Vec<String>,
results: Vec<String>,
skills: Vec<String>,
decisions: Vec<String>,
outcomes: Vec<String>,
}
const MAX_TODO_ITEMS: usize = 8;
const MAX_ULTRATHINK_BLOCKS: usize = 4;
const MAX_INSIGHT_BLOCKS: usize = 6;
const MAX_PLAN_MODE_EVENTS: usize = 8;
const MAX_INTENT_LINES: usize = 6;
const MAX_RESULT_LINES: usize = 6;
const MAX_TAG_BLOCK_LINES: usize = 4;
pub const INTENT_KEYWORDS: &[&str] = &[
"mam pomysl",
"mam pomysł",
"mam taki pomysl",
"mam taki pomysł",
"pomysl",
"pomysł",
"proponuje",
"proponuję",
"zrobmy",
"zróbmy",
"ustalmy",
"ustalmy",
"chce",
"chcę",
"chcialbym",
"chciałbym",
"potrzebuje",
"potrzebuję",
"następny krok",
"nastepny krok",
"kolejny krok",
"i want",
"i'd like",
"let's",
"next step",
];
const RESULT_KEYWORDS: &[&str] = &[
"smoke test",
"passed",
"all checks passed",
"0 failed",
"completed",
"done",
"zrobione",
"dowiezione",
"gotowe",
"dziala",
"działa",
];
fn extract_signals(entries: &[&TimelineEntry]) -> ChunkSignals {
let (todo_open, todo_done) = extract_checklist_items(entries);
let ultrathink = extract_tag_blocks(entries, is_ultrathink_tag, MAX_ULTRATHINK_BLOCKS);
let insights = extract_tag_blocks(entries, is_insight_tag, MAX_INSIGHT_BLOCKS);
let plan_mode = extract_tag_blocks(entries, is_plan_mode_tag, MAX_PLAN_MODE_EVENTS);
let intents = extract_intent_lines(entries);
let results = extract_result_lines(entries);
let skills = extract_tag_blocks(entries, is_skill_tag, 4);
let decisions = extract_tag_blocks(entries, is_decision_tag, 4);
let outcomes = extract_tag_blocks(entries, is_outcome_tag, 4);
ChunkSignals {
todo_open,
todo_done,
ultrathink,
insights,
plan_mode,
intents,
results,
skills,
decisions,
outcomes,
}
}
fn extract_checklist_items(entries: &[&TimelineEntry]) -> (Vec<String>, Vec<String>) {
#[derive(Debug, Clone, Copy)]
enum TaskState {
Open,
Done,
}
let mut state_by_key: HashMap<String, TaskState> = HashMap::new();
let mut display_by_key: HashMap<String, String> = HashMap::new();
let mut order: Vec<String> = Vec::new();
for entry in entries {
for line in entry.message.lines() {
if let Some((is_done, task)) = parse_checklist_task(line) {
let key = normalize_key(&task);
if !state_by_key.contains_key(&key) {
order.push(key.clone());
display_by_key.insert(key.clone(), task);
state_by_key.insert(key.clone(), TaskState::Open);
}
if is_done {
state_by_key.insert(key, TaskState::Done);
}
}
}
}
let mut open = Vec::new();
let mut done = Vec::new();
for key in order {
let Some(task) = display_by_key.get(&key) else {
continue;
};
match state_by_key.get(&key) {
Some(TaskState::Done) => done.push(task.clone()),
Some(TaskState::Open) => open.push(task.clone()),
None => {}
}
}
(open, done)
}
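/// Parses a Markdown checklist item. Accepts `-`, `*`, or `+` bullets and
/// returns `(is_done, task)`: `parse_checklist_task("- [x] ship it")` yields
/// `Some((true, "ship it".to_string()))`; a non-checklist line yields `None`.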
pub fn parse_checklist_task(line: &str) -> Option<(bool, String)> {
let l = line.trim_start();
let mut chars = l.chars();
let bullet = chars.next()?;
if !matches!(bullet, '-' | '*' | '+') {
return None;
}
let rest = chars.as_str().trim_start();
let rest = rest.strip_prefix('[')?;
let mut chars = rest.chars();
let state = chars.next()?;
let rest = chars.as_str();
let rest = rest.strip_prefix(']')?;
let task = rest.trim_start();
if task.is_empty() {
return None;
}
match state {
'x' | 'X' => Some((true, task.trim().to_string())),
' ' => Some((false, task.trim().to_string())),
_ => None,
}
}
fn extract_intent_lines(entries: &[&TimelineEntry]) -> Vec<String> {
let mut out = Vec::new();
let mut seen = HashSet::new();
for entry in entries {
if entry.role.to_lowercase() != "user" {
continue;
}
for line in entry.message.lines().map(str::trim) {
if line.is_empty() {
continue;
}
if !is_intent_line(line) {
continue;
}
let key = normalize_key(line);
if !seen.insert(key) {
continue;
}
out.push(truncate_signal_line(line));
if out.len() >= MAX_INTENT_LINES {
return out;
}
}
}
out
}
pub(crate) fn is_intent_line(line: &str) -> bool {
let lower = line.to_lowercase();
INTENT_KEYWORDS.iter().any(|kw| lower.contains(kw))
}
fn extract_result_lines(entries: &[&TimelineEntry]) -> Vec<String> {
let mut out = Vec::new();
let mut seen = HashSet::new();
for entry in entries {
for line in entry.message.lines().map(str::trim) {
if line.is_empty() {
continue;
}
if !is_result_line(line) {
continue;
}
let key = normalize_key(line);
if !seen.insert(key) {
continue;
}
out.push(truncate_signal_line(line));
if out.len() >= MAX_RESULT_LINES {
return out;
}
}
}
out
}
pub fn is_result_line(line: &str) -> bool {
let lower = line.to_lowercase();
RESULT_KEYWORDS.iter().any(|kw| lower.contains(kw))
}
pub fn normalize_key(s: &str) -> String {
s.split_whitespace()
.collect::<Vec<_>>()
.join(" ")
.to_lowercase()
}
pub fn truncate_signal_line(line: &str) -> String {
const MAX_BYTES: usize = 240;
if line.len() <= MAX_BYTES {
return line.to_string();
}
truncate_message_bytes(line, MAX_BYTES)
}
fn is_ultrathink_tag(line: &str) -> bool {
line.to_lowercase().contains("ultrathink")
}
fn is_insight_tag(line: &str) -> bool {
let lower = line.to_lowercase();
lower.starts_with("insight")
|| lower.contains("★ insight")
|| lower.contains("insight ─")
|| lower.contains("insight -")
}
fn is_plan_mode_tag(line: &str) -> bool {
let lower = line.to_lowercase();
lower.contains("plan mode")
|| lower.contains("accept plan")
|| lower.contains("user accepted the plan")
|| lower.contains("approve and bypass permissions")
|| lower.contains("bypass permissions")
}
fn is_skill_tag(line: &str) -> bool {
let lower = line.to_lowercase();
lower.contains("[skill_enter]")
|| lower.contains("vetcoders-partner")
|| lower.contains("vetcoders-spawn")
|| lower.contains("vetcoders-ownership")
|| lower.contains("vetcoders-workflow")
}
pub fn is_decision_tag(line: &str) -> bool {
let lower = line.to_lowercase();
lower.contains("[decision]") || lower.starts_with("decision:")
}
pub fn is_outcome_tag(line: &str) -> bool {
let lower = line.to_lowercase();
lower.contains("[skill_outcome]")
|| lower.starts_with("outcome:")
|| lower.starts_with("validation:")
}
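/// Collects deduplicated tag blocks: each line matching `is_tag` starts a
/// block that absorbs following lines until a blank line, the next tag, or
/// `MAX_TAG_BLOCK_LINES` total lines; blocks are joined with spaces, keyed by
/// `normalize_key` for dedup, and truncated to the signal-line byte limit.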
fn extract_tag_blocks(
entries: &[&TimelineEntry],
is_tag: fn(&str) -> bool,
max_blocks: usize,
) -> Vec<String> {
let mut out = Vec::new();
let mut seen = HashSet::new();
for entry in entries {
let lines: Vec<&str> = entry.message.lines().collect();
for (i, raw) in lines.iter().enumerate() {
let line = raw.trim();
if line.is_empty() || !is_tag(line) {
continue;
}
let mut block = Vec::new();
block.push(line);
for raw_next in lines.iter().skip(i + 1) {
let next = raw_next.trim();
if next.is_empty() {
break;
}
if is_tag(next) {
break;
}
block.push(next);
if block.len() >= MAX_TAG_BLOCK_LINES {
break;
}
}
let joined = block.join(" ");
let key = normalize_key(&joined);
if !seen.insert(key) {
continue;
}
out.push(truncate_signal_line(&joined));
if out.len() >= max_blocks {
return out;
}
}
}
out
}
fn format_signals_block(signals: &ChunkSignals, highlights: &[String]) -> Option<String> {
let has_any = !signals.todo_open.is_empty()
|| !signals.todo_done.is_empty()
|| !signals.ultrathink.is_empty()
|| !signals.insights.is_empty()
|| !signals.plan_mode.is_empty()
|| !signals.intents.is_empty()
|| !signals.results.is_empty()
|| !signals.skills.is_empty()
|| !signals.decisions.is_empty()
|| !signals.outcomes.is_empty()
|| !highlights.is_empty();
if !has_any {
return None;
}
let mut out = String::new();
out.push_str("[signals]\n");
if !signals.skills.is_empty() {
out.push_str("=== SKILL ENTER ===\n");
for line in &signals.skills {
out.push_str(&format!("{}\n", line));
}
out.push_str("===================\n");
}
if !signals.todo_open.is_empty() || !signals.todo_done.is_empty() {
if !signals.todo_open.is_empty() {
out.push_str(&format!(
"RED LIGHT: checklist detected (open: {}, done: {})\n",
signals.todo_open.len(),
signals.todo_done.len()
));
} else {
out.push_str(&format!(
"Checklist detected (open: 0, done: {})\n",
signals.todo_done.len()
));
}
for task in signals.todo_open.iter().take(MAX_TODO_ITEMS) {
out.push_str(&format!("- [ ] {}\n", task));
}
if signals.todo_open.len() > MAX_TODO_ITEMS {
out.push_str(&format!(
"... (+{} more open)\n",
signals.todo_open.len() - MAX_TODO_ITEMS
));
}
for task in signals.todo_done.iter().take(MAX_TODO_ITEMS) {
out.push_str(&format!("- [x] {}\n", task));
}
if signals.todo_done.len() > MAX_TODO_ITEMS {
out.push_str(&format!(
"... (+{} more done)\n",
signals.todo_done.len() - MAX_TODO_ITEMS
));
}
}
if !signals.ultrathink.is_empty() {
out.push_str("Ultrathink:\n");
for line in &signals.ultrathink {
out.push_str(&format!("- {}\n", line));
}
}
if !signals.insights.is_empty() {
out.push_str("Insight:\n");
for line in &signals.insights {
out.push_str(&format!("- {}\n", line));
}
}
if !signals.plan_mode.is_empty() {
out.push_str("Plan mode:\n");
for line in &signals.plan_mode {
out.push_str(&format!("- {}\n", line));
}
}
if !signals.intents.is_empty() {
out.push_str("Intent:\n");
for line in &signals.intents {
out.push_str(&format!("- {}\n", line));
}
}
if !signals.decisions.is_empty() {
out.push_str("Decision:\n");
for line in &signals.decisions {
out.push_str(&format!("- {}\n", line));
}
}
if !signals.results.is_empty() {
out.push_str("Results:\n");
for line in &signals.results {
out.push_str(&format!("- {}\n", line));
}
}
if !signals.outcomes.is_empty() {
out.push_str("Outcome:\n");
for line in &signals.outcomes {
out.push_str(&format!("- {}\n", line));
}
}
if !highlights.is_empty() {
out.push_str("Notes:\n");
for line in highlights {
out.push_str(&format!("- {}\n", truncate_signal_line(line)));
}
}
out.push_str("[/signals]\n");
Some(out)
}
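/// Truncates to at most `max_bytes`, backing the cutoff up to the nearest
/// UTF-8 character boundary so multi-byte characters are never split, then
/// appends a `...[truncated]` marker.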
fn truncate_message_bytes(message: &str, max_bytes: usize) -> String {
let mut cutoff = max_bytes.min(message.len());
while cutoff > 0 && !message.is_char_boundary(cutoff) {
cutoff -= 1;
}
const SUFFIX: &str = "...[truncated]";
let mut out = String::with_capacity(cutoff + SUFFIX.len());
out.push_str(&message[..cutoff]);
out.push_str(SUFFIX);
out
}
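/// Writes each chunk as `<id>.txt` plus a `<id>.meta.json` sidecar into `dir`
/// (created if missing), returning the paths of the text files only.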
pub fn write_chunks_to_dir(chunks: &[Chunk], dir: &Path) -> Result<Vec<PathBuf>> {
fs::create_dir_all(dir)?;
let mut paths = Vec::new();
for chunk in chunks {
let filename = format!("{}.txt", chunk.id);
let path = dir.join(&filename);
fs::write(&path, &chunk.text)?;
let sidecar_path = dir.join(format!("{}.meta.json", chunk.id));
let sidecar = ChunkMetadataSidecar::from(chunk);
fs::write(&sidecar_path, serde_json::to_vec_pretty(&sidecar)?)?;
paths.push(path);
}
Ok(paths)
}
pub fn chunk_summary(chunks: &[Chunk]) -> String {
if chunks.is_empty() {
return "No chunks generated.".to_string();
}
let total_tokens: usize = chunks.iter().map(|c| c.token_estimate).sum();
let avg_tokens = total_tokens / chunks.len();
let dates: HashSet<&str> = chunks.iter().map(|c| c.date.as_str()).collect();
format!(
"{} chunks, {} total tokens (avg {}), {} days",
chunks.len(),
total_tokens,
avg_tokens,
dates.len(),
)
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::{TimeZone, Utc};
fn make_entry(hour: u32, min: u32, role: &str, msg: &str) -> TimelineEntry {
TimelineEntry {
timestamp: Utc.with_ymd_and_hms(2026, 1, 22, hour, min, 0).unwrap(),
agent: "claude".to_string(),
session_id: "sess-1".to_string(),
role: role.to_string(),
message: msg.to_string(),
frame_kind: None,
branch: None,
cwd: None,
}
}
#[test]
fn test_estimate_tokens() {
assert_eq!(estimate_tokens(""), 0);
assert_eq!(estimate_tokens("hi"), 1); assert_eq!(estimate_tokens("hello world"), 3); assert_eq!(estimate_tokens("1234"), 1); assert_eq!(estimate_tokens("12345"), 2); }
#[test]
fn test_chunk_entries_empty() {
let config = ChunkerConfig::default();
let chunks = chunk_entries(&[], "proj", "claude", &config);
assert!(chunks.is_empty());
}
#[test]
fn test_chunk_entries_single_message() {
let entries = vec![make_entry(14, 0, "user", "short message")];
let config = ChunkerConfig::default();
let chunks = chunk_entries(&entries, "proj", "claude", &config);
assert_eq!(chunks.len(), 1);
assert_eq!(chunks[0].project, "proj");
assert_eq!(chunks[0].agent, "claude");
assert_eq!(chunks[0].date, "2026-01-22");
assert!(chunks[0].text.contains("short message"));
}
#[test]
fn test_chunk_entries_basic() {
let entries: Vec<TimelineEntry> = (0..10)
.map(|i| make_entry(14, i as u32, "user", &"x".repeat(200)))
.collect();
let config = ChunkerConfig {
target_tokens: 150,
min_tokens: 50,
max_tokens: 300,
overlap_messages: 2,
};
let chunks = chunk_entries(&entries, "proj", "claude", &config);
assert!(
chunks.len() > 1,
"Expected multiple chunks, got {}",
chunks.len()
);
for (i, chunk) in chunks.iter().enumerate() {
assert!(chunk.id.contains(&format!("{:03}", i + 1)));
}
}
#[test]
fn test_chunk_entries_respects_max_tokens() {
let entries = vec![make_entry(14, 0, "user", &"x".repeat(20000))];
let config = ChunkerConfig {
target_tokens: 1500,
min_tokens: 500,
max_tokens: 2500,
overlap_messages: 2,
};
let chunks = chunk_entries(&entries, "proj", "claude", &config);
assert_eq!(chunks.len(), 1);
assert!(chunks[0].text.contains("[truncated]"));
}
#[test]
fn test_chunk_entries_groups_by_date() {
let entries = vec![
TimelineEntry {
timestamp: Utc.with_ymd_and_hms(2026, 1, 20, 10, 0, 0).unwrap(),
agent: "claude".to_string(),
session_id: "s1".to_string(),
role: "user".to_string(),
message: "day one".to_string(),
frame_kind: None,
branch: None,
cwd: None,
},
TimelineEntry {
timestamp: Utc.with_ymd_and_hms(2026, 1, 21, 10, 0, 0).unwrap(),
agent: "claude".to_string(),
session_id: "s2".to_string(),
role: "user".to_string(),
message: "day two".to_string(),
frame_kind: None,
branch: None,
cwd: None,
},
];
let config = ChunkerConfig::default();
let chunks = chunk_entries(&entries, "proj", "claude", &config);
assert_eq!(chunks.len(), 2);
assert_eq!(chunks[0].date, "2026-01-20");
assert_eq!(chunks[1].date, "2026-01-21");
}
#[test]
fn test_format_chunk_text() {
let entries = [
make_entry(14, 30, "user", "hello"),
make_entry(14, 31, "assistant", "hi there"),
];
let refs: Vec<&TimelineEntry> = entries.iter().collect();
let text = format_chunk_text(&refs, "TestProj", "claude", "2026-01-22");
assert!(text.starts_with("[project: TestProj | agent: claude | date: 2026-01-22]"));
assert!(text.contains("[14:30:00] user: hello"));
assert!(text.contains("[14:31:00] assistant: hi there"));
}
#[test]
fn test_format_chunk_text_truncates_utf8_safely() {
let mut msg = "a".repeat(3999);
msg.push('é');
let entries = [make_entry(14, 30, "user", &msg)];
let refs: Vec<&TimelineEntry> = entries.iter().collect();
let text = format_chunk_text(&refs, "TestProj", "claude", "2026-01-22");
assert!(text.contains("[truncated]"));
assert!(!text.contains('é'));
}
#[test]
fn test_chunk_entries_extracts_frontmatter_telemetry() {
let entries = vec![make_entry(
14,
30,
"assistant",
"---\nrun_id: mrbl-001\nprompt_id: api-redesign_20260327\nmodel: gpt-5.4\nstarted_at: 2026-03-27T10:00:00Z\ncompleted_at: 2026-03-27T10:01:00Z\ntoken_usage: 1234\nfindings_count: 4\nframe_kind: agent_reply\nphase: implement\nmode: session-first\nskill_code: vc-workflow\nframework_version: 2026-03\n---\n## Report\nContent here",
)];
let chunks = chunk_entries(&entries, "proj", "claude", &ChunkerConfig::default());
assert_eq!(chunks.len(), 1);
let chunk = &chunks[0];
assert_eq!(chunk.run_id.as_deref(), Some("mrbl-001"));
assert_eq!(chunk.prompt_id.as_deref(), Some("api-redesign_20260327"));
assert_eq!(chunk.agent_model.as_deref(), Some("gpt-5.4"));
assert_eq!(chunk.started_at.as_deref(), Some("2026-03-27T10:00:00Z"));
assert_eq!(chunk.completed_at.as_deref(), Some("2026-03-27T10:01:00Z"));
assert_eq!(chunk.token_usage, Some(1234));
assert_eq!(chunk.findings_count, Some(4));
assert_eq!(chunk.frame_kind, Some(FrameKind::AgentReply));
assert_eq!(chunk.workflow_phase.as_deref(), Some("implement"));
assert_eq!(chunk.mode.as_deref(), Some("session-first"));
assert_eq!(chunk.skill_code.as_deref(), Some("vc-workflow"));
assert_eq!(chunk.framework_version.as_deref(), Some("2026-03"));
assert!(chunk.text.contains("## Report"));
assert!(!chunk.text.contains("run_id: mrbl-001"));
assert!(!chunk.text.contains("phase: implement"));
}
#[test]
fn test_chunk_entries_strip_malformed_frontmatter_without_metadata() {
let entries = vec![make_entry(
14,
30,
"assistant",
"---\nrun_id: [nope\nmode: session-first\n---\n## Report\nBody survives",
)];
let chunks = chunk_entries(&entries, "proj", "claude", &ChunkerConfig::default());
assert_eq!(chunks.len(), 1);
let chunk = &chunks[0];
assert_eq!(chunk.run_id, None);
assert_eq!(chunk.mode, None);
assert!(chunk.text.contains("## Report"));
assert!(chunk.text.contains("Body survives"));
assert!(!chunk.text.contains("mode: session-first"));
}
#[test]
fn test_write_chunks_to_dir() {
let tmp = std::env::temp_dir().join("ai-ctx-chunker-test");
let _ = fs::remove_dir_all(&tmp);
let chunks = vec![
Chunk {
id: "proj_claude_2026-01-22_001".to_string(),
project: "proj".to_string(),
agent: "claude".to_string(),
date: "2026-01-22".to_string(),
session_id: "s1".to_string(),
cwd: Some("/Users/tester/workspaces/proj".to_string()),
kind: Kind::Conversations,
frame_kind: Some(FrameKind::UserMsg),
run_id: None,
prompt_id: None,
agent_model: None,
started_at: None,
completed_at: None,
token_usage: None,
findings_count: None,
workflow_phase: Some("implement".to_string()),
mode: Some("session-first".to_string()),
skill_code: Some("vc-workflow".to_string()),
framework_version: Some("2026-03".to_string()),
msg_range: (0, 5),
text: "chunk one content".to_string(),
token_estimate: 4,
highlights: vec![],
},
Chunk {
id: "proj_claude_2026-01-22_002".to_string(),
project: "proj".to_string(),
agent: "claude".to_string(),
date: "2026-01-22".to_string(),
session_id: "s1".to_string(),
cwd: None,
kind: Kind::Conversations,
frame_kind: None,
run_id: None,
prompt_id: None,
agent_model: None,
started_at: None,
completed_at: None,
token_usage: None,
findings_count: None,
workflow_phase: None,
mode: None,
skill_code: None,
framework_version: None,
msg_range: (3, 8),
text: "chunk two content".to_string(),
token_estimate: 4,
highlights: vec![],
},
];
let paths = write_chunks_to_dir(&chunks, &tmp).unwrap();
assert_eq!(paths.len(), 2);
assert!(paths[0].exists());
assert!(paths[1].exists());
let content = fs::read_to_string(&paths[0]).unwrap();
assert_eq!(content, "chunk one content");
let sidecar = fs::read_to_string(tmp.join("proj_claude_2026-01-22_001.meta.json")).unwrap();
let metadata: ChunkMetadataSidecar = serde_json::from_str(&sidecar).unwrap();
assert_eq!(metadata.project, "proj");
assert_eq!(metadata.agent, "claude");
assert_eq!(metadata.date, "2026-01-22");
assert_eq!(
metadata.cwd.as_deref(),
Some("/Users/tester/workspaces/proj")
);
assert_eq!(metadata.kind, Kind::Conversations);
assert_eq!(metadata.frame_kind, Some(FrameKind::UserMsg));
assert_eq!(metadata.workflow_phase.as_deref(), Some("implement"));
assert_eq!(metadata.mode.as_deref(), Some("session-first"));
assert_eq!(metadata.skill_code.as_deref(), Some("vc-workflow"));
assert_eq!(metadata.framework_version.as_deref(), Some("2026-03"));
let legacy: ChunkMetadataSidecar = serde_json::from_value(serde_json::json!({
"id": "legacy",
"project": "proj",
"agent": "claude",
"date": "2026-01-22",
"session_id": "s1",
"kind": "conversations",
}))
.unwrap();
assert_eq!(legacy.cwd, None);
assert_eq!(legacy.frame_kind, None);
assert_eq!(legacy.workflow_phase, None);
assert_eq!(legacy.mode, None);
assert_eq!(legacy.skill_code, None);
assert_eq!(legacy.framework_version, None);
let _ = fs::remove_dir_all(&tmp);
}
#[test]
fn test_overlap_messages() {
let entries: Vec<TimelineEntry> = (0..8)
.map(|i| make_entry(14, i as u32, "user", &format!("msg_{}", i)))
.collect();
let config = ChunkerConfig {
target_tokens: 80,
min_tokens: 20,
max_tokens: 200,
overlap_messages: 2,
};
let chunks = chunk_entries(&entries, "p", "c", &config);
if chunks.len() >= 2 {
let (_, end1) = chunks[0].msg_range;
let (start2, _) = chunks[1].msg_range;
assert!(
start2 < end1,
"Expected overlap: chunk1 ends at {}, chunk2 starts at {}",
end1,
start2
);
}
}
#[test]
fn test_chunk_id_format() {
let entries = vec![make_entry(10, 0, "user", "test")];
let config = ChunkerConfig::default();
let chunks = chunk_entries(&entries, "MyProject", "gemini", &config);
assert_eq!(chunks[0].id, "MyProject_gemini_2026-01-22_001");
}
#[test]
fn test_chunk_summary() {
let chunks = vec![
Chunk {
id: "a".to_string(),
project: "p".to_string(),
agent: "c".to_string(),
date: "2026-01-20".to_string(),
session_id: "s".to_string(),
cwd: None,
kind: Kind::Conversations,
frame_kind: None,
run_id: None,
prompt_id: None,
agent_model: None,
started_at: None,
completed_at: None,
token_usage: None,
findings_count: None,
workflow_phase: None,
mode: None,
skill_code: None,
framework_version: None,
msg_range: (0, 5),
text: "x".repeat(100),
token_estimate: 25,
highlights: vec![],
},
Chunk {
id: "b".to_string(),
project: "p".to_string(),
agent: "c".to_string(),
date: "2026-01-21".to_string(),
session_id: "s".to_string(),
cwd: None,
kind: Kind::Conversations,
frame_kind: None,
run_id: None,
prompt_id: None,
agent_model: None,
started_at: None,
completed_at: None,
token_usage: None,
findings_count: None,
workflow_phase: None,
mode: None,
skill_code: None,
framework_version: None,
msg_range: (5, 10),
text: "y".repeat(200),
token_estimate: 50,
highlights: vec![],
},
];
let summary = chunk_summary(&chunks);
assert!(summary.contains("2 chunks"));
assert!(summary.contains("75 total tokens"));
assert!(summary.contains("2 days"));
}
#[test]
fn test_extract_highlights_filters_keywords() {
let entries = [
make_entry(10, 0, "user", "Decision: lock chunking heuristics"),
make_entry(10, 1, "assistant", "Just chatting"),
make_entry(10, 2, "user", "TODO: add summarization notes"),
make_entry(10, 3, "user", "KEY architectural choice"),
];
let refs: Vec<&TimelineEntry> = entries.iter().collect();
let highlights = extract_highlights(&refs);
assert_eq!(
highlights,
vec![
"Decision: lock chunking heuristics",
"TODO: add summarization notes",
"KEY architectural choice"
]
);
}
#[test]
fn test_format_chunk_text_includes_signals_for_checklist_and_intent() {
let entries = [make_entry(
14,
30,
"user",
"No i tutaj mam taki pomysł, żeby to zrobić\nPlan mode: enabled\nUser accepted the plan\nUltrathink:\n- [ ] pierwsza rzecz\n- [x] druga rzecz\n\n★ Insight ─ to działa",
)];
let refs: Vec<&TimelineEntry> = entries.iter().collect();
let text = format_chunk_text(&refs, "TestProj", "claude", "2026-01-22");
assert!(text.contains("[signals]"));
assert!(text.contains("RED LIGHT: checklist detected (open: 1, done: 1)"));
assert!(text.contains("- [ ] pierwsza rzecz"));
assert!(text.contains("- [x] druga rzecz"));
assert!(text.contains("Ultrathink:"));
assert!(text.contains("- Ultrathink:"));
assert!(text.contains("Insight:"));
assert!(text.contains("- ★ Insight ─ to działa"));
assert!(text.contains("Plan mode:"));
assert!(text.contains("- Plan mode: enabled"));
assert!(text.contains("- User accepted the plan"));
assert!(text.contains("Intent:"));
assert!(text.contains("No i tutaj mam taki pomysł, żeby to zrobić"));
assert!(text.contains("[/signals]"));
}
}