use crate::event::{Author, Event, EventStatus, EventType, EvidenceStrength, Source};
use crate::session::parser::*;
/// A task reconstructed ("backfilled") from a single parsed session.
///
/// Produced by [`extract_from_session`]; see that function for how each
/// field is populated.
#[derive(Debug)]
pub struct ExtractedTask {
/// Generated identifier of the form `tj-xxxxxx`, where the suffix is six
/// lowercased characters taken from a freshly generated ULID.
pub task_id: String,
/// Human-readable title, as computed by `derive_title`.
pub title: String,
/// Copy of the source session's `session_id`.
pub session_id: String,
/// Events extracted from the session: an `Open` event first, a `Close`
/// event last, and any evidence / heuristic / finding events in between.
pub events: Vec<Event>,
}
pub fn extract_from_session(session: &ParsedSession) -> Option<ExtractedTask> {
if session.user_message_count() < 2 {
return None;
}
let task_id = format!(
"tj-{}",
&ulid::Ulid::new().to_string()[10..16].to_lowercase()
);
let title = derive_title(session);
let mut events = Vec::new();
let open_text = session
.summary()
.map(|s| truncate(s, 500))
.or_else(|| session.first_user_text().map(|s| truncate(&s, 500)))
.unwrap_or_else(|| title.clone());
let mut open_event = Event::new(
&task_id,
EventType::Open,
Author::Agent,
Source::Cli,
open_text,
);
if let Some(ref ts) = session.first_timestamp {
open_event.timestamp = ts.clone();
}
open_event.meta = serde_json::json!({"title": title, "backfill": true, "session_id": session.session_id});
events.push(open_event);
let mut files_modified: Vec<String> = Vec::new();
let mut tools_used: Vec<String> = Vec::new();
for entry in &session.entries {
match entry {
SessionEntry::Assistant(a) => {
let tool_uses = extract_tool_uses(a);
for (tool_name, input) in &tool_uses {
tools_used.push(tool_name.clone());
if tool_name == "Write" || tool_name == "Edit" {
if let Some(path) = input.get("file_path").and_then(|v| v.as_str()) {
let short = shorten_path(path);
if !files_modified.contains(&short) {
files_modified.push(short);
}
}
}
if tool_name == "Bash" {
if let Some(cmd) = input.get("command").and_then(|v| v.as_str()) {
if is_test_command(cmd) {
let mut ev = Event::new(
&task_id,
EventType::Evidence,
Author::Agent,
Source::Cli,
format!("Ran tests: {}", truncate(cmd, 200)),
);
ev.timestamp = a.timestamp.clone();
ev.evidence_strength = Some(EvidenceStrength::Medium);
ev.meta = serde_json::json!({"backfill": true});
events.push(ev);
}
}
}
if tool_name == "Bash" {
if let Some(cmd) = input.get("command").and_then(|v| v.as_str()) {
if cmd.contains("git commit") && !cmd.contains("git commit --amend") {
let mut ev = Event::new(
&task_id,
EventType::Evidence,
Author::Agent,
Source::Cli,
format!("Git commit: {}", truncate(cmd, 200)),
);
ev.timestamp = a.timestamp.clone();
ev.evidence_strength = Some(EvidenceStrength::Strong);
ev.meta = serde_json::json!({"backfill": true});
events.push(ev);
}
}
}
}
let texts = extract_assistant_texts(a);
for text in &texts {
if let Some(ev) = classify_text_heuristic(&task_id, text, &a.timestamp) {
events.push(ev);
}
}
}
SessionEntry::User(_) | SessionEntry::Summary(_) | SessionEntry::Other => {}
}
}
if !files_modified.is_empty() {
let summary = format!(
"Modified {} files: {}",
files_modified.len(),
files_modified.join(", ")
);
let mut ev = Event::new(&task_id, EventType::Finding, Author::Agent, Source::Cli, summary);
if let Some(ref ts) = session.last_timestamp {
ev.timestamp = ts.clone();
}
ev.refs.files = files_modified;
ev.meta = serde_json::json!({"backfill": true});
events.push(ev);
}
let close_text = format!(
"Session ended. {} user messages, {} assistant messages, {} tool calls.",
session.user_message_count(),
session.assistant_message_count(),
tools_used.len()
);
let mut close_event = Event::new(
&task_id,
EventType::Close,
Author::Agent,
Source::Cli,
close_text,
);
if let Some(ref ts) = session.last_timestamp {
close_event.timestamp = ts.clone();
}
close_event.meta = serde_json::json!({
"backfill": true,
"reason": "session_ended",
"outcome": "completed"
});
events.push(close_event);
Some(ExtractedTask {
task_id,
title,
session_id: session.session_id.clone(),
events,
})
}
/// Picks a human-readable title for a session.
///
/// Preference order:
/// 1. the session summary, with XML-like tags stripped, truncated to 120 bytes;
/// 2. the first non-blank line of the first user message whose tag-stripped,
///    trimmed text is longer than 5 bytes (this skips inputs such as `/init`),
///    truncated to 120 bytes;
/// 3. `"Session <prefix>"`, where `<prefix>` is at most the first 8 bytes of
///    the session id.
fn derive_title(session: &ParsedSession) -> String {
    if let Some(summary) = session.summary() {
        return truncate(&strip_xml_tags(summary), 120);
    }

    for entry in &session.entries {
        if let SessionEntry::User(u) = entry {
            if let Some(text) = extract_user_text(u) {
                let clean = strip_xml_tags(&text);
                let first_line = clean
                    .lines()
                    .find(|l| !l.trim().is_empty())
                    .unwrap_or(&clean);
                let trimmed = first_line.trim();
                if trimmed.len() > 5 {
                    return truncate(trimmed, 120);
                }
            }
        }
    }

    // Slicing at a fixed byte offset panics when the offset falls inside a
    // multi-byte character, so back up to the nearest char boundary first.
    // Offset 0 is always a boundary, which guarantees the loop terminates.
    let id = &session.session_id;
    let mut end = id.len().min(8);
    while !id.is_char_boundary(end) {
        end -= 1;
    }
    format!("Session {}", &id[..end])
}
/// Removes everything between `<` and `>` (inclusive) from `text`.
///
/// This is a plain character scan, not an XML parser: any `<` starts a
/// skipped region and any `>` ends it, and neither bracket is ever emitted.
/// An unterminated `<` therefore drops the rest of the input.
fn strip_xml_tags(text: &str) -> String {
    let mut inside_tag = false;
    text.chars()
        .filter(|&c| match c {
            '<' => {
                inside_tag = true;
                false
            }
            '>' => {
                inside_tag = false;
                false
            }
            _ => !inside_tag,
        })
        .collect()
}
fn classify_text_heuristic(task_id: &str, text: &str, timestamp: &str) -> Option<Event> {
let lower = text.to_lowercase();
if text.len() < 50 {
return None;
}
let decision_patterns = [
"decided to",
"will use",
"going with",
"chose to",
"the approach is",
"решил использовать",
"будем использовать",
"выбрал",
];
for pattern in &decision_patterns {
if lower.contains(pattern) {
let mut ev = Event::new(
task_id,
EventType::Decision,
Author::Agent,
Source::Cli,
truncate(text, 300),
);
ev.timestamp = timestamp.to_string();
ev.confidence = Some(0.7);
ev.status = EventStatus::Suggested;
ev.meta = serde_json::json!({"backfill": true, "heuristic": "decision_keyword"});
return Some(ev);
}
}
let rejection_patterns = [
"won't work",
"doesn't work",
"can't use",
"не работает",
"не подходит",
"отказались",
"tried but",
"rejected",
"abandoned",
];
for pattern in &rejection_patterns {
if lower.contains(pattern) {
let mut ev = Event::new(
task_id,
EventType::Rejection,
Author::Agent,
Source::Cli,
truncate(text, 300),
);
ev.timestamp = timestamp.to_string();
ev.confidence = Some(0.6);
ev.status = EventStatus::Suggested;
ev.meta = serde_json::json!({"backfill": true, "heuristic": "rejection_keyword"});
return Some(ev);
}
}
let constraint_patterns = [
"rate limit",
"not supported",
"limitation",
"ограничение",
"не поддерживает",
"requires",
"must be",
];
for pattern in &constraint_patterns {
if lower.contains(pattern) && text.len() < 500 {
let mut ev = Event::new(
task_id,
EventType::Constraint,
Author::Agent,
Source::Cli,
truncate(text, 300),
);
ev.timestamp = timestamp.to_string();
ev.confidence = Some(0.5);
ev.status = EventStatus::Suggested;
ev.meta = serde_json::json!({"backfill": true, "heuristic": "constraint_keyword"});
return Some(ev);
}
}
None
}
/// Reports whether a shell command appears to invoke a test runner.
///
/// The check is a case-insensitive substring search for a fixed list of
/// runner invocations, so it also matches commands that merely mention one
/// (for example `echo 'cargo test'`).
fn is_test_command(cmd: &str) -> bool {
    const TEST_RUNNERS: &[&str] = &[
        "cargo test",
        "npm test",
        "pytest",
        "phpunit",
        "jest",
        "vitest",
        "go test",
        "make test",
    ];

    let normalized = cmd.to_lowercase();
    TEST_RUNNERS
        .iter()
        .any(|runner| normalized.contains(*runner))
}
/// Shortens a path to its last two components, joined with `/`.
///
/// Both `/` and `\` are treated as separators. Paths with two or fewer
/// components are returned unchanged, original separators included.
fn shorten_path(path: &str) -> String {
    let is_separator = |c: char| c == '/' || c == '\\';

    // Walk the components from the end; a third one means there is
    // something to cut off.
    let mut from_end = path.rsplit(is_separator);
    let file = from_end.next();
    let parent = from_end.next();
    let has_more = from_end.next().is_some();

    match (has_more, parent, file) {
        (true, Some(parent), Some(file)) => format!("{}/{}", parent, file),
        _ => path.to_string(),
    }
}
/// Limits `text` to at most `max_len` bytes, appending `…` when it was cut.
///
/// Text that already fits is returned unchanged. Otherwise the cut point is
/// moved back to the nearest UTF-8 character boundary at or below `max_len`,
/// so the kept prefix may be shorter than `max_len` bytes; the ellipsis is
/// added on top of that prefix.
fn truncate(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_owned();
    }

    // Offset 0 is always a char boundary, so the search cannot come up empty.
    let cut = (0..=max_len)
        .rev()
        .find(|&idx| text.is_char_boundary(idx))
        .unwrap_or(0);

    let mut shortened = String::with_capacity(cut + '…'.len_utf8());
    shortened.push_str(&text[..cut]);
    shortened.push('…');
    shortened
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- small helpers: is_test_command / shorten_path / truncate ----

    #[test]
    fn test_is_test_command() {
        assert!(is_test_command("cargo test -p my-crate"));
        assert!(is_test_command("npm test"));
        assert!(is_test_command("python -m pytest tests/"));
        assert!(!is_test_command("cargo build"));
        assert!(!is_test_command("git push"));
    }

    #[test]
    fn test_shorten_path() {
        assert_eq!(shorten_path("/home/user/project/src/main.rs"), "src/main.rs");
        assert_eq!(shorten_path("main.rs"), "main.rs");
    }

    #[test]
    fn test_truncate() {
        assert_eq!(truncate("hello", 10), "hello");
        assert_eq!(truncate("hello world", 5), "hello…");
    }

    // ---- classify_text_heuristic ----

    #[test]
    fn test_classify_decision() {
        let ev = classify_text_heuristic(
            "tj-test",
            "After analysis, I decided to use the rmcp crate for MCP implementation because it has better macro support.",
            "2026-01-01T00:00:00Z",
        );
        assert!(ev.is_some());
        assert_eq!(ev.unwrap().event_type, EventType::Decision);
    }

    #[test]
    fn test_classify_rejection() {
        let ev = classify_text_heuristic(
            "tj-test",
            "The previous approach won't work because the API doesn't support batch operations.",
            "2026-01-01T00:00:00Z",
        );
        assert!(ev.is_some());
        assert_eq!(ev.unwrap().event_type, EventType::Rejection);
    }

    #[test]
    fn test_classify_short_text_skipped() {
        // Texts under 50 bytes are never classified.
        let ev = classify_text_heuristic("tj-test", "OK, done.", "2026-01-01T00:00:00Z");
        assert!(ev.is_none());
    }

    // ---- fixtures ----

    /// Builds a user entry whose message content is the plain string `text`.
    fn make_user_entry(uuid: &str, ts: &str, text: &str) -> SessionEntry {
        SessionEntry::User(UserEntry {
            uuid: uuid.into(),
            timestamp: ts.into(),
            session_id: None,
            message: Some(UserMessage {
                content: serde_json::json!(text),
            }),
            cwd: None,
        })
    }

    /// Builds an assistant entry carrying the given content blocks.
    fn make_assistant_entry(uuid: &str, ts: &str, blocks: Vec<ContentBlock>) -> SessionEntry {
        SessionEntry::Assistant(AssistantEntry {
            uuid: uuid.into(),
            timestamp: ts.into(),
            session_id: None,
            message: Some(AssistantMessage {
                content: blocks,
                model: Some("claude-opus-4-20250514".into()),
                stop_reason: Some("end_turn".into()),
            }),
        })
    }

    // ---- extract_from_session ----

    #[test]
    fn extract_from_session_produces_open_and_close_events() {
        let session = ParsedSession {
            session_id: "test-session-123".into(),
            file_path: "/tmp/test-session-123.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Please fix the login bug"),
                make_assistant_entry("a1", "2026-01-01T00:00:01Z", vec![
                    ContentBlock::Text { text: "I'll look into the login issue.".into() },
                ]),
                make_user_entry("u2", "2026-01-01T00:00:02Z", "Thanks, looks good"),
                make_assistant_entry("a2", "2026-01-01T00:00:03Z", vec![
                    ContentBlock::Text { text: "The fix is complete.".into() },
                ]),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:03Z".into()),
        };
        let task = extract_from_session(&session).unwrap();
        assert!(task.task_id.starts_with("tj-"));
        assert!(!task.title.is_empty());
        assert_eq!(task.session_id, "test-session-123");
        assert_eq!(task.events[0].event_type, EventType::Open);
        assert_eq!(task.events[0].timestamp, "2026-01-01T00:00:00Z");
        let last = task.events.last().unwrap();
        assert_eq!(last.event_type, EventType::Close);
        assert_eq!(last.timestamp, "2026-01-01T00:00:03Z");
        assert!(last.text.contains("user messages"));
    }

    #[test]
    fn extract_from_session_skips_sessions_with_fewer_than_2_user_messages() {
        let session = ParsedSession {
            session_id: "short-session".into(),
            file_path: "/tmp/short.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Hello"),
                make_assistant_entry("a1", "2026-01-01T00:00:01Z", vec![
                    ContentBlock::Text { text: "Hi!".into() },
                ]),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:01Z".into()),
        };
        assert!(extract_from_session(&session).is_none());
    }

    #[test]
    fn extract_from_session_skips_zero_user_messages() {
        let session = ParsedSession {
            session_id: "empty-session".into(),
            file_path: "/tmp/empty.jsonl".into(),
            entries: vec![],
            first_timestamp: None,
            last_timestamp: None,
        };
        assert!(extract_from_session(&session).is_none());
    }

    #[test]
    fn extract_from_session_tracks_file_modifications() {
        let session = ParsedSession {
            session_id: "file-mod-session".into(),
            file_path: "/tmp/fm.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Update the config file"),
                make_assistant_entry("a1", "2026-01-01T00:00:01Z", vec![
                    ContentBlock::ToolUse {
                        name: "Write".into(),
                        input: serde_json::json!({"file_path": "/home/user/project/src/config.rs"}),
                    },
                ]),
                make_user_entry("u2", "2026-01-01T00:00:02Z", "Also update main.rs"),
                make_assistant_entry("a2", "2026-01-01T00:00:03Z", vec![
                    ContentBlock::ToolUse {
                        name: "Edit".into(),
                        input: serde_json::json!({"file_path": "/home/user/project/src/main.rs", "old_string": "a", "new_string": "b"}),
                    },
                ]),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:03Z".into()),
        };
        let task = extract_from_session(&session).unwrap();
        let finding = task.events.iter().find(|e| e.event_type == EventType::Finding);
        assert!(finding.is_some());
        let finding = finding.unwrap();
        assert!(finding.text.contains("2 files"));
        assert!(finding.refs.files.contains(&"src/config.rs".to_string()));
        assert!(finding.refs.files.contains(&"src/main.rs".to_string()));
    }

    #[test]
    fn extract_from_session_detects_test_commands() {
        let session = ParsedSession {
            session_id: "test-cmd-session".into(),
            file_path: "/tmp/tc.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Run the tests"),
                make_assistant_entry("a1", "2026-01-01T00:00:01Z", vec![
                    ContentBlock::ToolUse {
                        name: "Bash".into(),
                        input: serde_json::json!({"command": "cargo test --workspace"}),
                    },
                ]),
                make_user_entry("u2", "2026-01-01T00:00:02Z", "Good"),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:02Z".into()),
        };
        let task = extract_from_session(&session).unwrap();
        let evidence = task.events.iter().find(|e| e.event_type == EventType::Evidence);
        assert!(evidence.is_some());
        assert!(evidence.unwrap().text.contains("cargo test"));
    }

    #[test]
    fn extract_from_session_detects_git_commit() {
        let session = ParsedSession {
            session_id: "git-commit-session".into(),
            file_path: "/tmp/gc.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Commit the changes"),
                make_assistant_entry("a1", "2026-01-01T00:00:01Z", vec![
                    ContentBlock::ToolUse {
                        name: "Bash".into(),
                        input: serde_json::json!({"command": "git commit -m 'fix: resolve login bug'"}),
                    },
                ]),
                make_user_entry("u2", "2026-01-01T00:00:02Z", "Push it"),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:02Z".into()),
        };
        let task = extract_from_session(&session).unwrap();
        let evidence_events: Vec<_> = task.events.iter()
            .filter(|e| e.event_type == EventType::Evidence)
            .collect();
        let commit_ev = evidence_events.iter().find(|e| e.text.contains("Git commit"));
        assert!(commit_ev.is_some());
        assert_eq!(commit_ev.unwrap().evidence_strength, Some(EvidenceStrength::Strong));
    }

    // ---- strip_xml_tags ----

    #[test]
    fn strip_xml_tags_removes_simple_tags() {
        assert_eq!(strip_xml_tags("<b>hello</b>"), "hello");
    }

    #[test]
    fn strip_xml_tags_removes_nested_tags() {
        assert_eq!(strip_xml_tags("<div><span>text</span></div>"), "text");
    }

    #[test]
    fn strip_xml_tags_no_tags() {
        assert_eq!(strip_xml_tags("plain text"), "plain text");
    }

    #[test]
    fn strip_xml_tags_only_tags() {
        assert_eq!(strip_xml_tags("<tag></tag>"), "");
    }

    #[test]
    fn strip_xml_tags_with_attributes() {
        assert_eq!(strip_xml_tags("<command-name foo=\"bar\">init</command-name>"), "init");
    }

    #[test]
    fn strip_xml_tags_preserves_angle_bracket_text_between_tags() {
        // Free-standing `<` / `>` are treated as tag delimiters, so the text
        // between them is dropped. The space before `<` and the space after
        // `>` both survive, which leaves two consecutive spaces.
        assert_eq!(strip_xml_tags("a < b and c > d"), "a  d");
    }

    // ---- derive_title ----

    #[test]
    fn derive_title_from_summary() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![
                SessionEntry::Summary(SummaryEntry {
                    summary: "Fixed authentication bug in login flow".into(),
                    timestamp: None,
                }),
                make_user_entry("u1", "t", "some user text that is long enough"),
            ],
            first_timestamp: None,
            last_timestamp: None,
        };
        assert_eq!(derive_title(&session), "Fixed authentication bug in login flow");
    }

    #[test]
    fn derive_title_from_user_text() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "t", "Please implement the new caching layer"),
            ],
            first_timestamp: None,
            last_timestamp: None,
        };
        assert_eq!(derive_title(&session), "Please implement the new caching layer");
    }

    #[test]
    fn derive_title_skips_short_user_text() {
        // "/init" is only 5 bytes long, so the second message supplies the title.
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "t", "/init"),
                make_user_entry("u2", "t", "Implement the feature for user profiles"),
            ],
            first_timestamp: None,
            last_timestamp: None,
        };
        let title = derive_title(&session);
        assert!(title.contains("Implement the feature"));
    }

    #[test]
    fn derive_title_fallback_to_session_id() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "t", "hi"),
            ],
            first_timestamp: None,
            last_timestamp: None,
        };
        let title = derive_title(&session);
        assert!(title.starts_with("Session "));
        assert!(title.contains("abcdefgh"));
    }

    #[test]
    fn derive_title_strips_xml_from_summary() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![
                SessionEntry::Summary(SummaryEntry {
                    summary: "<task>Fix the <b>critical</b> bug</task>".into(),
                    timestamp: None,
                }),
            ],
            first_timestamp: None,
            last_timestamp: None,
        };
        let title = derive_title(&session);
        assert_eq!(title, "Fix the critical bug");
    }

    // ---- additional coverage ----

    #[test]
    fn test_classify_constraint() {
        let ev = classify_text_heuristic(
            "tj-test",
            "The API has a rate limit of 100 requests per minute, so we need to implement throttling.",
            "2026-01-01T00:00:00Z",
        );
        assert!(ev.is_some());
        assert_eq!(ev.unwrap().event_type, EventType::Constraint);
    }

    #[test]
    fn test_classify_no_match_returns_none() {
        let ev = classify_text_heuristic(
            "tj-test",
            "I have successfully implemented the feature and all tests are passing. The code is clean and well-organized.",
            "2026-01-01T00:00:00Z",
        );
        assert!(ev.is_none());
    }

    #[test]
    fn test_is_test_command_additional() {
        assert!(is_test_command("jest --coverage"));
        assert!(is_test_command("vitest run"));
        assert!(is_test_command("go test ./..."));
        assert!(is_test_command("make test"));
        assert!(is_test_command("phpunit tests/Unit"));
        // Substring matching means a command that merely mentions a runner
        // is also reported as a test command.
        assert!(is_test_command("echo 'cargo test'"));
        assert!(!is_test_command("ls -la"));
    }

    #[test]
    fn test_shorten_path_windows_separators() {
        assert_eq!(shorten_path("C:\\Users\\user\\project\\src\\main.rs"), "src/main.rs");
    }

    #[test]
    fn test_shorten_path_two_components() {
        assert_eq!(shorten_path("src/main.rs"), "src/main.rs");
    }

    #[test]
    fn test_truncate_multibyte_utf8() {
        // Each Cyrillic letter is two bytes, so a 6-byte limit keeps three letters.
        let text = "Привет мир";
        let truncated = truncate(text, 6);
        assert!(truncated.ends_with('…'));
        assert!(truncated.starts_with("При"));
    }

    #[test]
    fn test_truncate_exact_boundary() {
        assert_eq!(truncate("hello", 5), "hello");
        assert_eq!(truncate("hello!", 5), "hello…");
    }
}