1use crate::event::{Author, Event, EventStatus, EventType, EvidenceStrength, Source};
7use crate::session::parser::*;
8
/// A task reconstructed from a single parsed session, together with the
/// events derived from it.
///
/// Values of this type are produced by [`extract_from_session`].
#[derive(Debug)]
pub struct ExtractedTask {
    /// Generated identifier: `tj-` followed by six lowercased characters
    /// taken from a freshly created ULID.
    pub task_id: String,
    /// Human-readable title as computed by `derive_title`.
    pub title: String,
    /// Copy of the source session's `session_id`.
    pub session_id: String,
    /// Events in the order they were generated: the `Open` event first and
    /// the `Close` event last.
    pub events: Vec<Event>,
}
17
18pub fn extract_from_session(session: &ParsedSession) -> Option<ExtractedTask> {
21 if session.user_message_count() < 2 {
23 return None;
24 }
25
26 let task_id = format!(
27 "tj-{}",
28 &ulid::Ulid::new().to_string()[10..16].to_lowercase()
29 );
30
31 let title = derive_title(session);
33 let mut events = Vec::new();
34
35 let open_text = session
37 .summary()
38 .map(|s| truncate(s, 500))
39 .or_else(|| session.first_user_text().map(|s| truncate(&s, 500)))
40 .unwrap_or_else(|| title.clone());
41
42 let mut open_event = Event::new(
43 &task_id,
44 EventType::Open,
45 Author::Agent,
46 Source::Cli,
47 open_text,
48 );
49 if let Some(ref ts) = session.first_timestamp {
50 open_event.timestamp = ts.clone();
51 }
52 open_event.meta = serde_json::json!({"title": title, "backfill": true, "session_id": session.session_id});
53 events.push(open_event);
54
55 let mut files_modified: Vec<String> = Vec::new();
57 let mut tools_used: Vec<String> = Vec::new();
58
59 for entry in &session.entries {
60 match entry {
61 SessionEntry::Assistant(a) => {
62 let tool_uses = extract_tool_uses(a);
64 for (tool_name, input) in &tool_uses {
65 tools_used.push(tool_name.clone());
66
67 if tool_name == "Write" || tool_name == "Edit" {
69 if let Some(path) = input.get("file_path").and_then(|v| v.as_str()) {
70 let short = shorten_path(path);
71 if !files_modified.contains(&short) {
72 files_modified.push(short);
73 }
74 }
75 }
76
77 if tool_name == "Bash" {
79 if let Some(cmd) = input.get("command").and_then(|v| v.as_str()) {
80 if is_test_command(cmd) {
81 let mut ev = Event::new(
82 &task_id,
83 EventType::Evidence,
84 Author::Agent,
85 Source::Cli,
86 format!("Ran tests: {}", truncate(cmd, 200)),
87 );
88 ev.timestamp = a.timestamp.clone();
89 ev.evidence_strength = Some(EvidenceStrength::Medium);
90 ev.meta = serde_json::json!({"backfill": true});
91 events.push(ev);
92 }
93 }
94 }
95
96 if tool_name == "Bash" {
98 if let Some(cmd) = input.get("command").and_then(|v| v.as_str()) {
99 if cmd.contains("git commit") && !cmd.contains("git commit --amend") {
100 let mut ev = Event::new(
101 &task_id,
102 EventType::Evidence,
103 Author::Agent,
104 Source::Cli,
105 format!("Git commit: {}", truncate(cmd, 200)),
106 );
107 ev.timestamp = a.timestamp.clone();
108 ev.evidence_strength = Some(EvidenceStrength::Strong);
109 ev.meta = serde_json::json!({"backfill": true});
110 events.push(ev);
111 }
112 }
113 }
114 }
115
116 let texts = extract_assistant_texts(a);
118 for text in &texts {
119 if let Some(ev) = classify_text_heuristic(&task_id, text, &a.timestamp) {
120 events.push(ev);
121 }
122 }
123 }
124 SessionEntry::User(_) | SessionEntry::Summary(_) | SessionEntry::Other => {}
125 }
126 }
127
128 if !files_modified.is_empty() {
130 let summary = format!(
131 "Modified {} files: {}",
132 files_modified.len(),
133 files_modified.join(", ")
134 );
135 let mut ev = Event::new(&task_id, EventType::Finding, Author::Agent, Source::Cli, summary);
136 if let Some(ref ts) = session.last_timestamp {
137 ev.timestamp = ts.clone();
138 }
139 ev.refs.files = files_modified;
140 ev.meta = serde_json::json!({"backfill": true});
141 events.push(ev);
142 }
143
144 let close_text = format!(
146 "Session ended. {} user messages, {} assistant messages, {} tool calls.",
147 session.user_message_count(),
148 session.assistant_message_count(),
149 tools_used.len()
150 );
151 let mut close_event = Event::new(
152 &task_id,
153 EventType::Close,
154 Author::Agent,
155 Source::Cli,
156 close_text,
157 );
158 if let Some(ref ts) = session.last_timestamp {
159 close_event.timestamp = ts.clone();
160 }
161 close_event.meta = serde_json::json!({
162 "backfill": true,
163 "reason": "session_ended",
164 "outcome": "completed"
165 });
166 events.push(close_event);
167
168 Some(ExtractedTask {
169 task_id,
170 title,
171 session_id: session.session_id.clone(),
172 events,
173 })
174}
175
176fn derive_title(session: &ParsedSession) -> String {
178 if let Some(summary) = session.summary() {
180 return truncate(&strip_xml_tags(summary), 120);
181 }
182
183 for entry in &session.entries {
185 if let SessionEntry::User(u) = entry {
186 if let Some(text) = extract_user_text(u) {
187 let clean = strip_xml_tags(&text);
188 let first_line = clean.lines().find(|l| !l.trim().is_empty()).unwrap_or(&clean);
189 let trimmed = first_line.trim();
190 if trimmed.len() > 5 {
192 return truncate(trimmed, 120);
193 }
194 }
195 }
196 }
197
198 format!("Session {}", &session.session_id[..8.min(session.session_id.len())])
199}
200
/// Removes everything between `<` and `>` (brackets included) from `text`.
///
/// This is a plain character scan, not an XML parser: a `<` starts skipping,
/// the next `>` stops it, and a `>` outside a tag is dropped as well. Text
/// following an unclosed `<` is discarded through the end of the input.
fn strip_xml_tags(text: &str) -> String {
    let mut inside_tag = false;
    text.chars()
        .filter(|&ch| match ch {
            '<' => {
                inside_tag = true;
                false
            }
            '>' => {
                inside_tag = false;
                false
            }
            _ => !inside_tag,
        })
        .collect()
}
216
217fn classify_text_heuristic(task_id: &str, text: &str, timestamp: &str) -> Option<Event> {
220 let lower = text.to_lowercase();
221
222 if text.len() < 50 {
224 return None;
225 }
226
227 let decision_patterns = [
229 "decided to",
230 "will use",
231 "going with",
232 "chose to",
233 "the approach is",
234 "решил использовать",
235 "будем использовать",
236 "выбрал",
237 ];
238 for pattern in &decision_patterns {
239 if lower.contains(pattern) {
240 let mut ev = Event::new(
241 task_id,
242 EventType::Decision,
243 Author::Agent,
244 Source::Cli,
245 truncate(text, 300),
246 );
247 ev.timestamp = timestamp.to_string();
248 ev.confidence = Some(0.7);
249 ev.status = EventStatus::Suggested;
250 ev.meta = serde_json::json!({"backfill": true, "heuristic": "decision_keyword"});
251 return Some(ev);
252 }
253 }
254
255 let rejection_patterns = [
257 "won't work",
258 "doesn't work",
259 "can't use",
260 "не работает",
261 "не подходит",
262 "отказались",
263 "tried but",
264 "rejected",
265 "abandoned",
266 ];
267 for pattern in &rejection_patterns {
268 if lower.contains(pattern) {
269 let mut ev = Event::new(
270 task_id,
271 EventType::Rejection,
272 Author::Agent,
273 Source::Cli,
274 truncate(text, 300),
275 );
276 ev.timestamp = timestamp.to_string();
277 ev.confidence = Some(0.6);
278 ev.status = EventStatus::Suggested;
279 ev.meta = serde_json::json!({"backfill": true, "heuristic": "rejection_keyword"});
280 return Some(ev);
281 }
282 }
283
284 let constraint_patterns = [
286 "rate limit",
287 "not supported",
288 "limitation",
289 "ограничение",
290 "не поддерживает",
291 "requires",
292 "must be",
293 ];
294 for pattern in &constraint_patterns {
295 if lower.contains(pattern) && text.len() < 500 {
296 let mut ev = Event::new(
297 task_id,
298 EventType::Constraint,
299 Author::Agent,
300 Source::Cli,
301 truncate(text, 300),
302 );
303 ev.timestamp = timestamp.to_string();
304 ev.confidence = Some(0.5);
305 ev.status = EventStatus::Suggested;
306 ev.meta = serde_json::json!({"backfill": true, "heuristic": "constraint_keyword"});
307 return Some(ev);
308 }
309 }
310
311 None
312}
313
/// Reports whether a shell command line looks like a test-runner invocation.
///
/// The check is a case-insensitive substring search for a fixed set of
/// runner markers, so the marker may appear anywhere in `cmd`.
fn is_test_command(cmd: &str) -> bool {
    const TEST_RUNNER_MARKERS: [&str; 8] = [
        "cargo test",
        "npm test",
        "pytest",
        "phpunit",
        "jest",
        "vitest",
        "go test",
        "make test",
    ];

    let normalized = cmd.to_lowercase();
    TEST_RUNNER_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
}
326
/// Shortens a path to its last two components, joined with `/`.
///
/// Both `/` and `\` count as separators. A path with two or fewer components
/// is returned unchanged, original separators included.
fn shorten_path(path: &str) -> String {
    let mut from_end = path.rsplit(|c: char| c == '/' || c == '\\');
    match (from_end.next(), from_end.next(), from_end.next()) {
        // A third component exists, so there is something to cut off.
        (Some(last), Some(parent), Some(_)) => format!("{}/{}", parent, last),
        _ => path.to_string(),
    }
}
336
/// Limits `text` to at most `max_len` bytes, appending `…` when it was cut.
///
/// Text that already fits is returned unchanged. Otherwise the cut point is
/// moved back to the nearest character boundary at or below `max_len`, so a
/// multi-byte character is never split. The ellipsis is added on top of the
/// kept prefix, so the result may exceed `max_len` bytes by its length.
fn truncate(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_owned();
    }

    // Offset 0 is always a character boundary, so the search cannot fail.
    let cut = (0..=max_len)
        .rev()
        .find(|&offset| text.is_char_boundary(offset))
        .unwrap_or(0);
    format!("{}…", &text[..cut])
}
350
/// Unit tests for the session-to-task extraction helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_test_command() {
        assert!(is_test_command("cargo test -p my-crate"));
        assert!(is_test_command("npm test"));
        assert!(is_test_command("python -m pytest tests/"));
        assert!(!is_test_command("cargo build"));
        assert!(!is_test_command("git push"));
    }

    #[test]
    fn test_shorten_path() {
        assert_eq!(shorten_path("/home/user/project/src/main.rs"), "src/main.rs");
        assert_eq!(shorten_path("main.rs"), "main.rs");
    }

    #[test]
    fn test_truncate() {
        assert_eq!(truncate("hello", 10), "hello");
        assert_eq!(truncate("hello world", 5), "hello…");
    }

    #[test]
    fn test_classify_decision() {
        let ev = classify_text_heuristic(
            "tj-test",
            "After analysis, I decided to use the rmcp crate for MCP implementation because it has better macro support.",
            "2026-01-01T00:00:00Z",
        );
        assert!(ev.is_some());
        assert_eq!(ev.unwrap().event_type, EventType::Decision);
    }

    #[test]
    fn test_classify_rejection() {
        let ev = classify_text_heuristic(
            "tj-test",
            "The previous approach won't work because the API doesn't support batch operations.",
            "2026-01-01T00:00:00Z",
        );
        assert!(ev.is_some());
        assert_eq!(ev.unwrap().event_type, EventType::Rejection);
    }

    #[test]
    fn test_classify_short_text_skipped() {
        let ev = classify_text_heuristic("tj-test", "OK, done.", "2026-01-01T00:00:00Z");
        assert!(ev.is_none());
    }

    /// Builds a user session entry whose message content is the plain string `text`.
    fn make_user_entry(uuid: &str, ts: &str, text: &str) -> SessionEntry {
        SessionEntry::User(UserEntry {
            uuid: uuid.into(),
            timestamp: ts.into(),
            session_id: None,
            message: Some(UserMessage {
                content: serde_json::json!(text),
            }),
            cwd: None,
        })
    }

    /// Builds an assistant session entry carrying the given content blocks.
    fn make_assistant_entry(uuid: &str, ts: &str, blocks: Vec<ContentBlock>) -> SessionEntry {
        SessionEntry::Assistant(AssistantEntry {
            uuid: uuid.into(),
            timestamp: ts.into(),
            session_id: None,
            message: Some(AssistantMessage {
                content: blocks,
                model: Some("claude-opus-4-20250514".into()),
                stop_reason: Some("end_turn".into()),
            }),
        })
    }

    #[test]
    fn extract_from_session_produces_open_and_close_events() {
        let session = ParsedSession {
            session_id: "test-session-123".into(),
            file_path: "/tmp/test-session-123.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Please fix the login bug"),
                make_assistant_entry("a1", "2026-01-01T00:00:01Z", vec![
                    ContentBlock::Text { text: "I'll look into the login issue.".into() },
                ]),
                make_user_entry("u2", "2026-01-01T00:00:02Z", "Thanks, looks good"),
                make_assistant_entry("a2", "2026-01-01T00:00:03Z", vec![
                    ContentBlock::Text { text: "The fix is complete.".into() },
                ]),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:03Z".into()),
        };

        let task = extract_from_session(&session).unwrap();
        assert!(task.task_id.starts_with("tj-"));
        assert!(!task.title.is_empty());
        assert_eq!(task.session_id, "test-session-123");

        // The first event is the Open event, stamped with the session start.
        assert_eq!(task.events[0].event_type, EventType::Open);
        assert_eq!(task.events[0].timestamp, "2026-01-01T00:00:00Z");

        // The last event is the Close event, stamped with the session end.
        let last = task.events.last().unwrap();
        assert_eq!(last.event_type, EventType::Close);
        assert_eq!(last.timestamp, "2026-01-01T00:00:03Z");
        assert!(last.text.contains("user messages"));
    }

    #[test]
    fn extract_from_session_skips_sessions_with_fewer_than_2_user_messages() {
        let session = ParsedSession {
            session_id: "short-session".into(),
            file_path: "/tmp/short.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Hello"),
                make_assistant_entry("a1", "2026-01-01T00:00:01Z", vec![
                    ContentBlock::Text { text: "Hi!".into() },
                ]),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:01Z".into()),
        };

        assert!(extract_from_session(&session).is_none());
    }

    #[test]
    fn extract_from_session_skips_zero_user_messages() {
        let session = ParsedSession {
            session_id: "empty-session".into(),
            file_path: "/tmp/empty.jsonl".into(),
            entries: vec![],
            first_timestamp: None,
            last_timestamp: None,
        };

        assert!(extract_from_session(&session).is_none());
    }

    #[test]
    fn extract_from_session_tracks_file_modifications() {
        let session = ParsedSession {
            session_id: "file-mod-session".into(),
            file_path: "/tmp/fm.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Update the config file"),
                make_assistant_entry("a1", "2026-01-01T00:00:01Z", vec![
                    ContentBlock::ToolUse {
                        name: "Write".into(),
                        input: serde_json::json!({"file_path": "/home/user/project/src/config.rs"}),
                    },
                ]),
                make_user_entry("u2", "2026-01-01T00:00:02Z", "Also update main.rs"),
                make_assistant_entry("a2", "2026-01-01T00:00:03Z", vec![
                    ContentBlock::ToolUse {
                        name: "Edit".into(),
                        input: serde_json::json!({"file_path": "/home/user/project/src/main.rs", "old_string": "a", "new_string": "b"}),
                    },
                ]),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:03Z".into()),
        };

        let task = extract_from_session(&session).unwrap();
        let finding = task.events.iter().find(|e| e.event_type == EventType::Finding);
        assert!(finding.is_some());
        let finding = finding.unwrap();
        assert!(finding.text.contains("2 files"));
        assert!(finding.refs.files.contains(&"src/config.rs".to_string()));
        assert!(finding.refs.files.contains(&"src/main.rs".to_string()));
    }

    #[test]
    fn extract_from_session_detects_test_commands() {
        let session = ParsedSession {
            session_id: "test-cmd-session".into(),
            file_path: "/tmp/tc.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Run the tests"),
                make_assistant_entry("a1", "2026-01-01T00:00:01Z", vec![
                    ContentBlock::ToolUse {
                        name: "Bash".into(),
                        input: serde_json::json!({"command": "cargo test --workspace"}),
                    },
                ]),
                make_user_entry("u2", "2026-01-01T00:00:02Z", "Good"),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:02Z".into()),
        };

        let task = extract_from_session(&session).unwrap();
        let evidence = task.events.iter().find(|e| e.event_type == EventType::Evidence);
        assert!(evidence.is_some());
        assert!(evidence.unwrap().text.contains("cargo test"));
    }

    #[test]
    fn extract_from_session_detects_git_commit() {
        let session = ParsedSession {
            session_id: "git-commit-session".into(),
            file_path: "/tmp/gc.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Commit the changes"),
                make_assistant_entry("a1", "2026-01-01T00:00:01Z", vec![
                    ContentBlock::ToolUse {
                        name: "Bash".into(),
                        input: serde_json::json!({"command": "git commit -m 'fix: resolve login bug'"}),
                    },
                ]),
                make_user_entry("u2", "2026-01-01T00:00:02Z", "Push it"),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:02Z".into()),
        };

        let task = extract_from_session(&session).unwrap();
        let evidence_events: Vec<_> = task.events.iter()
            .filter(|e| e.event_type == EventType::Evidence)
            .collect();
        let commit_ev = evidence_events.iter().find(|e| e.text.contains("Git commit"));
        assert!(commit_ev.is_some());
        assert_eq!(commit_ev.unwrap().evidence_strength, Some(EvidenceStrength::Strong));
    }

    #[test]
    fn strip_xml_tags_removes_simple_tags() {
        assert_eq!(strip_xml_tags("<b>hello</b>"), "hello");
    }

    #[test]
    fn strip_xml_tags_removes_nested_tags() {
        assert_eq!(strip_xml_tags("<div><span>text</span></div>"), "text");
    }

    #[test]
    fn strip_xml_tags_no_tags() {
        assert_eq!(strip_xml_tags("plain text"), "plain text");
    }

    #[test]
    fn strip_xml_tags_only_tags() {
        assert_eq!(strip_xml_tags("<tag></tag>"), "");
    }

    #[test]
    fn strip_xml_tags_with_attributes() {
        assert_eq!(strip_xml_tags("<command-name foo=\"bar\">init</command-name>"), "init");
    }

    #[test]
    fn strip_xml_tags_preserves_angle_bracket_text_between_tags() {
        // Known limitation: a stray `<` ... `>` pair is treated as a tag, so
        // " b and c " is dropped. The space before `<` and the space after
        // `>` both survive, which leaves two spaces between "a" and "d".
        assert_eq!(strip_xml_tags("a < b and c > d"), "a  d");
    }

    #[test]
    fn derive_title_from_summary() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![
                SessionEntry::Summary(SummaryEntry {
                    summary: "Fixed authentication bug in login flow".into(),
                    timestamp: None,
                }),
                make_user_entry("u1", "t", "some user text that is long enough"),
            ],
            first_timestamp: None,
            last_timestamp: None,
        };
        assert_eq!(derive_title(&session), "Fixed authentication bug in login flow");
    }

    #[test]
    fn derive_title_from_user_text() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "t", "Please implement the new caching layer"),
            ],
            first_timestamp: None,
            last_timestamp: None,
        };
        assert_eq!(derive_title(&session), "Please implement the new caching layer");
    }

    #[test]
    fn derive_title_skips_short_user_text() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "t", "/init"),
                make_user_entry("u2", "t", "Implement the feature for user profiles"),
            ],
            first_timestamp: None,
            last_timestamp: None,
        };
        let title = derive_title(&session);
        // "/init" is too short to serve as a title, so the second message wins.
        assert!(title.contains("Implement the feature"));
    }

    #[test]
    fn derive_title_fallback_to_session_id() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "t", "hi"),
            ],
            first_timestamp: None,
            last_timestamp: None,
        };
        let title = derive_title(&session);
        assert!(title.starts_with("Session "));
        assert!(title.contains("abcdefgh"));
    }

    #[test]
    fn derive_title_strips_xml_from_summary() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![
                SessionEntry::Summary(SummaryEntry {
                    summary: "<task>Fix the <b>critical</b> bug</task>".into(),
                    timestamp: None,
                }),
            ],
            first_timestamp: None,
            last_timestamp: None,
        };
        let title = derive_title(&session);
        assert_eq!(title, "Fix the critical bug");
    }

    #[test]
    fn test_classify_constraint() {
        let ev = classify_text_heuristic(
            "tj-test",
            "The API has a rate limit of 100 requests per minute, so we need to implement throttling.",
            "2026-01-01T00:00:00Z",
        );
        assert!(ev.is_some());
        assert_eq!(ev.unwrap().event_type, EventType::Constraint);
    }

    #[test]
    fn test_classify_no_match_returns_none() {
        let ev = classify_text_heuristic(
            "tj-test",
            "I have successfully implemented the feature and all tests are passing. The code is clean and well-organized.",
            "2026-01-01T00:00:00Z",
        );
        assert!(ev.is_none());
    }

    #[test]
    fn test_is_test_command_additional() {
        assert!(is_test_command("jest --coverage"));
        assert!(is_test_command("vitest run"));
        assert!(is_test_command("go test ./..."));
        assert!(is_test_command("make test"));
        assert!(is_test_command("phpunit tests/Unit"));
        // Detection is a substring match, so a quoted mention also counts.
        assert!(is_test_command("echo 'cargo test'"));
        assert!(!is_test_command("ls -la"));
    }

    #[test]
    fn test_shorten_path_windows_separators() {
        assert_eq!(shorten_path("C:\\Users\\user\\project\\src\\main.rs"), "src/main.rs");
    }

    #[test]
    fn test_shorten_path_two_components() {
        assert_eq!(shorten_path("src/main.rs"), "src/main.rs");
    }

    #[test]
    fn test_truncate_multibyte_utf8() {
        // Each Cyrillic letter is two bytes, so a 6-byte limit keeps three letters.
        let text = "Привет мир";
        let truncated = truncate(text, 6);
        assert!(truncated.ends_with('…'));
        assert!(truncated.starts_with("При"));
    }

    #[test]
    fn test_truncate_exact_boundary() {
        assert_eq!(truncate("hello", 5), "hello");
        assert_eq!(truncate("hello!", 5), "hello…");
    }
}