1use crate::event::{Author, Event, EventStatus, EventType, EvidenceStrength, Source};
7use crate::session::parser::*;
8
/// A task reconstructed from one parsed session, as returned by
/// `extract_from_session`.
#[derive(Debug)]
pub struct ExtractedTask {
    /// Identifier generated for the task; every event in `events` is created with it.
    pub task_id: String,
    /// Human-readable title produced by `derive_title`.
    pub title: String,
    /// Copy of the source session's `session_id`.
    pub session_id: String,
    /// Events for the task, starting with the `Open` event and ending with the `Close` event.
    pub events: Vec<Event>,
}
17
18pub fn extract_from_session(session: &ParsedSession) -> Option<ExtractedTask> {
21 if session.user_message_count() < 2 {
23 return None;
24 }
25
26 let task_id = crate::new_task_id();
27
28 let title = derive_title(session);
30 let mut events = Vec::new();
31
32 let open_text = session
34 .summary()
35 .map(|s| truncate(s, 500))
36 .or_else(|| session.first_user_text().map(|s| truncate(&s, 500)))
37 .unwrap_or_else(|| title.clone());
38
39 let mut open_event = Event::new(
40 &task_id,
41 EventType::Open,
42 Author::Agent,
43 Source::Cli,
44 open_text,
45 );
46 if let Some(ref ts) = session.first_timestamp {
47 open_event.timestamp = ts.clone();
48 }
49 open_event.meta =
50 serde_json::json!({"title": title, "backfill": true, "session_id": session.session_id});
51 events.push(open_event);
52
53 let mut files_modified: Vec<String> = Vec::new();
55 let mut tools_used: Vec<String> = Vec::new();
56
57 for entry in &session.entries {
58 match entry {
59 SessionEntry::Assistant(a) => {
60 let tool_uses = extract_tool_uses(a);
62 for (tool_name, input) in &tool_uses {
63 tools_used.push(tool_name.clone());
64
65 if tool_name == "Write" || tool_name == "Edit" {
67 if let Some(path) = input.get("file_path").and_then(|v| v.as_str()) {
68 let short = shorten_path(path);
69 if !files_modified.contains(&short) {
70 files_modified.push(short);
71 }
72 }
73 }
74
75 if tool_name == "Bash" {
77 if let Some(cmd) = input.get("command").and_then(|v| v.as_str()) {
78 if is_test_command(cmd) {
79 let mut ev = Event::new(
80 &task_id,
81 EventType::Evidence,
82 Author::Agent,
83 Source::Cli,
84 format!("Ran tests: {}", truncate(cmd, 200)),
85 );
86 ev.timestamp = a.timestamp.clone();
87 ev.evidence_strength = Some(EvidenceStrength::Medium);
88 ev.meta = serde_json::json!({"backfill": true});
89 events.push(ev);
90 }
91 }
92 }
93
94 if tool_name == "Bash" {
96 if let Some(cmd) = input.get("command").and_then(|v| v.as_str()) {
97 if cmd.contains("git commit") && !cmd.contains("git commit --amend") {
98 let mut ev = Event::new(
99 &task_id,
100 EventType::Evidence,
101 Author::Agent,
102 Source::Cli,
103 format!("Git commit: {}", truncate(cmd, 200)),
104 );
105 ev.timestamp = a.timestamp.clone();
106 ev.evidence_strength = Some(EvidenceStrength::Strong);
107 ev.meta = serde_json::json!({"backfill": true});
108 events.push(ev);
109 }
110 }
111 }
112 }
113
114 let texts = extract_assistant_texts(a);
116 for text in &texts {
117 if let Some(ev) = classify_text_heuristic(&task_id, text, &a.timestamp) {
118 events.push(ev);
119 }
120 }
121 }
122 SessionEntry::User(_) | SessionEntry::Summary(_) | SessionEntry::Other => {}
123 }
124 }
125
126 if !files_modified.is_empty() {
128 let summary = format!(
129 "Modified {} files: {}",
130 files_modified.len(),
131 files_modified.join(", ")
132 );
133 let mut ev = Event::new(
134 &task_id,
135 EventType::Finding,
136 Author::Agent,
137 Source::Cli,
138 summary,
139 );
140 if let Some(ref ts) = session.last_timestamp {
141 ev.timestamp = ts.clone();
142 }
143 ev.refs.files = files_modified;
144 ev.meta = serde_json::json!({"backfill": true});
145 events.push(ev);
146 }
147
148 let close_text = format!(
150 "Session ended. {} user messages, {} assistant messages, {} tool calls.",
151 session.user_message_count(),
152 session.assistant_message_count(),
153 tools_used.len()
154 );
155 let mut close_event = Event::new(
156 &task_id,
157 EventType::Close,
158 Author::Agent,
159 Source::Cli,
160 close_text,
161 );
162 if let Some(ref ts) = session.last_timestamp {
163 close_event.timestamp = ts.clone();
164 }
165 close_event.meta = serde_json::json!({
166 "backfill": true,
167 "reason": "session_ended",
168 "outcome": "completed"
169 });
170 events.push(close_event);
171
172 Some(ExtractedTask {
173 task_id,
174 title,
175 session_id: session.session_id.clone(),
176 events,
177 })
178}
179
180fn derive_title(session: &ParsedSession) -> String {
182 if let Some(summary) = session.summary() {
184 return truncate(&strip_xml_tags(summary), 120);
185 }
186
187 for entry in &session.entries {
189 if let SessionEntry::User(u) = entry {
190 if let Some(text) = extract_user_text(u) {
191 let clean = strip_xml_tags(&text);
192 let first_line = clean
193 .lines()
194 .find(|l| !l.trim().is_empty())
195 .unwrap_or(&clean);
196 let trimmed = first_line.trim();
197 if trimmed.len() > 5 {
199 return truncate(trimmed, 120);
200 }
201 }
202 }
203 }
204
205 format!(
206 "Session {}",
207 &session.session_id[..8.min(session.session_id.len())]
208 )
209}
210
/// Removes everything between `<` and `>` (brackets included) from `text`.
///
/// This is a plain character scan, not an XML parser: a `<` starts skipping
/// and the next `>` ends it, so a stray `<` hides all text up to the next `>`
/// (or to the end of the input), and a stray `>` is simply dropped.
fn strip_xml_tags(text: &str) -> String {
    let mut inside_tag = false;
    text.chars()
        .filter(|&ch| match ch {
            '<' => {
                inside_tag = true;
                false
            }
            '>' => {
                inside_tag = false;
                false
            }
            _ => !inside_tag,
        })
        .collect()
}
226
227fn classify_text_heuristic(task_id: &str, text: &str, timestamp: &str) -> Option<Event> {
230 let lower = text.to_lowercase();
231
232 if text.len() < 50 {
234 return None;
235 }
236
237 let decision_patterns = [
239 "decided to",
240 "will use",
241 "going with",
242 "chose to",
243 "the approach is",
244 "решил использовать",
245 "будем использовать",
246 "выбрал",
247 ];
248 for pattern in &decision_patterns {
249 if lower.contains(pattern) {
250 let mut ev = Event::new(
251 task_id,
252 EventType::Decision,
253 Author::Agent,
254 Source::Cli,
255 truncate(text, 300),
256 );
257 ev.timestamp = timestamp.to_string();
258 ev.confidence = Some(0.7);
259 ev.status = EventStatus::Suggested;
260 ev.meta = serde_json::json!({"backfill": true, "heuristic": "decision_keyword"});
261 return Some(ev);
262 }
263 }
264
265 let rejection_patterns = [
267 "won't work",
268 "doesn't work",
269 "can't use",
270 "не работает",
271 "не подходит",
272 "отказались",
273 "tried but",
274 "rejected",
275 "abandoned",
276 ];
277 for pattern in &rejection_patterns {
278 if lower.contains(pattern) {
279 let mut ev = Event::new(
280 task_id,
281 EventType::Rejection,
282 Author::Agent,
283 Source::Cli,
284 truncate(text, 300),
285 );
286 ev.timestamp = timestamp.to_string();
287 ev.confidence = Some(0.6);
288 ev.status = EventStatus::Suggested;
289 ev.meta = serde_json::json!({"backfill": true, "heuristic": "rejection_keyword"});
290 return Some(ev);
291 }
292 }
293
294 let constraint_patterns = [
296 "rate limit",
297 "not supported",
298 "limitation",
299 "ограничение",
300 "не поддерживает",
301 "requires",
302 "must be",
303 ];
304 for pattern in &constraint_patterns {
305 if lower.contains(pattern) && text.len() < 500 {
306 let mut ev = Event::new(
307 task_id,
308 EventType::Constraint,
309 Author::Agent,
310 Source::Cli,
311 truncate(text, 300),
312 );
313 ev.timestamp = timestamp.to_string();
314 ev.confidence = Some(0.5);
315 ev.status = EventStatus::Suggested;
316 ev.meta = serde_json::json!({"backfill": true, "heuristic": "constraint_keyword"});
317 return Some(ev);
318 }
319 }
320
321 None
322}
323
/// Reports whether a shell command mentions a known test runner.
///
/// The check is a case-insensitive substring match, so the runner name may
/// appear anywhere in the command line.
fn is_test_command(cmd: &str) -> bool {
    const TEST_RUNNERS: [&str; 8] = [
        "cargo test",
        "npm test",
        "pytest",
        "phpunit",
        "jest",
        "vitest",
        "go test",
        "make test",
    ];

    let normalized = cmd.to_lowercase();
    TEST_RUNNERS
        .iter()
        .any(|runner| normalized.contains(*runner))
}
336
/// Shortens a path to its last two components, joined with `/`.
///
/// Both `/` and `\` are treated as separators. Paths with two or fewer
/// components are returned unchanged, separators included.
fn shorten_path(path: &str) -> String {
    let segments: Vec<&str> = path.split(|c: char| c == '/' || c == '\\').collect();
    match segments.as_slice() {
        // At least three components: keep only the parent and the leaf.
        [_, .., parent, leaf] => format!("{}/{}", parent, leaf),
        _ => path.to_string(),
    }
}
346
/// Limits `text` to at most `max_len` bytes of content.
///
/// Text that already fits is returned unchanged. Otherwise the text is cut at
/// the last character boundary at or before `max_len` and `…` is appended, so
/// the result can be a few bytes longer than `max_len`.
fn truncate(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_string();
    }

    // Index 0 is always a char boundary, so the search cannot come up empty.
    let cut = (0..=max_len)
        .rev()
        .find(|&idx| text.is_char_boundary(idx))
        .unwrap_or(0);
    format!("{}…", &text[..cut])
}
360
/// Unit tests for session extraction and its helper functions.
#[cfg(test)]
mod tests {
    use super::*;

    /// Common runners are detected; unrelated commands are not.
    #[test]
    fn test_is_test_command() {
        assert!(is_test_command("cargo test -p my-crate"));
        assert!(is_test_command("npm test"));
        assert!(is_test_command("python -m pytest tests/"));
        assert!(!is_test_command("cargo build"));
        assert!(!is_test_command("git push"));
    }

    /// Long paths keep their last two components; a bare file name is unchanged.
    #[test]
    fn test_shorten_path() {
        assert_eq!(
            shorten_path("/home/user/project/src/main.rs"),
            "src/main.rs"
        );
        assert_eq!(shorten_path("main.rs"), "main.rs");
    }

    /// Short text is returned as-is; long text is cut and gets an ellipsis.
    #[test]
    fn test_truncate() {
        assert_eq!(truncate("hello", 10), "hello");
        assert_eq!(truncate("hello world", 5), "hello…");
    }

    /// Text containing "decided to" is classified as a decision.
    #[test]
    fn test_classify_decision() {
        let ev = classify_text_heuristic(
            "tj-test",
            "After analysis, I decided to use the rmcp crate for MCP implementation because it has better macro support.",
            "2026-01-01T00:00:00Z",
        )
        .expect("decision keyword should produce an event");
        assert_eq!(ev.event_type, EventType::Decision);
    }

    /// Text containing "won't work" is classified as a rejection.
    #[test]
    fn test_classify_rejection() {
        let ev = classify_text_heuristic(
            "tj-test",
            "The previous approach won't work because the API doesn't support batch operations.",
            "2026-01-01T00:00:00Z",
        )
        .expect("rejection keyword should produce an event");
        assert_eq!(ev.event_type, EventType::Rejection);
    }

    /// Texts shorter than 50 bytes are never classified.
    #[test]
    fn test_classify_short_text_skipped() {
        let ev = classify_text_heuristic("tj-test", "OK, done.", "2026-01-01T00:00:00Z");
        assert!(ev.is_none());
    }

    /// Builds a user entry whose message content is the plain string `text`.
    fn make_user_entry(uuid: &str, ts: &str, text: &str) -> SessionEntry {
        SessionEntry::User(UserEntry {
            uuid: uuid.into(),
            timestamp: ts.into(),
            session_id: None,
            message: Some(UserMessage {
                content: serde_json::json!(text),
            }),
            cwd: None,
        })
    }

    /// Builds an assistant entry carrying the given content blocks.
    fn make_assistant_entry(uuid: &str, ts: &str, blocks: Vec<ContentBlock>) -> SessionEntry {
        SessionEntry::Assistant(AssistantEntry {
            uuid: uuid.into(),
            timestamp: ts.into(),
            session_id: None,
            message: Some(AssistantMessage {
                content: blocks,
                model: Some("claude-opus-4-20250514".into()),
                stop_reason: Some("end_turn".into()),
            }),
        })
    }

    /// The first event is `Open` and the last is `Close`, stamped with the
    /// session's first and last timestamps respectively.
    #[test]
    fn extract_from_session_produces_open_and_close_events() {
        let session = ParsedSession {
            session_id: "test-session-123".into(),
            file_path: "/tmp/test-session-123.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Please fix the login bug"),
                make_assistant_entry(
                    "a1",
                    "2026-01-01T00:00:01Z",
                    vec![ContentBlock::Text {
                        text: "I'll look into the login issue.".into(),
                    }],
                ),
                make_user_entry("u2", "2026-01-01T00:00:02Z", "Thanks, looks good"),
                make_assistant_entry(
                    "a2",
                    "2026-01-01T00:00:03Z",
                    vec![ContentBlock::Text {
                        text: "The fix is complete.".into(),
                    }],
                ),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:03Z".into()),
        };

        let task = extract_from_session(&session).unwrap();
        assert!(task.task_id.starts_with("tj-"));
        assert!(!task.title.is_empty());
        assert_eq!(task.session_id, "test-session-123");

        assert_eq!(task.events[0].event_type, EventType::Open);
        assert_eq!(task.events[0].timestamp, "2026-01-01T00:00:00Z");

        let last = task.events.last().unwrap();
        assert_eq!(last.event_type, EventType::Close);
        assert_eq!(last.timestamp, "2026-01-01T00:00:03Z");
        assert!(last.text.contains("user messages"));
    }

    /// A session with a single user message yields no task.
    #[test]
    fn extract_from_session_skips_sessions_with_fewer_than_2_user_messages() {
        let session = ParsedSession {
            session_id: "short-session".into(),
            file_path: "/tmp/short.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Hello"),
                make_assistant_entry(
                    "a1",
                    "2026-01-01T00:00:01Z",
                    vec![ContentBlock::Text { text: "Hi!".into() }],
                ),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:01Z".into()),
        };

        assert!(extract_from_session(&session).is_none());
    }

    /// A session without any entries yields no task.
    #[test]
    fn extract_from_session_skips_zero_user_messages() {
        let session = ParsedSession {
            session_id: "empty-session".into(),
            file_path: "/tmp/empty.jsonl".into(),
            entries: vec![],
            first_timestamp: None,
            last_timestamp: None,
        };

        assert!(extract_from_session(&session).is_none());
    }

    /// `Write` and `Edit` tool calls are collected into a single `Finding`
    /// event that lists the shortened paths.
    #[test]
    fn extract_from_session_tracks_file_modifications() {
        let session = ParsedSession {
            session_id: "file-mod-session".into(),
            file_path: "/tmp/fm.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Update the config file"),
                make_assistant_entry(
                    "a1",
                    "2026-01-01T00:00:01Z",
                    vec![ContentBlock::ToolUse {
                        name: "Write".into(),
                        input: serde_json::json!({"file_path": "/home/user/project/src/config.rs"}),
                    }],
                ),
                make_user_entry("u2", "2026-01-01T00:00:02Z", "Also update main.rs"),
                make_assistant_entry(
                    "a2",
                    "2026-01-01T00:00:03Z",
                    vec![ContentBlock::ToolUse {
                        name: "Edit".into(),
                        input: serde_json::json!({"file_path": "/home/user/project/src/main.rs", "old_string": "a", "new_string": "b"}),
                    }],
                ),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:03Z".into()),
        };

        let task = extract_from_session(&session).unwrap();
        let finding = task
            .events
            .iter()
            .find(|e| e.event_type == EventType::Finding)
            .expect("file modifications should produce a Finding event");
        assert!(finding.text.contains("2 files"));
        assert!(finding.refs.files.contains(&"src/config.rs".to_string()));
        assert!(finding.refs.files.contains(&"src/main.rs".to_string()));
    }

    /// A `Bash` call running a test command produces an `Evidence` event.
    #[test]
    fn extract_from_session_detects_test_commands() {
        let session = ParsedSession {
            session_id: "test-cmd-session".into(),
            file_path: "/tmp/tc.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Run the tests"),
                make_assistant_entry(
                    "a1",
                    "2026-01-01T00:00:01Z",
                    vec![ContentBlock::ToolUse {
                        name: "Bash".into(),
                        input: serde_json::json!({"command": "cargo test --workspace"}),
                    }],
                ),
                make_user_entry("u2", "2026-01-01T00:00:02Z", "Good"),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:02Z".into()),
        };

        let task = extract_from_session(&session).unwrap();
        let evidence = task
            .events
            .iter()
            .find(|e| e.event_type == EventType::Evidence)
            .expect("test command should produce an Evidence event");
        assert!(evidence.text.contains("cargo test"));
    }

    /// A `Bash` call running `git commit` produces strong evidence.
    #[test]
    fn extract_from_session_detects_git_commit() {
        let session = ParsedSession {
            session_id: "git-commit-session".into(),
            file_path: "/tmp/gc.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "2026-01-01T00:00:00Z", "Commit the changes"),
                make_assistant_entry(
                    "a1",
                    "2026-01-01T00:00:01Z",
                    vec![ContentBlock::ToolUse {
                        name: "Bash".into(),
                        input: serde_json::json!({"command": "git commit -m 'fix: resolve login bug'"}),
                    }],
                ),
                make_user_entry("u2", "2026-01-01T00:00:02Z", "Push it"),
            ],
            first_timestamp: Some("2026-01-01T00:00:00Z".into()),
            last_timestamp: Some("2026-01-01T00:00:02Z".into()),
        };

        let task = extract_from_session(&session).unwrap();
        let commit_ev = task
            .events
            .iter()
            .find(|e| e.event_type == EventType::Evidence && e.text.contains("Git commit"))
            .expect("git commit should produce an Evidence event");
        assert_eq!(
            commit_ev.evidence_strength,
            Some(EvidenceStrength::Strong)
        );
    }

    #[test]
    fn strip_xml_tags_removes_simple_tags() {
        assert_eq!(strip_xml_tags("<b>hello</b>"), "hello");
    }

    #[test]
    fn strip_xml_tags_removes_nested_tags() {
        assert_eq!(strip_xml_tags("<div><span>text</span></div>"), "text");
    }

    #[test]
    fn strip_xml_tags_no_tags() {
        assert_eq!(strip_xml_tags("plain text"), "plain text");
    }

    #[test]
    fn strip_xml_tags_only_tags() {
        assert_eq!(strip_xml_tags("<tag></tag>"), "");
    }

    #[test]
    fn strip_xml_tags_with_attributes() {
        assert_eq!(
            strip_xml_tags("<command-name foo=\"bar\">init</command-name>"),
            "init"
        );
    }

    /// Documents a limitation of the character scan: a bare `<` ... `>` pair
    /// in ordinary text is treated like a tag, so everything between the two
    /// brackets is dropped. The space before `<` and the space after `>` both
    /// survive, hence the two spaces in the expected value.
    #[test]
    fn strip_xml_tags_preserves_angle_bracket_text_between_tags() {
        assert_eq!(strip_xml_tags("a < b and c > d"), "a  d");
    }

    /// When a summary entry exists, it is used as the title.
    #[test]
    fn derive_title_from_summary() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![
                SessionEntry::Summary(SummaryEntry {
                    summary: "Fixed authentication bug in login flow".into(),
                    timestamp: None,
                }),
                make_user_entry("u1", "t", "some user text that is long enough"),
            ],
            first_timestamp: None,
            last_timestamp: None,
        };
        assert_eq!(
            derive_title(&session),
            "Fixed authentication bug in login flow"
        );
    }

    /// Without a summary, the first sufficiently long user message is used.
    #[test]
    fn derive_title_from_user_text() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![make_user_entry(
                "u1",
                "t",
                "Please implement the new caching layer",
            )],
            first_timestamp: None,
            last_timestamp: None,
        };
        assert_eq!(
            derive_title(&session),
            "Please implement the new caching layer"
        );
    }

    /// A user message of five bytes or fewer is skipped in favour of a later one.
    #[test]
    fn derive_title_skips_short_user_text() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![
                make_user_entry("u1", "t", "/init"),
                make_user_entry("u2", "t", "Implement the feature for user profiles"),
            ],
            first_timestamp: None,
            last_timestamp: None,
        };
        let title = derive_title(&session);
        assert!(title.contains("Implement the feature"));
    }

    /// With no usable text, the title falls back to the first 8 bytes of the session id.
    #[test]
    fn derive_title_fallback_to_session_id() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![make_user_entry("u1", "t", "hi")],
            first_timestamp: None,
            last_timestamp: None,
        };
        let title = derive_title(&session);
        assert!(title.starts_with("Session "));
        assert!(title.contains("abcdefgh"));
    }

    /// Tags inside the summary are stripped before it becomes the title.
    #[test]
    fn derive_title_strips_xml_from_summary() {
        let session = ParsedSession {
            session_id: "abcdefghij".into(),
            file_path: "/tmp/s.jsonl".into(),
            entries: vec![SessionEntry::Summary(SummaryEntry {
                summary: "<task>Fix the <b>critical</b> bug</task>".into(),
                timestamp: None,
            })],
            first_timestamp: None,
            last_timestamp: None,
        };
        let title = derive_title(&session);
        assert_eq!(title, "Fix the critical bug");
    }

    /// Text containing "rate limit" is classified as a constraint.
    #[test]
    fn test_classify_constraint() {
        let ev = classify_text_heuristic(
            "tj-test",
            "The API has a rate limit of 100 requests per minute, so we need to implement throttling.",
            "2026-01-01T00:00:00Z",
        )
        .expect("constraint keyword should produce an event");
        assert_eq!(ev.event_type, EventType::Constraint);
    }

    /// Long text without any keyword is not classified.
    #[test]
    fn test_classify_no_match_returns_none() {
        let ev = classify_text_heuristic(
            "tj-test",
            "I have successfully implemented the feature and all tests are passing. The code is clean and well-organized.",
            "2026-01-01T00:00:00Z",
        );
        assert!(ev.is_none());
    }

    /// Covers the remaining runners and the substring-match behaviour.
    #[test]
    fn test_is_test_command_additional() {
        assert!(is_test_command("jest --coverage"));
        assert!(is_test_command("vitest run"));
        assert!(is_test_command("go test ./..."));
        assert!(is_test_command("make test"));
        assert!(is_test_command("phpunit tests/Unit"));
        // Matching is by substring, so a runner name inside another command also counts.
        assert!(is_test_command("echo 'cargo test'"));
        assert!(!is_test_command("ls -la"));
    }

    /// Backslash separators are split like forward slashes.
    #[test]
    fn test_shorten_path_windows_separators() {
        assert_eq!(
            shorten_path("C:\\Users\\user\\project\\src\\main.rs"),
            "src/main.rs"
        );
    }

    /// A path with exactly two components is returned unchanged.
    #[test]
    fn test_shorten_path_two_components() {
        assert_eq!(shorten_path("src/main.rs"), "src/main.rs");
    }

    /// Each Cyrillic letter takes two bytes in UTF-8, so a 6-byte limit keeps three letters.
    #[test]
    fn test_truncate_multibyte_utf8() {
        let text = "Привет мир";
        let truncated = truncate(text, 6);
        assert!(truncated.ends_with('…'));
        assert!(truncated.starts_with("При"));
    }

    /// Text exactly at the limit is untouched; one byte over triggers truncation.
    #[test]
    fn test_truncate_exact_boundary() {
        assert_eq!(truncate("hello", 5), "hello");
        assert_eq!(truncate("hello!", 5), "hello…");
    }
}