Skip to main content

opensession_core/
extract.rs

1use crate::{ContentBlock, Event, EventType, Session};
2
/// Session metadata gathered at upload time for storage in the database.
///
/// Produced by [`extract_upload_metadata`]. Every optional field is `None`
/// when the session offers nothing to populate it with.
#[derive(Clone, Debug)]
pub struct UploadMetadata {
    /// Session title: the explicit one if non-empty, otherwise derived from
    /// the first user message text.
    pub title: Option<String>,
    /// Session description: the explicit one if non-empty, otherwise derived
    /// from the first few user message texts.
    pub description: Option<String>,
    /// Session tags joined with `,`; `None` when the session has no tags.
    pub tags: Option<String>,
    /// Session creation time formatted as an RFC 3339 string.
    pub created_at: String,
    /// Working directory taken from the session's `cwd` or
    /// `working_directory` attribute.
    pub working_directory: Option<String>,
    /// JSON array of paths that were edited, created, or deleted.
    pub files_modified: Option<String>,
    /// JSON array of paths that were read but not modified.
    pub files_read: Option<String>,
    /// Whether a failing shell command or an erroring tool result was seen.
    pub has_errors: bool,
}
15
16/// Extract upload metadata from a session, auto-generating title/description
17/// from the first user messages when the session's own metadata is empty.
18///
19/// This consolidates the duplicated logic in server and worker upload handlers.
20pub fn extract_upload_metadata(session: &Session) -> UploadMetadata {
21    let title = session
22        .context
23        .title
24        .clone()
25        .filter(|t| !t.is_empty())
26        .or_else(|| extract_first_user_text(session).map(|t| truncate_str(&t, 80)));
27
28    let description = session
29        .context
30        .description
31        .clone()
32        .filter(|d| !d.is_empty())
33        .or_else(|| extract_user_texts(session, 3).map(|t| truncate_str(&t, 500)));
34
35    let tags = if session.context.tags.is_empty() {
36        None
37    } else {
38        Some(session.context.tags.join(","))
39    };
40
41    let created_at = session.context.created_at.to_rfc3339();
42
43    let working_directory = session
44        .context
45        .attributes
46        .get("cwd")
47        .or_else(|| session.context.attributes.get("working_directory"))
48        .and_then(|v| v.as_str().map(String::from));
49
50    let (files_modified, files_read, has_errors) = extract_file_metadata(session);
51
52    UploadMetadata {
53        title,
54        description,
55        tags,
56        created_at,
57        working_directory,
58        files_modified,
59        files_read,
60        has_errors,
61    }
62}
63
64/// Extract files_modified, files_read (as JSON arrays), and has_errors from a session's events.
65pub fn extract_file_metadata(session: &Session) -> (Option<String>, Option<String>, bool) {
66    use std::collections::BTreeSet;
67
68    let mut modified = BTreeSet::new();
69    let mut read = BTreeSet::new();
70    let mut has_errors = false;
71
72    for event in &session.events {
73        match &event.event_type {
74            EventType::FileEdit { path, .. }
75            | EventType::FileCreate { path }
76            | EventType::FileDelete { path } => {
77                modified.insert(path.clone());
78            }
79            EventType::FileRead { path } => {
80                read.insert(path.clone());
81            }
82            EventType::ShellCommand { exit_code, .. }
83                if *exit_code != Some(0) && exit_code.is_some() =>
84            {
85                has_errors = true;
86            }
87            EventType::ToolResult { is_error: true, .. } => {
88                has_errors = true;
89            }
90            _ => {}
91        }
92    }
93
94    let read: BTreeSet<_> = read.difference(&modified).cloned().collect();
95
96    let files_modified = if modified.is_empty() {
97        None
98    } else {
99        let v: Vec<&String> = modified.iter().collect();
100        Some(serde_json::to_string(&v).unwrap_or_default())
101    };
102
103    let files_read = if read.is_empty() {
104        None
105    } else {
106        let v: Vec<&String> = read.iter().collect();
107        Some(serde_json::to_string(&v).unwrap_or_default())
108    };
109
110    (files_modified, files_read, has_errors)
111}
112
113/// Extract the first non-empty text from a slice of content blocks.
114fn extract_text_from_blocks(blocks: &[ContentBlock]) -> Option<String> {
115    blocks.iter().find_map(|block| match block {
116        ContentBlock::Text { text } if !text.trim().is_empty() => Some(text.trim().to_string()),
117        _ => None,
118    })
119}
120
121/// Extract the text from the first UserMessage event.
122pub fn extract_first_user_text(session: &Session) -> Option<String> {
123    session
124        .events
125        .iter()
126        .filter(|e| matches!(e.event_type, EventType::UserMessage))
127        .find_map(|e| extract_text_from_blocks(&e.content.blocks))
128}
129
130/// Extract and join texts from the first `max` UserMessage events.
131pub fn extract_user_texts(session: &Session, max: usize) -> Option<String> {
132    let texts: Vec<String> = session
133        .events
134        .iter()
135        .filter(|e| matches!(e.event_type, EventType::UserMessage))
136        .filter_map(|e| extract_text_from_blocks(&e.content.blocks))
137        .take(max)
138        .collect();
139    if texts.is_empty() {
140        None
141    } else {
142        Some(texts.join(" "))
143    }
144}
145
146/// Extract modified and deleted file paths from a slice of events.
147///
148/// Returns `(modified_paths, deleted_paths)`.  Both are sorted and deduplicated.
149/// If a file is deleted then re-created in the same event slice, it stays in
150/// `modified` only.
151pub fn extract_changed_paths(events: &[Event]) -> (Vec<String>, Vec<String>) {
152    let mut modified = Vec::new();
153    let mut deleted = Vec::new();
154
155    for event in events {
156        match &event.event_type {
157            EventType::FileEdit { path, .. } | EventType::FileCreate { path } => {
158                modified.push(path.clone());
159            }
160            EventType::FileDelete { path } => deleted.push(path.clone()),
161            _ => {}
162        }
163    }
164
165    modified.sort();
166    modified.dedup();
167    deleted.sort();
168    deleted.dedup();
169
170    // If a file was deleted then re-created, keep it in modified
171    deleted.retain(|d| !modified.contains(d));
172
173    (modified, deleted)
174}
175
/// Truncate `s` to at most `max_len` characters, marking truncation with "...".
///
/// Length is counted in Unicode scalar values (`char`s), not bytes, so
/// multi-byte text is neither shortened prematurely nor split mid-character.
///
/// * If `s` has `max_len` characters or fewer, it is returned unchanged.
/// * Otherwise the first `max_len - 3` characters are kept and "..." is
///   appended, making the result exactly `max_len` characters long.
/// * If `max_len` is less than 3 there is no room for the ellipsis, so the
///   result is simply the first `max_len` characters.
///
/// The returned string never contains more than `max_len` characters.
pub fn truncate_str(s: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    // A string's byte length is never smaller than its character count, so the
    // cheap byte check settles the common short/ASCII case without scanning.
    // `nth(max_len)` then stops as soon as one character too many is found.
    if s.len() <= max_len || s.chars().nth(max_len).is_none() {
        return s.to_string();
    }

    // ELLIPSIS is ASCII, so its byte length equals its character count.
    let (keep, suffix) = match max_len.checked_sub(ELLIPSIS.len()) {
        Some(keep) => (keep, ELLIPSIS),
        // Too short to fit the ellipsis: hard-cut rather than exceed the limit.
        None => (max_len, ""),
    };

    let mut truncated: String = s.chars().take(keep).collect();
    truncated.push_str(suffix);
    truncated
}
189
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Agent, Content, Event, Session};
    use chrono::Utc;
    use std::collections::HashMap;

    /// Build a session containing one text-content event per `(text, event_type)` pair.
    fn make_session(messages: Vec<(&str, EventType)>) -> Session {
        let agent = Agent {
            provider: String::from("test"),
            model: String::from("test"),
            tool: String::from("test"),
            tool_version: None,
        };
        let mut session = Session::new(String::from("test"), agent);
        session
            .events
            .extend(
                messages
                    .into_iter()
                    .enumerate()
                    .map(|(idx, (text, event_type))| Event {
                        event_id: format!("e{idx}"),
                        timestamp: Utc::now(),
                        event_type,
                        task_id: None,
                        content: Content::text(text),
                        duration_ms: None,
                        attributes: HashMap::new(),
                    }),
            );
        session
    }

    /// `FileEdit` event type for `path` with no diff attached.
    fn file_edit(path: &str) -> EventType {
        EventType::FileEdit {
            path: path.to_string(),
            diff: None,
        }
    }

    /// `FileCreate` event type for `path`.
    fn file_create(path: &str) -> EventType {
        EventType::FileCreate {
            path: path.to_string(),
        }
    }

    /// `FileDelete` event type for `path`.
    fn file_delete(path: &str) -> EventType {
        EventType::FileDelete {
            path: path.to_string(),
        }
    }

    /// `FileRead` event type for `path`.
    fn file_read(path: &str) -> EventType {
        EventType::FileRead {
            path: path.to_string(),
        }
    }

    #[test]
    fn test_extract_first_user_text() {
        let session = make_session(vec![
            ("hello world", EventType::UserMessage),
            ("second message", EventType::UserMessage),
        ]);
        let first = extract_first_user_text(&session);
        assert_eq!(first, Some("hello world".to_string()));
    }

    #[test]
    fn test_extract_first_user_text_skips_agent() {
        let session = make_session(vec![
            ("agent reply", EventType::AgentMessage),
            ("user msg", EventType::UserMessage),
        ]);
        let first = extract_first_user_text(&session);
        assert_eq!(first, Some("user msg".to_string()));
    }

    #[test]
    fn test_extract_first_user_text_empty() {
        let session = make_session(vec![("agent reply", EventType::AgentMessage)]);
        assert_eq!(extract_first_user_text(&session), None);
    }

    #[test]
    fn test_extract_user_texts() {
        let session = make_session(vec![
            ("first", EventType::UserMessage),
            ("reply", EventType::AgentMessage),
            ("second", EventType::UserMessage),
            ("third", EventType::UserMessage),
        ]);
        let joined = extract_user_texts(&session, 2);
        assert_eq!(joined, Some("first second".to_string()));
    }

    #[test]
    fn test_truncate_str_short() {
        assert_eq!(truncate_str("hello", 10), "hello");
    }

    #[test]
    fn test_truncate_str_exact() {
        assert_eq!(truncate_str("hello", 5), "hello");
    }

    #[test]
    fn test_truncate_str_long() {
        assert_eq!(truncate_str("hello world", 8), "hello...");
    }

    #[test]
    fn test_extract_upload_metadata_auto_title() {
        let session = make_session(vec![
            ("Build a REST API", EventType::UserMessage),
            ("Sure, let me help", EventType::AgentMessage),
            ("Add auth too", EventType::UserMessage),
        ]);
        let meta = extract_upload_metadata(&session);
        assert_eq!(meta.title.as_deref(), Some("Build a REST API"));
        // The description joins the first 3 user messages.
        assert_eq!(
            meta.description.as_deref(),
            Some("Build a REST API Add auth too")
        );
        assert!(meta.tags.is_none());
    }

    #[test]
    fn test_extract_upload_metadata_explicit_title() {
        let mut session = make_session(vec![("hello", EventType::UserMessage)]);
        session.context.title = Some(String::from("My Title"));
        session.context.description = Some(String::from("My Desc"));
        session.context.tags = vec![String::from("rust"), String::from("api")];

        let meta = extract_upload_metadata(&session);
        assert_eq!(meta.title.as_deref(), Some("My Title"));
        assert_eq!(meta.description.as_deref(), Some("My Desc"));
        assert_eq!(meta.tags.as_deref(), Some("rust,api"));
    }

    #[test]
    fn test_extract_changed_paths_basic() {
        let session = make_session(vec![
            ("edited file", file_edit("src/main.rs")),
            ("created file", file_create("src/new.rs")),
            ("deleted file", file_delete("src/old.rs")),
            ("read file", file_read("src/lib.rs")),
        ]);
        let (modified, deleted) = extract_changed_paths(&session.events);
        assert_eq!(modified, vec!["src/main.rs", "src/new.rs"]);
        assert_eq!(deleted, vec!["src/old.rs"]);
    }

    #[test]
    fn test_extract_changed_paths_delete_then_recreate() {
        let session = make_session(vec![
            ("deleted", file_delete("src/foo.rs")),
            ("recreated", file_create("src/foo.rs")),
        ]);
        let (modified, deleted) = extract_changed_paths(&session.events);
        assert_eq!(modified, vec!["src/foo.rs"]);
        assert!(deleted.is_empty());
    }

    #[test]
    fn test_extract_changed_paths_dedup() {
        let session = make_session(vec![
            ("edit1", file_edit("a.rs")),
            ("edit2", file_edit("a.rs")),
        ]);
        let (modified, deleted) = extract_changed_paths(&session.events);
        assert_eq!(modified, vec!["a.rs"]);
        assert!(deleted.is_empty());
    }

    #[test]
    fn test_extract_upload_metadata_empty_strings() {
        let mut session = make_session(vec![("hello", EventType::UserMessage)]);
        session.context.title = Some(String::new());
        session.context.description = Some(String::new());

        let meta = extract_upload_metadata(&session);
        // Empty strings should trigger auto-extraction.
        assert_eq!(meta.title.as_deref(), Some("hello"));
        assert_eq!(meta.description.as_deref(), Some("hello"));
    }

    #[test]
    fn test_extract_file_metadata_basic() {
        let session = make_session(vec![
            ("edited", file_edit("src/main.rs")),
            ("read", file_read("src/lib.rs")),
        ]);
        let (modified, read, has_errors) = extract_file_metadata(&session);
        assert_eq!(modified.as_deref(), Some(r#"["src/main.rs"]"#));
        assert_eq!(read.as_deref(), Some(r#"["src/lib.rs"]"#));
        assert!(!has_errors);
    }

    #[test]
    fn test_extract_file_metadata_read_minus_mod() {
        // A file that is both read and modified should appear in modified only.
        let session = make_session(vec![
            ("read", file_read("src/main.rs")),
            ("edited", file_edit("src/main.rs")),
        ]);
        let (modified, read, has_errors) = extract_file_metadata(&session);
        assert_eq!(modified.as_deref(), Some(r#"["src/main.rs"]"#));
        assert!(read.is_none());
        assert!(!has_errors);
    }

    #[test]
    fn test_extract_file_metadata_has_errors_cmd() {
        let failing_build = EventType::ShellCommand {
            command: "cargo build".to_string(),
            exit_code: Some(1),
        };
        let session = make_session(vec![("cmd", failing_build)]);
        let (modified, read, has_errors) = extract_file_metadata(&session);
        assert!(modified.is_none());
        assert!(read.is_none());
        assert!(has_errors);
    }

    #[test]
    fn test_extract_file_metadata_has_errors_tool() {
        let failed_tool = EventType::ToolResult {
            name: "Bash".to_string(),
            is_error: true,
            call_id: None,
        };
        let session = make_session(vec![("tool err", failed_tool)]);
        let (_, _, has_errors) = extract_file_metadata(&session);
        assert!(has_errors);
    }

    #[test]
    fn test_extract_file_metadata_empty() {
        let session = make_session(Vec::new());
        let (modified, read, has_errors) = extract_file_metadata(&session);
        assert!(modified.is_none());
        assert!(read.is_none());
        assert!(!has_errors);
    }

    #[test]
    fn test_extract_file_metadata_exit_zero() {
        let passing_tests = EventType::ShellCommand {
            command: "cargo test".to_string(),
            exit_code: Some(0),
        };
        let session = make_session(vec![("cmd", passing_tests)]);
        let (_, _, has_errors) = extract_file_metadata(&session);
        assert!(!has_errors);
    }
}