Skip to main content

codex_helper_core/sessions/
transcript.rs

1use std::collections::VecDeque;
2
3use super::*;
4
5/// Read a best-effort transcript from a Codex session JSONL file.
6///
7/// If `tail` is Some(N), only the last N extracted messages are returned.
8pub async fn read_codex_session_transcript(
9    path: &Path,
10    tail: Option<usize>,
11) -> Result<Vec<SessionTranscriptMessage>> {
12    match tail {
13        Some(0) => Ok(Vec::new()),
14        Some(n) => read_codex_session_transcript_tail(path, n).await,
15        None => read_codex_session_transcript_full(path).await,
16    }
17}
18
19/// Best-effort, case-insensitive substring search within the last `tail` transcript messages.
20///
21/// This is intended for interactive UIs (history/session manager). It trades completeness for speed:
22/// - Only scans the last N extracted messages (not the full file).
23/// - Returns `false` for empty queries or `tail == 0`.
24pub async fn codex_session_transcript_tail_contains_query(
25    path: &Path,
26    query: &str,
27    tail: usize,
28) -> Result<bool> {
29    let needle = query.trim();
30    if needle.is_empty() || tail == 0 {
31        return Ok(false);
32    }
33
34    let needle = needle.to_lowercase();
35    let msgs = read_codex_session_transcript(path, Some(tail)).await?;
36    Ok(msgs
37        .iter()
38        .any(|m| m.text.to_lowercase().contains(needle.as_str())))
39}
40
41async fn read_codex_session_transcript_full(path: &Path) -> Result<Vec<SessionTranscriptMessage>> {
42    let file = fs::File::open(path)
43        .await
44        .with_context(|| format!("failed to open session file {:?}", path))?;
45    let reader = BufReader::new(file);
46    let mut lines = reader.lines();
47
48    let mut out: Vec<SessionTranscriptMessage> = Vec::new();
49    while let Some(line) = lines.next_line().await? {
50        let trimmed = line.trim();
51        if trimmed.is_empty() {
52            continue;
53        }
54        let value: Value = match serde_json::from_str(trimmed) {
55            Ok(v) => v,
56            Err(_) => continue,
57        };
58
59        let Some(msg) = extract_transcript_message(&value) else {
60            continue;
61        };
62        if msg.text.trim().is_empty() {
63            continue;
64        }
65        out.push(msg);
66    }
67    Ok(out)
68}
69
70async fn read_codex_session_transcript_tail(
71    path: &Path,
72    n: usize,
73) -> Result<Vec<SessionTranscriptMessage>> {
74    // Best-effort optimization: read a bounded window from the file tail instead of scanning the
75    // whole JSONL. If we don't collect enough messages, expand the window a few times.
76    let mut max_bytes = TAIL_SCAN_MAX_BYTES;
77    let mut last: Vec<SessionTranscriptMessage> = Vec::new();
78    for _ in 0..5 {
79        let (bytes, started_mid) = read_file_tail_bytes(path, max_bytes).await?;
80        last = extract_transcript_messages_from_jsonl_bytes(&bytes, started_mid, n);
81        if last.len() >= n {
82            break;
83        }
84        max_bytes = max_bytes.saturating_mul(2).min(16 * 1024 * 1024);
85    }
86    Ok(last)
87}
88
89async fn read_file_tail_bytes(path: &Path, max_bytes: usize) -> Result<(Vec<u8>, bool)> {
90    let meta = fs::metadata(path)
91        .await
92        .with_context(|| format!("failed to stat session file {:?}", path))?;
93    let len = meta.len();
94    let start = len.saturating_sub(max_bytes as u64);
95    let started_mid = start > 0;
96
97    let mut file = fs::File::open(path)
98        .await
99        .with_context(|| format!("failed to open session file {:?}", path))?;
100    file.seek(std::io::SeekFrom::Start(start)).await?;
101
102    let mut buf = Vec::new();
103    file.read_to_end(&mut buf).await?;
104    Ok((buf, started_mid))
105}
106
107fn extract_transcript_messages_from_jsonl_bytes(
108    bytes: &[u8],
109    started_mid: bool,
110    tail_n: usize,
111) -> Vec<SessionTranscriptMessage> {
112    if tail_n == 0 {
113        return Vec::new();
114    }
115
116    let mut slice = bytes;
117    if started_mid {
118        // If we started mid-file, the first line might be partial; drop it.
119        if let Some(pos) = slice.iter().position(|&b| b == b'\n') {
120            slice = &slice[pos + 1..];
121        }
122    }
123
124    let mut ring: VecDeque<SessionTranscriptMessage> = VecDeque::with_capacity(tail_n.max(1));
125
126    for raw in slice.split(|&b| b == b'\n') {
127        if raw.is_empty() {
128            continue;
129        }
130        let line = match std::str::from_utf8(raw) {
131            Ok(s) => s.trim().trim_end_matches('\r'),
132            Err(_) => continue,
133        };
134        if line.is_empty() {
135            continue;
136        }
137        let value: Value = match serde_json::from_str(line) {
138            Ok(v) => v,
139            Err(_) => continue,
140        };
141        let Some(msg) = extract_transcript_message(&value) else {
142            continue;
143        };
144        if msg.text.trim().is_empty() {
145            continue;
146        }
147
148        ring.push_back(msg);
149        if ring.len() > tail_n {
150            ring.pop_front();
151        }
152    }
153
154    ring.into_iter().collect()
155}
156
/// Map a transcript role identifier to its display label.
///
/// `"user"`, `"assistant"` and `"system"` become `"User"`, `"Assistant"` and `"System"`.
/// Matching is exact (case-sensitive); any other role is returned unchanged.
fn normalize_role(role: &str) -> String {
    let label = match role {
        "user" => "User",
        "assistant" => "Assistant",
        "system" => "System",
        unrecognized => unrecognized,
    };
    String::from(label)
}
165
166fn assistant_or_user_message_from_response_item(value: &Value) -> Option<(String, String)> {
167    let obj = value.as_object()?;
168    let type_str = obj.get("type")?.as_str()?;
169    if type_str != "response_item" {
170        return None;
171    }
172    let payload = obj.get("payload")?.as_object()?;
173    let payload_type = payload.get("type")?.as_str()?;
174    if payload_type != "message" {
175        return None;
176    }
177
178    let role = payload.get("role")?.as_str()?;
179    let text = payload
180        .get("content")
181        .and_then(|v| v.as_array())
182        .and_then(|items| extract_text_from_content_items(items))?;
183
184    Some((normalize_role(role), text))
185}
186
187fn extract_text_from_content_items(items: &[Value]) -> Option<String> {
188    let mut out = String::new();
189    for item in items {
190        let obj = match item.as_object() {
191            Some(o) => o,
192            None => continue,
193        };
194        let t = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
195        if !t.ends_with("_text") && t != "text" {
196            continue;
197        }
198        let Some(text) = obj.get("text").and_then(|v| v.as_str()) else {
199            continue;
200        };
201        out.push_str(text);
202    }
203    if out.is_empty() { None } else { Some(out) }
204}
205
206fn extract_transcript_message(value: &Value) -> Option<SessionTranscriptMessage> {
207    let timestamp = value
208        .get("timestamp")
209        .and_then(|v| v.as_str())
210        .map(|s| s.to_string());
211
212    if let Some(msg) = user_message_text(value) {
213        return Some(SessionTranscriptMessage {
214            timestamp,
215            role: "User".to_string(),
216            text: msg.to_string(),
217        });
218    }
219
220    if let Some((role, text)) = assistant_or_user_message_from_response_item(value) {
221        return Some(SessionTranscriptMessage {
222            timestamp,
223            role,
224            text,
225        });
226    }
227
228    None
229}