codex_helper_core/sessions/
transcript.rs1use std::collections::VecDeque;
2
3use super::*;
4
5pub async fn read_codex_session_transcript(
9 path: &Path,
10 tail: Option<usize>,
11) -> Result<Vec<SessionTranscriptMessage>> {
12 match tail {
13 Some(0) => Ok(Vec::new()),
14 Some(n) => read_codex_session_transcript_tail(path, n).await,
15 None => read_codex_session_transcript_full(path).await,
16 }
17}
18
19pub async fn codex_session_transcript_tail_contains_query(
25 path: &Path,
26 query: &str,
27 tail: usize,
28) -> Result<bool> {
29 let needle = query.trim();
30 if needle.is_empty() || tail == 0 {
31 return Ok(false);
32 }
33
34 let needle = needle.to_lowercase();
35 let msgs = read_codex_session_transcript(path, Some(tail)).await?;
36 Ok(msgs
37 .iter()
38 .any(|m| m.text.to_lowercase().contains(needle.as_str())))
39}
40
41async fn read_codex_session_transcript_full(path: &Path) -> Result<Vec<SessionTranscriptMessage>> {
42 let file = fs::File::open(path)
43 .await
44 .with_context(|| format!("failed to open session file {:?}", path))?;
45 let reader = BufReader::new(file);
46 let mut lines = reader.lines();
47
48 let mut out: Vec<SessionTranscriptMessage> = Vec::new();
49 while let Some(line) = lines.next_line().await? {
50 let trimmed = line.trim();
51 if trimmed.is_empty() {
52 continue;
53 }
54 let value: Value = match serde_json::from_str(trimmed) {
55 Ok(v) => v,
56 Err(_) => continue,
57 };
58
59 let Some(msg) = extract_transcript_message(&value) else {
60 continue;
61 };
62 if msg.text.trim().is_empty() {
63 continue;
64 }
65 out.push(msg);
66 }
67 Ok(out)
68}
69
70async fn read_codex_session_transcript_tail(
71 path: &Path,
72 n: usize,
73) -> Result<Vec<SessionTranscriptMessage>> {
74 let mut max_bytes = TAIL_SCAN_MAX_BYTES;
77 let mut last: Vec<SessionTranscriptMessage> = Vec::new();
78 for _ in 0..5 {
79 let (bytes, started_mid) = read_file_tail_bytes(path, max_bytes).await?;
80 last = extract_transcript_messages_from_jsonl_bytes(&bytes, started_mid, n);
81 if last.len() >= n {
82 break;
83 }
84 max_bytes = max_bytes.saturating_mul(2).min(16 * 1024 * 1024);
85 }
86 Ok(last)
87}
88
89async fn read_file_tail_bytes(path: &Path, max_bytes: usize) -> Result<(Vec<u8>, bool)> {
90 let meta = fs::metadata(path)
91 .await
92 .with_context(|| format!("failed to stat session file {:?}", path))?;
93 let len = meta.len();
94 let start = len.saturating_sub(max_bytes as u64);
95 let started_mid = start > 0;
96
97 let mut file = fs::File::open(path)
98 .await
99 .with_context(|| format!("failed to open session file {:?}", path))?;
100 file.seek(std::io::SeekFrom::Start(start)).await?;
101
102 let mut buf = Vec::new();
103 file.read_to_end(&mut buf).await?;
104 Ok((buf, started_mid))
105}
106
107fn extract_transcript_messages_from_jsonl_bytes(
108 bytes: &[u8],
109 started_mid: bool,
110 tail_n: usize,
111) -> Vec<SessionTranscriptMessage> {
112 if tail_n == 0 {
113 return Vec::new();
114 }
115
116 let mut slice = bytes;
117 if started_mid {
118 if let Some(pos) = slice.iter().position(|&b| b == b'\n') {
120 slice = &slice[pos + 1..];
121 }
122 }
123
124 let mut ring: VecDeque<SessionTranscriptMessage> = VecDeque::with_capacity(tail_n.max(1));
125
126 for raw in slice.split(|&b| b == b'\n') {
127 if raw.is_empty() {
128 continue;
129 }
130 let line = match std::str::from_utf8(raw) {
131 Ok(s) => s.trim().trim_end_matches('\r'),
132 Err(_) => continue,
133 };
134 if line.is_empty() {
135 continue;
136 }
137 let value: Value = match serde_json::from_str(line) {
138 Ok(v) => v,
139 Err(_) => continue,
140 };
141 let Some(msg) = extract_transcript_message(&value) else {
142 continue;
143 };
144 if msg.text.trim().is_empty() {
145 continue;
146 }
147
148 ring.push_back(msg);
149 if ring.len() > tail_n {
150 ring.pop_front();
151 }
152 }
153
154 ring.into_iter().collect()
155}
156
/// Maps a raw role identifier to its display label.
///
/// The exact strings `"user"`, `"assistant"` and `"system"` become `"User"`,
/// `"Assistant"` and `"System"`; any other input (including differently-cased
/// variants) is returned unchanged.
fn normalize_role(role: &str) -> String {
    let label = match role {
        "user" => "User",
        "assistant" => "Assistant",
        "system" => "System",
        unrecognised => unrecognised,
    };
    label.to_owned()
}
165
166fn assistant_or_user_message_from_response_item(value: &Value) -> Option<(String, String)> {
167 let obj = value.as_object()?;
168 let type_str = obj.get("type")?.as_str()?;
169 if type_str != "response_item" {
170 return None;
171 }
172 let payload = obj.get("payload")?.as_object()?;
173 let payload_type = payload.get("type")?.as_str()?;
174 if payload_type != "message" {
175 return None;
176 }
177
178 let role = payload.get("role")?.as_str()?;
179 let text = payload
180 .get("content")
181 .and_then(|v| v.as_array())
182 .and_then(|items| extract_text_from_content_items(items))?;
183
184 Some((normalize_role(role), text))
185}
186
187fn extract_text_from_content_items(items: &[Value]) -> Option<String> {
188 let mut out = String::new();
189 for item in items {
190 let obj = match item.as_object() {
191 Some(o) => o,
192 None => continue,
193 };
194 let t = obj.get("type").and_then(|v| v.as_str()).unwrap_or("");
195 if !t.ends_with("_text") && t != "text" {
196 continue;
197 }
198 let Some(text) = obj.get("text").and_then(|v| v.as_str()) else {
199 continue;
200 };
201 out.push_str(text);
202 }
203 if out.is_empty() { None } else { Some(out) }
204}
205
206fn extract_transcript_message(value: &Value) -> Option<SessionTranscriptMessage> {
207 let timestamp = value
208 .get("timestamp")
209 .and_then(|v| v.as_str())
210 .map(|s| s.to_string());
211
212 if let Some(msg) = user_message_text(value) {
213 return Some(SessionTranscriptMessage {
214 timestamp,
215 role: "User".to_string(),
216 text: msg.to_string(),
217 });
218 }
219
220 if let Some((role, text)) = assistant_or_user_message_from_response_item(value) {
221 return Some(SessionTranscriptMessage {
222 timestamp,
223 role,
224 text,
225 });
226 }
227
228 None
229}