coding_agent_search/connectors/
mod.rs1use std::fs;
7use std::io;
8use std::path::{Path, PathBuf};
9
10pub use franken_agent_detection::{
12 Connector,
13 DetectionResult,
14 DiscoveredSourceFile,
15 DiscoveredSourceRole,
16 ExtractedTokenUsage,
17 LOCAL_SOURCE_ID,
18 ModelInfo,
19 NormalizedConversation,
21 NormalizedMessage,
22 NormalizedSnippet,
23 Origin,
24 PathMapping,
25 PathTrie,
27 Platform,
28 ScanContext,
29 ScanRoot,
30 SourceKind,
31 TokenDataSource,
32 WorkspaceCache,
33 estimate_tokens_from_content,
34 extract_claude_code_tokens,
35 extract_codex_tokens,
36 extract_tokens_for_agent,
37 file_modified_since,
38 flatten_content,
39 franken_detection_for_connector,
40 get_connector_factories,
41 normalize_model,
42 parse_timestamp,
43 reindex_messages,
44};
45
46#[doc(hidden)]
50#[derive(Debug, Clone)]
51pub struct CodexScanPreflight {
52 pub scan_roots: Vec<ScanRoot>,
53 pub original_roots: usize,
54 pub explicit_file_roots: usize,
55 pub fallback_roots: usize,
56}
57
58#[doc(hidden)]
67#[must_use]
68pub fn preflight_codex_explicit_file_roots(
69 roots: &[ScanRoot],
70 since_ts: Option<i64>,
71) -> CodexScanPreflight {
72 let mut scan_roots = Vec::new();
73 let mut explicit_file_roots = 0usize;
74 let mut fallback_roots = 0usize;
75
76 for root in roots {
77 if root.path.is_file() {
78 if is_codex_rollout_file(&root.path) {
79 explicit_file_roots = explicit_file_roots.saturating_add(1);
80 }
81 scan_roots.push(root.clone());
82 continue;
83 }
84
85 match codex_explicit_file_roots_for_root(root, since_ts) {
86 Ok(expanded) => {
87 explicit_file_roots = explicit_file_roots.saturating_add(expanded.len());
88 scan_roots.extend(expanded);
89 }
90 Err(_) => {
91 fallback_roots = fallback_roots.saturating_add(1);
92 scan_roots.push(root.clone());
93 }
94 }
95 }
96
97 CodexScanPreflight {
98 scan_roots,
99 original_roots: roots.len(),
100 explicit_file_roots,
101 fallback_roots,
102 }
103}
104
105fn codex_explicit_file_roots_for_root(
106 root: &ScanRoot,
107 since_ts: Option<i64>,
108) -> io::Result<Vec<ScanRoot>> {
109 if !is_under_codex_dir(&root.path) && root.path.join(".codex").exists() {
110 return Err(io::Error::other(
111 "parent codex roots keep directory scan to preserve external IDs",
112 ));
113 }
114
115 let sessions = codex_sessions_dir(&root.path);
116 if sessions == root.path
117 && root
118 .path
119 .file_name()
120 .and_then(|name| name.to_str())
121 .is_none_or(|name| name != "sessions")
122 {
123 return Err(io::Error::other(
124 "roots without a sessions directory keep directory scan to preserve external IDs",
125 ));
126 }
127
128 let files = collect_codex_rollout_files(&sessions, since_ts)?;
129
130 Ok(files
131 .into_iter()
132 .map(|path| {
133 let mut file_root = root.clone();
134 file_root.path = path;
135 file_root
136 })
137 .collect())
138}
139
/// Returns `true` when `path` itself, or any of its ancestors, has a final
/// component named exactly `.codex`.
///
/// Components that are not valid UTF-8 never match.
fn is_under_codex_dir(path: &Path) -> bool {
    path.ancestors()
        .filter_map(Path::file_name)
        .any(|component| component.to_str() == Some(".codex"))
}
148
/// Picks the directory to search for rollout files under `home`.
///
/// Returns `home/sessions` when that path exists on disk; otherwise returns
/// `home` itself.
fn codex_sessions_dir(home: &Path) -> PathBuf {
    Some(home.join("sessions"))
        .filter(|candidate| candidate.exists())
        .unwrap_or_else(|| home.to_path_buf())
}
157
158fn collect_codex_rollout_files(sessions: &Path, since_ts: Option<i64>) -> io::Result<Vec<PathBuf>> {
159 if !sessions.exists() {
160 return Ok(Vec::new());
161 }
162
163 let mut files = Vec::new();
164 let mut pending_dirs = vec![sessions.to_path_buf()];
165 while let Some(dir) = pending_dirs.pop() {
166 let mut entries = fs::read_dir(&dir)?.collect::<io::Result<Vec<_>>>()?;
167 entries.sort_by_key(|entry| entry.path());
168 for entry in entries {
169 let file_type = entry.file_type()?;
170 let path = entry.path();
171 if file_type.is_dir() {
172 pending_dirs.push(path);
173 } else if file_type.is_file()
174 && is_codex_rollout_file(&path)
175 && file_modified_since(&path, since_ts)
176 {
177 files.push(path);
178 }
179 }
180 }
181
182 files.sort();
183 files.dedup();
184 Ok(files)
185}
186
/// Returns `true` when `path` is named like a Codex rollout file.
///
/// The final path component must be valid UTF-8 and start with `rollout-`,
/// and the extension must be `jsonl` or `json`, compared ASCII
/// case-insensitively. Only the name is inspected; the filesystem is not
/// touched.
fn is_codex_rollout_file(path: &Path) -> bool {
    let has_rollout_prefix = path
        .file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| name.starts_with("rollout-"));
    if !has_rollout_prefix {
        return false;
    }

    matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some(ext) if ext.eq_ignore_ascii_case("jsonl") || ext.eq_ignore_ascii_case("json")
    )
}
199
200pub mod aider;
202pub mod amp;
203pub mod chatgpt;
204pub mod claude_code;
205pub mod clawdbot;
206pub mod cline;
207pub mod codex;
208pub mod copilot;
209pub mod copilot_cli;
210pub mod crush;
211pub mod cursor;
212pub mod factory;
213pub mod gemini;
214pub mod hermes;
215pub mod kimi;
216pub mod openclaw;
217pub mod opencode;
218pub mod pi_agent;
219pub mod qwen;
220pub mod vibe;