//! Session indexing operations for `agtrace_runtime` (`ops/index.rs`).
1use crate::{Error, Result};
2use agtrace_index::{Database, LogFileRecord, ProjectRecord, SessionRecord};
3use agtrace_providers::ProviderAdapter;
4use agtrace_types::RepositoryHash;
5use std::collections::{HashMap, HashSet};
6use std::path::PathBuf;
7
8#[derive(Debug, Clone)]
9pub enum IndexProgress {
10    IncrementalHint {
11        indexed_files: usize,
12    },
13    LogRootMissing {
14        provider_name: String,
15        log_root: PathBuf,
16    },
17    ProviderScanning {
18        provider_name: String,
19    },
20    ProviderSessionCount {
21        provider_name: String,
22        count: usize,
23        project_hash: String,
24        all_projects: bool,
25    },
26    SessionRegistered {
27        session_id: String,
28    },
29    Completed {
30        total_sessions: usize,
31        scanned_files: usize,
32        skipped_files: usize,
33    },
34}
35
/// Scans provider log roots and registers discovered sessions, their
/// projects, and their log files in the index database.
pub struct IndexService<'a> {
    /// Borrowed handle to the index database; all upserts go through it.
    db: &'a Database,
    /// Each provider adapter paired with the log-root directory it scans.
    providers: Vec<(ProviderAdapter, PathBuf)>,
}
40
impl<'a> IndexService<'a> {
    /// Creates a service over `db` for the given `(provider, log_root)` pairs.
    pub fn new(db: &'a Database, providers: Vec<(ProviderAdapter, PathBuf)>) -> Self {
        Self { db, providers }
    }

    /// Scans every configured provider's log root and upserts projects,
    /// sessions, and log files into the database.
    ///
    /// * `scope` — restrict indexing to a specific project hash, or index all
    ///   projects (`ProjectScope::All`).
    /// * `force` — when `true`, re-index every file even if its recorded
    ///   size/mtime still match the filesystem.
    /// * `on_progress` — invoked with [`IndexProgress`] events as work proceeds.
    ///
    /// # Errors
    /// Returns an error if a database call fails or a provider's session
    /// scan fails (`Error::Provider`).
    pub fn run<F>(
        &self,
        scope: agtrace_types::ProjectScope,
        force: bool,
        mut on_progress: F,
    ) -> Result<()>
    where
        F: FnMut(IndexProgress),
    {
        // Paths of previously indexed files that are unchanged on disk
        // (per should_skip_indexed_file). Empty when forcing, so nothing
        // is skipped.
        let indexed_files = if force {
            HashSet::new()
        } else {
            self.db
                .get_all_log_files()?
                .into_iter()
                .filter_map(|f| {
                    if should_skip_indexed_file(&f) {
                        Some(f.path)
                    } else {
                        None
                    }
                })
                .collect::<HashSet<_>>()
        };

        if !force {
            on_progress(IndexProgress::IncrementalHint {
                indexed_files: indexed_files.len(),
            });
        }

        let mut total_sessions = 0;
        let mut scanned_files = 0;
        let mut skipped_files = 0;

        // Cache repository_hash per project_root to avoid repeated git subprocess calls.
        // The Option is cached too, so a failed lookup is not retried.
        let mut repository_hash_cache: HashMap<PathBuf, Option<RepositoryHash>> = HashMap::new();

        for (provider, log_root) in &self.providers {
            let provider_name = provider.id();

            // A missing log root is reported but non-fatal; move on to the
            // next provider.
            if !log_root.exists() {
                on_progress(IndexProgress::LogRootMissing {
                    provider_name: provider_name.to_string(),
                    log_root: log_root.clone(),
                });
                continue;
            }

            on_progress(IndexProgress::ProviderScanning {
                provider_name: provider_name.to_string(),
            });

            let sessions = provider
                .discovery
                .scan_sessions(log_root)
                .map_err(Error::Provider)?;

            // Filter sessions by project_hash if specified in `scope`.
            let filtered_sessions: Vec<_> = sessions
                .into_iter()
                .filter(|session| {
                    if let Some(expected_hash) = scope.hash() {
                        if let Some(session_root) = &session.project_root {
                            let session_hash = agtrace_core::project_hash_from_root(&session_root.to_string_lossy());
                            &session_hash == expected_hash
                        } else {
                            // Gemini sessions might not have project_root; compute hash from file.
                            if provider_name == "gemini" {
                                use agtrace_providers::gemini::io::extract_project_hash_from_gemini_file;
                                if let Some(session_hash) = extract_project_hash_from_gemini_file(&session.main_file) {
                                    &session_hash == expected_hash
                                } else {
                                    false
                                }
                            } else {
                                // No project_root and not Gemini: cannot match a scoped hash.
                                false
                            }
                        }
                    } else {
                        // No scope filter — keep every session.
                        true
                    }
                })
                .collect();

            on_progress(IndexProgress::ProviderSessionCount {
                provider_name: provider_name.to_string(),
                count: filtered_sessions.len(),
                project_hash: match &scope {
                    agtrace_types::ProjectScope::All => "<all>".to_string(),
                    agtrace_types::ProjectScope::Specific(hash) => hash.to_string(),
                },
                all_projects: matches!(scope, agtrace_types::ProjectScope::All),
            });

            // Sort sessions: parents (no parent_session_id) before children.
            // This ensures the FK constraint on parent_session_id is satisfied
            // when sessions are inserted in order below.
            let mut filtered_sessions = filtered_sessions;
            filtered_sessions.sort_by_key(|s| s.parent_session_id.is_some());

            for session in filtered_sessions {
                // Collect all file paths (main + sidechains) for this session.
                let mut all_files = vec![session.main_file.display().to_string()];
                for side_file in &session.sidechain_files {
                    all_files.push(side_file.display().to_string());
                }

                // Skip only if *every* file of the session is unchanged;
                // a single changed file re-registers the whole session.
                let all_files_unchanged =
                    !force && all_files.iter().all(|f| indexed_files.contains(f));

                if all_files_unchanged {
                    skipped_files += all_files.len();
                    continue;
                }

                on_progress(IndexProgress::SessionRegistered {
                    session_id: session.session_id.clone(),
                });

                // Calculate project_hash from session data.
                let session_project_hash = if let Some(ref root) = session.project_root {
                    agtrace_core::project_hash_from_root(&root.to_string_lossy())
                } else if provider_name == "gemini" {
                    // For Gemini, extract project_hash directly from the file;
                    // fall back to a hash derived from the log path.
                    use agtrace_providers::gemini::io::extract_project_hash_from_gemini_file;
                    extract_project_hash_from_gemini_file(&session.main_file).unwrap_or_else(|| {
                        agtrace_core::project_hash_from_log_path(&session.main_file)
                    })
                } else {
                    // Generate a unique hash from the log path for orphaned sessions.
                    agtrace_core::project_hash_from_log_path(&session.main_file)
                };

                // Calculate repository_hash for git worktree support (cached per project_root).
                let repository_hash = session.project_root.as_ref().and_then(|root| {
                    repository_hash_cache
                        .entry(root.clone())
                        .or_insert_with(|| agtrace_core::repository_hash_from_path(root))
                        .clone()
                });

                // Upsert the project row first so the session FK can reference it.
                let project_record = ProjectRecord {
                    hash: session_project_hash.clone(),
                    root_path: session
                        .project_root
                        .as_ref()
                        .map(|p| p.to_string_lossy().to_string()),
                    last_scanned_at: Some(chrono::Utc::now().to_rfc3339()),
                };
                self.db.insert_or_update_project(&project_record)?;

                let session_record = SessionRecord {
                    id: session.session_id.clone(),
                    project_hash: session_project_hash,
                    repository_hash,
                    provider: provider_name.to_string(),
                    start_ts: session.timestamp.clone(),
                    end_ts: None,
                    snippet: session.snippet.clone(),
                    is_valid: true,
                    parent_session_id: session.parent_session_id.clone(),
                    spawned_by: session.spawned_by.clone(),
                };
                self.db.insert_or_update_session(&session_record)?;

                // Builds a LogFileRecord snapshot for `path`; metadata failures
                // are tolerated (size/mod_time become None, never an error).
                // NOTE: mod_time is the Debug rendering of SystemTime and must
                // stay in sync with the comparison in should_skip_indexed_file.
                let to_log_file_record = |path: &PathBuf, role: &str| -> Result<LogFileRecord> {
                    let meta = std::fs::metadata(path).ok();
                    Ok(LogFileRecord {
                        path: path.display().to_string(),
                        session_id: session.session_id.clone(),
                        role: role.to_string(),
                        file_size: meta.as_ref().map(|m| m.len() as i64),
                        mod_time: meta
                            .and_then(|m| m.modified().ok())
                            .map(|t| format!("{:?}", t)),
                    })
                };

                // Register main file.
                scanned_files += 1;
                let main_log_file = to_log_file_record(&session.main_file, "main")?;
                self.db.insert_or_update_log_file(&main_log_file)?;

                // Register sidechain files.
                for side_file in &session.sidechain_files {
                    scanned_files += 1;
                    let side_log_file = to_log_file_record(side_file, "sidechain")?;
                    self.db.insert_or_update_log_file(&side_log_file)?;
                }

                total_sessions += 1;
            }
        }

        on_progress(IndexProgress::Completed {
            total_sessions,
            scanned_files,
            skipped_files,
        });

        Ok(())
    }
}
249
250fn should_skip_indexed_file(indexed: &LogFileRecord) -> bool {
251    use std::path::Path;
252
253    let path = Path::new(&indexed.path);
254
255    if !path.exists() {
256        return false;
257    }
258
259    let metadata = match std::fs::metadata(path) {
260        Ok(m) => m,
261        Err(_) => return false,
262    };
263
264    if let Some(db_size) = indexed.file_size {
265        if db_size != metadata.len() as i64 {
266            return false;
267        }
268    } else {
269        return false;
270    }
271
272    if let Some(db_mod_time) = &indexed.mod_time {
273        if let Ok(fs_mod_time) = metadata.modified() {
274            let fs_mod_time_str = format!("{:?}", fs_mod_time);
275            if db_mod_time != &fs_mod_time_str {
276                return false;
277            }
278        } else {
279            return false;
280        }
281    } else {
282        return false;
283    }
284
285    true
286}