Skip to main content

retro_core/ingest/
mod.rs

1pub mod context;
2pub mod history;
3pub mod session;
4
5use crate::config::Config;
6use crate::db;
7use crate::errors::CoreError;
8use crate::models::IngestedSession;
9use chrono::Utc;
10use rusqlite::Connection;
11
/// Result of an ingestion run.
///
/// `Default` yields the empty result (all counters zero, no errors).
#[derive(Debug, Default)]
pub struct IngestResult {
    /// Number of session files matched in the project's sessions directory.
    pub sessions_found: usize,
    /// Number of sessions that were parsed and recorded during this run.
    pub sessions_ingested: usize,
    /// Number of sessions skipped because they were already ingested and unchanged.
    pub sessions_skipped: usize,
    /// Human-readable messages for non-fatal, per-session failures.
    pub errors: Vec<String>,
}
20
21/// Run ingestion for a specific project path.
22pub fn ingest_project(
23    conn: &Connection,
24    config: &Config,
25    project_path: &str,
26) -> Result<IngestResult, CoreError> {
27    // Check if project is excluded
28    if config.privacy.exclude_projects.iter().any(|excl| project_path.contains(excl.as_str())) {
29        return Ok(IngestResult {
30            sessions_found: 0,
31            sessions_ingested: 0,
32            sessions_skipped: 0,
33            errors: Vec::new(),
34        });
35    }
36
37    let claude_dir = config.claude_dir();
38    let encoded_path = encode_project_path(project_path);
39    let sessions_dir = claude_dir.join("projects").join(&encoded_path);
40
41    let mut result = IngestResult {
42        sessions_found: 0,
43        sessions_ingested: 0,
44        sessions_skipped: 0,
45        errors: Vec::new(),
46    };
47
48    if !sessions_dir.exists() {
49        return Ok(result);
50    }
51
52    // Find all session JSONL files (direct children, not subagent files)
53    let pattern = sessions_dir.join("*.jsonl");
54    let pattern_str = pattern.to_string_lossy();
55
56    let paths: Vec<_> = glob::glob(&pattern_str)
57        .map_err(|e| CoreError::Parse(format!("glob pattern error: {e}")))?
58        .filter_map(|r| r.ok())
59        .collect();
60
61    result.sessions_found = paths.len();
62
63    for path in paths {
64        let session_id = match path.file_stem().and_then(|s| s.to_str()) {
65            Some(id) => id.to_string(),
66            None => continue,
67        };
68
69        // Get file metadata for change detection
70        let metadata = match std::fs::metadata(&path) {
71            Ok(m) => m,
72            Err(e) => {
73                result
74                    .errors
75                    .push(format!("metadata error for {}: {e}", path.display()));
76                continue;
77            }
78        };
79
80        let file_size = metadata.len();
81        let file_mtime = match metadata.modified() {
82            Ok(t) => {
83                let dt: chrono::DateTime<Utc> = t.into();
84                dt.to_rfc3339()
85            }
86            Err(_) => Utc::now().to_rfc3339(),
87        };
88
89        // Check if already ingested and unchanged
90        if db::is_session_ingested(conn, &session_id, file_size, &file_mtime)? {
91            result.sessions_skipped += 1;
92            continue;
93        }
94
95        // Parse the session
96        match session::parse_session_file(&path, &session_id, project_path) {
97            Ok(_session) => {
98                // Also parse subagent files if the session directory exists
99                let subagent_dir = sessions_dir.join(&session_id).join("subagents");
100                let _subagent_sessions = if subagent_dir.exists() {
101                    match session::parse_subagent_dir(&subagent_dir, &session_id, project_path) {
102                        Ok(subs) => subs,
103                        Err(e) => {
104                            result.errors.push(format!(
105                                "subagent parse error for {}: {e}",
106                                session_id
107                            ));
108                            Vec::new()
109                        }
110                    }
111                } else {
112                    Vec::new()
113                };
114
115                // Record the ingested session
116                let ingested = IngestedSession {
117                    session_id: session_id.clone(),
118                    project: project_path.to_string(),
119                    session_path: path.to_string_lossy().to_string(),
120                    file_size,
121                    file_mtime,
122                    ingested_at: Utc::now(),
123                };
124                db::record_ingested_session(conn, &ingested)?;
125                result.sessions_ingested += 1;
126            }
127            Err(e) => {
128                result
129                    .errors
130                    .push(format!("parse error for {}: {e}", session_id));
131            }
132        }
133    }
134
135    Ok(result)
136}
137
138/// Ingest all projects found in the claude projects directory.
139pub fn ingest_all_projects(
140    conn: &Connection,
141    config: &Config,
142) -> Result<IngestResult, CoreError> {
143    let claude_dir = config.claude_dir();
144    let projects_dir = claude_dir.join("projects");
145
146    let mut total = IngestResult {
147        sessions_found: 0,
148        sessions_ingested: 0,
149        sessions_skipped: 0,
150        errors: Vec::new(),
151    };
152
153    if !projects_dir.exists() {
154        return Ok(total);
155    }
156
157    let entries = std::fs::read_dir(&projects_dir)
158        .map_err(|e| CoreError::Io(format!("reading projects dir: {e}")))?;
159
160    for entry in entries {
161        let entry = match entry {
162            Ok(e) => e,
163            Err(_) => continue,
164        };
165
166        if !entry.path().is_dir() {
167            continue;
168        }
169
170        let dir_name = match entry.file_name().to_str() {
171            Some(n) => n.to_string(),
172            None => continue,
173        };
174
175        // Check if project is excluded
176        if config.privacy.exclude_projects.iter().any(|excl| dir_name.contains(&encode_project_path(excl))) {
177            continue;
178        }
179
180        let sessions_dir = entry.path();
181        let project_path = recover_project_path(&sessions_dir, &dir_name);
182
183        let result = ingest_project(conn, config, &project_path)?;
184        total.sessions_found += result.sessions_found;
185        total.sessions_ingested += result.sessions_ingested;
186        total.sessions_skipped += result.sessions_skipped;
187        total.errors.extend(result.errors);
188    }
189
190    Ok(total)
191}
192
/// Turn a project path into the flat name used for its sessions directory.
///
/// Every `/` becomes `-`; all other characters are kept as they are.
/// Example: `/home/user/project` → `-home-user-project`.
pub fn encode_project_path(path: &str) -> String {
    let components: Vec<&str> = path.split('/').collect();
    components.join("-")
}
198
199/// Recover the project path from an encoded directory name by reading `cwd`
200/// from the first session file inside it. Falls back to the encoded name
201/// if no sessions exist (which means the directory has no data anyway).
202pub fn recover_project_path(sessions_dir: &std::path::Path, encoded: &str) -> String {
203    // Try to read the cwd field from the first session file
204    let pattern = sessions_dir.join("*.jsonl");
205    if let Ok(paths) = glob::glob(&pattern.to_string_lossy()) {
206        for path in paths.filter_map(|r| r.ok()) {
207            if let Ok(file) = std::fs::File::open(&path) {
208                let reader = std::io::BufReader::new(file);
209                use std::io::BufRead;
210                for line in reader.lines().take(5) {
211                    if let Ok(line) = line {
212                        if let Ok(val) = serde_json::from_str::<serde_json::Value>(&line) {
213                            if let Some(cwd) = val.get("cwd").and_then(|c| c.as_str()) {
214                                return cwd.to_string();
215                            }
216                        }
217                    }
218                }
219            }
220        }
221    }
222    // Fallback: naive decode (works for paths without hyphens)
223    naive_decode_project_path(encoded)
224}
225
/// Naive decode — only correct for paths without hyphens in components.
///
/// Replaces every `-` with `/`, so a leading `-` becomes the root `/`.
/// A hyphen that was part of an original path component is turned into a
/// separator as well, which is why the result is not reliable in general.
fn naive_decode_project_path(encoded: &str) -> String {
    // A leading '-' needs no special case: it is replaced like any other.
    encoded.replace('-', "/")
}
234
235/// Find the encoded project directory for a given project path.
236pub fn find_project_dir(config: &Config, project_path: &str) -> Option<std::path::PathBuf> {
237    let claude_dir = config.claude_dir();
238    let encoded = encode_project_path(project_path);
239    let dir = claude_dir.join("projects").join(&encoded);
240    if dir.exists() {
241        Some(dir)
242    } else {
243        None
244    }
245}
246
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute paths get a leading `-` and one `-` per separator.
    #[test]
    fn test_encode_project_path() {
        assert_eq!(
            encode_project_path("/home/user/projects/myapp"),
            "-home-user-projects-myapp"
        );
    }

    /// Input without separators (including the empty string) is unchanged.
    #[test]
    fn test_encode_project_path_without_separators() {
        assert_eq!(encode_project_path(""), "");
        assert_eq!(encode_project_path("myapp"), "myapp");
    }

    /// Hyphen-free paths survive the naive decode.
    #[test]
    fn test_naive_decode_project_path() {
        assert_eq!(
            naive_decode_project_path("-home-user-projects-myapp"),
            "/home/user/projects/myapp"
        );
    }

    /// Pins the documented limitation: a hyphen inside a path component is
    /// indistinguishable from a separator once encoded.
    #[test]
    fn test_naive_decode_is_lossy_for_hyphenated_components() {
        let encoded = encode_project_path("/home/user/my-app");
        assert_eq!(encoded, "-home-user-my-app");
        assert_eq!(naive_decode_project_path(&encoded), "/home/user/my/app");
    }
}