1use std::collections::{HashMap, HashSet};
9use std::path::Path;
10
11use mimir_core::error::{Error, Result};
12use mimir_core::model::{now_unix, Kind, NewNode, Node, Rel};
13use mimir_core::store::{self, row_to_node, NODE_COLS};
14use rusqlite::{params, Connection, OptionalExtension};
15
16use crate::extract::{self, FileExtract};
17use crate::languages::Lang;
18
/// Counters describing what a single [`update`] pass did.
///
/// All counters start at zero (`Default`) and are only ever incremented.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GraphStats {
    /// Files met by the walk whose path maps to a supported language.
    pub files_seen: usize,
    /// Files that were extracted again and written to the store.
    pub files_indexed: usize,
    /// Files skipped because their mtime and size, or their content hash,
    /// matched the stored node.
    pub unchanged: usize,
    /// Live file nodes that did not show up in the walk and were soft-deleted.
    pub removed: usize,
    /// Symbols inserted or refreshed while persisting files.
    pub symbols: usize,
    /// Call sites linked to exactly one target (edges marked `resolved = true`).
    pub calls_resolved: usize,
    /// Edges written for ambiguous call sites (two or three candidates,
    /// marked `resolved = false`).
    pub calls_heuristic: usize,
    /// File-to-file import edges written.
    pub imports: usize,
}
30
31pub fn stable_id(project_id: i64, rel_path: &str, qualified: &str, kind: &str) -> String {
32 blake3::hash(format!("{project_id}|{rel_path}|{qualified}|{kind}").as_bytes())
33 .to_hex()
34 .to_string()
35}
36
37pub fn update(conn: &mut Connection, project: &Node, root: &Path) -> Result<GraphStats> {
41 let mut stats = GraphStats::default();
42 let mut seen: HashSet<String> = HashSet::new();
43 let mut changed_files: Vec<(i64, String, FileExtract)> = Vec::new();
44
45 for entry in ignore::WalkBuilder::new(root).build() {
46 let entry = match entry {
47 Ok(e) => e,
48 Err(err) => {
49 tracing::warn!(%err, "skipping unreadable entry");
50 continue;
51 }
52 };
53 if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
54 continue;
55 }
56 let path = entry.path();
57 let rel = path
58 .strip_prefix(root)
59 .unwrap_or(path)
60 .to_string_lossy()
61 .replace('\\', "/");
62 let Some(lang) = Lang::from_path(&rel) else {
63 continue;
64 };
65 seen.insert(rel.clone());
66 stats.files_seen += 1;
67
68 let meta = entry
69 .metadata()
70 .map_err(|e| Error::Invalid(format!("stat {rel}: {e}")))?;
71 let mtime = meta
72 .modified()
73 .ok()
74 .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
75 .map(|d| d.as_secs() as i64)
76 .unwrap_or(0);
77 let size = meta.len() as i64;
78
79 let existing = code_file(conn, project.id, &rel)?;
80 if let Some(f) = &existing {
81 if f.deleted_at.is_none()
82 && f.meta.get("mtime").and_then(|v| v.as_i64()) == Some(mtime)
83 && f.meta.get("size").and_then(|v| v.as_i64()) == Some(size)
84 {
85 stats.unchanged += 1;
86 continue;
87 }
88 }
89
90 let raw = std::fs::read(path).map_err(|e| Error::io(path, e))?;
91 let content = String::from_utf8_lossy(&raw);
92 let hash = blake3::hash(content.as_bytes()).as_bytes().to_vec();
93 if let Some(f) = &existing {
94 if f.deleted_at.is_none() && f.content_hash.as_deref() == Some(&hash[..]) {
95 conn.execute(
96 "UPDATE node SET meta = json_set(meta, '$.mtime', ?2, '$.size', ?3),
97 updated_at = ?4 WHERE id = ?1",
98 params![f.id, mtime, size, now_unix()],
99 )?;
100 stats.unchanged += 1;
101 continue;
102 }
103 }
104
105 let fx = extract::extract(lang, &content);
106 let file_id = persist_file(
107 conn,
108 project.id,
109 existing.as_ref(),
110 &rel,
111 lang,
112 &hash,
113 mtime,
114 size,
115 &fx,
116 &mut stats,
117 )?;
118 changed_files.push((file_id, rel, fx));
119 stats.files_indexed += 1;
120 }
121
122 let mut stmt = conn.prepare(
124 "SELECT id, path FROM node
125 WHERE kind = 'file' AND project_id = ?1 AND collection_id IS NULL
126 AND deleted_at IS NULL",
127 )?;
128 let live: Vec<(i64, String)> = stmt
129 .query_map([project.id], |r| Ok((r.get(0)?, r.get(1)?)))?
130 .collect::<rusqlite::Result<_>>()?;
131 drop(stmt);
132 for (id, path) in live {
133 if !seen.contains(&path) {
134 conn.execute(
135 "UPDATE node SET deleted_at = ?2
136 WHERE deleted_at IS NULL AND (id = ?1 OR parent_id = ?1)",
137 params![id, now_unix()],
138 )?;
139 stats.removed += 1;
140 }
141 }
142
143 resolve_calls(conn, project.id, &changed_files, &mut stats)?;
144 Ok(stats)
145}
146
147fn code_file(conn: &Connection, project_id: i64, rel: &str) -> Result<Option<Node>> {
148 Ok(conn
149 .query_row(
150 &format!(
151 "SELECT {NODE_COLS} FROM node
152 WHERE kind = 'file' AND project_id = ?1 AND path = ?2
153 AND collection_id IS NULL"
154 ),
155 params![project_id, rel],
156 row_to_node,
157 )
158 .optional()?)
159}
160
161#[allow(clippy::too_many_arguments)]
162fn persist_file(
163 conn: &mut Connection,
164 project_id: i64,
165 existing: Option<&Node>,
166 rel: &str,
167 lang: Lang,
168 hash: &[u8],
169 mtime: i64,
170 size: i64,
171 fx: &FileExtract,
172 stats: &mut GraphStats,
173) -> Result<i64> {
174 let imports_json: Vec<serde_json::Value> = fx
175 .imports
176 .iter()
177 .map(|i| serde_json::json!({"local": i.local, "source": i.source}))
178 .collect();
179 let calls_json: Vec<serde_json::Value> = fx
180 .calls
181 .iter()
182 .filter(|c| !c.caller.is_empty())
183 .map(|c| serde_json::json!({"caller": c.caller, "callee": c.callee}))
184 .collect();
185 let file_meta = serde_json::json!({
186 "mtime": mtime, "size": size,
187 "imports": imports_json, "calls": calls_json,
188 });
189
190 let tx = conn.transaction()?;
191 let file_id = match existing {
192 Some(f) => {
193 tx.execute(
194 "UPDATE node SET content_hash = ?2, meta = ?3, lang = ?4,
195 updated_at = ?5, deleted_at = NULL WHERE id = ?1",
196 params![f.id, hash, file_meta.to_string(), lang.name(), now_unix()],
197 )?;
198 f.id
199 }
200 None => {
201 let mut new = NewNode::new(Kind::File);
202 new.title = Some(
203 Path::new(rel)
204 .file_name()
205 .map(|s| s.to_string_lossy().into_owned())
206 .unwrap_or_else(|| rel.to_string()),
207 );
208 new.path = Some(rel.to_string());
209 new.lang = Some(lang.name().into());
210 new.project_id = Some(project_id);
211 new.content_hash = Some(hash.to_vec());
212 new.meta = Some(file_meta);
213 store::insert_node(&tx, new)?.id
214 }
215 };
216
217 let mut kept: HashSet<i64> = HashSet::new();
219 for sym in &fx.symbols {
220 let sid = stable_id(project_id, rel, &sym.qualified, sym.kind);
221 let body = match &sym.doc {
222 Some(d) => format!("{}\n{d}", sym.signature),
223 None => sym.signature.clone(),
224 };
225 let meta = serde_json::json!({"stable_id": sid, "name": sym.name});
226 let existing_id: Option<i64> = tx
227 .query_row(
228 "SELECT id FROM node
229 WHERE kind = 'symbol' AND json_extract(meta, '$.stable_id') = ?1",
230 [&sid],
231 |r| r.get(0),
232 )
233 .optional()?;
234 let id = match existing_id {
235 Some(id) => {
236 tx.execute(
237 "UPDATE node SET title = ?2, body = ?3, subkind = ?4, path = ?5,
238 span_start = ?6, span_end = ?7, content_hash = ?8, meta = ?9,
239 lang = ?10, parent_id = ?11, updated_at = ?12, deleted_at = NULL
240 WHERE id = ?1",
241 params![
242 id,
243 sym.qualified,
244 body,
245 sym.kind,
246 rel,
247 sym.start_line as i64,
248 sym.end_line as i64,
249 blake3::hash(body.as_bytes()).as_bytes().to_vec(),
250 meta.to_string(),
251 lang.name(),
252 file_id,
253 now_unix()
254 ],
255 )?;
256 id
257 }
258 None => {
259 let mut new = NewNode::new(Kind::Symbol);
260 new.subkind = Some(sym.kind.into());
261 new.title = Some(sym.qualified.clone());
262 new.body = Some(body.clone());
263 new.path = Some(rel.to_string());
264 new.lang = Some(lang.name().into());
265 new.project_id = Some(project_id);
266 new.parent_id = Some(file_id);
267 new.span_start = Some(sym.start_line as i64);
268 new.span_end = Some(sym.end_line as i64);
269 new.content_hash = Some(blake3::hash(body.as_bytes()).as_bytes().to_vec());
270 new.meta = Some(meta);
271 store::insert_node(&tx, new)?.id
272 }
273 };
274 kept.insert(id);
275 stats.symbols += 1;
276 }
277 {
280 let mut stmt =
281 tx.prepare("SELECT id FROM node WHERE kind = 'symbol' AND parent_id = ?1")?;
282 let all: Vec<i64> = stmt
283 .query_map([file_id], |r| r.get(0))?
284 .collect::<rusqlite::Result<_>>()?;
285 drop(stmt);
286 for id in all {
287 if !kept.contains(&id) {
288 tx.execute("DELETE FROM node WHERE id = ?1", [id])?;
289 }
290 }
291 }
292 tx.commit()?;
293 Ok(file_id)
294}
295
/// Slim in-memory view of one symbol row, loaded by `resolve_calls` to build
/// its lookup tables.
struct SymRef {
    /// Row id of the symbol node.
    id: i64,
    /// Value of `$.name` in the row's `meta` JSON; empty when it is absent.
    name: String,
    /// The row's `title` column (the qualified name); empty when NULL.
    qualified: String,
    /// The row's `path` column; empty when NULL.
    path: String,
}
303
304fn resolve_calls(
307 conn: &Connection,
308 project_id: i64,
309 changed: &[(i64, String, FileExtract)],
310 stats: &mut GraphStats,
311) -> Result<()> {
312 if changed.is_empty() {
313 return Ok(());
314 }
315 let mut stmt = conn.prepare(
317 "SELECT id, json_extract(meta, '$.name'), title, path FROM node
318 WHERE kind = 'symbol' AND project_id = ?1 AND deleted_at IS NULL",
319 )?;
320 let symbols: Vec<SymRef> = stmt
321 .query_map([project_id], |r| {
322 Ok(SymRef {
323 id: r.get(0)?,
324 name: r.get::<_, Option<String>>(1)?.unwrap_or_default(),
325 qualified: r.get::<_, Option<String>>(2)?.unwrap_or_default(),
326 path: r.get::<_, Option<String>>(3)?.unwrap_or_default(),
327 })
328 })?
329 .collect::<rusqlite::Result<_>>()?;
330 drop(stmt);
331
332 let mut by_name: HashMap<&str, Vec<&SymRef>> = HashMap::new();
333 for s in &symbols {
334 by_name.entry(s.name.as_str()).or_default().push(s);
335 }
336 let mut by_file_qualified: HashMap<(&str, &str), i64> = HashMap::new();
337 for s in &symbols {
338 by_file_qualified.insert((s.path.as_str(), s.qualified.as_str()), s.id);
339 }
340 let file_paths: HashSet<&str> = {
341 let mut set = HashSet::new();
342 for s in &symbols {
343 set.insert(s.path.as_str());
344 }
345 set
346 };
347 let file_ids: HashMap<String, i64> = {
348 let mut stmt = conn.prepare(
349 "SELECT path, id FROM node
350 WHERE kind = 'file' AND project_id = ?1 AND collection_id IS NULL
351 AND deleted_at IS NULL",
352 )?;
353 let rows: Vec<(String, i64)> = stmt
354 .query_map([project_id], |r| Ok((r.get(0)?, r.get(1)?)))?
355 .collect::<rusqlite::Result<_>>()?;
356 rows.into_iter().collect()
357 };
358
359 let tx = conn.unchecked_transaction()?;
360 for (file_id, rel, fx) in changed {
361 tx.execute(
363 "DELETE FROM edge WHERE rel = 'calls' AND src IN
364 (SELECT id FROM node WHERE kind = 'symbol' AND parent_id = ?1)",
365 [file_id],
366 )?;
367 tx.execute(
368 "DELETE FROM edge WHERE rel = 'imports' AND src = ?1",
369 [file_id],
370 )?;
371
372 let mut import_target: HashMap<&str, String> = HashMap::new();
374 for imp in &fx.imports {
375 if let Some(target) = resolve_import(rel, &imp.source, &file_paths) {
376 import_target.insert(imp.local.as_str(), target.clone());
377 if let Some(dst) = file_ids.get(&target) {
378 if *dst != *file_id {
379 store::link(&tx, *file_id, *dst, Rel::Imports, 1.0)?;
380 stats.imports += 1;
381 }
382 }
383 }
384 }
385
386 for call in &fx.calls {
387 if call.caller.is_empty() {
388 continue; }
390 let Some(&src) = by_file_qualified.get(&(rel.as_str(), call.caller.as_str())) else {
391 continue;
392 };
393 let candidates = by_name.get(call.callee.as_str());
394 let Some(candidates) = candidates else {
395 continue;
396 };
397 if let Some(c) = candidates.iter().find(|c| c.path == *rel && c.id != src) {
399 link_call(&tx, src, c.id, 1.0, true)?;
400 stats.calls_resolved += 1;
401 continue;
402 }
403 if let Some(target) = import_target.get(call.callee.as_str()) {
405 if let Some(c) = candidates.iter().find(|c| c.path == *target) {
406 link_call(&tx, src, c.id, 1.0, true)?;
407 stats.calls_resolved += 1;
408 continue;
409 }
410 }
411 let global: Vec<&&SymRef> = candidates.iter().filter(|c| c.id != src).collect();
413 match global.len() {
414 0 => {}
415 1 => {
416 link_call(&tx, src, global[0].id, 0.8, true)?;
417 stats.calls_resolved += 1;
418 }
419 n if n <= 3 => {
420 for c in &global {
421 link_call(&tx, src, c.id, 1.0 / n as f64, false)?;
422 stats.calls_heuristic += 1;
423 }
424 }
425 _ => {} }
427 }
428 }
429 tx.commit()?;
430 Ok(())
431}
432
433fn link_call(conn: &Connection, src: i64, dst: i64, weight: f64, resolved: bool) -> Result<()> {
434 conn.execute(
435 "INSERT INTO edge (src, dst, rel, weight, meta, created_at)
436 VALUES (?1, ?2, 'calls', ?3, json_object('resolved', ?4), ?5)
437 ON CONFLICT(src, dst, rel) DO UPDATE SET
438 weight = excluded.weight, meta = excluded.meta",
439 params![src, dst, weight, resolved, now_unix()],
440 )?;
441 Ok(())
442}
443
444fn resolve_import(importer: &str, source: &str, files: &HashSet<&str>) -> Option<String> {
446 let dir = Path::new(importer).parent().unwrap_or(Path::new(""));
447 let try_paths = |bases: Vec<String>| -> Option<String> {
448 bases.into_iter().find(|b| files.contains(b.as_str()))
449 };
450
451 if source.starts_with('.') {
452 if source.contains("::") {
453 return None; }
455 if source.starts_with("./") || source.starts_with("../") {
457 let joined = normalize(&dir.join(source));
458 return try_paths(vec![
459 format!("{joined}.ts"),
460 format!("{joined}.tsx"),
461 format!("{joined}.js"),
462 format!("{joined}.jsx"),
463 format!("{joined}/index.ts"),
464 format!("{joined}/index.js"),
465 joined.clone(),
466 ]);
467 }
468 let dots = source.chars().take_while(|c| *c == '.').count();
470 let module = &source[dots..];
471 let mut base = dir.to_path_buf();
472 for _ in 1..dots {
473 base = base.parent().map(Path::to_path_buf).unwrap_or_default();
474 }
475 let joined = normalize(&base.join(module.replace('.', "/")));
476 return try_paths(vec![
477 format!("{joined}.py"),
478 format!("{joined}/__init__.py"),
479 ]);
480 }
481
482 if source.contains("::") {
483 let segs: Vec<&str> = source
485 .split("::")
486 .filter(|s| !matches!(*s, "crate" | "super" | "self"))
487 .collect();
488 if segs.is_empty() {
489 return None;
490 }
491 for take in (1..=segs.len().min(3)).rev() {
494 let suffix = format!("{}.rs", segs[..take].join("/"));
495 if let Some(hit) = files.iter().find(|f| f.ends_with(&suffix)) {
496 return Some(hit.to_string());
497 }
498 }
499 return None;
500 }
501
502 if source.contains('.') && !source.contains('/') {
503 let joined = source.replace('.', "/");
505 return try_paths(vec![
506 format!("{joined}.py"),
507 format!("{joined}/__init__.py"),
508 ])
509 .or_else(|| {
510 files
511 .iter()
512 .find(|f| f.ends_with(&format!("{joined}.py")))
513 .map(|f| f.to_string())
514 });
515 }
516
517 let last = source.rsplit('/').next().unwrap_or(source);
520 files
521 .iter()
522 .find(|f| {
523 Path::new(f)
524 .parent()
525 .and_then(|p| p.file_name())
526 .map(|d| d.to_string_lossy() == last)
527 .unwrap_or(false)
528 || **f == format!("{last}.py")
529 })
530 .map(|f| f.to_string())
531}
532
/// Lexically normalises `p` into a `/`-separated string.
///
/// `.` components are dropped and `x/..` pairs are collapsed. Root and prefix
/// components are ignored, so the result never starts with `/`. A `..` that
/// has nothing left to cancel is kept in the output: a path that climbs above
/// its starting point stays distinguishable (`../x` does not turn into `x`)
/// instead of silently aliasing a path below it.
///
/// No file-system access takes place; symlinks are not resolved.
fn normalize(p: &Path) -> String {
    use std::ffi::OsStr;
    use std::path::Component;

    let parent = OsStr::new("..");
    let mut parts: Vec<&OsStr> = Vec::new();
    for component in p.components() {
        match component {
            Component::Normal(name) => parts.push(name),
            Component::ParentDir => {
                // Only a real directory name can be cancelled by `..`.
                let cancels_previous = matches!(parts.last(), Some(prev) if *prev != parent);
                if cancels_previous {
                    parts.pop();
                } else {
                    parts.push(parent);
                }
            }
            Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
        }
    }
    parts
        .iter()
        .map(|part| part.to_string_lossy())
        .collect::<Vec<_>>()
        .join("/")
}