1use std::collections::{HashMap, HashSet};
9use std::path::Path;
10
11use mimir_core::error::{Error, Result};
12use mimir_core::model::{now_unix, Kind, NewNode, Node, Rel};
13use mimir_core::store::{self, row_to_node, NODE_COLS};
14use rusqlite::{params, Connection, OptionalExtension};
15
16use crate::extract::{self, FileExtract};
17use crate::languages::Lang;
18
/// Counters reported by [`update`] for a single indexing run.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct GraphStats {
    /// Regular files found under the root whose path maps to a known `Lang`.
    pub files_seen: usize,
    /// Files that were (re-)extracted and persisted during this run.
    pub files_indexed: usize,
    /// Files skipped because their mtime/size or their content hash matched
    /// the stored file node.
    pub unchanged: usize,
    /// Previously indexed file nodes marked deleted because their path was
    /// not seen during the walk.
    pub removed: usize,
    /// Symbols written (inserted or updated) across all persisted files.
    pub symbols: usize,
    /// Call edges linked with `resolved = true`.
    pub calls_resolved: usize,
    /// Call edges linked with `resolved = false` (ambiguous callee, weight
    /// split between the candidates).
    pub calls_heuristic: usize,
    /// Import edges linked between file nodes.
    pub imports: usize,
}
30
31pub fn stable_id(project_id: i64, rel_path: &str, qualified: &str, kind: &str) -> String {
32 blake3::hash(format!("{project_id}|{rel_path}|{qualified}|{kind}").as_bytes())
33 .to_hex()
34 .to_string()
35}
36
37pub fn update(conn: &mut Connection, project: &Node, root: &Path) -> Result<GraphStats> {
42 let mut stats = GraphStats::default();
43 let mut seen: HashSet<String> = HashSet::new();
44 let mut changed_files: Vec<(i64, String, FileExtract)> = Vec::new();
45
46 let tx = conn.transaction()?;
51
52 for entry in ignore::WalkBuilder::new(root).build() {
53 let entry = match entry {
54 Ok(e) => e,
55 Err(err) => {
56 tracing::warn!(%err, "skipping unreadable entry");
57 continue;
58 }
59 };
60 if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
61 continue;
62 }
63 let path = entry.path();
64 let rel = path
65 .strip_prefix(root)
66 .unwrap_or(path)
67 .to_string_lossy()
68 .replace('\\', "/");
69 let Some(lang) = Lang::from_path(&rel) else {
70 continue;
71 };
72 seen.insert(rel.clone());
73 stats.files_seen += 1;
74
75 let meta = entry
76 .metadata()
77 .map_err(|e| Error::Invalid(format!("stat {rel}: {e}")))?;
78 let mtime = meta
82 .modified()
83 .ok()
84 .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
85 .map(|d| d.as_secs() as i64)
86 .unwrap_or(-1);
87 let size = meta.len() as i64;
88
89 let existing = code_file(&tx, project.id, &rel)?;
90 if let Some(f) = &existing {
91 if mtime >= 0
92 && f.deleted_at.is_none()
93 && f.meta.get("mtime").and_then(|v| v.as_i64()) == Some(mtime)
94 && f.meta.get("size").and_then(|v| v.as_i64()) == Some(size)
95 {
96 stats.unchanged += 1;
97 continue;
98 }
99 }
100
101 let raw = std::fs::read(path).map_err(|e| Error::io(path, e))?;
102 let content = String::from_utf8_lossy(&raw);
103 let hash = blake3::hash(content.as_bytes()).as_bytes().to_vec();
104 if let Some(f) = &existing {
105 if f.deleted_at.is_none() && f.content_hash.as_deref() == Some(&hash[..]) {
106 tx.execute(
107 "UPDATE node SET meta = json_set(meta, '$.mtime', ?2, '$.size', ?3),
108 updated_at = ?4 WHERE id = ?1",
109 params![f.id, mtime, size, now_unix()],
110 )?;
111 stats.unchanged += 1;
112 continue;
113 }
114 }
115
116 let fx = extract::extract(lang, &content);
117 let file_id = persist_file(
118 &tx,
119 project.id,
120 existing.as_ref(),
121 &rel,
122 lang,
123 &hash,
124 mtime,
125 size,
126 &fx,
127 &mut stats,
128 )?;
129 changed_files.push((file_id, rel, fx));
130 stats.files_indexed += 1;
131 }
132
133 let mut stmt = tx.prepare(
135 "SELECT id, path FROM node
136 WHERE kind = 'file' AND project_id = ?1 AND collection_id IS NULL
137 AND deleted_at IS NULL",
138 )?;
139 let live: Vec<(i64, String)> = stmt
140 .query_map([project.id], |r| Ok((r.get(0)?, r.get(1)?)))?
141 .collect::<rusqlite::Result<_>>()?;
142 drop(stmt);
143 for (id, path) in live {
144 if !seen.contains(&path) {
145 tx.execute(
146 "UPDATE node SET deleted_at = ?2
147 WHERE deleted_at IS NULL AND (id = ?1 OR parent_id = ?1)",
148 params![id, now_unix()],
149 )?;
150 stats.removed += 1;
151 }
152 }
153
154 resolve_calls(&tx, project.id, &changed_files, &mut stats)?;
155 tx.commit()?;
156 Ok(stats)
157}
158
159fn code_file(conn: &Connection, project_id: i64, rel: &str) -> Result<Option<Node>> {
160 Ok(conn
161 .query_row(
162 &format!(
163 "SELECT {NODE_COLS} FROM node
164 WHERE kind = 'file' AND project_id = ?1 AND path = ?2
165 AND collection_id IS NULL"
166 ),
167 params![project_id, rel],
168 row_to_node,
169 )
170 .optional()?)
171}
172
173#[allow(clippy::too_many_arguments)]
174fn persist_file(
175 conn: &Connection,
176 project_id: i64,
177 existing: Option<&Node>,
178 rel: &str,
179 lang: Lang,
180 hash: &[u8],
181 mtime: i64,
182 size: i64,
183 fx: &FileExtract,
184 stats: &mut GraphStats,
185) -> Result<i64> {
186 let imports_json: Vec<serde_json::Value> = fx
187 .imports
188 .iter()
189 .map(|i| serde_json::json!({"local": i.local, "source": i.source}))
190 .collect();
191 let calls_json: Vec<serde_json::Value> = fx
192 .calls
193 .iter()
194 .filter(|c| !c.caller.is_empty())
195 .map(|c| serde_json::json!({"caller": c.caller, "callee": c.callee}))
196 .collect();
197 let file_meta = serde_json::json!({
198 "mtime": mtime, "size": size,
199 "imports": imports_json, "calls": calls_json,
200 });
201
202 let file_id = match existing {
203 Some(f) => {
204 conn.execute(
205 "UPDATE node SET content_hash = ?2, meta = ?3, lang = ?4,
206 updated_at = ?5, deleted_at = NULL WHERE id = ?1",
207 params![f.id, hash, file_meta.to_string(), lang.name(), now_unix()],
208 )?;
209 f.id
210 }
211 None => {
212 let mut new = NewNode::new(Kind::File);
213 new.title = Some(
214 Path::new(rel)
215 .file_name()
216 .map(|s| s.to_string_lossy().into_owned())
217 .unwrap_or_else(|| rel.to_string()),
218 );
219 new.path = Some(rel.to_string());
220 new.lang = Some(lang.name().into());
221 new.project_id = Some(project_id);
222 new.content_hash = Some(hash.to_vec());
223 new.meta = Some(file_meta);
224 store::insert_node(conn, new)?.id
225 }
226 };
227
228 let mut kept: HashSet<i64> = HashSet::new();
230 for sym in &fx.symbols {
231 let sid = stable_id(project_id, rel, &sym.qualified, sym.kind);
232 let body = match &sym.doc {
233 Some(d) => format!("{}\n{d}", sym.signature),
234 None => sym.signature.clone(),
235 };
236 let meta = serde_json::json!({"stable_id": sid, "name": sym.name});
237 let existing_id: Option<i64> = conn
238 .query_row(
239 "SELECT id FROM node
240 WHERE kind = 'symbol' AND json_extract(meta, '$.stable_id') = ?1",
241 [&sid],
242 |r| r.get(0),
243 )
244 .optional()?;
245 let id = match existing_id {
246 Some(id) => {
247 conn.execute(
248 "UPDATE node SET title = ?2, body = ?3, subkind = ?4, path = ?5,
249 span_start = ?6, span_end = ?7, content_hash = ?8, meta = ?9,
250 lang = ?10, parent_id = ?11, updated_at = ?12, deleted_at = NULL
251 WHERE id = ?1",
252 params![
253 id,
254 sym.qualified,
255 body,
256 sym.kind,
257 rel,
258 sym.start_line as i64,
259 sym.end_line as i64,
260 blake3::hash(body.as_bytes()).as_bytes().to_vec(),
261 meta.to_string(),
262 lang.name(),
263 file_id,
264 now_unix()
265 ],
266 )?;
267 id
268 }
269 None => {
270 let mut new = NewNode::new(Kind::Symbol);
271 new.subkind = Some(sym.kind.into());
272 new.title = Some(sym.qualified.clone());
273 new.body = Some(body.clone());
274 new.path = Some(rel.to_string());
275 new.lang = Some(lang.name().into());
276 new.project_id = Some(project_id);
277 new.parent_id = Some(file_id);
278 new.span_start = Some(sym.start_line as i64);
279 new.span_end = Some(sym.end_line as i64);
280 new.content_hash = Some(blake3::hash(body.as_bytes()).as_bytes().to_vec());
281 new.meta = Some(meta);
282 store::insert_node(conn, new)?.id
283 }
284 };
285 kept.insert(id);
286 stats.symbols += 1;
287 }
288 {
291 let mut stmt =
292 conn.prepare("SELECT id FROM node WHERE kind = 'symbol' AND parent_id = ?1")?;
293 let all: Vec<i64> = stmt
294 .query_map([file_id], |r| r.get(0))?
295 .collect::<rusqlite::Result<_>>()?;
296 drop(stmt);
297 for id in all {
298 if !kept.contains(&id) {
299 conn.execute("DELETE FROM node WHERE id = ?1", [id])?;
300 }
301 }
302 }
303 Ok(file_id)
304}
305
/// In-memory view of one live symbol node, as loaded by `resolve_calls`.
/// Text columns that are NULL in the database are loaded as empty strings.
struct SymRef {
    /// Row id of the symbol node.
    id: i64,
    /// Value of `meta.name` on the node.
    name: String,
    /// The node's `title` column (set to the symbol's qualified name when the
    /// symbol is persisted).
    qualified: String,
    /// The node's `path` column (path of the file the symbol was found in).
    path: String,
}
313
314fn resolve_calls(
317 conn: &Connection,
318 project_id: i64,
319 changed: &[(i64, String, FileExtract)],
320 stats: &mut GraphStats,
321) -> Result<()> {
322 if changed.is_empty() {
323 return Ok(());
324 }
325 let mut stmt = conn.prepare(
327 "SELECT id, json_extract(meta, '$.name'), title, path FROM node
328 WHERE kind = 'symbol' AND project_id = ?1 AND deleted_at IS NULL",
329 )?;
330 let symbols: Vec<SymRef> = stmt
331 .query_map([project_id], |r| {
332 Ok(SymRef {
333 id: r.get(0)?,
334 name: r.get::<_, Option<String>>(1)?.unwrap_or_default(),
335 qualified: r.get::<_, Option<String>>(2)?.unwrap_or_default(),
336 path: r.get::<_, Option<String>>(3)?.unwrap_or_default(),
337 })
338 })?
339 .collect::<rusqlite::Result<_>>()?;
340 drop(stmt);
341
342 let mut by_name: HashMap<&str, Vec<&SymRef>> = HashMap::new();
343 for s in &symbols {
344 if !s.name.is_empty() {
347 by_name.entry(s.name.as_str()).or_default().push(s);
348 }
349 }
350 let mut by_file_qualified: HashMap<(&str, &str), i64> = HashMap::new();
351 for s in &symbols {
352 by_file_qualified.insert((s.path.as_str(), s.qualified.as_str()), s.id);
353 }
354 let file_paths: HashSet<&str> = {
355 let mut set = HashSet::new();
356 for s in &symbols {
357 set.insert(s.path.as_str());
358 }
359 set
360 };
361 let file_ids: HashMap<String, i64> = {
362 let mut stmt = conn.prepare(
363 "SELECT path, id FROM node
364 WHERE kind = 'file' AND project_id = ?1 AND collection_id IS NULL
365 AND deleted_at IS NULL",
366 )?;
367 let rows: Vec<(String, i64)> = stmt
368 .query_map([project_id], |r| Ok((r.get(0)?, r.get(1)?)))?
369 .collect::<rusqlite::Result<_>>()?;
370 rows.into_iter().collect()
371 };
372
373 for (file_id, rel, fx) in changed {
374 conn.execute(
376 "DELETE FROM edge WHERE rel = 'calls' AND src IN
377 (SELECT id FROM node WHERE kind = 'symbol' AND parent_id = ?1)",
378 [file_id],
379 )?;
380 conn.execute(
381 "DELETE FROM edge WHERE rel = 'imports' AND src = ?1",
382 [file_id],
383 )?;
384
385 let mut import_target: HashMap<&str, String> = HashMap::new();
387 for imp in &fx.imports {
388 if let Some(target) = resolve_import(rel, &imp.source, &file_paths) {
389 import_target.insert(imp.local.as_str(), target.clone());
390 if let Some(dst) = file_ids.get(&target) {
391 if *dst != *file_id {
392 store::link(conn, *file_id, *dst, Rel::Imports, 1.0)?;
393 stats.imports += 1;
394 }
395 }
396 }
397 }
398
399 for call in &fx.calls {
400 if call.caller.is_empty() {
401 continue; }
403 let Some(&src) = by_file_qualified.get(&(rel.as_str(), call.caller.as_str())) else {
404 continue;
405 };
406 let candidates = by_name.get(call.callee.as_str());
407 let Some(candidates) = candidates else {
408 continue;
409 };
410 if let Some(c) = candidates.iter().find(|c| c.path == *rel && c.id != src) {
412 link_call(conn, src, c.id, 1.0, true)?;
413 stats.calls_resolved += 1;
414 continue;
415 }
416 if let Some(target) = import_target.get(call.callee.as_str()) {
418 if let Some(c) = candidates.iter().find(|c| c.path == *target) {
419 link_call(conn, src, c.id, 1.0, true)?;
420 stats.calls_resolved += 1;
421 continue;
422 }
423 }
424 let global: Vec<&&SymRef> = candidates.iter().filter(|c| c.id != src).collect();
426 match global.len() {
427 0 => {}
428 1 => {
429 link_call(conn, src, global[0].id, 0.8, true)?;
430 stats.calls_resolved += 1;
431 }
432 n if n <= 3 => {
433 for c in &global {
434 link_call(conn, src, c.id, 1.0 / n as f64, false)?;
435 stats.calls_heuristic += 1;
436 }
437 }
438 _ => {} }
440 }
441 }
442 Ok(())
443}
444
445fn link_call(conn: &Connection, src: i64, dst: i64, weight: f64, resolved: bool) -> Result<()> {
446 conn.execute(
447 "INSERT INTO edge (src, dst, rel, weight, meta, created_at)
448 VALUES (?1, ?2, 'calls', ?3, json_object('resolved', ?4), ?5)
449 ON CONFLICT(src, dst, rel) DO UPDATE SET
450 weight = excluded.weight, meta = excluded.meta",
451 params![src, dst, weight, resolved, now_unix()],
452 )?;
453 Ok(())
454}
455
/// Maps an import `source`, as written in `importer`, to one of the indexed
/// paths in `files`, or `None` when nothing plausible is indexed.
///
/// The shape of `source` selects the strategy:
///
/// * `./x`, `../x` — JS/TS-style relative path: tried with the extensions
///   `.ts`, `.tsx`, `.js`, `.jsx`, then as a directory with `index.ts` /
///   `index.js`, then verbatim.
/// * other sources starting with `.` (no `::`) — Python-style relative
///   module: each dot beyond the first climbs one directory from the
///   importer's directory; tried as `<module>.py` and
///   `<module>/__init__.py`.
/// * contains `::` — Rust-style path: `crate`/`super`/`self` and empty
///   segments are dropped and the longest prefix (at most three segments)
///   for which some `<prefix>.rs` is indexed wins.
/// * contains `.` but no `/` — Python-style absolute module: tried from the
///   root, then as a path suffix anywhere in the tree.
/// * anything else — the last `/`-separated segment is treated as a package
///   name: a file directly inside a directory of that name, or
///   `<name>.py` at the root.
///
/// Suffix matches only count on a path-component boundary (`model.rs` does
/// not match `data_model.rs`). When several indexed files qualify, the
/// shortest path wins and ties are broken lexicographically, so the result
/// does not depend on the iteration order of `files`.
fn resolve_import(importer: &str, source: &str, files: &HashSet<&str>) -> Option<String> {
    let dir = Path::new(importer).parent().unwrap_or(Path::new(""));

    if source.starts_with('.') {
        if source.contains("::") {
            return None;
        }
        if source.starts_with("./") || source.starts_with("../") {
            let joined = normalize(&dir.join(source));
            let candidates = [
                format!("{joined}.ts"),
                format!("{joined}.tsx"),
                format!("{joined}.js"),
                format!("{joined}.jsx"),
                format!("{joined}/index.ts"),
                format!("{joined}/index.js"),
                joined,
            ];
            return first_indexed(&candidates, files);
        }
        // Python relative import. The leading dots are ASCII, so their count
        // is also the byte offset of the module part.
        let dots = source.chars().take_while(|c| *c == '.').count();
        let module = &source[dots..];
        let mut base = dir.to_path_buf();
        for _ in 1..dots {
            base = base.parent().map(Path::to_path_buf).unwrap_or_default();
        }
        let joined = normalize(&base.join(module.replace('.', "/")));
        return first_indexed(&python_module_files(&joined), files);
    }

    if source.contains("::") {
        let segs: Vec<&str> = source
            .split("::")
            .filter(|s| !s.is_empty() && !matches!(*s, "crate" | "super" | "self"))
            .collect();
        if segs.is_empty() {
            return None;
        }
        // Longest prefix first: trailing segments are usually item names
        // rather than modules.
        for take in (1..=segs.len().min(3)).rev() {
            let suffix = format!("{}.rs", segs[..take].join("/"));
            if let Some(hit) = pick_stable(files, |f| ends_with_path(f, &suffix)) {
                return Some(hit);
            }
        }
        return None;
    }

    if source.contains('.') && !source.contains('/') {
        let joined = source.replace('.', "/");
        let module_file = format!("{joined}.py");
        return first_indexed(&python_module_files(&joined), files)
            .or_else(|| pick_stable(files, |f| ends_with_path(f, &module_file)));
    }

    let last = source.rsplit('/').next().unwrap_or(source);
    let flat_module = format!("{last}.py");
    pick_stable(files, |f| {
        let in_named_dir = Path::new(f)
            .parent()
            .and_then(|p| p.file_name())
            .map_or(false, |d| d.to_string_lossy() == last);
        in_named_dir || f == flat_module
    })
}

/// Returns the first entry of `candidates` that is an indexed path.
fn first_indexed(candidates: &[String], files: &HashSet<&str>) -> Option<String> {
    candidates
        .iter()
        .find(|c| files.contains(c.as_str()))
        .cloned()
}

/// Candidate files for a Python module whose slash-separated path is
/// `module`; an empty `module` denotes the package at the repository root.
fn python_module_files(module: &str) -> Vec<String> {
    if module.is_empty() {
        vec!["__init__.py".to_string()]
    } else {
        vec![format!("{module}.py"), format!("{module}/__init__.py")]
    }
}

/// True when `file` is `suffix` itself or ends with `/` + `suffix`.
fn ends_with_path(file: &str, suffix: &str) -> bool {
    match file.strip_suffix(suffix) {
        Some(head) => head.is_empty() || head.ends_with('/'),
        None => false,
    }
}

/// Picks, among the indexed paths accepted by `accept`, the shortest one
/// (ties broken lexicographically) so the choice is reproducible.
fn pick_stable<F>(files: &HashSet<&str>, mut accept: F) -> Option<String>
where
    F: FnMut(&str) -> bool,
{
    files
        .iter()
        .copied()
        .filter(|f| accept(*f))
        .min_by(|a, b| a.len().cmp(&b.len()).then_with(|| a.cmp(b)))
        .map(|f| f.to_string())
}

/// Collapses `.` and `..` components of `p` lexically and joins what remains
/// with `/`. Root and prefix components are dropped, and a `..` with nothing
/// left to pop is ignored.
fn normalize(p: &Path) -> String {
    let mut parts: Vec<&std::ffi::OsStr> = Vec::new();
    for c in p.components() {
        match c {
            std::path::Component::ParentDir => {
                parts.pop();
            }
            std::path::Component::CurDir => {}
            std::path::Component::Normal(s) => parts.push(s),
            _ => {}
        }
    }
    parts
        .iter()
        .map(|s| s.to_string_lossy())
        .collect::<Vec<_>>()
        .join("/")
}