1use std::collections::{HashMap, HashSet};
9use std::path::Path;
10
11use mimir_core::error::{Error, Result};
12use mimir_core::model::{now_unix, Kind, NewNode, Node, Rel};
13use mimir_core::store::{self, row_to_node, NODE_COLS};
14use rusqlite::{params, Connection, OptionalExtension};
15
16use crate::extract::{self, FileExtract};
17use crate::languages::Lang;
18
/// Counters describing what a single [`update`] run did.
///
/// All counters start at zero (`Default`) and are only ever incremented.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GraphStats {
    /// Files visited by the walk whose path maps to a supported language.
    pub files_seen: usize,
    /// Files that were (re-)extracted and persisted during this run.
    pub files_indexed: usize,
    /// Files skipped because their mtime+size or their content hash matched
    /// the stored file node.
    pub unchanged: usize,
    /// Previously live file nodes that were not seen by the walk and were
    /// marked deleted.
    pub removed: usize,
    /// Symbol nodes inserted or updated.
    pub symbols: usize,
    /// Call edges written with `resolved = true`.
    pub calls_resolved: usize,
    /// Call edges written with `resolved = false` (ambiguous callee spread
    /// over a few same-named candidates).
    pub calls_heuristic: usize,
    /// Import edges linked between file nodes.
    pub imports: usize,
}
30
31pub fn stable_id(project_id: i64, rel_path: &str, qualified: &str, kind: &str) -> String {
32 blake3::hash(format!("{project_id}|{rel_path}|{qualified}|{kind}").as_bytes())
33 .to_hex()
34 .to_string()
35}
36
37pub fn update(conn: &mut Connection, project: &Node, root: &Path) -> Result<GraphStats> {
42 let mut stats = GraphStats::default();
43 let mut seen: HashSet<String> = HashSet::new();
44 let mut changed_files: Vec<(i64, String, FileExtract)> = Vec::new();
45
46 let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
53
54 for entry in ignore::WalkBuilder::new(root).build() {
55 let entry = match entry {
56 Ok(e) => e,
57 Err(err) => {
58 tracing::warn!(%err, "skipping unreadable entry");
59 continue;
60 }
61 };
62 if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
63 continue;
64 }
65 let path = entry.path();
66 let rel = path
67 .strip_prefix(root)
68 .unwrap_or(path)
69 .to_string_lossy()
70 .replace('\\', "/");
71 let Some(lang) = Lang::from_path(&rel) else {
72 continue;
73 };
74 seen.insert(rel.clone());
75 stats.files_seen += 1;
76
77 let meta = entry
78 .metadata()
79 .map_err(|e| Error::Invalid(format!("stat {rel}: {e}")))?;
80 let mtime = meta
84 .modified()
85 .ok()
86 .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
87 .map(|d| d.as_secs() as i64)
88 .unwrap_or(-1);
89 let size = meta.len() as i64;
90
91 let existing = code_file(&tx, project.id, &rel)?;
92 if let Some(f) = &existing {
93 if mtime >= 0
94 && f.deleted_at.is_none()
95 && f.meta.get("mtime").and_then(|v| v.as_i64()) == Some(mtime)
96 && f.meta.get("size").and_then(|v| v.as_i64()) == Some(size)
97 {
98 stats.unchanged += 1;
99 continue;
100 }
101 }
102
103 let raw = std::fs::read(path).map_err(|e| Error::io(path, e))?;
104 let content = String::from_utf8_lossy(&raw);
105 let hash = blake3::hash(content.as_bytes()).as_bytes().to_vec();
106 if let Some(f) = &existing {
107 if f.deleted_at.is_none() && f.content_hash.as_deref() == Some(&hash[..]) {
108 tx.execute(
109 "UPDATE node SET meta = json_set(meta, '$.mtime', ?2, '$.size', ?3),
110 updated_at = ?4 WHERE id = ?1",
111 params![f.id, mtime, size, now_unix()],
112 )?;
113 stats.unchanged += 1;
114 continue;
115 }
116 }
117
118 let fx = extract::extract(lang, &content);
119 let file_id = persist_file(
120 &tx,
121 project.id,
122 existing.as_ref(),
123 &rel,
124 lang,
125 &hash,
126 mtime,
127 size,
128 &fx,
129 &mut stats,
130 )?;
131 changed_files.push((file_id, rel, fx));
132 stats.files_indexed += 1;
133 }
134
135 let mut stmt = tx.prepare(
137 "SELECT id, path FROM node
138 WHERE kind = 'file' AND project_id = ?1 AND collection_id IS NULL
139 AND deleted_at IS NULL",
140 )?;
141 let live: Vec<(i64, String)> = stmt
142 .query_map([project.id], |r| Ok((r.get(0)?, r.get(1)?)))?
143 .collect::<rusqlite::Result<_>>()?;
144 drop(stmt);
145 for (id, path) in live {
146 if !seen.contains(&path) {
147 tx.execute(
148 "UPDATE node SET deleted_at = ?2
149 WHERE deleted_at IS NULL AND (id = ?1 OR parent_id = ?1)",
150 params![id, now_unix()],
151 )?;
152 stats.removed += 1;
153 }
154 }
155
156 resolve_calls(&tx, project.id, &changed_files, &mut stats)?;
157 tx.commit()?;
158 Ok(stats)
159}
160
161fn code_file(conn: &Connection, project_id: i64, rel: &str) -> Result<Option<Node>> {
162 Ok(conn
163 .query_row(
164 &format!(
165 "SELECT {NODE_COLS} FROM node
166 WHERE kind = 'file' AND project_id = ?1 AND path = ?2
167 AND collection_id IS NULL"
168 ),
169 params![project_id, rel],
170 row_to_node,
171 )
172 .optional()?)
173}
174
175#[allow(clippy::too_many_arguments)]
176fn persist_file(
177 conn: &Connection,
178 project_id: i64,
179 existing: Option<&Node>,
180 rel: &str,
181 lang: Lang,
182 hash: &[u8],
183 mtime: i64,
184 size: i64,
185 fx: &FileExtract,
186 stats: &mut GraphStats,
187) -> Result<i64> {
188 let imports_json: Vec<serde_json::Value> = fx
189 .imports
190 .iter()
191 .map(|i| serde_json::json!({"local": i.local, "source": i.source}))
192 .collect();
193 let calls_json: Vec<serde_json::Value> = fx
194 .calls
195 .iter()
196 .filter(|c| !c.caller.is_empty())
197 .map(|c| serde_json::json!({"caller": c.caller, "callee": c.callee}))
198 .collect();
199 let file_meta = serde_json::json!({
200 "mtime": mtime, "size": size,
201 "imports": imports_json, "calls": calls_json,
202 });
203
204 let file_id = match existing {
205 Some(f) => {
206 conn.execute(
207 "UPDATE node SET content_hash = ?2, meta = ?3, lang = ?4,
208 updated_at = ?5, deleted_at = NULL WHERE id = ?1",
209 params![f.id, hash, file_meta.to_string(), lang.name(), now_unix()],
210 )?;
211 f.id
212 }
213 None => {
214 let mut new = NewNode::new(Kind::File);
215 new.title = Some(
216 Path::new(rel)
217 .file_name()
218 .map(|s| s.to_string_lossy().into_owned())
219 .unwrap_or_else(|| rel.to_string()),
220 );
221 new.path = Some(rel.to_string());
222 new.lang = Some(lang.name().into());
223 new.project_id = Some(project_id);
224 new.content_hash = Some(hash.to_vec());
225 new.meta = Some(file_meta);
226 store::insert_node(conn, new)?.id
227 }
228 };
229
230 let mut kept: HashSet<i64> = HashSet::new();
232 for sym in &fx.symbols {
233 let sid = stable_id(project_id, rel, &sym.qualified, sym.kind);
234 let body = match &sym.doc {
235 Some(d) => format!("{}\n{d}", sym.signature),
236 None => sym.signature.clone(),
237 };
238 let meta = serde_json::json!({"stable_id": sid, "name": sym.name});
239 let existing_id: Option<i64> = conn
240 .query_row(
241 "SELECT id FROM node
242 WHERE kind = 'symbol' AND json_extract(meta, '$.stable_id') = ?1",
243 [&sid],
244 |r| r.get(0),
245 )
246 .optional()?;
247 let id = match existing_id {
248 Some(id) => {
249 conn.execute(
250 "UPDATE node SET title = ?2, body = ?3, subkind = ?4, path = ?5,
251 span_start = ?6, span_end = ?7, content_hash = ?8, meta = ?9,
252 lang = ?10, parent_id = ?11, updated_at = ?12, deleted_at = NULL
253 WHERE id = ?1",
254 params![
255 id,
256 sym.qualified,
257 body,
258 sym.kind,
259 rel,
260 sym.start_line as i64,
261 sym.end_line as i64,
262 blake3::hash(body.as_bytes()).as_bytes().to_vec(),
263 meta.to_string(),
264 lang.name(),
265 file_id,
266 now_unix()
267 ],
268 )?;
269 id
270 }
271 None => {
272 let mut new = NewNode::new(Kind::Symbol);
273 new.subkind = Some(sym.kind.into());
274 new.title = Some(sym.qualified.clone());
275 new.body = Some(body.clone());
276 new.path = Some(rel.to_string());
277 new.lang = Some(lang.name().into());
278 new.project_id = Some(project_id);
279 new.parent_id = Some(file_id);
280 new.span_start = Some(sym.start_line as i64);
281 new.span_end = Some(sym.end_line as i64);
282 new.content_hash = Some(blake3::hash(body.as_bytes()).as_bytes().to_vec());
283 new.meta = Some(meta);
284 store::insert_node(conn, new)?.id
285 }
286 };
287 kept.insert(id);
288 stats.symbols += 1;
289 }
290 {
293 let mut stmt =
294 conn.prepare("SELECT id FROM node WHERE kind = 'symbol' AND parent_id = ?1")?;
295 let all: Vec<i64> = stmt
296 .query_map([file_id], |r| r.get(0))?
297 .collect::<rusqlite::Result<_>>()?;
298 drop(stmt);
299 for id in all {
300 if !kept.contains(&id) {
301 conn.execute("DELETE FROM node WHERE id = ?1", [id])?;
302 }
303 }
304 }
305 Ok(file_id)
306}
307
/// Lightweight view of a live symbol node, loaded once per call-resolution
/// pass so lookups do not hit the database.
struct SymRef {
    /// Row id of the symbol node.
    id: i64,
    /// Value of `meta.name` (empty when absent).
    name: String,
    /// Node title, which holds the qualified symbol name (empty when absent).
    qualified: String,
    /// Node path, i.e. the file the symbol belongs to (empty when absent).
    path: String,
}
315
316fn resolve_calls(
319 conn: &Connection,
320 project_id: i64,
321 changed: &[(i64, String, FileExtract)],
322 stats: &mut GraphStats,
323) -> Result<()> {
324 if changed.is_empty() {
325 return Ok(());
326 }
327 let mut stmt = conn.prepare(
329 "SELECT id, json_extract(meta, '$.name'), title, path FROM node
330 WHERE kind = 'symbol' AND project_id = ?1 AND deleted_at IS NULL",
331 )?;
332 let symbols: Vec<SymRef> = stmt
333 .query_map([project_id], |r| {
334 Ok(SymRef {
335 id: r.get(0)?,
336 name: r.get::<_, Option<String>>(1)?.unwrap_or_default(),
337 qualified: r.get::<_, Option<String>>(2)?.unwrap_or_default(),
338 path: r.get::<_, Option<String>>(3)?.unwrap_or_default(),
339 })
340 })?
341 .collect::<rusqlite::Result<_>>()?;
342 drop(stmt);
343
344 let mut by_name: HashMap<&str, Vec<&SymRef>> = HashMap::new();
345 for s in &symbols {
346 if !s.name.is_empty() {
349 by_name.entry(s.name.as_str()).or_default().push(s);
350 }
351 }
352 let mut by_file_qualified: HashMap<(&str, &str), i64> = HashMap::new();
353 for s in &symbols {
354 by_file_qualified.insert((s.path.as_str(), s.qualified.as_str()), s.id);
355 }
356 let file_paths: HashSet<&str> = {
357 let mut set = HashSet::new();
358 for s in &symbols {
359 set.insert(s.path.as_str());
360 }
361 set
362 };
363 let file_ids: HashMap<String, i64> = {
364 let mut stmt = conn.prepare(
365 "SELECT path, id FROM node
366 WHERE kind = 'file' AND project_id = ?1 AND collection_id IS NULL
367 AND deleted_at IS NULL",
368 )?;
369 let rows: Vec<(String, i64)> = stmt
370 .query_map([project_id], |r| Ok((r.get(0)?, r.get(1)?)))?
371 .collect::<rusqlite::Result<_>>()?;
372 rows.into_iter().collect()
373 };
374
375 for (file_id, rel, fx) in changed {
376 conn.execute(
378 "DELETE FROM edge WHERE rel = 'calls' AND src IN
379 (SELECT id FROM node WHERE kind = 'symbol' AND parent_id = ?1)",
380 [file_id],
381 )?;
382 conn.execute(
383 "DELETE FROM edge WHERE rel = 'imports' AND src = ?1",
384 [file_id],
385 )?;
386
387 let mut import_target: HashMap<&str, String> = HashMap::new();
389 for imp in &fx.imports {
390 if let Some(target) = resolve_import(rel, &imp.source, &file_paths) {
391 import_target.insert(imp.local.as_str(), target.clone());
392 if let Some(dst) = file_ids.get(&target) {
393 if *dst != *file_id {
394 store::link(conn, *file_id, *dst, Rel::Imports, 1.0)?;
395 stats.imports += 1;
396 }
397 }
398 }
399 }
400
401 for call in &fx.calls {
402 if call.caller.is_empty() {
403 continue; }
405 let Some(&src) = by_file_qualified.get(&(rel.as_str(), call.caller.as_str())) else {
406 continue;
407 };
408 let candidates = by_name.get(call.callee.as_str());
409 let Some(candidates) = candidates else {
410 continue;
411 };
412 if let Some(c) = candidates.iter().find(|c| c.path == *rel && c.id != src) {
414 link_call(conn, src, c.id, 1.0, true)?;
415 stats.calls_resolved += 1;
416 continue;
417 }
418 if let Some(target) = import_target.get(call.callee.as_str()) {
420 if let Some(c) = candidates.iter().find(|c| c.path == *target) {
421 link_call(conn, src, c.id, 1.0, true)?;
422 stats.calls_resolved += 1;
423 continue;
424 }
425 }
426 let global: Vec<&&SymRef> = candidates.iter().filter(|c| c.id != src).collect();
428 match global.len() {
429 0 => {}
430 1 => {
431 link_call(conn, src, global[0].id, 0.8, true)?;
432 stats.calls_resolved += 1;
433 }
434 n if n <= 3 => {
435 for c in &global {
436 link_call(conn, src, c.id, 1.0 / n as f64, false)?;
437 stats.calls_heuristic += 1;
438 }
439 }
440 _ => {} }
442 }
443 }
444 Ok(())
445}
446
447fn link_call(conn: &Connection, src: i64, dst: i64, weight: f64, resolved: bool) -> Result<()> {
448 conn.execute(
449 "INSERT INTO edge (src, dst, rel, weight, meta, created_at)
450 VALUES (?1, ?2, 'calls', ?3, json_object('resolved', ?4), ?5)
451 ON CONFLICT(src, dst, rel) DO UPDATE SET
452 weight = excluded.weight, meta = excluded.meta",
453 params![src, dst, weight, resolved, now_unix()],
454 )?;
455 Ok(())
456}
457
/// Best-effort mapping of an import `source`, as written in `importer`, to
/// one of the project-relative paths in `files`.
///
/// Supported shapes, checked in this order:
///
/// * `./x`, `../x` — JS/TS relative import: tries `.ts`, `.tsx`, `.js`,
///   `.jsx`, the directory `index.*` variants, then the path itself.
/// * `.x`, `..x` — Python relative import (`.py` or `/__init__.py`).
/// * `a::b::c` — Rust path: `crate`/`super`/`self` are dropped and the
///   longest leading run of up to three segments that names a `.rs` file or
///   a `/mod.rs` directory module wins.
/// * `a.b.c` (no slash) — Python absolute import, exact path first, then any
///   file ending with that path.
/// * anything else — a file inside a directory named like the last `/`
///   segment, or a root-level `<last>.py`.
///
/// Suffix matches are anchored on a path-component boundary, and when
/// several files match the shortest (then lexicographically smallest) path
/// is returned so the result does not depend on hash-set iteration order.
/// Imports that climb above the project root resolve to `None`.
fn resolve_import(importer: &str, source: &str, files: &HashSet<&str>) -> Option<String> {
    /// True when `path` equals `suffix` or ends with `/suffix`.
    fn ends_with_path(path: &str, suffix: &str) -> bool {
        match path.strip_suffix(suffix) {
            Some(head) => head.is_empty() || head.ends_with('/'),
            None => false,
        }
    }

    /// Deterministic choice among several matches: shortest, then smallest.
    fn pick_best<'a>(matches: impl Iterator<Item = &'a str>) -> Option<String> {
        matches
            .min_by(|a, b| a.len().cmp(&b.len()).then_with(|| a.cmp(b)))
            .map(str::to_owned)
    }

    let dir = Path::new(importer)
        .parent()
        .unwrap_or_else(|| Path::new(""));
    let first_existing = |candidates: &[String]| -> Option<String> {
        candidates
            .iter()
            .find(|c| files.contains(c.as_str()))
            .cloned()
    };
    let best_with_suffix = |suffix: &str| -> Option<String> {
        pick_best(files.iter().copied().filter(|f| ends_with_path(f, suffix)))
    };

    if source.starts_with('.') {
        if source.contains("::") {
            return None;
        }
        if source.starts_with("./") || source.starts_with("../") {
            // JS/TS relative import.
            let joined = normalize(&dir.join(source));
            return first_existing(&[
                format!("{joined}.ts"),
                format!("{joined}.tsx"),
                format!("{joined}.js"),
                format!("{joined}.jsx"),
                format!("{joined}/index.ts"),
                format!("{joined}/index.tsx"),
                format!("{joined}/index.js"),
                format!("{joined}/index.jsx"),
                joined,
            ]);
        }
        // Python relative import: one dot is the importer's package, each
        // further dot climbs one directory.
        let dots = source.chars().take_while(|c| *c == '.').count();
        let module = &source[dots..];
        let mut base = dir.to_path_buf();
        for _ in 1..dots {
            // Running out of parents means the import leaves the project.
            base = base.parent()?.to_path_buf();
        }
        let joined = normalize(&base.join(module.replace('.', "/")));
        return first_existing(&[format!("{joined}.py"), format!("{joined}/__init__.py")]);
    }

    if source.contains("::") {
        // Rust path.
        let segs: Vec<&str> = source
            .split("::")
            .filter(|s| !s.is_empty() && !matches!(*s, "crate" | "super" | "self"))
            .collect();
        for take in (1..=segs.len().min(3)).rev() {
            let stem = segs[..take].join("/");
            for suffix in [format!("{stem}.rs"), format!("{stem}/mod.rs")] {
                let hit = best_with_suffix(&suffix);
                if hit.is_some() {
                    return hit;
                }
            }
        }
        return None;
    }

    if source.contains('.') && !source.contains('/') {
        // Python absolute import: exact project-relative path first, then any
        // file whose path ends with the module path (e.g. under `src/`).
        let joined = source.replace('.', "/");
        let candidates = [format!("{joined}.py"), format!("{joined}/__init__.py")];
        return first_existing(&candidates).or_else(|| {
            candidates
                .iter()
                .find_map(|suffix| best_with_suffix(suffix))
        });
    }

    // Fallback: match on the last path segment of the import.
    let last = source.rsplit('/').next().unwrap_or(source);
    let module_file = format!("{last}.py");
    pick_best(files.iter().copied().filter(|f| {
        *f == module_file
            || Path::new(*f)
                .parent()
                .and_then(|p| p.file_name())
                .map(|d| d.to_string_lossy() == last)
                .unwrap_or(false)
    }))
}

/// Lexically normalizes `p` into a `/`-separated relative path string.
///
/// `.` components are dropped and `..` removes the preceding normal
/// component. A `..` with nothing left to remove is kept, so a path that
/// climbs above its starting point stays recognisable (and will not match a
/// project-relative file) instead of being silently clamped. Root and prefix
/// components are ignored.
fn normalize(p: &Path) -> String {
    let parent_dir = std::ffi::OsStr::new("..");
    let mut parts: Vec<&std::ffi::OsStr> = Vec::new();
    for c in p.components() {
        match c {
            std::path::Component::ParentDir => {
                let can_pop = matches!(parts.last(), Some(top) if *top != parent_dir);
                if can_pop {
                    parts.pop();
                } else {
                    parts.push(parent_dir);
                }
            }
            std::path::Component::CurDir => {}
            std::path::Component::Normal(s) => parts.push(s),
            _ => {}
        }
    }
    parts
        .iter()
        .map(|s| s.to_string_lossy())
        .collect::<Vec<_>>()
        .join("/")
}
564}