1use anyhow::{anyhow, Context, Result};
2use serde_json::json;
3use sha2::{Digest, Sha256};
4use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
5use std::fs;
6use std::path::Path;
7use ucm_core::{Block, BlockId, Content, Document, DocumentId, Edge, EdgeType};
8
9use crate::model::*;
10
11use super::languages::ts_js::extend_unique_block_ids;
12use super::{
13 alias_scope_key, analyze_file, ancestor_directories, canonical_fingerprint,
14 collect_repository_files, compare_extracted_symbols, compute_stats, format_coderef,
15 format_line_range, normalize_path, normalize_temporal_fields, parent_directory_id,
16 parent_id_for_file, resolve_alias_target_ids, resolve_import, resolve_relationship_target_ids,
17 resolve_usage_target_ids, sanitize_identifier, sort_edges,
18 sort_structure_children_by_logical_key, unique_symbol_logical_key, validate_code_graph_profile,
19 GitignoreMatcher,
20};
21
22pub fn build_code_graph(input: &CodeGraphBuildInput) -> Result<CodeGraphBuildResult> {
23 let repo_root = input
24 .repository_path
25 .canonicalize()
26 .with_context(|| format!("failed to resolve repo path {:?}", input.repository_path))?;
27
28 if !repo_root.is_dir() {
29 return Err(anyhow!(
30 "repository path is not a directory: {}",
31 repo_root.display()
32 ));
33 }
34
35 let mut diagnostics = Vec::new();
36 let matcher = GitignoreMatcher::from_repository(&repo_root)?;
37 let files = collect_repository_files(&repo_root, &input.config, &matcher, &mut diagnostics)?;
38
39 let repo_name = repo_root
40 .file_name()
41 .map(|s| s.to_string_lossy().to_string())
42 .unwrap_or_else(|| "repository".to_string());
43
44 let mut doc = Document::new(DocumentId::new(format!(
45 "codegraph:{}:{}",
46 sanitize_identifier(&repo_name),
47 sanitize_identifier(&input.commit_hash)
48 )));
49
50 initialize_document_metadata(&mut doc, &repo_root, &repo_name, &input.commit_hash);
51
52 let repo_block = make_repository_block(&repo_name, &input.commit_hash);
53 let root_id = doc.root;
54 let repo_block_id = doc.add_block(repo_block, &root_id)?;
55
56 let mut directories = BTreeSet::new();
57 for file in &files {
58 for dir in ancestor_directories(&file.relative_path) {
59 directories.insert(dir);
60 }
61 }
62
63 let mut directory_ids: BTreeMap<String, BlockId> = BTreeMap::new();
64 for dir in directories {
65 let parent_id = parent_directory_id(&dir, &directory_ids).unwrap_or(repo_block_id);
66 let block = make_directory_block(&dir);
67 let block_id = doc.add_block(block, &parent_id)?;
68 directory_ids.insert(dir, block_id);
69 }
70
71 let mut file_ids: BTreeMap<String, BlockId> = BTreeMap::new();
72 let mut symbol_ids_by_file_identity: BTreeMap<(String, String), BlockId> = BTreeMap::new();
73 let mut top_level_symbol_ids: BTreeMap<(String, String), Vec<BlockId>> = BTreeMap::new();
74 let mut exported_top_level_symbol_ids: BTreeMap<String, Vec<(String, BlockId)>> =
75 BTreeMap::new();
76 let mut default_exported_top_level_symbol_ids: BTreeMap<String, Vec<BlockId>> = BTreeMap::new();
77 let mut file_analyses = Vec::new();
78 let mut used_symbol_keys: HashSet<String> = HashSet::new();
79
80 for file in files {
81 let parent_id = parent_id_for_file(&file.relative_path, repo_block_id, &directory_ids);
82
83 let source = match fs::read_to_string(&file.absolute_path) {
84 Ok(s) => s,
85 Err(err) => {
86 let diag = CodeGraphDiagnostic::error(
87 "CG2003",
88 format!("failed to read source file: {}", err),
89 )
90 .with_path(file.relative_path.clone());
91 diagnostics.push(diag);
92 if input.config.continue_on_parse_error {
93 continue;
94 }
95 return Err(anyhow!(
96 "failed to read source file {}: {}",
97 file.relative_path,
98 err
99 ));
100 }
101 };
102
103 if source.len() > input.config.max_file_bytes {
104 diagnostics.push(
105 CodeGraphDiagnostic::warning(
106 "CG2008",
107 format!(
108 "file skipped due to size limit ({} bytes > {} bytes)",
109 source.len(),
110 input.config.max_file_bytes
111 ),
112 )
113 .with_path(file.relative_path.clone()),
114 );
115 continue;
116 }
117
118 let FileAnalysis {
119 file_description,
120 mut symbols,
121 imports,
122 relationships,
123 usages,
124 aliases,
125 export_bindings,
126 default_exported_symbol_names,
127 diagnostics: analysis_diagnostics,
128 ..
129 } = analyze_file(&file.relative_path, &source, file.language);
130
131 let file_block = make_file_block(
132 &file.relative_path,
133 file.language.as_str(),
134 file_description.as_deref(),
135 );
136 let file_block_id = doc.add_block(file_block, &parent_id)?;
137 file_ids.insert(file.relative_path.clone(), file_block_id);
138
139 for diag in &analysis_diagnostics {
140 diagnostics.push(diag.clone().with_path(file.relative_path.clone()));
141 }
142
143 symbols.sort_by(compare_extracted_symbols);
144 let mut symbol_ids_by_identity: BTreeMap<String, BlockId> = BTreeMap::new();
145
146 for symbol in &symbols {
147 let parent_block_id = symbol
148 .parent_identity
149 .as_ref()
150 .and_then(|identity| symbol_ids_by_identity.get(identity).copied())
151 .unwrap_or(file_block_id);
152 let logical_key = unique_symbol_logical_key(
153 &file.relative_path,
154 &symbol.qualified_name,
155 symbol.start_line,
156 &mut used_symbol_keys,
157 );
158 let symbol_block = make_symbol_block(
159 &logical_key,
160 &file.relative_path,
161 file.language.as_str(),
162 symbol,
163 );
164 let symbol_id = doc.add_block(symbol_block, &parent_block_id)?;
165 symbol_ids_by_identity.insert(symbol.identity.clone(), symbol_id);
166 symbol_ids_by_file_identity.insert(
167 (file.relative_path.clone(), symbol.identity.clone()),
168 symbol_id,
169 );
170
171 if symbol.parent_identity.is_none() {
172 top_level_symbol_ids
173 .entry((file.relative_path.clone(), symbol.name.clone()))
174 .or_default()
175 .push(symbol_id);
176 if symbol.exported {
177 exported_top_level_symbol_ids
178 .entry(file.relative_path.clone())
179 .or_default()
180 .push((symbol.name.clone(), symbol_id));
181 if default_exported_symbol_names.contains(&symbol.name) {
182 default_exported_top_level_symbol_ids
183 .entry(file.relative_path.clone())
184 .or_default()
185 .push(symbol_id);
186 }
187 }
188 }
189
190 if symbol.exported && input.config.emit_export_edges {
191 let mut edge = Edge::new(EdgeType::Custom("exports".to_string()), symbol_id);
192 edge.metadata
193 .custom
194 .insert("relation".to_string(), json!("exports"));
195 edge.metadata
196 .custom
197 .insert("symbol".to_string(), json!(symbol.name.clone()));
198 if let Some(source_block) = doc.get_block_mut(&file_block_id) {
199 source_block.edges.push(edge);
200 }
201 }
202 }
203
204 file_analyses.push(FileAnalysisRecord {
205 file: file.relative_path,
206 language: file.language,
207 imports,
208 relationships,
209 usages,
210 aliases,
211 export_bindings,
212 });
213 }
214
215 let known_files: BTreeSet<String> = file_ids.keys().cloned().collect();
216 let mut exported_symbol_targets_by_file: BTreeMap<String, BTreeMap<String, Vec<BlockId>>> =
217 BTreeMap::new();
218 let mut imported_symbol_targets_by_file: BTreeMap<String, BTreeMap<String, Vec<BlockId>>> =
219 BTreeMap::new();
220 let mut imported_module_targets_by_file: BTreeMap<String, BTreeMap<String, Vec<String>>> =
221 BTreeMap::new();
222 let mut imported_module_paths_by_file: BTreeMap<String, BTreeMap<String, Vec<String>>> =
223 BTreeMap::new();
224 let mut alias_names_by_scope: BTreeMap<(String, String), BTreeSet<String>> = BTreeMap::new();
225 let mut alias_records_by_scope: BTreeMap<
226 (String, String),
227 BTreeMap<String, Vec<ExtractedAlias>>,
228 > = BTreeMap::new();
229 let mut aliased_symbol_targets_by_scope: BTreeMap<
230 (String, String),
231 BTreeMap<String, Vec<BlockId>>,
232 > = BTreeMap::new();
233 let mut pending_reference_edges: BTreeSet<(String, String, String)> = BTreeSet::new();
234 let mut pending_symbol_reference_edges: BTreeSet<(String, String, String, String)> =
235 BTreeSet::new();
236 let mut pending_wildcard_symbol_reference_edges: BTreeSet<(String, String, String)> =
237 BTreeSet::new();
238 let mut pending_reexport_edges: BTreeSet<(String, String, String, String)> = BTreeSet::new();
239 let mut pending_wildcard_reexport_edges: BTreeSet<(String, String, String, Vec<String>)> =
240 BTreeSet::new();
241 let mut pending_relationship_edges: Vec<(BlockId, BlockId, String, String)> = Vec::new();
242 let mut pending_usage_edges: Vec<(BlockId, BlockId, String)> = Vec::new();
243
244 for (file, exports) in &exported_top_level_symbol_ids {
245 let entry = exported_symbol_targets_by_file
246 .entry(file.clone())
247 .or_default();
248 for (name, symbol_id) in exports {
249 entry.entry(name.clone()).or_default().push(*symbol_id);
250 }
251 }
252 for (file, ids) in &default_exported_top_level_symbol_ids {
253 exported_symbol_targets_by_file
254 .entry(file.clone())
255 .or_default()
256 .entry("default".to_string())
257 .or_default()
258 .extend(ids.iter().copied());
259 }
260 for record in &file_analyses {
261 let entry = exported_symbol_targets_by_file
262 .entry(record.file.clone())
263 .or_default();
264 for binding in &record.export_bindings {
265 if let Some(ids) =
266 top_level_symbol_ids.get(&(record.file.clone(), binding.local_name.clone()))
267 {
268 extend_unique_block_ids(
269 entry.entry(binding.source_name.clone()).or_default(),
270 ids.iter().copied(),
271 );
272 }
273 }
274 }
275
276 for targets in exported_symbol_targets_by_file.values_mut() {
277 for ids in targets.values_mut() {
278 let existing = std::mem::take(ids);
279 extend_unique_block_ids(ids, existing);
280 }
281 }
282
283 for _ in 0..=file_analyses.len() {
284 let mut progress = false;
285
286 for record in &file_analyses {
287 for import in &record.imports {
288 if !import.reexported {
289 continue;
290 }
291
292 let ImportResolution::Resolved(target) =
293 resolve_import(&record.file, &record.language, &import.module, &known_files)
294 else {
295 continue;
296 };
297
298 let target_exports = exported_symbol_targets_by_file
299 .get(&target)
300 .cloned()
301 .unwrap_or_default();
302 let entry = exported_symbol_targets_by_file
303 .entry(record.file.clone())
304 .or_default();
305
306 if import.wildcard {
307 for (export_name, ids) in target_exports.clone() {
308 if export_name == "default" {
309 continue;
310 }
311 let targets = entry.entry(export_name).or_default();
312 progress |= extend_unique_block_ids(targets, ids.iter().copied());
313 }
314 }
315
316 for binding in &import.bindings {
317 if let Some(ids) = target_exports.get(&binding.source_name) {
318 let targets = entry.entry(binding.local_name.clone()).or_default();
319 progress |= extend_unique_block_ids(targets, ids.iter().copied());
320 }
321 }
322 }
323 }
324
325 if !progress {
326 break;
327 }
328 }
329
330 for record in &file_analyses {
331 for import in &record.imports {
332 match resolve_import(&record.file, &record.language, &import.module, &known_files) {
333 ImportResolution::Resolved(target) if target != record.file => {
334 pending_reference_edges.insert((
335 record.file.clone(),
336 target.clone(),
337 import.module.clone(),
338 ));
339
340 for symbol_name in &import.symbols {
341 pending_symbol_reference_edges.insert((
342 record.file.clone(),
343 target.clone(),
344 symbol_name.clone(),
345 import.module.clone(),
346 ));
347 if import.reexported {
348 pending_reexport_edges.insert((
349 record.file.clone(),
350 target.clone(),
351 symbol_name.clone(),
352 import.module.clone(),
353 ));
354 }
355 }
356
357 if matches!(record.language, CodeLanguage::Rust | CodeLanguage::Python)
358 && import.wildcard
359 {
360 if let Some(exports) = exported_symbol_targets_by_file.get(&target) {
361 let entry = imported_symbol_targets_by_file
362 .entry(record.file.clone())
363 .or_default();
364 for (export_name, target_symbol_ids) in exports {
365 if export_name == "default" {
366 continue;
367 }
368 entry
369 .entry(export_name.clone())
370 .or_default()
371 .extend(target_symbol_ids.iter().copied());
372 }
373 }
374 }
375
376 if !import.bindings.is_empty() {
377 let entry = imported_symbol_targets_by_file
378 .entry(record.file.clone())
379 .or_default();
380 for binding in &import.bindings {
381 if let Some(target_symbol_ids) = exported_symbol_targets_by_file
382 .get(&target)
383 .and_then(|exports| exports.get(&binding.source_name))
384 {
385 entry
386 .entry(binding.local_name.clone())
387 .or_default()
388 .extend(target_symbol_ids.iter().copied());
389 }
390 }
391 }
392
393 if !import.module_aliases.is_empty() {
394 let path_entry = imported_module_paths_by_file
395 .entry(record.file.clone())
396 .or_default();
397 for alias in &import.module_aliases {
398 let paths = path_entry.entry(alias.clone()).or_default();
399 if !paths.contains(&import.module) {
400 paths.push(import.module.clone());
401 }
402 }
403
404 let entry = imported_module_targets_by_file
405 .entry(record.file.clone())
406 .or_default();
407 for alias in &import.module_aliases {
408 let targets = entry.entry(alias.clone()).or_default();
409 if !targets.contains(&target) {
410 targets.push(target.clone());
411 }
412 }
413 }
414
415 if import.reexported && import.wildcard && import.symbols.is_empty() {
416 pending_wildcard_reexport_edges.insert((
417 record.file.clone(),
418 target.clone(),
419 import.module.clone(),
420 import.symbols.clone(),
421 ));
422 }
423
424 if import.wildcard && import.symbols.is_empty() {
425 pending_wildcard_symbol_reference_edges.insert((
426 record.file.clone(),
427 target,
428 import.module.clone(),
429 ));
430 }
431 }
432 ImportResolution::Resolved(_) | ImportResolution::External => {}
433 ImportResolution::Unresolved => {
434 diagnostics.push(
435 CodeGraphDiagnostic::warning(
436 "CG2006",
437 format!("unresolved import '{}'", import.module),
438 )
439 .with_path(record.file.clone()),
440 );
441 }
442 }
443 }
444 }
445
446 for targets in imported_symbol_targets_by_file.values_mut() {
447 for symbol_ids in targets.values_mut() {
448 let mut unique_ids = Vec::new();
449 for symbol_id in symbol_ids.drain(..) {
450 if !unique_ids.contains(&symbol_id) {
451 unique_ids.push(symbol_id);
452 }
453 }
454 *symbol_ids = unique_ids;
455 }
456 }
457
458 for targets in imported_module_targets_by_file.values_mut() {
459 for file_paths in targets.values_mut() {
460 file_paths.sort();
461 file_paths.dedup();
462 }
463 }
464
465 for targets in imported_module_paths_by_file.values_mut() {
466 for module_paths in targets.values_mut() {
467 module_paths.sort();
468 module_paths.dedup();
469 }
470 }
471
472 for record in &file_analyses {
473 for alias in &record.aliases {
474 let scope_key = alias_scope_key(alias.owner_identity.as_deref());
475 alias_names_by_scope
476 .entry((record.file.clone(), scope_key.clone()))
477 .or_default()
478 .insert(alias.name.clone());
479 alias_records_by_scope
480 .entry((record.file.clone(), scope_key))
481 .or_default()
482 .entry(alias.name.clone())
483 .or_default()
484 .push(alias.clone());
485 }
486 }
487
488 let mut unresolved_aliases = file_analyses
489 .iter()
490 .flat_map(|record| {
491 record
492 .aliases
493 .iter()
494 .cloned()
495 .map(|alias| (record.file.clone(), record.language, alias))
496 })
497 .collect::<Vec<_>>();
498
499 while !unresolved_aliases.is_empty() {
500 let mut next_unresolved = Vec::new();
501 let mut made_progress = false;
502
503 for (file, language, alias) in unresolved_aliases {
504 let target_ids = resolve_alias_target_ids(
505 &file,
506 language,
507 &alias,
508 &top_level_symbol_ids,
509 &exported_symbol_targets_by_file,
510 &imported_symbol_targets_by_file,
511 &imported_module_targets_by_file,
512 &imported_module_paths_by_file,
513 &alias_names_by_scope,
514 &aliased_symbol_targets_by_scope,
515 &known_files,
516 );
517 if target_ids.is_empty() {
518 next_unresolved.push((file, language, alias));
519 continue;
520 }
521
522 aliased_symbol_targets_by_scope
523 .entry((file, alias_scope_key(alias.owner_identity.as_deref())))
524 .or_default()
525 .entry(alias.name)
526 .or_default()
527 .extend(target_ids);
528 made_progress = true;
529 }
530
531 if !made_progress {
532 break;
533 }
534 unresolved_aliases = next_unresolved;
535 }
536
537 for targets in aliased_symbol_targets_by_scope.values_mut() {
538 for symbol_ids in targets.values_mut() {
539 let mut unique_ids = Vec::new();
540 for symbol_id in symbol_ids.drain(..) {
541 if !unique_ids.contains(&symbol_id) {
542 unique_ids.push(symbol_id);
543 }
544 }
545 *symbol_ids = unique_ids;
546 }
547 }
548
549 for record in &file_analyses {
550 for relationship in &record.relationships {
551 let Some(source_id) = symbol_ids_by_file_identity
552 .get(&(record.file.clone(), relationship.source_identity.clone()))
553 else {
554 continue;
555 };
556
557 for target_id in resolve_relationship_target_ids(
558 &record.file,
559 record.language,
560 relationship,
561 &top_level_symbol_ids,
562 &imported_symbol_targets_by_file,
563 &known_files,
564 ) {
565 if target_id == *source_id {
566 continue;
567 }
568 let edge = (
569 *source_id,
570 target_id,
571 relationship.relation.clone(),
572 relationship.target_expr.clone(),
573 );
574 if !pending_relationship_edges.contains(&edge) {
575 pending_relationship_edges.push(edge);
576 }
577 }
578 }
579 }
580
581 for record in &file_analyses {
582 for usage in &record.usages {
583 let Some(source_id) = symbol_ids_by_file_identity
584 .get(&(record.file.clone(), usage.source_identity.clone()))
585 else {
586 continue;
587 };
588
589 for target_id in resolve_usage_target_ids(
590 &record.file,
591 record.language,
592 usage,
593 &top_level_symbol_ids,
594 &exported_symbol_targets_by_file,
595 &imported_symbol_targets_by_file,
596 &imported_module_targets_by_file,
597 &imported_module_paths_by_file,
598 &alias_names_by_scope,
599 &alias_records_by_scope,
600 &aliased_symbol_targets_by_scope,
601 &known_files,
602 ) {
603 let edge = (*source_id, target_id, usage.target_expr.clone());
604 if !pending_usage_edges.contains(&edge) {
605 pending_usage_edges.push(edge);
606 }
607 }
608 }
609 }
610
611 for (source_path, target_path, raw_import) in pending_reference_edges {
612 let (Some(source_id), Some(target_id)) =
613 (file_ids.get(&source_path), file_ids.get(&target_path))
614 else {
615 continue;
616 };
617 let mut edge = Edge::new(EdgeType::References, *target_id);
618 edge.metadata
619 .custom
620 .insert("relation".to_string(), json!("imports"));
621 edge.metadata
622 .custom
623 .insert("raw_import".to_string(), json!(raw_import));
624 if let Some(source_block) = doc.get_block_mut(source_id) {
625 source_block.edges.push(edge);
626 }
627 }
628
629 for (source_path, target_path, symbol_name, raw_import) in pending_symbol_reference_edges {
630 let Some(source_id) = file_ids.get(&source_path) else {
631 continue;
632 };
633 let Some(target_symbol_ids) =
634 top_level_symbol_ids.get(&(target_path.clone(), symbol_name.clone()))
635 else {
636 continue;
637 };
638
639 for target_symbol_id in target_symbol_ids {
640 let mut edge = Edge::new(
641 EdgeType::Custom("imports_symbol".to_string()),
642 *target_symbol_id,
643 );
644 edge.metadata
645 .custom
646 .insert("relation".to_string(), json!("imports_symbol"));
647 edge.metadata
648 .custom
649 .insert("raw_import".to_string(), json!(raw_import.clone()));
650 edge.metadata
651 .custom
652 .insert("symbol".to_string(), json!(symbol_name.clone()));
653 if let Some(source_block) = doc.get_block_mut(source_id) {
654 source_block.edges.push(edge);
655 }
656 }
657 }
658
659 for (source_path, target_path, raw_import) in pending_wildcard_symbol_reference_edges {
660 let Some(source_id) = file_ids.get(&source_path) else {
661 continue;
662 };
663 let Some(target_symbols) = exported_top_level_symbol_ids.get(&target_path) else {
664 continue;
665 };
666
667 for (symbol_name, target_symbol_id) in target_symbols {
668 let mut edge = Edge::new(
669 EdgeType::Custom("imports_symbol".to_string()),
670 *target_symbol_id,
671 );
672 edge.metadata
673 .custom
674 .insert("relation".to_string(), json!("imports_symbol"));
675 edge.metadata
676 .custom
677 .insert("raw_import".to_string(), json!(raw_import.clone()));
678 edge.metadata
679 .custom
680 .insert("symbol".to_string(), json!(symbol_name.clone()));
681 if let Some(source_block) = doc.get_block_mut(source_id) {
682 source_block.edges.push(edge);
683 }
684 }
685 }
686
687 if input.config.emit_export_edges {
688 for (source_path, target_path, symbol_name, raw_import) in pending_reexport_edges {
689 let Some(source_id) = file_ids.get(&source_path) else {
690 continue;
691 };
692 let Some(target_symbol_ids) =
693 top_level_symbol_ids.get(&(target_path.clone(), symbol_name.clone()))
694 else {
695 continue;
696 };
697
698 for target_symbol_id in target_symbol_ids {
699 let mut edge =
700 Edge::new(EdgeType::Custom("exports".to_string()), *target_symbol_id);
701 edge.metadata
702 .custom
703 .insert("relation".to_string(), json!("reexports"));
704 edge.metadata
705 .custom
706 .insert("raw_import".to_string(), json!(raw_import.clone()));
707 edge.metadata
708 .custom
709 .insert("symbol".to_string(), json!(symbol_name.clone()));
710 if let Some(source_block) = doc.get_block_mut(source_id) {
711 source_block.edges.push(edge);
712 }
713 }
714 }
715
716 for (source_path, target_path, raw_import, filter_names) in pending_wildcard_reexport_edges
717 {
718 let Some(source_id) = file_ids.get(&source_path) else {
719 continue;
720 };
721 let Some(target_symbols) = exported_top_level_symbol_ids.get(&target_path) else {
722 continue;
723 };
724
725 for (symbol_name, target_symbol_id) in target_symbols {
726 if !filter_names.is_empty() && !filter_names.contains(symbol_name) {
727 continue;
728 }
729 let mut edge =
730 Edge::new(EdgeType::Custom("exports".to_string()), *target_symbol_id);
731 edge.metadata
732 .custom
733 .insert("relation".to_string(), json!("reexports"));
734 edge.metadata
735 .custom
736 .insert("raw_import".to_string(), json!(raw_import.clone()));
737 edge.metadata
738 .custom
739 .insert("symbol".to_string(), json!(symbol_name.clone()));
740 if let Some(source_block) = doc.get_block_mut(source_id) {
741 source_block.edges.push(edge);
742 }
743 }
744 }
745 }
746
747 for (source_id, target_id, relation, raw_target) in pending_relationship_edges {
748 let mut edge = Edge::new(EdgeType::Custom(relation.clone()), target_id);
749 edge.metadata
750 .custom
751 .insert("relation".to_string(), json!(relation));
752 edge.metadata
753 .custom
754 .insert("raw_target".to_string(), json!(raw_target));
755 if let Some(source_block) = doc.get_block_mut(&source_id) {
756 source_block.edges.push(edge);
757 }
758 }
759
760 for (source_id, target_id, raw_target) in pending_usage_edges {
761 let mut edge = Edge::new(EdgeType::Custom("uses_symbol".to_string()), target_id);
762 edge.metadata
763 .custom
764 .insert("relation".to_string(), json!("uses_symbol"));
765 edge.metadata
766 .custom
767 .insert("raw_target".to_string(), json!(raw_target));
768 if let Some(source_block) = doc.get_block_mut(&source_id) {
769 source_block.edges.push(edge);
770 }
771 }
772
773 sort_structure_children_by_logical_key(&mut doc);
774 sort_edges(&mut doc);
775 normalize_temporal_fields(&mut doc);
776 doc.rebuild_indices();
777
778 let mut validation = validate_code_graph_profile(&doc);
779 diagnostics.append(&mut validation.diagnostics);
780
781 let fingerprint = canonical_fingerprint(&doc)?;
782 let stats = compute_stats(&doc);
783
784 let has_profile_errors = diagnostics
785 .iter()
786 .any(|d| d.severity == CodeGraphSeverity::Error && d.code.starts_with("CG100"));
787 let has_non_info = diagnostics
788 .iter()
789 .any(|d| d.severity != CodeGraphSeverity::Info);
790
791 let status = if has_profile_errors {
792 CodeGraphBuildStatus::FailedValidation
793 } else if has_non_info {
794 CodeGraphBuildStatus::PartialSuccess
795 } else {
796 CodeGraphBuildStatus::Success
797 };
798
799 Ok(CodeGraphBuildResult {
800 document: doc,
801 diagnostics,
802 stats,
803 profile_version: CODEGRAPH_PROFILE_MARKER.to_string(),
804 canonical_fingerprint: fingerprint,
805 status,
806 incremental: None,
807 })
808}
809
810#[derive(Debug, Clone)]
811pub(super) struct LoadedRepoFile {
812 pub repo_file: RepoFile,
813 pub content_hash: Option<String>,
814 pub source: Option<String>,
815 pub diagnostics: Vec<CodeGraphDiagnostic>,
816}
817
818#[derive(Debug, Clone)]
819pub(super) struct AnalyzedRepoFile {
820 pub relative_path: String,
821 pub language: CodeLanguage,
822 pub content_hash: Option<String>,
823 pub analysis: Option<FileAnalysis>,
824 pub diagnostics: Vec<CodeGraphDiagnostic>,
825}
826
827#[derive(Debug, Clone)]
828pub(super) struct AssembledCodeGraph {
829 pub result: CodeGraphBuildResult,
830 pub dependencies_by_file: BTreeMap<String, Vec<String>>,
831}
832
833pub(super) fn hash_source(source: &str) -> String {
834 let mut hasher = Sha256::new();
835 hasher.update(source.as_bytes());
836 hex::encode(hasher.finalize())
837}
838
839pub(super) fn load_repo_file(
840 repo_file: &RepoFile,
841 config: &CodeGraphExtractorConfig,
842) -> Result<LoadedRepoFile> {
843 let source = match fs::read_to_string(&repo_file.absolute_path) {
844 Ok(source) => source,
845 Err(err) => {
846 let diag = CodeGraphDiagnostic::error(
847 "CG2003",
848 format!("failed to read source file: {}", err),
849 )
850 .with_path(repo_file.relative_path.clone());
851 if config.continue_on_parse_error {
852 return Ok(LoadedRepoFile {
853 repo_file: repo_file.clone(),
854 content_hash: None,
855 source: None,
856 diagnostics: vec![diag],
857 });
858 }
859 return Err(anyhow!(
860 "failed to read source file {}: {}",
861 repo_file.relative_path,
862 err
863 ));
864 }
865 };
866
867 let content_hash = hash_source(&source);
868 if source.len() > config.max_file_bytes {
869 let diag = CodeGraphDiagnostic::warning(
870 "CG2008",
871 format!(
872 "file skipped due to size limit ({} bytes > {} bytes)",
873 source.len(),
874 config.max_file_bytes
875 ),
876 )
877 .with_path(repo_file.relative_path.clone());
878 return Ok(LoadedRepoFile {
879 repo_file: repo_file.clone(),
880 content_hash: Some(content_hash),
881 source: None,
882 diagnostics: vec![diag],
883 });
884 }
885
886 Ok(LoadedRepoFile {
887 repo_file: repo_file.clone(),
888 content_hash: Some(content_hash),
889 source: Some(source),
890 diagnostics: Vec::new(),
891 })
892}
893
894pub(super) fn analyze_loaded_repo_file(loaded: LoadedRepoFile) -> AnalyzedRepoFile {
895 let mut diagnostics = loaded.diagnostics;
896 let analysis = loaded.source.as_ref().map(|source| {
897 let analysis = analyze_file(
898 &loaded.repo_file.relative_path,
899 source,
900 loaded.repo_file.language,
901 );
902 for diag in &analysis.diagnostics {
903 diagnostics.push(
904 diag.clone()
905 .with_path(loaded.repo_file.relative_path.clone()),
906 );
907 }
908 analysis
909 });
910
911 AnalyzedRepoFile {
912 relative_path: loaded.repo_file.relative_path,
913 language: loaded.repo_file.language,
914 content_hash: loaded.content_hash,
915 analysis,
916 diagnostics,
917 }
918}
919
920pub(super) fn assemble_code_graph_from_analyzed_files(
921 repo_root: &Path,
922 repo_name: &str,
923 commit_hash: &str,
924 config: &CodeGraphExtractorConfig,
925 analyzed_files: &[AnalyzedRepoFile],
926 mut diagnostics: Vec<CodeGraphDiagnostic>,
927) -> Result<AssembledCodeGraph> {
928 let mut doc = Document::new(DocumentId::new(format!(
929 "codegraph:{}:{}",
930 sanitize_identifier(repo_name),
931 sanitize_identifier(commit_hash)
932 )));
933 initialize_document_metadata(&mut doc, repo_root, repo_name, commit_hash);
934
935 let repo_block = make_repository_block(repo_name, commit_hash);
936 let root_id = doc.root;
937 let repo_block_id = doc.add_block(repo_block, &root_id)?;
938
939 let mut directories = BTreeSet::new();
940 for file in analyzed_files {
941 for dir in ancestor_directories(&file.relative_path) {
942 directories.insert(dir);
943 }
944 }
945
946 let mut directory_ids: BTreeMap<String, BlockId> = BTreeMap::new();
947 for dir in directories {
948 let parent_id = parent_directory_id(&dir, &directory_ids).unwrap_or(repo_block_id);
949 let block = make_directory_block(&dir);
950 let block_id = doc.add_block(block, &parent_id)?;
951 directory_ids.insert(dir, block_id);
952 }
953
954 let mut file_ids: BTreeMap<String, BlockId> = BTreeMap::new();
955 let mut symbol_ids_by_file_identity: BTreeMap<(String, String), BlockId> = BTreeMap::new();
956 let mut symbol_file_by_id: HashMap<BlockId, String> = HashMap::new();
957 let mut top_level_symbol_ids: BTreeMap<(String, String), Vec<BlockId>> = BTreeMap::new();
958 let mut exported_top_level_symbol_ids: BTreeMap<String, Vec<(String, BlockId)>> =
959 BTreeMap::new();
960 let mut default_exported_top_level_symbol_ids: BTreeMap<String, Vec<BlockId>> = BTreeMap::new();
961 let mut file_analyses = Vec::new();
962 let mut used_symbol_keys: HashSet<String> = HashSet::new();
963
964 for analyzed_file in analyzed_files {
965 let parent_id =
966 parent_id_for_file(&analyzed_file.relative_path, repo_block_id, &directory_ids);
967 diagnostics.extend(analyzed_file.diagnostics.clone());
968
969 let Some(analysis) = analyzed_file.analysis.as_ref() else {
970 continue;
971 };
972
973 let file_block = make_file_block(
974 &analyzed_file.relative_path,
975 analyzed_file.language.as_str(),
976 analysis.file_description.as_deref(),
977 );
978 let file_block_id = doc.add_block(file_block, &parent_id)?;
979 file_ids.insert(analyzed_file.relative_path.clone(), file_block_id);
980
981 let mut symbols = analysis.symbols.clone();
982 symbols.sort_by(compare_extracted_symbols);
983 let mut symbol_ids_by_identity: BTreeMap<String, BlockId> = BTreeMap::new();
984
985 for symbol in &symbols {
986 let parent_block_id = symbol
987 .parent_identity
988 .as_ref()
989 .and_then(|identity| symbol_ids_by_identity.get(identity).copied())
990 .unwrap_or(file_block_id);
991 let logical_key = unique_symbol_logical_key(
992 &analyzed_file.relative_path,
993 &symbol.qualified_name,
994 symbol.start_line,
995 &mut used_symbol_keys,
996 );
997 let symbol_block = make_symbol_block(
998 &logical_key,
999 &analyzed_file.relative_path,
1000 analyzed_file.language.as_str(),
1001 symbol,
1002 );
1003 let symbol_id = doc.add_block(symbol_block, &parent_block_id)?;
1004 symbol_ids_by_identity.insert(symbol.identity.clone(), symbol_id);
1005 symbol_file_by_id.insert(symbol_id, analyzed_file.relative_path.clone());
1006 symbol_ids_by_file_identity.insert(
1007 (analyzed_file.relative_path.clone(), symbol.identity.clone()),
1008 symbol_id,
1009 );
1010
1011 if symbol.parent_identity.is_none() {
1012 top_level_symbol_ids
1013 .entry((analyzed_file.relative_path.clone(), symbol.name.clone()))
1014 .or_default()
1015 .push(symbol_id);
1016 if symbol.exported {
1017 exported_top_level_symbol_ids
1018 .entry(analyzed_file.relative_path.clone())
1019 .or_default()
1020 .push((symbol.name.clone(), symbol_id));
1021 if analysis
1022 .default_exported_symbol_names
1023 .contains(&symbol.name)
1024 {
1025 default_exported_top_level_symbol_ids
1026 .entry(analyzed_file.relative_path.clone())
1027 .or_default()
1028 .push(symbol_id);
1029 }
1030 }
1031 }
1032
1033 if symbol.exported && config.emit_export_edges {
1034 let mut edge = Edge::new(EdgeType::Custom("exports".to_string()), symbol_id);
1035 edge.metadata
1036 .custom
1037 .insert("relation".to_string(), json!("exports"));
1038 edge.metadata
1039 .custom
1040 .insert("symbol".to_string(), json!(symbol.name.clone()));
1041 if let Some(source_block) = doc.get_block_mut(&file_block_id) {
1042 source_block.edges.push(edge);
1043 }
1044 }
1045 }
1046
1047 file_analyses.push(FileAnalysisRecord {
1048 file: analyzed_file.relative_path.clone(),
1049 language: analyzed_file.language,
1050 imports: analysis.imports.clone(),
1051 relationships: analysis.relationships.clone(),
1052 usages: analysis.usages.clone(),
1053 aliases: analysis.aliases.clone(),
1054 export_bindings: analysis.export_bindings.clone(),
1055 });
1056 }
1057
1058 let known_files: BTreeSet<String> = file_ids.keys().cloned().collect();
1059 let mut dependencies_by_file: BTreeMap<String, BTreeSet<String>> = analyzed_files
1060 .iter()
1061 .map(|file| (file.relative_path.clone(), BTreeSet::new()))
1062 .collect();
1063 let mut exported_symbol_targets_by_file: BTreeMap<String, BTreeMap<String, Vec<BlockId>>> =
1064 BTreeMap::new();
1065 let mut imported_symbol_targets_by_file: BTreeMap<String, BTreeMap<String, Vec<BlockId>>> =
1066 BTreeMap::new();
1067 let mut imported_module_targets_by_file: BTreeMap<String, BTreeMap<String, Vec<String>>> =
1068 BTreeMap::new();
1069 let mut imported_module_paths_by_file: BTreeMap<String, BTreeMap<String, Vec<String>>> =
1070 BTreeMap::new();
1071 let mut alias_names_by_scope: BTreeMap<(String, String), BTreeSet<String>> = BTreeMap::new();
1072 let mut alias_records_by_scope: BTreeMap<
1073 (String, String),
1074 BTreeMap<String, Vec<ExtractedAlias>>,
1075 > = BTreeMap::new();
1076 let mut aliased_symbol_targets_by_scope: BTreeMap<
1077 (String, String),
1078 BTreeMap<String, Vec<BlockId>>,
1079 > = BTreeMap::new();
1080 let mut pending_reference_edges: BTreeSet<(String, String, String)> = BTreeSet::new();
1081 let mut pending_symbol_reference_edges: BTreeSet<(String, String, String, String)> =
1082 BTreeSet::new();
1083 let mut pending_wildcard_symbol_reference_edges: BTreeSet<(String, String, String)> =
1084 BTreeSet::new();
1085 let mut pending_reexport_edges: BTreeSet<(String, String, String, String)> = BTreeSet::new();
1086 let mut pending_wildcard_reexport_edges: BTreeSet<(String, String, String, Vec<String>)> =
1087 BTreeSet::new();
1088 let mut pending_relationship_edges: Vec<(BlockId, BlockId, String, String)> = Vec::new();
1089 let mut pending_usage_edges: Vec<(BlockId, BlockId, String)> = Vec::new();
1090
1091 for (file, exports) in &exported_top_level_symbol_ids {
1092 let entry = exported_symbol_targets_by_file
1093 .entry(file.clone())
1094 .or_default();
1095 for (name, symbol_id) in exports {
1096 entry.entry(name.clone()).or_default().push(*symbol_id);
1097 }
1098 }
1099 for (file, ids) in &default_exported_top_level_symbol_ids {
1100 exported_symbol_targets_by_file
1101 .entry(file.clone())
1102 .or_default()
1103 .entry("default".to_string())
1104 .or_default()
1105 .extend(ids.iter().copied());
1106 }
1107 for record in &file_analyses {
1108 let entry = exported_symbol_targets_by_file
1109 .entry(record.file.clone())
1110 .or_default();
1111 for binding in &record.export_bindings {
1112 if let Some(ids) =
1113 top_level_symbol_ids.get(&(record.file.clone(), binding.local_name.clone()))
1114 {
1115 extend_unique_block_ids(
1116 entry.entry(binding.source_name.clone()).or_default(),
1117 ids.iter().copied(),
1118 );
1119 }
1120 }
1121 }
1122
1123 for targets in exported_symbol_targets_by_file.values_mut() {
1124 for ids in targets.values_mut() {
1125 let existing = std::mem::take(ids);
1126 extend_unique_block_ids(ids, existing);
1127 }
1128 }
1129
1130 for _ in 0..=file_analyses.len() {
1131 let mut progress = false;
1132
1133 for record in &file_analyses {
1134 for import in &record.imports {
1135 if !import.reexported {
1136 continue;
1137 }
1138
1139 let ImportResolution::Resolved(target) =
1140 resolve_import(&record.file, &record.language, &import.module, &known_files)
1141 else {
1142 continue;
1143 };
1144
1145 let target_exports = exported_symbol_targets_by_file
1146 .get(&target)
1147 .cloned()
1148 .unwrap_or_default();
1149 let entry = exported_symbol_targets_by_file
1150 .entry(record.file.clone())
1151 .or_default();
1152
1153 if import.wildcard {
1154 for (export_name, ids) in target_exports.clone() {
1155 if export_name == "default" {
1156 continue;
1157 }
1158 let targets = entry.entry(export_name).or_default();
1159 progress |= extend_unique_block_ids(targets, ids.iter().copied());
1160 }
1161 }
1162
1163 for binding in &import.bindings {
1164 if let Some(ids) = target_exports.get(&binding.source_name) {
1165 let targets = entry.entry(binding.local_name.clone()).or_default();
1166 progress |= extend_unique_block_ids(targets, ids.iter().copied());
1167 }
1168 }
1169 }
1170 }
1171
1172 if !progress {
1173 break;
1174 }
1175 }
1176
1177 for record in &file_analyses {
1178 for import in &record.imports {
1179 match resolve_import(&record.file, &record.language, &import.module, &known_files) {
1180 ImportResolution::Resolved(target) if target != record.file => {
1181 dependencies_by_file
1182 .entry(record.file.clone())
1183 .or_default()
1184 .insert(target.clone());
1185 pending_reference_edges.insert((
1186 record.file.clone(),
1187 target.clone(),
1188 import.module.clone(),
1189 ));
1190
1191 for symbol_name in &import.symbols {
1192 pending_symbol_reference_edges.insert((
1193 record.file.clone(),
1194 target.clone(),
1195 symbol_name.clone(),
1196 import.module.clone(),
1197 ));
1198 if import.reexported {
1199 pending_reexport_edges.insert((
1200 record.file.clone(),
1201 target.clone(),
1202 symbol_name.clone(),
1203 import.module.clone(),
1204 ));
1205 }
1206 }
1207
1208 if matches!(record.language, CodeLanguage::Rust | CodeLanguage::Python)
1209 && import.wildcard
1210 {
1211 if let Some(exports) = exported_symbol_targets_by_file.get(&target) {
1212 let entry = imported_symbol_targets_by_file
1213 .entry(record.file.clone())
1214 .or_default();
1215 for (export_name, target_symbol_ids) in exports {
1216 if export_name == "default" {
1217 continue;
1218 }
1219 entry
1220 .entry(export_name.clone())
1221 .or_default()
1222 .extend(target_symbol_ids.iter().copied());
1223 }
1224 }
1225 }
1226
1227 if !import.bindings.is_empty() {
1228 let entry = imported_symbol_targets_by_file
1229 .entry(record.file.clone())
1230 .or_default();
1231 for binding in &import.bindings {
1232 if let Some(target_symbol_ids) = exported_symbol_targets_by_file
1233 .get(&target)
1234 .and_then(|exports| exports.get(&binding.source_name))
1235 {
1236 entry
1237 .entry(binding.local_name.clone())
1238 .or_default()
1239 .extend(target_symbol_ids.iter().copied());
1240 }
1241 }
1242 }
1243
1244 if !import.module_aliases.is_empty() {
1245 let path_entry = imported_module_paths_by_file
1246 .entry(record.file.clone())
1247 .or_default();
1248 for alias in &import.module_aliases {
1249 let paths = path_entry.entry(alias.clone()).or_default();
1250 if !paths.contains(&import.module) {
1251 paths.push(import.module.clone());
1252 }
1253 }
1254
1255 let entry = imported_module_targets_by_file
1256 .entry(record.file.clone())
1257 .or_default();
1258 for alias in &import.module_aliases {
1259 let targets = entry.entry(alias.clone()).or_default();
1260 if !targets.contains(&target) {
1261 targets.push(target.clone());
1262 }
1263 }
1264 }
1265
1266 if import.reexported && import.wildcard && import.symbols.is_empty() {
1267 pending_wildcard_reexport_edges.insert((
1268 record.file.clone(),
1269 target.clone(),
1270 import.module.clone(),
1271 import.symbols.clone(),
1272 ));
1273 }
1274
1275 if import.wildcard && import.symbols.is_empty() {
1276 pending_wildcard_symbol_reference_edges.insert((
1277 record.file.clone(),
1278 target,
1279 import.module.clone(),
1280 ));
1281 }
1282 }
1283 ImportResolution::Resolved(_) | ImportResolution::External => {}
1284 ImportResolution::Unresolved => {
1285 diagnostics.push(
1286 CodeGraphDiagnostic::warning(
1287 "CG2006",
1288 format!("unresolved import '{}'", import.module),
1289 )
1290 .with_path(record.file.clone()),
1291 );
1292 }
1293 }
1294 }
1295 }
1296
1297 for targets in imported_symbol_targets_by_file.values_mut() {
1298 for symbol_ids in targets.values_mut() {
1299 let mut unique_ids = Vec::new();
1300 for symbol_id in symbol_ids.drain(..) {
1301 if !unique_ids.contains(&symbol_id) {
1302 unique_ids.push(symbol_id);
1303 }
1304 }
1305 *symbol_ids = unique_ids;
1306 }
1307 }
1308
1309 for targets in imported_module_targets_by_file.values_mut() {
1310 for file_paths in targets.values_mut() {
1311 file_paths.sort();
1312 file_paths.dedup();
1313 }
1314 }
1315
1316 for targets in imported_module_paths_by_file.values_mut() {
1317 for module_paths in targets.values_mut() {
1318 module_paths.sort();
1319 module_paths.dedup();
1320 }
1321 }
1322
1323 for record in &file_analyses {
1324 for alias in &record.aliases {
1325 let scope_key = alias_scope_key(alias.owner_identity.as_deref());
1326 alias_names_by_scope
1327 .entry((record.file.clone(), scope_key.clone()))
1328 .or_default()
1329 .insert(alias.name.clone());
1330 alias_records_by_scope
1331 .entry((record.file.clone(), scope_key))
1332 .or_default()
1333 .entry(alias.name.clone())
1334 .or_default()
1335 .push(alias.clone());
1336 }
1337 }
1338
1339 let mut unresolved_aliases = file_analyses
1340 .iter()
1341 .flat_map(|record| {
1342 record
1343 .aliases
1344 .iter()
1345 .cloned()
1346 .map(|alias| (record.file.clone(), record.language, alias))
1347 })
1348 .collect::<Vec<_>>();
1349
1350 while !unresolved_aliases.is_empty() {
1351 let mut next_unresolved = Vec::new();
1352 let mut made_progress = false;
1353
1354 for (file, language, alias) in unresolved_aliases {
1355 let target_ids = resolve_alias_target_ids(
1356 &file,
1357 language,
1358 &alias,
1359 &top_level_symbol_ids,
1360 &exported_symbol_targets_by_file,
1361 &imported_symbol_targets_by_file,
1362 &imported_module_targets_by_file,
1363 &imported_module_paths_by_file,
1364 &alias_names_by_scope,
1365 &aliased_symbol_targets_by_scope,
1366 &known_files,
1367 );
1368 if target_ids.is_empty() {
1369 next_unresolved.push((file, language, alias));
1370 continue;
1371 }
1372
1373 aliased_symbol_targets_by_scope
1374 .entry((file, alias_scope_key(alias.owner_identity.as_deref())))
1375 .or_default()
1376 .entry(alias.name)
1377 .or_default()
1378 .extend(target_ids);
1379 made_progress = true;
1380 }
1381
1382 if !made_progress {
1383 break;
1384 }
1385 unresolved_aliases = next_unresolved;
1386 }
1387
1388 for targets in aliased_symbol_targets_by_scope.values_mut() {
1389 for symbol_ids in targets.values_mut() {
1390 let mut unique_ids = Vec::new();
1391 for symbol_id in symbol_ids.drain(..) {
1392 if !unique_ids.contains(&symbol_id) {
1393 unique_ids.push(symbol_id);
1394 }
1395 }
1396 *symbol_ids = unique_ids;
1397 }
1398 }
1399
1400 for record in &file_analyses {
1401 for relationship in &record.relationships {
1402 let Some(source_id) = symbol_ids_by_file_identity
1403 .get(&(record.file.clone(), relationship.source_identity.clone()))
1404 else {
1405 continue;
1406 };
1407
1408 for target_id in resolve_relationship_target_ids(
1409 &record.file,
1410 record.language,
1411 relationship,
1412 &top_level_symbol_ids,
1413 &imported_symbol_targets_by_file,
1414 &known_files,
1415 ) {
1416 if target_id == *source_id {
1417 continue;
1418 }
1419 if let Some(target_file) = symbol_file_by_id.get(&target_id) {
1420 if target_file != &record.file {
1421 dependencies_by_file
1422 .entry(record.file.clone())
1423 .or_default()
1424 .insert(target_file.clone());
1425 }
1426 }
1427 let edge = (
1428 *source_id,
1429 target_id,
1430 relationship.relation.clone(),
1431 relationship.target_expr.clone(),
1432 );
1433 if !pending_relationship_edges.contains(&edge) {
1434 pending_relationship_edges.push(edge);
1435 }
1436 }
1437 }
1438 }
1439
1440 for record in &file_analyses {
1441 for usage in &record.usages {
1442 let Some(source_id) = symbol_ids_by_file_identity
1443 .get(&(record.file.clone(), usage.source_identity.clone()))
1444 else {
1445 continue;
1446 };
1447
1448 for target_id in resolve_usage_target_ids(
1449 &record.file,
1450 record.language,
1451 usage,
1452 &top_level_symbol_ids,
1453 &exported_symbol_targets_by_file,
1454 &imported_symbol_targets_by_file,
1455 &imported_module_targets_by_file,
1456 &imported_module_paths_by_file,
1457 &alias_names_by_scope,
1458 &alias_records_by_scope,
1459 &aliased_symbol_targets_by_scope,
1460 &known_files,
1461 ) {
1462 if let Some(target_file) = symbol_file_by_id.get(&target_id) {
1463 if target_file != &record.file {
1464 dependencies_by_file
1465 .entry(record.file.clone())
1466 .or_default()
1467 .insert(target_file.clone());
1468 }
1469 }
1470 let edge = (*source_id, target_id, usage.target_expr.clone());
1471 if !pending_usage_edges.contains(&edge) {
1472 pending_usage_edges.push(edge);
1473 }
1474 }
1475 }
1476 }
1477
1478 for (source_path, target_path, raw_import) in pending_reference_edges {
1479 let (Some(source_id), Some(target_id)) =
1480 (file_ids.get(&source_path), file_ids.get(&target_path))
1481 else {
1482 continue;
1483 };
1484 let mut edge = Edge::new(EdgeType::References, *target_id);
1485 edge.metadata
1486 .custom
1487 .insert("relation".to_string(), json!("imports"));
1488 edge.metadata
1489 .custom
1490 .insert("raw_import".to_string(), json!(raw_import));
1491 if let Some(source_block) = doc.get_block_mut(source_id) {
1492 source_block.edges.push(edge);
1493 }
1494 }
1495
1496 for (source_path, target_path, symbol_name, raw_import) in pending_symbol_reference_edges {
1497 let Some(source_id) = file_ids.get(&source_path) else {
1498 continue;
1499 };
1500 let Some(target_symbol_ids) =
1501 top_level_symbol_ids.get(&(target_path.clone(), symbol_name.clone()))
1502 else {
1503 continue;
1504 };
1505
1506 for target_symbol_id in target_symbol_ids {
1507 let mut edge = Edge::new(
1508 EdgeType::Custom("imports_symbol".to_string()),
1509 *target_symbol_id,
1510 );
1511 edge.metadata
1512 .custom
1513 .insert("relation".to_string(), json!("imports_symbol"));
1514 edge.metadata
1515 .custom
1516 .insert("raw_import".to_string(), json!(raw_import.clone()));
1517 edge.metadata
1518 .custom
1519 .insert("symbol".to_string(), json!(symbol_name.clone()));
1520 if let Some(source_block) = doc.get_block_mut(source_id) {
1521 source_block.edges.push(edge);
1522 }
1523 }
1524 }
1525
1526 for (source_path, target_path, raw_import) in pending_wildcard_symbol_reference_edges {
1527 let Some(source_id) = file_ids.get(&source_path) else {
1528 continue;
1529 };
1530 let Some(target_symbols) = exported_top_level_symbol_ids.get(&target_path) else {
1531 continue;
1532 };
1533
1534 for (symbol_name, target_symbol_id) in target_symbols {
1535 let mut edge = Edge::new(
1536 EdgeType::Custom("imports_symbol".to_string()),
1537 *target_symbol_id,
1538 );
1539 edge.metadata
1540 .custom
1541 .insert("relation".to_string(), json!("imports_symbol"));
1542 edge.metadata
1543 .custom
1544 .insert("raw_import".to_string(), json!(raw_import.clone()));
1545 edge.metadata
1546 .custom
1547 .insert("symbol".to_string(), json!(symbol_name.clone()));
1548 if let Some(source_block) = doc.get_block_mut(source_id) {
1549 source_block.edges.push(edge);
1550 }
1551 }
1552 }
1553
1554 if config.emit_export_edges {
1555 for (source_path, target_path, symbol_name, raw_import) in pending_reexport_edges {
1556 let Some(source_id) = file_ids.get(&source_path) else {
1557 continue;
1558 };
1559 let Some(target_symbol_ids) =
1560 top_level_symbol_ids.get(&(target_path.clone(), symbol_name.clone()))
1561 else {
1562 continue;
1563 };
1564
1565 for target_symbol_id in target_symbol_ids {
1566 let mut edge =
1567 Edge::new(EdgeType::Custom("exports".to_string()), *target_symbol_id);
1568 edge.metadata
1569 .custom
1570 .insert("relation".to_string(), json!("reexports"));
1571 edge.metadata
1572 .custom
1573 .insert("raw_import".to_string(), json!(raw_import.clone()));
1574 edge.metadata
1575 .custom
1576 .insert("symbol".to_string(), json!(symbol_name.clone()));
1577 if let Some(source_block) = doc.get_block_mut(source_id) {
1578 source_block.edges.push(edge);
1579 }
1580 }
1581 }
1582
1583 for (source_path, target_path, raw_import, filter_names) in pending_wildcard_reexport_edges
1584 {
1585 let Some(source_id) = file_ids.get(&source_path) else {
1586 continue;
1587 };
1588 let Some(target_symbols) = exported_top_level_symbol_ids.get(&target_path) else {
1589 continue;
1590 };
1591
1592 for (symbol_name, target_symbol_id) in target_symbols {
1593 if !filter_names.is_empty() && !filter_names.contains(symbol_name) {
1594 continue;
1595 }
1596 let mut edge =
1597 Edge::new(EdgeType::Custom("exports".to_string()), *target_symbol_id);
1598 edge.metadata
1599 .custom
1600 .insert("relation".to_string(), json!("reexports"));
1601 edge.metadata
1602 .custom
1603 .insert("raw_import".to_string(), json!(raw_import.clone()));
1604 edge.metadata
1605 .custom
1606 .insert("symbol".to_string(), json!(symbol_name.clone()));
1607 if let Some(source_block) = doc.get_block_mut(source_id) {
1608 source_block.edges.push(edge);
1609 }
1610 }
1611 }
1612 }
1613
1614 for (source_id, target_id, relation, raw_target) in pending_relationship_edges {
1615 let mut edge = Edge::new(EdgeType::Custom(relation.clone()), target_id);
1616 edge.metadata
1617 .custom
1618 .insert("relation".to_string(), json!(relation));
1619 edge.metadata
1620 .custom
1621 .insert("raw_target".to_string(), json!(raw_target));
1622 if let Some(source_block) = doc.get_block_mut(&source_id) {
1623 source_block.edges.push(edge);
1624 }
1625 }
1626
1627 for (source_id, target_id, raw_target) in pending_usage_edges {
1628 let mut edge = Edge::new(EdgeType::Custom("uses_symbol".to_string()), target_id);
1629 edge.metadata
1630 .custom
1631 .insert("relation".to_string(), json!("uses_symbol"));
1632 edge.metadata
1633 .custom
1634 .insert("raw_target".to_string(), json!(raw_target));
1635 if let Some(source_block) = doc.get_block_mut(&source_id) {
1636 source_block.edges.push(edge);
1637 }
1638 }
1639
1640 sort_structure_children_by_logical_key(&mut doc);
1641 sort_edges(&mut doc);
1642 normalize_temporal_fields(&mut doc);
1643 doc.rebuild_indices();
1644
1645 let mut validation = validate_code_graph_profile(&doc);
1646 diagnostics.append(&mut validation.diagnostics);
1647
1648 let fingerprint = canonical_fingerprint(&doc)?;
1649 let stats = compute_stats(&doc);
1650 let has_profile_errors = diagnostics
1651 .iter()
1652 .any(|d| d.severity == CodeGraphSeverity::Error && d.code.starts_with("CG100"));
1653 let has_non_info = diagnostics
1654 .iter()
1655 .any(|d| d.severity != CodeGraphSeverity::Info);
1656 let status = if has_profile_errors {
1657 CodeGraphBuildStatus::FailedValidation
1658 } else if has_non_info {
1659 CodeGraphBuildStatus::PartialSuccess
1660 } else {
1661 CodeGraphBuildStatus::Success
1662 };
1663
1664 Ok(AssembledCodeGraph {
1665 result: CodeGraphBuildResult {
1666 document: doc,
1667 diagnostics,
1668 stats,
1669 profile_version: CODEGRAPH_PROFILE_MARKER.to_string(),
1670 canonical_fingerprint: fingerprint,
1671 status,
1672 incremental: None,
1673 },
1674 dependencies_by_file: dependencies_by_file
1675 .into_iter()
1676 .map(|(file, deps)| (file, deps.into_iter().collect()))
1677 .collect(),
1678 })
1679}
1680
1681pub(super) fn initialize_document_metadata(
1682 doc: &mut Document,
1683 repo_root: &Path,
1684 repo_name: &str,
1685 commit: &str,
1686) {
1687 doc.metadata.title = Some(format!("CodeGraph: {}", repo_name));
1688 doc.metadata.description = Some("CodeGraphProfile v1 document".to_string());
1689 doc.metadata.language = Some("multi".to_string());
1690 doc.metadata
1691 .custom
1692 .insert("profile".to_string(), json!(CODEGRAPH_PROFILE));
1693 doc.metadata.custom.insert(
1694 "profile_version".to_string(),
1695 json!(CODEGRAPH_PROFILE_VERSION),
1696 );
1697 doc.metadata.custom.insert(
1698 "profile_marker".to_string(),
1699 json!(CODEGRAPH_PROFILE_MARKER),
1700 );
1701 doc.metadata.custom.insert(
1702 "extractor_version".to_string(),
1703 json!(CODEGRAPH_EXTRACTOR_VERSION),
1704 );
1705 doc.metadata
1706 .custom
1707 .insert("commit_hash".to_string(), json!(commit));
1708 doc.metadata.custom.insert(
1709 "repository_path".to_string(),
1710 json!(normalize_path(repo_root)),
1711 );
1712}
1713
1714pub(super) fn make_repository_block(repo_name: &str, commit_hash: &str) -> Block {
1715 let coderef = json!({
1716 "path": ".",
1717 "display": repo_name,
1718 });
1719 let mut block = Block::new(
1720 Content::json(json!({
1721 "coderef": coderef.clone(),
1722 "name": repo_name,
1723 "commit": commit_hash,
1724 })),
1725 Some("custom.repository"),
1726 );
1727 block.metadata.label = Some(repo_name.to_string());
1728 block
1729 .metadata
1730 .custom
1731 .insert(META_NODE_CLASS.to_string(), json!("repository"));
1732 block.metadata.custom.insert(
1733 META_LOGICAL_KEY.to_string(),
1734 json!(format!("repository:{}", repo_name)),
1735 );
1736 block
1737 .metadata
1738 .custom
1739 .insert(META_CODEREF.to_string(), coderef);
1740 block
1741}
1742
1743pub(super) fn make_directory_block(path: &str) -> Block {
1744 let coderef = json!({
1745 "path": path,
1746 "display": path,
1747 });
1748 let mut block = Block::new(
1749 Content::json(json!({
1750 "coderef": coderef.clone(),
1751 })),
1752 Some("custom.directory"),
1753 );
1754 block.metadata.label = Some(path.to_string());
1755 block
1756 .metadata
1757 .custom
1758 .insert(META_NODE_CLASS.to_string(), json!("directory"));
1759 block
1760 .metadata
1761 .custom
1762 .insert(META_CODEREF.to_string(), coderef);
1763 block.metadata.custom.insert(
1764 META_LOGICAL_KEY.to_string(),
1765 json!(format!("directory:{}", path)),
1766 );
1767 block
1768}
1769
1770pub(super) fn make_file_block(path: &str, language: &str, description: Option<&str>) -> Block {
1771 let coderef = json!({
1772 "path": path,
1773 "display": path,
1774 });
1775 let mut content = serde_json::Map::new();
1776 content.insert("coderef".to_string(), coderef.clone());
1777 content.insert("language".to_string(), json!(language));
1778 if let Some(description) = description {
1779 content.insert("description".to_string(), json!(description));
1780 }
1781
1782 let mut block = Block::new(
1783 Content::json(serde_json::Value::Object(content)),
1784 Some("custom.file"),
1785 );
1786 block.metadata.label = Some(path.to_string());
1787 block.metadata.summary = description.map(|value| value.to_string());
1788 block
1789 .metadata
1790 .custom
1791 .insert(META_NODE_CLASS.to_string(), json!("file"));
1792 block
1793 .metadata
1794 .custom
1795 .insert(META_CODEREF.to_string(), coderef);
1796 block
1797 .metadata
1798 .custom
1799 .insert(META_LANGUAGE.to_string(), json!(language));
1800 block.metadata.custom.insert(
1801 META_LOGICAL_KEY.to_string(),
1802 json!(format!("file:{}", path)),
1803 );
1804 block
1805}
1806
1807pub(super) fn make_symbol_block(
1808 logical_key: &str,
1809 path: &str,
1810 language: &str,
1811 symbol: &ExtractedSymbol,
1812) -> Block {
1813 let line_range = format_line_range(symbol.start_line, symbol.end_line);
1814 let coderef = json!({
1815 "path": path,
1816 "start_line": symbol.start_line,
1817 "start_col": symbol.start_col,
1818 "end_line": symbol.end_line,
1819 "end_col": symbol.end_col,
1820 "display": format_coderef(path, &line_range),
1821 });
1822
1823 let mut content = serde_json::Map::new();
1824 content.insert("name".to_string(), json!(symbol.name));
1825 content.insert("kind".to_string(), json!(symbol.kind));
1826 content.insert("coderef".to_string(), coderef.clone());
1827 content.insert("exported".to_string(), json!(symbol.exported));
1828 if let Some(description) = &symbol.description {
1829 content.insert("description".to_string(), json!(description));
1830 }
1831 if !symbol.modifiers.is_empty() {
1832 content.insert("modifiers".to_string(), json!(symbol.modifiers));
1833 }
1834 if !symbol.inputs.is_empty() {
1835 content.insert("inputs".to_string(), json!(symbol.inputs));
1836 }
1837 if let Some(output) = &symbol.output {
1838 content.insert("output".to_string(), json!(output));
1839 }
1840 if let Some(type_info) = &symbol.type_info {
1841 content.insert("type".to_string(), json!(type_info));
1842 }
1843
1844 let mut block = Block::new(
1845 Content::json(serde_json::Value::Object(content)),
1846 Some("custom.symbol"),
1847 );
1848
1849 block.metadata.label = Some(symbol.name.clone());
1850 block.metadata.summary = symbol.description.clone();
1851 block
1852 .metadata
1853 .custom
1854 .insert(META_NODE_CLASS.to_string(), json!("symbol"));
1855 block
1856 .metadata
1857 .custom
1858 .insert(META_LOGICAL_KEY.to_string(), json!(logical_key));
1859 block
1860 .metadata
1861 .custom
1862 .insert(META_CODEREF.to_string(), coderef);
1863 block
1864 .metadata
1865 .custom
1866 .insert(META_LANGUAGE.to_string(), json!(language));
1867 block
1868 .metadata
1869 .custom
1870 .insert(META_SYMBOL_KIND.to_string(), json!(symbol.kind));
1871 block
1872 .metadata
1873 .custom
1874 .insert(META_SYMBOL_NAME.to_string(), json!(symbol.name));
1875 block
1876 .metadata
1877 .custom
1878 .insert(META_EXPORTED.to_string(), json!(symbol.exported));
1879 block
1880}