Skip to main content

ucp_codegraph/legacy/
build.rs

1use anyhow::{anyhow, Context, Result};
2use serde_json::json;
3use sha2::{Digest, Sha256};
4use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
5use std::fs;
6use std::path::Path;
7use ucm_core::{Block, BlockId, Content, Document, DocumentId, Edge, EdgeType};
8
9use crate::model::*;
10
11use super::languages::ts_js::extend_unique_block_ids;
12use super::{
13    alias_scope_key, analyze_file, ancestor_directories, canonical_fingerprint,
14    collect_repository_files, compare_extracted_symbols, compute_stats, format_coderef,
15    format_line_range, normalize_path, normalize_temporal_fields, parent_directory_id,
16    parent_id_for_file, resolve_alias_target_ids, resolve_import, resolve_relationship_target_ids,
17    resolve_usage_target_ids, sanitize_identifier, sort_edges,
18    sort_structure_children_by_logical_key, unique_symbol_logical_key, validate_code_graph_profile,
19    GitignoreMatcher,
20};
21
22pub fn build_code_graph(input: &CodeGraphBuildInput) -> Result<CodeGraphBuildResult> {
23    let repo_root = input
24        .repository_path
25        .canonicalize()
26        .with_context(|| format!("failed to resolve repo path {:?}", input.repository_path))?;
27
28    if !repo_root.is_dir() {
29        return Err(anyhow!(
30            "repository path is not a directory: {}",
31            repo_root.display()
32        ));
33    }
34
35    let mut diagnostics = Vec::new();
36    let matcher = GitignoreMatcher::from_repository(&repo_root)?;
37    let files = collect_repository_files(&repo_root, &input.config, &matcher, &mut diagnostics)?;
38
39    let repo_name = repo_root
40        .file_name()
41        .map(|s| s.to_string_lossy().to_string())
42        .unwrap_or_else(|| "repository".to_string());
43
44    let mut doc = Document::new(DocumentId::new(format!(
45        "codegraph:{}:{}",
46        sanitize_identifier(&repo_name),
47        sanitize_identifier(&input.commit_hash)
48    )));
49
50    initialize_document_metadata(&mut doc, &repo_root, &repo_name, &input.commit_hash);
51
52    let repo_block = make_repository_block(&repo_name, &input.commit_hash);
53    let root_id = doc.root;
54    let repo_block_id = doc.add_block(repo_block, &root_id)?;
55
56    let mut directories = BTreeSet::new();
57    for file in &files {
58        for dir in ancestor_directories(&file.relative_path) {
59            directories.insert(dir);
60        }
61    }
62
63    let mut directory_ids: BTreeMap<String, BlockId> = BTreeMap::new();
64    for dir in directories {
65        let parent_id = parent_directory_id(&dir, &directory_ids).unwrap_or(repo_block_id);
66        let block = make_directory_block(&dir);
67        let block_id = doc.add_block(block, &parent_id)?;
68        directory_ids.insert(dir, block_id);
69    }
70
71    let mut file_ids: BTreeMap<String, BlockId> = BTreeMap::new();
72    let mut symbol_ids_by_file_identity: BTreeMap<(String, String), BlockId> = BTreeMap::new();
73    let mut top_level_symbol_ids: BTreeMap<(String, String), Vec<BlockId>> = BTreeMap::new();
74    let mut exported_top_level_symbol_ids: BTreeMap<String, Vec<(String, BlockId)>> =
75        BTreeMap::new();
76    let mut default_exported_top_level_symbol_ids: BTreeMap<String, Vec<BlockId>> = BTreeMap::new();
77    let mut file_analyses = Vec::new();
78    let mut used_symbol_keys: HashSet<String> = HashSet::new();
79
80    for file in files {
81        let parent_id = parent_id_for_file(&file.relative_path, repo_block_id, &directory_ids);
82
83        let source = match fs::read_to_string(&file.absolute_path) {
84            Ok(s) => s,
85            Err(err) => {
86                let diag = CodeGraphDiagnostic::error(
87                    "CG2003",
88                    format!("failed to read source file: {}", err),
89                )
90                .with_path(file.relative_path.clone());
91                diagnostics.push(diag);
92                if input.config.continue_on_parse_error {
93                    continue;
94                }
95                return Err(anyhow!(
96                    "failed to read source file {}: {}",
97                    file.relative_path,
98                    err
99                ));
100            }
101        };
102
103        if source.len() > input.config.max_file_bytes {
104            diagnostics.push(
105                CodeGraphDiagnostic::warning(
106                    "CG2008",
107                    format!(
108                        "file skipped due to size limit ({} bytes > {} bytes)",
109                        source.len(),
110                        input.config.max_file_bytes
111                    ),
112                )
113                .with_path(file.relative_path.clone()),
114            );
115            continue;
116        }
117
118        let FileAnalysis {
119            file_description,
120            mut symbols,
121            imports,
122            relationships,
123            usages,
124            aliases,
125            export_bindings,
126            default_exported_symbol_names,
127            diagnostics: analysis_diagnostics,
128            ..
129        } = analyze_file(&file.relative_path, &source, file.language);
130
131        let file_block = make_file_block(
132            &file.relative_path,
133            file.language.as_str(),
134            file_description.as_deref(),
135        );
136        let file_block_id = doc.add_block(file_block, &parent_id)?;
137        file_ids.insert(file.relative_path.clone(), file_block_id);
138
139        for diag in &analysis_diagnostics {
140            diagnostics.push(diag.clone().with_path(file.relative_path.clone()));
141        }
142
143        symbols.sort_by(compare_extracted_symbols);
144        let mut symbol_ids_by_identity: BTreeMap<String, BlockId> = BTreeMap::new();
145
146        for symbol in &symbols {
147            let parent_block_id = symbol
148                .parent_identity
149                .as_ref()
150                .and_then(|identity| symbol_ids_by_identity.get(identity).copied())
151                .unwrap_or(file_block_id);
152            let logical_key = unique_symbol_logical_key(
153                &file.relative_path,
154                &symbol.qualified_name,
155                symbol.start_line,
156                &mut used_symbol_keys,
157            );
158            let symbol_block = make_symbol_block(
159                &logical_key,
160                &file.relative_path,
161                file.language.as_str(),
162                symbol,
163            );
164            let symbol_id = doc.add_block(symbol_block, &parent_block_id)?;
165            symbol_ids_by_identity.insert(symbol.identity.clone(), symbol_id);
166            symbol_ids_by_file_identity.insert(
167                (file.relative_path.clone(), symbol.identity.clone()),
168                symbol_id,
169            );
170
171            if symbol.parent_identity.is_none() {
172                top_level_symbol_ids
173                    .entry((file.relative_path.clone(), symbol.name.clone()))
174                    .or_default()
175                    .push(symbol_id);
176                if symbol.exported {
177                    exported_top_level_symbol_ids
178                        .entry(file.relative_path.clone())
179                        .or_default()
180                        .push((symbol.name.clone(), symbol_id));
181                    if default_exported_symbol_names.contains(&symbol.name) {
182                        default_exported_top_level_symbol_ids
183                            .entry(file.relative_path.clone())
184                            .or_default()
185                            .push(symbol_id);
186                    }
187                }
188            }
189
190            if symbol.exported && input.config.emit_export_edges {
191                let mut edge = Edge::new(EdgeType::Custom("exports".to_string()), symbol_id);
192                edge.metadata
193                    .custom
194                    .insert("relation".to_string(), json!("exports"));
195                edge.metadata
196                    .custom
197                    .insert("symbol".to_string(), json!(symbol.name.clone()));
198                if let Some(source_block) = doc.get_block_mut(&file_block_id) {
199                    source_block.edges.push(edge);
200                }
201            }
202        }
203
204        file_analyses.push(FileAnalysisRecord {
205            file: file.relative_path,
206            language: file.language,
207            imports,
208            relationships,
209            usages,
210            aliases,
211            export_bindings,
212        });
213    }
214
215    let known_files: BTreeSet<String> = file_ids.keys().cloned().collect();
216    let mut exported_symbol_targets_by_file: BTreeMap<String, BTreeMap<String, Vec<BlockId>>> =
217        BTreeMap::new();
218    let mut imported_symbol_targets_by_file: BTreeMap<String, BTreeMap<String, Vec<BlockId>>> =
219        BTreeMap::new();
220    let mut imported_module_targets_by_file: BTreeMap<String, BTreeMap<String, Vec<String>>> =
221        BTreeMap::new();
222    let mut imported_module_paths_by_file: BTreeMap<String, BTreeMap<String, Vec<String>>> =
223        BTreeMap::new();
224    let mut alias_names_by_scope: BTreeMap<(String, String), BTreeSet<String>> = BTreeMap::new();
225    let mut alias_records_by_scope: BTreeMap<
226        (String, String),
227        BTreeMap<String, Vec<ExtractedAlias>>,
228    > = BTreeMap::new();
229    let mut aliased_symbol_targets_by_scope: BTreeMap<
230        (String, String),
231        BTreeMap<String, Vec<BlockId>>,
232    > = BTreeMap::new();
233    let mut pending_reference_edges: BTreeSet<(String, String, String)> = BTreeSet::new();
234    let mut pending_symbol_reference_edges: BTreeSet<(String, String, String, String)> =
235        BTreeSet::new();
236    let mut pending_wildcard_symbol_reference_edges: BTreeSet<(String, String, String)> =
237        BTreeSet::new();
238    let mut pending_reexport_edges: BTreeSet<(String, String, String, String)> = BTreeSet::new();
239    let mut pending_wildcard_reexport_edges: BTreeSet<(String, String, String, Vec<String>)> =
240        BTreeSet::new();
241    let mut pending_relationship_edges: Vec<(BlockId, BlockId, String, String)> = Vec::new();
242    let mut pending_usage_edges: Vec<(BlockId, BlockId, String)> = Vec::new();
243
244    for (file, exports) in &exported_top_level_symbol_ids {
245        let entry = exported_symbol_targets_by_file
246            .entry(file.clone())
247            .or_default();
248        for (name, symbol_id) in exports {
249            entry.entry(name.clone()).or_default().push(*symbol_id);
250        }
251    }
252    for (file, ids) in &default_exported_top_level_symbol_ids {
253        exported_symbol_targets_by_file
254            .entry(file.clone())
255            .or_default()
256            .entry("default".to_string())
257            .or_default()
258            .extend(ids.iter().copied());
259    }
260    for record in &file_analyses {
261        let entry = exported_symbol_targets_by_file
262            .entry(record.file.clone())
263            .or_default();
264        for binding in &record.export_bindings {
265            if let Some(ids) =
266                top_level_symbol_ids.get(&(record.file.clone(), binding.local_name.clone()))
267            {
268                extend_unique_block_ids(
269                    entry.entry(binding.source_name.clone()).or_default(),
270                    ids.iter().copied(),
271                );
272            }
273        }
274    }
275
276    for targets in exported_symbol_targets_by_file.values_mut() {
277        for ids in targets.values_mut() {
278            let existing = std::mem::take(ids);
279            extend_unique_block_ids(ids, existing);
280        }
281    }
282
283    for _ in 0..=file_analyses.len() {
284        let mut progress = false;
285
286        for record in &file_analyses {
287            for import in &record.imports {
288                if !import.reexported {
289                    continue;
290                }
291
292                let ImportResolution::Resolved(target) =
293                    resolve_import(&record.file, &record.language, &import.module, &known_files)
294                else {
295                    continue;
296                };
297
298                let target_exports = exported_symbol_targets_by_file
299                    .get(&target)
300                    .cloned()
301                    .unwrap_or_default();
302                let entry = exported_symbol_targets_by_file
303                    .entry(record.file.clone())
304                    .or_default();
305
306                if import.wildcard {
307                    for (export_name, ids) in target_exports.clone() {
308                        if export_name == "default" {
309                            continue;
310                        }
311                        let targets = entry.entry(export_name).or_default();
312                        progress |= extend_unique_block_ids(targets, ids.iter().copied());
313                    }
314                }
315
316                for binding in &import.bindings {
317                    if let Some(ids) = target_exports.get(&binding.source_name) {
318                        let targets = entry.entry(binding.local_name.clone()).or_default();
319                        progress |= extend_unique_block_ids(targets, ids.iter().copied());
320                    }
321                }
322            }
323        }
324
325        if !progress {
326            break;
327        }
328    }
329
330    for record in &file_analyses {
331        for import in &record.imports {
332            match resolve_import(&record.file, &record.language, &import.module, &known_files) {
333                ImportResolution::Resolved(target) if target != record.file => {
334                    pending_reference_edges.insert((
335                        record.file.clone(),
336                        target.clone(),
337                        import.module.clone(),
338                    ));
339
340                    for symbol_name in &import.symbols {
341                        pending_symbol_reference_edges.insert((
342                            record.file.clone(),
343                            target.clone(),
344                            symbol_name.clone(),
345                            import.module.clone(),
346                        ));
347                        if import.reexported {
348                            pending_reexport_edges.insert((
349                                record.file.clone(),
350                                target.clone(),
351                                symbol_name.clone(),
352                                import.module.clone(),
353                            ));
354                        }
355                    }
356
357                    if matches!(record.language, CodeLanguage::Rust | CodeLanguage::Python)
358                        && import.wildcard
359                    {
360                        if let Some(exports) = exported_symbol_targets_by_file.get(&target) {
361                            let entry = imported_symbol_targets_by_file
362                                .entry(record.file.clone())
363                                .or_default();
364                            for (export_name, target_symbol_ids) in exports {
365                                if export_name == "default" {
366                                    continue;
367                                }
368                                entry
369                                    .entry(export_name.clone())
370                                    .or_default()
371                                    .extend(target_symbol_ids.iter().copied());
372                            }
373                        }
374                    }
375
376                    if !import.bindings.is_empty() {
377                        let entry = imported_symbol_targets_by_file
378                            .entry(record.file.clone())
379                            .or_default();
380                        for binding in &import.bindings {
381                            if let Some(target_symbol_ids) = exported_symbol_targets_by_file
382                                .get(&target)
383                                .and_then(|exports| exports.get(&binding.source_name))
384                            {
385                                entry
386                                    .entry(binding.local_name.clone())
387                                    .or_default()
388                                    .extend(target_symbol_ids.iter().copied());
389                            }
390                        }
391                    }
392
393                    if !import.module_aliases.is_empty() {
394                        let path_entry = imported_module_paths_by_file
395                            .entry(record.file.clone())
396                            .or_default();
397                        for alias in &import.module_aliases {
398                            let paths = path_entry.entry(alias.clone()).or_default();
399                            if !paths.contains(&import.module) {
400                                paths.push(import.module.clone());
401                            }
402                        }
403
404                        let entry = imported_module_targets_by_file
405                            .entry(record.file.clone())
406                            .or_default();
407                        for alias in &import.module_aliases {
408                            let targets = entry.entry(alias.clone()).or_default();
409                            if !targets.contains(&target) {
410                                targets.push(target.clone());
411                            }
412                        }
413                    }
414
415                    if import.reexported && import.wildcard && import.symbols.is_empty() {
416                        pending_wildcard_reexport_edges.insert((
417                            record.file.clone(),
418                            target.clone(),
419                            import.module.clone(),
420                            import.symbols.clone(),
421                        ));
422                    }
423
424                    if import.wildcard && import.symbols.is_empty() {
425                        pending_wildcard_symbol_reference_edges.insert((
426                            record.file.clone(),
427                            target,
428                            import.module.clone(),
429                        ));
430                    }
431                }
432                ImportResolution::Resolved(_) | ImportResolution::External => {}
433                ImportResolution::Unresolved => {
434                    diagnostics.push(
435                        CodeGraphDiagnostic::warning(
436                            "CG2006",
437                            format!("unresolved import '{}'", import.module),
438                        )
439                        .with_path(record.file.clone()),
440                    );
441                }
442            }
443        }
444    }
445
446    for targets in imported_symbol_targets_by_file.values_mut() {
447        for symbol_ids in targets.values_mut() {
448            let mut unique_ids = Vec::new();
449            for symbol_id in symbol_ids.drain(..) {
450                if !unique_ids.contains(&symbol_id) {
451                    unique_ids.push(symbol_id);
452                }
453            }
454            *symbol_ids = unique_ids;
455        }
456    }
457
458    for targets in imported_module_targets_by_file.values_mut() {
459        for file_paths in targets.values_mut() {
460            file_paths.sort();
461            file_paths.dedup();
462        }
463    }
464
465    for targets in imported_module_paths_by_file.values_mut() {
466        for module_paths in targets.values_mut() {
467            module_paths.sort();
468            module_paths.dedup();
469        }
470    }
471
472    for record in &file_analyses {
473        for alias in &record.aliases {
474            let scope_key = alias_scope_key(alias.owner_identity.as_deref());
475            alias_names_by_scope
476                .entry((record.file.clone(), scope_key.clone()))
477                .or_default()
478                .insert(alias.name.clone());
479            alias_records_by_scope
480                .entry((record.file.clone(), scope_key))
481                .or_default()
482                .entry(alias.name.clone())
483                .or_default()
484                .push(alias.clone());
485        }
486    }
487
488    let mut unresolved_aliases = file_analyses
489        .iter()
490        .flat_map(|record| {
491            record
492                .aliases
493                .iter()
494                .cloned()
495                .map(|alias| (record.file.clone(), record.language, alias))
496        })
497        .collect::<Vec<_>>();
498
499    while !unresolved_aliases.is_empty() {
500        let mut next_unresolved = Vec::new();
501        let mut made_progress = false;
502
503        for (file, language, alias) in unresolved_aliases {
504            let target_ids = resolve_alias_target_ids(
505                &file,
506                language,
507                &alias,
508                &top_level_symbol_ids,
509                &exported_symbol_targets_by_file,
510                &imported_symbol_targets_by_file,
511                &imported_module_targets_by_file,
512                &imported_module_paths_by_file,
513                &alias_names_by_scope,
514                &aliased_symbol_targets_by_scope,
515                &known_files,
516            );
517            if target_ids.is_empty() {
518                next_unresolved.push((file, language, alias));
519                continue;
520            }
521
522            aliased_symbol_targets_by_scope
523                .entry((file, alias_scope_key(alias.owner_identity.as_deref())))
524                .or_default()
525                .entry(alias.name)
526                .or_default()
527                .extend(target_ids);
528            made_progress = true;
529        }
530
531        if !made_progress {
532            break;
533        }
534        unresolved_aliases = next_unresolved;
535    }
536
537    for targets in aliased_symbol_targets_by_scope.values_mut() {
538        for symbol_ids in targets.values_mut() {
539            let mut unique_ids = Vec::new();
540            for symbol_id in symbol_ids.drain(..) {
541                if !unique_ids.contains(&symbol_id) {
542                    unique_ids.push(symbol_id);
543                }
544            }
545            *symbol_ids = unique_ids;
546        }
547    }
548
549    for record in &file_analyses {
550        for relationship in &record.relationships {
551            let Some(source_id) = symbol_ids_by_file_identity
552                .get(&(record.file.clone(), relationship.source_identity.clone()))
553            else {
554                continue;
555            };
556
557            for target_id in resolve_relationship_target_ids(
558                &record.file,
559                record.language,
560                relationship,
561                &top_level_symbol_ids,
562                &imported_symbol_targets_by_file,
563                &known_files,
564            ) {
565                if target_id == *source_id {
566                    continue;
567                }
568                let edge = (
569                    *source_id,
570                    target_id,
571                    relationship.relation.clone(),
572                    relationship.target_expr.clone(),
573                );
574                if !pending_relationship_edges.contains(&edge) {
575                    pending_relationship_edges.push(edge);
576                }
577            }
578        }
579    }
580
581    for record in &file_analyses {
582        for usage in &record.usages {
583            let Some(source_id) = symbol_ids_by_file_identity
584                .get(&(record.file.clone(), usage.source_identity.clone()))
585            else {
586                continue;
587            };
588
589            for target_id in resolve_usage_target_ids(
590                &record.file,
591                record.language,
592                usage,
593                &top_level_symbol_ids,
594                &exported_symbol_targets_by_file,
595                &imported_symbol_targets_by_file,
596                &imported_module_targets_by_file,
597                &imported_module_paths_by_file,
598                &alias_names_by_scope,
599                &alias_records_by_scope,
600                &aliased_symbol_targets_by_scope,
601                &known_files,
602            ) {
603                let edge = (*source_id, target_id, usage.target_expr.clone());
604                if !pending_usage_edges.contains(&edge) {
605                    pending_usage_edges.push(edge);
606                }
607            }
608        }
609    }
610
611    for (source_path, target_path, raw_import) in pending_reference_edges {
612        let (Some(source_id), Some(target_id)) =
613            (file_ids.get(&source_path), file_ids.get(&target_path))
614        else {
615            continue;
616        };
617        let mut edge = Edge::new(EdgeType::References, *target_id);
618        edge.metadata
619            .custom
620            .insert("relation".to_string(), json!("imports"));
621        edge.metadata
622            .custom
623            .insert("raw_import".to_string(), json!(raw_import));
624        if let Some(source_block) = doc.get_block_mut(source_id) {
625            source_block.edges.push(edge);
626        }
627    }
628
629    for (source_path, target_path, symbol_name, raw_import) in pending_symbol_reference_edges {
630        let Some(source_id) = file_ids.get(&source_path) else {
631            continue;
632        };
633        let Some(target_symbol_ids) =
634            top_level_symbol_ids.get(&(target_path.clone(), symbol_name.clone()))
635        else {
636            continue;
637        };
638
639        for target_symbol_id in target_symbol_ids {
640            let mut edge = Edge::new(
641                EdgeType::Custom("imports_symbol".to_string()),
642                *target_symbol_id,
643            );
644            edge.metadata
645                .custom
646                .insert("relation".to_string(), json!("imports_symbol"));
647            edge.metadata
648                .custom
649                .insert("raw_import".to_string(), json!(raw_import.clone()));
650            edge.metadata
651                .custom
652                .insert("symbol".to_string(), json!(symbol_name.clone()));
653            if let Some(source_block) = doc.get_block_mut(source_id) {
654                source_block.edges.push(edge);
655            }
656        }
657    }
658
659    for (source_path, target_path, raw_import) in pending_wildcard_symbol_reference_edges {
660        let Some(source_id) = file_ids.get(&source_path) else {
661            continue;
662        };
663        let Some(target_symbols) = exported_top_level_symbol_ids.get(&target_path) else {
664            continue;
665        };
666
667        for (symbol_name, target_symbol_id) in target_symbols {
668            let mut edge = Edge::new(
669                EdgeType::Custom("imports_symbol".to_string()),
670                *target_symbol_id,
671            );
672            edge.metadata
673                .custom
674                .insert("relation".to_string(), json!("imports_symbol"));
675            edge.metadata
676                .custom
677                .insert("raw_import".to_string(), json!(raw_import.clone()));
678            edge.metadata
679                .custom
680                .insert("symbol".to_string(), json!(symbol_name.clone()));
681            if let Some(source_block) = doc.get_block_mut(source_id) {
682                source_block.edges.push(edge);
683            }
684        }
685    }
686
687    if input.config.emit_export_edges {
688        for (source_path, target_path, symbol_name, raw_import) in pending_reexport_edges {
689            let Some(source_id) = file_ids.get(&source_path) else {
690                continue;
691            };
692            let Some(target_symbol_ids) =
693                top_level_symbol_ids.get(&(target_path.clone(), symbol_name.clone()))
694            else {
695                continue;
696            };
697
698            for target_symbol_id in target_symbol_ids {
699                let mut edge =
700                    Edge::new(EdgeType::Custom("exports".to_string()), *target_symbol_id);
701                edge.metadata
702                    .custom
703                    .insert("relation".to_string(), json!("reexports"));
704                edge.metadata
705                    .custom
706                    .insert("raw_import".to_string(), json!(raw_import.clone()));
707                edge.metadata
708                    .custom
709                    .insert("symbol".to_string(), json!(symbol_name.clone()));
710                if let Some(source_block) = doc.get_block_mut(source_id) {
711                    source_block.edges.push(edge);
712                }
713            }
714        }
715
716        for (source_path, target_path, raw_import, filter_names) in pending_wildcard_reexport_edges
717        {
718            let Some(source_id) = file_ids.get(&source_path) else {
719                continue;
720            };
721            let Some(target_symbols) = exported_top_level_symbol_ids.get(&target_path) else {
722                continue;
723            };
724
725            for (symbol_name, target_symbol_id) in target_symbols {
726                if !filter_names.is_empty() && !filter_names.contains(symbol_name) {
727                    continue;
728                }
729                let mut edge =
730                    Edge::new(EdgeType::Custom("exports".to_string()), *target_symbol_id);
731                edge.metadata
732                    .custom
733                    .insert("relation".to_string(), json!("reexports"));
734                edge.metadata
735                    .custom
736                    .insert("raw_import".to_string(), json!(raw_import.clone()));
737                edge.metadata
738                    .custom
739                    .insert("symbol".to_string(), json!(symbol_name.clone()));
740                if let Some(source_block) = doc.get_block_mut(source_id) {
741                    source_block.edges.push(edge);
742                }
743            }
744        }
745    }
746
747    for (source_id, target_id, relation, raw_target) in pending_relationship_edges {
748        let mut edge = Edge::new(EdgeType::Custom(relation.clone()), target_id);
749        edge.metadata
750            .custom
751            .insert("relation".to_string(), json!(relation));
752        edge.metadata
753            .custom
754            .insert("raw_target".to_string(), json!(raw_target));
755        if let Some(source_block) = doc.get_block_mut(&source_id) {
756            source_block.edges.push(edge);
757        }
758    }
759
760    for (source_id, target_id, raw_target) in pending_usage_edges {
761        let mut edge = Edge::new(EdgeType::Custom("uses_symbol".to_string()), target_id);
762        edge.metadata
763            .custom
764            .insert("relation".to_string(), json!("uses_symbol"));
765        edge.metadata
766            .custom
767            .insert("raw_target".to_string(), json!(raw_target));
768        if let Some(source_block) = doc.get_block_mut(&source_id) {
769            source_block.edges.push(edge);
770        }
771    }
772
773    sort_structure_children_by_logical_key(&mut doc);
774    sort_edges(&mut doc);
775    normalize_temporal_fields(&mut doc);
776    doc.rebuild_indices();
777
778    let mut validation = validate_code_graph_profile(&doc);
779    diagnostics.append(&mut validation.diagnostics);
780
781    let fingerprint = canonical_fingerprint(&doc)?;
782    let stats = compute_stats(&doc);
783
784    let has_profile_errors = diagnostics
785        .iter()
786        .any(|d| d.severity == CodeGraphSeverity::Error && d.code.starts_with("CG100"));
787    let has_non_info = diagnostics
788        .iter()
789        .any(|d| d.severity != CodeGraphSeverity::Info);
790
791    let status = if has_profile_errors {
792        CodeGraphBuildStatus::FailedValidation
793    } else if has_non_info {
794        CodeGraphBuildStatus::PartialSuccess
795    } else {
796        CodeGraphBuildStatus::Success
797    };
798
799    Ok(CodeGraphBuildResult {
800        document: doc,
801        diagnostics,
802        stats,
803        profile_version: CODEGRAPH_PROFILE_MARKER.to_string(),
804        canonical_fingerprint: fingerprint,
805        status,
806        incremental: None,
807    })
808}
809
810#[derive(Debug, Clone)]
811pub(super) struct LoadedRepoFile {
812    pub repo_file: RepoFile,
813    pub content_hash: Option<String>,
814    pub source: Option<String>,
815    pub diagnostics: Vec<CodeGraphDiagnostic>,
816}
817
818#[derive(Debug, Clone)]
819pub(super) struct AnalyzedRepoFile {
820    pub relative_path: String,
821    pub language: CodeLanguage,
822    pub content_hash: Option<String>,
823    pub analysis: Option<FileAnalysis>,
824    pub diagnostics: Vec<CodeGraphDiagnostic>,
825}
826
827#[derive(Debug, Clone)]
828pub(super) struct AssembledCodeGraph {
829    pub result: CodeGraphBuildResult,
830    pub dependencies_by_file: BTreeMap<String, Vec<String>>,
831}
832
833pub(super) fn hash_source(source: &str) -> String {
834    let mut hasher = Sha256::new();
835    hasher.update(source.as_bytes());
836    hex::encode(hasher.finalize())
837}
838
839pub(super) fn load_repo_file(
840    repo_file: &RepoFile,
841    config: &CodeGraphExtractorConfig,
842) -> Result<LoadedRepoFile> {
843    let source = match fs::read_to_string(&repo_file.absolute_path) {
844        Ok(source) => source,
845        Err(err) => {
846            let diag = CodeGraphDiagnostic::error(
847                "CG2003",
848                format!("failed to read source file: {}", err),
849            )
850            .with_path(repo_file.relative_path.clone());
851            if config.continue_on_parse_error {
852                return Ok(LoadedRepoFile {
853                    repo_file: repo_file.clone(),
854                    content_hash: None,
855                    source: None,
856                    diagnostics: vec![diag],
857                });
858            }
859            return Err(anyhow!(
860                "failed to read source file {}: {}",
861                repo_file.relative_path,
862                err
863            ));
864        }
865    };
866
867    let content_hash = hash_source(&source);
868    if source.len() > config.max_file_bytes {
869        let diag = CodeGraphDiagnostic::warning(
870            "CG2008",
871            format!(
872                "file skipped due to size limit ({} bytes > {} bytes)",
873                source.len(),
874                config.max_file_bytes
875            ),
876        )
877        .with_path(repo_file.relative_path.clone());
878        return Ok(LoadedRepoFile {
879            repo_file: repo_file.clone(),
880            content_hash: Some(content_hash),
881            source: None,
882            diagnostics: vec![diag],
883        });
884    }
885
886    Ok(LoadedRepoFile {
887        repo_file: repo_file.clone(),
888        content_hash: Some(content_hash),
889        source: Some(source),
890        diagnostics: Vec::new(),
891    })
892}
893
894pub(super) fn analyze_loaded_repo_file(loaded: LoadedRepoFile) -> AnalyzedRepoFile {
895    let mut diagnostics = loaded.diagnostics;
896    let analysis = loaded.source.as_ref().map(|source| {
897        let analysis = analyze_file(
898            &loaded.repo_file.relative_path,
899            source,
900            loaded.repo_file.language,
901        );
902        for diag in &analysis.diagnostics {
903            diagnostics.push(
904                diag.clone()
905                    .with_path(loaded.repo_file.relative_path.clone()),
906            );
907        }
908        analysis
909    });
910
911    AnalyzedRepoFile {
912        relative_path: loaded.repo_file.relative_path,
913        language: loaded.repo_file.language,
914        content_hash: loaded.content_hash,
915        analysis,
916        diagnostics,
917    }
918}
919
920pub(super) fn assemble_code_graph_from_analyzed_files(
921    repo_root: &Path,
922    repo_name: &str,
923    commit_hash: &str,
924    config: &CodeGraphExtractorConfig,
925    analyzed_files: &[AnalyzedRepoFile],
926    mut diagnostics: Vec<CodeGraphDiagnostic>,
927) -> Result<AssembledCodeGraph> {
928    let mut doc = Document::new(DocumentId::new(format!(
929        "codegraph:{}:{}",
930        sanitize_identifier(repo_name),
931        sanitize_identifier(commit_hash)
932    )));
933    initialize_document_metadata(&mut doc, repo_root, repo_name, commit_hash);
934
935    let repo_block = make_repository_block(repo_name, commit_hash);
936    let root_id = doc.root;
937    let repo_block_id = doc.add_block(repo_block, &root_id)?;
938
939    let mut directories = BTreeSet::new();
940    for file in analyzed_files {
941        for dir in ancestor_directories(&file.relative_path) {
942            directories.insert(dir);
943        }
944    }
945
946    let mut directory_ids: BTreeMap<String, BlockId> = BTreeMap::new();
947    for dir in directories {
948        let parent_id = parent_directory_id(&dir, &directory_ids).unwrap_or(repo_block_id);
949        let block = make_directory_block(&dir);
950        let block_id = doc.add_block(block, &parent_id)?;
951        directory_ids.insert(dir, block_id);
952    }
953
954    let mut file_ids: BTreeMap<String, BlockId> = BTreeMap::new();
955    let mut symbol_ids_by_file_identity: BTreeMap<(String, String), BlockId> = BTreeMap::new();
956    let mut symbol_file_by_id: HashMap<BlockId, String> = HashMap::new();
957    let mut top_level_symbol_ids: BTreeMap<(String, String), Vec<BlockId>> = BTreeMap::new();
958    let mut exported_top_level_symbol_ids: BTreeMap<String, Vec<(String, BlockId)>> =
959        BTreeMap::new();
960    let mut default_exported_top_level_symbol_ids: BTreeMap<String, Vec<BlockId>> = BTreeMap::new();
961    let mut file_analyses = Vec::new();
962    let mut used_symbol_keys: HashSet<String> = HashSet::new();
963
964    for analyzed_file in analyzed_files {
965        let parent_id =
966            parent_id_for_file(&analyzed_file.relative_path, repo_block_id, &directory_ids);
967        diagnostics.extend(analyzed_file.diagnostics.clone());
968
969        let Some(analysis) = analyzed_file.analysis.as_ref() else {
970            continue;
971        };
972
973        let file_block = make_file_block(
974            &analyzed_file.relative_path,
975            analyzed_file.language.as_str(),
976            analysis.file_description.as_deref(),
977        );
978        let file_block_id = doc.add_block(file_block, &parent_id)?;
979        file_ids.insert(analyzed_file.relative_path.clone(), file_block_id);
980
981        let mut symbols = analysis.symbols.clone();
982        symbols.sort_by(compare_extracted_symbols);
983        let mut symbol_ids_by_identity: BTreeMap<String, BlockId> = BTreeMap::new();
984
985        for symbol in &symbols {
986            let parent_block_id = symbol
987                .parent_identity
988                .as_ref()
989                .and_then(|identity| symbol_ids_by_identity.get(identity).copied())
990                .unwrap_or(file_block_id);
991            let logical_key = unique_symbol_logical_key(
992                &analyzed_file.relative_path,
993                &symbol.qualified_name,
994                symbol.start_line,
995                &mut used_symbol_keys,
996            );
997            let symbol_block = make_symbol_block(
998                &logical_key,
999                &analyzed_file.relative_path,
1000                analyzed_file.language.as_str(),
1001                symbol,
1002            );
1003            let symbol_id = doc.add_block(symbol_block, &parent_block_id)?;
1004            symbol_ids_by_identity.insert(symbol.identity.clone(), symbol_id);
1005            symbol_file_by_id.insert(symbol_id, analyzed_file.relative_path.clone());
1006            symbol_ids_by_file_identity.insert(
1007                (analyzed_file.relative_path.clone(), symbol.identity.clone()),
1008                symbol_id,
1009            );
1010
1011            if symbol.parent_identity.is_none() {
1012                top_level_symbol_ids
1013                    .entry((analyzed_file.relative_path.clone(), symbol.name.clone()))
1014                    .or_default()
1015                    .push(symbol_id);
1016                if symbol.exported {
1017                    exported_top_level_symbol_ids
1018                        .entry(analyzed_file.relative_path.clone())
1019                        .or_default()
1020                        .push((symbol.name.clone(), symbol_id));
1021                    if analysis
1022                        .default_exported_symbol_names
1023                        .contains(&symbol.name)
1024                    {
1025                        default_exported_top_level_symbol_ids
1026                            .entry(analyzed_file.relative_path.clone())
1027                            .or_default()
1028                            .push(symbol_id);
1029                    }
1030                }
1031            }
1032
1033            if symbol.exported && config.emit_export_edges {
1034                let mut edge = Edge::new(EdgeType::Custom("exports".to_string()), symbol_id);
1035                edge.metadata
1036                    .custom
1037                    .insert("relation".to_string(), json!("exports"));
1038                edge.metadata
1039                    .custom
1040                    .insert("symbol".to_string(), json!(symbol.name.clone()));
1041                if let Some(source_block) = doc.get_block_mut(&file_block_id) {
1042                    source_block.edges.push(edge);
1043                }
1044            }
1045        }
1046
1047        file_analyses.push(FileAnalysisRecord {
1048            file: analyzed_file.relative_path.clone(),
1049            language: analyzed_file.language,
1050            imports: analysis.imports.clone(),
1051            relationships: analysis.relationships.clone(),
1052            usages: analysis.usages.clone(),
1053            aliases: analysis.aliases.clone(),
1054            export_bindings: analysis.export_bindings.clone(),
1055        });
1056    }
1057
1058    let known_files: BTreeSet<String> = file_ids.keys().cloned().collect();
1059    let mut dependencies_by_file: BTreeMap<String, BTreeSet<String>> = analyzed_files
1060        .iter()
1061        .map(|file| (file.relative_path.clone(), BTreeSet::new()))
1062        .collect();
1063    let mut exported_symbol_targets_by_file: BTreeMap<String, BTreeMap<String, Vec<BlockId>>> =
1064        BTreeMap::new();
1065    let mut imported_symbol_targets_by_file: BTreeMap<String, BTreeMap<String, Vec<BlockId>>> =
1066        BTreeMap::new();
1067    let mut imported_module_targets_by_file: BTreeMap<String, BTreeMap<String, Vec<String>>> =
1068        BTreeMap::new();
1069    let mut imported_module_paths_by_file: BTreeMap<String, BTreeMap<String, Vec<String>>> =
1070        BTreeMap::new();
1071    let mut alias_names_by_scope: BTreeMap<(String, String), BTreeSet<String>> = BTreeMap::new();
1072    let mut alias_records_by_scope: BTreeMap<
1073        (String, String),
1074        BTreeMap<String, Vec<ExtractedAlias>>,
1075    > = BTreeMap::new();
1076    let mut aliased_symbol_targets_by_scope: BTreeMap<
1077        (String, String),
1078        BTreeMap<String, Vec<BlockId>>,
1079    > = BTreeMap::new();
1080    let mut pending_reference_edges: BTreeSet<(String, String, String)> = BTreeSet::new();
1081    let mut pending_symbol_reference_edges: BTreeSet<(String, String, String, String)> =
1082        BTreeSet::new();
1083    let mut pending_wildcard_symbol_reference_edges: BTreeSet<(String, String, String)> =
1084        BTreeSet::new();
1085    let mut pending_reexport_edges: BTreeSet<(String, String, String, String)> = BTreeSet::new();
1086    let mut pending_wildcard_reexport_edges: BTreeSet<(String, String, String, Vec<String>)> =
1087        BTreeSet::new();
1088    let mut pending_relationship_edges: Vec<(BlockId, BlockId, String, String)> = Vec::new();
1089    let mut pending_usage_edges: Vec<(BlockId, BlockId, String)> = Vec::new();
1090
1091    for (file, exports) in &exported_top_level_symbol_ids {
1092        let entry = exported_symbol_targets_by_file
1093            .entry(file.clone())
1094            .or_default();
1095        for (name, symbol_id) in exports {
1096            entry.entry(name.clone()).or_default().push(*symbol_id);
1097        }
1098    }
1099    for (file, ids) in &default_exported_top_level_symbol_ids {
1100        exported_symbol_targets_by_file
1101            .entry(file.clone())
1102            .or_default()
1103            .entry("default".to_string())
1104            .or_default()
1105            .extend(ids.iter().copied());
1106    }
1107    for record in &file_analyses {
1108        let entry = exported_symbol_targets_by_file
1109            .entry(record.file.clone())
1110            .or_default();
1111        for binding in &record.export_bindings {
1112            if let Some(ids) =
1113                top_level_symbol_ids.get(&(record.file.clone(), binding.local_name.clone()))
1114            {
1115                extend_unique_block_ids(
1116                    entry.entry(binding.source_name.clone()).or_default(),
1117                    ids.iter().copied(),
1118                );
1119            }
1120        }
1121    }
1122
1123    for targets in exported_symbol_targets_by_file.values_mut() {
1124        for ids in targets.values_mut() {
1125            let existing = std::mem::take(ids);
1126            extend_unique_block_ids(ids, existing);
1127        }
1128    }
1129
1130    for _ in 0..=file_analyses.len() {
1131        let mut progress = false;
1132
1133        for record in &file_analyses {
1134            for import in &record.imports {
1135                if !import.reexported {
1136                    continue;
1137                }
1138
1139                let ImportResolution::Resolved(target) =
1140                    resolve_import(&record.file, &record.language, &import.module, &known_files)
1141                else {
1142                    continue;
1143                };
1144
1145                let target_exports = exported_symbol_targets_by_file
1146                    .get(&target)
1147                    .cloned()
1148                    .unwrap_or_default();
1149                let entry = exported_symbol_targets_by_file
1150                    .entry(record.file.clone())
1151                    .or_default();
1152
1153                if import.wildcard {
1154                    for (export_name, ids) in target_exports.clone() {
1155                        if export_name == "default" {
1156                            continue;
1157                        }
1158                        let targets = entry.entry(export_name).or_default();
1159                        progress |= extend_unique_block_ids(targets, ids.iter().copied());
1160                    }
1161                }
1162
1163                for binding in &import.bindings {
1164                    if let Some(ids) = target_exports.get(&binding.source_name) {
1165                        let targets = entry.entry(binding.local_name.clone()).or_default();
1166                        progress |= extend_unique_block_ids(targets, ids.iter().copied());
1167                    }
1168                }
1169            }
1170        }
1171
1172        if !progress {
1173            break;
1174        }
1175    }
1176
1177    for record in &file_analyses {
1178        for import in &record.imports {
1179            match resolve_import(&record.file, &record.language, &import.module, &known_files) {
1180                ImportResolution::Resolved(target) if target != record.file => {
1181                    dependencies_by_file
1182                        .entry(record.file.clone())
1183                        .or_default()
1184                        .insert(target.clone());
1185                    pending_reference_edges.insert((
1186                        record.file.clone(),
1187                        target.clone(),
1188                        import.module.clone(),
1189                    ));
1190
1191                    for symbol_name in &import.symbols {
1192                        pending_symbol_reference_edges.insert((
1193                            record.file.clone(),
1194                            target.clone(),
1195                            symbol_name.clone(),
1196                            import.module.clone(),
1197                        ));
1198                        if import.reexported {
1199                            pending_reexport_edges.insert((
1200                                record.file.clone(),
1201                                target.clone(),
1202                                symbol_name.clone(),
1203                                import.module.clone(),
1204                            ));
1205                        }
1206                    }
1207
1208                    if matches!(record.language, CodeLanguage::Rust | CodeLanguage::Python)
1209                        && import.wildcard
1210                    {
1211                        if let Some(exports) = exported_symbol_targets_by_file.get(&target) {
1212                            let entry = imported_symbol_targets_by_file
1213                                .entry(record.file.clone())
1214                                .or_default();
1215                            for (export_name, target_symbol_ids) in exports {
1216                                if export_name == "default" {
1217                                    continue;
1218                                }
1219                                entry
1220                                    .entry(export_name.clone())
1221                                    .or_default()
1222                                    .extend(target_symbol_ids.iter().copied());
1223                            }
1224                        }
1225                    }
1226
1227                    if !import.bindings.is_empty() {
1228                        let entry = imported_symbol_targets_by_file
1229                            .entry(record.file.clone())
1230                            .or_default();
1231                        for binding in &import.bindings {
1232                            if let Some(target_symbol_ids) = exported_symbol_targets_by_file
1233                                .get(&target)
1234                                .and_then(|exports| exports.get(&binding.source_name))
1235                            {
1236                                entry
1237                                    .entry(binding.local_name.clone())
1238                                    .or_default()
1239                                    .extend(target_symbol_ids.iter().copied());
1240                            }
1241                        }
1242                    }
1243
1244                    if !import.module_aliases.is_empty() {
1245                        let path_entry = imported_module_paths_by_file
1246                            .entry(record.file.clone())
1247                            .or_default();
1248                        for alias in &import.module_aliases {
1249                            let paths = path_entry.entry(alias.clone()).or_default();
1250                            if !paths.contains(&import.module) {
1251                                paths.push(import.module.clone());
1252                            }
1253                        }
1254
1255                        let entry = imported_module_targets_by_file
1256                            .entry(record.file.clone())
1257                            .or_default();
1258                        for alias in &import.module_aliases {
1259                            let targets = entry.entry(alias.clone()).or_default();
1260                            if !targets.contains(&target) {
1261                                targets.push(target.clone());
1262                            }
1263                        }
1264                    }
1265
1266                    if import.reexported && import.wildcard && import.symbols.is_empty() {
1267                        pending_wildcard_reexport_edges.insert((
1268                            record.file.clone(),
1269                            target.clone(),
1270                            import.module.clone(),
1271                            import.symbols.clone(),
1272                        ));
1273                    }
1274
1275                    if import.wildcard && import.symbols.is_empty() {
1276                        pending_wildcard_symbol_reference_edges.insert((
1277                            record.file.clone(),
1278                            target,
1279                            import.module.clone(),
1280                        ));
1281                    }
1282                }
1283                ImportResolution::Resolved(_) | ImportResolution::External => {}
1284                ImportResolution::Unresolved => {
1285                    diagnostics.push(
1286                        CodeGraphDiagnostic::warning(
1287                            "CG2006",
1288                            format!("unresolved import '{}'", import.module),
1289                        )
1290                        .with_path(record.file.clone()),
1291                    );
1292                }
1293            }
1294        }
1295    }
1296
1297    for targets in imported_symbol_targets_by_file.values_mut() {
1298        for symbol_ids in targets.values_mut() {
1299            let mut unique_ids = Vec::new();
1300            for symbol_id in symbol_ids.drain(..) {
1301                if !unique_ids.contains(&symbol_id) {
1302                    unique_ids.push(symbol_id);
1303                }
1304            }
1305            *symbol_ids = unique_ids;
1306        }
1307    }
1308
1309    for targets in imported_module_targets_by_file.values_mut() {
1310        for file_paths in targets.values_mut() {
1311            file_paths.sort();
1312            file_paths.dedup();
1313        }
1314    }
1315
1316    for targets in imported_module_paths_by_file.values_mut() {
1317        for module_paths in targets.values_mut() {
1318            module_paths.sort();
1319            module_paths.dedup();
1320        }
1321    }
1322
1323    for record in &file_analyses {
1324        for alias in &record.aliases {
1325            let scope_key = alias_scope_key(alias.owner_identity.as_deref());
1326            alias_names_by_scope
1327                .entry((record.file.clone(), scope_key.clone()))
1328                .or_default()
1329                .insert(alias.name.clone());
1330            alias_records_by_scope
1331                .entry((record.file.clone(), scope_key))
1332                .or_default()
1333                .entry(alias.name.clone())
1334                .or_default()
1335                .push(alias.clone());
1336        }
1337    }
1338
1339    let mut unresolved_aliases = file_analyses
1340        .iter()
1341        .flat_map(|record| {
1342            record
1343                .aliases
1344                .iter()
1345                .cloned()
1346                .map(|alias| (record.file.clone(), record.language, alias))
1347        })
1348        .collect::<Vec<_>>();
1349
1350    while !unresolved_aliases.is_empty() {
1351        let mut next_unresolved = Vec::new();
1352        let mut made_progress = false;
1353
1354        for (file, language, alias) in unresolved_aliases {
1355            let target_ids = resolve_alias_target_ids(
1356                &file,
1357                language,
1358                &alias,
1359                &top_level_symbol_ids,
1360                &exported_symbol_targets_by_file,
1361                &imported_symbol_targets_by_file,
1362                &imported_module_targets_by_file,
1363                &imported_module_paths_by_file,
1364                &alias_names_by_scope,
1365                &aliased_symbol_targets_by_scope,
1366                &known_files,
1367            );
1368            if target_ids.is_empty() {
1369                next_unresolved.push((file, language, alias));
1370                continue;
1371            }
1372
1373            aliased_symbol_targets_by_scope
1374                .entry((file, alias_scope_key(alias.owner_identity.as_deref())))
1375                .or_default()
1376                .entry(alias.name)
1377                .or_default()
1378                .extend(target_ids);
1379            made_progress = true;
1380        }
1381
1382        if !made_progress {
1383            break;
1384        }
1385        unresolved_aliases = next_unresolved;
1386    }
1387
1388    for targets in aliased_symbol_targets_by_scope.values_mut() {
1389        for symbol_ids in targets.values_mut() {
1390            let mut unique_ids = Vec::new();
1391            for symbol_id in symbol_ids.drain(..) {
1392                if !unique_ids.contains(&symbol_id) {
1393                    unique_ids.push(symbol_id);
1394                }
1395            }
1396            *symbol_ids = unique_ids;
1397        }
1398    }
1399
1400    for record in &file_analyses {
1401        for relationship in &record.relationships {
1402            let Some(source_id) = symbol_ids_by_file_identity
1403                .get(&(record.file.clone(), relationship.source_identity.clone()))
1404            else {
1405                continue;
1406            };
1407
1408            for target_id in resolve_relationship_target_ids(
1409                &record.file,
1410                record.language,
1411                relationship,
1412                &top_level_symbol_ids,
1413                &imported_symbol_targets_by_file,
1414                &known_files,
1415            ) {
1416                if target_id == *source_id {
1417                    continue;
1418                }
1419                if let Some(target_file) = symbol_file_by_id.get(&target_id) {
1420                    if target_file != &record.file {
1421                        dependencies_by_file
1422                            .entry(record.file.clone())
1423                            .or_default()
1424                            .insert(target_file.clone());
1425                    }
1426                }
1427                let edge = (
1428                    *source_id,
1429                    target_id,
1430                    relationship.relation.clone(),
1431                    relationship.target_expr.clone(),
1432                );
1433                if !pending_relationship_edges.contains(&edge) {
1434                    pending_relationship_edges.push(edge);
1435                }
1436            }
1437        }
1438    }
1439
1440    for record in &file_analyses {
1441        for usage in &record.usages {
1442            let Some(source_id) = symbol_ids_by_file_identity
1443                .get(&(record.file.clone(), usage.source_identity.clone()))
1444            else {
1445                continue;
1446            };
1447
1448            for target_id in resolve_usage_target_ids(
1449                &record.file,
1450                record.language,
1451                usage,
1452                &top_level_symbol_ids,
1453                &exported_symbol_targets_by_file,
1454                &imported_symbol_targets_by_file,
1455                &imported_module_targets_by_file,
1456                &imported_module_paths_by_file,
1457                &alias_names_by_scope,
1458                &alias_records_by_scope,
1459                &aliased_symbol_targets_by_scope,
1460                &known_files,
1461            ) {
1462                if let Some(target_file) = symbol_file_by_id.get(&target_id) {
1463                    if target_file != &record.file {
1464                        dependencies_by_file
1465                            .entry(record.file.clone())
1466                            .or_default()
1467                            .insert(target_file.clone());
1468                    }
1469                }
1470                let edge = (*source_id, target_id, usage.target_expr.clone());
1471                if !pending_usage_edges.contains(&edge) {
1472                    pending_usage_edges.push(edge);
1473                }
1474            }
1475        }
1476    }
1477
1478    for (source_path, target_path, raw_import) in pending_reference_edges {
1479        let (Some(source_id), Some(target_id)) =
1480            (file_ids.get(&source_path), file_ids.get(&target_path))
1481        else {
1482            continue;
1483        };
1484        let mut edge = Edge::new(EdgeType::References, *target_id);
1485        edge.metadata
1486            .custom
1487            .insert("relation".to_string(), json!("imports"));
1488        edge.metadata
1489            .custom
1490            .insert("raw_import".to_string(), json!(raw_import));
1491        if let Some(source_block) = doc.get_block_mut(source_id) {
1492            source_block.edges.push(edge);
1493        }
1494    }
1495
1496    for (source_path, target_path, symbol_name, raw_import) in pending_symbol_reference_edges {
1497        let Some(source_id) = file_ids.get(&source_path) else {
1498            continue;
1499        };
1500        let Some(target_symbol_ids) =
1501            top_level_symbol_ids.get(&(target_path.clone(), symbol_name.clone()))
1502        else {
1503            continue;
1504        };
1505
1506        for target_symbol_id in target_symbol_ids {
1507            let mut edge = Edge::new(
1508                EdgeType::Custom("imports_symbol".to_string()),
1509                *target_symbol_id,
1510            );
1511            edge.metadata
1512                .custom
1513                .insert("relation".to_string(), json!("imports_symbol"));
1514            edge.metadata
1515                .custom
1516                .insert("raw_import".to_string(), json!(raw_import.clone()));
1517            edge.metadata
1518                .custom
1519                .insert("symbol".to_string(), json!(symbol_name.clone()));
1520            if let Some(source_block) = doc.get_block_mut(source_id) {
1521                source_block.edges.push(edge);
1522            }
1523        }
1524    }
1525
1526    for (source_path, target_path, raw_import) in pending_wildcard_symbol_reference_edges {
1527        let Some(source_id) = file_ids.get(&source_path) else {
1528            continue;
1529        };
1530        let Some(target_symbols) = exported_top_level_symbol_ids.get(&target_path) else {
1531            continue;
1532        };
1533
1534        for (symbol_name, target_symbol_id) in target_symbols {
1535            let mut edge = Edge::new(
1536                EdgeType::Custom("imports_symbol".to_string()),
1537                *target_symbol_id,
1538            );
1539            edge.metadata
1540                .custom
1541                .insert("relation".to_string(), json!("imports_symbol"));
1542            edge.metadata
1543                .custom
1544                .insert("raw_import".to_string(), json!(raw_import.clone()));
1545            edge.metadata
1546                .custom
1547                .insert("symbol".to_string(), json!(symbol_name.clone()));
1548            if let Some(source_block) = doc.get_block_mut(source_id) {
1549                source_block.edges.push(edge);
1550            }
1551        }
1552    }
1553
1554    if config.emit_export_edges {
1555        for (source_path, target_path, symbol_name, raw_import) in pending_reexport_edges {
1556            let Some(source_id) = file_ids.get(&source_path) else {
1557                continue;
1558            };
1559            let Some(target_symbol_ids) =
1560                top_level_symbol_ids.get(&(target_path.clone(), symbol_name.clone()))
1561            else {
1562                continue;
1563            };
1564
1565            for target_symbol_id in target_symbol_ids {
1566                let mut edge =
1567                    Edge::new(EdgeType::Custom("exports".to_string()), *target_symbol_id);
1568                edge.metadata
1569                    .custom
1570                    .insert("relation".to_string(), json!("reexports"));
1571                edge.metadata
1572                    .custom
1573                    .insert("raw_import".to_string(), json!(raw_import.clone()));
1574                edge.metadata
1575                    .custom
1576                    .insert("symbol".to_string(), json!(symbol_name.clone()));
1577                if let Some(source_block) = doc.get_block_mut(source_id) {
1578                    source_block.edges.push(edge);
1579                }
1580            }
1581        }
1582
1583        for (source_path, target_path, raw_import, filter_names) in pending_wildcard_reexport_edges
1584        {
1585            let Some(source_id) = file_ids.get(&source_path) else {
1586                continue;
1587            };
1588            let Some(target_symbols) = exported_top_level_symbol_ids.get(&target_path) else {
1589                continue;
1590            };
1591
1592            for (symbol_name, target_symbol_id) in target_symbols {
1593                if !filter_names.is_empty() && !filter_names.contains(symbol_name) {
1594                    continue;
1595                }
1596                let mut edge =
1597                    Edge::new(EdgeType::Custom("exports".to_string()), *target_symbol_id);
1598                edge.metadata
1599                    .custom
1600                    .insert("relation".to_string(), json!("reexports"));
1601                edge.metadata
1602                    .custom
1603                    .insert("raw_import".to_string(), json!(raw_import.clone()));
1604                edge.metadata
1605                    .custom
1606                    .insert("symbol".to_string(), json!(symbol_name.clone()));
1607                if let Some(source_block) = doc.get_block_mut(source_id) {
1608                    source_block.edges.push(edge);
1609                }
1610            }
1611        }
1612    }
1613
1614    for (source_id, target_id, relation, raw_target) in pending_relationship_edges {
1615        let mut edge = Edge::new(EdgeType::Custom(relation.clone()), target_id);
1616        edge.metadata
1617            .custom
1618            .insert("relation".to_string(), json!(relation));
1619        edge.metadata
1620            .custom
1621            .insert("raw_target".to_string(), json!(raw_target));
1622        if let Some(source_block) = doc.get_block_mut(&source_id) {
1623            source_block.edges.push(edge);
1624        }
1625    }
1626
1627    for (source_id, target_id, raw_target) in pending_usage_edges {
1628        let mut edge = Edge::new(EdgeType::Custom("uses_symbol".to_string()), target_id);
1629        edge.metadata
1630            .custom
1631            .insert("relation".to_string(), json!("uses_symbol"));
1632        edge.metadata
1633            .custom
1634            .insert("raw_target".to_string(), json!(raw_target));
1635        if let Some(source_block) = doc.get_block_mut(&source_id) {
1636            source_block.edges.push(edge);
1637        }
1638    }
1639
1640    sort_structure_children_by_logical_key(&mut doc);
1641    sort_edges(&mut doc);
1642    normalize_temporal_fields(&mut doc);
1643    doc.rebuild_indices();
1644
1645    let mut validation = validate_code_graph_profile(&doc);
1646    diagnostics.append(&mut validation.diagnostics);
1647
1648    let fingerprint = canonical_fingerprint(&doc)?;
1649    let stats = compute_stats(&doc);
1650    let has_profile_errors = diagnostics
1651        .iter()
1652        .any(|d| d.severity == CodeGraphSeverity::Error && d.code.starts_with("CG100"));
1653    let has_non_info = diagnostics
1654        .iter()
1655        .any(|d| d.severity != CodeGraphSeverity::Info);
1656    let status = if has_profile_errors {
1657        CodeGraphBuildStatus::FailedValidation
1658    } else if has_non_info {
1659        CodeGraphBuildStatus::PartialSuccess
1660    } else {
1661        CodeGraphBuildStatus::Success
1662    };
1663
1664    Ok(AssembledCodeGraph {
1665        result: CodeGraphBuildResult {
1666            document: doc,
1667            diagnostics,
1668            stats,
1669            profile_version: CODEGRAPH_PROFILE_MARKER.to_string(),
1670            canonical_fingerprint: fingerprint,
1671            status,
1672            incremental: None,
1673        },
1674        dependencies_by_file: dependencies_by_file
1675            .into_iter()
1676            .map(|(file, deps)| (file, deps.into_iter().collect()))
1677            .collect(),
1678    })
1679}
1680
1681pub(super) fn initialize_document_metadata(
1682    doc: &mut Document,
1683    repo_root: &Path,
1684    repo_name: &str,
1685    commit: &str,
1686) {
1687    doc.metadata.title = Some(format!("CodeGraph: {}", repo_name));
1688    doc.metadata.description = Some("CodeGraphProfile v1 document".to_string());
1689    doc.metadata.language = Some("multi".to_string());
1690    doc.metadata
1691        .custom
1692        .insert("profile".to_string(), json!(CODEGRAPH_PROFILE));
1693    doc.metadata.custom.insert(
1694        "profile_version".to_string(),
1695        json!(CODEGRAPH_PROFILE_VERSION),
1696    );
1697    doc.metadata.custom.insert(
1698        "profile_marker".to_string(),
1699        json!(CODEGRAPH_PROFILE_MARKER),
1700    );
1701    doc.metadata.custom.insert(
1702        "extractor_version".to_string(),
1703        json!(CODEGRAPH_EXTRACTOR_VERSION),
1704    );
1705    doc.metadata
1706        .custom
1707        .insert("commit_hash".to_string(), json!(commit));
1708    doc.metadata.custom.insert(
1709        "repository_path".to_string(),
1710        json!(normalize_path(repo_root)),
1711    );
1712}
1713
1714pub(super) fn make_repository_block(repo_name: &str, commit_hash: &str) -> Block {
1715    let coderef = json!({
1716        "path": ".",
1717        "display": repo_name,
1718    });
1719    let mut block = Block::new(
1720        Content::json(json!({
1721            "coderef": coderef.clone(),
1722            "name": repo_name,
1723            "commit": commit_hash,
1724        })),
1725        Some("custom.repository"),
1726    );
1727    block.metadata.label = Some(repo_name.to_string());
1728    block
1729        .metadata
1730        .custom
1731        .insert(META_NODE_CLASS.to_string(), json!("repository"));
1732    block.metadata.custom.insert(
1733        META_LOGICAL_KEY.to_string(),
1734        json!(format!("repository:{}", repo_name)),
1735    );
1736    block
1737        .metadata
1738        .custom
1739        .insert(META_CODEREF.to_string(), coderef);
1740    block
1741}
1742
1743pub(super) fn make_directory_block(path: &str) -> Block {
1744    let coderef = json!({
1745        "path": path,
1746        "display": path,
1747    });
1748    let mut block = Block::new(
1749        Content::json(json!({
1750            "coderef": coderef.clone(),
1751        })),
1752        Some("custom.directory"),
1753    );
1754    block.metadata.label = Some(path.to_string());
1755    block
1756        .metadata
1757        .custom
1758        .insert(META_NODE_CLASS.to_string(), json!("directory"));
1759    block
1760        .metadata
1761        .custom
1762        .insert(META_CODEREF.to_string(), coderef);
1763    block.metadata.custom.insert(
1764        META_LOGICAL_KEY.to_string(),
1765        json!(format!("directory:{}", path)),
1766    );
1767    block
1768}
1769
1770pub(super) fn make_file_block(path: &str, language: &str, description: Option<&str>) -> Block {
1771    let coderef = json!({
1772        "path": path,
1773        "display": path,
1774    });
1775    let mut content = serde_json::Map::new();
1776    content.insert("coderef".to_string(), coderef.clone());
1777    content.insert("language".to_string(), json!(language));
1778    if let Some(description) = description {
1779        content.insert("description".to_string(), json!(description));
1780    }
1781
1782    let mut block = Block::new(
1783        Content::json(serde_json::Value::Object(content)),
1784        Some("custom.file"),
1785    );
1786    block.metadata.label = Some(path.to_string());
1787    block.metadata.summary = description.map(|value| value.to_string());
1788    block
1789        .metadata
1790        .custom
1791        .insert(META_NODE_CLASS.to_string(), json!("file"));
1792    block
1793        .metadata
1794        .custom
1795        .insert(META_CODEREF.to_string(), coderef);
1796    block
1797        .metadata
1798        .custom
1799        .insert(META_LANGUAGE.to_string(), json!(language));
1800    block.metadata.custom.insert(
1801        META_LOGICAL_KEY.to_string(),
1802        json!(format!("file:{}", path)),
1803    );
1804    block
1805}
1806
1807pub(super) fn make_symbol_block(
1808    logical_key: &str,
1809    path: &str,
1810    language: &str,
1811    symbol: &ExtractedSymbol,
1812) -> Block {
1813    let line_range = format_line_range(symbol.start_line, symbol.end_line);
1814    let coderef = json!({
1815        "path": path,
1816        "start_line": symbol.start_line,
1817        "start_col": symbol.start_col,
1818        "end_line": symbol.end_line,
1819        "end_col": symbol.end_col,
1820        "display": format_coderef(path, &line_range),
1821    });
1822
1823    let mut content = serde_json::Map::new();
1824    content.insert("name".to_string(), json!(symbol.name));
1825    content.insert("kind".to_string(), json!(symbol.kind));
1826    content.insert("coderef".to_string(), coderef.clone());
1827    content.insert("exported".to_string(), json!(symbol.exported));
1828    if let Some(description) = &symbol.description {
1829        content.insert("description".to_string(), json!(description));
1830    }
1831    if !symbol.modifiers.is_empty() {
1832        content.insert("modifiers".to_string(), json!(symbol.modifiers));
1833    }
1834    if !symbol.inputs.is_empty() {
1835        content.insert("inputs".to_string(), json!(symbol.inputs));
1836    }
1837    if let Some(output) = &symbol.output {
1838        content.insert("output".to_string(), json!(output));
1839    }
1840    if let Some(type_info) = &symbol.type_info {
1841        content.insert("type".to_string(), json!(type_info));
1842    }
1843
1844    let mut block = Block::new(
1845        Content::json(serde_json::Value::Object(content)),
1846        Some("custom.symbol"),
1847    );
1848
1849    block.metadata.label = Some(symbol.name.clone());
1850    block.metadata.summary = symbol.description.clone();
1851    block
1852        .metadata
1853        .custom
1854        .insert(META_NODE_CLASS.to_string(), json!("symbol"));
1855    block
1856        .metadata
1857        .custom
1858        .insert(META_LOGICAL_KEY.to_string(), json!(logical_key));
1859    block
1860        .metadata
1861        .custom
1862        .insert(META_CODEREF.to_string(), coderef);
1863    block
1864        .metadata
1865        .custom
1866        .insert(META_LANGUAGE.to_string(), json!(language));
1867    block
1868        .metadata
1869        .custom
1870        .insert(META_SYMBOL_KIND.to_string(), json!(symbol.kind));
1871    block
1872        .metadata
1873        .custom
1874        .insert(META_SYMBOL_NAME.to_string(), json!(symbol.name));
1875    block
1876        .metadata
1877        .custom
1878        .insert(META_EXPORTED.to_string(), json!(symbol.exported));
1879    block
1880}