Skip to main content

ucp_codegraph/legacy/
incremental.rs

1use std::collections::{BTreeMap, BTreeSet, VecDeque};
2use std::fs;
3use std::path::Path;
4
5use anyhow::{Context, Result};
6use serde::{Deserialize, Serialize};
7
8use crate::model::{CodeLanguage, ExtractedInput, ExtractedModifiers, FileAnalysis, ImportBinding};
9use crate::{
10    CodeGraphBuildResult, CodeGraphDiagnostic, CodeGraphExtractorConfig,
11    CodeGraphIncrementalBuildInput, CodeGraphIncrementalStats, CODEGRAPH_EXTRACTOR_VERSION,
12};
13
14use super::build::{
15    analyze_loaded_repo_file, assemble_code_graph_from_analyzed_files, load_repo_file,
16    AnalyzedRepoFile,
17};
18use super::*;
19
#[derive(Debug, Clone, Serialize, Deserialize)]
/// Persisted snapshot of one incremental build, serialized as JSON to the
/// state file. A stored state is only reused when `extractor_version`,
/// `repository_path`, and `config` all match the current run (see
/// `load_compatible_state`); otherwise a full rebuild is performed.
struct IncrementalBuildState {
    // Must equal CODEGRAPH_EXTRACTOR_VERSION for the state to be reusable.
    extractor_version: String,
    // Normalized (canonicalized) repository path the state was built from.
    repository_path: String,
    // Normalized extractor config (sorted/deduped lists) used for comparison.
    config: CodeGraphExtractorConfig,
    // Per-file cache keyed by repository-relative path; BTreeMap keeps the
    // serialized state deterministic.
    files: BTreeMap<String, IncrementalFileState>,
}
27
#[derive(Debug, Clone, Serialize, Deserialize)]
/// Cached analysis for a single repository file, enough to skip re-analysis
/// when the file's content hash is unchanged on the next run.
struct IncrementalFileState {
    // Repository-relative path of the file.
    relative_path: String,
    // Detected language of the file.
    language: CodeLanguage,
    // Content hash used to detect modifications between runs.
    content_hash: String,
    // Hash of the file's exported "surface" (symbols, exports, re-exports);
    // when it changes, dependents must be invalidated too. `serde(default)`
    // keeps states written before this field existed loadable (they get an
    // empty signature, which then compares unequal and forces a rebuild).
    #[serde(default)]
    surface_signature: String,
    // Cached analysis result; None when analysis previously failed/was skipped.
    analysis: Option<FileAnalysis>,
    // Diagnostics emitted when this file was last analyzed, replayed on reuse.
    diagnostics: Vec<CodeGraphDiagnostic>,
    // Repository-relative paths this file depends on; used to build the
    // reverse-dependency graph for invalidation expansion.
    dependencies: Vec<String>,
}
39
40impl IncrementalFileState {
41    fn to_analyzed_repo_file(&self) -> AnalyzedRepoFile {
42        AnalyzedRepoFile {
43            relative_path: self.relative_path.clone(),
44            language: self.language,
45            content_hash: Some(self.content_hash.clone()),
46            analysis: self.analysis.clone(),
47            diagnostics: self.diagnostics.clone(),
48        }
49    }
50
51    fn from_analyzed_repo_file(file: &AnalyzedRepoFile, dependencies: Vec<String>) -> Option<Self> {
52        Some(Self {
53            relative_path: file.relative_path.clone(),
54            language: file.language,
55            content_hash: file.content_hash.clone()?,
56            surface_signature: compute_file_surface_signature(file.analysis.as_ref()),
57            analysis: file.analysis.clone(),
58            diagnostics: file.diagnostics.clone(),
59            dependencies,
60        })
61    }
62}
63
/// Build the code graph for a repository, reusing per-file analysis from a
/// previous run where the file content (and nothing it depends on) changed.
///
/// Flow: resolve and validate the repository path, collect candidate files,
/// load any compatible prior state, decide which files must be re-analyzed
/// (new files, content-hash changes, plus everything transitively invalidated
/// by surface-signature changes or deletions), assemble the graph from the
/// mix of reused and fresh analyses, then persist new state for the next run.
///
/// Errors bubble up from path resolution, repository scanning, file loading,
/// state I/O, and graph assembly.
pub fn build_code_graph_incremental(
    input: &CodeGraphIncrementalBuildInput,
) -> Result<CodeGraphBuildResult> {
    let repo_root = input
        .build
        .repository_path
        .canonicalize()
        .with_context(|| {
            format!(
                "failed to resolve repository path {}",
                input.build.repository_path.display()
            )
        })?;
    if !repo_root.is_dir() {
        anyhow::bail!(
            "repository path is not a directory: {}",
            repo_root.display()
        );
    }

    // Repository display name defaults to the directory name.
    let repo_name = repo_root
        .file_name()
        .map(|name| name.to_string_lossy().into_owned())
        .unwrap_or_else(|| "repository".to_string());

    // Normalize config and path so comparisons against stored state are
    // insensitive to list ordering / path spelling.
    let normalized_config = normalize_incremental_config(&input.build.config);
    let normalized_repo_path = normalize_path(&repo_root);
    let mut diagnostics = Vec::new();
    let matcher = GitignoreMatcher::from_repository(&repo_root)?;
    let repo_files =
        collect_repository_files(&repo_root, &normalized_config, &matcher, &mut diagnostics)?;

    // Prior state is discarded (with a recorded reason) on version/repo/config
    // mismatch or unreadable/invalid state files.
    let state_status = load_compatible_state(
        &input.state_file,
        &normalized_repo_path,
        &normalized_config,
        &mut diagnostics,
    )?;

    let loaded_files = repo_files
        .iter()
        .map(|repo_file| load_repo_file(repo_file, &normalized_config))
        .collect::<Result<Vec<_>>>()?;

    let previous_state = state_status.state.as_ref();
    let state_entries = previous_state.map(|state| state.files.len()).unwrap_or(0);
    let current_paths: BTreeSet<String> = loaded_files
        .iter()
        .map(|loaded| loaded.repo_file.relative_path.clone())
        .collect();
    // Files present in the old state but gone from the working tree.
    let deleted_paths: BTreeSet<String> = previous_state
        .map(|state| {
            state
                .files
                .keys()
                .filter(|path| !current_paths.contains(*path))
                .cloned()
                .collect()
        })
        .unwrap_or_default();

    // Tuple: (analyzed files, added, changed, direct-invalidated,
    // surface-changed, rebuilt, reused, total-invalidated) — the stats feed
    // CodeGraphIncrementalStats below.
    let (
        analyzed_files,
        added_files,
        changed_files,
        direct_invalidated_files,
        surface_changed_files,
        rebuilt_files,
        reused_files,
        invalidated_files,
    ) = if let Some(state) = previous_state {
        let mut added_files = 0usize;
        let mut changed_files = 0usize;
        let mut direct_rebuild_paths = BTreeSet::new();

        // Pass 1: mark files that must be re-analyzed directly — new paths,
        // hash mismatches, or files whose hash could not be computed
        // (unwrap_or(true) treats a missing hash as "changed").
        for loaded in &loaded_files {
            let path = &loaded.repo_file.relative_path;
            match state.files.get(path) {
                None => {
                    direct_rebuild_paths.insert(path.clone());
                    added_files += 1;
                }
                Some(previous) => {
                    if loaded
                        .content_hash
                        .as_ref()
                        .map(|hash| hash != &previous.content_hash)
                        .unwrap_or(true)
                    {
                        direct_rebuild_paths.insert(path.clone());
                        changed_files += 1;
                    }
                }
            }
        }

        // Every deletion counts as a surface change: its former dependents
        // must be re-analyzed even though the file itself is gone.
        let mut pre_analyzed = BTreeMap::new();
        let mut surface_change_roots = deleted_paths.clone();
        let mut surface_changed_files = deleted_paths.len();

        // Pass 2: analyze the directly-changed files now, and compare their
        // new surface signature against the stored one. Only surface changes
        // (not mere body edits) propagate invalidation to dependents.
        for loaded in loaded_files
            .iter()
            .filter(|loaded| direct_rebuild_paths.contains(&loaded.repo_file.relative_path))
        {
            let analyzed = analyze_loaded_repo_file(loaded.clone());
            let current_surface = compute_file_surface_signature(analyzed.analysis.as_ref());
            let previous_surface = state
                .files
                .get(&loaded.repo_file.relative_path)
                .map(|entry| entry.surface_signature.as_str())
                .unwrap_or("");
            if current_surface != previous_surface {
                surface_change_roots.insert(loaded.repo_file.relative_path.clone());
                surface_changed_files += 1;
            }
            // Keep the analysis so pass 3 doesn't redo the work.
            pre_analyzed.insert(loaded.repo_file.relative_path.clone(), analyzed);
        }

        // Transitive closure over reverse dependencies from the surface-change
        // roots, unioned with the directly-changed set.
        let expanded_invalidations = expand_invalidations(&surface_change_roots, state);
        let rebuild_paths = direct_rebuild_paths
            .union(&expanded_invalidations)
            .cloned()
            .collect::<BTreeSet<_>>();
        // Only count invalidations for files that still exist; deleted paths
        // are added back separately when reporting.
        let counted_rebuild_paths = rebuild_paths
            .iter()
            .filter(|path| current_paths.contains(*path))
            .count();

        // Pass 3: produce the final analysis list — reuse pass-2 results,
        // then cached state for untouched files, else analyze fresh.
        let mut reused_files = 0usize;
        let mut rebuilt_files = 0usize;
        let analyzed_files = loaded_files
            .into_iter()
            .map(|loaded| {
                let path = loaded.repo_file.relative_path.clone();
                if let Some(analyzed) = pre_analyzed.remove(&path) {
                    rebuilt_files += 1;
                    return analyzed;
                }
                if !rebuild_paths.contains(&path) {
                    // Reuse only when the hash still matches the cached entry.
                    if let (Some(content_hash), Some(previous)) =
                        (loaded.content_hash.as_ref(), state.files.get(&path))
                    {
                        if content_hash == &previous.content_hash {
                            reused_files += 1;
                            return previous.to_analyzed_repo_file();
                        }
                    }
                }

                rebuilt_files += 1;
                analyze_loaded_repo_file(loaded)
            })
            .collect::<Vec<_>>();

        (
            analyzed_files,
            added_files,
            changed_files,
            direct_rebuild_paths.len() + deleted_paths.len(),
            surface_changed_files,
            rebuilt_files,
            reused_files,
            counted_rebuild_paths + deleted_paths.len(),
        )
    } else {
        // No usable prior state: full rebuild, every current file analyzed.
        let rebuilt_files = loaded_files.len();
        let analyzed_files = loaded_files
            .into_iter()
            .map(analyze_loaded_repo_file)
            .collect::<Vec<_>>();
        (
            analyzed_files,
            0,
            0,
            current_paths.len(),
            0,
            rebuilt_files,
            0,
            current_paths.len(),
        )
    };

    let assembled = assemble_code_graph_from_analyzed_files(
        &repo_root,
        &repo_name,
        &input.build.commit_hash,
        &normalized_config,
        &analyzed_files,
        diagnostics,
    )?;

    // Persist fresh state (including the dependency edges discovered during
    // assembly) for the next incremental run.
    write_state(
        &input.state_file,
        &normalized_repo_path,
        &normalized_config,
        &analyzed_files,
        &assembled.dependencies_by_file,
    )?;

    let mut result = assembled.result;
    result.incremental = Some(CodeGraphIncrementalStats {
        requested: true,
        scanned_files: repo_files.len(),
        state_entries,
        direct_invalidated_files,
        surface_changed_files,
        reused_files,
        rebuilt_files,
        added_files,
        changed_files,
        deleted_files: deleted_paths.len(),
        invalidated_files,
        full_rebuild_reason: state_status.full_rebuild_reason,
    });
    Ok(result)
}
280
#[derive(Debug)]
/// Outcome of attempting to load prior incremental state: either a usable
/// `state`, or `None` plus a machine-readable `full_rebuild_reason` string
/// (e.g. "missing_state", "config_changed") surfaced in the build stats.
struct StateLoadStatus {
    state: Option<IncrementalBuildState>,
    full_rebuild_reason: Option<String>,
}
286
287fn load_compatible_state(
288    state_file: &Path,
289    normalized_repo_path: &str,
290    normalized_config: &CodeGraphExtractorConfig,
291    diagnostics: &mut Vec<CodeGraphDiagnostic>,
292) -> Result<StateLoadStatus> {
293    let contents = match fs::read_to_string(state_file) {
294        Ok(contents) => contents,
295        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
296            return Ok(StateLoadStatus {
297                state: None,
298                full_rebuild_reason: Some("missing_state".to_string()),
299            });
300        }
301        Err(err) => {
302            diagnostics.push(CodeGraphDiagnostic::warning(
303                "CG2009",
304                format!(
305                    "incremental state unreadable; falling back to full rebuild: {}",
306                    err
307                ),
308            ));
309            return Ok(StateLoadStatus {
310                state: None,
311                full_rebuild_reason: Some("unreadable_state".to_string()),
312            });
313        }
314    };
315
316    let state: IncrementalBuildState = match serde_json::from_str(&contents) {
317        Ok(state) => state,
318        Err(err) => {
319            diagnostics.push(CodeGraphDiagnostic::warning(
320                "CG2009",
321                format!(
322                    "incremental state invalid; falling back to full rebuild: {}",
323                    err
324                ),
325            ));
326            return Ok(StateLoadStatus {
327                state: None,
328                full_rebuild_reason: Some("invalid_state".to_string()),
329            });
330        }
331    };
332
333    if state.extractor_version != CODEGRAPH_EXTRACTOR_VERSION {
334        return Ok(StateLoadStatus {
335            state: None,
336            full_rebuild_reason: Some("extractor_version_changed".to_string()),
337        });
338    }
339    if state.repository_path != normalized_repo_path {
340        return Ok(StateLoadStatus {
341            state: None,
342            full_rebuild_reason: Some("repository_changed".to_string()),
343        });
344    }
345    if state.config != *normalized_config {
346        return Ok(StateLoadStatus {
347            state: None,
348            full_rebuild_reason: Some("config_changed".to_string()),
349        });
350    }
351
352    Ok(StateLoadStatus {
353        state: Some(state),
354        full_rebuild_reason: None,
355    })
356}
357
358fn write_state(
359    state_file: &Path,
360    normalized_repo_path: &str,
361    normalized_config: &CodeGraphExtractorConfig,
362    analyzed_files: &[AnalyzedRepoFile],
363    dependencies_by_file: &BTreeMap<String, Vec<String>>,
364) -> Result<()> {
365    let mut files = BTreeMap::new();
366    for file in analyzed_files {
367        let dependencies = dependencies_by_file
368            .get(&file.relative_path)
369            .cloned()
370            .unwrap_or_default();
371        if let Some(state) = IncrementalFileState::from_analyzed_repo_file(file, dependencies) {
372            files.insert(file.relative_path.clone(), state);
373        }
374    }
375
376    let state = IncrementalBuildState {
377        extractor_version: CODEGRAPH_EXTRACTOR_VERSION.to_string(),
378        repository_path: normalized_repo_path.to_string(),
379        config: normalized_config.clone(),
380        files,
381    };
382    if let Some(parent) = state_file.parent() {
383        fs::create_dir_all(parent).with_context(|| {
384            format!(
385                "failed to create incremental state directory {}",
386                parent.display()
387            )
388        })?;
389    }
390    let json = serde_json::to_string_pretty(&state)?;
391    fs::write(state_file, json).with_context(|| {
392        format!(
393            "failed to write incremental state file {}",
394            state_file.display()
395        )
396    })?;
397    Ok(())
398}
399
400fn expand_invalidations(
401    initial_invalidations: &BTreeSet<String>,
402    state: &IncrementalBuildState,
403) -> BTreeSet<String> {
404    let mut reverse_dependencies: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
405    for (file, entry) in &state.files {
406        for dependency in &entry.dependencies {
407            reverse_dependencies
408                .entry(dependency.clone())
409                .or_default()
410                .insert(file.clone());
411        }
412    }
413
414    let mut invalidated = BTreeSet::new();
415    let mut queue: VecDeque<String> = initial_invalidations.iter().cloned().collect();
416    while let Some(path) = queue.pop_front() {
417        if !invalidated.insert(path.clone()) {
418            continue;
419        }
420        if let Some(dependents) = reverse_dependencies.get(&path) {
421            queue.extend(dependents.iter().cloned());
422        }
423    }
424    invalidated
425}
426
427fn normalize_incremental_config(config: &CodeGraphExtractorConfig) -> CodeGraphExtractorConfig {
428    let mut normalized = config.clone();
429    normalized.include_extensions.sort();
430    normalized.include_extensions.dedup();
431    normalized.exclude_dirs.sort();
432    normalized.exclude_dirs.dedup();
433    normalized
434}
435
#[derive(Serialize)]
/// Serializable projection of a symbol's externally visible shape, used only
/// as hash input for the surface signature. NOTE: serde serializes struct
/// fields in declaration order, so reordering fields here changes every
/// signature and forces a full cascade of rebuilds.
struct SurfaceSignatureSymbol {
    name: String,
    qualified_name: String,
    parent_identity: Option<String>,
    kind: String,
    modifiers: ExtractedModifiers,
    inputs: Vec<ExtractedInput>,
    output: Option<String>,
    type_info: Option<String>,
    exported: bool,
}
448
#[derive(Serialize)]
/// Serializable projection of a re-exported import for the surface signature
/// hash. Field order is part of the hash input — do not reorder.
struct SurfaceSignatureReexport {
    module: String,
    symbols: Vec<String>,
    bindings: Vec<ImportBinding>,
    wildcard: bool,
}
456
#[derive(Serialize)]
/// The full, order-normalized surface of a file: everything that dependents
/// can observe. This is JSON-serialized and hashed to produce the file's
/// surface signature. Field order is part of the hash input — do not reorder.
struct SurfaceSignatureSnapshot {
    symbols: Vec<SurfaceSignatureSymbol>,
    export_bindings: Vec<ImportBinding>,
    exported_symbol_names: Vec<String>,
    default_exported_symbol_names: Vec<String>,
    reexports: Vec<SurfaceSignatureReexport>,
}
465
/// Hash the externally visible "surface" of a file's analysis: its symbols,
/// export bindings, exported names, and re-exports. Body-only edits leave the
/// signature unchanged, so dependents are not invalidated; any interface
/// change produces a different hash.
///
/// All collections are sorted before serialization so the signature is
/// deterministic regardless of analysis order. Returns the empty string when
/// there is no analysis. NOTE: the signature is sensitive to the exact JSON
/// shape — changing sort keys or struct field order here invalidates every
/// stored signature.
fn compute_file_surface_signature(analysis: Option<&FileAnalysis>) -> String {
    let Some(analysis) = analysis else {
        return String::new();
    };

    let mut symbols = analysis
        .symbols
        .iter()
        .map(|symbol| SurfaceSignatureSymbol {
            name: symbol.name.clone(),
            qualified_name: symbol.qualified_name.clone(),
            parent_identity: symbol.parent_identity.clone(),
            kind: symbol.kind.clone(),
            modifiers: symbol.modifiers.clone(),
            inputs: symbol.inputs.clone(),
            output: symbol.output.clone(),
            type_info: symbol.type_info.clone(),
            exported: symbol.exported,
        })
        .collect::<Vec<_>>();
    // Multi-key sort keeps symbols with equal qualified names in a stable,
    // well-defined order (sort_by is stable, so full ties keep input order).
    symbols.sort_by(|left, right| {
        left.qualified_name
            .cmp(&right.qualified_name)
            .then_with(|| left.kind.cmp(&right.kind))
            .then_with(|| left.parent_identity.cmp(&right.parent_identity))
            .then_with(|| left.name.cmp(&right.name))
    });

    let mut export_bindings = analysis.export_bindings.clone();
    export_bindings.sort();

    let mut exported_symbol_names = analysis
        .exported_symbol_names
        .iter()
        .cloned()
        .collect::<Vec<_>>();
    exported_symbol_names.sort();

    let mut default_exported_symbol_names = analysis
        .default_exported_symbol_names
        .iter()
        .cloned()
        .collect::<Vec<_>>();
    default_exported_symbol_names.sort();

    // Only re-exported imports are part of the surface; plain imports do not
    // affect what dependents can see.
    let mut reexports = analysis
        .imports
        .iter()
        .filter(|import| import.reexported)
        .map(|import| {
            let mut symbols = import.symbols.clone();
            symbols.sort();
            let mut bindings = import.bindings.clone();
            bindings.sort();
            SurfaceSignatureReexport {
                module: import.module.clone(),
                symbols,
                bindings,
                wildcard: import.wildcard,
            }
        })
        .collect::<Vec<_>>();
    reexports.sort_by(|left, right| {
        left.module
            .cmp(&right.module)
            .then_with(|| left.wildcard.cmp(&right.wildcard))
            .then_with(|| left.symbols.cmp(&right.symbols))
            .then_with(|| left.bindings.cmp(&right.bindings))
    });

    let snapshot = SurfaceSignatureSnapshot {
        symbols,
        export_bindings,
        exported_symbol_names,
        default_exported_symbol_names,
        reexports,
    };
    // Serialization of in-memory data cannot fail here; a failure would be a
    // bug in the snapshot types, hence expect rather than Result.
    let serialized = serde_json::to_string(&snapshot).expect("surface signature serialization");
    super::build::hash_source(&serialized)
}