1use std::collections::{BTreeMap, BTreeSet, VecDeque};
2use std::fs;
3use std::path::Path;
4
5use anyhow::{Context, Result};
6use serde::{Deserialize, Serialize};
7
8use crate::model::{CodeLanguage, ExtractedInput, ExtractedModifiers, FileAnalysis, ImportBinding};
9use crate::{
10 CodeGraphBuildResult, CodeGraphDiagnostic, CodeGraphExtractorConfig,
11 CodeGraphIncrementalBuildInput, CodeGraphIncrementalStats, CODEGRAPH_EXTRACTOR_VERSION,
12};
13
14use super::build::{
15 analyze_loaded_repo_file, assemble_code_graph_from_analyzed_files, load_repo_file,
16 AnalyzedRepoFile,
17};
18use super::*;
19
/// Serialized snapshot of a previous incremental build, persisted to the
/// state file between runs so unchanged files can be reused.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct IncrementalBuildState {
    // Extractor version that produced this state; a mismatch forces a full rebuild.
    extractor_version: String,
    // Normalized repository path the state was built from; a mismatch forces
    // a full rebuild.
    repository_path: String,
    // Normalized extractor config; any difference forces a full rebuild.
    config: CodeGraphExtractorConfig,
    // Per-file cache entries keyed by repository-relative path.
    files: BTreeMap<String, IncrementalFileState>,
}
27
/// Cached per-file analysis results from a previous build.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct IncrementalFileState {
    relative_path: String,
    language: CodeLanguage,
    // Hash of the file contents at the time it was analyzed; compared against
    // the freshly loaded hash to detect changes.
    content_hash: String,
    // Hash of the file's externally visible surface (symbols, exports,
    // re-exports). `#[serde(default)]` makes states written before this field
    // existed deserialize with "", so any rebuilt file then looks
    // surface-changed and dependents are conservatively invalidated.
    #[serde(default)]
    surface_signature: String,
    // Cached analysis output, if any was produced for this file.
    analysis: Option<FileAnalysis>,
    // Diagnostics emitted while analyzing this file.
    diagnostics: Vec<CodeGraphDiagnostic>,
    // Relative paths of files this file depends on; used to build the
    // reverse-dependency map that propagates invalidations to dependents.
    dependencies: Vec<String>,
}
39
40impl IncrementalFileState {
41 fn to_analyzed_repo_file(&self) -> AnalyzedRepoFile {
42 AnalyzedRepoFile {
43 relative_path: self.relative_path.clone(),
44 language: self.language,
45 content_hash: Some(self.content_hash.clone()),
46 analysis: self.analysis.clone(),
47 diagnostics: self.diagnostics.clone(),
48 }
49 }
50
51 fn from_analyzed_repo_file(file: &AnalyzedRepoFile, dependencies: Vec<String>) -> Option<Self> {
52 Some(Self {
53 relative_path: file.relative_path.clone(),
54 language: file.language,
55 content_hash: file.content_hash.clone()?,
56 surface_signature: compute_file_surface_signature(file.analysis.as_ref()),
57 analysis: file.analysis.clone(),
58 diagnostics: file.diagnostics.clone(),
59 dependencies,
60 })
61 }
62}
63
/// Builds the code graph for `input.build.repository_path`, reusing per-file
/// analysis cached in `input.state_file` whenever the saved state is
/// compatible and a file is unaffected by this run's changes.
///
/// Rebuild rules: a file is re-analyzed when it is new, when its content hash
/// differs from the cached one, or when it (transitively) depends on a file
/// whose surface signature changed or that was deleted. Every other file is
/// served from the cache. The refreshed cache is written back to the state
/// file, and `CodeGraphIncrementalStats` are attached to the result.
///
/// # Errors
/// Fails if the repository path cannot be resolved or is not a directory, or
/// if scanning/loading files, assembling the graph, or writing the state file
/// fails. A missing/unreadable/incompatible state file is NOT an error — it
/// just triggers a full rebuild (see `load_compatible_state`).
pub fn build_code_graph_incremental(
    input: &CodeGraphIncrementalBuildInput,
) -> Result<CodeGraphBuildResult> {
    // Resolve and validate the repository root before doing any work.
    let repo_root = input
        .build
        .repository_path
        .canonicalize()
        .with_context(|| {
            format!(
                "failed to resolve repository path {}",
                input.build.repository_path.display()
            )
        })?;
    if !repo_root.is_dir() {
        anyhow::bail!(
            "repository path is not a directory: {}",
            repo_root.display()
        );
    }

    // Graph name: the directory name, with a generic fallback when the path
    // has no final component.
    let repo_name = repo_root
        .file_name()
        .map(|name| name.to_string_lossy().into_owned())
        .unwrap_or_else(|| "repository".to_string());

    // Normalize the config and path so comparisons against the saved state
    // are insensitive to list ordering/duplicates and path spelling.
    let normalized_config = normalize_incremental_config(&input.build.config);
    let normalized_repo_path = normalize_path(&repo_root);
    let mut diagnostics = Vec::new();
    let matcher = GitignoreMatcher::from_repository(&repo_root)?;
    let repo_files =
        collect_repository_files(&repo_root, &normalized_config, &matcher, &mut diagnostics)?;

    // Load the previous state; incompatibility yields `state: None` plus a
    // `full_rebuild_reason` rather than an error.
    let state_status = load_compatible_state(
        &input.state_file,
        &normalized_repo_path,
        &normalized_config,
        &mut diagnostics,
    )?;

    // Read contents (and content hashes) for every candidate file.
    let loaded_files = repo_files
        .iter()
        .map(|repo_file| load_repo_file(repo_file, &normalized_config))
        .collect::<Result<Vec<_>>>()?;

    let previous_state = state_status.state.as_ref();
    let state_entries = previous_state.map(|state| state.files.len()).unwrap_or(0);
    let current_paths: BTreeSet<String> = loaded_files
        .iter()
        .map(|loaded| loaded.repo_file.relative_path.clone())
        .collect();
    // Deleted = present in the previous state but no longer on disk.
    let deleted_paths: BTreeSet<String> = previous_state
        .map(|state| {
            state
                .files
                .keys()
                .filter(|path| !current_paths.contains(*path))
                .cloned()
                .collect()
        })
        .unwrap_or_default();

    // Decide, per file, whether to reuse the cached analysis or re-analyze,
    // while accumulating the statistics reported at the end.
    let (
        analyzed_files,
        added_files,
        changed_files,
        direct_invalidated_files,
        surface_changed_files,
        rebuilt_files,
        reused_files,
        invalidated_files,
    ) = if let Some(state) = previous_state {
        let mut added_files = 0usize;
        let mut changed_files = 0usize;
        let mut direct_rebuild_paths = BTreeSet::new();

        // Pass 1: direct invalidations — files that are new (no cache entry)
        // or changed (hash mismatch). `unwrap_or(true)` conservatively treats
        // a file without a computable hash as changed.
        for loaded in &loaded_files {
            let path = &loaded.repo_file.relative_path;
            match state.files.get(path) {
                None => {
                    direct_rebuild_paths.insert(path.clone());
                    added_files += 1;
                }
                Some(previous) => {
                    if loaded
                        .content_hash
                        .as_ref()
                        .map(|hash| hash != &previous.content_hash)
                        .unwrap_or(true)
                    {
                        direct_rebuild_paths.insert(path.clone());
                        changed_files += 1;
                    }
                }
            }
        }

        // Pass 2: analyze the directly invalidated files now and compare
        // their new surface signature with the cached one. Only surface
        // changes (and deletions, seeded here) propagate to dependents.
        let mut pre_analyzed = BTreeMap::new();
        // Deleted files always count as surface changes for their dependents.
        let mut surface_change_roots = deleted_paths.clone();
        let mut surface_changed_files = deleted_paths.len();

        for loaded in loaded_files
            .iter()
            .filter(|loaded| direct_rebuild_paths.contains(&loaded.repo_file.relative_path))
        {
            let analyzed = analyze_loaded_repo_file(loaded.clone());
            let current_surface = compute_file_surface_signature(analyzed.analysis.as_ref());
            // Files with no cached entry compare against the empty string.
            let previous_surface = state
                .files
                .get(&loaded.repo_file.relative_path)
                .map(|entry| entry.surface_signature.as_str())
                .unwrap_or("");
            if current_surface != previous_surface {
                surface_change_roots.insert(loaded.repo_file.relative_path.clone());
                surface_changed_files += 1;
            }
            pre_analyzed.insert(loaded.repo_file.relative_path.clone(), analyzed);
        }

        // Propagate surface changes through reverse dependencies, then union
        // with the direct invalidations to get the full rebuild set.
        let expanded_invalidations = expand_invalidations(&surface_change_roots, state);
        let rebuild_paths = direct_rebuild_paths
            .union(&expanded_invalidations)
            .cloned()
            .collect::<BTreeSet<_>>();
        // For stats, only count rebuild paths that still exist on disk;
        // deleted files are added back separately below.
        let counted_rebuild_paths = rebuild_paths
            .iter()
            .filter(|path| current_paths.contains(*path))
            .count();

        // Pass 3: produce the final per-file analyses — take the eagerly
        // analyzed result if present, else reuse the cache when the file is
        // untouched, else re-analyze.
        let mut reused_files = 0usize;
        let mut rebuilt_files = 0usize;
        let analyzed_files = loaded_files
            .into_iter()
            .map(|loaded| {
                let path = loaded.repo_file.relative_path.clone();
                if let Some(analyzed) = pre_analyzed.remove(&path) {
                    rebuilt_files += 1;
                    return analyzed;
                }
                if !rebuild_paths.contains(&path) {
                    if let (Some(content_hash), Some(previous)) =
                        (loaded.content_hash.as_ref(), state.files.get(&path))
                    {
                        if content_hash == &previous.content_hash {
                            reused_files += 1;
                            return previous.to_analyzed_repo_file();
                        }
                    }
                }

                rebuilt_files += 1;
                analyze_loaded_repo_file(loaded)
            })
            .collect::<Vec<_>>();

        (
            analyzed_files,
            added_files,
            changed_files,
            // Direct invalidations include deleted files.
            direct_rebuild_paths.len() + deleted_paths.len(),
            surface_changed_files,
            rebuilt_files,
            reused_files,
            // Total invalidations: surviving rebuild paths plus deletions.
            counted_rebuild_paths + deleted_paths.len(),
        )
    } else {
        // No usable previous state: full rebuild, every file re-analyzed.
        let rebuilt_files = loaded_files.len();
        let analyzed_files = loaded_files
            .into_iter()
            .map(analyze_loaded_repo_file)
            .collect::<Vec<_>>();
        (
            analyzed_files,
            0,
            0,
            current_paths.len(),
            0,
            rebuilt_files,
            0,
            current_paths.len(),
        )
    };

    // Assemble the graph from the combined reused + fresh analyses.
    let assembled = assemble_code_graph_from_analyzed_files(
        &repo_root,
        &repo_name,
        &input.build.commit_hash,
        &normalized_config,
        &analyzed_files,
        diagnostics,
    )?;

    // Persist the refreshed cache, including the dependency edges needed for
    // the next run's invalidation propagation.
    write_state(
        &input.state_file,
        &normalized_repo_path,
        &normalized_config,
        &analyzed_files,
        &assembled.dependencies_by_file,
    )?;

    // Attach incremental statistics to the build result.
    let mut result = assembled.result;
    result.incremental = Some(CodeGraphIncrementalStats {
        requested: true,
        scanned_files: repo_files.len(),
        state_entries,
        direct_invalidated_files,
        surface_changed_files,
        reused_files,
        rebuilt_files,
        added_files,
        changed_files,
        deleted_files: deleted_paths.len(),
        invalidated_files,
        full_rebuild_reason: state_status.full_rebuild_reason,
    });
    Ok(result)
}
280
/// Outcome of attempting to load a previous incremental state.
#[derive(Debug)]
struct StateLoadStatus {
    // The usable prior state, or `None` when a full rebuild is required.
    state: Option<IncrementalBuildState>,
    // Machine-readable reason for the full rebuild (e.g. "missing_state");
    // `None` when a prior state was loaded successfully.
    full_rebuild_reason: Option<String>,
}
286
287fn load_compatible_state(
288 state_file: &Path,
289 normalized_repo_path: &str,
290 normalized_config: &CodeGraphExtractorConfig,
291 diagnostics: &mut Vec<CodeGraphDiagnostic>,
292) -> Result<StateLoadStatus> {
293 let contents = match fs::read_to_string(state_file) {
294 Ok(contents) => contents,
295 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
296 return Ok(StateLoadStatus {
297 state: None,
298 full_rebuild_reason: Some("missing_state".to_string()),
299 });
300 }
301 Err(err) => {
302 diagnostics.push(CodeGraphDiagnostic::warning(
303 "CG2009",
304 format!(
305 "incremental state unreadable; falling back to full rebuild: {}",
306 err
307 ),
308 ));
309 return Ok(StateLoadStatus {
310 state: None,
311 full_rebuild_reason: Some("unreadable_state".to_string()),
312 });
313 }
314 };
315
316 let state: IncrementalBuildState = match serde_json::from_str(&contents) {
317 Ok(state) => state,
318 Err(err) => {
319 diagnostics.push(CodeGraphDiagnostic::warning(
320 "CG2009",
321 format!(
322 "incremental state invalid; falling back to full rebuild: {}",
323 err
324 ),
325 ));
326 return Ok(StateLoadStatus {
327 state: None,
328 full_rebuild_reason: Some("invalid_state".to_string()),
329 });
330 }
331 };
332
333 if state.extractor_version != CODEGRAPH_EXTRACTOR_VERSION {
334 return Ok(StateLoadStatus {
335 state: None,
336 full_rebuild_reason: Some("extractor_version_changed".to_string()),
337 });
338 }
339 if state.repository_path != normalized_repo_path {
340 return Ok(StateLoadStatus {
341 state: None,
342 full_rebuild_reason: Some("repository_changed".to_string()),
343 });
344 }
345 if state.config != *normalized_config {
346 return Ok(StateLoadStatus {
347 state: None,
348 full_rebuild_reason: Some("config_changed".to_string()),
349 });
350 }
351
352 Ok(StateLoadStatus {
353 state: Some(state),
354 full_rebuild_reason: None,
355 })
356}
357
358fn write_state(
359 state_file: &Path,
360 normalized_repo_path: &str,
361 normalized_config: &CodeGraphExtractorConfig,
362 analyzed_files: &[AnalyzedRepoFile],
363 dependencies_by_file: &BTreeMap<String, Vec<String>>,
364) -> Result<()> {
365 let mut files = BTreeMap::new();
366 for file in analyzed_files {
367 let dependencies = dependencies_by_file
368 .get(&file.relative_path)
369 .cloned()
370 .unwrap_or_default();
371 if let Some(state) = IncrementalFileState::from_analyzed_repo_file(file, dependencies) {
372 files.insert(file.relative_path.clone(), state);
373 }
374 }
375
376 let state = IncrementalBuildState {
377 extractor_version: CODEGRAPH_EXTRACTOR_VERSION.to_string(),
378 repository_path: normalized_repo_path.to_string(),
379 config: normalized_config.clone(),
380 files,
381 };
382 if let Some(parent) = state_file.parent() {
383 fs::create_dir_all(parent).with_context(|| {
384 format!(
385 "failed to create incremental state directory {}",
386 parent.display()
387 )
388 })?;
389 }
390 let json = serde_json::to_string_pretty(&state)?;
391 fs::write(state_file, json).with_context(|| {
392 format!(
393 "failed to write incremental state file {}",
394 state_file.display()
395 )
396 })?;
397 Ok(())
398}
399
400fn expand_invalidations(
401 initial_invalidations: &BTreeSet<String>,
402 state: &IncrementalBuildState,
403) -> BTreeSet<String> {
404 let mut reverse_dependencies: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
405 for (file, entry) in &state.files {
406 for dependency in &entry.dependencies {
407 reverse_dependencies
408 .entry(dependency.clone())
409 .or_default()
410 .insert(file.clone());
411 }
412 }
413
414 let mut invalidated = BTreeSet::new();
415 let mut queue: VecDeque<String> = initial_invalidations.iter().cloned().collect();
416 while let Some(path) = queue.pop_front() {
417 if !invalidated.insert(path.clone()) {
418 continue;
419 }
420 if let Some(dependents) = reverse_dependencies.get(&path) {
421 queue.extend(dependents.iter().cloned());
422 }
423 }
424 invalidated
425}
426
427fn normalize_incremental_config(config: &CodeGraphExtractorConfig) -> CodeGraphExtractorConfig {
428 let mut normalized = config.clone();
429 normalized.include_extensions.sort();
430 normalized.include_extensions.dedup();
431 normalized.exclude_dirs.sort();
432 normalized.exclude_dirs.dedup();
433 normalized
434}
435
/// Externally observable facts about one symbol, serialized (in declaration
/// order, per serde derive) into the surface-signature snapshot. Do not
/// reorder or rename fields: that would change every file's signature and
/// force a one-time full invalidation.
#[derive(Serialize)]
struct SurfaceSignatureSymbol {
    name: String,
    qualified_name: String,
    parent_identity: Option<String>,
    kind: String,
    modifiers: ExtractedModifiers,
    inputs: Vec<ExtractedInput>,
    output: Option<String>,
    type_info: Option<String>,
    exported: bool,
}
448
/// A re-exported import (one with `reexported` set) as it contributes to a
/// file's surface signature; `symbols` and `bindings` are stored sorted.
/// Field order is part of the signature — do not reorder.
#[derive(Serialize)]
struct SurfaceSignatureReexport {
    module: String,
    symbols: Vec<String>,
    bindings: Vec<ImportBinding>,
    wildcard: bool,
}
456
/// Canonical (sorted) collection of everything a file exposes to other files;
/// its JSON serialization is hashed to produce the surface signature. Field
/// order is part of the signature — do not reorder.
#[derive(Serialize)]
struct SurfaceSignatureSnapshot {
    symbols: Vec<SurfaceSignatureSymbol>,
    export_bindings: Vec<ImportBinding>,
    exported_symbol_names: Vec<String>,
    default_exported_symbol_names: Vec<String>,
    reexports: Vec<SurfaceSignatureReexport>,
}
465
466fn compute_file_surface_signature(analysis: Option<&FileAnalysis>) -> String {
467 let Some(analysis) = analysis else {
468 return String::new();
469 };
470
471 let mut symbols = analysis
472 .symbols
473 .iter()
474 .map(|symbol| SurfaceSignatureSymbol {
475 name: symbol.name.clone(),
476 qualified_name: symbol.qualified_name.clone(),
477 parent_identity: symbol.parent_identity.clone(),
478 kind: symbol.kind.clone(),
479 modifiers: symbol.modifiers.clone(),
480 inputs: symbol.inputs.clone(),
481 output: symbol.output.clone(),
482 type_info: symbol.type_info.clone(),
483 exported: symbol.exported,
484 })
485 .collect::<Vec<_>>();
486 symbols.sort_by(|left, right| {
487 left.qualified_name
488 .cmp(&right.qualified_name)
489 .then_with(|| left.kind.cmp(&right.kind))
490 .then_with(|| left.parent_identity.cmp(&right.parent_identity))
491 .then_with(|| left.name.cmp(&right.name))
492 });
493
494 let mut export_bindings = analysis.export_bindings.clone();
495 export_bindings.sort();
496
497 let mut exported_symbol_names = analysis
498 .exported_symbol_names
499 .iter()
500 .cloned()
501 .collect::<Vec<_>>();
502 exported_symbol_names.sort();
503
504 let mut default_exported_symbol_names = analysis
505 .default_exported_symbol_names
506 .iter()
507 .cloned()
508 .collect::<Vec<_>>();
509 default_exported_symbol_names.sort();
510
511 let mut reexports = analysis
512 .imports
513 .iter()
514 .filter(|import| import.reexported)
515 .map(|import| {
516 let mut symbols = import.symbols.clone();
517 symbols.sort();
518 let mut bindings = import.bindings.clone();
519 bindings.sort();
520 SurfaceSignatureReexport {
521 module: import.module.clone(),
522 symbols,
523 bindings,
524 wildcard: import.wildcard,
525 }
526 })
527 .collect::<Vec<_>>();
528 reexports.sort_by(|left, right| {
529 left.module
530 .cmp(&right.module)
531 .then_with(|| left.wildcard.cmp(&right.wildcard))
532 .then_with(|| left.symbols.cmp(&right.symbols))
533 .then_with(|| left.bindings.cmp(&right.bindings))
534 });
535
536 let snapshot = SurfaceSignatureSnapshot {
537 symbols,
538 export_bindings,
539 exported_symbol_names,
540 default_exported_symbol_names,
541 reexports,
542 };
543 let serialized = serde_json::to_string(&snapshot).expect("surface signature serialization");
544 super::build::hash_source(&serialized)
545}