thread_flow/incremental/dependency_builder.rs
1// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
2// SPDX-License-Identifier: AGPL-3.0-or-later
3
4//! Dependency graph builder that coordinates language-specific extractors.
5//!
6//! This module provides a unified interface for building dependency graphs
7//! across multiple programming languages. It uses the extractor subsystem
8//! to parse import/dependency statements and constructs a [`DependencyGraph`]
9//! representing the file-level and symbol-level dependencies in a codebase.
10//!
11//! ## Architecture
12//!
13//! ```text
14//! DependencyGraphBuilder
15//! ├─> LanguageDetector (file extension → Language)
16//! ├─> RustDependencyExtractor (use statements)
17//! ├─> TypeScriptDependencyExtractor (import/require)
18//! ├─> PythonDependencyExtractor (import statements)
19//! └─> GoDependencyExtractor (import blocks)
20//! ```
21//!
22//! ## Example Usage
23//!
24//! ```rust
25//! use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
26//! use thread_flow::incremental::storage::InMemoryStorage;
27//! use std::path::Path;
28//!
29//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
30//! let storage = Box::new(InMemoryStorage::new());
31//! let mut builder = DependencyGraphBuilder::new(storage);
32//!
33//! // Extract dependencies from files
34//! builder.extract_file(Path::new("src/main.rs")).await?;
35//! builder.extract_file(Path::new("src/utils.ts")).await?;
36//!
37//! // Access the built graph
38//! let graph = builder.graph();
39//! println!("Found {} files with {} dependencies",
40//! graph.node_count(), graph.edge_count());
41//!
42//! // Persist to storage
43//! builder.persist().await?;
44//! # Ok(())
45//! # }
46//! ```
47
48use super::extractors::{
49 GoDependencyExtractor, PythonDependencyExtractor, RustDependencyExtractor,
50 TypeScriptDependencyExtractor, go::ExtractionError as GoExtractionError,
51 python::ExtractionError as PyExtractionError, rust::ExtractionError as RustExtractionError,
52 typescript::ExtractionError as TsExtractionError,
53};
54use super::graph::DependencyGraph;
55use super::storage::{StorageBackend, StorageError};
56use super::types::AnalysisDefFingerprint;
57use std::path::{Path, PathBuf};
58use tracing::{debug, warn};
59
60// ─── Language Types ──────────────────────────────────────────────────────────
61
/// Source languages for which a dependency extractor is wired up.
///
/// Values are produced by [`LanguageDetector::detect_language`] from a file's
/// extension. TypeScript and JavaScript are distinct variants here even though
/// the builder routes both to the same extractor.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Language {
    /// Rust sources (`.rs`).
    Rust,
    /// TypeScript sources (`.ts`, `.tsx`).
    TypeScript,
    /// JavaScript sources (`.js`, `.jsx`).
    JavaScript,
    /// Python sources (`.py`).
    Python,
    /// Go sources (`.go`).
    Go,
}
76
77// ─── Language Detection ──────────────────────────────────────────────────────
78
79/// Detects programming language from file extension.
80pub struct LanguageDetector;
81
82impl LanguageDetector {
83 /// Detects the programming language from a file path.
84 ///
85 /// Returns `Some(Language)` if the extension is recognized,
86 /// or `None` for unsupported file types.
87 ///
88 /// # Examples
89 ///
90 /// ```
91 /// use thread_flow::incremental::dependency_builder::{Language, LanguageDetector};
92 /// use std::path::Path;
93 ///
94 /// assert_eq!(
95 /// LanguageDetector::detect_language(Path::new("main.rs")),
96 /// Some(Language::Rust)
97 /// );
98 /// assert_eq!(
99 /// LanguageDetector::detect_language(Path::new("app.ts")),
100 /// Some(Language::TypeScript)
101 /// );
102 /// assert_eq!(
103 /// LanguageDetector::detect_language(Path::new("file.java")),
104 /// None
105 /// );
106 /// ```
107 pub fn detect_language(path: &Path) -> Option<Language> {
108 path.extension()
109 .and_then(|ext| ext.to_str())
110 .and_then(|ext| match ext.to_lowercase().as_str() {
111 "rs" => Some(Language::Rust),
112 "ts" | "tsx" => Some(Language::TypeScript),
113 "js" | "jsx" => Some(Language::JavaScript),
114 "py" => Some(Language::Python),
115 "go" => Some(Language::Go),
116 _ => None,
117 })
118 }
119}
120
121// ─── Build Errors ────────────────────────────────────────────────────────────
122
123/// Errors that can occur during dependency graph building.
124#[derive(Debug, thiserror::Error)]
125pub enum BuildError {
126 /// Language not supported for dependency extraction.
127 #[error("Unsupported language for file: {0}")]
128 UnsupportedLanguage(PathBuf),
129
130 /// Failed to read file contents.
131 #[error("IO error reading {file}: {error}")]
132 IoError {
133 file: PathBuf,
134 error: std::io::Error,
135 },
136
137 /// Dependency extraction failed for a file.
138 #[error("Extraction failed for {file}: {error}")]
139 ExtractionFailed { file: PathBuf, error: String },
140
141 /// Storage backend operation failed.
142 #[error("Storage error: {0}")]
143 Storage(#[from] StorageError),
144
145 /// Rust extraction error.
146 #[error("Rust extraction error: {0}")]
147 RustExtraction(#[from] RustExtractionError),
148
149 /// TypeScript/JavaScript extraction error.
150 #[error("TypeScript extraction error: {0}")]
151 TypeScriptExtraction(#[from] TsExtractionError),
152
153 /// Python extraction error.
154 #[error("Python extraction error: {0}")]
155 PythonExtraction(#[from] PyExtractionError),
156
157 /// Go extraction error.
158 #[error("Go extraction error: {0}")]
159 GoExtraction(#[from] GoExtractionError),
160}
161
162// ─── Dependency Graph Builder ────────────────────────────────────────────────
163
164/// Coordinates dependency extraction across multiple languages to build a unified dependency graph.
165///
166/// The builder uses language-specific extractors to parse import/dependency
167/// statements and progressively constructs a [`DependencyGraph`]. It manages
168/// the storage backend for persistence and provides batch processing capabilities.
169///
170/// ## Usage Pattern
171///
172/// 1. Create builder with storage backend
173/// 2. Extract files using `extract_file()` or `extract_files()`
174/// 3. Access graph with `graph()`
175/// 4. Optionally persist with `persist()`
176///
177/// # Examples
178///
179/// ```rust,no_run
180/// # use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
181/// # use thread_flow::incremental::storage::InMemoryStorage;
182/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
183/// let storage = Box::new(InMemoryStorage::new());
184/// let mut builder = DependencyGraphBuilder::new(storage);
185///
186/// // Extract single file
187/// builder.extract_file(std::path::Path::new("src/main.rs")).await?;
188///
189/// // Batch extraction
190/// let files = vec![
191/// std::path::PathBuf::from("src/utils.rs"),
192/// std::path::PathBuf::from("src/config.ts"),
193/// ];
194/// builder.extract_files(&files).await?;
195///
196/// // Access graph
197/// println!("Graph has {} nodes", builder.graph().node_count());
198///
199/// // Persist to storage
200/// builder.persist().await?;
201/// # Ok(())
202/// # }
203/// ```
204pub struct DependencyGraphBuilder {
205 /// The dependency graph being built.
206 graph: DependencyGraph,
207
208 /// Storage backend for persistence.
209 storage: Box<dyn StorageBackend>,
210
211 /// Language-specific extractors.
212 rust_extractor: RustDependencyExtractor,
213 typescript_extractor: TypeScriptDependencyExtractor,
214 python_extractor: PythonDependencyExtractor,
215 go_extractor: GoDependencyExtractor,
216}
217
218impl DependencyGraphBuilder {
219 /// Creates a new dependency graph builder with the given storage backend.
220 ///
221 /// # Arguments
222 ///
223 /// * `storage` - Storage backend for persisting fingerprints and graph data
224 ///
225 /// # Examples
226 ///
227 /// ```rust
228 /// use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
229 /// use thread_flow::incremental::storage::InMemoryStorage;
230 ///
231 /// let storage = Box::new(InMemoryStorage::new());
232 /// let builder = DependencyGraphBuilder::new(storage);
233 /// ```
234 pub fn new(storage: Box<dyn StorageBackend>) -> Self {
235 Self {
236 graph: DependencyGraph::new(),
237 storage,
238 rust_extractor: RustDependencyExtractor::new(),
239 typescript_extractor: TypeScriptDependencyExtractor::new(),
240 python_extractor: PythonDependencyExtractor::new(),
241 go_extractor: GoDependencyExtractor::new(None), // No module path by default
242 }
243 }
244
245 /// Accesses the built dependency graph.
246 ///
247 /// Returns a reference to the [`DependencyGraph`] constructed from
248 /// all extracted files.
249 ///
250 /// # Examples
251 ///
252 /// ```rust
253 /// # use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
254 /// # use thread_flow::incremental::storage::InMemoryStorage;
255 /// let storage = Box::new(InMemoryStorage::new());
256 /// let builder = DependencyGraphBuilder::new(storage);
257 /// let graph = builder.graph();
258 /// assert_eq!(graph.node_count(), 0); // Empty graph initially
259 /// ```
260 pub fn graph(&self) -> &DependencyGraph {
261 &self.graph
262 }
263
264 /// Extracts dependencies from a single file.
265 ///
266 /// Detects the file's language, uses the appropriate extractor,
267 /// and adds the resulting edges to the dependency graph.
268 ///
269 /// # Arguments
270 ///
271 /// * `file_path` - Path to the source file to analyze
272 ///
273 /// # Errors
274 ///
275 /// Returns an error if:
276 /// - The file's language is not supported
277 /// - The file cannot be read
278 /// - Dependency extraction fails
279 ///
280 /// # Examples
281 ///
282 /// ```rust,no_run
283 /// # use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
284 /// # use thread_flow::incremental::storage::InMemoryStorage;
285 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
286 /// let storage = Box::new(InMemoryStorage::new());
287 /// let mut builder = DependencyGraphBuilder::new(storage);
288 ///
289 /// builder.extract_file(std::path::Path::new("src/main.rs")).await?;
290 /// # Ok(())
291 /// # }
292 /// ```
293 pub async fn extract_file(&mut self, file_path: &Path) -> Result<(), BuildError> {
294 // Detect language
295 let language = LanguageDetector::detect_language(file_path)
296 .ok_or_else(|| BuildError::UnsupportedLanguage(file_path.to_path_buf()))?;
297
298 debug!(
299 "Extracting dependencies from {:?} ({:?})",
300 file_path, language
301 );
302
303 // Read file contents
304 let content = tokio::fs::read(file_path)
305 .await
306 .map_err(|error| BuildError::IoError {
307 file: file_path.to_path_buf(),
308 error,
309 })?;
310
311 // Convert to UTF-8 string for extractors
312 let source = String::from_utf8_lossy(&content);
313
314 // Compute fingerprint and add node
315 let fingerprint = AnalysisDefFingerprint::new(&content);
316 self.graph
317 .nodes
318 .insert(file_path.to_path_buf(), fingerprint);
319
320 // Extract dependencies using language-specific extractor
321 let edges = match language {
322 Language::Rust => self
323 .rust_extractor
324 .extract_dependency_edges(&source, file_path)?,
325
326 Language::TypeScript | Language::JavaScript => self
327 .typescript_extractor
328 .extract_dependency_edges(&source, file_path)?,
329
330 Language::Python => self
331 .python_extractor
332 .extract_dependency_edges(&source, file_path)?,
333
334 Language::Go => self
335 .go_extractor
336 .extract_dependency_edges(&source, file_path)?,
337 };
338
339 // Add edges to graph
340 for edge in edges {
341 self.graph.add_edge(edge);
342 }
343
344 Ok(())
345 }
346
347 /// Extracts dependencies from multiple files in batch.
348 ///
349 /// Processes all files and continues on individual extraction failures.
350 /// Returns an error only if all extractions fail.
351 ///
352 /// # Arguments
353 ///
354 /// * `files` - Slice of file paths to analyze
355 ///
356 /// # Errors
357 ///
358 /// Returns the last error encountered if ANY extraction fails.
359 /// Individual extraction errors are logged as warnings.
360 ///
361 /// # Examples
362 ///
363 /// ```rust,no_run
364 /// # use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
365 /// # use thread_flow::incremental::storage::InMemoryStorage;
366 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
367 /// let storage = Box::new(InMemoryStorage::new());
368 /// let mut builder = DependencyGraphBuilder::new(storage);
369 ///
370 /// let files = vec![
371 /// std::path::PathBuf::from("src/main.rs"),
372 /// std::path::PathBuf::from("src/lib.rs"),
373 /// ];
374 /// builder.extract_files(&files).await?;
375 /// # Ok(())
376 /// # }
377 /// ```
378 pub async fn extract_files(&mut self, files: &[PathBuf]) -> Result<(), BuildError> {
379 let mut last_error = None;
380 let mut success_count = 0;
381
382 for file in files {
383 match self.extract_file(file).await {
384 Ok(_) => success_count += 1,
385 Err(e) => {
386 warn!("Failed to extract {}: {}", file.display(), e);
387 last_error = Some(e);
388 }
389 }
390 }
391
392 debug!(
393 "Batch extraction: {}/{} files succeeded",
394 success_count,
395 files.len()
396 );
397
398 // Return error only if we had failures
399 if let Some(err) = last_error {
400 if success_count == 0 {
401 // All failed - propagate error
402 return Err(err);
403 }
404 // Some succeeded - log warning but continue
405 warn!(
406 "Batch extraction: {}/{} files failed",
407 files.len() - success_count,
408 files.len()
409 );
410 }
411
412 Ok(())
413 }
414
415 /// Persists the dependency graph to the storage backend.
416 ///
417 /// Saves all fingerprints and edges to the configured storage.
418 ///
419 /// # Errors
420 ///
421 /// Returns an error if storage operations fail.
422 ///
423 /// # Examples
424 ///
425 /// ```rust,no_run
426 /// # use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
427 /// # use thread_flow::incremental::storage::InMemoryStorage;
428 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
429 /// let storage = Box::new(InMemoryStorage::new());
430 /// let mut builder = DependencyGraphBuilder::new(storage);
431 ///
432 /// // ... extract files ...
433 ///
434 /// // Persist to storage
435 /// builder.persist().await?;
436 /// # Ok(())
437 /// # }
438 /// ```
439 pub async fn persist(&self) -> Result<(), BuildError> {
440 debug!(
441 "Persisting graph: {} nodes, {} edges",
442 self.graph.node_count(),
443 self.graph.edge_count()
444 );
445
446 // Save the full graph
447 self.storage.save_full_graph(&self.graph).await?;
448
449 Ok(())
450 }
451}
452
#[cfg(test)]
mod tests {
    use super::*;
    use crate::incremental::storage::InMemoryStorage;

    /// Every supported extension maps to its language, unknown extensions map
    /// to `None`, and matching ignores letter case.
    #[test]
    fn test_language_detection() {
        let detect = |name: &str| LanguageDetector::detect_language(Path::new(name));

        // Supported extensions
        assert_eq!(detect("file.rs"), Some(Language::Rust));
        assert_eq!(detect("file.ts"), Some(Language::TypeScript));
        assert_eq!(detect("file.tsx"), Some(Language::TypeScript));
        assert_eq!(detect("file.js"), Some(Language::JavaScript));
        assert_eq!(detect("file.jsx"), Some(Language::JavaScript));
        assert_eq!(detect("file.py"), Some(Language::Python));
        assert_eq!(detect("file.go"), Some(Language::Go));

        // Unsupported extension
        assert_eq!(detect("file.java"), None);

        // Upper-case extension
        assert_eq!(detect("FILE.RS"), Some(Language::Rust));
    }

    /// A freshly constructed builder exposes an empty graph.
    #[test]
    fn test_builder_creation() {
        let builder = DependencyGraphBuilder::new(Box::new(InMemoryStorage::new()));
        let graph = builder.graph();

        assert_eq!(graph.node_count(), 0);
        assert_eq!(graph.edge_count(), 0);
    }
}