Skip to main content

thread_flow/incremental/
dependency_builder.rs

1// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
2// SPDX-License-Identifier: AGPL-3.0-or-later
3
4//! Dependency graph builder that coordinates language-specific extractors.
5//!
6//! This module provides a unified interface for building dependency graphs
7//! across multiple programming languages. It uses the extractor subsystem
8//! to parse import/dependency statements and constructs a [`DependencyGraph`]
9//! representing the file-level and symbol-level dependencies in a codebase.
10//!
11//! ## Architecture
12//!
13//! ```text
14//! DependencyGraphBuilder
15//!   ├─> LanguageDetector (file extension → Language)
16//!   ├─> RustDependencyExtractor (use statements)
17//!   ├─> TypeScriptDependencyExtractor (import/require)
18//!   ├─> PythonDependencyExtractor (import statements)
19//!   └─> GoDependencyExtractor (import blocks)
20//! ```
21//!
22//! ## Example Usage
23//!
24//! ```rust
25//! use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
26//! use thread_flow::incremental::storage::InMemoryStorage;
27//! use std::path::Path;
28//!
29//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
30//! let storage = Box::new(InMemoryStorage::new());
31//! let mut builder = DependencyGraphBuilder::new(storage);
32//!
33//! // Extract dependencies from files
34//! builder.extract_file(Path::new("src/main.rs")).await?;
35//! builder.extract_file(Path::new("src/utils.ts")).await?;
36//!
37//! // Access the built graph
38//! let graph = builder.graph();
39//! println!("Found {} files with {} dependencies",
40//!          graph.node_count(), graph.edge_count());
41//!
42//! // Persist to storage
43//! builder.persist().await?;
44//! # Ok(())
45//! # }
46//! ```
47
48use super::extractors::{
49    GoDependencyExtractor, PythonDependencyExtractor, RustDependencyExtractor,
50    TypeScriptDependencyExtractor, go::ExtractionError as GoExtractionError,
51    python::ExtractionError as PyExtractionError, rust::ExtractionError as RustExtractionError,
52    typescript::ExtractionError as TsExtractionError,
53};
54use super::graph::DependencyGraph;
55use super::storage::{StorageBackend, StorageError};
56use super::types::AnalysisDefFingerprint;
57use std::path::{Path, PathBuf};
58use tracing::{debug, warn};
59
60// ─── Language Types ──────────────────────────────────────────────────────────
61
/// Supported programming languages for dependency extraction.
///
/// Values are produced by [`LanguageDetector::detect_language`] from a file's
/// extension. The type is `Copy`, so it is passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// Rust programming language (.rs files)
    Rust,
    /// TypeScript (.ts, .tsx files)
    TypeScript,
    /// JavaScript (.js, .jsx files)
    JavaScript,
    /// Python (.py files)
    Python,
    /// Go (.go files)
    Go,
}

// ─── Language Detection ──────────────────────────────────────────────────────

/// Detects programming language from file extension.
///
/// Detection looks only at the path's extension; file contents are never read.
pub struct LanguageDetector;

impl LanguageDetector {
    /// Recognized file extensions and the language each one maps to.
    ///
    /// Entries are lowercase ASCII; lookups compare ASCII-case-insensitively,
    /// so no lowercased copy of the extension has to be allocated.
    const EXTENSIONS: [(&'static str, Language); 7] = [
        ("rs", Language::Rust),
        ("ts", Language::TypeScript),
        ("tsx", Language::TypeScript),
        ("js", Language::JavaScript),
        ("jsx", Language::JavaScript),
        ("py", Language::Python),
        ("go", Language::Go),
    ];

    /// Detects the programming language from a file path.
    ///
    /// The extension is matched case-insensitively (`FILE.RS` is Rust).
    ///
    /// Returns `Some(Language)` if the extension is recognized, or `None` when
    /// the path has no extension, the extension is not valid UTF-8, or the
    /// file type is unsupported.
    ///
    /// # Examples
    ///
    /// ```
    /// use thread_flow::incremental::dependency_builder::{Language, LanguageDetector};
    /// use std::path::Path;
    ///
    /// assert_eq!(
    ///     LanguageDetector::detect_language(Path::new("main.rs")),
    ///     Some(Language::Rust)
    /// );
    /// assert_eq!(
    ///     LanguageDetector::detect_language(Path::new("app.ts")),
    ///     Some(Language::TypeScript)
    /// );
    /// assert_eq!(
    ///     LanguageDetector::detect_language(Path::new("file.java")),
    ///     None
    /// );
    /// ```
    pub fn detect_language(path: &Path) -> Option<Language> {
        let ext = path.extension()?.to_str()?;

        // All known extensions are plain ASCII, so an ASCII-only comparison
        // accepts exactly the same inputs as lowercasing the whole string.
        Self::EXTENSIONS
            .iter()
            .find(|(known, _)| ext.eq_ignore_ascii_case(known))
            .map(|&(_, language)| language)
    }
}
120
121// ─── Build Errors ────────────────────────────────────────────────────────────
122
/// Errors that can occur during dependency graph building.
///
/// The variants carrying `#[from]` can be produced directly by the `?`
/// operator from the corresponding storage or extractor error type.
#[derive(Debug, thiserror::Error)]
pub enum BuildError {
    /// Language not supported for dependency extraction.
    ///
    /// Carries the path whose extension was not recognized.
    #[error("Unsupported language for file: {0}")]
    UnsupportedLanguage(PathBuf),

    /// Failed to read file contents.
    ///
    /// `file` is the path that was being read and `error` is the underlying
    /// I/O failure.
    #[error("IO error reading {file}: {error}")]
    IoError {
        file: PathBuf,
        error: std::io::Error,
    },

    /// Dependency extraction failed for a file.
    ///
    /// NOTE(review): nothing in this module constructs this variant; the
    /// language-specific variants below are used instead. Presumably kept for
    /// callers elsewhere — confirm before removing.
    #[error("Extraction failed for {file}: {error}")]
    ExtractionFailed { file: PathBuf, error: String },

    /// Storage backend operation failed.
    #[error("Storage error: {0}")]
    Storage(#[from] StorageError),

    /// Rust extraction error.
    #[error("Rust extraction error: {0}")]
    RustExtraction(#[from] RustExtractionError),

    /// TypeScript/JavaScript extraction error.
    #[error("TypeScript extraction error: {0}")]
    TypeScriptExtraction(#[from] TsExtractionError),

    /// Python extraction error.
    #[error("Python extraction error: {0}")]
    PythonExtraction(#[from] PyExtractionError),

    /// Go extraction error.
    #[error("Go extraction error: {0}")]
    GoExtraction(#[from] GoExtractionError),
}
161
162// ─── Dependency Graph Builder ────────────────────────────────────────────────
163
/// Coordinates dependency extraction across multiple languages to build a unified dependency graph.
///
/// The builder uses language-specific extractors to parse import/dependency
/// statements and progressively constructs a [`DependencyGraph`]. It manages
/// the storage backend for persistence and provides batch processing capabilities.
///
/// ## Usage Pattern
///
/// 1. Create builder with storage backend
/// 2. Extract files using `extract_file()` or `extract_files()`
/// 3. Access graph with `graph()`
/// 4. Optionally persist with `persist()`
///
/// # Examples
///
/// ```rust,no_run
/// # use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
/// # use thread_flow::incremental::storage::InMemoryStorage;
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
/// let storage = Box::new(InMemoryStorage::new());
/// let mut builder = DependencyGraphBuilder::new(storage);
///
/// // Extract single file
/// builder.extract_file(std::path::Path::new("src/main.rs")).await?;
///
/// // Batch extraction
/// let files = vec![
///     std::path::PathBuf::from("src/utils.rs"),
///     std::path::PathBuf::from("src/config.ts"),
/// ];
/// builder.extract_files(&files).await?;
///
/// // Access graph
/// println!("Graph has {} nodes", builder.graph().node_count());
///
/// // Persist to storage
/// builder.persist().await?;
/// # Ok(())
/// # }
/// ```
pub struct DependencyGraphBuilder {
    /// The dependency graph being built.
    ///
    /// Each extracted file adds one node (path → content fingerprint) plus
    /// the edges returned by its extractor.
    graph: DependencyGraph,

    /// Storage backend for persistence.
    ///
    /// Only used by `persist()`; extraction itself never touches storage.
    storage: Box<dyn StorageBackend>,

    /// Language-specific extractors.
    ///
    /// Extractor used for `.rs` files.
    rust_extractor: RustDependencyExtractor,
    /// Extractor used for both TypeScript and JavaScript files.
    typescript_extractor: TypeScriptDependencyExtractor,
    /// Extractor used for `.py` files.
    python_extractor: PythonDependencyExtractor,
    /// Extractor used for `.go` files; `new()` constructs it with `None`
    /// (no module path).
    go_extractor: GoDependencyExtractor,
}
217
218impl DependencyGraphBuilder {
219    /// Creates a new dependency graph builder with the given storage backend.
220    ///
221    /// # Arguments
222    ///
223    /// * `storage` - Storage backend for persisting fingerprints and graph data
224    ///
225    /// # Examples
226    ///
227    /// ```rust
228    /// use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
229    /// use thread_flow::incremental::storage::InMemoryStorage;
230    ///
231    /// let storage = Box::new(InMemoryStorage::new());
232    /// let builder = DependencyGraphBuilder::new(storage);
233    /// ```
234    pub fn new(storage: Box<dyn StorageBackend>) -> Self {
235        Self {
236            graph: DependencyGraph::new(),
237            storage,
238            rust_extractor: RustDependencyExtractor::new(),
239            typescript_extractor: TypeScriptDependencyExtractor::new(),
240            python_extractor: PythonDependencyExtractor::new(),
241            go_extractor: GoDependencyExtractor::new(None), // No module path by default
242        }
243    }
244
    /// Accesses the built dependency graph.
    ///
    /// Returns a shared reference to the [`DependencyGraph`] constructed from
    /// all files extracted so far. The graph is borrowed, not cloned, so it
    /// reflects the builder's current state.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
    /// # use thread_flow::incremental::storage::InMemoryStorage;
    /// let storage = Box::new(InMemoryStorage::new());
    /// let builder = DependencyGraphBuilder::new(storage);
    /// let graph = builder.graph();
    /// assert_eq!(graph.node_count(), 0); // Empty graph initially
    /// ```
    pub fn graph(&self) -> &DependencyGraph {
        &self.graph
    }
263
264    /// Extracts dependencies from a single file.
265    ///
266    /// Detects the file's language, uses the appropriate extractor,
267    /// and adds the resulting edges to the dependency graph.
268    ///
269    /// # Arguments
270    ///
271    /// * `file_path` - Path to the source file to analyze
272    ///
273    /// # Errors
274    ///
275    /// Returns an error if:
276    /// - The file's language is not supported
277    /// - The file cannot be read
278    /// - Dependency extraction fails
279    ///
280    /// # Examples
281    ///
282    /// ```rust,no_run
283    /// # use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
284    /// # use thread_flow::incremental::storage::InMemoryStorage;
285    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
286    /// let storage = Box::new(InMemoryStorage::new());
287    /// let mut builder = DependencyGraphBuilder::new(storage);
288    ///
289    /// builder.extract_file(std::path::Path::new("src/main.rs")).await?;
290    /// # Ok(())
291    /// # }
292    /// ```
293    pub async fn extract_file(&mut self, file_path: &Path) -> Result<(), BuildError> {
294        // Detect language
295        let language = LanguageDetector::detect_language(file_path)
296            .ok_or_else(|| BuildError::UnsupportedLanguage(file_path.to_path_buf()))?;
297
298        debug!(
299            "Extracting dependencies from {:?} ({:?})",
300            file_path, language
301        );
302
303        // Read file contents
304        let content = tokio::fs::read(file_path)
305            .await
306            .map_err(|error| BuildError::IoError {
307                file: file_path.to_path_buf(),
308                error,
309            })?;
310
311        // Convert to UTF-8 string for extractors
312        let source = String::from_utf8_lossy(&content);
313
314        // Compute fingerprint and add node
315        let fingerprint = AnalysisDefFingerprint::new(&content);
316        self.graph
317            .nodes
318            .insert(file_path.to_path_buf(), fingerprint);
319
320        // Extract dependencies using language-specific extractor
321        let edges = match language {
322            Language::Rust => self
323                .rust_extractor
324                .extract_dependency_edges(&source, file_path)?,
325
326            Language::TypeScript | Language::JavaScript => self
327                .typescript_extractor
328                .extract_dependency_edges(&source, file_path)?,
329
330            Language::Python => self
331                .python_extractor
332                .extract_dependency_edges(&source, file_path)?,
333
334            Language::Go => self
335                .go_extractor
336                .extract_dependency_edges(&source, file_path)?,
337        };
338
339        // Add edges to graph
340        for edge in edges {
341            self.graph.add_edge(edge);
342        }
343
344        Ok(())
345    }
346
347    /// Extracts dependencies from multiple files in batch.
348    ///
349    /// Processes all files and continues on individual extraction failures.
350    /// Returns an error only if all extractions fail.
351    ///
352    /// # Arguments
353    ///
354    /// * `files` - Slice of file paths to analyze
355    ///
356    /// # Errors
357    ///
358    /// Returns the last error encountered if ANY extraction fails.
359    /// Individual extraction errors are logged as warnings.
360    ///
361    /// # Examples
362    ///
363    /// ```rust,no_run
364    /// # use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
365    /// # use thread_flow::incremental::storage::InMemoryStorage;
366    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
367    /// let storage = Box::new(InMemoryStorage::new());
368    /// let mut builder = DependencyGraphBuilder::new(storage);
369    ///
370    /// let files = vec![
371    ///     std::path::PathBuf::from("src/main.rs"),
372    ///     std::path::PathBuf::from("src/lib.rs"),
373    /// ];
374    /// builder.extract_files(&files).await?;
375    /// # Ok(())
376    /// # }
377    /// ```
378    pub async fn extract_files(&mut self, files: &[PathBuf]) -> Result<(), BuildError> {
379        let mut last_error = None;
380        let mut success_count = 0;
381
382        for file in files {
383            match self.extract_file(file).await {
384                Ok(_) => success_count += 1,
385                Err(e) => {
386                    warn!("Failed to extract {}: {}", file.display(), e);
387                    last_error = Some(e);
388                }
389            }
390        }
391
392        debug!(
393            "Batch extraction: {}/{} files succeeded",
394            success_count,
395            files.len()
396        );
397
398        // Return error only if we had failures
399        if let Some(err) = last_error {
400            if success_count == 0 {
401                // All failed - propagate error
402                return Err(err);
403            }
404            // Some succeeded - log warning but continue
405            warn!(
406                "Batch extraction: {}/{} files failed",
407                files.len() - success_count,
408                files.len()
409            );
410        }
411
412        Ok(())
413    }
414
415    /// Persists the dependency graph to the storage backend.
416    ///
417    /// Saves all fingerprints and edges to the configured storage.
418    ///
419    /// # Errors
420    ///
421    /// Returns an error if storage operations fail.
422    ///
423    /// # Examples
424    ///
425    /// ```rust,no_run
426    /// # use thread_flow::incremental::dependency_builder::DependencyGraphBuilder;
427    /// # use thread_flow::incremental::storage::InMemoryStorage;
428    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
429    /// let storage = Box::new(InMemoryStorage::new());
430    /// let mut builder = DependencyGraphBuilder::new(storage);
431    ///
432    /// // ... extract files ...
433    ///
434    /// // Persist to storage
435    /// builder.persist().await?;
436    /// # Ok(())
437    /// # }
438    /// ```
439    pub async fn persist(&self) -> Result<(), BuildError> {
440        debug!(
441            "Persisting graph: {} nodes, {} edges",
442            self.graph.node_count(),
443            self.graph.edge_count()
444        );
445
446        // Save the full graph
447        self.storage.save_full_graph(&self.graph).await?;
448
449        Ok(())
450    }
451}
452
#[cfg(test)]
mod tests {
    use super::*;
    use crate::incremental::storage::InMemoryStorage;

    /// Every supported extension maps to its language, unknown extensions map
    /// to `None`, and matching ignores case.
    #[test]
    fn test_language_detection() {
        let cases: [(&str, Option<Language>); 9] = [
            ("file.rs", Some(Language::Rust)),
            ("file.ts", Some(Language::TypeScript)),
            ("file.tsx", Some(Language::TypeScript)),
            ("file.js", Some(Language::JavaScript)),
            ("file.jsx", Some(Language::JavaScript)),
            ("file.py", Some(Language::Python)),
            ("file.go", Some(Language::Go)),
            // Unsupported
            ("file.java", None),
            // Case insensitive
            ("FILE.RS", Some(Language::Rust)),
        ];

        for &(file_name, expected) in cases.iter() {
            assert_eq!(
                LanguageDetector::detect_language(Path::new(file_name)),
                expected
            );
        }
    }

    /// A freshly created builder exposes an empty graph.
    #[test]
    fn test_builder_creation() {
        let builder = DependencyGraphBuilder::new(Box::new(InMemoryStorage::new()));
        let graph = builder.graph();

        assert_eq!(graph.node_count(), 0);
        assert_eq!(graph.edge_count(), 0);
    }
}