Skip to main content

thread_flow/incremental/extractors/
go.rs

1// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
2// SPDX-License-Identifier: AGPL-3.0-or-later
3
//! Go dependency extractor built on the tree-sitter Go grammar.
5//!
6//! Extracts `import` declarations from Go source files, handling all import forms:
7//!
8//! - Single imports: `import "fmt"`
9//! - Import blocks: `import ( "fmt"\n "os" )`
10//! - Aliased imports: `import alias "package"`
11//! - Dot imports: `import . "package"`
12//! - Blank imports: `import _ "package"`
13//! - CGo imports: `import "C"`
14//!
15//! ## Performance
16//!
//! Target: <5ms per file. Each call parses the source from scratch with
//! tree-sitter and walks the resulting syntax tree to collect import declarations.
19//!
20//! ## Module Resolution
21//!
//! Supports go.mod-aware path resolution (stripping the module path prefix) and
//! a vendor directory mode for mapping import paths to local file paths.
//! Imports that match neither are reported as unresolved.
24
25use std::path::{Path, PathBuf};
26
27use crate::incremental::types::{DependencyEdge, DependencyType};
28
29/// Error types for Go dependency extraction.
30#[derive(Debug, thiserror::Error)]
31pub enum ExtractionError {
32    /// Tree-sitter failed to parse the source file.
33    #[error("parse error: failed to parse Go source")]
34    ParseError,
35
36    /// Tree-sitter query compilation failed.
37    #[error("query error: {0}")]
38    QueryError(String),
39
40    /// Import path could not be resolved to a local file path.
41    #[error("unresolved import: {path}")]
42    UnresolvedImport {
43        /// The import path that could not be resolved.
44        path: String,
45    },
46}
47
/// One import spec found in a Go source file.
///
/// A plain `import "fmt"` yields `alias: None` with both flags `false`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ImportInfo {
    /// The imported package path with its surrounding quotes removed
    /// (e.g., `fmt` or `github.com/user/repo`).
    pub import_path: String,

    /// The local package name given to the import, if any
    /// (e.g., `f` in `import f "fmt"`).
    pub alias: Option<String>,

    /// `true` for a dot import (`import . "package"`).
    pub is_dot_import: bool,

    /// `true` for a blank import (`import _ "package"`).
    pub is_blank_import: bool,
}
63
/// Extracts Go imports and maps them to local paths.
///
/// Import specs are collected by parsing the source with tree-sitter. Import
/// paths are then resolved either by stripping the configured go.mod module
/// path or, in vendor mode, by placing them under `vendor/`.
///
/// # Examples
///
/// ```rust,ignore
/// use thread_flow::incremental::extractors::go::GoDependencyExtractor;
/// use std::path::Path;
///
/// let extractor = GoDependencyExtractor::new(Some("github.com/user/repo".to_string()));
/// let imports = extractor.extract_imports(source, Path::new("main.go")).unwrap();
/// ```
#[derive(Clone, Debug)]
pub struct GoDependencyExtractor {
    /// Module path declared in go.mod (e.g., `"github.com/user/repo"`), or
    /// `None` when it is not known.
    module_path: Option<String>,
    /// When `true`, imports outside the module resolve under `vendor/`.
    vendor_mode: bool,
}
85
86impl GoDependencyExtractor {
87    /// Create a new extractor with optional go.mod module path.
88    ///
89    /// When `module_path` is provided, imports matching the module prefix
90    /// are resolved to local paths relative to the module root.
91    pub fn new(module_path: Option<String>) -> Self {
92        Self {
93            module_path,
94            vendor_mode: false,
95        }
96    }
97
98    /// Create a new extractor with vendor directory support.
99    ///
100    /// When `vendor_mode` is true, external imports are resolved to the
101    /// `vendor/` directory instead of returning an error.
102    pub fn with_vendor(module_path: Option<String>, vendor_mode: bool) -> Self {
103        Self {
104            module_path,
105            vendor_mode,
106        }
107    }
108
109    /// Extract all import statements from a Go source file.
110    ///
111    /// Parses the source using tree-sitter and walks `import_declaration` nodes
112    /// to collect import paths, aliases, and import variants (dot, blank).
113    ///
114    /// # Errors
115    ///
116    /// Returns [`ExtractionError::ParseError`] if tree-sitter cannot parse the source.
117    pub fn extract_imports(
118        &self,
119        source: &str,
120        _file_path: &Path,
121    ) -> Result<Vec<ImportInfo>, ExtractionError> {
122        if source.is_empty() {
123            return Ok(Vec::new());
124        }
125
126        let language = thread_language::parsers::language_go();
127        let mut parser = tree_sitter::Parser::new();
128        parser
129            .set_language(&language)
130            .map_err(|_| ExtractionError::ParseError)?;
131
132        let tree = parser
133            .parse(source, None)
134            .ok_or(ExtractionError::ParseError)?;
135
136        let root_node = tree.root_node();
137        let mut imports = Vec::new();
138
139        self.walk_imports(root_node, source.as_bytes(), &mut imports);
140
141        Ok(imports)
142    }
143
144    /// Walk the tree-sitter AST to extract import declarations.
145    fn walk_imports(
146        &self,
147        node: tree_sitter::Node<'_>,
148        source: &[u8],
149        imports: &mut Vec<ImportInfo>,
150    ) {
151        if node.kind() == "import_declaration" {
152            self.extract_from_import_declaration(node, source, imports);
153            return;
154        }
155
156        let mut cursor = node.walk();
157        for child in node.children(&mut cursor) {
158            self.walk_imports(child, source, imports);
159        }
160    }
161
162    /// Extract imports from a single `import_declaration` node.
163    ///
164    /// Handles both single imports and import blocks (import_spec_list).
165    fn extract_from_import_declaration(
166        &self,
167        node: tree_sitter::Node<'_>,
168        source: &[u8],
169        imports: &mut Vec<ImportInfo>,
170    ) {
171        let mut cursor = node.walk();
172        for child in node.children(&mut cursor) {
173            match child.kind() {
174                "import_spec" => {
175                    if let Some(info) = self.parse_import_spec(child, source) {
176                        imports.push(info);
177                    }
178                }
179                "import_spec_list" => {
180                    let mut list_cursor = child.walk();
181                    for spec in child.children(&mut list_cursor) {
182                        if spec.kind() == "import_spec"
183                            && let Some(info) = self.parse_import_spec(spec, source)
184                        {
185                            imports.push(info);
186                        }
187                    }
188                }
189                _ => {}
190            }
191        }
192    }
193
194    /// Parse a single `import_spec` node into an [`ImportInfo`].
195    ///
196    /// The import_spec grammar in tree-sitter-go:
197    /// ```text
198    /// import_spec: $ => seq(
199    ///   optional(field('name', choice($.dot, $.blank_identifier, $._package_identifier))),
200    ///   field('path', $._string_literal)
201    /// )
202    /// ```
203    fn parse_import_spec(&self, node: tree_sitter::Node<'_>, source: &[u8]) -> Option<ImportInfo> {
204        let mut alias: Option<String> = None;
205        let mut is_dot_import = false;
206        let mut is_blank_import = false;
207        let mut import_path: Option<String> = None;
208
209        let mut cursor = node.walk();
210        for child in node.children(&mut cursor) {
211            match child.kind() {
212                "dot" => {
213                    is_dot_import = true;
214                }
215                "blank_identifier" => {
216                    is_blank_import = true;
217                }
218                "package_identifier" => {
219                    let name = child.utf8_text(source).ok()?.to_string();
220                    alias = Some(name);
221                }
222                "interpreted_string_literal" => {
223                    let raw = child.utf8_text(source).ok()?;
224                    // Strip surrounding quotes
225                    let path = raw.trim_matches('"').to_string();
226                    import_path = Some(path);
227                }
228                _ => {}
229            }
230        }
231
232        import_path.map(|path| ImportInfo {
233            import_path: path,
234            alias,
235            is_dot_import,
236            is_blank_import,
237        })
238    }
239
240    /// Resolve a Go import path to a local file path.
241    ///
242    /// Resolution strategy:
243    /// 1. If the import matches the module path prefix, strip it to get a relative path.
244    /// 2. If vendor mode is enabled, external imports resolve to `vendor/<import_path>`.
245    /// 3. Standard library and unresolvable external imports return an error.
246    ///
247    /// # Errors
248    ///
249    /// Returns [`ExtractionError::UnresolvedImport`] if the import cannot be mapped
250    /// to a local file path.
251    pub fn resolve_import_path(
252        &self,
253        _source_file: &Path,
254        import_path: &str,
255    ) -> Result<PathBuf, ExtractionError> {
256        // Module-internal import
257        if let Some(ref module) = self.module_path
258            && let Some(relative) = import_path.strip_prefix(module)
259        {
260            let relative = relative.strip_prefix('/').unwrap_or(relative);
261            return Ok(PathBuf::from(relative));
262        }
263
264        // Vendor mode for external imports
265        if self.vendor_mode {
266            return Ok(PathBuf::from(format!("vendor/{import_path}")));
267        }
268
269        Err(ExtractionError::UnresolvedImport {
270            path: import_path.to_string(),
271        })
272    }
273
274    /// Extract [`DependencyEdge`] values from a Go source file.
275    ///
276    /// Combines import extraction with path resolution to produce edges
277    /// suitable for the incremental dependency graph. Only module-internal
278    /// and vendor-resolvable imports produce edges; standard library and
279    /// unresolvable external imports are silently skipped.
280    ///
281    /// # Errors
282    ///
283    /// Returns an error if the source file cannot be parsed.
284    pub fn extract_dependency_edges(
285        &self,
286        source: &str,
287        file_path: &Path,
288    ) -> Result<Vec<DependencyEdge>, ExtractionError> {
289        let imports = self.extract_imports(source, file_path)?;
290        let mut edges = Vec::new();
291
292        for import in &imports {
293            // Only create edges for resolvable imports (module-internal or vendor)
294            // Stdlib and external imports are silently skipped per design spec
295            if let Ok(resolved) = self.resolve_import_path(file_path, &import.import_path) {
296                edges.push(DependencyEdge::new(
297                    file_path.to_path_buf(),
298                    resolved,
299                    DependencyType::Import,
300                ));
301            }
302        }
303
304        Ok(edges)
305    }
306}