thread_flow/incremental/extractors/go.rs
1// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
2// SPDX-License-Identifier: AGPL-3.0-or-later
3
//! Go dependency extractor built on a tree-sitter parse of the source file.
5//!
6//! Extracts `import` declarations from Go source files, handling all import forms:
7//!
8//! - Single imports: `import "fmt"`
9//! - Import blocks: `import ( "fmt"\n "os" )`
10//! - Aliased imports: `import alias "package"`
11//! - Dot imports: `import . "package"`
12//! - Blank imports: `import _ "package"`
13//! - CGo imports: `import "C"`
14//!
15//! ## Performance
16//!
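//! Target: <5ms per file. Each file is parsed from scratch with tree-sitter and
//! the syntax tree is walked for `import_declaration` nodes; the walk does not
//! descend into a declaration once it has been found, its import specs are
//! read directly instead.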
19//!
20//! ## Module Resolution
21//!
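//! Maps import paths to local file paths in two ways: imports under the go.mod
//! module path are made relative to the module root, and, when vendor mode is
//! enabled, external imports are placed under `vendor/`. GOPATH fallback is not
//! implemented by the resolver in this file.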
24
25use std::path::{Path, PathBuf};
26
27use crate::incremental::types::{DependencyEdge, DependencyType};
28
/// Error types for Go dependency extraction.
///
/// The `Display` text of each variant is generated by `thiserror` from the
/// `#[error(...)]` attribute attached to it.
#[derive(Debug, thiserror::Error)]
pub enum ExtractionError {
    /// Tree-sitter failed to parse the source file.
    ///
    /// `extract_imports` also reports this variant when the Go grammar cannot
    /// be installed on the parser.
    #[error("parse error: failed to parse Go source")]
    ParseError,

    /// Tree-sitter query compilation failed.
    ///
    /// The payload is a human-readable description of the failure.
    /// NOTE(review): nothing in the visible part of this file constructs this
    /// variant — confirm whether it is still used elsewhere.
    #[error("query error: {0}")]
    QueryError(String),

    /// Import path could not be resolved to a local file path.
    ///
    /// Returned by `resolve_import_path` for imports that are neither under
    /// the configured module path nor resolvable through vendor mode.
    #[error("unresolved import: {path}")]
    UnresolvedImport {
        /// The import path that could not be resolved.
        path: String,
    },
}
47
/// Information about a single Go import statement.
///
/// One value is produced per `import_spec`, whether it appears as a single
/// import or inside a parenthesized import block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImportInfo {
    /// The import path with its surrounding quotes removed
    /// (e.g., `fmt` or `github.com/user/repo`).
    pub import_path: String,

    /// Optional alias for the import (e.g., `f` in `import f "fmt"`).
    ///
    /// Stays `None` for dot and blank imports; those are reported through
    /// `is_dot_import` and `is_blank_import` instead.
    pub alias: Option<String>,

    /// Whether this is a dot import (`import . "package"`).
    pub is_dot_import: bool,

    /// Whether this is a blank import (`import _ "package"`).
    pub is_blank_import: bool,
}
63
/// Go dependency extractor that collects imports by walking a tree-sitter
/// syntax tree.
///
/// Supports go.mod module path resolution and vendor directory mode for
/// mapping import paths to local file system paths.
///
/// # Examples
///
/// ```rust,ignore
/// use thread_flow::incremental::extractors::go::GoDependencyExtractor;
/// use std::path::Path;
///
/// let extractor = GoDependencyExtractor::new(Some("github.com/user/repo".to_string()));
/// let imports = extractor.extract_imports(source, Path::new("main.go")).unwrap();
/// ```
#[derive(Debug, Clone)]
pub struct GoDependencyExtractor {
    /// The go.mod module path, if known (e.g., `"github.com/user/repo"`).
    ///
    /// Imports that start with this path are resolved relative to the module
    /// root.
    module_path: Option<String>,
    /// Whether to resolve external imports via the vendor directory.
    ///
    /// When `false`, imports outside the module path are reported as
    /// unresolved.
    vendor_mode: bool,
}
85
86impl GoDependencyExtractor {
87 /// Create a new extractor with optional go.mod module path.
88 ///
89 /// When `module_path` is provided, imports matching the module prefix
90 /// are resolved to local paths relative to the module root.
91 pub fn new(module_path: Option<String>) -> Self {
92 Self {
93 module_path,
94 vendor_mode: false,
95 }
96 }
97
98 /// Create a new extractor with vendor directory support.
99 ///
100 /// When `vendor_mode` is true, external imports are resolved to the
101 /// `vendor/` directory instead of returning an error.
102 pub fn with_vendor(module_path: Option<String>, vendor_mode: bool) -> Self {
103 Self {
104 module_path,
105 vendor_mode,
106 }
107 }
108
109 /// Extract all import statements from a Go source file.
110 ///
111 /// Parses the source using tree-sitter and walks `import_declaration` nodes
112 /// to collect import paths, aliases, and import variants (dot, blank).
113 ///
114 /// # Errors
115 ///
116 /// Returns [`ExtractionError::ParseError`] if tree-sitter cannot parse the source.
117 pub fn extract_imports(
118 &self,
119 source: &str,
120 _file_path: &Path,
121 ) -> Result<Vec<ImportInfo>, ExtractionError> {
122 if source.is_empty() {
123 return Ok(Vec::new());
124 }
125
126 let language = thread_language::parsers::language_go();
127 let mut parser = tree_sitter::Parser::new();
128 parser
129 .set_language(&language)
130 .map_err(|_| ExtractionError::ParseError)?;
131
132 let tree = parser
133 .parse(source, None)
134 .ok_or(ExtractionError::ParseError)?;
135
136 let root_node = tree.root_node();
137 let mut imports = Vec::new();
138
139 self.walk_imports(root_node, source.as_bytes(), &mut imports);
140
141 Ok(imports)
142 }
143
144 /// Walk the tree-sitter AST to extract import declarations.
145 fn walk_imports(
146 &self,
147 node: tree_sitter::Node<'_>,
148 source: &[u8],
149 imports: &mut Vec<ImportInfo>,
150 ) {
151 if node.kind() == "import_declaration" {
152 self.extract_from_import_declaration(node, source, imports);
153 return;
154 }
155
156 let mut cursor = node.walk();
157 for child in node.children(&mut cursor) {
158 self.walk_imports(child, source, imports);
159 }
160 }
161
162 /// Extract imports from a single `import_declaration` node.
163 ///
164 /// Handles both single imports and import blocks (import_spec_list).
165 fn extract_from_import_declaration(
166 &self,
167 node: tree_sitter::Node<'_>,
168 source: &[u8],
169 imports: &mut Vec<ImportInfo>,
170 ) {
171 let mut cursor = node.walk();
172 for child in node.children(&mut cursor) {
173 match child.kind() {
174 "import_spec" => {
175 if let Some(info) = self.parse_import_spec(child, source) {
176 imports.push(info);
177 }
178 }
179 "import_spec_list" => {
180 let mut list_cursor = child.walk();
181 for spec in child.children(&mut list_cursor) {
182 if spec.kind() == "import_spec"
183 && let Some(info) = self.parse_import_spec(spec, source)
184 {
185 imports.push(info);
186 }
187 }
188 }
189 _ => {}
190 }
191 }
192 }
193
194 /// Parse a single `import_spec` node into an [`ImportInfo`].
195 ///
196 /// The import_spec grammar in tree-sitter-go:
197 /// ```text
198 /// import_spec: $ => seq(
199 /// optional(field('name', choice($.dot, $.blank_identifier, $._package_identifier))),
200 /// field('path', $._string_literal)
201 /// )
202 /// ```
203 fn parse_import_spec(&self, node: tree_sitter::Node<'_>, source: &[u8]) -> Option<ImportInfo> {
204 let mut alias: Option<String> = None;
205 let mut is_dot_import = false;
206 let mut is_blank_import = false;
207 let mut import_path: Option<String> = None;
208
209 let mut cursor = node.walk();
210 for child in node.children(&mut cursor) {
211 match child.kind() {
212 "dot" => {
213 is_dot_import = true;
214 }
215 "blank_identifier" => {
216 is_blank_import = true;
217 }
218 "package_identifier" => {
219 let name = child.utf8_text(source).ok()?.to_string();
220 alias = Some(name);
221 }
222 "interpreted_string_literal" => {
223 let raw = child.utf8_text(source).ok()?;
224 // Strip surrounding quotes
225 let path = raw.trim_matches('"').to_string();
226 import_path = Some(path);
227 }
228 _ => {}
229 }
230 }
231
232 import_path.map(|path| ImportInfo {
233 import_path: path,
234 alias,
235 is_dot_import,
236 is_blank_import,
237 })
238 }
239
240 /// Resolve a Go import path to a local file path.
241 ///
242 /// Resolution strategy:
243 /// 1. If the import matches the module path prefix, strip it to get a relative path.
244 /// 2. If vendor mode is enabled, external imports resolve to `vendor/<import_path>`.
245 /// 3. Standard library and unresolvable external imports return an error.
246 ///
247 /// # Errors
248 ///
249 /// Returns [`ExtractionError::UnresolvedImport`] if the import cannot be mapped
250 /// to a local file path.
251 pub fn resolve_import_path(
252 &self,
253 _source_file: &Path,
254 import_path: &str,
255 ) -> Result<PathBuf, ExtractionError> {
256 // Module-internal import
257 if let Some(ref module) = self.module_path
258 && let Some(relative) = import_path.strip_prefix(module)
259 {
260 let relative = relative.strip_prefix('/').unwrap_or(relative);
261 return Ok(PathBuf::from(relative));
262 }
263
264 // Vendor mode for external imports
265 if self.vendor_mode {
266 return Ok(PathBuf::from(format!("vendor/{import_path}")));
267 }
268
269 Err(ExtractionError::UnresolvedImport {
270 path: import_path.to_string(),
271 })
272 }
273
274 /// Extract [`DependencyEdge`] values from a Go source file.
275 ///
276 /// Combines import extraction with path resolution to produce edges
277 /// suitable for the incremental dependency graph. Only module-internal
278 /// and vendor-resolvable imports produce edges; standard library and
279 /// unresolvable external imports are silently skipped.
280 ///
281 /// # Errors
282 ///
283 /// Returns an error if the source file cannot be parsed.
284 pub fn extract_dependency_edges(
285 &self,
286 source: &str,
287 file_path: &Path,
288 ) -> Result<Vec<DependencyEdge>, ExtractionError> {
289 let imports = self.extract_imports(source, file_path)?;
290 let mut edges = Vec::new();
291
292 for import in &imports {
293 // Only create edges for resolvable imports (module-internal or vendor)
294 // Stdlib and external imports are silently skipped per design spec
295 if let Ok(resolved) = self.resolve_import_path(file_path, &import.import_path) {
296 edges.push(DependencyEdge::new(
297 file_path.to_path_buf(),
298 resolved,
299 DependencyType::Import,
300 ));
301 }
302 }
303
304 Ok(edges)
305 }
306}