use std::path::{Path, PathBuf};
/// Errors produced while extracting or resolving Rust dependencies.
#[derive(Debug, thiserror::Error)]
pub enum ExtractionError {
/// The parser could not be configured with the grammar, or it produced no
/// syntax tree for the given source.
#[error("parse error: failed to parse Rust source")]
ParseError,
/// A module path could not be mapped to a file path.
#[error("unresolved module: {module} from {source_file}: {reason}")]
ResolutionError {
/// The module path that failed to resolve. It is empty when the failure
/// concerns only the location of the source file itself.
module: String,
/// The file from which resolution was attempted.
source_file: PathBuf,
/// Human-readable explanation of why resolution failed.
reason: String,
},
}
/// Visibility attached to a re-exported (`pub use`) item.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Visibility {
/// Unrestricted `pub`.
Public,
/// `pub(crate)`.
Crate,
/// `pub(super)`.
Super,
/// A path-restricted form such as `pub(in some::path)`.
Restricted,
}
/// One import discovered in a Rust source file (a `use` clause or a
/// block-less `mod` declaration).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RustImportInfo {
/// Module part of the import. For a simple scoped `use` this is everything
/// before the final `::`; for a `mod name;` declaration it is `name`.
pub module_path: String,
/// Names imported from `module_path`. Empty for `mod` declarations.
pub symbols: Vec<String>,
/// `true` when the import contains a glob (`*`).
pub is_wildcard: bool,
/// `(original, alias)` pairs collected from `as` clauses.
pub aliases: Vec<(String, String)>,
}
/// One re-export discovered in a `use` declaration that carries a visibility
/// modifier.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExportInfo {
/// Name of the re-exported symbol; `"*"` for a glob re-export.
pub symbol_name: String,
/// Module path the symbol is re-exported from.
pub module_path: String,
/// Visibility parsed from the declaration's modifier.
pub visibility: Visibility,
}
/// Extracts imports, re-exports and file-level dependency edges from Rust
/// source text.
///
/// The type carries no state. Its only field is private, so code outside this
/// module cannot build it with a struct literal and has to go through a
/// constructor instead.
#[derive(Debug, Clone)]
pub struct RustDependencyExtractor {
    _private: (),
}
impl RustDependencyExtractor {
/// Creates a new, stateless extractor.
pub fn new() -> Self {
    let _private = ();
    Self { _private }
}
/// Parses `source` using the grammar returned by
/// `thread_language::parsers::language_rust()`.
///
/// # Errors
///
/// Returns [`ExtractionError::ParseError`] when the grammar cannot be
/// installed on the parser or when the parser yields no tree.
fn parse_source(source: &str) -> Result<tree_sitter::Tree, ExtractionError> {
    let mut parser = tree_sitter::Parser::new();
    let grammar = thread_language::parsers::language_rust();
    if parser.set_language(&grammar).is_err() {
        return Err(ExtractionError::ParseError);
    }
    match parser.parse(source, None) {
        Some(tree) => Ok(tree),
        None => Err(ExtractionError::ParseError),
    }
}
/// Collects the imports declared in `source`.
///
/// The result lists everything found by the `use` walker first, followed by
/// everything found by the `mod` walker. Empty input yields an empty list
/// without invoking the parser. `_file_path` is currently unused.
///
/// # Errors
///
/// Propagates [`ExtractionError::ParseError`] from parsing.
pub fn extract_imports(
    &self,
    source: &str,
    _file_path: &Path,
) -> Result<Vec<RustImportInfo>, ExtractionError> {
    let mut found = Vec::new();
    if !source.is_empty() {
        let tree = Self::parse_source(source)?;
        let bytes = source.as_bytes();
        self.walk_use_declarations(tree.root_node(), bytes, &mut found);
        self.walk_mod_declarations(tree.root_node(), bytes, &mut found);
    }
    Ok(found)
}
/// Collects the re-exports declared in `source`.
///
/// Empty input yields an empty list without invoking the parser.
/// `_file_path` is currently unused.
///
/// # Errors
///
/// Propagates [`ExtractionError::ParseError`] from parsing.
pub fn extract_exports(
    &self,
    source: &str,
    _file_path: &Path,
) -> Result<Vec<ExportInfo>, ExtractionError> {
    let mut found = Vec::new();
    if !source.is_empty() {
        let tree = Self::parse_source(source)?;
        self.walk_export_declarations(tree.root_node(), source.as_bytes(), &mut found);
    }
    Ok(found)
}
/// Maps a module path, as written in `source_file`, to the file it is expected
/// to live in.
///
/// Resolution rules, tried in this order:
///
/// - `crate::a::b` becomes the relative path `src/a/b.rs`.
/// - `super::a::b` becomes `<super dir>/a/b.rs`. Every additional leading
///   `super::` climbs one more directory, so `super::super::a` resolves one
///   level above `super::a`.
/// - `super` becomes `<super dir>/mod.rs`.
/// - `self::a::b` becomes `<dir of source_file>/a/b.rs`.
/// - a single non-empty segment `a` becomes `<dir of source_file>/a.rs`.
///
/// # Errors
///
/// Returns [`ExtractionError::ResolutionError`] when a required parent
/// directory does not exist or when the path matches none of the rules above
/// (for example a multi-segment path rooted in an external crate).
pub fn resolve_module_path(
    &self,
    source_file: &Path,
    module_path: &str,
) -> Result<PathBuf, ExtractionError> {
    if let Some(rest) = module_path.strip_prefix("crate::") {
        let relative = rest.replace("::", "/");
        return Ok(PathBuf::from(format!("src/{relative}.rs")));
    }
    if let Some(first_tail) = module_path.strip_prefix("super::") {
        // Moves one directory up, failing with a resolution error at the top.
        let climb = |dir: &Path| -> Result<PathBuf, ExtractionError> {
            dir.parent()
                .map(Path::to_path_buf)
                .ok_or_else(|| ExtractionError::ResolutionError {
                    module: module_path.to_string(),
                    source_file: source_file.to_path_buf(),
                    reason: "no parent directory for chained super resolution".to_string(),
                })
        };

        let mut dir = self.super_directory(source_file)?;
        let mut rest = first_tail;
        // `super::super::x`: each further `super` is another level up, not a
        // directory literally named `super`.
        while let Some(tail) = rest.strip_prefix("super::") {
            dir = climb(dir.as_path())?;
            rest = tail;
        }
        if rest == "super" {
            // Path ends in `super` (e.g. `super::super`): point at that
            // module's `mod.rs`, mirroring the bare `super` rule below.
            return Ok(climb(dir.as_path())?.join("mod.rs"));
        }
        let relative = rest.replace("::", "/");
        return Ok(dir.join(format!("{relative}.rs")));
    }
    if module_path == "super" {
        let super_dir = self.super_directory(source_file)?;
        return Ok(super_dir.join("mod.rs"));
    }
    if let Some(rest) = module_path.strip_prefix("self::") {
        let dir = self.module_directory(source_file)?;
        let relative = rest.replace("::", "/");
        return Ok(dir.join(format!("{relative}.rs")));
    }
    if !module_path.contains("::") && !module_path.is_empty() {
        let dir = self.module_directory(source_file)?;
        return Ok(dir.join(format!("{module_path}.rs")));
    }
    Err(ExtractionError::ResolutionError {
        module: module_path.to_string(),
        source_file: source_file.to_path_buf(),
        reason: "external crate path cannot be resolved to a local file".to_string(),
    })
}
/// Builds one import edge per import of `source` whose module path resolves
/// to a local file.
///
/// Imports that fail to resolve are skipped silently. When an import names
/// at least one symbol and is not a wildcard, the edge carries a symbol
/// dependency built from the first listed symbol only.
///
/// # Errors
///
/// Propagates errors from [`Self::extract_imports`].
pub fn extract_dependency_edges(
    &self,
    source: &str,
    file_path: &Path,
) -> Result<Vec<super::super::types::DependencyEdge>, ExtractionError> {
    use super::super::types;

    let edges: Vec<types::DependencyEdge> = self
        .extract_imports(source, file_path)?
        .iter()
        .filter_map(|import| {
            let target = self
                .resolve_module_path(file_path, &import.module_path)
                .ok()?;
            let symbol = match import.symbols.first() {
                Some(first) if !import.is_wildcard => Some(types::SymbolDependency {
                    from_symbol: first.clone(),
                    to_symbol: first.clone(),
                    kind: types::SymbolKind::Module,
                    strength: types::DependencyStrength::Strong,
                }),
                _ => None,
            };
            let mut edge = types::DependencyEdge::new(
                file_path.to_path_buf(),
                target,
                types::DependencyType::Import,
            );
            edge.symbol = symbol;
            Some(edge)
        })
        .collect();
    Ok(edges)
}
/// Returns the directory that contains `source_file`.
///
/// # Errors
///
/// Returns [`ExtractionError::ResolutionError`] (with an empty `module`) when
/// the path has no parent component.
fn module_directory(&self, source_file: &Path) -> Result<PathBuf, ExtractionError> {
    match source_file.parent() {
        Some(parent) => Ok(parent.to_path_buf()),
        None => Err(ExtractionError::ResolutionError {
            module: String::new(),
            source_file: source_file.to_path_buf(),
            reason: "source file has no parent directory".to_string(),
        }),
    }
}
/// Reports whether the final component of `source_file` is `mod.rs` or
/// `lib.rs`. Paths without a file name yield `false`.
fn is_module_root(source_file: &Path) -> bool {
    match source_file.file_name().and_then(|name| name.to_str()) {
        Some(name) => matches!(name, "mod.rs" | "lib.rs"),
        None => false,
    }
}
/// Returns the directory in which siblings of the `super` module are looked
/// up.
///
/// For a `mod.rs` / `lib.rs` file this is the parent of the containing
/// directory; for any other file it is the containing directory itself.
///
/// # Errors
///
/// Returns [`ExtractionError::ResolutionError`] when the needed directory
/// does not exist in the path.
fn super_directory(&self, source_file: &Path) -> Result<PathBuf, ExtractionError> {
    let own_dir = self.module_directory(source_file)?;
    if !Self::is_module_root(source_file) {
        return Ok(own_dir);
    }
    match own_dir.parent() {
        Some(parent) => Ok(parent.to_path_buf()),
        None => Err(ExtractionError::ResolutionError {
            module: String::new(),
            source_file: source_file.to_path_buf(),
            reason: "no parent directory for super resolution from module root".to_string(),
        }),
    }
}
/// Depth-first search for `use_declaration` nodes below `node`.
///
/// A matching node is handed to `extract_use_declaration` and not descended
/// into; every other node has all of its children visited.
fn walk_use_declarations(
    &self,
    node: tree_sitter::Node<'_>,
    source: &[u8],
    imports: &mut Vec<RustImportInfo>,
) {
    if node.kind() == "use_declaration" {
        self.extract_use_declaration(node, source, imports);
    } else {
        let mut child_cursor = node.walk();
        for descendant in node.children(&mut child_cursor) {
            self.walk_use_declarations(descendant, source, imports);
        }
    }
}
/// Depth-first search for `mod_item` nodes below `node`.
///
/// A matching node is handed to `extract_mod_declaration` and not descended
/// into, so declarations nested inside an inline module body are not
/// visited. Every other node has all of its children visited.
fn walk_mod_declarations(
    &self,
    node: tree_sitter::Node<'_>,
    source: &[u8],
    imports: &mut Vec<RustImportInfo>,
) {
    if node.kind() == "mod_item" {
        self.extract_mod_declaration(node, source, imports);
    } else {
        let mut child_cursor = node.walk();
        for descendant in node.children(&mut child_cursor) {
            self.walk_mod_declarations(descendant, source, imports);
        }
    }
}
/// Records a block-less `mod name;` declaration as an import of `name`.
///
/// Modules that have a `declaration_list` child (an inline body) are
/// ignored. Only the first `identifier` child is considered; if its text is
/// not valid UTF-8 nothing is recorded.
fn extract_mod_declaration(
    &self,
    node: tree_sitter::Node<'_>,
    source: &[u8],
    imports: &mut Vec<RustImportInfo>,
) {
    let mut cursor = node.walk();
    let children: Vec<_> = node.children(&mut cursor).collect();

    let is_inline = children
        .iter()
        .any(|child| child.kind() == "declaration_list");
    if is_inline {
        return;
    }

    let name_node = children.iter().find(|child| child.kind() == "identifier");
    if let Some(ident) = name_node {
        if let Ok(name) = ident.utf8_text(source) {
            imports.push(RustImportInfo {
                module_path: name.to_string(),
                symbols: Vec::new(),
                is_wildcard: false,
                aliases: Vec::new(),
            });
        }
    }
}
/// Turns each recognised clause child of a `use_declaration` into one
/// [`RustImportInfo`] appended to `imports`.
///
/// Children of any other kind (keywords, punctuation, visibility modifiers,
/// ...) are skipped.
fn extract_use_declaration(
    &self,
    node: tree_sitter::Node<'_>,
    source: &[u8],
    imports: &mut Vec<RustImportInfo>,
) {
    // Node kinds that represent the imported path / clause.
    const CLAUSE_KINDS: [&str; 6] = [
        "scoped_identifier",
        "scoped_use_list",
        "use_as_clause",
        "use_wildcard",
        "use_list",
        "identifier",
    ];

    let mut cursor = node.walk();
    for clause in node.children(&mut cursor) {
        if !CLAUSE_KINDS.contains(&clause.kind()) {
            continue;
        }
        let mut info = RustImportInfo {
            module_path: String::new(),
            symbols: Vec::new(),
            is_wildcard: false,
            aliases: Vec::new(),
        };
        self.extract_use_clause(clause, source, &mut info);
        imports.push(info);
    }
}
/// Fills `info` from a single use clause, dispatching on the node kind.
///
/// - `identifier`: the whole text becomes the module path.
/// - `scoped_identifier`: the text is split at its last `::` into module path
///   and one symbol.
/// - `use_as_clause`, `scoped_use_list`, `use_wildcard`, `use_list`: delegated
///   to the matching helper.
///
/// Other kinds leave `info` untouched.
fn extract_use_clause(
    &self,
    node: tree_sitter::Node<'_>,
    source: &[u8],
    info: &mut RustImportInfo,
) {
    match node.kind() {
        "use_as_clause" => self.extract_use_as_clause(node, source, info),
        "scoped_use_list" => self.extract_scoped_use_list(node, source, info),
        "use_wildcard" => self.extract_use_wildcard(node, source, info),
        "use_list" => self.extract_use_list(node, source, info),
        "identifier" => info.module_path = self.node_text(node, source),
        "scoped_identifier" => {
            let full_path = self.node_text(node, source);
            match full_path.rfind("::") {
                Some(split_at) => {
                    info.module_path = full_path[..split_at].to_string();
                    info.symbols.push(full_path[split_at + 2..].to_string());
                }
                None => info.module_path = full_path,
            }
        }
        _ => {}
    }
}
/// Fills `info` from a top-level `path as alias` clause.
///
/// The first named child is taken as the path and the last named child as
/// the alias. A path containing `::` is split at its last `::` into module
/// path and symbol, and the alias is recorded against that symbol. Otherwise
/// the whole path is the module path and the alias is recorded against it.
/// Clauses with fewer than two named children are ignored.
fn extract_use_as_clause(
    &self,
    node: tree_sitter::Node<'_>,
    source: &[u8],
    info: &mut RustImportInfo,
) {
    let mut cursor = node.walk();
    let named: Vec<_> = node
        .children(&mut cursor)
        .filter(|child| child.is_named())
        .collect();
    if named.len() < 2 {
        return;
    }

    let full_path = self.node_text(named[0], source);
    let alias = self.node_text(named[named.len() - 1], source);
    match full_path.rfind("::") {
        Some(split_at) => {
            let symbol = &full_path[split_at + 2..];
            info.module_path = full_path[..split_at].to_string();
            info.symbols.push(symbol.to_string());
            info.aliases.push((symbol.to_string(), alias));
        }
        None => {
            info.module_path = full_path.clone();
            info.aliases.push((full_path, alias));
        }
    }
}
/// Fills `info` from a `prefix::{...}` clause.
///
/// The path child (an identifier, scoped identifier, or one of the `self` /
/// `crate` / `super` keywords) becomes the module path; the `use_list` child
/// is delegated to `extract_use_list`.
fn extract_scoped_use_list(
    &self,
    node: tree_sitter::Node<'_>,
    source: &[u8],
    info: &mut RustImportInfo,
) {
    let mut cursor = node.walk();
    for part in node.children(&mut cursor) {
        let kind = part.kind();
        if kind == "use_list" {
            self.extract_use_list(part, source, info);
        } else if matches!(
            kind,
            "identifier" | "scoped_identifier" | "self" | "crate" | "super"
        ) {
            info.module_path = self.node_text(part, source);
        }
    }
}
/// Collects the items of a brace-delimited use list (`{a, b as c, self, *}`)
/// into `info`.
///
/// - `identifier` and `scoped_identifier` items are pushed to `symbols` using
///   their source text, so `collections::HashMap` inside `std::{...}` is kept
///   as the relative path `collections::HashMap`. Aliased items already
///   record their original path this way.
/// - `use_as_clause` items push the original path to `symbols` and an
///   `(original, alias)` pair to `aliases`.
/// - `self` pushes the literal symbol `"self"`.
/// - `use_wildcard` sets `is_wildcard`.
///
/// Nested brace groups inside the list are not expanded.
fn extract_use_list(
    &self,
    node: tree_sitter::Node<'_>,
    source: &[u8],
    info: &mut RustImportInfo,
) {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        match child.kind() {
            // Scoped items used to be dropped silently, losing the import.
            "identifier" | "scoped_identifier" => {
                info.symbols.push(self.node_text(child, source));
            }
            "use_as_clause" => {
                let mut inner_cursor = child.walk();
                let named: Vec<_> = child
                    .children(&mut inner_cursor)
                    .filter(|c| c.is_named())
                    .collect();
                if named.len() >= 2 {
                    let original = self.node_text(named[0], source);
                    let alias = self.node_text(named[named.len() - 1], source);
                    info.symbols.push(original.clone());
                    info.aliases.push((original, alias));
                }
            }
            "self" => {
                info.symbols.push("self".to_string());
            }
            "use_wildcard" => {
                info.is_wildcard = true;
            }
            _ => {}
        }
    }
}
/// Fills `info` from a glob import (`path::*`).
///
/// Marks the import as a wildcard and records the path child as the module
/// path. The path may be an identifier, a scoped identifier, or a bare
/// `self` / `crate` / `super` keyword (as in `use super::*;`). The same set
/// of kinds is accepted for the prefix of a scoped use list.
fn extract_use_wildcard(
    &self,
    node: tree_sitter::Node<'_>,
    source: &[u8],
    info: &mut RustImportInfo,
) {
    info.is_wildcard = true;
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        // Without the keyword kinds, `use super::*` had an empty module path.
        if matches!(
            child.kind(),
            "identifier" | "scoped_identifier" | "self" | "crate" | "super"
        ) {
            info.module_path = self.node_text(child, source);
        }
    }
}
/// Depth-first search for `use_declaration` nodes that carry a visibility
/// modifier; each such node is handed to `extract_export_from_use`.
///
/// `use` declarations are never descended into, whether or not they have a
/// visibility modifier. Every other node has all of its children visited.
fn walk_export_declarations(
    &self,
    node: tree_sitter::Node<'_>,
    source: &[u8],
    exports: &mut Vec<ExportInfo>,
) {
    if node.kind() != "use_declaration" {
        let mut child_cursor = node.walk();
        for descendant in node.children(&mut child_cursor) {
            self.walk_export_declarations(descendant, source, exports);
        }
        return;
    }
    if let Some(visibility) = self.get_visibility(node, source) {
        self.extract_export_from_use(node, source, visibility, exports);
    }
}
/// Returns the visibility parsed from the first `visibility_modifier` child
/// of `node`, or `None` when there is no such child.
fn get_visibility(&self, node: tree_sitter::Node<'_>, source: &[u8]) -> Option<Visibility> {
    let mut cursor = node.walk();
    let modifier = node
        .children(&mut cursor)
        .find(|child| child.kind() == "visibility_modifier")?;
    let modifier_text = self.node_text(modifier, source);
    Some(self.parse_visibility(&modifier_text))
}
/// Classifies the source text of a visibility modifier.
///
/// Whitespace is ignored, so `pub (crate)` and `pub( crate )` are treated
/// like `pub(crate)`.
///
/// - `pub(crate)` gives [`Visibility::Crate`].
/// - `pub(super)` gives [`Visibility::Super`].
/// - `pub(self)` and `pub(in ...)` give [`Visibility::Restricted`].
///   `pub(self)` limits the item to the current module, so it must not be
///   reported as public.
/// - Anything else, including plain `pub`, gives [`Visibility::Public`].
fn parse_visibility(&self, text: &str) -> Visibility {
    let compact: String = text.chars().filter(|c| !c.is_whitespace()).collect();
    if compact.starts_with("pub(crate)") {
        Visibility::Crate
    } else if compact.starts_with("pub(super)") {
        Visibility::Super
    } else if compact.starts_with("pub(self)") || compact.starts_with("pub(in") {
        Visibility::Restricted
    } else {
        Visibility::Public
    }
}
/// Appends one [`ExportInfo`] per symbol re-exported by a `use` declaration
/// that has the given `visibility`.
///
/// Handling per clause kind:
///
/// - `scoped_identifier` (`a::b::C`): split at the last `::` into module path
///   and symbol.
/// - `scoped_use_list` (`a::{B, C}`): one export per plain identifier in the
///   list, all sharing the prefix as module path. The prefix may be an
///   identifier, a scoped identifier, or a bare `self` / `crate` / `super`.
/// - `use_wildcard` (`a::*`): a single export named `"*"`; the same prefix
///   kinds are accepted.
/// - `use_as_clause` (`a::B as C`): the original path is split at its last
///   `::`; paths without `::` produce nothing.
/// - `identifier` (`a`): exported with the name used as both symbol and module
///   path.
fn extract_export_from_use(
    &self,
    node: tree_sitter::Node<'_>,
    source: &[u8],
    visibility: Visibility,
    exports: &mut Vec<ExportInfo>,
) {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        match child.kind() {
            "scoped_identifier" => {
                let full = self.node_text(child, source);
                if let Some((path, symbol)) = full.rsplit_once("::") {
                    exports.push(ExportInfo {
                        symbol_name: symbol.to_string(),
                        module_path: path.to_string(),
                        visibility,
                    });
                }
            }
            "scoped_use_list" => {
                let mut module_path = String::new();
                let mut symbols = Vec::new();
                let mut inner_cursor = child.walk();
                for inner in child.children(&mut inner_cursor) {
                    match inner.kind() {
                        // Keyword roots are included so `pub use crate::{A, B}`
                        // no longer yields an empty module path.
                        "identifier" | "scoped_identifier" | "self" | "crate" | "super" => {
                            module_path = self.node_text(inner, source);
                        }
                        "use_list" => {
                            let mut list_cursor = inner.walk();
                            for item in inner.children(&mut list_cursor) {
                                // NOTE(review): aliased, scoped and nested
                                // items in the list are skipped. Confirm
                                // whether they should be exported too.
                                if item.kind() == "identifier" {
                                    symbols.push(self.node_text(item, source));
                                }
                            }
                        }
                        _ => {}
                    }
                }
                for sym in symbols {
                    exports.push(ExportInfo {
                        symbol_name: sym,
                        module_path: module_path.clone(),
                        visibility,
                    });
                }
            }
            "use_wildcard" => {
                let mut module_path = String::new();
                let mut wc_cursor = child.walk();
                for wc_child in child.children(&mut wc_cursor) {
                    // Keyword roots are included so `pub use super::*` keeps
                    // its module path.
                    if matches!(
                        wc_child.kind(),
                        "identifier" | "scoped_identifier" | "self" | "crate" | "super"
                    ) {
                        module_path = self.node_text(wc_child, source);
                    }
                }
                exports.push(ExportInfo {
                    symbol_name: "*".to_string(),
                    module_path,
                    visibility,
                });
            }
            "use_as_clause" => {
                let mut inner_cursor = child.walk();
                let named: Vec<_> = child
                    .children(&mut inner_cursor)
                    .filter(|c| c.is_named())
                    .collect();
                // NOTE(review): the export is recorded under the original
                // name, not the alias. Confirm that consumers expect this.
                if let Some(path_node) = named.first() {
                    let full = self.node_text(*path_node, source);
                    if let Some((path, symbol)) = full.rsplit_once("::") {
                        exports.push(ExportInfo {
                            symbol_name: symbol.to_string(),
                            module_path: path.to_string(),
                            visibility,
                        });
                    }
                }
            }
            "identifier" => {
                let name = self.node_text(child, source);
                exports.push(ExportInfo {
                    symbol_name: name.clone(),
                    module_path: name,
                    visibility,
                });
            }
            _ => {}
        }
    }
}
/// Returns the source text covered by `node` as an owned string, or an empty
/// string when that text is not valid UTF-8.
fn node_text(&self, node: tree_sitter::Node<'_>, source: &[u8]) -> String {
    match node.utf8_text(source) {
        Ok(text) => text.to_owned(),
        Err(_) => String::new(),
    }
}
}
impl Default for RustDependencyExtractor {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Sanity check on the node kinds the extractor relies on: a file holding
    /// a single `use` parses to a `source_file` root whose first child is a
    /// `use_declaration`.
    #[test]
    fn verify_ast_structure() {
        let tree =
            RustDependencyExtractor::parse_source("use std::collections::HashMap;").unwrap();
        let root_node = tree.root_node();
        assert_eq!(root_node.kind(), "source_file");

        let first_child = root_node.child(0).unwrap();
        assert_eq!(first_child.kind(), "use_declaration");
    }
}