/// Summary of one source file as produced by `extract_file_details`.
///
/// Serializable with serde; `cfg_test_line` is left out of the serialized
/// output when it is `None`.
#[derive(Debug, Clone, serde::Serialize)]
pub struct FileExtract {
/// File path, copied verbatim from the `path` argument of `extract_file_details`.
pub file: String,
/// Language name as returned by `Language::as_str`.
pub language: String,
/// Trimmed source text of each import-like node found directly under the syntax-tree root.
pub imports: Vec<String>,
/// Line number (tree-sitter row + 1) of the first top-level attribute whose text
/// contains `cfg(test)`. Only looked up for Rust sources; `None` otherwise.
#[serde(skip_serializing_if = "Option::is_none")]
pub cfg_test_line: Option<usize>,
/// Extracted items ordered by start line, with module contents grouped under their module.
pub items: Vec<ExtractedItem>,
}
/// One extracted definition (function, module, type, ...) within a file.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ExtractedItem {
/// Name of the item, taken from the chunk it was built from.
pub name: String,
/// String form of the chunk type; serialized under the key `type`.
/// A module that starts on the `cfg(test)` attribute line, or on the line
/// right after it, is reported with the test-module type instead.
#[serde(rename = "type")]
pub item_type: String,
/// First line of the item, copied from the chunk.
pub start_line: usize,
/// Last line of the item, copied from the chunk.
pub end_line: usize,
/// Line count, computed as `end_line - start_line + 1` (the subtraction saturates at zero).
pub lines: usize,
/// Visibility text recorded for the item's start line; empty when none was recorded.
pub visibility: String,
/// Items grouped under this one (only filled for modules); omitted from the
/// serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub children: Vec<ExtractedItem>,
}
/// Parses `source` as `language` and summarises its imports and items.
///
/// The result lists the import statements found at the top level of the file,
/// the line of the first top-level `cfg(test)` attribute (Rust only), and every
/// extracted item ordered by start line, with module contents grouped under
/// their module.
///
/// # Errors
///
/// Returns the error string produced by `parse_for_language` when the source
/// cannot be parsed or the language has no parser compiled in. Whitespace-only
/// input is not an error: it yields an extract with no imports and no items.
pub fn extract_file_details(
    path: &str,
    source: &str,
    language: Language,
) -> Result<FileExtract, String> {
    // Nothing to parse: report an empty extract instead of invoking the parser.
    if source.trim().is_empty() {
        return Ok(FileExtract {
            file: path.to_string(),
            language: language.as_str().to_string(),
            imports: Vec::new(),
            cfg_test_line: None,
            items: Vec::new(),
        });
    }

    let tree = parse_for_language(source, language)?;
    let root = tree.root_node();
    let imports = collect_imports(root, source, language);
    let cfg_test_line = match language {
        Language::Rust => find_cfg_test_line(root, source),
        _ => None,
    };

    // Language-specific extraction; arms exist only for compiled-in features.
    let mut chunks = Vec::new();
    match language {
        #[cfg(feature = "rust-ast")]
        Language::Rust => extract_rust_items(root, source, &mut chunks),
        #[cfg(feature = "typescript-ast")]
        Language::TypeScript => extract_typescript_items(root, source, &mut chunks),
        #[cfg(feature = "python-ast")]
        Language::Python => extract_python_items(root, source, &mut chunks),
        #[cfg(feature = "c-ast")]
        Language::C => extract_c_items(root, source, &mut chunks),
        #[cfg(feature = "c-ast")]
        Language::Cpp => extract_cpp_items(root, source, &mut chunks),
        Language::Go => extract_go_items(root, source, &mut chunks),
        Language::Lua => extract_lua_items(root, source, &mut chunks),
        #[allow(unreachable_patterns)]
        _ => {}
    }

    let visibility_map = collect_visibility(root, source, language);

    let mut flat_items: Vec<ExtractedItem> = Vec::new();
    for chunk in chunks {
        // The whole-file chunk is not an item of its own.
        if chunk.chunk_type == ChunkType::File {
            continue;
        }

        // A module counts as the test module when it starts on the line right
        // after the `cfg(test)` attribute, or on the attribute line itself.
        let is_test_module = chunk.chunk_type == ChunkType::Module
            && matches!(
                cfg_test_line,
                Some(attr_line)
                    if chunk.start_line == attr_line + 1 || chunk.start_line == attr_line
            );
        let item_type = if is_test_module {
            ChunkType::TestModule.as_str().to_string()
        } else {
            chunk.chunk_type.as_str().to_string()
        };

        // Items without a recorded visibility get an empty string.
        let visibility = visibility_map
            .get(&chunk.start_line)
            .cloned()
            .unwrap_or_default();

        flat_items.push(ExtractedItem {
            item_type,
            visibility,
            start_line: chunk.start_line,
            end_line: chunk.end_line,
            lines: chunk.end_line.saturating_sub(chunk.start_line) + 1,
            children: Vec::new(),
            name: chunk.chunk_name,
        });
    }

    // Nesting relies on the items being ordered by start line.
    flat_items.sort_by_key(|item| item.start_line);

    Ok(FileExtract {
        file: path.to_string(),
        language: language.as_str().to_string(),
        imports,
        cfg_test_line,
        items: nest_children_into_modules(flat_items),
    })
}
/// Groups items that lie inside a module under that module's `children`.
///
/// An item belongs to a module (`item_type` of `"module"` or `"test_module"`)
/// when it starts strictly after the module's first line and ends no later
/// than the module's last line. If several modules qualify, the first one in
/// input order is chosen, so with input sorted by `start_line` the contents of
/// nested modules all end up directly under the enclosing module.
///
/// Items that belong to no module are returned in their original order. When
/// the input contains no module at all it is returned unchanged. The caller is
/// expected to pass the items sorted by `start_line`.
fn nest_children_into_modules(flat_items: Vec<ExtractedItem>) -> Vec<ExtractedItem> {
    // (position, first line, last line) of every module-like item, in input order.
    let mut modules: Vec<(usize, usize, usize)> = Vec::new();
    for (pos, candidate) in flat_items.iter().enumerate() {
        if matches!(candidate.item_type.as_str(), "module" | "test_module") {
            modules.push((pos, candidate.start_line, candidate.end_line));
        }
    }
    if modules.is_empty() {
        return flat_items;
    }

    // For each item: position of the first module that encloses it, if any.
    let owners: Vec<Option<usize>> = flat_items
        .iter()
        .enumerate()
        .map(|(pos, candidate)| {
            modules
                .iter()
                .find(|&&(owner_pos, first, last)| {
                    owner_pos != pos
                        && first < candidate.start_line
                        && candidate.end_line <= last
                })
                .map(|&(owner_pos, _, _)| owner_pos)
        })
        .collect();

    // Split the items into per-module buckets and the top-level remainder.
    let mut buckets: Vec<Vec<ExtractedItem>> = vec![Vec::new(); flat_items.len()];
    let mut top_level: Vec<(usize, ExtractedItem)> = Vec::new();
    for ((pos, entry), owner) in flat_items.into_iter().enumerate().zip(owners) {
        match owner {
            Some(owner_pos) => buckets[owner_pos].push(entry),
            None => top_level.push((pos, entry)),
        }
    }

    // Hand every top-level item the bucket collected for its original position.
    let mut nested = Vec::with_capacity(top_level.len());
    for (pos, mut entry) in top_level {
        let kids = std::mem::take(&mut buckets[pos]);
        if !kids.is_empty() {
            entry.children = kids;
        }
        nested.push(entry);
    }
    nested
}
/// Parses `source` with the parser that matches `language`.
///
/// Arms carrying `#[cfg(feature = "...")]` are compiled only when that feature
/// is enabled. A language without a compiled arm falls through to the
/// catch-all and yields an `Err` naming the language; the name is formatted
/// with `{:?}`, so it appears in quotes in the message.
fn parse_for_language(source: &str, language: Language) -> Result<Tree, String> {
match language {
#[cfg(feature = "rust-ast")]
Language::Rust => parse_rust(source),
#[cfg(feature = "typescript-ast")]
Language::TypeScript => parse_typescript(source),
#[cfg(feature = "python-ast")]
Language::Python => parse_python(source),
#[cfg(feature = "c-ast")]
Language::C => parse_c(source),
#[cfg(feature = "c-ast")]
Language::Cpp => parse_cpp(source),
Language::Go => parse_go(source),
Language::Lua => parse_lua(source),
// Catch-all for every language that has no arm above; the `allow` keeps the
// build quiet if the compiler considers this arm unreachable.
#[allow(unreachable_patterns)]
_ => Err(format!("language {:?} not enabled", language.as_str())),
}
}
/// Returns the trimmed source text of every import-like node directly under `root`.
///
/// Which node kinds count as an import depends on `language`; Lua and PTX have
/// none and always produce an empty list. Only direct children of `root` are
/// inspected, and nodes whose trimmed text is empty are skipped.
fn collect_imports(root: Node, source: &str, language: Language) -> Vec<String> {
    // Node kinds treated as imports for each language.
    let import_kinds: &[&str] = match language {
        Language::Rust => &["use_declaration", "extern_crate_declaration"],
        Language::Python => &["import_statement", "import_from_statement"],
        Language::TypeScript => &["import_statement"],
        Language::Go => &["import_declaration"],
        Language::C | Language::Cpp => &["preproc_include"],
        Language::Lua | Language::Ptx => return Vec::new(),
    };

    let mut walker = root.walk();
    root.children(&mut walker)
        .filter(|node| import_kinds.contains(&node.kind()))
        .map(|node| source[node.byte_range()].trim())
        .filter(|text| !text.is_empty())
        .map(str::to_string)
        .collect()
}
/// Finds the first `attribute_item` directly under `root` whose text contains
/// `cfg(test)` and returns its line number (tree-sitter row + 1).
///
/// Returns `None` when no such attribute exists at the top level.
fn find_cfg_test_line(root: Node, source: &str) -> Option<usize> {
    let mut walker = root.walk();
    // Bound to a local so the iterator borrowing `walker` is gone before
    // `walker` itself goes out of scope.
    let first_hit = root
        .children(&mut walker)
        .find(|node| {
            node.kind() == "attribute_item" && source[node.byte_range()].contains("cfg(test)")
        })
        .map(|node| node.start_position().row + 1);
    first_hit
}
/// Builds a map from line number (tree-sitter row + 1) to visibility text for
/// the declarations found under `root`.
///
/// Only Rust, Go and TypeScript have a collector; every other language yields
/// an empty map.
fn collect_visibility(root: Node, source: &str, language: Language) -> HashMap<usize, String> {
let mut map = HashMap::new();
match language {
Language::Rust => collect_rust_visibility(root, source, &mut map),
Language::Go => collect_go_visibility(root, source, &mut map),
// The walk starts outside of any export statement, hence `false`.
Language::TypeScript => collect_ts_visibility(root, source, &mut map, false),
_ => {}
}
map
}
/// Returns the first direct child of `node` whose kind is `visibility_modifier`,
/// or `None` when the node has no such child.
fn find_visibility_modifier<'a>(node: Node<'a>) -> Option<Node<'a>> {
    let mut walker = node.walk();
    for candidate in node.children(&mut walker) {
        if candidate.kind() == "visibility_modifier" {
            return Some(candidate);
        }
    }
    None
}
/// Records the visibility of every Rust item reachable from `node`.
///
/// For each node kind that `rust_node_to_chunk` recognises, the text of its
/// `visibility_modifier` child (or an empty string when there is none) is
/// stored under the node's line number (tree-sitter row + 1). The walk descends
/// into recognised nodes only when they are `impl`, `mod` or `trait` items;
/// nodes that are not recognised are always descended into.
fn collect_rust_visibility(node: Node, source: &str, map: &mut HashMap<usize, String>) {
    let kind = node.kind();

    if rust_node_to_chunk(kind).is_some() {
        let visibility = match find_visibility_modifier(node) {
            Some(modifier) => source[modifier.byte_range()].to_string(),
            None => String::new(),
        };
        map.insert(node.start_position().row + 1, visibility);

        // Only containers can hold further items worth recording.
        let may_hold_items = matches!(kind, "impl_item" | "mod_item" | "trait_item");
        if !may_hold_items {
            return;
        }
    }

    let mut walker = node.walk();
    node.children(&mut walker)
        .for_each(|child| collect_rust_visibility(child, source, map));
}
fn collect_go_visibility(node: Node, source: &str, map: &mut HashMap<usize, String>) {
match node.kind() {
"function_declaration" | "method_declaration" => {
if let Some(name_node) = node.child_by_field_name("name") {
let name = &source[name_node.byte_range()];
let line = node.start_position().row + 1;
let vis = if name.starts_with(|c: char| c.is_uppercase()) {
"pub".to_string()
} else {
String::new()
};
map.insert(line, vis);
}
return;
}
"type_declaration" => {
if let Some(name) = extract_go_type_name(node, source) {
let line = node.start_position().row + 1;
let vis = if name.starts_with(|c: char| c.is_uppercase()) {
"pub".to_string()
} else {
String::new()
};
map.insert(line, vis);
}
return;
}
_ => {}
}
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
collect_go_visibility(child, source, map);
}
}
/// Records which TypeScript declarations reachable from `node` are exported.
///
/// Function, class and interface declarations are stored as `"export"` under
/// their own line number (tree-sitter row + 1); for `const`/`let`/`var`
/// declarations each `variable_declarator` is stored under its line instead.
/// Declarations that are not exported are not recorded at all.
///
/// `exported` tells whether `node` sits inside an `export_statement`. The flag
/// is passed down through wrapper nodes, but it stops at the declaration it
/// applies to and at every `statement_block`. Without that stop, anything
/// declared inside the body of an exported function, class, namespace or
/// initializer (a local helper function, say) would be reported as exported
/// too. A nested `export_statement` switches the flag back on for its own
/// declaration.
fn collect_ts_visibility(
    node: Node,
    source: &str,
    map: &mut HashMap<usize, String>,
    exported: bool,
) {
    let kind = node.kind();
    let currently_exported = exported || kind == "export_statement";

    match kind {
        "function_declaration" | "class_declaration" | "interface_declaration" => {
            if currently_exported {
                map.insert(node.start_position().row + 1, "export".to_string());
            }
        }
        "lexical_declaration" | "variable_declaration" => {
            if currently_exported {
                let mut inner = node.walk();
                for child in node.children(&mut inner) {
                    if child.kind() == "variable_declarator" {
                        map.insert(child.start_position().row + 1, "export".to_string());
                    }
                }
            }
        }
        _ => {}
    }

    // The export applies to the declaration itself, not to what is declared
    // inside it: drop the flag once a declaration or a block body is entered.
    let ends_export_scope = matches!(
        kind,
        "function_declaration"
            | "class_declaration"
            | "interface_declaration"
            | "lexical_declaration"
            | "variable_declaration"
            | "statement_block"
    );
    let child_exported = currently_exported && !ends_export_scope;

    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        collect_ts_visibility(child, source, map, child_exported);
    }
}