use tree_sitter::{Node, Parser};
use super::common::lang_c;
use crate::indexer::SymbolChunk;
/// Parses `source` as C and returns the symbol chunks discovered by `walk_c_decls`.
///
/// If any `#include` directives were collected, one extra chunk named `__imports__`
/// (kind `"imports"`, lines 1..=1) is appended with the include list as its metadata.
/// When the parser produces no tree, a warning is logged and an empty vector is returned.
///
/// # Panics
///
/// Panics if the C grammar cannot be installed on the parser.
pub(super) fn extract_c_chunks(source: &str) -> Vec<SymbolChunk> {
    let mut parser = Parser::new();
    parser
        .set_language(&lang_c())
        .expect("Failed to set C language");

    let tree = if let Some(parsed) = parser.parse(source, None) {
        parsed
    } else {
        tracing::warn!(
            "Failed to parse C source - malformed syntax or grammar incompatibility"
        );
        return Vec::new();
    };

    let mut chunks = Vec::new();
    let mut includes = Vec::new();
    walk_c_decls(source, tree.root_node(), &mut chunks, &mut includes);

    if !includes.is_empty() {
        chunks.push(SymbolChunk {
            symbol_name: Some("__imports__".to_string()),
            kind: "imports".to_string(),
            signature: None,
            docstring: None,
            start_line: 1,
            end_line: 1,
            metadata: Some(serde_json::json!(includes)),
        });
    }

    // Tally the well-known kinds in one pass; whatever is left is reported as "other".
    let mut funcs = 0usize;
    let mut structs = 0usize;
    let mut enums = 0usize;
    let mut typedefs = 0usize;
    for chunk in &chunks {
        match chunk.kind.as_str() {
            "func" => funcs += 1,
            "struct" => structs += 1,
            "enum" => enums += 1,
            "typedef" => typedefs += 1,
            _ => {}
        }
    }
    let others = chunks.len() - funcs - structs - enums - typedefs;

    tracing::debug!(
        "Parsed C source: {} chunks extracted ({} functions, {} structs, {} enums, {} typedefs, {} other)",
        chunks.len(),
        funcs,
        structs,
        enums,
        typedefs,
        others
    );
    chunks
}
/// Recursively visits `node` and all of its descendants, emitting a chunk for each
/// recognised construct and collecting `#include` directives into `includes`.
///
/// Dispatch is by tree-sitter node kind:
/// `function_definition`, `declaration`, `type_definition` and `preproc_include` are
/// handed to their dedicated extractors; `struct_specifier` / `enum_specifier` nodes that
/// carry a `body` are emitted directly.
///
/// A struct/enum specifier that is the `type` field of a `declaration` is skipped here,
/// because `extract_c_declaration` examines that same node when the parent declaration is
/// visited (and emits the chunk if it has a body). Handling it again during the descent
/// recorded the same type twice.
///
/// NOTE(review): the descent also enters function bodies, so block-scope declarations are
/// reported through the same path as file-scope ones — confirm that this is intended.
fn walk_c_decls(
    source: &str,
    node: Node,
    chunks: &mut Vec<SymbolChunk>,
    includes: &mut Vec<serde_json::Value>,
) {
    match node.kind() {
        "function_definition" => extract_c_function(source, node, chunks),
        "declaration" => extract_c_declaration(source, node, chunks),
        "type_definition" => extract_c_typedef(source, node, chunks),
        "preproc_include" => collect_c_include(source, node, includes),
        kind @ "struct_specifier" | kind @ "enum_specifier" => {
            // True when the parent declaration has already been given this exact node.
            let owned_by_declaration = node.parent().map_or(false, |parent| {
                parent.kind() == "declaration"
                    && parent.child_by_field_name("type") == Some(node)
            });
            if !owned_by_declaration {
                if let Some(body) = node.child_by_field_name("body") {
                    if kind == "struct_specifier" {
                        extract_c_struct(source, node, body, node, chunks);
                    } else {
                        extract_c_enum(source, node, body, node, chunks);
                    }
                }
            }
        }
        _ => {}
    }

    // Always descend: nested definitions (e.g. a struct inside a struct body) are
    // discovered through the children.
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        walk_c_decls(source, child, chunks, includes);
    }
}
/// Handles a `function_definition` node (see the dispatch in `walk_c_decls`).
///
/// Forwards all arguments unchanged to `extract_c_function_common`, which builds and
/// pushes the `"func"` chunk.
fn extract_c_function(source: &str, node: Node, chunks: &mut Vec<SymbolChunk>) {
extract_c_function_common(source, node, chunks);
}
fn extract_c_function_common(source: &str, node: Node, chunks: &mut Vec<SymbolChunk>) {
let return_type = node
.child_by_field_name("type")
.and_then(|n| n.utf8_text(source.as_bytes()).ok())
.map(|s| s.to_string());
let declarator = match node.child_by_field_name("declarator") {
Some(decl) => decl,
None => return,
};
let (name, params) = extract_function_name_and_params(source, declarator);
let name = match name {
Some(n) => n,
None => return,
};
let signature = match (&return_type, ¶ms) {
(Some(ret), Some(par)) => Some(format!("{} {}", ret, par)),
(Some(ret), None) => Some(ret.clone()),
(None, Some(par)) => Some(par.clone()),
(None, None) => None,
};
let docstring = extract_c_doc_comment(source, node);
let storage_class = extract_storage_class(source, node);
let mut metadata_obj = serde_json::Map::new();
if let Some(ref storage) = storage_class {
metadata_obj.insert(
"storage_class".to_string(),
serde_json::Value::String(storage.clone()),
);
}
if let Some(ref ret) = return_type {
metadata_obj.insert(
"return_type".to_string(),
serde_json::Value::String(ret.clone()),
);
}
let start = node.start_position();
let end = node.end_position();
chunks.push(SymbolChunk {
symbol_name: Some(name),
kind: "func".to_string(),
signature,
docstring,
start_line: (start.row + 1) as i32,
end_line: (end.row + 1) as i32,
metadata: if metadata_obj.is_empty() {
None
} else {
Some(serde_json::Value::Object(metadata_obj))
},
});
}
/// Classifies a `declaration` node and forwards it to the matching extractor.
///
/// Order of precedence:
/// 1. a `struct_specifier` / `enum_specifier` type that has a `body` becomes a struct or
///    enum chunk spanning the whole declaration;
/// 2. a declarator accepted by `is_function_declarator` is treated as a function
///    declaration;
/// 3. anything else goes through the variable path.
///
/// Declarations without a `type` field are ignored.
fn extract_c_declaration(source: &str, node: Node, chunks: &mut Vec<SymbolChunk>) {
    let type_node = match node.child_by_field_name("type") {
        Some(found) => found,
        None => return,
    };

    let body = type_node.child_by_field_name("body");
    match (type_node.kind(), body) {
        ("struct_specifier", Some(body)) => {
            extract_c_struct(source, type_node, body, node, chunks);
            return;
        }
        ("enum_specifier", Some(body)) => {
            extract_c_enum(source, type_node, body, node, chunks);
            return;
        }
        _ => {}
    }

    let is_prototype = node
        .child_by_field_name("declarator")
        .map_or(false, |declarator| is_function_declarator(&declarator));
    if is_prototype {
        extract_c_function_declaration(source, node, chunks);
    } else {
        extract_c_global_variable(source, node, chunks);
    }
}
/// Returns `true` when `node` is a `function_declarator`, possibly wrapped in any number
/// of `pointer_declarator` layers.
///
/// Any other node kind, or a pointer layer without an inner `declarator` field, yields
/// `false`.
fn is_function_declarator(node: &Node) -> bool {
    let mut current = *node;
    loop {
        match current.kind() {
            "function_declarator" => return true,
            "pointer_declarator" => match current.child_by_field_name("declarator") {
                // Peel one pointer layer and look again.
                Some(inner) => current = inner,
                None => return false,
            },
            _ => return false,
        }
    }
}
/// Handles a `declaration` node whose declarator is a function declarator (see
/// `extract_c_declaration`).
///
/// Forwards all arguments unchanged to `extract_c_function_common`, so such declarations
/// are emitted with the same `"func"` kind as function definitions.
fn extract_c_function_declaration(source: &str, node: Node, chunks: &mut Vec<SymbolChunk>) {
extract_c_function_common(source, node, chunks);
}
/// Pushes a `"struct"` chunk describing `type_node`.
///
/// * `type_node` supplies the optional tag name (`name` field); anonymous structs get a
///   `symbol_name` of `None`.
/// * `body` is the field list; the number of `field_declaration` children is stored as
///   `field_count` in the metadata.
/// * `declaration_node` determines the 1-based line span and where the doc comment is
///   looked up.
fn extract_c_struct(
    source: &str,
    type_node: Node,
    body: Node,
    declaration_node: Node,
    chunks: &mut Vec<SymbolChunk>,
) {
    let tag = match type_node.child_by_field_name("name") {
        Some(name_node) => name_node
            .utf8_text(source.as_bytes())
            .ok()
            .map(str::to_owned),
        None => None,
    };
    let metadata = serde_json::json!({ "field_count": count_struct_fields(body) });
    let docstring = extract_c_doc_comment(source, declaration_node);
    let first_row = declaration_node.start_position().row;
    let last_row = declaration_node.end_position().row;

    chunks.push(SymbolChunk {
        symbol_name: tag,
        kind: "struct".to_string(),
        signature: None,
        docstring,
        start_line: (first_row + 1) as i32,
        end_line: (last_row + 1) as i32,
        metadata: Some(metadata),
    });
}
/// Pushes an `"enum"` chunk describing `type_node`.
///
/// * `type_node` supplies the optional tag name (`name` field); anonymous enums get a
///   `symbol_name` of `None`.
/// * `body` is the enumerator list; the number of `enumerator` children is stored as
///   `enumerator_count` in the metadata.
/// * `declaration_node` determines the 1-based line span and where the doc comment is
///   looked up.
fn extract_c_enum(
    source: &str,
    type_node: Node,
    body: Node,
    declaration_node: Node,
    chunks: &mut Vec<SymbolChunk>,
) {
    let tag = match type_node.child_by_field_name("name") {
        Some(name_node) => name_node
            .utf8_text(source.as_bytes())
            .ok()
            .map(str::to_owned),
        None => None,
    };
    let metadata = serde_json::json!({ "enumerator_count": count_enumerators(body) });
    let docstring = extract_c_doc_comment(source, declaration_node);
    let first_row = declaration_node.start_position().row;
    let last_row = declaration_node.end_position().row;

    chunks.push(SymbolChunk {
        symbol_name: tag,
        kind: "enum".to_string(),
        signature: None,
        docstring,
        start_line: (first_row + 1) as i32,
        end_line: (last_row + 1) as i32,
        metadata: Some(metadata),
    });
}
/// Emits `"variable"` chunks for a `declaration` node that is neither a struct/enum
/// definition nor a function declaration.
///
/// The declared type text (if readable) is shared by every variable of the declaration.
/// Work is split between `extract_multi_variable_declarators`, which scans the node's
/// children, and `extract_single_variable_declarator`, which covers the `declarator`
/// field. Declarations without a `declarator` field produce nothing.
fn extract_c_global_variable(source: &str, node: Node, chunks: &mut Vec<SymbolChunk>) {
    if let Some(declarator) = node.child_by_field_name("declarator") {
        let type_text = match node.child_by_field_name("type") {
            Some(ty) => ty.utf8_text(source.as_bytes()).ok().map(str::to_owned),
            None => None,
        };
        extract_multi_variable_declarators(source, node, &type_text, chunks);
        extract_single_variable_declarator(source, node, declarator, &type_text, chunks);
    }
}
/// Emits one `"variable"` chunk for every declarator that is a direct child of the
/// declaration `node`, so that comma-separated declarations such as `int a, b = 1, *c;`
/// yield a chunk per variable.
///
/// Previously only `init_declarator` and `pointer_declarator` children were visited, which
/// dropped every plain (`identifier`) or array (`array_declarator`) declarator after the
/// first one. All four declarator kinds that `extract_declarator_name` can resolve are now
/// handled; children of other kinds (type specifiers, qualifiers, punctuation, function
/// declarators) are skipped.
///
/// `type_text` is the declared type shared by all variables of the declaration.
fn extract_multi_variable_declarators(
    source: &str,
    node: Node,
    type_text: &Option<String>,
    chunks: &mut Vec<SymbolChunk>,
) {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        match child.kind() {
            "init_declarator" | "pointer_declarator" | "identifier" | "array_declarator" => {
                if let Some(name) = extract_declarator_name(source, child) {
                    add_variable_chunk(source, node, &name, type_text, chunks);
                }
            }
            _ => {}
        }
    }
}
/// Emits a `"variable"` chunk for `declarator` (the `declarator` field of the declaration
/// `node`) unless this same declaration has already produced a chunk for that name.
///
/// The duplicate check is scoped to chunks of kind `"variable"` that cover exactly the
/// line span of `node`, i.e. the chunks `add_variable_chunk` creates for this declaration.
/// The earlier check compared names against every chunk collected so far, so an unrelated
/// symbol with the same name (for example the struct in `struct config config;`, or a
/// variable declared elsewhere) silently suppressed the variable.
///
/// Nothing is emitted when no name can be resolved from `declarator`.
fn extract_single_variable_declarator(
    source: &str,
    node: Node,
    declarator: Node,
    type_text: &Option<String>,
    chunks: &mut Vec<SymbolChunk>,
) {
    let name = match extract_declarator_name(source, declarator) {
        Some(name) => name,
        None => return,
    };

    // Same 1-based conversion that `add_variable_chunk` applies to `node`.
    let start_line = (node.start_position().row + 1) as i32;
    let end_line = (node.end_position().row + 1) as i32;

    let already_recorded = chunks.iter().any(|chunk| {
        chunk.kind == "variable"
            && chunk.start_line == start_line
            && chunk.end_line == end_line
            && chunk.symbol_name.as_deref() == Some(name.as_str())
    });
    if !already_recorded {
        add_variable_chunk(source, node, &name, type_text, chunks);
    }
}
/// Pushes a `"variable"` chunk named `name` for the declaration `node`.
///
/// The type text, when known, is used both as the chunk's `signature` and as the `type`
/// entry of its metadata; without it the metadata is `None`. The line span and the doc
/// comment lookup are taken from `node`.
fn add_variable_chunk(
    source: &str,
    node: Node,
    name: &str,
    type_text: &Option<String>,
    chunks: &mut Vec<SymbolChunk>,
) {
    let metadata = type_text
        .as_ref()
        .map(|ty| serde_json::json!({ "type": ty }));
    let first_row = node.start_position().row;
    let last_row = node.end_position().row;

    chunks.push(SymbolChunk {
        symbol_name: Some(name.to_owned()),
        kind: "variable".to_string(),
        signature: type_text.clone(),
        docstring: extract_c_doc_comment(source, node),
        start_line: (first_row + 1) as i32,
        end_line: (last_row + 1) as i32,
        metadata,
    });
}
fn extract_c_typedef(source: &str, node: Node, chunks: &mut Vec<SymbolChunk>) {
let type_node = node.child_by_field_name("type");
let type_text = type_node
.as_ref()
.and_then(|n| n.utf8_text(source.as_bytes()).ok())
.map(|s| s.to_string());
let declarator = match node.child_by_field_name("declarator") {
Some(decl) => decl,
None => {
if let Some(type_node) = type_node {
if type_node.kind() == "struct_specifier" {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if child.kind() == "type_identifier" {
if let Ok(name) = child.utf8_text(source.as_bytes()) {
let docstring = extract_c_doc_comment(source, node);
let mut metadata_obj = serde_json::Map::new();
metadata_obj.insert(
"underlying_type".to_string(),
serde_json::Value::String("struct".to_string()),
);
let start = node.start_position();
let end = node.end_position();
chunks.push(SymbolChunk {
symbol_name: Some(name.to_string()),
kind: "typedef".to_string(),
signature: Some("struct".to_string()),
docstring,
start_line: (start.row + 1) as i32,
end_line: (end.row + 1) as i32,
metadata: Some(serde_json::Value::Object(metadata_obj)),
});
return;
}
}
}
}
}
return;
}
};
let name = extract_declarator_name(source, declarator);
if let Some(name) = name {
let docstring = extract_c_doc_comment(source, node);
let mut metadata_obj = serde_json::Map::new();
if let Some(ref typ) = type_text {
metadata_obj.insert(
"underlying_type".to_string(),
serde_json::Value::String(typ.clone()),
);
}
let start = node.start_position();
let end = node.end_position();
chunks.push(SymbolChunk {
symbol_name: Some(name),
kind: "typedef".to_string(),
signature: type_text,
docstring,
start_line: (start.row + 1) as i32,
end_line: (end.row + 1) as i32,
metadata: if metadata_obj.is_empty() {
None
} else {
Some(serde_json::Value::Object(metadata_obj))
},
});
}
}
/// Appends a JSON record for a `preproc_include` node to `includes`.
///
/// The record has a `path` entry holding the text of the node's `path` field exactly as
/// written, and a `type` entry that is `"system"` when that text starts with `<` and
/// `"local"` otherwise. Nodes without a readable `path` field are ignored.
fn collect_c_include(source: &str, node: Node, includes: &mut Vec<serde_json::Value>) {
    let path_node = match node.child_by_field_name("path") {
        Some(found) => found,
        None => return,
    };
    if let Ok(path) = path_node.utf8_text(source.as_bytes()) {
        let origin = if path.starts_with('<') { "system" } else { "local" };
        includes.push(serde_json::json!({
            "type": origin,
            "path": path
        }));
    }
}
/// Collects the comment text that directly precedes `node` and returns it as a
/// newline-joined docstring, or `None` when there is none.
///
/// Lines above the node are scanned bottom-up:
/// * `//` lines are accumulated (leading slashes stripped, so `///` works as well);
/// * a line ending in `*/` pulls in the whole block comment via `extract_block_comment`
///   and ends the scan;
/// * a line that only opens a comment (`/* ...`) contributes its text and ends the scan;
/// * blank lines are skipped; any other line ends the scan.
///
/// Fixes over the previous version:
/// * block-comment lines are kept in source order. They arrive top-to-bottom and were each
///   inserted at the front of the result, which reversed multi-line comments;
/// * `///` lines no longer keep a stray leading `/`.
///
/// The source is re-split into lines on every call.
fn extract_c_doc_comment(source: &str, node: Node) -> Option<String> {
    let start_line = node.start_position().row;
    let lines: Vec<&str> = source.lines().collect();

    // Gathered bottom-to-top while scanning upwards; reversed once at the end.
    let mut collected: Vec<&str> = Vec::new();
    for idx in (0..start_line).rev() {
        let line = lines.get(idx)?.trim();
        if line.starts_with("//") {
            collected.push(line.trim_start_matches('/').trim());
        } else if line.ends_with("*/") {
            if let Some(block) = extract_block_comment(&lines, idx) {
                // `block` is top-to-bottom; store it reversed to match `collected`.
                collected.extend(block.into_iter().rev());
            }
            break;
        } else if line.starts_with("/*") {
            let text = line
                .strip_prefix("/*")
                .unwrap_or("")
                .trim_end_matches("*/")
                .trim();
            collected.push(text);
            break;
        } else if !line.is_empty() {
            break;
        }
    }

    if collected.is_empty() {
        None
    } else {
        collected.reverse();
        Some(collected.join("\n"))
    }
}
/// Extracts the text of the `/* ... */` block comment whose closing line is
/// `lines[end_index]`.
///
/// Scans upwards from `end_index` for a line that (after trimming) starts with `/*`, then
/// returns the comment's lines in top-to-bottom order with the `/*` opener, the `*/`
/// closer, leading `*` decoration and surrounding whitespace removed. Lines that are empty
/// after cleaning are dropped.
///
/// Returns `None` when:
/// * `end_index` is out of range;
/// * no opening line is found;
/// * the closing line opens its comment mid-line (a trailing comment on a code line such
///   as `int a; /* note */`);
/// * a line above the closing line contains `*/`, meaning an earlier comment already ended
///   there and the lines in between are not comment text.
///
/// The last two checks stop the upward scan from swallowing unrelated code lines up to
/// some earlier comment opener.
fn extract_block_comment<'a>(lines: &'a [&str], end_index: usize) -> Option<Vec<&'a str>> {
    let window = lines.get(..=end_index)?;

    let mut start_index = None;
    for (idx, raw) in window.iter().enumerate().rev() {
        let line = raw.trim();
        if idx < end_index && line.contains("*/") {
            return None;
        }
        if line.starts_with("/*") {
            start_index = Some(idx);
            break;
        }
        if idx == end_index && line.contains("/*") {
            return None;
        }
    }
    let start_index = start_index?;

    let cleaned = window[start_index..]
        .iter()
        .map(|raw| {
            raw.trim()
                .trim_start_matches("/*")
                .trim_end_matches("*/")
                .trim_start_matches('*')
                .trim()
        })
        .filter(|text| !text.is_empty())
        .collect();
    Some(cleaned)
}
/// Resolves the function name and the raw parameter-list text from a declarator.
///
/// `pointer_declarator` layers are peeled off first. For a `function_declarator` the name
/// comes from its inner `declarator` (via `extract_identifier`) and the parameters from
/// the text of its `parameters` field. A bare `identifier` yields its text as the name and
/// no parameters. Every other shape yields `(None, None)`.
fn extract_function_name_and_params(source: &str, node: Node) -> (Option<String>, Option<String>) {
    let mut current = node;
    while current.kind() == "pointer_declarator" {
        match current.child_by_field_name("declarator") {
            Some(inner) => current = inner,
            None => return (None, None),
        }
    }

    match current.kind() {
        "function_declarator" => {
            let name = match current.child_by_field_name("declarator") {
                Some(inner) => extract_identifier(source, inner),
                None => None,
            };
            let params = match current.child_by_field_name("parameters") {
                Some(list) => list.utf8_text(source.as_bytes()).ok().map(str::to_owned),
                None => None,
            };
            (name, params)
        }
        "identifier" => {
            let name = current
                .utf8_text(source.as_bytes())
                .ok()
                .map(str::to_owned);
            (name, None)
        }
        _ => (None, None),
    }
}
/// Returns the text of the `identifier` reached by following the `declarator` field
/// through any `pointer_declarator` / `function_declarator` wrappers.
///
/// Returns `None` if another node kind is encountered, a wrapper has no inner
/// `declarator`, or the identifier text cannot be read as UTF-8.
fn extract_identifier(source: &str, node: Node) -> Option<String> {
    let mut current = node;
    loop {
        current = match current.kind() {
            "identifier" => {
                return current
                    .utf8_text(source.as_bytes())
                    .ok()
                    .map(str::to_owned);
            }
            "pointer_declarator" | "function_declarator" => {
                current.child_by_field_name("declarator")?
            }
            _ => return None,
        };
    }
}
/// Returns the declared name at the core of a variable or typedef declarator.
///
/// `init_declarator`, `pointer_declarator` and `array_declarator` wrappers are unwrapped
/// through their `declarator` field until an `identifier` or `type_identifier` is reached,
/// whose text is returned. Any other node kind, a wrapper without an inner `declarator`,
/// or unreadable text yields `None`.
fn extract_declarator_name(source: &str, node: Node) -> Option<String> {
    let mut current = node;
    loop {
        current = match current.kind() {
            "identifier" | "type_identifier" => {
                return current
                    .utf8_text(source.as_bytes())
                    .ok()
                    .map(str::to_owned);
            }
            "init_declarator" | "pointer_declarator" | "array_declarator" => {
                current.child_by_field_name("declarator")?
            }
            _ => return None,
        };
    }
}
/// Returns the text of the first direct `storage_class_specifier` child of `node`.
///
/// Yields `None` when there is no such child or its text cannot be read as UTF-8.
fn extract_storage_class(source: &str, node: Node) -> Option<String> {
    let mut cursor = node.walk();
    // Bound to a local so the iterator's borrow of `cursor` ends before the tail expression.
    let specifier = node
        .children(&mut cursor)
        .find(|child| child.kind() == "storage_class_specifier");
    specifier
        .and_then(|found| found.utf8_text(source.as_bytes()).ok())
        .map(str::to_owned)
}
/// Counts the direct children of `body` whose kind is `field_declaration`.
fn count_struct_fields(body: Node) -> usize {
    let mut cursor = body.walk();
    // Bound to a local so the iterator's borrow of `cursor` ends before the function returns.
    let total = body
        .children(&mut cursor)
        .filter(|member| member.kind() == "field_declaration")
        .count();
    total
}
/// Counts the direct children of `body` whose kind is `enumerator`.
fn count_enumerators(body: Node) -> usize {
    let mut cursor = body.walk();
    // Bound to a local so the iterator's borrow of `cursor` ends before the function returns.
    let total = body
        .children(&mut cursor)
        .filter(|member| member.kind() == "enumerator")
        .count();
    total
}