use super::{extract_doc_comment, field_text, node_text};
use crate::types::*;
/// Collects the symbols and imports declared in a parsed source tree.
///
/// Traversal starts at the root node of `tree` and is delegated to
/// [`extract_children`] with no enclosing parent name. `source` must be the
/// byte buffer the tree was parsed from, because node text is sliced out of it.
///
/// Returns `(symbols, imports)` in the order they were encountered.
pub fn extract(tree: &tree_sitter::Tree, source: &[u8]) -> (Vec<Symbol>, Vec<Import>) {
    let (mut symbols, mut imports) = (Vec::new(), Vec::new());
    extract_children(&tree.root_node(), source, &mut symbols, &mut imports, None);
    (symbols, imports)
}
/// Dispatches every direct child of `node` to the matching extractor.
///
/// Recognised declarations are appended to `symbols`, `use` statements to
/// `imports`. `parent_name` is forwarded to the extractors so that the
/// resulting symbols record the enclosing type or namespace.
///
/// An unbraced namespace statement (`namespace Foo;`) has no body node: the
/// declarations that belong to it are its *following siblings*.
/// `extract_namespace` already walks those siblings and files them under the
/// namespace symbol (and pushes their `use` statements into `imports`), so
/// this loop must not visit them a second time. Otherwise every declaration
/// would also be reported as a top-level symbol and every import would be
/// recorded twice.
fn extract_children(
    node: &tree_sitter::Node,
    source: &[u8],
    symbols: &mut Vec<Symbol>,
    imports: &mut Vec<Import>,
    parent_name: Option<&str>,
) {
    // Set after an unbraced `namespace_definition`; cleared again by the next
    // namespace definition that carries a `compound_statement` body.
    let mut inside_unbraced_namespace = false;
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        let kind = child.kind();

        // These are exactly the sibling kinds `extract_namespace` consumes on
        // behalf of an unbraced namespace; anything else is still handled here.
        if inside_unbraced_namespace
            && matches!(
                kind,
                "class_declaration"
                    | "interface_declaration"
                    | "trait_declaration"
                    | "enum_declaration"
                    | "function_definition"
                    | "const_declaration"
                    | "namespace_use_declaration"
            )
        {
            continue;
        }

        match kind {
            "function_definition" => {
                symbols.extend(extract_function(&child, source, parent_name));
            }
            "method_declaration" => {
                symbols.extend(extract_method(&child, source, parent_name));
            }
            "class_declaration" => {
                symbols.extend(extract_type_decl(
                    &child,
                    source,
                    SymbolKind::Class,
                    parent_name,
                ));
            }
            "interface_declaration" => {
                symbols.extend(extract_type_decl(
                    &child,
                    source,
                    SymbolKind::Interface,
                    parent_name,
                ));
            }
            "trait_declaration" => {
                symbols.extend(extract_type_decl(
                    &child,
                    source,
                    SymbolKind::Trait,
                    parent_name,
                ));
            }
            "enum_declaration" => {
                symbols.extend(extract_type_decl(
                    &child,
                    source,
                    SymbolKind::Enum,
                    parent_name,
                ));
            }
            "namespace_definition" => {
                extract_namespace(&child, source, symbols, imports);
                // Mirror the body detection used by `extract_namespace`: a
                // namespace without a `compound_statement` child owns the
                // siblings that follow it.
                let mut inner = child.walk();
                let has_block = child
                    .children(&mut inner)
                    .any(|n| n.kind() == "compound_statement");
                inside_unbraced_namespace = !has_block;
            }
            "namespace_use_declaration" => {
                let text = node_text(&child, source).trim().to_string();
                imports.push(Import {
                    path: text,
                    alias: None,
                    span: Span::from_node(&child),
                });
            }
            "const_declaration" => {
                extract_const(&child, source, symbols, parent_name);
            }
            "property_declaration" => {
                extract_property(&child, source, symbols, parent_name);
            }
            // Pure container nodes: descend without changing the parent.
            "declaration_list" | "program" => {
                extract_children(&child, source, symbols, imports, parent_name);
            }
            _ => {}
        }
    }
}
/// Builds a `Function` symbol from a `function_definition` node.
///
/// Returns `None` when the node has no `name` field. The signature is the
/// source text from the start of the node up to (not including) its `body`
/// field, trimmed; when there is no body the full node text is used as-is.
/// Text that is not valid UTF-8 yields an empty signature.
fn extract_function(
    node: &tree_sitter::Node,
    source: &[u8],
    parent_name: Option<&str>,
) -> Option<Symbol> {
    let name = field_text(node, "name", source)?;

    let signature = match node.child_by_field_name("body") {
        Some(body) => {
            let header = &source[node.start_byte()..body.start_byte()];
            std::str::from_utf8(header)
                .map(str::trim)
                .unwrap_or("")
                .to_owned()
        }
        None => node_text(node, source).to_owned(),
    };

    Some(Symbol {
        name: name.to_owned(),
        kind: SymbolKind::Function,
        span: Span::from_node(node),
        signature,
        doc_comment: extract_doc_comment(node, source),
        parent: parent_name.map(str::to_owned),
        children: Vec::new(),
    })
}
/// Builds a symbol from a `method_declaration` node.
///
/// The symbol is a `Method` when an enclosing `parent_name` is known and a
/// plain `Function` otherwise. Returns `None` when the node has no `name`
/// field.
///
/// The signature is everything before the `body` field, trimmed. A
/// declaration without a body falls back to the whole node text with any
/// trailing `;` characters and surrounding whitespace stripped.
fn extract_method(
    node: &tree_sitter::Node,
    source: &[u8],
    parent_name: Option<&str>,
) -> Option<Symbol> {
    let name = field_text(node, "name", source)?;

    let kind = match parent_name {
        Some(_) => SymbolKind::Method,
        None => SymbolKind::Function,
    };

    let signature = match node.child_by_field_name("body") {
        Some(body) => {
            let header = &source[node.start_byte()..body.start_byte()];
            std::str::from_utf8(header)
                .map(str::trim)
                .unwrap_or("")
                .to_owned()
        }
        None => {
            let declaration = node_text(node, source);
            declaration.trim_end_matches(';').trim().to_owned()
        }
    };

    Some(Symbol {
        name: name.to_owned(),
        kind,
        span: Span::from_node(node),
        signature,
        doc_comment: extract_doc_comment(node, source),
        parent: parent_name.map(str::to_owned),
        children: Vec::new(),
    })
}
fn extract_type_decl(
node: &tree_sitter::Node,
source: &[u8],
kind: SymbolKind,
parent_name: Option<&str>,
) -> Option<Symbol> {
let name = field_text(node, "name", source)?;
let body = find_child_of_kind(node, "declaration_list");
let signature = if let Some(ref body) = body {
let sig = &source[node.start_byte()..body.start_byte()];
std::str::from_utf8(sig).unwrap_or("").trim().to_string()
} else {
node_text(node, source)
.lines()
.next()
.unwrap_or("")
.trim()
.to_string()
};
let mut children = Vec::new();
let mut child_imports = Vec::new();
if let Some(body) = body {
extract_children(&body, source, &mut children, &mut child_imports, Some(name));
}
Some(Symbol {
name: name.to_string(),
kind,
span: Span::from_node(node),
signature,
doc_comment: extract_doc_comment(node, source),
parent: parent_name.map(|s| s.to_string()),
children,
})
}
/// Emits a `Module` symbol for a `namespace_definition` node.
///
/// Two layouts are handled:
///
/// * The node has a `compound_statement` child: its contents are extracted
///   through [`extract_children`] and become the namespace's children.
/// * Otherwise the following siblings of `node` are scanned up to (but not
///   including) the next `namespace_definition`. Class, interface, trait,
///   enum, function and const declarations become children of the namespace;
///   `namespace_use_declaration` siblings are pushed into `imports`; every
///   other sibling kind is ignored.
///
/// A namespace without a `name` field is reported as `(anonymous)`. The
/// symbol's span covers only the `namespace_definition` node itself and its
/// `parent` is always `None`.
fn extract_namespace(
    node: &tree_sitter::Node,
    source: &[u8],
    symbols: &mut Vec<Symbol>,
    imports: &mut Vec<Import>,
) {
    let name = field_text(node, "name", source).unwrap_or("(anonymous)");
    let mut members = Vec::new();

    if let Some(block) = find_child_of_kind(node, "compound_statement") {
        extract_children(&block, source, &mut members, imports, Some(name));
    } else {
        let mut next = node.next_sibling();
        while let Some(current) = next {
            match current.kind() {
                // The next namespace statement ends this namespace's region.
                "namespace_definition" => break,
                "class_declaration" => members.extend(extract_type_decl(
                    &current,
                    source,
                    SymbolKind::Class,
                    Some(name),
                )),
                "interface_declaration" => members.extend(extract_type_decl(
                    &current,
                    source,
                    SymbolKind::Interface,
                    Some(name),
                )),
                "trait_declaration" => members.extend(extract_type_decl(
                    &current,
                    source,
                    SymbolKind::Trait,
                    Some(name),
                )),
                "enum_declaration" => members.extend(extract_type_decl(
                    &current,
                    source,
                    SymbolKind::Enum,
                    Some(name),
                )),
                "function_definition" => {
                    members.extend(extract_function(&current, source, Some(name)))
                }
                "const_declaration" => {
                    extract_const(&current, source, &mut members, Some(name))
                }
                "namespace_use_declaration" => imports.push(Import {
                    path: node_text(&current, source).trim().to_string(),
                    alias: None,
                    span: Span::from_node(&current),
                }),
                _ => {}
            }
            next = current.next_sibling();
        }
    }

    symbols.push(Symbol {
        name: name.to_string(),
        kind: SymbolKind::Module,
        span: Span::from_node(node),
        signature: format!("namespace {}", name),
        doc_comment: extract_doc_comment(node, source),
        parent: None,
        children: members,
    });
}
/// Emits one `Const` symbol per `const_element` inside a `const_declaration`.
///
/// Each symbol's span is the individual element. The signature and doc
/// comment are taken from the whole declaration, so `const A = 1, B = 2;`
/// yields two symbols sharing the same signature text (trailing `;` and
/// surrounding whitespace removed).
///
/// The element name is read from the `name` field when the grammar provides
/// one. NOTE(review): tree-sitter-php appears to expose the identifier of a
/// `const_element` as a plain `name` child rather than a field in at least
/// some grammar versions, in which case the field lookup alone would silently
/// skip every constant. The first `name` child is therefore used as a
/// fallback; confirm against the grammar version in use.
fn extract_const(
    node: &tree_sitter::Node,
    source: &[u8],
    symbols: &mut Vec<Symbol>,
    parent_name: Option<&str>,
) {
    let mut cursor = node.walk();
    for element in node.children(&mut cursor) {
        if element.kind() != "const_element" {
            continue;
        }

        let field_name = field_text(&element, "name", source);

        // Only search the children when the field lookup came up empty. The
        // constant's own identifier precedes its value, so the first `name`
        // child is the right one even for `const A = B;`.
        let mut inner = element.walk();
        let fallback_node = if field_name.is_none() {
            element.children(&mut inner).find(|n| n.kind() == "name")
        } else {
            None
        };

        let name = match (field_name, &fallback_node) {
            (Some(name), _) => name,
            (None, Some(name_node)) => node_text(name_node, source),
            (None, None) => continue,
        };

        symbols.push(Symbol {
            name: name.to_string(),
            kind: SymbolKind::Const,
            span: Span::from_node(&element),
            signature: node_text(node, source)
                .trim_end_matches(';')
                .trim()
                .to_string(),
            doc_comment: extract_doc_comment(node, source),
            parent: parent_name.map(|s| s.to_string()),
            children: Vec::new(),
        });
    }
}
/// Emits one symbol per `property_element` inside a `property_declaration`.
///
/// Properties are reported with `SymbolKind::Const`. The symbol name is the
/// raw text of the element's `name` field; elements without that field, or
/// whose name text is empty, are skipped. Span, signature and doc comment all
/// come from the enclosing declaration node; the signature has trailing `;`
/// characters and surrounding whitespace removed.
fn extract_property(
    node: &tree_sitter::Node,
    source: &[u8],
    symbols: &mut Vec<Symbol>,
    parent_name: Option<&str>,
) {
    let mut walker = node.walk();
    for element in node.children(&mut walker) {
        if element.kind() != "property_element" {
            continue;
        }

        let var_node = match element.child_by_field_name("name") {
            Some(found) => found,
            None => continue,
        };

        let name = node_text(&var_node, source);
        if name.is_empty() {
            continue;
        }

        let declaration = node_text(node, source);
        symbols.push(Symbol {
            name: name.to_owned(),
            kind: SymbolKind::Const,
            span: Span::from_node(node),
            signature: declaration.trim_end_matches(';').trim().to_owned(),
            doc_comment: extract_doc_comment(node, source),
            parent: parent_name.map(str::to_owned),
            children: Vec::new(),
        });
    }
}
/// Returns the first direct child of `node` whose kind equals `kind`, or
/// `None` when no child matches. Only immediate children are inspected.
fn find_child_of_kind<'a>(
    node: &'a tree_sitter::Node,
    kind: &str,
) -> Option<tree_sitter::Node<'a>> {
    let mut cursor = node.walk();
    // Bind the result before returning so the iterator's borrow of `cursor`
    // ends before `cursor` itself is dropped.
    let found = node
        .children(&mut cursor)
        .find(|candidate| candidate.kind() == kind);
    found
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` with the tree-sitter PHP grammar and runs [`extract`].
    fn parse_php(source: &str) -> (Vec<Symbol>, Vec<Import>) {
        let language = tree_sitter_php::LANGUAGE_PHP.into();
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(source, None).unwrap();
        extract(&tree, source.as_bytes())
    }

    /// Names of `symbols`, in order, for use in assertion messages.
    fn names_of(symbols: &[Symbol]) -> Vec<&str> {
        symbols.iter().map(|s| s.name.as_str()).collect()
    }

    /// First symbol in `symbols` called `name`, if any.
    fn by_name<'a>(symbols: &'a [Symbol], name: &str) -> Option<&'a Symbol> {
        symbols.iter().find(|s| s.name == name)
    }

    /// An unbraced namespace owns the interface, trait and class that follow
    /// it, and the class exposes its methods as children.
    #[test]
    fn test_class_with_methods() {
        let source = r#"<?php
namespace App\Models;
use App\Contracts\Serializable;
interface Loggable {
public function log(): void;
}
trait Timestamped {
public function createdAt(): string {
return $this->created;
}
}
class User extends Model implements Serializable {
const MAX_NAME_LENGTH = 255;
public function getName(): string {
return $this->name;
}
public function setName(string $name): void {
$this->name = $name;
}
}
"#;
        let (symbols, imports) = parse_php(source);

        // Imports.
        assert!(
            !imports.is_empty(),
            "expected at least 1 import, got {}",
            imports.len()
        );
        let has_serializable = imports.iter().any(|i| i.path.contains("Serializable"));
        assert!(
            has_serializable,
            "missing Serializable import in {:?}",
            imports
        );

        // Namespace symbol.
        let ns = symbols.iter().find(|s| s.kind == SymbolKind::Module);
        assert!(
            ns.is_some(),
            "missing namespace in {:?}",
            symbols
                .iter()
                .map(|s| (&s.name, &s.kind))
                .collect::<Vec<_>>()
        );
        let ns = ns.unwrap();
        assert!(
            ns.name.contains("Models"),
            "namespace name should contain Models: {}",
            ns.name
        );

        // Type declarations nested under the namespace.
        let members = &ns.children;
        let names = names_of(members);

        let iface = by_name(members, "Loggable");
        assert!(iface.is_some(), "missing Loggable in {:?}", names);
        assert_eq!(iface.unwrap().kind, SymbolKind::Interface);

        let tr = by_name(members, "Timestamped");
        assert!(tr.is_some(), "missing Timestamped in {:?}", names);
        assert_eq!(tr.unwrap().kind, SymbolKind::Trait);

        let cls = by_name(members, "User");
        assert!(cls.is_some(), "missing User in {:?}", names);
        let cls = cls.unwrap();
        assert_eq!(cls.kind, SymbolKind::Class);

        // Methods nested under the class.
        let method_names = names_of(&cls.children);
        for expected in ["getName", "setName"] {
            assert!(
                method_names.contains(&expected),
                "missing {} in {:?}",
                expected,
                method_names
            );
        }
    }

    /// A lone top-level function yields exactly one `Function` symbol.
    #[test]
    fn test_standalone_function() {
        let source = r#"<?php
function add(int $a, int $b): int {
return $a + $b;
}
"#;
        let (symbols, _imports) = parse_php(source);
        assert_eq!(symbols.len(), 1);
        let only = &symbols[0];
        assert_eq!(only.name, "add");
        assert_eq!(only.kind, SymbolKind::Function);
    }

    /// An enum declaration is reported with `SymbolKind::Enum`.
    #[test]
    fn test_enum_declaration() {
        let source = r#"<?php
enum Status {
case Active;
case Inactive;
}
"#;
        let (symbols, _imports) = parse_php(source);
        let status = by_name(&symbols, "Status");
        assert!(status.is_some(), "missing Status enum");
        assert_eq!(status.unwrap().kind, SymbolKind::Enum);
    }
}