use crate::models::{Language, SearchResult, Span, SymbolKind};
use anyhow::{Context, Result};
use std::path::{Path, PathBuf};
use streaming_iterator::StreamingIterator;
use tree_sitter::{Parser, Query, QueryCursor};
pub fn parse(path: &str, source: &str) -> Result<Vec<SearchResult>> {
let mut parser = Parser::new();
let language = tree_sitter_php::LANGUAGE_PHP;
parser
.set_language(&language.into())
.context("Failed to set PHP language")?;
let tree = parser
.parse(source, None)
.context("Failed to parse PHP source")?;
let root_node = tree.root_node();
let mut symbols = Vec::new();
symbols.extend(extract_functions(source, &root_node, &language.into())?);
symbols.extend(extract_classes(source, &root_node, &language.into())?);
symbols.extend(extract_interfaces(source, &root_node, &language.into())?);
symbols.extend(extract_traits(source, &root_node, &language.into())?);
symbols.extend(extract_attributes(source, &root_node, &language.into())?);
symbols.extend(extract_methods(source, &root_node, &language.into())?);
symbols.extend(extract_properties(source, &root_node, &language.into())?);
symbols.extend(extract_local_variables(
source,
&root_node,
&language.into(),
)?);
symbols.extend(extract_constants(source, &root_node, &language.into())?);
symbols.extend(extract_namespaces(source, &root_node, &language.into())?);
symbols.extend(extract_enums(source, &root_node, &language.into())?);
for symbol in &mut symbols {
symbol.path = path.to_string();
symbol.lang = Language::PHP;
}
Ok(symbols)
}
/// Reports every `function_definition` node under `root` as a
/// `SymbolKind::Function`, named after its `name` field.
///
/// # Errors
/// Returns an error when the query cannot be compiled for `language`.
fn extract_functions(
    source: &str,
    root: &tree_sitter::Node,
    language: &tree_sitter::Language,
) -> Result<Vec<SearchResult>> {
    const PATTERN: &str = r#"
(function_definition
name: (name) @name) @function
"#;
    Query::new(language, PATTERN)
        .context("Failed to create function query")
        .and_then(|query| extract_symbols(source, root, &query, SymbolKind::Function, None))
}
/// Reports every `class_declaration` node under `root` as a
/// `SymbolKind::Class`, named after its `name` field.
///
/// # Errors
/// Returns an error when the query cannot be compiled for `language`.
fn extract_classes(
    source: &str,
    root: &tree_sitter::Node,
    language: &tree_sitter::Language,
) -> Result<Vec<SearchResult>> {
    const PATTERN: &str = r#"
(class_declaration
name: (name) @name) @class
"#;
    Query::new(language, PATTERN)
        .context("Failed to create class query")
        .and_then(|query| extract_symbols(source, root, &query, SymbolKind::Class, None))
}
/// Reports every `interface_declaration` node under `root` as a
/// `SymbolKind::Interface`, named after its `name` field.
///
/// # Errors
/// Returns an error when the query cannot be compiled for `language`.
fn extract_interfaces(
    source: &str,
    root: &tree_sitter::Node,
    language: &tree_sitter::Language,
) -> Result<Vec<SearchResult>> {
    const PATTERN: &str = r#"
(interface_declaration
name: (name) @name) @interface
"#;
    Query::new(language, PATTERN)
        .context("Failed to create interface query")
        .and_then(|query| extract_symbols(source, root, &query, SymbolKind::Interface, None))
}
/// Reports every `trait_declaration` node under `root` as a
/// `SymbolKind::Trait`, named after its `name` field.
///
/// # Errors
/// Returns an error when the query cannot be compiled for `language`.
fn extract_traits(
    source: &str,
    root: &tree_sitter::Node,
    language: &tree_sitter::Language,
) -> Result<Vec<SearchResult>> {
    const PATTERN: &str = r#"
(trait_declaration
name: (name) @name) @trait
"#;
    Query::new(language, PATTERN)
        .context("Failed to create trait query")
        .and_then(|query| extract_symbols(source, root, &query, SymbolKind::Trait, None))
}
/// Collects `SymbolKind::Attribute` symbols from two sources, in this order:
///
/// 1. class declarations that carry an attribute list and whose source text
///    contains the literal `#[Attribute` (reported under the class name);
/// 2. every attribute name that appears inside an attribute list (reported via
///    [`extract_symbols`]).
///
/// # Errors
/// Returns an error when either query cannot be compiled for `language`.
fn extract_attributes(
    source: &str,
    root: &tree_sitter::Node,
    language: &tree_sitter::Language,
) -> Result<Vec<SearchResult>> {
    const DEFINITION_PATTERN: &str = r#"
(class_declaration
(attribute_list)
name: (name) @name) @attribute_class
"#;
    const USAGE_PATTERN: &str = r#"
(attribute_list
(attribute_group
(attribute
(name) @name))) @attr
"#;

    let bytes = source.as_bytes();
    let mut found = Vec::new();

    // Pass 1: classes that are themselves declared as attributes.
    let definition_query = Query::new(language, DEFINITION_PATTERN)
        .context("Failed to create attribute definition query")?;
    let labels = definition_query.capture_names();
    let mut cursor = QueryCursor::new();
    let mut matches = cursor.matches(&definition_query, *root, bytes);
    while let Some(hit) = matches.next() {
        let mut class_name = None;
        let mut declaration = None;
        for cap in hit.captures {
            let label: &str = &labels[cap.index as usize];
            if label == "name" {
                class_name = Some(cap.node.utf8_text(bytes).unwrap_or("").to_string());
            } else if label == "attribute_class" {
                declaration = Some(cap.node);
            }
        }

        if let (Some(class_name), Some(declaration)) = (class_name, declaration) {
            // Textual check over the whole class declaration.
            let is_attribute_class = declaration
                .utf8_text(bytes)
                .unwrap_or("")
                .contains("#[Attribute");
            if is_attribute_class {
                let span = node_to_span(&declaration);
                let preview = extract_preview(source, &span);
                found.push(SearchResult::new(
                    String::new(),
                    Language::PHP,
                    SymbolKind::Attribute,
                    Some(class_name),
                    span,
                    None,
                    preview,
                ));
            }
        }
    }

    // Pass 2: every place an attribute is applied.
    let usage_query =
        Query::new(language, USAGE_PATTERN).context("Failed to create attribute use query")?;
    let usages = extract_symbols(source, root, &usage_query, SymbolKind::Attribute, None)?;
    found.extend(usages);

    Ok(found)
}
fn extract_methods(
source: &str,
root: &tree_sitter::Node,
language: &tree_sitter::Language,
) -> Result<Vec<SearchResult>> {
let query_str = r#"
(class_declaration
name: (name) @class_name
body: (declaration_list
(method_declaration
name: (name) @method_name))) @class
(trait_declaration
name: (name) @trait_name
body: (declaration_list
(method_declaration
name: (name) @method_name))) @trait
(interface_declaration
name: (name) @interface_name
body: (declaration_list
(method_declaration
name: (name) @method_name))) @interface
"#;
let query = Query::new(language, query_str).context("Failed to create method query")?;
let mut cursor = QueryCursor::new();
let mut matches = cursor.matches(&query, *root, source.as_bytes());
let mut symbols = Vec::new();
while let Some(match_) = matches.next() {
let mut scope_name = None;
let mut scope_type = None;
let mut method_name = None;
let mut method_node = None;
for capture in match_.captures {
let capture_name: &str = &query.capture_names()[capture.index as usize];
match capture_name {
"class_name" => {
scope_name = Some(
capture
.node
.utf8_text(source.as_bytes())
.unwrap_or("")
.to_string(),
);
scope_type = Some("class");
}
"trait_name" => {
scope_name = Some(
capture
.node
.utf8_text(source.as_bytes())
.unwrap_or("")
.to_string(),
);
scope_type = Some("trait");
}
"interface_name" => {
scope_name = Some(
capture
.node
.utf8_text(source.as_bytes())
.unwrap_or("")
.to_string(),
);
scope_type = Some("interface");
}
"method_name" => {
method_name = Some(
capture
.node
.utf8_text(source.as_bytes())
.unwrap_or("")
.to_string(),
);
let mut current = capture.node;
while let Some(parent) = current.parent() {
if parent.kind() == "method_declaration" {
method_node = Some(parent);
break;
}
current = parent;
}
}
_ => {}
}
}
if let (Some(scope_name), Some(scope_type), Some(method_name), Some(node)) =
(scope_name, scope_type, method_name, method_node)
{
let scope = format!("{} {}", scope_type, scope_name);
let span = node_to_span(&node);
let preview = extract_preview(source, &span);
symbols.push(SearchResult::new(
String::new(),
Language::PHP,
SymbolKind::Method,
Some(method_name),
span,
Some(scope),
preview,
));
}
}
Ok(symbols)
}
fn extract_properties(
source: &str,
root: &tree_sitter::Node,
language: &tree_sitter::Language,
) -> Result<Vec<SearchResult>> {
let query_str = r#"
(class_declaration
name: (name) @class_name
body: (declaration_list
(property_declaration
(property_element
(variable_name
(name) @prop_name))))) @class
(trait_declaration
name: (name) @trait_name
body: (declaration_list
(property_declaration
(property_element
(variable_name
(name) @prop_name))))) @trait
"#;
let query = Query::new(language, query_str).context("Failed to create property query")?;
let mut cursor = QueryCursor::new();
let mut matches = cursor.matches(&query, *root, source.as_bytes());
let mut symbols = Vec::new();
while let Some(match_) = matches.next() {
let mut scope_name = None;
let mut scope_type = None;
let mut prop_name = None;
let mut prop_node = None;
for capture in match_.captures {
let capture_name: &str = &query.capture_names()[capture.index as usize];
match capture_name {
"class_name" => {
scope_name = Some(
capture
.node
.utf8_text(source.as_bytes())
.unwrap_or("")
.to_string(),
);
scope_type = Some("class");
}
"trait_name" => {
scope_name = Some(
capture
.node
.utf8_text(source.as_bytes())
.unwrap_or("")
.to_string(),
);
scope_type = Some("trait");
}
"prop_name" => {
prop_name = Some(
capture
.node
.utf8_text(source.as_bytes())
.unwrap_or("")
.to_string(),
);
let mut current = capture.node;
while let Some(parent) = current.parent() {
if parent.kind() == "property_declaration" {
prop_node = Some(parent);
break;
}
current = parent;
}
}
_ => {}
}
}
if let (Some(scope_name), Some(scope_type), Some(prop_name), Some(node)) =
(scope_name, scope_type, prop_name, prop_node)
{
let scope = format!("{} {}", scope_type, scope_name);
let span = node_to_span(&node);
let preview = extract_preview(source, &span);
symbols.push(SearchResult::new(
String::new(),
Language::PHP,
SymbolKind::Variable,
Some(prop_name),
span,
Some(scope),
preview,
));
}
}
Ok(symbols)
}
/// Reports every `assignment_expression` whose left side is a plain
/// `variable_name` as a `SymbolKind::Variable` with no scope.
///
/// One symbol is produced per matching assignment, wherever it appears under
/// `root`; the span covers the whole assignment expression.
///
/// # Errors
/// Returns an error when the query cannot be compiled for `language`.
fn extract_local_variables(
    source: &str,
    root: &tree_sitter::Node,
    language: &tree_sitter::Language,
) -> Result<Vec<SearchResult>> {
    const PATTERN: &str = r#"
(assignment_expression
left: (variable_name
(name) @name)) @assignment
"#;

    let query = Query::new(language, PATTERN).context("Failed to create local variable query")?;
    let labels = query.capture_names();
    let bytes = source.as_bytes();

    let mut cursor = QueryCursor::new();
    let mut matches = cursor.matches(&query, *root, bytes);
    let mut variables = Vec::new();

    while let Some(hit) = matches.next() {
        let mut variable_name = None;
        let mut assignment = None;
        for cap in hit.captures {
            let label: &str = &labels[cap.index as usize];
            if label == "name" {
                variable_name = Some(cap.node.utf8_text(bytes).unwrap_or("").to_string());
            } else if label == "assignment" {
                assignment = Some(cap.node);
            }
        }

        if let (Some(variable_name), Some(assignment)) = (variable_name, assignment) {
            let span = node_to_span(&assignment);
            let preview = extract_preview(source, &span);
            variables.push(SearchResult::new(
                String::new(),
                Language::PHP,
                SymbolKind::Variable,
                Some(variable_name),
                span,
                None,
                preview,
            ));
        }
    }

    Ok(variables)
}
/// Reports every `const_element` inside a `const_declaration` as a
/// `SymbolKind::Constant`; the span covers the whole declaration.
///
/// # Errors
/// Returns an error when the query cannot be compiled for `language`.
fn extract_constants(
    source: &str,
    root: &tree_sitter::Node,
    language: &tree_sitter::Language,
) -> Result<Vec<SearchResult>> {
    const PATTERN: &str = r#"
(const_declaration
(const_element
(name) @name)) @const
"#;
    Query::new(language, PATTERN)
        .context("Failed to create constant query")
        .and_then(|query| extract_symbols(source, root, &query, SymbolKind::Constant, None))
}
/// Reports every `namespace_definition` node under `root` as a
/// `SymbolKind::Namespace`, named after its `namespace_name` field.
///
/// # Errors
/// Returns an error when the query cannot be compiled for `language`.
fn extract_namespaces(
    source: &str,
    root: &tree_sitter::Node,
    language: &tree_sitter::Language,
) -> Result<Vec<SearchResult>> {
    const PATTERN: &str = r#"
(namespace_definition
name: (namespace_name) @name) @namespace
"#;
    Query::new(language, PATTERN)
        .context("Failed to create namespace query")
        .and_then(|query| extract_symbols(source, root, &query, SymbolKind::Namespace, None))
}
/// Reports every `enum_declaration` node under `root` as a
/// `SymbolKind::Enum`, named after its `name` field.
///
/// # Errors
/// Returns an error when the query cannot be compiled for `language`.
fn extract_enums(
    source: &str,
    root: &tree_sitter::Node,
    language: &tree_sitter::Language,
) -> Result<Vec<SearchResult>> {
    const PATTERN: &str = r#"
(enum_declaration
name: (name) @name) @enum
"#;
    Query::new(language, PATTERN)
        .context("Failed to create enum query")
        .and_then(|query| extract_symbols(source, root, &query, SymbolKind::Enum, None))
}
/// Runs `query` over `root` and turns each match into a [`SearchResult`] of the
/// given `kind` and `scope`.
///
/// Within a match, the capture called `name` supplies the symbol text; any other
/// capture supplies the node used for the span and preview (the last one seen
/// wins). Matches missing either piece are logged at `warn` level and skipped.
fn extract_symbols(
    source: &str,
    root: &tree_sitter::Node,
    query: &Query,
    kind: SymbolKind,
    scope: Option<String>,
) -> Result<Vec<SearchResult>> {
    let bytes = source.as_bytes();
    let labels = query.capture_names();

    let mut cursor = QueryCursor::new();
    let mut matches = cursor.matches(query, *root, bytes);
    let mut collected = Vec::new();

    while let Some(hit) = matches.next() {
        let mut symbol_name: Option<String> = None;
        let mut enclosing = None;
        for cap in hit.captures {
            let label: &str = &labels[cap.index as usize];
            match label {
                "name" => {
                    symbol_name = Some(cap.node.utf8_text(bytes).unwrap_or("").to_string());
                }
                _ => enclosing = Some(cap.node),
            }
        }

        let node = match enclosing {
            Some(node) => node,
            None => {
                if symbol_name.is_some() {
                    log::warn!("PHP parser: Failed to extract node for {:?} symbol", kind);
                } else {
                    log::warn!(
                        "PHP parser: Failed to extract both name and node for {:?} symbol",
                        kind
                    );
                }
                continue;
            }
        };

        let symbol_name = match symbol_name {
            Some(text) => text,
            None => {
                log::warn!(
                    "PHP parser: Failed to extract name from {:?} capture at line {}",
                    kind,
                    node.start_position().row + 1
                );
                continue;
            }
        };

        let span = node_to_span(&node);
        let preview = extract_preview(source, &span);
        collected.push(SearchResult::new(
            String::new(),
            Language::PHP,
            kind.clone(),
            Some(symbol_name),
            span,
            scope.clone(),
            preview,
        ));
    }

    Ok(collected)
}
/// Builds a [`Span`] from a node's start and end positions.
///
/// Rows are shifted by one before being handed to `Span::new`; columns are
/// passed through unchanged.
fn node_to_span(node: &tree_sitter::Node) -> Span {
    let (first, last) = (node.start_position(), node.end_position());
    Span::new(first.row + 1, first.column, last.row + 1, last.column)
}
/// Returns up to seven lines of `source`, starting at the span's first line and
/// joined with `\n`.
///
/// `span.start_line` is treated as one-based. A start line of zero is clamped to
/// the first line, and a start line past the end of `source` yields an empty
/// string instead of panicking.
fn extract_preview(source: &str, span: &Span) -> String {
    const PREVIEW_LINES: usize = 7;
    // saturating_sub guards against underflow when start_line is 0.
    let first_line = span.start_line.saturating_sub(1) as usize;
    // skip/take never index out of bounds and avoid materialising every line.
    source
        .lines()
        .skip(first_line)
        .take(PREVIEW_LINES)
        .collect::<Vec<_>>()
        .join("\n")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` as `test.php`, panicking if parsing fails.
    fn parse_php(source: &str) -> Vec<SearchResult> {
        parse("test.php", source).unwrap()
    }

    /// Returns the symbols of the given kind, in extraction order.
    fn of_kind(symbols: &[SearchResult], kind: SymbolKind) -> Vec<&SearchResult> {
        symbols.iter().filter(|s| s.kind == kind).collect()
    }

    /// True when any of `symbols` is named exactly `name`.
    fn has_symbol(symbols: &[&SearchResult], name: &str) -> bool {
        symbols.iter().any(|s| s.symbol.as_deref() == Some(name))
    }

    /// A lone function yields exactly one `Function` symbol.
    #[test]
    fn test_parse_function() {
        let source = r#"
<?php
function greet($name) {
return "Hello, $name!";
}
"#;
        let symbols = parse_php(source);
        assert_eq!(symbols.len(), 1);
        assert_eq!(symbols[0].symbol.as_deref(), Some("greet"));
        assert!(matches!(symbols[0].kind, SymbolKind::Function));
    }

    /// A class declaration is reported once under its own name.
    #[test]
    fn test_parse_class() {
        let source = r#"
<?php
class User {
private $name;
private $email;
public function __construct($name, $email) {
$this->name = $name;
$this->email = $email;
}
}
"#;
        let symbols = parse_php(source);
        let class_symbols = of_kind(&symbols, SymbolKind::Class);
        assert_eq!(class_symbols.len(), 1);
        assert_eq!(class_symbols[0].symbol.as_deref(), Some("User"));
    }

    /// Both methods of a class are reported as `Method` symbols.
    #[test]
    fn test_parse_class_with_methods() {
        let source = r#"
<?php
class Calculator {
public function add($a, $b) {
return $a + $b;
}
public function subtract($a, $b) {
return $a - $b;
}
}
"#;
        let symbols = parse_php(source);
        assert!(symbols.len() >= 3);
        let method_symbols = of_kind(&symbols, SymbolKind::Method);
        assert_eq!(method_symbols.len(), 2);
        assert!(has_symbol(&method_symbols, "add"));
        assert!(has_symbol(&method_symbols, "subtract"));
    }

    /// An interface declaration is reported once under its own name.
    #[test]
    fn test_parse_interface() {
        let source = r#"
<?php
interface Drawable {
public function draw();
}
"#;
        let symbols = parse_php(source);
        let interface_symbols = of_kind(&symbols, SymbolKind::Interface);
        assert_eq!(interface_symbols.len(), 1);
        assert_eq!(interface_symbols[0].symbol.as_deref(), Some("Drawable"));
    }

    /// A trait declaration is reported once under its own name.
    #[test]
    fn test_parse_trait() {
        let source = r#"
<?php
trait Loggable {
public function log($message) {
echo $message;
}
}
"#;
        let symbols = parse_php(source);
        let trait_symbols = of_kind(&symbols, SymbolKind::Trait);
        assert_eq!(trait_symbols.len(), 1);
        assert_eq!(trait_symbols[0].symbol.as_deref(), Some("Loggable"));
    }

    /// The namespace symbol keeps its fully qualified, backslash-separated name.
    #[test]
    fn test_parse_namespace() {
        let source = r#"
<?php
namespace App\Controllers;
class HomeController {
public function index() {
return 'Home';
}
}
"#;
        let symbols = parse_php(source);
        let namespace_symbols = of_kind(&symbols, SymbolKind::Namespace);
        assert_eq!(namespace_symbols.len(), 1);
        assert_eq!(
            namespace_symbols[0].symbol.as_deref(),
            Some("App\\Controllers")
        );
    }

    /// Each top-level `const` produces its own `Constant` symbol.
    #[test]
    fn test_parse_constants() {
        let source = r#"
<?php
const MAX_SIZE = 100;
const DEFAULT_NAME = 'Anonymous';
"#;
        let symbols = parse_php(source);
        let const_symbols = of_kind(&symbols, SymbolKind::Constant);
        assert_eq!(const_symbols.len(), 2);
        assert!(has_symbol(&const_symbols, "MAX_SIZE"));
        assert!(has_symbol(&const_symbols, "DEFAULT_NAME"));
    }

    /// Class properties are reported as `Variable` symbols without the `$` sigil.
    #[test]
    fn test_parse_properties() {
        let source = r#"
<?php
class Config {
private $debug = false;
public $timeout = 30;
protected $secret;
}
"#;
        let symbols = parse_php(source);
        let prop_symbols = of_kind(&symbols, SymbolKind::Variable);
        assert_eq!(prop_symbols.len(), 3);
        assert!(has_symbol(&prop_symbols, "debug"));
        assert!(has_symbol(&prop_symbols, "timeout"));
        assert!(has_symbol(&prop_symbols, "secret"));
    }

    /// An enum declaration is reported once under its own name.
    #[test]
    fn test_parse_enum() {
        let source = r#"
<?php
enum Status {
case Active;
case Inactive;
case Pending;
}
"#;
        let symbols = parse_php(source);
        let enum_symbols = of_kind(&symbols, SymbolKind::Enum);
        assert_eq!(enum_symbols.len(), 1);
        assert_eq!(enum_symbols[0].symbol.as_deref(), Some("Status"));
    }

    /// A file mixing all constructs yields at least one symbol of every kind.
    #[test]
    fn test_parse_mixed_symbols() {
        let source = r#"
<?php
namespace App\Models;
interface UserInterface {
public function getName();
}
trait Timestampable {
private $createdAt;
public function getCreatedAt() {
return $this->createdAt;
}
}
class User implements UserInterface {
use Timestampable;
private $name;
const DEFAULT_ROLE = 'user';
public function __construct($name) {
$this->name = $name;
}
public function getName() {
return $this->name;
}
}
function createUser($name) {
return new User($name);
}
"#;
        let symbols = parse_php(source);
        assert!(symbols.len() >= 8);
        let kinds: Vec<&SymbolKind> = symbols.iter().map(|s| &s.kind).collect();
        assert!(kinds.contains(&&SymbolKind::Namespace));
        assert!(kinds.contains(&&SymbolKind::Interface));
        assert!(kinds.contains(&&SymbolKind::Trait));
        assert!(kinds.contains(&&SymbolKind::Class));
        assert!(kinds.contains(&&SymbolKind::Method));
        assert!(kinds.contains(&&SymbolKind::Variable));
        assert!(kinds.contains(&&SymbolKind::Constant));
        assert!(kinds.contains(&&SymbolKind::Function));
    }

    /// Global, function-local and method-local assignments are all reported,
    /// alongside the class property.
    #[test]
    fn test_local_variables_included() {
        let source = r#"
<?php
$global_count = 100;
function calculate() {
$local_count = 50;
$result = $local_count + 10;
return $result;
}
class Math {
private $value = 5;
public function compute() {
$temp = $this->value * 2;
return $temp;
}
}
"#;
        let symbols = parse_php(source);
        let variables = of_kind(&symbols, SymbolKind::Variable);
        assert_eq!(variables.len(), 5);
        assert!(has_symbol(&variables, "local_count"));
        assert!(has_symbol(&variables, "result"));
        assert!(has_symbol(&variables, "temp"));
        assert!(has_symbol(&variables, "global_count"));
        assert!(has_symbol(&variables, "value"));
    }

    /// Classes marked with `#[Attribute]` are reported as `Attribute` symbols.
    #[test]
    fn test_parse_attribute_class() {
        let source = r#"
<?php
#[Attribute]
class Route {
public function __construct(
public string $path,
public array $methods = []
) {}
}
#[Attribute(Attribute::TARGET_METHOD)]
class Deprecated {
public string $message;
}
"#;
        let symbols = parse_php(source);
        let attribute_symbols = of_kind(&symbols, SymbolKind::Attribute);
        assert!(attribute_symbols.len() >= 2);
        assert!(has_symbol(&attribute_symbols, "Route"));
        assert!(has_symbol(&attribute_symbols, "Deprecated"));
    }

    /// Every place an attribute is applied is reported, in addition to the
    /// attribute class definitions.
    #[test]
    fn test_parse_attribute_uses() {
        let source = r#"
<?php
#[Attribute]
class Route {
public function __construct(public string $path) {}
}
#[Attribute]
class Deprecated {}
#[Route("/api/users")]
class UserController {
#[Route("/list")]
public function list() {
return [];
}
#[Route("/get/{id}")]
#[Deprecated]
public function get($id) {
return null;
}
}
#[Route("/api/posts")]
class PostController {
#[Route("/all")]
public function all() {
return [];
}
}
"#;
        let symbols = parse_php(source);
        let attribute_symbols = of_kind(&symbols, SymbolKind::Attribute);
        assert!(attribute_symbols.len() >= 6);
        let route_count = attribute_symbols
            .iter()
            .filter(|s| s.symbol.as_deref() == Some("Route"))
            .count();
        let deprecated_count = attribute_symbols
            .iter()
            .filter(|s| s.symbol.as_deref() == Some("Deprecated"))
            .count();
        assert!(route_count >= 5);
        assert!(deprecated_count >= 2);
    }

    /// Classes with `extends`, several `implements` targets and a large docblock
    /// are still detected.
    #[test]
    fn test_parse_class_implementing_multiple_interfaces() {
        let source = r#"
<?php
interface Interface1 {
public function method1();
}
interface Interface2 {
public function method2();
}
class SimpleClass {
public $value;
}
// Class implementing multiple interfaces
class MultiInterfaceClass implements Interface1, Interface2 {
public function method1() {
return true;
}
public function method2() {
return false;
}
}
/**
* Complex edge case: Class with large docblock, extends base class, implements multiple interfaces
*
* @property string $name
* @property string $email
* @property-read int $id
* @property-read string $created_at
* @property-read Collection|Role[] $roles
* @property-read Collection|Permission[] $permissions
* @property-read Workflow $workflow
* @property-read Collection|NotificationSetting[] $notificationSettings
* @property-read Collection|Watch[] $watches
*
**/
class ComplexClass extends SimpleClass implements Interface1, Interface2 {
private $data;
public function method1() {
return $this->data;
}
public function method2() {
return !$this->data;
}
}
"#;
        let symbols = parse_php(source);
        let class_symbols = of_kind(&symbols, SymbolKind::Class);
        assert_eq!(class_symbols.len(), 3, "Should find exactly 3 classes");
        assert!(
            has_symbol(&class_symbols, "SimpleClass"),
            "Should find SimpleClass"
        );
        assert!(
            has_symbol(&class_symbols, "MultiInterfaceClass"),
            "Should find MultiInterfaceClass implementing multiple interfaces"
        );
        assert!(
            has_symbol(&class_symbols, "ComplexClass"),
            "Should find ComplexClass with large docblock, extends, and implements multiple interfaces"
        );
    }

    /// `use` statements are extracted and unlisted namespaces default to `Internal`.
    #[test]
    fn test_extract_php_use_dependencies() {
        let source = r#"
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
public function up(): void
{
Schema::create('test', function (Blueprint $table) {
$table->id();
});
}
};
"#;
        let deps = PhpDependencyExtractor::extract_dependencies(source).unwrap();
        assert_eq!(deps.len(), 3, "Should extract 3 use statements");
        assert!(deps.iter().any(|d| d.imported_path.contains("Migration")));
        assert!(deps.iter().any(|d| d.imported_path.contains("Blueprint")));
        assert!(deps.iter().any(|d| d.imported_path.contains("Schema")));
        for dep in &deps {
            assert!(
                matches!(dep.import_type, ImportType::Internal),
                "Laravel classes should be classified as Internal"
            );
        }
    }

    /// Only `require`/`include` statements with a literal string target are kept.
    #[test]
    fn test_dynamic_requires_filtered() {
        let source = r#"
<?php
use App\Models\User;
use App\Services\Auth;
require 'config.php';
require_once 'helpers.php';
// Dynamic requires - should be filtered out
require $variable;
require CONSTANT . '/file.php';
require_once $path;
include dirname(__FILE__) . '/dynamic.php';
"#;
        let deps = PhpDependencyExtractor::extract_dependencies(source).unwrap();
        assert_eq!(deps.len(), 4, "Should extract 4 static imports only");
        assert!(deps.iter().any(|d| d.imported_path.contains("User")));
        assert!(deps.iter().any(|d| d.imported_path.contains("Auth")));
        assert!(deps.iter().any(|d| d.imported_path == "config.php"));
        assert!(deps.iter().any(|d| d.imported_path == "helpers.php"));
        assert!(!deps.iter().any(|d| d.imported_path.contains("variable")));
        assert!(!deps.iter().any(|d| d.imported_path.contains("CONSTANT")));
        assert!(!deps.iter().any(|d| d.imported_path.contains("dirname")));
    }
}
use crate::models::ImportType;
use crate::parsers::{DependencyExtractor, ImportInfo};
pub struct PhpDependencyExtractor;
impl DependencyExtractor for PhpDependencyExtractor {
fn extract_dependencies(source: &str) -> Result<Vec<ImportInfo>> {
let mut parser = Parser::new();
let language = tree_sitter_php::LANGUAGE_PHP;
parser
.set_language(&language.into())
.context("Failed to set PHP language")?;
let tree = parser
.parse(source, None)
.context("Failed to parse PHP source")?;
let root_node = tree.root_node();
let mut imports = Vec::new();
imports.extend(extract_php_uses(source, &root_node)?);
imports.extend(extract_php_requires(source, &root_node)?);
Ok(imports)
}
}
/// Collects one [`ImportInfo`] per `name` or `qualified_name` found inside a
/// `namespace_use_clause`.
///
/// The import type comes from [`classify_php_use`]; the line number is the
/// one-based line of the captured path; `imported_symbols` is always `None`.
///
/// # Errors
/// Returns an error when the query cannot be compiled.
fn extract_php_uses(source: &str, root: &tree_sitter::Node) -> Result<Vec<ImportInfo>> {
    const PATTERN: &str = r#"
(namespace_use_clause
[
(name) @use_path
(qualified_name) @use_path
])
"#;

    let grammar: tree_sitter::Language = tree_sitter_php::LANGUAGE_PHP.into();
    let query = Query::new(&grammar, PATTERN).context("Failed to create PHP use query")?;
    let labels = query.capture_names();
    let bytes = source.as_bytes();

    let mut cursor = QueryCursor::new();
    let mut matches = cursor.matches(&query, *root, bytes);
    let mut imports = Vec::new();

    while let Some(hit) = matches.next() {
        let use_paths = hit.captures.iter().filter(|cap| {
            let label: &str = &labels[cap.index as usize];
            label == "use_path"
        });
        for cap in use_paths {
            let imported_path = cap.node.utf8_text(bytes).unwrap_or("").to_string();
            imports.push(ImportInfo {
                import_type: classify_php_use(&imported_path),
                line_number: cap.node.start_position().row + 1,
                imported_path,
                imported_symbols: None,
            });
        }
    }

    Ok(imports)
}
/// Collects one [`ImportInfo`] per `require`, `require_once`, `include` or
/// `include_once` statement whose operand is a `(string)` node.
///
/// Surrounding single and double quotes are trimmed from the path, every entry
/// is classified as `ImportType::Internal`, and the line number is the one-based
/// line where the enclosing statement starts.
///
/// NOTE(review): only `(string)` operands are matched — confirm against the
/// grammar whether double-quoted or parenthesised operands use other node kinds.
///
/// # Errors
/// Returns an error when the query cannot be compiled.
fn extract_php_requires(source: &str, root: &tree_sitter::Node) -> Result<Vec<ImportInfo>> {
    const PATTERN: &str = r#"
(expression_statement
(require_expression
(string) @require_path)) @require
(expression_statement
(require_once_expression
(string) @require_path)) @require
(expression_statement
(include_expression
(string) @require_path)) @require
(expression_statement
(include_once_expression
(string) @require_path)) @require
"#;

    let grammar: tree_sitter::Language = tree_sitter_php::LANGUAGE_PHP.into();
    let query = Query::new(&grammar, PATTERN)
        .context("Failed to create PHP require/include query")?;
    let labels = query.capture_names();
    let bytes = source.as_bytes();
    let is_quote = |c: char| c == '"' || c == '\'';

    let mut cursor = QueryCursor::new();
    let mut matches = cursor.matches(&query, *root, bytes);
    let mut imports = Vec::new();

    while let Some(hit) = matches.next() {
        let mut target: Option<String> = None;
        let mut statement_line: Option<usize> = None;
        for cap in hit.captures {
            let label: &str = &labels[cap.index as usize];
            if label == "require_path" {
                let literal = cap.node.utf8_text(bytes).unwrap_or("");
                target = Some(literal.trim_matches(is_quote).to_string());
            } else if label == "require" {
                statement_line = Some(cap.node.start_position().row + 1);
            }
        }

        if let (Some(imported_path), Some(line_number)) = (target, statement_line) {
            imports.push(ImportInfo {
                imported_path,
                import_type: ImportType::Internal,
                line_number,
                imported_symbols: None,
            });
        }
    }

    Ok(imports)
}
/// Classifies a `use` path by prefix.
///
/// Paths starting with any entry of the stdlib table are `ImportType::Stdlib`;
/// otherwise paths starting with any entry of the vendor table are
/// `ImportType::External`; everything else is `ImportType::Internal`. Matching
/// is a plain string-prefix test, so e.g. `DateTimeZone` matches `DateTime`.
fn classify_php_use(use_path: &str) -> ImportType {
    const PHP_STDLIB_NAMESPACES: &[&str] = &[
        // PSR namespaces
        "Psr\\", "Psr\\Http", "Psr\\Log", "Psr\\Cache", "Psr\\Container",
        // Errors and date/time
        "Exception", "Error", "DateTime", "DateTimeImmutable", "DateTimeInterface",
        "DateInterval", "DatePeriod",
        // Database, callables, iteration
        "PDO", "PDOStatement", "Closure", "Generator", "ArrayIterator",
        "IteratorAggregate", "Traversable", "Iterator", "Countable", "Serializable",
        "JsonSerializable",
        // SPL
        "SplFileInfo", "SplFileObject", "SplDoublyLinkedList", "SplQueue", "SplStack",
        "SplHeap", "SplMinHeap", "SplMaxHeap", "SplPriorityQueue", "SplFixedArray",
        "SplObjectStorage",
        // XML
        "SimpleXMLElement", "DOMDocument", "DOMElement", "DOMNode", "XMLReader",
        "XMLWriter",
    ];
    const PHP_VENDOR_NAMESPACES: &[&str] = &[
        "Symfony\\", "Spatie\\", "Stancl\\", "Doctrine\\", "Monolog\\", "PHPUnit\\",
        "Carbon\\", "GuzzleHttp\\", "Composer\\", "Predis\\", "League\\", "Ramsey\\",
        "Webmozart\\", "Brick\\", "Mockery\\", "Faker\\", "PhpParser\\", "PHPStan\\",
        "Psalm\\", "Pest\\", "Filament\\", "Livewire\\", "Inertia\\", "Socialite\\",
        "Sanctum\\", "Passport\\", "Horizon\\", "Telescope\\", "Forge\\", "Vapor\\",
        "Cashier\\", "Nova\\", "Spark\\", "Jetstream\\", "Fortify\\", "Breeze\\",
        "Vonage\\", "Twilio\\", "Stripe\\", "Pusher\\", "Algolia\\", "Aws\\",
        "Google\\", "Microsoft\\", "Facebook\\", "Twitter\\", "Sentry\\", "Bugsnag\\",
        "Rollbar\\", "NewRelic\\", "Datadog\\", "Elasticsearch\\", "Redis\\",
        "Memcached\\", "MongoDB\\", "PhpOffice\\", "Dompdf\\", "TCPDF\\", "Mpdf\\",
        "Intervention\\", "Barryvdh\\", "Maatwebsite\\", "Rap2hpoutre\\", "Yajra\\",
    ];

    let starts_with_any =
        |prefixes: &[&str]| prefixes.iter().any(|prefix| use_path.starts_with(*prefix));

    // The stdlib table is consulted before the vendor table.
    if starts_with_any(PHP_STDLIB_NAMESPACES) {
        ImportType::Stdlib
    } else if starts_with_any(PHP_VENDOR_NAMESPACES) {
        ImportType::External
    } else {
        ImportType::Internal
    }
}
/// One PSR-4 autoload entry read from a `composer.json`.
///
/// * `namespace_prefix` — the key of the `autoload.psr-4` object.
/// * `directory` — one directory string listed for that key.
/// * `project_root` — left empty by `parse_composer_psr4`; set by
///   `parse_all_composer_psr4` to the composer.json's directory relative to the
///   index root.
#[derive(Debug, Clone)]
pub struct Psr4Mapping {
pub namespace_prefix: String, pub directory: String, pub project_root: String, }
pub fn parse_composer_psr4(project_root: &Path) -> Result<Vec<Psr4Mapping>> {
let composer_path = project_root.join("composer.json");
if !composer_path.exists() {
log::debug!("No composer.json found at {:?}", composer_path);
return Ok(Vec::new());
}
let content =
std::fs::read_to_string(&composer_path).context("Failed to read composer.json")?;
let json: serde_json::Value =
serde_json::from_str(&content).context("Failed to parse composer.json")?;
let mut mappings = Vec::new();
if let Some(autoload) = json.get("autoload") {
if let Some(psr4) = autoload.get("psr-4") {
if let Some(psr4_obj) = psr4.as_object() {
for (namespace, path) in psr4_obj {
let directories = match path {
serde_json::Value::String(s) => vec![s.clone()],
serde_json::Value::Array(arr) => arr
.iter()
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.collect(),
_ => continue,
};
for dir in directories {
mappings.push(Psr4Mapping {
namespace_prefix: namespace.clone(),
directory: dir,
project_root: String::new(), });
}
}
}
}
}
mappings.sort_by(|a, b| b.namespace_prefix.len().cmp(&a.namespace_prefix.len()));
log::debug!(
"Loaded {} PSR-4 mappings from composer.json",
mappings.len()
);
for mapping in &mappings {
log::trace!(" {} => {}", mapping.namespace_prefix, mapping.directory);
}
Ok(mappings)
}
/// Walks `index_root` (honouring gitignore rules, not following symlinks) and
/// returns the path of every `composer.json` file that is not located under a
/// `vendor` directory.
///
/// The `vendor` test is applied to the path *relative to `index_root`*, so an
/// index root that itself lives beneath a directory named `vendor` is still
/// searched.
///
/// # Errors
/// Propagates the first error reported by the directory walker.
pub fn find_all_composer_json(index_root: &Path) -> Result<Vec<PathBuf>> {
    use ignore::WalkBuilder;

    let mut composer_files = Vec::new();
    let walker = WalkBuilder::new(index_root)
        .follow_links(false)
        .git_ignore(true)
        .build();

    for entry in walker {
        let entry = entry?;
        let path = entry.path();
        if !path.is_file() || path.file_name() != Some(std::ffi::OsStr::new("composer.json")) {
            continue;
        }

        // Only components below the index root decide whether this is vendored.
        let relative = path.strip_prefix(index_root).unwrap_or(path);
        if relative.components().any(|c| c.as_os_str() == "vendor") {
            log::trace!("Skipping vendor composer.json: {:?}", path);
            continue;
        }

        composer_files.push(path.to_path_buf());
    }

    log::debug!("Found {} project composer.json files", composer_files.len());
    Ok(composer_files)
}
pub fn parse_all_composer_psr4(index_root: &Path) -> Result<Vec<Psr4Mapping>> {
let composer_files = find_all_composer_json(index_root)?;
if composer_files.is_empty() {
log::debug!("No composer.json files found in {:?}", index_root);
return Ok(Vec::new());
}
let mut all_mappings = Vec::new();
let composer_count = composer_files.len();
for composer_path in composer_files {
let project_root = composer_path
.parent()
.ok_or_else(|| anyhow::anyhow!("composer.json has no parent directory"))?;
let relative_project_root = project_root
.strip_prefix(index_root)
.unwrap_or(project_root)
.to_string_lossy()
.to_string();
log::debug!("Parsing composer.json at {:?}", composer_path);
let mappings = parse_composer_psr4(project_root)?;
for mut mapping in mappings {
mapping.project_root = relative_project_root.clone();
all_mappings.push(mapping);
}
}
all_mappings.sort_by(|a, b| b.namespace_prefix.len().cmp(&a.namespace_prefix.len()));
log::info!(
"Loaded {} total PSR-4 mappings from {} projects",
all_mappings.len(),
composer_count
);
Ok(all_mappings)
}
/// Maps a fully qualified PHP name to a `.php` file path using PSR-4 mappings.
///
/// The first mapping whose `namespace_prefix` is a prefix of `namespace` wins
/// (the loaders in this file return mappings longest-prefix-first). The rest of
/// the name has `\` replaced by `/` and is appended to the mapping's
/// `project_root` (when non-empty) and `directory`, followed by `.php`.
///
/// Segments are joined with exactly one `/`, so a directory written without a
/// trailing slash (`"src"`) resolves the same as `"src/"`.
///
/// Returns `None` when no prefix matches, or when `namespace` equals the matched
/// prefix and so names no class.
pub fn resolve_php_namespace_to_path(
    namespace: &str,
    psr4_mappings: &[Psr4Mapping],
) -> Option<String> {
    let matched = psr4_mappings.iter().find_map(|mapping| {
        namespace
            .strip_prefix(mapping.namespace_prefix.as_str())
            .map(|rest| (mapping, rest))
    });
    let (mapping, relative_namespace) = match matched {
        Some(hit) => hit,
        None => {
            log::trace!("No PSR-4 mapping found for namespace '{}'", namespace);
            return None;
        }
    };

    let relative_path = relative_namespace.replace('\\', "/");
    if relative_path.is_empty() {
        return None;
    }

    // Join non-empty segments with one separator each instead of relying on the
    // directory carrying its own trailing slash.
    let mut segments: Vec<&str> = Vec::with_capacity(3);
    if !mapping.project_root.is_empty() {
        segments.push(mapping.project_root.trim_end_matches('/'));
    }
    let directory = mapping.directory.trim_end_matches('/');
    if !directory.is_empty() {
        segments.push(directory);
    }
    segments.push(relative_path.trim_start_matches('/'));

    // Collapse any doubled separators left inside the individual segments.
    let file_path = format!("{}.php", segments.join("/")).replace("//", "/");

    log::trace!("Resolved namespace '{}' to path '{}'", namespace, file_path);
    Some(file_path)
}