similarity_php/
php_parser.rs

1use similarity_core::language_parser::{
2    GenericFunctionDef, GenericTypeDef, Language, LanguageParser,
3};
4use similarity_core::tree::TreeNode;
5use std::error::Error;
6use std::rc::Rc;
7use tree_sitter::{Node, Parser};
8
/// PHP source parser backed by a tree-sitter [`Parser`].
pub struct PhpParser {
    /// Underlying tree-sitter parser; `PhpParser::new` sets its language to PHP.
    parser: Parser,
}
12
13impl PhpParser {
14    #[allow(dead_code)]
15    pub fn new() -> Result<Self, Box<dyn Error + Send + Sync>> {
16        let mut parser = Parser::new();
17        parser.set_language(&tree_sitter_php::LANGUAGE_PHP.into())?;
18
19        Ok(Self { parser })
20    }
21
22    #[allow(clippy::only_used_in_recursion)]
23    fn convert_node(&self, node: Node, source: &str, id_counter: &mut usize) -> TreeNode {
24        let current_id = *id_counter;
25        *id_counter += 1;
26
27        let label = node.kind().to_string();
28        let value = match node.kind() {
29            "name" | "variable_name" | "string" | "integer" | "float" | "true" | "false" | "null" => {
30                node.utf8_text(source.as_bytes()).unwrap_or("").to_string()
31            }
32            _ => "".to_string(),
33        };
34
35        let mut tree_node = TreeNode::new(label, value, current_id);
36
37        for child in node.children(&mut node.walk()) {
38            let child_node = self.convert_node(child, source, id_counter);
39            tree_node.add_child(Rc::new(child_node));
40        }
41
42        tree_node
43    }
44
45    fn extract_functions_from_node(
46        &self,
47        node: Node,
48        source: &str,
49        class_name: Option<&str>,
50        namespace: Option<&str>,
51    ) -> Vec<GenericFunctionDef> {
52        let mut functions = Vec::new();
53        
54        // Check if there's a namespace declaration at the root level
55        let mut current_namespace: Option<String> = None;
56        let mut cursor = node.walk();
57        for child in node.children(&mut cursor) {
58            if child.kind() == "namespace_definition" {
59                if let Some(name_node) = child.child_by_field_name("name") {
60                    if let Ok(ns_name) = name_node.utf8_text(source.as_bytes()) {
61                        current_namespace = Some(ns_name.to_string());
62                        break;
63                    }
64                }
65            }
66        }
67
68        fn visit_node(
69            node: Node,
70            source: &str,
71            functions: &mut Vec<GenericFunctionDef>,
72            class_name: Option<&str>,
73            namespace: Option<&str>,
74        ) {
75            match node.kind() {
76                "function_definition" => {
77                    if let Some(name_node) = node.child_by_field_name("name") {
78                        if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
79                            let parameters_node = node.child_by_field_name("parameters");
80                            let body_node = node.child_by_field_name("body");
81
82                            let params = extract_params(parameters_node, source);
83                            let full_name = if let Some(ns) = namespace {
84                                format!("{ns}\\{name}")
85                            } else {
86                                name.to_string()
87                            };
88
89                            functions.push(GenericFunctionDef {
90                                name: full_name,
91                                start_line: node.start_position().row as u32 + 1,
92                                end_line: node.end_position().row as u32 + 1,
93                                body_start_line: body_node
94                                    .map(|n| n.start_position().row as u32 + 1)
95                                    .unwrap_or(0),
96                                body_end_line: body_node
97                                    .map(|n| n.end_position().row as u32 + 1)
98                                    .unwrap_or(0),
99                                parameters: params,
100                                is_method: class_name.is_some(),
101                                class_name: class_name.map(|s| s.to_string()),
102                                is_async: false, // PHP doesn't have async/await syntax
103                                is_generator: is_generator_function(node, source),
104                                decorators: Vec::new(), // PHP doesn't have decorators like Python
105                            });
106                        }
107                    }
108                }
109                "method_declaration" => {
110                    if let Some(name_node) = node.child_by_field_name("name") {
111                        if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
112                            let parameters_node = node.child_by_field_name("parameters");
113                            let body_node = node.child_by_field_name("body");
114
115                            let params = extract_params(parameters_node, source);
116                            let visibility = extract_visibility(node, source);
117                            let is_static = is_static_method(node, source);
118                            let is_abstract = is_abstract_method(node, source);
119
120                            let method_name = format!("{}::{}", class_name.unwrap_or(""), name);
121
122                            functions.push(GenericFunctionDef {
123                                name: method_name,
124                                start_line: node.start_position().row as u32 + 1,
125                                end_line: node.end_position().row as u32 + 1,
126                                body_start_line: body_node
127                                    .map(|n| n.start_position().row as u32 + 1)
128                                    .unwrap_or(0),
129                                body_end_line: body_node
130                                    .map(|n| n.end_position().row as u32 + 1)
131                                    .unwrap_or(0),
132                                parameters: params,
133                                is_method: true,
134                                class_name: class_name.map(|s| s.to_string()),
135                                is_async: false,
136                                is_generator: is_generator_function(node, source),
137                                decorators: vec![
138                                    visibility,
139                                    if is_static { "static".to_string() } else { "".to_string() },
140                                    if is_abstract { "abstract".to_string() } else { "".to_string() },
141                                ].into_iter().filter(|s| !s.is_empty()).collect(),
142                            });
143                        }
144                    }
145                }
146                "class_declaration" => {
147                    if let Some(name_node) = node.child_by_field_name("name") {
148                        if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
149                            let mut subcursor = node.walk();
150                            for child in node.children(&mut subcursor) {
151                                visit_node(child, source, functions, Some(name), namespace);
152                            }
153                        }
154                    }
155                }
156                "namespace_definition" => {
157                    // For namespace without braces (like "namespace App\Controllers;")
158                    if let Some(name_node) = node.child_by_field_name("name") {
159                        if let Ok(_ns_name) = name_node.utf8_text(source.as_bytes()) {
160                            // Continue traversing sibling nodes with this namespace
161                        }
162                    }
163                }
164                "namespace_use_declaration" => {
165                    // Skip use statements
166                }
167                _ => {
168                    let mut subcursor = node.walk();
169                    for child in node.children(&mut subcursor) {
170                        visit_node(child, source, functions, class_name, namespace);
171                    }
172                }
173            }
174        }
175
176        fn is_generator_function(node: Node, source: &str) -> bool {
177            if let Some(body) = node.child_by_field_name("body") {
178                if let Ok(body_text) = body.utf8_text(source.as_bytes()) {
179                    return body_text.contains("yield");
180                }
181            }
182            false
183        }
184
185        fn extract_visibility(node: Node, source: &str) -> String {
186            let mut cursor = node.walk();
187            for child in node.children(&mut cursor) {
188                if child.kind() == "visibility_modifier" {
189                    if let Ok(visibility) = child.utf8_text(source.as_bytes()) {
190                        return visibility.to_string();
191                    }
192                }
193            }
194            "public".to_string() // Default visibility in PHP
195        }
196
197        fn is_static_method(node: Node, _source: &str) -> bool {
198            let mut cursor = node.walk();
199            for child in node.children(&mut cursor) {
200                if child.kind() == "static_modifier" {
201                    return true;
202                }
203            }
204            false
205        }
206
207        fn is_abstract_method(node: Node, _source: &str) -> bool {
208            let mut cursor = node.walk();
209            for child in node.children(&mut cursor) {
210                if child.kind() == "abstract_modifier" {
211                    return true;
212                }
213            }
214            false
215        }
216
217        fn extract_params(params_node: Option<Node>, source: &str) -> Vec<String> {
218            if let Some(node) = params_node {
219                let mut params = Vec::new();
220                let mut cursor = node.walk();
221
222                for child in node.children(&mut cursor) {
223                    match child.kind() {
224                        "simple_parameter" => {
225                            if let Some(var_node) = child.child_by_field_name("name") {
226                                if let Ok(param_text) = var_node.utf8_text(source.as_bytes()) {
227                                    params.push(param_text.to_string());
228                                }
229                            }
230                        }
231                        "typed_parameter" => {
232                            if let Some(var_node) = child.child_by_field_name("name") {
233                                if let Ok(param_text) = var_node.utf8_text(source.as_bytes()) {
234                                    params.push(param_text.to_string());
235                                }
236                            }
237                        }
238                        "variadic_parameter" => {
239                            if let Some(var_node) = child.child_by_field_name("name") {
240                                if let Ok(param_text) = var_node.utf8_text(source.as_bytes()) {
241                                    params.push(format!("..{param_text}"));
242                                }
243                            }
244                        }
245                        _ => {}
246                    }
247                }
248
249                params
250            } else {
251                Vec::new()
252            }
253        }
254
255        let final_namespace = current_namespace.as_deref().or(namespace);
256        visit_node(node, source, &mut functions, class_name, final_namespace);
257        functions
258    }
259}
260
261impl LanguageParser for PhpParser {
262    fn parse(&mut self, source: &str, _filename: &str) -> Result<Rc<TreeNode>, Box<dyn Error + Send + Sync>> {
263        let tree = self.parser.parse(source, None).ok_or_else(|| -> Box<dyn Error + Send + Sync> {
264            "Failed to parse PHP source".into()
265        })?;
266
267        let root_node = tree.root_node();
268        let mut id_counter = 0;
269        Ok(Rc::new(self.convert_node(root_node, source, &mut id_counter)))
270    }
271
272    fn extract_functions(
273        &mut self,
274        source: &str,
275        _filename: &str,
276    ) -> Result<Vec<GenericFunctionDef>, Box<dyn Error + Send + Sync>> {
277        let tree = self.parser.parse(source, None).ok_or_else(|| -> Box<dyn Error + Send + Sync> {
278            "Failed to parse PHP source".into()
279        })?;
280
281        let root_node = tree.root_node();
282        Ok(self.extract_functions_from_node(root_node, source, None, None))
283    }
284
285    fn extract_types(
286        &mut self,
287        source: &str,
288        _filename: &str,
289    ) -> Result<Vec<GenericTypeDef>, Box<dyn Error + Send + Sync>> {
290        let tree = self.parser.parse(source, None).ok_or_else(|| -> Box<dyn Error + Send + Sync> {
291            "Failed to parse PHP source".into()
292        })?;
293
294        let root_node = tree.root_node();
295        let mut types = Vec::new();
296
297        fn visit_node_for_types(node: Node, source: &str, types: &mut Vec<GenericTypeDef>) {
298            match node.kind() {
299                "class_declaration" => {
300                    if let Some(name_node) = node.child_by_field_name("name") {
301                        if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
302                            types.push(GenericTypeDef {
303                                name: name.to_string(),
304                                kind: "class".to_string(),
305                                start_line: node.start_position().row as u32 + 1,
306                                end_line: node.end_position().row as u32 + 1,
307                                fields: extract_class_properties(node, source),
308                            });
309                        }
310                    }
311                }
312                "interface_declaration" => {
313                    if let Some(name_node) = node.child_by_field_name("name") {
314                        if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
315                            types.push(GenericTypeDef {
316                                name: name.to_string(),
317                                kind: "interface".to_string(),
318                                start_line: node.start_position().row as u32 + 1,
319                                end_line: node.end_position().row as u32 + 1,
320                                fields: extract_interface_methods(node, source),
321                            });
322                        }
323                    }
324                }
325                "trait_declaration" => {
326                    if let Some(name_node) = node.child_by_field_name("name") {
327                        if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
328                            types.push(GenericTypeDef {
329                                name: name.to_string(),
330                                kind: "trait".to_string(),
331                                start_line: node.start_position().row as u32 + 1,
332                                end_line: node.end_position().row as u32 + 1,
333                                fields: extract_trait_methods(node, source),
334                            });
335                        }
336                    }
337                }
338                _ => {}
339            }
340
341            let mut cursor = node.walk();
342            for child in node.children(&mut cursor) {
343                visit_node_for_types(child, source, types);
344            }
345        }
346
347        fn extract_class_properties(node: Node, source: &str) -> Vec<String> {
348            let mut properties = Vec::new();
349
350            if let Some(body) = node.child_by_field_name("body") {
351                let mut cursor = body.walk();
352                for child in body.children(&mut cursor) {
353                    if child.kind() == "property_declaration" {
354                        let mut prop_cursor = child.walk();
355                        for prop_child in child.children(&mut prop_cursor) {
356                            if prop_child.kind() == "variable_name" {
357                                if let Ok(prop_name) = prop_child.utf8_text(source.as_bytes()) {
358                                    properties.push(prop_name.to_string());
359                                }
360                            }
361                        }
362                    }
363                }
364            }
365
366            properties
367        }
368
369        fn extract_interface_methods(node: Node, source: &str) -> Vec<String> {
370            let mut methods = Vec::new();
371
372            if let Some(body) = node.child_by_field_name("body") {
373                let mut cursor = body.walk();
374                for child in body.children(&mut cursor) {
375                    if child.kind() == "method_declaration" {
376                        if let Some(name_node) = child.child_by_field_name("name") {
377                            if let Ok(method_name) = name_node.utf8_text(source.as_bytes()) {
378                                methods.push(method_name.to_string());
379                            }
380                        }
381                    }
382                }
383            }
384
385            methods
386        }
387
388        fn extract_trait_methods(node: Node, source: &str) -> Vec<String> {
389            let mut methods = Vec::new();
390
391            if let Some(body) = node.child_by_field_name("body") {
392                let mut cursor = body.walk();
393                for child in body.children(&mut cursor) {
394                    if child.kind() == "method_declaration" {
395                        if let Some(name_node) = child.child_by_field_name("name") {
396                            if let Ok(method_name) = name_node.utf8_text(source.as_bytes()) {
397                                methods.push(method_name.to_string());
398                            }
399                        }
400                    }
401                }
402            }
403
404            methods
405        }
406
407        visit_node_for_types(root_node, source, &mut types);
408        Ok(types)
409    }
410
    /// Identifies the language handled by this parser: always [`Language::Php`].
    fn language(&self) -> Language {
        Language::Php
    }
414}
415
416#[cfg(test)]
417mod tests {
418    use super::*;
419
420    #[test]
421    fn test_php_functions() {
422        let mut parser = PhpParser::new().unwrap();
423        let source = r#"
424<?php
425function hello($name) {
426    return "Hello, " . $name . "!";
427}
428
429function add($a, $b = 0) {
430    return $a + $b;
431}
432
433class Calculator {
434    public function __construct() {
435        $this->result = 0;
436    }
437    
438    public function add($x) {
439        $this->result += $x;
440        return $this->result;
441    }
442    
443    private static function multiply($a, $b) {
444        return $a * $b;
445    }
446}
447"#;
448
449        let functions = parser.extract_functions(source, "test.php").unwrap();
450        assert!(functions.len() >= 4);
451        
452        let function_names: Vec<&str> = functions.iter().map(|f| f.name.as_str()).collect();
453        assert!(function_names.contains(&"hello"));
454        assert!(function_names.contains(&"add"));
455        assert!(function_names.contains(&"Calculator::__construct"));
456        assert!(function_names.contains(&"Calculator::add"));
457        assert!(function_names.contains(&"Calculator::multiply"));
458    }
459
460    #[test]
461    fn test_php_classes() {
462        let mut parser = PhpParser::new().unwrap();
463        let source = r#"
464<?php
465class User {
466    public $name;
467    private $email;
468    
469    public function __construct($name, $email) {
470        $this->name = $name;
471        $this->email = $email;
472    }
473}
474
475interface UserInterface {
476    public function getName();
477}
478
479trait Loggable {
480    public function log($message) {
481        echo $message;
482    }
483}
484"#;
485
486        let types = parser.extract_types(source, "test.php").unwrap();
487        assert_eq!(types.len(), 3);
488        assert_eq!(types[0].name, "User");
489        assert_eq!(types[0].kind, "class");
490        assert_eq!(types[1].name, "UserInterface");
491        assert_eq!(types[1].kind, "interface");
492        assert_eq!(types[2].name, "Loggable");
493        assert_eq!(types[2].kind, "trait");
494    }
495
496    #[test]
497    fn test_php_namespace() {
498        let mut parser = PhpParser::new().unwrap();
499        let source = r#"
500<?php
501namespace App\Controllers;
502
503function processRequest() {
504    return "processed";
505}
506
507class UserController {
508    public function index() {
509        return "user list";
510    }
511}
512"#;
513
514        let functions = parser.extract_functions(source, "test.php").unwrap();
515        assert!(functions.len() >= 2);
516        
517        let function_names: Vec<&str> = functions.iter().map(|f| f.name.as_str()).collect();
518        assert!(function_names.contains(&"App\\Controllers\\processRequest"));
519        assert!(function_names.contains(&"UserController::index"));
520    }
521
522    #[test]
523    fn test_php_class_detection() {
524        let mut parser = PhpParser::new().unwrap();
525        let source = r#"
526<?php
527class TestClass {
528    public function method1() {
529        return "test1";
530    }
531    
532    public function method2() {
533        return "test2";
534    }
535}
536
537function standalone_function() {
538    return "standalone";
539}
540"#;
541
542        let functions = parser.extract_functions(source, "test.php").unwrap();
543        assert_eq!(functions.len(), 3);
544        
545        // Check class methods have class_name set
546        let method1 = functions.iter().find(|f| f.name.contains("method1")).unwrap();
547        let method2 = functions.iter().find(|f| f.name.contains("method2")).unwrap();
548        let standalone = functions.iter().find(|f| f.name == "standalone_function").unwrap();
549        
550        assert_eq!(method1.class_name, Some("TestClass".to_string()));
551        assert_eq!(method2.class_name, Some("TestClass".to_string()));
552        assert_eq!(standalone.class_name, None);
553        
554        assert!(method1.is_method);
555        assert!(method2.is_method);
556        assert!(!standalone.is_method);
557    }
558}