similarity_php/
php_parser.rs

1use similarity_core::language_parser::{
2    GenericFunctionDef, GenericTypeDef, Language, LanguageParser,
3};
4use similarity_core::tree::TreeNode;
5use std::error::Error;
6use std::rc::Rc;
7use tree_sitter::{Node, Parser};
8
9pub struct PhpParser {
10    parser: Parser,
11}
12
13impl PhpParser {
14    #[allow(dead_code)]
15    pub fn new() -> Result<Self, Box<dyn Error + Send + Sync>> {
16        let mut parser = Parser::new();
17        parser.set_language(&tree_sitter_php::LANGUAGE_PHP.into())?;
18
19        Ok(Self { parser })
20    }
21
22    #[allow(clippy::only_used_in_recursion)]
23    fn convert_node(&self, node: Node, source: &str, id_counter: &mut usize) -> TreeNode {
24        let current_id = *id_counter;
25        *id_counter += 1;
26
27        let label = node.kind().to_string();
28        let value = match node.kind() {
29            "name" | "variable_name" | "string" | "integer" | "float" | "true" | "false"
30            | "null" => node.utf8_text(source.as_bytes()).unwrap_or("").to_string(),
31            _ => "".to_string(),
32        };
33
34        let mut tree_node = TreeNode::new(label, value, current_id);
35
36        for child in node.children(&mut node.walk()) {
37            let child_node = self.convert_node(child, source, id_counter);
38            tree_node.add_child(Rc::new(child_node));
39        }
40
41        tree_node
42    }
43
44    fn extract_functions_from_node(
45        &self,
46        node: Node,
47        source: &str,
48        class_name: Option<&str>,
49        namespace: Option<&str>,
50    ) -> Vec<GenericFunctionDef> {
51        let mut functions = Vec::new();
52
53        // Check if there's a namespace declaration at the root level
54        let mut current_namespace: Option<String> = None;
55        let mut cursor = node.walk();
56        for child in node.children(&mut cursor) {
57            if child.kind() == "namespace_definition" {
58                if let Some(name_node) = child.child_by_field_name("name") {
59                    if let Ok(ns_name) = name_node.utf8_text(source.as_bytes()) {
60                        current_namespace = Some(ns_name.to_string());
61                        break;
62                    }
63                }
64            }
65        }
66
67        fn visit_node(
68            node: Node,
69            source: &str,
70            functions: &mut Vec<GenericFunctionDef>,
71            class_name: Option<&str>,
72            namespace: Option<&str>,
73        ) {
74            match node.kind() {
75                "function_definition" => {
76                    if let Some(name_node) = node.child_by_field_name("name") {
77                        if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
78                            let parameters_node = node.child_by_field_name("parameters");
79                            let body_node = node.child_by_field_name("body");
80
81                            let params = extract_params(parameters_node, source);
82                            let full_name = if let Some(ns) = namespace {
83                                format!("{ns}\\{name}")
84                            } else {
85                                name.to_string()
86                            };
87
88                            functions.push(GenericFunctionDef {
89                                name: full_name,
90                                start_line: node.start_position().row as u32 + 1,
91                                end_line: node.end_position().row as u32 + 1,
92                                body_start_line: body_node
93                                    .map(|n| n.start_position().row as u32 + 1)
94                                    .unwrap_or(0),
95                                body_end_line: body_node
96                                    .map(|n| n.end_position().row as u32 + 1)
97                                    .unwrap_or(0),
98                                parameters: params,
99                                is_method: class_name.is_some(),
100                                class_name: class_name.map(|s| s.to_string()),
101                                is_async: false, // PHP doesn't have async/await syntax
102                                is_generator: is_generator_function(node, source),
103                                decorators: Vec::new(), // PHP doesn't have decorators like Python
104                            });
105                        }
106                    }
107                }
108                "method_declaration" => {
109                    if let Some(name_node) = node.child_by_field_name("name") {
110                        if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
111                            let parameters_node = node.child_by_field_name("parameters");
112                            let body_node = node.child_by_field_name("body");
113
114                            let params = extract_params(parameters_node, source);
115                            let visibility = extract_visibility(node, source);
116                            let is_static = is_static_method(node, source);
117                            let is_abstract = is_abstract_method(node, source);
118
119                            let method_name = format!("{}::{}", class_name.unwrap_or(""), name);
120
121                            functions.push(GenericFunctionDef {
122                                name: method_name,
123                                start_line: node.start_position().row as u32 + 1,
124                                end_line: node.end_position().row as u32 + 1,
125                                body_start_line: body_node
126                                    .map(|n| n.start_position().row as u32 + 1)
127                                    .unwrap_or(0),
128                                body_end_line: body_node
129                                    .map(|n| n.end_position().row as u32 + 1)
130                                    .unwrap_or(0),
131                                parameters: params,
132                                is_method: true,
133                                class_name: class_name.map(|s| s.to_string()),
134                                is_async: false,
135                                is_generator: is_generator_function(node, source),
136                                decorators: vec![
137                                    visibility,
138                                    if is_static { "static".to_string() } else { "".to_string() },
139                                    if is_abstract {
140                                        "abstract".to_string()
141                                    } else {
142                                        "".to_string()
143                                    },
144                                ]
145                                .into_iter()
146                                .filter(|s| !s.is_empty())
147                                .collect(),
148                            });
149                        }
150                    }
151                }
152                "class_declaration" => {
153                    if let Some(name_node) = node.child_by_field_name("name") {
154                        if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
155                            let mut subcursor = node.walk();
156                            for child in node.children(&mut subcursor) {
157                                visit_node(child, source, functions, Some(name), namespace);
158                            }
159                        }
160                    }
161                }
162                "namespace_definition" => {
163                    // For namespace without braces (like "namespace App\Controllers;")
164                    if let Some(name_node) = node.child_by_field_name("name") {
165                        if let Ok(_ns_name) = name_node.utf8_text(source.as_bytes()) {
166                            // Continue traversing sibling nodes with this namespace
167                        }
168                    }
169                }
170                "namespace_use_declaration" => {
171                    // Skip use statements
172                }
173                _ => {
174                    let mut subcursor = node.walk();
175                    for child in node.children(&mut subcursor) {
176                        visit_node(child, source, functions, class_name, namespace);
177                    }
178                }
179            }
180        }
181
182        fn is_generator_function(node: Node, source: &str) -> bool {
183            if let Some(body) = node.child_by_field_name("body") {
184                if let Ok(body_text) = body.utf8_text(source.as_bytes()) {
185                    return body_text.contains("yield");
186                }
187            }
188            false
189        }
190
191        fn extract_visibility(node: Node, source: &str) -> String {
192            let mut cursor = node.walk();
193            for child in node.children(&mut cursor) {
194                if child.kind() == "visibility_modifier" {
195                    if let Ok(visibility) = child.utf8_text(source.as_bytes()) {
196                        return visibility.to_string();
197                    }
198                }
199            }
200            "public".to_string() // Default visibility in PHP
201        }
202
203        fn is_static_method(node: Node, _source: &str) -> bool {
204            let mut cursor = node.walk();
205            for child in node.children(&mut cursor) {
206                if child.kind() == "static_modifier" {
207                    return true;
208                }
209            }
210            false
211        }
212
213        fn is_abstract_method(node: Node, _source: &str) -> bool {
214            let mut cursor = node.walk();
215            for child in node.children(&mut cursor) {
216                if child.kind() == "abstract_modifier" {
217                    return true;
218                }
219            }
220            false
221        }
222
223        fn extract_params(params_node: Option<Node>, source: &str) -> Vec<String> {
224            if let Some(node) = params_node {
225                let mut params = Vec::new();
226                let mut cursor = node.walk();
227
228                for child in node.children(&mut cursor) {
229                    match child.kind() {
230                        "simple_parameter" => {
231                            if let Some(var_node) = child.child_by_field_name("name") {
232                                if let Ok(param_text) = var_node.utf8_text(source.as_bytes()) {
233                                    params.push(param_text.to_string());
234                                }
235                            }
236                        }
237                        "typed_parameter" => {
238                            if let Some(var_node) = child.child_by_field_name("name") {
239                                if let Ok(param_text) = var_node.utf8_text(source.as_bytes()) {
240                                    params.push(param_text.to_string());
241                                }
242                            }
243                        }
244                        "variadic_parameter" => {
245                            if let Some(var_node) = child.child_by_field_name("name") {
246                                if let Ok(param_text) = var_node.utf8_text(source.as_bytes()) {
247                                    params.push(format!("..{param_text}"));
248                                }
249                            }
250                        }
251                        _ => {}
252                    }
253                }
254
255                params
256            } else {
257                Vec::new()
258            }
259        }
260
261        let final_namespace = current_namespace.as_deref().or(namespace);
262        visit_node(node, source, &mut functions, class_name, final_namespace);
263        functions
264    }
265}
266
267impl LanguageParser for PhpParser {
268    fn parse(
269        &mut self,
270        source: &str,
271        _filename: &str,
272    ) -> Result<Rc<TreeNode>, Box<dyn Error + Send + Sync>> {
273        let tree =
274            self.parser.parse(source, None).ok_or_else(|| -> Box<dyn Error + Send + Sync> {
275                "Failed to parse PHP source".into()
276            })?;
277
278        let root_node = tree.root_node();
279        let mut id_counter = 0;
280        Ok(Rc::new(self.convert_node(root_node, source, &mut id_counter)))
281    }
282
283    fn extract_functions(
284        &mut self,
285        source: &str,
286        _filename: &str,
287    ) -> Result<Vec<GenericFunctionDef>, Box<dyn Error + Send + Sync>> {
288        let tree =
289            self.parser.parse(source, None).ok_or_else(|| -> Box<dyn Error + Send + Sync> {
290                "Failed to parse PHP source".into()
291            })?;
292
293        let root_node = tree.root_node();
294        Ok(self.extract_functions_from_node(root_node, source, None, None))
295    }
296
297    fn extract_types(
298        &mut self,
299        source: &str,
300        _filename: &str,
301    ) -> Result<Vec<GenericTypeDef>, Box<dyn Error + Send + Sync>> {
302        let tree =
303            self.parser.parse(source, None).ok_or_else(|| -> Box<dyn Error + Send + Sync> {
304                "Failed to parse PHP source".into()
305            })?;
306
307        let root_node = tree.root_node();
308        let mut types = Vec::new();
309
310        fn visit_node_for_types(node: Node, source: &str, types: &mut Vec<GenericTypeDef>) {
311            match node.kind() {
312                "class_declaration" => {
313                    if let Some(name_node) = node.child_by_field_name("name") {
314                        if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
315                            types.push(GenericTypeDef {
316                                name: name.to_string(),
317                                kind: "class".to_string(),
318                                start_line: node.start_position().row as u32 + 1,
319                                end_line: node.end_position().row as u32 + 1,
320                                fields: extract_class_properties(node, source),
321                            });
322                        }
323                    }
324                }
325                "interface_declaration" => {
326                    if let Some(name_node) = node.child_by_field_name("name") {
327                        if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
328                            types.push(GenericTypeDef {
329                                name: name.to_string(),
330                                kind: "interface".to_string(),
331                                start_line: node.start_position().row as u32 + 1,
332                                end_line: node.end_position().row as u32 + 1,
333                                fields: extract_interface_methods(node, source),
334                            });
335                        }
336                    }
337                }
338                "trait_declaration" => {
339                    if let Some(name_node) = node.child_by_field_name("name") {
340                        if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
341                            types.push(GenericTypeDef {
342                                name: name.to_string(),
343                                kind: "trait".to_string(),
344                                start_line: node.start_position().row as u32 + 1,
345                                end_line: node.end_position().row as u32 + 1,
346                                fields: extract_trait_methods(node, source),
347                            });
348                        }
349                    }
350                }
351                _ => {}
352            }
353
354            let mut cursor = node.walk();
355            for child in node.children(&mut cursor) {
356                visit_node_for_types(child, source, types);
357            }
358        }
359
360        fn extract_class_properties(node: Node, source: &str) -> Vec<String> {
361            let mut properties = Vec::new();
362
363            if let Some(body) = node.child_by_field_name("body") {
364                let mut cursor = body.walk();
365                for child in body.children(&mut cursor) {
366                    if child.kind() == "property_declaration" {
367                        let mut prop_cursor = child.walk();
368                        for prop_child in child.children(&mut prop_cursor) {
369                            if prop_child.kind() == "variable_name" {
370                                if let Ok(prop_name) = prop_child.utf8_text(source.as_bytes()) {
371                                    properties.push(prop_name.to_string());
372                                }
373                            }
374                        }
375                    }
376                }
377            }
378
379            properties
380        }
381
382        fn extract_interface_methods(node: Node, source: &str) -> Vec<String> {
383            let mut methods = Vec::new();
384
385            if let Some(body) = node.child_by_field_name("body") {
386                let mut cursor = body.walk();
387                for child in body.children(&mut cursor) {
388                    if child.kind() == "method_declaration" {
389                        if let Some(name_node) = child.child_by_field_name("name") {
390                            if let Ok(method_name) = name_node.utf8_text(source.as_bytes()) {
391                                methods.push(method_name.to_string());
392                            }
393                        }
394                    }
395                }
396            }
397
398            methods
399        }
400
401        fn extract_trait_methods(node: Node, source: &str) -> Vec<String> {
402            let mut methods = Vec::new();
403
404            if let Some(body) = node.child_by_field_name("body") {
405                let mut cursor = body.walk();
406                for child in body.children(&mut cursor) {
407                    if child.kind() == "method_declaration" {
408                        if let Some(name_node) = child.child_by_field_name("name") {
409                            if let Ok(method_name) = name_node.utf8_text(source.as_bytes()) {
410                                methods.push(method_name.to_string());
411                            }
412                        }
413                    }
414                }
415            }
416
417            methods
418        }
419
420        visit_node_for_types(root_node, source, &mut types);
421        Ok(types)
422    }
423
424    fn language(&self) -> Language {
425        Language::Php
426    }
427}
428
#[cfg(test)]
mod tests {
    use super::*;

    /// Free functions and class methods are both reported, methods with a
    /// `Class::` prefix.
    #[test]
    fn test_php_functions() {
        let source = r#"
<?php
function hello($name) {
    return "Hello, " . $name . "!";
}

function add($a, $b = 0) {
    return $a + $b;
}

class Calculator {
    public function __construct() {
        $this->result = 0;
    }
    
    public function add($x) {
        $this->result += $x;
        return $this->result;
    }
    
    private static function multiply($a, $b) {
        return $a * $b;
    }
}
"#;

        let mut parser = PhpParser::new().unwrap();
        let functions = parser.extract_functions(source, "test.php").unwrap();
        assert!(functions.len() >= 4);

        let expected_names = [
            "hello",
            "add",
            "Calculator::__construct",
            "Calculator::add",
            "Calculator::multiply",
        ];
        for expected in expected_names.iter() {
            assert!(functions.iter().any(|f| f.name == *expected));
        }
    }

    /// Classes, interfaces and traits are reported in source order with the
    /// matching kind.
    #[test]
    fn test_php_classes() {
        let source = r#"
<?php
class User {
    public $name;
    private $email;
    
    public function __construct($name, $email) {
        $this->name = $name;
        $this->email = $email;
    }
}

interface UserInterface {
    public function getName();
}

trait Loggable {
    public function log($message) {
        echo $message;
    }
}
"#;

        let mut parser = PhpParser::new().unwrap();
        let types = parser.extract_types(source, "test.php").unwrap();
        assert_eq!(types.len(), 3);

        let expected = [("User", "class"), ("UserInterface", "interface"), ("Loggable", "trait")];
        for (found, (name, kind)) in types.iter().zip(expected.iter()) {
            assert_eq!(found.name, *name);
            assert_eq!(found.kind, *kind);
        }
    }

    /// An unbraced namespace prefixes free functions but not method names.
    #[test]
    fn test_php_namespace() {
        let source = r#"
<?php
namespace App\Controllers;

function processRequest() {
    return "processed";
}

class UserController {
    public function index() {
        return "user list";
    }
}
"#;

        let mut parser = PhpParser::new().unwrap();
        let functions = parser.extract_functions(source, "test.php").unwrap();
        assert!(functions.len() >= 2);

        let has_name = |wanted: &str| functions.iter().any(|f| f.name == wanted);
        assert!(has_name("App\\Controllers\\processRequest"));
        assert!(has_name("UserController::index"));
    }

    /// Methods carry their class name and `is_method`; free functions do not.
    #[test]
    fn test_php_class_detection() {
        let source = r#"
<?php
class TestClass {
    public function method1() {
        return "test1";
    }
    
    public function method2() {
        return "test2";
    }
}

function standalone_function() {
    return "standalone";
}
"#;

        let mut parser = PhpParser::new().unwrap();
        let functions = parser.extract_functions(source, "test.php").unwrap();
        assert_eq!(functions.len(), 3);

        // Check class methods have class_name set
        for fragment in ["method1", "method2"].iter() {
            let method = functions.iter().find(|f| f.name.contains(*fragment)).unwrap();
            assert_eq!(method.class_name, Some("TestClass".to_string()));
            assert!(method.is_method);
        }

        let standalone = functions.iter().find(|f| f.name == "standalone_function").unwrap();
        assert_eq!(standalone.class_name, None);
        assert!(!standalone.is_method);
    }
}