1mod fallback;
2mod phpactor;
3
4use std::collections::HashMap;
5use std::fs;
6use std::path::Path;
7
8use anyhow::Result;
9use regex::Regex;
10
11use crate::Framework;
12use crate::composer::ComposerExport;
13use crate::models::{SymbolDoc, make_stable_id};
14use crate::sanitizer::Sanitizer;
15
/// A declaration reported by one of the extractors, before it is enriched into a `SymbolDoc`.
#[derive(Clone, Debug)]
pub struct DeclarationCandidate {
    /// Declaration kind; `build_fqn` only special-cases the value `"method"`.
    pub kind: String,
    /// Unqualified name of the declaration.
    pub name: String,
    /// Owning class, used as the `Owner::name` prefix when `kind` is `"method"`.
    pub owner_class: Option<String>,
    /// Namespace that prefixes the fully qualified name, when there is one.
    pub namespace: Option<String>,
    /// Line where the declaration starts; also the key used to look up its comments.
    pub line_start: usize,
    /// Line where the declaration ends.
    pub line_end: usize,
    /// Signature text, when the extractor supplied one.
    pub signature: Option<String>,
    /// Extractor-reported confidence label, copied verbatim into the symbol.
    pub extraction_confidence: String,
    /// Reference count reported by the extractor.
    pub references_count: u32,
}
28
/// Comments gathered from the lines directly above one declaration.
#[derive(Clone, Debug, Default)]
struct ParsedComments {
    /// First docblock line that is not a recognised tag.
    summary: Option<String>,
    /// Remaining non-tag docblock lines, joined with single spaces.
    description: Option<String>,
    /// Payload of every `@param` tag, in source order.
    params: Vec<String>,
    /// Payload of the last `@return` tag seen.
    return_doc: Option<String>,
    /// Payload of every `@throws` tag, in source order.
    throws_docs: Vec<String>,
    /// Text of the line comments found above the declaration, in source order.
    inline_comments: Vec<String>,
}
38
39pub fn extract_symbols(
40 repo: &Path,
41 repo_name: &str,
42 framework: Framework,
43 files: &[crate::scanner::ScannedFile],
44 packages: &ComposerExport,
45 sanitizer: &Sanitizer,
46) -> Result<Vec<SymbolDoc>> {
47 let mut phpactor = phpactor::PhpactorExtractor::connect(repo).ok();
48 let mut symbols = Vec::new();
49
50 for file in files {
51 if !file.relative_path.to_string_lossy().ends_with(".php") {
52 continue;
53 }
54
55 let contents = fs::read_to_string(&file.absolute_path)?;
56 let declarations = if let Some(client) = phpactor.as_mut() {
57 client
58 .extract_candidates(&file.absolute_path, &contents)
59 .unwrap_or_else(|_| fallback::extract_candidates(&contents))
60 } else {
61 fallback::extract_candidates(&contents)
62 };
63 let comment_map = collect_comments(&contents, sanitizer);
64 let package = packages.package_for_path(&file.absolute_path);
65 let is_test =
66 file.relative_path.starts_with("tests") || file.relative_path.starts_with("test");
67 let path_str = file.relative_path.to_string_lossy().into_owned();
68 let abs_str = file.absolute_path.to_string_lossy().into_owned();
69
70 for declaration in declarations {
71 let fqn = build_fqn(&declaration);
72 let stable_key = format!("{}|{}|{}", repo_name, declaration.kind, fqn);
73 let comments = comment_map
74 .get(&declaration.line_start)
75 .cloned()
76 .unwrap_or_default();
77
78 symbols.push(SymbolDoc {
79 id: make_stable_id(&[
80 repo_name,
81 &declaration.kind,
82 &fqn,
83 &path_str,
84 &declaration.line_start.to_string(),
85 ]),
86 stable_key,
87 repo: repo_name.to_string(),
88 framework: framework.as_str().to_string(),
89 kind: declaration.kind.clone(),
90 short_name: declaration.name.clone(),
91 fqn,
92 owner_class: declaration.owner_class.clone(),
93 namespace: declaration.namespace.clone(),
94 signature: declaration.signature.clone(),
95 doc_summary: comments.summary.clone(),
96 doc_description: comments.description.clone(),
97 param_docs: comments.params.clone(),
98 return_doc: comments.return_doc.clone(),
99 throws_docs: comments.throws_docs.clone(),
100 magic_methods: Vec::new(),
101 magic_properties: Vec::new(),
102 inline_rule_comments: comments.inline_comments.clone(),
103 comment_keywords: keywordize(
104 comments
105 .summary
106 .iter()
107 .chain(comments.inline_comments.iter())
108 .map(String::as_str)
109 .collect::<Vec<_>>()
110 .join(" ")
111 .as_str(),
112 ),
113 symbol_tokens: keywordize(&declaration.name),
114 framework_tags: vec![framework.as_str().to_string()],
115 risk_tags: infer_risk_tags(&path_str, comments.summary.as_deref()),
116 route_ids: Vec::new(),
117 related_symbols: Vec::new(),
118 related_tests: Vec::new(),
119 related_tests_count: 0,
120 validation_commands: Vec::new(),
121 missing_test_warning: None,
122 package_name: package.name.clone(),
123 package_type: package.package_type.clone(),
124 package_version: package.version.clone(),
125 package_keywords: package.keywords.clone(),
126 is_vendor: !package.is_root,
127 is_project_code: package.is_root,
128 is_test,
129 autoloadable: true,
130 extraction_confidence: declaration.extraction_confidence.clone(),
131 references_count: declaration.references_count,
132 path: path_str.clone(),
133 absolute_path: abs_str.clone(),
134 line_start: declaration.line_start,
135 line_end: declaration.line_end,
136 });
137 }
138 }
139
140 Ok(symbols)
141}
142
143pub fn fallback_candidates(contents: &str) -> Vec<DeclarationCandidate> {
144 fallback::extract_candidates(contents)
145}
146
147fn build_fqn(declaration: &DeclarationCandidate) -> String {
148 match (&declaration.namespace, &declaration.owner_class) {
149 (Some(namespace), Some(owner)) if declaration.kind == "method" => {
150 format!("{namespace}\\{owner}::{}", declaration.name)
151 }
152 (Some(namespace), _) => format!("{namespace}\\{}", declaration.name),
153 (None, Some(owner)) if declaration.kind == "method" => {
154 format!("{owner}::{}", declaration.name)
155 }
156 _ => declaration.name.clone(),
157 }
158}
159
160fn collect_comments(contents: &str, sanitizer: &Sanitizer) -> HashMap<usize, ParsedComments> {
161 let mut map = HashMap::new();
162 let lines: Vec<_> = contents.lines().collect();
163 let decl_re = Regex::new(r"^\s*(?:final\s+|abstract\s+)?(?:class|interface|trait|enum|function|public\s+function|protected\s+function|private\s+function)").unwrap();
164 let param_re = Regex::new(r"@param\s+(.+)").unwrap();
165 let return_re = Regex::new(r"@return\s+(.+)").unwrap();
166 let throws_re = Regex::new(r"@throws\s+(.+)").unwrap();
167
168 for (idx, line) in lines.iter().enumerate() {
169 if !decl_re.is_match(line) {
170 continue;
171 }
172 let mut cursor = idx as isize - 1;
173 let mut doc_lines = Vec::new();
174 let mut inline_comments = Vec::new();
175 while cursor >= 0 {
176 let candidate = lines[cursor as usize].trim();
177 if candidate.starts_with("//") || candidate.starts_with('#') {
178 if let Some(value) =
179 sanitizer.sanitize_text(candidate.trim_start_matches(&['/', '#'][..]).trim())
180 {
181 inline_comments.push(value);
182 }
183 cursor -= 1;
184 continue;
185 }
186 if candidate.ends_with("*/")
187 || candidate.starts_with('*')
188 || candidate.starts_with("/**")
189 {
190 doc_lines.push(candidate.to_string());
191 cursor -= 1;
192 continue;
193 }
194 break;
195 }
196 doc_lines.reverse();
197 inline_comments.reverse();
198
199 let mut parsed = ParsedComments::default();
200 let mut description_lines = Vec::new();
201 for raw in doc_lines {
202 let cleaned = raw
203 .trim_start_matches("/**")
204 .trim_start_matches("/*")
205 .trim_start_matches('*')
206 .trim_end_matches("*/")
207 .trim();
208 if cleaned.is_empty() {
209 continue;
210 }
211 if let Some(param) = param_re
212 .captures(cleaned)
213 .and_then(|caps| caps.get(1).map(|item| item.as_str()))
214 .and_then(|value| sanitizer.sanitize_text(value))
215 {
216 parsed.params.push(param);
217 continue;
218 }
219 if let Some(return_doc) = return_re
220 .captures(cleaned)
221 .and_then(|caps| caps.get(1).map(|item| item.as_str()))
222 .and_then(|value| sanitizer.sanitize_text(value))
223 {
224 parsed.return_doc = Some(return_doc);
225 continue;
226 }
227 if let Some(throws_doc) = throws_re
228 .captures(cleaned)
229 .and_then(|caps| caps.get(1).map(|item| item.as_str()))
230 .and_then(|value| sanitizer.sanitize_text(value))
231 {
232 parsed.throws_docs.push(throws_doc);
233 continue;
234 }
235 if parsed.summary.is_none() {
236 parsed.summary = sanitizer.sanitize_text(cleaned);
237 } else if let Some(line) = sanitizer.sanitize_text(cleaned) {
238 description_lines.push(line);
239 }
240 }
241 parsed.description = if description_lines.is_empty() {
242 None
243 } else {
244 Some(description_lines.join(" "))
245 };
246 parsed.inline_comments = inline_comments;
247
248 map.insert(idx + 1, parsed);
249 }
250
251 map
252}
253
/// Splits `text` into lowercase keyword tokens.
///
/// A token is a maximal run of ASCII letters, ASCII digits, `_` and `\`; every other
/// character acts as a separator. Tokens of two bytes or fewer are discarded.
fn keywordize(text: &str) -> Vec<String> {
    let is_token_char = |c: char| c.is_ascii_alphanumeric() || matches!(c, '_' | '\\');

    let mut keywords = Vec::new();
    for token in text.split(|c: char| !is_token_char(c)) {
        if token.len() >= 3 {
            keywords.push(token.to_ascii_lowercase());
        }
    }
    keywords
}
260
/// Derives risk tags from keywords that appear in `path` or in the optional `summary`.
///
/// Matching is a plain substring search on the ASCII-lowercased `"{path} {summary}"`
/// text. The returned tags are sorted and contain no duplicates.
fn infer_risk_tags(path: &str, summary: Option<&str>) -> Vec<String> {
    // (substring to look for, tag to emit)
    const RULES: [(&str, &str); 5] = [
        ("policy", "risk:access-control"),
        ("auth", "risk:access-control"),
        ("consent", "risk:patient-consent"),
        ("audit", "risk:audit-trail"),
        ("patient", "risk:patient-data"),
    ];

    let haystack = format!("{path} {}", summary.unwrap_or_default()).to_ascii_lowercase();
    let mut tags: Vec<String> = RULES
        .iter()
        .filter(|rule| haystack.contains(rule.0))
        .map(|rule| rule.1.to_string())
        .collect();
    // Two rules share a tag, so duplicates are possible before this point.
    tags.sort();
    tags.dedup();
    tags
}
279
280#[cfg(test)]
281mod tests {
282 use std::fs;
283
284 use tempfile::tempdir;
285
286 use crate::Framework;
287 use crate::composer::export_packages;
288 use crate::config::IndexerConfig;
289 use crate::sanitizer::Sanitizer;
290 use crate::scanner::scan_repo;
291
292 use super::extract_symbols;
293
 // End-to-end check of `extract_symbols` against a throwaway repository on disk:
 // the class and its method must be reported with namespaced FQNs, and the
 // method must carry the first line of its docblock as the summary.
294 #[test]
295 fn extracts_symbols_with_docblocks() {
 // Fixture: a minimal composer.json plus one namespaced class with a documented method.
296 let dir = tempdir().unwrap();
297 fs::create_dir_all(dir.path().join("app")).unwrap();
298 fs::write(dir.path().join("composer.json"), r#"{"name":"acme/app"}"#).unwrap();
299 fs::write(
300 dir.path().join("app/ConsentService.php"),
301 r#"<?php
302namespace App\Services;
303
304class ConsentService {
305 /**
306 * Sign consent.
307 * @param string $patientId patient id
308 * @return bool
309 */
310 public function sign(string $patientId): bool
311 {
312 return true;
313 }
314}
315"#,
316 )
317 .unwrap();
318
 // Scan with the default indexer paths, load the Composer packages, then extract.
319 let files = scan_repo(dir.path(), &IndexerConfig::default().paths).unwrap();
320 let packages = export_packages(dir.path()).unwrap();
321 let symbols = extract_symbols(
322 dir.path(),
323 "acme/app",
324 Framework::Laravel,
325 &files,
326 &packages,
327 &Sanitizer::default(),
328 )
329 .unwrap();
330
 // The class itself is reported under its namespaced name.
331 assert!(
332 symbols
333 .iter()
334 .any(|symbol| symbol.fqn == "App\\Services\\ConsentService")
335 );
 // The method is reported as `Class::method` and keeps its docblock summary.
336 assert!(
337 symbols
338 .iter()
339 .any(|symbol| symbol.fqn == "App\\Services\\ConsentService::sign"
340 && symbol.doc_summary.as_deref() == Some("Sign consent."))
341 );
342 }
343}