Skip to main content

php_lsp/
semantic_diagnostics.rs

//! Semantic diagnostics bridge.
//!
//! Delegates semantic analysis to the `mir-analyzer` crate and converts its
//! `Issue` type into the `tower-lsp` `Diagnostic` type expected by the LSP
//! backend. Duplicate-declaration checks are performed locally on the AST.
5use php_ast::StmtKind;
6use tower_lsp::lsp_types::{Diagnostic, DiagnosticSeverity, NumberOrString, Position, Range, Url};
7
8use crate::ast::{ParsedDoc, SourceView};
9use crate::backend::DiagnosticsConfig;
10
11/// Run semantic checks on `doc` using the backend's persistent codebase.
12/// The codebase is updated incrementally: the current file's definitions are
13/// evicted and re-collected, then `finalize()` rebuilds inheritance tables.
14///
15/// `php_version` is a version string like `"8.1"` sourced from `LspConfig`.
16/// Parsed to `mir_analyzer::PhpVersion` and forwarded to `StatementsAnalyzer`.
17///
18/// Legacy mutating path — runs `remove_file_definitions` + collect + finalize
19/// on the codebase. Kept for benchmarks (`benches/semantic.rs`) and as the
20/// reference implementation while Phase D wraps Pass-2 in salsa. Not used by
21/// the LSP handlers anymore (they use `semantic_diagnostics_no_rebuild`
22/// against the salsa-built codebase).
23pub fn semantic_diagnostics(
24    uri: &Url,
25    doc: &ParsedDoc,
26    codebase: &mir_codebase::Codebase,
27    cfg: &DiagnosticsConfig,
28    php_version: Option<&str>,
29) -> Vec<Diagnostic> {
30    if !cfg.enabled {
31        return vec![];
32    }
33
34    let file: std::sync::Arc<str> = std::sync::Arc::from(uri.as_str());
35
36    // Incremental update: evict stale definitions for this file, re-collect,
37    // and rebuild inheritance tables.
38    codebase.remove_file_definitions(&file);
39    let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
40    let collector_issues = {
41        let _span = tracing::debug_span!("collect_definitions", file = %uri).entered();
42        let collector = mir_analyzer::collector::DefinitionCollector::new(
43            codebase,
44            file.clone(),
45            doc.source(),
46            &source_map,
47        );
48        collector.collect(doc.program())
49    };
50    {
51        let _span = tracing::debug_span!("codebase_finalize", file = %uri).entered();
52        codebase.finalize();
53    }
54
55    // Pass 2: analyse function/method bodies in the current document.
56    let ver = php_version
57        .and_then(|s| s.parse::<mir_analyzer::PhpVersion>().ok())
58        .unwrap_or(mir_analyzer::PhpVersion::LATEST);
59    let mut issue_buffer = mir_issues::IssueBuffer::new();
60    let mut symbols = Vec::new();
61    let mut analyzer = mir_analyzer::stmt::StatementsAnalyzer::new(
62        codebase,
63        file.clone(),
64        doc.source(),
65        &source_map,
66        &mut issue_buffer,
67        &mut symbols,
68        ver,
69    );
70    let mut ctx = mir_analyzer::context::Context::new();
71    {
72        let _span = tracing::debug_span!("analyze_stmts", file = %uri).entered();
73        analyzer.analyze_stmts(&doc.program().stmts, &mut ctx);
74    }
75
76    collector_issues
77        .into_iter()
78        .chain(issue_buffer.into_issues())
79        .filter(|i| !i.suppressed)
80        .filter(|i| issue_passes_filter(i, cfg))
81        .map(|i| to_lsp_diagnostic(i, uri))
82        .collect()
83}
84
85/// Run semantic body analysis on `doc` assuming the codebase is already
86/// finalized (all definitions collected, `finalize()` already called).
87///
88/// Unlike [`semantic_diagnostics`], this function does **not** mutate the
89/// codebase — it skips the `remove_file_definitions` / re-collect / `finalize`
90/// cycle. Intended for workspace diagnostic batch passes where the codebase is
91/// built once upfront and `finalize()` is called a single time before the loop.
92///
93/// Phase I: LSP handlers now read issues through the salsa `semantic_issues`
94/// query + `issues_to_diagnostics`. This function is retained for
95/// `benches/semantic.rs` as a single-call reference implementation.
96pub fn semantic_diagnostics_no_rebuild(
97    uri: &Url,
98    doc: &ParsedDoc,
99    codebase: &mir_codebase::Codebase,
100    cfg: &DiagnosticsConfig,
101    php_version: Option<&str>,
102) -> Vec<Diagnostic> {
103    if !cfg.enabled {
104        return vec![];
105    }
106
107    let file: std::sync::Arc<str> = std::sync::Arc::from(uri.as_str());
108    let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
109
110    // Pass 2 only: analyse function/method bodies.
111    // The codebase is already finalized — skip remove/re-collect/finalize so
112    // that inheritance tables are not torn down and rebuilt for every file.
113    let ver = php_version
114        .and_then(|s| s.parse::<mir_analyzer::PhpVersion>().ok())
115        .unwrap_or(mir_analyzer::PhpVersion::LATEST);
116    let mut issue_buffer = mir_issues::IssueBuffer::new();
117    let mut symbols = Vec::new();
118    let mut analyzer = mir_analyzer::stmt::StatementsAnalyzer::new(
119        codebase,
120        file,
121        doc.source(),
122        &source_map,
123        &mut issue_buffer,
124        &mut symbols,
125        ver,
126    );
127    let mut ctx = mir_analyzer::context::Context::new();
128    analyzer.analyze_stmts(&doc.program().stmts, &mut ctx);
129
130    issue_buffer
131        .into_issues()
132        .into_iter()
133        .filter(|i| !i.suppressed)
134        .filter(|i| issue_passes_filter(i, cfg))
135        .map(|i| to_lsp_diagnostic(i, uri))
136        .collect()
137}
138
139/// Convert pre-computed raw issues (from `db::semantic::semantic_issues`) into
140/// LSP diagnostics, applying the user's `DiagnosticsConfig` filter. Keeping
141/// filter + conversion outside the salsa query preserves memoization across
142/// config toggles (the user flipping a category must not rerun the analyzer).
143pub fn issues_to_diagnostics(
144    issues: &[mir_issues::Issue],
145    uri: &Url,
146    cfg: &DiagnosticsConfig,
147) -> Vec<Diagnostic> {
148    if !cfg.enabled {
149        return vec![];
150    }
151    issues
152        .iter()
153        .filter(|i| issue_passes_filter(i, cfg))
154        .cloned()
155        .map(|i| to_lsp_diagnostic(i, uri))
156        .collect()
157}
158
159/// Returns `true` if the mir-analyzer issue is allowed through by the config.
160fn issue_passes_filter(issue: &mir_issues::Issue, cfg: &DiagnosticsConfig) -> bool {
161    use mir_issues::IssueKind;
162    match &issue.kind {
163        IssueKind::UndefinedVariable { .. } | IssueKind::PossiblyUndefinedVariable { .. } => {
164            cfg.undefined_variables
165        }
166        IssueKind::UndefinedFunction { .. } | IssueKind::UndefinedMethod { .. } => {
167            cfg.undefined_functions
168        }
169        IssueKind::UndefinedClass { .. } => cfg.undefined_classes,
170        // InvalidArgument covers both arity errors and type mismatches in mir-analyzer;
171        // show it if either toggle is on.
172        IssueKind::InvalidArgument { .. } => cfg.arity_errors || cfg.type_errors,
173        IssueKind::InvalidReturnType { .. }
174        | IssueKind::NullMethodCall { .. }
175        | IssueKind::NullPropertyFetch { .. }
176        | IssueKind::NullableReturnStatement { .. }
177        | IssueKind::InvalidPropertyAssignment { .. }
178        | IssueKind::InvalidOperand { .. } => cfg.type_errors,
179        IssueKind::DeprecatedCall { .. }
180        | IssueKind::DeprecatedMethodCall { .. }
181        | IssueKind::DeprecatedMethod { .. }
182        | IssueKind::DeprecatedClass { .. } => cfg.deprecated_calls,
183        _ => true,
184    }
185}
186
187/// Check for duplicate class/function/interface/trait/enum declarations.
188pub fn duplicate_declaration_diagnostics(
189    _source: &str,
190    doc: &ParsedDoc,
191    cfg: &DiagnosticsConfig,
192) -> Vec<Diagnostic> {
193    if !cfg.enabled || !cfg.duplicate_declarations {
194        return vec![];
195    }
196    let sv = doc.view();
197    let mut seen: std::collections::HashMap<String, ()> = std::collections::HashMap::new();
198    let mut diags = Vec::new();
199    collect_duplicate_decls(sv, &doc.program().stmts, "", &mut seen, &mut diags);
200    diags
201}
202
203fn collect_duplicate_decls(
204    sv: SourceView<'_>,
205    stmts: &[php_ast::Stmt<'_, '_>],
206    current_ns: &str,
207    seen: &mut std::collections::HashMap<String, ()>,
208    diags: &mut Vec<Diagnostic>,
209) {
210    // Track the active namespace for unbraced `namespace Foo;` declarations.
211    let mut active_ns = current_ns.to_string();
212
213    for stmt in stmts {
214        let name_and_span: Option<(&str, u32)> = match &stmt.kind {
215            StmtKind::Class(c) => c.name.map(|n| (n, stmt.span.start)),
216            StmtKind::Interface(i) => Some((i.name, stmt.span.start)),
217            StmtKind::Trait(t) => Some((t.name, stmt.span.start)),
218            StmtKind::Enum(e) => Some((e.name, stmt.span.start)),
219            StmtKind::Function(f) => Some((f.name, stmt.span.start)),
220            StmtKind::Namespace(ns) => {
221                let ns_name = ns
222                    .name
223                    .as_ref()
224                    .map(|n| n.to_string_repr().to_string())
225                    .unwrap_or_default();
226                match &ns.body {
227                    php_ast::NamespaceBody::Braced(inner) => {
228                        let child_ns = if current_ns.is_empty() {
229                            ns_name
230                        } else {
231                            format!("{}\\{}", current_ns, ns_name)
232                        };
233                        collect_duplicate_decls(sv, inner, &child_ns, seen, diags);
234                    }
235                    php_ast::NamespaceBody::Simple => {
236                        // Unbraced namespace: subsequent siblings belong to this namespace.
237                        active_ns = if current_ns.is_empty() {
238                            ns_name
239                        } else {
240                            format!("{}\\{}", current_ns, ns_name)
241                        };
242                    }
243                }
244                None
245            }
246            _ => None,
247        };
248        if let Some((name, span_start)) = name_and_span {
249            let key = if active_ns.is_empty() {
250                name.to_string()
251            } else {
252                format!("{}\\{}", active_ns, name)
253            };
254            if seen.insert(key, ()).is_some() {
255                // Find the byte offset of the actual name by searching forward from span_start.
256                // The span_start points to keywords like "class", "function", etc.,
257                // so we need to find where the identifier name appears.
258                let name_byte_offset = find_name_offset(&sv.source()[span_start as usize..], name)
259                    .map(|off| span_start + off as u32)
260                    .unwrap_or(span_start);
261
262                let start_pos = sv.position_of(name_byte_offset);
263                // Calculate end position by converting UTF-8 character length to UTF-16 code units
264                let name_utf16_len = name.chars().map(|c| c.len_utf16() as u32).sum::<u32>();
265                let end_pos = Position {
266                    line: start_pos.line,
267                    character: start_pos.character + name_utf16_len,
268                };
269                diags.push(Diagnostic {
270                    range: Range {
271                        start: start_pos,
272                        end: end_pos,
273                    },
274                    severity: Some(DiagnosticSeverity::WARNING),
275                    message: format!(
276                        "Duplicate declaration: `{name}` is already defined in this file"
277                    ),
278                    source: Some("php-lsp".to_string()),
279                    ..Default::default()
280                });
281            }
282        }
283    }
284}
285
/// Find the byte offset of the first whole-identifier occurrence of `name`
/// within `source`.
///
/// A match only counts when the characters directly before and after it are
/// not identifier characters, so `Foo` is not found inside `FooBar`.
/// Candidate positions are taken from `char_indices`, which keeps every slice
/// on a UTF-8 character boundary; non-ASCII text anywhere in `source` is
/// therefore safe. Returns `None` when `name` is empty or has no such
/// occurrence.
fn find_name_offset(source: &str, name: &str) -> Option<usize> {
    if name.is_empty() {
        return None;
    }
    source
        .char_indices()
        .map(|(start, _)| start)
        .filter(|&start| source[start..].starts_with(name))
        .find(|&start| {
            // `start + name.len()` is a boundary because `name` matched there.
            let end = start + name.len();
            let glued_before =
                matches!(source[..start].chars().next_back(), Some(c) if is_identifier_char(c));
            let glued_after =
                matches!(source[end..].chars().next(), Some(c) if is_identifier_char(c));
            !glued_before && !glued_after
        })
}

/// Check if a character is valid inside a PHP identifier.
///
/// PHP identifiers consist of ASCII letters, digits, `_` and any byte in the
/// range 0x80-0xff, so every non-ASCII character counts as an identifier
/// character.
fn is_identifier_char(c: char) -> bool {
    c == '_' || c.is_ascii_alphanumeric() || !c.is_ascii()
}
310
311fn to_lsp_diagnostic(issue: mir_issues::Issue, _uri: &Url) -> Diagnostic {
312    // mir-analyzer uses 1-based line numbers; LSP uses 0-based.
313    let line = issue.location.line.saturating_sub(1);
314    let col_start = issue.location.col_start as u32;
315    let col_end = issue.location.col_end as u32;
316    Diagnostic {
317        range: Range {
318            start: Position {
319                line,
320                character: col_start,
321            },
322            end: Position {
323                line,
324                character: col_end.max(col_start + 1),
325            },
326        },
327        severity: Some(match issue.severity {
328            mir_issues::Severity::Error => DiagnosticSeverity::ERROR,
329            mir_issues::Severity::Warning => DiagnosticSeverity::WARNING,
330            mir_issues::Severity::Info => DiagnosticSeverity::INFORMATION,
331        }),
332        code: Some(NumberOrString::String(issue.kind.name().to_string())),
333        source: Some("php-lsp".to_string()),
334        message: issue.kind.message(),
335        ..Default::default()
336    }
337}
338
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src` and run the duplicate-declaration check with every
    /// diagnostic category enabled.
    fn duplicates_in(src: &str) -> Vec<Diagnostic> {
        let doc = ParsedDoc::parse(src.to_string());
        duplicate_declaration_diagnostics(src, &doc, &DiagnosticsConfig::all_enabled())
    }

    /// Length of `name` in UTF-16 code units, the unit LSP columns use.
    fn utf16_len(name: &str) -> u32 {
        name.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
    }

    #[test]
    fn duplicate_class_emits_warning() {
        let diags = duplicates_in("<?php\nclass Foo {}\nclass Foo {}");
        assert_eq!(
            diags.len(),
            1,
            "expected exactly 1 duplicate warning, got: {:?}",
            diags
        );
        assert_eq!(diags[0].severity, Some(DiagnosticSeverity::WARNING));
        assert!(
            diags[0].message.contains("Foo"),
            "message should mention 'Foo'"
        );
    }

    #[test]
    fn no_duplicate_for_unique_declarations() {
        let diags = duplicates_in("<?php\nclass Foo {}\nclass Bar {}");
        assert!(diags.is_empty());
    }

    #[test]
    fn namespace_scoped_duplicate_not_flagged() {
        // `Foo` in two different braced namespaces is not a duplicate.
        let diags = duplicates_in(
            "<?php\nnamespace App\\A {\nclass Foo {}\n}\nnamespace App\\B {\nclass Foo {}\n}",
        );
        assert!(
            diags.is_empty(),
            "classes with same name in different namespaces should not be flagged, got: {:?}",
            diags
        );
    }

    #[test]
    fn duplicate_interface_declaration() {
        // The same interface declared twice yields exactly one warning.
        let diags = duplicates_in("<?php\ninterface Logger {}\ninterface Logger {}");
        assert_eq!(
            diags.len(),
            1,
            "expected exactly 1 duplicate-declaration diagnostic, got: {:?}",
            diags
        );
        assert!(
            diags[0].message.contains("Logger"),
            "diagnostic message should mention 'Logger'"
        );
        assert_eq!(
            diags[0].severity,
            Some(DiagnosticSeverity::WARNING),
            "duplicate declaration should be a warning"
        );
    }

    #[test]
    fn duplicate_trait_declaration() {
        // The same trait declared twice yields exactly one warning.
        let diags = duplicates_in("<?php\ntrait Serializable {}\ntrait Serializable {}");
        assert_eq!(
            diags.len(),
            1,
            "expected exactly 1 duplicate-declaration diagnostic, got: {:?}",
            diags
        );
        assert!(
            diags[0].message.contains("Serializable"),
            "diagnostic message should mention 'Serializable'"
        );
        assert_eq!(
            diags[0].severity,
            Some(DiagnosticSeverity::WARNING),
            "duplicate trait declaration should be a warning"
        );
    }

    #[test]
    fn duplicate_diagnostic_has_warning_severity() {
        // `duplicate_declaration_diagnostics` reports duplicates as WARNING.
        let diags = duplicates_in("<?php\nfunction doWork() {}\nfunction doWork() {}");
        assert_eq!(diags.len(), 1, "expected exactly 1 duplicate diagnostic");
        assert_eq!(
            diags[0].severity,
            Some(DiagnosticSeverity::WARNING),
            "duplicate declaration diagnostic should have WARNING severity"
        );
    }

    #[test]
    fn unbraced_namespace_classes_with_same_name_not_flagged() {
        // `Foo` in two different unbraced namespaces is not a duplicate.
        let diags = duplicates_in(
            "<?php\nnamespace App\\A;\nclass Foo {}\nnamespace App\\B;\nclass Foo {}",
        );
        assert!(
            diags.is_empty(),
            "classes with same name in different unbraced namespaces should not be flagged, got: {:?}",
            diags
        );
    }

    #[test]
    fn unbraced_namespace_duplicate_in_same_namespace_is_flagged() {
        // `Foo` twice inside one unbraced namespace yields one warning.
        let diags = duplicates_in("<?php\nnamespace App;\nclass Foo {}\nclass Foo {}");
        assert_eq!(
            diags.len(),
            1,
            "expected 1 duplicate-declaration diagnostic, got: {:?}",
            diags
        );
        assert!(diags[0].message.contains("Foo"));
    }

    #[test]
    fn duplicate_declaration_range_spans_full_name() {
        // The range must cover the whole name, not just its first character.
        let diags = duplicates_in("<?php\nclass Foo {}\nclass Foo {}");
        assert_eq!(diags.len(), 1, "expected exactly 1 duplicate diagnostic");

        let d = &diags[0];
        let range_len = d.range.end.character - d.range.start.character;
        let expected_len = utf16_len("Foo");
        assert_eq!(
            range_len, expected_len,
            "range length {} should match 'Foo' length {}",
            range_len, expected_len
        );

        // In `class Foo {}` the name occupies columns 6..9, after `class `.
        assert_eq!(
            d.range.start.character, 6,
            "range should start at 'F' in 'Foo'"
        );
        assert_eq!(
            d.range.end.character, 9,
            "range should end after 'o' in 'Foo'"
        );
    }

    #[test]
    fn duplicate_function_declaration_range_spans_name() {
        // Function duplicates must also cover the whole function name.
        let diags = duplicates_in("<?php\nfunction doWork() {}\nfunction doWork() {}");
        assert_eq!(diags.len(), 1, "expected exactly 1 duplicate diagnostic");

        let d = &diags[0];
        let range_len = d.range.end.character - d.range.start.character;
        let expected_len = utf16_len("doWork");
        assert_eq!(
            range_len, expected_len,
            "range length {} should match 'doWork' length {}",
            range_len, expected_len
        );

        // In `function doWork() {}` the name occupies columns 9..15.
        assert_eq!(
            d.range.start.character, 9,
            "range should start at 'd' in 'doWork'"
        );
        assert_eq!(
            d.range.end.character, 15,
            "range should end after 'k' in 'doWork'"
        );
    }

    #[test]
    fn duplicate_interface_range_spans_name() {
        // Interface duplicates must cover the whole interface name.
        let diags = duplicates_in("<?php\ninterface Logger {}\ninterface Logger {}");
        assert_eq!(diags.len(), 1, "expected exactly 1 duplicate diagnostic");

        let d = &diags[0];
        let range_len = d.range.end.character - d.range.start.character;
        let expected_len = utf16_len("Logger");
        assert_eq!(
            range_len, expected_len,
            "range length {} should match 'Logger' length {}",
            range_len, expected_len
        );

        // In `interface Logger {}` the name occupies columns 10..16.
        assert_eq!(
            d.range.start.character, 10,
            "range should start at 'L' in 'Logger'"
        );
        assert_eq!(
            d.range.end.character, 16,
            "range should end after 'r' in 'Logger'"
        );
    }

    #[test]
    fn duplicate_declaration_range_on_correct_line() {
        // A blank line separates the declarations, so the second `class Foo`
        // sits on 0-indexed line 3.
        let diags = duplicates_in("<?php\nclass Foo {}\n\nclass Foo {}");
        assert_eq!(diags.len(), 1, "expected exactly 1 duplicate diagnostic");

        let d = &diags[0];
        assert_eq!(
            d.range.start.line, 3,
            "duplicate should be reported on line 3 (0-indexed)"
        );
        assert_eq!(
            d.range.end.line, 3,
            "range end should be on same line as start"
        );
    }

    #[test]
    fn to_lsp_diagnostic_sets_code_to_issue_kind_name() {
        use mir_issues::{Issue, IssueKind, Location};
        use std::sync::Arc;
        use tower_lsp::lsp_types::{NumberOrString, Url};

        let uri = Url::parse("file:///test.php").unwrap();
        let issue = Issue::new(
            IssueKind::UndefinedClass {
                name: "Foo".to_string(),
            },
            Location {
                file: Arc::from("file:///test.php"),
                line: 1,
                col_start: 0,
                col_end: 3,
            },
        );

        let diag = to_lsp_diagnostic(issue, &uri);
        assert_eq!(
            diag.code,
            Some(NumberOrString::String("UndefinedClass".to_string())),
            "diagnostic code must be the IssueKind name so code actions can match by type"
        );
        assert!(
            diag.message.contains("Foo"),
            "diagnostic message should mention the class name"
        );
    }
}
611}