Skip to main content

php_lsp/
semantic_diagnostics.rs

//! Semantic diagnostics bridge.
//!
//! Delegates semantic analysis to the `mir-analyzer` crate and converts its
//! `Issue` type into the `tower-lsp` `Diagnostic` type expected by the LSP
//! backend. The duplicate-declaration check is implemented locally on the AST.
5use php_ast::StmtKind;
6use tower_lsp::lsp_types::{Diagnostic, DiagnosticSeverity, NumberOrString, Position, Range, Url};
7
8use crate::ast::{ParsedDoc, SourceView};
9use crate::backend::DiagnosticsConfig;
10
11/// Run semantic checks on `doc` using the backend's persistent codebase.
12/// The codebase is updated incrementally: the current file's definitions are
13/// evicted and re-collected, then `finalize()` rebuilds inheritance tables.
14///
15/// `php_version` is a version string like `"8.1"` sourced from `LspConfig`.
16/// Parsed to `mir_analyzer::PhpVersion` and forwarded to `StatementsAnalyzer`.
17///
18/// Legacy mutating path — runs `remove_file_definitions` + collect + finalize
19/// on the codebase. Kept for benchmarks (`benches/semantic.rs`) and as the
20/// reference implementation while Phase D wraps Pass-2 in salsa. Not used by
21/// the LSP handlers anymore (they use `semantic_diagnostics_no_rebuild`
22/// against the salsa-built codebase).
23pub fn semantic_diagnostics(
24    uri: &Url,
25    doc: &ParsedDoc,
26    codebase: &mir_codebase::Codebase,
27    cfg: &DiagnosticsConfig,
28    php_version: Option<&str>,
29) -> Vec<Diagnostic> {
30    if !cfg.enabled {
31        return vec![];
32    }
33
34    let file: std::sync::Arc<str> = std::sync::Arc::from(uri.as_str());
35
36    // Incremental update: evict stale definitions for this file, re-collect,
37    // and rebuild inheritance tables.
38    codebase.remove_file_definitions(&file);
39    let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
40    let collector_issues = {
41        let _span = tracing::debug_span!("collect_definitions", file = %uri).entered();
42        let collector = mir_analyzer::collector::DefinitionCollector::new(
43            codebase,
44            file.clone(),
45            doc.source(),
46            &source_map,
47        );
48        collector.collect(doc.program())
49    };
50    {
51        let _span = tracing::debug_span!("codebase_finalize", file = %uri).entered();
52        codebase.finalize();
53    }
54
55    // Pass 2: analyse function/method bodies in the current document.
56    let ver = php_version
57        .and_then(|s| s.parse::<mir_analyzer::PhpVersion>().ok())
58        .unwrap_or(mir_analyzer::PhpVersion::LATEST);
59    let mut issue_buffer = mir_issues::IssueBuffer::new();
60    let mut symbols = Vec::new();
61    let mut analyzer = mir_analyzer::stmt::StatementsAnalyzer::new(
62        codebase,
63        file.clone(),
64        doc.source(),
65        &source_map,
66        &mut issue_buffer,
67        &mut symbols,
68        ver,
69        false,
70    );
71    let mut ctx = mir_analyzer::context::Context::new();
72    {
73        let _span = tracing::debug_span!("analyze_stmts", file = %uri).entered();
74        analyzer.analyze_stmts(&doc.program().stmts, &mut ctx);
75    }
76
77    collector_issues
78        .into_iter()
79        .chain(issue_buffer.into_issues())
80        .filter(|i| !i.suppressed)
81        .filter(|i| issue_passes_filter(i, cfg))
82        .map(|i| to_lsp_diagnostic(i, uri))
83        .collect()
84}
85
86/// Run semantic body analysis on `doc` assuming the codebase is already
87/// finalized (all definitions collected, `finalize()` already called).
88///
89/// Unlike [`semantic_diagnostics`], this function does **not** mutate the
90/// codebase — it skips the `remove_file_definitions` / re-collect / `finalize`
91/// cycle. Intended for workspace diagnostic batch passes where the codebase is
92/// built once upfront and `finalize()` is called a single time before the loop.
93///
94/// Phase I: LSP handlers now read issues through the salsa `semantic_issues`
95/// query + `issues_to_diagnostics`. This function is retained for
96/// `benches/semantic.rs` as a single-call reference implementation.
97pub fn semantic_diagnostics_no_rebuild(
98    uri: &Url,
99    doc: &ParsedDoc,
100    codebase: &mir_codebase::Codebase,
101    cfg: &DiagnosticsConfig,
102    php_version: Option<&str>,
103) -> Vec<Diagnostic> {
104    if !cfg.enabled {
105        return vec![];
106    }
107
108    let file: std::sync::Arc<str> = std::sync::Arc::from(uri.as_str());
109    let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
110
111    // Pass 2 only: analyse function/method bodies.
112    // The codebase is already finalized — skip remove/re-collect/finalize so
113    // that inheritance tables are not torn down and rebuilt for every file.
114    let ver = php_version
115        .and_then(|s| s.parse::<mir_analyzer::PhpVersion>().ok())
116        .unwrap_or(mir_analyzer::PhpVersion::LATEST);
117    let mut issue_buffer = mir_issues::IssueBuffer::new();
118    let mut symbols = Vec::new();
119    let mut analyzer = mir_analyzer::stmt::StatementsAnalyzer::new(
120        codebase,
121        file,
122        doc.source(),
123        &source_map,
124        &mut issue_buffer,
125        &mut symbols,
126        ver,
127        false,
128    );
129    let mut ctx = mir_analyzer::context::Context::new();
130    analyzer.analyze_stmts(&doc.program().stmts, &mut ctx);
131
132    issue_buffer
133        .into_issues()
134        .into_iter()
135        .filter(|i| !i.suppressed)
136        .filter(|i| issue_passes_filter(i, cfg))
137        .map(|i| to_lsp_diagnostic(i, uri))
138        .collect()
139}
140
141/// Convert pre-computed raw issues (from `db::semantic::semantic_issues`) into
142/// LSP diagnostics, applying the user's `DiagnosticsConfig` filter. Keeping
143/// filter + conversion outside the salsa query preserves memoization across
144/// config toggles (the user flipping a category must not rerun the analyzer).
145pub fn issues_to_diagnostics(
146    issues: &[mir_issues::Issue],
147    uri: &Url,
148    cfg: &DiagnosticsConfig,
149) -> Vec<Diagnostic> {
150    if !cfg.enabled {
151        return vec![];
152    }
153    issues
154        .iter()
155        .filter(|i| issue_passes_filter(i, cfg))
156        .cloned()
157        .map(|i| to_lsp_diagnostic(i, uri))
158        .collect()
159}
160
161/// Returns `true` if the mir-analyzer issue is allowed through by the config.
162fn issue_passes_filter(issue: &mir_issues::Issue, cfg: &DiagnosticsConfig) -> bool {
163    use mir_issues::IssueKind;
164    match &issue.kind {
165        IssueKind::UndefinedVariable { .. } | IssueKind::PossiblyUndefinedVariable { .. } => {
166            cfg.undefined_variables
167        }
168        IssueKind::UndefinedFunction { .. } | IssueKind::UndefinedMethod { .. } => {
169            cfg.undefined_functions
170        }
171        IssueKind::UndefinedClass { .. } => cfg.undefined_classes,
172        // InvalidArgument covers both arity errors and type mismatches in mir-analyzer;
173        // show it if either toggle is on.
174        IssueKind::InvalidArgument { .. } => cfg.arity_errors || cfg.type_errors,
175        IssueKind::InvalidReturnType { .. }
176        | IssueKind::NullMethodCall { .. }
177        | IssueKind::NullPropertyFetch { .. }
178        | IssueKind::NullableReturnStatement { .. }
179        | IssueKind::InvalidPropertyAssignment { .. }
180        | IssueKind::InvalidOperand { .. } => cfg.type_errors,
181        IssueKind::DeprecatedCall { .. }
182        | IssueKind::DeprecatedMethodCall { .. }
183        | IssueKind::DeprecatedMethod { .. }
184        | IssueKind::DeprecatedClass { .. } => cfg.deprecated_calls,
185        _ => true,
186    }
187}
188
189/// Check for duplicate class/function/interface/trait/enum declarations.
190pub fn duplicate_declaration_diagnostics(
191    _source: &str,
192    doc: &ParsedDoc,
193    cfg: &DiagnosticsConfig,
194) -> Vec<Diagnostic> {
195    if !cfg.enabled || !cfg.duplicate_declarations {
196        return vec![];
197    }
198    let sv = doc.view();
199    let mut seen: std::collections::HashMap<String, ()> = std::collections::HashMap::new();
200    let mut diags = Vec::new();
201    collect_duplicate_decls(sv, &doc.program().stmts, "", &mut seen, &mut diags);
202    diags
203}
204
205fn collect_duplicate_decls(
206    sv: SourceView<'_>,
207    stmts: &[php_ast::Stmt<'_, '_>],
208    current_ns: &str,
209    seen: &mut std::collections::HashMap<String, ()>,
210    diags: &mut Vec<Diagnostic>,
211) {
212    // Track the active namespace for unbraced `namespace Foo;` declarations.
213    let mut active_ns = current_ns.to_string();
214
215    for stmt in stmts {
216        let name_and_span: Option<(&str, u32)> = match &stmt.kind {
217            StmtKind::Class(c) => c.name.map(|n| (n, stmt.span.start)),
218            StmtKind::Interface(i) => Some((i.name, stmt.span.start)),
219            StmtKind::Trait(t) => Some((t.name, stmt.span.start)),
220            StmtKind::Enum(e) => Some((e.name, stmt.span.start)),
221            StmtKind::Function(f) => Some((f.name, stmt.span.start)),
222            StmtKind::Namespace(ns) => {
223                let ns_name = ns
224                    .name
225                    .as_ref()
226                    .map(|n| n.to_string_repr().to_string())
227                    .unwrap_or_default();
228                match &ns.body {
229                    php_ast::NamespaceBody::Braced(inner) => {
230                        let child_ns = if current_ns.is_empty() {
231                            ns_name
232                        } else {
233                            format!("{}\\{}", current_ns, ns_name)
234                        };
235                        collect_duplicate_decls(sv, inner, &child_ns, seen, diags);
236                    }
237                    php_ast::NamespaceBody::Simple => {
238                        // Unbraced namespace: subsequent siblings belong to this namespace.
239                        active_ns = if current_ns.is_empty() {
240                            ns_name
241                        } else {
242                            format!("{}\\{}", current_ns, ns_name)
243                        };
244                    }
245                }
246                None
247            }
248            _ => None,
249        };
250        if let Some((name, span_start)) = name_and_span {
251            let key = if active_ns.is_empty() {
252                name.to_string()
253            } else {
254                format!("{}\\{}", active_ns, name)
255            };
256            if seen.insert(key, ()).is_some() {
257                // Find the byte offset of the actual name by searching forward from span_start.
258                // The span_start points to keywords like "class", "function", etc.,
259                // so we need to find where the identifier name appears.
260                let name_byte_offset = find_name_offset(&sv.source()[span_start as usize..], name)
261                    .map(|off| span_start + off as u32)
262                    .unwrap_or(span_start);
263
264                let start_pos = sv.position_of(name_byte_offset);
265                // Calculate end position by converting UTF-8 character length to UTF-16 code units
266                let name_utf16_len = name.chars().map(|c| c.len_utf16() as u32).sum::<u32>();
267                let end_pos = Position {
268                    line: start_pos.line,
269                    character: start_pos.character + name_utf16_len,
270                };
271                diags.push(Diagnostic {
272                    range: Range {
273                        start: start_pos,
274                        end: end_pos,
275                    },
276                    severity: Some(DiagnosticSeverity::WARNING),
277                    message: format!(
278                        "Duplicate declaration: `{name}` is already defined in this file"
279                    ),
280                    source: Some("php-lsp".to_string()),
281                    ..Default::default()
282                });
283            }
284        }
285    }
286}
287
/// Find the byte offset of the first whole-identifier occurrence of `name`
/// inside `source`.
///
/// A candidate only counts when the characters immediately before and after
/// it are not identifier characters, so `Foo` is not found inside `FooBar` or
/// `MyFoo`. Returns `None` when there is no such occurrence.
///
/// Candidates are probed at character boundaries only, so source text that
/// contains multi-byte UTF-8 characters (comments, strings, non-ASCII names)
/// never causes an out-of-boundary slice.
fn find_name_offset(source: &str, name: &str) -> Option<usize> {
    source
        .char_indices()
        .map(|(start, _)| start)
        .find(|&start| {
            source[start..].strip_prefix(name).map_or(false, |rest| {
                let boundary_before = source[..start]
                    .chars()
                    .next_back()
                    .map_or(true, |c| !is_identifier_char(c));
                let boundary_after = rest
                    .chars()
                    .next()
                    .map_or(true, |c| !is_identifier_char(c));
                boundary_before && boundary_after
            })
        })
}

/// Check if a character is valid in a PHP identifier.
///
/// PHP identifiers match `[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*` at the
/// byte level. Every non-ASCII character is encoded in UTF-8 using only bytes
/// `>= 0x80`, so any non-ASCII character counts as an identifier character.
fn is_identifier_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || c == '_' || !c.is_ascii()
}
312
313fn to_lsp_diagnostic(issue: mir_issues::Issue, _uri: &Url) -> Diagnostic {
314    // mir-analyzer uses 1-based line numbers; LSP uses 0-based.
315    let line = issue.location.line.saturating_sub(1);
316    let col_start = issue.location.col_start as u32;
317    let col_end = issue.location.col_end as u32;
318    Diagnostic {
319        range: Range {
320            start: Position {
321                line,
322                character: col_start,
323            },
324            end: Position {
325                line,
326                character: col_end.max(col_start + 1),
327            },
328        },
329        severity: Some(match issue.severity {
330            mir_issues::Severity::Error => DiagnosticSeverity::ERROR,
331            mir_issues::Severity::Warning => DiagnosticSeverity::WARNING,
332            mir_issues::Severity::Info => DiagnosticSeverity::INFORMATION,
333        }),
334        code: Some(NumberOrString::String(issue.kind.name().to_string())),
335        source: Some("php-lsp".to_string()),
336        message: issue.kind.message(),
337        ..Default::default()
338    }
339}
340
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src` and run the duplicate-declaration check with every
    /// diagnostic category enabled.
    fn duplicates_in(src: &str) -> Vec<Diagnostic> {
        let parsed = ParsedDoc::parse(src.to_string());
        duplicate_declaration_diagnostics(src, &parsed, &DiagnosticsConfig::all_enabled())
    }

    /// Length of `text` in UTF-16 code units.
    fn utf16_len(text: &str) -> u32 {
        text.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
    }

    #[test]
    fn duplicate_class_emits_warning() {
        let found = duplicates_in("<?php\nclass Foo {}\nclass Foo {}");
        assert_eq!(
            found.len(),
            1,
            "expected exactly 1 duplicate warning, got: {:?}",
            found
        );
        let only = &found[0];
        assert_eq!(only.severity, Some(DiagnosticSeverity::WARNING));
        assert!(
            only.message.contains("Foo"),
            "message should mention 'Foo'"
        );
    }

    #[test]
    fn no_duplicate_for_unique_declarations() {
        let found = duplicates_in("<?php\nclass Foo {}\nclass Bar {}");
        assert!(found.is_empty());
    }

    #[test]
    fn namespace_scoped_duplicate_not_flagged() {
        // `Foo` lives in two different braced namespaces, so nothing is reported.
        let found = duplicates_in(
            "<?php\nnamespace App\\A {\nclass Foo {}\n}\nnamespace App\\B {\nclass Foo {}\n}",
        );
        assert!(
            found.is_empty(),
            "classes with same name in different namespaces should not be flagged, got: {:?}",
            found
        );
    }

    #[test]
    fn duplicate_interface_declaration() {
        // The same interface twice in one file yields exactly one warning.
        let found = duplicates_in("<?php\ninterface Logger {}\ninterface Logger {}");
        assert_eq!(
            found.len(),
            1,
            "expected exactly 1 duplicate-declaration diagnostic, got: {:?}",
            found
        );
        let only = &found[0];
        assert!(
            only.message.contains("Logger"),
            "diagnostic message should mention 'Logger'"
        );
        assert_eq!(
            only.severity,
            Some(DiagnosticSeverity::WARNING),
            "duplicate declaration should be a warning"
        );
    }

    #[test]
    fn duplicate_trait_declaration() {
        // The same trait twice in one file yields exactly one warning.
        let found = duplicates_in("<?php\ntrait Serializable {}\ntrait Serializable {}");
        assert_eq!(
            found.len(),
            1,
            "expected exactly 1 duplicate-declaration diagnostic, got: {:?}",
            found
        );
        let only = &found[0];
        assert!(
            only.message.contains("Serializable"),
            "diagnostic message should mention 'Serializable'"
        );
        assert_eq!(
            only.severity,
            Some(DiagnosticSeverity::WARNING),
            "duplicate trait declaration should be a warning"
        );
    }

    #[test]
    fn duplicate_diagnostic_has_warning_severity() {
        // Function duplicates carry WARNING severity, like class-like ones.
        let found = duplicates_in("<?php\nfunction doWork() {}\nfunction doWork() {}");
        assert_eq!(found.len(), 1, "expected exactly 1 duplicate diagnostic");
        assert_eq!(
            found[0].severity,
            Some(DiagnosticSeverity::WARNING),
            "duplicate declaration diagnostic should have WARNING severity"
        );
    }

    #[test]
    fn unbraced_namespace_classes_with_same_name_not_flagged() {
        // `Foo` in two different unbraced namespaces is not a duplicate.
        let found =
            duplicates_in("<?php\nnamespace App\\A;\nclass Foo {}\nnamespace App\\B;\nclass Foo {}");
        assert!(
            found.is_empty(),
            "classes with same name in different unbraced namespaces should not be flagged, got: {:?}",
            found
        );
    }

    #[test]
    fn unbraced_namespace_duplicate_in_same_namespace_is_flagged() {
        // `Foo` twice inside the same unbraced namespace yields one warning.
        let found = duplicates_in("<?php\nnamespace App;\nclass Foo {}\nclass Foo {}");
        assert_eq!(
            found.len(),
            1,
            "expected 1 duplicate-declaration diagnostic, got: {:?}",
            found
        );
        assert!(found[0].message.contains("Foo"));
    }

    #[test]
    fn duplicate_declaration_range_spans_full_name() {
        // The range must cover the whole name, not only its first character.
        let found = duplicates_in("<?php\nclass Foo {}\nclass Foo {}");
        assert_eq!(found.len(), 1, "expected exactly 1 duplicate diagnostic");

        let range = &found[0].range;
        let range_len = range.end.character - range.start.character;
        let expected_len = utf16_len("Foo");
        assert_eq!(
            range_len, expected_len,
            "range length {} should match 'Foo' length {}",
            range_len, expected_len
        );

        // In "class Foo {}" the name occupies columns 6..9, after "class ".
        assert_eq!(
            range.start.character, 6,
            "range should start at 'F' in 'Foo'"
        );
        assert_eq!(
            range.end.character, 9,
            "range should end after 'o' in 'Foo'"
        );
    }

    #[test]
    fn duplicate_function_declaration_range_spans_name() {
        // Function duplicates must also cover the full function name.
        let found = duplicates_in("<?php\nfunction doWork() {}\nfunction doWork() {}");
        assert_eq!(found.len(), 1, "expected exactly 1 duplicate diagnostic");

        let range = &found[0].range;
        let range_len = range.end.character - range.start.character;
        let expected_len = utf16_len("doWork");
        assert_eq!(
            range_len, expected_len,
            "range length {} should match 'doWork' length {}",
            range_len, expected_len
        );

        // In "function doWork() {}" the name occupies columns 9..15, after "function ".
        assert_eq!(
            range.start.character, 9,
            "range should start at 'd' in 'doWork'"
        );
        assert_eq!(
            range.end.character, 15,
            "range should end after 'k' in 'doWork'"
        );
    }

    #[test]
    fn duplicate_interface_range_spans_name() {
        // Interface duplicates must cover the full interface name.
        let found = duplicates_in("<?php\ninterface Logger {}\ninterface Logger {}");
        assert_eq!(found.len(), 1, "expected exactly 1 duplicate diagnostic");

        let range = &found[0].range;
        let range_len = range.end.character - range.start.character;
        let expected_len = utf16_len("Logger");
        assert_eq!(
            range_len, expected_len,
            "range length {} should match 'Logger' length {}",
            range_len, expected_len
        );

        // In "interface Logger {}" the name occupies columns 10..16, after "interface ".
        assert_eq!(
            range.start.character, 10,
            "range should start at 'L' in 'Logger'"
        );
        assert_eq!(
            range.end.character, 16,
            "range should end after 'r' in 'Logger'"
        );
    }

    #[test]
    fn duplicate_declaration_range_on_correct_line() {
        // With a blank line in between, the second `class Foo` sits on
        // 0-indexed line 3.
        let found = duplicates_in("<?php\nclass Foo {}\n\nclass Foo {}");
        assert_eq!(found.len(), 1, "expected exactly 1 duplicate diagnostic");

        let range = &found[0].range;
        assert_eq!(
            range.start.line, 3,
            "duplicate should be reported on line 3 (0-indexed)"
        );
        assert_eq!(
            range.end.line, 3,
            "range end should be on same line as start"
        );
    }

    #[test]
    fn to_lsp_diagnostic_sets_code_to_issue_kind_name() {
        use mir_issues::{Issue, IssueKind, Location};
        use std::sync::Arc;
        use tower_lsp::lsp_types::{NumberOrString, Url};

        let uri = Url::parse("file:///test.php").unwrap();
        let kind = IssueKind::UndefinedClass {
            name: "Foo".to_string(),
        };
        let location = Location {
            file: Arc::from("file:///test.php"),
            line: 1,
            line_end: 1,
            col_start: 0,
            col_end: 3,
        };

        let diag = to_lsp_diagnostic(Issue::new(kind, location), &uri);

        assert_eq!(
            diag.code,
            Some(NumberOrString::String("UndefinedClass".to_string())),
            "diagnostic code must be the IssueKind name so code actions can match by type"
        );
        assert!(
            diag.message.contains("Foo"),
            "diagnostic message should mention the class name"
        );
    }
}