mir-analyzer 0.22.0

Analysis engine for the mir PHP static analyzer
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
use std::fmt::Write as FmtWrite;
use std::fs;
use std::path::{Path, PathBuf};

use rayon::prelude::*;

/// One fixture category directory (a sub-directory of `tests/fixtures`) together
/// with the `.phpt` fixtures found directly inside it.
struct FixtureCategory {
    // Name of the generated `mod`: the category directory name with `-` replaced by `_`.
    mod_name: String,
    // One `(path, file_name, rel)` tuple per fixture: the fixture's path on disk, its
    // file name, and its `tests/fixtures/<category>/<file>` manifest-relative path.
    fixtures: Vec<(PathBuf, String, String)>,
}

fn main() {
    let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap();
    let out_dir = std::env::var("OUT_DIR").unwrap();

    generate_builtin_fn_names(Path::new(&manifest_dir), Path::new(&out_dir));
    generate_stub_files(Path::new(&manifest_dir), Path::new(&out_dir));

    let fixtures_dir = Path::new(&manifest_dir).join("tests").join("fixtures");

    let out_path = Path::new(&out_dir).join("fixture_tests.rs");

    let mut code = String::from("// Auto-generated by build.rs — do not edit manually\n");

    if !fixtures_dir.exists() {
        fs::write(&out_path, &code).unwrap();
        return;
    }

    // Rerun when the top-level fixtures directory changes (category added/removed).
    println!("cargo:rerun-if-changed={}", fixtures_dir.display());

    let mut categories: Vec<_> = fs::read_dir(&fixtures_dir)
        .unwrap()
        .filter_map(|e| e.ok())
        .filter(|e| e.file_type().is_ok_and(|t| t.is_dir()))
        .collect();
    categories.sort_by_key(|e| e.file_name());

    // Collect all fixtures before reading (separate collection from I/O).
    let mut all_fixture_data: Vec<FixtureCategory> = Vec::new();

    for cat_entry in categories {
        let cat_dir_name = cat_entry.file_name().to_string_lossy().into_owned();
        let cat_mod_name = cat_dir_name.replace('-', "_");

        // Rerun when a fixture is added to or removed from this category.
        println!("cargo:rerun-if-changed={}", cat_entry.path().display());

        let mut fixtures: Vec<_> = fs::read_dir(cat_entry.path())
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| e.path().extension().is_some_and(|ext| ext == "phpt"))
            .collect();
        fixtures.sort_by_key(|e| e.file_name());

        if fixtures.is_empty() {
            continue;
        }

        let fixture_data: Vec<_> = fixtures
            .into_iter()
            .map(|f| {
                let path = f.path();
                let file_name = path.file_name().unwrap().to_string_lossy().into_owned();
                let rel = format!("tests/fixtures/{cat_dir_name}/{file_name}");
                (path, file_name, rel)
            })
            .collect();

        all_fixture_data.push(FixtureCategory {
            mod_name: cat_mod_name,
            fixtures: fixture_data,
        });
    }

    // Parallel file reading: flatten all fixtures and read in parallel.
    let all_paths: Vec<_> = all_fixture_data
        .iter()
        .flat_map(|cat| cat.fixtures.iter().map(|(path, _, _)| path.clone()))
        .collect();

    let file_contents: std::collections::HashMap<PathBuf, String> = all_paths
        .par_iter()
        .map(|path| (path.clone(), fs::read_to_string(path).unwrap_or_default()))
        .collect();

    // Emit cargo directives and generate code.
    for category in all_fixture_data {
        code.push_str(&format!("\nmod {} {{\n", category.mod_name));

        for (path, _file_name, rel) in category.fixtures {
            let stem = path
                .file_stem()
                .unwrap()
                .to_string_lossy()
                .replace('-', "_");

            // Rerun when this specific fixture file changes.
            println!("cargo:rerun-if-changed={manifest_dir}/{rel}");

            let content = &file_contents[&path];
            let ignore_attr = if content.contains("===ignore===") {
                "    #[ignore]\n"
            } else {
                ""
            };
            let doc_comment = extract_description(content)
                .map(|d| {
                    d.lines()
                        .map(|l| format!("    /// {}\n", l.trim()))
                        .collect::<String>()
                })
                .unwrap_or_default();

            code.push_str(&format!(
                "{doc_comment}    #[test]\n{ignore_attr}    fn {stem}() {{\n        \
                 mir_analyzer::test_utils::run_fixture(concat!(env!(\"CARGO_MANIFEST_DIR\"), \"/{rel}\"));\n    \
                 }}\n"
            ));
        }

        code.push_str("}\n");
    }

    fs::write(&out_path, code).unwrap();
}

/// Read `phpstorm-stubs/PhpStormStubsMap.php` and write `phpstorm_builtin_fns.rs`
/// into `out_dir`, containing four sorted static slices restricted to the
/// extensions that have a directory under `stubs/`:
///
/// * `BUILTIN_FN_NAMES` — function names as spelled in the map
/// * `STUB_FN_INDEX` — lowercased function name → stub virtual path
/// * `STUB_CLASS_INDEX` — lowercased class name (backslashes kept) → stub virtual path
/// * `STUB_CONST_INDEX` — constant name (case preserved) → stub virtual path
///
/// A virtual path is the map's path prefixed with `stubs/`
/// (e.g. `"stubs/standard/standard_0.php"`).
///
/// When the map file does not exist, four empty slices are written instead.
///
/// # Panics
///
/// Panics if the map exists but `stubs/` is not a directory, and on any I/O error.
fn generate_builtin_fn_names(manifest_dir: &Path, out_dir: &Path) {
    /// Which array of the map the parser is currently inside.
    #[derive(Clone, Copy)]
    enum Section {
        None,
        Classes,
        Functions,
        Constants,
    }

    /// Split a trimmed `'name' => 'path',` line into `(raw_name, path)`.
    fn parse_entry(line: &str) -> Option<(&str, &str)> {
        let after_quote = line.strip_prefix('\'')?;
        let (raw_name, tail) = after_quote.split_once('\'')?;
        let quoted_path = tail.trim().strip_prefix("=> '")?;
        let (path, _) = quoted_path.split_once('\'')?;
        Some((raw_name, path))
    }

    /// Sort by key (stable) and keep only the first entry seen for each key.
    fn sort_and_dedup(index: &mut Vec<(String, String)>) {
        index.sort_by(|lhs, rhs| lhs.0.cmp(&rhs.0));
        index.dedup_by(|later, earlier| later.0 == earlier.0);
    }

    let map_path = manifest_dir
        .join("phpstorm-stubs")
        .join("PhpStormStubsMap.php");
    let out_path = out_dir.join("phpstorm_builtin_fns.rs");

    if !map_path.exists() {
        fs::write(
            &out_path,
            "pub(crate) static BUILTIN_FN_NAMES: &[&str] = &[];\n\
             pub(crate) static STUB_FN_INDEX: &[(&str, &str)] = &[];\n\
             pub(crate) static STUB_CLASS_INDEX: &[(&str, &str)] = &[];\n\
             pub(crate) static STUB_CONST_INDEX: &[(&str, &str)] = &[];\n",
        )
        .unwrap();
        return;
    }

    println!("cargo:rerun-if-changed={}", map_path.display());

    let map_source = fs::read_to_string(&map_path).unwrap();

    // Lowercased names of the directories under stubs/: only map entries whose
    // leading path component matches one of these (case-insensitively) are kept.
    let stubs_dir = manifest_dir.join("stubs");
    assert!(
        stubs_dir.is_dir(),
        "mir-analyzer build.rs: stubs/ directory is missing at {} — \
         the stub index would be empty and all built-ins would be reported as undefined. \
         If this fired in cargo package, ensure stubs/ lives inside the crate.",
        stubs_dir.display()
    );
    let mut stub_dirs_lower = std::collections::HashSet::new();
    for entry in fs::read_dir(&stubs_dir).unwrap().filter_map(Result::ok) {
        if entry.file_type().is_ok_and(|kind| kind.is_dir()) {
            stub_dirs_lower.insert(entry.file_name().to_string_lossy().to_lowercase());
        }
    }

    let mut current = Section::None;
    let mut fn_names: Vec<String> = Vec::new();
    // Each index entry is (lookup key, virtual path).
    let mut fn_index: Vec<(String, String)> = Vec::new();
    let mut class_index: Vec<(String, String)> = Vec::new();
    let mut const_index: Vec<(String, String)> = Vec::new();

    for line in map_source.lines().map(str::trim) {
        // Array openers and the `);` closer only switch the parser state.
        let boundary = match line {
            "const CLASSES = array (" => Some(Section::Classes),
            "const FUNCTIONS = array (" => Some(Section::Functions),
            "const CONSTANTS = array (" => Some(Section::Constants),
            ");" => Some(Section::None),
            _ => None,
        };
        if let Some(next) = boundary {
            current = next;
            continue;
        }

        let Some((raw_name, path)) = parse_entry(line) else {
            continue;
        };
        let in_known_dir = path
            .split_once('/')
            .is_some_and(|(dir, _)| stub_dirs_lower.contains(&dir.to_lowercase()));
        if !in_known_dir {
            continue;
        }

        // PHP source `'\\Foo'` denotes the literal string `\Foo`; undo the escaping.
        let name = raw_name.replace("\\\\", "\\");
        let virtual_path = format!("stubs/{path}");

        match current {
            Section::None => {}
            Section::Classes => class_index.push((name.to_lowercase(), virtual_path)),
            Section::Functions => {
                fn_index.push((name.to_lowercase(), virtual_path));
                fn_names.push(name);
            }
            Section::Constants => const_index.push((name, virtual_path)),
        }
    }

    fn_names.sort();
    fn_names.dedup();
    sort_and_dedup(&mut fn_index);
    sort_and_dedup(&mut class_index);
    sort_and_dedup(&mut const_index);

    let mut generated = String::from(
        "// Auto-generated by build.rs from PhpStormStubsMap.php — do not edit directly.\n\n\
         /// Sorted list of PHP built-in function names. Used for fast existence checks.\n\
         pub(crate) static BUILTIN_FN_NAMES: &[&str] = &[\n",
    );
    generated.extend(fn_names.iter().map(|name| format!("    {name:?},\n")));
    generated.push_str("];\n\n");

    // (doc line, static name, entries) for each lookup table, in emission order.
    let tables: [(&str, &str, &[(String, String)]); 3] = [
        (
            "Sorted lowercased function name → stub virtual path.",
            "STUB_FN_INDEX",
            fn_index.as_slice(),
        ),
        (
            "Sorted lowercased class FQCN → stub virtual path.",
            "STUB_CLASS_INDEX",
            class_index.as_slice(),
        ),
        (
            "Sorted constant name (case-sensitive) → stub virtual path.",
            "STUB_CONST_INDEX",
            const_index.as_slice(),
        ),
    ];
    for (doc, static_name, entries) in tables {
        writeln!(generated, "/// {doc}").unwrap();
        writeln!(
            generated,
            "pub(crate) static {static_name}: &[(&str, &str)] = &["
        )
        .unwrap();
        for (key, path) in entries {
            writeln!(generated, "    ({key:?}, {path:?}),").unwrap();
        }
        generated.push_str("];\n\n");
    }

    fs::write(&out_path, generated).unwrap();
}

/// Walk `dir` depth-first in sorted order and append one
/// `("<virtual path>", include_str!("<absolute path>")),` line to `code` for
/// every `.php` file found.
///
/// The virtual path is the file's path relative to `stubs_root` (the full path
/// is used if it is not under `stubs_root`), with `/` separators. A directory
/// that cannot be listed contributes nothing.
fn collect_php_files(dir: &Path, stubs_root: &Path, code: &mut String) {
    let Ok(listing) = fs::read_dir(dir) else {
        return;
    };
    let mut children: Vec<PathBuf> = listing
        .filter_map(Result::ok)
        .map(|entry| entry.path())
        .collect();
    children.sort();

    for child in children {
        if child.is_dir() {
            collect_php_files(&child, stubs_root, code);
            continue;
        }
        if !child.extension().is_some_and(|ext| ext == "php") {
            continue;
        }

        let virtual_path = child
            .strip_prefix(stubs_root)
            .unwrap_or(&child)
            .to_string_lossy()
            .replace('\\', "/");

        // include_str! needs a stable absolute path. Windows canonicalisation
        // yields a `\\?\`-prefixed path, so that prefix is dropped and the
        // separators are normalised to `/`.
        let resolved = child.canonicalize().unwrap_or_else(|_| child.clone());
        let resolved = resolved.to_string_lossy();
        let include_path = resolved
            .strip_prefix(r"\\?\")
            .unwrap_or(&resolved)
            .replace('\\', "/");

        writeln!(
            code,
            "    ({virtual_path:?}, include_str!({include_path:?})),"
        )
        .unwrap();
    }
}

// ---------------------------------------------------------------------------
// Stub embedding — stubs/{ext}/*.php
// ---------------------------------------------------------------------------

/// Embed every `.php` file under `stubs/{ext}/` into a generated `STUB_FILES`
/// table of `(virtual path, content)` pairs, written to `stub_files.rs` in
/// `out_dir`.
///
/// Virtual paths are relative to `manifest_dir`, so they look like
/// `"stubs/standard/standard_9.php"`. Files sitting directly in `stubs/`
/// (outside an extension directory) are not visited.
///
/// # Panics
///
/// Panics if `stubs/` is not a directory, and on any I/O error.
fn generate_stub_files(manifest_dir: &Path, out_dir: &Path) {
    let stubs_dir = manifest_dir.join("stubs");

    // Fail loudly instead of emitting an empty `STUB_FILES`: with no stubs embedded,
    // every built-in function and class would be reported as undefined downstream.
    assert!(
        stubs_dir.is_dir(),
        "mir-analyzer build.rs: stubs/ directory is missing at {} — \
         the published crate would have no built-in symbols. \
         If this fired in cargo package, ensure stubs/ lives inside the crate.",
        stubs_dir.display()
    );

    println!("cargo:rerun-if-changed={}", stubs_dir.display());

    // Extension directories, visited in sorted order so the output is deterministic.
    let mut extension_dirs: Vec<PathBuf> = Vec::new();
    for entry in fs::read_dir(&stubs_dir).unwrap() {
        let Ok(entry) = entry else {
            continue;
        };
        if entry.file_type().is_ok_and(|kind| kind.is_dir()) {
            extension_dirs.push(entry.path());
        }
    }
    extension_dirs.sort();

    let mut generated = String::new();
    generated.push_str(
        "/// PHP stubs embedded from stubs/ — the single source of built-in definitions.\n\
         /// Auto-generated by build.rs — do not edit directly.\n\
         pub(crate) static STUB_FILES: &[(&str, &str)] = &[\n",
    );

    for extension_dir in &extension_dirs {
        println!("cargo:rerun-if-changed={}", extension_dir.display());
        // Passing the crate root as the prefix to strip keeps the leading `stubs/`
        // component in every embedded path.
        collect_php_files(extension_dir, manifest_dir, &mut generated);
    }

    generated.push_str("];\n");
    fs::write(out_dir.join("stub_files.rs"), generated).unwrap();
}

/// Extract the body of the `===description===` section from a `.phpt` file.
///
/// The body runs from just after the first `===description===` marker up to the
/// next line that begins with `===` (the following section marker), or to the
/// end of `content` when no such line exists. Surrounding whitespace is trimmed.
///
/// Returns `None` when the marker is absent or the trimmed body is empty.
fn extract_description(content: &str) -> Option<String> {
    const MARKER: &str = "===description===";

    let (_, after_marker) = content.split_once(MARKER)?;

    // Only a `===` that opens a line ends the section. Matching `===` anywhere
    // would cut a description short as soon as it mentions PHP's `===` operator.
    let body = after_marker
        .split_once("\n===")
        .map_or(after_marker, |(body, _)| body)
        .trim();

    (!body.is_empty()).then(|| body.to_string())
}