source-map-php 0.1.3

CLI-first PHP code search indexer for Laravel and Hyperf repositories
Documentation
use std::path::{Path, PathBuf};

use anyhow::Result;
use globset::{Glob, GlobMatcher};
use walkdir::WalkDir;

use crate::config::PathsConfig;

/// A single file selected by [`scan_repo`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScannedFile {
    /// Location of the file as reached through the `repo` argument of
    /// [`scan_repo`]; it is only truly absolute when `repo` itself is.
    pub absolute_path: PathBuf,
    /// The same location with the `repo` prefix stripped. This is the path
    /// that filtering, sorting and de-duplication operate on.
    pub relative_path: PathBuf,
}

pub fn scan_repo(repo: &Path, paths: &PathsConfig) -> Result<Vec<ScannedFile>> {
    let deny_matchers = build_matchers(&paths.deny)?;
    let vendor_matchers = build_matchers(&paths.allow_vendor_paths)?;
    let mut results = Vec::new();

    for allow in &paths.allow {
        let root = repo.join(allow);
        if !root.exists() {
            continue;
        }
        if root.is_file() {
            let rel = root.strip_prefix(repo).unwrap().to_path_buf();
            if is_allowed_file(&rel, &deny_matchers, paths, &vendor_matchers) {
                results.push(ScannedFile {
                    absolute_path: root,
                    relative_path: rel,
                });
            }
            continue;
        }

        for entry in WalkDir::new(&root)
            .into_iter()
            .filter_map(Result::ok)
            .filter(|entry| entry.file_type().is_file())
        {
            let rel = entry.path().strip_prefix(repo).unwrap().to_path_buf();
            if is_allowed_file(&rel, &deny_matchers, paths, &vendor_matchers) {
                results.push(ScannedFile {
                    absolute_path: entry.path().to_path_buf(),
                    relative_path: rel,
                });
            }
        }
    }

    if paths.allow_vendor {
        let vendor_root = repo.join("vendor");
        if vendor_root.exists() {
            for entry in WalkDir::new(&vendor_root)
                .into_iter()
                .filter_map(Result::ok)
                .filter(|entry| entry.file_type().is_file())
            {
                let rel = entry.path().strip_prefix(repo).unwrap().to_path_buf();
                if is_allowed_file(&rel, &deny_matchers, paths, &vendor_matchers) {
                    results.push(ScannedFile {
                        absolute_path: entry.path().to_path_buf(),
                        relative_path: rel,
                    });
                }
            }
        }
    }

    results.sort_by(|left, right| left.relative_path.cmp(&right.relative_path));
    results.dedup_by(|left, right| left.relative_path == right.relative_path);
    Ok(results)
}

/// Compiles every pattern in `globs` into a [`GlobMatcher`], keeping the
/// input order. Stops and returns the error of the first pattern that fails
/// to parse.
fn build_matchers(globs: &[String]) -> Result<Vec<GlobMatcher>> {
    let mut compiled = Vec::with_capacity(globs.len());
    for pattern in globs {
        let matcher = Glob::new(pattern)?.compile_matcher();
        compiled.push(matcher);
    }
    Ok(compiled)
}

/// Decides whether the repo-relative path `relative` should be indexed.
///
/// The checks run in this order:
/// 1. Any match against `deny_matchers` rejects the file.
/// 2. A path under the top-level `vendor` directory is rejected unless
///    `paths.allow_vendor` is set and at least one of `vendor_matchers`
///    matches the path itself or one of its ancestors.
/// 3. Whatever is left must end in `.php` or be exactly one of
///    `composer.json`, `composer.lock`, `phpunit.xml` or `pest.php`
///    (a full-path comparison, so only files at the repository root qualify).
fn is_allowed_file(
    relative: &Path,
    deny_matchers: &[GlobMatcher],
    paths: &PathsConfig,
    vendor_matchers: &[GlobMatcher],
) -> bool {
    let rel = relative.to_string_lossy();
    if deny_matchers.iter().any(|matcher| matcher.is_match(&*rel)) {
        return false;
    }

    // Compare path components instead of the string prefix "vendor/": the
    // string form uses the platform separator, so on Windows `vendor\...`
    // would slip past a textual check and bypass the vendor gate.
    let is_vendor = relative.starts_with("vendor");
    if is_vendor {
        if !paths.allow_vendor {
            return false;
        }
        // A vendor glob may name a directory (e.g. a package's source
        // folder); matching against every ancestor lets such a glob admit
        // all files beneath that directory.
        let vendor_allowed = vendor_matchers.iter().any(|matcher| {
            matcher.is_match(&*rel)
                || relative
                    .ancestors()
                    .any(|ancestor| matcher.is_match(ancestor.to_string_lossy().as_ref()))
        });
        if !vendor_allowed {
            return false;
        }
    }

    rel.ends_with(".php")
        || matches!(
            rel.as_ref(),
            "composer.json" | "composer.lock" | "phpunit.xml" | "pest.php"
        )
}

#[cfg(test)]
mod tests {
    use std::fs;

    use tempfile::tempdir;

    use crate::config::IndexerConfig;

    use super::scan_repo;

    /// Writes `contents` to `relative` below `root`, creating any missing
    /// parent directories first.
    fn write_fixture(root: &std::path::Path, relative: &str, contents: &str) {
        let target = root.join(relative);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(target, contents).unwrap();
    }

    /// Scans `root` with the default indexer paths and returns the relative
    /// paths of the result as strings, in the order `scan_repo` produced them.
    fn scan_with_defaults(root: &std::path::Path) -> Vec<String> {
        let config = IndexerConfig::default();
        let mut found = Vec::new();
        for entry in scan_repo(root, &config.paths).unwrap() {
            found.push(entry.relative_path.to_string_lossy().into_owned());
        }
        found
    }

    #[test]
    fn scans_allowlisted_php_and_blocks_denied_files() {
        let dir = tempdir().unwrap();
        let fixtures = [
            ("app/Service.php", "<?php class Service {}"),
            ("routes/web.php", "<?php"),
            ("storage/secret.php", "<?php"),
            (".env", "DB_PASSWORD=secret"),
            ("dump.csv", "bad"),
        ];
        for (relative, contents) in fixtures {
            write_fixture(dir.path(), relative, contents);
        }

        let files = scan_with_defaults(dir.path());

        assert_eq!(files, vec!["app/Service.php", "routes/web.php"]);
    }

    #[test]
    fn vendor_paths_respect_flag_and_glob() {
        let dir = tempdir().unwrap();
        write_fixture(
            dir.path(),
            "vendor/acme/package/src/Thing.php",
            "<?php class Thing {}",
        );
        write_fixture(
            dir.path(),
            "vendor/acme/package/tests/ThingTest.php",
            "<?php",
        );

        let files = scan_with_defaults(dir.path());

        assert_eq!(files, vec!["vendor/acme/package/src/Thing.php"]);
    }
}