guardy 0.2.4

Fast, secure git hooks in Rust with secret scanning and protected file synchronization
Documentation
use std::path::Path;

use anyhow::Result;
use globset::{Glob, GlobSetBuilder};

/// Parameters for filtering files
///
/// Bundles the inputs of every stage of the filter chain run by
/// `apply_file_filters`. All fields are borrowed; an empty slice (or `None`
/// for `root`) disables the corresponding stage.
#[derive(Debug)]
pub struct FilterParams<'a> {
    /// Include patterns: when any non-empty pattern is given, a file is kept
    /// only if it matches at least one of them. Empty strings are ignored.
    pub glob: &'a [String],
    /// Exclude patterns: a file matching any non-empty pattern is dropped.
    /// Empty strings are ignored.
    pub exclude: &'a [String],
    /// Root directory prefix: only files whose path begins with it are kept,
    /// and that leading part is rewritten to `./`. `None` or `""` disables
    /// the stage.
    pub root: Option<&'a str>,
    /// File type names; a file is kept when it satisfies any one of them.
    /// Recognised values: `"executable"`, `"not executable"`, `"symlink"`,
    /// `"not symlink"`, `"text"`, `"binary"`.
    pub file_types: &'a [String],
}

/// Apply the complete filter chain to a list of files
///
/// Filter order:
/// 1. Glob pattern filtering
/// 2. Exclude pattern filtering
/// 3. Root directory filtering
/// 4. File type filtering
pub fn apply_file_filters(files: &[String], params: FilterParams) -> Result<Vec<String>> {
    if files.is_empty() {
        return Ok(vec![]);
    }

    tracing::trace!("Applying filters to {} files", files.len());
    tracing::trace!("Glob patterns: {:?}", params.glob);
    tracing::trace!("Exclude patterns: {:?}", params.exclude);
    tracing::trace!("File types: {:?}", params.file_types);

    let mut filtered = files.to_vec();

    // Apply filter chain
    filtered = filter_by_glob(&filtered, params.glob)?;
    filtered = filter_by_exclude(&filtered, params.exclude)?;
    filtered = filter_by_root(&filtered, params.root)?;
    filtered = filter_by_file_types(&filtered, params.file_types)?;

    tracing::trace!("Filtered from {} to {} files", files.len(), filtered.len());
    Ok(filtered)
}

/// Keep only the files that match at least one glob pattern
///
/// Empty pattern strings are skipped. When no usable pattern remains the
/// input is returned unchanged, i.e. "no patterns" means "no restriction".
///
/// # Errors
///
/// Fails if a pattern cannot be parsed or the combined set cannot be built.
fn filter_by_glob(files: &[String], patterns: &[String]) -> Result<Vec<String>> {
    // Peek first so that an all-blank pattern list bypasses the matcher
    let mut usable = patterns.iter().filter(|p| !p.is_empty()).peekable();
    if usable.peek().is_none() {
        return Ok(files.to_vec());
    }

    // One compiled set answers "does any pattern match?" per file
    let mut set_builder = GlobSetBuilder::new();
    for pattern in usable {
        set_builder.add(Glob::new(pattern)?);
    }
    let matcher = set_builder.build()?;

    let mut kept = Vec::new();
    for file in files {
        if matcher.is_match(file) {
            kept.push(file.clone());
        }
    }

    tracing::trace!("Glob filter: {} -> {} files", files.len(), kept.len());
    Ok(kept)
}

/// Drop every file that matches any exclude pattern
///
/// Empty pattern strings are skipped. When no usable pattern remains the
/// input is returned unchanged.
///
/// # Errors
///
/// Fails if a pattern cannot be parsed or the combined set cannot be built.
fn filter_by_exclude(files: &[String], patterns: &[String]) -> Result<Vec<String>> {
    let active: Vec<&String> = patterns.iter().filter(|p| !p.is_empty()).collect();
    if active.is_empty() {
        return Ok(files.to_vec());
    }

    // Compile all exclusions into a single set
    let mut set_builder = GlobSetBuilder::new();
    for raw in active {
        set_builder.add(Glob::new(raw)?);
    }
    let excluded = set_builder.build()?;

    let mut kept = Vec::new();
    for file in files {
        if excluded.is_match(file) {
            continue;
        }
        kept.push(file.clone());
    }

    tracing::trace!(
        "Exclude filter: {} -> {} files",
        files.len(),
        kept.len()
    );
    Ok(kept)
}

/// Filter files by root directory
///
/// Keeps only the files located under `root` and rewrites each kept path so
/// that the root directory is replaced by `./`. `None` or an empty root
/// leaves the list untouched.
///
/// The root is matched on a path-component boundary and may be written with
/// or without a trailing `/`: a root of `src` or `src/` keeps `src/main.rs`
/// (as `./main.rs`) but not `src2/main.rs`.
fn filter_by_root(files: &[String], root: Option<&str>) -> Result<Vec<String>> {
    let Some(root_prefix) = root else {
        return Ok(files.to_vec());
    };

    if root_prefix.is_empty() {
        return Ok(files.to_vec());
    }

    // Normalise away trailing separators so "src" and "src/" behave the same
    let root_dir = root_prefix.trim_end_matches('/');

    let filtered: Vec<String> = files
        .iter()
        .filter_map(|file| {
            // Demanding a '/' right after the directory name stops a root of
            // "src" from also capturing "src2/..." or "srcfile"
            let below_root = file.strip_prefix(root_dir)?.strip_prefix('/')?;
            // Collapse any doubled separators so the result is never ".//x"
            let relative = below_root.trim_start_matches('/');
            Some(format!("./{relative}"))
        })
        .collect();

    tracing::trace!(
        "Root filter ({}): {} -> {} files",
        root_prefix,
        files.len(),
        filtered.len()
    );
    Ok(filtered)
}

/// Keep the files that satisfy at least one of the requested file types
/// (executable, text, binary, symlink and their negations)
///
/// An empty `types` slice disables the stage and returns the input as is.
///
/// # Errors
///
/// Stops at, and returns, the first error reported while classifying a file.
fn filter_by_file_types(files: &[String], types: &[String]) -> Result<Vec<String>> {
    if types.is_empty() {
        return Ok(files.to_vec());
    }

    // Collecting into `Result` aborts on the first classification error
    let kept = files
        .iter()
        .filter_map(|file| match matches_file_types(file, types) {
            Ok(true) => Some(Ok(file.clone())),
            Ok(false) => None,
            Err(err) => Some(Err(err)),
        })
        .collect::<Result<Vec<String>>>()?;

    tracing::trace!(
        "File type filter: {} -> {} files",
        files.len(),
        kept.len()
    );
    Ok(kept)
}

/// Decide whether a file satisfies any of the requested file types
///
/// `types` is scanned in order and the first satisfied entry wins. Unknown
/// entries are logged and skipped. A file whose metadata cannot be read is
/// logged and reported as not matching rather than failing the whole run.
///
/// Note: this relies on `std::os::unix::fs::PermissionsExt`, so it only
/// builds on Unix targets.
///
/// # Errors
///
/// Propagates I/O errors from reading file contents for the `"text"` and
/// `"binary"` checks.
fn matches_file_types(file: &str, types: &[String]) -> Result<bool> {
    use std::os::unix::fs::PermissionsExt;

    let path = Path::new(file);

    // Inspect the entry itself without following a symlink
    let Ok(metadata) = std::fs::symlink_metadata(path) else {
        tracing::warn!("Cannot access file metadata for: {}", file);
        return Ok(false);
    };

    let is_symlink = metadata.file_type().is_symlink();
    // Any of the user/group/other execute bits counts; symlinks never count
    // as executable (presumably because a link's own mode bits say nothing
    // about its target)
    let runnable = !is_symlink && metadata.permissions().mode() & 0o111 != 0;
    // Only regular, non-link files have their contents sniffed
    let regular = !is_symlink && metadata.is_file();

    for wanted in types {
        let hit = match wanted.as_str() {
            "executable" => runnable,
            "not executable" => !runnable,
            "symlink" => is_symlink,
            "not symlink" => !is_symlink,
            "text" => regular && is_text_file(path)?,
            "binary" => regular && !is_text_file(path)?,
            _ => {
                tracing::warn!("Unknown file type filter: {}", wanted);
                false
            }
        };

        if hit {
            return Ok(true);
        }
    }

    Ok(false)
}

/// Classify a file as text or binary from a single read of up to 1024 bytes
///
/// The file counts as text when the bytes read contain no NUL byte, a common
/// marker of binary content. An empty file is considered text.
///
/// # Errors
///
/// Fails if the file cannot be opened or read.
fn is_text_file(path: &Path) -> Result<bool> {
    use std::io::Read;

    let mut head = [0u8; 1024];
    let len = std::fs::File::open(path)?.read(&mut head)?;

    // An empty slice trivially has no NUL byte, so empty files come out as text
    Ok(head[..len].iter().all(|&byte| byte != 0))
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Turn string literals into the owned `String`s the filter API takes
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Run the full chain with no file-type filter and unwrap the result
    fn run(
        files: &[String],
        glob: &[String],
        exclude: &[String],
        root: Option<&str>,
    ) -> Vec<String> {
        let params = FilterParams {
            glob,
            exclude,
            root,
            file_types: &[],
        };
        apply_file_filters(files, params).unwrap()
    }

    // No input files: nothing comes out
    #[test]
    fn test_empty_files_returns_empty() {
        assert!(run(&[], &[], &[], None).is_empty());
    }

    // With every stage disabled the input passes through untouched
    #[test]
    fn test_no_filters_returns_all_files() {
        let files = owned(&["test.rs", "test.js"]);

        let kept = run(&files, &[], &[], None);
        assert_eq!(kept, files);
    }

    // A single include glob keeps only matching files, in input order
    #[test]
    fn test_glob_filtering() {
        let files = owned(&["test.rs", "test.js", "main.rs", "README.md"]);
        let glob = owned(&["*.rs"]);

        let kept = run(&files, &glob, &[], None);
        assert_eq!(kept, ["test.rs", "main.rs"]);
    }

    // An exclude pattern removes exactly the matching file
    #[test]
    fn test_exclude_filtering() {
        let files = owned(&["test.rs", "test.js", "README.md"]);
        let exclude = owned(&["README.md"]);

        let kept = run(&files, &[], &exclude, None);
        assert_eq!(kept, ["test.rs", "test.js"]);
    }

    // Only files under the root survive, rewritten relative to it
    #[test]
    fn test_root_filtering() {
        let files = owned(&["src/main.rs", "tests/test.rs", "other/file.js"]);

        let kept = run(&files, &[], &[], Some("src/"));
        assert_eq!(kept, ["./main.rs"]);
    }

    // Glob, exclude and root stages compose
    #[test]
    fn test_combined_filtering() {
        let files = owned(&[
            "src/main.rs",
            "src/lib.rs",
            "src/test.js",
            "tests/test.rs",
            "README.md",
        ]);
        let glob = owned(&["*.rs"]);
        let exclude = owned(&["README.md"]);

        let kept = run(&files, &glob, &exclude, Some("src/"));
        assert_eq!(kept, ["./main.rs", "./lib.rs"]);
    }

    // Several include globs act as a union
    #[test]
    fn test_multiple_glob_patterns() {
        let files = owned(&["test.rs", "test.js", "test.py", "README.md"]);
        let glob = owned(&["*.rs", "*.js"]);

        let kept = run(&files, &glob, &[], None);
        assert_eq!(kept, ["test.rs", "test.js"]);
    }

    // Blank pattern strings neither match nor exclude anything
    #[test]
    fn test_empty_patterns_ignored() {
        let files = owned(&["test.rs"]);
        let glob = owned(&["", "*.rs"]);
        let exclude = owned(&[""]);

        let kept = run(&files, &glob, &exclude, None);
        assert_eq!(kept, ["test.rs"]);
    }
}