prek 0.3.11

A fast Git hook manager written in Rust, designed as a drop-in alternative to pre-commit, reimagined.
use std::collections::BTreeSet;
use std::io::Write;
use std::path::Path;

use anyhow::Result;
use clap::Parser;
use fancy_regex::{Regex, escape};
use tokio::io::{AsyncBufReadExt, BufReader};

use crate::hook::Hook;
use crate::hooks::run_concurrent_file_checks;
use crate::run::CONCURRENCY;

// Command-line options accepted by the hook.
//
// Plain `//` comments are used on purpose: clap's derive macro turns `///` doc
// comments into about/help text, which would alter the generated command.
#[derive(Parser)]
#[command(
    disable_help_subcommand = true,
    disable_version_flag = true,
    disable_help_flag = true
)]
struct Args {
    // Extra hosts to check in addition to `github.com`; the flag may be repeated.
    #[arg(long = "additional-github-domain")]
    additional_github_domains: Vec<String>,
}

#[derive(Debug)]
struct GithubPermalinkMatcher {
    patterns: Vec<Regex>,
}

impl GithubPermalinkMatcher {
    fn from_hook(hook: &Hook) -> Result<Self> {
        let args =
            Args::try_parse_from(hook.entry.expect_direct().split()?.iter().chain(&hook.args))?;
        Ok(Self::new(args.additional_github_domains))
    }

    fn new(additional_domains: Vec<String>) -> Self {
        let mut domains = BTreeSet::from([String::from("github.com")]);
        domains.extend(additional_domains);

        let patterns = domains
            .into_iter()
            .map(|domain| {
                let domain = escape(&domain);
                let pattern = format!(
                    r"https://{domain}/[^/ ]+/[^/ ]+/blob/(?![a-fA-F0-9]{{4,64}}/)([^/. ]+)/[^# ]+#L\d+"
                );
                Regex::new(&pattern).expect("vcs permalink regex must be valid")
            })
            .collect();

        Self { patterns }
    }

    fn is_non_permalink(&self, line: &[u8]) -> bool {
        let line = String::from_utf8_lossy(line);
        self.patterns
            .iter()
            .any(|pattern| pattern.is_match(&line).unwrap_or(false))
    }
}

/// Hook entry point: checks every file in `filenames` for non-permanent GitHub
/// line links.
///
/// A single matcher is built from the hook's arguments and shared by all
/// per-file checks, which are dispatched through `run_concurrent_file_checks`
/// with the global `CONCURRENCY` value. The returned `(code, output)` pair is
/// whatever that helper produces from the individual `check_file` results.
///
/// # Errors
///
/// Fails when the hook arguments cannot be parsed, or when the concurrent
/// runner reports an error.
pub(crate) async fn check_vcs_permalinks(
    hook: &Hook,
    filenames: &[&Path],
) -> Result<(i32, Vec<u8>)> {
    let base_dir = hook.project().relative_path();
    let permalinks = GithubPermalinkMatcher::from_hook(hook)?;

    let checks = run_concurrent_file_checks(filenames.iter().copied(), *CONCURRENCY, |path| {
        check_file(base_dir, path, &permalinks)
    });
    checks.await
}

/// Scans a single file for non-permanent GitHub line links.
///
/// The file is opened at `file_base.join(filename)`, while reported lines are
/// prefixed with `filename` exactly as given. Lines are read as raw bytes split
/// on `\n`, so content that is not valid UTF-8 does not cause a read error.
///
/// Returns `(0, <empty>)` when no line matches. Otherwise returns `(1, report)`,
/// where `report` holds one `<filename>:<line number>:<line>` entry per matching
/// line followed by a short explanation of how to obtain a permalink.
///
/// # Errors
///
/// Fails when the file cannot be opened or read.
async fn check_file(
    file_base: &Path,
    filename: &Path,
    matcher: &GithubPermalinkMatcher,
) -> Result<(i32, Vec<u8>)> {
    let path = file_base.join(filename);
    let file = fs_err::tokio::File::open(&path).await?;
    let mut reader = BufReader::new(file);

    let mut retval = 0;
    let mut output = Vec::new();
    // Reused for every line to avoid a fresh allocation per read.
    let mut line = Vec::new();
    // Explicitly 64-bit: an unannotated literal falls back to `i32`, which would
    // overflow on files with more than `i32::MAX` lines.
    let mut line_number: u64 = 0;

    while reader.read_until(b'\n', &mut line).await? != 0 {
        line_number += 1;
        if matcher.is_non_permalink(&line) {
            retval = 1;
            write!(output, "{}:{}:", filename.display(), line_number)?;
            output.write_all(&line)?;
            // The final line of a file may lack a terminator; keep one entry per line.
            if !line.ends_with(b"\n") {
                writeln!(output)?;
            }
        }
        line.clear();
    }

    if retval != 0 {
        writeln!(output)?;
        writeln!(output, "Non-permanent github link detected.")?;
        writeln!(
            output,
            "On any page on github press [y] to load a permalink."
        )?;
    }

    Ok((retval, output))
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use tempfile::tempdir;

    /// Builds a matcher for `github.com` plus the given extra hosts.
    fn matcher(domains: &[&str]) -> GithubPermalinkMatcher {
        let extra_hosts = domains.iter().map(|host| String::from(*host)).collect();
        GithubPermalinkMatcher::new(extra_hosts)
    }

    /// Links whose ref segment looks like a commit hash must be accepted.
    #[test]
    fn test_permalink_not_flagged() {
        let github = matcher(&[]);
        let pinned: [&[u8]; 2] = [
            b"https://github.com/owner/repo/blob/abc123def456/file.py#L10",
            b"https://github.com/owner/repo/blob/abcdef1234567890abcdef1234567890abcdef12/src/main.rs#L42",
        ];
        for link in pinned {
            assert!(!github.is_non_permalink(link));
        }
    }

    /// Links that reference a branch name must be reported.
    #[test]
    fn test_branch_link_flagged() {
        let github = matcher(&[]);
        let floating: [&[u8]; 3] = [
            b"https://github.com/owner/repo/blob/main/file.py#L10",
            b"https://github.com/owner/repo/blob/master/src/lib.rs#L5",
            b"https://github.com/owner/repo/blob/develop/README.md#L1",
        ];
        for link in floating {
            assert!(github.is_non_permalink(link));
        }
    }

    /// Without a `#L<n>` anchor a branch link is not reported.
    #[test]
    fn test_no_line_number_not_flagged() {
        let github = matcher(&[]);
        let flagged = github.is_non_permalink(b"https://github.com/owner/repo/blob/main/file.py");
        assert!(!flagged);
    }

    /// Hosts supplied as additional domains are checked as well.
    #[test]
    fn test_additional_github_domain_flagged() {
        let enterprise = matcher(&["github.example.com"]);
        let flagged = enterprise
            .is_non_permalink(b"https://github.example.com/owner/repo/blob/main/file.py#L10");
        assert!(flagged);
    }

    /// Repeated hosts, including an explicit `github.com`, yield one pattern each.
    #[test]
    fn test_github_domains_are_deduplicated() {
        let deduped = matcher(&["github.example.com", "github.com", "github.example.com"]);
        assert_eq!(deduped.patterns.len(), 2);
    }

    /// End-to-end check of the report produced for a file on an additional host.
    #[tokio::test]
    async fn test_check_file_with_additional_domain() -> Result<()> {
        let workdir = tempdir()?;
        let target = workdir.path().join("links.md");
        fs_err::tokio::write(
            &target,
            b"https://github.example.com/owner/repo/blob/main/file.py#L10\n",
        )
        .await?;

        let enterprise = matcher(&["github.example.com"]);
        let relative = PathBuf::from("links.md");
        let (code, output) = check_file(workdir.path(), &relative, &enterprise).await?;
        let report = String::from_utf8(output)?;

        assert_eq!(code, 1);
        assert_eq!(
            report,
            "links.md:1:https://github.example.com/owner/repo/blob/main/file.py#L10\n\nNon-permanent github link detected.\nOn any page on github press [y] to load a permalink.\n",
        );

        Ok(())
    }
}