mise 2026.4.11

The front-end to your dev env
use crate::Result;
use once_cell::sync::Lazy;
use regex::Regex;
use std::path::PathBuf;

use async_trait::async_trait;

use crate::{
    dirs, env,
    file::display_path,
    git::{self, CloneOptions},
    hash,
    lock_file::LockFile,
};

use super::TaskFileProvider;

static SSH_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^git::(?P<url>ssh://((?P<user>[^@]+)@)(?P<host>[^/]+)/(?P<repo>.+)\.git)//(?P<path>[^?]+)(\?ref=(?P<branch>[^?]+))?$").unwrap()
});

static HTTPS_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^git::(?P<url>https?://(?P<host>[^/]+)/(?P<repo>.+)\.git)//(?P<path>[^?]+)(\?ref=(?P<branch>[^?]+))?$").unwrap()
});

#[derive(Debug)]
pub struct RemoteTaskGitBuilder {
    store_path: PathBuf,
    use_cache: bool,
}

impl RemoteTaskGitBuilder {
    pub fn new() -> Self {
        Self {
            store_path: env::temp_dir(),
            use_cache: false,
        }
    }

    pub fn with_cache(mut self, use_cache: bool) -> Self {
        if use_cache {
            self.store_path = dirs::CACHE.join("remote-git-tasks-cache");
            self.use_cache = true;
        }
        self
    }

    pub fn build(self) -> RemoteTaskGit {
        RemoteTaskGit {
            storage_path: self.store_path,
            is_cached: self.use_cache,
        }
    }
}

#[derive(Debug)]
pub struct RemoteTaskGit {
    storage_path: PathBuf,
    is_cached: bool,
}

#[derive(Debug, Clone)]
struct GitRepoStructure {
    url_without_path: String,
    path: String,
    branch: Option<String>,
}

impl GitRepoStructure {
    pub fn new(url_without_path: &str, path: &str, branch: Option<String>) -> Self {
        Self {
            url_without_path: url_without_path.to_string(),
            path: path.to_string(),
            branch,
        }
    }
}

impl RemoteTaskGit {
    fn get_cache_key(&self, repo_structure: &GitRepoStructure) -> String {
        let key = format!(
            "{}{}",
            &repo_structure.url_without_path,
            &repo_structure.branch.to_owned().unwrap_or("".to_string())
        );
        hash::hash_sha256_to_str(&key)
    }

    fn get_repo_structure(&self, file: &str) -> GitRepoStructure {
        if let Some(repo) = Self::parse_ssh(file) {
            return repo;
        }
        Self::parse_https(file).unwrap()
    }

    fn parse_ssh(file: &str) -> Option<GitRepoStructure> {
        let captures = SSH_REGEX.captures(file)?;
        let url_without_path = captures.name("url").unwrap().as_str();
        let path = captures.name("path").unwrap().as_str();
        let branch = captures.name("branch").map(|m| m.as_str().to_string());
        Some(GitRepoStructure::new(url_without_path, path, branch))
    }

    fn parse_https(file: &str) -> Option<GitRepoStructure> {
        let captures = HTTPS_REGEX.captures(file)?;
        let url_without_path = captures.name("url").unwrap().as_str();
        let path = captures.name("path").unwrap().as_str();
        let branch = captures.name("branch").map(|m| m.as_str().to_string());
        Some(GitRepoStructure::new(url_without_path, path, branch))
    }
}

#[async_trait]
impl TaskFileProvider for RemoteTaskGit {
    fn is_match(&self, file: &str) -> bool {
        SSH_REGEX.is_match(file) || HTTPS_REGEX.is_match(file)
    }

    async fn get_local_path(&self, file: &str) -> Result<PathBuf> {
        let repo_structure = self.get_repo_structure(file);
        let cache_key = self.get_cache_key(&repo_structure);
        let destination = self.storage_path.join(&cache_key);
        let repo_file_path = repo_structure.path.clone();
        let full_path = destination.join(&repo_file_path);

        debug!("Repo structure: {:?}", repo_structure);

        let _lock = LockFile::new(&destination)
            .with_callback(|l| {
                debug!(
                    "waiting for lock on remote git task cache: {}",
                    display_path(l)
                );
            })
            .lock()?;

        if self.is_cached {
            trace!("Cache mode enabled");
            if full_path.exists() {
                debug!("Using cached file: {:?}", full_path);
                return Ok(full_path);
            }
        } else {
            trace!("Cache mode disabled");
        }

        let tmp_destination = self.storage_path.join(format!("{}.clone-tmp", &cache_key));
        if tmp_destination.exists() {
            crate::file::remove_all(&tmp_destination)?;
        }

        let git_repo = git::Git::new(&tmp_destination);

        let mut clone_options = CloneOptions::default();

        if let Some(branch) = &repo_structure.branch {
            trace!("Use specific branch {}", branch);
            clone_options = clone_options.branch(branch);
        }

        match git_repo.clone(repo_structure.url_without_path.as_str(), clone_options) {
            Ok(()) => {
                if destination.exists()
                    && let Err(e) = crate::file::remove_all(&destination)
                {
                    let _ = crate::file::remove_all(&tmp_destination);
                    return Err(e);
                }
                if let Err(e) = std::fs::rename(&tmp_destination, &destination) {
                    let _ = crate::file::remove_all(&tmp_destination);
                    return Err(eyre::eyre!(
                        "failed to move cloned repo into cache at {}: {e}",
                        display_path(&destination)
                    ));
                }
            }
            Err(e) => {
                let _ = crate::file::remove_all(&tmp_destination);
                return Err(e);
            }
        }

        Ok(full_path)
    }
}

#[cfg(test)]
mod tests {

    use super::*;

    #[test]
    fn test_valid_parse_ssh() {
        let test_cases = vec![
            "git::ssh://git@github.com/myorg/example.git//myfile?ref=v1.0.0",
            "git::ssh://git@github.com/myorg/example.git//terraform/myfile?ref=master",
            "git::ssh://git@git.acme.com:1222/myorg/example.git//terraform/myfile?ref=master",
            "git::ssh://git@myserver.com/example.git//terraform/myfile",
            "git::ssh://user@myserver.com/example.git//myfile?ref=master",
        ];

        for url in test_cases {
            assert!(
                RemoteTaskGit::parse_ssh(url).is_some(),
                "Failed for: {}",
                url
            );
        }
    }

    #[test]
    fn test_invalid_parse_ssh() {
        let test_cases = vec![
            "git::ssh://myserver.com/example.git//myfile?ref=master",
            "git::ssh://user@myserver.com/example.git?ref=master",
            "git::ssh://user@myserver.com/example.git",
            "git::https://github.com/myorg/example.git//myfile?ref=v1.0.0",
        ];

        for url in test_cases {
            assert!(
                RemoteTaskGit::parse_ssh(url).is_none(),
                "Should fail for: {}",
                url
            );
        }
    }

    #[test]
    fn test_valid_parse_https() {
        let test_cases = vec![
            "git::https://github.com/myorg/example.git//myfile?ref=v1.0.0",
            "git::https://github.com/myorg/example.git//terraform/myfile?ref=master",
            "git::https://git.acme.com:8080/myorg/example.git//terraform/myfile?ref=master",
            "git::https://myserver.com/example.git//terraform/myfile",
            "git::https://myserver.com/example.git//myfile?ref=master",
            "git::http://localhost:8080/repo.git//xtasks/lint/ripgrep", // HTTP support for local testing
        ];

        for url in test_cases {
            assert!(
                RemoteTaskGit::parse_https(url).is_some(),
                "Failed for: {}",
                url
            );
        }
    }

    #[test]
    fn test_invalid_parse_https() {
        let test_cases = vec![
            "git::https://myserver.com/example.git?ref=master",
            "git::https://user@myserver.com/example.git",
            "git::ssh://git@github.com/myorg/example.git//myfile?ref=v1.0.0",
        ];

        for url in test_cases {
            assert!(
                RemoteTaskGit::parse_https(url).is_none(),
                "Should fail for: {}",
                url
            );
        }
    }

    #[test]
    fn test_extract_ssh_url_information() {
        let test_cases: Vec<(&str, &str, &str, Option<String>)> = vec![
            (
                "git::ssh://git@github.com/myorg/example.git//myfile?ref=v1.0.0",
                "ssh://git@github.com/myorg/example.git",
                "myfile",
                Some("v1.0.0".to_string()),
            ),
            (
                "git::ssh://git@github.com/myorg/example.git//terraform/myfile?ref=master",
                "ssh://git@github.com/myorg/example.git",
                "terraform/myfile",
                Some("master".to_string()),
            ),
            (
                "git::ssh://git@myserver.com/example.git//terraform/myfile",
                "ssh://git@myserver.com/example.git",
                "terraform/myfile",
                None,
            ),
        ];

        for (url, expected_repo, expected_path, expected_branch) in test_cases {
            let repo = RemoteTaskGit::parse_ssh(url).unwrap();
            assert_eq!(expected_repo, repo.url_without_path);
            assert_eq!(expected_path, repo.path);
            assert_eq!(expected_branch, repo.branch);
        }
    }

    #[test]
    fn test_extract_https_url_information() {
        let test_cases: Vec<(&str, &str, &str, Option<String>)> = vec![
            (
                "git::https://github.com/myorg/example.git//myfile?ref=v1.0.0",
                "https://github.com/myorg/example.git",
                "myfile",
                Some("v1.0.0".to_string()),
            ),
            (
                "git::https://github.com/myorg/example.git//terraform/myfile?ref=master",
                "https://github.com/myorg/example.git",
                "terraform/myfile",
                Some("master".to_string()),
            ),
            (
                "git::https://myserver.com/example.git//terraform/myfile",
                "https://myserver.com/example.git",
                "terraform/myfile",
                None,
            ),
        ];

        for (url, expected_repo, expected_path, expected_branch) in test_cases {
            let repo = RemoteTaskGit::parse_https(url).unwrap();
            assert_eq!(expected_repo, repo.url_without_path);
            assert_eq!(expected_path, repo.path);
            assert_eq!(expected_branch, repo.branch);
        }
    }

    #[test]
    fn test_compare_ssh_get_cache_key() {
        let remote_task_git = RemoteTaskGitBuilder::new().build();

        let test_cases = vec![
            (
                "git::ssh://git@github.com/myorg/example.git//myfile?ref=v1.0.0",
                "git::ssh://git@github.com/myorg/example.git//myfile?ref=v2.0.0",
                false,
            ),
            (
                "git::ssh://git@github.com/myorg/example.git//myfile?ref=v1.0.0",
                "git::ssh://user@myserver.com/example.git//myfile?ref=master",
                false,
            ),
            (
                "git::ssh://git@github.com/example.git//myfile?ref=v1.0.0",
                "git::ssh://git@github.com/example.git//subfolder/mysecondfile?ref=v1.0.0",
                true,
            ),
            (
                "git::ssh://git@github.com/myorg/example.git//myfile?ref=v1.0.0",
                "git::ssh://git@github.com/myorg/example.git//subfolder/mysecondfile?ref=v1.0.0",
                true,
            ),
        ];

        for (first_url, second_url, expected) in test_cases {
            let first_repo = RemoteTaskGit::parse_ssh(first_url).unwrap();
            let second_repo = RemoteTaskGit::parse_ssh(second_url).unwrap();
            let first_cache_key = remote_task_git.get_cache_key(&first_repo);
            let second_cache_key = remote_task_git.get_cache_key(&second_repo);
            assert_eq!(expected, first_cache_key == second_cache_key);
        }
    }

    #[test]
    fn test_compare_https_get_cache_key() {
        let remote_task_git = RemoteTaskGitBuilder::new().build();

        let test_cases = vec![
            (
                "git::https://github.com/myorg/example.git//myfile?ref=v1.0.0",
                "git::https://github.com/myorg/example.git//myfile?ref=v2.0.0",
                false,
            ),
            (
                "git::https://github.com/myorg/example.git//myfile?ref=v1.0.0",
                "git::https://bitbucket.com/myorg/example.git//myfile?ref=v1.0.0",
                false,
            ),
            (
                "git::https://github.com/myorg/example.git//myfile?ref=v1.0.0",
                "git::https://github.com/myorg/example.git//subfolder/myfile?ref=v1.0.0",
                true,
            ),
            (
                "git::https://github.com/example.git//myfile?ref=v1.0.0",
                "git::https://github.com/example.git//subfolder/myfile?ref=v1.0.0",
                true,
            ),
        ];

        for (first_url, second_url, expected) in test_cases {
            let first_repo = RemoteTaskGit::parse_https(first_url).unwrap();
            let second_repo = RemoteTaskGit::parse_https(second_url).unwrap();
            let first_cache_key = remote_task_git.get_cache_key(&first_repo);
            let second_cache_key = remote_task_git.get_cache_key(&second_repo);
            assert_eq!(expected, first_cache_key == second_cache_key);
        }
    }
}