repokit 5.0.3

A knowledgebase for your repository - wrapped in a CLI
use std::{
    collections::HashSet,
    path::{Path, PathBuf},
};

use regex::Regex;

use crate::{
    caches::file_cache::FileCache, context::git_scope::GitScope, executor::executor::Executor,
    logger::logger::Logger, post_processing::post_processor::PostProcessor,
};

/// On-disk cache of the files discovered by a previous crawl.
///
/// The cache is stored in the `.crawl_cache` file inside `cache_directory` and is
/// populated by `FileCache::initialize`.
#[derive(Clone, Debug)]
pub struct CrawlCache {
    /// Directory the cache file lives in, as handed to `FileCache::creator`.
    pub cache_directory: Option<PathBuf>,
    /// Changed `.ts` files reported by `git status` that are not listed in the cache.
    /// Stays `None` when there are no such files or when no cache was loaded.
    pub changed_files: Option<Vec<String>>,
    /// File paths read back from the cache file. `Some` means the cache is in use
    /// (see `crawl_cache_enabled`).
    pub files_to_crawl: Option<Vec<String>>,
}

impl CrawlCache {
    /// Returns `true` when `files_to_crawl` has been populated, i.e. a cached crawl
    /// result is available.
    pub fn crawl_cache_enabled(&self) -> bool {
        self.files_to_crawl.is_some()
    }

    /// Hands `results` to `write`, passing a callback that schedules the storage-error
    /// clean-up.
    // NOTE(review): the callback is presumably invoked only when the write fails —
    // confirm against `FileCache::write`.
    pub fn cache_crawl_results(&self, results: String) {
        self.write(&results, |_| {
            self.on_crawl_cache_storage_error();
        });
    }

    /// Registers a post-processing task that reports the failed cache write and clears
    /// the cache file at `storage_path`.
    ///
    /// Does nothing when `storage_path` returns `None`.
    fn on_crawl_cache_storage_error(&self) {
        if let Some(storage_path) = self.storage_path() {
            PostProcessor::get().register_task(move || {
                Logger::info(
                    "I attempted to cache the results of a file crawl, but couldn't write to disk",
                );
                CrawlCache::log_cache_write_error();
                Logger::info(
                    "To avoid issues with stale caches I'm going to delete what's currently on disk",
                );
                CrawlCache::clear_cache_file(storage_path.clone(), true);
            });
        }
    }

    /// Inspects `git status --porcelain -uall` and collects the changed `.ts` files.
    ///
    /// Returns a tuple of:
    /// * whether any status entry ends with `.gitignore`, and
    /// * the set of reported `.ts` paths that exist when joined onto `git_root`.
    fn get_changed_files(&self, git_root: &Path) -> (bool, HashSet<String>) {
        let mut contains_git_ignore = false;
        let file_path_matcher =
            Regex::new(r#"^.*\s(.*\.ts)$"#).expect("the file path pattern is a valid regex");
        let stdout = Executor::exec("git status --porcelain -uall", |cmd| cmd);
        let files: HashSet<String> = stdout
            // `lines` also strips a trailing `\r`, so CRLF output still matches the
            // suffix check and the `$`-anchored pattern below.
            .lines()
            .filter_map(|entry| {
                // Only the first `.gitignore` entry needs to flip the flag; later ones
                // fall through and are rejected by the `.ts` pattern.
                if !contains_git_ignore && entry.ends_with(".gitignore") {
                    contains_git_ignore = true;
                    return None;
                }
                // The pattern is anchored at both ends, so there is at most one match.
                let file_path = file_path_matcher.captures(entry)?.get(1)?.as_str();
                // Entries whose path is missing on disk (e.g. deletions) are skipped.
                git_root
                    .join(file_path)
                    .exists()
                    .then(|| file_path.to_string())
            })
            .collect();
        (contains_git_ignore, files)
    }
}

impl FileCache<GitScope> for CrawlCache {
    /// Name of the file the crawl cache is stored in.
    fn cache_file(&self) -> &str {
        ".crawl_cache"
    }

    /// Directory the cache file is stored in, if one was provided.
    fn cache_directory(&self) -> &Option<PathBuf> {
        &self.cache_directory
    }

    /// Contents of a fresh cache file: empty.
    fn default_cache_contents(&self) -> &str {
        ""
    }

    /// Builds an empty cache bound to `cache_directory`; nothing is read from disk here.
    fn creator(cache_directory: Option<PathBuf>) -> Self {
        CrawlCache {
            cache_directory,
            changed_files: None,
            files_to_crawl: None,
        }
    }

    /// Loads the cached crawl result when it is still valid for the current git state.
    ///
    /// The first line of the cache file is compared against the HEAD commit hash; the
    /// remaining lines are the cached file paths. The cache is left disabled
    /// (`files_to_crawl` stays `None`) when:
    /// * the HEAD commit hash or the git root is unknown,
    /// * the cache file cannot be read,
    /// * the stored hash differs from HEAD,
    /// * no file paths follow the hash line, or
    /// * a `.gitignore` shows up in `git status` (the cache file is cleared as well).
    ///
    /// Otherwise `files_to_crawl` receives the cached paths and `changed_files` receives
    /// the changed `.ts` files that are not among them, if there are any.
    fn initialize(&mut self, git_scope: GitScope) {
        let Some(head_commit) = &git_scope.head_commit_hash else {
            return;
        };
        let Some((mut lines, path)) = self.read() else {
            return;
        };

        // A missing first line falls back to a placeholder that never equals a real hash.
        let cached_commit = self.unwrap_line(lines.next(), "non_existent_hash");
        if &cached_commit != head_commit {
            return;
        }

        let cached_files = self.line_buffer_to_vec(lines);
        if cached_files.is_empty() {
            return;
        }

        let Some(git_root) = &git_scope.root_path else {
            return;
        };
        let (git_ignore_changed, mut changed_files) = self.get_changed_files(git_root);
        if git_ignore_changed {
            // A touched `.gitignore` is treated as invalidating the whole cache.
            CrawlCache::clear_cache_file(path.to_owned(), false);
            return;
        }

        // Keep only the changed files the cache does not already list.
        for cached_file in &cached_files {
            changed_files.remove(cached_file);
        }
        if !changed_files.is_empty() {
            self.changed_files = Some(changed_files.into_iter().collect());
        }
        self.files_to_crawl = Some(cached_files);
    }
}