Skip to main content

jirust_cli/utils/
cached_scanner.rs

1use anyhow::{Context, Result};
2use git2::Repository;
3use std::env;
4use std::fs;
5use std::path::{Path, PathBuf};
6use walkdir::WalkDir;
7use yara_x::{Compiler, Rules, Scanner};
8
9use crate::config::config_file::YaraSection;
10
/// Kind of remote a YARA rule set is fetched from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SourceType {
    /// A git repository that is cloned and fetched.
    Git,
    /// A zip archive that is downloaded and extracted.
    Zip,
}

impl SourceType {
    /// Detect the source type from a rules URL.
    ///
    /// The decision is based on the URL path only: any query string (`?…`)
    /// or fragment (`#…`) is ignored and the `.zip` suffix is matched
    /// case-insensitively. Everything that is not recognised as a zip
    /// archive (including explicit `.git` URLs) is treated as a git
    /// repository, which keeps backward compatibility with older configs.
    fn detect(url: &str) -> Self {
        // Drop "?query" / "#fragment" so "rules.zip?token=abc" is still a zip.
        let path = url
            .split(|c| c == '?' || c == '#')
            .next()
            .unwrap_or(url);

        if Self::has_suffix_ignore_ascii_case(path, ".zip") {
            SourceType::Zip
        } else {
            // `.git` URLs and anything unknown default to Git.
            SourceType::Git
        }
    }

    /// Byte-wise, ASCII case-insensitive `ends_with`.
    ///
    /// Works on bytes so a multi-byte character near the end of `text`
    /// can never cause a char-boundary panic.
    fn has_suffix_ignore_ascii_case(text: &str, suffix: &str) -> bool {
        let (text, suffix) = (text.as_bytes(), suffix.as_bytes());
        text.len() >= suffix.len()
            && text[text.len() - suffix.len()..].eq_ignore_ascii_case(suffix)
    }
}
31
32/// Internal configuration for YARA scanner paths
33struct YaraConfig {
34    rules_source: String,
35    rules_dir: PathBuf,
36    cache_file: PathBuf,
37    cache_version_file: PathBuf,
38    source_type: SourceType,
39}
40
41impl YaraConfig {
42    /// Create from ConfigFile
43    fn from_config_file(cfg: &YaraSection) -> Result<Self> {
44        let base_dir = Self::get_base_dir()?;
45        Self::from_config_file_with_base_dir(cfg, base_dir)
46    }
47
48    /// Create from ConfigFile with explicit base directory (useful for testing)
49    fn from_config_file_with_base_dir(cfg: &YaraSection, base_dir: PathBuf) -> Result<Self> {
50        let rules_dir = base_dir.join(cfg.get_rules_directory());
51        let cache_file = base_dir.join(cfg.get_cache_file());
52        let cache_version_file = base_dir.join(cfg.get_cache_version_file());
53
54        let rules_source = cfg.get_rules_source().to_string();
55        let source_type = SourceType::detect(&rules_source);
56
57        Ok(YaraConfig {
58            rules_source,
59            rules_dir,
60            cache_file,
61            cache_version_file,
62            source_type,
63        })
64    }
65
66    /// Create with defaults (for backward compatibility)
67    fn default() -> Result<Self> {
68        let base_dir = Self::get_base_dir()?;
69
70        // Use existing constants as fallback
71        let rules_source = "https://github.com/Yara-Rules/rules.git".to_string();
72        let source_type = SourceType::Git;
73
74        Ok(YaraConfig {
75            rules_source,
76            rules_dir: base_dir.join("yara-rules"),
77            cache_file: base_dir.join("yara_rules.cache"),
78            cache_version_file: base_dir.join("yara_rules.cache.version"),
79            source_type,
80        })
81    }
82
83    /// Get base directory (~/.jirust-cli/)
84    fn get_base_dir() -> Result<PathBuf> {
85        match env::var_os("HOME") {
86            Some(home) => {
87                let base = PathBuf::from(home).join(".jirust-cli");
88                if !base.exists() {
89                    fs::create_dir_all(&base).context("Failed to create .jirust-cli directory")?;
90                }
91                Ok(base)
92            }
93            None => anyhow::bail!("HOME environment variable not set"),
94        }
95    }
96}
97
98/// Structure to manage compiled YARA rules
99pub struct CachedYaraScanner {
100    rules: Rules,
101    #[allow(dead_code)]
102    config: YaraConfig,
103}
104
105impl CachedYaraScanner {
106    /// Create scanner using configuration from ConfigFile
107    pub async fn from_config(cfg: &YaraSection) -> Result<Self> {
108        let config = YaraConfig::from_config_file(cfg)?;
109        let rules = Self::load_or_compile_rules(&config).await?;
110        Ok(Self { rules, config })
111    }
112
113    /// Create scanner using configuration with explicit base directory (useful for testing)
114    #[cfg(test)]
115    pub async fn from_config_with_base_dir(cfg: &YaraSection, base_dir: PathBuf) -> Result<Self> {
116        let config = YaraConfig::from_config_file_with_base_dir(cfg, base_dir)?;
117        let rules = Self::load_or_compile_rules(&config).await?;
118        Ok(Self { rules, config })
119    }
120
121    /// Generate the new scanner with defaults (backward compatibility):
122    /// - If the YARA rules in the repo have not been updated -> load cache (~0.5s)
123    /// - If the YARA rules in the repo have been updated -> rebuild & save in cache (~30s)
124    pub async fn new() -> Result<Self> {
125        let config = YaraConfig::default()?;
126        let rules = Self::load_or_compile_rules(&config).await?;
127        Ok(Self { rules, config })
128    }
129
130    /// Load cached rules or rebuilds them if required
131    async fn load_or_compile_rules(config: &YaraConfig) -> Result<Rules> {
132        let current_version = Self::get_current_version(config)?;
133        let cached_version = Self::get_cached_version(config);
134
135        // Check if the cache can be used
136        if let (Some(cached), Some(current)) = (cached_version, &current_version) {
137            if cached == *current && config.cache_file.exists() {
138                println!("šŸ“¦ Loading cached rules...");
139
140                match Self::load_cached_rules(config) {
141                    Ok(rules) => {
142                        println!("āœ… cached rules loaded (version: {})", &current[..8]);
143                        return Ok(rules);
144                    }
145                    Err(e) => {
146                        println!("āš ļø  Cache corruption: {} - rebuilding...", e);
147                    }
148                }
149            } else {
150                println!("šŸ”„ Rules updated, rebuilding...");
151            }
152        } else {
153            println!("šŸ”Ø No cache found, building...");
154        }
155
156        let rules = Self::compile_all_rules(config).await?;
157
158        if let Err(e) = Self::save_to_cache(config, &rules, &current_version) {
159            eprintln!("āš ļø  Can't save cache: {}", e);
160        } else {
161            println!("šŸ’¾ Compiled rules cached!");
162        }
163
164        Ok(rules)
165    }
166
167    /// Get current version identifier for the rules
168    fn get_current_version(config: &YaraConfig) -> Result<Option<String>> {
169        match config.source_type {
170            SourceType::Git => Self::get_git_version(config),
171            SourceType::Zip => Self::get_zip_version(config),
172        }
173    }
174
175    /// Get git commit hash as version
176    fn get_git_version(config: &YaraConfig) -> Result<Option<String>> {
177        if !config.rules_dir.exists() {
178            return Ok(None);
179        }
180
181        let repo = Repository::open(&config.rules_dir).context("Can't open git repository")?;
182
183        let head = repo.head().context("Can't read HEAD")?;
184
185        let commit = head.peel_to_commit().context("Can't read commit")?;
186
187        Ok(Some(commit.id().to_string()))
188    }
189
190    /// Get content hash as version for zip files
191    fn get_zip_version(config: &YaraConfig) -> Result<Option<String>> {
192        if !config.rules_dir.exists() {
193            return Ok(None);
194        }
195
196        // Read version from metadata file if exists
197        let version_marker = config.rules_dir.join(".version");
198        if version_marker.exists() {
199            return Ok(Some(fs::read_to_string(version_marker)?));
200        }
201
202        Ok(None)
203    }
204
205    /// Check the cached version
206    fn get_cached_version(config: &YaraConfig) -> Option<String> {
207        fs::read_to_string(&config.cache_version_file).ok()
208    }
209
210    /// Load cached rules
211    fn load_cached_rules(config: &YaraConfig) -> Result<Rules> {
212        let cache_bytes = fs::read(&config.cache_file).context("Can't read cache")?;
213
214        let rules = Rules::deserialize(&cache_bytes).context("Can't read rules")?;
215
216        Ok(rules)
217    }
218
219    /// Store compiled rules in cache
220    fn save_to_cache(config: &YaraConfig, rules: &Rules, version: &Option<String>) -> Result<()> {
221        let serialized = rules.serialize()?;
222
223        fs::write(&config.cache_file, serialized).context("Can't write cache")?;
224
225        if let Some(ver) = version {
226            fs::write(&config.cache_version_file, ver).context("Can't store cache version")?;
227        }
228
229        Ok(())
230    }
231
232    /// Compile all YARA rules
233    async fn compile_all_rules(config: &YaraConfig) -> Result<Rules> {
234        if !config.rules_dir.exists() {
235            println!(
236                "Can't find YARA rules directory ({}).",
237                config.rules_dir.display()
238            );
239            update_yara_rules_with_config(config)
240                .await
241                .context("Can't download YARA rules")?;
242        }
243
244        let mut compiler = Compiler::new();
245        let mut compiled_count = 0;
246        let mut skipped_count = 0;
247
248        println!("šŸ”Ø Building YARA rules...");
249
250        for entry in WalkDir::new(&config.rules_dir)
251            .follow_links(false)
252            .into_iter()
253            .filter_map(|e| e.ok())
254        {
255            let path = entry.path();
256
257            if path.is_file() {
258                let extension = path.extension().and_then(|s| s.to_str());
259                if matches!(extension, Some("yar") | Some("yara")) {
260                    match fs::read_to_string(path) {
261                        Ok(content) => match compiler.add_source(&*content) {
262                            Ok(_) => {
263                                compiled_count += 1;
264                                if compiled_count % 100 == 0 {
265                                    print!(".");
266                                    use std::io::Write;
267                                    std::io::stdout().flush().ok();
268                                }
269                            }
270                            Err(e) => {
271                                skipped_count += 1;
272                                if std::env::var("VERBOSE").is_ok() {
273                                    eprintln!("\nāš ļø  Skipped rule {}: {}", path.display(), e);
274                                }
275                            }
276                        },
277                        Err(e) => {
278                            eprintln!("\nāš ļø  Can't read {}: {}", path.display(), e);
279                            skipped_count += 1;
280                        }
281                    }
282                }
283            }
284        }
285
286        println!(
287            "\nāœ… Built {} rules (skipped: {})",
288            compiled_count, skipped_count
289        );
290
291        if compiled_count == 0 {
292            anyhow::bail!("No rules built");
293        }
294
295        let rules = compiler.build();
296        Ok(rules)
297    }
298
299    /// Check a binary file using YARA rules
300    pub fn scan_file<P: AsRef<Path>>(&self, file_path: P) -> Result<Vec<String>> {
301        let path = file_path.as_ref();
302
303        if !path.exists() {
304            anyhow::bail!("File {} not found", path.display());
305        }
306
307        let file_content = fs::read(path).context(format!("Can't read file {}", path.display()))?;
308
309        let mut scanner = Scanner::new(&self.rules);
310        let scan_results = scanner.scan(&file_content).context("Error scanning file")?;
311
312        let matches: Vec<String> = scan_results
313            .matching_rules()
314            .map(|rule| rule.identifier().to_string())
315            .collect();
316
317        Ok(matches)
318    }
319
320    /// Scan a memory buffer
321    pub fn scan_buffer(&self, buffer: &[u8]) -> Result<Vec<String>> {
322        let mut scanner = Scanner::new(&self.rules);
323        let results = scanner.scan(buffer)?;
324
325        let matches: Vec<String> = results
326            .matching_rules()
327            .map(|rule| rule.identifier().to_string())
328            .collect();
329
330        Ok(matches)
331    }
332
333    /// Force rules rebuilding (invalidate cache) - uses default config
334    pub fn force_recompile() -> Result<()> {
335        let config = YaraConfig::default()?;
336        Self::force_recompile_internal(&config)
337    }
338
339    /// Force rules rebuilding (invalidate cache) with specific config (internal)
340    fn force_recompile_internal(config: &YaraConfig) -> Result<()> {
341        println!("šŸ—‘ļø  Deleting cache...");
342
343        fs::remove_file(&config.cache_file).ok();
344        fs::remove_file(&config.cache_version_file).ok();
345
346        println!("āœ… Cache deleted");
347        Ok(())
348    }
349}
350
351/// Update or download YARA rules based on source type
352async fn update_yara_rules_with_config(config: &YaraConfig) -> Result<bool> {
353    match config.source_type {
354        SourceType::Git => update_git_rules(config),
355        SourceType::Zip => update_zip_rules(config).await,
356    }
357}
358
359/// Update git repository
360fn update_git_rules(config: &YaraConfig) -> Result<bool> {
361    if config.rules_dir.exists() {
362        println!("šŸ“¦ Git repository exists, checking for updates...");
363
364        let repo = Repository::open(&config.rules_dir).context("Can't open local repository")?;
365
366        let mut remote = repo
367            .find_remote("origin")
368            .context("Remote 'origin' not found")?;
369
370        remote.fetch(&["main"], None, None).context("Fetch error")?;
371
372        let fetch_head = repo.refname_to_id("FETCH_HEAD")?;
373        let head = repo.head()?.target().unwrap();
374
375        if fetch_head == head {
376            println!("āœ… Repository already up to date");
377            return Ok(false);
378        }
379
380        let fetch_commit = repo.find_commit(fetch_head)?;
381        repo.reset(fetch_commit.as_object(), git2::ResetType::Hard, None)
382            .context("Update error")?;
383
384        println!("āœ… Repository updated");
385
386        // Invalidate cache after update
387        fs::remove_file(&config.cache_file).ok();
388        fs::remove_file(&config.cache_version_file).ok();
389
390        Ok(true)
391    } else {
392        println!("šŸ“„ Cloning git repository (this might take a while)...");
393
394        Repository::clone(&config.rules_source, &config.rules_dir).context("Cloning error")?;
395
396        println!(
397            "āœ… Repository cloned successfully to {}",
398            config.rules_dir.display()
399        );
400        Ok(true)
401    }
402}
403
404/// Download and extract zip rules
405async fn update_zip_rules(config: &YaraConfig) -> Result<bool> {
406    use sha2::{Digest, Sha256};
407    use zip::ZipArchive;
408
409    println!("šŸ“„ Downloading YARA rules from {}...", config.rules_source);
410
411    // Download to memory
412    let response = reqwest::get(&config.rules_source)
413        .await
414        .context(format!("Failed to download from {}", config.rules_source))?;
415
416    if !response.status().is_success() {
417        anyhow::bail!("Download failed with status: {}", response.status());
418    }
419
420    // Read response bytes
421    let zip_bytes = response
422        .bytes()
423        .await
424        .context("Failed to read response body")?;
425
426    // Calculate hash for version tracking
427    let mut hasher = Sha256::new();
428    hasher.update(&zip_bytes);
429    let new_version = format!("{:x}", hasher.finalize());
430
431    // Check if we already have this version
432    let version_marker = config.rules_dir.join(".version");
433    if version_marker.exists() {
434        let current_version = fs::read_to_string(&version_marker).ok();
435        if current_version.as_deref() == Some(new_version.as_str()) {
436            println!(
437                "āœ… Rules already up to date (version: {})",
438                &new_version[..8]
439            );
440            return Ok(false);
441        }
442    }
443
444    // Clean existing rules directory
445    if config.rules_dir.exists() {
446        fs::remove_dir_all(&config.rules_dir)
447            .context("Failed to clean existing rules directory")?;
448    }
449
450    // Create rules directory
451    fs::create_dir_all(&config.rules_dir).context("Failed to create rules directory")?;
452
453    // Extract zip
454    println!("šŸ“¦ Extracting rules...");
455    let cursor = std::io::Cursor::new(zip_bytes);
456    let mut archive = ZipArchive::new(cursor).context("Failed to read zip archive")?;
457
458    for i in 0..archive.len() {
459        let mut file = archive.by_index(i).context("Failed to read zip entry")?;
460
461        let outpath = match file.enclosed_name() {
462            Some(path) => config.rules_dir.join(path),
463            None => continue,
464        };
465
466        if file.name().ends_with('/') {
467            // Directory
468            fs::create_dir_all(&outpath).context("Failed to create directory")?;
469        } else {
470            // File
471            if let Some(parent) = outpath.parent() {
472                fs::create_dir_all(parent).context("Failed to create parent directory")?;
473            }
474
475            let mut outfile = fs::File::create(&outpath).context("Failed to create file")?;
476            std::io::copy(&mut file, &mut outfile).context("Failed to extract file")?;
477        }
478    }
479
480    // Write version marker
481    fs::write(&version_marker, &new_version).context("Failed to write version marker")?;
482
483    println!(
484        "āœ… Rules extracted successfully (version: {})",
485        &new_version[..8]
486    );
487
488    // Invalidate cache
489    fs::remove_file(&config.cache_file).ok();
490    fs::remove_file(&config.cache_version_file).ok();
491
492    Ok(true)
493}
494
495/// Update YARA-Rules with default configuration (backward compatibility).
496/// Returns Ok(true) if updated.
497pub async fn update_yara_rules() -> Result<bool> {
498    let config = YaraConfig::default()?;
499    update_yara_rules_with_config(&config).await
500}
501
502/// Entrypoint
503pub async fn scan_file<P: AsRef<Path>>(file_path: P) -> Result<Vec<String>> {
504    let scanner = CachedYaraScanner::new().await?;
505    scanner.scan_file(file_path)
506}
507
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::config_file::YaraSection;
    use tempfile::tempdir;

    // No environment lock is taken here: the tests below pass an explicit
    // base directory and never touch process-wide environment variables, and
    // a `std::sync::Mutex` guard must not be held across an `.await`.

    /// `.git` and unknown URLs map to Git, `.zip` URLs map to Zip.
    #[test]
    fn detects_source_type() {
        assert_eq!(
            SourceType::detect("https://example.com/rules.git"),
            SourceType::Git
        );
        assert_eq!(
            SourceType::detect("https://example.com/rules.zip"),
            SourceType::Zip
        );
        assert_eq!(
            SourceType::detect("https://example.com/rules"),
            SourceType::Git
        );
    }

    /// Builds a scanner from a local rules directory and checks that the
    /// rule matches and that the cache and its version file are written.
    #[tokio::test]
    async fn builds_rules_and_writes_cache_version() {
        let temp_home = tempdir().expect("temp HOME");
        let base_dir = temp_home.path().join(".jirust-cli");
        let rules_dir = base_dir.join("rules");
        fs::create_dir_all(&rules_dir).expect("create rules dir");

        fs::write(rules_dir.join(".version"), "v1").expect("write version marker");
        fs::write(
            rules_dir.join("test_rule.yar"),
            r#"
rule CacheRule {
  strings:
    $a = "cache-hit"
  condition:
    $a
}
"#,
        )
        .expect("write yara rule");

        let section = YaraSection::new(
            "local_rules.zip".to_string(),
            "rules".to_string(),
            "yara_rules.cache".to_string(),
            "yara_rules.cache.version".to_string(),
        );

        let scanner = CachedYaraScanner::from_config_with_base_dir(&section, base_dir.clone())
            .await
            .expect("scanner builds");
        let matches = scanner
            .scan_buffer(b"cache-hit")
            .expect("scan buffer succeeds");

        assert!(matches.contains(&"CacheRule".to_string()));
        assert!(base_dir.join("yara_rules.cache").exists());
        let version = fs::read_to_string(base_dir.join("yara_rules.cache.version"))
            .expect("version cache exists");
        assert_eq!(version, "v1");
    }

    /// Uses the default configuration, so it is ignored by default.
    #[tokio::test]
    #[ignore]
    async fn test_cached_scanner() {
        let scanner = CachedYaraScanner::new().await.unwrap();

        // Create the sample in a private temp dir instead of a fixed /tmp
        // path: portable, no clashes between runs, and cleaned up on drop.
        let scratch = tempdir().expect("temp dir");
        let sample = scratch.path().join("test_file.txt");
        fs::write(&sample, b"Hello World").expect("write sample file");

        let result = scanner.scan_file(&sample);
        assert!(result.is_ok());
    }

    /// Uses the default configuration, so it is ignored by default.
    #[tokio::test]
    #[ignore]
    async fn test_cache_persistence() {
        // First scan - rules should be compiled and cached
        let scanner1 = CachedYaraScanner::new().await.unwrap();
        drop(scanner1);

        // Second scan - rules should be loaded from cache
        let start = std::time::Instant::now();
        let scanner2 = CachedYaraScanner::new().await.unwrap();
        let elapsed = start.elapsed();

        // Cache load should be fast (< 2 seconds)
        assert!(elapsed.as_secs() < 2);

        drop(scanner2);
    }
}