scribe_selection/
quota.rs

1use rayon::prelude::*;
2use regex::RegexSet;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6use scribe_analysis::heuristics::ScanResult;
7use scribe_core::{Result as ScribeResult, ScribeError};
8
9/// Simple ScanResult implementation for quota system
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct QuotaScanResult {
12    pub path: String,
13    pub relative_path: String,
14    pub depth: usize,
15    pub content: String,
16    pub is_entrypoint: bool,
17    pub priority_boost: f64,
18    pub churn_score: f64,
19    pub centrality_in: f64,
20    pub imports: Option<Vec<String>>,
21    pub is_docs: bool,
22    pub is_readme: bool,
23    pub is_test: bool,
24    pub has_examples: bool,
25}
26
27impl ScanResult for QuotaScanResult {
28    fn path(&self) -> &str {
29        &self.path
30    }
31
32    fn relative_path(&self) -> &str {
33        &self.relative_path
34    }
35
36    fn depth(&self) -> usize {
37        self.depth
38    }
39
40    fn is_docs(&self) -> bool {
41        self.is_docs
42    }
43
44    fn is_readme(&self) -> bool {
45        self.is_readme
46    }
47
48    fn is_test(&self) -> bool {
49        self.is_test
50    }
51
52    fn is_entrypoint(&self) -> bool {
53        self.is_entrypoint
54    }
55
56    fn has_examples(&self) -> bool {
57        self.has_examples
58    }
59
60    fn priority_boost(&self) -> f64 {
61        self.priority_boost
62    }
63
64    fn churn_score(&self) -> f64 {
65        self.churn_score
66    }
67
68    fn centrality_in(&self) -> f64 {
69        self.centrality_in
70    }
71
72    fn imports(&self) -> Option<&[String]> {
73        self.imports.as_deref()
74    }
75
76    fn doc_analysis(&self) -> Option<&scribe_analysis::heuristics::DocumentAnalysis> {
77        None // Simplified for now
78    }
79}
80
81/// File category classification for quota allocation
82#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
83pub enum FileCategory {
84    Config,
85    Entry,
86    Examples,
87    General,
88}
89
90impl FileCategory {
91    pub fn as_str(&self) -> &'static str {
92        match self {
93            FileCategory::Config => "config",
94            FileCategory::Entry => "entry",
95            FileCategory::Examples => "examples",
96            FileCategory::General => "general",
97        }
98    }
99}
100
101/// Budget quota configuration for a file category
102#[derive(Debug, Clone, Serialize, Deserialize)]
103pub struct CategoryQuota {
104    pub category: FileCategory,
105    pub min_budget_pct: f64,      // Minimum budget percentage reserved
106    pub max_budget_pct: f64,      // Maximum budget percentage allowed
107    pub recall_target: f64,       // Recall target (0.0-1.0, 0 means no target)
108    pub priority_multiplier: f64, // Priority boost for this category
109}
110
111impl CategoryQuota {
112    pub fn new(
113        category: FileCategory,
114        min_budget_pct: f64,
115        max_budget_pct: f64,
116        recall_target: f64,
117        priority_multiplier: f64,
118    ) -> Self {
119        Self {
120            category,
121            min_budget_pct,
122            max_budget_pct,
123            recall_target,
124            priority_multiplier,
125        }
126    }
127}
128
129/// Actual budget allocation result for a category
130#[derive(Debug, Clone, Serialize, Deserialize)]
131pub struct QuotaAllocation {
132    pub category: FileCategory,
133    pub allocated_budget: usize,
134    pub used_budget: usize,
135    pub file_count: usize,
136    pub recall_achieved: f64,
137    pub density_score: f64,
138}
139
140/// Detects file categories for quota allocation
141#[derive(Debug)]
142pub struct CategoryDetector {
143    config_regex_set: RegexSet,
144    entry_regex_set: RegexSet,
145    examples_regex_set: RegexSet,
146}
147
148impl Default for CategoryDetector {
149    fn default() -> Self {
150        Self::new().expect("Failed to create CategoryDetector")
151    }
152}
153
154impl CategoryDetector {
155    pub fn new() -> Result<Self, regex::Error> {
156        // Config file patterns - escape regex special characters and convert to regex patterns
157        let config_patterns = vec![
158            // Configuration files (as regex patterns)
159            r"\.json$",
160            r"\.yaml$",
161            r"\.yml$",
162            r"\.toml$",
163            r"\.ini$",
164            r"\.cfg$",
165            r"\.conf$",
166            // Build and dependency files
167            r"package\.json$",
168            r"requirements\.txt$",
169            r"pyproject\.toml$",
170            r"cargo\.toml$",
171            r"setup\.py$",
172            r"setup\.cfg$",
173            r"makefile$",
174            r"dockerfile$",
175            r"docker-compose\.yml$",
176            // CI/CD configuration
177            r"\.github",
178            r"\.gitlab-ci\.yml$",
179            r"\.travis\.yml$",
180            r"\.circleci",
181            // IDE and tool configuration
182            r"\.vscode",
183            r"\.idea",
184            r"\.editorconfig$",
185            r"tsconfig\.json$",
186            r"tslint\.json$",
187            r"eslint\.json$",
188            r"\.eslintrc",
189            r"\.prettierrc",
190            r"jest\.config\.js$",
191        ];
192
193        // Entry point patterns (exact filename matches)
194        let entry_patterns = vec![
195            r"main\.py$",
196            r"__main__\.py$",
197            r"app\.py$",
198            r"server\.py$",
199            r"index\.py$",
200            r"main\.js$",
201            r"index\.js$",
202            r"app\.js$",
203            r"server\.js$",
204            r"index\.ts$",
205            r"main\.ts$",
206            r"main\.go$",
207            r"main\.rs$",
208            r"lib\.rs$",
209            r"mod\.rs$",
210        ];
211
212        // Example/demo patterns (directory or filename contains)
213        let examples_patterns = vec![
214            r"example",
215            r"examples",
216            r"demo",
217            r"demos",
218            r"sample",
219            r"samples",
220            r"tutorial",
221            r"tutorials",
222            r"test",
223            r"tests",
224            r"spec",
225            r"specs",
226            r"benchmark",
227            r"benchmarks",
228        ];
229
230        Ok(Self {
231            config_regex_set: RegexSet::new(&config_patterns)?,
232            entry_regex_set: RegexSet::new(&entry_patterns)?,
233            examples_regex_set: RegexSet::new(&examples_patterns)?,
234        })
235    }
236
237    /// Detect the category of a file based on its scan result
238    pub fn detect_category(&self, scan_result: &QuotaScanResult) -> FileCategory {
239        let path = scan_result.path.to_lowercase();
240        let filename = scan_result
241            .path
242            .split('/')
243            .last()
244            .unwrap_or("")
245            .to_lowercase();
246
247        // Check for config files using RegexSet
248        if self.config_regex_set.is_match(&path) || self.config_regex_set.is_match(&filename) {
249            return FileCategory::Config;
250        }
251
252        // Check for entry points
253        if scan_result.is_entrypoint || self.entry_regex_set.is_match(&filename) {
254            return FileCategory::Entry;
255        }
256
257        // Check for examples using RegexSet
258        if self.examples_regex_set.is_match(&path) || self.examples_regex_set.is_match(&filename) {
259            return FileCategory::Examples;
260        }
261
262        FileCategory::General
263    }
264}
265
266/// Manages budget quotas and density-greedy selection
267#[derive(Debug)]
268pub struct QuotaManager {
269    pub total_budget: usize,
270    pub detector: CategoryDetector,
271    pub category_quotas: HashMap<FileCategory, CategoryQuota>,
272}
273
274impl QuotaManager {
275    pub fn new(total_budget: usize) -> ScribeResult<Self> {
276        let mut category_quotas = HashMap::new();
277
278        // Default quota configuration (research-optimized)
279        category_quotas.insert(
280            FileCategory::Config,
281            CategoryQuota::new(
282                FileCategory::Config,
283                15.0, // Reserve at least 15% for config
284                30.0, // Cap at 30% to avoid over-allocation
285                0.95, // 95% recall target for config files
286                2.0,  // High priority for config files
287            ),
288        );
289
290        category_quotas.insert(
291            FileCategory::Entry,
292            CategoryQuota::new(
293                FileCategory::Entry,
294                2.0,  // Minimum for entry points
295                7.0,  // Max 7% for entry points
296                0.90, // High recall for entry points
297                1.8,  // High priority
298            ),
299        );
300
301        category_quotas.insert(
302            FileCategory::Examples,
303            CategoryQuota::new(
304                FileCategory::Examples,
305                1.0, // Small allocation for examples
306                3.0, // Max 3% for examples
307                0.0, // No recall target for examples
308                0.5, // Lower priority
309            ),
310        );
311
312        category_quotas.insert(
313            FileCategory::General,
314            CategoryQuota::new(
315                FileCategory::General,
316                60.0, // Most budget goes to general files
317                82.0, // Leave room for other categories
318                0.0,  // No specific recall target
319                1.0,  // Standard priority
320            ),
321        );
322
323        Ok(Self {
324            total_budget,
325            detector: CategoryDetector::new().map_err(|e| {
326                ScribeError::parse(format!("Failed to create category detector: {}", e))
327            })?,
328            category_quotas,
329        })
330    }
331
332    /// Classify files into categories using references to avoid expensive cloning
333    pub fn classify_files<'a>(
334        &self,
335        scan_results: &'a [QuotaScanResult],
336    ) -> HashMap<FileCategory, Vec<&'a QuotaScanResult>> {
337        let mut categorized = HashMap::new();
338
339        for result in scan_results {
340            let category = self.detector.detect_category(result);
341            categorized
342                .entry(category)
343                .or_insert_with(Vec::new)
344                .push(result);
345        }
346
347        categorized
348    }
349
350    /// Calculate density score (importance per token)
351    /// Density = importance_score / token_cost * priority_multiplier
352    pub fn calculate_density_score(
353        &self,
354        scan_result: &QuotaScanResult,
355        heuristic_score: f64,
356    ) -> f64 {
357        // Estimate token cost - simple heuristic for now
358        let estimated_tokens = self.estimate_tokens(scan_result);
359
360        // Avoid division by zero
361        let estimated_tokens = if estimated_tokens == 0 {
362            1
363        } else {
364            estimated_tokens
365        };
366
367        let mut density = heuristic_score / estimated_tokens as f64;
368
369        // Apply category priority multiplier
370        let category = self.detector.detect_category(scan_result);
371        if let Some(quota) = self.category_quotas.get(&category) {
372            density *= quota.priority_multiplier;
373        }
374
375        density
376    }
377
378    /// Simple token estimation based on file size
379    fn estimate_tokens(&self, scan_result: &QuotaScanResult) -> usize {
380        // Rough approximation: 1 token per 3-4 characters for code
381        // More sophisticated estimation would use actual tokenizer
382        (scan_result.content.len() / 3).max(1)
383    }
384
385    /// Apply density-greedy selection algorithm with quotas
386    pub fn select_files_density_greedy(
387        &self,
388        categorized_files: &HashMap<FileCategory, Vec<&QuotaScanResult>>,
389        heuristic_scores: &HashMap<String, f64>,
390        adaptation_factor: f64,
391    ) -> ScribeResult<(Vec<QuotaScanResult>, HashMap<FileCategory, QuotaAllocation>)> {
392        let mut selected_files = Vec::new();
393        let mut allocations = HashMap::new();
394
395        // Adapt total budget under pressure
396        let effective_budget = if adaptation_factor > 0.4 {
397            // Reduce effective budget to force faster selection
398            (self.total_budget as f64 * (1.0 - adaptation_factor * 0.3)) as usize
399        } else {
400            self.total_budget
401        };
402
403        let mut remaining_budget = effective_budget;
404
405        // Phase 1: Allocate minimum budgets
406        let mut min_allocations = HashMap::new();
407        for (category, quota) in &self.category_quotas {
408            if !categorized_files.contains_key(category) {
409                continue;
410            }
411
412            let min_budget = (effective_budget as f64 * quota.min_budget_pct / 100.0) as usize;
413            min_allocations.insert(*category, min_budget);
414            remaining_budget = remaining_budget.saturating_sub(min_budget);
415        }
416
417        // Phase 2: Distribute remaining budget based on demand and priority
418        let additional_allocations = self.distribute_remaining_budget(
419            categorized_files,
420            heuristic_scores,
421            remaining_budget,
422        )?;
423
424        // Phase 3: Select files within each category using density-greedy
425        for (category, files) in categorized_files {
426            if !self.category_quotas.contains_key(category) {
427                continue;
428            }
429
430            let quota = &self.category_quotas[category];
431            let allocated_budget = min_allocations.get(category).unwrap_or(&0)
432                + additional_allocations.get(category).unwrap_or(&0);
433
434            // Select files for this category
435            let (selected, allocation) = self.select_category_files(
436                *category,
437                files,
438                allocated_budget,
439                quota,
440                heuristic_scores,
441            )?;
442
443            selected_files.extend(selected);
444            allocations.insert(*category, allocation);
445        }
446
447        Ok((selected_files, allocations))
448    }
449
450    /// Distribute remaining budget based on category demands and priorities
451    fn distribute_remaining_budget(
452        &self,
453        categorized_files: &HashMap<FileCategory, Vec<&QuotaScanResult>>,
454        heuristic_scores: &HashMap<String, f64>,
455        remaining_budget: usize,
456    ) -> ScribeResult<HashMap<FileCategory, usize>> {
457        let mut additional_allocations = HashMap::new();
458
459        // Calculate demand scores for each category
460        let mut category_demands = HashMap::new();
461        for (category, files) in categorized_files {
462            if !self.category_quotas.contains_key(category) {
463                continue;
464            }
465
466            let quota = &self.category_quotas[category];
467
468            // Calculate total value density for this category
469            let mut total_density = 0.0;
470            for file_result in files {
471                let heuristic_score = heuristic_scores.get(&file_result.path).unwrap_or(&0.0);
472                let density = self.calculate_density_score(file_result, *heuristic_score);
473                total_density += density;
474            }
475
476            // Weight by priority multiplier and file count
477            let demand_score =
478                total_density * quota.priority_multiplier * (files.len() as f64 + 1.0).ln();
479            category_demands.insert(*category, demand_score);
480        }
481
482        // Distribute remaining budget proportionally to demand
483        let total_demand: f64 = category_demands.values().sum();
484        if total_demand > 0.0 {
485            for (category, demand) in &category_demands {
486                let proportion = demand / total_demand;
487                let additional_budget = (remaining_budget as f64 * proportion) as usize;
488
489                // Respect maximum budget constraints
490                let quota = &self.category_quotas[category];
491                let max_budget = (self.total_budget as f64 * quota.max_budget_pct / 100.0) as usize;
492                let min_budget = (self.total_budget as f64 * quota.min_budget_pct / 100.0) as usize;
493
494                // Don't exceed maximum allocation
495                let current_allocation = min_budget + additional_budget;
496                let final_additional = if current_allocation > max_budget {
497                    max_budget.saturating_sub(min_budget)
498                } else {
499                    additional_budget
500                };
501
502                additional_allocations.insert(*category, final_additional);
503            }
504        }
505
506        Ok(additional_allocations)
507    }
508
509    /// Select files within a category using density-greedy algorithm
510    fn select_category_files(
511        &self,
512        category: FileCategory,
513        files: &[&QuotaScanResult],
514        allocated_budget: usize,
515        quota: &CategoryQuota,
516        heuristic_scores: &HashMap<String, f64>,
517    ) -> ScribeResult<(Vec<QuotaScanResult>, QuotaAllocation)> {
518        // Calculate density scores for all files in category using parallel processing
519        let mut file_densities: Vec<_> = files
520            .par_iter()
521            .map(|file_result| {
522                let heuristic_score = heuristic_scores.get(&file_result.path).unwrap_or(&0.0);
523                let density = self.calculate_density_score(file_result, *heuristic_score);
524                let estimated_tokens = self.estimate_tokens(file_result);
525                (*file_result, density, *heuristic_score, estimated_tokens)
526            })
527            .collect();
528
529        // Sort by density (descending)
530        file_densities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
531
532        // Greedy selection within budget
533        let mut selected = Vec::new();
534        let mut used_budget = 0;
535        let mut total_importance = 0.0;
536
537        for (file_result, density, importance, tokens) in &file_densities {
538            if used_budget + tokens <= allocated_budget {
539                selected.push((*file_result).clone());
540                used_budget += tokens;
541                total_importance += importance;
542            } else if quota.recall_target > 0.0 {
543                // For categories with recall targets, try to fit more critical files
544                // even if it means going slightly over budget
545                let importance_threshold = self.calculate_importance_threshold(
546                    &file_densities
547                        .iter()
548                        .map(|(_, _, imp, _)| *imp)
549                        .collect::<Vec<_>>(),
550                    quota.recall_target,
551                )?;
552                if *importance >= importance_threshold
553                    && used_budget + tokens <= (allocated_budget as f64 * 1.05) as usize
554                {
555                    selected.push((*file_result).clone());
556                    used_budget += tokens;
557                    total_importance += importance;
558                }
559            }
560        }
561
562        // Calculate achieved recall
563        let achieved_recall = if quota.recall_target > 0.0 && !files.is_empty() {
564            // Recall = selected high-importance files / total high-importance files
565            let importance_scores: Vec<f64> = files
566                .iter()
567                .map(|f| heuristic_scores.get(&f.path).unwrap_or(&0.0))
568                .cloned()
569                .collect();
570            let importance_threshold =
571                self.calculate_importance_threshold(&importance_scores, quota.recall_target)?;
572
573            let high_importance_files: Vec<_> = files
574                .iter()
575                .filter(|f| heuristic_scores.get(&f.path).unwrap_or(&0.0) >= &importance_threshold)
576                .collect();
577
578            let selected_high_importance: Vec<_> = selected
579                .iter()
580                .filter(|f| heuristic_scores.get(&f.path).unwrap_or(&0.0) >= &importance_threshold)
581                .collect();
582
583            selected_high_importance.len() as f64 / high_importance_files.len().max(1) as f64
584        } else {
585            selected.len() as f64 / files.len().max(1) as f64 // Selection ratio
586        };
587
588        // Calculate density score for selected set
589        let density_score = if used_budget > 0 {
590            total_importance / used_budget as f64
591        } else {
592            0.0
593        };
594
595        let allocation = QuotaAllocation {
596            category,
597            allocated_budget,
598            used_budget,
599            file_count: selected.len(),
600            recall_achieved: achieved_recall,
601            density_score,
602        };
603
604        Ok((selected, allocation))
605    }
606
607    /// Calculate importance threshold for achieving target recall
608    fn calculate_importance_threshold(
609        &self,
610        importance_scores: &[f64],
611        recall_target: f64,
612    ) -> ScribeResult<f64> {
613        if importance_scores.is_empty() {
614            return Ok(0.0);
615        }
616
617        // Sort scores in descending order
618        let mut sorted_scores = importance_scores.to_vec();
619        sorted_scores.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
620
621        // Find threshold that captures top recall_target fraction
622        let target_count = (sorted_scores.len() as f64 * recall_target) as usize;
623        let target_count = target_count.max(1).min(sorted_scores.len());
624
625        let threshold_index = target_count - 1;
626        Ok(sorted_scores[threshold_index])
627    }
628
629    /// Main entry point for quotas-based selection
630    pub fn apply_quotas_selection(
631        &self,
632        scan_results: &[QuotaScanResult],
633        heuristic_scores: &HashMap<String, f64>,
634    ) -> ScribeResult<(Vec<QuotaScanResult>, HashMap<FileCategory, QuotaAllocation>)> {
635        // Apply quotas-based selection
636        let categorized_files = self.classify_files(scan_results);
637        self.select_files_density_greedy(&categorized_files, heuristic_scores, 0.0)
638    }
639}
640
641/// Create a QuotaManager instance
642pub fn create_quota_manager(total_budget: usize) -> ScribeResult<QuotaManager> {
643    QuotaManager::new(total_budget)
644}
645
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a scan result whose `path` and `relative_path` are both `path`;
    /// every flag is `false`, every score `0.0` and there are no imports.
    fn scan(path: &str, depth: usize, content: &str) -> QuotaScanResult {
        QuotaScanResult {
            path: path.to_string(),
            relative_path: path.to_string(),
            depth,
            content: content.to_string(),
            is_entrypoint: false,
            priority_boost: 0.0,
            churn_score: 0.0,
            centrality_in: 0.0,
            imports: None,
            is_docs: false,
            is_readme: false,
            is_test: false,
            has_examples: false,
        }
    }

    #[test]
    fn test_category_detection_with_regex_set() {
        let detector = CategoryDetector::new().expect("Failed to create CategoryDetector");

        // Config file detection
        let config_file = scan("package.json", 0, "{}");
        assert_eq!(detector.detect_category(&config_file), FileCategory::Config);

        // Entry point detection
        let entry_file = scan("src/main.rs", 1, "fn main() {}");
        assert_eq!(detector.detect_category(&entry_file), FileCategory::Entry);

        // Examples detection
        let examples_file = scan("examples/demo.rs", 1, "// demo");
        assert_eq!(
            detector.detect_category(&examples_file),
            FileCategory::Examples
        );

        // lib.rs is an entry point, not a general file
        let entry_lib_file = scan("src/lib.rs", 1, "pub mod utils;");
        assert_eq!(
            detector.detect_category(&entry_lib_file),
            FileCategory::Entry
        );

        // A file matching no pattern falls back to General
        let general_file = scan("src/utils.rs", 1, "pub fn helper() {}");
        assert_eq!(
            detector.detect_category(&general_file),
            FileCategory::General
        );
    }

    #[test]
    fn test_quota_manager_creation() {
        let manager = QuotaManager::new(1000).expect("Failed to create QuotaManager");
        assert_eq!(manager.total_budget, 1000);
        assert_eq!(manager.category_quotas.len(), 4);
    }

    #[test]
    fn test_regex_patterns_directly() {
        use regex::RegexSet;

        let entry_patterns = vec![
            r"main\.py$",
            r"__main__\.py$",
            r"app\.py$",
            r"server\.py$",
            r"index\.py$",
            r"main\.js$",
            r"index\.js$",
            r"app\.js$",
            r"server\.js$",
            r"index\.ts$",
            r"main\.ts$",
            r"main\.go$",
            r"main\.rs$",
            r"lib\.rs$",
            r"mod\.rs$",
        ];

        let regex_set = RegexSet::new(&entry_patterns).unwrap();

        // Bare entry point names match
        assert!(
            regex_set.is_match("lib.rs"),
            "lib.rs should match entry patterns"
        );
        assert!(
            regex_set.is_match("main.rs"),
            "main.rs should match entry patterns"
        );

        // A file name extracted from a path matches as well
        let path = "src/lib.rs";
        let filename = path.split('/').last().unwrap_or("").to_lowercase();
        assert_eq!(filename, "lib.rs");
        assert!(
            regex_set.is_match(&filename),
            "Extracted filename should match"
        );
    }
}
804}