1use std::collections::HashMap;
2use serde::{Deserialize, Serialize};
3
4use scribe_core::{ScribeError, Result as ScribeResult};
5use scribe_analysis::heuristics::ScanResult;
6
/// Per-file scan record consumed by the quota system.
///
/// The `ScanResult` implementation for this type returns these fields
/// unchanged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuotaScanResult {
    /// Path of the file. `CategoryDetector::detect_category` lower-cases it
    /// for pattern matching, and it is the lookup key into the
    /// heuristic-score map during selection.
    pub path: String,
    /// Value returned by `ScanResult::relative_path`.
    // NOTE(review): presumably relative to the repository root — confirm with the producer.
    pub relative_path: String,
    /// Value returned by `ScanResult::depth`.
    // NOTE(review): presumably the directory nesting depth — confirm.
    pub depth: usize,
    /// File contents; `QuotaManager` derives its token estimate from the
    /// byte length of this string.
    pub content: String,
    /// When `true`, the detector classifies the file as an entry point even
    /// if its name matches no entry pattern (config patterns are checked first).
    pub is_entrypoint: bool,
    /// Value returned by `ScanResult::priority_boost`.
    pub priority_boost: f64,
    /// Value returned by `ScanResult::churn_score`.
    pub churn_score: f64,
    /// Value returned by `ScanResult::centrality_in`.
    pub centrality_in: f64,
    /// Optional import list, exposed as a slice by `ScanResult::imports`.
    pub imports: Option<Vec<String>>,
    /// Value returned by `ScanResult::is_docs`.
    pub is_docs: bool,
    /// Value returned by `ScanResult::is_readme`.
    pub is_readme: bool,
    /// Value returned by `ScanResult::is_test`.
    pub is_test: bool,
    /// Value returned by `ScanResult::has_examples`.
    pub has_examples: bool,
}
24
25impl ScanResult for QuotaScanResult {
26 fn path(&self) -> &str {
27 &self.path
28 }
29
30 fn relative_path(&self) -> &str {
31 &self.relative_path
32 }
33
34 fn depth(&self) -> usize {
35 self.depth
36 }
37
38 fn is_docs(&self) -> bool {
39 self.is_docs
40 }
41
42 fn is_readme(&self) -> bool {
43 self.is_readme
44 }
45
46 fn is_test(&self) -> bool {
47 self.is_test
48 }
49
50 fn is_entrypoint(&self) -> bool {
51 self.is_entrypoint
52 }
53
54 fn has_examples(&self) -> bool {
55 self.has_examples
56 }
57
58 fn priority_boost(&self) -> f64 {
59 self.priority_boost
60 }
61
62 fn churn_score(&self) -> f64 {
63 self.churn_score
64 }
65
66 fn centrality_in(&self) -> f64 {
67 self.centrality_in
68 }
69
70 fn imports(&self) -> Option<&[String]> {
71 self.imports.as_deref()
72 }
73
74 fn doc_analysis(&self) -> Option<&scribe_analysis::heuristics::DocumentAnalysis> {
75 None }
77}
78
/// Budget category assigned to a scanned file by `CategoryDetector`.
///
/// Used as the key of the quota and allocation maps, hence the
/// `Copy`/`Eq`/`Hash` derives.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FileCategory {
    /// The path matched one of the detector's config patterns.
    Config,
    /// The file is flagged as an entry point or its name equals an entry pattern.
    Entry,
    /// The path matched one of the detector's examples patterns.
    Examples,
    /// Fallback when no other category matched.
    General,
}
87
88impl FileCategory {
89 pub fn as_str(&self) -> &'static str {
90 match self {
91 FileCategory::Config => "config",
92 FileCategory::Entry => "entry",
93 FileCategory::Examples => "examples",
94 FileCategory::General => "general",
95 }
96 }
97}
98
/// Budget rules for one [`FileCategory`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategoryQuota {
    /// Category these rules apply to.
    pub category: FileCategory,
    /// Share of the budget reserved for the category, expressed as a
    /// percentage (the selection code divides it by 100).
    pub min_budget_pct: f64,
    /// Upper bound on the category's share, also a percentage.
    pub max_budget_pct: f64,
    /// Fraction used to derive the importance threshold during selection;
    /// a value that is not greater than zero disables the recall-based path.
    pub recall_target: f64,
    /// Factor applied to a file's density score and to the category's
    /// demand when the remaining budget is distributed.
    pub priority_multiplier: f64,
}
108
109impl CategoryQuota {
110 pub fn new(
111 category: FileCategory,
112 min_budget_pct: f64,
113 max_budget_pct: f64,
114 recall_target: f64,
115 priority_multiplier: f64,
116 ) -> Self {
117 Self {
118 category,
119 min_budget_pct,
120 max_budget_pct,
121 recall_target,
122 priority_multiplier,
123 }
124 }
125}
126
/// Outcome of selecting files for one category.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuotaAllocation {
    /// Category this record describes.
    pub category: FileCategory,
    /// Budget granted to the category: its minimum share plus any
    /// additional share it received.
    pub allocated_budget: usize,
    /// Sum of the estimated tokens of the selected files. For categories
    /// with a positive recall target this may exceed `allocated_budget`
    /// by up to 5%.
    pub used_budget: usize,
    /// Number of files selected.
    pub file_count: usize,
    /// With a positive recall target: the fraction of files at or above the
    /// importance threshold that were selected. Otherwise: selected files
    /// divided by all files of the category.
    pub recall_achieved: f64,
    /// Summed heuristic score of the selected files divided by
    /// `used_budget`; `0.0` when nothing was selected.
    pub density_score: f64,
}
137
/// Pattern tables used to assign a [`FileCategory`] to a scanned file.
///
/// All patterns are lower-case because they are compared against the
/// lower-cased path and file name.
#[derive(Debug, Clone)]
pub struct CategoryDetector {
    /// Substrings; a file is `Config` when its path or file name contains any of them.
    config_patterns: Vec<&'static str>,
    /// Exact file names; a file is `Entry` when its file name equals one of them.
    entry_patterns: Vec<&'static str>,
    /// Substrings; a file is `Examples` when its path or file name contains any of them.
    examples_patterns: Vec<&'static str>,
}
145
/// `Default` delegates to `CategoryDetector::new`, so a default detector is
/// identical to one built with `new()`.
impl Default for CategoryDetector {
    fn default() -> Self {
        Self::new()
    }
}
151
152impl CategoryDetector {
153 pub fn new() -> Self {
154 Self {
155 config_patterns: vec![
157 ".json", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf",
159 "package.json", "requirements.txt", "pyproject.toml", "cargo.toml",
161 "setup.py", "setup.cfg", "makefile", "dockerfile", "docker-compose.yml",
162 ".github", ".gitlab-ci.yml", ".travis.yml", ".circleci",
164 ".vscode", ".idea", ".editorconfig", "tsconfig.json", "tslint.json",
166 "eslint.json", ".eslintrc", ".prettierrc", "jest.config.js"
167 ],
168
169 entry_patterns: vec![
171 "main.py", "__main__.py", "app.py", "server.py", "index.py",
172 "main.js", "index.js", "app.js", "server.js", "index.ts", "main.ts",
173 "main.go", "main.rs", "lib.rs", "mod.rs"
174 ],
175
176 examples_patterns: vec![
178 "example", "examples", "demo", "demos", "sample", "samples",
179 "tutorial", "tutorials", "test", "tests", "spec", "specs",
180 "benchmark", "benchmarks"
181 ],
182 }
183 }
184
185 pub fn detect_category(&self, scan_result: &QuotaScanResult) -> FileCategory {
187 let path = scan_result.path.to_lowercase();
188 let filename = scan_result.path
189 .split('/')
190 .last()
191 .unwrap_or("")
192 .to_lowercase();
193
194 if self.is_config_file(&path, &filename) {
196 return FileCategory::Config;
197 }
198
199 if self.is_entry_file(&path, &filename, scan_result) {
201 return FileCategory::Entry;
202 }
203
204 if self.is_examples_file(&path, &filename) {
206 return FileCategory::Examples;
207 }
208
209 FileCategory::General
210 }
211
212 fn is_config_file(&self, path: &str, filename: &str) -> bool {
213 for pattern in &self.config_patterns {
215 if filename.contains(pattern) || path.contains(pattern) {
216 return true;
217 }
218 }
219 false
220 }
221
222 fn is_entry_file(&self, path: &str, filename: &str, scan_result: &QuotaScanResult) -> bool {
223 if scan_result.is_entrypoint {
225 return true;
226 }
227
228 for pattern in &self.entry_patterns {
230 if filename == *pattern {
231 return true;
232 }
233 }
234
235 false
238 }
239
240 fn is_examples_file(&self, path: &str, filename: &str) -> bool {
241 for pattern in &self.examples_patterns {
243 if path.contains(pattern) || filename.contains(pattern) {
244 return true;
245 }
246 }
247 false
248 }
249}
250
/// Splits a total budget across file categories and selects files within
/// each category's share.
#[derive(Debug, Clone)]
pub struct QuotaManager {
    /// Overall budget, in the same unit as the per-file token estimate.
    pub total_budget: usize,
    /// Detector used to assign each file to a category.
    pub detector: CategoryDetector,
    /// Budget rules per category; categories without an entry are skipped
    /// during selection.
    pub category_quotas: HashMap<FileCategory, CategoryQuota>,
}
258
259impl QuotaManager {
260 pub fn new(total_budget: usize) -> Self {
261 let mut category_quotas = HashMap::new();
262
263 category_quotas.insert(
265 FileCategory::Config,
266 CategoryQuota::new(
267 FileCategory::Config,
268 15.0, 30.0, 0.95, 2.0, )
273 );
274
275 category_quotas.insert(
276 FileCategory::Entry,
277 CategoryQuota::new(
278 FileCategory::Entry,
279 2.0, 7.0, 0.90, 1.8, )
284 );
285
286 category_quotas.insert(
287 FileCategory::Examples,
288 CategoryQuota::new(
289 FileCategory::Examples,
290 1.0, 3.0, 0.0, 0.5, )
295 );
296
297 category_quotas.insert(
298 FileCategory::General,
299 CategoryQuota::new(
300 FileCategory::General,
301 60.0, 82.0, 0.0, 1.0, )
306 );
307
308 Self {
309 total_budget,
310 detector: CategoryDetector::new(),
311 category_quotas,
312 }
313 }
314
315 pub fn classify_files(&self, scan_results: &[QuotaScanResult]) -> HashMap<FileCategory, Vec<QuotaScanResult>> {
317 let mut categorized = HashMap::new();
318
319 for result in scan_results {
320 let category = self.detector.detect_category(result);
321 categorized.entry(category)
322 .or_insert_with(Vec::new)
323 .push(result.clone());
324 }
325
326 categorized
327 }
328
329 pub fn calculate_density_score(&self, scan_result: &QuotaScanResult, heuristic_score: f64) -> f64 {
332 let estimated_tokens = self.estimate_tokens(scan_result);
334
335 let estimated_tokens = if estimated_tokens == 0 { 1 } else { estimated_tokens };
337
338 let mut density = heuristic_score / estimated_tokens as f64;
339
340 let category = self.detector.detect_category(scan_result);
342 if let Some(quota) = self.category_quotas.get(&category) {
343 density *= quota.priority_multiplier;
344 }
345
346 density
347 }
348
349 fn estimate_tokens(&self, scan_result: &QuotaScanResult) -> usize {
351 (scan_result.content.len() / 3).max(1)
354 }
355
356 pub fn select_files_density_greedy(
358 &self,
359 categorized_files: &HashMap<FileCategory, Vec<QuotaScanResult>>,
360 heuristic_scores: &HashMap<String, f64>,
361 adaptation_factor: f64,
362 ) -> ScribeResult<(Vec<QuotaScanResult>, HashMap<FileCategory, QuotaAllocation>)> {
363 let mut selected_files = Vec::new();
364 let mut allocations = HashMap::new();
365
366 let effective_budget = if adaptation_factor > 0.4 {
368 (self.total_budget as f64 * (1.0 - adaptation_factor * 0.3)) as usize
370 } else {
371 self.total_budget
372 };
373
374 let mut remaining_budget = effective_budget;
375
376 let mut min_allocations = HashMap::new();
378 for (category, quota) in &self.category_quotas {
379 if !categorized_files.contains_key(category) {
380 continue;
381 }
382
383 let min_budget = (effective_budget as f64 * quota.min_budget_pct / 100.0) as usize;
384 min_allocations.insert(*category, min_budget);
385 remaining_budget = remaining_budget.saturating_sub(min_budget);
386 }
387
388 let additional_allocations = self.distribute_remaining_budget(
390 categorized_files,
391 heuristic_scores,
392 remaining_budget
393 )?;
394
395 for (category, files) in categorized_files {
397 if !self.category_quotas.contains_key(category) {
398 continue;
399 }
400
401 let quota = &self.category_quotas[category];
402 let allocated_budget = min_allocations.get(category).unwrap_or(&0)
403 + additional_allocations.get(category).unwrap_or(&0);
404
405 let (selected, allocation) = self.select_category_files(
407 *category,
408 files,
409 allocated_budget,
410 quota,
411 heuristic_scores,
412 )?;
413
414 selected_files.extend(selected);
415 allocations.insert(*category, allocation);
416 }
417
418 Ok((selected_files, allocations))
419 }
420
421 fn distribute_remaining_budget(
423 &self,
424 categorized_files: &HashMap<FileCategory, Vec<QuotaScanResult>>,
425 heuristic_scores: &HashMap<String, f64>,
426 remaining_budget: usize,
427 ) -> ScribeResult<HashMap<FileCategory, usize>> {
428 let mut additional_allocations = HashMap::new();
429
430 let mut category_demands = HashMap::new();
432 for (category, files) in categorized_files {
433 if !self.category_quotas.contains_key(category) {
434 continue;
435 }
436
437 let quota = &self.category_quotas[category];
438
439 let mut total_density = 0.0;
441 for file_result in files {
442 let heuristic_score = heuristic_scores.get(&file_result.path).unwrap_or(&0.0);
443 let density = self.calculate_density_score(file_result, *heuristic_score);
444 total_density += density;
445 }
446
447 let demand_score = total_density * quota.priority_multiplier * (files.len() as f64 + 1.0).ln();
449 category_demands.insert(*category, demand_score);
450 }
451
452 let total_demand: f64 = category_demands.values().sum();
454 if total_demand > 0.0 {
455 for (category, demand) in &category_demands {
456 let proportion = demand / total_demand;
457 let additional_budget = (remaining_budget as f64 * proportion) as usize;
458
459 let quota = &self.category_quotas[category];
461 let max_budget = (self.total_budget as f64 * quota.max_budget_pct / 100.0) as usize;
462 let min_budget = (self.total_budget as f64 * quota.min_budget_pct / 100.0) as usize;
463
464 let current_allocation = min_budget + additional_budget;
466 let final_additional = if current_allocation > max_budget {
467 max_budget.saturating_sub(min_budget)
468 } else {
469 additional_budget
470 };
471
472 additional_allocations.insert(*category, final_additional);
473 }
474 }
475
476 Ok(additional_allocations)
477 }
478
479 fn select_category_files(
481 &self,
482 category: FileCategory,
483 files: &[QuotaScanResult],
484 allocated_budget: usize,
485 quota: &CategoryQuota,
486 heuristic_scores: &HashMap<String, f64>,
487 ) -> ScribeResult<(Vec<QuotaScanResult>, QuotaAllocation)> {
488 let mut file_densities = Vec::new();
490 for file_result in files {
491 let heuristic_score = heuristic_scores.get(&file_result.path).unwrap_or(&0.0);
492 let density = self.calculate_density_score(file_result, *heuristic_score);
493 let estimated_tokens = self.estimate_tokens(file_result);
494 file_densities.push((file_result.clone(), density, *heuristic_score, estimated_tokens));
495 }
496
497 file_densities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
499
500 let mut selected = Vec::new();
502 let mut used_budget = 0;
503 let mut total_importance = 0.0;
504
505 for (file_result, density, importance, tokens) in &file_densities {
506 if used_budget + tokens <= allocated_budget {
507 selected.push(file_result.clone());
508 used_budget += tokens;
509 total_importance += importance;
510 } else if quota.recall_target > 0.0 {
511 let importance_threshold = self.calculate_importance_threshold(
514 &file_densities.iter().map(|(_, _, imp, _)| *imp).collect::<Vec<_>>(),
515 quota.recall_target
516 )?;
517 if *importance >= importance_threshold && used_budget + tokens <= (allocated_budget as f64 * 1.05) as usize {
518 selected.push(file_result.clone());
519 used_budget += tokens;
520 total_importance += importance;
521 }
522 }
523 }
524
525 let achieved_recall = if quota.recall_target > 0.0 && !files.is_empty() {
527 let importance_scores: Vec<f64> = files.iter()
529 .map(|f| heuristic_scores.get(&f.path).unwrap_or(&0.0))
530 .cloned()
531 .collect();
532 let importance_threshold = self.calculate_importance_threshold(&importance_scores, quota.recall_target)?;
533
534 let high_importance_files: Vec<_> = files.iter()
535 .filter(|f| heuristic_scores.get(&f.path).unwrap_or(&0.0) >= &importance_threshold)
536 .collect();
537
538 let selected_high_importance: Vec<_> = selected.iter()
539 .filter(|f| heuristic_scores.get(&f.path).unwrap_or(&0.0) >= &importance_threshold)
540 .collect();
541
542 selected_high_importance.len() as f64 / high_importance_files.len().max(1) as f64
543 } else {
544 selected.len() as f64 / files.len().max(1) as f64 };
546
547 let density_score = if used_budget > 0 {
549 total_importance / used_budget as f64
550 } else {
551 0.0
552 };
553
554 let allocation = QuotaAllocation {
555 category,
556 allocated_budget,
557 used_budget,
558 file_count: selected.len(),
559 recall_achieved: achieved_recall,
560 density_score,
561 };
562
563 Ok((selected, allocation))
564 }
565
566 fn calculate_importance_threshold(&self, importance_scores: &[f64], recall_target: f64) -> ScribeResult<f64> {
568 if importance_scores.is_empty() {
569 return Ok(0.0);
570 }
571
572 let mut sorted_scores = importance_scores.to_vec();
574 sorted_scores.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
575
576 let target_count = (sorted_scores.len() as f64 * recall_target) as usize;
578 let target_count = target_count.max(1).min(sorted_scores.len());
579
580 let threshold_index = target_count - 1;
581 Ok(sorted_scores[threshold_index])
582 }
583
584 pub fn apply_quotas_selection(
586 &self,
587 scan_results: &[QuotaScanResult],
588 heuristic_scores: &HashMap<String, f64>,
589 ) -> ScribeResult<(Vec<QuotaScanResult>, HashMap<FileCategory, QuotaAllocation>)> {
590 let categorized_files = self.classify_files(scan_results);
592 self.select_files_density_greedy(&categorized_files, heuristic_scores, 0.0)
593 }
594}
595
/// Convenience wrapper around `QuotaManager::new`: returns a manager for
/// `total_budget` with the built-in category quotas.
pub fn create_quota_manager(total_budget: usize) -> QuotaManager {
    QuotaManager::new(total_budget)
}