use crate::model::CodebaseIndex;
use crate::model::declarations::{Declaration, Visibility};
/// Estimates how many tokens `text` occupies.
///
/// The estimate is the UTF-8 byte length of `text` divided by four, rounded
/// up. An empty string therefore costs zero tokens and any non-empty string
/// costs at least one.
pub fn estimate_tokens(text: &str) -> usize {
    let bytes = text.len();
    let whole_groups = bytes / 4;
    // A trailing partial group of 1-3 bytes still counts as one token.
    if bytes % 4 == 0 {
        whole_groups
    } else {
        whole_groups + 1
    }
}
fn estimate_index_tokens(index: &CodebaseIndex) -> usize {
let mut tokens = 0usize;
tokens += 20 + estimate_tokens(&index.root_name) + 30;
tokens += 10;
for entry in &index.tree {
tokens += estimate_tokens(&entry.path) + entry.depth + 1;
}
tokens += 15;
for file in &index.files {
let path_str = file.path.to_string_lossy();
tokens += 15 + estimate_tokens(&path_str);
for import in &file.imports {
tokens += 3 + estimate_tokens(&import.text);
}
tokens += estimate_declarations_tokens(&file.declarations);
}
tokens
}
/// Estimates the token cost of a list of declarations, including every
/// nested child declaration.
///
/// Each declaration contributes:
/// - 3 plus the estimate for its signature;
/// - 2 plus the estimate for its doc comment, when it has one;
/// - 5 when its `line` is greater than zero;
/// - 3 plus the estimate for the target of each relationship;
/// - the recursively computed cost of its children.
fn estimate_declarations_tokens(decls: &[Declaration]) -> usize {
    decls
        .iter()
        .map(|decl| {
            let signature_cost = 3 + estimate_tokens(&decl.signature);
            let doc_cost = decl
                .doc_comment
                .as_deref()
                .map_or(0, |doc| 2 + estimate_tokens(doc));
            let line_cost = if decl.line > 0 { 5 } else { 0 };
            let relationship_cost = decl
                .relationships
                .iter()
                .map(|rel| 3 + estimate_tokens(&rel.target))
                .sum::<usize>();
            let children_cost = estimate_declarations_tokens(&decl.children);

            signature_cost + doc_cost + line_cost + relationship_cost + children_cost
        })
        .sum()
}
/// Estimates the token cost of a single file entry.
///
/// The cost is a fixed allowance of 15, the estimate for the file's path
/// (converted lossily to text), 3 plus the text estimate for each import, and
/// the cost of all of the file's declarations.
fn estimate_file_tokens(file: &crate::model::FileIndex) -> usize {
    let path_cost = 15 + estimate_tokens(&file.path.to_string_lossy());
    let import_cost = file
        .imports
        .iter()
        .map(|import| 3 + estimate_tokens(&import.text))
        .sum::<usize>();
    let declaration_cost = estimate_declarations_tokens(&file.declarations);

    path_cost + import_cost + declaration_cost
}
/// Scores how important a file is; a higher score means more important.
///
/// The score is built from:
/// - a bonus based on the file name: 100 for `main.*` / `index.*` entry
///   points, 90 for `lib.rs` / `lib.py`, 50 for `mod.rs` / `__init__.py` /
///   `mod.ts`, otherwise 0;
/// - minus 5 for every `/` in the (lossily converted) path;
/// - plus 3 for every public declaration, nested ones included;
/// - minus 1 for every full 100 lines in the file.
fn file_importance(file: &crate::model::FileIndex) -> i64 {
    let name = file
        .path
        .file_name()
        .map(|os_name| os_name.to_string_lossy())
        .unwrap_or_default();

    let name_bonus: i64 = match &*name {
        "main.rs" | "main.py" | "main.go" | "main.ts" | "main.js" | "main.java" | "index.ts"
        | "index.js" | "index.tsx" | "index.jsx" => 100,
        "lib.rs" | "lib.py" => 90,
        "mod.rs" | "__init__.py" | "mod.ts" => 50,
        _ => 0,
    };

    // Nesting depth is approximated by counting forward slashes in the path.
    let depth_penalty = file.path.to_string_lossy().matches('/').count() as i64 * 5;
    let public_bonus = count_public_decls(&file.declarations) as i64 * 3;
    let size_penalty = (file.lines / 100) as i64;

    name_bonus - depth_penalty + public_bonus - size_penalty
}
/// Counts the declarations whose visibility is `Visibility::Public`,
/// descending into the children of every declaration regardless of the
/// parent's own visibility.
fn count_public_decls(decls: &[Declaration]) -> usize {
    decls
        .iter()
        .map(|decl| {
            let own = usize::from(matches!(decl.visibility, Visibility::Public));
            own + count_public_decls(&decl.children)
        })
        .sum()
}
/// Shrinks `index` in place until its estimated token cost fits `max_tokens`.
///
/// Reductions are applied in stages, from least to most destructive, and the
/// function returns as soon as the running estimate is within budget:
///
/// 1. Truncate doc comments longer than 80 bytes.
/// 2. Remove all doc comments.
/// 3. Remove private declarations, then drop files that are left with neither
///    declarations nor imports.
/// 4. Clear the children of every top-level declaration.
/// 5. Drop whole files, lowest `file_importance` first, always keeping at
///    least one file; the surviving files are then re-sorted by path.
///
/// The estimate is maintained incrementally by subtracting what each stage
/// frees. `index.tree` is never modified. If the budget still cannot be met
/// after the last stage, the index is left in its most reduced state.
pub fn apply_token_budget(index: &mut CodebaseIndex, max_tokens: usize) {
    let mut current = estimate_index_tokens(index);
    if current <= max_tokens {
        return;
    }

    // Stage 1: shorten long doc comments.
    for file in &mut index.files {
        let saved = truncate_doc_comments(&mut file.declarations, 80);
        current = current.saturating_sub(saved);
    }
    if current <= max_tokens {
        return;
    }

    // Stage 2: remove doc comments entirely.
    for file in &mut index.files {
        let saved = strip_doc_comments(&mut file.declarations);
        current = current.saturating_sub(saved);
    }
    if current <= max_tokens {
        return;
    }

    // Stage 3: keep only non-private declarations.
    for file in &mut index.files {
        let before = estimate_declarations_tokens(&file.declarations);
        file.declarations = remove_private_declarations(&file.declarations);
        let after = estimate_declarations_tokens(&file.declarations);
        current = current.saturating_sub(before.saturating_sub(after));
    }
    // Files with nothing left to show are dropped. Their remaining cost (the
    // per-file allowance and path) must be credited back too; otherwise the
    // estimate stays too high and later stages discard more than necessary.
    let mut freed_by_dropped_files = 0usize;
    index.files.retain(|file| {
        let keep = !file.declarations.is_empty() || !file.imports.is_empty();
        if !keep {
            freed_by_dropped_files += estimate_file_tokens(file);
        }
        keep
    });
    current = current.saturating_sub(freed_by_dropped_files);
    if current <= max_tokens {
        return;
    }

    // Stage 4: flatten declarations by discarding their children.
    for file in &mut index.files {
        let before = estimate_declarations_tokens(&file.declarations);
        strip_children(&mut file.declarations);
        let after = estimate_declarations_tokens(&file.declarations);
        current = current.saturating_sub(before.saturating_sub(after));
    }
    if current <= max_tokens {
        return;
    }

    // Stage 5: drop the least important files. The importance score walks the
    // declaration tree, so compute it once per file rather than per comparison.
    index.files.sort_by_cached_key(file_importance);

    // Decide how many leading (least important) files must go, never removing
    // the last one, then remove them in a single pass.
    let droppable = index.files.len().saturating_sub(1);
    let mut drop_count = 0usize;
    for file in index.files.iter().take(droppable) {
        if current <= max_tokens {
            break;
        }
        current = current.saturating_sub(estimate_file_tokens(file));
        drop_count += 1;
    }
    index.files.drain(..drop_count);

    // Restore path order for the files that remain.
    index.files.sort_by(|a, b| a.path.cmp(&b.path));
}
fn truncate_doc_comments(decls: &mut [Declaration], max_len: usize) -> usize {
let mut saved = 0usize;
for decl in decls.iter_mut() {
if let Some(ref mut doc) = decl.doc_comment
&& doc.len() > max_len
{
let old_tokens = estimate_tokens(doc);
let truncated = doc
.split('\n')
.next()
.unwrap_or(doc)
.chars()
.take(max_len)
.collect::<String>();
let new_doc = if truncated.len() < doc.len() {
format!("{}...", truncated.trim_end_matches('.'))
} else {
truncated
};
let new_tokens = estimate_tokens(&new_doc);
saved += old_tokens.saturating_sub(new_tokens);
*doc = new_doc;
}
saved += truncate_doc_comments(&mut decl.children, max_len);
}
saved
}
/// Removes the doc comment from every declaration in `decls`, including all
/// nested children.
///
/// Returns the estimated number of tokens freed: 2 plus the text estimate for
/// each comment that was removed.
fn strip_doc_comments(decls: &mut [Declaration]) -> usize {
    let mut freed = 0usize;
    for decl in decls.iter_mut() {
        // `take` leaves `None` behind whether or not a comment was present.
        if let Some(removed) = decl.doc_comment.take() {
            freed += 2 + estimate_tokens(&removed);
        }
        freed += strip_doc_comments(&mut decl.children);
    }
    freed
}
/// Returns a copy of `decls` without declarations whose visibility is
/// `Visibility::Private`.
///
/// The filter is applied recursively to the children of every declaration
/// that is kept. A private declaration is removed together with all of its
/// children. The input slice is left untouched.
fn remove_private_declarations(decls: &[Declaration]) -> Vec<Declaration> {
    decls
        .iter()
        .filter(|decl| !matches!(decl.visibility, Visibility::Private))
        .map(|decl| {
            let mut kept = decl.clone();
            kept.children = remove_private_declarations(&decl.children);
            kept
        })
        .collect()
}
/// Empties the `children` list of each declaration in `decls`.
///
/// Only the given declarations are visited; clearing their children discards
/// every deeper level along with them.
fn strip_children(decls: &mut [Declaration]) {
    decls
        .iter_mut()
        .for_each(|decl| decl.children.clear());
}