use crate::path::FileEntry;
use serde::{Deserialize, Serialize};
use std::cmp::{Ordering, Reverse};
use std::collections::{BTreeMap, BinaryHeap, HashMap};
use std::path::Path;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenMapEntry {
pub path: String,
pub name: String,
pub tokens: usize,
pub percentage: f64,
pub depth: usize,
pub is_last_child: bool,
pub has_children: bool,
pub metadata: EntryMetadata,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct EntryMetadata {
pub is_dir: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtensionStat {
pub extension: String,
pub file_count: usize,
pub tokens: usize,
pub percentage: f64,
}
#[derive(Debug, Clone)]
pub struct TokenMapOptions {
pub max_lines: usize,
pub min_percent: f64,
}
impl Default for TokenMapOptions {
fn default() -> Self {
Self {
max_lines: 20,
min_percent: 0.1,
}
}
}
pub struct CodebaseAnalysis<'a> {
files: &'a [FileEntry],
total_tokens: usize,
}
impl<'a> CodebaseAnalysis<'a> {
pub fn new(files: &'a [FileEntry], total_tokens: usize) -> Self {
Self {
files,
total_tokens,
}
}
pub fn token_map(&self, options: TokenMapOptions) -> Vec<TokenMapEntry> {
let common_prefix = find_common_path_prefix(self.files);
let mut root = TreeNode::new(String::new());
root.tokens = self.total_tokens;
for file in self.files {
let path = Path::new(&file.path);
let path_to_use = if let Some(prefix) = &common_prefix {
path.strip_prefix(prefix).unwrap_or(path)
} else {
path
};
let components: Vec<&str> = path_to_use
.components()
.filter_map(|c| c.as_os_str().to_str())
.filter(|c| *c != "/" && !c.is_empty()) .collect();
if !components.is_empty() {
insert_path(
&mut root,
&components,
file.token_count,
String::new(),
EntryMetadata {
is_dir: file.metadata.is_dir,
},
);
}
}
let allowed_nodes = select_nodes_to_display(&root, self.total_tokens, &options);
let mut entries = Vec::new();
rebuild_filtered_tree(
&root,
String::new(),
&allowed_nodes,
&mut entries,
0,
self.total_tokens,
true,
);
let displayed_file_tokens: usize = entries
.iter()
.filter(|e| !e.metadata.is_dir)
.map(|e| e.tokens)
.sum();
let total_file_tokens = calculate_file_tokens(&root);
let hidden_tokens = total_file_tokens.saturating_sub(displayed_file_tokens);
if hidden_tokens > 0 {
if let Some(last) = entries.last_mut()
&& last.depth == 0
{
last.is_last_child = false;
}
entries.push(TokenMapEntry {
path: "(other files)".to_string(),
name: "(other files)".to_string(),
tokens: hidden_tokens,
percentage: (hidden_tokens as f64 / self.total_tokens as f64) * 100.0,
depth: 0,
is_last_child: true,
has_children: false,
metadata: EntryMetadata { is_dir: false },
});
}
entries
}
pub fn by_extension(&self) -> Vec<ExtensionStat> {
let mut stats: HashMap<String, (usize, usize)> = HashMap::new();
for file in self.files {
if file.metadata.is_dir {
continue;
}
let ext = Path::new(&file.path)
.extension()
.and_then(|s| s.to_str())
.unwrap_or("(no extension)")
.to_string();
let entry = stats.entry(ext).or_insert((0, 0));
entry.0 += 1; entry.1 += file.token_count; }
let mut result: Vec<ExtensionStat> = stats
.into_iter()
.map(|(ext, (count, tokens))| ExtensionStat {
extension: ext,
file_count: count,
tokens,
percentage: (tokens as f64 / self.total_tokens as f64) * 100.0,
})
.collect();
result.sort_by_key(|b| Reverse(b.tokens));
result
}
pub fn raw_files(&self) -> &[FileEntry] {
self.files
}
}
#[derive(Debug, Clone)]
struct TreeNode {
tokens: usize,
children: BTreeMap<String, TreeNode>, path: String,
metadata: Option<EntryMetadata>,
}
impl TreeNode {
fn new(path: String) -> Self {
TreeNode {
tokens: 0,
children: BTreeMap::new(),
path,
metadata: None,
}
}
}
fn insert_path(
node: &mut TreeNode,
components: &[&str],
tokens: usize,
parent_path: String,
file_metadata: EntryMetadata,
) {
if components.is_empty() {
return;
}
if components.len() == 1 {
let file_name = components[0].to_string();
let file_path = if parent_path.is_empty() {
file_name.clone()
} else {
format!("{}/{}", parent_path, file_name)
};
let child = node
.children
.entry(file_name)
.or_insert_with(|| TreeNode::new(file_path));
child.tokens = tokens;
child.metadata = Some(file_metadata);
} else {
let dir_name = components[0].to_string();
let dir_path = if parent_path.is_empty() {
dir_name.clone()
} else {
format!("{}/{}", parent_path, dir_name)
};
let child = node
.children
.entry(dir_name)
.or_insert_with(|| TreeNode::new(dir_path.clone()));
child.tokens += tokens; child.metadata = Some(EntryMetadata { is_dir: true });
insert_path(child, &components[1..], tokens, dir_path, file_metadata);
}
}
#[derive(Debug, Clone, Eq, PartialEq)]
struct NodePriority {
tokens: usize,
path: String,
depth: usize,
}
impl Ord for NodePriority {
fn cmp(&self, other: &Self) -> Ordering {
self.tokens
.cmp(&other.tokens)
.then_with(|| other.depth.cmp(&self.depth)) .then_with(|| self.path.cmp(&other.path))
}
}
impl PartialOrd for NodePriority {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
fn select_nodes_to_display(
root: &TreeNode,
total_tokens: usize,
options: &TokenMapOptions,
) -> HashMap<String, usize> {
let mut heap = BinaryHeap::new();
let mut allowed_nodes = HashMap::new();
let min_tokens = (total_tokens as f64 * options.min_percent / 100.0) as usize;
for child in root.children.values() {
if child.tokens >= min_tokens {
heap.push(NodePriority {
tokens: child.tokens,
path: child.path.clone(),
depth: 0,
});
}
}
while allowed_nodes.len() < options.max_lines.saturating_sub(1) && !heap.is_empty() {
if let Some(node_priority) = heap.pop() {
allowed_nodes.insert(node_priority.path.clone(), node_priority.depth);
if let Some(node) = find_node_by_path(root, &node_priority.path) {
for child in node.children.values() {
if child.tokens >= min_tokens && !allowed_nodes.contains_key(&child.path) {
heap.push(NodePriority {
tokens: child.tokens,
path: child.path.clone(),
depth: node_priority.depth + 1,
});
}
}
}
}
}
allowed_nodes
}
fn find_node_by_path<'a>(root: &'a TreeNode, path: &str) -> Option<&'a TreeNode> {
if path.is_empty() {
return Some(root);
}
let components: Vec<&str> = path.split('/').collect();
let mut current = root;
for component in components {
match current.children.get(component) {
Some(child) => current = child,
None => return None,
}
}
Some(current)
}
fn rebuild_filtered_tree(
node: &TreeNode,
path: String,
allowed_nodes: &HashMap<String, usize>,
entries: &mut Vec<TokenMapEntry>,
depth: usize,
total_tokens: usize,
is_last: bool,
) {
if !path.is_empty() && allowed_nodes.contains_key(&path) {
let percentage = (node.tokens as f64 / total_tokens as f64) * 100.0;
let name = path
.split('/')
.rfind(|s| !s.is_empty())
.unwrap_or(&path)
.to_string();
let metadata = node.metadata.unwrap_or(EntryMetadata { is_dir: true });
let has_visible_children = node
.children
.values()
.any(|child| allowed_nodes.contains_key(&child.path));
entries.push(TokenMapEntry {
path: path.clone(),
name,
tokens: node.tokens,
percentage,
depth: depth.saturating_sub(1), is_last_child: is_last,
has_children: has_visible_children,
metadata,
});
}
let mut filtered_children: Vec<(&String, &TreeNode)> = node
.children
.iter()
.filter(|(_, child)| allowed_nodes.contains_key(&child.path))
.collect();
filtered_children.sort_by_key(|b| Reverse(b.1.tokens));
let child_count = filtered_children.len();
for (i, (name, child)) in filtered_children.into_iter().enumerate() {
let child_path = if path.is_empty() {
name.clone()
} else {
format!("{}/{}", path, name)
};
let is_last_child = i == child_count - 1;
rebuild_filtered_tree(
child,
child_path,
allowed_nodes,
entries,
depth + 1,
total_tokens,
is_last_child,
);
}
}
fn calculate_file_tokens(node: &TreeNode) -> usize {
if node.metadata.is_some_and(|m| !m.is_dir) {
node.tokens
} else {
node.children.values().map(calculate_file_tokens).sum()
}
}
fn find_common_path_prefix(files: &[FileEntry]) -> Option<std::path::PathBuf> {
if files.is_empty() {
return None;
}
let first_path = Path::new(&files[0].path);
if !first_path.is_absolute() {
return None;
}
let first_components: Vec<_> = first_path.components().collect();
let mut common_len = first_components.len();
for file in files.iter().skip(1) {
let path = Path::new(&file.path);
let components: Vec<_> = path.components().collect();
let mut matching = 0;
for (a, b) in first_components.iter().zip(components.iter()) {
if a == b {
matching += 1;
} else {
break;
}
}
common_len = common_len.min(matching);
if common_len == 0 {
return None; }
}
if common_len >= first_components.len() {
common_len = first_components.len().saturating_sub(1);
}
if common_len == 0 {
return None;
}
let mut prefix = std::path::PathBuf::new();
for component in first_components.iter().take(common_len) {
prefix.push(component);
}
Some(prefix)
}