use serde::{Deserialize, Serialize};
use std::fmt;
use std::path::PathBuf;
pub use crate::tokenizer::{TokenCounts, TokenModel};
pub type TokenizerModel = TokenModel;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Repository {
pub name: String,
pub path: PathBuf,
pub files: Vec<RepoFile>,
pub metadata: RepoMetadata,
}
impl Repository {
pub fn new(name: impl Into<String>, path: impl Into<PathBuf>) -> Self {
Self {
name: name.into(),
path: path.into(),
files: Vec::new(),
metadata: RepoMetadata::default(),
}
}
pub fn total_tokens(&self, model: TokenizerModel) -> u32 {
self.files.iter().map(|f| f.token_count.get(model)).sum()
}
pub fn files_by_language(&self, language: &str) -> Vec<&RepoFile> {
self.files
.iter()
.filter(|f| f.language.as_deref() == Some(language))
.collect()
}
#[must_use]
pub fn files_by_importance(&self) -> Vec<&RepoFile> {
let mut files: Vec<_> = self.files.iter().collect();
files.sort_by(|a, b| {
b.importance
.partial_cmp(&a.importance)
.unwrap_or(std::cmp::Ordering::Equal)
});
files
}
}
impl fmt::Display for Repository {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Repository({}: {} files, {} lines)",
self.name, self.metadata.total_files, self.metadata.total_lines
)
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoFile {
pub path: PathBuf,
pub relative_path: String,
pub language: Option<String>,
pub size_bytes: u64,
pub token_count: TokenCounts,
pub symbols: Vec<Symbol>,
pub importance: f32,
pub content: Option<String>,
}
impl RepoFile {
pub fn new(path: impl Into<PathBuf>, relative_path: impl Into<String>) -> Self {
Self {
path: path.into(),
relative_path: relative_path.into(),
language: None,
size_bytes: 0,
token_count: TokenCounts::default(),
symbols: Vec::new(),
importance: 0.5,
content: None,
}
}
pub fn extension(&self) -> Option<&str> {
self.path.extension().and_then(|e| e.to_str())
}
#[must_use]
pub fn filename(&self) -> &str {
self.path.file_name().and_then(|n| n.to_str()).unwrap_or("")
}
}
impl fmt::Display for RepoFile {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{} ({}, {} tokens)",
self.relative_path,
self.language.as_deref().unwrap_or("unknown"),
self.token_count.claude
)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum Visibility {
#[default]
Public,
Private,
Protected,
Internal, }
impl Visibility {
pub fn name(&self) -> &'static str {
match self {
Self::Public => "public",
Self::Private => "private",
Self::Protected => "protected",
Self::Internal => "internal",
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Symbol {
pub name: String,
pub kind: SymbolKind,
pub signature: Option<String>,
pub docstring: Option<String>,
pub start_line: u32,
pub end_line: u32,
pub references: u32,
pub importance: f32,
pub parent: Option<String>,
pub visibility: Visibility,
pub calls: Vec<String>,
pub extends: Option<String>,
pub implements: Vec<String>,
}
impl Symbol {
pub fn new(name: impl Into<String>, kind: SymbolKind) -> Self {
Self {
name: name.into(),
kind,
signature: None,
docstring: None,
start_line: 0,
end_line: 0,
references: 0,
importance: 0.5,
parent: None,
visibility: Visibility::default(),
calls: Vec::new(),
extends: None,
implements: Vec::new(),
}
}
#[must_use]
pub fn line_count(&self) -> u32 {
if self.end_line >= self.start_line {
self.end_line - self.start_line + 1
} else {
1
}
}
}
impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{}:{} (lines {}-{})",
self.kind.name(),
self.name,
self.start_line,
self.end_line
)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum SymbolKind {
#[default]
Function,
Method,
Class,
Interface,
Struct,
Enum,
Constant,
Variable,
Import,
Export,
TypeAlias,
Module,
Trait,
Macro,
}
impl SymbolKind {
#[must_use]
pub fn name(&self) -> &'static str {
match self {
Self::Function => "function",
Self::Method => "method",
Self::Class => "class",
Self::Interface => "interface",
Self::Struct => "struct",
Self::Enum => "enum",
Self::Constant => "constant",
Self::Variable => "variable",
Self::Import => "import",
Self::Export => "export",
Self::TypeAlias => "type",
Self::Module => "module",
Self::Trait => "trait",
Self::Macro => "macro",
}
}
#[must_use]
#[allow(clippy::should_implement_trait)]
pub fn from_str(s: &str) -> Option<Self> {
match s.to_lowercase().as_str() {
"function" => Some(Self::Function),
"method" => Some(Self::Method),
"class" => Some(Self::Class),
"interface" => Some(Self::Interface),
"struct" => Some(Self::Struct),
"enum" => Some(Self::Enum),
"constant" => Some(Self::Constant),
"variable" => Some(Self::Variable),
"import" => Some(Self::Import),
"export" => Some(Self::Export),
"type" | "typealias" => Some(Self::TypeAlias),
"module" => Some(Self::Module),
"trait" => Some(Self::Trait),
"macro" => Some(Self::Macro),
_ => None,
}
}
}
impl std::str::FromStr for SymbolKind {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
SymbolKind::from_str(s).ok_or(())
}
}
impl fmt::Display for SymbolKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.name())
}
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct RepoMetadata {
pub total_files: u32,
pub total_lines: u64,
pub total_tokens: TokenCounts,
pub languages: Vec<LanguageStats>,
pub framework: Option<String>,
pub description: Option<String>,
pub branch: Option<String>,
pub commit: Option<String>,
pub directory_structure: Option<String>,
pub external_dependencies: Vec<String>,
pub git_history: Option<GitHistory>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageStats {
pub language: String,
pub files: u32,
pub lines: u64,
pub percentage: f32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitCommitInfo {
pub hash: String,
pub short_hash: String,
pub author: String,
pub date: String,
pub message: String,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GitHistory {
pub commits: Vec<GitCommitInfo>,
pub changed_files: Vec<GitChangedFile>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitChangedFile {
pub path: String,
pub status: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub diff_content: Option<String>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum CompressionLevel {
None,
Minimal,
#[default]
Balanced,
Aggressive,
Extreme,
Focused,
Semantic,
}
impl CompressionLevel {
pub fn expected_reduction(&self) -> u8 {
match self {
Self::None => 0,
Self::Minimal => 15,
Self::Balanced => 35,
Self::Aggressive => 60,
Self::Extreme => 80,
Self::Focused => 75,
Self::Semantic => 65,
}
}
pub fn description(&self) -> &'static str {
match self {
Self::None => "No compression - original content preserved",
Self::Minimal => "Remove empty lines, trim whitespace",
Self::Balanced => "Remove comments, normalize whitespace",
Self::Aggressive => "Remove docstrings, keep signatures only",
Self::Extreme => "Key symbols only - minimal context",
Self::Focused => "Focused symbols with small surrounding context",
Self::Semantic => "Semantic chunking with intelligent sampling",
}
}
#[allow(clippy::should_implement_trait)]
pub fn from_str(s: &str) -> Option<Self> {
match s.to_lowercase().as_str() {
"none" => Some(Self::None),
"minimal" => Some(Self::Minimal),
"balanced" => Some(Self::Balanced),
"aggressive" => Some(Self::Aggressive),
"extreme" => Some(Self::Extreme),
"focused" => Some(Self::Focused),
"semantic" => Some(Self::Semantic),
_ => None,
}
}
pub fn name(&self) -> &'static str {
match self {
Self::None => "none",
Self::Minimal => "minimal",
Self::Balanced => "balanced",
Self::Aggressive => "aggressive",
Self::Extreme => "extreme",
Self::Focused => "focused",
Self::Semantic => "semantic",
}
}
pub fn all() -> &'static [Self] {
&[
Self::None,
Self::Minimal,
Self::Balanced,
Self::Aggressive,
Self::Extreme,
Self::Focused,
Self::Semantic,
]
}
}
impl std::str::FromStr for CompressionLevel {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
CompressionLevel::from_str(s).ok_or(())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_repository_new() {
let repo = Repository::new("test", "/tmp/test");
assert_eq!(repo.name, "test");
assert!(repo.files.is_empty());
}
#[test]
fn test_repository_total_tokens() {
let mut repo = Repository::new("test", "/tmp/test");
let mut file1 = RepoFile::new("/tmp/test/a.rs", "a.rs");
file1.token_count.set(TokenizerModel::Claude, 100);
let mut file2 = RepoFile::new("/tmp/test/b.rs", "b.rs");
file2.token_count.set(TokenizerModel::Claude, 200);
repo.files.push(file1);
repo.files.push(file2);
assert_eq!(repo.total_tokens(TokenizerModel::Claude), 300);
}
#[test]
fn test_repository_files_by_language() {
let mut repo = Repository::new("test", "/tmp/test");
let mut file1 = RepoFile::new("/tmp/test/a.rs", "a.rs");
file1.language = Some("rust".to_owned());
let mut file2 = RepoFile::new("/tmp/test/b.py", "b.py");
file2.language = Some("python".to_owned());
let mut file3 = RepoFile::new("/tmp/test/c.rs", "c.rs");
file3.language = Some("rust".to_owned());
repo.files.push(file1);
repo.files.push(file2);
repo.files.push(file3);
let rust_files = repo.files_by_language("rust");
assert_eq!(rust_files.len(), 2);
let python_files = repo.files_by_language("python");
assert_eq!(python_files.len(), 1);
let go_files = repo.files_by_language("go");
assert_eq!(go_files.len(), 0);
}
#[test]
fn test_repository_files_by_importance() {
let mut repo = Repository::new("test", "/tmp/test");
let mut file1 = RepoFile::new("/tmp/test/a.rs", "a.rs");
file1.importance = 0.3;
let mut file2 = RepoFile::new("/tmp/test/b.rs", "b.rs");
file2.importance = 0.9;
let mut file3 = RepoFile::new("/tmp/test/c.rs", "c.rs");
file3.importance = 0.6;
repo.files.push(file1);
repo.files.push(file2);
repo.files.push(file3);
let sorted = repo.files_by_importance();
assert_eq!(sorted[0].relative_path, "b.rs");
assert_eq!(sorted[1].relative_path, "c.rs");
assert_eq!(sorted[2].relative_path, "a.rs");
}
#[test]
fn test_repository_display() {
let mut repo = Repository::new("my-project", "/tmp/my-project");
repo.metadata.total_files = 42;
repo.metadata.total_lines = 1000;
let display = format!("{}", repo);
assert!(display.contains("my-project"));
assert!(display.contains("42 files"));
assert!(display.contains("1000 lines"));
}
#[test]
fn test_repo_file_new() {
let file = RepoFile::new("/tmp/test/src/main.rs", "src/main.rs");
assert_eq!(file.relative_path, "src/main.rs");
assert!(file.language.is_none());
assert_eq!(file.importance, 0.5);
}
#[test]
fn test_repo_file_extension() {
let file = RepoFile::new("/tmp/test/main.rs", "main.rs");
assert_eq!(file.extension(), Some("rs"));
let file_no_ext = RepoFile::new("/tmp/test/Makefile", "Makefile");
assert_eq!(file_no_ext.extension(), None);
}
#[test]
fn test_repo_file_filename() {
let file = RepoFile::new("/tmp/test/src/main.rs", "src/main.rs");
assert_eq!(file.filename(), "main.rs");
}
#[test]
fn test_repo_file_display() {
let mut file = RepoFile::new("/tmp/test/main.rs", "main.rs");
file.language = Some("rust".to_owned());
file.token_count.claude = 150;
let display = format!("{}", file);
assert!(display.contains("main.rs"));
assert!(display.contains("rust"));
assert!(display.contains("150"));
}
#[test]
fn test_repo_file_display_unknown_language() {
let file = RepoFile::new("/tmp/test/data.xyz", "data.xyz");
let display = format!("{}", file);
assert!(display.contains("unknown"));
}
#[test]
fn test_token_counts() {
let mut counts = TokenCounts::default();
counts.set(TokenizerModel::Claude, 100);
assert_eq!(counts.get(TokenizerModel::Claude), 100);
}
#[test]
fn test_symbol_new() {
let sym = Symbol::new("my_function", SymbolKind::Function);
assert_eq!(sym.name, "my_function");
assert_eq!(sym.kind, SymbolKind::Function);
assert_eq!(sym.importance, 0.5);
assert!(sym.signature.is_none());
assert!(sym.calls.is_empty());
}
#[test]
fn test_symbol_line_count() {
let mut sym = Symbol::new("test", SymbolKind::Function);
sym.start_line = 10;
sym.end_line = 20;
assert_eq!(sym.line_count(), 11);
}
#[test]
fn test_symbol_line_count_single_line() {
let mut sym = Symbol::new("test", SymbolKind::Variable);
sym.start_line = 5;
sym.end_line = 5;
assert_eq!(sym.line_count(), 1);
}
#[test]
fn test_symbol_line_count_inverted() {
let mut sym = Symbol::new("test", SymbolKind::Variable);
sym.start_line = 20;
sym.end_line = 10; assert_eq!(sym.line_count(), 1);
}
#[test]
fn test_symbol_display() {
let mut sym = Symbol::new("calculate", SymbolKind::Function);
sym.start_line = 10;
sym.end_line = 25;
let display = format!("{}", sym);
assert!(display.contains("function"));
assert!(display.contains("calculate"));
assert!(display.contains("10-25"));
}
#[test]
fn test_symbol_kind_name() {
assert_eq!(SymbolKind::Function.name(), "function");
assert_eq!(SymbolKind::Method.name(), "method");
assert_eq!(SymbolKind::Class.name(), "class");
assert_eq!(SymbolKind::Interface.name(), "interface");
assert_eq!(SymbolKind::Struct.name(), "struct");
assert_eq!(SymbolKind::Enum.name(), "enum");
assert_eq!(SymbolKind::Constant.name(), "constant");
assert_eq!(SymbolKind::Variable.name(), "variable");
assert_eq!(SymbolKind::Import.name(), "import");
assert_eq!(SymbolKind::Export.name(), "export");
assert_eq!(SymbolKind::TypeAlias.name(), "type");
assert_eq!(SymbolKind::Module.name(), "module");
assert_eq!(SymbolKind::Trait.name(), "trait");
assert_eq!(SymbolKind::Macro.name(), "macro");
}
#[test]
fn test_symbol_kind_from_str() {
assert_eq!(SymbolKind::from_str("function"), Some(SymbolKind::Function));
assert_eq!(SymbolKind::from_str("method"), Some(SymbolKind::Method));
assert_eq!(SymbolKind::from_str("class"), Some(SymbolKind::Class));
assert_eq!(SymbolKind::from_str("interface"), Some(SymbolKind::Interface));
assert_eq!(SymbolKind::from_str("struct"), Some(SymbolKind::Struct));
assert_eq!(SymbolKind::from_str("enum"), Some(SymbolKind::Enum));
assert_eq!(SymbolKind::from_str("constant"), Some(SymbolKind::Constant));
assert_eq!(SymbolKind::from_str("variable"), Some(SymbolKind::Variable));
assert_eq!(SymbolKind::from_str("import"), Some(SymbolKind::Import));
assert_eq!(SymbolKind::from_str("export"), Some(SymbolKind::Export));
assert_eq!(SymbolKind::from_str("type"), Some(SymbolKind::TypeAlias));
assert_eq!(SymbolKind::from_str("typealias"), Some(SymbolKind::TypeAlias));
assert_eq!(SymbolKind::from_str("module"), Some(SymbolKind::Module));
assert_eq!(SymbolKind::from_str("trait"), Some(SymbolKind::Trait));
assert_eq!(SymbolKind::from_str("macro"), Some(SymbolKind::Macro));
assert_eq!(SymbolKind::from_str("FUNCTION"), Some(SymbolKind::Function));
assert_eq!(SymbolKind::from_str("Class"), Some(SymbolKind::Class));
assert_eq!(SymbolKind::from_str("unknown"), None);
assert_eq!(SymbolKind::from_str(""), None);
}
#[test]
fn test_symbol_kind_std_from_str() {
assert_eq!("function".parse::<SymbolKind>(), Ok(SymbolKind::Function));
assert_eq!("class".parse::<SymbolKind>(), Ok(SymbolKind::Class));
assert!("invalid".parse::<SymbolKind>().is_err());
}
#[test]
fn test_symbol_kind_display() {
assert_eq!(format!("{}", SymbolKind::Function), "function");
assert_eq!(format!("{}", SymbolKind::Class), "class");
}
#[test]
fn test_visibility_name() {
assert_eq!(Visibility::Public.name(), "public");
assert_eq!(Visibility::Private.name(), "private");
assert_eq!(Visibility::Protected.name(), "protected");
assert_eq!(Visibility::Internal.name(), "internal");
}
#[test]
fn test_visibility_default() {
let vis = Visibility::default();
assert_eq!(vis, Visibility::Public);
}
#[test]
fn test_language_stats() {
let stats =
LanguageStats { language: "rust".to_owned(), files: 10, lines: 5000, percentage: 45.5 };
assert_eq!(stats.language, "rust");
assert_eq!(stats.files, 10);
assert_eq!(stats.lines, 5000);
assert!((stats.percentage - 45.5).abs() < f32::EPSILON);
}
#[test]
fn test_git_commit_info() {
let commit = GitCommitInfo {
hash: "abc123def456".to_owned(),
short_hash: "abc123d".to_owned(),
author: "Test Author".to_owned(),
date: "2025-01-01".to_owned(),
message: "Test commit".to_owned(),
};
assert_eq!(commit.hash, "abc123def456");
assert_eq!(commit.short_hash, "abc123d");
assert_eq!(commit.author, "Test Author");
}
#[test]
fn test_git_changed_file() {
let changed = GitChangedFile {
path: "src/main.rs".to_owned(),
status: "M".to_owned(),
diff_content: Some("+new line".to_owned()),
};
assert_eq!(changed.path, "src/main.rs");
assert_eq!(changed.status, "M");
assert!(changed.diff_content.is_some());
}
#[test]
fn test_git_history_default() {
let history = GitHistory::default();
assert!(history.commits.is_empty());
assert!(history.changed_files.is_empty());
}
#[test]
fn test_repo_metadata_default() {
let meta = RepoMetadata::default();
assert_eq!(meta.total_files, 0);
assert_eq!(meta.total_lines, 0);
assert!(meta.languages.is_empty());
assert!(meta.framework.is_none());
assert!(meta.branch.is_none());
}
#[test]
fn test_compression_level_from_str() {
assert_eq!(CompressionLevel::from_str("none"), Some(CompressionLevel::None));
assert_eq!(CompressionLevel::from_str("minimal"), Some(CompressionLevel::Minimal));
assert_eq!(CompressionLevel::from_str("balanced"), Some(CompressionLevel::Balanced));
assert_eq!(CompressionLevel::from_str("aggressive"), Some(CompressionLevel::Aggressive));
assert_eq!(CompressionLevel::from_str("extreme"), Some(CompressionLevel::Extreme));
assert_eq!(CompressionLevel::from_str("focused"), Some(CompressionLevel::Focused));
assert_eq!(CompressionLevel::from_str("semantic"), Some(CompressionLevel::Semantic));
assert_eq!(CompressionLevel::from_str("SEMANTIC"), Some(CompressionLevel::Semantic));
assert_eq!(CompressionLevel::from_str("Balanced"), Some(CompressionLevel::Balanced));
assert_eq!(CompressionLevel::from_str("unknown"), None);
assert_eq!(CompressionLevel::from_str(""), None);
}
#[test]
fn test_compression_level_std_from_str() {
assert_eq!("balanced".parse::<CompressionLevel>(), Ok(CompressionLevel::Balanced));
assert!("invalid".parse::<CompressionLevel>().is_err());
}
#[test]
fn test_compression_level_name() {
assert_eq!(CompressionLevel::None.name(), "none");
assert_eq!(CompressionLevel::Semantic.name(), "semantic");
}
#[test]
fn test_compression_level_expected_reduction() {
assert_eq!(CompressionLevel::None.expected_reduction(), 0);
assert_eq!(CompressionLevel::Minimal.expected_reduction(), 15);
assert_eq!(CompressionLevel::Balanced.expected_reduction(), 35);
assert_eq!(CompressionLevel::Aggressive.expected_reduction(), 60);
assert_eq!(CompressionLevel::Extreme.expected_reduction(), 80);
assert_eq!(CompressionLevel::Focused.expected_reduction(), 75);
assert_eq!(CompressionLevel::Semantic.expected_reduction(), 65);
}
#[test]
fn test_compression_level_description() {
for level in CompressionLevel::all() {
assert!(!level.description().is_empty());
}
}
#[test]
fn test_compression_level_all() {
let all = CompressionLevel::all();
assert_eq!(all.len(), 7);
assert!(all.contains(&CompressionLevel::Semantic));
}
#[test]
fn test_compression_level_default() {
let level = CompressionLevel::default();
assert_eq!(level, CompressionLevel::Balanced);
}
}