1use serde::{Deserialize, Serialize};
4use std::fmt;
5use std::path::PathBuf;
6
7pub use crate::tokenizer::{TokenCounts, TokenModel};
9
/// Alias for [`TokenModel`] (re-exported above from `crate::tokenizer`), so the
/// same type can be referred to under either name.
pub type TokenizerModel = TokenModel;
12
/// In-memory description of a repository: its name, location, files, and
/// aggregate metadata.
///
/// Derives serde's `Serialize`/`Deserialize`, so it can be written out and
/// read back.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Repository {
    /// Repository name, as passed to [`Repository::new`].
    pub name: String,
    /// Repository path, as passed to [`Repository::new`]
    /// (presumably the repository root; confirm with callers).
    pub path: PathBuf,
    /// Files that belong to the repository; empty right after construction.
    pub files: Vec<RepoFile>,
    /// Aggregate statistics and descriptive information; `Display` reads
    /// `total_files` and `total_lines` from here.
    pub metadata: RepoMetadata,
}
25
26impl Repository {
27 pub fn new(name: impl Into<String>, path: impl Into<PathBuf>) -> Self {
29 Self {
30 name: name.into(),
31 path: path.into(),
32 files: Vec::new(),
33 metadata: RepoMetadata::default(),
34 }
35 }
36
37 pub fn total_tokens(&self, model: TokenizerModel) -> u32 {
39 self.files.iter().map(|f| f.token_count.get(model)).sum()
40 }
41
42 pub fn files_by_language(&self, language: &str) -> Vec<&RepoFile> {
44 self.files
45 .iter()
46 .filter(|f| f.language.as_deref() == Some(language))
47 .collect()
48 }
49
50 #[must_use]
52 pub fn files_by_importance(&self) -> Vec<&RepoFile> {
53 let mut files: Vec<_> = self.files.iter().collect();
54 files.sort_by(|a, b| {
55 b.importance
56 .partial_cmp(&a.importance)
57 .unwrap_or(std::cmp::Ordering::Equal)
58 });
59 files
60 }
61}
62
63impl fmt::Display for Repository {
64 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65 write!(
66 f,
67 "Repository({}: {} files, {} lines)",
68 self.name, self.metadata.total_files, self.metadata.total_lines
69 )
70 }
71}
72
/// A single file tracked in a [`Repository`], together with its analysis data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoFile {
    /// Path of the file; `extension()` and `filename()` are derived from it.
    pub path: PathBuf,
    /// Relative path string (presumably relative to the repository root);
    /// used as the label in the `Display` output.
    pub relative_path: String,
    /// Language label, if known; `Display` prints `unknown` when `None`.
    pub language: Option<String>,
    /// File size in bytes; `new` initialises it to 0.
    pub size_bytes: u64,
    /// Token counts for this file, queried per model via `TokenCounts::get`.
    pub token_count: TokenCounts,
    /// Symbols associated with this file; empty after `new`.
    pub symbols: Vec<Symbol>,
    /// Importance score used by `Repository::files_by_importance`
    /// (higher sorts first); `new` defaults it to 0.5.
    pub importance: f32,
    /// File content, when it has been attached; `None` after `new`.
    pub content: Option<String>,
}
93
94impl RepoFile {
95 pub fn new(path: impl Into<PathBuf>, relative_path: impl Into<String>) -> Self {
97 Self {
98 path: path.into(),
99 relative_path: relative_path.into(),
100 language: None,
101 size_bytes: 0,
102 token_count: TokenCounts::default(),
103 symbols: Vec::new(),
104 importance: 0.5,
105 content: None,
106 }
107 }
108
109 pub fn extension(&self) -> Option<&str> {
111 self.path.extension().and_then(|e| e.to_str())
112 }
113
114 #[must_use]
116 pub fn filename(&self) -> &str {
117 self.path.file_name().and_then(|n| n.to_str()).unwrap_or("")
118 }
119}
120
121impl fmt::Display for RepoFile {
122 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
123 write!(
124 f,
125 "{} ({}, {} tokens)",
126 self.relative_path,
127 self.language.as_deref().unwrap_or("unknown"),
128 self.token_count.claude
129 )
130 }
131}
132
/// Visibility modifier attached to a [`Symbol`].
///
/// `Public` is the default, so a symbol built without an explicit visibility
/// is treated as public.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum Visibility {
    /// Rendered as `public` by `name()`; the default variant.
    #[default]
    Public,
    /// Rendered as `private` by `name()`.
    Private,
    /// Rendered as `protected` by `name()`.
    Protected,
    /// Rendered as `internal` by `name()`.
    Internal,
}
142
143impl Visibility {
144 pub fn name(&self) -> &'static str {
145 match self {
146 Self::Public => "public",
147 Self::Private => "private",
148 Self::Protected => "protected",
149 Self::Internal => "internal",
150 }
151 }
152}
153
/// A named code element (function, class, …) together with its location and
/// relationship data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Symbol {
    /// Symbol name, as passed to `Symbol::new`.
    pub name: String,
    /// What kind of element this is.
    pub kind: SymbolKind,
    /// Signature text, if one was recorded.
    pub signature: Option<String>,
    /// Documentation text, if one was recorded.
    pub docstring: Option<String>,
    /// First line of the symbol. `line_count()` treats the
    /// `start_line..=end_line` range as inclusive.
    /// NOTE(review): whether lines are 0- or 1-based is not visible here.
    pub start_line: u32,
    /// Last line of the symbol (inclusive, see `start_line`).
    pub end_line: u32,
    /// Reference count (presumably how often the symbol is referenced
    /// elsewhere; confirm with the producer); 0 after `new`.
    pub references: u32,
    /// Importance score; `new` defaults it to 0.5.
    pub importance: f32,
    /// Name of the enclosing symbol, if any.
    pub parent: Option<String>,
    /// Visibility modifier; `new` uses `Visibility::default()`.
    pub visibility: Visibility,
    /// Names recorded under `calls` (presumably the symbols this one calls).
    pub calls: Vec<String>,
    /// Name recorded under `extends` (presumably a base type), if any.
    pub extends: Option<String>,
    /// Names recorded under `implements` (presumably interfaces or traits).
    pub implements: Vec<String>,
}
184
185impl Symbol {
186 pub fn new(name: impl Into<String>, kind: SymbolKind) -> Self {
188 Self {
189 name: name.into(),
190 kind,
191 signature: None,
192 docstring: None,
193 start_line: 0,
194 end_line: 0,
195 references: 0,
196 importance: 0.5,
197 parent: None,
198 visibility: Visibility::default(),
199 calls: Vec::new(),
200 extends: None,
201 implements: Vec::new(),
202 }
203 }
204
205 #[must_use]
207 pub fn line_count(&self) -> u32 {
208 if self.end_line >= self.start_line {
209 self.end_line - self.start_line + 1
210 } else {
211 1
212 }
213 }
214}
215
216impl fmt::Display for Symbol {
217 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
218 write!(
219 f,
220 "{}:{} (lines {}-{})",
221 self.kind.name(),
222 self.name,
223 self.start_line,
224 self.end_line
225 )
226 }
227}
228
/// The category of a [`Symbol`].
///
/// Each variant has a lowercase text form returned by `name()` and accepted
/// (case-insensitively) by `from_str()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SymbolKind {
    /// Text form: `function`.
    Function,
    /// Text form: `method`.
    Method,
    /// Text form: `class`.
    Class,
    /// Text form: `interface`.
    Interface,
    /// Text form: `struct`.
    Struct,
    /// Text form: `enum`.
    Enum,
    /// Text form: `constant`.
    Constant,
    /// Text form: `variable`.
    Variable,
    /// Text form: `import`.
    Import,
    /// Text form: `export`.
    Export,
    /// Text form: `type`; `from_str()` also accepts `typealias`.
    TypeAlias,
    /// Text form: `module`.
    Module,
    /// Text form: `trait`.
    Trait,
    /// Text form: `macro`.
    Macro,
}
247
248impl SymbolKind {
249 #[must_use]
251 pub fn name(&self) -> &'static str {
252 match self {
253 Self::Function => "function",
254 Self::Method => "method",
255 Self::Class => "class",
256 Self::Interface => "interface",
257 Self::Struct => "struct",
258 Self::Enum => "enum",
259 Self::Constant => "constant",
260 Self::Variable => "variable",
261 Self::Import => "import",
262 Self::Export => "export",
263 Self::TypeAlias => "type",
264 Self::Module => "module",
265 Self::Trait => "trait",
266 Self::Macro => "macro",
267 }
268 }
269
270 #[must_use]
272 #[allow(clippy::should_implement_trait)]
273 pub fn from_str(s: &str) -> Option<Self> {
274 match s.to_lowercase().as_str() {
275 "function" => Some(Self::Function),
276 "method" => Some(Self::Method),
277 "class" => Some(Self::Class),
278 "interface" => Some(Self::Interface),
279 "struct" => Some(Self::Struct),
280 "enum" => Some(Self::Enum),
281 "constant" => Some(Self::Constant),
282 "variable" => Some(Self::Variable),
283 "import" => Some(Self::Import),
284 "export" => Some(Self::Export),
285 "type" | "typealias" => Some(Self::TypeAlias),
286 "module" => Some(Self::Module),
287 "trait" => Some(Self::Trait),
288 "macro" => Some(Self::Macro),
289 _ => None,
290 }
291 }
292}
293
294impl std::str::FromStr for SymbolKind {
295 type Err = ();
296
297 fn from_str(s: &str) -> Result<Self, Self::Err> {
298 SymbolKind::from_str(s).ok_or(())
299 }
300}
301
302impl fmt::Display for SymbolKind {
303 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
304 write!(f, "{}", self.name())
305 }
306}
307
/// Aggregate statistics and descriptive information about a [`Repository`].
///
/// All fields start at their `Default` values (zero, empty, or `None`).
/// Nothing in this file derives them from the repository's file list, so they
/// are presumably filled in by whatever scans the repository.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct RepoMetadata {
    /// File count; shown by `Repository`'s `Display` output.
    pub total_files: u32,
    /// Line count; shown by `Repository`'s `Display` output.
    pub total_lines: u64,
    /// Token counts for the repository as a whole.
    pub total_tokens: TokenCounts,
    /// Per-language statistics.
    pub languages: Vec<LanguageStats>,
    /// Framework name, if one was recorded.
    pub framework: Option<String>,
    /// Free-form description, if one was recorded.
    pub description: Option<String>,
    /// Branch name, if recorded (presumably the checked-out git branch).
    pub branch: Option<String>,
    /// Commit identifier, if recorded (presumably the current git commit).
    pub commit: Option<String>,
    /// Textual rendering of the directory structure, if one was recorded.
    pub directory_structure: Option<String>,
    /// Names of external dependencies.
    pub external_dependencies: Vec<String>,
    /// Git history information, if it was collected.
    pub git_history: Option<GitHistory>,
}
334
/// Statistics for one language, stored in `RepoMetadata::languages`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageStats {
    /// Language name.
    pub language: String,
    /// Number of files attributed to this language.
    pub files: u32,
    /// Number of lines attributed to this language.
    pub lines: u64,
    /// Share of the repository attributed to this language.
    /// NOTE(review): scale (0–100 vs 0–1) and basis (files vs lines) are not
    /// visible in this file; confirm with the producer.
    pub percentage: f32,
}
347
/// One commit entry, stored in `GitHistory::commits`. All fields are plain
/// strings; no format is enforced here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitCommitInfo {
    /// Commit hash (presumably the full hash, given the separate `short_hash`).
    pub hash: String,
    /// Abbreviated commit hash.
    pub short_hash: String,
    /// Commit author.
    pub author: String,
    /// Commit date as text.
    /// NOTE(review): the date format is not visible here; confirm with the producer.
    pub date: String,
    /// Commit message.
    pub message: String,
}
362
/// Git history attached to `RepoMetadata::git_history`; both lists are empty
/// by default.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GitHistory {
    /// Recorded commits.
    /// NOTE(review): ordering (newest first?) is not visible here; confirm with the producer.
    pub commits: Vec<GitCommitInfo>,
    /// Recorded changed files.
    pub changed_files: Vec<GitChangedFile>,
}
371
/// One changed file, stored in `GitHistory::changed_files`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitChangedFile {
    /// Path of the changed file.
    pub path: String,
    /// Change status as text (presumably a git status such as added, modified,
    /// or deleted; confirm the exact vocabulary with the producer).
    pub status: String,
    /// Diff text for the file, when available. Left out of the serialized
    /// output entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub diff_content: Option<String>,
}
383
/// Compression strategy selector; the default is `Balanced`.
///
/// The per-variant summaries below mirror the text returned by
/// `description()`, and the figures are those returned by
/// `expected_reduction()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum CompressionLevel {
    /// No compression: original content preserved (expected reduction 0).
    None,
    /// Remove empty lines and trim whitespace (expected reduction 15).
    Minimal,
    /// Remove comments and normalize whitespace (expected reduction 35).
    /// This is the default level.
    #[default]
    Balanced,
    /// Remove docstrings, keep signatures only (expected reduction 60).
    Aggressive,
    /// Key symbols only, minimal context (expected reduction 80).
    Extreme,
    /// Focused symbols with small surrounding context (expected reduction 75).
    Focused,
    /// Semantic chunking with intelligent sampling (expected reduction 65).
    Semantic,
}
415
416impl CompressionLevel {
417 pub fn expected_reduction(&self) -> u8 {
424 match self {
425 Self::None => 0,
426 Self::Minimal => 15,
427 Self::Balanced => 35,
428 Self::Aggressive => 60,
429 Self::Extreme => 80,
430 Self::Focused => 75,
431 Self::Semantic => 65,
434 }
435 }
436
437 pub fn description(&self) -> &'static str {
439 match self {
440 Self::None => "No compression - original content preserved",
441 Self::Minimal => "Remove empty lines, trim whitespace",
442 Self::Balanced => "Remove comments, normalize whitespace",
443 Self::Aggressive => "Remove docstrings, keep signatures only",
444 Self::Extreme => "Key symbols only - minimal context",
445 Self::Focused => "Focused symbols with small surrounding context",
446 Self::Semantic => "Semantic chunking with intelligent sampling",
447 }
448 }
449
450 #[allow(clippy::should_implement_trait)]
455 pub fn from_str(s: &str) -> Option<Self> {
456 match s.to_lowercase().as_str() {
457 "none" => Some(Self::None),
458 "minimal" => Some(Self::Minimal),
459 "balanced" => Some(Self::Balanced),
460 "aggressive" => Some(Self::Aggressive),
461 "extreme" => Some(Self::Extreme),
462 "focused" => Some(Self::Focused),
463 "semantic" => Some(Self::Semantic),
464 _ => None,
465 }
466 }
467
468 pub fn name(&self) -> &'static str {
470 match self {
471 Self::None => "none",
472 Self::Minimal => "minimal",
473 Self::Balanced => "balanced",
474 Self::Aggressive => "aggressive",
475 Self::Extreme => "extreme",
476 Self::Focused => "focused",
477 Self::Semantic => "semantic",
478 }
479 }
480
481 pub fn all() -> &'static [Self] {
483 &[
484 Self::None,
485 Self::Minimal,
486 Self::Balanced,
487 Self::Aggressive,
488 Self::Extreme,
489 Self::Focused,
490 Self::Semantic,
491 ]
492 }
493}
494
495impl std::str::FromStr for CompressionLevel {
496 type Err = ();
497
498 fn from_str(s: &str) -> Result<Self, Self::Err> {
499 CompressionLevel::from_str(s).ok_or(())
500 }
501}
502
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed repository keeps its name and has no files.
    #[test]
    fn test_repository_new() {
        let repo = Repository::new("test", "/tmp/test");
        assert_eq!(repo.name, "test");
        assert!(repo.files.is_empty());
    }

    /// A count stored for a model is read back unchanged.
    #[test]
    fn test_token_counts() {
        let mut counts = TokenCounts::default();
        counts.set(TokenizerModel::Claude, 100);
        assert_eq!(counts.get(TokenizerModel::Claude), 100);
    }

    /// The line span is inclusive of both endpoints.
    #[test]
    fn test_symbol_line_count() {
        let mut sym = Symbol::new("test", SymbolKind::Function);
        sym.start_line = 10;
        sym.end_line = 20;
        assert_eq!(sym.line_count(), 11);
    }

    /// Every level name parses (case-insensitively); unknown input does not.
    #[test]
    fn test_compression_level_from_str() {
        let cases = [
            // Canonical lowercase names.
            ("none", Some(CompressionLevel::None)),
            ("minimal", Some(CompressionLevel::Minimal)),
            ("balanced", Some(CompressionLevel::Balanced)),
            ("aggressive", Some(CompressionLevel::Aggressive)),
            ("extreme", Some(CompressionLevel::Extreme)),
            ("focused", Some(CompressionLevel::Focused)),
            ("semantic", Some(CompressionLevel::Semantic)),
            // Case is ignored.
            ("SEMANTIC", Some(CompressionLevel::Semantic)),
            ("Balanced", Some(CompressionLevel::Balanced)),
            // Unknown and empty input are rejected.
            ("unknown", None),
            ("", None),
        ];
        for (input, expected) in cases {
            assert_eq!(CompressionLevel::from_str(input), expected);
        }
    }

    /// Names are the lowercase variant spellings.
    #[test]
    fn test_compression_level_name() {
        assert_eq!(CompressionLevel::None.name(), "none");
        assert_eq!(CompressionLevel::Semantic.name(), "semantic");
    }

    /// Each level reports its fixed expected-reduction figure.
    #[test]
    fn test_compression_level_expected_reduction() {
        let expected = [
            (CompressionLevel::None, 0),
            (CompressionLevel::Minimal, 15),
            (CompressionLevel::Balanced, 35),
            (CompressionLevel::Aggressive, 60),
            (CompressionLevel::Extreme, 80),
            (CompressionLevel::Focused, 75),
            (CompressionLevel::Semantic, 65),
        ];
        for (level, reduction) in expected {
            assert_eq!(level.expected_reduction(), reduction);
        }
    }

    /// No level has an empty description.
    #[test]
    fn test_compression_level_description() {
        for level in CompressionLevel::all() {
            assert!(!level.description().is_empty());
        }
    }

    /// `all()` lists all seven levels, including `Semantic`.
    #[test]
    fn test_compression_level_all() {
        let all = CompressionLevel::all();
        assert_eq!(all.len(), 7);
        assert!(all.contains(&CompressionLevel::Semantic));
    }
}