infiniloom_engine/chunking/types.rs
1//! Types for intelligent code chunking
2//!
3//! Public types exported by the chunking module.
4
5use crate::types::TokenizerModel;
6use serde::Serialize;
7
8/// A chunk of repository content
9#[derive(Debug, Clone, Serialize)]
10pub struct Chunk {
11 /// Chunk index (0-based)
12 pub index: usize,
13 /// Total number of chunks
14 pub total: usize,
15 /// Focus/theme of this chunk
16 pub focus: String,
17 /// Token count for this chunk
18 pub tokens: u32,
19 /// Files included in this chunk
20 pub files: Vec<ChunkFile>,
21 /// Context information
22 pub context: ChunkContext,
23}
24
25/// A file within a chunk
26#[derive(Debug, Clone, Serialize)]
27pub struct ChunkFile {
28 /// Relative file path
29 pub path: String,
30 /// File content (may be compressed)
31 pub content: String,
32 /// Token count
33 pub tokens: u32,
34 /// Whether content is truncated
35 pub truncated: bool,
36}
37
38/// Context for chunk continuity
39#[derive(Debug, Clone, Serialize)]
40pub struct ChunkContext {
41 /// Summary of previous chunks
42 pub previous_summary: Option<String>,
43 /// Current focus description
44 pub current_focus: String,
45 /// Preview of next chunk
46 pub next_preview: Option<String>,
47 /// Cross-references to other chunks
48 pub cross_references: Vec<CrossReference>,
49 /// Overlap content from previous chunk (for context continuity)
50 pub overlap_content: Option<String>,
51}
52
53/// Reference to symbol in another chunk
54#[derive(Debug, Clone, Serialize)]
55pub struct CrossReference {
56 /// Symbol name
57 pub symbol: String,
58 /// Chunk containing the symbol
59 pub chunk_index: usize,
60 /// File containing the symbol
61 pub file: String,
62}
63
/// Crate-internal record used by symbol-based chunking.
///
/// NOTE(review): the field semantics below are read off the field names and
/// types only; confirm against the code that builds these values.
#[derive(Clone, Debug)]
pub(crate) struct SymbolSnippet {
    /// Path of the file the snippet is associated with.
    pub file_path: String,
    /// Name of the symbol.
    pub symbol_name: String,
    /// Starting line of the snippet (0- or 1-based is not specified here).
    pub start_line: u32,
    /// Text of the snippet.
    pub content: String,
    /// Token count of the snippet.
    pub tokens: u32,
    /// Importance score; its scale is not defined in this file.
    pub importance: f32,
}
74
/// Strategy used to split content into chunks.
///
/// The default strategy is [`ChunkStrategy::Semantic`].
///
/// Besides `Debug`, `Clone`, `Copy` and `Default`, the enum also derives
/// `PartialEq`, `Eq` and `Hash` so strategies can be compared with `==` and
/// used as keys in hash-based collections. Two `Fixed` values are equal only
/// when their `size` fields are equal.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum ChunkStrategy {
    /// Fixed token size chunks.
    Fixed {
        /// Maximum tokens per chunk.
        size: u32,
    },
    /// One file per chunk.
    File,
    /// Group by module/directory.
    Module,
    /// Group by symbols (AST-based).
    Symbol,
    /// Group by semantic similarity.
    #[default]
    Semantic,
    /// Group by dependency order.
    Dependency,
}
95
96/// Chunker for splitting repositories
97pub struct Chunker {
98 /// Chunking strategy
99 pub(crate) strategy: ChunkStrategy,
100 /// Maximum tokens per chunk
101 pub(crate) max_tokens: u32,
102 /// Overlap tokens between chunks
103 pub(crate) overlap_tokens: u32,
104 /// Target model for token counting
105 pub(crate) model: TokenizerModel,
106}