infiniloom_engine/chunking/
types.rs

//! Types for intelligent code chunking.
//!
//! Public types exported by the chunking module.
4
5use crate::types::TokenizerModel;
6use serde::Serialize;
7
8/// A chunk of repository content
9#[derive(Debug, Clone, Serialize)]
10pub struct Chunk {
11    /// Chunk index (0-based)
12    pub index: usize,
13    /// Total number of chunks
14    pub total: usize,
15    /// Focus/theme of this chunk
16    pub focus: String,
17    /// Token count for this chunk
18    pub tokens: u32,
19    /// Files included in this chunk
20    pub files: Vec<ChunkFile>,
21    /// Context information
22    pub context: ChunkContext,
23}
24
25/// A file within a chunk
26#[derive(Debug, Clone, Serialize)]
27pub struct ChunkFile {
28    /// Relative file path
29    pub path: String,
30    /// File content (may be compressed)
31    pub content: String,
32    /// Token count
33    pub tokens: u32,
34    /// Whether content is truncated
35    pub truncated: bool,
36}
37
38/// Context for chunk continuity
39#[derive(Debug, Clone, Serialize)]
40pub struct ChunkContext {
41    /// Summary of previous chunks
42    pub previous_summary: Option<String>,
43    /// Current focus description
44    pub current_focus: String,
45    /// Preview of next chunk
46    pub next_preview: Option<String>,
47    /// Cross-references to other chunks
48    pub cross_references: Vec<CrossReference>,
49    /// Overlap content from previous chunk (for context continuity)
50    pub overlap_content: Option<String>,
51}
52
53/// Reference to symbol in another chunk
54#[derive(Debug, Clone, Serialize)]
55pub struct CrossReference {
56    /// Symbol name
57    pub symbol: String,
58    /// Chunk containing the symbol
59    pub chunk_index: usize,
60    /// File containing the symbol
61    pub file: String,
62}
63
/// Crate-internal record used by symbol-based chunking.
///
/// NOTE(review): the field descriptions below are read off the field names
/// and types only; confirm the exact semantics against the code that builds
/// these values.
#[derive(Clone, Debug)]
pub(crate) struct SymbolSnippet {
    /// Path of the file associated with this snippet.
    pub file_path: String,
    /// Name of the symbol associated with this snippet.
    pub symbol_name: String,
    /// Starting line of the snippet (whether 0- or 1-based is not visible
    /// here; TODO confirm).
    pub start_line: u32,
    /// Text content of the snippet.
    pub content: String,
    /// Token count for the snippet.
    pub tokens: u32,
    /// Importance score (scale and direction are defined by the producer;
    /// TODO confirm).
    pub importance: f32,
}
74
/// Strategy used to group repository content into chunks.
///
/// The default strategy is [`ChunkStrategy::Semantic`].
///
/// The type is `Copy` and implements `PartialEq`/`Eq`, so strategies can be
/// passed by value and compared directly with `==` or `assert_eq!`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ChunkStrategy {
    /// Fixed token size chunks
    Fixed {
        /// Maximum tokens per chunk
        size: u32,
    },
    /// One file per chunk
    File,
    /// Group by module/directory
    Module,
    /// Group by symbols (AST-based)
    Symbol,
    /// Group by semantic similarity (the default)
    #[default]
    Semantic,
    /// Group by dependency order
    Dependency,
}
95
96/// Chunker for splitting repositories
97pub struct Chunker {
98    /// Chunking strategy
99    pub(crate) strategy: ChunkStrategy,
100    /// Maximum tokens per chunk
101    pub(crate) max_tokens: u32,
102    /// Overlap tokens between chunks
103    pub(crate) overlap_tokens: u32,
104    /// Target model for token counting
105    pub(crate) model: TokenizerModel,
106}