Skip to main content

codetether_rlm/chunker/
types.rs

1//! Core types for semantic chunking.
2
3use serde::{Deserialize, Serialize};
4
5/// Content type for optimized processing.
6#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
7#[serde(rename_all = "lowercase")]
8pub enum ContentType {
9    Code,
10    Documents,
11    Logs,
12    Conversation,
13    Mixed,
14}
15
16/// A chunk of content with metadata.
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct Chunk {
19    pub content: String,
20    #[serde(rename = "type")]
21    pub chunk_type: ChunkType,
22    pub start_line: usize,
23    pub end_line: usize,
24    pub tokens: usize,
25    /// Higher = more important to keep.
26    pub priority: u8,
27}
28
29#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
30#[serde(rename_all = "snake_case")]
31pub enum ChunkType {
32    Code,
33    Text,
34    ToolOutput,
35    Conversation,
36}
37
/// Settings that control how content is split into chunks.
#[derive(Clone, Debug)]
pub struct ChunkOptions {
    /// Upper bound on the number of tokens in a single chunk.
    pub max_chunk_tokens: usize,
    /// Count of most-recent lines that are always preserved.
    pub preserve_recent: usize,
}
46
47impl Default for ChunkOptions {
48    fn default() -> Self {
49        Self {
50            max_chunk_tokens: 4000,
51            preserve_recent: 100,
52        }
53    }
54}