ragit/index/
config.rs

1use serde::{Deserialize, Serialize};
2
3// This struct is used for loading partial configurations from ~/.config/ragit/build.json
4#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
5pub struct PartialBuildConfig {
6    pub chunk_size: Option<usize>,
7    pub slide_len: Option<usize>,
8    pub image_size: Option<usize>,
9    pub min_summary_len: Option<usize>,
10    pub max_summary_len: Option<usize>,
11    pub strict_file_reader: Option<bool>,
12    pub compression_threshold: Option<u64>,
13    pub compression_level: Option<u32>,
14    pub summary_after_build: Option<bool>,
15}
16
17impl PartialBuildConfig {
18    // Apply partial config to a full config
19    pub fn apply_to(&self, config: &mut BuildConfig) {
20        if let Some(chunk_size) = self.chunk_size {
21            config.chunk_size = chunk_size;
22        }
23        if let Some(slide_len) = self.slide_len {
24            config.slide_len = slide_len;
25        }
26        if let Some(image_size) = self.image_size {
27            config.image_size = image_size;
28        }
29        if let Some(min_summary_len) = self.min_summary_len {
30            config.min_summary_len = min_summary_len;
31        }
32        if let Some(max_summary_len) = self.max_summary_len {
33            config.max_summary_len = max_summary_len;
34        }
35        if let Some(strict_file_reader) = self.strict_file_reader {
36            config.strict_file_reader = strict_file_reader;
37        }
38        if let Some(compression_threshold) = self.compression_threshold {
39            config.compression_threshold = compression_threshold;
40        }
41        if let Some(compression_level) = self.compression_level {
42            config.compression_level = compression_level;
43        }
44        if let Some(summary_after_build) = self.summary_after_build {
45            config.summary_after_build = summary_after_build;
46        }
47    }
48}
49
50#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
51pub struct BuildConfig {
52    /// It's not a max_chunk_size, and it's impossible to make every chunk have the same size because
53    ///
54    /// 1. An image cannot be splitted.
55    /// 2. Different files cannot be merged.
56    ///
57    /// But it's guaranteed that a chunk is never bigger than chunk_size * 2.
58    pub chunk_size: usize,
59
60    pub slide_len: usize,
61
62    /// An image is treated like an N characters string, and this is N.
63    pub image_size: usize,
64
65    /// It forces the LLM to generate a summary that has at least `min_summary_len` characters
66    /// and at most `max_summary_len` characters.
67    pub min_summary_len: usize,
68    pub max_summary_len: usize,
69
70    /// If it's set, `rag build` panics if there's any error with a file.
71    /// For example, if there's an invalid utf-8 character `PlainTextReader` would die.
72    /// If it cannot follow a link of an image in a markdown file, it would die.
73    /// You don't need this option unless you're debugging ragit itself.
74    pub strict_file_reader: bool,
75
76    /// If the `.chunks` file is bigger than this (in bytes), the file is compressed
77    pub compression_threshold: u64,
78
79    /// 0 ~ 9
80    pub compression_level: u32,
81
82    /// If it's set, it runs `rag summary` after `rag build` is complete.
83    #[serde(default = "_true")]
84    pub summary_after_build: bool,
85}
86
/// Always returns `true`.
///
/// Serde's `default = "..."` attribute needs a function path rather than a
/// literal; this is the function named by
/// `BuildConfig::summary_after_build`'s `#[serde(default = "_true")]`.
fn _true() -> bool {
    true
}
90
91impl Default for BuildConfig {
92    fn default() -> Self {
93        BuildConfig {
94            chunk_size: 4_000,
95            slide_len: 1_000,
96            image_size: 2_000,
97            min_summary_len: 200,
98            max_summary_len: 1000,
99            strict_file_reader: false,
100            compression_threshold: 2048,
101            compression_level: 3,
102            summary_after_build: true,
103        }
104    }
105}