vectorless/index/pipeline/
context.rs1use std::collections::HashMap;
7use std::path::PathBuf;
8
9use crate::document::{DocumentTree, NodeId};
10use crate::llm::LlmClient;
11use crate::parser::{DocumentFormat, RawNode};
12
13use super::super::{PipelineOptions, SummaryStrategy};
14use super::metrics::IndexMetrics;
15
16#[derive(Debug, Clone)]
18pub enum IndexInput {
19 File(PathBuf),
21
22 Content {
24 content: String,
26 name: String,
28 format: DocumentFormat,
30 },
31}
32
33impl IndexInput {
34 pub fn file(path: impl Into<PathBuf>) -> Self {
36 Self::File(path.into())
37 }
38
39 pub fn content(
41 content: impl Into<String>,
42 name: impl Into<String>,
43 format: DocumentFormat,
44 ) -> Self {
45 Self::Content {
46 content: content.into(),
47 name: name.into(),
48 format,
49 }
50 }
51}
52
53#[derive(Debug, Clone)]
55pub struct StageResult {
56 pub success: bool,
58
59 pub duration_ms: u64,
61
62 pub metadata: HashMap<String, serde_json::Value>,
64}
65
66impl StageResult {
67 pub fn success(name: &str) -> Self {
69 println!("Stage '{}' completed successfully", name);
70
71 Self {
72 success: true,
73 duration_ms: 0,
74 metadata: HashMap::new(),
75 }
76 }
77
78 pub fn failure(name: &str, error: &str) -> Self {
80 println!("Stage '{}' failed: {}", name, error);
81
82 let mut metadata = HashMap::new();
83 metadata.insert(
84 "error".to_string(),
85 serde_json::Value::String(error.to_string()),
86 );
87 Self {
88 success: false,
89 duration_ms: 0,
90 metadata,
91 }
92 }
93
94 pub fn with_duration(mut self, ms: u64) -> Self {
96 self.duration_ms = ms;
97 self
98 }
99
100 pub fn with_metadata(mut self, key: &str, value: serde_json::Value) -> Self {
102 self.metadata.insert(key.to_string(), value);
103 self
104 }
105}
106
107#[derive(Debug, Clone, Default)]
109pub struct SummaryCache {
110 summaries: HashMap<NodeId, String>,
112
113 persist: bool,
115}
116
117impl SummaryCache {
118 pub fn new(persist: bool) -> Self {
120 Self {
121 summaries: HashMap::new(),
122 persist,
123 }
124 }
125
126 pub fn get(&self, node_id: NodeId) -> Option<&str> {
128 self.summaries.get(&node_id).map(|s| s.as_str())
129 }
130
131 pub fn put(&mut self, node_id: NodeId, summary: String) {
133 self.summaries.insert(node_id, summary);
134 }
135
136 pub fn should_persist(&self) -> bool {
138 self.persist
139 }
140
141 pub fn all(&self) -> &HashMap<NodeId, String> {
143 &self.summaries
144 }
145}
146
147#[derive(Debug)]
149pub struct IndexContext {
150 pub doc_id: String,
152
153 pub input: IndexInput,
155
156 pub format: DocumentFormat,
158
159 pub name: String,
161
162 pub source_path: Option<PathBuf>,
164
165 pub raw_nodes: Vec<RawNode>,
167
168 pub tree: Option<DocumentTree>,
170
171 pub options: PipelineOptions,
173
174 pub llm_client: Option<LlmClient>,
176
177 pub summary_cache: SummaryCache,
179
180 pub stage_results: HashMap<String, StageResult>,
182
183 pub metrics: IndexMetrics,
185
186 pub description: Option<String>,
188
189 pub page_count: Option<usize>,
191
192 pub line_count: Option<usize>,
194}
195
196impl IndexContext {
197 pub fn new(input: IndexInput, options: PipelineOptions) -> Self {
199 Self {
200 doc_id: uuid::Uuid::new_v4().to_string(),
201 input,
202 format: DocumentFormat::Markdown,
203 name: String::new(),
204 source_path: None,
205 raw_nodes: Vec::new(),
206 tree: None,
207 options,
208 llm_client: None,
209 summary_cache: SummaryCache::default(),
210 stage_results: HashMap::new(),
211 metrics: IndexMetrics::default(),
212 description: None,
213 page_count: None,
214 line_count: None,
215 }
216 }
217
218 pub fn with_doc_id(mut self, doc_id: impl Into<String>) -> Self {
220 self.doc_id = doc_id.into();
221 self
222 }
223
224 pub fn with_llm_client(mut self, client: LlmClient) -> Self {
226 self.llm_client = Some(client);
227 self
228 }
229
230 pub fn with_format(mut self, format: DocumentFormat) -> Self {
232 self.format = format;
233 self
234 }
235
236 pub fn with_name(mut self, name: impl Into<String>) -> Self {
238 self.name = name.into();
239 self
240 }
241
242 pub fn with_source_path(mut self, path: impl Into<PathBuf>) -> Self {
244 self.source_path = Some(path.into());
245 self
246 }
247
248 pub fn init_summary_cache(&mut self) {
250 if let SummaryStrategy::Lazy { persist, .. } = self.options.summary_strategy {
251 self.summary_cache = SummaryCache::new(persist);
252 }
253 }
254
255 pub fn record_stage(&mut self, name: &str, result: StageResult) {
257 self.stage_results.insert(name.to_string(), result);
258 }
259
260 pub fn tree(&self) -> Result<&DocumentTree, &'static str> {
262 self.tree.as_ref().ok_or("Tree not built")
263 }
264
265 pub fn tree_mut(&mut self) -> Result<&mut DocumentTree, &'static str> {
267 self.tree.as_mut().ok_or("Tree not built")
268 }
269
270 pub fn finalize(self) -> IndexResult {
272 IndexResult {
273 doc_id: self.doc_id,
274 name: self.name,
275 format: self.format,
276 source_path: self.source_path,
277 tree: self.tree,
278 description: self.description,
279 page_count: self.page_count,
280 line_count: self.line_count,
281 metrics: self.metrics,
282 summary_cache: self.summary_cache,
283 }
284 }
285}
286
287#[derive(Debug)]
289pub struct IndexResult {
290 pub doc_id: String,
292
293 pub name: String,
295
296 pub format: DocumentFormat,
298
299 pub source_path: Option<PathBuf>,
301
302 pub tree: Option<DocumentTree>,
304
305 pub description: Option<String>,
307
308 pub page_count: Option<usize>,
310
311 pub line_count: Option<usize>,
313
314 pub metrics: IndexMetrics,
316
317 pub summary_cache: SummaryCache,
319}
320
321impl IndexResult {
322 pub fn has_tree(&self) -> bool {
324 self.tree.is_some()
325 }
326
327 pub fn tree(&self) -> Option<&DocumentTree> {
329 self.tree.as_ref()
330 }
331
332 pub fn total_time_ms(&self) -> u64 {
334 self.metrics.parse_time_ms
335 + self.metrics.build_time_ms
336 + self.metrics.enhance_time_ms
337 + self.metrics.enrich_time_ms
338 + self.metrics.optimize_time_ms
339 + self.metrics.persist_time_ms
340 }
341}