1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
//! File processor module for handling different file types intelligently.
//!
//! This module uses the strategy pattern to process file contents based on their extension,
//! optimizing for LLM token usage. The main idea is to extract a schema rather than the raw
//! data where applicable (e.g., schema + sample for CSV, code cells for Jupyter notebooks).
use Result;
use Path;
pub use CsvProcessor;
pub use DefaultTextProcessor;
pub use JupyterNotebookProcessor;
pub use JsonLinesProcessor;
pub use TsvProcessor;
/// Trait for processing file contents into LLM-optimized string representations.
///
/// Each processor takes raw bytes and produces a formatted string suitable for
/// inclusion in an LLM prompt. Processors may extract schemas, truncate content,
/// or apply other transformations to reduce token usage while preserving semantic value.
/// Factory function to get the appropriate processor for a file extension.
///
/// # Arguments
///
/// * `extension` - File extension without the leading dot (e.g., `"csv"`)
///
/// # Returns
///
/// * `Box<dyn FileProcessor>` - Processor instance for the given extension
///
/// # Examples
///
/// ```ignore
/// let processor = get_processor_for_extension("csv");
/// let result = processor.process(&bytes, path)?;
/// ```