1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
//! File processor module for handling different file types intelligently.
//!
//! This module provides a strategy pattern for processing file contents based on their extension
//! in order to optimize for LLM token usage. The main idea is to extract the schema rather than
//! raw data where applicable (e.g., schema + sample for CSV, code cells for Jupyter notebooks).
use Result;
use Path;
pub use CsvProcessor;
pub use DefaultTextProcessor;
pub use JupyterNotebookProcessor;
pub use JsonLinesProcessor;
pub use TsvProcessor;
/// Trait for processing file contents into LLM-optimized string representations.
///
/// Each processor takes raw bytes and produces a formatted string suitable for
/// inclusion in an LLM prompt. Processors may extract schemas, truncate content,
/// or apply other transformations to reduce token usage while preserving semantic value.
/// Factory function to get the appropriate processor for a file extension.
///
/// # Arguments
///
/// * `extension` - File extension (without dot)
///
/// # Returns
///
/// * `Box<dyn FileProcessor>` - Processor instance for the given extension
///
/// # Examples
///
/// ```rust,ignore
/// use std::path::Path;
/// use code2prompt_core::file_processor::get_processor_for_extension;
///
/// let processor = get_processor_for_extension("csv");
/// let bytes = b"column1,column2\nvalue1,value2";
/// let path = Path::new("fake_file.csv");
///
/// let result = processor.process(bytes, path).unwrap();
/// ```