llmcc_core/
file.rs

1use std::fs::File as StdFile;
2use std::hash::{DefaultHasher, Hash, Hasher};
3use std::io::Read;
4use std::sync::Arc;
5
/// Contents of a source file held in memory, with an optional path and a content hash.
///
/// The bytes are stored behind an [`Arc`], so cloning a `FileId` does not copy them.
///
/// The derived `Default` produces no path, an empty buffer and a `content_hash` of `0`;
/// in that case the hash is not computed from the (empty) content.
#[derive(Debug, Clone, Default)]
pub struct FileId {
    /// Path recorded for this file, or `None` when it was built from in-memory bytes.
    pub path: Option<String>,
    /// Raw bytes of the file, shared between clones.
    content: Arc<[u8]>,
    /// Hash of `content`, computed with [`DefaultHasher`] when the value is constructed.
    pub content_hash: u64,
}

impl FileId {
    /// Reads the whole file at `path` into a byte vector.
    fn read_bytes(path: &str) -> std::io::Result<Vec<u8>> {
        let mut file = StdFile::open(path)?;
        // The reported length is only a pre-allocation hint: fall back to 0 when the
        // metadata is unavailable or the length does not fit in `usize`.
        let capacity = file
            .metadata()
            .ok()
            .and_then(|meta| usize::try_from(meta.len()).ok())
            .unwrap_or(0);
        let mut content = Vec::with_capacity(capacity);
        file.read_to_end(&mut content)?;
        Ok(content)
    }

    /// Hashes `bytes` with a fresh [`DefaultHasher`].
    ///
    /// Every constructor goes through this helper so that identical bytes always get
    /// the same `content_hash`, whether they were read from disk or supplied in memory.
    fn hash_bytes(bytes: &[u8]) -> u64 {
        let mut hasher = DefaultHasher::new();
        bytes.hash(&mut hasher);
        hasher.finish()
    }

    /// Builds a `FileId` from an optional path and already-loaded bytes.
    fn from_parts(path: Option<String>, content: Vec<u8>) -> Self {
        let content_hash = Self::hash_bytes(&content);
        FileId {
            path,
            content: Arc::from(content),
            content_hash,
        }
    }

    /// Creates a `FileId` by reading the file at `path` and recording that path.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error if the file cannot be opened or read.
    pub fn new_path(path: String) -> std::io::Result<Self> {
        let content = Self::read_bytes(&path)?;
        Ok(Self::from_parts(Some(path), content))
    }

    /// Create a FileId by reading from `physical_path` but storing `logical_path`.
    /// This is useful when files have prefixes (like "000_") that should be stripped
    /// for downstream processing while still reading the actual file from disk.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error if `physical_path` cannot be opened or read.
    pub fn new_path_with_logical(
        physical_path: &str,
        logical_path: String,
    ) -> std::io::Result<Self> {
        let content = Self::read_bytes(physical_path)?;
        Ok(Self::from_parts(Some(logical_path), content))
    }

    /// Creates a `FileId` from in-memory bytes; the resulting `path` is `None`.
    pub fn new_content(content: Vec<u8>) -> Self {
        Self::from_parts(None, content)
    }

    /// Returns the raw bytes of the file.
    pub fn content(&self) -> &[u8] {
        self.content.as_ref()
    }

    /// Returns the bytes in `start_byte..end_byte` as text, replacing invalid UTF-8
    /// sequences with U+FFFD.
    ///
    /// Returns `None` when `start_byte > end_byte` or when the range reaches past the
    /// end of the content. An empty range inside the content yields `Some("")`.
    pub fn get_text(&self, start_byte: usize, end_byte: usize) -> Option<String> {
        // `get` performs the ordering and bounds checks, so no indexing can panic here.
        self.content()
            .get(start_byte..end_byte)
            .map(|slice| String::from_utf8_lossy(slice).into_owned())
    }

    /// Returns the whole content as text, replacing invalid UTF-8 sequences with U+FFFD.
    ///
    /// This currently always returns `Some`.
    pub fn get_full_text(&self) -> Option<String> {
        Some(String::from_utf8_lossy(self.content()).into_owned())
    }
}
89
90#[derive(Debug, Clone, Default)]
91pub struct File {
92    // TODO: add cache and all other stuff
93    pub file: FileId,
94}
95
96impl File {
97    pub fn new_source(source: Vec<u8>) -> Self {
98        File {
99            file: FileId::new_content(source),
100        }
101    }
102
103    pub fn new_file(file: String) -> std::io::Result<Self> {
104        Ok(File {
105            file: FileId::new_path(file)?,
106        })
107    }
108
109    /// Create a File by reading from `physical_path` but storing `logical_path`.
110    pub fn new_file_with_logical(
111        physical_path: &str,
112        logical_path: String,
113    ) -> std::io::Result<Self> {
114        Ok(File {
115            file: FileId::new_path_with_logical(physical_path, logical_path)?,
116        })
117    }
118
119    pub fn content(&self) -> &[u8] {
120        self.file.content()
121    }
122
123    pub fn get_text(&self, start: usize, end: usize) -> String {
124        self.file.get_text(start, end).unwrap()
125    }
126
127    pub fn opt_get_text(&self, start: usize, end: usize) -> Option<String> {
128        self.file.get_text(start, end)
129    }
130
131    pub fn path(&self) -> Option<&str> {
132        self.file.path.as_deref()
133    }
134}