1use anyhow::{Context, Result};
31use memmap2::Mmap;
32use std::fs::{File, OpenOptions};
33use std::io::Write;
34use std::path::{Path, PathBuf};
35
/// Magic bytes written at the very start of every content file.
const MAGIC: &[u8; 4] = b"RFCT";

/// Format version stored in the header; the reader rejects any other value.
const VERSION: u32 = 1;

/// Size in bytes of the fixed header: magic (4), version (4), file count (8),
/// index offset (8) and 8 reserved bytes.
const HEADER_SIZE: usize = 32;

/// Index record describing one stored file.
#[derive(Debug, Clone)]
pub struct FileEntry {
    /// Path recorded for the file.
    pub path: PathBuf,
    /// Byte offset of the file's content, measured from the end of the header.
    pub offset: u64,
    /// Length of the file's content in bytes.
    pub length: u64,
}
50
51pub struct ContentWriter {
57 files: Vec<FileEntry>,
58 writer: Option<std::io::BufWriter<File>>,
59 current_offset: u64,
60 file_path: Option<PathBuf>,
61 content: Vec<u8>,
63}
64
65impl ContentWriter {
66 pub fn new() -> Self {
70 Self {
71 files: Vec::new(),
72 writer: None,
73 current_offset: 0,
74 file_path: None,
75 content: Vec::new(),
76 }
77 }
78
79 pub fn init(&mut self, path: PathBuf) -> Result<()> {
81 let file = OpenOptions::new()
82 .create(true)
83 .write(true)
84 .truncate(true)
85 .open(&path)
86 .with_context(|| format!("Failed to create {}", path.display()))?;
87
88 let mut writer = std::io::BufWriter::with_capacity(16 * 1024 * 1024, file);
90
91 writer.write_all(MAGIC)?;
93 writer.write_all(&VERSION.to_le_bytes())?;
94 writer.write_all(&0u64.to_le_bytes())?; writer.write_all(&0u64.to_le_bytes())?; writer.write_all(&[0u8; 8])?; self.writer = Some(writer);
99 self.current_offset = 0; self.file_path = Some(path);
101
102 Ok(())
103 }
104
105 pub fn add_file(&mut self, path: PathBuf, content: &str) -> u32 {
112 let file_id = self.files.len() as u32;
113 let content_bytes = content.as_bytes();
114 let length = content_bytes.len() as u64;
115
116 if let Some(ref mut w) = self.writer {
117 let offset = self.current_offset;
119 w.write_all(content_bytes)
120 .expect("Failed to write file content to content.bin");
121 self.current_offset += length;
122
123 self.files.push(FileEntry {
124 path,
125 offset,
126 length,
127 });
128 } else {
129 let offset = self.content.len() as u64;
131 self.content.extend_from_slice(content_bytes);
132
133 self.files.push(FileEntry {
134 path,
135 offset,
136 length,
137 });
138 }
139
140 file_id
141 }
142
143 pub fn write(&mut self, path: impl AsRef<Path>) -> Result<()> {
148 let path = path.as_ref();
149
150 if self.writer.is_none() && self.file_path.is_none() {
152 return self.write_legacy(path);
155 }
156
157 self.finalize_if_needed()?;
159
160 Ok(())
161 }
162
163 fn write_legacy(&self, path: impl AsRef<Path>) -> Result<()> {
168 let path = path.as_ref();
169 let file = OpenOptions::new()
170 .create(true)
171 .write(true)
172 .truncate(true)
173 .open(path)
174 .with_context(|| format!("Failed to create {}", path.display()))?;
175
176 let mut writer = std::io::BufWriter::with_capacity(8 * 1024 * 1024, file);
178
179 let index_offset = HEADER_SIZE as u64 + self.content.len() as u64;
181
182 writer.write_all(MAGIC)?;
184 writer.write_all(&VERSION.to_le_bytes())?;
185 writer.write_all(&(self.files.len() as u64).to_le_bytes())?;
186 writer.write_all(&index_offset.to_le_bytes())?;
187 writer.write_all(&[0u8; 8])?; writer.write_all(&self.content)?;
191
192 for entry in &self.files {
194 let path_str = entry.path.to_string_lossy();
195 let path_bytes = path_str.as_bytes();
196
197 writer.write_all(&(path_bytes.len() as u32).to_le_bytes())?;
198 writer.write_all(path_bytes)?;
199 writer.write_all(&entry.offset.to_le_bytes())?;
200 writer.write_all(&entry.length.to_le_bytes())?;
201 }
202
203 writer.flush()?;
204 Ok(())
205 }
206
207 fn finalize(&mut self) -> Result<()> {
209 let writer = self.writer.as_mut()
210 .ok_or_else(|| anyhow::anyhow!("ContentWriter not initialized"))?;
211
212 let index_offset = HEADER_SIZE as u64 + self.current_offset;
214
215 for entry in &self.files {
216 let path_str = entry.path.to_string_lossy();
217 let path_bytes = path_str.as_bytes();
218
219 writer.write_all(&(path_bytes.len() as u32).to_le_bytes())?;
220 writer.write_all(path_bytes)?;
221 writer.write_all(&entry.offset.to_le_bytes())?;
222 writer.write_all(&entry.length.to_le_bytes())?;
223 }
224
225 writer.flush()?;
227
228 let file = writer.get_mut();
230
231 use std::io::Seek;
233 file.seek(std::io::SeekFrom::Start(0))?;
234
235 file.write_all(MAGIC)?;
237 file.write_all(&VERSION.to_le_bytes())?;
238 file.write_all(&(self.files.len() as u64).to_le_bytes())?;
239 file.write_all(&index_offset.to_le_bytes())?;
240 file.write_all(&[0u8; 8])?; file.sync_all()?;
244
245 log::debug!(
246 "Finalized content.bin: {} files, {} bytes of content",
247 self.files.len(),
248 self.current_offset
249 );
250
251 Ok(())
252 }
253
254 pub fn file_count(&self) -> usize {
256 self.files.len()
257 }
258
259 pub fn content_size(&self) -> usize {
261 if self.writer.is_some() || self.file_path.is_some() {
262 self.current_offset as usize
264 } else {
265 self.content.len()
267 }
268 }
269
270 pub fn finalize_if_needed(&mut self) -> Result<()> {
274 if self.writer.is_some() {
275 self.finalize()?;
276 self.writer = None;
278 }
279 Ok(())
280 }
281}
282
283impl Default for ContentWriter {
284 fn default() -> Self {
285 Self::new()
286 }
287}
288
289pub struct ContentReader {
293 _file: File,
294 mmap: Mmap,
295 files: Vec<FileEntry>,
296}
297
298impl ContentReader {
299 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
301 let path = path.as_ref();
302
303 let file = File::open(path)
304 .with_context(|| format!("Failed to open {}", path.display()))?;
305
306 let mmap = unsafe {
307 Mmap::map(&file)
308 .with_context(|| format!("Failed to mmap {}", path.display()))?
309 };
310
311 if mmap.len() < HEADER_SIZE {
313 anyhow::bail!("content.bin too small (expected at least {} bytes)", HEADER_SIZE);
314 }
315
316 if &mmap[0..4] != MAGIC {
317 anyhow::bail!("Invalid content.bin (wrong magic bytes)");
318 }
319
320 let version = u32::from_le_bytes([mmap[4], mmap[5], mmap[6], mmap[7]]);
321 if version != VERSION {
322 anyhow::bail!("Unsupported content.bin version: {}", version);
323 }
324
325 let num_files = u64::from_le_bytes([
326 mmap[8], mmap[9], mmap[10], mmap[11],
327 mmap[12], mmap[13], mmap[14], mmap[15],
328 ]);
329
330 let index_offset = u64::from_le_bytes([
331 mmap[16], mmap[17], mmap[18], mmap[19],
332 mmap[20], mmap[21], mmap[22], mmap[23],
333 ]) as usize;
334
335 let mut files = Vec::new();
337 let mut pos = index_offset;
338
339 for i in 0..num_files {
340 if pos + 4 > mmap.len() {
341 anyhow::bail!("Truncated file index at file {} (pos={}, mmap.len()={})", i, pos, mmap.len());
342 }
343
344 let path_len = u32::from_le_bytes([
345 mmap[pos],
346 mmap[pos + 1],
347 mmap[pos + 2],
348 mmap[pos + 3],
349 ]) as usize;
350 pos += 4;
351
352 if pos + path_len + 16 > mmap.len() {
353 anyhow::bail!("Truncated file entry at file {} (pos={}, path_len={}, need={}, mmap.len()={})",
354 i, pos, path_len, pos + path_len + 16, mmap.len());
355 }
356
357 let path_bytes = &mmap[pos..pos + path_len];
358 let path_str = std::str::from_utf8(path_bytes)
359 .context("Invalid UTF-8 in file path")?;
360 let path = PathBuf::from(path_str);
361 pos += path_len;
362
363 let offset = u64::from_le_bytes([
364 mmap[pos],
365 mmap[pos + 1],
366 mmap[pos + 2],
367 mmap[pos + 3],
368 mmap[pos + 4],
369 mmap[pos + 5],
370 mmap[pos + 6],
371 mmap[pos + 7],
372 ]);
373 pos += 8;
374
375 let length = u64::from_le_bytes([
376 mmap[pos],
377 mmap[pos + 1],
378 mmap[pos + 2],
379 mmap[pos + 3],
380 mmap[pos + 4],
381 mmap[pos + 5],
382 mmap[pos + 6],
383 mmap[pos + 7],
384 ]);
385 pos += 8;
386
387 files.push(FileEntry {
388 path,
389 offset,
390 length,
391 });
392 }
393
394 Ok(Self {
395 _file: file,
396 mmap,
397 files,
398 })
399 }
400
401 pub fn get_file_content(&self, file_id: u32) -> Result<&str> {
403 let entry = self.files
404 .get(file_id as usize)
405 .ok_or_else(|| anyhow::anyhow!("Invalid file_id: {}", file_id))?;
406
407 let start = HEADER_SIZE + entry.offset as usize;
408 let end = start + entry.length as usize;
409
410 if end > self.mmap.len() {
411 anyhow::bail!("File content out of bounds");
412 }
413
414 let bytes = &self.mmap[start..end];
415 std::str::from_utf8(bytes).context("Invalid UTF-8 in file content")
416 }
417
418 pub fn get_file_path(&self, file_id: u32) -> Option<&Path> {
420 self.files.get(file_id as usize).map(|e| e.path.as_path())
421 }
422
423 pub fn file_count(&self) -> usize {
425 self.files.len()
426 }
427
428 pub fn get_file_id_by_path(&self, path: &str) -> Option<u32> {
435 let normalized_input = path.strip_prefix("./").unwrap_or(path);
437
438 self.files.iter().position(|entry| {
439 let stored_path = entry.path.to_string_lossy();
441 let normalized_stored = stored_path.strip_prefix("./").unwrap_or(&stored_path);
442 normalized_stored == normalized_input
443 }).map(|idx| idx as u32)
444 }
445
446 pub fn get_content_at_offset(&self, file_id: u32, byte_offset: u32, length: usize) -> Result<&str> {
448 let entry = self.files
449 .get(file_id as usize)
450 .ok_or_else(|| anyhow::anyhow!("Invalid file_id: {}", file_id))?;
451
452 let start = HEADER_SIZE + entry.offset as usize + byte_offset as usize;
453 let end = start + length;
454
455 if end > self.mmap.len() {
456 anyhow::bail!("Content out of bounds");
457 }
458
459 let bytes = &self.mmap[start..end];
460 std::str::from_utf8(bytes).context("Invalid UTF-8 in content")
461 }
462
463 pub fn get_context(&self, file_id: u32, byte_offset: u32, context_lines: usize) -> Result<(Vec<String>, String, Vec<String>)> {
467 let content = self.get_file_content(file_id)?;
468 let lines: Vec<&str> = content.lines().collect();
469
470 let mut current_offset = 0;
472 let mut line_idx = 0;
473
474 for (idx, line) in lines.iter().enumerate() {
475 let line_end = current_offset + line.len() + 1; if byte_offset as usize >= current_offset && (byte_offset as usize) < line_end {
477 line_idx = idx;
478 break;
479 }
480 current_offset = line_end;
481 }
482
483 let start = line_idx.saturating_sub(context_lines);
485 let end = (line_idx + context_lines + 1).min(lines.len());
486
487 let before: Vec<String> = lines[start..line_idx]
488 .iter()
489 .map(|s| s.to_string())
490 .collect();
491
492 let matching = lines.get(line_idx)
493 .map(|s| s.to_string())
494 .unwrap_or_default();
495
496 let after: Vec<String> = lines[line_idx + 1..end]
497 .iter()
498 .map(|s| s.to_string())
499 .collect();
500
501 Ok((before, matching, after))
502 }
503
504 pub fn get_context_by_line(&self, file_id: u32, line_number: usize, context_lines: usize) -> Result<(Vec<String>, Vec<String>)> {
508 let content = self.get_file_content(file_id)?;
509 let lines: Vec<&str> = content.lines().collect();
510
511 let line_idx = line_number.saturating_sub(1);
513
514 let start = line_idx.saturating_sub(context_lines);
516 let end = (line_idx + context_lines + 1).min(lines.len());
517
518 let before: Vec<String> = lines[start..line_idx]
519 .iter()
520 .map(|s| s.to_string())
521 .collect();
522
523 let after: Vec<String> = lines[line_idx + 1..end]
524 .iter()
525 .map(|s| s.to_string())
526 .collect();
527
528 Ok((before, after))
529 }
530}
531
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Writes `entries` (path, content) through a buffered `ContentWriter`
    /// into a fresh temporary directory and reopens the result.
    ///
    /// The `TempDir` is returned so the directory outlives the reader.
    fn roundtrip(entries: &[(&str, &str)]) -> (TempDir, ContentReader) {
        let temp = TempDir::new().unwrap();
        let content_path = temp.path().join("content.bin");

        let mut writer = ContentWriter::new();
        for &(name, body) in entries {
            writer.add_file(PathBuf::from(name), body);
        }
        writer.write(&content_path).unwrap();

        let reader = ContentReader::open(&content_path).unwrap();
        (temp, reader)
    }

    /// File ids are handed out sequentially, starting at zero.
    #[test]
    fn test_content_writer_basic() {
        let mut writer = ContentWriter::new();

        let first = writer.add_file(PathBuf::from("test1.txt"), "Hello, world!");
        let second = writer.add_file(PathBuf::from("test2.txt"), "Goodbye, world!");

        assert_eq!(first, 0);
        assert_eq!(second, 1);
        assert_eq!(writer.file_count(), 2);
    }

    /// Paths and contents survive a write followed by a read.
    #[test]
    fn test_content_roundtrip() {
        let (_temp, reader) = roundtrip(&[
            ("file1.txt", "First file content"),
            ("file2.txt", "Second file content"),
        ]);

        assert_eq!(reader.file_count(), 2);
        assert_eq!(reader.get_file_content(0).unwrap(), "First file content");
        assert_eq!(reader.get_file_content(1).unwrap(), "Second file content");
        assert_eq!(reader.get_file_path(0).unwrap(), Path::new("file1.txt"));
        assert_eq!(reader.get_file_path(1).unwrap(), Path::new("file2.txt"));
    }

    /// Byte offset 14 is the first byte of the third line.
    #[test]
    fn test_get_context() {
        let (_temp, reader) = roundtrip(&[(
            "test.txt",
            "Line 1\nLine 2\nLine 3 with match\nLine 4\nLine 5",
        )]);

        let (before, matching, after) = reader.get_context(0, 14, 1).unwrap();

        assert_eq!(before.len(), 1);
        assert_eq!(before[0], "Line 2");
        assert_eq!(matching, "Line 3 with match");
        assert_eq!(after.len(), 1);
        assert_eq!(after[0], "Line 4");
    }

    /// Multi-line content with a trailing newline is returned unchanged.
    #[test]
    fn test_multiline_file() {
        let content = "fn main() {\n println!(\"Hello\");\n}\n";

        let (_temp, reader) = roundtrip(&[("main.rs", content)]);

        assert_eq!(reader.get_file_content(0).unwrap(), content);
    }
}