1use anyhow::{Context, Result};
31use memmap2::Mmap;
32use std::fs::{File, OpenOptions};
33use std::io::Write;
34use std::path::{Path, PathBuf};
35
/// Magic bytes identifying a content file; they occupy the first four bytes
/// of the header.
const MAGIC: &[u8; 4] = &[b'R', b'F', b'C', b'T'];

/// Format version stored in the header as a little-endian `u32`.
const VERSION: u32 = 1;

/// Header size in bytes: magic (4) + version (4) + file count (8) +
/// index offset (8) + reserved (8).
const HEADER_SIZE: usize = 4 + 4 + 8 + 8 + 8;

/// Index record describing one stored file.
#[derive(Clone, Debug)]
pub struct FileEntry {
    /// Path recorded for the file.
    pub path: PathBuf,
    /// Byte offset of the file's content, counted from the end of the header.
    pub offset: u64,
    /// Length of the file's content in bytes.
    pub length: u64,
}
50
51pub struct ContentWriter {
57 files: Vec<FileEntry>,
58 writer: Option<std::io::BufWriter<File>>,
59 current_offset: u64,
60 file_path: Option<PathBuf>,
61 content: Vec<u8>,
63}
64
65impl ContentWriter {
66 pub fn new() -> Self {
70 Self {
71 files: Vec::new(),
72 writer: None,
73 current_offset: 0,
74 file_path: None,
75 content: Vec::new(),
76 }
77 }
78
79 pub fn init(&mut self, path: PathBuf) -> Result<()> {
81 let file = OpenOptions::new()
82 .create(true)
83 .write(true)
84 .truncate(true)
85 .open(&path)
86 .with_context(|| format!("Failed to create {}", path.display()))?;
87
88 let mut writer = std::io::BufWriter::with_capacity(16 * 1024 * 1024, file);
90
91 writer.write_all(MAGIC)?;
93 writer.write_all(&VERSION.to_le_bytes())?;
94 writer.write_all(&0u64.to_le_bytes())?; writer.write_all(&0u64.to_le_bytes())?; writer.write_all(&[0u8; 8])?; self.writer = Some(writer);
99 self.current_offset = 0; self.file_path = Some(path);
101
102 Ok(())
103 }
104
105 pub fn add_file(&mut self, path: PathBuf, content: &str) -> u32 {
112 let file_id = self.files.len() as u32;
113 let content_bytes = content.as_bytes();
114 let length = content_bytes.len() as u64;
115
116 if let Some(ref mut w) = self.writer {
117 let offset = self.current_offset;
119 w.write_all(content_bytes)
120 .expect("Failed to write file content to content.bin");
121 self.current_offset += length;
122
123 self.files.push(FileEntry {
124 path,
125 offset,
126 length,
127 });
128 } else {
129 let offset = self.content.len() as u64;
131 self.content.extend_from_slice(content_bytes);
132
133 self.files.push(FileEntry {
134 path,
135 offset,
136 length,
137 });
138 }
139
140 file_id
141 }
142
143 pub fn write(&mut self, path: impl AsRef<Path>) -> Result<()> {
148 let path = path.as_ref();
149
150 if self.writer.is_none() && self.file_path.is_none() {
152 return self.write_legacy(path);
155 }
156
157 self.finalize_if_needed()?;
159
160 Ok(())
161 }
162
163 fn write_legacy(&self, path: impl AsRef<Path>) -> Result<()> {
168 let path = path.as_ref();
169 let file = OpenOptions::new()
170 .create(true)
171 .write(true)
172 .truncate(true)
173 .open(path)
174 .with_context(|| format!("Failed to create {}", path.display()))?;
175
176 let mut writer = std::io::BufWriter::with_capacity(8 * 1024 * 1024, file);
178
179 let index_offset = HEADER_SIZE as u64 + self.content.len() as u64;
181
182 writer.write_all(MAGIC)?;
184 writer.write_all(&VERSION.to_le_bytes())?;
185 writer.write_all(&(self.files.len() as u64).to_le_bytes())?;
186 writer.write_all(&index_offset.to_le_bytes())?;
187 writer.write_all(&[0u8; 8])?; writer.write_all(&self.content)?;
191
192 for entry in &self.files {
194 let path_str = entry.path.to_string_lossy();
195 let path_bytes = path_str.as_bytes();
196
197 writer.write_all(&(path_bytes.len() as u32).to_le_bytes())?;
198 writer.write_all(path_bytes)?;
199 writer.write_all(&entry.offset.to_le_bytes())?;
200 writer.write_all(&entry.length.to_le_bytes())?;
201 }
202
203 writer.flush()?;
204 Ok(())
205 }
206
207 fn finalize(&mut self) -> Result<()> {
209 let mut writer = self
210 .writer
211 .take()
212 .ok_or_else(|| anyhow::anyhow!("ContentWriter not initialized"))?;
213
214 let index_offset = HEADER_SIZE as u64 + self.current_offset;
216
217 for entry in &self.files {
218 let path_str = entry.path.to_string_lossy();
219 let path_bytes = path_str.as_bytes();
220
221 writer.write_all(&(path_bytes.len() as u32).to_le_bytes())?;
222 writer.write_all(path_bytes)?;
223 writer.write_all(&entry.offset.to_le_bytes())?;
224 writer.write_all(&entry.length.to_le_bytes())?;
225 }
226
227 let mut file = writer
229 .into_inner()
230 .map_err(|e| anyhow::anyhow!("Failed to flush BufWriter: {}", e.error()))?;
231
232 use std::io::Seek;
234 file.seek(std::io::SeekFrom::Start(0))?;
235
236 file.write_all(MAGIC)?;
238 file.write_all(&VERSION.to_le_bytes())?;
239 file.write_all(&(self.files.len() as u64).to_le_bytes())?;
240 file.write_all(&index_offset.to_le_bytes())?;
241 file.write_all(&[0u8; 8])?; file.sync_all()?;
245
246 log::debug!(
247 "Finalized content.bin: {} files, {} bytes of content",
248 self.files.len(),
249 self.current_offset
250 );
251
252 Ok(())
253 }
254
255 pub fn file_count(&self) -> usize {
257 self.files.len()
258 }
259
260 pub fn content_size(&self) -> usize {
262 if self.writer.is_some() || self.file_path.is_some() {
263 self.current_offset as usize
265 } else {
266 self.content.len()
268 }
269 }
270
271 pub fn finalize_if_needed(&mut self) -> Result<()> {
275 if self.writer.is_some() {
276 self.finalize()?;
277 self.writer = None;
279 }
280 Ok(())
281 }
282}
283
284impl Default for ContentWriter {
285 fn default() -> Self {
286 Self::new()
287 }
288}
289
290pub struct ContentReader {
294 _file: File,
295 mmap: Mmap,
296 files: Vec<FileEntry>,
297}
298
299impl ContentReader {
300 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
302 let path = path.as_ref();
303
304 let file =
305 File::open(path).with_context(|| format!("Failed to open {}", path.display()))?;
306
307 let mmap = unsafe {
308 Mmap::map(&file).with_context(|| format!("Failed to mmap {}", path.display()))?
309 };
310
311 if mmap.len() < HEADER_SIZE {
313 anyhow::bail!(
314 "content.bin too small (expected at least {} bytes)",
315 HEADER_SIZE
316 );
317 }
318
319 if &mmap[0..4] != MAGIC {
320 anyhow::bail!("Invalid content.bin (wrong magic bytes)");
321 }
322
323 let version = u32::from_le_bytes([mmap[4], mmap[5], mmap[6], mmap[7]]);
324 if version != VERSION {
325 anyhow::bail!("Unsupported content.bin version: {}", version);
326 }
327
328 let num_files = u64::from_le_bytes([
329 mmap[8], mmap[9], mmap[10], mmap[11], mmap[12], mmap[13], mmap[14], mmap[15],
330 ]);
331
332 let index_offset = u64::from_le_bytes([
333 mmap[16], mmap[17], mmap[18], mmap[19], mmap[20], mmap[21], mmap[22], mmap[23],
334 ]) as usize;
335
336 let mut files = Vec::new();
338 let mut pos = index_offset;
339
340 for i in 0..num_files {
341 if pos + 4 > mmap.len() {
342 anyhow::bail!(
343 "Truncated file index at file {} (pos={}, mmap.len()={})",
344 i,
345 pos,
346 mmap.len()
347 );
348 }
349
350 let path_len =
351 u32::from_le_bytes([mmap[pos], mmap[pos + 1], mmap[pos + 2], mmap[pos + 3]])
352 as usize;
353 pos += 4;
354
355 if pos + path_len + 16 > mmap.len() {
356 anyhow::bail!(
357 "Truncated file entry at file {} (pos={}, path_len={}, need={}, mmap.len()={})",
358 i,
359 pos,
360 path_len,
361 pos + path_len + 16,
362 mmap.len()
363 );
364 }
365
366 let path_bytes = &mmap[pos..pos + path_len];
367 let path_str = std::str::from_utf8(path_bytes).context("Invalid UTF-8 in file path")?;
368 let path = PathBuf::from(path_str);
369 pos += path_len;
370
371 let offset = u64::from_le_bytes([
372 mmap[pos],
373 mmap[pos + 1],
374 mmap[pos + 2],
375 mmap[pos + 3],
376 mmap[pos + 4],
377 mmap[pos + 5],
378 mmap[pos + 6],
379 mmap[pos + 7],
380 ]);
381 pos += 8;
382
383 let length = u64::from_le_bytes([
384 mmap[pos],
385 mmap[pos + 1],
386 mmap[pos + 2],
387 mmap[pos + 3],
388 mmap[pos + 4],
389 mmap[pos + 5],
390 mmap[pos + 6],
391 mmap[pos + 7],
392 ]);
393 pos += 8;
394
395 files.push(FileEntry {
396 path,
397 offset,
398 length,
399 });
400 }
401
402 Ok(Self {
403 _file: file,
404 mmap,
405 files,
406 })
407 }
408
409 pub fn get_file_content(&self, file_id: u32) -> Result<&str> {
411 let entry = self
412 .files
413 .get(file_id as usize)
414 .ok_or_else(|| anyhow::anyhow!("Invalid file_id: {}", file_id))?;
415
416 let start = HEADER_SIZE + entry.offset as usize;
417 let end = start + entry.length as usize;
418
419 if end > self.mmap.len() {
420 anyhow::bail!("File content out of bounds");
421 }
422
423 let bytes = &self.mmap[start..end];
424 std::str::from_utf8(bytes).context("Invalid UTF-8 in file content")
425 }
426
427 pub fn get_file_path(&self, file_id: u32) -> Option<&Path> {
429 self.files.get(file_id as usize).map(|e| e.path.as_path())
430 }
431
432 pub fn file_count(&self) -> usize {
434 self.files.len()
435 }
436
437 pub fn get_file_id_by_path(&self, path: &str) -> Option<u32> {
444 let normalized_input = path.strip_prefix("./").unwrap_or(path);
446
447 self.files
448 .iter()
449 .position(|entry| {
450 let stored_path = entry.path.to_string_lossy();
452 let normalized_stored = stored_path.strip_prefix("./").unwrap_or(&stored_path);
453 normalized_stored == normalized_input
454 })
455 .map(|idx| idx as u32)
456 }
457
458 pub fn get_content_at_offset(
460 &self,
461 file_id: u32,
462 byte_offset: u32,
463 length: usize,
464 ) -> Result<&str> {
465 let entry = self
466 .files
467 .get(file_id as usize)
468 .ok_or_else(|| anyhow::anyhow!("Invalid file_id: {}", file_id))?;
469
470 let start = HEADER_SIZE + entry.offset as usize + byte_offset as usize;
471 let end = start + length;
472
473 if end > self.mmap.len() {
474 anyhow::bail!("Content out of bounds");
475 }
476
477 let bytes = &self.mmap[start..end];
478 std::str::from_utf8(bytes).context("Invalid UTF-8 in content")
479 }
480
481 pub fn get_context(
485 &self,
486 file_id: u32,
487 byte_offset: u32,
488 context_lines: usize,
489 ) -> Result<(Vec<String>, String, Vec<String>)> {
490 let content = self.get_file_content(file_id)?;
491 let lines: Vec<&str> = content.lines().collect();
492
493 let mut current_offset = 0;
495 let mut line_idx = 0;
496
497 for (idx, line) in lines.iter().enumerate() {
498 let line_end = current_offset + line.len() + 1; if byte_offset as usize >= current_offset && (byte_offset as usize) < line_end {
500 line_idx = idx;
501 break;
502 }
503 current_offset = line_end;
504 }
505
506 let start = line_idx.saturating_sub(context_lines);
508 let end = (line_idx + context_lines + 1).min(lines.len());
509
510 let before: Vec<String> = lines[start..line_idx]
511 .iter()
512 .map(|s| s.to_string())
513 .collect();
514
515 let matching = lines
516 .get(line_idx)
517 .map(|s| s.to_string())
518 .unwrap_or_default();
519
520 let after: Vec<String> = lines[line_idx + 1..end]
521 .iter()
522 .map(|s| s.to_string())
523 .collect();
524
525 Ok((before, matching, after))
526 }
527
528 pub fn get_context_by_line(
532 &self,
533 file_id: u32,
534 line_number: usize,
535 context_lines: usize,
536 ) -> Result<(Vec<String>, Vec<String>)> {
537 let content = self.get_file_content(file_id)?;
538 let lines: Vec<&str> = content.lines().collect();
539
540 let line_idx = line_number.saturating_sub(1);
542
543 let start = line_idx.saturating_sub(context_lines);
545 let end = (line_idx + context_lines + 1).min(lines.len());
546
547 let before: Vec<String> = lines[start..line_idx]
548 .iter()
549 .map(|s| s.to_string())
550 .collect();
551
552 let after: Vec<String> = lines[line_idx + 1..end]
553 .iter()
554 .map(|s| s.to_string())
555 .collect();
556
557 Ok((before, after))
558 }
559}
560
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates a scratch directory and the path of a `content.bin` inside it.
    /// The returned `TempDir` is kept by each test for as long as the path
    /// is in use.
    fn scratch() -> (TempDir, PathBuf) {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("content.bin");
        (dir, file)
    }

    /// Ids are handed out in insertion order and the count tracks additions.
    #[test]
    fn test_content_writer_basic() {
        let mut writer = ContentWriter::new();

        let first = writer.add_file(PathBuf::from("test1.txt"), "Hello, world!");
        let second = writer.add_file(PathBuf::from("test2.txt"), "Goodbye, world!");

        assert_eq!(first, 0);
        assert_eq!(second, 1);
        assert_eq!(writer.file_count(), 2);
    }

    /// In-memory write followed by a read returns the same paths and content.
    #[test]
    fn test_content_roundtrip() {
        let (_dir, content_path) = scratch();

        let mut writer = ContentWriter::new();
        writer.add_file(PathBuf::from("file1.txt"), "First file content");
        writer.add_file(PathBuf::from("file2.txt"), "Second file content");
        writer.write(&content_path).unwrap();

        let reader = ContentReader::open(&content_path).unwrap();

        assert_eq!(reader.file_count(), 2);
        assert_eq!(reader.get_file_content(0).unwrap(), "First file content");
        assert_eq!(reader.get_file_content(1).unwrap(), "Second file content");
        assert_eq!(reader.get_file_path(0).unwrap(), Path::new("file1.txt"));
        assert_eq!(reader.get_file_path(1).unwrap(), Path::new("file2.txt"));
    }

    /// Byte offset 14 is the first byte of the third line.
    #[test]
    fn test_get_context() {
        let (_dir, content_path) = scratch();

        let text = "Line 1\nLine 2\nLine 3 with match\nLine 4\nLine 5";
        let mut writer = ContentWriter::new();
        writer.add_file(PathBuf::from("test.txt"), text);
        writer.write(&content_path).unwrap();

        let reader = ContentReader::open(&content_path).unwrap();
        let (before, matching, after) = reader.get_context(0, 14, 1).unwrap();

        assert_eq!(before.len(), 1);
        assert_eq!(before[0], "Line 2");
        assert_eq!(matching, "Line 3 with match");
        assert_eq!(after.len(), 1);
        assert_eq!(after[0], "Line 4");
    }

    /// Streaming mode (`init` + `finalize_if_needed`) round-trips as well.
    #[test]
    fn test_streaming_roundtrip() {
        let (_dir, content_path) = scratch();

        let main_src = "fn main() {}\n";
        let lib_src = "pub fn hello() -> &'static str { \"hi\" }\n";

        let mut writer = ContentWriter::new();
        writer.init(content_path.clone()).unwrap();
        writer.add_file(PathBuf::from("src/main.rs"), main_src);
        writer.add_file(PathBuf::from("src/lib.rs"), lib_src);
        writer.finalize_if_needed().unwrap();

        let reader = ContentReader::open(&content_path).unwrap();
        assert_eq!(reader.file_count(), 2);
        assert_eq!(reader.get_file_content(0).unwrap(), main_src);
        assert_eq!(reader.get_file_content(1).unwrap(), lib_src);
        assert_eq!(reader.get_file_path(0).unwrap(), Path::new("src/main.rs"));
        assert_eq!(reader.get_file_path(1).unwrap(), Path::new("src/lib.rs"));
    }

    /// Multi-line content, including the trailing newline, is preserved.
    #[test]
    fn test_multiline_file() {
        let (_dir, content_path) = scratch();

        let content = "fn main() {\n println!(\"Hello\");\n}\n";

        let mut writer = ContentWriter::new();
        writer.add_file(PathBuf::from("main.rs"), content);
        writer.write(&content_path).unwrap();

        let reader = ContentReader::open(&content_path).unwrap();
        assert_eq!(reader.get_file_content(0).unwrap(), content);
    }
}