1use anyhow::{Context, Result};
31use memmap2::Mmap;
32use std::fs::{File, OpenOptions};
33use std::io::Write;
34use std::path::{Path, PathBuf};
35
/// Magic bytes that open every content file.
const MAGIC: &[u8; 4] = b"RFCT";

/// Format version stored right after the magic bytes.
const VERSION: u32 = 1;

/// Size in bytes of the fixed header: 4 magic bytes, a 4-byte version, an 8-byte file count,
/// an 8-byte index offset and 8 zero bytes.
const HEADER_SIZE: usize = 32;

/// Index record describing one stored file.
#[derive(Debug, Clone)]
pub struct FileEntry {
    /// Path recorded for the file.
    pub path: PathBuf,
    /// Byte offset of the file's content, counted from the end of the header.
    pub offset: u64,
    /// Length of the file's content in bytes.
    pub length: u64,
}
50
51pub struct ContentWriter {
57 files: Vec<FileEntry>,
58 writer: Option<std::io::BufWriter<File>>,
59 current_offset: u64,
60 file_path: Option<PathBuf>,
61 content: Vec<u8>,
63}
64
65impl ContentWriter {
66 pub fn new() -> Self {
70 Self {
71 files: Vec::new(),
72 writer: None,
73 current_offset: 0,
74 file_path: None,
75 content: Vec::new(),
76 }
77 }
78
79 pub fn init(&mut self, path: PathBuf) -> Result<()> {
81 let file = OpenOptions::new()
82 .create(true)
83 .write(true)
84 .truncate(true)
85 .open(&path)
86 .with_context(|| format!("Failed to create {}", path.display()))?;
87
88 let mut writer = std::io::BufWriter::with_capacity(16 * 1024 * 1024, file);
90
91 writer.write_all(MAGIC)?;
93 writer.write_all(&VERSION.to_le_bytes())?;
94 writer.write_all(&0u64.to_le_bytes())?; writer.write_all(&0u64.to_le_bytes())?; writer.write_all(&[0u8; 8])?; self.writer = Some(writer);
99 self.current_offset = 0; self.file_path = Some(path);
101
102 Ok(())
103 }
104
105 pub fn add_file(&mut self, path: PathBuf, content: &str) -> u32 {
112 let file_id = self.files.len() as u32;
113 let content_bytes = content.as_bytes();
114 let length = content_bytes.len() as u64;
115
116 if let Some(ref mut w) = self.writer {
117 let offset = self.current_offset;
119 w.write_all(content_bytes)
120 .expect("Failed to write file content to content.bin");
121 self.current_offset += length;
122
123 self.files.push(FileEntry {
124 path,
125 offset,
126 length,
127 });
128 } else {
129 let offset = self.content.len() as u64;
131 self.content.extend_from_slice(content_bytes);
132
133 self.files.push(FileEntry {
134 path,
135 offset,
136 length,
137 });
138 }
139
140 file_id
141 }
142
143 pub fn write(&mut self, path: impl AsRef<Path>) -> Result<()> {
148 let path = path.as_ref();
149
150 if self.writer.is_none() && self.file_path.is_none() {
152 return self.write_legacy(path);
155 }
156
157 self.finalize_if_needed()?;
159
160 Ok(())
161 }
162
163 fn write_legacy(&self, path: impl AsRef<Path>) -> Result<()> {
168 let path = path.as_ref();
169 let file = OpenOptions::new()
170 .create(true)
171 .write(true)
172 .truncate(true)
173 .open(path)
174 .with_context(|| format!("Failed to create {}", path.display()))?;
175
176 let mut writer = std::io::BufWriter::with_capacity(8 * 1024 * 1024, file);
178
179 let index_offset = HEADER_SIZE as u64 + self.content.len() as u64;
181
182 writer.write_all(MAGIC)?;
184 writer.write_all(&VERSION.to_le_bytes())?;
185 writer.write_all(&(self.files.len() as u64).to_le_bytes())?;
186 writer.write_all(&index_offset.to_le_bytes())?;
187 writer.write_all(&[0u8; 8])?; writer.write_all(&self.content)?;
191
192 for entry in &self.files {
194 let path_str = entry.path.to_string_lossy();
195 let path_bytes = path_str.as_bytes();
196
197 writer.write_all(&(path_bytes.len() as u32).to_le_bytes())?;
198 writer.write_all(path_bytes)?;
199 writer.write_all(&entry.offset.to_le_bytes())?;
200 writer.write_all(&entry.length.to_le_bytes())?;
201 }
202
203 writer.flush()?;
204 Ok(())
205 }
206
207 fn finalize(&mut self) -> Result<()> {
209 let mut writer = self.writer.take()
210 .ok_or_else(|| anyhow::anyhow!("ContentWriter not initialized"))?;
211
212 let index_offset = HEADER_SIZE as u64 + self.current_offset;
214
215 for entry in &self.files {
216 let path_str = entry.path.to_string_lossy();
217 let path_bytes = path_str.as_bytes();
218
219 writer.write_all(&(path_bytes.len() as u32).to_le_bytes())?;
220 writer.write_all(path_bytes)?;
221 writer.write_all(&entry.offset.to_le_bytes())?;
222 writer.write_all(&entry.length.to_le_bytes())?;
223 }
224
225 let mut file = writer.into_inner()
227 .map_err(|e| anyhow::anyhow!("Failed to flush BufWriter: {}", e.error()))?;
228
229 use std::io::Seek;
231 file.seek(std::io::SeekFrom::Start(0))?;
232
233 file.write_all(MAGIC)?;
235 file.write_all(&VERSION.to_le_bytes())?;
236 file.write_all(&(self.files.len() as u64).to_le_bytes())?;
237 file.write_all(&index_offset.to_le_bytes())?;
238 file.write_all(&[0u8; 8])?; file.sync_all()?;
242
243 log::debug!(
244 "Finalized content.bin: {} files, {} bytes of content",
245 self.files.len(),
246 self.current_offset
247 );
248
249 Ok(())
250 }
251
252 pub fn file_count(&self) -> usize {
254 self.files.len()
255 }
256
257 pub fn content_size(&self) -> usize {
259 if self.writer.is_some() || self.file_path.is_some() {
260 self.current_offset as usize
262 } else {
263 self.content.len()
265 }
266 }
267
268 pub fn finalize_if_needed(&mut self) -> Result<()> {
272 if self.writer.is_some() {
273 self.finalize()?;
274 self.writer = None;
276 }
277 Ok(())
278 }
279}
280
281impl Default for ContentWriter {
282 fn default() -> Self {
283 Self::new()
284 }
285}
286
287pub struct ContentReader {
291 _file: File,
292 mmap: Mmap,
293 files: Vec<FileEntry>,
294}
295
296impl ContentReader {
297 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
299 let path = path.as_ref();
300
301 let file = File::open(path)
302 .with_context(|| format!("Failed to open {}", path.display()))?;
303
304 let mmap = unsafe {
305 Mmap::map(&file)
306 .with_context(|| format!("Failed to mmap {}", path.display()))?
307 };
308
309 if mmap.len() < HEADER_SIZE {
311 anyhow::bail!("content.bin too small (expected at least {} bytes)", HEADER_SIZE);
312 }
313
314 if &mmap[0..4] != MAGIC {
315 anyhow::bail!("Invalid content.bin (wrong magic bytes)");
316 }
317
318 let version = u32::from_le_bytes([mmap[4], mmap[5], mmap[6], mmap[7]]);
319 if version != VERSION {
320 anyhow::bail!("Unsupported content.bin version: {}", version);
321 }
322
323 let num_files = u64::from_le_bytes([
324 mmap[8], mmap[9], mmap[10], mmap[11],
325 mmap[12], mmap[13], mmap[14], mmap[15],
326 ]);
327
328 let index_offset = u64::from_le_bytes([
329 mmap[16], mmap[17], mmap[18], mmap[19],
330 mmap[20], mmap[21], mmap[22], mmap[23],
331 ]) as usize;
332
333 let mut files = Vec::new();
335 let mut pos = index_offset;
336
337 for i in 0..num_files {
338 if pos + 4 > mmap.len() {
339 anyhow::bail!("Truncated file index at file {} (pos={}, mmap.len()={})", i, pos, mmap.len());
340 }
341
342 let path_len = u32::from_le_bytes([
343 mmap[pos],
344 mmap[pos + 1],
345 mmap[pos + 2],
346 mmap[pos + 3],
347 ]) as usize;
348 pos += 4;
349
350 if pos + path_len + 16 > mmap.len() {
351 anyhow::bail!("Truncated file entry at file {} (pos={}, path_len={}, need={}, mmap.len()={})",
352 i, pos, path_len, pos + path_len + 16, mmap.len());
353 }
354
355 let path_bytes = &mmap[pos..pos + path_len];
356 let path_str = std::str::from_utf8(path_bytes)
357 .context("Invalid UTF-8 in file path")?;
358 let path = PathBuf::from(path_str);
359 pos += path_len;
360
361 let offset = u64::from_le_bytes([
362 mmap[pos],
363 mmap[pos + 1],
364 mmap[pos + 2],
365 mmap[pos + 3],
366 mmap[pos + 4],
367 mmap[pos + 5],
368 mmap[pos + 6],
369 mmap[pos + 7],
370 ]);
371 pos += 8;
372
373 let length = u64::from_le_bytes([
374 mmap[pos],
375 mmap[pos + 1],
376 mmap[pos + 2],
377 mmap[pos + 3],
378 mmap[pos + 4],
379 mmap[pos + 5],
380 mmap[pos + 6],
381 mmap[pos + 7],
382 ]);
383 pos += 8;
384
385 files.push(FileEntry {
386 path,
387 offset,
388 length,
389 });
390 }
391
392 Ok(Self {
393 _file: file,
394 mmap,
395 files,
396 })
397 }
398
399 pub fn get_file_content(&self, file_id: u32) -> Result<&str> {
401 let entry = self.files
402 .get(file_id as usize)
403 .ok_or_else(|| anyhow::anyhow!("Invalid file_id: {}", file_id))?;
404
405 let start = HEADER_SIZE + entry.offset as usize;
406 let end = start + entry.length as usize;
407
408 if end > self.mmap.len() {
409 anyhow::bail!("File content out of bounds");
410 }
411
412 let bytes = &self.mmap[start..end];
413 std::str::from_utf8(bytes).context("Invalid UTF-8 in file content")
414 }
415
416 pub fn get_file_path(&self, file_id: u32) -> Option<&Path> {
418 self.files.get(file_id as usize).map(|e| e.path.as_path())
419 }
420
421 pub fn file_count(&self) -> usize {
423 self.files.len()
424 }
425
426 pub fn get_file_id_by_path(&self, path: &str) -> Option<u32> {
433 let normalized_input = path.strip_prefix("./").unwrap_or(path);
435
436 self.files.iter().position(|entry| {
437 let stored_path = entry.path.to_string_lossy();
439 let normalized_stored = stored_path.strip_prefix("./").unwrap_or(&stored_path);
440 normalized_stored == normalized_input
441 }).map(|idx| idx as u32)
442 }
443
444 pub fn get_content_at_offset(&self, file_id: u32, byte_offset: u32, length: usize) -> Result<&str> {
446 let entry = self.files
447 .get(file_id as usize)
448 .ok_or_else(|| anyhow::anyhow!("Invalid file_id: {}", file_id))?;
449
450 let start = HEADER_SIZE + entry.offset as usize + byte_offset as usize;
451 let end = start + length;
452
453 if end > self.mmap.len() {
454 anyhow::bail!("Content out of bounds");
455 }
456
457 let bytes = &self.mmap[start..end];
458 std::str::from_utf8(bytes).context("Invalid UTF-8 in content")
459 }
460
461 pub fn get_context(&self, file_id: u32, byte_offset: u32, context_lines: usize) -> Result<(Vec<String>, String, Vec<String>)> {
465 let content = self.get_file_content(file_id)?;
466 let lines: Vec<&str> = content.lines().collect();
467
468 let mut current_offset = 0;
470 let mut line_idx = 0;
471
472 for (idx, line) in lines.iter().enumerate() {
473 let line_end = current_offset + line.len() + 1; if byte_offset as usize >= current_offset && (byte_offset as usize) < line_end {
475 line_idx = idx;
476 break;
477 }
478 current_offset = line_end;
479 }
480
481 let start = line_idx.saturating_sub(context_lines);
483 let end = (line_idx + context_lines + 1).min(lines.len());
484
485 let before: Vec<String> = lines[start..line_idx]
486 .iter()
487 .map(|s| s.to_string())
488 .collect();
489
490 let matching = lines.get(line_idx)
491 .map(|s| s.to_string())
492 .unwrap_or_default();
493
494 let after: Vec<String> = lines[line_idx + 1..end]
495 .iter()
496 .map(|s| s.to_string())
497 .collect();
498
499 Ok((before, matching, after))
500 }
501
502 pub fn get_context_by_line(&self, file_id: u32, line_number: usize, context_lines: usize) -> Result<(Vec<String>, Vec<String>)> {
506 let content = self.get_file_content(file_id)?;
507 let lines: Vec<&str> = content.lines().collect();
508
509 let line_idx = line_number.saturating_sub(1);
511
512 let start = line_idx.saturating_sub(context_lines);
514 let end = (line_idx + context_lines + 1).min(lines.len());
515
516 let before: Vec<String> = lines[start..line_idx]
517 .iter()
518 .map(|s| s.to_string())
519 .collect();
520
521 let after: Vec<String> = lines[line_idx + 1..end]
522 .iter()
523 .map(|s| s.to_string())
524 .collect();
525
526 Ok((before, after))
527 }
528}
529
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates a temporary directory and the path of a `content.bin` inside it. The directory
    /// handle is returned so it stays alive for the duration of the test.
    fn scratch_content_path() -> (TempDir, PathBuf) {
        let dir = TempDir::new().unwrap();
        let content_path = dir.path().join("content.bin");
        (dir, content_path)
    }

    /// File ids are handed out sequentially, starting at zero.
    #[test]
    fn test_content_writer_basic() {
        let mut writer = ContentWriter::new();

        let first = writer.add_file(PathBuf::from("test1.txt"), "Hello, world!");
        let second = writer.add_file(PathBuf::from("test2.txt"), "Goodbye, world!");

        assert_eq!(first, 0);
        assert_eq!(second, 1);
        assert_eq!(writer.file_count(), 2);
    }

    /// Content written in in-memory mode can be read back unchanged.
    #[test]
    fn test_content_roundtrip() {
        let (_dir, content_path) = scratch_content_path();

        let mut writer = ContentWriter::new();
        writer.add_file(PathBuf::from("file1.txt"), "First file content");
        writer.add_file(PathBuf::from("file2.txt"), "Second file content");
        writer.write(&content_path).unwrap();

        let reader = ContentReader::open(&content_path).unwrap();

        assert_eq!(reader.file_count(), 2);
        assert_eq!(reader.get_file_content(0).unwrap(), "First file content");
        assert_eq!(reader.get_file_content(1).unwrap(), "Second file content");
        assert_eq!(reader.get_file_path(0).unwrap(), Path::new("file1.txt"));
        assert_eq!(reader.get_file_path(1).unwrap(), Path::new("file2.txt"));
    }

    /// A byte offset resolves to its line, with one line of context on each side.
    #[test]
    fn test_get_context() {
        let (_dir, content_path) = scratch_content_path();

        let mut writer = ContentWriter::new();
        writer.add_file(
            PathBuf::from("test.txt"),
            "Line 1\nLine 2\nLine 3 with match\nLine 4\nLine 5",
        );
        writer.write(&content_path).unwrap();

        let reader = ContentReader::open(&content_path).unwrap();

        // Offset 14 is the first byte of the third line.
        let (before, matching, after) = reader.get_context(0, 14, 1).unwrap();

        assert_eq!(before.len(), 1);
        assert_eq!(before[0], "Line 2");
        assert_eq!(matching, "Line 3 with match");
        assert_eq!(after.len(), 1);
        assert_eq!(after[0], "Line 4");
    }

    /// Content written in streaming mode can be read back unchanged.
    #[test]
    fn test_streaming_roundtrip() {
        let (_dir, content_path) = scratch_content_path();

        let mut writer = ContentWriter::new();
        writer.init(content_path.clone()).unwrap();
        writer.add_file(PathBuf::from("src/main.rs"), "fn main() {}\n");
        writer.add_file(PathBuf::from("src/lib.rs"), "pub fn hello() -> &'static str { \"hi\" }\n");
        writer.finalize_if_needed().unwrap();

        let reader = ContentReader::open(&content_path).unwrap();
        assert_eq!(reader.file_count(), 2);
        assert_eq!(reader.get_file_content(0).unwrap(), "fn main() {}\n");
        assert_eq!(reader.get_file_content(1).unwrap(), "pub fn hello() -> &'static str { \"hi\" }\n");
        assert_eq!(reader.get_file_path(0).unwrap(), Path::new("src/main.rs"));
        assert_eq!(reader.get_file_path(1).unwrap(), Path::new("src/lib.rs"));
    }

    /// Multi-line content, including the trailing newline, survives a round trip.
    #[test]
    fn test_multiline_file() {
        let (_dir, content_path) = scratch_content_path();

        let content = "fn main() {\n println!(\"Hello\");\n}\n";

        let mut writer = ContentWriter::new();
        writer.add_file(PathBuf::from("main.rs"), content);
        writer.write(&content_path).unwrap();

        let reader = ContentReader::open(&content_path).unwrap();
        assert_eq!(reader.get_file_content(0).unwrap(), content);
    }
}