1use sha2::{Digest, Sha256};
13use serde::{Deserialize, Serialize};
14use std::fs::File;
15use std::io::{BufReader, BufWriter, Read, Write};
16use std::path::{Path, PathBuf};
17
18use crate::document::DocumentTree;
19use crate::error::Result;
20use crate::Error;
21
/// Format version stamped into every persisted wrapper by the save functions.
/// The loaders in this file reject data whose stored version differs from it.
const FORMAT_VERSION: u32 = 1;
24
/// Descriptive metadata for a single document; serializable with serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentMeta {
    /// Identifier of the document.
    pub id: String,

    /// Name of the document.
    pub name: String,

    /// Format label (the tests in this file use values such as "md" and "pdf").
    pub format: String,

    /// Path the document came from, if one was recorded.
    pub source_path: Option<PathBuf>,

    /// Optional description text.
    pub description: Option<String>,

    /// Number of pages, if recorded.
    // NOTE(review): presumably only meaningful for paginated formats — confirm with callers.
    pub page_count: Option<usize>,

    /// Number of lines, if recorded.
    pub line_count: Option<usize>,

    /// Creation timestamp in UTC; `DocumentMeta::new` sets it to the current time.
    pub created_at: chrono::DateTime<chrono::Utc>,

    /// Modification timestamp in UTC; `DocumentMeta::new` sets it equal to `created_at`.
    pub modified_at: chrono::DateTime<chrono::Utc>,
}
55
56impl DocumentMeta {
57 pub fn new(id: impl Into<String>, name: impl Into<String>, format: impl Into<String>) -> Self {
59 let now = chrono::Utc::now();
60 Self {
61 id: id.into(),
62 name: name.into(),
63 format: format.into(),
64 source_path: None,
65 description: None,
66 page_count: None,
67 line_count: None,
68 created_at: now,
69 modified_at: now,
70 }
71 }
72
73 pub fn with_source_path(mut self, path: impl Into<PathBuf>) -> Self {
75 self.source_path = Some(path.into());
76 self
77 }
78
79 pub fn with_description(mut self, desc: impl Into<String>) -> Self {
81 self.description = Some(desc.into());
82 self
83 }
84}
85
/// A document as stored on disk or in a byte buffer: metadata, tree and page texts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersistedDocument {
    /// Metadata describing the document.
    pub meta: DocumentMeta,

    /// The document's `DocumentTree`.
    pub tree: DocumentTree,

    /// Per-page text content; defaults to an empty list when the field is absent
    /// from the serialized data.
    #[serde(default)]
    pub pages: Vec<PageContent>,
}
99
100impl PersistedDocument {
101 pub fn new(meta: DocumentMeta, tree: DocumentTree) -> Self {
103 Self {
104 meta,
105 tree,
106 pages: Vec::new(),
107 }
108 }
109
110 pub fn add_page(&mut self, page: usize, content: impl Into<String>) {
112 self.pages.push(PageContent {
113 page,
114 content: content.into(),
115 });
116 }
117}
118
/// Text content of one page of a persisted document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageContent {
    /// Page number this content belongs to.
    // NOTE(review): whether numbering is 0- or 1-based is not visible here — confirm with callers.
    pub page: usize,

    /// Text of the page.
    pub content: String,
}
128
/// Envelope written around every persisted payload (documents and indexes).
#[derive(Debug, Serialize, Deserialize)]
struct PersistedWrapper<T> {
    /// Format version; the save functions write `FORMAT_VERSION` here.
    version: u32,
    /// Hex SHA-256 of the compact JSON serialization of `payload`.
    checksum: String,
    /// The wrapped data.
    payload: T,
}
139
/// Options controlling how the file-based save and load functions behave.
#[derive(Debug, Clone)]
pub struct PersistenceOptions {
    /// When `true`, saves write to a temporary file and rename it over the target;
    /// when `false`, saves write directly to the target path.
    pub atomic_writes: bool,
    /// When `true`, loads recompute the payload checksum and fail if it differs
    /// from the stored one.
    pub verify_checksum: bool,
}
148
149impl Default for PersistenceOptions {
150 fn default() -> Self {
151 Self {
152 atomic_writes: true,
153 verify_checksum: true,
154 }
155 }
156}
157
158impl PersistenceOptions {
159 pub fn new() -> Self {
161 Self::default()
162 }
163
164 pub fn with_atomic_writes(mut self, enabled: bool) -> Self {
166 self.atomic_writes = enabled;
167 self
168 }
169
170 pub fn with_verify_checksum(mut self, enabled: bool) -> Self {
172 self.verify_checksum = enabled;
173 self
174 }
175}
176
177fn calculate_checksum(data: &[u8]) -> String {
179 let mut hasher = Sha256::new();
180 hasher.update(data);
181 format!("{:x}", hasher.finalize())
182}
183
184pub fn save_document(path: &Path, doc: &PersistedDocument) -> Result<()> {
202 save_document_with_options(path, doc, &PersistenceOptions::default())
203}
204
205pub fn save_document_with_options(
207 path: &Path,
208 doc: &PersistedDocument,
209 options: &PersistenceOptions,
210) -> Result<()> {
211 let payload_bytes = serde_json::to_vec(doc)
213 .map_err(|e| Error::Serialization(e.to_string()))?;
214
215 let checksum = calculate_checksum(&payload_bytes);
217
218 let wrapper = PersistedWrapper {
220 version: FORMAT_VERSION,
221 checksum,
222 payload: doc.clone(),
223 };
224
225 let json = serde_json::to_string_pretty(&wrapper)
227 .map_err(|e| Error::Serialization(e.to_string()))?;
228
229 if options.atomic_writes {
230 let temp_path = path.with_extension("tmp");
232
233 if let Some(parent) = path.parent() {
235 std::fs::create_dir_all(parent).map_err(Error::Io)?;
236 }
237
238 {
240 let file = File::create(&temp_path).map_err(Error::Io)?;
241 let mut writer = BufWriter::new(file);
242 writer.write_all(json.as_bytes()).map_err(Error::Io)?;
243 writer.flush().map_err(Error::Io)?;
244 }
245
246 std::fs::rename(&temp_path, path).map_err(Error::Io)?;
248 } else {
249 std::fs::write(path, json).map_err(Error::Io)?;
251 }
252
253 Ok(())
254}
255
256pub fn load_document(path: &Path) -> Result<PersistedDocument> {
274 load_document_with_options(path, &PersistenceOptions::default())
275}
276
277pub fn load_document_with_options(
279 path: &Path,
280 options: &PersistenceOptions,
281) -> Result<PersistedDocument> {
282 if !path.exists() {
283 return Err(Error::DocumentNotFound(
284 path.display().to_string()
285 ));
286 }
287
288 let file = File::open(path).map_err(Error::Io)?;
289 let reader = BufReader::new(file);
290
291 let wrapper: PersistedWrapper<PersistedDocument> = serde_json::from_reader(reader)
293 .map_err(|e| Error::Parse(format!("Failed to parse document: {}", e)))?;
294
295 if wrapper.version != FORMAT_VERSION {
297 return Err(Error::Parse(format!(
298 "Unsupported format version: {} (expected {})",
299 wrapper.version, FORMAT_VERSION
300 )));
301 }
302
303 if options.verify_checksum {
305 let payload_bytes = serde_json::to_vec(&wrapper.payload)
306 .map_err(|e| Error::Serialization(e.to_string()))?;
307
308 let expected_checksum = calculate_checksum(&payload_bytes);
309
310 if wrapper.checksum != expected_checksum {
311 return Err(Error::Parse(format!(
312 "Checksum mismatch: expected {}, got {}",
313 expected_checksum, wrapper.checksum
314 )));
315 }
316 }
317
318 Ok(wrapper.payload)
319}
320
321pub fn save_index(path: &Path, entries: &[DocumentMeta]) -> Result<()> {
323 save_index_with_options(path, entries, &PersistenceOptions::default())
324}
325
326pub fn save_index_with_options(
328 path: &Path,
329 entries: &[DocumentMeta],
330 options: &PersistenceOptions,
331) -> Result<()> {
332 let payload_bytes = serde_json::to_vec(entries)
334 .map_err(|e| Error::Serialization(e.to_string()))?;
335
336 let checksum = calculate_checksum(&payload_bytes);
337
338 let wrapper = PersistedWrapper {
339 version: FORMAT_VERSION,
340 checksum,
341 payload: entries.to_vec(),
342 };
343
344 let json = serde_json::to_string_pretty(&wrapper)
345 .map_err(|e| Error::Serialization(e.to_string()))?;
346
347 if options.atomic_writes {
348 let temp_path = path.with_extension("tmp");
349
350 if let Some(parent) = path.parent() {
352 std::fs::create_dir_all(parent).map_err(Error::Io)?;
353 }
354
355 {
357 let file = File::create(&temp_path).map_err(Error::Io)?;
358 let mut writer = BufWriter::new(file);
359 writer.write_all(json.as_bytes()).map_err(Error::Io)?;
360 writer.flush().map_err(Error::Io)?;
361 }
362
363 std::fs::rename(&temp_path, path).map_err(Error::Io)?;
365 } else {
366 std::fs::write(path, json).map_err(Error::Io)?;
367 }
368
369 Ok(())
370}
371
372pub fn load_index(path: &Path) -> Result<Vec<DocumentMeta>> {
374 load_index_with_options(path, &PersistenceOptions::default())
375}
376
377pub fn load_index_with_options(
379 path: &Path,
380 options: &PersistenceOptions,
381) -> Result<Vec<DocumentMeta>> {
382 if !path.exists() {
383 return Ok(Vec::new());
384 }
385
386 let file = File::open(path).map_err(Error::Io)?;
387 let reader = BufReader::new(file);
388
389 let wrapper: PersistedWrapper<Vec<DocumentMeta>> = serde_json::from_reader(reader)
390 .map_err(|e| Error::Parse(format!("Failed to parse index: {}", e)))?;
391
392 if wrapper.version != FORMAT_VERSION {
394 return Err(Error::Parse(format!(
395 "Unsupported format version: {} (expected {})",
396 wrapper.version, FORMAT_VERSION
397 )));
398 }
399
400 if options.verify_checksum {
402 let payload_bytes = serde_json::to_vec(&wrapper.payload)
403 .map_err(|e| Error::Serialization(e.to_string()))?;
404
405 let expected_checksum = calculate_checksum(&payload_bytes);
406
407 if wrapper.checksum != expected_checksum {
408 return Err(Error::Parse(format!(
409 "Checksum mismatch: expected {}, got {}",
410 expected_checksum, wrapper.checksum
411 )));
412 }
413 }
414
415 Ok(wrapper.payload)
416}
417
418pub fn save_document_to_bytes(doc: &PersistedDocument) -> Result<Vec<u8>> {
426 let payload_bytes = serde_json::to_vec(doc)
428 .map_err(|e| Error::Serialization(e.to_string()))?;
429
430 let checksum = calculate_checksum(&payload_bytes);
432
433 let wrapper = PersistedWrapper {
435 version: FORMAT_VERSION,
436 checksum,
437 payload: doc.clone(),
438 };
439
440 serde_json::to_vec(&wrapper)
442 .map_err(|e| Error::Serialization(e.to_string()))
443}
444
445pub fn load_document_from_bytes(data: &[u8]) -> Result<PersistedDocument> {
449 load_document_from_bytes_with_options(data, true)
450}
451
452pub fn load_document_from_bytes_with_options(
454 data: &[u8],
455 verify_checksum: bool,
456) -> Result<PersistedDocument> {
457 let wrapper: PersistedWrapper<PersistedDocument> = serde_json::from_slice(data)
459 .map_err(|e| Error::Parse(format!("Failed to parse document: {}", e)))?;
460
461 if wrapper.version != FORMAT_VERSION {
463 return Err(Error::VersionMismatch(format!(
464 "Expected version {}, got {}",
465 FORMAT_VERSION, wrapper.version
466 )));
467 }
468
469 if verify_checksum {
471 let payload_bytes = serde_json::to_vec(&wrapper.payload)
472 .map_err(|e| Error::Serialization(e.to_string()))?;
473
474 let expected_checksum = calculate_checksum(&payload_bytes);
475
476 if wrapper.checksum != expected_checksum {
477 return Err(Error::ChecksumMismatch(format!(
478 "Expected {}, got {}",
479 expected_checksum, wrapper.checksum
480 )));
481 }
482 }
483
484 Ok(wrapper.payload)
485}
486
487pub fn save_index_to_bytes(entries: &[DocumentMeta]) -> Result<Vec<u8>> {
489 let payload_bytes = serde_json::to_vec(entries)
490 .map_err(|e| Error::Serialization(e.to_string()))?;
491
492 let checksum = calculate_checksum(&payload_bytes);
493
494 let wrapper = PersistedWrapper {
495 version: FORMAT_VERSION,
496 checksum,
497 payload: entries.to_vec(),
498 };
499
500 serde_json::to_vec(&wrapper)
501 .map_err(|e| Error::Serialization(e.to_string()))
502}
503
504pub fn load_index_from_bytes(data: &[u8]) -> Result<Vec<DocumentMeta>> {
506 load_index_from_bytes_with_options(data, true)
507}
508
509pub fn load_index_from_bytes_with_options(
511 data: &[u8],
512 verify_checksum: bool,
513) -> Result<Vec<DocumentMeta>> {
514 let wrapper: PersistedWrapper<Vec<DocumentMeta>> = serde_json::from_slice(data)
515 .map_err(|e| Error::Parse(format!("Failed to parse index: {}", e)))?;
516
517 if wrapper.version != FORMAT_VERSION {
519 return Err(Error::VersionMismatch(format!(
520 "Expected version {}, got {}",
521 FORMAT_VERSION, wrapper.version
522 )));
523 }
524
525 if verify_checksum {
527 let payload_bytes = serde_json::to_vec(&wrapper.payload)
528 .map_err(|e| Error::Serialization(e.to_string()))?;
529
530 let expected_checksum = calculate_checksum(&payload_bytes);
531
532 if wrapper.checksum != expected_checksum {
533 return Err(Error::ChecksumMismatch(format!(
534 "Expected {}, got {}",
535 expected_checksum, wrapper.checksum
536 )));
537 }
538 }
539
540 Ok(wrapper.payload)
541}
542
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a minimal document with the given id for the tests below.
    fn create_test_doc(id: &str) -> PersistedDocument {
        PersistedDocument::new(
            DocumentMeta::new(id, "Test Doc", "md"),
            DocumentTree::new("Root", "Content"),
        )
    }

    /// A saved document can be loaded back with its metadata intact.
    #[test]
    fn test_save_and_load_document() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("test.json");

        let original = create_test_doc("doc-1");
        save_document(&file, &original).unwrap();

        let restored = load_document(&file).unwrap();
        assert_eq!(restored.meta.id, "doc-1");
        assert_eq!(restored.meta.name, "Test Doc");
    }

    /// An atomic save leaves no temporary file behind and the result is loadable.
    #[test]
    fn test_atomic_write() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("atomic.json");

        let original = create_test_doc("doc-atomic");
        let atomic = PersistenceOptions::new().with_atomic_writes(true);
        save_document_with_options(&file, &original, &atomic).unwrap();

        let leftover = file.with_extension("tmp");
        assert!(!leftover.exists());

        let restored = load_document(&file).unwrap();
        assert_eq!(restored.meta.id, "doc-atomic");
    }

    /// Editing the payload on disk makes the default (verifying) load fail with `Error::Parse`.
    #[test]
    fn test_checksum_verification() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("checksum.json");

        let original = create_test_doc("doc-checksum");
        save_document(&file, &original).unwrap();

        // Change the id inside the file so the payload no longer matches the stored checksum.
        let tampered = std::fs::read_to_string(&file)
            .unwrap()
            .replace("doc-checksum", "doc-corrupted");
        std::fs::write(&file, tampered).unwrap();

        let outcome = load_document(&file);
        assert!(outcome.is_err());
        assert!(matches!(outcome.unwrap_err(), Error::Parse(_)));
    }

    /// With verification off a wrong stored checksum is ignored; with it on, the load fails.
    #[test]
    fn test_checksum_disabled() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("no-checksum.json");

        let original = create_test_doc("doc-no-check");
        save_document(&file, &original).unwrap();

        let unchecked = PersistenceOptions::new().with_verify_checksum(false);
        let first = load_document_with_options(&file, &unchecked);
        assert!(first.is_ok());
        assert_eq!(first.unwrap().meta.id, "doc-no-check");

        // Overwrite the stored checksum with zeros while leaving the payload untouched.
        let stored_checksum = calculate_checksum(&serde_json::to_vec(&original).unwrap());
        let tampered = std::fs::read_to_string(&file).unwrap().replace(
            &stored_checksum,
            "0000000000000000000000000000000000000000000000000000000000000000",
        );
        std::fs::write(&file, tampered).unwrap();

        let second = load_document_with_options(&file, &unchecked);
        assert!(second.is_ok());

        let checked = PersistenceOptions::new().with_verify_checksum(true);
        let third = load_document_with_options(&file, &checked);
        assert!(third.is_err());
    }

    /// Loading a document from a path that does not exist reports a not-found error.
    #[test]
    fn test_load_nonexistent() {
        let missing = Path::new("/nonexistent/path.json");
        let outcome = load_document(missing);
        assert!(outcome.is_err());
        assert!(outcome.unwrap_err().is_not_found());
    }

    /// A saved index is loaded back with the same entries in the same order.
    #[test]
    fn test_save_and_load_index() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("_meta.json");

        let entries = vec![
            DocumentMeta::new("doc-1", "Doc 1", "md"),
            DocumentMeta::new("doc-2", "Doc 2", "pdf"),
        ];

        save_index(&file, &entries).unwrap();

        let restored = load_index(&file).unwrap();
        assert_eq!(restored.len(), 2);
        assert_eq!(restored[0].id, "doc-1");
        assert_eq!(restored[1].format, "pdf");
    }

    /// Loading an index from a missing file yields an empty list rather than an error.
    #[test]
    fn test_load_empty_index() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("nonexistent.json");

        let restored = load_index(&file).unwrap();
        assert!(restored.is_empty());
    }

    /// Equal inputs give equal checksums, different inputs differ, and the hex is 64 chars.
    #[test]
    fn test_checksum_calculation() {
        let first = calculate_checksum(b"test data");
        let same = calculate_checksum(b"test data");
        let other = calculate_checksum(b"different data");

        assert_eq!(first, same);
        assert_ne!(first, other);
        assert_eq!(first.len(), 64);
    }
}