1#![cfg(not(target_arch = "wasm32"))]
49
50use crate::storage::compression::{self, Codec, CompressionStats};
51use crate::{Document, RagError, Result};
52use serde::{Deserialize, Serialize};
53use std::fs::{self, File};
54use std::io::{Read, Write};
55use std::path::{Component, Path, PathBuf};
56use std::sync::atomic::{AtomicU64, Ordering};
57use std::time::{SystemTime, UNIX_EPOCH};
58
59const STORAGE_VERSION: u32 = 2;
60const DATA_EXTENSION: &str = "data";
61const META_EXTENSION: &str = "meta";
62const TMP_EXTENSION: &str = "tmp";
63static TMP_FILE_COUNTER: AtomicU64 = AtomicU64::new(0);
64
/// On-disk metadata persisted alongside each item in a `.meta` sidecar file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageMetadata {
    /// Storage format version (`STORAGE_VERSION`); checked on every load.
    pub version: u32,
    /// Creation time, seconds since the Unix epoch.
    pub created_at: u64,
    /// Last-modified time, seconds since the Unix epoch; bumped on re-save.
    pub updated_at: u64,
    /// Kind of stored item: "document", "flat_index", or "hnsw_index".
    pub item_type: String,
    /// Codec used to compress the data file.
    pub compression: Codec,
    /// Size of the serialized payload before compression, in bytes.
    pub original_size: usize,
    /// Size of the payload as written to disk, in bytes; compared against
    /// the data file's actual length on load as a corruption check.
    pub compressed_size: usize,
}
86
87impl StorageMetadata {
88 fn new(
90 item_type: String,
91 compression: Codec,
92 original_size: usize,
93 compressed_size: usize,
94 ) -> Self {
95 let now = SystemTime::now()
96 .duration_since(UNIX_EPOCH)
97 .unwrap()
98 .as_secs();
99
100 Self {
101 version: STORAGE_VERSION,
102 created_at: now,
103 updated_at: now,
104 item_type,
105 compression,
106 original_size,
107 compressed_size,
108 }
109 }
110
111 fn touch(&mut self) {
113 self.updated_at = SystemTime::now()
114 .duration_since(UNIX_EPOCH)
115 .unwrap()
116 .as_secs();
117 }
118}
119
/// File-system-backed storage: each item `name` is persisted as
/// `<base_path>/<name>.data` (compressed payload) plus
/// `<base_path>/<name>.meta` (JSON `StorageMetadata`).
#[derive(Debug)]
pub struct FileStorage {
    // Directory holding all data/meta files.
    base_path: PathBuf,
    // Codec applied to payloads on save. Loads call `decompress` without a
    // codec argument, so the stored format is presumably self-describing —
    // confirm against the compression module.
    codec: Codec,
}
139
140impl FileStorage {
    /// Creates (or opens) storage rooted at `base_path` with compression
    /// disabled. Equivalent to [`FileStorage::with_codec`] with
    /// [`Codec::None`].
    ///
    /// # Errors
    /// Propagates directory creation/validation errors from `with_codec`.
    pub fn new(base_path: impl AsRef<Path>) -> Result<Self> {
        Self::with_codec(base_path, Codec::None)
    }
166
167 pub fn with_codec(base_path: impl AsRef<Path>, codec: Codec) -> Result<Self> {
190 let base_path = base_path.as_ref().to_path_buf();
191
192 if !base_path.exists() {
194 fs::create_dir_all(&base_path).map_err(|e| {
195 RagError::StorageError(format!("Failed to create storage directory: {}", e))
196 })?;
197 }
198
199 if !base_path.is_dir() {
201 return Err(RagError::StorageError(format!(
202 "Storage path is not a directory: {}",
203 base_path.display()
204 )));
205 }
206
207 Ok(Self { base_path, codec })
208 }
209
210 pub fn save_document(&self, id: &str, document: &Document) -> Result<CompressionStats> {
244 Self::validate_item_name(id)?;
245
246 let serialized = serde_json::to_vec(document)
248 .map_err(|e| RagError::StorageError(format!("JSON serialization failed: {}", e)))?;
249
250 let (compressed, stats) = compression::compress_with(&serialized, self.codec)
252 .map_err(|e| RagError::StorageError(format!("Compression failed: {}", e)))?;
253
254 let metadata = if self.exists(id) {
256 let mut meta = self.get_metadata(id)?;
257 meta.touch();
258 meta.original_size = stats.original_size;
259 meta.compressed_size = stats.compressed_size;
260 meta.compression = stats.codec;
261 meta
262 } else {
263 StorageMetadata::new(
264 "document".to_string(),
265 stats.codec,
266 stats.original_size,
267 stats.compressed_size,
268 )
269 };
270
271 let data_path = self.item_path(id);
273 self.write_atomic(&data_path, &compressed)?;
274
275 let meta_path = self.metadata_path(id);
277 let meta_bytes = serde_json::to_vec(&metadata)
278 .map_err(|e| RagError::StorageError(format!("metadata serialize failed: {}", e)))?;
279 self.write_atomic(&meta_path, &meta_bytes)?;
280
281 Ok(stats)
282 }
283
284 pub fn load_document(&self, id: &str) -> Result<Document> {
310 Self::validate_item_name(id)?;
311
312 if !self.exists(id) {
314 return Err(RagError::StorageError(format!(
315 "Document not found: {}",
316 id
317 )));
318 }
319
320 let metadata = self.get_metadata(id)?;
322
323 if metadata.version != STORAGE_VERSION {
325 return Err(RagError::StorageError(format!(
326 "Incompatible storage version: expected {}, got {}",
327 STORAGE_VERSION, metadata.version
328 )));
329 }
330
331 let data_path = self.item_path(id);
333 let mut file = File::open(&data_path)?;
334 let mut compressed = Vec::new();
335 file.read_to_end(&mut compressed)?;
336
337 if compressed.len() != metadata.compressed_size {
339 return Err(RagError::StorageError(format!(
340 "Data corruption detected: size mismatch for {}",
341 id
342 )));
343 }
344
345 let decompressed = compression::decompress(&compressed)
347 .map_err(|e| RagError::StorageError(format!("Decompression failed: {}", e)))?;
348
349 let document: Document = serde_json::from_slice(&decompressed)
351 .map_err(|e| RagError::StorageError(format!("JSON deserialization failed: {}", e)))?;
352
353 Ok(document)
354 }
355
    /// Persists a flat index snapshot under `name`.
    ///
    /// # Errors
    /// Fails on invalid names or serialization/compression/I/O errors.
    pub fn save_flat_index(
        &self,
        name: &str,
        index: &FlatIndexWrapper,
    ) -> Result<CompressionStats> {
        Self::validate_item_name(name)?;
        self.save_with_metadata(name, index, "flat_index")
    }

    /// Loads a flat index snapshot previously saved under `name`.
    ///
    /// # Errors
    /// Fails if the item is missing, version-incompatible, or corrupted.
    pub fn load_flat_index(&self, name: &str) -> Result<FlatIndexWrapper> {
        Self::validate_item_name(name)?;
        self.load_with_metadata(name)
    }

    /// Persists an HNSW index snapshot under `name`.
    ///
    /// # Errors
    /// Fails on invalid names or serialization/compression/I/O errors.
    pub fn save_hnsw_index(
        &self,
        name: &str,
        index: &HNSWIndexWrapper,
    ) -> Result<CompressionStats> {
        Self::validate_item_name(name)?;
        self.save_with_metadata(name, index, "hnsw_index")
    }

    /// Loads an HNSW index snapshot previously saved under `name`.
    ///
    /// # Errors
    /// Fails if the item is missing, version-incompatible, or corrupted.
    pub fn load_hnsw_index(&self, name: &str) -> Result<HNSWIndexWrapper> {
        Self::validate_item_name(name)?;
        self.load_with_metadata(name)
    }
437
438 pub fn delete(&self, name: &str) -> Result<()> {
465 Self::validate_item_name(name)?;
466
467 let data_path = self.item_path(name);
468 let meta_path = self.metadata_path(name);
469
470 if data_path.exists() {
472 fs::remove_file(&data_path).map_err(|e| {
473 RagError::StorageError(format!("Failed to delete data file: {}", e))
474 })?;
475 }
476
477 if meta_path.exists() {
479 fs::remove_file(&meta_path).map_err(|e| {
480 RagError::StorageError(format!("Failed to delete metadata file: {}", e))
481 })?;
482 }
483
484 Ok(())
485 }
486
487 pub fn list(&self) -> Result<Vec<String>> {
513 let entries = fs::read_dir(&self.base_path).map_err(|e| {
514 RagError::StorageError(format!("Failed to read storage directory: {}", e))
515 })?;
516
517 let mut names = std::collections::HashSet::new();
518
519 for entry in entries {
520 let entry = entry.map_err(|e| {
521 RagError::StorageError(format!("Failed to read directory entry: {}", e))
522 })?;
523
524 let path = entry.path();
525 if path.is_file() {
526 if let Some(ext) = path.extension() {
527 if ext == DATA_EXTENSION || ext == META_EXTENSION {
528 if let Some(stem) = path.file_stem() {
529 if let Some(name) = stem.to_str() {
530 names.insert(name.to_string());
531 }
532 }
533 }
534 }
535 }
536 }
537
538 let mut result: Vec<String> = names.into_iter().collect();
539 result.sort();
540 Ok(result)
541 }
542
543 pub fn get_metadata(&self, name: &str) -> Result<StorageMetadata> {
569 Self::validate_item_name(name)?;
570
571 let meta_path = self.metadata_path(name);
572
573 if !meta_path.exists() {
574 return Err(RagError::StorageError(format!(
575 "Metadata not found for item: {}",
576 name
577 )));
578 }
579
580 let mut file = File::open(&meta_path)?;
581 let mut contents = Vec::new();
582 file.read_to_end(&mut contents)?;
583
584 let metadata: StorageMetadata = serde_json::from_slice(&contents)
586 .or_else(|_| bincode::deserialize::<StorageMetadata>(&contents))
587 .map_err(|e| RagError::StorageError(format!("metadata deserialize failed: {}", e)))?;
588 Ok(metadata)
589 }
590
591 pub fn total_size(&self) -> Result<u64> {
615 let entries = fs::read_dir(&self.base_path).map_err(|e| {
616 RagError::StorageError(format!("Failed to read storage directory: {}", e))
617 })?;
618
619 let mut total = 0u64;
620
621 for entry in entries {
622 let entry = entry.map_err(|e| {
623 RagError::StorageError(format!("Failed to read directory entry: {}", e))
624 })?;
625
626 let metadata = entry.metadata()?;
627 if metadata.is_file() {
628 total += metadata.len();
629 }
630 }
631
632 Ok(total)
633 }
634
635 pub fn clear(&self) -> Result<()> {
658 let entries = fs::read_dir(&self.base_path).map_err(|e| {
659 RagError::StorageError(format!("Failed to read storage directory: {}", e))
660 })?;
661
662 for entry in entries {
663 let entry = entry.map_err(|e| {
664 RagError::StorageError(format!("Failed to read directory entry: {}", e))
665 })?;
666
667 let path = entry.path();
668 if path.is_file() {
669 fs::remove_file(&path)
670 .map_err(|e| RagError::StorageError(format!("Failed to delete file: {}", e)))?;
671 }
672 }
673
674 Ok(())
675 }
676
677 pub fn exists(&self, name: &str) -> bool {
700 if Self::is_invalid_item_name(name) {
701 return false;
702 }
703 self.item_path(name).exists() && self.metadata_path(name).exists()
704 }
705
706 fn is_invalid_item_name(name: &str) -> bool {
709 if name.is_empty() {
710 return true;
711 }
712
713 if name.contains('\0') {
715 return true;
716 }
717
718 if name.contains('/') || name.contains('\\') {
721 return true;
722 }
723
724 let path = Path::new(name);
725 if path.is_absolute() {
726 return true;
727 }
728
729 let mut components = path.components();
731 match components.next() {
732 Some(Component::Normal(_)) => {
733 if components.next().is_some() {
734 return true;
735 }
736 }
737 _ => return true,
738 }
739
740 let base_name = name.split('.').next().unwrap_or(name);
745 let stem_upper = base_name.to_ascii_uppercase();
746 let is_reserved = matches!(
747 stem_upper.as_str(),
748 "CON"
749 | "PRN"
750 | "AUX"
751 | "NUL"
752 | "COM1"
753 | "COM2"
754 | "COM3"
755 | "COM4"
756 | "COM5"
757 | "COM6"
758 | "COM7"
759 | "COM8"
760 | "COM9"
761 | "LPT1"
762 | "LPT2"
763 | "LPT3"
764 | "LPT4"
765 | "LPT5"
766 | "LPT6"
767 | "LPT7"
768 | "LPT8"
769 | "LPT9"
770 );
771 if is_reserved {
772 return true;
773 }
774
775 false
776 }
777
778 fn validate_item_name(name: &str) -> Result<()> {
779 if Self::is_invalid_item_name(name) {
780 return Err(RagError::StorageError(format!(
781 "Invalid item name: '{}'. Names must be a single path segment",
782 name
783 )));
784 }
785 Ok(())
786 }
787
    /// Path of the payload file for `name`: `<base_path>/<name>.data`.
    fn item_path(&self, name: &str) -> PathBuf {
        self.base_path.join(format!("{}.{}", name, DATA_EXTENSION))
    }

    /// Path of the metadata sidecar for `name`: `<base_path>/<name>.meta`.
    fn metadata_path(&self, name: &str) -> PathBuf {
        self.base_path.join(format!("{}.{}", name, META_EXTENSION))
    }
797
798 fn write_atomic(&self, path: &Path, data: &[u8]) -> Result<()> {
816 let filename = path.file_name().and_then(|f| f.to_str()).unwrap_or("item");
818 let counter = TMP_FILE_COUNTER.fetch_add(1, Ordering::Relaxed);
819 let tmp_path = path.with_file_name(format!(
820 "{}.{}.{}.{}",
821 filename,
822 std::process::id(),
823 counter,
824 TMP_EXTENSION
825 ));
826
827 {
829 let mut file = File::create(&tmp_path)?;
830 file.write_all(data)?;
831 file.sync_all()?; }
833
834 fs::rename(&tmp_path, path).map_err(|e| {
836 let _ = fs::remove_file(&tmp_path);
838 RagError::IoError(e)
839 })?;
840
841 Ok(())
842 }
843
844 fn save_with_metadata<T: Serialize>(
848 &self,
849 name: &str,
850 item: &T,
851 item_type: &str,
852 ) -> Result<CompressionStats> {
853 let serialized = serde_json::to_vec(item)
855 .map_err(|e| RagError::StorageError(format!("JSON serialization failed: {}", e)))?;
856
857 let (compressed, stats) = compression::compress_with(&serialized, self.codec)
859 .map_err(|e| RagError::StorageError(format!("Compression failed: {}", e)))?;
860
861 let metadata = if self.exists(name) {
863 let mut meta = self.get_metadata(name)?;
864 meta.touch();
865 meta.original_size = stats.original_size;
866 meta.compressed_size = stats.compressed_size;
867 meta.compression = stats.codec;
868 meta
869 } else {
870 StorageMetadata::new(
871 item_type.to_string(),
872 stats.codec,
873 stats.original_size,
874 stats.compressed_size,
875 )
876 };
877
878 let data_path = self.item_path(name);
880 self.write_atomic(&data_path, &compressed)?;
881
882 let meta_path = self.metadata_path(name);
884 let meta_bytes = serde_json::to_vec(&metadata)
885 .map_err(|e| RagError::StorageError(format!("metadata serialize failed: {}", e)))?;
886 self.write_atomic(&meta_path, &meta_bytes)?;
887
888 Ok(stats)
889 }
890
891 fn load_with_metadata<T: for<'de> Deserialize<'de>>(&self, name: &str) -> Result<T> {
895 if !self.exists(name) {
897 return Err(RagError::StorageError(format!("Item not found: {}", name)));
898 }
899
900 let metadata = self.get_metadata(name)?;
902
903 if metadata.version != STORAGE_VERSION {
905 return Err(RagError::StorageError(format!(
906 "Incompatible storage version: expected {}, got {}",
907 STORAGE_VERSION, metadata.version
908 )));
909 }
910
911 let data_path = self.item_path(name);
913 let mut file = File::open(&data_path)?;
914 let mut compressed = Vec::new();
915 file.read_to_end(&mut compressed)?;
916
917 if compressed.len() != metadata.compressed_size {
919 return Err(RagError::StorageError(format!(
920 "Data corruption detected: size mismatch for {}",
921 name
922 )));
923 }
924
925 let decompressed = compression::decompress(&compressed)
927 .map_err(|e| RagError::StorageError(format!("Decompression failed: {}", e)))?;
928
929 let item: T = serde_json::from_slice::<T>(&decompressed)
931 .map_err(|e| RagError::StorageError(format!("JSON deserialization failed: {}", e)))?;
932
933 Ok(item)
934 }
935}
936
/// Serializable snapshot of a `FlatIndex` (dimension + documents), used as
/// its on-disk representation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FlatIndexWrapper {
    /// Dimensionality of the stored embeddings.
    pub embedding_dim: usize,
    /// All documents held by the index at snapshot time.
    pub documents: Vec<Document>,
}
946
impl FlatIndexWrapper {
    /// Captures a snapshot of `index`: its dimension and all documents.
    pub fn from_index(index: &crate::index::FlatIndex) -> Self {
        Self {
            embedding_dim: index.embedding_dim(),
            documents: index.get_all_documents(),
        }
    }

    /// Rebuilds a `FlatIndex` from this snapshot.
    ///
    /// # Errors
    /// Propagates any error from `FlatIndex::add_batch` (exact conditions
    /// depend on that implementation — confirm there).
    pub fn to_index(&self) -> Result<crate::index::FlatIndex> {
        let mut index = crate::index::FlatIndex::new(self.embedding_dim);
        index.add_batch(self.documents.clone())?;
        Ok(index)
    }
}
963
/// Serializable snapshot of an `HNSWIndex`: dimension, documents, and the
/// configuration needed to rebuild the index on load.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HNSWIndexWrapper {
    /// Dimensionality of the stored embeddings.
    pub embedding_dim: usize,
    /// All documents held by the index at snapshot time.
    pub documents: Vec<Document>,
    /// Construction/search parameters (mirror of `HNSWConfig`).
    pub config: HNSWConfigWrapper,
}

/// Serializable mirror of `crate::index::HNSWConfig`; see the `From` impls
/// below for the exact field mapping.
///
/// The `serde(default…)` attributes keep snapshots written before the
/// heuristic fields existed loadable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HNSWConfigWrapper {
    // Mirrors `HNSWConfig::m`.
    pub m: usize,
    // Mirrors `HNSWConfig::m0`.
    pub m0: usize,
    // Mirrors `HNSWConfig::ef_construction`.
    pub ef_construction: usize,
    // Mirrors `HNSWConfig::ef_search`.
    pub ef_search: usize,
    // Mirrors `HNSWConfig::ml`.
    pub ml: f32,
    // Defaults to true when absent from an old snapshot.
    #[serde(default = "default_use_heuristic")]
    pub use_heuristic: bool,
    // Defaults to false when absent from an old snapshot.
    #[serde(default)]
    pub extend_candidates: bool,
    // Defaults to true when absent from an old snapshot.
    #[serde(default = "default_keep_pruned")]
    pub keep_pruned_connections: bool,
}
990
// Serde default providers for fields added after the first snapshot
// format; snapshots missing these fields deserialize with these values.
fn default_use_heuristic() -> bool {
    true
}
fn default_keep_pruned() -> bool {
    true
}
997
impl From<&crate::index::HNSWConfig> for HNSWConfigWrapper {
    /// Field-by-field copy of the live config into its serializable mirror.
    fn from(config: &crate::index::HNSWConfig) -> Self {
        Self {
            m: config.m,
            m0: config.m0,
            ef_construction: config.ef_construction,
            ef_search: config.ef_search,
            ml: config.ml,
            use_heuristic: config.use_heuristic,
            extend_candidates: config.extend_candidates,
            keep_pruned_connections: config.keep_pruned_connections,
        }
    }
}

impl From<HNSWConfigWrapper> for crate::index::HNSWConfig {
    /// Rebuilds a live config from a snapshot.
    ///
    /// `build_strategy` and `seed` are NOT persisted in the wrapper, so
    /// they reset to `BuildStrategy::default()` / `None` on load.
    fn from(wrapper: HNSWConfigWrapper) -> Self {
        Self {
            m: wrapper.m,
            m0: wrapper.m0,
            ef_construction: wrapper.ef_construction,
            ef_search: wrapper.ef_search,
            ml: wrapper.ml,
            use_heuristic: wrapper.use_heuristic,
            extend_candidates: wrapper.extend_candidates,
            keep_pruned_connections: wrapper.keep_pruned_connections,
            build_strategy: crate::index::BuildStrategy::default(),
            seed: None,
        }
    }
}
1029
impl HNSWIndexWrapper {
    /// Captures a snapshot of `index`: dimension, documents, and config.
    pub fn from_index(index: &crate::index::HNSWIndex) -> Self {
        Self {
            embedding_dim: index.embedding_dim(),
            documents: index.get_all_documents(),
            config: HNSWConfigWrapper::from(index.config()),
        }
    }

    /// Rebuilds an `HNSWIndex` by re-inserting every stored document.
    ///
    /// NOTE(review): the graph is reconstructed from scratch, so its link
    /// structure (and thus approximate-search results) may differ from the
    /// original index even though the data is identical — confirm whether
    /// that is acceptable for callers.
    ///
    /// # Errors
    /// Propagates the first error from `HNSWIndex::add`.
    pub fn to_index(&self) -> Result<crate::index::HNSWIndex> {
        let config: crate::index::HNSWConfig = self.config.clone().into();
        let mut index = crate::index::HNSWIndex::new(self.embedding_dim, config);
        for doc in &self.documents {
            index.add(doc.clone())?;
        }
        Ok(index)
    }
}
1050
1051#[cfg(test)]
1052mod tests {
1053 use super::*;
1054 use std::sync::{Arc, Barrier};
1055 use std::thread;
1056 use tempfile::tempdir;
1057
    /// Builds a small deterministic document with a 5-dim embedding.
    fn create_test_document(id: &str) -> Document {
        Document {
            id: id.to_string(),
            content: format!("Test content for {}", id),
            embedding: vec![0.1, 0.2, 0.3, 0.4, 0.5],
            metadata: Some(serde_json::json!({"test": true})),
        }
    }

    /// Flat index with two documents matching the 5-dim test embedding.
    fn create_test_flat_index() -> crate::index::FlatIndex {
        let mut index = crate::index::FlatIndex::new(5);
        index.add(create_test_document("doc1")).unwrap();
        index.add(create_test_document("doc2")).unwrap();
        index
    }

    /// HNSW index with two documents and default parameters.
    fn create_test_hnsw_index() -> crate::index::HNSWIndex {
        let mut index = crate::index::HNSWIndex::with_defaults(5);
        index.add(create_test_document("doc1")).unwrap();
        index.add(create_test_document("doc2")).unwrap();
        index
    }

    // Construction creates the backing directory.
    #[test]
    fn test_new_storage() {
        let dir = tempdir().unwrap();
        let _storage = FileStorage::new(dir.path()).unwrap();
        assert!(dir.path().exists());
        assert!(dir.path().is_dir());
    }

    // Construction with an explicit codec also succeeds.
    #[test]
    fn test_new_storage_with_codec() {
        let dir = tempdir().unwrap();
        let _storage = FileStorage::with_codec(dir.path(), Codec::Gzip).unwrap();
        assert!(dir.path().exists());
    }

    // A base path that is a regular file must be rejected.
    #[test]
    fn test_invalid_storage_path() {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("file.txt");
        std::fs::write(&file_path, b"test").unwrap();

        let result = FileStorage::new(&file_path);
        assert!(result.is_err());
    }

    // Round-trip: a saved document loads back with identical fields.
    #[test]
    fn test_document_save_load() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        let doc = create_test_document("doc1");
        let stats = storage.save_document("doc1", &doc).unwrap();

        assert!(stats.original_size > 0);
        assert_eq!(stats.codec, Codec::None);

        let loaded = storage.load_document("doc1").unwrap();
        assert_eq!(loaded.id, doc.id);
        assert_eq!(loaded.content, doc.content);
        assert_eq!(loaded.embedding, doc.embedding);
    }

    // Loading a never-saved id is an error.
    #[test]
    fn test_document_not_found() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        let result = storage.load_document("nonexistent");
        assert!(result.is_err());
    }
1131
    // Round-trip a flat index snapshot through storage.
    #[test]
    fn test_flat_index_persistence() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        let index = create_test_flat_index();
        let wrapper = FlatIndexWrapper::from_index(&index);

        let stats = storage.save_flat_index("index1", &wrapper).unwrap();
        assert!(stats.original_size > 0);

        let loaded_wrapper = storage.load_flat_index("index1").unwrap();
        let loaded_index = loaded_wrapper.to_index().unwrap();

        assert_eq!(loaded_index.len(), index.len());
        assert_eq!(loaded_index.embedding_dim(), index.embedding_dim());
    }

    // Round-trip an HNSW index snapshot through storage.
    #[test]
    fn test_hnsw_index_persistence() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        let index = create_test_hnsw_index();
        let wrapper = HNSWIndexWrapper::from_index(&index);

        let stats = storage.save_hnsw_index("index1", &wrapper).unwrap();
        assert!(stats.original_size > 0);

        let loaded_wrapper = storage.load_hnsw_index("index1").unwrap();
        let loaded_index = loaded_wrapper.to_index().unwrap();

        assert_eq!(loaded_index.len(), index.len());
        assert_eq!(loaded_index.embedding_dim(), index.embedding_dim());
    }

    // write_atomic leaves the target in place and no .tmp files behind.
    #[test]
    fn test_atomic_write() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        let path = dir.path().join("test.data");
        let data = b"test data";

        storage.write_atomic(&path, data).unwrap();

        assert!(path.exists());
        let read_data = std::fs::read(&path).unwrap();
        assert_eq!(read_data, data);

        let has_tmp = std::fs::read_dir(dir.path())
            .unwrap()
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.file_name().to_string_lossy().to_string())
            .any(|name| name.ends_with(".tmp"));
        assert!(!has_tmp);
    }

    // Two threads writing sibling files concurrently must not mix contents;
    // exercises the unique (pid + counter) temp-file naming in write_atomic.
    #[test]
    fn concurrent_atomic_writes_to_sibling_paths_do_not_cross_contaminate() {
        let dir = tempdir().unwrap();
        let storage = Arc::new(FileStorage::new(dir.path()).unwrap());
        let data_path = dir.path().join("doc.data");
        let meta_path = dir.path().join("doc.meta");

        for _ in 0..128 {
            let barrier = Arc::new(Barrier::new(3));
            let s1 = Arc::clone(&storage);
            let b1 = Arc::clone(&barrier);
            let data_path_1 = data_path.clone();
            let t1 = thread::spawn(move || {
                b1.wait();
                s1.write_atomic(&data_path_1, b"DATA").unwrap();
            });

            let s2 = Arc::clone(&storage);
            let b2 = Arc::clone(&barrier);
            let meta_path_1 = meta_path.clone();
            let t2 = thread::spawn(move || {
                b2.wait();
                s2.write_atomic(&meta_path_1, b"META").unwrap();
            });

            barrier.wait();
            t1.join().unwrap();
            t2.join().unwrap();

            assert_eq!(std::fs::read(&data_path).unwrap(), b"DATA");
            assert_eq!(std::fs::read(&meta_path).unwrap(), b"META");
        }
    }
1224
    // Fresh metadata: correct version/type, matching created/updated times.
    #[test]
    fn test_metadata() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        let doc = create_test_document("doc1");
        storage.save_document("doc1", &doc).unwrap();

        let metadata = storage.get_metadata("doc1").unwrap();
        assert_eq!(metadata.version, STORAGE_VERSION);
        assert_eq!(metadata.item_type, "document");
        assert!(metadata.created_at > 0);
        assert_eq!(metadata.created_at, metadata.updated_at);
        assert_eq!(metadata.compression, Codec::None);
        assert!(metadata.original_size > 0);
    }

    // Re-saving preserves created_at and never moves updated_at backwards.
    #[test]
    fn test_metadata_update() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        let doc = create_test_document("doc1");
        storage.save_document("doc1", &doc).unwrap();

        let meta1 = storage.get_metadata("doc1").unwrap();

        std::thread::sleep(std::time::Duration::from_millis(10));

        storage.save_document("doc1", &doc).unwrap();

        let meta2 = storage.get_metadata("doc1").unwrap();
        assert_eq!(meta2.created_at, meta1.created_at);
        assert!(meta2.updated_at >= meta1.updated_at);
    }

    // list() reports each saved name exactly once.
    #[test]
    fn test_list_storage() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        assert_eq!(storage.list().unwrap().len(), 0);

        storage
            .save_document("doc1", &create_test_document("doc1"))
            .unwrap();
        storage
            .save_document("doc2", &create_test_document("doc2"))
            .unwrap();
        storage
            .save_document("doc3", &create_test_document("doc3"))
            .unwrap();

        let items = storage.list().unwrap();
        assert_eq!(items.len(), 3);
        assert!(items.contains(&"doc1".to_string()));
        assert!(items.contains(&"doc2".to_string()));
        assert!(items.contains(&"doc3".to_string()));
    }

    // delete() removes both files, so exists() and list() no longer see it.
    #[test]
    fn test_delete() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        let doc = create_test_document("doc1");
        storage.save_document("doc1", &doc).unwrap();

        assert!(storage.exists("doc1"));
        assert_eq!(storage.list().unwrap().len(), 1);

        storage.delete("doc1").unwrap();

        assert!(!storage.exists("doc1"));
        assert_eq!(storage.list().unwrap().len(), 0);
    }

    // Deleting a missing item is a successful no-op, not an error.
    #[test]
    fn test_delete_nonexistent() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        let result = storage.delete("nonexistent");
        assert!(result.is_ok());
    }

    // clear() removes every stored item.
    #[test]
    fn test_clear() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        storage
            .save_document("doc1", &create_test_document("doc1"))
            .unwrap();
        storage
            .save_document("doc2", &create_test_document("doc2"))
            .unwrap();
        storage
            .save_document("doc3", &create_test_document("doc3"))
            .unwrap();

        assert_eq!(storage.list().unwrap().len(), 3);

        storage.clear().unwrap();

        assert_eq!(storage.list().unwrap().len(), 0);
    }
1335
    // total_size grows as items are added.
    #[test]
    fn test_storage_size() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        assert_eq!(storage.total_size().unwrap(), 0);

        storage
            .save_document("doc1", &create_test_document("doc1"))
            .unwrap();

        let size = storage.total_size().unwrap();
        assert!(size > 0);

        storage
            .save_document("doc2", &create_test_document("doc2"))
            .unwrap();

        let size2 = storage.total_size().unwrap();
        assert!(size2 > size);
    }

    // Save/load round-trips under every codec compiled into the build.
    #[test]
    fn test_compression_codecs() {
        let dir = tempdir().unwrap();

        #[allow(unused_mut)]
        let mut codecs = vec![Codec::None, Codec::Gzip];

        #[cfg(feature = "zstd")]
        codecs.push(Codec::Zstd);

        #[cfg(feature = "lz4")]
        codecs.push(Codec::Lz4);

        for codec in codecs {
            let storage = FileStorage::with_codec(dir.path(), codec).unwrap();
            let doc = create_test_document("doc1");

            let stats = storage.save_document("test", &doc).unwrap();
            assert!(stats.original_size > 0);

            let loaded = storage.load_document("test").unwrap();
            assert_eq!(loaded.id, doc.id);
            assert_eq!(loaded.content, doc.content);

            storage.delete("test").unwrap();
        }
    }

    // exists() is true only after a successful save of that exact name.
    #[test]
    fn test_exists() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        assert!(!storage.exists("doc1"));

        storage
            .save_document("doc1", &create_test_document("doc1"))
            .unwrap();

        assert!(storage.exists("doc1"));
        assert!(!storage.exists("doc2"));
    }

    // Wrapper round-trip keeps the flat index searchable.
    #[test]
    fn test_flat_index_wrapper_roundtrip() {
        let index = create_test_flat_index();
        let wrapper = FlatIndexWrapper::from_index(&index);
        let restored = wrapper.to_index().unwrap();

        assert_eq!(restored.len(), index.len());
        assert_eq!(restored.embedding_dim(), index.embedding_dim());

        let query = vec![0.1, 0.2, 0.3, 0.4, 0.5];
        let results = restored.search(&query, 2).unwrap();
        assert_eq!(results.len(), 2);
    }

    // Wrapper round-trip keeps the HNSW index searchable.
    #[test]
    fn test_hnsw_index_wrapper_roundtrip() {
        let index = create_test_hnsw_index();
        let wrapper = HNSWIndexWrapper::from_index(&index);
        let restored = wrapper.to_index().unwrap();

        assert_eq!(restored.len(), index.len());
        assert_eq!(restored.embedding_dim(), index.embedding_dim());

        let query = vec![0.1, 0.2, 0.3, 0.4, 0.5];
        let results = restored.search(&query, 2).unwrap();
        assert_eq!(results.len(), 2);
    }

    // Repeated overwrite/load of the same id stays consistent (sequential,
    // despite the name).
    #[test]
    fn test_concurrent_writes() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        let doc = create_test_document("doc1");

        for _ in 0..10 {
            storage.save_document("doc1", &doc).unwrap();
            let loaded = storage.load_document("doc1").unwrap();
            assert_eq!(loaded.id, doc.id);
        }
    }

    // Large payloads survive the round trip intact.
    #[test]
    fn test_large_document() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();

        let mut large_doc = create_test_document("large");
        large_doc.embedding = vec![0.5; 10000];
        large_doc.content = "x".repeat(100000);

        let stats = storage.save_document("large", &large_doc).unwrap();
        assert!(stats.original_size > 100000);

        let loaded = storage.load_document("large").unwrap();
        assert_eq!(loaded.id, large_doc.id);
        assert_eq!(loaded.embedding.len(), 10000);
        assert_eq!(loaded.content.len(), 100000);
    }

    // Traversal-style ids must be rejected before touching the filesystem.
    #[test]
    fn test_rejects_path_traversal_item_names() {
        let dir = tempdir().unwrap();
        let storage = FileStorage::new(dir.path()).unwrap();
        let doc = create_test_document("doc1");

        let result = storage.save_document("../outside", &doc);
        assert!(result.is_err(), "path traversal names should be rejected");
    }
1475
    // Exhaustive valid/invalid matrix for is_invalid_item_name.
    #[test]
    fn test_is_invalid_item_name_comprehensive() {
        // Plain single-segment names are valid.
        assert!(!FileStorage::is_invalid_item_name("hello"));
        assert!(!FileStorage::is_invalid_item_name("my_index"));
        assert!(!FileStorage::is_invalid_item_name("data-2024"));
        assert!(!FileStorage::is_invalid_item_name("file.txt"));

        // Empty name.
        assert!(FileStorage::is_invalid_item_name(""));

        // Path traversal and separators.
        assert!(FileStorage::is_invalid_item_name(".."));
        assert!(FileStorage::is_invalid_item_name("."));
        assert!(FileStorage::is_invalid_item_name("foo/bar"));
        assert!(FileStorage::is_invalid_item_name("foo\\bar"));
        assert!(FileStorage::is_invalid_item_name("../outside"));

        // Absolute paths.
        assert!(FileStorage::is_invalid_item_name("/absolute"));
        #[cfg(target_os = "windows")]
        assert!(FileStorage::is_invalid_item_name("C:\\Windows\\System32"));

        // Embedded NUL bytes.
        assert!(FileStorage::is_invalid_item_name("hello\0world"));
        assert!(FileStorage::is_invalid_item_name("\0"));

        // Windows reserved device names, any case.
        assert!(FileStorage::is_invalid_item_name("CON"));
        assert!(FileStorage::is_invalid_item_name("con"));
        assert!(FileStorage::is_invalid_item_name("Con"));
        assert!(FileStorage::is_invalid_item_name("PRN"));
        assert!(FileStorage::is_invalid_item_name("AUX"));
        assert!(FileStorage::is_invalid_item_name("NUL"));
        assert!(FileStorage::is_invalid_item_name("nul"));
        assert!(FileStorage::is_invalid_item_name("COM1"));
        assert!(FileStorage::is_invalid_item_name("com1"));
        assert!(FileStorage::is_invalid_item_name("COM9"));
        assert!(FileStorage::is_invalid_item_name("LPT1"));
        assert!(FileStorage::is_invalid_item_name("lpt1"));
        assert!(FileStorage::is_invalid_item_name("LPT9"));

        // Reserved names stay invalid when an extension is appended.
        assert!(FileStorage::is_invalid_item_name("CON.txt"));
        assert!(FileStorage::is_invalid_item_name("NUL.tar.gz"));
        assert!(FileStorage::is_invalid_item_name("com1.data"));
        assert!(FileStorage::is_invalid_item_name("lpt3.log"));
    }
1524}