use crate::config::ShardexConfig;
use crate::document_text_entry::{DocumentTextEntry, TextDataHeader, TextIndexHeader};
use crate::error::ShardexError;
use crate::identifiers::ShardId;
use crate::Result;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
/// On-disk layout format version; `IndexMetadata::load` rejects any other value.
pub const LAYOUT_VERSION: u32 = 1;
/// File name of the TOML metadata file stored at the index root.
pub const METADATA_FILE: &str = "layout.meta";
/// Subdirectory (under the root) holding centroid segment files.
pub const CENTROIDS_DIR: &str = "centroids";
/// Subdirectory (under the root) holding per-shard vector/posting files.
pub const SHARDS_DIR: &str = "shards";
/// Subdirectory (under the root) holding write-ahead-log segment files.
pub const WAL_DIR: &str = "wal";
/// Extension of shard vector files: `<shard-id>.vectors`.
pub const VECTORS_EXT: &str = "vectors";
/// Extension of shard posting files: `<shard-id>.postings`.
pub const POSTINGS_EXT: &str = "postings";
/// Extension of centroid segment files: `segment_NNNNNN.shx`.
pub const SEGMENT_EXT: &str = "shx";
/// Extension of WAL segment files: `wal_NNNNNN.log`.
pub const WAL_EXT: &str = "log";
/// Pre-computed filesystem layout of an index rooted at `root_path`.
///
/// Construction (`DirectoryLayout::new`) only derives paths; it performs no
/// filesystem I/O. Use `create_directories` / `validate` for that.
#[derive(Debug, Clone)]
pub struct DirectoryLayout {
// Index root directory.
root_path: PathBuf,
// `<root>/layout.meta` (see METADATA_FILE).
metadata_path: PathBuf,
// `<root>/centroids` (see CENTROIDS_DIR).
centroids_dir: PathBuf,
// `<root>/shards` (see SHARDS_DIR).
shards_dir: PathBuf,
// `<root>/wal` (see WAL_DIR).
wal_dir: PathBuf,
}
/// Index-level metadata persisted as TOML at the layout's metadata path.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexMetadata {
/// On-disk layout version; must equal `LAYOUT_VERSION` to be loadable.
pub layout_version: u32,
/// Configuration the index was created with (re-validated on load).
pub config: ShardexConfig,
/// Creation time, seconds since the Unix epoch.
pub created_at: u64,
/// Last-save time, seconds since the Unix epoch; refreshed by `save`.
pub modified_at: u64,
/// Number of shards tracked by the index.
pub shard_count: usize,
/// Number of centroid segment files.
pub centroid_segment_count: usize,
/// Number of WAL segment files.
pub wal_segment_count: usize,
/// Lifecycle flags (active / recovery / clean-shutdown).
pub flags: IndexFlags,
/// Whether document text storage (text_index.dat / text_data.dat) is enabled.
pub text_storage_enabled: bool,
/// Optional cap on a single document's text size, in bytes (None = unlimited).
pub max_document_text_size: Option<usize>,
}
/// Lifecycle flags recorded in the persisted index metadata.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexFlags {
/// True while the index is marked open/in-use (see `mark_active`).
pub active: bool,
/// True when recovery is required before the index may be used normally.
pub needs_recovery: bool,
/// True when the last session ended via `mark_inactive` (clean shutdown).
pub clean_shutdown: bool,
}
/// Scans a `DirectoryLayout` on disk and classifies the files it finds.
#[derive(Debug)]
pub struct FileDiscovery {
// Layout whose directories are scanned.
layout: DirectoryLayout,
}
/// Result of a full discovery pass over an index directory tree.
#[derive(Debug, Clone)]
pub struct DiscoveredFiles {
/// Shards found in the shards directory, paired by shard id.
pub shards: Vec<ShardFiles>,
/// Centroid segments, sorted by segment number.
pub centroid_segments: Vec<SegmentFile>,
/// WAL segments, sorted by segment number.
pub wal_segments: Vec<SegmentFile>,
/// Files in the managed directories that matched no known naming pattern.
pub orphaned_files: Vec<PathBuf>,
}
/// The (possibly partial) pair of files belonging to one shard.
#[derive(Debug, Clone)]
pub struct ShardFiles {
/// Identifier parsed from the file stem.
pub shard_id: ShardId,
/// `<shard-id>.vectors`, if present on disk.
pub vectors_file: Option<PathBuf>,
/// `<shard-id>.postings`, if present on disk.
pub postings_file: Option<PathBuf>,
}
/// One discovered segment file (centroid or WAL).
#[derive(Debug, Clone)]
pub struct SegmentFile {
/// Number parsed from the file name (e.g. `segment_000001.shx` -> 1).
pub segment_number: u32,
/// Full path to the segment file.
pub path: PathBuf,
/// File size in bytes at discovery time.
pub size: u64,
}
/// Tracks temporary files and removes them on request or when dropped.
#[derive(Debug)]
pub struct CleanupManager {
// Layout used for text-storage cleanup and statistics.
layout: DirectoryLayout,
// Registered temporary files; cleared after each cleanup pass.
temp_files: HashSet<PathBuf>,
}
/// Snapshot of what a cleanup pass would touch.
#[derive(Debug, Clone, Default)]
pub struct CleanupStats {
/// Combined size in bytes of the text index + text data files (0 if absent).
pub text_storage_size: u64,
/// Number of currently registered temporary files.
pub temp_file_count: usize,
}
impl CleanupStats {
pub fn new() -> Self {
Self::default()
}
}
impl DirectoryLayout {
/// Derives all index paths under `root_path`. Pure path computation —
/// nothing is created or checked on disk.
pub fn new<P: AsRef<Path>>(root_path: P) -> Self {
let root_path = root_path.as_ref().to_path_buf();
let metadata_path = root_path.join(METADATA_FILE);
let centroids_dir = root_path.join(CENTROIDS_DIR);
let shards_dir = root_path.join(SHARDS_DIR);
let wal_dir = root_path.join(WAL_DIR);
Self {
root_path,
metadata_path,
centroids_dir,
shards_dir,
wal_dir,
}
}
/// Index root directory.
pub fn root_path(&self) -> &Path {
&self.root_path
}
/// Path of the TOML metadata file (`<root>/layout.meta`).
pub fn metadata_path(&self) -> &Path {
&self.metadata_path
}
/// Centroid segment directory (`<root>/centroids`).
pub fn centroids_dir(&self) -> &Path {
&self.centroids_dir
}
/// Shard file directory (`<root>/shards`).
pub fn shards_dir(&self) -> &Path {
&self.shards_dir
}
/// WAL segment directory (`<root>/wal`).
pub fn wal_dir(&self) -> &Path {
&self.wal_dir
}
/// Path of a shard's vector file: `<shards>/<shard-id>.vectors`.
pub fn shard_vectors_path(&self, shard_id: &ShardId) -> PathBuf {
self.shards_dir
.join(format!("{}.{}", shard_id, VECTORS_EXT))
}
/// Path of a shard's posting file: `<shards>/<shard-id>.postings`.
pub fn shard_postings_path(&self, shard_id: &ShardId) -> PathBuf {
self.shards_dir
.join(format!("{}.{}", shard_id, POSTINGS_EXT))
}
/// Path of a centroid segment: `<centroids>/segment_NNNNNN.shx`
/// (zero-padded to 6 digits).
pub fn centroid_segment_path(&self, segment_number: u32) -> PathBuf {
self.centroids_dir
.join(format!("segment_{:06}.{}", segment_number, SEGMENT_EXT))
}
/// Path of a WAL segment: `<wal>/wal_NNNNNN.log` (zero-padded to 6 digits).
pub fn wal_segment_path(&self, segment_number: u32) -> PathBuf {
self.wal_dir
.join(format!("wal_{:06}.{}", segment_number, WAL_EXT))
}
/// Path of the document-text index file, stored at the root.
pub fn text_index_file(&self) -> PathBuf {
self.root_path.join("text_index.dat")
}
/// Path of the document-text data file, stored at the root.
pub fn text_data_file(&self) -> PathBuf {
self.root_path.join("text_data.dat")
}
/// True only when BOTH text-storage files exist.
pub fn has_text_storage(&self) -> bool {
self.text_index_file().exists() && self.text_data_file().exists()
}
/// Combined size in bytes of the two text-storage files.
///
/// # Errors
/// Returns the underlying I/O error if either file is missing or unreadable
/// (callers typically gate on `has_text_storage` first).
pub fn text_storage_size(&self) -> Result<u64> {
let index_size = std::fs::metadata(self.text_index_file())?.len();
let data_size = std::fs::metadata(self.text_data_file())?.len();
Ok(index_size + data_size)
}
/// Validates text storage: reads and validates both headers, then checks
/// cross-file consistency. Absent text storage is not an error.
pub fn validate_text_storage(&self) -> Result<()> {
if !self.has_text_storage() {
return Ok(()); }
let index_file = std::fs::File::open(self.text_index_file())?;
let index_header = self.read_text_index_header(&index_file)?;
index_header.validate()?;
let data_file = std::fs::File::open(self.text_data_file())?;
let data_header = self.read_text_data_header(&data_file)?;
data_header.validate()?;
self.validate_text_storage_consistency(&index_header, &data_header)?;
Ok(())
}
/// Reads the index-file header from the start of `file`.
/// Assumes `TextIndexHeader::SIZE` matches the on-disk header layout and
/// that the struct is `bytemuck`-Pod — TODO confirm in document_text_entry.
fn read_text_index_header(&self, file: &std::fs::File) -> Result<TextIndexHeader> {
use std::io::{Read, Seek, SeekFrom};
// Read/Seek are implemented for &File, so a shared borrow suffices.
let mut file = file;
file.seek(SeekFrom::Start(0))?;
let mut header_bytes = vec![0u8; TextIndexHeader::SIZE];
file.read_exact(&mut header_bytes)?;
// pod_read_unaligned copies, so the buffer's alignment doesn't matter.
let header: TextIndexHeader = bytemuck::pod_read_unaligned(&header_bytes);
Ok(header)
}
/// Reads the data-file header from the start of `file`; same Pod-layout
/// assumption as `read_text_index_header`.
fn read_text_data_header(&self, file: &std::fs::File) -> Result<TextDataHeader> {
use std::io::{Read, Seek, SeekFrom};
let mut file = file;
file.seek(SeekFrom::Start(0))?;
let mut header_bytes = vec![0u8; TextDataHeader::SIZE];
file.read_exact(&mut header_bytes)?;
let header: TextDataHeader = bytemuck::pod_read_unaligned(&header_bytes);
Ok(header)
}
/// Cross-checks headers against actual file sizes:
/// the index file must be exactly header + entry_count entries, and the
/// data header's next-write offset must not point past end-of-file.
fn validate_text_storage_consistency(
&self,
index_header: &TextIndexHeader,
data_header: &TextDataHeader,
) -> Result<()> {
let expected_index_size = TextIndexHeader::SIZE + (index_header.entry_count as usize * DocumentTextEntry::SIZE);
let actual_index_size = std::fs::metadata(self.text_index_file())?.len() as usize;
if expected_index_size != actual_index_size {
return Err(ShardexError::text_corruption(format!(
"Index file size mismatch: expected {}, actual {}",
expected_index_size, actual_index_size
)));
}
let actual_data_size = std::fs::metadata(self.text_data_file())?.len();
if data_header.next_text_offset > actual_data_size {
return Err(ShardexError::text_corruption(format!(
"Data file next offset {} exceeds file size {}",
data_header.next_text_offset, actual_data_size
)));
}
Ok(())
}
/// Creates the root and all three subdirectories (idempotent via
/// `create_dir_all`), wrapping failures with the offending path.
pub fn create_directories(&self) -> Result<()> {
fs::create_dir_all(&self.root_path).map_err(|e| {
ShardexError::Io(std::io::Error::new(
e.kind(),
format!("Failed to create root directory {}: {}", self.root_path.display(), e),
))
})?;
for dir in [&self.centroids_dir, &self.shards_dir, &self.wal_dir] {
fs::create_dir_all(dir).map_err(|e| {
ShardexError::Io(std::io::Error::new(
e.kind(),
format!("Failed to create directory {}: {}", dir.display(), e),
))
})?;
}
Ok(())
}
/// True only when the full layout is present, INCLUDING the metadata file
/// (so a freshly created directory tree without saved metadata reports false).
pub fn exists(&self) -> bool {
self.root_path.is_dir()
&& self.metadata_path.is_file()
&& self.centroids_dir.is_dir()
&& self.shards_dir.is_dir()
&& self.wal_dir.is_dir()
}
/// Like `exists`, but reports WHICH piece is missing via a
/// `ShardexError::Corruption` describing the absent path.
pub fn validate(&self) -> Result<()> {
if !self.root_path.is_dir() {
return Err(ShardexError::Corruption(format!(
"Index root directory does not exist: {}",
self.root_path.display()
)));
}
if !self.metadata_path.is_file() {
return Err(ShardexError::Corruption(format!(
"Index metadata file does not exist: {}",
self.metadata_path.display()
)));
}
for (name, dir) in [
(CENTROIDS_DIR, &self.centroids_dir),
(SHARDS_DIR, &self.shards_dir),
(WAL_DIR, &self.wal_dir),
] {
if !dir.is_dir() {
return Err(ShardexError::Corruption(format!(
"Index {} directory does not exist: {}",
name,
dir.display()
)));
}
}
Ok(())
}
}
impl IndexMetadata {
/// Builds fresh metadata for a brand-new index: current timestamps, zero
/// counts, clean flags, text storage disabled.
///
/// # Errors
/// Fails if `config` is invalid or the system clock is before the Unix epoch.
pub fn new(config: ShardexConfig) -> Result<Self> {
config.validate()?;
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map_err(|e| ShardexError::Config(format!("System time error: {}", e)))?
.as_secs();
Ok(Self {
layout_version: LAYOUT_VERSION,
config,
created_at: now,
modified_at: now,
shard_count: 0,
centroid_segment_count: 0,
wal_segment_count: 0,
flags: IndexFlags {
active: false,
needs_recovery: false,
clean_shutdown: true,
},
text_storage_enabled: false,
max_document_text_size: None,
})
}
/// Loads metadata from a TOML file, rejecting unknown layout versions and
/// re-validating the embedded config.
///
/// # Errors
/// `Io` on read failure, `Corruption` on parse failure, `Config` on a
/// version mismatch or invalid config.
pub fn load<P: AsRef<Path>>(path: P) -> Result<Self> {
let path = path.as_ref();
let contents = fs::read_to_string(path).map_err(|e| {
ShardexError::Io(std::io::Error::new(
e.kind(),
format!("Failed to read metadata file {}: {}", path.display(), e),
))
})?;
let metadata: Self = toml::from_str(&contents).map_err(|e| {
ShardexError::Corruption(format!("Failed to parse metadata file {}: {}", path.display(), e))
})?;
if metadata.layout_version != LAYOUT_VERSION {
return Err(ShardexError::Config(format!(
"Unsupported layout version: expected {}, found {}",
LAYOUT_VERSION, metadata.layout_version
)));
}
metadata.config.validate()?;
Ok(metadata)
}
/// Atomically persists metadata as pretty TOML: write to `<path>.tmp`,
/// then rename over `path` so readers never observe a partial file.
/// Also refreshes `modified_at`.
///
/// NOTE(review): `modified_at` is bumped even when the subsequent write or
/// rename fails, leaving the in-memory value ahead of disk — presumably
/// acceptable since callers treat a failed save as fatal; confirm.
pub fn save<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
let path = path.as_ref();
self.modified_at = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map_err(|e| ShardexError::Config(format!("System time error: {}", e)))?
.as_secs();
let contents = toml::to_string_pretty(self)
.map_err(|e| ShardexError::Corruption(format!("Failed to serialize metadata: {}", e)))?;
let temp_path = path.with_extension("tmp");
fs::write(&temp_path, contents).map_err(|e| {
ShardexError::Io(std::io::Error::new(
e.kind(),
format!("Failed to write metadata to {}: {}", temp_path.display(), e),
))
})?;
fs::rename(&temp_path, path).map_err(|e| {
// Best-effort removal of the orphaned temp file; the rename error wins.
let _ = fs::remove_file(&temp_path);
ShardexError::Io(std::io::Error::new(
e.kind(),
format!(
"Failed to move metadata from {} to {}: {}",
temp_path.display(),
path.display(),
e
),
))
})?;
Ok(())
}
/// Compatibility check used when reopening: only vector size and directory
/// path must match; other config fields may differ.
pub fn is_compatible_with(&self, config: &ShardexConfig) -> bool {
self.config.vector_size == config.vector_size && self.config.directory_path == config.directory_path
}
/// Overwrites the recorded shard count (not persisted until `save`).
pub fn set_shard_count(&mut self, count: usize) {
self.shard_count = count;
}
/// Overwrites the recorded centroid segment count (not persisted until `save`).
pub fn set_centroid_segment_count(&mut self, count: usize) {
self.centroid_segment_count = count;
}
/// Overwrites the recorded WAL segment count (not persisted until `save`).
pub fn set_wal_segment_count(&mut self, count: usize) {
self.wal_segment_count = count;
}
/// Marks the index open/in-use; clears the clean-shutdown flag.
pub fn mark_active(&mut self) {
self.flags.active = true;
self.flags.clean_shutdown = false;
}
/// Marks a clean shutdown: inactive, clean, no recovery needed.
pub fn mark_inactive(&mut self) {
self.flags.active = false;
self.flags.clean_shutdown = true;
self.flags.needs_recovery = false;
}
/// Flags the index as needing recovery (and therefore not cleanly shut down).
pub fn mark_needs_recovery(&mut self) {
self.flags.needs_recovery = true;
self.flags.clean_shutdown = false;
}
}
impl Default for IndexFlags {
fn default() -> Self {
Self {
active: false,
needs_recovery: false,
clean_shutdown: true,
}
}
}
impl FileDiscovery {
    /// Creates a discovery helper over the given directory layout.
    pub fn new(layout: DirectoryLayout) -> Self {
        Self { layout }
    }

    /// Runs a full scan: shards, centroid segments, WAL segments, and
    /// anything in those directories that matched no known naming pattern.
    // Fix: the centroid argument previously read `¢roid_segments` — an
    // HTML-entity mangling of `&centroid_segments` that does not compile.
    pub fn discover_all(&self) -> Result<DiscoveredFiles> {
        let shards = self.discover_shards()?;
        let centroid_segments = self.discover_centroid_segments()?;
        let wal_segments = self.discover_wal_segments()?;
        let orphaned_files = self.find_orphaned_files(&shards, &centroid_segments, &wal_segments)?;
        Ok(DiscoveredFiles {
            shards,
            centroid_segments,
            wal_segments,
            orphaned_files,
        })
    }

    /// Pairs `<shard-id>.vectors` / `<shard-id>.postings` files found in the
    /// shards directory. Entries that are not files, have non-UTF-8 names,
    /// don't split into exactly `<stem>.<ext>`, or whose stem is not a
    /// parseable `ShardId` are silently skipped (they surface as orphans in
    /// `find_orphaned_files`). A missing shards directory yields an empty list.
    pub fn discover_shards(&self) -> Result<Vec<ShardFiles>> {
        let mut shard_map: std::collections::HashMap<ShardId, ShardFiles> = std::collections::HashMap::new();
        if !self.layout.shards_dir().exists() {
            return Ok(Vec::new());
        }
        let entries = fs::read_dir(self.layout.shards_dir()).map_err(|e| {
            ShardexError::Io(std::io::Error::new(
                e.kind(),
                format!(
                    "Failed to read shards directory {}: {}",
                    self.layout.shards_dir().display(),
                    e
                ),
            ))
        })?;
        for entry in entries {
            let entry = entry.map_err(|e| {
                ShardexError::Io(std::io::Error::new(
                    e.kind(),
                    format!("Failed to read directory entry: {}", e),
                ))
            })?;
            let path = entry.path();
            if !path.is_file() {
                continue;
            }
            let file_name = match path.file_name().and_then(|n| n.to_str()) {
                Some(name) => name,
                None => continue,
            };
            // Expect exactly "<shard-id>.<extension>".
            let parts: Vec<&str> = file_name.split('.').collect();
            if parts.len() != 2 {
                continue;
            }
            let shard_id_str = parts[0];
            let extension = parts[1];
            let shard_id = match ShardId::parse_str(shard_id_str) {
                Ok(id) => id,
                Err(_) => continue,
            };
            // Both files of a shard land in the same ShardFiles entry.
            let shard_files = shard_map.entry(shard_id).or_insert_with(|| ShardFiles {
                shard_id,
                vectors_file: None,
                postings_file: None,
            });
            match extension {
                VECTORS_EXT => shard_files.vectors_file = Some(path),
                POSTINGS_EXT => shard_files.postings_file = Some(path),
                _ => {}
            }
        }
        Ok(shard_map.into_values().collect())
    }

    /// Discovers `segment_NNNNNN.shx` files in the centroids directory.
    pub fn discover_centroid_segments(&self) -> Result<Vec<SegmentFile>> {
        self.discover_segments(self.layout.centroids_dir(), "segment_", SEGMENT_EXT)
    }

    /// Discovers `wal_NNNNNN.log` files in the WAL directory.
    pub fn discover_wal_segments(&self) -> Result<Vec<SegmentFile>> {
        self.discover_segments(self.layout.wal_dir(), "wal_", WAL_EXT)
    }

    /// Shared scanner for `<prefix><number>.<extension>` segment files.
    /// Non-matching names and non-numeric middles are skipped; results are
    /// sorted by segment number. A missing directory yields an empty list.
    fn discover_segments(&self, dir: &Path, prefix: &str, extension: &str) -> Result<Vec<SegmentFile>> {
        let mut segments = Vec::new();
        if !dir.exists() {
            return Ok(segments);
        }
        // Perf fix: build the ".ext" suffix once instead of allocating a
        // fresh String for every directory entry.
        let suffix = format!(".{}", extension);
        let entries = fs::read_dir(dir).map_err(|e| {
            ShardexError::Io(std::io::Error::new(
                e.kind(),
                format!("Failed to read directory {}: {}", dir.display(), e),
            ))
        })?;
        for entry in entries {
            let entry = entry.map_err(|e| {
                ShardexError::Io(std::io::Error::new(
                    e.kind(),
                    format!("Failed to read directory entry: {}", e),
                ))
            })?;
            let path = entry.path();
            if !path.is_file() {
                continue;
            }
            let file_name = match path.file_name().and_then(|n| n.to_str()) {
                Some(name) => name,
                None => continue,
            };
            if !file_name.starts_with(prefix) || !file_name.ends_with(&suffix) {
                continue;
            }
            // Slice out the digits between prefix and ".ext"; the two checks
            // above guarantee the range is valid.
            let number_part = &file_name[prefix.len()..file_name.len() - suffix.len()];
            let segment_number = match number_part.parse::<u32>() {
                Ok(num) => num,
                Err(_) => continue,
            };
            let metadata = entry.metadata().map_err(|e| {
                ShardexError::Io(std::io::Error::new(
                    e.kind(),
                    format!("Failed to get metadata for {}: {}", path.display(), e),
                ))
            })?;
            segments.push(SegmentFile {
                segment_number,
                path,
                size: metadata.len(),
            });
        }
        segments.sort_by_key(|s| s.segment_number);
        Ok(segments)
    }

    /// Returns every file in the managed directories that is not one of the
    /// already-discovered shard/segment files.
    fn find_orphaned_files(
        &self,
        shards: &[ShardFiles],
        centroid_segments: &[SegmentFile],
        wal_segments: &[SegmentFile],
    ) -> Result<Vec<PathBuf>> {
        let mut orphaned = Vec::new();
        // Build the set of known-good paths first, then sweep the directories.
        let mut known_files = HashSet::new();
        for shard in shards {
            if let Some(ref path) = shard.vectors_file {
                known_files.insert(path.clone());
            }
            if let Some(ref path) = shard.postings_file {
                known_files.insert(path.clone());
            }
        }
        for segment in centroid_segments {
            known_files.insert(segment.path.clone());
        }
        for segment in wal_segments {
            known_files.insert(segment.path.clone());
        }
        for dir in [
            self.layout.shards_dir(),
            self.layout.centroids_dir(),
            self.layout.wal_dir(),
        ] {
            if !dir.exists() {
                continue;
            }
            let entries = fs::read_dir(dir).map_err(|e| {
                ShardexError::Io(std::io::Error::new(
                    e.kind(),
                    format!("Failed to read directory {}: {}", dir.display(), e),
                ))
            })?;
            for entry in entries {
                let entry = entry.map_err(|e| {
                    ShardexError::Io(std::io::Error::new(
                        e.kind(),
                        format!("Failed to read directory entry: {}", e),
                    ))
                })?;
                let path = entry.path();
                if path.is_file() && !known_files.contains(&path) {
                    orphaned.push(path);
                }
            }
        }
        Ok(orphaned)
    }
}
impl CleanupManager {
pub fn new(layout: DirectoryLayout) -> Self {
Self {
layout,
temp_files: HashSet::new(),
}
}
pub fn register_temp_file(&mut self, path: PathBuf) {
self.temp_files.insert(path);
}
pub fn cleanup_temp_files(&mut self) -> Result<()> {
let mut errors = Vec::new();
for path in &self.temp_files {
if path.exists() {
if let Err(e) = fs::remove_file(path) {
errors.push(format!("Failed to remove {}: {}", path.display(), e));
}
}
}
self.temp_files.clear();
if !errors.is_empty() {
return Err(ShardexError::Io(std::io::Error::new(
std::io::ErrorKind::Other,
format!("Cleanup errors: {}", errors.join(", ")),
)));
}
Ok(())
}
pub fn cleanup_orphaned_files(&mut self, orphaned_files: &[PathBuf]) -> Result<()> {
let mut errors = Vec::new();
for path in orphaned_files {
if path.exists() {
if let Err(e) = fs::remove_file(path) {
errors.push(format!("Failed to remove orphaned file {}: {}", path.display(), e));
}
}
}
if !errors.is_empty() {
return Err(ShardexError::Io(std::io::Error::new(
std::io::ErrorKind::Other,
format!("Orphaned file cleanup errors: {}", errors.join(", ")),
)));
}
Ok(())
}
pub fn cleanup_text_storage(&self) -> Result<()> {
let layout = &self.layout;
if layout.text_index_file().exists() {
std::fs::remove_file(layout.text_index_file())?;
}
if layout.text_data_file().exists() {
std::fs::remove_file(layout.text_data_file())?;
}
Ok(())
}
pub fn get_cleanup_stats(&self) -> Result<CleanupStats> {
let mut stats = CleanupStats {
text_storage_size: 0,
temp_file_count: self.temp_files.len(),
};
if self.layout.has_text_storage() {
stats.text_storage_size = self.layout.text_storage_size()?;
}
Ok(stats)
}
pub fn layout(&self) -> &DirectoryLayout {
&self.layout
}
}
impl Drop for CleanupManager {
    /// Best-effort removal of any still-registered temp files; the result is
    /// deliberately discarded because Drop must not panic or propagate errors.
    fn drop(&mut self) {
        self.cleanup_temp_files().ok();
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::identifiers::ShardId;
    use tempfile::TempDir;

    /// Shared fixture: 128-dim vectors, 1000-entry shards.
    fn create_test_config() -> ShardexConfig {
        ShardexConfig::new().vector_size(128).shard_size(1000)
    }

    #[test]
    fn test_directory_layout_creation() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        assert_eq!(layout.root_path(), temp_dir.path());
        assert_eq!(layout.metadata_path(), temp_dir.path().join(METADATA_FILE));
        assert_eq!(layout.centroids_dir(), temp_dir.path().join(CENTROIDS_DIR));
        assert_eq!(layout.shards_dir(), temp_dir.path().join(SHARDS_DIR));
        assert_eq!(layout.wal_dir(), temp_dir.path().join(WAL_DIR));
    }

    #[test]
    fn test_directory_creation() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        // exists() also requires the metadata file, so it stays false here.
        assert!(!layout.exists());
        layout.create_directories().unwrap();
        assert!(layout.centroids_dir().is_dir());
        assert!(layout.shards_dir().is_dir());
        assert!(layout.wal_dir().is_dir());
    }

    #[test]
    fn test_file_path_generation() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        let shard_id = ShardId::new();
        let vectors_path = layout.shard_vectors_path(&shard_id);
        let postings_path = layout.shard_postings_path(&shard_id);
        assert!(vectors_path
            .to_string_lossy()
            .contains(&shard_id.to_string()));
        assert!(vectors_path.to_string_lossy().ends_with(".vectors"));
        assert!(postings_path.to_string_lossy().ends_with(".postings"));
        let centroid_path = layout.centroid_segment_path(1);
        assert!(centroid_path.to_string_lossy().contains("segment_000001"));
        assert!(centroid_path.to_string_lossy().ends_with(".shx"));
        let wal_path = layout.wal_segment_path(5);
        assert!(wal_path.to_string_lossy().contains("wal_000005"));
        assert!(wal_path.to_string_lossy().ends_with(".log"));
    }

    #[test]
    fn test_index_metadata_creation() {
        let config = create_test_config();
        let metadata = IndexMetadata::new(config.clone()).unwrap();
        assert_eq!(metadata.layout_version, LAYOUT_VERSION);
        assert_eq!(metadata.config, config);
        assert_eq!(metadata.shard_count, 0);
        assert_eq!(metadata.centroid_segment_count, 0);
        assert_eq!(metadata.wal_segment_count, 0);
        assert!(!metadata.flags.active);
        assert!(!metadata.flags.needs_recovery);
        assert!(metadata.flags.clean_shutdown);
    }

    #[test]
    fn test_metadata_serialization() {
        let temp_dir = TempDir::new().unwrap();
        let metadata_path = temp_dir.path().join("test.meta");
        let config = create_test_config();
        let mut metadata = IndexMetadata::new(config).unwrap();
        metadata.shard_count = 10;
        metadata.mark_active();
        metadata.save(&metadata_path).unwrap();
        assert!(metadata_path.exists());
        let loaded_metadata = IndexMetadata::load(&metadata_path).unwrap();
        assert_eq!(loaded_metadata.layout_version, metadata.layout_version);
        assert_eq!(loaded_metadata.shard_count, metadata.shard_count);
        assert_eq!(loaded_metadata.flags.active, metadata.flags.active);
    }

    #[test]
    fn test_metadata_compatibility() {
        let config1 = create_test_config();
        // Different vector size makes the configs incompatible.
        let config2 = ShardexConfig::new().vector_size(256).shard_size(1000);
        let metadata = IndexMetadata::new(config1.clone()).unwrap();
        assert!(metadata.is_compatible_with(&config1));
        assert!(!metadata.is_compatible_with(&config2));
    }

    #[test]
    fn test_file_discovery_empty_directory() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        layout.create_directories().unwrap();
        let discovery = FileDiscovery::new(layout);
        let discovered = discovery.discover_all().unwrap();
        assert!(discovered.shards.is_empty());
        assert!(discovered.centroid_segments.is_empty());
        assert!(discovered.wal_segments.is_empty());
        assert!(discovered.orphaned_files.is_empty());
    }

    #[test]
    fn test_file_discovery_with_files() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        layout.create_directories().unwrap();
        let shard_id = ShardId::new();
        let vectors_path = layout.shard_vectors_path(&shard_id);
        let postings_path = layout.shard_postings_path(&shard_id);
        let centroid_path = layout.centroid_segment_path(1);
        let wal_path = layout.wal_segment_path(1);
        fs::write(&vectors_path, b"vectors data").unwrap();
        fs::write(&postings_path, b"postings data").unwrap();
        // Fix: this argument previously read `¢roid_path` — an HTML-entity
        // mangling of `&centroid_path` that does not compile.
        fs::write(&centroid_path, b"centroid data").unwrap();
        fs::write(&wal_path, b"wal data").unwrap();
        let discovery = FileDiscovery::new(layout);
        let discovered = discovery.discover_all().unwrap();
        assert_eq!(discovered.shards.len(), 1);
        assert_eq!(discovered.shards[0].shard_id, shard_id);
        assert!(discovered.shards[0].vectors_file.is_some());
        assert!(discovered.shards[0].postings_file.is_some());
        assert_eq!(discovered.centroid_segments.len(), 1);
        assert_eq!(discovered.centroid_segments[0].segment_number, 1);
        assert_eq!(discovered.wal_segments.len(), 1);
        assert_eq!(discovered.wal_segments[0].segment_number, 1);
        assert!(discovered.orphaned_files.is_empty());
    }

    #[test]
    fn test_orphaned_file_detection() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        layout.create_directories().unwrap();
        let shard_id = ShardId::new();
        let vectors_path = layout.shard_vectors_path(&shard_id);
        fs::write(&vectors_path, b"vectors data").unwrap();
        let orphaned_path = layout.shards_dir().join("orphaned_file.txt");
        fs::write(&orphaned_path, b"orphaned data").unwrap();
        let discovery = FileDiscovery::new(layout);
        let discovered = discovery.discover_all().unwrap();
        assert_eq!(discovered.shards.len(), 1);
        assert_eq!(discovered.orphaned_files.len(), 1);
        assert_eq!(discovered.orphaned_files[0], orphaned_path);
    }

    #[test]
    fn test_cleanup_manager() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        let mut cleanup_manager = CleanupManager::new(layout);
        let temp_file = temp_dir.path().join("temp_file.tmp");
        fs::write(&temp_file, b"temporary data").unwrap();
        assert!(temp_file.exists());
        cleanup_manager.register_temp_file(temp_file.clone());
        cleanup_manager.cleanup_temp_files().unwrap();
        assert!(!temp_file.exists());
    }

    #[test]
    fn test_atomic_metadata_save() {
        let temp_dir = TempDir::new().unwrap();
        let metadata_path = temp_dir.path().join("test.meta");
        let config = create_test_config();
        let mut metadata = IndexMetadata::new(config).unwrap();
        metadata.save(&metadata_path).unwrap();
        assert!(metadata_path.exists());
        // The intermediate .tmp file must not survive a successful save.
        assert!(!metadata_path.with_extension("tmp").exists());
    }

    #[test]
    fn test_layout_validation() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        assert!(layout.validate().is_err());
        assert!(!layout.exists());
        layout.create_directories().unwrap();
        let mut metadata = IndexMetadata::new(create_test_config()).unwrap();
        metadata.save(layout.metadata_path()).unwrap();
        assert!(layout.validate().is_ok());
        assert!(layout.exists());
    }

    #[test]
    fn test_text_storage_file_paths() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        let text_index_path = layout.text_index_file();
        let text_data_path = layout.text_data_file();
        assert_eq!(text_index_path, temp_dir.path().join("text_index.dat"));
        assert_eq!(text_data_path, temp_dir.path().join("text_data.dat"));
    }

    #[test]
    fn test_text_storage_existence_check() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        assert!(!layout.has_text_storage());
        // One file alone is not enough — both index and data must exist.
        std::fs::write(layout.text_index_file(), b"index data").unwrap();
        assert!(!layout.has_text_storage());
        std::fs::write(layout.text_data_file(), b"data content").unwrap();
        assert!(layout.has_text_storage());
    }

    #[test]
    fn test_text_storage_size_calculation() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        let index_content = b"index file content";
        let data_content = b"data file content with more data";
        std::fs::write(layout.text_index_file(), index_content).unwrap();
        std::fs::write(layout.text_data_file(), data_content).unwrap();
        let total_size = layout.text_storage_size().unwrap();
        let expected_size = index_content.len() as u64 + data_content.len() as u64;
        assert_eq!(total_size, expected_size);
    }

    #[test]
    fn test_text_storage_validation_no_files() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        // Absent text storage validates trivially.
        assert!(layout.validate_text_storage().is_ok());
    }

    #[test]
    fn test_cleanup_text_storage() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        let cleanup_manager = CleanupManager::new(layout.clone());
        std::fs::write(layout.text_index_file(), b"index data").unwrap();
        std::fs::write(layout.text_data_file(), b"data content").unwrap();
        assert!(layout.has_text_storage());
        cleanup_manager.cleanup_text_storage().unwrap();
        assert!(!layout.has_text_storage());
        assert!(!layout.text_index_file().exists());
        assert!(!layout.text_data_file().exists());
    }

    #[test]
    fn test_cleanup_stats() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        let mut cleanup_manager = CleanupManager::new(layout.clone());
        let temp_file = temp_dir.path().join("temp.txt");
        cleanup_manager.register_temp_file(temp_file.clone());
        let index_content = b"index content";
        let data_content = b"data content";
        std::fs::write(layout.text_index_file(), index_content).unwrap();
        std::fs::write(layout.text_data_file(), data_content).unwrap();
        let stats = cleanup_manager.get_cleanup_stats().unwrap();
        assert_eq!(stats.temp_file_count, 1);
        assert_eq!(
            stats.text_storage_size,
            index_content.len() as u64 + data_content.len() as u64
        );
    }

    #[test]
    fn test_cleanup_stats_no_text_storage() {
        let temp_dir = TempDir::new().unwrap();
        let layout = DirectoryLayout::new(temp_dir.path());
        let cleanup_manager = CleanupManager::new(layout);
        let stats = cleanup_manager.get_cleanup_stats().unwrap();
        assert_eq!(stats.text_storage_size, 0);
        assert_eq!(stats.temp_file_count, 0);
    }

    #[test]
    fn test_cleanup_stats_default() {
        let stats = CleanupStats::default();
        assert_eq!(stats.text_storage_size, 0);
        assert_eq!(stats.temp_file_count, 0);
        let stats_new = CleanupStats::new();
        assert_eq!(stats_new.text_storage_size, 0);
        assert_eq!(stats_new.temp_file_count, 0);
    }

    #[test]
    fn test_index_metadata_text_storage_fields() {
        let config = create_test_config();
        let metadata = IndexMetadata::new(config).unwrap();
        assert!(!metadata.text_storage_enabled);
        assert!(metadata.max_document_text_size.is_none());
    }

    #[test]
    fn test_index_metadata_serialization_with_text_fields() {
        let temp_dir = TempDir::new().unwrap();
        let metadata_path = temp_dir.path().join("test.meta");
        let config = create_test_config();
        let mut metadata = IndexMetadata::new(config).unwrap();
        metadata.text_storage_enabled = true;
        metadata.max_document_text_size = Some(5 * 1024 * 1024);
        metadata.save(&metadata_path).unwrap();
        let loaded_metadata = IndexMetadata::load(&metadata_path).unwrap();
        assert!(loaded_metadata.text_storage_enabled);
        assert_eq!(loaded_metadata.max_document_text_size, Some(5 * 1024 * 1024));
    }
}