use std::collections::HashMap;
use std::fmt;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use super::super::file::id::FileId;
use crate::graph::node::Language;
/// Failures reported by the file registry's registration methods.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RegistryError {
    /// The filesystem refused to canonicalize `path`.
    CanonicalizationFailed {
        /// The path exactly as the caller supplied it.
        path: PathBuf,
        /// Text of the underlying error.
        message: String,
    },
    /// No further slot index can be handed out.
    CapacityExhausted,
}

impl fmt::Display for RegistryError {
    /// Renders a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // A fixed message needs no formatting machinery.
            Self::CapacityExhausted => {
                f.write_str("file registry capacity exhausted (> 2^32 files)")
            }
            Self::CanonicalizationFailed { path, message } => f.write_fmt(format_args!(
                "failed to canonicalize path '{}': {}",
                path.display(),
                message
            )),
        }
    }
}

// Neither variant wraps another error, so the default `source()` is enough.
impl std::error::Error for RegistryError {}
/// One occupied slot of the registry: a file path plus its optional language tag.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FileEntry {
/// Path of the file, shared with the registry's lookup map key.
path: Arc<Path>,
/// Language recorded for the file; `None` when no language has been set.
language: Option<Language>,
}
/// Maps file paths to `FileId` values and back.
///
/// Files live in a slot table; the numeric index of a slot is the value
/// wrapped by the `FileId` handed to callers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRegistry {
/// Slot table. Slot 0 is created as `None` and skipped by the iterators;
/// a `None` in any other slot marks a file that has been unregistered.
entries: Vec<Option<FileEntry>>,
/// Reverse index from a stored path to its slot index in `entries`.
lookup: HashMap<Arc<Path>, u32>,
/// Indices of vacated slots, reused (last in, first out) before the table grows.
free_list: Vec<u32>,
}
impl FileRegistry {
    /// Creates an empty registry.
    ///
    /// Slot 0 of the entry table is filled with `None` up front, so the first
    /// registered file is stored at index 1.
    #[must_use]
    pub fn new() -> Self {
        Self {
            entries: vec![None],
            lookup: HashMap::new(),
            free_list: Vec::new(),
        }
    }

    /// Creates an empty registry with storage preallocated for `capacity` files.
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        // One extra slot for the reserved `None` placeholder at index 0.
        let mut entries = Vec::with_capacity(capacity.saturating_add(1));
        entries.push(None);
        Self {
            entries,
            lookup: HashMap::with_capacity(capacity),
            free_list: Vec::new(),
        }
    }

    /// Number of files currently registered.
    #[inline]
    #[must_use]
    pub fn len(&self) -> usize {
        self.lookup.len()
    }

    /// Returns `true` when no file is registered.
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.lookup.is_empty()
    }

    /// Registers `path` without a language tag, normalizing it on a
    /// best-effort basis first.
    ///
    /// # Errors
    ///
    /// Returns [`RegistryError::CapacityExhausted`] when no slot index is left.
    pub fn register(&mut self, path: &Path) -> Result<FileId, RegistryError> {
        self.register_with_language(path, None)
    }

    /// Registers `path` after best-effort normalization and optionally tags
    /// it with `language`.
    ///
    /// Normalization tries `Path::canonicalize`; if that fails, a relative
    /// path is joined onto the current directory and anything else is used
    /// as given. Registering an already known path returns its existing id;
    /// its language is overwritten only when `language` is `Some`.
    ///
    /// # Errors
    ///
    /// Returns [`RegistryError::CapacityExhausted`] when no slot index is left.
    pub fn register_with_language(
        &mut self,
        path: &Path,
        language: Option<Language>,
    ) -> Result<FileId, RegistryError> {
        let normalized = Self::normalize_path(path);
        self.register_canonical_with_language(&normalized, language)
    }

    /// Registers `path` only if the filesystem can canonicalize it.
    ///
    /// A new entry gets no language; an existing entry is returned untouched.
    ///
    /// # Errors
    ///
    /// Returns [`RegistryError::CanonicalizationFailed`] when canonicalization
    /// fails, or [`RegistryError::CapacityExhausted`] when no slot index is left.
    pub fn try_register_strict(&mut self, path: &Path) -> Result<FileId, RegistryError> {
        let canonical = path
            .canonicalize()
            .map_err(|e| RegistryError::CanonicalizationFailed {
                path: path.to_path_buf(),
                message: e.to_string(),
            })?;
        // `None` never overwrites a stored language, so an existing entry is
        // returned exactly as it is.
        self.register_canonical_with_language(&canonical, None)
    }

    /// Registers `path` verbatim (no normalization) without a language tag.
    ///
    /// # Errors
    ///
    /// Returns [`RegistryError::CapacityExhausted`] when no slot index is left.
    pub fn register_canonical(&mut self, path: &Path) -> Result<FileId, RegistryError> {
        self.register_canonical_with_language(path, None)
    }

    /// Registers `path` verbatim (no normalization), optionally tagging it
    /// with `language`.
    ///
    /// This is the single lookup-or-insert routine every other registration
    /// method funnels into. Registering an already known path returns its
    /// existing id; its language is overwritten only when `language` is `Some`.
    ///
    /// # Errors
    ///
    /// Returns [`RegistryError::CapacityExhausted`] when no slot index is left.
    pub fn register_canonical_with_language(
        &mut self,
        path: &Path,
        language: Option<Language>,
    ) -> Result<FileId, RegistryError> {
        // Look up by borrowed path so a hit costs no allocation.
        if let Some(&index) = self.lookup.get(path) {
            let slot = self
                .entries
                .get_mut(index as usize)
                .and_then(|slot| slot.as_mut());
            if let (Some(lang), Some(entry)) = (language, slot) {
                entry.language = Some(lang);
            }
            return Ok(FileId::new(index));
        }

        let key: Arc<Path> = Arc::from(path);
        let entry = FileEntry {
            path: Arc::clone(&key),
            language,
        };
        let index = self.allocate_slot(entry)?;
        self.lookup.insert(key, index);
        Ok(FileId::new(index))
    }

    /// Returns the path registered under `id`, or `None` when `id` is
    /// invalid, out of range, or refers to a vacated slot.
    #[must_use]
    pub fn resolve(&self, id: FileId) -> Option<Arc<Path>> {
        self.entry(id).map(|entry| Arc::clone(&entry.path))
    }

    /// Returns the language recorded for `file_id`, or `None` when the id
    /// does not refer to a live entry or the entry has no language.
    #[must_use]
    pub fn language_for_file(&self, file_id: FileId) -> Option<Language> {
        self.entry(file_id).and_then(|entry| entry.language)
    }

    /// Sets the language of `file_id`.
    ///
    /// Returns `true` when the entry exists and was updated, `false` otherwise.
    pub fn set_language(&mut self, file_id: FileId, language: Language) -> bool {
        if file_id.is_invalid() {
            return false;
        }
        match self.entries.get_mut(file_id.index() as usize) {
            Some(Some(entry)) => {
                entry.language = Some(language);
                true
            }
            _ => false,
        }
    }

    /// Collects the id and path of every registered file tagged with `language`.
    #[must_use]
    pub fn files_by_language(&self, language: Language) -> Vec<(FileId, Arc<Path>)> {
        self.occupied()
            .filter(|(_, entry)| entry.language == Some(language))
            .map(|(id, entry)| (id, Arc::clone(&entry.path)))
            .collect()
    }

    /// Removes the file registered under `id` and returns its path.
    ///
    /// The vacated slot index is queued for reuse by later registrations.
    /// Returns `None` when `id` does not refer to a live entry.
    pub fn unregister(&mut self, id: FileId) -> Option<Arc<Path>> {
        if id.is_invalid() {
            return None;
        }
        let index = id.index() as usize;
        let entry = self.entries.get_mut(index)?.take()?;
        self.lookup.remove(&entry.path);
        match u32::try_from(index) {
            Ok(slot) => self.free_list.push(slot),
            Err(_) => log::warn!("File registry index overflow when recycling slot {index}"),
        }
        Some(entry.path)
    }

    /// Returns `true` when `path`, after best-effort normalization, is registered.
    #[must_use]
    pub fn contains(&self, path: &Path) -> bool {
        let canonical = Self::normalize_path(path);
        self.lookup.contains_key(canonical.as_path())
    }

    /// Returns `true` when `path`, taken verbatim, is registered.
    #[must_use]
    pub fn contains_canonical(&self, path: &Path) -> bool {
        self.lookup.contains_key(path)
    }

    /// Looks up the id of `path` after best-effort normalization.
    #[must_use]
    pub fn get(&self, path: &Path) -> Option<FileId> {
        let canonical = Self::normalize_path(path);
        self.lookup
            .get(canonical.as_path())
            .map(|&idx| FileId::new(idx))
    }

    /// Looks up the id of `path`, taken verbatim.
    #[must_use]
    pub fn get_canonical(&self, path: &Path) -> Option<FileId> {
        self.lookup.get(path).map(|&idx| FileId::new(idx))
    }

    /// Iterates over every registered file as `(id, path)`, in slot order.
    pub fn iter(&self) -> impl Iterator<Item = (FileId, &Arc<Path>)> {
        self.occupied().map(|(id, entry)| (id, &entry.path))
    }

    /// Iterates over every registered file as `(id, path, language)`, in slot order.
    pub fn iter_with_language(
        &self,
    ) -> impl Iterator<Item = (FileId, &Arc<Path>, Option<Language>)> {
        self.occupied()
            .map(|(id, entry)| (id, &entry.path, entry.language))
    }

    /// Registers every `(path, language)` pair in order and returns the ids
    /// in the same order.
    ///
    /// # Errors
    ///
    /// Stops at the first failing registration and returns its error; files
    /// registered before the failure stay registered.
    pub fn register_batch(
        &mut self,
        files: &[(PathBuf, Option<Language>)],
    ) -> Result<Vec<FileId>, RegistryError> {
        let mut ids = Vec::with_capacity(files.len());
        for (path, language) in files {
            ids.push(self.register_with_language(path, *language)?);
        }
        Ok(ids)
    }

    /// Removes every file, keeping the allocated storage.
    pub fn clear(&mut self) {
        // Rebuild the reserved slot 0 instead of indexing it, so an entry
        // table that is unexpectedly empty cannot cause a panic here.
        self.entries.clear();
        self.entries.push(None);
        self.lookup.clear();
        self.free_list.clear();
    }

    /// Reserves storage for at least `additional` more files.
    pub fn reserve(&mut self, additional: usize) {
        self.entries.reserve(additional);
        self.lookup.reserve(additional);
    }

    /// Returns a snapshot of the registry's size counters.
    #[must_use]
    pub fn stats(&self) -> RegistryStats {
        RegistryStats {
            file_count: self.len(),
            free_slots: self.free_list.len(),
            capacity: self.entries.capacity(),
        }
    }

    /// Returns the live entry behind `id`, if any.
    fn entry(&self, id: FileId) -> Option<&FileEntry> {
        if id.is_invalid() {
            return None;
        }
        self.entries.get(id.index() as usize)?.as_ref()
    }

    /// Iterates over the occupied slots, skipping the reserved slot 0.
    fn occupied(&self) -> impl Iterator<Item = (FileId, &FileEntry)> {
        self.entries
            .iter()
            .enumerate()
            .skip(1)
            .filter_map(|(idx, slot)| {
                let entry = slot.as_ref()?;
                let index = u32::try_from(idx).ok()?;
                Some((FileId::new(index), entry))
            })
    }

    /// Stores `entry` in a recycled slot if one is queued, otherwise appends it.
    ///
    /// Returns the slot index that now holds the entry.
    fn allocate_slot(&mut self, entry: FileEntry) -> Result<u32, RegistryError> {
        if let Some(free_idx) = self.free_list.pop() {
            self.entries[free_idx as usize] = Some(entry);
            return Ok(free_idx);
        }
        // Settle the index before pushing so a failure leaves the table
        // untouched. `u32::MAX` itself is never handed out.
        // NOTE(review): presumably reserved as a sentinel by `FileId` - confirm.
        let index = u32::try_from(self.entries.len())
            .ok()
            .filter(|&idx| idx < u32::MAX)
            .ok_or(RegistryError::CapacityExhausted)?;
        self.entries.push(Some(entry));
        Ok(index)
    }

    /// Best-effort path normalization used by the non-strict methods.
    ///
    /// Prefers the canonical form; if canonicalization fails, a relative
    /// path is joined onto the current directory when that is available,
    /// and otherwise the path is returned unchanged.
    fn normalize_path(path: &Path) -> PathBuf {
        if let Ok(canonical) = path.canonicalize() {
            return canonical;
        }
        let cwd = if path.is_relative() {
            std::env::current_dir().ok()
        } else {
            None
        };
        cwd.map_or_else(|| path.to_path_buf(), |cwd| cwd.join(path))
    }
}
impl Default for FileRegistry {
fn default() -> Self {
Self::new()
}
}
impl fmt::Display for FileRegistry {
    /// Writes a compact summary: the number of registered files and the
    /// number of recyclable slots.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!(
            "FileRegistry(files={}, free={})",
            self.len(),
            self.free_list.len()
        ))
    }
}
/// Size counters of a `FileRegistry`, as produced by `FileRegistry::stats`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RegistryStats {
/// Number of files currently registered.
pub file_count: usize,
/// Number of vacated slots waiting to be reused.
pub free_slots: usize,
/// Allocated capacity of the slot table (this counts the reserved slot 0).
pub capacity: usize,
}
#[cfg(test)]
mod tests {
    //! Unit tests for `FileRegistry`.
    //!
    //! Tests that rely on canonicalization create real files in a temporary
    //! directory; the `*_canonical` tests use made-up absolute paths because
    //! those methods never touch the filesystem.

    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // --- construction -----------------------------------------------------

    #[test]
    fn test_new() {
        let registry = FileRegistry::new();
        assert_eq!(registry.len(), 0);
        assert!(registry.is_empty());
    }

    #[test]
    fn test_with_capacity() {
        let registry = FileRegistry::with_capacity(100);
        assert_eq!(registry.len(), 0);
        // Requested capacity plus the reserved slot 0.
        assert!(registry.entries.capacity() >= 101);
    }

    // --- registration and lookup ------------------------------------------

    #[test]
    fn test_register_and_resolve() {
        let tmp = TempDir::new().unwrap();
        let file_path = tmp.path().join("test.rs");
        fs::write(&file_path, "fn main() {}").unwrap();
        let mut registry = FileRegistry::new();
        let id = registry.register(&file_path).unwrap();
        assert!(!id.is_invalid());
        assert_eq!(registry.len(), 1);
        let resolved = registry.resolve(id).unwrap();
        assert_eq!(resolved.canonicalize().ok(), file_path.canonicalize().ok());
    }

    #[test]
    fn test_register_duplicate() {
        let tmp = TempDir::new().unwrap();
        let file_path = tmp.path().join("test.rs");
        fs::write(&file_path, "").unwrap();
        let mut registry = FileRegistry::new();
        let id1 = registry.register(&file_path).unwrap();
        let id2 = registry.register(&file_path).unwrap();
        assert_eq!(id1, id2);
        assert_eq!(registry.len(), 1);
    }

    #[test]
    fn test_register_different() {
        let tmp = TempDir::new().unwrap();
        let file1 = tmp.path().join("a.rs");
        let file2 = tmp.path().join("b.rs");
        fs::write(&file1, "").unwrap();
        fs::write(&file2, "").unwrap();
        let mut registry = FileRegistry::new();
        let id1 = registry.register(&file1).unwrap();
        let id2 = registry.register(&file2).unwrap();
        assert_ne!(id1, id2);
        assert_eq!(registry.len(), 2);
    }

    #[test]
    fn test_register_canonical() {
        let mut registry = FileRegistry::new();
        let path = Path::new("/canonical/path/file.rs");
        let id = registry.register_canonical(path).unwrap();
        assert!(!id.is_invalid());
        assert_eq!(registry.resolve(id).unwrap().as_ref(), path);
    }

    #[test]
    fn test_resolve_invalid() {
        let registry = FileRegistry::new();
        assert!(registry.resolve(FileId::INVALID).is_none());
    }

    #[test]
    fn test_resolve_out_of_bounds() {
        let registry = FileRegistry::new();
        assert!(registry.resolve(FileId::new(999)).is_none());
    }

    // --- unregistration and slot reuse ------------------------------------

    #[test]
    fn test_unregister() {
        let mut registry = FileRegistry::new();
        let path = Path::new("/test/file.rs");
        let id = registry.register_canonical(path).unwrap();
        assert_eq!(registry.len(), 1);
        let removed = registry.unregister(id);
        assert!(removed.is_some());
        assert_eq!(registry.len(), 0);
        assert!(registry.resolve(id).is_none());
    }

    #[test]
    fn test_unregister_invalid() {
        let mut registry = FileRegistry::new();
        assert!(registry.unregister(FileId::INVALID).is_none());
    }

    #[test]
    fn test_free_list_reuse() {
        let mut registry = FileRegistry::new();
        let path1 = Path::new("/test/a.rs");
        let path2 = Path::new("/test/b.rs");
        let id1 = registry.register_canonical(path1).unwrap();
        registry.unregister(id1);
        let id2 = registry.register_canonical(path2).unwrap();
        // The vacated slot is handed out again.
        assert_eq!(id1.index(), id2.index());
    }

    // --- membership queries -----------------------------------------------

    #[test]
    fn test_contains() {
        let tmp = TempDir::new().unwrap();
        let file_path = tmp.path().join("test.rs");
        fs::write(&file_path, "").unwrap();
        let mut registry = FileRegistry::new();
        registry.register(&file_path).unwrap();
        assert!(registry.contains(&file_path));
        assert!(!registry.contains(Path::new("/nonexistent/path.rs")));
    }

    #[test]
    fn test_contains_canonical() {
        let mut registry = FileRegistry::new();
        let path = Path::new("/canonical/test.rs");
        registry.register_canonical(path).unwrap();
        assert!(registry.contains_canonical(path));
        assert!(!registry.contains_canonical(Path::new("/other/path.rs")));
    }

    #[test]
    fn test_get() {
        let tmp = TempDir::new().unwrap();
        let file_path = tmp.path().join("test.rs");
        fs::write(&file_path, "").unwrap();
        let mut registry = FileRegistry::new();
        let id = registry.register(&file_path).unwrap();
        assert_eq!(registry.get(&file_path), Some(id));
        assert_eq!(registry.get(Path::new("/nonexistent.rs")), None);
    }

    #[test]
    fn test_get_canonical() {
        let mut registry = FileRegistry::new();
        let path = Path::new("/canonical/test.rs");
        let id = registry.register_canonical(path).unwrap();
        assert_eq!(registry.get_canonical(path), Some(id));
        assert_eq!(registry.get_canonical(Path::new("/other.rs")), None);
    }

    // --- iteration, maintenance and reporting -----------------------------

    #[test]
    fn test_iter() {
        let mut registry = FileRegistry::new();
        registry.register_canonical(Path::new("/a.rs")).unwrap();
        registry.register_canonical(Path::new("/b.rs")).unwrap();
        registry.register_canonical(Path::new("/c.rs")).unwrap();
        let paths: Vec<_> = registry.iter().map(|(_, p)| p.to_path_buf()).collect();
        assert_eq!(paths.len(), 3);
        assert!(paths.contains(&PathBuf::from("/a.rs")));
        assert!(paths.contains(&PathBuf::from("/b.rs")));
        assert!(paths.contains(&PathBuf::from("/c.rs")));
    }

    #[test]
    fn test_clear() {
        let mut registry = FileRegistry::new();
        registry.register_canonical(Path::new("/a.rs")).unwrap();
        registry.register_canonical(Path::new("/b.rs")).unwrap();
        assert_eq!(registry.len(), 2);
        registry.clear();
        assert_eq!(registry.len(), 0);
        assert!(registry.is_empty());
    }

    #[test]
    fn test_reserve() {
        let mut registry = FileRegistry::new();
        registry.reserve(1000);
        assert!(registry.entries.capacity() >= 1001);
    }

    #[test]
    fn test_display() {
        let mut registry = FileRegistry::new();
        registry.register_canonical(Path::new("/test.rs")).unwrap();
        let display = format!("{registry}");
        assert!(display.contains("FileRegistry"));
        assert!(display.contains("files=1"));
    }

    #[test]
    fn test_stats() {
        let mut registry = FileRegistry::new();
        registry.register_canonical(Path::new("/a.rs")).unwrap();
        registry.register_canonical(Path::new("/b.rs")).unwrap();
        let stats = registry.stats();
        assert_eq!(stats.file_count, 2);
        assert_eq!(stats.free_slots, 0);
    }

    #[test]
    fn test_default() {
        let registry: FileRegistry = FileRegistry::default();
        assert_eq!(registry.len(), 0);
    }

    #[test]
    fn test_registry_error_display() {
        let err = RegistryError::CanonicalizationFailed {
            path: PathBuf::from("/test/path"),
            message: "not found".to_string(),
        };
        let display = format!("{err}");
        assert!(display.contains("/test/path"));
        assert!(display.contains("not found"));
        let err2 = RegistryError::CapacityExhausted;
        let display2 = format!("{err2}");
        assert!(display2.contains("capacity exhausted"));
    }

    #[test]
    fn test_unicode_path() {
        let mut registry = FileRegistry::new();
        let path = Path::new("/日本語/ファイル.rs");
        let id = registry.register_canonical(path).unwrap();
        let resolved = registry.resolve(id).unwrap();
        assert_eq!(resolved.as_ref(), path);
    }

    // --- strict versus best-effort registration ---------------------------

    #[test]
    fn test_try_register_strict_success() {
        let tmp = TempDir::new().unwrap();
        let file_path = tmp.path().join("test.rs");
        fs::write(&file_path, "fn main() {}").unwrap();
        let mut registry = FileRegistry::new();
        let id = registry.try_register_strict(&file_path).unwrap();
        assert!(!id.is_invalid());
        assert_eq!(registry.len(), 1);
        let resolved = registry.resolve(id).unwrap();
        assert_eq!(resolved.as_ref(), file_path.canonicalize().unwrap());
    }

    #[test]
    fn test_try_register_strict_nonexistent() {
        let mut registry = FileRegistry::new();
        let path = Path::new("/nonexistent/path/that/does/not/exist.rs");
        let result = registry.try_register_strict(path);
        assert!(result.is_err());
        match result.unwrap_err() {
            RegistryError::CanonicalizationFailed {
                path: err_path,
                message,
            } => {
                assert_eq!(err_path, path);
                assert!(!message.is_empty());
            }
            RegistryError::CapacityExhausted => {
                panic!("Expected CanonicalizationFailed, got CapacityExhausted")
            }
        }
    }

    #[test]
    fn test_register_fallback_nonexistent() {
        let mut registry = FileRegistry::new();
        let path = Path::new("/nonexistent/path/file.rs");
        // The best-effort variant accepts paths that cannot be canonicalized.
        let result = registry.register(path);
        assert!(result.is_ok());
        let id = result.unwrap();
        let resolved = registry.resolve(id).unwrap();
        assert!(resolved.to_string_lossy().contains("file.rs"));
    }

    #[test]
    fn test_try_register_strict_duplicate() {
        let tmp = TempDir::new().unwrap();
        let file_path = tmp.path().join("test.rs");
        fs::write(&file_path, "").unwrap();
        let mut registry = FileRegistry::new();
        let id1 = registry.try_register_strict(&file_path).unwrap();
        let id2 = registry.try_register_strict(&file_path).unwrap();
        assert_eq!(id1, id2);
        assert_eq!(registry.len(), 1);
    }

    // --- language tracking ------------------------------------------------

    #[test]
    fn test_language_tracking_basic() {
        let mut registry = FileRegistry::new();
        let path = Path::new("/test/file.rs");
        let id = registry.register_canonical(path).unwrap();
        assert_eq!(registry.language_for_file(id), None);
        assert!(registry.set_language(id, Language::Rust));
        assert_eq!(registry.language_for_file(id), Some(Language::Rust));
        assert!(registry.set_language(id, Language::JavaScript));
        assert_eq!(registry.language_for_file(id), Some(Language::JavaScript));
    }

    #[test]
    fn test_language_tracking_invalid_id() {
        let mut registry = FileRegistry::new();
        assert_eq!(registry.language_for_file(FileId::INVALID), None);
        assert!(!registry.set_language(FileId::INVALID, Language::Rust));
    }

    #[test]
    fn test_register_with_language() {
        let mut registry = FileRegistry::new();
        let path = Path::new("/test/main.py");
        let id = registry
            .register_canonical_with_language(path, Some(Language::Python))
            .unwrap();
        assert_eq!(registry.language_for_file(id), Some(Language::Python));
        assert_eq!(registry.resolve(id).unwrap().as_ref(), path);
    }

    #[test]
    fn test_register_with_language_duplicate_updates() {
        let mut registry = FileRegistry::new();
        let path = Path::new("/test/script.js");
        let id1 = registry
            .register_canonical_with_language(path, Some(Language::JavaScript))
            .unwrap();
        assert_eq!(registry.language_for_file(id1), Some(Language::JavaScript));
        let id2 = registry
            .register_canonical_with_language(path, Some(Language::TypeScript))
            .unwrap();
        assert_eq!(id1, id2, "Should return same ID for duplicate path");
        assert_eq!(registry.language_for_file(id2), Some(Language::TypeScript));
    }

    #[test]
    fn test_files_by_language_empty() {
        let registry = FileRegistry::new();
        let files = registry.files_by_language(Language::Rust);
        assert!(files.is_empty());
    }

    #[test]
    fn test_files_by_language_single() {
        let mut registry = FileRegistry::new();
        let path = Path::new("/src/main.rs");
        let id = registry
            .register_canonical_with_language(path, Some(Language::Rust))
            .unwrap();
        let files = registry.files_by_language(Language::Rust);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].0, id);
        assert_eq!(files[0].1.as_ref(), path);
        let js_files = registry.files_by_language(Language::JavaScript);
        assert!(js_files.is_empty());
    }

    #[test]
    fn test_files_by_language_multiple() {
        let mut registry = FileRegistry::new();
        let rs1 = Path::new("/src/main.rs");
        let rs2 = Path::new("/src/lib.rs");
        let id1 = registry
            .register_canonical_with_language(rs1, Some(Language::Rust))
            .unwrap();
        let id2 = registry
            .register_canonical_with_language(rs2, Some(Language::Rust))
            .unwrap();
        let py1 = Path::new("/scripts/test.py");
        let id3 = registry
            .register_canonical_with_language(py1, Some(Language::Python))
            .unwrap();
        // A file without a language must not show up in any language bucket.
        let _ = registry
            .register_canonical(Path::new("/data/config.json"))
            .unwrap();
        let rust_files = registry.files_by_language(Language::Rust);
        assert_eq!(rust_files.len(), 2);
        let rust_ids: Vec<_> = rust_files.iter().map(|(id, _)| *id).collect();
        assert!(rust_ids.contains(&id1));
        assert!(rust_ids.contains(&id2));
        let python_files = registry.files_by_language(Language::Python);
        assert_eq!(python_files.len(), 1);
        assert_eq!(python_files[0].0, id3);
        let js_files = registry.files_by_language(Language::JavaScript);
        assert!(js_files.is_empty());
    }

    #[test]
    fn test_iter_with_language() {
        let mut registry = FileRegistry::new();
        let rs_path = Path::new("/src/main.rs");
        let py_path = Path::new("/scripts/test.py");
        let no_lang_path = Path::new("/config.json");
        let id1 = registry
            .register_canonical_with_language(rs_path, Some(Language::Rust))
            .unwrap();
        let id2 = registry
            .register_canonical_with_language(py_path, Some(Language::Python))
            .unwrap();
        let id3 = registry.register_canonical(no_lang_path).unwrap();
        let entries: Vec<_> = registry.iter_with_language().collect();
        assert_eq!(entries.len(), 3);
        let rs_entry = entries.iter().find(|(id, _, _)| *id == id1).unwrap();
        assert_eq!(rs_entry.1.as_ref(), rs_path);
        assert_eq!(rs_entry.2, Some(Language::Rust));
        let py_entry = entries.iter().find(|(id, _, _)| *id == id2).unwrap();
        assert_eq!(py_entry.1.as_ref(), py_path);
        assert_eq!(py_entry.2, Some(Language::Python));
        let no_lang_entry = entries.iter().find(|(id, _, _)| *id == id3).unwrap();
        assert_eq!(no_lang_entry.1.as_ref(), no_lang_path);
        assert_eq!(no_lang_entry.2, None);
    }

    #[test]
    fn test_unregister_with_language() {
        let mut registry = FileRegistry::new();
        let path = Path::new("/test/file.rs");
        let id = registry
            .register_canonical_with_language(path, Some(Language::Rust))
            .unwrap();
        assert_eq!(registry.language_for_file(id), Some(Language::Rust));
        assert_eq!(registry.files_by_language(Language::Rust).len(), 1);
        let removed = registry.unregister(id);
        assert!(removed.is_some());
        assert_eq!(registry.language_for_file(id), None);
        assert_eq!(registry.files_by_language(Language::Rust).len(), 0);
    }

    #[test]
    fn test_language_serialization() {
        let mut registry = FileRegistry::new();
        let path1 = Path::new("/src/main.rs");
        let path2 = Path::new("/src/lib.py");
        registry
            .register_canonical_with_language(path1, Some(Language::Rust))
            .unwrap();
        registry
            .register_canonical_with_language(path2, Some(Language::Python))
            .unwrap();
        // Round-trip through JSON and check the language tags survive.
        let json = serde_json::to_string(&registry).unwrap();
        let deserialized: FileRegistry = serde_json::from_str(&json).unwrap();
        let rust_files = deserialized.files_by_language(Language::Rust);
        assert_eq!(rust_files.len(), 1);
        let python_files = deserialized.files_by_language(Language::Python);
        assert_eq!(python_files.len(), 1);
    }

    #[test]
    fn test_language_with_best_effort_register() {
        let mut registry = FileRegistry::new();
        let path = Path::new("/nonexistent/file.rs");
        let id = registry
            .register_with_language(path, Some(Language::Rust))
            .unwrap();
        assert_eq!(registry.language_for_file(id), Some(Language::Rust));
    }

    // --- batch registration -----------------------------------------------

    #[test]
    fn test_register_batch() {
        let tmp = TempDir::new().unwrap();
        let file1 = tmp.path().join("alpha.rs");
        let file2 = tmp.path().join("beta.py");
        let file3 = tmp.path().join("gamma.js");
        fs::write(&file1, "fn main() {}").unwrap();
        fs::write(&file2, "print('hello')").unwrap();
        fs::write(&file3, "console.log('hi')").unwrap();
        let mut registry = FileRegistry::new();
        let files: Vec<(PathBuf, Option<Language>)> = vec![
            (file1.clone(), Some(Language::Rust)),
            (file2.clone(), Some(Language::Python)),
            (file3.clone(), Some(Language::JavaScript)),
        ];
        let ids = registry.register_batch(&files).unwrap();
        assert_eq!(ids.len(), 3);
        assert_ne!(ids[0], ids[1]);
        assert_ne!(ids[1], ids[2]);
        assert_ne!(ids[0], ids[2]);
        for (i, id) in ids.iter().enumerate() {
            let resolved = registry.resolve(*id).unwrap();
            let expected_canonical = files[i].0.canonicalize().unwrap();
            assert_eq!(resolved.as_ref(), expected_canonical.as_path());
        }
        assert_eq!(registry.language_for_file(ids[0]), Some(Language::Rust));
        assert_eq!(registry.language_for_file(ids[1]), Some(Language::Python));
        assert_eq!(
            registry.language_for_file(ids[2]),
            Some(Language::JavaScript)
        );
        assert_eq!(registry.len(), 3);
    }

    #[test]
    fn test_register_batch_empty() {
        let mut registry = FileRegistry::new();
        let files: Vec<(PathBuf, Option<Language>)> = vec![];
        let ids = registry.register_batch(&files).unwrap();
        assert!(ids.is_empty());
        assert_eq!(registry.len(), 0);
    }

    #[test]
    fn test_register_batch_duplicate_paths() {
        let tmp = TempDir::new().unwrap();
        let file = tmp.path().join("dup.rs");
        fs::write(&file, "fn main() {}").unwrap();
        let mut registry = FileRegistry::new();
        let files: Vec<(PathBuf, Option<Language>)> = vec![
            (file.clone(), Some(Language::Rust)),
            (file.clone(), Some(Language::Rust)),
        ];
        let ids = registry.register_batch(&files).unwrap();
        assert_eq!(ids.len(), 2);
        assert_eq!(ids[0], ids[1]);
        assert_eq!(registry.len(), 1);
    }

    #[test]
    fn test_register_batch_duplicate_paths_different_languages() {
        let tmp = TempDir::new().unwrap();
        let file = tmp.path().join("polyglot.rs");
        fs::write(&file, "fn main() {}").unwrap();
        let mut registry = FileRegistry::new();
        let files: Vec<(PathBuf, Option<Language>)> = vec![
            (file.clone(), Some(Language::Rust)),
            (file.clone(), Some(Language::Python)),
        ];
        let ids = registry.register_batch(&files).unwrap();
        assert_eq!(ids[0], ids[1]);
        assert_eq!(registry.len(), 1);
        // The later registration's language wins.
        assert_eq!(registry.language_for_file(ids[0]), Some(Language::Python));
    }
}