use std::{
collections::HashMap,
fmt, fs, io,
path::{Path, PathBuf},
sync::{
Mutex,
atomic::{AtomicBool, Ordering},
},
};
use serde::Deserialize;
use crate::KnownValue;
/// One record from the `entries` array of a registry JSON file.
///
/// Only `codepoint` and `name` are consumed by the loaders in this file; the
/// remaining fields are parsed when present but otherwise unused here.
#[derive(Debug, Deserialize)]
pub struct RegistryEntry {
    /// Numeric codepoint of the entry.
    pub codepoint: u64,
    /// Name associated with the codepoint.
    pub name: String,
    /// Optional kind of the entry, read from the JSON key `"type"`.
    #[serde(rename = "type")]
    pub entry_type: Option<String>,
    /// Optional URI for the entry.
    pub uri: Option<String>,
    /// Optional free-form description of the entry.
    pub description: Option<String>,
}
/// Contents of the optional `ontology` section of a registry JSON file.
///
/// Every field is optional; none of them is read by the loaders in this file.
#[derive(Debug, Deserialize)]
pub struct OntologyInfo {
    /// Ontology name, if given.
    pub name: Option<String>,
    /// Source URL, if given.
    pub source_url: Option<String>,
    /// Starting code point, if given.
    pub start_code_point: Option<u64>,
    /// Processing strategy label, if given.
    pub processing_strategy: Option<String>,
}
/// Top-level shape of a registry JSON file.
///
/// `entries` is the only required key; the other sections may be absent.
#[derive(Debug, Deserialize)]
pub struct RegistryFile {
    /// Optional `ontology` section.
    pub ontology: Option<OntologyInfo>,
    /// Optional `generated` section.
    pub generated: Option<GeneratedInfo>,
    /// The registry entries; this key must be present.
    pub entries: Vec<RegistryEntry>,
    /// Optional `statistics` section, kept as untyped JSON.
    #[serde(default)]
    pub statistics: Option<serde_json::Value>,
}
/// Contents of the optional `generated` section of a registry JSON file.
#[derive(Debug, Deserialize)]
pub struct GeneratedInfo {
    /// Value of the `tool` key, if given.
    pub tool: Option<String>,
}
/// Failure modes of the registry loaders.
#[derive(Debug)]
pub enum LoadError {
    /// A filesystem operation failed (listing a directory or reading a file).
    Io(io::Error),
    /// A file was read successfully but could not be parsed as a registry.
    Json {
        /// Path of the file that failed to parse.
        file: PathBuf,
        /// The underlying parse error.
        error: serde_json::Error,
    },
}
impl fmt::Display for LoadError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
LoadError::Io(e) => write!(f, "IO error: {}", e),
LoadError::Json { file, error } => {
write!(f, "JSON parse error in {}: {}", file.display(), error)
}
}
}
}
impl std::error::Error for LoadError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
LoadError::Io(e) => Some(e),
LoadError::Json { error, .. } => Some(error),
}
}
}
impl From<io::Error> for LoadError {
fn from(error: io::Error) -> Self { LoadError::Io(error) }
}
/// Aggregated outcome of loading known values from several directories.
#[derive(Debug, Default)]
pub struct LoadResult {
    /// Loaded values; `load_from_config` keys each one by `KnownValue::value()`.
    pub values: HashMap<u64, KnownValue>,
    /// Paths recorded as processed.
    ///
    /// NOTE(review): despite the name, `load_from_config` pushes the
    /// *directory* paths it scanned here, not individual file paths.
    pub files_processed: Vec<PathBuf>,
    /// Errors collected while loading, each paired with the file or directory
    /// path it relates to.
    pub errors: Vec<(PathBuf, LoadError)>,
}
impl LoadResult {
    /// Number of distinct values held in the result.
    pub fn values_count(&self) -> usize {
        self.values.len()
    }

    /// Borrows every loaded value, in the map's (unspecified) iteration order.
    pub fn values_iter(&self) -> impl Iterator<Item = &KnownValue> {
        self.values.values()
    }

    /// Consumes the result and yields the loaded values by ownership.
    pub fn into_values(self) -> impl Iterator<Item = KnownValue> {
        self.values.into_values()
    }

    /// `true` when at least one error was recorded during loading.
    pub fn has_errors(&self) -> bool {
        !self.errors.is_empty()
    }
}
/// Return payload of `load_from_directory_tolerant`: the values parsed
/// successfully, plus a `(file path, error)` pair for each file that failed.
type TolerantLoadResult = (Vec<KnownValue>, Vec<(PathBuf, LoadError)>);
/// Ordered list of directories to search for registry JSON files.
///
/// The derived `Default` yields an empty list.
#[derive(Debug, Clone, Default)]
pub struct DirectoryConfig {
    /// Search directories, in the order they will be visited.
    paths: Vec<PathBuf>,
}
impl DirectoryConfig {
    /// Creates a configuration with no search paths.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a configuration containing only [`Self::default_directory`].
    pub fn default_only() -> Self {
        Self::with_paths(vec![Self::default_directory()])
    }

    /// Creates a configuration from the given paths, preserving their order.
    pub fn with_paths(paths: Vec<PathBuf>) -> Self {
        Self { paths }
    }

    /// Creates a configuration from the given paths with the default
    /// directory appended as the last entry.
    pub fn with_paths_and_default(mut paths: Vec<PathBuf>) -> Self {
        paths.push(Self::default_directory());
        Self::with_paths(paths)
    }

    /// Returns `<home>/.known-values`, falling back to `./.known-values` when
    /// the home directory cannot be determined.
    pub fn default_directory() -> PathBuf {
        let base = match dirs::home_dir() {
            Some(home) => home,
            None => PathBuf::from("."),
        };
        base.join(".known-values")
    }

    /// The configured search paths, in order.
    pub fn paths(&self) -> &[PathBuf] {
        self.paths.as_slice()
    }

    /// Appends one more search path at the end of the list.
    pub fn add_path(&mut self, path: PathBuf) {
        self.paths.push(path);
    }
}
pub fn load_from_directory(path: &Path) -> Result<Vec<KnownValue>, LoadError> {
let mut values = Vec::new();
if !path.exists() || !path.is_dir() {
return Ok(values);
}
for entry in fs::read_dir(path)? {
let entry = entry?;
let file_path = entry.path();
if file_path.extension().is_some_and(|ext| ext == "json") {
let content = fs::read_to_string(&file_path)?;
let registry: RegistryFile =
serde_json::from_str(&content).map_err(|e| {
LoadError::Json { file: file_path.clone(), error: e }
})?;
for entry in registry.entries {
values.push(KnownValue::new_with_name(
entry.codepoint,
entry.name,
));
}
}
}
Ok(values)
}
/// Loads known values from every directory listed in `config`.
///
/// Directories are visited in configuration order. Values are keyed by
/// `KnownValue::value()`, so a value loaded later replaces an earlier one
/// with the same key. Failures never abort the run: per-file errors and
/// directory-level errors are both collected in `LoadResult::errors`.
///
/// Each directory scanned without a directory-level error is pushed to
/// `LoadResult::files_processed` (this includes directories that do not
/// exist, which scan as empty).
pub fn load_from_config(config: &DirectoryConfig) -> LoadResult {
    let mut outcome = LoadResult::default();

    for dir in config.paths() {
        let (loaded, file_errors) = match load_from_directory_tolerant(dir) {
            Ok(pair) => pair,
            Err(dir_error) => {
                // The directory itself could not be scanned; record and move on.
                outcome.errors.push((dir.clone(), dir_error));
                continue;
            }
        };

        outcome
            .values
            .extend(loaded.into_iter().map(|kv| (kv.value(), kv)));
        outcome.errors.extend(file_errors);
        outcome.files_processed.push(dir.clone());
    }

    outcome
}
/// Loads every `*.json` registry file directly inside `path`, collecting
/// per-file failures instead of stopping at the first one.
///
/// A path that does not exist or is not a directory yields two empty vectors.
/// Files are read in sorted path order, so when several files in one
/// directory define the same codepoint the one that ends up last (and so wins
/// in `load_from_config`) does not depend on the filesystem's listing order.
///
/// # Errors
///
/// Returns [`LoadError::Io`] only if the directory itself cannot be listed or
/// one of its entries cannot be read; failures to read or parse an individual
/// file are reported in the second element of the returned tuple.
fn load_from_directory_tolerant(
    path: &Path,
) -> Result<TolerantLoadResult, LoadError> {
    let mut values = Vec::new();
    let mut errors = Vec::new();

    // `is_dir` is already false for a missing path, so one check suffices.
    if !path.is_dir() {
        return Ok((values, errors));
    }

    // `read_dir` yields entries in a platform-dependent order; gather the
    // candidates and sort them before loading.
    let mut json_files = Vec::new();
    for entry in fs::read_dir(path)? {
        let file_path = entry?.path();
        if file_path.extension().is_some_and(|ext| ext == "json") {
            json_files.push(file_path);
        }
    }
    json_files.sort();

    for file_path in json_files {
        match load_single_file(&file_path) {
            Ok(file_values) => values.extend(file_values),
            Err(e) => errors.push((file_path, e)),
        }
    }
    Ok((values, errors))
}
fn load_single_file(path: &Path) -> Result<Vec<KnownValue>, LoadError> {
let content = fs::read_to_string(path)?;
let registry: RegistryFile = serde_json::from_str(&content)
.map_err(|e| LoadError::Json { file: path.to_path_buf(), error: e })?;
Ok(registry
.entries
.into_iter()
.map(|entry| KnownValue::new_with_name(entry.codepoint, entry.name))
.collect())
}
/// Pending custom directory configuration. `None` means no custom
/// configuration has been stored; `get_and_lock_config` then falls back to
/// `DirectoryConfig::default_only()`.
static CUSTOM_CONFIG: Mutex<Option<DirectoryConfig>> = Mutex::new(None);
/// Set to `true` by `get_and_lock_config`. Once set, `set_directory_config`
/// and `add_search_paths` return `ConfigError::AlreadyInitialized`.
static CONFIG_LOCKED: AtomicBool = AtomicBool::new(false);
/// Error returned when the directory configuration can no longer be changed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ConfigError {
    /// The configuration has already been consumed and locked.
    AlreadyInitialized,
}

/// User-facing message for each [`ConfigError`] variant.
impl fmt::Display for ConfigError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::AlreadyInitialized => {
                "Cannot modify directory configuration after KNOWN_VALUES has been accessed"
            }
        };
        f.write_str(message)
    }
}

/// `ConfigError` wraps no underlying cause, so the default `source` applies.
impl std::error::Error for ConfigError {}
/// Replaces the pending directory configuration with `config`.
///
/// # Errors
///
/// Returns [`ConfigError::AlreadyInitialized`] once `get_and_lock_config` has
/// raised the lock flag.
///
/// # Panics
///
/// Panics if the configuration mutex is poisoned.
pub fn set_directory_config(
    config: DirectoryConfig,
) -> Result<(), ConfigError> {
    // Take the mutex *before* testing the flag. `get_and_lock_config` raises
    // the flag and then takes the stored value under this same mutex, so a
    // check made while holding the guard cannot be followed by a store that
    // the consumer has already missed. Checking first and locking afterwards
    // would leave a window in which this call returns `Ok` for a
    // configuration that is never used.
    let mut slot = CUSTOM_CONFIG.lock().unwrap();
    if CONFIG_LOCKED.load(Ordering::SeqCst) {
        return Err(ConfigError::AlreadyInitialized);
    }
    *slot = Some(config);
    Ok(())
}
/// Appends `paths` to the pending directory configuration.
///
/// If no configuration has been stored yet, one is created from
/// `DirectoryConfig::default_only()` first, so the new paths come after the
/// default directory.
///
/// # Errors
///
/// Returns [`ConfigError::AlreadyInitialized`] once `get_and_lock_config` has
/// raised the lock flag.
///
/// # Panics
///
/// Panics if the configuration mutex is poisoned.
pub fn add_search_paths(paths: Vec<PathBuf>) -> Result<(), ConfigError> {
    // Take the mutex *before* testing the flag. `get_and_lock_config` raises
    // the flag and then takes the stored value under this same mutex, so a
    // check made while holding the guard cannot be followed by an update that
    // the consumer has already missed. Checking first and locking afterwards
    // would leave a window in which this call returns `Ok` for paths that are
    // never used.
    let mut guard = CUSTOM_CONFIG.lock().unwrap();
    if CONFIG_LOCKED.load(Ordering::SeqCst) {
        return Err(ConfigError::AlreadyInitialized);
    }
    let config = guard.get_or_insert_with(DirectoryConfig::default_only);
    for path in paths {
        config.add_path(path);
    }
    Ok(())
}
/// Marks the configuration as locked and returns the configuration to use.
///
/// The stored custom configuration is moved out (leaving `None` behind); if
/// none was stored, `DirectoryConfig::default_only()` is returned instead.
///
/// # Panics
///
/// Panics if the configuration mutex is poisoned.
pub(crate) fn get_and_lock_config() -> DirectoryConfig {
    // The flag is raised before the mutex is acquired.
    CONFIG_LOCKED.store(true, Ordering::SeqCst);

    let mut slot = CUSTOM_CONFIG.lock().unwrap();
    let taken = slot.take();
    match taken {
        Some(custom) => custom,
        None => DirectoryConfig::default_only(),
    }
}
/// Unit tests for registry parsing, directory configuration and the
/// `LoadResult` helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // A document with the optional `ontology` and `statistics` sections parses.
    #[test]
    fn test_parse_registry_json() {
        let json = r#"{
            "ontology": {"name": "test"},
            "entries": [
                {"codepoint": 9999, "name": "testValue", "type": "property"}
            ],
            "statistics": {}
        }"#;
        let registry: RegistryFile = serde_json::from_str(json).unwrap();
        assert_eq!(registry.entries.len(), 1);
        assert_eq!(registry.entries[0].codepoint, 9999);
        assert_eq!(registry.entries[0].name, "testValue");
    }

    // Only `entries` is required at the top level.
    #[test]
    fn test_parse_minimal_registry() {
        let json = r#"{"entries": [{"codepoint": 1, "name": "minimal"}]}"#;
        let registry: RegistryFile = serde_json::from_str(json).unwrap();
        assert_eq!(registry.entries.len(), 1);
        assert_eq!(registry.entries[0].codepoint, 1);
    }

    // Every optional entry field is picked up when present.
    #[test]
    fn test_parse_full_entry() {
        let json = r#"{
            "entries": [{
                "codepoint": 100,
                "name": "fullEntry",
                "type": "class",
                "uri": "https://example.com/vocab#fullEntry",
                "description": "A complete entry with all fields"
            }]
        }"#;
        let registry: RegistryFile = serde_json::from_str(json).unwrap();
        let entry = &registry.entries[0];
        assert_eq!(entry.codepoint, 100);
        assert_eq!(entry.name, "fullEntry");
        assert_eq!(entry.entry_type.as_deref(), Some("class"));
        assert_eq!(
            entry.uri.as_deref(),
            Some("https://example.com/vocab#fullEntry")
        );
        assert!(entry.description.is_some());
    }

    // `default_only` holds exactly the default directory.
    #[test]
    fn test_directory_config_default() {
        let config = DirectoryConfig::default_only();
        assert_eq!(config.paths().len(), 1);
        assert!(config.paths()[0].ends_with(".known-values"));
    }

    // `with_paths` keeps the given paths in order.
    #[test]
    fn test_directory_config_custom_paths() {
        let config = DirectoryConfig::with_paths(vec![
            PathBuf::from("/a"),
            PathBuf::from("/b"),
        ]);
        assert_eq!(config.paths().len(), 2);
        assert_eq!(config.paths()[0], PathBuf::from("/a"));
        assert_eq!(config.paths()[1], PathBuf::from("/b"));
    }

    // `with_paths_and_default` appends the default directory last.
    #[test]
    fn test_directory_config_with_default() {
        let config =
            DirectoryConfig::with_paths_and_default(vec![PathBuf::from(
                "/custom",
            )]);
        assert_eq!(config.paths().len(), 2);
        assert_eq!(config.paths()[0], PathBuf::from("/custom"));
        assert!(config.paths()[1].ends_with(".known-values"));
    }

    // A missing directory is treated as empty, not as an error.
    #[test]
    fn test_load_from_nonexistent_directory() {
        let result = load_from_directory(Path::new("/nonexistent/path/12345"));
        assert!(result.is_ok());
        assert!(result.unwrap().is_empty());
    }

    // The counting and error helpers reflect the underlying collections.
    #[test]
    fn test_load_result_methods() {
        let mut result = LoadResult::default();
        assert_eq!(result.values_count(), 0);
        assert!(!result.has_errors());
        result
            .values
            .insert(1, KnownValue::new_with_name(1u64, "test".to_string()));
        assert_eq!(result.values_count(), 1);
    }
}