mod error;
#[cfg(feature = "python")]
mod python;
pub use error::{ConfigurationError, Error, Result};
use deadpool_postgres::{Config as PoolConfig, Pool, Runtime};
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::RwLock;
use tokio_postgres::NoTls;
use tracing::{debug, info, instrument, warn};
/// Maximum accepted length of a namespace prefix, compared against `str::len` (bytes).
pub const MAX_PREFIX_LENGTH: usize = 64;
/// Maximum accepted length of a namespace URI, compared against `str::len` (bytes).
pub const MAX_URI_LENGTH: usize = 2048;
/// Retry policy for establishing the initial database connection.
///
/// The delay starts at `initial_delay` and is doubled after every failed
/// retry, never growing beyond `max_delay`.
#[derive(Clone, Debug)]
pub struct RetryConfig {
    /// Number of additional attempts made after the first one fails.
    pub max_retries: u32,
    /// Pause inserted before the first retry.
    pub initial_delay: Duration,
    /// Upper bound applied to the delay after each doubling.
    pub max_delay: Duration,
}
impl Default for RetryConfig {
    /// Five retries, starting at a one-second delay and capped at thirty seconds.
    fn default() -> Self {
        let initial_delay = Duration::from_secs(1);
        let max_delay = Duration::from_secs(30);
        Self {
            max_retries: 5,
            initial_delay,
            max_delay,
        }
    }
}
impl RetryConfig {
    /// Builds a retry policy from explicit values.
    ///
    /// No validation is performed; the values are stored as given.
    pub fn new(max_retries: u32, initial_delay: Duration, max_delay: Duration) -> Self {
        Self {
            max_retries,
            initial_delay,
            max_delay,
        }
    }

    /// Policy that never retries: zero retries and zero delays.
    pub fn none() -> Self {
        Self::new(0, Duration::ZERO, Duration::ZERO)
    }
}
fn validate_prefix(prefix: &str) -> Result<()> {
if prefix.is_empty() {
return Err(Error::invalid_prefix("prefix cannot be empty"));
}
if prefix.len() > MAX_PREFIX_LENGTH {
return Err(Error::invalid_prefix(format!(
"prefix exceeds maximum length of {} characters",
MAX_PREFIX_LENGTH
)));
}
if !prefix
.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
{
return Err(Error::invalid_prefix(
"prefix must contain only alphanumeric characters, underscores, and hyphens",
));
}
Ok(())
}
fn validate_uri(uri: &str) -> Result<()> {
if uri.is_empty() {
return Err(Error::invalid_uri("URI cannot be empty"));
}
if uri.len() > MAX_URI_LENGTH {
return Err(Error::invalid_uri(format!(
"URI exceeds maximum length of {} characters",
MAX_URI_LENGTH
)));
}
Ok(())
}
/// Outcome of a batch insert: how many pairs were written and how many were left out.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct BatchStoreResult {
    /// Number of pairs that were inserted.
    pub stored: usize,
    /// Number of pairs that were not inserted.
    pub skipped: usize,
}
impl BatchStoreResult {
    /// Number of pairs that were submitted (`stored + skipped`).
    pub fn total(&self) -> usize {
        let Self { stored, skipped } = self;
        stored + skipped
    }

    /// `true` when nothing was skipped; this also holds for an empty batch.
    pub fn all_stored(&self) -> bool {
        matches!(self.skipped, 0)
    }

    /// `true` when nothing was stored; this also holds for an empty batch.
    pub fn none_stored(&self) -> bool {
        matches!(self.stored, 0)
    }
}
/// Registry of namespace prefixes backed by the `namespaces` database table,
/// with in-memory lookup maps in both directions.
///
/// Cloning is cheap in the sense that the two maps sit behind `Arc`, so all
/// clones observe the same cached entries.
#[derive(Clone)]
pub struct PrefixRegistry {
// Connection pool used for every database query issued by the registry.
pool: Pool,
// Forward map: prefix -> namespace URI.
prefix_cache: Arc<RwLock<HashMap<String, String>>>,
// Reverse map: namespace URI -> prefix.
uri_to_prefix: Arc<RwLock<HashMap<String, String>>>,
}
impl PrefixRegistry {
/// Creates the connection pool and preloads every row of `namespaces` into
/// the forward (prefix -> URI) and reverse (URI -> prefix) caches.
///
/// `database_url` is excluded from the tracing span.
///
/// # Errors
/// - `ConfigurationError::InvalidMaxConnections` when `max_connections` is zero.
/// - `ConfigurationError::InvalidDatabaseUrl` when `database_url` is empty.
/// - Any error converted from pool creation, connection checkout, or the
///   initial `SELECT`.
// `max_connections` is left to `instrument`'s automatic argument capture: a
// value-less `fields(max_connections)` entry declares an *empty* field of the
// same name, which replaces the captured argument and is never filled in.
#[instrument(skip(database_url))]
pub async fn new(database_url: &str, max_connections: usize) -> Result<Self> {
    if max_connections == 0 {
        return Err(ConfigurationError::InvalidMaxConnections(max_connections).into());
    }
    if database_url.is_empty() {
        return Err(ConfigurationError::InvalidDatabaseUrl("empty URL".to_string()).into());
    }

    let mut cfg = PoolConfig::new();
    cfg.url = Some(database_url.to_string());
    cfg.pool = Some(deadpool_postgres::PoolConfig::new(max_connections));
    let pool = cfg.create_pool(Some(Runtime::Tokio1), NoTls)?;
    debug!("created connection pool");

    // Scope the client so the connection goes back to the pool before the
    // caches are built.
    let rows = {
        let client = pool.get().await?;
        client
            .query("SELECT prefix, uri FROM namespaces", &[])
            .await?
    };

    let prefix_count = rows.len();
    let mut cache = HashMap::with_capacity(prefix_count);
    let mut reverse_cache = HashMap::with_capacity(prefix_count);
    for row in &rows {
        let prefix: String = row.get(0);
        let uri: String = row.get(1);
        reverse_cache.insert(uri.clone(), prefix.clone());
        cache.insert(prefix, uri);
    }
    info!(prefix_count, "connected and loaded prefix cache");

    Ok(Self {
        pool,
        prefix_cache: Arc::new(RwLock::new(cache)),
        uri_to_prefix: Arc::new(RwLock::new(reverse_cache)),
    })
}
#[instrument(skip(database_url, retry_config), fields(max_connections, max_retries = retry_config.max_retries))]
pub async fn new_with_retry(
database_url: &str,
max_connections: usize,
retry_config: RetryConfig,
) -> Result<Self> {
let mut last_error: Option<Error> = None;
let mut delay = retry_config.initial_delay;
for attempt in 0..=retry_config.max_retries {
match Self::new(database_url, max_connections).await {
Ok(registry) => return Ok(registry),
Err(e) => {
if matches!(e, Error::Configuration(_)) {
return Err(e);
}
last_error = Some(e);
if attempt < retry_config.max_retries {
warn!(
attempt = attempt + 1,
max_retries = retry_config.max_retries,
delay_ms = delay.as_millis() as u64,
"connection failed, retrying"
);
tokio::time::sleep(delay).await;
delay = std::cmp::min(delay * 2, retry_config.max_delay);
}
}
}
}
Err(last_error.expect("should have at least one error after retries"))
}
/// Looks up the namespace URI registered for `prefix`.
///
/// The in-memory cache is checked first; on a miss the `namespaces` table is
/// queried and a hit is written to both the forward and the reverse cache,
/// so that URI-shortening sees namespaces discovered this way.
///
/// Returns `Ok(None)` when the prefix is in neither the cache nor the database.
///
/// # Errors
/// Propagates connection-pool and query errors.
#[instrument(skip(self), level = "debug")]
pub async fn get_uri_for_prefix(&self, prefix: &str) -> Result<Option<String>> {
    // The read guard is a temporary of this statement, so it is released
    // before any database work starts.
    let cached = self.prefix_cache.read().await.get(prefix).cloned();
    if let Some(uri) = cached {
        debug!("cache hit");
        return Ok(Some(uri));
    }

    debug!("cache miss, querying database");
    let row = {
        let client = self.pool.get().await?;
        client
            .query_opt("SELECT uri FROM namespaces WHERE prefix = $1", &[&prefix])
            .await?
    };
    let Some(row) = row else {
        debug!("not found");
        return Ok(None);
    };

    let uri: String = row.get(0);
    // The two locks are taken one after the other, never nested.
    self.prefix_cache
        .write()
        .await
        .insert(prefix.to_string(), uri.clone());
    self.uri_to_prefix
        .write()
        .await
        .insert(uri.clone(), prefix.to_string());
    debug!("found in database, cached");
    Ok(Some(uri))
}
#[instrument(skip(self), level = "debug")]
pub async fn get_prefix_for_uri(&self, uri: &str) -> Result<Option<String>> {
let client = self.pool.get().await?;
let row = client
.query_opt("SELECT prefix FROM namespaces WHERE uri = $1", &[&uri])
.await?;
Ok(row.map(|r| r.get(0)))
}
/// Registers `prefix` for `uri` unless the URI already has a row.
///
/// Returns `Ok(true)` when a row was inserted (both caches are then updated)
/// and `Ok(false)` when the insert hit the `ON CONFLICT (uri)` clause.
///
/// # Errors
/// Returns a validation error for an unacceptable prefix or URI, and
/// propagates connection-pool and query errors.
#[instrument(skip(self))]
pub async fn store_prefix_if_new(&self, prefix: &str, uri: &str) -> Result<bool> {
    validate_prefix(prefix)?;
    validate_uri(uri)?;

    let client = self.pool.get().await?;
    let inserted_rows = client
        .execute(
            "INSERT INTO namespaces (uri, prefix) VALUES ($1, $2)\n\
             ON CONFLICT (uri) DO NOTHING",
            &[&uri, &prefix],
        )
        .await?;

    if inserted_rows == 0 {
        debug!("skipped, URI already has prefix");
        return Ok(false);
    }

    // Each write guard lives only for its own statement.
    self.prefix_cache
        .write()
        .await
        .insert(prefix.to_string(), uri.to_string());
    self.uri_to_prefix
        .write()
        .await
        .insert(uri.to_string(), prefix.to_string());
    debug!("stored new prefix");
    Ok(true)
}
/// Registers many `(prefix, uri)` pairs with a single `INSERT`.
///
/// Every pair is validated before anything is sent to the database, so one
/// invalid pair rejects the whole batch. Pairs whose URI already has a row
/// are skipped; the rows reported back by `RETURNING` are added to both
/// caches.
///
/// # Errors
/// Returns the first validation error encountered, and propagates
/// connection-pool and query errors.
#[instrument(skip(self, prefixes))]
pub async fn store_prefixes_if_new<'a, I>(&self, prefixes: I) -> Result<BatchStoreResult>
where
    I: IntoIterator<Item = (&'a str, &'a str)>,
{
    let pairs: Vec<(&str, &str)> = prefixes.into_iter().collect();
    if pairs.is_empty() {
        return Ok(BatchStoreResult::default());
    }
    pairs
        .iter()
        .try_for_each(|&(prefix, uri)| validate_prefix(prefix).and_then(|()| validate_uri(uri)))?;

    let total = pairs.len();
    // Two parallel arrays, matched up again by UNNEST on the SQL side.
    let (prefix_names, uris): (Vec<&str>, Vec<&str>) = pairs.iter().copied().unzip();

    let client = self.pool.get().await?;
    let rows = client
        .query(
            "INSERT INTO namespaces (uri, prefix)\n\
             SELECT * FROM UNNEST($1::text[], $2::text[])\n\
             ON CONFLICT (uri) DO NOTHING\n\
             RETURNING prefix, uri",
            &[&uris, &prefix_names],
        )
        .await?;

    let stored = rows.len();
    let skipped = total - stored;
    if stored > 0 {
        // Lock order: forward cache first, then reverse cache.
        let mut cache = self.prefix_cache.write().await;
        let mut reverse = self.uri_to_prefix.write().await;
        for row in &rows {
            let prefix: String = row.get(0);
            let uri: String = row.get(1);
            reverse.insert(uri.clone(), prefix.clone());
            cache.insert(prefix, uri);
        }
    }
    info!(total, stored, skipped, "batch store complete");
    Ok(BatchStoreResult { stored, skipped })
}
/// Expands `prefix:local_name` by appending `local_name` to the namespace URI
/// registered for `prefix`.
///
/// Returns `Ok(None)` when the prefix is unknown.
///
/// # Errors
/// Propagates errors from `get_uri_for_prefix`.
#[instrument(skip(self), level = "debug")]
pub async fn expand_curie(&self, prefix: &str, local_name: &str) -> Result<Option<String>> {
    let expanded = self
        .get_uri_for_prefix(prefix)
        .await?
        .map(|base_uri| format!("{}{}", base_uri, local_name));
    Ok(expanded)
}
/// Returns a copy of the cached prefix -> URI map as it is at the time of the call.
pub async fn get_all_prefixes(&self) -> HashMap<String, String> {
    let cache = self.prefix_cache.read().await;
    cache.clone()
}
/// Number of prefixes currently held in the in-memory cache.
pub async fn prefix_count(&self) -> usize {
    let cache = self.prefix_cache.read().await;
    cache.len()
}
/// Splits `uri` into `(prefix, local_name)` using the longest cached
/// namespace URI that `uri` starts with.
///
/// Only the in-memory reverse cache is consulted. Returns `Ok(None)` when no
/// cached namespace matches.
#[instrument(skip(self), level = "debug")]
pub async fn shorten_uri(&self, uri: &str) -> Result<Option<(String, String)>> {
    let reverse = self.uri_to_prefix.read().await;

    // Two different namespaces of equal length cannot both be a prefix of the
    // same string, so the maximum is unique whenever one exists.
    let longest = reverse
        .iter()
        // An empty namespace never counts as a match.
        .filter(|(namespace_uri, _)| !namespace_uri.is_empty())
        .filter_map(|(namespace_uri, prefix)| {
            uri.strip_prefix(namespace_uri.as_str())
                .map(|local_name| (namespace_uri.len(), prefix.as_str(), local_name))
        })
        .max_by_key(|&(length, _, _)| length);

    match longest {
        Some((_, prefix, local_name)) => {
            debug!(prefix, local_name, "shortened URI");
            Ok(Some((prefix.to_string(), local_name.to_string())))
        }
        None => {
            debug!("no matching namespace found");
            Ok(None)
        }
    }
}
/// Renders `uri` as `prefix:local_name` when a cached namespace matches, and
/// returns `uri` unchanged otherwise.
///
/// # Errors
/// Propagates errors from `shorten_uri`.
#[instrument(skip(self), level = "debug")]
pub async fn shorten_uri_or_full(&self, uri: &str) -> Result<String> {
    let rendered = match self.shorten_uri(uri).await? {
        Some((prefix, local_name)) => format!("{}:{}", prefix, local_name),
        None => uri.to_string(),
    };
    Ok(rendered)
}
/// Shortens many URIs while holding the reverse-cache read lock once.
///
/// The output has one entry per input, in input order: `Some((prefix,
/// local_name))` for the longest matching cached namespace, `None` when
/// nothing matches. Only the in-memory reverse cache is consulted.
#[instrument(skip(self, uris), level = "debug")]
pub async fn shorten_uri_batch<'a, I>(&self, uris: I) -> Result<Vec<Option<(String, String)>>>
where
    I: IntoIterator<Item = &'a str>,
{
    let reverse = self.uri_to_prefix.read().await;

    let results: Vec<Option<(String, String)>> = uris
        .into_iter()
        .map(|uri| {
            reverse
                .iter()
                // An empty namespace never counts as a match.
                .filter(|(namespace_uri, _)| !namespace_uri.is_empty())
                .filter_map(|(namespace_uri, prefix)| {
                    uri.strip_prefix(namespace_uri.as_str())
                        .map(|local_name| (namespace_uri.len(), prefix, local_name))
                })
                // Equal-length matches would have to be the same key, so the
                // longest match is unique.
                .max_by_key(|&(length, _, _)| length)
                .map(|(_, prefix, local_name)| (prefix.to_string(), local_name.to_string()))
        })
        .collect();

    debug!(count = results.len(), "batch shorten complete");
    Ok(results)
}
/// Expands many `(prefix, local_name)` pairs while holding the forward-cache
/// read lock once.
///
/// The output has one entry per input, in input order. Only the in-memory
/// cache is consulted: a prefix that is not cached yields `None` and no
/// database query is made.
#[instrument(skip(self, curies), level = "debug")]
pub async fn expand_curie_batch<'a, I>(&self, curies: I) -> Result<Vec<Option<String>>>
where
    I: IntoIterator<Item = (&'a str, &'a str)>,
{
    let cache = self.prefix_cache.read().await;

    let results: Vec<Option<String>> = curies
        .into_iter()
        .map(|(prefix, local_name)| {
            cache
                .get(prefix)
                .map(|base_uri| format!("{}{}", base_uri, local_name))
        })
        .collect();

    debug!(count = results.len(), "batch expand complete");
    Ok(results)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // Unit tests that need no database.
    // ---------------------------------------------------------------------

    #[test]
    fn test_configuration_error_max_connections() {
        let err = ConfigurationError::InvalidMaxConnections(0);
        assert!(err.to_string().contains("max_connections"));
    }

    #[test]
    fn test_configuration_error_database_url() {
        let err = ConfigurationError::InvalidDatabaseUrl("empty".to_string());
        assert!(err.to_string().contains("database_url"));
    }

    #[test]
    fn test_batch_store_result_default() {
        let result = BatchStoreResult::default();
        assert_eq!(result.stored, 0);
        assert_eq!(result.skipped, 0);
        assert_eq!(result.total(), 0);
        // An empty batch satisfies both predicates at once.
        assert!(result.all_stored());
        assert!(result.none_stored());
    }

    #[test]
    fn test_batch_store_result_all_stored() {
        let result = BatchStoreResult {
            stored: 5,
            skipped: 0,
        };
        assert_eq!(result.total(), 5);
        assert!(result.all_stored());
        assert!(!result.none_stored());
    }

    #[test]
    fn test_batch_store_result_mixed() {
        let result = BatchStoreResult {
            stored: 3,
            skipped: 2,
        };
        assert_eq!(result.total(), 5);
        assert!(!result.all_stored());
        assert!(!result.none_stored());
    }

    #[test]
    fn test_batch_store_result_all_skipped() {
        let result = BatchStoreResult {
            stored: 0,
            skipped: 5,
        };
        assert_eq!(result.total(), 5);
        assert!(!result.all_stored());
        assert!(result.none_stored());
    }

    #[test]
    fn test_validate_prefix_valid() {
        assert!(validate_prefix("foaf").is_ok());
        assert!(validate_prefix("rdf").is_ok());
        assert!(validate_prefix("schema_org").is_ok());
        assert!(validate_prefix("my-prefix").is_ok());
        assert!(validate_prefix("prefix123").is_ok());
        assert!(validate_prefix("a").is_ok());
    }

    #[test]
    fn test_validate_prefix_empty() {
        let result = validate_prefix("");
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("empty"));
    }

    #[test]
    fn test_validate_prefix_too_long() {
        let long_prefix = "a".repeat(MAX_PREFIX_LENGTH + 1);
        let result = validate_prefix(&long_prefix);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("maximum length"));
    }

    #[test]
    fn test_validate_prefix_max_length_ok() {
        let max_prefix = "a".repeat(MAX_PREFIX_LENGTH);
        assert!(validate_prefix(&max_prefix).is_ok());
    }

    #[test]
    fn test_validate_prefix_invalid_chars() {
        let result = validate_prefix("foo bar");
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("alphanumeric"));
        let result = validate_prefix("foo:bar");
        assert!(result.is_err());
        let result = validate_prefix("foo/bar");
        assert!(result.is_err());
        // Non-ASCII letters are rejected as well.
        let result = validate_prefix("préfix");
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_uri_valid() {
        assert!(validate_uri("http://example.org/").is_ok());
        assert!(validate_uri("https://schema.org/Person").is_ok());
        assert!(validate_uri("urn:isbn:0451450523").is_ok());
        assert!(validate_uri("http://example.org/path?query=1#fragment").is_ok());
    }

    #[test]
    fn test_validate_uri_empty() {
        let result = validate_uri("");
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("empty"));
    }

    #[test]
    fn test_validate_uri_too_long() {
        let long_uri = format!("http://example.org/{}", "a".repeat(MAX_URI_LENGTH));
        let result = validate_uri(&long_uri);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("maximum length"));
    }

    #[test]
    fn test_validate_uri_max_length_ok() {
        let base = "http://example.org/";
        let padding = "a".repeat(MAX_URI_LENGTH - base.len());
        let max_uri = format!("{}{}", base, padding);
        assert_eq!(max_uri.len(), MAX_URI_LENGTH);
        assert!(validate_uri(&max_uri).is_ok());
    }

    #[test]
    fn test_retry_config_default() {
        let config = RetryConfig::default();
        assert_eq!(config.max_retries, 5);
        assert_eq!(config.initial_delay, Duration::from_secs(1));
        assert_eq!(config.max_delay, Duration::from_secs(30));
    }

    #[test]
    fn test_retry_config_none() {
        let config = RetryConfig::none();
        assert_eq!(config.max_retries, 0);
        assert_eq!(config.initial_delay, Duration::ZERO);
        assert_eq!(config.max_delay, Duration::ZERO);
    }

    #[test]
    fn test_retry_config_custom() {
        let config = RetryConfig::new(3, Duration::from_millis(100), Duration::from_secs(5));
        assert_eq!(config.max_retries, 3);
        assert_eq!(config.initial_delay, Duration::from_millis(100));
        assert_eq!(config.max_delay, Duration::from_secs(5));
    }

    #[tokio::test]
    async fn test_new_with_retry_config_error_not_retried() {
        // An empty URL is a configuration error and must come back without retrying.
        let result = PrefixRegistry::new_with_retry("", 5, RetryConfig::default()).await;
        assert!(matches!(
            result,
            Err(Error::Configuration(
                ConfigurationError::InvalidDatabaseUrl(_)
            ))
        ));
    }

    // ---------------------------------------------------------------------
    // Helpers for the database-backed tests.
    // ---------------------------------------------------------------------

    /// Reads `DATABASE_URL`; database-backed tests return early when it is unset.
    fn get_test_database_url() -> Option<String> {
        std::env::var("DATABASE_URL").ok()
    }

    /// Deletes every `namespaces` row whose prefix starts with `test_prefix`.
    async fn cleanup_test_data(registry: &PrefixRegistry, test_prefix: &str) {
        let client = registry.pool.get().await.unwrap();
        client
            .execute(
                "DELETE FROM namespaces WHERE prefix LIKE $1",
                &[&format!("{}%", test_prefix)],
            )
            .await
            .unwrap();
    }

    // ---------------------------------------------------------------------
    // Constructor argument checks (fail before any connection is made).
    // ---------------------------------------------------------------------

    #[tokio::test]
    async fn test_new_with_invalid_max_connections() {
        let result = PrefixRegistry::new("postgres://localhost/test", 0).await;
        assert!(matches!(
            result,
            Err(Error::Configuration(
                ConfigurationError::InvalidMaxConnections(0)
            ))
        ));
    }

    #[tokio::test]
    async fn test_new_with_empty_url() {
        let result = PrefixRegistry::new("", 5).await;
        assert!(matches!(
            result,
            Err(Error::Configuration(
                ConfigurationError::InvalidDatabaseUrl(_)
            ))
        ));
    }

    // ---------------------------------------------------------------------
    // Database-backed tests.
    // ---------------------------------------------------------------------

    #[tokio::test]
    async fn test_store_and_retrieve_prefix() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let test_prefix = "test_sr_";
        cleanup_test_data(&registry, test_prefix).await;
        let prefix = format!("{test_prefix}ns");
        let uri = format!("http://example.org/{test_prefix}ns/");
        let stored = registry.store_prefix_if_new(&prefix, &uri).await.unwrap();
        assert!(stored, "First store should succeed");
        let retrieved = registry.get_uri_for_prefix(&prefix).await.unwrap();
        assert_eq!(retrieved, Some(uri.to_string()));
        let retrieved_prefix = registry.get_prefix_for_uri(&uri).await.unwrap();
        assert_eq!(retrieved_prefix, Some(prefix.clone()));
        cleanup_test_data(&registry, test_prefix).await;
    }

    #[tokio::test]
    async fn test_first_prefix_wins() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let test_prefix = "test_fpw_";
        cleanup_test_data(&registry, test_prefix).await;
        let uri = "http://example.org/test/first-wins/";
        let first_prefix = format!("{test_prefix}first");
        let second_prefix = format!("{test_prefix}second");
        let stored1 = registry
            .store_prefix_if_new(&first_prefix, uri)
            .await
            .unwrap();
        assert!(stored1, "First prefix should be stored");
        let stored2 = registry
            .store_prefix_if_new(&second_prefix, uri)
            .await
            .unwrap();
        assert!(!stored2, "Second prefix should be rejected");
        let retrieved = registry.get_prefix_for_uri(uri).await.unwrap();
        assert_eq!(retrieved, Some(first_prefix));
        cleanup_test_data(&registry, test_prefix).await;
    }

    #[tokio::test]
    async fn test_expand_curie() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let test_prefix = "test_ec_";
        cleanup_test_data(&registry, test_prefix).await;
        let prefix = format!("{test_prefix}ns");
        let uri = format!("http://example.org/{test_prefix}ns/");
        registry.store_prefix_if_new(&prefix, &uri).await.unwrap();
        let expanded = registry.expand_curie(&prefix, "Person").await.unwrap();
        assert_eq!(expanded, Some(format!("{uri}Person")));
        let unknown = registry
            .expand_curie(&format!("{test_prefix}unknown"), "Thing")
            .await
            .unwrap();
        assert_eq!(unknown, None);
        cleanup_test_data(&registry, test_prefix).await;
    }

    #[tokio::test]
    async fn test_batch_store_prefixes() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let test_prefix = "test_bs_";
        cleanup_test_data(&registry, test_prefix).await;
        let prefixes = [
            (
                format!("{test_prefix}ns1"),
                format!("http://example.org/{test_prefix}ns1/"),
            ),
            (
                format!("{test_prefix}ns2"),
                format!("http://example.org/{test_prefix}ns2/"),
            ),
            (
                format!("{test_prefix}ns3"),
                format!("http://example.org/{test_prefix}ns3/"),
            ),
        ];
        let initial_count = registry.prefix_count().await;
        let prefix_refs: Vec<(&str, &str)> = prefixes
            .iter()
            .map(|(p, u)| (p.as_str(), u.as_str()))
            .collect();
        let result = registry.store_prefixes_if_new(prefix_refs).await.unwrap();
        assert_eq!(result.stored, 3);
        assert_eq!(result.skipped, 0);
        assert!(result.all_stored());
        assert_eq!(registry.prefix_count().await, initial_count + 3);
        cleanup_test_data(&registry, test_prefix).await;
    }

    #[tokio::test]
    async fn test_batch_store_with_duplicates() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let test_prefix = "test_bsd_";
        cleanup_test_data(&registry, test_prefix).await;
        let existing_uri = "http://example.org/existing/";
        registry
            .store_prefix_if_new(&format!("{test_prefix}existing"), existing_uri)
            .await
            .unwrap();
        let prefixes = [
            (
                format!("{test_prefix}new1"),
                "http://example.org/new1/".to_string(),
            ),
            // Same URI as the row stored above, so this pair must be skipped.
            (format!("{test_prefix}duplicate"), existing_uri.to_string()),
            (
                format!("{test_prefix}new2"),
                "http://example.org/new2/".to_string(),
            ),
        ];
        let prefix_refs: Vec<(&str, &str)> = prefixes
            .iter()
            .map(|(p, u)| (p.as_str(), u.as_str()))
            .collect();
        let result = registry.store_prefixes_if_new(prefix_refs).await.unwrap();
        assert_eq!(result.stored, 2);
        assert_eq!(result.skipped, 1);
        assert!(!result.all_stored());
        assert!(!result.none_stored());
        cleanup_test_data(&registry, test_prefix).await;
    }

    #[tokio::test]
    async fn test_batch_store_empty() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let empty: Vec<(&str, &str)> = vec![];
        let result = registry.store_prefixes_if_new(empty).await.unwrap();
        assert_eq!(result.stored, 0);
        assert_eq!(result.skipped, 0);
        assert!(result.all_stored());
        assert!(result.none_stored());
    }

    #[tokio::test]
    async fn test_get_all_prefixes() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let test_prefix = "test_gap_";
        cleanup_test_data(&registry, test_prefix).await;
        let prefix1 = format!("{test_prefix}a");
        let prefix2 = format!("{test_prefix}b");
        registry
            .store_prefix_if_new(&prefix1, "http://example.org/a/")
            .await
            .unwrap();
        registry
            .store_prefix_if_new(&prefix2, "http://example.org/b/")
            .await
            .unwrap();
        let all = registry.get_all_prefixes().await;
        assert!(all.contains_key(&prefix1));
        assert!(all.contains_key(&prefix2));
        assert_eq!(
            all.get(&prefix1),
            Some(&"http://example.org/a/".to_string())
        );
        cleanup_test_data(&registry, test_prefix).await;
    }

    #[tokio::test]
    async fn test_cache_populated_on_startup() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let test_prefix = "test_cache_";
        {
            let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
            cleanup_test_data(&registry, test_prefix).await;
            registry
                .store_prefix_if_new(
                    &format!("{test_prefix}cached"),
                    "http://example.org/cached/",
                )
                .await
                .unwrap();
        }
        // A fresh registry must see the row written by the previous one.
        let registry2 = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let cached = registry2.get_all_prefixes().await;
        assert!(cached.contains_key(&format!("{test_prefix}cached")));
        cleanup_test_data(&registry2, test_prefix).await;
    }

    #[tokio::test]
    async fn test_unknown_prefix_returns_none() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let result = registry
            .get_uri_for_prefix("definitely_not_a_real_prefix_xyz123")
            .await
            .unwrap();
        assert_eq!(result, None);
    }

    #[tokio::test]
    async fn test_unknown_uri_returns_none() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let result = registry
            .get_prefix_for_uri("http://definitely-not-registered.example.org/")
            .await
            .unwrap();
        assert_eq!(result, None);
    }

    #[tokio::test]
    async fn test_new_with_retry_succeeds() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new_with_retry(&db_url, 5, RetryConfig::none())
            .await
            .unwrap();
        let test_prefix = "test_nwr_";
        cleanup_test_data(&registry, test_prefix).await;
        let stored = registry
            .store_prefix_if_new(&format!("{test_prefix}retry"), "http://example.org/retry/")
            .await
            .unwrap();
        assert!(stored);
        cleanup_test_data(&registry, test_prefix).await;
    }

    #[tokio::test]
    async fn test_shorten_uri() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let test_prefix = "test_su_";
        cleanup_test_data(&registry, test_prefix).await;
        let prefix = format!("{test_prefix}ns");
        let uri = format!("http://example.org/{test_prefix}ns/");
        registry.store_prefix_if_new(&prefix, &uri).await.unwrap();
        let result = registry.shorten_uri(&format!("{uri}Person")).await.unwrap();
        assert_eq!(result, Some((prefix.clone(), "Person".to_string())));
        let unknown = registry
            .shorten_uri("http://unknown.org/thing")
            .await
            .unwrap();
        assert_eq!(unknown, None);
        cleanup_test_data(&registry, test_prefix).await;
    }

    #[tokio::test]
    async fn test_shorten_uri_longest_match() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let test_prefix = "test_sulm_";
        cleanup_test_data(&registry, test_prefix).await;
        let short_prefix = format!("{test_prefix}short");
        let long_prefix = format!("{test_prefix}long");
        registry
            .store_prefix_if_new(&short_prefix, "http://example.org/")
            .await
            .unwrap();
        registry
            .store_prefix_if_new(&long_prefix, "http://example.org/nested/")
            .await
            .unwrap();
        let result = registry
            .shorten_uri("http://example.org/nested/thing")
            .await
            .unwrap();
        assert_eq!(result, Some((long_prefix.clone(), "thing".to_string())));
        let result = registry
            .shorten_uri("http://example.org/other")
            .await
            .unwrap();
        assert_eq!(result, Some((short_prefix.clone(), "other".to_string())));
        cleanup_test_data(&registry, test_prefix).await;
    }

    #[tokio::test]
    async fn test_shorten_uri_or_full() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let test_prefix = "test_suof_";
        cleanup_test_data(&registry, test_prefix).await;
        let prefix = format!("{test_prefix}ns");
        let uri = format!("http://example.org/{test_prefix}ns/");
        registry.store_prefix_if_new(&prefix, &uri).await.unwrap();
        let result = registry
            .shorten_uri_or_full(&format!("{uri}Person"))
            .await
            .unwrap();
        assert_eq!(result, format!("{}:Person", prefix));
        let unknown_uri = "http://unknown.org/thing";
        let result = registry.shorten_uri_or_full(unknown_uri).await.unwrap();
        assert_eq!(result, unknown_uri);
        cleanup_test_data(&registry, test_prefix).await;
    }

    #[tokio::test]
    async fn test_shorten_uri_batch() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let test_prefix = "test_sub_";
        cleanup_test_data(&registry, test_prefix).await;
        let prefix1 = format!("{test_prefix}ns1");
        let prefix2 = format!("{test_prefix}ns2");
        let uri1 = format!("http://example.org/{test_prefix}ns1/");
        let uri2 = format!("http://example.org/{test_prefix}ns2/");
        registry.store_prefix_if_new(&prefix1, &uri1).await.unwrap();
        registry.store_prefix_if_new(&prefix2, &uri2).await.unwrap();
        let uris = [
            format!("{uri1}Person"),
            "http://unknown.org/thing".to_string(),
            format!("{uri2}Organization"),
        ];
        let results = registry
            .shorten_uri_batch(uris.iter().map(|s| s.as_str()))
            .await
            .unwrap();
        assert_eq!(results.len(), 3);
        assert_eq!(results[0], Some((prefix1.clone(), "Person".to_string())));
        assert_eq!(results[1], None);
        assert_eq!(
            results[2],
            Some((prefix2.clone(), "Organization".to_string()))
        );
        cleanup_test_data(&registry, test_prefix).await;
    }

    #[tokio::test]
    async fn test_expand_curie_batch() {
        let Some(db_url) = get_test_database_url() else {
            return;
        };
        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
        let test_prefix = "test_ecb_";
        cleanup_test_data(&registry, test_prefix).await;
        let prefix1 = format!("{test_prefix}ns1");
        let prefix2 = format!("{test_prefix}ns2");
        let uri1 = format!("http://example.org/{test_prefix}ns1/");
        let uri2 = format!("http://example.org/{test_prefix}ns2/");
        registry.store_prefix_if_new(&prefix1, &uri1).await.unwrap();
        registry.store_prefix_if_new(&prefix2, &uri2).await.unwrap();
        let curies = vec![
            (prefix1.as_str(), "Person"),
            ("unknown_prefix_xyz", "Thing"),
            (prefix2.as_str(), "Organization"),
        ];
        let results = registry.expand_curie_batch(curies).await.unwrap();
        assert_eq!(results.len(), 3);
        assert_eq!(results[0], Some(format!("{uri1}Person")));
        assert_eq!(results[1], None);
        assert_eq!(results[2], Some(format!("{uri2}Organization")));
        cleanup_test_data(&registry, test_prefix).await;
    }
}