prefix_register/
lib.rs

1// Copyright TELICENT LTD
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! # Prefix Register
16//!
17//! **Status: Beta** - API may change before 1.0 release.
18//!
19//! A PostgreSQL-backed namespace prefix registry for [CURIE](https://www.w3.org/TR/curie/)
20//! expansion and prefix management.
21//!
22//! This library provides bidirectional mapping between namespace prefixes
23//! (like "foaf", "rdf", "schema") and their full URI bases, optimised for
24//! use in RDF/semantic web applications.
25//!
26//! **API:** Async-only, built on tokio and deadpool-postgres for high concurrency.
27//!
28//! ## Features
29//!
30//! - **Async-only** - Built on tokio for high concurrency
31//! - **In-memory caching** - Prefixes loaded on startup for fast CURIE expansion
32//! - **First-prefix-wins** - Each URI can only have one registered prefix
33//! - **Batch operations** - Efficiently store multiple prefixes in a single round trip
34//! - **PostgreSQL backend** - Durable, scalable storage with connection pooling
35//! - **Startup resilience** - Optional retry with exponential backoff for container orchestration
36//! - **Input validation** - Prevents DoS via length limits (prefix max 64, URI max 2048 chars)
37//! - **Tracing instrumentation** - Built-in spans and events for observability
38//!
39//! ## Use Cases
40//!
41//! - CURIE expansion in RDF processing
42//! - Namespace prefix management for semantic web applications
43//! - Prefix discovery from Turtle, JSON-LD, XML documents
44//!
45//! ## Example
46//!
47//! ```rust,no_run
48//! use prefix_register::PrefixRegistry;
49//!
50//! #[tokio::main]
51//! async fn main() -> prefix_register::Result<()> {
52//!     // Connect to PostgreSQL (schema must have namespaces table)
53//!     let registry = PrefixRegistry::new(
54//!         "postgres://localhost/mydb",
55//!         10,  // max connections
56//!     ).await?;
57//!
58//!     // Store a prefix (only if URI doesn't already have one)
59//!     let stored = registry.store_prefix_if_new("foaf", "http://xmlns.com/foaf/0.1/").await?;
60//!     println!("Prefix stored: {}", stored);
61//!
62//!     // Expand a CURIE
63//!     if let Some(uri) = registry.expand_curie("foaf", "Person").await? {
64//!         println!("foaf:Person = {}", uri);
65//!     }
66//!
67//!     Ok(())
68//! }
69//! ```
70
71mod error;
72
73#[cfg(feature = "python")]
74mod python;
75
76pub use error::{ConfigurationError, Error, Result};
77
78use deadpool_postgres::{Config as PoolConfig, Pool, Runtime};
79use std::collections::HashMap;
80use std::sync::Arc;
81use std::time::Duration;
82use tokio::sync::RwLock;
83use tokio_postgres::NoTls;
84use tracing::{debug, info, instrument, warn};
85
86/// Maximum length for a prefix (64 characters).
87pub const MAX_PREFIX_LENGTH: usize = 64;
88
89/// Maximum length for a URI (2048 characters).
90pub const MAX_URI_LENGTH: usize = 2048;
91
/// Retry policy used while establishing the initial database connection.
///
/// Hand one of these to [`PrefixRegistry::new_with_retry`] so that startup
/// can ride out a database that is briefly unavailable.
#[derive(Debug, Clone)]
pub struct RetryConfig {
    /// How many times to retry after the first failed attempt
    /// (0 = no retries, fail immediately).
    pub max_retries: u32,
    /// Wait before the first retry; the wait doubles on each subsequent
    /// attempt (exponential backoff).
    pub initial_delay: Duration,
    /// Upper bound on the wait between retries (caps the exponential growth).
    pub max_delay: Duration,
}

impl RetryConfig {
    /// Build a retry configuration from its three components.
    pub fn new(max_retries: u32, initial_delay: Duration, max_delay: Duration) -> Self {
        Self {
            max_retries,
            initial_delay,
            max_delay,
        }
    }

    /// A configuration that never retries: zero retries and zero delays.
    pub fn none() -> Self {
        Self::new(0, Duration::ZERO, Duration::ZERO)
    }
}

impl Default for RetryConfig {
    /// The default policy: 5 retries, first wait of 1 second, waits capped at 30 seconds.
    fn default() -> Self {
        Self::new(5, Duration::from_secs(1), Duration::from_secs(30))
    }
}
136
137/// Validate a prefix string.
138///
139/// Prefixes must be:
140/// - Non-empty
141/// - At most 64 characters
142/// - Contain only alphanumeric characters, underscores, and hyphens
143fn validate_prefix(prefix: &str) -> Result<()> {
144    if prefix.is_empty() {
145        return Err(Error::invalid_prefix("prefix cannot be empty"));
146    }
147    if prefix.len() > MAX_PREFIX_LENGTH {
148        return Err(Error::invalid_prefix(format!(
149            "prefix exceeds maximum length of {} characters",
150            MAX_PREFIX_LENGTH
151        )));
152    }
153    if !prefix
154        .chars()
155        .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
156    {
157        return Err(Error::invalid_prefix(
158            "prefix must contain only alphanumeric characters, underscores, and hyphens",
159        ));
160    }
161    Ok(())
162}
163
164/// Validate a URI string.
165///
166/// URIs must be:
167/// - Non-empty
168/// - At most 2048 characters
169fn validate_uri(uri: &str) -> Result<()> {
170    if uri.is_empty() {
171        return Err(Error::invalid_uri("URI cannot be empty"));
172    }
173    if uri.len() > MAX_URI_LENGTH {
174        return Err(Error::invalid_uri(format!(
175            "URI exceeds maximum length of {} characters",
176            MAX_URI_LENGTH
177        )));
178    }
179    Ok(())
180}
181
/// Outcome of a batch store operation.
///
/// Reports how many of the submitted prefixes were newly written and how
/// many were left out, so callers can log or react accordingly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct BatchStoreResult {
    /// Count of prefixes that were newly stored.
    pub stored: usize,
    /// Count of prefixes that were skipped (URI already had a prefix).
    pub skipped: usize,
}

impl BatchStoreResult {
    /// Number of prefixes processed overall (`stored + skipped`).
    pub fn total(&self) -> usize {
        let Self { stored, skipped } = self;
        stored + skipped
    }

    /// `true` when nothing was skipped. Note this also holds for an empty batch.
    pub fn all_stored(&self) -> bool {
        matches!(self.skipped, 0)
    }

    /// `true` when nothing was stored (everything skipped, or the batch was empty).
    pub fn none_stored(&self) -> bool {
        matches!(self.stored, 0)
    }
}
210
211/// Registry for namespace prefixes.
212///
213/// Provides async access to the namespaces table in PostgreSQL.
214/// Prefixes are stored with their corresponding URIs, following
215/// the rule that each URI can only have one prefix (first one wins).
216///
217/// The registry maintains an in-memory cache of all prefixes, which
218/// is populated on startup and updated as new prefixes are stored.
219/// This ensures fast CURIE expansion without database round-trips.
220#[derive(Clone)]
221pub struct PrefixRegistry {
222    /// Connection pool for the prefix database.
223    /// Pool is internally Arc-wrapped, so cloning is cheap.
224    pool: Pool,
225    /// In-memory cache of prefix -> URI mappings for fast lookups.
226    /// This cache is populated on startup and updated as new prefixes are stored.
227    /// Wrapped in Arc to allow cheap cloning for async Python bindings.
228    prefix_cache: Arc<RwLock<HashMap<String, String>>>,
229    /// Reverse cache of URI -> prefix mappings for URI shortening.
230    /// Used by shorten_uri() to find the longest matching namespace.
231    uri_to_prefix: Arc<RwLock<HashMap<String, String>>>,
232}
233
234impl PrefixRegistry {
235    /// Create a new prefix registry connected to the given PostgreSQL database.
236    ///
237    /// The registry will connect to the database and pre-populate its
238    /// in-memory cache with existing prefixes for fast CURIE expansion.
239    ///
240    /// # Arguments
241    ///
242    /// * `database_url` - PostgreSQL connection URL (e.g., "postgres://user:pass@host:port/db")
243    /// * `max_connections` - Maximum number of connections in the pool
244    ///
245    /// # Errors
246    ///
247    /// Returns an error if the database connection cannot be established
248    /// or if the namespaces table does not exist.
249    ///
250    /// # Example
251    ///
252    /// ```rust,no_run
253    /// use prefix_register::PrefixRegistry;
254    ///
255    /// # async fn example() -> prefix_register::Result<()> {
256    /// let registry = PrefixRegistry::new(
257    ///     "postgres://localhost/mydb",
258    ///     10,
259    /// ).await?;
260    /// # Ok(())
261    /// # }
262    /// ```
263    #[instrument(skip(database_url), fields(max_connections))]
264    pub async fn new(database_url: &str, max_connections: usize) -> Result<Self> {
265        if max_connections == 0 {
266            return Err(ConfigurationError::InvalidMaxConnections(max_connections).into());
267        }
268        if database_url.is_empty() {
269            return Err(ConfigurationError::InvalidDatabaseUrl("empty URL".to_string()).into());
270        }
271
272        // Parse the database URL and create pool configuration
273        let mut cfg = PoolConfig::new();
274        cfg.url = Some(database_url.to_string());
275        cfg.pool = Some(deadpool_postgres::PoolConfig::new(max_connections));
276
277        // Create the connection pool
278        let pool = cfg.create_pool(Some(Runtime::Tokio1), NoTls)?;
279
280        debug!("created connection pool");
281
282        // Verify connection by getting a client
283        let client = pool.get().await?;
284
285        // Pre-populate the cache with existing prefixes
286        let rows = client
287            .query("SELECT prefix, uri FROM namespaces", &[])
288            .await?;
289
290        let prefix_count = rows.len();
291        let mut cache = HashMap::new();
292        let mut reverse_cache = HashMap::new();
293        for row in rows {
294            let prefix: String = row.get(0);
295            let uri: String = row.get(1);
296            reverse_cache.insert(uri.clone(), prefix.clone());
297            cache.insert(prefix, uri);
298        }
299
300        info!(prefix_count, "connected and loaded prefix cache");
301
302        Ok(Self {
303            pool,
304            prefix_cache: Arc::new(RwLock::new(cache)),
305            uri_to_prefix: Arc::new(RwLock::new(reverse_cache)),
306        })
307    }
308
309    /// Create a new prefix registry with retry logic for transient failures.
310    ///
311    /// This variant of [`new`](Self::new) implements exponential backoff retry
312    /// to handle transient database unavailability during startup (e.g., during
313    /// container orchestration where the database may start after this service).
314    ///
315    /// # Arguments
316    ///
317    /// * `database_url` - PostgreSQL connection URL
318    /// * `max_connections` - Maximum number of connections in the pool
319    /// * `retry_config` - Configuration for retry behaviour
320    ///
321    /// # Example
322    ///
323    /// ```rust,no_run
324    /// use prefix_register::{PrefixRegistry, RetryConfig};
325    /// use std::time::Duration;
326    ///
327    /// # async fn example() -> prefix_register::Result<()> {
328    /// // Use default retry config (5 retries, 1s initial, 30s max)
329    /// let registry = PrefixRegistry::new_with_retry(
330    ///     "postgres://localhost/mydb",
331    ///     10,
332    ///     RetryConfig::default(),
333    /// ).await?;
334    ///
335    /// // Or customize retry behaviour
336    /// let registry = PrefixRegistry::new_with_retry(
337    ///     "postgres://localhost/mydb",
338    ///     10,
339    ///     RetryConfig::new(
340    ///         3,                           // max 3 retries
341    ///         Duration::from_millis(500),  // start at 500ms
342    ///         Duration::from_secs(10),     // cap at 10s
343    ///     ),
344    /// ).await?;
345    /// # Ok(())
346    /// # }
347    /// ```
348    #[instrument(skip(database_url, retry_config), fields(max_connections, max_retries = retry_config.max_retries))]
349    pub async fn new_with_retry(
350        database_url: &str,
351        max_connections: usize,
352        retry_config: RetryConfig,
353    ) -> Result<Self> {
354        let mut last_error: Option<Error> = None;
355        let mut delay = retry_config.initial_delay;
356
357        for attempt in 0..=retry_config.max_retries {
358            match Self::new(database_url, max_connections).await {
359                Ok(registry) => return Ok(registry),
360                Err(e) => {
361                    // Configuration errors should not be retried
362                    if matches!(e, Error::Configuration(_)) {
363                        return Err(e);
364                    }
365
366                    last_error = Some(e);
367
368                    // Don't sleep after the last attempt
369                    if attempt < retry_config.max_retries {
370                        warn!(
371                            attempt = attempt + 1,
372                            max_retries = retry_config.max_retries,
373                            delay_ms = delay.as_millis() as u64,
374                            "connection failed, retrying"
375                        );
376                        tokio::time::sleep(delay).await;
377                        // Exponential backoff with cap
378                        delay = std::cmp::min(delay * 2, retry_config.max_delay);
379                    }
380                }
381            }
382        }
383
384        // All retries exhausted - return the last error
385        Err(last_error.expect("should have at least one error after retries"))
386    }
387
388    /// Get the URI for a given prefix.
389    ///
390    /// First checks the in-memory cache, then falls back to the database.
391    /// This is the primary method used for CURIE expansion.
392    ///
393    /// # Arguments
394    ///
395    /// * `prefix` - The namespace prefix (e.g., "foaf", "rdf")
396    ///
397    /// # Returns
398    ///
399    /// The URI if the prefix is known, None otherwise.
400    ///
401    /// # Example
402    ///
403    /// ```rust,no_run
404    /// # use prefix_register::PrefixRegistry;
405    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
406    /// if let Some(uri) = registry.get_uri_for_prefix("foaf").await? {
407    ///     println!("foaf = {}", uri);
408    /// }
409    /// # Ok(())
410    /// # }
411    /// ```
412    #[instrument(skip(self), level = "debug")]
413    pub async fn get_uri_for_prefix(&self, prefix: &str) -> Result<Option<String>> {
414        // Check cache first (fast path)
415        {
416            let cache = self.prefix_cache.read().await;
417            if let Some(uri) = cache.get(prefix) {
418                debug!("cache hit");
419                return Ok(Some(uri.clone()));
420            }
421        }
422
423        // Cache miss - check database (handles concurrent updates)
424        debug!("cache miss, querying database");
425        let client = self.pool.get().await?;
426        let row = client
427            .query_opt("SELECT uri FROM namespaces WHERE prefix = $1", &[&prefix])
428            .await?;
429
430        if let Some(row) = row {
431            let uri: String = row.get(0);
432            // Update cache for future lookups
433            {
434                let mut cache = self.prefix_cache.write().await;
435                cache.insert(prefix.to_string(), uri.clone());
436            }
437            debug!("found in database, cached");
438            Ok(Some(uri))
439        } else {
440            debug!("not found");
441            Ok(None)
442        }
443    }
444
445    /// Get the prefix for a given URI.
446    ///
447    /// Used to check if a URI already has a registered prefix.
448    ///
449    /// # Arguments
450    ///
451    /// * `uri` - The full namespace URI
452    ///
453    /// # Returns
454    ///
455    /// The prefix if the URI is registered, None otherwise.
456    ///
457    /// # Example
458    ///
459    /// ```rust,no_run
460    /// # use prefix_register::PrefixRegistry;
461    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
462    /// if let Some(prefix) = registry.get_prefix_for_uri("http://xmlns.com/foaf/0.1/").await? {
463    ///     println!("URI has prefix: {}", prefix);
464    /// }
465    /// # Ok(())
466    /// # }
467    /// ```
468    #[instrument(skip(self), level = "debug")]
469    pub async fn get_prefix_for_uri(&self, uri: &str) -> Result<Option<String>> {
470        let client = self.pool.get().await?;
471        let row = client
472            .query_opt("SELECT prefix FROM namespaces WHERE uri = $1", &[&uri])
473            .await?;
474
475        Ok(row.map(|r| r.get(0)))
476    }
477
478    /// Store a new prefix if the URI doesn't already have one.
479    ///
480    /// This follows the "first prefix wins" rule - if a URI already
481    /// has a prefix registered, the new prefix is ignored.
482    ///
483    /// # Arguments
484    ///
485    /// * `prefix` - The namespace prefix to store
486    /// * `uri` - The full namespace URI
487    ///
488    /// # Returns
489    ///
490    /// `true` if the prefix was stored, `false` if the URI already had a prefix.
491    ///
492    /// # Example
493    ///
494    /// ```rust,no_run
495    /// # use prefix_register::PrefixRegistry;
496    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
497    /// let stored = registry.store_prefix_if_new("schema", "https://schema.org/").await?;
498    /// if stored {
499    ///     println!("New prefix stored");
500    /// } else {
501    ///     println!("URI already has a prefix");
502    /// }
503    /// # Ok(())
504    /// # }
505    /// ```
506    #[instrument(skip(self))]
507    pub async fn store_prefix_if_new(&self, prefix: &str, uri: &str) -> Result<bool> {
508        // Validate inputs to prevent DoS via memory exhaustion
509        validate_prefix(prefix)?;
510        validate_uri(uri)?;
511
512        let client = self.pool.get().await?;
513
514        // Use INSERT ... ON CONFLICT to atomically check and insert
515        // This handles race conditions between multiple consumers
516        let result = client
517            .execute(
518                "INSERT INTO namespaces (uri, prefix) VALUES ($1, $2)
519                 ON CONFLICT (uri) DO NOTHING",
520                &[&uri, &prefix],
521            )
522            .await?;
523
524        if result > 0 {
525            // Successfully inserted - update our caches
526            {
527                let mut cache = self.prefix_cache.write().await;
528                cache.insert(prefix.to_string(), uri.to_string());
529            }
530            {
531                let mut reverse = self.uri_to_prefix.write().await;
532                reverse.insert(uri.to_string(), prefix.to_string());
533            }
534            debug!("stored new prefix");
535            Ok(true)
536        } else {
537            // URI already has a prefix
538            debug!("skipped, URI already has prefix");
539            Ok(false)
540        }
541    }
542
543    /// Store multiple prefixes, skipping any where the URI already has a prefix.
544    ///
545    /// More efficient than calling store_prefix_if_new repeatedly.
546    ///
547    /// # Arguments
548    ///
549    /// * `prefixes` - Iterator of (prefix, uri) pairs to store
550    ///
551    /// # Returns
552    ///
553    /// A [`BatchStoreResult`] with counts of stored and skipped prefixes.
554    ///
555    /// # Example
556    ///
557    /// ```rust,no_run
558    /// # use prefix_register::PrefixRegistry;
559    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
560    /// let prefixes = vec![
561    ///     ("foaf", "http://xmlns.com/foaf/0.1/"),
562    ///     ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
563    ///     ("schema", "https://schema.org/"),
564    /// ];
565    /// let result = registry.store_prefixes_if_new(prefixes).await?;
566    /// println!("Stored {}, skipped {}", result.stored, result.skipped);
567    /// # Ok(())
568    /// # }
569    /// ```
570    #[instrument(skip(self, prefixes))]
571    pub async fn store_prefixes_if_new<'a, I>(&self, prefixes: I) -> Result<BatchStoreResult>
572    where
573        I: IntoIterator<Item = (&'a str, &'a str)>,
574    {
575        let prefixes: Vec<_> = prefixes.into_iter().collect();
576        if prefixes.is_empty() {
577            return Ok(BatchStoreResult::default());
578        }
579
580        // Validate all inputs upfront to prevent DoS via memory exhaustion
581        for (prefix, uri) in &prefixes {
582            validate_prefix(prefix)?;
583            validate_uri(uri)?;
584        }
585
586        let total = prefixes.len();
587
588        // Collect into separate arrays for UNNEST (single round trip)
589        let uris: Vec<&str> = prefixes.iter().map(|(_, uri)| *uri).collect();
590        let prefix_names: Vec<&str> = prefixes.iter().map(|(prefix, _)| *prefix).collect();
591
592        let client = self.pool.get().await?;
593
594        // Single batch INSERT using UNNEST, returning the rows that were inserted
595        let rows = client
596            .query(
597                "INSERT INTO namespaces (uri, prefix)
598                 SELECT * FROM UNNEST($1::text[], $2::text[])
599                 ON CONFLICT (uri) DO NOTHING
600                 RETURNING prefix, uri",
601                &[&uris, &prefix_names],
602            )
603            .await?;
604
605        let stored = rows.len();
606        let skipped = total - stored;
607
608        // Update caches with the rows that were actually inserted
609        if !rows.is_empty() {
610            let mut cache = self.prefix_cache.write().await;
611            let mut reverse = self.uri_to_prefix.write().await;
612            for row in &rows {
613                let prefix: String = row.get(0);
614                let uri: String = row.get(1);
615                reverse.insert(uri.clone(), prefix.clone());
616                cache.insert(prefix, uri);
617            }
618        }
619
620        info!(total, stored, skipped, "batch store complete");
621
622        Ok(BatchStoreResult { stored, skipped })
623    }
624
625    /// Expand a CURIE (Compact URI) to a full URI.
626    ///
627    /// Given a prefix and local name, returns the expanded URI
628    /// if the prefix is known.
629    ///
630    /// # Arguments
631    ///
632    /// * `prefix` - The namespace prefix (e.g., "foaf")
633    /// * `local_name` - The local part (e.g., "Person")
634    ///
635    /// # Returns
636    ///
637    /// The full URI (e.g., "http://xmlns.com/foaf/0.1/Person")
638    /// or None if the prefix is unknown.
639    ///
640    /// # Example
641    ///
642    /// ```rust,no_run
643    /// # use prefix_register::PrefixRegistry;
644    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
645    /// if let Some(uri) = registry.expand_curie("foaf", "Person").await? {
646    ///     println!("foaf:Person = {}", uri);
647    ///     // Output: foaf:Person = http://xmlns.com/foaf/0.1/Person
648    /// }
649    /// # Ok(())
650    /// # }
651    /// ```
652    #[instrument(skip(self), level = "debug")]
653    pub async fn expand_curie(&self, prefix: &str, local_name: &str) -> Result<Option<String>> {
654        if let Some(base_uri) = self.get_uri_for_prefix(prefix).await? {
655            Ok(Some(format!("{}{}", base_uri, local_name)))
656        } else {
657            // Unknown prefix - caller can decide how to handle
658            Ok(None)
659        }
660    }
661
662    /// Get all registered prefixes.
663    ///
664    /// Returns a copy of the in-memory cache containing all prefix -> URI mappings.
665    ///
666    /// # Example
667    ///
668    /// ```rust,no_run
669    /// # use prefix_register::PrefixRegistry;
670    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
671    /// let prefixes = registry.get_all_prefixes().await;
672    /// for (prefix, uri) in prefixes {
673    ///     println!("{}: {}", prefix, uri);
674    /// }
675    /// # Ok(())
676    /// # }
677    /// ```
678    pub async fn get_all_prefixes(&self) -> HashMap<String, String> {
679        self.prefix_cache.read().await.clone()
680    }
681
682    /// Get the number of registered prefixes.
683    ///
684    /// # Example
685    ///
686    /// ```rust,no_run
687    /// # use prefix_register::PrefixRegistry;
688    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
689    /// let count = registry.prefix_count().await;
690    /// println!("Registered prefixes: {}", count);
691    /// # Ok(())
692    /// # }
693    /// ```
694    pub async fn prefix_count(&self) -> usize {
695        self.prefix_cache.read().await.len()
696    }
697
698    /// Shorten a URI to a (prefix, local_name) tuple.
699    ///
700    /// Uses longest-match semantics: if multiple namespaces match the URI,
701    /// the longest one wins. For example, with namespaces:
702    /// - A = `http://a/`
703    /// - B = `http://a/b#`
704    ///
705    /// The URI `http://a/b#thing` would shorten to `("B", "thing")`, not `("A", "b#thing")`.
706    ///
707    /// # Arguments
708    ///
709    /// * `uri` - The full URI to shorten
710    ///
711    /// # Returns
712    ///
713    /// `Some((prefix, local_name))` if a matching namespace is found, `None` otherwise.
714    ///
715    /// # Example
716    ///
717    /// ```rust,no_run
718    /// # use prefix_register::PrefixRegistry;
719    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
720    /// if let Some((prefix, local)) = registry.shorten_uri("http://xmlns.com/foaf/0.1/Person").await? {
721    ///     println!("{}:{}", prefix, local);
722    ///     // Output: foaf:Person
723    /// }
724    /// # Ok(())
725    /// # }
726    /// ```
727    #[instrument(skip(self), level = "debug")]
728    pub async fn shorten_uri(&self, uri: &str) -> Result<Option<(String, String)>> {
729        let reverse = self.uri_to_prefix.read().await;
730
731        // Find the longest matching namespace.
732        // NOTE: This is O(n) where n = number of registered namespaces. Fine for typical
733        // registries (thousands of entries), but if this becomes a bottleneck with very
734        // large registries, consider replacing HashMap with a Trie for O(m) lookup
735        // where m = URI length.
736        let mut best_match: Option<(&str, &str)> = None;
737        let mut best_len = 0;
738
739        for (namespace_uri, prefix) in reverse.iter() {
740            if uri.starts_with(namespace_uri) && namespace_uri.len() > best_len {
741                best_match = Some((prefix.as_str(), namespace_uri.as_str()));
742                best_len = namespace_uri.len();
743            }
744        }
745
746        if let Some((prefix, namespace_uri)) = best_match {
747            let local_name = &uri[namespace_uri.len()..];
748            debug!(prefix, local_name, "shortened URI");
749            Ok(Some((prefix.to_string(), local_name.to_string())))
750        } else {
751            debug!("no matching namespace found");
752            Ok(None)
753        }
754    }
755
756    /// Shorten a URI to a CURIE string, or return the original URI if no match.
757    ///
758    /// This is a convenience method that returns a ready-to-use string.
759    ///
760    /// # Arguments
761    ///
762    /// * `uri` - The full URI to shorten
763    ///
764    /// # Returns
765    ///
766    /// A CURIE string like `"prefix:local"`, or the original URI if no namespace matches.
767    ///
768    /// # Example
769    ///
770    /// ```rust,no_run
771    /// # use prefix_register::PrefixRegistry;
772    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
773    /// let result = registry.shorten_uri_or_full("http://xmlns.com/foaf/0.1/Person").await?;
774    /// println!("{}", result);
775    /// // Output: foaf:Person (or the full URI if foaf is not registered)
776    /// # Ok(())
777    /// # }
778    /// ```
779    #[instrument(skip(self), level = "debug")]
780    pub async fn shorten_uri_or_full(&self, uri: &str) -> Result<String> {
781        if let Some((prefix, local_name)) = self.shorten_uri(uri).await? {
782            Ok(format!("{}:{}", prefix, local_name))
783        } else {
784            Ok(uri.to_string())
785        }
786    }
787
788    /// Shorten multiple URIs in batch.
789    ///
790    /// Preserves order. Returns `None` for URIs that don't match any namespace.
791    ///
792    /// # Arguments
793    ///
794    /// * `uris` - Iterator of URIs to shorten
795    ///
796    /// # Returns
797    ///
798    /// A vector with `Some((prefix, local_name))` for matched URIs, `None` for unmatched.
799    ///
800    /// # Example
801    ///
802    /// ```rust,no_run
803    /// # use prefix_register::PrefixRegistry;
804    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
805    /// let uris = vec![
806    ///     "http://xmlns.com/foaf/0.1/Person",
807    ///     "http://unknown.org/thing",
808    ///     "https://schema.org/Organization",
809    /// ];
810    /// let results = registry.shorten_uri_batch(uris).await?;
811    /// for (uri, result) in ["Person", "thing", "Organization"].iter().zip(results) {
812    ///     match result {
813    ///         Some((prefix, local)) => println!("{}:{}", prefix, local),
814    ///         None => println!("No match"),
815    ///     }
816    /// }
817    /// # Ok(())
818    /// # }
819    /// ```
820    #[instrument(skip(self, uris), level = "debug")]
821    pub async fn shorten_uri_batch<'a, I>(&self, uris: I) -> Result<Vec<Option<(String, String)>>>
822    where
823        I: IntoIterator<Item = &'a str>,
824    {
825        let reverse = self.uri_to_prefix.read().await;
826        let uris: Vec<_> = uris.into_iter().collect();
827        let mut results = Vec::with_capacity(uris.len());
828
829        for uri in uris {
830            // Find the longest matching namespace
831            let mut best_match: Option<(&str, &str)> = None;
832            let mut best_len = 0;
833
834            for (namespace_uri, prefix) in reverse.iter() {
835                if uri.starts_with(namespace_uri) && namespace_uri.len() > best_len {
836                    best_match = Some((prefix.as_str(), namespace_uri.as_str()));
837                    best_len = namespace_uri.len();
838                }
839            }
840
841            if let Some((prefix, namespace_uri)) = best_match {
842                let local_name = &uri[namespace_uri.len()..];
843                results.push(Some((prefix.to_string(), local_name.to_string())));
844            } else {
845                results.push(None);
846            }
847        }
848
849        debug!(count = results.len(), "batch shorten complete");
850        Ok(results)
851    }
852
853    /// Expand multiple CURIEs in batch.
854    ///
855    /// Preserves order. Returns `None` for CURIEs with unknown prefixes.
856    ///
857    /// # Arguments
858    ///
859    /// * `curies` - Iterator of (prefix, local_name) pairs
860    ///
861    /// # Returns
862    ///
863    /// A vector with `Some(uri)` for known prefixes, `None` for unknown.
864    ///
865    /// # Example
866    ///
867    /// ```rust,no_run
868    /// # use prefix_register::PrefixRegistry;
869    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
870    /// let curies = vec![
871    ///     ("foaf", "Person"),
872    ///     ("unknown", "Thing"),
873    ///     ("schema", "Organization"),
874    /// ];
875    /// let results = registry.expand_curie_batch(curies).await?;
876    /// for result in results {
877    ///     match result {
878    ///         Some(uri) => println!("{}", uri),
879    ///         None => println!("Unknown prefix"),
880    ///     }
881    /// }
882    /// # Ok(())
883    /// # }
884    /// ```
885    #[instrument(skip(self, curies), level = "debug")]
886    pub async fn expand_curie_batch<'a, I>(&self, curies: I) -> Result<Vec<Option<String>>>
887    where
888        I: IntoIterator<Item = (&'a str, &'a str)>,
889    {
890        let cache = self.prefix_cache.read().await;
891        let curies: Vec<_> = curies.into_iter().collect();
892        let mut results = Vec::with_capacity(curies.len());
893
894        for (prefix, local_name) in curies {
895            if let Some(base_uri) = cache.get(prefix) {
896                results.push(Some(format!("{}{}", base_uri, local_name)));
897            } else {
898                results.push(None);
899            }
900        }
901
902        debug!(count = results.len(), "batch expand complete");
903        Ok(results)
904    }
905}
906
907#[cfg(test)]
908mod tests {
909    use super::*;
910
911    // ==================== Unit Tests ====================
912    // These run without a database
913
914    #[test]
915    fn test_configuration_error_max_connections() {
916        let err = ConfigurationError::InvalidMaxConnections(0);
917        assert!(err.to_string().contains("max_connections"));
918    }
919
920    #[test]
921    fn test_configuration_error_database_url() {
922        let err = ConfigurationError::InvalidDatabaseUrl("empty".to_string());
923        assert!(err.to_string().contains("database_url"));
924    }
925
926    #[test]
927    fn test_batch_store_result_default() {
928        let result = BatchStoreResult::default();
929        assert_eq!(result.stored, 0);
930        assert_eq!(result.skipped, 0);
931        assert_eq!(result.total(), 0);
932        assert!(result.all_stored());
933        assert!(result.none_stored());
934    }
935
936    #[test]
937    fn test_batch_store_result_all_stored() {
938        let result = BatchStoreResult {
939            stored: 5,
940            skipped: 0,
941        };
942        assert_eq!(result.total(), 5);
943        assert!(result.all_stored());
944        assert!(!result.none_stored());
945    }
946
947    #[test]
948    fn test_batch_store_result_mixed() {
949        let result = BatchStoreResult {
950            stored: 3,
951            skipped: 2,
952        };
953        assert_eq!(result.total(), 5);
954        assert!(!result.all_stored());
955        assert!(!result.none_stored());
956    }
957
958    #[test]
959    fn test_batch_store_result_all_skipped() {
960        let result = BatchStoreResult {
961            stored: 0,
962            skipped: 5,
963        };
964        assert_eq!(result.total(), 5);
965        assert!(!result.all_stored());
966        assert!(result.none_stored());
967    }
968
969    // ==================== Validation Tests ====================
970
971    #[test]
972    fn test_validate_prefix_valid() {
973        assert!(validate_prefix("foaf").is_ok());
974        assert!(validate_prefix("rdf").is_ok());
975        assert!(validate_prefix("schema_org").is_ok());
976        assert!(validate_prefix("my-prefix").is_ok());
977        assert!(validate_prefix("prefix123").is_ok());
978        assert!(validate_prefix("a").is_ok()); // Single char is valid
979    }
980
981    #[test]
982    fn test_validate_prefix_empty() {
983        let result = validate_prefix("");
984        assert!(result.is_err());
985        assert!(result.unwrap_err().to_string().contains("empty"));
986    }
987
988    #[test]
989    fn test_validate_prefix_too_long() {
990        let long_prefix = "a".repeat(MAX_PREFIX_LENGTH + 1);
991        let result = validate_prefix(&long_prefix);
992        assert!(result.is_err());
993        assert!(result.unwrap_err().to_string().contains("maximum length"));
994    }
995
996    #[test]
997    fn test_validate_prefix_max_length_ok() {
998        let max_prefix = "a".repeat(MAX_PREFIX_LENGTH);
999        assert!(validate_prefix(&max_prefix).is_ok());
1000    }
1001
1002    #[test]
1003    fn test_validate_prefix_invalid_chars() {
1004        // Spaces not allowed
1005        let result = validate_prefix("foo bar");
1006        assert!(result.is_err());
1007        assert!(result.unwrap_err().to_string().contains("alphanumeric"));
1008
1009        // Colons not allowed
1010        let result = validate_prefix("foo:bar");
1011        assert!(result.is_err());
1012
1013        // Slashes not allowed
1014        let result = validate_prefix("foo/bar");
1015        assert!(result.is_err());
1016
1017        // Unicode not allowed
1018        let result = validate_prefix("préfix");
1019        assert!(result.is_err());
1020    }
1021
1022    #[test]
1023    fn test_validate_uri_valid() {
1024        assert!(validate_uri("http://example.org/").is_ok());
1025        assert!(validate_uri("https://schema.org/Person").is_ok());
1026        assert!(validate_uri("urn:isbn:0451450523").is_ok());
1027        assert!(validate_uri("http://example.org/path?query=1#fragment").is_ok());
1028    }
1029
1030    #[test]
1031    fn test_validate_uri_empty() {
1032        let result = validate_uri("");
1033        assert!(result.is_err());
1034        assert!(result.unwrap_err().to_string().contains("empty"));
1035    }
1036
1037    #[test]
1038    fn test_validate_uri_too_long() {
1039        let long_uri = format!("http://example.org/{}", "a".repeat(MAX_URI_LENGTH));
1040        let result = validate_uri(&long_uri);
1041        assert!(result.is_err());
1042        assert!(result.unwrap_err().to_string().contains("maximum length"));
1043    }
1044
1045    #[test]
1046    fn test_validate_uri_max_length_ok() {
1047        // Create a URI exactly at the max length
1048        let base = "http://example.org/";
1049        let padding = "a".repeat(MAX_URI_LENGTH - base.len());
1050        let max_uri = format!("{}{}", base, padding);
1051        assert_eq!(max_uri.len(), MAX_URI_LENGTH);
1052        assert!(validate_uri(&max_uri).is_ok());
1053    }
1054
1055    // ==================== RetryConfig Tests ====================
1056
1057    #[test]
1058    fn test_retry_config_default() {
1059        let config = RetryConfig::default();
1060        assert_eq!(config.max_retries, 5);
1061        assert_eq!(config.initial_delay, Duration::from_secs(1));
1062        assert_eq!(config.max_delay, Duration::from_secs(30));
1063    }
1064
1065    #[test]
1066    fn test_retry_config_none() {
1067        let config = RetryConfig::none();
1068        assert_eq!(config.max_retries, 0);
1069        assert_eq!(config.initial_delay, Duration::ZERO);
1070        assert_eq!(config.max_delay, Duration::ZERO);
1071    }
1072
1073    #[test]
1074    fn test_retry_config_custom() {
1075        let config = RetryConfig::new(3, Duration::from_millis(100), Duration::from_secs(5));
1076        assert_eq!(config.max_retries, 3);
1077        assert_eq!(config.initial_delay, Duration::from_millis(100));
1078        assert_eq!(config.max_delay, Duration::from_secs(5));
1079    }
1080
1081    #[tokio::test]
1082    async fn test_new_with_retry_config_error_not_retried() {
1083        // Configuration errors should fail immediately, not retry
1084        let result = PrefixRegistry::new_with_retry(
1085            "", // Empty URL is a config error
1086            5,
1087            RetryConfig::default(),
1088        )
1089        .await;
1090
1091        assert!(matches!(
1092            result,
1093            Err(Error::Configuration(
1094                ConfigurationError::InvalidDatabaseUrl(_)
1095            ))
1096        ));
1097    }
1098
1099    // ==================== Integration Tests ====================
1100    // These require DATABASE_URL to be set (provided by CI)
1101
1102    /// Helper to get database URL from environment.
1103    /// Returns None if not set (skips integration tests locally).
1104    fn get_test_database_url() -> Option<String> {
1105        std::env::var("DATABASE_URL").ok()
1106    }
1107
1108    /// Helper to clean up test data. Uses a unique prefix to avoid conflicts.
1109    async fn cleanup_test_data(registry: &PrefixRegistry, test_prefix: &str) {
1110        let client = registry.pool.get().await.unwrap();
1111        client
1112            .execute(
1113                "DELETE FROM namespaces WHERE prefix LIKE $1",
1114                &[&format!("{}%", test_prefix)],
1115            )
1116            .await
1117            .unwrap();
1118    }
1119
1120    #[tokio::test]
1121    async fn test_new_with_invalid_max_connections() {
1122        let result = PrefixRegistry::new("postgres://localhost/test", 0).await;
1123        assert!(matches!(
1124            result,
1125            Err(Error::Configuration(
1126                ConfigurationError::InvalidMaxConnections(0)
1127            ))
1128        ));
1129    }
1130
1131    #[tokio::test]
1132    async fn test_new_with_empty_url() {
1133        let result = PrefixRegistry::new("", 5).await;
1134        assert!(matches!(
1135            result,
1136            Err(Error::Configuration(
1137                ConfigurationError::InvalidDatabaseUrl(_)
1138            ))
1139        ));
1140    }
1141
1142    #[tokio::test]
1143    async fn test_store_and_retrieve_prefix() {
1144        let Some(db_url) = get_test_database_url() else {
1145            return; // Skip if no database
1146        };
1147
1148        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1149        let test_prefix = "test_sr_";
1150        cleanup_test_data(&registry, test_prefix).await;
1151
1152        // Store a new prefix (use unique URI to avoid conflicts)
1153        let prefix = format!("{test_prefix}ns");
1154        let uri = format!("http://example.org/{test_prefix}ns/");
1155        let stored = registry.store_prefix_if_new(&prefix, &uri).await.unwrap();
1156        assert!(stored, "First store should succeed");
1157
1158        // Retrieve by prefix
1159        let retrieved = registry.get_uri_for_prefix(&prefix).await.unwrap();
1160        assert_eq!(retrieved, Some(uri.to_string()));
1161
1162        // Retrieve by URI
1163        let retrieved_prefix = registry.get_prefix_for_uri(&uri).await.unwrap();
1164        assert_eq!(retrieved_prefix, Some(prefix.clone()));
1165
1166        cleanup_test_data(&registry, test_prefix).await;
1167    }
1168
1169    #[tokio::test]
1170    async fn test_first_prefix_wins() {
1171        let Some(db_url) = get_test_database_url() else {
1172            return;
1173        };
1174
1175        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1176        let test_prefix = "test_fpw_";
1177        cleanup_test_data(&registry, test_prefix).await;
1178
1179        let uri = "http://example.org/test/first-wins/";
1180        let first_prefix = format!("{test_prefix}first");
1181        let second_prefix = format!("{test_prefix}second");
1182
1183        // Store first prefix
1184        let stored1 = registry
1185            .store_prefix_if_new(&first_prefix, uri)
1186            .await
1187            .unwrap();
1188        assert!(stored1, "First prefix should be stored");
1189
1190        // Try to store second prefix for same URI
1191        let stored2 = registry
1192            .store_prefix_if_new(&second_prefix, uri)
1193            .await
1194            .unwrap();
1195        assert!(!stored2, "Second prefix should be rejected");
1196
1197        // Verify the first prefix is still there
1198        let retrieved = registry.get_prefix_for_uri(uri).await.unwrap();
1199        assert_eq!(retrieved, Some(first_prefix));
1200
1201        cleanup_test_data(&registry, test_prefix).await;
1202    }
1203
1204    #[tokio::test]
1205    async fn test_expand_curie() {
1206        let Some(db_url) = get_test_database_url() else {
1207            return;
1208        };
1209
1210        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1211        let test_prefix = "test_ec_";
1212        cleanup_test_data(&registry, test_prefix).await;
1213
1214        // Use unique URI to avoid conflicts
1215        let prefix = format!("{test_prefix}ns");
1216        let uri = format!("http://example.org/{test_prefix}ns/");
1217        registry.store_prefix_if_new(&prefix, &uri).await.unwrap();
1218
1219        // Expand known prefix
1220        let expanded = registry.expand_curie(&prefix, "Person").await.unwrap();
1221        assert_eq!(expanded, Some(format!("{uri}Person")));
1222
1223        // Unknown prefix returns None
1224        let unknown = registry
1225            .expand_curie(&format!("{test_prefix}unknown"), "Thing")
1226            .await
1227            .unwrap();
1228        assert_eq!(unknown, None);
1229
1230        cleanup_test_data(&registry, test_prefix).await;
1231    }
1232
1233    #[tokio::test]
1234    async fn test_batch_store_prefixes() {
1235        let Some(db_url) = get_test_database_url() else {
1236            return;
1237        };
1238
1239        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1240        let test_prefix = "test_bs_";
1241        cleanup_test_data(&registry, test_prefix).await;
1242
1243        // Use unique URIs to avoid conflicts with pre-existing data
1244        let prefixes = [
1245            (
1246                format!("{test_prefix}ns1"),
1247                format!("http://example.org/{test_prefix}ns1/"),
1248            ),
1249            (
1250                format!("{test_prefix}ns2"),
1251                format!("http://example.org/{test_prefix}ns2/"),
1252            ),
1253            (
1254                format!("{test_prefix}ns3"),
1255                format!("http://example.org/{test_prefix}ns3/"),
1256            ),
1257        ];
1258
1259        let initial_count = registry.prefix_count().await;
1260
1261        let prefix_refs: Vec<(&str, &str)> = prefixes
1262            .iter()
1263            .map(|(p, u)| (p.as_str(), u.as_str()))
1264            .collect();
1265
1266        let result = registry.store_prefixes_if_new(prefix_refs).await.unwrap();
1267        assert_eq!(result.stored, 3);
1268        assert_eq!(result.skipped, 0);
1269        assert!(result.all_stored());
1270
1271        // Verify all were stored (count increased by 3)
1272        assert_eq!(registry.prefix_count().await, initial_count + 3);
1273
1274        cleanup_test_data(&registry, test_prefix).await;
1275    }
1276
1277    #[tokio::test]
1278    async fn test_batch_store_with_duplicates() {
1279        let Some(db_url) = get_test_database_url() else {
1280            return;
1281        };
1282
1283        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1284        let test_prefix = "test_bsd_";
1285        cleanup_test_data(&registry, test_prefix).await;
1286
1287        // Pre-store one prefix
1288        let existing_uri = "http://example.org/existing/";
1289        registry
1290            .store_prefix_if_new(&format!("{test_prefix}existing"), existing_uri)
1291            .await
1292            .unwrap();
1293
1294        // Batch store including the existing URI with a different prefix
1295        let prefixes = [
1296            (
1297                format!("{test_prefix}new1"),
1298                "http://example.org/new1/".to_string(),
1299            ),
1300            (format!("{test_prefix}duplicate"), existing_uri.to_string()), // Should be skipped
1301            (
1302                format!("{test_prefix}new2"),
1303                "http://example.org/new2/".to_string(),
1304            ),
1305        ];
1306
1307        let prefix_refs: Vec<(&str, &str)> = prefixes
1308            .iter()
1309            .map(|(p, u)| (p.as_str(), u.as_str()))
1310            .collect();
1311
1312        let result = registry.store_prefixes_if_new(prefix_refs).await.unwrap();
1313        assert_eq!(result.stored, 2);
1314        assert_eq!(result.skipped, 1);
1315        assert!(!result.all_stored());
1316        assert!(!result.none_stored());
1317
1318        cleanup_test_data(&registry, test_prefix).await;
1319    }
1320
1321    #[tokio::test]
1322    async fn test_batch_store_empty() {
1323        let Some(db_url) = get_test_database_url() else {
1324            return;
1325        };
1326
1327        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1328        let empty: Vec<(&str, &str)> = vec![];
1329
1330        let result = registry.store_prefixes_if_new(empty).await.unwrap();
1331        assert_eq!(result.stored, 0);
1332        assert_eq!(result.skipped, 0);
1333        assert!(result.all_stored()); // Vacuously true
1334        assert!(result.none_stored());
1335    }
1336
1337    #[tokio::test]
1338    async fn test_get_all_prefixes() {
1339        let Some(db_url) = get_test_database_url() else {
1340            return;
1341        };
1342
1343        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1344        let test_prefix = "test_gap_";
1345        cleanup_test_data(&registry, test_prefix).await;
1346
1347        // Store some prefixes
1348        let prefix1 = format!("{test_prefix}a");
1349        let prefix2 = format!("{test_prefix}b");
1350        registry
1351            .store_prefix_if_new(&prefix1, "http://example.org/a/")
1352            .await
1353            .unwrap();
1354        registry
1355            .store_prefix_if_new(&prefix2, "http://example.org/b/")
1356            .await
1357            .unwrap();
1358
1359        let all = registry.get_all_prefixes().await;
1360        assert!(all.contains_key(&prefix1));
1361        assert!(all.contains_key(&prefix2));
1362        assert_eq!(
1363            all.get(&prefix1),
1364            Some(&"http://example.org/a/".to_string())
1365        );
1366
1367        cleanup_test_data(&registry, test_prefix).await;
1368    }
1369
1370    #[tokio::test]
1371    async fn test_cache_populated_on_startup() {
1372        let Some(db_url) = get_test_database_url() else {
1373            return;
1374        };
1375
1376        let test_prefix = "test_cache_";
1377
1378        // Create first registry and store a prefix
1379        {
1380            let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1381            cleanup_test_data(&registry, test_prefix).await;
1382            registry
1383                .store_prefix_if_new(
1384                    &format!("{test_prefix}cached"),
1385                    "http://example.org/cached/",
1386                )
1387                .await
1388                .unwrap();
1389        }
1390
1391        // Create new registry - should have prefix in cache from startup
1392        let registry2 = PrefixRegistry::new(&db_url, 5).await.unwrap();
1393        let cached = registry2.get_all_prefixes().await;
1394        assert!(cached.contains_key(&format!("{test_prefix}cached")));
1395
1396        cleanup_test_data(&registry2, test_prefix).await;
1397    }
1398
1399    #[tokio::test]
1400    async fn test_unknown_prefix_returns_none() {
1401        let Some(db_url) = get_test_database_url() else {
1402            return;
1403        };
1404
1405        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1406
1407        let result = registry
1408            .get_uri_for_prefix("definitely_not_a_real_prefix_xyz123")
1409            .await
1410            .unwrap();
1411        assert_eq!(result, None);
1412    }
1413
1414    #[tokio::test]
1415    async fn test_unknown_uri_returns_none() {
1416        let Some(db_url) = get_test_database_url() else {
1417            return;
1418        };
1419
1420        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1421
1422        let result = registry
1423            .get_prefix_for_uri("http://definitely-not-registered.example.org/")
1424            .await
1425            .unwrap();
1426        assert_eq!(result, None);
1427    }
1428
1429    #[tokio::test]
1430    async fn test_new_with_retry_succeeds() {
1431        let Some(db_url) = get_test_database_url() else {
1432            return;
1433        };
1434
1435        // Should succeed on first try with valid database
1436        let registry = PrefixRegistry::new_with_retry(
1437            &db_url,
1438            5,
1439            RetryConfig::none(), // No retries needed for working DB
1440        )
1441        .await
1442        .unwrap();
1443
1444        // Verify it works
1445        let test_prefix = "test_nwr_";
1446        cleanup_test_data(&registry, test_prefix).await;
1447
1448        let stored = registry
1449            .store_prefix_if_new(&format!("{test_prefix}retry"), "http://example.org/retry/")
1450            .await
1451            .unwrap();
1452        assert!(stored);
1453
1454        cleanup_test_data(&registry, test_prefix).await;
1455    }
1456
1457    // ==================== Shorten URI Tests ====================
1458
1459    #[tokio::test]
1460    async fn test_shorten_uri() {
1461        let Some(db_url) = get_test_database_url() else {
1462            return;
1463        };
1464
1465        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1466        let test_prefix = "test_su_";
1467        cleanup_test_data(&registry, test_prefix).await;
1468
1469        // Use unique URI to avoid conflicts
1470        let prefix = format!("{test_prefix}ns");
1471        let uri = format!("http://example.org/{test_prefix}ns/");
1472        registry.store_prefix_if_new(&prefix, &uri).await.unwrap();
1473
1474        // Shorten a known URI
1475        let result = registry.shorten_uri(&format!("{uri}Person")).await.unwrap();
1476        assert_eq!(result, Some((prefix.clone(), "Person".to_string())));
1477
1478        // Unknown URI returns None
1479        let unknown = registry
1480            .shorten_uri("http://unknown.org/thing")
1481            .await
1482            .unwrap();
1483        assert_eq!(unknown, None);
1484
1485        cleanup_test_data(&registry, test_prefix).await;
1486    }
1487
1488    #[tokio::test]
1489    async fn test_shorten_uri_longest_match() {
1490        let Some(db_url) = get_test_database_url() else {
1491            return;
1492        };
1493
1494        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1495        let test_prefix = "test_sulm_";
1496        cleanup_test_data(&registry, test_prefix).await;
1497
1498        // Register overlapping namespaces
1499        let short_prefix = format!("{test_prefix}short");
1500        let long_prefix = format!("{test_prefix}long");
1501        registry
1502            .store_prefix_if_new(&short_prefix, "http://example.org/")
1503            .await
1504            .unwrap();
1505        registry
1506            .store_prefix_if_new(&long_prefix, "http://example.org/nested/")
1507            .await
1508            .unwrap();
1509
1510        // URI that matches both - should use the longer namespace
1511        let result = registry
1512            .shorten_uri("http://example.org/nested/thing")
1513            .await
1514            .unwrap();
1515        assert_eq!(result, Some((long_prefix.clone(), "thing".to_string())));
1516
1517        // URI that only matches the short namespace
1518        let result = registry
1519            .shorten_uri("http://example.org/other")
1520            .await
1521            .unwrap();
1522        assert_eq!(result, Some((short_prefix.clone(), "other".to_string())));
1523
1524        cleanup_test_data(&registry, test_prefix).await;
1525    }
1526
1527    #[tokio::test]
1528    async fn test_shorten_uri_or_full() {
1529        let Some(db_url) = get_test_database_url() else {
1530            return;
1531        };
1532
1533        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1534        let test_prefix = "test_suof_";
1535        cleanup_test_data(&registry, test_prefix).await;
1536
1537        // Use unique URIs to avoid conflicts with pre-existing data
1538        let prefix = format!("{test_prefix}ns");
1539        let uri = format!("http://example.org/{test_prefix}ns/");
1540        registry.store_prefix_if_new(&prefix, &uri).await.unwrap();
1541
1542        // Known URI returns CURIE
1543        let result = registry
1544            .shorten_uri_or_full(&format!("{uri}Person"))
1545            .await
1546            .unwrap();
1547        assert_eq!(result, format!("{}:Person", prefix));
1548
1549        // Unknown URI returns original
1550        let unknown_uri = "http://unknown.org/thing";
1551        let result = registry.shorten_uri_or_full(unknown_uri).await.unwrap();
1552        assert_eq!(result, unknown_uri);
1553
1554        cleanup_test_data(&registry, test_prefix).await;
1555    }
1556
1557    #[tokio::test]
1558    async fn test_shorten_uri_batch() {
1559        let Some(db_url) = get_test_database_url() else {
1560            return;
1561        };
1562
1563        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1564        let test_prefix = "test_sub_";
1565        cleanup_test_data(&registry, test_prefix).await;
1566
1567        // Use unique URIs to avoid conflicts with pre-existing data
1568        let prefix1 = format!("{test_prefix}ns1");
1569        let prefix2 = format!("{test_prefix}ns2");
1570        let uri1 = format!("http://example.org/{test_prefix}ns1/");
1571        let uri2 = format!("http://example.org/{test_prefix}ns2/");
1572        registry.store_prefix_if_new(&prefix1, &uri1).await.unwrap();
1573        registry.store_prefix_if_new(&prefix2, &uri2).await.unwrap();
1574
1575        let uris = [
1576            format!("{uri1}Person"),
1577            "http://unknown.org/thing".to_string(),
1578            format!("{uri2}Organization"),
1579        ];
1580
1581        let results = registry
1582            .shorten_uri_batch(uris.iter().map(|s| s.as_str()))
1583            .await
1584            .unwrap();
1585        assert_eq!(results.len(), 3);
1586        assert_eq!(results[0], Some((prefix1.clone(), "Person".to_string())));
1587        assert_eq!(results[1], None); // Unknown
1588        assert_eq!(
1589            results[2],
1590            Some((prefix2.clone(), "Organization".to_string()))
1591        );
1592
1593        cleanup_test_data(&registry, test_prefix).await;
1594    }
1595
1596    #[tokio::test]
1597    async fn test_expand_curie_batch() {
1598        let Some(db_url) = get_test_database_url() else {
1599            return;
1600        };
1601
1602        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1603        let test_prefix = "test_ecb_";
1604        cleanup_test_data(&registry, test_prefix).await;
1605
1606        // Use unique URIs to avoid conflicts
1607        let prefix1 = format!("{test_prefix}ns1");
1608        let prefix2 = format!("{test_prefix}ns2");
1609        let uri1 = format!("http://example.org/{test_prefix}ns1/");
1610        let uri2 = format!("http://example.org/{test_prefix}ns2/");
1611        registry.store_prefix_if_new(&prefix1, &uri1).await.unwrap();
1612        registry.store_prefix_if_new(&prefix2, &uri2).await.unwrap();
1613
1614        let curies = vec![
1615            (prefix1.as_str(), "Person"),
1616            ("unknown_prefix_xyz", "Thing"),
1617            (prefix2.as_str(), "Organization"),
1618        ];
1619
1620        let results = registry.expand_curie_batch(curies).await.unwrap();
1621        assert_eq!(results.len(), 3);
1622        assert_eq!(results[0], Some(format!("{uri1}Person")));
1623        assert_eq!(results[1], None); // Unknown prefix
1624        assert_eq!(results[2], Some(format!("{uri2}Organization")));
1625
1626        cleanup_test_data(&registry, test_prefix).await;
1627    }
1628}