prefix_register/
lib.rs

1// Copyright TELICENT LTD
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! # Prefix Register
16//!
17//! **Status: Beta** - API may change before 1.0 release.
18//!
19//! A PostgreSQL-backed namespace prefix registry for [CURIE](https://www.w3.org/TR/curie/)
20//! expansion and prefix management.
21//!
22//! This library provides bidirectional mapping between namespace prefixes
23//! (like "foaf", "rdf", "schema") and their full URI bases, optimised for
24//! use in RDF/semantic web applications.
25//!
26//! **API:** Async-only, built on tokio and deadpool-postgres for high concurrency.
27//!
28//! ## Features
29//!
30//! - **Async-only** - Built on tokio for high concurrency
31//! - **In-memory caching** - Prefixes loaded on startup for fast CURIE expansion
32//! - **First-prefix-wins** - Each URI can only have one registered prefix
33//! - **Batch operations** - Efficiently store multiple prefixes in a single round trip
34//! - **PostgreSQL backend** - Durable, scalable storage with connection pooling
35//! - **Startup resilience** - Optional retry with exponential backoff for container orchestration
36//! - **Input validation** - Prevents DoS via length limits (prefix max 64, URI max 2048 chars)
37//! - **Tracing instrumentation** - Built-in spans and events for observability
38//!
39//! ## Use Cases
40//!
41//! - CURIE expansion in RDF processing
42//! - Namespace prefix management for semantic web applications
43//! - Prefix discovery from Turtle, JSON-LD, XML documents
44//!
45//! ## Example
46//!
47//! ```rust,no_run
48//! use prefix_register::PrefixRegistry;
49//!
50//! #[tokio::main]
51//! async fn main() -> prefix_register::Result<()> {
52//!     // Connect to PostgreSQL (schema must have namespaces table)
53//!     let registry = PrefixRegistry::new(
54//!         "postgres://localhost/mydb",
55//!         10,  // max connections
56//!     ).await?;
57//!
58//!     // Store a prefix (only if URI doesn't already have one)
59//!     let stored = registry.store_prefix_if_new("foaf", "http://xmlns.com/foaf/0.1/").await?;
60//!     println!("Prefix stored: {}", stored);
61//!
62//!     // Expand a CURIE
63//!     if let Some(uri) = registry.expand_curie("foaf", "Person").await? {
64//!         println!("foaf:Person = {}", uri);
65//!     }
66//!
67//!     Ok(())
68//! }
69//! ```
70
71mod error;
72
73pub use error::{ConfigurationError, Error, Result};
74
75use deadpool_postgres::{Config as PoolConfig, Pool, Runtime};
76use std::collections::HashMap;
77use std::time::Duration;
78use tokio::sync::RwLock;
79use tokio_postgres::NoTls;
80use tracing::{debug, info, instrument, warn};
81
/// Maximum accepted length of a prefix (64).
///
/// `validate_prefix` compares this against `str::len()`, i.e. the UTF-8 byte
/// length. Accepted prefixes are ASCII-only, so for any prefix that passes
/// validation this is also the maximum number of characters.
pub const MAX_PREFIX_LENGTH: usize = 64;

/// Maximum accepted length of a URI (2048).
///
/// `validate_uri` compares this against `str::len()`, so the limit is measured
/// in bytes of UTF-8: a URI containing non-ASCII characters reaches the limit
/// with fewer than 2048 characters.
pub const MAX_URI_LENGTH: usize = 2048;
87
/// Settings that control how startup connection attempts are retried.
///
/// Hand one of these to [`PrefixRegistry::new_with_retry`] so that a database
/// which is briefly unavailable during startup does not abort the caller.
#[derive(Debug, Clone)]
pub struct RetryConfig {
    /// How many times to retry after the first failed attempt
    /// (0 means a single attempt with no retries).
    pub max_retries: u32,
    /// How long to wait before the first retry; the wait doubles after
    /// every failed attempt (exponential backoff).
    pub initial_delay: Duration,
    /// Upper bound applied to the doubled wait between retries.
    pub max_delay: Duration,
}

impl RetryConfig {
    /// Build a retry configuration from explicit values.
    pub fn new(max_retries: u32, initial_delay: Duration, max_delay: Duration) -> Self {
        Self {
            max_retries,
            initial_delay,
            max_delay,
        }
    }

    /// A configuration that never retries: zero retries and zero delays.
    pub fn none() -> Self {
        Self::new(0, Duration::ZERO, Duration::ZERO)
    }
}

impl Default for RetryConfig {
    /// The default policy: 5 retries, first wait of 1 second, waits capped at 30 seconds.
    fn default() -> Self {
        Self::new(5, Duration::from_secs(1), Duration::from_secs(30))
    }
}
132
133/// Validate a prefix string.
134///
135/// Prefixes must be:
136/// - Non-empty
137/// - At most 64 characters
138/// - Contain only alphanumeric characters, underscores, and hyphens
139fn validate_prefix(prefix: &str) -> Result<()> {
140    if prefix.is_empty() {
141        return Err(Error::invalid_prefix("prefix cannot be empty"));
142    }
143    if prefix.len() > MAX_PREFIX_LENGTH {
144        return Err(Error::invalid_prefix(format!(
145            "prefix exceeds maximum length of {} characters",
146            MAX_PREFIX_LENGTH
147        )));
148    }
149    if !prefix
150        .chars()
151        .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
152    {
153        return Err(Error::invalid_prefix(
154            "prefix must contain only alphanumeric characters, underscores, and hyphens",
155        ));
156    }
157    Ok(())
158}
159
160/// Validate a URI string.
161///
162/// URIs must be:
163/// - Non-empty
164/// - At most 2048 characters
165fn validate_uri(uri: &str) -> Result<()> {
166    if uri.is_empty() {
167        return Err(Error::invalid_uri("URI cannot be empty"));
168    }
169    if uri.len() > MAX_URI_LENGTH {
170        return Err(Error::invalid_uri(format!(
171            "URI exceeds maximum length of {} characters",
172            MAX_URI_LENGTH
173        )));
174    }
175    Ok(())
176}
177
/// Outcome of a batch prefix store.
///
/// Carries the counts a caller needs to report what a batch store actually
/// did: how many prefixes were written and how many were left alone.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct BatchStoreResult {
    /// Count of prefixes that were newly written.
    pub stored: usize,
    /// Count of prefixes that were not written because their URI
    /// already had a prefix.
    pub skipped: usize,
}

impl BatchStoreResult {
    /// Number of prefixes processed: stored plus skipped.
    pub fn total(&self) -> usize {
        let Self { stored, skipped } = *self;
        stored + skipped
    }

    /// `true` when nothing was skipped (also `true` for an empty batch).
    pub fn all_stored(&self) -> bool {
        matches!(self.skipped, 0)
    }

    /// `true` when nothing was stored, whether because every entry was
    /// skipped or because the batch was empty.
    pub fn none_stored(&self) -> bool {
        matches!(self.stored, 0)
    }
}
206
/// Registry for namespace prefixes.
///
/// Provides async access to the `namespaces` table in PostgreSQL.
/// Prefixes are stored with their corresponding URIs, following
/// the rule that each URI can only have one prefix (first one wins);
/// the store methods enforce this with `ON CONFLICT (uri) DO NOTHING`.
///
/// The registry maintains an in-memory cache of prefix -> URI mappings.
/// The cache is loaded from the table by [`PrefixRegistry::new`], extended
/// whenever a store method inserts a row, and back-filled by
/// [`PrefixRegistry::get_uri_for_prefix`] when a lookup misses the cache but
/// hits the database. Cached prefixes are expanded without a database round trip.
pub struct PrefixRegistry {
    /// Connection pool for the prefix database.
    pool: Pool,
    /// In-memory cache of prefix -> URI mappings for fast lookups.
    /// Guarded by an async `RwLock` so concurrent readers do not block each
    /// other; writers take the lock only for the duration of the insert.
    prefix_cache: RwLock<HashMap<String, String>>,
}
223
224impl PrefixRegistry {
225    /// Create a new prefix registry connected to the given PostgreSQL database.
226    ///
227    /// The registry will connect to the database and pre-populate its
228    /// in-memory cache with existing prefixes for fast CURIE expansion.
229    ///
230    /// # Arguments
231    ///
232    /// * `database_url` - PostgreSQL connection URL (e.g., "postgres://user:pass@host:port/db")
233    /// * `max_connections` - Maximum number of connections in the pool
234    ///
235    /// # Errors
236    ///
237    /// Returns an error if the database connection cannot be established
238    /// or if the namespaces table does not exist.
239    ///
240    /// # Example
241    ///
242    /// ```rust,no_run
243    /// use prefix_register::PrefixRegistry;
244    ///
245    /// # async fn example() -> prefix_register::Result<()> {
246    /// let registry = PrefixRegistry::new(
247    ///     "postgres://localhost/mydb",
248    ///     10,
249    /// ).await?;
250    /// # Ok(())
251    /// # }
252    /// ```
253    #[instrument(skip(database_url), fields(max_connections))]
254    pub async fn new(database_url: &str, max_connections: usize) -> Result<Self> {
255        if max_connections == 0 {
256            return Err(ConfigurationError::InvalidMaxConnections(max_connections).into());
257        }
258        if database_url.is_empty() {
259            return Err(ConfigurationError::InvalidDatabaseUrl("empty URL".to_string()).into());
260        }
261
262        // Parse the database URL and create pool configuration
263        let mut cfg = PoolConfig::new();
264        cfg.url = Some(database_url.to_string());
265        cfg.pool = Some(deadpool_postgres::PoolConfig::new(max_connections));
266
267        // Create the connection pool
268        let pool = cfg.create_pool(Some(Runtime::Tokio1), NoTls)?;
269
270        debug!("created connection pool");
271
272        // Verify connection by getting a client
273        let client = pool.get().await?;
274
275        // Pre-populate the cache with existing prefixes
276        let rows = client
277            .query("SELECT prefix, uri FROM namespaces", &[])
278            .await?;
279
280        let prefix_count = rows.len();
281        let mut cache = HashMap::new();
282        for row in rows {
283            let prefix: String = row.get(0);
284            let uri: String = row.get(1);
285            cache.insert(prefix, uri);
286        }
287
288        info!(prefix_count, "connected and loaded prefix cache");
289
290        Ok(Self {
291            pool,
292            prefix_cache: RwLock::new(cache),
293        })
294    }
295
296    /// Create a new prefix registry with retry logic for transient failures.
297    ///
298    /// This variant of [`new`](Self::new) implements exponential backoff retry
299    /// to handle transient database unavailability during startup (e.g., during
300    /// container orchestration where the database may start after this service).
301    ///
302    /// # Arguments
303    ///
304    /// * `database_url` - PostgreSQL connection URL
305    /// * `max_connections` - Maximum number of connections in the pool
306    /// * `retry_config` - Configuration for retry behaviour
307    ///
308    /// # Example
309    ///
310    /// ```rust,no_run
311    /// use prefix_register::{PrefixRegistry, RetryConfig};
312    /// use std::time::Duration;
313    ///
314    /// # async fn example() -> prefix_register::Result<()> {
315    /// // Use default retry config (5 retries, 1s initial, 30s max)
316    /// let registry = PrefixRegistry::new_with_retry(
317    ///     "postgres://localhost/mydb",
318    ///     10,
319    ///     RetryConfig::default(),
320    /// ).await?;
321    ///
322    /// // Or customize retry behaviour
323    /// let registry = PrefixRegistry::new_with_retry(
324    ///     "postgres://localhost/mydb",
325    ///     10,
326    ///     RetryConfig::new(
327    ///         3,                           // max 3 retries
328    ///         Duration::from_millis(500),  // start at 500ms
329    ///         Duration::from_secs(10),     // cap at 10s
330    ///     ),
331    /// ).await?;
332    /// # Ok(())
333    /// # }
334    /// ```
335    #[instrument(skip(database_url, retry_config), fields(max_connections, max_retries = retry_config.max_retries))]
336    pub async fn new_with_retry(
337        database_url: &str,
338        max_connections: usize,
339        retry_config: RetryConfig,
340    ) -> Result<Self> {
341        let mut last_error: Option<Error> = None;
342        let mut delay = retry_config.initial_delay;
343
344        for attempt in 0..=retry_config.max_retries {
345            match Self::new(database_url, max_connections).await {
346                Ok(registry) => return Ok(registry),
347                Err(e) => {
348                    // Configuration errors should not be retried
349                    if matches!(e, Error::Configuration(_)) {
350                        return Err(e);
351                    }
352
353                    last_error = Some(e);
354
355                    // Don't sleep after the last attempt
356                    if attempt < retry_config.max_retries {
357                        warn!(
358                            attempt = attempt + 1,
359                            max_retries = retry_config.max_retries,
360                            delay_ms = delay.as_millis() as u64,
361                            "connection failed, retrying"
362                        );
363                        tokio::time::sleep(delay).await;
364                        // Exponential backoff with cap
365                        delay = std::cmp::min(delay * 2, retry_config.max_delay);
366                    }
367                }
368            }
369        }
370
371        // All retries exhausted - return the last error
372        Err(last_error.expect("should have at least one error after retries"))
373    }
374
375    /// Get the URI for a given prefix.
376    ///
377    /// First checks the in-memory cache, then falls back to the database.
378    /// This is the primary method used for CURIE expansion.
379    ///
380    /// # Arguments
381    ///
382    /// * `prefix` - The namespace prefix (e.g., "foaf", "rdf")
383    ///
384    /// # Returns
385    ///
386    /// The URI if the prefix is known, None otherwise.
387    ///
388    /// # Example
389    ///
390    /// ```rust,no_run
391    /// # use prefix_register::PrefixRegistry;
392    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
393    /// if let Some(uri) = registry.get_uri_for_prefix("foaf").await? {
394    ///     println!("foaf = {}", uri);
395    /// }
396    /// # Ok(())
397    /// # }
398    /// ```
399    #[instrument(skip(self), level = "debug")]
400    pub async fn get_uri_for_prefix(&self, prefix: &str) -> Result<Option<String>> {
401        // Check cache first (fast path)
402        {
403            let cache = self.prefix_cache.read().await;
404            if let Some(uri) = cache.get(prefix) {
405                debug!("cache hit");
406                return Ok(Some(uri.clone()));
407            }
408        }
409
410        // Cache miss - check database (handles concurrent updates)
411        debug!("cache miss, querying database");
412        let client = self.pool.get().await?;
413        let row = client
414            .query_opt("SELECT uri FROM namespaces WHERE prefix = $1", &[&prefix])
415            .await?;
416
417        if let Some(row) = row {
418            let uri: String = row.get(0);
419            // Update cache for future lookups
420            {
421                let mut cache = self.prefix_cache.write().await;
422                cache.insert(prefix.to_string(), uri.clone());
423            }
424            debug!("found in database, cached");
425            Ok(Some(uri))
426        } else {
427            debug!("not found");
428            Ok(None)
429        }
430    }
431
432    /// Get the prefix for a given URI.
433    ///
434    /// Used to check if a URI already has a registered prefix.
435    ///
436    /// # Arguments
437    ///
438    /// * `uri` - The full namespace URI
439    ///
440    /// # Returns
441    ///
442    /// The prefix if the URI is registered, None otherwise.
443    ///
444    /// # Example
445    ///
446    /// ```rust,no_run
447    /// # use prefix_register::PrefixRegistry;
448    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
449    /// if let Some(prefix) = registry.get_prefix_for_uri("http://xmlns.com/foaf/0.1/").await? {
450    ///     println!("URI has prefix: {}", prefix);
451    /// }
452    /// # Ok(())
453    /// # }
454    /// ```
455    #[instrument(skip(self), level = "debug")]
456    pub async fn get_prefix_for_uri(&self, uri: &str) -> Result<Option<String>> {
457        let client = self.pool.get().await?;
458        let row = client
459            .query_opt("SELECT prefix FROM namespaces WHERE uri = $1", &[&uri])
460            .await?;
461
462        Ok(row.map(|r| r.get(0)))
463    }
464
465    /// Store a new prefix if the URI doesn't already have one.
466    ///
467    /// This follows the "first prefix wins" rule - if a URI already
468    /// has a prefix registered, the new prefix is ignored.
469    ///
470    /// # Arguments
471    ///
472    /// * `prefix` - The namespace prefix to store
473    /// * `uri` - The full namespace URI
474    ///
475    /// # Returns
476    ///
477    /// `true` if the prefix was stored, `false` if the URI already had a prefix.
478    ///
479    /// # Example
480    ///
481    /// ```rust,no_run
482    /// # use prefix_register::PrefixRegistry;
483    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
484    /// let stored = registry.store_prefix_if_new("schema", "https://schema.org/").await?;
485    /// if stored {
486    ///     println!("New prefix stored");
487    /// } else {
488    ///     println!("URI already has a prefix");
489    /// }
490    /// # Ok(())
491    /// # }
492    /// ```
493    #[instrument(skip(self))]
494    pub async fn store_prefix_if_new(&self, prefix: &str, uri: &str) -> Result<bool> {
495        // Validate inputs to prevent DoS via memory exhaustion
496        validate_prefix(prefix)?;
497        validate_uri(uri)?;
498
499        let client = self.pool.get().await?;
500
501        // Use INSERT ... ON CONFLICT to atomically check and insert
502        // This handles race conditions between multiple consumers
503        let result = client
504            .execute(
505                "INSERT INTO namespaces (uri, prefix) VALUES ($1, $2)
506                 ON CONFLICT (uri) DO NOTHING",
507                &[&uri, &prefix],
508            )
509            .await?;
510
511        if result > 0 {
512            // Successfully inserted - update our cache
513            let mut cache = self.prefix_cache.write().await;
514            cache.insert(prefix.to_string(), uri.to_string());
515            debug!("stored new prefix");
516            Ok(true)
517        } else {
518            // URI already has a prefix
519            debug!("skipped, URI already has prefix");
520            Ok(false)
521        }
522    }
523
524    /// Store multiple prefixes, skipping any where the URI already has a prefix.
525    ///
526    /// More efficient than calling store_prefix_if_new repeatedly.
527    ///
528    /// # Arguments
529    ///
530    /// * `prefixes` - Iterator of (prefix, uri) pairs to store
531    ///
532    /// # Returns
533    ///
534    /// A [`BatchStoreResult`] with counts of stored and skipped prefixes.
535    ///
536    /// # Example
537    ///
538    /// ```rust,no_run
539    /// # use prefix_register::PrefixRegistry;
540    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
541    /// let prefixes = vec![
542    ///     ("foaf", "http://xmlns.com/foaf/0.1/"),
543    ///     ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
544    ///     ("schema", "https://schema.org/"),
545    /// ];
546    /// let result = registry.store_prefixes_if_new(prefixes).await?;
547    /// println!("Stored {}, skipped {}", result.stored, result.skipped);
548    /// # Ok(())
549    /// # }
550    /// ```
551    #[instrument(skip(self, prefixes))]
552    pub async fn store_prefixes_if_new<'a, I>(&self, prefixes: I) -> Result<BatchStoreResult>
553    where
554        I: IntoIterator<Item = (&'a str, &'a str)>,
555    {
556        let prefixes: Vec<_> = prefixes.into_iter().collect();
557        if prefixes.is_empty() {
558            return Ok(BatchStoreResult::default());
559        }
560
561        // Validate all inputs upfront to prevent DoS via memory exhaustion
562        for (prefix, uri) in &prefixes {
563            validate_prefix(prefix)?;
564            validate_uri(uri)?;
565        }
566
567        let total = prefixes.len();
568
569        // Collect into separate arrays for UNNEST (single round trip)
570        let uris: Vec<&str> = prefixes.iter().map(|(_, uri)| *uri).collect();
571        let prefix_names: Vec<&str> = prefixes.iter().map(|(prefix, _)| *prefix).collect();
572
573        let client = self.pool.get().await?;
574
575        // Single batch INSERT using UNNEST, returning the rows that were inserted
576        let rows = client
577            .query(
578                "INSERT INTO namespaces (uri, prefix)
579                 SELECT * FROM UNNEST($1::text[], $2::text[])
580                 ON CONFLICT (uri) DO NOTHING
581                 RETURNING prefix, uri",
582                &[&uris, &prefix_names],
583            )
584            .await?;
585
586        let stored = rows.len();
587        let skipped = total - stored;
588
589        // Update cache with the rows that were actually inserted
590        if !rows.is_empty() {
591            let mut cache = self.prefix_cache.write().await;
592            for row in &rows {
593                let prefix: String = row.get(0);
594                let uri: String = row.get(1);
595                cache.insert(prefix, uri);
596            }
597        }
598
599        info!(total, stored, skipped, "batch store complete");
600
601        Ok(BatchStoreResult { stored, skipped })
602    }
603
604    /// Expand a CURIE (Compact URI) to a full URI.
605    ///
606    /// Given a prefix and local name, returns the expanded URI
607    /// if the prefix is known.
608    ///
609    /// # Arguments
610    ///
611    /// * `prefix` - The namespace prefix (e.g., "foaf")
612    /// * `local_name` - The local part (e.g., "Person")
613    ///
614    /// # Returns
615    ///
616    /// The full URI (e.g., "http://xmlns.com/foaf/0.1/Person")
617    /// or None if the prefix is unknown.
618    ///
619    /// # Example
620    ///
621    /// ```rust,no_run
622    /// # use prefix_register::PrefixRegistry;
623    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
624    /// if let Some(uri) = registry.expand_curie("foaf", "Person").await? {
625    ///     println!("foaf:Person = {}", uri);
626    ///     // Output: foaf:Person = http://xmlns.com/foaf/0.1/Person
627    /// }
628    /// # Ok(())
629    /// # }
630    /// ```
631    #[instrument(skip(self), level = "debug")]
632    pub async fn expand_curie(&self, prefix: &str, local_name: &str) -> Result<Option<String>> {
633        if let Some(base_uri) = self.get_uri_for_prefix(prefix).await? {
634            Ok(Some(format!("{}{}", base_uri, local_name)))
635        } else {
636            // Unknown prefix - caller can decide how to handle
637            Ok(None)
638        }
639    }
640
641    /// Get all registered prefixes.
642    ///
643    /// Returns a copy of the in-memory cache containing all prefix -> URI mappings.
644    ///
645    /// # Example
646    ///
647    /// ```rust,no_run
648    /// # use prefix_register::PrefixRegistry;
649    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
650    /// let prefixes = registry.get_all_prefixes().await;
651    /// for (prefix, uri) in prefixes {
652    ///     println!("{}: {}", prefix, uri);
653    /// }
654    /// # Ok(())
655    /// # }
656    /// ```
657    pub async fn get_all_prefixes(&self) -> HashMap<String, String> {
658        self.prefix_cache.read().await.clone()
659    }
660
661    /// Get the number of registered prefixes.
662    ///
663    /// # Example
664    ///
665    /// ```rust,no_run
666    /// # use prefix_register::PrefixRegistry;
667    /// # async fn example(registry: &PrefixRegistry) -> prefix_register::Result<()> {
668    /// let count = registry.prefix_count().await;
669    /// println!("Registered prefixes: {}", count);
670    /// # Ok(())
671    /// # }
672    /// ```
673    pub async fn prefix_count(&self) -> usize {
674        self.prefix_cache.read().await.len()
675    }
676}
677
678#[cfg(test)]
679mod tests {
680    use super::*;
681
682    // ==================== Unit Tests ====================
683    // These run without a database
684
685    #[test]
686    fn test_configuration_error_max_connections() {
687        let err = ConfigurationError::InvalidMaxConnections(0);
688        assert!(err.to_string().contains("max_connections"));
689    }
690
691    #[test]
692    fn test_configuration_error_database_url() {
693        let err = ConfigurationError::InvalidDatabaseUrl("empty".to_string());
694        assert!(err.to_string().contains("database_url"));
695    }
696
697    #[test]
698    fn test_batch_store_result_default() {
699        let result = BatchStoreResult::default();
700        assert_eq!(result.stored, 0);
701        assert_eq!(result.skipped, 0);
702        assert_eq!(result.total(), 0);
703        assert!(result.all_stored());
704        assert!(result.none_stored());
705    }
706
707    #[test]
708    fn test_batch_store_result_all_stored() {
709        let result = BatchStoreResult {
710            stored: 5,
711            skipped: 0,
712        };
713        assert_eq!(result.total(), 5);
714        assert!(result.all_stored());
715        assert!(!result.none_stored());
716    }
717
718    #[test]
719    fn test_batch_store_result_mixed() {
720        let result = BatchStoreResult {
721            stored: 3,
722            skipped: 2,
723        };
724        assert_eq!(result.total(), 5);
725        assert!(!result.all_stored());
726        assert!(!result.none_stored());
727    }
728
729    #[test]
730    fn test_batch_store_result_all_skipped() {
731        let result = BatchStoreResult {
732            stored: 0,
733            skipped: 5,
734        };
735        assert_eq!(result.total(), 5);
736        assert!(!result.all_stored());
737        assert!(result.none_stored());
738    }
739
740    // ==================== Validation Tests ====================
741
742    #[test]
743    fn test_validate_prefix_valid() {
744        assert!(validate_prefix("foaf").is_ok());
745        assert!(validate_prefix("rdf").is_ok());
746        assert!(validate_prefix("schema_org").is_ok());
747        assert!(validate_prefix("my-prefix").is_ok());
748        assert!(validate_prefix("prefix123").is_ok());
749        assert!(validate_prefix("a").is_ok()); // Single char is valid
750    }
751
752    #[test]
753    fn test_validate_prefix_empty() {
754        let result = validate_prefix("");
755        assert!(result.is_err());
756        assert!(result.unwrap_err().to_string().contains("empty"));
757    }
758
759    #[test]
760    fn test_validate_prefix_too_long() {
761        let long_prefix = "a".repeat(MAX_PREFIX_LENGTH + 1);
762        let result = validate_prefix(&long_prefix);
763        assert!(result.is_err());
764        assert!(result.unwrap_err().to_string().contains("maximum length"));
765    }
766
767    #[test]
768    fn test_validate_prefix_max_length_ok() {
769        let max_prefix = "a".repeat(MAX_PREFIX_LENGTH);
770        assert!(validate_prefix(&max_prefix).is_ok());
771    }
772
773    #[test]
774    fn test_validate_prefix_invalid_chars() {
775        // Spaces not allowed
776        let result = validate_prefix("foo bar");
777        assert!(result.is_err());
778        assert!(result.unwrap_err().to_string().contains("alphanumeric"));
779
780        // Colons not allowed
781        let result = validate_prefix("foo:bar");
782        assert!(result.is_err());
783
784        // Slashes not allowed
785        let result = validate_prefix("foo/bar");
786        assert!(result.is_err());
787
788        // Unicode not allowed
789        let result = validate_prefix("préfix");
790        assert!(result.is_err());
791    }
792
793    #[test]
794    fn test_validate_uri_valid() {
795        assert!(validate_uri("http://example.org/").is_ok());
796        assert!(validate_uri("https://schema.org/Person").is_ok());
797        assert!(validate_uri("urn:isbn:0451450523").is_ok());
798        assert!(validate_uri("http://example.org/path?query=1#fragment").is_ok());
799    }
800
801    #[test]
802    fn test_validate_uri_empty() {
803        let result = validate_uri("");
804        assert!(result.is_err());
805        assert!(result.unwrap_err().to_string().contains("empty"));
806    }
807
808    #[test]
809    fn test_validate_uri_too_long() {
810        let long_uri = format!("http://example.org/{}", "a".repeat(MAX_URI_LENGTH));
811        let result = validate_uri(&long_uri);
812        assert!(result.is_err());
813        assert!(result.unwrap_err().to_string().contains("maximum length"));
814    }
815
816    #[test]
817    fn test_validate_uri_max_length_ok() {
818        // Create a URI exactly at the max length
819        let base = "http://example.org/";
820        let padding = "a".repeat(MAX_URI_LENGTH - base.len());
821        let max_uri = format!("{}{}", base, padding);
822        assert_eq!(max_uri.len(), MAX_URI_LENGTH);
823        assert!(validate_uri(&max_uri).is_ok());
824    }
825
826    // ==================== RetryConfig Tests ====================
827
828    #[test]
829    fn test_retry_config_default() {
830        let config = RetryConfig::default();
831        assert_eq!(config.max_retries, 5);
832        assert_eq!(config.initial_delay, Duration::from_secs(1));
833        assert_eq!(config.max_delay, Duration::from_secs(30));
834    }
835
836    #[test]
837    fn test_retry_config_none() {
838        let config = RetryConfig::none();
839        assert_eq!(config.max_retries, 0);
840        assert_eq!(config.initial_delay, Duration::ZERO);
841        assert_eq!(config.max_delay, Duration::ZERO);
842    }
843
844    #[test]
845    fn test_retry_config_custom() {
846        let config = RetryConfig::new(3, Duration::from_millis(100), Duration::from_secs(5));
847        assert_eq!(config.max_retries, 3);
848        assert_eq!(config.initial_delay, Duration::from_millis(100));
849        assert_eq!(config.max_delay, Duration::from_secs(5));
850    }
851
852    #[tokio::test]
853    async fn test_new_with_retry_config_error_not_retried() {
854        // Configuration errors should fail immediately, not retry
855        let result = PrefixRegistry::new_with_retry(
856            "", // Empty URL is a config error
857            5,
858            RetryConfig::default(),
859        )
860        .await;
861
862        assert!(matches!(
863            result,
864            Err(Error::Configuration(
865                ConfigurationError::InvalidDatabaseUrl(_)
866            ))
867        ));
868    }
869
870    // ==================== Integration Tests ====================
871    // These require DATABASE_URL to be set (provided by CI)
872
873    /// Helper to get database URL from environment.
874    /// Returns None if not set (skips integration tests locally).
875    fn get_test_database_url() -> Option<String> {
876        std::env::var("DATABASE_URL").ok()
877    }
878
879    /// Helper to clean up test data. Uses a unique prefix to avoid conflicts.
880    async fn cleanup_test_data(registry: &PrefixRegistry, test_prefix: &str) {
881        let client = registry.pool.get().await.unwrap();
882        client
883            .execute(
884                "DELETE FROM namespaces WHERE prefix LIKE $1",
885                &[&format!("{}%", test_prefix)],
886            )
887            .await
888            .unwrap();
889    }
890
891    #[tokio::test]
892    async fn test_new_with_invalid_max_connections() {
893        let result = PrefixRegistry::new("postgres://localhost/test", 0).await;
894        assert!(matches!(
895            result,
896            Err(Error::Configuration(
897                ConfigurationError::InvalidMaxConnections(0)
898            ))
899        ));
900    }
901
902    #[tokio::test]
903    async fn test_new_with_empty_url() {
904        let result = PrefixRegistry::new("", 5).await;
905        assert!(matches!(
906            result,
907            Err(Error::Configuration(
908                ConfigurationError::InvalidDatabaseUrl(_)
909            ))
910        ));
911    }
912
913    #[tokio::test]
914    async fn test_store_and_retrieve_prefix() {
915        let Some(db_url) = get_test_database_url() else {
916            return; // Skip if no database
917        };
918
919        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
920        let test_prefix = "test_sr_";
921        cleanup_test_data(&registry, test_prefix).await;
922
923        // Store a new prefix
924        let prefix = format!("{test_prefix}foaf");
925        let uri = "http://xmlns.com/foaf/0.1/";
926        let stored = registry.store_prefix_if_new(&prefix, uri).await.unwrap();
927        assert!(stored, "First store should succeed");
928
929        // Retrieve by prefix
930        let retrieved = registry.get_uri_for_prefix(&prefix).await.unwrap();
931        assert_eq!(retrieved, Some(uri.to_string()));
932
933        // Retrieve by URI
934        let retrieved_prefix = registry.get_prefix_for_uri(uri).await.unwrap();
935        assert_eq!(retrieved_prefix, Some(prefix.clone()));
936
937        cleanup_test_data(&registry, test_prefix).await;
938    }
939
940    #[tokio::test]
941    async fn test_first_prefix_wins() {
942        let Some(db_url) = get_test_database_url() else {
943            return;
944        };
945
946        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
947        let test_prefix = "test_fpw_";
948        cleanup_test_data(&registry, test_prefix).await;
949
950        let uri = "http://example.org/test/first-wins/";
951        let first_prefix = format!("{test_prefix}first");
952        let second_prefix = format!("{test_prefix}second");
953
954        // Store first prefix
955        let stored1 = registry
956            .store_prefix_if_new(&first_prefix, uri)
957            .await
958            .unwrap();
959        assert!(stored1, "First prefix should be stored");
960
961        // Try to store second prefix for same URI
962        let stored2 = registry
963            .store_prefix_if_new(&second_prefix, uri)
964            .await
965            .unwrap();
966        assert!(!stored2, "Second prefix should be rejected");
967
968        // Verify the first prefix is still there
969        let retrieved = registry.get_prefix_for_uri(uri).await.unwrap();
970        assert_eq!(retrieved, Some(first_prefix));
971
972        cleanup_test_data(&registry, test_prefix).await;
973    }
974
975    #[tokio::test]
976    async fn test_expand_curie() {
977        let Some(db_url) = get_test_database_url() else {
978            return;
979        };
980
981        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
982        let test_prefix = "test_ec_";
983        cleanup_test_data(&registry, test_prefix).await;
984
985        let prefix = format!("{test_prefix}schema");
986        let uri = "https://schema.org/";
987        registry.store_prefix_if_new(&prefix, uri).await.unwrap();
988
989        // Expand known prefix
990        let expanded = registry.expand_curie(&prefix, "Person").await.unwrap();
991        assert_eq!(expanded, Some("https://schema.org/Person".to_string()));
992
993        // Unknown prefix returns None
994        let unknown = registry
995            .expand_curie(&format!("{test_prefix}unknown"), "Thing")
996            .await
997            .unwrap();
998        assert_eq!(unknown, None);
999
1000        cleanup_test_data(&registry, test_prefix).await;
1001    }
1002
1003    #[tokio::test]
1004    async fn test_batch_store_prefixes() {
1005        let Some(db_url) = get_test_database_url() else {
1006            return;
1007        };
1008
1009        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1010        let test_prefix = "test_bs_";
1011        cleanup_test_data(&registry, test_prefix).await;
1012
1013        let prefixes = [
1014            (
1015                format!("{test_prefix}rdf"),
1016                "http://www.w3.org/1999/02/22-rdf-syntax-ns#".to_string(),
1017            ),
1018            (
1019                format!("{test_prefix}rdfs"),
1020                "http://www.w3.org/2000/01/rdf-schema#".to_string(),
1021            ),
1022            (
1023                format!("{test_prefix}owl"),
1024                "http://www.w3.org/2002/07/owl#".to_string(),
1025            ),
1026        ];
1027
1028        let prefix_refs: Vec<(&str, &str)> = prefixes
1029            .iter()
1030            .map(|(p, u)| (p.as_str(), u.as_str()))
1031            .collect();
1032
1033        let result = registry.store_prefixes_if_new(prefix_refs).await.unwrap();
1034        assert_eq!(result.stored, 3);
1035        assert_eq!(result.skipped, 0);
1036        assert!(result.all_stored());
1037
1038        // Verify all were stored
1039        assert_eq!(registry.prefix_count().await, 3);
1040
1041        cleanup_test_data(&registry, test_prefix).await;
1042    }
1043
1044    #[tokio::test]
1045    async fn test_batch_store_with_duplicates() {
1046        let Some(db_url) = get_test_database_url() else {
1047            return;
1048        };
1049
1050        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1051        let test_prefix = "test_bsd_";
1052        cleanup_test_data(&registry, test_prefix).await;
1053
1054        // Pre-store one prefix
1055        let existing_uri = "http://example.org/existing/";
1056        registry
1057            .store_prefix_if_new(&format!("{test_prefix}existing"), existing_uri)
1058            .await
1059            .unwrap();
1060
1061        // Batch store including the existing URI with a different prefix
1062        let prefixes = [
1063            (
1064                format!("{test_prefix}new1"),
1065                "http://example.org/new1/".to_string(),
1066            ),
1067            (format!("{test_prefix}duplicate"), existing_uri.to_string()), // Should be skipped
1068            (
1069                format!("{test_prefix}new2"),
1070                "http://example.org/new2/".to_string(),
1071            ),
1072        ];
1073
1074        let prefix_refs: Vec<(&str, &str)> = prefixes
1075            .iter()
1076            .map(|(p, u)| (p.as_str(), u.as_str()))
1077            .collect();
1078
1079        let result = registry.store_prefixes_if_new(prefix_refs).await.unwrap();
1080        assert_eq!(result.stored, 2);
1081        assert_eq!(result.skipped, 1);
1082        assert!(!result.all_stored());
1083        assert!(!result.none_stored());
1084
1085        cleanup_test_data(&registry, test_prefix).await;
1086    }
1087
1088    #[tokio::test]
1089    async fn test_batch_store_empty() {
1090        let Some(db_url) = get_test_database_url() else {
1091            return;
1092        };
1093
1094        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1095        let empty: Vec<(&str, &str)> = vec![];
1096
1097        let result = registry.store_prefixes_if_new(empty).await.unwrap();
1098        assert_eq!(result.stored, 0);
1099        assert_eq!(result.skipped, 0);
1100        assert!(result.all_stored()); // Vacuously true
1101        assert!(result.none_stored());
1102    }
1103
1104    #[tokio::test]
1105    async fn test_get_all_prefixes() {
1106        let Some(db_url) = get_test_database_url() else {
1107            return;
1108        };
1109
1110        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1111        let test_prefix = "test_gap_";
1112        cleanup_test_data(&registry, test_prefix).await;
1113
1114        // Store some prefixes
1115        let prefix1 = format!("{test_prefix}a");
1116        let prefix2 = format!("{test_prefix}b");
1117        registry
1118            .store_prefix_if_new(&prefix1, "http://example.org/a/")
1119            .await
1120            .unwrap();
1121        registry
1122            .store_prefix_if_new(&prefix2, "http://example.org/b/")
1123            .await
1124            .unwrap();
1125
1126        let all = registry.get_all_prefixes().await;
1127        assert!(all.contains_key(&prefix1));
1128        assert!(all.contains_key(&prefix2));
1129        assert_eq!(
1130            all.get(&prefix1),
1131            Some(&"http://example.org/a/".to_string())
1132        );
1133
1134        cleanup_test_data(&registry, test_prefix).await;
1135    }
1136
1137    #[tokio::test]
1138    async fn test_cache_populated_on_startup() {
1139        let Some(db_url) = get_test_database_url() else {
1140            return;
1141        };
1142
1143        let test_prefix = "test_cache_";
1144
1145        // Create first registry and store a prefix
1146        {
1147            let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1148            cleanup_test_data(&registry, test_prefix).await;
1149            registry
1150                .store_prefix_if_new(
1151                    &format!("{test_prefix}cached"),
1152                    "http://example.org/cached/",
1153                )
1154                .await
1155                .unwrap();
1156        }
1157
1158        // Create new registry - should have prefix in cache from startup
1159        let registry2 = PrefixRegistry::new(&db_url, 5).await.unwrap();
1160        let cached = registry2.get_all_prefixes().await;
1161        assert!(cached.contains_key(&format!("{test_prefix}cached")));
1162
1163        cleanup_test_data(&registry2, test_prefix).await;
1164    }
1165
1166    #[tokio::test]
1167    async fn test_unknown_prefix_returns_none() {
1168        let Some(db_url) = get_test_database_url() else {
1169            return;
1170        };
1171
1172        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1173
1174        let result = registry
1175            .get_uri_for_prefix("definitely_not_a_real_prefix_xyz123")
1176            .await
1177            .unwrap();
1178        assert_eq!(result, None);
1179    }
1180
1181    #[tokio::test]
1182    async fn test_unknown_uri_returns_none() {
1183        let Some(db_url) = get_test_database_url() else {
1184            return;
1185        };
1186
1187        let registry = PrefixRegistry::new(&db_url, 5).await.unwrap();
1188
1189        let result = registry
1190            .get_prefix_for_uri("http://definitely-not-registered.example.org/")
1191            .await
1192            .unwrap();
1193        assert_eq!(result, None);
1194    }
1195
1196    #[tokio::test]
1197    async fn test_new_with_retry_succeeds() {
1198        let Some(db_url) = get_test_database_url() else {
1199            return;
1200        };
1201
1202        // Should succeed on first try with valid database
1203        let registry = PrefixRegistry::new_with_retry(
1204            &db_url,
1205            5,
1206            RetryConfig::none(), // No retries needed for working DB
1207        )
1208        .await
1209        .unwrap();
1210
1211        // Verify it works
1212        let test_prefix = "test_nwr_";
1213        cleanup_test_data(&registry, test_prefix).await;
1214
1215        let stored = registry
1216            .store_prefix_if_new(&format!("{test_prefix}retry"), "http://example.org/retry/")
1217            .await
1218            .unwrap();
1219        assert!(stored);
1220
1221        cleanup_test_data(&registry, test_prefix).await;
1222    }
1223}