prefix-register 0.2.2

A PostgreSQL-backed namespace prefix registry for CURIE expansion and prefix management
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
// Copyright TELICENT LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Python bindings for prefix-register using PyO3

use crate::{PrefixRegistry, RetryConfig};
use pyo3::exceptions::{PyRuntimeError, PyValueError};
use pyo3::prelude::*;
use std::collections::HashMap;
use std::time::Duration;

/// Python wrapper for PrefixRegistry
///
/// Exposed to Python under the class name `PrefixRegistry`.
#[pyclass(name = "PrefixRegistry")]
struct PyPrefixRegistry {
    // The wrapped Rust registry. Each async method clones this value and moves the clone
    // into the future it hands back to Python, so the future does not borrow from `self`.
    inner: PrefixRegistry,
}

#[pymethods]
impl PyPrefixRegistry {
    /// Connect to PostgreSQL and build a prefix registry
    ///
    /// Arguments are validated synchronously, so invalid values raise as soon as
    /// the method is called rather than when the returned awaitable is awaited.
    ///
    /// Args:
    ///     database_url: PostgreSQL connection string (e.g., "postgres://user:password@host:port/database")
    ///     max_connections: Maximum number of connections in the pool (recommended: 5-20)
    ///
    /// Returns:
    ///     PrefixRegistry: A new prefix registry instance (after awaiting)
    ///
    /// Raises:
    ///     ValueError: If database_url is empty or max_connections is 0
    ///     RuntimeError: If creating the underlying registry fails
    ///
    /// Example:
    ///     >>> registry = await PrefixRegistry.new("postgres://localhost/mydb", 10)
    #[staticmethod]
    #[allow(clippy::new_ret_no_self)]
    #[pyo3(signature = (database_url, max_connections))]
    fn new<'p>(
        py: Python<'p>,
        database_url: String,
        max_connections: u32,
    ) -> PyResult<Bound<'p, PyAny>> {
        // Guard clauses: the URL is checked first, then the pool size.
        if database_url.is_empty() {
            return Err(PyValueError::new_err("database_url cannot be empty"));
        }
        if max_connections == 0 {
            return Err(PyValueError::new_err(
                "max_connections must be greater than 0",
            ));
        }

        let pool_size = max_connections as usize;
        let connect = async move {
            PrefixRegistry::new(&database_url, pool_size)
                .await
                .map(|inner| PyPrefixRegistry { inner })
                .map_err(|err| PyRuntimeError::new_err(format!("Failed to connect: {}", err)))
        };

        pyo3_async_runtimes::tokio::future_into_py(py, connect)
    }

    /// Connect to PostgreSQL with retry handling and build a prefix registry
    ///
    /// Arguments are validated synchronously, so invalid values raise as soon as
    /// the method is called rather than when the returned awaitable is awaited.
    ///
    /// Args:
    ///     database_url: PostgreSQL connection string
    ///     max_connections: Maximum number of connections in the pool
    ///     max_retries: Maximum number of retry attempts (default: 5)
    ///     initial_delay_ms: Initial delay in milliseconds before first retry (default: 1000)
    ///     max_delay_ms: Maximum delay in milliseconds between retries (default: 30000)
    ///
    /// Returns:
    ///     PrefixRegistry: A new prefix registry instance (after awaiting)
    ///
    /// Raises:
    ///     ValueError: If database_url is empty or max_connections is 0
    ///     RuntimeError: If creating the underlying registry fails
    ///
    /// Example:
    ///     >>> registry = await PrefixRegistry.new_with_retry(
    ///     ...     "postgres://localhost/mydb", 10, 5, 1000, 30000
    ///     ... )
    #[staticmethod]
    #[pyo3(signature = (database_url, max_connections, max_retries=5, initial_delay_ms=1000, max_delay_ms=30000))]
    fn new_with_retry<'p>(
        py: Python<'p>,
        database_url: String,
        max_connections: u32,
        max_retries: u32,
        initial_delay_ms: u64,
        max_delay_ms: u64,
    ) -> PyResult<Bound<'p, PyAny>> {
        // Guard clauses: the URL is checked first, then the pool size.
        if database_url.is_empty() {
            return Err(PyValueError::new_err("database_url cannot be empty"));
        }
        if max_connections == 0 {
            return Err(PyValueError::new_err(
                "max_connections must be greater than 0",
            ));
        }

        let pool_size = max_connections as usize;
        // The millisecond arguments are converted to `Duration`s before the future is built.
        let first_delay = Duration::from_millis(initial_delay_ms);
        let delay_cap = Duration::from_millis(max_delay_ms);
        let policy = RetryConfig::new(max_retries, first_delay, delay_cap);

        let connect = async move {
            PrefixRegistry::new_with_retry(&database_url, pool_size, policy)
                .await
                .map(|inner| PyPrefixRegistry { inner })
                .map_err(|err| PyRuntimeError::new_err(format!("Failed to connect: {}", err)))
        };

        pyo3_async_runtimes::tokio::future_into_py(py, connect)
    }

    /// Register a prefix for a URI unless that URI already has one
    ///
    /// Args:
    ///     prefix: The namespace prefix (e.g., "foaf", "rdf")
    ///     uri: The full namespace URI (e.g., "http://xmlns.com/foaf/0.1/")
    ///
    /// Returns:
    ///     bool: True if the prefix was stored, False if the URI already had a prefix
    ///
    /// Raises:
    ///     RuntimeError: If the underlying store operation fails
    ///
    /// Example:
    ///     >>> stored = await registry.store_prefix_if_new("foaf", "http://xmlns.com/foaf/0.1/")
    ///     >>> if stored:
    ///     ...     print("New prefix stored")
    fn store_prefix_if_new<'p>(
        &self,
        py: Python<'p>,
        prefix: String,
        uri: String,
    ) -> PyResult<Bound<'p, PyAny>> {
        // The future must own everything it touches, so it gets its own clone of the registry.
        let registry = self.inner.clone();

        pyo3_async_runtimes::tokio::future_into_py(py, async move {
            registry
                .store_prefix_if_new(&prefix, &uri)
                .await
                .map_err(|err| PyRuntimeError::new_err(format!("Store failed: {}", err)))
        })
    }

    /// Register several prefixes in one batch call
    ///
    /// Args:
    ///     prefixes: List of (prefix, uri) tuples to store
    ///
    /// Returns:
    ///     dict: Dictionary with 'stored' and 'skipped' counts
    ///
    /// Raises:
    ///     RuntimeError: If the underlying batch store operation fails
    ///
    /// Example:
    ///     >>> prefixes = [("foaf", "http://xmlns.com/foaf/0.1/"), ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")]
    ///     >>> result = await registry.store_prefixes_if_new(prefixes)
    ///     >>> print(f"Stored {result['stored']}, skipped {result['skipped']}")
    fn store_prefixes_if_new<'p>(
        &self,
        py: Python<'p>,
        prefixes: Vec<(String, String)>,
    ) -> PyResult<Bound<'p, PyAny>> {
        // The future must own everything it touches, so it gets its own clone of the registry.
        let registry = self.inner.clone();

        pyo3_async_runtimes::tokio::future_into_py(py, async move {
            // The registry call takes borrowed pairs; the owned strings stay alive in `prefixes`.
            let pairs = prefixes
                .iter()
                .map(|(prefix, uri)| (prefix.as_str(), uri.as_str()))
                .collect::<Vec<(&str, &str)>>();

            let outcome = registry
                .store_prefixes_if_new(pairs)
                .await
                .map_err(|err| PyRuntimeError::new_err(format!("Batch store failed: {}", err)))?;

            // Handed back to Python as a plain dict with two keys.
            Ok(HashMap::from([
                ("stored", outcome.stored),
                ("skipped", outcome.skipped),
            ]))
        })
    }

    /// Look up the namespace URI registered for a prefix
    ///
    /// Args:
    ///     prefix: The namespace prefix (e.g., "foaf")
    ///
    /// Returns:
    ///     Optional[str]: The URI if the prefix is known, None otherwise
    ///
    /// Raises:
    ///     RuntimeError: If the underlying lookup fails
    ///
    /// Example:
    ///     >>> uri = await registry.get_uri_for_prefix("foaf")
    ///     >>> if uri:
    ///     ...     print(f"foaf = {uri}")
    fn get_uri_for_prefix<'p>(&self, py: Python<'p>, prefix: String) -> PyResult<Bound<'p, PyAny>> {
        // The future must own everything it touches, so it gets its own clone of the registry.
        let registry = self.inner.clone();

        pyo3_async_runtimes::tokio::future_into_py(py, async move {
            registry
                .get_uri_for_prefix(&prefix)
                .await
                .map_err(|err| PyRuntimeError::new_err(format!("Lookup failed: {}", err)))
        })
    }

    /// Look up the prefix registered for a namespace URI
    ///
    /// Args:
    ///     uri: The full namespace URI
    ///
    /// Returns:
    ///     Optional[str]: The prefix if the URI is registered, None otherwise
    ///
    /// Raises:
    ///     RuntimeError: If the underlying lookup fails
    ///
    /// Example:
    ///     >>> prefix = await registry.get_prefix_for_uri("http://xmlns.com/foaf/0.1/")
    ///     >>> if prefix:
    ///     ...     print(f"URI has prefix: {prefix}")
    fn get_prefix_for_uri<'p>(&self, py: Python<'p>, uri: String) -> PyResult<Bound<'p, PyAny>> {
        // The future must own everything it touches, so it gets its own clone of the registry.
        let registry = self.inner.clone();

        pyo3_async_runtimes::tokio::future_into_py(py, async move {
            registry
                .get_prefix_for_uri(&uri)
                .await
                .map_err(|err| PyRuntimeError::new_err(format!("Lookup failed: {}", err)))
        })
    }

    /// Turn a (prefix, local name) pair into a full URI
    ///
    /// Args:
    ///     prefix: The namespace prefix (e.g., "foaf")
    ///     local_name: The local part (e.g., "Person")
    ///
    /// Returns:
    ///     Optional[str]: The full URI (e.g., "http://xmlns.com/foaf/0.1/Person") or None if prefix unknown
    ///
    /// Raises:
    ///     RuntimeError: If the underlying expansion fails
    ///
    /// Example:
    ///     >>> uri = await registry.expand_curie("foaf", "Person")
    ///     >>> if uri:
    ///     ...     print(f"foaf:Person = {uri}")
    fn expand_curie<'p>(
        &self,
        py: Python<'p>,
        prefix: String,
        local_name: String,
    ) -> PyResult<Bound<'p, PyAny>> {
        // The future must own everything it touches, so it gets its own clone of the registry.
        let registry = self.inner.clone();

        pyo3_async_runtimes::tokio::future_into_py(py, async move {
            registry
                .expand_curie(&prefix, &local_name)
                .await
                .map_err(|err| PyRuntimeError::new_err(format!("Expand failed: {}", err)))
        })
    }

    /// Fetch every registered prefix together with its URI
    ///
    /// Returns:
    ///     dict[str, str]: Dictionary mapping prefixes to URIs
    ///
    /// Example:
    ///     >>> prefixes = await registry.get_all_prefixes()
    ///     >>> for prefix, uri in prefixes.items():
    ///     ...     print(f"{prefix}: {uri}")
    fn get_all_prefixes<'p>(&self, py: Python<'p>) -> PyResult<Bound<'p, PyAny>> {
        // The future must own everything it touches, so it gets its own clone of the registry.
        let registry = self.inner.clone();

        // The registry call yields its value directly (no error to map), so it is wrapped in `Ok`.
        pyo3_async_runtimes::tokio::future_into_py(py, async move {
            Ok(registry.get_all_prefixes().await)
        })
    }

    /// Report how many prefixes are registered
    ///
    /// Returns:
    ///     int: The number of registered prefixes
    ///
    /// Example:
    ///     >>> count = await registry.prefix_count()
    ///     >>> print(f"Registered prefixes: {count}")
    fn prefix_count<'p>(&self, py: Python<'p>) -> PyResult<Bound<'p, PyAny>> {
        // The future must own everything it touches, so it gets its own clone of the registry.
        let registry = self.inner.clone();

        // The registry call yields its value directly (no error to map), so it is wrapped in `Ok`.
        pyo3_async_runtimes::tokio::future_into_py(py, async move {
            Ok(registry.prefix_count().await)
        })
    }

    /// Split a URI into a (prefix, local_name) tuple
    ///
    /// Uses longest-match semantics: if multiple namespaces match the URI,
    /// the longest one wins.
    ///
    /// Args:
    ///     uri: The full URI to shorten
    ///
    /// Returns:
    ///     Optional[tuple[str, str]]: A (prefix, local_name) tuple if a match is found, None otherwise
    ///
    /// Raises:
    ///     RuntimeError: If the underlying shorten operation fails
    ///
    /// Example:
    ///     >>> result = await registry.shorten_uri("http://xmlns.com/foaf/0.1/Person")
    ///     >>> if result:
    ///     ...     prefix, local = result
    ///     ...     print(f"{prefix}:{local}")
    fn shorten_uri<'p>(&self, py: Python<'p>, uri: String) -> PyResult<Bound<'p, PyAny>> {
        // The future must own everything it touches, so it gets its own clone of the registry.
        let registry = self.inner.clone();

        pyo3_async_runtimes::tokio::future_into_py(py, async move {
            registry
                .shorten_uri(&uri)
                .await
                .map_err(|err| PyRuntimeError::new_err(format!("Shorten failed: {}", err)))
        })
    }

    /// Shorten a URI to a CURIE string, falling back to the URI itself when nothing matches
    ///
    /// Args:
    ///     uri: The full URI to shorten
    ///
    /// Returns:
    ///     str: A CURIE string like "prefix:local", or the original URI if no namespace matches
    ///
    /// Raises:
    ///     RuntimeError: If the underlying shorten operation fails
    ///
    /// Example:
    ///     >>> result = await registry.shorten_uri_or_full("http://xmlns.com/foaf/0.1/Person")
    ///     >>> print(result)  # "foaf:Person" or the full URI
    fn shorten_uri_or_full<'p>(&self, py: Python<'p>, uri: String) -> PyResult<Bound<'p, PyAny>> {
        // The future must own everything it touches, so it gets its own clone of the registry.
        let registry = self.inner.clone();

        pyo3_async_runtimes::tokio::future_into_py(py, async move {
            registry
                .shorten_uri_or_full(&uri)
                .await
                .map_err(|err| PyRuntimeError::new_err(format!("Shorten failed: {}", err)))
        })
    }

    /// Shorten a list of URIs in one batch call
    ///
    /// Args:
    ///     uris: List of URIs to shorten
    ///
    /// Returns:
    ///     list[tuple[str, str] | None]: List with (prefix, local_name) for matched URIs, None for unmatched
    ///
    /// Raises:
    ///     RuntimeError: If the underlying batch shorten operation fails
    ///
    /// Example:
    ///     >>> uris = ["http://xmlns.com/foaf/0.1/Person", "http://unknown.org/thing"]
    ///     >>> results = await registry.shorten_uri_batch(uris)
    ///     >>> for result in results:
    ///     ...     if result:
    ///     ...         print(f"{result[0]}:{result[1]}")
    ///     ...     else:
    ///     ...         print("No match")
    fn shorten_uri_batch<'p>(
        &self,
        py: Python<'p>,
        uris: Vec<String>,
    ) -> PyResult<Bound<'p, PyAny>> {
        // The future must own everything it touches, so it gets its own clone of the registry.
        let registry = self.inner.clone();

        pyo3_async_runtimes::tokio::future_into_py(py, async move {
            // The registry call takes borrowed strings; the owned ones stay alive in `uris`.
            let borrowed = uris.iter().map(String::as_str).collect::<Vec<&str>>();

            let shortened = registry
                .shorten_uri_batch(borrowed)
                .await
                .map_err(|err| PyRuntimeError::new_err(format!("Batch shorten failed: {}", err)))?;

            Ok(shortened)
        })
    }

    /// Expand a list of CURIEs in one batch call
    ///
    /// Args:
    ///     curies: List of (prefix, local_name) tuples
    ///
    /// Returns:
    ///     list[str | None]: List with expanded URIs for known prefixes, None for unknown
    ///
    /// Raises:
    ///     RuntimeError: If the underlying batch expand operation fails
    ///
    /// Example:
    ///     >>> curies = [("foaf", "Person"), ("unknown", "Thing")]
    ///     >>> results = await registry.expand_curie_batch(curies)
    ///     >>> for result in results:
    ///     ...     if result:
    ///     ...         print(result)
    ///     ...     else:
    ///     ...         print("Unknown prefix")
    fn expand_curie_batch<'p>(
        &self,
        py: Python<'p>,
        curies: Vec<(String, String)>,
    ) -> PyResult<Bound<'p, PyAny>> {
        // The future must own everything it touches, so it gets its own clone of the registry.
        let registry = self.inner.clone();

        pyo3_async_runtimes::tokio::future_into_py(py, async move {
            // The registry call takes borrowed pairs; the owned strings stay alive in `curies`.
            let pairs = curies
                .iter()
                .map(|(prefix, local_name)| (prefix.as_str(), local_name.as_str()))
                .collect::<Vec<(&str, &str)>>();

            let expanded = registry
                .expand_curie_batch(pairs)
                .await
                .map_err(|err| PyRuntimeError::new_err(format!("Batch expand failed: {}", err)))?;

            Ok(expanded)
        })
    }

    /// Fixed textual representation shown by Python's `repr()`; it does not inspect any state.
    fn __repr__(&self) -> String {
        String::from("PrefixRegistry(connected)")
    }
}

/// Initialise the `_prefix_register` extension module
///
/// Registers the `PrefixRegistry` class and sets the module-level `__version__`
/// (taken from the crate version at compile time) and `__doc__` attributes.
#[pymodule]
fn _prefix_register(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // Text published as the module's `__doc__` attribute.
    const MODULE_DOC: &str =
        "Prefix Register - A PostgreSQL-backed namespace prefix registry for CURIE expansion";

    m.add_class::<PyPrefixRegistry>()?;
    m.add("__version__", env!("CARGO_PKG_VERSION"))?;
    // The last registration's result doubles as the function's return value.
    m.add("__doc__", MODULE_DOC)
}