1use std::collections::HashMap;
2use std::num::NonZeroU32;
3
4use serde::{Deserialize, Serialize};
5
6use crate::error::{Error, Result};
7use crate::limits::Limits;
8
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
11pub struct IriId(NonZeroU32);
12
13impl IriId {
14 #[must_use]
20 pub fn from_index(index: u32) -> Self {
21 Self(NonZeroU32::new(index).expect("IriId index must be non-zero"))
22 }
23
24 pub fn try_from_index(index: u32) -> Result<Self> {
26 NonZeroU32::new(index)
27 .map(Self)
28 .ok_or_else(|| Error::InvalidIri("IriId index must be non-zero".into()))
29 }
30
31 #[must_use]
33 pub fn index(self) -> u32 {
34 self.0.get()
35 }
36}
37
38#[derive(Debug, Default, Clone, PartialEq, Eq)]
40pub struct InternPool {
41 strings: Vec<Box<str>>,
42 index: HashMap<Box<str>, IriId>,
43}
44
45impl InternPool {
46 #[must_use]
48 pub fn new() -> Self {
49 Self::default()
50 }
51
52 #[must_use]
54 pub fn len(&self) -> usize {
55 self.strings.len()
56 }
57
58 #[must_use]
60 pub fn is_empty(&self) -> bool {
61 self.strings.is_empty()
62 }
63
64 #[must_use]
66 pub fn get(&self, iri: &str) -> Option<IriId> {
67 self.index.get(iri).copied()
68 }
69
70 pub fn intern(&mut self, iri: &str) -> Result<IriId> {
72 self.intern_with_limit(iri, Limits::default().max_iri_len)
73 }
74
75 pub fn intern_with_limit(&mut self, iri: &str, max_len: usize) -> Result<IriId> {
77 validate_iri_with_max_len(iri, max_len)?;
78
79 if let Some(&id) = self.index.get(iri) {
80 return Ok(id);
81 }
82
83 let index = u32::try_from(self.strings.len())
84 .map_err(|_| Error::InvalidIri("IRI pool capacity exceeded".into()))?
85 + 1;
86 let id = IriId(NonZeroU32::new(index).expect("index is non-zero"));
87 let owned: Box<str> = iri.into();
88 self.index.insert(owned.clone(), id);
89 self.strings.push(owned);
90 Ok(id)
91 }
92
93 pub fn resolve(&self, id: IriId) -> Result<&str> {
95 let idx = usize::try_from(id.index())
96 .map_err(|_| Error::InvalidIri(format!("invalid IriId index: {}", id.index())))?
97 - 1;
98 self.strings
99 .get(idx)
100 .map(AsRef::as_ref)
101 .ok_or_else(|| Error::InvalidIri(format!("unknown IriId: {}", id.index())))
102 }
103
104 pub fn iter(&self) -> impl Iterator<Item = &str> {
106 self.strings.iter().map(AsRef::as_ref)
107 }
108}
109
/// Schemes accepted by [`validate_iri_with_max_len`]. The scheme of an IRI
/// must equal one of these entries exactly (the comparison is case-sensitive).
const ALLOWED_SCHEMES: &[&str] = &["http", "https", "urn"];
111
112pub fn validate_iri(iri: &str) -> Result<()> {
114 validate_iri_with_max_len(iri, Limits::default().max_iri_len)
115}
116
117pub fn validate_iri_with_max_len(iri: &str, max_len: usize) -> Result<()> {
119 if iri.is_empty() {
120 return Err(Error::InvalidIri("IRI must not be empty".into()));
121 }
122
123 if iri.len() > max_len {
124 return Err(Error::InvalidIri(format!(
125 "IRI exceeds maximum length of {max_len} bytes"
126 )));
127 }
128
129 if iri.chars().any(|c| c.is_ascii_control()) {
130 return Err(Error::InvalidIri(format!(
131 "IRI contains control characters: {iri}"
132 )));
133 }
134
135 if iri.contains(' ') {
136 return Err(Error::InvalidIri(format!("IRI contains whitespace: {iri}")));
137 }
138
139 let Some((scheme, rest)) = iri.split_once(':') else {
140 return Err(Error::InvalidIri(format!(
141 "IRI must be absolute (scheme:...): {iri}"
142 )));
143 };
144
145 if scheme.is_empty() || rest.is_empty() {
146 return Err(Error::InvalidIri(format!("invalid IRI scheme: {iri}")));
147 }
148
149 if !scheme
150 .chars()
151 .all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '-' || c == '.')
152 {
153 return Err(Error::InvalidIri(format!("invalid IRI scheme: {iri}")));
154 }
155
156 if !ALLOWED_SCHEMES.contains(&scheme) {
157 return Err(Error::InvalidIri(format!(
158 "IRI scheme '{scheme}' is not allowed (allowed: http, https, urn)"
159 )));
160 }
161
162 Ok(())
163}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// Interning the same IRI twice yields one identifier and one entry.
    #[test]
    fn intern_deduplicates() {
        let mut pool = InternPool::new();
        let first = pool.intern("http://example.org/A").expect("intern");
        let repeat = pool.intern("http://example.org/A").expect("intern");
        let other = pool.intern("http://example.org/B").expect("intern");

        assert_eq!(first, repeat);
        assert_ne!(first, other);
        assert_eq!(pool.len(), 2);
    }

    /// An identifier resolves back to the text it was created from.
    #[test]
    fn resolve_round_trip() {
        let mut pool = InternPool::new();
        let id = pool.intern("http://example.org/Test").expect("intern");
        let text = pool.resolve(id).expect("resolve");
        assert_eq!(text, "http://example.org/Test");
    }

    /// An IRI without a scheme is refused.
    #[test]
    fn rejects_relative_iri() {
        let mut pool = InternPool::new();
        let outcome = pool.intern("relative/path");
        assert!(outcome.is_err());
    }

    /// `urn:` IRIs are accepted and round-trip through the pool.
    #[test]
    fn accepts_urn() {
        let mut pool = InternPool::new();
        let id = pool.intern("urn:example:animal").expect("intern");
        let text = pool.resolve(id).expect("resolve");
        assert_eq!(text, "urn:example:animal");
    }

    /// The empty string is not a valid IRI.
    #[test]
    fn rejects_empty_iri() {
        let outcome = validate_iri("");
        assert!(outcome.is_err());
    }

    /// A space inside the IRI is refused.
    #[test]
    fn rejects_whitespace_iri() {
        let outcome = validate_iri("http://example.org/a b");
        assert!(outcome.is_err());
    }

    /// Schemes outside the allow-list are refused.
    #[test]
    fn rejects_javascript_scheme() {
        let outcome = validate_iri("javascript:alert(1)");
        assert!(outcome.is_err());
    }

    /// A tab character inside the IRI is refused.
    #[test]
    fn rejects_control_characters() {
        let outcome = validate_iri("http://example.org/\u{0009}");
        assert!(outcome.is_err());
    }

    /// Zero is not a valid raw index.
    #[test]
    fn try_from_index_rejects_zero() {
        let outcome = IriId::try_from_index(0);
        assert!(outcome.is_err());
    }

    /// Looking up an IRI that was never interned yields `None`.
    #[test]
    fn get_returns_none_for_unknown() {
        let pool = InternPool::new();
        let found = pool.get("http://example.org/missing");
        assert!(found.is_none());
    }

    /// Resolving an identifier the pool never issued is an `InvalidIri` error.
    #[test]
    fn resolve_unknown_id_errors() {
        let pool = InternPool::new();
        let stray = IriId::from_index(1);
        let err = pool.resolve(stray).expect_err("unknown id");
        assert!(matches!(err, Error::InvalidIri(_)));
    }
}