rdf_types/
blankid.rs

1use std::borrow::{Borrow, ToOwned};
2use std::fmt;
3use std::ops::Deref;
4use std::str::FromStr;
5
/// Invalid blank node identifier.
///
/// This error is raised by the [`BlankId::new`] and [`BlankIdBuf::new`] functions
/// when the input string is not a valid blank node identifier.
///
/// The rejected input is handed back to the caller in the public field, so
/// no data is lost when parsing fails.
#[derive(Debug)]
pub struct InvalidBlankId<T>(pub T);

impl<T: fmt::Display> fmt::Display for InvalidBlankId<T> {
	/// Writes a human readable message that includes the rejected input.
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		write!(f, "invalid blank node identifier `{}`", self.0)
	}
}

// Implementing `Error` lets callers propagate the failure with `?` into
// `Box<dyn Error>` and similar error containers.
impl<T: fmt::Debug + fmt::Display> std::error::Error for InvalidBlankId<T> {}
12
/// Blank node identifier.
///
/// A blank node identifier is a string matching
/// the `BLANK_NODE_LABEL` production in the following [EBNF](http://www.w3.org/TR/REC-xml/#sec-notation) grammar:
///
/// ```ebnf
/// [141s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
/// [157s] PN_CHARS_BASE    ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
/// [158s] PN_CHARS_U       ::= PN_CHARS_BASE | '_' | ':'
/// [160s] PN_CHARS         ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
/// ```
///
/// This is an unsized wrapper around `str`; it is only ever handled behind
/// a reference. The owned counterpart is [`BlankIdBuf`].
// `repr(transparent)` guarantees the same layout as `str`, which is what the
// `&str` -> `&BlankId` reinterpretation in `new_unchecked` relies on.
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct BlankId(str);
26
27impl BlankId {
28	/// Parses a blank node identifier.
29	#[inline(always)]
30	pub fn new(s: &str) -> Result<&Self, InvalidBlankId<&str>> {
31		if check(s.chars()) {
32			Ok(unsafe { Self::new_unchecked(s) })
33		} else {
34			Err(InvalidBlankId(s))
35		}
36	}
37
38	/// Creates a new blank node identifier from `s` without checking it.
39	///
40	/// # Safety
41	///
42	/// The input string `s` must be a valid blank node identifier.
43	#[inline(always)]
44	pub unsafe fn new_unchecked(s: &str) -> &Self {
45		std::mem::transmute(s)
46	}
47
48	/// Returns a reference to the underlying string defining the blank node identifier.
49	#[inline(always)]
50	pub fn as_str(&self) -> &str {
51		&self.0
52	}
53
54	/// Returns the suffix part (after `_:`) of the blank node identifier.
55	#[inline(always)]
56	pub fn suffix(&self) -> &str {
57		&self.0[2..]
58	}
59}
60
61impl Deref for BlankId {
62	type Target = str;
63
64	#[inline(always)]
65	fn deref(&self) -> &str {
66		self.as_str()
67	}
68}
69
70impl AsRef<str> for BlankId {
71	fn as_ref(&self) -> &str {
72		self.as_str()
73	}
74}
75
76impl AsRef<[u8]> for BlankId {
77	fn as_ref(&self) -> &[u8] {
78		self.as_bytes()
79	}
80}
81
82impl Borrow<str> for BlankId {
83	fn borrow(&self) -> &str {
84		self.as_str()
85	}
86}
87
88impl ToOwned for BlankId {
89	type Owned = BlankIdBuf;
90
91	#[inline(always)]
92	fn to_owned(&self) -> BlankIdBuf {
93		unsafe { BlankIdBuf::new_unchecked(self.as_str().to_owned()) }
94	}
95}
96
97impl fmt::Display for BlankId {
98	#[inline(always)]
99	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
100		self.0.fmt(f)
101	}
102}
103
104impl fmt::Debug for BlankId {
105	#[inline(always)]
106	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
107		self.0.fmt(f)
108	}
109}
110
111impl PartialEq<str> for BlankId {
112	#[inline(always)]
113	fn eq(&self, other: &str) -> bool {
114		self.0 == *other
115	}
116}
117
/// Owned blank node identifier.
///
/// A blank node identifier is a string matching
/// the `BLANK_NODE_LABEL` production in the following [EBNF](http://www.w3.org/TR/REC-xml/#sec-notation) grammar:
///
/// ```ebnf
/// [141s] BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
/// [157s] PN_CHARS_BASE    ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
/// [158s] PN_CHARS_U       ::= PN_CHARS_BASE | '_' | ':'
/// [160s] PN_CHARS         ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
/// ```
///
/// This type owns its buffer and dereferences to the borrowed [`BlankId`].
// `repr(transparent)` guarantees the same layout as the wrapped `String`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
#[cfg_attr(feature = "serde", derive(serde::Serialize), serde(transparent))]
pub struct BlankIdBuf(String);
132
133impl BlankIdBuf {
134	/// Parses a blank node identifier.
135	#[inline(always)]
136	pub fn new(s: String) -> Result<Self, InvalidBlankId<String>> {
137		if check(s.chars()) {
138			Ok(unsafe { Self::new_unchecked(s) })
139		} else {
140			Err(InvalidBlankId(s))
141		}
142	}
143
144	/// Creates a new blank node identifier from `s` without checking it.
145	///
146	/// # Safety
147	///
148	/// The input string `s` must be a valid blank node identifier.
149	#[inline(always)]
150	pub unsafe fn new_unchecked(s: String) -> Self {
151		std::mem::transmute(s)
152	}
153
154	/// Creates a blank node identifier using the given `u8` as suffix.
155	#[inline(always)]
156	pub fn from_u8(i: u8) -> Self {
157		unsafe { Self::new_unchecked(format!("_:{i}")) }
158	}
159
160	/// Creates a blank node identifier using the given `u16` as suffix.
161	#[inline(always)]
162	pub fn from_u16(i: u16) -> Self {
163		unsafe { Self::new_unchecked(format!("_:{i}")) }
164	}
165
166	/// Creates a blank node identifier using the given `u32` as suffix.
167	#[inline(always)]
168	pub fn from_u32(i: u32) -> Self {
169		unsafe { Self::new_unchecked(format!("_:{i}")) }
170	}
171
172	/// Creates a blank node identifier using the given `u64` as suffix.
173	#[inline(always)]
174	pub fn from_u64(i: u64) -> Self {
175		unsafe { Self::new_unchecked(format!("_:{i}")) }
176	}
177
178	/// Creates a blank node identifier using the given suffix.
179	#[inline(always)]
180	pub fn from_suffix(suffix: &str) -> Result<Self, InvalidBlankId<String>> {
181		Self::new(format!("_:{suffix}"))
182	}
183
184	/// Returns a reference to this blank id as a `BlankId`.
185	#[inline(always)]
186	pub fn as_blank_id_ref(&self) -> &BlankId {
187		unsafe { BlankId::new_unchecked(&self.0) }
188	}
189}
190
191impl FromStr for BlankIdBuf {
192	type Err = InvalidBlankId<String>;
193
194	fn from_str(s: &str) -> Result<Self, InvalidBlankId<String>> {
195		Self::new(s.to_owned())
196	}
197}
198
199impl Deref for BlankIdBuf {
200	type Target = BlankId;
201
202	#[inline(always)]
203	fn deref(&self) -> &BlankId {
204		self.as_blank_id_ref()
205	}
206}
207
208impl AsRef<BlankId> for BlankIdBuf {
209	#[inline(always)]
210	fn as_ref(&self) -> &BlankId {
211		self.as_blank_id_ref()
212	}
213}
214
215impl Borrow<BlankId> for BlankIdBuf {
216	#[inline(always)]
217	fn borrow(&self) -> &BlankId {
218		self.as_blank_id_ref()
219	}
220}
221
222impl AsRef<str> for BlankIdBuf {
223	#[inline(always)]
224	fn as_ref(&self) -> &str {
225		self.0.as_str()
226	}
227}
228
229impl Borrow<str> for BlankIdBuf {
230	#[inline(always)]
231	fn borrow(&self) -> &str {
232		self.0.as_str()
233	}
234}
235
236impl AsRef<[u8]> for BlankIdBuf {
237	#[inline(always)]
238	fn as_ref(&self) -> &[u8] {
239		self.0.as_bytes()
240	}
241}
242
243impl Borrow<BlankId> for &BlankIdBuf {
244	#[inline(always)]
245	fn borrow(&self) -> &BlankId {
246		self.as_blank_id_ref()
247	}
248}
249
250impl Borrow<str> for &BlankIdBuf {
251	#[inline(always)]
252	fn borrow(&self) -> &str {
253		self.0.as_str()
254	}
255}
256
257impl<'a> From<&'a BlankIdBuf> for &'a BlankId {
258	#[inline(always)]
259	fn from(b: &'a BlankIdBuf) -> Self {
260		b.as_ref()
261	}
262}
263
264impl fmt::Display for BlankIdBuf {
265	#[inline(always)]
266	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
267		self.0.fmt(f)
268	}
269}
270
271impl fmt::Debug for BlankIdBuf {
272	#[inline(always)]
273	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
274		self.0.fmt(f)
275	}
276}
277
278impl PartialEq<BlankId> for BlankIdBuf {
279	fn eq(&self, other: &BlankId) -> bool {
280		self.as_blank_id_ref() == other
281	}
282}
283
284impl<'a> PartialEq<&'a BlankId> for BlankIdBuf {
285	fn eq(&self, other: &&'a BlankId) -> bool {
286		self.as_blank_id_ref() == *other
287	}
288}
289
290impl PartialEq<BlankIdBuf> for &BlankId {
291	fn eq(&self, other: &BlankIdBuf) -> bool {
292		*self == other.as_blank_id_ref()
293	}
294}
295
296impl PartialEq<BlankIdBuf> for BlankId {
297	fn eq(&self, other: &BlankIdBuf) -> bool {
298		self == other.as_blank_id_ref()
299	}
300}
301
/// Checks that `chars` spells a valid blank node identifier, i.e. matches
///
/// ```ebnf
/// BLANK_NODE_LABEL ::= '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
/// ```
///
/// Returns `true` when the whole input matches the production and `false`
/// otherwise (including on empty input).
fn check<C: Iterator<Item = char>>(mut chars: C) -> bool {
	// Mandatory `_:` prefix.
	if chars.next() != Some('_') || chars.next() != Some(':') {
		return false;
	}

	// First character of the label: `PN_CHARS_U | [0-9]`.
	match chars.next() {
		Some(c) if c.is_ascii_digit() || is_pn_char_u(c) => (),
		_ => return false,
	}

	// Rest of the label: `((PN_CHARS | '.')* PN_CHARS)?`.
	// A dot is accepted anywhere except in the last position, so it is
	// enough to remember whether the most recent character was a dot.
	let mut ends_with_dot = false;
	for c in chars {
		if c == '.' {
			ends_with_dot = true;
		} else if is_pn_char(c) {
			ends_with_dot = false;
		} else {
			return false;
		}
	}

	!ends_with_dot
}

/// Checks if `c` matches the `PN_CHARS_BASE` production.
fn is_pn_char_base(c: char) -> bool {
	matches!(
		c,
		'A'..='Z'
			| 'a'..='z'
			| '\u{00c0}'..='\u{00d6}'
			| '\u{00d8}'..='\u{00f6}'
			| '\u{00f8}'..='\u{02ff}'
			| '\u{0370}'..='\u{037d}'
			| '\u{037f}'..='\u{1fff}'
			| '\u{200c}'..='\u{200d}'
			| '\u{2070}'..='\u{218f}'
			| '\u{2c00}'..='\u{2fef}'
			| '\u{3001}'..='\u{d7ff}'
			| '\u{f900}'..='\u{fdcf}'
			| '\u{fdf0}'..='\u{fffd}'
			| '\u{10000}'..='\u{effff}'
	)
}

/// Checks if `c` matches the `PN_CHARS_U` production
/// (`PN_CHARS_BASE | '_' | ':'`).
fn is_pn_char_u(c: char) -> bool {
	matches!(c, '_' | ':') || is_pn_char_base(c)
}

/// Checks if `c` matches the `PN_CHARS` production
/// (`PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]`).
fn is_pn_char(c: char) -> bool {
	matches!(
		c,
		'-' | '0'..='9' | '\u{00b7}' | '\u{0300}'..='\u{036f}' | '\u{203f}'..='\u{2040}'
	) || is_pn_char_u(c)
}
335
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for BlankIdBuf {
	/// Deserializes a blank node identifier from a string, rejecting any
	/// string that is not a valid identifier with an `invalid_value` error.
	fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
	where
		D: serde::Deserializer<'de>,
	{
		/// Visitor turning string-like inputs into a validated `BlankIdBuf`.
		struct BlankIdVisitor;

		impl<'de> serde::de::Visitor<'de> for BlankIdVisitor {
			type Value = BlankIdBuf;

			fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
				formatter.write_str("blank node identifier")
			}

			// Borrowed inputs are copied, then handled like owned ones.
			fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
			where
				E: serde::de::Error,
			{
				self.visit_string(String::from(v))
			}

			fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
			where
				E: serde::de::Error,
			{
				self.visit_string(String::from(v))
			}

			fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
			where
				E: serde::de::Error,
			{
				match BlankIdBuf::new(v) {
					Ok(id) => Ok(id),
					// Report the rejected input back to the deserializer.
					Err(InvalidBlankId(rejected)) => Err(E::invalid_value(
						serde::de::Unexpected::Str(&rejected),
						&self,
					)),
				}
			}
		}

		deserializer.deserialize_string(BlankIdVisitor)
	}
}