1#[cfg(feature = "rdf-12")]
2use crate::BaseDirection;
3use crate::vocab::xsd;
4use crate::{
5 BlankNode, BlankNodeIdParseError, GraphName, IriParseError, LanguageTagParseError, Literal,
6 NamedNode, Quad, Term, Triple, Variable, VariableNameParseError,
7};
8use std::borrow::Cow;
9use std::char;
10use std::str::{Chars, FromStr};
11
/// Limit on the recursion counter threaded through `read_term` and `read_triple`.
///
/// `read_term` returns an error instead of recursing further when its counter reaches this
/// value, so that deeply nested triple terms cannot overflow the stack.
const MAX_NUMBER_OF_NESTED_TRIPLES: usize = 128;
15
16impl FromStr for NamedNode {
17 type Err = TermParseError;
18
19 fn from_str(s: &str) -> Result<Self, Self::Err> {
32 let (term, left) = read_named_node(s)?;
33 if !left.is_empty() {
34 return Err(Self::Err::msg(
35 "Named node serialization should end with a >",
36 ));
37 }
38 Ok(term)
39 }
40}
41
42impl FromStr for BlankNode {
43 type Err = TermParseError;
44
45 fn from_str(s: &str) -> Result<Self, Self::Err> {
55 let (term, left) = read_blank_node(s)?;
56 if !left.is_empty() {
57 return Err(Self::Err::msg(
58 "Blank node serialization should not contain whitespaces",
59 ));
60 }
61 Ok(term)
62 }
63}
64
65impl FromStr for Literal {
66 type Err = TermParseError;
67
68 fn from_str(s: &str) -> Result<Self, Self::Err> {
109 let (term, left) = read_literal(s)?;
110 if !left.is_empty() {
111 return Err(Self::Err::msg("Invalid literal serialization"));
112 }
113 Ok(term)
114 }
115}
116
117impl FromStr for Term {
118 type Err = TermParseError;
119
120 fn from_str(s: &str) -> Result<Self, Self::Err> {
133 let (term, left) = read_term(s, 0)?;
134 if !left.is_empty() {
135 return Err(Self::Err::msg("Invalid term serialization"));
136 }
137 Ok(term)
138 }
139}
140
141impl FromStr for Triple {
142 type Err = TermParseError;
143
144 fn from_str(s: &str) -> Result<Self, Self::Err> {
161 let (triple, left) = read_triple(s, 0)?;
162 if !matches!(left.trim(), "" | ".") {
163 return Err(Self::Err::msg("Invalid triple serialization"));
164 }
165 Ok(triple)
166 }
167}
168
169impl FromStr for Quad {
170 type Err = TermParseError;
171
172 fn from_str(s: &str) -> Result<Self, Self::Err> {
199 let (triple, left) = read_triple(s, 0)?;
200 if matches!(left.trim(), "" | ".") {
201 return Ok(triple.in_graph(GraphName::DefaultGraph));
202 }
203 let (graph_name, left) = read_term(left, 0)?;
204 if !matches!(left.trim(), "" | ".") {
205 return Err(Self::Err::msg("Invalid triple serialization"));
206 }
207 Ok(triple.in_graph(match graph_name {
208 Term::NamedNode(graph_name) => GraphName::from(graph_name),
209 Term::BlankNode(graph_name) => GraphName::from(graph_name),
210 Term::Literal(_) => {
211 return Err(TermParseError::msg(
212 "Literals are not allowed in graph name position",
213 ));
214 }
215 #[cfg(feature = "rdf-12")]
216 Term::Triple(_) => {
217 return Err(TermParseError::msg(
218 "Triple terms are not allowed in graph name position",
219 ));
220 }
221 }))
222 }
223}
224
225impl FromStr for Variable {
226 type Err = TermParseError;
227
228 fn from_str(s: &str) -> Result<Self, Self::Err> {
238 if !s.starts_with('?') && !s.starts_with('$') {
239 return Err(Self::Err::msg(
240 "Variable serialization should start with ? or $",
241 ));
242 }
243 Self::new(&s[1..]).map_err(|error| {
244 TermParseError(TermParseErrorKind::Variable {
245 value: s.to_owned(),
246 error,
247 })
248 })
249 }
250}
251
252fn read_named_node(s: &str) -> Result<(NamedNode, &str), TermParseError> {
253 let s = s.trim();
254 if let Some(remain) = s.strip_prefix('<') {
255 let end = remain
256 .find('>')
257 .ok_or_else(|| TermParseError::msg("Named node serialization should end with a >"))?;
258 let (value, remain) = remain.split_at(end);
259 let remain = &remain[1..];
260 let value = if value.contains('\\') {
261 let mut escaped = String::with_capacity(value.len());
262 let mut chars = value.chars();
263 while let Some(c) = chars.next() {
264 if c == '\\' {
265 match chars.next() {
266 Some('u') => escaped.push(read_hexa_char(&mut chars, 4)?),
267 Some('U') => escaped.push(read_hexa_char(&mut chars, 8)?),
268 Some(c) => {
269 escaped.push('\\');
270 escaped.push(c);
271 }
272 None => escaped.push('\\'),
273 }
274 } else {
275 escaped.push(c);
276 }
277 }
278 Cow::Owned(escaped)
279 } else {
280 Cow::Borrowed(value)
281 };
282 let term = NamedNode::new(value.as_ref()).map_err(|error| {
283 TermParseError(TermParseErrorKind::Iri {
284 value: value.into_owned(),
285 error,
286 })
287 })?;
288 Ok((term, remain))
289 } else {
290 Err(TermParseError::msg(
291 "Named node serialization should start with a <",
292 ))
293 }
294}
295
296fn read_blank_node(s: &str) -> Result<(BlankNode, &str), TermParseError> {
297 let s = s.trim();
298 if let Some(remain) = s.strip_prefix("_:") {
299 let mut end = remain
300 .find(|v: char| {
301 v.is_whitespace()
302 || matches!(
303 v,
304 '<' | '?'
305 | '$'
306 | '"'
307 | '\''
308 | '>'
309 | '@'
310 | '^'
311 | ':'
312 | '('
313 | ')'
314 | '{'
315 | '}'
316 | '['
317 | ']'
318 )
319 })
320 .unwrap_or(remain.len());
321 if let Some(pos) = remain[..end].find("..") {
322 end = pos;
323 }
324 if remain[..end].ends_with('.') {
325 end -= 1;
327 }
328 let (value, remain) = remain.split_at(end);
329 let term = BlankNode::new(value).map_err(|error| {
330 TermParseError(TermParseErrorKind::BlankNode {
331 value: value.to_owned(),
332 error,
333 })
334 })?;
335 Ok((term, remain))
336 } else {
337 Err(TermParseError::msg(
338 "Blank node serialization should start with '_:'",
339 ))
340 }
341}
342
343fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
344 let s = s.trim();
345 if let Some(s) = s.strip_prefix('"') {
346 let mut value = String::with_capacity(s.len());
347 let mut chars = s.chars();
348 while let Some(c) = chars.next() {
349 match c {
350 '"' => {
351 let remain = chars.as_str().trim();
352 return if let Some(remain) = remain.strip_prefix('@') {
353 let end = remain
354 .find(|v: char| !v.is_ascii_alphanumeric() && v != '-')
355 .unwrap_or(remain.len());
356 let (language, remain) = remain.split_at(end);
357 #[cfg(feature = "rdf-12")]
358 if let Some((language, direction)) = language.split_once("--") {
359 return Ok((
360 Literal::new_directional_language_tagged_literal(value, language, match direction {
361 "ltr" => BaseDirection::Ltr,
362 "rtl" => BaseDirection::Rtl,
363 _ => return Err(TermParseError(TermParseErrorKind::Msg(format!("The only two possible base directions are 'rtl' and 'ltr', found '{direction}'"))))
364 }).map_err(
365 |error| {
366 TermParseError(TermParseErrorKind::LanguageTag {
367 value: language.to_owned(),
368 error,
369 })
370 },
371 )?,
372 remain,
373 ));
374 }
375 Ok((
376 Literal::new_language_tagged_literal(value, language).map_err(
377 |error| {
378 TermParseError(TermParseErrorKind::LanguageTag {
379 value: language.to_owned(),
380 error,
381 })
382 },
383 )?,
384 remain,
385 ))
386 } else if let Some(remain) = remain.strip_prefix("^^") {
387 let (datatype, remain) = read_named_node(remain)?;
388 Ok((Literal::new_typed_literal(value, datatype), remain))
389 } else {
390 Ok((Literal::new_simple_literal(value), remain))
391 };
392 }
393 '\\' => {
394 if let Some(c) = chars.next() {
395 value.push(match c {
396 't' => '\t',
397 'b' => '\u{08}',
398 'n' => '\n',
399 'r' => '\r',
400 'f' => '\u{0C}',
401 '"' => '"',
402 '\'' => '\'',
403 '\\' => '\\',
404 'u' => read_hexa_char(&mut chars, 4)?,
405 'U' => read_hexa_char(&mut chars, 8)?,
406 _ => return Err(TermParseError::msg("Unexpected escaped char")),
407 })
408 } else {
409 return Err(TermParseError::msg("Unexpected literal end"));
410 }
411 }
412 _ => value.push(c),
413 }
414 }
415 Err(TermParseError::msg("Unexpected literal end"))
416 } else if let Some(remain) = s.strip_prefix("true") {
417 Ok((Literal::new_typed_literal("true", xsd::BOOLEAN), remain))
418 } else if let Some(remain) = s.strip_prefix("false") {
419 Ok((Literal::new_typed_literal("false", xsd::BOOLEAN), remain))
420 } else {
421 let input = s.as_bytes();
422 if input.is_empty() {
423 return Err(TermParseError::msg("Empty term serialization"));
424 }
425
426 let mut cursor = match input.first() {
427 Some(b'+' | b'-') => 1,
428 _ => 0,
429 };
430 let mut with_dot = false;
431
432 let mut count_before: usize = 0;
433 while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' {
434 count_before += 1;
435 cursor += 1;
436 }
437
438 let mut count_after: usize = 0;
439 if cursor < input.len() && input[cursor] == b'.' {
440 with_dot = true;
441 cursor += 1;
442 while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' {
443 count_after += 1;
444 cursor += 1;
445 }
446 }
447
448 if cursor < input.len() && (input[cursor] == b'e' || input[cursor] == b'E') {
449 cursor += 1;
450 cursor += match input.get(cursor) {
451 Some(b'+' | b'-') => 1,
452 _ => 0,
453 };
454 let mut count_exponent = 0;
455 while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' {
456 count_exponent += 1;
457 cursor += 1;
458 }
459 if count_exponent > 0 {
460 Ok((Literal::new_typed_literal(s, xsd::DOUBLE), &s[cursor..]))
461 } else {
462 Err(TermParseError::msg(
463 "Double serialization with an invalid exponent",
464 ))
465 }
466 } else if with_dot {
467 if count_after > 0 {
468 Ok((Literal::new_typed_literal(s, xsd::DECIMAL), &s[cursor..]))
469 } else {
470 Err(TermParseError::msg(
471 "Decimal serialization without floating part",
472 ))
473 }
474 } else if count_before > 0 {
475 Ok((Literal::new_typed_literal(s, xsd::INTEGER), &s[cursor..]))
476 } else {
477 Err(TermParseError::msg("Empty integer serialization"))
478 }
479 }
480}
481
482fn read_term(s: &str, number_of_recursive_calls: usize) -> Result<(Term, &str), TermParseError> {
483 if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES {
484 return Err(TermParseError::msg(
485 "Too many nested triples. The parser fails here to avoid a stack overflow.",
486 ));
487 }
488 let s = s.trim();
489 #[allow(unused_variables, clippy::allow_attributes)]
490 if let Some(remain) = s.strip_prefix("<<(") {
491 #[cfg(feature = "rdf-12")]
492 {
493 let (triple, remain) = read_triple(remain, number_of_recursive_calls + 1)?;
494 let remain = remain.trim_start();
495 if let Some(remain) = remain.strip_prefix(")>>") {
496 Ok((triple.into(), remain))
497 } else {
498 Err(TermParseError::msg(
499 "Triple term serialization must be enclosed between <<( and )>>",
500 ))
501 }
502 }
503 #[cfg(not(feature = "rdf-12"))]
504 {
505 Err(TermParseError::msg("RDF 1.2 is not supported"))
506 }
507 } else if s.starts_with('<') {
508 let (term, remain) = read_named_node(s)?;
509 Ok((term.into(), remain))
510 } else if s.starts_with('_') {
511 let (term, remain) = read_blank_node(s)?;
512 Ok((term.into(), remain))
513 } else {
514 let (term, remain) = read_literal(s)?;
515 Ok((term.into(), remain))
516 }
517}
518
519fn read_triple(
520 s: &str,
521 number_of_recursive_calls: usize,
522) -> Result<(Triple, &str), TermParseError> {
523 let s = s.trim();
524 let (subject, remain) = read_term(s, number_of_recursive_calls + 1)?;
525 let (predicate, remain) = read_named_node(remain)?;
526 let (object, remain) = read_term(remain, number_of_recursive_calls + 1)?;
527 Ok((
528 Triple {
529 subject: match subject {
530 Term::NamedNode(s) => s.into(),
531 Term::BlankNode(s) => s.into(),
532 Term::Literal(_) => {
533 return Err(TermParseError::msg(
534 "Literals are not allowed in subject position",
535 ));
536 }
537 #[cfg(feature = "rdf-12")]
538 Term::Triple(_) => {
539 return Err(TermParseError::msg(
540 "Triple terms are not allowed in subject position",
541 ));
542 }
543 },
544 predicate,
545 object,
546 },
547 remain,
548 ))
549}
550
551fn read_hexa_char(input: &mut Chars<'_>, len: usize) -> Result<char, TermParseError> {
552 let mut value = 0;
553 for _ in 0..len {
554 if let Some(c) = input.next() {
555 value = value * 16
556 + match c {
557 '0'..='9' => u32::from(c) - u32::from('0'),
558 'a'..='f' => u32::from(c) - u32::from('a') + 10,
559 'A'..='F' => u32::from(c) - u32::from('A') + 10,
560 _ => {
561 return Err(TermParseError::msg(format!(
562 "Unexpected character in a unicode escape: {c}"
563 )));
564 }
565 }
566 } else {
567 return Err(TermParseError::msg("Unexpected literal string end"));
568 }
569 }
570 char::from_u32(value).ok_or_else(|| TermParseError::msg("Invalid encoded unicode code point"))
571}
572
/// Error returned by the `FromStr` implementations and the `read_*` helpers of this file.
///
/// It is an opaque wrapper: its display output is forwarded unchanged from the private
/// [`TermParseErrorKind`] it holds.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct TermParseError(#[from] TermParseErrorKind);
577
/// The different failures wrapped by [`TermParseError`].
///
/// In the structured variants `value` is the offending piece of input and `error` the
/// underlying validation error.
#[derive(Debug, thiserror::Error)]
enum TermParseErrorKind {
    /// The content of a `<...>` named node was rejected by `NamedNode::new`.
    #[error("Error while parsing the named node '{value}': {error}")]
    Iri { error: IriParseError, value: String },
    /// The identifier of a `_:` blank node was rejected by `BlankNode::new`.
    #[error("Error while parsing the blank node '{value}': {error}")]
    BlankNode {
        error: BlankNodeIdParseError,
        value: String,
    },
    /// The language tag of a literal was rejected by the literal constructor.
    #[error("Error while parsing the language tag '{value}': {error}")]
    LanguageTag {
        error: LanguageTagParseError,
        value: String,
    },
    /// The variable was rejected by `Variable::new`; `value` is the full input, sigil
    /// included.
    #[error("Error while parsing the variable '{value}': {error}")]
    Variable {
        error: VariableNameParseError,
        value: String,
    },
    /// Any other failure, described by a free-form message.
    #[error("{0}")]
    Msg(String),
}
601
602impl TermParseError {
603 pub(crate) fn msg(msg: impl Into<String>) -> Self {
604 Self(TermParseErrorKind::Msg(msg.into()))
605 }
606}
607
#[cfg(test)]
#[cfg(feature = "rdf-12")]
mod tests {
    use super::*;

    #[test]
    fn triple_term_parsing() {
        // `\u` and `\U` escapes are decoded inside string literals.
        let literal: Term = "\"ex\\u00E9\\U000000E9\"".parse().unwrap();
        assert_eq!(
            literal,
            Term::from(Literal::new_simple_literal("ex\u{e9}\u{e9}"))
        );

        // The same escapes are decoded inside IRIs.
        let named_node: Term = "<http://example.com/\\u00E9\\U000000E9>".parse().unwrap();
        assert_eq!(
            named_node,
            Term::from(NamedNode::new_unchecked("http://example.com/\u{e9}\u{e9}"))
        );

        // A triple term is enclosed between `<<(` and `)>>`.
        let triple_term: Term = "<<( _:s <http://example.com/p> \"o\" )>>".parse().unwrap();
        assert_eq!(
            triple_term,
            Term::from(Triple::new(
                BlankNode::new("s").unwrap(),
                NamedNode::new("http://example.com/p").unwrap(),
                Literal::new_simple_literal("o"),
            ))
        );
    }
}