Skip to main content

ntriples_parser/
lib.rs

1mod lexer;
2
3use std::ops::Range;
4
5use lexer::Token;
6use logos::Logos;
7
/// A single parsed statement: `(subject, predicate, object)`.
///
/// Each field is a byte slice borrowed from the input buffer handed to
/// [`parse`]; no term is copied or decoded.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Triple<'a>(pub &'a [u8], pub &'a [u8], pub &'a [u8]);
9
/// A recoverable diagnostic produced while parsing.
///
/// These are collected and returned alongside the parsed triples rather than
/// aborting the parse.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Byte range in the input of the token that triggered the diagnostic.
    pub span: Range<usize>,
    /// Human-readable description of what was expected and what was found.
    pub message: String,
}
14
15pub fn parse<'a>(input: &'a [u8]) -> Result<(Vec<Triple<'a>>, Vec<ParseError>), ()> {
16    let mut tokens = Token::lexer(input);
17    let mut triples = Vec::new();
18    let mut counter: u8 = 0;
19    let mut subject: &[u8] = &[];
20    let mut predicate: &[u8] = &[];
21    let mut errors: Vec<ParseError> = Vec::new();
22    while let Some(token) = tokens.next() {
23        let token = token?;
24        // NOTE: skip comments without advancing the state counter
25        if matches!(token, Token::Comment) {
26            continue;
27        }
28        match (counter, token) {
29            (0, Token::Iri(bytes) | Token::BlankNode(bytes)) => {
30                subject = bytes;
31            }
32            (0, Token::Literal(bytes)) => {
33                errors.push(ParseError {
34                    span: tokens.span(),
35                    message: "Expected Iri or BlankNode, found Literal".to_string(),
36                });
37                subject = bytes;
38            }
39            (1, Token::Iri(bytes)) => {
40                predicate = bytes;
41            }
42            (1, Token::Literal(bytes)) => {
43                errors.push(ParseError {
44                    span: tokens.span(),
45                    message: "Expected Iri or BlankNode, found Literal".to_string(),
46                });
47                predicate = bytes;
48            }
49            (1, Token::BlankNode(bytes)) => {
50                errors.push(ParseError {
51                    span: tokens.span(),
52                    message: "Expected Iri or BlankNode, found BlankNode".to_string(),
53                });
54                predicate = bytes;
55            }
56            (2, Token::Literal(bytes) | Token::Iri(bytes) | Token::BlankNode(bytes)) => {
57                triples.push(Triple(subject, predicate, bytes))
58            }
59            (3, Token::Dot) => {}
60            _ => return Err(()),
61        }
62        counter = (counter + 1) % 4;
63    }
64    if counter != 0 {
65        return Err(());
66    }
67    Ok((triples, errors))
68}
69
#[cfg(test)]
mod test {
    use super::parse;

    /// A leading comment line is skipped and the following statement is parsed.
    #[test]
    fn parse_comment_line() {
        let (triples, _) = parse(b"# this is a comment\n<s> <p> <o> .").unwrap();
        assert_eq!(triples.len(), 1);
        assert_eq!(triples[0].0, b"<s>");
    }

    /// Input the lexer cannot tokenize is a hard failure.
    #[test]
    fn parse_invalid_input_returns_err() {
        let outcome = parse(b"not valid ntriples!");
        assert!(outcome.is_err());
    }

    /// A statement cut off before its object and dot is a hard failure.
    #[test]
    fn parse_incomplete_triple_returns_err() {
        let outcome = parse(b"<s> <p>");
        assert!(outcome.is_err());
    }

    /// Blank nodes are accepted in subject and object position.
    #[test]
    fn parse_blank_node_as_object() {
        let (triples, _) = parse(b"_:a <p> _:b .").unwrap();
        assert_eq!(triples.len(), 1);
        assert_eq!(triples[0].2, b"_:b");
    }

    /// Literals in subject and predicate position still yield a triple, with
    /// one diagnostic recorded for each misplaced term.
    #[test]
    fn literal_triple() {
        let (triples, errors) = parse(b"\"a\" \"a\" \"a\" .\n").unwrap();
        assert_eq!(triples.len(), 1);
        assert_eq!(errors.len(), 2);
    }
}