Skip to main content

ntriples_parser/
lib.rs

1mod lexer;
2
3use lexer::Token;
4use logos::Logos;
5
/// One parsed statement, stored as three byte slices that share a single lifetime.
///
/// The fields are, in order, the subject (`.0`), the predicate (`.1`) and the
/// object (`.2`).
///
/// The type only wraps shared references, so it is cheap to copy and can be
/// compared, hashed and debug-printed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Triple<'a>(pub &'a [u8], pub &'a [u8], pub &'a [u8]);
7
8pub fn parse<'a>(input: &'a [u8]) -> Result<Vec<Triple<'a>>, ()> {
9    let tokens = Token::lexer(input);
10    let mut triples = Vec::new();
11    let mut counter: u8 = 0;
12    let mut subject: &[u8] = &[];
13    let mut predicate: &[u8] = &[];
14    for token in tokens {
15        let token = token?;
16        // NOTE: skip comments without advancing the state counter
17        if matches!(token, Token::Comment) {
18            continue;
19        }
20        match (counter, token) {
21            (0, Token::Iri(bytes) | Token::BlankNode(bytes)) => {
22                subject = bytes;
23            }
24            (1, Token::Iri(bytes)) => {
25                predicate = bytes;
26            }
27            (2, Token::Literal(bytes) | Token::Iri(bytes) | Token::BlankNode(bytes)) => {
28                triples.push(Triple(subject, predicate, bytes))
29            }
30            (3, Token::Dot) => {}
31            _ => return Err(()),
32        }
33        counter = (counter + 1) % 4;
34    }
35    if counter != 0 {
36        return Err(());
37    }
38    Ok(triples)
39}
40
#[cfg(test)]
mod test {
    use super::parse;

    /// A comment line before a statement is skipped and the statement still parses.
    #[test]
    fn parse_comment_line() {
        let parsed = parse(b"# this is a comment\n<s> <p> <o> .").unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].0, b"<s>");
    }

    /// Input the lexer cannot tokenize is reported as an error.
    #[test]
    fn parse_invalid_input_returns_err() {
        let outcome = parse(b"not valid ntriples!");
        assert!(outcome.is_err());
    }

    /// A statement that stops after the predicate is reported as an error.
    #[test]
    fn parse_incomplete_triple_returns_err() {
        let outcome = parse(b"<s> <p>");
        assert!(outcome.is_err());
    }

    /// Blank nodes are accepted in both the subject and the object position.
    #[test]
    fn parse_blank_node_as_object() {
        let parsed = parse(b"_:a <p> _:b .").unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].2, b"_:b");
    }
}