Skip to main content

oxirs_core/format/
turtle_grammar.rs

//! Turtle Grammar Recognizer
//!
//! Implements the W3C Turtle grammar specification as a rule recognizer
//! that builds RDF triples from N3/Turtle token streams.
5
6#![allow(dead_code)]
7
8use super::error::{ParseResult, RdfParseError, RdfSyntaxError, TextPosition};
9use super::n3_lexer::N3Token;
10use super::toolkit::{Parser, RuleRecognizer};
11use crate::model::{BlankNode, Literal, NamedNode, Object, Predicate, Subject, Triple};
12use std::collections::HashMap;
13
14/// AST node types for Turtle grammar
15#[derive(Debug, Clone, PartialEq)]
16pub enum TurtleNode {
17    Triple(Triple),
18    PrefixDeclaration { prefix: String, iri: String },
19    BaseDeclaration { iri: String },
20    Comment(String),
21}
22
23/// Turtle parser context for prefix management and base IRI resolution
24#[derive(Debug, Clone)]
25pub struct TurtleContext {
26    /// Current base IRI for relative IRI resolution
27    pub base_iri: Option<String>,
28    /// Prefix declarations mapping prefix -> IRI
29    pub prefixes: HashMap<String, String>,
30    /// Auto-generated blank node counter
31    pub blank_node_counter: u64,
32    /// Current position for error reporting
33    pub position: TextPosition,
34}
35
36impl Default for TurtleContext {
37    fn default() -> Self {
38        let mut prefixes = HashMap::new();
39        // Add standard prefixes
40        prefixes.insert(
41            "rdf".to_string(),
42            "http://www.w3.org/1999/02/22-rdf-syntax-ns#".to_string(),
43        );
44        prefixes.insert(
45            "rdfs".to_string(),
46            "http://www.w3.org/2000/01/rdf-schema#".to_string(),
47        );
48        prefixes.insert(
49            "xsd".to_string(),
50            "http://www.w3.org/2001/XMLSchema#".to_string(),
51        );
52        prefixes.insert(
53            "owl".to_string(),
54            "http://www.w3.org/2002/07/owl#".to_string(),
55        );
56
57        Self {
58            base_iri: None,
59            prefixes,
60            blank_node_counter: 0,
61            position: TextPosition::start(),
62        }
63    }
64}
65
66impl TurtleContext {
67    pub fn new() -> Self {
68        Self::default()
69    }
70
71    /// Resolve a prefixed name to a full IRI
72    pub fn resolve_prefixed_name(&self, prefix: Option<&str>, local: &str) -> ParseResult<String> {
73        match prefix {
74            Some(prefix) => match self.prefixes.get(prefix) {
75                Some(base_iri) => Ok(format!("{base_iri}{local}")),
76                None => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
77                    format!("Undefined prefix: {prefix}"),
78                    self.position,
79                ))),
80            },
81            None => {
82                // Default prefix (empty prefix)
83                match self.prefixes.get("") {
84                    Some(base_iri) => Ok(format!("{base_iri}{local}")),
85                    None => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
86                        "No default prefix defined".to_string(),
87                        self.position,
88                    ))),
89                }
90            }
91        }
92    }
93
94    /// Resolve a relative IRI against the base IRI
95    pub fn resolve_iri(&self, iri: &str) -> ParseResult<String> {
96        if self.is_absolute_iri(iri) {
97            Ok(iri.to_string())
98        } else {
99            match &self.base_iri {
100                Some(base) => Ok(self.resolve_relative_iri(base, iri)),
101                None => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
102                    format!("Relative IRI without base: {iri}"),
103                    self.position,
104                ))),
105            }
106        }
107    }
108
109    /// Generate a new anonymous blank node
110    pub fn generate_blank_node(&mut self) -> BlankNode {
111        self.blank_node_counter += 1;
112        BlankNode::new(format!("_:gen{}", self.blank_node_counter))
113            .expect("generated blank node format is always valid")
114    }
115
116    /// Check if an IRI is absolute (has scheme)
117    fn is_absolute_iri(&self, iri: &str) -> bool {
118        iri.contains(':') && !iri.starts_with(':')
119    }
120
121    /// Resolve relative IRI against base IRI
122    fn resolve_relative_iri(&self, base: &str, relative: &str) -> String {
123        if relative.is_empty() {
124            return base.to_string();
125        }
126
127        // Simple implementation - in production would use proper URI resolution
128        if base.ends_with('/') || base.ends_with('#') {
129            format!("{base}{relative}")
130        } else {
131            format!("{base}/{relative}")
132        }
133    }
134}
135
136/// Turtle grammar recognizer state machine
137#[derive(Debug, Clone, PartialEq)]
138pub enum TurtleGrammarState {
139    /// Expecting statement (triple, directive, or comment)
140    ExpectingStatement,
141    /// Processing prefix declaration
142    PrefixDeclaration { prefix: Option<String> },
143    /// Processing base declaration
144    BaseDeclaration,
145    /// Processing triple with subject
146    TripleWithSubject { subject: Subject },
147    /// Processing predicate-object list
148    PredicateObjectList {
149        subject: Subject,
150        predicates: Vec<(Predicate, Vec<Object>)>,
151    },
152    /// Processing object list for current predicate
153    ObjectList {
154        subject: Subject,
155        predicate: Predicate,
156        objects: Vec<Object>,
157    },
158    /// Processing blank node property list
159    BlankNodePropertyList {
160        properties: Vec<(Predicate, Vec<Object>)>,
161    },
162    /// Processing collection (RDF list)
163    Collection { items: Vec<Object> },
164    /// Error recovery state
165    ErrorRecovery,
166}
167
168/// Turtle grammar recognizer implementation
169#[derive(Debug, Clone)]
170pub struct TurtleGrammarRecognizer {
171    state: TurtleGrammarState,
172}
173
174impl Default for TurtleGrammarRecognizer {
175    fn default() -> Self {
176        Self {
177            state: TurtleGrammarState::ExpectingStatement,
178        }
179    }
180}
181
182impl TurtleGrammarRecognizer {
183    pub fn new() -> Self {
184        Self::default()
185    }
186
187    /// Parse a term (subject, predicate, or object) from a token
188    fn parse_term(&self, token: &N3Token, context: &mut TurtleContext) -> ParseResult<Object> {
189        match token {
190            N3Token::Iri(iri) => {
191                let resolved_iri = context.resolve_iri(iri)?;
192                Ok(Object::NamedNode(
193                    NamedNode::new(resolved_iri)
194                        .map_err(|e| RdfParseError::internal(e.to_string()))?,
195                ))
196            }
197            N3Token::PrefixedName { prefix, local } => {
198                let iri = context.resolve_prefixed_name(prefix.as_deref(), local)?;
199                Ok(Object::NamedNode(
200                    NamedNode::new(iri).map_err(|e| RdfParseError::internal(e.to_string()))?,
201                ))
202            }
203            N3Token::BlankNode(label) => Ok(Object::BlankNode(
204                BlankNode::new(label.clone())
205                    .map_err(|e| RdfParseError::internal(e.to_string()))?,
206            )),
207            N3Token::Literal {
208                value,
209                datatype,
210                language,
211            } => {
212                let literal: Literal = match (datatype, language) {
213                    (Some(dt), None) => {
214                        let dt_iri = context.resolve_iri(dt)?;
215                        Literal::new_typed_literal(
216                            value,
217                            NamedNode::new(dt_iri)
218                                .map_err(|e| RdfParseError::internal(e.to_string()))?,
219                        )
220                    }
221                    (None, Some(lang)) => Literal::new_language_tagged_literal(value, lang)
222                        .map_err(|e| RdfParseError::InvalidLanguageTag(e.to_string()))?,
223                    (None, None) => Literal::new_simple_literal(value),
224                    (Some(_), Some(_)) => {
225                        return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
226                            "Literal cannot have both datatype and language tag".to_string(),
227                            context.position,
228                        )));
229                    }
230                };
231                Ok(Object::Literal(literal))
232            }
233            N3Token::Integer(i) => {
234                let xsd_integer = NamedNode::new("http://www.w3.org/2001/XMLSchema#integer")
235                    .map_err(|e| RdfParseError::internal(e.to_string()))?;
236                Ok(Object::Literal(Literal::new_typed_literal(
237                    i.to_string(),
238                    xsd_integer,
239                )))
240            }
241            N3Token::Decimal(d) => {
242                let xsd_decimal = NamedNode::new("http://www.w3.org/2001/XMLSchema#decimal")
243                    .map_err(|e| RdfParseError::internal(e.to_string()))?;
244                Ok(Object::Literal(Literal::new_typed_literal(
245                    d.to_string(),
246                    xsd_decimal,
247                )))
248            }
249            N3Token::Double(d) => {
250                let xsd_double = NamedNode::new("http://www.w3.org/2001/XMLSchema#double")
251                    .map_err(|e| RdfParseError::internal(e.to_string()))?;
252                Ok(Object::Literal(Literal::new_typed_literal(
253                    d.to_string(),
254                    xsd_double,
255                )))
256            }
257            N3Token::True => {
258                let xsd_boolean = NamedNode::new("http://www.w3.org/2001/XMLSchema#boolean")
259                    .map_err(|e| RdfParseError::internal(e.to_string()))?;
260                Ok(Object::Literal(Literal::new_typed_literal(
261                    "true",
262                    xsd_boolean,
263                )))
264            }
265            N3Token::False => {
266                let xsd_boolean = NamedNode::new("http://www.w3.org/2001/XMLSchema#boolean")
267                    .map_err(|e| RdfParseError::internal(e.to_string()))?;
268                Ok(Object::Literal(Literal::new_typed_literal(
269                    "false",
270                    xsd_boolean,
271                )))
272            }
273            N3Token::A => {
274                // 'a' is shorthand for rdf:type
275                let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
276                    .map_err(|e| RdfParseError::internal(e.to_string()))?;
277                Ok(Object::NamedNode(rdf_type))
278            }
279            _ => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
280                format!("Unexpected token in term position: {token:?}"),
281                context.position,
282            ))),
283        }
284    }
285
286    /// Parse a subject from a token
287    fn parse_subject(&self, token: &N3Token, context: &mut TurtleContext) -> ParseResult<Subject> {
288        match self.parse_term(token, context)? {
289            Object::NamedNode(n) => Ok(Subject::NamedNode(n)),
290            Object::BlankNode(b) => Ok(Subject::BlankNode(b)),
291            _ => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
292                "Invalid subject: must be IRI or blank node".to_string(),
293                context.position,
294            ))),
295        }
296    }
297
298    /// Parse a predicate from a token
299    fn parse_predicate(
300        &self,
301        token: &N3Token,
302        context: &mut TurtleContext,
303    ) -> ParseResult<Predicate> {
304        match self.parse_term(token, context)? {
305            Object::NamedNode(n) => Ok(Predicate::NamedNode(n)),
306            _ => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
307                "Invalid predicate: must be IRI".to_string(),
308                context.position,
309            ))),
310        }
311    }
312}
313
314impl RuleRecognizer<TurtleNode> for TurtleGrammarRecognizer {
315    fn recognize_next_node<Token>(
316        &mut self,
317        _parser: &mut Parser<Token>,
318    ) -> ParseResult<Option<TurtleNode>> {
319        // This is a simplified implementation - the full implementation would be much larger
320        // and handle all Turtle grammar rules according to the W3C specification
321
322        // For now, return None to indicate no node recognized
323        // In the complete implementation, this would process tokens according to the current state
324        // and return TurtleNode::Triple, TurtleNode::PrefixDeclaration, etc.
325
326        Ok(None)
327    }
328}
329
330/// High-level Turtle parser combining lexer and grammar recognizer
331pub struct TurtleParser {
332    context: TurtleContext,
333}
334
335impl TurtleParser {
336    pub fn new() -> Self {
337        Self {
338            context: TurtleContext::new(),
339        }
340    }
341
342    /// Parse Turtle from a string into triples
343    pub fn parse_str(&mut self, _input: &str) -> ParseResult<Vec<Triple>> {
344        let results = Vec::new();
345
346        // For now, return empty results - the full implementation would:
347        // 1. Create a lexer with N3Lexer
348        // 2. Create a parser with TurtleGrammarRecognizer
349        // 3. Process all tokens through the grammar recognizer
350        // 4. Convert TurtleNode::Triple results to Triple objects
351
352        Ok(results)
353    }
354
355    /// Parse Turtle from a reader into triples
356    pub fn parse_reader<R: std::io::Read>(&mut self, _reader: R) -> ParseResult<Vec<Triple>> {
357        // Full implementation would use ReaderBuffer from toolkit
358        Ok(Vec::new())
359    }
360
361    /// Set base IRI for relative IRI resolution
362    pub fn set_base_iri(&mut self, base_iri: String) {
363        self.context.base_iri = Some(base_iri);
364    }
365
366    /// Add a prefix declaration
367    pub fn add_prefix(&mut self, prefix: String, iri: String) {
368        self.context.prefixes.insert(prefix, iri);
369    }
370
371    /// Get the current context (for inspection)
372    pub fn context(&self) -> &TurtleContext {
373        &self.context
374    }
375}
376
377impl Default for TurtleParser {
378    fn default() -> Self {
379        Self::new()
380    }
381}
382
#[cfg(test)]
mod tests {
    use super::*;

    /// A standard prefix expands to its namespace; an unknown prefix is an error.
    #[test]
    fn test_turtle_context_prefix_resolution() {
        let ctx = TurtleContext::new();

        // Standard prefix resolution
        let rdf_type = ctx
            .resolve_prefixed_name(Some("rdf"), "type")
            .expect("prefix resolution should succeed");
        assert_eq!(rdf_type, "http://www.w3.org/1999/02/22-rdf-syntax-ns#type");

        // Undefined prefix
        let unknown = ctx.resolve_prefixed_name(Some("undefined"), "test");
        assert!(unknown.is_err());
    }

    /// Absolute IRIs pass through, relative IRIs use the base, and a missing
    /// base is an error.
    #[test]
    fn test_turtle_context_iri_resolution() {
        let mut ctx = TurtleContext::new();
        ctx.base_iri = Some("http://example.org/".to_string());

        // Absolute IRI (should remain unchanged)
        let absolute = ctx
            .resolve_iri("http://other.org/test")
            .expect("operation should succeed");
        assert_eq!(absolute, "http://other.org/test");

        // Relative IRI resolution
        let joined = ctx
            .resolve_iri("relative")
            .expect("operation should succeed");
        assert_eq!(joined, "http://example.org/relative");

        // Relative IRI without base (should error)
        ctx.base_iri = None;
        let without_base = ctx.resolve_iri("relative");
        assert!(without_base.is_err());
    }

    /// Consecutive generated blank nodes differ and share the `_:gen` label prefix.
    #[test]
    fn test_blank_node_generation() {
        let mut ctx = TurtleContext::new();

        let first = ctx.generate_blank_node();
        let second = ctx.generate_blank_node();

        assert_ne!(first, second);
        for node in [&first, &second] {
            assert!(node.to_string().starts_with("_:gen"));
        }
    }

    /// A new parser exposes the standard prefixes.
    #[test]
    fn test_turtle_parser_creation() {
        let parser = TurtleParser::new();
        let prefixes = &parser.context.prefixes;
        assert!(prefixes.contains_key("rdf"));
        assert!(prefixes.contains_key("xsd"));
    }
}
443}