1#![allow(dead_code)]
7
8use super::error::{ParseResult, RdfParseError, RdfSyntaxError, TextPosition};
9use super::n3_lexer::N3Token;
10use super::toolkit::{Parser, RuleRecognizer};
11use crate::model::{BlankNode, Literal, NamedNode, Object, Predicate, Subject, Triple};
12use std::collections::HashMap;
13
14#[derive(Debug, Clone, PartialEq)]
16pub enum TurtleNode {
17 Triple(Triple),
18 PrefixDeclaration { prefix: String, iri: String },
19 BaseDeclaration { iri: String },
20 Comment(String),
21}
22
23#[derive(Debug, Clone)]
25pub struct TurtleContext {
26 pub base_iri: Option<String>,
28 pub prefixes: HashMap<String, String>,
30 pub blank_node_counter: u64,
32 pub position: TextPosition,
34}
35
36impl Default for TurtleContext {
37 fn default() -> Self {
38 let mut prefixes = HashMap::new();
39 prefixes.insert(
41 "rdf".to_string(),
42 "http://www.w3.org/1999/02/22-rdf-syntax-ns#".to_string(),
43 );
44 prefixes.insert(
45 "rdfs".to_string(),
46 "http://www.w3.org/2000/01/rdf-schema#".to_string(),
47 );
48 prefixes.insert(
49 "xsd".to_string(),
50 "http://www.w3.org/2001/XMLSchema#".to_string(),
51 );
52 prefixes.insert(
53 "owl".to_string(),
54 "http://www.w3.org/2002/07/owl#".to_string(),
55 );
56
57 Self {
58 base_iri: None,
59 prefixes,
60 blank_node_counter: 0,
61 position: TextPosition::start(),
62 }
63 }
64}
65
66impl TurtleContext {
67 pub fn new() -> Self {
68 Self::default()
69 }
70
71 pub fn resolve_prefixed_name(&self, prefix: Option<&str>, local: &str) -> ParseResult<String> {
73 match prefix {
74 Some(prefix) => match self.prefixes.get(prefix) {
75 Some(base_iri) => Ok(format!("{base_iri}{local}")),
76 None => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
77 format!("Undefined prefix: {prefix}"),
78 self.position,
79 ))),
80 },
81 None => {
82 match self.prefixes.get("") {
84 Some(base_iri) => Ok(format!("{base_iri}{local}")),
85 None => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
86 "No default prefix defined".to_string(),
87 self.position,
88 ))),
89 }
90 }
91 }
92 }
93
94 pub fn resolve_iri(&self, iri: &str) -> ParseResult<String> {
96 if self.is_absolute_iri(iri) {
97 Ok(iri.to_string())
98 } else {
99 match &self.base_iri {
100 Some(base) => Ok(self.resolve_relative_iri(base, iri)),
101 None => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
102 format!("Relative IRI without base: {iri}"),
103 self.position,
104 ))),
105 }
106 }
107 }
108
109 pub fn generate_blank_node(&mut self) -> BlankNode {
111 self.blank_node_counter += 1;
112 BlankNode::new(format!("_:gen{}", self.blank_node_counter))
113 .expect("generated blank node format is always valid")
114 }
115
116 fn is_absolute_iri(&self, iri: &str) -> bool {
118 iri.contains(':') && !iri.starts_with(':')
119 }
120
121 fn resolve_relative_iri(&self, base: &str, relative: &str) -> String {
123 if relative.is_empty() {
124 return base.to_string();
125 }
126
127 if base.ends_with('/') || base.ends_with('#') {
129 format!("{base}{relative}")
130 } else {
131 format!("{base}/{relative}")
132 }
133 }
134}
135
136#[derive(Debug, Clone, PartialEq)]
138pub enum TurtleGrammarState {
139 ExpectingStatement,
141 PrefixDeclaration { prefix: Option<String> },
143 BaseDeclaration,
145 TripleWithSubject { subject: Subject },
147 PredicateObjectList {
149 subject: Subject,
150 predicates: Vec<(Predicate, Vec<Object>)>,
151 },
152 ObjectList {
154 subject: Subject,
155 predicate: Predicate,
156 objects: Vec<Object>,
157 },
158 BlankNodePropertyList {
160 properties: Vec<(Predicate, Vec<Object>)>,
161 },
162 Collection { items: Vec<Object> },
164 ErrorRecovery,
166}
167
168#[derive(Debug, Clone)]
170pub struct TurtleGrammarRecognizer {
171 state: TurtleGrammarState,
172}
173
174impl Default for TurtleGrammarRecognizer {
175 fn default() -> Self {
176 Self {
177 state: TurtleGrammarState::ExpectingStatement,
178 }
179 }
180}
181
182impl TurtleGrammarRecognizer {
183 pub fn new() -> Self {
184 Self::default()
185 }
186
187 fn parse_term(&self, token: &N3Token, context: &mut TurtleContext) -> ParseResult<Object> {
189 match token {
190 N3Token::Iri(iri) => {
191 let resolved_iri = context.resolve_iri(iri)?;
192 Ok(Object::NamedNode(
193 NamedNode::new(resolved_iri)
194 .map_err(|e| RdfParseError::internal(e.to_string()))?,
195 ))
196 }
197 N3Token::PrefixedName { prefix, local } => {
198 let iri = context.resolve_prefixed_name(prefix.as_deref(), local)?;
199 Ok(Object::NamedNode(
200 NamedNode::new(iri).map_err(|e| RdfParseError::internal(e.to_string()))?,
201 ))
202 }
203 N3Token::BlankNode(label) => Ok(Object::BlankNode(
204 BlankNode::new(label.clone())
205 .map_err(|e| RdfParseError::internal(e.to_string()))?,
206 )),
207 N3Token::Literal {
208 value,
209 datatype,
210 language,
211 } => {
212 let literal: Literal = match (datatype, language) {
213 (Some(dt), None) => {
214 let dt_iri = context.resolve_iri(dt)?;
215 Literal::new_typed_literal(
216 value,
217 NamedNode::new(dt_iri)
218 .map_err(|e| RdfParseError::internal(e.to_string()))?,
219 )
220 }
221 (None, Some(lang)) => Literal::new_language_tagged_literal(value, lang)
222 .map_err(|e| RdfParseError::InvalidLanguageTag(e.to_string()))?,
223 (None, None) => Literal::new_simple_literal(value),
224 (Some(_), Some(_)) => {
225 return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
226 "Literal cannot have both datatype and language tag".to_string(),
227 context.position,
228 )));
229 }
230 };
231 Ok(Object::Literal(literal))
232 }
233 N3Token::Integer(i) => {
234 let xsd_integer = NamedNode::new("http://www.w3.org/2001/XMLSchema#integer")
235 .map_err(|e| RdfParseError::internal(e.to_string()))?;
236 Ok(Object::Literal(Literal::new_typed_literal(
237 i.to_string(),
238 xsd_integer,
239 )))
240 }
241 N3Token::Decimal(d) => {
242 let xsd_decimal = NamedNode::new("http://www.w3.org/2001/XMLSchema#decimal")
243 .map_err(|e| RdfParseError::internal(e.to_string()))?;
244 Ok(Object::Literal(Literal::new_typed_literal(
245 d.to_string(),
246 xsd_decimal,
247 )))
248 }
249 N3Token::Double(d) => {
250 let xsd_double = NamedNode::new("http://www.w3.org/2001/XMLSchema#double")
251 .map_err(|e| RdfParseError::internal(e.to_string()))?;
252 Ok(Object::Literal(Literal::new_typed_literal(
253 d.to_string(),
254 xsd_double,
255 )))
256 }
257 N3Token::True => {
258 let xsd_boolean = NamedNode::new("http://www.w3.org/2001/XMLSchema#boolean")
259 .map_err(|e| RdfParseError::internal(e.to_string()))?;
260 Ok(Object::Literal(Literal::new_typed_literal(
261 "true",
262 xsd_boolean,
263 )))
264 }
265 N3Token::False => {
266 let xsd_boolean = NamedNode::new("http://www.w3.org/2001/XMLSchema#boolean")
267 .map_err(|e| RdfParseError::internal(e.to_string()))?;
268 Ok(Object::Literal(Literal::new_typed_literal(
269 "false",
270 xsd_boolean,
271 )))
272 }
273 N3Token::A => {
274 let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
276 .map_err(|e| RdfParseError::internal(e.to_string()))?;
277 Ok(Object::NamedNode(rdf_type))
278 }
279 _ => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
280 format!("Unexpected token in term position: {token:?}"),
281 context.position,
282 ))),
283 }
284 }
285
286 fn parse_subject(&self, token: &N3Token, context: &mut TurtleContext) -> ParseResult<Subject> {
288 match self.parse_term(token, context)? {
289 Object::NamedNode(n) => Ok(Subject::NamedNode(n)),
290 Object::BlankNode(b) => Ok(Subject::BlankNode(b)),
291 _ => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
292 "Invalid subject: must be IRI or blank node".to_string(),
293 context.position,
294 ))),
295 }
296 }
297
298 fn parse_predicate(
300 &self,
301 token: &N3Token,
302 context: &mut TurtleContext,
303 ) -> ParseResult<Predicate> {
304 match self.parse_term(token, context)? {
305 Object::NamedNode(n) => Ok(Predicate::NamedNode(n)),
306 _ => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
307 "Invalid predicate: must be IRI".to_string(),
308 context.position,
309 ))),
310 }
311 }
312}
313
314impl RuleRecognizer<TurtleNode> for TurtleGrammarRecognizer {
315 fn recognize_next_node<Token>(
316 &mut self,
317 _parser: &mut Parser<Token>,
318 ) -> ParseResult<Option<TurtleNode>> {
319 Ok(None)
327 }
328}
329
330pub struct TurtleParser {
332 context: TurtleContext,
333}
334
335impl TurtleParser {
336 pub fn new() -> Self {
337 Self {
338 context: TurtleContext::new(),
339 }
340 }
341
342 pub fn parse_str(&mut self, _input: &str) -> ParseResult<Vec<Triple>> {
344 let results = Vec::new();
345
346 Ok(results)
353 }
354
355 pub fn parse_reader<R: std::io::Read>(&mut self, _reader: R) -> ParseResult<Vec<Triple>> {
357 Ok(Vec::new())
359 }
360
361 pub fn set_base_iri(&mut self, base_iri: String) {
363 self.context.base_iri = Some(base_iri);
364 }
365
366 pub fn add_prefix(&mut self, prefix: String, iri: String) {
368 self.context.prefixes.insert(prefix, iri);
369 }
370
371 pub fn context(&self) -> &TurtleContext {
373 &self.context
374 }
375}
376
377impl Default for TurtleParser {
378 fn default() -> Self {
379 Self::new()
380 }
381}
382
#[cfg(test)]
mod tests {
    use super::*;

    /// A predeclared prefix expands to its namespace; an unknown one is an error.
    #[test]
    fn test_turtle_context_prefix_resolution() {
        let ctx = TurtleContext::new();

        let rdf_type = ctx
            .resolve_prefixed_name(Some("rdf"), "type")
            .expect("prefix resolution should succeed");
        assert_eq!(rdf_type, "http://www.w3.org/1999/02/22-rdf-syntax-ns#type");

        let unknown = ctx.resolve_prefixed_name(Some("undefined"), "test");
        assert!(unknown.is_err());
    }

    /// Absolute IRIs pass through, relative ones need a base IRI.
    #[test]
    fn test_turtle_context_iri_resolution() {
        let mut ctx = TurtleContext {
            base_iri: Some("http://example.org/".to_string()),
            ..TurtleContext::new()
        };

        let absolute = ctx
            .resolve_iri("http://other.org/test")
            .expect("operation should succeed");
        assert_eq!(absolute, "http://other.org/test");

        let joined = ctx
            .resolve_iri("relative")
            .expect("operation should succeed");
        assert_eq!(joined, "http://example.org/relative");

        // Without a base the same relative reference can no longer be resolved.
        ctx.base_iri = None;
        assert!(ctx.resolve_iri("relative").is_err());
    }

    /// Consecutive generated blank nodes differ and share the `_:gen` prefix.
    #[test]
    fn test_blank_node_generation() {
        let mut ctx = TurtleContext::new();

        let first = ctx.generate_blank_node();
        let second = ctx.generate_blank_node();

        assert_ne!(first, second);
        for node in [&first, &second] {
            assert!(node.to_string().starts_with("_:gen"));
        }
    }

    /// A new parser starts with the predeclared prefixes.
    #[test]
    fn test_turtle_parser_creation() {
        let parser = TurtleParser::new();
        for label in ["rdf", "xsd"] {
            assert!(parser.context.prefixes.contains_key(label));
        }
    }
}