1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
//! Error types for Turtle-family format parsing
//!
//! This module provides comprehensive error handling for parsing and serialization
//! operations, including position tracking and error recovery capabilities.
use std::fmt;
use thiserror::Error;
/// Position in a text document
///
/// Combines a human-oriented line/column pair (both 1-based) with a 0-based
/// byte offset. The value is `Copy`, so it can be stored freely inside error
/// values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TextPosition {
    /// Line number (1-based)
    pub line: usize,
    /// Column number (1-based), counted in characters
    pub column: usize,
    /// Byte offset from start of document
    pub offset: usize,
}

impl Default for TextPosition {
    /// Equivalent to [`TextPosition::start`].
    fn default() -> Self {
        Self::start()
    }
}

impl TextPosition {
    /// Create a new text position from explicit components
    ///
    /// No validation is performed; the values are stored as given.
    pub fn new(line: usize, column: usize, offset: usize) -> Self {
        Self {
            line,
            column,
            offset,
        }
    }

    /// Initial position at start of document: line 1, column 1, offset 0
    pub fn start() -> Self {
        Self::new(1, 1, 0)
    }

    /// Advance position by one character
    ///
    /// A `'\n'` moves to column 1 of the next line; every other character
    /// moves one column to the right. The byte offset always grows by the
    /// UTF-8 encoded length of `ch`.
    pub fn advance_char(&mut self, ch: char) {
        if ch == '\n' {
            self.line += 1;
            self.column = 1;
        } else {
            self.column += 1;
        }
        self.offset += ch.len_utf8();
    }

    /// Advance position by multiple bytes
    ///
    /// The byte offset grows by one per byte. Line and column are updated as
    /// follows:
    ///
    /// * `b'\n'` starts a new line (column resets to 1);
    /// * tab and bytes `>= 0x20` advance the column, except for UTF-8
    ///   continuation bytes (`0x80..=0xBF`), so that a multi-byte character
    ///   occupies a single column just as it does in [`Self::advance_char`];
    /// * the remaining control bytes (below `0x20`) leave the column unchanged.
    ///
    /// The method keeps no state between calls, so a multi-byte character may
    /// be split across two calls without being counted twice.
    pub fn advance_bytes(&mut self, bytes: &[u8]) {
        for &byte in bytes {
            match byte {
                b'\n' => {
                    self.line += 1;
                    self.column = 1;
                }
                // Continuation byte: its lead byte already accounted for the
                // column of this character.
                0x80..=0xBF => {}
                b'\t' | 0x20..=0xFF => self.column += 1,
                // Other control bytes do not occupy a column.
                _ => {}
            }
            self.offset += 1;
        }
    }
}

impl fmt::Display for TextPosition {
    /// Renders as `line {line}, column {column}`; the byte offset is omitted.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "line {}, column {}", self.line, self.column)
    }
}
/// Syntax error in Turtle-family format
///
/// Every variant records the [`TextPosition`] it refers to, so a position is
/// always available regardless of the kind of error. The `Display` text of
/// each variant is taken from its `#[error(...)]` attribute.
#[derive(Debug, Clone, Error)]
pub enum TurtleSyntaxError {
/// Unexpected character
#[error("Unexpected character '{character}' at {position}")]
UnexpectedCharacter {
/// The unexpected character
character: char,
/// Position where error occurred
position: TextPosition,
},
/// Unexpected end of input
#[error("Unexpected end of input at {position}")]
UnexpectedEof {
/// Position where EOF was encountered
position: TextPosition,
},
/// Invalid IRI
#[error("Invalid IRI '{iri}' at {position}: {reason}")]
InvalidIri {
/// The invalid IRI
iri: String,
/// Reason for invalidity (appended to the message after the position)
reason: String,
/// Position of the IRI
position: TextPosition,
},
/// Invalid language tag
#[error("Invalid language tag '{tag}' at {position}: {reason}")]
InvalidLanguageTag {
/// The invalid language tag
tag: String,
/// Reason for invalidity (appended to the message after the position)
reason: String,
/// Position of the tag
position: TextPosition,
},
/// Invalid literal
#[error("Invalid literal '{literal}' at {position}: {reason}")]
InvalidLiteral {
/// The invalid literal
literal: String,
/// Reason for invalidity (appended to the message after the position)
reason: String,
/// Position of the literal
position: TextPosition,
},
/// Invalid escape sequence
///
/// The message itself prepends the backslash, so `sequence` must not
/// contain it.
#[error("Invalid escape sequence '\\{sequence}' at {position}")]
InvalidEscape {
/// The invalid escape sequence (without backslash)
sequence: String,
/// Position of the escape
position: TextPosition,
},
/// Invalid Unicode code point
///
/// The message prints the code point as `U+` followed by uppercase
/// hexadecimal digits, zero-padded to at least four digits.
#[error("Invalid Unicode code point U+{codepoint:04X} at {position}")]
InvalidUnicode {
/// The invalid code point
codepoint: u32,
/// Position of the code point
position: TextPosition,
},
/// Invalid blank node identifier
#[error("Invalid blank node identifier '{id}' at {position}")]
InvalidBlankNode {
/// The invalid identifier
id: String,
/// Position of the identifier
position: TextPosition,
},
/// Undefined prefix
#[error("Undefined prefix '{prefix}' at {position}")]
UndefinedPrefix {
/// The undefined prefix
prefix: String,
/// Position where prefix was used
position: TextPosition,
},
/// Invalid prefix declaration
#[error("Invalid prefix declaration for '{prefix}' at {position}: {reason}")]
InvalidPrefix {
/// The prefix being declared
prefix: String,
/// Reason for invalidity (appended to the message after the position)
reason: String,
/// Position of the declaration
position: TextPosition,
},
/// Invalid base IRI declaration
#[error("Invalid base IRI '{iri}' at {position}: {reason}")]
InvalidBase {
/// The invalid base IRI
iri: String,
/// Reason for invalidity (appended to the message after the position)
reason: String,
/// Position of the declaration
position: TextPosition,
},
/// Generic syntax error
///
/// Catch-all carrying a free-form message instead of structured fields.
#[error("Syntax error at {position}: {message}")]
Generic {
/// Error message
message: String,
/// Position where error occurred
position: TextPosition,
},
}
/// High-level parsing error
///
/// Groups the different failure sources behind one type. The `#[from]`
/// attributes on `Syntax`, `Io` and `Model` derive `From` conversions, so
/// those underlying errors can be propagated with `?`. The type does not
/// derive `Clone`, unlike the other error enums in this module.
#[derive(Debug, Error)]
pub enum TurtleParseError {
/// Syntax error in the input
#[error("Syntax error: {0}")]
Syntax(#[from] TurtleSyntaxError),
/// I/O error while reading
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// RDF model error (invalid terms, etc.)
#[error("RDF model error: {0}")]
Model(#[from] oxirs_core::OxirsError),
/// Multiple errors (for batch processing)
///
/// The message only reports how many errors were collected; inspect
/// `errors` for the individual failures.
#[error("Multiple errors occurred ({} errors)", .errors.len())]
Multiple {
/// Collection of errors
errors: Vec<TurtleParseError>,
},
}
/// Result type for parsing operations
///
/// Shorthand for `Result<T, TurtleParseError>`.
pub type TurtleResult<T> = Result<T, TurtleParseError>;
/// Error that can occur during tokenization
///
/// Unlike [`TurtleSyntaxError`], these variants carry no position
/// information.
#[derive(Debug, Clone, Error)]
pub enum TokenRecognizerError {
/// Unexpected character (the offending character is included in the message)
#[error("Unexpected character: {0}")]
UnexpectedCharacter(char),
/// Unexpected end of input
#[error("Unexpected end of input")]
UnexpectedEof,
/// Invalid token (the payload is inserted verbatim into the message)
#[error("Invalid token: {0}")]
Invalid(String),
}
/// Error that can occur during rule recognition
///
/// Each variant wraps a `String` that is inserted verbatim into the
/// corresponding message; no position information is carried.
#[derive(Debug, Clone, Error)]
pub enum RuleRecognizerError {
/// Unexpected token
#[error("Unexpected token: {0}")]
UnexpectedToken(String),
/// Missing required token
#[error("Missing required token: {0}")]
MissingToken(String),
/// Invalid rule application
#[error("Invalid rule: {0}")]
InvalidRule(String),
}
impl TurtleParseError {
/// Create a new syntax error
pub fn syntax(error: TurtleSyntaxError) -> Self {
Self::Syntax(error)
}
/// Create a new I/O error
pub fn io(error: std::io::Error) -> Self {
Self::Io(error)
}
/// Create a new model error
pub fn model(error: oxirs_core::OxirsError) -> Self {
Self::Model(error)
}
/// Combine multiple errors
pub fn multiple(errors: Vec<TurtleParseError>) -> Self {
Self::Multiple { errors }
}
/// Get the position of this error, if available
pub fn position(&self) -> Option<TextPosition> {
match self {
Self::Syntax(syntax_error) => Some(syntax_error.position()),
_ => None,
}
}
}
impl TurtleSyntaxError {
    /// Get the position where this error occurred
    ///
    /// Every variant stores a `position` field, so a value is always
    /// returned (by copy).
    pub fn position(&self) -> TextPosition {
        // All variants bind the same field name, so a single or-pattern arm
        // covers the whole enum while remaining exhaustive.
        match self {
            Self::UnexpectedCharacter { position, .. }
            | Self::UnexpectedEof { position }
            | Self::InvalidIri { position, .. }
            | Self::InvalidLanguageTag { position, .. }
            | Self::InvalidLiteral { position, .. }
            | Self::InvalidEscape { position, .. }
            | Self::InvalidUnicode { position, .. }
            | Self::InvalidBlankNode { position, .. }
            | Self::UndefinedPrefix { position, .. }
            | Self::InvalidPrefix { position, .. }
            | Self::InvalidBase { position, .. }
            | Self::Generic { position, .. } => *position,
        }
    }
}