oxirs_stream/patch/
parser.rs1use crate::{PatchOperation, RdfPatch};
4use anyhow::{anyhow, Result};
5use std::collections::HashMap;
6use tracing::{debug, warn};
7
/// Line-oriented parser that turns RDF Patch text into an [`RdfPatch`].
///
/// The parser keeps a small amount of state between lines: the current line
/// number (for diagnostics) and the prefix table used to expand prefixed names.
#[derive(Debug, Clone)]
pub struct PatchParser {
    /// When `true`, the first invalid line aborts parsing and unknown prefixes
    /// are reported as errors; when `false`, such lines are logged and skipped.
    strict_mode: bool,
    /// 1-based number of the line currently being parsed (0 before parsing starts).
    current_line: usize,
    /// Prefix name (without the trailing `:`) mapped to its namespace IRI.
    prefixes: HashMap<String, String>,
}
13
14impl PatchParser {
15 pub fn new() -> Self {
16 Self {
17 strict_mode: false,
18 current_line: 0,
19 prefixes: HashMap::new(),
20 }
21 }
22
23 pub fn with_strict_mode(mut self, strict: bool) -> Self {
24 self.strict_mode = strict;
25 self
26 }
27
28 pub fn parse(&mut self, input: &str) -> Result<RdfPatch> {
30 let mut patch = RdfPatch::new();
31 self.current_line = 0;
32 self.prefixes.clear();
33
34 self.prefixes.insert(
36 "rdf".to_string(),
37 "http://www.w3.org/1999/02/22-rdf-syntax-ns#".to_string(),
38 );
39 self.prefixes.insert(
40 "rdfs".to_string(),
41 "http://www.w3.org/2000/01/rdf-schema#".to_string(),
42 );
43 self.prefixes.insert(
44 "xsd".to_string(),
45 "http://www.w3.org/2001/XMLSchema#".to_string(),
46 );
47
48 for line in input.lines() {
49 self.current_line += 1;
50 let line = line.trim();
51
52 if line.is_empty() || line.starts_with('#') {
54 continue;
55 }
56
57 match self.parse_line(line) {
59 Ok(Some(operation)) => {
60 match &operation {
62 PatchOperation::TransactionBegin { transaction_id } => {
63 patch.transaction_id = transaction_id.clone();
64 }
65 PatchOperation::Header { key, value } => {
66 patch.headers.insert(key.clone(), value.clone());
67 }
68 PatchOperation::AddPrefix { prefix, namespace } => {
69 patch.prefixes.insert(prefix.clone(), namespace.clone());
70 }
71 _ => {}
72 }
73 patch.add_operation(operation);
74 }
75 Ok(None) => {
76 continue;
78 }
79 Err(e) => {
80 if self.strict_mode {
81 return Err(anyhow!("Parse error at line {}: {}", self.current_line, e));
82 } else {
83 warn!(
84 "Ignoring invalid line {}: {} ({})",
85 self.current_line, line, e
86 );
87 }
88 }
89 }
90 }
91
92 debug!(
93 "Parsed RDF Patch with {} operations",
94 patch.operations.len()
95 );
96 Ok(patch)
97 }
98
99 fn parse_line(&mut self, line: &str) -> Result<Option<PatchOperation>> {
100 if line.starts_with("@prefix") {
102 self.parse_prefix(line)?;
103 return Ok(None);
104 }
105
106 let parts = self.tokenize_line(line);
110 if parts.is_empty() {
111 return Err(anyhow!("Empty operation line"));
112 }
113
114 let operation = &parts[0];
115 match operation.as_str() {
116 "A" => self.parse_add_operation(&parts[1..]),
117 "D" => self.parse_delete_operation(&parts[1..]),
118 "PA" => self.parse_prefix_add(&parts[1..]),
119 "PD" => self.parse_prefix_delete(&parts[1..]),
120 "GA" => self.parse_graph_add(&parts[1..]),
121 "GD" => self.parse_graph_delete(&parts[1..]),
122 "TX" => self.parse_transaction_begin(&parts[1..]),
123 "TC" => Ok(Some(PatchOperation::TransactionCommit)),
124 "TA" => Ok(Some(PatchOperation::TransactionAbort)),
125 "H" => self.parse_header(&parts[1..]),
126 _ => Err(anyhow!("Unknown operation: {}", operation)),
127 }
128 }
129
130 fn parse_prefix(&mut self, line: &str) -> Result<()> {
131 let parts: Vec<&str> = line.split_whitespace().collect();
133 if parts.len() < 3 {
134 return Err(anyhow!("Invalid prefix declaration"));
135 }
136
137 let prefix_with_colon = parts[1];
138 let prefix = prefix_with_colon.trim_end_matches(':');
139 let uri = parts[2].trim_matches('<').trim_matches('>');
140
141 self.prefixes.insert(prefix.to_string(), uri.to_string());
142 debug!("Added prefix: {} -> {}", prefix, uri);
143 Ok(())
144 }
145
146 fn parse_add_operation(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
147 if parts.len() < 3 {
148 return Err(anyhow!(
149 "Add operation requires subject, predicate, and object"
150 ));
151 }
152
153 let subject = self.expand_term(&parts[0])?;
154 let predicate = self.expand_term(&parts[1])?;
155 let object = self.expand_term(&parts[2])?;
156
157 Ok(Some(PatchOperation::Add {
158 subject,
159 predicate,
160 object,
161 }))
162 }
163
164 fn parse_delete_operation(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
165 if parts.len() < 3 {
166 return Err(anyhow!(
167 "Delete operation requires subject, predicate, and object"
168 ));
169 }
170
171 let subject = self.expand_term(&parts[0])?;
172 let predicate = self.expand_term(&parts[1])?;
173 let object = self.expand_term(&parts[2])?;
174
175 Ok(Some(PatchOperation::Delete {
176 subject,
177 predicate,
178 object,
179 }))
180 }
181
182 fn parse_prefix_add(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
183 if parts.len() < 2 {
184 return Err(anyhow!("Prefix add requires prefix and namespace"));
185 }
186
187 let prefix = parts[0].trim_end_matches(':').to_string();
188 let namespace = parts[1].trim_matches('<').trim_matches('>').to_string();
189
190 Ok(Some(PatchOperation::AddPrefix { prefix, namespace }))
191 }
192
193 fn parse_prefix_delete(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
194 if parts.is_empty() {
195 return Err(anyhow!("Prefix delete requires prefix name"));
196 }
197
198 let prefix = parts[0].trim_end_matches(':').to_string();
199
200 Ok(Some(PatchOperation::DeletePrefix { prefix }))
201 }
202
203 fn parse_graph_add(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
204 if parts.is_empty() {
205 return Err(anyhow!("Graph add operation requires graph URI"));
206 }
207
208 let graph = self.expand_term(&parts[0])?;
209 Ok(Some(PatchOperation::AddGraph { graph }))
210 }
211
212 fn parse_graph_delete(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
213 if parts.is_empty() {
214 return Err(anyhow!("Graph delete operation requires graph URI"));
215 }
216
217 let graph = self.expand_term(&parts[0])?;
218 Ok(Some(PatchOperation::DeleteGraph { graph }))
219 }
220
221 fn parse_transaction_begin(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
222 let transaction_id = if !parts.is_empty() {
223 Some(parts[0].clone())
224 } else {
225 None
226 };
227
228 Ok(Some(PatchOperation::TransactionBegin { transaction_id }))
229 }
230
231 fn parse_header(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
232 if parts.len() < 2 {
233 return Err(anyhow!("Header requires key and value"));
234 }
235
236 let key = parts[0].clone();
237
238 let value_parts = if parts.len() > 2 && parts[parts.len() - 1] == "." {
240 &parts[1..parts.len() - 1]
241 } else {
242 &parts[1..]
243 };
244 let value = value_parts.join(" ");
245
246 Ok(Some(PatchOperation::Header { key, value }))
247 }
248
249 fn tokenize_line(&self, line: &str) -> Vec<String> {
251 let mut tokens = Vec::new();
252 let mut current_token = String::new();
253 let mut in_quotes = false;
254 let mut in_uri = false;
255 let mut chars = line.chars().peekable();
256
257 while let Some(ch) = chars.next() {
258 match ch {
259 '"' => {
261 current_token.push(ch);
262 in_quotes = !in_quotes;
263 }
264 '<' if !in_quotes => {
266 if !current_token.is_empty() {
267 tokens.push(current_token.clone());
268 current_token.clear();
269 }
270 current_token.push(ch);
271 in_uri = true;
272 }
273 '>' if !in_quotes && in_uri => {
274 current_token.push(ch);
275 tokens.push(current_token.clone());
276 current_token.clear();
277 in_uri = false;
278 }
279 c if c.is_whitespace() && !in_quotes && !in_uri => {
281 if !current_token.is_empty() {
282 tokens.push(current_token.clone());
283 current_token.clear();
284 }
285 }
286 '.' if !in_quotes && !in_uri => {
288 if let Some(&next_ch) = chars.peek() {
290 if next_ch.is_whitespace() || current_token.is_empty() {
291 if !current_token.is_empty() {
292 tokens.push(current_token.clone());
293 current_token.clear();
294 }
295 tokens.push(".".to_string());
296 continue;
297 }
298 } else {
299 if !current_token.is_empty() {
301 tokens.push(current_token.clone());
302 current_token.clear();
303 }
304 tokens.push(".".to_string());
305 continue;
306 }
307 current_token.push(ch);
308 }
309 _ => {
311 current_token.push(ch);
312 }
313 }
314 }
315
316 if !current_token.is_empty() {
318 tokens.push(current_token);
319 }
320
321 tokens
322 }
323
324 fn expand_term(&self, term: &str) -> Result<String> {
325 if term.starts_with('<') && term.ends_with('>') {
326 Ok(term[1..term.len() - 1].to_string())
328 } else if term.starts_with('"') {
329 Ok(term.to_string())
331 } else if term.starts_with('_') {
332 Ok(term.to_string())
334 } else if term.contains(':') {
335 let parts: Vec<&str> = term.splitn(2, ':').collect();
337 if parts.len() == 2 {
338 let prefix = parts[0];
339 let local = parts[1];
340
341 if let Some(namespace) = self.prefixes.get(prefix) {
342 Ok(format!("{namespace}{local}"))
343 } else if self.strict_mode {
344 Err(anyhow!("Unknown prefix: {}", prefix))
345 } else {
346 Ok(term.to_string())
348 }
349 } else {
350 Err(anyhow!("Invalid prefixed name: {}", term))
351 }
352 } else {
353 Ok(term.to_string())
355 }
356 }
357}
358
359impl Default for PatchParser {
360 fn default() -> Self {
361 Self::new()
362 }
363}