oxirs_stream/patch/
parser.rs

1//! RDF Patch parser
2
3use crate::{PatchOperation, RdfPatch};
4use anyhow::{anyhow, Result};
5use std::collections::HashMap;
6use tracing::{debug, warn};
7
/// Line-oriented parser that turns RDF Patch text into an `RdfPatch`.
///
/// A parser can be reused: every call to `parse` resets the line counter and
/// the prefix table before reading its input.
#[derive(Debug, Clone)]
pub struct PatchParser {
    /// When `true`, the first line that fails to parse aborts the whole parse
    /// and unknown prefixes are errors; when `false`, bad lines are logged and
    /// skipped and unknown prefixed names are passed through unchanged.
    strict_mode: bool,
    /// 1-based number of the input line currently being processed
    /// (0 before any line has been read).
    current_line: usize,
    /// Prefix name -> namespace IRI, used to expand prefixed names.
    prefixes: HashMap<String, String>,
}
13
14impl PatchParser {
15    pub fn new() -> Self {
16        Self {
17            strict_mode: false,
18            current_line: 0,
19            prefixes: HashMap::new(),
20        }
21    }
22
23    pub fn with_strict_mode(mut self, strict: bool) -> Self {
24        self.strict_mode = strict;
25        self
26    }
27
28    /// Parse RDF Patch from string
29    pub fn parse(&mut self, input: &str) -> Result<RdfPatch> {
30        let mut patch = RdfPatch::new();
31        self.current_line = 0;
32        self.prefixes.clear();
33
34        // Add common prefixes
35        self.prefixes.insert(
36            "rdf".to_string(),
37            "http://www.w3.org/1999/02/22-rdf-syntax-ns#".to_string(),
38        );
39        self.prefixes.insert(
40            "rdfs".to_string(),
41            "http://www.w3.org/2000/01/rdf-schema#".to_string(),
42        );
43        self.prefixes.insert(
44            "xsd".to_string(),
45            "http://www.w3.org/2001/XMLSchema#".to_string(),
46        );
47
48        for line in input.lines() {
49            self.current_line += 1;
50            let line = line.trim();
51
52            // Skip empty lines and comments
53            if line.is_empty() || line.starts_with('#') {
54                continue;
55            }
56
57            // Parse the line
58            match self.parse_line(line) {
59                Ok(Some(operation)) => {
60                    // Handle special operations that affect patch state
61                    match &operation {
62                        PatchOperation::TransactionBegin { transaction_id } => {
63                            patch.transaction_id = transaction_id.clone();
64                        }
65                        PatchOperation::Header { key, value } => {
66                            patch.headers.insert(key.clone(), value.clone());
67                        }
68                        PatchOperation::AddPrefix { prefix, namespace } => {
69                            patch.prefixes.insert(prefix.clone(), namespace.clone());
70                        }
71                        _ => {}
72                    }
73                    patch.add_operation(operation);
74                }
75                Ok(None) => {
76                    // Line was a directive (like @prefix), not an operation
77                    continue;
78                }
79                Err(e) => {
80                    if self.strict_mode {
81                        return Err(anyhow!("Parse error at line {}: {}", self.current_line, e));
82                    } else {
83                        warn!(
84                            "Ignoring invalid line {}: {} ({})",
85                            self.current_line, line, e
86                        );
87                    }
88                }
89            }
90        }
91
92        debug!(
93            "Parsed RDF Patch with {} operations",
94            patch.operations.len()
95        );
96        Ok(patch)
97    }
98
99    fn parse_line(&mut self, line: &str) -> Result<Option<PatchOperation>> {
100        // Handle prefix declarations
101        if line.starts_with("@prefix") {
102            self.parse_prefix(line)?;
103            return Ok(None);
104        }
105
106        // Transaction and header operations are now handled in the main match below
107
108        // Parse operation lines with proper tokenization that respects quoted strings
109        let parts = self.tokenize_line(line);
110        if parts.is_empty() {
111            return Err(anyhow!("Empty operation line"));
112        }
113
114        let operation = &parts[0];
115        match operation.as_str() {
116            "A" => self.parse_add_operation(&parts[1..]),
117            "D" => self.parse_delete_operation(&parts[1..]),
118            "PA" => self.parse_prefix_add(&parts[1..]),
119            "PD" => self.parse_prefix_delete(&parts[1..]),
120            "GA" => self.parse_graph_add(&parts[1..]),
121            "GD" => self.parse_graph_delete(&parts[1..]),
122            "TX" => self.parse_transaction_begin(&parts[1..]),
123            "TC" => Ok(Some(PatchOperation::TransactionCommit)),
124            "TA" => Ok(Some(PatchOperation::TransactionAbort)),
125            "H" => self.parse_header(&parts[1..]),
126            _ => Err(anyhow!("Unknown operation: {}", operation)),
127        }
128    }
129
130    fn parse_prefix(&mut self, line: &str) -> Result<()> {
131        // Format: @prefix prefix: <uri>
132        let parts: Vec<&str> = line.split_whitespace().collect();
133        if parts.len() < 3 {
134            return Err(anyhow!("Invalid prefix declaration"));
135        }
136
137        let prefix_with_colon = parts[1];
138        let prefix = prefix_with_colon.trim_end_matches(':');
139        let uri = parts[2].trim_matches('<').trim_matches('>');
140
141        self.prefixes.insert(prefix.to_string(), uri.to_string());
142        debug!("Added prefix: {} -> {}", prefix, uri);
143        Ok(())
144    }
145
146    fn parse_add_operation(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
147        if parts.len() < 3 {
148            return Err(anyhow!(
149                "Add operation requires subject, predicate, and object"
150            ));
151        }
152
153        let subject = self.expand_term(&parts[0])?;
154        let predicate = self.expand_term(&parts[1])?;
155        let object = self.expand_term(&parts[2])?;
156
157        Ok(Some(PatchOperation::Add {
158            subject,
159            predicate,
160            object,
161        }))
162    }
163
164    fn parse_delete_operation(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
165        if parts.len() < 3 {
166            return Err(anyhow!(
167                "Delete operation requires subject, predicate, and object"
168            ));
169        }
170
171        let subject = self.expand_term(&parts[0])?;
172        let predicate = self.expand_term(&parts[1])?;
173        let object = self.expand_term(&parts[2])?;
174
175        Ok(Some(PatchOperation::Delete {
176            subject,
177            predicate,
178            object,
179        }))
180    }
181
182    fn parse_prefix_add(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
183        if parts.len() < 2 {
184            return Err(anyhow!("Prefix add requires prefix and namespace"));
185        }
186
187        let prefix = parts[0].trim_end_matches(':').to_string();
188        let namespace = parts[1].trim_matches('<').trim_matches('>').to_string();
189
190        Ok(Some(PatchOperation::AddPrefix { prefix, namespace }))
191    }
192
193    fn parse_prefix_delete(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
194        if parts.is_empty() {
195            return Err(anyhow!("Prefix delete requires prefix name"));
196        }
197
198        let prefix = parts[0].trim_end_matches(':').to_string();
199
200        Ok(Some(PatchOperation::DeletePrefix { prefix }))
201    }
202
203    fn parse_graph_add(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
204        if parts.is_empty() {
205            return Err(anyhow!("Graph add operation requires graph URI"));
206        }
207
208        let graph = self.expand_term(&parts[0])?;
209        Ok(Some(PatchOperation::AddGraph { graph }))
210    }
211
212    fn parse_graph_delete(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
213        if parts.is_empty() {
214            return Err(anyhow!("Graph delete operation requires graph URI"));
215        }
216
217        let graph = self.expand_term(&parts[0])?;
218        Ok(Some(PatchOperation::DeleteGraph { graph }))
219    }
220
221    fn parse_transaction_begin(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
222        let transaction_id = if !parts.is_empty() {
223            Some(parts[0].clone())
224        } else {
225            None
226        };
227
228        Ok(Some(PatchOperation::TransactionBegin { transaction_id }))
229    }
230
231    fn parse_header(&self, parts: &[String]) -> Result<Option<PatchOperation>> {
232        if parts.len() < 2 {
233            return Err(anyhow!("Header requires key and value"));
234        }
235
236        let key = parts[0].clone();
237
238        // Handle RDF Patch line terminator - exclude trailing "." from value
239        let value_parts = if parts.len() > 2 && parts[parts.len() - 1] == "." {
240            &parts[1..parts.len() - 1]
241        } else {
242            &parts[1..]
243        };
244        let value = value_parts.join(" ");
245
246        Ok(Some(PatchOperation::Header { key, value }))
247    }
248
249    /// Tokenize a line while respecting quoted strings and RDF Patch terminators
250    fn tokenize_line(&self, line: &str) -> Vec<String> {
251        let mut tokens = Vec::new();
252        let mut current_token = String::new();
253        let mut in_quotes = false;
254        let mut in_uri = false;
255        let mut chars = line.chars().peekable();
256
257        while let Some(ch) = chars.next() {
258            match ch {
259                // Handle quoted strings
260                '"' => {
261                    current_token.push(ch);
262                    in_quotes = !in_quotes;
263                }
264                // Handle URI brackets
265                '<' if !in_quotes => {
266                    if !current_token.is_empty() {
267                        tokens.push(current_token.clone());
268                        current_token.clear();
269                    }
270                    current_token.push(ch);
271                    in_uri = true;
272                }
273                '>' if !in_quotes && in_uri => {
274                    current_token.push(ch);
275                    tokens.push(current_token.clone());
276                    current_token.clear();
277                    in_uri = false;
278                }
279                // Handle whitespace
280                c if c.is_whitespace() && !in_quotes && !in_uri => {
281                    if !current_token.is_empty() {
282                        tokens.push(current_token.clone());
283                        current_token.clear();
284                    }
285                }
286                // Handle RDF Patch line terminator
287                '.' if !in_quotes && !in_uri => {
288                    // Check if this is a standalone terminator (followed by whitespace or end)
289                    if let Some(&next_ch) = chars.peek() {
290                        if next_ch.is_whitespace() || current_token.is_empty() {
291                            if !current_token.is_empty() {
292                                tokens.push(current_token.clone());
293                                current_token.clear();
294                            }
295                            tokens.push(".".to_string());
296                            continue;
297                        }
298                    } else {
299                        // End of line
300                        if !current_token.is_empty() {
301                            tokens.push(current_token.clone());
302                            current_token.clear();
303                        }
304                        tokens.push(".".to_string());
305                        continue;
306                    }
307                    current_token.push(ch);
308                }
309                // Regular characters
310                _ => {
311                    current_token.push(ch);
312                }
313            }
314        }
315
316        // Add any remaining token
317        if !current_token.is_empty() {
318            tokens.push(current_token);
319        }
320
321        tokens
322    }
323
324    fn expand_term(&self, term: &str) -> Result<String> {
325        if term.starts_with('<') && term.ends_with('>') {
326            // Full URI
327            Ok(term[1..term.len() - 1].to_string())
328        } else if term.starts_with('"') {
329            // Literal
330            Ok(term.to_string())
331        } else if term.starts_with('_') {
332            // Blank node
333            Ok(term.to_string())
334        } else if term.contains(':') {
335            // Prefixed name
336            let parts: Vec<&str> = term.splitn(2, ':').collect();
337            if parts.len() == 2 {
338                let prefix = parts[0];
339                let local = parts[1];
340
341                if let Some(namespace) = self.prefixes.get(prefix) {
342                    Ok(format!("{namespace}{local}"))
343                } else if self.strict_mode {
344                    Err(anyhow!("Unknown prefix: {}", prefix))
345                } else {
346                    // Return as-is in non-strict mode
347                    Ok(term.to_string())
348                }
349            } else {
350                Err(anyhow!("Invalid prefixed name: {}", term))
351            }
352        } else {
353            // Assume it's a relative URI or local name
354            Ok(term.to_string())
355        }
356    }
357}
358
359impl Default for PatchParser {
360    fn default() -> Self {
361        Self::new()
362    }
363}