gml_parser/
lib.rs

1//! This crate allows for reading [Graph Modeling Language (GML)](https://en.wikipedia.org/wiki/Graph_Modelling_Language) files.
2//!
3//!
4//! This crate first parses the GML into [GMLObject]s and [GMLValue]s. Then the root GMLObject can
5//! be transformed into a [Graph] containing [Node]s and [Edge]s.
6//!
7//! # Examples
8//! ```
9//! use gml_parser::{GMLObject, Graph};
10//!
11//! let data = r#"
12//! graph [            
13//!    id 4           
14//!    node [         
15//!        id 0       
16//!    ]              
17//!    node [         
18//!        id 1       
19//!    ]              
20//!    edge [         
21//!        source 1   
22//!        target 0   
23//!        label "Edge"
24//!    ]              
25//!]"#;
26//! let root = GMLObject::from_str(data).unwrap();
27//! let graph = Graph::from_gml(root).unwrap();
28//! assert_eq!(graph.id, Some(4));
29//! assert_eq!(graph.nodes.len(), 2);
30//! assert_eq!(graph.edges.len(), 1);
31//! ```
32//!
33//! # Limitations
34//! - This implementation can be fragile and GML is not a very picky standard
35//! - We duplicate the data when parsing which can have performance impacts on very large graphs
36//!
37
38use std::{error::Error, fmt::Display};
39extern crate pest;
40#[macro_use]
41extern crate pest_derive;
42
43use pest::{iterators::Pairs, Parser};
44
45#[derive(Debug)]
46pub struct GMLError(String);
47
48impl Error for GMLError {
49    fn description(&self) -> &str {
50        &self.0
51    }
52}
53
54impl Display for GMLError {
55    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56        write!(f, "GMLError: {}", self.0)
57    }
58}
59
60#[derive(Parser, Debug)]
61#[grammar = "grammar.pest"]
62struct GMLParser;
63
64#[derive(Debug, Clone, PartialEq, Eq)]
65pub struct GMLObject {
66    pub pairs: Vec<(String, GMLValue)>,
67}
68impl GMLObject {
69    fn parse(obj: Pairs<'_, Rule>) -> Result<Self, Box<dyn Error>> {
70        let mut current_key = None;
71        let mut pairs = Vec::new();
72        for entry in obj {
73            match entry.as_rule() {
74                Rule::identifier => {
75                    current_key = Some(entry.into_inner().as_str().to_owned());
76                }
77                Rule::value => {
78                    let inner_value = entry
79                        .into_inner()
80                        .next()
81                        .ok_or(GMLError("No rule inner value. Please report this.".into()))?;
82                    match inner_value.as_rule() {
83                        Rule::string => {
84                            pairs.push((
85                                current_key.clone().ok_or(GMLError(
86                                    "String: No rule current key. Please report this.".into(),
87                                ))?,
88                                GMLValue::GMLString(inner_value.into_inner().as_str().to_string()),
89                            ));
90                        }
91                        Rule::number => {
92                            pairs.push((
93                                current_key.clone().ok_or(GMLError(
94                                    "Number: No rule current key. Please report this".into(),
95                                ))?,
96                                GMLValue::GMLInt(inner_value.as_str().parse()?),
97                            ));
98                        }
99                        Rule::object => {
100                            pairs.push((
101                                current_key.clone().ok_or(GMLError(
102                                    "Object: No rule current key. Please report this".into(),
103                                ))?,
104                                GMLValue::GMLObject(Box::new(GMLObject::parse(
105                                    inner_value.into_inner(),
106                                )?)),
107                            ));
108                        }
109                        _ => {
110                            dbg!(inner_value.as_rule());
111                            unreachable!()
112                        }
113                    }
114                }
115                Rule::EOI => {}
116                _ => {
117                    dbg!(entry.as_rule());
118                    unreachable!()
119                }
120            }
121        }
122        Ok(GMLObject { pairs })
123    }
124    pub fn from_str(text: &str) -> Result<GMLObject, GMLError> {
125        let file = match GMLParser::parse(Rule::text, text) {
126            Ok(k) => Ok(k),
127            Err(e) => Err(GMLError(format!(
128                "Failed to parse GML! (syntactic): {:?}",
129                e
130            ))),
131        }?
132        .next()
133        .unwrap();
134        match GMLObject::parse(file.into_inner()) {
135            Ok(k) => Ok(k),
136            Err(e) => Err(GMLError(format!(
137                "Failed to parse GML! (semantic): {:?}",
138                e
139            ))),
140        }
141    }
142}
143
144#[derive(Debug, Clone, PartialEq, Eq)]
145pub enum GMLValue {
146    GMLString(String),
147    GMLInt(i64),
148    GMLObject(Box<GMLObject>),
149}
150
151#[derive(Debug, Clone)]
152pub struct Graph {
153    pub directed: Option<bool>,
154    pub id: Option<i64>,
155    pub label: Option<String>,
156    pub nodes: Vec<Node>,
157    pub edges: Vec<Edge>,
158    attrs: Vec<(String, GMLValue)>,
159}
160#[derive(Debug, Clone, PartialEq, Eq)]
161pub struct Node {
162    pub id: i64,
163    pub label: Option<String>,
164    attrs: Vec<(String, GMLValue)>,
165}
166#[derive(Debug, Clone, PartialEq, Eq)]
167pub struct Edge {
168    pub source: i64,
169    pub target: i64,
170    pub label: Option<String>,
171    attrs: Vec<(String, GMLValue)>,
172}
173
174impl Graph {
175    // This turns the data into the object.
176    // The other function is a wrapper to deal with the
177    // outer graph[...] nonsense
178    fn int_from_gml(mut obj: GMLObject) -> Result<Self, GMLError> {
179        let id = int_take_attribute(&mut obj.pairs, "id");
180        let id = if let Some(id) = id {
181            let GMLValue::GMLInt(id) = id.1 else {
182                return Err(GMLError(format!("Failed to parse graph id: {:?}. Expected int but found invalid type.", id.1)));
183            };
184            Some(id)
185        } else {
186            None
187        };
188        let directed = int_take_attribute(&mut obj.pairs, "directed");
189        let directed = if let Some(directed) = directed {
190            let GMLValue::GMLInt(directed) = directed.1 else {
191                return Err(GMLError(format!("Failed to parse graph directed: {:?}. Expected int but found invalid type.", directed.1)));
192            };
193            Some(directed == 1)
194        } else {
195            None
196        };
197
198        let label = int_take_attribute(&mut obj.pairs, "label");
199        let label = if let Some(label) = label {
200            let GMLValue::GMLString(label) = label.1 else {
201                return Err(GMLError(format!("Failed to parse edge label: {:?}. Expected str but found invalid type.", label.1)));
202            };
203            Some(label)
204        } else {
205            None
206        };
207        let mut nodes = Vec::new();
208        let mut edges = Vec::new();
209        while let Some((_, node)) = int_take_attribute(&mut obj.pairs, "node") {
210            let GMLValue::GMLObject(node) = node else {
211                return Err(GMLError(format!("Failed to parse node: {:?}. Expected object but found invalid type.", node)));
212            };
213            nodes.push(Node::from_gml(*node)?);
214        }
215        while let Some((_, edge)) = int_take_attribute(&mut obj.pairs, "edge") {
216            let GMLValue::GMLObject(edge) = edge else {
217                return Err(GMLError(format!("Failed to parse edge: {:?}. Expected object but found invalid type.", edge)));
218            };
219            edges.push(Edge::from_gml(*edge)?);
220        }
221        Ok(Graph {
222            directed,
223            id,
224            label,
225            nodes,
226            edges,
227            attrs: obj.pairs,
228        })
229    }
230    /// Transform a [GMLObject] into a graph. This expects the root node
231    /// of the graph.
232    ///
233    /// Note: This does not currently accept multiple graphs in a single file
234    pub fn from_gml(mut obj: GMLObject) -> Result<Self, GMLError> {
235        let graph = int_take_attribute(&mut obj.pairs, "graph");
236        let Some(graph) = graph else {
237            return Err(GMLError(format!("Unable to parse graph from GMLObject")));
238        };
239        let GMLValue::GMLObject(graph) = graph.1 else {
240            return Err(GMLError(format!("Failed to parse graph: {:?}. Expected graph but found invalid type.", graph.1)));
241        };
242        Self::int_from_gml(*graph)
243    }
244}
245
246impl Node {
247    fn from_gml(mut obj: GMLObject) -> Result<Self, GMLError> {
248        let id = int_take_attribute(&mut obj.pairs, "id");
249        let Some(id) = id else {
250            return Err(GMLError(format!("Unable to parse id from node")));
251        };
252        let GMLValue::GMLInt(id) = id.1 else {
253            return Err(GMLError(format!("Failed to parse node id: {:?}. Expected int but found invalid type.", id.1)));
254        };
255        let label = int_take_attribute(&mut obj.pairs, "label");
256        let label = if let Some(label) = label {
257            let GMLValue::GMLString(label) = label.1 else {
258                return Err(GMLError(format!("Failed to parse edge label: {:?}. Expected str but found invalid type.", label.1)));
259            };
260            Some(label)
261        } else {
262            None
263        };
264        Ok(Self {
265            id,
266            label,
267            attrs: obj.pairs,
268        })
269    }
270}
271impl Edge {
272    fn from_gml(mut obj: GMLObject) -> Result<Self, GMLError> {
273        let source = int_take_attribute(&mut obj.pairs, "source");
274        let Some(source) = source else {
275            return Err(GMLError(format!("Unable to parse source from edge")));
276        };
277        let GMLValue::GMLInt(source) = source.1 else {
278            return Err(GMLError(format!("Failed to parse edge source id: {:?}. Expected int but found invalid type.", source.1)));
279        };
280        let target = int_take_attribute(&mut obj.pairs, "target");
281        let Some(target) = target else {
282            return Err(GMLError(format!("Unable to parse target from edge")));
283        };
284        let GMLValue::GMLInt(target) = target.1 else {
285            return Err(GMLError(format!("Failed to parse edge source id: {:?}. Expected int but found invalid type.", target.1)));
286        };
287        let label = int_take_attribute(&mut obj.pairs, "label");
288        let label = if let Some(label) = label {
289            let GMLValue::GMLString(label) = label.1 else {
290                return Err(GMLError(format!("Failed to parse edge label: {:?}. Expected str but found invalid type.", label.1)));
291            };
292            Some(label)
293        } else {
294            None
295        };
296
297        Ok(Self {
298            source,
299            target,
300            label,
301            attrs: obj.pairs,
302        })
303    }
304}
305pub trait HasGMLAttributes {
306    fn attributes(&self) -> &Vec<(String, GMLValue)>;
307    fn attributes_mut(&mut self) -> &mut Vec<(String, GMLValue)>;
308}
309
310pub trait ReadableGMLAttributes<'a> {
311    /// Take the attribute from the object if the key == name
312    fn take_attribute(&mut self, name: &str) -> Option<(String, GMLValue)>;
313    /// Return a reference to the object if the key == name
314    fn get_attribute(&'a self, name: &str) -> Option<&'a (String, GMLValue)>;
315}
316fn int_take_attribute(
317    attrs: &mut Vec<(String, GMLValue)>,
318    name: &str,
319) -> Option<(String, GMLValue)> {
320    let mut index = None;
321    for (i, attr) in attrs.iter().enumerate() {
322        if attr.0 == name {
323            index = Some(i);
324            break;
325        }
326    }
327    if let Some(index) = index {
328        // remove is O(n) which would make
329        // building the graph O(n^2)
330        Some(attrs.swap_remove(index))
331    } else {
332        None
333    }
334}
335fn int_get_attribute<'a>(
336    attrs: &'a Vec<(String, GMLValue)>,
337    name: &str,
338) -> Option<&'a (String, GMLValue)> {
339    for attr in attrs {
340        if attr.0 == name {
341            return Some(&attr);
342        }
343    }
344    None
345}
346// Blanket impl is far better but it doesn't show up in the docs.
347// impl<'a, T> ReadableGMLAttributes<'a> for T
348// where
349//     T: HasGMLAttributes,
350// {
351//     fn take_attribute(&mut self, name: &str) -> Option<(String, GMLValue)> {
352//         let attrs = self.attributes_mut();
353//         int_take_attribute(attrs, name)
354//     }
355//     fn get_attribute(&'a self, name: &str) -> Option<&'a (String, GMLValue)> {
356//         let attrs = self.attributes();
357//         int_get_attribute(attrs, name)
358//     }
359// }
360impl<'a> ReadableGMLAttributes<'a> for Node {
361    fn take_attribute(&mut self, name: &str) -> Option<(String, GMLValue)> {
362        let attrs = self.attributes_mut();
363        int_take_attribute(attrs, name)
364    }
365    fn get_attribute(&'a self, name: &str) -> Option<&'a (String, GMLValue)> {
366        let attrs = self.attributes();
367        int_get_attribute(attrs, name)
368    }
369}
370impl<'a> ReadableGMLAttributes<'a> for Edge {
371    fn take_attribute(&mut self, name: &str) -> Option<(String, GMLValue)> {
372        let attrs = self.attributes_mut();
373        int_take_attribute(attrs, name)
374    }
375    fn get_attribute(&'a self, name: &str) -> Option<&'a (String, GMLValue)> {
376        let attrs = self.attributes();
377        int_get_attribute(attrs, name)
378    }
379}
380impl<'a> ReadableGMLAttributes<'a> for Graph {
381    fn take_attribute(&mut self, name: &str) -> Option<(String, GMLValue)> {
382        let attrs = self.attributes_mut();
383        int_take_attribute(attrs, name)
384    }
385    fn get_attribute(&'a self, name: &str) -> Option<&'a (String, GMLValue)> {
386        let attrs = self.attributes();
387        int_get_attribute(attrs, name)
388    }
389}
390
391impl HasGMLAttributes for Node {
392    fn attributes(&self) -> &Vec<(String, GMLValue)> {
393        return &self.attrs;
394    }
395    fn attributes_mut(&mut self) -> &mut Vec<(String, GMLValue)> {
396        return &mut self.attrs;
397    }
398}
399impl HasGMLAttributes for Edge {
400    fn attributes(&self) -> &Vec<(String, GMLValue)> {
401        return &self.attrs;
402    }
403    fn attributes_mut(&mut self) -> &mut Vec<(String, GMLValue)> {
404        return &mut self.attrs;
405    }
406}
407impl HasGMLAttributes for Graph {
408    fn attributes(&self) -> &Vec<(String, GMLValue)> {
409        return &self.attrs;
410    }
411    fn attributes_mut(&mut self) -> &mut Vec<(String, GMLValue)> {
412        return &mut self.attrs;
413    }
414}
415
416#[cfg(test)]
417mod tests {
418    use super::*;
419    use std::fs;
420    #[test]
421    fn parse_empty() {
422        let file = fs::read_to_string("tests/empty.gml").unwrap();
423        let file = GMLParser::parse(Rule::text, &file).unwrap().next().unwrap();
424        let root = GMLObject::parse(file.into_inner()).unwrap();
425        assert!(Graph::from_gml(root).is_ok());
426    }
427    #[test]
428    fn parse_single() {
429        let file = fs::read_to_string("tests/single.gml").unwrap();
430        let file = GMLParser::parse(Rule::text, &file).unwrap().next().unwrap();
431        let root = GMLObject::parse(file.into_inner()).unwrap();
432        let expected = GMLObject {
433            pairs: vec![(
434                "graph".into(),
435                GMLValue::GMLObject(Box::new(GMLObject {
436                    pairs: vec![("k".into(), GMLValue::GMLString("test".into()))],
437                })),
438            )],
439        };
440        assert_eq!(root, expected);
441        assert!(Graph::from_gml(root).is_ok());
442    }
443    #[test]
444    fn parse_simple() {
445        let file = fs::read_to_string("tests/simple.gml").unwrap();
446        let file = GMLParser::parse(Rule::text, &file).unwrap().next().unwrap();
447        let root = GMLObject::parse(file.into_inner()).unwrap();
448        assert!(Graph::from_gml(root).is_ok());
449    }
450    #[test]
451    fn parse_wikipedia() {
452        let file = fs::read_to_string("tests/wikipedia.gml").unwrap();
453        let file = GMLParser::parse(Rule::text, &file).unwrap().next().unwrap();
454        let root = GMLObject::parse(file.into_inner()).unwrap();
455        let graph = Graph::from_gml(root).unwrap();
456        assert_eq!(graph.id, Some(42));
457        assert_eq!(graph.directed, Some(true));
458        assert_eq!(graph.label, Some("Hello, I am a graph".into()));
459        assert_eq!(graph.nodes.len(), 3);
460        assert_eq!(graph.edges.len(), 3);
461    }
462
463    #[test]
464    fn parse_synoptic() {
465        let file = fs::read_to_string("tests/synoptic.gml").unwrap();
466        let file = GMLParser::parse(Rule::text, &file).unwrap().next().unwrap();
467        let root = GMLObject::parse(file.into_inner()).unwrap();
468        let graph = Graph::from_gml(root).unwrap();
469        assert_eq!(graph.nodes.len(), 7);
470        assert_eq!(graph.nodes[0].id, 0);
471        assert_eq!(graph.nodes[0].label, Some("a".into()));
472        assert_eq!(graph.nodes[6].id, 6);
473        assert_eq!(graph.nodes[6].label, Some("INITIAL".into()));
474        dbg!(&graph);
475        assert_eq!(graph.edges.len(), 8);
476        assert_eq!(graph.edges[0].label, Some("P: 1.00".into()));
477        assert_eq!(graph.edges[0].source, 6);
478        assert_eq!(graph.edges[0].target, 0);
479    }
480}