rust_igraph/algorithms/io/
gml.rs1use std::io::{BufRead, BufReader, Read, Write};
15
16use crate::core::{Graph, IgraphError, IgraphResult};
17
18pub fn read_gml<R: Read>(input: R) -> IgraphResult<Graph> {
39 let reader = BufReader::new(input);
40 let tokens = tokenize(reader)?;
41 parse_graph(&tokens)
42}
43
44pub fn write_gml<W: Write>(graph: &Graph, writer: &mut W) -> IgraphResult<()> {
71 writeln!(writer, "graph")?;
72 writeln!(writer, "[")?;
73 writeln!(writer, " directed {}", i32::from(graph.is_directed()))?;
74
75 for v in 0..graph.vcount() {
76 writeln!(writer, " node")?;
77 writeln!(writer, " [")?;
78 writeln!(writer, " id {v}")?;
79 writeln!(writer, " ]")?;
80 }
81
82 for eid in 0..graph.ecount() {
83 #[allow(clippy::cast_possible_truncation)]
84 let (src, tgt) = graph.edge(eid as u32)?;
85 writeln!(writer, " edge")?;
86 writeln!(writer, " [")?;
87 writeln!(writer, " source {src}")?;
88 writeln!(writer, " target {tgt}")?;
89 writeln!(writer, " ]")?;
90 }
91
92 writeln!(writer, "]")?;
93 Ok(())
94}
95
96#[derive(Debug, Clone, PartialEq)]
99enum Token {
100 Key(String),
101 Integer(i64),
102 Float(f64),
103 Str(String),
104 Open, Close, }
107
108fn tokenize<R: BufRead>(reader: R) -> IgraphResult<Vec<Token>> {
109 let mut tokens = Vec::new();
110 let mut line_no: usize = 0;
111
112 for line_result in reader.lines() {
113 line_no = line_no.wrapping_add(1);
114 let line = line_result?;
115 let trimmed = line.trim();
116
117 if trimmed.is_empty() {
120 continue;
121 }
122
123 let mut chars = trimmed.chars().peekable();
124 while let Some(&ch) = chars.peek() {
125 match ch {
126 ' ' | '\t' | '\r' => {
127 chars.next();
128 }
129 '[' => {
130 tokens.push(Token::Open);
131 chars.next();
132 }
133 ']' => {
134 tokens.push(Token::Close);
135 chars.next();
136 }
137 '"' => {
138 chars.next(); let mut s = String::new();
140 let mut escaped = false;
141 loop {
142 match chars.next() {
143 None => {
144 return Err(IgraphError::Parse {
145 line: line_no,
146 message: "unterminated string".into(),
147 });
148 }
149 Some('\\') if !escaped => {
150 escaped = true;
151 }
152 Some('"') if !escaped => break,
153 Some(c) => {
154 if escaped {
155 match c {
156 'n' => s.push('\n'),
157 't' => s.push('\t'),
158 '\\' => s.push('\\'),
159 '"' => s.push('"'),
160 _ => {
161 s.push('\\');
162 s.push(c);
163 }
164 }
165 escaped = false;
166 } else {
167 s.push(c);
168 }
169 }
170 }
171 }
172 let decoded = decode_entities(&s);
174 tokens.push(Token::Str(decoded));
175 }
176 '#' => break, _ => {
178 let mut word = String::new();
180 while let Some(&c) = chars.peek() {
181 if c == ' ' || c == '\t' || c == '[' || c == ']' || c == '"' {
182 break;
183 }
184 word.push(c);
185 chars.next();
186 }
187
188 if word.is_empty() {
189 return Err(IgraphError::Parse {
190 line: line_no,
191 message: format!("unexpected character '{ch}'"),
192 });
193 }
194
195 if let Ok(i) = word.parse::<i64>() {
197 tokens.push(Token::Integer(i));
198 } else if let Ok(f) = parse_gml_float(&word) {
199 tokens.push(Token::Float(f));
200 } else {
201 tokens.push(Token::Key(word));
202 }
203 }
204 }
205 }
206 }
207
208 Ok(tokens)
209}
210
211fn parse_gml_float(s: &str) -> Result<f64, ()> {
212 let lower = s.to_ascii_lowercase();
213 match lower.as_str() {
214 "inf" | "+inf" => Ok(f64::INFINITY),
215 "-inf" => Ok(f64::NEG_INFINITY),
216 "nan" => Ok(f64::NAN),
217 _ => s.parse::<f64>().map_err(|_| ()),
218 }
219}
220
221fn decode_entities(s: &str) -> String {
222 s.replace("&", "&")
223 .replace(""", "\"")
224 .replace("'", "'")
225 .replace("<", "<")
226 .replace(">", ">")
227}
228
229fn parse_graph(tokens: &[Token]) -> IgraphResult<Graph> {
232 let mut pos = 0;
234
235 while pos < tokens.len() {
237 match &tokens[pos] {
238 Token::Key(k) if k == "graph" => {
239 pos += 1;
240 break;
241 }
242 Token::Key(_) => {
243 pos += 1;
244 pos = skip_value(tokens, pos);
246 }
247 _ => {
248 pos += 1;
249 }
250 }
251 }
252
253 if pos >= tokens.len() {
255 return Err(IgraphError::Parse {
256 line: 0,
257 message: "no 'graph' block found in GML input".into(),
258 });
259 }
260 if tokens[pos] != Token::Open {
261 return Err(IgraphError::Parse {
262 line: 0,
263 message: "expected '[' after 'graph'".into(),
264 });
265 }
266 pos += 1;
267
268 let mut directed = false;
269 let mut node_ids: Vec<i64> = Vec::new();
270 let mut edges: Vec<(i64, i64)> = Vec::new();
271
272 while pos < tokens.len() && tokens[pos] != Token::Close {
274 match &tokens[pos] {
275 Token::Key(k) => {
276 let key = k.clone();
277 pos += 1;
278
279 match key.as_str() {
280 "directed" => {
281 if let Some(Token::Integer(v)) = tokens.get(pos) {
282 directed = *v != 0;
283 pos += 1;
284 } else {
285 pos = skip_value(tokens, pos);
286 }
287 }
288 "node" if pos < tokens.len() && tokens[pos] == Token::Open => {
289 pos += 1;
290 let (node_id, new_pos) = parse_node(tokens, pos)?;
291 node_ids.push(node_id);
292 pos = new_pos;
293 }
294 "edge" if pos < tokens.len() && tokens[pos] == Token::Open => {
295 pos += 1;
296 let (edge, new_pos) = parse_edge(tokens, pos)?;
297 edges.push(edge);
298 pos = new_pos;
299 }
300 _ => {
301 pos = skip_value(tokens, pos);
302 }
303 }
304 }
305 _ => {
306 return Err(IgraphError::Parse {
307 line: 0,
308 message: format!("unexpected token in graph block: {:?}", tokens[pos]),
309 });
310 }
311 }
312 }
313
314 if pos < tokens.len() && tokens[pos] == Token::Close {
316 }
318
319 build_graph(directed, &node_ids, &edges)
321}
322
323fn parse_node(tokens: &[Token], mut pos: usize) -> IgraphResult<(i64, usize)> {
324 let mut node_id: Option<i64> = None;
325
326 while pos < tokens.len() && tokens[pos] != Token::Close {
327 match &tokens[pos] {
328 Token::Key(k) => {
329 let key = k.clone();
330 pos += 1;
331 if key == "id" {
332 if let Some(Token::Integer(v)) = tokens.get(pos) {
333 node_id = Some(*v);
334 pos += 1;
335 } else {
336 pos = skip_value(tokens, pos);
337 }
338 } else {
339 pos = skip_value(tokens, pos);
340 }
341 }
342 _ => {
343 return Err(IgraphError::Parse {
344 line: 0,
345 message: format!("unexpected token in node block: {:?}", tokens[pos]),
346 });
347 }
348 }
349 }
350
351 if pos < tokens.len() && tokens[pos] == Token::Close {
353 pos += 1;
354 }
355
356 let id = node_id.ok_or_else(|| IgraphError::Parse {
357 line: 0,
358 message: "node without 'id' field".into(),
359 })?;
360
361 Ok((id, pos))
362}
363
364fn parse_edge(tokens: &[Token], mut pos: usize) -> IgraphResult<((i64, i64), usize)> {
365 let mut source: Option<i64> = None;
366 let mut target: Option<i64> = None;
367
368 while pos < tokens.len() && tokens[pos] != Token::Close {
369 match &tokens[pos] {
370 Token::Key(k) => {
371 let key = k.clone();
372 pos += 1;
373 match key.as_str() {
374 "source" => {
375 if let Some(Token::Integer(v)) = tokens.get(pos) {
376 source = Some(*v);
377 pos += 1;
378 } else {
379 pos = skip_value(tokens, pos);
380 }
381 }
382 "target" => {
383 if let Some(Token::Integer(v)) = tokens.get(pos) {
384 target = Some(*v);
385 pos += 1;
386 } else {
387 pos = skip_value(tokens, pos);
388 }
389 }
390 _ => {
391 pos = skip_value(tokens, pos);
392 }
393 }
394 }
395 _ => {
396 return Err(IgraphError::Parse {
397 line: 0,
398 message: format!("unexpected token in edge block: {:?}", tokens[pos]),
399 });
400 }
401 }
402 }
403
404 if pos < tokens.len() && tokens[pos] == Token::Close {
406 pos += 1;
407 }
408
409 let src = source.ok_or_else(|| IgraphError::Parse {
410 line: 0,
411 message: "edge without 'source' field".into(),
412 })?;
413 let tgt = target.ok_or_else(|| IgraphError::Parse {
414 line: 0,
415 message: "edge without 'target' field".into(),
416 })?;
417
418 Ok(((src, tgt), pos))
419}
420
421fn skip_value(tokens: &[Token], mut pos: usize) -> usize {
422 if pos >= tokens.len() {
423 return pos;
424 }
425 match &tokens[pos] {
426 Token::Integer(_) | Token::Float(_) | Token::Str(_) | Token::Key(_) => pos + 1,
427 Token::Open => {
428 pos += 1;
429 let mut depth: u32 = 1;
430 while pos < tokens.len() && depth > 0 {
431 match &tokens[pos] {
432 Token::Open => depth = depth.saturating_add(1),
433 Token::Close => depth = depth.saturating_sub(1),
434 _ => {}
435 }
436 pos += 1;
437 }
438 pos
439 }
440 Token::Close => pos,
441 }
442}
443
444fn build_graph(directed: bool, node_ids: &[i64], edges: &[(i64, i64)]) -> IgraphResult<Graph> {
445 use std::collections::HashMap;
446
447 let mut id_map: HashMap<i64, u32> = HashMap::with_capacity(node_ids.len());
449 for (idx, &gml_id) in node_ids.iter().enumerate() {
450 #[allow(clippy::cast_possible_truncation)]
451 let internal_id = idx as u32;
452 if id_map.insert(gml_id, internal_id).is_some() {
453 return Err(IgraphError::Parse {
454 line: 0,
455 message: format!("duplicate node id {gml_id}"),
456 });
457 }
458 }
459
460 #[allow(clippy::cast_possible_truncation)]
461 let n = node_ids.len() as u32;
462 let mut graph = Graph::new(n, directed)?;
463
464 let mut edge_list: Vec<(u32, u32)> = Vec::with_capacity(edges.len());
465 for &(src_gml, tgt_gml) in edges {
466 let src = id_map.get(&src_gml).ok_or_else(|| IgraphError::Parse {
467 line: 0,
468 message: format!("edge references unknown node id {src_gml}"),
469 })?;
470 let tgt = id_map.get(&tgt_gml).ok_or_else(|| IgraphError::Parse {
471 line: 0,
472 message: format!("edge references unknown node id {tgt_gml}"),
473 })?;
474 edge_list.push((*src, *tgt));
475 }
476
477 graph.add_edges(edge_list)?;
478 Ok(graph)
479}
480
481#[cfg(test)]
482mod tests {
483 use super::*;
484
485 #[test]
486 fn test_empty_graph() {
487 let gml = b"graph [ ]";
488 let g = read_gml(&gml[..]).unwrap();
489 assert_eq!(g.vcount(), 0);
490 assert_eq!(g.ecount(), 0);
491 assert!(!g.is_directed());
492 }
493
494 #[test]
495 fn test_directed_flag() {
496 let gml = b"graph [ directed 1 node [ id 0 ] node [ id 1 ] edge [ source 0 target 1 ] ]";
497 let g = read_gml(&gml[..]).unwrap();
498 assert!(g.is_directed());
499 assert_eq!(g.vcount(), 2);
500 assert_eq!(g.ecount(), 1);
501 }
502
503 #[test]
504 fn test_undirected_default() {
505 let gml = b"graph [ node [ id 0 ] node [ id 1 ] edge [ source 0 target 1 ] ]";
506 let g = read_gml(&gml[..]).unwrap();
507 assert!(!g.is_directed());
508 }
509
510 #[test]
511 fn test_non_contiguous_ids() {
512 let gml = b"graph [\n node [ id 10 ]\n node [ id 20 ]\n node [ id 30 ]\n edge [ source 10 target 30 ]\n]";
513 let g = read_gml(&gml[..]).unwrap();
514 assert_eq!(g.vcount(), 3);
515 assert_eq!(g.ecount(), 1);
516 let (src, tgt) = g.edge(0).unwrap();
518 assert_eq!(src, 0);
519 assert_eq!(tgt, 2);
520 }
521
522 #[test]
523 fn test_multiline() {
524 let gml = b"Creator \"test\"\ngraph\n[\n directed 0\n node\n [\n id 0\n label \"A\"\n ]\n node\n [\n id 1\n label \"B\"\n ]\n edge\n [\n source 0\n target 1\n weight 1.5\n ]\n]\n";
525 let g = read_gml(&gml[..]).unwrap();
526 assert_eq!(g.vcount(), 2);
527 assert_eq!(g.ecount(), 1);
528 }
529
530 #[test]
531 fn test_self_loop() {
532 let gml = b"graph [ node [ id 0 ] edge [ source 0 target 0 ] ]";
533 let g = read_gml(&gml[..]).unwrap();
534 assert_eq!(g.ecount(), 1);
535 let (s, t) = g.edge(0).unwrap();
536 assert_eq!(s, t);
537 }
538
539 #[test]
540 fn test_multiple_edges() {
541 let gml = b"graph [ node [ id 0 ] node [ id 1 ] edge [ source 0 target 1 ] edge [ source 0 target 1 ] ]";
542 let g = read_gml(&gml[..]).unwrap();
543 assert_eq!(g.ecount(), 2);
544 }
545
546 #[test]
547 fn test_error_missing_graph() {
548 let gml = b"node [ id 0 ]";
549 let result = read_gml(&gml[..]);
550 assert!(result.is_err());
551 }
552
553 #[test]
554 fn test_error_duplicate_node_id() {
555 let gml = b"graph [ node [ id 0 ] node [ id 0 ] ]";
556 let result = read_gml(&gml[..]);
557 assert!(result.is_err());
558 }
559
560 #[test]
561 fn test_error_unknown_edge_target() {
562 let gml = b"graph [ node [ id 0 ] edge [ source 0 target 99 ] ]";
563 let result = read_gml(&gml[..]);
564 assert!(result.is_err());
565 }
566
567 #[test]
568 fn test_error_node_without_id() {
569 let gml = b"graph [ node [ label \"x\" ] ]";
570 let result = read_gml(&gml[..]);
571 assert!(result.is_err());
572 }
573
574 #[test]
575 fn test_error_edge_without_source() {
576 let gml = b"graph [ node [ id 0 ] node [ id 1 ] edge [ target 1 ] ]";
577 let result = read_gml(&gml[..]);
578 assert!(result.is_err());
579 }
580
581 #[test]
582 fn test_write_read_round_trip_undirected() {
583 let mut g = Graph::with_vertices(4);
584 g.add_edge(0, 1).unwrap();
585 g.add_edge(1, 2).unwrap();
586 g.add_edge(2, 3).unwrap();
587 g.add_edge(3, 0).unwrap();
588
589 let mut buf = Vec::new();
590 write_gml(&g, &mut buf).unwrap();
591
592 let g2 = read_gml(&buf[..]).unwrap();
593 assert_eq!(g2.vcount(), 4);
594 assert_eq!(g2.ecount(), 4);
595 assert!(!g2.is_directed());
596 }
597
598 #[test]
599 fn test_write_read_round_trip_directed() {
600 let mut g = Graph::new(3, true).unwrap();
601 g.add_edge(0, 1).unwrap();
602 g.add_edge(1, 2).unwrap();
603 g.add_edge(2, 0).unwrap();
604
605 let mut buf = Vec::new();
606 write_gml(&g, &mut buf).unwrap();
607
608 let g2 = read_gml(&buf[..]).unwrap();
609 assert_eq!(g2.vcount(), 3);
610 assert_eq!(g2.ecount(), 3);
611 assert!(g2.is_directed());
612
613 for eid in 0..3u32 {
614 let (s1, t1) = g.edge(eid).unwrap();
615 let (s2, t2) = g2.edge(eid).unwrap();
616 assert_eq!(s1, s2);
617 assert_eq!(t1, t2);
618 }
619 }
620
621 #[test]
622 fn test_write_empty() {
623 let g = Graph::with_vertices(0);
624 let mut buf = Vec::new();
625 write_gml(&g, &mut buf).unwrap();
626 let s = String::from_utf8(buf).unwrap();
627 assert!(s.contains("graph"));
628 assert!(s.contains("directed 0"));
629 }
630
631 #[test]
632 fn test_string_with_entities() {
633 let gml = b"graph [ node [ id 0 label \"a&b\" ] ]";
634 let g = read_gml(&gml[..]).unwrap();
635 assert_eq!(g.vcount(), 1);
636 }
637
638 #[test]
639 fn test_top_level_creator_ignored() {
640 let gml = b"Creator \"Someone\"\nVersion 1\ngraph [ node [ id 0 ] ]";
641 let g = read_gml(&gml[..]).unwrap();
642 assert_eq!(g.vcount(), 1);
643 }
644
645 #[test]
646 fn test_negative_node_id() {
647 let gml = b"graph [ node [ id -5 ] node [ id -3 ] edge [ source -5 target -3 ] ]";
648 let g = read_gml(&gml[..]).unwrap();
649 assert_eq!(g.vcount(), 2);
650 assert_eq!(g.ecount(), 1);
651 }
652
653 #[test]
654 fn test_lesmis_style_header() {
655 let gml = b"Creator \"Mark Newman on Fri Jul 21 12:44:53 2006\"\ngraph\n[\n node\n [\n id 0\n label \"Myriel\"\n ]\n node\n [\n id 1\n label \"Napoleon\"\n ]\n edge\n [\n source 0\n target 1\n value 1\n ]\n]\n";
656 let g = read_gml(&gml[..]).unwrap();
657 assert_eq!(g.vcount(), 2);
658 assert_eq!(g.ecount(), 1);
659 }
660}