rust_igraph/algorithms/io/
dl.rs1use std::io::{BufRead, BufReader, Read};
24
25use crate::core::{Graph, IgraphError, IgraphResult};
26
27#[derive(Debug, Clone)]
29pub struct DlGraph {
30 pub graph: Graph,
32 pub labels: Option<Vec<String>>,
34 pub weights: Option<Vec<f64>>,
36}
37
38#[derive(Debug, Clone, Copy, PartialEq)]
39enum DlFormat {
40 FullMatrix,
41 EdgeList1,
42 NodeList1,
43}
44
45#[allow(clippy::too_many_lines)]
62pub fn read_dl<R: Read>(input: R, directed: bool) -> IgraphResult<DlGraph> {
63 let reader = BufReader::new(input);
64 let mut lines: Vec<String> = Vec::new();
65
66 for line_result in reader.lines() {
67 let line = line_result?;
68 lines.push(line);
69 }
70
71 let mut pos = 0;
72 let mut n_vertices: Option<u32> = None;
73 let mut format = DlFormat::FullMatrix;
74 let mut labels: Vec<String> = Vec::new();
75 let mut labels_embedded = false;
76
77 pos = skip_empty(&lines, pos);
79 if pos >= lines.len() {
80 return Err(parse_err(0, "empty DL file"));
81 }
82
83 let header_line = lines[pos].trim().to_ascii_lowercase();
84 if !header_line.starts_with("dl") {
85 return Err(parse_err(pos + 1, "DL file must start with 'DL'"));
86 }
87
88 let header_rest = &lines[pos].trim()[2..];
90 if let Some(n) = extract_n(header_rest) {
91 n_vertices = Some(n);
92 }
93 pos += 1;
94
95 loop {
97 pos = skip_empty(&lines, pos);
98 if pos >= lines.len() {
99 break;
100 }
101
102 let trimmed = lines[pos].trim();
103 let lower = trimmed.to_ascii_lowercase();
104
105 if lower.starts_with("data:") || lower == "data" {
106 pos += 1;
107 break;
108 }
109
110 if lower.starts_with("n=") || lower.starts_with("n =") {
111 if n_vertices.is_none() {
112 let val =
113 extract_n(trimmed).ok_or_else(|| parse_err(pos + 1, "invalid n= value"))?;
114 n_vertices = Some(val);
115 }
116 pos += 1;
117 continue;
118 }
119
120 if lower.starts_with("format") {
121 if lower.contains("fullmatrix") || lower.contains("full matrix") {
122 format = DlFormat::FullMatrix;
123 } else if lower.contains("edgelist1")
124 || lower.contains("edge list1")
125 || lower.contains("edgelist 1")
126 {
127 format = DlFormat::EdgeList1;
128 } else if lower.contains("nodelist1")
129 || lower.contains("node list1")
130 || lower.contains("nodelist 1")
131 {
132 format = DlFormat::NodeList1;
133 }
134 pos += 1;
135 continue;
136 }
137
138 if lower.starts_with("labels embedded") {
139 labels_embedded = true;
140 pos += 1;
141 continue;
142 }
143
144 if lower.starts_with("labels:") || lower == "labels" {
145 pos += 1;
146 while pos < lines.len() {
148 let lbl_line = lines[pos].trim();
149 let lbl_lower = lbl_line.to_ascii_lowercase();
150 if lbl_lower.starts_with("data")
151 || lbl_lower.starts_with("format")
152 || lbl_lower.starts_with("labels")
153 {
154 break;
155 }
156 if !lbl_line.is_empty() {
157 for token in split_labels(lbl_line) {
159 labels.push(token);
160 }
161 }
162 pos += 1;
163 }
164 continue;
165 }
166
167 if let Some(n) = extract_n(trimmed) {
169 if n_vertices.is_none() {
170 n_vertices = Some(n);
171 }
172 pos += 1;
173 continue;
174 }
175
176 pos += 1;
177 }
178
179 let n = n_vertices.ok_or_else(|| parse_err(0, "no vertex count (n=) found in DL file"))?;
180
181 let mut edges: Vec<(u32, u32)> = Vec::new();
182 let mut weights: Vec<f64> = Vec::new();
183 let mut has_weights = false;
184
185 match format {
186 DlFormat::FullMatrix => {
187 if labels_embedded {
188 pos = skip_empty(&lines, pos);
190 if pos < lines.len() {
191 let header_labels = split_labels(lines[pos].trim());
192 if labels.is_empty() {
193 labels = header_labels;
194 }
195 pos += 1;
196 }
197 let mut row = 0u32;
198 while pos < lines.len() && row < n {
199 let trimmed = lines[pos].trim();
200 if trimmed.is_empty() {
201 pos += 1;
202 continue;
203 }
204 let tokens: Vec<&str> = trimmed.split_whitespace().collect();
205 if tokens.is_empty() {
206 pos += 1;
207 continue;
208 }
209 let data_start = 1;
211 for (col, &val) in tokens.iter().skip(data_start).enumerate() {
212 if val == "1" {
213 #[allow(clippy::cast_possible_truncation)]
214 edges.push((row, col as u32));
215 }
216 }
217 row += 1;
218 pos += 1;
219 }
220 } else {
221 let mut row = 0u32;
222 while pos < lines.len() && row < n {
223 let trimmed = lines[pos].trim();
224 if trimmed.is_empty() {
225 pos += 1;
226 continue;
227 }
228 for (col, ch) in trimmed.split_whitespace().enumerate() {
229 if ch == "1" {
230 #[allow(clippy::cast_possible_truncation)]
231 edges.push((row, col as u32));
232 }
233 }
234 row += 1;
235 pos += 1;
236 }
237 }
238 }
239 DlFormat::EdgeList1 => {
240 if labels_embedded {
241 while pos < lines.len() {
242 let trimmed = lines[pos].trim();
243 if trimmed.is_empty() {
244 pos += 1;
245 continue;
246 }
247 let tokens: Vec<&str> = trimmed.split_whitespace().collect();
248 if tokens.len() < 2 {
249 pos += 1;
250 continue;
251 }
252 let from_label = tokens[0].to_string();
253 let to_label = tokens[1].to_string();
254 let from_id = get_or_add_label(&mut labels, &from_label);
255 let to_id = get_or_add_label(&mut labels, &to_label);
256 edges.push((from_id, to_id));
257 if tokens.len() >= 3 {
258 if let Ok(w) = tokens[2].parse::<f64>() {
259 has_weights = true;
260 weights.push(w);
261 } else {
262 weights.push(0.0);
263 }
264 } else {
265 weights.push(0.0);
266 }
267 pos += 1;
268 }
269 } else {
270 while pos < lines.len() {
271 let trimmed = lines[pos].trim();
272 if trimmed.is_empty() {
273 pos += 1;
274 continue;
275 }
276 let tokens: Vec<&str> = trimmed.split_whitespace().collect();
277 if tokens.len() < 2 {
278 pos += 1;
279 continue;
280 }
281 let from: u32 = tokens[0]
282 .parse()
283 .map_err(|e| parse_err(pos + 1, &format!("invalid source id: {e}")))?;
284 let to: u32 = tokens[1]
285 .parse()
286 .map_err(|e| parse_err(pos + 1, &format!("invalid target id: {e}")))?;
287 if from == 0 || to == 0 || from > n || to > n {
288 return Err(parse_err(
289 pos + 1,
290 &format!("vertex ID out of range: {from} {to} (n={n})"),
291 ));
292 }
293 edges.push((from - 1, to - 1));
294 if tokens.len() >= 3 {
295 if let Ok(w) = tokens[2].parse::<f64>() {
296 has_weights = true;
297 weights.push(w);
298 } else {
299 weights.push(0.0);
300 }
301 } else {
302 weights.push(0.0);
303 }
304 pos += 1;
305 }
306 }
307 }
308 DlFormat::NodeList1 => {
309 if labels_embedded {
310 while pos < lines.len() {
311 let trimmed = lines[pos].trim();
312 if trimmed.is_empty() {
313 pos += 1;
314 continue;
315 }
316 let tokens: Vec<&str> = trimmed.split_whitespace().collect();
317 if tokens.is_empty() {
318 pos += 1;
319 continue;
320 }
321 let from_label = tokens[0].to_string();
322 let from_id = get_or_add_label(&mut labels, &from_label);
323 for &tok in &tokens[1..] {
324 let to_label = tok.to_string();
325 let to_id = get_or_add_label(&mut labels, &to_label);
326 edges.push((from_id, to_id));
327 }
328 pos += 1;
329 }
330 } else {
331 while pos < lines.len() {
332 let trimmed = lines[pos].trim();
333 if trimmed.is_empty() {
334 pos += 1;
335 continue;
336 }
337 let tokens: Vec<&str> = trimmed.split_whitespace().collect();
338 if tokens.is_empty() {
339 pos += 1;
340 continue;
341 }
342 let from: u32 = tokens[0]
343 .parse()
344 .map_err(|e| parse_err(pos + 1, &format!("invalid source id: {e}")))?;
345 if from == 0 || from > n {
346 return Err(parse_err(
347 pos + 1,
348 &format!("source vertex ID out of range: {from} (n={n})"),
349 ));
350 }
351 for &tok in &tokens[1..] {
352 let to: u32 = tok
353 .parse()
354 .map_err(|e| parse_err(pos + 1, &format!("invalid target id: {e}")))?;
355 if to == 0 || to > n {
356 return Err(parse_err(
357 pos + 1,
358 &format!("target vertex ID out of range: {to} (n={n})"),
359 ));
360 }
361 edges.push((from - 1, to - 1));
362 }
363 pos += 1;
364 }
365 }
366 }
367 }
368
369 let mut graph = Graph::new(n, directed)?;
370 graph.add_edges(edges)?;
371
372 let final_labels = if labels.is_empty() {
373 None
374 } else {
375 Some(labels)
376 };
377 let final_weights = if has_weights { Some(weights) } else { None };
378
379 Ok(DlGraph {
380 graph,
381 labels: final_labels,
382 weights: final_weights,
383 })
384}
385
386fn parse_err(line: usize, msg: &str) -> IgraphError {
387 IgraphError::Parse {
388 line,
389 message: msg.to_string(),
390 }
391}
392
393fn skip_empty(lines: &[String], mut pos: usize) -> usize {
394 while pos < lines.len() && lines[pos].trim().is_empty() {
395 pos += 1;
396 }
397 pos
398}
399
400fn extract_n(s: &str) -> Option<u32> {
401 let lower = s.to_ascii_lowercase();
402 for part in lower.split([',', ';']) {
404 let trimmed = part.trim();
405 if let Some(after_n) = trimmed.strip_prefix('n') {
406 let rest = after_n.trim();
407 if let Some(stripped) = rest.strip_prefix('=') {
408 if let Ok(val) = stripped.trim().parse::<u32>() {
409 return Some(val);
410 }
411 }
412 }
413 }
414 None
415}
416
417fn split_labels(s: &str) -> Vec<String> {
418 if s.contains(',') {
419 s.split(',')
420 .map(|t| t.trim().trim_matches('"').to_string())
421 .filter(|t| !t.is_empty())
422 .collect()
423 } else {
424 s.split_whitespace()
425 .map(|t| t.trim_matches('"').to_string())
426 .collect()
427 }
428}
429
430fn get_or_add_label(labels: &mut Vec<String>, name: &str) -> u32 {
431 for (i, lbl) in labels.iter().enumerate() {
432 if lbl == name {
433 #[allow(clippy::cast_possible_truncation)]
434 return i as u32;
435 }
436 }
437 labels.push(name.to_string());
438 #[allow(clippy::cast_possible_truncation)]
439 let id = (labels.len() - 1) as u32;
440 id
441}
442
443#[cfg(test)]
444mod tests {
445 use super::*;
446
447 #[test]
448 fn test_edgelist1_basic() {
449 let input = b"DL n=3\nformat = edgelist1\ndata:\n1 2\n2 3\n1 3\n";
450 let result = read_dl(&input[..], true).unwrap();
451 assert_eq!(result.graph.vcount(), 3);
452 assert_eq!(result.graph.ecount(), 3);
453 assert!(result.graph.is_directed());
454 }
455
456 #[test]
457 fn test_edgelist1_undirected() {
458 let input = b"DL n=3\nformat = edgelist1\ndata:\n1 2\n2 3\n";
459 let result = read_dl(&input[..], false).unwrap();
460 assert_eq!(result.graph.vcount(), 3);
461 assert_eq!(result.graph.ecount(), 2);
462 assert!(!result.graph.is_directed());
463 }
464
465 #[test]
466 fn test_edgelist1_with_weights() {
467 let input = b"DL n=3\nformat = edgelist1\ndata:\n1 2 1.5\n2 3 2.5\n";
468 let result = read_dl(&input[..], true).unwrap();
469 let w = result.weights.unwrap();
470 assert!((w[0] - 1.5).abs() < 1e-10);
471 assert!((w[1] - 2.5).abs() < 1e-10);
472 }
473
474 #[test]
475 fn test_edgelist1_with_labels() {
476 let input = b"DL n=3\nformat = edgelist1\nlabels:\nA,B,C\ndata:\n1 2\n2 3\n";
477 let result = read_dl(&input[..], true).unwrap();
478 let labels = result.labels.unwrap();
479 assert_eq!(labels, vec!["A", "B", "C"]);
480 }
481
482 #[test]
483 fn test_edgelist1_labels_embedded() {
484 let input = b"DL n=3\nformat = edgelist1\nlabels embedded:\ndata:\nAlice Bob\nBob Carol\n";
485 let result = read_dl(&input[..], true).unwrap();
486 assert_eq!(result.graph.ecount(), 2);
487 let labels = result.labels.unwrap();
488 assert_eq!(labels[0], "Alice");
489 assert_eq!(labels[1], "Bob");
490 assert_eq!(labels[2], "Carol");
491 }
492
493 #[test]
494 fn test_fullmatrix_basic() {
495 let input = b"DL n=3\nformat = fullmatrix\ndata:\n0 1 0\n0 0 1\n1 0 0\n";
496 let result = read_dl(&input[..], true).unwrap();
497 assert_eq!(result.graph.vcount(), 3);
498 assert_eq!(result.graph.ecount(), 3);
499 }
500
501 #[test]
502 fn test_fullmatrix_default_format() {
503 let input = b"DL n=3\ndata:\n0 1 1\n1 0 0\n0 1 0\n";
505 let result = read_dl(&input[..], true).unwrap();
506 assert_eq!(result.graph.vcount(), 3);
507 assert_eq!(result.graph.ecount(), 4);
508 }
509
510 #[test]
511 fn test_fullmatrix_labels_embedded() {
512 let input = b"DL n=3\nlabels embedded:\ndata:\nA B C\nA 0 1 0\nB 0 0 1\nC 1 0 0\n";
513 let result = read_dl(&input[..], true).unwrap();
514 assert_eq!(result.graph.vcount(), 3);
515 assert_eq!(result.graph.ecount(), 3);
516 let labels = result.labels.unwrap();
517 assert_eq!(labels, vec!["A", "B", "C"]);
518 }
519
520 #[test]
521 fn test_nodelist1_basic() {
522 let input = b"DL n=4\nformat = nodelist1\ndata:\n1 2 3\n2 3\n3 4\n";
523 let result = read_dl(&input[..], true).unwrap();
524 assert_eq!(result.graph.vcount(), 4);
525 assert_eq!(result.graph.ecount(), 4);
526 }
527
528 #[test]
529 fn test_nodelist1_labels_embedded() {
530 let input = b"DL n=3\nformat = nodelist1\nlabels embedded:\ndata:\nA B C\nB C\n";
531 let result = read_dl(&input[..], true).unwrap();
532 assert_eq!(result.graph.ecount(), 3);
533 let labels = result.labels.unwrap();
534 assert_eq!(labels[0], "A");
535 }
536
537 #[test]
538 fn test_case_insensitive() {
539 let input = b"dl N=2\nFORMAT = EDGELIST1\nDATA:\n1 2\n";
540 let result = read_dl(&input[..], true).unwrap();
541 assert_eq!(result.graph.vcount(), 2);
542 assert_eq!(result.graph.ecount(), 1);
543 }
544
545 #[test]
546 fn test_empty_graph() {
547 let input = b"DL n=5\nformat = edgelist1\ndata:\n";
548 let result = read_dl(&input[..], true).unwrap();
549 assert_eq!(result.graph.vcount(), 5);
550 assert_eq!(result.graph.ecount(), 0);
551 }
552
553 #[test]
554 fn test_no_dl_header_error() {
555 let input = b"n=3\ndata:\n1 2\n";
556 let result = read_dl(&input[..], true);
557 assert!(result.is_err());
558 }
559
560 #[test]
561 fn test_no_n_error() {
562 let input = b"DL\nformat = edgelist1\ndata:\n1 2\n";
563 let result = read_dl(&input[..], true);
564 assert!(result.is_err());
565 }
566
567 #[test]
568 fn test_vertex_id_out_of_range() {
569 let input = b"DL n=2\nformat = edgelist1\ndata:\n1 5\n";
570 let result = read_dl(&input[..], true);
571 assert!(result.is_err());
572 }
573
574 #[test]
575 fn test_zero_vertex_id_error() {
576 let input = b"DL n=2\nformat = edgelist1\ndata:\n0 1\n";
577 let result = read_dl(&input[..], true);
578 assert!(result.is_err());
579 }
580
581 #[test]
582 fn test_n_on_same_line() {
583 let input = b"DL n=4\nformat=edgelist1\ndata:\n1 2\n3 4\n";
584 let result = read_dl(&input[..], true).unwrap();
585 assert_eq!(result.graph.vcount(), 4);
586 assert_eq!(result.graph.ecount(), 2);
587 }
588
589 #[test]
590 fn test_labels_whitespace_separated() {
591 let input = b"DL n=3\nformat = edgelist1\nlabels:\nAlpha Beta Gamma\ndata:\n1 2\n";
592 let result = read_dl(&input[..], true).unwrap();
593 let labels = result.labels.unwrap();
594 assert_eq!(labels, vec!["Alpha", "Beta", "Gamma"]);
595 }
596}