rs_graph/
string.rs

1/*
2 * Copyright (c) 2019-2022 Frank Fischer <frank-fischer@shadow-soft.de>
3 *
4 * This program is free software: you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation, either version 3 of the
7 * License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program.  If not, see  <http://www.gnu.org/licenses/>
16 */
17
18//! A small module to read graphs from ascii art.
19//!
20//! See [`from_ascii`] for a detailed explanation.
21//!
22//! *Warning*: the main purpose of this module is its use in
23//! documentation comments and examples. It is not meant for
24//! production use.
25
26use crate::builder::{Buildable, Builder};
27use crate::linkedlistgraph::LinkedListGraph;
28use crate::traits::{Directed, IndexGraph};
29
30use std::collections::{HashMap, HashSet, VecDeque};
31use std::error;
32use std::num::ParseIntError;
33use std::str::{from_utf8, Utf8Error};
34
35/// Error reading an ascii-art graph.
36#[derive(Debug)]
37pub enum Error {
38    /// An invalid character appeared in the ascii text.
39    InvalidCharacter(char),
40    /// Parsing a number (edge weight) failed.
41    InvalidNumber(Box<dyn error::Error>),
42}
43
44impl std::fmt::Display for Error {
45    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
46        match self {
47            Error::InvalidCharacter(c) => write!(fmt, "Invalid character: {}", c),
48            Error::InvalidNumber(e) => write!(fmt, "Invalid number: {}", e),
49        }
50    }
51}
52
53impl std::error::Error for Error {}
54
55impl From<ParseIntError> for Error {
56    fn from(e: ParseIntError) -> Error {
57        Error::InvalidNumber(e.into())
58    }
59}
60
61impl From<Utf8Error> for Error {
62    fn from(e: Utf8Error) -> Error {
63        Error::InvalidNumber(e.into())
64    }
65}
66
67/// The data returned by `from_ascii`.
68pub struct Data<G> {
69    /// The graph.
70    pub graph: G,
71    /// Map from character to node for named node ids.
72    pub nodes: HashMap<char, usize>,
73    /// The edge weights.
74    pub weights: Vec<usize>,
75}
76
77/// Create a graph from ASCII art drawing with edge weights.
78///
79/// Parses an ASCII-art drawing and generates the corresponding graph.
80/// Node are `*` or letters (except for `v` which is used to draw directed
81/// edges), edges are sequences of `-`, `|`, `/` and `\` characters. Edges may
82/// cross as in the example below.
83///
84/// Weights are non-negative numbers along the edges. Edges without explicit
85/// weight receive the weight 0. The edge weights are returned in a vector in
86/// the order of creation (although the order is undefined -- usually this
87/// corresponds to `g.edge_id`).
88///
89/// ```
90/// use rs_graph::traits::*;
91/// use rs_graph::linkedlistgraph::LinkedListGraph;
92/// use rs_graph::string::{Data, from_ascii};
93///
94/// let Data{ graph: g, weights, nodes } = from_ascii::<LinkedListGraph>(r"
95///       a  b
96///       |  |
97///   ----|-----
98///  /   223 |  \
99/// |     | /    |
100/// |   --|-     |
101///  \ /  |     /
102///   -   *-10--").unwrap();
103///
104/// assert_eq!(g.neighs(g.id2node(nodes[&'a'])).map(|(e,_)| weights[g.edge_id(e)]).collect::<Vec<_>>(), vec![223]);
105/// assert_eq!(g.neighs(g.id2node(nodes[&'b'])).map(|(e,_)| weights[g.edge_id(e)]).collect::<Vec<_>>(), vec![10]);
106/// ```
107///
108/// Usually edges are undirected. Directed edges can be inserted by ending an
109/// edge in one of the characters `<`, `>`, `^`, `v` or `@` (where the `@` can
110/// be used in place of any of the other head characters; for diagonal directed
111/// connections it is the only alternative).
112///
113/// ```
114/// use rs_graph::traits::*;
115/// use rs_graph::linkedlistgraph::LinkedListGraph;
116/// use rs_graph::string::{Data, from_ascii};
117///
118/// let Data { graph: g, nodes, .. } = from_ascii::<LinkedListGraph>(r"
119///    *  *  *
120///     \ | /
121///      @v@
122///    *->a<-*
123///      @^@
124///     / | \
125///    *  *  *").unwrap();
126/// let a = g.id2node(nodes[&'a']);
127///
128/// assert_eq!(g.num_nodes(), 9);
129/// assert_eq!(g.num_edges(), 8);
130/// for u in g.nodes() {
131///     if u == a {
132///         assert_eq!(g.inedges(u).count(), 8);
133///         assert_eq!(g.outedges(u).count(), 0);
134///     } else {
135///         assert_eq!(g.inedges(u).count(), 0);
136///         assert_eq!(g.outedges(u).count(), 1);
137///     }
138/// }
139/// ```
140///
141/// Nodes are created (and thus numbered) row-wise. Nodes that have a character
142/// label can be access through the `nodes` field of the returned data. The
143/// order of the edges is undefined.
144///
145/// *Note*: this function is meant to be used in test cases, it should not be
146/// used in production code.
147#[allow(clippy::cognitive_complexity)]
148pub fn from_ascii<G>(text: &str) -> Result<Data<G>, Error>
149where
150    G: IndexGraph + Buildable,
151{
152    let lines = text.lines().map(|l| l.as_bytes()).collect::<Vec<_>>();
153
154    // compute numbers of rows and columns
155    let nrows = lines.len();
156    let ncols = lines.iter().map(|l| l.len()).max().unwrap_or(0);
157
158    let mut h = LinkedListGraph::<u32>::new_builder();
159    let hnodes = (0..nrows)
160        .map(|_| (0..ncols).map(|_| h.add_node()).collect::<Vec<_>>())
161        .collect::<Vec<_>>();
162
163    let mut hweights = HashMap::new();
164    let mut hdirected = HashSet::new();
165
166    // insert horizontal edges
167    for i in 0..nrows {
168        for j in 1..ncols {
169            if j >= lines[i].len() {
170                break;
171            }
172            if (lines[i][j - 1] == b'-' && is_node_or(lines[i][j], b"/\\-"))
173                || (lines[i][j] == b'-' && is_node_or(lines[i][j - 1], b"/\\-"))
174            {
175                h.add_edge(hnodes[i][j - 1], hnodes[i][j]);
176                h.add_edge(hnodes[i][j], hnodes[i][j - 1]);
177            }
178        }
179    }
180
181    // insert vertical edges
182    for j in 0..ncols {
183        for i in 1..nrows {
184            if j >= lines[i - 1].len() || j >= lines[i].len() {
185                continue;
186            }
187            if (lines[i - 1][j] == b'|' && is_node_or(lines[i][j], b"/\\|"))
188                || (lines[i][j] == b'|' && is_node_or(lines[i - 1][j], b"/\\|"))
189            {
190                h.add_edge(hnodes[i - 1][j], hnodes[i][j]);
191                h.add_edge(hnodes[i][j], hnodes[i - 1][j]);
192            }
193        }
194    }
195
196    // insert main diagonal edges
197    for j in 1..ncols {
198        for i in 1..nrows {
199            if j > lines[i - 1].len() || j >= lines[i].len() {
200                continue;
201            }
202            if (lines[i - 1][j - 1] == b'\\' && is_node_or(lines[i][j], b"\\-|"))
203                || (lines[i][j] == b'\\' && is_node_or(lines[i - 1][j - 1], b"\\-|"))
204            {
205                h.add_edge(hnodes[i - 1][j - 1], hnodes[i][j]);
206                h.add_edge(hnodes[i][j], hnodes[i - 1][j - 1]);
207            }
208        }
209    }
210
211    // insert secondary diagonal edges
212    for j in 0..ncols - 1 {
213        for i in 1..nrows {
214            if j + 1 >= lines[i - 1].len() || j >= lines[i].len() {
215                continue;
216            }
217            if (lines[i - 1][j + 1] == b'/' && is_node_or(lines[i][j], b"/-|*"))
218                || (lines[i][j] == b'/' && is_node_or(lines[i - 1][j + 1], b"/-|*"))
219            {
220                h.add_edge(hnodes[i - 1][j + 1], hnodes[i][j]);
221                h.add_edge(hnodes[i][j], hnodes[i - 1][j + 1]);
222            }
223        }
224    }
225
226    // insert directed end-points >, <, ^, v or @
227    for i in 0..nrows {
228        for j in 0..ncols {
229            if j > 0
230                && lines[i].get(j - 1).map(|&c| c == b'-').unwrap_or(false)
231                && lines[i].get(j).map(|&c| c == b'@' || c == b'>').unwrap_or(false)
232                && lines[i].get(j + 1).cloned().map(is_node).unwrap_or(false)
233            {
234                let e = h.add_edge(hnodes[i][j - 1], hnodes[i][j + 1]);
235                hdirected.insert(e);
236            }
237            if j > 0
238                && lines[i].get(j - 1).cloned().map(is_node).unwrap_or(false)
239                && lines[i].get(j).map(|&c| c == b'@' || c == b'<').unwrap_or(false)
240                && lines[i].get(j + 1).map(|&c| c == b'-').unwrap_or(false)
241            {
242                let e = h.add_edge(hnodes[i][j + 1], hnodes[i][j - 1]);
243                hdirected.insert(e);
244            }
245            if i > 0
246                && i + 1 < nrows
247                && lines[i - 1].get(j).cloned().map(is_node).unwrap_or(false)
248                && lines[i].get(j).map(|&c| c == b'@' || c == b'^').unwrap_or(false)
249                && lines[i + 1].get(j).map(|&c| c == b'|').unwrap_or(false)
250            {
251                let e = h.add_edge(hnodes[i + 1][j], hnodes[i - 1][j]);
252                hdirected.insert(e);
253            }
254            if i > 0
255                && i + 1 < nrows
256                && lines[i - 1].get(j).map(|&c| c == b'|').unwrap_or(false)
257                && lines[i].get(j).map(|&c| c == b'@' || c == b'v').unwrap_or(false)
258                && lines[i + 1].get(j).cloned().map(is_node).unwrap_or(false)
259            {
260                let e = h.add_edge(hnodes[i - 1][j], hnodes[i + 1][j]);
261                hdirected.insert(e);
262            }
263            // diagonals
264            if i > 0
265                && i + 1 < nrows
266                && j > 0
267                && lines[i - 1].get(j - 1).map(|&c| c == b'\\').unwrap_or(false)
268                && lines[i].get(j).map(|&c| c == b'@').unwrap_or(false)
269                && lines[i + 1].get(j + 1).cloned().map(is_node).unwrap_or(false)
270            {
271                let e = h.add_edge(hnodes[i - 1][j - 1], hnodes[i + 1][j + 1]);
272                hdirected.insert(e);
273            }
274            if i > 0
275                && i + 1 < nrows
276                && j > 0
277                && lines[i - 1].get(j - 1).cloned().map(is_node).unwrap_or(false)
278                && lines[i].get(j).map(|&c| c == b'@').unwrap_or(false)
279                && lines[i + 1].get(j + 1).map(|&c| c == b'\\').unwrap_or(false)
280            {
281                let e = h.add_edge(hnodes[i + 1][j + 1], hnodes[i - 1][j - 1]);
282                hdirected.insert(e);
283            }
284            if i > 0
285                && i + 1 < nrows
286                && j > 0
287                && lines[i - 1].get(j + 1).map(|&c| c == b'/').unwrap_or(false)
288                && lines[i].get(j).map(|&c| c == b'@').unwrap_or(false)
289                && lines[i + 1].get(j - 1).cloned().map(is_node).unwrap_or(false)
290            {
291                let e = h.add_edge(hnodes[i - 1][j + 1], hnodes[i + 1][j - 1]);
292                hdirected.insert(e);
293            }
294            if i > 0
295                && i + 1 < nrows
296                && j > 0
297                && lines[i - 1].get(j + 1).cloned().map(is_node).unwrap_or(false)
298                && lines[i].get(j).map(|&c| c == b'@').unwrap_or(false)
299                && lines[i + 1].get(j - 1).map(|&c| c == b'/').unwrap_or(false)
300            {
301                let e = h.add_edge(hnodes[i + 1][j - 1], hnodes[i - 1][j + 1]);
302                hdirected.insert(e);
303            }
304        }
305    }
306
307    let get_weight = |i: usize, j: usize| -> Result<usize, Error> {
308        let mut beg = j;
309        while beg > 0 && lines[i][beg - 1].is_ascii_digit() {
310            beg -= 1;
311        }
312        let mut end = j;
313        while lines[i].get(end).map(|c| c.is_ascii_digit()).unwrap_or(false) {
314            end += 1;
315        }
316        Ok(from_utf8(&lines[i][beg..end])?.parse::<usize>()?)
317    };
318
319    // insert crossing edges
320    for j in 0..ncols {
321        for i in 0..nrows {
322            // horizontal
323            if j > 0
324                && j + 1 < lines[i].len()
325                && lines[i][j - 1] == b'-'
326                && lines[i][j + 1] == b'-'
327                && (lines[i][j] == b'|' || lines[i][j].is_ascii_digit())
328            {
329                let e = h.add_edge(hnodes[i][j - 1], hnodes[i][j + 1]);
330                let f = h.add_edge(hnodes[i][j + 1], hnodes[i][j - 1]);
331                if lines[i][j] != b'|' {
332                    let w = get_weight(i, j)?;
333                    hweights.insert(e, w);
334                    hweights.insert(f, w);
335                }
336            }
337            // vertical
338            if i > 0
339                && i + 1 < nrows
340                && lines[i - 1].get(j).map(|&c| c == b'|').unwrap_or(false)
341                && lines[i + 1].get(j).map(|&c| c == b'|').unwrap_or(false)
342                && lines[i]
343                    .get(j)
344                    .map(|&c| c == b'-' || c.is_ascii_digit())
345                    .unwrap_or(false)
346            {
347                let e = h.add_edge(hnodes[i - 1][j], hnodes[i + 1][j]);
348                let f = h.add_edge(hnodes[i + 1][j], hnodes[i - 1][j]);
349                if lines[i][j] != b'-' {
350                    let w = get_weight(i, j)?;
351                    hweights.insert(e, w);
352                    hweights.insert(f, w);
353                }
354            }
355            // main diagonal
356            if i > 0
357                && j > 0
358                && i + 1 < nrows
359                && lines[i - 1].get(j - 1).map(|&c| c == b'\\').unwrap_or(false)
360                && lines[i + 1].get(j + 1).map(|&c| c == b'\\').unwrap_or(false)
361                && lines[i]
362                    .get(j)
363                    .map(|&c| c == b'/' || c.is_ascii_digit())
364                    .unwrap_or(false)
365            {
366                let e = h.add_edge(hnodes[i - 1][j - 1], hnodes[i + 1][j + 1]);
367                let f = h.add_edge(hnodes[i + 1][j + 1], hnodes[i - 1][j - 1]);
368                if lines[i][j] != b'/' {
369                    let w = get_weight(i, j)?;
370                    hweights.insert(e, w);
371                    hweights.insert(f, w);
372                }
373            }
374            // secondary diagonal
375            if i > 0
376                && j > 0
377                && i + 1 < nrows
378                && lines[i - 1].get(j + 1).map(|&c| c == b'/').unwrap_or(false)
379                && lines[i + 1].get(j - 1).map(|&c| c == b'/').unwrap_or(false)
380                && lines[i]
381                    .get(j)
382                    .map(|&c| c == b'\\' || c.is_ascii_digit())
383                    .unwrap_or(false)
384            {
385                let e = h.add_edge(hnodes[i - 1][j + 1], hnodes[i + 1][j - 1]);
386                let f = h.add_edge(hnodes[i + 1][j - 1], hnodes[i - 1][j + 1]);
387                if lines[i][j] != b'\\' {
388                    let w = get_weight(i, j)?;
389                    hweights.insert(e, w);
390                    hweights.insert(f, w);
391                }
392            }
393        }
394    }
395
396    // insert edges jumping over multi-digit horizontal numbers
397    for i in 0..nrows {
398        for j in 1..ncols {
399            if lines[i].get(j - 1).map(|&c| c == b'-').unwrap_or(false) {
400                let mut k = j;
401                while lines[i].get(k).map(|c| c.is_ascii_digit()).unwrap_or(false) {
402                    k += 1;
403                }
404                if k >= j + 2 && lines[i].get(k).map(|&c| c == b'-').unwrap_or(false) {
405                    let e = h.add_edge(hnodes[i][j - 1], hnodes[i][k]);
406                    let f = h.add_edge(hnodes[i][k], hnodes[i][j - 1]);
407                    let weight = from_utf8(&lines[i][j..k])?.parse::<usize>()?;
408                    hweights.insert(e, weight);
409                    hweights.insert(f, weight);
410                }
411            }
412        }
413    }
414
415    let h = h.into_graph();
416
417    // construct the graph from the helper graph
418    let mut b = G::Builder::new();
419    let mut bnodes = HashMap::new();
420    let mut weights = vec![];
421    let mut namednodes = HashMap::new();
422
423    for i in 0..nrows {
424        for j in 0..lines[i].len() {
425            if lines[i][j] == b'*' {
426                bnodes.insert(hnodes[i][j], b.add_node());
427            } else if is_node(lines[i][j]) {
428                let u = b.add_node();
429                namednodes.insert(lines[i][j] as char, b.node2id(u));
430                bnodes.insert(hnodes[i][j], u);
431            }
432        }
433    }
434
435    let mut bnodes_ordered = bnodes.iter().collect::<Vec<_>>();
436    bnodes_ordered.sort_by_key(|&(&h_u, _)| h.node_id(h_u));
437    for (&h_u, &u) in bnodes_ordered {
438        let mut queue = VecDeque::new();
439        let mut seen = HashSet::new();
440        queue.push_back((h_u, 0, false));
441        seen.insert(h_u);
442        while let Some((h_v, w, directed)) = queue.pop_front() {
443            if let Some(&v) = if h_v != h_u { bnodes.get(&h_v) } else { None } {
444                // v is connected to u, add edge
445                if directed || h.node_id(h_u) < h.node_id(h_v) {
446                    b.add_edge(u, v);
447                    weights.push(w);
448                }
449            } else {
450                // otherwise continue search
451                for (e, h_w) in h.outedges(h_v) {
452                    if !seen.contains(&h_w) {
453                        seen.insert(h_w);
454                        queue.push_back((
455                            h_w,
456                            w + hweights.get(&e).unwrap_or(&0),
457                            directed || hdirected.contains(&e),
458                        ));
459                    }
460                }
461            }
462        }
463    }
464
465    Ok(Data {
466        graph: b.into_graph(),
467        nodes: namednodes,
468        weights,
469    })
470}
471
472fn is_node(ch: u8) -> bool {
473    ch == b'*' || (ch.is_ascii_alphabetic() && ch != b'v')
474}
475
476fn is_node_or(ch: u8, chars: &[u8]) -> bool {
477    is_node(ch) || chars.contains(&ch)
478}
479
480#[cfg(test)]
481mod tests {
482    use super::{from_ascii, Data};
483    use crate::traits::{FiniteGraph, IndexGraph, Undirected};
484    use crate::LinkedListGraph;
485
486    #[test]
487    fn test_ascii() {
488        for txt in &[
489            "a---*---b",
490            "
491        a
492        |
493        *
494        |
495        b",
496            r"
497        a
498         \
499          *
500         /
501        b",
502            r"
503        a      *--b
504         \    /
505          ----",
506            r"
507        a    -
508         \  / \
509          \/   |
510          /\  /
511         b  *-",
512            r"
513        b   --
514         \ /  \
515          \    |
516         / \  /
517        a   *-",
518            r"
519        b  a
520        |  |
521    ----|-----
522   /    |  |  \
523  |     | /    |
524  |   --|-     |
525   \ /  |     /
526    -   *-----",
527            r"
528        b  a
529        |  |
530    -8--|-----
531   /    |  |  \
532  |     | /    |
533  |   --|-     |
534   \ /  |     /
535    -   *-10--",
536        ] {
537            let data = from_ascii::<LinkedListGraph>(txt).unwrap();
538            let g = data.graph;
539            let a = data.nodes[&'a'];
540            let b = data.nodes[&'b'];
541
542            assert_eq!(g.num_nodes(), 3);
543            assert_eq!(g.num_edges(), 2);
544
545            assert_eq!(g.nodes().filter(|&u| g.neighs(u).count() == 1).count(), 2);
546            assert_eq!(g.nodes().filter(|&u| g.neighs(u).count() == 2).count(), 1);
547            assert_eq!(g.neighs(g.id2node(a)).count(), 1);
548            assert_eq!(g.neighs(g.id2node(b)).count(), 1);
549        }
550    }
551
552    #[test]
553    fn test_ascii_with_weights() {
554        for txt in &[
555            r"
556        a        b
557         \      /
558         223   10
559           \  /
560            *-",
561            r"
562        a  b
563        |  |
564    ----|-----
565   /   223 |  \
566  |     | /    |
567  |   --|-     |
568   \ /  |     /
569    -   *-10--",
570        ] {
571            let Data {
572                graph: g,
573                weights,
574                nodes,
575            } = from_ascii::<LinkedListGraph>(txt).unwrap();
576            let a = nodes[&'a'];
577            let b = nodes[&'b'];
578
579            assert_eq!(g.num_nodes(), 3);
580            assert_eq!(g.num_edges(), 2);
581
582            assert_eq!(g.nodes().filter(|&u| g.neighs(u).count() == 1).count(), 2);
583            assert_eq!(g.nodes().filter(|&u| g.neighs(u).count() == 2).count(), 1);
584
585            for (e, _) in g.neighs(g.id2node(a)) {
586                assert_eq!(weights[g.edge_id(e)], 223);
587            }
588
589            for (e, _) in g.neighs(g.id2node(b)) {
590                assert_eq!(weights[g.edge_id(e)], 10);
591            }
592        }
593    }
594
595    #[test]
596    fn test_ascii_directed() {
597        use crate::traits::*;
598
599        for txt in &[
600            r"
601   *  *  *
602    \ | /
603     @v@
604   *->a<-*
605     @^@
606    / | \
607   *  *  *",
608            r"
609   *  *  *
610    \ | /
611     @@@
612   *-@a@-*
613     @@@
614    / | \
615   *  *  *",
616        ] {
617            let Data { graph: g, nodes, .. } = from_ascii::<LinkedListGraph>(txt).unwrap();
618            let a = nodes[&'a'];
619
620            assert_eq!(g.num_nodes(), 9);
621            assert_eq!(g.num_edges(), 8);
622            for u in g.nodes() {
623                if g.node_id(u) == a {
624                    assert_eq!(g.inedges(u).count(), 8);
625                    assert_eq!(g.outedges(u).count(), 0);
626                } else {
627                    assert_eq!(g.inedges(u).count(), 0);
628                    assert_eq!(g.outedges(u).count(), 1);
629                }
630            }
631        }
632    }
633
634    #[test]
635    /// This is a regression test for non-deterministic behaviour. The
636    /// `from_ascii` function created edges in random orders, because it iterated
637    /// over the entries of a (randomized) hashmap.
638    ///
639    /// We basically run the algorithm 100 times (on the same, 3-node path) and
640    /// check whether the neighbors of the middle node have the same order. It
641    /// basically failed every time.
642    fn test_deterministic() {
643        use crate::string::{from_ascii, Data};
644        use crate::traits::*;
645        use crate::LinkedListGraph;
646
647        let mut prev = vec![];
648        for i in 0..100 {
649            let Data { graph: g, nodes, .. } = from_ascii::<LinkedListGraph>("*-s-*").unwrap();
650            let s = nodes[&'s'];
651            let next = g.neighs(g.id2node(s)).map(|(_, v)| g.node_id(v)).collect::<Vec<_>>();
652
653            if i > 0 {
654                assert_eq!(prev, next);
655            }
656            prev = next;
657        }
658    }
659}