automata/
dot.rs

1//! Strongly typed representation for writing `dot` files.
2//!
3//! See <https://graphviz.gitlab.io/_pages/doc/info/lang.html> for the full specification. Only
4//! parts which were relevant have been translated. Some redundant parts have no representation
5//! such as allowing multiple `[]` attribute lists behind a `node` or `edge` directive.
6use std::borrow::Cow;
7use std::fmt;
8use std::io::{self, Write};
9
10/// Optionally contains the possible node attributes.
11#[derive(Clone, Default)]
12pub struct Node {
13    /// A label to appear, can be html or an escaped string.
14    pub label: Option<Id>,
15
16    /// Number of stacked polygon lines for the outer shape.
17    ///
18    /// Final/Accepting states in automaton are marked by two peripheral lines. The default value
19    /// for this attribute is 1.
20    pub peripheries: Option<usize>,
21
22    /// Use `.. Node::Default()` to make the structs.
23    pub _non_exhaustive: (),
24}
25
26/// Optionally contains the possible edge attributes.
27#[derive(Clone, Default)]
28pub struct Edge {
29    /// A label to appear, can be html or an escaped string.
30    pub label: Option<Id>,
31
32    /// Use `.. Edge::Default()` to make the structs.
33    pub _non_exhaustive: (),
34}
35
36/// Writes dot files.
37///
38/// Node names are chosen automatically from the given index.
39pub struct GraphWriter<W: Write> {
40    inner: Option<W>,
41
42    /// The edgeop must correspond to the chosen graph family.
43    edgeop: Family,
44}
45
46#[derive(Clone, Copy, Debug)]
47pub enum Family {
48    Directed,
49    Undirected,
50}
51
52/// An identifier, has several uses in the language (`ID`).
53///
54/// IDs representing attributes have a constant defined in this struct.
55///
56/// TODO: `node_id` is currently restricted to this, but could have port and another specifier.
57#[derive(Clone, Debug)]
58pub struct Id(IdEnum);
59
60#[derive(Clone, Debug)]
61enum IdEnum {
62    /// A c-style identifier or a string numeral.
63    ///
64    /// Any string of alphabetic ([a-zA-Z\200-\377]) characters, underscores ('_') or digits ([0-9]), not beginning with a digit;
65    ///
66    /// A numeral `[-]?(.[0-9]+ | [0-9]+(.[0-9]*)? )`;
67    Raw(Cow<'static, str>),
68
69    /// A standard unsigned numeral, encoded to `(.[0-9]+ | [0-9]+(.[0-9]*)? )`;
70    Numeral(usize),
71
72    /// A standard signed numeral, encoded to `[-]?(.[0-9]+ | [0-9]+(.[0-9]*)? )`;
73    INumeral(isize),
74
75    /// A C string escaped string.
76    ///
77    /// Any double-quoted string ("...") possibly containing escaped quotes (\");
78    Str(Cow<'static, str>),
79
80    // An html escaped string.
81    // Html(String),
82}
83
84/// Trait for structures that can be dumped as a dot graph.
85pub trait DotGraph {
86    /// Write a dot representation.
87    fn dot_graph<W>(&self, to: W) -> io::Result<()>;
88}
89
90/// Extension to `std::io::Write` for writing dot graphs.
91pub trait WriteDot: io::Write {
92    fn write_dot<D: DotGraph>(&mut self, graph: D) -> io::Result<()> {
93        graph.dot_graph(self)
94    }
95}
96
97impl<W: Write> GraphWriter<W> {
98    /// Begins writing a graph with the given parameters.
99    pub fn new(mut inner: W, family: Family, name: Option<Id>) -> io::Result<Self> {
100        if let Some(name) = name {
101            writeln!(&mut inner, "{} {} {{", family.name(), name)?;
102        } else {
103            writeln!(&mut inner, "{} {{", family.name())?;
104        }
105
106        Ok(GraphWriter {
107            inner: Some(inner),
108            edgeop: family,
109        })
110    }
111
112    /// Set the default node information.
113    pub fn default_node(&mut self, default_node: Node) -> io::Result<()> {
114        let fmt = self.inner.as_mut().unwrap();
115        write!(fmt, "\tnode [{}];", default_node)
116    }
117
118    /// Set the default edge attributes.
119    pub fn default_edge(&mut self, default_edge: Edge) -> io::Result<()> {
120        let fmt = self.inner.as_mut().unwrap();
121        write!(fmt, "\tedge [{}];", default_edge)
122    }
123
124    /// Add a line segment, that is two or more connected nodes.
125    ///
126    /// Panics: when the iterator returned less than two nodes.
127    ///
128    /// TODO: the spec allows adhoc subgraphs instead of node specifiers.
129    pub fn segment<I, T>(&mut self, iter: I, options: Option<Edge>) -> io::Result<()> 
130        where I: IntoIterator<Item=T>, T: Into<Id>
131    {
132        let fmt = self.inner.as_mut().unwrap();
133
134        let mut iter = iter.into_iter();
135
136        let begin = iter.next().unwrap();
137        let end = iter.next().unwrap();
138
139        write!(fmt, "\t{} {} {} ", begin.into(), self.edgeop.edgeop(), end.into())?;
140
141        for next in iter {
142            write!(fmt, "{} {} ", self.edgeop.edgeop(), next.into())?;
143        }
144
145        if let Some(options) = options {
146            writeln!(fmt, "[{}];", options)
147        } else {
148            writeln!(fmt, ";")
149        }
150    }
151
152    /// Set node information or create a new node.
153    pub fn node(&mut self, id: Id, node: Option<Node>) -> io::Result<()> {
154        let fmt = self.inner.as_mut().unwrap();
155
156        write!(fmt, "\t{} ", id)?;
157
158        if let Some(options) = node {
159            writeln!(fmt, "[{}];", options)
160        } else {
161            writeln!(fmt, ";")
162        }
163    }
164
165    /// In contrast to a simple drop, returns the inner writer.
166    pub fn end_into_inner(mut self) -> (W, io::Result<()>) {
167        let mut inner = self.inner.take().unwrap();
168        let result = inner.write_all(b"}\n");
169
170        (inner, result)
171    }
172}
173
174impl<W: io::Write> Drop for GraphWriter<W> {
175    fn drop(&mut self) {
176        if let Some(writer) = self.inner.as_mut() {
177            writer.write_all(b"}\n").unwrap();
178        }
179    }
180}
181
182impl<'a, W: Write> GraphWriter<&'a mut W> {
183    pub fn subgraph(&mut self, _name: Option<String>) -> GraphWriter<&mut W> {
184        unimplemented!()
185    }
186}
187
188impl Node {
189    /// A node with no attributes.
190    ///
191    /// May be used in constructors to default assign remaining members with `.. Node::none()`.
192    pub fn none() -> Self {
193        Node::default()
194    }
195}
196
197impl Edge {
198    /// An edge with no attributes.
199    ///
200    /// May be used in constructors to default assign remaining members with `.. Edge::none()`.
201    pub fn none() -> Self {
202        Edge::default()
203    }
204}
205
206impl Family {
207    /// The keyword at the top of the dot file.
208    fn name(self) -> &'static str {
209        match self {
210            Family::Directed => "digraph",
211            Family::Undirected => "graph",
212        }
213    }
214
215    fn edgeop(self) -> &'static str {
216        match self {
217            Family::Directed => "->",
218            Family::Undirected => "--",
219        }
220    }
221}
222
223impl Id {
224    const LABEL: Id = Id(IdEnum::Raw(Cow::Borrowed("label")));
225    const PERIPHERIES: Id = Id(IdEnum::Raw(Cow::Borrowed("peripheries")));
226}
227
228impl IdEnum {
229    /// Constructs the ID representation for this string.
230    ///
231    /// Automatically chooses between raw ascii, digits and string encoded versions of identifiers,
232    /// whichever has the least conversion and usage overhead.
233    ///
234    /// Panics: When the escaped string does not fit inside a string.
235    fn from_string_like<T>(name: T) -> Self
236        where T: Into<Cow<'static, str>> 
237    {
238        let name = name.into();
239
240        let raw_identifier = |c: char| c.is_ascii_alphabetic() || c.is_ascii_digit() || c == '_';
241        let raw_identifier_begin = |c: &char| c.is_ascii_alphabetic() || *c == '_';
242
243        if name.as_ref().is_empty() {
244            return IdEnum::Str(name)
245        }
246
247        if name.as_ref().chars().all(|c| c.is_ascii_digit()) {
248            return IdEnum::Raw(name)
249        }
250
251        if name.as_ref().chars().all(raw_identifier) && name.as_ref().chars().nth(0).filter(raw_identifier_begin).is_some() {
252            return IdEnum::Raw(name)
253        }
254
255        // Simply escape single quotes once. Since the default charset is UTF-8, all other strings are fine.
256        let quote_count = name.bytes().filter(|c| *c == b'"').count();
257        let name = if quote_count > 0 {
258            let mut string = name.into_owned();
259
260            // Escape every '"' with '\'.
261            //
262            // This operation is safe since we only prepend b'\' (a valid UTF-8 sequence) to b'"'.
263            //
264            // More in-depth:
265            // Because b'"' can only appear inside char boundaries in any other situation but as a
266            // standalone b'"' character, the new sequence keeps all char boundaries intact and has
267            // only inserted a new valid char sequence, b'\'. Hence the new string is still valid
268            // UTF-8.
269            //
270            // This cannot be performed safely and efficiently, since we can only utilize
271            // `String::insert` to add single characters but doing so would be O(n·m) where n is
272            // the length of the string and m is the number of '"' chars. In comparison, this
273            // operation is O(n) since we only move each character at most once.
274            unsafe {
275                let vec = string.as_mut_vec();
276                let mut num_inserts = quote_count;
277
278                assert!(num_inserts > 0, "contains at least one quote");
279                assert!(!vec.is_empty(), "contains at least one quote");
280                let mut text_end = vec.len();
281
282                // Controlled panic
283                let new_len = vec.len().checked_add(num_inserts)
284                    .expect("escaped string would not fit");
285
286                // Add all the additional escape characters. We move them around as a contiguous
287                // slice later, each time leaving behind the last slash where it belongs.
288                vec.resize(new_len, b'\\');
289                let mut new_end = new_len;
290
291                let slice = vec.as_mut_slice();
292                // Pointer arithmetic on the slice elements (u8) later can be done as usize
293                // arithmetic with this base address without wrapping.
294                let base_ptr = slice.as_ptr() as usize;
295
296                // Copy & insert the new characters.
297                while num_inserts > 0 {
298                    let tail = slice[..text_end]
299                        // Get all the text following the last '"'
300                        .rsplit(|c| *c == b'"').next().unwrap()
301                        .as_ptr() as usize;
302
303                    assert!(tail > base_ptr, "at least one quote left infront");
304
305                    // Calculate the index of the quote character
306                    let quote_index = tail
307                        .checked_sub(base_ptr).unwrap()
308                        .checked_sub(1).unwrap();
309                    let relative_end = text_end
310                        .checked_sub(quote_index).unwrap();
311
312                    // Move the uninitialized part infront of the slice. Remember that the slice of
313                    // new characters consists only of slashes.
314                    slice[quote_index..new_end].rotate_right(relative_end);
315
316                    // Now leave behind one slash and set all new values.  Expecting clang magic to
317                    // remove the unwrap because he can prove that `num_insert > 1` at this point.
318                    num_inserts = num_inserts.checked_sub(1).unwrap();
319                    new_end = quote_index + num_inserts;
320                    text_end = quote_index;
321                }
322            }
323
324            string.into()
325        } else {
326            name
327        };
328
329        IdEnum::Str(name)
330    }
331}
332
333impl From<Cow<'static, str>> for Id {
334    fn from(id: Cow<'static, str>) -> Self {
335        Id(IdEnum::from_string_like(id))
336    }
337}
338
339impl From<&'static str> for Id {
340    fn from(id: &'static str) -> Self {
341        Id(IdEnum::from_string_like(Cow::from(id)))
342    }
343}
344
345impl From<String> for Id {
346    fn from(id: String) -> Self {
347        Id(IdEnum::from_string_like(Cow::from(id)))
348    }
349}
350
351impl From<usize> for Id {
352    fn from(id: usize) -> Self {
353        Id(IdEnum::Numeral(id))
354    }
355}
356
357impl From<isize> for Id {
358    fn from(id: isize) -> Self {
359        Id(IdEnum::INumeral(id))
360    }
361}
362
363impl fmt::Display for IdEnum {
364    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
365        match self {
366            IdEnum::Raw(id) => write!(f, "{}", id),
367            IdEnum::Numeral(id) => write!(f, "{}", id),
368            IdEnum::INumeral(id) => write!(f, "{}", id),
369            IdEnum::Str(id) => write!(f, "\"{}\"", id),
370        }
371    }
372}
373
374impl fmt::Display for Id {
375    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
376        self.0.fmt(f)
377    }
378}
379
380/// Formats the node attributes (`a_list` in specification terms).
381impl fmt::Display for Node {
382    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
383        if let Some(label) = self.label.as_ref() {
384            write!(f, "{}={},", Id::LABEL, label)?;
385        }
386
387        if let Some(peripheries) = self.peripheries {
388            write!(f, "{}={},", Id::PERIPHERIES, peripheries)?;
389        }
390
391        Ok(())
392    }
393}
394
395/// Formats the edge attributes (`a_list` in specification terms).
396impl fmt::Display for Edge {
397    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
398        if let Some(label) = self.label.as_ref() {
399            write!(f, "{}={},", Id::LABEL, label)?;
400        }
401
402        Ok(())
403    }
404}
405
406#[cfg(test)]
407mod tests {
408    use super::*;
409
410    #[test]
411    fn identifiers() {
412        assert_eq!(format!("{}", Id::from("abc")), "abc");
413        assert_eq!(format!("{}", Id::from(0usize)), "0");
414        assert_eq!(format!("{}", Id::from(-1isize)), "-1");
415        assert_eq!(format!("{}", Id::from("123")), "123");
416        assert_eq!(format!("{}", Id::from("a string with spaces")), r#""a string with spaces""#);
417        assert_eq!(format!("{}", Id::from("\"")), r#""\"""#);
418        assert_eq!(format!("{}", Id::from("")), r#""""#);
419    }
420}