1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
//! Strongly typed representation for writing `dot` files.
//!
//! See <https://graphviz.gitlab.io/_pages/doc/info/lang.html> for the full specification. Only
//! parts which were relevant have been translated. Some redundant parts have no representation
//! such as allowing multiple `[]` attribute lists behind a `node` or `edge` directive.
use std::borrow::Cow;
use std::fmt;
use std::io::{self, Write};

/// Optionally contains the possible node attributes.
#[derive(Clone, Default)]
pub struct Node {
    /// A label to appear, can be html or an escaped string.
    pub label: Option<Id>,

    /// Number of stacked polygon lines for the outer shape.
    ///
    /// Final/Accepting states in automaton are marked by two peripheral lines. The default value
    /// for this attribute is 1.
    pub peripheries: Option<usize>,

    /// Use `.. Node::Default()` to make the structs.
    pub _non_exhaustive: (),
}

/// Optionally contains the possible edge attributes.
#[derive(Clone, Default)]
pub struct Edge {
    /// A label to appear, can be html or an escaped string.
    pub label: Option<Id>,

    /// Use `.. Edge::Default()` to make the structs.
    pub _non_exhaustive: (),
}

/// Writes dot files.
///
/// Node names are chosen automatically from the given index.
pub struct GraphWriter<W: Write> {
    inner: Option<W>,

    /// The edgeop must correspond to the chosen graph family.
    edgeop: Family,
}

#[derive(Clone, Copy, Debug)]
pub enum Family {
    Directed,
    Undirected,
}

/// An identifier, has several uses in the language (`ID`).
///
/// IDs representing attributes have a constant defined in this struct.
///
/// TODO: `node_id` is currently restricted to this, but could have port and another specifier.
#[derive(Clone, Debug)]
pub struct Id(IdEnum);

#[derive(Clone, Debug)]
enum IdEnum {
    /// A c-style identifier or a string numeral.
    ///
    /// Any string of alphabetic ([a-zA-Z\200-\377]) characters, underscores ('_') or digits ([0-9]), not beginning with a digit;
    ///
    /// A numeral `[-]?(.[0-9]+ | [0-9]+(.[0-9]*)? )`;
    Raw(Cow<'static, str>),

    /// A standard unsigned numeral, encoded to `(.[0-9]+ | [0-9]+(.[0-9]*)? )`;
    Numeral(usize),

    /// A standard signed numeral, encoded to `[-]?(.[0-9]+ | [0-9]+(.[0-9]*)? )`;
    INumeral(isize),

    /// A C string escaped string.
    ///
    /// Any double-quoted string ("...") possibly containing escaped quotes (\");
    Str(Cow<'static, str>),

    // An html escaped string.
    // Html(String),
}

/// Trait for structures that can be dumped as a dot graph.
pub trait DotGraph {
    /// Write a dot representation.
    fn dot_graph<W>(&self, to: W) -> io::Result<()>;
}

/// Extension to `std::io::Write` for writing dot graphs.
pub trait WriteDot: io::Write {
    fn write_dot<D: DotGraph>(&mut self, graph: D) -> io::Result<()> {
        graph.dot_graph(self)
    }
}

impl<W: Write> GraphWriter<W> {
    /// Begins writing a graph with the given parameters.
    pub fn new(mut inner: W, family: Family, name: Option<Id>) -> io::Result<Self> {
        if let Some(name) = name {
            writeln!(&mut inner, "{} {} {{", family.name(), name)?;
        } else {
            writeln!(&mut inner, "{} {{", family.name())?;
        }

        Ok(GraphWriter {
            inner: Some(inner),
            edgeop: family,
        })
    }

    /// Set the default node information.
    pub fn default_node(&mut self, default_node: Node) -> io::Result<()> {
        let fmt = self.inner.as_mut().unwrap();
        write!(fmt, "\tnode [{}];", default_node)
    }

    /// Set the default edge attributes.
    pub fn default_edge(&mut self, default_edge: Edge) -> io::Result<()> {
        let fmt = self.inner.as_mut().unwrap();
        write!(fmt, "\tedge [{}];", default_edge)
    }

    /// Add a line segment, that is two or more connected nodes.
    ///
    /// Panics: when the iterator returned less than two nodes.
    ///
    /// TODO: the spec allows adhoc subgraphs instead of node specifiers.
    pub fn segment<I, T>(&mut self, iter: I, options: Option<Edge>) -> io::Result<()> 
        where I: IntoIterator<Item=T>, T: Into<Id>
    {
        let fmt = self.inner.as_mut().unwrap();

        let mut iter = iter.into_iter();

        let begin = iter.next().unwrap();
        let end = iter.next().unwrap();

        write!(fmt, "\t{} {} {} ", begin.into(), self.edgeop.edgeop(), end.into())?;

        for next in iter {
            write!(fmt, "{} {} ", self.edgeop.edgeop(), next.into())?;
        }

        if let Some(options) = options {
            writeln!(fmt, "[{}];", options)
        } else {
            writeln!(fmt, ";")
        }
    }

    /// Set node information or create a new node.
    pub fn node(&mut self, id: Id, node: Option<Node>) -> io::Result<()> {
        let fmt = self.inner.as_mut().unwrap();

        write!(fmt, "\t{} ", id)?;

        if let Some(options) = node {
            writeln!(fmt, "[{}];", options)
        } else {
            writeln!(fmt, ";")
        }
    }

    /// In contrast to a simple drop, returns the inner writer.
    pub fn end_into_inner(mut self) -> (W, io::Result<()>) {
        let mut inner = self.inner.take().unwrap();
        let result = inner.write_all(b"}\n");

        (inner, result)
    }
}

impl<W: io::Write> Drop for GraphWriter<W> {
    fn drop(&mut self) {
        if let Some(writer) = self.inner.as_mut() {
            writer.write_all(b"}\n").unwrap();
        }
    }
}

impl<'a, W: Write> GraphWriter<&'a mut W> {
    pub fn subgraph(&mut self, _name: Option<String>) -> GraphWriter<&mut W> {
        unimplemented!()
    }
}

impl Node {
    /// A node with no attributes.
    ///
    /// May be used in constructors to default assign remaining members with `.. Node::none()`.
    pub fn none() -> Self {
        Node::default()
    }
}

impl Edge {
    /// An edge with no attributes.
    ///
    /// May be used in constructors to default assign remaining members with `.. Edge::none()`.
    pub fn none() -> Self {
        Edge::default()
    }
}

impl Family {
    /// The keyword at the top of the dot file.
    fn name(self) -> &'static str {
        match self {
            Family::Directed => "digraph",
            Family::Undirected => "graph",
        }
    }

    fn edgeop(self) -> &'static str {
        match self {
            Family::Directed => "->",
            Family::Undirected => "--",
        }
    }
}

impl Id {
    const LABEL: Id = Id(IdEnum::Raw(Cow::Borrowed("label")));
    const PERIPHERIES: Id = Id(IdEnum::Raw(Cow::Borrowed("peripheries")));
}

impl IdEnum {
    /// Constructs the ID representation for this string.
    ///
    /// Automatically chooses between raw ascii, digits and string encoded versions of identifiers,
    /// whichever has the least conversion and usage overhead.
    ///
    /// Panics: When the escaped string does not fit inside a string.
    fn from_string_like<T>(name: T) -> Self
        where T: Into<Cow<'static, str>> 
    {
        let name = name.into();

        let raw_identifier = |c: char| c.is_ascii_alphabetic() || c.is_ascii_digit() || c == '_';
        let raw_identifier_begin = |c: &char| c.is_ascii_alphabetic() || *c == '_';

        if name.as_ref().is_empty() {
            return IdEnum::Str(name)
        }

        if name.as_ref().chars().all(|c| c.is_ascii_digit()) {
            return IdEnum::Raw(name)
        }

        if name.as_ref().chars().all(raw_identifier) && name.as_ref().chars().nth(0).filter(raw_identifier_begin).is_some() {
            return IdEnum::Raw(name)
        }

        // Simply escape single quotes once. Since the default charset is UTF-8, all other strings are fine.
        let quote_count = name.bytes().filter(|c| *c == b'"').count();
        let name = if quote_count > 0 {
            let mut string = name.into_owned();

            // Escape every '"' with '\'.
            //
            // This operation is safe since we only prepend b'\' (a valid UTF-8 sequence) to b'"'.
            //
            // More in-depth:
            // Because b'"' can only appear inside char boundaries in any other situation but as a
            // standalone b'"' character, the new sequence keeps all char boundaries intact and has
            // only inserted a new valid char sequence, b'\'. Hence the new string is still valid
            // UTF-8.
            //
            // This cannot be performed safely and efficiently, since we can only utilize
            // `String::insert` to add single characters but doing so would be O(n·m) where n is
            // the length of the string and m is the number of '"' chars. In comparison, this
            // operation is O(n) since we only move each character at most once.
            unsafe {
                let vec = string.as_mut_vec();
                let mut num_inserts = quote_count;

                assert!(num_inserts > 0, "contains at least one quote");
                assert!(!vec.is_empty(), "contains at least one quote");
                let mut text_end = vec.len();

                // Controlled panic
                let new_len = vec.len().checked_add(num_inserts)
                    .expect("escaped string would not fit");

                // Add all the additional escape characters. We move them around as a contiguous
                // slice later, each time leaving behind the last slash where it belongs.
                vec.resize(new_len, b'\\');
                let mut new_end = new_len;

                let slice = vec.as_mut_slice();
                // Pointer arithmetic on the slice elements (u8) later can be done as usize
                // arithmetic with this base address without wrapping.
                let base_ptr = slice.as_ptr() as usize;

                // Copy & insert the new characters.
                while num_inserts > 0 {
                    let tail = slice[..text_end]
                        // Get all the text following the last '"'
                        .rsplit(|c| *c == b'"').next().unwrap()
                        .as_ptr() as usize;

                    assert!(tail > base_ptr, "at least one quote left infront");

                    // Calculate the index of the quote character
                    let quote_index = tail
                        .checked_sub(base_ptr).unwrap()
                        .checked_sub(1).unwrap();
                    let relative_end = text_end
                        .checked_sub(quote_index).unwrap();

                    // Move the uninitialized part infront of the slice. Remember that the slice of
                    // new characters consists only of slashes.
                    slice[quote_index..new_end].rotate_right(relative_end);

                    // Now leave behind one slash and set all new values.  Expecting clang magic to
                    // remove the unwrap because he can prove that `num_insert > 1` at this point.
                    num_inserts = num_inserts.checked_sub(1).unwrap();
                    new_end = quote_index + num_inserts;
                    text_end = quote_index;
                }
            }

            string.into()
        } else {
            name
        };

        IdEnum::Str(name)
    }
}

impl From<Cow<'static, str>> for Id {
    fn from(id: Cow<'static, str>) -> Self {
        Id(IdEnum::from_string_like(id))
    }
}

impl From<&'static str> for Id {
    fn from(id: &'static str) -> Self {
        Id(IdEnum::from_string_like(Cow::from(id)))
    }
}

impl From<String> for Id {
    fn from(id: String) -> Self {
        Id(IdEnum::from_string_like(Cow::from(id)))
    }
}

impl From<usize> for Id {
    fn from(id: usize) -> Self {
        Id(IdEnum::Numeral(id))
    }
}

impl From<isize> for Id {
    fn from(id: isize) -> Self {
        Id(IdEnum::INumeral(id))
    }
}

impl fmt::Display for IdEnum {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        match self {
            IdEnum::Raw(id) => write!(f, "{}", id),
            IdEnum::Numeral(id) => write!(f, "{}", id),
            IdEnum::INumeral(id) => write!(f, "{}", id),
            IdEnum::Str(id) => write!(f, "\"{}\"", id),
        }
    }
}

impl fmt::Display for Id {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        self.0.fmt(f)
    }
}

/// Formats the node attributes (`a_list` in specification terms).
impl fmt::Display for Node {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        if let Some(label) = self.label.as_ref() {
            write!(f, "{}={},", Id::LABEL, label)?;
        }

        if let Some(peripheries) = self.peripheries {
            write!(f, "{}={},", Id::PERIPHERIES, peripheries)?;
        }

        Ok(())
    }
}

/// Formats the edge attributes (`a_list` in specification terms).
impl fmt::Display for Edge {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        if let Some(label) = self.label.as_ref() {
            write!(f, "{}={},", Id::LABEL, label)?;
        }

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn identifiers() {
        assert_eq!(format!("{}", Id::from("abc")), "abc");
        assert_eq!(format!("{}", Id::from(0usize)), "0");
        assert_eq!(format!("{}", Id::from(-1isize)), "-1");
        assert_eq!(format!("{}", Id::from("123")), "123");
        assert_eq!(format!("{}", Id::from("a string with spaces")), r#""a string with spaces""#);
        assert_eq!(format!("{}", Id::from("\"")), r#""\"""#);
        assert_eq!(format!("{}", Id::from("")), r#""""#);
    }
}