mermaid-text 0.1.0

Render Mermaid diagrams as Unicode box-drawing text — no browser, no image protocols, pure Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
//! Hand-rolled parser for Mermaid `graph`/`flowchart` syntax.
//!
//! The parser works statement-by-statement. A "statement" is one logical
//! declaration separated by a newline or semicolon. Each statement is
//! classified as either:
//!
//! - A **node definition**: `A[Label]`, `A{Label}`, `A((Label))`, `A(Label)`, or bare `A`
//! - An **edge chain**: `A --> B --> C`, potentially with inline labels
//! - A **header line**: `graph LR` / `flowchart TD` (handled before entering this module)
//! - A blank / comment line — silently ignored
//!
//! All edge types (`-->`, `---`, `-.->`, `==>`) are treated identically in
//! Phase 1 (rendered as solid arrows).

use crate::{
    Error,
    types::{Direction, Edge, Graph, Node, NodeShape},
};

// ---------------------------------------------------------------------------
// Public entry point
// ---------------------------------------------------------------------------

/// Parse a Mermaid `graph`/`flowchart` source string into a [`Graph`].
///
/// The function expects the *full* input including the header line
/// (`graph LR`, `flowchart TD`, etc.). Both line breaks and semicolons are
/// treated as statement separators, so `graph LR; A-->B` is valid.
///
/// Lines whose first non-blank characters are `%%` are comments and are
/// dropped as a whole, even if they contain semicolons. Blank statements are
/// ignored.
///
/// # Errors
///
/// Returns [`Error::ParseError`] if the header statement is missing or the
/// direction keyword is unrecognised.
pub fn parse(input: &str) -> Result<Graph, Error> {
    // Comment lines are removed *before* splitting on ';'. Splitting first
    // would turn `%% note; A --> B` into a comment plus a live statement.
    let mut statements = input
        .split(['\n', '\r'])
        .map(str::trim)
        .filter(|line| !line.starts_with("%%"))
        .flat_map(|line| line.split(';'))
        .map(str::trim)
        .filter(|stmt| !stmt.is_empty() && !stmt.starts_with("%%"));

    // The first surviving statement must be the header.
    let direction = parse_header_stmt(&mut statements)?;
    let mut graph = Graph::new(direction);

    // Everything after the header is a node definition, an edge chain, or an
    // ignored directive.
    for stmt in statements {
        parse_statement(stmt, &mut graph);
    }

    Ok(graph)
}

// ---------------------------------------------------------------------------
// Header parsing
// ---------------------------------------------------------------------------

/// Consume the first statement from `stmts` and parse it as a
/// `graph`/`flowchart` header, returning the [`Direction`].
///
/// The keyword is matched case-insensitively. The direction is the first
/// whitespace-delimited token after the keyword; any run of whitespace
/// between the two is accepted. When no direction token is present the
/// direction defaults to `TD`. Anything after the direction token is ignored.
///
/// # Errors
///
/// Returns [`Error::ParseError`] if `stmts` is exhausted, if the first word is
/// neither `graph` nor `flowchart`, or if the direction token is rejected by
/// [`Direction::parse`].
fn parse_header_stmt<'a>(stmts: &mut impl Iterator<Item = &'a str>) -> Result<Direction, Error> {
    let stmt = stmts
        .next()
        .ok_or_else(|| Error::ParseError("no 'graph'/'flowchart' header found".to_string()))?;

    // `split_whitespace` collapses runs of whitespace, so `graph   LR` yields
    // "graph" then "LR" rather than an empty direction token.
    let mut words = stmt.split_whitespace();
    let keyword = words.next().unwrap_or("").to_lowercase();

    if !matches!(keyword.as_str(), "graph" | "flowchart") {
        return Err(Error::ParseError(format!(
            "expected 'graph' or 'flowchart', got '{keyword}'"
        )));
    }

    // Default to top-down if the direction is omitted.
    let dir_str = words.next().unwrap_or("TD");

    Direction::parse(dir_str)
        .ok_or_else(|| Error::ParseError(format!("unknown direction '{dir_str}'")))
}

// ---------------------------------------------------------------------------
// Statement parsing
// ---------------------------------------------------------------------------

/// Interpret one trimmed statement and record its effect in `graph`.
///
/// Statements whose first word is a known Mermaid directive (`subgraph`,
/// `style`, `classDef`, ...) are skipped. Anything that looks like it contains
/// an arrow is handed to the edge-chain parser, which also registers the nodes
/// it mentions. Everything else is tried as a standalone node definition.
fn parse_statement(stmt: &str, graph: &mut Graph) {
    // Directives that are neither node definitions nor edge chains.
    const IGNORED_KEYWORDS: [&str; 10] = [
        "subgraph",
        "end",
        "direction",
        "style",
        "classDef",
        "class",
        "click",
        "linkStyle",
        "accTitle",
        "accDescr",
    ];

    let leading_word = stmt.split_whitespace().next().unwrap_or("");
    if IGNORED_KEYWORDS.contains(&leading_word) {
        return;
    }

    // Edge chains: `A --> B` or `A -->|label| B --> C`.
    if looks_like_edge_chain(stmt) {
        parse_edge_chain(stmt, graph);
        return;
    }

    // Standalone node: `A[label]`, `A{label}`, `A((label))`, `A(label)`, `A`.
    if let Some(node) = parse_node_definition(stmt) {
        graph.upsert_node(node);
    }
}

/// Cheap substring check: does `s` contain anything that resembles an arrow?
///
/// Recognised markers are `-->`, `---`, `-.->`, `==>` and `-- ` (the opening
/// of the `-- label -->` form).
fn looks_like_edge_chain(s: &str) -> bool {
    const ARROW_MARKERS: [&str; 5] = ["-->", "---", "-.->", "==>", "-- "];
    ARROW_MARKERS.iter().any(|marker| s.contains(*marker))
}

// ---------------------------------------------------------------------------
// Edge chain parsing
// ---------------------------------------------------------------------------

/// Parse an edge chain such as `A --> B -->|yes| C` into nodes and edges.
///
/// The statement is tokenised into an alternating sequence
/// `node, arrow, node, arrow, ...` (nodes at even positions, arrows at odd
/// positions). Every node token is upserted into `graph`; every node after
/// the first is connected to its predecessor by an edge carrying the label,
/// if any, of the most recent arrow token.
fn parse_edge_chain(stmt: &str, graph: &mut Graph) {
    let tokens = tokenise_chain(stmt);

    // ID of the node most recently seen; the source of the next edge.
    let mut previous_id: Option<String> = None;
    // Label taken from the most recent arrow, waiting for its target node.
    let mut label_for_next_edge: Option<String> = None;

    for (position, raw) in tokens.iter().enumerate() {
        let token = raw.trim();

        if position % 2 == 1 {
            // Arrow token: remember its label (or lack of one).
            label_for_next_edge = extract_arrow_label(token);
            continue;
        }

        // Node token.
        if token.is_empty() {
            continue;
        }
        let node = match parse_node_definition(token) {
            Some(parsed) => parsed,
            // Not parseable as a definition: use the raw text as a bare ID.
            None => Node::new(token, token, NodeShape::Rectangle),
        };
        let current_id = node.id.clone();
        graph.upsert_node(node);

        if let Some(source_id) = previous_id.take() {
            let edge = Edge::new(source_id, current_id.clone(), label_for_next_edge.take());
            graph.edges.push(edge);
        }
        previous_id = Some(current_id);
    }
}

/// Split a chain statement into alternating node/arrow tokens.
///
/// Returns a `Vec<String>` where even indices are node tokens and odd indices
/// are arrow tokens (including any `|label|` portion). Node tokens are
/// trimmed; a trailing empty node (as in `A -->`) is not emitted.
///
/// Arrows are only recognised *outside* node brackets. Text inside `[...]`,
/// `{...}` or `(...)` is kept as part of the node token even if it contains
/// `-->`, `--` or `==>`, so `A[a --> b] --> C` yields `A[a --> b]`, `-->`,
/// `C`. Within brackets, a double-quoted section may contain bracket
/// characters without affecting nesting.
fn tokenise_chain(stmt: &str) -> Vec<String> {
    let chars: Vec<char> = stmt.chars().collect();
    let mut tokens: Vec<String> = Vec::new();
    let mut current = String::new();
    // How many node brackets are currently open.
    let mut depth: usize = 0;
    // True while inside a "quoted" section of a bracketed label.
    let mut in_quotes = false;
    let mut i = 0;

    while i < chars.len() {
        let ch = chars[i];

        if ch == '"' && depth > 0 {
            // Quotes only matter inside a label; a stray quote elsewhere must
            // not disable arrow detection for the rest of the statement.
            in_quotes = !in_quotes;
        } else if !in_quotes {
            match ch {
                '[' | '{' | '(' => depth += 1,
                // Saturate so an unmatched closer (e.g. the `A>text]` shape)
                // cannot underflow.
                ']' | '}' | ')' => depth = depth.saturating_sub(1),
                // An arrow needs a non-empty node before it and must sit
                // outside every bracket.
                '-' | '='
                    if depth == 0
                        && !current.trim().is_empty()
                        && is_arrow_start(&chars, i) =>
                {
                    tokens.push(current.trim().to_string());
                    current.clear();

                    // Consume the full arrow, including any label.
                    let (arrow_tok, consumed) = consume_arrow(&chars, i);
                    tokens.push(arrow_tok);
                    i += consumed;
                    continue;
                }
                _ => {}
            }
        }

        current.push(ch);
        i += 1;
    }

    // Whatever is left is the final node token.
    let last = current.trim();
    if !last.is_empty() {
        tokens.push(last.to_string());
    }

    tokens
}

/// Return `true` if position `i` in `chars` starts an arrow sequence.
///
/// An arrow starts with `--` (covering `-->`, `---` and `-- label -->`),
/// `-.->` or `==>`. An index at or past the end of `chars` returns `false`.
fn is_arrow_start(chars: &[char], i: usize) -> bool {
    // Compare against the slice directly; this is called once per candidate
    // character, so building a String of the remainder would be quadratic.
    const ARROW_PREFIXES: [&[char]; 3] = [
        &['-', '-'],
        &['-', '.', '-', '>'],
        &['=', '=', '>'],
    ];

    match chars.get(i..) {
        Some(rest) => ARROW_PREFIXES.iter().any(|prefix| rest.starts_with(prefix)),
        None => false,
    }
}

/// Read one arrow beginning at `chars[start]` and report how long it was.
///
/// Returns `(arrow_token, chars_consumed)`. Forms tried, in order:
/// - `-- label -->` (taken whole, label included)
/// - `-.->` or `-.-`, `==>`, `-->`, `---`, each optionally followed by `|label|`
/// - a bare `--` as the last resort
fn consume_arrow(chars: &[char], start: usize) -> (String, usize) {
    // Fixed-width arrow heads. `-.->` precedes `-.-` so the longer one wins.
    const HEADS: [&str; 5] = ["-.->", "-.-", "==>", "-->", "---"];

    let tail: String = chars[start..].iter().collect();

    // The labelled-dash form also starts with "--", so it is checked first.
    if let Some(labelled) = try_consume_labeled_dash_arrow(&tail) {
        let width = labelled.chars().count();
        return (labelled, width);
    }

    match HEADS.iter().find(|head| tail.starts_with(**head)) {
        Some(head) => {
            // Heads are ASCII, so byte length equals char count.
            let (pipe_label, pipe_width) = try_consume_pipe_label(&tail[head.len()..]);
            (format!("{head}{pipe_label}"), head.len() + pipe_width)
        }
        // Fallback: consume "--".
        None => (tail[..2].to_string(), 2),
    }
}

/// Match the `-- label -->` arrow form at the start of `s`.
///
/// Returns the matched text, from the opening `-- ` up to and including the
/// first `-->` that follows it. Returns `None` if `s` does not start with
/// `-- ` or no closing `-->` is found.
fn try_consume_labeled_dash_arrow(s: &str) -> Option<String> {
    const OPENER: &str = "-- ";
    const CLOSER: &str = "-->";

    let after_opener = s.strip_prefix(OPENER)?;
    let closer_at = after_opener.find(CLOSER)?;
    let matched_len = OPENER.len() + closer_at + CLOSER.len();
    Some(s[..matched_len].to_string())
}

/// Try to consume a `|label|` suffix at the start of `s`.
///
/// Returns `(consumed_string, char_count)`, where `consumed_string` includes
/// both pipes and `char_count` is its length in `char`s (not bytes), because
/// callers advance through a `[char]` buffer. Returns `("", 0)` if `s` does
/// not start with `|` or the closing `|` is missing.
fn try_consume_pipe_label(s: &str) -> (String, usize) {
    let matched = s
        .strip_prefix('|')
        .and_then(|inner| inner.find('|'))
        // `close` is a byte offset within `inner`; +2 covers both pipes.
        .map(|close| &s[..close + 2]);

    match matched {
        // Count chars, not bytes: a non-ASCII label is longer in bytes.
        Some(portion) => (portion.to_string(), portion.chars().count()),
        None => (String::new(), 0),
    }
}

/// Pull the label text, if any, out of an arrow token.
///
/// Two styles are recognised, in this order:
/// - pipe style (`-->|label|`, `-.->|label|`): the text between the first two
///   `|` characters;
/// - dash style (`-- label -->`): the text between the leading `-- ` and the
///   last `-->`.
///
/// The label is trimmed. A blank label counts as no label.
fn extract_arrow_label(arrow: &str) -> Option<String> {
    /// Trim `text` and return it as an owned label unless it is blank.
    fn non_blank(text: &str) -> Option<String> {
        let text = text.trim();
        if text.is_empty() {
            None
        } else {
            Some(text.to_string())
        }
    }

    // Pipe style: whatever sits between the first pair of pipes.
    let piped = arrow
        .split_once('|')
        .and_then(|(_, after_first)| after_first.split_once('|'))
        .and_then(|(inner, _)| non_blank(inner));

    // Dash style is tried only when the pipe style gave nothing.
    piped.or_else(|| {
        let body = arrow.strip_prefix("-- ")?;
        let closer_at = body.rfind("-->")?;
        non_blank(&body[..closer_at])
    })
}

// ---------------------------------------------------------------------------
// Node definition parsing
// ---------------------------------------------------------------------------

/// Turn one node token into a [`Node`].
///
/// Supported spellings:
/// - `A((text))`: circle
/// - `A{text}`: diamond
/// - `A[text]`: rectangle
/// - `A(text)`: rounded
/// - `A`: bare ID, drawn as a rectangle labelled with the ID itself
///
/// A token with an opening bracket that matches none of the shapes above is
/// treated as a bare ID in its entirety. `<br/>`, `<br>` and `<br />` inside
/// the label are replaced by a single space.
///
/// Returns `None` if the token is blank or has no ID before the shape.
pub fn parse_node_definition(token: &str) -> Option<Node> {
    /// Return the text between `open` and `close` if `s` is wrapped in them.
    fn between<'a>(s: &'a str, open: &str, close: &str) -> Option<&'a str> {
        s.strip_prefix(open)
            .and_then(|tail| tail.strip_suffix(close))
    }

    let trimmed = token.trim();
    if trimmed.is_empty() {
        return None;
    }

    // Fallback used for bare IDs and unrecognised bracket patterns alike.
    let bare = (trimmed, trimmed, NodeShape::Rectangle);

    // The first bracket-like character separates the ID from the shape.
    let (id, raw_label, shape) = match trimmed.find(['[', '{', '(']) {
        None => bare,
        Some(split_at) => {
            let (head, body) = trimmed.split_at(split_at);

            // `((..))` is tested before `(..)` so circles are not mistaken
            // for rounded boxes.
            let shaped = between(body, "((", "))")
                .map(|text| (text, NodeShape::Circle))
                .or_else(|| between(body, "{", "}").map(|text| (text, NodeShape::Diamond)))
                .or_else(|| between(body, "[", "]").map(|text| (text, NodeShape::Rectangle)))
                .or_else(|| between(body, "(", ")").map(|text| (text, NodeShape::Rounded)));

            match shaped {
                Some((text, shape)) => (head.trim(), text.trim(), shape),
                None => bare,
            }
        }
    };

    if id.is_empty() {
        return None;
    }

    // Mermaid allows HTML line breaks in labels; flatten them to spaces.
    let label = raw_label
        .replace("<br/>", " ")
        .replace("<br>", " ")
        .replace("<br />", " ");

    Some(Node::new(id.to_string(), label, shape))
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

/// Unit tests for the parser, driven through the public [`parse`] entry point.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::NodeShape;

    /// A three-node chain on one line yields three nodes and two edges.
    #[test]
    fn parse_simple_lr() {
        let g = parse("graph LR\nA-->B-->C").unwrap();
        assert_eq!(g.direction, Direction::LeftToRight);
        assert!(g.has_node("A"));
        assert!(g.has_node("B"));
        assert!(g.has_node("C"));
        assert_eq!(g.edges.len(), 2);
    }

    /// Semicolons separate statements just like newlines do.
    #[test]
    fn parse_semicolons() {
        let g = parse("graph LR; A-->B; B-->C").unwrap();
        assert_eq!(g.edges.len(), 2);
    }

    /// Inline `[label]` definitions inside an edge chain set the node labels.
    #[test]
    fn parse_labeled_nodes() {
        let g = parse("graph LR\nA[Start] --> B[End]").unwrap();
        assert_eq!(g.node("A").unwrap().label, "Start");
        assert_eq!(g.node("B").unwrap().label, "End");
    }

    /// `A{...}` produces a diamond with the enclosed text as label.
    #[test]
    fn parse_diamond_node() {
        let g = parse("graph LR\nA{Decision}").unwrap();
        assert_eq!(g.node("A").unwrap().shape, NodeShape::Diamond);
        assert_eq!(g.node("A").unwrap().label, "Decision");
    }

    /// `A((...))` produces a circle.
    #[test]
    fn parse_circle_node() {
        let g = parse("graph LR\nA((Circle))").unwrap();
        assert_eq!(g.node("A").unwrap().shape, NodeShape::Circle);
    }

    /// `A(...)` produces a rounded box.
    #[test]
    fn parse_rounded_node() {
        let g = parse("graph LR\nA(Rounded)").unwrap();
        assert_eq!(g.node("A").unwrap().shape, NodeShape::Rounded);
    }

    /// Pipe-style edge labels (`-->|label|`) are attached to the edge.
    #[test]
    fn parse_edge_label_pipe() {
        let g = parse("graph LR\nA -->|yes| B").unwrap();
        assert_eq!(g.edges[0].label.as_deref(), Some("yes"));
    }

    /// Dash-style edge labels (`-- label -->`) are attached to the edge.
    #[test]
    fn parse_edge_label_dash() {
        let g = parse("graph LR\nA -- hello --> B").unwrap();
        assert_eq!(g.edges[0].label.as_deref(), Some("hello"));
    }

    /// The `flowchart` keyword is accepted as a header, like `graph`.
    #[test]
    fn parse_flowchart_keyword() {
        let g = parse("flowchart TD\nA-->B").unwrap();
        assert_eq!(g.direction, Direction::TopToBottom);
    }

    /// An unrecognised direction token is a parse error.
    #[test]
    fn bad_direction_returns_error() {
        assert!(parse("graph XY\nA-->B").is_err());
    }

    /// Input whose first statement is not a header is a parse error.
    #[test]
    fn no_header_returns_error() {
        assert!(parse("A-->B").is_err());
    }
}