Skip to main content

streamweave_attractor/
dot_parser.rs

1//! Minimal DOT parser for Attractor pipeline graphs.
2//!
3//! Implements the subset defined in attractor-spec §2.
4
5use crate::types::{AttractorEdge, AttractorGraph, AttractorNode};
6use std::collections::HashMap;
7use tracing::{info, instrument};
8
9/// Parse a DOT source string into an AttractorGraph.
10#[instrument(level = "trace", skip(source))]
11pub fn parse_dot(source: &str) -> Result<AttractorGraph, String> {
12  let source = strip_comments(source);
13  let source = source.trim();
14
15  if !source.starts_with("digraph") {
16    return Err("Expected 'digraph' at start".to_string());
17  }
18
19  let rest = source["digraph".len()..].trim_start();
20  let (_name, rest) = parse_identifier(rest).ok_or("Expected graph name")?;
21  let rest = rest.trim_start();
22
23  let rest = rest
24    .strip_prefix('{')
25    .ok_or("Expected '{' after graph name")?;
26
27  let mut graph = AttractorGraph {
28    goal: String::new(),
29    nodes: HashMap::new(),
30    edges: Vec::new(),
31  };
32  let mut remaining = rest.trim();
33  while !remaining.is_empty() && !remaining.starts_with('}') {
34    remaining = parse_statement(remaining, &mut graph)?;
35    remaining = remaining.trim();
36  }
37
38  info!(
39    nodes = graph.nodes.len(),
40    edges = graph.edges.len(),
41    "DOT parse complete"
42  );
43  Ok(graph)
44}
45
46/// Strips `//` and `/* */` style comments from DOT source.
47#[instrument(level = "trace", skip(s))]
pub(crate) fn strip_comments(s: &str) -> String {
  // Returns a copy of `s` without `//` line comments and `/* ... */` block
  // comments.
  //
  // * The newline that ends a `//` comment is kept.
  // * Double-quoted strings are copied verbatim, so comment markers inside
  //   them (for example the `//` in a URL) survive.
  // * An unterminated `/*` comment swallows the rest of the input.
  // * The scan works on whole characters, so non-ASCII text is preserved.
  let mut out = String::with_capacity(s.len());
  let mut rest = s;

  while let Some(ch) = rest.chars().next() {
    if ch == '"' {
      // Find the end of the quoted string, honouring backslash escapes. An
      // unterminated string runs to the end of the input.
      let mut len = rest.len();
      let mut escaped = false;
      for (idx, c) in rest.char_indices().skip(1) {
        if escaped {
          escaped = false;
        } else if c == '\\' {
          escaped = true;
        } else if c == '"' {
          len = idx + 1;
          break;
        }
      }
      out.push_str(&rest[..len]);
      rest = &rest[len..];
    } else if rest.starts_with("//") {
      // Drop everything up to, but not including, the line break.
      let end = rest.find('\n').unwrap_or(rest.len());
      rest = &rest[end..];
    } else if let Some(after_open) = rest.strip_prefix("/*") {
      rest = match after_open.find("*/") {
        Some(close) => &after_open[close + 2..],
        None => "",
      };
    } else {
      out.push(ch);
      rest = &rest[ch.len_utf8()..];
    }
  }
  out
}
74
75/// Parses an identifier (alphanumeric + underscore) and returns it plus the remaining string.
76#[instrument(level = "trace", skip(s))]
pub(crate) fn parse_identifier(s: &str) -> Option<(&str, &str)> {
  // Returns `(identifier, remainder)` or `None` when nothing usable is found.
  //
  // The scan is deliberately lenient: after trimming leading whitespace it
  // jumps to the first ASCII letter or `_` anywhere in the input, discarding
  // whatever precedes it. If the input has no such character, the scan starts
  // at offset 0, so a leading run of digits is returned as the "identifier".
  let trimmed = s.trim_start();
  let is_ident_start = |c: char| c == '_' || c.is_ascii_alphabetic();
  let is_ident_char = |c: char| c == '_' || c.is_ascii_alphanumeric();

  let begin = trimmed.find(is_ident_start).unwrap_or(0);
  let tail = &trimmed[begin..];
  let len = tail
    .find(|c: char| !is_ident_char(c))
    .unwrap_or(tail.len());

  if len == 0 {
    return None;
  }
  Some(tail.split_at(len))
}
92
93/// Parses a single graph statement and updates `graph`. Returns the unconsumed remainder.
94#[instrument(level = "trace", skip(graph))]
95fn parse_statement<'a>(mut s: &'a str, graph: &mut AttractorGraph) -> Result<&'a str, String> {
96  s = s.trim_start();
97  if s.starts_with('}') {
98    return Ok(s);
99  }
100  if s.starts_with("graph") {
101    return parse_graph_attrs(s, graph);
102  }
103  if s.starts_with("node") {
104    return skip_attr_block(s);
105  }
106  if s.starts_with("edge") {
107    return skip_attr_block(s);
108  }
109  if s.starts_with("rankdir") {
110    return skip_assign(s);
111  }
112  if s.starts_with("subgraph") {
113    return skip_subgraph(s);
114  }
115
116  let (id, rest) = parse_identifier(s).ok_or("Expected identifier")?;
117  let rest = rest.trim_start();
118
119  if rest.starts_with('[') {
120    let (attrs, rest) = parse_attr_block(rest)?;
121    let node = parse_node_attrs(id, &attrs)?;
122    graph.nodes.insert(id.to_string(), node);
123    return Ok(rest.trim_start().trim_start_matches(';'));
124  }
125
126  if rest.starts_with("->") {
127    return parse_edge_stmt(id, rest, graph);
128  }
129
130  Ok(rest.trim_start().trim_start_matches(';'))
131}
132
133/// Applies graph-level DOT attributes (e.g. `goal`) to the given [`AttractorGraph`].
134#[instrument(level = "trace", skip(attrs, graph))]
135pub(crate) fn apply_graph_attrs(attrs: &[(String, String)], graph: &mut AttractorGraph) {
136  for (k, v) in attrs {
137    if k == "goal" {
138      graph.goal = v.clone();
139    }
140  }
141}
142
143/// Parses a `graph [key=value,...]` block and applies attributes to `graph`; returns the remainder.
144#[instrument(level = "trace", skip(graph))]
145fn parse_graph_attrs<'a>(mut s: &'a str, graph: &mut AttractorGraph) -> Result<&'a str, String> {
146  s = s["graph".len()..].trim_start();
147  let (attrs, rest) = parse_attr_block(s)?;
148  apply_graph_attrs(&attrs, graph);
149  Ok(rest.trim_start().trim_start_matches(';'))
150}
151
/// List of key-value attribute pairs from DOT `[key=value,...]` blocks.
///
/// Pairs are kept in source order as `(key, value)`; duplicate keys are not
/// merged.
type AttrList = Vec<(String, String)>;
154
155/// Extracts label, condition, and weight from edge attribute list.
156#[instrument(level = "trace", skip(attrs))]
pub(crate) fn extract_edge_attrs(
  attrs: &[(String, String)],
) -> (Option<String>, Option<String>, i32) {
  // Returns `(label, condition, weight)`.
  //
  // For each key only its first occurrence is consulted. `weight` is 0 when
  // the key is absent or when that first value does not parse as an `i32`.

  /// Value of the first pair whose key equals `key`.
  fn first_value<'a>(attrs: &'a [(String, String)], key: &str) -> Option<&'a str> {
    attrs
      .iter()
      .find(|(k, _)| k == key)
      .map(|(_, v)| v.as_str())
  }

  let label = first_value(attrs, "label").map(str::to_owned);
  let condition = first_value(attrs, "condition").map(str::to_owned);
  let weight = match first_value(attrs, "weight") {
    Some(raw) => raw.parse().unwrap_or(0),
    None => 0,
  };
  (label, condition, weight)
}
175
176/// Parses `[key=value,...]` and returns the attributes plus the remainder.
177#[instrument(level = "trace")]
178fn parse_attr_block(s: &str) -> Result<(AttrList, &str), String> {
179  let s = s.trim_start().strip_prefix('[').ok_or("Expected '['")?;
180  let mut attrs = Vec::new();
181  let mut remaining = s.trim_start();
182  while !remaining.starts_with(']') {
183    let (k, rest) = parse_identifier(remaining).ok_or("Expected attribute key")?;
184    let rest = rest.trim_start().strip_prefix('=').ok_or("Expected '='")?;
185    let (v, rest) = parse_value(rest.trim_start())?;
186    attrs.push((k.to_string(), v));
187    remaining = rest.trim_start().trim_start_matches(',');
188  }
189  let rest = remaining[1..].trim_start();
190  Ok((attrs, rest))
191}
192
193/// Unescapes DOT quoted string escape sequences (\\n, \\t, \\\", \\\\).
194#[instrument(level = "trace", skip(s))]
pub(crate) fn unescape_quoted_string(s: &str) -> String {
  // Decodes the escapes `\n`, `\t`, `\"` and `\\` in a single left-to-right
  // pass. Any other backslash sequence, and a lone trailing backslash, is
  // copied through unchanged.
  //
  // A single pass matters: decoding each escape with its own global
  // replacement would read the tail of an escaped backslash as the start of
  // the next escape (`\\n` must become backslash + `n`, not a newline).
  let mut out = String::with_capacity(s.len());
  let mut chars = s.chars();
  while let Some(c) = chars.next() {
    if c != '\\' {
      out.push(c);
      continue;
    }
    match chars.next() {
      Some('n') => out.push('\n'),
      Some('t') => out.push('\t'),
      Some('"') => out.push('"'),
      Some('\\') => out.push('\\'),
      Some(other) => {
        out.push('\\');
        out.push(other);
      }
      None => out.push('\\'),
    }
  }
  out
}
201
202/// Parses a quoted string, number, or identifier value and returns it plus the remainder.
203#[instrument(level = "trace", skip(s))]
204pub(crate) fn parse_value(s: &str) -> Result<(String, &str), String> {
205  let s = s.trim_start();
206  if s.starts_with('"') {
207    let mut end = 1;
208    while end < s.len() {
209      let c = s.as_bytes()[end];
210      if c == b'\\' && end + 1 < s.len() {
211        end += 2;
212        continue;
213      }
214      if c == b'"' {
215        break;
216      }
217      end += 1;
218    }
219    let v = unescape_quoted_string(&s[1..end]);
220    Ok((v, s[end + 1..].trim_start()))
221  } else if let Some((num, rest)) = parse_number(s) {
222    Ok((num, rest))
223  } else {
224    let (id, rest) = parse_identifier(s).ok_or("Expected value")?;
225    Ok((id.to_string(), rest))
226  }
227}
228
229/// Parses an optional decimal number and returns it plus the remainder.
230#[instrument(level = "trace", skip(s))]
pub(crate) fn parse_number(s: &str) -> Option<(String, &str)> {
  // Recognises an optional `-` followed by one or more ASCII digits at the
  // start of `s` (after leading whitespace) and returns `(number text,
  // remainder)`. No fractional part or exponent is consumed.
  //
  // Returns `None` when there is no digit; in particular a lone `-` is not a
  // number.
  let s = s.trim_start();
  let sign_len = usize::from(s.starts_with('-'));
  let digit_len = s[sign_len..]
    .bytes()
    .take_while(u8::is_ascii_digit)
    .count();
  if digit_len == 0 {
    return None;
  }
  // Sign and digits are ASCII, so the split point is a char boundary.
  let (number, rest) = s.split_at(sign_len + digit_len);
  Some((number.to_string(), rest))
}
246
247/// Builds an `AttractorNode` from a node id and its attribute list.
248#[instrument(level = "trace", skip(attrs))]
249pub(crate) fn parse_node_attrs(
250  id: &str,
251  attrs: &[(String, String)],
252) -> Result<AttractorNode, String> {
253  let mut shape = "box".to_string();
254  let mut handler_type = None;
255  let mut label = Some(id.to_string());
256  let mut prompt = None;
257  let mut command = None;
258  let mut goal_gate = false;
259  let mut max_retries = 0u32;
260
261  for (k, v) in attrs {
262    match k.as_str() {
263      "shape" => shape = v.clone(),
264      "type" => handler_type = Some(v.clone()),
265      "label" => label = Some(v.clone()),
266      "prompt" => prompt = Some(v.clone()),
267      "command" => command = Some(v.clone()),
268      "goal_gate" => goal_gate = v.eq_ignore_ascii_case("true"),
269      "max_retries" => max_retries = v.parse().unwrap_or(0),
270      _ => {}
271    }
272  }
273
274  let handler_type = handler_type.or_else(|| resolve_handler_from_shape(&shape));
275
276  Ok(AttractorNode {
277    id: id.to_string(),
278    shape,
279    handler_type,
280    label,
281    prompt,
282    command,
283    goal_gate,
284    max_retries,
285  })
286}
287
288/// Maps DOT shape names to Attractor handler type strings.
289#[instrument(level = "trace")]
pub(crate) fn resolve_handler_from_shape(shape: &str) -> Option<String> {
  // Looks `shape` up in a fixed table of DOT shape names. Shapes that are not
  // listed fall back to "codergen", so the result is always `Some`.
  const SHAPE_HANDLERS: [(&str, &str); 9] = [
    ("Mdiamond", "start"),
    ("Msquare", "exit"),
    ("box", "codergen"),
    ("hexagon", "wait.human"),
    ("diamond", "conditional"),
    ("component", "parallel"),
    ("tripleoctagon", "parallel.fan_in"),
    ("parallelogram", "tool"),
    ("house", "stack.manager_loop"),
  ];
  const FALLBACK_HANDLER: &str = "codergen";

  let handler = SHAPE_HANDLERS
    .iter()
    .find(|(name, _)| *name == shape)
    .map_or(FALLBACK_HANDLER, |(_, handler)| *handler);
  Some(handler.to_owned())
}
307
308/// Parses an edge statement `id -> target [attrs]` and adds edges to `graph`.
309#[instrument(level = "trace", skip(graph))]
310fn parse_edge_stmt<'a>(
311  from: &str,
312  mut s: &'a str,
313  graph: &mut AttractorGraph,
314) -> Result<&'a str, String> {
315  let mut targets = Vec::new();
316  s = s["->".len()..].trim_start();
317  loop {
318    let (to, rest) = parse_identifier(s).ok_or("Expected target node")?;
319    targets.push(to.to_string());
320    let rest = rest.trim_start();
321    if rest.starts_with('[') {
322      let (attrs, rest) = parse_attr_block(rest)?;
323      let (label, condition, weight) = extract_edge_attrs(&attrs);
324      let mut prev = from;
325      for t in &targets {
326        graph.edges.push(AttractorEdge {
327          from_node: prev.to_string(),
328          to_node: t.clone(),
329          label: label.clone(),
330          condition: condition.clone(),
331          weight,
332        });
333        prev = t;
334      }
335      return Ok(rest.trim_start().trim_start_matches(';'));
336    }
337    if !rest.starts_with("->") {
338      let mut prev = from;
339      for t in &targets {
340        graph.edges.push(AttractorEdge {
341          from_node: prev.to_string(),
342          to_node: t.clone(),
343          label: None,
344          condition: None,
345          weight: 0,
346        });
347        prev = t;
348      }
349      return Ok(rest);
350    }
351    s = rest["->".len()..].trim_start();
352  }
353}
354
355/// Skips a balanced `[...]` attribute block and returns the remainder.
356#[instrument(level = "trace")]
fn skip_attr_block(s: &str) -> Result<&str, String> {
  // Finds the first `[` in `s` and returns the text after its matching `]`.
  // Nested brackets are balanced, and brackets inside double-quoted strings
  // are ignored.
  //
  // Errors: "Expected '['" when there is no `[`, and
  // "Unclosed attribute block" when the matching `]` is missing.
  let s = s.trim_start();
  let open = s.find('[').ok_or("Expected '['")?;

  let mut depth = 0usize;
  let mut in_string = false;
  let mut escaped = false;
  // `char_indices` yields byte offsets, which is what slicing needs; a plain
  // character count would be wrong as soon as the block contains non-ASCII.
  for (offset, c) in s[open..].char_indices() {
    if in_string {
      if escaped {
        escaped = false;
      } else if c == '\\' {
        escaped = true;
      } else if c == '"' {
        in_string = false;
      }
      continue;
    }
    match c {
      '"' => in_string = true,
      '[' => depth += 1,
      ']' => {
        // The scan starts on `[`, so `depth` is at least 1 here.
        depth -= 1;
        if depth == 0 {
          return Ok(&s[open + offset + 1..]);
        }
      }
      _ => {}
    }
  }
  Err("Unclosed attribute block".to_string())
}
376
377/// Skips an assignment `key=value` and returns the remainder.
378/// Parses the value correctly so we don't consume the rest of the file when there's no semicolon.
379#[instrument(level = "trace")]
380fn skip_assign(s: &str) -> Result<&str, String> {
381  let eq = s.find('=').ok_or("Expected '='")?;
382  let rest = s[eq + 1..].trim_start();
383  let (_, after_value) = parse_value(rest)?;
384  Ok(after_value.trim_start().trim_start_matches(';'))
385}
386
387/// Skips a balanced `{...}` subgraph and returns the remainder.
388#[instrument(level = "trace")]
fn skip_subgraph(s: &str) -> Result<&str, String> {
  // Finds the first `{` in `s` and returns the text after its matching `}`.
  // Nested braces are balanced, and braces inside double-quoted strings are
  // ignored.
  //
  // Errors: "Expected '{'" when there is no `{`, and "Unclosed subgraph" when
  // the matching `}` is missing.
  let open = s.find('{').ok_or("Expected '{'")?;

  let mut depth = 0usize;
  let mut in_string = false;
  let mut escaped = false;
  // `char_indices` yields byte offsets, which is what slicing needs; a plain
  // character count would be wrong as soon as the body contains non-ASCII.
  for (offset, c) in s[open..].char_indices() {
    if in_string {
      if escaped {
        escaped = false;
      } else if c == '\\' {
        escaped = true;
      } else if c == '"' {
        in_string = false;
      }
      continue;
    }
    match c {
      '"' => in_string = true,
      '{' => depth += 1,
      '}' => {
        // The scan starts on `{`, so `depth` is at least 1 here.
        depth -= 1;
        if depth == 0 {
          return Ok(&s[open + offset + 1..]);
        }
      }
      _ => {}
    }
  }
  Err("Unclosed subgraph".to_string())
}
405}