torg_core/
builder.rs

1//! State machine builder for TØR-G graph construction.
2//!
3//! The builder consumes tokens one-by-one and constructs the graph,
4//! enforcing all structural constraints. It provides `valid_next_tokens()`
5//! for LLM logit masking.
6
7use std::collections::HashSet;
8
9use crate::error::BuildError;
10use crate::graph::{Graph, Node};
11use crate::limits::Limits;
12use crate::token::{BoolOp, Source, Token};
13
14/// Builder state machine phases.
15#[derive(Debug, Clone, Copy, PartialEq, Eq)]
16pub enum Phase {
17    /// Accepting input declarations.
18    Inputs,
19    /// Accepting node definitions or output declarations.
20    Nodes,
21    /// Accepting only output declarations.
22    Outputs,
23    /// Graph complete, no more tokens accepted.
24    Done,
25}
26
27/// State within a node definition (● ... ○).
28#[allow(clippy::enum_variant_names)]
29#[derive(Debug, Clone, Copy, PartialEq, Eq)]
30enum NodeState {
31    /// Expecting node ID after NodeStart.
32    ExpectId,
33    /// Expecting operator after node ID.
34    ExpectOp { id: u16 },
35    /// Expecting first source operand.
36    ExpectLeft { id: u16, op: BoolOp },
37    /// Expecting second source operand.
38    ExpectRight { id: u16, op: BoolOp, left: Source },
39    /// Expecting NodeEnd.
40    ExpectEnd {
41        id: u16,
42        op: BoolOp,
43        left: Source,
44        right: Source,
45    },
46}
47
48/// Deterministic state machine for graph construction.
49///
50/// The builder enforces all structural constraints:
51/// - No duplicate ID definitions
52/// - No forward references (DAG property)
53/// - Resource limits (max nodes, depth, inputs, outputs)
54pub struct Builder {
55    graph: Graph,
56    phase: Phase,
57    node_state: Option<NodeState>,
58    /// All defined IDs (inputs + nodes).
59    defined: HashSet<u16>,
60    limits: Limits,
61}
62
63impl Default for Builder {
64    fn default() -> Self {
65        Self::new()
66    }
67}
68
69impl Builder {
70    /// Create a new builder with default limits.
71    pub fn new() -> Self {
72        Self::with_limits(Limits::default())
73    }
74
75    /// Create a new builder with custom limits.
76    pub fn with_limits(limits: Limits) -> Self {
77        Self {
78            graph: Graph::new(),
79            phase: Phase::Inputs,
80            node_state: None,
81            defined: HashSet::new(),
82            limits,
83        }
84    }
85
86    /// Get the current build phase.
87    pub fn phase(&self) -> Phase {
88        self.phase
89    }
90
91    /// Feed a single token to the builder.
92    pub fn push(&mut self, token: Token) -> Result<(), BuildError> {
93        match self.phase {
94            Phase::Done => Err(BuildError::UnexpectedToken),
95            Phase::Inputs => self.handle_inputs(token),
96            Phase::Nodes => self.handle_nodes(token),
97            Phase::Outputs => self.handle_outputs(token),
98        }
99    }
100
101    /// Finalize and return the graph.
102    ///
103    /// # Errors
104    ///
105    /// Returns an error if:
106    /// - A node definition is incomplete
107    /// - No outputs have been declared
108    pub fn finish(self) -> Result<Graph, BuildError> {
109        if self.node_state.is_some() {
110            return Err(BuildError::IncompleteNode);
111        }
112        if self.graph.outputs.is_empty() {
113            return Err(BuildError::NoOutputs);
114        }
115        Ok(self.graph)
116    }
117
118    /// Handle tokens in the Inputs phase.
119    fn handle_inputs(&mut self, token: Token) -> Result<(), BuildError> {
120        match token {
121            Token::InputDecl => {
122                // Stay in Inputs phase, expect Id next
123                Ok(())
124            }
125            Token::Id(id) => {
126                // Check limits
127                if self.graph.inputs.len() >= self.limits.max_inputs {
128                    return Err(BuildError::MaxInputsExceeded(self.limits.max_inputs));
129                }
130                // Check for duplicate
131                if self.defined.contains(&id) {
132                    return Err(BuildError::DuplicateDefinition(id));
133                }
134                self.defined.insert(id);
135                self.graph.inputs.push(id);
136                Ok(())
137            }
138            Token::NodeStart => {
139                // Check limits
140                if self.graph.nodes.len() >= self.limits.max_nodes {
141                    return Err(BuildError::MaxNodesExceeded(self.limits.max_nodes));
142                }
143                // Transition to Nodes phase
144                self.phase = Phase::Nodes;
145                self.node_state = Some(NodeState::ExpectId);
146                Ok(())
147            }
148            Token::OutputDecl => {
149                // Skip directly to Outputs phase (empty nodes section)
150                self.phase = Phase::Outputs;
151                Ok(())
152            }
153            _ => Err(BuildError::UnexpectedToken),
154        }
155    }
156
157    /// Handle tokens in the Nodes phase.
158    fn handle_nodes(&mut self, token: Token) -> Result<(), BuildError> {
159        match self.node_state {
160            None => {
161                // Between nodes, expecting NodeStart or OutputDecl
162                match token {
163                    Token::NodeStart => {
164                        // Check limits
165                        if self.graph.nodes.len() >= self.limits.max_nodes {
166                            return Err(BuildError::MaxNodesExceeded(self.limits.max_nodes));
167                        }
168                        self.node_state = Some(NodeState::ExpectId);
169                        Ok(())
170                    }
171                    Token::OutputDecl => {
172                        self.phase = Phase::Outputs;
173                        Ok(())
174                    }
175                    _ => Err(BuildError::UnexpectedToken),
176                }
177            }
178            Some(state) => self.handle_node_state(state, token),
179        }
180    }
181
182    /// Handle tokens within a node definition.
183    fn handle_node_state(&mut self, state: NodeState, token: Token) -> Result<(), BuildError> {
184        match state {
185            NodeState::ExpectId => {
186                if let Token::Id(id) = token {
187                    // Check for duplicate
188                    if self.defined.contains(&id) {
189                        return Err(BuildError::DuplicateDefinition(id));
190                    }
191                    self.node_state = Some(NodeState::ExpectOp { id });
192                    Ok(())
193                } else {
194                    Err(BuildError::UnexpectedToken)
195                }
196            }
197            NodeState::ExpectOp { id } => {
198                if let Some(op) = token.as_bool_op() {
199                    self.node_state = Some(NodeState::ExpectLeft { id, op });
200                    Ok(())
201                } else {
202                    Err(BuildError::UnexpectedToken)
203                }
204            }
205            NodeState::ExpectLeft { id, op } => {
206                if let Some(source) = token.as_source() {
207                    // Validate source reference
208                    if let Source::Id(ref_id) = source {
209                        if !self.defined.contains(&ref_id) {
210                            return Err(BuildError::UndefinedReference(ref_id));
211                        }
212                    }
213                    self.node_state = Some(NodeState::ExpectRight {
214                        id,
215                        op,
216                        left: source,
217                    });
218                    Ok(())
219                } else {
220                    Err(BuildError::UnexpectedToken)
221                }
222            }
223            NodeState::ExpectRight { id, op, left } => {
224                if let Some(source) = token.as_source() {
225                    // Validate source reference
226                    if let Source::Id(ref_id) = source {
227                        if !self.defined.contains(&ref_id) {
228                            return Err(BuildError::UndefinedReference(ref_id));
229                        }
230                    }
231                    self.node_state = Some(NodeState::ExpectEnd {
232                        id,
233                        op,
234                        left,
235                        right: source,
236                    });
237                    Ok(())
238                } else {
239                    Err(BuildError::UnexpectedToken)
240                }
241            }
242            NodeState::ExpectEnd {
243                id,
244                op,
245                left,
246                right,
247            } => {
248                if token == Token::NodeEnd {
249                    // Commit the node
250                    let node = Node::new(id, op, left, right);
251                    self.defined.insert(id);
252                    self.graph.nodes.push(node);
253
254                    // Check depth limit
255                    let depth = self.graph.depth();
256                    if depth > self.limits.max_depth {
257                        return Err(BuildError::MaxDepthExceeded(self.limits.max_depth));
258                    }
259
260                    self.node_state = None;
261                    Ok(())
262                } else {
263                    Err(BuildError::UnexpectedToken)
264                }
265            }
266        }
267    }
268
269    /// Handle tokens in the Outputs phase.
270    fn handle_outputs(&mut self, token: Token) -> Result<(), BuildError> {
271        match token {
272            Token::OutputDecl => {
273                // Stay in Outputs phase, expect Id next
274                Ok(())
275            }
276            Token::Id(id) => {
277                // Check limits
278                if self.graph.outputs.len() >= self.limits.max_outputs {
279                    return Err(BuildError::MaxOutputsExceeded(self.limits.max_outputs));
280                }
281                // Output must reference a defined ID
282                if !self.defined.contains(&id) {
283                    return Err(BuildError::UndefinedReference(id));
284                }
285                self.graph.outputs.push(id);
286                Ok(())
287            }
288            _ => {
289                self.phase = Phase::Done;
290                Err(BuildError::UnexpectedToken)
291            }
292        }
293    }
294
295    /// Returns the set of valid next tokens for LLM logit masking.
296    ///
297    /// This is the critical interface for constraining LLM output.
298    /// By masking all logits except those corresponding to valid tokens,
299    /// the LLM is guaranteed to produce a syntactically correct circuit.
300    pub fn valid_next_tokens(&self) -> Vec<Token> {
301        match self.phase {
302            Phase::Done => vec![],
303            Phase::Inputs => self.valid_in_inputs(),
304            Phase::Nodes => self.valid_in_nodes(),
305            Phase::Outputs => self.valid_in_outputs(),
306        }
307    }
308
309    fn valid_in_inputs(&self) -> Vec<Token> {
310        let mut valid = Vec::new();
311
312        // Can always declare more inputs (up to limit)
313        if self.graph.inputs.len() < self.limits.max_inputs {
314            valid.push(Token::InputDecl);
315            // After InputDecl, valid IDs are any not yet defined
316            // For practicality, suggest a range
317            for id in 0..=255u16 {
318                if !self.defined.contains(&id) {
319                    valid.push(Token::Id(id));
320                }
321            }
322        }
323
324        // Can start defining nodes
325        if self.graph.nodes.len() < self.limits.max_nodes {
326            valid.push(Token::NodeStart);
327        }
328
329        // Can go directly to outputs (even with no nodes, though finish() will fail)
330        if self.graph.outputs.len() < self.limits.max_outputs {
331            valid.push(Token::OutputDecl);
332        }
333
334        valid
335    }
336
337    fn valid_in_nodes(&self) -> Vec<Token> {
338        match &self.node_state {
339            None => {
340                // Between nodes
341                let mut valid = Vec::new();
342                if self.graph.nodes.len() < self.limits.max_nodes {
343                    valid.push(Token::NodeStart);
344                }
345                if self.graph.outputs.len() < self.limits.max_outputs
346                    && !self.graph.nodes.is_empty()
347                {
348                    valid.push(Token::OutputDecl);
349                }
350                valid
351            }
352            Some(state) => self.valid_in_node_state(state),
353        }
354    }
355
356    fn valid_in_node_state(&self, state: &NodeState) -> Vec<Token> {
357        match state {
358            NodeState::ExpectId => {
359                // Any ID not yet defined
360                let mut valid = Vec::new();
361                for id in 0..=255u16 {
362                    if !self.defined.contains(&id) {
363                        valid.push(Token::Id(id));
364                    }
365                }
366                valid
367            }
368            NodeState::ExpectOp { .. } => {
369                vec![Token::Or, Token::Nor, Token::Xor]
370            }
371            NodeState::ExpectLeft { .. } | NodeState::ExpectRight { .. } => {
372                // Can reference any defined ID, or use constants
373                let mut valid = Vec::new();
374                for &id in &self.defined {
375                    valid.push(Token::Id(id));
376                }
377                valid.push(Token::True);
378                valid.push(Token::False);
379                valid
380            }
381            NodeState::ExpectEnd { .. } => {
382                vec![Token::NodeEnd]
383            }
384        }
385    }
386
387    fn valid_in_outputs(&self) -> Vec<Token> {
388        let mut valid = Vec::new();
389
390        if self.graph.outputs.len() < self.limits.max_outputs {
391            valid.push(Token::OutputDecl);
392            // Can reference any defined ID as output
393            for &id in &self.defined {
394                valid.push(Token::Id(id));
395            }
396        }
397
398        valid
399    }
400}
401
402#[cfg(test)]
403mod tests {
404    use super::*;
405
406    #[test]
407    fn test_simple_build() {
408        let mut builder = Builder::new();
409
410        // Declare inputs
411        builder.push(Token::InputDecl).unwrap();
412        builder.push(Token::Id(0)).unwrap();
413        builder.push(Token::InputDecl).unwrap();
414        builder.push(Token::Id(1)).unwrap();
415
416        // Define a node: node(2) = input(0) OR input(1)
417        builder.push(Token::NodeStart).unwrap();
418        builder.push(Token::Id(2)).unwrap();
419        builder.push(Token::Or).unwrap();
420        builder.push(Token::Id(0)).unwrap();
421        builder.push(Token::Id(1)).unwrap();
422        builder.push(Token::NodeEnd).unwrap();
423
424        // Declare output
425        builder.push(Token::OutputDecl).unwrap();
426        builder.push(Token::Id(2)).unwrap();
427
428        let graph = builder.finish().unwrap();
429        assert_eq!(graph.inputs, vec![0, 1]);
430        assert_eq!(graph.nodes.len(), 1);
431        assert_eq!(graph.outputs, vec![2]);
432    }
433
434    #[test]
435    fn test_duplicate_definition() {
436        let mut builder = Builder::new();
437        builder.push(Token::InputDecl).unwrap();
438        builder.push(Token::Id(0)).unwrap();
439        builder.push(Token::InputDecl).unwrap();
440        assert_eq!(
441            builder.push(Token::Id(0)),
442            Err(BuildError::DuplicateDefinition(0))
443        );
444    }
445
446    #[test]
447    fn test_undefined_reference() {
448        let mut builder = Builder::new();
449        builder.push(Token::InputDecl).unwrap();
450        builder.push(Token::Id(0)).unwrap();
451        builder.push(Token::NodeStart).unwrap();
452        builder.push(Token::Id(1)).unwrap();
453        builder.push(Token::Or).unwrap();
454        builder.push(Token::Id(0)).unwrap();
455        // Reference to undefined ID 99
456        assert_eq!(
457            builder.push(Token::Id(99)),
458            Err(BuildError::UndefinedReference(99))
459        );
460    }
461
462    #[test]
463    fn test_no_outputs_error() {
464        let mut builder = Builder::new();
465        builder.push(Token::InputDecl).unwrap();
466        builder.push(Token::Id(0)).unwrap();
467        assert_eq!(builder.finish(), Err(BuildError::NoOutputs));
468    }
469
470    #[test]
471    fn test_incomplete_node_error() {
472        let mut builder = Builder::new();
473        builder.push(Token::InputDecl).unwrap();
474        builder.push(Token::Id(0)).unwrap();
475        builder.push(Token::NodeStart).unwrap();
476        builder.push(Token::Id(1)).unwrap();
477        builder.push(Token::Or).unwrap();
478        // Node incomplete
479        assert_eq!(builder.finish(), Err(BuildError::IncompleteNode));
480    }
481
482    #[test]
483    fn test_valid_next_tokens_initial() {
484        let builder = Builder::new();
485        let valid = builder.valid_next_tokens();
486        assert!(valid.contains(&Token::InputDecl));
487        assert!(valid.contains(&Token::NodeStart));
488        assert!(valid.contains(&Token::OutputDecl));
489    }
490
491    #[test]
492    fn test_valid_next_tokens_expect_op() {
493        let mut builder = Builder::new();
494        builder.push(Token::InputDecl).unwrap();
495        builder.push(Token::Id(0)).unwrap();
496        builder.push(Token::NodeStart).unwrap();
497        builder.push(Token::Id(1)).unwrap();
498
499        let valid = builder.valid_next_tokens();
500        assert_eq!(valid, vec![Token::Or, Token::Nor, Token::Xor]);
501    }
502
503    #[test]
504    fn test_constants_as_sources() {
505        let mut builder = Builder::new();
506
507        // No inputs, just use constants
508        builder.push(Token::NodeStart).unwrap();
509        builder.push(Token::Id(0)).unwrap();
510        builder.push(Token::Or).unwrap();
511        builder.push(Token::True).unwrap();
512        builder.push(Token::False).unwrap();
513        builder.push(Token::NodeEnd).unwrap();
514
515        builder.push(Token::OutputDecl).unwrap();
516        builder.push(Token::Id(0)).unwrap();
517
518        let graph = builder.finish().unwrap();
519        assert!(graph.inputs.is_empty());
520        assert_eq!(graph.nodes.len(), 1);
521        assert_eq!(graph.nodes[0].left, Source::True);
522        assert_eq!(graph.nodes[0].right, Source::False);
523    }
524}