torg_core/
builder.rs

1//! State machine builder for TØR-G graph construction.
2//!
3//! The builder consumes tokens one-by-one and constructs the graph,
4//! enforcing all structural constraints. It provides `valid_next_tokens()`
5//! for LLM logit masking.
6
7use std::collections::HashSet;
8
9use crate::error::BuildError;
10use crate::graph::{Graph, Node};
11use crate::limits::Limits;
12use crate::token::{BoolOp, Source, Token};
13
14/// Builder state machine phases.
15#[derive(Debug, Clone, Copy, PartialEq, Eq)]
16pub enum Phase {
17    /// Accepting input declarations.
18    Inputs,
19    /// Accepting node definitions or output declarations.
20    Nodes,
21    /// Accepting only output declarations.
22    Outputs,
23    /// Graph complete, no more tokens accepted.
24    Done,
25}
26
27/// State within a node definition (● ... ○).
28#[allow(clippy::enum_variant_names)]
29#[derive(Debug, Clone, Copy, PartialEq, Eq)]
30enum NodeState {
31    /// Expecting node ID after NodeStart.
32    ExpectId,
33    /// Expecting operator after node ID.
34    ExpectOp { id: u16 },
35    /// Expecting first source operand.
36    ExpectLeft { id: u16, op: BoolOp },
37    /// Expecting second source operand.
38    ExpectRight { id: u16, op: BoolOp, left: Source },
39    /// Expecting NodeEnd.
40    ExpectEnd {
41        id: u16,
42        op: BoolOp,
43        left: Source,
44        right: Source,
45    },
46}
47
48/// Deterministic state machine for graph construction.
49///
50/// The builder enforces all structural constraints:
51/// - No duplicate ID definitions
52/// - No forward references (DAG property)
53/// - Resource limits (max nodes, depth, inputs, outputs)
54#[derive(Debug)]
55pub struct Builder {
56    graph: Graph,
57    phase: Phase,
58    node_state: Option<NodeState>,
59    /// All defined IDs (inputs + nodes).
60    defined: HashSet<u16>,
61    limits: Limits,
62}
63
64impl Default for Builder {
65    fn default() -> Self {
66        Self::new()
67    }
68}
69
70impl Builder {
71    /// Create a new builder with default limits.
72    pub fn new() -> Self {
73        Self::with_limits(Limits::default())
74    }
75
76    /// Create a new builder with custom limits.
77    pub fn with_limits(limits: Limits) -> Self {
78        Self {
79            graph: Graph::new(),
80            phase: Phase::Inputs,
81            node_state: None,
82            defined: HashSet::new(),
83            limits,
84        }
85    }
86
87    /// Get the current build phase.
88    pub fn phase(&self) -> Phase {
89        self.phase
90    }
91
92    /// Feed a single token to the builder.
93    pub fn push(&mut self, token: Token) -> Result<(), BuildError> {
94        match self.phase {
95            Phase::Done => Err(BuildError::UnexpectedToken),
96            Phase::Inputs => self.handle_inputs(token),
97            Phase::Nodes => self.handle_nodes(token),
98            Phase::Outputs => self.handle_outputs(token),
99        }
100    }
101
102    /// Finalize and return the graph.
103    ///
104    /// # Errors
105    ///
106    /// Returns an error if:
107    /// - A node definition is incomplete
108    /// - No outputs have been declared
109    pub fn finish(self) -> Result<Graph, BuildError> {
110        if self.node_state.is_some() {
111            return Err(BuildError::IncompleteNode);
112        }
113        if self.graph.outputs.is_empty() {
114            return Err(BuildError::NoOutputs);
115        }
116        Ok(self.graph)
117    }
118
119    /// Handle tokens in the Inputs phase.
120    fn handle_inputs(&mut self, token: Token) -> Result<(), BuildError> {
121        match token {
122            Token::InputDecl => {
123                // Stay in Inputs phase, expect Id next
124                Ok(())
125            }
126            Token::Id(id) => {
127                // Check limits
128                if self.graph.inputs.len() >= self.limits.max_inputs {
129                    return Err(BuildError::MaxInputsExceeded(self.limits.max_inputs));
130                }
131                // Check for duplicate
132                if self.defined.contains(&id) {
133                    return Err(BuildError::DuplicateDefinition(id));
134                }
135                self.defined.insert(id);
136                self.graph.inputs.push(id);
137                Ok(())
138            }
139            Token::NodeStart => {
140                // Check limits
141                if self.graph.nodes.len() >= self.limits.max_nodes {
142                    return Err(BuildError::MaxNodesExceeded(self.limits.max_nodes));
143                }
144                // Transition to Nodes phase
145                self.phase = Phase::Nodes;
146                self.node_state = Some(NodeState::ExpectId);
147                Ok(())
148            }
149            Token::OutputDecl => {
150                // Skip directly to Outputs phase (empty nodes section)
151                self.phase = Phase::Outputs;
152                Ok(())
153            }
154            _ => Err(BuildError::UnexpectedToken),
155        }
156    }
157
158    /// Handle tokens in the Nodes phase.
159    fn handle_nodes(&mut self, token: Token) -> Result<(), BuildError> {
160        match self.node_state {
161            None => {
162                // Between nodes, expecting NodeStart or OutputDecl
163                match token {
164                    Token::NodeStart => {
165                        // Check limits
166                        if self.graph.nodes.len() >= self.limits.max_nodes {
167                            return Err(BuildError::MaxNodesExceeded(self.limits.max_nodes));
168                        }
169                        self.node_state = Some(NodeState::ExpectId);
170                        Ok(())
171                    }
172                    Token::OutputDecl => {
173                        self.phase = Phase::Outputs;
174                        Ok(())
175                    }
176                    _ => Err(BuildError::UnexpectedToken),
177                }
178            }
179            Some(state) => self.handle_node_state(state, token),
180        }
181    }
182
183    /// Handle tokens within a node definition.
184    fn handle_node_state(&mut self, state: NodeState, token: Token) -> Result<(), BuildError> {
185        match state {
186            NodeState::ExpectId => {
187                if let Token::Id(id) = token {
188                    // Check for duplicate
189                    if self.defined.contains(&id) {
190                        return Err(BuildError::DuplicateDefinition(id));
191                    }
192                    self.node_state = Some(NodeState::ExpectOp { id });
193                    Ok(())
194                } else {
195                    Err(BuildError::UnexpectedToken)
196                }
197            }
198            NodeState::ExpectOp { id } => {
199                if let Some(op) = token.as_bool_op() {
200                    self.node_state = Some(NodeState::ExpectLeft { id, op });
201                    Ok(())
202                } else {
203                    Err(BuildError::UnexpectedToken)
204                }
205            }
206            NodeState::ExpectLeft { id, op } => {
207                if let Some(source) = token.as_source() {
208                    // Validate source reference
209                    if let Source::Id(ref_id) = source {
210                        if !self.defined.contains(&ref_id) {
211                            return Err(BuildError::UndefinedReference(ref_id));
212                        }
213                    }
214                    self.node_state = Some(NodeState::ExpectRight {
215                        id,
216                        op,
217                        left: source,
218                    });
219                    Ok(())
220                } else {
221                    Err(BuildError::UnexpectedToken)
222                }
223            }
224            NodeState::ExpectRight { id, op, left } => {
225                if let Some(source) = token.as_source() {
226                    // Validate source reference
227                    if let Source::Id(ref_id) = source {
228                        if !self.defined.contains(&ref_id) {
229                            return Err(BuildError::UndefinedReference(ref_id));
230                        }
231                    }
232                    self.node_state = Some(NodeState::ExpectEnd {
233                        id,
234                        op,
235                        left,
236                        right: source,
237                    });
238                    Ok(())
239                } else {
240                    Err(BuildError::UnexpectedToken)
241                }
242            }
243            NodeState::ExpectEnd {
244                id,
245                op,
246                left,
247                right,
248            } => {
249                if token == Token::NodeEnd {
250                    // Commit the node
251                    let node = Node::new(id, op, left, right);
252                    self.defined.insert(id);
253                    self.graph.nodes.push(node);
254
255                    // Check depth limit
256                    let depth = self.graph.depth();
257                    if depth > self.limits.max_depth {
258                        return Err(BuildError::MaxDepthExceeded(self.limits.max_depth));
259                    }
260
261                    self.node_state = None;
262                    Ok(())
263                } else {
264                    Err(BuildError::UnexpectedToken)
265                }
266            }
267        }
268    }
269
270    /// Handle tokens in the Outputs phase.
271    fn handle_outputs(&mut self, token: Token) -> Result<(), BuildError> {
272        match token {
273            Token::OutputDecl => {
274                // Stay in Outputs phase, expect Id next
275                Ok(())
276            }
277            Token::Id(id) => {
278                // Check limits
279                if self.graph.outputs.len() >= self.limits.max_outputs {
280                    return Err(BuildError::MaxOutputsExceeded(self.limits.max_outputs));
281                }
282                // Output must reference a defined ID
283                if !self.defined.contains(&id) {
284                    return Err(BuildError::UndefinedReference(id));
285                }
286                self.graph.outputs.push(id);
287                Ok(())
288            }
289            _ => {
290                self.phase = Phase::Done;
291                Err(BuildError::UnexpectedToken)
292            }
293        }
294    }
295
296    /// Returns the set of valid next tokens for LLM logit masking.
297    ///
298    /// This is the critical interface for constraining LLM output.
299    /// By masking all logits except those corresponding to valid tokens,
300    /// the LLM is guaranteed to produce a syntactically correct circuit.
301    pub fn valid_next_tokens(&self) -> Vec<Token> {
302        match self.phase {
303            Phase::Done => vec![],
304            Phase::Inputs => self.valid_in_inputs(),
305            Phase::Nodes => self.valid_in_nodes(),
306            Phase::Outputs => self.valid_in_outputs(),
307        }
308    }
309
310    fn valid_in_inputs(&self) -> Vec<Token> {
311        let mut valid = Vec::new();
312
313        // Can always declare more inputs (up to limit)
314        if self.graph.inputs.len() < self.limits.max_inputs {
315            valid.push(Token::InputDecl);
316            // After InputDecl, valid IDs are any not yet defined
317            // For practicality, suggest a range
318            for id in 0..=255u16 {
319                if !self.defined.contains(&id) {
320                    valid.push(Token::Id(id));
321                }
322            }
323        }
324
325        // Can start defining nodes
326        if self.graph.nodes.len() < self.limits.max_nodes {
327            valid.push(Token::NodeStart);
328        }
329
330        // Can go directly to outputs (even with no nodes, though finish() will fail)
331        if self.graph.outputs.len() < self.limits.max_outputs {
332            valid.push(Token::OutputDecl);
333        }
334
335        valid
336    }
337
338    fn valid_in_nodes(&self) -> Vec<Token> {
339        match &self.node_state {
340            None => {
341                // Between nodes
342                let mut valid = Vec::new();
343                if self.graph.nodes.len() < self.limits.max_nodes {
344                    valid.push(Token::NodeStart);
345                }
346                if self.graph.outputs.len() < self.limits.max_outputs
347                    && !self.graph.nodes.is_empty()
348                {
349                    valid.push(Token::OutputDecl);
350                }
351                valid
352            }
353            Some(state) => self.valid_in_node_state(state),
354        }
355    }
356
357    fn valid_in_node_state(&self, state: &NodeState) -> Vec<Token> {
358        match state {
359            NodeState::ExpectId => {
360                // Any ID not yet defined
361                let mut valid = Vec::new();
362                for id in 0..=255u16 {
363                    if !self.defined.contains(&id) {
364                        valid.push(Token::Id(id));
365                    }
366                }
367                valid
368            }
369            NodeState::ExpectOp { .. } => {
370                vec![Token::Or, Token::Nor, Token::Xor]
371            }
372            NodeState::ExpectLeft { .. } | NodeState::ExpectRight { .. } => {
373                // Can reference any defined ID, or use constants
374                let mut valid = Vec::new();
375                for &id in &self.defined {
376                    valid.push(Token::Id(id));
377                }
378                valid.push(Token::True);
379                valid.push(Token::False);
380                valid
381            }
382            NodeState::ExpectEnd { .. } => {
383                vec![Token::NodeEnd]
384            }
385        }
386    }
387
388    fn valid_in_outputs(&self) -> Vec<Token> {
389        let mut valid = Vec::new();
390
391        if self.graph.outputs.len() < self.limits.max_outputs {
392            valid.push(Token::OutputDecl);
393            // Can reference any defined ID as output
394            for &id in &self.defined {
395                valid.push(Token::Id(id));
396            }
397        }
398
399        valid
400    }
401}
402
403#[cfg(test)]
404mod tests {
405    use super::*;
406
407    #[test]
408    fn test_simple_build() {
409        let mut builder = Builder::new();
410
411        // Declare inputs
412        builder.push(Token::InputDecl).unwrap();
413        builder.push(Token::Id(0)).unwrap();
414        builder.push(Token::InputDecl).unwrap();
415        builder.push(Token::Id(1)).unwrap();
416
417        // Define a node: node(2) = input(0) OR input(1)
418        builder.push(Token::NodeStart).unwrap();
419        builder.push(Token::Id(2)).unwrap();
420        builder.push(Token::Or).unwrap();
421        builder.push(Token::Id(0)).unwrap();
422        builder.push(Token::Id(1)).unwrap();
423        builder.push(Token::NodeEnd).unwrap();
424
425        // Declare output
426        builder.push(Token::OutputDecl).unwrap();
427        builder.push(Token::Id(2)).unwrap();
428
429        let graph = builder.finish().unwrap();
430        assert_eq!(graph.inputs, vec![0, 1]);
431        assert_eq!(graph.nodes.len(), 1);
432        assert_eq!(graph.outputs, vec![2]);
433    }
434
435    #[test]
436    fn test_duplicate_definition() {
437        let mut builder = Builder::new();
438        builder.push(Token::InputDecl).unwrap();
439        builder.push(Token::Id(0)).unwrap();
440        builder.push(Token::InputDecl).unwrap();
441        assert_eq!(
442            builder.push(Token::Id(0)),
443            Err(BuildError::DuplicateDefinition(0))
444        );
445    }
446
447    #[test]
448    fn test_undefined_reference() {
449        let mut builder = Builder::new();
450        builder.push(Token::InputDecl).unwrap();
451        builder.push(Token::Id(0)).unwrap();
452        builder.push(Token::NodeStart).unwrap();
453        builder.push(Token::Id(1)).unwrap();
454        builder.push(Token::Or).unwrap();
455        builder.push(Token::Id(0)).unwrap();
456        // Reference to undefined ID 99
457        assert_eq!(
458            builder.push(Token::Id(99)),
459            Err(BuildError::UndefinedReference(99))
460        );
461    }
462
463    #[test]
464    fn test_no_outputs_error() {
465        let mut builder = Builder::new();
466        builder.push(Token::InputDecl).unwrap();
467        builder.push(Token::Id(0)).unwrap();
468        assert_eq!(builder.finish(), Err(BuildError::NoOutputs));
469    }
470
471    #[test]
472    fn test_incomplete_node_error() {
473        let mut builder = Builder::new();
474        builder.push(Token::InputDecl).unwrap();
475        builder.push(Token::Id(0)).unwrap();
476        builder.push(Token::NodeStart).unwrap();
477        builder.push(Token::Id(1)).unwrap();
478        builder.push(Token::Or).unwrap();
479        // Node incomplete
480        assert_eq!(builder.finish(), Err(BuildError::IncompleteNode));
481    }
482
483    #[test]
484    fn test_valid_next_tokens_initial() {
485        let builder = Builder::new();
486        let valid = builder.valid_next_tokens();
487        assert!(valid.contains(&Token::InputDecl));
488        assert!(valid.contains(&Token::NodeStart));
489        assert!(valid.contains(&Token::OutputDecl));
490    }
491
492    #[test]
493    fn test_valid_next_tokens_expect_op() {
494        let mut builder = Builder::new();
495        builder.push(Token::InputDecl).unwrap();
496        builder.push(Token::Id(0)).unwrap();
497        builder.push(Token::NodeStart).unwrap();
498        builder.push(Token::Id(1)).unwrap();
499
500        let valid = builder.valid_next_tokens();
501        assert_eq!(valid, vec![Token::Or, Token::Nor, Token::Xor]);
502    }
503
504    #[test]
505    fn test_constants_as_sources() {
506        let mut builder = Builder::new();
507
508        // No inputs, just use constants
509        builder.push(Token::NodeStart).unwrap();
510        builder.push(Token::Id(0)).unwrap();
511        builder.push(Token::Or).unwrap();
512        builder.push(Token::True).unwrap();
513        builder.push(Token::False).unwrap();
514        builder.push(Token::NodeEnd).unwrap();
515
516        builder.push(Token::OutputDecl).unwrap();
517        builder.push(Token::Id(0)).unwrap();
518
519        let graph = builder.finish().unwrap();
520        assert!(graph.inputs.is_empty());
521        assert_eq!(graph.nodes.len(), 1);
522        assert_eq!(graph.nodes[0].left, Source::True);
523        assert_eq!(graph.nodes[0].right, Source::False);
524    }
525}