torg_core/
builder.rs

1//! State machine builder for TØR-G graph construction.
2//!
3//! The builder consumes tokens one-by-one and constructs the graph,
4//! enforcing all structural constraints. It provides `valid_next_tokens()`
5//! for LLM logit masking.
6
7use std::collections::HashSet;
8
9use crate::error::BuildError;
10use crate::graph::{Graph, Node};
11use crate::limits::Limits;
12use crate::token::{BoolOp, Source, Token};
13
14/// Builder state machine phases.
15#[derive(Debug, Clone, Copy, PartialEq, Eq)]
16pub enum Phase {
17    /// Accepting input declarations.
18    Inputs,
19    /// Accepting node definitions or output declarations.
20    Nodes,
21    /// Accepting only output declarations.
22    Outputs,
23    /// Graph complete, no more tokens accepted.
24    Done,
25}
26
27/// State within the Inputs phase.
28#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
29enum InputState {
30    /// Ready for InputDecl, NodeStart, or OutputDecl.
31    #[default]
32    Ready,
33    /// Just saw InputDecl, expecting Id.
34    ExpectId,
35}
36
37/// State within the Outputs phase.
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
39enum OutputState {
40    /// Ready for OutputDecl or end.
41    #[default]
42    Ready,
43    /// Just saw OutputDecl, expecting Id.
44    ExpectId,
45}
46
47/// State within a node definition (● ... ○).
48#[allow(clippy::enum_variant_names)]
49#[derive(Debug, Clone, Copy, PartialEq, Eq)]
50enum NodeState {
51    /// Expecting node ID after NodeStart.
52    ExpectId,
53    /// Expecting operator after node ID.
54    ExpectOp { id: u16 },
55    /// Expecting first source operand.
56    ExpectLeft { id: u16, op: BoolOp },
57    /// Expecting second source operand.
58    ExpectRight { id: u16, op: BoolOp, left: Source },
59    /// Expecting NodeEnd.
60    ExpectEnd {
61        id: u16,
62        op: BoolOp,
63        left: Source,
64        right: Source,
65    },
66}
67
68/// Deterministic state machine for graph construction.
69///
70/// The builder enforces all structural constraints:
71/// - No duplicate ID definitions
72/// - No forward references (DAG property)
73/// - Resource limits (max nodes, depth, inputs, outputs)
74#[derive(Debug)]
75pub struct Builder {
76    graph: Graph,
77    phase: Phase,
78    node_state: Option<NodeState>,
79    input_state: InputState,
80    output_state: OutputState,
81    /// All defined IDs (inputs + nodes).
82    defined: HashSet<u16>,
83    limits: Limits,
84}
85
86impl Default for Builder {
87    fn default() -> Self {
88        Self::new()
89    }
90}
91
92impl Builder {
93    /// Create a new builder with default limits.
94    pub fn new() -> Self {
95        Self::with_limits(Limits::default())
96    }
97
98    /// Create a new builder with custom limits.
99    pub fn with_limits(limits: Limits) -> Self {
100        Self {
101            graph: Graph::new(),
102            phase: Phase::Inputs,
103            node_state: None,
104            input_state: InputState::default(),
105            output_state: OutputState::default(),
106            defined: HashSet::new(),
107            limits,
108        }
109    }
110
111    /// Get the current build phase.
112    pub fn phase(&self) -> Phase {
113        self.phase
114    }
115
116    /// Check if the graph is in a completable state.
117    ///
118    /// Returns `true` when:
119    /// - At least one output has been declared
120    /// - No node definition is in progress
121    /// - We're ready for another output (not mid-declaration)
122    ///
123    /// This is useful for LLM decoding where we want to know if
124    /// generation can stop, even though more outputs could be added.
125    pub fn is_completable(&self) -> bool {
126        !self.graph.outputs.is_empty()
127            && self.node_state.is_none()
128            && self.output_state == OutputState::Ready
129    }
130
131    /// Feed a single token to the builder.
132    pub fn push(&mut self, token: Token) -> Result<(), BuildError> {
133        match self.phase {
134            Phase::Done => Err(BuildError::UnexpectedToken),
135            Phase::Inputs => self.handle_inputs(token),
136            Phase::Nodes => self.handle_nodes(token),
137            Phase::Outputs => self.handle_outputs(token),
138        }
139    }
140
141    /// Finalize and return the graph.
142    ///
143    /// # Errors
144    ///
145    /// Returns an error if:
146    /// - A node definition is incomplete
147    /// - No outputs have been declared
148    pub fn finish(self) -> Result<Graph, BuildError> {
149        if self.node_state.is_some() {
150            return Err(BuildError::IncompleteNode);
151        }
152        if self.graph.outputs.is_empty() {
153            return Err(BuildError::NoOutputs);
154        }
155        Ok(self.graph)
156    }
157
158    /// Handle tokens in the Inputs phase.
159    fn handle_inputs(&mut self, token: Token) -> Result<(), BuildError> {
160        match self.input_state {
161            InputState::Ready => {
162                // Expecting InputDecl, NodeStart, or OutputDecl
163                match token {
164                    Token::InputDecl => {
165                        // Check limits before accepting
166                        if self.graph.inputs.len() >= self.limits.max_inputs {
167                            return Err(BuildError::MaxInputsExceeded(self.limits.max_inputs));
168                        }
169                        self.input_state = InputState::ExpectId;
170                        Ok(())
171                    }
172                    Token::NodeStart => {
173                        // Check limits
174                        if self.graph.nodes.len() >= self.limits.max_nodes {
175                            return Err(BuildError::MaxNodesExceeded(self.limits.max_nodes));
176                        }
177                        // Transition to Nodes phase
178                        self.phase = Phase::Nodes;
179                        self.node_state = Some(NodeState::ExpectId);
180                        Ok(())
181                    }
182                    Token::OutputDecl => {
183                        // Skip directly to Outputs phase (empty nodes section)
184                        self.phase = Phase::Outputs;
185                        self.output_state = OutputState::ExpectId;
186                        Ok(())
187                    }
188                    _ => Err(BuildError::UnexpectedToken),
189                }
190            }
191            InputState::ExpectId => {
192                // Expecting an Id after InputDecl
193                if let Token::Id(id) = token {
194                    // Check for duplicate
195                    if self.defined.contains(&id) {
196                        return Err(BuildError::DuplicateDefinition(id));
197                    }
198                    self.defined.insert(id);
199                    self.graph.inputs.push(id);
200                    self.input_state = InputState::Ready;
201                    Ok(())
202                } else {
203                    Err(BuildError::UnexpectedToken)
204                }
205            }
206        }
207    }
208
209    /// Handle tokens in the Nodes phase.
210    fn handle_nodes(&mut self, token: Token) -> Result<(), BuildError> {
211        match self.node_state {
212            None => {
213                // Between nodes, expecting NodeStart or OutputDecl
214                match token {
215                    Token::NodeStart => {
216                        // Check limits
217                        if self.graph.nodes.len() >= self.limits.max_nodes {
218                            return Err(BuildError::MaxNodesExceeded(self.limits.max_nodes));
219                        }
220                        self.node_state = Some(NodeState::ExpectId);
221                        Ok(())
222                    }
223                    Token::OutputDecl => {
224                        // Check limits before accepting
225                        if self.graph.outputs.len() >= self.limits.max_outputs {
226                            return Err(BuildError::MaxOutputsExceeded(self.limits.max_outputs));
227                        }
228                        self.phase = Phase::Outputs;
229                        self.output_state = OutputState::ExpectId;
230                        Ok(())
231                    }
232                    _ => Err(BuildError::UnexpectedToken),
233                }
234            }
235            Some(state) => self.handle_node_state(state, token),
236        }
237    }
238
239    /// Handle tokens within a node definition.
240    fn handle_node_state(&mut self, state: NodeState, token: Token) -> Result<(), BuildError> {
241        match state {
242            NodeState::ExpectId => {
243                if let Token::Id(id) = token {
244                    // Check for duplicate
245                    if self.defined.contains(&id) {
246                        return Err(BuildError::DuplicateDefinition(id));
247                    }
248                    self.node_state = Some(NodeState::ExpectOp { id });
249                    Ok(())
250                } else {
251                    Err(BuildError::UnexpectedToken)
252                }
253            }
254            NodeState::ExpectOp { id } => {
255                if let Some(op) = token.as_bool_op() {
256                    self.node_state = Some(NodeState::ExpectLeft { id, op });
257                    Ok(())
258                } else {
259                    Err(BuildError::UnexpectedToken)
260                }
261            }
262            NodeState::ExpectLeft { id, op } => {
263                if let Some(source) = token.as_source() {
264                    // Validate source reference
265                    if let Source::Id(ref_id) = source {
266                        if !self.defined.contains(&ref_id) {
267                            return Err(BuildError::UndefinedReference(ref_id));
268                        }
269                    }
270                    self.node_state = Some(NodeState::ExpectRight {
271                        id,
272                        op,
273                        left: source,
274                    });
275                    Ok(())
276                } else {
277                    Err(BuildError::UnexpectedToken)
278                }
279            }
280            NodeState::ExpectRight { id, op, left } => {
281                if let Some(source) = token.as_source() {
282                    // Validate source reference
283                    if let Source::Id(ref_id) = source {
284                        if !self.defined.contains(&ref_id) {
285                            return Err(BuildError::UndefinedReference(ref_id));
286                        }
287                    }
288                    self.node_state = Some(NodeState::ExpectEnd {
289                        id,
290                        op,
291                        left,
292                        right: source,
293                    });
294                    Ok(())
295                } else {
296                    Err(BuildError::UnexpectedToken)
297                }
298            }
299            NodeState::ExpectEnd {
300                id,
301                op,
302                left,
303                right,
304            } => {
305                if token == Token::NodeEnd {
306                    // Commit the node
307                    let node = Node::new(id, op, left, right);
308                    self.defined.insert(id);
309                    self.graph.nodes.push(node);
310
311                    // Check depth limit
312                    let depth = self.graph.depth();
313                    if depth > self.limits.max_depth {
314                        return Err(BuildError::MaxDepthExceeded(self.limits.max_depth));
315                    }
316
317                    self.node_state = None;
318                    Ok(())
319                } else {
320                    Err(BuildError::UnexpectedToken)
321                }
322            }
323        }
324    }
325
326    /// Handle tokens in the Outputs phase.
327    fn handle_outputs(&mut self, token: Token) -> Result<(), BuildError> {
328        match self.output_state {
329            OutputState::Ready => {
330                // Expecting OutputDecl
331                match token {
332                    Token::OutputDecl => {
333                        // Check limits before accepting
334                        if self.graph.outputs.len() >= self.limits.max_outputs {
335                            return Err(BuildError::MaxOutputsExceeded(self.limits.max_outputs));
336                        }
337                        self.output_state = OutputState::ExpectId;
338                        Ok(())
339                    }
340                    _ => {
341                        self.phase = Phase::Done;
342                        Err(BuildError::UnexpectedToken)
343                    }
344                }
345            }
346            OutputState::ExpectId => {
347                // Expecting an Id after OutputDecl
348                if let Token::Id(id) = token {
349                    // Output must reference a defined ID
350                    if !self.defined.contains(&id) {
351                        return Err(BuildError::UndefinedReference(id));
352                    }
353                    self.graph.outputs.push(id);
354                    self.output_state = OutputState::Ready;
355                    Ok(())
356                } else {
357                    Err(BuildError::UnexpectedToken)
358                }
359            }
360        }
361    }
362
363    /// Returns the set of valid next tokens for LLM logit masking.
364    ///
365    /// This is the critical interface for constraining LLM output.
366    /// By masking all logits except those corresponding to valid tokens,
367    /// the LLM is guaranteed to produce a syntactically correct circuit.
368    pub fn valid_next_tokens(&self) -> Vec<Token> {
369        match self.phase {
370            Phase::Done => vec![],
371            Phase::Inputs => self.valid_in_inputs(),
372            Phase::Nodes => self.valid_in_nodes(),
373            Phase::Outputs => self.valid_in_outputs(),
374        }
375    }
376
377    fn valid_in_inputs(&self) -> Vec<Token> {
378        match self.input_state {
379            InputState::Ready => {
380                // Ready for structural tokens, NOT Id
381                let mut valid = Vec::new();
382
383                // Can declare more inputs (up to limit)
384                if self.graph.inputs.len() < self.limits.max_inputs {
385                    valid.push(Token::InputDecl);
386                }
387
388                // Can start defining nodes
389                if self.graph.nodes.len() < self.limits.max_nodes {
390                    valid.push(Token::NodeStart);
391                }
392
393                // Can go directly to outputs
394                if self.graph.outputs.len() < self.limits.max_outputs {
395                    valid.push(Token::OutputDecl);
396                }
397
398                valid
399            }
400            InputState::ExpectId => {
401                // After InputDecl, only Id tokens are valid
402                let mut valid = Vec::new();
403                for id in 0..=255u16 {
404                    if !self.defined.contains(&id) {
405                        valid.push(Token::Id(id));
406                    }
407                }
408                valid
409            }
410        }
411    }
412
413    fn valid_in_nodes(&self) -> Vec<Token> {
414        match &self.node_state {
415            None => {
416                // Between nodes
417                let mut valid = Vec::new();
418                if self.graph.nodes.len() < self.limits.max_nodes {
419                    valid.push(Token::NodeStart);
420                }
421                if self.graph.outputs.len() < self.limits.max_outputs
422                    && !self.graph.nodes.is_empty()
423                {
424                    valid.push(Token::OutputDecl);
425                }
426                valid
427            }
428            Some(state) => self.valid_in_node_state(state),
429        }
430    }
431
432    fn valid_in_node_state(&self, state: &NodeState) -> Vec<Token> {
433        match state {
434            NodeState::ExpectId => {
435                // Any ID not yet defined
436                let mut valid = Vec::new();
437                for id in 0..=255u16 {
438                    if !self.defined.contains(&id) {
439                        valid.push(Token::Id(id));
440                    }
441                }
442                valid
443            }
444            NodeState::ExpectOp { .. } => {
445                vec![Token::Or, Token::Nor, Token::Xor]
446            }
447            NodeState::ExpectLeft { .. } | NodeState::ExpectRight { .. } => {
448                // Can reference any defined ID, or use constants
449                let mut valid = Vec::new();
450                for &id in &self.defined {
451                    valid.push(Token::Id(id));
452                }
453                valid.push(Token::True);
454                valid.push(Token::False);
455                valid
456            }
457            NodeState::ExpectEnd { .. } => {
458                vec![Token::NodeEnd]
459            }
460        }
461    }
462
463    fn valid_in_outputs(&self) -> Vec<Token> {
464        match self.output_state {
465            OutputState::Ready => {
466                // Ready for OutputDecl only (or end of graph)
467                let mut valid = Vec::new();
468                if self.graph.outputs.len() < self.limits.max_outputs {
469                    valid.push(Token::OutputDecl);
470                }
471                valid
472            }
473            OutputState::ExpectId => {
474                // After OutputDecl, only defined IDs are valid
475                let mut valid = Vec::new();
476                for &id in &self.defined {
477                    valid.push(Token::Id(id));
478                }
479                valid
480            }
481        }
482    }
483}
484
485#[cfg(test)]
486mod tests {
487    use super::*;
488
489    #[test]
490    fn test_simple_build() {
491        let mut builder = Builder::new();
492
493        // Declare inputs
494        builder.push(Token::InputDecl).unwrap();
495        builder.push(Token::Id(0)).unwrap();
496        builder.push(Token::InputDecl).unwrap();
497        builder.push(Token::Id(1)).unwrap();
498
499        // Define a node: node(2) = input(0) OR input(1)
500        builder.push(Token::NodeStart).unwrap();
501        builder.push(Token::Id(2)).unwrap();
502        builder.push(Token::Or).unwrap();
503        builder.push(Token::Id(0)).unwrap();
504        builder.push(Token::Id(1)).unwrap();
505        builder.push(Token::NodeEnd).unwrap();
506
507        // Declare output
508        builder.push(Token::OutputDecl).unwrap();
509        builder.push(Token::Id(2)).unwrap();
510
511        let graph = builder.finish().unwrap();
512        assert_eq!(graph.inputs, vec![0, 1]);
513        assert_eq!(graph.nodes.len(), 1);
514        assert_eq!(graph.outputs, vec![2]);
515    }
516
517    #[test]
518    fn test_duplicate_definition() {
519        let mut builder = Builder::new();
520        builder.push(Token::InputDecl).unwrap();
521        builder.push(Token::Id(0)).unwrap();
522        builder.push(Token::InputDecl).unwrap();
523        assert_eq!(
524            builder.push(Token::Id(0)),
525            Err(BuildError::DuplicateDefinition(0))
526        );
527    }
528
529    #[test]
530    fn test_undefined_reference() {
531        let mut builder = Builder::new();
532        builder.push(Token::InputDecl).unwrap();
533        builder.push(Token::Id(0)).unwrap();
534        builder.push(Token::NodeStart).unwrap();
535        builder.push(Token::Id(1)).unwrap();
536        builder.push(Token::Or).unwrap();
537        builder.push(Token::Id(0)).unwrap();
538        // Reference to undefined ID 99
539        assert_eq!(
540            builder.push(Token::Id(99)),
541            Err(BuildError::UndefinedReference(99))
542        );
543    }
544
545    #[test]
546    fn test_no_outputs_error() {
547        let mut builder = Builder::new();
548        builder.push(Token::InputDecl).unwrap();
549        builder.push(Token::Id(0)).unwrap();
550        assert_eq!(builder.finish(), Err(BuildError::NoOutputs));
551    }
552
553    #[test]
554    fn test_incomplete_node_error() {
555        let mut builder = Builder::new();
556        builder.push(Token::InputDecl).unwrap();
557        builder.push(Token::Id(0)).unwrap();
558        builder.push(Token::NodeStart).unwrap();
559        builder.push(Token::Id(1)).unwrap();
560        builder.push(Token::Or).unwrap();
561        // Node incomplete
562        assert_eq!(builder.finish(), Err(BuildError::IncompleteNode));
563    }
564
565    #[test]
566    fn test_valid_next_tokens_initial() {
567        let builder = Builder::new();
568        let valid = builder.valid_next_tokens();
569        assert!(valid.contains(&Token::InputDecl));
570        assert!(valid.contains(&Token::NodeStart));
571        assert!(valid.contains(&Token::OutputDecl));
572    }
573
574    #[test]
575    fn test_valid_next_tokens_expect_op() {
576        let mut builder = Builder::new();
577        builder.push(Token::InputDecl).unwrap();
578        builder.push(Token::Id(0)).unwrap();
579        builder.push(Token::NodeStart).unwrap();
580        builder.push(Token::Id(1)).unwrap();
581
582        let valid = builder.valid_next_tokens();
583        assert_eq!(valid, vec![Token::Or, Token::Nor, Token::Xor]);
584    }
585
586    #[test]
587    fn test_constants_as_sources() {
588        let mut builder = Builder::new();
589
590        // No inputs, just use constants
591        builder.push(Token::NodeStart).unwrap();
592        builder.push(Token::Id(0)).unwrap();
593        builder.push(Token::Or).unwrap();
594        builder.push(Token::True).unwrap();
595        builder.push(Token::False).unwrap();
596        builder.push(Token::NodeEnd).unwrap();
597
598        builder.push(Token::OutputDecl).unwrap();
599        builder.push(Token::Id(0)).unwrap();
600
601        let graph = builder.finish().unwrap();
602        assert!(graph.inputs.is_empty());
603        assert_eq!(graph.nodes.len(), 1);
604        assert_eq!(graph.nodes[0].left, Source::True);
605        assert_eq!(graph.nodes[0].right, Source::False);
606    }
607}