substrait_explain/parser/
structural.rs

1//! Parser for the structural part of the Substrait file format.
2//!
3//! This is the overall parser for parsing the text format. It is responsible
4//! for tracking which section of the file we are currently parsing, and parsing
5//! each line separately.
6
7use std::fmt;
8
9use substrait::proto::rel::RelType;
10use substrait::proto::{
11    AggregateRel, FilterRel, Plan, PlanRel, ProjectRel, ReadRel, Rel, RelRoot, plan_rel,
12};
13use thiserror::Error;
14
15use crate::extensions::{SimpleExtensions, simple};
16use crate::parser::common::{MessageParseError, ParsePair};
17use crate::parser::expressions::Name;
18use crate::parser::extensions::{ExtensionParseError, ExtensionParser};
19use crate::parser::{ErrorKind, ExpressionParser, RelationParsePair, Rule, unwrap_single_pair};
20
/// Header line that opens the plan section of the text format. The parser
/// switches to [`State::Plan`] when an unindented line equals this string.
pub const PLAN_HEADER: &str = "=== Plan";
22
/// An input line split into its indentation depth and its content.
///
/// Field `0` is the number of complete two-space indents at the start of the
/// line. Field `1` is the line with those indents and any trailing whitespace
/// removed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IndentedLine<'a>(pub usize, pub &'a str);

impl<'a> From<&'a str> for IndentedLine<'a> {
    /// Trims trailing whitespace, counts the leading spaces, and strips one
    /// two-space unit per indent. With an odd number of leading spaces, the
    /// leftover space remains at the start of the content.
    fn from(line: &'a str) -> Self {
        let content = line.trim_end();
        let leading_spaces = content.bytes().take_while(|&b| b == b' ').count();
        let depth = leading_spaces / 2;

        // Only ASCII spaces are skipped, so this offset is a char boundary.
        IndentedLine(depth, &content[depth * 2..])
    }
}
47
48#[derive(Debug, Clone, Error)]
49pub enum ParseError {
50    #[error("Error parsing extension on line {0}: {1}")]
51    Extension(ParseContext, #[source] ExtensionParseError),
52    #[error("Error parsing plan on {0}: {1}")]
53    Plan(ParseContext, #[source] MessageParseError),
54    #[error("Error parsing section header on line {0}: {1}")]
55    Initial(ParseContext, #[source] MessageParseError),
56    #[error("Error parsing relation: {0}")]
57    Relation(ParseContext, #[source] MessageParseError),
58}
59
60/// Represents a line in the [`Plan`] tree structure before it's converted to a
61/// relation. This allows us to build the tree structure first, then convert to
62/// relations with proper parent-child relationships.
63#[derive(Debug, Clone)]
64pub struct LineNode<'a> {
65    pub pair: pest::iterators::Pair<'a, Rule>,
66    pub line_no: i64,
67    pub children: Vec<LineNode<'a>>,
68}
69
70impl<'a> LineNode<'a> {
71    pub fn context(&self) -> ParseContext {
72        ParseContext {
73            line_no: self.line_no,
74            line: self.pair.as_str().to_string(),
75        }
76    }
77
78    pub fn parse(line: &'a str, line_no: i64) -> Result<Self, ParseError> {
79        // Parse the line immediately to catch syntax errors
80        let mut pairs: pest::iterators::Pairs<'a, Rule> =
81            <ExpressionParser as pest::Parser<Rule>>::parse(Rule::relation, line).map_err(|e| {
82                ParseError::Plan(
83                    ParseContext {
84                        line_no,
85                        line: line.to_string(),
86                    },
87                    MessageParseError::new("relation", ErrorKind::InvalidValue, Box::new(e)),
88                )
89            })?;
90
91        let pair = pairs.next().unwrap();
92        assert!(pairs.next().is_none()); // Should be exactly one pair
93
94        Ok(Self {
95            pair,
96            line_no,
97            children: Vec::new(),
98        })
99    }
100
101    /// Parse the root relation of a plan, at depth 0.
102    pub fn parse_root(line: &'a str, line_no: i64) -> Result<Self, ParseError> {
103        // Parse the line as a top-level relation (either root_relation or regular relation)
104        let mut pairs: pest::iterators::Pairs<'a, Rule> =
105            <ExpressionParser as pest::Parser<Rule>>::parse(Rule::top_level_relation, line)
106                .map_err(|e| {
107                    ParseError::Plan(
108                        ParseContext::new(line_no, line.to_string()),
109                        MessageParseError::new(
110                            "top_level_relation",
111                            crate::parser::ErrorKind::Syntax,
112                            Box::new(e),
113                        ),
114                    )
115                })?;
116
117        let pair = pairs.next().unwrap();
118        assert!(pairs.next().is_none());
119
120        // Get the inner pair, which is either a root relation or a regular relation
121        let inner_pair = unwrap_single_pair(pair);
122
123        Ok(Self {
124            pair: inner_pair,
125            line_no,
126            children: Vec::new(),
127        })
128    }
129}
130
131/// Helper function to get the number of input fields from a relation.
132/// This is needed for Project relations to calculate output mapping indices.
133fn get_input_field_count(rel: &Rel) -> usize {
134    match &rel.rel_type {
135        Some(RelType::Read(read_rel)) => {
136            // For Read relations, count the fields in the base schema
137            read_rel
138                .base_schema
139                .as_ref()
140                .and_then(|schema| schema.r#struct.as_ref())
141                .map(|struct_| struct_.types.len())
142                .unwrap_or(0)
143        }
144        Some(RelType::Filter(filter_rel)) => {
145            // For Filter relations, get the count from the input
146            filter_rel
147                .input
148                .as_ref()
149                .map(|input| get_input_field_count(input))
150                .unwrap_or(0)
151        }
152        Some(RelType::Project(project_rel)) => {
153            // For Project relations, get the count from the input
154            project_rel
155                .input
156                .as_ref()
157                .map(|input| get_input_field_count(input))
158                .unwrap_or(0)
159        }
160        _ => 0,
161    }
162}
163
/// The section of the input that the parser is currently in.
#[derive(Copy, Clone, Debug)]
pub enum State {
    /// Before any section header has been parsed.
    Initial,
    /// In the extensions section: its header has been parsed, possibly
    /// followed by extension lines.
    Extensions,
    /// In the plan section: its header has been parsed, possibly followed by
    /// plan lines.
    Plan,
}

impl fmt::Display for State {
    /// Writes the variant name, exactly as the derived `Debug` does.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(self, f)
    }
}
179
/// Identifies the input line that an error refers to.
#[derive(Debug, Clone)]
pub struct ParseContext {
    /// Line number of the line.
    pub line_no: i64,
    /// Text of the line.
    pub line: String,
}

impl ParseContext {
    /// Create a context from a line number and the line's text.
    pub fn new(line_no: i64, line: String) -> Self {
        ParseContext { line_no, line }
    }
}

impl fmt::Display for ParseContext {
    /// Formats the context as `line N ('text')`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { line_no, line } = self;
        write!(f, "line {line_no} ('{line}')")
    }
}
197
198// An in-progress tree builder, building the tree of relations.
199#[derive(Debug, Clone, Default)]
200pub struct TreeBuilder<'a> {
201    // Current tree of nodes being built. These have been successfully parsed
202    // into Pest pairs, but have not yet been converted to substrait plans.
203    current: Option<LineNode<'a>>,
204    // Completed trees that have been built.
205    completed: Vec<LineNode<'a>>,
206}
207
208impl<'a> TreeBuilder<'a> {
209    /// Traverse down the tree, always taking the last child at each level, until reaching the specified depth.
210    pub fn get_at_depth(&mut self, depth: usize) -> Option<&mut LineNode<'a>> {
211        let mut node = self.current.as_mut()?;
212        for _ in 0..depth {
213            node = node.children.last_mut()?;
214        }
215        Some(node)
216    }
217
218    pub fn add_line(&mut self, depth: usize, node: LineNode<'a>) -> Result<(), ParseError> {
219        if depth == 0 {
220            if let Some(prev) = self.current.take() {
221                self.completed.push(prev)
222            }
223            self.current = Some(node);
224            return Ok(());
225        }
226
227        let parent = match self.get_at_depth(depth - 1) {
228            None => {
229                return Err(ParseError::Plan(
230                    node.context(),
231                    MessageParseError::invalid(
232                        "relation",
233                        node.pair.as_span(),
234                        format!("No parent found for depth {depth}"),
235                    ),
236                ));
237            }
238            Some(parent) => parent,
239        };
240
241        parent.children.push(node.clone());
242        Ok(())
243    }
244
245    /// End of input - move any remaining nodes from stack to completed and
246    /// return any trees in progress. Resets the builder to its initial state
247    /// (empty)
248    pub fn finish(&mut self) -> Vec<LineNode<'a>> {
249        // Move any remaining nodes from stack to completed
250        if let Some(node) = self.current.take() {
251            self.completed.push(node);
252        }
253        std::mem::take(&mut self.completed)
254    }
255}
256
257// Relation parsing component - handles converting LineNodes to Relations
258#[derive(Debug, Clone, Default)]
259pub struct RelationParser<'a> {
260    tree: TreeBuilder<'a>,
261}
262
263impl<'a> RelationParser<'a> {
264    pub fn parse_line(&mut self, line: IndentedLine<'a>, line_no: i64) -> Result<(), ParseError> {
265        let IndentedLine(depth, line) = line;
266
267        // Use parse_root for depth 0 (top-level relations), parse for other depths
268        let node = if depth == 0 {
269            LineNode::parse_root(line, line_no)?
270        } else {
271            LineNode::parse(line, line_no)?
272        };
273
274        self.tree.add_line(depth, node)
275    }
276
277    /// Parse a relation from a Pest pair of rule 'relation' into a Substrait
278    /// Rel.
279    //
280    // Clippy says a Vec<Box<…>> is unnecessary, as the Vec is already on the
281    // heap, but this is what the protobuf requires so we allow it here
282    #[allow(clippy::vec_box)]
283    fn parse_relation(
284        &self,
285        extensions: &SimpleExtensions,
286        line_no: i64,
287        pair: pest::iterators::Pair<Rule>,
288        child_relations: Vec<Box<substrait::proto::Rel>>,
289        input_field_count: usize,
290    ) -> Result<substrait::proto::Rel, ParseError> {
291        assert_eq!(pair.as_rule(), Rule::relation);
292        let p = unwrap_single_pair(pair);
293
294        let (e, l, p, c, ic) = (extensions, line_no, p, child_relations, input_field_count);
295
296        match p.as_rule() {
297            Rule::read_relation => self.parse_rel::<ReadRel>(e, l, p, c, ic),
298            Rule::filter_relation => self.parse_rel::<FilterRel>(e, l, p, c, ic),
299            Rule::project_relation => self.parse_rel::<ProjectRel>(e, l, p, c, ic),
300            Rule::aggregate_relation => self.parse_rel::<AggregateRel>(e, l, p, c, ic),
301            _ => todo!(),
302        }
303    }
304
305    /// Parse a specific relation type from a Pest pair of matching rule into a
306    /// Substrait Rel.
307    //
308    // Clippy says a Vec<Box<…>> is unnecessary, as the Vec is already on the
309    // heap, but this is what the protobuf requires so we allow it here
310    #[allow(clippy::vec_box)]
311    fn parse_rel<T: RelationParsePair>(
312        &self,
313        extensions: &SimpleExtensions,
314        line_no: i64,
315        pair: pest::iterators::Pair<Rule>,
316        child_relations: Vec<Box<substrait::proto::Rel>>,
317        input_field_count: usize,
318    ) -> Result<substrait::proto::Rel, ParseError> {
319        assert_eq!(pair.as_rule(), T::rule());
320
321        let line = pair.as_str();
322        let rel_type =
323            T::parse_pair_with_context(extensions, pair, child_relations, input_field_count);
324
325        match rel_type {
326            Ok(rel) => Ok(rel.into_rel()),
327            Err(e) => Err(ParseError::Plan(
328                ParseContext::new(line_no, line.to_string()),
329                e,
330            )),
331        }
332    }
333
334    /// Convert a given LineNode into a Substrait Rel. Also recursively builds children.
335    fn build_rel(
336        &self,
337        extensions: &SimpleExtensions,
338        node: LineNode,
339    ) -> Result<substrait::proto::Rel, ParseError> {
340        // Parse children first to get their output schemas
341        let child_relations = node
342            .children
343            .into_iter()
344            .map(|c| self.build_rel(extensions, c).map(Box::new))
345            .collect::<Result<Vec<Box<Rel>>, ParseError>>()?;
346
347        // Get the input field count from all the children
348        let input_field_count = child_relations
349            .iter()
350            .map(|r| get_input_field_count(r.as_ref()))
351            .reduce(|a, b| a + b)
352            .unwrap_or(0);
353
354        // Parse this node using the stored pair
355        self.parse_relation(
356            extensions,
357            node.line_no,
358            node.pair,
359            child_relations,
360            input_field_count,
361        )
362    }
363
364    /// Build a tree of relations from a LineNode, with the root in the form of
365    /// a PlanRel - the root type in a Substrait Plan.
366    fn build_plan_rel(
367        &self,
368        extensions: &SimpleExtensions,
369        mut node: LineNode,
370    ) -> Result<PlanRel, ParseError> {
371        // Plain relations are allowed as root relations, they just don't have names.
372        if node.pair.as_rule() == Rule::relation {
373            let rel = self.build_rel(extensions, node)?;
374            return Ok(PlanRel {
375                rel_type: Some(plan_rel::RelType::Rel(rel)),
376            });
377        }
378
379        // Otherwise, it must be a root relation.
380        assert_eq!(node.pair.as_rule(), Rule::root_relation);
381        let context = node.context();
382        let span = node.pair.as_span();
383
384        // Parse the column names
385        let column_names_pair = unwrap_single_pair(node.pair);
386        assert_eq!(column_names_pair.as_rule(), Rule::root_name_list);
387
388        let names: Vec<String> = column_names_pair
389            .into_inner()
390            .map(|name_pair| {
391                assert_eq!(name_pair.as_rule(), Rule::name);
392                Name::parse_pair(name_pair).0
393            })
394            .collect();
395
396        let child = match node.children.len() {
397            1 => self.build_rel(extensions, node.children.pop().unwrap())?,
398            n => {
399                return Err(ParseError::Plan(
400                    context,
401                    MessageParseError::invalid(
402                        "root_relation",
403                        span,
404                        format!("Root relation must have exactly one child, found {n}"),
405                    ),
406                ));
407            }
408        };
409
410        let rel_root = RelRoot {
411            names,
412            input: Some(child),
413        };
414
415        Ok(PlanRel {
416            rel_type: Some(plan_rel::RelType::Root(rel_root)),
417        })
418    }
419
420    /// Build all the trees we have into `PlanRel`s.
421    fn build(mut self, extensions: &SimpleExtensions) -> Result<Vec<PlanRel>, ParseError> {
422        let nodes = self.tree.finish();
423        nodes
424            .into_iter()
425            .map(|n| self.build_plan_rel(extensions, n))
426            .collect::<Result<Vec<PlanRel>, ParseError>>()
427    }
428}
429
/// A parser for Substrait query plans in text format.
///
/// The `Parser` converts human-readable Substrait text format into Substrait
/// protobuf plans. It handles both the extensions section (which defines
/// functions, types, etc.) and the plan section (which defines the actual query
/// structure).
///
/// ## Usage
///
/// The simplest entry point is the static `parse()` method:
///
/// ```rust
/// use substrait_explain::parser::Parser;
///
/// let plan_text = r#"
/// === Plan
/// Root[c, d]
///   Project[$1, 42]
///     Read[schema.table => a:i64, b:string?]
/// "#;
///
/// let plan = Parser::parse(plan_text).unwrap();
/// ```
///
/// ## Input Format
///
/// The parser expects input in the following format:
///
/// ```text
/// === Extensions
/// URIs:
///   @  1: https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml
/// Functions:
///   # 10 @  1: add
/// === Plan
/// Root[columns]
///   Relation[arguments => columns]
///     ChildRelation[arguments => columns]
/// ```
///
/// - **Extensions section** (optional): Defines URIs and function/type declarations
/// - **Plan section**: Defines the query structure; each nesting level of a
///   relation is indented by two spaces
///
/// Blank lines are skipped, but still count towards line numbers.
///
/// ## Error Handling
///
/// Errors are reported as [`ParseError`] values, which carry:
/// - The line number where the error occurred
/// - The line content that failed to parse
/// - The underlying error describing what went wrong
///
/// ```rust
/// use substrait_explain::parser::Parser;
///
/// let invalid_plan = r#"
/// === Plan
/// InvalidRelation[invalid syntax]
/// "#;
///
/// match Parser::parse(invalid_plan) {
///     Ok(_) => println!("Successfully parsed"),
///     Err(e) => eprintln!("Parse error: {e}"),
/// }
/// ```
///
/// ## Supported Relations
///
/// The parser currently converts the following relations:
/// - `Read[table => columns]` - Read from a table
/// - `Project[expressions]` - Project columns/expressions
/// - `Filter[condition => columns]` - Filter rows
/// - `Aggregate` relations
/// - `Root[columns]` - Root relation with output columns (top level only)
///
/// ## Extensions Support
///
/// The parser supports Substrait Simple Extensions, allowing you to:
/// - Define custom functions with URIs and anchors
/// - Reference functions by name in expressions
/// - Use custom types and type variations
///
/// ```rust
/// use substrait_explain::parser::Parser;
///
/// let plan_with_extensions = r#"
/// === Extensions
/// URIs:
///   @  1: https://example.com/functions.yaml
/// Functions:
///   ## 10 @  1: my_custom_function
/// === Plan
/// Root[result]
///   Project[my_custom_function($0, $1)]
///     Read[table => col1:i32, col2:i32]
/// "#;
///
/// let plan = Parser::parse(plan_with_extensions).unwrap();
/// ```
///
/// ## Processing Model
///
/// - The input is read in a single pass, one line at a time
/// - Each line is parsed as it is read, so a syntax error is reported at the
///   first offending line
/// - Plan lines are collected into trees by indentation and converted to
///   relations once all input has been read
/// - Each call to `parse()` creates its own `Parser`; no state is carried over
///   between calls
#[derive(Debug)]
pub struct Parser<'a> {
    // Number of the line currently being parsed; starts at 1.
    line_no: i64,
    // Section of the input the parser is currently in.
    state: State,
    // Receives the lines of the extensions section.
    extension_parser: ExtensionParser,
    // Receives the lines of the plan section and builds the relations.
    relation_parser: RelationParser<'a>,
}
546impl<'a> Default for Parser<'a> {
547    fn default() -> Self {
548        Self {
549            line_no: 1,
550            state: State::Initial,
551            extension_parser: ExtensionParser::default(),
552            relation_parser: RelationParser::default(),
553        }
554    }
555}
556
557impl<'a> Parser<'a> {
558    /// Parse a Substrait plan from text format.
559    ///
560    /// This is the main entry point for parsing well-formed plans.
561    /// Returns a clear error if parsing fails.
562    ///
563    /// The input should be in the Substrait text format, which consists of:
564    /// - An optional extensions section starting with "=== Extensions"
565    /// - A plan section starting with "=== Plan"
566    /// - Indented relation definitions
567    ///
568    /// # Example
569    /// ```rust
570    /// use substrait_explain::parser::Parser;
571    ///
572    /// let plan_text = r#"
573    /// === Plan
574    /// Root[c, d]
575    ///   Project[$1, 42]
576    ///     Read[schema.table => a:i64, b:string?]
577    /// "#;
578    ///
579    /// let plan = Parser::parse(plan_text).unwrap();
580    /// ```
581    ///
582    /// # Errors
583    ///
584    /// Returns a [`ParseError`] if the input cannot be parsed. The error includes
585    /// the line number and content where parsing failed, along with a description
586    /// of what went wrong.
587    pub fn parse(input: &'a str) -> Result<Plan, ParseError> {
588        let mut parser = Self::default();
589
590        for line in input.lines() {
591            if line.trim().is_empty() {
592                parser.line_no += 1;
593                continue;
594            }
595
596            parser.parse_line(line)?;
597            parser.line_no += 1;
598        }
599
600        parser.build_plan()
601    }
602
603    /// Parse a single line of input, updating the parser state.
604    fn parse_line(&mut self, line: &'a str) -> Result<(), ParseError> {
605        let indented_line = IndentedLine::from(line);
606        let line_no = self.line_no;
607        let ctx = || ParseContext {
608            line_no,
609            line: line.to_string(),
610        };
611
612        match self.state {
613            State::Initial => self.parse_initial(indented_line),
614            State::Extensions => self
615                .parse_extensions(indented_line)
616                .map_err(|e| ParseError::Extension(ctx(), e)),
617            State::Plan => self.parse_plan_line(indented_line),
618        }
619    }
620
621    /// Parse the initial line(s) of the input, which is either a blank line or
622    /// the extensions or plan header.
623    fn parse_initial(&mut self, line: IndentedLine) -> Result<(), ParseError> {
624        match line {
625            IndentedLine(0, l) if l.trim().is_empty() => {}
626            IndentedLine(0, simple::EXTENSIONS_HEADER) => {
627                self.state = State::Extensions;
628            }
629            IndentedLine(0, PLAN_HEADER) => {
630                self.state = State::Plan;
631            }
632            IndentedLine(n, l) => {
633                return Err(ParseError::Initial(
634                    ParseContext::new(n as i64, l.to_string()),
635                    MessageParseError::invalid(
636                        "initial",
637                        pest::Span::new(l, 0, l.len()).expect("Invalid span?!"),
638                        format!("Unknown initial line: {l:?}"),
639                    ),
640                ));
641            }
642        }
643        if line.1.trim().is_empty() {
644            // Blank line - do nothing
645            return Ok(());
646        }
647
648        if line == IndentedLine(0, simple::EXTENSIONS_HEADER) {
649            self.state = State::Extensions;
650            return Ok(());
651        }
652        if line == IndentedLine(0, PLAN_HEADER) {
653            self.state = State::Plan;
654            return Ok(());
655        }
656        todo!()
657    }
658
659    /// Parse a single line from the extensions section of the input, updating
660    /// the parser state.
661    fn parse_extensions(&mut self, line: IndentedLine<'_>) -> Result<(), ExtensionParseError> {
662        if line == IndentedLine(0, PLAN_HEADER) {
663            self.state = State::Plan;
664            return Ok(());
665        }
666        self.extension_parser.parse_line(line)
667    }
668
669    /// Parse a single line from the plan section of the input, updating the
670    /// parser state.
671    fn parse_plan_line(&mut self, line: IndentedLine<'a>) -> Result<(), ParseError> {
672        self.relation_parser.parse_line(line, self.line_no)
673    }
674
675    /// Build the plan from the parser state.
676    fn build_plan(self) -> Result<Plan, ParseError> {
677        let Parser {
678            relation_parser,
679            extension_parser,
680            ..
681        } = self;
682
683        let extensions = extension_parser.extensions();
684
685        // Parse the tree into relations
686        let root_relations = relation_parser.build(extensions)?;
687
688        // Build the final plan
689        Ok(Plan {
690            extension_uris: extensions.to_extension_uris(),
691            extensions: extensions.to_extension_declarations(),
692            relations: root_relations,
693            ..Default::default()
694        })
695    }
696}
697
698#[cfg(test)]
699mod tests {
700    use substrait::proto::extensions::simple_extension_declaration::MappingType;
701
702    use super::*;
703    use crate::extensions::simple::ExtensionKind;
704    use crate::parser::extensions::ExtensionParserState;
705
706    #[test]
707    fn test_parse_basic_block() {
708        let mut expected_extensions = SimpleExtensions::new();
709        expected_extensions
710            .add_extension_uri("/uri/common".to_string(), 1)
711            .unwrap();
712        expected_extensions
713            .add_extension_uri("/uri/specific_funcs".to_string(), 2)
714            .unwrap();
715        expected_extensions
716            .add_extension(ExtensionKind::Function, 1, 10, "func_a".to_string())
717            .unwrap();
718        expected_extensions
719            .add_extension(ExtensionKind::Function, 2, 11, "func_b_special".to_string())
720            .unwrap();
721        expected_extensions
722            .add_extension(ExtensionKind::Type, 1, 20, "SomeType".to_string())
723            .unwrap();
724        expected_extensions
725            .add_extension(ExtensionKind::TypeVariation, 2, 30, "VarX".to_string())
726            .unwrap();
727
728        let mut parser = ExtensionParser::default();
729        let input_block = r#"
730URIs:
731  @  1: /uri/common
732  @  2: /uri/specific_funcs
733Functions:
734  # 10 @  1: func_a
735  # 11 @  2: func_b_special
736Types:
737  # 20 @  1: SomeType
738Type Variations:
739  # 30 @  2: VarX
740"#;
741
742        for line_str in input_block.trim().lines() {
743            parser
744                .parse_line(IndentedLine::from(line_str))
745                .unwrap_or_else(|e| panic!("Failed to parse line \'{line_str}\': {e:?}"));
746        }
747
748        assert_eq!(*parser.extensions(), expected_extensions);
749
750        let extensions_str = parser.extensions().to_string("  ");
751        // The writer adds the header; the ExtensionParser does not parse the
752        // header, so we add it here for comparison.
753        let expected_str = format!(
754            "{}\n{}",
755            simple::EXTENSIONS_HEADER,
756            input_block.trim_start()
757        );
758        assert_eq!(extensions_str.trim(), expected_str.trim());
759        // Check final state after all lines are processed.
760        // The last significant line in input_block is a TypeVariation declaration.
761        assert_eq!(
762            parser.state(),
763            ExtensionParserState::ExtensionDeclarations(ExtensionKind::TypeVariation)
764        );
765
766        // Check that a subsequent blank line correctly resets state to Extensions.
767        parser.parse_line(IndentedLine(0, "")).unwrap();
768        assert_eq!(parser.state(), ExtensionParserState::Extensions);
769    }
770
771    /// Test that we can parse a larger extensions block and it matches the input.
772    #[test]
773    fn test_parse_complete_extension_block() {
774        let mut parser = ExtensionParser::default();
775        let input_block = r#"
776URIs:
777  @  1: /uri/common
778  @  2: /uri/specific_funcs
779  @  3: /uri/types_lib
780  @  4: /uri/variations_lib
781Functions:
782  # 10 @  1: func_a
783  # 11 @  2: func_b_special
784  # 12 @  1: func_c_common
785Types:
786  # 20 @  1: CommonType
787  # 21 @  3: LibraryType
788  # 22 @  1: AnotherCommonType
789Type Variations:
790  # 30 @  4: VarX
791  # 31 @  4: VarY
792"#;
793
794        for line_str in input_block.trim().lines() {
795            parser
796                .parse_line(IndentedLine::from(line_str))
797                .unwrap_or_else(|e| panic!("Failed to parse line \'{line_str}\': {e:?}"));
798        }
799
800        let extensions_str = parser.extensions().to_string("  ");
801        // The writer adds the header; the ExtensionParser does not parse the
802        // header, so we add it here for comparison.
803        let expected_str = format!(
804            "{}\n{}",
805            simple::EXTENSIONS_HEADER,
806            input_block.trim_start()
807        );
808        assert_eq!(extensions_str.trim(), expected_str.trim());
809    }
810
811    #[test]
812    fn test_parse_relation_tree() {
813        // Example plan with a Project, a Filter, and a Read, nested by indentation
814        let plan = r#"=== Plan
815Project[$0, $1, 42, 84]
816  Filter[$2 => $0, $1]
817    Read[my.table => a:i32, b:string?, c:boolean]
818"#;
819        let mut parser = Parser::default();
820        for line in plan.lines() {
821            parser.parse_line(line).unwrap();
822        }
823
824        // Complete the current tree to convert it to relations
825        let plan = parser.build_plan().unwrap();
826
827        let root_rel = &plan.relations[0].rel_type;
828        let first_rel = match root_rel {
829            Some(plan_rel::RelType::Rel(rel)) => rel,
830            _ => panic!("Expected Rel type, got {root_rel:?}"),
831        };
832        // Root should be Project
833        let project = match &first_rel.rel_type {
834            Some(RelType::Project(p)) => p,
835            other => panic!("Expected Project at root, got {other:?}"),
836        };
837
838        // Check that Project has Filter as input
839        assert!(project.input.is_some());
840        let filter_input = project.input.as_ref().unwrap();
841
842        // Check that Filter has Read as input
843        match &filter_input.rel_type {
844            Some(RelType::Filter(_)) => {
845                match &filter_input.rel_type {
846                    Some(RelType::Filter(filter)) => {
847                        assert!(filter.input.is_some());
848                        let read_input = filter.input.as_ref().unwrap();
849
850                        // Check that Read has no input (it's a leaf)
851                        match &read_input.rel_type {
852                            Some(RelType::Read(_)) => {}
853                            other => panic!("Expected Read relation, got {other:?}"),
854                        }
855                    }
856                    other => panic!("Expected Filter relation, got {other:?}"),
857                }
858            }
859            other => panic!("Expected Filter relation, got {other:?}"),
860        }
861    }
862
863    #[test]
864    fn test_parse_root_relation() {
865        // Test a plan with a Root relation
866        let plan = r#"=== Plan
867Root[result]
868  Project[$0, $1]
869    Read[my.table => a:i32, b:string?]
870"#;
871        let mut parser = Parser::default();
872        for line in plan.lines() {
873            parser.parse_line(line).unwrap();
874        }
875
876        let plan = parser.build_plan().unwrap();
877
878        // Check that we have exactly one relation
879        assert_eq!(plan.relations.len(), 1);
880
881        let root_rel = &plan.relations[0].rel_type;
882        let rel_root = match root_rel {
883            Some(plan_rel::RelType::Root(rel_root)) => rel_root,
884            other => panic!("Expected Root type, got {other:?}"),
885        };
886
887        // Check that the root has the correct name
888        assert_eq!(rel_root.names, vec!["result"]);
889
890        // Check that the root has a Project as input
891        let project_input = match &rel_root.input {
892            Some(rel) => rel,
893            None => panic!("Root should have an input"),
894        };
895
896        let project = match &project_input.rel_type {
897            Some(RelType::Project(p)) => p,
898            other => panic!("Expected Project as root input, got {other:?}"),
899        };
900
901        // Check that Project has Read as input
902        let read_input = match &project.input {
903            Some(rel) => rel,
904            None => panic!("Project should have an input"),
905        };
906
907        match &read_input.rel_type {
908            Some(RelType::Read(_)) => {}
909            other => panic!("Expected Read relation, got {other:?}"),
910        }
911    }
912
913    #[test]
914    fn test_parse_root_relation_no_names() {
915        // Test a plan with a Root relation with no names
916        let plan = r#"=== Plan
917Root[]
918  Project[$0, $1]
919    Read[my.table => a:i32, b:string?]
920"#;
921        let mut parser = Parser::default();
922        for line in plan.lines() {
923            parser.parse_line(line).unwrap();
924        }
925
926        let plan = parser.build_plan().unwrap();
927
928        let root_rel = &plan.relations[0].rel_type;
929        let rel_root = match root_rel {
930            Some(plan_rel::RelType::Root(rel_root)) => rel_root,
931            other => panic!("Expected Root type, got {other:?}"),
932        };
933
934        // Check that the root has no names
935        assert_eq!(rel_root.names, Vec::<String>::new());
936    }
937
938    #[test]
939    fn test_parse_full_plan() {
940        // Test a complete Substrait plan with extensions and relations
941        let input = r#"
942=== Extensions
943URIs:
944  @  1: /uri/common
945  @  2: /uri/specific_funcs
946Functions:
947  # 10 @  1: func_a
948  # 11 @  2: func_b_special
949Types:
950  # 20 @  1: SomeType
951Type Variations:
952  # 30 @  2: VarX
953
954=== Plan
955Project[$0, $1, 42, 84]
956  Filter[$2 => $0, $1]
957    Read[my.table => a:i32, b:string?, c:boolean]
958"#;
959
960        let plan = Parser::parse(input).unwrap();
961
962        // Verify the plan structure
963        assert_eq!(plan.extension_uris.len(), 2);
964        assert_eq!(plan.extensions.len(), 4);
965        assert_eq!(plan.relations.len(), 1);
966
967        // Verify extension URIs
968        let uri1 = &plan.extension_uris[0];
969        assert_eq!(uri1.extension_uri_anchor, 1);
970        assert_eq!(uri1.uri, "/uri/common");
971
972        let uri2 = &plan.extension_uris[1];
973        assert_eq!(uri2.extension_uri_anchor, 2);
974        assert_eq!(uri2.uri, "/uri/specific_funcs");
975
976        // Verify extensions
977        let func1 = &plan.extensions[0];
978        match &func1.mapping_type {
979            Some(MappingType::ExtensionFunction(f)) => {
980                assert_eq!(f.function_anchor, 10);
981                assert_eq!(f.extension_uri_reference, 1);
982                assert_eq!(f.name, "func_a");
983            }
984            other => panic!("Expected ExtensionFunction, got {other:?}"),
985        }
986
987        let func2 = &plan.extensions[1];
988        match &func2.mapping_type {
989            Some(MappingType::ExtensionFunction(f)) => {
990                assert_eq!(f.function_anchor, 11);
991                assert_eq!(f.extension_uri_reference, 2);
992                assert_eq!(f.name, "func_b_special");
993            }
994            other => panic!("Expected ExtensionFunction, got {other:?}"),
995        }
996
997        let type1 = &plan.extensions[2];
998        match &type1.mapping_type {
999            Some(MappingType::ExtensionType(t)) => {
1000                assert_eq!(t.type_anchor, 20);
1001                assert_eq!(t.extension_uri_reference, 1);
1002                assert_eq!(t.name, "SomeType");
1003            }
1004            other => panic!("Expected ExtensionType, got {other:?}"),
1005        }
1006
1007        let var1 = &plan.extensions[3];
1008        match &var1.mapping_type {
1009            Some(MappingType::ExtensionTypeVariation(v)) => {
1010                assert_eq!(v.type_variation_anchor, 30);
1011                assert_eq!(v.extension_uri_reference, 2);
1012                assert_eq!(v.name, "VarX");
1013            }
1014            other => panic!("Expected ExtensionTypeVariation, got {other:?}"),
1015        }
1016
1017        // Verify the relation tree structure
1018        let root_rel = &plan.relations[0];
1019        match &root_rel.rel_type {
1020            Some(plan_rel::RelType::Rel(rel)) => {
1021                match &rel.rel_type {
1022                    Some(RelType::Project(project)) => {
1023                        // Verify Project relation
1024                        assert_eq!(project.expressions.len(), 2); // 42 and 84
1025                        println!("Project input: {:?}", project.input.is_some());
1026                        assert!(project.input.is_some()); // Should have Filter as input
1027
1028                        // Check the Filter input
1029                        let filter_input = project.input.as_ref().unwrap();
1030                        match &filter_input.rel_type {
1031                            Some(RelType::Filter(filter)) => {
1032                                println!("Filter input: {:?}", filter.input.is_some());
1033                                assert!(filter.input.is_some()); // Should have Read as input
1034
1035                                // Check the Read input
1036                                let read_input = filter.input.as_ref().unwrap();
1037                                match &read_input.rel_type {
1038                                    Some(RelType::Read(read)) => {
1039                                        // Verify Read relation
1040                                        let schema = read.base_schema.as_ref().unwrap();
1041                                        assert_eq!(schema.names.len(), 3);
1042                                        assert_eq!(schema.names[0], "a");
1043                                        assert_eq!(schema.names[1], "b");
1044                                        assert_eq!(schema.names[2], "c");
1045
1046                                        let struct_ = schema.r#struct.as_ref().unwrap();
1047                                        assert_eq!(struct_.types.len(), 3);
1048                                    }
1049                                    other => panic!("Expected Read relation, got {other:?}"),
1050                                }
1051                            }
1052                            other => panic!("Expected Filter relation, got {other:?}"),
1053                        }
1054                    }
1055                    other => panic!("Expected Project relation, got {other:?}"),
1056                }
1057            }
1058            other => panic!("Expected Rel type, got {other:?}"),
1059        }
1060    }
1061}