wagon_gll/
lib.rs

1#![warn(missing_docs)]
2//! A (pseudo) generic GLL parsing library in Rust.
3//!
4//! Written to supplement parsers generated by [`wagon-codegen-gll`](../wagon_codegen_gll/index.html) as a proof-of-concept for the WAGon ecosystem.
5//!
6//! This library could be used to write GLL parsers in another way, as long as you stick to required patterns. However,
7//! the library was created with WAGs in mind. As a result, if you only care about pure GLL parsing, there
8//! are probably faster implementations out there that do not have to consider the possibility of the grammar changing at runtime.
9use gss::{GSSNodeIndex, GSSNode};
10use petgraph::prelude::EdgeIndex;
11use thiserror::Error;
12
13use wagon_utils::{comma_separated_with_or_str, ErrorReport};
14use std::{hash::{Hash, Hasher}, rc::Rc, str::{from_utf8, Utf8Error}, collections::HashSet, mem::Discriminant};
15
16use sppf::{SPPFNodeIndex, SPPFNode};
17use value::{Value, ValueError, InnerValue, InnerValueError};
18use wagon_ident::Ident;
19
/// An implementation of the SPPF.
pub mod sppf;
/// An implementation of the GSS.
pub mod gss;
/// An extension of [`wagon_value::Value`] to deal with some GLL specific elements.
pub mod value;

// Private implementation modules. Only the items listed in the `pub use`
// statements below are exposed from them at the crate root.
mod label;
/// An implementation of the global state. This is the main object that runs the parser.
mod state;
mod descriptor;
mod slot;

// Crate-root re-exports of selected items from the private modules above.
pub use label::{Label, RegexTerminal};
pub use state::{GLLState, LabelMap, RuleMap, RegexMap};
pub use slot::GrammarSlot;
36
/// A borrowed reference to a single byte of a [`Terminal`].
pub type TerminalBit<'a> = &'a u8;
/// A Terminal, represented as a borrowed byte slice.
pub type Terminal<'a> = &'a[u8];

/// The ID of the initial non-terminal.
///
/// GLL needs to have some non-terminal to start from. We define that starting non-terminal to always be `S'` and ensure that it holds.
pub const ROOT_UUID: &str = "S'";

/// A reference-counted pointer to a [`Label`] trait object.
pub type GLLBlockLabel<'a> = Rc<dyn Label<'a>>;

/// The datastructure for attributes passed along as arguments.
///
/// This is a plain vector of [`Value`]s; entries are addressed by an [`AttributeKey`].
pub type AttributeMap<'a> = Vec<Value<'a>>;
/// The datastructure for attributes returned from a non-terminal.
///
/// Unlike [`AttributeMap`], each entry is an [`Option`], so a position may hold no value.
pub type ReturnMap<'a> = Vec<Option<Value<'a>>>;
/// The key for the [`AttributeMap`].
pub type AttributeKey = usize;

/// Result of anything in the GLL process that can return an error.
///
/// The error type is the umbrella [`GLLError`].
pub type GLLResult<'a, T> = Result<T, GLLError<'a>>;

/// Result of the GLL parse.
///
/// The error type is [`GLLParseError`].
pub type ParseResult<'a, T> = Result<T, GLLParseError<'a>>;

/// Result for something that can only have issues in the implementation.
///
/// The error type is [`GLLImplementationError`].
pub type ImplementationResult<'a, T> = Result<T, GLLImplementationError<'a>>;
65
66#[derive(Debug, Error)]
67/// Errors possible during the GLL process.
68pub enum GLLError<'a> {
69	/// An error in the implementation occurred. There is nothing wrong with the file being parsed.
70	#[error(transparent)]
71	ImplementationError(GLLImplementationError<'a>),
72	/// An error in the parsing occurred. The input file is probably wrong.
73	#[error(transparent)]
74	ParseError(GLLParseError<'a>),
75	/// An error occurred while processing the final state. Similar to a [`Self::ImplementationError`].
76	#[error("{0}")]
77	ProcessError(#[from] GLLProcessError)
78}
79
80#[derive(Debug, Error)]
81/// Errors possible because of implementation mistakes.
82pub enum GLLImplementationError<'a> {
83	/// Data was not utf8 compatible.
84	#[error("{0}")]
85	Utf8Error(#[from] Utf8Error),
86	/// A [`ValueError`] occurred during parsing.
87	#[error("{0}")]
88	ValueError(ValueError<'a>),
89	/// Tried to get a rule that does not exists.
90	#[error("No rule with id {0} exists in the state object.")]
91	UnknownRule(&'a str),
92	/// Tried to get a label that does not exist.
93	#[error("No label with id {0} exists in the state object.")]
94	UnknownLabel(&'a str),
95	/// The non-terminal identified by [`ROOT_UUID`] could not be found.
96	#[error("{ROOT_UUID} could not be found.")]
97	MissingRoot,
98	/// An SPPF node that we expect to exist in the graph is inexplicably missing.
99	#[error("Expected to find SPPF node {0:?} in the graph, but it was not there.")]
100	MissingSPPFNode(SPPFNodeIndex),
101	/// We expected a specific type of SPPF Node, but got another.
102	#[error("Expected SPPFNode of type {}, got {1:?}", comma_separated_with_or_str(.0))]
103	IncorrectSPPFType(Vec<&'a str>, Discriminant<SPPFNode<'a>>),
104	/// A GSS node that we expect to exist in the graph is inexplicably missing.
105	#[error("Expected to find GSS node {0:?} in the graph, but it was not there.")]
106	MissingGSSNode(GSSNodeIndex),
107	/// A GSS edge is inexplicably missing.
108	#[error("Expected to find GSS edge {0:?} in the graph, but it was not there.")]
109	MissingGSSEdge(EdgeIndex),
110	/// An attribute that is expected to have been passed does not exist.
111	#[error("The {0}th attribute is not at GSS node {1:?}")]
112	MissingAttribute(AttributeKey, Rc<GSSNode<'a>>),
113	/// An attribute that is expected to be in the context does not exist.
114	#[error("The {0}th attribute is not in the context of GSS node {1:?}")]
115	MissingContext(AttributeKey, Rc<GSSNode<'a>>),
116	/// Tried to do something with a completed slot
117	#[error("Tried to access completed slot {0} as if it were not completed.")]
118	CompletedSlot(String),
119	/// Any generic fatal error for which we have no specific variant.
120	#[error("A fatal error occurred! {0}.")]
121	Fatal(&'a str),
122}
123
124#[derive(Debug, Error)]
125/// Errors possible while GLL parsing.
126pub enum GLLParseError<'a> {
127	/// Encountered an unexpected byte.
128	#[error("Encountered unexpected byte at {pointer}. Expected {expected} saw {offender}.")]
129	UnexpectedByte {
130		/// Where in the string the byte was encountered.
131		pointer: usize,
132		/// What we expected to see.
133		expected: u8,
134		/// What we got.
135		offender: u8
136	},
137	/// Expected more bytes than found in the input string.
138	#[error("Tried reading more than possible from input. Current pointer at {pointer}, tried reading {offender:?}.")]
139	TooLong {
140		/// Where we are at the input string.
141		pointer: usize,
142		/// What character we expected to see.
143		offender: Terminal<'a>
144	},
145	/// All the alternatives to this rule failed the first_set check.
146	#[error("No parse candidates were found for rule `{rule}` in context `{context}`")]
147	NoCandidates {
148		/// Where in the input stream this happened.
149		pointer: usize,
150		/// String representation of the rule it happened in.
151		rule: String,
152		/// String representation of the parse context at this time.
153		context: String
154	},
155	/// All the alternatives to this rule had a 0 weight.
156	#[error("All weights for rule `{rule}` in context `{context}` were 0")]
157	ZeroWeights {
158		/// Where in the input stream this happened.
159		pointer: usize,
160		/// String representation of the rule it happened in.
161		rule: String,
162		/// String representation of the parse context at this time.
163		context: String
164	}
165}
166
167impl<'a> From<GLLImplementationError<'a>> for GLLError<'a> {
168    fn from(value: GLLImplementationError<'a>) -> Self {
169        Self::ImplementationError(value)
170    }
171}
172
173impl<'a> From<GLLParseError<'a>> for GLLError<'a> {
174    fn from(value: GLLParseError<'a>) -> Self {
175        Self::ParseError(value)
176    }
177}
178
179impl<'a> From<InnerValueError<Value<'a>>> for GLLImplementationError<'a> {
180    fn from(value: InnerValueError<Value<'a>>) -> Self {
181        Self::ValueError(ValueError::ValueError(value))
182    }
183}
184
185impl<'a> From<ValueError<'a>> for GLLImplementationError<'a> {
186    fn from(value: ValueError<'a>) -> Self {
187        Self::ValueError(value)
188    }
189}
190
191impl<'a> From<ValueError<'a>> for GLLError<'a> {
192    fn from(value: ValueError<'a>) -> Self {
193        Self::ImplementationError(GLLImplementationError::ValueError(value))
194    }
195}
196
197impl<'a> From<InnerValueError<Value<'a>>> for GLLError<'a> {
198    fn from(value: InnerValueError<Value<'a>>) -> Self {
199        Self::ImplementationError(GLLImplementationError::ValueError(ValueError::ValueError(value)))
200    }
201}
202
203impl<'a> From<InnerValueError<InnerValue<Value<'a>>>> for GLLImplementationError<'a> {
204    fn from(value: InnerValueError<InnerValue<Value<'a>>>) -> Self {
205        Self::ValueError(ValueError::ValueError(value.into()))
206    }
207}
208
209impl<'a> From<InnerValueError<InnerValue<Value<'a>>>> for GLLError<'a> {
210    fn from(value: InnerValueError<InnerValue<Value<'a>>>) -> Self {
211        Self::ImplementationError(GLLImplementationError::from(value))
212    }
213}
214
215impl ErrorReport for GLLError<'_> {
216    fn span(self) -> wagon_utils::Span {
217        match self {
218            GLLError::ParseError(e) => match e {
219                GLLParseError::UnexpectedByte { pointer, .. } | GLLParseError::TooLong { pointer, .. } 
220                | GLLParseError::NoCandidates { pointer, .. } | GLLParseError::ZeroWeights { pointer, .. } => pointer..pointer,
221            },
222            _ => wagon_utils::Span::default(),
223        }
224    }
225
226    fn msg(&self) -> (String, String) {
227        match self {
228            GLLError::ImplementationError(e) => ("Fatal Implementation Error".to_string(), e.to_string()),
229            GLLError::ParseError(e) => ("Parse Error".to_string(), e.to_string()),
230            GLLError::ProcessError(e) => ("Post-Processing Error".to_string(), e.to_string()),
231        }
232    }
233}
234
/// Result type for any operations on the finished state that can error.
///
/// The error type is [`GLLProcessError`].
pub type ProcessResult<T> = Result<T, GLLProcessError>;

/// Errors that can occur when processing the finished state object.
///
/// Unlike the other error enums in this file, this type carries no lifetime parameter.
#[derive(Debug, Error)]
pub enum GLLProcessError {
	/// An SPPF node that we expect to exist in the graph is inexplicably missing.
	///
	/// Carries the index of the node that could not be found.
	#[error("Expected to find SPPF node {0:?} in the graph, but it was not there.")]
	MissingSPPFNode(SPPFNodeIndex),
}