lexi_gram/
options.rs

1// Copyright (c) 2025 Redglyph (@gmail.com). All Rights Reserved.
2
3use std::io::Write;
4use lexigram_lib::file_utils::{get_tagged_source, replace_tagged_source, SrcTagError};
5use lexigram_lib::lexergen::LexigramCrate;
6
/// What the source generator is asked to do with the code: generate it or verify it.
///
/// Verification is only possible with some [CodeLocation] options.
#[derive(Debug, Clone, PartialEq)]
pub enum Action {
    /// Generates the source
    Generate,
    /// Verifies the source
    Verify,
}
11
/// Where the specification of a lexer or parser comes from: the lexicon or grammar
/// content itself, or the location it must be read from.
#[derive(Debug, Clone, PartialEq)]
pub enum Specification {
    /// There is no source
    None,
    /// The source is the content of this string
    String(String),
    /// The source is the whole content of an existing file
    File {
        filename: String,
    },
    /// The source is the text located between tags in an existing file
    FileTag {
        filename: String,
        tag: String,
    },
}
24
25impl Specification {
26    pub fn is_none(&self) -> bool {
27        self == &Specification::None
28    }
29
30    pub fn get_type(&self) -> String {
31        match self {
32            Specification::None => "no content".to_string(),
33            Specification::String(_) => "String text".to_string(),
34            Specification::File { filename } => format!("file '{filename}'"),
35            Specification::FileTag { filename, tag } => format!("file '{filename}' / tag '{tag}'"),
36        }
37    }
38
39    pub fn get(self) -> Result<Option<String>, SrcTagError> {
40        match self {
41            Specification::None => Ok(None),
42            Specification::String(s) => Ok(Some(s)),
43            Specification::File { filename } => Ok(std::fs::read_to_string(filename)?).map(|s| Some(s)),
44            Specification::FileTag { filename, tag } => get_tagged_source(&filename, &tag).map(|s| Some(s)),
45        }
46    }
47}
48
/// Where the code is generated to, or where the expected code is read from when verifying.
#[derive(Debug, Clone, PartialEq)]
pub enum CodeLocation {
    /// * Generate mode: nothing is written.
    /// * Verify mode: returns `None`.
    None,
    /// * Generate mode: a new file is created, or an existing one is overwritten.
    /// * Verify mode: the expected code is read from an existing file.
    File {
        filename: String,
    },
    /// * Generate mode: the code is inserted between tags into an existing file.
    /// * Verify mode: the expected code is read between tags of an existing file.
    FileTag {
        filename: String,
        tag: String,
    },
    /// * Generate mode: the code is written to stdout.
    /// * Verify mode: not a valid option.
    StdOut,
}
65
66impl CodeLocation {
67    pub fn is_none(&self) -> bool {
68        self == &CodeLocation::None
69    }
70
71    pub fn get_type(&self) -> String {
72        match self {
73            CodeLocation::None => "no content".to_string(),
74            CodeLocation::File { filename } => format!("file '{filename}'"),
75            CodeLocation::FileTag { filename, tag } => format!("file '{filename}' / tag '{tag}'"),
76            CodeLocation::StdOut => "stdout".to_string(),
77        }
78    }
79
80    pub fn read(&self) -> Result<Option<String>, SrcTagError> {
81        match self {
82            CodeLocation::None => Ok(None),
83            CodeLocation::File { filename } => Ok(std::fs::read_to_string(filename)?).map(|s| Some(s)),
84            CodeLocation::FileTag { filename, tag } => get_tagged_source(&filename, &tag).map(|s| Some(s)),
85            CodeLocation::StdOut => {
86                Err(SrcTagError::Io(std::io::Error::new(std::io::ErrorKind::InvalidInput, "stdout can only be used as output")))
87            }
88        }
89    }
90
91    pub fn write(&self, source: &str) -> Result<(), SrcTagError> {
92        match self {
93            CodeLocation::None => Ok(()),
94            CodeLocation::File { filename } => {
95                Ok(std::fs::write(filename, source)?)
96            }
97            CodeLocation::FileTag { filename, tag } => replace_tagged_source(filename, tag, &source),
98            CodeLocation::StdOut => {
99                Ok(std::io::stdout().write_all(source.as_bytes())?)
100            }
101        }
102    }
103}
104
105// ---------------------------------------------------------------------------------------------
106
107/// Options used to generate the source code of the lexer, parser, wrapper, and listener from a lexicon and a grammar
108/// (the grammar is optional if only the lexer must be generated).
109///
110/// See [OptionsBuilder] for the accompanying builder.
111#[derive(Clone, PartialEq, Debug)]
112pub struct Options {
113    /// Specification of the lexer (lexicon)
114    pub lexer_spec: Specification,
115    /// Location of the generated/verified lexer code
116    pub lexer_code: CodeLocation,
117    /// Indentation of lexer source code
118    pub lexer_indent: usize,
119    /// Specification of the parser (grammar)
120    pub parser_spec: Specification,
121    /// Location of the generated/verified parser code
122    pub parser_code: CodeLocation,
123    /// Indentation of parser source code
124    pub parser_indent: usize,
125    /// Extra headers before the lexer code
126    pub lexer_headers: Vec<String>,
127    /// Extra headers before the parser code
128    pub parser_headers: Vec<String>,
129    /// Extra `use` libraries to include in the parser code (only if `parser_code` isn't `None`)
130    pub extra_libs: Vec<String>,
131    /// Includes the definitions of the alternatives in the parser, for debugging purposes
132    ///
133    /// Default: `false`
134    pub gen_parser_alts: bool,
135    /// Generates the wrapper, which is necessary to interface a listener (only if `parser_code` isn't `None`)
136    ///
137    /// Default: `true`
138    pub gen_wrapper: bool,
139    /// Generates the span parameters in the listener methods, to get the position of the terminals/nonterminals (only if `gen_wrapper` is `true`)
140    ///
141    /// Default: `false`
142    pub gen_span_params: bool,
143    /// Uses the full library instead of the core library in the generated code. Use this option for a lexer / parser that
144    /// needs [lexigram_lib] instead of the smaller [lexigram_core].
145    ///
146    /// Default: `false`
147    pub lib_crate: LexigramCrate,
148}
149
150impl Options {
151    pub fn new() -> Self {
152        Self::default()
153    }
154}
155
156impl Default for Options {
157    fn default() -> Self {
158        Options {
159            lexer_spec: Specification::None,
160            lexer_code: CodeLocation::None,
161            lexer_indent: 0,
162            parser_spec: Specification::None,
163            parser_code: CodeLocation::None,
164            parser_indent: 0,
165            lexer_headers: vec![],
166            parser_headers: vec![],
167            extra_libs: vec![],
168            gen_parser_alts: false,
169            gen_wrapper: true,
170            gen_span_params: false,
171            lib_crate: LexigramCrate::Core,
172        }
173    }
174}
175
/// Position of the builder in the method chain; it determines which options may still be set
/// and which part (lexer, parser, or both) the positional options apply to.
#[derive(Debug, Clone, Copy, PartialEq)]
enum BuilderState {
    /// No lexer nor parser option has been set yet
    Start,
    /// At least one lexer option has been set, but no parser option
    Lexer,
    /// At least one parser option has been set
    Parser,
    /// An error was detected; the first error message is kept by the builder
    Error,
}
178
179/// Builder of the [Options] object.
180///
181/// There are 3 types of options:
182/// * options related to the lexer: [lexer](OptionsBuilder::lexer), [indent](OptionsBuilder::indent), [headers](OptionsBuilder::headers)
183/// * options related to the parser: [parser](OptionsBuilder::parser), [indent](OptionsBuilder::indent), [headers](OptionsBuilder::headers)
184/// * general options: [extra_libs](OptionsBuilder::extra_libs), [parser_alts](OptionsBuilder::parser_alts),
185/// [wrapper](OptionsBuilder::wrapper), [span_params](OptionsBuilder::span_params)
186///
187/// Initially, the default option settings corresponds to [Object]'s defaults. The builder offers a convenient way to chain method
188/// in order to set custom options.
189///
190/// The builder offers a way to use some of those options differently, depending on their position in the method chain.
191/// The options [indent](OptionsBuilder::indent) and [headers](OptionsBuilder::headers) are:
192/// * applied to both the lexer and parser if placed before [lexer](OptionsBuilder::lexer) and [parser](OptionsBuilder::parser)
193/// * applied to the lexer if placed after [lexer](OptionsBuilder::lexer) and before [parser](OptionsBuilder::parser)
194/// * applied to the parser if placed after [parser](OptionsBuilder::parser).
195///
196/// The option [parser](OptionsBuilder::parser) mustn't be used before [lexer](OptionsBuilder::lexer).
197///
198/// The [build](OptionsBuilder::build) method creates the resulting [Options] object. It doesn't check the object is properly set, however,
199/// since the user may want to modify it later. For instance, it doesn't verify that the lexer is defined as something else than 'none'.
200/// The current configuration is cleared after that point, and a new object can be created again with the same builder.
201///
202/// The [options](OptionsBuilder::options) method moves the builder to create the resulting [Options] object, so it can't be reused
203/// (unless cloned before callind the method).
204#[derive(Clone, Debug)]
205pub struct OptionsBuilder {
206    options: Options,
207    state: BuilderState,
208    message: Option<String>,
209    has_spec: bool,
210    has_code: bool,
211}
212
// Error messages reported by the builder

// Lexer options
pub(crate) static ERR_LEXER_SPEC_ALREADY_SET: &str =
    "lexer lexicon specification already set";
pub(crate) static ERR_LEXER_CODE_ALREADY_SET: &str =
    "lexer code location already set";
pub(crate) static ERR_LEXER_AFTER_PARSER: &str =
    "lexer option set after parser options";
pub(crate) static ERR_LEXER_SPEC_OR_CODE_ALREADY_SET: &str =
    "lexer code location and/or specification already set";
pub(crate) static ERR_PARSER_SET_BEFORE_LEXER_NOT_SET: &str =
    "parser option set before any lexer options has been set";
pub(crate) static ERR_MISSING_LEXER_OPTION: &str =
    "lexer is missing option(s)";

// Parser options
pub(crate) static ERR_PARSER_SPEC_ALREADY_SET: &str =
    "parser grammar specifications already set";
pub(crate) static ERR_PARSER_CODE_ALREADY_SET: &str =
    "parser code location already set";
pub(crate) static ERR_PARSER_SPEC_OR_CODE_ALREADY_SET: &str =
    "parser code location and/or specification already set";
pub(crate) static ERR_MISSING_PARSER_OPTION: &str =
    "parser is missing option(s)";
223
224impl OptionsBuilder {
225    /// Creates a builder for [Options]
226    pub fn new() -> Self {
227        OptionsBuilder { state: BuilderState::Start, options: Options::new(), message: None, has_spec: false, has_code: false }
228    }
229
230    /// Checks if the builder has encountered an error
231    pub fn has_error(&self) -> bool {
232        self.state == BuilderState::Error
233    }
234
235    /// Gets the current error message, if any
236    pub fn get_error_message(&self) -> Option<&str> {
237        self.message.as_ref().map(|s| s.as_str())
238    }
239
240    pub fn reset(&mut self) {
241        self.state = BuilderState::Start;
242        self.message = None;
243        self.has_code = false;
244        self.has_spec = false;
245    }
246
247    fn set_error<T: Into<String>>(&mut self, message: T) {
248        if self.state != BuilderState::Error {
249            self.state = BuilderState::Error;
250            self.message = Some(message.into());
251        }
252    }
253
254    /// Sets the location of the lexer's lexicon specification (default is none)
255    pub fn lexer_spec(&mut self, lexer_spec: Specification) -> &mut Self {
256        match self.state {
257            BuilderState::Start | BuilderState::Lexer => {
258                if !self.has_spec {
259                    self.state = BuilderState::Lexer;
260                    self.options.lexer_spec = lexer_spec;
261                    self.has_spec = true;
262                } else {
263                    self.set_error(ERR_LEXER_SPEC_ALREADY_SET);
264                }
265            }
266            BuilderState::Parser => {
267                self.set_error(ERR_LEXER_AFTER_PARSER);
268            }
269            BuilderState::Error => {}
270        }
271        self
272    }
273
274    /// Sets the location of the lexer's generated code (default is none)
275    pub fn lexer_code(&mut self, lexer_code: CodeLocation) -> &mut Self {
276        match self.state {
277            BuilderState::Start | BuilderState::Lexer => {
278                if !self.has_code {
279                    self.state = BuilderState::Lexer;
280                    self.options.lexer_code = lexer_code;
281                    self.has_code = true;
282                } else {
283                    self.set_error(ERR_LEXER_CODE_ALREADY_SET);
284                }
285            }
286            BuilderState::Parser => {
287                self.set_error(ERR_LEXER_AFTER_PARSER);
288            }
289            BuilderState::Error => {}
290        }
291        self
292    }
293
294    /// Sets the location of the lexer's lexicon specification and generated code (default is none for both)
295    pub fn lexer(&mut self, lexer_spec: Specification, lexer_code: CodeLocation) -> &mut Self {
296        match self.state {
297            BuilderState::Start => {
298                self.state = BuilderState::Lexer;
299                self.options.lexer_spec = lexer_spec;
300                self.options.lexer_code = lexer_code;
301                self.has_spec = true;
302                self.has_code = true;
303            }
304            BuilderState::Lexer => {
305                self.set_error(ERR_LEXER_SPEC_OR_CODE_ALREADY_SET);
306            }
307            BuilderState::Parser => {
308                self.set_error(ERR_LEXER_AFTER_PARSER);
309            }
310            BuilderState::Error => {}
311        }
312        self
313    }
314
315    /// Sets the location the parser's grammar specification (default is none)
316    pub fn parser_spec(&mut self, parser_spec: Specification) -> &mut Self {
317        match self.state {
318            BuilderState::Start => {
319                self.set_error(ERR_PARSER_SET_BEFORE_LEXER_NOT_SET);
320            }
321            BuilderState::Lexer | BuilderState::Parser => {
322                if self.state != BuilderState::Parser {
323                    self.has_spec = false;
324                    self.has_code = false;
325                }
326                if !self.has_spec {
327                    self.state = BuilderState::Parser;
328                    self.options.parser_spec = parser_spec;
329                    self.has_spec = true;
330                } else {
331                    self.set_error(ERR_PARSER_SPEC_ALREADY_SET);
332                }
333            }
334            BuilderState::Error => {}
335        }
336        self
337    }
338
339    /// Sets the location the parser's generated code (default is none)
340    pub fn parser_code(&mut self, parser_code: CodeLocation) -> &mut Self {
341        match self.state {
342            BuilderState::Start => {
343                self.set_error(ERR_PARSER_SET_BEFORE_LEXER_NOT_SET);
344            }
345            BuilderState::Lexer | BuilderState::Parser => {
346                if self.state != BuilderState::Parser {
347                    self.has_spec = false;
348                    self.has_code = false;
349                }
350                if !self.has_code {
351                    self.state = BuilderState::Parser;
352                    self.options.parser_code = parser_code;
353                    self.has_code = true;
354                } else {
355                    self.set_error(ERR_PARSER_CODE_ALREADY_SET);
356                }
357            }
358            BuilderState::Error => {}
359        }
360        self
361    }
362
363    /// Sets the location the parser's grammar specification and generated code (default is none for both)
364    pub fn parser(&mut self, parser_spec: Specification, parser_code: CodeLocation) -> &mut Self {
365        match self.state {
366            BuilderState::Start => {
367                self.set_error(ERR_PARSER_SET_BEFORE_LEXER_NOT_SET);
368            }
369            BuilderState::Lexer => {
370                self.state = BuilderState::Parser;
371                self.options.parser_spec = parser_spec;
372                self.options.parser_code = parser_code;
373                self.has_spec = true;
374                self.has_code = true;
375            }
376            BuilderState::Parser => {
377                self.set_error(ERR_PARSER_SPEC_OR_CODE_ALREADY_SET);
378            }
379            BuilderState::Error => {}
380        }
381        self
382    }
383
384    /// Sets the indentation of the generated code, in number of space characters (default is 0)
385    pub fn indent(&mut self, indent: usize) -> &mut Self {
386        match self.state {
387            BuilderState::Start => {
388                self.options.lexer_indent = indent;
389                self.options.parser_indent = indent;
390            }
391            BuilderState::Lexer => self.options.lexer_indent = indent,
392            BuilderState::Parser => self.options.parser_indent = indent,
393            BuilderState::Error => {}
394        }
395        self
396    }
397
398    /// **Adds** optional headers, which will be placed in front of the code (even before the `use`
399    /// clauses). This can be used to place inner attributes like `#![allow(unused)]` or `#![cfg(...)]`.
400    ///
401    /// This method can be called several times to add more headers.
402    pub fn headers<I: IntoIterator<Item=T>, T: Into<String>>(&mut self, headers: I) -> &mut Self {
403        let hdr: Vec<String> = headers.into_iter().map(|s| s.into()).collect();
404        match self.state {
405            BuilderState::Start => {
406                self.options.lexer_headers.extend(hdr.clone());
407                self.options.parser_headers.extend(hdr);
408            }
409            BuilderState::Lexer => self.options.lexer_headers.extend(hdr),
410            BuilderState::Parser => self.options.parser_headers.extend(hdr),
411            BuilderState::Error => {}
412        }
413        self
414    }
415
416    /// **Adds** user crates and modules to the list of `use` dependencies for the parser / wrapper.
417    /// This can be used to define the user types needed in the wrapper / listener
418    /// (those types can be initially copied from the generated code; they're commented out near the
419    /// beginning, after the context type definitions).
420    ///
421    /// This method can be called several times to add more dependencies.
422    pub fn extra_libs<I: IntoIterator<Item=T>, T: Into<String>>(&mut self, libs: I) -> &mut Self {
423        self.options.extra_libs.extend(libs.into_iter().map(|s| s.into()));
424        self
425    }
426
427    /// Sets the boolean option that generates more explicit debug messages in the parser when a parsing error
428    /// is encountered. It requires to generate additional information.
429    ///
430    /// Default: `false`
431    pub fn parser_alts(&mut self, parser_alts: bool) -> &mut Self {
432        self.options.gen_parser_alts = parser_alts;
433        self
434    }
435
436    /// Sets the boolean option that generates the wrapper.
437    ///
438    /// Default: `true`
439    pub fn wrapper(&mut self, wrapper: bool) -> &mut Self {
440        self.options.gen_wrapper = wrapper;
441        self
442    }
443
444    /// Sets the boolean option that generates the extra `span` parameters in the listener callback methods.
445    /// These parameters locate the terminals and nonterminals in the source text, so they can be used
446    /// for instance to generate report messages with the precise location of symbols that caused an
447    /// error.
448    ///
449    /// Default: `false`
450    pub fn span_params(&mut self, span_params: bool) -> &mut Self {
451        self.options.gen_span_params = span_params;
452        self
453    }
454
455    /// Uses the full [lexigram_lib] crate in the generated code if `use_full_lib` is true, or the
456    /// smaller [lexigram_core](lexigram_lib::lexigram_core) if false.
457    /// Use this option for a lexer / parser that needs to access the code generation features in [lexigram_lib].
458    ///
459    /// See also [set_crate](OptionsBuilder::set_crate) for a custom name or path.
460    ///
461    /// ##Example
462    ///
463    /// ```ignore
464    /// let options = OptionsBuilder::new()
465    ///     .lexer(genspec!(filename: LEXICON), gencode!(filename: LEXER))
466    ///     .parser(genspec!(filename: GRAMMAR), gencode!(filename: PARSER))
467    ///     .use_full_lib(false)
468    ///     .build()
469    ///     .expect("should have no error");
470    /// try_gen_parser(action, options)?;
471    /// ```
472    /// -> `use lexigram_core::parser::Parser;` and so on
473    pub fn use_full_lib(&mut self, use_full_lib: bool) -> &mut Self {
474        self.options.lib_crate = if use_full_lib { LexigramCrate::Full } else { LexigramCrate::Core };
475        self
476    }
477
478    /// Sets the `use` path to the lexigram core library.
479    ///
480    /// See also [use_full_lib](OptionsBuilder::set_crate) for the most common situations.
481    ///
482    /// ##Example
483    ///
484    /// ```ignore
485    /// use lexigram_core as core;
486    ///
487    /// let options = OptionsBuilder::new()
488    ///     .lexer(genspec!(filename: LEXICON), gencode!(filename: LEXER))
489    ///     .parser(genspec!(filename: GRAMMAR), gencode!(filename: PARSER))
490    ///     .set_crate(LexigramCrate::Custom("core".to_string()))
491    ///     .build()
492    ///     .expect("should have no error");
493    /// try_gen_parser(action, options)?;
494    /// ```
495    /// -> `use core::parser::Parser;` and so on
496    pub fn set_crate(&mut self, lcrate: LexigramCrate) -> &mut Self {
497        self.options.lib_crate = lcrate;
498        self
499    }
500
501    fn check_sanity(&mut self) {
502        if !self.has_error() {
503            if self.options.lexer_spec.is_none() || self.options.lexer_code.is_none() {
504                self.set_error(ERR_MISSING_LEXER_OPTION)
505            } else if self.options.parser_spec.is_none() ^ self.options.parser_code.is_none() == true {
506                self.set_error(ERR_MISSING_PARSER_OPTION)
507            }
508        }
509    }
510
511    /// Creates an [Options] object with the current options defined earlier by [OptionsBuilder]'s methods.
512    ///
513    /// **The builder resets the options to their default values** after creating and returning that object,
514    /// so it can be reused to generate other [Options] objects, but the options must be set again.
515    /// If you want the builder to keep the options, consider cloning it and using [options()](OptionsBuilder::options)
516    /// instead.
517    pub fn build(&mut self) -> Result<Options, String> {
518        self.check_sanity();
519        let error = self.state == BuilderState::Error;
520        let result = if error {
521            Err(self.message.take().expect("error without message"))
522        } else {
523            Ok(std::mem::take(&mut self.options))
524        };
525        self.reset();
526        result
527    }
528
529    /// Creates an [Options] object with the current options defined earlier by [OptionsBuilder]'s methods.
530    ///
531    /// This method moves the builder. If you want to reuse the builder, consider cloning it (if you want
532    /// to keep the same options) or using [build()](OptionsBuilder::build) instead.
533    ///
534    /// **Important note**: once [lexer](OptionsBuilder::lexer) has been called, it's not possible to call
535    /// it again. Similarly, once [parser](OptionsBuilder::parser) has been called, it's not possible to
536    /// call it or [lexer](OptionsBuilder::lexer) again (see the [Options] doc for further details).
537    /// It means that **if you clone the builder and generate an option object with this method, the
538    /// remaining builder will only be partially reconfigurable**.
539    pub fn options(self) -> Options {
540        self.options
541    }
542}
543
544// ---------------------------------------------------------------------------------------------
545// Macros
546
547pub mod macros {
548    /// Generates a [Specification](crate::gen_parser::Specification) object:
549    /// ```ignore
550    /// genspec!(none)
551    /// genspec!(string: expr)
552    /// genspec!(filename: expr)
553    /// genspec!(filename: expr, tag: expr)
554    /// ```
555    /// where `expr.to_string()` are valid strings
556    #[macro_export]
557    macro_rules! genspec {
558        (none) => {
559            $crate::options::Specification::None
560        };
561        (string: $string: expr) => {
562            $crate::options::Specification::String($string.to_string())
563        };
564        (filename: $file: expr) => {
565            $crate::options::Specification::File { filename: $file.to_string() }
566        };
567        (filename: $file: expr, tag: $tag: expr) => {
568            $crate::options::Specification::FileTag { filename: $file.to_string(), tag: $tag.to_string() }
569        };
570    }
571
572    /// Generates a [CodeLocation](crate::gen_parser::CodeLocation) object:
573    /// ```ignore
574    /// gencode!(none)
575    /// gencode!(string: expr)
576    /// gencode!(filename: expr)
577    /// gencode!(filename: expr, tag: expr)
578    /// ```
579    /// where `expr.to_string()` are valid strings
580    #[macro_export]
581    macro_rules! gencode {
582        (none) => {
583            $crate::options::CodeLocation::None
584        };
585        (filename: $file: expr) => {
586            $crate::options::CodeLocation::File { filename: $file.to_string() }
587        };
588        (filename: $file: expr, tag: $tag: expr) => {
589            $crate::options::CodeLocation::FileTag { filename: $file.to_string(), tag: $tag.to_string() }
590        };
591        (stdout) => {
592            $crate::options::CodeLocation::StdOut
593        };
594    }
595}