lexigram_lib/
lib.rs

1// Copyright (c) 2025 Redglyph (@gmail.com). All Rights Reserved.
2
3use std::collections::BTreeSet;
4use std::ops::Deref;
5use vectree::VecTree;
6
7// exposes lexigram-core:
8pub use lexigram_core;
9pub use lexigram_core::{AltId, TokenId, VarId};
10pub use lexigram_core::CollectJoin;
11pub use lexigram_core::alt;
12pub use lexigram_core::fixed_sym_table;
13pub use lexigram_core::log;
14pub use lexigram_core::char_reader;
15pub use lexigram_core::segmap;
16pub use lexigram_core::seg;
17pub use lexigram_core::lexer;
18pub use lexigram_core::parser;
19
20mod macros;
21mod take_until;
22mod cproduct;
23pub mod build;
24pub mod segments;
25pub mod dfa;
26pub mod lexergen;
27pub mod lexi;
28pub mod grammar;
29pub mod parsergen;
30pub mod file_utils;
31mod name_fixer;
32pub use name_fixer::{NameFixer, NameTransformer};
33mod symbol_table;
34pub mod rtsgen;
35mod tests;
36
37pub use symbol_table::SymbolTable;
38
// package name & version
/// Name of this package, read from the `CARGO_PKG_NAME` environment variable at compile time.
pub const LIB_PKG_NAME: &str = env!("CARGO_PKG_NAME");
/// Version of this package, read from the `CARGO_PKG_VERSION` environment variable at compile time.
pub const LIB_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
42
43// Regular expressions / DFA, See:
44// - https://blog.burntsushi.net/regex-internals/
45// - https://en.wikipedia.org/wiki/Tagged_Deterministic_Finite_Automaton
46// - https://arxiv.org/abs/2206.01398
47//
48// See also:
49// - Ref: https://en.wikipedia.org/wiki/Comparison_of_parser_generators
50//
51// UTF-8:
52// - https://www.ibm.com/docs/en/db2/11.5?topic=support-unicode-character-encoding
53// - std::char::encode_utf8_raw(...)
54//
55// Misc
56// - https://re2c.org/
57// - https://www.genivia.com/get-reflex.html
58// - https://nothings.org/computer/lexing.html
59// - https://github.com/maciejhirsz/logos
60// - https://alic.dev/blog/fast-lexing
61
62// ---------------------------------------------------------------------------------------------
63// Shared types
64
/// Unit type used as generic parameter to indicate general, non-normalized form.
///
/// - `Dfa<General>` may have accepting states with IDs smaller than non-accepting states' IDs, or non-incremental IDs.
/// - `RuleTreeSet<General>` may include any operators like `*`, `+`, and `?`, and doesn't have a restriction on depth.
/// - `ProdRuleSet<General>` may be ambiguous, left-recursive, and/or need left factorization, depending on the target.
#[derive(Clone, Debug)]
pub struct General;
72
/// Unit type used as generic parameter to indicate the LR form.
///
/// - `ProdRuleSet<LR>` has no ambiguity.
#[derive(Clone, Debug)]
pub struct LR;
76
/// Unit type used as generic parameter to indicate the LL(1) form.
///
/// - `ProdRuleSet<LL1>` isn't left-recursive and is left-factorized.
#[derive(Clone, Debug)]
pub struct LL1;
80
/// Unit type used as generic parameter to indicate normalized form.
///
/// - `Dfa<Normalized>` always has incremental state numbers, starting at 0, with all the accepting states at the end.
/// - `RuleTreeSet<Normalized>` only has `|`, `&`, and symbols, and must have one of the 3 following patterns:
///   - a symbol
///   - a `&` with only symbols as children
///   - a `|` with only `&(symbols)` or symbols as children
#[derive(Clone, Debug)]
pub struct Normalized;
90
91// ---------------------------------------------------------------------------------------------
92// General helper functions
93
/// Gathers the items of `iter_item` in a new vector and pushes that vector at the end of `v`.
///
/// Returns the index of the newly added vector in `v`.
pub(crate) fn vaddi<I, T>(v: &mut Vec<Vec<T>>, iter_item: I) -> usize
    where I: IntoIterator<Item=T>
{
    // the new vector is pushed last, so its index is the current length
    let new_index = v.len();
    v.push(iter_item.into_iter().collect());
    new_index
}
102
103/// Takes `lines` of columns and outputs lines of strings in which the columns
104/// are aligned. The minimum width of each column can be preset with the optional `min_widths` vector.
105///
106/// The final width of each column is the 1 + maximum number of characters - not bytes - of the strings
107/// representing that column in all the lines (the +1 makes sure columns are separated by at least one
108/// space). The last column is left as-is; no spaces are added to adjust its width.
109pub fn columns_to_str(cols: Vec<Vec<String>>, min_widths: Option<Vec<usize>>) -> Vec<String> {
110    let min_widths = min_widths.unwrap_or(vec![0; cols.get(0).map(|v| v.len()).unwrap_or(0)]);
111    let ncol = min_widths.len();
112    let mut width = cols.iter().fold(min_widths, |acc, s| {
113        assert_eq!(s.len(), ncol, "number of columns is not consistently {ncol}");
114        acc.into_iter().zip(s).map(|(a, s)| a.max(s.charlen() + 1)).collect()
115    });
116    if let Some(x) = width.last_mut() { *x = 0 };
117    cols.into_iter().map(|v| v.into_iter().zip(&width).map(|(mut s, w)| {
118        for _ in 0..w.saturating_sub(s.charlen()) { s.push(' ') }
119        s
120    }).collect::<String>()).collect()
121}
122
/// Indents lines of source code by `indent` spaces and concatenates them into one string.
///
/// Each item of `parts` is a block of strings, and each string may hold several lines separated
/// by `\n`. For every line, the trailing whitespace is removed, the line is prefixed by `indent`
/// spaces if it isn't empty, and it's terminated by `\n`. An empty line is inserted between
/// two consecutive blocks.
pub(crate) fn indent_source(parts: Vec<Vec<String>>, indent: usize) -> String {
    let prefix = " ".repeat(indent);
    let mut source = String::new();
    for (i, part) in parts.into_iter().enumerate() {
        if i > 0 {
            // separates the blocks by an empty line
            source.push('\n');
        }
        for string in part {
            for line in string.split('\n') {
                let cured_line = line.trim_end();
                if !cured_line.is_empty() {
                    // empty lines are not indented, which would only create trailing spaces
                    source.push_str(&prefix);
                }
                source.push_str(cured_line);
                source.push('\n');
            }
        }
    }
    source
}
146
147// ---------------------------------------------------------------------------------------------
148// General helper traits
149
/// Measures a text in characters rather than in bytes.
pub trait CharLen {
    /// Returns the number of characters (`char` items, not bytes) in the text.
    fn charlen(&self) -> usize;
}

impl CharLen for str {
    fn charlen(&self) -> usize {
        let characters = self.chars();
        characters.count()
    }
}
160
161// ---------------------------------------------------------------------------------------------
162// Source generation helper traits and types
163
/// Inserts empty separator lines between blocks of text.
pub trait SourceSpacer {
    /// Appends an empty string, unless the collection is empty or its last string is already empty.
    fn add_space(&mut self);
}

impl SourceSpacer for Vec<String> {
    fn add_space(&mut self) {
        let ends_with_text = matches!(self.last(), Some(line) if !line.is_empty());
        if ends_with_text {
            self.push(String::new());
        }
    }
}
180
181#[derive(Debug)]
182struct StructLibs {
183    libs: BTreeSet<String>
184}
185
186impl StructLibs {
187    pub fn new() -> Self {
188        StructLibs { libs: BTreeSet::new() }
189    }
190
191    pub fn add<T: Into<String>>(&mut self, lib: T) {
192        self.libs.insert(lib.into());
193    }
194
195    pub fn extend<I: IntoIterator<Item=T>, T: Into<String>>(&mut self, libs: I) {
196        self.libs.extend(libs.into_iter().map(|s| s.into()));
197    }
198
199    #[cfg(test)]
200    fn tree_to_string(t: &VecTree<String>, idx: usize) -> String {
201        let children = t.children(idx).iter().map(|child| {
202            if t.children(*child).len() > 0 {
203                format!("{}[{}]", t.get(*child), Self::tree_to_string(t, *child))
204            } else {
205                t.get(*child).to_string()
206            }
207        }).to_vec();
208        children.join(", ")
209    }
210
211    fn to_tree(&self) -> VecTree<String> {
212        let mut tree = VecTree::new();
213        let root = tree.add_root(String::new());
214        for lib in &self.libs {
215            let mut idx = root;
216            for md in lib.split("::") {
217                idx = tree.children(idx).iter()
218                    .find_map(|&i| {
219                        if tree.get(i) == md { Some(i) } else { None }
220                    })
221                    .unwrap_or_else(|| {
222                        tree.add(Some(idx), md.to_string())
223                    });
224            }
225            tree.add(Some(idx), "self".to_string());
226        }
227        tree
228    }
229
230    pub fn gen_source_code(&self) -> Vec<String> {
231        let mut stack = Vec::<Vec<String>>::new();
232        let tree = self.to_tree();
233        for node in tree.iter_depth_simple() {
234            if node.depth as usize >= stack.len() && node.depth > 0 {
235                while node.depth as usize > stack.len() {
236                    stack.push(vec![]);
237                }
238                stack.last_mut().unwrap().push(node.to_string())
239            } else if node.depth > 0 {
240                let sub = stack.pop().unwrap();
241                stack.last_mut().unwrap().push(
242                    if sub.len() > 1 {
243                        format!("{}::{{{}}}", node.deref(), sub.join(", "))
244                    } else if sub.last().unwrap() == "self" {
245                        format!("{}", node.deref())
246                    } else {
247                        format!("{}::{}", node.deref(), sub[0])
248                    });
249            }
250        }
251        stack.pop().unwrap_or(vec![]).into_iter().map(|s| format!("use {s};")).to_vec()
252    }
253}
254
255// ---------------------------------------------------------------------------------------------
256
#[cfg(test)]
mod libtests {
    use super::*;
    use lexigram_core::log::{BufLog, Logger};
    use crate::build::{BuildError, BuildErrorSource};

    /// Converts a table of string slices into the owned lines expected by `columns_to_str`.
    fn owned_rows(rows: &[&[&str]]) -> Vec<Vec<String>> {
        rows.iter()
            .map(|row| row.iter().map(|cell| cell.to_string()).collect())
            .collect()
    }

    /// Checks the alignment of the columns, with and without preset minimum widths.
    #[test]
    fn test_column_to_str() {
        let a = owned_rows(&[
            &["1", "2"],
            &["◄10", "20"],
            &["100", "200"],
        ]);
        let b = owned_rows(&[
            &["1", "◄20", "3000"],
            &["10", "2000", "3"],
            &["1000", "2", "300"],
        ]);
        let tests = vec![
            (a.clone(), Some(vec![2, 0]), "1   2\n◄10 20\n100 200"),
            (a, None, "1   2\n◄10 20\n100 200"),
            (b.clone(), Some(vec![3, 2, 0]), "1    ◄20  3000\n10   2000 3\n1000 2    300"),
            (b, Some(vec![8, 2, 0]), "1       ◄20  3000\n10      2000 3\n1000    2    300"),
            (vec![], Some(vec![]), ""),
            (vec![], Some(vec![1, 2, 3]), ""),
            (vec![], None, ""),
        ];
        for (i, (v, w, expected)) in tests.into_iter().enumerate() {
            let result = columns_to_str(v, w).join("\n");
            assert_eq!(result, expected, "failed with test {i}")
        }
    }

    /// Checks that the items of an iterator can be joined into a string.
    #[test]
    fn test_col_to_string() {
        let values = std::collections::BTreeSet::<u32>::from([10, 20, 25]);
        let joined = values.iter().join(", ");
        assert_eq!(joined, "10, 20, 25");
    }

    /// Checks that the items of an iterator can be gathered into a vector.
    #[test]
    fn test_to_vec() {
        let collected = (0..5).to_vec();
        assert_eq!(collected, vec![0, 1, 2, 3, 4]);
    }

    /// Checks that the length is given in characters, not in bytes.
    #[test]
    fn test_charlen() {
        for (text, expected) in [("", 0), ("12345", 5), ("◄123►", 5)] {
            assert_eq!(text.to_string().charlen(), expected);
        }
    }

    /// Checks that `add_space` adds at most one empty string, and never to an empty vector.
    #[test]
    fn test_add_space() {
        let mut src = Vec::<String>::new();
        src.add_space();
        assert!(src.is_empty());
        // each step either pushes a string (`Some`) or calls `add_space` (`None`),
        // and gives the expected content after that operation
        let steps: [(Option<&str>, &[&str]); 6] = [
            (Some("1"), &["1"]),
            (None, &["1", ""]),
            (None, &["1", ""]),
            (Some("2"), &["1", "", "2"]),
            (None, &["1", "", "2", ""]),
            (None, &["1", "", "2", ""]),
        ];
        for &(pushed, expected) in steps.iter() {
            match pushed {
                Some(text) => src.push(text.to_string()),
                None => src.add_space(),
            }
            assert_eq!(src, expected);
        }
    }

    /// Checks the grouping of the library paths in the generated `use` declarations.
    #[test]
    fn test_struct_libs() {
        const VERBOSE: bool = false;
        let mut libs = StructLibs::new();
        // the libraries are given as an array, a vector of &str, and a vector of String
        libs.extend(["a", "a::a1", "a::b1", "a::a1::a2", "a::a1::b2"]);
        libs.add("b");
        libs.extend(vec!["a::a1::a2::a3"]);
        libs.extend(vec!["a::c1::a2".to_string()]);
        let tree = libs.to_tree();
        if VERBOSE {
            println!("{}", tree.iter_depth_simple().map(|n| format!("({}){}", n.depth, n.deref())).join(", "));
            println!("{}", StructLibs::tree_to_string(&tree, tree.get_root().unwrap()));
        }
        let src = libs.gen_source_code();
        if VERBOSE {
            println!("{}", src.join("\n"));
        }
        assert_eq!(src, ["use a::{self, a1::{self, a2::{self, a3}, b2}, b1, c1::a2};", "use b;"]);

        let src_empty = StructLibs::new().gen_source_code();
        assert_eq!(src_empty, Vec::<String>::new());
    }

    /// Checks the message of a build error that carries a log with one error.
    #[test]
    fn test_build_error() {
        fn build() -> Result<(), BuildError> {
            let mut log = BufLog::new();
            log.add_error("the test generated a fake error successfully");
            Err(BuildError::new(log, BuildErrorSource::ParserGen))
        }
        let err = match build() {
            Err(err) => err,
            Ok(()) => panic!("build() should return an error"),
        };
        assert_eq!(err.to_string(), "Errors have occurred in ParserGen:\n- ERROR  : the test generated a fake error successfully\n");
    }
}
365
366// ---------------------------------------------------------------------------------------------