Skip to main content

lexigram_lib/
lib.rs

1// Copyright (c) 2025 Redglyph (@gmail.com). All Rights Reserved.
2
3use std::collections::BTreeSet;
4use std::ops::Deref;
5use vectree::VecTree;
6
7// exposes lexigram-core:
8pub use lexigram_core;
9pub use lexigram_core::{AltId, TokenId, VarId};
10pub use lexigram_core::CollectJoin;
11pub use lexigram_core::alt;
12pub use lexigram_core::fixed_sym_table;
13pub use lexigram_core::log;
14pub use lexigram_core::char_reader;
15use lexigram_core::CharLen;
16pub use lexigram_core::segmap;
17pub use lexigram_core::seg;
18pub use lexigram_core::lexer;
19pub use lexigram_core::parser;
20
21mod macros;
22mod take_until;
23mod cproduct;
24pub mod build;
25pub mod segments;
26pub mod dfa;
27pub mod lexergen;
28pub mod lexi;
29pub mod grammar;
30pub mod parsergen;
31pub mod file_utils;
32mod name_fixer;
33pub use name_fixer::{NameFixer, NameTransformer};
34mod symbol_table;
35pub mod rtsgen;
36mod tests;
37
38pub use symbol_table::SymbolTable;
39
40// package name & version
41pub const LIB_PKG_NAME: &str = env!("CARGO_PKG_NAME");
42pub const LIB_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
43
44// Regular expressions / DFA, See:
45// - https://blog.burntsushi.net/regex-internals/
46// - https://en.wikipedia.org/wiki/Tagged_Deterministic_Finite_Automaton
47// - https://arxiv.org/abs/2206.01398
48//
49// See also:
50// - Ref: https://en.wikipedia.org/wiki/Comparison_of_parser_generators
51//
52// UTF-8:
53// - https://www.ibm.com/docs/en/db2/11.5?topic=support-unicode-character-encoding
54// - std::char::encode_utf8_raw(...)
55//
56// Misc
57// - https://re2c.org/
58// - https://www.genivia.com/get-reflex.html
59// - https://nothings.org/computer/lexing.html
60// - https://github.com/maciejhirsz/logos
61// - https://alic.dev/blog/fast-lexing
62
63// ---------------------------------------------------------------------------------------------
64// Shared types
65
/// Marker (unit) type used as a generic parameter to select the general, non-normalized form.
///
/// - `Dfa<General>` may have accepting states with IDs smaller than non-accepting states' IDs,
///   or non-incremental IDs.
/// - `RuleTreeSet<General>` may include any operators like `*`, `+`, and `?`, and has no
///   restriction on depth.
/// - `ProdRuleSet<General>` may be ambiguous, left-recursive, and/or need left factorization,
///   depending on the target.
#[derive(Debug, Clone)]
pub struct General;
73
/// Marker (unit) type used as a generic parameter.
///
/// - `ProdRuleSet<LR>` has no ambiguity.
#[derive(Debug, Clone)]
pub struct LR;
77
/// Marker (unit) type used as a generic parameter.
///
/// - `ProdRuleSet<LL1>` isn't left-recursive and is left-factorized.
#[derive(Debug, Clone)]
pub struct LL1;
81
/// Marker (unit) type used as a generic parameter to select the normalized form.
///
/// - `Dfa<Normalized>` always has incremental state numbers, starting at 0, with all the
///   accepting states at the end.
/// - `RuleTreeSet<Normalized>` only has `|`, `&`, and symbols, and must match one of the
///   3 following patterns:
///   - a symbol
///   - a `&` with only symbols as children
///   - a `|` with only `&(symbols)` or symbols as children
#[derive(Debug, Clone)]
pub struct Normalized;
91
92// ---------------------------------------------------------------------------------------------
93// General helper functions
94
/// Gathers the items of `iter_item` into a new vector and pushes that vector into `v`.
///
/// Returns the index of the newly pushed vector inside `v`.
///
/// The iterable is consumed exactly once, so it does not need to be `Clone`.
pub(crate) fn vaddi<I, T>(v: &mut Vec<Vec<T>>, iter_item: I) -> usize
where
    I: IntoIterator<Item = T>,
{
    let new_index = v.len();
    v.push(iter_item.into_iter().collect());
    new_index
}
103
104/// Takes `lines` of columns and outputs lines of strings in which the columns
105/// are aligned. The minimum width of each column can be preset with the optional `min_widths` vector.
106///
107/// The final width of each column is the 1 + maximum number of characters - not bytes - of the strings
108/// representing that column in all the lines (the +1 makes sure columns are separated by at least one
109/// space). The last column is left as-is; no spaces are added to adjust its width.
110pub fn columns_to_str(cols: Vec<Vec<String>>, min_widths: Option<Vec<usize>>) -> Vec<String> {
111    let min_widths = min_widths.unwrap_or(vec![0; cols.first().map(|v| v.len()).unwrap_or(0)]);
112    let ncol = min_widths.len();
113    let mut width = cols.iter().fold(min_widths, |acc, s| {
114        assert_eq!(s.len(), ncol, "number of columns is not consistently {ncol}");
115        acc.into_iter().zip(s).map(|(a, s)| a.max(s.charlen() + 1)).collect()
116    });
117    if let Some(x) = width.last_mut() { *x = 0 };
118    cols.into_iter().map(|v| v.into_iter().zip(&width).map(|(mut s, w)| {
119        for _ in 0..w.saturating_sub(s.charlen()) { s.push(' ') }
120        s
121    }).collect::<String>()).collect()
122}
123
/// Concatenates `parts` into a single source string, indenting each non-blank line by `indent`
/// spaces.
///
/// Every string of every part is split on `'\n'`; trailing whitespace is removed from each
/// resulting line, and each line is terminated by a newline. Lines that are blank after trimming
/// are emitted without indentation. One additional empty line is inserted between two
/// consecutive parts.
pub fn indent_source(parts: Vec<Vec<String>>, indent: usize) -> String {
    let prefix = " ".repeat(indent);
    let mut source = String::new();
    for (i, part) in parts.into_iter().enumerate() {
        // Separates this part from the previous one.
        if i > 0 {
            source.push('\n');
        }
        for string in part {
            for line in string.split('\n') {
                let line = line.trim_end();
                // Blank lines stay empty so that no trailing whitespace is generated.
                if !line.is_empty() {
                    source.push_str(&prefix);
                }
                source.push_str(line);
                source.push('\n');
            }
        }
    }
    source
}
147
148// ---------------------------------------------------------------------------------------------
149// Source generation helper traits and types
150
/// Inserts empty lines between blocks of generated text.
pub trait SourceSpacer {
    /// Appends an empty string, unless there is nothing yet or the last string is already empty.
    /// Calling it several times in a row therefore never stacks up more than one empty line.
    fn add_space(&mut self);
}

impl SourceSpacer for Vec<String> {
    fn add_space(&mut self) {
        let ends_with_text = matches!(self.last(), Some(last) if !last.is_empty());
        if ends_with_text {
            self.push(String::new());
        }
    }
}
167
/// Collection of library paths (`::`-separated strings), stored as an ordered set so that a
/// path is only recorded once.
#[derive(Debug, Clone, Default)]
pub struct StructLibs {
    /// Registered paths.
    libs: BTreeSet<String>,
}
172
173impl StructLibs {
174    pub fn new() -> Self {
175        StructLibs::default()
176    }
177
178    pub fn add<T: Into<String>>(&mut self, lib: T) {
179        self.libs.insert(lib.into());
180    }
181
182    pub fn extend<I: IntoIterator<Item=T>, T: Into<String>>(&mut self, libs: I) {
183        self.libs.extend(libs.into_iter().map(|s| s.into()));
184    }
185
186    #[cfg(test)]
187    fn tree_to_string(t: &VecTree<String>, idx: usize) -> String {
188        let children = t.children(idx).iter().map(|child| {
189            if t.children(*child).len() > 0 {
190                format!("{}[{}]", t.get(*child), Self::tree_to_string(t, *child))
191            } else {
192                t.get(*child).to_string()
193            }
194        }).to_vec();
195        children.join(", ")
196    }
197
198    fn to_tree(&self) -> VecTree<String> {
199        let mut tree = VecTree::new();
200        let root = tree.add_root(String::new());
201        for lib in &self.libs {
202            let mut idx = root;
203            for md in lib.split("::") {
204                idx = tree.children(idx).iter()
205                    .find_map(|&i| {
206                        if tree.get(i) == md { Some(i) } else { None }
207                    })
208                    .unwrap_or_else(|| {
209                        tree.add(Some(idx), md.to_string())
210                    });
211            }
212            tree.add(Some(idx), "self".to_string());
213        }
214        tree
215    }
216
217    pub fn gen_source_code(&self) -> Vec<String> {
218        let mut stack = Vec::<Vec<String>>::new();
219        let tree = self.to_tree();
220        for node in tree.iter_post_depth_simple() {
221            if node.depth as usize >= stack.len() && node.depth > 0 {
222                while node.depth as usize > stack.len() {
223                    stack.push(vec![]);
224                }
225                stack.last_mut().unwrap().push(node.to_string())
226            } else if node.depth > 0 {
227                let sub = stack.pop().unwrap();
228                stack.last_mut().unwrap().push(
229                    if sub.len() > 1 {
230                        format!("{}::{{{}}}", node.deref(), sub.join(", "))
231                    } else if sub.last().unwrap() == "self" {
232                        node.deref().to_string()
233                    } else {
234                        format!("{}::{}", node.deref(), sub[0])
235                    });
236            }
237        }
238        stack.pop().unwrap_or(vec![]).into_iter().map(|s| format!("use {s};")).to_vec()
239    }
240}
241
242// ---------------------------------------------------------------------------------------------
243
#[cfg(test)]
mod libtests {
    use lexigram_core::CharLen;
    use super::*;
    use lexigram_core::log::{BufLog, Logger};
    use crate::build::{BuildError, BuildErrorSource};

    /// Builds a row of owned strings from string slices.
    fn row(cells: &[&str]) -> Vec<String> {
        cells.iter().map(|c| c.to_string()).collect()
    }

    /// Checks the column alignment with and without preset minimum widths, and with empty input.
    #[test]
    fn test_column_to_str() {
        let a = vec![
            row(&["1", "2"]),
            row(&["◄10", "20"]),
            row(&["100", "200"]),
        ];
        let b = vec![
            row(&["1", "◄20", "3000"]),
            row(&["10", "2000", "3"]),
            row(&["1000", "2", "300"]),
        ];
        let tests = vec![
            (a.clone(), Some(vec![2, 0]), "1   2\n◄10 20\n100 200"),
            (a, None, "1   2\n◄10 20\n100 200"),
            (b.clone(), Some(vec![3, 2, 0]), "1    ◄20  3000\n10   2000 3\n1000 2    300"),
            (b, Some(vec![8, 2, 0]), "1       ◄20  3000\n10      2000 3\n1000    2    300"),
            (vec![], Some(vec![]), ""),
            (vec![], Some(vec![1, 2, 3]), ""),
            (vec![], None, ""),
        ];
        for (i, (v, w, expected)) in tests.into_iter().enumerate() {
            let result = columns_to_str(v, w).join("\n");
            assert_eq!(result, expected, "failed with test {i}")
        }
    }

    /// Checks that the items of an iterator can be joined into a string.
    #[test]
    fn test_col_to_string() {
        let values = std::collections::BTreeSet::<u32>::from([10, 20, 25]);
        let joined = values.iter().join(", ");
        assert_eq!(joined, "10, 20, 25");
    }

    /// Checks that the items of an iterator can be gathered into a vector.
    #[test]
    fn test_to_vec() {
        let collected = (0..5).to_vec();
        assert_eq!(collected, vec![0, 1, 2, 3, 4]);
    }

    /// Checks that `charlen` counts characters, not bytes.
    #[test]
    fn test_charlen() {
        assert_eq!("".charlen(), 0);
        assert_eq!("12345".charlen(), 5);
        assert_eq!("◄123►".to_string().charlen(), 5);
    }

    /// Checks that `add_space` never adds a leading empty line nor two empty lines in a row.
    #[test]
    fn test_add_space() {
        let mut src = Vec::<String>::new();
        src.add_space();
        assert!(src.is_empty());

        src.push("1".to_string());
        assert_eq!(src, ["1"]);
        src.add_space();
        assert_eq!(src, ["1", ""]);
        src.add_space();
        assert_eq!(src, ["1", ""]);

        src.push("2".to_string());
        assert_eq!(src, ["1", "", "2"]);
        src.add_space();
        assert_eq!(src, ["1", "", "2", ""]);
        src.add_space();
        assert_eq!(src, ["1", "", "2", ""]);
    }

    /// Checks the grouping of paths sharing a prefix in the generated `use` statements.
    #[test]
    fn test_struct_libs() {
        const VERBOSE: bool = false;
        let mut libs = StructLibs::new();
        libs.extend(["a", "a::a1", "a::b1", "a::a1::a2", "a::a1::b2"]);
        libs.add("b");
        libs.extend(vec!["a::a1::a2::a3"]);
        libs.extend(vec!["a::c1::a2".to_string()]);
        let tree = libs.to_tree();
        if VERBOSE {
            println!("{}", tree.iter_post_depth_simple().map(|n| format!("({}){}", n.depth, n.deref())).join(", "));
            println!("{}", StructLibs::tree_to_string(&tree, tree.get_root().unwrap()));
        }
        let src = libs.gen_source_code();
        if VERBOSE {
            println!("{}", src.join("\n"));
        }
        assert_eq!(src, ["use a::{self, a1::{self, a2::{self, a3}, b2}, b1, c1::a2};", "use b;"]);

        let src_empty = StructLibs::new().gen_source_code();
        assert_eq!(src_empty, Vec::<String>::new());
    }

    /// Checks the message of a `BuildError` carrying a log with one error.
    #[test]
    fn test_build_error() {
        fn build() -> Result<(), BuildError> {
            let mut log = BufLog::new();
            log.add_error("the test generated a fake error successfully");
            Err(BuildError::new(log, BuildErrorSource::ParserGen))
        }
        let err = build().err().expect("build() should return an error");
        let expected = "Errors have occurred in ParserGen:\n- ERROR  : the test generated a fake error successfully\n";
        assert_eq!(err.to_string(), expected);
    }
}
353
354// ---------------------------------------------------------------------------------------------