Skip to main content

oxihuman_core/
peg_parser.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3#![allow(dead_code)]
4
/// Minimal PEG-style parser combinator.
///
/// `ParseNode` is the value produced by the parsing functions in this file.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq)]
pub enum ParseNode {
    /// Owned copy of the matched text; this is what `parse_literal`,
    /// `parse_integer` and `parse_ident` return on success.
    Literal(String),
    /// An ordered list of child nodes.
    // NOTE(review): not constructed by any code visible in this file.
    Sequence(Vec<ParseNode>),
    /// A single boxed child node.
    // NOTE(review): not constructed by any code visible in this file.
    Choice(Box<ParseNode>),
    /// Data-less placeholder; `parse_opt` returns it when its rule does not match.
    Empty,
}
14
/// Outcome of applying a parser to an input string.
///
/// `Some((node, rest))` carries the parsed node and the not-yet-consumed tail
/// of the input; `None` means the parser did not match.
#[allow(dead_code)]
pub type ParseResult<'a> = Option<(ParseNode, &'a str)>;
17
18/// Match a literal string at the start of input.
19#[allow(dead_code)]
20pub fn parse_literal<'a>(input: &'a str, lit: &str) -> ParseResult<'a> {
21    if let Some(rest) = input.strip_prefix(lit) {
22        Some((ParseNode::Literal(lit.to_string()), rest))
23    } else {
24        None
25    }
26}
27
/// Skip leading whitespace.
///
/// Returns the sub-slice of `input` that starts at its first non-whitespace
/// character (Unicode `White_Space`), or an empty slice if there is none.
#[allow(dead_code)]
pub fn skip_whitespace(input: &str) -> &str {
    input.trim_start_matches(char::is_whitespace)
}
33
34/// Match a decimal integer.
35#[allow(dead_code)]
36pub fn parse_integer(input: &str) -> ParseResult<'_> {
37    let s = skip_whitespace(input);
38    let end = s.find(|c: char| !c.is_ascii_digit()).unwrap_or(s.len());
39    if end == 0 {
40        return None;
41    }
42    Some((ParseNode::Literal(s[..end].to_string()), &s[end..]))
43}
44
45/// Match a sequence of alphanumeric+underscore chars.
46#[allow(dead_code)]
47pub fn parse_ident(input: &str) -> ParseResult<'_> {
48    let s = skip_whitespace(input);
49    let end = s
50        .find(|c: char| !c.is_alphanumeric() && c != '_')
51        .unwrap_or(s.len());
52    if end == 0 {
53        return None;
54    }
55    Some((ParseNode::Literal(s[..end].to_string()), &s[end..]))
56}
57
58/// Match an optional rule (returns Empty if fails).
59#[allow(dead_code)]
60pub fn parse_opt<'a, F>(input: &'a str, f: F) -> (ParseNode, &'a str)
61where
62    F: Fn(&'a str) -> ParseResult<'a>,
63{
64    if let Some((n, rest)) = f(input) {
65        (n, rest)
66    } else {
67        (ParseNode::Empty, input)
68    }
69}
70
71/// Try left then right, return first success.
72#[allow(dead_code)]
73pub fn parse_choice<'a, F, G>(input: &'a str, f: F, g: G) -> ParseResult<'a>
74where
75    F: Fn(&'a str) -> ParseResult<'a>,
76    G: Fn(&'a str) -> ParseResult<'a>,
77{
78    f(input).or_else(|| g(input))
79}
80
81/// Parse zero or more idents separated by a delimiter.
82#[allow(dead_code)]
83pub fn parse_list<'a>(input: &'a str, delim: &str) -> (Vec<String>, &'a str) {
84    let mut items = Vec::new();
85    let mut cur = skip_whitespace(input);
86    while let Some((ParseNode::Literal(s), rest)) = parse_ident(cur) {
87        items.push(s);
88        let rest2 = skip_whitespace(rest);
89        if let Some(after_delim) = rest2.strip_prefix(delim) {
90            cur = skip_whitespace(after_delim);
91        } else {
92            cur = rest2;
93            break;
94        }
95    }
96    (items, cur)
97}
98
/// Estimate parse depth/complexity of a string (for diagnostics).
///
/// Scans `input` once and returns the greatest number of simultaneously open
/// brackets seen. `(`, `[` and `{` each open a level; `)`, `]` and `}` each
/// close one. Bracket kinds are not matched against each other, and a closer
/// with nothing open is ignored (the running depth never goes below zero).
#[allow(dead_code)]
pub fn parse_depth(input: &str) -> usize {
    let (_, deepest) = input
        .chars()
        .fold((0usize, 0usize), |(open, deepest), ch| match ch {
            '(' | '[' | '{' => (open + 1, deepest.max(open + 1)),
            ')' | ']' | '}' => (open.saturating_sub(1), deepest),
            _ => (open, deepest),
        });
    deepest
}
120
121#[allow(dead_code)]
122pub fn node_text(n: &ParseNode) -> Option<&str> {
123    if let ParseNode::Literal(s) = n {
124        Some(s.as_str())
125    } else {
126        None
127    }
128}
129
#[cfg(test)]
mod tests {
    //! Unit tests for the parser helpers. Each test pins both the produced
    //! node and the unconsumed remainder, and covers the failure path as well
    //! as the success path.
    use super::*;

    /// Shorthand for building the expected `Literal` node.
    fn lit(text: &str) -> ParseNode {
        ParseNode::Literal(text.to_string())
    }

    #[test]
    fn test_parse_literal_success() {
        let (node, rest) = parse_literal("hello world", "hello").expect("should succeed");
        assert_eq!(node_text(&node), Some("hello"));
        assert_eq!(rest, " world");
    }

    #[test]
    fn test_parse_literal_fail() {
        assert!(parse_literal("world", "hello").is_none());
        // Leading whitespace is not skipped by `parse_literal`.
        assert!(parse_literal(" hello", "hello").is_none());
    }

    #[test]
    fn test_parse_integer() {
        let (node, rest) = parse_integer("123abc").expect("should succeed");
        assert_eq!(node_text(&node), Some("123"));
        assert_eq!(rest, "abc");

        // Leading whitespace is skipped; trailing input is left untouched.
        assert_eq!(parse_integer("  7 "), Some((lit("7"), " ")));
    }

    #[test]
    fn test_parse_integer_fail() {
        assert!(parse_integer("abc").is_none());
        assert!(parse_integer("").is_none());
        assert!(parse_integer("-5").is_none());
    }

    #[test]
    fn test_parse_ident() {
        let (node, rest) = parse_ident("foo_bar 42").expect("should succeed");
        assert_eq!(node_text(&node), Some("foo_bar"));
        assert_eq!(rest, " 42");
    }

    #[test]
    fn test_parse_ident_fail() {
        assert!(parse_ident("+x").is_none());
        assert!(parse_ident("").is_none());
    }

    #[test]
    fn test_parse_opt_success() {
        let (node, rest) = parse_opt("hello", |i| parse_literal(i, "hello"));
        assert_eq!(node, lit("hello"));
        assert_eq!(rest, "");
    }

    #[test]
    fn test_parse_opt_fail() {
        let (node, rest) = parse_opt("world", |i| parse_literal(i, "hello"));
        assert_eq!(node, ParseNode::Empty);
        assert_eq!(rest, "world");
    }

    #[test]
    fn test_parse_choice() {
        // First alternative wins when it matches.
        assert_eq!(
            parse_choice("42xyz", parse_integer, parse_ident),
            Some((lit("42"), "xyz"))
        );
        // Second alternative is tried when the first fails.
        assert_eq!(
            parse_choice("xyz42", parse_integer, parse_ident),
            Some((lit("xyz42"), ""))
        );
        // Both fail.
        assert!(parse_choice("+", parse_integer, parse_ident).is_none());
    }

    #[test]
    fn test_parse_list() {
        let (items, rest) = parse_list("a, b, c", ",");
        assert_eq!(
            items,
            vec!["a".to_string(), "b".to_string(), "c".to_string()]
        );
        assert_eq!(rest, "");
    }

    #[test]
    fn test_parse_list_empty_and_remainder() {
        let (items, rest) = parse_list("", ",");
        assert!(items.is_empty());
        assert_eq!(rest, "");

        // Parsing stops at the first identifier not followed by the delimiter.
        let (items, rest) = parse_list("x, y rest", ",");
        assert_eq!(items, vec!["x".to_string(), "y".to_string()]);
        assert_eq!(rest, "rest");
    }

    #[test]
    fn test_parse_depth() {
        assert_eq!(parse_depth("((()))"), 3);
        assert_eq!(parse_depth("[]{}"), 1);
        assert_eq!(parse_depth("no brackets"), 0);
        // An unmatched closer does not push the depth below zero.
        assert_eq!(parse_depth(")("), 1);
    }

    #[test]
    fn test_skip_whitespace() {
        assert_eq!(skip_whitespace("   hello"), "hello");
        assert_eq!(skip_whitespace(""), "");
    }

    #[test]
    fn test_node_text_non_literal() {
        assert_eq!(node_text(&ParseNode::Empty), None);
        assert_eq!(node_text(&ParseNode::Sequence(Vec::new())), None);
    }
}