ragit_pdl/schema/
parse.rs

1use super::{Constraint, Schema, SchemaType};
2use std::fmt;
3
/// Errors produced while tokenizing a schema string and turning its tokens into a schema.
#[derive(Debug)]
pub enum SchemaParseError {
    /// A byte that cannot start a token, or a byte left unconsumed after
    /// top-level tokenization (e.g. a closing delimiter with no opener).
    UnexpectedByte(u8),
    /// A group whose matching closing delimiter was not found. The tokenizer also
    /// returns this, carrying the quote byte, for a quoted literal that is never closed.
    UnmatchedGroup(u8),  // an opening delim
    /// A token that is not valid at the position where it was found.
    UnexpectedToken(Token),
    /// The token stream ended where another token was required.
    UnexpectedEof,
    /// A run of digits and dots that parses neither as `i64` nor as `f64`.
    ParseFloatError(std::num::ParseFloatError),
    /// The bytes of an identifier or quoted literal were not valid UTF-8.
    Utf8Error(std::string::FromUtf8Error),
    /// A constraint key that is unknown or given more than once; the payload is
    /// a human-readable message.
    InvalidConstraint(String),
}
14
/// A lexical unit of a schema string.
#[derive(Clone, Debug)]
pub enum Token {
    /// Text of a bare identifier or of a quoted string (quotes not included).
    Literal(String),
    /// A number that parses as `i64`.
    Integer(i64),
    /// A number that does not parse as `i64` but parses as `f64`.
    Float(f64),
    /// A delimited group together with the tokens found between its delimiters.
    Group {
        /// Which delimiter pair encloses the group.
        kind: GroupKind,
        /// Tokens inside the group, in source order.
        tokens: Vec<Token>,
    },

    /// ':' | ','
    Punct(u8),
}
28
29impl fmt::Display for Token {
30    fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
31        match self {
32            Token::Literal(s) => write!(fmt, "{s:?}"),
33            Token::Integer(n) => write!(fmt, "{n:?}"),
34            Token::Float(n) => write!(fmt, "{n:?}"),
35            _ => unreachable!(),
36        }
37    }
38}
39
/// The delimiter pair that encloses a `Token::Group`.
#[derive(Clone, Debug)]
pub enum GroupKind {
    /// `{` ... `}`
    Brace,
    /// `(` ... `)`
    Parenthesis,
    /// `[` ... `]`
    Bracket,
}
46
47impl From<u8> for GroupKind {
48    fn from(c: u8) -> Self {
49        match c {
50            b'{' | b'}' => GroupKind::Brace,
51            b'(' | b')' => GroupKind::Parenthesis,
52            b'[' | b']' => GroupKind::Bracket,
53            _ => unreachable!(),
54        }
55    }
56}
57
/// State of the byte-level state machine inside `tokenize`.
enum TokenizeState {
    /// Between tokens: the next byte decides which token (if any) starts.
    Init,
    /// Inside a run of ASCII digits and dots.
    Number,
    /// Inside a bare identifier (ASCII letters, `_`, `-`).
    Identifier,
    /// Inside a quoted literal; the payload is the quote byte that opened it
    /// and that must close it.
    Literal(u8),
}
64
65pub fn parse_schema(s: &str) -> Result<Schema, SchemaParseError> {
66    let mut index = 0;
67    let s = s.as_bytes();
68    let tokens = tokenize(s, &mut index)?;
69
70    if let Some(b) = s.get(index) {
71        return Err(SchemaParseError::UnexpectedByte(*b));
72    }
73
74    let mut index = 0;
75    let result = token_to_schema(&tokens, &mut index)?;
76    result.validate_constraint()?;
77
78    Ok(result)
79}
80
81fn tokenize(s: &[u8], index: &mut usize) -> Result<Vec<Token>, SchemaParseError> {
82    let mut curr_state = TokenizeState::Init;
83    let mut result = vec![];
84    let mut cursor = *index;
85
86    loop {
87        match curr_state {
88            TokenizeState::Init => match s.get(*index) {
89                Some(d @ (b'{' | b'(' | b'[')) => {
90                    *index += 1;
91                    let inner = tokenize(s, index)?;
92
93                    if s.get(*index) == Some(&matching_delim(*d)) {
94                        result.push(Token::Group {
95                            kind: GroupKind::from(*d),
96                            tokens: inner,
97                        });
98                    }
99
100                    else {
101                        return Err(SchemaParseError::UnmatchedGroup(*d));
102                    }
103                },
104                Some(b'}' | b')' | b']') => {
105                    return Ok(result);
106                },
107                Some(m @ (b'"' | b'\'')) => {
108                    curr_state = TokenizeState::Literal(*m);
109                    cursor = *index + 1;
110                },
111                Some(b'0'..=b'9') => {
112                    curr_state = TokenizeState::Number;
113                    cursor = *index;
114                },
115                Some(b' ' | b'\n' | b'\r' | b'\t') => {},
116                Some(b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-') => {
117                    curr_state = TokenizeState::Identifier;
118                    cursor = *index;
119                },
120                Some(p @ (b':' | b',')) => {
121                    result.push(Token::Punct(*p));
122                },
123                Some(c) => {
124                    return Err(SchemaParseError::UnexpectedByte(*c));
125                },
126                None => {
127                    return Ok(result);
128                },
129            },
130            TokenizeState::Number => match s.get(*index) {
131                Some(b'0'..=b'9' | b'.') => {},
132                _ => {
133                    let ns = String::from_utf8_lossy(&s[cursor..*index]).to_string();
134
135                    match ns.parse::<i64>() {
136                        Ok(n) => {
137                            curr_state = TokenizeState::Init;
138                            result.push(Token::Integer(n));
139                            continue;
140                        },
141                        Err(_) => match ns.parse::<f64>() {
142                            Ok(n) => {
143                                curr_state = TokenizeState::Init;
144                                result.push(Token::Float(n));
145                                continue;
146                            },
147                            Err(e) => {
148                                return Err(SchemaParseError::ParseFloatError(e));
149                            },
150                        },
151                    }
152                },
153            },
154            TokenizeState::Identifier => match s.get(*index) {
155                Some(b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-') => {},
156                _ => match String::from_utf8(s[cursor..*index].to_vec()) {
157                    Ok(s) => {
158                        curr_state = TokenizeState::Init;
159                        result.push(Token::Literal(s));
160                        continue;
161                    },
162                    Err(e) => {
163                        return Err(SchemaParseError::Utf8Error(e));
164                    },
165                },
166            },
167            TokenizeState::Literal(marker) => match s.get(*index) {
168                Some(c) if *c == marker => match String::from_utf8(s[cursor..*index].to_vec()) {
169                    Ok(s) => {
170                        curr_state = TokenizeState::Init;
171                        result.push(Token::Literal(s));
172                        continue;
173                    },
174                    Err(e) => {
175                        return Err(SchemaParseError::Utf8Error(e));
176                    },
177                },
178                Some(_) => {},
179                None => {
180                    return Err(SchemaParseError::UnmatchedGroup(marker));
181                },
182            },
183        }
184
185        *index += 1;
186    }
187}
188
189fn token_to_schema(tokens: &[Token], index: &mut usize) -> Result<Schema, SchemaParseError> {
190    let mut r#type = match tokens.get(*index) {
191        Some(t @ Token::Literal(s)) => match s.as_str() {
192            "str" | "string" => Schema::default_string(),
193            "int" | "integer" => Schema::default_integer(),
194            "float" | "number" => Schema::default_float(),
195            "bool" | "boolean" => Schema::default_boolean(),
196            "yesno" => Schema::default_yesno(),
197            "code" => Schema::default_code(),
198            "tasklist" => Schema::default_task_list(),
199            _ => {
200                return Err(SchemaParseError::UnexpectedToken(t.clone()));
201            },
202        },
203        Some(Token::Group {
204            kind: GroupKind::Brace,
205            tokens: inner,
206        }) => {
207            let mut inner_index = 0;
208            let mut result = vec![];
209
210            loop {
211                let key = match inner.get(inner_index) {
212                    Some(Token::Literal(s)) => s.to_string(),
213                    Some(t) => {
214                        return Err(SchemaParseError::UnexpectedToken(t.clone()));
215                    },
216                    None => { break; },
217                };
218
219                inner_index += 1;
220
221                match inner.get(inner_index) {
222                    Some(Token::Punct(b':')) => {},
223                    Some(t) => {
224                        return Err(SchemaParseError::UnexpectedToken(t.clone()));
225                    },
226                    None => {
227                        return Err(SchemaParseError::UnexpectedEof);
228                    },
229                }
230
231                inner_index += 1;
232                let inner_type = token_to_schema(&inner, &mut inner_index)?;
233                result.push((key, inner_type));
234
235                match inner.get(inner_index) {
236                    Some(Token::Punct(b',')) => {
237                        inner_index += 1;
238                    },
239                    Some(t) => {
240                        return Err(SchemaParseError::UnexpectedToken(t.clone()));
241                    },
242                    None => { break; },
243                }
244            }
245
246            Schema {
247                r#type: SchemaType::Object(result),
248                constraint: None,
249            }
250        },
251        Some(Token::Group {
252            kind: GroupKind::Bracket,
253            tokens: inner,
254        }) => {
255            let mut inner_index = 0;
256            let inner_type = if inner.is_empty() {
257                None
258            } else {
259                let res = token_to_schema(&inner, &mut inner_index)?;
260
261                if inner_index < inner.len() {
262                    return Err(SchemaParseError::UnexpectedToken(inner[inner_index].clone()));
263                }
264
265                Some(res)
266            };
267
268            Schema::default_array(inner_type)
269        },
270        Some(t) => {
271            return Err(SchemaParseError::UnexpectedToken(t.clone()));
272        },
273        None => {
274            return Err(SchemaParseError::UnexpectedEof);
275        },
276    };
277    *index += 1;
278
279    if let Some(Token::Group { kind: GroupKind::Brace, tokens: inner }) = tokens.get(*index) {
280        let constraint = parse_constraint(inner)?;
281        r#type.add_constraint(constraint);
282        *index += 1;
283    }
284
285    Ok(r#type)
286}
287
288fn parse_constraint(tokens: &[Token]) -> Result<Constraint, SchemaParseError> {
289    let mut index = 0;
290    let mut result = Constraint::default();
291
292    loop {
293        let key = match tokens.get(index) {
294            Some(Token::Literal(s)) => s.to_string(),
295            Some(t) => {
296                return Err(SchemaParseError::UnexpectedToken(t.clone()));
297            },
298            None => { break; },
299        };
300        index += 1;
301
302        match tokens.get(index) {
303            Some(Token::Punct(b':')) => {},
304            Some(t) => {
305                return Err(SchemaParseError::UnexpectedToken(t.clone()));
306            },
307            None => {
308                return Err(SchemaParseError::UnexpectedEof);
309            },
310        }
311
312        index += 1;
313
314        match key.as_str() {
315            k @ ("min" | "max" | "len_min" | "len_max") => match tokens.get(index) {
316                Some(n @ (Token::Integer(_) | Token::Float(_))) => if k == "min" || k == "len_min" {
317                    if result.min.is_some() {
318                        return Err(SchemaParseError::InvalidConstraint(format!("A constraint `{key}` appears more than once.")));
319                    }
320
321                    result.min = Some(n.to_string());
322                } else {
323                    if result.max.is_some() {
324                        return Err(SchemaParseError::InvalidConstraint(format!("A constraint `{key}` appears more than once.")));
325                    }
326
327                    result.max = Some(n.to_string());
328                },
329                Some(t) => {
330                    return Err(SchemaParseError::UnexpectedToken(t.clone()));
331                },
332                None => {
333                    return Err(SchemaParseError::UnexpectedEof);
334                },
335            },
336            _ => {
337                return Err(SchemaParseError::InvalidConstraint(format!("`{key}` is not a valid constraint")));
338            },
339        }
340
341        index += 1;
342
343        match tokens.get(index) {
344            Some(Token::Punct(b',')) => {},
345            Some(t) => {
346                return Err(SchemaParseError::UnexpectedToken(t.clone()));
347            },
348            None => {
349                return Ok(result);
350            },
351        }
352
353        index += 1;
354    }
355
356    Ok(result)
357}
358
/// Returns the partner of a grouping delimiter: the closer for an opener and the
/// opener for a closer.
///
/// Any byte that is not one of `{ } ( ) [ ]` hits `unreachable!()`.
fn matching_delim(c: u8) -> u8 {
    const PAIRS: [(u8, u8); 3] = [(b'{', b'}'), (b'(', b')'), (b'[', b']')];

    for &(open, close) in PAIRS.iter() {
        if c == open {
            return close;
        }

        if c == close {
            return open;
        }
    }

    unreachable!()
}