1use super::{Constraint, Schema, SchemaType};
2use std::fmt;
3
4#[derive(Debug)]
5pub enum SchemaParseError {
6 UnexpectedByte(u8),
7 UnmatchedGroup(u8), UnexpectedToken(Token),
9 UnexpectedEof,
10 ParseFloatError(std::num::ParseFloatError),
11 Utf8Error(std::string::FromUtf8Error),
12 InvalidConstraint(String),
13}
14
15#[derive(Clone, Debug)]
16pub enum Token {
17 Literal(String),
18 Integer(i64),
19 Float(f64),
20 Group {
21 kind: GroupKind,
22 tokens: Vec<Token>,
23 },
24
25 Punct(u8),
27}
28
29impl fmt::Display for Token {
30 fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
31 match self {
32 Token::Literal(s) => write!(fmt, "{s:?}"),
33 Token::Integer(n) => write!(fmt, "{n:?}"),
34 Token::Float(n) => write!(fmt, "{n:?}"),
35 _ => unreachable!(),
36 }
37 }
38}
39
/// Which delimiter pair encloses a [`Token::Group`].
#[derive(Clone, Debug)]
pub enum GroupKind {
    /// `{` ... `}`
    Brace,

    /// `(` ... `)`
    Parenthesis,

    /// `[` ... `]`
    Bracket,
}
46
47impl From<u8> for GroupKind {
48 fn from(c: u8) -> Self {
49 match c {
50 b'{' | b'}' => GroupKind::Brace,
51 b'(' | b')' => GroupKind::Parenthesis,
52 b'[' | b']' => GroupKind::Bracket,
53 _ => unreachable!(),
54 }
55 }
56}
57
/// Scanner state used by `tokenize`.
enum TokenizeState {
    /// Between tokens; the next byte decides what comes next.
    Init,

    /// Inside a run of digits and dots.
    Number,

    /// Inside a bare identifier.
    Identifier,

    /// Inside a quoted string; the payload is the quote byte that opened it
    /// and therefore must close it.
    Literal(u8),
}
64
65pub fn parse_schema(s: &str) -> Result<Schema, SchemaParseError> {
66 let mut index = 0;
67 let s = s.as_bytes();
68 let tokens = tokenize(s, &mut index)?;
69
70 if let Some(b) = s.get(index) {
71 return Err(SchemaParseError::UnexpectedByte(*b));
72 }
73
74 let mut index = 0;
75 let result = token_to_schema(&tokens, &mut index)?;
76 result.validate_constraint()?;
77
78 Ok(result)
79}
80
81fn tokenize(s: &[u8], index: &mut usize) -> Result<Vec<Token>, SchemaParseError> {
82 let mut curr_state = TokenizeState::Init;
83 let mut result = vec![];
84 let mut cursor = *index;
85
86 loop {
87 match curr_state {
88 TokenizeState::Init => match s.get(*index) {
89 Some(d @ (b'{' | b'(' | b'[')) => {
90 *index += 1;
91 let inner = tokenize(s, index)?;
92
93 if s.get(*index) == Some(&matching_delim(*d)) {
94 result.push(Token::Group {
95 kind: GroupKind::from(*d),
96 tokens: inner,
97 });
98 }
99
100 else {
101 return Err(SchemaParseError::UnmatchedGroup(*d));
102 }
103 },
104 Some(b'}' | b')' | b']') => {
105 return Ok(result);
106 },
107 Some(m @ (b'"' | b'\'')) => {
108 curr_state = TokenizeState::Literal(*m);
109 cursor = *index + 1;
110 },
111 Some(b'0'..=b'9') => {
112 curr_state = TokenizeState::Number;
113 cursor = *index;
114 },
115 Some(b' ' | b'\n' | b'\r' | b'\t') => {},
116 Some(b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-') => {
117 curr_state = TokenizeState::Identifier;
118 cursor = *index;
119 },
120 Some(p @ (b':' | b',')) => {
121 result.push(Token::Punct(*p));
122 },
123 Some(c) => {
124 return Err(SchemaParseError::UnexpectedByte(*c));
125 },
126 None => {
127 return Ok(result);
128 },
129 },
130 TokenizeState::Number => match s.get(*index) {
131 Some(b'0'..=b'9' | b'.') => {},
132 _ => {
133 let ns = String::from_utf8_lossy(&s[cursor..*index]).to_string();
134
135 match ns.parse::<i64>() {
136 Ok(n) => {
137 curr_state = TokenizeState::Init;
138 result.push(Token::Integer(n));
139 continue;
140 },
141 Err(_) => match ns.parse::<f64>() {
142 Ok(n) => {
143 curr_state = TokenizeState::Init;
144 result.push(Token::Float(n));
145 continue;
146 },
147 Err(e) => {
148 return Err(SchemaParseError::ParseFloatError(e));
149 },
150 },
151 }
152 },
153 },
154 TokenizeState::Identifier => match s.get(*index) {
155 Some(b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-') => {},
156 _ => match String::from_utf8(s[cursor..*index].to_vec()) {
157 Ok(s) => {
158 curr_state = TokenizeState::Init;
159 result.push(Token::Literal(s));
160 continue;
161 },
162 Err(e) => {
163 return Err(SchemaParseError::Utf8Error(e));
164 },
165 },
166 },
167 TokenizeState::Literal(marker) => match s.get(*index) {
168 Some(c) if *c == marker => match String::from_utf8(s[cursor..*index].to_vec()) {
169 Ok(s) => {
170 curr_state = TokenizeState::Init;
171 result.push(Token::Literal(s));
172 continue;
173 },
174 Err(e) => {
175 return Err(SchemaParseError::Utf8Error(e));
176 },
177 },
178 Some(_) => {},
179 None => {
180 return Err(SchemaParseError::UnmatchedGroup(marker));
181 },
182 },
183 }
184
185 *index += 1;
186 }
187}
188
189fn token_to_schema(tokens: &[Token], index: &mut usize) -> Result<Schema, SchemaParseError> {
190 let mut r#type = match tokens.get(*index) {
191 Some(t @ Token::Literal(s)) => match s.as_str() {
192 "str" | "string" => Schema::default_string(),
193 "int" | "integer" => Schema::default_integer(),
194 "float" | "number" => Schema::default_float(),
195 "bool" | "boolean" => Schema::default_boolean(),
196 "yesno" => Schema::default_yesno(),
197 "code" => Schema::default_code(),
198 "tasklist" => Schema::default_task_list(),
199 _ => {
200 return Err(SchemaParseError::UnexpectedToken(t.clone()));
201 },
202 },
203 Some(Token::Group {
204 kind: GroupKind::Brace,
205 tokens: inner,
206 }) => {
207 let mut inner_index = 0;
208 let mut result = vec![];
209
210 loop {
211 let key = match inner.get(inner_index) {
212 Some(Token::Literal(s)) => s.to_string(),
213 Some(t) => {
214 return Err(SchemaParseError::UnexpectedToken(t.clone()));
215 },
216 None => { break; },
217 };
218
219 inner_index += 1;
220
221 match inner.get(inner_index) {
222 Some(Token::Punct(b':')) => {},
223 Some(t) => {
224 return Err(SchemaParseError::UnexpectedToken(t.clone()));
225 },
226 None => {
227 return Err(SchemaParseError::UnexpectedEof);
228 },
229 }
230
231 inner_index += 1;
232 let inner_type = token_to_schema(&inner, &mut inner_index)?;
233 result.push((key, inner_type));
234
235 match inner.get(inner_index) {
236 Some(Token::Punct(b',')) => {
237 inner_index += 1;
238 },
239 Some(t) => {
240 return Err(SchemaParseError::UnexpectedToken(t.clone()));
241 },
242 None => { break; },
243 }
244 }
245
246 Schema {
247 r#type: SchemaType::Object(result),
248 constraint: None,
249 }
250 },
251 Some(Token::Group {
252 kind: GroupKind::Bracket,
253 tokens: inner,
254 }) => {
255 let mut inner_index = 0;
256 let inner_type = if inner.is_empty() {
257 None
258 } else {
259 let res = token_to_schema(&inner, &mut inner_index)?;
260
261 if inner_index < inner.len() {
262 return Err(SchemaParseError::UnexpectedToken(inner[inner_index].clone()));
263 }
264
265 Some(res)
266 };
267
268 Schema::default_array(inner_type)
269 },
270 Some(t) => {
271 return Err(SchemaParseError::UnexpectedToken(t.clone()));
272 },
273 None => {
274 return Err(SchemaParseError::UnexpectedEof);
275 },
276 };
277 *index += 1;
278
279 if let Some(Token::Group { kind: GroupKind::Brace, tokens: inner }) = tokens.get(*index) {
280 let constraint = parse_constraint(inner)?;
281 r#type.add_constraint(constraint);
282 *index += 1;
283 }
284
285 Ok(r#type)
286}
287
288fn parse_constraint(tokens: &[Token]) -> Result<Constraint, SchemaParseError> {
289 let mut index = 0;
290 let mut result = Constraint::default();
291
292 loop {
293 let key = match tokens.get(index) {
294 Some(Token::Literal(s)) => s.to_string(),
295 Some(t) => {
296 return Err(SchemaParseError::UnexpectedToken(t.clone()));
297 },
298 None => { break; },
299 };
300 index += 1;
301
302 match tokens.get(index) {
303 Some(Token::Punct(b':')) => {},
304 Some(t) => {
305 return Err(SchemaParseError::UnexpectedToken(t.clone()));
306 },
307 None => {
308 return Err(SchemaParseError::UnexpectedEof);
309 },
310 }
311
312 index += 1;
313
314 match key.as_str() {
315 k @ ("min" | "max" | "len_min" | "len_max") => match tokens.get(index) {
316 Some(n @ (Token::Integer(_) | Token::Float(_))) => if k == "min" || k == "len_min" {
317 if result.min.is_some() {
318 return Err(SchemaParseError::InvalidConstraint(format!("A constraint `{key}` appears more than once.")));
319 }
320
321 result.min = Some(n.to_string());
322 } else {
323 if result.max.is_some() {
324 return Err(SchemaParseError::InvalidConstraint(format!("A constraint `{key}` appears more than once.")));
325 }
326
327 result.max = Some(n.to_string());
328 },
329 Some(t) => {
330 return Err(SchemaParseError::UnexpectedToken(t.clone()));
331 },
332 None => {
333 return Err(SchemaParseError::UnexpectedEof);
334 },
335 },
336 _ => {
337 return Err(SchemaParseError::InvalidConstraint(format!("`{key}` is not a valid constraint")));
338 },
339 }
340
341 index += 1;
342
343 match tokens.get(index) {
344 Some(Token::Punct(b',')) => {},
345 Some(t) => {
346 return Err(SchemaParseError::UnexpectedToken(t.clone()));
347 },
348 None => {
349 return Ok(result);
350 },
351 }
352
353 index += 1;
354 }
355
356 Ok(result)
357}
358
/// Returns the other half of a delimiter pair: the closer for an opener and
/// the opener for a closer.
///
/// # Panics
///
/// Panics via `unreachable!()` when `c` is not one of `{ } ( ) [ ]`.
fn matching_delim(c: u8) -> u8 {
    const PAIRS: [(u8, u8); 3] = [(b'{', b'}'), (b'(', b')'), (b'[', b']')];

    for &(open, close) in PAIRS.iter() {
        if c == open {
            return close;
        }

        if c == close {
            return open;
        }
    }

    unreachable!()
}