1use crate::ast::{DatatypeParam, Definition, NameClass, Namespace, Pattern, QName, Schema};
4use crate::lexer::Token;
5
6pub struct Parser {
8 tokens: Vec<Token>,
9 pos: usize,
10}
11
12impl Parser {
13 pub fn new(tokens: Vec<Token>) -> Self {
14 Self { tokens, pos: 0 }
15 }
16
17 pub fn parse(mut self) -> Result<Schema, ParseError> {
18 let mut namespaces = Vec::new();
19 let mut definitions = Vec::new();
20
21 while !self.at_end() {
22 if self.check(&Token::Namespace) || self.check(&Token::Default) {
23 namespaces.push(self.parse_namespace()?);
24 } else if let Some(Token::Ident(_)) = self.peek() {
25 definitions.push(self.parse_definition()?);
26 } else {
27 return Err(self.error("expected namespace or definition"));
28 }
29 }
30
31 Ok(Schema {
32 namespaces,
33 definitions,
34 })
35 }
36
37 fn parse_namespace(&mut self) -> Result<Namespace, ParseError> {
38 let is_default = self.check(&Token::Default);
39 if is_default {
40 self.advance();
41 }
42 self.expect(&Token::Namespace)?;
43
44 let prefix = if self.check(&Token::Equals) {
48 String::new()
50 } else {
51 self.expect_ident()?
52 };
53 self.expect(&Token::Equals)?;
54 let uri = self.expect_string()?;
55
56 Ok(Namespace {
57 prefix,
58 uri,
59 is_default,
60 })
61 }
62
63 fn parse_definition(&mut self) -> Result<Definition, ParseError> {
64 let name = self.expect_ident()?;
65 self.expect(&Token::Equals)?;
66 let pattern = self.parse_pattern()?;
67
68 Ok(Definition {
69 name,
70 pattern,
71 doc_comment: None,
72 })
73 }
74
75 fn parse_pattern(&mut self) -> Result<Pattern, ParseError> {
76 self.parse_interleave()
77 }
78
79 fn parse_interleave(&mut self) -> Result<Pattern, ParseError> {
81 let mut left = self.parse_choice()?;
82
83 while self.check(&Token::Ampersand) {
84 self.advance();
85 let right = self.parse_choice()?;
86 left = match left {
87 Pattern::Interleave(mut v) => {
88 v.push(right);
89 Pattern::Interleave(v)
90 }
91 _ => Pattern::Interleave(vec![left, right]),
92 };
93 }
94
95 Ok(left)
96 }
97
98 fn parse_choice(&mut self) -> Result<Pattern, ParseError> {
100 let mut left = self.parse_sequence()?;
101
102 while self.check(&Token::Pipe) {
103 self.advance();
104 let right = self.parse_sequence()?;
105 left = match left {
106 Pattern::Choice(mut v) => {
107 v.push(right);
108 Pattern::Choice(v)
109 }
110 _ => Pattern::Choice(vec![left, right]),
111 };
112 }
113
114 Ok(left)
115 }
116
117 fn parse_sequence(&mut self) -> Result<Pattern, ParseError> {
119 let mut left = self.parse_postfix()?;
120
121 while self.check(&Token::Comma) {
122 self.advance();
123 let right = self.parse_postfix()?;
124 left = match left {
125 Pattern::Sequence(mut v) => {
126 v.push(right);
127 Pattern::Sequence(v)
128 }
129 _ => Pattern::Sequence(vec![left, right]),
130 };
131 }
132
133 Ok(left)
134 }
135
136 fn parse_postfix(&mut self) -> Result<Pattern, ParseError> {
138 let mut pattern = self.parse_primary()?;
139
140 loop {
141 if self.check(&Token::Question) {
142 self.advance();
143 pattern = Pattern::Optional(Box::new(pattern));
144 } else if self.check(&Token::Star) {
145 self.advance();
146 pattern = Pattern::ZeroOrMore(Box::new(pattern));
147 } else if self.check(&Token::Plus) {
148 self.advance();
149 pattern = Pattern::OneOrMore(Box::new(pattern));
150 } else {
151 break;
152 }
153 }
154
155 Ok(pattern)
156 }
157
158 fn parse_primary(&mut self) -> Result<Pattern, ParseError> {
160 if self.check(&Token::Empty) {
161 self.advance();
162 return Ok(Pattern::Empty);
163 }
164
165 if self.check(&Token::String) {
166 self.advance();
167 let value = self.expect_string()?;
168 return Ok(Pattern::StringLiteral(value));
169 }
170
171 if self.check(&Token::Element) {
172 return self.parse_element();
173 }
174
175 if self.check(&Token::Attribute) {
176 return self.parse_attribute();
177 }
178
179 if self.check(&Token::Mixed) {
180 self.advance();
181 self.expect(&Token::LBrace)?;
182 let inner = self.parse_pattern()?;
183 self.expect(&Token::RBrace)?;
184 return Ok(Pattern::Mixed(Box::new(inner)));
185 }
186
187 if self.check(&Token::List) {
188 self.advance();
189 self.expect(&Token::LBrace)?;
190 let inner = self.parse_pattern()?;
191 self.expect(&Token::RBrace)?;
192 return Ok(Pattern::List(Box::new(inner)));
193 }
194
195 if self.check(&Token::Text) {
196 self.advance();
197 return Ok(Pattern::Text);
198 }
199
200 if let Some(Token::QuotedString(_)) = self.peek() {
202 let value = self.expect_string()?;
203 return Ok(Pattern::StringLiteral(value));
204 }
205
206 if self.check(&Token::LParen) {
207 self.advance();
208 let inner = self.parse_pattern()?;
209 self.expect(&Token::RParen)?;
210 return Ok(Pattern::Group(Box::new(inner)));
211 }
212
213 if let Some(Token::Ident(_)) = self.peek() {
215 let name = self.expect_ident()?;
216
217 if self.check(&Token::Colon) {
219 self.advance();
220 let type_name = self.expect_ident_or_keyword()?;
221
222 if self.check(&Token::LBrace) {
224 let params = self.parse_datatype_params()?;
225 return Ok(Pattern::Datatype {
226 library: name,
227 name: type_name,
228 params,
229 });
230 } else if let Some(Token::QuotedString(_)) = self.peek() {
231 let value = self.expect_string()?;
233 return Ok(Pattern::Datatype {
234 library: name,
235 name: type_name,
236 params: vec![DatatypeParam {
237 name: "pattern".to_string(),
238 value,
239 }],
240 });
241 } else {
242 return Ok(Pattern::Datatype {
243 library: name,
244 name: type_name,
245 params: vec![],
246 });
247 }
248 }
249
250 return Ok(Pattern::Ref(name));
251 }
252
253 Err(self.error("expected pattern"))
254 }
255
256 fn parse_element(&mut self) -> Result<Pattern, ParseError> {
257 self.expect(&Token::Element)?;
258 let name_class = self.parse_name_class()?;
259 self.expect(&Token::LBrace)?;
260 let pattern = self.parse_pattern()?;
261 self.expect(&Token::RBrace)?;
262
263 let name = match name_class {
265 NameClass::Name(qn) => qn,
266 _ => QName {
267 prefix: None,
268 local: "_any".to_string(),
269 },
270 };
271
272 Ok(Pattern::Element {
273 name,
274 pattern: Box::new(pattern),
275 })
276 }
277
278 fn parse_attribute(&mut self) -> Result<Pattern, ParseError> {
279 self.expect(&Token::Attribute)?;
280 let name_class = self.parse_name_class()?;
281 self.expect(&Token::LBrace)?;
282 let pattern = self.parse_pattern()?;
283 self.expect(&Token::RBrace)?;
284
285 let name = match name_class {
287 NameClass::Name(qn) => qn,
288 _ => QName {
289 prefix: None,
290 local: "_any".to_string(),
291 },
292 };
293
294 Ok(Pattern::Attribute {
295 name,
296 pattern: Box::new(pattern),
297 })
298 }
299
300 fn parse_name_class(&mut self) -> Result<NameClass, ParseError> {
302 let left = self.parse_name_class_primary()?;
303
304 if self.check(&Token::Minus) {
306 self.advance();
307 let right = self.parse_name_class_primary()?;
308 return Ok(NameClass::Except(Box::new(left), Box::new(right)));
309 }
310
311 Ok(left)
312 }
313
314 fn parse_name_class_primary(&mut self) -> Result<NameClass, ParseError> {
315 if self.check(&Token::Star) {
317 self.advance();
318 return Ok(NameClass::AnyName);
319 }
320
321 if self.check(&Token::LParen) {
323 self.advance();
324 let mut choices = vec![self.parse_name_class()?];
325 while self.check(&Token::Pipe) {
326 self.advance();
327 choices.push(self.parse_name_class()?);
328 }
329 self.expect(&Token::RParen)?;
330 if choices.len() == 1 {
331 return Ok(choices.pop().unwrap());
332 }
333 return Ok(NameClass::Choice(choices));
334 }
335
336 let qname = self.parse_qname()?;
338
339 if qname.local == "*" {
341 if let Some(prefix) = qname.prefix {
342 return Ok(NameClass::NsName(prefix));
343 }
344 return Ok(NameClass::AnyName);
345 }
346
347 Ok(NameClass::Name(qname))
348 }
349
350 fn parse_qname(&mut self) -> Result<QName, ParseError> {
351 let first = self.expect_name()?;
353
354 if self.check(&Token::Colon) {
355 self.advance();
356 let local = if self.check(&Token::Star) {
358 self.advance();
359 "*".to_string()
360 } else {
361 self.expect_name()?
362 };
363 Ok(QName {
364 prefix: Some(first),
365 local,
366 })
367 } else {
368 Ok(QName {
369 prefix: None,
370 local: first,
371 })
372 }
373 }
374
375 fn expect_name(&mut self) -> Result<String, ParseError> {
377 match self.peek() {
378 Some(Token::Ident(s)) => {
379 let s = s.clone();
380 self.advance();
381 Ok(s)
382 }
383 Some(Token::String) => {
384 self.advance();
385 Ok("string".to_string())
386 }
387 Some(Token::Default) => {
388 self.advance();
389 Ok("default".to_string())
390 }
391 Some(Token::Element) => {
392 self.advance();
393 Ok("element".to_string())
394 }
395 Some(Token::Attribute) => {
396 self.advance();
397 Ok("attribute".to_string())
398 }
399 Some(Token::Namespace) => {
400 self.advance();
401 Ok("namespace".to_string())
402 }
403 Some(Token::Empty) => {
404 self.advance();
405 Ok("empty".to_string())
406 }
407 Some(Token::Mixed) => {
408 self.advance();
409 Ok("mixed".to_string())
410 }
411 Some(Token::List) => {
412 self.advance();
413 Ok("list".to_string())
414 }
415 Some(Token::Text) => {
416 self.advance();
417 Ok("text".to_string())
418 }
419 _ => Err(self.error("expected name")),
420 }
421 }
422
423 fn parse_datatype_params(&mut self) -> Result<Vec<DatatypeParam>, ParseError> {
424 if !self.check(&Token::LBrace) {
425 return Ok(Vec::new());
426 }
427 self.advance();
428
429 let mut params = Vec::new();
430 while !self.check(&Token::RBrace) {
431 let name = self.expect_ident()?;
432 self.expect(&Token::Equals)?;
433 let value = self.expect_string()?;
434 params.push(DatatypeParam { name, value });
435 }
436 self.expect(&Token::RBrace)?;
437
438 Ok(params)
439 }
440
441 fn peek(&self) -> Option<&Token> {
444 self.tokens.get(self.pos)
445 }
446
447 fn check(&self, token: &Token) -> bool {
448 self.peek()
449 .is_some_and(|t| std::mem::discriminant(t) == std::mem::discriminant(token))
450 }
451
452 fn at_end(&self) -> bool {
453 matches!(self.peek(), Some(Token::Eof) | None)
454 }
455
456 fn advance(&mut self) -> Option<&Token> {
457 if !self.at_end() {
458 self.pos += 1;
459 }
460 self.tokens.get(self.pos - 1)
461 }
462
463 fn expect(&mut self, expected: &Token) -> Result<(), ParseError> {
464 if self.check(expected) {
465 self.advance();
466 Ok(())
467 } else {
468 Err(self.error(&format!("expected {:?}", expected)))
469 }
470 }
471
472 fn expect_ident(&mut self) -> Result<String, ParseError> {
473 match self.peek() {
474 Some(Token::Ident(s)) => {
475 let s = s.clone();
476 self.advance();
477 Ok(s)
478 }
479 _ => Err(self.error("expected identifier")),
480 }
481 }
482
483 fn expect_ident_or_keyword(&mut self) -> Result<String, ParseError> {
485 match self.peek() {
486 Some(Token::Ident(s)) => {
487 let s = s.clone();
488 self.advance();
489 Ok(s)
490 }
491 Some(Token::String) => {
492 self.advance();
493 Ok("string".to_string())
494 }
495 _ => Err(self.error("expected identifier or type name")),
496 }
497 }
498
499 fn expect_string(&mut self) -> Result<String, ParseError> {
500 match self.peek() {
501 Some(Token::QuotedString(s)) => {
502 let s = s.clone();
503 self.advance();
504 Ok(s)
505 }
506 _ => Err(self.error("expected quoted string")),
507 }
508 }
509
510 fn error(&self, msg: &str) -> ParseError {
511 ParseError {
512 message: msg.to_string(),
513 position: self.pos,
514 token: self.peek().cloned(),
515 }
516 }
517}
518
519#[derive(Debug, thiserror::Error)]
520#[error("parse error at position {position}: {message} (found {:?})", token)]
521pub struct ParseError {
522 pub message: String,
523 pub position: usize,
524 pub token: Option<Token>,
525}
526
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer::Lexer;

    /// Lexes and parses `input`, panicking if either stage fails.
    fn parse(input: &str) -> Schema {
        let tokens = Lexer::new(input).tokenize().unwrap();
        let parser = Parser::new(tokens);
        parser.parse().unwrap()
    }

    /// A definition whose body is the `empty` keyword yields `Pattern::Empty`.
    #[test]
    fn test_empty_definition() {
        let schema = parse("w_CT_Empty = empty");
        assert_eq!(schema.definitions.len(), 1);

        let definition = &schema.definitions[0];
        assert_eq!(definition.name, "w_CT_Empty");
        assert!(matches!(definition.pattern, Pattern::Empty));
    }

    /// Three `|`-separated alternatives collapse into one flat choice.
    #[test]
    fn test_choice() {
        let schema = parse(r#"w_ST_Foo = string "a" | string "b" | string "c""#);
        assert_eq!(schema.definitions.len(), 1);

        let Pattern::Choice(alternatives) = &schema.definitions[0].pattern else {
            panic!("expected choice");
        };
        assert_eq!(alternatives.len(), 3);
    }

    /// A trailing `?` wraps the attribute, and the prefixed name is split.
    #[test]
    fn test_attribute() {
        let schema = parse("w_CT_OnOff = attribute w:val { s_ST_OnOff }?");
        assert_eq!(schema.definitions.len(), 1);

        let Pattern::Optional(inner) = &schema.definitions[0].pattern else {
            panic!("expected optional");
        };
        let Pattern::Attribute { name, .. } = inner.as_ref() else {
            panic!("expected attribute");
        };
        assert_eq!(name.prefix.as_deref(), Some("w"));
        assert_eq!(name.local, "val");
    }

    /// `default namespace` sets the default flag and keeps the prefix.
    #[test]
    fn test_namespace() {
        let schema = parse(r#"default namespace w = "http://example.com""#);
        assert_eq!(schema.namespaces.len(), 1);

        let namespace = &schema.namespaces[0];
        assert!(namespace.is_default);
        assert_eq!(namespace.prefix, "w");
    }
}