1pub use logos::Span;
2use logos::{Lexer, Logos};
3use std::borrow::Cow;
4use std::ops::Deref;
5
6use crate::ast::{SelectCase, Token as AstToken};
7
8#[cfg(test)]
9mod test;
10
/// Result type used by the parsing functions in this file: the error is a
/// human-readable message paired with the source span it refers to.
type Result<T> = std::result::Result<T, (String, Span)>;
12pub use Span as LexerSpan;
/// Tokens recognised while lexing message body text, i.e. everything that is
/// not inside an argument.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Logos)]
#[logos()]
enum BodyToken {
    /// Two consecutive apostrophes (`''`); `parse_body` emits a single `'` for it.
    #[token("''")]
    DoubleApostrophe,

    /// An apostrophe-quoted run whose first quoted character is `{`, `}` or
    /// `#`, extending to the next apostrophe. `parse_body` strips the two
    /// surrounding apostrophes and emits the rest as content.
    #[regex(r"'[{}#]([^'])*'")]
    Quote,

    /// An opening brace; `parse_body` hands the lexer to `parse_arg` when it
    /// sees this token.
    #[token(r"{")]
    Argument,

    /// A bare `#`.
    #[token("#")]
    Octothorpe,

    /// Either a run of characters other than `{`, `}`, `#` and `'`, or a
    /// single apostrophe.
    #[regex(r#"([^\{\}#']+|')"#)]
    Content,

    /// A closing brace, which terminates the current body.
    #[token("}")]
    End,
}
45
/// Return type of a parsing pass: the pass result together with the lexer,
/// which is handed back so the caller can morph it and continue lexing from
/// where the pass stopped.
type PassLexer<'source, S, T> = (Result<S>, Lexer<'source, T>);
47
48fn parse_body<'source, 'a, T>(
49 mut lex: Lexer<'source, BodyToken>,
50) -> PassLexer<'source, (Vec<AstToken<'source, 'a, T>>, bool), BodyToken>
51where
52 T: Deref<Target = str> + Clone + From<&'source str>,
53{
54 let mut ast: Vec<AstToken<T>> = vec![];
55 while let Some(Ok(token)) = lex.next() {
58 match token {
59 BodyToken::Argument => {
60 let (res, tlex) = parse_arg(lex.morph());
61 lex = tlex.morph();
62 match res {
63 Ok(Some(t)) => ast.push(t),
64 Ok(None) => {}
65 Err(e) => return (Err(e), lex),
66 };
67 }
68 BodyToken::DoubleApostrophe => ast.push(AstToken::Content {
69 value: lex.slice()[0..1].into(),
70 }),
71 BodyToken::Quote => {
72 let slice = lex.slice();
73 ast.push(AstToken::Content {
74 value: slice[1..slice.len() - 1].into(),
75 })
76 }
77 BodyToken::Octothorpe => ast.push(AstToken::Octothorpe {}),
78 BodyToken::Content => ast.push(AstToken::Content {
79 value: lex.slice().into(),
80 }),
81 BodyToken::End => {
82 return (Ok((ast, true)), lex);
83 }
84 }
85 }
86
87 (Ok((ast, false)), lex)
88}
89
/// Tokens recognised inside an argument, up to and including its closing `}`.
/// Runs of `Pattern_White_Space` characters are skipped.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Logos)]
#[logos(skip r"\p{Pattern_White_Space}+")]
enum ArgToken {
    /// The `plural` keyword.
    #[token("plural")]
    Plural,
    /// The `select` keyword.
    #[token("select")]
    Select,
    /// The `selectordinal` keyword.
    #[token("selectordinal")]
    SelectOrdinal,

    /// A comma separating the parts of an argument.
    #[token(",")]
    Comma,

    /// An identifier: a digit or `ID_Start` character followed by any number
    /// of `ID_Continue` characters.
    #[regex(r"[\d\p{ID_Start}][\p{ID_Continue}]*")]
    Ident,

    /// A closing brace, which terminates the argument.
    #[token("}")]
    End,
}
135
136fn parse_arg<'source, 'a, T>(
137 mut lex: Lexer<'source, ArgToken>,
138) -> PassLexer<'source, Option<AstToken<'source, 'a, T>>, ArgToken>
139where
140 T: Deref<Target = str> + Clone + From<&'source str>,
141{
142 let mut arg = None;
143 let next = lex.next();
144 if let Some(Ok(token)) = next {
145 match token {
147 ArgToken::Ident => arg = Some(lex.slice()),
148 ArgToken::Plural | ArgToken::Select | ArgToken::SelectOrdinal => {
150 arg = Some(lex.slice())
151 }
152 ArgToken::End => return (Ok(None), lex),
155 _ => {
157 return (
158 Err(("Unexpected token in argument".to_owned(), lex.span())),
159 lex,
160 )
161 }
162 };
163 } else {
164 dbg!(next, arg);
166 if next.is_some() {
167 return (
168 Err(("Unexpected token in argument".to_owned(), lex.span())),
169 lex,
170 );
171 } else {
172 return (
173 Err((
174 "Message unexpectedly ended within argument".to_owned(),
175 lex.span(),
176 )),
177 lex,
178 );
179 }
180 }
181 if let Some(Ok(token)) = lex.next() {
182 match token {
183 ArgToken::End => {
184 if let Some(arg) = arg {
186 return (Ok(Some(AstToken::PlainArg { arg: arg.into() })), lex);
187 } else {
188 unreachable!() }
190 }
191 ArgToken::Comma => {} _ => {
193 return (
194 Err((
195 "Unexpected token in argument (expected comma or closing bracket)"
196 .to_owned(),
197 lex.span(),
198 )),
199 lex,
200 )
201 }
202 }
203 }
204
205 if let Some(Ok(token)) = lex.next() {
206 match token {
207 select @ (ArgToken::Plural | ArgToken::Select | ArgToken::SelectOrdinal) => {
208 let (res, tlex) = parse_select(select, arg.unwrap(), lex.morph());
209 lex = tlex.morph();
210 match res {
211 Ok(t) => (Ok(Some(t)), lex),
212 Err(e) => (Err(e), lex),
214 }
215 }
216
217 ArgToken::Ident => todo!(),
218
219 ArgToken::End => {
220 if let Some(arg) = arg {
222 (Ok(Some(AstToken::PlainArg { arg: arg.into() })), lex)
223 } else {
224 unreachable!() }
226 }
227 _ => (
229 Err(("Unexpected token in argument".to_owned(), lex.span())),
230 lex,
231 ),
232 }
233 } else {
234 (Err(("Unexpected end of input".to_owned(), lex.span())), lex)
235 }
236}
237
/// Tokens recognised inside the case list of a `plural`, `select` or
/// `selectordinal` argument. Runs of `Pattern_White_Space` characters are
/// skipped.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Logos)]
#[logos(skip r"\p{Pattern_White_Space}+")]
enum SelectToken {
    /// The `offset` keyword.
    #[token("offset")]
    Offset,

    /// A colon; `parse_select` only accepts it right after `offset`.
    #[token(":")]
    Colon,

    /// A run of decimal digits. It is given a higher priority than `Ident`,
    /// whose pattern also matches digit-only text.
    #[regex(r"\d+", priority = 4)]
    Int,

    /// An identifier: a digit or `ID_Start` character followed by any number
    /// of `ID_Continue` characters.
    #[regex(r"[\d\p{ID_Start}][\p{ID_Continue}]*", priority = 2)]
    Ident,

    /// A comma.
    #[token(",")]
    Comma,

    /// An opening brace, which starts the body of a case.
    #[token("{")]
    Open,

    /// A closing brace, which terminates the case list.
    #[token("}")]
    End,
}
263
264fn parse_select<'source, 'a, T>(
265 parent_type: ArgToken,
266 arg: &'source str,
267 mut lex: Lexer<'source, SelectToken>,
268) -> PassLexer<'source, AstToken<'source, 'a, T>, SelectToken>
269where
270 T: Deref<Target = str> + Clone + From<&'source str>,
271{
272 let mut cases = vec![];
273 let mut offset = (None, None);
274 let mut expect_colon = false;
275 let mut expect_comma = true;
276 let mut key = None;
277
278 while let Some(Ok(token)) = lex.next() {
279 match token {
280 SelectToken::Offset => {
281 if offset.1.is_none() && !expect_comma {
282 offset.1 = Some(lex.slice());
283 expect_colon = true;
284 expect_comma = true; } else {
286 return (
287 Err(("Unexpected offset keyword".to_owned(), lex.span())),
288 lex,
289 );
290 }
291 }
292 SelectToken::Colon => {
293 if expect_colon {
294 expect_colon = false;
295 expect_comma = false; } else {
297 return (Err(("Unexpected colon".to_owned(), lex.span())), lex);
298 }
299 }
300 SelectToken::Int => {
301 if offset.1.is_some() && !expect_colon {
302 match lex.slice().parse::<i32>() {
304 Ok(i) => {
305 offset.0 = Some(i);
306 offset.1 = None
307 }
308 Err(e) => {
309 return (
310 Result::Err((format!("Bad integer: {}", e), lex.span())),
311 lex,
312 )
313 }
314 };
315 } else if offset.1.is_none() && !expect_comma && !expect_colon {
316 key = Some(lex.slice());
318 } else {
319 return (Err(("Unexpected integer".to_owned(), lex.span())), lex);
320 }
321 }
322 SelectToken::Ident => {
323 if offset.1.is_none() && !expect_comma && !expect_colon {
324 key = Some(lex.slice());
325 } else {
326 return (Err(("Unexpected identifier".to_owned(), lex.span())), lex);
327 }
328 }
329 SelectToken::Comma => {
330 if expect_comma {
331 expect_comma = false;
332 expect_colon = false; } else {
334 return (Err(("Unexpected comma".to_owned(), lex.span())), lex);
335 }
336 }
337 SelectToken::Open => {
338 if let Some(key_inner) = key {
339 let (res, tlex) = parse_body(lex.morph());
340 lex = tlex.morph();
341 match res {
342 Ok((t, true)) => {
343 cases.push(SelectCase {
344 key: key_inner.into(),
345 tokens: Cow::Owned(t),
346 });
347 key = None
348 }
349 Ok((_, false)) => {
350 return (Err(("Unexpected end of input".to_owned(), lex.span())), lex);
351 }
352 Err(e) => return (Err(e), lex),
354 };
355 }
356 }
357 SelectToken::End => {
358 if !expect_colon {
359 return (
360 match parent_type {
361 ArgToken::Plural => {
362 let _token: std::result::Result<AstToken<T>, ()> =
363 Ok(AstToken::Plural {
364 arg: arg.into(),
365 cases: Cow::Owned(vec![]),
366 plural_offset: offset.0,
367 });
368 todo!()
369 }
370 ArgToken::SelectOrdinal => {
371 let _token: std::result::Result<AstToken<T>, ()> =
372 Ok(AstToken::SelectOrdinal {
373 arg: arg.into(),
374 cases: Cow::Owned(vec![]),
375 plural_offset: offset.0,
376 });
377 todo!()
378 }
379 ArgToken::Select => Ok(AstToken::Select {
380 arg: arg.into(),
381 cases: Cow::Owned(cases),
382 plural_offset: offset.0,
383 }),
384 _ => Err(("Unexpected parent token type".to_owned(), lex.span())),
385 },
386 lex,
387 );
388 } else {
389 return (
390 Err(("Unexpected end of select".to_owned(), lex.span())),
391 lex,
392 );
393 }
394 }
395 }
396 }
397 todo! {}
398}
399
400pub fn parse<'source, 'a, T>(src: &'source str) -> Result<Vec<AstToken<'source, 'a, T>>>
413where
414 T: Deref<Target = str> + Clone + From<&'source str>,
415{
416 let lex = BodyToken::lexer(src);
417
418 let (res, lex) = parse_body(lex);
419 match res {
420 Ok((_tok, true)) => Err(("Unexpected end of body".to_owned(), lex.span())),
421 Ok((tok, false)) => Ok(tok),
422
423 Err(e) => Err(e),
424 }
425}
426
// Unit tests for the parser: body text, quoting/escaping, plain arguments and
// `select` arguments.
#[cfg(test)]
mod inline_tests {
    use super::test::*;
    use super::*;
    use crate::parser::SelectCase;
    use crate::Token;

    // Asserts that `parse_ui($src)` equals the given list of tokens.
    // String literals in the list are wrapped in `Token::Content`; any other
    // expression is used as the token itself.
    // NOTE(review): `parse_ui` is not defined in this module; presumably it
    // comes from `super::test` — confirm.
    macro_rules! parse_assert {
        ( $src:literal, $( $i:expr ),* ) => {
            {
                assert_eq!(
                    parse_ui($src),
                    vec![
                        $(
                            parse_assert! (token, $i)
                        ),+
                    ]
                );
            }
        };
        ( token, $str:literal ) => {
            crate::ast::Token::Content {
                value: $str
            }
        };

        ( token, $tree:expr ) => {
            $tree
        }
    }

    // Parses `$src` with `parse::<&str>`, concatenates the values of all
    // `Content` tokens and asserts the result equals `$res`. Panics if parsing
    // fails or if any token is not `Content`.
    macro_rules! parse_assert_concat {
        ( $src:literal, $res:literal ) => {{
            let res = parse::<&str>($src).unwrap();
            let text: String = res
                .iter()
                .map(|t| match t {
                    Token::Content { value } => *value,
                    _ => panic!(),
                })
                .collect();
            assert_eq!(&text, $res);
        }};
    }

    // Plain text yields a single content token.
    #[test]
    fn test_body_simple() {
        parse_assert!("This is a message", "This is a message");
    }

    // A bare `#` splits the text and yields an octothorpe token.
    #[test]
    fn test_body_octothorpe() {
        parse_assert!(
            "This is # an octothorpe",
            "This is ",
            Token::Octothorpe {},
            " an octothorpe"
        );
    }

    // `''` produces a single apostrophe.
    #[test]
    fn test_body_doublequote() {
        parse_assert_concat!("This is a doublequote: ''", "This is a doublequote: '");
    }

    // Apostrophe-quoted sections starting with `{` or `}` are emitted without
    // the surrounding apostrophes.
    #[test]
    fn test_body_quote_escape() {
        parse_assert_concat!(
            "This is an '{escaped}' string, with some more escapes: '{', '}'",
            "This is an {escaped} string, with some more escapes: {, }"
        );
    }

    // Apostrophes around ordinary text are kept as-is.
    #[test]
    fn test_body_quote_no_escape() {
        parse_assert_concat!("This is a 'quoted' string", "This is a 'quoted' string");
    }

    // A `}` at the top level makes `parse` return an error, so `unwrap` panics.
    #[test]
    #[should_panic]
    fn test_body_unexpected_close() {
        let _ = parse::<&str>("This is an unexpected close: }").unwrap();
    }

    // `{name}` yields a plain argument between the surrounding content.
    #[test]
    fn test_arg_simple() {
        parse_assert!(
            "This is a {simple} replace.",
            "This is a ",
            Token::PlainArg { arg: "simple" },
            " replace."
        );
    }

    // A keyword such as `select` is accepted as a plain argument name.
    #[test]
    fn test_arg_keyword() {
        parse_assert!(
            "This has a keyword {select} replace.",
            "This has a keyword ",
            Token::PlainArg { arg: "select" },
            " replace."
        );
    }

    // A `select` argument yields one case per `key{body}` pair and no offset.
    #[test]
    fn test_arg_select() {
        parse_assert!(
            "This is a {varname, select, this{...} that{...} other{...}}",
            "This is a ",
            Token::Select {
                arg: "varname",
                plural_offset: None,
                cases: vec![
                    SelectCase {
                        key: "this",
                        tokens: vec![Token::Content { value: "..." }].into()
                    },
                    SelectCase {
                        key: "that",
                        tokens: vec![Token::Content { value: "..." }].into()
                    },
                    SelectCase {
                        key: "other",
                        tokens: vec![Token::Content { value: "..." }].into()
                    }
                ]
                .into()
            }
        );
    }
}