1use winnow::ascii::space1;
2use winnow::combinator::alt;
3use winnow::combinator::cut_err;
4use winnow::combinator::delimited;
5use winnow::combinator::opt;
6use winnow::combinator::preceded;
7use winnow::combinator::terminated;
8use winnow::combinator::trace;
9use winnow::prelude::*;
10use winnow::token::one_of;
11
12use crate::{Category, Cluster, Entry, Pos, Tag, Type, Variant};
13
/// Iterator that parses consecutive [`Cluster`]s out of a string slice.
///
/// Only the not-yet-parsed remainder of the text is stored; each call to
/// `next` advances it.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct ClusterIter<'i> {
    /// Text that still has to be parsed.
    input: &'i str,
}

impl<'i> ClusterIter<'i> {
    /// Creates an iterator positioned at the start of `input`.
    pub fn new(input: &'i str) -> Self {
        ClusterIter { input }
    }
}
24
25impl Iterator for ClusterIter<'_> {
26 type Item = Cluster;
27
28 fn next(&mut self) -> Option<Cluster> {
29 self.input = self.input.trim_start();
30 Cluster::parse_.parse_next(&mut self.input).ok()
31 }
32}
33
// Snapshot tests for `ClusterIter`: each test feeds cluster text to the
// iterator, collects the result and compares its `to_debug()` rendering with
// an inline snapshot.
#[cfg(test)]
mod test_cluster_iter {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    // A single cluster followed by a blank line yields exactly one `Cluster`.
    #[test]
    fn test_single() {
        let actual = ClusterIter::new(
            "# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's

",
        );
        assert_data_eq!(
            actual.collect::<Vec<_>>().to_debug(),
            str![[r#"
[
    Cluster {
        header: "acknowledgment ",
        verified: true,
        level: 35,
        entries: [
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgment",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgement",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgments",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgements",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgment's",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgement's",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
        ],
        notes: [],
    },
]

"#]]
        );
    }

    // Two clusters separated by a blank line are both yielded, in input order.
    #[test]
    fn test_multiple() {
        let actual = ClusterIter::new(
            "# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's

# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's

",
        );
        assert_data_eq!(
            actual.collect::<Vec<_>>().to_debug(),
            str![[r#"
[
    Cluster {
        header: "acknowledgment ",
        verified: true,
        level: 35,
        entries: [
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgment",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgement",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgments",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgements",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgment's",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgement's",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
        ],
        notes: [],
    },
    Cluster {
        header: "acknowledgment ",
        verified: true,
        level: 35,
        entries: [
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgment",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgement",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgments",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgements",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgment's",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgement's",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
        ],
        notes: [],
    },
]

"#]]
        );
    }
}
543
544impl Cluster {
545 pub fn parse(input: &str) -> Result<Self, ParseError> {
546 Self::parse_.parse(input).map_err(|_err| ParseError)
547 }
548
549 fn parse_(input: &mut &str) -> ModalResult<Self, ()> {
550 trace("cluster", move |input: &mut &str| {
551 let header = (
552 "#",
553 winnow::ascii::space0,
554 winnow::token::take_till(1.., ('\r', '\n', '<', '(')),
555 winnow::ascii::space0,
556 opt(("<verified>", winnow::ascii::space0)),
557 delimited("(level ", winnow::ascii::digit1, ')').parse_to::<usize>(),
558 winnow::ascii::space0,
559 winnow::ascii::line_ending,
560 );
561 let note = preceded(
562 ("##", winnow::ascii::space0),
563 terminated(winnow::ascii::till_line_ending, winnow::ascii::line_ending),
564 );
565 let mut cluster = (
566 header,
567 winnow::combinator::repeat(
568 1..,
569 terminated(Entry::parse_, winnow::ascii::line_ending),
570 ),
571 winnow::combinator::repeat(0.., note),
572 );
573 let (header, entries, notes): (_, _, Vec<_>) = cluster.parse_next(input)?;
574
575 let verified = header.4.is_some();
576 let level = header.5;
577 let header = header.2.to_owned();
578 let notes = notes.into_iter().map(|s| s.to_owned()).collect();
579 let c = Self {
580 header,
581 verified,
582 level,
583 entries,
584 notes,
585 };
586 Ok(c)
587 })
588 .parse_next(input)
589 }
590}
591
// Snapshot tests for `Cluster::parse_`: each test checks both the unparsed
// remainder and the `to_debug()` rendering of the parsed cluster.
#[cfg(test)]
mod test_cluster {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    // Header plus three entry lines; the blank line after the cluster is left
    // unconsumed.
    #[test]
    fn test_basic() {
        let (input, actual) = Cluster::parse_
            .parse_peek(
                "# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's

",
            )
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Cluster {
    header: "acknowledgment ",
    verified: true,
    level: 35,
    entries: [
        Entry {
            variants: [
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                    ],
                    word: "acknowledgment",
                },
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                        Type {
                            category: BritishIse,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: None,
                            num: None,
                        },
                    ],
                    word: "acknowledgement",
                },
            ],
            pos: None,
            archaic: false,
            description: None,
            note: None,
            comment: None,
        },
        Entry {
            variants: [
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                    ],
                    word: "acknowledgments",
                },
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                        Type {
                            category: BritishIse,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: None,
                            num: None,
                        },
                    ],
                    word: "acknowledgements",
                },
            ],
            pos: None,
            archaic: false,
            description: None,
            note: None,
            comment: None,
        },
        Entry {
            variants: [
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                    ],
                    word: "acknowledgment's",
                },
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                        Type {
                            category: BritishIse,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: None,
                            num: None,
                        },
                    ],
                    word: "acknowledgement's",
                },
            ],
            pos: None,
            archaic: false,
            description: None,
            note: None,
            comment: None,
        },
    ],
    notes: [],
}

"#]]
        );
    }

    // `##` lines after the entries are collected into `notes`, one string per
    // line, without the `##` marker.
    #[test]
    fn test_notes() {
        let (input, actual) = Cluster::parse_
            .parse_peek(
                "# coloration <verified> (level 50)
A B C: coloration / B. Cv: colouration
A B C: colorations / B. Cv: colourations
A B C: coloration's / B. Cv: colouration's
## OED has coloration as the preferred spelling and discolouration as a
## variant for British Engl or some reason

",
            )
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Cluster {
    header: "coloration ",
    verified: true,
    level: 50,
    entries: [
        Entry {
            variants: [
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: BritishIse,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: None,
                            num: None,
                        },
                    ],
                    word: "coloration",
                },
                Variant {
                    types: [
                        Type {
                            category: BritishIse,
                            tag: Some(
                                Eq,
                            ),
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                    ],
                    word: "colouration",
                },
            ],
            pos: None,
            archaic: false,
            description: None,
            note: None,
            comment: None,
        },
        Entry {
            variants: [
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: BritishIse,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: None,
                            num: None,
                        },
                    ],
                    word: "colorations",
                },
                Variant {
                    types: [
                        Type {
                            category: BritishIse,
                            tag: Some(
                                Eq,
                            ),
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                    ],
                    word: "colourations",
                },
            ],
            pos: None,
            archaic: false,
            description: None,
            note: None,
            comment: None,
        },
        Entry {
            variants: [
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: BritishIse,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: None,
                            num: None,
                        },
                    ],
                    word: "coloration's",
                },
                Variant {
                    types: [
                        Type {
                            category: BritishIse,
                            tag: Some(
                                Eq,
                            ),
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                    ],
                    word: "colouration's",
                },
            ],
            pos: None,
            archaic: false,
            description: None,
            note: None,
            comment: None,
        },
    ],
    notes: [
        "OED has coloration as the preferred spelling and discolouration as a",
        "variant for British Engl or some reason",
    ],
}

"#]]
        );
    }
}
963
964impl Entry {
965 pub fn parse(input: &str) -> Result<Self, ParseError> {
966 Self::parse_.parse(input).map_err(|_err| ParseError)
967 }
968
969 fn parse_(input: &mut &str) -> ModalResult<Self, ()> {
970 trace("entry", move |input: &mut &str| {
971 let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0);
972 let variants =
973 winnow::combinator::separated(1.., Variant::parse_, var_sep).parse_next(input)?;
974
975 let mut e = Self::parse_description.parse_next(input)?;
976
977 let comment_sep = (winnow::ascii::space0, '#');
978 let comment =
979 opt((comment_sep, space1, winnow::ascii::till_line_ending)).parse_next(input)?;
980
981 let _ = winnow::ascii::space0.parse_next(input)?;
982
983 e.variants = variants;
984 e.comment = comment.map(|c| c.2.to_owned());
985 Ok(e)
986 })
987 .parse_next(input)
988 }
989
990 fn parse_description(input: &mut &str) -> ModalResult<Self, ()> {
991 trace("description", move |input: &mut &str| {
992 let mut entry = Self {
993 variants: Vec::new(),
994 pos: None,
995 archaic: false,
996 description: None,
997 note: None,
998 comment: None,
999 };
1000
1001 if opt((winnow::ascii::space0, '|'))
1002 .parse_next(input)?
1003 .is_some()
1004 {
1005 let _ = opt((space1, "<abbr>")).parse_next(input)?;
1006 let _ = opt((space1, "<pl>")).parse_next(input)?;
1007 entry.pos = opt(delimited((space1, '<'), cut_err(Pos::parse_), cut_err('>')))
1008 .parse_next(input)?;
1009 entry.archaic = opt(preceded(space1, archaic)).parse_next(input)?.is_some();
1010 entry.note = opt(preceded(space1, note)).parse_next(input)?;
1011 entry.description = opt(preceded(space1, description)).parse_next(input)?;
1012
1013 if opt((winnow::ascii::space0, '|'))
1014 .parse_next(input)?
1015 .is_some()
1016 {
1017 entry.note = opt(preceded(space1, note)).parse_next(input)?;
1018 }
1019 }
1020 Ok(entry)
1021 })
1022 .parse_next(input)
1023 }
1024}
1025
1026fn note(input: &mut &str) -> ModalResult<String, ()> {
1027 let (_, _, note) = (NOTE_PREFIX, space1, description).parse_next(input)?;
1028 Ok(note)
1029}
1030
1031const NOTE_PREFIX: &str = "--";
1032
1033fn archaic(input: &mut &str) -> ModalResult<(), ()> {
1034 "(-)".void().parse_next(input)
1035}
1036
1037fn description(input: &mut &str) -> ModalResult<String, ()> {
1038 let description = winnow::token::take_till(0.., ('\n', '\r', '#', '|')).parse_next(input)?;
1039 Ok(description.to_owned())
1040}
1041
// Snapshot tests for `Entry::parse_`. Every input ends in `\n`, and each
// successful test asserts that exactly that line ending is left unparsed.
#[cfg(test)]
mod test_entry {
    #![allow(clippy::bool_assert_comparison)]
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    // Two variants and nothing else: all optional fields stay empty.
    #[test]
    fn test_variant_only() {
        let (input, actual) = Entry::parse_
            .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's\n")
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: Canadian,
                    tag: Some(
                        Variant,
                    ),
                    num: None,
                },
            ],
            word: "acknowledgment's",
        },
        Variant {
            types: [
                Type {
                    category: American,
                    tag: Some(
                        Variant,
                    ),
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
                Type {
                    category: Canadian,
                    tag: None,
                    num: None,
                },
            ],
            word: "acknowledgement's",
        },
    ],
    pos: None,
    archaic: false,
    description: None,
    note: None,
    comment: None,
}

"#]]
        );
    }

    // Free text after `|` ends up in `description`.
    #[test]
    fn test_description() {
        let (input, actual) = Entry::parse_
            .parse_peek("A C: prize / B: prise | otherwise\n")
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: Canadian,
                    tag: None,
                    num: None,
                },
            ],
            word: "prize",
        },
        Variant {
            types: [
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
            ],
            word: "prise",
        },
    ],
    pos: None,
    archaic: false,
    description: Some(
        "otherwise",
    ),
    note: None,
    comment: None,
}

"#]]
        );
    }

    // `<N>` after `|` is parsed into `pos: Some(Noun)`.
    #[test]
    fn test_pos() {
        let (input, actual) = Entry::parse_
            .parse_peek("A B C: practice / AV Cv: practise | <N>\n")
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
                Type {
                    category: Canadian,
                    tag: None,
                    num: None,
                },
            ],
            word: "practice",
        },
        Variant {
            types: [
                Type {
                    category: American,
                    tag: Some(
                        Seldom,
                    ),
                    num: None,
                },
                Type {
                    category: Canadian,
                    tag: Some(
                        Variant,
                    ),
                    num: None,
                },
            ],
            word: "practise",
        },
    ],
    pos: Some(
        Noun,
    ),
    archaic: false,
    description: None,
    note: None,
    comment: None,
}

"#]]
        );
    }

    // An unknown tag inside `<...>` makes the whole entry fail instead of
    // being skipped; the error renders as `Parsing Failure: ()`.
    #[test]
    fn test_pos_bad() {
        let err = Entry::parse_
            .parse_peek("A B C: practice / AV Cv: practise | <Bad>\n")
            .unwrap_err();
        assert_data_eq!(err.to_string(), str!["Parsing Failure: ()"]);
    }

    // `<pl>` is accepted, but nothing about it shows up in the parsed entry.
    #[test]
    fn test_plural() {
        let (input, actual) = Entry::parse_.parse_peek("_ _-: dogies | <pl>\n").unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: Other,
                    tag: None,
                    num: None,
                },
                Type {
                    category: Other,
                    tag: Some(
                        Possible,
                    ),
                    num: None,
                },
            ],
            word: "dogies",
        },
    ],
    pos: None,
    archaic: false,
    description: None,
    note: None,
    comment: None,
}

"#]]
        );
    }

    // `<abbr>` is accepted, but nothing about it shows up in the parsed entry.
    #[test]
    fn test_abbr() {
        let (input, actual) = Entry::parse_.parse_peek("A B: ha | <abbr>\n").unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
            ],
            word: "ha",
        },
    ],
    pos: None,
    archaic: false,
    description: None,
    note: None,
    comment: None,
}

"#]]
        );
    }

    // `(-)` sets `archaic`, and the text after it becomes the description.
    #[test]
    fn test_archaic() {
        let (input, actual) = Entry::parse_
            .parse_peek("A: bark / Av B: barque | (-) ship\n")
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
            ],
            word: "bark",
        },
        Variant {
            types: [
                Type {
                    category: American,
                    tag: Some(
                        Variant,
                    ),
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
            ],
            word: "barque",
        },
    ],
    pos: None,
    archaic: true,
    description: Some(
        "ship",
    ),
    note: None,
    comment: None,
}

"#]]
        );
    }

    // `-- text` directly after `|` is stored in `note`, not `description`.
    #[test]
    fn test_note() {
        let (input, actual) = Entry::parse_
            .parse_peek("_: cabbies | -- plural\n")
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: Other,
                    tag: None,
                    num: None,
                },
            ],
            word: "cabbies",
        },
    ],
    pos: None,
    archaic: false,
    description: None,
    note: Some(
        "plural",
    ),
    comment: None,
}

"#]]
        );
    }

    // Description followed by a second `|` and a `--` note: both are kept. The
    // description retains the blank that preceded the second `|`.
    #[test]
    fn test_description_and_note() {
        let (input, actual) = Entry::parse_
            .parse_peek("A B: wizz | as in \"gee whiz\" | -- Ox: informal, chiefly N. Amer.\n")
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
            ],
            word: "wizz",
        },
    ],
    pos: None,
    archaic: false,
    description: Some(
        "as in /"gee whiz/" ",
    ),
    note: Some(
        "Ox: informal, chiefly N. Amer.",
    ),
    comment: None,
}

"#]]
        );
    }

    // Text after ` # ` is stored in `comment`, without the `#` and the blank
    // that follows it.
    #[test]
    fn test_trailing_comment() {
        let (input, actual) = Entry::parse_.parse_peek(
            "A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n",
        )
        .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
            ],
            word: "accursed",
        },
        Variant {
            types: [
                Type {
                    category: American,
                    tag: Some(
                        Seldom,
                    ),
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: Some(
                        Possible,
                    ),
                    num: None,
                },
            ],
            word: "accurst",
        },
    ],
    pos: None,
    archaic: false,
    description: None,
    note: None,
    comment: Some(
        "ODE: archaic, M-W: 'or' but can find little evidence of use",
    ),
}

"#]]
        );
    }
}
1570
1571impl Variant {
1572 pub fn parse(input: &str) -> Result<Self, ParseError> {
1573 Self::parse_.parse(input).map_err(|_err| ParseError)
1574 }
1575
1576 fn parse_(input: &mut &str) -> ModalResult<Self, ()> {
1577 trace("variant", move |input: &mut &str| {
1578 let types = winnow::combinator::separated(1.., Type::parse_, space1);
1579 let columns =
1580 winnow::combinator::separated(0.., winnow::ascii::digit1, space1).map(|()| ());
1581 let sep = (":", winnow::ascii::space0);
1582 let ((types, _, _columns), word) = winnow::combinator::separated_pair(
1583 (types, winnow::ascii::space0, columns),
1584 sep,
1585 word,
1586 )
1587 .parse_next(input)?;
1588 let v = Self { types, word };
1589 Ok(v)
1590 })
1591 .parse_next(input)
1592 }
1593}
1594
1595fn word(input: &mut &str) -> ModalResult<String, ()> {
1596 trace("word", move |input: &mut &str| {
1597 winnow::token::take_till(1.., |item: char| item.is_ascii_whitespace())
1598 .map(|s: &str| s.to_owned().replace('_', " "))
1599 .parse_next(input)
1600 })
1601 .parse_next(input)
1602}
1603
// Snapshot tests for `Variant::parse_`: each test checks the unparsed
// remainder and the `to_debug()` rendering of the parsed variant.
#[cfg(test)]
mod test_variant {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    // Two types and a word; the blank after the word is left unparsed.
    #[test]
    fn test_valid() {
        let (input, actual) = Variant::parse_.parse_peek("A Cv: acknowledgment ").unwrap();
        assert_data_eq!(input, str![" "]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Variant {
    types: [
        Type {
            category: American,
            tag: None,
            num: None,
        },
        Type {
            category: Canadian,
            tag: Some(
                Variant,
            ),
            num: None,
        },
    ],
    word: "acknowledgment",
}

"#]]
        );
    }

    // Parsing stops after the first variant; the ` / ...` tail is untouched.
    #[test]
    fn test_extra() {
        let (input, actual) = Variant::parse_
            .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's")
            .unwrap();
        assert_data_eq!(input, str![" / Av B C: acknowledgement's"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Variant {
    types: [
        Type {
            category: American,
            tag: None,
            num: None,
        },
        Type {
            category: Canadian,
            tag: Some(
                Variant,
            ),
            num: None,
        },
    ],
    word: "acknowledgment's",
}

"#]]
        );
    }

    // `_` as a type means `Other`; `_` inside the word becomes a space.
    #[test]
    fn test_underscore() {
        let (input, actual) = Variant::parse_.parse_peek("_: air_gun\n").unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Variant {
    types: [
        Type {
            category: Other,
            tag: None,
            num: None,
        },
    ],
    word: "air gun",
}

"#]]
        );
    }

    // Numbers between the types and the colon are accepted and not recorded.
    #[test]
    fn test_columns() {
        let (input, actual) = Variant::parse_.parse_peek("A B 1 2: aeries").unwrap();
        assert_data_eq!(input, str![""]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Variant {
    types: [
        Type {
            category: American,
            tag: None,
            num: None,
        },
        Type {
            category: BritishIse,
            tag: None,
            num: None,
        },
    ],
    word: "aeries",
}

"#]]
        );
    }
}
1731
1732impl Type {
1733 pub fn parse(input: &str) -> Result<Self, ParseError> {
1734 Self::parse_.parse(input).map_err(|_err| ParseError)
1735 }
1736
1737 fn parse_(input: &mut &str) -> ModalResult<Type, ()> {
1738 trace("type", move |input: &mut &str| {
1739 let category = Category::parse_(input)?;
1740 let tag = opt(Tag::parse_).parse_next(input)?;
1741 let num = opt(winnow::ascii::digit1).parse_next(input)?;
1742 let num = num.map(|s| s.parse().expect("parser ensured it's a number"));
1743 let t = Type { category, tag, num };
1744 Ok(t)
1745 })
1746 .parse_next(input)
1747 }
1748}
1749
// Snapshot tests for `Type::parse_`: each case checks the unparsed remainder
// and the `to_debug()` rendering of the parsed type.
#[cfg(test)]
mod test_type {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    // A bare category (`A`) and a category with a tag (`Bv`); the following
    // blank is left unparsed in both cases.
    #[test]
    fn test_valid() {
        let (input, actual) = Type::parse_.parse_peek("A ").unwrap();
        assert_data_eq!(input, str![" "]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Type {
    category: American,
    tag: None,
    num: None,
}

"#]]
        );

        let (input, actual) = Type::parse_.parse_peek("Bv ").unwrap();
        assert_data_eq!(input, str![" "]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Type {
    category: BritishIse,
    tag: Some(
        Variant,
    ),
    num: None,
}

"#]]
        );
    }

    // Text after the type is left untouched.
    #[test]
    fn test_extra() {
        let (input, actual) = Type::parse_.parse_peek("Z foobar").unwrap();
        assert_data_eq!(input, str![" foobar"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Type {
    category: BritishIze,
    tag: None,
    num: None,
}

"#]]
        );

        let (input, actual) = Type::parse_.parse_peek("C- foobar").unwrap();
        assert_data_eq!(input, str![" foobar"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Type {
    category: Canadian,
    tag: Some(
        Possible,
    ),
    num: None,
}

"#]]
        );
    }

    // Digits directly after the tag are parsed into `num`.
    #[test]
    fn test_num() {
        let (input, actual) = Type::parse_.parse_peek("Av1 ").unwrap();
        assert_data_eq!(input, str![" "]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Type {
    category: American,
    tag: Some(
        Variant,
    ),
    num: Some(
        1,
    ),
}

"#]]
        );
    }
}
1847
1848impl Category {
1849 pub fn parse(input: &str) -> Result<Self, ParseError> {
1850 Self::parse_.parse(input).map_err(|_err| ParseError)
1851 }
1852
1853 fn parse_(input: &mut &str) -> ModalResult<Self, ()> {
1854 trace("category", move |input: &mut &str| {
1855 let symbols = one_of(['A', 'B', 'Z', 'C', 'D', '_']);
1856 symbols
1857 .map(|c| match c {
1858 'A' => Category::American,
1859 'B' => Category::BritishIse,
1860 'Z' => Category::BritishIze,
1861 'C' => Category::Canadian,
1862 'D' => Category::Australian,
1863 '_' => Category::Other,
1864 _ => unreachable!("parser won't select this option"),
1865 })
1866 .parse_next(input)
1867 })
1868 .parse_next(input)
1869 }
1870}
1871
// Snapshot tests for `Category::parse_`.
#[cfg(test)]
mod test_category {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    // `A` parses to `American` and consumes the whole input.
    #[test]
    fn test_valid() {
        let (input, actual) = Category::parse_.parse_peek("A").unwrap();
        assert_data_eq!(input, str![]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
American

"#]]
        );
    }

    // Only the first character is consumed; `_` parses to `Other`.
    #[test]
    fn test_extra() {
        let (input, actual) = Category::parse_.parse_peek("_ foobar").unwrap();
        assert_data_eq!(input, str![" foobar"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Other

"#]]
        );
    }
}
1906
1907impl Tag {
1908 pub fn parse(input: &str) -> Result<Self, ParseError> {
1909 Self::parse_.parse(input).map_err(|_err| ParseError)
1910 }
1911
1912 fn parse_(input: &mut &str) -> ModalResult<Self, ()> {
1913 trace("tag", move |input: &mut &str| {
1914 let symbols = one_of(['.', 'v', 'V', '-', 'x']);
1915 symbols
1916 .map(|c| match c {
1917 '.' => Tag::Eq,
1918 'v' => Tag::Variant,
1919 'V' => Tag::Seldom,
1920 '-' => Tag::Possible,
1921 'x' => Tag::Improper,
1922 _ => unreachable!("parser won't select this option"),
1923 })
1924 .parse_next(input)
1925 })
1926 .parse_next(input)
1927 }
1928}
1929
// Snapshot tests for `Tag::parse_`.
#[cfg(test)]
mod test_tag {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    // `.` parses to `Eq` and consumes the whole input.
    #[test]
    fn test_valid() {
        let (input, actual) = Tag::parse_.parse_peek(".").unwrap();
        assert_data_eq!(input, str![]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Eq

"#]]
        );
    }

    // Only the first character is consumed; `x` parses to `Improper`.
    #[test]
    fn test_extra() {
        let (input, actual) = Tag::parse_.parse_peek("x foobar").unwrap();
        assert_data_eq!(input, str![" foobar"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Improper

"#]]
        );
    }
}
1964
1965impl Pos {
1966 pub fn parse(input: &str) -> Result<Self, ParseError> {
1967 Self::parse_.parse(input).map_err(|_err| ParseError)
1968 }
1969
1970 fn parse_(input: &mut &str) -> ModalResult<Self, ()> {
1971 trace("pos", move |input: &mut &str| {
1972 alt((
1973 "N".value(Pos::Noun),
1974 "V".value(Pos::Verb),
1975 "Adj".value(Pos::Adjective),
1976 "Adv".value(Pos::Adverb),
1977 "A".value(Pos::AdjectiveOrAdverb),
1978 "Inj".value(Pos::Interjection),
1979 "Prep".value(Pos::Preposition),
1980 ))
1981 .parse_next(input)
1982 })
1983 .parse_next(input)
1984 }
1985}
1986
// Snapshot tests for `Pos::parse_`.
#[cfg(test)]
mod test_pos {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    // `N` parses to `Noun`; the closing `>` is not consumed.
    #[test]
    fn test_valid() {
        let (input, actual) = Pos::parse_.parse_peek("N>").unwrap();
        assert_data_eq!(input, str![">"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Noun

"#]]
        );
    }

    // `Adj` parses to `Adjective`, not to the shorter `A` alternative;
    // everything after the label is left untouched.
    #[test]
    fn test_extra() {
        let (input, actual) = Pos::parse_.parse_peek("Adj> foobar").unwrap();
        assert_data_eq!(input, str!["> foobar"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Adjective

"#]]
        );
    }
}
2021
/// Opaque error returned by the public `parse` functions in this file.
///
/// It carries no detail about what went wrong or where.
#[derive(Debug)]
pub struct ParseError;

impl std::fmt::Display for ParseError {
    /// Always renders as the fixed text `invalid`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("invalid")
    }
}

impl std::error::Error for ParseError {}