varcon_core/
parser.rs

1use winnow::ascii::space1;
2use winnow::combinator::alt;
3use winnow::combinator::cut_err;
4use winnow::combinator::delimited;
5use winnow::combinator::opt;
6use winnow::combinator::preceded;
7use winnow::combinator::terminated;
8use winnow::combinator::trace;
9use winnow::prelude::*;
10use winnow::token::one_of;
11
12use crate::{Category, Cluster, Entry, Pos, Tag, Type, Variant};
13
/// Iterator over the [`Cluster`]s contained in a borrowed piece of text.
///
/// The iterator only keeps a cursor (`input`) into the caller's string; each
/// parsed cluster advances that cursor.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClusterIter<'i> {
    /// The portion of the original text that has not been consumed yet.
    input: &'i str,
}

impl<'i> ClusterIter<'i> {
    /// Creates an iterator positioned at the very beginning of `input`.
    pub fn new(input: &'i str) -> Self {
        ClusterIter { input }
    }
}
24
25impl Iterator for ClusterIter<'_> {
26    type Item = Cluster;
27
28    fn next(&mut self) -> Option<Cluster> {
29        self.input = self.input.trim_start();
30        Cluster::parse_.parse_next(&mut self.input).ok()
31    }
32}
33
// Snapshot tests for `ClusterIter`: each test feeds raw cluster text to the
// iterator and compares the `Debug` rendering of the collected clusters
// against an inline snapshot.
#[cfg(test)]
mod test_cluster_iter {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    // One cluster (header + three entry lines) followed by a blank line is
    // expected to produce exactly one `Cluster`.
    #[test]
    fn test_single() {
        let actual = ClusterIter::new(
            "# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's

",
        );
        assert_data_eq!(
            actual.collect::<Vec<_>>().to_debug(),
            str![[r#"
[
    Cluster {
        header: "acknowledgment ",
        verified: true,
        level: 35,
        entries: [
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgment",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgement",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgments",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgements",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgment's",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgement's",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
        ],
        notes: [],
    },
]

"#]]
        );
    }

    // The same cluster text twice, separated by a blank line, is expected to
    // produce two identical `Cluster` values.
    #[test]
    fn test_multiple() {
        let actual = ClusterIter::new(
            "# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's

# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's

",
        );
        assert_data_eq!(
            actual.collect::<Vec<_>>().to_debug(),
            str![[r#"
[
    Cluster {
        header: "acknowledgment ",
        verified: true,
        level: 35,
        entries: [
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgment",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgement",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgments",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgements",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgment's",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgement's",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
        ],
        notes: [],
    },
    Cluster {
        header: "acknowledgment ",
        verified: true,
        level: 35,
        entries: [
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgment",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgement",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgments",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgements",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
            Entry {
                variants: [
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                        ],
                        word: "acknowledgment's",
                    },
                    Variant {
                        types: [
                            Type {
                                category: American,
                                tag: Some(
                                    Variant,
                                ),
                                num: None,
                            },
                            Type {
                                category: BritishIse,
                                tag: None,
                                num: None,
                            },
                            Type {
                                category: Canadian,
                                tag: None,
                                num: None,
                            },
                        ],
                        word: "acknowledgement's",
                    },
                ],
                pos: None,
                archaic: false,
                description: None,
                note: None,
                comment: None,
            },
        ],
        notes: [],
    },
]

"#]]
        );
    }
}
543
544impl Cluster {
545    pub fn parse(input: &str) -> Result<Self, ParseError> {
546        Self::parse_.parse(input).map_err(|_err| ParseError)
547    }
548
549    fn parse_(input: &mut &str) -> ModalResult<Self, ()> {
550        trace("cluster", move |input: &mut &str| {
551            let header = (
552                "#",
553                winnow::ascii::space0,
554                winnow::token::take_till(1.., ('\r', '\n', '<', '(')),
555                winnow::ascii::space0,
556                opt(("<verified>", winnow::ascii::space0)),
557                delimited("(level ", winnow::ascii::digit1, ')').parse_to::<usize>(),
558                winnow::ascii::space0,
559                winnow::ascii::line_ending,
560            );
561            let note = preceded(
562                ("##", winnow::ascii::space0),
563                terminated(winnow::ascii::till_line_ending, winnow::ascii::line_ending),
564            );
565            let mut cluster = (
566                header,
567                winnow::combinator::repeat(
568                    1..,
569                    terminated(Entry::parse_, winnow::ascii::line_ending),
570                ),
571                winnow::combinator::repeat(0.., note),
572            );
573            let (header, entries, notes): (_, _, Vec<_>) = cluster.parse_next(input)?;
574
575            let verified = header.4.is_some();
576            let level = header.5;
577            let header = header.2.to_owned();
578            let notes = notes.into_iter().map(|s| s.to_owned()).collect();
579            let c = Self {
580                header,
581                verified,
582                level,
583                entries,
584                notes,
585            };
586            Ok(c)
587        })
588        .parse_next(input)
589    }
590}
591
// Snapshot tests for `Cluster::parse_`: each test runs the parser with
// `parse_peek` and checks both the leftover input and the `Debug` rendering
// of the parsed cluster.
#[cfg(test)]
mod test_cluster {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    // Header plus three entry lines; the leftover input asserted below is
    // whitespace-only (the blank line that follows the cluster).
    #[test]
    fn test_basic() {
        let (input, actual) = Cluster::parse_
            .parse_peek(
                "# acknowledgment <verified> (level 35)
A Cv: acknowledgment / Av B C: acknowledgement
A Cv: acknowledgments / Av B C: acknowledgements
A Cv: acknowledgment's / Av B C: acknowledgement's

",
            )
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Cluster {
    header: "acknowledgment ",
    verified: true,
    level: 35,
    entries: [
        Entry {
            variants: [
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                    ],
                    word: "acknowledgment",
                },
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                        Type {
                            category: BritishIse,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: None,
                            num: None,
                        },
                    ],
                    word: "acknowledgement",
                },
            ],
            pos: None,
            archaic: false,
            description: None,
            note: None,
            comment: None,
        },
        Entry {
            variants: [
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                    ],
                    word: "acknowledgments",
                },
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                        Type {
                            category: BritishIse,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: None,
                            num: None,
                        },
                    ],
                    word: "acknowledgements",
                },
            ],
            pos: None,
            archaic: false,
            description: None,
            note: None,
            comment: None,
        },
        Entry {
            variants: [
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                    ],
                    word: "acknowledgment's",
                },
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                        Type {
                            category: BritishIse,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: None,
                            num: None,
                        },
                    ],
                    word: "acknowledgement's",
                },
            ],
            pos: None,
            archaic: false,
            description: None,
            note: None,
            comment: None,
        },
    ],
    notes: [],
}

"#]]
        );
    }

    // `##` lines placed after the entries are expected to be collected, in
    // order and without the `##` prefix, into `Cluster::notes`.
    #[test]
    fn test_notes() {
        let (input, actual) = Cluster::parse_
            .parse_peek(
                "# coloration <verified> (level 50)
A B C: coloration / B. Cv: colouration
A B C: colorations / B. Cv: colourations
A B C: coloration's / B. Cv: colouration's
## OED has coloration as the preferred spelling and discolouration as a
## variant for British Engl or some reason

",
            )
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Cluster {
    header: "coloration ",
    verified: true,
    level: 50,
    entries: [
        Entry {
            variants: [
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: BritishIse,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: None,
                            num: None,
                        },
                    ],
                    word: "coloration",
                },
                Variant {
                    types: [
                        Type {
                            category: BritishIse,
                            tag: Some(
                                Eq,
                            ),
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                    ],
                    word: "colouration",
                },
            ],
            pos: None,
            archaic: false,
            description: None,
            note: None,
            comment: None,
        },
        Entry {
            variants: [
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: BritishIse,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: None,
                            num: None,
                        },
                    ],
                    word: "colorations",
                },
                Variant {
                    types: [
                        Type {
                            category: BritishIse,
                            tag: Some(
                                Eq,
                            ),
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                    ],
                    word: "colourations",
                },
            ],
            pos: None,
            archaic: false,
            description: None,
            note: None,
            comment: None,
        },
        Entry {
            variants: [
                Variant {
                    types: [
                        Type {
                            category: American,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: BritishIse,
                            tag: None,
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: None,
                            num: None,
                        },
                    ],
                    word: "coloration's",
                },
                Variant {
                    types: [
                        Type {
                            category: BritishIse,
                            tag: Some(
                                Eq,
                            ),
                            num: None,
                        },
                        Type {
                            category: Canadian,
                            tag: Some(
                                Variant,
                            ),
                            num: None,
                        },
                    ],
                    word: "colouration's",
                },
            ],
            pos: None,
            archaic: false,
            description: None,
            note: None,
            comment: None,
        },
    ],
    notes: [
        "OED has coloration as the preferred spelling and discolouration as a",
        "variant for British Engl or some reason",
    ],
}

"#]]
        );
    }
}
963
964impl Entry {
965    pub fn parse(input: &str) -> Result<Self, ParseError> {
966        Self::parse_.parse(input).map_err(|_err| ParseError)
967    }
968
969    fn parse_(input: &mut &str) -> ModalResult<Self, ()> {
970        trace("entry", move |input: &mut &str| {
971            let var_sep = (winnow::ascii::space0, '/', winnow::ascii::space0);
972            let variants =
973                winnow::combinator::separated(1.., Variant::parse_, var_sep).parse_next(input)?;
974
975            let mut e = Self::parse_description.parse_next(input)?;
976
977            let comment_sep = (winnow::ascii::space0, '#');
978            let comment =
979                opt((comment_sep, space1, winnow::ascii::till_line_ending)).parse_next(input)?;
980
981            let _ = winnow::ascii::space0.parse_next(input)?;
982
983            e.variants = variants;
984            e.comment = comment.map(|c| c.2.to_owned());
985            Ok(e)
986        })
987        .parse_next(input)
988    }
989
990    fn parse_description(input: &mut &str) -> ModalResult<Self, ()> {
991        trace("description", move |input: &mut &str| {
992            let mut entry = Self {
993                variants: Vec::new(),
994                pos: None,
995                archaic: false,
996                description: None,
997                note: None,
998                comment: None,
999            };
1000
1001            if opt((winnow::ascii::space0, '|'))
1002                .parse_next(input)?
1003                .is_some()
1004            {
1005                let _ = opt((space1, "<abbr>")).parse_next(input)?;
1006                let _ = opt((space1, "<pl>")).parse_next(input)?;
1007                entry.pos = opt(delimited((space1, '<'), cut_err(Pos::parse_), cut_err('>')))
1008                    .parse_next(input)?;
1009                entry.archaic = opt(preceded(space1, archaic)).parse_next(input)?.is_some();
1010                entry.note = opt(preceded(space1, note)).parse_next(input)?;
1011                entry.description = opt(preceded(space1, description)).parse_next(input)?;
1012
1013                if opt((winnow::ascii::space0, '|'))
1014                    .parse_next(input)?
1015                    .is_some()
1016                {
1017                    entry.note = opt(preceded(space1, note)).parse_next(input)?;
1018                }
1019            }
1020            Ok(entry)
1021        })
1022        .parse_next(input)
1023    }
1024}
1025
1026fn note(input: &mut &str) -> ModalResult<String, ()> {
1027    let (_, _, note) = (NOTE_PREFIX, space1, description).parse_next(input)?;
1028    Ok(note)
1029}
1030
/// Marker that introduces a note; the `note` parser expects it, followed by whitespace and
/// the note text.
const NOTE_PREFIX: &str = "--";
1032
1033fn archaic(input: &mut &str) -> ModalResult<(), ()> {
1034    "(-)".void().parse_next(input)
1035}
1036
1037fn description(input: &mut &str) -> ModalResult<String, ()> {
1038    let description = winnow::token::take_till(0.., ('\n', '\r', '#', '|')).parse_next(input)?;
1039    Ok(description.to_owned())
1040}
1041
// Snapshot tests for `Entry::parse_`. Each test feeds one entry line terminated by `\n` and
// checks both the unconsumed remainder and the `Debug` rendering of the parsed `Entry`.
#[cfg(test)]
mod test_entry {
    // NOTE(review): no boolean comparison asserts are visible in this module; presumably this
    // allow is a leftover from an earlier version of the tests -- confirm before removing.
    #![allow(clippy::bool_assert_comparison)]
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    #[test]
    fn test_variant_only() {
        // Two `/`-separated variants and nothing else: every optional field keeps its empty
        // default, and only the line ending is left in the input.
        let (input, actual) = Entry::parse_
            .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's\n")
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: Canadian,
                    tag: Some(
                        Variant,
                    ),
                    num: None,
                },
            ],
            word: "acknowledgment's",
        },
        Variant {
            types: [
                Type {
                    category: American,
                    tag: Some(
                        Variant,
                    ),
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
                Type {
                    category: Canadian,
                    tag: None,
                    num: None,
                },
            ],
            word: "acknowledgement's",
        },
    ],
    pos: None,
    archaic: false,
    description: None,
    note: None,
    comment: None,
}

"#]]
        );
    }

    #[test]
    fn test_description() {
        // Free text after `|` is captured as the description.
        let (input, actual) = Entry::parse_
            .parse_peek("A C: prize / B: prise | otherwise\n")
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: Canadian,
                    tag: None,
                    num: None,
                },
            ],
            word: "prize",
        },
        Variant {
            types: [
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
            ],
            word: "prise",
        },
    ],
    pos: None,
    archaic: false,
    description: Some(
        "otherwise",
    ),
    note: None,
    comment: None,
}

"#]]
        );
    }

    #[test]
    fn test_pos() {
        // A `<N>` tag after `|` is parsed as the part of speech.
        let (input, actual) = Entry::parse_
            .parse_peek("A B C: practice / AV Cv: practise | <N>\n")
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
                Type {
                    category: Canadian,
                    tag: None,
                    num: None,
                },
            ],
            word: "practice",
        },
        Variant {
            types: [
                Type {
                    category: American,
                    tag: Some(
                        Seldom,
                    ),
                    num: None,
                },
                Type {
                    category: Canadian,
                    tag: Some(
                        Variant,
                    ),
                    num: None,
                },
            ],
            word: "practise",
        },
    ],
    pos: Some(
        Noun,
    ),
    archaic: false,
    description: None,
    note: None,
    comment: None,
}

"#]]
        );
    }

    #[test]
    fn test_pos_bad() {
        // An unknown tag inside `<...>` fails the whole entry: the tag is parsed under
        // `cut_err`, so the optional part-of-speech section cannot simply be skipped.
        let err = Entry::parse_
            .parse_peek("A B C: practice / AV Cv: practise | <Bad>\n")
            .unwrap_err();
        assert_data_eq!(err.to_string(), str!["Parsing Failure: ()"]);
    }

    #[test]
    fn test_plural() {
        // The `<pl>` marker is accepted but leaves no trace in the parsed entry.
        let (input, actual) = Entry::parse_.parse_peek("_ _-: dogies | <pl>\n").unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: Other,
                    tag: None,
                    num: None,
                },
                Type {
                    category: Other,
                    tag: Some(
                        Possible,
                    ),
                    num: None,
                },
            ],
            word: "dogies",
        },
    ],
    pos: None,
    archaic: false,
    description: None,
    note: None,
    comment: None,
}

"#]]
        );
    }

    #[test]
    fn test_abbr() {
        // The `<abbr>` marker is accepted but leaves no trace in the parsed entry.
        let (input, actual) = Entry::parse_.parse_peek("A B: ha | <abbr>\n").unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
            ],
            word: "ha",
        },
    ],
    pos: None,
    archaic: false,
    description: None,
    note: None,
    comment: None,
}

"#]]
        );
    }

    #[test]
    fn test_archaic() {
        // `(-)` sets the archaic flag; the text after it becomes the description.
        let (input, actual) = Entry::parse_
            .parse_peek("A: bark / Av B: barque | (-) ship\n")
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
            ],
            word: "bark",
        },
        Variant {
            types: [
                Type {
                    category: American,
                    tag: Some(
                        Variant,
                    ),
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
            ],
            word: "barque",
        },
    ],
    pos: None,
    archaic: true,
    description: Some(
        "ship",
    ),
    note: None,
    comment: None,
}

"#]]
        );
    }

    #[test]
    fn test_note() {
        // Text introduced by `--` right after `|` is stored as the note, not the description.
        let (input, actual) = Entry::parse_
            .parse_peek("_: cabbies | -- plural\n")
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: Other,
                    tag: None,
                    num: None,
                },
            ],
            word: "cabbies",
        },
    ],
    pos: None,
    archaic: false,
    description: None,
    note: Some(
        "plural",
    ),
    comment: None,
}

"#]]
        );
    }

    #[test]
    fn test_description_and_note() {
        // Description in the first `|` section, note in the second. The description keeps the
        // space that precedes the second `|`.
        let (input, actual) = Entry::parse_
            .parse_peek("A B: wizz | as in \"gee whiz\" | -- Ox: informal, chiefly N. Amer.\n")
            .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        // NOTE(review): the snapshot below spells the escaped quotes as `/"` rather than `\"`;
        // presumably this comes from snapbox's normalization -- confirm before hand-editing.
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
            ],
            word: "wizz",
        },
    ],
    pos: None,
    archaic: false,
    description: Some(
        "as in /"gee whiz/" ",
    ),
    note: Some(
        "Ox: informal, chiefly N. Amer.",
    ),
    comment: None,
}

"#]]
        );
    }

    #[test]
    fn test_trailing_comment() {
        // Everything after `# ` up to the line ending is stored as the comment, without the
        // `#` itself.
        let (input, actual) = Entry::parse_.parse_peek(
            "A B: accursed / AV B-: accurst # ODE: archaic, M-W: 'or' but can find little evidence of use\n",
        )
        .unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Entry {
    variants: [
        Variant {
            types: [
                Type {
                    category: American,
                    tag: None,
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: None,
                    num: None,
                },
            ],
            word: "accursed",
        },
        Variant {
            types: [
                Type {
                    category: American,
                    tag: Some(
                        Seldom,
                    ),
                    num: None,
                },
                Type {
                    category: BritishIse,
                    tag: Some(
                        Possible,
                    ),
                    num: None,
                },
            ],
            word: "accurst",
        },
    ],
    pos: None,
    archaic: false,
    description: None,
    note: None,
    comment: Some(
        "ODE: archaic, M-W: 'or' but can find little evidence of use",
    ),
}

"#]]
        );
    }
}
1570
1571impl Variant {
1572    pub fn parse(input: &str) -> Result<Self, ParseError> {
1573        Self::parse_.parse(input).map_err(|_err| ParseError)
1574    }
1575
1576    fn parse_(input: &mut &str) -> ModalResult<Self, ()> {
1577        trace("variant", move |input: &mut &str| {
1578            let types = winnow::combinator::separated(1.., Type::parse_, space1);
1579            let columns =
1580                winnow::combinator::separated(0.., winnow::ascii::digit1, space1).map(|()| ());
1581            let sep = (":", winnow::ascii::space0);
1582            let ((types, _, _columns), word) = winnow::combinator::separated_pair(
1583                (types, winnow::ascii::space0, columns),
1584                sep,
1585                word,
1586            )
1587            .parse_next(input)?;
1588            let v = Self { types, word };
1589            Ok(v)
1590        })
1591        .parse_next(input)
1592    }
1593}
1594
1595fn word(input: &mut &str) -> ModalResult<String, ()> {
1596    trace("word", move |input: &mut &str| {
1597        winnow::token::take_till(1.., |item: char| item.is_ascii_whitespace())
1598            .map(|s: &str| s.to_owned().replace('_', " "))
1599            .parse_next(input)
1600    })
1601    .parse_next(input)
1602}
1603
// Snapshot tests for `Variant::parse_`, checking the parsed value and the unconsumed input.
#[cfg(test)]
mod test_variant {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    #[test]
    fn test_valid() {
        // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
        // cases.
        let (input, actual) = Variant::parse_.parse_peek("A Cv: acknowledgment ").unwrap();
        assert_data_eq!(input, str![" "]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Variant {
    types: [
        Type {
            category: American,
            tag: None,
            num: None,
        },
        Type {
            category: Canadian,
            tag: Some(
                Variant,
            ),
            num: None,
        },
    ],
    word: "acknowledgment",
}

"#]]
        );
    }

    #[test]
    fn test_extra() {
        // Parsing stops at the end of the first word; the ` / ...` remainder is left for the
        // caller.
        let (input, actual) = Variant::parse_
            .parse_peek("A Cv: acknowledgment's / Av B C: acknowledgement's")
            .unwrap();
        assert_data_eq!(input, str![" / Av B C: acknowledgement's"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Variant {
    types: [
        Type {
            category: American,
            tag: None,
            num: None,
        },
        Type {
            category: Canadian,
            tag: Some(
                Variant,
            ),
            num: None,
        },
    ],
    word: "acknowledgment's",
}

"#]]
        );
    }

    #[test]
    fn test_underscore() {
        // Underscores in the word are turned into spaces.
        let (input, actual) = Variant::parse_.parse_peek("_: air_gun\n").unwrap();
        assert_data_eq!(
            input,
            str![[r#"


"#]]
        );
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Variant {
    types: [
        Type {
            category: Other,
            tag: None,
            num: None,
        },
    ],
    word: "air gun",
}

"#]]
        );
    }

    #[test]
    fn test_columns() {
        // Bare numbers between the types and the `:` are accepted and do not show up in the
        // parsed variant.
        let (input, actual) = Variant::parse_.parse_peek("A B 1 2: aeries").unwrap();
        assert_data_eq!(input, str![""]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Variant {
    types: [
        Type {
            category: American,
            tag: None,
            num: None,
        },
        Type {
            category: BritishIse,
            tag: None,
            num: None,
        },
    ],
    word: "aeries",
}

"#]]
        );
    }
}
1731
1732impl Type {
1733    pub fn parse(input: &str) -> Result<Self, ParseError> {
1734        Self::parse_.parse(input).map_err(|_err| ParseError)
1735    }
1736
1737    fn parse_(input: &mut &str) -> ModalResult<Type, ()> {
1738        trace("type", move |input: &mut &str| {
1739            let category = Category::parse_(input)?;
1740            let tag = opt(Tag::parse_).parse_next(input)?;
1741            let num = opt(winnow::ascii::digit1).parse_next(input)?;
1742            let num = num.map(|s| s.parse().expect("parser ensured it's a number"));
1743            let t = Type { category, tag, num };
1744            Ok(t)
1745        })
1746        .parse_next(input)
1747    }
1748}
1749
// Snapshot tests for `Type::parse_`, checking the parsed value and the unconsumed input.
#[cfg(test)]
mod test_type {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    #[test]
    fn test_valid() {
        // Having nothing after `A` causes an incomplete parse. Shouldn't be a problem for my use
        // cases.
        let (input, actual) = Type::parse_.parse_peek("A ").unwrap();
        assert_data_eq!(input, str![" "]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Type {
    category: American,
    tag: None,
    num: None,
}

"#]]
        );

        // A category immediately followed by a tag symbol.
        let (input, actual) = Type::parse_.parse_peek("Bv ").unwrap();
        assert_data_eq!(input, str![" "]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Type {
    category: BritishIse,
    tag: Some(
        Variant,
    ),
    num: None,
}

"#]]
        );
    }

    #[test]
    fn test_extra() {
        // Parsing stops after the category (and tag, if any); the rest is left untouched.
        let (input, actual) = Type::parse_.parse_peek("Z foobar").unwrap();
        assert_data_eq!(input, str![" foobar"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Type {
    category: BritishIze,
    tag: None,
    num: None,
}

"#]]
        );

        let (input, actual) = Type::parse_.parse_peek("C- foobar").unwrap();
        assert_data_eq!(input, str![" foobar"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Type {
    category: Canadian,
    tag: Some(
        Possible,
    ),
    num: None,
}

"#]]
        );
    }

    #[test]
    fn test_num() {
        // Digits directly after the tag are parsed into `num`.
        let (input, actual) = Type::parse_.parse_peek("Av1 ").unwrap();
        assert_data_eq!(input, str![" "]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Type {
    category: American,
    tag: Some(
        Variant,
    ),
    num: Some(
        1,
    ),
}

"#]]
        );
    }
}
1847
1848impl Category {
1849    pub fn parse(input: &str) -> Result<Self, ParseError> {
1850        Self::parse_.parse(input).map_err(|_err| ParseError)
1851    }
1852
1853    fn parse_(input: &mut &str) -> ModalResult<Self, ()> {
1854        trace("category", move |input: &mut &str| {
1855            let symbols = one_of(['A', 'B', 'Z', 'C', 'D', '_']);
1856            symbols
1857                .map(|c| match c {
1858                    'A' => Category::American,
1859                    'B' => Category::BritishIse,
1860                    'Z' => Category::BritishIze,
1861                    'C' => Category::Canadian,
1862                    'D' => Category::Australian,
1863                    '_' => Category::Other,
1864                    _ => unreachable!("parser won't select this option"),
1865                })
1866                .parse_next(input)
1867        })
1868        .parse_next(input)
1869    }
1870}
1871
// Snapshot tests for `Category::parse_`.
#[cfg(test)]
mod test_category {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    #[test]
    fn test_valid() {
        // A lone category symbol is consumed entirely.
        let (input, actual) = Category::parse_.parse_peek("A").unwrap();
        assert_data_eq!(input, str![]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
American

"#]]
        );
    }

    #[test]
    fn test_extra() {
        // Only the leading symbol is consumed; whatever follows is left in the input.
        let (input, actual) = Category::parse_.parse_peek("_ foobar").unwrap();
        assert_data_eq!(input, str![" foobar"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Other

"#]]
        );
    }
}
1906
1907impl Tag {
1908    pub fn parse(input: &str) -> Result<Self, ParseError> {
1909        Self::parse_.parse(input).map_err(|_err| ParseError)
1910    }
1911
1912    fn parse_(input: &mut &str) -> ModalResult<Self, ()> {
1913        trace("tag", move |input: &mut &str| {
1914            let symbols = one_of(['.', 'v', 'V', '-', 'x']);
1915            symbols
1916                .map(|c| match c {
1917                    '.' => Tag::Eq,
1918                    'v' => Tag::Variant,
1919                    'V' => Tag::Seldom,
1920                    '-' => Tag::Possible,
1921                    'x' => Tag::Improper,
1922                    _ => unreachable!("parser won't select this option"),
1923                })
1924                .parse_next(input)
1925        })
1926        .parse_next(input)
1927    }
1928}
1929
// Snapshot tests for `Tag::parse_`.
#[cfg(test)]
mod test_tag {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    #[test]
    fn test_valid() {
        // A lone tag symbol is consumed entirely.
        let (input, actual) = Tag::parse_.parse_peek(".").unwrap();
        assert_data_eq!(input, str![]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Eq

"#]]
        );
    }

    #[test]
    fn test_extra() {
        // Only the leading symbol is consumed; whatever follows is left in the input.
        let (input, actual) = Tag::parse_.parse_peek("x foobar").unwrap();
        assert_data_eq!(input, str![" foobar"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Improper

"#]]
        );
    }
}
1964
1965impl Pos {
1966    pub fn parse(input: &str) -> Result<Self, ParseError> {
1967        Self::parse_.parse(input).map_err(|_err| ParseError)
1968    }
1969
1970    fn parse_(input: &mut &str) -> ModalResult<Self, ()> {
1971        trace("pos", move |input: &mut &str| {
1972            alt((
1973                "N".value(Pos::Noun),
1974                "V".value(Pos::Verb),
1975                "Adj".value(Pos::Adjective),
1976                "Adv".value(Pos::Adverb),
1977                "A".value(Pos::AdjectiveOrAdverb),
1978                "Inj".value(Pos::Interjection),
1979                "Prep".value(Pos::Preposition),
1980            ))
1981            .parse_next(input)
1982        })
1983        .parse_next(input)
1984    }
1985}
1986
// Snapshot tests for `Pos::parse_`.
#[cfg(test)]
mod test_pos {
    use super::*;

    use snapbox::assert_data_eq;
    use snapbox::str;
    use snapbox::ToDebug;

    #[test]
    fn test_valid() {
        // The label is consumed; the closing `>` is left for the caller.
        let (input, actual) = Pos::parse_.parse_peek("N>").unwrap();
        assert_data_eq!(input, str![">"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Noun

"#]]
        );
    }

    #[test]
    fn test_extra() {
        // `Adj` is matched as a whole rather than as `A` followed by `dj`.
        let (input, actual) = Pos::parse_.parse_peek("Adj> foobar").unwrap();
        assert_data_eq!(input, str!["> foobar"]);
        assert_data_eq!(
            actual.to_debug(),
            str![[r#"
Adjective

"#]]
        );
    }
}
2021
/// Error returned by the public `parse` functions in this module.
///
/// It carries no detail about what went wrong or where; the underlying parser error is
/// discarded when this value is created.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseError;

impl std::fmt::Display for ParseError {
    /// Writes the fixed message `invalid`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "invalid")
    }
}

impl std::error::Error for ParseError {}