udled_tokenizers/
ident.rs

1use udled::{
2    any,
3    tokenizers::{AlphaNumeric, Alphabetic},
4    AsChar, AsSlice, Buffer, Error, Item, Reader, Tokenizer, TokenizerExt,
5};
6
/// Match a unicode identifier
///
/// The [`Tokenizer`] implementation accepts one leading `Alphabetic` character
/// or `_`, followed by any further `AlphaNumeric` characters or `_`, and
/// returns the matched text as a single sliced [`Item`].
#[derive(Debug, Clone, Copy, Default)]
pub struct Ident;
10
11impl<'input, B> Tokenizer<'input, B> for Ident
12where
13    B: Buffer<'input>,
14    B::Item: AsChar,
15    B::Source: AsSlice<'input>,
16{
17    type Token = Item<<B::Source as AsSlice<'input>>::Slice>;
18
19    fn to_token(&self, reader: &mut Reader<'_, 'input, B>) -> Result<Self::Token, Error> {
20        let item =
21            reader.parse((Alphabetic.or('_'), AlphaNumeric.or('_').many().optional()).slice())?;
22
23        Ok(item)
24    }
25
26    fn peek<'a>(&self, reader: &mut Reader<'_, 'input, B>) -> bool {
27        reader.is(Alphabetic.or('_'))
28    }
29}
30
/// Match an XML-style tag or attribute name.
///
/// Derives the same traits as `Ident`, so the tokenizer can be copied,
/// debug-printed and default-constructed like its sibling.
#[derive(Debug, Clone, Copy, Default)]
pub struct XmlIdent;
35
36impl<'input, B> Tokenizer<'input, B> for XmlIdent
37where
38    B: Buffer<'input>,
39    B::Item: AsChar,
40    B::Source: AsSlice<'input>,
41{
42    type Token = Item<<B::Source as AsSlice<'input>>::Slice>;
43
44    fn to_token(&self, reader: &mut Reader<'_, 'input, B>) -> Result<Self::Token, Error> {
45        let start_tokenizer = any!(
46            ':',
47            'a'..='z',
48            'A'..='Z',
49            '\u{2070}'..='\u{218F}',
50            '\u{2C00}'..='\u{2FEF}',
51            '\u{3001}'..='\u{D7FF}',
52            '\u{F900}'..='\u{FDCF}',
53            '\u{FDF0}'..='\u{FFFD}'
54        );
55        let rest_tokenizer = any!(
56            '0'..='9',
57            '-',
58            '.',
59            '_',
60            '\u{00B7}',
61            '\u{0300}'..='\u{036F}',
62            '\u{203F}'..='\u{2040}'
63        );
64
65        let all = any!(&start_tokenizer, rest_tokenizer);
66
67        reader.parse((&start_tokenizer, all.many()).slice())
68    }
69
70    fn peek(&self, reader: &mut Reader<'_, 'input, B>) -> bool {
71        reader.is(any!(
72            ':',
73            'a'..='z',
74            'A'..='Z',
75            '\u{2070}'..='\u{218F}',
76            '\u{2C00}'..='\u{2FEF}',
77            '\u{3001}'..='\u{D7FF}',
78            '\u{F900}'..='\u{FDCF}',
79            '\u{FDF0}'..='\u{FFFD}'
80        ))
81    }
82}
83
84// #[cfg(test)]
85// mod test {
86//     use udled::{Input, Item, Span};
87
88//     use super::{Ident, XmlIdent};
89
90//     #[test]
91//     fn xml_ident() {
92//         let mut input = Input::new("div custom-tag data-id2");
93
94//         assert_eq!(
95//             input
96//                 .parse((XmlIdent, ' ', XmlIdent, ' ', XmlIdent))
97//                 .unwrap(),
98//             (
99//                 Item::new(, Span::new(0, 3), "div"),
100//                 Span::new(3, 4),
101//                 Item::new("custom-tag", Span::new(4, 14)),
102//                 Span::new(14, 15),
103//                 Item::new("data-id2", Span::new(15, 23))
104//             )
105//         );
106//     }
107
108//     #[test]
109//     fn ident() {
110//         let mut input = Input::new("Ident other");
111//         assert_eq!(
112//             input.parse(Ident).unwrap(),
113//             Lex {
114//                 value: "Ident",
115//                 span: Span { start: 0, end: 5 }
116//             }
117//         );
118//     }
119// }