udled_tokenizers/
comment.rs

1use udled::{
2    tokenizers::Exclude, AsChar, AsSlice, Buffer, Either, Error, Item, Reader, Span, Tokenizer,
3    TokenizerExt, EOF,
4};
5
6pub const fn cstyle_line_comment() -> RawLineComment<&'static str> {
7    RawLineComment("//")
8}
9
10pub const fn cstyle_multiline_comment(
11    nested: bool,
12) -> Either<RawMultiLine<&'static str, &'static str>, RawMultiLineNested<&'static str, &'static str>>
13{
14    if nested {
15        Either::Right(RawMultiLineNested("/*", "*/"))
16    } else {
17        Either::Left(RawMultiLine("/*", "*/"))
18    }
19}
20
21pub const fn rust_doc_comment() -> RawLineComment<&'static str> {
22    RawLineComment("///")
23}
24
25pub const fn python_line_comment() -> RawLineComment<&'static str> {
26    RawLineComment("#")
27}
28
29pub const fn python_multiline_comment() -> RawMultiLine<&'static str, &'static str> {
30    RawMultiLine("'''", "'''")
31}
32
33pub const fn javascript_doc_comment() -> RawMultiLine<&'static str, &'static str> {
34    RawMultiLine("/**", "*/")
35}
36
37pub const fn html_comment() -> RawMultiLine<&'static str, &'static str> {
38    RawMultiLine("<!--", "-->")
39}
40
/// Tokenizer for a single-line comment.
///
/// The wrapped value is the tokenizer that recognizes the comment's opening
/// marker (for example `"//"` or `"#"`).
#[derive(Clone, Copy, Debug)]
pub struct RawLineComment<T>(T);
43
44impl<'input, B, T> Tokenizer<'input, B> for RawLineComment<T>
45where
46    T: Tokenizer<'input, B>,
47    B: Buffer<'input>,
48    B::Source: AsSlice<'input>,
49    B::Item: AsChar,
50{
51    type Token = Item<<B::Source as AsSlice<'input>>::Slice>;
52    fn to_token<'a>(&self, reader: &mut Reader<'_, 'input, B>) -> Result<Self::Token, Error> {
53        let item = reader.parse(
54            (
55                &self.0,
56                Exclude::new('\n'.or(EOF)).many().optional().spanned(),
57                '\n'.optional(),
58            )
59                .slice(),
60        )?;
61
62        Ok(item)
63    }
64
65    fn peek<'a>(&self, reader: &mut Reader<'_, 'input, B>) -> bool {
66        reader.is(&self.0)
67    }
68}
69
/// Tokenizer for a block comment without nesting support.
///
/// Field `0` is the tokenizer for the opening delimiter and field `1` the
/// tokenizer for the closing delimiter.
#[derive(Clone, Copy, Debug)]
pub struct RawMultiLine<O, C>(O, C);
72
73impl<'input, O, C, B> Tokenizer<'input, B> for RawMultiLine<O, C>
74where
75    O: Tokenizer<'input, B>,
76    C: Tokenizer<'input, B>,
77    B: Buffer<'input>,
78    B::Source: AsSlice<'input>,
79    B::Item: AsChar,
80{
81    type Token = Item<<B::Source as AsSlice<'input>>::Slice>;
82    fn to_token<'a>(&self, reader: &mut Reader<'_, 'input, B>) -> Result<Self::Token, Error> {
83        let item =
84            reader.parse((&self.0, Exclude::new(&self.1).many().optional(), &self.1).slice())?;
85
86        Ok(item)
87    }
88
89    fn peek<'a>(&self, reader: &mut Reader<'_, 'input, B>) -> bool {
90        reader.is(&self.0)
91    }
92}
93
/// Tokenizer for a block comment whose delimiters may nest.
///
/// Field `0` is the tokenizer for the opening delimiter and field `1` the
/// tokenizer for the closing delimiter.
#[derive(Clone, Copy, Debug)]
pub struct RawMultiLineNested<O, C>(O, C);
96
97impl<'input, O, C, B> Tokenizer<'input, B> for RawMultiLineNested<O, C>
98where
99    B: Buffer<'input>,
100    B::Source: AsSlice<'input>,
101    B::Item: AsChar,
102    O: Tokenizer<'input, B>,
103    C: Tokenizer<'input, B>,
104{
105    type Token = Item<<B::Source as AsSlice<'input>>::Slice>;
106    fn to_token(&self, reader: &mut Reader<'_, 'input, B>) -> Result<Self::Token, Error> {
107        let start = reader.position();
108
109        reader.eat(&self.0)?;
110
111        let mut depth = 1;
112
113        loop {
114            if reader.is(EOF) {
115                return Err(reader.error("unexpected end of input inside multi-line comment"));
116            } else if reader.eat(&self.0).is_ok() {
117                depth += 1;
118            } else if reader.eat(&self.1).is_ok() {
119                depth -= 1;
120
121                if depth == 0 {
122                    break;
123                }
124            } else {
125                reader.read()?;
126            }
127        }
128
129        let span = Span::new(start, reader.position());
130
131        let Some(value) = reader.buffer().source().sliced(span) else {
132            return Err(reader.error("slice"));
133        };
134
135        Ok(Item::new(span, value))
136    }
137
138    fn peek<'a>(&self, reader: &mut Reader<'_, 'input, B>) -> bool {
139        reader.is(&self.0)
140    }
141}
142
#[cfg(test)]
mod test {
    use udled::Input;

    use super::*;

    /// A C-style line comment is recognized when it is empty, when it ends at
    /// the end of input, and when it ends with a line break (which is then
    /// part of the token).
    #[test]
    fn line_comment() {
        // (source text, end of the expected span, expected slice)
        let cases = [
            ("//", 2, "//"),
            ("// Some tekst", 13, "// Some tekst"),
            ("// Some tekst\n test", 14, "// Some tekst\n"),
        ];

        for (source, end, expected) in cases {
            let mut input = Input::new(source);
            let parsed = input.parse(cstyle_line_comment()).unwrap();
            assert_eq!(parsed, Item::new(Span::new(0, end), expected));
        }
    }
}