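//! Source path: `vimdoc_language_server/parser.rs`.
//!
//! Line-oriented parser for Vim help ("vimdoc") text: classifies each line and
//! collects inline tag definitions (`*tag*`) and tag references (`|tag|`).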

1use lsp_types::{Position, Range};
2
/// Which flavour of horizontal rule a separator line is.
///
/// A separator is a line that, after trailing whitespace is removed, is at
/// least ten characters long and consists of a single repeated character.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SepKind {
    /// A rule made entirely of `=` characters.
    Major,
    /// A rule made entirely of `-` characters.
    Minor,
}
8
9#[derive(Debug, Clone)]
10pub struct Span {
11    pub name: String,
12    pub range: Range,
13}
14
15#[derive(Debug, Clone, PartialEq, Eq)]
16pub enum LineKind {
17    Blank,
18    Separator(SepKind),
19    CodeBody,
20    Text,
21}
22
23#[derive(Debug, Clone)]
24pub struct ParsedLine {
25    pub kind: LineKind,
26    pub tag_defs: Vec<Span>,
27    pub tag_refs: Vec<Span>,
28}
29
30#[derive(Debug, Default)]
31pub struct Document {
32    pub lines: Vec<ParsedLine>,
33}
34
35impl Document {
36    #[must_use]
37    #[allow(clippy::cast_possible_truncation)]
38    pub fn parse(text: &str) -> Self {
39        let mut lines = Vec::new();
40        let mut in_code = false;
41        for (idx, raw) in text.lines().enumerate() {
42            lines.push(parse_line(idx as u32, raw, &mut in_code));
43        }
44        Document { lines }
45    }
46
47    pub fn tag_defs(&self) -> impl Iterator<Item = &Span> {
48        self.lines.iter().flat_map(|l| l.tag_defs.iter())
49    }
50
51    pub fn tag_refs(&self) -> impl Iterator<Item = &Span> {
52        self.lines.iter().flat_map(|l| l.tag_refs.iter())
53    }
54}
55
56#[allow(clippy::similar_names)]
57fn parse_line(line_num: u32, raw: &str, in_code: &mut bool) -> ParsedLine {
58    let trimmed = raw.trim_end();
59
60    if trimmed.is_empty() {
61        *in_code = false;
62        return mk(LineKind::Blank, vec![], vec![]);
63    }
64
65    if *in_code {
66        let ends_code = trimmed == "<" || (!raw.starts_with(' ') && !raw.starts_with('\t'));
67        if ends_code {
68            *in_code = false;
69            if trimmed == "<" {
70                return mk(LineKind::CodeBody, vec![], vec![]);
71            }
72        } else {
73            return mk(LineKind::CodeBody, vec![], vec![]);
74        }
75    }
76
77    if trimmed.len() >= 10 && trimmed.bytes().all(|b| b == b'=') {
78        return mk(LineKind::Separator(SepKind::Major), vec![], vec![]);
79    }
80    if trimmed.len() >= 10 && trimmed.bytes().all(|b| b == b'-') {
81        return mk(LineKind::Separator(SepKind::Minor), vec![], vec![]);
82    }
83
84    let (tag_defs, tag_refs) = scan_inline(line_num, raw);
85
86    if trimmed.ends_with('>') && !trimmed.ends_with("->") {
87        *in_code = true;
88    }
89
90    mk(LineKind::Text, tag_defs, tag_refs)
91}
92
93#[allow(clippy::similar_names)]
94fn mk(kind: LineKind, tag_defs: Vec<Span>, tag_refs: Vec<Span>) -> ParsedLine {
95    ParsedLine {
96        kind,
97        tag_defs,
98        tag_refs,
99    }
100}
101
102#[allow(clippy::similar_names)]
103fn scan_inline(line_num: u32, raw: &str) -> (Vec<Span>, Vec<Span>) {
104    let mut tag_defs = Vec::new();
105    let mut tag_refs = Vec::new();
106    let bytes = raw.as_bytes();
107    let len = bytes.len();
108    let mut i = 0;
109
110    while i < len {
111        match bytes[i] {
112            b'*' => {
113                if let Some((name, end)) = scan_delimited(raw, i + 1, b'*') {
114                    tag_defs.push(make_span(line_num, i, end, name));
115                    i = end;
116                } else {
117                    i += 1;
118                }
119            }
120            b'|' => {
121                if let Some((name, end)) = scan_delimited(raw, i + 1, b'|') {
122                    tag_refs.push(make_span(line_num, i, end, name));
123                    i = end;
124                } else {
125                    i += 1;
126                }
127            }
128            b'`' => {
129                let mut j = i + 1;
130                while j < len && bytes[j] != b'`' {
131                    j += 1;
132                }
133                i = j + 1;
134            }
135            _ => {
136                i += 1;
137            }
138        }
139    }
140
141    (tag_defs, tag_refs)
142}
143
144#[allow(clippy::cast_possible_truncation)]
145fn make_span(line_num: u32, start: usize, end: usize, name: String) -> Span {
146    Span {
147        name,
148        range: Range {
149            start: Position {
150                line: line_num,
151                character: start as u32,
152            },
153            end: Position {
154                line: line_num,
155                character: end as u32,
156            },
157        },
158    }
159}
160
/// Reads a delimited name that begins at byte offset `start` of `raw`.
///
/// Returns the name together with the byte offset just past the closing
/// `delim`. Yields `None` when the name would be empty, when a space or tab
/// appears before the closing delimiter, or when the line ends first.
fn scan_delimited(raw: &str, start: usize, delim: u8) -> Option<(String, usize)> {
    let tail = raw.as_bytes().get(start..)?;
    // First byte that can terminate the name: the delimiter or whitespace.
    let stop = tail
        .iter()
        .position(|&b| b == delim || b == b' ' || b == b'\t')?;
    if stop == 0 || tail[stop] != delim {
        return None;
    }
    let close = start + stop;
    Some((raw[start..close].to_string(), close + 1))
}
178
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects the names of the given spans, in iteration order.
    fn names<'a>(spans: impl Iterator<Item = &'a Span>) -> Vec<&'a str> {
        spans.map(|span| span.name.as_str()).collect()
    }

    /// Parses `text` and returns just the kind of each line.
    fn kinds(text: &str) -> Vec<LineKind> {
        Document::parse(text)
            .lines
            .into_iter()
            .map(|line| line.kind)
            .collect()
    }

    #[test]
    fn detects_tag_defs() {
        let doc = Document::parse("*my-tag* some text");
        assert_eq!(names(doc.tag_defs()), ["my-tag"]);
    }

    #[test]
    fn detects_tag_refs() {
        let doc = Document::parse("see |my-tag| for details");
        assert_eq!(names(doc.tag_refs()), ["my-tag"]);
    }

    #[test]
    fn detects_major_separator() {
        let rule = "=".repeat(78);
        assert_eq!(kinds(&rule)[0], LineKind::Separator(SepKind::Major));
    }

    #[test]
    fn detects_minor_separator() {
        let rule = "-".repeat(78);
        assert_eq!(kinds(&rule)[0], LineKind::Separator(SepKind::Minor));
    }

    #[test]
    fn code_block_body_is_verbatim() {
        let found = kinds("example >\n    code line\n    another\n<\nnormal");
        assert_eq!(found[1], LineKind::CodeBody);
        assert_eq!(found[2], LineKind::CodeBody);
        assert_eq!(found[4], LineKind::Text);
    }

    #[test]
    fn blank_ends_code_block() {
        let found = kinds("example >\n    code\n\nnormal");
        assert_eq!(found[1], LineKind::CodeBody);
        assert_eq!(found[2], LineKind::Blank);
        assert_eq!(found[3], LineKind::Text);
    }

    #[test]
    fn no_tag_with_space() {
        let doc = Document::parse("* not a tag *");
        assert!(doc.tag_defs().next().is_none());
    }
}