zeta_note/
structure.rs

1use lsp_document::{Pos, TextMap};
2use regex::Regex;
3
4use std::{
5    fmt::{Debug, Display},
6    ops::Range,
7    path::{Path, PathBuf},
8    sync::Arc,
9};
10
11use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag};
12
13use serde::{Deserialize, Serialize};
14
15#[derive(Clone, PartialEq, Eq, Hash, Deserialize, Serialize)]
16pub struct NoteName(String);
17
18impl From<String> for NoteName {
19    fn from(name: String) -> Self {
20        Self(name)
21    }
22}
23
24impl From<&str> for NoteName {
25    fn from(name: &str) -> Self {
26        name.to_string().into()
27    }
28}
29
30impl Debug for NoteName {
31    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32        f.write_str(self.to_str())
33    }
34}
35
36impl Display for NoteName {
37    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
38        f.write_str(self.to_str())
39    }
40}
41
42impl NoteName {
43    pub fn from_path(path: &Path, root: &Path) -> NoteName {
44        let rel = path.strip_prefix(root).unwrap();
45        let stem = rel.with_extension("");
46        stem.to_string_lossy().to_string().into()
47    }
48
49    pub fn to_path(&self, root: &Path) -> PathBuf {
50        root.join(&self.0).with_extension("md")
51    }
52
53    pub fn to_str(&self) -> &str {
54        &self.0
55    }
56}
57
/// Numeric identifier of a note, backed by a `u32`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NoteID(u32);

impl NoteID {
    /// Returns the raw `u32` value of this id.
    pub fn to_u32(&self) -> u32 {
        let NoteID(raw) = *self;
        raw
    }

    /// Returns the id widened to `usize`.
    pub fn to_usize(&self) -> usize {
        self.0 as usize
    }
}

impl From<usize> for NoteID {
    /// Builds an id from an index.
    ///
    /// The conversion is a plain `as` cast, so an index larger than
    /// `u32::MAX` is truncated rather than rejected.
    fn from(idx: usize) -> Self {
        Self(idx as u32)
    }
}
76
77#[derive(Debug, PartialEq, Eq, Clone)]
78pub struct Structure {
79    elements: Arc<[ElementWithLoc]>,
80}
81
82impl Structure {
83    pub fn new(elements: Vec<ElementWithLoc>) -> Self {
84        Self {
85            elements: elements.into(),
86        }
87    }
88
89    pub fn elements(&self) -> Vec<ElementID> {
90        let mut els = Vec::with_capacity(self.elements.len());
91        for (idx, (el, _)) in self.elements.iter().enumerate() {
92            match el {
93                Element::Heading(..) => els.push(ElementID::Heading(HeadingID(idx as u32))),
94                Element::LinkRef(..) => els.push(ElementID::Ref(LinkRefID(idx as u32))),
95                Element::LinkRegular(..) => (),
96            }
97        }
98
99        els
100    }
101
102    pub fn elements_with_loc(&self) -> Vec<(ElementID, &ElementWithLoc)> {
103        let mut els = Vec::with_capacity(self.elements.len());
104        for (idx, ewl) in self.elements.iter().enumerate() {
105            match ewl.0 {
106                Element::Heading(..) => els.push((ElementID::Heading(HeadingID(idx as u32)), ewl)),
107                Element::LinkRef(..) => els.push((ElementID::Ref(LinkRefID(idx as u32)), ewl)),
108                Element::LinkRegular(..) => (),
109            }
110        }
111
112        els
113    }
114
115    pub fn elements_with_ids<'a, 'b: 'a>(
116        &'a self,
117        ids: &'b [ElementID],
118    ) -> impl Iterator<Item = &'a ElementWithLoc> {
119        ids.iter().map(move |id| &self.elements[id.to_usize()])
120    }
121
122    pub fn headings(&self) -> Vec<HeadingID> {
123        let mut headings = Vec::new();
124        for (idx, (el, _)) in self.elements.iter().enumerate() {
125            if let Element::Heading(..) = el {
126                headings.push(HeadingID(idx as u32))
127            }
128        }
129
130        headings
131    }
132
133    pub fn element_by_id(&self, id: ElementID) -> &ElementWithLoc {
134        &self.elements[id.to_usize()]
135    }
136
137    pub fn heading_by_id(&self, id: HeadingID) -> (&Heading, Range<Pos>) {
138        let el = &self.elements[id.0 as usize];
139        if let (Element::Heading(hd), span) = el {
140            (hd, span.clone())
141        } else {
142            panic!("Expected a heading at idx {:?} in {:?}", id, self.elements)
143        }
144    }
145
146    pub fn headings_with_ids(&self, ids: &[HeadingID]) -> Vec<(&Heading, Range<Pos>)> {
147        ids.iter().map(move |&id| self.heading_by_id(id)).collect()
148    }
149
150    pub fn refs(&self) -> Vec<LinkRefID> {
151        let mut refs = Vec::new();
152        for (idx, (el, _)) in self.elements.iter().enumerate() {
153            if let Element::LinkRef(..) = el {
154                refs.push(LinkRefID(idx as u32))
155            }
156        }
157
158        refs
159    }
160
161    pub fn ref_by_id(&self, id: LinkRefID) -> (&LinkRef, Range<Pos>) {
162        let el = &self.elements[id.0 as usize];
163        if let (Element::LinkRef(lr), span) = el {
164            (lr, span.clone())
165        } else {
166            panic!("Expected a ref at idx {:?} in {:?}", id, self.elements)
167        }
168    }
169
170    pub fn refs_with_ids(&self, ids: &[LinkRefID]) -> Vec<(&LinkRef, Range<Pos>)> {
171        ids.iter().map(move |&id| self.ref_by_id(id)).collect()
172    }
173}
174
175pub type ElementWithLoc = (Element, Range<Pos>);
176
/// Identifier of a heading element, backed by a `u32`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct HeadingID(u32);

/// Identifier of a reference-link element, backed by a `u32`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LinkRefID(u32);

/// Identifier of an element that is either a heading or a reference link.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ElementID {
    /// The element is a heading.
    Heading(HeadingID),
    /// The element is a reference link.
    Ref(LinkRefID),
}

impl ElementID {
    /// Returns the raw `u32` carried by whichever variant this is.
    pub fn to_u32(&self) -> u32 {
        match self {
            ElementID::Heading(HeadingID(raw)) | ElementID::Ref(LinkRefID(raw)) => *raw,
        }
    }

    /// Returns the raw id widened to `usize`.
    pub fn to_usize(&self) -> usize {
        let raw = self.to_u32();
        raw as usize
    }
}
201
202#[derive(Debug, PartialEq, Eq, Clone)]
203pub enum Element {
204    Heading(Heading),
205    LinkRegular(LinkRegular),
206    LinkRef(LinkRef),
207}
208
209#[derive(Debug, PartialEq, Eq, Clone, Hash)]
210pub struct Heading {
211    pub level: u8,
212    pub text: String,
213    pub scope: Range<Pos>,
214}
215
216#[derive(Debug, PartialEq, Eq, Clone)]
217pub struct LinkRef {
218    pub text: String,
219    pub note_name: Option<NoteName>,
220    pub heading: Option<String>,
221}
222
/// A link that is not a reference link.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct LinkRegular {
    /// Source text of the link.
    text: String,
    /// Link destination; `None` when the destination is empty.
    dest: Option<String>,
    /// Link title; `None` when the title is empty.
    title: Option<String>,
}
229
230pub fn parse_link_ref(text: &str) -> Option<LinkRef> {
231    let ref_link_regex = Regex::new(r"^\[:([^@]*)(@(.*))?\]$").unwrap();
232
233    if let Some(captures) = ref_link_regex.captures(text) {
234        let text = text.to_string();
235        let note_name = captures
236            .get(1)
237            .map(|m| m.as_str())
238            .filter(|s| !s.is_empty())
239            .map(|s| s.into());
240        let heading = captures
241            .get(3)
242            .map(|m| m.as_str().to_string())
243            .filter(|s| !s.is_empty());
244        Some(LinkRef {
245            text,
246            note_name,
247            heading,
248        })
249    } else {
250        None
251    }
252}
253
254pub fn parse_link_regular(text: &str, dest: CowStr, title: CowStr) -> LinkRegular {
255    let text = text.to_string();
256    let dest = if dest.is_empty() {
257        None
258    } else {
259        Some(dest.to_string())
260    };
261    let title = if title.is_empty() {
262        None
263    } else {
264        Some(title.to_string())
265    };
266    LinkRegular { text, dest, title }
267}
268
269pub fn scrape(index: &impl TextMap) -> Vec<ElementWithLoc> {
270    let mut callback = |_: BrokenLink<'_>| Some(("".into(), "".into()));
271    let parser =
272        Parser::new_with_broken_link_callback(index.text(), Options::all(), Some(&mut callback));
273    let mut elements = Vec::new();
274
275    let mut scoped_headings: Vec<(u8, String, Range<usize>)> = Vec::new();
276
277    for (event, el_span) in parser.into_offset_iter() {
278        match event {
279            Event::Start(Tag::Heading(level)) => {
280                let heading_text = &index.text()[el_span.start..el_span.end];
281
282                // Trim newlines, whitespaces on the right
283                let trim_right_text = heading_text.trim_end().to_string();
284                let trimmed_on_right = heading_text.len() - trim_right_text.len();
285                let heading_span = el_span.start..(el_span.end - trimmed_on_right);
286
287                while let Some(last) = scoped_headings.last() {
288                    if last.0 >= level as u8 {
289                        let last = scoped_headings.pop().unwrap();
290                        let heading = Heading {
291                            level: last.0,
292                            text: last.1,
293                            scope: index
294                                .offset_range_to_range(last.2.start..el_span.start)
295                                .unwrap(),
296                        };
297                        elements.push((
298                            Element::Heading(heading),
299                            index.offset_range_to_range(last.2).unwrap(),
300                        ));
301                    } else {
302                        break;
303                    }
304                }
305
306                scoped_headings.push((level as u8, trim_right_text, heading_span));
307            }
308            Event::Start(Tag::Link(typ, dest, title)) => match typ {
309                LinkType::Inline
310                | LinkType::Reference
311                | LinkType::ReferenceUnknown
312                | LinkType::Collapsed
313                | LinkType::CollapsedUnknown
314                | LinkType::Shortcut
315                | LinkType::ShortcutUnknown => {
316                    let link_text = &index.text()[el_span.start..el_span.end].trim();
317                    let link = parse_link_ref(link_text)
318                        .map(Element::LinkRef)
319                        .unwrap_or_else(|| {
320                            Element::LinkRegular(parse_link_regular(link_text, dest, title))
321                        });
322                    elements.push((link, index.offset_range_to_range(el_span).unwrap()));
323                }
324                _ => (),
325            },
326            _ => (),
327        }
328    }
329
330    for remaining in scoped_headings {
331        let heading = Heading {
332            level: remaining.0,
333            text: remaining.1,
334            scope: index
335                .offset_range_to_range(remaining.2.start..index.text().len())
336                .unwrap(),
337        };
338        elements.push((
339            Element::Heading(heading),
340            index.offset_range_to_range(remaining.2).unwrap(),
341        ));
342    }
343
344    elements.sort_by_key(|(_, span)| span.start);
345
346    elements
347}
348
#[cfg(test)]
mod test {
    use anyhow::Result;
    use lsp_document::IndexedText;

    use super::*;
    use pretty_assertions::assert_eq;
    use std::{fs, io, path::PathBuf};

    /// Reads the fixture `name` from `tests/resources` under the crate's
    /// manifest directory.
    fn read_resource(name: &str) -> io::Result<String> {
        let fixture = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("tests")
            .join("resources")
            .join(name);

        fs::read_to_string(&fixture)
    }

    /// Snapshot test: scraping the example note yields the recorded elements.
    #[test]
    fn scrape_note() -> Result<()> {
        let source = read_resource("example1.md")?;
        let elements = scrape(&IndexedText::new(source));
        insta::assert_debug_snapshot!(elements);
        Ok(())
    }

    /// A lone `#` at the end of input is reported as a level-1 heading whose
    /// span and scope both cover that single character.
    #[test]
    fn scrape_eof() {
        let whole_text = Pos::new(0, 0)..Pos::new(0, 1);
        let expected = vec![(
            Element::Heading(Heading {
                level: 1,
                text: "#".to_string(),
                scope: whole_text.clone(),
            }),
            whole_text,
        )];

        assert_eq!(scrape(&IndexedText::new("#")), expected);
    }
}