1use lsp_document::{Pos, TextMap};
2use regex::Regex;
3
4use std::{
5 fmt::{Debug, Display},
6 ops::Range,
7 path::{Path, PathBuf},
8 sync::Arc,
9};
10
11use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag};
12
13use serde::{Deserialize, Serialize};
14
/// Name of a note, stored as a plain string.
///
/// [`NoteName::from_path`] derives the name from a file path relative to a root directory
/// with the file extension removed; the `From<String>` / `From<&str>` impls wrap arbitrary
/// text without validation.
#[derive(Clone, PartialEq, Eq, Hash, Deserialize, Serialize)]
pub struct NoteName(String);
17
18impl From<String> for NoteName {
19 fn from(name: String) -> Self {
20 Self(name)
21 }
22}
23
24impl From<&str> for NoteName {
25 fn from(name: &str) -> Self {
26 name.to_string().into()
27 }
28}
29
30impl Debug for NoteName {
31 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32 f.write_str(self.to_str())
33 }
34}
35
36impl Display for NoteName {
37 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
38 f.write_str(self.to_str())
39 }
40}
41
42impl NoteName {
43 pub fn from_path(path: &Path, root: &Path) -> NoteName {
44 let rel = path.strip_prefix(root).unwrap();
45 let stem = rel.with_extension("");
46 stem.to_string_lossy().to_string().into()
47 }
48
49 pub fn to_path(&self, root: &Path) -> PathBuf {
50 root.join(&self.0).with_extension("md")
51 }
52
53 pub fn to_str(&self) -> &str {
54 &self.0
55 }
56}
57
/// Numeric identifier of a note, stored as a `u32`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NoteID(u32);

impl NoteID {
    /// Returns the raw identifier value.
    pub fn to_u32(&self) -> u32 {
        let NoteID(raw) = *self;
        raw
    }

    /// Returns the identifier converted to `usize`.
    pub fn to_usize(&self) -> usize {
        self.0 as usize
    }
}

impl From<usize> for NoteID {
    /// Builds an identifier from a `usize` value.
    ///
    /// NOTE: the `as` cast silently keeps only the low 32 bits of `idx`.
    fn from(idx: usize) -> Self {
        Self(idx as u32)
    }
}
76
/// An immutable list of elements, each paired with its source range.
///
/// The elements are kept in a shared `Arc` slice, so cloning a `Structure` only bumps a
/// reference count. The position of an element in the slice is what the `HeadingID` /
/// `LinkRefID` values handed out by the accessor methods refer to.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Structure {
    elements: Arc<[ElementWithLoc]>,
}
81
82impl Structure {
83 pub fn new(elements: Vec<ElementWithLoc>) -> Self {
84 Self {
85 elements: elements.into(),
86 }
87 }
88
89 pub fn elements(&self) -> Vec<ElementID> {
90 let mut els = Vec::with_capacity(self.elements.len());
91 for (idx, (el, _)) in self.elements.iter().enumerate() {
92 match el {
93 Element::Heading(..) => els.push(ElementID::Heading(HeadingID(idx as u32))),
94 Element::LinkRef(..) => els.push(ElementID::Ref(LinkRefID(idx as u32))),
95 Element::LinkRegular(..) => (),
96 }
97 }
98
99 els
100 }
101
102 pub fn elements_with_loc(&self) -> Vec<(ElementID, &ElementWithLoc)> {
103 let mut els = Vec::with_capacity(self.elements.len());
104 for (idx, ewl) in self.elements.iter().enumerate() {
105 match ewl.0 {
106 Element::Heading(..) => els.push((ElementID::Heading(HeadingID(idx as u32)), ewl)),
107 Element::LinkRef(..) => els.push((ElementID::Ref(LinkRefID(idx as u32)), ewl)),
108 Element::LinkRegular(..) => (),
109 }
110 }
111
112 els
113 }
114
115 pub fn elements_with_ids<'a, 'b: 'a>(
116 &'a self,
117 ids: &'b [ElementID],
118 ) -> impl Iterator<Item = &'a ElementWithLoc> {
119 ids.iter().map(move |id| &self.elements[id.to_usize()])
120 }
121
122 pub fn headings(&self) -> Vec<HeadingID> {
123 let mut headings = Vec::new();
124 for (idx, (el, _)) in self.elements.iter().enumerate() {
125 if let Element::Heading(..) = el {
126 headings.push(HeadingID(idx as u32))
127 }
128 }
129
130 headings
131 }
132
133 pub fn element_by_id(&self, id: ElementID) -> &ElementWithLoc {
134 &self.elements[id.to_usize()]
135 }
136
137 pub fn heading_by_id(&self, id: HeadingID) -> (&Heading, Range<Pos>) {
138 let el = &self.elements[id.0 as usize];
139 if let (Element::Heading(hd), span) = el {
140 (hd, span.clone())
141 } else {
142 panic!("Expected a heading at idx {:?} in {:?}", id, self.elements)
143 }
144 }
145
146 pub fn headings_with_ids(&self, ids: &[HeadingID]) -> Vec<(&Heading, Range<Pos>)> {
147 ids.iter().map(move |&id| self.heading_by_id(id)).collect()
148 }
149
150 pub fn refs(&self) -> Vec<LinkRefID> {
151 let mut refs = Vec::new();
152 for (idx, (el, _)) in self.elements.iter().enumerate() {
153 if let Element::LinkRef(..) = el {
154 refs.push(LinkRefID(idx as u32))
155 }
156 }
157
158 refs
159 }
160
161 pub fn ref_by_id(&self, id: LinkRefID) -> (&LinkRef, Range<Pos>) {
162 let el = &self.elements[id.0 as usize];
163 if let (Element::LinkRef(lr), span) = el {
164 (lr, span.clone())
165 } else {
166 panic!("Expected a ref at idx {:?} in {:?}", id, self.elements)
167 }
168 }
169
170 pub fn refs_with_ids(&self, ids: &[LinkRefID]) -> Vec<(&LinkRef, Range<Pos>)> {
171 ids.iter().map(move |&id| self.ref_by_id(id)).collect()
172 }
173}
174
/// An element paired with the source range (start/end `Pos`) it was found at.
pub type ElementWithLoc = (Element, Range<Pos>);
176
/// Identifier of a heading: the index of the heading in a `Structure`'s element list.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct HeadingID(u32);

/// Identifier of a reference link: the index of the link in a `Structure`'s element list.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LinkRefID(u32);

/// Identifier of an addressable element, tagged with the kind of element it points to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ElementID {
    Heading(HeadingID),
    Ref(LinkRefID),
}

impl ElementID {
    /// Returns the raw index, regardless of the element kind.
    pub fn to_u32(&self) -> u32 {
        match *self {
            ElementID::Heading(HeadingID(raw)) | ElementID::Ref(LinkRefID(raw)) => raw,
        }
    }

    /// Returns the raw index as a `usize`.
    pub fn to_usize(&self) -> usize {
        let raw = self.to_u32();
        raw as usize
    }
}
201
/// A structural element of a note.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Element {
    /// A heading.
    Heading(Heading),
    /// A link that does not use the `[:note@heading]` reference syntax.
    LinkRegular(LinkRegular),
    /// A link written in the `[:note@heading]` reference syntax (see `parse_link_ref`).
    LinkRef(LinkRef),
}
208
/// A heading as collected by `scrape`.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct Heading {
    /// Heading level as reported by the markdown parser.
    pub level: u8,
    /// Source text of the heading with trailing whitespace trimmed.
    pub text: String,
    /// Range from the start of the heading up to the start of the next heading whose level
    /// is less than or equal to this one, or up to the end of the text if there is none.
    pub scope: Range<Pos>,
}
215
/// A reference link of the form `[:note@heading]`, as produced by `parse_link_ref`.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct LinkRef {
    /// The complete link text that was parsed, brackets included.
    pub text: String,
    /// The part between `[:` and the first `@` (or the closing bracket); `None` when empty.
    pub note_name: Option<NoteName>,
    /// The part after the first `@`; `None` when absent or empty.
    pub heading: Option<String>,
}
222
/// A link that is not a reference link, as produced by `parse_link_regular`.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct LinkRegular {
    /// The link text passed to the parser function, stored unchanged.
    text: String,
    /// Link destination; `None` when the destination is empty.
    dest: Option<String>,
    /// Link title; `None` when the title is empty.
    title: Option<String>,
}
229
230pub fn parse_link_ref(text: &str) -> Option<LinkRef> {
231 let ref_link_regex = Regex::new(r"^\[:([^@]*)(@(.*))?\]$").unwrap();
232
233 if let Some(captures) = ref_link_regex.captures(text) {
234 let text = text.to_string();
235 let note_name = captures
236 .get(1)
237 .map(|m| m.as_str())
238 .filter(|s| !s.is_empty())
239 .map(|s| s.into());
240 let heading = captures
241 .get(3)
242 .map(|m| m.as_str().to_string())
243 .filter(|s| !s.is_empty());
244 Some(LinkRef {
245 text,
246 note_name,
247 heading,
248 })
249 } else {
250 None
251 }
252}
253
254pub fn parse_link_regular(text: &str, dest: CowStr, title: CowStr) -> LinkRegular {
255 let text = text.to_string();
256 let dest = if dest.is_empty() {
257 None
258 } else {
259 Some(dest.to_string())
260 };
261 let title = if title.is_empty() {
262 None
263 } else {
264 Some(title.to_string())
265 };
266 LinkRegular { text, dest, title }
267}
268
/// Collects the headings and links found in the text behind `index`.
///
/// Returns every element paired with its source range, sorted by the start of that range.
/// Each heading additionally records its `scope`: the range from the heading's start up to
/// the start of the next heading of the same or a lower level number, or the end of the
/// text.
///
/// # Panics
///
/// Panics if `index` cannot convert one of the parser's byte ranges into a position range.
pub fn scrape(index: &impl TextMap) -> Vec<ElementWithLoc> {
    // Resolve every broken link to an empty destination/title.
    // NOTE(review): presumably this makes bracketed text without a link definition (such as
    // `[:note]`) still show up as a link event — confirm against pulldown_cmark docs.
    let mut callback = |_: BrokenLink<'_>| Some(("".into(), "".into()));
    let parser =
        Parser::new_with_broken_link_callback(index.text(), Options::all(), Some(&mut callback));
    let mut elements = Vec::new();

    // Stack of headings whose scope is still open: (level, text, byte span of the heading).
    let mut scoped_headings: Vec<(u8, String, Range<usize>)> = Vec::new();

    for (event, el_span) in parser.into_offset_iter() {
        match event {
            Event::Start(Tag::Heading(level)) => {
                let heading_text = &index.text()[el_span.start..el_span.end];

                // Drop trailing whitespace from both the text and the span of the heading.
                let trim_right_text = heading_text.trim_end().to_string();
                let trimmed_on_right = heading_text.len() - trim_right_text.len();
                let heading_span = el_span.start..(el_span.end - trimmed_on_right);

                // A new heading closes every open heading of the same or a deeper level;
                // the closed heading's scope ends where the new heading starts.
                while let Some(last) = scoped_headings.last() {
                    if last.0 >= level as u8 {
                        let last = scoped_headings.pop().unwrap();
                        let heading = Heading {
                            level: last.0,
                            text: last.1,
                            scope: index
                                .offset_range_to_range(last.2.start..el_span.start)
                                .unwrap(),
                        };
                        elements.push((
                            Element::Heading(heading),
                            index.offset_range_to_range(last.2).unwrap(),
                        ));
                    } else {
                        break;
                    }
                }

                scoped_headings.push((level as u8, trim_right_text, heading_span));
            }
            Event::Start(Tag::Link(typ, dest, title)) => match typ {
                LinkType::Inline
                | LinkType::Reference
                | LinkType::ReferenceUnknown
                | LinkType::Collapsed
                | LinkType::CollapsedUnknown
                | LinkType::Shortcut
                | LinkType::ShortcutUnknown => {
                    let link_text = &index.text()[el_span.start..el_span.end].trim();
                    // Try the `[:note@heading]` syntax first; anything else is a regular link.
                    let link = parse_link_ref(link_text)
                        .map(Element::LinkRef)
                        .unwrap_or_else(|| {
                            Element::LinkRegular(parse_link_regular(link_text, dest, title))
                        });
                    elements.push((link, index.offset_range_to_range(el_span).unwrap()));
                }
                // Other link types are ignored.
                _ => (),
            },
            _ => (),
        }
    }

    // Headings still open at the end of the input extend to the end of the text.
    for remaining in scoped_headings {
        let heading = Heading {
            level: remaining.0,
            text: remaining.1,
            scope: index
                .offset_range_to_range(remaining.2.start..index.text().len())
                .unwrap(),
        };
        elements.push((
            Element::Heading(heading),
            index.offset_range_to_range(remaining.2).unwrap(),
        ));
    }

    // Headings are only pushed once their scope is closed, so the vector is out of order;
    // sort by start position.
    elements.sort_by_key(|(_, span)| span.start);

    elements
}
348
#[cfg(test)]
mod test {
    use anyhow::Result;
    use lsp_document::IndexedText;

    use super::*;
    use pretty_assertions::assert_eq;
    use std::{fs, io, path::PathBuf};

    /// Reads the file `tests/resources/<name>` under the crate's manifest directory.
    fn read_resource(name: &str) -> io::Result<String> {
        let mut root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        root.push("tests");
        root.push("resources");
        root.push(name);

        fs::read_to_string(&root)
    }

    /// Snapshot test: scrapes `example1.md` and compares the debug output of the result
    /// against the stored `insta` snapshot.
    #[test]
    fn scrape_note() -> Result<()> {
        let text = IndexedText::new(read_resource("example1.md")?);
        let elements = scrape(&text);
        insta::assert_debug_snapshot!(elements);
        Ok(())
    }

    /// A lone `#` at the very end of the input is scraped as a level-1 heading whose text,
    /// span and scope all cover that single character.
    #[test]
    fn scrape_eof() {
        let elements = scrape(&IndexedText::new("#"));
        assert_eq!(
            elements,
            vec![(
                Element::Heading(Heading {
                    level: 1,
                    text: "#".to_string(),
                    scope: Pos::new(0, 0)..Pos::new(0, 1)
                }),
                Pos::new(0, 0)..Pos::new(0, 1)
            )]
        );
    }
}