Skip to main content

cairo_lang_doc/
parser.rs

1use std::fmt;
2use std::ops::Range;
3
4use cairo_lang_debug::DebugWithDb;
5use cairo_lang_filesystem::span::{TextOffset, TextSpan, TextWidth};
6use itertools::Itertools;
7use pulldown_cmark::{
8    Alignment, BrokenLink, CodeBlockKind, Event, HeadingLevel, LinkType, Options,
9    Parser as MarkdownParser, Tag, TagEnd,
10};
11
12use crate::db::DocGroup;
13
14#[derive(PartialEq, Eq, Hash, Debug, Clone)]
15pub struct MarkdownLink {
16    /// The span of the whole link, including the label, the destination URL and the delimiters.
17    pub link_span: TextSpan,
18    /// Where the link leads to. Not present when the label could not be resolved.
19    pub dest_span: Option<TextSpan>,
20    /// The underlying content of `dest_span`, if present.
21    pub dest_text: Option<String>,
22}
23
24/// Token representing a link to another item inside the documentation.
25#[derive(Debug, PartialEq, Clone, Eq, salsa::Update)]
26pub struct CommentLinkToken {
27    /// A link part that's inside "[]" brackets.
28    pub label: String,
29    /// A link part that's inside "()" brackets, right after the label.
30    pub path: Option<String>,
31    /// The link.
32    pub md_link: MarkdownLink,
33}
34
35/// Generic type for a comment token. It's either plain content or a link.
36/// Notice that the Content token type can store much more than just one word.
37#[derive(Debug, PartialEq, Clone, Eq, salsa::Update)]
38pub enum DocumentationCommentToken {
39    /// Token with plain documentation content.
40    Content(String),
41    /// Link token.
42    Link(CommentLinkToken),
43}
44
45impl DocumentationCommentToken {
46    /// Checks if string representation of [`DocumentationCommentToken`] ends with newline.
47    pub fn ends_with_newline(self) -> bool {
48        match self {
49            DocumentationCommentToken::Content(content) => content.ends_with('\n'),
50            DocumentationCommentToken::Link(link_token) => link_token.label.ends_with('\n'),
51        }
52    }
53}
54
/// Bookkeeping for one level of a (possibly nested) Markdown list while it is being re-emitted.
struct DocCommentListItem {
    /// For ordered lists: the number to print in front of the next item. It starts at the
    /// list's first number and is incremented after each emitted item.
    delimiter: Option<u64>,
    /// `true` when this level is an ordered (numbered) list.
    is_ordered_list: bool,
}
62
63struct PendingLink {
64    label: String,
65    path: Option<String>,
66    link_start: usize,
67    link_type: LinkType,
68    destination: String,
69    label_range: Option<Range<usize>>,
70}
71
72/// Parses documentation comment content into a vector of [DocumentationCommentToken]s, keeping
73/// the order in which they were present in the content.
74///
75/// We look for 3 link patterns (ignore the backslash):
76/// "\[label\](path)", "\[path\]" or "\[`path`\]".
77pub fn parse_documentation_comment(documentation_comment: &str) -> Vec<DocumentationCommentToken> {
78    let mut tokens = Vec::new();
79    let mut current_link: Option<PendingLink> = None;
80    let mut is_indented_code_block = false;
81    let mut replacer = |broken_link: BrokenLink<'_>| {
82        if matches!(broken_link.link_type, LinkType::ShortcutUnknown | LinkType::Shortcut) {
83            return Some((broken_link.reference.to_string().into(), "".into()));
84        }
85        None
86    };
87
88    let mut options = Options::empty();
89    options.insert(Options::ENABLE_TABLES);
90    let parser = MarkdownParser::new_with_broken_link_callback(
91        documentation_comment,
92        options,
93        Some(&mut replacer),
94    );
95
96    let mut list_nesting: Vec<DocCommentListItem> = Vec::new();
97    let write_list_item_prefix =
98        |list_nesting: &mut Vec<DocCommentListItem>,
99         tokens: &mut Vec<DocumentationCommentToken>| {
100            if !list_nesting.is_empty() {
101                let indent = "  ".repeat(list_nesting.len() - 1);
102                let list_nesting = list_nesting.last_mut().unwrap();
103
104                let item_delimiter = if list_nesting.is_ordered_list {
105                    let delimiter = list_nesting.delimiter.unwrap_or(0);
106                    list_nesting.delimiter = Some(delimiter + 1);
107                    format!("{indent}{delimiter}.",)
108                } else {
109                    format!("{indent}-")
110                };
111                tokens
112                    .push(DocumentationCommentToken::Content(format!("{indent}{item_delimiter} ")));
113            }
114        };
115    let mut prefix_list_item = false;
116    let mut last_two_events = [None, None];
117    let mut table_alignment: Vec<Alignment> = Vec::new();
118
119    for (event, range) in parser.into_offset_iter() {
120        match &event {
121            Event::Text(text) => {
122                if prefix_list_item {
123                    write_list_item_prefix(&mut list_nesting, &mut tokens);
124                    prefix_list_item = false;
125                }
126                if let Some(link) = current_link.as_mut() {
127                    link.label.push_str(text.as_ref());
128                    link.label_range = Some(range.clone());
129                } else {
130                    let text = {
131                        if is_indented_code_block {
132                            format!("    {text}")
133                        } else {
134                            text.to_string()
135                        }
136                    };
137                    tokens.push(DocumentationCommentToken::Content(text));
138                }
139            }
140            Event::Code(code) => {
141                if prefix_list_item {
142                    write_list_item_prefix(&mut list_nesting, &mut tokens);
143                    prefix_list_item = false;
144                }
145                let complete_code = format!("`{code}`");
146                if let Some(link) = current_link.as_mut() {
147                    link.label.push_str(&complete_code);
148                    link.label_range = Some(range.clone());
149                } else {
150                    tokens.push(DocumentationCommentToken::Content(complete_code));
151                }
152            }
153            Event::Start(tag_start) => match tag_start {
154                Tag::Heading { level, .. } => {
155                    if let Some(last_token) = tokens.last_mut()
156                        && !last_token.clone().ends_with_newline()
157                    {
158                        tokens.push(DocumentationCommentToken::Content("\n".to_string()));
159                    }
160                    tokens.push(DocumentationCommentToken::Content(format!(
161                        "{} ",
162                        heading_level_to_markdown(*level)
163                    )));
164                }
165                Tag::List(list_type) => {
166                    if !list_nesting.is_empty() {
167                        tokens.push(DocumentationCommentToken::Content("\n".to_string()));
168                    }
169                    list_nesting.push(DocCommentListItem {
170                        delimiter: *list_type,
171                        is_ordered_list: list_type.is_some(),
172                    });
173                }
174                Tag::CodeBlock(kind) => match kind {
175                    CodeBlockKind::Fenced(language) => {
176                        if language.trim().is_empty() {
177                            tokens.push(DocumentationCommentToken::Content(String::from(
178                                "```cairo\n",
179                            )));
180                        } else {
181                            tokens.push(DocumentationCommentToken::Content(format!(
182                                "```{language}\n"
183                            )));
184                        }
185                    }
186                    CodeBlockKind::Indented => {
187                        tokens.push(DocumentationCommentToken::Content("\n".to_string()));
188                        is_indented_code_block = true;
189                    }
190                },
191                Tag::Link { link_type, dest_url, .. } => {
192                    let path = match *link_type {
193                        LinkType::ShortcutUnknown | LinkType::Shortcut => None,
194                        _ => Some(dest_url.clone().into_string()),
195                    };
196                    current_link = Some(PendingLink {
197                        label: String::new(),
198                        path,
199                        link_start: range.start,
200                        link_type: *link_type,
201                        destination: dest_url.clone().into_string(),
202                        label_range: None,
203                    });
204                }
205                Tag::Paragraph | Tag::TableRow => {
206                    tokens.push(DocumentationCommentToken::Content("\n".to_string()));
207                }
208                Tag::Item => {
209                    prefix_list_item = true;
210                }
211                Tag::Table(alignment) => {
212                    table_alignment = alignment.clone();
213                    tokens.push(DocumentationCommentToken::Content("\n".to_string()));
214                }
215                Tag::TableCell => {
216                    tokens.push(DocumentationCommentToken::Content("|".to_string()));
217                }
218                Tag::Strong => {
219                    tokens.push(DocumentationCommentToken::Content("**".to_string()));
220                }
221                Tag::Emphasis => {
222                    tokens.push(DocumentationCommentToken::Content("_".to_string()));
223                }
224                _ => {}
225            },
226            Event::End(tag_end) => match tag_end {
227                TagEnd::Heading(_) | TagEnd::Table => {
228                    tokens.push(DocumentationCommentToken::Content("\n".to_string()));
229                }
230                TagEnd::List(_) => {
231                    list_nesting.pop();
232                }
233                TagEnd::Item => {
234                    if !matches!(last_two_events[0], Some(Event::End(_)))
235                        | !matches!(last_two_events[1], Some(Event::End(_)))
236                    {
237                        tokens.push(DocumentationCommentToken::Content("\n".to_string()));
238                    }
239                }
240                TagEnd::TableHead => {
241                    tokens.push(DocumentationCommentToken::Content(format!(
242                        "|\n|{}|",
243                        table_alignment
244                            .iter()
245                            .map(|a| {
246                                let (left, right) = get_alignment_markers(a);
247                                format!("{left}---{right}")
248                            })
249                            .join("|")
250                    )));
251                    table_alignment.clear();
252                }
253                TagEnd::CodeBlock => {
254                    if !is_indented_code_block {
255                        tokens.push(DocumentationCommentToken::Content("```\n".to_string()));
256                    }
257                    is_indented_code_block = false;
258                }
259                TagEnd::Link => {
260                    if let Some(link) = current_link {
261                        let link_span = span_from_relative_range(
262                            documentation_comment,
263                            link.link_start..range.end,
264                        );
265                        let (dest_span, dest_text) = link
266                            .label_range
267                            .as_ref()
268                            .and_then(|label_range| {
269                                location_from_link_fields(
270                                    link.link_type,
271                                    &link.destination,
272                                    label_range,
273                                )
274                            })
275                            .map(|(dest_range, dest_text)| {
276                                (
277                                    Some(span_from_relative_range(
278                                        documentation_comment,
279                                        dest_range,
280                                    )),
281                                    Some(dest_text),
282                                )
283                            })
284                            .unwrap_or((None, None));
285                        let md_link = MarkdownLink { link_span, dest_span, dest_text };
286                        tokens.push(DocumentationCommentToken::Link(CommentLinkToken {
287                            label: link.label,
288                            path: link.path,
289                            md_link,
290                        }));
291                    }
292                    current_link = None;
293                }
294                TagEnd::TableRow => {
295                    tokens.push(DocumentationCommentToken::Content("|".to_string()));
296                }
297                TagEnd::Strong => {
298                    tokens.push(DocumentationCommentToken::Content("**".to_string()));
299                }
300                TagEnd::Emphasis => {
301                    tokens.push(DocumentationCommentToken::Content("_".to_string()));
302                }
303                TagEnd::Paragraph => {
304                    tokens.push(DocumentationCommentToken::Content("\n".to_string()));
305                }
306                _ => {}
307            },
308            Event::SoftBreak => {
309                tokens.push(DocumentationCommentToken::Content("\n".to_string()));
310            }
311            Event::Rule => {
312                tokens.push(DocumentationCommentToken::Content("___\n".to_string()));
313            }
314            _ => {}
315        }
316        last_two_events = [last_two_events[1].clone(), Some(event)];
317    }
318
319    if let Some(DocumentationCommentToken::Content(token)) = tokens.first()
320        && token == "\n"
321    {
322        tokens.remove(0);
323    }
324    if let Some(DocumentationCommentToken::Content(token)) = tokens.last_mut() {
325        *token = token.trim_end().to_string();
326        if token.is_empty() {
327            tokens.pop();
328        }
329    }
330
331    tokens
332}
333
334impl fmt::Display for CommentLinkToken {
335    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
336        match self.path.clone() {
337            Some(path) => write!(f, "[{}]({})", self.label, path),
338            None => write!(f, "[{}]", self.label),
339        }
340    }
341}
342
343impl fmt::Display for DocumentationCommentToken {
344    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
345        match self {
346            DocumentationCommentToken::Content(content) => {
347                write!(f, "{content}")
348            }
349            DocumentationCommentToken::Link(link_token) => {
350                write!(f, "{link_token}")
351            }
352        }
353    }
354}
355
356impl<'db> DebugWithDb<'db> for CommentLinkToken {
357    type Db = dyn DocGroup;
358    fn fmt(&self, f: &mut fmt::Formatter<'_>, _db: &Self::Db) -> fmt::Result {
359        f.debug_struct("CommentLinkToken")
360            .field("label", &self.label)
361            .field("path", &self.path)
362            .field("md_link", &self.md_link)
363            .finish()
364    }
365}
366
367/// Converts a byte range within the string into a `TextSpan` relative to the string start.
368fn span_from_relative_range(content: &str, range: Range<usize>) -> TextSpan {
369    let start = TextOffset::START.add_width(TextWidth::at(content, range.start));
370    let end = TextOffset::START.add_width(TextWidth::at(content, range.end));
371    TextSpan::new(start, end)
372}
373
374/// Extracts a location link span and normalized destination text for the given link fields.
375fn location_from_link_fields(
376    link_type: LinkType,
377    destination: &str,
378    label_range: &Range<usize>,
379) -> Option<(Range<usize>, String)> {
380    let (destination_normalized, backticked) = normalize_location_text(destination)?;
381
382    match link_type {
383        LinkType::Inline => {
384            let range = find_inline_destination_range(label_range.end, destination);
385            Some((range, destination_normalized))
386        }
387        LinkType::Collapsed
388        | LinkType::CollapsedUnknown
389        | LinkType::Shortcut
390        | LinkType::ShortcutUnknown => Some((label_range.clone(), destination_normalized)),
391        _ => None,
392    }
393    .map(|(range, text)| (trim_backtick_range(range.clone(), backticked), text))
394}
395
/// Tells whether `value` could be a location path: it must be non-empty and consist solely of
/// ASCII letters, ASCII digits, '_' and ':'.
fn is_location_string(value: &str) -> bool {
    if value.is_empty() {
        return false;
    }
    value.chars().all(|ch| matches!(ch, '_' | ':') || ch.is_ascii_alphanumeric())
}
400
401/// Normalizes the link destination and reports whether it was backticked.
402fn normalize_location_text(value: &str) -> Option<(String, bool)> {
403    let (value, backticked) = strip_backticks(value);
404    is_location_string(value).then(|| (value.to_string(), backticked))
405}
406
/// Trims surrounding whitespace from `value` and then removes one enclosing pair of backticks,
/// if there is one. The flag in the result is `true` exactly when a pair was removed.
fn strip_backticks(value: &str) -> (&str, bool) {
    let trimmed = value.trim();
    let unquoted = trimmed.strip_prefix('`').and_then(|rest| rest.strip_suffix('`'));
    (unquoted.unwrap_or(trimmed), unquoted.is_some())
}
416
/// Trims the range by one on each end when a backticked span is expected.
///
/// A range shorter than two bytes cannot contain a pair of backticks; such a range is returned
/// unchanged instead of producing an inverted range or underflowing on `range.end - 1`.
fn trim_backtick_range(range: Range<usize>, backticked: bool) -> Range<usize> {
    let has_room_for_backticks = range.end.saturating_sub(range.start) >= 2;
    if backticked && has_room_for_backticks {
        (range.start + 1)..(range.end - 1)
    } else {
        range
    }
}
421
/// Computes the byte range of an inline link destination from the end of the link's label.
///
/// The destination is taken to start two bytes after `label_last_end` (presumably skipping
/// the `](` delimiters) and to span `destination.len()` bytes.
fn find_inline_destination_range(label_last_end: usize, destination: &str) -> Range<usize> {
    let start = label_last_end + 2;
    Range { start, end: start + destination.len() }
}
428
429/// Maps `HeadingLevel` to the correct markdown marker.
430fn heading_level_to_markdown(heading_level: HeadingLevel) -> String {
431    let heading_char: String = String::from("#");
432    match heading_level {
433        HeadingLevel::H1 => heading_char,
434        HeadingLevel::H2 => heading_char.repeat(2),
435        HeadingLevel::H3 => heading_char.repeat(3),
436        HeadingLevel::H4 => heading_char.repeat(4),
437        HeadingLevel::H5 => heading_char.repeat(5),
438        HeadingLevel::H6 => heading_char.repeat(6),
439    }
440}
441
442/// Maps [`Alignment`] to the correct markdown markers.
443fn get_alignment_markers(alignment: &Alignment) -> (String, String) {
444    let (left, right) = match alignment {
445        Alignment::None => ("", ""),
446        Alignment::Left => (":", ""),
447        Alignment::Right => ("", ":"),
448        Alignment::Center => (":", ":"),
449    };
450    (left.to_string(), right.to_string())
451}