Skip to main content

cairo_lang_doc/
parser.rs

1use std::fmt;
2use std::ops::Range;
3
4use cairo_lang_debug::DebugWithDb;
5use cairo_lang_filesystem::span::{TextOffset, TextSpan, TextWidth};
6use itertools::Itertools;
7use pulldown_cmark::{
8    Alignment, BrokenLink, CodeBlockKind, Event, HeadingLevel, LinkType, Options,
9    Parser as MarkdownParser, Tag, TagEnd,
10};
11
12use crate::db::DocGroup;
13
14#[derive(PartialEq, Eq, Hash, Debug, Clone)]
15pub struct MarkdownLink {
16    /// The span of the whole link, including the label, the destination URL and the delimiters.
17    pub link_span: TextSpan,
18    /// Where the link leads to. Not present when the label could not be resolved.
19    pub dest_span: Option<TextSpan>,
20    /// The underlying content of `dest_span`, if present.
21    pub dest_text: Option<String>,
22}
23
24/// Token representing a link to another item inside the documentation.
25#[derive(Debug, PartialEq, Clone, Eq, salsa::Update)]
26pub struct CommentLinkToken {
27    /// A link part that's inside "[]" brackets.
28    pub label: String,
29    /// A link part that's inside "()" brackets, right after the label.
30    pub path: Option<String>,
31    /// The link.
32    pub md_link: MarkdownLink,
33}
34
35/// Generic type for a comment token. It's either plain content or a link.
36/// Notice that the Content token type can store much more than just one word.
37#[derive(Debug, PartialEq, Clone, Eq, salsa::Update)]
38pub enum DocumentationCommentToken {
39    /// Token with plain documentation content.
40    Content(String),
41    /// Link token.
42    Link(CommentLinkToken),
43}
44
45impl DocumentationCommentToken {
46    /// Checks if string representation of [`DocumentationCommentToken`] ends with newline.
47    pub fn ends_with_newline(self) -> bool {
48        match self {
49            DocumentationCommentToken::Content(content) => content.ends_with('\n'),
50            DocumentationCommentToken::Link(link_token) => link_token.label.ends_with('\n'),
51        }
52    }
53}
54
/// Bookkeeping for one level of a (possibly nested) Markdown list while it is being rendered.
struct DocCommentListItem {
    /// `true` when this level is an ordered (numbered) list.
    is_ordered_list: bool,
    /// Number to print for the next item of an ordered list; it is bumped after every item.
    /// `None` for unordered lists.
    delimiter: Option<u64>,
}
62
63struct PendingLink {
64    label: String,
65    path: Option<String>,
66    link_start: usize,
67    link_type: LinkType,
68    destination: String,
69    label_range: Option<Range<usize>>,
70}
71
72/// Parses documentation comment content into a vector of [DocumentationCommentToken]s, keeping
73/// the order in which they were present in the content.
74///
75/// We look for 3 link patterns (ignore the backslash):
76/// "\[label\](path)", "\[path\]" or "\[`path`\]".
77pub fn parse_documentation_comment(documentation_comment: &str) -> Vec<DocumentationCommentToken> {
78    let mut tokens = Vec::new();
79    let mut current_link: Option<PendingLink> = None;
80    let mut is_indented_code_block = false;
81    let mut replacer = |broken_link: BrokenLink<'_>| {
82        if matches!(broken_link.link_type, LinkType::ShortcutUnknown | LinkType::Shortcut) {
83            return Some((broken_link.reference.to_string().into(), "".into()));
84        }
85        None
86    };
87
88    let mut options = Options::empty();
89    options.insert(Options::ENABLE_TABLES);
90    let parser = MarkdownParser::new_with_broken_link_callback(
91        documentation_comment,
92        options,
93        Some(&mut replacer),
94    );
95
96    let mut list_nesting: Vec<DocCommentListItem> = Vec::new();
97    let write_list_item_prefix =
98        |list_nesting: &mut Vec<DocCommentListItem>,
99         tokens: &mut Vec<DocumentationCommentToken>| {
100            if !list_nesting.is_empty() {
101                let indent = "  ".repeat(list_nesting.len() - 1);
102                let list_nesting = list_nesting.last_mut().unwrap();
103
104                let item_delimiter = if list_nesting.is_ordered_list {
105                    let delimiter = list_nesting.delimiter.unwrap_or(0);
106                    list_nesting.delimiter = Some(delimiter + 1);
107                    format!("{indent}{delimiter}.",)
108                } else {
109                    format!("{indent}-")
110                };
111                tokens
112                    .push(DocumentationCommentToken::Content(format!("{indent}{item_delimiter} ")));
113            }
114        };
115    let mut prefix_list_item = false;
116    let mut last_two_events = [None, None];
117    let mut table_alignment: Vec<Alignment> = Vec::new();
118
119    for (event, range) in parser.into_offset_iter() {
120        match &event {
121            Event::Text(text) => {
122                if prefix_list_item {
123                    write_list_item_prefix(&mut list_nesting, &mut tokens);
124                    prefix_list_item = false;
125                }
126                if let Some(link) = current_link.as_mut() {
127                    link.label.push_str(text.as_ref());
128                    link.label_range = Some(range.clone());
129                } else {
130                    let text = {
131                        if is_indented_code_block {
132                            format!("    {text}")
133                        } else {
134                            text.to_string()
135                        }
136                    };
137                    tokens.push(DocumentationCommentToken::Content(text));
138                }
139            }
140            Event::Code(code) => {
141                if prefix_list_item {
142                    write_list_item_prefix(&mut list_nesting, &mut tokens);
143                    prefix_list_item = false;
144                }
145                let complete_code = format!("`{code}`");
146                if let Some(link) = current_link.as_mut() {
147                    link.label.push_str(&complete_code);
148                    link.label_range = Some(range.clone());
149                } else {
150                    tokens.push(DocumentationCommentToken::Content(complete_code));
151                }
152            }
153            Event::Start(tag_start) => match tag_start {
154                Tag::Heading { level, .. } => {
155                    if let Some(last_token) = tokens.last_mut()
156                        && !last_token.clone().ends_with_newline()
157                    {
158                        tokens.push(DocumentationCommentToken::Content("\n".to_string()));
159                    }
160                    tokens.push(DocumentationCommentToken::Content(format!(
161                        "{} ",
162                        heading_level_to_markdown(*level)
163                    )));
164                }
165                Tag::List(list_type) => {
166                    if !list_nesting.is_empty() {
167                        tokens.push(DocumentationCommentToken::Content("\n".to_string()));
168                    }
169                    list_nesting.push(DocCommentListItem {
170                        delimiter: *list_type,
171                        is_ordered_list: list_type.is_some(),
172                    });
173                }
174                Tag::CodeBlock(kind) => match kind {
175                    CodeBlockKind::Fenced(language) => {
176                        if language.trim().is_empty() {
177                            tokens.push(DocumentationCommentToken::Content(String::from(
178                                "```cairo\n",
179                            )));
180                        } else {
181                            tokens.push(DocumentationCommentToken::Content(format!(
182                                "```{language}\n"
183                            )));
184                        }
185                    }
186                    CodeBlockKind::Indented => {
187                        tokens.push(DocumentationCommentToken::Content("\n".to_string()));
188                        is_indented_code_block = true;
189                    }
190                },
191                Tag::Link { link_type, dest_url, .. } => {
192                    let path = match *link_type {
193                        LinkType::ShortcutUnknown | LinkType::Shortcut => None,
194                        _ => Some(dest_url.clone().into_string()),
195                    };
196                    current_link = Some(PendingLink {
197                        label: String::new(),
198                        path,
199                        link_start: range.start,
200                        link_type: *link_type,
201                        destination: dest_url.clone().into_string(),
202                        label_range: None,
203                    });
204                }
205                Tag::Paragraph | Tag::TableRow => {
206                    tokens.push(DocumentationCommentToken::Content("\n".to_string()));
207                }
208                Tag::Item => {
209                    prefix_list_item = true;
210                }
211                Tag::Table(alignment) => {
212                    table_alignment = alignment.clone();
213                    tokens.push(DocumentationCommentToken::Content("\n".to_string()));
214                }
215                Tag::TableCell => {
216                    tokens.push(DocumentationCommentToken::Content("|".to_string()));
217                }
218                Tag::Strong => {
219                    tokens.push(DocumentationCommentToken::Content("**".to_string()));
220                }
221                Tag::Emphasis => {
222                    tokens.push(DocumentationCommentToken::Content("_".to_string()));
223                }
224                _ => {}
225            },
226            Event::End(tag_end) => match tag_end {
227                TagEnd::Heading(_) | TagEnd::Table => {
228                    tokens.push(DocumentationCommentToken::Content("\n".to_string()));
229                }
230                TagEnd::List(_) => {
231                    list_nesting.pop();
232                }
233                TagEnd::Item
234                    if !matches!(last_two_events[0], Some(Event::End(_)))
235                        | !matches!(last_two_events[1], Some(Event::End(_))) =>
236                {
237                    tokens.push(DocumentationCommentToken::Content("\n".to_string()));
238                }
239                TagEnd::TableHead => {
240                    tokens.push(DocumentationCommentToken::Content(format!(
241                        "|\n|{}|",
242                        table_alignment
243                            .iter()
244                            .map(|a| {
245                                let (left, right) = get_alignment_markers(a);
246                                format!("{left}---{right}")
247                            })
248                            .join("|")
249                    )));
250                    table_alignment.clear();
251                }
252                TagEnd::CodeBlock => {
253                    if !is_indented_code_block {
254                        tokens.push(DocumentationCommentToken::Content("```\n".to_string()));
255                    }
256                    is_indented_code_block = false;
257                }
258                TagEnd::Link => {
259                    if let Some(link) = current_link {
260                        let link_span = span_from_relative_range(
261                            documentation_comment,
262                            link.link_start..range.end,
263                        );
264                        let (dest_span, dest_text) = link
265                            .label_range
266                            .as_ref()
267                            .and_then(|label_range| {
268                                location_from_link_fields(
269                                    link.link_type,
270                                    &link.destination,
271                                    label_range,
272                                )
273                            })
274                            .map(|(dest_range, dest_text)| {
275                                (
276                                    Some(span_from_relative_range(
277                                        documentation_comment,
278                                        dest_range,
279                                    )),
280                                    Some(dest_text),
281                                )
282                            })
283                            .unwrap_or((None, None));
284                        let md_link = MarkdownLink { link_span, dest_span, dest_text };
285                        tokens.push(DocumentationCommentToken::Link(CommentLinkToken {
286                            label: link.label,
287                            path: link.path,
288                            md_link,
289                        }));
290                    }
291                    current_link = None;
292                }
293                TagEnd::TableRow => {
294                    tokens.push(DocumentationCommentToken::Content("|".to_string()));
295                }
296                TagEnd::Strong => {
297                    tokens.push(DocumentationCommentToken::Content("**".to_string()));
298                }
299                TagEnd::Emphasis => {
300                    tokens.push(DocumentationCommentToken::Content("_".to_string()));
301                }
302                TagEnd::Paragraph => {
303                    tokens.push(DocumentationCommentToken::Content("\n".to_string()));
304                }
305                _ => {}
306            },
307            Event::SoftBreak => {
308                tokens.push(DocumentationCommentToken::Content("\n".to_string()));
309            }
310            Event::Rule => {
311                tokens.push(DocumentationCommentToken::Content("___\n".to_string()));
312            }
313            _ => {}
314        }
315        last_two_events = [last_two_events[1].clone(), Some(event)];
316    }
317
318    if let Some(DocumentationCommentToken::Content(token)) = tokens.first()
319        && token == "\n"
320    {
321        tokens.remove(0);
322    }
323    if let Some(DocumentationCommentToken::Content(token)) = tokens.last_mut() {
324        *token = token.trim_end().to_string();
325        if token.is_empty() {
326            tokens.pop();
327        }
328    }
329
330    tokens
331}
332
333impl fmt::Display for CommentLinkToken {
334    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
335        match self.path.clone() {
336            Some(path) => write!(f, "[{}]({})", self.label, path),
337            None => write!(f, "[{}]", self.label),
338        }
339    }
340}
341
342impl fmt::Display for DocumentationCommentToken {
343    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
344        match self {
345            DocumentationCommentToken::Content(content) => {
346                write!(f, "{content}")
347            }
348            DocumentationCommentToken::Link(link_token) => {
349                write!(f, "{link_token}")
350            }
351        }
352    }
353}
354
355impl<'db> DebugWithDb<'db> for CommentLinkToken {
356    type Db = dyn DocGroup;
357    fn fmt(&self, f: &mut fmt::Formatter<'_>, _db: &Self::Db) -> fmt::Result {
358        f.debug_struct("CommentLinkToken")
359            .field("label", &self.label)
360            .field("path", &self.path)
361            .field("md_link", &self.md_link)
362            .finish()
363    }
364}
365
366/// Converts a byte range within the string into a `TextSpan` relative to the string start.
367fn span_from_relative_range(content: &str, range: Range<usize>) -> TextSpan {
368    let start = TextOffset::START.add_width(TextWidth::at(content, range.start));
369    let end = TextOffset::START.add_width(TextWidth::at(content, range.end));
370    TextSpan::new(start, end)
371}
372
373/// Extracts a location link span and normalized destination text for the given link fields.
374fn location_from_link_fields(
375    link_type: LinkType,
376    destination: &str,
377    label_range: &Range<usize>,
378) -> Option<(Range<usize>, String)> {
379    let (destination_normalized, backticked) = normalize_location_text(destination)?;
380
381    match link_type {
382        LinkType::Inline => {
383            let range = find_inline_destination_range(label_range.end, destination);
384            Some((range, destination_normalized))
385        }
386        LinkType::Collapsed
387        | LinkType::CollapsedUnknown
388        | LinkType::Shortcut
389        | LinkType::ShortcutUnknown => Some((label_range.clone(), destination_normalized)),
390        _ => None,
391    }
392    .map(|(range, text)| (trim_backtick_range(range.clone(), backticked), text))
393}
394
/// Checks that `value` is non-empty and made up solely of ASCII letters, ASCII digits, '_'
/// and ':' — the characters a location path may contain.
fn is_location_string(value: &str) -> bool {
    if value.is_empty() {
        return false;
    }
    value.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | ':'))
}
399
400/// Normalizes the link destination and reports whether it was backticked.
401fn normalize_location_text(value: &str) -> Option<(String, bool)> {
402    let (value, backticked) = strip_backticks(value);
403    is_location_string(value).then(|| (value.to_string(), backticked))
404}
405
/// Trims whitespace from `value` and, if what remains is wrapped in a pair of backticks,
/// removes that pair. The boolean tells whether a pair was removed.
fn strip_backticks(value: &str) -> (&str, bool) {
    let trimmed = value.trim();
    match trimmed.strip_prefix('`').and_then(|rest| rest.strip_suffix('`')) {
        Some(inner) => (inner, true),
        None => (trimmed, false),
    }
}
415
/// Shrinks `range` by one byte on each side when the covered text is wrapped in backticks.
///
/// The range is returned unchanged when `backticked` is `false`, and also when it is shorter
/// than two bytes: such a range cannot contain both backticks, and trimming it would underflow
/// or produce an inverted range.
fn trim_backtick_range(range: Range<usize>, backticked: bool) -> Range<usize> {
    if backticked && range.len() >= 2 { (range.start + 1)..(range.end - 1) } else { range }
}
420
/// Computes the byte range of an inline link destination from the end of its label.
///
/// The destination is taken to start two bytes after `label_last_end` (presumably skipping the
/// `](` delimiter — confirm against the caller) and to span `destination.len()` bytes.
fn find_inline_destination_range(label_last_end: usize, destination: &str) -> Range<usize> {
    let start = label_last_end + 2;
    start..start + destination.len()
}
427
428/// Maps `HeadingLevel` to the correct markdown marker.
429fn heading_level_to_markdown(heading_level: HeadingLevel) -> String {
430    let heading_char: String = String::from("#");
431    match heading_level {
432        HeadingLevel::H1 => heading_char,
433        HeadingLevel::H2 => heading_char.repeat(2),
434        HeadingLevel::H3 => heading_char.repeat(3),
435        HeadingLevel::H4 => heading_char.repeat(4),
436        HeadingLevel::H5 => heading_char.repeat(5),
437        HeadingLevel::H6 => heading_char.repeat(6),
438    }
439}
440
441/// Maps [`Alignment`] to the correct markdown markers.
442fn get_alignment_markers(alignment: &Alignment) -> (String, String) {
443    let (left, right) = match alignment {
444        Alignment::None => ("", ""),
445        Alignment::Left => (":", ""),
446        Alignment::Right => ("", ":"),
447        Alignment::Center => (":", ":"),
448    };
449    (left.to_string(), right.to_string())
450}