cairo_lang_doc/
parser.rs

1use std::fmt;
2
3use cairo_lang_debug::DebugWithDb;
4use cairo_lang_defs::ids::{GenericTypeId, LookupItemId, ModuleId, ModuleItemId, TraitItemId};
5use cairo_lang_diagnostics::DiagnosticsBuilder;
6use cairo_lang_filesystem::db::FilesGroup;
7use cairo_lang_filesystem::ids::{FileKind, FileLongId, SmolStrId, VirtualFile};
8use cairo_lang_parser::parser::Parser;
9use cairo_lang_semantic::diagnostic::{NotFoundItemType, SemanticDiagnostics};
10use cairo_lang_semantic::expr::inference::InferenceId;
11use cairo_lang_semantic::items::functions::GenericFunctionId;
12use cairo_lang_semantic::lsp_helpers::LspHelpers;
13use cairo_lang_semantic::resolve::{AsSegments, ResolutionContext, ResolvedGenericItem, Resolver};
14use cairo_lang_syntax::node::ast::{Expr, ExprPath};
15use cairo_lang_utils::Intern;
16use itertools::Itertools;
17use pulldown_cmark::{
18    Alignment, BrokenLink, CodeBlockKind, Event, HeadingLevel, LinkType, Options,
19    Parser as MarkdownParser, Tag, TagEnd,
20};
21use salsa::Database;
22
23use crate::db::DocGroup;
24use crate::documentable_item::DocumentableItemId;
25
26/// Token representing a link to another item inside the documentation.
27#[derive(Debug, PartialEq, Clone, Eq, salsa::Update)]
28pub struct CommentLinkToken<'db> {
29    /// A link part that's inside "[]" brackets.
30    pub label: String,
31    /// A link part that's inside "()" brackets, right after the label.
32    pub path: Option<String>,
33    /// Item resolved based on the path provided by user. If resolver cannot resolve the item, we
34    /// leave it as None.
35    pub resolved_item: Option<DocumentableItemId<'db>>,
36}
37
38/// Generic type for a comment token. It's either plain content or a link.
39/// Notice that the Content token type can store much more than just one word.
40#[derive(Debug, PartialEq, Clone, Eq, salsa::Update)]
41pub enum DocumentationCommentToken<'db> {
42    /// Token with plain documentation content.
43    Content(String),
44    /// Link token.
45    Link(CommentLinkToken<'db>),
46}
47
48impl DocumentationCommentToken<'_> {
49    /// Checks if string representation of [`DocumentationCommentToken`] ends with newline.
50    pub fn ends_with_newline(self) -> bool {
51        match self {
52            DocumentationCommentToken::Content(content) => content.ends_with('\n'),
53            DocumentationCommentToken::Link(link_token) => link_token.label.ends_with('\n'),
54        }
55    }
56}
57
/// Bookkeeping for one level of a (possibly nested) Markdown list while it is being formatted.
struct DocCommentListItem {
    /// Number written in front of the next item of an ordered list. It starts from the value
    /// reported by the Markdown parser and is incremented after every item.
    delimiter: Option<u64>,
    /// Whether the list is ordered (numbered) rather than a bullet list.
    is_ordered_list: bool,
}
65
66/// Parses plain documentation comments into [DocumentationCommentToken]s.
67pub struct DocumentationCommentParser<'db> {
68    db: &'db dyn Database,
69}
70
71impl<'db> DocumentationCommentParser<'db> {
72    pub fn new(db: &'db dyn Database) -> Self {
73        Self { db }
74    }
75
76    /// Parses documentation comment content into a vector of [DocumentationCommentToken]s, keeping
77    /// the order in which they were present in the content.
78    ///
79    /// We look for 3 link patterns (ignore the backslash):
80    /// "\[label\](path)", "\[path\]" or "\[`path`\]".
81    pub fn parse_documentation_comment(
82        &self,
83        item_id: DocumentableItemId<'db>,
84        documentation_comment: String,
85    ) -> Vec<DocumentationCommentToken<'db>> {
86        let mut tokens = Vec::new();
87        let mut current_link: Option<CommentLinkToken<'db>> = None;
88        let mut is_indented_code_block = false;
89        let mut replacer = |broken_link: BrokenLink<'_>| {
90            if matches!(broken_link.link_type, LinkType::ShortcutUnknown | LinkType::Shortcut) {
91                return Some((broken_link.reference.to_string().into(), "".into()));
92            }
93            None
94        };
95
96        let mut options = Options::empty();
97        options.insert(Options::ENABLE_TABLES);
98        let parser = MarkdownParser::new_with_broken_link_callback(
99            &documentation_comment,
100            options,
101            Some(&mut replacer),
102        );
103
104        let mut list_nesting: Vec<DocCommentListItem> = Vec::new();
105        let write_list_item_prefix =
106            |list_nesting: &mut Vec<DocCommentListItem>,
107             tokens: &mut Vec<DocumentationCommentToken<'db>>| {
108                if !list_nesting.is_empty() {
109                    let indent = "  ".repeat(list_nesting.len() - 1);
110                    let list_nesting = list_nesting.last_mut().unwrap();
111
112                    let item_delimiter = if list_nesting.is_ordered_list {
113                        let delimiter = list_nesting.delimiter.unwrap_or(0);
114                        list_nesting.delimiter = Some(delimiter + 1);
115                        format!("{indent}{delimiter}.",)
116                    } else {
117                        format!("{indent}-")
118                    };
119                    tokens.push(DocumentationCommentToken::Content(format!(
120                        "{indent}{item_delimiter} "
121                    )));
122                }
123            };
124        let mut prefix_list_item = false;
125        let mut last_two_events = [None, None];
126        let mut table_alignment: Vec<Alignment> = Vec::new();
127
128        for event in parser {
129            match &event {
130                Event::Text(text) => {
131                    if prefix_list_item {
132                        write_list_item_prefix(&mut list_nesting, &mut tokens);
133                        prefix_list_item = false;
134                    }
135                    if let Some(link) = current_link.as_mut() {
136                        link.label.push_str(text.as_ref());
137                    } else {
138                        let text = {
139                            if is_indented_code_block {
140                                format!("    {text}")
141                            } else {
142                                text.to_string()
143                            }
144                        };
145                        tokens.push(DocumentationCommentToken::Content(text));
146                    }
147                }
148                Event::Code(code) => {
149                    if prefix_list_item {
150                        write_list_item_prefix(&mut list_nesting, &mut tokens);
151                        prefix_list_item = false;
152                    }
153                    let complete_code = format!("`{code}`");
154                    if let Some(link) = current_link.as_mut() {
155                        link.label.push_str(&complete_code);
156                    } else {
157                        tokens.push(DocumentationCommentToken::Content(complete_code));
158                    }
159                }
160                Event::Start(tag_start) => {
161                    match tag_start {
162                        Tag::Heading { level, .. } => {
163                            if let Some(last_token) = tokens.last_mut()
164                                && !last_token.clone().ends_with_newline()
165                            {
166                                tokens.push(DocumentationCommentToken::Content("\n".to_string()));
167                            }
168                            tokens.push(DocumentationCommentToken::Content(format!(
169                                "{} ",
170                                heading_level_to_markdown(*level)
171                            )));
172                        }
173                        Tag::List(list_type) => {
174                            if !list_nesting.is_empty() {
175                                tokens.push(DocumentationCommentToken::Content("\n".to_string()));
176                            }
177                            list_nesting.push(DocCommentListItem {
178                                delimiter: *list_type,
179                                is_ordered_list: list_type.is_some(),
180                            });
181                        }
182                        Tag::CodeBlock(kind) => match kind {
183                            CodeBlockKind::Fenced(language) => {
184                                if language.trim().is_empty() {
185                                    tokens.push(DocumentationCommentToken::Content(String::from(
186                                        "```cairo\n",
187                                    )));
188                                } else {
189                                    tokens.push(DocumentationCommentToken::Content(format!(
190                                        "```{language}\n"
191                                    )));
192                                }
193                            }
194                            CodeBlockKind::Indented => {
195                                tokens.push(DocumentationCommentToken::Content("\n".to_string()));
196                                is_indented_code_block = true;
197                            }
198                        },
199                        Tag::Link { link_type, dest_url, .. } => {
200                            match *link_type {
201                                LinkType::ShortcutUnknown | LinkType::Shortcut => {
202                                    let path =
203                                        if dest_url.starts_with("`") && dest_url.ends_with("`") {
204                                            dest_url
205                                                .trim_start_matches("`")
206                                                .trim_end_matches("`")
207                                                .to_string()
208                                        } else {
209                                            dest_url.clone().to_string()
210                                        };
211                                    current_link = Some(CommentLinkToken {
212                                        label: "".to_string(),
213                                        path: None,
214                                        resolved_item: self.resolve_linked_item(item_id, path), /* Or resolve item here */
215                                    });
216                                }
217                                _ => {
218                                    current_link = Some(CommentLinkToken {
219                                        label: "".to_string(),
220                                        path: Some(dest_url.clone().into_string()),
221                                        resolved_item: self.resolve_linked_item(
222                                            item_id,
223                                            dest_url.clone().into_string(),
224                                        ), // Or resolve item here
225                                    });
226                                }
227                            }
228                        }
229                        Tag::Paragraph | Tag::TableRow => {
230                            tokens.push(DocumentationCommentToken::Content("\n".to_string()));
231                        }
232                        Tag::Item => {
233                            prefix_list_item = true;
234                        }
235                        Tag::Table(alignment) => {
236                            table_alignment = alignment.clone();
237                            tokens.push(DocumentationCommentToken::Content("\n".to_string()));
238                        }
239                        Tag::TableCell => {
240                            tokens.push(DocumentationCommentToken::Content("|".to_string()));
241                        }
242                        Tag::Strong => {
243                            tokens.push(DocumentationCommentToken::Content("**".to_string()));
244                        }
245                        Tag::Emphasis => {
246                            tokens.push(DocumentationCommentToken::Content("_".to_string()));
247                        }
248                        _ => {}
249                    }
250                }
251                Event::End(tag_end) => match tag_end {
252                    TagEnd::Heading(_) | TagEnd::Table => {
253                        tokens.push(DocumentationCommentToken::Content("\n".to_string()));
254                    }
255                    TagEnd::List(_) => {
256                        list_nesting.pop();
257                    }
258                    TagEnd::Item => {
259                        if !matches!(last_two_events[0], Some(Event::End(_)))
260                            | !matches!(last_two_events[1], Some(Event::End(_)))
261                        {
262                            tokens.push(DocumentationCommentToken::Content("\n".to_string()));
263                        }
264                    }
265                    TagEnd::TableHead => {
266                        tokens.push(DocumentationCommentToken::Content(format!(
267                            "|\n|{}|",
268                            table_alignment
269                                .iter()
270                                .map(|a| {
271                                    let (left, right) = get_alignment_markers(a);
272                                    format!("{left}---{right}")
273                                })
274                                .join("|")
275                        )));
276                        table_alignment.clear();
277                    }
278                    TagEnd::CodeBlock => {
279                        if !is_indented_code_block {
280                            tokens.push(DocumentationCommentToken::Content("```\n".to_string()));
281                        }
282                        is_indented_code_block = false;
283                    }
284                    TagEnd::Link => {
285                        if let Some(link) = current_link.take() {
286                            tokens.push(DocumentationCommentToken::Link(link));
287                        }
288                    }
289                    TagEnd::TableRow => {
290                        tokens.push(DocumentationCommentToken::Content("|".to_string()));
291                    }
292                    TagEnd::Strong => {
293                        tokens.push(DocumentationCommentToken::Content("**".to_string()));
294                    }
295                    TagEnd::Emphasis => {
296                        tokens.push(DocumentationCommentToken::Content("_".to_string()));
297                    }
298                    TagEnd::Paragraph => {
299                        tokens.push(DocumentationCommentToken::Content("\n".to_string()));
300                    }
301                    _ => {}
302                },
303                Event::SoftBreak => {
304                    tokens.push(DocumentationCommentToken::Content("\n".to_string()));
305                }
306                Event::Rule => {
307                    tokens.push(DocumentationCommentToken::Content("___\n".to_string()));
308                }
309                _ => {}
310            }
311            last_two_events = [last_two_events[1].clone(), Some(event)];
312        }
313
314        if let Some(DocumentationCommentToken::Content(token)) = tokens.first()
315            && token == "\n"
316        {
317            tokens.remove(0);
318        }
319        if let Some(DocumentationCommentToken::Content(token)) = tokens.last_mut() {
320            *token = token.trim_end().to_string();
321            if token.is_empty() {
322                tokens.pop();
323            }
324        }
325
326        tokens
327    }
328
329    /// Resolves item based on the provided path as a string.
330    fn resolve_linked_item(
331        &self,
332        item_id: DocumentableItemId<'db>,
333        path: String,
334    ) -> Option<DocumentableItemId<'db>> {
335        let syntax_node = item_id.stable_location(self.db)?.syntax_node(self.db);
336        let containing_module = self.db.find_module_containing_node(syntax_node)?;
337        let mut resolver = Resolver::new(self.db, containing_module, InferenceId::NoContext);
338        let mut diagnostics = SemanticDiagnostics::default();
339        let segments = self.parse_comment_link_path(path)?;
340        resolver
341            .resolve_generic_path(
342                &mut diagnostics,
343                segments.to_segments(self.db),
344                NotFoundItemType::Identifier,
345                ResolutionContext::Default,
346            )
347            .ok()?
348            .to_documentable_item_id(self.db)
349    }
350
351    /// Parses the path as a string to a Path Expression, which can be later used by a resolver.
352    fn parse_comment_link_path(&self, path: String) -> Option<ExprPath<'db>> {
353        let virtual_file = FileLongId::Virtual(VirtualFile {
354            parent: Default::default(),
355            name: SmolStrId::from(self.db, ""),
356            content: SmolStrId::from(self.db, path),
357            code_mappings: Default::default(),
358            kind: FileKind::Module,
359            original_item_removed: false,
360        })
361        .intern(self.db);
362
363        let content = self.db.file_content(virtual_file).unwrap();
364        let expr = Parser::parse_file_expr(
365            self.db,
366            &mut DiagnosticsBuilder::default(),
367            virtual_file,
368            content,
369        );
370
371        if let Expr::Path(expr_path) = expr { Some(expr_path) } else { None }
372    }
373}
374
375trait ToDocumentableItemId<'db, T> {
376    fn to_documentable_item_id(self, db: &'db dyn Database) -> Option<DocumentableItemId<'db>>;
377}
378
379impl<'db> ToDocumentableItemId<'db, DocumentableItemId<'db>> for ResolvedGenericItem<'db> {
380    /// Converts the [ResolvedGenericItem] to [DocumentableItemId].
381    /// As for now, returns None only for a common Variable, as those are not a supported
382    /// documentable item.
383    fn to_documentable_item_id(self, db: &'db dyn Database) -> Option<DocumentableItemId<'db>> {
384        match self {
385            ResolvedGenericItem::GenericConstant(id) => Some(DocumentableItemId::LookupItem(
386                LookupItemId::ModuleItem(ModuleItemId::Constant(id)),
387            )),
388            ResolvedGenericItem::GenericFunction(GenericFunctionId::Free(id)) => {
389                Some(DocumentableItemId::LookupItem(LookupItemId::ModuleItem(
390                    ModuleItemId::FreeFunction(id),
391                )))
392            }
393            ResolvedGenericItem::GenericType(GenericTypeId::Struct(id)) => Some(
394                DocumentableItemId::LookupItem(LookupItemId::ModuleItem(ModuleItemId::Struct(id))),
395            ),
396            ResolvedGenericItem::GenericType(GenericTypeId::Enum(id)) => Some(
397                DocumentableItemId::LookupItem(LookupItemId::ModuleItem(ModuleItemId::Enum(id))),
398            ),
399            ResolvedGenericItem::GenericTypeAlias(id) => Some(DocumentableItemId::LookupItem(
400                LookupItemId::ModuleItem(ModuleItemId::TypeAlias(id)),
401            )),
402            ResolvedGenericItem::GenericImplAlias(id) => Some(DocumentableItemId::LookupItem(
403                LookupItemId::ModuleItem(ModuleItemId::ImplAlias(id)),
404            )),
405            ResolvedGenericItem::Trait(id) => Some(DocumentableItemId::LookupItem(
406                LookupItemId::ModuleItem(ModuleItemId::Trait(id)),
407            )),
408            ResolvedGenericItem::Impl(id) => Some(DocumentableItemId::LookupItem(
409                LookupItemId::ModuleItem(ModuleItemId::Impl(id)),
410            )),
411            ResolvedGenericItem::Macro(id) => Some(DocumentableItemId::LookupItem(
412                LookupItemId::ModuleItem(ModuleItemId::MacroDeclaration(id)),
413            )),
414            ResolvedGenericItem::GenericType(GenericTypeId::Extern(id)) => {
415                Some(DocumentableItemId::LookupItem(LookupItemId::ModuleItem(
416                    ModuleItemId::ExternType(id),
417                )))
418            }
419            ResolvedGenericItem::GenericFunction(GenericFunctionId::Extern(id)) => {
420                Some(DocumentableItemId::LookupItem(LookupItemId::ModuleItem(
421                    ModuleItemId::ExternFunction(id),
422                )))
423            }
424            ResolvedGenericItem::Module(ModuleId::Submodule(id)) => {
425                Some(DocumentableItemId::LookupItem(LookupItemId::ModuleItem(
426                    ModuleItemId::Submodule(id),
427                )))
428            }
429            ResolvedGenericItem::Module(ModuleId::CrateRoot(id)) => {
430                Some(DocumentableItemId::Crate(id))
431            }
432            ResolvedGenericItem::Module(ModuleId::MacroCall { .. }) => None,
433
434            ResolvedGenericItem::Variant(variant) => Some(DocumentableItemId::Variant(variant.id)),
435            ResolvedGenericItem::GenericFunction(GenericFunctionId::Impl(generic_impl_func)) => {
436                if let Some(impl_function) = generic_impl_func.impl_function(db).ok().flatten() {
437                    Some(DocumentableItemId::LookupItem(LookupItemId::ImplItem(
438                        cairo_lang_defs::ids::ImplItemId::Function(impl_function),
439                    )))
440                } else {
441                    Some(DocumentableItemId::LookupItem(LookupItemId::TraitItem(
442                        TraitItemId::Function(generic_impl_func.function),
443                    )))
444                }
445            }
446            ResolvedGenericItem::TraitItem(id) => {
447                Some(DocumentableItemId::LookupItem(LookupItemId::TraitItem(id)))
448            }
449            ResolvedGenericItem::Variable(_) => None,
450        }
451    }
452}
453
454impl fmt::Display for CommentLinkToken<'_> {
455    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
456        match self.path.clone() {
457            Some(path) => write!(f, "[{}]({})", self.label, path),
458            None => write!(f, "[{}]", self.label),
459        }
460    }
461}
462
463impl fmt::Display for DocumentationCommentToken<'_> {
464    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
465        match self {
466            DocumentationCommentToken::Content(content) => {
467                write!(f, "{content}")
468            }
469            DocumentationCommentToken::Link(link_token) => {
470                write!(f, "{link_token}")
471            }
472        }
473    }
474}
475
476impl<'db> DebugWithDb<'db> for CommentLinkToken<'db> {
477    type Db = dyn DocGroup;
478    fn fmt(&self, f: &mut fmt::Formatter<'_>, db: &Self::Db) -> fmt::Result {
479        f.debug_struct("CommentLinkToken")
480            .field("label", &self.label)
481            .field("path", &self.path)
482            .field("resolved_item_name", &self.resolved_item.map(|item| item.name(db).long(db)))
483            .finish()
484    }
485}
486
487/// Maps `HeadingLevel` to the correct markdown marker.
488fn heading_level_to_markdown(heading_level: HeadingLevel) -> String {
489    let heading_char: String = String::from("#");
490    match heading_level {
491        HeadingLevel::H1 => heading_char,
492        HeadingLevel::H2 => heading_char.repeat(2),
493        HeadingLevel::H3 => heading_char.repeat(3),
494        HeadingLevel::H4 => heading_char.repeat(4),
495        HeadingLevel::H5 => heading_char.repeat(5),
496        HeadingLevel::H6 => heading_char.repeat(6),
497    }
498}
499
500/// Maps [`Alignment`] to the correct markdown markers.
501fn get_alignment_markers(alignment: &Alignment) -> (String, String) {
502    let (left, right) = match alignment {
503        Alignment::None => ("", ""),
504        Alignment::Left => (":", ""),
505        Alignment::Right => ("", ":"),
506        Alignment::Center => (":", ":"),
507    };
508    (left.to_string(), right.to_string())
509}