cairo_lang_doc/
parser.rs

1use std::fmt;
2
3use cairo_lang_debug::DebugWithDb;
4use cairo_lang_defs::ids::{
5    FileIndex, GenericTypeId, LookupItemId, ModuleFileId, ModuleId, ModuleItemId, TraitItemId,
6};
7use cairo_lang_diagnostics::DiagnosticsBuilder;
8use cairo_lang_filesystem::ids::{FileKind, FileLongId, VirtualFile};
9use cairo_lang_parser::parser::Parser;
10use cairo_lang_semantic::db::SemanticGroup;
11use cairo_lang_semantic::diagnostic::{NotFoundItemType, SemanticDiagnostics};
12use cairo_lang_semantic::expr::inference::InferenceId;
13use cairo_lang_semantic::items::functions::GenericFunctionId;
14use cairo_lang_semantic::resolve::{AsSegments, ResolutionContext, ResolvedGenericItem, Resolver};
15use cairo_lang_syntax::node::ast::{Expr, ExprPath, ItemModule};
16use cairo_lang_syntax::node::helpers::GetIdentifier;
17use cairo_lang_syntax::node::{SyntaxNode, TypedSyntaxNode};
18use cairo_lang_utils::Intern;
19use itertools::Itertools;
20use pulldown_cmark::{
21    Alignment, BrokenLink, CodeBlockKind, Event, HeadingLevel, LinkType, Options,
22    Parser as MarkdownParser, Tag, TagEnd,
23};
24
25use crate::db::DocGroup;
26use crate::documentable_item::DocumentableItemId;
27
/// Token representing a link to another item inside the documentation.
///
/// Its `Display` form is `[label](path)` when `path` is set and `[label]` otherwise.
#[derive(Debug, PartialEq, Clone, Eq)]
pub struct CommentLinkToken {
    /// The link part that is inside the "[]" brackets.
    pub label: String,
    /// The link part that is inside the "()" brackets, right after the label.
    /// `None` for shortcut-style links, which have no "()" part.
    pub path: Option<String>,
    /// Item resolved based on the path provided by the user. If the resolver cannot resolve the
    /// item, this is left as `None`.
    pub resolved_item: Option<DocumentableItemId>,
}
39
/// Generic type for a comment token. It is either plain content or a link.
/// Note that a `Content` token can hold much more than a single word.
#[derive(Debug, PartialEq, Clone, Eq)]
pub enum DocumentationCommentToken {
    /// Token with plain documentation content.
    Content(String),
    /// Link token.
    Link(CommentLinkToken),
}
49
50impl DocumentationCommentToken {
51    /// Checks if string representation of [`DocumentationCommentToken`] ends with newline.
52    pub fn ends_with_newline(self) -> bool {
53        match self {
54            DocumentationCommentToken::Content(content) => content.ends_with('\n'),
55            DocumentationCommentToken::Link(link_token) => link_token.label.ends_with('\n'),
56        }
57    }
58}
59
/// Helper struct for formatting possibly nested Markdown lists.
/// One instance is kept per currently open list level.
struct DocCommentListItem {
    /// Number to write in front of the next item of an ordered list. It is incremented each time
    /// an item prefix is written. `None` for unordered lists.
    delimiter: Option<u64>,
    /// Whether this list is an ordered (numbered) list.
    is_ordered_list: bool,
}
67
/// Parses plain documentation comments into [DocumentationCommentToken]s.
pub struct DocumentationCommentParser<'a> {
    /// Database handle used to resolve the items that links point to.
    db: &'a dyn DocGroup,
}
72
73impl<'a> DocumentationCommentParser<'a> {
74    pub fn new(db: &'a dyn DocGroup) -> Self {
75        Self { db }
76    }
77
    /// Parses documentation comment content into vector of [DocumentationCommentToken]s, keeping
    /// the order in which they were present in the content.
    ///
    /// We look for 3 types of patterns when it comes to link (ignore the backslash):
    /// "\[label\](path)", "\[path\]" or "\[`path`\]".
    ///
    /// `item_id` is the item the comment belongs to; link paths are resolved relative to the
    /// module containing that item. `documentation_comment` is the raw Markdown text.
    ///
    /// The Markdown is walked event by event and re-emitted as text tokens, with links kept as
    /// separate [`DocumentationCommentToken::Link`] tokens. A leading `"\n"` token is dropped and
    /// trailing whitespace of a final content token is trimmed.
    pub fn parse_documentation_comment(
        &self,
        item_id: DocumentableItemId,
        documentation_comment: String,
    ) -> Vec<DocumentationCommentToken> {
        let mut tokens = Vec::new();
        // Link currently being assembled (between its start and end tags), if any.
        let mut current_link: Option<CommentLinkToken> = None;
        let mut is_indented_code_block = false;
        // Broken-link callback: for shortcut-style references ("[path]") it hands the reference
        // text back as the link destination (with an empty title), so that such references can be
        // handled as links in the `Tag::Link` arm below. Other broken links are left alone.
        let mut replacer = |broken_link: BrokenLink<'_>| {
            if matches!(broken_link.link_type, LinkType::ShortcutUnknown | LinkType::Shortcut) {
                return Some((broken_link.reference.to_string().into(), "".into()));
            }
            None
        };

        // Tables are the only Markdown extension enabled.
        let mut options = Options::empty();
        options.insert(Options::ENABLE_TABLES);
        let parser = MarkdownParser::new_with_broken_link_callback(
            &documentation_comment,
            options,
            Some(&mut replacer),
        );

        // Stack of currently open lists, innermost last.
        let mut list_nesting: Vec<DocCommentListItem> = Vec::new();
        // Writes the bullet / number prefix for an item of the innermost open list and, for
        // ordered lists, advances the counter.
        // NOTE(review): `item_delimiter` already starts with `indent`, and `indent` is prepended
        // again when the token is built, so each nesting level is indented by four spaces rather
        // than two — confirm this is intended before changing it.
        let write_list_item_prefix =
            |list_nesting: &mut Vec<DocCommentListItem>,
             tokens: &mut Vec<DocumentationCommentToken>| {
                if !list_nesting.is_empty() {
                    let indent = "  ".repeat(list_nesting.len() - 1);
                    let list_nesting = list_nesting.last_mut().unwrap();

                    let item_delimiter = if list_nesting.is_ordered_list {
                        let delimiter = list_nesting.delimiter.unwrap_or(0);
                        list_nesting.delimiter = Some(delimiter + 1);
                        format!("{indent}{delimiter}.",)
                    } else {
                        format!("{indent}-")
                    };
                    tokens.push(DocumentationCommentToken::Content(format!(
                        "{indent}{item_delimiter} "
                    )));
                }
            };
        // Set when a list item starts; the prefix itself is written lazily, right before the
        // first text or inline code of that item.
        let mut prefix_list_item = false;
        // The two most recently processed events (older first); consulted at `TagEnd::Item`.
        let mut last_two_events = [None, None];
        // Column alignments of the table being processed; used to write the delimiter row.
        let mut table_alignment: Vec<Alignment> = Vec::new();

        for event in parser {
            let current_event = event.clone();
            match current_event {
                Event::Text(text) => {
                    if prefix_list_item {
                        write_list_item_prefix(&mut list_nesting, &mut tokens);
                        prefix_list_item = false;
                    }
                    if let Some(link) = current_link.as_mut() {
                        // Text inside a link becomes part of the link label.
                        link.label.push_str(&text);
                    } else {
                        let text = {
                            if is_indented_code_block {
                                // Restore the four-space indentation of indented code blocks.
                                format!("    {text}")
                            } else {
                                text.to_string()
                            }
                        };
                        tokens.push(DocumentationCommentToken::Content(text));
                    }
                }
                Event::Code(code) => {
                    if prefix_list_item {
                        write_list_item_prefix(&mut list_nesting, &mut tokens);
                        prefix_list_item = false;
                    }
                    // Inline code is re-wrapped in backticks.
                    let complete_code = format!("`{code}`");
                    if let Some(link) = current_link.as_mut() {
                        link.label.push_str(&complete_code);
                    } else {
                        tokens.push(DocumentationCommentToken::Content(complete_code));
                    }
                }
                Event::Start(tag_start) => {
                    match tag_start {
                        Tag::Heading { level, .. } => {
                            // Make sure the heading marker starts on its own line.
                            if let Some(last_token) = tokens.last_mut() {
                                if !last_token.clone().ends_with_newline() {
                                    tokens
                                        .push(DocumentationCommentToken::Content("\n".to_string()));
                                }
                            }
                            tokens.push(DocumentationCommentToken::Content(format!(
                                "{} ",
                                heading_level_to_markdown(level)
                            )));
                        }
                        Tag::List(list_type) => {
                            tokens.push(DocumentationCommentToken::Content("\n".to_string()));
                            // `list_type` is `Some(start number)` for ordered lists.
                            list_nesting.push(DocCommentListItem {
                                delimiter: list_type,
                                is_ordered_list: list_type.is_some(),
                            });
                        }
                        Tag::CodeBlock(kind) => match kind {
                            CodeBlockKind::Fenced(language) => {
                                // Fenced blocks without a language tag are emitted as `cairo`.
                                if language.trim().is_empty() {
                                    tokens.push(DocumentationCommentToken::Content(String::from(
                                        "\n```cairo\n",
                                    )));
                                } else {
                                    tokens.push(DocumentationCommentToken::Content(format!(
                                        "\n```{language}\n"
                                    )));
                                }
                            }
                            CodeBlockKind::Indented => {
                                tokens.push(DocumentationCommentToken::Content("\n\n".to_string()));
                                is_indented_code_block = true;
                            }
                        },
                        Tag::Link { link_type, dest_url, .. } => {
                            match link_type {
                                LinkType::ShortcutUnknown | LinkType::Shortcut => {
                                    // The destination may be wrapped in backticks ("[`path`]");
                                    // strip them before resolving.
                                    let path =
                                        if dest_url.starts_with("`") && dest_url.ends_with("`") {
                                            dest_url
                                                .trim_start_matches("`")
                                                .trim_end_matches("`")
                                                .to_string()
                                        } else {
                                            dest_url.clone().to_string()
                                        };
                                    // Shortcut links have no "(path)" part, so `path` stays `None`
                                    // and only the resolved item is recorded.
                                    current_link = Some(CommentLinkToken {
                                        label: "".to_string(),
                                        path: None,
                                        resolved_item: self.resolve_linked_item(item_id, path),
                                    });
                                }
                                _ => {
                                    // Any other link type keeps its destination as the path and
                                    // also tries to resolve it as an item path.
                                    current_link = Some(CommentLinkToken {
                                        label: "".to_string(),
                                        path: Some(dest_url.clone().into_string()),
                                        resolved_item: self.resolve_linked_item(
                                            item_id,
                                            dest_url.clone().into_string(),
                                        ),
                                    });
                                }
                            }
                        }
                        Tag::Paragraph | Tag::TableRow => {
                            tokens.push(DocumentationCommentToken::Content("\n".to_string()));
                        }
                        Tag::Item => {
                            prefix_list_item = true;
                        }
                        Tag::Table(alignment) => {
                            table_alignment = alignment;
                            tokens.push(DocumentationCommentToken::Content("\n\n".to_string()));
                        }
                        Tag::TableCell => {
                            // Each cell is preceded by its column separator.
                            tokens.push(DocumentationCommentToken::Content("|".to_string()));
                        }
                        // All other opening tags produce no output.
                        _ => {}
                    }
                }
                Event::End(tag_end) => match tag_end {
                    TagEnd::Heading(_) | TagEnd::Table => {
                        tokens.push(DocumentationCommentToken::Content("\n".to_string()));
                    }
                    TagEnd::List(_) => {
                        list_nesting.pop();
                    }
                    TagEnd::Item => {
                        // A newline is written unless BOTH of the two preceding events were end
                        // tags. The `|` is a non-short-circuit boolean "or"; both operands are
                        // side-effect free.
                        if !matches!(last_two_events[0], Some(Event::End(_)))
                            | !matches!(last_two_events[1], Some(Event::End(_)))
                        {
                            tokens.push(DocumentationCommentToken::Content("\n".to_string()));
                        }
                    }
                    TagEnd::TableHead => {
                        // Close the header row and write the delimiter row ("|:---|---:|...")
                        // built from the column alignments stored at the table start.
                        tokens.push(DocumentationCommentToken::Content(format!(
                            "|\n|{}|",
                            table_alignment
                                .iter()
                                .map(|a| {
                                    let (left, right) = get_alignment_markers(a);
                                    format!("{left}---{right}")
                                })
                                .join("|")
                        )));
                        table_alignment.clear();
                    }
                    TagEnd::CodeBlock => {
                        // Only fenced blocks need a closing fence.
                        if !is_indented_code_block {
                            tokens.push(DocumentationCommentToken::Content("```\n".to_string()));
                        }
                        is_indented_code_block = false;
                    }
                    TagEnd::Link => {
                        // The link is complete: emit it as a single token.
                        if let Some(link) = current_link.take() {
                            tokens.push(DocumentationCommentToken::Link(link));
                        }
                    }
                    TagEnd::TableRow => {
                        // Closing separator of a table row.
                        tokens.push(DocumentationCommentToken::Content("|".to_string()));
                    }
                    // All other closing tags produce no output.
                    _ => {}
                },
                Event::SoftBreak => {
                    tokens.push(DocumentationCommentToken::Content("\n".to_string()));
                }
                Event::Rule => {
                    tokens.push(DocumentationCommentToken::Content("\n___\n".to_string()));
                }
                // Every other event kind is ignored.
                _ => {}
            }
            // Slide the two-event history window.
            last_two_events = [last_two_events[1].clone(), Some(event)];
        }

        // Drop a leading token that is exactly a single newline.
        if let Some(DocumentationCommentToken::Content(token)) = tokens.first() {
            if token == "\n" {
                tokens.remove(0);
            }
        }
        // Trim trailing whitespace of the last content token, removing it if nothing is left.
        if let Some(DocumentationCommentToken::Content(token)) = tokens.last_mut() {
            *token = token.trim_end().to_string();
            if token.is_empty() {
                tokens.pop();
            }
        }

        tokens
    }
315
316    /// Resolves item based on the provided path as a string.
317    fn resolve_linked_item(
318        &self,
319        item_id: DocumentableItemId,
320        path: String,
321    ) -> Option<DocumentableItemId> {
322        let syntax_node = item_id.stable_location(self.db)?.syntax_node(self.db);
323        let containing_module = self.find_module_file_containing_node(&syntax_node)?;
324        let mut resolver = Resolver::new(self.db, containing_module, InferenceId::NoContext);
325        let mut diagnostics = SemanticDiagnostics::default();
326        let segments = self.parse_comment_link_path(path)?;
327        resolver
328            .resolve_generic_path(
329                &mut diagnostics,
330                segments.to_segments(self.db),
331                NotFoundItemType::Identifier,
332                ResolutionContext::Default,
333            )
334            .ok()?
335            .to_documentable_item_id(self.db)
336    }
337
338    /// Parses the path as a string to a Path Expression, which can be later used by a resolver.
339    fn parse_comment_link_path(&self, path: String) -> Option<ExprPath> {
340        let virtual_file = FileLongId::Virtual(VirtualFile {
341            parent: Default::default(),
342            name: Default::default(),
343            content: Default::default(),
344            code_mappings: Default::default(),
345            kind: FileKind::Module,
346            original_item_removed: false,
347        })
348        .intern(self.db);
349
350        let expr = Parser::parse_file_expr(
351            self.db,
352            &mut DiagnosticsBuilder::default(),
353            virtual_file,
354            &path,
355        );
356
357        if let Expr::Path(expr_path) = expr { Some(expr_path) } else { None }
358    }
359
360    /// Returns a [`ModuleFileId`] containing the node.
361    ///
362    /// If the node is located in a virtual file generated by a compiler plugin, this method will
363    /// return a [`ModuleFileId`] pointing to the main, user-written file of the module.
364    fn find_module_file_containing_node(&self, node: &SyntaxNode) -> Option<ModuleFileId> {
365        let module_id = self.find_module_containing_node(node)?;
366        let file_index = FileIndex(0);
367        Some(ModuleFileId(module_id, file_index))
368    }
369    /// Finds a [`ModuleId`] containing the node.
370    ///
371    /// If the node is located in a virtual file generated by a compiler plugin, this method will
372    /// return the (sub)module of the main, user-written file that leads to the node.
373    fn find_module_containing_node(&self, node: &SyntaxNode) -> Option<ModuleId> {
374        let db = self.db;
375
376        // Get the main module of the main file that leads to the node.
377        // The node may be located in a virtual file of a submodule.
378        // This code attempts to get the absolute "parent" of both "module" and "file" parts.
379        let main_module = {
380            // Get the file where the node is located.
381            // This might be a virtual file generated by a compiler plugin.
382            let node_file_id = node.stable_ptr(db).file_id(db);
383
384            // Get the root module of a file containing the node.
385            let node_main_module = db.file_modules(node_file_id).ok()?.iter().copied().next()?;
386
387            // Get the main module of the file.
388            let main_file = db.module_main_file(node_main_module).ok()?;
389
390            // Get the main module of that file.
391            db.file_modules(main_file).ok()?.iter().copied().next()?
392        };
393
394        // Get the stack (bottom-up) of submodule names in the file containing the node, in the main
395        // module, that lead to the node.
396        node.ancestors(db)
397            .filter_map(|node| ItemModule::cast(db, node))
398            .map(|item_module| {
399                item_module
400                    .stable_ptr(db)
401                    .name_green(db)
402                    .identifier(db)
403            })
404            // Buffer the stack to get DoubleEndedIterator.
405            .collect::<Vec<_>>()
406            .into_iter()
407            // And get id of the (sub)module containing the node by traversing this stack top-down.
408            .try_rfold(main_module, |module, name| {
409                let ModuleItemId::Submodule(submodule) =
410                    db.module_item_by_name(module, name).ok()??
411                else {
412                    return None;
413                };
414                Some(ModuleId::Submodule(submodule))
415            })
416    }
417}
418
/// Conversion of a value into the [`DocumentableItemId`] it corresponds to, if there is one.
// NOTE(review): the type parameter `T` does not appear in the method signature.
trait ToDocumentableItemId<T> {
    /// Consumes `self` and returns the matching [`DocumentableItemId`], or `None` when `self`
    /// has no documentable counterpart.
    fn to_documentable_item_id(self, db: &dyn SemanticGroup) -> Option<DocumentableItemId>;
}
422
423impl ToDocumentableItemId<DocumentableItemId> for ResolvedGenericItem {
424    /// Converts the [ResolvedGenericItem] to [DocumentableItemId].
425    /// As for now, returns None only for a common Variable, as those are not a supported
426    /// documentable item.
427    fn to_documentable_item_id(self, db: &dyn SemanticGroup) -> Option<DocumentableItemId> {
428        match self {
429            ResolvedGenericItem::GenericConstant(id) => Some(DocumentableItemId::LookupItem(
430                LookupItemId::ModuleItem(ModuleItemId::Constant(id)),
431            )),
432            ResolvedGenericItem::GenericFunction(GenericFunctionId::Free(id)) => {
433                Some(DocumentableItemId::LookupItem(LookupItemId::ModuleItem(
434                    ModuleItemId::FreeFunction(id),
435                )))
436            }
437            ResolvedGenericItem::GenericType(GenericTypeId::Struct(id)) => Some(
438                DocumentableItemId::LookupItem(LookupItemId::ModuleItem(ModuleItemId::Struct(id))),
439            ),
440            ResolvedGenericItem::GenericType(GenericTypeId::Enum(id)) => Some(
441                DocumentableItemId::LookupItem(LookupItemId::ModuleItem(ModuleItemId::Enum(id))),
442            ),
443            ResolvedGenericItem::GenericTypeAlias(id) => Some(DocumentableItemId::LookupItem(
444                LookupItemId::ModuleItem(ModuleItemId::TypeAlias(id)),
445            )),
446            ResolvedGenericItem::GenericImplAlias(id) => Some(DocumentableItemId::LookupItem(
447                LookupItemId::ModuleItem(ModuleItemId::ImplAlias(id)),
448            )),
449            ResolvedGenericItem::Trait(id) => Some(DocumentableItemId::LookupItem(
450                LookupItemId::ModuleItem(ModuleItemId::Trait(id)),
451            )),
452            ResolvedGenericItem::Impl(id) => Some(DocumentableItemId::LookupItem(
453                LookupItemId::ModuleItem(ModuleItemId::Impl(id)),
454            )),
455            ResolvedGenericItem::Macro(id) => Some(DocumentableItemId::LookupItem(
456                LookupItemId::ModuleItem(ModuleItemId::MacroDeclaration(id)),
457            )),
458            ResolvedGenericItem::GenericType(GenericTypeId::Extern(id)) => {
459                Some(DocumentableItemId::LookupItem(LookupItemId::ModuleItem(
460                    ModuleItemId::ExternType(id),
461                )))
462            }
463            ResolvedGenericItem::GenericFunction(GenericFunctionId::Extern(id)) => {
464                Some(DocumentableItemId::LookupItem(LookupItemId::ModuleItem(
465                    ModuleItemId::ExternFunction(id),
466                )))
467            }
468            ResolvedGenericItem::Module(ModuleId::Submodule(id)) => {
469                Some(DocumentableItemId::LookupItem(LookupItemId::ModuleItem(
470                    ModuleItemId::Submodule(id),
471                )))
472            }
473            ResolvedGenericItem::Module(ModuleId::CrateRoot(id)) => {
474                Some(DocumentableItemId::Crate(id))
475            }
476            ResolvedGenericItem::Variant(variant) => Some(DocumentableItemId::Variant(variant.id)),
477            ResolvedGenericItem::GenericFunction(GenericFunctionId::Impl(generic_impl_func)) => {
478                if let Some(impl_function) = generic_impl_func.impl_function(db).ok().flatten() {
479                    Some(DocumentableItemId::LookupItem(LookupItemId::ImplItem(
480                        cairo_lang_defs::ids::ImplItemId::Function(impl_function),
481                    )))
482                } else {
483                    Some(DocumentableItemId::LookupItem(LookupItemId::TraitItem(
484                        TraitItemId::Function(generic_impl_func.function),
485                    )))
486                }
487            }
488            ResolvedGenericItem::TraitItem(id) => {
489                Some(DocumentableItemId::LookupItem(LookupItemId::TraitItem(id)))
490            }
491            ResolvedGenericItem::Variable(_) => None,
492        }
493    }
494}
495
496impl fmt::Display for CommentLinkToken {
497    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
498        match self.path.clone() {
499            Some(path) => write!(f, "[{}]({})", self.label, path),
500            None => write!(f, "[{}]", self.label),
501        }
502    }
503}
504
505impl fmt::Display for DocumentationCommentToken {
506    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
507        match self {
508            DocumentationCommentToken::Content(ref content) => {
509                write!(f, "{content}")
510            }
511            DocumentationCommentToken::Link(ref link_token) => {
512                write!(f, "{link_token}")
513            }
514        }
515    }
516}
517
518impl DebugWithDb<dyn DocGroup> for CommentLinkToken {
519    fn fmt(&self, f: &mut fmt::Formatter<'_>, db: &dyn DocGroup) -> fmt::Result {
520        f.debug_struct("CommentLinkToken")
521            .field("label", &self.label)
522            .field("path", &self.path)
523            .field("resolved_item_name", &self.resolved_item.map(|item| item.name(db)))
524            .finish()
525    }
526}
527
528/// Maps `HeadingLevel` to correct markdown marker.
529fn heading_level_to_markdown(heading_level: HeadingLevel) -> String {
530    let heading_char: String = String::from("#");
531    match heading_level {
532        HeadingLevel::H1 => heading_char,
533        HeadingLevel::H2 => heading_char.repeat(2),
534        HeadingLevel::H3 => heading_char.repeat(3),
535        HeadingLevel::H4 => heading_char.repeat(4),
536        HeadingLevel::H5 => heading_char.repeat(5),
537        HeadingLevel::H6 => heading_char.repeat(6),
538    }
539}
540
541/// Maps [`Alignment`] to correct markdown markers.
542fn get_alignment_markers(alignment: &Alignment) -> (String, String) {
543    let (left, right) = match alignment {
544        Alignment::None => ("", ""),
545        Alignment::Left => (":", ""),
546        Alignment::Right => ("", ":"),
547        Alignment::Center => (":", ":"),
548    };
549    (left.to_string(), right.to_string())
550}