cairo_lang_doc/
parser.rs

1use std::fmt;
2
3use cairo_lang_debug::DebugWithDb;
4use cairo_lang_defs::db::DefsGroup;
5use cairo_lang_defs::ids::{GenericTypeId, LookupItemId, ModuleId, ModuleItemId, TraitItemId};
6use cairo_lang_diagnostics::DiagnosticsBuilder;
7use cairo_lang_filesystem::db::FilesGroup;
8use cairo_lang_filesystem::ids::{FileKind, FileLongId, SmolStrId, VirtualFile};
9use cairo_lang_parser::parser::Parser;
10use cairo_lang_semantic::diagnostic::{NotFoundItemType, SemanticDiagnostics};
11use cairo_lang_semantic::expr::inference::InferenceId;
12use cairo_lang_semantic::items::functions::GenericFunctionId;
13use cairo_lang_semantic::items::module::ModuleSemantic;
14use cairo_lang_semantic::resolve::{AsSegments, ResolutionContext, ResolvedGenericItem, Resolver};
15use cairo_lang_syntax::node::ast::{Expr, ExprPath, ItemModule};
16use cairo_lang_syntax::node::helpers::GetIdentifier;
17use cairo_lang_syntax::node::{SyntaxNode, TypedSyntaxNode};
18use cairo_lang_utils::Intern;
19use itertools::Itertools;
20use pulldown_cmark::{
21    Alignment, BrokenLink, CodeBlockKind, Event, HeadingLevel, LinkType, Options,
22    Parser as MarkdownParser, Tag, TagEnd,
23};
24use salsa::Database;
25
26use crate::db::DocGroup;
27use crate::documentable_item::DocumentableItemId;
28
/// Token representing a link to another item inside the documentation.
///
/// Its `Display` form is `[label](path)` when `path` is set and `[label]` otherwise.
#[derive(Debug, PartialEq, Clone, Eq, salsa::Update)]
pub struct CommentLinkToken<'db> {
    /// The link text, i.e. the part that's inside "[]" brackets.
    pub label: String,
    /// The link part that's inside "()" brackets, right after the label. `None` for shortcut
    /// links, which consist of the bracketed part only.
    pub path: Option<String>,
    /// Item resolved based on the path provided by the user. Left as `None` if the resolver
    /// cannot resolve the path to a documentable item.
    pub resolved_item: Option<DocumentableItemId<'db>>,
}
40
/// Generic type for a comment token. It's either a plain content or a link.
/// Notice that the Content type of token can store much more than just one word: it holds
/// whatever piece of text or Markdown markup the parser emitted in one step.
#[derive(Debug, PartialEq, Clone, Eq, salsa::Update)]
pub enum DocumentationCommentToken<'db> {
    /// Token with plain documentation content (text, whitespace or Markdown markup).
    Content(String),
    /// Link token, see [`CommentLinkToken`].
    Link(CommentLinkToken<'db>),
}
50
51impl DocumentationCommentToken<'_> {
52    /// Checks if string representation of [`DocumentationCommentToken`] ends with newline.
53    pub fn ends_with_newline(self) -> bool {
54        match self {
55            DocumentationCommentToken::Content(content) => content.ends_with('\n'),
56            DocumentationCommentToken::Link(link_token) => link_token.label.ends_with('\n'),
57        }
58    }
59}
60
/// Helper struct for formatting possibly nested Markdown lists.
///
/// One instance is kept per currently open list; the innermost list is the last element of the
/// nesting stack.
struct DocCommentListItem {
    /// Number to print in front of the next item of an ordered list. Initialised from the
    /// list start tag and incremented after every emitted item; `None` for unordered lists.
    delimiter: Option<u64>,
    /// Whether the list is ordered (numbered) rather than a bullet list.
    is_ordered_list: bool,
}
68
/// Parses plain documentation comments into [DocumentationCommentToken]s.
pub struct DocumentationCommentParser<'db> {
    /// Database used to resolve link paths and to parse them as path expressions.
    db: &'db dyn Database,
}
73
74impl<'db> DocumentationCommentParser<'db> {
75    pub fn new(db: &'db dyn Database) -> Self {
76        Self { db }
77    }
78
79    /// Parses documentation comment content into vector of [DocumentationCommentToken]s, keeping
80    /// the order in which they were present in the content.
81    ///
82    /// We look for 3 types of patterns when it comes to link (ignore the backslash):
83    /// "\[label\](path)", "\[path\]" or "\[`path`\]".
84    pub fn parse_documentation_comment(
85        &self,
86        item_id: DocumentableItemId<'db>,
87        documentation_comment: String,
88    ) -> Vec<DocumentationCommentToken<'db>> {
89        let mut tokens = Vec::new();
90        let mut current_link: Option<CommentLinkToken<'db>> = None;
91        let mut is_indented_code_block = false;
92        let mut replacer = |broken_link: BrokenLink<'_>| {
93            if matches!(broken_link.link_type, LinkType::ShortcutUnknown | LinkType::Shortcut) {
94                return Some((broken_link.reference.to_string().into(), "".into()));
95            }
96            None
97        };
98
99        let mut options = Options::empty();
100        options.insert(Options::ENABLE_TABLES);
101        let parser = MarkdownParser::new_with_broken_link_callback(
102            &documentation_comment,
103            options,
104            Some(&mut replacer),
105        );
106
107        let mut list_nesting: Vec<DocCommentListItem> = Vec::new();
108        let write_list_item_prefix =
109            |list_nesting: &mut Vec<DocCommentListItem>,
110             tokens: &mut Vec<DocumentationCommentToken<'db>>| {
111                if !list_nesting.is_empty() {
112                    let indent = "  ".repeat(list_nesting.len() - 1);
113                    let list_nesting = list_nesting.last_mut().unwrap();
114
115                    let item_delimiter = if list_nesting.is_ordered_list {
116                        let delimiter = list_nesting.delimiter.unwrap_or(0);
117                        list_nesting.delimiter = Some(delimiter + 1);
118                        format!("{indent}{delimiter}.",)
119                    } else {
120                        format!("{indent}-")
121                    };
122                    tokens.push(DocumentationCommentToken::Content(format!(
123                        "{indent}{item_delimiter} "
124                    )));
125                }
126            };
127        let mut prefix_list_item = false;
128        let mut last_two_events = [None, None];
129        let mut table_alignment: Vec<Alignment> = Vec::new();
130
131        for event in parser {
132            match &event {
133                Event::Text(text) => {
134                    if prefix_list_item {
135                        write_list_item_prefix(&mut list_nesting, &mut tokens);
136                        prefix_list_item = false;
137                    }
138                    if let Some(link) = current_link.as_mut() {
139                        link.label.push_str(text.as_ref());
140                    } else {
141                        let text = {
142                            if is_indented_code_block {
143                                format!("    {text}")
144                            } else {
145                                text.to_string()
146                            }
147                        };
148                        tokens.push(DocumentationCommentToken::Content(text));
149                    }
150                }
151                Event::Code(code) => {
152                    if prefix_list_item {
153                        write_list_item_prefix(&mut list_nesting, &mut tokens);
154                        prefix_list_item = false;
155                    }
156                    let complete_code = format!("`{code}`");
157                    if let Some(link) = current_link.as_mut() {
158                        link.label.push_str(&complete_code);
159                    } else {
160                        tokens.push(DocumentationCommentToken::Content(complete_code));
161                    }
162                }
163                Event::Start(tag_start) => {
164                    match tag_start {
165                        Tag::Heading { level, .. } => {
166                            if let Some(last_token) = tokens.last_mut()
167                                && !last_token.clone().ends_with_newline()
168                            {
169                                tokens.push(DocumentationCommentToken::Content("\n".to_string()));
170                            }
171                            tokens.push(DocumentationCommentToken::Content(format!(
172                                "{} ",
173                                heading_level_to_markdown(*level)
174                            )));
175                        }
176                        Tag::List(list_type) => {
177                            if !list_nesting.is_empty() {
178                                tokens.push(DocumentationCommentToken::Content("\n".to_string()));
179                            }
180                            list_nesting.push(DocCommentListItem {
181                                delimiter: *list_type,
182                                is_ordered_list: list_type.is_some(),
183                            });
184                        }
185                        Tag::CodeBlock(kind) => match kind {
186                            CodeBlockKind::Fenced(language) => {
187                                if language.trim().is_empty() {
188                                    tokens.push(DocumentationCommentToken::Content(String::from(
189                                        "```cairo\n",
190                                    )));
191                                } else {
192                                    tokens.push(DocumentationCommentToken::Content(format!(
193                                        "```{language}\n"
194                                    )));
195                                }
196                            }
197                            CodeBlockKind::Indented => {
198                                tokens.push(DocumentationCommentToken::Content("\n".to_string()));
199                                is_indented_code_block = true;
200                            }
201                        },
202                        Tag::Link { link_type, dest_url, .. } => {
203                            match *link_type {
204                                LinkType::ShortcutUnknown | LinkType::Shortcut => {
205                                    let path =
206                                        if dest_url.starts_with("`") && dest_url.ends_with("`") {
207                                            dest_url
208                                                .trim_start_matches("`")
209                                                .trim_end_matches("`")
210                                                .to_string()
211                                        } else {
212                                            dest_url.clone().to_string()
213                                        };
214                                    current_link = Some(CommentLinkToken {
215                                        label: "".to_string(),
216                                        path: None,
217                                        resolved_item: self.resolve_linked_item(item_id, path), /* Or resolve item here */
218                                    });
219                                }
220                                _ => {
221                                    current_link = Some(CommentLinkToken {
222                                        label: "".to_string(),
223                                        path: Some(dest_url.clone().into_string()),
224                                        resolved_item: self.resolve_linked_item(
225                                            item_id,
226                                            dest_url.clone().into_string(),
227                                        ), // Or resolve item here
228                                    });
229                                }
230                            }
231                        }
232                        Tag::Paragraph | Tag::TableRow => {
233                            tokens.push(DocumentationCommentToken::Content("\n".to_string()));
234                        }
235                        Tag::Item => {
236                            prefix_list_item = true;
237                        }
238                        Tag::Table(alignment) => {
239                            table_alignment = alignment.clone();
240                            tokens.push(DocumentationCommentToken::Content("\n".to_string()));
241                        }
242                        Tag::TableCell => {
243                            tokens.push(DocumentationCommentToken::Content("|".to_string()));
244                        }
245                        Tag::Strong => {
246                            tokens.push(DocumentationCommentToken::Content("**".to_string()));
247                        }
248                        Tag::Emphasis => {
249                            tokens.push(DocumentationCommentToken::Content("_".to_string()));
250                        }
251                        _ => {}
252                    }
253                }
254                Event::End(tag_end) => match tag_end {
255                    TagEnd::Heading(_) | TagEnd::Table => {
256                        tokens.push(DocumentationCommentToken::Content("\n".to_string()));
257                    }
258                    TagEnd::List(_) => {
259                        list_nesting.pop();
260                    }
261                    TagEnd::Item => {
262                        if !matches!(last_two_events[0], Some(Event::End(_)))
263                            | !matches!(last_two_events[1], Some(Event::End(_)))
264                        {
265                            tokens.push(DocumentationCommentToken::Content("\n".to_string()));
266                        }
267                    }
268                    TagEnd::TableHead => {
269                        tokens.push(DocumentationCommentToken::Content(format!(
270                            "|\n|{}|",
271                            table_alignment
272                                .iter()
273                                .map(|a| {
274                                    let (left, right) = get_alignment_markers(a);
275                                    format!("{left}---{right}")
276                                })
277                                .join("|")
278                        )));
279                        table_alignment.clear();
280                    }
281                    TagEnd::CodeBlock => {
282                        if !is_indented_code_block {
283                            tokens.push(DocumentationCommentToken::Content("```\n".to_string()));
284                        }
285                        is_indented_code_block = false;
286                    }
287                    TagEnd::Link => {
288                        if let Some(link) = current_link.take() {
289                            tokens.push(DocumentationCommentToken::Link(link));
290                        }
291                    }
292                    TagEnd::TableRow => {
293                        tokens.push(DocumentationCommentToken::Content("|".to_string()));
294                    }
295                    TagEnd::Strong => {
296                        tokens.push(DocumentationCommentToken::Content("**".to_string()));
297                    }
298                    TagEnd::Emphasis => {
299                        tokens.push(DocumentationCommentToken::Content("_".to_string()));
300                    }
301                    TagEnd::Paragraph => {
302                        tokens.push(DocumentationCommentToken::Content("\n".to_string()));
303                    }
304                    _ => {}
305                },
306                Event::SoftBreak => {
307                    tokens.push(DocumentationCommentToken::Content("\n".to_string()));
308                }
309                Event::Rule => {
310                    tokens.push(DocumentationCommentToken::Content("___\n".to_string()));
311                }
312                _ => {}
313            }
314            last_two_events = [last_two_events[1].clone(), Some(event)];
315        }
316
317        if let Some(DocumentationCommentToken::Content(token)) = tokens.first()
318            && token == "\n"
319        {
320            tokens.remove(0);
321        }
322        if let Some(DocumentationCommentToken::Content(token)) = tokens.last_mut() {
323            *token = token.trim_end().to_string();
324            if token.is_empty() {
325                tokens.pop();
326            }
327        }
328
329        tokens
330    }
331
332    /// Resolves item based on the provided path as a string.
333    fn resolve_linked_item(
334        &self,
335        item_id: DocumentableItemId<'db>,
336        path: String,
337    ) -> Option<DocumentableItemId<'db>> {
338        let syntax_node = item_id.stable_location(self.db)?.syntax_node(self.db);
339        let containing_module = self.find_module_containing_node(&syntax_node)?;
340        let mut resolver = Resolver::new(self.db, containing_module, InferenceId::NoContext);
341        let mut diagnostics = SemanticDiagnostics::default();
342        let segments = self.parse_comment_link_path(path)?;
343        resolver
344            .resolve_generic_path(
345                &mut diagnostics,
346                segments.to_segments(self.db),
347                NotFoundItemType::Identifier,
348                ResolutionContext::Default,
349            )
350            .ok()?
351            .to_documentable_item_id(self.db)
352    }
353
354    /// Parses the path as a string to a Path Expression, which can be later used by a resolver.
355    fn parse_comment_link_path(&self, path: String) -> Option<ExprPath<'db>> {
356        let virtual_file = FileLongId::Virtual(VirtualFile {
357            parent: Default::default(),
358            name: SmolStrId::from(self.db, ""),
359            content: SmolStrId::from(self.db, path),
360            code_mappings: Default::default(),
361            kind: FileKind::Module,
362            original_item_removed: false,
363        })
364        .intern(self.db);
365
366        let content = self.db.file_content(virtual_file).unwrap();
367        let expr = Parser::parse_file_expr(
368            self.db,
369            &mut DiagnosticsBuilder::default(),
370            virtual_file,
371            content,
372        );
373
374        if let Expr::Path(expr_path) = expr { Some(expr_path) } else { None }
375    }
376
377    /// Finds a [`ModuleId`] containing the node.
378    ///
379    /// If the node is located in a virtual file generated by a compiler plugin, this method will
380    /// return the (sub)module of the main, user-written file that leads to the node.
381    fn find_module_containing_node(&self, node: &SyntaxNode<'db>) -> Option<ModuleId<'db>> {
382        let db = self.db;
383
384        // Get the main module of the main file that leads to the node.
385        // The node may be located in a virtual file of a submodule.
386        // This code attempts to get the absolute "parent" of both "module" and "file" parts.
387        let main_module = {
388            // Get the file where the node is located.
389            // This might be a virtual file generated by a compiler plugin.
390            let node_file_id = node.stable_ptr(db).file_id(db);
391
392            // Get the root module of a file containing the node.
393            let node_main_module = db.file_modules(node_file_id).ok()?.iter().copied().next()?;
394
395            // Get the main module of the file.
396            let main_file = db.module_main_file(node_main_module).ok()?;
397
398            // Get the main module of that file.
399            db.file_modules(main_file).ok()?.iter().copied().next()?
400        };
401
402        // Get the stack (bottom-up) of submodule names in the file containing the node, in the main
403        // module, that lead to the node.
404        node.ancestors(db)
405            .filter_map(|node| ItemModule::cast(db, node))
406            .map(|item_module| {
407                item_module
408                    .stable_ptr(db)
409                    .name_green(db)
410                    .identifier(db)
411            })
412            // Buffer the stack to get DoubleEndedIterator.
413            .collect::<Vec<_>>()
414            .into_iter()
415            // And get id of the (sub)module containing the node by traversing this stack top-down.
416            .try_rfold(main_module, |module, name| {
417                let ModuleItemId::Submodule(submodule) =
418                    db.module_item_by_name(module, name).ok()??
419                else {
420                    return None;
421                };
422                Some(ModuleId::Submodule(submodule))
423            })
424    }
425}
426
427trait ToDocumentableItemId<'db, T> {
428    fn to_documentable_item_id(self, db: &'db dyn Database) -> Option<DocumentableItemId<'db>>;
429}
430
431impl<'db> ToDocumentableItemId<'db, DocumentableItemId<'db>> for ResolvedGenericItem<'db> {
432    /// Converts the [ResolvedGenericItem] to [DocumentableItemId].
433    /// As for now, returns None only for a common Variable, as those are not a supported
434    /// documentable item.
435    fn to_documentable_item_id(self, db: &'db dyn Database) -> Option<DocumentableItemId<'db>> {
436        match self {
437            ResolvedGenericItem::GenericConstant(id) => Some(DocumentableItemId::LookupItem(
438                LookupItemId::ModuleItem(ModuleItemId::Constant(id)),
439            )),
440            ResolvedGenericItem::GenericFunction(GenericFunctionId::Free(id)) => {
441                Some(DocumentableItemId::LookupItem(LookupItemId::ModuleItem(
442                    ModuleItemId::FreeFunction(id),
443                )))
444            }
445            ResolvedGenericItem::GenericType(GenericTypeId::Struct(id)) => Some(
446                DocumentableItemId::LookupItem(LookupItemId::ModuleItem(ModuleItemId::Struct(id))),
447            ),
448            ResolvedGenericItem::GenericType(GenericTypeId::Enum(id)) => Some(
449                DocumentableItemId::LookupItem(LookupItemId::ModuleItem(ModuleItemId::Enum(id))),
450            ),
451            ResolvedGenericItem::GenericTypeAlias(id) => Some(DocumentableItemId::LookupItem(
452                LookupItemId::ModuleItem(ModuleItemId::TypeAlias(id)),
453            )),
454            ResolvedGenericItem::GenericImplAlias(id) => Some(DocumentableItemId::LookupItem(
455                LookupItemId::ModuleItem(ModuleItemId::ImplAlias(id)),
456            )),
457            ResolvedGenericItem::Trait(id) => Some(DocumentableItemId::LookupItem(
458                LookupItemId::ModuleItem(ModuleItemId::Trait(id)),
459            )),
460            ResolvedGenericItem::Impl(id) => Some(DocumentableItemId::LookupItem(
461                LookupItemId::ModuleItem(ModuleItemId::Impl(id)),
462            )),
463            ResolvedGenericItem::Macro(id) => Some(DocumentableItemId::LookupItem(
464                LookupItemId::ModuleItem(ModuleItemId::MacroDeclaration(id)),
465            )),
466            ResolvedGenericItem::GenericType(GenericTypeId::Extern(id)) => {
467                Some(DocumentableItemId::LookupItem(LookupItemId::ModuleItem(
468                    ModuleItemId::ExternType(id),
469                )))
470            }
471            ResolvedGenericItem::GenericFunction(GenericFunctionId::Extern(id)) => {
472                Some(DocumentableItemId::LookupItem(LookupItemId::ModuleItem(
473                    ModuleItemId::ExternFunction(id),
474                )))
475            }
476            ResolvedGenericItem::Module(ModuleId::Submodule(id)) => {
477                Some(DocumentableItemId::LookupItem(LookupItemId::ModuleItem(
478                    ModuleItemId::Submodule(id),
479                )))
480            }
481            ResolvedGenericItem::Module(ModuleId::CrateRoot(id)) => {
482                Some(DocumentableItemId::Crate(id))
483            }
484            ResolvedGenericItem::Module(ModuleId::MacroCall { .. }) => None,
485
486            ResolvedGenericItem::Variant(variant) => Some(DocumentableItemId::Variant(variant.id)),
487            ResolvedGenericItem::GenericFunction(GenericFunctionId::Impl(generic_impl_func)) => {
488                if let Some(impl_function) = generic_impl_func.impl_function(db).ok().flatten() {
489                    Some(DocumentableItemId::LookupItem(LookupItemId::ImplItem(
490                        cairo_lang_defs::ids::ImplItemId::Function(impl_function),
491                    )))
492                } else {
493                    Some(DocumentableItemId::LookupItem(LookupItemId::TraitItem(
494                        TraitItemId::Function(generic_impl_func.function),
495                    )))
496                }
497            }
498            ResolvedGenericItem::TraitItem(id) => {
499                Some(DocumentableItemId::LookupItem(LookupItemId::TraitItem(id)))
500            }
501            ResolvedGenericItem::Variable(_) => None,
502        }
503    }
504}
505
506impl fmt::Display for CommentLinkToken<'_> {
507    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
508        match self.path.clone() {
509            Some(path) => write!(f, "[{}]({})", self.label, path),
510            None => write!(f, "[{}]", self.label),
511        }
512    }
513}
514
515impl fmt::Display for DocumentationCommentToken<'_> {
516    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
517        match self {
518            DocumentationCommentToken::Content(content) => {
519                write!(f, "{content}")
520            }
521            DocumentationCommentToken::Link(link_token) => {
522                write!(f, "{link_token}")
523            }
524        }
525    }
526}
527
528impl<'db> DebugWithDb<'db> for CommentLinkToken<'db> {
529    type Db = dyn DocGroup;
530    fn fmt(&self, f: &mut fmt::Formatter<'_>, db: &Self::Db) -> fmt::Result {
531        f.debug_struct("CommentLinkToken")
532            .field("label", &self.label)
533            .field("path", &self.path)
534            .field("resolved_item_name", &self.resolved_item.map(|item| item.name(db).long(db)))
535            .finish()
536    }
537}
538
539/// Maps `HeadingLevel` to correct markdown marker.
540fn heading_level_to_markdown(heading_level: HeadingLevel) -> String {
541    let heading_char: String = String::from("#");
542    match heading_level {
543        HeadingLevel::H1 => heading_char,
544        HeadingLevel::H2 => heading_char.repeat(2),
545        HeadingLevel::H3 => heading_char.repeat(3),
546        HeadingLevel::H4 => heading_char.repeat(4),
547        HeadingLevel::H5 => heading_char.repeat(5),
548        HeadingLevel::H6 => heading_char.repeat(6),
549    }
550}
551
552/// Maps [`Alignment`] to correct markdown markers.
553fn get_alignment_markers(alignment: &Alignment) -> (String, String) {
554    let (left, right) = match alignment {
555        Alignment::None => ("", ""),
556        Alignment::Left => (":", ""),
557        Alignment::Right => ("", ":"),
558        Alignment::Center => (":", ":"),
559    };
560    (left.to_string(), right.to_string())
561}