prost_build/
ast.rs

1use once_cell::sync::Lazy;
2use prost_types::source_code_info::Location;
3#[cfg(feature = "cleanup-markdown")]
4use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
5use regex::Regex;
6
/// The comments attached to a single Protobuf item, stored line by line.
#[derive(Clone, Debug, Default)]
pub struct Comments {
    /// Detached comment blocks found ahead of the item. Every inner vector
    /// holds the lines of one block.
    pub leading_detached: Vec<Vec<String>>,

    /// Lines of the item's leading comment.
    pub leading: Vec<String>,

    /// Lines of the item's trailing comment.
    pub trailing: Vec<String>,
}
19
20impl Comments {
21    pub(crate) fn from_location(location: &Location) -> Comments {
22        let leading_detached = location
23            .leading_detached_comments
24            .iter()
25            .map(get_lines)
26            .collect();
27        let leading = location
28            .leading_comments
29            .as_ref()
30            .map_or(Vec::new(), get_lines);
31        let trailing = location
32            .trailing_comments
33            .as_ref()
34            .map_or(Vec::new(), get_lines);
35        Comments {
36            leading_detached,
37            leading,
38            trailing,
39        }
40    }
41
42    /// Appends the comments to a buffer with indentation.
43    ///
44    /// Each level of indentation corresponds to four space (' ') characters.
45    pub fn append_with_indent(&self, indent_level: u8, buf: &mut String) {
46        // Append blocks of detached comments.
47        for detached_block in &self.leading_detached {
48            for line in detached_block {
49                for _ in 0..indent_level {
50                    buf.push_str("    ");
51                }
52                buf.push_str("//");
53                buf.push_str(&Self::sanitize_line(line));
54                buf.push('\n');
55            }
56            buf.push('\n');
57        }
58
59        // Append leading comments.
60        for line in &self.leading {
61            for _ in 0..indent_level {
62                buf.push_str("    ");
63            }
64            buf.push_str("///");
65            buf.push_str(&Self::sanitize_line(line));
66            buf.push('\n');
67        }
68
69        // Append an empty comment line if there are leading and trailing comments.
70        if !self.leading.is_empty() && !self.trailing.is_empty() {
71            for _ in 0..indent_level {
72                buf.push_str("    ");
73            }
74            buf.push_str("///\n");
75        }
76
77        // Append trailing comments.
78        for line in &self.trailing {
79            for _ in 0..indent_level {
80                buf.push_str("    ");
81            }
82            buf.push_str("///");
83            buf.push_str(&Self::sanitize_line(line));
84            buf.push('\n');
85        }
86    }
87
88    /// Checks whether a RustDoc line should be indented.
89    ///
90    /// Lines should be indented if:
91    /// - they are non-empty, AND
92    ///   - they don't already start with a space
93    ///     OR
94    ///   - they start with several spaces.
95    ///
96    /// The last condition can happen in the case of multi-line Markdown lists
97    /// such as:
98    ///
99    /// - this is a list
100    ///   where some elements spans multiple lines
101    /// - but not all elements
102    fn should_indent(sanitized_line: &str) -> bool {
103        let mut chars = sanitized_line.chars();
104        chars
105            .next()
106            .is_some_and(|c| c != ' ' || chars.next() == Some(' '))
107    }
108
109    /// Sanitizes the line for rustdoc by performing the following operations:
110    ///     - escape urls as <http://foo.com>
111    ///     - escape `[` & `]` if not already escaped and not followed by a parenthesis or bracket
112    fn sanitize_line(line: &str) -> String {
113        static RULE_URL: Lazy<Regex> = Lazy::new(|| Regex::new(r"https?://[^\s)]+").unwrap());
114        static RULE_BRACKETS: Lazy<Regex> =
115            Lazy::new(|| Regex::new(r"(^|[^\]\\])\[(([^\]]*[^\\])?)\]([^(\[]|$)").unwrap());
116
117        let mut s = RULE_URL.replace_all(line, r"<$0>").to_string();
118        s = RULE_BRACKETS.replace_all(&s, r"$1\[$2\]$4").to_string();
119        if Self::should_indent(&s) {
120            s.insert(0, ' ');
121        }
122        s
123    }
124}
125
126/// A service descriptor.
127#[derive(Debug, Clone)]
128pub struct Service {
129    /// The service name in Rust style.
130    pub name: String,
131    /// The service name as it appears in the .proto file.
132    pub proto_name: String,
133    /// The package name as it appears in the .proto file.
134    pub package: String,
135    /// The service comments.
136    pub comments: Comments,
137    /// The service methods.
138    pub methods: Vec<Method>,
139    /// The service options.
140    pub options: prost_types::ServiceOptions,
141}
142
143/// A service method descriptor.
144#[derive(Debug, Clone)]
145pub struct Method {
146    /// The name of the method in Rust style.
147    pub name: String,
148    /// The name of the method as it appears in the .proto file.
149    pub proto_name: String,
150    /// The method comments.
151    pub comments: Comments,
152    /// The input Rust type.
153    pub input_type: String,
154    /// The output Rust type.
155    pub output_type: String,
156    /// The input Protobuf type.
157    pub input_proto_type: String,
158    /// The output Protobuf type.
159    pub output_proto_type: String,
160    /// The method options.
161    pub options: prost_types::MethodOptions,
162    /// Identifies if client streams multiple client messages.
163    pub client_streaming: bool,
164    /// Identifies if server streams multiple server messages.
165    pub server_streaming: bool,
166}
167
/// Splits a comment into owned lines, one `String` per line.
///
/// This variant is compiled when the `cleanup-markdown` feature is disabled
/// and leaves the text of every line unchanged.
#[cfg(not(feature = "cleanup-markdown"))]
fn get_lines<S>(comments: S) -> Vec<String>
where
    S: AsRef<str>,
{
    let text: &str = comments.as_ref();
    text.lines().map(String::from).collect()
}
175
/// Splits a comment into owned lines after normalising its Markdown.
///
/// This variant is compiled when the `cleanup-markdown` feature is enabled.
/// The comment is parsed with `pulldown_cmark` (every extension except smart
/// punctuation) and rendered back with `pulldown_cmark_to_cmark`, using
/// three-character code fences. Code blocks are relabelled on the way:
///
/// - a fenced block labelled exactly `rust` becomes `compile_fail`;
/// - a fenced block with any other label `x` becomes `text,x`;
/// - a fenced block without a label, or an indented block, becomes `text`.
///
/// If rendering fails, the original comment is split into lines unchanged.
#[cfg(feature = "cleanup-markdown")]
fn get_lines<S>(comments: S) -> Vec<String>
where
    S: AsRef<str>,
{
    /// Picks the replacement label for a code block.
    fn map_codeblock(kind: CodeBlockKind) -> CodeBlockKind {
        match kind {
            CodeBlockKind::Fenced(info) => {
                let label = match info.as_ref() {
                    "rust" => "compile_fail".to_owned(),
                    // No info string: avoid emitting a dangling `text,`.
                    "" => "text".to_owned(),
                    other => format!("text,{}", other),
                };
                CodeBlockKind::Fenced(label.into())
            }
            CodeBlockKind::Indented => CodeBlockKind::Fenced("text".into()),
        }
    }

    let comments = comments.as_ref();
    // Head-room for the fences and labels added while re-rendering.
    let mut buffer = String::with_capacity(comments.len() + 256);
    let opts = pulldown_cmark_to_cmark::Options {
        code_block_token_count: 3,
        ..Default::default()
    };
    let events = Parser::new_ext(comments, Options::all() - Options::ENABLE_SMART_PUNCTUATION)
        .map(|event| match event {
            Event::Start(Tag::CodeBlock(kind)) => {
                Event::Start(Tag::CodeBlock(map_codeblock(kind)))
            }
            other => other,
        });
    match pulldown_cmark_to_cmark::cmark_with_options(events, &mut buffer, opts) {
        Ok(_) => buffer.lines().map(str::to_owned).collect(),
        // Best effort: fall back to the raw comment rather than failing.
        Err(_) => comments.lines().map(str::to_owned).collect(),
    }
}
217
#[cfg(test)]
mod tests {
    use super::*;

    /// One rendering scenario: `input` is used as the only trailing comment
    /// line and `expected` is the exact text `append_with_indent` must emit
    /// for it at indentation level 0.
    struct RenderCase {
        name: &'static str,
        input: &'static str,
        expected: &'static str,
    }

    /// Renders every case as a lone trailing comment and compares the output.
    fn assert_renders(cases: &[RenderCase]) {
        for case in cases {
            let comments = Comments {
                leading_detached: vec![],
                leading: vec![],
                trailing: vec![case.input.to_string()],
            };

            let mut actual = String::new();
            comments.append_with_indent(0, &mut actual);

            assert_eq!(case.expected, actual, "failed {}", case.name);
        }
    }

    #[test]
    fn test_comment_append_with_indent_leaves_prespaced_lines() {
        assert_renders(&[
            RenderCase {
                name: "existing_space",
                input: " A line with a single leading space.",
                expected: "/// A line with a single leading space.\n",
            },
            RenderCase {
                name: "non_existing_space",
                input: "A line without a single leading space.",
                expected: "/// A line without a single leading space.\n",
            },
            RenderCase {
                name: "empty",
                input: "",
                expected: "///\n",
            },
            RenderCase {
                name: "multiple_leading_spaces",
                input: "  a line with several leading spaces, such as in a markdown list",
                expected: "///   a line with several leading spaces, such as in a markdown list\n",
            },
        ]);
    }

    #[test]
    fn test_comment_append_with_indent_sanitizes_comment_doc_url() {
        assert_renders(&[
            // Plain `http://` must be covered as well as `https://`.
            RenderCase {
                name: "valid_http",
                input: "See http://www.rust-lang.org/",
                expected: "/// See <http://www.rust-lang.org/>\n",
            },
            RenderCase {
                name: "valid_https",
                input: "See https://www.rust-lang.org/",
                expected: "/// See <https://www.rust-lang.org/>\n",
            },
            RenderCase {
                name: "valid_https_parenthesis",
                input: "See (https://www.rust-lang.org/)",
                expected: "/// See (<https://www.rust-lang.org/>)\n",
            },
            RenderCase {
                name: "invalid",
                input: "See note://abc",
                expected: "/// See note://abc\n",
            },
        ]);
    }

    #[test]
    fn test_comment_append_with_indent_sanitizes_square_brackets() {
        assert_renders(&[
            RenderCase {
                name: "valid_brackets",
                input: "foo [bar] baz",
                expected: "/// foo \\[bar\\] baz\n",
            },
            RenderCase {
                name: "invalid_start_bracket",
                input: "foo [= baz",
                expected: "/// foo [= baz\n",
            },
            RenderCase {
                name: "invalid_end_bracket",
                input: "foo =] baz",
                expected: "/// foo =] baz\n",
            },
            RenderCase {
                name: "invalid_bracket_combination",
                input: "[0, 9)",
                expected: "/// [0, 9)\n",
            },
            RenderCase {
                name: "valid_brackets_parenthesis",
                input: "foo [bar](bar) baz",
                expected: "/// foo [bar](bar) baz\n",
            },
            RenderCase {
                name: "valid_brackets_end",
                input: "foo [bar]",
                expected: "/// foo \\[bar\\]\n",
            },
            RenderCase {
                name: "valid_brackets_no_parenthesis",
                input: "foo [bar]baz",
                expected: "/// foo \\[bar\\]baz\n",
            },
            RenderCase {
                name: "valid_empty_brackets",
                input: "foo []",
                expected: "/// foo \\[\\]\n",
            },
            RenderCase {
                name: "valid_empty_brackets_parenthesis",
                input: "foo []()",
                expected: "/// foo []()\n",
            },
            RenderCase {
                name: "valid_brackets_brackets",
                input: "foo [bar][bar] baz",
                expected: "/// foo [bar][bar] baz\n",
            },
            RenderCase {
                name: "valid_brackets_brackets_end",
                input: "foo [bar][baz]",
                expected: "/// foo [bar][baz]\n",
            },
            RenderCase {
                name: "valid_brackets_brackets_all",
                input: "[bar][baz]",
                expected: "/// [bar][baz]\n",
            },
            RenderCase {
                name: "escaped_brackets",
                input: "\\[bar\\]\\[baz\\]",
                expected: "/// \\[bar\\]\\[baz\\]\n",
            },
            RenderCase {
                name: "escaped_empty_brackets",
                input: "\\[\\]\\[\\]",
                expected: "/// \\[\\]\\[\\]\n",
            },
        ]);
    }

    /// Covers indentation, detached blocks and the separator between leading
    /// and trailing comments in a single rendering.
    #[test]
    fn test_comment_append_with_indent_layout() {
        let comments = Comments {
            leading_detached: vec![vec!["detached".to_string()]],
            leading: vec!["leading".to_string()],
            trailing: vec!["trailing".to_string()],
        };

        let mut actual = String::new();
        comments.append_with_indent(1, &mut actual);

        assert_eq!(
            "    // detached\n\n    /// leading\n    ///\n    /// trailing\n",
            actual
        );
    }

    #[test]
    fn test_codeblocks() {
        struct TestCase {
            name: &'static str,
            input: &'static str,
            // Only read when the `cleanup-markdown` feature is enabled.
            #[allow(unused)]
            cleanedup_expected: Vec<&'static str>,
        }

        let tests = vec![
            TestCase {
                name: "unlabelled_block",
                input: "    thingy\n",
                cleanedup_expected: vec!["", "```text", "thingy", "```"],
            },
            TestCase {
                name: "rust_block",
                input: "```rust\nfoo.bar()\n```\n",
                cleanedup_expected: vec!["", "```compile_fail", "foo.bar()", "```"],
            },
            TestCase {
                name: "js_block",
                input: "```javascript\nfoo.bar()\n```\n",
                cleanedup_expected: vec!["", "```text,javascript", "foo.bar()", "```"],
            },
        ];

        for t in tests {
            let loc = Location {
                path: vec![],
                span: vec![],
                leading_comments: Some(t.input.into()),
                trailing_comments: None,
                leading_detached_comments: vec![],
            };
            let comments = Comments::from_location(&loc);
            // Without the feature the comment lines pass through untouched.
            #[cfg(feature = "cleanup-markdown")]
            let expected = t.cleanedup_expected;
            #[cfg(not(feature = "cleanup-markdown"))]
            let expected: Vec<&str> = t.input.lines().collect();
            assert_eq!(expected, comments.leading, "failed {}", t.name);
        }
    }
}