1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
use rustc_lint::{LateContext, LateLintPass, LintStore};
use rustc_session::{declare_tool_lint, impl_lint_pass};
use rustc_span::Span;
use crate::comment_walk::{CommentChunk, CommentSurface, walk_local_comments};
use crate::common::{DefaultState, resolved_state};
use crate::enclosing_hir::emit_at_enclosing_hir;
use crate::literal_scan::emit_flagged_char_hir;
use crate::markdown::{position_in_skip, scan_code_regions};
declare_tool_lint! {
/// ### What it does
///
/// Forbids U+2026 HORIZONTAL ELLIPSIS (`…`) in doc comments —
/// `///` and `//!` line forms and the `/** */` / `/*! */` block
/// forms. Prefer the three-ASCII-dot form `...`. Regular `//` and
/// `/* */` comments are covered by a sibling lint
/// (`perfectionist::unicode_ellipsis_in_comments`).
///
/// ### Why restrict this?
///
/// This is a stylistic preference, not a correctness issue.
/// ASCII `...` survives every encoding round-trip, every terminal,
/// every copy-paste, every `grep` invocation, and every `git diff`
/// viewer without rendering as `?` or a tofu box. The visual
/// difference between `…` and `...` is small enough that the
/// Unicode form usually arrives by accident — autocorrect, an IDE
/// smart-quote setting — rather than as a deliberate choice in
/// technical writing.
///
/// ### Example
///
/// **Avoid:**
///
/// ```rust,ignore
/// /// Walk the tree, collecting sizes…
/// ```
///
/// **Prefer:**
///
/// ```rust,ignore
/// /// Walk the tree, collecting sizes...
/// ```
// The rustdoc above deliberately contains the glyph this lint flags, so
// when the crate is checked by the `perfectionist` dylint library the
// lint is *expected* to fire on its own declaration. The attribute is
// gated on `dylint_lib` so ordinary builds never see the unknown tool
// lint name.
#[cfg_attr(
dylint_lib = "perfectionist",
expect(
perfectionist::unicode_ellipsis_in_docs,
reason = "this rule's own rustdoc names the U+2026 glyph it governs"
)
)]
pub perfectionist::UNICODE_ELLIPSIS_IN_DOCS,
Warn,
"U+2026 HORIZONTAL ELLIPSIS in doc comments; prefer `...`",
// Do not report hits in code expanded from macros defined in other crates.
report_in_external_macro: false
}
/// Key handed to `dylint_linting::config_or_default` when loading this
/// lint's `Config`.
const CONFIG_KEY: &str = "perfectionist::unicode_ellipsis_in_docs";
/// User-facing configuration for this lint, loaded in
/// `UnicodeEllipsisInDocs::new` through
/// `dylint_linting::config_or_default` under `CONFIG_KEY`.
///
/// Every field is optional (`serde(default)` falls back to the
/// `Default` value), keys are spelled in `snake_case`, and unknown keys
/// are rejected (`deny_unknown_fields`).
#[derive(Debug, Default, serde::Deserialize)]
#[serde(default, deny_unknown_fields, rename_all = "snake_case")]
struct Config {
/// Extra characters to flag alongside U+2026. Useful for catching
/// near-relatives such as U+22EF MIDLINE HORIZONTAL ELLIPSIS (`⋯`)
/// or U+2025 TWO DOT LEADER (`‥`) that the same autocorrect
/// pipelines occasionally insert. Empty by default.
extra_flagged_chars: Vec<char>,
/// Whether to also flag a character inside an inline code span
/// (`` `...` ``). Defaults to `false`: code spans often quote example
/// text where the ellipsis is meaningful, so they are left alone
/// unless this is set to `true`. Code *blocks* — fenced
/// (` ``` ... ``` `), `~~~`-fenced, four-space indented, and the
/// doc-test code they hold — are always skipped regardless of this
/// knob.
scan_code_spans: bool,
}
/// Late lint pass that reports flagged ellipsis-like characters found
/// in doc comments.
pub struct UnicodeEllipsisInDocs {
/// Characters to report: U+2026 first, followed by any distinct
/// extras taken from the configuration.
flagged_chars: Vec<char>,
/// When `false`, inline code spans are added to the skip mask and
/// characters inside them are not reported.
scan_code_spans: bool,
}
impl UnicodeEllipsisInDocs {
fn new() -> Self {
let config: Config = dylint_linting::config_or_default(CONFIG_KEY);
let mut flagged_chars = vec!['\u{2026}'];
for character in config.extra_flagged_chars {
if !flagged_chars.contains(&character) {
flagged_chars.push(character);
}
}
Self {
flagged_chars,
scan_code_spans: config.scan_code_spans,
}
}
}
// Implements `LintPass` for the pass type and declares the single lint
// it can emit.
impl_lint_pass!(UnicodeEllipsisInDocs => [UNICODE_ELLIPSIS_IN_DOCS]);
/// Registers the `UNICODE_ELLIPSIS_IN_DOCS` lint declaration with
/// `lint_store`.
///
/// This only registers the lint itself; the pass that emits it is
/// installed separately by `register_pass`.
pub fn register_lint(lint_store: &mut LintStore) {
    let lints = [UNICODE_ELLIPSIS_IN_DOCS];
    lint_store.register_lints(&lints);
}
/// Installs the late pass that emits `UNICODE_ELLIPSIS_IN_DOCS`, unless
/// `resolved_state` reports the lint as `DefaultState::Inactive`.
///
/// `DefaultState::Active` is passed as the second argument — presumably
/// the state used when nothing overrides it; confirm against
/// `resolved_state`.
pub fn register_pass(lint_store: &mut LintStore) {
    let state = resolved_state("unicode_ellipsis_in_docs", DefaultState::Active);
    if !matches!(state, DefaultState::Inactive) {
        lint_store.register_late_pass(|_| Box::new(UnicodeEllipsisInDocs::new()));
    }
}
impl<'tcx> LateLintPass<'tcx> for UnicodeEllipsisInDocs {
    /// Scans the comment chunks yielded by `walk_local_comments` and
    /// reports every flagged character found in a doc comment.
    ///
    /// Works in two phases: first gather `(span, character)` hits from
    /// the doc-comment chunks, then pass them to
    /// `emit_at_enclosing_hir`, whose callback receives a `hir_id` with
    /// each hit and emits the diagnostic.
    fn check_crate_post(&mut self, lint_context: &LateContext<'tcx>) {
        let mut hits: Vec<(Span, char)> = Vec::new();

        walk_local_comments(lint_context, |chunk| {
            // Kept exhaustive on purpose: a new comment surface must be
            // classified here before the crate compiles again.
            match chunk.surface {
                // Plain comments are out of scope for this lint.
                CommentSurface::PlainLine | CommentSurface::PlainBlock => {}
                CommentSurface::DocBlock | CommentSurface::DocBlockBlock => {
                    self.collect_doc_chunk(chunk, &mut hits);
                }
            }
        });

        emit_at_enclosing_hir(lint_context.tcx, hits, |hir_id, span, character| {
            emit_flagged_char_hir(
                lint_context,
                UNICODE_ELLIPSIS_IN_DOCS,
                hir_id,
                character,
                span,
                "doc comment",
            );
        });
    }
}
impl UnicodeEllipsisInDocs {
    /// Appends one `(span, character)` pair to `out` for every flagged
    /// character in `chunk` that lies outside the skip regions.
    ///
    /// Code blocks are always skipped. Inline code spans are skipped
    /// too unless `scan_code_spans` is enabled.
    fn collect_doc_chunk(&self, chunk: &CommentChunk<'_>, out: &mut Vec<(Span, char)>) {
        // The second argument asks for code spans to be masked as well,
        // which is the inverse of the user's opt-in to scan them.
        let mask_code_spans = !self.scan_code_spans;
        let skips = scan_code_regions(&chunk.rendered, mask_code_spans);

        let hits = chunk
            .rendered
            .char_indices()
            .filter(|(_, character)| self.flagged_chars.contains(character))
            .filter(|(byte_offset, _)| !position_in_skip(&skips, *byte_offset))
            .filter_map(|(byte_offset, character)| {
                // `span_for` is expected to succeed here: a flagged
                // character sits inside a content line, not on the
                // synthesised `\n` that joins `///` lines. A `None` is
                // dropped rather than treated as an error.
                chunk
                    .span_for(byte_offset, character.len_utf8() as u32)
                    .map(|span| (span, character))
            });

        out.extend(hits);
    }
}