1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rules::emphasis_style::EmphasisStyle;
3use crate::utils::document_structure::DocumentStructure;
4use crate::utils::emphasis_utils::{find_emphasis_markers, find_single_emphasis_spans, replace_inline_code};
5use lazy_static::lazy_static;
6use regex::Regex;
7
8lazy_static! {
9 static ref REF_DEF_REGEX: Regex = Regex::new(
11 r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
12 ).unwrap();
13}
14
15mod md049_config;
16use md049_config::MD049Config;
17
18#[derive(Debug, Default, Clone)]
28pub struct MD049EmphasisStyle {
29 config: MD049Config,
30}
31
32impl MD049EmphasisStyle {
33 pub fn new(style: EmphasisStyle) -> Self {
35 MD049EmphasisStyle {
36 config: MD049Config { style },
37 }
38 }
39
40 pub fn from_config_struct(config: MD049Config) -> Self {
41 Self { config }
42 }
43
44 fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
46 for link in &ctx.links {
48 if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
49 return true;
50 }
51 }
52
53 for image in &ctx.images {
55 if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
56 return true;
57 }
58 }
59
60 for m in REF_DEF_REGEX.find_iter(ctx.content) {
62 if m.start() <= byte_pos && byte_pos < m.end() {
63 return true;
64 }
65 }
66
67 false
68 }
69
70 fn collect_emphasis_from_line(
72 &self,
73 line: &str,
74 line_num: usize,
75 line_start_pos: usize,
76 emphasis_info: &mut Vec<(usize, usize, usize, char, String)>, ) {
78 let line_no_code = replace_inline_code(line);
80
81 let markers = find_emphasis_markers(&line_no_code);
83 if markers.is_empty() {
84 return;
85 }
86
87 let spans = find_single_emphasis_spans(&line_no_code, markers);
89
90 for span in spans {
91 let marker_char = span.opening.as_char();
92 let col = span.opening.start_pos + 1; let abs_pos = line_start_pos + span.opening.start_pos;
94
95 emphasis_info.push((line_num, col, abs_pos, marker_char, span.content.clone()));
96 }
97 }
98}
99
100impl Rule for MD049EmphasisStyle {
101 fn name(&self) -> &'static str {
102 "MD049"
103 }
104
105 fn description(&self) -> &'static str {
106 "Emphasis style should be consistent"
107 }
108
109 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
110 let mut warnings = vec![];
111 let content = ctx.content;
112
113 if !content.contains('*') && !content.contains('_') {
115 return Ok(warnings);
116 }
117
118 let structure = DocumentStructure::new(content);
120
121 let mut emphasis_info = vec![];
123
124 let mut abs_pos = 0;
126
127 for (line_idx, line) in content.lines().enumerate() {
128 let line_num = line_idx + 1;
129
130 if structure.is_in_code_block(line_num) || structure.is_in_front_matter(line_num) {
132 abs_pos += line.len() + 1; continue;
134 }
135
136 if !line.contains('*') && !line.contains('_') {
138 abs_pos += line.len() + 1;
139 continue;
140 }
141
142 let line_start = abs_pos;
144 self.collect_emphasis_from_line(line, line_num, line_start, &mut emphasis_info);
145
146 abs_pos += line.len() + 1;
147 }
148
149 emphasis_info.retain(|(_, _, abs_pos, _, _)| !self.is_in_link(ctx, *abs_pos));
151
152 match self.config.style {
153 EmphasisStyle::Consistent => {
154 if emphasis_info.len() < 2 {
156 return Ok(warnings);
157 }
158
159 let target_marker = emphasis_info[0].3;
161
162 for (line_num, col, abs_pos, marker, content) in emphasis_info.iter().skip(1) {
164 if *marker != target_marker {
165 let emphasis_len = 1 + content.len() + 1;
167
168 warnings.push(LintWarning {
169 rule_name: Some(self.name()),
170 line: *line_num,
171 column: *col,
172 end_line: *line_num,
173 end_column: col + emphasis_len,
174 message: format!("Emphasis should use {target_marker} instead of {marker}"),
175 fix: Some(Fix {
176 range: *abs_pos..*abs_pos + emphasis_len,
177 replacement: format!("{target_marker}{content}{target_marker}"),
178 }),
179 severity: Severity::Warning,
180 });
181 }
182 }
183 }
184 EmphasisStyle::Asterisk | EmphasisStyle::Underscore => {
185 let (wrong_marker, correct_marker) = match self.config.style {
186 EmphasisStyle::Asterisk => ('_', '*'),
187 EmphasisStyle::Underscore => ('*', '_'),
188 EmphasisStyle::Consistent => {
189 ('_', '*')
192 }
193 };
194
195 for (line_num, col, abs_pos, marker, content) in &emphasis_info {
196 if *marker == wrong_marker {
197 let emphasis_len = 1 + content.len() + 1;
199
200 warnings.push(LintWarning {
201 rule_name: Some(self.name()),
202 line: *line_num,
203 column: *col,
204 end_line: *line_num,
205 end_column: col + emphasis_len,
206 message: format!("Emphasis should use {correct_marker} instead of {wrong_marker}"),
207 fix: Some(Fix {
208 range: *abs_pos..*abs_pos + emphasis_len,
209 replacement: format!("{correct_marker}{content}{correct_marker}"),
210 }),
211 severity: Severity::Warning,
212 });
213 }
214 }
215 }
216 }
217 Ok(warnings)
218 }
219
220 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
221 let warnings = self.check(ctx)?;
223
224 if warnings.is_empty() {
226 return Ok(ctx.content.to_string());
227 }
228
229 let mut fixes: Vec<_> = warnings
231 .iter()
232 .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
233 .collect();
234 fixes.sort_by(|a, b| b.0.cmp(&a.0));
235
236 let mut result = ctx.content.to_string();
238 for (start, end, replacement) in fixes {
239 if start < result.len() && end <= result.len() && start <= end {
240 result.replace_range(start..end, replacement);
241 }
242 }
243
244 Ok(result)
245 }
246
247 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
249 ctx.content.is_empty() || (!ctx.content.contains('*') && !ctx.content.contains('_'))
250 }
251
252 fn as_any(&self) -> &dyn std::any::Any {
253 self
254 }
255
256 fn default_config_section(&self) -> Option<(String, toml::Value)> {
257 let json_value = serde_json::to_value(&self.config).ok()?;
258 Some((
259 self.name().to_string(),
260 crate::rule_config_serde::json_to_toml_value(&json_value)?,
261 ))
262 }
263
264 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
265 where
266 Self: Sized,
267 {
268 let rule_config = crate::rule_config_serde::load_rule_config::<MD049Config>(config);
269 Box::new(Self::from_config_struct(rule_config))
270 }
271}
272
#[cfg(test)]
mod tests {
    use super::*;

    /// The rule reports its identifier as `MD049`.
    #[test]
    fn test_name() {
        assert_eq!(MD049EmphasisStyle::default().name(), "MD049");
    }

    /// Known style names map to their variants; anything else maps to `Consistent`.
    #[test]
    fn test_style_from_str() {
        let cases = [
            ("asterisk", EmphasisStyle::Asterisk),
            ("underscore", EmphasisStyle::Underscore),
            ("other", EmphasisStyle::Consistent),
        ];

        for (input, expected) in cases {
            assert_eq!(EmphasisStyle::from(input), expected);
        }
    }

    /// Emphasis inside links and reference definitions is ignored; only the
    /// free-standing underscore emphasis on line 5 is reported.
    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let content = r#"Check this [*asterisk*](https://example.com/*pattern*) link and [_underscore_](https://example.com/_private_).

Also see the [`__init__`][__init__] reference.

This should be _flagged_ since we're using asterisk style.

[__init__]: https://example.com/__init__.py"#;
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let rule = MD049EmphasisStyle::new(EmphasisStyle::Asterisk);

        let warnings = rule.check(&ctx).unwrap();

        assert_eq!(warnings.len(), 1);
        let flagged = &warnings[0];
        assert!(flagged.message.contains("Emphasis should use * instead of _"));
        assert_eq!(flagged.line, 5);
    }

    /// Asterisk emphasis in link text, URLs, and reference definitions is
    /// ignored, while the inline asterisk emphasis on line 1 is reported.
    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let content = r#"Check [*emphasis*](https://example.com/*test*) and inline *real emphasis* text.

[*link*]: https://example.com/*path*"#;
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let rule = MD049EmphasisStyle::new(EmphasisStyle::Underscore);

        let warnings = rule.check(&ctx).unwrap();

        assert_eq!(warnings.len(), 1);
        let flagged = &warnings[0];
        assert!(flagged.message.contains("Emphasis should use _ instead of *"));
        assert_eq!(flagged.line, 1);
    }
}