1use serde::Deserialize;
2use std::rc::Rc;
3
4use once_cell::sync::Lazy;
5use regex::Regex;
6use tree_sitter::Node;
7
8use crate::{
9 linter::{range_from_tree_sitter, Context, RuleViolation},
10 rules::{Rule, RuleLinter, RuleType},
11};
12
13#[derive(Debug, PartialEq, Clone, Deserialize)]
15pub enum EmphasisStyle {
16 #[serde(rename = "consistent")]
17 Consistent,
18 #[serde(rename = "asterisk")]
19 Asterisk,
20 #[serde(rename = "underscore")]
21 Underscore,
22}
23
24impl Default for EmphasisStyle {
25 fn default() -> Self {
26 Self::Consistent
27 }
28}
29
30#[derive(Debug, PartialEq, Clone, Deserialize)]
31pub struct MD049EmphasisStyleTable {
32 #[serde(default)]
33 pub style: EmphasisStyle,
34}
35
36impl Default for MD049EmphasisStyleTable {
37 fn default() -> Self {
38 Self {
39 style: EmphasisStyle::Consistent,
40 }
41 }
42}
43
44static ASTERISK_EMPHASIS_REGEX: Lazy<Regex> =
46 Lazy::new(|| Regex::new(r"\*([^*\n]+?)\*").expect("Invalid asterisk emphasis regex"));
47
48static UNDERSCORE_EMPHASIS_REGEX: Lazy<Regex> =
49 Lazy::new(|| Regex::new(r"_([^_\n]+?)_").expect("Invalid underscore emphasis regex"));
50
51static CODE_SPAN_REGEX: Lazy<Regex> =
53 Lazy::new(|| Regex::new(r"`[^`\n]*`").expect("Invalid code span regex"));
54
/// Delimiter style of an emphasis span found in the document text.
#[derive(Clone, Copy, Debug, PartialEq)]
enum DetectedEmphasisStyle {
    /// The span is delimited by `*`.
    Asterisk,
    /// The span is delimited by `_`.
    Underscore,
}
60
61pub(crate) struct MD049Linter {
62 context: Rc<Context>,
63 violations: Vec<RuleViolation>,
64 document_style: Option<DetectedEmphasisStyle>,
65}
66
67impl MD049Linter {
68 pub fn new(context: Rc<Context>) -> Self {
69 Self {
70 context,
71 violations: Vec::new(),
72 document_style: None,
73 }
74 }
75
76 fn get_configured_style(&self) -> EmphasisStyle {
77 self.context
78 .config
79 .linters
80 .settings
81 .emphasis_style
82 .style
83 .clone()
84 }
85
86 fn is_in_code_context(&self, node: &Node) -> bool {
87 let mut current = Some(*node);
89 while let Some(node_to_check) = current {
90 match node_to_check.kind() {
91 "code_span" | "fenced_code_block" | "indented_code_block" => {
92 return true;
93 }
94 _ => {
95 current = node_to_check.parent();
96 }
97 }
98 }
99 false
100 }
101
102 fn is_intraword_emphasis(
103 &self,
104 _text: &str,
105 start_offset: usize,
106 emphasis_start: usize,
107 emphasis_end: usize,
108 ) -> bool {
109 let emphasis_global_start = start_offset + emphasis_start;
110 let emphasis_global_end = start_offset + emphasis_end;
111 let source = self.context.get_document_content();
112
113 let before_is_word_char = if emphasis_global_start > 0 {
115 if let Some(ch) = source.chars().nth(emphasis_global_start - 1) {
116 ch.is_alphanumeric() || ch == '_'
117 } else {
118 false
119 }
120 } else {
121 false
122 };
123
124 let after_is_word_char = if emphasis_global_end < source.len() {
126 if let Some(ch) = source.chars().nth(emphasis_global_end) {
127 ch.is_alphanumeric() || ch == '_'
128 } else {
129 false
130 }
131 } else {
132 false
133 };
134
135 before_is_word_char || after_is_word_char
136 }
137
138 fn process_emphasis_matches(
139 &mut self,
140 text: &str,
141 start_offset: usize,
142 regex: &Regex,
143 style: DetectedEmphasisStyle,
144 ) {
145 let code_span_ranges: Vec<(usize, usize)> = CODE_SPAN_REGEX
147 .find_iter(text)
148 .map(|m| (m.start(), m.end()))
149 .collect();
150
151 for capture in regex.find_iter(text) {
152 let match_start = capture.start();
153 let match_end = capture.end();
154
155 let in_code_span = code_span_ranges
157 .iter()
158 .any(|(code_start, code_end)| match_start < *code_end && match_end > *code_start);
159
160 if in_code_span {
161 continue; }
163
164 if self.is_intraword_emphasis(text, start_offset, match_start, match_end) {
166 continue;
168 }
169
170 let configured_style = self.get_configured_style();
171 let should_report_violation = match configured_style {
172 EmphasisStyle::Asterisk => style != DetectedEmphasisStyle::Asterisk,
173 EmphasisStyle::Underscore => style != DetectedEmphasisStyle::Underscore,
174 EmphasisStyle::Consistent => {
175 if let Some(doc_style) = self.document_style {
176 style != doc_style
177 } else {
178 self.document_style = Some(style);
180 false }
182 }
183 };
184
185 if should_report_violation {
186 let expected_style = match configured_style {
187 EmphasisStyle::Asterisk => "asterisk",
188 EmphasisStyle::Underscore => "underscore",
189 EmphasisStyle::Consistent => match self.document_style {
190 Some(DetectedEmphasisStyle::Asterisk) => "asterisk",
191 Some(DetectedEmphasisStyle::Underscore) => "underscore",
192 None => "consistent", },
194 };
195
196 let actual_style = match style {
197 DetectedEmphasisStyle::Asterisk => "asterisk",
198 DetectedEmphasisStyle::Underscore => "underscore",
199 };
200
201 let global_start = start_offset + match_start;
203 let global_end = start_offset + match_end;
204
205 let range = tree_sitter::Range {
206 start_byte: global_start,
207 end_byte: global_end,
208 start_point: self.byte_to_point(global_start),
209 end_point: self.byte_to_point(global_end),
210 };
211
212 self.violations.push(RuleViolation::new(
213 &MD049,
214 format!("Expected: {expected_style}; Actual: {actual_style}"),
215 self.context.file_path.clone(),
216 range_from_tree_sitter(&range),
217 ));
218 }
219 }
220 }
221
222 fn find_emphasis_violations_in_text(&mut self, node: &Node) {
223 if self.is_in_code_context(node) {
224 return;
225 }
226
227 let start_byte = node.start_byte();
228 let text = {
229 let source = self.context.get_document_content();
230 source[start_byte..node.end_byte()].to_string()
231 };
232
233 self.process_emphasis_matches(
237 &text,
238 start_byte,
239 &ASTERISK_EMPHASIS_REGEX,
240 DetectedEmphasisStyle::Asterisk,
241 );
242
243 self.process_emphasis_matches(
245 &text,
246 start_byte,
247 &UNDERSCORE_EMPHASIS_REGEX,
248 DetectedEmphasisStyle::Underscore,
249 );
250 }
251
252 fn byte_to_point(&self, byte_pos: usize) -> tree_sitter::Point {
253 let source = self.context.get_document_content();
254 let mut line = 0;
255 let mut column = 0;
256
257 for (i, ch) in source.char_indices() {
258 if i >= byte_pos {
259 break;
260 }
261 if ch == '\n' {
262 line += 1;
263 column = 0;
264 } else {
265 column += 1;
266 }
267 }
268
269 tree_sitter::Point { row: line, column }
270 }
271}
272
273impl RuleLinter for MD049Linter {
274 fn feed(&mut self, node: &Node) {
275 match node.kind() {
276 "text" | "inline" => {
278 self.find_emphasis_violations_in_text(node);
279 }
280 _ => {}
281 }
282 }
283
284 fn finalize(&mut self) -> Vec<RuleViolation> {
285 std::mem::take(&mut self.violations)
286 }
287}
288
289pub const MD049: Rule = Rule {
290 id: "MD049",
291 alias: "emphasis-style",
292 tags: &["emphasis"],
293 description: "Emphasis style",
294 rule_type: RuleType::Token,
295 required_nodes: &["emphasis"],
296 new_linter: |context| Box::new(MD049Linter::new(context)),
297};
298
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Configuration with only the `emphasis-style` rule enabled, at error severity.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![("emphasis-style", RuleSeverity::Error)])
    }

    /// Lints `input` as `test.md` with [`test_config`] and counts the MD049 violations.
    fn md049_violation_count(input: &str) -> usize {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        violations
            .iter()
            .filter(|violation| violation.rule().id == "MD049")
            .count()
    }

    #[test]
    fn test_consistent_style_asterisk_should_pass() {
        let input = "This has *valid* emphasis and *more* emphasis.";

        assert_eq!(md049_violation_count(input), 0);
    }

    #[test]
    fn test_consistent_style_underscore_should_pass() {
        let input = "This has _valid_ emphasis and _more_ emphasis.";

        assert_eq!(md049_violation_count(input), 0);
    }

    #[test]
    fn test_mixed_styles_should_fail() {
        let input = "This has *asterisk* emphasis and _underscore_ emphasis.";

        assert!(md049_violation_count(input) > 0);
    }

    #[test]
    fn test_intraword_emphasis_should_be_preserved() {
        let input = "This has apple*banana*cherry and normal *emphasis* as well.";

        assert_eq!(md049_violation_count(input), 0);
    }

    #[test]
    fn test_nested_emphasis_mixed_styles() {
        let input = "This paragraph *nests both _kinds_ of emphasis* marker.";

        assert!(md049_violation_count(input) > 0);
    }
}