use lazy_static::lazy_static;
use regex::Regex;
use crate::rule::{LintResult, LintWarning, Rule, LintError, Fix};
// Regular expressions used by the MD037 rule, compiled once on first use.
lazy_static! {
// --- Block-level constructs whose lines are not linted ---
// Backtick fence: optional leading whitespace, three backticks, then any
// text without backticks (e.g. an info string). A bare closing fence also
// matches this pattern because the trailing text may be empty.
static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap();
// Closing backtick fence: three backticks followed only by whitespace.
static ref FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)```\s*$").unwrap();
// Tilde fence equivalents of the two patterns above.
static ref ALTERNATE_FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)~~~(?:[^~\r\n]*)$").unwrap();
static ref ALTERNATE_FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)~~~\s*$").unwrap();
// A line that starts with four or more whitespace characters.
static ref INDENTED_CODE_BLOCK: Regex = Regex::new(r"^(\s{4,})").unwrap();
// A line consisting of `---` plus optional trailing whitespace.
static ref FRONT_MATTER_DELIM: Regex = Regex::new(r"^---\s*$").unwrap();
// --- Emphasis with stray inner whitespace ---
// Each pattern has three alternatives: space on both sides, space only after
// the opening marker, space only before the closing marker. Exactly one of
// capture groups 1..=3 holds the emphasized text without the stray spaces.
static ref ASTERISK_EMPHASIS: Regex = Regex::new(r"\*\s+([^*\s][^*]*?)\s+\*|\*\s+([^*\s][^*]*?)\*|\*([^*\s][^*]*?)\s+\*").unwrap();
static ref DOUBLE_ASTERISK_EMPHASIS: Regex = Regex::new(r"\*\*\s+([^*\s][^*]*?)\s+\*\*|\*\*\s+([^*\s][^*]*?)\*\*|\*\*([^*\s][^*]*?)\s+\*\*").unwrap();
static ref UNDERSCORE_EMPHASIS: Regex = Regex::new(r"_\s+([^_\s][^_]*?)\s+_|_\s+([^_\s][^_]*?)_|_([^_\s][^_]*?)\s+_").unwrap();
static ref DOUBLE_UNDERSCORE_EMPHASIS: Regex = Regex::new(r"__\s+([^_\s][^_]*?)\s+__|__\s+([^_\s][^_]*?)__|__([^_\s][^_]*?)\s+__").unwrap();
// --- Unbalanced markers ---
// NOTE(review): none of the four UNBALANCED_* patterns is referenced by the
// functions visible in this file; confirm whether they are still needed.
static ref UNBALANCED_ASTERISK: Regex = Regex::new(r"\*([^*]+)$|^([^*]*)\*").unwrap();
static ref UNBALANCED_DOUBLE_ASTERISK: Regex = Regex::new(r"\*\*([^*]+)$|^([^*]*)\*\*").unwrap();
static ref UNBALANCED_UNDERSCORE: Regex = Regex::new(r"_([^_]+)$|^([^_]*)_").unwrap();
static ref UNBALANCED_DOUBLE_UNDERSCORE: Regex = Regex::new(r"__([^_]+)$|^([^_]*)__").unwrap();
// Inline code span: a run of backticks, content that neither starts nor ends
// with a backtick, and another run of backticks. The opening and closing run
// lengths are not required to be equal.
static ref INLINE_CODE: Regex = Regex::new(r"(`+)([^`]|[^`].*?[^`])(`+)").unwrap();
// --- Heuristics used to skip lines before emphasis checking ---
// Unordered list item: optional indent, one of `*`, `+`, `-`, then whitespace.
static ref LIST_MARKER: Regex = Regex::new(r"^\s*[*+-]\s+").unwrap();
// Line that begins with a well-formed opening marker (marker immediately
// followed by a non-space, non-marker character).
static ref VALID_START_EMPHASIS: Regex = Regex::new(r"^(\*\*[^*\s]|\*[^*\s]|__[^_\s]|_[^_\s])").unwrap();
// Line that begins (after an optional `*` bullet) with `**label**:`.
static ref DOC_METADATA_PATTERN: Regex = Regex::new(r"^\s*\*?\s*\*\*[^*]+\*\*\s*:").unwrap();
// Any `**text**` run with no asterisk inside.
static ref BOLD_TEXT_PATTERN: Regex = Regex::new(r"\*\*[^*]+\*\*").unwrap();
// NOTE(review): the two MULTI_LINE_* patterns are not referenced by the
// functions visible in this file; confirm whether they are still needed.
static ref MULTI_LINE_EMPHASIS_START: Regex = Regex::new(r"(\*\*|\*|__|_)([^*_\s].*?)$").unwrap();
static ref MULTI_LINE_EMPHASIS_END: Regex = Regex::new(r"^(.*?)(\*\*|\*|__|_)").unwrap();
}
/// Line-by-line tracker for regions whose content must not be linted:
/// backtick fences, tilde fences and a leading front-matter block.
///
/// Feed every line to [`CodeBlockState::update`] in document order, then ask
/// [`CodeBlockState::is_in_code_block`] whether that line should be skipped.
struct CodeBlockState {
    /// Inside a ``` fenced block.
    in_fenced_code: bool,
    /// Inside a ~~~ fenced block.
    in_alternate_fenced: bool,
    /// Inside the front-matter block at the top of the document.
    in_front_matter: bool,
    /// Set once the first line has been processed; front matter may only
    /// open on the very first line of the document.
    seen_first_line: bool,
}

impl CodeBlockState {
    /// Creates a tracker positioned before the first line of a document.
    fn new() -> Self {
        CodeBlockState {
            in_fenced_code: false,
            in_alternate_fenced: false,
            in_front_matter: false,
            seen_first_line: false,
        }
    }

    /// Returns `true` when `line` must be skipped: either the tracker is
    /// inside a fence / front matter, or the line itself is indented by four
    /// or more whitespace characters.
    fn is_in_code_block(&self, line: &str) -> bool {
        if self.in_fenced_code || self.in_alternate_fenced || self.in_front_matter {
            return true;
        }
        INDENTED_CODE_BLOCK.is_match(line)
    }

    /// Advances the tracker with the next line of the document.
    ///
    /// Transitions are decided from the current state first. A bare closing
    /// fence also matches the fence *start* pattern, so testing "start"
    /// before "end" would re-open the block on its closing line and the
    /// block would never terminate.
    fn update(&mut self, line: &str) {
        let is_first_line = !self.seen_first_line;
        self.seen_first_line = true;

        // While inside a region, only that region's own terminator matters;
        // delimiters of other kinds are plain content.
        if self.in_front_matter {
            if FRONT_MATTER_DELIM.is_match(line) {
                self.in_front_matter = false;
            }
            return;
        }
        if self.in_fenced_code {
            if FENCED_CODE_BLOCK_END.is_match(line) {
                self.in_fenced_code = false;
            }
            return;
        }
        if self.in_alternate_fenced {
            if ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) {
                self.in_alternate_fenced = false;
            }
            return;
        }

        // Outside every region: see whether this line opens one. A `---`
        // line later in the document is a thematic break or setext underline,
        // not front matter, so it is only honoured on the first line.
        if is_first_line && FRONT_MATTER_DELIM.is_match(line) {
            self.in_front_matter = true;
        } else if FENCED_CODE_BLOCK_START.is_match(line) {
            self.in_fenced_code = true;
        } else if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
            self.in_alternate_fenced = true;
        }
    }
}
/// Returns a copy of `line` in which every inline code span (backticks
/// included) is overwritten with spaces.
///
/// Each span is replaced by exactly as many space bytes as it occupied, so
/// the result has the same byte length as `line` and byte offsets outside
/// code spans are unchanged.
fn replace_inline_code(line: &str) -> String {
    let mut masked = line.to_string();
    for span in INLINE_CODE.find_iter(line) {
        let (from, to) = (span.start(), span.end());
        // Same-length replacement: offsets of later matches stay valid.
        let blanks = " ".repeat(to - from);
        masked.replace_range(from..to, &blanks);
    }
    masked
}
/// Lint rule MD037: reports (and can fix) whitespace placed directly inside
/// emphasis markers, e.g. `* text *` or `__ text __`.
#[derive(Default)]
pub struct MD037SpacesAroundEmphasis;

impl Rule for MD037SpacesAroundEmphasis {
    /// Rule identifier.
    fn name(&self) -> &'static str {
        "MD037"
    }

    /// Short human-readable description of the rule.
    fn description(&self) -> &'static str {
        "Spaces inside emphasis markers"
    }

    /// Scans `content` line by line and collects one warning per emphasis
    /// span with stray inner whitespace. Lines inside fenced/indented code
    /// or front matter are skipped, and inline code is masked before
    /// matching. Reported line numbers are 1-based.
    fn check(&self, content: &str) -> LintResult {
        let mut warnings = Vec::new();
        let mut code_block_state = CodeBlockState::new();
        for (index, line) in content.lines().enumerate() {
            code_block_state.update(line);
            if code_block_state.is_in_code_block(line) {
                continue;
            }
            let line_no_code = replace_inline_code(line);
            check_emphasis_patterns(&line_no_code, index + 1, line, &mut warnings);
        }
        Ok(warnings)
    }

    /// Returns `content` with stray whitespace inside emphasis markers
    /// removed. Lines inside code blocks or front matter are copied verbatim.
    ///
    /// Only the emphasis spans are rewritten: trailing blank lines and
    /// trailing spaces at the end of the document are preserved, and a final
    /// newline is kept exactly when the input had one. Lines are re-joined
    /// with `\n` (input is split with `str::lines`).
    fn fix(&self, content: &str) -> Result<String, LintError> {
        let mut code_block_state = CodeBlockState::new();
        let fixed_lines: Vec<String> = content
            .lines()
            .map(|line| {
                code_block_state.update(line);
                if code_block_state.is_in_code_block(line) {
                    line.to_string()
                } else {
                    fix_emphasis_patterns(line)
                }
            })
            .collect();

        let mut result = fixed_lines.join("\n");
        // `lines()` drops the final line terminator; put it back without
        // trimming anything else from the document.
        if content.ends_with('\n') {
            result.push('\n');
        }
        Ok(result)
    }
}
fn check_emphasis_patterns(line: &str, line_num: usize, original_line: &str, warnings: &mut Vec<LintWarning>) {
if LIST_MARKER.is_match(line) {
return;
}
if line.trim_start().starts_with("* *") && line.contains("*:") {
return;
}
if line.trim_start().starts_with("* **") && line.contains("**:") {
return;
}
if DOC_METADATA_PATTERN.is_match(line) {
return;
}
if BOLD_TEXT_PATTERN.is_match(line) {
return;
}
if VALID_START_EMPHASIS.is_match(line) {
let emphasis_start = line.find(' ').unwrap_or(line.len());
if emphasis_start < line.len() {
let rest_of_line = &line[emphasis_start..];
check_emphasis_with_pattern(rest_of_line, &ASTERISK_EMPHASIS, "*", line_num, original_line, warnings);
check_emphasis_with_pattern(rest_of_line, &DOUBLE_ASTERISK_EMPHASIS, "**", line_num, original_line, warnings);
check_emphasis_with_pattern(rest_of_line, &UNDERSCORE_EMPHASIS, "_", line_num, original_line, warnings);
check_emphasis_with_pattern(rest_of_line, &DOUBLE_UNDERSCORE_EMPHASIS, "__", line_num, original_line, warnings);
}
return;
}
check_emphasis_with_pattern(line, &ASTERISK_EMPHASIS, "*", line_num, original_line, warnings);
check_emphasis_with_pattern(line, &DOUBLE_ASTERISK_EMPHASIS, "**", line_num, original_line, warnings);
check_emphasis_with_pattern(line, &UNDERSCORE_EMPHASIS, "_", line_num, original_line, warnings);
check_emphasis_with_pattern(line, &DOUBLE_UNDERSCORE_EMPHASIS, "__", line_num, original_line, warnings);
}
/// Appends one warning to `warnings` for every match of `pattern` in `line`.
///
/// A match that begins at offset 0 of a line starting with `*` is ignored.
/// The reported column is 1-based and derived from `original_line`; each
/// warning carries a `Fix` whose replacement is produced by
/// `fix_specific_emphasis_section` for the matched byte range.
fn check_emphasis_with_pattern(
    line: &str,
    pattern: &Regex,
    marker_type: &str,
    line_num: usize,
    original_line: &str,
    warnings: &mut Vec<LintWarning>
) {
    let leading_asterisk = line.starts_with('*');
    let hits = pattern
        .find_iter(line)
        .filter(|hit| !(hit.start() == 0 && leading_asterisk))
        .map(|hit| {
            let column = find_actual_position(original_line, hit.start()) + 1;
            LintWarning {
                line: line_num,
                column,
                message: format!("Spaces inside {} emphasis markers", marker_type),
                fix: Some(Fix {
                    line: line_num,
                    column,
                    replacement: fix_specific_emphasis_section(original_line, hit.start(), hit.end()),
                }),
            }
        });
    warnings.extend(hits);
}
/// Converts a byte offset found in the processed (inline-code-masked) line
/// into a 0-based character index in `original_line`.
///
/// Masking replaces each code span with the same number of bytes, so a byte
/// offset in the processed line is the same byte offset in the original;
/// no characters need to be skipped for code spans. The only conversion
/// required is bytes -> characters, so that multi-byte text before the match
/// does not inflate the column.
///
/// Offsets past the end of the line are clamped to the number of characters
/// in the line.
fn find_actual_position(original_line: &str, position_in_processed: usize) -> usize {
    original_line
        .char_indices()
        .take_while(|&(byte_index, _)| byte_index < position_in_processed)
        .count()
}
fn fix_specific_emphasis_section(line: &str, start_approx: usize, end_approx: usize) -> String {
let section = &line[start_approx.min(line.len())..end_approx.min(line.len())];
if section.starts_with("**") && section.ends_with("**") {
let content = section.trim_start_matches("**").trim_end_matches("**").trim();
return format!("**{}**", content);
} else if section.starts_with('*') && section.ends_with('*') {
let content = section.trim_start_matches('*').trim_end_matches('*').trim();
return format!("*{}*", content);
} else if section.starts_with("__") && section.ends_with("__") {
let content = section.trim_start_matches("__").trim_end_matches("__").trim();
return format!("__{}__", content);
} else if section.starts_with('_') && section.ends_with('_') {
let content = section.trim_start_matches('_').trim_end_matches('_').trim();
return format!("_{}_", content);
}
fix_emphasis_patterns(line)
}
fn fix_emphasis_patterns(line: &str) -> String {
let (line_no_code, code_spans) = extract_code_spans(line);
let mut result = line_no_code;
result = ASTERISK_EMPHASIS.replace_all(&result, |caps: ®ex::Captures| {
for i in 1..4 {
if let Some(m) = caps.get(i) {
return format!("*{}*", m.as_str());
}
}
caps.get(0).map_or("", |m| m.as_str()).to_string()
}).to_string();
result = DOUBLE_ASTERISK_EMPHASIS.replace_all(&result, |caps: ®ex::Captures| {
for i in 1..4 {
if let Some(m) = caps.get(i) {
return format!("**{}**", m.as_str());
}
}
caps.get(0).map_or("", |m| m.as_str()).to_string()
}).to_string();
result = UNDERSCORE_EMPHASIS.replace_all(&result, |caps: ®ex::Captures| {
for i in 1..4 {
if let Some(m) = caps.get(i) {
return format!("_{}_", m.as_str());
}
}
caps.get(0).map_or("", |m| m.as_str()).to_string()
}).to_string();
result = DOUBLE_UNDERSCORE_EMPHASIS.replace_all(&result, |caps: ®ex::Captures| {
for i in 1..4 {
if let Some(m) = caps.get(i) {
return format!("__{}__", m.as_str());
}
}
caps.get(0).map_or("", |m| m.as_str()).to_string()
}).to_string();
restore_code_spans(result, code_spans)
}
fn extract_code_spans(line: &str) -> (String, Vec<(String, String)>) {
let mut result = line.to_string();
let mut code_spans = Vec::new();
let mut positions = Vec::new();
for (i, cap) in INLINE_CODE.captures_iter(line).enumerate() {
if let Some(m) = cap.get(0) {
let code_span = line[m.start()..m.end()].to_string();
let placeholder = format!("CODE_SPAN_{}", i);
code_spans.push((placeholder.clone(), code_span));
positions.push((m.start(), m.end(), placeholder));
}
}
positions.sort_by(|a, b| b.0.cmp(&a.0));
for (start, end, placeholder) in positions {
if start < result.len() && end <= result.len() {
result.replace_range(start..end, &placeholder);
}
}
(result, code_spans)
}
/// Substitutes every placeholder in `content` with the code span it stands
/// for.
///
/// * `content` - text containing placeholders.
/// * `code_spans` - `(placeholder, original span)` pairs.
///
/// Longer placeholders are restored first: `CODE_SPAN_1` is a prefix of
/// `CODE_SPAN_10`, so replacing in index order would corrupt the latter.
fn restore_code_spans(mut content: String, mut code_spans: Vec<(String, String)>) -> String {
    // Stable sort: equal-length placeholders keep their original order.
    code_spans.sort_by(|a, b| b.0.len().cmp(&a.0.len()));
    for (placeholder, code_span) in code_spans {
        content = content.replace(&placeholder, &code_span);
    }
    content
}