1pub mod config;
2pub mod init;
3pub mod lint_context;
4pub mod markdownlint_config;
5pub mod profiling;
6pub mod rule;
7pub mod rules;
8pub mod utils;
9
10#[cfg(feature = "python")]
11pub mod python;
12
13pub use rules::heading_utils::{Heading, HeadingStyle};
14pub use rules::*;
15
16pub use crate::lint_context::LintContext;
17use crate::rule::{LintResult, Rule, RuleCategory};
18use crate::utils::document_structure::DocumentStructure;
19use std::time::Instant;
20
/// Coarse summary of which kinds of Markdown constructs a document appears to
/// contain.
///
/// The flags are filled in by `ContentCharacteristics::analyze` using cheap
/// per-line heuristics and are consulted by `should_skip_rule` to decide
/// whether a rule of a given category can be skipped for this document.
#[derive(Debug, Default)]
struct ContentCharacteristics {
    /// The document appears to contain at least one heading.
    has_headings: bool,
    /// The document appears to contain at least one list item.
    has_lists: bool,
    /// The document appears to contain at least one link.
    has_links: bool,
    /// The document appears to contain inline code or a code block.
    has_code: bool,
    /// The document appears to contain emphasis markers.
    has_emphasis: bool,
    /// The document appears to contain raw HTML.
    has_html: bool,
    /// The document appears to contain a table.
    has_tables: bool,
    /// The document appears to contain a blockquote.
    has_blockquotes: bool,
    /// The document appears to contain an image.
    has_images: bool,
    /// Number of lines in the analyzed content.
    line_count: usize,
}
35
36impl ContentCharacteristics {
37 fn analyze(content: &str) -> Self {
38 let mut chars = Self::default();
39 chars.line_count = content.lines().count();
40
41 let mut has_atx_heading = false;
43 let mut has_setext_heading = false;
44
45 for line in content.lines() {
46 let trimmed = line.trim();
47
48 if !has_atx_heading && trimmed.starts_with('#') {
50 has_atx_heading = true;
51 }
52 if !has_setext_heading && (trimmed.chars().all(|c| c == '=' || c == '-') && trimmed.len() > 1) {
53 has_setext_heading = true;
54 }
55
56 if !chars.has_lists && (line.contains("* ") || line.contains("- ") || line.contains("+ ")) {
58 chars.has_lists = true;
59 }
60 if !chars.has_lists && line.chars().next().map_or(false, |c| c.is_ascii_digit()) && line.contains(". ") {
61 chars.has_lists = true;
62 }
63 if !chars.has_links && line.contains('[') {
64 chars.has_links = true;
65 }
66 if !chars.has_images && line.contains("![") {
67 chars.has_images = true;
68 }
69 if !chars.has_code && (line.contains('`') || line.contains("~~~")) {
70 chars.has_code = true;
71 }
72 if !chars.has_emphasis && (line.contains('*') || line.contains('_')) {
73 chars.has_emphasis = true;
74 }
75 if !chars.has_html && line.contains('<') {
76 chars.has_html = true;
77 }
78 if !chars.has_tables && line.contains('|') {
79 chars.has_tables = true;
80 }
81 if !chars.has_blockquotes && line.starts_with('>') {
82 chars.has_blockquotes = true;
83 }
84 }
85
86 chars.has_headings = has_atx_heading || has_setext_heading;
87 chars
88 }
89
90 fn should_skip_rule(&self, rule: &dyn Rule) -> bool {
92 match rule.category() {
93 RuleCategory::Heading => !self.has_headings,
94 RuleCategory::List => !self.has_lists,
95 RuleCategory::Link => !self.has_links && !self.has_images,
96 RuleCategory::Image => !self.has_images,
97 RuleCategory::CodeBlock => !self.has_code,
98 RuleCategory::Html => !self.has_html,
99 RuleCategory::Emphasis => !self.has_emphasis,
100 RuleCategory::Blockquote => !self.has_blockquotes,
101 RuleCategory::Table => !self.has_tables,
102 RuleCategory::Whitespace | RuleCategory::FrontMatter | RuleCategory::Other => false,
104 }
105 }
106}
107
108pub fn lint(content: &str, rules: &[Box<dyn Rule>], _verbose: bool) -> LintResult {
112 let mut warnings = Vec::new();
113 let _overall_start = Instant::now();
114
115 if content.is_empty() {
117 return Ok(warnings);
118 }
119
120 let characteristics = ContentCharacteristics::analyze(content);
122
123 let applicable_rules: Vec<_> = rules
125 .iter()
126 .filter(|rule| !characteristics.should_skip_rule(rule.as_ref()))
127 .collect();
128
129 let _total_rules = rules.len();
131 let _applicable_count = applicable_rules.len();
132
133 let structure = DocumentStructure::new(content);
135
136 let ast_rules_count = applicable_rules.iter().filter(|rule| rule.uses_ast()).count();
138 let ast = if ast_rules_count > 0 {
139 Some(crate::utils::ast_utils::get_cached_ast(content))
140 } else {
141 None
142 };
143
144 let lint_ctx = crate::lint_context::LintContext::new(content);
146
147 for rule in applicable_rules {
148 let _rule_start = Instant::now();
149
150 let result = if rule.uses_ast() && ast.is_some() {
152 rule.as_maybe_ast()
154 .and_then(|ext| ext.check_with_ast_opt(&lint_ctx, ast.as_ref().unwrap()))
155 .unwrap_or_else(|| rule.check_with_ast(&lint_ctx, ast.as_ref().unwrap()))
156 } else {
157 rule.as_maybe_document_structure()
159 .and_then(|ext| ext.check_with_structure_opt(&lint_ctx, &structure))
160 .unwrap_or_else(|| rule.check(&lint_ctx))
161 };
162
163 match result {
164 Ok(rule_warnings) => {
165 warnings.extend(rule_warnings);
166 }
167 Err(e) => {
168 log::error!("Error checking rule {}: {}", rule.name(), e);
169 return Err(e);
170 }
171 }
172
173 #[cfg(not(test))]
174 if _verbose {
175 let rule_duration = _rule_start.elapsed();
176 if rule_duration.as_millis() > 500 {
177 log::debug!("Rule {} took {:?}", rule.name(), rule_duration);
178 }
179 }
180 }
181
182 #[cfg(not(test))]
183 if _verbose {
184 let skipped_rules = _total_rules - _applicable_count;
185 if skipped_rules > 0 {
186 log::debug!("Skipped {} of {} rules based on content analysis", skipped_rules, _total_rules);
187 }
188 if ast.is_some() {
189 log::debug!("Used shared AST for {} rules", ast_rules_count);
190 }
191 }
192
193 Ok(warnings)
194}
195
196pub fn get_profiling_report() -> String {
198 profiling::get_report()
199}
200
201pub fn reset_profiling() {
203 profiling::reset()
204}
205
206pub fn get_regex_cache_stats() -> std::collections::HashMap<String, u64> {
208 crate::utils::regex_cache::get_cache_stats()
209}
210
211pub fn get_ast_cache_stats() -> std::collections::HashMap<u64, u64> {
213 crate::utils::ast_utils::get_ast_cache_stats()
214}
215
216pub fn clear_all_caches() {
218 crate::utils::ast_utils::clear_ast_cache();
219 }
221
222pub fn get_cache_performance_report() -> String {
224 let regex_stats = get_regex_cache_stats();
225 let ast_stats = get_ast_cache_stats();
226
227 let mut report = String::new();
228
229 report.push_str("=== Cache Performance Report ===\n\n");
230
231 report.push_str("Regex Cache:\n");
233 if regex_stats.is_empty() {
234 report.push_str(" No regex patterns cached\n");
235 } else {
236 let total_usage: u64 = regex_stats.values().sum();
237 report.push_str(&format!(" Total patterns: {}\n", regex_stats.len()));
238 report.push_str(&format!(" Total usage: {}\n", total_usage));
239
240 let mut sorted_patterns: Vec<_> = regex_stats.iter().collect();
242 sorted_patterns.sort_by(|a, b| b.1.cmp(a.1));
243
244 report.push_str(" Top patterns by usage:\n");
245 for (pattern, count) in sorted_patterns.iter().take(5) {
246 let truncated_pattern = if pattern.len() > 50 {
247 format!("{}...", &pattern[..47])
248 } else {
249 pattern.to_string()
250 };
251 report.push_str(&format!(" {} ({}x): {}\n", count, pattern.len().min(50), truncated_pattern));
252 }
253 }
254
255 report.push_str("\n");
256
257 report.push_str("AST Cache:\n");
259 if ast_stats.is_empty() {
260 report.push_str(" No AST nodes cached\n");
261 } else {
262 let total_usage: u64 = ast_stats.values().sum();
263 report.push_str(&format!(" Total ASTs: {}\n", ast_stats.len()));
264 report.push_str(&format!(" Total usage: {}\n", total_usage));
265
266 if total_usage > ast_stats.len() as u64 {
267 let cache_hit_rate = ((total_usage - ast_stats.len() as u64) as f64 / total_usage as f64) * 100.0;
268 report.push_str(&format!(" Cache hit rate: {:.1}%\n", cache_hit_rate));
269 }
270 }
271
272 report
273}