1use glob::glob;
2use pulldown_cmark::{Event, Options, Parser};
3use serde::Serialize;
4use std::collections::HashSet;
5use std::fs;
6use std::path::{Path, PathBuf};
7use thiserror::Error;
8
9#[derive(Debug, Error)]
10pub enum CheckError {
11 #[error("IO error: {0}")]
12 Io(#[from] std::io::Error),
13 #[error("Invalid UTF-8 in file: {0}")]
14 Utf8Error(#[from] std::string::FromUtf8Error),
15}
16
17#[derive(Debug, Clone, PartialEq)] pub struct CheckConfig {
19 pub recursive: bool,
20 pub output_format: OutputFormat,
21 pub strict: bool,
22 pub ignore_warnings: bool,
23}
24
/// Presentation formats a caller can request for check results.
///
/// The enum carries no data, so it is `Copy` and `Eq`: values can be passed
/// around and compared without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Human-oriented output.
    Human,
    /// JSON output.
    Json,
}
30
31#[derive(Debug, Serialize)]
32pub struct Issue {
33 pub line: usize,
34 pub message: String,
35 pub context: Option<String>,
36}
37
38#[derive(Debug, Serialize)]
39pub struct CheckResult {
40 pub file_path: PathBuf,
41 pub errors: Vec<Issue>,
42 pub warnings: Vec<Issue>,
43}
44
45pub fn check_files(paths: &[PathBuf], config: &CheckConfig) -> Vec<CheckResult> {
46 let mut all_files = Vec::new();
47
48 for path in paths {
49 if path.is_dir() {
50 if config.recursive {
51 let pattern = path.join("**/*.md").to_string_lossy().to_string();
52 if let Ok(entries) = glob(&pattern) {
53 for entry in entries.flatten() {
54 all_files.push(entry);
55 }
56 }
57 } else {
58 if let Ok(entries) = fs::read_dir(path) {
59 for entry in entries.flatten() {
60 let file_path = entry.path();
61 if file_path
62 .extension()
63 .map_or(false, |ext| ext == "md" || ext == "markdown")
64 {
65 all_files.push(file_path);
66 }
67 }
68 }
69 }
70 } else if path.is_file() {
71 all_files.push(path.clone());
72 }
73 }
74
75 all_files
76 .iter()
77 .map(|file_path| check_file(file_path, config))
78 .collect()
79}
80
81pub fn check_file(file_path: &Path, _config: &CheckConfig) -> CheckResult {
82 let mut errors = Vec::new();
83 let mut warnings = Vec::new();
84
85 let content = match fs::read_to_string(file_path) {
86 Ok(content) => content,
87 Err(e) => {
88 errors.push(Issue {
89 line: 0,
90 message: format!("Failed to read file: {}", e),
91 context: None,
92 });
93 return CheckResult {
94 file_path: file_path.to_path_buf(),
95 errors,
96 warnings,
97 };
98 }
99 };
100
101 let lines: Vec<&str> = content.lines().collect();
102 let mut options = Options::empty();
103 options.insert(Options::ENABLE_TABLES);
104 options.insert(Options::ENABLE_FOOTNOTES);
105 options.insert(Options::ENABLE_STRIKETHROUGH);
106 options.insert(Options::ENABLE_TASKLISTS);
107
108 let parser = Parser::new_ext(&content, options);
109 let events: Vec<Event> = parser.collect();
110
111 check_markdown_structure(&lines, &events, &mut errors, &mut warnings);
113 check_link_references(&lines, &mut errors);
114 check_list_consistency(&lines, &mut warnings);
115 check_header_consistency(&lines, &mut warnings);
116
117 CheckResult {
118 file_path: file_path.to_path_buf(),
119 errors,
120 warnings,
121 }
122}
123
124fn check_markdown_structure(
125 lines: &[&str],
126 events: &[Event],
127 errors: &mut Vec<Issue>,
128 warnings: &mut Vec<Issue>,
129) {
130 let mut in_code_block = false;
131 let mut line_num = 0;
132
133 for line in lines {
134 line_num += 1;
135 let trimmed = line.trim();
136
137 if trimmed.starts_with("```") {
139 in_code_block = !in_code_block;
140 continue;
141 }
142
143 if in_code_block {
144 continue; }
146
147 if !trimmed.is_empty() {
149 let leading_spaces = line.len() - line.trim_start().len();
150 if leading_spaces % 4 != 0 && leading_spaces % 2 != 0 && leading_spaces > 0 {
151 warnings.push(Issue {
152 line: line_num,
153 message: "Inconsistent indentation (prefer 2 or 4 spaces)".to_string(),
154 context: Some(line.to_string()),
155 });
156 }
157 }
158
159 if contains_bare_url(trimmed) {
161 warnings.push(Issue {
162 line: line_num,
163 message: "Bare URL detected (consider using link syntax)".to_string(),
164 context: Some(line.to_string()),
165 });
166 }
167 }
168
169 let mut open_elements = Vec::new();
171 for event in events {
172 match event {
173 Event::Start(tag) => {
174 open_elements.push((tag.clone(), line_num));
175 }
176 Event::End(tag) => {
177 if let Some((last_tag, _)) = open_elements.last() {
178 if last_tag == tag {
179 open_elements.pop();
180 }
181 }
182 }
183 _ => {}
184 }
185 }
186
187 for (tag, line) in open_elements {
188 errors.push(Issue {
189 line,
190 message: format!("Unclosed {:?} element", tag),
191 context: None,
192 });
193 }
194}
195
196fn check_link_references(lines: &[&str], errors: &mut Vec<Issue>) {
197 let mut link_references = HashSet::new();
198 let mut defined_references = HashSet::new();
199 let mut line_num = 0;
200
201 let ref_def_regex = regex::Regex::new(r"^\[([^\]]+)\]:\s*(.+)$").unwrap();
203 let link_ref_regex = regex::Regex::new(r"!?\[([^\]]*)\]\[([^\]]+)\]").unwrap();
204
205 for line in lines {
206 line_num += 1;
207
208 if let Some(caps) = ref_def_regex.captures(line) {
210 if let Some(reference) = caps.get(1) {
211 defined_references.insert(reference.as_str().to_lowercase());
212 }
213 }
214
215 if let Some(caps) = link_ref_regex.captures(line) {
217 if let Some(reference) = caps.get(2) {
218 link_references.insert((reference.as_str().to_lowercase(), line_num));
219 }
220 }
221 }
222
223 for (reference, line) in link_references {
225 if !defined_references.contains(&reference) {
226 errors.push(Issue {
227 line,
228 message: format!("Undefined link reference '[{}]'", reference),
229 context: None,
230 });
231 }
232 }
233}
234
235fn check_list_consistency(lines: &[&str], warnings: &mut Vec<Issue>) {
236 let mut in_list = false;
237 let mut list_indent = 0;
238 let mut line_num = 0;
239
240 for line in lines {
241 line_num += 1;
242 let trimmed = line.trim_start();
243 let indent = line.len() - trimmed.len();
244
245 if trimmed.starts_with('-') || trimmed.starts_with('*') || trimmed.starts_with('+') {
246 if in_list && indent != list_indent {
247 warnings.push(Issue {
248 line: line_num,
249 message: "Inconsistent list indentation".to_string(),
250 context: Some(line.to_string()),
251 });
252 }
253 in_list = true;
254 list_indent = indent;
255 } else if trimmed.starts_with(|c: char| c.is_ascii_digit()) && trimmed.contains('.') {
256 if in_list && indent != list_indent {
258 warnings.push(Issue {
259 line: line_num,
260 message: "Inconsistent list indentation".to_string(),
261 context: Some(line.to_string()),
262 });
263 }
264 in_list = true;
265 list_indent = indent;
266 } else if !trimmed.is_empty() && !line.trim().is_empty() {
267 in_list = false;
268 }
269 }
270}
271
272fn check_header_consistency(lines: &[&str], warnings: &mut Vec<Issue>) {
273 let mut previous_level = 0;
274 let mut line_num = 0;
275
276 for line in lines {
277 line_num += 1;
278
279 if let Some(level) = detect_header_level(line) {
280 if level > previous_level + 1 {
281 warnings.push(Issue {
282 line: line_num,
283 message: format!("Header level jump from {} to {}", previous_level, level),
284 context: Some(line.to_string()),
285 });
286 }
287 previous_level = level;
288 }
289 }
290}
291
/// Returns the ATX heading level (1 to 6) of `line`, or `None` when the line
/// is not a heading.
///
/// Surrounding whitespace is ignored. The run of `#` must be followed by a
/// space, a tab or the end of the line, so `#hashtag`, `#123` and `#!/bin/sh`
/// are not headings. Runs of more than six `#` are rejected.
fn detect_header_level(line: &str) -> Option<u32> {
    let trimmed = line.trim();

    let hashes = trimmed.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&hashes) {
        return None;
    }

    // `#` is one byte wide, so the count is also a valid slice offset.
    let after = &trimmed[hashes..];
    if after.is_empty() || after.starts_with(|c: char| c == ' ' || c == '\t') {
        // `hashes` is at most 6 here, so the cast cannot truncate.
        Some(hashes as u32)
    } else {
        None
    }
}
305
/// Reports whether `text` contains a URL that is not written with link syntax.
///
/// A URL is anything containing `http://`, `https://`, `www.`, `ftp://` or
/// `mailto:`. It is not considered bare when
/// * the text contains a `[` anywhere (taken as a sign of `[text](url)` or a
///   reference definition), or
/// * it is written as an autolink, `<https://example.com>`.
fn contains_bare_url(text: &str) -> bool {
    const URL_PREFIXES: [&str; 5] = ["http://", "https://", "www.", "ftp://", "mailto:"];

    if text.contains('[') {
        return false;
    }

    // URLs contain no whitespace, so each one lives inside a single token.
    text.split_whitespace().any(|token| {
        // The earliest prefix marks where the URL starts; `www.` inside
        // `https://www.…` must not be mistaken for a second URL.
        let url_start = URL_PREFIXES
            .iter()
            .filter_map(|prefix| token.find(*prefix))
            .min();

        match url_start {
            Some(start) => {
                let autolinked = token[..start].ends_with('<') && token[start..].contains('>');
                !autolinked
            }
            None => false,
        }
    })
}
313
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Configuration shared by every test: all flags off, human output.
    fn plain_config() -> CheckConfig {
        CheckConfig {
            recursive: false,
            output_format: OutputFormat::Human,
            strict: false,
            ignore_warnings: false,
        }
    }

    /// Writes `body` to `test.md` inside a fresh temporary directory and
    /// returns what `check_file` reports for it.
    fn check_markdown(body: &str) -> CheckResult {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("test.md");
        std::fs::write(&file_path, body).unwrap();

        check_file(&file_path, &plain_config())
    }

    #[test]
    fn test_check_valid_markdown() {
        let result = check_markdown(
            "# Valid Header\n\nThis is a paragraph with a [link](http://example.com).\n",
        );

        assert!(result.errors.is_empty());
    }

    #[test]
    fn test_check_invalid_reference() {
        let result = check_markdown("This has an [undefined link][missing].\n");

        assert!(!result.errors.is_empty());
    }
}