1use std::path::Path;
9
/// A single problem reported by one of the document validators in this file.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DocViolation {
    /// Line the problem was found on. The content validators report 1-based
    /// line numbers; `validate_document` uses `0` for file-level problems
    /// (unreadable file, unsupported extension).
    pub line: usize,
    /// Stable identifier of the violated rule, e.g. `"heading-hierarchy"`.
    pub rule: &'static str,
    /// Human-readable description of the problem.
    pub message: String,
}
17
/// Outcome of comparing an on-disk README with freshly generated content.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DriftResult {
    /// `true` when at least one line differs.
    pub stale: bool,
    /// Number of line positions whose content differs.
    pub diff_lines: usize,
}
24
25pub fn validate_heading_hierarchy(md: &str) -> Vec<DocViolation> {
27 let mut violations = Vec::new();
28 let mut headings: Vec<(usize, usize)> = Vec::new();
29 let mut in_fence = false;
30
31 for (idx, line) in md.lines().enumerate() {
32 let trimmed = line.trim_start();
33 if trimmed.starts_with("```") {
34 in_fence = !in_fence;
35 continue;
36 }
37 if in_fence {
38 continue;
39 }
40 if !trimmed.starts_with('#') {
41 continue;
42 }
43 let hashes = trimmed.bytes().take_while(|&b| b == b'#').count();
44 if hashes > 6 {
45 continue;
46 }
47 let rest = &trimmed[hashes..];
48 if !rest.is_empty() && !rest.starts_with(' ') {
49 continue;
50 }
51 headings.push((idx + 1, hashes));
52 }
53
54 if headings.is_empty() {
55 return violations;
56 }
57
58 if headings[0].1 != 1 {
59 violations.push(DocViolation {
60 line: headings[0].0,
61 rule: "heading-hierarchy",
62 message: format!("first heading must be H1, found H{}", headings[0].1),
63 });
64 }
65
66 for &(line, level) in &headings[1..] {
67 if level == 1 {
68 violations.push(DocViolation {
69 line,
70 rule: "heading-hierarchy",
71 message: "duplicate H1 — exactly one H1 allowed per document".into(),
72 });
73 }
74 }
75
76 for i in 1..headings.len() {
77 let (_, prev) = headings[i - 1];
78 let (line, curr) = headings[i];
79 if curr > prev + 1 {
80 violations.push(DocViolation {
81 line,
82 rule: "heading-hierarchy",
83 message: format!(
84 "heading level skip: H{curr} follows H{prev} (expected H{} or lower)",
85 prev + 1
86 ),
87 });
88 }
89 }
90
91 violations
92}
93
94pub fn validate_links(md: &str) -> Vec<DocViolation> {
96 let mut violations = Vec::new();
97 let mut in_fence = false;
98
99 for (idx, line) in md.lines().enumerate() {
100 let trimmed_check = line.trim_start();
101 if trimmed_check.starts_with("```") {
102 in_fence = !in_fence;
103 continue;
104 }
105 if in_fence {
106 continue;
107 }
108 let line_num = idx + 1;
109 let bytes = line.as_bytes();
110 let len = bytes.len();
111 let mut i = 0;
112
113 while i < len {
114 if i + 1 < len && bytes[i] == b']' && bytes[i + 1] == b'(' && bytes[..i].contains(&b'[')
115 {
116 let url_start = i + 2;
117 let mut depth = 1u32;
118 let mut url_end = url_start;
119 while url_end < len && depth > 0 {
120 match bytes[url_end] {
121 b'(' => depth += 1,
122 b')' => depth -= 1,
123 _ => {}
124 }
125 if depth > 0 {
126 url_end += 1;
127 }
128 }
129 let url = &line[url_start..url_end];
130 if url.is_empty() {
131 violations.push(DocViolation {
132 line: line_num,
133 rule: "link-wellformedness",
134 message: "link URL is empty".into(),
135 });
136 } else {
137 if url.starts_with("javascript:") {
138 violations.push(DocViolation {
139 line: line_num,
140 rule: "link-wellformedness",
141 message: format!("link URL uses javascript: scheme (XSS risk): {url}"),
142 });
143 }
144 if url.contains(' ') {
145 violations.push(DocViolation {
146 line: line_num,
147 rule: "link-wellformedness",
148 message: format!("link URL contains unescaped space: {url}"),
149 });
150 }
151 }
152 i = url_end + 1;
153 } else {
154 i += 1;
155 }
156 }
157 }
158 violations
159}
160
161pub fn validate_code_fences(md: &str) -> Vec<DocViolation> {
166 let mut violations = Vec::new();
167 let mut in_fence = false;
168
169 for (idx, line) in md.lines().enumerate() {
170 let trimmed = line.trim();
171 if trimmed.starts_with("```") {
172 if in_fence {
173 in_fence = false;
175 } else {
176 in_fence = true;
178 if trimmed[3..].trim().is_empty() {
179 violations.push(DocViolation {
180 line: idx + 1,
181 rule: "code-fence-language",
182 message: "code fence without language tag".into(),
183 });
184 }
185 }
186 }
187 }
188 violations
189}
190
191pub fn validate_tables(md: &str) -> Vec<DocViolation> {
193 let mut violations = Vec::new();
194 let lines: Vec<&str> = md.lines().collect();
195 let mut i = 0;
196 let mut in_fence = false;
197
198 while i < lines.len() {
199 let trimmed_check = lines[i].trim_start();
200 if trimmed_check.starts_with("```") {
201 in_fence = !in_fence;
202 i += 1;
203 continue;
204 }
205 if in_fence {
206 i += 1;
207 continue;
208 }
209 let line = lines[i].trim();
210 if !line.starts_with('|') {
211 i += 1;
212 continue;
213 }
214 let header_cols = count_table_columns(line);
215 if i + 1 >= lines.len() {
216 i += 1;
217 continue;
218 }
219 let sep_line = lines[i + 1].trim();
220 if !is_table_separator(sep_line) {
221 i += 1;
222 continue;
223 }
224
225 let sep_cols = count_table_columns(sep_line);
226 if sep_cols != header_cols {
227 violations.push(DocViolation {
228 line: i + 2,
229 rule: "table-column-parity",
230 message: format!("separator has {sep_cols} columns, header has {header_cols}"),
231 });
232 }
233
234 let mut j = i + 2;
235 while j < lines.len() {
236 let row = lines[j].trim();
237 if !row.starts_with('|') {
238 break;
239 }
240 let row_cols = count_table_columns(row);
241 if row_cols != header_cols {
242 violations.push(DocViolation {
243 line: j + 1,
244 rule: "table-column-parity",
245 message: format!("row has {row_cols} columns, header has {header_cols}"),
246 });
247 }
248 j += 1;
249 }
250 i = j;
251 }
252 violations
253}
254
/// Counts the cells in one pipe-table row.
///
/// One optional leading `|` and one optional trailing `|` are treated as the
/// row's outer border, not as cell delimiters. A row with nothing but
/// whitespace between its borders has zero columns.
///
/// A pipe preceded by a backslash (`\|`) is an escaped literal pipe inside a
/// cell, as GitHub Flavored Markdown allows, and does not start a new column.
/// A backslash escapes only the single character that follows it, so `\\|`
/// is an escaped backslash followed by a real delimiter.
fn count_table_columns(row: &str) -> usize {
    let trimmed = row.trim();
    let body = trimmed.strip_prefix('|').unwrap_or(trimmed);

    let mut delimiters = 0usize;
    let mut escaped = false;
    let mut ends_with_delimiter = false;
    for ch in body.chars() {
        ends_with_delimiter = false;
        match ch {
            '\\' if !escaped => escaped = true,
            '|' if !escaped => {
                delimiters += 1;
                ends_with_delimiter = true;
            }
            // Any other character, or one consumed by a preceding backslash.
            _ => escaped = false,
        }
    }

    // An unescaped pipe in final position is the closing border.
    let cells = if ends_with_delimiter {
        delimiters -= 1;
        &body[..body.len() - 1]
    } else {
        body
    };
    if cells.trim().is_empty() {
        return 0;
    }
    delimiters + 1
}
264
/// Returns `true` when `line` looks like the separator row of a pipe table.
///
/// The trimmed line must contain at least one `|` and one `-`. After removing
/// one optional leading and one optional trailing `|`, every `|`-separated
/// cell must be non-empty once trimmed and consist solely of `-` and `:`.
fn is_table_separator(line: &str) -> bool {
    let candidate = line.trim();
    if !(candidate.contains('|') && candidate.contains('-')) {
        return false;
    }

    let without_leading = candidate.strip_prefix('|').unwrap_or(candidate);
    let cells = without_leading
        .strip_suffix('|')
        .unwrap_or(without_leading);

    for cell in cells.split('|') {
        let cell = cell.trim();
        if cell.is_empty() || cell.chars().any(|ch| !matches!(ch, '-' | ':')) {
            return false;
        }
    }
    true
}
277
278pub fn validate_svg(content: &str) -> Vec<DocViolation> {
280 let mut violations = Vec::new();
281 let lower = content.to_ascii_lowercase();
282
283 if !lower.contains("<svg") {
284 violations.push(DocViolation {
285 line: 1,
286 rule: "svg-structural-safety",
287 message: "missing <svg> root element".into(),
288 });
289 return violations;
290 }
291 if !content.contains("viewBox") {
292 violations.push(DocViolation {
293 line: 1,
294 rule: "svg-structural-safety",
295 message: "missing viewBox attribute on <svg>".into(),
296 });
297 }
298 for (tag, msg) in [
299 ("<script", "SVG contains <script> tag (XSS risk)"),
300 ("<foreignobject", "SVG contains <foreignObject> tag"),
301 ] {
302 if lower.contains(tag) {
303 for (idx, line) in content.lines().enumerate() {
304 if line.to_ascii_lowercase().contains(tag) {
305 violations.push(DocViolation {
306 line: idx + 1,
307 rule: "svg-structural-safety",
308 message: msg.into(),
309 });
310 }
311 }
312 }
313 }
314 let has_xmlns = content.contains("xmlns=\"http://www.w3.org/2000/svg\"")
315 || content.contains("xmlns='http://www.w3.org/2000/svg'");
316 if !has_xmlns {
317 violations.push(DocViolation {
318 line: 1,
319 rule: "svg-structural-safety",
320 message: "missing xmlns=\"http://www.w3.org/2000/svg\" namespace".into(),
321 });
322 }
323 violations
324}
325
/// Returns the entries of `required` that have no matching heading in `md`.
///
/// Headings are recognised with the same rule `validate_heading_hierarchy`
/// uses: after leading whitespace, one to six `#` characters followed by a
/// space or the end of the line, outside triple-backtick fences. Lines such
/// as `#tag` are therefore not mistaken for sections.
///
/// The heading title is the text after the opening hashes, trimmed. An
/// optional closing run of `#` is removed when it is separated from the title
/// by whitespace (`## Usage ##` is titled `Usage`, while `## C#` keeps its
/// `#`). Titles are compared with the required names ASCII
/// case-insensitively; headings with an empty title are ignored.
///
/// The result preserves the order of `required`.
pub fn validate_required_sections(md: &str, required: &[&str]) -> Vec<String> {
    let mut in_fence = false;
    let mut titles: Vec<&str> = Vec::new();

    for line in md.lines() {
        let trimmed = line.trim_start();
        if trimmed.starts_with("```") {
            in_fence = !in_fence;
            continue;
        }
        if in_fence {
            continue;
        }

        let rest = trimmed.trim_start_matches('#');
        let level = trimmed.len() - rest.len();
        if !(1..=6).contains(&level) {
            continue;
        }
        if !rest.is_empty() && !rest.starts_with(' ') {
            continue;
        }

        let mut title = rest.trim();
        // Drop a closing `#` run only when whitespace separates it from the
        // title (or when the title consists of nothing else).
        let without_closing = title.trim_end_matches('#');
        if without_closing.len() != title.len()
            && (without_closing.is_empty() || without_closing.ends_with(char::is_whitespace))
        {
            title = without_closing.trim_end();
        }
        if !title.is_empty() {
            titles.push(title);
        }
    }

    required
        .iter()
        .filter(|&&name| !titles.iter().any(|title| title.eq_ignore_ascii_case(name)))
        .map(ToString::to_string)
        .collect()
}
356
357pub fn detect_readme_drift(actual: &str, generated: &str) -> DriftResult {
359 let norm = |s: &str| -> Vec<String> { s.lines().map(|l| l.trim_end().to_string()).collect() };
360 let a = norm(actual);
361 let g = norm(generated);
362 let max_len = a.len().max(g.len());
363 let mut diff_count = 0usize;
364 for i in 0..max_len {
365 if a.get(i).map_or("", String::as_str) != g.get(i).map_or("", String::as_str) {
366 diff_count += 1;
367 }
368 }
369 DriftResult {
370 stale: diff_count > 0,
371 diff_lines: diff_count,
372 }
373}
374
375pub fn validate_document(path: &Path) -> Vec<DocViolation> {
377 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
378 let read = |p: &Path| -> Result<String, Vec<DocViolation>> {
379 std::fs::read_to_string(p).map_err(|e| {
380 vec![DocViolation {
381 line: 0,
382 rule: "io-error",
383 message: format!("failed to read file: {e}"),
384 }]
385 })
386 };
387 match ext {
388 "md" | "markdown" => {
389 let content = match read(path) {
390 Ok(c) => c,
391 Err(v) => return v,
392 };
393 let mut v = validate_heading_hierarchy(&content);
394 v.extend(validate_links(&content));
395 v.extend(validate_code_fences(&content));
396 v.extend(validate_tables(&content));
397 v
398 }
399 "svg" => match read(path) {
400 Ok(c) => validate_svg(&c),
401 Err(v) => v,
402 },
403 _ => vec![DocViolation {
404 line: 0,
405 rule: "unsupported-extension",
406 message: format!("unsupported file extension: .{ext}"),
407 }],
408 }
409}
410
// Unit tests live in the sibling file named by `#[path]` and are compiled
// only for test builds.
#[cfg(test)]
#[path = "doc_integrity_tests.rs"]
mod tests;