1use std::fs;
4use std::num::NonZeroU32;
5use std::path::Path;
6
7use thiserror::Error;
8
9use crate::condition::ConditionTag;
10use crate::config::Profile;
11use crate::language::{default_language, detect_language};
12use crate::parser::{parse_markdown, parse_plain, word_count};
13use crate::rules::lexicon::unexplained_abbreviation::{self, UnexplainedAbbreviation};
14use crate::rules::readability::score::{self, FormulaChoice, ReadabilityScore};
15use crate::rules::structure::excessive_commas::{self, ExcessiveCommas};
16use crate::rules::{
17 default_rules, filter_by_conditions, filter_by_experimental, ExperimentalOptIn, Rule,
18};
19use crate::scoring::{self, Scorecard, ScoringConfig};
20use crate::types::{Diagnostic, Language, SourceFile};
21
22#[derive(Debug, Clone)]
27pub struct Report {
28 pub diagnostics: Vec<Diagnostic>,
30 pub scorecard: Scorecard,
32 pub word_count: u32,
34}
35
36pub struct Engine {
41 profile: Profile,
42 rules: Vec<Box<dyn Rule>>,
43 scoring_config: ScoringConfig,
44}
45
46impl Engine {
47 #[must_use]
49 pub fn with_profile(profile: Profile) -> Self {
50 Self {
51 profile,
52 rules: default_rules(profile),
53 scoring_config: ScoringConfig::default(),
54 }
55 }
56
57 #[must_use]
61 pub fn with_profile_and_conditions(profile: Profile, conditions: &[ConditionTag]) -> Self {
62 Self::with_profile_conditions_and_experimental(
63 profile,
64 conditions,
65 &ExperimentalOptIn::None,
66 )
67 }
68
69 #[must_use]
77 pub fn with_profile_conditions_and_experimental(
78 profile: Profile,
79 conditions: &[ConditionTag],
80 experimental: &ExperimentalOptIn,
81 ) -> Self {
82 let rules = filter_by_experimental(default_rules(profile), experimental);
83 let rules = filter_by_conditions(rules, conditions);
84 Self {
85 profile,
86 rules,
87 scoring_config: ScoringConfig::default(),
88 }
89 }
90
91 #[must_use]
93 pub fn with_rules(profile: Profile, rules: Vec<Box<dyn Rule>>) -> Self {
94 Self {
95 profile,
96 rules,
97 scoring_config: ScoringConfig::default(),
98 }
99 }
100
101 #[must_use]
103 pub fn with_scoring_config(mut self, scoring_config: ScoringConfig) -> Self {
104 self.scoring_config = scoring_config;
105 self
106 }
107
108 #[must_use]
118 pub fn with_readability_formula(mut self, formula: FormulaChoice) -> Self {
119 let config = score::Config::for_profile(self.profile).with_formula(formula);
120 self.replace_rule(
121 ReadabilityScore::ID,
122 Box::new(ReadabilityScore::new(config)),
123 );
124 self
125 }
126
127 #[must_use]
136 pub fn with_unexplained_whitelist(mut self, extra: Vec<String>) -> Self {
137 if extra.is_empty() {
138 return self;
139 }
140 let config =
141 unexplained_abbreviation::Config::for_profile(self.profile).with_extra_whitelist(extra);
142 self.replace_rule(
143 UnexplainedAbbreviation::ID,
144 Box::new(UnexplainedAbbreviation::new(config)),
145 );
146 self
147 }
148
149 #[must_use]
154 pub fn with_excessive_commas_max_commas(mut self, max_commas: NonZeroU32) -> Self {
155 let config =
156 excessive_commas::Config::for_profile(self.profile).with_max_commas(max_commas);
157 self.replace_rule(ExcessiveCommas::ID, Box::new(ExcessiveCommas::new(config)));
158 self
159 }
160
161 fn replace_rule(&mut self, id: &str, replacement: Box<dyn Rule>) {
167 if let Some(slot) = self.rules.iter_mut().find(|r| r.id() == id) {
168 *slot = replacement;
169 }
170 }
171
172 #[must_use]
174 pub const fn profile(&self) -> Profile {
175 self.profile
176 }
177
178 #[must_use]
180 pub fn lint_str(&self, input: &str) -> Report {
181 self.lint_with_source(input, SourceFile::Anonymous, true)
182 }
183
184 #[must_use]
186 pub fn lint_stdin(&self, input: &str) -> Report {
187 self.lint_with_source(input, SourceFile::Stdin, true)
188 }
189
190 pub fn lint_file(&self, path: &Path) -> Result<Report, EngineError> {
199 let contents = fs::read_to_string(path).map_err(EngineError::Io)?;
200 let is_markdown = path
201 .extension()
202 .and_then(|e| e.to_str())
203 .is_some_and(|ext| matches!(ext, "md" | "markdown"));
204 let source = SourceFile::Path(path.to_path_buf());
205 Ok(self.lint_with_source(&contents, source, is_markdown))
206 }
207
208 fn lint_with_source(&self, input: &str, source: SourceFile, is_markdown: bool) -> Report {
209 let normalized = normalize_input(input);
210 let input = normalized.as_ref();
211 let language = match detect_language(input) {
212 Language::Unknown => default_language(),
213 detected => detected,
214 };
215
216 let document = if is_markdown {
217 parse_markdown(input, source)
218 } else {
219 parse_plain(input, source)
220 };
221
222 let mut diagnostics = Vec::new();
223 for rule in &self.rules {
224 diagnostics.extend(rule.check(&document, language));
225 }
226 diagnostics.retain(|d| {
227 !document
228 .directives
229 .iter()
230 .any(|dir| dir.rule_id == d.rule_id && dir.covers(d.location.line))
231 });
232
233 let words = word_count(input);
234 let scorecard = scoring::compute(&diagnostics, words, &self.scoring_config);
235
236 Report {
237 diagnostics,
238 scorecard,
239 word_count: words,
240 }
241 }
242}
243
244fn normalize_input(input: &str) -> std::borrow::Cow<'_, str> {
248 use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};
249
250 let stripped = input.strip_prefix('\u{FEFF}');
251 let body = stripped.unwrap_or(input);
252 match is_nfc_quick(body.chars()) {
253 IsNormalized::Yes if stripped.is_none() => std::borrow::Cow::Borrowed(input),
254 IsNormalized::Yes => std::borrow::Cow::Owned(body.to_string()),
255 _ => std::borrow::Cow::Owned(body.nfc().collect()),
256 }
257}
258
259#[derive(Debug, Error)]
261pub enum EngineError {
262 #[error("failed to read input file")]
264 Io(#[source] std::io::Error),
265}
266
#[cfg(test)]
mod tests {
    use std::borrow::Cow;

    use super::*;
    use crate::types::Severity;

    /// Rule ids the assertions below filter on.
    const SENTENCE_TOO_LONG: &str = "structure.sentence-too-long";
    const EXCESSIVE_COMMAS: &str = "structure.excessive-commas";
    const UNEXPLAINED_ABBREVIATION: &str = "lexicon.unexplained-abbreviation";

    /// Sentence that trips `structure.sentence-too-long` under the Public
    /// profile but not under DevDoc (see `engine_respects_profile`).
    const LONG_SENTENCE: &str =
        "This is a long sentence that keeps adding more and more words until it \
         exceeds the public profile threshold by a comfortable margin of safety.";

    /// Engine with the Public profile and its default rules.
    fn public_engine() -> Engine {
        Engine::with_profile(Profile::Public)
    }

    /// Number of diagnostics in `diags` reported by `rule_id`.
    fn diags_for_rule(diags: &[Diagnostic], rule_id: &str) -> usize {
        let mut hits = 0;
        for diag in diags {
            if diag.rule_id == rule_id {
                hits += 1;
            }
        }
        hits
    }

    /// A long sentence yields at least one warning-severity diagnostic.
    #[test]
    fn engine_applies_default_rules() {
        let text = "This is a rather long sentence that keeps adding more and more words \
                    until it exceeds the public profile threshold by a comfortable margin.";
        let report = public_engine().lint_str(text);
        assert!(!report.diagnostics.is_empty());
        let has_warning = report
            .diagnostics
            .iter()
            .any(|d| d.severity == Severity::Warning);
        assert!(has_warning);
    }

    /// Clean text produces nothing above info severity.
    #[test]
    fn engine_returns_no_warnings_for_clean_text() {
        let report = public_engine().lint_str("Short clean sentence. Another fine one.");
        let only_info = report
            .diagnostics
            .iter()
            .all(|d| d.severity == Severity::Info);
        assert!(only_info);
    }

    /// The same sentence is flagged under Public but not under DevDoc.
    #[test]
    fn engine_respects_profile() {
        let public_report = public_engine().lint_str(LONG_SENTENCE);
        let dev_report = Engine::with_profile(Profile::DevDoc).lint_str(LONG_SENTENCE);
        assert!(diags_for_rule(&public_report.diagnostics, SENTENCE_TOO_LONG) > 0);
        assert_eq!(
            diags_for_rule(&dev_report.diagnostics, SENTENCE_TOO_LONG),
            0
        );
    }

    /// A `disable-next-line` directive naming the rule suppresses its
    /// diagnostic on the following line.
    #[test]
    fn inline_disable_suppresses_matching_diagnostic() {
        let text = format!(
            "Intro paragraph.\n\n\
             <!-- lucid-lint disable-next-line structure.sentence-too-long -->\n\
             {LONG_SENTENCE}\n"
        );
        let report = public_engine().lint_str(&text);
        assert_eq!(
            diags_for_rule(&report.diagnostics, SENTENCE_TOO_LONG),
            0,
            "expected directive to suppress sentence-too-long, got: {:?}",
            report.diagnostics
        );
    }

    /// A directive naming a different rule leaves sentence-too-long alone.
    #[test]
    fn inline_disable_does_not_affect_other_rules_or_lines() {
        let text = format!(
            "Intro.\n\n\
             <!-- lucid-lint disable-next-line weasel-words -->\n\
             {LONG_SENTENCE}\n"
        );
        let report = public_engine().lint_str(&text);
        assert_eq!(diags_for_rule(&report.diagnostics, SENTENCE_TOO_LONG), 1);
    }

    /// Inside a disable/enable block the rule is silenced; the sentence after
    /// the enable marker is still reported.
    #[test]
    fn block_disable_suppresses_diagnostics_within_scope() {
        let long_sentence = LONG_SENTENCE;
        let text = format!(
            "Intro.\n\n\
             <!-- lucid-lint-disable structure.sentence-too-long -->\n\n\
             {long_sentence}\n\n\
             {long_sentence}\n\n\
             <!-- lucid-lint-enable -->\n\n\
             {long_sentence}\n",
        );
        let report = public_engine().lint_str(&text);
        assert_eq!(
            diags_for_rule(&report.diagnostics, SENTENCE_TOO_LONG),
            1,
            "expected block directive to suppress 2 of 3 diagnostics, got: {:?}",
            report.diagnostics,
        );
    }

    /// `profile()` returns the profile the engine was built with.
    #[test]
    fn engine_profile_accessor() {
        let engine = Engine::with_profile(Profile::Falc);
        assert_eq!(engine.profile(), Profile::Falc);
    }

    /// Lowering `max_commas` to 1 flags more than the Public baseline does.
    #[test]
    fn with_excessive_commas_max_commas_overrides_threshold() {
        let base = public_engine();
        let tightened =
            public_engine().with_excessive_commas_max_commas(NonZeroU32::new(1).unwrap());
        let text = "Alpha, beta, gamma are three items in a short list.";
        let base_hits = diags_for_rule(&base.lint_str(text).diagnostics, EXCESSIVE_COMMAS);
        let tight_hits = diags_for_rule(&tightened.lint_str(text).diagnostics, EXCESSIVE_COMMAS);
        assert!(
            tight_hits > base_hits,
            "tightened max_commas=1 should flag more than the Public baseline (base={base_hits}, tight={tight_hits})"
        );
    }

    /// Whitelisting an acronym removes at least one diagnostic for it. If the
    /// baseline does not flag `WCAG` at all, the test falls back to the
    /// made-up acronym `XYZZY`.
    #[test]
    fn with_unexplained_whitelist_suppresses_extra_acronym() {
        let text = "WCAG is the relevant reference for accessibility compliance.";
        let rule_id = UNEXPLAINED_ABBREVIATION;
        let base = public_engine();
        let base_hits = diags_for_rule(&base.lint_str(text).diagnostics, rule_id);
        if base_hits == 0 {
            let text2 = "XYZZY governs that procedure as a policy baseline.";
            let extended = public_engine().with_unexplained_whitelist(vec!["XYZZY".into()]);
            let baseline = public_engine();
            assert!(
                diags_for_rule(&baseline.lint_str(text2).diagnostics, rule_id)
                    > diags_for_rule(&extended.lint_str(text2).diagnostics, rule_id),
                "extra whitelist entry should suppress at least one diagnostic"
            );
        } else {
            let extended = public_engine().with_unexplained_whitelist(vec!["WCAG".into()]);
            let extended_hits = diags_for_rule(&extended.lint_str(text).diagnostics, rule_id);
            assert!(extended_hits < base_hits);
        }
    }

    /// With an empty rule set the override builders must not add any rule.
    #[test]
    fn override_helpers_are_no_ops_when_rule_filtered_out() {
        let engine = Engine::with_rules(Profile::Public, Vec::new())
            .with_readability_formula(FormulaChoice::Auto)
            .with_unexplained_whitelist(vec!["NASA".into()])
            .with_excessive_commas_max_commas(NonZeroU32::new(1).unwrap());
        assert!(engine.lint_str("Anything.").diagnostics.is_empty());
    }

    /// ASCII without a BOM comes back borrowed and unchanged.
    #[test]
    fn normalize_input_passes_through_clean_ascii_borrowed() {
        let input = "Plain ASCII sentence.";
        let out = normalize_input(input);
        assert!(matches!(out, Cow::Borrowed(_)));
        assert_eq!(out.as_ref(), input);
    }

    /// Precomposed (NFC) text without a BOM comes back borrowed and unchanged.
    #[test]
    fn normalize_input_passes_through_nfc_unicode_borrowed() {
        let input = "Le café est prêt.";
        let out = normalize_input(input);
        assert!(matches!(out, Cow::Borrowed(_)));
        assert_eq!(out.as_ref(), input);
    }

    /// A leading U+FEFF is removed.
    #[test]
    fn normalize_input_strips_leading_bom_only() {
        let out = normalize_input("\u{FEFF}hello");
        assert_eq!(out.as_ref(), "hello");
    }

    /// U+FEFF in the middle of the text is left in place.
    #[test]
    fn normalize_input_does_not_strip_inner_bom() {
        let input = "hello\u{FEFF}world";
        let out = normalize_input(input);
        assert_eq!(out.as_ref(), input);
    }

    /// `e` followed by a combining acute accent is composed into `é`.
    #[test]
    fn normalize_input_nfc_normalizes_decomposed_text() {
        let out = normalize_input("cafe\u{0301}");
        assert_eq!(out.as_ref(), "café");
    }

    /// BOM stripping and NFC composition both apply to the same input.
    #[test]
    fn normalize_input_strips_bom_and_nfc_normalizes() {
        let out = normalize_input("\u{FEFF}cafe\u{0301}");
        assert_eq!(out.as_ref(), "café");
    }

    /// The empty string comes back empty and borrowed.
    #[test]
    fn normalize_input_handles_empty_string() {
        let out = normalize_input("");
        assert_eq!(out.as_ref(), "");
        assert!(matches!(out, Cow::Borrowed(_)));
    }

    /// A leading BOM changes neither which diagnostics fire nor where.
    #[test]
    fn bom_prefix_does_not_shift_diagnostics() {
        let engine = public_engine();
        let body = LONG_SENTENCE;
        let with_bom = format!("\u{FEFF}{body}");
        let plain = engine.lint_str(body);
        let bommed = engine.lint_str(&with_bom);
        assert_eq!(plain.diagnostics.len(), bommed.diagnostics.len());
        for (a, b) in plain.diagnostics.iter().zip(bommed.diagnostics.iter()) {
            assert_eq!(a.rule_id, b.rule_id);
            assert_eq!(a.location.line, b.location.line);
            assert_eq!(a.location.column, b.location.column);
            assert_eq!(a.message, b.message);
        }
    }

    /// Decomposed (NFD) and precomposed (NFC) spellings of the same text give
    /// the same diagnostics, compared by rule id and line.
    #[test]
    fn nfd_input_yields_same_diagnostics_as_nfc() {
        let engine = public_engine();
        let nfc = "Le café est bon. Le café est chaud. Le café est noir. Le café est fort.";
        let nfd = "Le cafe\u{0301} est bon. Le cafe\u{0301} est chaud. Le cafe\u{0301} est noir. \
                   Le cafe\u{0301} est fort.";
        let a = engine.lint_str(nfc);
        let b = engine.lint_str(nfd);
        assert_eq!(a.diagnostics.len(), b.diagnostics.len());
        for (x, y) in a.diagnostics.iter().zip(b.diagnostics.iter()) {
            assert_eq!(x.rule_id, y.rule_id);
            assert_eq!(x.location.line, y.location.line);
        }
    }

    /// Paragraphs separated by lone `\r` produce the same word count and the
    /// same number of diagnostics as their `\n`-separated counterpart.
    #[test]
    fn lone_cr_line_endings_are_normalized() {
        let engine = public_engine();
        let lf = "First paragraph.\n\nSecond paragraph.\n\nThird.";
        let cr = "First paragraph.\r\rSecond paragraph.\r\rThird.";
        let a = engine.lint_str(lf);
        let b = engine.lint_str(cr);
        assert_eq!(a.word_count, b.word_count);
        assert_eq!(a.diagnostics.len(), b.diagnostics.len());
    }

    /// Smoke test: zero-width characters inside words must not break linting.
    /// No particular word count is asserted.
    #[test]
    fn zero_width_chars_inside_words_pin_behaviour() {
        let text = "Hello\u{200B}world. Bonjour\u{200C}le\u{200D}monde.";
        let report = public_engine().lint_str(text);
        let _ = report.word_count;
    }

    /// The global maximum is five times the per-category default, and the
    /// scorecard has five categories.
    #[test]
    fn engine_produces_scorecard_with_fixed_max() {
        let report = public_engine().lint_str("Short clean sentence. Another fine one.");
        assert_eq!(
            report.scorecard.global.max,
            crate::scoring::DEFAULT_CATEGORY_MAX * 5
        );
        assert_eq!(report.scorecard.per_category.len(), 5);
    }
}