1use bstr::ByteSlice;
2use std::io::Read;
3use std::io::Write;
4
5use crate::report;
6
/// A check that can be applied to a single path.
///
/// Implementations must be `Send + Sync`; `walk_path_parallel` in this file shares one
/// checker across the closures it hands to the parallel walker.
pub trait FileChecker: Send + Sync {
    /// Runs the check against `path`, sending every finding to `reporter`.
    ///
    /// * `path` - the file to inspect; the helpers in this file treat `-` as stdin/stdout.
    /// * `explicit` - computed by `walk_entry` as "depth 0 and exclusion not forced"; the
    ///   implementations in this file use it to bypass the binary-file skip.
    /// * `policy` - the settings in effect for this path.
    /// * `reporter` - sink for findings and for recoverable errors.
    ///
    /// # Errors
    ///
    /// Returns a `std::io::Error` when reporting or an unrecoverable I/O step fails.
    fn check_file(
        &self,
        path: &std::path::Path,
        explicit: bool,
        policy: &crate::policy::Policy<'_, '_, '_>,
        reporter: &dyn report::Report,
    ) -> Result<(), std::io::Error>;
}
16
17#[derive(Debug, Clone, Copy)]
18pub struct Typos;
19
20impl FileChecker for Typos {
21 fn check_file(
22 &self,
23 path: &std::path::Path,
24 explicit: bool,
25 policy: &crate::policy::Policy<'_, '_, '_>,
26 reporter: &dyn report::Report,
27 ) -> Result<(), std::io::Error> {
28 if policy.check_filenames {
29 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
30 for typo in check_str(file_name, policy) {
31 let msg = report::Typo {
32 context: Some(report::PathContext { path }.into()),
33 buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
34 byte_offset: typo.byte_offset,
35 typo: typo.typo.as_ref(),
36 corrections: typo.corrections,
37 };
38 reporter.report(msg.into())?;
39 }
40 }
41 }
42
43 if policy.check_files {
44 let (buffer, content_type) = read_file(path, reporter)?;
45 if !explicit && !policy.binary && content_type.is_binary() {
46 let msg = report::BinaryFile { path };
47 reporter.report(msg.into())?;
48 } else {
49 let mut accum_line_num = AccumulateLineNum::new();
50 for typo in check_bytes(&buffer, policy) {
51 let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
52 let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
53 let msg = report::Typo {
54 context: Some(report::FileContext { path, line_num }.into()),
55 buffer: std::borrow::Cow::Borrowed(line),
56 byte_offset: line_offset,
57 typo: typo.typo.as_ref(),
58 corrections: typo.corrections,
59 };
60 reporter.report(msg.into())?;
61 }
62 }
63 }
64
65 Ok(())
66 }
67}
68
69#[derive(Debug, Clone, Copy)]
70pub struct FixTypos;
71
72impl FileChecker for FixTypos {
73 fn check_file(
74 &self,
75 path: &std::path::Path,
76 explicit: bool,
77 policy: &crate::policy::Policy<'_, '_, '_>,
78 reporter: &dyn report::Report,
79 ) -> Result<(), std::io::Error> {
80 if policy.check_files {
81 let (buffer, content_type) = read_file(path, reporter)?;
82 if !explicit && !policy.binary && content_type.is_binary() {
83 let msg = report::BinaryFile { path };
84 reporter.report(msg.into())?;
85 } else {
86 let mut fixes = Vec::new();
87 let mut accum_line_num = AccumulateLineNum::new();
88 for typo in check_bytes(&buffer, policy) {
89 if is_fixable(&typo) {
90 fixes.push(typo.into_owned());
91 } else {
92 let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
93 let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
94 let msg = report::Typo {
95 context: Some(report::FileContext { path, line_num }.into()),
96 buffer: std::borrow::Cow::Borrowed(line),
97 byte_offset: line_offset,
98 typo: typo.typo.as_ref(),
99 corrections: typo.corrections,
100 };
101 reporter.report(msg.into())?;
102 }
103 }
104 if !fixes.is_empty() || path == std::path::Path::new("-") {
105 let buffer = fix_buffer(buffer, fixes.into_iter());
106 write_file(path, content_type, buffer, reporter)?;
107 }
108 }
109 }
110
111 if policy.check_filenames {
113 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
114 let mut fixes = Vec::new();
115 for typo in check_str(file_name, policy) {
116 if is_fixable(&typo) {
117 fixes.push(typo.into_owned());
118 } else {
119 let msg = report::Typo {
120 context: Some(report::PathContext { path }.into()),
121 buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
122 byte_offset: typo.byte_offset,
123 typo: typo.typo.as_ref(),
124 corrections: typo.corrections,
125 };
126 reporter.report(msg.into())?;
127 }
128 }
129 if !fixes.is_empty() {
130 let file_name = file_name.to_owned().into_bytes();
131 let new_name = fix_buffer(file_name, fixes.into_iter());
132 let new_name =
133 String::from_utf8(new_name).expect("corrections are valid utf-8");
134 let new_path = path.with_file_name(new_name);
135 std::fs::rename(path, new_path)?;
136 }
137 }
138 }
139
140 Ok(())
141 }
142}
143
144#[derive(Debug, Clone, Copy)]
145pub struct DiffTypos;
146
147impl FileChecker for DiffTypos {
148 fn check_file(
149 &self,
150 path: &std::path::Path,
151 explicit: bool,
152 policy: &crate::policy::Policy<'_, '_, '_>,
153 reporter: &dyn report::Report,
154 ) -> Result<(), std::io::Error> {
155 let mut content = Vec::new();
156 let mut new_content = Vec::new();
157 if policy.check_files {
158 let (buffer, content_type) = read_file(path, reporter)?;
159 if !explicit && !policy.binary && content_type.is_binary() {
160 let msg = report::BinaryFile { path };
161 reporter.report(msg.into())?;
162 } else {
163 let mut fixes = Vec::new();
164 let mut accum_line_num = AccumulateLineNum::new();
165 for typo in check_bytes(&buffer, policy) {
166 if is_fixable(&typo) {
167 fixes.push(typo.into_owned());
168 } else {
169 let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
170 let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
171 let msg = report::Typo {
172 context: Some(report::FileContext { path, line_num }.into()),
173 buffer: std::borrow::Cow::Borrowed(line),
174 byte_offset: line_offset,
175 typo: typo.typo.as_ref(),
176 corrections: typo.corrections,
177 };
178 reporter.report(msg.into())?;
179 }
180 }
181 if !fixes.is_empty() {
182 new_content = fix_buffer(buffer.clone(), fixes.into_iter());
183 content = buffer;
184 }
185 }
186 }
187
188 let mut new_path = None;
190 if policy.check_filenames {
191 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
192 let mut fixes = Vec::new();
193 for typo in check_str(file_name, policy) {
194 if is_fixable(&typo) {
195 fixes.push(typo.into_owned());
196 } else {
197 let msg = report::Typo {
198 context: Some(report::PathContext { path }.into()),
199 buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
200 byte_offset: typo.byte_offset,
201 typo: typo.typo.as_ref(),
202 corrections: typo.corrections,
203 };
204 reporter.report(msg.into())?;
205 }
206 }
207 if !fixes.is_empty() {
208 let file_name = file_name.to_owned().into_bytes();
209 let new_name = fix_buffer(file_name, fixes.into_iter());
210 let new_name =
211 String::from_utf8(new_name).expect("corrections are valid utf-8");
212 new_path = Some(path.with_file_name(new_name));
213 }
214 }
215 }
216
217 if new_path.is_some() || !content.is_empty() {
218 let original_path = path.display().to_string();
219 let fixed_path = new_path.as_deref().unwrap_or(path).display().to_string();
220 let original_content: Vec<_> = content
221 .lines_with_terminator()
222 .map(|s| String::from_utf8_lossy(s).into_owned())
223 .collect();
224 let fixed_content: Vec<_> = new_content
225 .lines_with_terminator()
226 .map(|s| String::from_utf8_lossy(s).into_owned())
227 .collect();
228 let diff = difflib::unified_diff(
229 &original_content,
230 &fixed_content,
231 original_path.as_str(),
232 fixed_path.as_str(),
233 "original",
234 "fixed",
235 0,
236 );
237 let stdout = std::io::stdout();
238 let mut handle = stdout.lock();
239 for line in diff {
240 write!(handle, "{line}")?;
241 }
242 }
243
244 Ok(())
245 }
246}
247
248#[derive(Debug, Clone, Copy)]
249pub struct Identifiers;
250
251impl FileChecker for Identifiers {
252 fn check_file(
253 &self,
254 path: &std::path::Path,
255 explicit: bool,
256 policy: &crate::policy::Policy<'_, '_, '_>,
257 reporter: &dyn report::Report,
258 ) -> Result<(), std::io::Error> {
259 let mut ignores: Option<Ignores> = None;
260 if policy.check_filenames {
261 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
262 for word in policy.tokenizer.parse_str(file_name) {
263 if ignores
264 .get_or_insert_with(|| Ignores::new(file_name.as_bytes(), policy.ignore))
265 .is_ignored(word.span())
266 {
267 continue;
268 }
269 let msg = report::Parse {
270 context: Some(report::PathContext { path }.into()),
271 kind: report::ParseKind::Identifier,
272 data: word.token(),
273 };
274 reporter.report(msg.into())?;
275 }
276 }
277 }
278
279 if policy.check_files {
280 let (buffer, content_type) = read_file(path, reporter)?;
281 if !explicit && !policy.binary && content_type.is_binary() {
282 let msg = report::BinaryFile { path };
283 reporter.report(msg.into())?;
284 } else {
285 for word in policy.tokenizer.parse_bytes(&buffer) {
286 if ignores
287 .get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
288 .is_ignored(word.span())
289 {
290 continue;
291 }
292 let line_num = 0;
296 let msg = report::Parse {
297 context: Some(report::FileContext { path, line_num }.into()),
298 kind: report::ParseKind::Identifier,
299 data: word.token(),
300 };
301 reporter.report(msg.into())?;
302 }
303 }
304 }
305
306 Ok(())
307 }
308}
309
310#[derive(Debug, Clone, Copy)]
311pub struct Words;
312
313impl FileChecker for Words {
314 fn check_file(
315 &self,
316 path: &std::path::Path,
317 explicit: bool,
318 policy: &crate::policy::Policy<'_, '_, '_>,
319 reporter: &dyn report::Report,
320 ) -> Result<(), std::io::Error> {
321 let mut ignores: Option<Ignores> = None;
322 if policy.check_filenames {
323 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
324 for word in policy
325 .tokenizer
326 .parse_str(file_name)
327 .flat_map(|i| i.split())
328 {
329 if ignores
330 .get_or_insert_with(|| Ignores::new(file_name.as_bytes(), policy.ignore))
331 .is_ignored(word.span())
332 {
333 continue;
334 }
335 let msg = report::Parse {
336 context: Some(report::PathContext { path }.into()),
337 kind: report::ParseKind::Word,
338 data: word.token(),
339 };
340 reporter.report(msg.into())?;
341 }
342 }
343 }
344
345 if policy.check_files {
346 let (buffer, content_type) = read_file(path, reporter)?;
347 if !explicit && !policy.binary && content_type.is_binary() {
348 let msg = report::BinaryFile { path };
349 reporter.report(msg.into())?;
350 } else {
351 for word in policy
352 .tokenizer
353 .parse_bytes(&buffer)
354 .flat_map(|i| i.split())
355 {
356 if ignores
357 .get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
358 .is_ignored(word.span())
359 {
360 continue;
361 }
362 let line_num = 0;
366 let msg = report::Parse {
367 context: Some(report::FileContext { path, line_num }.into()),
368 kind: report::ParseKind::Word,
369 data: word.token(),
370 };
371 reporter.report(msg.into())?;
372 }
373 }
374 }
375
376 Ok(())
377 }
378}
379
380#[derive(Debug, Clone, Copy)]
381pub struct FileTypes;
382
383impl FileChecker for FileTypes {
384 fn check_file(
385 &self,
386 path: &std::path::Path,
387 explicit: bool,
388 policy: &crate::policy::Policy<'_, '_, '_>,
389 reporter: &dyn report::Report,
390 ) -> Result<(), std::io::Error> {
391 if policy.binary {
393 let msg = report::FileType::new(path, policy.file_type);
394 reporter.report(msg.into())?;
395 } else {
396 let (_buffer, content_type) = read_file(path, reporter)?;
397 if !explicit && content_type.is_binary() {
398 let msg = report::BinaryFile { path };
399 reporter.report(msg.into())?;
400 } else {
401 let msg = report::FileType::new(path, policy.file_type);
402 reporter.report(msg.into())?;
403 }
404 }
405
406 Ok(())
407 }
408}
409
410#[derive(Debug, Clone, Copy)]
411pub struct FoundFiles;
412
413impl FileChecker for FoundFiles {
414 fn check_file(
415 &self,
416 path: &std::path::Path,
417 explicit: bool,
418 policy: &crate::policy::Policy<'_, '_, '_>,
419 reporter: &dyn report::Report,
420 ) -> Result<(), std::io::Error> {
421 if policy.binary {
423 let msg = report::File::new(path);
424 reporter.report(msg.into())?;
425 } else {
426 let (_buffer, content_type) = read_file(path, reporter)?;
427 if !explicit && content_type.is_binary() {
428 let msg = report::BinaryFile { path };
429 reporter.report(msg.into())?;
430 } else {
431 let msg = report::File::new(path);
432 reporter.report(msg.into())?;
433 }
434 }
435
436 Ok(())
437 }
438}
439
440fn read_file(
441 path: &std::path::Path,
442 reporter: &dyn report::Report,
443) -> Result<(Vec<u8>, content_inspector::ContentType), std::io::Error> {
444 let buffer = if path == std::path::Path::new("-") {
445 let mut buffer = Vec::new();
446 report_result(
447 std::io::stdin().read_to_end(&mut buffer),
448 Some(path),
449 reporter,
450 )?;
451 buffer
452 } else {
453 report_result(std::fs::read(path), Some(path), reporter)?
454 };
455
456 let content_type = content_inspector::inspect(&buffer);
457
458 let (buffer, content_type) = match content_type {
459 content_inspector::ContentType::BINARY |
460 content_inspector::ContentType::UTF_32LE |
462 content_inspector::ContentType::UTF_32BE => {
463 (buffer, content_inspector::ContentType::BINARY)
464 },
465 content_inspector::ContentType::UTF_8 |
466 content_inspector::ContentType::UTF_8_BOM => {
467 (buffer, content_type)
468 },
469 content_inspector::ContentType::UTF_16LE => {
470 let mut decoded = String::with_capacity(buffer.len() * 2);
474 let (r, written) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
475 let decoded = match r {
476 encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
477 _ => Err(format!("invalid UTF-16LE encoding at byte {written} in {}", path.display())),
478 };
479 let buffer = report_result(decoded, Some(path), reporter)?;
480 (buffer.into_bytes(), content_type)
481 }
482 content_inspector::ContentType::UTF_16BE => {
483 let mut decoded = String::with_capacity(buffer.len() * 2);
487 let (r, written) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
488 let decoded = match r {
489 encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
490 _ => Err(format!("invalid UTF-16BE encoding at byte {written} in {}", path.display())),
491 };
492 let buffer = report_result(decoded, Some(path), reporter)?;
493 (buffer.into_bytes(), content_type)
494 },
495 };
496
497 Ok((buffer, content_type))
498}
499
500fn write_file(
501 path: &std::path::Path,
502 content_type: content_inspector::ContentType,
503 buffer: Vec<u8>,
504 reporter: &dyn report::Report,
505) -> Result<(), std::io::Error> {
506 let buffer = match content_type {
507 content_inspector::ContentType::UTF_32LE | content_inspector::ContentType::UTF_32BE => {
509 unreachable!("read_file should prevent these from being passed along");
510 }
511 content_inspector::ContentType::BINARY
512 | content_inspector::ContentType::UTF_8
513 | content_inspector::ContentType::UTF_8_BOM => buffer,
514 content_inspector::ContentType::UTF_16LE => {
515 let buffer = report_result(String::from_utf8(buffer), Some(path), reporter)?;
516 if buffer.is_empty() {
517 return Ok(());
519 }
520 let (encoded, _, replaced) = encoding_rs::UTF_16LE.encode(&buffer);
521 assert!(
522 !replaced,
523 "Coming from UTF-8, UTF-16LE shouldn't do replacements"
524 );
525 encoded.into_owned()
526 }
527 content_inspector::ContentType::UTF_16BE => {
528 let buffer = report_result(String::from_utf8(buffer), Some(path), reporter)?;
529 if buffer.is_empty() {
530 return Ok(());
532 }
533 let (encoded, _, replaced) = encoding_rs::UTF_16BE.encode(&buffer);
534 assert!(
535 !replaced,
536 "Coming from UTF-8, UTF-16BE shouldn't do replacements"
537 );
538 encoded.into_owned()
539 }
540 };
541
542 if path == std::path::Path::new("-") {
543 report_result(std::io::stdout().write_all(&buffer), Some(path), reporter)?;
544 } else {
545 report_result(std::fs::write(path, buffer), Some(path), reporter)?;
546 }
547
548 Ok(())
549}
550
551fn check_str<'a>(
552 buffer: &'a str,
553 policy: &'a crate::policy::Policy<'a, 'a, 'a>,
554) -> impl Iterator<Item = typos::Typo<'a>> {
555 let mut ignores: Option<Ignores> = None;
556
557 typos::check_str(buffer, policy.tokenizer, policy.dict).filter(move |typo| {
558 !ignores
559 .get_or_insert_with(|| Ignores::new(buffer.as_bytes(), policy.ignore))
560 .is_ignored(typo.span())
561 })
562}
563
564fn check_bytes<'a>(
565 buffer: &'a [u8],
566 policy: &'a crate::policy::Policy<'a, 'a, 'a>,
567) -> impl Iterator<Item = typos::Typo<'a>> {
568 let mut ignores: Option<Ignores> = None;
569
570 typos::check_bytes(buffer, policy.tokenizer, policy.dict).filter(move |typo| {
571 !ignores
572 .get_or_insert_with(|| Ignores::new(buffer, policy.ignore))
573 .is_ignored(typo.span())
574 })
575}
576
577fn report_result<T: Default, E: ToString>(
578 value: Result<T, E>,
579 path: Option<&std::path::Path>,
580 reporter: &dyn report::Report,
581) -> Result<T, std::io::Error> {
582 let buffer = match value {
583 Ok(value) => value,
584 Err(err) => {
585 report_error(err, path, reporter)?;
586 Default::default()
587 }
588 };
589 Ok(buffer)
590}
591
592fn report_error<E: ToString>(
593 err: E,
594 path: Option<&std::path::Path>,
595 reporter: &dyn report::Report,
596) -> Result<(), std::io::Error> {
597 let mut msg = report::Error::new(err.to_string());
598 msg.context = path.map(|path| report::Context::Path(report::PathContext { path }));
599 reporter.report(msg.into())?;
600 Ok(())
601}
602
/// Maps byte offsets to 1-based line numbers, remembering the previous query so
/// each call only scans the bytes since the last offset.
struct AccumulateLineNum {
    /// Line number of the most recently queried offset (starts at 1).
    line_num: usize,
    /// Byte offset of the most recent query (starts at 0).
    last_offset: usize,
}

impl AccumulateLineNum {
    /// Creates a counter positioned at the start of the buffer, on line 1.
    fn new() -> Self {
        let line_num = 1;
        let last_offset = 0;
        Self {
            line_num,
            last_offset,
        }
    }

    /// Returns the line number containing `byte_offset` in `buffer`.
    ///
    /// # Panics
    ///
    /// Panics if `byte_offset` is smaller than the offset of the previous call, or
    /// lies beyond the end of `buffer`.
    fn line_num(&mut self, buffer: &[u8], byte_offset: usize) -> usize {
        assert!(self.last_offset <= byte_offset);
        let crossed = buffer[self.last_offset..byte_offset]
            .iter()
            .filter(|&&byte| byte == b'\n')
            .count();
        self.line_num += crossed;
        self.last_offset = byte_offset;
        self.line_num
    }
}
627
/// Returns the line of `buffer` containing `byte_offset`, together with the offset
/// rebased onto the start of that line.
///
/// The returned line excludes its terminator (`\n` or `\r\n`).
///
/// # Panics
///
/// Panics if `byte_offset` is past the end of `buffer`, or if nothing follows the
/// start of the line (for example an offset at the very end of the buffer, right
/// after a newline).
fn extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) {
    // The line starts just after the last newline that precedes the offset.
    let line_start = match buffer[0..byte_offset].iter().rposition(|&byte| byte == b'\n') {
        Some(newline) => newline + 1,
        None => 0,
    };

    let rest = &buffer[line_start..];
    assert!(!rest.is_empty(), "should always be at least a line");

    let line = match rest.iter().position(|&byte| byte == b'\n') {
        Some(newline) => {
            let body = &rest[..newline];
            // A carriage return is only part of the terminator when it precedes `\n`.
            match body.split_last() {
                Some((&b'\r', head)) => head,
                _ => body,
            }
        }
        None => rest,
    };

    (line, byte_offset - line_start)
}
641
642fn extract_fix<'t>(typo: &'t typos::Typo<'t>) -> Option<&'t str> {
643 match &typo.corrections {
644 typos::Status::Corrections(c) if c.len() == 1 => Some(c[0].as_ref()),
645 _ => None,
646 }
647}
648
649fn is_fixable(typo: &typos::Typo<'_>) -> bool {
650 extract_fix(typo).is_some()
651}
652
653fn fix_buffer(mut buffer: Vec<u8>, typos: impl Iterator<Item = typos::Typo<'static>>) -> Vec<u8> {
654 let mut offset = 0isize;
655 for typo in typos {
656 let fix = extract_fix(&typo).expect("Caller only provides fixable typos");
657 let start = ((typo.byte_offset as isize) + offset) as usize;
658 let end = start + typo.typo.len();
659
660 buffer.splice(start..end, fix.as_bytes().iter().copied());
661
662 offset += (fix.len() as isize) - (typo.typo.len() as isize);
663 }
664 buffer
665}
666
667pub fn walk_path(
668 walk: ignore::Walk,
669 checks: &dyn FileChecker,
670 engine: &crate::policy::ConfigEngine<'_>,
671 reporter: &dyn report::Report,
672 force_exclude: bool,
673) -> Result<(), ignore::Error> {
674 for entry in walk {
675 walk_entry(entry, checks, engine, reporter, force_exclude)?;
676 }
677 Ok(())
678}
679
680pub fn walk_path_parallel(
681 walk: ignore::WalkParallel,
682 checks: &dyn FileChecker,
683 engine: &crate::policy::ConfigEngine<'_>,
684 reporter: &dyn report::Report,
685 force_exclude: bool,
686) -> Result<(), ignore::Error> {
687 let error: std::sync::Mutex<Result<(), ignore::Error>> = std::sync::Mutex::new(Ok(()));
688 walk.run(|| {
689 Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
690 match walk_entry(entry, checks, engine, reporter, force_exclude) {
691 Ok(()) => ignore::WalkState::Continue,
692 Err(err) => {
693 *error.lock().unwrap() = Err(err);
694 ignore::WalkState::Quit
695 }
696 }
697 })
698 });
699
700 error.into_inner().unwrap()
701}
702
703fn walk_entry(
704 entry: Result<ignore::DirEntry, ignore::Error>,
705 checks: &dyn FileChecker,
706 engine: &crate::policy::ConfigEngine<'_>,
707 reporter: &dyn report::Report,
708 force_exclude: bool,
709) -> Result<(), ignore::Error> {
710 let entry = match entry {
711 Ok(entry) => entry,
712 Err(err) => {
713 report_error(err, None, reporter)?;
714 return Ok(());
715 }
716 };
717 if crate::config::SUPPORTED_FILE_NAMES
718 .iter()
719 .any(|n| *n == entry.file_name())
720 {
721 log::debug!(
722 "{}: skipping potential config file as it may have typos",
723 entry.path().display()
724 );
725 return Ok(());
726 }
727 if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
728 let explicit = entry.depth() == 0 && !force_exclude;
729 let (path, lookup_path) = if entry.is_stdin() {
730 let path = std::path::Path::new("-");
731 let cwd = std::env::current_dir().map_err(|err| {
732 let kind = err.kind();
733 std::io::Error::new(kind, "no current working directory".to_owned())
734 })?;
735 (path, cwd)
736 } else {
737 let path = entry.path();
738 let abs_path = report_result(path.canonicalize(), Some(path), reporter)?;
739 (path, abs_path)
740 };
741 let policy = engine.policy(&lookup_path);
742 checks.check_file(path, explicit, &policy, reporter)?;
743 }
744
745 Ok(())
746}
747
748#[derive(Clone, Debug)]
749struct Ignores {
750 blocks: Vec<std::ops::Range<usize>>,
751}
752
753impl Ignores {
754 fn new(content: &[u8], ignores: &[regex::Regex]) -> Self {
755 let mut blocks = Vec::new();
756 if let Ok(content) = std::str::from_utf8(content) {
757 for ignore in ignores {
758 for mat in ignore.find_iter(content) {
759 blocks.push(mat.range());
760 }
761 }
762 }
763 Self { blocks }
764 }
765
766 fn is_ignored(&self, span: std::ops::Range<usize>) -> bool {
767 let start = span.start;
768 let end = span.end.saturating_sub(1);
769 self.blocks
770 .iter()
771 .any(|block| block.contains(&start) || block.contains(&end))
772 }
773}
774
// Unit tests for the buffer-fixing and line-locating helpers.
#[cfg(test)]
mod test {
    use super::*;

    /// Applies `corrections` (byte offset, typo, replacement) to `line` via `fix_buffer`
    /// and returns the fixed text.
    fn fix_simple(line: &str, corrections: Vec<(usize, &'static str, &'static str)>) -> String {
        let line = line.as_bytes().to_vec();
        let corrections = corrections
            .into_iter()
            .map(|(byte_offset, typo, correction)| typos::Typo {
                byte_offset,
                typo: typo.into(),
                corrections: typos::Status::Corrections(vec![correction.into()]),
            });
        let actual = fix_buffer(line, corrections);
        String::from_utf8(actual).unwrap()
    }

    // Same-length replacement in the middle of the buffer.
    #[test]
    fn test_fix_buffer_single() {
        let actual = fix_simple("foo foo foo", vec![(4, "foo", "bar")]);
        assert_eq!(actual, "foo bar foo");
    }

    // Replacement longer than the typo.
    #[test]
    fn test_fix_buffer_single_grow() {
        let actual = fix_simple("foo foo foo", vec![(4, "foo", "happy")]);
        assert_eq!(actual, "foo happy foo");
    }

    // Replacement shorter than the typo.
    #[test]
    fn test_fix_buffer_single_shrink() {
        let actual = fix_simple("foo foo foo", vec![(4, "foo", "if")]);
        assert_eq!(actual, "foo if foo");
    }

    // Typo at offset 0.
    #[test]
    fn test_fix_buffer_start() {
        let actual = fix_simple("foo foo foo", vec![(0, "foo", "bar")]);
        assert_eq!(actual, "bar foo foo");
    }

    // Typo ending at the end of the buffer.
    #[test]
    fn test_fix_buffer_end() {
        let actual = fix_simple("foo foo foo", vec![(8, "foo", "bar")]);
        assert_eq!(actual, "foo foo bar");
    }

    // Typo at the end of the buffer with a longer replacement.
    #[test]
    fn test_fix_buffer_end_grow() {
        let actual = fix_simple("foo foo foo", vec![(8, "foo", "happy")]);
        assert_eq!(actual, "foo foo happy");
    }

    // The second offset refers to the original buffer, so the first fix's growth must
    // be carried over.
    #[test]
    fn test_fix_buffer_multiple() {
        let actual = fix_simple(
            "foo foo foo",
            vec![(4, "foo", "happy"), (8, "foo", "world")],
        );
        assert_eq!(actual, "foo happy world");
    }

    // An offset on the first line reports line 1.
    #[test]
    fn test_line_count_first() {
        let mut accum_line_num = AccumulateLineNum::new();
        let line_num = accum_line_num.line_num(b"hello world", 6);
        assert_eq!(line_num, 1);
    }

    // An offset just past the first newline reports line 2.
    #[test]
    fn test_line_count_second() {
        let mut accum_line_num = AccumulateLineNum::new();
        let line_num = accum_line_num.line_num(b"1\n2\n3", 2);
        assert_eq!(line_num, 2);
    }

    // Successive increasing offsets on one counter accumulate line numbers.
    #[test]
    fn test_line_count_multiple() {
        let mut accum_line_num = AccumulateLineNum::new();
        let line_num = accum_line_num.line_num(b"1\n2\n3", 0);
        assert_eq!(line_num, 1);
        let line_num = accum_line_num.line_num(b"1\n2\n3", 2);
        assert_eq!(line_num, 2);
        let line_num = accum_line_num.line_num(b"1\n2\n3", 4);
        assert_eq!(line_num, 3);
    }

    // A buffer without newlines is returned whole, offset unchanged.
    #[test]
    fn test_extract_line_single_line() {
        let buffer = b"hello world";
        let buffer_offset = 6;
        let expected_line = b"hello world";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 6);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    // Offset on the first of several lines.
    #[test]
    fn test_extract_line_first() {
        let buffer = b"1\n2\n3";
        let buffer_offset = 0;
        let expected_line = b"1";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 0);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    // Offset on a middle line.
    #[test]
    fn test_extract_line_middle() {
        let buffer = b"1\n2\n3";
        let buffer_offset = 2;
        let expected_line = b"2";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 0);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    // Offset on the last line, which has no terminator.
    #[test]
    fn test_extract_line_end() {
        let buffer = b"1\n2\n3";
        let buffer_offset = 4;
        let expected_line = b"3";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 0);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    // The returned offset is relative to the extracted line, not the buffer.
    #[test]
    fn test_extract_line_offset_change() {
        let buffer = b"1\nhello world\n2";
        let buffer_offset = 8;
        let expected_line = b"hello world";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 6);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    // `\r\n` terminators are excluded from the extracted line.
    #[test]
    fn test_extract_line_windows() {
        let buffer = b"1\r\nhello world\r\n2";
        let buffer_offset = 9;
        let expected_line = b"hello world";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 6);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    // Lines containing bytes that are not valid UTF-8 are still extracted byte-for-byte.
    #[test]
    fn test_extract_line_slovak() {
        let buffer = b"LastErrorMessage=%1.%n%nChyba %2: %3\r\nSetupFileMissing=In\x9Atala\xE8n\xFD adres\xE1r neobsahuje s\xFAbor %1. Opravte, pros\xEDm, t\xFAto chybu alebo si zaobstarajte nov\xFA k\xF3piu tohto produktu.\r\nSetupFileCorrupt=S\xFAbory sprievodcu in\x9Atal\xE1ciou s\xFA po\x9Akoden\xE9. Zaobstarajte si, pros\xEDm, nov\xFA k\xF3piu tohto produktu.";
        let buffer_offset = 66;
        let expected_line = b"SetupFileMissing=In\x9Atala\xE8n\xFD adres\xE1r neobsahuje s\xFAbor %1. Opravte, pros\xEDm, t\xFAto chybu alebo si zaobstarajte nov\xFA k\xF3piu tohto produktu.";
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, 28);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }
}