1use bstr::ByteSlice;
2use std::io::Read;
3use std::io::Write;
4
5use crate::report;
6
7pub trait FileChecker: Send + Sync {
8 fn check_file(
9 &self,
10 path: &std::path::Path,
11 explicit: bool,
12 policy: &crate::policy::Policy<'_, '_, '_>,
13 reporter: &dyn report::Report,
14 ) -> Result<(), std::io::Error>;
15}
16
17#[derive(Debug, Clone, Copy)]
18pub struct Typos;
19
20impl FileChecker for Typos {
21 fn check_file(
22 &self,
23 path: &std::path::Path,
24 explicit: bool,
25 policy: &crate::policy::Policy<'_, '_, '_>,
26 reporter: &dyn report::Report,
27 ) -> Result<(), std::io::Error> {
28 if policy.check_filenames {
29 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
30 for typo in check_str(file_name, policy) {
31 let msg = report::Typo {
32 context: Some(report::PathContext { path }.into()),
33 buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
34 byte_offset: typo.byte_offset,
35 typo: typo.typo.as_ref(),
36 corrections: typo.corrections,
37 };
38 reporter.report(msg.into())?;
39 }
40 }
41 }
42
43 if policy.check_files {
44 let (buffer, content_type) = read_file(path, reporter)?;
45 if !explicit && !policy.binary && content_type.is_binary() {
46 let msg = report::BinaryFile { path };
47 reporter.report(msg.into())?;
48 } else {
49 let mut accum_line_num = AccumulateLineNum::new();
50 for typo in check_bytes(&buffer, policy) {
51 let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
52 let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
53 let msg = report::Typo {
54 context: Some(report::FileContext { path, line_num }.into()),
55 buffer: std::borrow::Cow::Borrowed(line),
56 byte_offset: line_offset,
57 typo: typo.typo.as_ref(),
58 corrections: typo.corrections,
59 };
60 reporter.report(msg.into())?;
61 }
62 }
63 }
64
65 Ok(())
66 }
67}
68
69#[derive(Debug, Clone, Copy)]
70pub struct FixTypos;
71
72impl FileChecker for FixTypos {
73 fn check_file(
74 &self,
75 path: &std::path::Path,
76 explicit: bool,
77 policy: &crate::policy::Policy<'_, '_, '_>,
78 reporter: &dyn report::Report,
79 ) -> Result<(), std::io::Error> {
80 if policy.check_files {
81 let (buffer, content_type) = read_file(path, reporter)?;
82 if !explicit && !policy.binary && content_type.is_binary() {
83 let msg = report::BinaryFile { path };
84 reporter.report(msg.into())?;
85 } else {
86 let mut fixes = Vec::new();
87 let mut accum_line_num = AccumulateLineNum::new();
88 for typo in check_bytes(&buffer, policy) {
89 if is_fixable(&typo) {
90 fixes.push(typo.into_owned());
91 } else {
92 let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
93 let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
94 let msg = report::Typo {
95 context: Some(report::FileContext { path, line_num }.into()),
96 buffer: std::borrow::Cow::Borrowed(line),
97 byte_offset: line_offset,
98 typo: typo.typo.as_ref(),
99 corrections: typo.corrections,
100 };
101 reporter.report(msg.into())?;
102 }
103 }
104 if !fixes.is_empty() || path == std::path::Path::new("-") {
105 let buffer = fix_buffer(buffer, fixes.into_iter());
106 write_file(path, content_type, buffer, reporter)?;
107 }
108 }
109 }
110
111 if policy.check_filenames {
113 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
114 let mut fixes = Vec::new();
115 for typo in check_str(file_name, policy) {
116 if is_fixable(&typo) {
117 fixes.push(typo.into_owned());
118 } else {
119 let msg = report::Typo {
120 context: Some(report::PathContext { path }.into()),
121 buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
122 byte_offset: typo.byte_offset,
123 typo: typo.typo.as_ref(),
124 corrections: typo.corrections,
125 };
126 reporter.report(msg.into())?;
127 }
128 }
129 if !fixes.is_empty() {
130 let file_name = file_name.to_owned().into_bytes();
131 let new_name = fix_buffer(file_name, fixes.into_iter());
132 let new_name =
133 String::from_utf8(new_name).expect("corrections are valid utf-8");
134 let new_path = path.with_file_name(new_name);
135 std::fs::rename(path, new_path)?;
136 }
137 }
138 }
139
140 Ok(())
141 }
142}
143
144#[derive(Debug, Clone, Copy)]
145pub struct DiffTypos;
146
147impl FileChecker for DiffTypos {
148 fn check_file(
149 &self,
150 path: &std::path::Path,
151 explicit: bool,
152 policy: &crate::policy::Policy<'_, '_, '_>,
153 reporter: &dyn report::Report,
154 ) -> Result<(), std::io::Error> {
155 let mut content = Vec::new();
156 let mut new_content = Vec::new();
157 if policy.check_files {
158 let (buffer, content_type) = read_file(path, reporter)?;
159 if !explicit && !policy.binary && content_type.is_binary() {
160 let msg = report::BinaryFile { path };
161 reporter.report(msg.into())?;
162 } else {
163 let mut fixes = Vec::new();
164 let mut accum_line_num = AccumulateLineNum::new();
165 for typo in check_bytes(&buffer, policy) {
166 if is_fixable(&typo) {
167 fixes.push(typo.into_owned());
168 } else {
169 let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
170 let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
171 let msg = report::Typo {
172 context: Some(report::FileContext { path, line_num }.into()),
173 buffer: std::borrow::Cow::Borrowed(line),
174 byte_offset: line_offset,
175 typo: typo.typo.as_ref(),
176 corrections: typo.corrections,
177 };
178 reporter.report(msg.into())?;
179 }
180 }
181 if !fixes.is_empty() {
182 new_content = fix_buffer(buffer.clone(), fixes.into_iter());
183 content = buffer;
184 }
185 }
186 }
187
188 let mut new_path = None;
190 if policy.check_filenames {
191 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
192 let mut fixes = Vec::new();
193 for typo in check_str(file_name, policy) {
194 if is_fixable(&typo) {
195 fixes.push(typo.into_owned());
196 } else {
197 let msg = report::Typo {
198 context: Some(report::PathContext { path }.into()),
199 buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
200 byte_offset: typo.byte_offset,
201 typo: typo.typo.as_ref(),
202 corrections: typo.corrections,
203 };
204 reporter.report(msg.into())?;
205 }
206 }
207 if !fixes.is_empty() {
208 let file_name = file_name.to_owned().into_bytes();
209 let new_name = fix_buffer(file_name, fixes.into_iter());
210 let new_name =
211 String::from_utf8(new_name).expect("corrections are valid utf-8");
212 new_path = Some(path.with_file_name(new_name));
213 }
214 }
215 }
216
217 if new_path.is_some() || !content.is_empty() {
218 let original_path = path.display().to_string();
219 let fixed_path = new_path.as_deref().unwrap_or(path).display().to_string();
220 let original_content: Vec<_> = content
221 .lines_with_terminator()
222 .map(|s| String::from_utf8_lossy(s).into_owned())
223 .collect();
224 let fixed_content: Vec<_> = new_content
225 .lines_with_terminator()
226 .map(|s| String::from_utf8_lossy(s).into_owned())
227 .collect();
228 let diff = difflib::unified_diff(
229 &original_content,
230 &fixed_content,
231 original_path.as_str(),
232 fixed_path.as_str(),
233 "original",
234 "fixed",
235 0,
236 );
237 let stdout = std::io::stdout();
238 let mut handle = stdout.lock();
239 for line in diff {
240 write!(handle, "{line}")?;
241 }
242 }
243
244 Ok(())
245 }
246}
247
248#[derive(Debug, Clone, Copy)]
249pub struct HighlightIdentifiers;
250
251impl FileChecker for HighlightIdentifiers {
252 fn check_file(
253 &self,
254 path: &std::path::Path,
255 explicit: bool,
256 policy: &crate::policy::Policy<'_, '_, '_>,
257 reporter: &dyn report::Report,
258 ) -> Result<(), std::io::Error> {
259 use std::fmt::Write as _;
260
261 let stdout = std::io::stdout();
262 let mut handle = stdout.lock();
263
264 let mut ignores: Option<Ignores> = None;
265 if policy.check_filenames {
266 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
267 let mut styled = String::new();
268 let mut prev_end = 0;
269 for (word, highlight) in policy
270 .tokenizer
271 .parse_str(file_name)
272 .filter(|word| {
273 !ignores
274 .get_or_insert_with(|| {
275 Ignores::new(file_name.as_bytes(), policy.ignore)
276 })
277 .is_ignored(word.span())
278 })
279 .zip(HIGHLIGHTS.iter().cycle())
280 {
281 let start = word.offset();
282 let end = word.offset() + word.token().len();
283 if prev_end != start {
284 let _ = write!(
285 &mut styled,
286 "{UNMATCHED}{}{UNMATCHED:#}",
287 &file_name[prev_end..start]
288 );
289 }
290 let _ = write!(&mut styled, "{highlight}{}{highlight:#}", word.token());
291 prev_end = end;
292 }
293 let _ = write!(
294 &mut styled,
295 "{UNMATCHED}{}{UNMATCHED:#}",
296 &file_name[prev_end..file_name.len()]
297 );
298
299 let parent_dir = path.parent().unwrap();
300 if !parent_dir.as_os_str().is_empty() {
301 let parent_dir = parent_dir.display();
302 write!(handle, "{UNMATCHED}{parent_dir}/")?;
303 }
304 writeln!(handle, "{styled}{UNMATCHED}:{UNMATCHED:#}")?;
305 } else {
306 writeln!(handle, "{UNMATCHED}{}:{UNMATCHED:#}", path.display())?;
307 }
308 } else {
309 writeln!(handle, "{UNMATCHED}{}:{UNMATCHED:#}", path.display())?;
310 }
311
312 if policy.check_files {
313 let (buffer, content_type) = read_file(path, reporter)?;
314 if !explicit && !policy.binary && content_type.is_binary() {
315 } else if let Ok(buffer) = buffer.to_str() {
317 let mut styled = String::new();
318 let mut prev_end = 0;
319 for (word, highlight) in policy
320 .tokenizer
321 .parse_bytes(buffer.as_bytes())
322 .filter(|word| {
323 !ignores
324 .get_or_insert_with(|| Ignores::new(buffer.as_bytes(), policy.ignore))
325 .is_ignored(word.span())
326 })
327 .zip(HIGHLIGHTS.iter().cycle())
328 {
329 let start = word.offset();
330 let end = word.offset() + word.token().len();
331 if prev_end != start {
332 let _ = write!(
333 &mut styled,
334 "{UNMATCHED}{}{UNMATCHED:#}",
335 &buffer[prev_end..start]
336 );
337 }
338 let _ = write!(&mut styled, "{highlight}{}{highlight:#}", word.token());
339 prev_end = end;
340 }
341 let _ = write!(
342 &mut styled,
343 "{UNMATCHED}{}{UNMATCHED:#}",
344 &buffer[prev_end..buffer.len()]
345 );
346
347 write!(handle, "{styled}")?;
348 }
349 }
350
351 Ok(())
352 }
353}
354
355#[derive(Debug, Clone, Copy)]
356pub struct Identifiers;
357
358impl FileChecker for Identifiers {
359 fn check_file(
360 &self,
361 path: &std::path::Path,
362 explicit: bool,
363 policy: &crate::policy::Policy<'_, '_, '_>,
364 reporter: &dyn report::Report,
365 ) -> Result<(), std::io::Error> {
366 let mut ignores: Option<Ignores> = None;
367 if policy.check_filenames {
368 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
369 for word in policy.tokenizer.parse_str(file_name) {
370 if ignores
371 .get_or_insert_with(|| Ignores::new(file_name.as_bytes(), policy.ignore))
372 .is_ignored(word.span())
373 {
374 continue;
375 }
376 let msg = report::Parse {
377 context: Some(report::PathContext { path }.into()),
378 kind: report::ParseKind::Identifier,
379 data: word.token(),
380 };
381 reporter.report(msg.into())?;
382 }
383 }
384 }
385
386 if policy.check_files {
387 let (buffer, content_type) = read_file(path, reporter)?;
388 if !explicit && !policy.binary && content_type.is_binary() {
389 let msg = report::BinaryFile { path };
390 reporter.report(msg.into())?;
391 } else {
392 for word in policy.tokenizer.parse_bytes(&buffer) {
393 if ignores
394 .get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
395 .is_ignored(word.span())
396 {
397 continue;
398 }
399 let line_num = 0;
403 let msg = report::Parse {
404 context: Some(report::FileContext { path, line_num }.into()),
405 kind: report::ParseKind::Identifier,
406 data: word.token(),
407 };
408 reporter.report(msg.into())?;
409 }
410 }
411 }
412
413 Ok(())
414 }
415}
416
417#[derive(Debug, Clone, Copy)]
418pub struct HighlightWords;
419
420impl FileChecker for HighlightWords {
421 fn check_file(
422 &self,
423 path: &std::path::Path,
424 explicit: bool,
425 policy: &crate::policy::Policy<'_, '_, '_>,
426 reporter: &dyn report::Report,
427 ) -> Result<(), std::io::Error> {
428 use std::fmt::Write as _;
429
430 let stdout = std::io::stdout();
431 let mut handle = stdout.lock();
432
433 let mut ignores: Option<Ignores> = None;
434 if policy.check_filenames {
435 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
436 let mut styled = String::new();
437 let mut prev_end = 0;
438 for (word, highlight) in policy
439 .tokenizer
440 .parse_str(file_name)
441 .flat_map(|i| i.split())
442 .filter(|word| {
443 !ignores
444 .get_or_insert_with(|| {
445 Ignores::new(file_name.as_bytes(), policy.ignore)
446 })
447 .is_ignored(word.span())
448 })
449 .zip(HIGHLIGHTS.iter().cycle())
450 {
451 let start = word.offset();
452 let end = word.offset() + word.token().len();
453 if prev_end != start {
454 let _ = write!(
455 &mut styled,
456 "{UNMATCHED}{}{UNMATCHED:#}",
457 &file_name[prev_end..start]
458 );
459 }
460 let _ = write!(&mut styled, "{highlight}{}{highlight:#}", word.token());
461 prev_end = end;
462 }
463 let _ = write!(
464 &mut styled,
465 "{UNMATCHED}{}{UNMATCHED:#}",
466 &file_name[prev_end..file_name.len()]
467 );
468
469 let parent_dir = path.parent().unwrap();
470 if !parent_dir.as_os_str().is_empty() {
471 let parent_dir = parent_dir.display();
472 write!(handle, "{UNMATCHED}{parent_dir}/")?;
473 }
474 writeln!(handle, "{styled}{UNMATCHED}:{UNMATCHED:#}")?;
475 } else {
476 writeln!(handle, "{UNMATCHED}{}:{UNMATCHED:#}", path.display())?;
477 }
478 } else {
479 writeln!(handle, "{UNMATCHED}{}:{UNMATCHED:#}", path.display())?;
480 }
481
482 if policy.check_files {
483 let (buffer, content_type) = read_file(path, reporter)?;
484 if !explicit && !policy.binary && content_type.is_binary() {
485 } else if let Ok(buffer) = buffer.to_str() {
487 let mut styled = String::new();
488 let mut prev_end = 0;
489 for (word, highlight) in policy
490 .tokenizer
491 .parse_bytes(buffer.as_bytes())
492 .flat_map(|i| i.split())
493 .filter(|word| {
494 !ignores
495 .get_or_insert_with(|| Ignores::new(buffer.as_bytes(), policy.ignore))
496 .is_ignored(word.span())
497 })
498 .zip(HIGHLIGHTS.iter().cycle())
499 {
500 let start = word.offset();
501 let end = word.offset() + word.token().len();
502 if prev_end != start {
503 let _ = write!(
504 &mut styled,
505 "{UNMATCHED}{}{UNMATCHED:#}",
506 &buffer[prev_end..start]
507 );
508 }
509 let _ = write!(&mut styled, "{highlight}{}{highlight:#}", word.token());
510 prev_end = end;
511 }
512 let _ = write!(
513 &mut styled,
514 "{UNMATCHED}{}{UNMATCHED:#}",
515 &buffer[prev_end..buffer.len()]
516 );
517
518 write!(handle, "{styled}")?;
519 }
520 }
521
522 Ok(())
523 }
524}
525
526static HIGHLIGHTS: &[anstyle::Style] = &[
527 anstyle::AnsiColor::Cyan.on_default(),
528 anstyle::AnsiColor::Cyan
529 .on_default()
530 .effects(anstyle::Effects::BOLD),
531];
532
533static UNMATCHED: anstyle::Style = anstyle::Style::new().effects(anstyle::Effects::DIMMED);
534
535#[derive(Debug, Clone, Copy)]
536pub struct Words;
537
538impl FileChecker for Words {
539 fn check_file(
540 &self,
541 path: &std::path::Path,
542 explicit: bool,
543 policy: &crate::policy::Policy<'_, '_, '_>,
544 reporter: &dyn report::Report,
545 ) -> Result<(), std::io::Error> {
546 let mut ignores: Option<Ignores> = None;
547 if policy.check_filenames {
548 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
549 for word in policy
550 .tokenizer
551 .parse_str(file_name)
552 .flat_map(|i| i.split())
553 {
554 if ignores
555 .get_or_insert_with(|| Ignores::new(file_name.as_bytes(), policy.ignore))
556 .is_ignored(word.span())
557 {
558 continue;
559 }
560 let msg = report::Parse {
561 context: Some(report::PathContext { path }.into()),
562 kind: report::ParseKind::Word,
563 data: word.token(),
564 };
565 reporter.report(msg.into())?;
566 }
567 }
568 }
569
570 if policy.check_files {
571 let (buffer, content_type) = read_file(path, reporter)?;
572 if !explicit && !policy.binary && content_type.is_binary() {
573 let msg = report::BinaryFile { path };
574 reporter.report(msg.into())?;
575 } else {
576 for word in policy
577 .tokenizer
578 .parse_bytes(&buffer)
579 .flat_map(|i| i.split())
580 {
581 if ignores
582 .get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
583 .is_ignored(word.span())
584 {
585 continue;
586 }
587 let line_num = 0;
591 let msg = report::Parse {
592 context: Some(report::FileContext { path, line_num }.into()),
593 kind: report::ParseKind::Word,
594 data: word.token(),
595 };
596 reporter.report(msg.into())?;
597 }
598 }
599 }
600
601 Ok(())
602 }
603}
604
605#[derive(Debug, Clone, Copy)]
606pub struct FileTypes;
607
608impl FileChecker for FileTypes {
609 fn check_file(
610 &self,
611 path: &std::path::Path,
612 explicit: bool,
613 policy: &crate::policy::Policy<'_, '_, '_>,
614 reporter: &dyn report::Report,
615 ) -> Result<(), std::io::Error> {
616 if policy.binary {
618 let msg = report::FileType::new(path, policy.file_type);
619 reporter.report(msg.into())?;
620 } else {
621 let (_buffer, content_type) = read_file(path, reporter)?;
622 if !explicit && content_type.is_binary() {
623 let msg = report::BinaryFile { path };
624 reporter.report(msg.into())?;
625 } else {
626 let msg = report::FileType::new(path, policy.file_type);
627 reporter.report(msg.into())?;
628 }
629 }
630
631 Ok(())
632 }
633}
634
635#[derive(Debug, Clone, Copy)]
636pub struct FoundFiles;
637
638impl FileChecker for FoundFiles {
639 fn check_file(
640 &self,
641 path: &std::path::Path,
642 explicit: bool,
643 policy: &crate::policy::Policy<'_, '_, '_>,
644 reporter: &dyn report::Report,
645 ) -> Result<(), std::io::Error> {
646 if policy.binary {
648 let msg = report::File::new(path);
649 reporter.report(msg.into())?;
650 } else {
651 let (_buffer, content_type) = read_file(path, reporter)?;
652 if !explicit && content_type.is_binary() {
653 let msg = report::BinaryFile { path };
654 reporter.report(msg.into())?;
655 } else {
656 let msg = report::File::new(path);
657 reporter.report(msg.into())?;
658 }
659 }
660
661 Ok(())
662 }
663}
664
665fn read_file(
666 path: &std::path::Path,
667 reporter: &dyn report::Report,
668) -> Result<(Vec<u8>, content_inspector::ContentType), std::io::Error> {
669 let buffer = if path == std::path::Path::new("-") {
670 let mut buffer = Vec::new();
671 report_result(
672 std::io::stdin().read_to_end(&mut buffer),
673 Some(path),
674 reporter,
675 )?;
676 buffer
677 } else {
678 report_result(std::fs::read(path), Some(path), reporter)?
679 };
680
681 let content_type = content_inspector::inspect(&buffer);
682
683 let (buffer, content_type) = match content_type {
684 content_inspector::ContentType::BINARY |
685 content_inspector::ContentType::UTF_32LE |
687 content_inspector::ContentType::UTF_32BE => {
688 (buffer, content_inspector::ContentType::BINARY)
689 },
690 content_inspector::ContentType::UTF_8 |
691 content_inspector::ContentType::UTF_8_BOM => {
692 (buffer, content_type)
693 },
694 content_inspector::ContentType::UTF_16LE => {
695 let mut decoded = String::with_capacity(buffer.len() * 2);
699 let (r, written) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
700 let decoded = match r {
701 encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
702 _ => Err(format!("invalid UTF-16LE encoding at byte {written} in {}", path.display())),
703 };
704 let buffer = report_result(decoded, Some(path), reporter)?;
705 (buffer.into_bytes(), content_type)
706 }
707 content_inspector::ContentType::UTF_16BE => {
708 let mut decoded = String::with_capacity(buffer.len() * 2);
712 let (r, written) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
713 let decoded = match r {
714 encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
715 _ => Err(format!("invalid UTF-16BE encoding at byte {written} in {}", path.display())),
716 };
717 let buffer = report_result(decoded, Some(path), reporter)?;
718 (buffer.into_bytes(), content_type)
719 },
720 };
721
722 Ok((buffer, content_type))
723}
724
725fn write_file(
726 path: &std::path::Path,
727 content_type: content_inspector::ContentType,
728 buffer: Vec<u8>,
729 reporter: &dyn report::Report,
730) -> Result<(), std::io::Error> {
731 let buffer = match content_type {
732 content_inspector::ContentType::UTF_32LE | content_inspector::ContentType::UTF_32BE => {
734 unreachable!("read_file should prevent these from being passed along");
735 }
736 content_inspector::ContentType::BINARY
737 | content_inspector::ContentType::UTF_8
738 | content_inspector::ContentType::UTF_8_BOM => buffer,
739 content_inspector::ContentType::UTF_16LE => {
740 let buffer = report_result(String::from_utf8(buffer), Some(path), reporter)?;
741 if buffer.is_empty() {
742 return Ok(());
744 }
745 let (encoded, _, replaced) = encoding_rs::UTF_16LE.encode(&buffer);
746 assert!(
747 !replaced,
748 "Coming from UTF-8, UTF-16LE shouldn't do replacements"
749 );
750 encoded.into_owned()
751 }
752 content_inspector::ContentType::UTF_16BE => {
753 let buffer = report_result(String::from_utf8(buffer), Some(path), reporter)?;
754 if buffer.is_empty() {
755 return Ok(());
757 }
758 let (encoded, _, replaced) = encoding_rs::UTF_16BE.encode(&buffer);
759 assert!(
760 !replaced,
761 "Coming from UTF-8, UTF-16BE shouldn't do replacements"
762 );
763 encoded.into_owned()
764 }
765 };
766
767 if path == std::path::Path::new("-") {
768 report_result(std::io::stdout().write_all(&buffer), Some(path), reporter)?;
769 } else {
770 report_result(std::fs::write(path, buffer), Some(path), reporter)?;
771 }
772
773 Ok(())
774}
775
776fn check_str<'a>(
777 buffer: &'a str,
778 policy: &'a crate::policy::Policy<'a, 'a, 'a>,
779) -> impl Iterator<Item = typos::Typo<'a>> {
780 let mut ignores: Option<Ignores> = None;
781
782 typos::check_str(buffer, policy.tokenizer, policy.dict).filter(move |typo| {
783 !ignores
784 .get_or_insert_with(|| Ignores::new(buffer.as_bytes(), policy.ignore))
785 .is_ignored(typo.span())
786 })
787}
788
789fn check_bytes<'a>(
790 buffer: &'a [u8],
791 policy: &'a crate::policy::Policy<'a, 'a, 'a>,
792) -> impl Iterator<Item = typos::Typo<'a>> {
793 let mut ignores: Option<Ignores> = None;
794
795 typos::check_bytes(buffer, policy.tokenizer, policy.dict).filter(move |typo| {
796 !ignores
797 .get_or_insert_with(|| Ignores::new(buffer, policy.ignore))
798 .is_ignored(typo.span())
799 })
800}
801
802fn report_result<T: Default, E: ToString>(
803 value: Result<T, E>,
804 path: Option<&std::path::Path>,
805 reporter: &dyn report::Report,
806) -> Result<T, std::io::Error> {
807 let buffer = match value {
808 Ok(value) => value,
809 Err(err) => {
810 report_error(err, path, reporter)?;
811 Default::default()
812 }
813 };
814 Ok(buffer)
815}
816
817fn report_error<E: ToString>(
818 err: E,
819 path: Option<&std::path::Path>,
820 reporter: &dyn report::Report,
821) -> Result<(), std::io::Error> {
822 let mut msg = report::Error::new(err.to_string());
823 msg.context = path.map(|path| report::Context::Path(report::PathContext { path }));
824 reporter.report(msg.into())?;
825 Ok(())
826}
827
828struct AccumulateLineNum {
829 line_num: usize,
830 last_offset: usize,
831}
832
833impl AccumulateLineNum {
834 fn new() -> Self {
835 Self {
836 line_num: 1,
838 last_offset: 0,
839 }
840 }
841
842 fn line_num(&mut self, buffer: &[u8], byte_offset: usize) -> usize {
843 assert!(self.last_offset <= byte_offset);
844 let slice = &buffer[self.last_offset..byte_offset];
845 let newlines = slice.find_iter(b"\n").count();
846 let line_num = self.line_num + newlines;
847 self.line_num = line_num;
848 self.last_offset = byte_offset;
849 line_num
850 }
851}
852
853fn extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) {
854 let line_start = buffer[0..byte_offset]
855 .rfind_byte(b'\n')
856 .map(|s| s + 1)
858 .unwrap_or(0);
859 let line = buffer[line_start..]
860 .lines()
861 .next()
862 .expect("should always be at least a line");
863 let line_offset = byte_offset - line_start;
864 (line, line_offset)
865}
866
867fn extract_fix<'t>(typo: &'t typos::Typo<'t>) -> Option<&'t str> {
868 match &typo.corrections {
869 typos::Status::Corrections(c) if c.len() == 1 => Some(c[0].as_ref()),
870 _ => None,
871 }
872}
873
874fn is_fixable(typo: &typos::Typo<'_>) -> bool {
875 extract_fix(typo).is_some()
876}
877
878fn fix_buffer(mut buffer: Vec<u8>, typos: impl Iterator<Item = typos::Typo<'static>>) -> Vec<u8> {
879 let mut offset = 0isize;
880 for typo in typos {
881 let fix = extract_fix(&typo).expect("Caller only provides fixable typos");
882 let start = ((typo.byte_offset as isize) + offset) as usize;
883 let end = start + typo.typo.len();
884
885 buffer.splice(start..end, fix.as_bytes().iter().copied());
886
887 offset += (fix.len() as isize) - (typo.typo.len() as isize);
888 }
889 buffer
890}
891
892pub fn walk_path(
893 walk: ignore::Walk,
894 checks: &dyn FileChecker,
895 engine: &crate::policy::ConfigEngine<'_>,
896 reporter: &dyn report::Report,
897 force_exclude: bool,
898) -> Result<(), ignore::Error> {
899 for entry in walk {
900 walk_entry(entry, checks, engine, reporter, force_exclude)?;
901 }
902 Ok(())
903}
904
905pub fn walk_path_parallel(
906 walk: ignore::WalkParallel,
907 checks: &dyn FileChecker,
908 engine: &crate::policy::ConfigEngine<'_>,
909 reporter: &dyn report::Report,
910 force_exclude: bool,
911) -> Result<(), ignore::Error> {
912 let error: std::sync::Mutex<Result<(), ignore::Error>> = std::sync::Mutex::new(Ok(()));
913 walk.run(|| {
914 Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
915 match walk_entry(entry, checks, engine, reporter, force_exclude) {
916 Ok(()) => ignore::WalkState::Continue,
917 Err(err) => {
918 *error.lock().unwrap() = Err(err);
919 ignore::WalkState::Quit
920 }
921 }
922 })
923 });
924
925 error.into_inner().unwrap()
926}
927
928fn walk_entry(
929 entry: Result<ignore::DirEntry, ignore::Error>,
930 checks: &dyn FileChecker,
931 engine: &crate::policy::ConfigEngine<'_>,
932 reporter: &dyn report::Report,
933 force_exclude: bool,
934) -> Result<(), ignore::Error> {
935 let entry = match entry {
936 Ok(entry) => entry,
937 Err(err) => {
938 report_error(err, None, reporter)?;
939 return Ok(());
940 }
941 };
942 if crate::config::SUPPORTED_FILE_NAMES
943 .iter()
944 .any(|n| *n == entry.file_name())
945 {
946 log::debug!(
947 "{}: skipping potential config file as it may have typos",
948 entry.path().display()
949 );
950 return Ok(());
951 }
952 if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
953 let explicit = entry.depth() == 0 && !force_exclude;
954 let (path, lookup_path) = if entry.is_stdin() {
955 let path = std::path::Path::new("-");
956 let cwd = std::env::current_dir().map_err(|err| {
957 let kind = err.kind();
958 std::io::Error::new(kind, "no current working directory".to_owned())
959 })?;
960 (path, cwd)
961 } else {
962 let path = entry.path();
963 let abs_path = report_result(path.canonicalize(), Some(path), reporter)?;
964 (path, abs_path)
965 };
966 let policy = engine.policy(&lookup_path);
967 checks.check_file(path, explicit, &policy, reporter)?;
968 }
969
970 Ok(())
971}
972
973#[derive(Clone, Debug)]
974struct Ignores {
975 blocks: Vec<std::ops::Range<usize>>,
976}
977
978impl Ignores {
979 fn new(content: &[u8], ignores: &[regex::Regex]) -> Self {
980 let mut blocks = Vec::new();
981 if let Ok(content) = std::str::from_utf8(content) {
982 for ignore in ignores {
983 for mat in ignore.find_iter(content) {
984 blocks.push(mat.range());
985 }
986 }
987 }
988 Self { blocks }
989 }
990
991 fn is_ignored(&self, span: std::ops::Range<usize>) -> bool {
992 let start = span.start;
993 let end = span.end.saturating_sub(1);
994 self.blocks
995 .iter()
996 .any(|block| block.contains(&start) || block.contains(&end))
997 }
998}
999
#[cfg(test)]
mod test {
    use super::*;

    /// Applies `(offset, typo, correction)` triples to `line` via `fix_buffer`.
    fn fix_simple(line: &str, corrections: Vec<(usize, &'static str, &'static str)>) -> String {
        let typos = corrections
            .into_iter()
            .map(|(byte_offset, typo, correction)| typos::Typo {
                byte_offset,
                typo: typo.into(),
                corrections: typos::Status::Corrections(vec![correction.into()]),
            });
        let fixed = fix_buffer(line.as_bytes().to_vec(), typos);
        String::from_utf8(fixed).unwrap()
    }

    /// Checks that `extract_line` returns the expected line and offset, and
    /// that the offset still points at the same byte.
    fn assert_extracted(
        buffer: &[u8],
        buffer_offset: usize,
        expected_line: &[u8],
        expected_offset: usize,
    ) {
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, expected_offset);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    #[test]
    fn test_fix_buffer_single() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(4, "foo", "bar")]),
            "foo bar foo"
        );
    }

    #[test]
    fn test_fix_buffer_single_grow() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(4, "foo", "happy")]),
            "foo happy foo"
        );
    }

    #[test]
    fn test_fix_buffer_single_shrink() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(4, "foo", "if")]),
            "foo if foo"
        );
    }

    #[test]
    fn test_fix_buffer_start() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(0, "foo", "bar")]),
            "bar foo foo"
        );
    }

    #[test]
    fn test_fix_buffer_end() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(8, "foo", "bar")]),
            "foo foo bar"
        );
    }

    #[test]
    fn test_fix_buffer_end_grow() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(8, "foo", "happy")]),
            "foo foo happy"
        );
    }

    #[test]
    fn test_fix_buffer_multiple() {
        let fixes = vec![(4, "foo", "happy"), (8, "foo", "world")];
        assert_eq!(fix_simple("foo foo foo", fixes), "foo happy world");
    }

    #[test]
    fn test_line_count_first() {
        let mut counter = AccumulateLineNum::new();
        assert_eq!(counter.line_num(b"hello world", 6), 1);
    }

    #[test]
    fn test_line_count_second() {
        let mut counter = AccumulateLineNum::new();
        assert_eq!(counter.line_num(b"1\n2\n3", 2), 2);
    }

    #[test]
    fn test_line_count_multiple() {
        let mut counter = AccumulateLineNum::new();
        assert_eq!(counter.line_num(b"1\n2\n3", 0), 1);
        assert_eq!(counter.line_num(b"1\n2\n3", 2), 2);
        assert_eq!(counter.line_num(b"1\n2\n3", 4), 3);
    }

    #[test]
    fn test_extract_line_single_line() {
        assert_extracted(b"hello world", 6, b"hello world", 6);
    }

    #[test]
    fn test_extract_line_first() {
        assert_extracted(b"1\n2\n3", 0, b"1", 0);
    }

    #[test]
    fn test_extract_line_middle() {
        assert_extracted(b"1\n2\n3", 2, b"2", 0);
    }

    #[test]
    fn test_extract_line_end() {
        assert_extracted(b"1\n2\n3", 4, b"3", 0);
    }

    #[test]
    fn test_extract_line_offset_change() {
        assert_extracted(b"1\nhello world\n2", 8, b"hello world", 6);
    }

    #[test]
    fn test_extract_line_windows() {
        assert_extracted(b"1\r\nhello world\r\n2", 9, b"hello world", 6);
    }

    #[test]
    fn test_extract_line_slovak() {
        // Not valid UTF-8: line extraction must work on raw bytes.
        let buffer = b"LastErrorMessage=%1.%n%nChyba %2: %3\r\nSetupFileMissing=In\x9Atala\xE8n\xFD adres\xE1r neobsahuje s\xFAbor %1. Opravte, pros\xEDm, t\xFAto chybu alebo si zaobstarajte nov\xFA k\xF3piu tohto produktu.\r\nSetupFileCorrupt=S\xFAbory sprievodcu in\x9Atal\xE1ciou s\xFA po\x9Akoden\xE9. Zaobstarajte si, pros\xEDm, nov\xFA k\xF3piu tohto produktu.";
        let expected_line = b"SetupFileMissing=In\x9Atala\xE8n\xFD adres\xE1r neobsahuje s\xFAbor %1. Opravte, pros\xEDm, t\xFAto chybu alebo si zaobstarajte nov\xFA k\xF3piu tohto produktu.";
        assert_extracted(buffer, 66, expected_line, 28);
    }
}