1use bstr::ByteSlice;
2use std::io::Read;
3use std::io::Write;
4
5use crate::report;
6
7pub trait FileChecker: Send + Sync {
8 fn check_file(
9 &self,
10 path: &std::path::Path,
11 explicit: bool,
12 policy: &crate::policy::Policy<'_, '_, '_>,
13 reporter: &dyn report::Report,
14 ) -> Result<(), std::io::Error>;
15}
16
17#[derive(Debug, Clone, Copy)]
18pub struct Codetypo;
19
20impl FileChecker for Codetypo {
21 fn check_file(
22 &self,
23 path: &std::path::Path,
24 explicit: bool,
25 policy: &crate::policy::Policy<'_, '_, '_>,
26 reporter: &dyn report::Report,
27 ) -> Result<(), std::io::Error> {
28 if policy.check_filenames {
29 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
30 for typo in check_str(file_name, policy) {
31 let msg = report::Typo {
32 context: Some(report::PathContext { path }.into()),
33 buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
34 byte_offset: typo.byte_offset,
35 typo: typo.typo.as_ref(),
36 corrections: typo.corrections,
37 };
38 reporter.report(msg.into())?;
39 }
40 }
41 }
42
43 if policy.check_files {
44 let (buffer, content_type) = read_file(path, reporter)?;
45 if !explicit && !policy.binary && content_type.is_binary() {
46 let msg = report::BinaryFile { path };
47 reporter.report(msg.into())?;
48 } else {
49 let mut accum_line_num = AccumulateLineNum::new();
50 for typo in check_bytes(&buffer, policy) {
51 let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
52 let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
53 let msg = report::Typo {
54 context: Some(report::FileContext { path, line_num }.into()),
55 buffer: std::borrow::Cow::Borrowed(line),
56 byte_offset: line_offset,
57 typo: typo.typo.as_ref(),
58 corrections: typo.corrections,
59 };
60 reporter.report(msg.into())?;
61 }
62 }
63 }
64
65 Ok(())
66 }
67}
68
69#[derive(Debug, Clone, Copy)]
70pub struct FixCodetypo;
71
72impl FileChecker for FixCodetypo {
73 fn check_file(
74 &self,
75 path: &std::path::Path,
76 explicit: bool,
77 policy: &crate::policy::Policy<'_, '_, '_>,
78 reporter: &dyn report::Report,
79 ) -> Result<(), std::io::Error> {
80 if policy.check_files {
81 let (buffer, content_type) = read_file(path, reporter)?;
82 if !explicit && !policy.binary && content_type.is_binary() {
83 let msg = report::BinaryFile { path };
84 reporter.report(msg.into())?;
85 } else {
86 let mut fixes = Vec::new();
87 let mut accum_line_num = AccumulateLineNum::new();
88 for typo in check_bytes(&buffer, policy) {
89 if is_fixable(&typo) {
90 fixes.push(typo.into_owned());
91 } else {
92 let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
93 let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
94 let msg = report::Typo {
95 context: Some(report::FileContext { path, line_num }.into()),
96 buffer: std::borrow::Cow::Borrowed(line),
97 byte_offset: line_offset,
98 typo: typo.typo.as_ref(),
99 corrections: typo.corrections,
100 };
101 reporter.report(msg.into())?;
102 }
103 }
104 if !fixes.is_empty() || path == std::path::Path::new("-") {
105 let buffer = fix_buffer(buffer, fixes.into_iter());
106 write_file(path, content_type, buffer, reporter)?;
107 }
108 }
109 }
110
111 if policy.check_filenames {
113 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
114 let mut fixes = Vec::new();
115 for typo in check_str(file_name, policy) {
116 if is_fixable(&typo) {
117 fixes.push(typo.into_owned());
118 } else {
119 let msg = report::Typo {
120 context: Some(report::PathContext { path }.into()),
121 buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
122 byte_offset: typo.byte_offset,
123 typo: typo.typo.as_ref(),
124 corrections: typo.corrections,
125 };
126 reporter.report(msg.into())?;
127 }
128 }
129 if !fixes.is_empty() {
130 let file_name = file_name.to_owned().into_bytes();
131 let new_name = fix_buffer(file_name, fixes.into_iter());
132 let new_name =
133 String::from_utf8(new_name).expect("corrections are valid utf-8");
134 let new_path = path.with_file_name(new_name);
135 std::fs::rename(path, new_path)?;
136 }
137 }
138 }
139
140 Ok(())
141 }
142}
143
144#[derive(Debug, Clone, Copy)]
145pub struct DiffCodetypo;
146
147impl FileChecker for DiffCodetypo {
148 fn check_file(
149 &self,
150 path: &std::path::Path,
151 explicit: bool,
152 policy: &crate::policy::Policy<'_, '_, '_>,
153 reporter: &dyn report::Report,
154 ) -> Result<(), std::io::Error> {
155 let mut content = Vec::new();
156 let mut new_content = Vec::new();
157 if policy.check_files {
158 let (buffer, content_type) = read_file(path, reporter)?;
159 if !explicit && !policy.binary && content_type.is_binary() {
160 let msg = report::BinaryFile { path };
161 reporter.report(msg.into())?;
162 } else {
163 let mut fixes = Vec::new();
164 let mut accum_line_num = AccumulateLineNum::new();
165 for typo in check_bytes(&buffer, policy) {
166 if is_fixable(&typo) {
167 fixes.push(typo.into_owned());
168 } else {
169 let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
170 let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
171 let msg = report::Typo {
172 context: Some(report::FileContext { path, line_num }.into()),
173 buffer: std::borrow::Cow::Borrowed(line),
174 byte_offset: line_offset,
175 typo: typo.typo.as_ref(),
176 corrections: typo.corrections,
177 };
178 reporter.report(msg.into())?;
179 }
180 }
181 if !fixes.is_empty() {
182 new_content = fix_buffer(buffer.clone(), fixes.into_iter());
183 content = buffer;
184 }
185 }
186 }
187
188 let mut new_path = None;
190 if policy.check_filenames {
191 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
192 let mut fixes = Vec::new();
193 for typo in check_str(file_name, policy) {
194 if is_fixable(&typo) {
195 fixes.push(typo.into_owned());
196 } else {
197 let msg = report::Typo {
198 context: Some(report::PathContext { path }.into()),
199 buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
200 byte_offset: typo.byte_offset,
201 typo: typo.typo.as_ref(),
202 corrections: typo.corrections,
203 };
204 reporter.report(msg.into())?;
205 }
206 }
207 if !fixes.is_empty() {
208 let file_name = file_name.to_owned().into_bytes();
209 let new_name = fix_buffer(file_name, fixes.into_iter());
210 let new_name =
211 String::from_utf8(new_name).expect("corrections are valid utf-8");
212 new_path = Some(path.with_file_name(new_name));
213 }
214 }
215 }
216
217 if new_path.is_some() || !content.is_empty() {
218 let original_path = path.display().to_string();
219 let fixed_path = new_path.as_deref().unwrap_or(path).display().to_string();
220 let original_content: Vec<_> = content
221 .lines_with_terminator()
222 .map(|s| String::from_utf8_lossy(s).into_owned())
223 .collect();
224 let fixed_content: Vec<_> = new_content
225 .lines_with_terminator()
226 .map(|s| String::from_utf8_lossy(s).into_owned())
227 .collect();
228 let diff = difflib::unified_diff(
229 &original_content,
230 &fixed_content,
231 original_path.as_str(),
232 fixed_path.as_str(),
233 "original",
234 "fixed",
235 0,
236 );
237 let stdout = std::io::stdout();
238 let mut handle = stdout.lock();
239 for line in diff {
240 write!(handle, "{line}")?;
241 }
242 }
243
244 Ok(())
245 }
246}
247
248#[derive(Debug, Clone, Copy)]
249pub struct HighlightIdentifiers;
250
251impl FileChecker for HighlightIdentifiers {
252 fn check_file(
253 &self,
254 path: &std::path::Path,
255 explicit: bool,
256 policy: &crate::policy::Policy<'_, '_, '_>,
257 reporter: &dyn report::Report,
258 ) -> Result<(), std::io::Error> {
259 use std::fmt::Write as _;
260
261 let stdout = std::io::stdout();
262 let mut handle = stdout.lock();
263
264 let mut ignores: Option<Ignores> = None;
265 if policy.check_filenames {
266 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
267 let mut styled = String::new();
268 let mut prev_end = 0;
269 for (word, highlight) in policy
270 .tokenizer
271 .parse_str(file_name)
272 .filter(|word| {
273 !ignores
274 .get_or_insert_with(|| {
275 Ignores::new(file_name.as_bytes(), policy.ignore)
276 })
277 .is_ignored(word.span())
278 })
279 .zip(HIGHLIGHTS.iter().cycle())
280 {
281 let start = word.offset();
282 let end = word.offset() + word.token().len();
283 if prev_end != start {
284 let _ = write!(
285 &mut styled,
286 "{UNMATCHED}{}{UNMATCHED:#}",
287 &file_name[prev_end..start]
288 );
289 }
290 let _ = write!(&mut styled, "{highlight}{}{highlight:#}", word.token());
291 prev_end = end;
292 }
293 let _ = write!(
294 &mut styled,
295 "{UNMATCHED}{}{UNMATCHED:#}",
296 &file_name[prev_end..file_name.len()]
297 );
298
299 let parent_dir = path.parent().unwrap();
300 if !parent_dir.as_os_str().is_empty() {
301 let parent_dir = parent_dir.display();
302 write!(handle, "{UNMATCHED}{parent_dir}/")?;
303 }
304 writeln!(handle, "{styled}{UNMATCHED}:{UNMATCHED:#}")?;
305 } else {
306 writeln!(handle, "{UNMATCHED}{}:{UNMATCHED:#}", path.display())?;
307 }
308 } else {
309 writeln!(handle, "{UNMATCHED}{}:{UNMATCHED:#}", path.display())?;
310 }
311
312 if policy.check_files {
313 let (buffer, content_type) = read_file(path, reporter)?;
314 if !explicit && !policy.binary && content_type.is_binary() {
315 } else if let Ok(buffer) = buffer.to_str() {
317 let mut styled = String::new();
318 let mut prev_end = 0;
319 for (word, highlight) in policy
320 .tokenizer
321 .parse_bytes(buffer.as_bytes())
322 .filter(|word| {
323 !ignores
324 .get_or_insert_with(|| Ignores::new(buffer.as_bytes(), policy.ignore))
325 .is_ignored(word.span())
326 })
327 .zip(HIGHLIGHTS.iter().cycle())
328 {
329 let start = word.offset();
330 let end = word.offset() + word.token().len();
331 if prev_end != start {
332 let _ = write!(
333 &mut styled,
334 "{UNMATCHED}{}{UNMATCHED:#}",
335 &buffer[prev_end..start]
336 );
337 }
338 let _ = write!(&mut styled, "{highlight}{}{highlight:#}", word.token());
339 prev_end = end;
340 }
341 let _ = write!(
342 &mut styled,
343 "{UNMATCHED}{}{UNMATCHED:#}",
344 &buffer[prev_end..buffer.len()]
345 );
346
347 write!(handle, "{styled}")?;
348 }
349 }
350
351 Ok(())
352 }
353}
354
355#[derive(Debug, Clone, Copy)]
356pub struct Identifiers;
357
358impl FileChecker for Identifiers {
359 fn check_file(
360 &self,
361 path: &std::path::Path,
362 explicit: bool,
363 policy: &crate::policy::Policy<'_, '_, '_>,
364 reporter: &dyn report::Report,
365 ) -> Result<(), std::io::Error> {
366 let mut ignores: Option<Ignores> = None;
367 if policy.check_filenames {
368 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
369 for word in policy.tokenizer.parse_str(file_name) {
370 if ignores
371 .get_or_insert_with(|| Ignores::new(file_name.as_bytes(), policy.ignore))
372 .is_ignored(word.span())
373 {
374 continue;
375 }
376 let msg = report::Parse {
377 context: Some(report::PathContext { path }.into()),
378 kind: report::ParseKind::Identifier,
379 data: word.token(),
380 };
381 reporter.report(msg.into())?;
382 }
383 }
384 }
385
386 if policy.check_files {
387 let (buffer, content_type) = read_file(path, reporter)?;
388 if !explicit && !policy.binary && content_type.is_binary() {
389 let msg = report::BinaryFile { path };
390 reporter.report(msg.into())?;
391 } else {
392 for word in policy.tokenizer.parse_bytes(&buffer) {
393 if ignores
394 .get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
395 .is_ignored(word.span())
396 {
397 continue;
398 }
399 let line_num = 0;
403 let msg = report::Parse {
404 context: Some(report::FileContext { path, line_num }.into()),
405 kind: report::ParseKind::Identifier,
406 data: word.token(),
407 };
408 reporter.report(msg.into())?;
409 }
410 }
411 }
412
413 Ok(())
414 }
415}
416
417#[derive(Debug, Clone, Copy)]
418pub struct HighlightWords;
419
420impl FileChecker for HighlightWords {
421 fn check_file(
422 &self,
423 path: &std::path::Path,
424 explicit: bool,
425 policy: &crate::policy::Policy<'_, '_, '_>,
426 reporter: &dyn report::Report,
427 ) -> Result<(), std::io::Error> {
428 use std::fmt::Write as _;
429
430 let stdout = std::io::stdout();
431 let mut handle = stdout.lock();
432
433 let mut ignores: Option<Ignores> = None;
434 if policy.check_filenames {
435 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
436 let mut styled = String::new();
437 let mut prev_end = 0;
438 for (word, highlight) in policy
439 .tokenizer
440 .parse_str(file_name)
441 .flat_map(|i| i.split())
442 .filter(|word| {
443 !ignores
444 .get_or_insert_with(|| {
445 Ignores::new(file_name.as_bytes(), policy.ignore)
446 })
447 .is_ignored(word.span())
448 })
449 .zip(HIGHLIGHTS.iter().cycle())
450 {
451 let start = word.offset();
452 let end = word.offset() + word.token().len();
453 if prev_end != start {
454 let _ = write!(
455 &mut styled,
456 "{UNMATCHED}{}{UNMATCHED:#}",
457 &file_name[prev_end..start]
458 );
459 }
460 let _ = write!(&mut styled, "{highlight}{}{highlight:#}", word.token());
461 prev_end = end;
462 }
463 let _ = write!(
464 &mut styled,
465 "{UNMATCHED}{}{UNMATCHED:#}",
466 &file_name[prev_end..file_name.len()]
467 );
468
469 let parent_dir = path.parent().unwrap();
470 if !parent_dir.as_os_str().is_empty() {
471 let parent_dir = parent_dir.display();
472 write!(handle, "{UNMATCHED}{parent_dir}/")?;
473 }
474 writeln!(handle, "{styled}{UNMATCHED}:{UNMATCHED:#}")?;
475 } else {
476 writeln!(handle, "{UNMATCHED}{}:{UNMATCHED:#}", path.display())?;
477 }
478 } else {
479 writeln!(handle, "{UNMATCHED}{}:{UNMATCHED:#}", path.display())?;
480 }
481
482 if policy.check_files {
483 let (buffer, content_type) = read_file(path, reporter)?;
484 if !explicit && !policy.binary && content_type.is_binary() {
485 } else if let Ok(buffer) = buffer.to_str() {
487 let mut styled = String::new();
488 let mut prev_end = 0;
489 for (word, highlight) in policy
490 .tokenizer
491 .parse_bytes(buffer.as_bytes())
492 .flat_map(|i| i.split())
493 .filter(|word| {
494 !ignores
495 .get_or_insert_with(|| Ignores::new(buffer.as_bytes(), policy.ignore))
496 .is_ignored(word.span())
497 })
498 .zip(HIGHLIGHTS.iter().cycle())
499 {
500 let start = word.offset();
501 let end = word.offset() + word.token().len();
502 if prev_end != start {
503 let _ = write!(
504 &mut styled,
505 "{UNMATCHED}{}{UNMATCHED:#}",
506 &buffer[prev_end..start]
507 );
508 }
509 let _ = write!(&mut styled, "{highlight}{}{highlight:#}", word.token());
510 prev_end = end;
511 }
512 let _ = write!(
513 &mut styled,
514 "{UNMATCHED}{}{UNMATCHED:#}",
515 &buffer[prev_end..buffer.len()]
516 );
517
518 write!(handle, "{styled}")?;
519 }
520 }
521
522 Ok(())
523 }
524}
525
526static HIGHLIGHTS: &[anstyle::Style] = &[
527 anstyle::AnsiColor::Cyan.on_default(),
528 anstyle::AnsiColor::Cyan
529 .on_default()
530 .effects(anstyle::Effects::BOLD),
531];
532
533static UNMATCHED: anstyle::Style = anstyle::Style::new().effects(anstyle::Effects::DIMMED);
534
535#[derive(Debug, Clone, Copy)]
536pub struct Words;
537
538impl FileChecker for Words {
539 fn check_file(
540 &self,
541 path: &std::path::Path,
542 explicit: bool,
543 policy: &crate::policy::Policy<'_, '_, '_>,
544 reporter: &dyn report::Report,
545 ) -> Result<(), std::io::Error> {
546 let mut ignores: Option<Ignores> = None;
547 if policy.check_filenames {
548 if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
549 for word in policy
550 .tokenizer
551 .parse_str(file_name)
552 .flat_map(|i| i.split())
553 {
554 if ignores
555 .get_or_insert_with(|| Ignores::new(file_name.as_bytes(), policy.ignore))
556 .is_ignored(word.span())
557 {
558 continue;
559 }
560 let msg = report::Parse {
561 context: Some(report::PathContext { path }.into()),
562 kind: report::ParseKind::Word,
563 data: word.token(),
564 };
565 reporter.report(msg.into())?;
566 }
567 }
568 }
569
570 if policy.check_files {
571 let (buffer, content_type) = read_file(path, reporter)?;
572 if !explicit && !policy.binary && content_type.is_binary() {
573 let msg = report::BinaryFile { path };
574 reporter.report(msg.into())?;
575 } else {
576 for word in policy
577 .tokenizer
578 .parse_bytes(&buffer)
579 .flat_map(|i| i.split())
580 {
581 if ignores
582 .get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
583 .is_ignored(word.span())
584 {
585 continue;
586 }
587 let line_num = 0;
591 let msg = report::Parse {
592 context: Some(report::FileContext { path, line_num }.into()),
593 kind: report::ParseKind::Word,
594 data: word.token(),
595 };
596 reporter.report(msg.into())?;
597 }
598 }
599 }
600
601 Ok(())
602 }
603}
604
605#[derive(Debug, Clone, Copy)]
606pub struct FileTypes;
607
608impl FileChecker for FileTypes {
609 fn check_file(
610 &self,
611 path: &std::path::Path,
612 explicit: bool,
613 policy: &crate::policy::Policy<'_, '_, '_>,
614 reporter: &dyn report::Report,
615 ) -> Result<(), std::io::Error> {
616 if policy.binary {
618 let msg = report::FileType::new(path, policy.file_type);
619 reporter.report(msg.into())?;
620 } else {
621 let (_buffer, content_type) = read_file(path, reporter)?;
622 if !explicit && content_type.is_binary() {
623 let msg = report::BinaryFile { path };
624 reporter.report(msg.into())?;
625 } else {
626 let msg = report::FileType::new(path, policy.file_type);
627 reporter.report(msg.into())?;
628 }
629 }
630
631 Ok(())
632 }
633}
634
635#[derive(Debug, Clone, Copy)]
636pub struct FoundFiles;
637
638impl FileChecker for FoundFiles {
639 fn check_file(
640 &self,
641 path: &std::path::Path,
642 explicit: bool,
643 policy: &crate::policy::Policy<'_, '_, '_>,
644 reporter: &dyn report::Report,
645 ) -> Result<(), std::io::Error> {
646 if policy.binary {
648 let msg = report::File::new(path);
649 reporter.report(msg.into())?;
650 } else {
651 let (_buffer, content_type) = read_file(path, reporter)?;
652 if !explicit && content_type.is_binary() {
653 let msg = report::BinaryFile { path };
654 reporter.report(msg.into())?;
655 } else {
656 let msg = report::File::new(path);
657 reporter.report(msg.into())?;
658 }
659 }
660
661 Ok(())
662 }
663}
664
665fn read_file(
666 path: &std::path::Path,
667 reporter: &dyn report::Report,
668) -> Result<(Vec<u8>, content_inspector::ContentType), std::io::Error> {
669 let buffer = if path == std::path::Path::new("-") {
670 let mut buffer = Vec::new();
671 report_result(
672 std::io::stdin().read_to_end(&mut buffer),
673 Some(path),
674 reporter,
675 )?;
676 buffer
677 } else {
678 report_result(std::fs::read(path), Some(path), reporter)?
679 };
680
681 let content_type = content_inspector::inspect(&buffer);
682
683 let (buffer, content_type) = match content_type {
684 content_inspector::ContentType::BINARY |
685 content_inspector::ContentType::UTF_32LE |
687 content_inspector::ContentType::UTF_32BE => {
688 (buffer, content_inspector::ContentType::BINARY)
689 },
690 content_inspector::ContentType::UTF_8 |
691 content_inspector::ContentType::UTF_8_BOM => {
692 (buffer, content_type)
693 },
694 content_inspector::ContentType::UTF_16LE => {
695 let mut decoded = String::with_capacity(buffer.len() * 2);
699 let (r, written) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
700 let decoded = match r {
701 encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
702 _ => Err(format!("invalid UTF-16LE encoding at byte {written} in {}", path.display())),
703 };
704 let buffer = report_result(decoded, Some(path), reporter)?;
705 (buffer.into_bytes(), content_type)
706 }
707 content_inspector::ContentType::UTF_16BE => {
708 let mut decoded = String::with_capacity(buffer.len() * 2);
712 let (r, written) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
713 let decoded = match r {
714 encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
715 _ => Err(format!("invalid UTF-16BE encoding at byte {written} in {}", path.display())),
716 };
717 let buffer = report_result(decoded, Some(path), reporter)?;
718 (buffer.into_bytes(), content_type)
719 },
720 };
721
722 Ok((buffer, content_type))
723}
724
725fn write_file(
726 path: &std::path::Path,
727 content_type: content_inspector::ContentType,
728 buffer: Vec<u8>,
729 reporter: &dyn report::Report,
730) -> Result<(), std::io::Error> {
731 let buffer = match content_type {
732 content_inspector::ContentType::UTF_32LE | content_inspector::ContentType::UTF_32BE => {
734 unreachable!("read_file should prevent these from being passed along");
735 }
736 content_inspector::ContentType::BINARY
737 | content_inspector::ContentType::UTF_8
738 | content_inspector::ContentType::UTF_8_BOM => buffer,
739 content_inspector::ContentType::UTF_16LE => {
740 let buffer = report_result(String::from_utf8(buffer), Some(path), reporter)?;
741 if buffer.is_empty() {
742 return Ok(());
744 }
745 let (encoded, _, replaced) = encoding_rs::UTF_16LE.encode(&buffer);
746 assert!(
747 !replaced,
748 "Coming from UTF-8, UTF-16LE shouldn't do replacements"
749 );
750 encoded.into_owned()
751 }
752 content_inspector::ContentType::UTF_16BE => {
753 let buffer = report_result(String::from_utf8(buffer), Some(path), reporter)?;
754 if buffer.is_empty() {
755 return Ok(());
757 }
758 let (encoded, _, replaced) = encoding_rs::UTF_16BE.encode(&buffer);
759 assert!(
760 !replaced,
761 "Coming from UTF-8, UTF-16BE shouldn't do replacements"
762 );
763 encoded.into_owned()
764 }
765 };
766
767 if path == std::path::Path::new("-") {
768 report_result(std::io::stdout().write_all(&buffer), Some(path), reporter)?;
769 } else {
770 report_result(std::fs::write(path, buffer), Some(path), reporter)?;
771 }
772
773 Ok(())
774}
775
776fn check_str<'a>(
777 buffer: &'a str,
778 policy: &'a crate::policy::Policy<'a, 'a, 'a>,
779) -> impl Iterator<Item = codetypo::Typo<'a>> {
780 let mut ignores: Option<Ignores> = None;
781
782 codetypo::check_str(buffer, policy.tokenizer, policy.dict).filter(move |typo| {
783 !ignores
784 .get_or_insert_with(|| Ignores::new(buffer.as_bytes(), policy.ignore))
785 .is_ignored(typo.span())
786 })
787}
788
789fn check_bytes<'a>(
790 buffer: &'a [u8],
791 policy: &'a crate::policy::Policy<'a, 'a, 'a>,
792) -> impl Iterator<Item = codetypo::Typo<'a>> {
793 let mut ignores: Option<Ignores> = None;
794
795 codetypo::check_bytes(buffer, policy.tokenizer, policy.dict).filter(move |typo| {
796 !ignores
797 .get_or_insert_with(|| Ignores::new(buffer, policy.ignore))
798 .is_ignored(typo.span())
799 })
800}
801
802fn report_result<T: Default, E: ToString>(
803 value: Result<T, E>,
804 path: Option<&std::path::Path>,
805 reporter: &dyn report::Report,
806) -> Result<T, std::io::Error> {
807 let buffer = match value {
808 Ok(value) => value,
809 Err(err) => {
810 report_error(err, path, reporter)?;
811 Default::default()
812 }
813 };
814 Ok(buffer)
815}
816
817fn report_error<E: ToString>(
818 err: E,
819 path: Option<&std::path::Path>,
820 reporter: &dyn report::Report,
821) -> Result<(), std::io::Error> {
822 let mut msg = report::Error::new(err.to_string());
823 msg.context = path.map(|path| report::Context::Path(report::PathContext { path }));
824 reporter.report(msg.into())?;
825 Ok(())
826}
827
/// Converts byte offsets into 1-based line numbers incrementally.
///
/// Each query only scans the bytes since the previous query, so offsets must
/// be passed in non-decreasing order.
struct AccumulateLineNum {
    /// Line number of the most recently queried offset.
    line_num: usize,
    /// Offset up to which newlines have already been counted.
    last_offset: usize,
}

impl AccumulateLineNum {
    /// Starts counting at line 1, offset 0.
    fn new() -> Self {
        Self {
            line_num: 1,
            last_offset: 0,
        }
    }

    /// Returns the 1-based line number of `byte_offset` within `buffer`.
    ///
    /// # Panics
    ///
    /// Panics when `byte_offset` is smaller than the previously queried
    /// offset or lies beyond the end of `buffer`.
    fn line_num(&mut self, buffer: &[u8], byte_offset: usize) -> usize {
        assert!(self.last_offset <= byte_offset);
        let crossed = buffer[self.last_offset..byte_offset]
            .iter()
            .filter(|&&byte| byte == b'\n')
            .count();
        self.last_offset = byte_offset;
        self.line_num += crossed;
        self.line_num
    }
}
852
/// Returns the line of `buffer` that contains `byte_offset`, together with the
/// offset translated to be relative to the start of that line.
///
/// The returned line excludes its terminator (`\n` or `\r\n`).
///
/// # Panics
///
/// Panics when `byte_offset` is past the end of `buffer`, or when nothing
/// follows the start of the line (for example an empty buffer).
fn extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) {
    // The line starts right after the last newline before the offset.
    let line_start = buffer[..byte_offset]
        .iter()
        .rposition(|&byte| byte == b'\n')
        .map_or(0, |newline| newline + 1);

    let raw = buffer[line_start..]
        .split_inclusive(|&byte| byte == b'\n')
        .next()
        .expect("should always be at least a line");
    // A carriage return is dropped only when it belongs to a `\r\n` terminator.
    let line = match raw.strip_suffix(b"\n") {
        Some(body) => body.strip_suffix(b"\r").unwrap_or(body),
        None => raw,
    };

    (line, byte_offset - line_start)
}
866
867fn extract_fix<'t>(typo: &'t codetypo::Typo<'t>) -> Option<&'t str> {
868 match &typo.corrections {
869 codetypo::Status::Corrections(c) if c.len() == 1 => Some(c[0].as_ref()),
870 _ => None,
871 }
872}
873
874fn is_fixable(typo: &codetypo::Typo<'_>) -> bool {
875 extract_fix(typo).is_some()
876}
877
878fn fix_buffer(
879 mut buffer: Vec<u8>,
880 codetypo: impl Iterator<Item = codetypo::Typo<'static>>,
881) -> Vec<u8> {
882 let mut offset = 0isize;
883 for typo in codetypo {
884 let fix = extract_fix(&typo).expect("Caller only provides fixable codetypo");
885 let start = ((typo.byte_offset as isize) + offset) as usize;
886 let end = start + typo.typo.len();
887
888 buffer.splice(start..end, fix.as_bytes().iter().copied());
889
890 offset += (fix.len() as isize) - (typo.typo.len() as isize);
891 }
892 buffer
893}
894
895pub fn walk_path(
896 walk: ignore::Walk,
897 checks: &dyn FileChecker,
898 engine: &crate::policy::ConfigEngine<'_>,
899 reporter: &dyn report::Report,
900 force_exclude: bool,
901) -> Result<(), ignore::Error> {
902 for entry in walk {
903 walk_entry(entry, checks, engine, reporter, force_exclude)?;
904 }
905 Ok(())
906}
907
908pub fn walk_path_parallel(
909 walk: ignore::WalkParallel,
910 checks: &dyn FileChecker,
911 engine: &crate::policy::ConfigEngine<'_>,
912 reporter: &dyn report::Report,
913 force_exclude: bool,
914) -> Result<(), ignore::Error> {
915 let error: std::sync::Mutex<Result<(), ignore::Error>> = std::sync::Mutex::new(Ok(()));
916 walk.run(|| {
917 Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
918 match walk_entry(entry, checks, engine, reporter, force_exclude) {
919 Ok(()) => ignore::WalkState::Continue,
920 Err(err) => {
921 *error.lock().unwrap() = Err(err);
922 ignore::WalkState::Quit
923 }
924 }
925 })
926 });
927
928 error.into_inner().unwrap()
929}
930
931fn walk_entry(
932 entry: Result<ignore::DirEntry, ignore::Error>,
933 checks: &dyn FileChecker,
934 engine: &crate::policy::ConfigEngine<'_>,
935 reporter: &dyn report::Report,
936 force_exclude: bool,
937) -> Result<(), ignore::Error> {
938 let entry = match entry {
939 Ok(entry) => entry,
940 Err(err) => {
941 report_error(err, None, reporter)?;
942 return Ok(());
943 }
944 };
945 if crate::config::SUPPORTED_FILE_NAMES
946 .iter()
947 .any(|n| *n == entry.file_name())
948 {
949 log::debug!(
950 "{}: skipping potential config file as it may have codetypo",
951 entry.path().display()
952 );
953 return Ok(());
954 }
955 if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
956 let explicit = entry.depth() == 0 && !force_exclude;
957 let (path, lookup_path) = if entry.is_stdin() {
958 let path = std::path::Path::new("-");
959 let cwd = std::env::current_dir().map_err(|err| {
960 let kind = err.kind();
961 std::io::Error::new(kind, "no current working directory".to_owned())
962 })?;
963 (path, cwd)
964 } else {
965 let path = entry.path();
966 let abs_path = report_result(path.canonicalize(), Some(path), reporter)?;
967 (path, abs_path)
968 };
969 let policy = engine.policy(&lookup_path);
970 checks.check_file(path, explicit, &policy, reporter)?;
971 }
972
973 Ok(())
974}
975
976#[derive(Clone, Debug)]
977struct Ignores {
978 blocks: Vec<std::ops::Range<usize>>,
979}
980
981impl Ignores {
982 fn new(content: &[u8], ignores: &[regex::Regex]) -> Self {
983 let mut blocks = Vec::new();
984 if let Ok(content) = std::str::from_utf8(content) {
985 for ignore in ignores {
986 for mat in ignore.find_iter(content) {
987 blocks.push(mat.range());
988 }
989 }
990 }
991 Self { blocks }
992 }
993
994 fn is_ignored(&self, span: std::ops::Range<usize>) -> bool {
995 let start = span.start;
996 let end = span.end.saturating_sub(1);
997 self.blocks
998 .iter()
999 .any(|block| block.contains(&start) || block.contains(&end))
1000 }
1001}
1002
#[cfg(test)]
mod test {
    use super::*;

    /// Runs `fix_buffer` over `line` with `(byte_offset, typo, correction)`
    /// triples and returns the patched text.
    fn fix_simple(line: &str, corrections: Vec<(usize, &'static str, &'static str)>) -> String {
        let fixes = corrections
            .into_iter()
            .map(|(byte_offset, typo, correction)| {
                let corrections = codetypo::Status::Corrections(vec![correction.into()]);
                codetypo::Typo {
                    byte_offset,
                    typo: typo.into(),
                    corrections,
                }
            });
        let fixed = fix_buffer(line.as_bytes().to_vec(), fixes);
        String::from_utf8(fixed).unwrap()
    }

    /// Checks the line and relative offset `extract_line` returns, and that
    /// the relative offset points at the same byte as the absolute one.
    fn assert_extracted(
        buffer: &[u8],
        buffer_offset: usize,
        expected_line: &[u8],
        expected_offset: usize,
    ) {
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, expected_offset);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    #[test]
    fn test_fix_buffer_single() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(4, "foo", "bar")]),
            "foo bar foo"
        );
    }

    #[test]
    fn test_fix_buffer_single_grow() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(4, "foo", "happy")]),
            "foo happy foo"
        );
    }

    #[test]
    fn test_fix_buffer_single_shrink() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(4, "foo", "if")]),
            "foo if foo"
        );
    }

    #[test]
    fn test_fix_buffer_start() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(0, "foo", "bar")]),
            "bar foo foo"
        );
    }

    #[test]
    fn test_fix_buffer_end() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(8, "foo", "bar")]),
            "foo foo bar"
        );
    }

    #[test]
    fn test_fix_buffer_end_grow() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(8, "foo", "happy")]),
            "foo foo happy"
        );
    }

    #[test]
    fn test_fix_buffer_multiple() {
        let fixes = vec![(4, "foo", "happy"), (8, "foo", "world")];
        assert_eq!(fix_simple("foo foo foo", fixes), "foo happy world");
    }

    #[test]
    fn test_line_count_first() {
        let mut counter = AccumulateLineNum::new();
        assert_eq!(counter.line_num(b"hello world", 6), 1);
    }

    #[test]
    fn test_line_count_second() {
        let mut counter = AccumulateLineNum::new();
        assert_eq!(counter.line_num(b"1\n2\n3", 2), 2);
    }

    #[test]
    fn test_line_count_multiple() {
        let mut counter = AccumulateLineNum::new();
        assert_eq!(counter.line_num(b"1\n2\n3", 0), 1);
        assert_eq!(counter.line_num(b"1\n2\n3", 2), 2);
        assert_eq!(counter.line_num(b"1\n2\n3", 4), 3);
    }

    #[test]
    fn test_extract_line_single_line() {
        assert_extracted(b"hello world", 6, b"hello world", 6);
    }

    #[test]
    fn test_extract_line_first() {
        assert_extracted(b"1\n2\n3", 0, b"1", 0);
    }

    #[test]
    fn test_extract_line_middle() {
        assert_extracted(b"1\n2\n3", 2, b"2", 0);
    }

    #[test]
    fn test_extract_line_end() {
        assert_extracted(b"1\n2\n3", 4, b"3", 0);
    }

    #[test]
    fn test_extract_line_offset_change() {
        assert_extracted(b"1\nhello world\n2", 8, b"hello world", 6);
    }

    #[test]
    fn test_extract_line_windows() {
        assert_extracted(b"1\r\nhello world\r\n2", 9, b"hello world", 6);
    }

    #[test]
    fn test_extract_line_slovak() {
        // Content that is not valid UTF-8 must still be split on line ends.
        let buffer = b"LastErrorMessage=%1.%n%nChyba %2: %3\r\nSetupFileMissing=In\x9Atala\xE8n\xFD adres\xE1r neobsahuje s\xFAbor %1. Opravte, pros\xEDm, t\xFAto chybu alebo si zaobstarajte nov\xFA k\xF3piu tohto produktu.\r\nSetupFileCorrupt=S\xFAbory sprievodcu in\x9Atal\xE1ciou s\xFA po\x9Akoden\xE9. Zaobstarajte si, pros\xEDm, nov\xFA k\xF3piu tohto produktu.";
        let expected_line = b"SetupFileMissing=In\x9Atala\xE8n\xFD adres\xE1r neobsahuje s\xFAbor %1. Opravte, pros\xEDm, t\xFAto chybu alebo si zaobstarajte nov\xFA k\xF3piu tohto produktu.";
        assert_extracted(buffer, 66, expected_line, 28);
    }
}