typos_cli/
file.rs

1use bstr::ByteSlice;
2use std::io::Read;
3use std::io::Write;
4
5use crate::report;
6
7pub trait FileChecker: Send + Sync {
8    fn check_file(
9        &self,
10        path: &std::path::Path,
11        explicit: bool,
12        policy: &crate::policy::Policy<'_, '_, '_>,
13        reporter: &dyn report::Report,
14    ) -> Result<(), std::io::Error>;
15}
16
17#[derive(Debug, Clone, Copy)]
18pub struct Typos;
19
20impl FileChecker for Typos {
21    fn check_file(
22        &self,
23        path: &std::path::Path,
24        explicit: bool,
25        policy: &crate::policy::Policy<'_, '_, '_>,
26        reporter: &dyn report::Report,
27    ) -> Result<(), std::io::Error> {
28        if policy.check_filenames {
29            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
30                for typo in check_str(file_name, policy) {
31                    let msg = report::Typo {
32                        context: Some(report::PathContext { path }.into()),
33                        buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
34                        byte_offset: typo.byte_offset,
35                        typo: typo.typo.as_ref(),
36                        corrections: typo.corrections,
37                    };
38                    reporter.report(msg.into())?;
39                }
40            }
41        }
42
43        if policy.check_files {
44            let (buffer, content_type) = read_file(path, reporter)?;
45            if !explicit && !policy.binary && content_type.is_binary() {
46                let msg = report::BinaryFile { path };
47                reporter.report(msg.into())?;
48            } else {
49                let mut accum_line_num = AccumulateLineNum::new();
50                for typo in check_bytes(&buffer, policy) {
51                    let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
52                    let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
53                    let msg = report::Typo {
54                        context: Some(report::FileContext { path, line_num }.into()),
55                        buffer: std::borrow::Cow::Borrowed(line),
56                        byte_offset: line_offset,
57                        typo: typo.typo.as_ref(),
58                        corrections: typo.corrections,
59                    };
60                    reporter.report(msg.into())?;
61                }
62            }
63        }
64
65        Ok(())
66    }
67}
68
69#[derive(Debug, Clone, Copy)]
70pub struct FixTypos;
71
72impl FileChecker for FixTypos {
73    fn check_file(
74        &self,
75        path: &std::path::Path,
76        explicit: bool,
77        policy: &crate::policy::Policy<'_, '_, '_>,
78        reporter: &dyn report::Report,
79    ) -> Result<(), std::io::Error> {
80        if policy.check_files {
81            let (buffer, content_type) = read_file(path, reporter)?;
82            if !explicit && !policy.binary && content_type.is_binary() {
83                let msg = report::BinaryFile { path };
84                reporter.report(msg.into())?;
85            } else {
86                let mut fixes = Vec::new();
87                let mut accum_line_num = AccumulateLineNum::new();
88                for typo in check_bytes(&buffer, policy) {
89                    if is_fixable(&typo) {
90                        fixes.push(typo.into_owned());
91                    } else {
92                        let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
93                        let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
94                        let msg = report::Typo {
95                            context: Some(report::FileContext { path, line_num }.into()),
96                            buffer: std::borrow::Cow::Borrowed(line),
97                            byte_offset: line_offset,
98                            typo: typo.typo.as_ref(),
99                            corrections: typo.corrections,
100                        };
101                        reporter.report(msg.into())?;
102                    }
103                }
104                if !fixes.is_empty() || path == std::path::Path::new("-") {
105                    let buffer = fix_buffer(buffer, fixes.into_iter());
106                    write_file(path, content_type, buffer, reporter)?;
107                }
108            }
109        }
110
111        // Ensure the above write can happen before renaming the file.
112        if policy.check_filenames {
113            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
114                let mut fixes = Vec::new();
115                for typo in check_str(file_name, policy) {
116                    if is_fixable(&typo) {
117                        fixes.push(typo.into_owned());
118                    } else {
119                        let msg = report::Typo {
120                            context: Some(report::PathContext { path }.into()),
121                            buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
122                            byte_offset: typo.byte_offset,
123                            typo: typo.typo.as_ref(),
124                            corrections: typo.corrections,
125                        };
126                        reporter.report(msg.into())?;
127                    }
128                }
129                if !fixes.is_empty() {
130                    let file_name = file_name.to_owned().into_bytes();
131                    let new_name = fix_buffer(file_name, fixes.into_iter());
132                    let new_name =
133                        String::from_utf8(new_name).expect("corrections are valid utf-8");
134                    let new_path = path.with_file_name(new_name);
135                    std::fs::rename(path, new_path)?;
136                }
137            }
138        }
139
140        Ok(())
141    }
142}
143
144#[derive(Debug, Clone, Copy)]
145pub struct DiffTypos;
146
147impl FileChecker for DiffTypos {
148    fn check_file(
149        &self,
150        path: &std::path::Path,
151        explicit: bool,
152        policy: &crate::policy::Policy<'_, '_, '_>,
153        reporter: &dyn report::Report,
154    ) -> Result<(), std::io::Error> {
155        let mut content = Vec::new();
156        let mut new_content = Vec::new();
157        if policy.check_files {
158            let (buffer, content_type) = read_file(path, reporter)?;
159            if !explicit && !policy.binary && content_type.is_binary() {
160                let msg = report::BinaryFile { path };
161                reporter.report(msg.into())?;
162            } else {
163                let mut fixes = Vec::new();
164                let mut accum_line_num = AccumulateLineNum::new();
165                for typo in check_bytes(&buffer, policy) {
166                    if is_fixable(&typo) {
167                        fixes.push(typo.into_owned());
168                    } else {
169                        let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
170                        let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
171                        let msg = report::Typo {
172                            context: Some(report::FileContext { path, line_num }.into()),
173                            buffer: std::borrow::Cow::Borrowed(line),
174                            byte_offset: line_offset,
175                            typo: typo.typo.as_ref(),
176                            corrections: typo.corrections,
177                        };
178                        reporter.report(msg.into())?;
179                    }
180                }
181                if !fixes.is_empty() {
182                    new_content = fix_buffer(buffer.clone(), fixes.into_iter());
183                    content = buffer;
184                }
185            }
186        }
187
188        // Match FixTypos ordering for easy diffing.
189        let mut new_path = None;
190        if policy.check_filenames {
191            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
192                let mut fixes = Vec::new();
193                for typo in check_str(file_name, policy) {
194                    if is_fixable(&typo) {
195                        fixes.push(typo.into_owned());
196                    } else {
197                        let msg = report::Typo {
198                            context: Some(report::PathContext { path }.into()),
199                            buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
200                            byte_offset: typo.byte_offset,
201                            typo: typo.typo.as_ref(),
202                            corrections: typo.corrections,
203                        };
204                        reporter.report(msg.into())?;
205                    }
206                }
207                if !fixes.is_empty() {
208                    let file_name = file_name.to_owned().into_bytes();
209                    let new_name = fix_buffer(file_name, fixes.into_iter());
210                    let new_name =
211                        String::from_utf8(new_name).expect("corrections are valid utf-8");
212                    new_path = Some(path.with_file_name(new_name));
213                }
214            }
215        }
216
217        if new_path.is_some() || !content.is_empty() {
218            let original_path = path.display().to_string();
219            let fixed_path = new_path.as_deref().unwrap_or(path).display().to_string();
220            let original_content: Vec<_> = content
221                .lines_with_terminator()
222                .map(|s| String::from_utf8_lossy(s).into_owned())
223                .collect();
224            let fixed_content: Vec<_> = new_content
225                .lines_with_terminator()
226                .map(|s| String::from_utf8_lossy(s).into_owned())
227                .collect();
228            let diff = difflib::unified_diff(
229                &original_content,
230                &fixed_content,
231                original_path.as_str(),
232                fixed_path.as_str(),
233                "original",
234                "fixed",
235                0,
236            );
237            let stdout = std::io::stdout();
238            let mut handle = stdout.lock();
239            for line in diff {
240                write!(handle, "{line}")?;
241            }
242        }
243
244        Ok(())
245    }
246}
247
248#[derive(Debug, Clone, Copy)]
249pub struct HighlightIdentifiers;
250
251impl FileChecker for HighlightIdentifiers {
252    fn check_file(
253        &self,
254        path: &std::path::Path,
255        explicit: bool,
256        policy: &crate::policy::Policy<'_, '_, '_>,
257        reporter: &dyn report::Report,
258    ) -> Result<(), std::io::Error> {
259        use std::fmt::Write as _;
260
261        let stdout = std::io::stdout();
262        let mut handle = stdout.lock();
263
264        let mut ignores: Option<Ignores> = None;
265        if policy.check_filenames {
266            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
267                let mut styled = String::new();
268                let mut prev_end = 0;
269                for (word, highlight) in policy
270                    .tokenizer
271                    .parse_str(file_name)
272                    .filter(|word| {
273                        !ignores
274                            .get_or_insert_with(|| {
275                                Ignores::new(file_name.as_bytes(), policy.ignore)
276                            })
277                            .is_ignored(word.span())
278                    })
279                    .zip(HIGHLIGHTS.iter().cycle())
280                {
281                    let start = word.offset();
282                    let end = word.offset() + word.token().len();
283                    if prev_end != start {
284                        let _ = write!(
285                            &mut styled,
286                            "{UNMATCHED}{}{UNMATCHED:#}",
287                            &file_name[prev_end..start]
288                        );
289                    }
290                    let _ = write!(&mut styled, "{highlight}{}{highlight:#}", word.token());
291                    prev_end = end;
292                }
293                let _ = write!(
294                    &mut styled,
295                    "{UNMATCHED}{}{UNMATCHED:#}",
296                    &file_name[prev_end..file_name.len()]
297                );
298
299                let parent_dir = path.parent().unwrap();
300                if !parent_dir.as_os_str().is_empty() {
301                    let parent_dir = parent_dir.display();
302                    write!(handle, "{UNMATCHED}{parent_dir}/")?;
303                }
304                writeln!(handle, "{styled}{UNMATCHED}:{UNMATCHED:#}")?;
305            } else {
306                writeln!(handle, "{UNMATCHED}{}:{UNMATCHED:#}", path.display())?;
307            }
308        } else {
309            writeln!(handle, "{UNMATCHED}{}:{UNMATCHED:#}", path.display())?;
310        }
311
312        if policy.check_files {
313            let (buffer, content_type) = read_file(path, reporter)?;
314            if !explicit && !policy.binary && content_type.is_binary() {
315                // nop
316            } else if let Ok(buffer) = buffer.to_str() {
317                let mut styled = String::new();
318                let mut prev_end = 0;
319                for (word, highlight) in policy
320                    .tokenizer
321                    .parse_bytes(buffer.as_bytes())
322                    .filter(|word| {
323                        !ignores
324                            .get_or_insert_with(|| Ignores::new(buffer.as_bytes(), policy.ignore))
325                            .is_ignored(word.span())
326                    })
327                    .zip(HIGHLIGHTS.iter().cycle())
328                {
329                    let start = word.offset();
330                    let end = word.offset() + word.token().len();
331                    if prev_end != start {
332                        let _ = write!(
333                            &mut styled,
334                            "{UNMATCHED}{}{UNMATCHED:#}",
335                            &buffer[prev_end..start]
336                        );
337                    }
338                    let _ = write!(&mut styled, "{highlight}{}{highlight:#}", word.token());
339                    prev_end = end;
340                }
341                let _ = write!(
342                    &mut styled,
343                    "{UNMATCHED}{}{UNMATCHED:#}",
344                    &buffer[prev_end..buffer.len()]
345                );
346
347                write!(handle, "{styled}")?;
348            }
349        }
350
351        Ok(())
352    }
353}
354
355#[derive(Debug, Clone, Copy)]
356pub struct Identifiers;
357
358impl FileChecker for Identifiers {
359    fn check_file(
360        &self,
361        path: &std::path::Path,
362        explicit: bool,
363        policy: &crate::policy::Policy<'_, '_, '_>,
364        reporter: &dyn report::Report,
365    ) -> Result<(), std::io::Error> {
366        let mut ignores: Option<Ignores> = None;
367        if policy.check_filenames {
368            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
369                for word in policy.tokenizer.parse_str(file_name) {
370                    if ignores
371                        .get_or_insert_with(|| Ignores::new(file_name.as_bytes(), policy.ignore))
372                        .is_ignored(word.span())
373                    {
374                        continue;
375                    }
376                    let msg = report::Parse {
377                        context: Some(report::PathContext { path }.into()),
378                        kind: report::ParseKind::Identifier,
379                        data: word.token(),
380                    };
381                    reporter.report(msg.into())?;
382                }
383            }
384        }
385
386        if policy.check_files {
387            let (buffer, content_type) = read_file(path, reporter)?;
388            if !explicit && !policy.binary && content_type.is_binary() {
389                let msg = report::BinaryFile { path };
390                reporter.report(msg.into())?;
391            } else {
392                for word in policy.tokenizer.parse_bytes(&buffer) {
393                    if ignores
394                        .get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
395                        .is_ignored(word.span())
396                    {
397                        continue;
398                    }
399                    // HACK: Don't look up the line_num per entry to better match the performance
400                    // of Typos for comparison purposes.  We don't really get much out of it
401                    // anyway.
402                    let line_num = 0;
403                    let msg = report::Parse {
404                        context: Some(report::FileContext { path, line_num }.into()),
405                        kind: report::ParseKind::Identifier,
406                        data: word.token(),
407                    };
408                    reporter.report(msg.into())?;
409                }
410            }
411        }
412
413        Ok(())
414    }
415}
416
417#[derive(Debug, Clone, Copy)]
418pub struct HighlightWords;
419
420impl FileChecker for HighlightWords {
421    fn check_file(
422        &self,
423        path: &std::path::Path,
424        explicit: bool,
425        policy: &crate::policy::Policy<'_, '_, '_>,
426        reporter: &dyn report::Report,
427    ) -> Result<(), std::io::Error> {
428        use std::fmt::Write as _;
429
430        let stdout = std::io::stdout();
431        let mut handle = stdout.lock();
432
433        let mut ignores: Option<Ignores> = None;
434        if policy.check_filenames {
435            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
436                let mut styled = String::new();
437                let mut prev_end = 0;
438                for (word, highlight) in policy
439                    .tokenizer
440                    .parse_str(file_name)
441                    .flat_map(|i| i.split())
442                    .filter(|word| {
443                        !ignores
444                            .get_or_insert_with(|| {
445                                Ignores::new(file_name.as_bytes(), policy.ignore)
446                            })
447                            .is_ignored(word.span())
448                    })
449                    .zip(HIGHLIGHTS.iter().cycle())
450                {
451                    let start = word.offset();
452                    let end = word.offset() + word.token().len();
453                    if prev_end != start {
454                        let _ = write!(
455                            &mut styled,
456                            "{UNMATCHED}{}{UNMATCHED:#}",
457                            &file_name[prev_end..start]
458                        );
459                    }
460                    let _ = write!(&mut styled, "{highlight}{}{highlight:#}", word.token());
461                    prev_end = end;
462                }
463                let _ = write!(
464                    &mut styled,
465                    "{UNMATCHED}{}{UNMATCHED:#}",
466                    &file_name[prev_end..file_name.len()]
467                );
468
469                let parent_dir = path.parent().unwrap();
470                if !parent_dir.as_os_str().is_empty() {
471                    let parent_dir = parent_dir.display();
472                    write!(handle, "{UNMATCHED}{parent_dir}/")?;
473                }
474                writeln!(handle, "{styled}{UNMATCHED}:{UNMATCHED:#}")?;
475            } else {
476                writeln!(handle, "{UNMATCHED}{}:{UNMATCHED:#}", path.display())?;
477            }
478        } else {
479            writeln!(handle, "{UNMATCHED}{}:{UNMATCHED:#}", path.display())?;
480        }
481
482        if policy.check_files {
483            let (buffer, content_type) = read_file(path, reporter)?;
484            if !explicit && !policy.binary && content_type.is_binary() {
485                // nop
486            } else if let Ok(buffer) = buffer.to_str() {
487                let mut styled = String::new();
488                let mut prev_end = 0;
489                for (word, highlight) in policy
490                    .tokenizer
491                    .parse_bytes(buffer.as_bytes())
492                    .flat_map(|i| i.split())
493                    .filter(|word| {
494                        !ignores
495                            .get_or_insert_with(|| Ignores::new(buffer.as_bytes(), policy.ignore))
496                            .is_ignored(word.span())
497                    })
498                    .zip(HIGHLIGHTS.iter().cycle())
499                {
500                    let start = word.offset();
501                    let end = word.offset() + word.token().len();
502                    if prev_end != start {
503                        let _ = write!(
504                            &mut styled,
505                            "{UNMATCHED}{}{UNMATCHED:#}",
506                            &buffer[prev_end..start]
507                        );
508                    }
509                    let _ = write!(&mut styled, "{highlight}{}{highlight:#}", word.token());
510                    prev_end = end;
511                }
512                let _ = write!(
513                    &mut styled,
514                    "{UNMATCHED}{}{UNMATCHED:#}",
515                    &buffer[prev_end..buffer.len()]
516                );
517
518                write!(handle, "{styled}")?;
519            }
520        }
521
522        Ok(())
523    }
524}
525
526static HIGHLIGHTS: &[anstyle::Style] = &[
527    anstyle::AnsiColor::Cyan.on_default(),
528    anstyle::AnsiColor::Cyan
529        .on_default()
530        .effects(anstyle::Effects::BOLD),
531];
532
533static UNMATCHED: anstyle::Style = anstyle::Style::new().effects(anstyle::Effects::DIMMED);
534
535#[derive(Debug, Clone, Copy)]
536pub struct Words;
537
538impl FileChecker for Words {
539    fn check_file(
540        &self,
541        path: &std::path::Path,
542        explicit: bool,
543        policy: &crate::policy::Policy<'_, '_, '_>,
544        reporter: &dyn report::Report,
545    ) -> Result<(), std::io::Error> {
546        let mut ignores: Option<Ignores> = None;
547        if policy.check_filenames {
548            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
549                for word in policy
550                    .tokenizer
551                    .parse_str(file_name)
552                    .flat_map(|i| i.split())
553                {
554                    if ignores
555                        .get_or_insert_with(|| Ignores::new(file_name.as_bytes(), policy.ignore))
556                        .is_ignored(word.span())
557                    {
558                        continue;
559                    }
560                    let msg = report::Parse {
561                        context: Some(report::PathContext { path }.into()),
562                        kind: report::ParseKind::Word,
563                        data: word.token(),
564                    };
565                    reporter.report(msg.into())?;
566                }
567            }
568        }
569
570        if policy.check_files {
571            let (buffer, content_type) = read_file(path, reporter)?;
572            if !explicit && !policy.binary && content_type.is_binary() {
573                let msg = report::BinaryFile { path };
574                reporter.report(msg.into())?;
575            } else {
576                for word in policy
577                    .tokenizer
578                    .parse_bytes(&buffer)
579                    .flat_map(|i| i.split())
580                {
581                    if ignores
582                        .get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
583                        .is_ignored(word.span())
584                    {
585                        continue;
586                    }
587                    // HACK: Don't look up the line_num per entry to better match the performance
588                    // of Typos for comparison purposes.  We don't really get much out of it
589                    // anyway.
590                    let line_num = 0;
591                    let msg = report::Parse {
592                        context: Some(report::FileContext { path, line_num }.into()),
593                        kind: report::ParseKind::Word,
594                        data: word.token(),
595                    };
596                    reporter.report(msg.into())?;
597                }
598            }
599        }
600
601        Ok(())
602    }
603}
604
605#[derive(Debug, Clone, Copy)]
606pub struct FileTypes;
607
608impl FileChecker for FileTypes {
609    fn check_file(
610        &self,
611        path: &std::path::Path,
612        explicit: bool,
613        policy: &crate::policy::Policy<'_, '_, '_>,
614        reporter: &dyn report::Report,
615    ) -> Result<(), std::io::Error> {
616        // Check `policy.binary` first so we can easily check performance of walking vs reading
617        if policy.binary {
618            let msg = report::FileType::new(path, policy.file_type);
619            reporter.report(msg.into())?;
620        } else {
621            let (_buffer, content_type) = read_file(path, reporter)?;
622            if !explicit && content_type.is_binary() {
623                let msg = report::BinaryFile { path };
624                reporter.report(msg.into())?;
625            } else {
626                let msg = report::FileType::new(path, policy.file_type);
627                reporter.report(msg.into())?;
628            }
629        }
630
631        Ok(())
632    }
633}
634
635#[derive(Debug, Clone, Copy)]
636pub struct FoundFiles;
637
638impl FileChecker for FoundFiles {
639    fn check_file(
640        &self,
641        path: &std::path::Path,
642        explicit: bool,
643        policy: &crate::policy::Policy<'_, '_, '_>,
644        reporter: &dyn report::Report,
645    ) -> Result<(), std::io::Error> {
646        // Check `policy.binary` first so we can easily check performance of walking vs reading
647        if policy.binary {
648            let msg = report::File::new(path);
649            reporter.report(msg.into())?;
650        } else {
651            let (_buffer, content_type) = read_file(path, reporter)?;
652            if !explicit && content_type.is_binary() {
653                let msg = report::BinaryFile { path };
654                reporter.report(msg.into())?;
655            } else {
656                let msg = report::File::new(path);
657                reporter.report(msg.into())?;
658            }
659        }
660
661        Ok(())
662    }
663}
664
665fn read_file(
666    path: &std::path::Path,
667    reporter: &dyn report::Report,
668) -> Result<(Vec<u8>, content_inspector::ContentType), std::io::Error> {
669    let buffer = if path == std::path::Path::new("-") {
670        let mut buffer = Vec::new();
671        report_result(
672            std::io::stdin().read_to_end(&mut buffer),
673            Some(path),
674            reporter,
675        )?;
676        buffer
677    } else {
678        report_result(std::fs::read(path), Some(path), reporter)?
679    };
680
681    let content_type = content_inspector::inspect(&buffer);
682
683    let (buffer, content_type) = match content_type {
684        content_inspector::ContentType::BINARY |
685        // HACK: We don't support UTF-32 yet
686        content_inspector::ContentType::UTF_32LE |
687        content_inspector::ContentType::UTF_32BE => {
688            (buffer, content_inspector::ContentType::BINARY)
689        },
690        content_inspector::ContentType::UTF_8 |
691        content_inspector::ContentType::UTF_8_BOM => {
692            (buffer, content_type)
693        },
694        content_inspector::ContentType::UTF_16LE => {
695            // Despite accepting a `String`, decode_to_string_without_replacement` doesn't allocate
696            // so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
697            // a buffer twice its size
698            let mut decoded = String::with_capacity(buffer.len() * 2);
699            let (r, written) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
700            let decoded = match r {
701                encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
702                _ => Err(format!("invalid UTF-16LE encoding at byte {written} in {}", path.display())),
703            };
704            let buffer = report_result(decoded, Some(path), reporter)?;
705            (buffer.into_bytes(), content_type)
706        }
707        content_inspector::ContentType::UTF_16BE => {
708            // Despite accepting a `String`, decode_to_string_without_replacement` doesn't allocate
709            // so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
710            // a buffer twice its size
711            let mut decoded = String::with_capacity(buffer.len() * 2);
712            let (r, written) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
713            let decoded = match r {
714                encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
715                _ => Err(format!("invalid UTF-16BE encoding at byte {written} in {}", path.display())),
716            };
717            let buffer = report_result(decoded, Some(path), reporter)?;
718            (buffer.into_bytes(), content_type)
719        },
720    };
721
722    Ok((buffer, content_type))
723}
724
725fn write_file(
726    path: &std::path::Path,
727    content_type: content_inspector::ContentType,
728    buffer: Vec<u8>,
729    reporter: &dyn report::Report,
730) -> Result<(), std::io::Error> {
731    let buffer = match content_type {
732        // HACK: We don't support UTF-32 yet
733        content_inspector::ContentType::UTF_32LE | content_inspector::ContentType::UTF_32BE => {
734            unreachable!("read_file should prevent these from being passed along");
735        }
736        content_inspector::ContentType::BINARY
737        | content_inspector::ContentType::UTF_8
738        | content_inspector::ContentType::UTF_8_BOM => buffer,
739        content_inspector::ContentType::UTF_16LE => {
740            let buffer = report_result(String::from_utf8(buffer), Some(path), reporter)?;
741            if buffer.is_empty() {
742                // Error occurred, don't clear out the file
743                return Ok(());
744            }
745            let (encoded, _, replaced) = encoding_rs::UTF_16LE.encode(&buffer);
746            assert!(
747                !replaced,
748                "Coming from UTF-8, UTF-16LE shouldn't do replacements"
749            );
750            encoded.into_owned()
751        }
752        content_inspector::ContentType::UTF_16BE => {
753            let buffer = report_result(String::from_utf8(buffer), Some(path), reporter)?;
754            if buffer.is_empty() {
755                // Error occurred, don't clear out the file
756                return Ok(());
757            }
758            let (encoded, _, replaced) = encoding_rs::UTF_16BE.encode(&buffer);
759            assert!(
760                !replaced,
761                "Coming from UTF-8, UTF-16BE shouldn't do replacements"
762            );
763            encoded.into_owned()
764        }
765    };
766
767    if path == std::path::Path::new("-") {
768        report_result(std::io::stdout().write_all(&buffer), Some(path), reporter)?;
769    } else {
770        report_result(std::fs::write(path, buffer), Some(path), reporter)?;
771    }
772
773    Ok(())
774}
775
776fn check_str<'a>(
777    buffer: &'a str,
778    policy: &'a crate::policy::Policy<'a, 'a, 'a>,
779) -> impl Iterator<Item = typos::Typo<'a>> {
780    let mut ignores: Option<Ignores> = None;
781
782    typos::check_str(buffer, policy.tokenizer, policy.dict).filter(move |typo| {
783        !ignores
784            .get_or_insert_with(|| Ignores::new(buffer.as_bytes(), policy.ignore))
785            .is_ignored(typo.span())
786    })
787}
788
789fn check_bytes<'a>(
790    buffer: &'a [u8],
791    policy: &'a crate::policy::Policy<'a, 'a, 'a>,
792) -> impl Iterator<Item = typos::Typo<'a>> {
793    let mut ignores: Option<Ignores> = None;
794
795    typos::check_bytes(buffer, policy.tokenizer, policy.dict).filter(move |typo| {
796        !ignores
797            .get_or_insert_with(|| Ignores::new(buffer, policy.ignore))
798            .is_ignored(typo.span())
799    })
800}
801
802fn report_result<T: Default, E: ToString>(
803    value: Result<T, E>,
804    path: Option<&std::path::Path>,
805    reporter: &dyn report::Report,
806) -> Result<T, std::io::Error> {
807    let buffer = match value {
808        Ok(value) => value,
809        Err(err) => {
810            report_error(err, path, reporter)?;
811            Default::default()
812        }
813    };
814    Ok(buffer)
815}
816
817fn report_error<E: ToString>(
818    err: E,
819    path: Option<&std::path::Path>,
820    reporter: &dyn report::Report,
821) -> Result<(), std::io::Error> {
822    let mut msg = report::Error::new(err.to_string());
823    msg.context = path.map(|path| report::Context::Path(report::PathContext { path }));
824    reporter.report(msg.into())?;
825    Ok(())
826}
827
/// Running line counter for byte offsets that are queried in non-decreasing order.
///
/// Only the bytes between the previous and the current offset are scanned on each query.
struct AccumulateLineNum {
    /// Line number (1-indexed) of `last_offset`.
    line_num: usize,
    /// Offset up to which newlines have already been counted.
    last_offset: usize,
}

impl AccumulateLineNum {
    /// Starts counting at the beginning of a buffer, on line 1.
    fn new() -> Self {
        Self {
            // 1-indexed
            line_num: 1,
            last_offset: 0,
        }
    }

    /// Returns the 1-indexed line number of `byte_offset` within `buffer`.
    ///
    /// # Panics
    ///
    /// Panics if `byte_offset` is smaller than the offset of the previous call or lies beyond
    /// the end of `buffer`.
    fn line_num(&mut self, buffer: &[u8], byte_offset: usize) -> usize {
        assert!(self.last_offset <= byte_offset);
        let unseen = &buffer[self.last_offset..byte_offset];
        self.line_num += unseen.iter().filter(|&&byte| byte == b'\n').count();
        self.last_offset = byte_offset;
        self.line_num
    }
}
852
/// Returns the line of `buffer` that contains `byte_offset`, together with the offset of that
/// byte relative to the start of the line.
///
/// The returned line excludes its terminator (`\n` or `\r\n`).  When nothing follows the start
/// of the line (an empty buffer, or an offset right after a trailing newline) an empty line is
/// returned instead of panicking.
///
/// # Panics
///
/// Panics if `byte_offset` is greater than `buffer.len()`.
fn extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) {
    let line_start = buffer[..byte_offset]
        .iter()
        .rposition(|&byte| byte == b'\n')
        // Skip the newline
        .map(|newline| newline + 1)
        .unwrap_or(0);
    let rest = &buffer[line_start..];
    let line = match rest.iter().position(|&byte| byte == b'\n') {
        Some(newline) => {
            let line = &rest[..newline];
            // A `\r` only counts as part of the terminator when it precedes the `\n`
            match line.split_last() {
                Some((b'\r', head)) => head,
                _ => line,
            }
        }
        // Last line of the buffer, without a terminator (possibly empty)
        None => rest,
    };
    let line_offset = byte_offset - line_start;
    (line, line_offset)
}
866
867fn extract_fix<'t>(typo: &'t typos::Typo<'t>) -> Option<&'t str> {
868    match &typo.corrections {
869        typos::Status::Corrections(c) if c.len() == 1 => Some(c[0].as_ref()),
870        _ => None,
871    }
872}
873
874fn is_fixable(typo: &typos::Typo<'_>) -> bool {
875    extract_fix(typo).is_some()
876}
877
878fn fix_buffer(mut buffer: Vec<u8>, typos: impl Iterator<Item = typos::Typo<'static>>) -> Vec<u8> {
879    let mut offset = 0isize;
880    for typo in typos {
881        let fix = extract_fix(&typo).expect("Caller only provides fixable typos");
882        let start = ((typo.byte_offset as isize) + offset) as usize;
883        let end = start + typo.typo.len();
884
885        buffer.splice(start..end, fix.as_bytes().iter().copied());
886
887        offset += (fix.len() as isize) - (typo.typo.len() as isize);
888    }
889    buffer
890}
891
892pub fn walk_path(
893    walk: ignore::Walk,
894    checks: &dyn FileChecker,
895    engine: &crate::policy::ConfigEngine<'_>,
896    reporter: &dyn report::Report,
897    force_exclude: bool,
898) -> Result<(), ignore::Error> {
899    for entry in walk {
900        walk_entry(entry, checks, engine, reporter, force_exclude)?;
901    }
902    Ok(())
903}
904
905pub fn walk_path_parallel(
906    walk: ignore::WalkParallel,
907    checks: &dyn FileChecker,
908    engine: &crate::policy::ConfigEngine<'_>,
909    reporter: &dyn report::Report,
910    force_exclude: bool,
911) -> Result<(), ignore::Error> {
912    let error: std::sync::Mutex<Result<(), ignore::Error>> = std::sync::Mutex::new(Ok(()));
913    walk.run(|| {
914        Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
915            match walk_entry(entry, checks, engine, reporter, force_exclude) {
916                Ok(()) => ignore::WalkState::Continue,
917                Err(err) => {
918                    *error.lock().unwrap() = Err(err);
919                    ignore::WalkState::Quit
920                }
921            }
922        })
923    });
924
925    error.into_inner().unwrap()
926}
927
928fn walk_entry(
929    entry: Result<ignore::DirEntry, ignore::Error>,
930    checks: &dyn FileChecker,
931    engine: &crate::policy::ConfigEngine<'_>,
932    reporter: &dyn report::Report,
933    force_exclude: bool,
934) -> Result<(), ignore::Error> {
935    let entry = match entry {
936        Ok(entry) => entry,
937        Err(err) => {
938            report_error(err, None, reporter)?;
939            return Ok(());
940        }
941    };
942    if crate::config::SUPPORTED_FILE_NAMES
943        .iter()
944        .any(|n| *n == entry.file_name())
945    {
946        log::debug!(
947            "{}: skipping potential config file as it may have typos",
948            entry.path().display()
949        );
950        return Ok(());
951    }
952    if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
953        let explicit = entry.depth() == 0 && !force_exclude;
954        let (path, lookup_path) = if entry.is_stdin() {
955            let path = std::path::Path::new("-");
956            let cwd = std::env::current_dir().map_err(|err| {
957                let kind = err.kind();
958                std::io::Error::new(kind, "no current working directory".to_owned())
959            })?;
960            (path, cwd)
961        } else {
962            let path = entry.path();
963            let abs_path = report_result(path.canonicalize(), Some(path), reporter)?;
964            (path, abs_path)
965        };
966        let policy = engine.policy(&lookup_path);
967        checks.check_file(path, explicit, &policy, reporter)?;
968    }
969
970    Ok(())
971}
972
973#[derive(Clone, Debug)]
974struct Ignores {
975    blocks: Vec<std::ops::Range<usize>>,
976}
977
978impl Ignores {
979    fn new(content: &[u8], ignores: &[regex::Regex]) -> Self {
980        let mut blocks = Vec::new();
981        if let Ok(content) = std::str::from_utf8(content) {
982            for ignore in ignores {
983                for mat in ignore.find_iter(content) {
984                    blocks.push(mat.range());
985                }
986            }
987        }
988        Self { blocks }
989    }
990
991    fn is_ignored(&self, span: std::ops::Range<usize>) -> bool {
992        let start = span.start;
993        let end = span.end.saturating_sub(1);
994        self.blocks
995            .iter()
996            .any(|block| block.contains(&start) || block.contains(&end))
997    }
998}
999
#[cfg(test)]
mod test {
    use super::*;

    /// Runs `fix_buffer` over `line` with one `(byte_offset, typo, correction)` triple per fix.
    fn fix_simple(line: &str, corrections: Vec<(usize, &'static str, &'static str)>) -> String {
        let typos = corrections
            .into_iter()
            .map(|(byte_offset, typo, correction)| typos::Typo {
                byte_offset,
                typo: typo.into(),
                corrections: typos::Status::Corrections(vec![correction.into()]),
            });
        let fixed = fix_buffer(line.as_bytes().to_vec(), typos);
        String::from_utf8(fixed).unwrap()
    }

    /// Checks the line and in-line offset `extract_line` returns, and that the in-line offset
    /// still addresses the byte at `buffer_offset`.
    #[track_caller]
    fn assert_extract_line(
        buffer: &[u8],
        buffer_offset: usize,
        expected_line: &[u8],
        expected_offset: usize,
    ) {
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, expected_offset);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    #[test]
    fn test_fix_buffer_single() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(4, "foo", "bar")]),
            "foo bar foo"
        );
    }

    #[test]
    fn test_fix_buffer_single_grow() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(4, "foo", "happy")]),
            "foo happy foo"
        );
    }

    #[test]
    fn test_fix_buffer_single_shrink() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(4, "foo", "if")]),
            "foo if foo"
        );
    }

    #[test]
    fn test_fix_buffer_start() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(0, "foo", "bar")]),
            "bar foo foo"
        );
    }

    #[test]
    fn test_fix_buffer_end() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(8, "foo", "bar")]),
            "foo foo bar"
        );
    }

    #[test]
    fn test_fix_buffer_end_grow() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(8, "foo", "happy")]),
            "foo foo happy"
        );
    }

    #[test]
    fn test_fix_buffer_multiple() {
        let fixes = vec![(4, "foo", "happy"), (8, "foo", "world")];
        assert_eq!(fix_simple("foo foo foo", fixes), "foo happy world");
    }

    #[test]
    fn test_line_count_first() {
        let mut counter = AccumulateLineNum::new();
        assert_eq!(counter.line_num(b"hello world", 6), 1);
    }

    #[test]
    fn test_line_count_second() {
        let mut counter = AccumulateLineNum::new();
        assert_eq!(counter.line_num(b"1\n2\n3", 2), 2);
    }

    #[test]
    fn test_line_count_multiple() {
        let buffer = b"1\n2\n3";
        let mut counter = AccumulateLineNum::new();
        assert_eq!(counter.line_num(buffer, 0), 1);
        assert_eq!(counter.line_num(buffer, 2), 2);
        assert_eq!(counter.line_num(buffer, 4), 3);
    }

    #[test]
    fn test_extract_line_single_line() {
        assert_extract_line(b"hello world", 6, b"hello world", 6);
    }

    #[test]
    fn test_extract_line_first() {
        assert_extract_line(b"1\n2\n3", 0, b"1", 0);
    }

    #[test]
    fn test_extract_line_middle() {
        assert_extract_line(b"1\n2\n3", 2, b"2", 0);
    }

    #[test]
    fn test_extract_line_end() {
        assert_extract_line(b"1\n2\n3", 4, b"3", 0);
    }

    #[test]
    fn test_extract_line_offset_change() {
        assert_extract_line(b"1\nhello world\n2", 8, b"hello world", 6);
    }

    #[test]
    fn test_extract_line_windows() {
        assert_extract_line(b"1\r\nhello world\r\n2", 9, b"hello world", 6);
    }

    #[test]
    fn test_extract_line_slovak() {
        // Single-byte (non-UTF-8) encoded text: lines must be found on raw bytes
        let buffer = b"LastErrorMessage=%1.%n%nChyba %2: %3\r\nSetupFileMissing=In\x9Atala\xE8n\xFD adres\xE1r neobsahuje s\xFAbor %1. Opravte, pros\xEDm, t\xFAto chybu alebo si zaobstarajte nov\xFA k\xF3piu tohto produktu.\r\nSetupFileCorrupt=S\xFAbory sprievodcu in\x9Atal\xE1ciou s\xFA po\x9Akoden\xE9. Zaobstarajte si, pros\xEDm, nov\xFA k\xF3piu tohto produktu.";
        let expected_line = b"SetupFileMissing=In\x9Atala\xE8n\xFD adres\xE1r neobsahuje s\xFAbor %1. Opravte, pros\xEDm, t\xFAto chybu alebo si zaobstarajte nov\xFA k\xF3piu tohto produktu.";
        assert_extract_line(buffer, 66, expected_line, 28);
    }
}