typos_cli/
file.rs

1use bstr::ByteSlice;
2use std::io::Read;
3use std::io::Write;
4
5use crate::report;
6
7pub trait FileChecker: Send + Sync {
8    fn check_file(
9        &self,
10        path: &std::path::Path,
11        explicit: bool,
12        policy: &crate::policy::Policy<'_, '_, '_>,
13        reporter: &dyn report::Report,
14    ) -> Result<(), std::io::Error>;
15}
16
17#[derive(Debug, Clone, Copy)]
18pub struct Typos;
19
20impl FileChecker for Typos {
21    fn check_file(
22        &self,
23        path: &std::path::Path,
24        explicit: bool,
25        policy: &crate::policy::Policy<'_, '_, '_>,
26        reporter: &dyn report::Report,
27    ) -> Result<(), std::io::Error> {
28        if policy.check_filenames {
29            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
30                for typo in check_str(file_name, policy) {
31                    let msg = report::Typo {
32                        context: Some(report::PathContext { path }.into()),
33                        buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
34                        byte_offset: typo.byte_offset,
35                        typo: typo.typo.as_ref(),
36                        corrections: typo.corrections,
37                    };
38                    reporter.report(msg.into())?;
39                }
40            }
41        }
42
43        if policy.check_files {
44            let (buffer, content_type) = read_file(path, reporter)?;
45            if !explicit && !policy.binary && content_type.is_binary() {
46                let msg = report::BinaryFile { path };
47                reporter.report(msg.into())?;
48            } else {
49                let mut accum_line_num = AccumulateLineNum::new();
50                for typo in check_bytes(&buffer, policy) {
51                    let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
52                    let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
53                    let msg = report::Typo {
54                        context: Some(report::FileContext { path, line_num }.into()),
55                        buffer: std::borrow::Cow::Borrowed(line),
56                        byte_offset: line_offset,
57                        typo: typo.typo.as_ref(),
58                        corrections: typo.corrections,
59                    };
60                    reporter.report(msg.into())?;
61                }
62            }
63        }
64
65        Ok(())
66    }
67}
68
69#[derive(Debug, Clone, Copy)]
70pub struct FixTypos;
71
72impl FileChecker for FixTypos {
73    fn check_file(
74        &self,
75        path: &std::path::Path,
76        explicit: bool,
77        policy: &crate::policy::Policy<'_, '_, '_>,
78        reporter: &dyn report::Report,
79    ) -> Result<(), std::io::Error> {
80        if policy.check_files {
81            let (buffer, content_type) = read_file(path, reporter)?;
82            if !explicit && !policy.binary && content_type.is_binary() {
83                let msg = report::BinaryFile { path };
84                reporter.report(msg.into())?;
85            } else {
86                let mut fixes = Vec::new();
87                let mut accum_line_num = AccumulateLineNum::new();
88                for typo in check_bytes(&buffer, policy) {
89                    if is_fixable(&typo) {
90                        fixes.push(typo.into_owned());
91                    } else {
92                        let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
93                        let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
94                        let msg = report::Typo {
95                            context: Some(report::FileContext { path, line_num }.into()),
96                            buffer: std::borrow::Cow::Borrowed(line),
97                            byte_offset: line_offset,
98                            typo: typo.typo.as_ref(),
99                            corrections: typo.corrections,
100                        };
101                        reporter.report(msg.into())?;
102                    }
103                }
104                if !fixes.is_empty() || path == std::path::Path::new("-") {
105                    let buffer = fix_buffer(buffer, fixes.into_iter());
106                    write_file(path, content_type, buffer, reporter)?;
107                }
108            }
109        }
110
111        // Ensure the above write can happen before renaming the file.
112        if policy.check_filenames {
113            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
114                let mut fixes = Vec::new();
115                for typo in check_str(file_name, policy) {
116                    if is_fixable(&typo) {
117                        fixes.push(typo.into_owned());
118                    } else {
119                        let msg = report::Typo {
120                            context: Some(report::PathContext { path }.into()),
121                            buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
122                            byte_offset: typo.byte_offset,
123                            typo: typo.typo.as_ref(),
124                            corrections: typo.corrections,
125                        };
126                        reporter.report(msg.into())?;
127                    }
128                }
129                if !fixes.is_empty() {
130                    let file_name = file_name.to_owned().into_bytes();
131                    let new_name = fix_buffer(file_name, fixes.into_iter());
132                    let new_name =
133                        String::from_utf8(new_name).expect("corrections are valid utf-8");
134                    let new_path = path.with_file_name(new_name);
135                    std::fs::rename(path, new_path)?;
136                }
137            }
138        }
139
140        Ok(())
141    }
142}
143
144#[derive(Debug, Clone, Copy)]
145pub struct DiffTypos;
146
147impl FileChecker for DiffTypos {
148    fn check_file(
149        &self,
150        path: &std::path::Path,
151        explicit: bool,
152        policy: &crate::policy::Policy<'_, '_, '_>,
153        reporter: &dyn report::Report,
154    ) -> Result<(), std::io::Error> {
155        let mut content = Vec::new();
156        let mut new_content = Vec::new();
157        if policy.check_files {
158            let (buffer, content_type) = read_file(path, reporter)?;
159            if !explicit && !policy.binary && content_type.is_binary() {
160                let msg = report::BinaryFile { path };
161                reporter.report(msg.into())?;
162            } else {
163                let mut fixes = Vec::new();
164                let mut accum_line_num = AccumulateLineNum::new();
165                for typo in check_bytes(&buffer, policy) {
166                    if is_fixable(&typo) {
167                        fixes.push(typo.into_owned());
168                    } else {
169                        let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
170                        let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
171                        let msg = report::Typo {
172                            context: Some(report::FileContext { path, line_num }.into()),
173                            buffer: std::borrow::Cow::Borrowed(line),
174                            byte_offset: line_offset,
175                            typo: typo.typo.as_ref(),
176                            corrections: typo.corrections,
177                        };
178                        reporter.report(msg.into())?;
179                    }
180                }
181                if !fixes.is_empty() {
182                    new_content = fix_buffer(buffer.clone(), fixes.into_iter());
183                    content = buffer;
184                }
185            }
186        }
187
188        // Match FixTypos ordering for easy diffing.
189        let mut new_path = None;
190        if policy.check_filenames {
191            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
192                let mut fixes = Vec::new();
193                for typo in check_str(file_name, policy) {
194                    if is_fixable(&typo) {
195                        fixes.push(typo.into_owned());
196                    } else {
197                        let msg = report::Typo {
198                            context: Some(report::PathContext { path }.into()),
199                            buffer: std::borrow::Cow::Borrowed(file_name.as_bytes()),
200                            byte_offset: typo.byte_offset,
201                            typo: typo.typo.as_ref(),
202                            corrections: typo.corrections,
203                        };
204                        reporter.report(msg.into())?;
205                    }
206                }
207                if !fixes.is_empty() {
208                    let file_name = file_name.to_owned().into_bytes();
209                    let new_name = fix_buffer(file_name, fixes.into_iter());
210                    let new_name =
211                        String::from_utf8(new_name).expect("corrections are valid utf-8");
212                    new_path = Some(path.with_file_name(new_name));
213                }
214            }
215        }
216
217        if new_path.is_some() || !content.is_empty() {
218            let original_path = path.display().to_string();
219            let fixed_path = new_path.as_deref().unwrap_or(path).display().to_string();
220            let original_content: Vec<_> = content
221                .lines_with_terminator()
222                .map(|s| String::from_utf8_lossy(s).into_owned())
223                .collect();
224            let fixed_content: Vec<_> = new_content
225                .lines_with_terminator()
226                .map(|s| String::from_utf8_lossy(s).into_owned())
227                .collect();
228            let diff = difflib::unified_diff(
229                &original_content,
230                &fixed_content,
231                original_path.as_str(),
232                fixed_path.as_str(),
233                "original",
234                "fixed",
235                0,
236            );
237            let stdout = std::io::stdout();
238            let mut handle = stdout.lock();
239            for line in diff {
240                write!(handle, "{line}")?;
241            }
242        }
243
244        Ok(())
245    }
246}
247
248#[derive(Debug, Clone, Copy)]
249pub struct Identifiers;
250
251impl FileChecker for Identifiers {
252    fn check_file(
253        &self,
254        path: &std::path::Path,
255        explicit: bool,
256        policy: &crate::policy::Policy<'_, '_, '_>,
257        reporter: &dyn report::Report,
258    ) -> Result<(), std::io::Error> {
259        let mut ignores: Option<Ignores> = None;
260        if policy.check_filenames {
261            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
262                for word in policy.tokenizer.parse_str(file_name) {
263                    if ignores
264                        .get_or_insert_with(|| Ignores::new(file_name.as_bytes(), policy.ignore))
265                        .is_ignored(word.span())
266                    {
267                        continue;
268                    }
269                    let msg = report::Parse {
270                        context: Some(report::PathContext { path }.into()),
271                        kind: report::ParseKind::Identifier,
272                        data: word.token(),
273                    };
274                    reporter.report(msg.into())?;
275                }
276            }
277        }
278
279        if policy.check_files {
280            let (buffer, content_type) = read_file(path, reporter)?;
281            if !explicit && !policy.binary && content_type.is_binary() {
282                let msg = report::BinaryFile { path };
283                reporter.report(msg.into())?;
284            } else {
285                for word in policy.tokenizer.parse_bytes(&buffer) {
286                    if ignores
287                        .get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
288                        .is_ignored(word.span())
289                    {
290                        continue;
291                    }
292                    // HACK: Don't look up the line_num per entry to better match the performance
293                    // of Typos for comparison purposes.  We don't really get much out of it
294                    // anyway.
295                    let line_num = 0;
296                    let msg = report::Parse {
297                        context: Some(report::FileContext { path, line_num }.into()),
298                        kind: report::ParseKind::Identifier,
299                        data: word.token(),
300                    };
301                    reporter.report(msg.into())?;
302                }
303            }
304        }
305
306        Ok(())
307    }
308}
309
310#[derive(Debug, Clone, Copy)]
311pub struct Words;
312
313impl FileChecker for Words {
314    fn check_file(
315        &self,
316        path: &std::path::Path,
317        explicit: bool,
318        policy: &crate::policy::Policy<'_, '_, '_>,
319        reporter: &dyn report::Report,
320    ) -> Result<(), std::io::Error> {
321        let mut ignores: Option<Ignores> = None;
322        if policy.check_filenames {
323            if let Some(file_name) = path.file_name().and_then(|s| s.to_str()) {
324                for word in policy
325                    .tokenizer
326                    .parse_str(file_name)
327                    .flat_map(|i| i.split())
328                {
329                    if ignores
330                        .get_or_insert_with(|| Ignores::new(file_name.as_bytes(), policy.ignore))
331                        .is_ignored(word.span())
332                    {
333                        continue;
334                    }
335                    let msg = report::Parse {
336                        context: Some(report::PathContext { path }.into()),
337                        kind: report::ParseKind::Word,
338                        data: word.token(),
339                    };
340                    reporter.report(msg.into())?;
341                }
342            }
343        }
344
345        if policy.check_files {
346            let (buffer, content_type) = read_file(path, reporter)?;
347            if !explicit && !policy.binary && content_type.is_binary() {
348                let msg = report::BinaryFile { path };
349                reporter.report(msg.into())?;
350            } else {
351                for word in policy
352                    .tokenizer
353                    .parse_bytes(&buffer)
354                    .flat_map(|i| i.split())
355                {
356                    if ignores
357                        .get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
358                        .is_ignored(word.span())
359                    {
360                        continue;
361                    }
362                    // HACK: Don't look up the line_num per entry to better match the performance
363                    // of Typos for comparison purposes.  We don't really get much out of it
364                    // anyway.
365                    let line_num = 0;
366                    let msg = report::Parse {
367                        context: Some(report::FileContext { path, line_num }.into()),
368                        kind: report::ParseKind::Word,
369                        data: word.token(),
370                    };
371                    reporter.report(msg.into())?;
372                }
373            }
374        }
375
376        Ok(())
377    }
378}
379
380#[derive(Debug, Clone, Copy)]
381pub struct FileTypes;
382
383impl FileChecker for FileTypes {
384    fn check_file(
385        &self,
386        path: &std::path::Path,
387        explicit: bool,
388        policy: &crate::policy::Policy<'_, '_, '_>,
389        reporter: &dyn report::Report,
390    ) -> Result<(), std::io::Error> {
391        // Check `policy.binary` first so we can easily check performance of walking vs reading
392        if policy.binary {
393            let msg = report::FileType::new(path, policy.file_type);
394            reporter.report(msg.into())?;
395        } else {
396            let (_buffer, content_type) = read_file(path, reporter)?;
397            if !explicit && content_type.is_binary() {
398                let msg = report::BinaryFile { path };
399                reporter.report(msg.into())?;
400            } else {
401                let msg = report::FileType::new(path, policy.file_type);
402                reporter.report(msg.into())?;
403            }
404        }
405
406        Ok(())
407    }
408}
409
410#[derive(Debug, Clone, Copy)]
411pub struct FoundFiles;
412
413impl FileChecker for FoundFiles {
414    fn check_file(
415        &self,
416        path: &std::path::Path,
417        explicit: bool,
418        policy: &crate::policy::Policy<'_, '_, '_>,
419        reporter: &dyn report::Report,
420    ) -> Result<(), std::io::Error> {
421        // Check `policy.binary` first so we can easily check performance of walking vs reading
422        if policy.binary {
423            let msg = report::File::new(path);
424            reporter.report(msg.into())?;
425        } else {
426            let (_buffer, content_type) = read_file(path, reporter)?;
427            if !explicit && content_type.is_binary() {
428                let msg = report::BinaryFile { path };
429                reporter.report(msg.into())?;
430            } else {
431                let msg = report::File::new(path);
432                reporter.report(msg.into())?;
433            }
434        }
435
436        Ok(())
437    }
438}
439
440fn read_file(
441    path: &std::path::Path,
442    reporter: &dyn report::Report,
443) -> Result<(Vec<u8>, content_inspector::ContentType), std::io::Error> {
444    let buffer = if path == std::path::Path::new("-") {
445        let mut buffer = Vec::new();
446        report_result(
447            std::io::stdin().read_to_end(&mut buffer),
448            Some(path),
449            reporter,
450        )?;
451        buffer
452    } else {
453        report_result(std::fs::read(path), Some(path), reporter)?
454    };
455
456    let content_type = content_inspector::inspect(&buffer);
457
458    let (buffer, content_type) = match content_type {
459        content_inspector::ContentType::BINARY |
460        // HACK: We don't support UTF-32 yet
461        content_inspector::ContentType::UTF_32LE |
462        content_inspector::ContentType::UTF_32BE => {
463            (buffer, content_inspector::ContentType::BINARY)
464        },
465        content_inspector::ContentType::UTF_8 |
466        content_inspector::ContentType::UTF_8_BOM => {
467            (buffer, content_type)
468        },
469        content_inspector::ContentType::UTF_16LE => {
470            // Despite accepting a `String`, decode_to_string_without_replacement` doesn't allocate
471            // so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
472            // a buffer twice its size
473            let mut decoded = String::with_capacity(buffer.len() * 2);
474            let (r, written) = encoding_rs::UTF_16LE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
475            let decoded = match r {
476                encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
477                _ => Err(format!("invalid UTF-16LE encoding at byte {written} in {}", path.display())),
478            };
479            let buffer = report_result(decoded, Some(path), reporter)?;
480            (buffer.into_bytes(), content_type)
481        }
482        content_inspector::ContentType::UTF_16BE => {
483            // Despite accepting a `String`, decode_to_string_without_replacement` doesn't allocate
484            // so to avoid `OutputFull` loops, we're going to assume any UTF-16 content can fit in
485            // a buffer twice its size
486            let mut decoded = String::with_capacity(buffer.len() * 2);
487            let (r, written) = encoding_rs::UTF_16BE.new_decoder_with_bom_removal().decode_to_string_without_replacement(&buffer, &mut decoded, true);
488            let decoded = match r {
489                encoding_rs::DecoderResult::InputEmpty => Ok(decoded),
490                _ => Err(format!("invalid UTF-16BE encoding at byte {written} in {}", path.display())),
491            };
492            let buffer = report_result(decoded, Some(path), reporter)?;
493            (buffer.into_bytes(), content_type)
494        },
495    };
496
497    Ok((buffer, content_type))
498}
499
500fn write_file(
501    path: &std::path::Path,
502    content_type: content_inspector::ContentType,
503    buffer: Vec<u8>,
504    reporter: &dyn report::Report,
505) -> Result<(), std::io::Error> {
506    let buffer = match content_type {
507        // HACK: We don't support UTF-32 yet
508        content_inspector::ContentType::UTF_32LE | content_inspector::ContentType::UTF_32BE => {
509            unreachable!("read_file should prevent these from being passed along");
510        }
511        content_inspector::ContentType::BINARY
512        | content_inspector::ContentType::UTF_8
513        | content_inspector::ContentType::UTF_8_BOM => buffer,
514        content_inspector::ContentType::UTF_16LE => {
515            let buffer = report_result(String::from_utf8(buffer), Some(path), reporter)?;
516            if buffer.is_empty() {
517                // Error occurred, don't clear out the file
518                return Ok(());
519            }
520            let (encoded, _, replaced) = encoding_rs::UTF_16LE.encode(&buffer);
521            assert!(
522                !replaced,
523                "Coming from UTF-8, UTF-16LE shouldn't do replacements"
524            );
525            encoded.into_owned()
526        }
527        content_inspector::ContentType::UTF_16BE => {
528            let buffer = report_result(String::from_utf8(buffer), Some(path), reporter)?;
529            if buffer.is_empty() {
530                // Error occurred, don't clear out the file
531                return Ok(());
532            }
533            let (encoded, _, replaced) = encoding_rs::UTF_16BE.encode(&buffer);
534            assert!(
535                !replaced,
536                "Coming from UTF-8, UTF-16BE shouldn't do replacements"
537            );
538            encoded.into_owned()
539        }
540    };
541
542    if path == std::path::Path::new("-") {
543        report_result(std::io::stdout().write_all(&buffer), Some(path), reporter)?;
544    } else {
545        report_result(std::fs::write(path, buffer), Some(path), reporter)?;
546    }
547
548    Ok(())
549}
550
551fn check_str<'a>(
552    buffer: &'a str,
553    policy: &'a crate::policy::Policy<'a, 'a, 'a>,
554) -> impl Iterator<Item = typos::Typo<'a>> {
555    let mut ignores: Option<Ignores> = None;
556
557    typos::check_str(buffer, policy.tokenizer, policy.dict).filter(move |typo| {
558        !ignores
559            .get_or_insert_with(|| Ignores::new(buffer.as_bytes(), policy.ignore))
560            .is_ignored(typo.span())
561    })
562}
563
564fn check_bytes<'a>(
565    buffer: &'a [u8],
566    policy: &'a crate::policy::Policy<'a, 'a, 'a>,
567) -> impl Iterator<Item = typos::Typo<'a>> {
568    let mut ignores: Option<Ignores> = None;
569
570    typos::check_bytes(buffer, policy.tokenizer, policy.dict).filter(move |typo| {
571        !ignores
572            .get_or_insert_with(|| Ignores::new(buffer, policy.ignore))
573            .is_ignored(typo.span())
574    })
575}
576
577fn report_result<T: Default, E: ToString>(
578    value: Result<T, E>,
579    path: Option<&std::path::Path>,
580    reporter: &dyn report::Report,
581) -> Result<T, std::io::Error> {
582    let buffer = match value {
583        Ok(value) => value,
584        Err(err) => {
585            report_error(err, path, reporter)?;
586            Default::default()
587        }
588    };
589    Ok(buffer)
590}
591
592fn report_error<E: ToString>(
593    err: E,
594    path: Option<&std::path::Path>,
595    reporter: &dyn report::Report,
596) -> Result<(), std::io::Error> {
597    let mut msg = report::Error::new(err.to_string());
598    msg.context = path.map(|path| report::Context::Path(report::PathContext { path }));
599    reporter.report(msg.into())?;
600    Ok(())
601}
602
/// Running line counter for one buffer.
///
/// Each query only scans the bytes between the previous offset and the new one, so offsets
/// must be passed in non-decreasing order.
struct AccumulateLineNum {
    /// Line number (1-indexed) of `last_offset`.
    line_num: usize,
    /// Byte offset of the most recent query.
    last_offset: usize,
}

impl AccumulateLineNum {
    /// Starts at line 1, offset 0.
    fn new() -> Self {
        // 1-indexed
        let line_num = 1;
        Self {
            line_num,
            last_offset: 0,
        }
    }

    /// Returns the 1-indexed line number of `byte_offset` within `buffer`.
    ///
    /// # Panics
    ///
    /// Panics if `byte_offset` is smaller than the previously queried offset or lies past the
    /// end of `buffer`.
    fn line_num(&mut self, buffer: &[u8], byte_offset: usize) -> usize {
        assert!(self.last_offset <= byte_offset);
        let newly_scanned = &buffer[self.last_offset..byte_offset];
        let newlines = newly_scanned.iter().filter(|&&byte| byte == b'\n').count();
        self.last_offset = byte_offset;
        self.line_num += newlines;
        self.line_num
    }
}
627
/// Returns the line of `buffer` containing `byte_offset`, without its `\n` / `\r\n`
/// terminator, together with `byte_offset` expressed relative to the start of that line.
///
/// # Panics
///
/// Panics if `byte_offset` lies past the end of `buffer`, or if nothing follows the start of
/// the line.
fn extract_line(buffer: &[u8], byte_offset: usize) -> (&[u8], usize) {
    let line_start = buffer[..byte_offset]
        .iter()
        .rposition(|&byte| byte == b'\n')
        // Skip the newline
        .map_or(0, |newline| newline + 1);

    let rest = &buffer[line_start..];
    assert!(!rest.is_empty(), "should always be at least a line");
    let line = match rest.iter().position(|&byte| byte == b'\n') {
        Some(newline) => {
            let line = &rest[..newline];
            // A `\r` only counts as part of the terminator when a `\n` follows it.
            line.strip_suffix(b"\r").unwrap_or(line)
        }
        None => rest,
    };

    (line, byte_offset - line_start)
}
641
642fn extract_fix<'t>(typo: &'t typos::Typo<'t>) -> Option<&'t str> {
643    match &typo.corrections {
644        typos::Status::Corrections(c) if c.len() == 1 => Some(c[0].as_ref()),
645        _ => None,
646    }
647}
648
649fn is_fixable(typo: &typos::Typo<'_>) -> bool {
650    extract_fix(typo).is_some()
651}
652
653fn fix_buffer(mut buffer: Vec<u8>, typos: impl Iterator<Item = typos::Typo<'static>>) -> Vec<u8> {
654    let mut offset = 0isize;
655    for typo in typos {
656        let fix = extract_fix(&typo).expect("Caller only provides fixable typos");
657        let start = ((typo.byte_offset as isize) + offset) as usize;
658        let end = start + typo.typo.len();
659
660        buffer.splice(start..end, fix.as_bytes().iter().copied());
661
662        offset += (fix.len() as isize) - (typo.typo.len() as isize);
663    }
664    buffer
665}
666
667pub fn walk_path(
668    walk: ignore::Walk,
669    checks: &dyn FileChecker,
670    engine: &crate::policy::ConfigEngine<'_>,
671    reporter: &dyn report::Report,
672    force_exclude: bool,
673) -> Result<(), ignore::Error> {
674    for entry in walk {
675        walk_entry(entry, checks, engine, reporter, force_exclude)?;
676    }
677    Ok(())
678}
679
680pub fn walk_path_parallel(
681    walk: ignore::WalkParallel,
682    checks: &dyn FileChecker,
683    engine: &crate::policy::ConfigEngine<'_>,
684    reporter: &dyn report::Report,
685    force_exclude: bool,
686) -> Result<(), ignore::Error> {
687    let error: std::sync::Mutex<Result<(), ignore::Error>> = std::sync::Mutex::new(Ok(()));
688    walk.run(|| {
689        Box::new(|entry: Result<ignore::DirEntry, ignore::Error>| {
690            match walk_entry(entry, checks, engine, reporter, force_exclude) {
691                Ok(()) => ignore::WalkState::Continue,
692                Err(err) => {
693                    *error.lock().unwrap() = Err(err);
694                    ignore::WalkState::Quit
695                }
696            }
697        })
698    });
699
700    error.into_inner().unwrap()
701}
702
703fn walk_entry(
704    entry: Result<ignore::DirEntry, ignore::Error>,
705    checks: &dyn FileChecker,
706    engine: &crate::policy::ConfigEngine<'_>,
707    reporter: &dyn report::Report,
708    force_exclude: bool,
709) -> Result<(), ignore::Error> {
710    let entry = match entry {
711        Ok(entry) => entry,
712        Err(err) => {
713            report_error(err, None, reporter)?;
714            return Ok(());
715        }
716    };
717    if crate::config::SUPPORTED_FILE_NAMES
718        .iter()
719        .any(|n| *n == entry.file_name())
720    {
721        log::debug!(
722            "{}: skipping potential config file as it may have typos",
723            entry.path().display()
724        );
725        return Ok(());
726    }
727    if entry.file_type().map(|t| t.is_file()).unwrap_or(true) {
728        let explicit = entry.depth() == 0 && !force_exclude;
729        let (path, lookup_path) = if entry.is_stdin() {
730            let path = std::path::Path::new("-");
731            let cwd = std::env::current_dir().map_err(|err| {
732                let kind = err.kind();
733                std::io::Error::new(kind, "no current working directory".to_owned())
734            })?;
735            (path, cwd)
736        } else {
737            let path = entry.path();
738            let abs_path = report_result(path.canonicalize(), Some(path), reporter)?;
739            (path, abs_path)
740        };
741        let policy = engine.policy(&lookup_path);
742        checks.check_file(path, explicit, &policy, reporter)?;
743    }
744
745    Ok(())
746}
747
748#[derive(Clone, Debug)]
749struct Ignores {
750    blocks: Vec<std::ops::Range<usize>>,
751}
752
753impl Ignores {
754    fn new(content: &[u8], ignores: &[regex::Regex]) -> Self {
755        let mut blocks = Vec::new();
756        if let Ok(content) = std::str::from_utf8(content) {
757            for ignore in ignores {
758                for mat in ignore.find_iter(content) {
759                    blocks.push(mat.range());
760                }
761            }
762        }
763        Self { blocks }
764    }
765
766    fn is_ignored(&self, span: std::ops::Range<usize>) -> bool {
767        let start = span.start;
768        let end = span.end.saturating_sub(1);
769        self.blocks
770            .iter()
771            .any(|block| block.contains(&start) || block.contains(&end))
772    }
773}
774
#[cfg(test)]
mod test {
    use super::*;

    /// Runs `fix_buffer` over `line` with the given corrections and returns the
    /// resulting bytes as a `String`.
    ///
    /// Every `(byte_offset, typo, correction)` triple becomes a `typos::Typo`
    /// carrying exactly one suggested correction.
    fn fix_simple(line: &str, corrections: Vec<(usize, &'static str, &'static str)>) -> String {
        let fixes = corrections
            .into_iter()
            .map(|(byte_offset, typo, correction)| {
                let typo = typo.into();
                typos::Typo {
                    byte_offset,
                    typo,
                    corrections: typos::Status::Corrections(vec![correction.into()]),
                }
            });
        let fixed = fix_buffer(line.as_bytes().to_vec(), fixes);
        String::from_utf8(fixed).unwrap()
    }

    /// Calls `extract_line` on `buffer` at `buffer_offset` and checks the
    /// returned line, the offset inside that line, and that the byte found at
    /// that offset is the same byte the buffer holds at `buffer_offset`.
    #[track_caller]
    fn assert_extract_line(
        buffer: &[u8],
        buffer_offset: usize,
        expected_line: &[u8],
        expected_offset: usize,
    ) {
        let (line, offset) = extract_line(buffer, buffer_offset);
        assert_eq!(line, expected_line);
        assert_eq!(offset, expected_offset);
        assert_eq!(line[offset], buffer[buffer_offset]);
    }

    // --- fix_buffer -------------------------------------------------------

    #[test]
    fn test_fix_buffer_single() {
        // Replacement has the same length as the typo.
        assert_eq!(
            fix_simple("foo foo foo", vec![(4, "foo", "bar")]),
            "foo bar foo"
        );
    }

    #[test]
    fn test_fix_buffer_single_grow() {
        // Replacement is longer than the typo.
        assert_eq!(
            fix_simple("foo foo foo", vec![(4, "foo", "happy")]),
            "foo happy foo"
        );
    }

    #[test]
    fn test_fix_buffer_single_shrink() {
        // Replacement is shorter than the typo.
        assert_eq!(
            fix_simple("foo foo foo", vec![(4, "foo", "if")]),
            "foo if foo"
        );
    }

    #[test]
    fn test_fix_buffer_start() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(0, "foo", "bar")]),
            "bar foo foo"
        );
    }

    #[test]
    fn test_fix_buffer_end() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(8, "foo", "bar")]),
            "foo foo bar"
        );
    }

    #[test]
    fn test_fix_buffer_end_grow() {
        assert_eq!(
            fix_simple("foo foo foo", vec![(8, "foo", "happy")]),
            "foo foo happy"
        );
    }

    #[test]
    fn test_fix_buffer_multiple() {
        // Offsets refer to the original buffer even though the first fix grows it.
        let fixes = vec![(4, "foo", "happy"), (8, "foo", "world")];
        assert_eq!(fix_simple("foo foo foo", fixes), "foo happy world");
    }

    // --- AccumulateLineNum ------------------------------------------------

    #[test]
    fn test_line_count_first() {
        let mut counter = AccumulateLineNum::new();
        assert_eq!(counter.line_num(b"hello world", 6), 1);
    }

    #[test]
    fn test_line_count_second() {
        let mut counter = AccumulateLineNum::new();
        assert_eq!(counter.line_num(b"1\n2\n3", 2), 2);
    }

    #[test]
    fn test_line_count_multiple() {
        let buffer = b"1\n2\n3";
        let mut counter = AccumulateLineNum::new();
        // One accumulator is queried repeatedly with increasing offsets.
        for &(byte_offset, expected) in &[(0, 1), (2, 2), (4, 3)] {
            assert_eq!(counter.line_num(buffer, byte_offset), expected);
        }
    }

    // --- extract_line -----------------------------------------------------

    #[test]
    fn test_extract_line_single_line() {
        assert_extract_line(b"hello world", 6, b"hello world", 6);
    }

    #[test]
    fn test_extract_line_first() {
        assert_extract_line(b"1\n2\n3", 0, b"1", 0);
    }

    #[test]
    fn test_extract_line_middle() {
        assert_extract_line(b"1\n2\n3", 2, b"2", 0);
    }

    #[test]
    fn test_extract_line_end() {
        assert_extract_line(b"1\n2\n3", 4, b"3", 0);
    }

    #[test]
    fn test_extract_line_offset_change() {
        // The offset inside the line differs from the offset inside the buffer.
        assert_extract_line(b"1\nhello world\n2", 8, b"hello world", 6);
    }

    #[test]
    fn test_extract_line_windows() {
        // CRLF line endings: the expected line carries no trailing `\r`.
        assert_extract_line(b"1\r\nhello world\r\n2", 9, b"hello world", 6);
    }

    #[test]
    fn test_extract_line_slovak() {
        // Input contains bytes >= 0x80 that are not valid UTF-8.
        let buffer = b"LastErrorMessage=%1.%n%nChyba %2: %3\r\nSetupFileMissing=In\x9Atala\xE8n\xFD adres\xE1r neobsahuje s\xFAbor %1. Opravte, pros\xEDm, t\xFAto chybu alebo si zaobstarajte nov\xFA k\xF3piu tohto produktu.\r\nSetupFileCorrupt=S\xFAbory sprievodcu in\x9Atal\xE1ciou s\xFA po\x9Akoden\xE9. Zaobstarajte si, pros\xEDm, nov\xFA k\xF3piu tohto produktu.";
        let expected_line = b"SetupFileMissing=In\x9Atala\xE8n\xFD adres\xE1r neobsahuje s\xFAbor %1. Opravte, pros\xEDm, t\xFAto chybu alebo si zaobstarajte nov\xFA k\xF3piu tohto produktu.";
        assert_extract_line(buffer, 66, expected_line, 28);
    }
}