gitoxide_core/repository/attributes/
validate_baseline.rs

1use crate::OutputFormat;
2
3pub struct Options {
4    pub format: OutputFormat,
5    pub statistics: bool,
6    pub ignore: bool,
7}
8
9pub(crate) mod function {
10    use std::{
11        collections::BTreeSet,
12        io,
13        io::{BufRead, Write},
14        iter::Peekable,
15        ops::Sub,
16        path::PathBuf,
17        sync::atomic::Ordering,
18    };
19
20    use anyhow::{anyhow, bail};
21    use gix::{attrs::Assignment, bstr::BString, Count, Progress};
22
23    use crate::{
24        repository::attributes::{query::attributes_cache, validate_baseline::Options},
25        OutputFormat,
26    };
27
28    pub fn validate_baseline(
29        repo: gix::Repository,
30        paths: Option<impl Iterator<Item = BString> + Send + 'static>,
31        mut progress: impl gix::NestedProgress + 'static,
32        mut out: impl io::Write,
33        mut err: impl io::Write,
34        Options {
35            format,
36            statistics,
37            mut ignore,
38        }: Options,
39    ) -> anyhow::Result<()> {
40        if format != OutputFormat::Human {
41            bail!("JSON output isn't implemented yet");
42        }
43
44        if repo.is_bare() {
45            writeln!(
46                err,
47                "Repo at '{repo}' is bare - disabling git-ignore baseline as `git check-ignore` needs a worktree",
48                repo = repo.path().display()
49            )
50            .ok();
51            ignore = false;
52        }
53        let mut num_entries = None;
54        let paths = paths.map_or_else(
55            {
56                let repo = repo.clone();
57                let num_entries = &mut num_entries;
58                move || -> anyhow::Result<_> {
59                    let index = repo.index_or_load_from_head()?.into_owned();
60                    let (entries, path_backing) = index.into_parts().0.into_entries();
61                    *num_entries = Some(entries.len());
62                    let iter = Box::new(entries.into_iter().map(move |e| e.path_in(&path_backing).to_owned()));
63                    Ok(iter as Box<dyn Iterator<Item = BString> + Send + 'static>)
64                }
65            },
66            |paths| anyhow::Result::Ok(Box::new(paths)),
67        )?;
68
69        let (tx_base, rx_base) = std::sync::mpsc::channel::<(String, Baseline)>();
70        let feed_attrs = {
71            let (tx, rx) = std::sync::mpsc::sync_channel::<BString>(100);
72            std::thread::spawn({
73                let path = repo.path().to_owned();
74                let tx_base = tx_base.clone();
75                let mut progress = progress.add_child("attributes");
76                move || -> anyhow::Result<()> {
77                    let mut child =
78                        std::process::Command::from(gix::command::prepare(gix::path::env::exe_invocation()))
79                            .args(["check-attr", "--stdin", "-a"])
80                            .stdin(std::process::Stdio::piped())
81                            .stdout(std::process::Stdio::piped())
82                            .stderr(std::process::Stdio::null())
83                            .current_dir(path)
84                            .spawn()?;
85
86                    std::thread::spawn({
87                        let mut stdin = child.stdin.take().expect("we configured it");
88                        move || -> anyhow::Result<()> {
89                            progress.init(num_entries, gix::progress::count("paths"));
90                            let start = std::time::Instant::now();
91                            for path in rx {
92                                progress.inc();
93                                stdin.write_all(&path)?;
94                                stdin.write_all(b"\n")?;
95                            }
96                            progress.show_throughput(start);
97                            Ok(())
98                        }
99                    });
100
101                    let stdout = std::io::BufReader::new(child.stdout.take().expect("we configured it"));
102                    let mut lines = stdout.lines().map_while(Result::ok).peekable();
103                    while let Some(baseline) = parse_attributes(&mut lines) {
104                        if tx_base.send(baseline).is_err() {
105                            child.kill().ok();
106                            break;
107                        }
108                    }
109
110                    Ok(())
111                }
112            });
113            tx
114        };
115        let work_dir = ignore
116            .then(|| {
117                #[allow(clippy::unnecessary_debug_formatting)]
118                repo.workdir()
119                    .map(ToOwned::to_owned)
120                    .ok_or_else(|| anyhow!("repository at {:?} must have a worktree checkout", repo.path()))
121            })
122            .transpose()?;
123        let feed_excludes = ignore.then(|| {
124            let (tx, rx) = std::sync::mpsc::sync_channel::<BString>(100);
125            std::thread::spawn({
126                let path = work_dir.expect("present if we are here");
127                let tx_base = tx_base.clone();
128                let mut progress = progress.add_child("excludes");
129                move || -> anyhow::Result<()> {
130                    let mut child =
131                        std::process::Command::from(gix::command::prepare(gix::path::env::exe_invocation()))
132                            .args(["check-ignore", "--stdin", "-nv", "--no-index"])
133                            .stdin(std::process::Stdio::piped())
134                            .stdout(std::process::Stdio::piped())
135                            .stderr(std::process::Stdio::null())
136                            .current_dir(path)
137                            .spawn()?;
138
139                    std::thread::spawn({
140                        let mut stdin = child.stdin.take().expect("we configured it");
141                        move || -> anyhow::Result<()> {
142                            progress.init(num_entries, gix::progress::count("paths"));
143                            let start = std::time::Instant::now();
144                            for path in rx {
145                                progress.inc();
146                                stdin.write_all(path.as_ref())?;
147                                stdin.write_all(b"\n")?;
148                            }
149                            progress.show_throughput(start);
150                            Ok(())
151                        }
152                    });
153
154                    let stdout = std::io::BufReader::new(child.stdout.take().expect("we configured it"));
155                    for line in stdout.lines() {
156                        let line = line?;
157                        if let Some(baseline) = parse_exclude(&line) {
158                            if tx_base.send(baseline).is_err() {
159                                child.kill().ok();
160                                break;
161                            }
162                        } else {
163                            eprintln!("Failed to parse line {line:?} - ignored");
164                        }
165                    }
166
167                    Ok(())
168                }
169            });
170            tx
171        });
172        drop(tx_base);
173
174        std::thread::spawn(move || {
175            for path in paths {
176                if feed_attrs.send(path.clone()).is_err() {
177                    break;
178                }
179                if let Some(ch) = feed_excludes.as_ref() {
180                    if ch.send(path).is_err() {
181                        break;
182                    }
183                }
184            }
185        });
186
187        let (mut cache, _index) = attributes_cache(&repo)?;
188        let mut matches = cache.attribute_matches();
189        let mut progress = progress.add_child("validate");
190        let mut mismatches = Vec::new();
191        let start = std::time::Instant::now();
192        progress.init(
193            num_entries.map(|n| n + if ignore { n } else { 0 }),
194            gix::progress::count("paths"),
195        );
196
197        for (rela_path, baseline) in rx_base {
198            let entry = cache.at_entry(rela_path.as_str(), None)?;
199            match baseline {
200                Baseline::Attribute { assignments: expected } => {
201                    entry.matching_attributes(&mut matches);
202                    let fast_path_mismatch = matches
203                        .iter()
204                        .map(|m| m.assignment)
205                        .zip(expected.iter().map(Assignment::as_ref))
206                        .any(|(a, b)| a != b);
207                    if fast_path_mismatch {
208                        let actual_set = BTreeSet::from_iter(matches.iter().map(|m| m.assignment));
209                        let expected_set = BTreeSet::from_iter(expected.iter().map(Assignment::as_ref));
210                        let too_few_or_too_many =
211                            !(expected_set.sub(&actual_set).is_empty() && actual_set.sub(&expected_set).is_empty());
212                        if too_few_or_too_many {
213                            mismatches.push((
214                                rela_path,
215                                Mismatch::Attributes {
216                                    actual: matches.iter().map(|m| m.assignment.to_owned()).collect(),
217                                    expected,
218                                },
219                            ));
220                        }
221                    }
222                }
223                Baseline::Exclude { location } => {
224                    let match_ = entry.matching_exclude_pattern();
225                    if match_.is_some() != location.is_some() {
226                        mismatches.push((
227                            rela_path,
228                            Mismatch::Exclude {
229                                actual: match_.map(Into::into),
230                                expected: location,
231                            },
232                        ));
233                    }
234                }
235            }
236            progress.inc();
237        }
238
239        if let Some(stats) = statistics.then(|| cache.take_statistics()) {
240            out.flush()?;
241            writeln!(err, "{stats:#?}").ok();
242        }
243        progress.show_throughput(start);
244
245        if mismatches.is_empty() {
246            Ok(())
247        } else {
248            for (rela_path, mm) in &mismatches {
249                writeln!(err, "{rela_path}: {mm:#?}").ok();
250            }
251            bail!(
252                "{}: Validation failed with {} mismatches out of {}",
253                gix::path::realpath(repo.workdir().unwrap_or(repo.git_dir()))?.display(),
254                mismatches.len(),
255                progress.counter().load(Ordering::Relaxed)
256            );
257        }
258    }
259
260    enum Baseline {
261        Attribute { assignments: Vec<gix::attrs::Assignment> },
262        Exclude { location: Option<ExcludeLocation> },
263    }
264
265    #[derive(Debug)]
266    // See note on `Mismatch`
267    #[allow(dead_code)]
268    pub struct ExcludeLocation {
269        pub line: usize,
270        pub rela_source_file: String,
271        pub pattern: String,
272    }
273
274    #[derive(Debug)]
275    // We debug-print this structure, which makes all fields 'used', but it doesn't count.
276    // TODO: find a way to not have to do more work, but make the warning go away.
277    #[allow(dead_code)]
278    pub enum Mismatch {
279        Attributes {
280            actual: Vec<gix::attrs::Assignment>,
281            expected: Vec<gix::attrs::Assignment>,
282        },
283        Exclude {
284            actual: Option<ExcludeMatch>,
285            expected: Option<ExcludeLocation>,
286        },
287    }
288
289    #[derive(Debug)]
290    // See note on `Mismatch`
291    #[allow(dead_code)]
292    pub struct ExcludeMatch {
293        pub pattern: gix::glob::Pattern,
294        pub source: Option<PathBuf>,
295        pub sequence_number: usize,
296    }
297
298    impl From<gix::ignore::search::Match<'_>> for ExcludeMatch {
299        fn from(value: gix::ignore::search::Match<'_>) -> Self {
300            ExcludeMatch {
301                pattern: value.pattern.clone(),
302                source: value.source.map(ToOwned::to_owned),
303                sequence_number: value.sequence_number,
304            }
305        }
306    }
307
308    fn parse_exclude(line: &str) -> Option<(String, Baseline)> {
309        let (left, value) = line.split_at(line.find('\t')?);
310        let value = &value[1..];
311
312        let location = if left == "::" {
313            None
314        } else {
315            let mut tokens = left.split(':');
316            let source = tokens.next()?;
317            let line_number: usize = tokens.next()?.parse().ok()?;
318            let pattern = tokens.next()?;
319            Some(ExcludeLocation {
320                line: line_number,
321                rela_source_file: source.into(),
322                pattern: pattern.into(),
323            })
324        };
325        Some((value.to_string(), Baseline::Exclude { location }))
326    }
327
328    fn parse_attributes(lines: &mut Peekable<impl Iterator<Item = String>>) -> Option<(String, Baseline)> {
329        let first = lines.next()?;
330        let mut out = Vec::new();
331        let (path, assignment) = parse_attribute_line(&first)?;
332
333        let current = path.to_owned();
334        out.push(assignment.to_owned());
335        loop {
336            let next_line = match lines.peek() {
337                None => break,
338                Some(l) => l,
339            };
340            let (next_path, next_assignment) = parse_attribute_line(next_line)?;
341            if next_path != current {
342                return Some((current, Baseline::Attribute { assignments: out }));
343            } else {
344                out.push(next_assignment.to_owned());
345                lines.next();
346            }
347        }
348        Some((current, Baseline::Attribute { assignments: out }))
349    }
350
351    fn parse_attribute_line(line: &str) -> Option<(&str, gix::attrs::AssignmentRef<'_>)> {
352        use gix::{attrs::StateRef, bstr::ByteSlice};
353
354        let mut prev = None;
355        let mut tokens = line.splitn(3, |b| {
356            let is_match = b == ' ' && prev.take() == Some(':');
357            prev = Some(b);
358            is_match
359        });
360        if let Some(((mut path, attr), info)) = tokens.next().zip(tokens.next()).zip(tokens.next()) {
361            let state = match info {
362                "set" => StateRef::Set,
363                "unset" => StateRef::Unset,
364                "unspecified" => StateRef::Unspecified,
365                _ => StateRef::from_bytes(info.as_bytes()),
366            };
367            path = path.trim_end_matches(':');
368            let attr = attr.trim_end_matches(':');
369            let assignment = gix::attrs::AssignmentRef {
370                name: gix::attrs::NameRef::try_from(attr.as_bytes().as_bstr()).ok()?,
371                state,
372            };
373            Some((path, assignment))
374        } else {
375            None
376        }
377    }
378}