// gitoxide_core/repository/attributes/validate_baseline.rs
use crate::OutputFormat;
2
3pub struct Options {
4 pub format: OutputFormat,
5 pub statistics: bool,
6 pub ignore: bool,
7}
8
9pub(crate) mod function {
10 use std::{
11 collections::BTreeSet,
12 io,
13 io::{BufRead, Write},
14 iter::Peekable,
15 ops::Sub,
16 path::PathBuf,
17 sync::atomic::Ordering,
18 };
19
20 use anyhow::{anyhow, bail};
21 use gix::{attrs::Assignment, bstr::BString, Count, Progress};
22
23 use crate::{
24 repository::attributes::{query::attributes_cache, validate_baseline::Options},
25 OutputFormat,
26 };
27
28 pub fn validate_baseline(
29 repo: gix::Repository,
30 paths: Option<impl Iterator<Item = BString> + Send + 'static>,
31 mut progress: impl gix::NestedProgress + 'static,
32 mut out: impl io::Write,
33 mut err: impl io::Write,
34 Options {
35 format,
36 statistics,
37 mut ignore,
38 }: Options,
39 ) -> anyhow::Result<()> {
40 if format != OutputFormat::Human {
41 bail!("JSON output isn't implemented yet");
42 }
43
44 if repo.is_bare() {
45 writeln!(
46 err,
47 "Repo at '{repo}' is bare - disabling git-ignore baseline as `git check-ignore` needs a worktree",
48 repo = repo.path().display()
49 )
50 .ok();
51 ignore = false;
52 }
53 let mut num_entries = None;
54 let paths = paths.map_or_else(
55 {
56 let repo = repo.clone();
57 let num_entries = &mut num_entries;
58 move || -> anyhow::Result<_> {
59 let index = repo.index_or_load_from_head()?.into_owned();
60 let (entries, path_backing) = index.into_parts().0.into_entries();
61 *num_entries = Some(entries.len());
62 let iter = Box::new(entries.into_iter().map(move |e| e.path_in(&path_backing).to_owned()));
63 Ok(iter as Box<dyn Iterator<Item = BString> + Send + 'static>)
64 }
65 },
66 |paths| anyhow::Result::Ok(Box::new(paths)),
67 )?;
68
69 let (tx_base, rx_base) = std::sync::mpsc::channel::<(String, Baseline)>();
70 let feed_attrs = {
71 let (tx, rx) = std::sync::mpsc::sync_channel::<BString>(100);
72 std::thread::spawn({
73 let path = repo.path().to_owned();
74 let tx_base = tx_base.clone();
75 let mut progress = progress.add_child("attributes");
76 move || -> anyhow::Result<()> {
77 let mut child =
78 std::process::Command::from(gix::command::prepare(gix::path::env::exe_invocation()))
79 .args(["check-attr", "--stdin", "-a"])
80 .stdin(std::process::Stdio::piped())
81 .stdout(std::process::Stdio::piped())
82 .stderr(std::process::Stdio::null())
83 .current_dir(path)
84 .spawn()?;
85
86 std::thread::spawn({
87 let mut stdin = child.stdin.take().expect("we configured it");
88 move || -> anyhow::Result<()> {
89 progress.init(num_entries, gix::progress::count("paths"));
90 let start = std::time::Instant::now();
91 for path in rx {
92 progress.inc();
93 stdin.write_all(&path)?;
94 stdin.write_all(b"\n")?;
95 }
96 progress.show_throughput(start);
97 Ok(())
98 }
99 });
100
101 let stdout = std::io::BufReader::new(child.stdout.take().expect("we configured it"));
102 let mut lines = stdout.lines().map_while(Result::ok).peekable();
103 while let Some(baseline) = parse_attributes(&mut lines) {
104 if tx_base.send(baseline).is_err() {
105 child.kill().ok();
106 break;
107 }
108 }
109
110 Ok(())
111 }
112 });
113 tx
114 };
115 let work_dir = ignore
116 .then(|| {
117 #[allow(clippy::unnecessary_debug_formatting)]
118 repo.workdir()
119 .map(ToOwned::to_owned)
120 .ok_or_else(|| anyhow!("repository at {:?} must have a worktree checkout", repo.path()))
121 })
122 .transpose()?;
123 let feed_excludes = ignore.then(|| {
124 let (tx, rx) = std::sync::mpsc::sync_channel::<BString>(100);
125 std::thread::spawn({
126 let path = work_dir.expect("present if we are here");
127 let tx_base = tx_base.clone();
128 let mut progress = progress.add_child("excludes");
129 move || -> anyhow::Result<()> {
130 let mut child =
131 std::process::Command::from(gix::command::prepare(gix::path::env::exe_invocation()))
132 .args(["check-ignore", "--stdin", "-nv", "--no-index"])
133 .stdin(std::process::Stdio::piped())
134 .stdout(std::process::Stdio::piped())
135 .stderr(std::process::Stdio::null())
136 .current_dir(path)
137 .spawn()?;
138
139 std::thread::spawn({
140 let mut stdin = child.stdin.take().expect("we configured it");
141 move || -> anyhow::Result<()> {
142 progress.init(num_entries, gix::progress::count("paths"));
143 let start = std::time::Instant::now();
144 for path in rx {
145 progress.inc();
146 stdin.write_all(path.as_ref())?;
147 stdin.write_all(b"\n")?;
148 }
149 progress.show_throughput(start);
150 Ok(())
151 }
152 });
153
154 let stdout = std::io::BufReader::new(child.stdout.take().expect("we configured it"));
155 for line in stdout.lines() {
156 let line = line?;
157 if let Some(baseline) = parse_exclude(&line) {
158 if tx_base.send(baseline).is_err() {
159 child.kill().ok();
160 break;
161 }
162 } else {
163 eprintln!("Failed to parse line {line:?} - ignored");
164 }
165 }
166
167 Ok(())
168 }
169 });
170 tx
171 });
172 drop(tx_base);
173
174 std::thread::spawn(move || {
175 for path in paths {
176 if feed_attrs.send(path.clone()).is_err() {
177 break;
178 }
179 if let Some(ch) = feed_excludes.as_ref() {
180 if ch.send(path).is_err() {
181 break;
182 }
183 }
184 }
185 });
186
187 let (mut cache, _index) = attributes_cache(&repo)?;
188 let mut matches = cache.attribute_matches();
189 let mut progress = progress.add_child("validate");
190 let mut mismatches = Vec::new();
191 let start = std::time::Instant::now();
192 progress.init(
193 num_entries.map(|n| n + if ignore { n } else { 0 }),
194 gix::progress::count("paths"),
195 );
196
197 for (rela_path, baseline) in rx_base {
198 let entry = cache.at_entry(rela_path.as_str(), None)?;
199 match baseline {
200 Baseline::Attribute { assignments: expected } => {
201 entry.matching_attributes(&mut matches);
202 let fast_path_mismatch = matches
203 .iter()
204 .map(|m| m.assignment)
205 .zip(expected.iter().map(Assignment::as_ref))
206 .any(|(a, b)| a != b);
207 if fast_path_mismatch {
208 let actual_set = BTreeSet::from_iter(matches.iter().map(|m| m.assignment));
209 let expected_set = BTreeSet::from_iter(expected.iter().map(Assignment::as_ref));
210 let too_few_or_too_many =
211 !(expected_set.sub(&actual_set).is_empty() && actual_set.sub(&expected_set).is_empty());
212 if too_few_or_too_many {
213 mismatches.push((
214 rela_path,
215 Mismatch::Attributes {
216 actual: matches.iter().map(|m| m.assignment.to_owned()).collect(),
217 expected,
218 },
219 ));
220 }
221 }
222 }
223 Baseline::Exclude { location } => {
224 let match_ = entry.matching_exclude_pattern();
225 if match_.is_some() != location.is_some() {
226 mismatches.push((
227 rela_path,
228 Mismatch::Exclude {
229 actual: match_.map(Into::into),
230 expected: location,
231 },
232 ));
233 }
234 }
235 }
236 progress.inc();
237 }
238
239 if let Some(stats) = statistics.then(|| cache.take_statistics()) {
240 out.flush()?;
241 writeln!(err, "{stats:#?}").ok();
242 }
243 progress.show_throughput(start);
244
245 if mismatches.is_empty() {
246 Ok(())
247 } else {
248 for (rela_path, mm) in &mismatches {
249 writeln!(err, "{rela_path}: {mm:#?}").ok();
250 }
251 bail!(
252 "{}: Validation failed with {} mismatches out of {}",
253 gix::path::realpath(repo.workdir().unwrap_or(repo.git_dir()))?.display(),
254 mismatches.len(),
255 progress.counter().load(Ordering::Relaxed)
256 );
257 }
258 }
259
260 enum Baseline {
261 Attribute { assignments: Vec<gix::attrs::Assignment> },
262 Exclude { location: Option<ExcludeLocation> },
263 }
264
/// The location of an exclude pattern as parsed from a `check-ignore` output line.
#[derive(Debug)]
// The fields are only read through the `Debug` output of a mismatch, which doesn't count as use.
#[allow(dead_code)]
pub struct ExcludeLocation {
    /// The line number field of the output line.
    pub line: usize,
    /// The source file field of the output line.
    pub rela_source_file: String,
    /// The pattern field of the output line.
    pub pattern: String,
}
273
274 #[derive(Debug)]
275 #[allow(dead_code)]
278 pub enum Mismatch {
279 Attributes {
280 actual: Vec<gix::attrs::Assignment>,
281 expected: Vec<gix::attrs::Assignment>,
282 },
283 Exclude {
284 actual: Option<ExcludeMatch>,
285 expected: Option<ExcludeLocation>,
286 },
287 }
288
289 #[derive(Debug)]
290 #[allow(dead_code)]
292 pub struct ExcludeMatch {
293 pub pattern: gix::glob::Pattern,
294 pub source: Option<PathBuf>,
295 pub sequence_number: usize,
296 }
297
298 impl From<gix::ignore::search::Match<'_>> for ExcludeMatch {
299 fn from(value: gix::ignore::search::Match<'_>) -> Self {
300 ExcludeMatch {
301 pattern: value.pattern.clone(),
302 source: value.source.map(ToOwned::to_owned),
303 sequence_number: value.sequence_number,
304 }
305 }
306 }
307
308 fn parse_exclude(line: &str) -> Option<(String, Baseline)> {
309 let (left, value) = line.split_at(line.find('\t')?);
310 let value = &value[1..];
311
312 let location = if left == "::" {
313 None
314 } else {
315 let mut tokens = left.split(':');
316 let source = tokens.next()?;
317 let line_number: usize = tokens.next()?.parse().ok()?;
318 let pattern = tokens.next()?;
319 Some(ExcludeLocation {
320 line: line_number,
321 rela_source_file: source.into(),
322 pattern: pattern.into(),
323 })
324 };
325 Some((value.to_string(), Baseline::Exclude { location }))
326 }
327
328 fn parse_attributes(lines: &mut Peekable<impl Iterator<Item = String>>) -> Option<(String, Baseline)> {
329 let first = lines.next()?;
330 let mut out = Vec::new();
331 let (path, assignment) = parse_attribute_line(&first)?;
332
333 let current = path.to_owned();
334 out.push(assignment.to_owned());
335 loop {
336 let next_line = match lines.peek() {
337 None => break,
338 Some(l) => l,
339 };
340 let (next_path, next_assignment) = parse_attribute_line(next_line)?;
341 if next_path != current {
342 return Some((current, Baseline::Attribute { assignments: out }));
343 } else {
344 out.push(next_assignment.to_owned());
345 lines.next();
346 }
347 }
348 Some((current, Baseline::Attribute { assignments: out }))
349 }
350
351 fn parse_attribute_line(line: &str) -> Option<(&str, gix::attrs::AssignmentRef<'_>)> {
352 use gix::{attrs::StateRef, bstr::ByteSlice};
353
354 let mut prev = None;
355 let mut tokens = line.splitn(3, |b| {
356 let is_match = b == ' ' && prev.take() == Some(':');
357 prev = Some(b);
358 is_match
359 });
360 if let Some(((mut path, attr), info)) = tokens.next().zip(tokens.next()).zip(tokens.next()) {
361 let state = match info {
362 "set" => StateRef::Set,
363 "unset" => StateRef::Unset,
364 "unspecified" => StateRef::Unspecified,
365 _ => StateRef::from_bytes(info.as_bytes()),
366 };
367 path = path.trim_end_matches(':');
368 let attr = attr.trim_end_matches(':');
369 let assignment = gix::attrs::AssignmentRef {
370 name: gix::attrs::NameRef::try_from(attr.as_bytes().as_bstr()).ok()?,
371 state,
372 };
373 Some((path, assignment))
374 } else {
375 None
376 }
377 }
378}