Skip to main content

git_vendor/
lib.rs

1#![doc = include_str!("../README.md")]
2
3pub mod cli;
4pub mod exe;
5
6use git_filter_tree::FilterTree;
7use git_set_attr::SetAttr;
8use std::{
9    collections::{HashMap, HashSet},
10    path::{Path, PathBuf},
11    str::FromStr,
12};
13
14use git2::Repository;
15
/// Render `p` as a git-style path string.
///
/// Git patterns (e.g. in `.gitattributes`) always separate components with
/// `/`, whereas paths assembled on Windows may contain `\`.  Every `\` is
/// therefore rewritten to `/`, and a single leading `./` is dropped.
/// Non-UTF-8 content is converted lossily.
fn to_git_path(p: &Path) -> String {
    let slashed = p.to_string_lossy().replace('\\', "/");
    match slashed.strip_prefix("./") {
        Some(rest) => rest.to_owned(),
        None => slashed,
    }
}
24
/// A parsed pattern entry with an optional destination prefix.
///
/// The raw config value uses the syntax `<glob>` or `<glob>:<destination>`.
/// For example:
/// - `src/**` – match `src/**`, no remapping (files keep their upstream path)
/// - `src/**:ext/` – match `src/**`, strip the literal prefix `src/`, then
///   prepend `ext/` to get the local path.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PatternMapping {
    /// The glob string (left of the colon, or the whole value when no colon).
    pub glob: String,
    /// The local destination prefix (right of the colon), if present.
    pub destination: Option<String>,
}

impl PatternMapping {
    /// Parse a raw pattern string, splitting on the first `:` only.
    ///
    /// An empty right-hand side (`"src/**:"`) is treated as "no destination".
    pub fn parse(raw: &str) -> Self {
        match raw.split_once(':') {
            Some((glob, dest)) => PatternMapping {
                glob: glob.to_string(),
                destination: (!dest.is_empty()).then(|| dest.to_string()),
            },
            None => PatternMapping {
                glob: raw.to_string(),
                destination: None,
            },
        }
    }

    /// Serialize back to the raw config string (`glob` or `glob:destination`).
    pub fn to_raw(&self) -> String {
        match &self.destination {
            Some(dest) => format!("{}:{}", self.glob, dest),
            None => self.glob.clone(),
        }
    }

    /// Extract the literal (non-glob) leading path component(s) from the glob.
    ///
    /// The prefix consists of the whole path components that precede the
    /// first glob character (`*`, `?`, `[`), including the trailing `/`:
    ///
    /// - `src/**/*.rs` → `src/`
    /// - `src/foo*.rs` → `src/` (the partially literal component `foo` is
    ///   *not* part of the prefix)
    /// - `**` or `foo*` → `""`
    /// - `src/` (directory shorthand, no glob characters) → `src/`
    /// - `README.md` (no glob characters, no trailing `/`) → `""`
    pub fn literal_prefix(&self) -> &str {
        let glob = self.glob.as_str();
        match glob.find(['*', '?', '[']) {
            // Cut back to the last `/` before the first glob character so the
            // prefix always ends on a component boundary.  Without this, a
            // pattern such as `src/foo*.rs` would strip `src/foo` and chop
            // the start of every matched file name.
            Some(idx) => glob[..idx]
                .rfind('/')
                .map_or("", |slash| &glob[..=slash]),
            // No glob characters at all: only the directory shorthand
            // (`dir/`) counts as a literal prefix.
            None if glob.ends_with('/') => glob,
            None => "",
        }
    }

    /// Compute the local path for an upstream file that matched this pattern.
    ///
    /// - Without a `destination`, the upstream path is returned unchanged.
    /// - With a `destination`, the literal prefix is stripped from
    ///   `upstream_path` and the destination (minus trailing `/`) is
    ///   prepended.  A destination consisting only of `/` just strips the
    ///   prefix.
    ///
    /// Returns `None` if the upstream path doesn't start with the literal
    /// prefix (which shouldn't happen when the glob matched, but we guard
    /// defensively).
    pub fn local_path(&self, upstream_path: &str) -> Option<String> {
        // `strip_prefix("")` yields the whole path, so an empty prefix needs
        // no special case.
        let stripped = upstream_path.strip_prefix(self.literal_prefix())?;
        let dest = match &self.destination {
            Some(dest) => dest.trim_end_matches('/'),
            None => return Some(upstream_path.to_string()),
        };
        Some(if dest.is_empty() {
            stripped.to_string()
        } else {
            format!("{dest}/{stripped}")
        })
    }
}
116
117/// Parse a slice of raw pattern strings into [`PatternMapping`]s.
118pub fn parse_patterns(raws: &[impl AsRef<str>]) -> Vec<PatternMapping> {
119    raws.iter()
120        .map(|r| PatternMapping::parse(r.as_ref()))
121        .collect()
122}
123
124/// Build a [`globset::GlobSet`] from a slice of [`PatternMapping`]s, using
125/// only the glob side (left of `:`).
126fn build_glob_matcher_from_mappings(
127    mappings: &[PatternMapping],
128) -> Result<globset::GlobSet, git2::Error> {
129    let globs: Vec<&str> = mappings.iter().map(|m| m.glob.as_str()).collect();
130    build_glob_matcher(&globs)
131}
132
133/// Build a [`globset::GlobSet`] from a slice of pattern strings, normalizing
134/// trailing-`/` directory shorthands to `dir/**`.
135fn build_glob_matcher(patterns: &[impl AsRef<str>]) -> Result<globset::GlobSet, git2::Error> {
136    let mut builder = globset::GlobSetBuilder::new();
137    for pat in patterns {
138        let pat = pat.as_ref();
139        let normalized = if pat.ends_with('/') {
140            format!("{}**", pat)
141        } else {
142            pat.to_string()
143        };
144        let g = globset::Glob::new(&normalized)
145            .map_err(|e| git2::Error::from_str(&format!("Invalid pattern '{}': {}", pat, e)))?;
146        builder.add(g);
147    }
148    builder
149        .build()
150        .map_err(|e| git2::Error::from_str(&e.to_string()))
151}
152
153/// Convert a `(globs, path)` pair — the legacy API shape — into a
154/// `Vec<PatternMapping>`.
155///
156/// Each glob is parsed with [`PatternMapping::parse`].  If a glob already
157/// carries an explicit colon destination, that destination is used as-is.
158/// Otherwise `path` is applied as the destination prefix (unless `path` is
159/// `"."` or empty, which means "no remapping").
160fn globs_and_path_to_mappings(globs: &[&str], path: &Path) -> Vec<PatternMapping> {
161    // Determine the normalized destination string from `path`.
162    let dest: Option<String> = {
163        let s = path.to_string_lossy().replace('\\', "/");
164        let s = s.trim_end_matches('/');
165        if s.is_empty() || s == "." {
166            None
167        } else {
168            Some(format!("{}/", s))
169        }
170    };
171
172    globs
173        .iter()
174        .map(|raw| {
175            let m = PatternMapping::parse(raw);
176            if m.destination.is_some() {
177                // Already has an explicit colon mapping – keep it.
178                m
179            } else if let Some(ref d) = dest {
180                // Apply `path` as the destination.
181                PatternMapping {
182                    glob: m.glob,
183                    destination: Some(d.clone()),
184                }
185            } else {
186                m
187            }
188        })
189        .collect()
190}
191
192/// Find the first [`PatternMapping`] from `mappings` whose glob matches
193/// `upstream_path`, and return the computed local path.
194///
195/// Returns `None` if no pattern matches.
196fn apply_pattern_mappings(mappings: &[PatternMapping], upstream_path: &str) -> Option<String> {
197    for mapping in mappings {
198        let glob = if mapping.glob.ends_with('/') {
199            format!("{}**", mapping.glob)
200        } else {
201            mapping.glob.clone()
202        };
203        let g = globset::Glob::new(&glob).ok()?;
204        let matcher = globset::GlobSetBuilder::new().add_then_build(g).ok()?;
205        if matcher.is_match(upstream_path) {
206            return mapping.local_path(upstream_path);
207        }
208    }
209    None
210}
211
212/// Extension trait for [`globset::GlobSetBuilder`] to support chaining.
213trait GlobSetBuilderExt {
214    fn add_then_build(self, glob: globset::Glob) -> Result<globset::GlobSet, globset::Error>;
215}
216
217impl GlobSetBuilderExt for globset::GlobSetBuilder {
218    fn add_then_build(mut self, glob: globset::Glob) -> Result<globset::GlobSet, globset::Error> {
219        self.add(glob);
220        self.build()
221    }
222}
223
224/// Build a new git tree in `repo` containing only the upstream files that
225/// match one of `mappings`, placed at their **local** (remapped) paths.
226///
227/// For each blob in `upstream_tree`, the first matching [`PatternMapping`] is
228/// used to compute the local path.  Files that match no pattern are skipped.
229///
230/// The resulting tree has entries keyed by local path, ready to be used as
231/// "theirs" in a merge against HEAD.
232fn remap_upstream_tree<'a>(
233    repo: &'a git2::Repository,
234    upstream_tree: &git2::Tree<'_>,
235    mappings: &[PatternMapping],
236) -> Result<git2::Tree<'a>, git2::Error> {
237    // Collect (local_path, blob_oid, mode) pairs.
238    let mut entries: Vec<(String, git2::Oid, u32)> = Vec::new();
239
240    upstream_tree.walk(git2::TreeWalkMode::PreOrder, |dir, entry| {
241        if entry.kind() != Some(git2::ObjectType::Blob) {
242            return git2::TreeWalkResult::Ok;
243        }
244        let upstream_path = format!("{}{}", dir, entry.name().unwrap_or(""));
245        if let Some(local_path) = apply_pattern_mappings(mappings, &upstream_path) {
246            entries.push((local_path, entry.id(), entry.filemode() as u32));
247        }
248        git2::TreeWalkResult::Ok
249    })?;
250
251    build_tree_from_entries(repo, &entries)
252}
253
254/// Build a git tree from a flat list of `(path, blob_oid, mode)` entries,
255/// creating nested subtrees as needed.
256fn build_tree_from_entries<'a>(
257    repo: &'a git2::Repository,
258    entries: &[(String, git2::Oid, u32)],
259) -> Result<git2::Tree<'a>, git2::Error> {
260    // Group entries by their top-level component, then recurse.
261    // Entries at the root are inserted directly; others go into subtrees.
262    let mut root_files: Vec<(&str, git2::Oid, u32)> = Vec::new();
263    let mut subdirs: std::collections::BTreeMap<&str, Vec<(String, git2::Oid, u32)>> =
264        std::collections::BTreeMap::new();
265
266    for (path, oid, mode) in entries {
267        if let Some((dir, rest)) = path.split_once('/') {
268            subdirs
269                .entry(dir)
270                .or_default()
271                .push((rest.to_string(), *oid, *mode));
272        } else {
273            root_files.push((path.as_str(), *oid, *mode));
274        }
275    }
276
277    let mut builder = repo.treebuilder(None)?;
278    for (name, oid, mode) in root_files {
279        builder.insert(name, oid, mode as i32)?;
280    }
281    for (dir, sub_entries) in &subdirs {
282        let subtree = build_tree_from_entries(repo, sub_entries)?;
283        builder.insert(dir, subtree.id(), 0o040000)?;
284    }
285
286    let oid = builder.write()?;
287    repo.find_tree(oid)
288}
289
/// All metadata required to retrieve necessary objects from a vendor.
///
/// Derives `Hash`/`Eq` so it can be used as a map key, and `Debug` so values
/// (and maps keyed by them) can be printed in diagnostics and assertions.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct VendorSource {
    /// The unique identifier for this particular vendor.
    pub name: String,
    /// The URL of the upstream remote to fetch from.
    pub url: String,
    /// The branch to track on the upstream remote.
    /// If not specified, this defaults to `HEAD`.
    pub branch: Option<String>,
    /// The most recent merge base. If not specified,
    /// it is assumed that no prior merge has taken
    /// place and conflicts must be resolved manually.
    pub base: Option<String>,
    /// Glob pattern(s) selecting which upstream files to vendor.
    pub patterns: Vec<String>,
}
306
307impl VendorSource {
308    pub fn to_config(&self, cfg: &mut git2::Config) -> Result<(), git2::Error> {
309        cfg.set_str(&format!("vendor.{}.url", &self.name), &self.url)?;
310
311        if let Some(branch) = &self.branch {
312            cfg.set_str(&format!("vendor.{}.branch", &self.name), branch)?;
313        }
314
315        if let Some(base) = &self.base {
316            cfg.set_str(&format!("vendor.{}.base", &self.name), base)?;
317        }
318
319        // Remove existing pattern entries before writing the current set.
320        let pattern_key = format!("vendor.{}.pattern", &self.name);
321        let _ = cfg.remove_multivar(&pattern_key, ".*");
322        for pattern in &self.patterns {
323            cfg.set_multivar(&pattern_key, "^$", pattern)?;
324        }
325
326        Ok(())
327    }
328
329    pub fn from_config(cfg: &git2::Config, name: &str) -> Result<Option<Self>, git2::Error> {
330        let name = name.to_string();
331        let mut entries = cfg.entries(Some(&format!("vendor.{name}")))?;
332
333        if entries.next().is_none() {
334            return Ok(None);
335        }
336
337        let url = cfg.get_string(&format!("vendor.{name}.url"))?;
338        let branch = cfg.get_string(&format!("vendor.{name}.branch")).ok();
339        let base = cfg.get_string(&format!("vendor.{name}.base")).ok();
340
341        let mut patterns = Vec::new();
342        let pattern_entries = cfg.multivar(&format!("vendor.{name}.pattern"), None);
343        if let Ok(pattern_entries) = pattern_entries {
344            pattern_entries.for_each(|entry| {
345                if let Some(value) = entry.value() {
346                    patterns.push(value.to_string());
347                }
348            })?;
349        }
350
351        Ok(Some(Self {
352            name,
353            url,
354            branch,
355            base,
356            patterns,
357        }))
358    }
359
360    /// The ref holding the latest fetched upstream tip.
361    pub fn head_ref(&self) -> String {
362        format!("refs/vendor/{}", self.name)
363    }
364
365    /// The ref to track.
366    pub fn tracking_branch(&self) -> String {
367        match &self.branch {
368            Some(branch) => branch.clone(),
369            None => "HEAD".into(),
370        }
371    }
372}
373
374fn vendors_from_config(cfg: &git2::Config) -> Result<Vec<VendorSource>, git2::Error> {
375    let mut entries = cfg.entries(Some("vendor.*"))?;
376    let mut vendor_names = std::collections::HashSet::new();
377
378    while let Some(entry) = entries.next() {
379        let entry = entry?;
380        if let Some(name) = entry.name() {
381            // Entry names look like "vendor.<name>.<key>"; extract <name>
382            let parts: Vec<&str> = name.splitn(3, '.').collect();
383            if parts.len() == 3 && parts[0] == "vendor" {
384                vendor_names.insert(parts[1].to_string());
385            }
386        }
387    }
388
389    let mut vendors = Vec::new();
390    for name in vendor_names {
391        let vendor = VendorSource::from_config(cfg, &name)?;
392        if let Some(vendor) = vendor {
393            vendors.push(vendor);
394        } else {
395            return Err(git2::Error::from_str("vendor not found"));
396        }
397    }
398
399    Ok(vendors)
400}
401
402/// A trait which provides methods for vendoring content across repository boundaries.
403pub trait Vendor {
404    /// Retrieve vendor configuration by merging three levels (lowest → highest
405    /// priority), analogous to `git config`:
406    ///
407    /// 1. **Global** – `~/.gitvendors`
408    /// 2. **Local**  – `$GIT_DIR/gitvendors`
409    /// 3. **Index**  – `$WORKDIR/.gitvendors` (tracked)
410    ///
411    /// Writes go to the highest-priority file present in the stack (index).
412    fn vendor_config(&self) -> Result<git2::Config, git2::Error>;
413
414    /// Retrieve all vendored files in a given tree.
415    fn vendored_subtree(&self) -> Result<git2::Tree<'_>, git2::Error>;
416
417    /// Return all vendor sources tracked at the commit provided (defaulting to `HEAD`).
418    fn list_vendors(&self) -> Result<Vec<VendorSource>, git2::Error>;
419
420    /// Return all vendor sources mapped to the upstream tip OID if it differs from the base tree.
421    /// `Some(oid)` means there are unmerged upstream changes at that commit; `None` means up to date.
422    fn check_vendors(&self) -> Result<HashMap<VendorSource, Option<git2::Oid>>, git2::Error>;
423
424    /// Track vendor pattern(s) by writing per-file gitattributes lines with the `vendor` attribute.
425    ///
426    /// `globs` selects which upstream files to track.  `path` is the local
427    /// directory under which the vendored files will live; it acts as the
428    /// destination prefix for each glob.  Use `Path::new(".")` when the files
429    /// are placed at the repository root.
430    ///
431    /// Patterns that already carry an explicit colon mapping (e.g. `src/**:ext/`)
432    /// are stored as-is in [`VendorSource::patterns`] and are interpreted by
433    /// the internal [`PatternMapping`] machinery when `path` is `"."`.
434    fn track_vendor_pattern(
435        &self,
436        vendor: &VendorSource,
437        globs: &[&str],
438        path: &Path,
439    ) -> Result<(), git2::Error>;
440
441    /// Refresh `.gitattributes` after a merge so that per-file entries match
442    /// the merged result.  New upstream files get entries; deleted files lose
443    /// them.
444    ///
445    /// `path` is the local directory under which vendored files live.  Pass
446    /// `Path::new(".")` when files are at the repository root.
447    fn refresh_vendor_attrs(
448        &self,
449        vendor: &VendorSource,
450        merged_index: &git2::Index,
451        path: &Path,
452    ) -> Result<(), git2::Error>;
453
454    /// Fetch the upstream for the given vendor and advance `refs/vendor/$name`.
455    /// Returns the updated reference.
456    fn fetch_vendor<'a>(
457        &'a self,
458        source: &VendorSource,
459        maybe_opts: Option<&mut git2::FetchOptions>,
460    ) -> Result<git2::Reference<'a>, git2::Error>;
461
462    /// Perform the initial add of a vendor source.
463    ///
464    /// Unlike `merge_vendor`, which relies on files already present in HEAD to
465    /// determine the upstream ↔ local mapping, `add_vendor` uses the given
466    /// `globs` and `path` to filter and place the upstream tree directly.  This
467    /// makes it suitable for the first-time add where no vendor files exist in
468    /// HEAD yet.
469    ///
470    /// `globs` may contain plain glob strings or strings with the colon mapping
471    /// syntax (e.g. `src/**:ext/`); see [`PatternMapping`].  When a plain glob
472    /// is given, `path` acts as the destination prefix.
473    ///
474    /// The resulting `git2::Index` contains the merged entries ready to be
475    /// written to the working tree and staged.
476    fn add_vendor(
477        &self,
478        vendor: &VendorSource,
479        globs: &[&str],
480        path: &Path,
481        file_favor: Option<git2::FileFavor>,
482    ) -> Result<git2::Index, git2::Error>;
483
484    /// If a `base` exists in the vendor source provided (by `name`),
485    /// initiate a three-way merge with the base reference, the
486    /// commit provided (defaulting to the repository's `HEAD`),
487    /// and the tip of `refs/vendor/{name}`. If no `base` exists,
488    /// then a two-way merge is performed and a new `base` is written
489    /// to the the returned `VendorSource`.
490    fn merge_vendor(
491        &self,
492        vendor: &VendorSource,
493        maybe_opts: Option<&mut git2::FetchOptions>,
494        file_favor: Option<git2::FileFavor>,
495    ) -> Result<git2::Index, git2::Error>;
496
497    /// Given a vendor's name and a target commit (defaulting to `HEAD`),
498    /// return the vendor's `base` reference it it exists. If no such `base`
499    /// exists for the provided vendor source, `None` is returned.
500    fn find_vendor_base(
501        &self,
502        vendor: &VendorSource,
503    ) -> Result<Option<git2::Commit<'_>>, git2::Error>;
504
505    /// Return a `VendorSource` which matches the provided name, if one exists
506    /// in the provided `commit` (defaulting to `HEAD`).
507    fn get_vendor_by_name(&self, name: &str) -> Result<Option<VendorSource>, git2::Error>;
508}
509
510fn bail_if_bare(repo: &Repository) -> Result<(), git2::Error> {
511    // TODO: add support for bare repositories
512    // Support for bare repositories is currently blocked by the lack of
513    // in-memory `gitconfig` readers. How hard can that be to make?
514    if repo.is_bare() {
515        return Err(git2::Error::from_str(
516            "a working tree is required; bare repositories are not supported",
517        ));
518    }
519
520    Ok(())
521}
522
523impl Vendor for Repository {
524    fn vendor_config(&self) -> Result<git2::Config, git2::Error> {
525        bail_if_bare(self)?;
526        let workdir = self
527            .workdir()
528            .ok_or_else(|| git2::Error::from_str("repository has no working directory"))?;
529
530        let mut cfg = git2::Config::new()?;
531
532        // Global ~/.gitvendors (lowest priority).
533        // Derive the home directory from libgit2's own global config path
534        // (~/.gitconfig) so we don't depend on env vars directly.
535        if let Some(global_path) = git2::Config::find_global()
536            .ok()
537            .and_then(|p| p.parent().map(|h| h.join(".gitvendors")))
538            .filter(|p| p.exists())
539        {
540            cfg.add_file(&global_path, git2::ConfigLevel::Global, false)?;
541        }
542
543        // Local $GIT_DIR/gitvendors (repo-private, not tracked).
544        let local_path = self.path().join("gitvendors");
545        if local_path.exists() {
546            cfg.add_file(&local_path, git2::ConfigLevel::Local, false)?;
547        }
548
549        // Index $WORKDIR/.gitvendors (tracked, highest priority).
550        let index_path = workdir.join(".gitvendors");
551        cfg.add_file(&index_path, git2::ConfigLevel::App, false)?;
552
553        Ok(cfg)
554    }
555
556    fn vendored_subtree(&self) -> Result<git2::Tree<'_>, git2::Error> {
557        let head = self.head()?.peel_to_tree()?;
558
559        let mut vendored_entries: Vec<git2::TreeEntry> = Vec::new();
560
561        head.walk(git2::TreeWalkMode::PreOrder, |_, entry| {
562            if let Some(attrs) = entry.name().and_then(|name| {
563                self.get_attr(
564                    &PathBuf::from_str(name).ok()?,
565                    "vendored",
566                    git2::AttrCheckFlags::FILE_THEN_INDEX,
567                )
568                .ok()
569            }) && (attrs == Some("true") || attrs == Some("set"))
570            {
571                vendored_entries.push(entry.to_owned());
572            }
573            git2::TreeWalkResult::Ok
574        })?;
575
576        todo!()
577    }
578
579    fn list_vendors(&self) -> Result<Vec<VendorSource>, git2::Error> {
580        let cfg = self.vendor_config()?;
581        vendors_from_config(&cfg)
582    }
583
584    fn fetch_vendor<'a>(
585        &'a self,
586        vendor: &VendorSource,
587        maybe_opts: Option<&mut git2::FetchOptions>,
588    ) -> Result<git2::Reference<'a>, git2::Error> {
589        let mut remote = self.remote_anonymous(&vendor.url)?;
590        let refspec = format!("{}:{}", vendor.tracking_branch(), vendor.head_ref());
591        remote.fetch(&[&refspec], maybe_opts, None)?;
592
593        let head = self.find_reference(&vendor.head_ref())?;
594
595        Ok(head)
596    }
597
598    fn check_vendors(&self) -> Result<HashMap<VendorSource, Option<git2::Oid>>, git2::Error> {
599        let vendors = self.list_vendors()?;
600        let mut updates = HashMap::new();
601
602        for vendor in vendors {
603            match vendor.base.as_ref() {
604                Some(base) => {
605                    let base = git2::Oid::from_str(base)?;
606                    let head = self.find_reference(&vendor.head_ref())?.target().ok_or(
607                        git2::Error::from_str("head ref was not found; this is an internal error"),
608                    )?;
609
610                    if base == head {
611                        updates.insert(vendor, None);
612                    } else {
613                        updates.insert(vendor, Some(head));
614                    }
615                }
616                None => {
617                    let head = self.find_reference(&vendor.head_ref())?.target().ok_or(
618                        git2::Error::from_str("head ref was not found; this is an internal error"),
619                    )?;
620                    updates.insert(vendor, Some(head));
621                }
622            }
623        }
624
625        Ok(updates)
626    }
627
628    fn track_vendor_pattern(
629        &self,
630        vendor: &VendorSource,
631        globs: &[&str],
632        path: &Path,
633    ) -> Result<(), git2::Error> {
634        let workdir = self
635            .workdir()
636            .ok_or_else(|| git2::Error::from_str("repository has no working directory"))?;
637        // Always write to the root .gitattributes.
638        let gitattributes = workdir.join(".gitattributes");
639        let tree = self.find_reference(&vendor.head_ref())?.peel_to_tree()?;
640        let vendor_attr = format!("vendor={}", vendor.name);
641
642        // Convert (globs, path) to PatternMappings.
643        // Each glob may already carry colon syntax (e.g. "src/**:ext/"); if so,
644        // parse it directly.  Otherwise apply `path` as the destination prefix.
645        let mappings = globs_and_path_to_mappings(globs, path);
646
647        // Collect (local_path) for each upstream file matched by any mapping.
648        let mut matched_local_paths: Vec<String> = Vec::new();
649
650        tree.walk(git2::TreeWalkMode::PreOrder, |dir, entry| {
651            if entry.kind() != Some(git2::ObjectType::Blob) {
652                return git2::TreeWalkResult::Ok;
653            }
654            let upstream_path = format!("{}{}", dir, entry.name().unwrap_or(""));
655            if let Some(local_path) = apply_pattern_mappings(&mappings, &upstream_path) {
656                matched_local_paths.push(local_path);
657            }
658            git2::TreeWalkResult::Ok
659        })?;
660
661        for local_path in &matched_local_paths {
662            self.set_attr(local_path, &[&vendor_attr], &gitattributes)?;
663        }
664
665        Ok(())
666    }
667
668    fn add_vendor(
669        &self,
670        vendor: &VendorSource,
671        globs: &[&str],
672        path: &Path,
673        file_favor: Option<git2::FileFavor>,
674    ) -> Result<git2::Index, git2::Error> {
675        // Convert (globs, path) to PatternMappings.
676        let mappings = globs_and_path_to_mappings(globs, path);
677
678        // Build the remapped upstream tree: each upstream file is placed at its
679        // local (mapped) path according to the pattern mappings.
680        let upstream_tree = self.find_reference(&vendor.head_ref())?.peel_to_tree()?;
681        let theirs_remapped = remap_upstream_tree(self, &upstream_tree, &mappings)?;
682
683        // Collect local paths so we can filter HEAD to only overlapping entries.
684        // This lets merge_trees detect add/add conflicts when a local file already
685        // exists at the same local path as an incoming vendor file.
686        let mut local_paths: HashSet<String> = HashSet::new();
687        theirs_remapped.walk(git2::TreeWalkMode::PreOrder, |dir, entry| {
688            if entry.kind() == Some(git2::ObjectType::Blob) {
689                local_paths.insert(format!("{}{}", dir, entry.name().unwrap_or("")));
690            }
691            git2::TreeWalkResult::Ok
692        })?;
693
694        let ours = self.head()?.peel_to_tree()?;
695        let ours_filtered =
696            self.filter_by_predicate(&ours, |_repo, p| local_paths.contains(&*to_git_path(p)))?;
697
698        // Two-way merge: empty ancestor so that both sides look like pure
699        // additions.  If the same path exists in both ours and theirs with
700        // different content, git2 will report an add/add conflict.
701        let empty_tree = {
702            let empty_oid = self.treebuilder(None)?.write()?;
703            self.find_tree(empty_oid)?
704        };
705
706        let mut opts = git2::MergeOptions::new();
707        opts.find_renames(true);
708        opts.rename_threshold(50);
709        if let Some(favor) = file_favor {
710            opts.file_favor(favor);
711        }
712
713        self.merge_trees(&empty_tree, &ours_filtered, &theirs_remapped, Some(&opts))
714    }
715
716    fn merge_vendor(
717        &self,
718        vendor: &VendorSource,
719        _maybe_opts: Option<&mut git2::FetchOptions>,
720        file_favor: Option<git2::FileFavor>,
721    ) -> Result<git2::Index, git2::Error> {
722        // Parse stored patterns into mappings (supports colon syntax).
723        let mappings = parse_patterns(&vendor.patterns);
724
725        // UPSTREAM (theirs): remap the upstream tree to local paths via mappings.
726        let upstream_tree = self.find_reference(&vendor.head_ref())?.peel_to_tree()?;
727        let theirs_remapped = remap_upstream_tree(self, &upstream_tree, &mappings)?;
728
729        // LOCAL (ours): use gitattributes to find files currently tracked for
730        // this vendor.  Falls back to glob matching against local paths when the
731        // gitattribute is unset (e.g. legacy .gitattributes or first-ever merge).
732        let glob_matcher = build_glob_matcher_from_mappings(&mappings)?;
733        let expected_vendor = vendor.name.clone();
734        let ours = self.head()?.peel_to_tree()?;
735        let ours_filtered = self.filter_by_predicate(&ours, |repo, path| {
736            match repo.get_attr(path, "vendor", git2::AttrCheckFlags::FILE_THEN_INDEX) {
737                Ok(Some(value)) if value == expected_vendor => true,
738                // Legacy fallback: match local paths against the glob side of patterns.
739                _ => glob_matcher.is_match(path),
740            }
741        })?;
742
743        let mut opts = git2::MergeOptions::new();
744        opts.find_renames(true);
745        opts.rename_threshold(50);
746        if let Some(favor) = file_favor {
747            opts.file_favor(favor);
748        }
749
750        // BASE: if a base commit is recorded, remap its tree the same way.
751        let base_commit = self.find_vendor_base(vendor)?;
752        let base = match &base_commit {
753            Some(c) => {
754                let base_full_tree = c.as_object().peel_to_tree()?;
755                remap_upstream_tree(self, &base_full_tree, &mappings)?
756            }
757            None => self.find_tree(ours_filtered.id())?,
758        };
759
760        self.merge_trees(&base, &ours_filtered, &theirs_remapped, Some(&opts))
761    }
762
763    fn refresh_vendor_attrs(
764        &self,
765        vendor: &VendorSource,
766        merged_index: &git2::Index,
767        _path: &Path,
768    ) -> Result<(), git2::Error> {
769        let workdir = self
770            .workdir()
771            .ok_or_else(|| git2::Error::from_str("repository has no working directory"))?;
772        // Always use the root .gitattributes regardless of `_path`.
773        // The `_path` parameter is retained for API compatibility; the actual
774        // per-file paths are taken directly from the merged index entries.
775        let gitattributes = workdir.join(".gitattributes");
776        let vendor_attr = format!("vendor={}", vendor.name);
777
778        // Collect all stage-0 paths from the merged index that belong to this
779        // vendor.  We identify them by the `vendor` gitattribute (or by
780        // checking if the path appears in the remapped tree when no attr yet).
781        let expected_vendor = vendor.name.clone();
782        let mut merged_paths: HashSet<String> = HashSet::new();
783        for entry in merged_index.iter() {
784            let stage = (entry.flags >> 12) & 0x3;
785            if stage != 0 {
786                continue;
787            }
788            if let Ok(entry_path) = std::str::from_utf8(&entry.path) {
789                // Include paths already attributed to this vendor in HEAD,
790                // plus any new path that was brought in by the merge
791                // (detected by checking the gitattr on the merged result –
792                // which may not yet be set for brand-new files, so we
793                // include all stage-0 entries and let the attribute file
794                // be the source of truth going forward).
795                merged_paths.insert(entry_path.to_string());
796            }
797        }
798
799        // Filter to only paths that are (or should be) owned by this vendor.
800        // Strategy: a path belongs to this vendor if:
801        //   (a) it already carries `vendor=<name>` in HEAD's gitattributes, OR
802        //   (b) it is a new file not yet attributed (we conservatively include
803        //       all stage-0 entries from the merge result and trust that the
804        //       caller only invokes refresh_vendor_attrs with a properly-scoped
805        //       merged_index).
806        let owned_paths: HashSet<String> = merged_paths
807            .into_iter()
808            .filter(|path| {
809                match self.get_attr(
810                    Path::new(path),
811                    "vendor",
812                    git2::AttrCheckFlags::FILE_THEN_INDEX,
813                ) {
814                    Ok(Some(value)) => value == expected_vendor,
815                    // New file not yet attributed – include it.
816                    _ => true,
817                }
818            })
819            .collect();
820
821        // Read existing root .gitattributes, remove stale entries for this
822        // vendor, keep everything else.
823        let needle = format!("vendor={}", vendor.name);
824        let mut lines: Vec<String> = if gitattributes.exists() {
825            let content = std::fs::read_to_string(&gitattributes)
826                .map_err(|e| git2::Error::from_str(&format!("read .gitattributes: {e}")))?;
827            content
828                .lines()
829                .filter(|line| !line.split_whitespace().any(|tok| tok == needle))
830                .map(String::from)
831                .collect()
832        } else {
833            Vec::new()
834        };
835
836        // Add per-file entries for all merged paths.
837        let mut sorted: Vec<_> = owned_paths.into_iter().collect();
838        sorted.sort();
839        for file in sorted {
840            let line = format!("{} {}", to_git_path(Path::new(&file)), vendor_attr);
841            lines.push(line);
842        }
843
844        // Sort attribute lines by pattern to ensure deterministic ordering
845        // across runs, regardless of which vendor's entries were removed and
846        // re-added.  Comments and blank lines are stripped (gitattributes
847        // written by this tool are fully generated).
848        lines.sort_by(|a, b| {
849            let key = |l: &String| {
850                let trimmed = l.trim();
851                if trimmed.is_empty() || trimmed.starts_with('#') {
852                    // Sort blanks/comments after all attribute lines.
853                    (1, trimmed.to_string())
854                } else {
855                    (0, trimmed.to_string())
856                }
857            };
858            key(a).cmp(&key(b))
859        });
860
861        // Write back.
862        if let Some(parent) = gitattributes.parent() {
863            std::fs::create_dir_all(parent).map_err(|e| {
864                git2::Error::from_str(&format!("create dir for .gitattributes: {e}"))
865            })?;
866        }
867        let mut content = lines.join("\n");
868        if !content.is_empty() && !content.ends_with('\n') {
869            content.push('\n');
870        }
871        std::fs::write(&gitattributes, &content)
872            .map_err(|e| git2::Error::from_str(&format!("write .gitattributes: {e}")))?;
873        Ok(())
874    }
875
876    fn find_vendor_base(
877        &self,
878        vendor: &VendorSource,
879    ) -> Result<Option<git2::Commit<'_>>, git2::Error> {
880        match vendor.base.as_ref() {
881            Some(base) => {
882                let oid = git2::Oid::from_str(base)?;
883                let commit = self.find_commit(oid)?;
884                Ok(Some(commit))
885            }
886            _ => Ok(None),
887        }
888    }
889
890    fn get_vendor_by_name(&self, name: &str) -> Result<Option<VendorSource>, git2::Error> {
891        let gitvendors = self.vendor_config()?;
892        VendorSource::from_config(&gitvendors, name)
893    }
894}
895
896#[cfg(test)]
897mod tests;