Skip to main content

grit_lib/
crlf.rs

1//! CRLF / EOL conversion and clean/smudge filter support.
2//!
3//! This module handles line-ending conversion when staging files (`git add`)
4//! and checking out files (`git checkout`, `read-tree -u`, `checkout-index`).
5//!
6//! Config knobs:
7//!   - `core.autocrlf` (true / input / false)
8//!   - `core.eol` (lf / crlf / native)
9//!   - `core.safecrlf` (true / warn / false)
10//!
11//! Gitattributes:
12//!   - `text` / `text=auto` / `-text` / `binary`
13//!   - `eol=lf` / `eol=crlf`
14//!   - `filter=<name>` (with `filter.<name>.clean` / `filter.<name>.smudge`)
15//!   - `ident` keyword expansion
16
17use std::path::{Path, PathBuf};
18use std::process::{Command, Stdio};
19
20use crate::config::ConfigSet;
21use crate::filter_process::{apply_process_clean, apply_process_smudge, FilterSmudgeMeta};
22
/// What `core.autocrlf` is set to.
///
/// Parsed by [`ConversionConfig::from_config`]; an unset or unrecognized value maps to
/// [`AutoCrlf::False`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AutoCrlf {
    /// `core.autocrlf` is `true` / `yes` / `on` / `1` (case-insensitive).
    True,
    /// `core.autocrlf = input`.
    Input,
    /// `core.autocrlf` is unset or has any other value.
    False,
}
30
/// What `core.eol` is set to.
///
/// Parsed by [`ConversionConfig::from_config`]; an unset or unrecognized value maps to
/// [`CoreEol::Native`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CoreEol {
    /// `core.eol = lf`.
    Lf,
    /// `core.eol = crlf`.
    Crlf,
    /// `core.eol = native`, unset, or any other value.
    Native,
}
38
/// What `core.safecrlf` is set to.
///
/// Parsed by [`ConversionConfig::from_config`]; note that an *unset* key maps to
/// [`SafeCrlf::Warn`], while a present but unrecognized value maps to [`SafeCrlf::False`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SafeCrlf {
    /// `core.safecrlf` is `true` / `yes` / `on` / `1` (case-insensitive).
    True,
    /// `core.safecrlf = warn`, or the key is not configured at all.
    Warn,
    /// `core.safecrlf` is present with any other value.
    False,
}
46
/// Per-file text attribute from .gitattributes.
///
/// Resolved by [`get_file_attrs`]; a `text=<value>` with any value other than `auto`
/// resolves to [`TextAttr::Unspecified`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextAttr {
    /// `text` — always treat as text.
    Set,
    /// `text=auto` — auto-detect.
    Auto,
    /// `-text` or `binary` — never convert.
    Unset,
    /// No text attribute specified.
    Unspecified,
}
59
/// Per-file eol attribute from .gitattributes.
///
/// Resolved by [`get_file_attrs`]; any `eol` value other than `lf` / `crlf` resolves to
/// [`EolAttr::Unspecified`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EolAttr {
    /// `eol=lf`.
    Lf,
    /// `eol=crlf`.
    Crlf,
    /// No (recognized) `eol` attribute.
    Unspecified,
}
67
/// Legacy `crlf` gitattribute (deprecated in Git; still honored for EOL conversion).
///
/// Resolved by [`get_file_attrs`]; `crlf=<value>` with a value other than `input` resolves to
/// [`CrlfLegacyAttr::Unspecified`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CrlfLegacyAttr {
    /// No (recognized) `crlf` attribute; this is the default.
    #[default]
    Unspecified,
    /// `-crlf` — disable CRLF conversion.
    Unset,
    /// `crlf=input` — normalize to LF in the object database; no CRLF on checkout.
    Input,
    /// Bare `crlf` (set) — force CRLF on checkout for text files.
    Crlf,
}
80
/// Per-file merge attribute from .gitattributes.
///
/// Resolved by [`get_file_attrs`]; a bare `merge` (set) resolves to
/// [`MergeAttr::Unspecified`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeAttr {
    /// No merge attribute specified.
    Unspecified,
    /// `-merge` — treat as binary/non-text merge.
    Unset,
    /// `merge=<driver>` — use named merge driver.
    Driver(String),
}
91
/// How the `diff` gitattribute affects diff output.
///
/// Resolved by [`get_file_attrs`] from the last matching rule that carries a `diff` attribute.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiffAttr {
    /// No `diff` attribute (use heuristics / default).
    Unspecified,
    /// `-diff` / `diff=unset` / `binary` — treat as binary for diff purposes.
    Unset,
    /// `diff=<driver>` — use named driver (e.g. for textconv).
    Driver(String),
}
102
/// Per-file attributes relevant to conversion.
///
/// Built by [`get_file_attrs`] from the matching `.gitattributes` rules plus the `filter.*`
/// configuration; [`FileAttrs::default`] represents "no attributes apply".
#[derive(Debug, Clone)]
pub struct FileAttrs {
    /// State of the `text` attribute (`text`, `text=auto`, `-text` / `binary`).
    pub text: TextAttr,
    /// State of the `eol` attribute (`eol=lf` / `eol=crlf`).
    pub eol: EolAttr,
    /// Effect of the `diff` gitattribute on diff output.
    pub diff_attr: DiffAttr,
    /// `export-ignore` — omit from `git archive`.
    pub export_ignore: bool,
    /// `export-subst` — expand `$Format:` placeholders using the archived commit.
    pub export_subst: bool,
    /// Value of `filter.<name>.clean` for the active filter driver; `None` when no filter
    /// applies, the key is missing, or a `process` filter is configured instead.
    pub filter_clean: Option<String>,
    /// Value of `filter.<name>.smudge` for the active filter driver; `None` when no filter
    /// applies, the key is missing, or a `process` filter is configured instead.
    pub filter_smudge: Option<String>,
    /// `filter.<name>.process` — long-running filter (takes precedence over clean/smudge commands).
    pub filter_process: Option<String>,
    /// Driver name from the active `filter=<name>` gitattribute (for error messages).
    pub filter_driver_name: Option<String>,
    /// Whether `filter.<name>.required` is set for this path's filter driver.
    pub filter_smudge_required: bool,
    /// Same config key as smudge; clean direction fails when unset if true.
    pub filter_clean_required: bool,
    /// True when the bare `ident` attribute is set for the path.
    pub ident: bool,
    /// State of the `merge` attribute.
    pub merge: MergeAttr,
    /// Raw value of the `conflict-marker-size` attribute, if any (kept as the unparsed string).
    pub conflict_marker_size: Option<String>,
    /// Working tree encoding (e.g. "utf-16") — content is converted to UTF-8 on add.
    pub working_tree_encoding: Option<String>,
    /// Legacy `crlf` / `-crlf` / `crlf=input` from `.gitattributes`.
    pub crlf_legacy: CrlfLegacyAttr,
    /// `whitespace` attribute value: `None` if unset, `Some("set")` for bare `whitespace`,
    /// `Some("unset")` for `-whitespace`, or `Some("trailing,...")` for `whitespace=...`.
    pub whitespace: Option<String>,
}
135
136impl Default for FileAttrs {
137    fn default() -> Self {
138        FileAttrs {
139            text: TextAttr::Unspecified,
140            eol: EolAttr::Unspecified,
141            diff_attr: DiffAttr::Unspecified,
142            export_ignore: false,
143            export_subst: false,
144            filter_clean: None,
145            filter_smudge: None,
146            filter_process: None,
147            filter_driver_name: None,
148            filter_smudge_required: false,
149            filter_clean_required: false,
150            ident: false,
151            merge: MergeAttr::Unspecified,
152            conflict_marker_size: None,
153            working_tree_encoding: None,
154            crlf_legacy: CrlfLegacyAttr::Unspecified,
155            whitespace: None,
156        }
157    }
158}
159
/// Global conversion settings derived from config.
///
/// Construct with [`ConversionConfig::from_config`].
#[derive(Debug, Clone)]
pub struct ConversionConfig {
    /// Parsed `core.autocrlf`.
    pub autocrlf: AutoCrlf,
    /// Parsed `core.eol`.
    pub eol: CoreEol,
    /// Parsed `core.safecrlf`.
    pub safecrlf: SafeCrlf,
}
167
168impl ConversionConfig {
169    /// Load conversion settings from a ConfigSet.
170    pub fn from_config(config: &ConfigSet) -> Self {
171        let autocrlf = match config.get("core.autocrlf") {
172            Some(v) => match v.to_lowercase().as_str() {
173                "true" | "yes" | "on" | "1" => AutoCrlf::True,
174                "input" => AutoCrlf::Input,
175                _ => AutoCrlf::False,
176            },
177            None => AutoCrlf::False,
178        };
179
180        let eol = match config.get("core.eol") {
181            Some(v) => match v.to_lowercase().as_str() {
182                "crlf" => CoreEol::Crlf,
183                "lf" => CoreEol::Lf,
184                "native" => CoreEol::Native,
185                _ => CoreEol::Native,
186            },
187            None => CoreEol::Native,
188        };
189
190        let safecrlf = match config.get("core.safecrlf") {
191            Some(v) => match v.to_lowercase().as_str() {
192                "true" | "yes" | "on" | "1" => SafeCrlf::True,
193                "warn" => SafeCrlf::Warn,
194                _ => SafeCrlf::False,
195            },
196            // Git warns on round-trip EOL issues by default when unset.
197            None => SafeCrlf::Warn,
198        };
199
200        ConversionConfig {
201            autocrlf,
202            eol,
203            safecrlf,
204        }
205    }
206}
207
/// A parsed .gitattributes rule.
///
/// One rule corresponds to one non-comment line: a path pattern followed by the attribute
/// assignments found on that line.
#[derive(Debug, Clone)]
pub struct AttrRule {
    /// Glob text used for matching (trailing directory `/` stripped; see [`AttrRule::must_be_dir`]).
    pattern: String,
    /// When true, the source pattern ended with `/` and matches only directories (Git `PATTERN_FLAG_MUSTBEDIR`).
    must_be_dir: bool,
    /// When true, match only the path's final component (Git `PATTERN_FLAG_NODIR` / no `/` in the pattern body).
    basename_only: bool,
    /// Attribute assignments in source order, as `(name, value)` pairs. The value is `"set"`
    /// for a bare attribute, `"unset"` for `-attr`, or the literal text after `=`.
    attrs: Vec<(String, String)>, // (name, value) where value is "set"/"unset"/specific value
}
219
220impl AttrRule {
221    /// Diff driver names assigned by this rule (`diff=<driver>`), excluding `set`/`unset`.
222    pub fn diff_drivers(&self) -> impl Iterator<Item = &str> + '_ {
223        self.attrs.iter().filter_map(|(name, value)| {
224            if name == "diff" && !value.is_empty() && value != "unset" && value != "set" {
225                Some(value.as_str())
226            } else {
227                None
228            }
229        })
230    }
231}
232
233/// Load .gitattributes from the worktree root.
234pub fn load_gitattributes(work_tree: &Path) -> Vec<AttrRule> {
235    let mut rules = Vec::new();
236
237    let root_attrs = work_tree.join(".gitattributes");
238    if let Ok(content) = std::fs::read_to_string(&root_attrs) {
239        parse_gitattributes(&content, &mut rules);
240    }
241
242    let info_attrs = work_tree.join(".git/info/attributes");
243    if let Ok(content) = std::fs::read_to_string(&info_attrs) {
244        parse_gitattributes(&content, &mut rules);
245    }
246
247    rules
248}
249
250/// Parse gitattributes content into attribute rules.
251///
252/// This is useful when attributes are sourced from non-worktree inputs
253/// (for example, tree objects selected by `--attr-source`).
254#[must_use]
255pub fn parse_gitattributes_content(content: &str) -> Vec<AttrRule> {
256    let mut rules = Vec::new();
257    parse_gitattributes(content, &mut rules);
258    rules
259}
260
261/// Load .gitattributes from the index (for use during checkout when
262/// the worktree file may not yet exist).
263pub fn load_gitattributes_from_index(
264    index: &crate::index::Index,
265    odb: &crate::odb::Odb,
266) -> Vec<AttrRule> {
267    let mut rules = Vec::new();
268
269    // Look for .gitattributes in the index (stage 0)
270    if let Some(entry) = index.get(b".gitattributes", 0) {
271        if let Ok(obj) = odb.read(&entry.oid) {
272            if let Ok(content) = String::from_utf8(obj.data) {
273                parse_gitattributes(&content, &mut rules);
274            }
275        }
276    }
277
278    rules
279}
280
281/// Load `.gitattributes` rules that apply to `rel_path`, including root and
282/// nested `dir/.gitattributes` along parent directories (Git-consistent order:
283/// root first, then each ancestor directory; later rules win in [`get_file_attrs`]).
284///
285/// Reads from the working tree when present, otherwise from a stage-0 index entry.
286pub fn load_gitattributes_for_checkout(
287    work_tree: &Path,
288    rel_path: &str,
289    index: &crate::index::Index,
290    odb: &crate::odb::Odb,
291) -> Vec<AttrRule> {
292    let mut rules = load_gitattributes(work_tree);
293
294    // Root `.gitattributes` may exist only in the index while the worktree file
295    // is missing (e.g. t0020 in-tree attributes after `rm -rf .gitattributes`).
296    if !work_tree.join(".gitattributes").exists() {
297        if let Some(entry) = index.get(b".gitattributes", 0) {
298            if let Ok(obj) = odb.read(&entry.oid) {
299                if let Ok(content) = String::from_utf8(obj.data) {
300                    parse_gitattributes(&content, &mut rules);
301                }
302            }
303        }
304    }
305
306    let path = Path::new(rel_path);
307    if let Some(parent) = path.parent() {
308        let mut accum = PathBuf::new();
309        for comp in parent.components() {
310            accum.push(comp);
311            let ga_rel = accum.join(".gitattributes");
312            let wt_ga = work_tree.join(&ga_rel);
313            if let Ok(content) = std::fs::read_to_string(&wt_ga) {
314                parse_gitattributes(&content, &mut rules);
315            } else {
316                let key = path_to_index_bytes(&ga_rel);
317                if let Some(entry) = index.get(&key, 0) {
318                    if let Ok(obj) = odb.read(&entry.oid) {
319                        if let Ok(content) = String::from_utf8(obj.data) {
320                            parse_gitattributes(&content, &mut rules);
321                        }
322                    }
323                }
324            }
325        }
326    }
327
328    rules
329}
330
/// Convert a relative path into the raw byte key used for index lookups.
///
/// Uses the Unix `OsStr` byte representation, so the bytes are exactly those of the path.
fn path_to_index_bytes(path: &Path) -> Vec<u8> {
    use std::os::unix::ffi::OsStrExt;

    let raw: &[u8] = path.as_os_str().as_bytes();
    Vec::from(raw)
}
335
336fn parse_gitattributes(content: &str, rules: &mut Vec<AttrRule>) {
337    for line in content.lines() {
338        let line = line.trim();
339        if line.is_empty() || line.starts_with('#') {
340            continue;
341        }
342
343        let mut parts = line.split_whitespace();
344        let raw_pattern = match parts.next() {
345            Some(p) => p,
346            None => continue,
347        };
348
349        let mut pat = raw_pattern.to_owned();
350        let mut must_be_dir = false;
351        if pat.ends_with('/') && pat.len() > 1 {
352            pat.pop();
353            must_be_dir = true;
354        }
355        let basename_only = !pat.contains('/');
356
357        let mut attrs = Vec::new();
358        for part in parts {
359            if part == "binary" {
360                attrs.push(("text".to_owned(), "unset".to_owned()));
361                attrs.push(("diff".to_owned(), "unset".to_owned()));
362            } else if let Some(rest) = part.strip_prefix('-') {
363                attrs.push((rest.to_owned(), "unset".to_owned()));
364            } else if let Some((key, val)) = part.split_once('=') {
365                attrs.push((key.to_owned(), val.to_owned()));
366            } else {
367                attrs.push((part.to_owned(), "set".to_owned()));
368            }
369        }
370
371        if !attrs.is_empty() {
372            rules.push(AttrRule {
373                pattern: pat,
374                must_be_dir,
375                basename_only,
376                attrs,
377            });
378        }
379    }
380}
381
/// Whether a config value spells a true boolean.
///
/// Accepts `true`, `yes`, `on` and `1`, ignoring ASCII case and surrounding whitespace;
/// everything else (including the empty string) is false.
fn config_bool_truthy(value: &str) -> bool {
    const TRUTHY: [&str; 4] = ["true", "yes", "on", "1"];

    let normalized = value.trim().to_ascii_lowercase();
    TRUTHY.contains(&normalized.as_str())
}
388
389/// Get file attributes for a given path from .gitattributes rules and config.
390///
391/// `is_dir` should be true when `rel_path` names a directory (Git passes a trailing `/` for
392/// directory paths in some call sites; we accept either trailing `/` or this flag from tree walks).
393pub fn get_file_attrs(
394    rules: &[AttrRule],
395    rel_path: &str,
396    is_dir: bool,
397    config: &ConfigSet,
398) -> FileAttrs {
399    let mut fa = FileAttrs::default();
400
401    // Walk rules; last match wins for each attribute.
402    for rule in rules {
403        if attr_rule_matches(rule, rel_path, is_dir) {
404            for (name, value) in &rule.attrs {
405                match name.as_str() {
406                    "text" => {
407                        fa.text = match value.as_str() {
408                            "set" => TextAttr::Set,
409                            "unset" => TextAttr::Unset,
410                            "auto" => TextAttr::Auto,
411                            _ => TextAttr::Unspecified,
412                        };
413                    }
414                    "eol" => {
415                        fa.eol = match value.as_str() {
416                            "lf" => EolAttr::Lf,
417                            "crlf" => EolAttr::Crlf,
418                            _ => EolAttr::Unspecified,
419                        };
420                    }
421                    "filter" => {
422                        if value == "unset" {
423                            fa.filter_clean = None;
424                            fa.filter_smudge = None;
425                            fa.filter_process = None;
426                            fa.filter_driver_name = None;
427                            fa.filter_smudge_required = false;
428                            fa.filter_clean_required = false;
429                        } else {
430                            let clean_key = format!("filter.{value}.clean");
431                            let smudge_key = format!("filter.{value}.smudge");
432                            let process_key = format!("filter.{value}.process");
433                            let req_key = format!("filter.{value}.required");
434                            fa.filter_driver_name = Some(value.clone());
435                            fa.filter_process = config.get(&process_key).filter(|s| !s.is_empty());
436                            if fa.filter_process.is_some() {
437                                fa.filter_clean = None;
438                                fa.filter_smudge = None;
439                            } else {
440                                fa.filter_clean = config.get(&clean_key);
441                                fa.filter_smudge = config.get(&smudge_key);
442                            }
443                            let required =
444                                config.get(&req_key).is_some_and(|v| config_bool_truthy(&v));
445                            fa.filter_smudge_required = required;
446                            fa.filter_clean_required = required;
447                        }
448                    }
449                    "diff" => {
450                        if value == "unset" {
451                            fa.diff_attr = DiffAttr::Unset;
452                        } else if !value.is_empty() && value != "set" {
453                            fa.diff_attr = DiffAttr::Driver(value.clone());
454                        }
455                    }
456                    "ident" => {
457                        fa.ident = value == "set";
458                    }
459                    "export-ignore" => {
460                        fa.export_ignore = value != "unset";
461                    }
462                    "export-subst" => {
463                        fa.export_subst = value != "unset";
464                    }
465                    "merge" => {
466                        fa.merge = match value.as_str() {
467                            "unset" => MergeAttr::Unset,
468                            "set" => MergeAttr::Unspecified,
469                            other => MergeAttr::Driver(other.to_string()),
470                        };
471                    }
472                    "conflict-marker-size" => {
473                        if value == "unset" {
474                            fa.conflict_marker_size = None;
475                        } else {
476                            fa.conflict_marker_size = Some(value.clone());
477                        }
478                    }
479                    "working-tree-encoding" => {
480                        if value != "unset" && !value.is_empty() {
481                            fa.working_tree_encoding = Some(value.clone());
482                        }
483                    }
484                    "crlf" => {
485                        fa.crlf_legacy = match value.as_str() {
486                            "unset" => CrlfLegacyAttr::Unset,
487                            "input" => CrlfLegacyAttr::Input,
488                            "set" => CrlfLegacyAttr::Crlf,
489                            _ => CrlfLegacyAttr::Unspecified,
490                        };
491                    }
492                    "whitespace" => {
493                        if value == "unset" {
494                            fa.whitespace = Some("unset".to_owned());
495                        } else if !value.is_empty() {
496                            fa.whitespace = Some(value.clone());
497                        }
498                    }
499                    _ => {}
500                }
501            }
502        }
503    }
504
505    fa
506}
507
508/// Returns whether gitattribute `attr_name` is set (last matching rule wins), for arbitrary
509/// attribute names used by pathspec `:(attr:...)`.
510///
511/// `is_dir` is whether `path` refers to a directory (see [`get_file_attrs`]).
512#[must_use]
513pub fn path_has_gitattribute(
514    rules: &[AttrRule],
515    path: &str,
516    is_dir: bool,
517    attr_name: &str,
518) -> bool {
519    let mut last: Option<&str> = None;
520    for rule in rules {
521        if attr_rule_matches(rule, path, is_dir) {
522            for (name, value) in &rule.attrs {
523                if name == attr_name {
524                    last = Some(value.as_str());
525                }
526            }
527        }
528    }
529    match last {
530        None | Some("unset") => false,
531        Some(_) => true,
532    }
533}
534
535/// Whether `rule` matches `rel_path` given directory vs file context (Git `path_matches`).
536#[must_use]
537pub fn attr_rule_matches(rule: &AttrRule, rel_path: &str, is_dir: bool) -> bool {
538    let path_is_dir = is_dir || rel_path.ends_with('/');
539    if rule.must_be_dir && !path_is_dir {
540        return false;
541    }
542    let path_for_glob = rel_path.trim_end_matches('/');
543    if rule.basename_only {
544        let basename = path_for_glob.rsplit('/').next().unwrap_or(path_for_glob);
545        glob_matches(rule.pattern.as_str(), basename)
546    } else {
547        glob_matches(rule.pattern.as_str(), path_for_glob)
548    }
549}
550
551fn glob_matches(pattern: &str, text: &str) -> bool {
552    glob_match_bytes(pattern.as_bytes(), text.as_bytes())
553}
554
/// Match `text` against a simple glob `pat`, byte by byte.
///
/// Supported syntax: `*` matches any run of bytes (including `/` and the empty run), `?`
/// matches exactly one byte, and every other byte matches itself. The whole of `text` must be
/// consumed for a match.
///
/// The matcher is iterative and only ever backtracks to the most recent `*`, so it runs in
/// `O(pat.len() * text.len())` time and constant stack space. A recursive matcher would need
/// exponential time on patterns with several `*` and one stack frame per matched byte.
fn glob_match_bytes(pat: &[u8], text: &[u8]) -> bool {
    let mut p = 0usize;
    let mut t = 0usize;
    // After the most recent `*`: (pattern index following it, text index where the part
    // after the star is currently assumed to start).
    let mut resume: Option<(usize, usize)> = None;

    while t < text.len() {
        match pat.get(p) {
            Some(b'*') => {
                // Start by letting the star match nothing; widen it on later mismatches.
                p += 1;
                resume = Some((p, t));
            }
            Some(&c) if c == b'?' || c == text[t] => {
                p += 1;
                t += 1;
            }
            _ => match resume {
                // Mismatch: let the last star swallow one more byte and retry from there.
                Some((star_p, star_t)) => {
                    p = star_p;
                    t = star_t + 1;
                    resume = Some((star_p, star_t + 1));
                }
                None => return false,
            },
        }
    }

    // Text exhausted: only trailing stars may remain in the pattern.
    pat[p..].iter().all(|&b| b == b'*')
}
578
/// Heuristic binary check: true when a NUL byte occurs within the first 8000 bytes of `data`.
pub fn is_binary(data: &[u8]) -> bool {
    const SNIFF_LEN: usize = 8000;
    data.iter().take(SNIFF_LEN).any(|&byte| byte == 0)
}
584
// Git `convert.c` `CONVERT_STAT_BITS_*` / `gather_convert_stats_ascii` (for `ls-files --eol`).
// Bit flags that `gather_convert_stats` ORs together into one `u32` summary of a blob.
/// Set when the data contains at least one LF that is not part of a CRLF pair.
const CONVERT_STAT_BITS_TXT_LF: u32 = 0x1;
/// Set when the data contains at least one CRLF pair.
const CONVERT_STAT_BITS_TXT_CRLF: u32 = 0x2;
/// Set when `convert_is_binary` classifies the data as binary.
const CONVERT_STAT_BITS_BIN: u32 = 0x4;
589
/// Byte-class counters for a blob, filled in by `gather_text_stat` and interpreted by
/// `convert_is_binary` and the CRLF conversion decisions.
#[derive(Default, Clone)]
struct TextStat {
    /// Number of NUL (`0x00`) bytes (each is also counted in `nonprintable`).
    nul: u32,
    /// CR bytes that are not immediately followed by LF.
    lonecr: u32,
    /// LF bytes that are not the second half of a CRLF pair.
    lonelf: u32,
    /// Number of CRLF pairs.
    crlf: u32,
    /// Bytes counted as printable: everything >= 32 except DEL (127), plus TAB, BS, ESC and FF.
    printable: u32,
    /// Bytes counted as non-printable: DEL (127), NUL and the remaining control bytes below 32
    /// (CR and LF are tracked separately and never counted here).
    nonprintable: u32,
}
599
600fn gather_text_stat(data: &[u8]) -> TextStat {
601    let mut s = TextStat::default();
602    let mut i = 0usize;
603    while i < data.len() {
604        let c = data[i];
605        if c == b'\r' {
606            if i + 1 < data.len() && data[i + 1] == b'\n' {
607                s.crlf += 1;
608                i += 2;
609            } else {
610                s.lonecr += 1;
611                i += 1;
612            }
613            continue;
614        }
615        if c == b'\n' {
616            s.lonelf += 1;
617            i += 1;
618            continue;
619        }
620        if c == 127 {
621            s.nonprintable += 1;
622        } else if c < 32 {
623            match c {
624                b'\t' | b'\x08' | b'\x1b' | b'\x0c' => s.printable += 1,
625                0 => {
626                    s.nul += 1;
627                    s.nonprintable += 1;
628                }
629                _ => s.nonprintable += 1,
630            }
631        } else {
632            s.printable += 1;
633        }
634        i += 1;
635    }
636    s
637}
638
639fn convert_is_binary(stats: &TextStat) -> bool {
640    stats.lonecr > 0 || stats.nul > 0 || (stats.printable >> 7) < stats.nonprintable
641}
642
643fn git_text_stat(data: &[u8]) -> TextStat {
644    let mut stats = gather_text_stat(data);
645    if !data.is_empty() && data[data.len() - 1] == 0x1a {
646        stats.nonprintable = stats.nonprintable.saturating_sub(1);
647    }
648    stats
649}
650
/// Git `will_convert_lf_to_crlf` using [`TextStat`] (same rules as [`should_convert_to_crlf`] on bytes).
///
/// Decides whether lone LFs in a blob described by `stats` would be rewritten to CRLF, given
/// the global settings in `conv` and the per-path `attrs`. The checks run in a fixed order and
/// the first one that applies decides:
///
/// 1. legacy `crlf` attribute,
/// 2. `-text`,
/// 3. an explicit `eol` attribute,
/// 4. `text` / `text=auto` combined with the configured output EOL,
/// 5. `core.autocrlf`.
fn will_convert_lf_to_crlf_from_stats(
    stats: &TextStat,
    conv: &ConversionConfig,
    attrs: &FileAttrs,
) -> bool {
    let has_lone_lf = stats.lonelf > 0;
    let is_bin = convert_is_binary(stats);

    // Legacy `crlf` attribute: `-crlf` and `crlf=input` never produce CRLF; bare `crlf`
    // converts whenever there is a lone LF, unless `-text` is also in effect.
    match attrs.crlf_legacy {
        CrlfLegacyAttr::Unset | CrlfLegacyAttr::Input => return false,
        CrlfLegacyAttr::Crlf => {
            if attrs.text == TextAttr::Unset {
                return false;
            }
            return has_lone_lf;
        }
        CrlfLegacyAttr::Unspecified => {}
    }

    // `-text` / `binary`: never convert.
    if attrs.text == TextAttr::Unset {
        return false;
    }

    // Explicit `eol=`: only `eol=crlf` can convert. With `text=auto` binary-looking content
    // is skipped and the stricter auto rule (no existing CR/CRLF) applies.
    if attrs.eol != EolAttr::Unspecified {
        if attrs.text == TextAttr::Auto && is_bin {
            return false;
        }
        if attrs.eol != EolAttr::Crlf {
            return false;
        }
        if attrs.text == TextAttr::Auto {
            return auto_crlf_should_smudge_lf_to_crlf_from_stats(stats);
        }
        return has_lone_lf;
    }

    // Bare `text`: convert any lone LF when `output_eol_is_crlf` reports CRLF output for `conv`.
    if attrs.text == TextAttr::Set {
        if !output_eol_is_crlf(conv) {
            return false;
        }
        return has_lone_lf;
    }

    // `text=auto` without `eol=`: as above, but binary content is skipped and the auto rule
    // applies.
    if attrs.text == TextAttr::Auto {
        if is_bin || !output_eol_is_crlf(conv) {
            return false;
        }
        return auto_crlf_should_smudge_lf_to_crlf_from_stats(stats);
    }

    // No relevant attributes: only `core.autocrlf=true` converts, using the auto rule.
    match conv.autocrlf {
        AutoCrlf::True => {
            if is_bin {
                return false;
            }
            auto_crlf_should_smudge_lf_to_crlf_from_stats(stats)
        }
        AutoCrlf::Input | AutoCrlf::False => false,
    }
}
712
713fn auto_crlf_should_smudge_lf_to_crlf_from_stats(stats: &TextStat) -> bool {
714    if stats.lonelf == 0 {
715        return false;
716    }
717    if stats.lonecr > 0 || stats.crlf > 0 {
718        return false;
719    }
720    !convert_is_binary(stats)
721}
722
723fn gather_convert_stats(data: &[u8]) -> u32 {
724    if data.is_empty() {
725        return 0;
726    }
727    let mut stats = gather_text_stat(data);
728    if !data.is_empty() && data[data.len() - 1] == 0x1a {
729        stats.nonprintable = stats.nonprintable.saturating_sub(1);
730    }
731    let mut ret = 0u32;
732    if convert_is_binary(&stats) {
733        ret |= CONVERT_STAT_BITS_BIN;
734    }
735    if stats.crlf > 0 {
736        ret |= CONVERT_STAT_BITS_TXT_CRLF;
737    }
738    if stats.lonelf > 0 {
739        ret |= CONVERT_STAT_BITS_TXT_LF;
740    }
741    ret
742}
743
744/// Git `convert.c` `gather_convert_stats_ascii` — worktree/index blob EOL stats for `ls-files --eol`.
745#[must_use]
746pub fn gather_convert_stats_ascii(data: &[u8]) -> &'static str {
747    let convert_stats = gather_convert_stats(data);
748    if convert_stats & CONVERT_STAT_BITS_BIN != 0 {
749        return "-text";
750    }
751    match convert_stats {
752        CONVERT_STAT_BITS_TXT_LF => "lf",
753        CONVERT_STAT_BITS_TXT_CRLF => "crlf",
754        x if x == (CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF) => "mixed",
755        _ => "none",
756    }
757}
758
759/// Git `convert.c` `get_convert_attr_ascii` — ASCII summary of EOL-related attributes for
760/// `git ls-files --eol` (matches `attr_action` after attribute merge, before clean/smudge).
761#[must_use]
762pub fn convert_attr_ascii_for_ls_files(
763    rules: &[AttrRule],
764    rel_path: &str,
765    config: &ConfigSet,
766) -> String {
767    let fa = get_file_attrs(rules, rel_path, false, config);
768    // Mirror `git_path_check_crlf` for `text` then legacy `crlf` (Git checks `text` first).
769    let mut action = match fa.text {
770        TextAttr::Set => 1,   // CRLF_TEXT
771        TextAttr::Unset => 2, // CRLF_BINARY
772        TextAttr::Auto => 5,  // CRLF_AUTO
773        TextAttr::Unspecified => 0,
774    };
775    if action == 0 {
776        action = match fa.crlf_legacy {
777            CrlfLegacyAttr::Crlf => 1,
778            CrlfLegacyAttr::Unset => 2,
779            CrlfLegacyAttr::Input => 3, // CRLF_TEXT_INPUT
780            CrlfLegacyAttr::Unspecified => 0,
781        };
782    }
783    if action == 2 {
784        return "-text".to_string();
785    }
786    // Bare `eol=lf` / `eol=crlf` without `text` still implies text mode (`convert_attrs`).
787    if action == 0 {
788        if fa.eol == EolAttr::Unspecified {
789            return String::new();
790        }
791        action = 1; // CRLF_TEXT
792    }
793
794    // Merge `eol=` like `convert_attrs` (only when not already binary).
795    if fa.eol == EolAttr::Lf {
796        if action == 5 {
797            action = 7; // CRLF_AUTO_INPUT
798        } else {
799            action = 3; // CRLF_TEXT_INPUT
800        }
801    } else if fa.eol == EolAttr::Crlf {
802        if action == 5 {
803            action = 6; // CRLF_AUTO_CRLF
804        } else {
805            action = 4; // CRLF_TEXT_CRLF
806        }
807    }
808
809    // `attr_action` snapshot (Git assigns before splitting bare `text` / applying autocrlf).
810    let attr_action = action;
811
812    match attr_action {
813        1 => "text".to_string(),
814        3 => "text eol=lf".to_string(),
815        4 => "text eol=crlf".to_string(),
816        5 => "text=auto".to_string(),
817        6 => "text=auto eol=crlf".to_string(),
818        7 => "text=auto eol=lf".to_string(),
819        _ => String::new(),
820    }
821}
822
/// Whether `data` contains at least one CR immediately followed by LF.
pub fn has_crlf(data: &[u8]) -> bool {
    data.iter()
        .zip(data.iter().skip(1))
        .any(|(&first, &second)| first == b'\r' && second == b'\n')
}
827
/// Whether `data` contains an LF that is not directly preceded by CR.
pub fn has_lone_lf(data: &[u8]) -> bool {
    let mut previous: Option<u8> = None;
    data.iter().any(|&byte| {
        let lone = byte == b'\n' && previous != Some(b'\r');
        previous = Some(byte);
        lone
    })
}
837
/// Whether `data` contains a CR that is not directly followed by LF (Git `text_stat.lonecr`).
///
/// A CR at the very end of the data counts as lone.
fn has_lone_cr(data: &[u8]) -> bool {
    data.iter()
        .enumerate()
        .any(|(idx, &byte)| byte == b'\r' && data.get(idx + 1) != Some(&b'\n'))
}
847
848/// Git `convert.c` `will_convert_lf_to_crlf` for `CRLF_AUTO` / `CRLF_AUTO_INPUT` / `CRLF_AUTO_CRLF`:
849/// if the blob already has CRLF pairs or lone CRs, do not convert lone LFs to CRLF on checkout.
850fn auto_crlf_should_smudge_lf_to_crlf(data: &[u8]) -> bool {
851    if !has_lone_lf(data) {
852        return false;
853    }
854    if has_lone_cr(data) || has_crlf(data) {
855        return false;
856    }
857    if is_binary(data) {
858        return false;
859    }
860    true
861}
862
863/// Returns true if ALL line endings are CRLF (no lone LF).
864pub fn is_all_crlf(data: &[u8]) -> bool {
865    has_crlf(data) && !has_lone_lf(data)
866}
867
868/// Returns true if ALL line endings are LF (no CRLF).
869pub fn is_all_lf(data: &[u8]) -> bool {
870    has_lone_lf(data) && !has_crlf(data)
871}
872
873/// Git `convert.c` `has_crlf_in_index`: index blob already contains CRLF pairs (non-binary).
874#[must_use]
875pub fn has_crlf_in_index_blob(data: &[u8]) -> bool {
876    if !data.contains(&b'\r') {
877        return false;
878    }
879    let st = gather_convert_stats(data);
880    st & CONVERT_STAT_BITS_BIN == 0 && (st & CONVERT_STAT_BITS_TXT_CRLF) != 0
881}
882
883/// Whether clean conversion uses Git's `has_crlf_in_index` guard (`convert.c` only for
884/// `CRLF_AUTO`, `CRLF_AUTO_INPUT`, `CRLF_AUTO_CRLF`). Bare `eol=` without `text=auto` becomes
885/// `CRLF_TEXT_*` and must not use this guard.
886#[must_use]
887pub fn clean_uses_autocrlf_index_guard(attrs: &FileAttrs, conv: &ConversionConfig) -> bool {
888    if attrs.text == TextAttr::Unset || attrs.crlf_legacy == CrlfLegacyAttr::Unset {
889        return false;
890    }
891    if attrs.eol != EolAttr::Unspecified && attrs.text != TextAttr::Auto {
892        return false;
893    }
894    attrs.text == TextAttr::Auto
895        || (attrs.text == TextAttr::Unspecified
896            && matches!(conv.autocrlf, AutoCrlf::True | AutoCrlf::Input))
897}
898
/// Extra knobs for [`convert_to_git_with_opts`] (Git `CONV_EOL_RENORMALIZE` / index blob).
///
/// The [`Default`] value carries no index blob, does not renormalize, and keeps the
/// `core.safecrlf` check enabled.
#[derive(Debug, Clone, Copy)]
pub struct ConvertToGitOpts<'a> {
    /// Bytes of the stage-0 blob recorded for this path before the current add; consulted
    /// by the safer-autocrlf guard.
    pub index_blob: Option<&'a [u8]>,
    /// When true, CRLF→LF is always applied when configured (merge/cherry-pick renormalize),
    /// bypassing the index-blob guard.
    pub renormalize: bool,
    /// When false, the `core.safecrlf` simulation is skipped (internal diff/hashing callers
    /// must not spam stderr).
    pub check_safecrlf: bool,
}

impl Default for ConvertToGitOpts<'_> {
    /// No index blob, no renormalization, safecrlf checking on.
    fn default() -> Self {
        ConvertToGitOpts {
            index_blob: None,
            renormalize: false,
            check_safecrlf: true,
        }
    }
}
919
920// ---------------------------------------------------------------------------
921// Input (add / clean) direction
922// ---------------------------------------------------------------------------
923
924/// Convert data for storage in the index/object database (the "clean" direction).
925///
926/// This handles:
927/// 1. Clean filter execution
928/// 2. CRLF → LF conversion based on config + attributes
929/// 3. safecrlf checking
930///
931/// Returns `Ok(data)` on success, or an error if safecrlf rejects it.
932pub fn convert_to_git(
933    data: &[u8],
934    rel_path: &str,
935    conv: &ConversionConfig,
936    file_attrs: &FileAttrs,
937) -> Result<Vec<u8>, String> {
938    convert_to_git_with_opts(
939        data,
940        rel_path,
941        conv,
942        file_attrs,
943        ConvertToGitOpts::default(),
944    )
945}
946
947/// Like [`convert_to_git`] with Git-compatible safer-autocrlf index handling.
948pub fn convert_to_git_with_opts(
949    data: &[u8],
950    rel_path: &str,
951    conv: &ConversionConfig,
952    file_attrs: &FileAttrs,
953    opts: ConvertToGitOpts<'_>,
954) -> Result<Vec<u8>, String> {
955    let mut buf = data.to_vec();
956
957    // 1. Run clean filter if configured (long-running `process` overrides clean command)
958    if let Some(ref proc_cmd) = file_attrs.filter_process {
959        let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
960        buf = apply_process_clean(proc_cmd, rel_path, &buf).map_err(|_e| {
961            if file_attrs.filter_clean_required {
962                format!("fatal: {rel_path}: clean filter '{name}' failed")
963            } else {
964                format!("clean filter failed: {_e}")
965            }
966        })?;
967    } else {
968        match file_attrs.filter_clean.as_ref() {
969            Some(clean_cmd) => {
970                buf = run_filter(clean_cmd, &buf, rel_path).map_err(|e| {
971                    let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
972                    if file_attrs.filter_clean_required {
973                        format!("fatal: {rel_path}: clean filter '{name}' failed")
974                    } else {
975                        format!("clean filter failed: {e}")
976                    }
977                })?;
978            }
979            None => {
980                if file_attrs.filter_clean_required {
981                    let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
982                    return Err(format!("fatal: {rel_path}: clean filter '{name}' failed"));
983                }
984            }
985        }
986    }
987
988    // 2. Determine if we should do CRLF→LF conversion
989    let would_convert = would_convert_on_input(conv, file_attrs, &buf);
990
991    let mut convert_crlf_into_lf = would_convert && has_crlf(&buf);
992    if convert_crlf_into_lf
993        && clean_uses_autocrlf_index_guard(file_attrs, conv)
994        && !opts.renormalize
995        && opts.index_blob.is_some_and(has_crlf_in_index_blob)
996    {
997        convert_crlf_into_lf = false;
998    }
999
1000    // 3. safecrlf check — Git simulates clean then smudge (`check_global_conv_flags_eol`).
1001    if would_convert && opts.check_safecrlf {
1002        check_safecrlf_roundtrip(conv, file_attrs, &buf, rel_path, convert_crlf_into_lf)?;
1003    }
1004
1005    // 4. Actually convert CRLF → LF if the file has CRLFs
1006    if convert_crlf_into_lf {
1007        buf = crlf_to_lf(&buf);
1008    }
1009
1010    Ok(buf)
1011}
1012
1013/// Decide whether CRLF/LF conversion is configured for this file on input.
1014/// Returns true if the file *would* be subject to conversion (even if no
1015/// actual bytes need changing).
1016fn would_convert_on_input(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
1017    match attrs.crlf_legacy {
1018        CrlfLegacyAttr::Unset => return false,
1019        CrlfLegacyAttr::Input => {
1020            if is_binary(data) {
1021                return false;
1022            }
1023            return true;
1024        }
1025        CrlfLegacyAttr::Crlf => {
1026            if attrs.text == TextAttr::Unset {
1027                return false;
1028            }
1029            if is_binary(data) {
1030                return false;
1031            }
1032            return true;
1033        }
1034        CrlfLegacyAttr::Unspecified => {}
1035    }
1036
1037    // If text is explicitly unset (-text or binary), never convert
1038    if attrs.text == TextAttr::Unset {
1039        return false;
1040    }
1041
1042    // If eol attr is set, this implies text mode
1043    if attrs.eol != EolAttr::Unspecified {
1044        if attrs.text == TextAttr::Auto && is_binary(data) {
1045            return false;
1046        }
1047        return true;
1048    }
1049
1050    // If text is explicitly set, always convert
1051    if attrs.text == TextAttr::Set {
1052        return true;
1053    }
1054
1055    if attrs.text == TextAttr::Auto {
1056        if is_binary(data) {
1057            return false;
1058        }
1059        return true;
1060    }
1061
1062    // No text attribute: fall back to core.autocrlf
1063    match conv.autocrlf {
1064        AutoCrlf::True | AutoCrlf::Input => {
1065            if is_binary(data) {
1066                return false;
1067            }
1068            true
1069        }
1070        AutoCrlf::False => false,
1071    }
1072}
1073
/// Prints Git's `core.safecrlf=warn` message to stderr for a clean-direction CRLF→LF rewrite.
fn eprint_safecrlf_warn_crlf_to_lf(rel_path: &str) {
    let warning = format!(
        "warning: in the working copy of '{rel_path}', CRLF will be replaced by LF the next time Git touches it"
    );
    eprintln!("{warning}");
}
1080
/// Prints Git's `core.safecrlf=warn` message to stderr for a clean-direction LF→CRLF rewrite.
fn eprint_safecrlf_warn_lf_to_crlf(rel_path: &str) {
    let warning = format!(
        "warning: in the working copy of '{rel_path}', LF will be replaced by CRLF the next time Git touches it"
    );
    eprintln!("{warning}");
}
1087
1088/// Git `convert.c` `check_global_conv_flags_eol` after simulating clean + smudge.
1089fn check_safecrlf_roundtrip(
1090    conv: &ConversionConfig,
1091    file_attrs: &FileAttrs,
1092    data: &[u8],
1093    rel_path: &str,
1094    convert_crlf_into_lf: bool,
1095) -> Result<(), String> {
1096    if conv.safecrlf == SafeCrlf::False {
1097        return Ok(());
1098    }
1099
1100    let old_stats = git_text_stat(data);
1101
1102    let mut new_stats = old_stats.clone();
1103    if convert_crlf_into_lf && new_stats.crlf > 0 {
1104        new_stats.lonelf += new_stats.crlf;
1105        new_stats.crlf = 0;
1106    }
1107    if will_convert_lf_to_crlf_from_stats(&new_stats, conv, file_attrs) {
1108        new_stats.crlf += new_stats.lonelf;
1109        new_stats.lonelf = 0;
1110    }
1111
1112    if old_stats.crlf > 0 && new_stats.crlf == 0 {
1113        let msg = format!("fatal: CRLF would be replaced by LF in {rel_path}");
1114        if conv.safecrlf == SafeCrlf::True {
1115            return Err(msg);
1116        }
1117        eprint_safecrlf_warn_crlf_to_lf(rel_path);
1118    } else if old_stats.lonelf > 0 && new_stats.lonelf == 0 {
1119        let msg = format!("fatal: LF would be replaced by CRLF in {rel_path}");
1120        if conv.safecrlf == SafeCrlf::True {
1121            return Err(msg);
1122        }
1123        eprint_safecrlf_warn_lf_to_crlf(rel_path);
1124    }
1125
1126    Ok(())
1127}
1128
/// Rewrites every CRLF pair as a single LF; all other bytes (including lone CRs) are kept.
pub fn crlf_to_lf(data: &[u8]) -> Vec<u8> {
    let mut normalized = Vec::with_capacity(data.len());
    let mut bytes = data.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        // Drop a CR only when an LF follows directly; the LF is emitted on the next turn.
        if byte == b'\r' && bytes.peek() == Some(&b'\n') {
            continue;
        }
        normalized.push(byte);
    }
    normalized
}
1144
/// Inserts a CR before every LF that is not already preceded by one; existing CRLF pairs
/// and all other bytes are copied unchanged.
pub fn lf_to_crlf(data: &[u8]) -> Vec<u8> {
    let mut converted = Vec::with_capacity(data.len() + data.len() / 10);
    let mut previous: Option<u8> = None;
    for &byte in data {
        if byte == b'\n' && previous != Some(b'\r') {
            converted.push(b'\r');
        }
        converted.push(byte);
        previous = Some(byte);
    }
    converted
}
1160
1161// ---------------------------------------------------------------------------
1162// Output (checkout / smudge) direction
1163// ---------------------------------------------------------------------------
1164
1165/// Convert data from the object database for writing to the working tree
1166/// (the "smudge" direction).
1167///
1168/// This handles:
1169/// 1. LF → CRLF conversion based on config + attributes
1170/// 2. Smudge filter execution
1171/// 3. Ident keyword expansion
1172pub fn convert_to_worktree(
1173    data: &[u8],
1174    rel_path: &str,
1175    conv: &ConversionConfig,
1176    file_attrs: &FileAttrs,
1177    oid_hex: Option<&str>,
1178    smudge_meta: Option<&FilterSmudgeMeta>,
1179) -> Result<Vec<u8>, String> {
1180    let mut buf = data.to_vec();
1181
1182    // 1. Ident expansion
1183    if file_attrs.ident {
1184        if let Some(oid) = oid_hex {
1185            buf = expand_ident(&buf, oid);
1186        }
1187    }
1188
1189    // 2. Smudge filter (before EOL conversion) — process driver overrides shell smudge
1190    let driver = file_attrs.filter_driver_name.as_deref().unwrap_or("");
1191    if let Some(ref proc_cmd) = file_attrs.filter_process {
1192        buf = apply_process_smudge(proc_cmd, rel_path, &buf, smudge_meta).map_err(|_e| {
1193            if file_attrs.filter_smudge_required {
1194                format!("fatal: {rel_path}: smudge filter {driver} failed")
1195            } else {
1196                _e
1197            }
1198        })?;
1199    } else {
1200        match file_attrs.filter_smudge.as_ref() {
1201            Some(smudge_cmd) => match run_filter(smudge_cmd, &buf, rel_path) {
1202                Ok(filtered) => buf = filtered,
1203                Err(_e) => {
1204                    if file_attrs.filter_smudge_required {
1205                        return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
1206                    }
1207                }
1208            },
1209            None => {
1210                if file_attrs.filter_smudge_required {
1211                    return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
1212                }
1213            }
1214        }
1215    }
1216
1217    // 3. LF→CRLF for working tree
1218    let should_convert = should_convert_to_crlf(conv, file_attrs, &buf);
1219    if should_convert {
1220        buf = lf_to_crlf(&buf);
1221    }
1222
1223    Ok(buf)
1224}
1225
1226/// Decide whether to convert LF→CRLF on output.
1227fn should_convert_to_crlf(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
1228    match attrs.crlf_legacy {
1229        CrlfLegacyAttr::Unset | CrlfLegacyAttr::Input => return false,
1230        CrlfLegacyAttr::Crlf => {
1231            if attrs.text == TextAttr::Unset {
1232                return false;
1233            }
1234            // Legacy `crlf` (set) forces CRLF on checkout (even for paths Git
1235            // would otherwise treat as binary; see t0020 "t* crlf" + `three`).
1236            return true;
1237        }
1238        CrlfLegacyAttr::Unspecified => {}
1239    }
1240
1241    // If text is explicitly unset, never convert
1242    if attrs.text == TextAttr::Unset {
1243        return false;
1244    }
1245
1246    // If there's an explicit eol attribute
1247    if attrs.eol != EolAttr::Unspecified {
1248        if attrs.text == TextAttr::Auto && is_binary(data) {
1249            return false;
1250        }
1251        if attrs.eol != EolAttr::Crlf {
1252            return false;
1253        }
1254        // `text=auto` + `eol=crlf` → Git `CRLF_AUTO_CRLF` (safe mixed handling).
1255        if attrs.text == TextAttr::Auto {
1256            return auto_crlf_should_smudge_lf_to_crlf(data);
1257        }
1258        // Explicit `eol=crlf` with `text` set, etc. → `CRLF_TEXT_CRLF` (always normalize).
1259        return true;
1260    }
1261
1262    // If text is explicitly set, use eol config
1263    if attrs.text == TextAttr::Set {
1264        return output_eol_is_crlf(conv);
1265    }
1266
1267    if attrs.text == TextAttr::Auto {
1268        if is_binary(data) {
1269            return false;
1270        }
1271        if !output_eol_is_crlf(conv) {
1272            return false;
1273        }
1274        return auto_crlf_should_smudge_lf_to_crlf(data);
1275    }
1276
1277    // No text attribute: fall back to core.autocrlf
1278    match conv.autocrlf {
1279        AutoCrlf::True => {
1280            if is_binary(data) {
1281                return false;
1282            }
1283            auto_crlf_should_smudge_lf_to_crlf(data)
1284        }
1285        AutoCrlf::Input | AutoCrlf::False => false,
1286    }
1287}
1288
1289/// Whether the output EOL should be CRLF based on config.
1290fn output_eol_is_crlf(conv: &ConversionConfig) -> bool {
1291    // Git `text_eol_is_crlf`: autocrlf=input forces LF output before `core.eol` is consulted.
1292    if conv.autocrlf == AutoCrlf::Input {
1293        return false;
1294    }
1295    if conv.autocrlf == AutoCrlf::True {
1296        return true;
1297    }
1298    match conv.eol {
1299        CoreEol::Crlf => true,
1300        CoreEol::Lf => false,
1301        CoreEol::Native => {
1302            // On Unix, native is LF
1303            cfg!(windows)
1304        }
1305    }
1306}
1307
1308/// Expand `$Id$` → `$Id: <oid>$` in data.
1309///
1310/// Matches Git's `ident_to_worktree` in `convert.c`: same-line `$` terminator, and foreign
1311/// idents (internal spaces before the closing `$`) are left unchanged.
1312fn expand_ident(data: &[u8], oid: &str) -> Vec<u8> {
1313    if !count_ident_regions(data) {
1314        return data.to_vec();
1315    }
1316    let replacement = format!("$Id: {oid} $");
1317    let mut out = Vec::with_capacity(data.len() + 60);
1318    let mut i = 0;
1319    while i < data.len() {
1320        if data[i] != b'$' {
1321            out.push(data[i]);
1322            i += 1;
1323            continue;
1324        }
1325        if i + 3 > data.len() || data[i + 1] != b'I' || data[i + 2] != b'd' {
1326            out.push(data[i]);
1327            i += 1;
1328            continue;
1329        }
1330        let after_id = i + 3;
1331        let ch = data.get(after_id).copied();
1332        match ch {
1333            Some(b'$') => {
1334                out.extend_from_slice(replacement.as_bytes());
1335                i = after_id + 1;
1336            }
1337            Some(b':') => {
1338                let rest = &data[after_id + 1..];
1339                let line_end = rest
1340                    .iter()
1341                    .position(|&b| b == b'\n' || b == b'\r')
1342                    .unwrap_or(rest.len());
1343                let line = &rest[..line_end];
1344                let Some(dollar_rel) = line.iter().position(|&b| b == b'$') else {
1345                    out.push(data[i]);
1346                    i += 1;
1347                    continue;
1348                };
1349                if line[..dollar_rel].contains(&b'\n') {
1350                    out.push(data[i]);
1351                    i += 1;
1352                    continue;
1353                }
1354                // Foreign ident (Git `ident_to_worktree`): first space in the payload after the
1355                // byte following `:` must not be the last character before `$`.
1356                let payload = &line[..dollar_rel];
1357                let foreign = payload.len() > 1
1358                    && payload[1..]
1359                        .iter()
1360                        .position(|&b| b == b' ')
1361                        .is_some_and(|rel| {
1362                            let pos = 1 + rel;
1363                            pos < payload.len().saturating_sub(1)
1364                        });
1365                if foreign {
1366                    out.push(data[i]);
1367                    i += 1;
1368                    continue;
1369                }
1370                out.extend_from_slice(replacement.as_bytes());
1371                i = after_id + 1 + dollar_rel + 1;
1372            }
1373            _ => {
1374                out.push(data[i]);
1375                i += 1;
1376            }
1377        }
1378    }
1379    out
1380}
1381
/// Whether the buffer holds at least one `$Id$` or same-line `$Id: ... $` region (the
/// candidates Git's `count_ident` looks for).
///
/// Despite the name this returns a flag, not a count. For the `$Id:` form the closing `$`
/// must appear before any `\n` or `\r`.
fn count_ident_regions(data: &[u8]) -> bool {
    data.iter()
        .enumerate()
        .filter(|&(_, &byte)| byte == b'$')
        .any(|(start, _)| match &data[start + 1..] {
            [b'I', b'd', b'$', ..] => true,
            [b'I', b'd', b':', payload @ ..] => {
                // The first `$` or line terminator decides: only `$` closes the keyword.
                payload
                    .iter()
                    .find(|&&b| matches!(b, b'$' | b'\n' | b'\r'))
                    == Some(&b'$')
            }
            _ => false,
        })
}
1420
/// Collapse every expanded `$Id: ... $` keyword back to the bare `$Id$`.
///
/// A keyword is only collapsed when its closing `$` appears on the same line, i.e. before
/// any `\n` or `\r`; everything else is copied through unchanged.
pub fn collapse_ident(data: &[u8]) -> Vec<u8> {
    let mut collapsed = Vec::with_capacity(data.len());
    let mut remaining = data;
    while let Some((&first, tail)) = remaining.split_first() {
        if let Some(after_colon) = remaining.strip_prefix(b"$Id:") {
            // Whichever of `$`, `\n`, `\r` comes first decides whether the keyword closes.
            let terminator = after_colon
                .iter()
                .position(|&b| matches!(b, b'$' | b'\n' | b'\r'));
            if let Some(end) = terminator.filter(|&end| after_colon[end] == b'$') {
                collapsed.extend_from_slice(b"$Id$");
                remaining = &after_colon[end + 1..];
                continue;
            }
        }
        collapsed.push(first);
        remaining = tail;
    }
    collapsed
}
1444
/// Shell-quote `s` with single quotes, matching Git's `sq_quote_buf`.
///
/// The whole string is wrapped in a single-quote pair. Each `'` becomes `'\''` and each
/// `!` becomes `'\!'`, i.e. the quote is closed, the character is backslash-escaped, and
/// the quote is reopened.
///
/// Examples: `a b` → `'a b'`, `a'b` → `'a'\''b'`, `a!b` → `'a'\!'b'`.
fn sq_quote_buf(s: &str) -> String {
    let mut out = String::with_capacity(s.len() + 2);
    out.push('\'');
    for ch in s.chars() {
        match ch {
            '\'' | '!' => {
                // Leave the quoted region, emit the escaped character, re-enter it.
                out.push_str("'\\");
                out.push(ch);
                out.push('\'');
            }
            _ => out.push(ch),
        }
    }
    out.push('\'');
    out
}
1459
1460/// Expand Git filter command placeholders: `%%` → `%`, `%f` → quoted repository-relative path.
1461fn expand_filter_command(cmd: &str, rel_path: &str) -> String {
1462    let mut out = String::with_capacity(cmd.len() + rel_path.len() + 8);
1463    let mut chars = cmd.chars().peekable();
1464    while let Some(c) = chars.next() {
1465        if c == '%' {
1466            match chars.peek() {
1467                Some('%') => {
1468                    chars.next();
1469                    out.push('%');
1470                }
1471                Some('f') => {
1472                    chars.next();
1473                    out.push_str(&sq_quote_buf(rel_path));
1474                }
1475                _ => out.push('%'),
1476            }
1477        } else {
1478            out.push(c);
1479        }
1480    }
1481    out
1482}
1483
1484/// Run a filter command, piping data through stdin→stdout.
1485fn run_filter(cmd: &str, data: &[u8], rel_path: &str) -> Result<Vec<u8>, std::io::Error> {
1486    let expanded = expand_filter_command(cmd, rel_path);
1487    let mut child = Command::new("sh")
1488        .arg("-c")
1489        .arg(&expanded)
1490        .stdin(Stdio::piped())
1491        .stdout(Stdio::piped())
1492        .stderr(Stdio::inherit())
1493        .spawn()?;
1494
1495    use std::io::{ErrorKind, Write};
1496    if let Some(ref mut stdin) = child.stdin {
1497        if let Err(e) = stdin.write_all(data) {
1498            // Match Git: if the filter exits without reading stdin, ignore EPIPE.
1499            if e.kind() != ErrorKind::BrokenPipe {
1500                return Err(e);
1501            }
1502        }
1503    }
1504    drop(child.stdin.take());
1505
1506    let output = child.wait_with_output()?;
1507    if !output.status.success() {
1508        return Err(std::io::Error::other(format!(
1509            "filter command exited with status {}",
1510            output.status
1511        )));
1512    }
1513
1514    Ok(output.stdout)
1515}
1516
// `AttrRule` itself is an internal type; the rule vector is exposed through `load_gitattributes`.
// The public API treats that vector as opaque: it is produced by `load_gitattributes` and passed to `get_file_attrs`.
1519
/// Alias for the list of loaded gitattributes rules.
///
/// Callers are expected to treat the value as opaque rather than inspecting the
/// individual [`AttrRule`] entries.
pub type GitAttributes = Vec<AttrRule>;
1522
#[cfg(test)]
mod tests {
    use super::*;

    /// `core.autocrlf=true`, `core.eol=lf`, `core.safecrlf=false`.
    fn autocrlf_true_config() -> ConversionConfig {
        ConversionConfig {
            autocrlf: AutoCrlf::True,
            eol: CoreEol::Lf,
            safecrlf: SafeCrlf::False,
        }
    }

    /// Smudge `blob` with default attributes under [`autocrlf_true_config`].
    fn smudge_with_autocrlf_true(blob: &[u8], rel_path: &str) -> Vec<u8> {
        let conv = autocrlf_true_config();
        let attrs = FileAttrs::default();
        convert_to_worktree(blob, rel_path, &conv, &attrs, None, None).unwrap()
    }

    #[test]
    fn test_crlf_to_lf() {
        let cases: [(&[u8], &[u8]); 3] = [
            (b"hello\r\nworld\r\n", b"hello\nworld\n"),
            (b"hello\nworld\n", b"hello\nworld\n"),
            (b"hello\r\n", b"hello\n"),
        ];
        for (input, expected) in cases {
            assert_eq!(crlf_to_lf(input), expected);
        }
    }

    #[test]
    fn test_lf_to_crlf() {
        let cases: [(&[u8], &[u8]); 2] = [
            (b"hello\nworld\n", b"hello\r\nworld\r\n"),
            (b"hello\r\nworld\r\n", b"hello\r\nworld\r\n"),
        ];
        for (input, expected) in cases {
            assert_eq!(lf_to_crlf(input), expected);
        }
    }

    #[test]
    fn test_has_crlf() {
        assert!(has_crlf(b"hello\r\nworld"));
        assert!(!has_crlf(b"hello\nworld"));
    }

    #[test]
    fn smudge_mixed_line_endings_unchanged_with_autocrlf_true() {
        // One CRLF among lone LFs: the auto mode must leave the blob alone.
        let blob = b"Oh\nhere\nis\nCRLF\r\nin\ntext\n".to_vec();
        let out = smudge_with_autocrlf_true(&blob, "mixed");
        assert_eq!(out, blob);
    }

    #[test]
    fn smudge_lf_only_gets_crlf_with_autocrlf_true() {
        let out = smudge_with_autocrlf_true(b"a\nb\n", "x");
        assert_eq!(out, b"a\r\nb\r\n");
    }

    #[test]
    fn test_is_binary() {
        assert!(is_binary(b"hello\0world"));
        assert!(!is_binary(b"hello world"));
    }

    #[test]
    fn attr_dir_only_pattern_does_not_match_same_named_file() {
        let rules = parse_gitattributes_content("ignored-only-if-dir/ export-ignore\n");
        let rule = &rules[0];
        assert!(rule.must_be_dir);
        assert!(rule.basename_only);

        let matches_nested_file =
            attr_rule_matches(rule, "not-ignored-dir/ignored-only-if-dir", false);
        assert!(!matches_nested_file);

        let matches_directory = attr_rule_matches(rule, "ignored-only-if-dir", true);
        assert!(matches_directory);
    }

    #[test]
    fn test_expand_collapse_ident() {
        let expanded = expand_ident(b"$Id$", "abc123");
        assert_eq!(expanded, b"$Id: abc123 $");

        let collapsed = collapse_ident(&expanded);
        assert_eq!(collapsed, b"$Id$");
    }

    #[test]
    fn expand_ident_does_not_span_lines_for_partial_keyword() {
        let expanded = expand_ident(b"$Id: NoTerminatingSymbol\n$Id: deadbeef $\n", "newoid");
        assert_eq!(expanded, b"$Id: NoTerminatingSymbol\n$Id: newoid $\n");
    }

    #[test]
    fn expand_ident_preserves_foreign_id_with_internal_spaces() {
        let data = b"$Id: Foreign Commit With Spaces $\n";
        assert_eq!(expand_ident(data, "abc"), data);
    }

    #[test]
    fn expand_filter_command_percent_f_quotes_path() {
        let quoted = expand_filter_command("sh ./x.sh %f --extra", "name  with 'sq'");
        assert_eq!(quoted, "sh ./x.sh 'name  with '\\''sq'\\''' --extra");

        let literal_percent = expand_filter_command("a %% b", "p");
        assert_eq!(literal_percent, "a % b");
    }
}