1use std::path::{Path, PathBuf};
18use std::process::{Command, Stdio};
19
20use crate::config::ConfigSet;
21use crate::filter_process::{apply_process_clean, apply_process_smudge, FilterSmudgeMeta};
22
/// Parsed value of the `core.autocrlf` configuration key.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AutoCrlf {
    /// `core.autocrlf` is a truthy value (`true`, `yes`, `on`, `1`).
    True,
    /// `core.autocrlf=input`.
    Input,
    /// `core.autocrlf` is missing or has any other value.
    False,
}
30
/// Parsed value of the `core.eol` configuration key.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CoreEol {
    /// `core.eol=lf`.
    Lf,
    /// `core.eol=crlf`.
    Crlf,
    /// `core.eol=native`, an unrecognised value, or the key is missing.
    Native,
}
38
/// Parsed value of the `core.safecrlf` configuration key.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SafeCrlf {
    /// Irreversible line-ending conversions are reported as errors.
    True,
    /// Irreversible line-ending conversions only print a warning.
    Warn,
    /// The round-trip check is skipped entirely.
    False,
}
46
/// State of the `text` gitattribute for a path.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TextAttr {
    /// `text` — the attribute is set.
    Set,
    /// `text=auto`.
    Auto,
    /// `-text` — the attribute is explicitly unset.
    Unset,
    /// No matching rule mentions `text` (or it carries an unknown value).
    Unspecified,
}
59
/// State of the `eol` gitattribute for a path.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum EolAttr {
    /// `eol=lf`.
    Lf,
    /// `eol=crlf`.
    Crlf,
    /// No matching rule mentions `eol`, or its value is neither `lf` nor `crlf`.
    Unspecified,
}
67
/// State of the legacy `crlf` gitattribute for a path.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CrlfLegacyAttr {
    /// No matching rule mentions `crlf` (or it carries an unknown value).
    #[default]
    Unspecified,
    /// `-crlf`.
    Unset,
    /// `crlf=input`.
    Input,
    /// `crlf` — the attribute is set.
    Crlf,
}
80
/// State of the `merge` gitattribute for a path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MergeAttr {
    /// No matching rule mentions `merge`, or it is set without a value.
    Unspecified,
    /// `-merge`.
    Unset,
    /// `merge=<name>`; carries the driver name.
    Driver(String),
}
91
/// State of the `diff` gitattribute for a path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum DiffAttr {
    /// No matching rule assigns a driver or unsets `diff`.
    Unspecified,
    /// `-diff`.
    Unset,
    /// `diff=<name>`; carries the driver name.
    Driver(String),
}
102
103#[derive(Debug, Clone)]
105pub struct FileAttrs {
106 pub text: TextAttr,
107 pub eol: EolAttr,
108 pub diff_attr: DiffAttr,
110 pub export_ignore: bool,
112 pub export_subst: bool,
114 pub filter_clean: Option<String>,
115 pub filter_smudge: Option<String>,
116 pub filter_process: Option<String>,
118 pub filter_driver_name: Option<String>,
120 pub filter_smudge_required: bool,
122 pub filter_clean_required: bool,
124 pub ident: bool,
125 pub merge: MergeAttr,
126 pub conflict_marker_size: Option<String>,
127 pub working_tree_encoding: Option<String>,
129 pub crlf_legacy: CrlfLegacyAttr,
131 pub whitespace: Option<String>,
134}
135
136impl Default for FileAttrs {
137 fn default() -> Self {
138 FileAttrs {
139 text: TextAttr::Unspecified,
140 eol: EolAttr::Unspecified,
141 diff_attr: DiffAttr::Unspecified,
142 export_ignore: false,
143 export_subst: false,
144 filter_clean: None,
145 filter_smudge: None,
146 filter_process: None,
147 filter_driver_name: None,
148 filter_smudge_required: false,
149 filter_clean_required: false,
150 ident: false,
151 merge: MergeAttr::Unspecified,
152 conflict_marker_size: None,
153 working_tree_encoding: None,
154 crlf_legacy: CrlfLegacyAttr::Unspecified,
155 whitespace: None,
156 }
157 }
158}
159
160#[derive(Debug, Clone)]
162pub struct ConversionConfig {
163 pub autocrlf: AutoCrlf,
164 pub eol: CoreEol,
165 pub safecrlf: SafeCrlf,
166}
167
168impl ConversionConfig {
169 pub fn from_config(config: &ConfigSet) -> Self {
171 let autocrlf = match config.get("core.autocrlf") {
172 Some(v) => match v.to_lowercase().as_str() {
173 "true" | "yes" | "on" | "1" => AutoCrlf::True,
174 "input" => AutoCrlf::Input,
175 _ => AutoCrlf::False,
176 },
177 None => AutoCrlf::False,
178 };
179
180 let eol = match config.get("core.eol") {
181 Some(v) => match v.to_lowercase().as_str() {
182 "crlf" => CoreEol::Crlf,
183 "lf" => CoreEol::Lf,
184 "native" => CoreEol::Native,
185 _ => CoreEol::Native,
186 },
187 None => CoreEol::Native,
188 };
189
190 let safecrlf = match config.get("core.safecrlf") {
191 Some(v) => match v.to_lowercase().as_str() {
192 "true" | "yes" | "on" | "1" => SafeCrlf::True,
193 "warn" => SafeCrlf::Warn,
194 _ => SafeCrlf::False,
195 },
196 None => SafeCrlf::Warn,
198 };
199
200 ConversionConfig {
201 autocrlf,
202 eol,
203 safecrlf,
204 }
205 }
206}
207
/// One parsed line of a gitattributes file: a pattern plus the attribute
/// assignments that apply to matching paths.
#[derive(Clone, Debug)]
pub struct AttrRule {
    /// Glob pattern, with any trailing `/` already removed.
    pattern: String,
    /// The pattern ended in `/` and therefore only matches directories.
    must_be_dir: bool,
    /// The pattern contains no `/` and is matched against the basename only.
    basename_only: bool,
    /// `(name, value)` pairs; the value is `"set"`, `"unset"` or the text
    /// after `=`.
    attrs: Vec<(String, String)>,
}
219
220impl AttrRule {
221 pub fn diff_drivers(&self) -> impl Iterator<Item = &str> + '_ {
223 self.attrs.iter().filter_map(|(name, value)| {
224 if name == "diff" && !value.is_empty() && value != "unset" && value != "set" {
225 Some(value.as_str())
226 } else {
227 None
228 }
229 })
230 }
231}
232
233pub fn load_gitattributes(work_tree: &Path) -> Vec<AttrRule> {
235 let mut rules = Vec::new();
236
237 let root_attrs = work_tree.join(".gitattributes");
238 if let Ok(content) = std::fs::read_to_string(&root_attrs) {
239 parse_gitattributes(&content, &mut rules);
240 }
241
242 let info_attrs = work_tree.join(".git/info/attributes");
243 if let Ok(content) = std::fs::read_to_string(&info_attrs) {
244 parse_gitattributes(&content, &mut rules);
245 }
246
247 rules
248}
249
250#[must_use]
255pub fn parse_gitattributes_content(content: &str) -> Vec<AttrRule> {
256 let mut rules = Vec::new();
257 parse_gitattributes(content, &mut rules);
258 rules
259}
260
261pub fn load_gitattributes_from_index(
264 index: &crate::index::Index,
265 odb: &crate::odb::Odb,
266) -> Vec<AttrRule> {
267 let mut rules = Vec::new();
268
269 if let Some(entry) = index.get(b".gitattributes", 0) {
271 if let Ok(obj) = odb.read(&entry.oid) {
272 if let Ok(content) = String::from_utf8(obj.data) {
273 parse_gitattributes(&content, &mut rules);
274 }
275 }
276 }
277
278 rules
279}
280
281pub fn load_gitattributes_for_checkout(
287 work_tree: &Path,
288 rel_path: &str,
289 index: &crate::index::Index,
290 odb: &crate::odb::Odb,
291) -> Vec<AttrRule> {
292 let mut rules = load_gitattributes(work_tree);
293
294 if !work_tree.join(".gitattributes").exists() {
297 if let Some(entry) = index.get(b".gitattributes", 0) {
298 if let Ok(obj) = odb.read(&entry.oid) {
299 if let Ok(content) = String::from_utf8(obj.data) {
300 parse_gitattributes(&content, &mut rules);
301 }
302 }
303 }
304 }
305
306 let path = Path::new(rel_path);
307 if let Some(parent) = path.parent() {
308 let mut accum = PathBuf::new();
309 for comp in parent.components() {
310 accum.push(comp);
311 let ga_rel = accum.join(".gitattributes");
312 let wt_ga = work_tree.join(&ga_rel);
313 if let Ok(content) = std::fs::read_to_string(&wt_ga) {
314 parse_gitattributes(&content, &mut rules);
315 } else {
316 let key = path_to_index_bytes(&ga_rel);
317 if let Some(entry) = index.get(&key, 0) {
318 if let Ok(obj) = odb.read(&entry.oid) {
319 if let Ok(content) = String::from_utf8(obj.data) {
320 parse_gitattributes(&content, &mut rules);
321 }
322 }
323 }
324 }
325 }
326 }
327
328 rules
329}
330
/// Converts a relative path into the byte string used as an index key.
///
/// On Unix the raw OS bytes are used unchanged, so non-UTF-8 names survive.
/// On other platforms the path is rendered lossily as UTF-8 and native `\`
/// separators are normalised to `/`, which is what index paths use.
fn path_to_index_bytes(path: &Path) -> Vec<u8> {
    #[cfg(unix)]
    fn imp(path: &Path) -> Vec<u8> {
        use std::os::unix::ffi::OsStrExt;
        path.as_os_str().as_bytes().to_vec()
    }

    #[cfg(not(unix))]
    fn imp(path: &Path) -> Vec<u8> {
        path.to_string_lossy().replace('\\', "/").into_bytes()
    }

    imp(path)
}
335
336fn parse_gitattributes(content: &str, rules: &mut Vec<AttrRule>) {
337 for line in content.lines() {
338 let line = line.trim();
339 if line.is_empty() || line.starts_with('#') {
340 continue;
341 }
342
343 let mut parts = line.split_whitespace();
344 let raw_pattern = match parts.next() {
345 Some(p) => p,
346 None => continue,
347 };
348
349 let mut pat = raw_pattern.to_owned();
350 let mut must_be_dir = false;
351 if pat.ends_with('/') && pat.len() > 1 {
352 pat.pop();
353 must_be_dir = true;
354 }
355 let basename_only = !pat.contains('/');
356
357 let mut attrs = Vec::new();
358 for part in parts {
359 if part == "binary" {
360 attrs.push(("text".to_owned(), "unset".to_owned()));
361 attrs.push(("diff".to_owned(), "unset".to_owned()));
362 } else if let Some(rest) = part.strip_prefix('-') {
363 attrs.push((rest.to_owned(), "unset".to_owned()));
364 } else if let Some((key, val)) = part.split_once('=') {
365 attrs.push((key.to_owned(), val.to_owned()));
366 } else {
367 attrs.push((part.to_owned(), "set".to_owned()));
368 }
369 }
370
371 if !attrs.is_empty() {
372 rules.push(AttrRule {
373 pattern: pat,
374 must_be_dir,
375 basename_only,
376 attrs,
377 });
378 }
379 }
380}
381
/// Returns `true` when `value`, trimmed and compared ASCII
/// case-insensitively, is one of `true`, `yes`, `on` or `1`.
fn config_bool_truthy(value: &str) -> bool {
    let normalized = value.trim().to_ascii_lowercase();
    ["true", "yes", "on", "1"].contains(&normalized.as_str())
}
388
389pub fn get_file_attrs(
394 rules: &[AttrRule],
395 rel_path: &str,
396 is_dir: bool,
397 config: &ConfigSet,
398) -> FileAttrs {
399 let mut fa = FileAttrs::default();
400
401 for rule in rules {
403 if attr_rule_matches(rule, rel_path, is_dir) {
404 for (name, value) in &rule.attrs {
405 match name.as_str() {
406 "text" => {
407 fa.text = match value.as_str() {
408 "set" => TextAttr::Set,
409 "unset" => TextAttr::Unset,
410 "auto" => TextAttr::Auto,
411 _ => TextAttr::Unspecified,
412 };
413 }
414 "eol" => {
415 fa.eol = match value.as_str() {
416 "lf" => EolAttr::Lf,
417 "crlf" => EolAttr::Crlf,
418 _ => EolAttr::Unspecified,
419 };
420 }
421 "filter" => {
422 if value == "unset" {
423 fa.filter_clean = None;
424 fa.filter_smudge = None;
425 fa.filter_process = None;
426 fa.filter_driver_name = None;
427 fa.filter_smudge_required = false;
428 fa.filter_clean_required = false;
429 } else {
430 let clean_key = format!("filter.{value}.clean");
431 let smudge_key = format!("filter.{value}.smudge");
432 let process_key = format!("filter.{value}.process");
433 let req_key = format!("filter.{value}.required");
434 fa.filter_driver_name = Some(value.clone());
435 fa.filter_process = config.get(&process_key).filter(|s| !s.is_empty());
436 if fa.filter_process.is_some() {
437 fa.filter_clean = None;
438 fa.filter_smudge = None;
439 } else {
440 fa.filter_clean = config.get(&clean_key);
441 fa.filter_smudge = config.get(&smudge_key);
442 }
443 let required =
444 config.get(&req_key).is_some_and(|v| config_bool_truthy(&v));
445 fa.filter_smudge_required = required;
446 fa.filter_clean_required = required;
447 }
448 }
449 "diff" => {
450 if value == "unset" {
451 fa.diff_attr = DiffAttr::Unset;
452 } else if !value.is_empty() && value != "set" {
453 fa.diff_attr = DiffAttr::Driver(value.clone());
454 }
455 }
456 "ident" => {
457 fa.ident = value == "set";
458 }
459 "export-ignore" => {
460 fa.export_ignore = value != "unset";
461 }
462 "export-subst" => {
463 fa.export_subst = value != "unset";
464 }
465 "merge" => {
466 fa.merge = match value.as_str() {
467 "unset" => MergeAttr::Unset,
468 "set" => MergeAttr::Unspecified,
469 other => MergeAttr::Driver(other.to_string()),
470 };
471 }
472 "conflict-marker-size" => {
473 if value == "unset" {
474 fa.conflict_marker_size = None;
475 } else {
476 fa.conflict_marker_size = Some(value.clone());
477 }
478 }
479 "working-tree-encoding" => {
480 if value != "unset" && !value.is_empty() {
481 fa.working_tree_encoding = Some(value.clone());
482 }
483 }
484 "crlf" => {
485 fa.crlf_legacy = match value.as_str() {
486 "unset" => CrlfLegacyAttr::Unset,
487 "input" => CrlfLegacyAttr::Input,
488 "set" => CrlfLegacyAttr::Crlf,
489 _ => CrlfLegacyAttr::Unspecified,
490 };
491 }
492 "whitespace" => {
493 if value == "unset" {
494 fa.whitespace = Some("unset".to_owned());
495 } else if !value.is_empty() {
496 fa.whitespace = Some(value.clone());
497 }
498 }
499 _ => {}
500 }
501 }
502 }
503 }
504
505 fa
506}
507
508#[must_use]
513pub fn path_has_gitattribute(
514 rules: &[AttrRule],
515 path: &str,
516 is_dir: bool,
517 attr_name: &str,
518) -> bool {
519 let mut last: Option<&str> = None;
520 for rule in rules {
521 if attr_rule_matches(rule, path, is_dir) {
522 for (name, value) in &rule.attrs {
523 if name == attr_name {
524 last = Some(value.as_str());
525 }
526 }
527 }
528 }
529 match last {
530 None | Some("unset") => false,
531 Some(_) => true,
532 }
533}
534
535#[must_use]
537pub fn attr_rule_matches(rule: &AttrRule, rel_path: &str, is_dir: bool) -> bool {
538 let path_is_dir = is_dir || rel_path.ends_with('/');
539 if rule.must_be_dir && !path_is_dir {
540 return false;
541 }
542 let path_for_glob = rel_path.trim_end_matches('/');
543 if rule.basename_only {
544 let basename = path_for_glob.rsplit('/').next().unwrap_or(path_for_glob);
545 glob_matches(rule.pattern.as_str(), basename)
546 } else {
547 glob_matches(rule.pattern.as_str(), path_for_glob)
548 }
549}
550
551fn glob_matches(pattern: &str, text: &str) -> bool {
552 glob_match_bytes(pattern.as_bytes(), text.as_bytes())
553}
554
/// Matches `text` against the glob `pat`.
///
/// `*` matches any run of bytes (including `/` and the empty run), `?`
/// matches exactly one byte, and every other byte matches itself. There is
/// no escaping and no bracket-expression support.
///
/// The matcher is iterative and remembers only the most recent `*`, so the
/// worst case is `O(pat.len() * text.len())`. Naive recursive backtracking
/// can take exponential time on patterns with many stars.
fn glob_match_bytes(pat: &[u8], text: &[u8]) -> bool {
    let mut p = 0usize;
    let mut t = 0usize;
    // (pattern index just after the last `*`, text index that star currently
    // extends to); used to retry with the star swallowing one more byte.
    let mut last_star: Option<(usize, usize)> = None;

    while t < text.len() {
        match pat.get(p).copied() {
            Some(b'*') => {
                // First try letting the star match nothing.
                p += 1;
                last_star = Some((p, t));
            }
            Some(c) if c == b'?' || c == text[t] => {
                p += 1;
                t += 1;
            }
            _ => match last_star {
                // Mismatch: widen the most recent star by one byte.
                Some((after_star, covered)) => {
                    p = after_star;
                    t = covered + 1;
                    last_star = Some((after_star, covered + 1));
                }
                None => return false,
            },
        }
    }

    // Text exhausted: only trailing stars may remain in the pattern.
    pat[p..].iter().all(|&b| b == b'*')
}
578
/// Heuristic binary check: `true` when a NUL byte occurs within the first
/// 8000 bytes of `data`.
pub fn is_binary(data: &[u8]) -> bool {
    data.iter().take(8000).any(|&byte| byte == 0)
}
584
/// Flag bit: the content contains at least one lone LF.
const CONVERT_STAT_BITS_TXT_LF: u32 = 1 << 0;
/// Flag bit: the content contains at least one CRLF pair.
const CONVERT_STAT_BITS_TXT_CRLF: u32 = 1 << 1;
/// Flag bit: the content is classified as binary.
const CONVERT_STAT_BITS_BIN: u32 = 1 << 2;
589
/// Byte-class counters gathered over a buffer by [`gather_text_stat`].
#[derive(Clone, Default)]
struct TextStat {
    /// Number of NUL bytes.
    nul: u32,
    /// Number of CR bytes not followed by LF.
    lonecr: u32,
    /// Number of LF bytes not preceded by CR.
    lonelf: u32,
    /// Number of CRLF pairs.
    crlf: u32,
    /// Number of bytes counted as printable.
    printable: u32,
    /// Number of bytes counted as non-printable (NUL included).
    nonprintable: u32,
}
599
600fn gather_text_stat(data: &[u8]) -> TextStat {
601 let mut s = TextStat::default();
602 let mut i = 0usize;
603 while i < data.len() {
604 let c = data[i];
605 if c == b'\r' {
606 if i + 1 < data.len() && data[i + 1] == b'\n' {
607 s.crlf += 1;
608 i += 2;
609 } else {
610 s.lonecr += 1;
611 i += 1;
612 }
613 continue;
614 }
615 if c == b'\n' {
616 s.lonelf += 1;
617 i += 1;
618 continue;
619 }
620 if c == 127 {
621 s.nonprintable += 1;
622 } else if c < 32 {
623 match c {
624 b'\t' | b'\x08' | b'\x1b' | b'\x0c' => s.printable += 1,
625 0 => {
626 s.nul += 1;
627 s.nonprintable += 1;
628 }
629 _ => s.nonprintable += 1,
630 }
631 } else {
632 s.printable += 1;
633 }
634 i += 1;
635 }
636 s
637}
638
639fn convert_is_binary(stats: &TextStat) -> bool {
640 stats.lonecr > 0 || stats.nul > 0 || (stats.printable >> 7) < stats.nonprintable
641}
642
643fn git_text_stat(data: &[u8]) -> TextStat {
644 let mut stats = gather_text_stat(data);
645 if !data.is_empty() && data[data.len() - 1] == 0x1a {
646 stats.nonprintable = stats.nonprintable.saturating_sub(1);
647 }
648 stats
649}
650
651fn will_convert_lf_to_crlf_from_stats(
653 stats: &TextStat,
654 conv: &ConversionConfig,
655 attrs: &FileAttrs,
656) -> bool {
657 let has_lone_lf = stats.lonelf > 0;
658 let is_bin = convert_is_binary(stats);
659
660 match attrs.crlf_legacy {
661 CrlfLegacyAttr::Unset | CrlfLegacyAttr::Input => return false,
662 CrlfLegacyAttr::Crlf => {
663 if attrs.text == TextAttr::Unset {
664 return false;
665 }
666 return has_lone_lf;
667 }
668 CrlfLegacyAttr::Unspecified => {}
669 }
670
671 if attrs.text == TextAttr::Unset {
672 return false;
673 }
674
675 if attrs.eol != EolAttr::Unspecified {
676 if attrs.text == TextAttr::Auto && is_bin {
677 return false;
678 }
679 if attrs.eol != EolAttr::Crlf {
680 return false;
681 }
682 if attrs.text == TextAttr::Auto {
683 return auto_crlf_should_smudge_lf_to_crlf_from_stats(stats);
684 }
685 return has_lone_lf;
686 }
687
688 if attrs.text == TextAttr::Set {
689 if !output_eol_is_crlf(conv) {
690 return false;
691 }
692 return has_lone_lf;
693 }
694
695 if attrs.text == TextAttr::Auto {
696 if is_bin || !output_eol_is_crlf(conv) {
697 return false;
698 }
699 return auto_crlf_should_smudge_lf_to_crlf_from_stats(stats);
700 }
701
702 match conv.autocrlf {
703 AutoCrlf::True => {
704 if is_bin {
705 return false;
706 }
707 auto_crlf_should_smudge_lf_to_crlf_from_stats(stats)
708 }
709 AutoCrlf::Input | AutoCrlf::False => false,
710 }
711}
712
713fn auto_crlf_should_smudge_lf_to_crlf_from_stats(stats: &TextStat) -> bool {
714 if stats.lonelf == 0 {
715 return false;
716 }
717 if stats.lonecr > 0 || stats.crlf > 0 {
718 return false;
719 }
720 !convert_is_binary(stats)
721}
722
723fn gather_convert_stats(data: &[u8]) -> u32 {
724 if data.is_empty() {
725 return 0;
726 }
727 let mut stats = gather_text_stat(data);
728 if !data.is_empty() && data[data.len() - 1] == 0x1a {
729 stats.nonprintable = stats.nonprintable.saturating_sub(1);
730 }
731 let mut ret = 0u32;
732 if convert_is_binary(&stats) {
733 ret |= CONVERT_STAT_BITS_BIN;
734 }
735 if stats.crlf > 0 {
736 ret |= CONVERT_STAT_BITS_TXT_CRLF;
737 }
738 if stats.lonelf > 0 {
739 ret |= CONVERT_STAT_BITS_TXT_LF;
740 }
741 ret
742}
743
744#[must_use]
746pub fn gather_convert_stats_ascii(data: &[u8]) -> &'static str {
747 let convert_stats = gather_convert_stats(data);
748 if convert_stats & CONVERT_STAT_BITS_BIN != 0 {
749 return "-text";
750 }
751 match convert_stats {
752 CONVERT_STAT_BITS_TXT_LF => "lf",
753 CONVERT_STAT_BITS_TXT_CRLF => "crlf",
754 x if x == (CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF) => "mixed",
755 _ => "none",
756 }
757}
758
759#[must_use]
762pub fn convert_attr_ascii_for_ls_files(
763 rules: &[AttrRule],
764 rel_path: &str,
765 config: &ConfigSet,
766) -> String {
767 let fa = get_file_attrs(rules, rel_path, false, config);
768 let mut action = match fa.text {
770 TextAttr::Set => 1, TextAttr::Unset => 2, TextAttr::Auto => 5, TextAttr::Unspecified => 0,
774 };
775 if action == 0 {
776 action = match fa.crlf_legacy {
777 CrlfLegacyAttr::Crlf => 1,
778 CrlfLegacyAttr::Unset => 2,
779 CrlfLegacyAttr::Input => 3, CrlfLegacyAttr::Unspecified => 0,
781 };
782 }
783 if action == 2 {
784 return "-text".to_string();
785 }
786 if action == 0 {
788 if fa.eol == EolAttr::Unspecified {
789 return String::new();
790 }
791 action = 1; }
793
794 if fa.eol == EolAttr::Lf {
796 if action == 5 {
797 action = 7; } else {
799 action = 3; }
801 } else if fa.eol == EolAttr::Crlf {
802 if action == 5 {
803 action = 6; } else {
805 action = 4; }
807 }
808
809 let attr_action = action;
811
812 match attr_action {
813 1 => "text".to_string(),
814 3 => "text eol=lf".to_string(),
815 4 => "text eol=crlf".to_string(),
816 5 => "text=auto".to_string(),
817 6 => "text=auto eol=crlf".to_string(),
818 7 => "text=auto eol=lf".to_string(),
819 _ => String::new(),
820 }
821}
822
/// Returns `true` when `data` contains at least one CR LF pair.
pub fn has_crlf(data: &[u8]) -> bool {
    data.windows(2)
        .any(|pair| pair[0] == b'\r' && pair[1] == b'\n')
}
827
/// Returns `true` when `data` contains an LF that is not directly preceded
/// by CR.
pub fn has_lone_lf(data: &[u8]) -> bool {
    // An LF in first position has no predecessor and is therefore lone.
    data.first() == Some(&b'\n')
        || data
            .windows(2)
            .any(|pair| pair[1] == b'\n' && pair[0] != b'\r')
}
837
/// Returns `true` when `data` contains a CR that is not directly followed
/// by LF.
fn has_lone_cr(data: &[u8]) -> bool {
    // A CR in last position has no successor and is therefore lone.
    data.last() == Some(&b'\r')
        || data
            .windows(2)
            .any(|pair| pair[0] == b'\r' && pair[1] != b'\n')
}
847
848fn auto_crlf_should_smudge_lf_to_crlf(data: &[u8]) -> bool {
851 if !has_lone_lf(data) {
852 return false;
853 }
854 if has_lone_cr(data) || has_crlf(data) {
855 return false;
856 }
857 if is_binary(data) {
858 return false;
859 }
860 true
861}
862
863pub fn is_all_crlf(data: &[u8]) -> bool {
865 has_crlf(data) && !has_lone_lf(data)
866}
867
868pub fn is_all_lf(data: &[u8]) -> bool {
870 has_lone_lf(data) && !has_crlf(data)
871}
872
873#[must_use]
875pub fn has_crlf_in_index_blob(data: &[u8]) -> bool {
876 if !data.contains(&b'\r') {
877 return false;
878 }
879 let st = gather_convert_stats(data);
880 st & CONVERT_STAT_BITS_BIN == 0 && (st & CONVERT_STAT_BITS_TXT_CRLF) != 0
881}
882
883#[must_use]
887pub fn clean_uses_autocrlf_index_guard(attrs: &FileAttrs, conv: &ConversionConfig) -> bool {
888 if attrs.text == TextAttr::Unset || attrs.crlf_legacy == CrlfLegacyAttr::Unset {
889 return false;
890 }
891 if attrs.eol != EolAttr::Unspecified && attrs.text != TextAttr::Auto {
892 return false;
893 }
894 attrs.text == TextAttr::Auto
895 || (attrs.text == TextAttr::Unspecified
896 && matches!(conv.autocrlf, AutoCrlf::True | AutoCrlf::Input))
897}
898
/// Optional inputs for [`convert_to_git_with_opts`].
#[derive(Clone, Copy, Debug)]
pub struct ConvertToGitOpts<'a> {
    /// Content currently stored for the path, if known. When it already
    /// contains CRLF, automatic CRLF-to-LF conversion may be suppressed.
    pub index_blob: Option<&'a [u8]>,
    /// When true, `index_blob` is not allowed to suppress the conversion.
    pub renormalize: bool,
    /// Whether to run the `core.safecrlf` round-trip check.
    pub check_safecrlf: bool,
}
909
910impl Default for ConvertToGitOpts<'_> {
911 fn default() -> Self {
912 Self {
913 index_blob: None,
914 renormalize: false,
915 check_safecrlf: true,
916 }
917 }
918}
919
920pub fn convert_to_git(
933 data: &[u8],
934 rel_path: &str,
935 conv: &ConversionConfig,
936 file_attrs: &FileAttrs,
937) -> Result<Vec<u8>, String> {
938 convert_to_git_with_opts(
939 data,
940 rel_path,
941 conv,
942 file_attrs,
943 ConvertToGitOpts::default(),
944 )
945}
946
947pub fn convert_to_git_with_opts(
949 data: &[u8],
950 rel_path: &str,
951 conv: &ConversionConfig,
952 file_attrs: &FileAttrs,
953 opts: ConvertToGitOpts<'_>,
954) -> Result<Vec<u8>, String> {
955 let mut buf = data.to_vec();
956
957 if let Some(ref proc_cmd) = file_attrs.filter_process {
959 let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
960 buf = apply_process_clean(proc_cmd, rel_path, &buf).map_err(|_e| {
961 if file_attrs.filter_clean_required {
962 format!("fatal: {rel_path}: clean filter '{name}' failed")
963 } else {
964 format!("clean filter failed: {_e}")
965 }
966 })?;
967 } else {
968 match file_attrs.filter_clean.as_ref() {
969 Some(clean_cmd) => {
970 buf = run_filter(clean_cmd, &buf, rel_path).map_err(|e| {
971 let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
972 if file_attrs.filter_clean_required {
973 format!("fatal: {rel_path}: clean filter '{name}' failed")
974 } else {
975 format!("clean filter failed: {e}")
976 }
977 })?;
978 }
979 None => {
980 if file_attrs.filter_clean_required {
981 let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
982 return Err(format!("fatal: {rel_path}: clean filter '{name}' failed"));
983 }
984 }
985 }
986 }
987
988 let would_convert = would_convert_on_input(conv, file_attrs, &buf);
990
991 let mut convert_crlf_into_lf = would_convert && has_crlf(&buf);
992 if convert_crlf_into_lf
993 && clean_uses_autocrlf_index_guard(file_attrs, conv)
994 && !opts.renormalize
995 && opts.index_blob.is_some_and(has_crlf_in_index_blob)
996 {
997 convert_crlf_into_lf = false;
998 }
999
1000 if would_convert && opts.check_safecrlf {
1002 check_safecrlf_roundtrip(conv, file_attrs, &buf, rel_path, convert_crlf_into_lf)?;
1003 }
1004
1005 if convert_crlf_into_lf {
1007 buf = crlf_to_lf(&buf);
1008 }
1009
1010 Ok(buf)
1011}
1012
1013fn would_convert_on_input(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
1017 match attrs.crlf_legacy {
1018 CrlfLegacyAttr::Unset => return false,
1019 CrlfLegacyAttr::Input => {
1020 if is_binary(data) {
1021 return false;
1022 }
1023 return true;
1024 }
1025 CrlfLegacyAttr::Crlf => {
1026 if attrs.text == TextAttr::Unset {
1027 return false;
1028 }
1029 if is_binary(data) {
1030 return false;
1031 }
1032 return true;
1033 }
1034 CrlfLegacyAttr::Unspecified => {}
1035 }
1036
1037 if attrs.text == TextAttr::Unset {
1039 return false;
1040 }
1041
1042 if attrs.eol != EolAttr::Unspecified {
1044 if attrs.text == TextAttr::Auto && is_binary(data) {
1045 return false;
1046 }
1047 return true;
1048 }
1049
1050 if attrs.text == TextAttr::Set {
1052 return true;
1053 }
1054
1055 if attrs.text == TextAttr::Auto {
1056 if is_binary(data) {
1057 return false;
1058 }
1059 return true;
1060 }
1061
1062 match conv.autocrlf {
1064 AutoCrlf::True | AutoCrlf::Input => {
1065 if is_binary(data) {
1066 return false;
1067 }
1068 true
1069 }
1070 AutoCrlf::False => false,
1071 }
1072}
1073
/// Prints the safecrlf warning for a CRLF-to-LF replacement to stderr.
fn eprint_safecrlf_warn_crlf_to_lf(rel_path: &str) {
    let warning = format!(
        "warning: in the working copy of '{rel_path}', CRLF will be replaced by LF the next time Git touches it"
    );
    eprintln!("{warning}");
}
1080
/// Prints the safecrlf warning for an LF-to-CRLF replacement to stderr.
fn eprint_safecrlf_warn_lf_to_crlf(rel_path: &str) {
    let warning = format!(
        "warning: in the working copy of '{rel_path}', LF will be replaced by CRLF the next time Git touches it"
    );
    eprintln!("{warning}");
}
1087
1088fn check_safecrlf_roundtrip(
1090 conv: &ConversionConfig,
1091 file_attrs: &FileAttrs,
1092 data: &[u8],
1093 rel_path: &str,
1094 convert_crlf_into_lf: bool,
1095) -> Result<(), String> {
1096 if conv.safecrlf == SafeCrlf::False {
1097 return Ok(());
1098 }
1099
1100 let old_stats = git_text_stat(data);
1101
1102 let mut new_stats = old_stats.clone();
1103 if convert_crlf_into_lf && new_stats.crlf > 0 {
1104 new_stats.lonelf += new_stats.crlf;
1105 new_stats.crlf = 0;
1106 }
1107 if will_convert_lf_to_crlf_from_stats(&new_stats, conv, file_attrs) {
1108 new_stats.crlf += new_stats.lonelf;
1109 new_stats.lonelf = 0;
1110 }
1111
1112 if old_stats.crlf > 0 && new_stats.crlf == 0 {
1113 let msg = format!("fatal: CRLF would be replaced by LF in {rel_path}");
1114 if conv.safecrlf == SafeCrlf::True {
1115 return Err(msg);
1116 }
1117 eprint_safecrlf_warn_crlf_to_lf(rel_path);
1118 } else if old_stats.lonelf > 0 && new_stats.lonelf == 0 {
1119 let msg = format!("fatal: LF would be replaced by CRLF in {rel_path}");
1120 if conv.safecrlf == SafeCrlf::True {
1121 return Err(msg);
1122 }
1123 eprint_safecrlf_warn_lf_to_crlf(rel_path);
1124 }
1125
1126 Ok(())
1127}
1128
/// Returns a copy of `data` with every CR LF pair replaced by a single LF.
/// CR bytes that are not followed by LF are kept.
pub fn crlf_to_lf(data: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(data.len());
    let mut bytes = data.iter().copied().peekable();

    while let Some(byte) = bytes.next() {
        // Drop the CR of a pair; its LF is pushed on the next iteration.
        if byte == b'\r' && bytes.peek() == Some(&b'\n') {
            continue;
        }
        out.push(byte);
    }

    out
}
1144
/// Returns a copy of `data` with a CR inserted before every LF that is not
/// already preceded by one.
pub fn lf_to_crlf(data: &[u8]) -> Vec<u8> {
    // Reserve roughly 10% extra for the inserted CR bytes.
    let mut out = Vec::with_capacity(data.len() + data.len() / 10);
    let mut previous: Option<u8> = None;

    for &byte in data {
        if byte == b'\n' && previous != Some(b'\r') {
            out.push(b'\r');
        }
        out.push(byte);
        previous = Some(byte);
    }

    out
}
1160
1161pub fn convert_to_worktree(
1173 data: &[u8],
1174 rel_path: &str,
1175 conv: &ConversionConfig,
1176 file_attrs: &FileAttrs,
1177 oid_hex: Option<&str>,
1178 smudge_meta: Option<&FilterSmudgeMeta>,
1179) -> Result<Vec<u8>, String> {
1180 let mut buf = data.to_vec();
1181
1182 if file_attrs.ident {
1184 if let Some(oid) = oid_hex {
1185 buf = expand_ident(&buf, oid);
1186 }
1187 }
1188
1189 let driver = file_attrs.filter_driver_name.as_deref().unwrap_or("");
1191 if let Some(ref proc_cmd) = file_attrs.filter_process {
1192 buf = apply_process_smudge(proc_cmd, rel_path, &buf, smudge_meta).map_err(|_e| {
1193 if file_attrs.filter_smudge_required {
1194 format!("fatal: {rel_path}: smudge filter {driver} failed")
1195 } else {
1196 _e
1197 }
1198 })?;
1199 } else {
1200 match file_attrs.filter_smudge.as_ref() {
1201 Some(smudge_cmd) => match run_filter(smudge_cmd, &buf, rel_path) {
1202 Ok(filtered) => buf = filtered,
1203 Err(_e) => {
1204 if file_attrs.filter_smudge_required {
1205 return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
1206 }
1207 }
1208 },
1209 None => {
1210 if file_attrs.filter_smudge_required {
1211 return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
1212 }
1213 }
1214 }
1215 }
1216
1217 let should_convert = should_convert_to_crlf(conv, file_attrs, &buf);
1219 if should_convert {
1220 buf = lf_to_crlf(&buf);
1221 }
1222
1223 Ok(buf)
1224}
1225
1226fn should_convert_to_crlf(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
1228 match attrs.crlf_legacy {
1229 CrlfLegacyAttr::Unset | CrlfLegacyAttr::Input => return false,
1230 CrlfLegacyAttr::Crlf => {
1231 if attrs.text == TextAttr::Unset {
1232 return false;
1233 }
1234 return true;
1237 }
1238 CrlfLegacyAttr::Unspecified => {}
1239 }
1240
1241 if attrs.text == TextAttr::Unset {
1243 return false;
1244 }
1245
1246 if attrs.eol != EolAttr::Unspecified {
1248 if attrs.text == TextAttr::Auto && is_binary(data) {
1249 return false;
1250 }
1251 if attrs.eol != EolAttr::Crlf {
1252 return false;
1253 }
1254 if attrs.text == TextAttr::Auto {
1256 return auto_crlf_should_smudge_lf_to_crlf(data);
1257 }
1258 return true;
1260 }
1261
1262 if attrs.text == TextAttr::Set {
1264 return output_eol_is_crlf(conv);
1265 }
1266
1267 if attrs.text == TextAttr::Auto {
1268 if is_binary(data) {
1269 return false;
1270 }
1271 if !output_eol_is_crlf(conv) {
1272 return false;
1273 }
1274 return auto_crlf_should_smudge_lf_to_crlf(data);
1275 }
1276
1277 match conv.autocrlf {
1279 AutoCrlf::True => {
1280 if is_binary(data) {
1281 return false;
1282 }
1283 auto_crlf_should_smudge_lf_to_crlf(data)
1284 }
1285 AutoCrlf::Input | AutoCrlf::False => false,
1286 }
1287}
1288
1289fn output_eol_is_crlf(conv: &ConversionConfig) -> bool {
1291 if conv.autocrlf == AutoCrlf::Input {
1293 return false;
1294 }
1295 if conv.autocrlf == AutoCrlf::True {
1296 return true;
1297 }
1298 match conv.eol {
1299 CoreEol::Crlf => true,
1300 CoreEol::Lf => false,
1301 CoreEol::Native => {
1302 cfg!(windows)
1304 }
1305 }
1306}
1307
1308fn expand_ident(data: &[u8], oid: &str) -> Vec<u8> {
1313 if !count_ident_regions(data) {
1314 return data.to_vec();
1315 }
1316 let replacement = format!("$Id: {oid} $");
1317 let mut out = Vec::with_capacity(data.len() + 60);
1318 let mut i = 0;
1319 while i < data.len() {
1320 if data[i] != b'$' {
1321 out.push(data[i]);
1322 i += 1;
1323 continue;
1324 }
1325 if i + 3 > data.len() || data[i + 1] != b'I' || data[i + 2] != b'd' {
1326 out.push(data[i]);
1327 i += 1;
1328 continue;
1329 }
1330 let after_id = i + 3;
1331 let ch = data.get(after_id).copied();
1332 match ch {
1333 Some(b'$') => {
1334 out.extend_from_slice(replacement.as_bytes());
1335 i = after_id + 1;
1336 }
1337 Some(b':') => {
1338 let rest = &data[after_id + 1..];
1339 let line_end = rest
1340 .iter()
1341 .position(|&b| b == b'\n' || b == b'\r')
1342 .unwrap_or(rest.len());
1343 let line = &rest[..line_end];
1344 let Some(dollar_rel) = line.iter().position(|&b| b == b'$') else {
1345 out.push(data[i]);
1346 i += 1;
1347 continue;
1348 };
1349 if line[..dollar_rel].contains(&b'\n') {
1350 out.push(data[i]);
1351 i += 1;
1352 continue;
1353 }
1354 let payload = &line[..dollar_rel];
1357 let foreign = payload.len() > 1
1358 && payload[1..]
1359 .iter()
1360 .position(|&b| b == b' ')
1361 .is_some_and(|rel| {
1362 let pos = 1 + rel;
1363 pos < payload.len().saturating_sub(1)
1364 });
1365 if foreign {
1366 out.push(data[i]);
1367 i += 1;
1368 continue;
1369 }
1370 out.extend_from_slice(replacement.as_bytes());
1371 i = after_id + 1 + dollar_rel + 1;
1372 }
1373 _ => {
1374 out.push(data[i]);
1375 i += 1;
1376 }
1377 }
1378 }
1379 out
1380}
1381
/// Returns `true` when `data` contains at least one `$Id$` keyword, or an
/// `$Id:` whose closing `$` appears before the next CR or LF.
fn count_ident_regions(data: &[u8]) -> bool {
    (0..data.len()).any(|start| {
        let tail = &data[start..];
        if !tail.starts_with(b"$Id") {
            return false;
        }
        match tail.get(3).copied() {
            Some(b'$') => true,
            Some(b':') => {
                // Whichever of `$`, LF or CR comes first decides.
                let stop = tail[4..]
                    .iter()
                    .copied()
                    .find(|&b| b == b'$' || b == b'\n' || b == b'\r');
                stop == Some(b'$')
            }
            _ => false,
        }
    })
}
1420
/// Collapses every expanded `$Id: ... $` keyword in `data` back to `$Id$`.
///
/// A keyword only counts when its closing `$` appears before the next CR or
/// LF; everything else is copied through unchanged.
pub fn collapse_ident(data: &[u8]) -> Vec<u8> {
    const OPEN: &[u8] = b"$Id:";

    let mut out = Vec::with_capacity(data.len());
    let mut pos = 0;
    while pos < data.len() {
        let tail = &data[pos..];
        // Offset (past the opener) of the closing `$`, if it comes before
        // any line break.
        let closing = if tail.starts_with(OPEN) {
            let rest = &tail[OPEN.len()..];
            rest.iter()
                .position(|&b| b == b'$' || b == b'\n' || b == b'\r')
                .filter(|&idx| rest[idx] == b'$')
        } else {
            None
        };

        match closing {
            Some(idx) => {
                out.extend_from_slice(b"$Id$");
                pos += OPEN.len() + idx + 1;
            }
            None => {
                out.push(data[pos]);
                pos += 1;
            }
        }
    }
    out
}
1444
/// Wraps `s` in single quotes for a POSIX shell, rewriting each embedded
/// `'` as `'\''`.
fn sq_quote_buf(s: &str) -> String {
    format!("'{}'", s.replace('\'', "'\\''"))
}
1459
1460fn expand_filter_command(cmd: &str, rel_path: &str) -> String {
1462 let mut out = String::with_capacity(cmd.len() + rel_path.len() + 8);
1463 let mut chars = cmd.chars().peekable();
1464 while let Some(c) = chars.next() {
1465 if c == '%' {
1466 match chars.peek() {
1467 Some('%') => {
1468 chars.next();
1469 out.push('%');
1470 }
1471 Some('f') => {
1472 chars.next();
1473 out.push_str(&sq_quote_buf(rel_path));
1474 }
1475 _ => out.push('%'),
1476 }
1477 } else {
1478 out.push(c);
1479 }
1480 }
1481 out
1482}
1483
1484fn run_filter(cmd: &str, data: &[u8], rel_path: &str) -> Result<Vec<u8>, std::io::Error> {
1486 let expanded = expand_filter_command(cmd, rel_path);
1487 let mut child = Command::new("sh")
1488 .arg("-c")
1489 .arg(&expanded)
1490 .stdin(Stdio::piped())
1491 .stdout(Stdio::piped())
1492 .stderr(Stdio::inherit())
1493 .spawn()?;
1494
1495 use std::io::{ErrorKind, Write};
1496 if let Some(ref mut stdin) = child.stdin {
1497 if let Err(e) = stdin.write_all(data) {
1498 if e.kind() != ErrorKind::BrokenPipe {
1500 return Err(e);
1501 }
1502 }
1503 }
1504 drop(child.stdin.take());
1505
1506 let output = child.wait_with_output()?;
1507 if !output.status.success() {
1508 return Err(std::io::Error::other(format!(
1509 "filter command exited with status {}",
1510 output.status
1511 )));
1512 }
1513
1514 Ok(output.stdout)
1515}
1516
/// A collection of gitattributes rules, stored as a plain `Vec` of [`AttrRule`].
// NOTE(review): presumably kept in file order so later rules can override
// earlier ones — confirm against the matching code.
pub type GitAttributes = Vec<AttrRule>;
1522
// Unit tests for line-ending conversion, ident keyword handling, attribute
// pattern matching and filter command expansion.
#[cfg(test)]
mod tests {
    use super::*;

    // CRLF pairs become LF; input that is already LF-only comes back unchanged.
    #[test]
    fn test_crlf_to_lf() {
        assert_eq!(crlf_to_lf(b"hello\r\nworld\r\n"), b"hello\nworld\n");
        assert_eq!(crlf_to_lf(b"hello\nworld\n"), b"hello\nworld\n");
        assert_eq!(crlf_to_lf(b"hello\r\n"), b"hello\n");
    }

    // Bare LF becomes CRLF; existing CRLF pairs are not doubled.
    #[test]
    fn test_lf_to_crlf() {
        assert_eq!(lf_to_crlf(b"hello\nworld\n"), b"hello\r\nworld\r\n");
        assert_eq!(lf_to_crlf(b"hello\r\nworld\r\n"), b"hello\r\nworld\r\n");
    }

    // Detects a CRLF pair; a lone LF does not count.
    #[test]
    fn test_has_crlf() {
        assert!(has_crlf(b"hello\r\nworld"));
        assert!(!has_crlf(b"hello\nworld"));
    }

    // A blob that mixes LF and CRLF lines must be written to the worktree
    // unchanged even though autocrlf is enabled.
    #[test]
    fn smudge_mixed_line_endings_unchanged_with_autocrlf_true() {
        let mut blob = Vec::new();
        for part in [
            b"Oh\n".as_slice(),
            b"here\n",
            b"is\n",
            b"CRLF\r\n",
            b"in\n",
            b"text\n",
        ] {
            blob.extend_from_slice(part);
        }
        let conv = ConversionConfig {
            autocrlf: AutoCrlf::True,
            eol: CoreEol::Lf,
            safecrlf: SafeCrlf::False,
        };
        let attrs = FileAttrs::default();
        let out = convert_to_worktree(&blob, "mixed", &conv, &attrs, None, None).unwrap();
        assert_eq!(out, blob);
    }

    // With autocrlf enabled and default attributes, an LF-only blob is
    // converted to CRLF on the way to the worktree.
    #[test]
    fn smudge_lf_only_gets_crlf_with_autocrlf_true() {
        let blob = b"a\nb\n";
        let conv = ConversionConfig {
            autocrlf: AutoCrlf::True,
            eol: CoreEol::Lf,
            safecrlf: SafeCrlf::False,
        };
        let attrs = FileAttrs::default();
        let out = convert_to_worktree(blob, "x", &conv, &attrs, None, None).unwrap();
        assert_eq!(out, b"a\r\nb\r\n");
    }

    // Content containing a NUL byte is classified as binary; plain text is not.
    #[test]
    fn test_is_binary() {
        assert!(is_binary(b"hello\0world"));
        assert!(!is_binary(b"hello world"));
    }

    // A pattern with a trailing slash is parsed as directory-only and
    // basename-only. It must not match a path whose last component has the
    // same name when the final argument is `false`, and must match when it is
    // `true`.
    // NOTE(review): the final argument presumably means "path is a directory"
    // — confirm against `attr_rule_matches`.
    #[test]
    fn attr_dir_only_pattern_does_not_match_same_named_file() {
        let rules = parse_gitattributes_content("ignored-only-if-dir/ export-ignore\n");
        let rule = &rules[0];
        assert!(rule.must_be_dir);
        assert!(rule.basename_only);
        assert!(!attr_rule_matches(
            rule,
            "not-ignored-dir/ignored-only-if-dir",
            false
        ));
        assert!(attr_rule_matches(rule, "ignored-only-if-dir", true));
    }

    // `$Id$` expands to `$Id: <oid> $`, and collapsing restores `$Id$`.
    #[test]
    fn test_expand_collapse_ident() {
        let data = b"$Id$";
        let expanded = expand_ident(data, "abc123");
        assert_eq!(expanded, b"$Id: abc123 $");
        let collapsed = collapse_ident(&expanded);
        assert_eq!(collapsed, b"$Id$");
    }

    // An unterminated `$Id:` on one line must not be paired with a `$` on a
    // later line; only the properly terminated keyword is rewritten.
    #[test]
    fn expand_ident_does_not_span_lines_for_partial_keyword() {
        let data = b"$Id: NoTerminatingSymbol\n$Id: deadbeef $\n";
        let expanded = expand_ident(data, "newoid");
        assert_eq!(expanded, b"$Id: NoTerminatingSymbol\n$Id: newoid $\n");
    }

    // A keyword whose payload contains internal spaces is left untouched.
    #[test]
    fn expand_ident_preserves_foreign_id_with_internal_spaces() {
        let data = b"$Id: Foreign Commit With Spaces $\n";
        let expanded = expand_ident(data, "abc");
        assert_eq!(expanded, data);
    }

    // `%f` is replaced by the single-quoted path (embedded quotes escaped as
    // `'\''`), and `%%` collapses to a literal `%`.
    #[test]
    fn expand_filter_command_percent_f_quotes_path() {
        let s = expand_filter_command("sh ./x.sh %f --extra", "name with 'sq'");
        assert_eq!(s, "sh ./x.sh 'name with '\\''sq'\\''' --extra");
        assert_eq!(expand_filter_command("a %% b", "p"), "a % b");
    }
}