1use std::path::{Path, PathBuf};
18use std::process::{Command, Stdio};
19
20use encoding_rs::UTF_8;
21
22use crate::config::ConfigSet;
23use crate::filter_process::{apply_process_clean, apply_process_smudge, FilterSmudgeMeta};
24use crate::objects::{parse_tree, ObjectId, ObjectKind};
25use crate::odb::Odb;
26
/// Parsed value of the `core.autocrlf` configuration setting.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AutoCrlf {
    /// The setting is a truthy boolean (`true`, `yes`, `on` or `1`).
    True,
    /// The setting is the literal `input`.
    Input,
    /// The setting is absent or holds any other value.
    False,
}
34
/// Parsed value of the `core.eol` configuration setting.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CoreEol {
    /// `core.eol=lf`.
    Lf,
    /// `core.eol=crlf`.
    Crlf,
    /// `core.eol=native`, an unrecognised value, or no setting at all.
    Native,
}
42
/// Parsed value of the `core.safecrlf` configuration setting.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SafeCrlf {
    /// The setting is a truthy boolean (`true`, `yes`, `on` or `1`).
    True,
    /// The setting is `warn`, or it is not configured.
    Warn,
    /// The setting holds any other value.
    False,
}
50
/// State of the `text` gitattribute for a path.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TextAttr {
    /// The attribute is set (`text`).
    Set,
    /// The attribute has the value `auto` (`text=auto`).
    Auto,
    /// The attribute is unset (`-text`).
    Unset,
    /// No rule assigned the attribute, or it carries an unrecognised value.
    Unspecified,
}
63
/// State of the `eol` gitattribute for a path.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum EolAttr {
    /// `eol=lf`.
    Lf,
    /// `eol=crlf`.
    Crlf,
    /// No rule assigned the attribute, or it carries any other value.
    Unspecified,
}
71
/// State of the legacy `crlf` gitattribute for a path.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CrlfLegacyAttr {
    /// No rule assigned the attribute, or it carries an unrecognised value.
    #[default]
    Unspecified,
    /// The attribute is unset (`-crlf`).
    Unset,
    /// The attribute has the value `input` (`crlf=input`).
    Input,
    /// The attribute is set (`crlf`).
    Crlf,
}
84
/// State of the `merge` gitattribute for a path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MergeAttr {
    /// No rule assigned the attribute, or it is merely set (`merge`).
    Unspecified,
    /// The attribute is unset (`-merge`).
    Unset,
    /// The attribute names a merge driver (`merge=<driver>`).
    Driver(String),
}
95
/// State of the `diff` gitattribute for a path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum DiffAttr {
    /// No rule assigned the attribute.
    Unspecified,
    /// The attribute is unset (`-diff`).
    Unset,
    /// The attribute is set (`diff`).
    Set,
    /// The attribute names a diff driver (`diff=<driver>`).
    Driver(String),
}
108
109#[derive(Debug, Clone)]
111pub struct FileAttrs {
112 pub text: TextAttr,
113 pub eol: EolAttr,
114 pub diff_attr: DiffAttr,
116 pub export_ignore: bool,
118 pub export_subst: bool,
120 pub filter_clean: Option<String>,
121 pub filter_smudge: Option<String>,
122 pub filter_process: Option<String>,
124 pub filter_driver_name: Option<String>,
126 pub filter_smudge_required: bool,
128 pub filter_clean_required: bool,
130 pub ident: bool,
131 pub merge: MergeAttr,
132 pub conflict_marker_size: Option<String>,
133 pub working_tree_encoding: Option<String>,
135 pub crlf_legacy: CrlfLegacyAttr,
137 pub whitespace: Option<String>,
140}
141
142impl Default for FileAttrs {
143 fn default() -> Self {
144 FileAttrs {
145 text: TextAttr::Unspecified,
146 eol: EolAttr::Unspecified,
147 diff_attr: DiffAttr::Unspecified,
148 export_ignore: false,
149 export_subst: false,
150 filter_clean: None,
151 filter_smudge: None,
152 filter_process: None,
153 filter_driver_name: None,
154 filter_smudge_required: false,
155 filter_clean_required: false,
156 ident: false,
157 merge: MergeAttr::Unspecified,
158 conflict_marker_size: None,
159 working_tree_encoding: None,
160 crlf_legacy: CrlfLegacyAttr::Unspecified,
161 whitespace: None,
162 }
163 }
164}
165
166#[derive(Debug, Clone)]
168pub struct ConversionConfig {
169 pub autocrlf: AutoCrlf,
170 pub eol: CoreEol,
171 pub safecrlf: SafeCrlf,
172 pub check_roundtrip_encoding: Option<String>,
175}
176
177impl ConversionConfig {
178 pub fn from_config(config: &ConfigSet) -> Self {
180 let autocrlf = match config.get("core.autocrlf") {
181 Some(v) => match v.to_lowercase().as_str() {
182 "true" | "yes" | "on" | "1" => AutoCrlf::True,
183 "input" => AutoCrlf::Input,
184 _ => AutoCrlf::False,
185 },
186 None => AutoCrlf::False,
187 };
188
189 let eol = match config.get("core.eol") {
190 Some(v) => match v.to_lowercase().as_str() {
191 "crlf" => CoreEol::Crlf,
192 "lf" => CoreEol::Lf,
193 "native" => CoreEol::Native,
194 _ => CoreEol::Native,
195 },
196 None => CoreEol::Native,
197 };
198
199 let safecrlf = match config.get("core.safecrlf") {
200 Some(v) => match v.to_lowercase().as_str() {
201 "true" | "yes" | "on" | "1" => SafeCrlf::True,
202 "warn" => SafeCrlf::Warn,
203 _ => SafeCrlf::False,
204 },
205 None => SafeCrlf::Warn,
207 };
208
209 let check_roundtrip_encoding = config
210 .get("core.checkRoundtripEncoding")
211 .filter(|s| !s.is_empty());
212
213 ConversionConfig {
214 autocrlf,
215 eol,
216 safecrlf,
217 check_roundtrip_encoding,
218 }
219 }
220}
221
/// One parsed line of a gitattributes file: a path pattern plus the
/// attribute assignments it carries.
#[derive(Clone, Debug)]
pub struct AttrRule {
    /// Glob pattern, with any trailing `/` already removed.
    pattern: String,
    /// Whether the pattern ended in `/` and therefore only matches directories.
    must_be_dir: bool,
    /// Whether the pattern has no `/` and is matched against the basename only.
    basename_only: bool,
    /// `(attribute name, value)` pairs; set and unset attributes are stored
    /// with the values `"set"` and `"unset"`.
    attrs: Vec<(String, String)>,
}
233
234impl AttrRule {
235 pub fn diff_drivers(&self) -> impl Iterator<Item = &str> + '_ {
237 self.attrs.iter().filter_map(|(name, value)| {
238 if name == "diff" && !value.is_empty() && value != "unset" && value != "set" {
239 Some(value.as_str())
240 } else {
241 None
242 }
243 })
244 }
245}
246
247pub fn load_gitattributes(work_tree: &Path) -> Vec<AttrRule> {
249 let mut rules = Vec::new();
250
251 let root_attrs = work_tree.join(".gitattributes");
252 if let Ok(content) = std::fs::read_to_string(&root_attrs) {
253 parse_gitattributes(&content, &mut rules);
254 }
255
256 let info_attrs = work_tree.join(".git/info/attributes");
257 if let Ok(content) = std::fs::read_to_string(&info_attrs) {
258 parse_gitattributes(&content, &mut rules);
259 }
260
261 rules
262}
263
264#[must_use]
269pub fn parse_gitattributes_content(content: &str) -> Vec<AttrRule> {
270 let mut rules = Vec::new();
271 parse_gitattributes(content, &mut rules);
272 rules
273}
274
275pub fn load_gitattributes_from_index(
278 index: &crate::index::Index,
279 odb: &crate::odb::Odb,
280) -> Vec<AttrRule> {
281 let mut rules = Vec::new();
282
283 if let Some(entry) = index.get(b".gitattributes", 0) {
285 if let Ok(obj) = odb.read(&entry.oid) {
286 if let Ok(content) = String::from_utf8(obj.data) {
287 parse_gitattributes(&content, &mut rules);
288 }
289 }
290 }
291
292 rules
293}
294
295pub fn load_gitattributes_for_checkout(
301 work_tree: &Path,
302 rel_path: &str,
303 index: &crate::index::Index,
304 odb: &crate::odb::Odb,
305) -> Vec<AttrRule> {
306 let mut rules = load_gitattributes(work_tree);
307
308 if !work_tree.join(".gitattributes").exists() {
311 if let Some(entry) = index.get(b".gitattributes", 0) {
312 if let Ok(obj) = odb.read(&entry.oid) {
313 if let Ok(content) = String::from_utf8(obj.data) {
314 parse_gitattributes(&content, &mut rules);
315 }
316 }
317 }
318 }
319
320 let path = Path::new(rel_path);
321 if let Some(parent) = path.parent() {
322 let mut accum = PathBuf::new();
323 for comp in parent.components() {
324 accum.push(comp);
325 let ga_rel = accum.join(".gitattributes");
326 let wt_ga = work_tree.join(&ga_rel);
327 if let Ok(content) = std::fs::read_to_string(&wt_ga) {
328 parse_gitattributes(&content, &mut rules);
329 } else {
330 let key = path_to_index_bytes(&ga_rel);
331 if let Some(entry) = index.get(&key, 0) {
332 if let Ok(obj) = odb.read(&entry.oid) {
333 if let Ok(content) = String::from_utf8(obj.data) {
334 parse_gitattributes(&content, &mut rules);
335 }
336 }
337 }
338 }
339 }
340 }
341
342 rules
343}
344
345pub fn load_gitattributes_for_tree_path(
353 odb: &Odb,
354 tree_oid: &ObjectId,
355 rel_path: &str,
356) -> Vec<AttrRule> {
357 let mut rules = Vec::new();
358 load_gitattributes_blob_from_tree(odb, tree_oid, ".gitattributes", &mut rules);
359
360 let path = Path::new(rel_path);
361 if let Some(parent) = path.parent() {
362 let mut accum = PathBuf::new();
363 for comp in parent.components() {
364 accum.push(comp);
365 let ga_rel = accum.join(".gitattributes");
366 let ga_rel = ga_rel.to_string_lossy().replace('\\', "/");
367 load_gitattributes_blob_from_tree(odb, tree_oid, &ga_rel, &mut rules);
368 }
369 }
370
371 rules
372}
373
374fn load_gitattributes_blob_from_tree(
375 odb: &Odb,
376 tree_oid: &ObjectId,
377 ga_path: &str,
378 rules: &mut Vec<AttrRule>,
379) {
380 let Some(oid) = lookup_tree_path(odb, tree_oid, ga_path) else {
381 return;
382 };
383 let Ok(obj) = odb.read(&oid) else {
384 return;
385 };
386 if obj.kind != ObjectKind::Blob {
387 return;
388 }
389 if let Ok(content) = String::from_utf8(obj.data) {
390 parse_gitattributes(&content, rules);
391 }
392}
393
394fn lookup_tree_path(odb: &Odb, tree_oid: &ObjectId, rel_path: &str) -> Option<ObjectId> {
395 let mut current = *tree_oid;
396 let mut parts = rel_path.split('/').peekable();
397 while let Some(part) = parts.next() {
398 let obj = odb.read(¤t).ok()?;
399 if obj.kind != ObjectKind::Tree {
400 return None;
401 }
402 let entries = parse_tree(&obj.data).ok()?;
403 let entry = entries
404 .iter()
405 .find(|entry| String::from_utf8_lossy(&entry.name) == part)?;
406 if parts.peek().is_none() {
407 return Some(entry.oid);
408 }
409 if entry.mode != 0o040000 {
410 return None;
411 }
412 current = entry.oid;
413 }
414 None
415}
416
/// Converts `path` to the byte string used as an index key: the raw OS bytes
/// on Unix, the lossy UTF-8 rendering elsewhere.
fn path_to_index_bytes(path: &Path) -> Vec<u8> {
    #[cfg(unix)]
    let bytes = {
        use std::os::unix::ffi::OsStrExt;
        path.as_os_str().as_bytes().to_vec()
    };
    #[cfg(not(unix))]
    let bytes = path.to_string_lossy().as_bytes().to_vec();

    bytes
}
428
429fn parse_gitattributes(content: &str, rules: &mut Vec<AttrRule>) {
430 for line in content.lines() {
431 let line = line.trim();
432 if line.is_empty() || line.starts_with('#') {
433 continue;
434 }
435
436 let mut parts = line.split_whitespace();
437 let raw_pattern = match parts.next() {
438 Some(p) => p,
439 None => continue,
440 };
441
442 let mut pat = raw_pattern.to_owned();
443 let mut must_be_dir = false;
444 if pat.ends_with('/') && pat.len() > 1 {
445 pat.pop();
446 must_be_dir = true;
447 }
448 let basename_only = !pat.contains('/');
449
450 let mut attrs = Vec::new();
451 for part in parts {
452 if part == "binary" {
453 attrs.push(("text".to_owned(), "unset".to_owned()));
454 attrs.push(("diff".to_owned(), "unset".to_owned()));
455 } else if let Some(rest) = part.strip_prefix('-') {
456 attrs.push((rest.to_owned(), "unset".to_owned()));
457 } else if let Some((key, val)) = part.split_once('=') {
458 attrs.push((key.to_owned(), val.to_owned()));
459 } else {
460 attrs.push((part.to_owned(), "set".to_owned()));
461 }
462 }
463
464 if !attrs.is_empty() {
465 rules.push(AttrRule {
466 pattern: pat,
467 must_be_dir,
468 basename_only,
469 attrs,
470 });
471 }
472 }
473}
474
/// Returns `true` when `value`, ignoring surrounding whitespace and ASCII
/// case, is one of `true`, `yes`, `on` or `1`.
fn config_bool_truthy(value: &str) -> bool {
    let candidate = value.trim();
    ["true", "yes", "on", "1"]
        .iter()
        .any(|truthy| candidate.eq_ignore_ascii_case(truthy))
}
481
482pub fn get_file_attrs(
487 rules: &[AttrRule],
488 rel_path: &str,
489 is_dir: bool,
490 config: &ConfigSet,
491) -> FileAttrs {
492 let mut fa = FileAttrs::default();
493
494 for rule in rules {
496 if attr_rule_matches(rule, rel_path, is_dir) {
497 for (name, value) in &rule.attrs {
498 match name.as_str() {
499 "text" => {
500 fa.text = match value.as_str() {
501 "set" => TextAttr::Set,
502 "unset" => TextAttr::Unset,
503 "auto" => TextAttr::Auto,
504 _ => TextAttr::Unspecified,
505 };
506 }
507 "eol" => {
508 fa.eol = match value.as_str() {
509 "lf" => EolAttr::Lf,
510 "crlf" => EolAttr::Crlf,
511 _ => EolAttr::Unspecified,
512 };
513 }
514 "filter" => {
515 if value == "unset" {
516 fa.filter_clean = None;
517 fa.filter_smudge = None;
518 fa.filter_process = None;
519 fa.filter_driver_name = None;
520 fa.filter_smudge_required = false;
521 fa.filter_clean_required = false;
522 } else {
523 let clean_key = format!("filter.{value}.clean");
524 let smudge_key = format!("filter.{value}.smudge");
525 let process_key = format!("filter.{value}.process");
526 let req_key = format!("filter.{value}.required");
527 fa.filter_driver_name = Some(value.clone());
528 fa.filter_process = config.get(&process_key).filter(|s| !s.is_empty());
529 if fa.filter_process.is_some() {
530 fa.filter_clean = None;
531 fa.filter_smudge = None;
532 } else {
533 fa.filter_clean = config.get(&clean_key);
534 fa.filter_smudge = config.get(&smudge_key);
535 }
536 let required =
537 config.get(&req_key).is_some_and(|v| config_bool_truthy(&v));
538 fa.filter_smudge_required = required;
539 fa.filter_clean_required = required;
540 }
541 }
542 "diff" => {
543 if value == "unset" {
544 fa.diff_attr = DiffAttr::Unset;
545 } else if value == "set" {
546 fa.diff_attr = DiffAttr::Set;
547 } else if !value.is_empty() {
548 fa.diff_attr = DiffAttr::Driver(value.clone());
549 }
550 }
551 "ident" => {
552 fa.ident = value == "set";
553 }
554 "export-ignore" => {
555 fa.export_ignore = value != "unset";
556 }
557 "export-subst" => {
558 fa.export_subst = value != "unset";
559 }
560 "merge" => {
561 fa.merge = match value.as_str() {
562 "unset" => MergeAttr::Unset,
563 "set" => MergeAttr::Unspecified,
564 other => MergeAttr::Driver(other.to_string()),
565 };
566 }
567 "conflict-marker-size" => {
568 if value == "unset" {
569 fa.conflict_marker_size = None;
570 } else {
571 fa.conflict_marker_size = Some(value.clone());
572 }
573 }
574 "working-tree-encoding" => {
575 if value != "unset" && !value.is_empty() {
576 fa.working_tree_encoding = Some(value.clone());
577 }
578 }
579 "crlf" => {
580 fa.crlf_legacy = match value.as_str() {
581 "unset" => CrlfLegacyAttr::Unset,
582 "input" => CrlfLegacyAttr::Input,
583 "set" => CrlfLegacyAttr::Crlf,
584 _ => CrlfLegacyAttr::Unspecified,
585 };
586 }
587 "whitespace" => {
588 if value == "unset" {
589 fa.whitespace = Some("unset".to_owned());
590 } else if !value.is_empty() {
591 fa.whitespace = Some(value.clone());
592 }
593 }
594 _ => {}
595 }
596 }
597 }
598 }
599
600 fa
601}
602
603#[must_use]
608pub fn path_has_gitattribute(
609 rules: &[AttrRule],
610 path: &str,
611 is_dir: bool,
612 attr_name: &str,
613) -> bool {
614 matches!(
615 path_gitattribute_value(rules, path, is_dir, attr_name).as_deref(),
616 Some(value) if value != "unset"
617 )
618}
619
620#[must_use]
628pub fn path_gitattribute_value(
629 rules: &[AttrRule],
630 path: &str,
631 is_dir: bool,
632 attr_name: &str,
633) -> Option<String> {
634 let mut last: Option<&str> = None;
635 for rule in rules {
636 if attr_rule_matches(rule, path, is_dir) {
637 for (name, value) in &rule.attrs {
638 if name == attr_name {
639 last = Some(value.as_str());
640 }
641 }
642 }
643 }
644 last.map(str::to_string)
645}
646
647#[must_use]
649pub fn attr_rule_matches(rule: &AttrRule, rel_path: &str, is_dir: bool) -> bool {
650 let path_is_dir = is_dir || rel_path.ends_with('/');
651 if rule.must_be_dir && !path_is_dir {
652 return false;
653 }
654 let path_for_glob = rel_path.trim_end_matches('/');
655 if rule.basename_only {
656 let basename = path_for_glob.rsplit('/').next().unwrap_or(path_for_glob);
657 glob_matches(rule.pattern.as_str(), basename)
658 } else {
659 glob_matches(rule.pattern.as_str(), path_for_glob)
660 }
661}
662
663fn glob_matches(pattern: &str, text: &str) -> bool {
664 glob_match_bytes(pattern.as_bytes(), text.as_bytes())
665}
666
/// Matches `text` against the glob `pat`.
///
/// `*` matches any run of bytes (including `/` and the empty run), `?`
/// matches exactly one byte, and every other byte matches itself.
///
/// The matcher is iterative and remembers only the most recent `*`, so it
/// runs in `O(pat.len() * text.len())` time instead of backtracking
/// exponentially on patterns with many stars.
fn glob_match_bytes(pat: &[u8], text: &[u8]) -> bool {
    let mut p = 0usize;
    let mut t = 0usize;
    // (pattern index just past the last star run, text index that star
    // currently resumes matching from).
    let mut resume: Option<(usize, usize)> = None;

    while t < text.len() {
        match pat.get(p) {
            Some(&b'*') => {
                // Consecutive stars are equivalent to a single one.
                while pat.get(p) == Some(&b'*') {
                    p += 1;
                }
                if p == pat.len() {
                    // A trailing star swallows the rest of the text.
                    return true;
                }
                resume = Some((p, t));
            }
            Some(&c) if c == b'?' || c == text[t] => {
                p += 1;
                t += 1;
            }
            _ => match resume {
                // Mismatch: let the last star absorb one more byte and retry.
                Some((star_p, star_t)) => {
                    p = star_p;
                    t = star_t + 1;
                    resume = Some((star_p, star_t + 1));
                }
                None => return false,
            },
        }
    }

    // Text is exhausted: only stars may remain in the pattern.
    pat[p..].iter().all(|&b| b == b'*')
}
690
/// Reports whether a NUL byte occurs within the first 8000 bytes of `data`.
pub fn is_binary(data: &[u8]) -> bool {
    data.iter().take(8000).any(|&byte| byte == 0)
}
696
/// Flag bit: the content contains at least one LF not preceded by CR.
const CONVERT_STAT_BITS_TXT_LF: u32 = 0x1;
/// Flag bit: the content contains at least one CRLF pair.
const CONVERT_STAT_BITS_TXT_CRLF: u32 = 0x2;
/// Flag bit: the content is classified as binary.
const CONVERT_STAT_BITS_BIN: u32 = 0x4;
701
/// Byte-class counters gathered over a buffer for text/binary detection.
#[derive(Clone, Default)]
struct TextStat {
    /// Number of NUL bytes.
    nul: u32,
    /// Number of CR bytes not followed by LF.
    lonecr: u32,
    /// Number of LF bytes not preceded by CR.
    lonelf: u32,
    /// Number of CRLF pairs.
    crlf: u32,
    /// Number of bytes counted as printable.
    printable: u32,
    /// Number of bytes counted as non-printable.
    nonprintable: u32,
}
711
712fn gather_text_stat(data: &[u8]) -> TextStat {
713 let mut s = TextStat::default();
714 let mut i = 0usize;
715 while i < data.len() {
716 let c = data[i];
717 if c == b'\r' {
718 if i + 1 < data.len() && data[i + 1] == b'\n' {
719 s.crlf += 1;
720 i += 2;
721 } else {
722 s.lonecr += 1;
723 i += 1;
724 }
725 continue;
726 }
727 if c == b'\n' {
728 s.lonelf += 1;
729 i += 1;
730 continue;
731 }
732 if c == 127 {
733 s.nonprintable += 1;
734 } else if c < 32 {
735 match c {
736 b'\t' | b'\x08' | b'\x1b' | b'\x0c' => s.printable += 1,
737 0 => {
738 s.nul += 1;
739 s.nonprintable += 1;
740 }
741 _ => s.nonprintable += 1,
742 }
743 } else {
744 s.printable += 1;
745 }
746 i += 1;
747 }
748 s
749}
750
751fn convert_is_binary(stats: &TextStat) -> bool {
752 stats.lonecr > 0 || stats.nul > 0 || (stats.printable >> 7) < stats.nonprintable
753}
754
755fn git_text_stat(data: &[u8]) -> TextStat {
756 let mut stats = gather_text_stat(data);
757 if !data.is_empty() && data[data.len() - 1] == 0x1a {
758 stats.nonprintable = stats.nonprintable.saturating_sub(1);
759 }
760 stats
761}
762
763fn will_convert_lf_to_crlf_from_stats(
765 stats: &TextStat,
766 conv: &ConversionConfig,
767 attrs: &FileAttrs,
768) -> bool {
769 let has_lone_lf = stats.lonelf > 0;
770 let is_bin = convert_is_binary(stats);
771
772 match attrs.crlf_legacy {
773 CrlfLegacyAttr::Unset | CrlfLegacyAttr::Input => return false,
774 CrlfLegacyAttr::Crlf => {
775 if attrs.text == TextAttr::Unset {
776 return false;
777 }
778 return has_lone_lf;
779 }
780 CrlfLegacyAttr::Unspecified => {}
781 }
782
783 if attrs.text == TextAttr::Unset {
784 return false;
785 }
786
787 if attrs.eol != EolAttr::Unspecified {
788 if attrs.text == TextAttr::Auto && is_bin {
789 return false;
790 }
791 if attrs.eol != EolAttr::Crlf {
792 return false;
793 }
794 if attrs.text == TextAttr::Auto {
795 return auto_crlf_should_smudge_lf_to_crlf_from_stats(stats);
796 }
797 return has_lone_lf;
798 }
799
800 if attrs.text == TextAttr::Set {
801 if !output_eol_is_crlf(conv) {
802 return false;
803 }
804 return has_lone_lf;
805 }
806
807 if attrs.text == TextAttr::Auto {
808 if is_bin || !output_eol_is_crlf(conv) {
809 return false;
810 }
811 return auto_crlf_should_smudge_lf_to_crlf_from_stats(stats);
812 }
813
814 match conv.autocrlf {
815 AutoCrlf::True => {
816 if is_bin {
817 return false;
818 }
819 auto_crlf_should_smudge_lf_to_crlf_from_stats(stats)
820 }
821 AutoCrlf::Input | AutoCrlf::False => false,
822 }
823}
824
825fn auto_crlf_should_smudge_lf_to_crlf_from_stats(stats: &TextStat) -> bool {
826 if stats.lonelf == 0 {
827 return false;
828 }
829 if stats.lonecr > 0 || stats.crlf > 0 {
830 return false;
831 }
832 !convert_is_binary(stats)
833}
834
835fn gather_convert_stats(data: &[u8]) -> u32 {
836 if data.is_empty() {
837 return 0;
838 }
839 let mut stats = gather_text_stat(data);
840 if !data.is_empty() && data[data.len() - 1] == 0x1a {
841 stats.nonprintable = stats.nonprintable.saturating_sub(1);
842 }
843 let mut ret = 0u32;
844 if convert_is_binary(&stats) {
845 ret |= CONVERT_STAT_BITS_BIN;
846 }
847 if stats.crlf > 0 {
848 ret |= CONVERT_STAT_BITS_TXT_CRLF;
849 }
850 if stats.lonelf > 0 {
851 ret |= CONVERT_STAT_BITS_TXT_LF;
852 }
853 ret
854}
855
856#[must_use]
858pub fn gather_convert_stats_ascii(data: &[u8]) -> &'static str {
859 let convert_stats = gather_convert_stats(data);
860 if convert_stats & CONVERT_STAT_BITS_BIN != 0 {
861 return "-text";
862 }
863 match convert_stats {
864 CONVERT_STAT_BITS_TXT_LF => "lf",
865 CONVERT_STAT_BITS_TXT_CRLF => "crlf",
866 x if x == (CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF) => "mixed",
867 _ => "none",
868 }
869}
870
871#[must_use]
874pub fn convert_attr_ascii_for_ls_files(
875 rules: &[AttrRule],
876 rel_path: &str,
877 config: &ConfigSet,
878) -> String {
879 let fa = get_file_attrs(rules, rel_path, false, config);
880 let mut action = match fa.text {
882 TextAttr::Set => 1, TextAttr::Unset => 2, TextAttr::Auto => 5, TextAttr::Unspecified => 0,
886 };
887 if action == 0 {
888 action = match fa.crlf_legacy {
889 CrlfLegacyAttr::Crlf => 1,
890 CrlfLegacyAttr::Unset => 2,
891 CrlfLegacyAttr::Input => 3, CrlfLegacyAttr::Unspecified => 0,
893 };
894 }
895 if action == 2 {
896 return "-text".to_string();
897 }
898 if action == 0 {
900 if fa.eol == EolAttr::Unspecified {
901 return String::new();
902 }
903 action = 1; }
905
906 if fa.eol == EolAttr::Lf {
908 if action == 5 {
909 action = 7; } else {
911 action = 3; }
913 } else if fa.eol == EolAttr::Crlf {
914 if action == 5 {
915 action = 6; } else {
917 action = 4; }
919 }
920
921 let attr_action = action;
923
924 match attr_action {
925 1 => "text".to_string(),
926 3 => "text eol=lf".to_string(),
927 4 => "text eol=crlf".to_string(),
928 5 => "text=auto".to_string(),
929 6 => "text=auto eol=crlf".to_string(),
930 7 => "text=auto eol=lf".to_string(),
931 _ => String::new(),
932 }
933}
934
/// Reports whether `data` contains at least one CRLF pair.
pub fn has_crlf(data: &[u8]) -> bool {
    data.windows(2)
        .any(|pair| pair[0] == b'\r' && pair[1] == b'\n')
}
939
/// Reports whether `data` contains an LF that is not immediately preceded by CR.
pub fn has_lone_lf(data: &[u8]) -> bool {
    let mut previous: Option<u8> = None;
    for &byte in data {
        if byte == b'\n' && previous != Some(b'\r') {
            return true;
        }
        previous = Some(byte);
    }
    false
}
949
/// Reports whether `data` contains a CR that is not immediately followed by LF.
fn has_lone_cr(data: &[u8]) -> bool {
    data.iter()
        .enumerate()
        .any(|(index, &byte)| byte == b'\r' && data.get(index + 1) != Some(&b'\n'))
}
959
960fn auto_crlf_should_smudge_lf_to_crlf(data: &[u8]) -> bool {
963 if !has_lone_lf(data) {
964 return false;
965 }
966 if has_lone_cr(data) || has_crlf(data) {
967 return false;
968 }
969 if is_binary(data) {
970 return false;
971 }
972 true
973}
974
975pub fn is_all_crlf(data: &[u8]) -> bool {
977 has_crlf(data) && !has_lone_lf(data)
978}
979
980pub fn is_all_lf(data: &[u8]) -> bool {
982 has_lone_lf(data) && !has_crlf(data)
983}
984
985#[must_use]
987pub fn has_crlf_in_index_blob(data: &[u8]) -> bool {
988 if !data.contains(&b'\r') {
989 return false;
990 }
991 let st = gather_convert_stats(data);
992 st & CONVERT_STAT_BITS_BIN == 0 && (st & CONVERT_STAT_BITS_TXT_CRLF) != 0
993}
994
995#[must_use]
999pub fn clean_uses_autocrlf_index_guard(attrs: &FileAttrs, conv: &ConversionConfig) -> bool {
1000 if attrs.text == TextAttr::Unset || attrs.crlf_legacy == CrlfLegacyAttr::Unset {
1001 return false;
1002 }
1003 if attrs.eol != EolAttr::Unspecified && attrs.text != TextAttr::Auto {
1004 return false;
1005 }
1006 attrs.text == TextAttr::Auto
1007 || (attrs.text == TextAttr::Unspecified
1008 && matches!(conv.autocrlf, AutoCrlf::True | AutoCrlf::Input))
1009}
1010
/// Optional inputs for converting working-tree content to its stored form.
// NOTE(review): the consumer of these options is outside this excerpt; the
// field descriptions below are inferred from the names — confirm.
#[derive(Clone, Copy, Debug)]
pub struct ConvertToGitOpts<'a> {
    /// Presumably the content currently stored in the index for the path.
    pub index_blob: Option<&'a [u8]>,
    /// Presumably requests renormalisation of line endings.
    pub renormalize: bool,
    /// Presumably enables the `core.safecrlf` check.
    pub check_safecrlf: bool,
}
1021
1022impl Default for ConvertToGitOpts<'_> {
1023 fn default() -> Self {
1024 Self {
1025 index_blob: None,
1026 renormalize: false,
1027 check_safecrlf: true,
1028 }
1029 }
1030}
1031
/// Byte order mark of big-endian UTF-16.
const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];
/// Byte order mark of little-endian UTF-16.
const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE];
/// Byte order mark of big-endian UTF-32.
const UTF32_BE_BOM: &[u8] = &[0x00, 0x00, 0xFE, 0xFF];
/// Byte order mark of little-endian UTF-32.
const UTF32_LE_BOM: &[u8] = &[0xFF, 0xFE, 0x00, 0x00];
1041
/// Normalises a UTF encoding label to its canonical lowercase `utf-<n>…` form.
///
/// Surrounding whitespace and ASCII case are ignored and the dash after
/// `utf` is optional (`UTF16LE` → `utf-16le`). Returns `None` for any label
/// that is not one of the recognised UTF variants.
fn canonical_utf_label(label: &str) -> Option<String> {
    const KNOWN_SUFFIXES: [&str; 9] = [
        "8", "16", "16be", "16le", "16be-bom", "16le-bom", "32", "32be", "32le",
    ];

    let lowered = label.trim().to_ascii_lowercase();
    let tail = lowered.strip_prefix("utf")?;
    let tail = tail.strip_prefix('-').unwrap_or(tail);

    KNOWN_SUFFIXES
        .contains(&tail)
        .then(|| format!("utf-{tail}"))
}
1064
/// Reports whether `data` begins with the byte sequence `bom`.
fn has_bom_prefix(data: &[u8], bom: &[u8]) -> bool {
    data.starts_with(bom)
}
1068
1069fn has_prohibited_utf_bom(canon: &str, data: &[u8]) -> bool {
1071 match canon {
1072 "utf-16be" | "utf-16le" => {
1073 has_bom_prefix(data, UTF16_BE_BOM) || has_bom_prefix(data, UTF16_LE_BOM)
1074 }
1075 "utf-32be" | "utf-32le" => {
1076 has_bom_prefix(data, UTF32_BE_BOM) || has_bom_prefix(data, UTF32_LE_BOM)
1077 }
1078 _ => false,
1079 }
1080}
1081
1082fn is_missing_required_utf_bom(canon: &str, data: &[u8]) -> bool {
1084 match canon {
1085 "utf-16" => !(has_bom_prefix(data, UTF16_BE_BOM) || has_bom_prefix(data, UTF16_LE_BOM)),
1086 "utf-32" => !(has_bom_prefix(data, UTF32_BE_BOM) || has_bom_prefix(data, UTF32_LE_BOM)),
1087 _ => false,
1088 }
1089}
1090
1091fn validate_utf_bom(
1099 canon: &str,
1100 label: &str,
1101 rel_path: &str,
1102 data: &[u8],
1103 die_on_error: bool,
1104) -> Result<(), String> {
1105 if has_prohibited_utf_bom(canon, data) {
1106 let stripped = label
1108 .strip_prefix("utf")
1109 .or_else(|| label.strip_prefix("UTF"));
1110 let utf_num = stripped
1111 .map(|s| s.trim_start_matches('-'))
1112 .and_then(|s| s.get(..s.len().saturating_sub(2)))
1113 .unwrap_or("");
1114 eprintln!(
1115 "The file '{rel_path}' contains a byte order mark (BOM). Please use UTF-{utf_num} as working-tree-encoding."
1116 );
1117 let body = format!("BOM is prohibited in '{rel_path}' if encoded as {label}");
1118 if die_on_error {
1119 return Err(format!("fatal: {body}"));
1120 }
1121 eprintln!("error: {body}");
1122 return Err(body);
1123 }
1124 if is_missing_required_utf_bom(canon, data) {
1125 let utf_num = label
1126 .strip_prefix("utf")
1127 .or_else(|| label.strip_prefix("UTF"))
1128 .map(|s| s.trim_start_matches('-'))
1129 .unwrap_or("");
1130 eprintln!(
1131 "The file '{rel_path}' is missing a byte order mark (BOM). Please use UTF-{utf_num}BE or UTF-{utf_num}LE (depending on the byte order) as working-tree-encoding."
1132 );
1133 let body = format!("BOM is required in '{rel_path}' if encoded as {label}");
1134 if die_on_error {
1135 return Err(format!("fatal: {body}"));
1136 }
1137 eprintln!("error: {body}");
1138 return Err(body);
1139 }
1140 Ok(())
1141}
1142
1143fn encoding_needs_roundtrip_check(enc_name: &str, conv: &ConversionConfig) -> bool {
1146 let list = conv
1147 .check_roundtrip_encoding
1148 .as_deref()
1149 .unwrap_or("SHIFT-JIS");
1150 let target = enc_name.to_ascii_lowercase();
1151 list.split([',', ' ', '\t'])
1152 .map(str::trim)
1153 .filter(|tok| !tok.is_empty())
1154 .any(|tok| tok.eq_ignore_ascii_case(&target))
1155}
1156
/// Emits a "Checking roundtrip encoding" trace line when `GIT_TRACE` is
/// enabled.
///
/// Nothing is written when the variable is unset, empty, `0` or `false`
/// (any case). The values `1`, `true` and `2` send the line to stderr; any
/// other value is treated as a file path that is created if needed and
/// appended to. Write and open failures are ignored.
fn trace_roundtrip_encoding(enc_name: &str) {
    use std::io::Write;

    let destination = match std::env::var("GIT_TRACE") {
        Ok(value) => value,
        Err(_) => return,
    };
    let disabled = destination.is_empty()
        || destination == "0"
        || destination.eq_ignore_ascii_case("false");
    if disabled {
        return;
    }

    let message = format!("Checking roundtrip encoding for {enc_name}...\n");

    if matches!(destination.as_str(), "1" | "true" | "2") {
        let _ = std::io::stderr().write_all(message.as_bytes());
        return;
    }

    let opened = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(&destination);
    if let Ok(mut file) = opened {
        let _ = file.write_all(message.as_bytes());
    }
}
1182
/// Converts `data` from encoding `from` to encoding `to` by running the
/// external `iconv` program.
///
/// Returns `None` when `iconv` cannot be started, cannot be waited on, or
/// exits unsuccessfully; its stderr is discarded.
///
/// The input is fed from a separate scoped thread while this thread drains
/// stdout. Writing everything first and reading afterwards can deadlock once
/// both pipe buffers are full, if `iconv` streams its output.
fn reencode_via_iconv(data: &[u8], from: &str, to: &str) -> Option<Vec<u8>> {
    use std::io::Write;

    let mut child = Command::new("iconv")
        .arg("-f")
        .arg(from)
        .arg("-t")
        .arg(to)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::null())
        .spawn()
        .ok()?;

    let stdin = child.stdin.take();
    let output = std::thread::scope(|scope| {
        if let Some(mut pipe) = stdin {
            scope.spawn(move || {
                // A write error (e.g. iconv exited early) is reflected in
                // the exit status checked below. Dropping `pipe` closes
                // stdin so iconv sees end-of-input.
                let _ = pipe.write_all(data);
            });
        }
        child.wait_with_output()
    })
    .ok()?;

    output.status.success().then_some(output.stdout)
}
1207
1208fn decode_working_tree_bytes_to_utf8(
1213 src: &[u8],
1214 rel_path: &str,
1215 enc_label: &str,
1216 validate: bool,
1217) -> Result<Vec<u8>, String> {
1218 let label = enc_label.trim();
1219 if label.is_empty() {
1220 return Ok(src.to_vec());
1221 }
1222
1223 let canon = canonical_utf_label(label);
1224
1225 if let Some(ref c) = canon {
1229 validate_utf_bom(c, label, rel_path, src, validate)?;
1230 }
1231
1232 if canon.as_deref() == Some("utf-8") {
1234 return Ok(src.to_vec());
1235 }
1236
1237 let (iconv_from, body): (&str, &[u8]) = match canon.as_deref() {
1239 Some("utf-16le-bom") => {
1240 let body = if has_bom_prefix(src, UTF16_LE_BOM) {
1241 &src[2..]
1242 } else {
1243 src
1244 };
1245 ("UTF-16LE", body)
1246 }
1247 Some("utf-16be-bom") => {
1248 let body = if has_bom_prefix(src, UTF16_BE_BOM) {
1249 &src[2..]
1250 } else {
1251 src
1252 };
1253 ("UTF-16BE", body)
1254 }
1255 Some(c) => (utf_canon_to_iconv_name(c), src),
1257 None => {
1258 if let Some(out) = reencode_via_iconv(src, label, "UTF-8") {
1260 return Ok(out);
1261 }
1262 let Some(enc) = crate::commit_encoding::resolve(label) else {
1265 return Err(format!(
1266 "failed to encode '{rel_path}' from {label} to UTF-8"
1267 ));
1268 };
1269 if enc == UTF_8 {
1270 return Ok(src.to_vec());
1271 }
1272 let (cow, _, had_errors) = enc.decode(src);
1273 if had_errors {
1274 return Err(format!(
1275 "failed to encode '{rel_path}' from {label} to UTF-8"
1276 ));
1277 }
1278 return Ok(cow.into_owned().into_bytes());
1279 }
1280 };
1281
1282 if let Some(out) = reencode_via_iconv(body, iconv_from, "UTF-8") {
1283 return Ok(out);
1284 }
1285
1286 decode_utf_bytes_with_encoding_rs(body, rel_path, label, iconv_from)
1288}
1289
1290fn decode_utf_bytes_with_encoding_rs(
1292 body: &[u8],
1293 rel_path: &str,
1294 label: &str,
1295 iconv_from: &str,
1296) -> Result<Vec<u8>, String> {
1297 let fail = || format!("failed to encode '{rel_path}' from {label} to UTF-8");
1298 match iconv_from {
1299 "UTF-16BE" => {
1300 let (cow, _, had_errors) = encoding_rs::UTF_16BE.decode(body);
1301 if had_errors {
1302 return Err(fail());
1303 }
1304 Ok(cow.into_owned().into_bytes())
1305 }
1306 "UTF-16LE" => {
1307 let (cow, _, had_errors) = encoding_rs::UTF_16LE.decode(body);
1308 if had_errors {
1309 return Err(fail());
1310 }
1311 Ok(cow.into_owned().into_bytes())
1312 }
1313 "UTF-16" => {
1314 if has_bom_prefix(body, UTF16_BE_BOM) {
1315 decode_utf_bytes_with_encoding_rs(&body[2..], rel_path, label, "UTF-16BE")
1316 } else if has_bom_prefix(body, UTF16_LE_BOM) {
1317 decode_utf_bytes_with_encoding_rs(&body[2..], rel_path, label, "UTF-16LE")
1318 } else {
1319 Err(fail())
1320 }
1321 }
1322 "UTF-32" => {
1323 if has_bom_prefix(body, UTF32_BE_BOM) {
1324 decode_utf32_body_to_utf8_bytes(&body[4..], rel_path, true)
1325 } else if has_bom_prefix(body, UTF32_LE_BOM) {
1326 decode_utf32_body_to_utf8_bytes(&body[4..], rel_path, false)
1327 } else {
1328 Err(fail())
1329 }
1330 }
1331 "UTF-32BE" => decode_utf32_body_to_utf8_bytes(body, rel_path, true),
1332 "UTF-32LE" => decode_utf32_body_to_utf8_bytes(body, rel_path, false),
1333 _ => Err(fail()),
1334 }
1335}
1336
/// Decodes BOM-less UTF-32 `body` into UTF-8 bytes, reading each 4-byte unit
/// as big- or little-endian according to `big_endian`.
///
/// # Errors
/// Returns a "failed to encode" message naming `rel_path` when the length is
/// not a multiple of four or a unit is not a valid Unicode scalar value.
fn decode_utf32_body_to_utf8_bytes(
    body: &[u8],
    rel_path: &str,
    big_endian: bool,
) -> Result<Vec<u8>, String> {
    let fail = || format!("failed to encode '{rel_path}' from UTF-32 to UTF-8");
    if body.len() % 4 != 0 {
        return Err(fail());
    }

    let decoded: Option<String> = body
        .chunks_exact(4)
        .map(|quad| {
            let raw = [quad[0], quad[1], quad[2], quad[3]];
            let code_point = if big_endian {
                u32::from_be_bytes(raw)
            } else {
                u32::from_le_bytes(raw)
            };
            char::from_u32(code_point)
        })
        .collect();

    decoded.map(String::into_bytes).ok_or_else(fail)
}
1360
/// Maps a canonical lowercase UTF label to the upper-case name passed to
/// `iconv`. Labels not in the table map to `UTF-8`.
fn utf_canon_to_iconv_name(canon: &str) -> &'static str {
    const ICONV_NAMES: [(&str, &str); 6] = [
        ("utf-16", "UTF-16"),
        ("utf-16be", "UTF-16BE"),
        ("utf-16le", "UTF-16LE"),
        ("utf-32", "UTF-32"),
        ("utf-32be", "UTF-32BE"),
        ("utf-32le", "UTF-32LE"),
    ];

    ICONV_NAMES
        .iter()
        .find(|&&(label, _)| label == canon)
        .map_or("UTF-8", |&(_, iconv_name)| iconv_name)
}
1373
1374fn encode_utf8_blob_to_working_tree_bytes(
1379 src: &[u8],
1380 rel_path: &str,
1381 enc_label: &str,
1382) -> Result<Vec<u8>, String> {
1383 let label = enc_label.trim();
1384 if label.is_empty() {
1385 return Ok(src.to_vec());
1386 }
1387
1388 let canon = canonical_utf_label(label);
1389 if canon.as_deref() == Some("utf-8") {
1390 return Ok(src.to_vec());
1391 }
1392
1393 let fail = || format!("failed to encode '{rel_path}' from UTF-8 to {label}");
1394
1395 match canon.as_deref() {
1397 Some("utf-16le-bom") => {
1398 let body = reencode_via_iconv(src, "UTF-8", "UTF-16LE")
1399 .or_else(|| encode_utf_with_encoding_rs(src, "UTF-16LE"))
1400 .ok_or_else(fail)?;
1401 let mut out = UTF16_LE_BOM.to_vec();
1402 out.extend(body);
1403 return Ok(out);
1404 }
1405 Some("utf-16be-bom") => {
1406 let body = reencode_via_iconv(src, "UTF-8", "UTF-16BE")
1407 .or_else(|| encode_utf_with_encoding_rs(src, "UTF-16BE"))
1408 .ok_or_else(fail)?;
1409 let mut out = UTF16_BE_BOM.to_vec();
1410 out.extend(body);
1411 return Ok(out);
1412 }
1413 Some(c) => {
1414 let iconv_name = utf_canon_to_iconv_name(c);
1415 if let Some(out) = reencode_via_iconv(src, "UTF-8", iconv_name) {
1416 return Ok(out);
1417 }
1418 return encode_utf_with_encoding_rs(src, c).ok_or_else(fail);
1419 }
1420 None => {}
1421 }
1422
1423 if let Some(out) = reencode_via_iconv(src, "UTF-8", label) {
1425 return Ok(out);
1426 }
1427 let s = std::str::from_utf8(src).map_err(|_| fail())?;
1428 let Some(enc) = crate::commit_encoding::resolve(label) else {
1429 return Err(format!(
1430 "unknown working-tree-encoding '{label}' for '{rel_path}'"
1431 ));
1432 };
1433 if enc == UTF_8 {
1434 return Ok(src.to_vec());
1435 }
1436 let (cow, _, had_errors) = enc.encode(s);
1437 if had_errors {
1438 return Err(fail());
1439 }
1440 Ok(cow.into_owned())
1441}
1442
/// Encodes UTF-8 `src` into a BOM-less UTF-16 or UTF-32 byte stream without external tools.
///
/// `target` is matched ASCII-case-insensitively. `utf-16` and `utf-32` without an explicit
/// byte order are written big-endian, the same as `utf-16be` / `utf-32be`.
///
/// Returns `None` when `src` is not valid UTF-8 or `target` is not one of the six supported
/// labels.
fn encode_utf_with_encoding_rs(src: &[u8], target: &str) -> Option<Vec<u8>> {
    let text = std::str::from_utf8(src).ok()?;
    let encoded: Vec<u8> = match target.to_ascii_lowercase().as_str() {
        "utf-16" | "utf-16be" => text.encode_utf16().flat_map(u16::to_be_bytes).collect(),
        "utf-16le" => text.encode_utf16().flat_map(u16::to_le_bytes).collect(),
        "utf-32" | "utf-32be" => text
            .chars()
            .flat_map(|ch| u32::from(ch).to_be_bytes())
            .collect(),
        "utf-32le" => text
            .chars()
            .flat_map(|ch| u32::from(ch).to_le_bytes())
            .collect(),
        _ => return None,
    };
    Some(encoded)
}
1474
1475pub fn convert_to_git(
1488 data: &[u8],
1489 rel_path: &str,
1490 conv: &ConversionConfig,
1491 file_attrs: &FileAttrs,
1492) -> Result<Vec<u8>, String> {
1493 convert_to_git_with_opts(
1494 data,
1495 rel_path,
1496 conv,
1497 file_attrs,
1498 ConvertToGitOpts::default(),
1499 )
1500}
1501
1502pub fn convert_to_git_with_opts(
1504 data: &[u8],
1505 rel_path: &str,
1506 conv: &ConversionConfig,
1507 file_attrs: &FileAttrs,
1508 opts: ConvertToGitOpts<'_>,
1509) -> Result<Vec<u8>, String> {
1510 let mut buf = data.to_vec();
1511
1512 if let Some(ref proc_cmd) = file_attrs.filter_process {
1514 let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
1515 match apply_process_clean(proc_cmd, rel_path, &buf) {
1516 Ok(filtered) => buf = filtered,
1517 Err(e) => {
1518 if file_attrs.filter_clean_required {
1519 if e.contains("expected git-filter-server") {
1520 return Err(e);
1521 }
1522 return Err(format!("fatal: {rel_path}: clean filter '{name}' failed"));
1523 }
1524 if e.starts_with("filter status: abort") {
1525 crate::filter_process::disable_process_filter(proc_cmd);
1526 }
1527 eprintln!("error: external filter '{name}' failed");
1528 }
1529 }
1530 } else {
1531 match file_attrs.filter_clean.as_ref() {
1532 Some(clean_cmd) => {
1533 buf = run_filter(clean_cmd, &buf, rel_path).map_err(|e| {
1534 let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
1535 if file_attrs.filter_clean_required {
1536 format!("fatal: {rel_path}: clean filter '{name}' failed")
1537 } else {
1538 format!("clean filter failed: {e}")
1539 }
1540 })?;
1541 }
1542 None => {
1543 if file_attrs.filter_clean_required {
1544 let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
1545 return Err(format!("fatal: {rel_path}: clean filter '{name}' failed"));
1546 }
1547 }
1548 }
1549 }
1550
1551 if let Some(ref enc) = file_attrs.working_tree_encoding {
1553 if enc == "set" || enc == "true" || enc == "false" {
1556 return Err("fatal: true/false are no valid working-tree-encodings".to_string());
1557 }
1558 let writing_object = opts.check_safecrlf;
1560 buf = decode_working_tree_bytes_to_utf8(&buf, rel_path, enc, writing_object)?;
1561 if writing_object && encoding_needs_roundtrip_check(enc, conv) {
1564 trace_roundtrip_encoding(enc);
1565 }
1566 }
1567
1568 let would_convert = would_convert_on_input(conv, file_attrs, &buf);
1570
1571 let mut convert_crlf_into_lf = would_convert && has_crlf(&buf);
1572 if convert_crlf_into_lf
1573 && clean_uses_autocrlf_index_guard(file_attrs, conv)
1574 && !opts.renormalize
1575 && opts.index_blob.is_some_and(has_crlf_in_index_blob)
1576 {
1577 convert_crlf_into_lf = false;
1578 }
1579
1580 if would_convert && opts.check_safecrlf {
1582 check_safecrlf_roundtrip(conv, file_attrs, &buf, rel_path, convert_crlf_into_lf)?;
1583 }
1584
1585 if convert_crlf_into_lf {
1587 buf = crlf_to_lf(&buf);
1588 }
1589
1590 Ok(buf)
1591}
1592
1593fn would_convert_on_input(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
1597 match attrs.crlf_legacy {
1598 CrlfLegacyAttr::Unset => return false,
1599 CrlfLegacyAttr::Input => {
1600 if is_binary(data) {
1601 return false;
1602 }
1603 return true;
1604 }
1605 CrlfLegacyAttr::Crlf => {
1606 if attrs.text == TextAttr::Unset {
1607 return false;
1608 }
1609 if is_binary(data) {
1610 return false;
1611 }
1612 return true;
1613 }
1614 CrlfLegacyAttr::Unspecified => {}
1615 }
1616
1617 if attrs.text == TextAttr::Unset {
1619 return false;
1620 }
1621
1622 if attrs.eol != EolAttr::Unspecified {
1624 if attrs.text == TextAttr::Auto && is_binary(data) {
1625 return false;
1626 }
1627 return true;
1628 }
1629
1630 if attrs.text == TextAttr::Set {
1632 return true;
1633 }
1634
1635 if attrs.text == TextAttr::Auto {
1636 if is_binary(data) {
1637 return false;
1638 }
1639 return true;
1640 }
1641
1642 match conv.autocrlf {
1644 AutoCrlf::True | AutoCrlf::Input => {
1645 if is_binary(data) {
1646 return false;
1647 }
1648 true
1649 }
1650 AutoCrlf::False => false,
1651 }
1652}
1653
/// Prints the safecrlf warning for a file whose CRLF line endings would be turned into LF.
fn eprint_safecrlf_warn_crlf_to_lf(rel_path: &str) {
    let warning = format!(
        "warning: in the working copy of '{rel_path}', CRLF will be replaced by LF the next time Git touches it"
    );
    eprintln!("{warning}");
}
1660
/// Prints the safecrlf warning for a file whose LF line endings would be turned into CRLF.
fn eprint_safecrlf_warn_lf_to_crlf(rel_path: &str) {
    let warning = format!(
        "warning: in the working copy of '{rel_path}', LF will be replaced by CRLF the next time Git touches it"
    );
    eprintln!("{warning}");
}
1667
1668fn check_safecrlf_roundtrip(
1670 conv: &ConversionConfig,
1671 file_attrs: &FileAttrs,
1672 data: &[u8],
1673 rel_path: &str,
1674 convert_crlf_into_lf: bool,
1675) -> Result<(), String> {
1676 if conv.safecrlf == SafeCrlf::False {
1677 return Ok(());
1678 }
1679
1680 let old_stats = git_text_stat(data);
1681
1682 let mut new_stats = old_stats.clone();
1683 if convert_crlf_into_lf && new_stats.crlf > 0 {
1684 new_stats.lonelf += new_stats.crlf;
1685 new_stats.crlf = 0;
1686 }
1687 if will_convert_lf_to_crlf_from_stats(&new_stats, conv, file_attrs) {
1688 new_stats.crlf += new_stats.lonelf;
1689 new_stats.lonelf = 0;
1690 }
1691
1692 if old_stats.crlf > 0 && new_stats.crlf == 0 {
1693 let msg = format!("fatal: CRLF would be replaced by LF in {rel_path}");
1694 if conv.safecrlf == SafeCrlf::True {
1695 return Err(msg);
1696 }
1697 eprint_safecrlf_warn_crlf_to_lf(rel_path);
1698 } else if old_stats.lonelf > 0 && new_stats.lonelf == 0 {
1699 let msg = format!("fatal: LF would be replaced by CRLF in {rel_path}");
1700 if conv.safecrlf == SafeCrlf::True {
1701 return Err(msg);
1702 }
1703 eprint_safecrlf_warn_lf_to_crlf(rel_path);
1704 }
1705
1706 Ok(())
1707}
1708
/// Returns a copy of `data` with every CRLF pair replaced by a single LF.
///
/// A CR that is not immediately followed by LF is kept as-is.
pub fn crlf_to_lf(data: &[u8]) -> Vec<u8> {
    let mut converted = Vec::with_capacity(data.len());
    let mut bytes = data.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        if byte == b'\r' && bytes.peek() == Some(&b'\n') {
            // Drop the CR; the LF itself is copied on the next iteration.
            continue;
        }
        converted.push(byte);
    }
    converted
}
1724
/// Returns a copy of `data` with a CR inserted before every LF that does not already follow
/// one.
///
/// Existing CRLF pairs are left untouched, so the conversion never produces `\r\r\n` from
/// `\r\n`.
pub fn lf_to_crlf(data: &[u8]) -> Vec<u8> {
    // Reserve roughly 10% extra room for the inserted CR bytes.
    let mut converted = Vec::with_capacity(data.len() + data.len() / 10);
    let mut previous = None;
    for &byte in data {
        if byte == b'\n' && previous != Some(b'\r') {
            converted.push(b'\r');
        }
        converted.push(byte);
        previous = Some(byte);
    }
    converted
}
1740
1741pub fn convert_to_worktree(
1757 data: &[u8],
1758 rel_path: &str,
1759 conv: &ConversionConfig,
1760 file_attrs: &FileAttrs,
1761 oid_hex: Option<&str>,
1762 smudge_meta: Option<&FilterSmudgeMeta>,
1763 delayed_checkout: Option<&mut crate::filter_process::DelayedProcessCheckout>,
1764) -> Result<Option<Vec<u8>>, String> {
1765 let mut buf = data.to_vec();
1766
1767 if file_attrs.ident {
1769 if let Some(oid) = oid_hex {
1770 buf = expand_ident(&buf, oid);
1771 }
1772 }
1773
1774 let can_delay_smudge = delayed_checkout.is_some()
1775 && file_attrs.working_tree_encoding.is_none()
1776 && !file_attrs.ident
1777 && file_attrs
1778 .filter_process
1779 .as_deref()
1780 .is_some_and(|c| !c.is_empty())
1781 && !should_convert_to_crlf(conv, file_attrs, &buf)
1782 && file_attrs
1783 .filter_process
1784 .as_deref()
1785 .is_some_and(crate::filter_process::process_filter_supports_delay);
1786
1787 let should_convert = should_convert_to_crlf(conv, file_attrs, &buf);
1789 if should_convert {
1790 buf = lf_to_crlf(&buf);
1791 }
1792
1793 if let Some(ref enc) = file_attrs.working_tree_encoding {
1795 buf = encode_utf8_blob_to_working_tree_bytes(&buf, rel_path, enc)?;
1796 }
1797
1798 let driver = file_attrs.filter_driver_name.as_deref().unwrap_or("");
1800 if let Some(ref proc_cmd) = file_attrs.filter_process {
1801 let smudge_out =
1802 match apply_process_smudge(proc_cmd, rel_path, &buf, smudge_meta, can_delay_smudge) {
1803 Ok(out) => out,
1804 Err(e) => {
1805 if file_attrs.filter_smudge_required {
1806 return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
1807 }
1808 if e.starts_with("filter status: abort") {
1809 crate::filter_process::disable_process_filter(proc_cmd);
1810 }
1811 eprintln!("error: external filter '{driver}' failed");
1812 return Ok(Some(buf));
1813 }
1814 };
1815 let Some(out) = smudge_out else {
1816 let Some(q) = delayed_checkout else {
1817 return Err(format!(
1818 "internal error: delayed smudge without checkout queue for {rel_path}"
1819 ));
1820 };
1821 q.push_delayed(
1822 proc_cmd.clone(),
1823 rel_path.to_string(),
1824 smudge_meta.cloned().unwrap_or_default(),
1825 );
1826 return Ok(None);
1827 };
1828 buf = out;
1829 } else {
1830 match file_attrs.filter_smudge.as_ref() {
1831 Some(smudge_cmd) => match run_filter(smudge_cmd, &buf, rel_path) {
1832 Ok(filtered) => buf = filtered,
1833 Err(_e) => {
1834 if file_attrs.filter_smudge_required {
1835 return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
1836 }
1837 }
1838 },
1839 None => {
1840 if file_attrs.filter_smudge_required {
1841 return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
1842 }
1843 }
1844 }
1845 }
1846
1847 Ok(Some(buf))
1848}
1849
1850#[must_use]
1852pub fn convert_to_worktree_eager(
1853 data: &[u8],
1854 rel_path: &str,
1855 conv: &ConversionConfig,
1856 file_attrs: &FileAttrs,
1857 oid_hex: Option<&str>,
1858 smudge_meta: Option<&FilterSmudgeMeta>,
1859) -> Result<Vec<u8>, String> {
1860 match convert_to_worktree(data, rel_path, conv, file_attrs, oid_hex, smudge_meta, None)? {
1861 Some(v) => Ok(v),
1862 None => Err(format!(
1863 "internal error: unexpected delayed smudge for {rel_path}"
1864 )),
1865 }
1866}
1867
1868#[must_use]
1870pub fn should_convert_to_crlf(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
1871 match attrs.crlf_legacy {
1872 CrlfLegacyAttr::Unset | CrlfLegacyAttr::Input => return false,
1873 CrlfLegacyAttr::Crlf => {
1874 if attrs.text == TextAttr::Unset {
1875 return false;
1876 }
1877 return true;
1880 }
1881 CrlfLegacyAttr::Unspecified => {}
1882 }
1883
1884 if attrs.text == TextAttr::Unset {
1886 return false;
1887 }
1888
1889 if attrs.eol != EolAttr::Unspecified {
1891 if attrs.text == TextAttr::Auto && is_binary(data) {
1892 return false;
1893 }
1894 if attrs.eol != EolAttr::Crlf {
1895 return false;
1896 }
1897 if attrs.text == TextAttr::Auto {
1899 return auto_crlf_should_smudge_lf_to_crlf(data);
1900 }
1901 return true;
1903 }
1904
1905 if attrs.text == TextAttr::Set {
1907 return output_eol_is_crlf(conv);
1908 }
1909
1910 if attrs.text == TextAttr::Auto {
1911 if is_binary(data) {
1912 return false;
1913 }
1914 if !output_eol_is_crlf(conv) {
1915 return false;
1916 }
1917 return auto_crlf_should_smudge_lf_to_crlf(data);
1918 }
1919
1920 match conv.autocrlf {
1922 AutoCrlf::True => {
1923 if is_binary(data) {
1924 return false;
1925 }
1926 auto_crlf_should_smudge_lf_to_crlf(data)
1927 }
1928 AutoCrlf::Input | AutoCrlf::False => false,
1929 }
1930}
1931
1932fn output_eol_is_crlf(conv: &ConversionConfig) -> bool {
1934 if conv.autocrlf == AutoCrlf::Input {
1936 return false;
1937 }
1938 if conv.autocrlf == AutoCrlf::True {
1939 return true;
1940 }
1941 match conv.eol {
1942 CoreEol::Crlf => true,
1943 CoreEol::Lf => false,
1944 CoreEol::Native => {
1945 cfg!(windows)
1947 }
1948 }
1949}
1950
1951fn expand_ident(data: &[u8], oid: &str) -> Vec<u8> {
1956 if !count_ident_regions(data) {
1957 return data.to_vec();
1958 }
1959 let replacement = format!("$Id: {oid} $");
1960 let mut out = Vec::with_capacity(data.len() + 60);
1961 let mut i = 0;
1962 while i < data.len() {
1963 if data[i] != b'$' {
1964 out.push(data[i]);
1965 i += 1;
1966 continue;
1967 }
1968 if i + 3 > data.len() || data[i + 1] != b'I' || data[i + 2] != b'd' {
1969 out.push(data[i]);
1970 i += 1;
1971 continue;
1972 }
1973 let after_id = i + 3;
1974 let ch = data.get(after_id).copied();
1975 match ch {
1976 Some(b'$') => {
1977 out.extend_from_slice(replacement.as_bytes());
1978 i = after_id + 1;
1979 }
1980 Some(b':') => {
1981 let rest = &data[after_id + 1..];
1982 let line_end = rest
1983 .iter()
1984 .position(|&b| b == b'\n' || b == b'\r')
1985 .unwrap_or(rest.len());
1986 let line = &rest[..line_end];
1987 let Some(dollar_rel) = line.iter().position(|&b| b == b'$') else {
1988 out.push(data[i]);
1989 i += 1;
1990 continue;
1991 };
1992 if line[..dollar_rel].contains(&b'\n') {
1993 out.push(data[i]);
1994 i += 1;
1995 continue;
1996 }
1997 let payload = &line[..dollar_rel];
2000 let foreign = payload.len() > 1
2001 && payload[1..]
2002 .iter()
2003 .position(|&b| b == b' ')
2004 .is_some_and(|rel| {
2005 let pos = 1 + rel;
2006 pos < payload.len().saturating_sub(1)
2007 });
2008 if foreign {
2009 out.push(data[i]);
2010 i += 1;
2011 continue;
2012 }
2013 out.extend_from_slice(replacement.as_bytes());
2014 i = after_id + 1 + dollar_rel + 1;
2015 }
2016 _ => {
2017 out.push(data[i]);
2018 i += 1;
2019 }
2020 }
2021 }
2022 out
2023}
2024
/// Reports whether `data` contains at least one `$Id$` keyword or one `$Id: ... $` keyword
/// that is closed by `$` before the end of its line.
///
/// Despite the name, the result is a presence flag rather than a count.
fn count_ident_regions(data: &[u8]) -> bool {
    data.iter()
        .enumerate()
        .filter(|&(_, &byte)| byte == b'$')
        .any(|(start, _)| {
            let tail = &data[start..];
            if !tail.starts_with(b"$Id") {
                return false;
            }
            match tail.get(3).copied() {
                Some(b'$') => true,
                // Expanded form: the next `$` must come before any line break.
                Some(b':') => {
                    tail[4..]
                        .iter()
                        .find(|&&byte| matches!(byte, b'$' | b'\n' | b'\r'))
                        == Some(&b'$')
                }
                _ => false,
            }
        })
}
2063
/// Collapses every expanded `$Id: ... $` keyword in `data` back to `$Id$`.
///
/// A keyword is only collapsed when its closing `$` appears before the end of the line;
/// everything else is copied through unchanged.
pub fn collapse_ident(data: &[u8]) -> Vec<u8> {
    let mut collapsed = Vec::with_capacity(data.len());
    let mut remaining = data;
    while let Some((&first, tail)) = remaining.split_first() {
        if let Some(after_colon) = remaining.strip_prefix(b"$Id:") {
            // Look for the closing `$` without crossing a line break.
            let closing = after_colon
                .iter()
                .take_while(|&&b| b != b'\n' && b != b'\r')
                .position(|&b| b == b'$');
            if let Some(end) = closing {
                collapsed.extend_from_slice(b"$Id$");
                remaining = &after_colon[end + 1..];
                continue;
            }
        }
        collapsed.push(first);
        remaining = tail;
    }
    collapsed
}
2087
/// Wraps `s` in single quotes for use as one shell word.
///
/// Each embedded single quote is written as `'\''`: close the quoted string, emit an escaped
/// quote, and reopen the quoted string.
fn sq_quote_buf(s: &str) -> String {
    format!("'{}'", s.replace('\'', "'\\''"))
}
2102
2103fn expand_filter_command(cmd: &str, rel_path: &str) -> String {
2105 let mut out = String::with_capacity(cmd.len() + rel_path.len() + 8);
2106 let mut chars = cmd.chars().peekable();
2107 while let Some(c) = chars.next() {
2108 if c == '%' {
2109 match chars.peek() {
2110 Some('%') => {
2111 chars.next();
2112 out.push('%');
2113 }
2114 Some('f') => {
2115 chars.next();
2116 out.push_str(&sq_quote_buf(rel_path));
2117 }
2118 _ => out.push('%'),
2119 }
2120 } else {
2121 out.push(c);
2122 }
2123 }
2124 out
2125}
2126
2127fn run_filter(cmd: &str, data: &[u8], rel_path: &str) -> Result<Vec<u8>, std::io::Error> {
2129 let expanded = expand_filter_command(cmd, rel_path);
2130 let mut child = Command::new("sh")
2131 .arg("-c")
2132 .arg(&expanded)
2133 .stdin(Stdio::piped())
2134 .stdout(Stdio::piped())
2135 .stderr(Stdio::inherit())
2136 .spawn()?;
2137
2138 use std::io::{ErrorKind, Write};
2139 if let Some(ref mut stdin) = child.stdin {
2140 if let Err(e) = stdin.write_all(data) {
2141 if e.kind() != ErrorKind::BrokenPipe {
2143 return Err(e);
2144 }
2145 }
2146 }
2147 drop(child.stdin.take());
2148
2149 let output = child.wait_with_output()?;
2150 if !output.status.success() {
2151 return Err(std::io::Error::other(format!(
2152 "filter command exited with status {}",
2153 output.status
2154 )));
2155 }
2156
2157 Ok(output.stdout)
2158}
2159
/// A list of [`AttrRule`] values held in a plain `Vec`.
// NOTE(review): presumably the rules parsed from `.gitattributes` sources, kept in the order
// they were read — confirm against the parser and the matching code.
pub type GitAttributes = Vec<AttrRule>;
2165
#[cfg(test)]
mod tests {
    use super::*;

    /// Configuration shared by the smudge tests: `autocrlf=true`, `eol=lf`, safecrlf off and
    /// no round-trip encoding check.
    fn autocrlf_true_config() -> ConversionConfig {
        ConversionConfig {
            autocrlf: AutoCrlf::True,
            eol: CoreEol::Lf,
            safecrlf: SafeCrlf::False,
            check_roundtrip_encoding: None,
        }
    }

    #[test]
    fn test_crlf_to_lf() {
        let cases: [(&[u8], &[u8]); 3] = [
            (b"hello\r\nworld\r\n", b"hello\nworld\n"),
            (b"hello\nworld\n", b"hello\nworld\n"),
            (b"hello\r\n", b"hello\n"),
        ];
        for (input, expected) in cases {
            assert_eq!(crlf_to_lf(input), expected);
        }
    }

    #[test]
    fn test_lf_to_crlf() {
        let cases: [(&[u8], &[u8]); 2] = [
            (b"hello\nworld\n", b"hello\r\nworld\r\n"),
            (b"hello\r\nworld\r\n", b"hello\r\nworld\r\n"),
        ];
        for (input, expected) in cases {
            assert_eq!(lf_to_crlf(input), expected);
        }
    }

    #[test]
    fn test_has_crlf() {
        assert!(has_crlf(b"hello\r\nworld"));
        assert!(!has_crlf(b"hello\nworld"));
    }

    #[test]
    fn smudge_mixed_line_endings_unchanged_with_autocrlf_true() {
        // One CRLF line among LF lines: checkout must leave the blob exactly as stored.
        let blob: &[u8] = b"Oh\nhere\nis\nCRLF\r\nin\ntext\n";
        let conv = autocrlf_true_config();
        let attrs = FileAttrs::default();
        let out = convert_to_worktree_eager(blob, "mixed", &conv, &attrs, None, None).unwrap();
        assert_eq!(out, blob);
    }

    #[test]
    fn smudge_lf_only_gets_crlf_with_autocrlf_true() {
        let conv = autocrlf_true_config();
        let attrs = FileAttrs::default();
        let out = convert_to_worktree_eager(b"a\nb\n", "x", &conv, &attrs, None, None).unwrap();
        assert_eq!(out, b"a\r\nb\r\n");
    }

    #[test]
    fn test_is_binary() {
        assert!(is_binary(b"hello\0world"));
        assert!(!is_binary(b"hello world"));
    }

    #[test]
    fn attr_dir_only_pattern_does_not_match_same_named_file() {
        let rules = parse_gitattributes_content("ignored-only-if-dir/ export-ignore\n");
        let rule = &rules[0];
        assert!(rule.must_be_dir);
        assert!(rule.basename_only);

        // A regular file with the same basename must not match the directory-only pattern.
        let matches_file = attr_rule_matches(rule, "not-ignored-dir/ignored-only-if-dir", false);
        assert!(!matches_file);

        let matches_dir = attr_rule_matches(rule, "ignored-only-if-dir", true);
        assert!(matches_dir);
    }

    #[test]
    fn test_expand_collapse_ident() {
        let expanded = expand_ident(b"$Id$", "abc123");
        assert_eq!(expanded, b"$Id: abc123 $");
        assert_eq!(collapse_ident(&expanded), b"$Id$");
    }

    #[test]
    fn expand_ident_does_not_span_lines_for_partial_keyword() {
        // The unterminated keyword on line one must not swallow the keyword on line two.
        let input = b"$Id: NoTerminatingSymbol\n$Id: deadbeef $\n";
        assert_eq!(
            expand_ident(input, "newoid"),
            b"$Id: NoTerminatingSymbol\n$Id: newoid $\n"
        );
    }

    #[test]
    fn expand_ident_preserves_foreign_id_with_internal_spaces() {
        let input = b"$Id: Foreign Commit With Spaces $\n";
        assert_eq!(expand_ident(input, "abc"), input);
    }

    #[test]
    fn expand_filter_command_percent_f_quotes_path() {
        let quoted = expand_filter_command("sh ./x.sh %f --extra", "name with 'sq'");
        assert_eq!(quoted, "sh ./x.sh 'name with '\\''sq'\\''' --extra");

        let literal_percent = expand_filter_command("a %% b", "p");
        assert_eq!(literal_percent, "a % b");
    }
}