1use std::path::{Path, PathBuf};
18use std::process::{Command, Stdio};
19
20use encoding_rs::UTF_8;
21
22use crate::config::ConfigSet;
23use crate::filter_process::{apply_process_clean, apply_process_smudge, FilterSmudgeMeta};
24
/// Parsed value of the `core.autocrlf` configuration setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AutoCrlf {
    /// The setting is a truthy boolean (`true`, `yes`, `on` or `1`).
    True,
    /// The setting is `input`.
    Input,
    /// The setting is absent or holds any other value.
    False,
}
32
/// Parsed value of the `core.eol` configuration setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CoreEol {
    /// `core.eol=lf`.
    Lf,
    /// `core.eol=crlf`.
    Crlf,
    /// `core.eol=native`, an unrecognised value, or the setting is absent.
    Native,
}
40
/// Parsed value of the `core.safecrlf` configuration setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SafeCrlf {
    /// The setting is a truthy boolean; an irreversible conversion is an error.
    True,
    /// The setting is `warn` or absent; an irreversible conversion prints a warning.
    Warn,
    /// Any other value; the round-trip check is skipped.
    False,
}
48
/// State of the `text` gitattribute for a path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextAttr {
    /// `text` is set.
    Set,
    /// `text=auto`.
    Auto,
    /// `-text`.
    Unset,
    /// No matching rule mentions `text`, or its value is not recognised.
    Unspecified,
}
61
/// State of the `eol` gitattribute for a path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EolAttr {
    /// `eol=lf`.
    Lf,
    /// `eol=crlf`.
    Crlf,
    /// No matching rule mentions `eol`, or its value is not recognised.
    Unspecified,
}
69
/// State of the legacy `crlf` gitattribute for a path.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CrlfLegacyAttr {
    /// No matching rule mentions `crlf`, or its value is not recognised.
    #[default]
    Unspecified,
    /// `-crlf`.
    Unset,
    /// `crlf=input`.
    Input,
    /// `crlf` is set.
    Crlf,
}
82
/// State of the `merge` gitattribute for a path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeAttr {
    /// No matching rule mentions `merge`, or `merge` is set without a value.
    Unspecified,
    /// `-merge`.
    Unset,
    /// `merge=<name>`: the name of the merge driver.
    Driver(String),
}
93
/// State of the `diff` gitattribute for a path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiffAttr {
    /// No matching rule selected a diff driver or unset `diff`.
    Unspecified,
    /// `-diff`.
    Unset,
    /// `diff=<name>`: the name of the diff driver.
    Driver(String),
}
104
/// Resolved gitattributes for a single path, as produced by `get_file_attrs`.
#[derive(Debug, Clone)]
pub struct FileAttrs {
    /// State of the `text` attribute.
    pub text: TextAttr,
    /// State of the `eol` attribute.
    pub eol: EolAttr,
    /// State of the `diff` attribute.
    pub diff_attr: DiffAttr,
    /// Whether `export-ignore` applies to the path.
    pub export_ignore: bool,
    /// Whether `export-subst` applies to the path.
    pub export_subst: bool,
    /// Value of `filter.<name>.clean`; left `None` when a process filter is configured.
    pub filter_clean: Option<String>,
    /// Value of `filter.<name>.smudge`; left `None` when a process filter is configured.
    pub filter_smudge: Option<String>,
    /// Non-empty value of `filter.<name>.process`, if configured.
    pub filter_process: Option<String>,
    /// The `<name>` given by the `filter=<name>` attribute.
    pub filter_driver_name: Option<String>,
    /// Whether `filter.<name>.required` is truthy (applies to smudge).
    pub filter_smudge_required: bool,
    /// Whether `filter.<name>.required` is truthy (applies to clean).
    pub filter_clean_required: bool,
    /// Whether the `ident` attribute is set.
    pub ident: bool,
    /// State of the `merge` attribute.
    pub merge: MergeAttr,
    /// Raw value of the `conflict-marker-size` attribute, if any.
    pub conflict_marker_size: Option<String>,
    /// Raw value of the `working-tree-encoding` attribute, if any.
    pub working_tree_encoding: Option<String>,
    /// State of the legacy `crlf` attribute.
    pub crlf_legacy: CrlfLegacyAttr,
    /// Raw value of the `whitespace` attribute (`"unset"` for `-whitespace`), if any.
    pub whitespace: Option<String>,
}
137
138impl Default for FileAttrs {
139 fn default() -> Self {
140 FileAttrs {
141 text: TextAttr::Unspecified,
142 eol: EolAttr::Unspecified,
143 diff_attr: DiffAttr::Unspecified,
144 export_ignore: false,
145 export_subst: false,
146 filter_clean: None,
147 filter_smudge: None,
148 filter_process: None,
149 filter_driver_name: None,
150 filter_smudge_required: false,
151 filter_clean_required: false,
152 ident: false,
153 merge: MergeAttr::Unspecified,
154 conflict_marker_size: None,
155 working_tree_encoding: None,
156 crlf_legacy: CrlfLegacyAttr::Unspecified,
157 whitespace: None,
158 }
159 }
160}
161
/// Line-ending conversion settings read from the configuration
/// (see `ConversionConfig::from_config`).
#[derive(Debug, Clone)]
pub struct ConversionConfig {
    /// Parsed `core.autocrlf`.
    pub autocrlf: AutoCrlf,
    /// Parsed `core.eol`.
    pub eol: CoreEol,
    /// Parsed `core.safecrlf`.
    pub safecrlf: SafeCrlf,
}
169
170impl ConversionConfig {
171 pub fn from_config(config: &ConfigSet) -> Self {
173 let autocrlf = match config.get("core.autocrlf") {
174 Some(v) => match v.to_lowercase().as_str() {
175 "true" | "yes" | "on" | "1" => AutoCrlf::True,
176 "input" => AutoCrlf::Input,
177 _ => AutoCrlf::False,
178 },
179 None => AutoCrlf::False,
180 };
181
182 let eol = match config.get("core.eol") {
183 Some(v) => match v.to_lowercase().as_str() {
184 "crlf" => CoreEol::Crlf,
185 "lf" => CoreEol::Lf,
186 "native" => CoreEol::Native,
187 _ => CoreEol::Native,
188 },
189 None => CoreEol::Native,
190 };
191
192 let safecrlf = match config.get("core.safecrlf") {
193 Some(v) => match v.to_lowercase().as_str() {
194 "true" | "yes" | "on" | "1" => SafeCrlf::True,
195 "warn" => SafeCrlf::Warn,
196 _ => SafeCrlf::False,
197 },
198 None => SafeCrlf::Warn,
200 };
201
202 ConversionConfig {
203 autocrlf,
204 eol,
205 safecrlf,
206 }
207 }
208}
209
/// One parsed line of a gitattributes file: a path pattern plus the
/// attributes it assigns.
#[derive(Debug, Clone)]
pub struct AttrRule {
    /// Glob pattern, with any trailing `/` already removed.
    pattern: String,
    /// The pattern was written with a trailing `/` and only matches directories.
    must_be_dir: bool,
    /// The pattern contains no `/` and is matched against the final path component only.
    basename_only: bool,
    /// `(name, value)` pairs; a bare attribute has the value `"set"` and a
    /// `-attr` entry has the value `"unset"`.
    attrs: Vec<(String, String)>,
}
221
222impl AttrRule {
223 pub fn diff_drivers(&self) -> impl Iterator<Item = &str> + '_ {
225 self.attrs.iter().filter_map(|(name, value)| {
226 if name == "diff" && !value.is_empty() && value != "unset" && value != "set" {
227 Some(value.as_str())
228 } else {
229 None
230 }
231 })
232 }
233}
234
235pub fn load_gitattributes(work_tree: &Path) -> Vec<AttrRule> {
237 let mut rules = Vec::new();
238
239 let root_attrs = work_tree.join(".gitattributes");
240 if let Ok(content) = std::fs::read_to_string(&root_attrs) {
241 parse_gitattributes(&content, &mut rules);
242 }
243
244 let info_attrs = work_tree.join(".git/info/attributes");
245 if let Ok(content) = std::fs::read_to_string(&info_attrs) {
246 parse_gitattributes(&content, &mut rules);
247 }
248
249 rules
250}
251
252#[must_use]
257pub fn parse_gitattributes_content(content: &str) -> Vec<AttrRule> {
258 let mut rules = Vec::new();
259 parse_gitattributes(content, &mut rules);
260 rules
261}
262
263pub fn load_gitattributes_from_index(
266 index: &crate::index::Index,
267 odb: &crate::odb::Odb,
268) -> Vec<AttrRule> {
269 let mut rules = Vec::new();
270
271 if let Some(entry) = index.get(b".gitattributes", 0) {
273 if let Ok(obj) = odb.read(&entry.oid) {
274 if let Ok(content) = String::from_utf8(obj.data) {
275 parse_gitattributes(&content, &mut rules);
276 }
277 }
278 }
279
280 rules
281}
282
283pub fn load_gitattributes_for_checkout(
289 work_tree: &Path,
290 rel_path: &str,
291 index: &crate::index::Index,
292 odb: &crate::odb::Odb,
293) -> Vec<AttrRule> {
294 let mut rules = load_gitattributes(work_tree);
295
296 if !work_tree.join(".gitattributes").exists() {
299 if let Some(entry) = index.get(b".gitattributes", 0) {
300 if let Ok(obj) = odb.read(&entry.oid) {
301 if let Ok(content) = String::from_utf8(obj.data) {
302 parse_gitattributes(&content, &mut rules);
303 }
304 }
305 }
306 }
307
308 let path = Path::new(rel_path);
309 if let Some(parent) = path.parent() {
310 let mut accum = PathBuf::new();
311 for comp in parent.components() {
312 accum.push(comp);
313 let ga_rel = accum.join(".gitattributes");
314 let wt_ga = work_tree.join(&ga_rel);
315 if let Ok(content) = std::fs::read_to_string(&wt_ga) {
316 parse_gitattributes(&content, &mut rules);
317 } else {
318 let key = path_to_index_bytes(&ga_rel);
319 if let Some(entry) = index.get(&key, 0) {
320 if let Ok(obj) = odb.read(&entry.oid) {
321 if let Ok(content) = String::from_utf8(obj.data) {
322 parse_gitattributes(&content, &mut rules);
323 }
324 }
325 }
326 }
327 }
328 }
329
330 rules
331}
332
/// Converts a relative path into the byte string used as an index key.
///
/// On Unix the raw OS bytes are used unchanged. On other platforms the path
/// is converted lossily to UTF-8 and the platform separator is replaced by
/// `/`, because index paths are always `/`-separated (a path built with
/// `PathBuf::join` on Windows would otherwise contain `\` and never match).
fn path_to_index_bytes(path: &Path) -> Vec<u8> {
    #[cfg(unix)]
    {
        use std::os::unix::ffi::OsStrExt;
        path.as_os_str().as_bytes().to_vec()
    }
    #[cfg(not(unix))]
    {
        path.to_string_lossy()
            .replace(std::path::MAIN_SEPARATOR, "/")
            .into_bytes()
    }
}
344
345fn parse_gitattributes(content: &str, rules: &mut Vec<AttrRule>) {
346 for line in content.lines() {
347 let line = line.trim();
348 if line.is_empty() || line.starts_with('#') {
349 continue;
350 }
351
352 let mut parts = line.split_whitespace();
353 let raw_pattern = match parts.next() {
354 Some(p) => p,
355 None => continue,
356 };
357
358 let mut pat = raw_pattern.to_owned();
359 let mut must_be_dir = false;
360 if pat.ends_with('/') && pat.len() > 1 {
361 pat.pop();
362 must_be_dir = true;
363 }
364 let basename_only = !pat.contains('/');
365
366 let mut attrs = Vec::new();
367 for part in parts {
368 if part == "binary" {
369 attrs.push(("text".to_owned(), "unset".to_owned()));
370 attrs.push(("diff".to_owned(), "unset".to_owned()));
371 } else if let Some(rest) = part.strip_prefix('-') {
372 attrs.push((rest.to_owned(), "unset".to_owned()));
373 } else if let Some((key, val)) = part.split_once('=') {
374 attrs.push((key.to_owned(), val.to_owned()));
375 } else {
376 attrs.push((part.to_owned(), "set".to_owned()));
377 }
378 }
379
380 if !attrs.is_empty() {
381 rules.push(AttrRule {
382 pattern: pat,
383 must_be_dir,
384 basename_only,
385 attrs,
386 });
387 }
388 }
389}
390
/// Returns `true` when `value`, ignoring surrounding whitespace and ASCII
/// case, is one of `true`, `yes`, `on` or `1`.
fn config_bool_truthy(value: &str) -> bool {
    let candidate = value.trim();
    ["true", "yes", "on", "1"]
        .iter()
        .any(|word| candidate.eq_ignore_ascii_case(word))
}
397
398pub fn get_file_attrs(
403 rules: &[AttrRule],
404 rel_path: &str,
405 is_dir: bool,
406 config: &ConfigSet,
407) -> FileAttrs {
408 let mut fa = FileAttrs::default();
409
410 for rule in rules {
412 if attr_rule_matches(rule, rel_path, is_dir) {
413 for (name, value) in &rule.attrs {
414 match name.as_str() {
415 "text" => {
416 fa.text = match value.as_str() {
417 "set" => TextAttr::Set,
418 "unset" => TextAttr::Unset,
419 "auto" => TextAttr::Auto,
420 _ => TextAttr::Unspecified,
421 };
422 }
423 "eol" => {
424 fa.eol = match value.as_str() {
425 "lf" => EolAttr::Lf,
426 "crlf" => EolAttr::Crlf,
427 _ => EolAttr::Unspecified,
428 };
429 }
430 "filter" => {
431 if value == "unset" {
432 fa.filter_clean = None;
433 fa.filter_smudge = None;
434 fa.filter_process = None;
435 fa.filter_driver_name = None;
436 fa.filter_smudge_required = false;
437 fa.filter_clean_required = false;
438 } else {
439 let clean_key = format!("filter.{value}.clean");
440 let smudge_key = format!("filter.{value}.smudge");
441 let process_key = format!("filter.{value}.process");
442 let req_key = format!("filter.{value}.required");
443 fa.filter_driver_name = Some(value.clone());
444 fa.filter_process = config.get(&process_key).filter(|s| !s.is_empty());
445 if fa.filter_process.is_some() {
446 fa.filter_clean = None;
447 fa.filter_smudge = None;
448 } else {
449 fa.filter_clean = config.get(&clean_key);
450 fa.filter_smudge = config.get(&smudge_key);
451 }
452 let required =
453 config.get(&req_key).is_some_and(|v| config_bool_truthy(&v));
454 fa.filter_smudge_required = required;
455 fa.filter_clean_required = required;
456 }
457 }
458 "diff" => {
459 if value == "unset" {
460 fa.diff_attr = DiffAttr::Unset;
461 } else if !value.is_empty() && value != "set" {
462 fa.diff_attr = DiffAttr::Driver(value.clone());
463 }
464 }
465 "ident" => {
466 fa.ident = value == "set";
467 }
468 "export-ignore" => {
469 fa.export_ignore = value != "unset";
470 }
471 "export-subst" => {
472 fa.export_subst = value != "unset";
473 }
474 "merge" => {
475 fa.merge = match value.as_str() {
476 "unset" => MergeAttr::Unset,
477 "set" => MergeAttr::Unspecified,
478 other => MergeAttr::Driver(other.to_string()),
479 };
480 }
481 "conflict-marker-size" => {
482 if value == "unset" {
483 fa.conflict_marker_size = None;
484 } else {
485 fa.conflict_marker_size = Some(value.clone());
486 }
487 }
488 "working-tree-encoding" => {
489 if value != "unset" && !value.is_empty() {
490 fa.working_tree_encoding = Some(value.clone());
491 }
492 }
493 "crlf" => {
494 fa.crlf_legacy = match value.as_str() {
495 "unset" => CrlfLegacyAttr::Unset,
496 "input" => CrlfLegacyAttr::Input,
497 "set" => CrlfLegacyAttr::Crlf,
498 _ => CrlfLegacyAttr::Unspecified,
499 };
500 }
501 "whitespace" => {
502 if value == "unset" {
503 fa.whitespace = Some("unset".to_owned());
504 } else if !value.is_empty() {
505 fa.whitespace = Some(value.clone());
506 }
507 }
508 _ => {}
509 }
510 }
511 }
512 }
513
514 fa
515}
516
517#[must_use]
522pub fn path_has_gitattribute(
523 rules: &[AttrRule],
524 path: &str,
525 is_dir: bool,
526 attr_name: &str,
527) -> bool {
528 let mut last: Option<&str> = None;
529 for rule in rules {
530 if attr_rule_matches(rule, path, is_dir) {
531 for (name, value) in &rule.attrs {
532 if name == attr_name {
533 last = Some(value.as_str());
534 }
535 }
536 }
537 }
538 match last {
539 None | Some("unset") => false,
540 Some(_) => true,
541 }
542}
543
544#[must_use]
546pub fn attr_rule_matches(rule: &AttrRule, rel_path: &str, is_dir: bool) -> bool {
547 let path_is_dir = is_dir || rel_path.ends_with('/');
548 if rule.must_be_dir && !path_is_dir {
549 return false;
550 }
551 let path_for_glob = rel_path.trim_end_matches('/');
552 if rule.basename_only {
553 let basename = path_for_glob.rsplit('/').next().unwrap_or(path_for_glob);
554 glob_matches(rule.pattern.as_str(), basename)
555 } else {
556 glob_matches(rule.pattern.as_str(), path_for_glob)
557 }
558}
559
/// Matches `text` against the glob `pattern` (see `glob_match_bytes`).
fn glob_matches(pattern: &str, text: &str) -> bool {
    glob_match_bytes(pattern.as_bytes(), text.as_bytes())
}

/// Byte-wise glob matcher supporting `*` (any run of bytes, including `/`
/// and the empty run) and `?` (exactly one byte). All other bytes match
/// literally; there is no escaping and no bracket expression support.
///
/// Implemented iteratively with a single backtrack point (the most recent
/// `*`), which runs in O(pattern × text) time. A naive recursive matcher is
/// exponential on patterns such as `a*a*a*…b`, and patterns come from
/// repository-controlled `.gitattributes` files.
fn glob_match_bytes(pat: &[u8], text: &[u8]) -> bool {
    let mut p = 0usize;
    let mut t = 0usize;
    // (pattern index just after the last `*`, text index that `*` currently extends to)
    let mut backtrack: Option<(usize, usize)> = None;

    while t < text.len() {
        match pat.get(p) {
            Some(b'*') => {
                // Start by letting the star match nothing.
                backtrack = Some((p + 1, t));
                p += 1;
            }
            Some(&c) if c == b'?' || c == text[t] => {
                p += 1;
                t += 1;
            }
            _ => match backtrack {
                // Mismatch: let the last star swallow one more byte and retry.
                Some((star_next, star_text)) => {
                    p = star_next;
                    t = star_text + 1;
                    backtrack = Some((star_next, star_text + 1));
                }
                None => return false,
            },
        }
    }

    // Text exhausted: only trailing stars may remain in the pattern.
    pat[p..].iter().all(|&b| b == b'*')
}
587
/// Heuristic binary check: returns `true` when a NUL byte occurs within the
/// first 8000 bytes of `data`.
pub fn is_binary(data: &[u8]) -> bool {
    data.iter().take(8000).any(|&byte| byte == 0)
}
593
/// Bit set by `gather_convert_stats` when the data contains at least one lone LF.
const CONVERT_STAT_BITS_TXT_LF: u32 = 0x1;
/// Bit set by `gather_convert_stats` when the data contains at least one CRLF pair.
const CONVERT_STAT_BITS_TXT_CRLF: u32 = 0x2;
/// Bit set by `gather_convert_stats` when `convert_is_binary` classifies the data as binary.
const CONVERT_STAT_BITS_BIN: u32 = 0x4;
598
/// Byte-class counters collected by `gather_text_stat`.
#[derive(Default, Clone)]
struct TextStat {
    /// Number of NUL bytes (each is also counted in `nonprintable`).
    nul: u32,
    /// Number of CR bytes not immediately followed by LF.
    lonecr: u32,
    /// Number of LF bytes not consumed as part of a CRLF pair.
    lonelf: u32,
    /// Number of CRLF pairs.
    crlf: u32,
    /// Bytes >= 32 other than DEL, plus TAB, BS, ESC and FF.
    printable: u32,
    /// DEL and the control bytes below 32 that are not counted elsewhere.
    nonprintable: u32,
}
608
609fn gather_text_stat(data: &[u8]) -> TextStat {
610 let mut s = TextStat::default();
611 let mut i = 0usize;
612 while i < data.len() {
613 let c = data[i];
614 if c == b'\r' {
615 if i + 1 < data.len() && data[i + 1] == b'\n' {
616 s.crlf += 1;
617 i += 2;
618 } else {
619 s.lonecr += 1;
620 i += 1;
621 }
622 continue;
623 }
624 if c == b'\n' {
625 s.lonelf += 1;
626 i += 1;
627 continue;
628 }
629 if c == 127 {
630 s.nonprintable += 1;
631 } else if c < 32 {
632 match c {
633 b'\t' | b'\x08' | b'\x1b' | b'\x0c' => s.printable += 1,
634 0 => {
635 s.nul += 1;
636 s.nonprintable += 1;
637 }
638 _ => s.nonprintable += 1,
639 }
640 } else {
641 s.printable += 1;
642 }
643 i += 1;
644 }
645 s
646}
647
648fn convert_is_binary(stats: &TextStat) -> bool {
649 stats.lonecr > 0 || stats.nul > 0 || (stats.printable >> 7) < stats.nonprintable
650}
651
652fn git_text_stat(data: &[u8]) -> TextStat {
653 let mut stats = gather_text_stat(data);
654 if !data.is_empty() && data[data.len() - 1] == 0x1a {
655 stats.nonprintable = stats.nonprintable.saturating_sub(1);
656 }
657 stats
658}
659
660fn will_convert_lf_to_crlf_from_stats(
662 stats: &TextStat,
663 conv: &ConversionConfig,
664 attrs: &FileAttrs,
665) -> bool {
666 let has_lone_lf = stats.lonelf > 0;
667 let is_bin = convert_is_binary(stats);
668
669 match attrs.crlf_legacy {
670 CrlfLegacyAttr::Unset | CrlfLegacyAttr::Input => return false,
671 CrlfLegacyAttr::Crlf => {
672 if attrs.text == TextAttr::Unset {
673 return false;
674 }
675 return has_lone_lf;
676 }
677 CrlfLegacyAttr::Unspecified => {}
678 }
679
680 if attrs.text == TextAttr::Unset {
681 return false;
682 }
683
684 if attrs.eol != EolAttr::Unspecified {
685 if attrs.text == TextAttr::Auto && is_bin {
686 return false;
687 }
688 if attrs.eol != EolAttr::Crlf {
689 return false;
690 }
691 if attrs.text == TextAttr::Auto {
692 return auto_crlf_should_smudge_lf_to_crlf_from_stats(stats);
693 }
694 return has_lone_lf;
695 }
696
697 if attrs.text == TextAttr::Set {
698 if !output_eol_is_crlf(conv) {
699 return false;
700 }
701 return has_lone_lf;
702 }
703
704 if attrs.text == TextAttr::Auto {
705 if is_bin || !output_eol_is_crlf(conv) {
706 return false;
707 }
708 return auto_crlf_should_smudge_lf_to_crlf_from_stats(stats);
709 }
710
711 match conv.autocrlf {
712 AutoCrlf::True => {
713 if is_bin {
714 return false;
715 }
716 auto_crlf_should_smudge_lf_to_crlf_from_stats(stats)
717 }
718 AutoCrlf::Input | AutoCrlf::False => false,
719 }
720}
721
722fn auto_crlf_should_smudge_lf_to_crlf_from_stats(stats: &TextStat) -> bool {
723 if stats.lonelf == 0 {
724 return false;
725 }
726 if stats.lonecr > 0 || stats.crlf > 0 {
727 return false;
728 }
729 !convert_is_binary(stats)
730}
731
732fn gather_convert_stats(data: &[u8]) -> u32 {
733 if data.is_empty() {
734 return 0;
735 }
736 let mut stats = gather_text_stat(data);
737 if !data.is_empty() && data[data.len() - 1] == 0x1a {
738 stats.nonprintable = stats.nonprintable.saturating_sub(1);
739 }
740 let mut ret = 0u32;
741 if convert_is_binary(&stats) {
742 ret |= CONVERT_STAT_BITS_BIN;
743 }
744 if stats.crlf > 0 {
745 ret |= CONVERT_STAT_BITS_TXT_CRLF;
746 }
747 if stats.lonelf > 0 {
748 ret |= CONVERT_STAT_BITS_TXT_LF;
749 }
750 ret
751}
752
753#[must_use]
755pub fn gather_convert_stats_ascii(data: &[u8]) -> &'static str {
756 let convert_stats = gather_convert_stats(data);
757 if convert_stats & CONVERT_STAT_BITS_BIN != 0 {
758 return "-text";
759 }
760 match convert_stats {
761 CONVERT_STAT_BITS_TXT_LF => "lf",
762 CONVERT_STAT_BITS_TXT_CRLF => "crlf",
763 x if x == (CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF) => "mixed",
764 _ => "none",
765 }
766}
767
768#[must_use]
771pub fn convert_attr_ascii_for_ls_files(
772 rules: &[AttrRule],
773 rel_path: &str,
774 config: &ConfigSet,
775) -> String {
776 let fa = get_file_attrs(rules, rel_path, false, config);
777 let mut action = match fa.text {
779 TextAttr::Set => 1, TextAttr::Unset => 2, TextAttr::Auto => 5, TextAttr::Unspecified => 0,
783 };
784 if action == 0 {
785 action = match fa.crlf_legacy {
786 CrlfLegacyAttr::Crlf => 1,
787 CrlfLegacyAttr::Unset => 2,
788 CrlfLegacyAttr::Input => 3, CrlfLegacyAttr::Unspecified => 0,
790 };
791 }
792 if action == 2 {
793 return "-text".to_string();
794 }
795 if action == 0 {
797 if fa.eol == EolAttr::Unspecified {
798 return String::new();
799 }
800 action = 1; }
802
803 if fa.eol == EolAttr::Lf {
805 if action == 5 {
806 action = 7; } else {
808 action = 3; }
810 } else if fa.eol == EolAttr::Crlf {
811 if action == 5 {
812 action = 6; } else {
814 action = 4; }
816 }
817
818 let attr_action = action;
820
821 match attr_action {
822 1 => "text".to_string(),
823 3 => "text eol=lf".to_string(),
824 4 => "text eol=crlf".to_string(),
825 5 => "text=auto".to_string(),
826 6 => "text=auto eol=crlf".to_string(),
827 7 => "text=auto eol=lf".to_string(),
828 _ => String::new(),
829 }
830}
831
/// Returns `true` when `data` contains at least one CR immediately followed by LF.
pub fn has_crlf(data: &[u8]) -> bool {
    data.iter()
        .zip(data.iter().skip(1))
        .any(|(&first, &second)| first == b'\r' && second == b'\n')
}
836
/// Returns `true` when `data` contains an LF that is not directly preceded by CR.
pub fn has_lone_lf(data: &[u8]) -> bool {
    let mut previous: Option<u8> = None;
    for &byte in data {
        if byte == b'\n' && previous != Some(b'\r') {
            return true;
        }
        previous = Some(byte);
    }
    false
}
846
/// Returns `true` when `data` contains a CR that is not directly followed by LF
/// (a CR as the final byte counts).
fn has_lone_cr(data: &[u8]) -> bool {
    data.iter()
        .enumerate()
        .any(|(idx, &byte)| byte == b'\r' && data.get(idx + 1) != Some(&b'\n'))
}
856
857fn auto_crlf_should_smudge_lf_to_crlf(data: &[u8]) -> bool {
860 if !has_lone_lf(data) {
861 return false;
862 }
863 if has_lone_cr(data) || has_crlf(data) {
864 return false;
865 }
866 if is_binary(data) {
867 return false;
868 }
869 true
870}
871
872pub fn is_all_crlf(data: &[u8]) -> bool {
874 has_crlf(data) && !has_lone_lf(data)
875}
876
877pub fn is_all_lf(data: &[u8]) -> bool {
879 has_lone_lf(data) && !has_crlf(data)
880}
881
882#[must_use]
884pub fn has_crlf_in_index_blob(data: &[u8]) -> bool {
885 if !data.contains(&b'\r') {
886 return false;
887 }
888 let st = gather_convert_stats(data);
889 st & CONVERT_STAT_BITS_BIN == 0 && (st & CONVERT_STAT_BITS_TXT_CRLF) != 0
890}
891
892#[must_use]
896pub fn clean_uses_autocrlf_index_guard(attrs: &FileAttrs, conv: &ConversionConfig) -> bool {
897 if attrs.text == TextAttr::Unset || attrs.crlf_legacy == CrlfLegacyAttr::Unset {
898 return false;
899 }
900 if attrs.eol != EolAttr::Unspecified && attrs.text != TextAttr::Auto {
901 return false;
902 }
903 attrs.text == TextAttr::Auto
904 || (attrs.text == TextAttr::Unspecified
905 && matches!(conv.autocrlf, AutoCrlf::True | AutoCrlf::Input))
906}
907
/// Options for `convert_to_git_with_opts`.
#[derive(Debug, Clone, Copy)]
pub struct ConvertToGitOpts<'a> {
    /// Content currently stored in the index for the path, if any; when it
    /// already contains CRLF the index guard may keep CRLF in the new content.
    pub index_blob: Option<&'a [u8]>,
    /// When `true`, the index guard is bypassed.
    pub renormalize: bool,
    /// When `true`, the `core.safecrlf` round-trip check is performed.
    pub check_safecrlf: bool,
}
918
919impl Default for ConvertToGitOpts<'_> {
920 fn default() -> Self {
921 Self {
922 index_blob: None,
923 renormalize: false,
924 check_safecrlf: true,
925 }
926 }
927}
928
/// Serialises UTF-16 code units as little-endian bytes (two bytes per unit).
fn utf16_scalar_iter_to_le_bytes(chars: impl Iterator<Item = u16>) -> Vec<u8> {
    chars.flat_map(|unit| unit.to_le_bytes()).collect()
}
940
/// Serialises UTF-16 code units as big-endian bytes (two bytes per unit).
fn utf16_scalar_iter_to_be_bytes(chars: impl Iterator<Item = u16>) -> Vec<u8> {
    chars.flat_map(|unit| unit.to_be_bytes()).collect()
}
948
/// Serialises each scalar value of `s` as four big-endian bytes.
fn utf32_chars_to_be_bytes(s: &str) -> Vec<u8> {
    s.chars()
        .flat_map(|ch| u32::from(ch).to_be_bytes())
        .collect()
}
956
/// Serialises each scalar value of `s` as four little-endian bytes.
fn utf32_chars_to_le_bytes(s: &str) -> Vec<u8> {
    s.chars()
        .flat_map(|ch| u32::from(ch).to_le_bytes())
        .collect()
}
964
/// Decodes BOM-less UTF-32 bytes into UTF-8.
///
/// `big_endian` selects the byte order of each four-byte unit.
///
/// # Errors
/// Returns a message naming `rel_path` when the length is not a multiple of
/// four or a unit is not a valid Unicode scalar value.
fn decode_utf32_body_to_utf8_bytes(
    body: &[u8],
    rel_path: &str,
    big_endian: bool,
) -> Result<Vec<u8>, String> {
    if body.len() % 4 != 0 {
        return Err(format!(
            "invalid UTF-32 length for working tree file '{rel_path}'"
        ));
    }

    let read_unit: fn([u8; 4]) -> u32 = if big_endian {
        u32::from_be_bytes
    } else {
        u32::from_le_bytes
    };

    let mut text = String::new();
    for quad in body.chunks_exact(4) {
        let cp = read_unit([quad[0], quad[1], quad[2], quad[3]]);
        match char::from_u32(cp) {
            Some(ch) => text.push(ch),
            None => {
                return Err(format!(
                    "invalid UTF-32 scalar U+{cp:X} in working tree file '{rel_path}'"
                ));
            }
        }
    }
    Ok(text.into_bytes())
}
991
992fn decode_working_tree_bytes_to_utf8(
993 src: &[u8],
994 rel_path: &str,
995 enc_label: &str,
996) -> Result<Vec<u8>, String> {
997 let label = enc_label.trim();
998 if label.is_empty() {
999 return Ok(src.to_vec());
1000 }
1001 let lower = label.replace('_', "-").to_ascii_lowercase();
1002
1003 let (cow, _used_enc, had_errors) = match lower.as_str() {
1004 "utf-16le-bom" => {
1005 let body = if src.len() >= 2 && src.starts_with(&[0xFF, 0xFE]) {
1006 &src[2..]
1007 } else {
1008 src
1009 };
1010 encoding_rs::UTF_16LE.decode(body)
1011 }
1012 "utf-16" => {
1014 if src.len() >= 2 && src.starts_with(&[0xFE, 0xFF]) {
1015 encoding_rs::UTF_16BE.decode(&src[2..])
1016 } else if src.len() >= 2 && src.starts_with(&[0xFF, 0xFE]) {
1017 encoding_rs::UTF_16LE.decode(&src[2..])
1018 } else {
1019 return Err(format!(
1020 "missing byte order mark for UTF-16 working tree file '{rel_path}'"
1021 ));
1022 }
1023 }
1024 "utf-16be" => encoding_rs::UTF_16BE.decode(src),
1025 "utf-16le" => encoding_rs::UTF_16LE.decode(src),
1026 "utf-32" => {
1027 let (body, big_endian) = if src.len() >= 4 && src.starts_with(&[0, 0, 0xFE, 0xFF]) {
1028 (&src[4..], true)
1029 } else if src.len() >= 4 && src.starts_with(&[0xFF, 0xFE, 0, 0]) {
1030 (&src[4..], false)
1031 } else {
1032 return Err(format!(
1033 "missing byte order mark for UTF-32 working tree file '{rel_path}'"
1034 ));
1035 };
1036 return decode_utf32_body_to_utf8_bytes(body, rel_path, big_endian);
1037 }
1038 "utf-32be" => return decode_utf32_body_to_utf8_bytes(src, rel_path, true),
1039 "utf-32le" => return decode_utf32_body_to_utf8_bytes(src, rel_path, false),
1040 _ => {
1041 let Some(enc) = crate::commit_encoding::resolve(label) else {
1042 return Err(format!(
1043 "unknown working-tree-encoding '{label}' for '{rel_path}'"
1044 ));
1045 };
1046 if enc == UTF_8 {
1047 return Ok(src.to_vec());
1048 }
1049 enc.decode(src)
1050 }
1051 };
1052
1053 if had_errors {
1054 return Err(format!(
1055 "failed to decode '{rel_path}' from working-tree-encoding {label}"
1056 ));
1057 }
1058 Ok(cow.into_owned().into_bytes())
1059}
1060
1061fn encode_utf8_blob_to_working_tree_bytes(
1062 src: &[u8],
1063 rel_path: &str,
1064 enc_label: &str,
1065) -> Result<Vec<u8>, String> {
1066 let label = enc_label.trim();
1067 if label.is_empty() {
1068 return Ok(src.to_vec());
1069 }
1070 let s = std::str::from_utf8(src).map_err(|_| {
1071 format!("failed to encode '{rel_path}' from UTF-8: blob is not valid UTF-8")
1072 })?;
1073 let lower = label.replace('_', "-").to_ascii_lowercase();
1074
1075 match lower.as_str() {
1076 "utf-16le-bom" => {
1077 let mut out = vec![0xFF_u8, 0xFE_u8];
1078 out.extend(utf16_scalar_iter_to_le_bytes(s.encode_utf16()));
1079 Ok(out)
1080 }
1081 "utf-16" => {
1084 let mut out = vec![0xFF_u8, 0xFE_u8];
1085 out.extend(utf16_scalar_iter_to_le_bytes(s.encode_utf16()));
1086 Ok(out)
1087 }
1088 "utf-16be" => {
1089 let mut out = vec![0xFE_u8, 0xFF_u8];
1090 out.extend(utf16_scalar_iter_to_be_bytes(s.encode_utf16()));
1091 Ok(out)
1092 }
1093 "utf-16le" => Ok(utf16_scalar_iter_to_le_bytes(s.encode_utf16())),
1094 "utf-32" | "utf-32be" => {
1095 let mut out = vec![0_u8, 0_u8, 0xFE_u8, 0xFF_u8];
1096 out.extend(utf32_chars_to_be_bytes(s));
1097 Ok(out)
1098 }
1099 "utf-32le" => {
1100 let mut out = vec![0xFF_u8, 0xFE_u8, 0_u8, 0_u8];
1101 out.extend(utf32_chars_to_le_bytes(s));
1102 Ok(out)
1103 }
1104 _ => {
1105 let Some(enc) = crate::commit_encoding::resolve(label) else {
1106 return Err(format!(
1107 "unknown working-tree-encoding '{label}' for '{rel_path}'"
1108 ));
1109 };
1110 if enc == UTF_8 {
1111 return Ok(src.to_vec());
1112 }
1113 let (cow, _, had_errors) = enc.encode(s);
1114 if had_errors {
1115 return Err(format!(
1116 "failed to encode '{rel_path}' from UTF-8 to {label}"
1117 ));
1118 }
1119 Ok(cow.into_owned())
1120 }
1121 }
1122}
1123
1124pub fn convert_to_git(
1137 data: &[u8],
1138 rel_path: &str,
1139 conv: &ConversionConfig,
1140 file_attrs: &FileAttrs,
1141) -> Result<Vec<u8>, String> {
1142 convert_to_git_with_opts(
1143 data,
1144 rel_path,
1145 conv,
1146 file_attrs,
1147 ConvertToGitOpts::default(),
1148 )
1149}
1150
1151pub fn convert_to_git_with_opts(
1153 data: &[u8],
1154 rel_path: &str,
1155 conv: &ConversionConfig,
1156 file_attrs: &FileAttrs,
1157 opts: ConvertToGitOpts<'_>,
1158) -> Result<Vec<u8>, String> {
1159 let mut buf = data.to_vec();
1160
1161 if let Some(ref proc_cmd) = file_attrs.filter_process {
1163 let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
1164 buf = apply_process_clean(proc_cmd, rel_path, &buf).map_err(|_e| {
1165 if file_attrs.filter_clean_required {
1166 format!("fatal: {rel_path}: clean filter '{name}' failed")
1167 } else {
1168 format!("clean filter failed: {_e}")
1169 }
1170 })?;
1171 } else {
1172 match file_attrs.filter_clean.as_ref() {
1173 Some(clean_cmd) => {
1174 buf = run_filter(clean_cmd, &buf, rel_path).map_err(|e| {
1175 let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
1176 if file_attrs.filter_clean_required {
1177 format!("fatal: {rel_path}: clean filter '{name}' failed")
1178 } else {
1179 format!("clean filter failed: {e}")
1180 }
1181 })?;
1182 }
1183 None => {
1184 if file_attrs.filter_clean_required {
1185 let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
1186 return Err(format!("fatal: {rel_path}: clean filter '{name}' failed"));
1187 }
1188 }
1189 }
1190 }
1191
1192 if let Some(ref enc) = file_attrs.working_tree_encoding {
1194 buf = decode_working_tree_bytes_to_utf8(&buf, rel_path, enc)?;
1195 }
1196
1197 let would_convert = would_convert_on_input(conv, file_attrs, &buf);
1199
1200 let mut convert_crlf_into_lf = would_convert && has_crlf(&buf);
1201 if convert_crlf_into_lf
1202 && clean_uses_autocrlf_index_guard(file_attrs, conv)
1203 && !opts.renormalize
1204 && opts.index_blob.is_some_and(has_crlf_in_index_blob)
1205 {
1206 convert_crlf_into_lf = false;
1207 }
1208
1209 if would_convert && opts.check_safecrlf {
1211 check_safecrlf_roundtrip(conv, file_attrs, &buf, rel_path, convert_crlf_into_lf)?;
1212 }
1213
1214 if convert_crlf_into_lf {
1216 buf = crlf_to_lf(&buf);
1217 }
1218
1219 Ok(buf)
1220}
1221
1222fn would_convert_on_input(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
1226 match attrs.crlf_legacy {
1227 CrlfLegacyAttr::Unset => return false,
1228 CrlfLegacyAttr::Input => {
1229 if is_binary(data) {
1230 return false;
1231 }
1232 return true;
1233 }
1234 CrlfLegacyAttr::Crlf => {
1235 if attrs.text == TextAttr::Unset {
1236 return false;
1237 }
1238 if is_binary(data) {
1239 return false;
1240 }
1241 return true;
1242 }
1243 CrlfLegacyAttr::Unspecified => {}
1244 }
1245
1246 if attrs.text == TextAttr::Unset {
1248 return false;
1249 }
1250
1251 if attrs.eol != EolAttr::Unspecified {
1253 if attrs.text == TextAttr::Auto && is_binary(data) {
1254 return false;
1255 }
1256 return true;
1257 }
1258
1259 if attrs.text == TextAttr::Set {
1261 return true;
1262 }
1263
1264 if attrs.text == TextAttr::Auto {
1265 if is_binary(data) {
1266 return false;
1267 }
1268 return true;
1269 }
1270
1271 match conv.autocrlf {
1273 AutoCrlf::True | AutoCrlf::Input => {
1274 if is_binary(data) {
1275 return false;
1276 }
1277 true
1278 }
1279 AutoCrlf::False => false,
1280 }
1281}
1282
/// Prints the safecrlf warning for a CRLF→LF replacement of `rel_path` to stderr.
fn eprint_safecrlf_warn_crlf_to_lf(rel_path: &str) {
    let message = format!(
        "warning: in the working copy of '{rel_path}', CRLF will be replaced by LF the next time Git touches it"
    );
    eprintln!("{message}");
}
1289
/// Prints the safecrlf warning for an LF→CRLF replacement of `rel_path` to stderr.
fn eprint_safecrlf_warn_lf_to_crlf(rel_path: &str) {
    let message = format!(
        "warning: in the working copy of '{rel_path}', LF will be replaced by CRLF the next time Git touches it"
    );
    eprintln!("{message}");
}
1296
1297fn check_safecrlf_roundtrip(
1299 conv: &ConversionConfig,
1300 file_attrs: &FileAttrs,
1301 data: &[u8],
1302 rel_path: &str,
1303 convert_crlf_into_lf: bool,
1304) -> Result<(), String> {
1305 if conv.safecrlf == SafeCrlf::False {
1306 return Ok(());
1307 }
1308
1309 let old_stats = git_text_stat(data);
1310
1311 let mut new_stats = old_stats.clone();
1312 if convert_crlf_into_lf && new_stats.crlf > 0 {
1313 new_stats.lonelf += new_stats.crlf;
1314 new_stats.crlf = 0;
1315 }
1316 if will_convert_lf_to_crlf_from_stats(&new_stats, conv, file_attrs) {
1317 new_stats.crlf += new_stats.lonelf;
1318 new_stats.lonelf = 0;
1319 }
1320
1321 if old_stats.crlf > 0 && new_stats.crlf == 0 {
1322 let msg = format!("fatal: CRLF would be replaced by LF in {rel_path}");
1323 if conv.safecrlf == SafeCrlf::True {
1324 return Err(msg);
1325 }
1326 eprint_safecrlf_warn_crlf_to_lf(rel_path);
1327 } else if old_stats.lonelf > 0 && new_stats.lonelf == 0 {
1328 let msg = format!("fatal: LF would be replaced by CRLF in {rel_path}");
1329 if conv.safecrlf == SafeCrlf::True {
1330 return Err(msg);
1331 }
1332 eprint_safecrlf_warn_lf_to_crlf(rel_path);
1333 }
1334
1335 Ok(())
1336}
1337
/// Returns a copy of `data` with every CRLF pair replaced by a single LF.
/// Lone CR bytes are kept.
pub fn crlf_to_lf(data: &[u8]) -> Vec<u8> {
    let mut converted = Vec::with_capacity(data.len());
    let mut bytes = data.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        // Drop a CR only when the LF that follows can be consumed with it.
        if byte == b'\r' && bytes.next_if_eq(&b'\n').is_some() {
            converted.push(b'\n');
        } else {
            converted.push(byte);
        }
    }
    converted
}
1353
/// Returns a copy of `data` with every lone LF expanded to CRLF.
/// LFs that already follow a CR are left untouched.
pub fn lf_to_crlf(data: &[u8]) -> Vec<u8> {
    let mut converted = Vec::with_capacity(data.len() + data.len() / 10);
    let mut previous: Option<u8> = None;
    for &byte in data {
        if byte == b'\n' && previous != Some(b'\r') {
            converted.push(b'\r');
        }
        converted.push(byte);
        previous = Some(byte);
    }
    converted
}
1369
1370pub fn convert_to_worktree(
1386 data: &[u8],
1387 rel_path: &str,
1388 conv: &ConversionConfig,
1389 file_attrs: &FileAttrs,
1390 oid_hex: Option<&str>,
1391 smudge_meta: Option<&FilterSmudgeMeta>,
1392 delayed_checkout: Option<&mut crate::filter_process::DelayedProcessCheckout>,
1393) -> Result<Option<Vec<u8>>, String> {
1394 let mut buf = data.to_vec();
1395
1396 if file_attrs.ident {
1398 if let Some(oid) = oid_hex {
1399 buf = expand_ident(&buf, oid);
1400 }
1401 }
1402
1403 let can_delay_smudge = delayed_checkout.is_some()
1404 && file_attrs.working_tree_encoding.is_none()
1405 && !file_attrs.ident
1406 && file_attrs
1407 .filter_process
1408 .as_deref()
1409 .is_some_and(|c| !c.is_empty())
1410 && !should_convert_to_crlf(conv, file_attrs, &buf)
1411 && file_attrs
1412 .filter_process
1413 .as_deref()
1414 .is_some_and(crate::filter_process::process_filter_supports_delay);
1415
1416 let should_convert = should_convert_to_crlf(conv, file_attrs, &buf);
1418 if should_convert {
1419 buf = lf_to_crlf(&buf);
1420 }
1421
1422 if let Some(ref enc) = file_attrs.working_tree_encoding {
1424 buf = encode_utf8_blob_to_working_tree_bytes(&buf, rel_path, enc)?;
1425 }
1426
1427 let driver = file_attrs.filter_driver_name.as_deref().unwrap_or("");
1429 if let Some(ref proc_cmd) = file_attrs.filter_process {
1430 let smudge_out =
1431 apply_process_smudge(proc_cmd, rel_path, &buf, smudge_meta, can_delay_smudge).map_err(
1432 |_e| {
1433 if file_attrs.filter_smudge_required {
1434 format!("fatal: {rel_path}: smudge filter {driver} failed")
1435 } else {
1436 _e
1437 }
1438 },
1439 )?;
1440 let Some(out) = smudge_out else {
1441 let Some(q) = delayed_checkout else {
1442 return Err(format!(
1443 "internal error: delayed smudge without checkout queue for {rel_path}"
1444 ));
1445 };
1446 q.push_delayed(
1447 proc_cmd.clone(),
1448 rel_path.to_string(),
1449 smudge_meta.cloned().unwrap_or_default(),
1450 );
1451 return Ok(None);
1452 };
1453 buf = out;
1454 } else {
1455 match file_attrs.filter_smudge.as_ref() {
1456 Some(smudge_cmd) => match run_filter(smudge_cmd, &buf, rel_path) {
1457 Ok(filtered) => buf = filtered,
1458 Err(_e) => {
1459 if file_attrs.filter_smudge_required {
1460 return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
1461 }
1462 }
1463 },
1464 None => {
1465 if file_attrs.filter_smudge_required {
1466 return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
1467 }
1468 }
1469 }
1470 }
1471
1472 Ok(Some(buf))
1473}
1474
1475#[must_use]
1477pub fn convert_to_worktree_eager(
1478 data: &[u8],
1479 rel_path: &str,
1480 conv: &ConversionConfig,
1481 file_attrs: &FileAttrs,
1482 oid_hex: Option<&str>,
1483 smudge_meta: Option<&FilterSmudgeMeta>,
1484) -> Result<Vec<u8>, String> {
1485 match convert_to_worktree(data, rel_path, conv, file_attrs, oid_hex, smudge_meta, None)? {
1486 Some(v) => Ok(v),
1487 None => Err(format!(
1488 "internal error: unexpected delayed smudge for {rel_path}"
1489 )),
1490 }
1491}
1492
1493#[must_use]
1495pub fn should_convert_to_crlf(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
1496 match attrs.crlf_legacy {
1497 CrlfLegacyAttr::Unset | CrlfLegacyAttr::Input => return false,
1498 CrlfLegacyAttr::Crlf => {
1499 if attrs.text == TextAttr::Unset {
1500 return false;
1501 }
1502 return true;
1505 }
1506 CrlfLegacyAttr::Unspecified => {}
1507 }
1508
1509 if attrs.text == TextAttr::Unset {
1511 return false;
1512 }
1513
1514 if attrs.eol != EolAttr::Unspecified {
1516 if attrs.text == TextAttr::Auto && is_binary(data) {
1517 return false;
1518 }
1519 if attrs.eol != EolAttr::Crlf {
1520 return false;
1521 }
1522 if attrs.text == TextAttr::Auto {
1524 return auto_crlf_should_smudge_lf_to_crlf(data);
1525 }
1526 return true;
1528 }
1529
1530 if attrs.text == TextAttr::Set {
1532 return output_eol_is_crlf(conv);
1533 }
1534
1535 if attrs.text == TextAttr::Auto {
1536 if is_binary(data) {
1537 return false;
1538 }
1539 if !output_eol_is_crlf(conv) {
1540 return false;
1541 }
1542 return auto_crlf_should_smudge_lf_to_crlf(data);
1543 }
1544
1545 match conv.autocrlf {
1547 AutoCrlf::True => {
1548 if is_binary(data) {
1549 return false;
1550 }
1551 auto_crlf_should_smudge_lf_to_crlf(data)
1552 }
1553 AutoCrlf::Input | AutoCrlf::False => false,
1554 }
1555}
1556
1557fn output_eol_is_crlf(conv: &ConversionConfig) -> bool {
1559 if conv.autocrlf == AutoCrlf::Input {
1561 return false;
1562 }
1563 if conv.autocrlf == AutoCrlf::True {
1564 return true;
1565 }
1566 match conv.eol {
1567 CoreEol::Crlf => true,
1568 CoreEol::Lf => false,
1569 CoreEol::Native => {
1570 cfg!(windows)
1572 }
1573 }
1574}
1575
1576fn expand_ident(data: &[u8], oid: &str) -> Vec<u8> {
1581 if !count_ident_regions(data) {
1582 return data.to_vec();
1583 }
1584 let replacement = format!("$Id: {oid} $");
1585 let mut out = Vec::with_capacity(data.len() + 60);
1586 let mut i = 0;
1587 while i < data.len() {
1588 if data[i] != b'$' {
1589 out.push(data[i]);
1590 i += 1;
1591 continue;
1592 }
1593 if i + 3 > data.len() || data[i + 1] != b'I' || data[i + 2] != b'd' {
1594 out.push(data[i]);
1595 i += 1;
1596 continue;
1597 }
1598 let after_id = i + 3;
1599 let ch = data.get(after_id).copied();
1600 match ch {
1601 Some(b'$') => {
1602 out.extend_from_slice(replacement.as_bytes());
1603 i = after_id + 1;
1604 }
1605 Some(b':') => {
1606 let rest = &data[after_id + 1..];
1607 let line_end = rest
1608 .iter()
1609 .position(|&b| b == b'\n' || b == b'\r')
1610 .unwrap_or(rest.len());
1611 let line = &rest[..line_end];
1612 let Some(dollar_rel) = line.iter().position(|&b| b == b'$') else {
1613 out.push(data[i]);
1614 i += 1;
1615 continue;
1616 };
1617 if line[..dollar_rel].contains(&b'\n') {
1618 out.push(data[i]);
1619 i += 1;
1620 continue;
1621 }
1622 let payload = &line[..dollar_rel];
1625 let foreign = payload.len() > 1
1626 && payload[1..]
1627 .iter()
1628 .position(|&b| b == b' ')
1629 .is_some_and(|rel| {
1630 let pos = 1 + rel;
1631 pos < payload.len().saturating_sub(1)
1632 });
1633 if foreign {
1634 out.push(data[i]);
1635 i += 1;
1636 continue;
1637 }
1638 out.extend_from_slice(replacement.as_bytes());
1639 i = after_id + 1 + dollar_rel + 1;
1640 }
1641 _ => {
1642 out.push(data[i]);
1643 i += 1;
1644 }
1645 }
1646 }
1647 out
1648}
1649
/// Reports whether `data` contains at least one expandable ident keyword.
///
/// A keyword is either `$Id$`, or `$Id:` followed by a `$` that appears
/// before the next CR or LF.
fn count_ident_regions(data: &[u8]) -> bool {
    let mut search_from = 0usize;
    while let Some(offset) = data[search_from..].windows(3).position(|w| w == b"$Id") {
        let start = search_from + offset;
        let tail = &data[start + 3..];
        match tail.first() {
            Some(b'$') => return true,
            Some(b':') => {
                // Whichever of `$`, LF or CR comes first decides the outcome.
                let stop = tail[1..]
                    .iter()
                    .find(|&&b| matches!(b, b'$' | b'\n' | b'\r'));
                if stop == Some(&b'$') {
                    return true;
                }
            }
            _ => {}
        }
        // Not a keyword here; resume right after this `$`.
        search_from = start + 1;
    }
    false
}
1688
/// Collapses every expanded `$Id: ... $` keyword back to `$Id$`.
///
/// A keyword is only collapsed when its closing `$` appears before the next
/// CR or LF; anything else is copied through byte for byte.
pub fn collapse_ident(data: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(data.len());
    let mut rest = data;
    while let Some((&first, tail)) = rest.split_first() {
        if let Some(after) = rest.strip_prefix(b"$Id:") {
            // The keyword closes only if `$` comes before any line break.
            let closing = after
                .iter()
                .position(|&b| matches!(b, b'$' | b'\n' | b'\r'))
                .filter(|&idx| after[idx] == b'$');
            if let Some(idx) = closing {
                out.extend_from_slice(b"$Id$");
                rest = &after[idx + 1..];
                continue;
            }
        }
        out.push(first);
        rest = tail;
    }
    out
}
1712
/// Wraps `s` in single quotes for use as one shell word.
///
/// Each embedded single quote is written as `'\''`: close the quoted string,
/// emit an escaped quote, reopen the quoted string.
fn sq_quote_buf(s: &str) -> String {
    format!("'{}'", s.replace('\'', "'\\''"))
}
1727
1728fn expand_filter_command(cmd: &str, rel_path: &str) -> String {
1730 let mut out = String::with_capacity(cmd.len() + rel_path.len() + 8);
1731 let mut chars = cmd.chars().peekable();
1732 while let Some(c) = chars.next() {
1733 if c == '%' {
1734 match chars.peek() {
1735 Some('%') => {
1736 chars.next();
1737 out.push('%');
1738 }
1739 Some('f') => {
1740 chars.next();
1741 out.push_str(&sq_quote_buf(rel_path));
1742 }
1743 _ => out.push('%'),
1744 }
1745 } else {
1746 out.push(c);
1747 }
1748 }
1749 out
1750}
1751
1752fn run_filter(cmd: &str, data: &[u8], rel_path: &str) -> Result<Vec<u8>, std::io::Error> {
1754 let expanded = expand_filter_command(cmd, rel_path);
1755 let mut child = Command::new("sh")
1756 .arg("-c")
1757 .arg(&expanded)
1758 .stdin(Stdio::piped())
1759 .stdout(Stdio::piped())
1760 .stderr(Stdio::inherit())
1761 .spawn()?;
1762
1763 use std::io::{ErrorKind, Write};
1764 if let Some(ref mut stdin) = child.stdin {
1765 if let Err(e) = stdin.write_all(data) {
1766 if e.kind() != ErrorKind::BrokenPipe {
1768 return Err(e);
1769 }
1770 }
1771 }
1772 drop(child.stdin.take());
1773
1774 let output = child.wait_with_output()?;
1775 if !output.status.success() {
1776 return Err(std::io::Error::other(format!(
1777 "filter command exited with status {}",
1778 output.status
1779 )));
1780 }
1781
1782 Ok(output.stdout)
1783}
1784
/// A list of attribute rules, stored as a plain `Vec` of [`AttrRule`].
// NOTE(review): presumably holds the rules parsed from gitattributes content,
// in file order — confirm against the parser that builds it.
pub type GitAttributes = Vec<AttrRule>;
1790
// Unit tests for the line-ending, ident, attribute-matching and filter-command helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // CRLF pairs collapse to LF; content that is already LF-only is unchanged.
    #[test]
    fn test_crlf_to_lf() {
        assert_eq!(crlf_to_lf(b"hello\r\nworld\r\n"), b"hello\nworld\n");
        assert_eq!(crlf_to_lf(b"hello\nworld\n"), b"hello\nworld\n");
        assert_eq!(crlf_to_lf(b"hello\r\n"), b"hello\n");
    }

    // Bare LFs gain a CR; existing CRLF pairs are not doubled.
    #[test]
    fn test_lf_to_crlf() {
        assert_eq!(lf_to_crlf(b"hello\nworld\n"), b"hello\r\nworld\r\n");
        assert_eq!(lf_to_crlf(b"hello\r\nworld\r\n"), b"hello\r\nworld\r\n");
    }

    // `has_crlf` is true for input containing a CRLF pair and false for LF-only input.
    #[test]
    fn test_has_crlf() {
        assert!(has_crlf(b"hello\r\nworld"));
        assert!(!has_crlf(b"hello\nworld"));
    }

    // With autocrlf=true and default attributes, a blob that mixes LF and
    // CRLF line endings must come out of the smudge path byte-for-byte unchanged.
    #[test]
    fn smudge_mixed_line_endings_unchanged_with_autocrlf_true() {
        let mut blob = Vec::new();
        for part in [
            b"Oh\n".as_slice(),
            b"here\n",
            b"is\n",
            b"CRLF\r\n",
            b"in\n",
            b"text\n",
        ] {
            blob.extend_from_slice(part);
        }
        let conv = ConversionConfig {
            autocrlf: AutoCrlf::True,
            eol: CoreEol::Lf,
            safecrlf: SafeCrlf::False,
        };
        let attrs = FileAttrs::default();
        let out = convert_to_worktree_eager(&blob, "mixed", &conv, &attrs, None, None).unwrap();
        assert_eq!(out, blob);
    }

    // With autocrlf=true and default attributes, an LF-only blob is written
    // with CRLF line endings (autocrlf wins over `eol: Lf` here).
    #[test]
    fn smudge_lf_only_gets_crlf_with_autocrlf_true() {
        let blob = b"a\nb\n";
        let conv = ConversionConfig {
            autocrlf: AutoCrlf::True,
            eol: CoreEol::Lf,
            safecrlf: SafeCrlf::False,
        };
        let attrs = FileAttrs::default();
        let out = convert_to_worktree_eager(blob, "x", &conv, &attrs, None, None).unwrap();
        assert_eq!(out, b"a\r\nb\r\n");
    }

    // Content containing a NUL byte is classified as binary; plain text is not.
    #[test]
    fn test_is_binary() {
        assert!(is_binary(b"hello\0world"));
        assert!(!is_binary(b"hello world"));
    }

    // A pattern with a trailing slash is parsed as directory-only and
    // basename-only: it must not match a regular file of the same name, but
    // must match a directory with that name.
    #[test]
    fn attr_dir_only_pattern_does_not_match_same_named_file() {
        let rules = parse_gitattributes_content("ignored-only-if-dir/ export-ignore\n");
        let rule = &rules[0];
        assert!(rule.must_be_dir);
        assert!(rule.basename_only);
        assert!(!attr_rule_matches(
            rule,
            "not-ignored-dir/ignored-only-if-dir",
            false
        ));
        assert!(attr_rule_matches(rule, "ignored-only-if-dir", true));
    }

    // Expanding `$Id$` and collapsing the result round-trips to the original.
    #[test]
    fn test_expand_collapse_ident() {
        let data = b"$Id$";
        let expanded = expand_ident(data, "abc123");
        assert_eq!(expanded, b"$Id: abc123 $");
        let collapsed = collapse_ident(&expanded);
        assert_eq!(collapsed, b"$Id$");
    }

    // A `$Id:` with no closing `$` on its own line is left alone; the search
    // for the closing `$` must not run past the line break into the next line.
    #[test]
    fn expand_ident_does_not_span_lines_for_partial_keyword() {
        let data = b"$Id: NoTerminatingSymbol\n$Id: deadbeef $\n";
        let expanded = expand_ident(data, "newoid");
        assert_eq!(expanded, b"$Id: NoTerminatingSymbol\n$Id: newoid $\n");
    }

    // A payload with spaces in the middle is preserved verbatim instead of
    // being replaced by the object id.
    #[test]
    fn expand_ident_preserves_foreign_id_with_internal_spaces() {
        let data = b"$Id: Foreign Commit With Spaces $\n";
        let expanded = expand_ident(data, "abc");
        assert_eq!(expanded, data);
    }

    // `%f` expands to the shell-quoted path (embedded single quotes escaped)
    // and `%%` expands to a literal percent sign.
    #[test]
    fn expand_filter_command_percent_f_quotes_path() {
        let s = expand_filter_command("sh ./x.sh %f --extra", "name with 'sq'");
        assert_eq!(s, "sh ./x.sh 'name with '\\''sq'\\''' --extra");
        assert_eq!(expand_filter_command("a %% b", "p"), "a % b");
    }
}