1use std::path::{Path, PathBuf};
18use std::process::{Command, Stdio};
19
20use encoding_rs::UTF_8;
21
22use crate::config::ConfigSet;
23use crate::filter_process::{apply_process_clean, apply_process_smudge, FilterSmudgeMeta};
24
/// Parsed value of the `core.autocrlf` configuration key.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AutoCrlf {
    /// The key holds a truthy boolean (`true`, `yes`, `on`, `1`).
    True,
    /// The key holds `input`.
    Input,
    /// Any other value, or the key is not configured.
    False,
}
32
/// Parsed value of the `core.eol` configuration key.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CoreEol {
    /// `core.eol=lf`.
    Lf,
    /// `core.eol=crlf`.
    Crlf,
    /// `core.eol=native`, an unrecognised value, or the key is not configured.
    Native,
}
40
/// Parsed value of the `core.safecrlf` configuration key.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SafeCrlf {
    /// Irreversible line-ending conversions are reported as errors.
    True,
    /// Irreversible line-ending conversions only print a warning.
    Warn,
    /// No round-trip check is performed.
    False,
}
48
/// State of the `text` attribute for a path.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TextAttr {
    /// `text` is set.
    Set,
    /// `text=auto`.
    Auto,
    /// `-text`.
    Unset,
    /// No rule mentioned `text`, or it carried an unrecognised value.
    Unspecified,
}
61
/// State of the `eol` attribute for a path.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum EolAttr {
    /// `eol=lf`.
    Lf,
    /// `eol=crlf`.
    Crlf,
    /// No rule mentioned `eol`, or it carried any other value.
    Unspecified,
}
69
/// State of the legacy `crlf` attribute for a path.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CrlfLegacyAttr {
    /// No rule mentioned `crlf`, or it carried an unrecognised value.
    #[default]
    Unspecified,
    /// `-crlf`.
    Unset,
    /// `crlf=input`.
    Input,
    /// `crlf` is set.
    Crlf,
}
82
/// State of the `merge` attribute for a path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MergeAttr {
    /// No rule mentioned `merge`, or it was merely set without a value.
    Unspecified,
    /// `-merge`.
    Unset,
    /// `merge=<name>`: the named merge driver.
    Driver(String),
}
93
/// State of the `diff` attribute for a path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum DiffAttr {
    /// No rule selected a diff driver or unset the attribute.
    Unspecified,
    /// `-diff`.
    Unset,
    /// `diff=<name>`: the named diff driver.
    Driver(String),
}
104
105#[derive(Debug, Clone)]
107pub struct FileAttrs {
108 pub text: TextAttr,
109 pub eol: EolAttr,
110 pub diff_attr: DiffAttr,
112 pub export_ignore: bool,
114 pub export_subst: bool,
116 pub filter_clean: Option<String>,
117 pub filter_smudge: Option<String>,
118 pub filter_process: Option<String>,
120 pub filter_driver_name: Option<String>,
122 pub filter_smudge_required: bool,
124 pub filter_clean_required: bool,
126 pub ident: bool,
127 pub merge: MergeAttr,
128 pub conflict_marker_size: Option<String>,
129 pub working_tree_encoding: Option<String>,
131 pub crlf_legacy: CrlfLegacyAttr,
133 pub whitespace: Option<String>,
136}
137
138impl Default for FileAttrs {
139 fn default() -> Self {
140 FileAttrs {
141 text: TextAttr::Unspecified,
142 eol: EolAttr::Unspecified,
143 diff_attr: DiffAttr::Unspecified,
144 export_ignore: false,
145 export_subst: false,
146 filter_clean: None,
147 filter_smudge: None,
148 filter_process: None,
149 filter_driver_name: None,
150 filter_smudge_required: false,
151 filter_clean_required: false,
152 ident: false,
153 merge: MergeAttr::Unspecified,
154 conflict_marker_size: None,
155 working_tree_encoding: None,
156 crlf_legacy: CrlfLegacyAttr::Unspecified,
157 whitespace: None,
158 }
159 }
160}
161
162#[derive(Debug, Clone)]
164pub struct ConversionConfig {
165 pub autocrlf: AutoCrlf,
166 pub eol: CoreEol,
167 pub safecrlf: SafeCrlf,
168}
169
170impl ConversionConfig {
171 pub fn from_config(config: &ConfigSet) -> Self {
173 let autocrlf = match config.get("core.autocrlf") {
174 Some(v) => match v.to_lowercase().as_str() {
175 "true" | "yes" | "on" | "1" => AutoCrlf::True,
176 "input" => AutoCrlf::Input,
177 _ => AutoCrlf::False,
178 },
179 None => AutoCrlf::False,
180 };
181
182 let eol = match config.get("core.eol") {
183 Some(v) => match v.to_lowercase().as_str() {
184 "crlf" => CoreEol::Crlf,
185 "lf" => CoreEol::Lf,
186 "native" => CoreEol::Native,
187 _ => CoreEol::Native,
188 },
189 None => CoreEol::Native,
190 };
191
192 let safecrlf = match config.get("core.safecrlf") {
193 Some(v) => match v.to_lowercase().as_str() {
194 "true" | "yes" | "on" | "1" => SafeCrlf::True,
195 "warn" => SafeCrlf::Warn,
196 _ => SafeCrlf::False,
197 },
198 None => SafeCrlf::Warn,
200 };
201
202 ConversionConfig {
203 autocrlf,
204 eol,
205 safecrlf,
206 }
207 }
208}
209
/// One parsed attributes line: a path pattern plus the attributes it assigns.
#[derive(Clone, Debug)]
pub struct AttrRule {
    /// Glob pattern, with any trailing `/` already removed.
    pattern: String,
    /// The pattern ended in `/` and therefore only applies to directories.
    must_be_dir: bool,
    /// The pattern contains no `/` and is matched against the basename only.
    basename_only: bool,
    /// `(name, value)` pairs; the value is `"set"`, `"unset"` or the text after `=`.
    attrs: Vec<(String, String)>,
}
221
222impl AttrRule {
223 pub fn diff_drivers(&self) -> impl Iterator<Item = &str> + '_ {
225 self.attrs.iter().filter_map(|(name, value)| {
226 if name == "diff" && !value.is_empty() && value != "unset" && value != "set" {
227 Some(value.as_str())
228 } else {
229 None
230 }
231 })
232 }
233}
234
235pub fn load_gitattributes(work_tree: &Path) -> Vec<AttrRule> {
237 let mut rules = Vec::new();
238
239 let root_attrs = work_tree.join(".gitattributes");
240 if let Ok(content) = std::fs::read_to_string(&root_attrs) {
241 parse_gitattributes(&content, &mut rules);
242 }
243
244 let info_attrs = work_tree.join(".git/info/attributes");
245 if let Ok(content) = std::fs::read_to_string(&info_attrs) {
246 parse_gitattributes(&content, &mut rules);
247 }
248
249 rules
250}
251
252#[must_use]
257pub fn parse_gitattributes_content(content: &str) -> Vec<AttrRule> {
258 let mut rules = Vec::new();
259 parse_gitattributes(content, &mut rules);
260 rules
261}
262
263pub fn load_gitattributes_from_index(
266 index: &crate::index::Index,
267 odb: &crate::odb::Odb,
268) -> Vec<AttrRule> {
269 let mut rules = Vec::new();
270
271 if let Some(entry) = index.get(b".gitattributes", 0) {
273 if let Ok(obj) = odb.read(&entry.oid) {
274 if let Ok(content) = String::from_utf8(obj.data) {
275 parse_gitattributes(&content, &mut rules);
276 }
277 }
278 }
279
280 rules
281}
282
283pub fn load_gitattributes_for_checkout(
289 work_tree: &Path,
290 rel_path: &str,
291 index: &crate::index::Index,
292 odb: &crate::odb::Odb,
293) -> Vec<AttrRule> {
294 let mut rules = load_gitattributes(work_tree);
295
296 if !work_tree.join(".gitattributes").exists() {
299 if let Some(entry) = index.get(b".gitattributes", 0) {
300 if let Ok(obj) = odb.read(&entry.oid) {
301 if let Ok(content) = String::from_utf8(obj.data) {
302 parse_gitattributes(&content, &mut rules);
303 }
304 }
305 }
306 }
307
308 let path = Path::new(rel_path);
309 if let Some(parent) = path.parent() {
310 let mut accum = PathBuf::new();
311 for comp in parent.components() {
312 accum.push(comp);
313 let ga_rel = accum.join(".gitattributes");
314 let wt_ga = work_tree.join(&ga_rel);
315 if let Ok(content) = std::fs::read_to_string(&wt_ga) {
316 parse_gitattributes(&content, &mut rules);
317 } else {
318 let key = path_to_index_bytes(&ga_rel);
319 if let Some(entry) = index.get(&key, 0) {
320 if let Ok(obj) = odb.read(&entry.oid) {
321 if let Ok(content) = String::from_utf8(obj.data) {
322 parse_gitattributes(&content, &mut rules);
323 }
324 }
325 }
326 }
327 }
328 }
329
330 rules
331}
332
/// Returns the raw bytes of `path` for use as an index lookup key.
///
/// Relies on the Unix-only `OsStrExt` extension trait.
fn path_to_index_bytes(path: &Path) -> Vec<u8> {
    use std::os::unix::ffi::OsStrExt;
    let raw = path.as_os_str();
    OsStrExt::as_bytes(raw).to_owned()
}
337
338fn parse_gitattributes(content: &str, rules: &mut Vec<AttrRule>) {
339 for line in content.lines() {
340 let line = line.trim();
341 if line.is_empty() || line.starts_with('#') {
342 continue;
343 }
344
345 let mut parts = line.split_whitespace();
346 let raw_pattern = match parts.next() {
347 Some(p) => p,
348 None => continue,
349 };
350
351 let mut pat = raw_pattern.to_owned();
352 let mut must_be_dir = false;
353 if pat.ends_with('/') && pat.len() > 1 {
354 pat.pop();
355 must_be_dir = true;
356 }
357 let basename_only = !pat.contains('/');
358
359 let mut attrs = Vec::new();
360 for part in parts {
361 if part == "binary" {
362 attrs.push(("text".to_owned(), "unset".to_owned()));
363 attrs.push(("diff".to_owned(), "unset".to_owned()));
364 } else if let Some(rest) = part.strip_prefix('-') {
365 attrs.push((rest.to_owned(), "unset".to_owned()));
366 } else if let Some((key, val)) = part.split_once('=') {
367 attrs.push((key.to_owned(), val.to_owned()));
368 } else {
369 attrs.push((part.to_owned(), "set".to_owned()));
370 }
371 }
372
373 if !attrs.is_empty() {
374 rules.push(AttrRule {
375 pattern: pat,
376 must_be_dir,
377 basename_only,
378 attrs,
379 });
380 }
381 }
382}
383
/// Returns `true` when `value`, trimmed and compared ASCII case-insensitively,
/// is one of `true`, `yes`, `on` or `1`.
fn config_bool_truthy(value: &str) -> bool {
    let candidate = value.trim();
    ["true", "yes", "on", "1"]
        .iter()
        .any(|truthy| candidate.eq_ignore_ascii_case(truthy))
}
390
391pub fn get_file_attrs(
396 rules: &[AttrRule],
397 rel_path: &str,
398 is_dir: bool,
399 config: &ConfigSet,
400) -> FileAttrs {
401 let mut fa = FileAttrs::default();
402
403 for rule in rules {
405 if attr_rule_matches(rule, rel_path, is_dir) {
406 for (name, value) in &rule.attrs {
407 match name.as_str() {
408 "text" => {
409 fa.text = match value.as_str() {
410 "set" => TextAttr::Set,
411 "unset" => TextAttr::Unset,
412 "auto" => TextAttr::Auto,
413 _ => TextAttr::Unspecified,
414 };
415 }
416 "eol" => {
417 fa.eol = match value.as_str() {
418 "lf" => EolAttr::Lf,
419 "crlf" => EolAttr::Crlf,
420 _ => EolAttr::Unspecified,
421 };
422 }
423 "filter" => {
424 if value == "unset" {
425 fa.filter_clean = None;
426 fa.filter_smudge = None;
427 fa.filter_process = None;
428 fa.filter_driver_name = None;
429 fa.filter_smudge_required = false;
430 fa.filter_clean_required = false;
431 } else {
432 let clean_key = format!("filter.{value}.clean");
433 let smudge_key = format!("filter.{value}.smudge");
434 let process_key = format!("filter.{value}.process");
435 let req_key = format!("filter.{value}.required");
436 fa.filter_driver_name = Some(value.clone());
437 fa.filter_process = config.get(&process_key).filter(|s| !s.is_empty());
438 if fa.filter_process.is_some() {
439 fa.filter_clean = None;
440 fa.filter_smudge = None;
441 } else {
442 fa.filter_clean = config.get(&clean_key);
443 fa.filter_smudge = config.get(&smudge_key);
444 }
445 let required =
446 config.get(&req_key).is_some_and(|v| config_bool_truthy(&v));
447 fa.filter_smudge_required = required;
448 fa.filter_clean_required = required;
449 }
450 }
451 "diff" => {
452 if value == "unset" {
453 fa.diff_attr = DiffAttr::Unset;
454 } else if !value.is_empty() && value != "set" {
455 fa.diff_attr = DiffAttr::Driver(value.clone());
456 }
457 }
458 "ident" => {
459 fa.ident = value == "set";
460 }
461 "export-ignore" => {
462 fa.export_ignore = value != "unset";
463 }
464 "export-subst" => {
465 fa.export_subst = value != "unset";
466 }
467 "merge" => {
468 fa.merge = match value.as_str() {
469 "unset" => MergeAttr::Unset,
470 "set" => MergeAttr::Unspecified,
471 other => MergeAttr::Driver(other.to_string()),
472 };
473 }
474 "conflict-marker-size" => {
475 if value == "unset" {
476 fa.conflict_marker_size = None;
477 } else {
478 fa.conflict_marker_size = Some(value.clone());
479 }
480 }
481 "working-tree-encoding" => {
482 if value != "unset" && !value.is_empty() {
483 fa.working_tree_encoding = Some(value.clone());
484 }
485 }
486 "crlf" => {
487 fa.crlf_legacy = match value.as_str() {
488 "unset" => CrlfLegacyAttr::Unset,
489 "input" => CrlfLegacyAttr::Input,
490 "set" => CrlfLegacyAttr::Crlf,
491 _ => CrlfLegacyAttr::Unspecified,
492 };
493 }
494 "whitespace" => {
495 if value == "unset" {
496 fa.whitespace = Some("unset".to_owned());
497 } else if !value.is_empty() {
498 fa.whitespace = Some(value.clone());
499 }
500 }
501 _ => {}
502 }
503 }
504 }
505 }
506
507 fa
508}
509
510#[must_use]
515pub fn path_has_gitattribute(
516 rules: &[AttrRule],
517 path: &str,
518 is_dir: bool,
519 attr_name: &str,
520) -> bool {
521 let mut last: Option<&str> = None;
522 for rule in rules {
523 if attr_rule_matches(rule, path, is_dir) {
524 for (name, value) in &rule.attrs {
525 if name == attr_name {
526 last = Some(value.as_str());
527 }
528 }
529 }
530 }
531 match last {
532 None | Some("unset") => false,
533 Some(_) => true,
534 }
535}
536
537#[must_use]
539pub fn attr_rule_matches(rule: &AttrRule, rel_path: &str, is_dir: bool) -> bool {
540 let path_is_dir = is_dir || rel_path.ends_with('/');
541 if rule.must_be_dir && !path_is_dir {
542 return false;
543 }
544 let path_for_glob = rel_path.trim_end_matches('/');
545 if rule.basename_only {
546 let basename = path_for_glob.rsplit('/').next().unwrap_or(path_for_glob);
547 glob_matches(rule.pattern.as_str(), basename)
548 } else {
549 glob_matches(rule.pattern.as_str(), path_for_glob)
550 }
551}
552
553fn glob_matches(pattern: &str, text: &str) -> bool {
554 glob_match_bytes(pattern.as_bytes(), text.as_bytes())
555}
556
/// Matches `text` against a simple glob `pat`.
///
/// Supported syntax: `*` matches any run of bytes (including `/` and the
/// empty run; consecutive stars behave like one), `?` matches exactly one
/// byte, everything else matches literally. Character classes are not
/// supported.
///
/// Implemented as an iterative scan that only ever backtracks to the most
/// recent `*`, which keeps the worst case at `O(pat.len() * text.len())`
/// instead of the exponential blow-up of naive recursion on patterns with
/// many stars.
fn glob_match_bytes(pat: &[u8], text: &[u8]) -> bool {
    let mut p = 0usize;
    let mut t = 0usize;
    // (pattern index just after the last `*`, text index that star currently covers up to)
    let mut resume: Option<(usize, usize)> = None;

    while t < text.len() {
        match pat.get(p) {
            Some(b'*') => {
                p += 1;
                resume = Some((p, t));
            }
            Some(&c) if c == b'?' || c == text[t] => {
                p += 1;
                t += 1;
            }
            _ => match resume {
                // Let the last star swallow one more byte and retry from there.
                Some((after_star, covered)) => {
                    p = after_star;
                    t = covered + 1;
                    resume = Some((after_star, t));
                }
                None => return false,
            },
        }
    }

    // Text is exhausted: only trailing stars may remain in the pattern.
    pat[p..].iter().all(|&b| b == b'*')
}
580
/// Heuristic binary check: `true` when a NUL byte occurs within the first
/// 8000 bytes of `data`.
pub fn is_binary(data: &[u8]) -> bool {
    data.iter().take(8000).any(|&byte| byte == 0)
}
586
/// Flag bit: the content contains at least one lone LF.
const CONVERT_STAT_BITS_TXT_LF: u32 = 1 << 0;
/// Flag bit: the content contains at least one CRLF pair.
const CONVERT_STAT_BITS_TXT_CRLF: u32 = 1 << 1;
/// Flag bit: the content is classified as binary.
const CONVERT_STAT_BITS_BIN: u32 = 1 << 2;
591
/// Byte-class counters gathered over a buffer.
#[derive(Clone, Default)]
struct TextStat {
    /// Number of NUL bytes.
    nul: u32,
    /// Number of CR bytes not followed by LF.
    lonecr: u32,
    /// Number of LF bytes not preceded by CR.
    lonelf: u32,
    /// Number of CRLF pairs.
    crlf: u32,
    /// Number of bytes counted as printable.
    printable: u32,
    /// Number of bytes counted as non-printable (NUL included).
    nonprintable: u32,
}
601
602fn gather_text_stat(data: &[u8]) -> TextStat {
603 let mut s = TextStat::default();
604 let mut i = 0usize;
605 while i < data.len() {
606 let c = data[i];
607 if c == b'\r' {
608 if i + 1 < data.len() && data[i + 1] == b'\n' {
609 s.crlf += 1;
610 i += 2;
611 } else {
612 s.lonecr += 1;
613 i += 1;
614 }
615 continue;
616 }
617 if c == b'\n' {
618 s.lonelf += 1;
619 i += 1;
620 continue;
621 }
622 if c == 127 {
623 s.nonprintable += 1;
624 } else if c < 32 {
625 match c {
626 b'\t' | b'\x08' | b'\x1b' | b'\x0c' => s.printable += 1,
627 0 => {
628 s.nul += 1;
629 s.nonprintable += 1;
630 }
631 _ => s.nonprintable += 1,
632 }
633 } else {
634 s.printable += 1;
635 }
636 i += 1;
637 }
638 s
639}
640
641fn convert_is_binary(stats: &TextStat) -> bool {
642 stats.lonecr > 0 || stats.nul > 0 || (stats.printable >> 7) < stats.nonprintable
643}
644
645fn git_text_stat(data: &[u8]) -> TextStat {
646 let mut stats = gather_text_stat(data);
647 if !data.is_empty() && data[data.len() - 1] == 0x1a {
648 stats.nonprintable = stats.nonprintable.saturating_sub(1);
649 }
650 stats
651}
652
653fn will_convert_lf_to_crlf_from_stats(
655 stats: &TextStat,
656 conv: &ConversionConfig,
657 attrs: &FileAttrs,
658) -> bool {
659 let has_lone_lf = stats.lonelf > 0;
660 let is_bin = convert_is_binary(stats);
661
662 match attrs.crlf_legacy {
663 CrlfLegacyAttr::Unset | CrlfLegacyAttr::Input => return false,
664 CrlfLegacyAttr::Crlf => {
665 if attrs.text == TextAttr::Unset {
666 return false;
667 }
668 return has_lone_lf;
669 }
670 CrlfLegacyAttr::Unspecified => {}
671 }
672
673 if attrs.text == TextAttr::Unset {
674 return false;
675 }
676
677 if attrs.eol != EolAttr::Unspecified {
678 if attrs.text == TextAttr::Auto && is_bin {
679 return false;
680 }
681 if attrs.eol != EolAttr::Crlf {
682 return false;
683 }
684 if attrs.text == TextAttr::Auto {
685 return auto_crlf_should_smudge_lf_to_crlf_from_stats(stats);
686 }
687 return has_lone_lf;
688 }
689
690 if attrs.text == TextAttr::Set {
691 if !output_eol_is_crlf(conv) {
692 return false;
693 }
694 return has_lone_lf;
695 }
696
697 if attrs.text == TextAttr::Auto {
698 if is_bin || !output_eol_is_crlf(conv) {
699 return false;
700 }
701 return auto_crlf_should_smudge_lf_to_crlf_from_stats(stats);
702 }
703
704 match conv.autocrlf {
705 AutoCrlf::True => {
706 if is_bin {
707 return false;
708 }
709 auto_crlf_should_smudge_lf_to_crlf_from_stats(stats)
710 }
711 AutoCrlf::Input | AutoCrlf::False => false,
712 }
713}
714
715fn auto_crlf_should_smudge_lf_to_crlf_from_stats(stats: &TextStat) -> bool {
716 if stats.lonelf == 0 {
717 return false;
718 }
719 if stats.lonecr > 0 || stats.crlf > 0 {
720 return false;
721 }
722 !convert_is_binary(stats)
723}
724
725fn gather_convert_stats(data: &[u8]) -> u32 {
726 if data.is_empty() {
727 return 0;
728 }
729 let mut stats = gather_text_stat(data);
730 if !data.is_empty() && data[data.len() - 1] == 0x1a {
731 stats.nonprintable = stats.nonprintable.saturating_sub(1);
732 }
733 let mut ret = 0u32;
734 if convert_is_binary(&stats) {
735 ret |= CONVERT_STAT_BITS_BIN;
736 }
737 if stats.crlf > 0 {
738 ret |= CONVERT_STAT_BITS_TXT_CRLF;
739 }
740 if stats.lonelf > 0 {
741 ret |= CONVERT_STAT_BITS_TXT_LF;
742 }
743 ret
744}
745
746#[must_use]
748pub fn gather_convert_stats_ascii(data: &[u8]) -> &'static str {
749 let convert_stats = gather_convert_stats(data);
750 if convert_stats & CONVERT_STAT_BITS_BIN != 0 {
751 return "-text";
752 }
753 match convert_stats {
754 CONVERT_STAT_BITS_TXT_LF => "lf",
755 CONVERT_STAT_BITS_TXT_CRLF => "crlf",
756 x if x == (CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF) => "mixed",
757 _ => "none",
758 }
759}
760
761#[must_use]
764pub fn convert_attr_ascii_for_ls_files(
765 rules: &[AttrRule],
766 rel_path: &str,
767 config: &ConfigSet,
768) -> String {
769 let fa = get_file_attrs(rules, rel_path, false, config);
770 let mut action = match fa.text {
772 TextAttr::Set => 1, TextAttr::Unset => 2, TextAttr::Auto => 5, TextAttr::Unspecified => 0,
776 };
777 if action == 0 {
778 action = match fa.crlf_legacy {
779 CrlfLegacyAttr::Crlf => 1,
780 CrlfLegacyAttr::Unset => 2,
781 CrlfLegacyAttr::Input => 3, CrlfLegacyAttr::Unspecified => 0,
783 };
784 }
785 if action == 2 {
786 return "-text".to_string();
787 }
788 if action == 0 {
790 if fa.eol == EolAttr::Unspecified {
791 return String::new();
792 }
793 action = 1; }
795
796 if fa.eol == EolAttr::Lf {
798 if action == 5 {
799 action = 7; } else {
801 action = 3; }
803 } else if fa.eol == EolAttr::Crlf {
804 if action == 5 {
805 action = 6; } else {
807 action = 4; }
809 }
810
811 let attr_action = action;
813
814 match attr_action {
815 1 => "text".to_string(),
816 3 => "text eol=lf".to_string(),
817 4 => "text eol=crlf".to_string(),
818 5 => "text=auto".to_string(),
819 6 => "text=auto eol=crlf".to_string(),
820 7 => "text=auto eol=lf".to_string(),
821 _ => String::new(),
822 }
823}
824
/// Returns `true` when `data` contains at least one CR immediately followed by LF.
pub fn has_crlf(data: &[u8]) -> bool {
    data.iter()
        .zip(data.iter().skip(1))
        .any(|(&first, &second)| first == b'\r' && second == b'\n')
}
829
/// Returns `true` when `data` contains an LF that is not preceded by CR.
pub fn has_lone_lf(data: &[u8]) -> bool {
    // An LF in first position has no predecessor and is therefore lone.
    data.first() == Some(&b'\n')
        || data
            .windows(2)
            .any(|pair| pair[1] == b'\n' && pair[0] != b'\r')
}
839
/// Returns `true` when `data` contains a CR that is not followed by LF.
fn has_lone_cr(data: &[u8]) -> bool {
    // A CR in last position has no successor and is therefore lone.
    data.last() == Some(&b'\r')
        || data
            .windows(2)
            .any(|pair| pair[0] == b'\r' && pair[1] != b'\n')
}
849
850fn auto_crlf_should_smudge_lf_to_crlf(data: &[u8]) -> bool {
853 if !has_lone_lf(data) {
854 return false;
855 }
856 if has_lone_cr(data) || has_crlf(data) {
857 return false;
858 }
859 if is_binary(data) {
860 return false;
861 }
862 true
863}
864
865pub fn is_all_crlf(data: &[u8]) -> bool {
867 has_crlf(data) && !has_lone_lf(data)
868}
869
870pub fn is_all_lf(data: &[u8]) -> bool {
872 has_lone_lf(data) && !has_crlf(data)
873}
874
875#[must_use]
877pub fn has_crlf_in_index_blob(data: &[u8]) -> bool {
878 if !data.contains(&b'\r') {
879 return false;
880 }
881 let st = gather_convert_stats(data);
882 st & CONVERT_STAT_BITS_BIN == 0 && (st & CONVERT_STAT_BITS_TXT_CRLF) != 0
883}
884
885#[must_use]
889pub fn clean_uses_autocrlf_index_guard(attrs: &FileAttrs, conv: &ConversionConfig) -> bool {
890 if attrs.text == TextAttr::Unset || attrs.crlf_legacy == CrlfLegacyAttr::Unset {
891 return false;
892 }
893 if attrs.eol != EolAttr::Unspecified && attrs.text != TextAttr::Auto {
894 return false;
895 }
896 attrs.text == TextAttr::Auto
897 || (attrs.text == TextAttr::Unspecified
898 && matches!(conv.autocrlf, AutoCrlf::True | AutoCrlf::Input))
899}
900
/// Optional knobs for [`convert_to_git_with_opts`].
#[derive(Clone, Copy, Debug)]
pub struct ConvertToGitOpts<'a> {
    /// Content currently stored in the index for the path, if known; when it
    /// already contains CRLF, automatic CRLF -> LF conversion may be skipped.
    pub index_blob: Option<&'a [u8]>,
    /// When true, the index-blob guard is ignored.
    pub renormalize: bool,
    /// When true, the `core.safecrlf` round-trip check is performed.
    pub check_safecrlf: bool,
}
911
912impl Default for ConvertToGitOpts<'_> {
913 fn default() -> Self {
914 Self {
915 index_blob: None,
916 renormalize: false,
917 check_safecrlf: true,
918 }
919 }
920}
921
/// Serialises UTF-16 code units as little-endian bytes (no BOM is added).
fn utf16_scalar_iter_to_le_bytes(chars: impl Iterator<Item = u16>) -> Vec<u8> {
    chars.flat_map(u16::to_le_bytes).collect()
}
933
/// Serialises UTF-16 code units as big-endian bytes (no BOM is added).
fn utf16_scalar_iter_to_be_bytes(chars: impl Iterator<Item = u16>) -> Vec<u8> {
    chars.flat_map(u16::to_be_bytes).collect()
}
941
/// Serialises every scalar value of `s` as a big-endian 32-bit word (no BOM is added).
fn utf32_chars_to_be_bytes(s: &str) -> Vec<u8> {
    s.chars()
        .flat_map(|ch| u32::from(ch).to_be_bytes())
        .collect()
}
949
/// Serialises every scalar value of `s` as a little-endian 32-bit word (no BOM is added).
fn utf32_chars_to_le_bytes(s: &str) -> Vec<u8> {
    s.chars()
        .flat_map(|ch| u32::from(ch).to_le_bytes())
        .collect()
}
957
/// Decodes BOM-less UTF-32 `body` into UTF-8 bytes.
///
/// `big_endian` selects the byte order of each 32-bit word.
///
/// # Errors
///
/// Returns a message naming `rel_path` when the length is not a multiple of
/// four or when a word is not a valid Unicode scalar value.
fn decode_utf32_body_to_utf8_bytes(
    body: &[u8],
    rel_path: &str,
    big_endian: bool,
) -> Result<Vec<u8>, String> {
    if !body.len().is_multiple_of(4) {
        return Err(format!(
            "invalid UTF-32 length for working tree file '{rel_path}'"
        ));
    }

    let mut decoded = String::new();
    for word in body.chunks_exact(4) {
        let raw = [word[0], word[1], word[2], word[3]];
        let cp = match big_endian {
            true => u32::from_be_bytes(raw),
            false => u32::from_le_bytes(raw),
        };
        match char::from_u32(cp) {
            Some(ch) => decoded.push(ch),
            None => {
                return Err(format!(
                    "invalid UTF-32 scalar U+{cp:X} in working tree file '{rel_path}'"
                ));
            }
        }
    }

    Ok(decoded.into_bytes())
}
984
985fn decode_working_tree_bytes_to_utf8(
986 src: &[u8],
987 rel_path: &str,
988 enc_label: &str,
989) -> Result<Vec<u8>, String> {
990 let label = enc_label.trim();
991 if label.is_empty() {
992 return Ok(src.to_vec());
993 }
994 let lower = label.replace('_', "-").to_ascii_lowercase();
995
996 let (cow, _used_enc, had_errors) = match lower.as_str() {
997 "utf-16le-bom" => {
998 let body = if src.len() >= 2 && src.starts_with(&[0xFF, 0xFE]) {
999 &src[2..]
1000 } else {
1001 src
1002 };
1003 encoding_rs::UTF_16LE.decode(body)
1004 }
1005 "utf-16" => {
1007 if src.len() >= 2 && src.starts_with(&[0xFE, 0xFF]) {
1008 encoding_rs::UTF_16BE.decode(&src[2..])
1009 } else if src.len() >= 2 && src.starts_with(&[0xFF, 0xFE]) {
1010 encoding_rs::UTF_16LE.decode(&src[2..])
1011 } else {
1012 return Err(format!(
1013 "missing byte order mark for UTF-16 working tree file '{rel_path}'"
1014 ));
1015 }
1016 }
1017 "utf-16be" => encoding_rs::UTF_16BE.decode(src),
1018 "utf-16le" => encoding_rs::UTF_16LE.decode(src),
1019 "utf-32" => {
1020 let (body, big_endian) = if src.len() >= 4 && src.starts_with(&[0, 0, 0xFE, 0xFF]) {
1021 (&src[4..], true)
1022 } else if src.len() >= 4 && src.starts_with(&[0xFF, 0xFE, 0, 0]) {
1023 (&src[4..], false)
1024 } else {
1025 return Err(format!(
1026 "missing byte order mark for UTF-32 working tree file '{rel_path}'"
1027 ));
1028 };
1029 return decode_utf32_body_to_utf8_bytes(body, rel_path, big_endian);
1030 }
1031 "utf-32be" => return decode_utf32_body_to_utf8_bytes(src, rel_path, true),
1032 "utf-32le" => return decode_utf32_body_to_utf8_bytes(src, rel_path, false),
1033 _ => {
1034 let Some(enc) = crate::commit_encoding::resolve(label) else {
1035 return Err(format!(
1036 "unknown working-tree-encoding '{label}' for '{rel_path}'"
1037 ));
1038 };
1039 if enc == UTF_8 {
1040 return Ok(src.to_vec());
1041 }
1042 enc.decode(src)
1043 }
1044 };
1045
1046 if had_errors {
1047 return Err(format!(
1048 "failed to decode '{rel_path}' from working-tree-encoding {label}"
1049 ));
1050 }
1051 Ok(cow.into_owned().into_bytes())
1052}
1053
1054fn encode_utf8_blob_to_working_tree_bytes(
1055 src: &[u8],
1056 rel_path: &str,
1057 enc_label: &str,
1058) -> Result<Vec<u8>, String> {
1059 let label = enc_label.trim();
1060 if label.is_empty() {
1061 return Ok(src.to_vec());
1062 }
1063 let s = std::str::from_utf8(src).map_err(|_| {
1064 format!("failed to encode '{rel_path}' from UTF-8: blob is not valid UTF-8")
1065 })?;
1066 let lower = label.replace('_', "-").to_ascii_lowercase();
1067
1068 match lower.as_str() {
1069 "utf-16le-bom" => {
1070 let mut out = vec![0xFF_u8, 0xFE_u8];
1071 out.extend(utf16_scalar_iter_to_le_bytes(s.encode_utf16()));
1072 Ok(out)
1073 }
1074 "utf-16" => {
1077 let mut out = vec![0xFF_u8, 0xFE_u8];
1078 out.extend(utf16_scalar_iter_to_le_bytes(s.encode_utf16()));
1079 Ok(out)
1080 }
1081 "utf-16be" => {
1082 let mut out = vec![0xFE_u8, 0xFF_u8];
1083 out.extend(utf16_scalar_iter_to_be_bytes(s.encode_utf16()));
1084 Ok(out)
1085 }
1086 "utf-16le" => Ok(utf16_scalar_iter_to_le_bytes(s.encode_utf16())),
1087 "utf-32" | "utf-32be" => {
1088 let mut out = vec![0_u8, 0_u8, 0xFE_u8, 0xFF_u8];
1089 out.extend(utf32_chars_to_be_bytes(s));
1090 Ok(out)
1091 }
1092 "utf-32le" => {
1093 let mut out = vec![0xFF_u8, 0xFE_u8, 0_u8, 0_u8];
1094 out.extend(utf32_chars_to_le_bytes(s));
1095 Ok(out)
1096 }
1097 _ => {
1098 let Some(enc) = crate::commit_encoding::resolve(label) else {
1099 return Err(format!(
1100 "unknown working-tree-encoding '{label}' for '{rel_path}'"
1101 ));
1102 };
1103 if enc == UTF_8 {
1104 return Ok(src.to_vec());
1105 }
1106 let (cow, _, had_errors) = enc.encode(s);
1107 if had_errors {
1108 return Err(format!(
1109 "failed to encode '{rel_path}' from UTF-8 to {label}"
1110 ));
1111 }
1112 Ok(cow.into_owned())
1113 }
1114 }
1115}
1116
1117pub fn convert_to_git(
1130 data: &[u8],
1131 rel_path: &str,
1132 conv: &ConversionConfig,
1133 file_attrs: &FileAttrs,
1134) -> Result<Vec<u8>, String> {
1135 convert_to_git_with_opts(
1136 data,
1137 rel_path,
1138 conv,
1139 file_attrs,
1140 ConvertToGitOpts::default(),
1141 )
1142}
1143
1144pub fn convert_to_git_with_opts(
1146 data: &[u8],
1147 rel_path: &str,
1148 conv: &ConversionConfig,
1149 file_attrs: &FileAttrs,
1150 opts: ConvertToGitOpts<'_>,
1151) -> Result<Vec<u8>, String> {
1152 let mut buf = data.to_vec();
1153
1154 if let Some(ref proc_cmd) = file_attrs.filter_process {
1156 let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
1157 buf = apply_process_clean(proc_cmd, rel_path, &buf).map_err(|_e| {
1158 if file_attrs.filter_clean_required {
1159 format!("fatal: {rel_path}: clean filter '{name}' failed")
1160 } else {
1161 format!("clean filter failed: {_e}")
1162 }
1163 })?;
1164 } else {
1165 match file_attrs.filter_clean.as_ref() {
1166 Some(clean_cmd) => {
1167 buf = run_filter(clean_cmd, &buf, rel_path).map_err(|e| {
1168 let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
1169 if file_attrs.filter_clean_required {
1170 format!("fatal: {rel_path}: clean filter '{name}' failed")
1171 } else {
1172 format!("clean filter failed: {e}")
1173 }
1174 })?;
1175 }
1176 None => {
1177 if file_attrs.filter_clean_required {
1178 let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
1179 return Err(format!("fatal: {rel_path}: clean filter '{name}' failed"));
1180 }
1181 }
1182 }
1183 }
1184
1185 if let Some(ref enc) = file_attrs.working_tree_encoding {
1187 buf = decode_working_tree_bytes_to_utf8(&buf, rel_path, enc)?;
1188 }
1189
1190 let would_convert = would_convert_on_input(conv, file_attrs, &buf);
1192
1193 let mut convert_crlf_into_lf = would_convert && has_crlf(&buf);
1194 if convert_crlf_into_lf
1195 && clean_uses_autocrlf_index_guard(file_attrs, conv)
1196 && !opts.renormalize
1197 && opts.index_blob.is_some_and(has_crlf_in_index_blob)
1198 {
1199 convert_crlf_into_lf = false;
1200 }
1201
1202 if would_convert && opts.check_safecrlf {
1204 check_safecrlf_roundtrip(conv, file_attrs, &buf, rel_path, convert_crlf_into_lf)?;
1205 }
1206
1207 if convert_crlf_into_lf {
1209 buf = crlf_to_lf(&buf);
1210 }
1211
1212 Ok(buf)
1213}
1214
1215fn would_convert_on_input(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
1219 match attrs.crlf_legacy {
1220 CrlfLegacyAttr::Unset => return false,
1221 CrlfLegacyAttr::Input => {
1222 if is_binary(data) {
1223 return false;
1224 }
1225 return true;
1226 }
1227 CrlfLegacyAttr::Crlf => {
1228 if attrs.text == TextAttr::Unset {
1229 return false;
1230 }
1231 if is_binary(data) {
1232 return false;
1233 }
1234 return true;
1235 }
1236 CrlfLegacyAttr::Unspecified => {}
1237 }
1238
1239 if attrs.text == TextAttr::Unset {
1241 return false;
1242 }
1243
1244 if attrs.eol != EolAttr::Unspecified {
1246 if attrs.text == TextAttr::Auto && is_binary(data) {
1247 return false;
1248 }
1249 return true;
1250 }
1251
1252 if attrs.text == TextAttr::Set {
1254 return true;
1255 }
1256
1257 if attrs.text == TextAttr::Auto {
1258 if is_binary(data) {
1259 return false;
1260 }
1261 return true;
1262 }
1263
1264 match conv.autocrlf {
1266 AutoCrlf::True | AutoCrlf::Input => {
1267 if is_binary(data) {
1268 return false;
1269 }
1270 true
1271 }
1272 AutoCrlf::False => false,
1273 }
1274}
1275
/// Prints the safecrlf warning for a pending CRLF -> LF replacement to stderr.
fn eprint_safecrlf_warn_crlf_to_lf(rel_path: &str) {
    let message = format!(
        "warning: in the working copy of '{rel_path}', CRLF will be replaced by LF the next time Git touches it"
    );
    eprintln!("{message}");
}
1282
/// Prints the safecrlf warning for a pending LF -> CRLF replacement to stderr.
fn eprint_safecrlf_warn_lf_to_crlf(rel_path: &str) {
    let message = format!(
        "warning: in the working copy of '{rel_path}', LF will be replaced by CRLF the next time Git touches it"
    );
    eprintln!("{message}");
}
1289
1290fn check_safecrlf_roundtrip(
1292 conv: &ConversionConfig,
1293 file_attrs: &FileAttrs,
1294 data: &[u8],
1295 rel_path: &str,
1296 convert_crlf_into_lf: bool,
1297) -> Result<(), String> {
1298 if conv.safecrlf == SafeCrlf::False {
1299 return Ok(());
1300 }
1301
1302 let old_stats = git_text_stat(data);
1303
1304 let mut new_stats = old_stats.clone();
1305 if convert_crlf_into_lf && new_stats.crlf > 0 {
1306 new_stats.lonelf += new_stats.crlf;
1307 new_stats.crlf = 0;
1308 }
1309 if will_convert_lf_to_crlf_from_stats(&new_stats, conv, file_attrs) {
1310 new_stats.crlf += new_stats.lonelf;
1311 new_stats.lonelf = 0;
1312 }
1313
1314 if old_stats.crlf > 0 && new_stats.crlf == 0 {
1315 let msg = format!("fatal: CRLF would be replaced by LF in {rel_path}");
1316 if conv.safecrlf == SafeCrlf::True {
1317 return Err(msg);
1318 }
1319 eprint_safecrlf_warn_crlf_to_lf(rel_path);
1320 } else if old_stats.lonelf > 0 && new_stats.lonelf == 0 {
1321 let msg = format!("fatal: LF would be replaced by CRLF in {rel_path}");
1322 if conv.safecrlf == SafeCrlf::True {
1323 return Err(msg);
1324 }
1325 eprint_safecrlf_warn_lf_to_crlf(rel_path);
1326 }
1327
1328 Ok(())
1329}
1330
/// Returns a copy of `data` with every CRLF pair replaced by a single LF.
/// Lone CR bytes are kept.
pub fn crlf_to_lf(data: &[u8]) -> Vec<u8> {
    let mut converted = Vec::with_capacity(data.len());
    let mut bytes = data.iter().copied().peekable();

    while let Some(byte) = bytes.next() {
        // Drop a CR only when an LF follows; the LF is emitted on the next turn.
        let cr_of_pair = byte == b'\r' && bytes.peek() == Some(&b'\n');
        if !cr_of_pair {
            converted.push(byte);
        }
    }

    converted
}
1346
/// Returns a copy of `data` with every lone LF replaced by CRLF.
/// Existing CRLF pairs are left untouched.
pub fn lf_to_crlf(data: &[u8]) -> Vec<u8> {
    let mut converted = Vec::with_capacity(data.len() + data.len() / 10);
    let mut previous: Option<u8> = None;

    for &byte in data {
        if byte == b'\n' && previous != Some(b'\r') {
            converted.push(b'\r');
        }
        converted.push(byte);
        previous = Some(byte);
    }

    converted
}
1362
1363pub fn convert_to_worktree(
1379 data: &[u8],
1380 rel_path: &str,
1381 conv: &ConversionConfig,
1382 file_attrs: &FileAttrs,
1383 oid_hex: Option<&str>,
1384 smudge_meta: Option<&FilterSmudgeMeta>,
1385 delayed_checkout: Option<&mut crate::filter_process::DelayedProcessCheckout>,
1386) -> Result<Option<Vec<u8>>, String> {
1387 let mut buf = data.to_vec();
1388
1389 if file_attrs.ident {
1391 if let Some(oid) = oid_hex {
1392 buf = expand_ident(&buf, oid);
1393 }
1394 }
1395
1396 let can_delay_smudge = delayed_checkout.is_some()
1397 && file_attrs.working_tree_encoding.is_none()
1398 && !file_attrs.ident
1399 && file_attrs
1400 .filter_process
1401 .as_deref()
1402 .is_some_and(|c| !c.is_empty())
1403 && !should_convert_to_crlf(conv, file_attrs, &buf)
1404 && file_attrs
1405 .filter_process
1406 .as_deref()
1407 .is_some_and(crate::filter_process::process_filter_supports_delay);
1408
1409 let should_convert = should_convert_to_crlf(conv, file_attrs, &buf);
1411 if should_convert {
1412 buf = lf_to_crlf(&buf);
1413 }
1414
1415 if let Some(ref enc) = file_attrs.working_tree_encoding {
1417 buf = encode_utf8_blob_to_working_tree_bytes(&buf, rel_path, enc)?;
1418 }
1419
1420 let driver = file_attrs.filter_driver_name.as_deref().unwrap_or("");
1422 if let Some(ref proc_cmd) = file_attrs.filter_process {
1423 let smudge_out =
1424 apply_process_smudge(proc_cmd, rel_path, &buf, smudge_meta, can_delay_smudge).map_err(
1425 |_e| {
1426 if file_attrs.filter_smudge_required {
1427 format!("fatal: {rel_path}: smudge filter {driver} failed")
1428 } else {
1429 _e
1430 }
1431 },
1432 )?;
1433 let Some(out) = smudge_out else {
1434 let Some(q) = delayed_checkout else {
1435 return Err(format!(
1436 "internal error: delayed smudge without checkout queue for {rel_path}"
1437 ));
1438 };
1439 q.push_delayed(
1440 proc_cmd.clone(),
1441 rel_path.to_string(),
1442 smudge_meta.cloned().unwrap_or_default(),
1443 );
1444 return Ok(None);
1445 };
1446 buf = out;
1447 } else {
1448 match file_attrs.filter_smudge.as_ref() {
1449 Some(smudge_cmd) => match run_filter(smudge_cmd, &buf, rel_path) {
1450 Ok(filtered) => buf = filtered,
1451 Err(_e) => {
1452 if file_attrs.filter_smudge_required {
1453 return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
1454 }
1455 }
1456 },
1457 None => {
1458 if file_attrs.filter_smudge_required {
1459 return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
1460 }
1461 }
1462 }
1463 }
1464
1465 Ok(Some(buf))
1466}
1467
1468#[must_use]
1470pub fn convert_to_worktree_eager(
1471 data: &[u8],
1472 rel_path: &str,
1473 conv: &ConversionConfig,
1474 file_attrs: &FileAttrs,
1475 oid_hex: Option<&str>,
1476 smudge_meta: Option<&FilterSmudgeMeta>,
1477) -> Result<Vec<u8>, String> {
1478 match convert_to_worktree(data, rel_path, conv, file_attrs, oid_hex, smudge_meta, None)? {
1479 Some(v) => Ok(v),
1480 None => Err(format!(
1481 "internal error: unexpected delayed smudge for {rel_path}"
1482 )),
1483 }
1484}
1485
1486#[must_use]
1488pub fn should_convert_to_crlf(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
1489 match attrs.crlf_legacy {
1490 CrlfLegacyAttr::Unset | CrlfLegacyAttr::Input => return false,
1491 CrlfLegacyAttr::Crlf => {
1492 if attrs.text == TextAttr::Unset {
1493 return false;
1494 }
1495 return true;
1498 }
1499 CrlfLegacyAttr::Unspecified => {}
1500 }
1501
1502 if attrs.text == TextAttr::Unset {
1504 return false;
1505 }
1506
1507 if attrs.eol != EolAttr::Unspecified {
1509 if attrs.text == TextAttr::Auto && is_binary(data) {
1510 return false;
1511 }
1512 if attrs.eol != EolAttr::Crlf {
1513 return false;
1514 }
1515 if attrs.text == TextAttr::Auto {
1517 return auto_crlf_should_smudge_lf_to_crlf(data);
1518 }
1519 return true;
1521 }
1522
1523 if attrs.text == TextAttr::Set {
1525 return output_eol_is_crlf(conv);
1526 }
1527
1528 if attrs.text == TextAttr::Auto {
1529 if is_binary(data) {
1530 return false;
1531 }
1532 if !output_eol_is_crlf(conv) {
1533 return false;
1534 }
1535 return auto_crlf_should_smudge_lf_to_crlf(data);
1536 }
1537
1538 match conv.autocrlf {
1540 AutoCrlf::True => {
1541 if is_binary(data) {
1542 return false;
1543 }
1544 auto_crlf_should_smudge_lf_to_crlf(data)
1545 }
1546 AutoCrlf::Input | AutoCrlf::False => false,
1547 }
1548}
1549
1550fn output_eol_is_crlf(conv: &ConversionConfig) -> bool {
1552 if conv.autocrlf == AutoCrlf::Input {
1554 return false;
1555 }
1556 if conv.autocrlf == AutoCrlf::True {
1557 return true;
1558 }
1559 match conv.eol {
1560 CoreEol::Crlf => true,
1561 CoreEol::Lf => false,
1562 CoreEol::Native => {
1563 cfg!(windows)
1565 }
1566 }
1567}
1568
1569fn expand_ident(data: &[u8], oid: &str) -> Vec<u8> {
1574 if !count_ident_regions(data) {
1575 return data.to_vec();
1576 }
1577 let replacement = format!("$Id: {oid} $");
1578 let mut out = Vec::with_capacity(data.len() + 60);
1579 let mut i = 0;
1580 while i < data.len() {
1581 if data[i] != b'$' {
1582 out.push(data[i]);
1583 i += 1;
1584 continue;
1585 }
1586 if i + 3 > data.len() || data[i + 1] != b'I' || data[i + 2] != b'd' {
1587 out.push(data[i]);
1588 i += 1;
1589 continue;
1590 }
1591 let after_id = i + 3;
1592 let ch = data.get(after_id).copied();
1593 match ch {
1594 Some(b'$') => {
1595 out.extend_from_slice(replacement.as_bytes());
1596 i = after_id + 1;
1597 }
1598 Some(b':') => {
1599 let rest = &data[after_id + 1..];
1600 let line_end = rest
1601 .iter()
1602 .position(|&b| b == b'\n' || b == b'\r')
1603 .unwrap_or(rest.len());
1604 let line = &rest[..line_end];
1605 let Some(dollar_rel) = line.iter().position(|&b| b == b'$') else {
1606 out.push(data[i]);
1607 i += 1;
1608 continue;
1609 };
1610 if line[..dollar_rel].contains(&b'\n') {
1611 out.push(data[i]);
1612 i += 1;
1613 continue;
1614 }
1615 let payload = &line[..dollar_rel];
1618 let foreign = payload.len() > 1
1619 && payload[1..]
1620 .iter()
1621 .position(|&b| b == b' ')
1622 .is_some_and(|rel| {
1623 let pos = 1 + rel;
1624 pos < payload.len().saturating_sub(1)
1625 });
1626 if foreign {
1627 out.push(data[i]);
1628 i += 1;
1629 continue;
1630 }
1631 out.extend_from_slice(replacement.as_bytes());
1632 i = after_id + 1 + dollar_rel + 1;
1633 }
1634 _ => {
1635 out.push(data[i]);
1636 i += 1;
1637 }
1638 }
1639 }
1640 out
1641}
1642
/// Reports whether `data` contains at least one ident keyword candidate.
///
/// A candidate is either the collapsed form `$Id$` or `$Id:` followed by a
/// closing `$` before the next CR or LF. Despite the name, nothing is
/// counted: the scan stops at the first candidate.
fn count_ident_regions(data: &[u8]) -> bool {
    data.iter()
        .enumerate()
        .filter(|&(_, &byte)| byte == b'$')
        .any(|(start, _)| {
            let Some(after_id) = data[start..].strip_prefix(b"$Id") else {
                return false;
            };
            match after_id.split_first() {
                Some((&b'$', _)) => true,
                // The keyword only counts when a '$' shows up before the line ends.
                Some((&b':', tail)) => {
                    tail.iter().find(|&&b| matches!(b, b'$' | b'\n' | b'\r')) == Some(&b'$')
                }
                _ => false,
            }
        })
}
1681
/// Collapses every expanded `$Id: ... $` keyword in `data` back to `$Id$`.
///
/// A `$Id:` that has no closing `$` before the next CR or LF is copied
/// through unchanged.
pub fn collapse_ident(data: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(data.len());
    let mut rest = data;
    while let Some((&first, tail)) = rest.split_first() {
        // Offset of the closing '$' relative to the byte after "$Id:", if the
        // keyword closes on the current line.
        let closing = rest.strip_prefix(b"$Id:").and_then(|payload| {
            payload
                .iter()
                .take_while(|&&b| b != b'\n' && b != b'\r')
                .position(|&b| b == b'$')
        });
        match closing {
            Some(end) => {
                out.extend_from_slice(b"$Id$");
                rest = &rest[4 + end + 1..];
            }
            None => {
                out.push(first);
                rest = tail;
            }
        }
    }
    out
}
1705
/// Wraps `s` in single quotes for use as one shell word.
///
/// Each embedded single quote is written as `'\''`: close the quoted string,
/// emit an escaped quote, and reopen it.
fn sq_quote_buf(s: &str) -> String {
    format!("'{}'", s.replace('\'', "'\\''"))
}
1720
1721fn expand_filter_command(cmd: &str, rel_path: &str) -> String {
1723 let mut out = String::with_capacity(cmd.len() + rel_path.len() + 8);
1724 let mut chars = cmd.chars().peekable();
1725 while let Some(c) = chars.next() {
1726 if c == '%' {
1727 match chars.peek() {
1728 Some('%') => {
1729 chars.next();
1730 out.push('%');
1731 }
1732 Some('f') => {
1733 chars.next();
1734 out.push_str(&sq_quote_buf(rel_path));
1735 }
1736 _ => out.push('%'),
1737 }
1738 } else {
1739 out.push(c);
1740 }
1741 }
1742 out
1743}
1744
1745fn run_filter(cmd: &str, data: &[u8], rel_path: &str) -> Result<Vec<u8>, std::io::Error> {
1747 let expanded = expand_filter_command(cmd, rel_path);
1748 let mut child = Command::new("sh")
1749 .arg("-c")
1750 .arg(&expanded)
1751 .stdin(Stdio::piped())
1752 .stdout(Stdio::piped())
1753 .stderr(Stdio::inherit())
1754 .spawn()?;
1755
1756 use std::io::{ErrorKind, Write};
1757 if let Some(ref mut stdin) = child.stdin {
1758 if let Err(e) = stdin.write_all(data) {
1759 if e.kind() != ErrorKind::BrokenPipe {
1761 return Err(e);
1762 }
1763 }
1764 }
1765 drop(child.stdin.take());
1766
1767 let output = child.wait_with_output()?;
1768 if !output.status.success() {
1769 return Err(std::io::Error::other(format!(
1770 "filter command exited with status {}",
1771 output.status
1772 )));
1773 }
1774
1775 Ok(output.stdout)
1776}
1777
/// Alias for a list of [`AttrRule`] entries.
// NOTE(review): presumably the rules parsed from gitattributes files — confirm against callers.
pub type GitAttributes = Vec<AttrRule>;
1783
#[cfg(test)]
mod tests {
    use super::*;

    /// Conversion settings shared by the smudge tests: autocrlf enabled, LF as
    /// the configured eol, safecrlf checks off.
    fn autocrlf_true_config() -> ConversionConfig {
        ConversionConfig {
            autocrlf: AutoCrlf::True,
            eol: CoreEol::Lf,
            safecrlf: SafeCrlf::False,
        }
    }

    /// CRLF pairs collapse to LF; LF-only input is returned as is.
    #[test]
    fn test_crlf_to_lf() {
        assert_eq!(crlf_to_lf(b"hello\r\nworld\r\n"), b"hello\nworld\n");
        assert_eq!(crlf_to_lf(b"hello\nworld\n"), b"hello\nworld\n");
        assert_eq!(crlf_to_lf(b"hello\r\n"), b"hello\n");
    }

    /// Bare LFs gain a CR; existing CRLF pairs are not doubled.
    #[test]
    fn test_lf_to_crlf() {
        assert_eq!(lf_to_crlf(b"hello\nworld\n"), b"hello\r\nworld\r\n");
        assert_eq!(lf_to_crlf(b"hello\r\nworld\r\n"), b"hello\r\nworld\r\n");
    }

    #[test]
    fn test_has_crlf() {
        assert!(has_crlf(b"hello\r\nworld"));
        assert!(!has_crlf(b"hello\nworld"));
    }

    /// A blob that already mixes LF and CRLF must come out of the smudge
    /// path byte-for-byte unchanged.
    #[test]
    fn smudge_mixed_line_endings_unchanged_with_autocrlf_true() {
        let blob = b"Oh\nhere\nis\nCRLF\r\nin\ntext\n".to_vec();
        let attrs = FileAttrs::default();
        let smudged = convert_to_worktree_eager(
            &blob,
            "mixed",
            &autocrlf_true_config(),
            &attrs,
            None,
            None,
        )
        .unwrap();
        assert_eq!(smudged, blob);
    }

    /// A pure-LF blob is converted to CRLF when autocrlf is enabled.
    #[test]
    fn smudge_lf_only_gets_crlf_with_autocrlf_true() {
        let blob = b"a\nb\n";
        let attrs = FileAttrs::default();
        let smudged =
            convert_to_worktree_eager(blob, "x", &autocrlf_true_config(), &attrs, None, None)
                .unwrap();
        assert_eq!(smudged, b"a\r\nb\r\n");
    }

    #[test]
    fn test_is_binary() {
        assert!(is_binary(b"hello\0world"));
        assert!(!is_binary(b"hello world"));
    }

    /// A pattern with a trailing slash only matches directories, even when a
    /// file elsewhere carries the same base name.
    #[test]
    fn attr_dir_only_pattern_does_not_match_same_named_file() {
        let parsed = parse_gitattributes_content("ignored-only-if-dir/ export-ignore\n");
        let dir_rule = &parsed[0];
        assert!(dir_rule.must_be_dir);
        assert!(dir_rule.basename_only);
        assert!(!attr_rule_matches(
            dir_rule,
            "not-ignored-dir/ignored-only-if-dir",
            false
        ));
        assert!(attr_rule_matches(dir_rule, "ignored-only-if-dir", true));
    }

    /// Expanding and then collapsing a keyword round-trips to `$Id$`.
    #[test]
    fn test_expand_collapse_ident() {
        let expanded = expand_ident(b"$Id$", "abc123");
        assert_eq!(expanded, b"$Id: abc123 $");
        assert_eq!(collapse_ident(&expanded), b"$Id$");
    }

    /// An unterminated keyword must not swallow the following line.
    #[test]
    fn expand_ident_does_not_span_lines_for_partial_keyword() {
        let input = b"$Id: NoTerminatingSymbol\n$Id: deadbeef $\n";
        assert_eq!(
            expand_ident(input, "newoid"),
            b"$Id: NoTerminatingSymbol\n$Id: newoid $\n"
        );
    }

    /// Identifiers with interior spaces are treated as foreign and kept.
    #[test]
    fn expand_ident_preserves_foreign_id_with_internal_spaces() {
        let input = b"$Id: Foreign Commit With Spaces $\n";
        assert_eq!(expand_ident(input, "abc"), input);
    }

    /// `%f` is replaced by the shell-quoted path and `%%` by a literal `%`.
    #[test]
    fn expand_filter_command_percent_f_quotes_path() {
        let expanded = expand_filter_command("sh ./x.sh %f --extra", "name with 'sq'");
        assert_eq!(expanded, "sh ./x.sh 'name with '\\''sq'\\''' --extra");
        assert_eq!(expand_filter_command("a %% b", "p"), "a % b");
    }
}