1use super::*;
6use crate::attributes::*;
7use crate::ignore::*;
8use crate::index::*;
9use crate::index_io::*;
10use crate::types_admin::*;
11
/// Line-ending style selected for a path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum EolConversion {
    /// No line-ending style was selected.
    None,
    /// Line feed (`\n`) endings.
    Lf,
    /// Carriage return + line feed (`\r\n`) endings.
    Crlf,
}
45
/// How a path's content is classified for end-of-line conversion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum TextDecision {
    /// The `text`/`crlf` attribute is unset: never convert line endings.
    Binary,
    /// The content is text: convert whenever an EOL style is selected.
    Text,
    /// Convert only when the content does not look binary.
    Auto,
    /// The attributes made no decision.
    Unspecified,
}
60
/// The full set of content conversions resolved for a single path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct ContentFilterPlan {
    /// Text/binary classification driving EOL conversion.
    pub(crate) text: TextDecision,
    /// Line-ending style to apply.
    pub(crate) eol: EolConversion,
    /// Whether the `ident` attribute is set for the path.
    pub(crate) ident: bool,
    /// Filter driver named by the `filter` attribute, when one is configured.
    pub(crate) driver: Option<FilterDriver>,
    /// Value of the `working-tree-encoding` attribute.
    pub(crate) encoding: WtEncoding,
}
75
/// A filter driver resolved from the `filter` attribute and `filter.<name>.*` configuration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct FilterDriver {
    /// Raw value of the `filter` attribute (the driver name).
    name: Vec<u8>,
    /// Non-empty `filter.<name>.process` command, if configured.
    process: Option<String>,
    /// Non-empty `filter.<name>.clean` command, if configured.
    clean: Option<String>,
    /// Non-empty `filter.<name>.smudge` command, if configured.
    smudge: Option<String>,
    /// Value of `filter.<name>.required`; `false` when not configured.
    required: bool,
}
84
/// Interpretation of the `working-tree-encoding` attribute.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum WtEncoding {
    /// No re-encoding: attribute absent, unset, empty, or naming UTF-8.
    None,
    /// The attribute was set without a value; `check_wt_encoding_valid` rejects it.
    Invalid,
    /// Re-encode between UTF-8 and the named encoding (raw attribute value).
    Named(Vec<u8>),
}
98
99impl WtEncoding {
100 fn from_attr(state: Option<&AttributeState>) -> WtEncoding {
101 match state {
102 None | Some(AttributeState::Unset) => WtEncoding::None,
104 Some(AttributeState::Set) => WtEncoding::Invalid,
106 Some(AttributeState::Value(value)) => {
107 if value.is_empty() || encoding_name_is_utf8(value) {
110 WtEncoding::None
111 } else {
112 WtEncoding::Named(value.clone())
113 }
114 }
115 }
116 }
117}
118
119pub(crate) fn encoding_name_is_utf8(name: &[u8]) -> bool {
123 utf_suffix(name).is_some_and(|suffix| suffix == "8")
124}
125
/// Extracts the upper-cased part of a `UTF...` encoding name that follows the
/// `UTF` prefix and one optional `-`.
///
/// For example `utf-16le` yields `"16LE"` and `UTF8` yields `"8"`. Returns
/// `None` when `name` is not valid UTF-8 or does not start with `UTF`
/// (compared ASCII case-insensitively).
pub(crate) fn utf_suffix(name: &[u8]) -> Option<String> {
    let text = std::str::from_utf8(name).ok()?;
    // `get` also yields `None` when byte 3 is not a character boundary, in
    // which case the name cannot start with the ASCII prefix anyway.
    if !text.get(..3)?.eq_ignore_ascii_case("UTF") {
        return None;
    }
    let after_prefix = &text[3..];
    let suffix = match after_prefix.strip_prefix('-') {
        Some(without_dash) => without_dash,
        None => after_prefix,
    };
    Some(suffix.to_ascii_uppercase())
}
134
/// Why a byte order mark (BOM) makes content unacceptable for a UTF encoding.
///
/// Derives `Debug`, `PartialEq` and `Eq` like the other enums in this file so
/// values can be logged and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum BomProblem {
    /// The content starts with a BOM although the encoding name fixes the byte order.
    Prohibited,
    /// The content lacks the BOM needed to determine the byte order.
    Required,
}
140
141pub(crate) fn utf_bom_problem(suffix: &str, data: &[u8]) -> Option<BomProblem> {
145 let has16 = data.starts_with(&[0xFF, 0xFE]) || data.starts_with(&[0xFE, 0xFF]);
146 let has32 = data.starts_with(&[0xFF, 0xFE, 0, 0]) || data.starts_with(&[0, 0, 0xFE, 0xFF]);
147 match suffix {
148 "16LE" | "16BE" => has16.then_some(BomProblem::Prohibited),
149 "32LE" | "32BE" => has32.then_some(BomProblem::Prohibited),
150 "16" => (!has16).then_some(BomProblem::Required),
151 "32" => (!has32).then_some(BomProblem::Required),
152 _ => None,
153 }
154}
155
/// `true` when compiling for a little-endian target; used as the byte order
/// for UTF-16/UTF-32 data whose encoding name and content do not specify one.
pub(crate) const HOST_LE: bool = cfg!(target_endian = "little");
159
160pub(crate) fn decode_to_utf8(suffix: &str, data: &[u8]) -> Option<Vec<u8>> {
163 match suffix {
164 "16LE" => decode_utf16(data, true),
165 "16BE" => decode_utf16(data, false),
166 "16" | "16LE-BOM" | "16BE-BOM" => {
167 let (le, body) = strip_utf16_bom(data);
168 decode_utf16(body, le)
169 }
170 "32LE" => decode_utf32(data, true),
171 "32BE" => decode_utf32(data, false),
172 "32" | "32LE-BOM" | "32BE-BOM" => {
173 let (le, body) = strip_utf32_bom(data);
174 decode_utf32(body, le)
175 }
176 _ => None,
177 }
178}
179
180pub(crate) fn encode_from_utf8(suffix: &str, utf8: &[u8]) -> Option<Vec<u8>> {
183 match suffix {
184 "16LE" => encode_utf16(utf8, true, false),
185 "16BE" => encode_utf16(utf8, false, false),
186 "16LE-BOM" => encode_utf16(utf8, true, true),
187 "16BE-BOM" => encode_utf16(utf8, false, true),
188 "16" => encode_utf16(utf8, HOST_LE, true),
189 "32LE" => encode_utf32(utf8, true, false),
190 "32BE" => encode_utf32(utf8, false, false),
191 "32LE-BOM" => encode_utf32(utf8, true, true),
192 "32BE-BOM" => encode_utf32(utf8, false, true),
193 "32" => encode_utf32(utf8, HOST_LE, true),
194 _ => None,
195 }
196}
197
198pub(crate) fn strip_utf16_bom(data: &[u8]) -> (bool, &[u8]) {
199 if data.starts_with(&[0xFF, 0xFE]) {
200 (true, &data[2..])
201 } else if data.starts_with(&[0xFE, 0xFF]) {
202 (false, &data[2..])
203 } else {
204 (HOST_LE, data)
205 }
206}
207
208pub(crate) fn strip_utf32_bom(data: &[u8]) -> (bool, &[u8]) {
209 if data.starts_with(&[0xFF, 0xFE, 0, 0]) {
210 (true, &data[4..])
211 } else if data.starts_with(&[0, 0, 0xFE, 0xFF]) {
212 (false, &data[4..])
213 } else {
214 (HOST_LE, data)
215 }
216}
217
/// Decodes UTF-16 bytes (little endian when `le` is true) into UTF-8 bytes.
///
/// Returns `None` when the length is odd or the data contains an unpaired
/// surrogate. A BOM is not treated specially here.
pub(crate) fn decode_utf16(data: &[u8], le: bool) -> Option<Vec<u8>> {
    if data.len() % 2 != 0 {
        return None;
    }
    let read_unit: fn([u8; 2]) -> u16 = if le {
        u16::from_le_bytes
    } else {
        u16::from_be_bytes
    };
    let code_units = data
        .chunks_exact(2)
        .map(|pair| read_unit([pair[0], pair[1]]));
    // Collecting into `Result` stops at the first invalid code unit.
    let text: String = char::decode_utf16(code_units)
        .collect::<Result<_, _>>()
        .ok()?;
    Some(text.into_bytes())
}
236
/// Decodes UTF-32 bytes (little endian when `le` is true) into UTF-8 bytes.
///
/// Returns `None` when the length is not a multiple of four or a value is not
/// a Unicode scalar value. A BOM is not treated specially here.
pub(crate) fn decode_utf32(data: &[u8], le: bool) -> Option<Vec<u8>> {
    if data.len() % 4 != 0 {
        return None;
    }
    let read_scalar: fn([u8; 4]) -> u32 = if le {
        u32::from_le_bytes
    } else {
        u32::from_be_bytes
    };
    // Collecting into `Option` stops at the first invalid code point.
    let text = data
        .chunks_exact(4)
        .map(|quad| char::from_u32(read_scalar([quad[0], quad[1], quad[2], quad[3]])))
        .collect::<Option<String>>()?;
    Some(text.into_bytes())
}
253
/// Encodes UTF-8 input as UTF-16 in the requested byte order.
///
/// When `bom` is true the output starts with the byte order mark for that
/// byte order. Returns `None` when `utf8` is not valid UTF-8.
pub(crate) fn encode_utf16(utf8: &[u8], le: bool, bom: bool) -> Option<Vec<u8>> {
    let text = std::str::from_utf8(utf8).ok()?;
    let write_unit: fn(u16) -> [u8; 2] = if le {
        u16::to_le_bytes
    } else {
        u16::to_be_bytes
    };
    let mut encoded = Vec::with_capacity(utf8.len() * 2 + 2);
    if bom {
        // U+FEFF serialised in the chosen byte order is exactly the BOM.
        encoded.extend_from_slice(&write_unit(0xFEFF));
    }
    encoded.extend(text.encode_utf16().flat_map(write_unit));
    Some(encoded)
}
269
/// Encodes UTF-8 input as UTF-32 in the requested byte order.
///
/// When `bom` is true the output starts with the byte order mark for that
/// byte order. Returns `None` when `utf8` is not valid UTF-8.
pub(crate) fn encode_utf32(utf8: &[u8], le: bool, bom: bool) -> Option<Vec<u8>> {
    let text = std::str::from_utf8(utf8).ok()?;
    let write_scalar: fn(u32) -> [u8; 4] = if le {
        u32::to_le_bytes
    } else {
        u32::to_be_bytes
    };
    let mut encoded = Vec::with_capacity(utf8.len() * 4 + 4);
    if bom {
        // U+FEFF serialised in the chosen byte order is exactly the BOM.
        encoded.extend_from_slice(&write_scalar(0xFEFF));
    }
    encoded.extend(text.chars().map(u32::from).flat_map(write_scalar));
    Some(encoded)
}
290
291pub(crate) fn check_wt_encoding_valid(encoding: &WtEncoding) -> Result<()> {
295 if matches!(encoding, WtEncoding::Invalid) {
296 eprintln!("fatal: true/false are no valid working-tree-encodings");
297 return Err(GitError::Exit(128));
298 }
299 Ok(())
300}
301
302pub(crate) fn encode_to_git<'a>(
306 encoding: &WtEncoding,
307 path: &[u8],
308 data: Cow<'a, [u8]>,
309 write_object: bool,
310) -> Result<Cow<'a, [u8]>> {
311 let name = match encoding {
312 WtEncoding::None => return Ok(data),
313 WtEncoding::Invalid => return check_wt_encoding_valid(encoding).map(|()| data),
314 WtEncoding::Named(name) => name,
315 };
316 if data.is_empty() {
317 return Ok(data);
318 }
319 let display = String::from_utf8_lossy(path);
320 let enc = String::from_utf8_lossy(name);
321 if let Some(suffix) = utf_suffix(name)
322 && let Some(problem) = utf_bom_problem(&suffix, &data)
323 {
324 let number = &suffix[..2.min(suffix.len())];
325 match problem {
326 BomProblem::Prohibited => {
327 eprintln!(
328 "hint: The file '{display}' contains a byte order mark (BOM). \
329Please use UTF-{number} as working-tree-encoding."
330 );
331 report_encode_failure(
332 write_object,
333 &format!("BOM is prohibited in '{display}' if encoded as {enc}"),
334 )?;
335 return Ok(data);
336 }
337 BomProblem::Required => {
338 eprintln!(
339 "hint: The file '{display}' is missing a byte order mark (BOM). \
340Please use UTF-{number}BE or UTF-{number}LE (depending on the byte order) as \
341working-tree-encoding."
342 );
343 report_encode_failure(
344 write_object,
345 &format!("BOM is required in '{display}' if encoded as {enc}"),
346 )?;
347 return Ok(data);
348 }
349 }
350 }
351 match utf_suffix(name).and_then(|suffix| decode_to_utf8(&suffix, &data)) {
352 Some(utf8) => Ok(Cow::Owned(utf8)),
353 None => {
354 report_encode_failure(
355 write_object,
356 &format!("failed to encode '{display}' from {enc} to UTF-8"),
357 )?;
358 Ok(data)
359 }
360 }
361}
362
363pub(crate) fn encode_to_worktree<'a>(
367 encoding: &WtEncoding,
368 path: &[u8],
369 data: Cow<'a, [u8]>,
370) -> Result<Cow<'a, [u8]>> {
371 let name = match encoding {
372 WtEncoding::None => return Ok(data),
373 WtEncoding::Invalid => return check_wt_encoding_valid(encoding).map(|()| data),
374 WtEncoding::Named(name) => name,
375 };
376 if data.is_empty() {
377 return Ok(data);
378 }
379 match utf_suffix(name).and_then(|suffix| encode_from_utf8(&suffix, &data)) {
380 Some(encoded) => Ok(Cow::Owned(encoded)),
381 None => {
382 let display = String::from_utf8_lossy(path);
383 let enc = String::from_utf8_lossy(name);
384 eprintln!("error: failed to encode '{display}' from UTF-8 to {enc}");
385 Ok(data)
386 }
387 }
388}
389
390pub(crate) fn report_encode_failure(write_object: bool, message: &str) -> Result<()> {
393 if write_object {
394 eprintln!("fatal: {message}");
395 Err(GitError::Exit(128))
396 } else {
397 eprintln!("error: {message}");
398 Ok(())
399 }
400}
401
402pub(crate) fn decode_crlf_family_attribute(
410 state: Option<&AttributeState>,
411) -> (TextDecision, EolConversion) {
412 match state {
413 Some(AttributeState::Set) => (TextDecision::Text, EolConversion::None),
414 Some(AttributeState::Unset) => (TextDecision::Binary, EolConversion::None),
415 Some(AttributeState::Value(value)) if value == b"auto" => {
416 (TextDecision::Auto, EolConversion::None)
417 }
418 Some(AttributeState::Value(value)) if value == b"input" => {
421 (TextDecision::Text, EolConversion::Lf)
422 }
423 _ => (TextDecision::Unspecified, EolConversion::None),
425 }
426}
427
impl ContentFilterPlan {
    /// Builds the conversion plan for one path from its attribute checks and
    /// the repository configuration.
    ///
    /// Reads the `text`, `crlf`, `ident`, `eol`, `filter` and
    /// `working-tree-encoding` attributes from `checks`.
    fn resolve(config: &GitConfig, checks: &[AttributeCheck]) -> Self {
        let text_attr = checks.iter().find(|check| check.attribute == b"text");
        let crlf_attr = checks.iter().find(|check| check.attribute == b"crlf");
        let ident_attr = checks.iter().find(|check| check.attribute == b"ident");
        let eol_attr = checks.iter().find(|check| check.attribute == b"eol");
        let filter_attr = checks.iter().find(|check| check.attribute == b"filter");
        let encoding_attr = checks
            .iter()
            .find(|check| check.attribute == b"working-tree-encoding");
        let encoding = WtEncoding::from_attr(encoding_attr.and_then(|check| check.state.as_ref()));

        // Only an `eol` attribute that carries a value is considered.
        let eol_value = eol_attr.and_then(|check| match &check.state {
            Some(AttributeState::Value(value)) => Some(value.clone()),
            _ => None,
        });

        // EOL style forced by `text=input` or by the `crlf` attribute.
        let mut forced_eol = EolConversion::None;
        // The `text` attribute wins; `crlf` is consulted only when `text`
        // is absent or has no state.
        let mut text = match text_attr.map(|check| &check.state) {
            Some(Some(AttributeState::Set)) => TextDecision::Text,
            Some(Some(AttributeState::Unset)) => TextDecision::Binary,
            Some(Some(AttributeState::Value(value))) if value == b"auto" => TextDecision::Auto,
            Some(Some(AttributeState::Value(value))) if value == b"input" => {
                forced_eol = EolConversion::Lf;
                TextDecision::Text
            }
            // Any other `text=<value>` is treated as plain text.
            Some(Some(AttributeState::Value(_))) => TextDecision::Text,
            _ => {
                let (decision, eol) =
                    decode_crlf_family_attribute(crlf_attr.and_then(|check| check.state.as_ref()));
                forced_eol = eol;
                decision
            }
        };

        // Choose the EOL style: binary content gets none; an explicit
        // `eol=crlf`/`eol=lf` wins and also promotes an unspecified path to
        // text; then a forced LF; finally the configuration default.
        let eol = match (&text, eol_value.as_deref()) {
            (TextDecision::Binary, _) => EolConversion::None,
            (_, Some(b"crlf")) => {
                if text == TextDecision::Unspecified {
                    text = TextDecision::Text;
                }
                EolConversion::Crlf
            }
            (_, Some(b"lf")) => {
                if text == TextDecision::Unspecified {
                    text = TextDecision::Text;
                }
                EolConversion::Lf
            }
            _ if forced_eol == EolConversion::Lf => EolConversion::Lf,
            _ => eol_from_config(config),
        };

        // Text and auto paths always get an EOL style; LF is the fallback.
        let eol = match (&text, eol) {
            (TextDecision::Text | TextDecision::Auto, EolConversion::None) => EolConversion::Lf,
            (_, eol) => eol,
        };

        // A still-unspecified path becomes `Auto` when `core.autocrlf` is
        // enabled. The second tuple element is ignored by both arms.
        let text = match (text, eol_attr.is_some()) {
            (TextDecision::Unspecified, _) => {
                if autocrlf_enabled(config) {
                    TextDecision::Auto
                } else {
                    TextDecision::Unspecified
                }
            }
            (text, _) => text,
        };

        let driver = resolve_filter_driver(config, filter_attr);
        // `ident` is enabled only when the attribute is plainly set.
        let ident = matches!(
            ident_attr.and_then(|check| check.state.as_ref()),
            Some(AttributeState::Set)
        );

        ContentFilterPlan {
            text,
            eol,
            ident,
            driver,
            encoding,
        }
    }

    /// Returns whether line endings of `content` should be converted:
    /// never for binary or unspecified paths, for text paths whenever an EOL
    /// style is selected, and for auto paths additionally only when the
    /// content does not look binary.
    fn convert_eol(&self, content: &[u8]) -> bool {
        match self.text {
            TextDecision::Binary | TextDecision::Unspecified => false,
            TextDecision::Text => self.eol != EolConversion::None,
            TextDecision::Auto => self.eol != EolConversion::None && !looks_binary(content),
        }
    }

    /// Returns whether LF line endings in `content` would be rewritten to
    /// CRLF under this plan, based on statistics gathered from the content.
    pub(crate) fn will_convert_lf_to_crlf(&self, content: &[u8]) -> bool {
        self.will_convert_lf_to_crlf_stats(&gather_convert_stats(content))
    }

    /// Statistics-based form of [`Self::will_convert_lf_to_crlf`].
    ///
    /// Conversion requires a CRLF plan and at least one lone LF. In auto mode
    /// it is additionally skipped when the content already has a lone CR or a
    /// CRLF, or when `convert_is_binary` classifies it as binary.
    fn will_convert_lf_to_crlf_stats(&self, stats: &ConvertStats) -> bool {
        if self.eol != EolConversion::Crlf {
            return false;
        }
        if stats.lonelf == 0 {
            return false;
        }
        if self.text == TextDecision::Auto {
            if stats.lonecr > 0 || stats.crlf > 0 {
                return false;
            }
            if convert_is_binary(stats) {
                return false;
            }
        }
        true
    }

    /// Returns whether the safe-CRLF check applies, i.e. the path is text or auto.
    fn safecrlf_applies(&self) -> bool {
        matches!(self.text, TextDecision::Text | TextDecision::Auto)
    }

    /// Runs the safe-CRLF check on the line-ending statistics of a file.
    ///
    /// The statistics that would result from converting the content to the
    /// repository form and back are simulated, then handed to
    /// `check_safe_crlf` together with the original ones. Nothing is checked
    /// when `flags` is `ConvFlags::Off`, when the path is neither text nor
    /// auto, or when an auto path is classified as binary.
    fn check_safe_crlf_stats(
        &self,
        old_stats: &ConvertStats,
        index_has_crlf: bool,
        flags: ConvFlags,
        path: &[u8],
    ) -> Result<()> {
        if flags == ConvFlags::Off || !self.safecrlf_applies() {
            return Ok(());
        }

        let mut convert_crlf_into_lf = old_stats.crlf > 0;
        if self.text == TextDecision::Auto {
            if convert_is_binary(old_stats) {
                return Ok(());
            }
            // In auto mode, CRLF is left alone when the index already has CRLF.
            if index_has_crlf {
                convert_crlf_into_lf = false;
            }
        }

        let mut new_stats = old_stats.clone();
        // Simulate the CRLF -> LF step.
        if convert_crlf_into_lf {
            new_stats.lonelf += new_stats.crlf;
            new_stats.crlf = 0;
        }
        // Simulate the LF -> CRLF step on the result.
        if self.will_convert_lf_to_crlf_stats(&new_stats) {
            new_stats.crlf += new_stats.lonelf;
            new_stats.lonelf = 0;
        }
        check_safe_crlf(old_stats, &new_stats, flags, path)
    }
}
638
639pub(crate) fn eol_from_config(config: &GitConfig) -> EolConversion {
641 if let Some(value) = config.get("core", None, "autocrlf") {
642 match value.to_ascii_lowercase().as_str() {
643 "input" => return EolConversion::Lf,
644 "true" | "yes" | "on" | "1" => return EolConversion::Crlf,
645 _ => {}
646 }
647 }
648 if config.get_bool("core", None, "autocrlf") == Some(true) {
649 return EolConversion::Crlf;
650 }
651 match config
652 .get("core", None, "eol")
653 .map(|v| v.to_ascii_lowercase())
654 {
655 Some(ref v) if v == "crlf" => EolConversion::Crlf,
656 Some(ref v) if v == "lf" => EolConversion::Lf,
657 _ => EolConversion::None,
658 }
659}
660
661pub(crate) fn autocrlf_enabled(config: &GitConfig) -> bool {
664 if let Some(value) = config.get("core", None, "autocrlf")
665 && value.eq_ignore_ascii_case("input")
666 {
667 return true;
668 }
669 config.get_bool("core", None, "autocrlf") == Some(true)
670}
671
672pub(crate) fn resolve_filter_driver(
674 config: &GitConfig,
675 filter_attr: Option<&AttributeCheck>,
676) -> Option<FilterDriver> {
677 let name = match filter_attr.map(|check| &check.state) {
678 Some(Some(AttributeState::Value(value))) => value.clone(),
679 _ => return None,
681 };
682 let subsection = String::from_utf8_lossy(&name).into_owned();
683 let process = filter_config_value(config, &subsection, "process").filter(|cmd| !cmd.is_empty());
684 let clean = filter_config_value(config, &subsection, "clean").filter(|cmd| !cmd.is_empty());
685 let smudge = filter_config_value(config, &subsection, "smudge").filter(|cmd| !cmd.is_empty());
686 let required = filter_config_bool(config, &subsection, "required").unwrap_or(false);
687 if process.is_none() && clean.is_none() && smudge.is_none() && !required {
689 return None;
690 }
691 Some(FilterDriver {
692 name,
693 process,
694 clean,
695 smudge,
696 required,
697 })
698}
699
700pub(crate) fn filter_config_value(
701 config: &GitConfig,
702 subsection: &str,
703 key: &str,
704) -> Option<String> {
705 config
706 .get("filter", Some(subsection), key)
707 .map(str::to_owned)
708 .or_else(|| global_filter_config_value(subsection, key))
709}
710
711pub(crate) fn filter_config_bool(config: &GitConfig, subsection: &str, key: &str) -> Option<bool> {
712 config
713 .get_bool("filter", Some(subsection), key)
714 .or_else(|| {
715 global_filter_config_value(subsection, key)
716 .as_deref()
717 .and_then(sley_config::parse_config_bool)
718 })
719}
720
721pub(crate) fn global_filter_config_value(subsection: &str, key: &str) -> Option<String> {
722 for (path, _) in sley_config::default_config_layer_paths().into_iter().rev() {
723 let Ok(config) = GitConfig::read(path) else {
724 continue;
725 };
726 if let Some(value) = config.get("filter", Some(subsection), key) {
727 return Some(value.to_owned());
728 }
729 }
730 None
731}
732
/// Heuristic binary check: `true` when a NUL byte occurs within the first
/// 8000 bytes of `content`.
pub(crate) fn looks_binary(content: &[u8]) -> bool {
    const FIRST_FEW_BYTES: usize = 8000;
    content
        .iter()
        .take(FIRST_FEW_BYTES)
        .any(|&byte| byte == 0)
}
740
/// Rewrites every CRLF pair in `content` to a single LF.
///
/// Content without any CRLF is handed back as-is without copying. A CR that
/// is not directly followed by LF is kept.
pub(crate) fn convert_crlf_to_lf_cow(content: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
    let has_crlf = content.windows(2).any(|pair| pair == b"\r\n");
    if !has_crlf {
        return content;
    }
    let mut converted = Vec::with_capacity(content.len());
    let mut bytes = content.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        // Drop the CR of a CRLF pair; the LF is pushed on the next round.
        if byte == b'\r' && bytes.peek() == Some(&b'\n') {
            continue;
        }
        converted.push(byte);
    }
    Cow::Owned(converted)
}
762
/// Returns a copy of `content` in which every LF not already preceded by CR
/// is turned into CRLF.
pub(crate) fn convert_lf_to_crlf(content: &[u8]) -> Vec<u8> {
    let mut converted = Vec::with_capacity(content.len() + content.len() / 16);
    for (index, &byte) in content.iter().enumerate() {
        let follows_cr = index > 0 && content[index - 1] == b'\r';
        if byte == b'\n' && !follows_cr {
            converted.push(b'\r');
        }
        converted.push(byte);
    }
    converted
}
777
778pub(crate) fn ident_to_git_cow(content: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
780 let input = content.as_ref();
781 if !has_git_ident(input) {
782 return content;
783 }
784 let mut out = Vec::with_capacity(input.len());
785 let mut pos = 0;
786 while let Some(relative) = input[pos..].iter().position(|byte| *byte == b'$') {
787 let dollar = pos + relative;
788 out.extend_from_slice(&input[pos..=dollar]);
789 pos = dollar + 1;
790 if input.len().saturating_sub(pos) > 3 && input[pos..].starts_with(b"Id:") {
791 let search = &input[pos + 3..];
792 let Some(end_relative) = search.iter().position(|byte| *byte == b'$') else {
793 break;
794 };
795 let end = pos + 3 + end_relative;
796 if input[pos + 3..end].contains(&b'\n') {
797 continue;
798 }
799 out.extend_from_slice(b"Id$");
800 pos = end + 1;
801 }
802 }
803 out.extend_from_slice(&input[pos..]);
804 Cow::Owned(out)
805}
806
807pub(crate) fn ident_to_worktree_cow(
810 format: ObjectFormat,
811 content: Cow<'_, [u8]>,
812) -> Result<Cow<'_, [u8]>> {
813 let input = content.as_ref();
814 if !has_git_ident(input) {
815 return Ok(content);
816 }
817 let oid = EncodedObject::new(ObjectType::Blob, input.to_vec()).object_id(format)?;
818 let replacement = format!("Id: {} $", oid.to_hex());
819 let mut out = Vec::with_capacity(input.len() + replacement.len());
820 let mut pos = 0;
821 while let Some(relative) = input[pos..].iter().position(|byte| *byte == b'$') {
822 let dollar = pos + relative;
823 out.extend_from_slice(&input[pos..=dollar]);
824 pos = dollar + 1;
825 if input.len().saturating_sub(pos) < 3 || !input[pos..].starts_with(b"Id") {
826 continue;
827 }
828 match input.get(pos + 2) {
829 Some(b'$') => {
830 pos += 3;
831 }
832 Some(b':') => {
833 let search = &input[pos + 3..];
834 let Some(end_relative) = search.iter().position(|byte| *byte == b'$') else {
835 break;
836 };
837 let end = pos + 3 + end_relative;
838 if input[pos + 3..end].contains(&b'\n') || is_foreign_ident(&input[pos + 3..end]) {
839 continue;
840 }
841 pos = end + 1;
842 }
843 _ => continue,
844 }
845 out.extend_from_slice(replacement.as_bytes());
846 }
847 out.extend_from_slice(&input[pos..]);
848 Ok(Cow::Owned(out))
849}
850
/// Returns `true` when `content` contains at least one ident keyword: either
/// `$Id$` or `$Id: ... $` with the closing `$` on the same line.
///
/// When a `$Id:` candidate is rejected because a line break precedes the next
/// `$`, scanning resumes *at* that `$`, since it may itself start a valid
/// keyword (for example `"$Id: foo\n$Id$"`).
pub(crate) fn has_git_ident(content: &[u8]) -> bool {
    let mut pos = 0;
    while let Some(relative) = content[pos..].iter().position(|byte| *byte == b'$') {
        // Index of the first byte after the `$`.
        let start = pos + relative + 1;
        if content.len().saturating_sub(start) < 3 {
            // Too short for even `Id$`.
            break;
        }
        if !content[start..].starts_with(b"Id") {
            pos = start;
            continue;
        }
        match content.get(start + 2) {
            Some(b'$') => return true,
            Some(b':') => {
                let search = &content[start + 3..];
                let Some(end_relative) = search.iter().position(|byte| *byte == b'$') else {
                    // No closing dollar anywhere: no further keyword is possible.
                    break;
                };
                let end = start + 3 + end_relative;
                if !content[start + 3..end].contains(&b'\n') {
                    return true;
                }
                // Re-examine the `$` at `end` as a potential keyword start
                // instead of skipping over it.
                pos = end;
            }
            _ => pos = start,
        }
    }
    false
}
880
/// Returns `true` when an ident expansion contains a space anywhere other
/// than at its first or last byte.
pub(crate) fn is_foreign_ident(expansion: &[u8]) -> bool {
    match expansion {
        [_, interior @ .., _] => interior.contains(&b' '),
        _ => false,
    }
}
887
/// Runs a one-shot `clean`/`smudge` filter command through the shell.
///
/// `content` is written to the command's stdin and its stdout is returned.
/// Every `%f` in `command` is replaced by the shell-quoted path (lossily
/// converted to UTF-8).
///
/// # Errors
///
/// Returns `GitError::Command` when the process cannot be spawned, its stdin
/// is unavailable, waiting for it fails, or it exits unsuccessfully (the
/// message then includes the trimmed stderr output).
pub(crate) fn run_filter_command(command: &str, path: &[u8], content: &[u8]) -> Result<Vec<u8>> {
    let display_path = String::from_utf8_lossy(path);
    let expanded = command.replace("%f", &shell_quote(&display_path));
    let (shell, flag) = if cfg!(windows) {
        ("cmd", "/C")
    } else {
        ("/bin/sh", "-c")
    };
    let mut child = Command::new(shell)
        .arg(flag)
        .arg(&expanded)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|err| GitError::Command(format!("failed to spawn filter `{command}`: {err}")))?;
    let mut stdin = child
        .stdin
        .take()
        .ok_or_else(|| GitError::Command(format!("filter `{command}` stdin unavailable")))?;
    // Feed stdin from a separate thread while this thread collects the output
    // (presumably to avoid a pipe deadlock on large inputs). `stdin` is
    // dropped, and thus closed, when the closure returns; a write error is
    // deliberately ignored.
    let payload = content.to_vec();
    let writer = std::thread::spawn(move || {
        let _ = stdin.write_all(&payload);
    });
    let output = child
        .wait_with_output()
        .map_err(|err| GitError::Command(format!("filter `{command}` failed: {err}")))?;
    let _ = writer.join();
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(GitError::Command(format!(
            "filter `{command}` exited with {}: {}",
            output.status,
            stderr.trim()
        )));
    }
    Ok(output.stdout)
}
939
/// Capability bit: the filter process announced `capability=clean`.
pub(crate) const PROCESS_CAP_CLEAN: u8 = 1;
/// Capability bit: the filter process announced `capability=smudge`.
pub(crate) const PROCESS_CAP_SMUDGE: u8 = 1 << 1;
/// Capability bit: the filter process announced `capability=delay`.
pub(crate) const PROCESS_CAP_DELAY: u8 = 1 << 2;
/// Largest payload written in one content packet; together with the 4-byte
/// length header added by `write_pkt_data` a packet is at most 65,520 bytes.
pub(crate) const PKT_DATA_MAX: usize = 65_516;
944
/// Running filter processes, keyed by the command line that started them.
pub(crate) static PROCESS_FILTERS: OnceLock<Mutex<HashMap<String, ProcessFilter>>> =
    OnceLock::new();
/// Extra `key=value` pairs sent with smudge requests to a filter process.
pub(crate) type ProcessFilterMetadata = Vec<(String, String)>;
/// Metadata currently in effect for smudge requests, if any.
pub(crate) static PROCESS_FILTER_METADATA: OnceLock<Mutex<Option<ProcessFilterMetadata>>> =
    OnceLock::new();
950
/// Guard returned by `set_process_filter_metadata`; on drop it writes the
/// remembered previous metadata back into `PROCESS_FILTER_METADATA`.
pub(crate) struct ProcessFilterMetadataGuard {
    /// Metadata that was in effect before the guard was created.
    previous: Option<ProcessFilterMetadata>,
}
954
955impl Drop for ProcessFilterMetadataGuard {
956 fn drop(&mut self) {
957 if let Ok(mut guard) = PROCESS_FILTER_METADATA
958 .get_or_init(|| Mutex::new(None))
959 .lock()
960 {
961 *guard = self.previous.take();
962 }
963 }
964}
965
966pub(crate) fn set_process_filter_metadata(
967 metadata: Option<ProcessFilterMetadata>,
968) -> ProcessFilterMetadataGuard {
969 let mutex = PROCESS_FILTER_METADATA.get_or_init(|| Mutex::new(None));
970 let previous = mutex
971 .lock()
972 .map(|mut guard| std::mem::replace(&mut *guard, metadata))
973 .unwrap_or(None);
974 ProcessFilterMetadataGuard { previous }
975}
976
977pub(crate) fn current_process_filter_metadata() -> Option<ProcessFilterMetadata> {
978 PROCESS_FILTER_METADATA
979 .get_or_init(|| Mutex::new(None))
980 .lock()
981 .ok()
982 .and_then(|guard| guard.clone())
983}
984
/// A running long-lived filter process and the pipes used to talk to it.
pub(crate) struct ProcessFilter {
    /// Handle of the spawned process; killed and reaped on drop.
    child: Child,
    /// Pipe for requests sent to the filter.
    stdin: ChildStdin,
    /// Pipe for responses read from the filter.
    stdout: ChildStdout,
    /// Bit set of `PROCESS_CAP_*` capabilities the filter announced.
    capabilities: u8,
}
991
/// Result of asking a filter process to handle one blob.
pub(crate) enum ProcessFilterOutcome {
    /// The filter succeeded; holds the filtered content.
    Filtered(Vec<u8>),
    /// The filter did not announce the capability for the requested direction.
    Unsupported,
    /// The filter answered with a non-success status (`error`, `abort` or `delayed`).
    Status(String),
}
997
/// Failure while talking to a filter process.
pub(crate) struct ProcessFilterFailure {
    /// Human-readable description of the failure.
    message: String,
    /// `true` for protocol-level failures; `run_process_filter` then drops the
    /// cached process.
    protocol: bool,
}
1002
1003impl ProcessFilterFailure {
1004 fn protocol(message: impl Into<String>) -> Self {
1005 Self {
1006 message: message.into(),
1007 protocol: true,
1008 }
1009 }
1010}
1011
1012pub(crate) fn run_process_filter(
1013 command: &str,
1014 direction: &str,
1015 path: &[u8],
1016 content: &[u8],
1017 blob: Option<ObjectId>,
1018) -> std::result::Result<ProcessFilterOutcome, ProcessFilterFailure> {
1019 let filters = PROCESS_FILTERS.get_or_init(|| Mutex::new(HashMap::new()));
1020 let mut filters = filters
1021 .lock()
1022 .map_err(|_| ProcessFilterFailure::protocol("process filter cache poisoned"))?;
1023 if !filters.contains_key(command) {
1024 let filter = ProcessFilter::start(command)?;
1025 filters.insert(command.to_string(), filter);
1026 }
1027 let result = filters
1028 .get_mut(command)
1029 .expect("process filter was inserted")
1030 .apply(direction, path, content, blob);
1031 if result.as_ref().is_err_and(|err| err.protocol) {
1032 filters.remove(command);
1033 }
1034 result
1035}
1036
impl ProcessFilter {
    /// Spawns `command` through the shell and performs the filter handshake.
    ///
    /// The client sends `git-filter-client` / `version=2`, expects
    /// `git-filter-server` / `version=2` followed by a flush packet, then
    /// offers the `clean`, `smudge` and `delay` capabilities and records which
    /// of them the filter echoes back.
    ///
    /// # Errors
    ///
    /// Returns a protocol failure when the process cannot be spawned, a pipe
    /// is unavailable, I/O fails, or the filter's greeting is not as expected.
    fn start(command: &str) -> std::result::Result<Self, ProcessFilterFailure> {
        let (shell, flag) = if cfg!(windows) {
            ("cmd", "/C")
        } else {
            ("/bin/sh", "-c")
        };
        // stderr is inherited, so the filter's diagnostics go to our stderr.
        let mut child = Command::new(shell)
            .arg(flag)
            .arg(command)
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .stderr(Stdio::inherit())
            .spawn()
            .map_err(|err| {
                ProcessFilterFailure::protocol(format!(
                    "cannot fork to run subprocess '{command}': {err}"
                ))
            })?;
        let mut stdin = child
            .stdin
            .take()
            .ok_or_else(|| ProcessFilterFailure::protocol("process filter stdin unavailable"))?;
        let mut stdout = child
            .stdout
            .take()
            .ok_or_else(|| ProcessFilterFailure::protocol("process filter stdout unavailable"))?;

        // Client greeting.
        write_pkt_text(&mut stdin, "git-filter-client\n")?;
        write_pkt_text(&mut stdin, "version=2\n")?;
        write_flush(&mut stdin)?;

        // Server greeting: welcome line, version line, flush.
        let line = read_pkt_text(&mut stdout)?.ok_or_else(|| {
            ProcessFilterFailure::protocol(
                "Unexpected line '<flush packet>', expected git-filter-server",
            )
        })?;
        if line != "git-filter-server" {
            return Err(ProcessFilterFailure::protocol(format!(
                "Unexpected line '{line}', expected git-filter-server"
            )));
        }
        let line = read_pkt_text(&mut stdout)?.ok_or_else(|| {
            ProcessFilterFailure::protocol("Unexpected line '<flush packet>', expected version")
        })?;
        if line != "version=2" {
            return Err(ProcessFilterFailure::protocol(format!(
                "Unexpected line '{line}', expected version"
            )));
        }
        if let Some(line) = read_pkt_text(&mut stdout)? {
            return Err(ProcessFilterFailure::protocol(format!(
                "Unexpected line '{line}', expected flush"
            )));
        }

        // Capability negotiation: offer everything, keep what is echoed back.
        write_pkt_text(&mut stdin, "capability=clean\n")?;
        write_pkt_text(&mut stdin, "capability=smudge\n")?;
        write_pkt_text(&mut stdin, "capability=delay\n")?;
        write_flush(&mut stdin)?;

        let mut capabilities = 0;
        while let Some(line) = read_pkt_text(&mut stdout)? {
            match line.as_str() {
                "capability=clean" => capabilities |= PROCESS_CAP_CLEAN,
                "capability=smudge" => capabilities |= PROCESS_CAP_SMUDGE,
                "capability=delay" => capabilities |= PROCESS_CAP_DELAY,
                // Unknown lines are ignored.
                _ => {}
            }
        }

        Ok(Self {
            child,
            stdin,
            stdout,
            capabilities,
        })
    }

    /// Sends one `clean` or `smudge` request and reads the response.
    ///
    /// Returns [`ProcessFilterOutcome::Unsupported`] without contacting the
    /// filter when it did not announce the capability for `direction`.
    /// For smudge requests with a known `blob`, the current process-filter
    /// metadata and a `blob=<hex>` line are included in the request.
    ///
    /// # Errors
    ///
    /// Returns a protocol failure on I/O errors or when the filter reports a
    /// status other than `success`, `error`, `abort` or `delayed`.
    fn apply(
        &mut self,
        direction: &str,
        path: &[u8],
        content: &[u8],
        blob: Option<ObjectId>,
    ) -> std::result::Result<ProcessFilterOutcome, ProcessFilterFailure> {
        let wanted = match direction {
            "clean" => PROCESS_CAP_CLEAN,
            "smudge" => PROCESS_CAP_SMUDGE,
            _ => 0,
        };
        if self.capabilities & wanted == 0 {
            return Ok(ProcessFilterOutcome::Unsupported);
        }

        // Request header, terminated by a flush.
        write_pkt_text(&mut self.stdin, &format!("command={direction}\n"))?;
        write_pkt_text(
            &mut self.stdin,
            &format!("pathname={}\n", String::from_utf8_lossy(path)),
        )?;
        if direction == "smudge"
            && let Some(blob) = blob
        {
            if let Some(metadata) = current_process_filter_metadata() {
                for (key, value) in metadata {
                    write_pkt_text(&mut self.stdin, &format!("{key}={value}\n"))?;
                }
            }
            write_pkt_text(&mut self.stdin, &format!("blob={}\n", blob.to_hex()))?;
        }
        write_flush(&mut self.stdin)?;
        // Request body, terminated by a flush.
        write_pkt_content(&mut self.stdin, content)?;
        write_flush(&mut self.stdin)?;

        // First status list; a missing status is treated as an empty string,
        // which is rejected below as unsupported.
        let mut status = read_process_status(&mut self.stdout)?.unwrap_or_default();
        match status.as_str() {
            "success" => {}
            "error" | "abort" | "delayed" => return Ok(ProcessFilterOutcome::Status(status)),
            other => {
                return Err(ProcessFilterFailure::protocol(format!(
                    "external filter returned unsupported status '{other}'"
                )));
            }
        }

        let output = read_pkt_content(&mut self.stdout)?;
        // A second status list after the content may override the first.
        if let Some(next) = read_process_status(&mut self.stdout)? {
            status = next;
        }
        match status.as_str() {
            "" | "success" => Ok(ProcessFilterOutcome::Filtered(output)),
            "error" | "abort" | "delayed" => Ok(ProcessFilterOutcome::Status(status)),
            other => Err(ProcessFilterFailure::protocol(format!(
                "external filter returned unsupported status '{other}'"
            ))),
        }
    }
}
1175
impl Drop for ProcessFilter {
    /// Flushes the request pipe, then kills and reaps the filter process.
    /// Errors from all three steps are ignored.
    fn drop(&mut self) {
        let _ = self.stdin.flush();
        let _ = self.child.kill();
        // Wait so the killed process does not linger unreaped.
        let _ = self.child.wait();
    }
}
1183
1184pub(crate) fn write_pkt_text(
1185 writer: &mut ChildStdin,
1186 text: &str,
1187) -> std::result::Result<(), ProcessFilterFailure> {
1188 write_pkt_data(writer, text.as_bytes())
1189}
1190
1191pub(crate) fn write_pkt_content(
1192 writer: &mut ChildStdin,
1193 content: &[u8],
1194) -> std::result::Result<(), ProcessFilterFailure> {
1195 for chunk in content.chunks(PKT_DATA_MAX) {
1196 write_pkt_data(writer, chunk)?;
1197 }
1198 Ok(())
1199}
1200
1201pub(crate) fn write_pkt_data(
1202 writer: &mut ChildStdin,
1203 data: &[u8],
1204) -> std::result::Result<(), ProcessFilterFailure> {
1205 let len = data.len() + 4;
1206 write!(writer, "{len:04x}")
1207 .and_then(|_| writer.write_all(data))
1208 .map_err(|err| {
1209 ProcessFilterFailure::protocol(format!("process filter write failed: {err}"))
1210 })
1211}
1212
1213pub(crate) fn write_flush(
1214 writer: &mut ChildStdin,
1215) -> std::result::Result<(), ProcessFilterFailure> {
1216 writer
1217 .write_all(b"0000")
1218 .and_then(|_| writer.flush())
1219 .map_err(|err| {
1220 ProcessFilterFailure::protocol(format!("process filter write failed: {err}"))
1221 })
1222}
1223
1224pub(crate) fn read_pkt_text(
1225 reader: &mut ChildStdout,
1226) -> std::result::Result<Option<String>, ProcessFilterFailure> {
1227 let Some(mut data) = read_pkt_data(reader)? else {
1228 return Ok(None);
1229 };
1230 if data.last() == Some(&b'\n') {
1231 data.pop();
1232 }
1233 Ok(Some(String::from_utf8_lossy(&data).into_owned()))
1234}
1235
1236pub(crate) fn read_pkt_content(
1237 reader: &mut ChildStdout,
1238) -> std::result::Result<Vec<u8>, ProcessFilterFailure> {
1239 let mut out = Vec::new();
1240 while let Some(data) = read_pkt_data(reader)? {
1241 out.extend_from_slice(&data);
1242 }
1243 Ok(out)
1244}
1245
1246pub(crate) fn read_pkt_data(
1247 reader: &mut ChildStdout,
1248) -> std::result::Result<Option<Vec<u8>>, ProcessFilterFailure> {
1249 let mut header = [0u8; 4];
1250 reader.read_exact(&mut header).map_err(|err| {
1251 ProcessFilterFailure::protocol(format!("process filter read failed: {err}"))
1252 })?;
1253 let header = std::str::from_utf8(&header)
1254 .map_err(|err| ProcessFilterFailure::protocol(format!("invalid pkt-line header: {err}")))?;
1255 let len = usize::from_str_radix(header, 16)
1256 .map_err(|err| ProcessFilterFailure::protocol(format!("invalid pkt-line length: {err}")))?;
1257 if len == 0 {
1258 return Ok(None);
1259 }
1260 if len < 4 {
1261 return Err(ProcessFilterFailure::protocol(format!(
1262 "invalid pkt-line length {len}"
1263 )));
1264 }
1265 let mut data = vec![0; len - 4];
1266 reader.read_exact(&mut data).map_err(|err| {
1267 ProcessFilterFailure::protocol(format!("process filter read failed: {err}"))
1268 })?;
1269 Ok(Some(data))
1270}
1271
1272pub(crate) fn read_process_status(
1273 reader: &mut ChildStdout,
1274) -> std::result::Result<Option<String>, ProcessFilterFailure> {
1275 let mut status = None;
1276 while let Some(line) = read_pkt_text(reader)? {
1277 if let Some(value) = line.strip_prefix("status=") {
1278 status = Some(value.to_string());
1279 }
1280 }
1281 Ok(status)
1282}
1283
1284pub(crate) fn shell_quote(value: &str) -> String {
1287 let mut out = String::with_capacity(value.len() + 2);
1288 out.push('\'');
1289 for ch in value.chars() {
1290 if ch == '\'' {
1291 out.push_str("'\\''");
1292 } else {
1293 out.push(ch);
1294 }
1295 }
1296 out.push('\'');
1297 out
1298}
1299
1300pub fn apply_clean_filter(
1314 worktree_root: impl AsRef<Path>,
1315 git_dir: impl AsRef<Path>,
1316 config: &GitConfig,
1317 path: &[u8],
1318 content: &[u8],
1319) -> Result<Vec<u8>> {
1320 let _ = git_dir.as_ref();
1324 let checks = filter_attribute_checks(worktree_root.as_ref(), path)?;
1325 apply_clean_filter_with_attributes(config, &checks, path, content)
1326}
1327
1328pub struct WorktreeAttributes {
1338 matcher: AttributeMatcher,
1339}
1340
1341impl WorktreeAttributes {
1342 pub fn from_worktree_root(worktree_root: impl AsRef<Path>) -> Result<Self> {
1345 Ok(Self {
1346 matcher: AttributeMatcher::from_worktree_root(worktree_root.as_ref())?,
1347 })
1348 }
1349
1350 pub fn apply_clean_filter(
1353 &self,
1354 config: &GitConfig,
1355 path: &[u8],
1356 content: &[u8],
1357 ) -> Result<Vec<u8>> {
1358 let checks = self
1359 .matcher
1360 .attributes_for_path(path, &filter_attribute_names(), false);
1361 apply_clean_filter_with_attributes(config, &checks, path, content)
1362 }
1363}
1364
1365pub struct TreeAttributes {
1382 matcher: AttributeMatcher,
1383}
1384
1385impl TreeAttributes {
1386 pub fn from_tree(
1397 attr_root: impl AsRef<Path>,
1398 git_dir: impl AsRef<Path>,
1399 db: &FileObjectDatabase,
1400 format: ObjectFormat,
1401 tree_oid: &ObjectId,
1402 ) -> Result<Self> {
1403 let attr_root = attr_root.as_ref();
1404 let git_dir = git_dir.as_ref();
1405 let mut matcher = AttributeMatcher::default();
1406 matcher.configure_case_sensitivity(git_dir);
1407 if !matcher.read_configured_attributes(attr_root, git_dir) {
1408 matcher.read_default_global_attributes();
1409 }
1410 collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
1411 read_attribute_patterns(
1412 git_dir.join("info").join("attributes"),
1413 &mut matcher,
1414 &[],
1415 b"info/attributes",
1416 false,
1417 );
1418 Ok(Self { matcher })
1419 }
1420
1421 pub fn apply_smudge_filter(
1427 &self,
1428 config: &GitConfig,
1429 path: &[u8],
1430 content: &[u8],
1431 ) -> Result<Vec<u8>> {
1432 let checks = self
1433 .matcher
1434 .attributes_for_path(path, &filter_attribute_names(), false);
1435 apply_smudge_filter_with_attributes(config, &checks, path, content)
1436 }
1437
1438 pub fn attributes_for_path(&self, path: &[u8], requested: &[Vec<u8>]) -> Vec<AttributeCheck> {
1439 self.matcher.attributes_for_path(path, requested, false)
1440 }
1441
1442 pub fn export_subst_for_path(&self, path: &[u8]) -> bool {
1446 self.attribute_is_set(path, b"export-subst")
1447 }
1448
1449 pub fn export_ignore_for_path(&self, path: &[u8]) -> bool {
1453 self.attribute_is_set(path, b"export-ignore")
1454 }
1455
1456 fn attribute_is_set(&self, path: &[u8], attribute: &[u8]) -> bool {
1457 let requested = [attribute.to_vec()];
1458 let checks = self.matcher.attributes_for_path(path, &requested, false);
1459 matches!(
1460 checks.first().and_then(|check| check.state.as_ref()),
1461 Some(AttributeState::Set)
1462 )
1463 }
1464
1465 pub fn diff_attribute_for_path(&self, path: &[u8]) -> Option<AttributeState> {
1470 let requested = [b"diff".to_vec()];
1471 let checks = self.matcher.attributes_for_path(path, &requested, false);
1472 checks.into_iter().next().and_then(|check| check.state)
1473 }
1474}
1475
1476pub fn apply_clean_filter_with_attributes(
1479 config: &GitConfig,
1480 attributes: &[AttributeCheck],
1481 path: &[u8],
1482 content: &[u8],
1483) -> Result<Vec<u8>> {
1484 Ok(apply_clean_filter_with_attributes_cow(config, attributes, path, content)?.into_owned())
1485}
1486
1487pub fn apply_clean_filter_with_attributes_cow<'a>(
1493 config: &GitConfig,
1494 attributes: &[AttributeCheck],
1495 path: &[u8],
1496 content: &'a [u8],
1497) -> Result<Cow<'a, [u8]>> {
1498 apply_clean_filter_with_attributes_cow_safecrlf(
1499 config,
1500 attributes,
1501 path,
1502 content,
1503 ConvFlags::Off,
1504 SafeCrlfIndexBlob::None,
1505 )
1506}
1507
1508pub enum SafeCrlfIndexBlob<'a> {
1512 None,
1515 Lookup {
1518 odb: &'a FileObjectDatabase,
1519 oid: ObjectId,
1520 },
1521}
1522
1523impl SafeCrlfIndexBlob<'_> {
1524 fn has_crlf(&self) -> bool {
1525 match self {
1526 SafeCrlfIndexBlob::None => false,
1527 SafeCrlfIndexBlob::Lookup { odb, oid } => has_crlf_in_index(odb, oid),
1528 }
1529 }
1530}
1531
1532pub fn apply_clean_filter_with_attributes_cow_safecrlf<'a>(
1541 config: &GitConfig,
1542 attributes: &[AttributeCheck],
1543 path: &[u8],
1544 content: &'a [u8],
1545 flags: ConvFlags,
1546 index_blob: SafeCrlfIndexBlob<'_>,
1547) -> Result<Cow<'a, [u8]>> {
1548 apply_clean_filter_cow_inner(config, attributes, path, content, flags, index_blob, false)
1551}
1552
1553pub(crate) fn apply_clean_filter_cow_inner<'a>(
1558 config: &GitConfig,
1559 attributes: &[AttributeCheck],
1560 path: &[u8],
1561 content: &'a [u8],
1562 flags: ConvFlags,
1563 index_blob: SafeCrlfIndexBlob<'_>,
1564 write_object: bool,
1565) -> Result<Cow<'a, [u8]>> {
1566 let plan = ContentFilterPlan::resolve(config, attributes);
1567 check_wt_encoding_valid(&plan.encoding)?;
1568 let mut data = Cow::Borrowed(content);
1569 if let Some(driver) = &plan.driver {
1570 data = run_driver(driver, driver.clean.as_deref(), "clean", None, path, data)?;
1571 }
1572 data = encode_to_git(&plan.encoding, path, data, write_object)?;
1576 if flags != ConvFlags::Off && !data.is_empty() && plan.safecrlf_applies() {
1581 let old_stats = gather_convert_stats(&data);
1582 plan.check_safe_crlf_stats(&old_stats, index_blob.has_crlf(), flags, path)?;
1583 }
1584 if plan.convert_eol(&data) {
1585 data = convert_crlf_to_lf_cow(data);
1586 }
1587 if plan.ident {
1588 data = ident_to_git_cow(data);
1589 }
1590 Ok(data)
1591}
1592
1593pub fn apply_smudge_filter(
1601 worktree_root: impl AsRef<Path>,
1602 git_dir: impl AsRef<Path>,
1603 format: ObjectFormat,
1604 config: &GitConfig,
1605 path: &[u8],
1606 content: &[u8],
1607) -> Result<Vec<u8>> {
1608 let checks =
1611 smudge_attribute_checks_from_index(worktree_root.as_ref(), git_dir.as_ref(), format, path)?;
1612 Ok(
1613 apply_smudge_filter_with_attributes_cow_format(config, &checks, path, content, format)?
1614 .into_owned(),
1615 )
1616}
1617
1618pub fn apply_smudge_filter_with_attributes(
1620 config: &GitConfig,
1621 attributes: &[AttributeCheck],
1622 path: &[u8],
1623 content: &[u8],
1624) -> Result<Vec<u8>> {
1625 Ok(apply_smudge_filter_with_attributes_cow(config, attributes, path, content)?.into_owned())
1626}
1627
1628pub fn apply_smudge_filter_with_attributes_cow<'a>(
1634 config: &GitConfig,
1635 attributes: &[AttributeCheck],
1636 path: &[u8],
1637 content: &'a [u8],
1638) -> Result<Cow<'a, [u8]>> {
1639 apply_smudge_filter_with_attributes_cow_format(
1640 config,
1641 attributes,
1642 path,
1643 content,
1644 ObjectFormat::Sha1,
1645 )
1646}
1647
1648pub(crate) fn apply_smudge_filter_with_attributes_cow_format<'a>(
1649 config: &GitConfig,
1650 attributes: &[AttributeCheck],
1651 path: &[u8],
1652 content: &'a [u8],
1653 format: ObjectFormat,
1654) -> Result<Cow<'a, [u8]>> {
1655 let plan = ContentFilterPlan::resolve(config, attributes);
1656 check_wt_encoding_valid(&plan.encoding)?;
1657 let mut data = Cow::Borrowed(content);
1658 if plan.ident {
1659 data = ident_to_worktree_cow(format, data)?;
1660 }
1661 if plan.eol == EolConversion::Crlf
1662 && plan.convert_eol(&data)
1663 && plan.will_convert_lf_to_crlf(&data)
1664 {
1665 data = Cow::Owned(convert_lf_to_crlf(&data));
1666 }
1667 data = encode_to_worktree(&plan.encoding, path, data)?;
1671 if let Some(driver) = &plan.driver {
1672 data = run_driver(
1673 driver,
1674 driver.smudge.as_deref(),
1675 "smudge",
1676 Some(format),
1677 path,
1678 data,
1679 )?;
1680 }
1681 Ok(data)
1682}
1683
1684pub(crate) fn run_driver<'a>(
1686 driver: &FilterDriver,
1687 command: Option<&str>,
1688 direction: &str,
1689 format: Option<ObjectFormat>,
1690 path: &[u8],
1691 content: Cow<'a, [u8]>,
1692) -> Result<Cow<'a, [u8]>> {
1693 if let Some(process) = &driver.process {
1694 let blob = if direction == "smudge" {
1695 match format {
1696 Some(format) => {
1697 Some(EncodedObject::new(ObjectType::Blob, content.to_vec()).object_id(format)?)
1698 }
1699 None => None,
1700 }
1701 } else {
1702 None
1703 };
1704 match run_process_filter(process, direction, path, &content, blob) {
1705 Ok(ProcessFilterOutcome::Filtered(output)) => return Ok(Cow::Owned(output)),
1706 Ok(ProcessFilterOutcome::Unsupported) => {}
1707 Ok(ProcessFilterOutcome::Status(status)) => {
1708 if driver.required {
1709 return Err(GitError::Command(format!(
1710 "external filter '{}' returned status {status}",
1711 process
1712 )));
1713 }
1714 return Ok(content);
1715 }
1716 Err(err) => {
1717 if err.protocol {
1718 eprintln!("error: external filter '{}' failed", process);
1719 }
1720 if driver.required {
1721 return Err(GitError::Command(err.message));
1722 }
1723 return Ok(content);
1724 }
1725 }
1726 }
1727 let Some(command) = command else {
1728 if driver.required {
1731 let path = String::from_utf8_lossy(path);
1732 let name = String::from_utf8_lossy(&driver.name);
1733 if direction == "clean" {
1734 eprintln!("fatal: {path}: clean filter '{name}' failed");
1735 } else {
1736 eprintln!("fatal: {path}: smudge filter {name} failed");
1737 }
1738 return Err(GitError::Exit(128));
1739 }
1740 return Ok(content);
1741 };
1742 match run_filter_command(command, path, &content) {
1743 Ok(output) => Ok(Cow::Owned(output)),
1744 Err(err) => {
1745 if driver.required {
1746 Err(err)
1747 } else {
1748 Ok(content)
1751 }
1752 }
1753 }
1754}
1755
1756pub(crate) fn filter_attribute_checks(
1759 worktree_root: &Path,
1760 path: &[u8],
1761) -> Result<Vec<AttributeCheck>> {
1762 let requested = filter_attribute_names();
1763 let mut matcher = AttributeMatcher::default();
1764 let git_dir = worktree_root.join(".git");
1765 matcher.configure_case_sensitivity(&git_dir);
1766 if !matcher.read_configured_attributes(worktree_root, &git_dir) {
1767 matcher.read_default_global_attributes();
1768 }
1769 read_dir_attribute_patterns_for_base(worktree_root, &[], &mut matcher)?;
1770 let mut prefix = Vec::new();
1771 let mut parts = path.split(|byte| *byte == b'/').peekable();
1772 while let Some(part) = parts.next() {
1773 if parts.peek().is_none() {
1774 break;
1775 }
1776 if !prefix.is_empty() {
1777 prefix.push(b'/');
1778 }
1779 prefix.extend_from_slice(part);
1780 let dir = worktree_root.join(repo_path_to_os_path(&prefix)?);
1781 read_dir_attribute_patterns_for_base(&dir, &prefix, &mut matcher)?;
1782 }
1783 read_attribute_patterns(
1784 worktree_root.join(".git").join("info").join("attributes"),
1785 &mut matcher,
1786 &[],
1787 b".git/info/attributes",
1788 false,
1789 );
1790 Ok(matcher.attributes_for_path(path, &requested, false))
1791}
1792
1793pub(crate) fn smudge_attribute_checks_from_index(
1807 worktree_root: &Path,
1808 git_dir: &Path,
1809 format: ObjectFormat,
1810 path: &[u8],
1811) -> Result<Vec<AttributeCheck>> {
1812 let requested = filter_attribute_names();
1813 let mut matcher = AttributeMatcher::default();
1814 matcher.configure_case_sensitivity(git_dir);
1815 if !matcher.read_configured_attributes(worktree_root, git_dir) {
1816 matcher.read_default_global_attributes();
1817 }
1818
1819 let index_attributes = index_gitattributes_by_base(git_dir, format)?;
1822
1823 fold_checkout_attribute_frame(worktree_root, &[], &index_attributes, &mut matcher)?;
1826 let mut prefix = Vec::new();
1827 let mut parts = path.split(|byte| *byte == b'/').peekable();
1828 while let Some(part) = parts.next() {
1829 if parts.peek().is_none() {
1830 break;
1831 }
1832 if !prefix.is_empty() {
1833 prefix.push(b'/');
1834 }
1835 prefix.extend_from_slice(part);
1836 let dir = worktree_root.join(repo_path_to_os_path(&prefix)?);
1837 fold_checkout_attribute_frame(&dir, &prefix, &index_attributes, &mut matcher)?;
1838 }
1839
1840 read_attribute_patterns(
1841 worktree_root.join(".git").join("info").join("attributes"),
1842 &mut matcher,
1843 &[],
1844 b".git/info/attributes",
1845 false,
1846 );
1847 Ok(matcher.attributes_for_path(path, &requested, false))
1848}
1849
1850pub(crate) fn fold_checkout_attribute_frame(
1855 dir: &Path,
1856 base: &[u8],
1857 index_attributes: &BTreeMap<Vec<u8>, Vec<u8>>,
1858 matcher: &mut AttributeMatcher,
1859) -> Result<()> {
1860 let worktree_file = dir.join(".gitattributes");
1861 let source = attribute_source_for_base(base);
1862 if let Ok(contents) = fs::read(&worktree_file) {
1863 read_attribute_patterns_from_bytes(&contents, matcher, base, &source);
1866 } else if let Some(contents) = index_attributes.get(base) {
1867 read_attribute_patterns_from_bytes(contents, matcher, base, &source);
1868 }
1869 Ok(())
1870}
1871
1872pub(crate) fn index_gitattributes_by_base(
1875 git_dir: &Path,
1876 format: ObjectFormat,
1877) -> Result<BTreeMap<Vec<u8>, Vec<u8>>> {
1878 let mut map = BTreeMap::new();
1879 let index_path = repository_index_path(git_dir);
1880 if !index_path.exists() {
1881 return Ok(map);
1882 }
1883 let db = FileObjectDatabase::from_git_dir(git_dir, format);
1884 let entries = Index::parse(&fs::read(index_path)?, format)?.entries;
1885 for entry in entries {
1886 let is_attributes_file =
1887 entry.path == b".gitattributes" || entry.path.as_bytes().ends_with(b"/.gitattributes");
1888 if index_entry_stage(&entry) != 0
1889 || tree_entry_object_type(entry.mode) != ObjectType::Blob
1890 || !is_attributes_file
1891 {
1892 continue;
1893 }
1894 let base = match entry.path.as_bytes().strip_suffix(b".gitattributes") {
1895 Some(b"") => Vec::new(),
1896 Some(parent) => parent.strip_suffix(b"/").unwrap_or(parent).to_vec(),
1897 None => continue,
1898 };
1899 let object = db
1900 .read_object(&entry.oid)
1901 .map_err(|err| expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob))?;
1902 if object.object_type == ObjectType::Blob {
1903 map.insert(base, object.body.clone());
1904 }
1905 }
1906 Ok(map)
1907}
1908
/// Returns the names of the attributes consulted when planning content
/// conversion, in a fixed order.
pub(crate) fn filter_attribute_names() -> Vec<Vec<u8>> {
    const NAMES: [&[u8]; 6] = [
        b"text",
        b"crlf",
        b"ident",
        b"eol",
        b"filter",
        b"working-tree-encoding",
    ];
    NAMES.iter().map(|name| name.to_vec()).collect()
}
1921
/// Byte-class counters produced by [`gather_convert_stats`].
#[derive(Clone)]
pub(crate) struct ConvertStats {
    /// Number of NUL bytes.
    nul: u32,
    /// Number of `\r` bytes not followed by `\n`.
    lonecr: u32,
    /// Number of `\n` bytes not preceded by `\r`.
    lonelf: u32,
    /// Number of `\r\n` pairs.
    crlf: u32,
    /// Number of bytes classified as printable.
    printable: u32,
    /// Number of bytes classified as non-printable (NUL included).
    nonprintable: u32,
}

/// Scans `buf` once and classifies every byte.
///
/// * `\r\n` counts as one CRLF; a `\r` on its own is a lone CR and a `\n`
///   on its own is a lone LF. None of these touch the printable counters.
/// * DEL (127) and control bytes below 32 are non-printable, except
///   backspace, tab, form feed and escape, which are printable.
/// * NUL is counted both in `nul` and in `nonprintable`.
/// * Every other byte is printable.
///
/// A trailing 0x1a byte is not held against the content: one is subtracted
/// from `nonprintable` when the buffer ends with it.
pub(crate) fn gather_convert_stats(buf: &[u8]) -> ConvertStats {
    let mut stats = ConvertStats {
        nul: 0,
        lonecr: 0,
        lonelf: 0,
        crlf: 0,
        printable: 0,
        nonprintable: 0,
    };

    let mut bytes = buf.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        match byte {
            b'\r' => {
                // Consume the LF of a CRLF pair so it is not counted twice.
                if bytes.next_if_eq(&b'\n').is_some() {
                    stats.crlf += 1;
                } else {
                    stats.lonecr += 1;
                }
            }
            b'\n' => stats.lonelf += 1,
            127 => stats.nonprintable += 1,
            0x08 | 0x09 | 0x0c | 0x1b => stats.printable += 1,
            0 => {
                stats.nul += 1;
                stats.nonprintable += 1;
            }
            1..=31 => stats.nonprintable += 1,
            _ => stats.printable += 1,
        }
    }

    if let Some(&0x1a) = buf.last() {
        stats.nonprintable = stats.nonprintable.saturating_sub(1);
    }
    stats
}
1997
1998pub(crate) fn has_crlf_in_index(odb: &FileObjectDatabase, oid: &ObjectId) -> bool {
2004 let Ok(object) = odb.read_object(oid) else {
2005 return false;
2006 };
2007 if object.object_type != ObjectType::Blob {
2008 return false;
2009 }
2010 let data = &object.body;
2011 if !data.contains(&b'\r') {
2013 return false;
2014 }
2015 let stats = gather_convert_stats(data);
2016 !convert_is_binary(&stats) && stats.crlf > 0
2017}
2018
2019pub(crate) fn convert_is_binary(stats: &ConvertStats) -> bool {
2022 if stats.lonecr > 0 {
2023 return true;
2024 }
2025 if stats.nul > 0 {
2026 return true;
2027 }
2028 (stats.printable >> 7) < stats.nonprintable
2029}
2030
2031#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2036pub enum ConvFlags {
2037 Off,
2039 Warn,
2042 Die,
2044}
2045
2046impl ConvFlags {
2047 pub fn from_config(config: &GitConfig) -> Self {
2053 match config.get("core", None, "safecrlf") {
2054 Some(value) if value.eq_ignore_ascii_case("warn") => ConvFlags::Warn,
2055 Some(_) => {
2056 if config.get_bool("core", None, "safecrlf") == Some(true) {
2057 ConvFlags::Die
2058 } else {
2059 ConvFlags::Off
2060 }
2061 }
2062 None => ConvFlags::Warn,
2063 }
2064 }
2065}
2066
2067pub(crate) fn check_safe_crlf(
2077 old_stats: &ConvertStats,
2078 new_stats: &ConvertStats,
2079 flags: ConvFlags,
2080 path: &[u8],
2081) -> Result<()> {
2082 if flags == ConvFlags::Off {
2083 return Ok(());
2084 }
2085 let display = String::from_utf8_lossy(path);
2086 if old_stats.crlf > 0 && new_stats.crlf == 0 {
2087 match flags {
2089 ConvFlags::Die => {
2090 eprintln!("fatal: CRLF would be replaced by LF in {display}");
2091 return Err(GitError::Exit(128));
2092 }
2093 ConvFlags::Warn => {
2094 eprintln!(
2095 "warning: in the working copy of '{display}', CRLF will be replaced by LF the next time Git touches it"
2096 );
2097 }
2098 ConvFlags::Off => unreachable!("handled above"),
2099 }
2100 } else if old_stats.lonelf > 0 && new_stats.lonelf == 0 {
2101 match flags {
2103 ConvFlags::Die => {
2104 eprintln!("fatal: LF would be replaced by CRLF in {display}");
2105 return Err(GitError::Exit(128));
2106 }
2107 ConvFlags::Warn => {
2108 eprintln!(
2109 "warning: in the working copy of '{display}', LF will be replaced by CRLF the next time Git touches it"
2110 );
2111 }
2112 ConvFlags::Off => unreachable!("handled above"),
2113 }
2114 }
2115 Ok(())
2116}
2117
2118pub(crate) fn convert_stats_ascii(content: &[u8]) -> &'static str {
2121 if content.is_empty() {
2122 return "none";
2123 }
2124 let stats = gather_convert_stats(content);
2125 if convert_is_binary(&stats) {
2126 return "-text";
2127 }
2128 match (stats.lonelf > 0, stats.crlf > 0) {
2129 (true, false) => "lf",
2130 (false, true) => "crlf",
2131 (true, true) => "mixed",
2132 (false, false) => "none",
2133 }
2134}
2135
2136pub(crate) fn convert_attr_ascii(checks: &[AttributeCheck]) -> &'static str {
2140 fn state_of<'a>(checks: &'a [AttributeCheck], name: &[u8]) -> Option<&'a AttributeState> {
2141 checks
2142 .iter()
2143 .find(|check| check.attribute == name)
2144 .and_then(|check| check.state.as_ref())
2145 }
2146
2147 #[derive(Clone, Copy, PartialEq)]
2151 enum Action {
2152 Undefined,
2153 Binary,
2154 Text,
2155 TextInput,
2156 TextCrlf,
2157 Auto,
2158 AutoCrlf,
2159 AutoInput,
2160 }
2161 fn check_crlf(state: Option<&AttributeState>) -> Action {
2162 match state {
2163 Some(AttributeState::Set) => Action::Text,
2164 Some(AttributeState::Unset) => Action::Binary,
2165 Some(AttributeState::Value(value)) if value == b"input" => Action::TextInput,
2166 Some(AttributeState::Value(value)) if value == b"auto" => Action::Auto,
2167 _ => Action::Undefined,
2169 }
2170 }
2171
2172 let mut action = check_crlf(state_of(checks, b"text"));
2175 if action == Action::Undefined {
2176 action = check_crlf(state_of(checks, b"crlf"));
2177 }
2178
2179 if action != Action::Binary {
2180 let eol = match state_of(checks, b"eol") {
2182 Some(AttributeState::Value(value)) if value == b"lf" => Some(false),
2183 Some(AttributeState::Value(value)) if value == b"crlf" => Some(true),
2184 _ => None,
2185 };
2186 action = match (action, eol) {
2187 (Action::Auto, Some(false)) => Action::AutoInput,
2188 (Action::Auto, Some(true)) => Action::AutoCrlf,
2189 (_, Some(false)) if action != Action::Auto => Action::TextInput,
2190 (_, Some(true)) if action != Action::Auto => Action::TextCrlf,
2191 _ => action,
2192 };
2193 }
2194
2195 match action {
2196 Action::Undefined => "",
2197 Action::Binary => "-text",
2198 Action::Text => "text",
2199 Action::TextInput => "text eol=lf",
2200 Action::TextCrlf => "text eol=crlf",
2201 Action::Auto => "text=auto",
2202 Action::AutoCrlf => "text=auto eol=crlf",
2203 Action::AutoInput => "text=auto eol=lf",
2204 }
2205}
2206
/// Line-ending summary of one path: labels for the index content, the
/// worktree content, and the effective attributes.
pub struct EolInfo {
    /// Label for the content recorded in the index.
    pub index: &'static str,
    /// Label for the content in the worktree.
    pub worktree: &'static str,
    /// Label for the effective text/eol attributes.
    pub attr: &'static str,
}

impl EolInfo {
    /// Formats the three labels as `i/<index> w/<worktree> attr/<attr>`
    /// followed by a tab.
    ///
    /// The labels are left-aligned and padded to 5, 5 and 17 columns;
    /// longer labels are not truncated.
    pub fn format_prefix(&self) -> String {
        let Self {
            index,
            worktree,
            attr,
        } = self;
        format!("i/{index:<5} w/{worktree:<5} attr/{attr:<17}\t")
    }
}
2226
2227pub fn eol_info_for_path(
2235 worktree_root: impl AsRef<Path>,
2236 path: &[u8],
2237 index_content: Option<&[u8]>,
2238 attr_checks: &[AttributeCheck],
2239) -> EolInfo {
2240 let index = index_content.map(convert_stats_ascii).unwrap_or("");
2241
2242 let worktree_root = worktree_root.as_ref();
2243 let worktree = match repo_path_to_os_path(path) {
2244 Ok(rel) => {
2245 let absolute = worktree_root.join(rel);
2246 match fs::symlink_metadata(&absolute) {
2247 Ok(meta) if meta.file_type().is_file() => match fs::read(&absolute) {
2249 Ok(content) => convert_stats_ascii_owned(&content),
2250 Err(_) => "",
2251 },
2252 _ => "",
2253 }
2254 }
2255 Err(_) => "",
2256 };
2257
2258 let attr = convert_attr_ascii(attr_checks);
2259
2260 EolInfo {
2261 index,
2262 worktree,
2263 attr,
2264 }
2265}
2266
2267pub(crate) fn convert_stats_ascii_owned(content: &[u8]) -> &'static str {
2270 convert_stats_ascii(content)
2271}
2272
2273pub fn eol_attribute_checks(
2277 worktree_root: impl AsRef<Path>,
2278 path: &[u8],
2279) -> Result<Vec<AttributeCheck>> {
2280 filter_attribute_checks(worktree_root.as_ref(), path)
2281}