use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use encoding_rs::UTF_8;
use crate::config::ConfigSet;
use crate::filter_process::{apply_process_clean, apply_process_smudge, FilterSmudgeMeta};
/// Parsed value of the `core.autocrlf` configuration setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AutoCrlf {
/// A truthy value (`true`, `yes`, `on`, `1`).
True,
/// The literal value `input`.
Input,
/// The setting is absent or holds any other value.
False,
}
/// Parsed value of the `core.eol` configuration setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CoreEol {
/// `core.eol=lf`.
Lf,
/// `core.eol=crlf`.
Crlf,
/// `core.eol=native`, an unrecognised value, or the setting is absent.
Native,
}
/// Parsed value of the `core.safecrlf` configuration setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SafeCrlf {
/// A truthy value: an irreversible line-ending conversion is reported as an error.
True,
/// `warn`, or the setting is absent: an irreversible conversion only prints a warning.
Warn,
/// Any other value: the round-trip check is skipped.
False,
}
/// State of the `text` gitattribute for a path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextAttr {
/// `text` (attribute set).
Set,
/// `text=auto`.
Auto,
/// `-text` (also produced by the `binary` macro).
Unset,
/// No matching rule mentions `text`, or its value was not recognised.
Unspecified,
}
/// State of the `eol` gitattribute for a path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EolAttr {
/// `eol=lf`.
Lf,
/// `eol=crlf`.
Crlf,
/// No matching rule mentions `eol`, or its value was not recognised.
Unspecified,
}
/// State of the `crlf` gitattribute for a path.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CrlfLegacyAttr {
/// No matching rule mentions `crlf`, or its value was not recognised.
#[default]
Unspecified,
/// `-crlf`.
Unset,
/// `crlf=input`.
Input,
/// `crlf` (attribute set).
Crlf,
}
/// State of the `merge` gitattribute for a path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeAttr {
/// No matching rule mentions `merge`, or it is plainly set (`merge`).
Unspecified,
/// `-merge`.
Unset,
/// `merge=<name>`: the named merge driver.
Driver(String),
}
/// State of the `diff` gitattribute for a path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiffAttr {
/// No matching rule selected a driver or unset the attribute.
Unspecified,
/// `-diff` (also produced by the `binary` macro).
Unset,
/// `diff=<name>`: the named diff driver.
Driver(String),
}
/// Attributes resolved for one path by [`get_file_attrs`].
#[derive(Debug, Clone)]
pub struct FileAttrs {
/// State of the `text` attribute.
pub text: TextAttr,
/// State of the `eol` attribute.
pub eol: EolAttr,
/// State of the `diff` attribute.
pub diff_attr: DiffAttr,
/// `export-ignore` is present and not unset.
pub export_ignore: bool,
/// `export-subst` is present and not unset.
pub export_subst: bool,
/// Value of `filter.<name>.clean`; `None` when a process filter is configured.
pub filter_clean: Option<String>,
/// Value of `filter.<name>.smudge`; `None` when a process filter is configured.
pub filter_smudge: Option<String>,
/// Non-empty value of `filter.<name>.process`, if any.
pub filter_process: Option<String>,
/// Driver name taken from the `filter=<name>` attribute.
pub filter_driver_name: Option<String>,
/// Mirrors `filter.<name>.required`.
pub filter_smudge_required: bool,
/// Mirrors `filter.<name>.required`.
pub filter_clean_required: bool,
/// `ident` is set.
pub ident: bool,
/// State of the `merge` attribute.
pub merge: MergeAttr,
/// Raw value of `conflict-marker-size`, if given.
pub conflict_marker_size: Option<String>,
/// Raw value of `working-tree-encoding`, if given.
pub working_tree_encoding: Option<String>,
/// State of the `crlf` attribute.
pub crlf_legacy: CrlfLegacyAttr,
/// Raw value of `whitespace`; `-whitespace` is stored as the literal string `"unset"`.
pub whitespace: Option<String>,
}
impl Default for FileAttrs {
    /// Attributes of a path that no rule matched: everything unspecified,
    /// every flag off, no filter configured.
    fn default() -> Self {
        Self {
            // Line-ending related attributes.
            text: TextAttr::Unspecified,
            eol: EolAttr::Unspecified,
            crlf_legacy: CrlfLegacyAttr::Unspecified,
            working_tree_encoding: None,
            ident: false,
            // Filter driver.
            filter_driver_name: None,
            filter_clean: None,
            filter_smudge: None,
            filter_process: None,
            filter_clean_required: false,
            filter_smudge_required: false,
            // Diff / merge behaviour.
            diff_attr: DiffAttr::Unspecified,
            merge: MergeAttr::Unspecified,
            conflict_marker_size: None,
            whitespace: None,
            // Archive export.
            export_ignore: false,
            export_subst: false,
        }
    }
}
/// Line-ending related configuration, as read by [`ConversionConfig::from_config`].
#[derive(Debug, Clone)]
pub struct ConversionConfig {
/// Parsed `core.autocrlf`.
pub autocrlf: AutoCrlf,
/// Parsed `core.eol`.
pub eol: CoreEol,
/// Parsed `core.safecrlf`.
pub safecrlf: SafeCrlf,
}
impl ConversionConfig {
    /// Reads `core.autocrlf`, `core.eol` and `core.safecrlf` from `config`.
    ///
    /// Values are compared case-insensitively. Missing settings default to
    /// `autocrlf = False`, `eol = Native` and `safecrlf = Warn`; an
    /// unrecognised `core.safecrlf` value maps to `SafeCrlf::False`.
    pub fn from_config(config: &ConfigSet) -> Self {
        let lowered = |key: &str| config.get(key).map(|raw| raw.to_lowercase());

        let autocrlf = match lowered("core.autocrlf").as_deref() {
            Some("true" | "yes" | "on" | "1") => AutoCrlf::True,
            Some("input") => AutoCrlf::Input,
            Some(_) | None => AutoCrlf::False,
        };

        let eol = match lowered("core.eol").as_deref() {
            Some("crlf") => CoreEol::Crlf,
            Some("lf") => CoreEol::Lf,
            Some(_) | None => CoreEol::Native,
        };

        let safecrlf = match lowered("core.safecrlf").as_deref() {
            Some("true" | "yes" | "on" | "1") => SafeCrlf::True,
            Some("warn") | None => SafeCrlf::Warn,
            Some(_) => SafeCrlf::False,
        };

        Self {
            autocrlf,
            eol,
            safecrlf,
        }
    }
}
/// One parsed line of a gitattributes file: a pattern plus the attributes it assigns.
#[derive(Debug, Clone)]
pub struct AttrRule {
// Glob pattern, with any trailing `/` already removed.
pattern: String,
// The pattern was written with a trailing `/` and therefore only matches directories.
must_be_dir: bool,
// The pattern contains no `/`, so it is matched against the final path component only.
basename_only: bool,
// `(name, value)` pairs in file order; value is "set", "unset", or the text after `=`.
attrs: Vec<(String, String)>, }
impl AttrRule {
    /// Iterates over the diff driver names this rule assigns via `diff=<name>`.
    ///
    /// Plain `diff`, `-diff` and empty values carry no driver name and are skipped.
    pub fn diff_drivers(&self) -> impl Iterator<Item = &str> + '_ {
        self.attrs
            .iter()
            .filter(|(key, _)| key.as_str() == "diff")
            .map(|(_, val)| val.as_str())
            .filter(|val| !matches!(*val, "" | "unset" | "set"))
    }
}
/// Loads attribute rules from `<work_tree>/.gitattributes` followed by
/// `<work_tree>/.git/info/attributes`.
///
/// Files that are missing or unreadable are silently skipped.
pub fn load_gitattributes(work_tree: &Path) -> Vec<AttrRule> {
    let mut collected = Vec::new();
    for rel in [".gitattributes", ".git/info/attributes"] {
        if let Ok(text) = std::fs::read_to_string(work_tree.join(rel)) {
            parse_gitattributes(&text, &mut collected);
        }
    }
    collected
}
/// Parses the text of a gitattributes file into a fresh list of rules.
#[must_use]
pub fn parse_gitattributes_content(content: &str) -> Vec<AttrRule> {
    let mut parsed = Vec::new();
    parse_gitattributes(content, &mut parsed);
    parsed
}
/// Loads attribute rules from the stage-0 `.gitattributes` entry of `index`.
///
/// Returns an empty list when the entry is missing, its object cannot be
/// read, or the blob is not valid UTF-8.
pub fn load_gitattributes_from_index(
    index: &crate::index::Index,
    odb: &crate::odb::Odb,
) -> Vec<AttrRule> {
    let mut rules = Vec::new();
    let blob_text = index
        .get(b".gitattributes", 0)
        .and_then(|entry| odb.read(&entry.oid).ok())
        .and_then(|obj| String::from_utf8(obj.data).ok());
    if let Some(text) = blob_text {
        parse_gitattributes(&text, &mut rules);
    }
    rules
}
/// Collects the attribute rules relevant to checking out `rel_path`.
///
/// Starts from [`load_gitattributes`]. If the work tree has no root
/// `.gitattributes`, the indexed copy is used instead. Then, for every
/// ancestor directory of `rel_path` (outermost first), the directory's
/// `.gitattributes` is read from the work tree, falling back to the index
/// when the file cannot be read.
pub fn load_gitattributes_for_checkout(
    work_tree: &Path,
    rel_path: &str,
    index: &crate::index::Index,
    odb: &crate::odb::Odb,
) -> Vec<AttrRule> {
    // Text of the stage-0 index blob stored under `key`, if it exists and is UTF-8.
    let blob_text = |key: &[u8]| -> Option<String> {
        let entry = index.get(key, 0)?;
        let obj = odb.read(&entry.oid).ok()?;
        String::from_utf8(obj.data).ok()
    };

    let mut rules = load_gitattributes(work_tree);

    if !work_tree.join(".gitattributes").exists() {
        if let Some(text) = blob_text(&b".gitattributes"[..]) {
            parse_gitattributes(&text, &mut rules);
        }
    }

    if let Some(parent) = Path::new(rel_path).parent() {
        let mut dir = PathBuf::new();
        for component in parent.components() {
            dir.push(component);
            let ga_rel = dir.join(".gitattributes");
            let text = std::fs::read_to_string(work_tree.join(&ga_rel))
                .ok()
                .or_else(|| blob_text(path_to_index_bytes(&ga_rel).as_slice()));
            if let Some(text) = text {
                parse_gitattributes(&text, &mut rules);
            }
        }
    }

    rules
}
/// Returns the raw bytes of `path`, for use as an index lookup key.
fn path_to_index_bytes(path: &Path) -> Vec<u8> {
    use std::os::unix::ffi::OsStrExt;

    let raw: &[u8] = path.as_os_str().as_bytes();
    raw.to_owned()
}
/// Parses gitattributes text and appends one [`AttrRule`] per effective line to `rules`.
///
/// Blank lines and lines starting with `#` are ignored. The first
/// whitespace-separated field is the pattern; a trailing `/` (on a pattern
/// longer than one character) marks it directory-only. Each remaining field
/// becomes a `(name, value)` pair: `binary` expands to `-text -diff`,
/// `-name` yields `"unset"`, `name=value` yields `value`, and a bare `name`
/// yields `"set"`. Lines without any attribute are dropped.
fn parse_gitattributes(content: &str, rules: &mut Vec<AttrRule>) {
    for raw in content.lines() {
        let trimmed = raw.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        let mut fields = trimmed.split_whitespace();
        let Some(first) = fields.next() else {
            continue;
        };

        let (pattern, must_be_dir) = match first.strip_suffix('/') {
            Some(stem) if !stem.is_empty() => (stem.to_owned(), true),
            _ => (first.to_owned(), false),
        };
        let basename_only = !pattern.contains('/');

        let mut attrs: Vec<(String, String)> = Vec::new();
        let mut record = |key: &str, state: &str| attrs.push((key.to_owned(), state.to_owned()));
        for token in fields {
            if token == "binary" {
                record("text", "unset");
                record("diff", "unset");
            } else if let Some(name) = token.strip_prefix('-') {
                record(name, "unset");
            } else if let Some((key, val)) = token.split_once('=') {
                record(key, val);
            } else {
                record(token, "set");
            }
        }

        if attrs.is_empty() {
            continue;
        }
        rules.push(AttrRule {
            pattern,
            must_be_dir,
            basename_only,
            attrs,
        });
    }
}
/// Returns `true` when `value`, trimmed and compared ASCII-case-insensitively,
/// is one of `true`, `yes`, `on` or `1`.
fn config_bool_truthy(value: &str) -> bool {
    let candidate = value.trim();
    ["true", "yes", "on", "1"]
        .iter()
        .any(|truthy| candidate.eq_ignore_ascii_case(truthy))
}
/// Resolves the attributes that apply to `rel_path`.
///
/// Every rule in `rules` that matches the path (see [`attr_rule_matches`]) is
/// applied in order, so a later rule overrides what an earlier one set.
/// Filter driver commands are looked up in `config` under
/// `filter.<name>.{clean,smudge,process,required}`; when a non-empty
/// `process` command exists, the clean/smudge commands are ignored.
///
/// * `rules` - parsed gitattributes rules, lowest precedence first.
/// * `rel_path` - path to resolve, using `/` separators.
/// * `is_dir` - whether the path denotes a directory.
/// * `config` - configuration used to resolve filter drivers.
pub fn get_file_attrs(
    rules: &[AttrRule],
    rel_path: &str,
    is_dir: bool,
    config: &ConfigSet,
) -> FileAttrs {
    let mut fa = FileAttrs::default();
    let matching = rules
        .iter()
        .filter(|rule| attr_rule_matches(rule, rel_path, is_dir));

    for (name, value) in matching.flat_map(|rule| rule.attrs.iter()) {
        match name.as_str() {
            "text" => {
                fa.text = match value.as_str() {
                    "set" => TextAttr::Set,
                    "unset" => TextAttr::Unset,
                    "auto" => TextAttr::Auto,
                    _ => TextAttr::Unspecified,
                };
            }
            "eol" => {
                fa.eol = match value.as_str() {
                    "lf" => EolAttr::Lf,
                    "crlf" => EolAttr::Crlf,
                    _ => EolAttr::Unspecified,
                };
            }
            "filter" => {
                if value == "unset" {
                    fa.filter_clean = None;
                    fa.filter_smudge = None;
                    fa.filter_process = None;
                    fa.filter_driver_name = None;
                    fa.filter_smudge_required = false;
                    fa.filter_clean_required = false;
                } else {
                    let clean_key = format!("filter.{value}.clean");
                    let smudge_key = format!("filter.{value}.smudge");
                    let process_key = format!("filter.{value}.process");
                    let req_key = format!("filter.{value}.required");
                    fa.filter_driver_name = Some(value.clone());
                    fa.filter_process = config.get(&process_key).filter(|s| !s.is_empty());
                    if fa.filter_process.is_some() {
                        // A process filter takes over both directions.
                        fa.filter_clean = None;
                        fa.filter_smudge = None;
                    } else {
                        fa.filter_clean = config.get(&clean_key);
                        fa.filter_smudge = config.get(&smudge_key);
                    }
                    let required = config
                        .get(&req_key)
                        .is_some_and(|v| config_bool_truthy(&v));
                    fa.filter_smudge_required = required;
                    fa.filter_clean_required = required;
                }
            }
            "diff" => {
                if value == "unset" {
                    fa.diff_attr = DiffAttr::Unset;
                } else if !value.is_empty() && value != "set" {
                    fa.diff_attr = DiffAttr::Driver(value.clone());
                }
            }
            "ident" => {
                fa.ident = value == "set";
            }
            "export-ignore" => {
                fa.export_ignore = value != "unset";
            }
            "export-subst" => {
                fa.export_subst = value != "unset";
            }
            "merge" => {
                fa.merge = match value.as_str() {
                    "unset" => MergeAttr::Unset,
                    "set" => MergeAttr::Unspecified,
                    other => MergeAttr::Driver(other.to_string()),
                };
            }
            "conflict-marker-size" => {
                fa.conflict_marker_size = if value == "unset" {
                    None
                } else {
                    Some(value.clone())
                };
            }
            "working-tree-encoding" => {
                // An unset or empty value must clear an encoding chosen by an
                // earlier rule; otherwise a later `-working-tree-encoding`
                // could never override it.
                fa.working_tree_encoding = if value == "unset" || value.is_empty() {
                    None
                } else {
                    Some(value.clone())
                };
            }
            "crlf" => {
                fa.crlf_legacy = match value.as_str() {
                    "unset" => CrlfLegacyAttr::Unset,
                    "input" => CrlfLegacyAttr::Input,
                    "set" => CrlfLegacyAttr::Crlf,
                    _ => CrlfLegacyAttr::Unspecified,
                };
            }
            "whitespace" => {
                if value == "unset" {
                    fa.whitespace = Some("unset".to_owned());
                } else if !value.is_empty() {
                    fa.whitespace = Some(value.clone());
                }
            }
            _ => {}
        }
    }
    fa
}
/// Reports whether `attr_name` is effectively present for `path`.
///
/// The value assigned by the last matching rule wins. The result is `false`
/// when no matching rule mentions the attribute or when the winning value is
/// `"unset"`, and `true` for any other value.
#[must_use]
pub fn path_has_gitattribute(
    rules: &[AttrRule],
    path: &str,
    is_dir: bool,
    attr_name: &str,
) -> bool {
    let winning = rules
        .iter()
        .filter(|rule| attr_rule_matches(rule, path, is_dir))
        .flat_map(|rule| rule.attrs.iter())
        .filter(|(name, _)| name.as_str() == attr_name)
        .map(|(_, value)| value.as_str())
        .last();
    !matches!(winning, None | Some("unset"))
}
/// Tests whether `rule` applies to `rel_path`.
///
/// A path counts as a directory when `is_dir` is set or it ends with `/`;
/// directory-only rules reject everything else. Trailing slashes are removed
/// before matching. Patterns without `/` are matched against the last path
/// component, all others against the whole path.
#[must_use]
pub fn attr_rule_matches(rule: &AttrRule, rel_path: &str, is_dir: bool) -> bool {
    if rule.must_be_dir && !(is_dir || rel_path.ends_with('/')) {
        return false;
    }
    let trimmed = rel_path.trim_end_matches('/');
    let subject = if rule.basename_only {
        trimmed.rsplit_once('/').map_or(trimmed, |(_, base)| base)
    } else {
        trimmed
    };
    glob_matches(&rule.pattern, subject)
}
/// String front-end for [`glob_match_bytes`]: matches `text` against `pattern` byte by byte.
fn glob_matches(pattern: &str, text: &str) -> bool {
glob_match_bytes(pattern.as_bytes(), text.as_bytes())
}
/// Matches `text` against the wildcard pattern `pat`.
///
/// `*` matches any run of bytes (including `/` and the empty run), `?`
/// matches exactly one byte, and every other byte matches itself. The whole
/// text must be consumed.
///
/// The matcher is iterative and only ever backtracks to the most recent `*`,
/// so it runs in `O(pat.len() * text.len())` time instead of exploring every
/// split point recursively.
fn glob_match_bytes(pat: &[u8], text: &[u8]) -> bool {
    let mut p = 0usize;
    let mut t = 0usize;
    // (pattern index just after the last `*`, text index that star currently extends to)
    let mut resume: Option<(usize, usize)> = None;

    while t < text.len() {
        match pat.get(p) {
            Some(b'*') => {
                // Tentatively let the star match nothing; remember where to retry.
                resume = Some((p + 1, t));
                p += 1;
            }
            Some(&c) if c == b'?' || c == text[t] => {
                p += 1;
                t += 1;
            }
            _ => match resume {
                // Mismatch: make the last star swallow one more byte and retry.
                Some((after_star, star_end)) => {
                    p = after_star;
                    t = star_end + 1;
                    resume = Some((after_star, star_end + 1));
                }
                None => return false,
            },
        }
    }

    // Text exhausted: only trailing stars may remain in the pattern.
    pat[p..].iter().all(|&b| b == b'*')
}
/// Heuristic binary check: `true` when a NUL byte occurs within the first
/// 8000 bytes of `data`.
pub fn is_binary(data: &[u8]) -> bool {
    data.iter().take(8000).any(|&byte| byte == 0)
}
// Bit flags returned by `gather_convert_stats`.
// The content contains at least one LF that is not part of a CRLF pair.
const CONVERT_STAT_BITS_TXT_LF: u32 = 0x1;
// The content contains at least one CRLF pair.
const CONVERT_STAT_BITS_TXT_CRLF: u32 = 0x2;
// The content is classified as binary by `convert_is_binary`.
const CONVERT_STAT_BITS_BIN: u32 = 0x4;
/// Byte-class counters produced by `gather_text_stat`.
#[derive(Default, Clone)]
struct TextStat {
// Number of NUL bytes (each is also counted in `nonprintable`).
nul: u32,
// Number of CR bytes not followed by LF.
lonecr: u32,
// Number of LF bytes not preceded by CR.
lonelf: u32,
// Number of CR LF pairs.
crlf: u32,
// Bytes >= 32 other than DEL, plus TAB, BS, ESC and FF.
printable: u32,
// DEL and the remaining control bytes below 32.
nonprintable: u32,
}
/// Classifies every byte of `data` and returns the resulting counters.
///
/// CR LF pairs, lone CR and lone LF are counted separately and never
/// contribute to the printable/non-printable totals.
fn gather_text_stat(data: &[u8]) -> TextStat {
    let mut stat = TextStat::default();
    let mut bytes = data.iter().copied().peekable();

    while let Some(byte) = bytes.next() {
        match byte {
            b'\r' => {
                // Consume the LF of a CRLF pair together with the CR.
                if bytes.next_if_eq(&b'\n').is_some() {
                    stat.crlf += 1;
                } else {
                    stat.lonecr += 1;
                }
            }
            b'\n' => stat.lonelf += 1,
            0 => {
                stat.nul += 1;
                stat.nonprintable += 1;
            }
            // TAB, BS, ESC and FF are control bytes that still count as text.
            b'\t' | b'\x08' | b'\x1b' | b'\x0c' => stat.printable += 1,
            1..=31 | 127 => stat.nonprintable += 1,
            _ => stat.printable += 1,
        }
    }

    stat
}
/// Decides whether the counted content should be treated as binary: any lone
/// CR, any NUL, or more non-printable bytes than 1/128th of the printable ones.
fn convert_is_binary(stats: &TextStat) -> bool {
    let printable_budget = stats.printable >> 7;
    stats.lonecr > 0 || stats.nul > 0 || stats.nonprintable > printable_budget
}
/// Like [`gather_text_stat`], except that a trailing 0x1A byte is not counted
/// as non-printable.
fn git_text_stat(data: &[u8]) -> TextStat {
    let mut stat = gather_text_stat(data);
    if data.last() == Some(&0x1a) {
        stat.nonprintable = stat.nonprintable.saturating_sub(1);
    }
    stat
}
/// Predicts, from byte statistics alone, whether content with these counters
/// would have its LFs turned into CRLFs on checkout under `attrs` and `conv`.
fn will_convert_lf_to_crlf_from_stats(
    stats: &TextStat,
    conv: &ConversionConfig,
    attrs: &FileAttrs,
) -> bool {
    let lone_lf = stats.lonelf > 0;
    let binary = convert_is_binary(stats);
    let auto = || auto_crlf_should_smudge_lf_to_crlf_from_stats(stats);

    // The `crlf` attribute, when present, decides on its own.
    if attrs.crlf_legacy != CrlfLegacyAttr::Unspecified {
        return attrs.crlf_legacy == CrlfLegacyAttr::Crlf
            && attrs.text != TextAttr::Unset
            && lone_lf;
    }

    match (attrs.text, attrs.eol) {
        (TextAttr::Unset, _) => false,
        // Explicit `eol`.
        (TextAttr::Auto, EolAttr::Crlf) => !binary && auto(),
        (_, EolAttr::Crlf) => lone_lf,
        (_, EolAttr::Lf) => false,
        // No `eol`: fall back to configuration.
        (TextAttr::Set, EolAttr::Unspecified) => output_eol_is_crlf(conv) && lone_lf,
        (TextAttr::Auto, EolAttr::Unspecified) => {
            !binary && output_eol_is_crlf(conv) && auto()
        }
        (TextAttr::Unspecified, EolAttr::Unspecified) => {
            conv.autocrlf == AutoCrlf::True && !binary && auto()
        }
    }
}
/// Auto-detection rule on statistics: convert only content that has lone LFs,
/// has no CR of any kind, and is not binary.
fn auto_crlf_should_smudge_lf_to_crlf_from_stats(stats: &TextStat) -> bool {
    let has_lone_lf = stats.lonelf != 0;
    let has_any_cr = stats.lonecr > 0 || stats.crlf > 0;
    has_lone_lf && !has_any_cr && !convert_is_binary(stats)
}
fn gather_convert_stats(data: &[u8]) -> u32 {
if data.is_empty() {
return 0;
}
let mut stats = gather_text_stat(data);
if !data.is_empty() && data[data.len() - 1] == 0x1a {
stats.nonprintable = stats.nonprintable.saturating_sub(1);
}
let mut ret = 0u32;
if convert_is_binary(&stats) {
ret |= CONVERT_STAT_BITS_BIN;
}
if stats.crlf > 0 {
ret |= CONVERT_STAT_BITS_TXT_CRLF;
}
if stats.lonelf > 0 {
ret |= CONVERT_STAT_BITS_TXT_LF;
}
ret
}
/// Describes the line endings of `data` as one of `"-text"` (binary),
/// `"lf"`, `"crlf"`, `"mixed"` or `"none"`.
#[must_use]
pub fn gather_convert_stats_ascii(data: &[u8]) -> &'static str {
    let bits = gather_convert_stats(data);
    let has = |flag: u32| bits & flag != 0;

    if has(CONVERT_STAT_BITS_BIN) {
        return "-text";
    }
    match (has(CONVERT_STAT_BITS_TXT_LF), has(CONVERT_STAT_BITS_TXT_CRLF)) {
        (true, false) => "lf",
        (false, true) => "crlf",
        (true, true) => "mixed",
        (false, false) => "none",
    }
}
#[must_use]
pub fn convert_attr_ascii_for_ls_files(
rules: &[AttrRule],
rel_path: &str,
config: &ConfigSet,
) -> String {
let fa = get_file_attrs(rules, rel_path, false, config);
let mut action = match fa.text {
TextAttr::Set => 1, TextAttr::Unset => 2, TextAttr::Auto => 5, TextAttr::Unspecified => 0,
};
if action == 0 {
action = match fa.crlf_legacy {
CrlfLegacyAttr::Crlf => 1,
CrlfLegacyAttr::Unset => 2,
CrlfLegacyAttr::Input => 3, CrlfLegacyAttr::Unspecified => 0,
};
}
if action == 2 {
return "-text".to_string();
}
if action == 0 {
if fa.eol == EolAttr::Unspecified {
return String::new();
}
action = 1; }
if fa.eol == EolAttr::Lf {
if action == 5 {
action = 7; } else {
action = 3; }
} else if fa.eol == EolAttr::Crlf {
if action == 5 {
action = 6; } else {
action = 4; }
}
let attr_action = action;
match attr_action {
1 => "text".to_string(),
3 => "text eol=lf".to_string(),
4 => "text eol=crlf".to_string(),
5 => "text=auto".to_string(),
6 => "text=auto eol=crlf".to_string(),
7 => "text=auto eol=lf".to_string(),
_ => String::new(),
}
}
/// Returns `true` when `data` contains at least one CR LF pair.
pub fn has_crlf(data: &[u8]) -> bool {
    data.iter()
        .zip(data.iter().skip(1))
        .any(|(&first, &second)| first == b'\r' && second == b'\n')
}
/// Returns `true` when `data` contains an LF that is not directly preceded by CR.
pub fn has_lone_lf(data: &[u8]) -> bool {
    let mut previous: Option<u8> = None;
    for &byte in data {
        if byte == b'\n' && previous != Some(b'\r') {
            return true;
        }
        previous = Some(byte);
    }
    false
}
/// Returns `true` when `data` contains a CR that is not directly followed by LF.
fn has_lone_cr(data: &[u8]) -> bool {
    data.iter()
        .enumerate()
        .any(|(idx, &byte)| byte == b'\r' && data.get(idx + 1) != Some(&b'\n'))
}
/// Auto-detection rule on raw bytes: convert only content that has lone LFs,
/// contains no CR of any kind, and is not binary per [`is_binary`].
fn auto_crlf_should_smudge_lf_to_crlf(data: &[u8]) -> bool {
    has_lone_lf(data) && !has_lone_cr(data) && !has_crlf(data) && !is_binary(data)
}
/// Returns `true` when `data` has CRLF line endings and no lone LF.
pub fn is_all_crlf(data: &[u8]) -> bool {
    let mixed_in_lf = has_lone_lf(data);
    !mixed_in_lf && has_crlf(data)
}
/// Returns `true` when `data` has lone LF line endings and no CRLF pair.
pub fn is_all_lf(data: &[u8]) -> bool {
    let mixed_in_crlf = has_crlf(data);
    !mixed_in_crlf && has_lone_lf(data)
}
/// Returns `true` when `data` is non-binary content that contains CRLF pairs.
#[must_use]
pub fn has_crlf_in_index_blob(data: &[u8]) -> bool {
    // Cheap pre-check: without any CR there is nothing to classify.
    if !data.iter().any(|&byte| byte == b'\r') {
        return false;
    }
    let bits = gather_convert_stats(data);
    let relevant = CONVERT_STAT_BITS_BIN | CONVERT_STAT_BITS_TXT_CRLF;
    bits & relevant == CONVERT_STAT_BITS_TXT_CRLF
}
/// Reports whether the clean conversion for these attributes is subject to
/// the "index blob already contains CRLF" guard.
///
/// That is the case for `text=auto`, and for paths without a `text` or `eol`
/// attribute when `core.autocrlf` is `true` or `input`. `-crlf` and `-text`
/// always disable it.
#[must_use]
pub fn clean_uses_autocrlf_index_guard(attrs: &FileAttrs, conv: &ConversionConfig) -> bool {
    if attrs.crlf_legacy == CrlfLegacyAttr::Unset {
        return false;
    }
    match attrs.text {
        TextAttr::Auto => true,
        TextAttr::Unspecified => {
            attrs.eol == EolAttr::Unspecified && conv.autocrlf != AutoCrlf::False
        }
        TextAttr::Set | TextAttr::Unset => false,
    }
}
/// Optional inputs for [`convert_to_git_with_opts`].
#[derive(Debug, Clone, Copy)]
pub struct ConvertToGitOpts<'a> {
/// Content of the blob currently in the index; used to decide whether CRLFs
/// already stored there should suppress CRLF-to-LF conversion.
pub index_blob: Option<&'a [u8]>,
/// When set, the index-blob guard is bypassed.
pub renormalize: bool,
/// When set, the `core.safecrlf` round-trip check is performed.
pub check_safecrlf: bool,
}
impl Default for ConvertToGitOpts<'_> {
fn default() -> Self {
Self {
index_blob: None,
renormalize: false,
check_safecrlf: true,
}
}
}
/// Serialises UTF-16 code units as little-endian byte pairs.
fn utf16_scalar_iter_to_le_bytes(chars: impl Iterator<Item = u16>) -> Vec<u8> {
    chars.flat_map(u16::to_le_bytes).collect()
}
/// Serialises UTF-16 code units as big-endian byte pairs.
fn utf16_scalar_iter_to_be_bytes(chars: impl Iterator<Item = u16>) -> Vec<u8> {
    chars.flat_map(u16::to_be_bytes).collect()
}
/// Serialises every scalar value of `s` as a big-endian 32-bit word.
fn utf32_chars_to_be_bytes(s: &str) -> Vec<u8> {
    s.chars()
        .flat_map(|scalar| u32::from(scalar).to_be_bytes())
        .collect()
}
/// Serialises every scalar value of `s` as a little-endian 32-bit word.
fn utf32_chars_to_le_bytes(s: &str) -> Vec<u8> {
    s.chars()
        .flat_map(|scalar| u32::from(scalar).to_le_bytes())
        .collect()
}
/// Decodes BOM-less UTF-32 `body` into UTF-8 bytes.
///
/// # Errors
///
/// Returns a message naming `rel_path` when the length is not a multiple of
/// four or a word is not a valid Unicode scalar value.
fn decode_utf32_body_to_utf8_bytes(
    body: &[u8],
    rel_path: &str,
    big_endian: bool,
) -> Result<Vec<u8>, String> {
    if !body.len().is_multiple_of(4) {
        return Err(format!(
            "invalid UTF-32 length for working tree file '{rel_path}'"
        ));
    }

    let read_word: fn([u8; 4]) -> u32 = if big_endian {
        u32::from_be_bytes
    } else {
        u32::from_le_bytes
    };

    body.chunks_exact(4)
        .map(|word| {
            let cp = read_word([word[0], word[1], word[2], word[3]]);
            char::from_u32(cp).ok_or_else(|| {
                format!("invalid UTF-32 scalar U+{cp:X} in working tree file '{rel_path}'")
            })
        })
        .collect::<Result<String, String>>()
        .map(String::into_bytes)
}
/// Decodes working-tree bytes stored in encoding `enc_label` into UTF-8.
///
/// The label is trimmed, and `_` is treated like `-` and case is ignored when
/// selecting one of the built-in UTF-16/UTF-32 variants. An empty label or a
/// label that resolves to UTF-8 returns the input unchanged. `utf-16` and
/// `utf-32` require a byte order mark. Any other label is resolved through
/// `crate::commit_encoding::resolve`.
///
/// # Errors
///
/// Returns a message naming `rel_path` when a required BOM is missing, the
/// label is unknown, or the bytes are not valid in the chosen encoding.
fn decode_working_tree_bytes_to_utf8(
    src: &[u8],
    rel_path: &str,
    enc_label: &str,
) -> Result<Vec<u8>, String> {
    let label = enc_label.trim();
    if label.is_empty() {
        return Ok(src.to_vec());
    }
    let lower = label.replace('_', "-").to_ascii_lowercase();
    let (cow, _used_enc, had_errors) = match lower.as_str() {
        "utf-16le-bom" => {
            let body = if src.len() >= 2 && src.starts_with(&[0xFF, 0xFE]) {
                &src[2..]
            } else {
                src
            };
            encoding_rs::UTF_16LE.decode(body)
        }
        "utf-16" => {
            if src.len() >= 2 && src.starts_with(&[0xFE, 0xFF]) {
                encoding_rs::UTF_16BE.decode(&src[2..])
            } else if src.len() >= 2 && src.starts_with(&[0xFF, 0xFE]) {
                encoding_rs::UTF_16LE.decode(&src[2..])
            } else {
                return Err(format!(
                    "missing byte order mark for UTF-16 working tree file '{rel_path}'"
                ));
            }
        }
        "utf-16be" => encoding_rs::UTF_16BE.decode(src),
        "utf-16le" => encoding_rs::UTF_16LE.decode(src),
        "utf-32" => {
            let (body, big_endian) = if src.len() >= 4 && src.starts_with(&[0, 0, 0xFE, 0xFF]) {
                (&src[4..], true)
            } else if src.len() >= 4 && src.starts_with(&[0xFF, 0xFE, 0, 0]) {
                (&src[4..], false)
            } else {
                return Err(format!(
                    "missing byte order mark for UTF-32 working tree file '{rel_path}'"
                ));
            };
            return decode_utf32_body_to_utf8_bytes(body, rel_path, big_endian);
        }
        // `encode_utf8_blob_to_working_tree_bytes` prefixes a BOM for both
        // explicit-endian UTF-32 labels. Drop a matching BOM here so that a
        // file written on checkout decodes back to the original blob instead
        // of gaining a leading U+FEFF.
        "utf-32be" => {
            let body = src.strip_prefix(&[0_u8, 0, 0xFE, 0xFF]).unwrap_or(src);
            return decode_utf32_body_to_utf8_bytes(body, rel_path, true);
        }
        "utf-32le" => {
            let body = src.strip_prefix(&[0xFF_u8, 0xFE, 0, 0]).unwrap_or(src);
            return decode_utf32_body_to_utf8_bytes(body, rel_path, false);
        }
        _ => {
            let Some(enc) = crate::commit_encoding::resolve(label) else {
                return Err(format!(
                    "unknown working-tree-encoding '{label}' for '{rel_path}'"
                ));
            };
            if enc == UTF_8 {
                return Ok(src.to_vec());
            }
            enc.decode(src)
        }
    };
    if had_errors {
        return Err(format!(
            "failed to decode '{rel_path}' from working-tree-encoding {label}"
        ));
    }
    Ok(cow.into_owned().into_bytes())
}
/// Encodes a UTF-8 blob into the working-tree encoding named by `enc_label`.
///
/// The label is trimmed, and `_` is treated like `-` and case is ignored when
/// selecting one of the built-in UTF-16/UTF-32 variants. An empty label or a
/// label that resolves to UTF-8 returns the input unchanged. Any other label
/// is resolved through `crate::commit_encoding::resolve`.
///
/// # Errors
///
/// Returns a message naming `rel_path` when the blob is not valid UTF-8, the
/// label is unknown, or the text cannot be represented in the target encoding.
fn encode_utf8_blob_to_working_tree_bytes(
    src: &[u8],
    rel_path: &str,
    enc_label: &str,
) -> Result<Vec<u8>, String> {
    /// Concatenates a byte order mark and an already encoded body.
    fn with_bom(bom: &[u8], body: Vec<u8>) -> Vec<u8> {
        let mut out = bom.to_vec();
        out.extend(body);
        out
    }

    let label = enc_label.trim();
    if label.is_empty() {
        return Ok(src.to_vec());
    }
    let text = std::str::from_utf8(src).map_err(|_| {
        format!("failed to encode '{rel_path}' from UTF-8: blob is not valid UTF-8")
    })?;

    let normalized = label.replace('_', "-").to_ascii_lowercase();
    let encoded = match normalized.as_str() {
        "utf-16le-bom" | "utf-16" => with_bom(
            &[0xFF, 0xFE],
            utf16_scalar_iter_to_le_bytes(text.encode_utf16()),
        ),
        "utf-16be" => with_bom(
            &[0xFE, 0xFF],
            utf16_scalar_iter_to_be_bytes(text.encode_utf16()),
        ),
        "utf-16le" => utf16_scalar_iter_to_le_bytes(text.encode_utf16()),
        "utf-32" | "utf-32be" => with_bom(&[0, 0, 0xFE, 0xFF], utf32_chars_to_be_bytes(text)),
        "utf-32le" => with_bom(&[0xFF, 0xFE, 0, 0], utf32_chars_to_le_bytes(text)),
        _ => {
            let Some(enc) = crate::commit_encoding::resolve(label) else {
                return Err(format!(
                    "unknown working-tree-encoding '{label}' for '{rel_path}'"
                ));
            };
            if enc == UTF_8 {
                return Ok(src.to_vec());
            }
            let (bytes, _, had_errors) = enc.encode(text);
            if had_errors {
                return Err(format!(
                    "failed to encode '{rel_path}' from UTF-8 to {label}"
                ));
            }
            bytes.into_owned()
        }
    };
    Ok(encoded)
}
/// Converts working-tree content to its repository form using the default
/// [`ConvertToGitOpts`] (no index blob, no renormalisation, safecrlf checking on).
///
/// See [`convert_to_git_with_opts`] for the conversion steps and error cases.
pub fn convert_to_git(
data: &[u8],
rel_path: &str,
conv: &ConversionConfig,
file_attrs: &FileAttrs,
) -> Result<Vec<u8>, String> {
convert_to_git_with_opts(
data,
rel_path,
conv,
file_attrs,
ConvertToGitOpts::default(),
)
}
/// Converts working-tree content to its repository form.
///
/// Steps, in order:
/// 1. run the clean side of the filter driver (process filter first, else the
///    `clean` command);
/// 2. decode from `working-tree-encoding` to UTF-8;
/// 3. decide whether CRLF should become LF, honouring the index-blob guard;
/// 4. run the `core.safecrlf` round-trip check when requested;
/// 5. perform the CRLF-to-LF conversion.
///
/// # Errors
///
/// Returns a message when a filter fails (or a required filter has no clean
/// command), when decoding fails, or when safecrlf rejects the conversion.
pub fn convert_to_git_with_opts(
    data: &[u8],
    rel_path: &str,
    conv: &ConversionConfig,
    file_attrs: &FileAttrs,
    opts: ConvertToGitOpts<'_>,
) -> Result<Vec<u8>, String> {
    let driver = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
    let required_failure = || format!("fatal: {rel_path}: clean filter '{driver}' failed");

    let mut buf = data.to_vec();

    if let Some(proc_cmd) = file_attrs.filter_process.as_ref() {
        buf = apply_process_clean(proc_cmd, rel_path, &buf).map_err(|err| {
            if file_attrs.filter_clean_required {
                required_failure()
            } else {
                format!("clean filter failed: {err}")
            }
        })?;
    } else if let Some(clean_cmd) = file_attrs.filter_clean.as_ref() {
        buf = run_filter(clean_cmd, &buf, rel_path).map_err(|err| {
            if file_attrs.filter_clean_required {
                required_failure()
            } else {
                format!("clean filter failed: {err}")
            }
        })?;
    } else if file_attrs.filter_clean_required {
        // A required driver without a clean command cannot succeed.
        return Err(required_failure());
    }

    if let Some(enc) = &file_attrs.working_tree_encoding {
        buf = decode_working_tree_bytes_to_utf8(&buf, rel_path, enc)?;
    }

    let would_convert = would_convert_on_input(conv, file_attrs, &buf);
    // Keep CRLFs when the blob already in the index has them, unless renormalising.
    let guarded_by_index = || {
        clean_uses_autocrlf_index_guard(file_attrs, conv)
            && !opts.renormalize
            && opts.index_blob.is_some_and(has_crlf_in_index_blob)
    };
    let convert_crlf_into_lf = would_convert && has_crlf(&buf) && !guarded_by_index();

    if would_convert && opts.check_safecrlf {
        check_safecrlf_roundtrip(conv, file_attrs, &buf, rel_path, convert_crlf_into_lf)?;
    }

    if convert_crlf_into_lf {
        buf = crlf_to_lf(&buf);
    }
    Ok(buf)
}
/// Decides whether line-ending conversion applies to `data` when it enters
/// the repository, given the path's attributes and the configuration.
fn would_convert_on_input(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
    let textual = || !is_binary(data);

    // The `crlf` attribute, when present, decides on its own.
    match attrs.crlf_legacy {
        CrlfLegacyAttr::Unset => return false,
        CrlfLegacyAttr::Input => return textual(),
        CrlfLegacyAttr::Crlf => return attrs.text != TextAttr::Unset && textual(),
        CrlfLegacyAttr::Unspecified => {}
    }

    match attrs.text {
        TextAttr::Unset => false,
        TextAttr::Set => true,
        TextAttr::Auto => textual(),
        // An `eol` attribute alone implies text.
        TextAttr::Unspecified if attrs.eol != EolAttr::Unspecified => true,
        TextAttr::Unspecified => conv.autocrlf != AutoCrlf::False && textual(),
    }
}
/// Prints the safecrlf warning for `rel_path` to stderr: CRLF will become LF.
fn eprint_safecrlf_warn_crlf_to_lf(rel_path: &str) {
eprintln!(
"warning: in the working copy of '{rel_path}', CRLF will be replaced by LF the next time Git touches it"
);
}
/// Prints the safecrlf warning for `rel_path` to stderr: LF will become CRLF.
fn eprint_safecrlf_warn_lf_to_crlf(rel_path: &str) {
eprintln!(
"warning: in the working copy of '{rel_path}', LF will be replaced by CRLF the next time Git touches it"
);
}
/// Applies `core.safecrlf`: simulates clean followed by checkout on the byte
/// statistics of `data` and reports line endings that would not survive.
///
/// With `SafeCrlf::False` nothing is checked. With `SafeCrlf::Warn` a warning
/// is printed to stderr. With `SafeCrlf::True` an error is returned.
///
/// # Errors
///
/// Returns a `fatal:` message when the round trip would replace every CRLF by
/// LF (or every lone LF by CRLF) and `core.safecrlf` is `true`.
fn check_safecrlf_roundtrip(
    conv: &ConversionConfig,
    file_attrs: &FileAttrs,
    data: &[u8],
    rel_path: &str,
    convert_crlf_into_lf: bool,
) -> Result<(), String> {
    if conv.safecrlf == SafeCrlf::False {
        return Ok(());
    }
    let strict = conv.safecrlf == SafeCrlf::True;

    let before = git_text_stat(data);
    let mut after = before.clone();

    // Simulate the clean direction.
    if convert_crlf_into_lf && after.crlf > 0 {
        after.lonelf += after.crlf;
        after.crlf = 0;
    }
    // Simulate the following checkout.
    if will_convert_lf_to_crlf_from_stats(&after, conv, file_attrs) {
        after.crlf += after.lonelf;
        after.lonelf = 0;
    }

    if before.crlf > 0 && after.crlf == 0 {
        if strict {
            return Err(format!("fatal: CRLF would be replaced by LF in {rel_path}"));
        }
        eprint_safecrlf_warn_crlf_to_lf(rel_path);
    } else if before.lonelf > 0 && after.lonelf == 0 {
        if strict {
            return Err(format!("fatal: LF would be replaced by CRLF in {rel_path}"));
        }
        eprint_safecrlf_warn_lf_to_crlf(rel_path);
    }
    Ok(())
}
/// Returns a copy of `data` with every CR LF pair replaced by a single LF.
/// Lone CR bytes are kept.
pub fn crlf_to_lf(data: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(data.len());
    let mut bytes = data.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        // Drop a CR that is immediately followed by LF; the LF itself is
        // emitted on the next iteration.
        if byte == b'\r' && bytes.peek() == Some(&b'\n') {
            continue;
        }
        out.push(byte);
    }
    out
}
/// Returns a copy of `data` with a CR inserted before every LF that is not
/// already preceded by one.
pub fn lf_to_crlf(data: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(data.len() + data.len() / 10);
    let mut previous: Option<u8> = None;
    for &byte in data {
        if byte == b'\n' && previous != Some(b'\r') {
            out.push(b'\r');
        }
        out.push(byte);
        previous = Some(byte);
    }
    out
}
pub fn convert_to_worktree(
data: &[u8],
rel_path: &str,
conv: &ConversionConfig,
file_attrs: &FileAttrs,
oid_hex: Option<&str>,
smudge_meta: Option<&FilterSmudgeMeta>,
delayed_checkout: Option<&mut crate::filter_process::DelayedProcessCheckout>,
) -> Result<Option<Vec<u8>>, String> {
let mut buf = data.to_vec();
if file_attrs.ident {
if let Some(oid) = oid_hex {
buf = expand_ident(&buf, oid);
}
}
let can_delay_smudge = delayed_checkout.is_some()
&& file_attrs.working_tree_encoding.is_none()
&& !file_attrs.ident
&& file_attrs
.filter_process
.as_deref()
.is_some_and(|c| !c.is_empty())
&& !should_convert_to_crlf(conv, file_attrs, &buf)
&& file_attrs
.filter_process
.as_deref()
.is_some_and(crate::filter_process::process_filter_supports_delay);
let should_convert = should_convert_to_crlf(conv, file_attrs, &buf);
if should_convert {
buf = lf_to_crlf(&buf);
}
if let Some(ref enc) = file_attrs.working_tree_encoding {
buf = encode_utf8_blob_to_working_tree_bytes(&buf, rel_path, enc)?;
}
let driver = file_attrs.filter_driver_name.as_deref().unwrap_or("");
if let Some(ref proc_cmd) = file_attrs.filter_process {
let smudge_out =
apply_process_smudge(proc_cmd, rel_path, &buf, smudge_meta, can_delay_smudge).map_err(
|_e| {
if file_attrs.filter_smudge_required {
format!("fatal: {rel_path}: smudge filter {driver} failed")
} else {
_e
}
},
)?;
let Some(out) = smudge_out else {
let Some(q) = delayed_checkout else {
return Err(format!(
"internal error: delayed smudge without checkout queue for {rel_path}"
));
};
q.push_delayed(
proc_cmd.clone(),
rel_path.to_string(),
smudge_meta.cloned().unwrap_or_default(),
);
return Ok(None);
};
buf = out;
} else {
match file_attrs.filter_smudge.as_ref() {
Some(smudge_cmd) => match run_filter(smudge_cmd, &buf, rel_path) {
Ok(filtered) => buf = filtered,
Err(_e) => {
if file_attrs.filter_smudge_required {
return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
}
}
},
None => {
if file_attrs.filter_smudge_required {
return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
}
}
}
}
Ok(Some(buf))
}
/// Like [`convert_to_worktree`] but without a delayed-checkout queue, so the
/// converted content is always returned directly.
///
/// # Errors
///
/// Propagates conversion errors, and reports an internal error if the smudge
/// is nevertheless reported as delayed.
#[must_use]
pub fn convert_to_worktree_eager(
    data: &[u8],
    rel_path: &str,
    conv: &ConversionConfig,
    file_attrs: &FileAttrs,
    oid_hex: Option<&str>,
    smudge_meta: Option<&FilterSmudgeMeta>,
) -> Result<Vec<u8>, String> {
    convert_to_worktree(data, rel_path, conv, file_attrs, oid_hex, smudge_meta, None)?
        .ok_or_else(|| format!("internal error: unexpected delayed smudge for {rel_path}"))
}
/// Decides whether `data` should have its LFs converted to CRLF on checkout.
///
/// Precedence: the `crlf` attribute, then `-text`, then an explicit `eol`,
/// then `text`/`text=auto` combined with the configured output line ending,
/// and finally `core.autocrlf=true`. In the auto-detected cases, binary
/// content and content that already contains CRs is left alone.
#[must_use]
pub fn should_convert_to_crlf(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
    let textual = || !is_binary(data);
    let auto = || auto_crlf_should_smudge_lf_to_crlf(data);

    // The `crlf` attribute, when present, decides on its own.
    if attrs.crlf_legacy != CrlfLegacyAttr::Unspecified {
        return attrs.crlf_legacy == CrlfLegacyAttr::Crlf && attrs.text != TextAttr::Unset;
    }

    match (attrs.text, attrs.eol) {
        (TextAttr::Unset, _) => false,
        // Explicit `eol`.
        (TextAttr::Auto, EolAttr::Crlf) => textual() && auto(),
        (_, EolAttr::Crlf) => true,
        (_, EolAttr::Lf) => false,
        // No `eol`: fall back to configuration.
        (TextAttr::Set, EolAttr::Unspecified) => output_eol_is_crlf(conv),
        (TextAttr::Auto, EolAttr::Unspecified) => {
            textual() && output_eol_is_crlf(conv) && auto()
        }
        (TextAttr::Unspecified, EolAttr::Unspecified) => {
            conv.autocrlf == AutoCrlf::True && textual() && auto()
        }
    }
}
/// Returns whether text files are checked out with CRLF under `conv`.
///
/// `core.autocrlf` wins over `core.eol`; `core.eol=native` means CRLF only
/// when compiled for Windows.
fn output_eol_is_crlf(conv: &ConversionConfig) -> bool {
    match (conv.autocrlf, conv.eol) {
        (AutoCrlf::Input, _) => false,
        (AutoCrlf::True, _) => true,
        (AutoCrlf::False, CoreEol::Crlf) => true,
        (AutoCrlf::False, CoreEol::Lf) => false,
        (AutoCrlf::False, CoreEol::Native) => cfg!(windows),
    }
}
/// Replaces `$Id$` and `$Id: ... $` keywords in `data` with `$Id: <oid> $`.
///
/// An already expanded keyword is rewritten only if its closing `$` is on the
/// same line and its payload has no space other than at its first or last
/// position; anything else is assumed to come from a foreign tool and is kept
/// verbatim. Content without any keyword is returned unchanged.
fn expand_ident(data: &[u8], oid: &str) -> Vec<u8> {
    if !count_ident_regions(data) {
        return data.to_vec();
    }

    let expanded = format!("$Id: {oid} $");
    let mut out = Vec::with_capacity(data.len() + 60);
    let mut pos = 0;

    while pos < data.len() {
        let here = &data[pos..];

        // Number of input bytes covered by a replaceable keyword starting at `pos`.
        let keyword_len: Option<usize> = if !here.starts_with(b"$Id") {
            None
        } else {
            match here.get(3) {
                Some(b'$') => Some(4),
                Some(b':') => {
                    let tail = &here[4..];
                    let line_len = tail
                        .iter()
                        .position(|&b| b == b'\n' || b == b'\r')
                        .unwrap_or(tail.len());
                    tail[..line_len]
                        .iter()
                        .position(|&b| b == b'$')
                        .and_then(|dollar| {
                            let payload = &tail[..dollar];
                            let foreign = payload
                                .iter()
                                .skip(1)
                                .position(|&b| b == b' ')
                                .is_some_and(|rel| 1 + rel < payload.len().saturating_sub(1));
                            (!foreign).then_some(4 + dollar + 1)
                        })
                }
                _ => None,
            }
        };

        match keyword_len {
            Some(len) => {
                out.extend_from_slice(expanded.as_bytes());
                pos += len;
            }
            None => {
                out.push(data[pos]);
                pos += 1;
            }
        }
    }
    out
}
/// Returns `true` when `data` contains at least one `$Id$` keyword, or a
/// `$Id:` whose closing `$` appears before the end of the line.
fn count_ident_regions(data: &[u8]) -> bool {
    let mut pos = 0usize;
    while pos < data.len() {
        let here = &data[pos..];
        pos += 1;

        if !here.starts_with(b"$Id") {
            continue;
        }
        match here.get(3) {
            Some(b'$') => return true,
            Some(b':') => {
                // The first `$`, LF or CR after the colon decides.
                let terminator = here[4..]
                    .iter()
                    .find(|&&b| matches!(b, b'$' | b'\n' | b'\r'));
                if terminator == Some(&b'$') {
                    return true;
                }
            }
            _ => {}
        }
    }
    false
}
/// Collapses every expanded `$Id: ... $` keyword in `data` back to `$Id$`.
///
/// A keyword is only collapsed when its closing `$` is found before the next
/// LF or CR; everything else is copied through unchanged.
pub fn collapse_ident(data: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(data.len());
    let mut rest = data;

    while let Some((&head, tail)) = rest.split_first() {
        // Offset of the closing `$` relative to the byte after `$Id:`.
        let closing = rest.strip_prefix(b"$Id:").and_then(|after| {
            after
                .iter()
                .take_while(|&&b| b != b'\n' && b != b'\r')
                .position(|&b| b == b'$')
        });
        match closing {
            Some(end) => {
                out.extend_from_slice(b"$Id$");
                rest = &rest[4 + end + 1..];
            }
            None => {
                out.push(head);
                rest = tail;
            }
        }
    }
    out
}
/// Wraps `s` in single quotes for use as one shell word.
///
/// Each embedded `'` is rewritten as `'\''` (close the quote, emit an escaped
/// quote, reopen the quote); every other character is copied verbatim.
fn sq_quote_buf(s: &str) -> String {
    format!("'{}'", s.replace('\'', r"'\''"))
}
/// Expands the `%` placeholders in a filter command line.
///
/// * `%f` becomes `rel_path`, single-quoted via `sq_quote_buf`.
/// * `%%` becomes a literal `%`.
/// * A `%` followed by anything else (or by nothing) is kept as a literal
///   `%`, and the following character is processed normally.
fn expand_filter_command(cmd: &str, rel_path: &str) -> String {
    let mut expanded = String::with_capacity(cmd.len() + rel_path.len() + 8);
    let mut rest = cmd;
    while let Some(percent_at) = rest.find('%') {
        // Copy the literal run before the `%`, then decide what the `%` means.
        expanded.push_str(&rest[..percent_at]);
        let after_percent = &rest[percent_at + 1..];
        rest = if let Some(tail) = after_percent.strip_prefix('%') {
            expanded.push('%');
            tail
        } else if let Some(tail) = after_percent.strip_prefix('f') {
            expanded.push_str(&sq_quote_buf(rel_path));
            tail
        } else {
            expanded.push('%');
            after_percent
        };
    }
    expanded.push_str(rest);
    expanded
}
/// Runs the filter command `cmd` through `sh -c`, feeding `data` on its
/// standard input and returning everything it writes to standard output.
///
/// `cmd` is first passed through `expand_filter_command` with `rel_path`.
/// The child's standard error is inherited from this process.
///
/// # Errors
///
/// Returns an error if the shell cannot be spawned, if writing to the child
/// fails for any reason other than a broken pipe (a filter may legitimately
/// stop reading early), if waiting for the child fails, or if the command
/// exits with a non-success status.
fn run_filter(cmd: &str, data: &[u8], rel_path: &str) -> Result<Vec<u8>, std::io::Error> {
    use std::io::{ErrorKind, Write};

    let expanded = expand_filter_command(cmd, rel_path);
    let mut child = Command::new("sh")
        .arg("-c")
        .arg(&expanded)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::inherit())
        .spawn()?;
    let stdin = child.stdin.take();

    // Feed stdin from a separate thread while this thread drains stdout.
    // Writing everything first and reading afterwards deadlocks once the
    // input exceeds the pipe buffers: a streaming filter blocks writing its
    // output while we are blocked writing its input.
    let (fed, collected) = std::thread::scope(|scope| {
        let feeder = scope.spawn(move || -> std::io::Result<()> {
            let Some(mut pipe) = stdin else {
                return Ok(());
            };
            match pipe.write_all(data) {
                Err(e) if e.kind() != ErrorKind::BrokenPipe => Err(e),
                _ => Ok(()),
            }
            // `pipe` is dropped when this closure returns, so the child sees EOF.
        });
        // Always wait for the child, even if feeding fails, so it is reaped.
        let collected = child.wait_with_output();
        let fed = feeder.join().unwrap_or_else(|_| {
            Err(std::io::Error::other(
                "filter stdin writer thread panicked",
            ))
        });
        (fed, collected)
    });

    fed?;
    let output = collected?;
    if !output.status.success() {
        return Err(std::io::Error::other(format!(
            "filter command exited with status {}",
            output.status
        )));
    }
    Ok(output.stdout)
}
/// Convenience alias for a list of [`AttrRule`] entries.
pub type GitAttributes = Vec<AttrRule>;
#[cfg(test)]
mod tests {
    use super::*;

    /// Conversion settings shared by the smudge tests: `autocrlf` is `True`,
    /// `eol` is `Lf`, and `safecrlf` is `False`.
    fn autocrlf_true_config() -> ConversionConfig {
        ConversionConfig {
            autocrlf: AutoCrlf::True,
            eol: CoreEol::Lf,
            safecrlf: SafeCrlf::False,
        }
    }

    /// CRLF pairs become LF; input that is already LF-only is unchanged.
    #[test]
    fn test_crlf_to_lf() {
        assert_eq!(crlf_to_lf(b"hello\r\nworld\r\n"), b"hello\nworld\n");
        assert_eq!(crlf_to_lf(b"hello\nworld\n"), b"hello\nworld\n");
        assert_eq!(crlf_to_lf(b"hello\r\n"), b"hello\n");
    }

    /// Bare LF becomes CRLF; existing CRLF pairs are not doubled.
    #[test]
    fn test_lf_to_crlf() {
        assert_eq!(lf_to_crlf(b"hello\nworld\n"), b"hello\r\nworld\r\n");
        assert_eq!(lf_to_crlf(b"hello\r\nworld\r\n"), b"hello\r\nworld\r\n");
    }

    /// Only a CR immediately followed by LF counts as CRLF.
    #[test]
    fn test_has_crlf() {
        assert!(has_crlf(b"hello\r\nworld"));
        assert!(!has_crlf(b"hello\nworld"));
    }

    /// A blob that mixes LF and CRLF lines is written out untouched.
    #[test]
    fn smudge_mixed_line_endings_unchanged_with_autocrlf_true() {
        let blob: Vec<u8> = [
            b"Oh\n".as_slice(),
            b"here\n",
            b"is\n",
            b"CRLF\r\n",
            b"in\n",
            b"text\n",
        ]
        .concat();
        let config = autocrlf_true_config();
        let attrs = FileAttrs::default();
        let smudged =
            convert_to_worktree_eager(&blob, "mixed", &config, &attrs, None, None).unwrap();
        assert_eq!(smudged, blob);
    }

    /// A blob with LF-only line endings is converted to CRLF.
    #[test]
    fn smudge_lf_only_gets_crlf_with_autocrlf_true() {
        let blob = b"a\nb\n";
        let config = autocrlf_true_config();
        let attrs = FileAttrs::default();
        let smudged = convert_to_worktree_eager(blob, "x", &config, &attrs, None, None).unwrap();
        assert_eq!(smudged, b"a\r\nb\r\n");
    }

    /// A NUL byte marks content as binary.
    #[test]
    fn test_is_binary() {
        assert!(is_binary(b"hello\0world"));
        assert!(!is_binary(b"hello world"));
    }

    /// A pattern with a trailing slash matches a directory of that name but
    /// not a regular file with the same basename.
    #[test]
    fn attr_dir_only_pattern_does_not_match_same_named_file() {
        let parsed = parse_gitattributes_content("ignored-only-if-dir/ export-ignore\n");
        let dir_rule = &parsed[0];
        assert!(dir_rule.must_be_dir);
        assert!(dir_rule.basename_only);
        assert!(!attr_rule_matches(
            dir_rule,
            "not-ignored-dir/ignored-only-if-dir",
            false
        ));
        assert!(attr_rule_matches(dir_rule, "ignored-only-if-dir", true));
    }

    /// Expanding `$Id$` and collapsing the result round-trips.
    #[test]
    fn test_expand_collapse_ident() {
        let keyword = b"$Id$";
        let with_oid = expand_ident(keyword, "abc123");
        assert_eq!(with_oid, b"$Id: abc123 $");
        let round_tripped = collapse_ident(&with_oid);
        assert_eq!(round_tripped, b"$Id$");
    }

    /// An unterminated `$Id:` on one line must not pair with a `$` on a
    /// later line.
    #[test]
    fn expand_ident_does_not_span_lines_for_partial_keyword() {
        let input = b"$Id: NoTerminatingSymbol\n$Id: deadbeef $\n";
        let with_oid = expand_ident(input, "newoid");
        assert_eq!(with_oid, b"$Id: NoTerminatingSymbol\n$Id: newoid $\n");
    }

    /// An `$Id: ... $` payload with interior spaces is left untouched.
    #[test]
    fn expand_ident_preserves_foreign_id_with_internal_spaces() {
        let input = b"$Id: Foreign Commit With Spaces $\n";
        let with_oid = expand_ident(input, "abc");
        assert_eq!(with_oid, input);
    }

    /// `%f` expands to the shell-quoted path and `%%` to a literal percent.
    #[test]
    fn expand_filter_command_percent_f_quotes_path() {
        let command_line = expand_filter_command("sh ./x.sh %f --extra", "name with 'sq'");
        assert_eq!(command_line, "sh ./x.sh 'name with '\\''sq'\\''' --extra");
        assert_eq!(expand_filter_command("a %% b", "p"), "a % b");
    }
}