use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use encoding_rs::UTF_8;
use crate::config::ConfigSet;
use crate::filter_process::{apply_process_clean, apply_process_smudge, FilterSmudgeMeta};
use crate::objects::{parse_tree, ObjectId, ObjectKind};
use crate::odb::Odb;
/// Parsed value of the `core.autocrlf` configuration setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AutoCrlf {
/// Chosen for the values `true`, `yes`, `on` or `1` (case-insensitive).
True,
/// Chosen for the value `input`.
Input,
/// Chosen when the setting is missing or holds any other value.
False,
}
/// Parsed value of the `core.eol` configuration setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CoreEol {
/// Chosen for the value `lf`.
Lf,
/// Chosen for the value `crlf`.
Crlf,
/// Chosen for `native`, for any unrecognised value, and when the setting is missing.
Native,
}
/// Parsed value of the `core.safecrlf` configuration setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SafeCrlf {
/// Chosen for `true`, `yes`, `on` or `1`; irreversible conversions become errors.
True,
/// Chosen for `warn` and when the setting is missing; irreversible conversions only warn.
Warn,
/// Chosen for any other value; the round-trip check is skipped.
False,
}
/// State of the `text` gitattribute for a path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextAttr {
/// The attribute was given without a value (`text`).
Set,
/// The attribute was given as `text=auto`.
Auto,
/// The attribute was negated (`-text`, also produced by the `binary` macro).
Unset,
/// No rule mentioned the attribute, or it carried an unrecognised value.
Unspecified,
}
/// State of the `eol` gitattribute for a path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EolAttr {
/// `eol=lf`.
Lf,
/// `eol=crlf`.
Crlf,
/// No rule mentioned the attribute, or it carried any other value.
Unspecified,
}
/// State of the legacy `crlf` gitattribute for a path.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CrlfLegacyAttr {
/// No rule mentioned the attribute, or it carried an unrecognised value (the default).
#[default]
Unspecified,
/// The attribute was negated (`-crlf`).
Unset,
/// The attribute was given as `crlf=input`.
Input,
/// The attribute was given without a value (`crlf`).
Crlf,
}
/// State of the `merge` gitattribute for a path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeAttr {
/// No rule mentioned the attribute; a bare `merge` is also mapped here by `get_file_attrs`.
Unspecified,
/// The attribute was negated (`-merge`).
Unset,
/// `merge=<name>`: the name of the merge driver to use.
Driver(String),
}
/// State of the `diff` gitattribute for a path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiffAttr {
/// No rule mentioned the attribute.
Unspecified,
/// The attribute was negated (`-diff`, also produced by the `binary` macro).
Unset,
/// The attribute was given without a value (`diff`).
Set,
/// `diff=<name>`: the name of the diff driver to use.
Driver(String),
}
/// Attribute values resolved for one path by `get_file_attrs`.
#[derive(Debug, Clone)]
pub struct FileAttrs {
/// State of the `text` attribute.
pub text: TextAttr,
/// State of the `eol` attribute.
pub eol: EolAttr,
/// State of the `diff` attribute.
pub diff_attr: DiffAttr,
/// True when `export-ignore` was given with any value other than unset.
pub export_ignore: bool,
/// True when `export-subst` was given with any value other than unset.
pub export_subst: bool,
/// Command from `filter.<driver>.clean`; `None` when a process filter is configured.
pub filter_clean: Option<String>,
/// Command from `filter.<driver>.smudge`; `None` when a process filter is configured.
pub filter_smudge: Option<String>,
/// Non-empty command from `filter.<driver>.process`, if any.
pub filter_process: Option<String>,
/// Driver name taken from the `filter` attribute value.
pub filter_driver_name: Option<String>,
/// Mirrors the boolean `filter.<driver>.required` setting.
pub filter_smudge_required: bool,
/// Mirrors the boolean `filter.<driver>.required` setting.
pub filter_clean_required: bool,
/// True only when the `ident` attribute is set without a value.
pub ident: bool,
/// State of the `merge` attribute.
pub merge: MergeAttr,
/// Raw text of the `conflict-marker-size` attribute; `None` when unset or never given.
pub conflict_marker_size: Option<String>,
/// Raw text of the `working-tree-encoding` attribute, if a non-empty value was given.
pub working_tree_encoding: Option<String>,
/// State of the legacy `crlf` attribute.
pub crlf_legacy: CrlfLegacyAttr,
/// Raw text of the `whitespace` attribute; a negated attribute is stored as the literal `unset`.
pub whitespace: Option<String>,
}
impl Default for FileAttrs {
    /// Returns the attribute set of a path that no rule mentions: every
    /// tri-state is `Unspecified`, every flag is off and every optional
    /// string is absent.
    fn default() -> Self {
        Self {
            // Line-ending related tri-states.
            text: TextAttr::Unspecified,
            eol: EolAttr::Unspecified,
            crlf_legacy: CrlfLegacyAttr::Unspecified,
            // Diff / merge behaviour.
            diff_attr: DiffAttr::Unspecified,
            merge: MergeAttr::Unspecified,
            conflict_marker_size: None,
            whitespace: None,
            // Archive export flags.
            export_ignore: false,
            export_subst: false,
            // Filter driver configuration.
            filter_driver_name: None,
            filter_process: None,
            filter_clean: None,
            filter_smudge: None,
            filter_clean_required: false,
            filter_smudge_required: false,
            // Content rewriting.
            ident: false,
            working_tree_encoding: None,
        }
    }
}
/// Conversion-related settings read from the repository configuration.
#[derive(Debug, Clone)]
pub struct ConversionConfig {
/// Parsed `core.autocrlf`.
pub autocrlf: AutoCrlf,
/// Parsed `core.eol`.
pub eol: CoreEol,
/// Parsed `core.safecrlf`.
pub safecrlf: SafeCrlf,
/// Raw, non-empty value of `core.checkRoundtripEncoding`, if configured.
pub check_roundtrip_encoding: Option<String>,
}
impl ConversionConfig {
    /// Reads `core.autocrlf`, `core.eol`, `core.safecrlf` and
    /// `core.checkRoundtripEncoding` from `config`.
    ///
    /// Enum-valued settings are compared case-insensitively. Unknown values
    /// fall back to `AutoCrlf::False`, `CoreEol::Native` and `SafeCrlf::False`;
    /// a missing `core.safecrlf` yields `SafeCrlf::Warn`.
    pub fn from_config(config: &ConfigSet) -> Self {
        // All three enum settings are matched on their lower-cased text.
        let lowered = |key: &str| config.get(key).map(|raw| raw.to_lowercase());

        let autocrlf = match lowered("core.autocrlf").as_deref() {
            Some("true" | "yes" | "on" | "1") => AutoCrlf::True,
            Some("input") => AutoCrlf::Input,
            Some(_) | None => AutoCrlf::False,
        };

        let eol = match lowered("core.eol").as_deref() {
            Some("crlf") => CoreEol::Crlf,
            Some("lf") => CoreEol::Lf,
            Some(_) | None => CoreEol::Native,
        };

        let safecrlf = match lowered("core.safecrlf").as_deref() {
            Some("true" | "yes" | "on" | "1") => SafeCrlf::True,
            Some("warn") | None => SafeCrlf::Warn,
            Some(_) => SafeCrlf::False,
        };

        // An empty list is treated the same as an absent setting.
        let check_roundtrip_encoding = config
            .get("core.checkRoundtripEncoding")
            .filter(|list| !list.is_empty());

        Self {
            autocrlf,
            eol,
            safecrlf,
            check_roundtrip_encoding,
        }
    }
}
/// One parsed gitattributes line: a pattern plus the attributes it assigns.
#[derive(Debug, Clone)]
pub struct AttrRule {
/// Pattern text; a trailing `/` has been removed when the raw pattern was longer than one character.
pattern: String,
/// True when the raw pattern ended with `/`, so it may only match directories.
must_be_dir: bool,
/// True when the pattern contains no `/` and is therefore matched against the basename only.
basename_only: bool,
/// `(name, value)` pairs; the value is `set`, `unset`, or the text following `=`.
attrs: Vec<(String, String)>, }
impl AttrRule {
    /// Yields the driver names assigned by this rule through `diff=<name>`.
    ///
    /// Bare `diff`, `-diff` and an empty value are not driver names and are
    /// skipped.
    pub fn diff_drivers(&self) -> impl Iterator<Item = &str> + '_ {
        self.attrs
            .iter()
            .filter(|(name, _)| name == "diff")
            .map(|(_, value)| value.as_str())
            .filter(|value| !matches!(*value, "" | "unset" | "set"))
    }
}
/// Loads the rules from `<work_tree>/.gitattributes` followed by
/// `<work_tree>/.git/info/attributes`.
///
/// A file that cannot be read as UTF-8 text is silently skipped.
pub fn load_gitattributes(work_tree: &Path) -> Vec<AttrRule> {
    let mut collected = Vec::new();
    for relative in [".gitattributes", ".git/info/attributes"] {
        if let Ok(text) = std::fs::read_to_string(work_tree.join(relative)) {
            parse_gitattributes(&text, &mut collected);
        }
    }
    collected
}
/// Parses the text of a gitattributes file into a fresh rule list.
#[must_use]
pub fn parse_gitattributes_content(content: &str) -> Vec<AttrRule> {
    let mut parsed: Vec<AttrRule> = Vec::new();
    parse_gitattributes(content, &mut parsed);
    parsed
}
/// Loads the rules from the stage-0 `.gitattributes` entry of `index`.
///
/// Returns an empty list when the entry is missing, its blob cannot be read
/// from `odb`, or the blob is not valid UTF-8.
pub fn load_gitattributes_from_index(
    index: &crate::index::Index,
    odb: &crate::odb::Odb,
) -> Vec<AttrRule> {
    let mut rules = Vec::new();
    let Some(entry) = index.get(b".gitattributes", 0) else {
        return rules;
    };
    let Ok(obj) = odb.read(&entry.oid) else {
        return rules;
    };
    if let Ok(text) = String::from_utf8(obj.data) {
        parse_gitattributes(&text, &mut rules);
    }
    rules
}
/// Collects the rules that apply while checking out `rel_path`.
///
/// Starts with `load_gitattributes(work_tree)`. When the work tree has no
/// top-level `.gitattributes`, the stage-0 index entry is used instead. Then
/// every ancestor directory of `rel_path` is visited from the outermost
/// inwards; each one contributes its `.gitattributes` from the work tree when
/// readable, otherwise from the index.
pub fn load_gitattributes_for_checkout(
    work_tree: &Path,
    rel_path: &str,
    index: &crate::index::Index,
    odb: &crate::odb::Odb,
) -> Vec<AttrRule> {
    // Index fallback shared by the top-level file and every ancestor directory.
    let parse_index_blob = |key: &[u8], sink: &mut Vec<AttrRule>| {
        let Some(entry) = index.get(key, 0) else {
            return;
        };
        let Ok(obj) = odb.read(&entry.oid) else {
            return;
        };
        if let Ok(text) = String::from_utf8(obj.data) {
            parse_gitattributes(&text, sink);
        }
    };

    let mut rules = load_gitattributes(work_tree);
    if !work_tree.join(".gitattributes").exists() {
        parse_index_blob(&b".gitattributes"[..], &mut rules);
    }

    let Some(parent) = Path::new(rel_path).parent() else {
        return rules;
    };
    let mut dir = PathBuf::new();
    for component in parent.components() {
        dir.push(component);
        let relative = dir.join(".gitattributes");
        match std::fs::read_to_string(work_tree.join(&relative)) {
            Ok(text) => parse_gitattributes(&text, &mut rules),
            Err(_) => parse_index_blob(&path_to_index_bytes(&relative), &mut rules),
        }
    }
    rules
}
/// Collects the rules that apply to `rel_path` using only blobs reachable
/// from the tree `tree_oid`.
///
/// The top-level `.gitattributes` is read first, followed by the
/// `.gitattributes` of each ancestor directory of `rel_path`, outermost first.
pub fn load_gitattributes_for_tree_path(
    odb: &Odb,
    tree_oid: &ObjectId,
    rel_path: &str,
) -> Vec<AttrRule> {
    let mut rules = Vec::new();
    load_gitattributes_blob_from_tree(odb, tree_oid, ".gitattributes", &mut rules);

    let Some(parent) = Path::new(rel_path).parent() else {
        return rules;
    };
    let mut dir = PathBuf::new();
    for component in parent.components() {
        dir.push(component);
        // Tree lookups split on `/`, so backslashes are rewritten first.
        let candidate = dir
            .join(".gitattributes")
            .to_string_lossy()
            .replace('\\', "/");
        load_gitattributes_blob_from_tree(odb, tree_oid, &candidate, &mut rules);
    }
    rules
}
/// Appends the rules found in the blob at `ga_path` inside `tree_oid`.
///
/// Does nothing when the path is absent, the object cannot be read, the
/// object is not a blob, or its content is not valid UTF-8.
fn load_gitattributes_blob_from_tree(
    odb: &Odb,
    tree_oid: &ObjectId,
    ga_path: &str,
    rules: &mut Vec<AttrRule>,
) {
    if let Some(oid) = lookup_tree_path(odb, tree_oid, ga_path) {
        if let Ok(obj) = odb.read(&oid) {
            if obj.kind == ObjectKind::Blob {
                if let Ok(text) = String::from_utf8(obj.data) {
                    parse_gitattributes(&text, rules);
                }
            }
        }
    }
}
/// Resolves `rel_path` (components separated by `/`) inside the tree
/// `tree_oid` and returns the object id of the final component.
///
/// Returns `None` when an intermediate object cannot be read or parsed, is
/// not a tree, a component is missing, or a non-final component does not have
/// the directory mode `0o040000`.
fn lookup_tree_path(odb: &Odb, tree_oid: &ObjectId, rel_path: &str) -> Option<ObjectId> {
    let mut current = *tree_oid;
    let mut parts = rel_path.split('/').peekable();
    while let Some(part) = parts.next() {
        // The argument was garbled to `¤t` in the source (an HTML-entity
        // corruption of `&current`); restored so the function compiles.
        let obj = odb.read(&current).ok()?;
        if obj.kind != ObjectKind::Tree {
            return None;
        }
        let entries = parse_tree(&obj.data).ok()?;
        let entry = entries
            .iter()
            .find(|entry| String::from_utf8_lossy(&entry.name) == part)?;
        // Last component: this entry is the answer, whatever its mode.
        if parts.peek().is_none() {
            return Some(entry.oid);
        }
        // More components follow, so this entry must be a directory to descend into.
        if entry.mode != 0o040000 {
            return None;
        }
        current = entry.oid;
    }
    None
}
/// Converts a relative path into the byte key used for index lookups.
///
/// On Unix the raw OS bytes are returned unchanged. Elsewhere the path is
/// converted lossily to UTF-8 and `\` separators are rewritten to `/`,
/// because `PathBuf::join` produces backslashes on Windows while index paths
/// are `/`-separated. This is the same normalisation
/// `load_gitattributes_for_tree_path` applies before its tree lookups.
fn path_to_index_bytes(path: &Path) -> Vec<u8> {
    #[cfg(unix)]
    {
        use std::os::unix::ffi::OsStrExt;
        path.as_os_str().as_bytes().to_vec()
    }
    #[cfg(not(unix))]
    {
        path.to_string_lossy().replace('\\', "/").into_bytes()
    }
}
/// Parses gitattributes text and appends one `AttrRule` per effective line.
///
/// Blank lines and lines starting with `#` are ignored. The first
/// whitespace-separated token is the pattern; a trailing `/` (on patterns
/// longer than one character) marks a directory-only rule, and a pattern
/// without any `/` is matched against basenames only. Remaining tokens are
/// attributes:
///
/// * `binary` expands to `-text -diff -merge`, matching git's built-in macro
///   (`-merge` was previously missing),
/// * `-name` records `name` as `unset`,
/// * `name=value` records the explicit value,
/// * `name` records `set`.
///
/// Lines that assign no attributes produce no rule.
fn parse_gitattributes(content: &str, rules: &mut Vec<AttrRule>) {
    for line in content.lines() {
        let line = line.trim();
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        let mut parts = line.split_whitespace();
        let Some(raw_pattern) = parts.next() else {
            continue;
        };

        let mut pat = raw_pattern.to_owned();
        let mut must_be_dir = false;
        if pat.ends_with('/') && pat.len() > 1 {
            pat.pop();
            must_be_dir = true;
        }
        let basename_only = !pat.contains('/');

        let mut attrs = Vec::new();
        for part in parts {
            if part == "binary" {
                // Built-in macro: `[attr]binary -diff -merge -text`.
                attrs.push(("text".to_owned(), "unset".to_owned()));
                attrs.push(("diff".to_owned(), "unset".to_owned()));
                attrs.push(("merge".to_owned(), "unset".to_owned()));
            } else if let Some(rest) = part.strip_prefix('-') {
                attrs.push((rest.to_owned(), "unset".to_owned()));
            } else if let Some((key, val)) = part.split_once('=') {
                attrs.push((key.to_owned(), val.to_owned()));
            } else {
                attrs.push((part.to_owned(), "set".to_owned()));
            }
        }

        if !attrs.is_empty() {
            rules.push(AttrRule {
                pattern: pat,
                must_be_dir,
                basename_only,
                attrs,
            });
        }
    }
}
/// Returns true when `value`, ignoring surrounding whitespace and ASCII case,
/// is one of `true`, `yes`, `on` or `1`.
fn config_bool_truthy(value: &str) -> bool {
    let normalized = value.trim();
    ["true", "yes", "on", "1"]
        .iter()
        .any(|accepted| normalized.eq_ignore_ascii_case(accepted))
}
/// Resolves the attributes that apply to `rel_path`.
///
/// Rules are visited in slice order and, within a rule, attributes in the
/// order they were written. Each recognised attribute overwrites the value
/// left by earlier rules, so the last matching rule wins. Attribute names
/// this function does not know are ignored.
///
/// `config` is consulted only for the `filter` attribute, to look up the
/// `filter.<driver>.*` settings of the named driver.
pub fn get_file_attrs(
rules: &[AttrRule],
rel_path: &str,
is_dir: bool,
config: &ConfigSet,
) -> FileAttrs {
let mut fa = FileAttrs::default();
for rule in rules {
if attr_rule_matches(rule, rel_path, is_dir) {
for (name, value) in &rule.attrs {
match name.as_str() {
"text" => {
fa.text = match value.as_str() {
"set" => TextAttr::Set,
"unset" => TextAttr::Unset,
"auto" => TextAttr::Auto,
_ => TextAttr::Unspecified,
};
}
"eol" => {
fa.eol = match value.as_str() {
"lf" => EolAttr::Lf,
"crlf" => EolAttr::Crlf,
_ => EolAttr::Unspecified,
};
}
"filter" => {
if value == "unset" {
// `-filter` drops every filter setting inherited from earlier rules.
fa.filter_clean = None;
fa.filter_smudge = None;
fa.filter_process = None;
fa.filter_driver_name = None;
fa.filter_smudge_required = false;
fa.filter_clean_required = false;
} else {
let clean_key = format!("filter.{value}.clean");
let smudge_key = format!("filter.{value}.smudge");
let process_key = format!("filter.{value}.process");
let req_key = format!("filter.{value}.required");
fa.filter_driver_name = Some(value.clone());
fa.filter_process = config.get(&process_key).filter(|s| !s.is_empty());
// A configured process filter takes the place of the clean/smudge commands.
if fa.filter_process.is_some() {
fa.filter_clean = None;
fa.filter_smudge = None;
} else {
fa.filter_clean = config.get(&clean_key);
fa.filter_smudge = config.get(&smudge_key);
}
// One `required` setting drives both directions.
let required =
config.get(&req_key).is_some_and(|v| config_bool_truthy(&v));
fa.filter_smudge_required = required;
fa.filter_clean_required = required;
}
}
"diff" => {
// An empty value (`diff=`) leaves the earlier state untouched.
if value == "unset" {
fa.diff_attr = DiffAttr::Unset;
} else if value == "set" {
fa.diff_attr = DiffAttr::Set;
} else if !value.is_empty() {
fa.diff_attr = DiffAttr::Driver(value.clone());
}
}
"ident" => {
fa.ident = value == "set";
}
"export-ignore" => {
fa.export_ignore = value != "unset";
}
"export-subst" => {
fa.export_subst = value != "unset";
}
"merge" => {
// A bare `merge` is treated the same as not mentioning the attribute.
fa.merge = match value.as_str() {
"unset" => MergeAttr::Unset,
"set" => MergeAttr::Unspecified,
other => MergeAttr::Driver(other.to_string()),
};
}
"conflict-marker-size" => {
if value == "unset" {
fa.conflict_marker_size = None;
} else {
fa.conflict_marker_size = Some(value.clone());
}
}
"working-tree-encoding" => {
// NOTE(review): an `unset` or empty value keeps whatever an earlier
// rule stored rather than clearing it — confirm this is intended.
if value != "unset" && !value.is_empty() {
fa.working_tree_encoding = Some(value.clone());
}
}
"crlf" => {
fa.crlf_legacy = match value.as_str() {
"unset" => CrlfLegacyAttr::Unset,
"input" => CrlfLegacyAttr::Input,
"set" => CrlfLegacyAttr::Crlf,
_ => CrlfLegacyAttr::Unspecified,
};
}
"whitespace" => {
// `-whitespace` is recorded as the literal string "unset".
if value == "unset" {
fa.whitespace = Some("unset".to_owned());
} else if !value.is_empty() {
fa.whitespace = Some(value.clone());
}
}
_ => {}
}
}
}
}
fa
}
/// Returns true when the last matching rule that mentions `attr_name` gives
/// it any value other than `unset`.
#[must_use]
pub fn path_has_gitattribute(
    rules: &[AttrRule],
    path: &str,
    is_dir: bool,
    attr_name: &str,
) -> bool {
    match path_gitattribute_value(rules, path, is_dir, attr_name) {
        Some(value) => value != "unset",
        None => false,
    }
}
/// Returns the value that the last matching rule assigns to `attr_name` for
/// `path`, or `None` when no matching rule mentions the attribute.
#[must_use]
pub fn path_gitattribute_value(
    rules: &[AttrRule],
    path: &str,
    is_dir: bool,
    attr_name: &str,
) -> Option<String> {
    rules
        .iter()
        .filter(|rule| attr_rule_matches(rule, path, is_dir))
        .flat_map(|rule| rule.attrs.iter())
        .filter(|(name, _)| name == attr_name)
        .last()
        .map(|(_, value)| value.clone())
}
/// Tests whether `rule` applies to `rel_path`.
///
/// A directory-only rule needs `is_dir` or a trailing `/` on the path.
/// Trailing slashes are removed before matching, and basename-only rules are
/// compared against the text after the last `/`.
#[must_use]
pub fn attr_rule_matches(rule: &AttrRule, rel_path: &str, is_dir: bool) -> bool {
    if rule.must_be_dir && !(is_dir || rel_path.ends_with('/')) {
        return false;
    }
    let trimmed = rel_path.trim_end_matches('/');
    let subject = if rule.basename_only {
        match trimmed.rfind('/') {
            Some(slash) => &trimmed[slash + 1..],
            None => trimmed,
        }
    } else {
        trimmed
    };
    glob_matches(&rule.pattern, subject)
}
/// String front end for `glob_match_bytes`.
fn glob_matches(pattern: &str, text: &str) -> bool {
    let (pattern_bytes, text_bytes) = (pattern.as_bytes(), text.as_bytes());
    glob_match_bytes(pattern_bytes, text_bytes)
}
/// Minimal byte-wise glob matcher.
///
/// `*` matches any run of bytes (including `/` and the empty run), `?`
/// matches exactly one byte, and every other byte matches itself. The whole
/// of `text` must be consumed for a match.
fn glob_match_bytes(pat: &[u8], text: &[u8]) -> bool {
    match (pat, text) {
        ([], []) => true,
        ([b'*', ..], _) => {
            // Consecutive stars behave like one.
            let first_non_star = pat
                .iter()
                .position(|&byte| byte != b'*')
                .unwrap_or(pat.len());
            let after_stars = &pat[first_non_star..];
            if after_stars.is_empty() {
                return true;
            }
            // Try every possible length for the run swallowed by the star.
            (0..=text.len()).any(|start| glob_match_bytes(after_stars, &text[start..]))
        }
        ([b'?', pat_tail @ ..], [_, text_tail @ ..]) => glob_match_bytes(pat_tail, text_tail),
        ([expected, pat_tail @ ..], [actual, text_tail @ ..]) if expected == actual => {
            glob_match_bytes(pat_tail, text_tail)
        }
        _ => false,
    }
}
/// Returns true when a NUL byte occurs within the first 8000 bytes of `data`.
pub fn is_binary(data: &[u8]) -> bool {
    data.iter().take(8000).any(|&byte| byte == 0)
}
/// Bit set by `gather_convert_stats` when the data contains at least one lone LF.
const CONVERT_STAT_BITS_TXT_LF: u32 = 0x1;
/// Bit set by `gather_convert_stats` when the data contains at least one CRLF pair.
const CONVERT_STAT_BITS_TXT_CRLF: u32 = 0x2;
/// Bit set by `gather_convert_stats` when `convert_is_binary` classifies the data as binary.
const CONVERT_STAT_BITS_BIN: u32 = 0x4;
/// Byte-class counters produced by `gather_text_stat`.
#[derive(Default, Clone)]
struct TextStat {
/// Number of NUL bytes (each is also counted in `nonprintable`).
nul: u32,
/// Number of CR bytes not immediately followed by LF.
lonecr: u32,
/// Number of LF bytes not consumed as part of a CRLF pair.
lonelf: u32,
/// Number of CRLF pairs.
crlf: u32,
/// Bytes counted as printable: everything >= 32 except DEL, plus TAB, BS, ESC and FF.
printable: u32,
/// Bytes counted as non-printable: DEL, NUL and the remaining control bytes.
nonprintable: u32,
}
/// Classifies every byte of `data` into the counters of a `TextStat`.
///
/// A CR directly followed by LF counts once as `crlf`; any other CR is a
/// `lonecr` and any other LF a `lonelf`. Line-ending bytes are not counted as
/// printable or non-printable.
fn gather_text_stat(data: &[u8]) -> TextStat {
    let mut stats = TextStat::default();
    let mut bytes = data.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        match byte {
            b'\r' => {
                // Consume the LF together with the CR so it is not counted twice.
                if bytes.next_if_eq(&b'\n').is_some() {
                    stats.crlf += 1;
                } else {
                    stats.lonecr += 1;
                }
            }
            b'\n' => stats.lonelf += 1,
            127 => stats.nonprintable += 1,
            0 => {
                stats.nul += 1;
                stats.nonprintable += 1;
            }
            // TAB, BS, ESC and FF are the control bytes tolerated in text.
            b'\t' | b'\x08' | b'\x1b' | b'\x0c' => stats.printable += 1,
            1..=31 => stats.nonprintable += 1,
            _ => stats.printable += 1,
        }
    }
    stats
}
/// Decides from the counters whether content should be treated as binary:
/// any lone CR, any NUL, or more non-printable bytes than 1/128 of the
/// printable ones.
fn convert_is_binary(stats: &TextStat) -> bool {
    if stats.lonecr > 0 || stats.nul > 0 {
        return true;
    }
    stats.nonprintable > (stats.printable >> 7)
}
/// Like `gather_text_stat`, but a trailing 0x1A byte is not held against the
/// content: one non-printable count is removed when the data ends with it.
fn git_text_stat(data: &[u8]) -> TextStat {
    let mut stats = gather_text_stat(data);
    if data.last() == Some(&0x1a) {
        stats.nonprintable = stats.nonprintable.saturating_sub(1);
    }
    stats
}
/// Predicts, from byte statistics alone, whether content would get its lone
/// LFs turned into CRLF under the given attributes and configuration.
///
/// Precedence, highest first: the legacy `crlf` attribute, `-text`, an
/// explicit `eol` attribute, `text` / `text=auto`, and finally
/// `core.autocrlf`.
fn will_convert_lf_to_crlf_from_stats(
stats: &TextStat,
conv: &ConversionConfig,
attrs: &FileAttrs,
) -> bool {
let has_lone_lf = stats.lonelf > 0;
let is_bin = convert_is_binary(stats);
// The legacy `crlf` attribute decides on its own whenever it is specified.
match attrs.crlf_legacy {
CrlfLegacyAttr::Unset | CrlfLegacyAttr::Input => return false,
CrlfLegacyAttr::Crlf => {
if attrs.text == TextAttr::Unset {
return false;
}
return has_lone_lf;
}
CrlfLegacyAttr::Unspecified => {}
}
if attrs.text == TextAttr::Unset {
return false;
}
// An explicit `eol` attribute is consulted before any configuration.
if attrs.eol != EolAttr::Unspecified {
if attrs.text == TextAttr::Auto && is_bin {
return false;
}
if attrs.eol != EolAttr::Crlf {
return false;
}
if attrs.text == TextAttr::Auto {
return auto_crlf_should_smudge_lf_to_crlf_from_stats(stats);
}
return has_lone_lf;
}
// NOTE(review): `output_eol_is_crlf` is defined outside this view; presumably
// it derives the checkout line ending from `conv` — confirm.
if attrs.text == TextAttr::Set {
if !output_eol_is_crlf(conv) {
return false;
}
return has_lone_lf;
}
if attrs.text == TextAttr::Auto {
if is_bin || !output_eol_is_crlf(conv) {
return false;
}
return auto_crlf_should_smudge_lf_to_crlf_from_stats(stats);
}
// No attribute applies: only `core.autocrlf=true` converts on output.
match conv.autocrlf {
AutoCrlf::True => {
if is_bin {
return false;
}
auto_crlf_should_smudge_lf_to_crlf_from_stats(stats)
}
AutoCrlf::Input | AutoCrlf::False => false,
}
}
/// Automatic LF→CRLF conversion applies only to content that has lone LFs,
/// has no CR of any kind (lone or as part of CRLF), and is not binary.
fn auto_crlf_should_smudge_lf_to_crlf_from_stats(stats: &TextStat) -> bool {
    let has_lone_lf = stats.lonelf != 0;
    let has_any_cr = stats.lonecr > 0 || stats.crlf > 0;
    has_lone_lf && !has_any_cr && !convert_is_binary(stats)
}
/// Summarises `data` as a combination of the `CONVERT_STAT_BITS_*` flags.
///
/// Empty input yields 0. A trailing 0x1A byte is forgiven (one non-printable
/// count is removed) before the binary test.
fn gather_convert_stats(data: &[u8]) -> u32 {
    let Some(&last_byte) = data.last() else {
        return 0;
    };
    let mut stats = gather_text_stat(data);
    if last_byte == 0x1a {
        stats.nonprintable = stats.nonprintable.saturating_sub(1);
    }
    let flag = |present: bool, bit: u32| if present { bit } else { 0 };
    flag(convert_is_binary(&stats), CONVERT_STAT_BITS_BIN)
        | flag(stats.crlf > 0, CONVERT_STAT_BITS_TXT_CRLF)
        | flag(stats.lonelf > 0, CONVERT_STAT_BITS_TXT_LF)
}
/// Describes the line endings of `data` as `-text` (binary), `lf`, `crlf`,
/// `mixed`, or `none` (no line endings at all).
#[must_use]
pub fn gather_convert_stats_ascii(data: &[u8]) -> &'static str {
    let bits = gather_convert_stats(data);
    let binary = bits & CONVERT_STAT_BITS_BIN != 0;
    let lone_lf = bits & CONVERT_STAT_BITS_TXT_LF != 0;
    let crlf = bits & CONVERT_STAT_BITS_TXT_CRLF != 0;
    match (binary, lone_lf, crlf) {
        (true, _, _) => "-text",
        (false, true, true) => "mixed",
        (false, true, false) => "lf",
        (false, false, true) => "crlf",
        (false, false, false) => "none",
    }
}
/// Renders the effective text/eol attributes of `rel_path` as a short string.
///
/// Possible results: the empty string, `-text`, `text`, `text eol=lf`,
/// `text eol=crlf`, `text=auto`, `text=auto eol=crlf`, `text=auto eol=lf`.
///
/// Internally a small integer `action` tracks the state:
/// 0 = undecided, 1 = text, 2 = -text, 3 = text eol=lf, 4 = text eol=crlf,
/// 5 = text=auto, 6 = text=auto eol=crlf, 7 = text=auto eol=lf.
#[must_use]
pub fn convert_attr_ascii_for_ls_files(
rules: &[AttrRule],
rel_path: &str,
config: &ConfigSet,
) -> String {
let fa = get_file_attrs(rules, rel_path, false, config);
// The `text` attribute is consulted first.
let mut action = match fa.text {
TextAttr::Set => 1, TextAttr::Unset => 2, TextAttr::Auto => 5, TextAttr::Unspecified => 0,
};
// Fall back to the legacy `crlf` attribute only when `text` said nothing.
if action == 0 {
action = match fa.crlf_legacy {
CrlfLegacyAttr::Crlf => 1,
CrlfLegacyAttr::Unset => 2,
CrlfLegacyAttr::Input => 3, CrlfLegacyAttr::Unspecified => 0,
};
}
if action == 2 {
return "-text".to_string();
}
// Neither attribute was given: an `eol` attribute alone implies `text`.
if action == 0 {
if fa.eol == EolAttr::Unspecified {
return String::new();
}
action = 1; }
// Refine the state with the `eol` attribute, keeping the auto/non-auto distinction.
if fa.eol == EolAttr::Lf {
if action == 5 {
action = 7; } else {
action = 3; }
} else if fa.eol == EolAttr::Crlf {
if action == 5 {
action = 6; } else {
action = 4; }
}
let attr_action = action;
match attr_action {
1 => "text".to_string(),
3 => "text eol=lf".to_string(),
4 => "text eol=crlf".to_string(),
5 => "text=auto".to_string(),
6 => "text=auto eol=crlf".to_string(),
7 => "text=auto eol=lf".to_string(),
_ => String::new(),
}
}
/// Returns true when `data` contains at least one CR immediately followed by LF.
pub fn has_crlf(data: &[u8]) -> bool {
    data.iter()
        .zip(data.iter().skip(1))
        .any(|(&first, &second)| first == b'\r' && second == b'\n')
}
/// Returns true when `data` contains an LF that is not directly preceded by CR.
pub fn has_lone_lf(data: &[u8]) -> bool {
    let mut previous: Option<u8> = None;
    for &byte in data {
        if byte == b'\n' && previous != Some(b'\r') {
            return true;
        }
        previous = Some(byte);
    }
    false
}
/// Returns true when `data` contains a CR that is not directly followed by LF
/// (a CR as the very last byte counts).
fn has_lone_cr(data: &[u8]) -> bool {
    data.iter()
        .enumerate()
        .any(|(idx, &byte)| byte == b'\r' && data.get(idx + 1) != Some(&b'\n'))
}
/// Byte-level form of the automatic LF→CRLF rule: the data must contain a
/// lone LF, no CR of any kind, and no NUL within the span inspected by
/// `is_binary`.
fn auto_crlf_should_smudge_lf_to_crlf(data: &[u8]) -> bool {
    has_lone_lf(data) && !(has_lone_cr(data) || has_crlf(data)) && !is_binary(data)
}
/// Returns true when `data` has at least one CRLF and no lone LF.
pub fn is_all_crlf(data: &[u8]) -> bool {
    if !has_crlf(data) {
        return false;
    }
    !has_lone_lf(data)
}
/// Returns true when `data` has at least one lone LF and no CRLF.
pub fn is_all_lf(data: &[u8]) -> bool {
    if !has_lone_lf(data) {
        return false;
    }
    !has_crlf(data)
}
/// Returns true when an index blob is text (per `gather_convert_stats`) and
/// contains at least one CRLF pair.
#[must_use]
pub fn has_crlf_in_index_blob(data: &[u8]) -> bool {
    // Without any CR there can be no CRLF; skip the full scan.
    if !data.iter().any(|&byte| byte == b'\r') {
        return false;
    }
    let bits = gather_convert_stats(data);
    let binary = bits & CONVERT_STAT_BITS_BIN != 0;
    let crlf = bits & CONVERT_STAT_BITS_TXT_CRLF != 0;
    !binary && crlf
}
/// Tells whether the clean direction decides its CRLF conversion
/// automatically, which is the case in which `convert_to_git_with_opts`
/// consults the existing index blob before stripping CRs.
///
/// True for `text=auto`, and for an unspecified `text` with no `eol`
/// attribute when `core.autocrlf` is `true` or `input`. Always false when
/// `text` or the legacy `crlf` attribute is unset.
#[must_use]
pub fn clean_uses_autocrlf_index_guard(attrs: &FileAttrs, conv: &ConversionConfig) -> bool {
    let conversion_disabled =
        attrs.text == TextAttr::Unset || attrs.crlf_legacy == CrlfLegacyAttr::Unset;
    if conversion_disabled {
        return false;
    }
    match attrs.text {
        TextAttr::Auto => true,
        TextAttr::Unspecified => {
            attrs.eol == EolAttr::Unspecified
                && matches!(conv.autocrlf, AutoCrlf::True | AutoCrlf::Input)
        }
        TextAttr::Set | TextAttr::Unset => false,
    }
}
/// Optional inputs for `convert_to_git_with_opts`.
#[derive(Debug, Clone, Copy)]
pub struct ConvertToGitOpts<'a> {
/// Content currently stored in the index for the path; when it already contains CRLF,
/// automatic CRLF→LF conversion is suppressed (unless `renormalize` is set).
pub index_blob: Option<&'a [u8]>,
/// When true, the index-blob guard above is ignored.
pub renormalize: bool,
/// When true, the safecrlf round-trip check runs; the same flag also selects the
/// `fatal:`-prefixed form of working-tree-encoding BOM errors.
pub check_safecrlf: bool,
}
impl Default for ConvertToGitOpts<'_> {
fn default() -> Self {
Self {
index_blob: None,
renormalize: false,
check_safecrlf: true,
}
}
}
/// Byte order mark of big-endian UTF-16.
const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];
/// Byte order mark of little-endian UTF-16.
const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE];
/// Byte order mark of big-endian UTF-32.
const UTF32_BE_BOM: &[u8] = &[0x00, 0x00, 0xFE, 0xFF];
/// Byte order mark of little-endian UTF-32.
const UTF32_LE_BOM: &[u8] = &[0xFF, 0xFE, 0x00, 0x00];
/// Normalises a UTF encoding label to its lower-case, dashed form
/// (for example `UTF16LE` becomes `utf-16le`).
///
/// Surrounding whitespace and ASCII case are ignored and the dash after
/// `utf` is optional. Returns `None` for anything that is not one of the
/// supported UTF-8/16/32 variants.
fn canonical_utf_label(label: &str) -> Option<String> {
    const KNOWN_SUFFIXES: [&str; 9] = [
        "8", "16", "16be", "16le", "16be-bom", "16le-bom", "32", "32be", "32le",
    ];
    let lowered = label.trim().to_ascii_lowercase();
    let after_utf = lowered.strip_prefix("utf")?;
    let suffix = after_utf.strip_prefix('-').unwrap_or(after_utf);
    KNOWN_SUFFIXES
        .iter()
        .find(|known| **known == suffix)
        .map(|known| format!("utf-{known}"))
}
/// Returns true when `data` begins with the bytes of `bom`.
fn has_bom_prefix(data: &[u8], bom: &[u8]) -> bool {
    data.starts_with(bom)
}
/// Returns true when an endianness-specific encoding (`utf-16be`, `utf-16le`,
/// `utf-32be`, `utf-32le`) is used on data that starts with a BOM of the same
/// width, in either byte order.
fn has_prohibited_utf_bom(canon: &str, data: &[u8]) -> bool {
    let (big_endian_bom, little_endian_bom) = match canon {
        "utf-16be" | "utf-16le" => (UTF16_BE_BOM, UTF16_LE_BOM),
        "utf-32be" | "utf-32le" => (UTF32_BE_BOM, UTF32_LE_BOM),
        _ => return false,
    };
    has_bom_prefix(data, big_endian_bom) || has_bom_prefix(data, little_endian_bom)
}
/// Returns true when an endianness-neutral encoding (`utf-16`, `utf-32`) is
/// used on data that does not start with a BOM of that width.
fn is_missing_required_utf_bom(canon: &str, data: &[u8]) -> bool {
    let (big_endian_bom, little_endian_bom) = match canon {
        "utf-16" => (UTF16_BE_BOM, UTF16_LE_BOM),
        "utf-32" => (UTF32_BE_BOM, UTF32_LE_BOM),
        _ => return false,
    };
    !(has_bom_prefix(data, big_endian_bom) || has_bom_prefix(data, little_endian_bom))
}
/// Checks the BOM rules for a UTF working-tree-encoding.
///
/// * `canon` – canonical label from `canonical_utf_label` (e.g. `utf-16le`).
/// * `label` – the label as written by the user; echoed in the error text.
/// * `rel_path` – path used in the messages.
/// * `data` – working-tree bytes to inspect.
/// * `die_on_error` – when true the returned error is prefixed with
///   `fatal: `; otherwise an `error: ` line is printed and the bare message
///   is returned.
///
/// In both failure cases an advice line is written to stderr first.
///
/// The width shown in the advice ("16" / "32") is taken from `canon` rather
/// than from `label`. `label` may be capitalised arbitrarily (e.g.
/// `Utf-16le`), which previously matched neither the `utf` nor the `UTF`
/// prefix and produced the advice "Please use UTF- as working-tree-encoding".
///
/// # Errors
/// Returns `Err` when a BOM is present but prohibited, or absent but required.
fn validate_utf_bom(
    canon: &str,
    label: &str,
    rel_path: &str,
    data: &[u8],
    die_on_error: bool,
) -> Result<(), String> {
    // "16be" / "16le" / "32be" / "32le" / "16" / "32".
    let family = canon.strip_prefix("utf-").unwrap_or("");

    let (advice, body) = if has_prohibited_utf_bom(canon, data) {
        // Drop the endianness suffix so the advice names the BOM-carrying encoding.
        let utf_num = family.trim_end_matches(|c: char| c.is_ascii_alphabetic());
        (
            format!(
                "The file '{rel_path}' contains a byte order mark (BOM). Please use UTF-{utf_num} as working-tree-encoding."
            ),
            format!("BOM is prohibited in '{rel_path}' if encoded as {label}"),
        )
    } else if is_missing_required_utf_bom(canon, data) {
        let utf_num = family;
        (
            format!(
                "The file '{rel_path}' is missing a byte order mark (BOM). Please use UTF-{utf_num}BE or UTF-{utf_num}LE (depending on the byte order) as working-tree-encoding."
            ),
            format!("BOM is required in '{rel_path}' if encoded as {label}"),
        )
    } else {
        return Ok(());
    };

    eprintln!("{advice}");
    if die_on_error {
        return Err(format!("fatal: {body}"));
    }
    eprintln!("error: {body}");
    Err(body)
}
/// Returns true when `enc_name` appears (ignoring ASCII case) in the
/// configured round-trip list. The list is separated by commas, spaces or
/// tabs and defaults to `SHIFT-JIS` when not configured.
fn encoding_needs_roundtrip_check(enc_name: &str, conv: &ConversionConfig) -> bool {
    let configured = match conv.check_roundtrip_encoding.as_deref() {
        Some(list) => list,
        None => "SHIFT-JIS",
    };
    for token in configured.split(|c: char| matches!(c, ',' | ' ' | '\t')) {
        let token = token.trim();
        if !token.is_empty() && token.eq_ignore_ascii_case(enc_name) {
            return true;
        }
    }
    false
}
/// Emits the "Checking roundtrip encoding" trace line when `GIT_TRACE` is
/// enabled.
///
/// Nothing is written when the variable is unset, empty, `0` or `false`.
/// The values `1`, `true` and `2` send the line to stderr; any other value is
/// treated as a file path to append to. Write failures are ignored.
fn trace_roundtrip_encoding(enc_name: &str) {
    use std::io::Write;

    let destination = match std::env::var("GIT_TRACE") {
        Ok(value) => value,
        Err(_) => return,
    };
    let disabled = destination.is_empty()
        || destination == "0"
        || destination.eq_ignore_ascii_case("false");
    if disabled {
        return;
    }

    let message = format!("Checking roundtrip encoding for {enc_name}...\n");
    if matches!(destination.as_str(), "1" | "true" | "2") {
        let _ = std::io::stderr().write_all(message.as_bytes());
        return;
    }
    let opened = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(&destination);
    if let Ok(mut file) = opened {
        let _ = file.write_all(message.as_bytes());
    }
}
/// Re-encodes `data` from encoding `from` to encoding `to` by piping it
/// through the external `iconv` program.
///
/// Returns `None` when `iconv` cannot be started or exits unsuccessfully.
///
/// The input is written from a scoped helper thread while this thread
/// collects stdout. Writing the whole input before reading any output (as was
/// done previously) can deadlock once the data exceeds the pipe buffer:
/// `iconv` blocks writing output nobody reads while we block writing input it
/// no longer consumes.
fn reencode_via_iconv(data: &[u8], from: &str, to: &str) -> Option<Vec<u8>> {
    use std::io::Write;

    let mut child = Command::new("iconv")
        .arg("-f")
        .arg(from)
        .arg("-t")
        .arg(to)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::null())
        .spawn()
        .ok()?;

    let stdin = child.stdin.take();
    let output = std::thread::scope(|scope| {
        if let Some(mut pipe) = stdin {
            scope.spawn(move || {
                // Best effort: if iconv exits early the write fails and the
                // non-zero exit status below reports the failure.
                let _ = pipe.write_all(data);
                // `pipe` is dropped here, closing stdin so iconv sees EOF.
            });
        }
        child.wait_with_output()
    })
    .ok()?;

    if !output.status.success() {
        return None;
    }
    Some(output.stdout)
}
/// Converts working-tree bytes written in `enc_label` into UTF-8.
///
/// * An empty label and UTF-8 return the input unchanged.
/// * For UTF-16/32 labels the BOM rules are validated first; `validate` is
///   forwarded as the `die_on_error` flag of `validate_utf_bom`.
/// * Conversion is attempted with the external `iconv` first and falls back
///   to in-process decoding when that yields nothing.
///
/// # Errors
/// Returns a message when BOM validation fails, the encoding is unknown, or
/// the bytes cannot be decoded.
fn decode_working_tree_bytes_to_utf8(
src: &[u8],
rel_path: &str,
enc_label: &str,
validate: bool,
) -> Result<Vec<u8>, String> {
let label = enc_label.trim();
if label.is_empty() {
return Ok(src.to_vec());
}
let canon = canonical_utf_label(label);
if let Some(ref c) = canon {
validate_utf_bom(c, label, rel_path, src, validate)?;
}
if canon.as_deref() == Some("utf-8") {
return Ok(src.to_vec());
}
// Choose the source encoding name handed to iconv and the bytes to feed it.
let (iconv_from, body): (&str, &[u8]) = match canon.as_deref() {
// The `-bom` variants are decoded as their plain encoding with a leading BOM stripped.
Some("utf-16le-bom") => {
let body = if has_bom_prefix(src, UTF16_LE_BOM) {
&src[2..]
} else {
src
};
("UTF-16LE", body)
}
Some("utf-16be-bom") => {
let body = if has_bom_prefix(src, UTF16_BE_BOM) {
&src[2..]
} else {
src
};
("UTF-16BE", body)
}
Some(c) => (utf_canon_to_iconv_name(c), src),
// Not a UTF label: try iconv with the label as written, then the in-process encoder table.
None => {
if let Some(out) = reencode_via_iconv(src, label, "UTF-8") {
return Ok(out);
}
let Some(enc) = crate::commit_encoding::resolve(label) else {
return Err(format!(
"failed to encode '{rel_path}' from {label} to UTF-8"
));
};
if enc == UTF_8 {
return Ok(src.to_vec());
}
let (cow, _, had_errors) = enc.decode(src);
if had_errors {
return Err(format!(
"failed to encode '{rel_path}' from {label} to UTF-8"
));
}
return Ok(cow.into_owned().into_bytes());
}
};
if let Some(out) = reencode_via_iconv(body, iconv_from, "UTF-8") {
return Ok(out);
}
// iconv was unavailable or failed: decode in-process.
decode_utf_bytes_with_encoding_rs(body, rel_path, label, iconv_from)
}
/// In-process fallback that decodes UTF-16/UTF-32 bytes to UTF-8.
///
/// `iconv_from` selects the source encoding. Plain `UTF-16` and `UTF-32`
/// need a leading BOM, which selects the byte order and is stripped; without
/// one the call fails. Any other name fails as well.
///
/// # Errors
/// Returns a "failed to encode" message mentioning `rel_path` when the bytes
/// are malformed or the encoding is unsupported.
fn decode_utf_bytes_with_encoding_rs(
    body: &[u8],
    rel_path: &str,
    label: &str,
    iconv_from: &str,
) -> Result<Vec<u8>, String> {
    let fail = || format!("failed to encode '{rel_path}' from {label} to UTF-8");

    // UTF-32 is delegated to the hand-written decoder. For UTF-16, pick the
    // decoder and payload here and decode once below.
    let (encoding, payload) = match iconv_from {
        "UTF-16BE" => (encoding_rs::UTF_16BE, body),
        "UTF-16LE" => (encoding_rs::UTF_16LE, body),
        "UTF-16" if has_bom_prefix(body, UTF16_BE_BOM) => (encoding_rs::UTF_16BE, &body[2..]),
        "UTF-16" if has_bom_prefix(body, UTF16_LE_BOM) => (encoding_rs::UTF_16LE, &body[2..]),
        "UTF-32" if has_bom_prefix(body, UTF32_BE_BOM) => {
            return decode_utf32_body_to_utf8_bytes(&body[4..], rel_path, true);
        }
        "UTF-32" if has_bom_prefix(body, UTF32_LE_BOM) => {
            return decode_utf32_body_to_utf8_bytes(&body[4..], rel_path, false);
        }
        "UTF-32BE" => return decode_utf32_body_to_utf8_bytes(body, rel_path, true),
        "UTF-32LE" => return decode_utf32_body_to_utf8_bytes(body, rel_path, false),
        // Covers BOM-less UTF-16/UTF-32 and every unknown name.
        _ => return Err(fail()),
    };

    let (text, _, had_errors) = encoding.decode(payload);
    if had_errors {
        return Err(fail());
    }
    Ok(text.into_owned().into_bytes())
}
/// Decodes BOM-less UTF-32 code units into UTF-8 bytes.
///
/// `big_endian` selects the byte order of each 4-byte unit.
///
/// # Errors
/// Fails when the length is not a multiple of four or a unit is not a valid
/// Unicode scalar value.
fn decode_utf32_body_to_utf8_bytes(
    body: &[u8],
    rel_path: &str,
    big_endian: bool,
) -> Result<Vec<u8>, String> {
    let fail = || format!("failed to encode '{rel_path}' from UTF-32 to UTF-8");

    let units = body.chunks_exact(4);
    if !units.remainder().is_empty() {
        return Err(fail());
    }
    let read_unit: fn([u8; 4]) -> u32 = if big_endian {
        u32::from_be_bytes
    } else {
        u32::from_le_bytes
    };
    // Collecting into `Option<String>` stops at the first invalid scalar value.
    let decoded: Option<String> = units
        .map(|unit| char::from_u32(read_unit([unit[0], unit[1], unit[2], unit[3]])))
        .collect();
    decoded.map(String::into_bytes).ok_or_else(fail)
}
/// Maps a canonical UTF label to the upper-case encoding name passed to
/// `iconv`. Labels without an entry in the table (including the `-bom`
/// variants) map to `UTF-8`.
fn utf_canon_to_iconv_name(canon: &str) -> &'static str {
    const ICONV_NAMES: [(&str, &str); 6] = [
        ("utf-16", "UTF-16"),
        ("utf-16be", "UTF-16BE"),
        ("utf-16le", "UTF-16LE"),
        ("utf-32", "UTF-32"),
        ("utf-32be", "UTF-32BE"),
        ("utf-32le", "UTF-32LE"),
    ];
    ICONV_NAMES
        .iter()
        .find(|(label, _)| *label == canon)
        .map_or("UTF-8", |&(_, iconv_name)| iconv_name)
}
/// Converts UTF-8 blob content into the working-tree encoding `enc_label`.
///
/// * An empty label and UTF-8 return the input unchanged.
/// * `utf-16le-bom` / `utf-16be-bom` are encoded as the plain variant and the
///   matching BOM is prepended here.
/// * Every conversion tries the external `iconv` first and falls back to
///   in-process encoding.
///
/// # Errors
/// Returns a message when the input is not valid UTF-8, the encoding is
/// unknown, or some character cannot be represented in the target encoding.
fn encode_utf8_blob_to_working_tree_bytes(
src: &[u8],
rel_path: &str,
enc_label: &str,
) -> Result<Vec<u8>, String> {
let label = enc_label.trim();
if label.is_empty() {
return Ok(src.to_vec());
}
let canon = canonical_utf_label(label);
if canon.as_deref() == Some("utf-8") {
return Ok(src.to_vec());
}
let fail = || format!("failed to encode '{rel_path}' from UTF-8 to {label}");
match canon.as_deref() {
Some("utf-16le-bom") => {
let body = reencode_via_iconv(src, "UTF-8", "UTF-16LE")
.or_else(|| encode_utf_with_encoding_rs(src, "UTF-16LE"))
.ok_or_else(fail)?;
let mut out = UTF16_LE_BOM.to_vec();
out.extend(body);
return Ok(out);
}
Some("utf-16be-bom") => {
let body = reencode_via_iconv(src, "UTF-8", "UTF-16BE")
.or_else(|| encode_utf_with_encoding_rs(src, "UTF-16BE"))
.ok_or_else(fail)?;
let mut out = UTF16_BE_BOM.to_vec();
out.extend(body);
return Ok(out);
}
// Remaining UTF labels: iconv under its upper-case name, then the built-in encoder.
Some(c) => {
let iconv_name = utf_canon_to_iconv_name(c);
if let Some(out) = reencode_via_iconv(src, "UTF-8", iconv_name) {
return Ok(out);
}
return encode_utf_with_encoding_rs(src, c).ok_or_else(fail);
}
None => {}
}
// Not a UTF label: iconv with the label as written, then the in-process encoder table.
if let Some(out) = reencode_via_iconv(src, "UTF-8", label) {
return Ok(out);
}
let s = std::str::from_utf8(src).map_err(|_| fail())?;
let Some(enc) = crate::commit_encoding::resolve(label) else {
return Err(format!(
"unknown working-tree-encoding '{label}' for '{rel_path}'"
));
};
if enc == UTF_8 {
return Ok(src.to_vec());
}
let (cow, _, had_errors) = enc.encode(s);
if had_errors {
return Err(fail());
}
Ok(cow.into_owned())
}
/// In-process fallback that encodes UTF-8 `src` as UTF-16 or UTF-32.
///
/// `target` is matched ignoring ASCII case. Supported values are `utf-16`,
/// `utf-16be`, `utf-16le`, `utf-32`, `utf-32be` and `utf-32le`.
///
/// The endianness-neutral labels `utf-16` and `utf-32` produce big-endian
/// output *with* a leading BOM. The reading side of this module rejects
/// BOM-less data for those labels (`is_missing_required_utf_bom` and the
/// `UTF-16`/`UTF-32` arms of the decoder), so output without a BOM could not
/// be converted back. The endianness-specific labels never emit a BOM.
///
/// Returns `None` when `src` is not valid UTF-8 or `target` is unsupported.
fn encode_utf_with_encoding_rs(src: &[u8], target: &str) -> Option<Vec<u8>> {
    let text = std::str::from_utf8(src).ok()?;
    let lower = target.to_ascii_lowercase();
    let mut out = Vec::new();

    // BOM bytes are spelled out here so the function stays self-contained.
    match lower.as_str() {
        "utf-16" => out.extend_from_slice(&[0xFE, 0xFF]),
        "utf-32" => out.extend_from_slice(&[0x00, 0x00, 0xFE, 0xFF]),
        _ => {}
    }

    match lower.as_str() {
        "utf-16" | "utf-16be" => {
            for unit in text.encode_utf16() {
                out.extend_from_slice(&unit.to_be_bytes());
            }
        }
        "utf-16le" => {
            for unit in text.encode_utf16() {
                out.extend_from_slice(&unit.to_le_bytes());
            }
        }
        "utf-32" | "utf-32be" => {
            for ch in text.chars() {
                out.extend_from_slice(&u32::from(ch).to_be_bytes());
            }
        }
        "utf-32le" => {
            for ch in text.chars() {
                out.extend_from_slice(&u32::from(ch).to_le_bytes());
            }
        }
        _ => return None,
    }
    Some(out)
}
/// Converts working-tree content to its repository form using the default
/// `ConvertToGitOpts`.
///
/// # Errors
/// Propagates any error from `convert_to_git_with_opts`.
pub fn convert_to_git(
    data: &[u8],
    rel_path: &str,
    conv: &ConversionConfig,
    file_attrs: &FileAttrs,
) -> Result<Vec<u8>, String> {
    let default_opts = ConvertToGitOpts::default();
    convert_to_git_with_opts(data, rel_path, conv, file_attrs, default_opts)
}
/// Converts working-tree content to its repository form.
///
/// Steps, in order:
/// 1. run the clean filter (a process filter takes precedence over a clean command),
/// 2. decode the working-tree encoding to UTF-8,
/// 3. decide on CRLF→LF conversion, honouring the index-blob guard,
/// 4. run the safecrlf check,
/// 5. apply the CRLF→LF conversion.
///
/// # Errors
/// Returns a message when a required filter fails or is missing, a clean
/// command fails, the working-tree encoding is invalid or cannot be decoded,
/// or `core.safecrlf=true` rejects the conversion.
pub fn convert_to_git_with_opts(
data: &[u8],
rel_path: &str,
conv: &ConversionConfig,
file_attrs: &FileAttrs,
opts: ConvertToGitOpts<'_>,
) -> Result<Vec<u8>, String> {
let mut buf = data.to_vec();
if let Some(ref proc_cmd) = file_attrs.filter_process {
let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
match apply_process_clean(proc_cmd, rel_path, &buf) {
Ok(filtered) => buf = filtered,
Err(e) => {
if file_attrs.filter_clean_required {
// This particular error text is passed through unchanged; others get the generic message.
if e.contains("expected git-filter-server") {
return Err(e);
}
return Err(format!("fatal: {rel_path}: clean filter '{name}' failed"));
}
if e.starts_with("filter status: abort") {
crate::filter_process::disable_process_filter(proc_cmd);
}
// A failing optional filter is reported and the unfiltered content is used.
eprintln!("error: external filter '{name}' failed");
}
}
} else {
match file_attrs.filter_clean.as_ref() {
Some(clean_cmd) => {
// A failing clean command is an error whether or not the filter is required;
// only the message differs.
buf = run_filter(clean_cmd, &buf, rel_path).map_err(|e| {
let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
if file_attrs.filter_clean_required {
format!("fatal: {rel_path}: clean filter '{name}' failed")
} else {
format!("clean filter failed: {e}")
}
})?;
}
None => {
// A required filter without a clean command counts as a failure.
if file_attrs.filter_clean_required {
let name = file_attrs.filter_driver_name.as_deref().unwrap_or_default();
return Err(format!("fatal: {rel_path}: clean filter '{name}' failed"));
}
}
}
}
if let Some(ref enc) = file_attrs.working_tree_encoding {
if enc == "set" || enc == "true" || enc == "false" {
return Err("fatal: true/false are no valid working-tree-encodings".to_string());
}
let writing_object = opts.check_safecrlf;
buf = decode_working_tree_bytes_to_utf8(&buf, rel_path, enc, writing_object)?;
// Only the trace line is emitted here; no re-encoding comparison is performed in this function.
if writing_object && encoding_needs_roundtrip_check(enc, conv) {
trace_roundtrip_encoding(enc);
}
}
let would_convert = would_convert_on_input(conv, file_attrs, &buf);
let mut convert_crlf_into_lf = would_convert && has_crlf(&buf);
// Keep CRLF when the conversion is automatic and the index blob already holds CRLF.
if convert_crlf_into_lf
&& clean_uses_autocrlf_index_guard(file_attrs, conv)
&& !opts.renormalize
&& opts.index_blob.is_some_and(has_crlf_in_index_blob)
{
convert_crlf_into_lf = false;
}
if would_convert && opts.check_safecrlf {
check_safecrlf_roundtrip(conv, file_attrs, &buf, rel_path, convert_crlf_into_lf)?;
}
if convert_crlf_into_lf {
buf = crlf_to_lf(&buf);
}
Ok(buf)
}
/// Decides whether line-ending conversion applies to `data` on the way into
/// the repository.
///
/// The legacy `crlf` attribute has the first say, then `text`, then `eol`,
/// and finally `core.autocrlf`. Wherever the decision is content-dependent,
/// data that `is_binary` flags is never converted.
fn would_convert_on_input(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
    let looks_like_text = || !is_binary(data);

    match attrs.crlf_legacy {
        CrlfLegacyAttr::Unset => false,
        CrlfLegacyAttr::Input => looks_like_text(),
        CrlfLegacyAttr::Crlf => attrs.text != TextAttr::Unset && looks_like_text(),
        CrlfLegacyAttr::Unspecified => match (attrs.text, attrs.eol) {
            (TextAttr::Unset, _) => false,
            // `text=auto` depends on the content whether or not `eol` is given.
            (TextAttr::Auto, _) => looks_like_text(),
            (TextAttr::Set, _) => true,
            // An `eol` attribute on its own forces conversion.
            (TextAttr::Unspecified, EolAttr::Lf | EolAttr::Crlf) => true,
            (TextAttr::Unspecified, EolAttr::Unspecified) => {
                matches!(conv.autocrlf, AutoCrlf::True | AutoCrlf::Input) && looks_like_text()
            }
        },
    }
}
/// Writes the "CRLF will be replaced by LF" warning for `rel_path` to stderr.
fn eprint_safecrlf_warn_crlf_to_lf(rel_path: &str) {
    let message = format!(
        "warning: in the working copy of '{rel_path}', CRLF will be replaced by LF the next time Git touches it"
    );
    eprintln!("{message}");
}
/// Writes the "LF will be replaced by CRLF" warning for `rel_path` to stderr.
fn eprint_safecrlf_warn_lf_to_crlf(rel_path: &str) {
    let message = format!(
        "warning: in the working copy of '{rel_path}', LF will be replaced by CRLF the next time Git touches it"
    );
    eprintln!("{message}");
}
/// Checks whether converting `data` would irreversibly change its line endings.
///
/// The line-ending statistics of `data` are projected through the pending
/// CRLF→LF conversion (`convert_crlf_into_lf`) and then through the LF→CRLF
/// conversion predicted by `will_convert_lf_to_crlf_from_stats`. If every
/// CRLF, or otherwise every lone LF, present in the input is gone afterwards,
/// the conversion is reported.
///
/// # Errors
///
/// Returns the `fatal: ...` message when `conv.safecrlf` is `SafeCrlf::True`.
/// With `SafeCrlf::Warn` a warning is printed to stderr instead, and with
/// `SafeCrlf::False` nothing is checked at all.
fn check_safecrlf_roundtrip(
    conv: &ConversionConfig,
    file_attrs: &FileAttrs,
    data: &[u8],
    rel_path: &str,
    convert_crlf_into_lf: bool,
) -> Result<(), String> {
    let fatal = match conv.safecrlf {
        SafeCrlf::False => return Ok(()),
        SafeCrlf::True => true,
        SafeCrlf::Warn => false,
    };

    let before = git_text_stat(data);
    let mut after = before.clone();
    if convert_crlf_into_lf && after.crlf > 0 {
        after.lonelf += after.crlf;
        after.crlf = 0;
    }
    if will_convert_lf_to_crlf_from_stats(&after, conv, file_attrs) {
        after.crlf += after.lonelf;
        after.lonelf = 0;
    }

    let crlf_lost = before.crlf > 0 && after.crlf == 0;
    let lf_lost = before.lonelf > 0 && after.lonelf == 0;
    if crlf_lost {
        if fatal {
            return Err(format!("fatal: CRLF would be replaced by LF in {rel_path}"));
        }
        eprint_safecrlf_warn_crlf_to_lf(rel_path);
    } else if lf_lost {
        if fatal {
            return Err(format!("fatal: LF would be replaced by CRLF in {rel_path}"));
        }
        eprint_safecrlf_warn_lf_to_crlf(rel_path);
    }
    Ok(())
}
/// Returns a copy of `data` with every `\r\n` pair replaced by a single `\n`.
///
/// A `\r` that is not immediately followed by `\n` is kept as-is.
pub fn crlf_to_lf(data: &[u8]) -> Vec<u8> {
    let mut converted = Vec::with_capacity(data.len());
    let mut bytes = data.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        // Drop the CR of a CRLF pair; the LF is emitted on the next iteration.
        if byte == b'\r' && bytes.peek() == Some(&b'\n') {
            continue;
        }
        converted.push(byte);
    }
    converted
}
/// Returns a copy of `data` with every lone `\n` expanded to `\r\n`.
///
/// A `\n` that already follows a `\r` is left untouched, so existing CRLF
/// pairs are not doubled.
pub fn lf_to_crlf(data: &[u8]) -> Vec<u8> {
    let mut converted = Vec::with_capacity(data.len() + data.len() / 10);
    let mut previous: Option<u8> = None;
    for &byte in data {
        if byte == b'\n' && previous != Some(b'\r') {
            converted.push(b'\r');
        }
        converted.push(byte);
        previous = Some(byte);
    }
    converted
}
pub fn convert_to_worktree(
data: &[u8],
rel_path: &str,
conv: &ConversionConfig,
file_attrs: &FileAttrs,
oid_hex: Option<&str>,
smudge_meta: Option<&FilterSmudgeMeta>,
delayed_checkout: Option<&mut crate::filter_process::DelayedProcessCheckout>,
) -> Result<Option<Vec<u8>>, String> {
let mut buf = data.to_vec();
if file_attrs.ident {
if let Some(oid) = oid_hex {
buf = expand_ident(&buf, oid);
}
}
let can_delay_smudge = delayed_checkout.is_some()
&& file_attrs.working_tree_encoding.is_none()
&& !file_attrs.ident
&& file_attrs
.filter_process
.as_deref()
.is_some_and(|c| !c.is_empty())
&& !should_convert_to_crlf(conv, file_attrs, &buf)
&& file_attrs
.filter_process
.as_deref()
.is_some_and(crate::filter_process::process_filter_supports_delay);
let should_convert = should_convert_to_crlf(conv, file_attrs, &buf);
if should_convert {
buf = lf_to_crlf(&buf);
}
if let Some(ref enc) = file_attrs.working_tree_encoding {
buf = encode_utf8_blob_to_working_tree_bytes(&buf, rel_path, enc)?;
}
let driver = file_attrs.filter_driver_name.as_deref().unwrap_or("");
if let Some(ref proc_cmd) = file_attrs.filter_process {
let smudge_out =
match apply_process_smudge(proc_cmd, rel_path, &buf, smudge_meta, can_delay_smudge) {
Ok(out) => out,
Err(e) => {
if file_attrs.filter_smudge_required {
return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
}
if e.starts_with("filter status: abort") {
crate::filter_process::disable_process_filter(proc_cmd);
}
eprintln!("error: external filter '{driver}' failed");
return Ok(Some(buf));
}
};
let Some(out) = smudge_out else {
let Some(q) = delayed_checkout else {
return Err(format!(
"internal error: delayed smudge without checkout queue for {rel_path}"
));
};
q.push_delayed(
proc_cmd.clone(),
rel_path.to_string(),
smudge_meta.cloned().unwrap_or_default(),
);
return Ok(None);
};
buf = out;
} else {
match file_attrs.filter_smudge.as_ref() {
Some(smudge_cmd) => match run_filter(smudge_cmd, &buf, rel_path) {
Ok(filtered) => buf = filtered,
Err(_e) => {
if file_attrs.filter_smudge_required {
return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
}
}
},
None => {
if file_attrs.filter_smudge_required {
return Err(format!("fatal: {rel_path}: smudge filter {driver} failed"));
}
}
}
}
Ok(Some(buf))
}
/// Runs [`convert_to_worktree`] without a delayed-checkout queue and returns
/// the converted bytes directly.
///
/// # Errors
///
/// Propagates any error from [`convert_to_worktree`], and reports an internal
/// error if the conversion unexpectedly comes back delayed (`None`).
#[must_use]
pub fn convert_to_worktree_eager(
    data: &[u8],
    rel_path: &str,
    conv: &ConversionConfig,
    file_attrs: &FileAttrs,
    oid_hex: Option<&str>,
    smudge_meta: Option<&FilterSmudgeMeta>,
) -> Result<Vec<u8>, String> {
    let converted =
        convert_to_worktree(data, rel_path, conv, file_attrs, oid_hex, smudge_meta, None)?;
    converted.ok_or_else(|| format!("internal error: unexpected delayed smudge for {rel_path}"))
}
/// Reports whether `data` should have its LF line endings expanded to CRLF
/// when written to the working tree.
///
/// The legacy `crlf` attribute wins when specified. Otherwise the `text`
/// attribute selects the mode, and the target line ending comes from the
/// `eol` attribute if set, else from [`output_eol_is_crlf`]. In the
/// auto-detected modes, binary data is never converted and
/// `auto_crlf_should_smudge_lf_to_crlf` has the final say.
#[must_use]
pub fn should_convert_to_crlf(conv: &ConversionConfig, attrs: &FileAttrs, data: &[u8]) -> bool {
    match attrs.crlf_legacy {
        CrlfLegacyAttr::Unset | CrlfLegacyAttr::Input => return false,
        CrlfLegacyAttr::Crlf => return attrs.text != TextAttr::Unset,
        CrlfLegacyAttr::Unspecified => {}
    }

    let eol_pinned = attrs.eol != EolAttr::Unspecified;
    // Target line ending: the `eol` attribute when present, config otherwise.
    let target_is_crlf = || {
        if eol_pinned {
            attrs.eol == EolAttr::Crlf
        } else {
            output_eol_is_crlf(conv)
        }
    };

    match attrs.text {
        TextAttr::Unset => false,
        TextAttr::Set => target_is_crlf(),
        TextAttr::Auto => {
            !is_binary(data) && target_is_crlf() && auto_crlf_should_smudge_lf_to_crlf(data)
        }
        TextAttr::Unspecified if eol_pinned => attrs.eol == EolAttr::Crlf,
        TextAttr::Unspecified => {
            conv.autocrlf == AutoCrlf::True
                && !is_binary(data)
                && auto_crlf_should_smudge_lf_to_crlf(data)
        }
    }
}
/// Reports whether the configured working-tree line ending is CRLF.
///
/// `conv.autocrlf` overrides `conv.eol`: `Input` always means LF and `True`
/// always means CRLF. With `autocrlf` off, `conv.eol` decides, and `Native`
/// resolves to CRLF only on Windows builds.
fn output_eol_is_crlf(conv: &ConversionConfig) -> bool {
    match (conv.autocrlf, conv.eol) {
        (AutoCrlf::Input, _) => false,
        (AutoCrlf::True, _) => true,
        (AutoCrlf::False, CoreEol::Crlf) => true,
        (AutoCrlf::False, CoreEol::Lf) => false,
        (AutoCrlf::False, CoreEol::Native) => cfg!(windows),
    }
}
/// Expands `$Id$` keywords in `data` to `$Id: <oid> $`.
///
/// An already expanded `$Id: ... $` on a single line is re-expanded with
/// `oid`, unless its payload has a space anywhere other than the first or
/// last position; such an id is treated as foreign and left untouched.
/// Keywords whose closing `$` is not on the same line are left untouched.
fn expand_ident(data: &[u8], oid: &str) -> Vec<u8> {
    /// Byte length of the replaceable keyword at the start of `s`, if any.
    fn keyword_len(s: &[u8]) -> Option<usize> {
        let after_id = s.strip_prefix(b"$Id")?;
        let (&marker, tail) = after_id.split_first()?;
        match marker {
            // Collapsed form: `$Id$`.
            b'$' => Some(4),
            // Expanded form: `$Id:<payload>$`, closed on the same line.
            b':' => {
                let line = tail
                    .iter()
                    .position(|&b| b == b'\n' || b == b'\r')
                    .map_or(tail, |end| &tail[..end]);
                let close = line.iter().position(|&b| b == b'$')?;
                let payload = &line[..close];
                // The first space found after the payload's first byte must be
                // the payload's final byte; anything else marks a foreign id.
                let foreign = payload
                    .iter()
                    .skip(1)
                    .position(|&b| b == b' ')
                    .is_some_and(|rel| rel + 2 < payload.len());
                if foreign {
                    None
                } else {
                    // "$Id:" + payload + closing "$".
                    Some(5 + close)
                }
            }
            _ => None,
        }
    }

    if !count_ident_regions(data) {
        return data.to_vec();
    }
    let expanded = format!("$Id: {oid} $");
    let mut out = Vec::with_capacity(data.len() + 60);
    let mut rest = data;
    while let Some(&byte) = rest.first() {
        match keyword_len(rest) {
            Some(consumed) => {
                out.extend_from_slice(expanded.as_bytes());
                rest = &rest[consumed..];
            }
            None => {
                out.push(byte);
                rest = &rest[1..];
            }
        }
    }
    out
}
/// Reports whether `data` contains at least one ident keyword.
///
/// A keyword is either `$Id$`, or `$Id:` followed by a closing `$` before the
/// next `\n` or `\r`. Despite the name, this returns a flag and not a count.
fn count_ident_regions(data: &[u8]) -> bool {
    data.iter()
        .enumerate()
        .filter(|&(_, &byte)| byte == b'$')
        .any(|(at, _)| {
            let Some(after_id) = data[at..].strip_prefix(b"$Id") else {
                return false;
            };
            match after_id.split_first() {
                Some((b'$', _)) => true,
                // The keyword counts only if `$` comes before any line break.
                Some((b':', tail)) => {
                    tail.iter()
                        .find(|&&b| matches!(b, b'$' | b'\n' | b'\r'))
                        == Some(&b'$')
                }
                _ => false,
            }
        })
}
/// Collapses every expanded `$Id: ... $` keyword in `data` back to `$Id$`.
///
/// A keyword is collapsed only when its closing `$` appears before the next
/// `\n` or `\r`; everything else is copied through unchanged.
pub fn collapse_ident(data: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(data.len());
    let mut rest = data;
    while let Some((&byte, tail)) = rest.split_first() {
        if let Some(after) = rest.strip_prefix(b"$Id:") {
            // First terminator after the colon: a `$` closes the keyword,
            // while a line break means it is unterminated.
            let closing = after
                .iter()
                .position(|&b| matches!(b, b'$' | b'\n' | b'\r'))
                .filter(|&at| after[at] == b'$');
            if let Some(end) = closing {
                out.extend_from_slice(b"$Id$");
                rest = &after[end + 1..];
                continue;
            }
        }
        out.push(byte);
        rest = tail;
    }
    out
}
/// Wraps `s` in single quotes for use in a POSIX shell command line.
///
/// Each embedded `'` is emitted as `'\''` (close quote, escaped quote, reopen
/// quote).
fn sq_quote_buf(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('\'');
    for (index, piece) in s.split('\'').enumerate() {
        // Every boundary between pieces stands for one original quote.
        if index > 0 {
            quoted.push_str("'\\''");
        }
        quoted.push_str(piece);
    }
    quoted.push('\'');
    quoted
}
/// Expands the placeholders of a filter command line.
///
/// `%f` becomes the shell-quoted `rel_path` and `%%` becomes a literal `%`.
/// A `%` followed by anything else (or at the end of the string) is kept
/// as-is.
fn expand_filter_command(cmd: &str, rel_path: &str) -> String {
    let mut expanded = String::with_capacity(cmd.len() + rel_path.len() + 8);
    let mut rest = cmd;
    while let Some(at) = rest.find('%') {
        expanded.push_str(&rest[..at]);
        let after = &rest[at + 1..];
        rest = if let Some(tail) = after.strip_prefix('%') {
            expanded.push('%');
            tail
        } else if let Some(tail) = after.strip_prefix('f') {
            expanded.push_str(&sq_quote_buf(rel_path));
            tail
        } else {
            // Unknown placeholder: keep the `%` and rescan what follows it.
            expanded.push('%');
            after
        };
    }
    expanded.push_str(rest);
    expanded
}
/// Runs a one-shot filter command through `sh -c`, feeding `data` on stdin
/// and returning everything the command wrote to stdout.
///
/// `cmd` is first expanded with [`expand_filter_command`], so `%f` is replaced
/// by the shell-quoted `rel_path`. The command's stderr is inherited.
///
/// The input is written from a scoped helper thread while this thread drains
/// stdout. Writing everything before reading anything can deadlock once both
/// pipes fill up: the child blocks writing its output while the parent blocks
/// writing its input.
///
/// # Errors
///
/// Returns an error if the command cannot be spawned, if writing the input
/// fails with anything other than a broken pipe (a filter may legitimately
/// exit without reading all of its input), if collecting the output fails, or
/// if the command exits with a non-zero status.
fn run_filter(cmd: &str, data: &[u8], rel_path: &str) -> Result<Vec<u8>, std::io::Error> {
    use std::io::{ErrorKind, Write};

    let expanded = expand_filter_command(cmd, rel_path);
    let mut child = Command::new("sh")
        .arg("-c")
        .arg(&expanded)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::inherit())
        .spawn()?;

    let stdin = child.stdin.take();
    let (fed, collected) = std::thread::scope(|scope| {
        let feeder = scope.spawn(move || -> std::io::Result<()> {
            let Some(mut pipe) = stdin else {
                return Ok(());
            };
            // `pipe` is dropped when this closure returns, which closes the
            // child's stdin and signals end of input.
            match pipe.write_all(data) {
                Err(e) if e.kind() != ErrorKind::BrokenPipe => Err(e),
                _ => Ok(()),
            }
        });
        // Drain stdout concurrently so the child never stalls on a full pipe.
        let collected = child.wait_with_output();
        let fed = feeder.join().unwrap_or_else(|_| {
            Err(std::io::Error::other("filter stdin writer thread panicked"))
        });
        (fed, collected)
    });

    fed?;
    let output = collected?;
    if !output.status.success() {
        return Err(std::io::Error::other(format!(
            "filter command exited with status {}",
            output.status
        )));
    }
    Ok(output.stdout)
}
/// A list of [`AttrRule`] entries.
pub type GitAttributes = Vec<AttrRule>;
// Unit tests for the line-ending, ident and filter-command helpers in this file.
#[cfg(test)]
mod tests {
use super::*;
// CRLF pairs collapse to LF; LF-only input passes through unchanged.
#[test]
fn test_crlf_to_lf() {
assert_eq!(crlf_to_lf(b"hello\r\nworld\r\n"), b"hello\nworld\n");
assert_eq!(crlf_to_lf(b"hello\nworld\n"), b"hello\nworld\n");
assert_eq!(crlf_to_lf(b"hello\r\n"), b"hello\n");
}
// Lone LFs expand to CRLF; existing CRLF pairs are not doubled.
#[test]
fn test_lf_to_crlf() {
assert_eq!(lf_to_crlf(b"hello\nworld\n"), b"hello\r\nworld\r\n");
assert_eq!(lf_to_crlf(b"hello\r\nworld\r\n"), b"hello\r\nworld\r\n");
}
// `has_crlf` detects a CRLF pair and ignores lone LFs.
#[test]
fn test_has_crlf() {
assert!(has_crlf(b"hello\r\nworld"));
assert!(!has_crlf(b"hello\nworld"));
}
// With autocrlf=true and default attributes, a blob that already mixes LF
// and CRLF is written out unchanged.
#[test]
fn smudge_mixed_line_endings_unchanged_with_autocrlf_true() {
let mut blob = Vec::new();
for part in [
b"Oh\n".as_slice(),
b"here\n",
b"is\n",
b"CRLF\r\n",
b"in\n",
b"text\n",
] {
blob.extend_from_slice(part);
}
let conv = ConversionConfig {
autocrlf: AutoCrlf::True,
eol: CoreEol::Lf,
safecrlf: SafeCrlf::False,
check_roundtrip_encoding: None,
};
let attrs = FileAttrs::default();
let out = convert_to_worktree_eager(&blob, "mixed", &conv, &attrs, None, None).unwrap();
assert_eq!(out, blob);
}
// With autocrlf=true and default attributes, an LF-only blob is written
// out with CRLF line endings.
#[test]
fn smudge_lf_only_gets_crlf_with_autocrlf_true() {
let blob = b"a\nb\n";
let conv = ConversionConfig {
autocrlf: AutoCrlf::True,
eol: CoreEol::Lf,
safecrlf: SafeCrlf::False,
check_roundtrip_encoding: None,
};
let attrs = FileAttrs::default();
let out = convert_to_worktree_eager(blob, "x", &conv, &attrs, None, None).unwrap();
assert_eq!(out, b"a\r\nb\r\n");
}
// A NUL byte marks content as binary; plain text is not binary.
#[test]
fn test_is_binary() {
assert!(is_binary(b"hello\0world"));
assert!(!is_binary(b"hello world"));
}
// A pattern with a trailing slash parses as directory-only and
// basename-only, so it must not match a regular file of the same name.
#[test]
fn attr_dir_only_pattern_does_not_match_same_named_file() {
let rules = parse_gitattributes_content("ignored-only-if-dir/ export-ignore\n");
let rule = &rules[0];
assert!(rule.must_be_dir);
assert!(rule.basename_only);
assert!(!attr_rule_matches(
rule,
"not-ignored-dir/ignored-only-if-dir",
false
));
assert!(attr_rule_matches(rule, "ignored-only-if-dir", true));
}
// Expanding `$Id$` and collapsing the result round-trips to `$Id$`.
#[test]
fn test_expand_collapse_ident() {
let data = b"$Id$";
let expanded = expand_ident(data, "abc123");
assert_eq!(expanded, b"$Id: abc123 $");
let collapsed = collapse_ident(&expanded);
assert_eq!(collapsed, b"$Id$");
}
// A `$Id:` with no closing `$` on its own line is left alone, and does not
// swallow the keyword on the following line.
#[test]
fn expand_ident_does_not_span_lines_for_partial_keyword() {
let data = b"$Id: NoTerminatingSymbol\n$Id: deadbeef $\n";
let expanded = expand_ident(data, "newoid");
assert_eq!(expanded, b"$Id: NoTerminatingSymbol\n$Id: newoid $\n");
}
// An expanded id whose payload contains internal spaces is treated as
// foreign and kept verbatim.
#[test]
fn expand_ident_preserves_foreign_id_with_internal_spaces() {
let data = b"$Id: Foreign Commit With Spaces $\n";
let expanded = expand_ident(data, "abc");
assert_eq!(expanded, data);
}
// `%f` is replaced by the single-quoted path (embedded quotes escaped) and
// `%%` by a literal percent sign.
#[test]
fn expand_filter_command_percent_f_quotes_path() {
let s = expand_filter_command("sh ./x.sh %f --extra", "name with 'sq'");
assert_eq!(s, "sh ./x.sh 'name with '\\''sq'\\''' --extra");
assert_eq!(expand_filter_command("a %% b", "p"), "a % b");
}
}