use super::*;
use crate::attributes::*;
use crate::ignore::*;
use crate::index::*;
use crate::index_io::*;
use crate::types_admin::*;
/// Line-ending target resolved for a path by `ContentFilterPlan::resolve`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum EolConversion {
/// No line-ending target was selected.
None,
/// LF line endings (e.g. `eol=lf`, `text=input`, or `core.autocrlf=input`).
Lf,
/// CRLF line endings (e.g. `eol=crlf` or a true `core.autocrlf`).
Crlf,
}
/// How a path's content is classified for line-ending conversion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum TextDecision {
/// The `text`/`crlf` attribute is unset (`-text`); never converted.
Binary,
/// The content is always treated as text.
Text,
/// Text-ness is decided by inspecting the content (`text=auto`).
Auto,
/// Neither attribute nor configuration made a decision.
Unspecified,
}
/// Conversion settings resolved for one path from its attributes and the configuration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct ContentFilterPlan {
/// Text/binary classification used to gate line-ending conversion.
pub(crate) text: TextDecision,
/// Line-ending target to apply when the content is converted.
pub(crate) eol: EolConversion,
/// True when the `ident` attribute is set for the path.
pub(crate) ident: bool,
/// External filter driver named by the `filter` attribute, if one is configured.
pub(crate) driver: Option<FilterDriver>,
/// Encoding derived from the `working-tree-encoding` attribute.
pub(crate) encoding: WtEncoding,
}
/// A filter driver assembled by `resolve_filter_driver` from `filter.<name>.*` configuration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct FilterDriver {
// Raw value of the `filter` attribute; also used (lossily decoded) as the config subsection.
name: Vec<u8>,
// Non-empty `filter.<name>.process` command, if configured.
process: Option<String>,
// Non-empty `filter.<name>.clean` command, if configured.
clean: Option<String>,
// Non-empty `filter.<name>.smudge` command, if configured.
smudge: Option<String>,
// Value of `filter.<name>.required`; defaults to false when absent.
required: bool,
}
/// Interpretation of the `working-tree-encoding` attribute for a path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum WtEncoding {
/// No re-encoding: the attribute is absent, unset, empty, or names UTF-8.
None,
/// The attribute was given as a bare boolean (`Set`), which is rejected by
/// `check_wt_encoding_valid`.
Invalid,
/// Raw encoding name as written in the attribute value.
Named(Vec<u8>),
}
impl WtEncoding {
    /// Classifies the state of the `working-tree-encoding` attribute.
    ///
    /// A missing or unset attribute, an empty value, and any spelling of UTF-8 all mean
    /// "no re-encoding". A bare boolean `Set` is reported as [`WtEncoding::Invalid`].
    /// Every other value is kept verbatim as [`WtEncoding::Named`].
    fn from_attr(state: Option<&AttributeState>) -> WtEncoding {
        let Some(state) = state else {
            return WtEncoding::None;
        };
        match state {
            // NOTE(review): git's convert.c appears to reject both the true and the false
            // form of this attribute; here the unset form is accepted — confirm intended.
            AttributeState::Unset => WtEncoding::None,
            AttributeState::Set => WtEncoding::Invalid,
            AttributeState::Value(label) if label.is_empty() || encoding_name_is_utf8(label) => {
                WtEncoding::None
            }
            AttributeState::Value(label) => WtEncoding::Named(label.clone()),
        }
    }
}
/// Returns true when `name` spells UTF-8 (`UTF-8` or `UTF8`, compared ASCII
/// case-insensitively), i.e. when its `utf_suffix` is exactly `"8"`.
pub(crate) fn encoding_name_is_utf8(name: &[u8]) -> bool {
    match utf_suffix(name) {
        Some(suffix) => suffix == "8",
        None => false,
    }
}
/// Extracts the part of a `UTF…` encoding name that follows the `UTF` prefix.
///
/// The name is ASCII-uppercased first; one optional `-` directly after `UTF` is dropped.
/// Returns `None` when `name` is not valid UTF-8 or does not start with `UTF`.
pub(crate) fn utf_suffix(name: &[u8]) -> Option<String> {
    let text = std::str::from_utf8(name).ok()?;
    let mut normalized = text.to_ascii_uppercase();
    if !normalized.starts_with("UTF") {
        return None;
    }
    // The prefix is three ASCII bytes, so the cut is on a character boundary.
    normalized.drain(..3);
    if normalized.starts_with('-') {
        normalized.remove(0);
    }
    Some(normalized)
}
/// Byte-order-mark violation reported by `utf_bom_problem`.
#[derive(Clone, Copy)]
pub(crate) enum BomProblem {
/// The data starts with a BOM although the encoding name fixes the byte order.
Prohibited,
/// The data lacks a BOM although the encoding name leaves the byte order open.
Required,
}
/// Checks `data` against the BOM rules implied by a normalized UTF `suffix`.
///
/// `16LE`/`16BE`/`32LE`/`32BE` must not start with a BOM of their width; `16`/`32` must.
/// Any other suffix has no BOM rule and yields `None`.
pub(crate) fn utf_bom_problem(suffix: &str, data: &[u8]) -> Option<BomProblem> {
    let utf16_bom = || matches!(data, [0xFF, 0xFE, ..] | [0xFE, 0xFF, ..]);
    let utf32_bom = || matches!(data, [0xFF, 0xFE, 0, 0, ..] | [0, 0, 0xFE, 0xFF, ..]);
    let (has_bom, bom_required) = match suffix {
        "16LE" | "16BE" => (utf16_bom(), false),
        "32LE" | "32BE" => (utf32_bom(), false),
        "16" => (utf16_bom(), true),
        "32" => (utf32_bom(), true),
        _ => return None,
    };
    match (bom_required, has_bom) {
        (false, true) => Some(BomProblem::Prohibited),
        (true, false) => Some(BomProblem::Required),
        _ => None,
    }
}
/// True when compiled for a little-endian target; used as the byte order whenever an
/// encoding name or the data itself does not determine one.
pub(crate) const HOST_LE: bool = cfg!(target_endian = "little");
/// Decodes `data` from the UTF-16/UTF-32 flavour named by `suffix` into UTF-8 bytes.
///
/// `…LE`/`…BE` fix the byte order. The bare width and the `…-BOM` variants take the byte
/// order from a leading BOM (which is removed), falling back to the host order.
/// Returns `None` for an unsupported suffix or undecodable data.
pub(crate) fn decode_to_utf8(suffix: &str, data: &[u8]) -> Option<Vec<u8>> {
    let (wide, variant) = if let Some(variant) = suffix.strip_prefix("16") {
        (false, variant)
    } else if let Some(variant) = suffix.strip_prefix("32") {
        (true, variant)
    } else {
        return None;
    };
    // `Some(le)` when the name pins the byte order, `None` when a BOM decides.
    let pinned_order = match variant {
        "LE" => Some(true),
        "BE" => Some(false),
        "" | "LE-BOM" | "BE-BOM" => None,
        _ => return None,
    };
    match (wide, pinned_order) {
        (false, Some(le)) => decode_utf16(data, le),
        (false, None) => {
            let (le, body) = strip_utf16_bom(data);
            decode_utf16(body, le)
        }
        (true, Some(le)) => decode_utf32(data, le),
        (true, None) => {
            let (le, body) = strip_utf32_bom(data);
            decode_utf32(body, le)
        }
    }
}
/// Encodes UTF-8 bytes into the UTF-16/UTF-32 flavour named by `suffix`.
///
/// `…LE`/`…BE` write no BOM, `…LE-BOM`/`…BE-BOM` write one, and the bare width writes a
/// BOM in host byte order. Returns `None` for an unsupported suffix or invalid UTF-8.
pub(crate) fn encode_from_utf8(suffix: &str, utf8: &[u8]) -> Option<Vec<u8>> {
    let (wide, variant) = if let Some(variant) = suffix.strip_prefix("16") {
        (false, variant)
    } else if let Some(variant) = suffix.strip_prefix("32") {
        (true, variant)
    } else {
        return None;
    };
    let (le, bom) = match variant {
        "LE" => (true, false),
        "BE" => (false, false),
        "LE-BOM" => (true, true),
        "BE-BOM" => (false, true),
        "" => (HOST_LE, true),
        _ => return None,
    };
    if wide {
        encode_utf32(utf8, le, bom)
    } else {
        encode_utf16(utf8, le, bom)
    }
}
/// Splits a leading UTF-16 BOM off `data`.
///
/// Returns `(little_endian, rest)`. Without a BOM the host byte order is reported and
/// `data` is returned unchanged.
pub(crate) fn strip_utf16_bom(data: &[u8]) -> (bool, &[u8]) {
    match data {
        [0xFF, 0xFE, rest @ ..] => (true, rest),
        [0xFE, 0xFF, rest @ ..] => (false, rest),
        _ => (HOST_LE, data),
    }
}
/// Splits a leading UTF-32 BOM off `data`.
///
/// Returns `(little_endian, rest)`. Without a BOM the host byte order is reported and
/// `data` is returned unchanged.
pub(crate) fn strip_utf32_bom(data: &[u8]) -> (bool, &[u8]) {
    match data {
        [0xFF, 0xFE, 0, 0, rest @ ..] => (true, rest),
        [0, 0, 0xFE, 0xFF, rest @ ..] => (false, rest),
        _ => (HOST_LE, data),
    }
}
/// Decodes BOM-less UTF-16 bytes in the given byte order into UTF-8 bytes.
///
/// Returns `None` when the length is odd or the data contains an unpaired surrogate.
pub(crate) fn decode_utf16(data: &[u8], le: bool) -> Option<Vec<u8>> {
    if !data.len().is_multiple_of(2) {
        return None;
    }
    let to_unit: fn([u8; 2]) -> u16 = if le {
        u16::from_le_bytes
    } else {
        u16::from_be_bytes
    };
    let units = data
        .chunks_exact(2)
        .map(|pair| to_unit([pair[0], pair[1]]));
    // Collecting into `Result` stops at the first invalid code unit.
    let text = char::decode_utf16(units)
        .collect::<std::result::Result<String, _>>()
        .ok()?;
    Some(text.into_bytes())
}
/// Decodes BOM-less UTF-32 bytes in the given byte order into UTF-8 bytes.
///
/// Returns `None` when the length is not a multiple of four or a value is not a valid
/// Unicode scalar.
pub(crate) fn decode_utf32(data: &[u8], le: bool) -> Option<Vec<u8>> {
    if !data.len().is_multiple_of(4) {
        return None;
    }
    let text = data
        .chunks_exact(4)
        .map(|quad| {
            let bytes = [quad[0], quad[1], quad[2], quad[3]];
            let scalar = if le {
                u32::from_le_bytes(bytes)
            } else {
                u32::from_be_bytes(bytes)
            };
            char::from_u32(scalar)
        })
        .collect::<Option<String>>()?;
    Some(text.into_bytes())
}
/// Encodes UTF-8 bytes as UTF-16 in the given byte order, optionally prefixed with a BOM.
///
/// Returns `None` when `utf8` is not valid UTF-8.
pub(crate) fn encode_utf16(utf8: &[u8], le: bool, bom: bool) -> Option<Vec<u8>> {
    let text = std::str::from_utf8(utf8).ok()?;
    let mut out = Vec::with_capacity(utf8.len() * 2 + 2);
    // The BOM is U+FEFF serialized like any other code unit, so it can lead the stream.
    let units = bom.then_some(0xFEFF_u16).into_iter().chain(text.encode_utf16());
    for unit in units {
        let bytes = if le {
            unit.to_le_bytes()
        } else {
            unit.to_be_bytes()
        };
        out.extend_from_slice(&bytes);
    }
    Some(out)
}
/// Encodes UTF-8 bytes as UTF-32 in the given byte order, optionally prefixed with a BOM.
///
/// Returns `None` when `utf8` is not valid UTF-8.
pub(crate) fn encode_utf32(utf8: &[u8], le: bool, bom: bool) -> Option<Vec<u8>> {
    let text = std::str::from_utf8(utf8).ok()?;
    let mut out = Vec::with_capacity(utf8.len() * 4 + 4);
    // The BOM is U+FEFF serialized like any other scalar, so it can lead the stream.
    let scalars = bom
        .then_some(0xFEFF_u32)
        .into_iter()
        .chain(text.chars().map(u32::from));
    for scalar in scalars {
        let bytes = if le {
            scalar.to_le_bytes()
        } else {
            scalar.to_be_bytes()
        };
        out.extend_from_slice(&bytes);
    }
    Some(out)
}
/// Rejects a boolean `working-tree-encoding` attribute.
///
/// # Errors
/// For [`WtEncoding::Invalid`], prints a `fatal:` message to stderr and returns
/// `GitError::Exit(128)`.
pub(crate) fn check_wt_encoding_valid(encoding: &WtEncoding) -> Result<()> {
    match encoding {
        WtEncoding::Invalid => {
            eprintln!("fatal: true/false are no valid working-tree-encodings");
            Err(GitError::Exit(128))
        }
        WtEncoding::None | WtEncoding::Named(_) => Ok(()),
    }
}
/// Re-encodes working-tree content into UTF-8 according to `encoding`.
///
/// Content passes through untouched when no encoding applies or the data is empty.
/// BOM violations and decode failures are reported through `report_encode_failure`:
/// fatal when `write_object` is true, otherwise an `error:` line with the original
/// data returned unchanged.
///
/// # Errors
/// Returns `GitError::Exit(128)` for an invalid encoding attribute, or for any
/// encoding problem while `write_object` is true.
pub(crate) fn encode_to_git<'a>(
    encoding: &WtEncoding,
    path: &[u8],
    data: Cow<'a, [u8]>,
    write_object: bool,
) -> Result<Cow<'a, [u8]>> {
    let name = match encoding {
        WtEncoding::Named(name) => name,
        WtEncoding::None => return Ok(data),
        WtEncoding::Invalid => {
            check_wt_encoding_valid(encoding)?;
            return Ok(data);
        }
    };
    if data.is_empty() {
        return Ok(data);
    }
    let display = String::from_utf8_lossy(path);
    let enc = String::from_utf8_lossy(name);
    let decode_failure = || format!("failed to encode '{display}' from {enc} to UTF-8");

    // Names that are not `UTF…` cannot be decoded here at all.
    let Some(suffix) = utf_suffix(name) else {
        report_encode_failure(write_object, &decode_failure())?;
        return Ok(data);
    };

    if let Some(problem) = utf_bom_problem(&suffix, &data) {
        // The leading width digits ("16"/"32") feed the hint text.
        let number = &suffix[..2.min(suffix.len())];
        let (hint, message) = match problem {
            BomProblem::Prohibited => (
                format!(
                    "hint: The file '{display}' contains a byte order mark (BOM). \
                     Please use UTF-{number} as working-tree-encoding."
                ),
                format!("BOM is prohibited in '{display}' if encoded as {enc}"),
            ),
            BomProblem::Required => (
                format!(
                    "hint: The file '{display}' is missing a byte order mark (BOM). \
                     Please use UTF-{number}BE or UTF-{number}LE (depending on the byte order) as \
                     working-tree-encoding."
                ),
                format!("BOM is required in '{display}' if encoded as {enc}"),
            ),
        };
        eprintln!("{hint}");
        report_encode_failure(write_object, &message)?;
        return Ok(data);
    }

    if let Some(utf8) = decode_to_utf8(&suffix, &data) {
        return Ok(Cow::Owned(utf8));
    }
    report_encode_failure(write_object, &decode_failure())?;
    Ok(data)
}
/// Re-encodes UTF-8 content into the working-tree encoding named by `encoding`.
///
/// Content passes through untouched when no encoding applies or the data is empty.
/// If encoding fails, an `error:` line is printed and the original data is returned.
///
/// # Errors
/// Returns `GitError::Exit(128)` only for an invalid encoding attribute.
pub(crate) fn encode_to_worktree<'a>(
    encoding: &WtEncoding,
    path: &[u8],
    data: Cow<'a, [u8]>,
) -> Result<Cow<'a, [u8]>> {
    let name = match encoding {
        WtEncoding::Named(name) => name,
        WtEncoding::None => return Ok(data),
        WtEncoding::Invalid => {
            check_wt_encoding_valid(encoding)?;
            return Ok(data);
        }
    };
    if data.is_empty() {
        return Ok(data);
    }
    let encoded = utf_suffix(name).and_then(|suffix| encode_from_utf8(&suffix, &data));
    if let Some(encoded) = encoded {
        return Ok(Cow::Owned(encoded));
    }
    let display = String::from_utf8_lossy(path);
    let enc = String::from_utf8_lossy(name);
    eprintln!("error: failed to encode '{display}' from UTF-8 to {enc}");
    Ok(data)
}
/// Prints an encoding failure to stderr with a severity chosen by `write_object`.
///
/// # Errors
/// When `write_object` is true the message is printed as `fatal:` and
/// `GitError::Exit(128)` is returned; otherwise it is printed as `error:` and the call
/// succeeds.
pub(crate) fn report_encode_failure(write_object: bool, message: &str) -> Result<()> {
    let severity = if write_object { "fatal" } else { "error" };
    eprintln!("{severity}: {message}");
    if write_object {
        return Err(GitError::Exit(128));
    }
    Ok(())
}
/// Interprets the state of a `crlf`-style attribute.
///
/// Returns the text decision together with any line-ending target the value forces
/// (`input` forces LF). Missing attributes and unrecognised values yield
/// `(Unspecified, None)`.
pub(crate) fn decode_crlf_family_attribute(
    state: Option<&AttributeState>,
) -> (TextDecision, EolConversion) {
    let undecided = (TextDecision::Unspecified, EolConversion::None);
    match state {
        None => undecided,
        Some(AttributeState::Set) => (TextDecision::Text, EolConversion::None),
        Some(AttributeState::Unset) => (TextDecision::Binary, EolConversion::None),
        Some(AttributeState::Value(value)) => match value.as_slice() {
            b"auto" => (TextDecision::Auto, EolConversion::None),
            b"input" => (TextDecision::Text, EolConversion::Lf),
            _ => undecided,
        },
    }
}
impl ContentFilterPlan {
/// Builds the conversion plan for one path from its attribute checks and the configuration.
///
/// Reads the `text`, `crlf`, `ident`, `eol`, `filter` and `working-tree-encoding`
/// entries of `checks`; entries that are missing are treated as unspecified.
fn resolve(config: &GitConfig, checks: &[AttributeCheck]) -> Self {
let text_attr = checks.iter().find(|check| check.attribute == b"text");
let crlf_attr = checks.iter().find(|check| check.attribute == b"crlf");
let ident_attr = checks.iter().find(|check| check.attribute == b"ident");
let eol_attr = checks.iter().find(|check| check.attribute == b"eol");
let filter_attr = checks.iter().find(|check| check.attribute == b"filter");
let encoding_attr = checks
.iter()
.find(|check| check.attribute == b"working-tree-encoding");
let encoding = WtEncoding::from_attr(encoding_attr.and_then(|check| check.state.as_ref()));
// Only a valued `eol` attribute (e.g. `eol=lf`) is considered below.
let eol_value = eol_attr.and_then(|check| match &check.state {
Some(AttributeState::Value(value)) => Some(value.clone()),
_ => None,
});
// Line-ending target forced by `text=input` / `crlf=input`.
let mut forced_eol = EolConversion::None;
let mut text = match text_attr.map(|check| &check.state) {
Some(Some(AttributeState::Set)) => TextDecision::Text,
Some(Some(AttributeState::Unset)) => TextDecision::Binary,
Some(Some(AttributeState::Value(value))) if value == b"auto" => TextDecision::Auto,
Some(Some(AttributeState::Value(value))) if value == b"input" => {
forced_eol = EolConversion::Lf;
TextDecision::Text
}
// NOTE(review): any other `text=<value>` is treated as text here; git's convert.c
// appears to treat unknown values as undefined and consult `crlf` — confirm intended.
Some(Some(AttributeState::Value(_))) => TextDecision::Text,
// `text` is absent or unspecified: fall back to the legacy `crlf` attribute.
_ => {
let (decision, eol) =
decode_crlf_family_attribute(crlf_attr.and_then(|check| check.state.as_ref()));
forced_eol = eol;
decision
}
};
// Priority: binary disables conversion; an explicit `eol` value wins (and promotes an
// undecided path to text); then a forced LF; finally the configuration.
let eol = match (&text, eol_value.as_deref()) {
(TextDecision::Binary, _) => EolConversion::None,
(_, Some(b"crlf")) => {
if text == TextDecision::Unspecified {
text = TextDecision::Text;
}
EolConversion::Crlf
}
(_, Some(b"lf")) => {
if text == TextDecision::Unspecified {
text = TextDecision::Text;
}
EolConversion::Lf
}
_ if forced_eol == EolConversion::Lf => EolConversion::Lf,
_ => eol_from_config(config),
};
// Text and auto paths with no configured target default to LF.
let eol = match (&text, eol) {
(TextDecision::Text | TextDecision::Auto, EolConversion::None) => EolConversion::Lf,
(_, eol) => eol,
};
// A still-undecided path becomes auto-detected when `core.autocrlf` is enabled.
// The second tuple element is ignored by both arms.
let text = match (text, eol_attr.is_some()) {
(TextDecision::Unspecified, _) => {
if autocrlf_enabled(config) {
TextDecision::Auto
} else {
TextDecision::Unspecified
}
}
(text, _) => text,
};
let driver = resolve_filter_driver(config, filter_attr);
// Only a plainly set `ident` attribute enables ident handling.
let ident = matches!(
ident_attr.and_then(|check| check.state.as_ref()),
Some(AttributeState::Set)
);
ContentFilterPlan {
text,
eol,
ident,
driver,
encoding,
}
}
/// Returns true when line endings of `content` should be converted under this plan.
///
/// Binary and unspecified paths are never converted; auto paths additionally require
/// that `looks_binary` rejects the content.
fn convert_eol(&self, content: &[u8]) -> bool {
match self.text {
TextDecision::Binary | TextDecision::Unspecified => false,
TextDecision::Text => self.eol != EolConversion::None,
TextDecision::Auto => self.eol != EolConversion::None && !looks_binary(content),
}
}
/// Returns true when `content` would have LF converted to CRLF under this plan,
/// based on statistics gathered from the content.
pub(crate) fn will_convert_lf_to_crlf(&self, content: &[u8]) -> bool {
self.will_convert_lf_to_crlf_stats(&gather_convert_stats(content))
}
/// Statistics-based form of `will_convert_lf_to_crlf`.
///
/// Requires a CRLF target and at least one lone LF. For auto paths, content that
/// already holds a lone CR or a CRLF, or that `convert_is_binary` flags, is left alone.
fn will_convert_lf_to_crlf_stats(&self, stats: &ConvertStats) -> bool {
if self.eol != EolConversion::Crlf {
return false;
}
if stats.lonelf == 0 {
return false;
}
if self.text == TextDecision::Auto {
if stats.lonecr > 0 || stats.crlf > 0 {
return false;
}
if convert_is_binary(stats) {
return false;
}
}
true
}
/// Returns true when the safecrlf check is relevant, i.e. for text and auto paths.
fn safecrlf_applies(&self) -> bool {
matches!(self.text, TextDecision::Text | TextDecision::Auto)
}
/// Simulates the clean and checkout conversions on `old_stats` and hands the
/// before/after statistics to `check_safe_crlf`.
///
/// Returns `Ok(())` without checking when `flags` is `ConvFlags::Off`, when the plan is
/// not text/auto, or when an auto path's statistics look binary.
fn check_safe_crlf_stats(
&self,
old_stats: &ConvertStats,
index_has_crlf: bool,
flags: ConvFlags,
path: &[u8],
) -> Result<()> {
if flags == ConvFlags::Off || !self.safecrlf_applies() {
return Ok(());
}
let mut convert_crlf_into_lf = old_stats.crlf > 0;
if self.text == TextDecision::Auto {
if convert_is_binary(old_stats) {
return Ok(());
}
// For auto paths, CRLF already present in the index suppresses CRLF -> LF.
if index_has_crlf {
convert_crlf_into_lf = false;
}
}
let mut new_stats = old_stats.clone();
// Step 1: model CRLF -> LF.
if convert_crlf_into_lf {
new_stats.lonelf += new_stats.crlf;
new_stats.crlf = 0;
}
// Step 2: model LF -> CRLF on the result of step 1.
if self.will_convert_lf_to_crlf_stats(&new_stats) {
new_stats.crlf += new_stats.lonelf;
new_stats.lonelf = 0;
}
check_safe_crlf(old_stats, &new_stats, flags, path)
}
}
/// Derives the line-ending target from `core.autocrlf` and `core.eol`.
///
/// `core.autocrlf=input` selects LF and a true `core.autocrlf` selects CRLF. Otherwise
/// `core.eol` (`crlf` / `lf`, case-insensitive) decides, defaulting to no target.
pub(crate) fn eol_from_config(config: &GitConfig) -> EolConversion {
    let autocrlf = config
        .get("core", None, "autocrlf")
        .map(|raw| raw.to_ascii_lowercase());
    match autocrlf.as_deref() {
        Some("input") => return EolConversion::Lf,
        Some("true" | "yes" | "on" | "1") => return EolConversion::Crlf,
        _ => {}
    }
    // Catches any remaining spelling the config layer itself accepts as true.
    if let Some(true) = config.get_bool("core", None, "autocrlf") {
        return EolConversion::Crlf;
    }
    let core_eol = config
        .get("core", None, "eol")
        .map(|raw| raw.to_ascii_lowercase());
    match core_eol.as_deref() {
        Some("crlf") => EolConversion::Crlf,
        Some("lf") => EolConversion::Lf,
        _ => EolConversion::None,
    }
}
pub(crate) fn autocrlf_enabled(config: &GitConfig) -> bool {
if let Some(value) = config.get("core", None, "autocrlf")
&& value.eq_ignore_ascii_case("input")
{
return true;
}
config.get_bool("core", None, "autocrlf") == Some(true)
}
/// Resolves the filter driver named by a path's `filter` attribute.
///
/// Returns `None` unless the attribute carries a value and the matching
/// `filter.<name>` configuration defines at least one non-empty command or marks the
/// filter as required.
pub(crate) fn resolve_filter_driver(
    config: &GitConfig,
    filter_attr: Option<&AttributeCheck>,
) -> Option<FilterDriver> {
    let Some(AttributeState::Value(value)) = filter_attr.and_then(|check| check.state.as_ref())
    else {
        return None;
    };
    let name = value.clone();
    let subsection = String::from_utf8_lossy(&name).into_owned();
    // An empty command string counts as "not configured".
    let command =
        |key: &str| filter_config_value(config, &subsection, key).filter(|cmd| !cmd.is_empty());
    let process = command("process");
    let clean = command("clean");
    let smudge = command("smudge");
    let required = filter_config_bool(config, &subsection, "required").unwrap_or(false);

    let configured = required || process.is_some() || clean.is_some() || smudge.is_some();
    if !configured {
        return None;
    }
    Some(FilterDriver {
        name,
        process,
        clean,
        smudge,
        required,
    })
}
/// Looks up `filter.<subsection>.<key>` in `config`, falling back to the default
/// configuration layers when the key is absent there.
pub(crate) fn filter_config_value(
    config: &GitConfig,
    subsection: &str,
    key: &str,
) -> Option<String> {
    match config.get("filter", Some(subsection), key) {
        Some(value) => Some(value.to_owned()),
        None => global_filter_config_value(subsection, key),
    }
}
/// Looks up `filter.<subsection>.<key>` as a boolean, falling back to the default
/// configuration layers (parsed with `sley_config::parse_config_bool`).
pub(crate) fn filter_config_bool(config: &GitConfig, subsection: &str, key: &str) -> Option<bool> {
    if let Some(flag) = config.get_bool("filter", Some(subsection), key) {
        return Some(flag);
    }
    let raw = global_filter_config_value(subsection, key)?;
    sley_config::parse_config_bool(&raw)
}
/// Searches the default configuration layers, last layer first, for
/// `filter.<subsection>.<key>`.
///
/// Layers that cannot be read are skipped; the first layer defining the key wins.
pub(crate) fn global_filter_config_value(subsection: &str, key: &str) -> Option<String> {
    sley_config::default_config_layer_paths()
        .into_iter()
        .rev()
        .filter_map(|(path, _)| GitConfig::read(path).ok())
        .find_map(|layer| {
            layer
                .get("filter", Some(subsection), key)
                .map(|value| value.to_owned())
        })
}
/// Heuristic binary check: true when a NUL byte occurs within the first 8000 bytes.
pub(crate) fn looks_binary(content: &[u8]) -> bool {
    const FIRST_FEW_BYTES: usize = 8000;
    content
        .iter()
        .take(FIRST_FEW_BYTES)
        .any(|&byte| byte == 0)
}
/// Replaces every CRLF pair with a single LF.
///
/// Content without any CRLF pair is returned as-is, without allocating. A CR that is
/// not directly followed by LF is preserved.
pub(crate) fn convert_crlf_to_lf_cow(content: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
    let has_pair = content
        .windows(2)
        .any(|pair| pair[0] == b'\r' && pair[1] == b'\n');
    if !has_pair {
        return content;
    }
    let mut out = Vec::with_capacity(content.len());
    let mut bytes = content.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        // Drop only the CR of a CRLF pair; the LF is emitted on the next round.
        let starts_crlf = byte == b'\r' && bytes.peek() == Some(&b'\n');
        if !starts_crlf {
            out.push(byte);
        }
    }
    Cow::Owned(out)
}
/// Inserts a CR before every LF that is not already preceded by one.
pub(crate) fn convert_lf_to_crlf(content: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(content.len() + content.len() / 16);
    for (index, &byte) in content.iter().enumerate() {
        let preceded_by_cr = index
            .checked_sub(1)
            .is_some_and(|previous| content[previous] == b'\r');
        if byte == b'\n' && !preceded_by_cr {
            out.push(b'\r');
        }
        out.push(byte);
    }
    out
}
/// Collapses every expanded `$Id: … $` keyword back to `$Id$`.
///
/// Content for which `has_git_ident` finds nothing is returned as-is. A candidate whose
/// expansion spans a line break is left untouched.
pub(crate) fn ident_to_git_cow(content: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
    let input = content.as_ref();
    if !has_git_ident(input) {
        return content;
    }
    let mut out = Vec::with_capacity(input.len());
    // `rest` is the not-yet-copied tail of the input.
    let mut rest = input;
    while let Some(at) = rest.iter().position(|&byte| byte == b'$') {
        let (through_dollar, tail) = rest.split_at(at + 1);
        out.extend_from_slice(through_dollar);
        rest = tail;
        if rest.len() <= 3 || !rest.starts_with(b"Id:") {
            continue;
        }
        let body = &rest[3..];
        let Some(close) = body.iter().position(|&byte| byte == b'$') else {
            break;
        };
        if body[..close].contains(&b'\n') {
            // Not a keyword; the next round re-examines the `$` that was just found.
            continue;
        }
        out.extend_from_slice(b"Id$");
        rest = &body[close + 1..];
    }
    out.extend_from_slice(rest);
    Cow::Owned(out)
}
/// Expands `$Id$` (and re-expands `$Id: … $`) to `$Id: <blob id> $`.
///
/// The id is the blob object id of the unexpanded `content`. Content for which
/// `has_git_ident` finds nothing is returned as-is. Candidates that span a line break
/// or that `is_foreign_ident` flags are left untouched.
///
/// # Errors
/// Propagates a failure to compute the blob object id.
pub(crate) fn ident_to_worktree_cow(
    format: ObjectFormat,
    content: Cow<'_, [u8]>,
) -> Result<Cow<'_, [u8]>> {
    let input = content.as_ref();
    if !has_git_ident(input) {
        return Ok(content);
    }
    let oid = EncodedObject::new(ObjectType::Blob, input.to_vec()).object_id(format)?;
    let replacement = format!("Id: {} $", oid.to_hex());
    let mut out = Vec::with_capacity(input.len() + replacement.len());
    // `rest` is the not-yet-copied tail of the input.
    let mut rest = input;
    while let Some(at) = rest.iter().position(|&byte| byte == b'$') {
        let (through_dollar, tail) = rest.split_at(at + 1);
        out.extend_from_slice(through_dollar);
        rest = tail;
        if rest.len() < 3 || !rest.starts_with(b"Id") {
            continue;
        }
        // Skip over the existing keyword text; what follows is the new tail.
        rest = match rest[2] {
            b'$' => &rest[3..],
            b':' => {
                let body = &rest[3..];
                let Some(close) = body.iter().position(|&byte| byte == b'$') else {
                    break;
                };
                let expansion = &body[..close];
                if expansion.contains(&b'\n') || is_foreign_ident(expansion) {
                    continue;
                }
                &body[close + 1..]
            }
            _ => continue,
        };
        out.extend_from_slice(replacement.as_bytes());
    }
    out.extend_from_slice(rest);
    Ok(Cow::Owned(out))
}
/// Returns true when `content` contains at least one `$Id$` or single-line `$Id: … $`
/// keyword.
///
/// This is the pre-check used by the ident conversions, so it must accept every keyword
/// those conversions would rewrite.
pub(crate) fn has_git_ident(content: &[u8]) -> bool {
    let mut pos = 0;
    while let Some(relative) = content[pos..].iter().position(|byte| *byte == b'$') {
        let start = pos + relative + 1;
        // Fewer than three bytes after the `$` cannot hold `Id$` or `Id:`.
        if content.len().saturating_sub(start) < 3 {
            break;
        }
        if !content[start..].starts_with(b"Id") {
            pos = start;
            continue;
        }
        match content.get(start + 2) {
            Some(b'$') => return true,
            Some(b':') => {
                let search = &content[start + 3..];
                let Some(end_relative) = search.iter().position(|byte| *byte == b'$') else {
                    // No further `$` at all, so no later keyword can exist either.
                    break;
                };
                let end = start + 3 + end_relative;
                if !content[start + 3..end].contains(&b'\n') {
                    return true;
                }
                // A line break invalidates this candidate, but the `$` at `end` may open
                // the next keyword (e.g. "$Id: x\n$Id$"), so resume the scan AT it
                // instead of after it.
                pos = end;
            }
            _ => pos = start,
        }
    }
    false
}
/// Returns true when the text between `$Id:` and the closing `$` contains a space
/// anywhere except in its first or last byte.
pub(crate) fn is_foreign_ident(expansion: &[u8]) -> bool {
    match expansion {
        [_, interior @ .., _] => interior.contains(&b' '),
        _ => false,
    }
}
/// Runs a one-shot filter command through the shell, feeding `content` on stdin and
/// returning what the command wrote to stdout.
///
/// Every `%f` in `command` is replaced with the shell-quoted (lossily decoded) path.
///
/// # Errors
/// Returns `GitError::Command` when the command cannot be spawned or waited for, or
/// when it exits unsuccessfully (the message then includes its trimmed stderr).
pub(crate) fn run_filter_command(command: &str, path: &[u8], content: &[u8]) -> Result<Vec<u8>> {
    let quoted_path = shell_quote(&String::from_utf8_lossy(path));
    let script = command.replace("%f", &quoted_path);
    let (program, switch) = match cfg!(windows) {
        true => ("cmd", "/C"),
        false => ("/bin/sh", "-c"),
    };
    let mut shell = Command::new(program);
    shell
        .arg(switch)
        .arg(&script)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped());
    let mut child = shell
        .spawn()
        .map_err(|err| GitError::Command(format!("failed to spawn filter `{command}`: {err}")))?;
    let Some(mut stdin) = child.stdin.take() else {
        return Err(GitError::Command(format!(
            "filter `{command}` stdin unavailable"
        )));
    };
    // Feed stdin from a separate thread so stdout/stderr are drained concurrently.
    // Dropping `stdin` at the end of the closure signals end-of-input to the filter.
    let payload = content.to_vec();
    let writer = std::thread::spawn(move || {
        let _ = stdin.write_all(&payload);
    });
    let output = child
        .wait_with_output()
        .map_err(|err| GitError::Command(format!("filter `{command}` failed: {err}")))?;
    let _ = writer.join();
    if output.status.success() {
        return Ok(output.stdout);
    }
    let stderr = String::from_utf8_lossy(&output.stderr);
    Err(GitError::Command(format!(
        "filter `{command}` exited with {}: {}",
        output.status,
        stderr.trim()
    )))
}
/// Capability bit recorded when a process filter advertises `capability=clean`.
pub(crate) const PROCESS_CAP_CLEAN: u8 = 1;
/// Capability bit recorded when a process filter advertises `capability=smudge`.
pub(crate) const PROCESS_CAP_SMUDGE: u8 = 1 << 1;
/// Capability bit recorded when a process filter advertises `capability=delay`.
pub(crate) const PROCESS_CAP_DELAY: u8 = 1 << 2;
/// Largest payload, in bytes, that `write_pkt_content` places in one packet.
pub(crate) const PKT_DATA_MAX: usize = 65_516;
/// Running process filters, keyed by their command string.
pub(crate) static PROCESS_FILTERS: OnceLock<Mutex<HashMap<String, ProcessFilter>>> =
OnceLock::new();
/// Extra `key=value` pairs sent with smudge requests that carry a blob id.
pub(crate) type ProcessFilterMetadata = Vec<(String, String)>;
/// Metadata currently in effect for smudge requests; see `set_process_filter_metadata`.
pub(crate) static PROCESS_FILTER_METADATA: OnceLock<Mutex<Option<ProcessFilterMetadata>>> =
OnceLock::new();
/// Guard returned by `set_process_filter_metadata`; dropping it restores the metadata
/// that was in effect before the call.
pub(crate) struct ProcessFilterMetadataGuard {
// Metadata that was active before this guard was created.
previous: Option<ProcessFilterMetadata>,
}
impl Drop for ProcessFilterMetadataGuard {
    /// Puts the previously active metadata back. Does nothing if the lock is poisoned.
    fn drop(&mut self) {
        let slot = PROCESS_FILTER_METADATA.get_or_init(|| Mutex::new(None));
        let Ok(mut current) = slot.lock() else {
            return;
        };
        *current = self.previous.take();
    }
}
/// Installs `metadata` as the active process-filter metadata.
///
/// The returned guard restores the previous value when dropped. If the lock is
/// poisoned nothing is installed and the guard restores `None`.
pub(crate) fn set_process_filter_metadata(
    metadata: Option<ProcessFilterMetadata>,
) -> ProcessFilterMetadataGuard {
    let slot = PROCESS_FILTER_METADATA.get_or_init(|| Mutex::new(None));
    let previous = match slot.lock() {
        Ok(mut current) => std::mem::replace(&mut *current, metadata),
        Err(_) => None,
    };
    ProcessFilterMetadataGuard { previous }
}
/// Returns a copy of the active process-filter metadata, or `None` when nothing is
/// installed or the lock is poisoned.
pub(crate) fn current_process_filter_metadata() -> Option<ProcessFilterMetadata> {
    let slot = PROCESS_FILTER_METADATA.get_or_init(|| Mutex::new(None));
    match slot.lock() {
        Ok(current) => current.clone(),
        Err(_) => None,
    }
}
/// A running long-lived filter process together with its pipes.
pub(crate) struct ProcessFilter {
// Handle to the spawned shell; killed and reaped when the filter is dropped.
child: Child,
// Pipe used to send pkt-line requests to the filter.
stdin: ChildStdin,
// Pipe used to read pkt-line responses from the filter.
stdout: ChildStdout,
// Bit set of `PROCESS_CAP_*` flags advertised by the filter during the handshake.
capabilities: u8,
}
/// Result of asking a process filter to convert one blob.
pub(crate) enum ProcessFilterOutcome {
/// The filter reported success; holds the converted content.
Filtered(Vec<u8>),
/// The filter did not advertise the requested capability; nothing was sent.
Unsupported,
/// The filter answered with a non-success status (`error`, `abort` or `delayed`).
Status(String),
}
/// Failure raised while talking to a process filter.
pub(crate) struct ProcessFilterFailure {
// Human-readable description of the failure.
message: String,
// True for protocol-level failures; `run_process_filter` discards the filter on these.
protocol: bool,
}
impl ProcessFilterFailure {
    /// Creates a protocol-level failure carrying `message`.
    fn protocol(message: impl Into<String>) -> Self {
        let message = message.into();
        Self {
            message,
            protocol: true,
        }
    }
}
/// Sends one conversion request to the process filter for `command`, starting the
/// filter on first use and caching it for later calls.
///
/// A filter that fails at the protocol level is removed from the cache.
///
/// # Errors
/// Returns a `ProcessFilterFailure` when the cache lock is poisoned, the filter cannot
/// be started, or the exchange fails.
pub(crate) fn run_process_filter(
    command: &str,
    direction: &str,
    path: &[u8],
    content: &[u8],
    blob: Option<ObjectId>,
) -> std::result::Result<ProcessFilterOutcome, ProcessFilterFailure> {
    let registry = PROCESS_FILTERS.get_or_init(|| Mutex::new(HashMap::new()));
    let Ok(mut running) = registry.lock() else {
        return Err(ProcessFilterFailure::protocol(
            "process filter cache poisoned",
        ));
    };
    if !running.contains_key(command) {
        let started = ProcessFilter::start(command)?;
        running.insert(command.to_string(), started);
    }
    let outcome = running
        .get_mut(command)
        .expect("process filter was inserted")
        .apply(direction, path, content, blob);
    // The stream may be out of sync after a protocol failure, so discard the filter.
    if matches!(&outcome, Err(failure) if failure.protocol) {
        running.remove(command);
    }
    outcome
}
impl ProcessFilter {
fn start(command: &str) -> std::result::Result<Self, ProcessFilterFailure> {
let (shell, flag) = if cfg!(windows) {
("cmd", "/C")
} else {
("/bin/sh", "-c")
};
let mut child = Command::new(shell)
.arg(flag)
.arg(command)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::inherit())
.spawn()
.map_err(|err| {
ProcessFilterFailure::protocol(format!(
"cannot fork to run subprocess '{command}': {err}"
))
})?;
let mut stdin = child
.stdin
.take()
.ok_or_else(|| ProcessFilterFailure::protocol("process filter stdin unavailable"))?;
let mut stdout = child
.stdout
.take()
.ok_or_else(|| ProcessFilterFailure::protocol("process filter stdout unavailable"))?;
write_pkt_text(&mut stdin, "git-filter-client\n")?;
write_pkt_text(&mut stdin, "version=2\n")?;
write_flush(&mut stdin)?;
let line = read_pkt_text(&mut stdout)?.ok_or_else(|| {
ProcessFilterFailure::protocol(
"Unexpected line '<flush packet>', expected git-filter-server",
)
})?;
if line != "git-filter-server" {
return Err(ProcessFilterFailure::protocol(format!(
"Unexpected line '{line}', expected git-filter-server"
)));
}
let line = read_pkt_text(&mut stdout)?.ok_or_else(|| {
ProcessFilterFailure::protocol("Unexpected line '<flush packet>', expected version")
})?;
if line != "version=2" {
return Err(ProcessFilterFailure::protocol(format!(
"Unexpected line '{line}', expected version"
)));
}
if let Some(line) = read_pkt_text(&mut stdout)? {
return Err(ProcessFilterFailure::protocol(format!(
"Unexpected line '{line}', expected flush"
)));
}
write_pkt_text(&mut stdin, "capability=clean\n")?;
write_pkt_text(&mut stdin, "capability=smudge\n")?;
write_pkt_text(&mut stdin, "capability=delay\n")?;
write_flush(&mut stdin)?;
let mut capabilities = 0;
while let Some(line) = read_pkt_text(&mut stdout)? {
match line.as_str() {
"capability=clean" => capabilities |= PROCESS_CAP_CLEAN,
"capability=smudge" => capabilities |= PROCESS_CAP_SMUDGE,
"capability=delay" => capabilities |= PROCESS_CAP_DELAY,
_ => {}
}
}
Ok(Self {
child,
stdin,
stdout,
capabilities,
})
}
fn apply(
&mut self,
direction: &str,
path: &[u8],
content: &[u8],
blob: Option<ObjectId>,
) -> std::result::Result<ProcessFilterOutcome, ProcessFilterFailure> {
let wanted = match direction {
"clean" => PROCESS_CAP_CLEAN,
"smudge" => PROCESS_CAP_SMUDGE,
_ => 0,
};
if self.capabilities & wanted == 0 {
return Ok(ProcessFilterOutcome::Unsupported);
}
write_pkt_text(&mut self.stdin, &format!("command={direction}\n"))?;
write_pkt_text(
&mut self.stdin,
&format!("pathname={}\n", String::from_utf8_lossy(path)),
)?;
if direction == "smudge"
&& let Some(blob) = blob
{
if let Some(metadata) = current_process_filter_metadata() {
for (key, value) in metadata {
write_pkt_text(&mut self.stdin, &format!("{key}={value}\n"))?;
}
}
write_pkt_text(&mut self.stdin, &format!("blob={}\n", blob.to_hex()))?;
}
write_flush(&mut self.stdin)?;
write_pkt_content(&mut self.stdin, content)?;
write_flush(&mut self.stdin)?;
let mut status = read_process_status(&mut self.stdout)?.unwrap_or_default();
match status.as_str() {
"success" => {}
"error" | "abort" | "delayed" => return Ok(ProcessFilterOutcome::Status(status)),
other => {
return Err(ProcessFilterFailure::protocol(format!(
"external filter returned unsupported status '{other}'"
)));
}
}
let output = read_pkt_content(&mut self.stdout)?;
if let Some(next) = read_process_status(&mut self.stdout)? {
status = next;
}
match status.as_str() {
"" | "success" => Ok(ProcessFilterOutcome::Filtered(output)),
"error" | "abort" | "delayed" => Ok(ProcessFilterOutcome::Status(status)),
other => Err(ProcessFilterFailure::protocol(format!(
"external filter returned unsupported status '{other}'"
))),
}
}
}
impl Drop for ProcessFilter {
    /// Best-effort shutdown: flush the request pipe, then kill and reap the child.
    /// Every failure is ignored.
    fn drop(&mut self) {
        let Self { stdin, child, .. } = self;
        let _flushed = stdin.flush();
        let _killed = child.kill();
        let _reaped = child.wait();
    }
}
/// Writes `text` as a single pkt-line packet.
pub(crate) fn write_pkt_text(
    writer: &mut ChildStdin,
    text: &str,
) -> std::result::Result<(), ProcessFilterFailure> {
    let payload = text.as_bytes();
    write_pkt_data(writer, payload)
}
/// Writes `content` as a sequence of packets of at most `PKT_DATA_MAX` bytes each.
///
/// Empty content produces no packets; the caller is expected to send the flush.
pub(crate) fn write_pkt_content(
    writer: &mut ChildStdin,
    content: &[u8],
) -> std::result::Result<(), ProcessFilterFailure> {
    content
        .chunks(PKT_DATA_MAX)
        .try_for_each(|chunk| write_pkt_data(writer, chunk))
}
pub(crate) fn write_pkt_data(
writer: &mut ChildStdin,
data: &[u8],
) -> std::result::Result<(), ProcessFilterFailure> {
let len = data.len() + 4;
write!(writer, "{len:04x}")
.and_then(|_| writer.write_all(data))
.map_err(|err| {
ProcessFilterFailure::protocol(format!("process filter write failed: {err}"))
})
}
/// Writes a flush packet (`0000`) and flushes the pipe.
pub(crate) fn write_flush(
    writer: &mut ChildStdin,
) -> std::result::Result<(), ProcessFilterFailure> {
    fn send(writer: &mut ChildStdin) -> std::io::Result<()> {
        writer.write_all(b"0000")?;
        writer.flush()
    }
    match send(writer) {
        Ok(()) => Ok(()),
        Err(err) => Err(ProcessFilterFailure::protocol(format!(
            "process filter write failed: {err}"
        ))),
    }
}
/// Reads one packet as text, dropping a single trailing newline.
///
/// Returns `Ok(None)` for a flush packet. Invalid UTF-8 is decoded lossily.
pub(crate) fn read_pkt_text(
    reader: &mut ChildStdout,
) -> std::result::Result<Option<String>, ProcessFilterFailure> {
    let Some(payload) = read_pkt_data(reader)? else {
        return Ok(None);
    };
    let line = match payload.as_slice() {
        [head @ .., b'\n'] => head,
        whole => whole,
    };
    Ok(Some(String::from_utf8_lossy(line).into_owned()))
}
/// Reads packets up to the next flush packet and returns their concatenated payloads.
pub(crate) fn read_pkt_content(
    reader: &mut ChildStdout,
) -> std::result::Result<Vec<u8>, ProcessFilterFailure> {
    let mut collected = Vec::new();
    loop {
        match read_pkt_data(reader)? {
            Some(chunk) => collected.extend_from_slice(&chunk),
            None => return Ok(collected),
        }
    }
}
pub(crate) fn read_pkt_data(
reader: &mut ChildStdout,
) -> std::result::Result<Option<Vec<u8>>, ProcessFilterFailure> {
let mut header = [0u8; 4];
reader.read_exact(&mut header).map_err(|err| {
ProcessFilterFailure::protocol(format!("process filter read failed: {err}"))
})?;
let header = std::str::from_utf8(&header)
.map_err(|err| ProcessFilterFailure::protocol(format!("invalid pkt-line header: {err}")))?;
let len = usize::from_str_radix(header, 16)
.map_err(|err| ProcessFilterFailure::protocol(format!("invalid pkt-line length: {err}")))?;
if len == 0 {
return Ok(None);
}
if len < 4 {
return Err(ProcessFilterFailure::protocol(format!(
"invalid pkt-line length {len}"
)));
}
let mut data = vec![0; len - 4];
reader.read_exact(&mut data).map_err(|err| {
ProcessFilterFailure::protocol(format!("process filter read failed: {err}"))
})?;
Ok(Some(data))
}
/// Reads text packets up to the next flush packet and returns the value of the last
/// `status=` line among them, if any.
pub(crate) fn read_process_status(
    reader: &mut ChildStdout,
) -> std::result::Result<Option<String>, ProcessFilterFailure> {
    let mut latest = None;
    loop {
        let Some(line) = read_pkt_text(reader)? else {
            return Ok(latest);
        };
        if let Some(value) = line.strip_prefix("status=") {
            latest = Some(value.to_string());
        }
    }
}
/// Wraps `value` in single quotes for a POSIX shell, writing each embedded single
/// quote as `'\''`.
pub(crate) fn shell_quote(value: &str) -> String {
    let escaped = value.replace('\'', "'\\''");
    format!("'{escaped}'")
}
/// Applies the clean-side conversions to `content` for `path`, resolving the path's
/// attributes from the working tree.
///
/// `git_dir` is accepted but not otherwise used by this function.
///
/// # Errors
/// Propagates attribute lookup and conversion failures.
pub fn apply_clean_filter(
    worktree_root: impl AsRef<Path>,
    git_dir: impl AsRef<Path>,
    config: &GitConfig,
    path: &[u8],
    content: &[u8],
) -> Result<Vec<u8>> {
    let _unused_git_dir = git_dir.as_ref();
    let root = worktree_root.as_ref();
    let checks = filter_attribute_checks(root, path)?;
    apply_clean_filter_with_attributes(config, &checks, path, content)
}
/// Attribute matcher loaded once from a working tree, reusable across many paths.
pub struct WorktreeAttributes {
// Matcher built by `AttributeMatcher::from_worktree_root`.
matcher: AttributeMatcher,
}
impl WorktreeAttributes {
    /// Loads the attribute matcher for the working tree rooted at `worktree_root`.
    ///
    /// # Errors
    /// Propagates failures from building the matcher.
    pub fn from_worktree_root(worktree_root: impl AsRef<Path>) -> Result<Self> {
        let matcher = AttributeMatcher::from_worktree_root(worktree_root.as_ref())?;
        Ok(Self { matcher })
    }

    /// Applies the clean-side conversions to `content` using the cached matcher.
    ///
    /// # Errors
    /// Propagates conversion failures.
    pub fn apply_clean_filter(
        &self,
        config: &GitConfig,
        path: &[u8],
        content: &[u8],
    ) -> Result<Vec<u8>> {
        let requested = filter_attribute_names();
        let checks = self.matcher.attributes_for_path(path, &requested, false);
        apply_clean_filter_with_attributes(config, &checks, path, content)
    }
}
/// Attribute matcher built from a tree object plus configured attribute sources.
pub struct TreeAttributes {
// Matcher populated by `TreeAttributes::from_tree`.
matcher: AttributeMatcher,
}
impl TreeAttributes {
    /// Builds a matcher from, in order: the configured attributes (or the default
    /// global ones when none are configured), the attribute patterns collected from
    /// the tree `tree_oid`, and `<git_dir>/info/attributes`.
    ///
    /// # Errors
    /// Propagates failures from collecting patterns out of the tree.
    pub fn from_tree(
        attr_root: impl AsRef<Path>,
        git_dir: impl AsRef<Path>,
        db: &FileObjectDatabase,
        format: ObjectFormat,
        tree_oid: &ObjectId,
    ) -> Result<Self> {
        let attr_root = attr_root.as_ref();
        let git_dir = git_dir.as_ref();
        let mut matcher = AttributeMatcher::default();
        matcher.configure_case_sensitivity(git_dir);
        let configured = matcher.read_configured_attributes(attr_root, git_dir);
        if !configured {
            matcher.read_default_global_attributes();
        }
        collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
        let info_attributes = git_dir.join("info").join("attributes");
        read_attribute_patterns(
            info_attributes,
            &mut matcher,
            &[],
            b"info/attributes",
            false,
        );
        Ok(Self { matcher })
    }

    /// Applies the smudge-side conversions to `content` using this matcher.
    ///
    /// # Errors
    /// Propagates conversion failures.
    pub fn apply_smudge_filter(
        &self,
        config: &GitConfig,
        path: &[u8],
        content: &[u8],
    ) -> Result<Vec<u8>> {
        let requested = filter_attribute_names();
        let checks = self.matcher.attributes_for_path(path, &requested, false);
        apply_smudge_filter_with_attributes(config, &checks, path, content)
    }

    /// Returns the checks for the `requested` attributes on `path`.
    pub fn attributes_for_path(&self, path: &[u8], requested: &[Vec<u8>]) -> Vec<AttributeCheck> {
        self.matcher.attributes_for_path(path, requested, false)
    }

    /// Returns true when `export-subst` is set for `path`.
    pub fn export_subst_for_path(&self, path: &[u8]) -> bool {
        self.attribute_is_set(path, b"export-subst")
    }

    /// Returns true when `export-ignore` is set for `path`.
    pub fn export_ignore_for_path(&self, path: &[u8]) -> bool {
        self.attribute_is_set(path, b"export-ignore")
    }

    /// Returns true when `attribute` is plainly set (not unset, not valued) for `path`.
    fn attribute_is_set(&self, path: &[u8], attribute: &[u8]) -> bool {
        let requested = [attribute.to_vec()];
        let checks = self.matcher.attributes_for_path(path, &requested, false);
        let state = checks.first().and_then(|check| check.state.as_ref());
        matches!(state, Some(AttributeState::Set))
    }

    /// Returns the state of the `diff` attribute for `path`, if any.
    pub fn diff_attribute_for_path(&self, path: &[u8]) -> Option<AttributeState> {
        let requested = [b"diff".to_vec()];
        let checks = self.matcher.attributes_for_path(path, &requested, false);
        checks.into_iter().next()?.state
    }
}
/// Applies the clean-side conversions using already resolved attribute checks and
/// returns an owned buffer.
///
/// # Errors
/// Propagates conversion failures.
pub fn apply_clean_filter_with_attributes(
    config: &GitConfig,
    attributes: &[AttributeCheck],
    path: &[u8],
    content: &[u8],
) -> Result<Vec<u8>> {
    let converted = apply_clean_filter_with_attributes_cow(config, attributes, path, content)?;
    Ok(converted.into_owned())
}
/// Applies the clean-side conversions without a safecrlf check, borrowing `content`
/// when nothing has to change.
///
/// # Errors
/// Propagates conversion failures.
pub fn apply_clean_filter_with_attributes_cow<'a>(
    config: &GitConfig,
    attributes: &[AttributeCheck],
    path: &[u8],
    content: &'a [u8],
) -> Result<Cow<'a, [u8]>> {
    // The safecrlf check is switched off, so no index blob is needed.
    let flags = ConvFlags::Off;
    let index_blob = SafeCrlfIndexBlob::None;
    apply_clean_filter_with_attributes_cow_safecrlf(
        config, attributes, path, content, flags, index_blob,
    )
}
/// Source for the "does the indexed blob already contain CRLF" answer used by the
/// safecrlf check.
pub enum SafeCrlfIndexBlob<'a> {
/// No indexed blob is available; treated as containing no CRLF.
None,
/// Look the blob up in `odb` by `oid` when the answer is needed.
Lookup {
odb: &'a FileObjectDatabase,
oid: ObjectId,
},
}
impl SafeCrlfIndexBlob<'_> {
    /// Returns the result of `has_crlf_in_index` for a `Lookup`, and false for `None`.
    fn has_crlf(&self) -> bool {
        if let SafeCrlfIndexBlob::Lookup { odb, oid } = self {
            has_crlf_in_index(odb, oid)
        } else {
            false
        }
    }
}
/// Clean-side conversion with an explicit safecrlf mode.
///
/// `flags` selects how line-ending round-trip problems are reported and
/// `index_blob` supplies the blob consulted by that check. The call is
/// forwarded to `apply_clean_filter_cow_inner` with `write_object` set to
/// `false`.
///
/// # Errors
/// Propagates any error from the underlying conversion, including a safecrlf
/// failure.
pub fn apply_clean_filter_with_attributes_cow_safecrlf<'a>(
    config: &GitConfig,
    attributes: &[AttributeCheck],
    path: &[u8],
    content: &'a [u8],
    flags: ConvFlags,
    index_blob: SafeCrlfIndexBlob<'_>,
) -> Result<Cow<'a, [u8]>> {
    let write_object = false;
    apply_clean_filter_cow_inner(
        config,
        attributes,
        path,
        content,
        flags,
        index_blob,
        write_object,
    )
}
/// Shared implementation of the clean (worktree -> repository) conversion.
///
/// The stages run in a fixed order, each feeding the next:
/// 1. the external `clean` filter driver, if one is configured;
/// 2. working-tree-encoding conversion (`encode_to_git`);
/// 3. the safecrlf check, when `flags` is not `Off`, the data is non-empty
///    and the plan says the check applies;
/// 4. CRLF -> LF conversion, when the plan asks for it;
/// 5. ident handling (`ident_to_git_cow`), when `ident` is enabled.
///
/// `write_object` is forwarded unchanged to `encode_to_git`.
///
/// # Errors
/// Fails when the working-tree-encoding is invalid, or when any stage
/// (driver, re-encoding, safecrlf check) reports an error.
pub(crate) fn apply_clean_filter_cow_inner<'a>(
    config: &GitConfig,
    attributes: &[AttributeCheck],
    path: &[u8],
    content: &'a [u8],
    flags: ConvFlags,
    index_blob: SafeCrlfIndexBlob<'_>,
    write_object: bool,
) -> Result<Cow<'a, [u8]>> {
    let plan = ContentFilterPlan::resolve(config, attributes);
    check_wt_encoding_valid(&plan.encoding)?;

    let filtered = match &plan.driver {
        Some(driver) => run_driver(
            driver,
            driver.clean.as_deref(),
            "clean",
            None,
            path,
            Cow::Borrowed(content),
        )?,
        None => Cow::Borrowed(content),
    };
    let mut data = encode_to_git(&plan.encoding, path, filtered, write_object)?;

    // The check inspects the data before any line-ending rewrite happens.
    let check_round_trip = flags != ConvFlags::Off && !data.is_empty() && plan.safecrlf_applies();
    if check_round_trip {
        let stats_before = gather_convert_stats(&data);
        plan.check_safe_crlf_stats(&stats_before, index_blob.has_crlf(), flags, path)?;
    }

    if plan.convert_eol(&data) {
        data = convert_crlf_to_lf_cow(data);
    }
    if plan.ident {
        data = ident_to_git_cow(data);
    }
    Ok(data)
}
/// Converts blob `content` into its working-tree form for `path`, resolving
/// the relevant attributes from the worktree and the index.
///
/// Attributes come from `smudge_attribute_checks_from_index`; the conversion
/// is performed by `apply_smudge_filter_with_attributes_cow_format` using
/// `format`, and the result is always returned as an owned buffer.
///
/// # Errors
/// Propagates failures from attribute collection and from the conversion.
pub fn apply_smudge_filter(
    worktree_root: impl AsRef<Path>,
    git_dir: impl AsRef<Path>,
    format: ObjectFormat,
    config: &GitConfig,
    path: &[u8],
    content: &[u8],
) -> Result<Vec<u8>> {
    let root: &Path = worktree_root.as_ref();
    let git: &Path = git_dir.as_ref();
    let checks = smudge_attribute_checks_from_index(root, git, format, path)?;
    let smudged =
        apply_smudge_filter_with_attributes_cow_format(config, &checks, path, content, format)?;
    Ok(smudged.into_owned())
}
/// Smudge-side conversion with pre-resolved `attributes`, returning an owned
/// buffer.
///
/// Owning convenience wrapper around
/// `apply_smudge_filter_with_attributes_cow`.
///
/// # Errors
/// Propagates any error from the underlying conversion.
pub fn apply_smudge_filter_with_attributes(
    config: &GitConfig,
    attributes: &[AttributeCheck],
    path: &[u8],
    content: &[u8],
) -> Result<Vec<u8>> {
    let smudged = apply_smudge_filter_with_attributes_cow(config, attributes, path, content)?;
    Ok(smudged.into_owned())
}
/// Smudge-side conversion that borrows `content` when nothing had to change.
///
/// This entry point has no object-format parameter and always forwards
/// `ObjectFormat::Sha1` to
/// `apply_smudge_filter_with_attributes_cow_format`.
///
/// # Errors
/// Propagates any error from the underlying conversion.
pub fn apply_smudge_filter_with_attributes_cow<'a>(
    config: &GitConfig,
    attributes: &[AttributeCheck],
    path: &[u8],
    content: &'a [u8],
) -> Result<Cow<'a, [u8]>> {
    let format = ObjectFormat::Sha1;
    apply_smudge_filter_with_attributes_cow_format(config, attributes, path, content, format)
}
/// Shared implementation of the smudge (repository -> worktree) conversion.
///
/// The stages run in a fixed order, each feeding the next:
/// 1. ident handling (`ident_to_worktree_cow`), when `ident` is enabled;
/// 2. LF -> CRLF conversion, when the plan's EOL is CRLF and the plan agrees
///    the data should be converted;
/// 3. working-tree-encoding conversion (`encode_to_worktree`);
/// 4. the external `smudge` filter driver, if one is configured.
///
/// `format` is handed to the ident stage and to the driver.
///
/// # Errors
/// Fails when the working-tree-encoding is invalid, or when any stage reports
/// an error.
pub(crate) fn apply_smudge_filter_with_attributes_cow_format<'a>(
    config: &GitConfig,
    attributes: &[AttributeCheck],
    path: &[u8],
    content: &'a [u8],
    format: ObjectFormat,
) -> Result<Cow<'a, [u8]>> {
    let plan = ContentFilterPlan::resolve(config, attributes);
    check_wt_encoding_valid(&plan.encoding)?;

    let mut data = Cow::Borrowed(content);
    if plan.ident {
        data = ident_to_worktree_cow(format, data)?;
    }

    let wants_crlf = plan.eol == EolConversion::Crlf
        && plan.convert_eol(&data)
        && plan.will_convert_lf_to_crlf(&data);
    if wants_crlf {
        data = Cow::Owned(convert_lf_to_crlf(&data));
    }

    let encoded = encode_to_worktree(&plan.encoding, path, data)?;
    match &plan.driver {
        Some(driver) => run_driver(
            driver,
            driver.smudge.as_deref(),
            "smudge",
            Some(format),
            path,
            encoded,
        ),
        None => Ok(encoded),
    }
}
/// Runs the external filter described by `driver` over `content`.
///
/// `direction` is `"clean"` or `"smudge"` as passed by the callers in this
/// file, and `command` is the one-shot command for that direction, if any.
///
/// Resolution order:
/// 1. A configured `process` filter is tried first. For the smudge direction
///    with a known `format`, the blob id of `content` is computed and handed
///    to it.
///    - `Filtered` output is returned.
///    - `Unsupported` falls through to the one-shot command.
///    - A `Status` outcome or an error returns the input unchanged, unless
///      the driver is `required`, in which case an error is returned.
/// 2. Without a one-shot command, a required driver prints a `fatal:` line
///    and fails with exit code 128; an optional driver returns the input
///    unchanged.
/// 3. A failing one-shot command is an error only for required drivers.
pub(crate) fn run_driver<'a>(
    driver: &FilterDriver,
    command: Option<&str>,
    direction: &str,
    format: Option<ObjectFormat>,
    path: &[u8],
    content: Cow<'a, [u8]>,
) -> Result<Cow<'a, [u8]>> {
    if let Some(process) = &driver.process {
        // Only smudge requests carry a blob id, and only when the caller
        // supplied an object format to hash with.
        let blob = match format {
            Some(format) if direction == "smudge" => {
                Some(EncodedObject::new(ObjectType::Blob, content.to_vec()).object_id(format)?)
            }
            _ => None,
        };
        match run_process_filter(process, direction, path, &content, blob) {
            Ok(ProcessFilterOutcome::Filtered(output)) => return Ok(Cow::Owned(output)),
            Ok(ProcessFilterOutcome::Unsupported) => {}
            Ok(ProcessFilterOutcome::Status(status)) => {
                return if driver.required {
                    Err(GitError::Command(format!(
                        "external filter '{}' returned status {status}",
                        process
                    )))
                } else {
                    Ok(content)
                };
            }
            Err(err) => {
                if err.protocol {
                    eprintln!("error: external filter '{}' failed", process);
                }
                return if driver.required {
                    Err(GitError::Command(err.message))
                } else {
                    Ok(content)
                };
            }
        }
    }

    let command = match command {
        Some(command) => command,
        None if driver.required => {
            let path = String::from_utf8_lossy(path);
            let name = String::from_utf8_lossy(&driver.name);
            // The two messages intentionally differ in how the name is quoted.
            if direction == "clean" {
                eprintln!("fatal: {path}: clean filter '{name}' failed");
            } else {
                eprintln!("fatal: {path}: smudge filter {name} failed");
            }
            return Err(GitError::Exit(128));
        }
        None => return Ok(content),
    };

    match run_filter_command(command, path, &content) {
        Ok(output) => Ok(Cow::Owned(output)),
        Err(err) if driver.required => Err(err),
        Err(_) => Ok(content),
    }
}
/// Collects the conversion-related attributes for `path` from the worktree.
///
/// Attribute sources are folded into a fresh matcher in this order:
/// 1. configured attributes, falling back to the default global attributes
///    when `read_configured_attributes` returns `false`;
/// 2. the attribute patterns of the worktree root, then of every parent
///    directory of `path`, outermost first;
/// 3. `<worktree_root>/.git/info/attributes`.
///
/// The returned checks cover the names from `filter_attribute_names()`.
///
/// # Errors
/// Propagates failures from reading directory attribute patterns and from
/// converting a repository path to an OS path.
pub(crate) fn filter_attribute_checks(
    worktree_root: &Path,
    path: &[u8],
) -> Result<Vec<AttributeCheck>> {
    let git_dir = worktree_root.join(".git");
    let mut matcher = AttributeMatcher::default();
    matcher.configure_case_sensitivity(&git_dir);
    let configured = matcher.read_configured_attributes(worktree_root, &git_dir);
    if !configured {
        matcher.read_default_global_attributes();
    }

    read_dir_attribute_patterns_for_base(worktree_root, &[], &mut matcher)?;

    // Everything before the last '/' names the chain of parent directories;
    // a path without '/' has none.
    if let Some(last_slash) = path.iter().rposition(|byte| *byte == b'/') {
        let mut prefix = Vec::new();
        for part in path[..last_slash].split(|byte| *byte == b'/') {
            if !prefix.is_empty() {
                prefix.push(b'/');
            }
            prefix.extend_from_slice(part);
            let dir = worktree_root.join(repo_path_to_os_path(&prefix)?);
            read_dir_attribute_patterns_for_base(&dir, &prefix, &mut matcher)?;
        }
    }

    read_attribute_patterns(
        git_dir.join("info").join("attributes"),
        &mut matcher,
        &[],
        b".git/info/attributes",
        false,
    );
    let requested = filter_attribute_names();
    Ok(matcher.attributes_for_path(path, &requested, false))
}
/// Collects the conversion-related attributes for `path` as seen during
/// checkout.
///
/// Attribute sources are folded into a fresh matcher in this order:
/// 1. configured attributes, falling back to the default global attributes
///    when `read_configured_attributes` returns `false`;
/// 2. the `.gitattributes` of the worktree root and of every parent
///    directory of `path`, outermost first. Each is taken from the worktree
///    file when it can be read and otherwise from the index (see
///    `fold_checkout_attribute_frame`);
/// 3. `info/attributes` inside `git_dir`.
///
/// `info/attributes` is resolved against the `git_dir` argument rather than
/// `<worktree_root>/.git`, so repositories whose git directory is not
/// located at `<worktree_root>/.git` still have the file honoured. For the
/// ordinary layout both locations are the same file.
///
/// The returned checks cover the names from `filter_attribute_names()`.
///
/// # Errors
/// Propagates failures from loading the index attributes, from converting a
/// repository path to an OS path, and from folding a directory frame.
pub(crate) fn smudge_attribute_checks_from_index(
    worktree_root: &Path,
    git_dir: &Path,
    format: ObjectFormat,
    path: &[u8],
) -> Result<Vec<AttributeCheck>> {
    let requested = filter_attribute_names();
    let mut matcher = AttributeMatcher::default();
    matcher.configure_case_sensitivity(git_dir);
    if !matcher.read_configured_attributes(worktree_root, git_dir) {
        matcher.read_default_global_attributes();
    }
    let index_attributes = index_gitattributes_by_base(git_dir, format)?;
    fold_checkout_attribute_frame(worktree_root, &[], &index_attributes, &mut matcher)?;
    let mut prefix = Vec::new();
    let mut parts = path.split(|byte| *byte == b'/').peekable();
    while let Some(part) = parts.next() {
        // The final component is the file itself, not a directory frame.
        if parts.peek().is_none() {
            break;
        }
        if !prefix.is_empty() {
            prefix.push(b'/');
        }
        prefix.extend_from_slice(part);
        let dir = worktree_root.join(repo_path_to_os_path(&prefix)?);
        fold_checkout_attribute_frame(&dir, &prefix, &index_attributes, &mut matcher)?;
    }
    // Use the caller-provided git directory; the label is kept as-is because
    // it is the source name the matcher records for these patterns.
    read_attribute_patterns(
        git_dir.join("info").join("attributes"),
        &mut matcher,
        &[],
        b".git/info/attributes",
        false,
    );
    Ok(matcher.attributes_for_path(path, &requested, false))
}
/// Adds the `.gitattributes` patterns for one directory to `matcher`.
///
/// The worktree file `<dir>/.gitattributes` wins when it can be read;
/// otherwise the contents recorded for `base` in `index_attributes` are used.
/// When neither exists nothing is added.
///
/// # Errors
/// The current implementation always returns `Ok(())`; an unreadable
/// worktree file is treated the same as a missing one.
pub(crate) fn fold_checkout_attribute_frame(
    dir: &Path,
    base: &[u8],
    index_attributes: &BTreeMap<Vec<u8>, Vec<u8>>,
    matcher: &mut AttributeMatcher,
) -> Result<()> {
    let source = attribute_source_for_base(base);
    let worktree_contents = fs::read(dir.join(".gitattributes")).ok();
    let chosen: Option<&Vec<u8>> = worktree_contents
        .as_ref()
        .or_else(|| index_attributes.get(base));
    if let Some(contents) = chosen {
        read_attribute_patterns_from_bytes(contents, matcher, base, &source);
    }
    Ok(())
}
/// Loads every `.gitattributes` blob recorded in the index, keyed by the
/// directory it belongs to.
///
/// The key is the directory path without a trailing `/`; the root
/// `.gitattributes` is stored under the empty key. Only stage-0 entries whose
/// mode maps to a blob are considered. A missing index file yields an empty
/// map.
///
/// # Errors
/// Fails when the index cannot be read or parsed, or when a referenced
/// object cannot be read from the object database.
pub(crate) fn index_gitattributes_by_base(
    git_dir: &Path,
    format: ObjectFormat,
) -> Result<BTreeMap<Vec<u8>, Vec<u8>>> {
    let index_path = repository_index_path(git_dir);
    if !index_path.exists() {
        return Ok(BTreeMap::new());
    }

    let db = FileObjectDatabase::from_git_dir(git_dir, format);
    let raw_index = fs::read(index_path)?;
    let entries = Index::parse(&raw_index, format)?.entries;

    let mut by_base = BTreeMap::new();
    for entry in entries {
        let path_bytes = entry.path.as_bytes();
        let names_attributes_file =
            entry.path == b".gitattributes" || path_bytes.ends_with(b"/.gitattributes");
        let eligible = index_entry_stage(&entry) == 0
            && tree_entry_object_type(entry.mode) == ObjectType::Blob
            && names_attributes_file;
        if !eligible {
            continue;
        }

        let Some(parent) = path_bytes.strip_suffix(b".gitattributes") else {
            continue;
        };
        // Drop the separator so "dir/.gitattributes" is keyed as "dir"; the
        // root file leaves an empty parent and therefore an empty key.
        let base = parent.strip_suffix(b"/").unwrap_or(parent).to_vec();

        let object = db
            .read_object(&entry.oid)
            .map_err(|err| expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob))?;
        if object.object_type == ObjectType::Blob {
            by_base.insert(base, object.body.clone());
        }
    }
    Ok(by_base)
}
/// Returns the attribute names consulted when converting file content, in
/// the fixed order `text`, `crlf`, `ident`, `eol`, `filter`,
/// `working-tree-encoding`.
pub(crate) fn filter_attribute_names() -> Vec<Vec<u8>> {
    const NAMES: [&[u8]; 6] = [
        b"text",
        b"crlf",
        b"ident",
        b"eol",
        b"filter",
        b"working-tree-encoding",
    ];
    NAMES.iter().map(|name| name.to_vec()).collect()
}
/// Byte-class counters gathered over a buffer by `gather_convert_stats`.
#[derive(Clone)]
pub(crate) struct ConvertStats {
    /// Number of NUL bytes.
    nul: u32,
    /// Number of `\r` bytes not followed by `\n`.
    lonecr: u32,
    /// Number of `\n` bytes not preceded by `\r`.
    lonelf: u32,
    /// Number of `\r\n` pairs.
    crlf: u32,
    /// Number of bytes counted as printable.
    printable: u32,
    /// Number of bytes counted as non-printable.
    nonprintable: u32,
}

/// Scans `buf` once and classifies every byte.
///
/// - `\r\n` counts as one CRLF; a `\r` followed by anything else (or by the
///   end of the buffer) is a lone CR; any other `\n` is a lone LF.
/// - DEL (127) and control bytes below 32 are non-printable, except BS, HT,
///   FF and ESC which are printable. NUL is counted both as `nul` and as
///   non-printable.
/// - Every other byte, including bytes >= 128, is printable.
/// - A trailing 0x1A byte is not held against the buffer: the
///   non-printable count is reduced by one when the buffer ends with it.
pub(crate) fn gather_convert_stats(buf: &[u8]) -> ConvertStats {
    let mut stats = ConvertStats {
        nul: 0,
        lonecr: 0,
        lonelf: 0,
        crlf: 0,
        printable: 0,
        nonprintable: 0,
    };

    let mut bytes = buf.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        match byte {
            b'\r' => {
                // Consume the '\n' of a CRLF pair so it is not counted again.
                if bytes.next_if_eq(&b'\n').is_some() {
                    stats.crlf += 1;
                } else {
                    stats.lonecr += 1;
                }
            }
            b'\n' => stats.lonelf += 1,
            0 => {
                stats.nul += 1;
                stats.nonprintable += 1;
            }
            0x08 | 0x09 | 0x0c | 0x1b => stats.printable += 1,
            1..=31 | 127 => stats.nonprintable += 1,
            _ => stats.printable += 1,
        }
    }

    if buf.ends_with(&[0x1a]) {
        stats.nonprintable = stats.nonprintable.saturating_sub(1);
    }
    stats
}
/// Reports whether blob `oid` is text that contains at least one CRLF.
///
/// Returns `false` when the object cannot be read, is not a blob, contains
/// no `\r` at all, or is classified as binary by `convert_is_binary`.
pub(crate) fn has_crlf_in_index(odb: &FileObjectDatabase, oid: &ObjectId) -> bool {
    let object = match odb.read_object(oid) {
        Ok(object) if object.object_type == ObjectType::Blob => object,
        _ => return false,
    };
    let data = &object.body;
    // Cheap pre-check: without any '\r' there is no need to gather stats.
    if !data.contains(&b'\r') {
        return false;
    }
    let stats = gather_convert_stats(data);
    stats.crlf > 0 && !convert_is_binary(&stats)
}
/// Decides from gathered `stats` whether the content counts as binary.
///
/// Content is binary when it has any lone CR, any NUL byte, or when the
/// number of non-printable bytes exceeds the printable count divided by 128.
pub(crate) fn convert_is_binary(stats: &ConvertStats) -> bool {
    let has_lone_cr = stats.lonecr > 0;
    let has_nul = stats.nul > 0;
    let printable_allowance = stats.printable >> 7;
    has_lone_cr || has_nul || stats.nonprintable > printable_allowance
}
/// How line-ending round-trip problems are reported.
///
/// Derived from `core.safecrlf` by `ConvFlags::from_config` and consumed by
/// `check_safe_crlf`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConvFlags {
/// No checking is performed.
Off,
/// A `warning:` line is printed and processing continues.
Warn,
/// A `fatal:` line is printed and the operation fails with exit code 128.
Die,
}
impl ConvFlags {
    /// Reads the reporting mode from `core.safecrlf`.
    ///
    /// - key absent: `Warn`;
    /// - value equal to `warn` (ASCII case-insensitive): `Warn`;
    /// - value that parses as boolean true: `Die`;
    /// - any other value: `Off`.
    pub fn from_config(config: &GitConfig) -> Self {
        let Some(raw) = config.get("core", None, "safecrlf") else {
            return ConvFlags::Warn;
        };
        if raw.eq_ignore_ascii_case("warn") {
            return ConvFlags::Warn;
        }
        match config.get_bool("core", None, "safecrlf") {
            Some(true) => ConvFlags::Die,
            _ => ConvFlags::Off,
        }
    }
}
/// Reports when converting `path` would lose its current line endings.
///
/// `old_stats` describes the content before conversion and `new_stats` the
/// content after a round trip. Two situations are detected, the first taking
/// precedence:
/// - CRLFs present before but absent afterwards;
/// - lone LFs present before but absent afterwards.
///
/// With `ConvFlags::Warn` a warning is printed to stderr and `Ok(())` is
/// returned. With `ConvFlags::Die` a fatal message is printed and
/// `GitError::Exit(128)` is returned. With `ConvFlags::Off` nothing happens.
pub(crate) fn check_safe_crlf(
    old_stats: &ConvertStats,
    new_stats: &ConvertStats,
    flags: ConvFlags,
    path: &[u8],
) -> Result<()> {
    let crlf_lost = old_stats.crlf > 0 && new_stats.crlf == 0;
    let lf_lost = !crlf_lost && old_stats.lonelf > 0 && new_stats.lonelf == 0;
    if flags == ConvFlags::Off || !(crlf_lost || lf_lost) {
        return Ok(());
    }

    let display = String::from_utf8_lossy(path);
    match flags {
        ConvFlags::Die => {
            if crlf_lost {
                eprintln!("fatal: CRLF would be replaced by LF in {display}");
            } else {
                eprintln!("fatal: LF would be replaced by CRLF in {display}");
            }
            Err(GitError::Exit(128))
        }
        ConvFlags::Warn => {
            if crlf_lost {
                eprintln!(
                    "warning: in the working copy of '{display}', CRLF will be replaced by LF the next time Git touches it"
                );
            } else {
                eprintln!(
                    "warning: in the working copy of '{display}', LF will be replaced by CRLF the next time Git touches it"
                );
            }
            Ok(())
        }
        ConvFlags::Off => unreachable!("handled above"),
    }
}
/// Summarises the line endings found in `content` as a short label.
///
/// - `"none"`: empty content, or text without any LF or CRLF;
/// - `"-text"`: content classified as binary;
/// - `"lf"`: only lone LFs;
/// - `"crlf"`: only CRLF pairs;
/// - `"mixed"`: both lone LFs and CRLF pairs.
pub(crate) fn convert_stats_ascii(content: &[u8]) -> &'static str {
    if content.is_empty() {
        return "none";
    }
    let stats = gather_convert_stats(content);
    if convert_is_binary(&stats) {
        return "-text";
    }
    let has_lf = stats.lonelf > 0;
    let has_crlf = stats.crlf > 0;
    if has_lf && has_crlf {
        "mixed"
    } else if has_lf {
        "lf"
    } else if has_crlf {
        "crlf"
    } else {
        "none"
    }
}
/// Renders the effective `text`/`crlf`/`eol` attribute combination in
/// `checks` as a short label.
///
/// The `text` attribute is consulted first and `crlf` only when `text` gives
/// no answer. Unless the outcome is binary, an `eol` value of `lf` or `crlf`
/// then refines it. Possible results are `""`, `"-text"`, `"text"`,
/// `"text eol=lf"`, `"text eol=crlf"`, `"text=auto"`, `"text=auto eol=crlf"`
/// and `"text=auto eol=lf"`.
pub(crate) fn convert_attr_ascii(checks: &[AttributeCheck]) -> &'static str {
    #[derive(Clone, Copy, PartialEq)]
    enum Action {
        Undefined,
        Binary,
        Text,
        TextInput,
        TextCrlf,
        Auto,
        AutoCrlf,
        AutoInput,
    }

    /// Finds the recorded state of attribute `name`, if any.
    fn lookup<'a>(checks: &'a [AttributeCheck], name: &[u8]) -> Option<&'a AttributeState> {
        checks
            .iter()
            .find(|check| check.attribute == name)
            .and_then(|check| check.state.as_ref())
    }

    /// Maps a `text`/`crlf` attribute state to its base action.
    fn classify(state: Option<&AttributeState>) -> Action {
        let Some(state) = state else {
            return Action::Undefined;
        };
        match state {
            AttributeState::Set => Action::Text,
            AttributeState::Unset => Action::Binary,
            AttributeState::Value(value) if value == b"input" => Action::TextInput,
            AttributeState::Value(value) if value == b"auto" => Action::Auto,
            _ => Action::Undefined,
        }
    }

    let mut action = classify(lookup(checks, b"text"));
    if action == Action::Undefined {
        action = classify(lookup(checks, b"crlf"));
    }

    if action != Action::Binary {
        // `Some(true)` means eol=crlf, `Some(false)` means eol=lf.
        let eol_is_crlf = match lookup(checks, b"eol") {
            Some(AttributeState::Value(value)) if value == b"lf" => Some(false),
            Some(AttributeState::Value(value)) if value == b"crlf" => Some(true),
            _ => None,
        };
        if let Some(crlf) = eol_is_crlf {
            action = match (action == Action::Auto, crlf) {
                (true, false) => Action::AutoInput,
                (true, true) => Action::AutoCrlf,
                (false, false) => Action::TextInput,
                (false, true) => Action::TextCrlf,
            };
        }
    }

    match action {
        Action::Undefined => "",
        Action::Binary => "-text",
        Action::Text => "text",
        Action::TextInput => "text eol=lf",
        Action::TextCrlf => "text eol=crlf",
        Action::Auto => "text=auto",
        Action::AutoCrlf => "text=auto eol=crlf",
        Action::AutoInput => "text=auto eol=lf",
    }
}
/// Line-ending summary for one path, as produced by `eol_info_for_path`.
pub struct EolInfo {
    /// Label for the content stored in the index (empty when unavailable).
    pub index: &'static str,
    /// Label for the file in the working tree (empty when unavailable).
    pub worktree: &'static str,
    /// Label for the effective text/eol attributes.
    pub attr: &'static str,
}

impl EolInfo {
    /// Formats the three labels as a fixed-width prefix ending in a tab.
    ///
    /// The index and worktree labels are left-aligned in 5 columns and the
    /// attribute label in 17 columns; longer labels are not truncated.
    pub fn format_prefix(&self) -> String {
        let Self {
            index,
            worktree,
            attr,
        } = self;
        format!("i/{:<5} w/{:<5} attr/{:<17}\t", index, worktree, attr)
    }
}
/// Builds the line-ending summary for `path`.
///
/// - `index` is derived from `index_content`, or is empty when that is
///   `None`.
/// - `worktree` is derived from the file at `worktree_root`/`path`, but only
///   when that path converts to an OS path, is a regular file according to
///   `symlink_metadata` (so symlinks are not followed), and can be read;
///   otherwise it is empty.
/// - `attr` is derived from `attr_checks`.
pub fn eol_info_for_path(
    worktree_root: impl AsRef<Path>,
    path: &[u8],
    index_content: Option<&[u8]>,
    attr_checks: &[AttributeCheck],
) -> EolInfo {
    let index = index_content.map_or("", convert_stats_ascii);

    let root = worktree_root.as_ref();
    let worktree = repo_path_to_os_path(path)
        .ok()
        .map(|rel| root.join(rel))
        .filter(|absolute| {
            matches!(
                fs::symlink_metadata(absolute),
                Ok(meta) if meta.file_type().is_file()
            )
        })
        .and_then(|absolute| fs::read(&absolute).ok())
        .map_or("", |content| convert_stats_ascii_owned(&content));

    EolInfo {
        index,
        worktree,
        attr: convert_attr_ascii(attr_checks),
    }
}
/// Returns the same line-ending label as `convert_stats_ascii` for `content`.
///
/// This is a plain forwarding wrapper; it adds no behaviour of its own.
pub(crate) fn convert_stats_ascii_owned(content: &[u8]) -> &'static str {
convert_stats_ascii(content)
}
/// Public entry point returning the conversion-related attribute checks for
/// `path`, resolved from the worktree rooted at `worktree_root`.
///
/// Forwards to `filter_attribute_checks` and returns its result, including
/// any error, unchanged.
pub fn eol_attribute_checks(
worktree_root: impl AsRef<Path>,
path: &[u8],
) -> Result<Vec<AttributeCheck>> {
filter_attribute_checks(worktree_root.as_ref(), path)
}