use crate::model::encoding::{self, Encoding};
/// The line-terminator styles this module can detect and emit.
///
/// [`LineEnding::LF`] is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LineEnding {
    /// A single line feed, `\n`.
    #[default]
    LF,
    /// Carriage return followed by line feed, `\r\n`.
    CRLF,
    /// A single carriage return, `\r`.
    CR,
}

impl LineEnding {
    /// Returns the literal character sequence that terminates a line in this
    /// style.
    pub fn as_str(&self) -> &'static str {
        match *self {
            LineEnding::CR => "\r",
            LineEnding::CRLF => "\r\n",
            LineEnding::LF => "\n",
        }
    }

    /// Returns the short label for this style: `"LF"`, `"CRLF"` or `"CR"`.
    pub fn display_name(&self) -> &'static str {
        match *self {
            LineEnding::CR => "CR",
            LineEnding::CRLF => "CRLF",
            LineEnding::LF => "LF",
        }
    }
}
/// The line ending and encoding currently selected for a buffer, paired with
/// the "original" values they are compared against.
///
/// The `original_*` fields are written by [`BufferFormat::new`], by the
/// `set_default_*` setters and by `promote_current_to_original`. The plain
/// setters change only the current values.
#[derive(Debug, Clone, Copy)]
pub struct BufferFormat {
    /// Line ending currently in effect.
    line_ending: LineEnding,
    /// Baseline line ending that `line_ending` is compared against.
    original_line_ending: LineEnding,
    /// Encoding currently in effect.
    encoding: Encoding,
    /// Baseline encoding that `encoding` is compared against.
    original_encoding: Encoding,
}

impl BufferFormat {
    /// Creates a format whose current and original values both start out as
    /// the given `line_ending` and `encoding`.
    pub fn new(line_ending: LineEnding, encoding: Encoding) -> Self {
        Self {
            original_line_ending: line_ending,
            original_encoding: encoding,
            line_ending,
            encoding,
        }
    }

    /// Returns the current line ending.
    pub fn line_ending(&self) -> LineEnding {
        self.line_ending
    }

    /// Returns the current encoding.
    pub fn encoding(&self) -> Encoding {
        self.encoding
    }

    /// Returns the recorded original line ending.
    pub fn original_line_ending(&self) -> LineEnding {
        self.original_line_ending
    }

    /// Returns the recorded original encoding.
    pub fn original_encoding(&self) -> Encoding {
        self.original_encoding
    }

    /// Changes the current line ending; the recorded original is untouched.
    pub fn set_line_ending(&mut self, le: LineEnding) {
        self.line_ending = le;
    }

    /// Changes the current encoding; the recorded original is untouched.
    pub fn set_encoding(&mut self, e: Encoding) {
        self.encoding = e;
    }

    /// Sets both the current and the original line ending to `le`.
    pub fn set_default_line_ending(&mut self, le: LineEnding) {
        self.set_line_ending(le);
        self.original_line_ending = le;
    }

    /// Sets both the current and the original encoding to `e`.
    pub fn set_default_encoding(&mut self, e: Encoding) {
        self.set_encoding(e);
        self.original_encoding = e;
    }

    /// Returns `true` when the current line ending differs from the recorded
    /// original.
    pub fn line_ending_changed_since_load(&self) -> bool {
        let (current, original) = (self.line_ending, self.original_line_ending);
        current != original
    }

    /// Returns `true` when the current encoding differs from the recorded
    /// original.
    pub fn encoding_changed_since_load(&self) -> bool {
        let (current, original) = (self.encoding, self.original_encoding);
        current != original
    }

    /// Records the current line ending and encoding as the new originals, so
    /// both `*_changed_since_load` checks report `false` afterwards.
    pub(super) fn promote_current_to_original(&mut self) {
        let Self {
            line_ending,
            encoding,
            ..
        } = *self;
        self.original_line_ending = line_ending;
        self.original_encoding = encoding;
    }
}
/// Guesses the dominant line ending of `bytes` from a leading sample.
///
/// Only terminators that start within the first 8 KiB are counted. `\r\n`
/// pairs, lone `\r` and lone `\n` are tallied separately, and a style wins
/// only if it is strictly more frequent than each of the other two. Any tie,
/// including input with no terminators at all, yields [`LineEnding::LF`].
pub fn detect_line_ending(bytes: &[u8]) -> LineEnding {
    // Upper bound on how many leading bytes are scanned.
    const SAMPLE_LEN: usize = 8 * 1024;

    let scan_end = bytes.len().min(SAMPLE_LEN);
    let mut crlf_count = 0usize;
    let mut lf_only_count = 0usize;
    let mut cr_only_count = 0usize;

    let mut i = 0;
    while i < scan_end {
        match bytes[i] {
            // The lookahead deliberately reads the full buffer rather than the
            // sample: a `\r\n` pair straddling the sample boundary must still
            // count as CRLF, not as a lone `\r`.
            b'\r' if bytes.get(i + 1) == Some(&b'\n') => {
                crlf_count += 1;
                i += 2;
            }
            b'\r' => {
                cr_only_count += 1;
                i += 1;
            }
            b'\n' => {
                lf_only_count += 1;
                i += 1;
            }
            _ => i += 1,
        }
    }

    if crlf_count > lf_only_count && crlf_count > cr_only_count {
        LineEnding::CRLF
    } else if cr_only_count > lf_only_count && cr_only_count > crlf_count {
        LineEnding::CR
    } else {
        LineEnding::LF
    }
}
/// Detects the encoding of `bytes` by delegating to
/// `encoding::detect_encoding` and returning its result unchanged.
pub fn detect_encoding(bytes: &[u8]) -> Encoding {
encoding::detect_encoding(bytes)
}
/// Forwards `bytes` and `truncated` to `encoding::detect_encoding_or_binary`
/// and returns its `(Encoding, bool)` result unchanged.
///
/// NOTE(review): the `bool` is presumably an "is binary" flag and `truncated`
/// presumably marks `bytes` as a partial read. Neither is visible from this
/// wrapper; confirm against the `encoding` module.
pub fn detect_encoding_or_binary(bytes: &[u8], truncated: bool) -> (Encoding, bool) {
encoding::detect_encoding_or_binary(bytes, truncated)
}
/// Delegates to `encoding::detect_and_convert` and returns its
/// `(Encoding, Vec<u8>)` pair unchanged.
///
/// NOTE(review): the returned bytes are presumably `bytes` converted to UTF-8
/// from the detected encoding; confirm against the `encoding` module.
pub fn detect_and_convert_encoding(bytes: &[u8]) -> (Encoding, Vec<u8>) {
encoding::detect_and_convert(bytes)
}
/// Passes `utf8_bytes` and `target_encoding` to `encoding::convert_from_utf8`
/// and returns the bytes it produces.
pub fn convert_to_encoding(utf8_bytes: &[u8], target_encoding: Encoding) -> Vec<u8> {
encoding::convert_from_utf8(utf8_bytes, target_encoding)
}
/// Rewrites every `\r\n` pair and every lone `\r` in `bytes` as a single
/// `\n`, returning the result in a new buffer.
///
/// All other bytes, including existing `\n`, are copied through unchanged.
#[allow(dead_code)]
pub fn normalize_line_endings(bytes: Vec<u8>) -> Vec<u8> {
    let mut out = Vec::with_capacity(bytes.len());
    let mut rest = bytes.iter().copied().peekable();
    while let Some(byte) = rest.next() {
        if byte == b'\r' {
            // A `\n` directly after `\r` is part of the same terminator, so
            // it is consumed here rather than emitted a second time.
            rest.next_if_eq(&b'\n');
            out.push(b'\n');
        } else {
            out.push(byte);
        }
    }
    out
}
/// Rewrites every line terminator in `bytes` to `target_ending`.
///
/// `\r\n`, a lone `\r` and a lone `\n` each count as one terminator and are
/// emitted as `target_ending.as_str()`. Every other byte is copied through
/// unchanged. The conversion is done in a single pass over `bytes`.
pub(super) fn convert_line_endings_to(bytes: &[u8], target_ending: LineEnding) -> Vec<u8> {
    let replacement = target_ending.as_str().as_bytes();

    // Only a multi-byte terminator can make the output longer than the input,
    // so extra headroom is reserved only in that case.
    let headroom = if replacement.len() > 1 {
        bytes.len() / 10
    } else {
        0
    };
    let mut converted = Vec::with_capacity(bytes.len() + headroom);

    let mut rest = bytes.iter().copied().peekable();
    while let Some(byte) = rest.next() {
        match byte {
            b'\r' => {
                // Swallow the `\n` of a `\r\n` pair so the pair yields exactly
                // one terminator.
                rest.next_if_eq(&b'\n');
                converted.extend_from_slice(replacement);
            }
            b'\n' => converted.extend_from_slice(replacement),
            other => converted.push(other),
        }
    }
    converted
}