Skip to main content

fresh/model/buffer/
format.rs

//! Encoding and line-ending state for a `TextBuffer`.
//!
//! Owns the four format-related fields (current line-ending,
//! original line-ending at load, current encoding, original encoding
//! at load) as a `BufferFormat` sub-struct composed inside
//! `TextBuffer`. Exposes pure free functions for detection and
//! conversion so they can be used without constructing any buffer
//! state.
9
10use crate::model::encoding::{self, Encoding};
11
/// Line-ending convention detected in (or chosen for) a text buffer.
///
/// The default is [`LineEnding::LF`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LineEnding {
    /// Unix/Linux/Mac convention: a single line feed (`\n`).
    #[default]
    LF,
    /// Windows convention: carriage return followed by line feed (`\r\n`).
    CRLF,
    /// Old Mac convention: a single carriage return (`\r`). Rare but supported.
    CR,
}

impl LineEnding {
    /// Returns the literal character sequence that terminates a line in
    /// this format.
    pub fn as_str(&self) -> &'static str {
        match *self {
            LineEnding::LF => "\n",
            LineEnding::CRLF => "\r\n",
            LineEnding::CR => "\r",
        }
    }

    /// Returns the short label for this format, intended for the status bar.
    pub fn display_name(&self) -> &'static str {
        match *self {
            LineEnding::LF => "LF",
            LineEnding::CRLF => "CRLF",
            LineEnding::CR => "CR",
        }
    }
}
43
44/// Encoding + line-ending state for one `TextBuffer`.
45///
46/// Owns both the current and the original-at-load-time values so the
47/// save path can detect that the user changed the format and rewrite
48/// the bytes accordingly.
49#[derive(Debug, Clone, Copy)]
50pub struct BufferFormat {
51    line_ending: LineEnding,
52    original_line_ending: LineEnding,
53    encoding: Encoding,
54    original_encoding: Encoding,
55}
56
57impl BufferFormat {
58    pub fn new(line_ending: LineEnding, encoding: Encoding) -> Self {
59        Self {
60            line_ending,
61            original_line_ending: line_ending,
62            encoding,
63            original_encoding: encoding,
64        }
65    }
66
67    pub fn line_ending(&self) -> LineEnding {
68        self.line_ending
69    }
70
71    pub fn encoding(&self) -> Encoding {
72        self.encoding
73    }
74
75    pub fn original_line_ending(&self) -> LineEnding {
76        self.original_line_ending
77    }
78
79    pub fn original_encoding(&self) -> Encoding {
80        self.original_encoding
81    }
82
83    pub fn set_line_ending(&mut self, le: LineEnding) {
84        self.line_ending = le;
85    }
86
87    pub fn set_encoding(&mut self, e: Encoding) {
88        self.encoding = e;
89    }
90
91    pub fn set_default_line_ending(&mut self, le: LineEnding) {
92        self.line_ending = le;
93        self.original_line_ending = le;
94    }
95
96    pub fn set_default_encoding(&mut self, e: Encoding) {
97        self.encoding = e;
98        self.original_encoding = e;
99    }
100
101    pub fn line_ending_changed_since_load(&self) -> bool {
102        self.line_ending != self.original_line_ending
103    }
104
105    pub fn encoding_changed_since_load(&self) -> bool {
106        self.encoding != self.original_encoding
107    }
108
109    /// Called after a successful save to make the current values the
110    /// new "original" baseline.
111    pub(super) fn promote_current_to_original(&mut self) {
112        self.original_line_ending = self.line_ending;
113        self.original_encoding = self.encoding;
114    }
115}
116
// ---------- free helpers (Rule 4 in the refactor plan) ----------
118
119/// Detect the line ending format from a sample of bytes
120///
121/// Uses majority voting: counts CRLF, LF-only, and CR-only
122/// occurrences and returns the most common format.
123pub fn detect_line_ending(bytes: &[u8]) -> LineEnding {
124    // Only check the first 8KB for line ending detection (same as binary detection)
125    let check_len = bytes.len().min(8 * 1024);
126    let sample = &bytes[..check_len];
127
128    let mut crlf_count = 0;
129    let mut lf_only_count = 0;
130    let mut cr_only_count = 0;
131
132    let mut i = 0;
133    while i < sample.len() {
134        if sample[i] == b'\r' {
135            // Check if this is CRLF
136            if i + 1 < sample.len() && sample[i + 1] == b'\n' {
137                crlf_count += 1;
138                i += 2; // Skip both \r and \n
139                continue;
140            } else {
141                // CR only (old Mac format)
142                cr_only_count += 1;
143            }
144        } else if sample[i] == b'\n' {
145            // LF only (Unix format)
146            lf_only_count += 1;
147        }
148        i += 1;
149    }
150
151    // Use majority voting to determine line ending
152    if crlf_count > lf_only_count && crlf_count > cr_only_count {
153        LineEnding::CRLF
154    } else if cr_only_count > lf_only_count && cr_only_count > crlf_count {
155        LineEnding::CR
156    } else {
157        // Default to LF if no clear winner or if LF wins
158        LineEnding::LF
159    }
160}
161
162/// Detect the text encoding from a sample of bytes.
163pub fn detect_encoding(bytes: &[u8]) -> Encoding {
164    encoding::detect_encoding(bytes)
165}
166
167/// Detect the text encoding and whether content is binary.
168///
169/// Returns `(encoding, is_binary)`.
170pub fn detect_encoding_or_binary(bytes: &[u8], truncated: bool) -> (Encoding, bool) {
171    encoding::detect_encoding_or_binary(bytes, truncated)
172}
173
174/// Detect encoding and convert bytes to UTF-8.
175pub fn detect_and_convert_encoding(bytes: &[u8]) -> (Encoding, Vec<u8>) {
176    encoding::detect_and_convert(bytes)
177}
178
179/// Convert UTF-8 content to the specified encoding for saving.
180///
181/// Does NOT add BOM — BOM handling lives in the write-recipe path.
182pub fn convert_to_encoding(utf8_bytes: &[u8], target_encoding: Encoding) -> Vec<u8> {
183    encoding::convert_from_utf8(utf8_bytes, target_encoding)
184}
185
/// Normalize every line ending in `bytes` to a single LF.
///
/// Each CRLF pair (`\r\n`) and each lone CR (`\r`) becomes one `\n`;
/// all other bytes are copied through untouched. Kept for tests and
/// potential future use.
#[allow(dead_code)]
pub fn normalize_line_endings(bytes: Vec<u8>) -> Vec<u8> {
    // The output is never longer than the input.
    let mut out = Vec::with_capacity(bytes.len());
    let mut remaining = bytes.into_iter().peekable();

    while let Some(byte) = remaining.next() {
        if byte == b'\r' {
            // Swallow the LF of a CRLF pair so the pair yields one LF.
            remaining.next_if_eq(&b'\n');
            out.push(b'\n');
        } else {
            out.push(byte);
        }
    }

    out
}
213
214/// Convert line endings from any source format to any target format.
215///
216/// Used during save when the user has changed the line-ending format.
217/// Pub(super) because only the in-module save path and its tests call it.
218pub(super) fn convert_line_endings_to(bytes: &[u8], target_ending: LineEnding) -> Vec<u8> {
219    // First pass: normalize everything to LF
220    let mut normalized = Vec::with_capacity(bytes.len());
221    let mut i = 0;
222    while i < bytes.len() {
223        if bytes[i] == b'\r' {
224            if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
225                normalized.push(b'\n');
226                i += 2;
227                continue;
228            } else {
229                normalized.push(b'\n');
230            }
231        } else {
232            normalized.push(bytes[i]);
233        }
234        i += 1;
235    }
236
237    // If target is LF, we're done
238    if target_ending == LineEnding::LF {
239        return normalized;
240    }
241
242    // Second pass: convert LF to target format
243    let replacement = target_ending.as_str().as_bytes();
244    let mut result = Vec::with_capacity(normalized.len() + normalized.len() / 10);
245
246    for byte in normalized {
247        if byte == b'\n' {
248            result.extend_from_slice(replacement);
249        } else {
250            result.push(byte);
251        }
252    }
253
254    result
255}