1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
//! Expand tab characters in commit log output to spaces, matching Git's `strbuf_add_tabexpand`.
//!
//! Git aligns tabs to multiples of `tab_width` using the display width of the preceding UTF-8
//! text (see `pretty.c`). If width cannot be determined, remaining tabs are copied literally.
use unicode_width::UnicodeWidthChar;
/// Sum Unicode display widths for `s`, or `None` if any codepoint has ambiguous width.
fn utf8_display_width(s: &str) -> Option<usize> {
let mut w = 0usize;
for ch in s.chars() {
w = w.checked_add(UnicodeWidthChar::width(ch)?)?;
}
Some(w)
}
/// Replace tabs in `line` with spaces so each tab advances to the next multiple of `tab_width`.
///
/// `tab_width` must be positive. When expansion is disabled (`effective width` 0), callers should
/// print `line` unchanged instead of calling this function.
///
/// # Parameters
///
/// - `line` — single line without trailing newline.
/// - `tab_width` — tab stop distance (Git `--expand-tabs=N`, `N > 0`).
#[must_use]
pub fn expand_tabs_in_line(line: &str, tab_width: usize) -> String {
debug_assert!(tab_width > 0);
if tab_width == 0 {
return line.to_owned();
}
let mut out = String::with_capacity(line.len());
let mut rest = line;
while let Some(pos) = rest.find('\t') {
let prefix = &rest[..pos];
match utf8_display_width(prefix) {
Some(width) => {
out.push_str(prefix);
let col = width % tab_width;
let spaces = tab_width - col;
out.extend(std::iter::repeat_n(' ', spaces));
}
None => {
out.push_str(rest);
return out;
}
}
rest = &rest[pos + 1..];
}
out.push_str(rest);
out
}
/// Expand tabs on every line of `msg`, preserving newlines (including a trailing final newline).
#[must_use]
pub fn expand_tabs_in_multiline_message(msg: &str, tab_width: usize) -> String {
if tab_width == 0 {
return msg.to_owned();
}
let mut out = String::with_capacity(msg.len());
let mut start = 0usize;
for (i, c) in msg.char_indices() {
if c == '\n' {
out.push_str(&expand_tabs_in_line(&msg[start..i], tab_width));
out.push('\n');
start = i + c.len_utf8();
}
}
out.push_str(&expand_tabs_in_line(&msg[start..], tab_width));
out
}
/// Indent with `indent` ASCII spaces, then optionally expand tabs in `line`.
///
/// Matches Git `pp_handle_indent`: fixed spaces plus tab expansion on the remainder when
/// `tab_width > 0`; otherwise the line is copied verbatim (tabs preserved).
#[must_use]
pub fn indent_and_expand_tabs(line: &str, indent: usize, tab_width: usize) -> String {
let mut out = String::with_capacity(indent + line.len());
out.extend(std::iter::repeat_n(' ', indent));
if tab_width == 0 {
out.push_str(line);
} else {
out.push_str(&expand_tabs_in_line(line, tab_width));
}
out
}
/// Byte-level twin of [`indent_and_expand_tabs`] that preserves commit bodies verbatim.
///
/// Git's `pp_handle_indent`/`strbuf_add_tabexpand` (pretty.c) operate on raw bytes: the message
/// body is emitted exactly as stored when `i18n.commitEncoding` names an encoding Git cannot
/// decode (e.g. the test's `non-utf-8`, which makes `logmsg_reencode` a no-op), so invalid-UTF-8
/// bytes must round-trip unchanged. This returns the indented, optionally tab-expanded line as a
/// byte vector without lossy UTF-8 conversion.
///
/// - `line` — a single body line (no trailing newline) as raw bytes.
/// - `indent` — leading ASCII spaces.
/// - `tab_width` — tab stop distance; `0` disables tab expansion (line copied verbatim).
#[must_use]
pub fn indent_and_expand_tabs_bytes(line: &[u8], indent: usize, tab_width: usize) -> Vec<u8> {
let mut out = Vec::with_capacity(indent + line.len());
out.extend(std::iter::repeat_n(b' ', indent));
if tab_width == 0 {
out.extend_from_slice(line);
return out;
}
// Port of `strbuf_add_tabexpand`: for each tab, align to the next multiple of `tab_width`
// using the display width of the preceding bytes. If that segment is not well-formed UTF-8
// (`pp_utf8_width` returns < 0), give up aligning and copy the remainder verbatim.
let mut rest = line;
while let Some(pos) = rest.iter().position(|&b| b == b'\t') {
let prefix = &rest[..pos];
match std::str::from_utf8(prefix)
.ok()
.and_then(utf8_display_width)
{
Some(width) => {
out.extend_from_slice(prefix);
let spaces = tab_width - (width % tab_width);
out.extend(std::iter::repeat_n(b' ', spaces));
rest = &rest[pos + 1..];
}
None => break,
}
}
out.extend_from_slice(rest);
out
}
/// Default tab-expansion width for a named `--pretty` format (Git `cmt_fmt_map.expand_tabs_in_log`).
///
/// `format` is the resolved pretty name (`None` means Git's default, i.e. `medium`).
/// When `oneline` is true and no explicit format was given, Git uses the oneline defaults.
#[must_use]
pub fn default_expand_tabs_for_pretty_format(format: Option<&str>, oneline: bool) -> usize {
if oneline && format.is_none() {
return 0;
}
let fmt: &str = match format {
None => "medium",
Some(f) => f,
};
match fmt {
"short" | "raw" | "email" | "oneline" | "reference" | "mboxrd" => 0,
"medium" | "full" | "fuller" => 8,
f if f.starts_with("format:") || f.starts_with("tformat:") => 8,
_ => 8,
}
}
/// Resolve effective tab width from CLI flags and pretty format (Git `rev_info.expand_tabs_in_log`).
///
/// Precedence: `--no-expand-tabs` forces 0; else `--expand-tabs[=N]` if present (`N` defaults to 8
/// when the flag is given without `=`, via the CLI layer); else format default.
#[must_use]
pub fn resolve_expand_tabs_in_log(
no_expand_tabs: bool,
expand_tabs: Option<usize>,
format: Option<&str>,
oneline: bool,
) -> usize {
if no_expand_tabs {
return 0;
}
if let Some(n) = expand_tabs {
return n;
}
default_expand_tabs_for_pretty_format(format, oneline)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn expand_tabs_ascii_simple() {
assert_eq!(expand_tabs_in_line("a\tb", 8), "a b");
assert_eq!(expand_tabs_in_line("\tfoo", 8), " foo");
}
#[test]
fn expand_tabs_aligns_to_stop() {
// 'abcd' has width 4; next stop at 8 => 4 spaces
assert_eq!(expand_tabs_in_line("abcd\tx", 8), "abcd x");
}
#[test]
fn expand_tabs_multiline() {
let s = expand_tabs_in_multiline_message("a\tb\nc\td\n", 8);
assert_eq!(s, "a b\nc d\n");
}
#[test]
fn indent_and_expand_combines() {
let s = indent_and_expand_tabs("\ttitle", 4, 8);
assert_eq!(s, format!("{}{}", " ".repeat(12), "title"));
}
#[test]
fn indent_without_expand_preserves_tabs() {
let s = indent_and_expand_tabs("\tx", 4, 0);
assert_eq!(s, " \tx");
}
#[test]
fn bytes_preserve_invalid_utf8_verbatim() {
// Invalid UTF-8 bytes (e.g. a body stored under `i18n.commitEncoding=non-utf-8`) must
// round-trip unchanged, not become U+FFFD.
let line = b"Th\xf8\x9d\x84\x9es";
let out = indent_and_expand_tabs_bytes(line, 6, 0);
assert_eq!(out, b" Th\xf8\x9d\x84\x9es");
}
#[test]
fn bytes_expand_tabs_match_string_path() {
// For valid UTF-8 the byte path aligns tabs identically to the string path.
let out = indent_and_expand_tabs_bytes(b"abcd\tx", 4, 8);
assert_eq!(out, indent_and_expand_tabs("abcd\tx", 4, 8).as_bytes());
}
#[test]
fn bytes_give_up_alignment_after_invalid_utf8_before_tab() {
// Git stops aligning once the segment before a tab is not well-formed UTF-8 and copies
// the remainder (including the tab) verbatim.
let line = b"\xf8\tx";
let out = indent_and_expand_tabs_bytes(line, 0, 8);
assert_eq!(out, b"\xf8\tx");
}
}