coreutils_rs/expand/
core.rs1use std::io::Write;
2
/// Tab stop configuration used by the expand and unexpand routines.
#[derive(Debug, Clone)]
pub enum TabStops {
    /// A tab stop every `n` columns, i.e. at every multiple of `n`.
    Regular(usize),
    /// Explicit tab stop columns. The lookups in this file binary-search the
    /// vector, so it is expected to be sorted in ascending order.
    List(Vec<usize>),
}
11
12impl TabStops {
13 #[inline]
15 fn spaces_to_next(&self, column: usize) -> usize {
16 match self {
17 TabStops::Regular(n) => {
18 if *n == 0 {
19 return 0;
20 }
21 *n - (column % *n)
22 }
23 TabStops::List(stops) => {
24 match stops.binary_search(&(column + 1)) {
26 Ok(idx) => stops[idx] - column,
27 Err(idx) => {
28 if idx < stops.len() {
29 stops[idx] - column
30 } else {
31 1
33 }
34 }
35 }
36 }
37 }
38 }
39
40 #[inline]
42 fn is_tab_stop(&self, column: usize) -> bool {
43 match self {
44 TabStops::Regular(n) => {
45 if *n == 0 {
46 return false;
47 }
48 column.is_multiple_of(*n)
49 }
50 TabStops::List(stops) => stops.binary_search(&column).is_ok(),
51 }
52 }
53
54 #[inline]
56 fn next_tab_stop(&self, column: usize) -> usize {
57 column + self.spaces_to_next(column)
58 }
59}
60
61pub fn parse_tab_stops(spec: &str) -> Result<TabStops, String> {
63 let spec = spec.trim();
64 if spec.is_empty() {
65 return Ok(TabStops::Regular(8));
66 }
67
68 if let Ok(n) = spec.parse::<usize>() {
70 if n == 0 {
71 return Err("tab size cannot be 0".to_string());
72 }
73 return Ok(TabStops::Regular(n));
74 }
75
76 let mut stops: Vec<usize> = Vec::new();
78 for part in spec.split([',', ' ']) {
79 let part = part.trim();
80 if part.is_empty() {
81 continue;
82 }
83 if let Some(rest) = part.strip_prefix('/') {
85 let n: usize = rest
86 .parse()
87 .map_err(|_| format!("'{}' is not a valid number", part))?;
88 if n == 0 {
89 return Err("tab size cannot be 0".to_string());
90 }
91 let last = stops.last().copied().unwrap_or(0);
92 let mut pos = last + n;
93 while pos < 10000 {
94 stops.push(pos);
95 pos += n;
96 }
97 continue;
98 }
99 match part.parse::<usize>() {
100 Ok(n) => {
101 if !stops.is_empty() && n <= *stops.last().unwrap() {
102 return Err("tab sizes must be ascending".to_string());
103 }
104 stops.push(n);
105 }
106 Err(_) => return Err(format!("'{}' is not a valid number", part)),
107 }
108 }
109
110 if stops.is_empty() {
111 return Err("tab specification is empty".to_string());
112 }
113
114 if stops.len() == 1 {
115 return Ok(TabStops::Regular(stops[0]));
116 }
117
118 Ok(TabStops::List(stops))
119}
120
/// A block of 64 ASCII spaces, sliced whenever a run of spaces has to be emitted.
const SPACES: [u8; 64] = [b' '; 64];
123
/// Appends `n` ASCII space bytes to the end of `output`.
#[inline]
fn push_spaces(output: &mut Vec<u8>, n: usize) {
    let padded_len = output.len() + n;
    output.resize(padded_len, b' ');
}
134
135#[inline]
137fn write_spaces(out: &mut impl Write, n: usize) -> std::io::Result<()> {
138 let mut remaining = n;
139 while remaining > 0 {
140 let chunk = remaining.min(SPACES.len());
141 out.write_all(&SPACES[..chunk])?;
142 remaining -= chunk;
143 }
144 Ok(())
145}
146
147pub fn expand_bytes(
150 data: &[u8],
151 tabs: &TabStops,
152 initial_only: bool,
153 out: &mut impl Write,
154) -> std::io::Result<()> {
155 if data.is_empty() {
156 return Ok(());
157 }
158
159 if memchr::memchr(b'\t', data).is_none() {
161 return out.write_all(data);
162 }
163
164 if let TabStops::Regular(tab_size) = tabs {
166 if !initial_only && memchr::memchr(b'\x08', data).is_none() {
167 return expand_regular_fast(data, *tab_size, out);
168 }
169 }
170
171 expand_generic(data, tabs, initial_only, out)
173}
174
175fn expand_regular_fast(data: &[u8], tab_size: usize, out: &mut impl Write) -> std::io::Result<()> {
179 let mut column: usize = 0;
180 let mut pos: usize = 0;
181
182 while pos < data.len() {
183 match memchr::memchr2(b'\t', b'\n', &data[pos..]) {
184 Some(offset) => {
185 if offset > 0 {
187 out.write_all(&data[pos..pos + offset])?;
188 column += offset;
189 }
190 let byte = data[pos + offset];
191 pos += offset + 1;
192
193 if byte == b'\n' {
194 out.write_all(b"\n")?;
195 column = 0;
196 } else {
197 let spaces = tab_size - (column % tab_size);
199 write_spaces(out, spaces)?;
200 column += spaces;
201 }
202 }
203 None => {
204 out.write_all(&data[pos..])?;
205 break;
206 }
207 }
208 }
209
210 Ok(())
211}
212
213fn expand_generic(
215 data: &[u8],
216 tabs: &TabStops,
217 initial_only: bool,
218 out: &mut impl Write,
219) -> std::io::Result<()> {
220 let mut output = Vec::with_capacity(data.len() + data.len() / 8);
221 let mut column: usize = 0;
222 let mut in_initial = true;
223
224 for &byte in data {
225 match byte {
226 b'\t' => {
227 if initial_only && !in_initial {
228 output.push(b'\t');
229 column = tabs.next_tab_stop(column);
230 } else {
231 let spaces = tabs.spaces_to_next(column);
232 push_spaces(&mut output, spaces);
233 column += spaces;
234 }
235 }
236 b'\n' => {
237 output.push(b'\n');
238 column = 0;
239 in_initial = true;
240 }
241 b'\x08' => {
242 output.push(b'\x08');
243 if column > 0 {
244 column -= 1;
245 }
246 }
247 _ => {
248 if initial_only && in_initial && byte != b' ' {
249 in_initial = false;
250 }
251 output.push(byte);
252 column += 1;
253 }
254 }
255 }
256
257 out.write_all(&output)
258}
259
260pub fn unexpand_bytes(
263 data: &[u8],
264 tabs: &TabStops,
265 all: bool,
266 out: &mut impl Write,
267) -> std::io::Result<()> {
268 if data.is_empty() {
269 return Ok(());
270 }
271
272 let mut output = Vec::with_capacity(data.len());
273 let mut column: usize = 0;
274 let mut space_start_col: Option<usize> = None;
275 let mut in_initial = true;
276
277 for &byte in data {
278 match byte {
279 b' ' => {
280 if !all && !in_initial {
281 output.push(b' ');
282 column += 1;
283 } else {
284 if space_start_col.is_none() {
285 space_start_col = Some(column);
286 }
287 column += 1;
288 if tabs.is_tab_stop(column) {
289 output.push(b'\t');
290 space_start_col = None;
291 }
292 }
293 }
294 b'\t' => {
295 space_start_col = None;
296 output.push(b'\t');
297 column = tabs.next_tab_stop(column);
298 }
299 b'\n' => {
300 if let Some(start_col) = space_start_col.take() {
301 push_spaces(&mut output, column - start_col);
302 }
303 output.push(b'\n');
304 column = 0;
305 in_initial = true;
306 }
307 b'\x08' => {
308 if let Some(start_col) = space_start_col.take() {
309 push_spaces(&mut output, column - start_col);
310 }
311 output.push(b'\x08');
312 if column > 0 {
313 column -= 1;
314 }
315 }
316 _ => {
317 if let Some(start_col) = space_start_col.take() {
318 push_spaces(&mut output, column - start_col);
319 }
320 if in_initial {
321 in_initial = false;
322 }
323 output.push(byte);
324 column += 1;
325 }
326 }
327 }
328
329 if let Some(start_col) = space_start_col {
330 push_spaces(&mut output, column - start_col);
331 }
332
333 out.write_all(&output)
334}