1use clap::{Arg, ArgAction, ArgMatches, Command};
9use std::ffi::OsString;
10use std::fs::File;
11use std::io::{BufRead, BufReader, BufWriter, Read, Write, stdin, stdout};
12use std::num::IntErrorKind;
13use std::path::Path;
14use std::str::from_utf8;
15use thiserror::Error;
16use unicode_width::UnicodeWidthChar;
17use uucore::display::Quotable;
18use uucore::error::{FromIo, UError, UResult, set_exit_code};
19use uucore::translate;
20use uucore::{format_usage, show_error};
21
/// Identifiers of the command-line arguments registered in `uu_app`.
pub mod options {
    /// Tab-stop list option; registered with the long name `tabs` and short `-t`.
    pub static TABS: &str = "tabs";
    /// Flag registered with the long name `initial` and short `-i`.
    pub static INITIAL: &str = "initial";
    /// Flag registered with the long name `no-utf8` and short `-U`.
    pub static NO_UTF8: &str = "no-utf8";
    /// Hidden positional argument collecting the input file operands.
    pub static FILES: &str = "FILES";
}
28
/// Text passed to clap's `after_help` in `uu_app`; currently empty.
static LONG_HELP: &str = "";
30
/// Tab stop used when the tab list option is absent or yields no numbers.
static DEFAULT_TABSTOP: usize = 8;
32
/// How `next_tabstop` treats columns that lie beyond the explicitly listed
/// tab stops. Selected by a `/` or `+` prefix in the tab list.
#[derive(PartialEq)]
enum RemainingMode {
    /// No specifier: with several stops, a tab past the last one advances a
    /// single column.
    None,
    /// `/N`: past the other stops, tab stops fall on multiples of the last value.
    Slash,
    /// `+N`: past the other stops, tab stops repeat every N columns counted
    /// from the last explicit stop.
    Plus,
}
41
/// Returns `true` if `c` is one of the separators accepted between entries
/// of a tab list: a space or a comma.
fn is_space_or_comma(c: char) -> bool {
    matches!(c, ' ' | ',')
}
54
/// Returns `true` if `c` is an ASCII decimal digit or a comma, i.e. a
/// character allowed in the `-N[,M...]` tab-stop shortcut.
fn is_digit_or_comma(c: char) -> bool {
    matches!(c, '0'..='9' | ',')
}
59
/// Errors produced by `tabstops_parse` while interpreting a tab list.
///
/// The display text of every variant comes from the translation catalogue.
#[derive(Debug, Error)]
enum ParseError {
    /// An entry could not be parsed as a number; carries the part of the
    /// entry left after skipping its leading numeric characters.
    #[error("{}", translate!("expand-error-invalid-character", "char" => .0.quote()))]
    InvalidCharacter(String),
    /// A `/` or `+` appeared after digits rather than in front of them;
    /// carries the specifier and the text starting at it.
    #[error("{}", translate!("expand-error-specifier-not-at-start", "specifier" => .0.quote(), "number" => .1.quote()))]
    SpecifierNotAtStartOfNumber(String, String),
    /// Another number followed one that already carried a specifier;
    /// carries the specifier.
    #[error("{}", translate!("expand-error-specifier-only-allowed-with-last", "specifier" => .0.quote()))]
    SpecifierOnlyAllowedWithLastValue(String),
    /// A tab size of zero was given.
    #[error("{}", translate!("expand-error-tab-size-cannot-be-zero"))]
    TabSizeCannotBeZero,
    /// A number overflowed `usize`; carries the offending text.
    #[error("{}", translate!("expand-error-tab-size-too-large", "size" => .0.quote()))]
    TabSizeTooLarge(String),
    /// A number was not strictly greater than the one before it.
    #[error("{}", translate!("expand-error-tab-sizes-must-be-ascending"))]
    TabSizesMustBeAscending,
}

// Lets `ParseError` be propagated with `?` from functions returning `UResult`.
impl UError for ParseError {}
78
79fn tabstops_parse(s: &str) -> Result<(RemainingMode, Vec<usize>), ParseError> {
88 let s = s.trim_start_matches(is_space_or_comma);
90
91 if s.is_empty() {
94 return Ok((RemainingMode::None, vec![DEFAULT_TABSTOP]));
95 }
96
97 let mut nums = vec![];
98 let mut remaining_mode = RemainingMode::None;
99 let mut is_specifier_already_used = false;
100 for word in s.split(is_space_or_comma) {
101 let bytes = word.as_bytes();
102 for i in 0..bytes.len() {
103 match bytes[i] {
104 b'+' => remaining_mode = RemainingMode::Plus,
105 b'/' => remaining_mode = RemainingMode::Slash,
106 _ => {
107 let s = from_utf8(&bytes[i..]).unwrap();
109 match s.parse::<usize>() {
110 Ok(num) => {
111 if num == 0 {
113 return Err(ParseError::TabSizeCannotBeZero);
114 }
115
116 if let Some(last_stop) = nums.last() {
118 if *last_stop >= num {
119 return Err(ParseError::TabSizesMustBeAscending);
120 }
121 }
122
123 if is_specifier_already_used {
124 let specifier = if remaining_mode == RemainingMode::Slash {
125 "/".to_string()
126 } else {
127 "+".to_string()
128 };
129 return Err(ParseError::SpecifierOnlyAllowedWithLastValue(
130 specifier,
131 ));
132 } else if remaining_mode != RemainingMode::None {
133 is_specifier_already_used = true;
134 }
135
136 nums.push(num);
138 break;
139 }
140 Err(e) => {
141 if *e.kind() == IntErrorKind::PosOverflow {
142 return Err(ParseError::TabSizeTooLarge(s.to_string()));
143 }
144
145 let s = s.trim_start_matches(char::is_numeric);
146 return if s.starts_with('/') || s.starts_with('+') {
147 Err(ParseError::SpecifierNotAtStartOfNumber(
148 s[0..1].to_string(),
149 s.to_string(),
150 ))
151 } else {
152 Err(ParseError::InvalidCharacter(s.to_string()))
153 };
154 }
155 }
156 }
157 }
158 }
159 }
160 if nums.is_empty() {
163 nums = vec![DEFAULT_TABSTOP];
164 }
165
166 if nums.len() < 2 {
167 remaining_mode = RemainingMode::None;
168 }
169 Ok((remaining_mode, nums))
170}
171
/// Runtime settings derived from the parsed command line.
struct Options {
    /// Input operands in command-line order; `["-"]` when none were given.
    files: Vec<OsString>,
    /// Parsed tab stops; `[DEFAULT_TABSTOP]` when the option is absent.
    tabstops: Vec<usize>,
    /// A run of spaces as long as the widest gap between consecutive tab
    /// stops, sliced when writing the replacement for a tab.
    tspaces: String,
    /// Set by the `initial` flag.
    iflag: bool,
    /// `true` unless the `no-utf8` flag was given.
    uflag: bool,

    /// How tab stops beyond the listed ones are computed.
    remaining_mode: RemainingMode,
}
183
184impl Options {
185 fn new(matches: &ArgMatches) -> Result<Self, ParseError> {
186 let (remaining_mode, tabstops) = match matches.get_many::<String>(options::TABS) {
187 Some(s) => tabstops_parse(&s.map(|s| s.as_str()).collect::<Vec<_>>().join(","))?,
188 None => (RemainingMode::None, vec![DEFAULT_TABSTOP]),
189 };
190
191 let iflag = matches.get_flag(options::INITIAL);
192 let uflag = !matches.get_flag(options::NO_UTF8);
193
194 let nspaces = tabstops
197 .iter()
198 .scan(0, |pr, &it| {
199 let ret = Some(it - *pr);
200 *pr = it;
201 ret
202 })
203 .max()
204 .unwrap(); let tspaces = " ".repeat(nspaces);
206
207 let files: Vec<OsString> = match matches.get_many::<OsString>(options::FILES) {
208 Some(s) => s.cloned().collect(),
209 None => vec![OsString::from("-")],
210 };
211
212 Ok(Self {
213 files,
214 tabstops,
215 tspaces,
216 iflag,
217 uflag,
218 remaining_mode,
219 })
220 }
221}
222
223fn expand_shortcuts(args: Vec<OsString>) -> Vec<OsString> {
226 let mut processed_args = Vec::with_capacity(args.len());
227
228 for arg in args {
229 if let Some(arg) = arg.to_str() {
230 if arg.starts_with('-') && arg[1..].chars().all(is_digit_or_comma) {
231 arg[1..]
232 .split(',')
233 .filter(|s| !s.is_empty())
234 .for_each(|s| processed_args.push(OsString::from(format!("--tabs={s}"))));
235 continue;
236 }
237 }
238 processed_args.push(arg);
239 }
240
241 processed_args
242}
243
244#[uucore::main]
245pub fn uumain(args: impl uucore::Args) -> UResult<()> {
246 let matches =
247 uucore::clap_localization::handle_clap_result(uu_app(), expand_shortcuts(args.collect()))?;
248
249 expand(&Options::new(&matches)?)
250}
251
252pub fn uu_app() -> Command {
253 uucore::clap_localization::configure_localized_command(
254 Command::new(uucore::util_name())
255 .version(uucore::crate_version!())
256 .about(translate!("expand-about"))
257 .after_help(LONG_HELP)
258 .override_usage(format_usage(&translate!("expand-usage"))),
259 )
260 .infer_long_args(true)
261 .args_override_self(true)
262 .arg(
263 Arg::new(options::INITIAL)
264 .long(options::INITIAL)
265 .short('i')
266 .help(translate!("expand-help-initial"))
267 .action(ArgAction::SetTrue),
268 )
269 .arg(
270 Arg::new(options::TABS)
271 .long(options::TABS)
272 .short('t')
273 .value_name("N, LIST")
274 .action(ArgAction::Append)
275 .help(translate!("expand-help-tabs")),
276 )
277 .arg(
278 Arg::new(options::NO_UTF8)
279 .long(options::NO_UTF8)
280 .short('U')
281 .help(translate!("expand-help-no-utf8"))
282 .action(ArgAction::SetTrue),
283 )
284 .arg(
285 Arg::new(options::FILES)
286 .action(ArgAction::Append)
287 .hide(true)
288 .value_hint(clap::ValueHint::FilePath)
289 .value_parser(clap::value_parser!(OsString)),
290 )
291}
292
293fn open(path: &OsString) -> UResult<BufReader<Box<dyn Read + 'static>>> {
294 let file_buf;
295 if path == "-" {
296 Ok(BufReader::new(Box::new(stdin()) as Box<dyn Read>))
297 } else {
298 let path_ref = Path::new(path);
299 file_buf = File::open(path_ref).map_err_context(|| path.to_string_lossy().to_string())?;
300 Ok(BufReader::new(Box::new(file_buf) as Box<dyn Read>))
301 }
302}
303
304fn next_tabstop(tabstops: &[usize], col: usize, remaining_mode: &RemainingMode) -> usize {
315 let num_tabstops = tabstops.len();
316 match remaining_mode {
317 RemainingMode::Plus => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) {
318 Some(t) => t - col,
319 None => {
320 let step_size = tabstops[num_tabstops - 1];
321 let last_fixed_tabstop = tabstops[num_tabstops - 2];
322 let characters_since_last_tabstop = col - last_fixed_tabstop;
323
324 let steps_required = 1 + characters_since_last_tabstop / step_size;
325 steps_required * step_size - characters_since_last_tabstop
326 }
327 },
328 RemainingMode::Slash => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) {
329 Some(t) => t - col,
330 None => tabstops[num_tabstops - 1] - col % tabstops[num_tabstops - 1],
331 },
332 RemainingMode::None => {
333 if num_tabstops == 1 {
334 tabstops[0] - col % tabstops[0]
335 } else {
336 match tabstops.iter().find(|&&t| t > col) {
337 Some(t) => t - col,
338 None => 1,
339 }
340 }
341 }
342 }
343}
344
/// Classification of an input character as used by `expand_line`.
#[derive(PartialEq, Eq, Debug)]
enum CharType {
    /// `\x08`: moves the column back by one, never below zero.
    Backspace,
    /// `\t`: candidate for replacement by spaces.
    Tab,
    /// Anything else; advances the column by its display width.
    Other,
}
351
352#[allow(clippy::cognitive_complexity)]
353fn expand_line(
354 buf: &mut Vec<u8>,
355 output: &mut BufWriter<std::io::Stdout>,
356 tabstops: &[usize],
357 options: &Options,
358) -> std::io::Result<()> {
359 use self::CharType::{Backspace, Other, Tab};
360
361 let mut col = 0;
362 let mut byte = 0;
363 let mut init = true;
364
365 while byte < buf.len() {
366 let (ctype, cwidth, nbytes) = if options.uflag {
367 let nbytes = char::from(buf[byte]).len_utf8();
368
369 if byte + nbytes > buf.len() {
370 (Other, 1, 1)
372 } else if let Ok(t) = from_utf8(&buf[byte..byte + nbytes]) {
373 match t.chars().next() {
374 Some('\t') => (Tab, 0, nbytes),
375 Some('\x08') => (Backspace, 0, nbytes),
376 Some(c) => (Other, UnicodeWidthChar::width(c).unwrap_or(0), nbytes),
377 None => {
378 (Other, 1, 1)
380 }
381 }
382 } else {
383 (Other, 1, 1) }
385 } else {
386 (
387 match buf.get(byte) {
388 Some(0x09) => Tab,
390 Some(0x08) => Backspace,
391 _ => Other,
392 },
393 1,
394 1,
395 )
396 };
397
398 match ctype {
400 Tab => {
401 let nts = next_tabstop(tabstops, col, &options.remaining_mode);
403 col += nts;
404
405 if init || !options.iflag {
407 if nts <= options.tspaces.len() {
408 output.write_all(&options.tspaces.as_bytes()[..nts])?;
409 } else {
410 output.write_all(" ".repeat(nts).as_bytes())?;
411 }
412 } else {
413 output.write_all(&buf[byte..byte + nbytes])?;
414 }
415 }
416 _ => {
417 col = if ctype == Other {
418 col + cwidth
419 } else if col > 0 {
420 col - 1
421 } else {
422 0
423 };
424
425 if buf[byte] != 0x20 {
428 init = false;
429 }
430
431 output.write_all(&buf[byte..byte + nbytes])?;
432 }
433 }
434
435 byte += nbytes; }
437
438 output.flush()?;
439 buf.truncate(0); Ok(())
442}
443
444fn expand(options: &Options) -> UResult<()> {
445 let mut output = BufWriter::new(stdout());
446 let ts = options.tabstops.as_ref();
447 let mut buf = Vec::new();
448
449 for file in &options.files {
450 if Path::new(file).is_dir() {
451 show_error!(
452 "{}",
453 translate!("expand-error-is-directory", "file" => file.to_string_lossy())
454 );
455 set_exit_code(1);
456 continue;
457 }
458 match open(file) {
459 Ok(mut fh) => {
460 while match fh.read_until(b'\n', &mut buf) {
461 Ok(s) => s > 0,
462 Err(_) => buf.is_empty(),
463 } {
464 expand_line(&mut buf, &mut output, ts, options)
465 .map_err_context(|| translate!("expand-error-failed-to-write-output"))?;
466 }
467 }
468 Err(e) => {
469 show_error!("{e}");
470 set_exit_code(1);
471 }
472 }
473 }
474 Ok(())
475}
476
/// Unit tests for the tab-stop arithmetic and the shortcut character check.
#[cfg(test)]
mod tests {
    use crate::is_digit_or_comma;

    use super::RemainingMode;
    use super::next_tabstop;

    // Without a specifier, a tab past the last listed stop advances one column.
    #[test]
    fn test_next_tabstop_remaining_mode_none() {
        assert_eq!(next_tabstop(&[1, 5], 0, &RemainingMode::None), 1);
        assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::None), 2);
        assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::None), 1);
    }

    // With `+`, the last value is a step counted from the last explicit stop.
    #[test]
    fn test_next_tabstop_remaining_mode_plus() {
        assert_eq!(next_tabstop(&[1, 5], 0, &RemainingMode::Plus), 1);
        assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::Plus), 3);
        assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::Plus), 5);
    }

    // With `/`, stops past the explicit ones fall on multiples of the last value.
    #[test]
    fn test_next_tabstop_remaining_mode_slash() {
        assert_eq!(next_tabstop(&[1, 5], 0, &RemainingMode::Slash), 1);
        assert_eq!(next_tabstop(&[1, 5], 3, &RemainingMode::Slash), 2);
        assert_eq!(next_tabstop(&[1, 5], 6, &RemainingMode::Slash), 4);
    }

    // Only ASCII digits and commas are accepted in the `-N,M` shortcut.
    #[test]
    fn test_is_digit_or_comma() {
        assert!(is_digit_or_comma('1'));
        assert!(is_digit_or_comma(','));
        assert!(!is_digit_or_comma('a'));
    }
}