#[macro_use]
extern crate uucore;
use clap::{App, Arg};
use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Stdout, Write};
use std::str::from_utf8;
use unicode_width::UnicodeWidthChar;
// Program name registered with clap (see `App::name` in `uumain`).
static NAME: &str = "unexpand";
// Version string taken from the crate manifest at compile time.
static VERSION: &str = env!("CARGO_PKG_VERSION");
// Usage line passed to clap.
static USAGE: &str = "unexpand [OPTION]... [FILE]...";
// Description passed to clap as the "about" text. Note that the literal
// contains both a `\n` escape and a real line break.
static SUMMARY: &str = "Convert blanks in each FILE to tabs, writing to standard output.\n
With no FILE, or when FILE is -, read standard input.";
// Tab stop interval used when no `-t`/`--tabs` option is given.
const DEFAULT_TABSTOP: usize = 8;
/// Parses the value of the `--tabs` option into a list of tab stops.
///
/// `s` is a comma-separated list of unsigned integers. Three conditions are
/// reported through `crash!` with code 1, checked in this order: an entry
/// that does not parse as a number, an entry equal to zero, and an entry
/// that is smaller than the entry before it.
fn tabstops_parse(s: String) -> Vec<usize> {
    let mut stops: Vec<usize> = Vec::new();
    for field in s.split(',') {
        match field.parse::<usize>() {
            Ok(stop) => stops.push(stop),
            Err(_) => crash!(1, "{}\n", "tab size contains invalid character(s)"),
        }
    }

    if stops.contains(&0) {
        crash!(1, "{}\n", "tab size cannot be 0");
    }

    // Equal neighbours are accepted; only a decrease is rejected.
    if stops.windows(2).any(|pair| pair[0] > pair[1]) {
        crash!(1, "{}\n", "tab sizes must be ascending");
    }

    stops
}
// Names under which the command-line arguments are registered with clap in
// `uumain` and looked up again in `Options::new`.
mod options {
// Positional FILE operands (hidden from the help output).
pub const FILE: &str = "file";
// `-a` / `--all`.
pub const ALL: &str = "all";
// `--first-only`.
pub const FIRST_ONLY: &str = "first-only";
// `-t` / `--tabs`.
pub const TABS: &str = "tabs";
// `-U` / `--no-utf8`.
pub const NO_UTF8: &str = "no-utf8";
}
// Run-time configuration derived from the command line by `Options::new`.
struct Options {
// Inputs to process; the name "-" is read from standard input (see `open`).
files: Vec<String>,
// Tab stops: either a single interval or an explicit list of columns
// (see `next_tabstop` for how the two cases are told apart).
tabstops: Vec<usize>,
// True when blanks after the leading run are converted too.
aflag: bool,
// True when input is decoded as UTF-8; false treats every byte as one character.
uflag: bool,
}
impl Options {
    /// Builds the run-time configuration from the parsed command line.
    ///
    /// * `tabstops` comes from `--tabs`, defaulting to a single stop of
    ///   `DEFAULT_TABSTOP`.
    /// * `aflag` is set by `--all` or by the presence of `--tabs`, unless
    ///   `--first-only` is given.
    /// * `uflag` is set unless `--no-utf8` is given.
    /// * `files` holds every FILE operand in command-line order, or just
    ///   `"-"` when none was supplied.
    fn new(matches: clap::ArgMatches) -> Options {
        let tabstops = match matches.value_of(options::TABS) {
            None => vec![DEFAULT_TABSTOP],
            Some(s) => tabstops_parse(s.to_string()),
        };

        let aflag = (matches.is_present(options::ALL) || matches.is_present(options::TABS))
            && !matches.is_present(options::FIRST_ONLY);
        let uflag = !matches.is_present(options::NO_UTF8);

        // The FILE argument is declared with `.multiple(true)`, so all of its
        // values must be collected; `value_of` would return only the first
        // one and silently drop the remaining operands.
        let files = match matches.values_of(options::FILE) {
            Some(values) => values.map(|v| v.to_string()).collect(),
            None => vec!["-".to_owned()],
        };

        Options {
            files,
            tabstops,
            aflag,
            uflag,
        }
    }
}
/// Entry point of the utility: parses `args` with clap, runs `unexpand` with
/// the resulting options and returns 0.
pub fn uumain(args: impl uucore::Args) -> i32 {
    let argv = args.collect_str();

    // Positional FILE operands; hidden from the generated help.
    let file_arg = Arg::with_name(options::FILE).hidden(true).multiple(true);

    let all_arg = Arg::with_name(options::ALL)
        .short("a")
        .long(options::ALL)
        .help("convert all blanks, instead of just initial blanks")
        .takes_value(false);

    let first_only_arg = Arg::with_name(options::FIRST_ONLY)
        .long(options::FIRST_ONLY)
        .help("convert only leading sequences of blanks (overrides -a)")
        .takes_value(false);

    let tabs_arg = Arg::with_name(options::TABS)
        .short("t")
        .long(options::TABS)
        .long_help("use comma separated LIST of tab positions or have tabs N characters apart instead of 8 (enables -a)")
        .takes_value(true);

    let no_utf8_arg = Arg::with_name(options::NO_UTF8)
        .short("U")
        .long(options::NO_UTF8)
        .takes_value(false)
        .help("interpret input file as 8-bit ASCII rather than UTF-8");

    // Arguments are registered in the same order as they are declared above.
    let app = App::new(executable!())
        .name(NAME)
        .version(VERSION)
        .usage(USAGE)
        .about(SUMMARY)
        .arg(file_arg)
        .arg(all_arg)
        .arg(first_only_arg)
        .arg(tabs_arg)
        .arg(no_utf8_arg);

    let matches = app.get_matches_from(argv);
    unexpand(Options::new(matches));
    0
}
/// Opens one input for buffered reading.
///
/// The path `"-"` selects standard input; any other path is opened as a
/// file. A failure to open the file is reported through `crash!` with
/// code 1 and the message `<path>: <error>`.
fn open(path: String) -> BufReader<Box<dyn Read + 'static>> {
    if path == "-" {
        return BufReader::new(Box::new(stdin()) as Box<dyn Read>);
    }

    let file = match File::open(&path) {
        Ok(handle) => handle,
        Err(e) => crash!(1, "{}: {}", path, e),
    };
    BufReader::new(Box::new(file) as Box<dyn Read>)
}
/// Returns the distance from column `col` to the next tab stop.
///
/// A one-element `tabstops` slice is treated as a repeating interval, so a
/// result is always available. Any other slice is treated as a list of
/// columns: the result is the distance to the first listed stop greater than
/// `col`, or `None` when there is no such stop.
fn next_tabstop(tabstops: &[usize], col: usize) -> Option<usize> {
    if let &[interval] = tabstops {
        return Some(interval - col % interval);
    }

    tabstops
        .iter()
        .find(|&&stop| stop > col)
        .map(|stop| stop - col)
}
/// Writes the blanks pending between column `scol` and column `col`.
///
/// `scol` is the column up to which output has already been produced and
/// `col` the column the input has reached. When conversion applies, the gap
/// is covered with as many tabs as fit without passing `col`; whatever is
/// left is padded with single spaces.
///
/// * `prevtab` - the last input character was a tab.
/// * `init`    - the line is still in its leading run of blanks.
/// * `amode`   - blanks after the leading run may be converted as well.
fn write_tabs(
    output: &mut BufWriter<Stdout>,
    tabstops: &[usize],
    mut scol: usize,
    col: usize,
    prevtab: bool,
    init: bool,
    amode: bool,
) {
    let convertible = init || amode;
    // A run of at least two columns that did not end with a tab.
    let multi_column_run = convertible && !prevtab && col > scol + 1;
    // Any non-empty run that is either leading or ended with a tab.
    let leading_or_after_tab = col > scol && (init || (convertible && prevtab));

    if multi_column_run || leading_or_after_tab {
        loop {
            let step = match next_tabstop(tabstops, scol) {
                // Only take the tab if it does not overshoot the target column.
                Some(step) if col >= scol + step => step,
                _ => break,
            };
            safe_unwrap!(output.write_all(b"\t"));
            scol += step;
        }
    }

    // Fill the remaining distance with plain spaces.
    for _ in scol..col {
        safe_unwrap!(output.write_all(b" "));
    }
}
// Classification of an input character as produced by `next_char_info`.
#[derive(PartialEq, Eq, Debug)]
enum CharType {
// Backspace (0x08); `unexpand` moves the column back by one for it.
Backspace,
// Space (0x20).
Space,
// Horizontal tab (0x09).
Tab,
// Anything else, including bytes that do not decode as UTF-8.
Other,
}
/// Classifies the character that starts at `buf[byte]`.
///
/// Returns `(type, display width, length in bytes)`.
///
/// * With `uflag` unset, every byte is one character of width 1.
/// * With `uflag` set, the input is decoded as UTF-8. Space, tab and
///   backspace are reported with width 0 and length 1. Any other character
///   is reported with its Unicode display width (0 when the width is
///   undefined) and its encoded length. A byte that does not start a
///   complete, valid sequence is reported as a one-byte `Other` of width 1.
///
/// Panics if `byte` is not a valid index into `buf`.
fn next_char_info(uflag: bool, buf: &[u8], byte: usize) -> (CharType, usize, usize) {
    if !uflag {
        let ctype = match buf[byte] {
            0x20 => CharType::Space,
            0x09 => CharType::Tab,
            0x08 => CharType::Backspace,
            _ => CharType::Other,
        };
        return (ctype, 1, 1);
    }

    // Length of the sequence announced by the lead byte. Converting the byte
    // to a `char` and asking for `len_utf8()` can only ever yield 1 or 2,
    // which made every 3- and 4-byte character fail to decode and be counted
    // as several width-1 characters. Continuation bytes and invalid lead
    // bytes map to 1 and are rejected by the decoder below.
    let nbytes = match buf[byte] {
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF7 => 4,
        _ => 1,
    };

    // `get` yields `None` when the sequence would run past the buffer, so a
    // truncated character falls through to the invalid-byte case.
    let decoded = buf
        .get(byte..byte + nbytes)
        .and_then(|seq| from_utf8(seq).ok())
        .and_then(|text| text.chars().next());

    match decoded {
        Some(' ') => (CharType::Space, 0, 1),
        Some('\t') => (CharType::Tab, 0, 1),
        Some('\x08') => (CharType::Backspace, 0, 1),
        Some(c) => (
            CharType::Other,
            UnicodeWidthChar::width(c).unwrap_or(0),
            nbytes,
        ),
        // Invalid or truncated UTF-8: treat the byte as an opaque character.
        None => (CharType::Other, 1, 1),
    }
}
// Processes every input in `options.files` line by line, replacing runs of
// blanks with tabs according to `options.tabstops`, and writes the result to
// standard output.
fn unexpand(options: Options) {
let mut output = BufWriter::new(stdout());
let ts = &options.tabstops[..];
let mut buf = Vec::new();
// With an explicit list of stops, the last one is the column from which the
// rest of the line is copied unchanged; 0 disables that limit.
let lastcol = if ts.len() > 1 { *ts.last().unwrap() } else { 0 };
for file in options.files.into_iter() {
let mut fh = open(file);
// Read one line (including its terminating newline, if any) into `buf`.
// After a read error the loop continues only if some data was buffered.
while match fh.read_until(b'\n', &mut buf) {
Ok(s) => s > 0,
Err(_) => !buf.is_empty(),
} {
// Index of the next unread byte in `buf`.
let mut byte = 0;
// Column reached in the input line.
let mut col = 0;
// Column up to which output has been written.
let mut scol = 0;
// True until the first non-blank character of the line has been seen.
let mut init = true;
// Type of the previously processed character.
let mut pctype = CharType::Other;
while byte < buf.len() {
// Past the last listed tab stop: emit the pending blanks, then copy
// the remainder of the line verbatim.
if lastcol > 0 && col >= lastcol {
write_tabs(
&mut output,
ts,
scol,
col,
pctype == CharType::Tab,
init,
true,
);
safe_unwrap!(output.write_all(&buf[byte..]));
scol = col;
break;
}
let (ctype, cwidth, nbytes) = next_char_info(options.uflag, &buf, byte);
// Blanks are held back (not written yet) while in the leading run or
// when all blanks are being converted.
let tabs_buffered = init || options.aflag;
match ctype {
CharType::Space | CharType::Tab => {
// A space advances one column; a tab advances to the next stop, or
// by one column when no further stop exists.
col += if ctype == CharType::Space {
1
} else {
next_tabstop(ts, col).unwrap_or(1)
};
// Not buffering: pass the blank through and mark it as written.
if !tabs_buffered {
safe_unwrap!(output.write_all(&buf[byte..byte + nbytes]));
scol = col;
}
}
CharType::Other | CharType::Backspace => {
// Emit any blanks held back before this character.
write_tabs(
&mut output,
ts,
scol,
col,
pctype == CharType::Tab,
init,
options.aflag,
);
// The leading run of blanks ends at the first non-blank character.
init = false;
// Ordinary characters advance by their width; a backspace moves one
// column back but never below column 0.
col = if ctype == CharType::Other {
col + cwidth
} else if col > 0 {
col - 1
} else {
0
};
safe_unwrap!(output.write_all(&buf[byte..byte + nbytes]));
scol = col;
}
}
byte += nbytes;
pctype = ctype;
}
// End of line: write whatever blanks are still pending.
write_tabs(
&mut output,
ts,
scol,
col,
pctype == CharType::Tab,
init,
true,
);
// Output is flushed after every input line.
safe_unwrap!(output.flush());
// Reuse the buffer for the next line.
buf.truncate(0);
}
}
// Final flush; a failure is passed to `crash_if_err!` with code 1.
crash_if_err!(1, output.flush())
}