use std::borrow::Cow;
use std::fs;
use std::io;
use std::num::NonZeroUsize;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::str;
use std::thread;
use std::time;
use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
use chrono_tz::Tz;
use colored::{Color, ColoredString, Colorize, Styles};
use dateparser::parse_with_timezone;
use docopt::Docopt;
use numfmt::{Formatter, Numeric, Precision};
use rand::Rng;
use rand_chacha::ChaCha8Rng;
use rand_seeder::Seeder;
use serde::de::{Deserialize, DeserializeOwned, Deserializer, Error};
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;
use crate::config::{Config, Delimiter};
use crate::select::SelectColumns;
use crate::CliResult;
pub fn version() -> String {
let (maj, min, pat, pre) = (
option_env!("CARGO_PKG_VERSION_MAJOR"),
option_env!("CARGO_PKG_VERSION_MINOR"),
option_env!("CARGO_PKG_VERSION_PATCH"),
option_env!("CARGO_PKG_VERSION_PRE"),
);
match (maj, min, pat, pre) {
(Some(maj), Some(min), Some(pat), Some(pre)) => {
if pre.is_empty() {
format!("{}.{}.{}", maj, min, pat)
} else {
format!("{}.{}.{}-{}", maj, min, pat, pre)
}
}
_ => "".to_owned(),
}
}
pub fn get_args<T>(usage: &str, argv: &[&str]) -> CliResult<T>
where
T: DeserializeOwned,
{
Docopt::new(usage)
.and_then(|d| {
d.argv(argv.iter().copied())
.version(Some(version()))
.deserialize()
})
.map_err(From::from)
}
pub fn many_configs(
inps: &[String],
delim: Option<Delimiter>,
no_headers: bool,
select: Option<&SelectColumns>,
) -> Result<Vec<Config>, String> {
let mut inps = inps.to_vec();
if inps.is_empty() {
inps.push("-".to_owned()); }
let confs = inps
.into_iter()
.map(|p| {
let mut conf = Config::new(&Some(p))
.delimiter(delim)
.no_headers(no_headers);
if let Some(sel) = select {
conf = conf.select(sel.clone());
}
conf
})
.collect::<Vec<_>>();
errif_greater_one_stdin(&confs)?;
Ok(confs)
}
pub fn errif_greater_one_stdin(inps: &[Config]) -> Result<(), String> {
let nstd = inps.iter().filter(|inp| inp.is_std()).count();
if nstd > 1 {
return Err("At most one <stdin> input is allowed.".to_owned());
}
Ok(())
}
pub fn num_of_chunks(nitems: usize, chunk_size: usize) -> usize {
if chunk_size == 0 {
return nitems;
}
let mut n = nitems / chunk_size;
if nitems % chunk_size != 0 {
n += 1;
}
n
}
pub fn last_modified(md: &fs::Metadata) -> u64 {
use filetime::FileTime;
FileTime::from_last_modification_time(md).seconds_relative_to_1970()
}
pub fn idx_path(csv_path: &Path) -> PathBuf {
let mut p = csv_path
.to_path_buf()
.into_os_string()
.into_string()
.unwrap();
p.push_str(".idx");
PathBuf::from(&p)
}
pub type Idx = Option<usize>;
pub fn range(start: Idx, end: Idx, len: Idx, index: Idx) -> Result<(usize, usize), String> {
match (start, end, len, index) {
(None, None, None, Some(i)) => Ok((i, i + 1)),
(_, _, _, Some(_)) => Err("--index cannot be used with --start, --end or --len".to_owned()),
(_, Some(_), Some(_), None) => {
Err("--end and --len cannot be used at the same time.".to_owned())
}
(_, None, None, None) => Ok((start.unwrap_or(0), ::std::usize::MAX)),
(_, Some(e), None, None) => {
let s = start.unwrap_or(0);
if s > e {
Err(format!(
"The end of the range ({}) must be greater than or\n\
equal to the start of the range ({}).",
e, s
))
} else {
Ok((s, e))
}
}
(_, None, Some(l), None) => {
let s = start.unwrap_or(0);
Ok((s, s + l))
}
}
}
pub fn parse_timezone(tz: Option<String>) -> Result<Tz, String> {
match tz {
None => Ok(chrono_tz::UTC),
Some(time_string) => time_string
.parse::<Tz>()
.or(Err(format!("{} is not a valid timezone", time_string))),
}
}
pub fn parse_date(date: &str, tz: Tz, input_fmt: &Option<String>) -> Result<DateTime<Utc>, String> {
match input_fmt {
Some(fmt) => match NaiveDateTime::parse_from_str(date, fmt) {
Ok(time) => Ok(tz.from_local_datetime(&time).unwrap().with_timezone(&Utc)),
_ => Err(format!("{} is not a valid format", fmt)),
},
None => match parse_with_timezone(date, &tz) {
Ok(time) => Ok(time),
_ => Err(format!("Time format could not be inferred for {}", date)),
},
}
}
fn create_dir_all_threadsafe(path: &Path) -> io::Result<()> {
for _ in 0..20 {
match fs::create_dir_all(path) {
Err(ref err) if err.kind() == io::ErrorKind::AlreadyExists => {}
other => return other,
}
thread::sleep(time::Duration::from_millis(25));
}
fs::create_dir_all(path)
}
#[derive(Clone, Debug)]
pub struct FilenameTemplate {
prefix: String,
suffix: String,
}
impl FilenameTemplate {
pub fn filename(&self, unique_value: &str) -> String {
format!("{}{}{}", &self.prefix, unique_value, &self.suffix)
}
pub fn writer<P>(
&self,
path: P,
unique_value: &str,
) -> io::Result<csv::Writer<Box<dyn io::Write + 'static>>>
where
P: AsRef<Path>,
{
let filename = self.filename(unique_value);
let full_path = path.as_ref().join(filename);
if let Some(parent) = full_path.parent() {
create_dir_all_threadsafe(parent)?;
}
let spath = Some(full_path.display().to_string());
Config::new(&spath).writer_with_options(
fs::OpenOptions::new()
.write(true)
.create(true)
.truncate(true),
)
}
}
impl<'de> Deserialize<'de> for FilenameTemplate {
fn deserialize<D: Deserializer<'de>>(d: D) -> Result<FilenameTemplate, D::Error> {
let raw = String::deserialize(d)?;
let chunks = raw.split("{}").collect::<Vec<_>>();
if chunks.len() == 2 {
Ok(FilenameTemplate {
prefix: chunks[0].to_owned(),
suffix: chunks[1].to_owned(),
})
} else {
Err(D::Error::custom(
"The --filename argument must contain one '{}'.",
))
}
}
}
pub fn acquire_rng(seed: Option<usize>) -> impl Rng {
match seed {
None => Seeder::from(rand::thread_rng().gen::<usize>()).make_rng::<ChaCha8Rng>(),
Some(seed) => Seeder::from(seed).make_rng::<ChaCha8Rng>(),
}
}
pub fn acquire_number_formatter() -> Formatter {
Formatter::new()
.precision(Precision::Significance(5))
.separator(',')
.unwrap()
}
pub fn acquire_stty_size() -> Option<termsize::Size> {
if let Ok(output) = Command::new("/bin/sh")
.arg("-c")
.arg("stty size < /dev/tty")
.output()
{
let text = String::from_utf8_lossy(&output.stdout);
let parts = text.split_whitespace().take(2).collect::<Vec<_>>();
if parts.len() < 2 {
return None;
}
let cols: u16 = if let Ok(c) = parts[1].parse() {
c
} else {
return None;
};
let rows: u16 = if let Ok(r) = parts[0].parse() {
r
} else {
return None;
};
return Some(termsize::Size { cols, rows });
}
None
}
pub fn acquire_term_cols(cols_override: &Option<usize>) -> usize {
match cols_override {
None => match termsize::get() {
None => match acquire_stty_size() {
None => 80,
Some(size) => size.cols as usize,
},
Some(size) => size.cols as usize,
},
Some(c) => *c,
}
}
pub fn acquire_term_rows() -> Option<usize> {
termsize::get().map(|size| size.rows as usize)
}
pub fn pretty_print_float<T: Numeric>(f: &mut Formatter, x: T) -> String {
let mut string = f.fmt2(x).to_string();
if string.ends_with(".0") {
string.truncate(string.len() - 2);
}
string
}
pub enum ColorOrStyles {
Color(Color),
Styles(Styles),
}
pub fn colorizer_by_type(string: &str) -> ColorOrStyles {
match string.parse::<f64>() {
Ok(_) => ColorOrStyles::Color(Color::Red),
Err(_) => {
if string.starts_with("http://") || string.starts_with("https://") {
ColorOrStyles::Color(Color::Blue)
} else {
match string {
"true" | "TRUE" | "True" | "false" | "FALSE" | "False" | "yes" | "no" => {
ColorOrStyles::Color(Color::Cyan)
}
"null" | "na" | "NA" | "None" | "n/a" | "N/A" | "<empty>" => {
ColorOrStyles::Styles(Styles::Dimmed)
}
_ => ColorOrStyles::Color(Color::Green),
}
}
}
}
}
pub fn colorizer_by_rainbow(index: usize, string: &str) -> ColorOrStyles {
if string == "<empty>" {
return ColorOrStyles::Styles(Styles::Dimmed);
}
let index = index % 6;
match index {
0 => ColorOrStyles::Color(Color::Red),
1 => ColorOrStyles::Color(Color::Green),
2 => ColorOrStyles::Color(Color::Yellow),
3 => ColorOrStyles::Color(Color::Blue),
4 => ColorOrStyles::Color(Color::Magenta),
5 => ColorOrStyles::Color(Color::Cyan),
_ => unreachable!(),
}
}
pub fn colorize(color_or_style: &ColorOrStyles, string: &str) -> ColoredString {
match color_or_style {
ColorOrStyles::Color(color) => string.color(*color),
ColorOrStyles::Styles(styles) => match styles {
Styles::Dimmed => string.dimmed(),
_ => unimplemented!(),
},
}
}
pub fn unicode_aware_ellipsis(string: &str, max_width: usize) -> String {
let mut string = string.replace('\n', " ");
string = string.replace('\r', " ");
string = string.replace('\t', " ");
let mut width: usize = 0;
let graphemes = string.graphemes(true).collect::<Vec<_>>();
let graphemes_count = graphemes.len();
let mut take: usize = 0;
for grapheme in graphemes.iter() {
width += grapheme.width();
if width <= max_width {
take += 1;
continue;
}
break;
}
let mut parts = graphemes.into_iter().take(take).collect::<Vec<&str>>();
if graphemes_count > parts.len() {
parts.pop();
parts.push("โฆ");
}
parts.into_iter().collect::<String>()
}
pub fn unicode_aware_pad<'a>(
left: bool,
string: &'a str,
width: usize,
padding: &str,
) -> Cow<'a, str> {
let string_width = string.width();
if string_width >= width {
return Cow::Borrowed(string);
}
let mut padded = String::new();
let padding = padding.repeat(width - string_width);
if left {
padded.push_str(&padding);
padded.push_str(string);
} else {
padded.push_str(string);
padded.push_str(&padding);
}
Cow::Owned(padded)
}
pub fn unicode_aware_rpad<'a>(string: &'a str, width: usize, padding: &str) -> Cow<'a, str> {
unicode_aware_pad(false, string, width, padding)
}
fn has_rtl(string: &str) -> bool {
unicode_bidi::BidiInfo::new(string, None).has_rtl()
}
pub fn unicode_aware_pad_with_ellipsis(
left: bool,
string: &str,
width: usize,
padding: &str,
) -> String {
let mut string =
unicode_aware_pad(left, &unicode_aware_ellipsis(string, width), width, padding)
.into_owned();
if has_rtl(&string) {
string.push('\u{200E}');
}
string
}
pub fn unicode_aware_rpad_with_ellipsis(string: &str, width: usize, padding: &str) -> String {
unicode_aware_pad_with_ellipsis(false, string, width, padding)
}
pub fn unicode_aware_lpad_with_ellipsis(string: &str, width: usize, padding: &str) -> String {
unicode_aware_pad_with_ellipsis(true, string, width, padding)
}
pub fn unicode_aware_wrap(string: &str, max_width: usize, indent: usize) -> String {
textwrap::wrap(string, max_width)
.iter()
.enumerate()
.map(|(i, line)| {
if i == 0 {
line.to_string()
} else {
textwrap::indent(line, &" ".repeat(indent))
}
})
.collect::<Vec<_>>()
.join("\n")
}
pub struct EmojiSanitizer {
pattern: regex::Regex,
}
impl EmojiSanitizer {
pub fn new() -> Self {
let mut pattern = String::new();
pattern.push_str("(?:");
let mut all_emojis = emojis::iter().collect::<Vec<_>>();
all_emojis.sort_by_key(|e| std::cmp::Reverse((e.as_bytes().len(), e.as_bytes())));
for emoji in all_emojis {
pattern.push_str(®ex::escape(emoji.as_str()));
pattern.push('|');
}
pattern.pop();
pattern.push(')');
let pattern = regex::Regex::new(&pattern).unwrap();
EmojiSanitizer { pattern }
}
pub fn sanitize(&self, string: &str) -> String {
self.pattern
.replace_all(string, |caps: ®ex::Captures| {
format!(
":{}:",
match emojis::get(&caps[0]) {
None => "unknown_emoji",
Some(emoji) => match emoji.shortcode() {
None => "unknown_emoji",
Some(shortcode) => shortcode,
},
}
)
})
.to_string()
}
}
pub trait ImmutableRecordHelpers<'a> {
type Cell;
fn find_column_index(&self, value: Self::Cell) -> Option<usize>;
#[must_use]
fn replace_at(&self, column_index: usize, new_value: Self::Cell) -> Self;
#[must_use]
fn prepend(&self, cell_value: Self::Cell) -> Self;
#[must_use]
fn append(&self, cell_value: Self::Cell) -> Self;
#[must_use]
fn remove(&self, column_index: usize) -> Self;
}
impl<'a> ImmutableRecordHelpers<'a> for csv::ByteRecord {
type Cell = &'a [u8];
fn find_column_index(&self, value: Self::Cell) -> Option<usize> {
self.iter()
.enumerate()
.find(|(_, cell)| *cell == value)
.map(|(i, _)| i)
}
fn replace_at(&self, column_index: usize, new_value: Self::Cell) -> Self {
self.iter()
.enumerate()
.map(|(i, v)| if i == column_index { new_value } else { v })
.collect()
}
fn prepend(&self, cell_value: Self::Cell) -> Self {
let mut new_record = csv::ByteRecord::new();
new_record.push_field(cell_value);
new_record.extend(self);
new_record
}
fn append(&self, cell_value: Self::Cell) -> Self {
let mut new_record = self.clone();
new_record.push_field(cell_value);
new_record
}
fn remove(&self, column_index: usize) -> Self {
self.iter()
.enumerate()
.filter_map(|(i, c)| if i == column_index { None } else { Some(c) })
.collect()
}
}
impl<'a> ImmutableRecordHelpers<'a> for csv::StringRecord {
type Cell = &'a str;
fn find_column_index(&self, value: Self::Cell) -> Option<usize> {
self.iter()
.enumerate()
.find(|(_, cell)| *cell == value)
.map(|(i, _)| i)
}
fn replace_at(&self, column_index: usize, new_value: Self::Cell) -> Self {
self.iter()
.enumerate()
.map(|(i, v)| if i == column_index { new_value } else { v })
.collect()
}
fn prepend(&self, cell_value: Self::Cell) -> Self {
let mut new_record = csv::StringRecord::new();
new_record.push_field(cell_value);
new_record.extend(self);
new_record
}
fn append(&self, cell_value: Self::Cell) -> Self {
let mut new_record = self.clone();
new_record.push_field(cell_value);
new_record
}
fn remove(&self, column_index: usize) -> Self {
self.iter()
.enumerate()
.filter_map(|(i, c)| if i == column_index { None } else { Some(c) })
.collect()
}
}
pub fn str_to_csv_byte_record(target: &str) -> csv::ByteRecord {
let cursor = io::Cursor::new(target);
let reader = csv::ReaderBuilder::new()
.has_headers(false)
.from_reader(cursor);
match reader.into_byte_records().next() {
Some(record) => record.unwrap(),
None => csv::ByteRecord::new(),
}
}
pub struct Chunks<I> {
size: NonZeroUsize,
inner: I,
}
impl<I> Iterator for Chunks<I>
where
I: Iterator,
{
type Item = Vec<I::Item>;
fn next(&mut self) -> Option<Self::Item> {
let mut chunk: Vec<I::Item> = Vec::new();
while chunk.len() < self.size.into() {
match self.inner.next() {
None => {
if chunk.is_empty() {
return None;
}
return Some(chunk);
}
Some(item) => {
chunk.push(item);
}
}
}
Some(chunk)
}
}
pub trait ChunksIteratorExt: Sized {
fn chunks(self, size: NonZeroUsize) -> Chunks<Self>;
}
impl<T: Iterator> ChunksIteratorExt for T {
fn chunks(self, size: NonZeroUsize) -> Chunks<Self> {
Chunks { size, inner: self }
}
}
pub fn bytes_cursor_from_read<R: io::Read>(source: &mut R) -> io::Result<io::Cursor<Vec<u8>>> {
let mut bytes = Vec::<u8>::new();
source.read_to_end(&mut bytes)?;
Ok(io::Cursor::new(bytes))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_unicode_aware_ellipsis() {
assert_eq!(unicode_aware_ellipsis("abcde", 10), "abcde".to_string());
assert_eq!(unicode_aware_ellipsis("abcde", 5), "abcde".to_string());
assert_eq!(unicode_aware_ellipsis("abcde", 4), "abcโฆ".to_string());
assert_eq!(unicode_aware_ellipsis("abcde", 3), "abโฆ".to_string());
}
#[test]
fn test_emoji_sanitizer() {
let sanitizer = EmojiSanitizer::new();
assert_eq!(
sanitizer.sanitize("๐ฉ hello ๐ฉโ๐ฉโ๐งโ๐ฆ"),
":woman: hello :family_woman_woman_girl_boy:"
);
}
macro_rules! brec {
() => {
csv::ByteRecord::new()
};
( $( $x:expr ),* ) => {
{
let mut record = csv::ByteRecord::new();
$(
record.push_field($x);
)*
record
}
};
}
#[test]
fn test_str_to_csv_byte_record() {
assert_eq!(str_to_csv_byte_record(""), brec![]);
assert_eq!(str_to_csv_byte_record("test"), brec![b"test"]);
assert_eq!(str_to_csv_byte_record("test,ok"), brec![b"test", b"ok"]);
assert_eq!(
str_to_csv_byte_record("\"test, ok\",\"\"\"John"),
brec![b"test, ok", b"\"John"]
);
}
}