use core::str;
use memchr::memchr;
use Terminator;
/// The quoting policy a `Writer` applies when it starts a new field.
#[derive(Debug, Clone, Copy)]
pub enum QuoteStyle {
    /// Open every field with a quote, whatever it contains.
    Always,
    /// Quote a field only when it contains a byte that would otherwise be
    /// ambiguous: the delimiter, the terminator, the quote byte, `\r` or
    /// `\n`.
    Necessary,
    /// Quote a field when its bytes do not parse as a number.
    NonNumeric,
    /// Never decide to quote a field based on its content.
    Never,
    /// Hidden placeholder variant; the writer treats it as unreachable.
    #[doc(hidden)]
    __Nonexhaustive,
}
/// The default quoting style is `QuoteStyle::Necessary`.
impl Default for QuoteStyle {
    fn default() -> Self {
        QuoteStyle::Necessary
    }
}
/// Collects configuration for a `Writer` and hands out configured copies.
#[derive(Default, Debug)]
pub struct WriterBuilder {
    /// The writer being configured; `build` returns a clone of it.
    wtr: Writer,
}
impl WriterBuilder {
pub fn new() -> WriterBuilder {
WriterBuilder { wtr: Writer::default() }
}
pub fn build(&self) -> Writer {
self.wtr.clone()
}
pub fn delimiter(&mut self, delimiter: u8) -> &mut WriterBuilder {
self.wtr.delimiter = delimiter;
self
}
pub fn terminator(&mut self, term: Terminator) -> &mut WriterBuilder {
self.wtr.term = term;
self
}
pub fn quote_style(&mut self, style: QuoteStyle) -> &mut WriterBuilder {
self.wtr.style = style;
self
}
pub fn quote(&mut self, quote: u8) -> &mut WriterBuilder {
self.wtr.quote = quote;
self
}
pub fn escape(&mut self, escape: u8) -> &mut WriterBuilder {
self.wtr.escape = escape;
self
}
pub fn double_quote(&mut self, yes: bool) -> &mut WriterBuilder {
self.wtr.double_quote = yes;
self
}
}
/// Tells the caller why a write operation stopped.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WriteResult {
    /// Everything that was asked for has been written.
    InputEmpty,
    /// The output buffer was too small to take all of the data.
    OutputFull,
}
/// A CSV writer that encodes fields, delimiters and terminators into
/// caller-provided byte buffers.
#[derive(Debug, Clone)]
pub struct Writer {
    /// Progress through the current field and record.
    state: WriterState,
    /// Byte written between fields.
    delimiter: u8,
    /// Record terminator.
    term: Terminator,
    /// Policy deciding whether a field is quoted.
    style: QuoteStyle,
    /// Byte that opens and closes a quoted field.
    quote: u8,
    /// Byte written before a quote inside a quoted field when
    /// `double_quote` is false.
    escape: u8,
    /// When true, a quote inside a quoted field is written twice.
    double_quote: bool,
}
/// Bookkeeping the writer carries from one call to the next.
#[derive(Debug, Clone)]
struct WriterState {
    /// True once a field has been started and not yet closed off by a
    /// delimiter or terminator.
    in_field: bool,
    /// True while the current field has an opening quote that still needs
    /// its closing quote.
    quoting: bool,
    /// Bytes written so far for the current record; zero means nothing has
    /// been emitted for it yet.
    record_bytes: u64,
}
impl Writer {
    /// Creates a writer with the default configuration.
    pub fn new() -> Writer {
        Default::default()
    }

    /// Completes the output once no more data will be written.
    ///
    /// If a field was started but nothing has been emitted for the current
    /// record, a pair of quote bytes is written so the empty field is
    /// visible. If a quoted field is still open, its closing quote is
    /// written and the writer state is reset. When no quote is open the
    /// state is left untouched.
    ///
    /// Returns the write status and the number of bytes put into `output`.
    /// `OutputFull` means a piece did not fit and the call should be
    /// repeated with more room.
    pub fn finish(&mut self, mut output: &mut [u8]) -> (WriteResult, usize) {
        let mut written = 0;
        let lone_empty_field =
            self.state.in_field && self.state.record_bytes == 0;
        if lone_empty_field {
            assert!(!self.state.quoting);
            let (status, n) = self.write(&[self.quote, self.quote], output);
            if n == 0 {
                return (status, 0);
            }
            self.state.record_bytes += n as u64;
            written += n;
            output = &mut moving(output)[n..];
        }
        if self.state.quoting {
            let (status, n) = self.write(&[self.quote], output);
            if n != 0 {
                written += n;
                self.state.quoting = false;
                self.state.in_field = false;
                self.state.record_bytes = 0;
            }
            (status, written)
        } else {
            (WriteResult::InputEmpty, written)
        }
    }

    /// Writes field data from `input` into `output`.
    ///
    /// On the first call for a field the quoting decision is made from
    /// `input`, and an opening quote is written if the field is to be
    /// quoted. Further calls before a delimiter or terminator continue the
    /// same field with the decision already taken.
    ///
    /// Returns the write status, the number of input bytes consumed and the
    /// number of output bytes produced.
    pub fn field(
        &mut self,
        input: &[u8],
        mut output: &mut [u8],
    ) -> (WriteResult, usize, usize) {
        // Bytes spent on the opening quote, if one is written by this call.
        let mut prefix = 0;
        if !self.state.in_field {
            self.state.quoting = self.should_quote(input);
            if self.state.quoting {
                let (status, n) = self.write(&[self.quote], output);
                if n == 0 {
                    return (status, 0, 0);
                }
                self.state.record_bytes += n as u64;
                prefix = n;
                output = &mut moving(output)[n..];
            }
            self.state.in_field = true;
        }
        let (status, consumed, produced) = if self.state.quoting {
            quote(input, output, self.quote, self.escape, self.double_quote)
        } else {
            write_optimistic(input, output)
        };
        self.state.record_bytes += produced as u64;
        (status, consumed, prefix + produced)
    }

    /// Writes a field delimiter, first closing the current field's quote if
    /// one is open.
    ///
    /// Returns the write status and the number of bytes put into `output`.
    pub fn delimiter(
        &mut self,
        mut output: &mut [u8],
    ) -> (WriteResult, usize) {
        let mut written = 0;
        if self.state.quoting {
            let (status, n) = self.write(&[self.quote], output);
            if n == 0 {
                return (status, 0);
            }
            self.state.quoting = false;
            self.state.record_bytes += n as u64;
            written += n;
            output = &mut moving(output)[n..];
        }
        let (status, n) = self.write(&[self.delimiter], output);
        if n != 0 {
            written += n;
            self.state.record_bytes += n as u64;
            self.state.in_field = false;
        }
        (status, written)
    }

    /// Writes a record terminator.
    ///
    /// If nothing has been emitted for the current record, a pair of quote
    /// bytes is written first. An open quoted field gets its closing quote
    /// before the terminator. After the terminator is written the record
    /// byte count and the in-field flag are reset.
    ///
    /// Returns the write status and the number of bytes put into `output`.
    pub fn terminator(
        &mut self,
        mut output: &mut [u8],
    ) -> (WriteResult, usize) {
        let mut written = 0;
        if self.state.record_bytes == 0 {
            assert!(!self.state.quoting);
            let (status, n) = self.write(&[self.quote, self.quote], output);
            if n == 0 {
                return (status, 0);
            }
            self.state.record_bytes += n as u64;
            written += n;
            output = &mut moving(output)[n..];
        }
        if self.state.quoting {
            let (status, n) = self.write(&[self.quote], output);
            if n == 0 {
                return (status, 0);
            }
            self.state.quoting = false;
            self.state.record_bytes += n as u64;
            written += n;
            output = &mut moving(output)[n..];
        }
        let (status, n) = match self.term {
            Terminator::Any(b) => write_pessimistic(&[b], output),
            Terminator::CRLF => write_pessimistic(b"\r\n", output),
        };
        if n != 0 {
            written += n;
            self.state.record_bytes = 0;
            self.state.in_field = false;
        }
        (status, written)
    }

    /// Reports whether any byte of `input` forces the field to be quoted.
    fn needs_quotes(&self, input: &[u8]) -> bool {
        for &b in input {
            if self.byte_needs_quotes(b) {
                return true;
            }
        }
        false
    }

    /// Reports whether `b` is a line break byte or one of the configured
    /// delimiter, quote or terminator bytes.
    fn byte_needs_quotes(&self, b: u8) -> bool {
        match b {
            b'\r' | b'\n' => true,
            _ => self.delimiter == b || self.quote == b || self.term == b,
        }
    }

    /// Applies the configured quoting style to `input`.
    fn should_quote(&self, input: &[u8]) -> bool {
        match self.style {
            QuoteStyle::Never => false,
            QuoteStyle::Always => true,
            QuoteStyle::Necessary => self.needs_quotes(input),
            QuoteStyle::NonNumeric => is_non_numeric(input),
            _ => unreachable!(),
        }
    }

    /// Copies `data` into the front of `output` only if all of it fits;
    /// otherwise writes nothing and reports `OutputFull`.
    fn write(&self, data: &[u8], output: &mut [u8]) -> (WriteResult, usize) {
        write_pessimistic(data, output)
    }
}
/// The default writer: comma delimiter, `\n` terminator, default quoting
/// style, `"` as quote, `\` as escape, and quote doubling turned on.
impl Default for Writer {
    fn default() -> Self {
        Writer {
            quote: b'"',
            escape: b'\\',
            double_quote: true,
            delimiter: b',',
            term: Terminator::Any(b'\n'),
            style: QuoteStyle::default(),
            state: WriterState::default(),
        }
    }
}
/// The initial state: no field started, no quote open, no bytes written.
impl Default for WriterState {
    fn default() -> Self {
        WriterState {
            record_bytes: 0,
            quoting: false,
            in_field: false,
        }
    }
}
/// Returns true when `input` is not the text of a number.
///
/// Bytes that are not valid UTF-8 count as non-numeric. Valid text counts
/// as numeric when it parses as either an `f64` or an `i64`.
pub fn is_non_numeric(input: &[u8]) -> bool {
    match str::from_utf8(input) {
        Ok(text) => {
            text.parse::<f64>().is_err() && text.parse::<i64>().is_err()
        }
        Err(_) => true,
    }
}
/// Copies `input` into `output`, replacing every `quote` byte with a
/// two-byte sequence.
///
/// The sequence is the quote written twice when `double_quote` is true,
/// and `escape` followed by the quote otherwise. Plain bytes are copied as
/// far as they fit; a replacement pair is written only if both of its bytes
/// fit.
///
/// Returns the write status, the number of input bytes consumed and the
/// number of output bytes produced. On `OutputFull` the caller can resume
/// with the unconsumed input and a buffer that has room.
pub fn quote(
    mut input: &[u8],
    mut output: &mut [u8],
    quote: u8,
    escape: u8,
    double_quote: bool,
) -> (WriteResult, usize, usize) {
    // What gets written in place of each quote byte found in the input.
    let replacement = if double_quote {
        [quote, quote]
    } else {
        [escape, quote]
    };
    let (mut consumed, mut produced) = (0, 0);
    while let Some(pos) = memchr(quote, input) {
        // Copy the plain bytes in front of the quote.
        let (status, i, o) = write_optimistic(&input[..pos], output);
        consumed += i;
        produced += o;
        input = &input[i..];
        output = &mut moving(output)[o..];
        if status == WriteResult::OutputFull {
            return (status, consumed, produced);
        }
        // The quote is consumed only once its whole replacement is written.
        let (status, o) = write_pessimistic(&replacement, output);
        if status == WriteResult::OutputFull {
            return (status, consumed, produced);
        }
        produced += o;
        output = &mut moving(output)[o..];
        consumed += 1;
        input = &input[1..];
    }
    // No quote left: the remainder is copied as is.
    let (status, i, o) = write_optimistic(input, output);
    (status, consumed + i, produced + o)
}
/// Copies as many leading bytes of `input` as fit into `output`.
///
/// Returns `InputEmpty` when all of `input` was copied and `OutputFull`
/// when it had to be cut short. The two counts are the bytes consumed and
/// the bytes produced, which are always equal here.
fn write_optimistic(
    input: &[u8],
    output: &mut [u8],
) -> (WriteResult, usize, usize) {
    let n = input.len().min(output.len());
    output[..n].copy_from_slice(&input[..n]);
    let status = if n < input.len() {
        WriteResult::OutputFull
    } else {
        WriteResult::InputEmpty
    };
    (status, n, n)
}
/// Copies `input` into the front of `output` only if all of it fits.
///
/// Returns `InputEmpty` with the number of bytes copied, or `OutputFull`
/// with zero when `output` is too short; in that case `output` is left
/// untouched.
fn write_pessimistic(
    input: &[u8],
    output: &mut [u8],
) -> (WriteResult, usize) {
    let needed = input.len();
    match output.get_mut(..needed) {
        Some(dst) => {
            dst.copy_from_slice(input);
            (WriteResult::InputEmpty, needed)
        }
        None => (WriteResult::OutputFull, 0),
    }
}
/// Returns its argument unchanged.
///
/// Passing a `&mut` reference through this function moves it, which lets
/// callers write `buf = &mut moving(buf)[n..]` to advance a mutable slice.
fn moving<T>(value: T) -> T {
    value
}
// Unit tests for the writer state machine and for the free `quote` function.
#[cfg(test)]
mod tests {
use writer::{Writer, WriterBuilder, QuoteStyle, quote};
use writer::WriteResult::*;
// Shorthand: view a string as bytes.
fn b(s: &str) -> &[u8] { s.as_bytes() }
// Shorthand: view bytes as a string, panicking on invalid UTF-8.
fn s(b: &[u8]) -> &str { ::core::str::from_utf8(b).unwrap() }
// Calls `$wtr.field($inp, $out)` and checks, in this order, the result,
// the number of input bytes consumed, the number of output bytes produced
// and the bytes written to the front of `$out`.
macro_rules! assert_field {
(
$wtr:expr, $inp:expr, $out:expr,
$expect_in:expr, $expect_out:expr,
$expect_res:expr, $expect_data:expr
) => {{
let (res, i, o) = $wtr.field($inp, $out);
assert_eq!($expect_res, res, "result");
assert_eq!($expect_in, i, "input");
assert_eq!($expect_out, o, "output");
assert_eq!($expect_data, s(&$out[..o]), "data");
}}
}
// Calls the writer method named by `$which` (`delimiter`, `terminator` or
// `finish`) on `$out` and checks the result, the number of bytes produced
// and the bytes written to the front of `$out`.
macro_rules! assert_write {
(
$wtr:expr, $which:ident, $out:expr,
$expect_out:expr, $expect_res:expr, $expect_data:expr
) => {{
let (res, o) = $wtr.$which($out);
assert_eq!($expect_res, res, "result");
assert_eq!($expect_out, o, "output");
assert_eq!($expect_data, s(&$out[..o]), "data");
}}
}
// A single unquoted field needs nothing extra from `finish`.
#[test]
fn writer_one_field() {
let mut wtr = Writer::new();
let mut out = &mut [0; 1024];
let mut n = 0;
assert_field!(wtr, b("abc"), &mut out[n..], 3, 3, InputEmpty, "abc");
n += 3;
assert_write!(wtr, finish, &mut out[n..], 0, InputEmpty, "");
}
// A record holding only an empty field is written as a pair of quotes
// followed by the terminator.
#[test]
fn writer_one_empty_field_terminator() {
let mut wtr = Writer::new();
let mut out = &mut [0; 1024];
assert_field!(wtr, b(""), &mut out[..], 0, 0, InputEmpty, "");
assert_write!(wtr, terminator, &mut out[..], 3, InputEmpty, "\"\"\n");
assert_write!(wtr, finish, &mut out[..], 0, InputEmpty, "");
}
// Without a terminator, `finish` writes the pair of quotes for the lone
// empty field.
#[test]
fn writer_one_empty_field_finish() {
let mut wtr = Writer::new();
let mut out = &mut [0; 1024];
assert_field!(wtr, b(""), &mut out[..], 0, 0, InputEmpty, "");
assert_write!(wtr, finish, &mut out[..], 2, InputEmpty, "\"\"");
}
// The empty-field handling also applies to a record that follows a
// terminated one.
#[test]
fn writer_many_one_empty_field_finish() {
let mut wtr = Writer::new();
let mut out = &mut [0; 1024];
assert_field!(wtr, b(""), &mut out[..], 0, 0, InputEmpty, "");
assert_write!(wtr, terminator, &mut out[..], 3, InputEmpty, "\"\"\n");
assert_field!(wtr, b(""), &mut out[..], 0, 0, InputEmpty, "");
assert_write!(wtr, finish, &mut out[..], 2, InputEmpty, "\"\"");
}
#[test]
fn writer_many_one_empty_field_terminator() {
let mut wtr = Writer::new();
let mut out = &mut [0; 1024];
assert_field!(wtr, b(""), &mut out[..], 0, 0, InputEmpty, "");
assert_write!(wtr, terminator, &mut out[..], 3, InputEmpty, "\"\"\n");
assert_field!(wtr, b(""), &mut out[..], 0, 0, InputEmpty, "");
assert_write!(wtr, terminator, &mut out[..], 3, InputEmpty, "\"\"\n");
assert_write!(wtr, finish, &mut out[..], 0, InputEmpty, "");
}
// A field containing a quote is opened with a quote, the inner quote is
// doubled, and `finish` writes the closing quote.
#[test]
fn writer_one_field_quote() {
let mut wtr = Writer::new();
let mut out = &mut [0; 1024];
let mut n = 0;
assert_field!(
wtr, b("a\"bc"), &mut out[n..], 4, 6, InputEmpty, "\"a\"\"bc");
n += 6;
assert_write!(wtr, finish, &mut out[n..], 1, InputEmpty, "\"");
}
// Two `field` calls without a delimiter in between continue one field.
#[test]
fn writer_one_field_stream() {
let mut wtr = Writer::new();
let mut out = &mut [0; 1024];
let mut n = 0;
assert_field!(wtr, b("abc"), &mut out[n..], 3, 3, InputEmpty, "abc");
n += 3;
assert_field!(wtr, b("x"), &mut out[n..], 1, 1, InputEmpty, "x");
n += 1;
assert_write!(wtr, finish, &mut out[n..], 0, InputEmpty, "");
}
// The quoting decision taken on the first chunk stays in force for the
// later chunks of the same field.
#[test]
fn writer_one_field_stream_quote() {
let mut wtr = Writer::new();
let mut out = &mut [0; 1024];
let mut n = 0;
assert_field!(
wtr, b("abc\""), &mut out[n..], 4, 6, InputEmpty, "\"abc\"\"");
n += 6;
assert_field!(wtr, b("x"), &mut out[n..], 1, 1, InputEmpty, "x");
n += 1;
assert_write!(wtr, finish, &mut out[n..], 1, InputEmpty, "\"");
}
// With a four-byte buffer the field is written across several calls; each
// call passes the input that the previous one did not consume.
#[test]
fn writer_one_field_stream_quote_partial() {
let mut wtr = Writer::new();
let mut out = &mut [0; 4];
assert_field!(wtr, b("ab\"xyz"), out, 2, 3, OutputFull, "\"ab");
assert_field!(wtr, b("\"xyz"), out, 3, 4, OutputFull, "\"\"xy");
assert_field!(wtr, b("z"), out, 1, 1, InputEmpty, "z");
assert_write!(wtr, finish, out, 1, InputEmpty, "\"");
}
#[test]
fn writer_two_fields() {
let mut wtr = Writer::new();
let mut out = &mut [0; 1024];
let mut n = 0;
assert_field!(wtr, b("abc"), &mut out[n..], 3, 3, InputEmpty, "abc");
n += 3;
assert_write!(wtr, delimiter, &mut out[n..], 1, InputEmpty, ",");
n += 1;
assert_field!(wtr, b("yz"), &mut out[n..], 2, 2, InputEmpty, "yz");
n += 2;
assert_write!(wtr, finish, &mut out[n..], 0, InputEmpty, "");
assert_eq!("abc,yz", s(&out[..n]));
}
// With `QuoteStyle::NonNumeric` the text field is quoted and the two
// numeric fields are not.
#[test]
fn writer_two_fields_non_numeric() {
let mut wtr = WriterBuilder::new()
.quote_style(QuoteStyle::NonNumeric)
.build();
let mut out = &mut [0; 1024];
let mut n = 0;
assert_field!(wtr, b("abc"), &mut out[n..], 3, 4, InputEmpty, "\"abc");
n += 4;
assert_write!(wtr, delimiter, &mut out[n..], 2, InputEmpty, "\",");
n += 2;
assert_field!(wtr, b("5.2"), &mut out[n..], 3, 3, InputEmpty, "5.2");
n += 3;
assert_write!(wtr, delimiter, &mut out[n..], 1, InputEmpty, ",");
n += 1;
assert_field!(wtr, b("98"), &mut out[n..], 2, 2, InputEmpty, "98");
n += 2;
assert_write!(wtr, finish, &mut out[n..], 0, InputEmpty, "");
assert_eq!("\"abc\",5.2,98", s(&out[..n]));
}
// Fields containing the delimiter or a line break are quoted; the
// delimiter call writes the closing quote before the delimiter.
#[test]
fn writer_two_fields_quote() {
let mut wtr = Writer::new();
let mut out = &mut [0; 1024];
let mut n = 0;
assert_field!(
wtr, b("a,bc"), &mut out[n..], 4, 5, InputEmpty, "\"a,bc");
n += 5;
assert_write!(wtr, delimiter, &mut out[n..], 2, InputEmpty, "\",");
n += 2;
assert_field!(wtr, b("\nz"), &mut out[n..], 2, 3, InputEmpty, "\"\nz");
n += 3;
assert_write!(wtr, finish, &mut out[n..], 1, InputEmpty, "\"");
n += 1;
assert_eq!("\"a,bc\",\"\nz\"", s(&out[..n]));
}
#[test]
fn writer_two_fields_two_records() {
let mut wtr = Writer::new();
let mut out = &mut [0; 1024];
let mut n = 0;
assert_field!(wtr, b("abc"), &mut out[n..], 3, 3, InputEmpty, "abc");
n += 3;
assert_write!(wtr, delimiter, &mut out[n..], 1, InputEmpty, ",");
n += 1;
assert_field!(wtr, b("yz"), &mut out[n..], 2, 2, InputEmpty, "yz");
n += 2;
assert_write!(wtr, terminator, &mut out[n..], 1, InputEmpty, "\n");
n += 1;
assert_field!(wtr, b("foo"), &mut out[n..], 3, 3, InputEmpty, "foo");
n += 3;
assert_write!(wtr, delimiter, &mut out[n..], 1, InputEmpty, ",");
n += 1;
assert_field!(wtr, b("quux"), &mut out[n..], 4, 4, InputEmpty, "quux");
n += 4;
assert_write!(wtr, finish, &mut out[n..], 0, InputEmpty, "");
assert_eq!("abc,yz\nfoo,quux", s(&out[..n]));
}
// Two records in which every field needs quoting; the terminator call
// writes the closing quote before the line break.
#[test]
fn writer_two_fields_two_records_quote() {
let mut wtr = Writer::new();
let mut out = &mut [0; 1024];
let mut n = 0;
assert_field!(
wtr, b("a,bc"), &mut out[n..], 4, 5, InputEmpty, "\"a,bc");
n += 5;
assert_write!(wtr, delimiter, &mut out[n..], 2, InputEmpty, "\",");
n += 2;
assert_field!(wtr, b("\nz"), &mut out[n..], 2, 3, InputEmpty, "\"\nz");
n += 3;
assert_write!(wtr, terminator, &mut out[n..], 2, InputEmpty, "\"\n");
n += 2;
assert_field!(
wtr, b("f\"oo"), &mut out[n..], 4, 6, InputEmpty, "\"f\"\"oo");
n += 6;
assert_write!(wtr, delimiter, &mut out[n..], 2, InputEmpty, "\",");
n += 2;
assert_field!(
wtr, b("quux,"), &mut out[n..], 5, 6, InputEmpty, "\"quux,");
n += 6;
assert_write!(wtr, finish, &mut out[n..], 1, InputEmpty, "\"");
n += 1;
assert_eq!("\"a,bc\",\"\nz\"\n\"f\"\"oo\",\"quux,\"", s(&out[..n]));
}
// Calls the free `quote` function with `"` as quote and `\` as escape and
// checks the result, both byte counts and the bytes written. The first
// form uses quote doubling; the second takes the `double_quote` flag as an
// extra argument.
macro_rules! assert_quote {
(
$inp:expr, $out:expr,
$expect_in:expr, $expect_out:expr,
$expect_res:expr, $expect_data:expr
) => {
assert_quote!(
$inp, $out,
$expect_in, $expect_out, $expect_res, $expect_data,
true);
};
(
$inp:expr, $out:expr,
$expect_in:expr, $expect_out:expr,
$expect_res:expr, $expect_data:expr,
$double_quote:expr
) => {{
let (res, i, o) = quote($inp, $out, b'"', b'\\', $double_quote);
assert_eq!($expect_res, res, "result");
assert_eq!($expect_in, i, "input");
assert_eq!($expect_out, o, "output");
assert_eq!(b($expect_data), &$out[..o], "data");
}}
}
#[test]
fn quote_empty() {
let inp = b("");
let out = &mut [0; 1024];
assert_quote!(inp, out, 0, 0, InputEmpty, "");
}
#[test]
fn quote_no_quotes() {
let inp = b("foobar");
let out = &mut [0; 1024];
assert_quote!(inp, out, 6, 6, InputEmpty, "foobar");
}
// Each quote in the input becomes two quotes in the output.
#[test]
fn quote_one_quote() {
let inp = b("\"");
let out = &mut [0; 1024];
assert_quote!(inp, out, 1, 2, InputEmpty, r#""""#);
}
#[test]
fn quote_two_quotes() {
let inp = b("\"\"");
let out = &mut [0; 1024];
assert_quote!(inp, out, 2, 4, InputEmpty, r#""""""#);
}
// With quote doubling off, each quote is preceded by the escape byte.
#[test]
fn quote_escaped_one() {
let inp = b("\"");
let out = &mut [0; 1024];
assert_quote!(inp, out, 1, 2, InputEmpty, r#"\""#, false);
}
#[test]
fn quote_escaped_two() {
let inp = b("\"\"");
let out = &mut [0; 1024];
assert_quote!(inp, out, 2, 4, InputEmpty, r#"\"\""#, false);
}
#[test]
fn quote_misc() {
let inp = b(r#"foo "bar" baz "quux"?"#);
let out = &mut [0; 1024];
assert_quote!(
inp, out, 21, 25, InputEmpty,
r#"foo ""bar"" baz ""quux""?"#);
}
// A two-byte output buffer forces the input to be written in pieces; the
// caller advances the input by the consumed count between calls.
#[test]
fn quote_stream_no_quotes() {
let mut inp = b("fooba");
let out = &mut [0; 2];
assert_quote!(inp, out, 2, 2, OutputFull, "fo");
inp = &inp[2..];
assert_quote!(inp, out, 2, 2, OutputFull, "ob");
inp = &inp[2..];
assert_quote!(inp, out, 1, 1, InputEmpty, "a");
}
// Same as above with quotes in the input: a doubled quote is written only
// when both of its bytes fit, so a call may stop just before a quote.
#[test]
fn quote_stream_quotes() {
let mut inp = b(r#"a"bc"d""#);
let out = &mut [0; 2];
assert_quote!(inp, out, 1, 1, OutputFull, "a");
inp = &inp[1..];
assert_quote!(inp, out, 1, 2, OutputFull, r#""""#);
inp = &inp[1..];
assert_quote!(inp, out, 2, 2, OutputFull, "bc");
inp = &inp[2..];
assert_quote!(inp, out, 1, 2, OutputFull, r#""""#);
inp = &inp[1..];
assert_quote!(inp, out, 1, 1, OutputFull, "d");
inp = &inp[1..];
assert_quote!(inp, out, 1, 2, InputEmpty, r#""""#);
}
}