1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
use std::ops::Not;

use crate::raw::RawRecord;
use crate::ByteRecordArena;

pub struct Writer {
    inner: csv_core::Writer,
    skip_header: bool,
    bytes_written: u64,
    records_written: u64,
}

impl Writer {
    pub fn new(skip_header: bool, delim: u8) -> Writer {
        Self {
            inner: csv_core::WriterBuilder::new().delimiter(delim).build(),
            skip_header,
            bytes_written: 0,
            records_written: 0,
        }
    }

    pub fn from_core(writer: csv_core::Writer, first_row_is_header: bool) -> Writer {
        Self {
            inner: writer,
            skip_header: first_row_is_header,
            bytes_written: 0,
            records_written: 0,
        }
    }

    pub fn records_written(&self) -> u64 {
        self.records_written
    }

    fn write_record(
        record: &RawRecord,
        writer: &mut csv_core::Writer,
        out_buffer: &mut [u8],
    ) -> usize {
        let mut record_bytes_out = 0;
        let field_count = record.field_count() - 1;
        for (i, field) in record.iter().enumerate() {
            let output = &mut out_buffer[record_bytes_out..];
            let (res, bytes_in, bytes_out) = writer.field(field, output);
            // We expect the output buffer to be prepared to have enough space
            debug_assert_eq!(res, csv_core::WriteResult::InputEmpty);
            debug_assert_eq!(bytes_in, field.len());
            record_bytes_out += bytes_out;

            let output = &mut out_buffer[record_bytes_out..];
            let (res, bytes_out) = if i < field_count {
                writer.delimiter(output)
            } else {
                writer.terminator(output)
            };
            // We expect the output buffer to be prepared to have enough space
            debug_assert_eq!(res, csv_core::WriteResult::InputEmpty);
            // 2 bytes if there's a end quote and delimiter,
            // 1 byte in case of delimiter only
            debug_assert!(bytes_out == 1 || bytes_out == 2);
            record_bytes_out += bytes_out;
        }
        record_bytes_out
    }


    pub fn dump_arena(&mut self, out_buffer: &mut Vec<u8>, arena_outer: &ByteRecordArena) {
        let arena = &arena_outer.inner;

        // considering CSV quoting, output size is 2 + (2 * field.len()) at maximum
        let fields_len = 2 + (2 * arena.field_data.len());
        let separators_len = arena.field_ends.len();
        let terminators_len = arena.record_ends.len();
        let max_output_len = fields_len + separators_len + terminators_len;
        out_buffer.clear();
        out_buffer.reserve(max_output_len);
        // This unsafe is okay, because
        // 1) the vec only has allocated memory, guaranteed by `reserve`
        // 2) We don't attempt to read the contents (that might be indeterminate bytes), only write.
        // 3) We set the length of the vector back to area what is certainly written into in the end.
        unsafe { out_buffer.set_len(max_output_len) };
        let mut total_bytes_out = 0;

        if let Some(headers) = &arena_outer.headers_inner {
            if self.skip_header.not() {
                let header_record = RawRecord {
                    field_data: headers.name_data.as_slice(),
                    field_ends: headers.name_ends.as_slice(),
                };
                total_bytes_out += Self::write_record(
                    &header_record,
                    &mut self.inner,
                    &mut out_buffer[total_bytes_out..],
                );
                self.skip_header = true;
            }
        }

        for record in arena.iter() {
            total_bytes_out +=
                Self::write_record(&record, &mut self.inner, &mut out_buffer[total_bytes_out..]);
        }
        self.bytes_written += out_buffer.len() as u64;
        self.records_written += arena_outer.record_count();
        // out_buffer is "safe" again now:
        out_buffer.truncate(total_bytes_out);
    }
}