//! A `Write` adaptor that rewrites `LF` / `CRLF` linebreaks in formatted
//! output into a configurable linebreak sequence.
#[cfg(test)]
extern crate memstream;

use std::fmt;
use std::io::prelude::*;
use std::io::{Error, ErrorKind, Result};

/// Default linebreak on Windows: `CRLF`.
#[cfg(windows)]
pub const LINEBREAK: &str = "\r\n";
/// Default linebreak on every non-Windows target: `LF`.
#[cfg(not(windows))]
pub const LINEBREAK: &str = "\n";

/// An intermediate layer that converts every recognizable linebreak, `LF` or
/// `CRLF`, into the configured linebreak.
///
/// Warning: You should never push binary data through the formatting path of
/// `LinebreakConvertWriter` (`write!` / `writeln!`), because every byte with
/// the same value as `LF` (and a `CR` directly in front of it) will be
/// converted to something else.
#[derive(Debug)]
pub struct LinebreakConvertWriter<W: Write> {
    /// Byte sequence emitted in place of each `LF` / `CRLF`.
    linebreak: &'static [u8],
    /// The wrapped writer that receives the output.
    inner: W,
}

impl<W: Write> LinebreakConvertWriter<W> {
    /// Create a new `LinebreakConvertWriter` with the default linebreak, LF on
    /// UNIX-like platforms or CRLF on Windows.
    // NOTE(review): the only caller visible in this file is the test module,
    // which is presumably why `dead_code` is silenced here.
    #[allow(dead_code)]
    fn new(inner: W) -> LinebreakConvertWriter<W> {
        LinebreakConvertWriter {
            linebreak: LINEBREAK.as_bytes(),
            inner,
        }
    }

    /// Set a new linebreak to replace `LF` / `CRLF` with. The new linebreak
    /// has to be known at compile time (it is stored as `&'static`).
    // NOTE(review): same as `new`, only the test module calls this in the
    // code visible here.
    #[allow(dead_code)]
    fn set_linebreak(&mut self, linebreak: &'static str) {
        self.linebreak = linebreak.as_bytes();
    }

    /// Returns a copy of `buf` in which every `LF`, together with a single
    /// `CR` directly preceding it, is replaced by the configured linebreak.
    ///
    /// A `CR` that is not immediately followed by `LF` is kept as is, and so
    /// is everything after the final `LF`. `buf` is treated as raw bytes; it
    /// does not have to be valid UTF-8.
    fn convert_linebreak(&self, buf: &[u8]) -> Vec<u8> {
        /// Drops a single trailing `CR`, if there is one.
        fn remove_cr(line: &[u8]) -> &[u8] {
            match line.split_last() {
                // The CR belongs to a CRLF pair whose LF was the separator.
                Some((&0x0D, rest)) => rest,
                _ => line,
            }
        }

        // Minimize reallocation: the output is usually about as long as the
        // input.
        let mut converted = Vec::<u8>::with_capacity(buf.len());

        // Separate the incoming bytes by LF. `split` always yields at least
        // one slice; the final one is not followed by an LF, so it is taken
        // off the back and copied verbatim at the end.
        let mut parts = buf.split(|&byte| byte == 0x0A);
        let tail = parts.next_back().unwrap_or_default();

        for part in parts {
            // Every remaining part was terminated by an LF: strip the CR of
            // a CRLF pair, then insert the configured linebreak.
            converted.extend_from_slice(remove_cr(part));
            converted.extend_from_slice(self.linebreak);
        }
        converted.extend_from_slice(tail);
        converted
    }
}

impl<W: Write> Write for LinebreakConvertWriter<W> {
    /// Hands `buf` to the wrapped writer unchanged; no linebreak conversion
    /// takes place on this path.
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        self.inner.write(buf)
    }

    /// Renders `fmt` and writes the result, running every rendered string
    /// fragment through `convert_linebreak` first.
    ///
    /// Returns the underlying I/O error if a write failed, or an
    /// `ErrorKind::Other` error if formatting failed for another reason.
    // Adapted from `std` source.
    fn write_fmt(&mut self, fmt: fmt::Arguments) -> Result<()> {
        /// Lets `fmt::write` drive the converting writer while remembering
        /// the most recent I/O error, which `fmt::Error` cannot carry.
        struct Bridge<'a, T: Write + 'a> {
            target: &'a mut LinebreakConvertWriter<T>,
            io_error: Option<Error>,
        }

        impl<'a, T: Write> fmt::Write for Bridge<'a, T> {
            fn write_str(&mut self, s: &str) -> fmt::Result {
                // Scanning raw bytes instead of `char`s is both cheaper and
                // safe: CR and LF are single-byte ASCII values, and such
                // values never occur inside a multi-byte UTF-8 sequence.
                let bytes = self.target.convert_linebreak(s.as_bytes());
                if let Err(cause) = self.target.write_all(&bytes) {
                    self.io_error = Some(cause);
                    return Err(fmt::Error);
                }
                Ok(())
            }
        }

        let mut bridge = Bridge { target: self, io_error: None };
        if fmt::write(&mut bridge, fmt).is_ok() {
            return Ok(());
        }
        // Formatting failed: report the underlying `Write` error when there
        // is one, otherwise a generic formatter error.
        Err(bridge
            .io_error
            .unwrap_or_else(|| Error::new(ErrorKind::Other, "formatter error")))
    }

    /// Flushes the wrapped writer.
    fn flush(&mut self) -> Result<()> {
        self.inner.flush()
    }
}

#[cfg(test)]
mod tests {
    use std::io::prelude::*;
    use super::memstream::MemStream;
    use super::LinebreakConvertWriter;

    /// Builds a writer over a fresh `MemStream` whose linebreak is `|`, so
    /// converted linebreaks are easy to spot in the expected output.
    fn make_test_lcw() -> LinebreakConvertWriter<MemStream> {
        let mut writer = LinebreakConvertWriter::new(MemStream::new());
        writer.set_linebreak("|");
        writer
    }

    // Formats the literal `$src` through a fresh test writer, reads back
    // what reached the underlying stream and compares it with `$dst`.
    macro_rules! test_direct_write {
        ($src: expr, $dst: expr) => {
            let mut writer = make_test_lcw();
            write!(writer, $src).unwrap();
            let mut output = String::new();
            writer.inner.read_to_string(&mut output).unwrap();
            assert_eq!(output, $dst);
        };
    }

    /// Nothing in, nothing out.
    #[test]
    fn empty() {
        test_direct_write!("", "");
    }

    /// A lone CR is not a linebreak and is kept.
    #[test]
    fn cr() {
        test_direct_write!("\r", "\r");
    }

    /// A lone LF is replaced.
    #[test]
    fn lf() {
        test_direct_write!("\n", "|");
    }

    /// CRLF is replaced as a single linebreak.
    #[test]
    fn crlf() {
        test_direct_write!("\r\n", "|");
    }

    /// A CR that is not directly in front of the LF is kept.
    #[test]
    fn separated_cr_and_lf() {
        test_direct_write!("\r \n", "\r |");
    }

    /// Only the CR directly in front of the LF is consumed.
    #[test]
    fn double_cr() {
        test_direct_write!("\r\r\n", "\r|");
    }

    /// LF, a stray CR and CRLF in one string.
    #[test]
    fn mixed() {
        test_direct_write!("1\n2\r3\r\n", "1|2\r3|");
    }
}