1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
use std::io;

use error::Result;
use token::Sentence;

/// A trait for objects that can write CoNLL-X `Sentence`s.
///
/// Implementors in this module are `Writer`, which serializes sentences to
/// an `io::Write` sink, and `PartitioningWriter`, which distributes sentences
/// over several other `WriteSentence` implementors.
pub trait WriteSentence {
    /// Write a `Sentence` into this object.
    ///
    /// The sentence is only borrowed; the writer is borrowed mutably because
    /// implementations may update internal state while writing.
    ///
    /// # Errors
    ///
    /// A call to `write_sentence` may generate an error to indicate that
    /// the operation could not be completed.
    fn write_sentence(&mut self, sentence: &Sentence) -> Result<()>;
}

/// A writer for CoNLL-X sentences.
///
/// This writer will write sentences to the embedded writer in CoNLL-X
/// tabular format. Every sentence after the first one is preceded by two
/// newline characters (`\n\n`).
pub struct Writer<W> {
    // The embedded writer that receives the formatted sentences.
    write: W,
    // `true` until the first sentence has been written; decides whether the
    // `\n\n` separator is emitted in front of a sentence.
    first: bool,
}

impl<W: io::Write> Writer<W> {
    /// Wrap `write`, an object implementing the `io::Write` trait, in a
    /// sentence writer.
    ///
    /// The returned writer has not written any sentence yet, so the first
    /// sentence is emitted without a leading separator.
    pub fn new(write: W) -> Writer<W> {
        let first = true;
        Writer { write, first }
    }

    /// Get a shared reference to the embedded writer.
    ///
    /// This is mostly useful when the embedded writer is an in-memory
    /// object whose contents should be inspected after writing.
    ///
    /// # Examples
    ///
    /// ```
    /// use conllx::{Sentence, Token, WriteSentence, Writer};
    /// use std::str;
    ///
    /// let output = Vec::new();
    /// let mut writer = Writer::new(output);
    /// let sent = Sentence::new(vec![
    ///   Token::new("hello"),
    ///   Token::new("world"),
    /// ]);
    ///
    /// writer.write_sentence(&sent).unwrap();
    ///
    /// println!("Output:\n{}", str::from_utf8(writer.get_ref()).unwrap());
    /// ```
    pub fn get_ref(&self) -> &W {
        let inner = &self.write;
        inner
    }
}

impl<W: io::Write> WriteSentence for Writer<W> {
    /// Write `sentence` to the embedded writer using its `Display` output.
    ///
    /// The first sentence is written as-is; every later sentence is preceded
    /// by two newline characters.
    ///
    /// # Errors
    ///
    /// Returns an error when writing to the embedded writer fails.
    fn write_sentence(&mut self, sentence: &Sentence) -> Result<()> {
        // The flag is cleared before the write is attempted, so a failed
        // first write still counts as the first sentence.
        let is_first = self.first;
        self.first = false;

        let written = if is_first {
            write!(self.write, "{}", sentence)
        } else {
            write!(self.write, "\n\n{}", sentence)
        };
        written?;

        Ok(())
    }
}

/// A writer for CoNLL-X sentences that partitions incoming objects
/// among multiple writers.
///
/// For example, suppose that a `PartitioningWriter` wraps the writers
/// *w1* and *w2*, and the sentences *s[1-5]* are written. The sentences are
/// then written as follows:
///
/// * s1 -> w1
/// * s2 -> w2
/// * s3 -> w1
/// * s4 -> w2
/// * s5 -> w1
pub struct PartitioningWriter<W>
where
    W: WriteSentence,
{
    // The wrapped writers, used in vector order.
    writers: Vec<W>,
    // Position of the writer that should receive the next sentence; a value
    // equal to `writers.len()` means the rotation starts over at the first
    // writer.
    fold: usize,
}

impl<W> PartitioningWriter<W>
where
    W: WriteSentence,
{
    /// Construct a partitioning writer that distributes sentences over
    /// `writers`, beginning with the first writer in the vector.
    pub fn new(writers: Vec<W>) -> PartitioningWriter<W> {
        let fold = 0;
        PartitioningWriter { writers, fold }
    }
}

impl<W> WriteSentence for PartitioningWriter<W>
where
    W: WriteSentence,
{
    /// Write `sentence` to the next wrapped writer in rotation.
    ///
    /// The position only advances after a successful write, so a sentence
    /// that failed to be written does not consume a writer's turn.
    ///
    /// # Errors
    ///
    /// Propagates the error of the wrapped writer that was selected.
    ///
    /// # Panics
    ///
    /// Panics on the out-of-bounds index when the list of wrapped writers
    /// is empty.
    fn write_sentence(&mut self, sentence: &Sentence) -> Result<()> {
        // A position one past the last writer wraps around to the first one.
        let target = if self.fold == self.writers.len() {
            0
        } else {
            self.fold
        };

        self.writers[target].write_sentence(sentence)?;
        self.fold = target + 1;

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use std::fs::File;
    use std::io;
    use std::io::Read;
    use std::str;

    use super::{WriteSentence, Writer};
    use tests::TEST_SENTENCES;

    static EMPTY: &str = "testdata/empty.conll";

    /// Read the complete contents of `filename` into a `String`.
    fn read_file(filename: &str) -> io::Result<String> {
        let mut text = String::new();
        File::open(filename)?.read_to_string(&mut text)?;
        Ok(text)
    }

    /// Writing all test sentences must reproduce the contents of the
    /// `EMPTY` fixture file.
    #[test]
    fn writer() {
        let buffer: Vec<u8> = Vec::new();
        let mut writer = Writer::new(Box::new(buffer));

        for sentence in &*TEST_SENTENCES {
            writer.write_sentence(&sentence).unwrap();
        }

        let expected = read_file(EMPTY).unwrap();
        let actual = str::from_utf8(writer.get_ref()).unwrap();
        assert_eq!(expected, actual);
    }
}