sawp_file/
format.rs

1//! Format Specification
2//!
3//! The format for serializing SAWP API Calls is a series of consecutive
4//! self-contained messages.
5//!
6//! The messages are of the following msgpack types, where `N` is the total
7//! number of messages, ranging from two to infinity.
8//!
9//! | message | type  | description           |
10//! |---------|-------|-----------------------|
11//! | 1       | int   | version number        |
12//! | 2..N    | call  | call structure fields |
13//!
14//! Calls are stored in separate messages to allow for a streaming format. Users
15//! _do not_ have to store the entire SAWP "file" into memory. Messages can be
16//! parsed asynchronously.
17//!
18//! This format is subject to change and other applications should not attempt
19//! to parse it. Use this library for encoding and decoding instead.
20
21use crate::error::{Error, ErrorKind, Result};
22use crate::Version;
23use std::io::{Read, Write};
24
25// Direction of a chunk of data or gap.
26#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Copy, Clone)]
27pub enum Direction {
28    Unknown,
29    ToServer,
30    ToClient,
31}
32
33/// A chunk of input data to parse.
34#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
35pub struct Data {
36    direction: Direction,
37    data: Vec<u8>,
38}
39
40/// Identifies a missing chunk of input data.
41#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
42pub struct Gap {
43    direction: Direction,
44    gap: usize,
45}
46
47/// A list of all API calls we want to expose.
48#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
49pub enum Call {
50    /// Parse the input data.
51    Parse(Data),
52    /// Identify a gap.
53    Gap(Gap),
54}
55
/// Decodes the expected format from a byte source.
pub struct Reader<R>
where
    R: Read,
{
    /// Source the messages are decoded from.
    inner: R,
}
60
61impl<R: Read> Reader<R> {
62    /// Creates a new reader.
63    ///
64    /// This will fail if the version in the format doesn't match the current
65    /// version of this module.
66    pub fn new(inner: R) -> Result<Self> {
67        let mut reader = Reader { inner };
68        let expected_version = crate::version();
69        let actual_version: Version = rmp_serde::from_read(&mut reader.inner)?;
70        if expected_version != actual_version {
71            return Err(Error::new(ErrorKind::VersionMismatch((
72                expected_version,
73                actual_version,
74            ))));
75        }
76        Ok(reader)
77    }
78}
79
80impl<R: Read> std::iter::Iterator for Reader<R> {
81    type Item = Call;
82
83    fn next(&mut self) -> Option<Self::Item> {
84        rmp_serde::from_read(&mut self.inner).ok()
85    }
86}
87
/// Serializes API calls and writes them to a sink.
pub struct Writer<W>
where
    W: Write,
{
    /// Sink that receives the serialized messages.
    inner: W,
}
92
93impl<W: Write> Writer<W> {
94    /// Creates a writer.
95    pub fn new(inner: W) -> Result<Self> {
96        let mut writer = Writer { inner };
97        writer.version()?;
98        Ok(writer)
99    }
100
101    /// Writes the format version number.
102    fn version(&mut self) -> Result<()> {
103        let bytes = rmp_serde::to_vec(&crate::version())?;
104        self.inner.write_all(&bytes)?;
105        Ok(())
106    }
107
108    /// Writes the parse API call.
109    pub fn parse(&mut self, direction: Direction, data: &[u8]) -> Result<()> {
110        let call = Call::Parse(Data {
111            direction,
112            data: data.to_vec(),
113        });
114        let bytes = rmp_serde::to_vec(&call)?;
115        self.inner.write_all(&bytes)?;
116        Ok(())
117    }
118
119    /// Writes the gap API call.
120    pub fn gap(&mut self, direction: Direction, gap: usize) -> Result<()> {
121        let call = Call::Gap(Gap { direction, gap });
122        let bytes = rmp_serde::to_vec(&call)?;
123        self.inner.write_all(&bytes)?;
124        Ok(())
125    }
126}
127
#[cfg(test)]
mod tests {
    use super::*;

    /// Request bytes recorded by the fixture stream.
    const REQUEST: &[u8] = b"GET /index.php HTTP/1.1\r\n\r\n";
    /// Gap size recorded by the fixture stream.
    const GAP_LEN: usize = 10;

    /// Serializes the version message, one parse call and one gap call.
    fn sample_stream() -> Vec<u8> {
        let mut bytes = Vec::new();
        let mut writer = Writer::new(&mut bytes).expect("failed to create writer");
        writer.parse(Direction::ToServer, REQUEST).unwrap();
        writer.gap(Direction::ToServer, GAP_LEN).unwrap();
        bytes
    }

    #[test]
    fn test_read_write() {
        let source = std::io::Cursor::new(sample_stream());
        let reader = Reader::new(source).expect("failed to create reader");
        let decoded: Vec<Call> = reader.collect();

        let parse = Call::Parse(Data {
            direction: Direction::ToServer,
            data: REQUEST.to_vec(),
        });
        let gap = Call::Gap(Gap {
            direction: Direction::ToServer,
            gap: GAP_LEN,
        });
        let expected: Vec<Call> = vec![parse, gap];
        assert_eq!(expected, decoded);
    }

    #[test]
    #[should_panic(expected = "VersionMismatch")]
    fn test_version_mismatch() {
        // Test a version number that is off by one
        let encoded = rmp_serde::to_vec(&(crate::version() + 1)).unwrap();
        let source = std::io::Cursor::new(encoded);
        let _ = Reader::new(source).unwrap();
    }

    #[test]
    fn test_corrupt_bytes() {
        let stream = sample_stream();

        // Process everything but the last byte.
        let truncated = &stream[..stream.len() - 1];
        let reader =
            Reader::new(std::io::Cursor::new(truncated)).expect("failed to create reader");

        // Errors are ignored and the iterator will end prematurely
        assert_eq!(reader.count(), 1);
    }
}
185}