arff/
lib.rs

1// Copyright 2018 Martin Billinger
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! # ARFF
10//!
11//! An ARFF (Attribute-Relation File Format) file is an ASCII text file
12//! that describes a list of instances sharing a set of attributes. Its
13//! main use is in data science to store tabular data: each row is an
14//! instance and each column is an attribute. In addition it contains
15//! meta-data such as attribute (column) names, data types, and comments.
16//!
17//! ## Usage
18//! - ARFF is used as an input file format by the machine-learning tool Weka.
19//! - The [OpenML website](https://www.openml.org/) provides data sets in
20//!   ARFF and CSV formats.
21//!
22//! The ARFF crate utilizes the power of Serde to allow serialization and
23//! deserialization of certain Rust types. The file format is relatively
//! simple, so not all Rust types are supported. As a general rule of thumb,
25//! data needs to be represented as a sequence of rows, and a row can be
26//! either a `struct` with named columns or a sequence with static length.
27//!
28//! ## Example
29//!
30//! ```rust
31//! extern crate arff;
32//!
33//! #[macro_use]
34//! extern crate serde_derive;
35//!
36//! fn main() {
37//!     let input = "
38//! @RELATION Data
39//! @ATTRIBUTE a NUMERIC
40//! @ATTRIBUTE b NUMERIC
41//!
42//! @DATA
43//! 42, 9
44//! 7, 5";
45//!
46//!     #[derive(Debug, Deserialize)]
47//!     struct NamedRow {
48//!         b: i32,  // order of fields does not matter
49//!         a: i32,
50//!     }
51//!
52//!     let named_data: Vec<NamedRow> = arff::from_str(input).unwrap();
53//!     println!("{:?}", named_data);
54//!
55//!     let unnamed_data: Vec<[i32; 2]> = arff::from_str(input).unwrap();
56//!     println!("{:?}", unnamed_data);
57//! }
58//! ```
59
60extern crate num_traits;
61extern crate serde;
62
63#[cfg(test)]
64#[macro_use]
65extern crate serde_derive;
66
67mod de;
68pub mod dynamic;
69mod error;
70mod parser;
71mod ser;
72
73pub use de::{flat_from_str, from_str, Deserializer};
74pub use error::{Error, Result};
75pub use ser::{to_string, Serializer};
76
#[cfg(test)]
mod tests {
    // Round-trip and serializer-output tests for the crate's public
    // `to_string` / `from_str` entry points.
    use super::*;

    /// Expected serializer output for the two rows `[1]` and `[2]` of a single
    /// unnamed numeric column, written under the given relation name.
    fn single_column_arff(relation: &str) -> String {
        format!(
            "@RELATION {}\n\n@ATTRIBUTE col1 NUMERIC\n\n@DATA\n1\n2\n",
            relation
        )
    }

    /// Rows of a named struct with integer, float and string fields survive a
    /// serialize -> deserialize round trip unchanged.
    #[test]
    fn roundtrip_1() {
        #[derive(Debug, Serialize, Deserialize, PartialEq)]
        struct Row {
            a: i16,
            b: f32,
            c: String,
        }

        let orig = vec![
            Row {
                a: 0,
                b: 0.0,
                c: String::new(),
            },
            Row {
                a: 1,
                b: 2.0,
                c: "123".to_owned(),
            },
            Row {
                a: -1726,
                b: 3.1415,
                c: "pie".to_owned(),
            },
        ];

        let arff = to_string(&orig).unwrap();
        let deser: Vec<Row> = from_str(&arff).unwrap();

        assert_eq!(deser, orig);
    }

    /// ARFF text survives a deserialize -> serialize round trip byte for byte
    /// when the data set is a newtype struct wrapping the rows.
    #[test]
    fn roundtrip_2() {
        #[derive(Debug, Serialize, Deserialize, PartialEq)]
        struct Row {
            a: i16,
            b: f32,
            c: String,
        }

        #[derive(Debug, Serialize, Deserialize, PartialEq)]
        struct MyData(Vec<Row>);

        // The literal is compared verbatim against the serializer output, so
        // its lines must stay flush-left and keep the trailing newline.
        let input = "@RELATION MyData

@ATTRIBUTE a NUMERIC
@ATTRIBUTE b NUMERIC
@ATTRIBUTE c STRING

@DATA
0, 0, ''
1, 2, '123'
-1726, 3.1414999961853027, 'pie'
";

        let data: MyData = from_str(input).unwrap();
        let output = to_string(&data).unwrap();

        assert_eq!(input, output);
    }

    /// Unit-variant enum fields survive a serialize -> deserialize round trip,
    /// including when not every variant occurs in the data (`Dunno` is unused).
    #[test]
    fn roundtrip_3() {
        #[derive(Debug, Serialize, Deserialize, PartialEq)]
        enum Answer {
            Yes,
            No,
            Maybe,
            Dunno,
        }

        #[derive(Debug, Serialize, Deserialize, PartialEq)]
        struct Row {
            x: f32,
            class: Answer,
        }

        let orig = vec![
            Row {
                x: -1.0,
                class: Answer::No,
            },
            Row {
                x: 0.0,
                class: Answer::Maybe,
            },
            Row {
                x: 1.0,
                class: Answer::Yes,
            },
        ];

        let arff = to_string(&orig).unwrap();
        let deser: Vec<Row> = from_str(&arff).unwrap();

        assert_eq!(deser, orig);
    }

    /// Rows that are nested fixed-size arrays survive a round trip.
    #[test]
    fn roundtrip_4() {
        type Row = [[i32; 2]; 2];

        let orig = vec![[[1, 2], [3, 4]], [[1, 3], [2, 4]]];

        let arff = to_string(&orig).unwrap();
        let deser: Vec<Row> = from_str(&arff).unwrap();

        assert_eq!(deser, orig);
    }

    /// Rows that are tuples mixing scalars and a fixed-size array survive a
    /// round trip.
    #[test]
    fn roundtrip_5() {
        type Row = (i32, [u8; 2], i32);

        let orig = vec![(1, [2, 3], 4), (5, [6, 7], 8)];

        let arff = to_string(&orig).unwrap();
        let deser: Vec<Row> = from_str(&arff).unwrap();

        assert_eq!(deser, orig);
    }

    /// Every supported outer (data set) container serializes the same two
    /// rows identically; only the relation name differs, and only for named
    /// wrapper structs.
    #[test]
    fn type_ser_support_outer() {
        type Row = [i32; 1];

        let d_tuple: (Row, Row) = ([1], [2]);
        let d_array: [Row; 2] = [[1], [2]];
        let d_vec: Vec<Row> = d_array.to_vec();
        let d_slice: &[Row] = d_array.as_ref();

        // Anonymous containers are all expected to use the same relation name.
        let unnamed = single_column_arff("unnamed_data");
        assert_eq!(to_string(&d_tuple).unwrap(), unnamed);
        assert_eq!(to_string(&d_array).unwrap(), unnamed);
        assert_eq!(to_string(&d_vec).unwrap(), unnamed);
        assert_eq!(to_string(&d_slice).unwrap(), unnamed);

        // Named wrappers are expected to use the struct name as relation name.
        #[derive(Serialize, Deserialize)]
        struct NewtypeStruct(Vec<Row>);
        let d_newtype_struct = NewtypeStruct(vec![[1], [2]]);
        assert_eq!(
            to_string(&d_newtype_struct).unwrap(),
            single_column_arff("NewtypeStruct")
        );

        #[derive(Serialize, Deserialize)]
        struct TupleStruct(Row, Row);
        let d_tuple_struct = TupleStruct([1], [2]);
        assert_eq!(
            to_string(&d_tuple_struct).unwrap(),
            single_column_arff("TupleStruct")
        );
    }

    /// Every supported inner (row) type serializes to the same data line;
    /// struct rows take their column names from the fields, while tuple and
    /// array rows get generated `colN` names.
    #[test]
    fn type_ser_support_inner() {
        #[derive(Serialize)]
        struct StructRow {
            x: f64,
            y: i32,
        }

        let d_struct = [StructRow { x: 1.1, y: 2 }];
        let d_tuple: [(f64, i32); 1] = [(1.1, 2)];
        let d_array: [[f64; 2]; 1] = [[1.1, 2.0]];

        // A trailing `\` in a string literal drops the line break and the
        // indentation that follows, so these are the original one-line strings.
        let named_columns = "@RELATION unnamed_data\n\n\
                             @ATTRIBUTE x NUMERIC\n\
                             @ATTRIBUTE y NUMERIC\n\n\
                             @DATA\n\
                             1.1, 2\n";
        let generated_columns = "@RELATION unnamed_data\n\n\
                                 @ATTRIBUTE col1 NUMERIC\n\
                                 @ATTRIBUTE col2 NUMERIC\n\n\
                                 @DATA\n\
                                 1.1, 2\n";

        assert_eq!(to_string(&d_struct).unwrap(), named_columns);
        assert_eq!(to_string(&d_tuple).unwrap(), generated_columns);
        // The array holds `2.0`, which is expected to be written as `2`.
        assert_eq!(to_string(&d_array).unwrap(), generated_columns);
    }
}