1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
//! A pure Rust reader and writer for Stata's DTA file format.
//!
//! DTA is the binary format [Stata](https://www.stata.com/) uses to
//! persist datasets. This crate covers every released version of the
//! format (104 through 119), including XML-framed releases (117+),
//! tagged missing values, value-label sets, and long-string (`strL`)
//! storage.
//!
//! The API is built around a typestate chain — you walk through the
//! sections of a file in order, and each phase hands the underlying
//! I/O handle to the next. See the [README] for the full tour.
//!
//! [README]: https://github.com/almus-the-dan/dta/#readme
//!
//! # Reading a DTA file
//!
//! ```no_run
//! use dta::stata::dta::dta_reader::DtaReader;
//! use dta::stata::dta::dta_error::Result;
//!
//! # fn demo() -> Result<()> {
//! let mut characteristic_reader = DtaReader::new()
//! .from_path("example.dta")?
//! .read_header()?
//! .read_schema()?;
//!
//! // Characteristics are optional — skip them if you don't care.
//! characteristic_reader.skip_to_end()?;
//!
//! // Iterate observation rows.
//! let mut record_reader = characteristic_reader.into_record_reader()?;
//! let schema = record_reader.schema().clone();
//! while let Some(record) = record_reader.read_record()? {
//! for (variable, value) in schema.variables().iter().zip(record.values()) {
//! println!("{}: {:?}", variable.name(), value);
//! }
//! }
//! # Ok(())
//! # }
//! ```
//!
//! # Writing a DTA file
//!
//! ```no_run
//! use dta::stata::dta::byte_order::ByteOrder;
//! use dta::stata::dta::dta_error::Result;
//! use dta::stata::dta::dta_writer::DtaWriter;
//! use dta::stata::dta::header::Header;
//! use dta::stata::dta::release::Release;
//! use dta::stata::dta::schema::Schema;
//! use dta::stata::dta::value::Value;
//! use dta::stata::dta::variable::Variable;
//! use dta::stata::dta::variable_type::VariableType;
//! use dta::stata::stata_long::StataLong;
//!
//! # fn demo() -> Result<()> {
//! let header = Header::builder(Release::V118, ByteOrder::LittleEndian).build();
//! let schema = Schema::builder()
//! .add_variable(Variable::builder(VariableType::Long, "id").format("%12.0g"))
//! .build()?;
//!
//! let mut record_writer = DtaWriter::new()
//! .from_path("example.dta")?
//! .write_header(header)?
//! .write_schema(schema)?
//! .into_record_writer()?;
//! record_writer.write_record(&[Value::Long(StataLong::Present(1))])?;
//!
//! record_writer
//! .into_long_string_writer()?
//! .into_value_label_writer()?
//! .finish()?;
//! # Ok(())
//! # }
//! ```
//!
//! # Round-trip (runnable)
//!
//! Both sides together against an in-memory buffer, so this example
//! actually executes in the test harness:
//!
//! ```
//! use std::io::Cursor;
//! use dta::stata::dta::byte_order::ByteOrder;
//! use dta::stata::dta::dta_error::Result;
//! use dta::stata::dta::dta_reader::DtaReader;
//! use dta::stata::dta::dta_writer::DtaWriter;
//! use dta::stata::dta::header::Header;
//! use dta::stata::dta::release::Release;
//! use dta::stata::dta::schema::Schema;
//! use dta::stata::dta::value::Value;
//! use dta::stata::dta::variable::Variable;
//! use dta::stata::dta::variable_type::VariableType;
//! use dta::stata::stata_long::StataLong;
//!
//! # fn demo() -> Result<()> {
//! let header = Header::builder(Release::V118, ByteOrder::LittleEndian).build();
//! let schema = Schema::builder()
//! .add_variable(Variable::builder(VariableType::Long, "id").format("%12.0g"))
//! .build()?;
//!
//! let mut record_writer = DtaWriter::new()
//! .from_writer(Cursor::new(Vec::<u8>::new()))
//! .write_header(header)?
//! .write_schema(schema)?
//! .into_record_writer()?;
//! record_writer.write_record(&[Value::Long(StataLong::Present(42))])?;
//! let bytes = record_writer
//! .into_long_string_writer()?
//! .into_value_label_writer()?
//! .finish()?
//! .into_inner();
//!
//! let mut characteristic_reader = DtaReader::new()
//! .from_reader(Cursor::new(bytes))
//! .read_header()?
//! .read_schema()?;
//! characteristic_reader.skip_to_end()?;
//! let mut record_reader = characteristic_reader.into_record_reader()?;
//! let record = record_reader.read_record()?.unwrap();
//! assert_eq!(record.values().len(), 1);
//! # Ok(())
//! # }
//! # demo().unwrap();
//! ```
//!
//! # Async
//!
//! Enable the `tokio` feature for async reader and writer mirrors —
//! same typestate chain, `.await` at each step. `DtaReader::from_tokio_*`
//! / `DtaWriter::from_tokio_*` are the entry points.
/// Stata file format types and utilities.