//! A pure Rust reader and writer for Stata data formats.
//!
//! Two related formats live in this crate:
//!
//! - **DTA** — Stata's binary dataset format ([`stata::dta`]). Every
//!   released version (102 through 119) is supported, including the
//!   XML-framed releases (117+), along with tagged missing values,
//!   value-label sets, and long-string (`strL`) storage. The API is
//!   built around a typestate chain — you walk through the sections
//!   of a file in order, and each phase hands the underlying I/O
//!   handle to the next.
//! - **DCT** — Stata's plain-text dictionary format ([`stata::dct`]).
//!   Describes the schema of a fixed-width or free-format data file.
//!   The reader is a two-step builder: parse the dictionary, then
//!   pair the resulting schema with a data source.
//!
//! Format-agnostic Stata-domain types — `MissingValue`, `StataByte`,
//! `StataInt`, `StataLong`, `StataFloat`, `StataDouble`,
//! `StataTimestamp`, and the temporal helpers — live in [`stata`] and
//! are shared between the two formats.
//!
//! See the [README] for the full tour, including DCT examples.
//!
//! [README]: https://github.com/almus-the-dan/dta/#readme
//!
//! # Reading a DTA file
//!
//! ```no_run
//! use dta::stata::dta::dta_reader::DtaReader;
//! use dta::stata::dta::dta_error::Result;
//!
//! # fn demo() -> Result<()> {
//! let mut characteristic_reader = DtaReader::new()
//!     .from_path("example.dta")?
//!     .read_header()?
//!     .read_schema()?;
//!
//! // Characteristics are optional — skip them if you don't need them.
//! characteristic_reader.skip_to_end()?;
//!
//! // Iterate observation rows.
//! let mut record_reader = characteristic_reader.into_record_reader()?;
//! let schema = record_reader.schema().clone();
//! while let Some(record) = record_reader.read_record()? {
//!     for (variable, value) in schema.variables().iter().zip(record.values()) {
//!         println!("{}: {:?}", variable.name(), value);
//!     }
//! }
//! # Ok(())
//! # }
//! ```
//!
//! # Writing a DTA file
//!
//! ```no_run
//! use dta::stata::dta::byte_order::ByteOrder;
//! use dta::stata::dta::dta_error::Result;
//! use dta::stata::dta::dta_writer::DtaWriter;
//! use dta::stata::dta::header::Header;
//! use dta::stata::dta::release::Release;
//! use dta::stata::dta::schema::Schema;
//! use dta::stata::dta::value::Value;
//! use dta::stata::dta::variable::Variable;
//! use dta::stata::dta::variable_type::VariableType;
//! use dta::stata::stata_long::StataLong;
//!
//! # fn demo() -> Result<()> {
//! let header = Header::builder(Release::V118, ByteOrder::LittleEndian).build();
//! let schema = Schema::builder()
//!     .add_variable(Variable::builder(VariableType::Long, "id").format("%12.0g"))
//!     .build()?;
//!
//! let mut record_writer = DtaWriter::new()
//!     .from_path("example.dta")?
//!     .write_header(header)?
//!     .write_schema(schema)?
//!     .into_record_writer()?;
//! record_writer.write_record(&[Value::Long(StataLong::Present(1))])?;
//!
//! record_writer
//!     .into_long_string_writer()?
//!     .into_value_label_writer()?
//!     .finish()?;
//! # Ok(())
//! # }
//! ```
//!
//! # Round-trip (runnable)
//!
//! This example writes a file and reads it back against an in-memory
//! buffer, so it actually executes in the test harness:
//!
//! ```
//! use std::io::Cursor;
//! use dta::stata::dta::byte_order::ByteOrder;
//! use dta::stata::dta::dta_error::Result;
//! use dta::stata::dta::dta_reader::DtaReader;
//! use dta::stata::dta::dta_writer::DtaWriter;
//! use dta::stata::dta::header::Header;
//! use dta::stata::dta::release::Release;
//! use dta::stata::dta::schema::Schema;
//! use dta::stata::dta::value::Value;
//! use dta::stata::dta::variable::Variable;
//! use dta::stata::dta::variable_type::VariableType;
//! use dta::stata::stata_long::StataLong;
//!
//! # fn demo() -> Result<()> {
//! let header = Header::builder(Release::V118, ByteOrder::LittleEndian).build();
//! let schema = Schema::builder()
//!     .add_variable(Variable::builder(VariableType::Long, "id").format("%12.0g"))
//!     .build()?;
//!
//! let mut record_writer = DtaWriter::new()
//!     .from_writer(Cursor::new(Vec::<u8>::new()))
//!     .write_header(header)?
//!     .write_schema(schema)?
//!     .into_record_writer()?;
//! record_writer.write_record(&[Value::Long(StataLong::Present(42))])?;
//! let bytes = record_writer
//!     .into_long_string_writer()?
//!     .into_value_label_writer()?
//!     .finish()?
//!     .into_inner();
//!
//! let mut characteristic_reader = DtaReader::new()
//!     .from_reader(Cursor::new(bytes))
//!     .read_header()?
//!     .read_schema()?;
//! characteristic_reader.skip_to_end()?;
//! let mut record_reader = characteristic_reader.into_record_reader()?;
//! let record = record_reader.read_record()?.unwrap();
//! assert_eq!(record.values().len(), 1);
//! # Ok(())
//! # }
//! # demo().unwrap();
//! ```
//!
//! # Reading a DCT dictionary + data file
//!
//! ```no_run
//! use dta::stata::dct::dct_reader::DctReader;
//! use dta::stata::dct::dct_source::DctSource;
//! use dta::stata::dct::dct_error::Result;
//!
//! # fn demo() -> Result<()> {
//! let source = DctSource::options().from_path("schema.dct")?;
//! let mut reader = match source {
//!     DctSource::External(schema) => {
//!         DctReader::options(schema).from_path("data.dat")?
//!     }
//!     DctSource::Embedded { schema, reader } => {
//!         DctReader::options(schema).from_reader(reader)
//!     }
//! };
//!
//! // Capture column names up front: the lending pattern means
//! // `record` borrows the reader exclusively, so `reader.schema()`
//! // can't be called inside the loop body.
//! let column_names: Vec<String> = reader
//!     .schema()
//!     .columns()
//!     .iter()
//!     .map(|c| c.name().to_string())
//!     .collect();
//!
//! while let Some(record) = reader.read_record()? {
//!     for (name, value) in column_names.iter().zip(record.values()) {
//!         println!("{}: {:?}", name, value);
//!     }
//! }
//! # Ok(())
//! # }
//! ```
//!
//! # Async
//!
//! Enable the `tokio` feature for async mirrors of every entry point.
//! The call chains are the same as the sync ones, with `.await` at
//! each step:
//!
//! - DTA: `DtaReader::from_tokio_*` / `DtaWriter::from_tokio_*`
//! - DCT: `DctSource::options().from_tokio_*` and
//!   `DctReader::options(schema).from_tokio_*`
//!
//! The async DCT paths share the same pure parsing state with the
//! sync paths — the only difference is `.await` on `read_line` and
//! `fill_buf`.
/// Stata file format types and utilities.