use std::fs::{self, File};
use std::path::{Path, PathBuf};
use dta::stata::dta::dta_reader::DtaReader;
use dta::stata::dta::long_string_table::LongStringTable;
use dta::stata::dta::value::Value;
/// Runs `read_dta_section_counts` over every `.dta` file found directly inside
/// the local fixture directory, in sorted path order.
///
/// Directory entries that cannot be read are skipped, as is the file named
/// `stata1_encoding_118.dta`. The test is ignored by default (see the
/// `#[ignore]` reason below).
///
/// # Panics
///
/// Panics if the fixture directory itself cannot be read, or if reading any
/// individual fixture panics.
#[test]
#[ignore = "Using local files that require a license"]
fn read_auto_dta_section_counts() {
    const FIXTURE_DIR: &str = "/mnt/c/Publish/pandas-stata-fixtures";
    const EXCLUDED_FILE: &str = "stata1_encoding_118.dta";

    let entries = fs::read_dir(Path::new(FIXTURE_DIR)).expect("failed to read fixture directory");

    let mut fixtures: Vec<PathBuf> = Vec::new();
    for entry in entries {
        // Unreadable directory entries are ignored rather than failing the run.
        let candidate = match entry {
            Ok(entry) => entry.path(),
            Err(_) => continue,
        };

        let is_dta = candidate.extension().and_then(|ext| ext.to_str()) == Some("dta");
        // A file name that is missing or not valid UTF-8 never matches the
        // excluded name, so such a path is kept.
        let is_excluded =
            candidate.file_name().and_then(|name| name.to_str()) == Some(EXCLUDED_FILE);

        if is_dta && !is_excluded {
            fixtures.push(candidate);
        }
    }

    // Sort so the files are processed in a stable order.
    fixtures.sort();

    fixtures
        .iter()
        .map(PathBuf::as_path)
        .for_each(read_dta_section_counts);
}
/// Reads the `.dta` file at `path` stage by stage and logs what it finds to
/// stderr.
///
/// The readers are obtained from one another in this order: header, schema,
/// characteristics, long strings, records, long strings (a second pass), and
/// value labels. For each stage the number of items read is printed; every
/// record is additionally printed with its values joined by `" | "`.
///
/// # Panics
///
/// Panics if the file cannot be opened or if any read or reader transition
/// returns an error.
fn read_dta_section_counts(path: &Path) {
    // Placeholder printed for a value whose `present()` / lookup yields nothing.
    const MISSING: &str = "NA";

    eprintln!("File: {}", path.to_string_lossy());

    let input = match File::open(path) {
        Ok(input) => input,
        Err(e) => panic!("failed to open {}: {e}", path.display()),
    };

    // Header: report the counts the header declares.
    let dta_reader = DtaReader::default().from_file(input);
    let after_header = dta_reader.read_header().expect("failed to read header");
    let declared = after_header.header();
    eprintln!("Variable count: {}", declared.variable_count());
    eprintln!("Observation count: {}", declared.observation_count());

    // Schema: report how many variables and sort-order entries were read.
    let mut characteristics = after_header.read_schema().expect("failed to read schema");
    let layout = characteristics.schema();
    eprintln!("Actual variable count: {}", layout.variables().len());
    eprintln!("Sort order count: {}", layout.sort_order().len());

    // Characteristics: only the number of entries matters here.
    let mut characteristic_total = 0;
    while characteristics
        .read_characteristic()
        .expect("failed to read characteristic")
        .is_some()
    {
        characteristic_total += 1;
    }
    eprintln!("Characteristic count: {characteristic_total}");

    // Load the long-string table before the records so that `LongStringRef`
    // values can be looked up while the records are printed.
    let mut long_strings = characteristics
        .seek_long_strings()
        .expect("failed to jump to long string reader");
    let mut strl_table = LongStringTable::for_reading();
    long_strings
        .read_remaining_into(&mut strl_table)
        .expect("Could not read long string table");

    // Records: render each value as text and print one line per record.
    let mut records = long_strings
        .seek_records()
        .expect("failed to jump to records");
    let encoding = records.encoding();
    let mut record_total: u64 = 0;
    while let Some(row) = records.read_record().expect("failed to read record") {
        let cells = row.values();
        let mut rendered = Vec::with_capacity(cells.len());
        for value in cells {
            let text = match &value {
                Value::Byte(x) => x.present().map_or(MISSING.to_string(), |v| v.to_string()),
                Value::Int(x) => x.present().map_or(MISSING.to_string(), |v| v.to_string()),
                Value::Long(x) => x.present().map_or(MISSING.to_string(), |v| v.to_string()),
                Value::Float(x) => x
                    .present()
                    .map_or(MISSING.to_string(), |v| format!("{v:0.4}")),
                Value::Double(x) => x
                    .present()
                    .map_or(MISSING.to_string(), |v| format!("{v:0.4}")),
                Value::String(s) => s.to_string(),
                Value::LongStringRef(key) => strl_table
                    .get(key)
                    .and_then(|entry| entry.data_str(encoding).map(|decoded| decoded.to_string()))
                    .unwrap_or(MISSING.to_string()),
            };
            rendered.push(text);
        }
        eprintln!("{}", rendered.join(" | "));
        record_total += 1;
    }
    eprintln!("Actual observation count: {record_total}");

    // Long strings, second pass: this time entries are read one at a time and
    // only counted.
    let mut trailing_long_strings = records
        .into_long_string_reader()
        .expect("failed to transition to long string reader");
    let mut long_string_total = 0;
    while trailing_long_strings
        .read_long_string()
        .expect("failed to read long string")
        .is_some()
    {
        long_string_total += 1;
    }
    eprintln!("Long string count: {long_string_total}");

    // Value labels: count the label sets.
    let mut value_labels = trailing_long_strings
        .into_value_label_reader()
        .expect("failed to transition to value label reader");
    let mut value_label_set_total = 0;
    while value_labels
        .read_value_label_set()
        .expect("failed to read value label set")
        .is_some()
    {
        value_label_set_total += 1;
    }
    eprintln!("Value label count: {value_label_set_total}");
}