use std::borrow::Cow;
use std::io::Read;
use base64::engine::general_purpose::STANDARD as BASE64;
use base64::Engine as _;
use quick_xml::events::{BytesStart, Event};
use quick_xml::Reader;
use crate::document::{
Array, ArrayEncoding, Child, Column, Dim, Document, GenericElement, LigoLwElement, Param,
Table, Time,
};
use crate::error::{Error, Result};
use crate::stream::parse_stream;
use crate::types::LigoType;
/// Parses a complete XML document held in memory.
///
/// A quick-xml [`Reader`] is wrapped around `bytes` and handed to the shared
/// event loop in `parse_from_reader`.
///
/// # Errors
///
/// Returns whatever error `parse_from_reader` reports for the input.
pub fn parse_bytes(bytes: &[u8]) -> Result<Document> {
    parse_from_reader(&mut Reader::from_reader(bytes))
}
pub fn parse_reader<R: Read>(mut r: R) -> Result<Document> {
let mut buf = Vec::new();
r.read_to_end(&mut buf)
.map_err(|e| Error::Io(e.to_string()))?;
parse_bytes(&buf)
}
pub fn parse_path(path: impl AsRef<std::path::Path>) -> Result<Document> {
let path = path.as_ref();
let file = std::fs::File::open(path).map_err(|e| Error::Io(e.to_string()))?;
match path.extension().and_then(|e| e.to_str()) {
Some("gz") => {
let r = flate2::read::GzDecoder::new(file);
parse_reader(r)
}
#[cfg(feature = "bzip2")]
Some("bz2") => {
let r = bzip2::read::BzDecoder::new(file);
parse_reader(r)
}
#[cfg(feature = "xz")]
Some("xz") => {
let r = xz2::read::XzDecoder::new(file);
parse_reader(r)
}
_ => parse_reader(file),
}
}
fn parse_from_reader<B: std::io::BufRead>(reader: &mut Reader<B>) -> Result<Document> {
let config = reader.config_mut();
config.trim_text(false);
config.expand_empty_elements = true;
let mut stack: Vec<OpenFrame> = Vec::new();
let mut closed_root: Option<LigoLwElement> = None;
let mut cur_table: Option<TableInProgress> = None;
let mut cur_param: Option<ParamInProgress> = None;
let mut cur_array: Option<ArrayInProgress> = None;
let mut cur_time: Option<TimeInProgress> = None;
let mut cur_dim: Option<DimInProgress> = None;
let mut buf = Vec::new();
loop {
match reader.read_event_into(&mut buf)? {
Event::Eof => break,
Event::Start(ref e) => match e.local_name().as_ref() {
b"LIGO_LW" => {
let name = attr(e, "Name")?.map(|c| c.into_owned());
let element_type = attr(e, "Type")?.map(|c| c.into_owned());
stack.push(OpenFrame::LigoLw(LigoLwElement {
name,
element_type,
children: Vec::new(),
}));
}
b"Table" => {
let (name, _ty) = read_name_type(e)?;
cur_table = Some(TableInProgress::new(strip_table_suffix(&name)));
}
b"Column" => {
if let Some(t) = cur_table.as_mut() {
let (full_name, ty_str) = read_name_type(e)?;
let ty = LigoType::parse(&ty_str)?;
t.columns.push(Column {
name: strip_any_prefix(&full_name),
ty,
});
}
}
b"Stream" => {
if let Some(t) = cur_table.as_mut() {
t.delimiter = read_delimiter(e);
t.collecting = true;
} else if let Some(a) = cur_array.as_mut() {
a.delimiter = read_delimiter(e);
a.encoding = attr(e, "Encoding")?
.map(|c| ArrayEncoding::parse(&c))
.unwrap_or(ArrayEncoding::Text);
a.collecting = true;
}
}
b"Param" => {
let (name, ty_str) = read_name_type(e)?;
let ty = LigoType::parse(&ty_str).unwrap_or(LigoType::Str);
let unit = attr(e, "Unit")?.map(|c| c.into_owned());
cur_param = Some(ParamInProgress {
name: strip_param_suffix(&name),
ty,
unit,
raw: String::new(),
});
}
b"Time" => {
let name = attr(e, "Name")?.map(|c| c.into_owned());
let time_type = attr(e, "Type")?
.map(|c| c.into_owned())
.unwrap_or_else(|| "GPS".to_string());
cur_time = Some(TimeInProgress {
name,
time_type,
value: String::new(),
});
}
b"Array" => {
let (name_opt, ty_str) = read_optional_name_type(e)?;
let ty = LigoType::parse(&ty_str)?;
let unit = attr(e, "Unit")?.map(|c| c.into_owned());
cur_array = Some(ArrayInProgress::new(name_opt, ty, unit));
}
b"Dim" => {
if cur_array.is_some() {
let name = attr(e, "Name")?.map(|c| c.into_owned());
let scale = attr(e, "Scale")?.and_then(|s| s.parse::<f64>().ok());
let start = attr(e, "Start")?.and_then(|s| s.parse::<f64>().ok());
let unit = attr(e, "Unit")?.map(|c| c.into_owned());
cur_dim = Some(DimInProgress {
name,
size_text: String::new(),
scale,
start,
unit,
});
}
}
other_tag => {
let tag = std::str::from_utf8(other_tag)?.to_string();
let mut name = None;
let mut element_type = None;
let mut attributes = Vec::new();
for a in e.attributes() {
let a = a?;
let k = std::str::from_utf8(a.key.as_ref())?.to_string();
let v = a.unescape_value()?.into_owned();
if k.eq_ignore_ascii_case("Name") {
name = Some(v);
} else if k.eq_ignore_ascii_case("Type") {
element_type = Some(v);
} else {
attributes.push((k, v));
}
}
stack.push(OpenFrame::Generic(GenericElement {
tag,
name,
element_type,
attributes,
children: Vec::new(),
text: String::new(),
}));
}
},
Event::Text(t) => {
let text = t.unescape()?;
if let Some(table) = cur_table.as_mut() {
if table.collecting {
table.stream_text.push_str(&text);
}
}
if let Some(p) = cur_param.as_mut() {
p.raw.push_str(&text);
}
if let Some(t) = cur_time.as_mut() {
t.value.push_str(&text);
}
if let Some(a) = cur_array.as_mut() {
if a.collecting {
a.stream_text.push_str(&text);
}
}
if let Some(d) = cur_dim.as_mut() {
d.size_text.push_str(&text);
}
if let Some(OpenFrame::Generic(g)) = stack.last_mut() {
g.text.push_str(&text);
}
}
Event::CData(t) => {
let text = std::str::from_utf8(t.as_ref())?.to_string();
if let Some(table) = cur_table.as_mut() {
if table.collecting {
table.stream_text.push_str(&text);
}
}
if let Some(p) = cur_param.as_mut() {
p.raw.push_str(&text);
}
if let Some(t) = cur_time.as_mut() {
t.value.push_str(&text);
}
if let Some(a) = cur_array.as_mut() {
if a.collecting {
a.stream_text.push_str(&text);
}
}
if let Some(OpenFrame::Generic(g)) = stack.last_mut() {
g.text.push_str(&text);
}
}
Event::End(ref e) => match e.local_name().as_ref() {
b"Stream" => {
if let Some(t) = cur_table.as_mut() {
t.collecting = false;
} else if let Some(a) = cur_array.as_mut() {
a.collecting = false;
}
}
b"Dim" => {
if let (Some(dip), Some(arr)) = (cur_dim.take(), cur_array.as_mut()) {
let size = dip.size_text.trim().parse::<usize>().map_err(|source| {
Error::BadDim {
literal: dip.size_text.clone(),
source,
}
})?;
arr.dims.push(Dim {
name: dip.name,
size,
scale: dip.scale,
start: dip.start,
unit: dip.unit,
});
}
}
b"Table" => {
if let Some(t) = cur_table.take() {
let table = t.finalize()?;
push_child(&mut stack, Child::Table(table));
}
}
b"Param" => {
if let Some(p) = cur_param.take() {
push_child(
&mut stack,
Child::Param(Param {
name: p.name,
ty: p.ty,
unit: p.unit,
raw: p.raw.trim().to_string(),
}),
);
}
}
b"Time" => {
if let Some(t) = cur_time.take() {
push_child(
&mut stack,
Child::Time(Time {
name: t.name,
time_type: t.time_type,
value: t.value.trim().to_string(),
}),
);
}
}
b"Array" => {
if let Some(a) = cur_array.take() {
push_child(&mut stack, Child::Array(a.finalize()?));
}
}
b"LIGO_LW" => {
if let Some(OpenFrame::LigoLw(frame)) = stack.pop() {
if stack.is_empty() {
closed_root = Some(frame);
} else {
push_child(&mut stack, Child::LigoLw(frame));
}
}
}
other_end => {
let end_tag = std::str::from_utf8(other_end)?;
let should_pop = match stack.last() {
Some(OpenFrame::Generic(g)) => g.tag == end_tag,
_ => false,
};
if should_pop {
if let Some(OpenFrame::Generic(g)) = stack.pop() {
push_child(&mut stack, Child::Other(g));
}
}
}
},
Event::Comment(c) => {
let text = std::str::from_utf8(c.as_ref())?.to_string();
push_child(&mut stack, Child::Comment(text));
}
_ => {}
}
buf.clear();
}
let root = closed_root.ok_or(Error::NoRoot)?;
Ok(Document::from_root(root))
}
/// An element that has been opened but not yet closed during parsing.
///
/// Frames live on the parser's stack; completed children are attached to the
/// frame on top of that stack.
enum OpenFrame {
/// A `LIGO_LW` element.
LigoLw(LigoLwElement),
/// Any element whose tag the parser does not handle specially.
Generic(GenericElement),
}
/// Attaches `child` to the innermost open frame.
///
/// When `stack` is empty there is nothing to attach to and `child` is
/// dropped.
fn push_child(stack: &mut [OpenFrame], child: Child) {
    match stack.last_mut() {
        Some(OpenFrame::LigoLw(element)) => element.children.push(child),
        Some(OpenFrame::Generic(generic)) => generic.children.push(child),
        None => {}
    }
}
/// Accumulator for a `Table` element between its start and end tags.
struct TableInProgress {
/// Table name as passed to `TableInProgress::new`.
name: String,
/// Column declarations in the order they were pushed.
columns: Vec<Column>,
/// Delimiter used when the stream text is split into rows; starts as `,`.
delimiter: char,
/// Raw character data gathered for the table's stream.
stream_text: String,
/// Whether character data should currently be appended to `stream_text`.
collecting: bool,
}
impl TableInProgress {
    /// Starts an empty table called `name`: no columns, `,` as delimiter,
    /// no stream text, and not yet collecting.
    fn new(name: String) -> Self {
        Self {
            collecting: false,
            stream_text: String::new(),
            delimiter: ',',
            columns: Vec::new(),
            name,
        }
    }

    /// Converts the accumulated state into a [`Table`].
    ///
    /// The rows come from handing the name, delimiter, columns and collected
    /// stream text to `parse_stream`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `parse_stream` reports.
    fn finalize(self) -> Result<Table> {
        let Self {
            name,
            columns,
            delimiter,
            stream_text,
            ..
        } = self;
        let rows = parse_stream(&name, delimiter, &columns, &stream_text)?;
        Ok(Table {
            name,
            delimiter,
            columns,
            rows,
        })
    }
}
/// Accumulator for a `Param` element between its start and end tags.
struct ParamInProgress {
/// Parameter name.
name: String,
/// Declared value type.
ty: LigoType,
/// Value of the `Unit` attribute, if present.
unit: Option<String>,
/// Character data gathered so far, untrimmed.
raw: String,
}
/// Accumulator for a `Time` element between its start and end tags.
struct TimeInProgress {
/// Value of the `Name` attribute, if present.
name: Option<String>,
/// Time type label.
time_type: String,
/// Character data gathered so far, untrimmed.
value: String,
}
/// Accumulator for a `Dim` element between its start and end tags.
struct DimInProgress {
/// Value of the `Name` attribute, if present.
name: Option<String>,
/// Character data gathered so far; holds the dimension size as text.
size_text: String,
/// Optional scale of the dimension.
scale: Option<f64>,
/// Optional start of the dimension.
start: Option<f64>,
/// Value of the `Unit` attribute, if present.
unit: Option<String>,
}
/// Accumulator for an `Array` element between its start and end tags.
struct ArrayInProgress {
/// Array name as given; any `:array` suffix is removed in `finalize`.
name: Option<String>,
/// Element type of the array.
ty: LigoType,
/// Unit of the array values, if any.
unit: Option<String>,
/// Dimensions collected so far.
dims: Vec<Dim>,
/// How the stream text is encoded; starts as `ArrayEncoding::Text`.
encoding: ArrayEncoding,
/// Stream delimiter; starts as a space.
delimiter: char,
/// Raw character data gathered for the array's stream.
stream_text: String,
/// Whether character data should currently be appended to `stream_text`.
collecting: bool,
}
impl ArrayInProgress {
fn new(name: Option<String>, ty: LigoType, unit: Option<String>) -> Self {
Self {
name,
ty,
unit,
dims: Vec::new(),
encoding: ArrayEncoding::Text,
delimiter: ' ',
stream_text: String::new(),
collecting: false,
}
}
fn finalize(self) -> Result<Array> {
let values = match self.encoding {
ArrayEncoding::Text => parse_text_array(&self.ty, &self.stream_text)?,
ArrayEncoding::LittleEndianBase64 => {
parse_base64_array(&self.ty, &self.stream_text, true)?
}
ArrayEncoding::BigEndianBase64 => {
parse_base64_array(&self.ty, &self.stream_text, false)?
}
};
Ok(Array {
name: self.name.map(|n| strip_array_suffix(&n)),
ty: self.ty,
unit: self.unit,
dims: self.dims,
encoding: self.encoding,
delimiter: self.delimiter,
values,
})
}
}
fn parse_text_array(ty: &LigoType, text: &str) -> Result<Vec<f64>> {
let mut out = Vec::new();
for token in text.split(|c: char| c.is_whitespace() || c == ',') {
let trimmed = token.trim();
if trimmed.is_empty() {
continue;
}
let v: f64 =
match ty {
LigoType::Real4 | LigoType::Real8 => {
trimmed.parse::<f64>().map_err(|source| Error::BadFloat {
column: "<array>".to_string(),
literal: trimmed.to_string(),
source,
})?
}
LigoType::Int | LigoType::UInt => trimmed
.parse::<i64>()
.map(|v| v as f64)
.map_err(|source| Error::BadInt {
column: "<array>".to_string(),
literal: trimmed.to_string(),
source,
})?,
_ => continue,
};
out.push(v);
}
Ok(out)
}
/// Decodes a base64-encoded array stream into `f64` values.
///
/// Whitespace is removed from `text` before decoding. The decoded bytes are
/// then read as fixed-width words in little-endian order when `little` is
/// true and big-endian order otherwise:
///
/// * `Real4`: 4-byte `f32`
/// * `Real8`: 8-byte `f64`
/// * `Int`: 4-byte signed integer
/// * `UInt`: 4-byte unsigned integer
///
/// Any other type yields an empty vector. Trailing bytes that do not fill a
/// whole word are ignored.
///
/// # Errors
///
/// Returns [`Error::ArrayBase64`] when the text is not valid base64.
fn parse_base64_array(ty: &LigoType, text: &str, little: bool) -> Result<Vec<f64>> {
    let cleaned: String = text.chars().filter(|c| !c.is_whitespace()).collect();
    let bytes = BASE64
        .decode(cleaned.as_bytes())
        .map_err(Error::ArrayBase64)?;

    let values: Vec<f64> = match ty {
        LigoType::Real4 => bytes
            .chunks_exact(4)
            .map(|chunk| {
                let raw = <[u8; 4]>::try_from(chunk).expect("chunk has 4 bytes");
                let v = if little {
                    f32::from_le_bytes(raw)
                } else {
                    f32::from_be_bytes(raw)
                };
                f64::from(v)
            })
            .collect(),
        LigoType::Real8 => bytes
            .chunks_exact(8)
            .map(|chunk| {
                let raw = <[u8; 8]>::try_from(chunk).expect("chunk has 8 bytes");
                if little {
                    f64::from_le_bytes(raw)
                } else {
                    f64::from_be_bytes(raw)
                }
            })
            .collect(),
        LigoType::Int => bytes
            .chunks_exact(4)
            .map(|chunk| {
                let raw = <[u8; 4]>::try_from(chunk).expect("chunk has 4 bytes");
                let v = if little {
                    i32::from_le_bytes(raw)
                } else {
                    i32::from_be_bytes(raw)
                };
                f64::from(v)
            })
            .collect(),
        // Unsigned words must not pass through `i32`: values of 2^31 and
        // above would otherwise come out negative.
        LigoType::UInt => bytes
            .chunks_exact(4)
            .map(|chunk| {
                let raw = <[u8; 4]>::try_from(chunk).expect("chunk has 4 bytes");
                let v = if little {
                    u32::from_le_bytes(raw)
                } else {
                    u32::from_be_bytes(raw)
                };
                f64::from(v)
            })
            .collect(),
        _ => Vec::new(),
    };
    Ok(values)
}
/// Looks up the first attribute of `e` whose name equals `key`, ignoring
/// ASCII case, and returns its unescaped value.
///
/// Returns `Ok(None)` when no attribute matches.
///
/// # Errors
///
/// Fails when an attribute encountered before the match is malformed, when
/// an attribute name is not valid UTF-8, or when the matching value cannot be
/// unescaped.
fn attr<'a>(e: &'a BytesStart, key: &str) -> Result<Option<Cow<'a, str>>> {
    for candidate in e.attributes() {
        let attribute = candidate?;
        let attr_name = std::str::from_utf8(attribute.key.as_ref())?;
        if !attr_name.eq_ignore_ascii_case(key) {
            continue;
        }
        return Ok(Some(attribute.unescape_value()?));
    }
    Ok(None)
}
fn read_name_type(e: &BytesStart) -> Result<(String, String)> {
let name = attr(e, "Name")?
.ok_or_else(|| Error::MissingAttr {
element: String::from_utf8_lossy(e.name().as_ref()).into_owned(),
attr: "Name".into(),
})?
.into_owned();
let ty = attr(e, "Type")?.unwrap_or(Cow::Borrowed("")).into_owned();
Ok((name, ty))
}
fn read_optional_name_type(e: &BytesStart) -> Result<(Option<String>, String)> {
let name = attr(e, "Name")?.map(|c| c.into_owned());
let ty = attr(e, "Type")?.unwrap_or(Cow::Borrowed("")).into_owned();
Ok((name, ty))
}
/// Returns the first character of the `Delimiter` attribute of `e`.
///
/// Falls back to `,` when the attribute is missing, empty, or cannot be
/// read; lookup errors are deliberately not propagated.
fn read_delimiter(e: &BytesStart) -> char {
    attr(e, "Delimiter")
        .ok()
        .flatten()
        .and_then(|value| value.chars().next())
        .unwrap_or(',')
}
/// Returns `name` without one trailing `:table`, or unchanged when it does
/// not end with that suffix.
fn strip_table_suffix(name: &str) -> String {
    name.strip_suffix(":table").unwrap_or(name).to_owned()
}
/// Returns `name` without one trailing `:array`, or unchanged when it does
/// not end with that suffix.
fn strip_array_suffix(name: &str) -> String {
    name.strip_suffix(":array").unwrap_or(name).to_owned()
}
/// Returns everything after the first `:` in `column_name`, or the whole
/// name when it contains no colon.
fn strip_any_prefix(column_name: &str) -> String {
    column_name
        .split_once(':')
        .map_or(column_name, |(_, remainder)| remainder)
        .to_owned()
}
/// Returns everything before the last `:` in `name`, or the whole name when
/// it contains no colon.
fn strip_param_suffix(name: &str) -> String {
    name.rsplit_once(':')
        .map_or(name, |(head, _)| head)
        .to_owned()
}