use std::collections::BTreeMap;
use chrono::{DateTime, TimeZone, Utc};
use crate::error::GribberishError;
/// One record parsed from an index file by [`parse_noaa_index`] or [`parse_ecmwf_index`].
///
/// Which fields are populated depends on the index flavour; see the per-field notes.
#[derive(Clone, Debug, PartialEq)]
pub struct IndexEntry {
/// Message number: the leading number of a NOAA line, or the 1-based position of the
/// entry among the non-empty lines of an ECMWF index.
pub message_number: usize,
/// Submessage number taken from the `N.M` form of the first NOAA field; `None` when the
/// field has no `.` and always `None` for ECMWF entries.
pub submessage: Option<usize>,
/// Offset of the message (second NOAA field, or the ECMWF `_offset` key).
/// NOTE(review): presumably a byte offset into the data file — confirm against callers.
pub offset: u64,
/// Message length. For ECMWF this is the `_length` key. For NOAA it is the distance to the
/// next strictly greater offset later in the index, falling back to `file_size`; `None`
/// when neither is available.
pub length: Option<u64>,
/// Reference date parsed from the NOAA `d=` field or the ECMWF `date`/`time` keys;
/// `None` when absent or unparseable.
pub reference_date: Option<DateTime<Utc>>,
/// Fourth NOAA field, or the ECMWF `param` key.
pub var: Option<String>,
/// Fifth NOAA field, or the ECMWF `levelist` key.
pub level: Option<String>,
/// Sixth NOAA field, or the ECMWF `step` key.
pub forecast_time: Option<String>,
/// Non-empty NOAA fields after the sixth; always empty for ECMWF entries.
pub extra: Vec<String>,
/// ECMWF keys whose names do not start with `_`, with values rendered as strings;
/// always empty for NOAA entries.
pub keys: BTreeMap<String, String>,
}
/// Parses index text, choosing the parser from its first non-whitespace character.
///
/// Text that starts (after leading whitespace) with `{` is handed to
/// [`parse_ecmwf_index`]; any other non-empty text goes to [`parse_noaa_index`]
/// together with `file_size`. Empty or whitespace-only text yields an empty list.
///
/// # Errors
///
/// Propagates whatever error the selected parser returns.
pub fn parse_index(text: &str, file_size: Option<u64>) -> Result<Vec<IndexEntry>, GribberishError> {
    let body = text.trim_start();
    if body.is_empty() {
        return Ok(Vec::new());
    }
    // The sniffing only looks at the trimmed text; the parsers receive the original.
    if body.starts_with('{') {
        parse_ecmwf_index(text)
    } else {
        parse_noaa_index(text, file_size)
    }
}
/// Parses a colon-separated NOAA-style index.
///
/// Each non-blank line is split on `:` into
/// `number[.submessage]:offset:d=<date>:var:level:forecast_time:extra...`.
/// The first two fields must be numeric; the date is best-effort and the remaining
/// fields are optional (empty fields become `None` / are dropped from `extra`).
///
/// After all lines are read, each entry's `length` is set to the distance from its
/// offset to the first strictly greater offset found later in the index, or to
/// `file_size` when no such offset exists. Entries sharing an offset (submessages)
/// therefore get the same length. With no later offset and no `file_size`, `length`
/// stays `None`.
///
/// # Errors
///
/// Returns `GribberishError::IndexError` for the first line that has fewer than three
/// fields or whose message number, submessage number, or offset is not a valid integer.
pub fn parse_noaa_index(
    text: &str,
    file_size: Option<u64>,
) -> Result<Vec<IndexEntry>, GribberishError> {
    let mut entries = text
        .lines()
        .enumerate()
        .map(|(lineno, raw)| (lineno, raw.trim()))
        .filter(|(_, line)| !line.is_empty())
        .map(|(lineno, line)| -> Result<IndexEntry, GribberishError> {
            let invalid =
                || GribberishError::IndexError(format!("invalid idx line {}: {line}", lineno + 1));

            let parts: Vec<&str> = line.split(':').collect();
            if parts.len() < 3 {
                return Err(invalid());
            }

            // First field is either `N` or `N.M` (message and submessage).
            let id = parts[0];
            let (message_number, submessage) = if let Some((major, minor)) = id.split_once('.') {
                let major: usize = major.parse().map_err(|_| invalid())?;
                let minor: usize = minor.parse().map_err(|_| invalid())?;
                (major, Some(minor))
            } else {
                (id.parse::<usize>().map_err(|_| invalid())?, None)
            };
            let offset: u64 = parts[1].parse().map_err(|_| invalid())?;
            // A missing `d=` prefix or an unparseable stamp is not an error.
            let reference_date = parts[2].strip_prefix("d=").and_then(parse_noaa_date);

            // Optional positional field: absent and empty are both `None`.
            let text_at = |idx: usize| -> Option<String> {
                match parts.get(idx) {
                    Some(part) if !part.is_empty() => Some(part.to_string()),
                    _ => None,
                }
            };
            let extra: Vec<String> = parts
                .iter()
                .skip(6)
                .filter(|part| !part.is_empty())
                .map(|part| part.to_string())
                .collect();

            Ok(IndexEntry {
                message_number,
                submessage,
                offset,
                length: None,
                reference_date,
                var: text_at(3),
                level: text_at(4),
                forecast_time: text_at(5),
                extra,
                keys: BTreeMap::new(),
            })
        })
        .collect::<Result<Vec<_>, _>>()?;

    // Second pass: derive lengths from the gaps between offsets.
    let starts: Vec<u64> = entries.iter().map(|entry| entry.offset).collect();
    for (idx, entry) in entries.iter_mut().enumerate() {
        let start = entry.offset;
        let end = starts
            .iter()
            .skip(idx + 1)
            .copied()
            .find(|&later| later > start)
            .or(file_size);
        entry.length = end.map(|end| end.saturating_sub(start));
    }
    Ok(entries)
}
/// Parses an ECMWF-style index: one JSON object per non-blank line.
///
/// Every object must carry `_offset` and `_length`, each either a non-negative JSON
/// integer or a string holding one. All keys that do not start with `_` are copied
/// into `keys` (string values verbatim, other values via their JSON rendering), and
/// `date`/`time`, `param`, `levelist` and `step` are additionally surfaced as
/// `reference_date`, `var`, `level` and `forecast_time`.
///
/// `message_number` is the 1-based position of the entry among the non-blank lines.
///
/// # Errors
///
/// Returns `GribberishError::IndexError` for the first line that is not a JSON object
/// or lacks a usable `_offset` / `_length`. The line number in the message is the
/// physical line number in `text`, counting blank lines.
pub fn parse_ecmwf_index(text: &str) -> Result<Vec<IndexEntry>, GribberishError> {
    text.lines()
        .enumerate()
        // Number lines before dropping blanks so diagnostics point at the real line.
        .map(|(lineno, line)| (lineno + 1, line.trim()))
        .filter(|(_, line)| !line.is_empty())
        .enumerate()
        .map(|(i, (lineno, line))| {
            let invalid = |what: &str| {
                GribberishError::IndexError(format!("{what} in index line {}: {line}", lineno))
            };
            let value: serde_json::Value =
                serde_json::from_str(line).map_err(|_| invalid("invalid json"))?;
            let object = value.as_object().ok_or_else(|| invalid("invalid json"))?;
            // Accept both `123` and `"123"`; anything else counts as missing.
            let int_field = |key: &str| match object.get(key) {
                Some(serde_json::Value::Number(n)) => n.as_u64(),
                Some(serde_json::Value::String(s)) => s.parse().ok(),
                _ => None,
            };
            let offset = int_field("_offset").ok_or_else(|| invalid("missing _offset"))?;
            let length = int_field("_length").ok_or_else(|| invalid("missing _length"))?;
            let keys: BTreeMap<String, String> = object
                .iter()
                .filter(|(k, _)| !k.starts_with('_'))
                .map(|(k, v)| {
                    let v = match v {
                        // Avoid the surrounding quotes `to_string` would add to strings.
                        serde_json::Value::String(s) => s.clone(),
                        other => other.to_string(),
                    };
                    (k.clone(), v)
                })
                .collect();
            Ok(IndexEntry {
                message_number: i + 1,
                submessage: None,
                offset,
                length: Some(length),
                reference_date: parse_ecmwf_date(keys.get("date"), keys.get("time")),
                var: keys.get("param").cloned(),
                level: keys.get("levelist").cloned(),
                forecast_time: keys.get("step").cloned(),
                extra: Vec::new(),
                keys,
            })
        })
        .collect()
}
/// Parses a NOAA reference stamp of the form `YYYYMMDDHH` or `YYYYMMDDHHMM` as UTC.
///
/// Only the first 10 bytes are read, or the first 12 when the string is at least
/// 12 bytes long; seconds are always zero. Returns `None` when the string is shorter
/// than 10 bytes, contains non-ASCII characters, has a non-digit in the portion that
/// is read, or does not describe a valid calendar date and time.
fn parse_noaa_date(s: &str) -> Option<DateTime<Utc>> {
    if s.len() < 10 || !s.is_ascii() {
        return None;
    }
    let consumed = if s.len() >= 12 { 12 } else { 10 };
    // Integer `parse` tolerates a leading sign, which would let stamps such as
    // `2023+1+1+1` through; insist on plain digits instead. Slicing is safe because
    // the whole string was just checked to be ASCII.
    if !s[..consumed].bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    let minute = if consumed == 12 {
        s[10..12].parse().ok()?
    } else {
        0
    };
    Utc.with_ymd_and_hms(
        s[0..4].parse().ok()?,
        s[4..6].parse().ok()?,
        s[6..8].parse().ok()?,
        s[8..10].parse().ok()?,
        minute,
        0,
    )
    .single()
}
/// Builds a UTC timestamp from ECMWF `date` (`YYYYMMDD`) and `time` (`HHMM`) values.
///
/// A missing `time` is treated as `"0"`, and the time is left-padded with zeros to
/// four characters, so `"600"` reads as 06:00. Only the first four characters of the
/// padded time are used; seconds are always zero.
///
/// Returns `None` when `date` is missing, is not exactly eight ASCII digits, when the
/// time is not ASCII or its `HHMM` part contains a non-digit, or when the values do
/// not describe a valid calendar date and time.
fn parse_ecmwf_date(date: Option<&String>, time: Option<&String>) -> Option<DateTime<Utc>> {
    let date = date?;
    let time = format!("{:0>4}", time.map(String::as_str).unwrap_or("0"));
    if date.len() != 8 || !date.is_ascii() || !time.is_ascii() {
        return None;
    }
    // Padding guarantees at least four characters, and the ASCII check above makes
    // byte slicing safe.
    let hhmm = &time[..4];
    // Integer `parse` tolerates a leading sign (`2023+1+1`, `+600`); require plain
    // digits so malformed values are rejected rather than silently reinterpreted.
    if !date.bytes().all(|b| b.is_ascii_digit()) || !hhmm.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    Utc.with_ymd_and_hms(
        date[0..4].parse().ok()?,
        date[4..6].parse().ok()?,
        date[6..8].parse().ok()?,
        hhmm[0..2].parse().ok()?,
        hhmm[2..4].parse().ok()?,
        0,
    )
    .single()
}