use crate::{
registry::FORMATS, ChronoError, Format, LeapSemantics, PosixNs, Strategy, TzSemantics, Unit,
};
/// One candidate reading of an input as a timestamp in a specific format.
#[derive(Debug, Clone, serde::Serialize)]
pub struct Candidate {
/// Identifier of the format this reading assumes.
pub format_id: &'static str,
/// Human-readable name of that format.
pub label: &'static str,
/// Specification or reference the format is taken from (empty for the all-ones sentinel).
pub citation: &'static str,
/// The instant the input decodes to under this format.
pub instant: PosixNs,
/// Rendering of `instant` produced by `PosixNs::to_rfc3339`; `None` when there is none
/// (e.g. the all-ones sentinel).
pub rendered: Option<String>,
/// Overall plausibility: the weighted mean of `components` for integer readings, a fixed
/// 1.0 for string readings and 0.0 for the all-ones sentinel.
pub score: f64,
/// Named sub-scores that explain `score`, in the order they were computed.
pub components: Vec<(&'static str, f64)>,
/// Caveats that apply to this reading.
pub assumptions: Vec<String>,
/// `true` when the raw value matches a known "unset"/"never" marker.
pub sentinel: bool,
}
/// Byte order that was used to turn raw bytes into the integer being interpreted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Endian {
/// Least-significant byte first.
Little,
/// Most-significant byte first.
Big,
}
/// Optional side information that adds extra scoring components to an integer reading.
///
/// Every field is optional; the default context adds no extra components.
#[derive(Debug, Clone, Default)]
pub struct InterpretContext<'a> {
/// Width in bytes of the field the value was read from; enables `byte_width_match`.
pub observed_width_bytes: Option<u8>,
/// Byte order the value was read with; together with a width it enables `endian_match`.
pub endian: Option<Endian>,
/// Free-text hint about the source artifact, matched against each format's id, family
/// and label; enables `artifact_match`.
pub artifact: Option<&'a str>,
/// Raw values to check for ordering consistency under each format; a non-empty slice
/// enables `neighbour_monotonicity`.
pub neighbours: &'a [i64],
}
/// Interprets `value` under every registered format with no extra context.
///
/// Equivalent to [`interpret_int_with_context`] with a default [`InterpretContext`].
#[must_use]
pub fn interpret_int(value: i64) -> Vec<Candidate> {
    let no_context = InterpretContext::default();
    interpret_int_with_context(value, &no_context)
}
/// Decodes `value` under every registered format and ranks the readings that succeed.
///
/// Formats for which no candidate can be built are omitted. The result is sorted by
/// descending score; equal (or incomparable) scores are ordered by ascending `format_id`.
#[must_use]
pub fn interpret_int_with_context(value: i64, ctx: &InterpretContext) -> Vec<Candidate> {
    let mut ranked: Vec<Candidate> = FORMATS
        .iter()
        .filter_map(|format| build_candidate(format, value, ctx))
        .collect();
    ranked.sort_by(|lhs, rhs| {
        // Operands are swapped so that higher scores sort first.
        let by_score_desc = rhs
            .score
            .partial_cmp(&lhs.score)
            .unwrap_or(std::cmp::Ordering::Equal);
        by_score_desc.then_with(|| lhs.format_id.cmp(rhs.format_id))
    });
    ranked
}
/// Builds the scored candidate for reading `value` as format `f`.
///
/// Returns `None` when the format cannot decode `value` or the decoded instant cannot be
/// rendered. Sentinel-looking values still produce a candidate, but it is flagged and
/// carries an extra assumption explaining why.
fn build_candidate(f: &Format, value: i64, ctx: &InterpretContext) -> Option<Candidate> {
    let Ok(instant) = f.decode_int(value) else {
        return None;
    };
    let rendered = instant.to_rfc3339()?;

    let components = score_components(f, value, instant, ctx);
    let score = overall_score(&components);

    let mut notes = assumptions(f);
    let sentinel_hit = sentinel_reason(value);
    if let Some(reason) = sentinel_hit {
        notes.push(format!(
            "value {value} is a likely sentinel ({reason}) — an 'unset'/'never' marker, not necessarily a real instant"
        ));
    }

    Some(Candidate {
        format_id: f.id,
        label: f.label,
        citation: f.citation,
        instant,
        rendered: Some(rendered),
        score,
        components,
        assumptions: notes,
        sentinel: sentinel_hit.is_some(),
    })
}
/// Builds the candidate for a single format looked up by id.
///
/// Returns `None` when the id is unknown or the format yields no candidate for `value`.
fn decode_one(format_id: &str, value: i64, ctx: &InterpretContext) -> Option<Candidate> {
    let format = crate::format(format_id).ok()?;
    build_candidate(format, value, ctx)
}
/// Lists the caveats that apply to any reading made with format `f`.
///
/// The first entry always names the format and its citation; further entries are added
/// for POSIX leap-second handling and for timezone-naive local storage.
fn assumptions(f: &Format) -> Vec<String> {
    let mut notes = Vec::new();
    notes.push(format!(
        "consistent with {} [{}] — a reading, not a determination",
        f.label, f.citation
    ));
    if let LeapSemantics::PosixIgnored = f.leap {
        notes.push(String::from(
            "indistinguishable from a leap-smeared source without clock-policy metadata",
        ));
    }
    if let TzSemantics::LocalNaive = f.tz {
        notes.push(String::from(
            "stored as LOCAL wall-clock time with no offset — the instant is naive, not UTC",
        ));
    }
    notes
}
/// Explains why `value` looks like an "unset"/"never" marker, if it does.
///
/// Recognises `0`, `-1` and `i64::MAX`; every other value yields `None`.
#[must_use]
pub fn sentinel_reason(value: i64) -> Option<&'static str> {
    const KNOWN: [(i64, &str); 3] = [
        (0, "possible sentinel: zero / unset"),
        (-1, "possible sentinel: -1 / all-ones (unset)"),
        (
            i64::MAX,
            "known sentinel: 0x7FFFFFFFFFFFFFFF (e.g. AD accountExpires 'never')",
        ),
    ];
    KNOWN
        .iter()
        .find(|(marker, _)| *marker == value)
        .map(|&(_, reason)| reason)
}
/// Computes the named sub-scores for reading `value` as format `f`.
///
/// Five components are always present (`representable`, `in_window`,
/// `granularity_match`, `magnitude_fit`, `not_sentinel`). The context adds
/// `byte_width_match`, `endian_match`, `artifact_match` and `neighbour_monotonicity`
/// when the corresponding hint is supplied. The order of the entries is stable.
fn score_components(
    f: &Format,
    value: i64,
    instant: PosixNs,
    ctx: &InterpretContext,
) -> Vec<(&'static str, f64)> {
    // Booleans are scored as 1.0 (true) or 0.0 (false).
    let as_score = |yes: bool| f64::from(u8::from(yes));

    let inside_window = f.plausible.0 <= instant.0 && instant.0 < f.plausible.1;
    let mut scored = vec![
        // A candidate only exists if decoding succeeded, so this is always 1.0.
        ("representable", 1.0),
        ("in_window", as_score(inside_window)),
        ("granularity_match", granularity_match(f.strategy, value)),
        ("magnitude_fit", magnitude_fit(f.strategy, instant)),
        ("not_sentinel", as_score(sentinel_reason(value).is_none())),
    ];

    if let Some(width) = ctx.observed_width_bytes {
        scored.push(("byte_width_match", byte_width_match(f, value, width)));
        // Endianness can only be judged once the field width is known.
        if ctx.endian.is_some() {
            scored.push(("endian_match", endian_match(f, value, width)));
        }
    }
    scored.extend(
        ctx.artifact
            .map(|hint| ("artifact_match", artifact_match(f, hint))),
    );
    if !ctx.neighbours.is_empty() {
        let monotonicity = neighbour_monotonicity(f, ctx.neighbours);
        scored.push(("neighbour_monotonicity", monotonicity));
    }
    scored
}
/// Returns how many bytes are needed to hold the magnitude of `value` (at least 1).
fn significant_bytes(value: i64) -> u8 {
    match value.unsigned_abs() {
        0 => 1,
        magnitude => {
            let used_bits = 64 - magnitude.leading_zeros();
            // At most 64 bits, so the byte count is at most 8 and fits in a u8.
            used_bits.div_ceil(8) as u8
        }
    }
}
/// Scores how well the observed field width agrees with the format's storage width.
///
/// 1.0 when the widths are equal, 0.5 when they differ but `value` would still fit in
/// the format's storage width, and 0.0 otherwise.
fn byte_width_match(f: &Format, value: i64, observed: u8) -> f64 {
    let natural = f.storage_bytes();
    if observed == natural {
        return 1.0;
    }
    let fits_natural = significant_bytes(value) <= natural;
    if fits_natural {
        0.5
    } else {
        0.0
    }
}
/// Reports whether `value` decodes under `f` to an instant inside the format's
/// plausible window (start inclusive, end exclusive). Decode failures count as `false`.
fn decode_in_window(f: &Format, value: i64) -> bool {
    match f.decode_int(value) {
        Ok(inst) => f.plausible.0 <= inst.0 && inst.0 < f.plausible.1,
        Err(_) => false,
    }
}
/// Returns `value` with its bytes reversed, treating it as a `width`-byte field.
///
/// Only widths 4 and 8 are supported. For width 4 the value must fit in a `u32`;
/// anything else yields `None`.
fn byte_swapped(value: i64, width: u8) -> Option<i64> {
    if width == 8 {
        // Reinterpret the bit pattern; no range check is needed at full width.
        return Some((value as u64).swap_bytes() as i64);
    }
    if width != 4 {
        return None;
    }
    let narrow = u32::try_from(value).ok()?;
    Some(i64::from(narrow.swap_bytes()))
}
/// Scores whether the byte order used to read `value` looks right for format `f`.
///
/// 0.0 when `value` itself is outside the plausible window, 0.5 when both `value` and
/// its byte-swapped form are plausible (ambiguous), and 1.0 when only `value` is.
fn endian_match(f: &Format, value: i64, width: u8) -> f64 {
    if !decode_in_window(f, value) {
        return 0.0;
    }
    let swapped_also_fits =
        byte_swapped(value, width).is_some_and(|flipped| decode_in_window(f, flipped));
    if swapped_also_fits {
        0.5
    } else {
        1.0
    }
}
/// Scores whether the artifact hint mentions format `f`.
///
/// The hint is split on non-alphanumeric ASCII characters. Any token of at least three
/// bytes that appears (case-insensitively) in the format's id, family or label gives
/// 1.0; otherwise the score is 0.2.
fn artifact_match(f: &Format, hint: &str) -> f64 {
    let haystack = format!("{} {} {}", f.id, f.family, f.label).to_lowercase();
    for token in hint.split(|c: char| !c.is_ascii_alphanumeric()) {
        if token.len() >= 3 && haystack.contains(&token.to_lowercase()) {
            return 1.0;
        }
    }
    0.2
}
/// Scores how consistently neighbouring raw values behave under format `f`.
///
/// With fewer than two neighbours the score is 1.0 if the single neighbour decodes
/// inside the plausible window and 0.0 otherwise (including for an empty slice).
/// With two or more, it is the fraction of adjacent pairs where both values decode
/// inside the window and the raw ordering agrees with the decoded ordering.
fn neighbour_monotonicity(f: &Format, neighbours: &[i64]) -> f64 {
    match neighbours {
        [] => return 0.0,
        [only] => return f64::from(u8::from(decode_in_window(f, *only))),
        _ => {}
    }

    let mut total = 0u32;
    let mut consistent = 0u32;
    for (&prev, &next) in neighbours.iter().zip(neighbours.iter().skip(1)) {
        total += 1;
        let (Ok(t_prev), Ok(t_next)) = (f.decode_int(prev), f.decode_int(next)) else {
            continue;
        };
        let both_plausible = decode_in_window(f, prev) && decode_in_window(f, next);
        let same_direction = (next >= prev) == (t_next.0 >= t_prev.0);
        if both_plausible && same_direction {
            consistent += 1;
        }
    }
    f64::from(consistent) / f64::from(total)
}
/// 730 days expressed in nanoseconds; the ramp length used by [`magnitude_fit`].
const TWO_YEARS_NS: i128 = 730 * 86_400 * 1_000_000_000;

/// Scores how far past its own epoch an embedded-epoch reading lies.
///
/// For `Strategy::Embedded` the score is 0.0 at or before the epoch and rises linearly
/// to 1.0 at `TWO_YEARS_NS` after it. Every other strategy scores 1.0.
fn magnitude_fit(strategy: Strategy, instant: PosixNs) -> f64 {
    match strategy {
        Strategy::LinearInt { .. } | Strategy::LinearFloat { .. } | Strategy::Packed(_) => 1.0,
        Strategy::Embedded { epoch_ns, .. } => {
            let since_epoch = instant.0 - epoch_ns;
            if since_epoch > 0 {
                (since_epoch as f64 / TWO_YEARS_NS as f64).min(1.0)
            } else {
                0.0
            }
        }
    }
}
/// Scores whether `value` uses the sub-second precision its unit offers.
///
/// Packed formats and units with no sub-second digits score 1.0. Otherwise each
/// trailing decimal zero (capped at the unit's sub-second digit count) lowers the score
/// proportionally, reaching 0.0 when every sub-second digit is zero.
fn granularity_match(strategy: Strategy, value: i64) -> f64 {
    let unit: Unit = match strategy {
        Strategy::Packed(_) => return 1.0,
        Strategy::LinearInt { unit, .. }
        | Strategy::LinearFloat { unit, .. }
        | Strategy::Embedded { unit, .. } => unit,
    };
    match unit.sub_second_digits() {
        0 => 1.0,
        digits => {
            let zeros = trailing_zeros_base10(value).min(digits);
            1.0 - f64::from(zeros) / f64::from(digits)
        }
    }
}
/// Counts the trailing decimal zeros in the magnitude of `value`.
///
/// Zero itself is reported as having no trailing zeros.
fn trailing_zeros_base10(value: i64) -> u32 {
    let mut rest = value.unsigned_abs();
    let mut zeros = 0;
    // The `rest != 0` guard makes zero report 0 and keeps the loop from spinning on it.
    while rest != 0 && rest.is_multiple_of(10) {
        rest /= 10;
        zeros += 1;
    }
    zeros
}
/// Folds the named sub-scores into one weighted mean.
///
/// The components listed in `DOUBLE_WEIGHT` count twice; all others count once.
/// An empty component list scores 0.0.
fn overall_score(components: &[(&'static str, f64)]) -> f64 {
    const DOUBLE_WEIGHT: [&str; 6] = [
        "in_window",
        "magnitude_fit",
        "not_sentinel",
        "byte_width_match",
        "endian_match",
        "neighbour_monotonicity",
    ];

    let mut weighted_sum = 0.0;
    let mut weight_total = 0.0;
    for &(name, value) in components {
        let weight = if DOUBLE_WEIGHT.contains(&name) {
            2.0
        } else {
            1.0
        };
        weighted_sum += weight * value;
        weight_total += weight;
    }

    if weight_total == 0.0 {
        0.0
    } else {
        weighted_sum / weight_total
    }
}
pub fn interpret_hex(hex: &str) -> Result<Vec<(String, Vec<Candidate>)>, ChronoError> {
let clean: String = hex
.chars()
.filter(|c| !c.is_whitespace() && *c != '_' && *c != ':')
.collect();
let clean = clean.strip_prefix("0x").unwrap_or(&clean);
let bytes = hex::decode(clean).map_err(|_| ChronoError::OutOfRange {
what: "hex (not valid hex bytes)",
value: 0,
})?;
let mut out = Vec::new();
for (label, value, width, endian) in byte_ints(&bytes) {
let ctx = InterpretContext {
observed_width_bytes: Some(width),
endian: Some(endian),
..Default::default()
};
out.push((label, interpret_int_with_context(value, &ctx)));
}
if let Some(four) = bytes.get(..4).and_then(|s| <[u8; 4]>::try_from(s).ok()) {
let lo = u16::from_le_bytes([four[0], four[1]]);
let hi = u16::from_le_bytes([four[2], four[3]]);
let fat_ctx = InterpretContext {
observed_width_bytes: Some(4),
..Default::default()
};
if let Some(c) = decode_one("fat", (i64::from(lo) << 16) | i64::from(hi), &fat_ctx) {
out.push(("FAT/DOS bytes date|time (LE words)".to_string(), vec![c]));
}
if let Some(c) = decode_one("fat", (i64::from(hi) << 16) | i64::from(lo), &fat_ctx) {
out.push((
"FAT/DOS bytes time|date (LE words, directory order)".to_string(),
vec![c],
));
}
}
if let Some(sixteen) = bytes.get(..16) {
if let Some(c) = systemtime_candidate(sixteen) {
out.push((
"SYSTEMTIME (16-byte struct, LE u16 fields)".to_string(),
vec![c],
));
}
}
if bytes
.get(..8)
.and_then(|s| <[u8; 8]>::try_from(s).ok())
.is_some_and(|e| u64::from_le_bytes(e) == u64::MAX)
{
out.push(("u64 all-ones".to_string(), vec![all_ones_sentinel()]));
}
Ok(out)
}
/// Decodes the start of `b` as a 16-byte SYSTEMTIME struct.
///
/// The struct is read as eight little-endian 16-bit fields: year, month, day-of-week,
/// day, hour, minute, second, milliseconds. The day-of-week field (index 2) is not
/// used. The civil time is converted with a zero offset.
///
/// Returns `None` when `b` is too short, when a field does not fit the civil date-time
/// component types, when the milliseconds field exceeds 999, or when the fields do not
/// form a valid date-time.
fn systemtime_candidate(b: &[u8]) -> Option<Candidate> {
    let field = |i: usize| -> Option<u16> {
        let lo = *b.get(i * 2)?;
        let hi = *b.get(i * 2 + 1)?;
        Some(u16::from_le_bytes([lo, hi]))
    };
    let year = i16::try_from(field(0)?).ok()?;
    let month = i8::try_from(field(1)?).ok()?;
    let day = i8::try_from(field(3)?).ok()?;
    let hour = i8::try_from(field(4)?).ok()?;
    let minute = i8::try_from(field(5)?).ok()?;
    let second = i8::try_from(field(6)?).ok()?;
    let millis = field(7)?;
    // Reject out-of-range milliseconds up front: a raw u16 of 2148 or more would overflow
    // the i32 multiplication below (panic in debug, silent wrap-around in release).
    if millis > 999 {
        return None;
    }
    let subsec_nanos = i32::from(millis) * 1_000_000;
    let instant = civil_to_posix(year, month, day, hour, minute, second, subsec_nanos, 0)?;
    Some(string_candidate(
        "systemtime",
        "Microsoft 128-bit SYSTEMTIME",
        "[MS-DTYP] §2.3.13 SYSTEMTIME (8× little-endian WORD fields)",
        instant,
        "decoded as a 16-byte SYSTEMTIME struct (UTC unless the source noted local)",
    ))
}
/// Lists the integer readings of the leading bytes of `b`.
///
/// Yields `(label, value, width in bytes, byte order)` for the first four bytes as a
/// `u32` (LE then BE) and the first eight bytes as a `u64` (LE then BE). `u64` values
/// that do not fit in an `i64` are skipped. Labels gain a ` (first W of N)` suffix when
/// `b` is longer than the width read.
fn byte_ints(b: &[u8]) -> Vec<(String, i64, u8, Endian)> {
    let total = b.len();
    let describe = |kind: &str, w: usize| -> String {
        if total > w {
            format!("{kind} (first {w} of {total})")
        } else {
            kind.to_string()
        }
    };

    let mut ints: Vec<(String, i64, u8, Endian)> = Vec::new();

    if let Some(four) = b.get(..4).and_then(|s| <[u8; 4]>::try_from(s).ok()) {
        let le = i64::from(u32::from_le_bytes(four));
        let be = i64::from(u32::from_be_bytes(four));
        ints.push((describe("u32 LE", 4), le, 4, Endian::Little));
        ints.push((describe("u32 BE", 4), be, 4, Endian::Big));
    }

    if let Some(eight) = b.get(..8).and_then(|s| <[u8; 8]>::try_from(s).ok()) {
        let orders = [
            ("u64 LE", u64::from_le_bytes(eight), Endian::Little),
            ("u64 BE", u64::from_be_bytes(eight), Endian::Big),
        ];
        for (kind, raw, endian) in orders {
            // Values above i64::MAX cannot be represented and are left out.
            if let Ok(n) = i64::try_from(raw) {
                ints.push((describe(kind, 8), n, 8, endian));
            }
        }
    }

    ints
}
/// Builds the placeholder candidate reported for an all-ones 64-bit value.
///
/// It carries no rendering, a zero score and the sentinel flag; its instant is a
/// placeholder zero rather than a decoded time.
fn all_ones_sentinel() -> Candidate {
    let note = String::from(
        "0xFFFFFFFFFFFFFFFF — all-ones; commonly an 'unset'/'never' marker, not a real instant",
    );
    let components = vec![("not_sentinel", 0.0)];
    Candidate {
        format_id: "sentinel",
        label: "all-ones value (0xFFFFFFFFFFFFFFFF)",
        citation: "",
        instant: PosixNs(0),
        rendered: None,
        score: 0.0,
        components,
        assumptions: vec![note],
        sentinel: true,
    }
}
/// Tries every supported textual timestamp format on `text`.
///
/// The input is trimmed first. Each parser that accepts it contributes one candidate,
/// in this fixed order: ISO 8601 / RFC 3339, ASN.1 GeneralizedTime, ASN.1 UTCTime,
/// ULID, UUIDv1, RFC 2822, EXIF. An unrecognised string yields an empty vector.
#[must_use]
pub fn interpret_string(text: &str) -> Vec<Candidate> {
    let s = text.trim();

    let iso8601 = s.parse::<jiff::Timestamp>().ok().map(|ts| {
        string_candidate(
            "iso8601",
            "ISO 8601 / RFC 3339 string",
            "ISO 8601:2019 / RFC 3339",
            PosixNs(ts.as_nanosecond()),
            "parsed as an ISO 8601 / RFC 3339 string (offset normalised to UTC)",
        )
    });

    let generalized = parse_asn1_generalizedtime(s).map(|(instant, had_tz)| {
        string_candidate(
            "asn1_generalizedtime",
            "ASN.1 GeneralizedTime",
            "ITU-T X.680 / RFC 5280 §4.1.2.5.2",
            instant,
            &asn1_assumption("GeneralizedTime (4-digit year)", had_tz),
        )
    });

    let utctime = parse_asn1_utctime(s).map(|(instant, had_tz)| {
        string_candidate(
            "asn1_utctime",
            "ASN.1 UTCTime",
            "ITU-T X.680 / RFC 5280 §4.1.2.5.1",
            instant,
            &asn1_assumption(
                "UTCTime (2-digit year; RFC 5280 pivot: <50 => 20YY, else 19YY)",
                had_tz,
            ),
        )
    });

    let ulid = parse_ulid(s).map(|instant| {
        string_candidate(
            "ulid",
            "ULID (first 48 bits = Unix ms)",
            "ULID spec (Crockford base32; 48-bit ms timestamp)",
            instant,
            "parsed as a ULID — the leading 48 bits are milliseconds since the Unix epoch",
        )
    });

    let uuid_v1 = parse_uuid_v1(s).map(|instant| {
        string_candidate(
            "uuid_v1",
            "UUID version 1 (100ns since 1582-10-15)",
            "RFC 9562 §5.1 (UUIDv1 60-bit Gregorian timestamp)",
            instant,
            "parsed as a UUIDv1 — a 60-bit count of 100ns intervals since 1582-10-15 UTC",
        )
    });

    let rfc2822 = parse_rfc2822(s).map(|instant| {
        string_candidate(
            "rfc2822",
            "RFC 2822 / email date",
            "RFC 5322 §3.3 (date-time; via jiff)",
            instant,
            "parsed as an RFC 2822 date-time (offset normalised to UTC)",
        )
    });

    let exif = parse_exif(s).map(|instant| {
        string_candidate(
            "exif",
            "EXIF DateTime (YYYY:MM:DD HH:MM:SS)",
            "CIPA DC-008 (EXIF) DateTime / DateTimeOriginal",
            instant,
            "parsed as an EXIF DateTime; NO offset is stored — assumed UTC, but is usually local time",
        )
    });

    // Array order is the output order; parsers that rejected the input drop out.
    [iso8601, generalized, utctime, ulid, uuid_v1, rfc2822, exif]
        .into_iter()
        .flatten()
        .collect()
}
fn parse_ulid(s: &str) -> Option<PosixNs> {
const ALPHABET: &[u8; 32] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ";
if s.len() != 26 {
return None;
}
let mut value: u128 = 0;
for ch in s.bytes() {
let up = ch.to_ascii_uppercase();
let idx = ALPHABET.iter().position(|&a| a == up)?;
value = value.checked_mul(32)?.checked_add(idx as u128)?;
}
let ms = i128::from(u64::try_from(value >> 80).ok()?);
Some(PosixNs(ms.checked_mul(Unit::Millis.nanos())?))
}
/// Offset in nanoseconds from the Unix epoch to the UUIDv1 epoch (1582-10-15); negative.
const UUID_V1_EPOCH_NS: i128 = -12_219_292_800 * 1_000_000_000;

/// Extracts the timestamp from a version-1 UUID string.
///
/// Hyphens are ignored; the rest must be exactly 32 hex digits with version nibble 1.
/// The 60-bit count of 100 ns ticks is assembled from the `time_hi`, `time_mid` and
/// `time_low` fields and shifted onto the Unix epoch. Anything else yields `None`.
fn parse_uuid_v1(s: &str) -> Option<PosixNs> {
    let hex: String = s.chars().filter(|&c| c != '-').collect();
    let well_formed = hex.len() == 32 && hex.bytes().all(|b| b.is_ascii_hexdigit());
    if !well_formed {
        return None;
    }

    let group = |range: std::ops::Range<usize>| -> Option<u64> {
        u64::from_str_radix(hex.get(range)?, 16).ok()
    };
    let time_low = group(0..8)?;
    let time_mid = group(8..12)?;
    let time_hi_version = group(12..16)?;

    let version = time_hi_version >> 12;
    if version != 1 {
        return None;
    }

    let ticks = ((time_hi_version & 0x0FFF) << 48) | (time_mid << 32) | time_low;
    let ns = i128::from(ticks)
        .checked_mul(100)?
        .checked_add(UUID_V1_EPOCH_NS)?;
    Some(PosixNs(ns))
}
/// Parses an RFC 2822 date-time with jiff and returns its instant, or `None` when the
/// string is not accepted.
fn parse_rfc2822(s: &str) -> Option<PosixNs> {
    let zoned = jiff::fmt::rfc2822::parse(s).ok()?;
    Some(PosixNs(zoned.timestamp().as_nanosecond()))
}
/// Parses an EXIF-style `YYYY:MM:DD HH:MM:SS` string.
///
/// The date and time halves are split on the first space and each must have exactly
/// three `:`-separated numeric fields. The civil time is converted with a zero offset.
/// Returns `None` for any other shape or for an invalid date-time.
fn parse_exif(text: &str) -> Option<PosixNs> {
    let (date, time) = text.trim().split_once(' ')?;
    let date_fields: Vec<&str> = date.split(':').collect();
    let time_fields: Vec<&str> = time.split(':').collect();
    let (&[year, month, day], &[hour, minute, second]) =
        (date_fields.as_slice(), time_fields.as_slice())
    else {
        return None;
    };

    let year: i16 = year.parse().ok()?;
    let month: i8 = month.parse().ok()?;
    let day: i8 = day.parse().ok()?;
    let hour: i8 = hour.parse().ok()?;
    let minute: i8 = minute.parse().ok()?;
    let second: i8 = second.parse().ok()?;
    civil_to_posix(year, month, day, hour, minute, second, 0, 0)
}
/// Builds the assumption text for an ASN.1 time reading of the given `kind`.
///
/// When the input carried no timezone designator, the text also warns that UTC was
/// assumed.
fn asn1_assumption(kind: &str, had_tz: bool) -> String {
    if !had_tz {
        return format!(
            "parsed as ASN.1 {kind}; NO timezone designator — assumed UTC, but may be local time"
        );
    }
    format!("parsed as ASN.1 {kind}")
}
/// Builds a candidate for a self-describing (string or struct) reading.
///
/// Such readings get a fixed score of 1.0, three fixed components, a single assumption
/// and are never flagged as sentinels. `rendered` is whatever `to_rfc3339` returns for
/// `instant`.
fn string_candidate(
    format_id: &'static str,
    label: &'static str,
    citation: &'static str,
    instant: PosixNs,
    assumption: &str,
) -> Candidate {
    let rendered = instant.to_rfc3339();
    let components = vec![
        ("representable", 1.0),
        ("self_describing", 1.0),
        ("not_sentinel", 1.0),
    ];
    let assumptions = vec![String::from(assumption)];
    Candidate {
        format_id,
        label,
        citation,
        instant,
        rendered,
        score: 1.0,
        components,
        assumptions,
        sentinel: false,
    }
}
/// Splits a trailing timezone designator off an ASN.1-style time string.
///
/// Returns `(core, offset_seconds, had_designator)`:
/// - a trailing `Z`/`z` gives offset 0 with `had_designator == true`;
/// - a trailing `+HHMM` / `-HHMM` gives the signed offset in seconds;
/// - otherwise the whole string is returned with offset 0 and `had_designator == false`.
///
/// Returns `None` only when a numeric offset is present but out of range
/// (hours above 23 or minutes above 59).
fn split_tz(s: &str) -> Option<(String, i64, bool)> {
    if let Some(core) = s.strip_suffix('Z').or_else(|| s.strip_suffix('z')) {
        return Some((core.to_string(), 0, true));
    }
    // `str::split_at` panics when the index is not on a char boundary, which arbitrary
    // (non-ASCII) input can trigger. A split inside a multi-byte character cannot start
    // with an ASCII sign, so such input simply has no numeric offset.
    let offset_start = s
        .len()
        .checked_sub(5)
        .filter(|&at| s.is_char_boundary(at));
    if let Some(at) = offset_start {
        let (core, suf) = s.split_at(at);
        let b = suf.as_bytes();
        let signed = b[0] == b'+' || b[0] == b'-';
        if signed && b[1..].iter().all(u8::is_ascii_digit) {
            // `suf` is pure ASCII here, so these sub-slices are always on boundaries.
            let hh: i64 = suf.get(1..3)?.parse().ok()?;
            let mm: i64 = suf.get(3..5)?.parse().ok()?;
            if hh > 23 || mm > 59 {
                return None;
            }
            let mag = hh * 3600 + mm * 60;
            let offset = if b[0] == b'-' { -mag } else { mag };
            return Some((core.to_string(), offset, true));
        }
    }
    Some((s.to_string(), 0, false))
}
/// Converts civil date-time fields at a fixed UTC offset into an instant.
///
/// Returns `None` when the fields do not form a valid civil date-time, when
/// `offset_secs` is not a valid offset, or when the conversion to a timestamp fails.
// One parameter per civil field keeps call sites explicit, hence the argument count.
#[allow(clippy::too_many_arguments)]
fn civil_to_posix(
    y: i16,
    mo: i8,
    d: i8,
    h: i8,
    mi: i8,
    s: i8,
    subsec_nanos: i32,
    offset_secs: i64,
) -> Option<PosixNs> {
    let civil = jiff::civil::DateTime::new(y, mo, d, h, mi, s, subsec_nanos).ok()?;
    let offset_secs = i32::try_from(offset_secs).ok()?;
    let offset = jiff::tz::Offset::from_seconds(offset_secs).ok()?;
    let zone = jiff::tz::TimeZone::fixed(offset);
    civil
        .to_zoned(zone)
        .ok()
        .map(|zoned| PosixNs(zoned.timestamp().as_nanosecond()))
}
/// Converts the digits after a decimal point into nanoseconds.
///
/// Only the first nine characters are used; shorter input is right-padded with zeros.
/// Input that does not parse as a number yields 0.
fn frac_to_nanos(frac: &str) -> i32 {
    let padded: String = frac
        .chars()
        .chain(std::iter::repeat('0'))
        .take(9)
        .collect();
    padded.parse().unwrap_or(0)
}
/// Parses an ASN.1 time string whose year has `year_digits` digits.
///
/// Accepted shape: year, month, day, hour, then optional minutes and seconds (two
/// digits each), an optional `.`/`,` fraction, and an optional timezone designator as
/// understood by `split_tz`. A fraction is only accepted when seconds are present.
/// A year width other than 4 is read as a two-digit year with the pivot
/// `<50 => 20YY, else 19YY`.
///
/// Returns the instant and whether a timezone designator was present, or `None` when
/// the string does not match or the fields do not form a valid date-time.
fn parse_asn1(s: &str, year_digits: usize) -> Option<(PosixNs, bool)> {
    let (core, off, had_tz) = split_tz(s)?;
    let (digits, frac) = match core.split_once(['.', ',']) {
        Some((whole, fraction)) => (whole, Some(fraction)),
        None => (core.as_str(), None),
    };
    if digits.bytes().any(|c| !c.is_ascii_digit()) {
        return None;
    }

    // After the year come 6 digits (MMDDhh), 8 (…mm) or 10 (…mmss); nothing else is valid.
    let base = year_digits;
    let (has_minutes, has_seconds) = match digits.len().checked_sub(base)? {
        6 => (false, false),
        8 => (true, false),
        10 => (true, true),
        _ => return None,
    };

    let year: i16 = if year_digits == 4 {
        digits.get(0..4)?.parse().ok()?
    } else {
        let yy: i16 = digits.get(0..2)?.parse().ok()?;
        if yy < 50 {
            2000 + yy
        } else {
            1900 + yy
        }
    };

    let two_digits =
        |start: usize| -> Option<i8> { digits.get(start..start + 2)?.parse().ok() };
    let month = two_digits(base)?;
    let day = two_digits(base + 2)?;
    let hour = two_digits(base + 4)?;
    let minute = if has_minutes {
        two_digits(base + 6)?
    } else {
        0
    };
    let second = if has_seconds {
        two_digits(base + 8)?
    } else {
        0
    };

    let subsec = match frac {
        None => 0,
        Some(f) => {
            let usable = has_seconds && !f.is_empty() && f.bytes().all(|c| c.is_ascii_digit());
            if !usable {
                return None;
            }
            frac_to_nanos(f)
        }
    };

    civil_to_posix(year, month, day, hour, minute, second, subsec, off)
        .map(|instant| (instant, had_tz))
}
/// Parses an ASN.1 GeneralizedTime string (four-digit year) via `parse_asn1`.
fn parse_asn1_generalizedtime(s: &str) -> Option<(PosixNs, bool)> {
    const YEAR_DIGITS: usize = 4;
    parse_asn1(s, YEAR_DIGITS)
}
/// Parses an ASN.1 UTCTime string (two-digit year) via `parse_asn1`.
fn parse_asn1_utctime(s: &str) -> Option<(PosixNs, bool)> {
    const YEAR_DIGITS: usize = 2;
    parse_asn1(s, YEAR_DIGITS)
}