ako 0.0.3

Ako is a Rust crate that offers a practical and human-friendly approach to creating, manipulating, formatting and converting dates, times and timestamps.
Documentation
use alloc::string::ToString;
use alloc::sync::Arc;
use alloc::vec::Vec;
use core::array::TryFromSliceError;
use core::str::from_utf8;
use std::fs;
use std::path::Path;

use crate::error::{ErrorKind, ResultExt};
use crate::time_zone::location::{Location, Transition, Zone};

/// Loads the time-zone information file (TZif) found at `path` and parses it
/// into a [`Location`].
///
/// The whole file is read into memory first; failures to read it are reported
/// through `context_tzif`, and the bytes are then handed to [`parse`].
pub fn read<P: AsRef<Path>>(path: P) -> crate::Result<Location> {
    let bytes = fs::read(path).context_tzif()?;

    // the parser consumes its input by advancing a slice cursor
    let mut cursor: &[u8] = bytes.as_slice();

    parse(&mut cursor)
}

/// Parses a time-zone information file (TZif) from the input stream.
///
/// Only the transition times, the transition zone indices and the local time
/// type records (with their designations) are read. For version 2+ files the
/// legacy version 1 data block is skipped and the 64-bit data block that
/// follows it is parsed instead.
///
/// # Errors
///
/// Returns [`ErrorKind::TimeZoneFile`] when the input is shorter than a TZif
/// header, does not start with the `TZif` magic, declares an unknown version,
/// or ends inside the version 1 data block that has to be skipped. Errors from
/// parsing the transitions and zones are propagated.
fn parse(input: &mut &[u8]) -> crate::Result<Location> {
    // https://linux.die.net/man/5/tzfile
    // https://www.man7.org/linux/man-pages/man5/tzfile.5.html
    // https://datatracker.ietf.org/doc/html/rfc8536

    if input.len() < 44 {
        // not large enough for the TZif header
        // (4 magic + 1 version + 15 reserved + 6 * 4 counts)
        return Err(ErrorKind::TimeZoneFile.into());
    }

    // the time-zone information format (tzif) begins with a
    // four-octet ASCII sequence TZif to identify them as
    // time-zone information files

    let (magic, remaining) = input.split_at(4);
    *input = remaining;

    if magic != b"TZif" {
        // definitely not a TZif file
        return Err(ErrorKind::TimeZoneFile.into());
    }

    // 1-byte version identifies the version of the file's format
    // immediately followed by 15 bytes reserved for future use

    let (p, remaining) = input.split_at(16);
    *input = remaining;

    let version = match p[0] {
        // version 1 is legacy and uses 32-bits for timestamps
        0 | b'1' => 1,

        // version 2 uses 64-bits for timestamps
        // additionally, there is a POSIX timezone footer to handle
        // moments after the final zone transition
        b'2' => 2,

        // minor extensions to the POSIX timezone footer
        b'3' => 3,

        // only minor adjustments to the leap second data structure
        b'4' => 4,

        _ => {
            // unexpected time-zone information file (tzif) version
            return Err(ErrorKind::TimeZoneFile.into());
        }
    };

    // size (in octets) of a transition time in the data block being read;
    // 4 for the version 1 block, 8 for the version 2+ block
    let mut time_size = 4;

    loop {
        // at this point at least 24 octets are available: guaranteed by the
        // length check above on the first pass, and by the check made before
        // `continue` on the second pass

        // the number of UTC to local indicators
        // should either be zero or equal to `typecnt`

        let isutcnt = get_usize(input, 4).context_tzif()?;

        // the number of standard/wall indicators
        // should either be zero or equal to `typecnt`

        let isstdcnt = get_usize(input, 4).context_tzif()?;

        // number of leap-second records

        let leapcnt = get_usize(input, 4).context_tzif()?;

        // number of transition times

        let timecnt = get_usize(input, 4).context_tzif()?;

        // the number of local time type records
        // must not be zero

        let typecnt = get_usize(input, 4).context_tzif()?;

        // the total number of octets used by the set of time-zone designations
        // must not be zero

        let charcnt = get_usize(input, 4).context_tzif()?;

        // if this is a version 2+ file, and we have not *yet* skipped, do so now

        if version >= 2 && time_size == 4 {
            // skip past the version 1 data block
            skip_records(input, timecnt, 4)?; // transition times
            skip_records(input, timecnt, 1)?; // transition zone indices
            skip_records(input, typecnt, 6)?; // transition zones
            skip_records(input, charcnt, 1)?; // transition zone abbreviations

            // each version 1 leap-second record is a 4-octet occurrence
            // followed by a 4-octet correction
            skip_records(input, leapcnt, 4 + 4)?; // leap-seconds
            skip_records(input, isstdcnt, 1)?; // standard/wall indicators
            skip_records(input, isutcnt, 1)?; // utc/local indicators

            // skip past the initial parts of the second header
            skip_records(input, 1, 4 + 1 + 15)?; // magic + version + padding

            if input.len() < 6 * 4 {
                // not large enough for the six counts of the second header
                return Err(ErrorKind::TimeZoneFile.into());
            }

            // update time_size (so we don't skip again),
            // but we re-read the counts (as the version 1 section can
            // be a subset of the version 2+ section)
            time_size = 8;
            continue;
        }

        // transitions
        let transitions = parse_transitions(timecnt, input, time_size)?;

        // zones
        let zones = parse_zones(typecnt, charcnt, input)?;

        // do not read standard/wall or utc/local indicators
        // these are only used when a time-zone file is used in handling POSIX-style
        // time-zone environment variables.. and according to the man page for `zic -p`
        // this is a legacy and poorly supported feature

        return Ok(Location {
            zones: zones.into_boxed_slice(),
            transitions: transitions.into_boxed_slice(),
        });
    }
}

/// Advances the input stream past `count` records of `width` octets each.
///
/// Returns [`ErrorKind::TimeZoneFile`] (instead of panicking) when the total
/// size overflows or the stream holds fewer octets than requested; the stream
/// is left untouched in that case.
fn skip_records(input: &mut &[u8], count: usize, width: usize) -> crate::Result<()> {
    let data: &[u8] = *input;

    let Some(len) = count.checked_mul(width) else {
        return Err(ErrorKind::TimeZoneFile.into());
    };

    let Some(rest) = data.get(len..) else {
        return Err(ErrorKind::TimeZoneFile.into());
    };

    *input = rest;

    Ok(())
}

/// Parse transitions from the input stream.
fn parse_transitions(
    n: usize,
    input: &mut &[u8],
    time_size: usize,
) -> crate::Result<Vec<Transition>> {
    let mut transitions = Vec::with_capacity(n);

    // transition time

    for _ in 0..n {
        let when = get_int(input, time_size).context_tzif()?;

        transitions.push(Transition {
            when,
            // set in the next data block
            zone: 0,
        });
    }

    // transition zone index

    for i in 0..n {
        transitions[i].zone = get_u8(input).context_tzif()?;
    }

    Ok(transitions)
}

/// Parse zones from the input stream.
///
/// The stream is expected to hold `n_zones` six-octet local time type records
/// (a 4-octet offset, a 1-octet DST flag and a 1-octet index into the
/// designations) immediately followed by `n_name` octets of NUL-terminated
/// designations. Only the records are consumed from the stream; the
/// designations are read in place.
///
/// # Errors
///
/// Returns [`ErrorKind::TimeZoneFile`] when the stream is too short for the
/// announced records and designations, or when a record points at a
/// designation that is out of range or not NUL-terminated. Designations that
/// are not valid UTF-8 are reported through `context_tzif`.
fn parse_zones(n_zones: usize, n_name: usize, input: &mut &[u8]) -> crate::Result<Vec<Zone>> {
    let data: &[u8] = *input;

    // skip ahead and get the squished names first
    // (checked, as both counts come straight from the file header)
    let raw_names = n_zones
        .checked_mul(6)
        .and_then(|start| Some((start, start.checked_add(n_name)?)))
        .and_then(|(start, end)| data.get(start..end));

    let Some(raw_names) = raw_names else {
        // not enough data for the announced zones and names
        return Err(ErrorKind::TimeZoneFile.into());
    };

    let names = from_utf8(raw_names).context_tzif()?;

    // bounded by the input length thanks to the check above
    let mut zones = Vec::with_capacity(n_zones);

    // zones

    for _ in 0..n_zones {
        let offset = get_i32(input).context_tzif()?;
        let dst = get_u8(input).context_tzif()?;

        let name_offset: usize = get_u8(input).context_tzif()?.into();

        // `get` also rejects an offset that lands inside a multi-byte character
        let Some(tail) = names.get(name_offset..) else {
            // zone name starts past the end
            return Err(ErrorKind::TimeZoneFile.into());
        };

        let Some(name_len) = tail.find('\0') else {
            // zone name end was not found
            return Err(ErrorKind::TimeZoneFile.into());
        };

        let name = &tail[..name_len];

        zones.push(Zone {
            offset,
            dst: dst == 1,
            name: Arc::from(name.to_string()),
        });
    }

    Ok(zones)
}

/// Gets a single (unsigned) byte from an input stream.
///
/// On success the stream is advanced by one byte.
///
/// # Errors
///
/// Returns a [`TryFromSliceError`] when the stream is empty; the stream is
/// left untouched in that case.
fn get_u8(input: &mut &[u8]) -> Result<u8, TryFromSliceError> {
    let data: &[u8] = *input;

    // a short read falls back to an empty slice, which the array conversion
    // rejects; this reports an error where `split_at` would have panicked
    let bytes: [u8; 1] = data.get(..1).unwrap_or_default().try_into()?;

    *input = &data[1..];

    Ok(u8::from_be_bytes(bytes))
}

/// Gets an unsigned integer from an input stream, either four or eight bytes.
///
/// The value is read as big-endian and the stream is advanced by `size` bytes
/// on success.
///
/// # Errors
///
/// Returns a [`TryFromSliceError`] when the stream holds fewer than `size`
/// bytes, or when `size` is neither four nor eight; the stream is left
/// untouched in that case.
fn get_usize(input: &mut &[u8], size: usize) -> Result<usize, TryFromSliceError> {
    let data: &[u8] = *input;

    // a short read falls back to an empty slice, which the array conversions
    // below reject; this reports an error where `split_at` would have panicked
    let head = data.get(..size).unwrap_or_default();

    let value = if size == 4 {
        u64::from(u32::from_be_bytes(head.try_into()?))
    } else {
        u64::from_be_bytes(head.try_into()?)
    };

    // a successful conversion proves that `size` bytes were available
    *input = &data[size..];

    // NOTE: an eight-byte value is truncated on targets with a narrower `usize`
    Ok(value as usize)
}

/// Gets a signed 32-bit integer from an input stream.
///
/// The value is read as big-endian and the stream is advanced by four bytes
/// on success.
///
/// # Errors
///
/// Returns a [`TryFromSliceError`] when the stream holds fewer than four
/// bytes; the stream is left untouched in that case.
fn get_i32(input: &mut &[u8]) -> Result<i32, TryFromSliceError> {
    let data: &[u8] = *input;

    // a short read falls back to an empty slice, which the array conversion
    // rejects; this reports an error where `split_at` would have panicked
    let bytes: [u8; 4] = data.get(..4).unwrap_or_default().try_into()?;

    *input = &data[4..];

    Ok(i32::from_be_bytes(bytes))
}

/// Gets a signed 64-bit integer from an input stream.
///
/// The value is read as big-endian and the stream is advanced by eight bytes
/// on success.
///
/// # Errors
///
/// Returns a [`TryFromSliceError`] when the stream holds fewer than eight
/// bytes; the stream is left untouched in that case.
fn get_i64(input: &mut &[u8]) -> Result<i64, TryFromSliceError> {
    let data: &[u8] = *input;

    // a short read falls back to an empty slice, which the array conversion
    // rejects; this reports an error where `split_at` would have panicked
    let bytes: [u8; 8] = data.get(..8).unwrap_or_default().try_into()?;

    *input = &data[8..];

    Ok(i64::from_be_bytes(bytes))
}

/// Reads a signed integer of the requested width from an input stream.
///
/// A `size` of four reads a 32-bit value and widens it to `i64`; any other
/// `size` reads a full 64-bit value.
fn get_int(input: &mut &[u8], size: usize) -> Result<i64, TryFromSliceError> {
    match size {
        4 => get_i32(input).map(i64::from),
        _ => get_i64(input),
    }
}