unobtanium 3.0.0

Opinionated web search engine library with crawler and viewer companion.
Documentation
use chrono::DateTime;
use chrono::naive::NaiveDateTime;
use chrono::naive::NaiveDate;
use chrono::naive::NaiveTime;
use chrono::offset::Utc;
use chrono::offset::FixedOffset;

use std::time::SystemTime;

/// Alias for a chrono [DateTime] in the [Utc] time zone.
pub type UtcTimestamp = DateTime<Utc>;
/// Alias for a chrono [DateTime] that carries a [FixedOffset] from UTC.
pub type FixedOffsetTimestamp = DateTime<FixedOffset>;

/// Parses a date in the `%Y-%m-%d` format and returns the start of that day
/// (00:00:00), interpreted as UTC, as a fixed-offset timestamp.
///
/// Returns `None` when `unparsed` does not match the format.
fn parse_naive_date(unparsed: &str) -> Option<FixedOffsetTimestamp> {
	let day = NaiveDate::parse_from_str(unparsed, "%Y-%m-%d").ok()?;
	// 00:00:00 is always a valid time of day, so this cannot panic.
	let midnight = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
	let start_of_day = NaiveDateTime::new(day, midnight);
	let as_utc = UtcTimestamp::from_naive_utc_and_offset(start_of_day, Utc);
	Some(as_utc.into())
}

/// Parses a zone-less date and time according to the chrono `format` string
/// and interprets the result as UTC.
///
/// Returns `None` when `unparsed` does not match `format`.
fn parse_naive_datetime(unparsed: &str, format: &str) -> Option<FixedOffsetTimestamp> {
	let naive = NaiveDateTime::parse_from_str(unparsed, format).ok()?;
	let as_utc = UtcTimestamp::from_naive_utc_and_offset(naive, Utc);
	Some(as_utc.into())
}

/// Lenient variant of [parse_w3_datetime] for timestamps in the W3C format:
/// <https://www.w3.org/TR/NOTE-datetime>
///
/// **This parser is not standards compliant!**
///
/// The input is normalised before it is handed to the strict parser,
/// which tolerates these quirks:
/// * lowercase letters (the input is ASCII-uppercased)
/// * a space in place of the `T` (every space is replaced by `T`)
///
/// If a string is longer than 40 bytes, this will always return `None`.
pub fn parse_w3_datetime_with_quirks(unparsed: &str) -> Option<FixedOffsetTimestamp> {
	if unparsed.len() > 40 {
		return None;
	}
	let normalized = unparsed.to_ascii_uppercase().replace(" ", "T");
	parse_w3_datetime(&normalized)
}

/// Parses a timestamp as specified by the w3:
/// <https://www.w3.org/TR/NOTE-datetime>
///
/// Supported granularities are year, year-month, complete date, and complete
/// date plus time with minutes, seconds or a decimal fraction of a second.
/// Inputs without a zone designator are interpreted as UTC; a trailing `Z`
/// is treated as the offset `+00:00`.
///
/// Returns `None` when the input matches none of these forms.
/// If a string is longer than 40 bytes, this will always return `None`.
pub fn parse_w3_datetime(unparsed: &str) -> Option<FixedOffsetTimestamp> {
	let len = unparsed.len();
	// Stop too long date strings from blowing up the parsers
	if len > 40 {
		return None;
	}
	// Rewrite the `Z` designator as a numeric offset so `%:z` can parse it.
	let u = match unparsed.strip_suffix('Z') {
		Some(without_zone_suffix) => format!("{without_zone_suffix}+00:00"),
		None => unparsed.to_owned(),
	};
	// The zone-less forms without fractions are selected by the length of
	// the original input.
	match len {
		4 => return parse_naive_date(&format!("{unparsed}-01-01")),
		7 => return parse_naive_date(&format!("{unparsed}-01")),
		10 => return parse_naive_date(unparsed),
		16 => return parse_naive_datetime(&u, "%Y-%m-%dT%H:%M"),
		19 => return parse_naive_datetime(&u, "%Y-%m-%dT%H:%M:%S"),
		_ => {}
	}
	if u.contains('.') {
		// `%.f` reads the digits after the dot as a decimal fraction of a
		// second. The plain `%f` specifier would read them as an integer
		// number of nanoseconds instead, turning `.5` into 5ns.
		// `%.f` treats the dot as optional, so this branch is only taken
		// when the input actually contains one.
		return parse_naive_datetime(&u, "%Y-%m-%dT%H:%M:%S%.f")
			.or_else(|| DateTime::parse_from_str(&u, "%Y-%m-%dT%H:%M:%S%.f%:z").ok());
	}
	DateTime::parse_from_str(&u, "%Y-%m-%dT%H:%M%:z")
		.or_else(|_| DateTime::parse_from_str(&u, "%Y-%m-%dT%H:%M:%S%:z"))
		.ok()
}

/// Converts an optional unix timestamp to an optional [chrono::DateTime].
///
/// Returns `None` when the input is `None` or when
/// [chrono::DateTime::from_timestamp] rejects the value.
pub fn from_unix_timestamp_opt(unixtime: Option<i64>) -> Option<UtcTimestamp> {
	unixtime.and_then(|seconds| UtcTimestamp::from_timestamp(seconds, 0))
}

/// Converts a unix timestamp to a [chrono::DateTime]
/// and falls back to the unix epoch in the case of failure.
pub fn from_unix_timestamp_or_epoch(unixtime: i64) -> UtcTimestamp {
	match UtcTimestamp::from_timestamp(unixtime, 0) {
		Some(timestamp) => timestamp,
		None => UtcTimestamp::UNIX_EPOCH,
	}
}

/// Turns an optional [chrono::DateTime] into an optional unix timestamp,
/// passing `None` through unchanged.
///
/// For values that are not wrapped in an option, call
/// [chrono::DateTime::timestamp] directly.
pub fn to_unix_timestamp_opt(date_time: Option<UtcTimestamp>) -> Option<i64> {
	let timestamp = date_time?;
	Some(timestamp.timestamp())
}

pub fn now_utc() -> UtcTimestamp {
	SystemTime::now().into()
}

#[cfg(test)]
mod test {
	use super::*;

	/// Builds a reference value from a zone-less `%Y-%m-%d %H:%M:%S` string,
	/// interpreted as UTC. Panics if the string does not match.
	fn parse_reference_datetime_utc(datetime: &str) -> FixedOffsetTimestamp {
		let naive = NaiveDateTime::parse_from_str(datetime, "%Y-%m-%d %H:%M:%S").unwrap();
		UtcTimestamp::from_naive_utc_and_offset(naive, Utc).into()
	}

	/// Builds a reference value from a `%Y-%m-%d %H:%M:%S%:z` string that
	/// carries its own offset. Panics if the string does not match.
	fn parse_reference_datetime(datetime: &str) -> FixedOffsetTimestamp {
		let parsed = FixedOffsetTimestamp::parse_from_str(datetime, "%Y-%m-%d %H:%M:%S%:z");
		parsed.unwrap()
	}

	/// Runs `to_test` through both parsers and compares against `reference`.
	///
	/// The quirky parser must always produce `reference`. The standards
	/// compliant parser must produce it only when `not_quirky` is set and
	/// must return `None` otherwise.
	fn assert_datetime_parsing(to_test: &str, reference: &str, not_quirky: bool) {
		// References longer than 19 bytes are parsed with a zone offset.
		let has_zone = reference.len() > 19;
		let reference = if has_zone {
			parse_reference_datetime(reference)
		} else {
			parse_reference_datetime_utc(reference)
		};

		let regular_result = parse_w3_datetime(to_test);
		let quirky_result = parse_w3_datetime_with_quirks(to_test);

		let expected_regular_result = Some(reference).filter(|_| not_quirky);

		let regular_ok = regular_result == expected_regular_result;
		let quirky_ok = quirky_result == Some(reference);
		if !(regular_ok && quirky_ok) {
			panic!("Parsing the timestamp `{to_test}` into `{reference}` didn't go as expected.\nStandards compliant parser got {regular_result:?} (expected: {expected_regular_result:?})\nQuirky parser got {quirky_result:?} (expected: Some({reference:?}))");
		}
	}

	// Date-only granularities.

	#[test]
	fn parse_w3_datetime_day() {
		assert_datetime_parsing("2024-01-09", "2024-01-09 00:00:00", true);
	}

	#[test]
	fn parse_w3_datetime_month() {
		assert_datetime_parsing("2024-02", "2024-02-01 00:00:00", true);
	}

	#[test]
	fn parse_w3_datetime_year() {
		assert_datetime_parsing("2024", "2024-01-01 00:00:00", true);
	}

	// Date plus time, without a zone designator.

	#[test]
	fn parse_w3_datetime_year_space_minute() {
		assert_datetime_parsing("2024-01-09 21:58", "2024-01-09 21:58:00", false);
	}

	#[test]
	fn parse_w3_datetime_year_t_minute() {
		assert_datetime_parsing("2024-01-09T21:58", "2024-01-09 21:58:00", true);
	}

	#[test]
	fn parse_w3_datetime_year_space_second() {
		assert_datetime_parsing("2024-01-09 21:58:34", "2024-01-09 21:58:34", false);
	}

	#[test]
	fn parse_w3_datetime_year_t_second() {
		assert_datetime_parsing("2024-01-09T21:58:34", "2024-01-09 21:58:34", true);
	}

	#[test]
	fn parse_w3_datetime_year_space_fraction() {
		assert_datetime_parsing("2024-01-09 21:58:34.0", "2024-01-09 21:58:34", false);
	}

	// Date plus time, with a zone designator.

	#[test]
	fn parse_w3_datetime_year_t_second_z() {
		assert_datetime_parsing("2024-01-09T21:58:34Z", "2024-01-09 21:58:34", true);
	}

	#[test]
	fn parse_w3_datetime_year_t_fraction_zone() {
		assert_datetime_parsing("2024-01-09T21:58:34.0+02:00", "2024-01-09 21:58:34+02:00", true);
	}

	#[test]
	fn parse_w3_datetime_year_space_minute_zone() {
		assert_datetime_parsing("2024-01-09 21:58-09:45", "2024-01-09 21:58:00-09:45", false);
	}
}