use chrono::Local;
use oxilangtag::LanguageTag;
use thiserror::Error;
use tracing::instrument;
use url::Url;
use crate::Document;
use datetime::ClientDateTimeRecord;
/// Opaque public error type for this module.
///
/// It wraps the private `RecordErrorRepr` enum so the concrete variants are
/// not part of the public API. `#[error(transparent)]` forwards `Display` and
/// `source` to the wrapped value.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct RecordError(#[from] RecordErrorRepr);
/// Private representation behind `RecordError`.
#[derive(Debug, Error)]
enum RecordErrorRepr {
/// A typed field of a record could not be produced; built from a
/// `RecordDataError` through the `#[from]` conversion.
#[error("Error getting the record data: {0}")]
Data(#[from] RecordDataError),
}
#[derive(Debug, Error)]
pub enum RecordDataError {
#[error("Couldn't parse the URL {raw}: {msg}")]
UrlParse {
raw: String,
msg: String,
},
#[error("Couldn't parse the language")]
LanguageParse {
raw: String,
msg: String,
},
}
impl RecordDataError {
    /// Returns the unparsed input text that this error was raised for.
    #[must_use]
    pub fn raw_data(&self) -> &str {
        // Both variants store the input under `raw`, so one irrefutable
        // pattern covers the whole enum.
        let (Self::UrlParse { raw, .. } | Self::LanguageParse { raw, .. }) = self;
        raw
    }
}
/// Shorthand for a `Result` whose error type is `RecordDataError`.
type DataResult<T> = std::result::Result<T, RecordDataError>;
/// A language tag that is either parsed or preserved as the text that failed
/// to parse.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum LanguageRecord {
/// The text parsed into a `LanguageTag`.
Parsed(LanguageTag<String>),
/// The text did not parse; nothing is discarded.
Raw {
/// The text exactly as it was supplied.
value: String,
/// The parser's error, rendered as a string.
error_msg: String,
},
}
impl LanguageRecord {
    /// Parses `language_tag`, never failing.
    ///
    /// Text that does not parse is kept in [`LanguageRecord::Raw`] together
    /// with the parser's message.
    pub fn new(language_tag: String) -> Self {
        let outcome = language_tag.parse::<LanguageTag<String>>();
        match outcome {
            Ok(tag) => Self::Parsed(tag),
            Err(error) => Self::Raw {
                value: language_tag,
                error_msg: error.to_string(),
            },
        }
    }

    /// Returns the primary language subtag as an owned string.
    ///
    /// # Errors
    ///
    /// Returns [`RecordDataError::LanguageParse`], carrying the raw text and
    /// the stored parser message, when this record holds unparsed text.
    pub fn primary(&self) -> DataResult<String> {
        let (value, error_msg) = match self {
            Self::Parsed(tag) => return Ok(tag.primary_language().to_owned()),
            Self::Raw { value, error_msg } => (value, error_msg),
        };
        Err(RecordDataError::LanguageParse {
            raw: value.clone(),
            msg: error_msg.clone(),
        })
    }
}
/// A URL that is either parsed or preserved as the text that failed to parse.
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
enum UrlRecord {
/// The text parsed into a `Url`.
Parsed(Url),
/// The text did not parse: `(original text, parser error message)`.
Raw(String, String),
}
impl UrlRecord {
    /// Parses `url`, never failing: text that does not parse is stored in
    /// `Raw` next to the parser's message.
    fn new(url: &str) -> Self {
        match url.parse::<Url>() {
            Ok(parsed) => Self::Parsed(parsed),
            Err(e) => Self::Raw(url.to_owned(), e.to_string()),
        }
    }

    /// Returns the URL text: the parsed URL's string form, or the original
    /// text when it did not parse.
    fn as_str(&self) -> &str {
        let text: &str = match self {
            Self::Raw(raw, _) => raw,
            Self::Parsed(url) => url.as_str(),
        };
        text
    }
}
/// The client-supplied timestamp of a record.
pub mod datetime {
    use std::fmt::{Display, Formatter};
    use chrono::{DateTime, Datelike, FixedOffset, Local};
    use serde_with::serde_as;
    use tracing::instrument;

    /// A datetime reported by the client.
    #[serde_as]
    #[derive(Debug, PartialEq, Clone)]
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    pub enum ClientDateTimeRecord {
        /// The client's text parsed as RFC 3339.
        Parsed(DateTime<FixedOffset>),
        /// The client's text did not parse, so a substitute value was stored.
        Interpolated {
            /// The substitute: the local time at which parsing was attempted.
            value: DateTime<FixedOffset>,
            /// The client's text exactly as it was supplied.
            raw: String,
        },
    }

    impl ClientDateTimeRecord {
        /// Parses `local_datetime_client` as an RFC 3339 datetime.
        ///
        /// This never fails. On a parse error a warning is traced and the
        /// current local time is stored alongside the original text.
        #[instrument]
        pub fn new(local_datetime_client: &str) -> Self {
            match DateTime::parse_from_rfc3339(local_datetime_client) {
                Ok(parsed) => Self::Parsed(parsed),
                Err(e) => {
                    tracing::warn!("couldn't parse {local_datetime_client}: {e}");
                    Self::Interpolated {
                        value: Local::now().into(),
                        raw: local_datetime_client.to_string(),
                    }
                }
            }
        }

        /// Returns the calendar year of the stored datetime, whether it was
        /// parsed or interpolated.
        #[must_use]
        pub fn year(&self) -> i32 {
            let (Self::Parsed(moment) | Self::Interpolated { value: moment, .. }) = self;
            moment.year()
        }
    }

    /// Renders the stored datetime (parsed or interpolated) as RFC 3339.
    impl Display for ClientDateTimeRecord {
        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
            let (Self::Parsed(moment) | Self::Interpolated { value: moment, .. }) = self;
            f.write_str(&moment.to_rfc3339())
        }
    }
}
/// Raw, unparsed inputs consumed by `Record::new`.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct RecordParams {
/// Optional identifier, copied into the record unchanged.
pub id: Option<i64>,
/// URL text; `Record::new` attempts to parse it.
pub url: String,
/// Title, copied into the record unchanged.
pub title: String,
/// Client datetime text; `Record::new` attempts to parse it as RFC 3339.
pub client_datetime: String,
/// Optional timestamp; when `None`, `Record::new` substitutes
/// `Local::now().timestamp()`.
pub timestamp_flora: Option<i64>,
/// The document, moved into the record unchanged.
pub document: Document,
/// Optional language tag text; `Record::new` attempts to parse it.
pub language: Option<String>,
}
/// A document together with its descriptive fields.
///
/// Fields are private; they are set by `Record::new` and read through the
/// accessor methods.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Record {
// Optional identifier, as given in `RecordParams`.
id: Option<i64>,
// Parsed URL, or the raw text plus parser message when parsing failed.
url: UrlRecord,
title: String,
// Parsed client datetime, or an interpolated value when parsing failed.
client_datetime: ClientDateTimeRecord,
// Always present: `Record::new` fills in the current timestamp when the
// params carry none.
timestamp_flora: i64,
// Parsed language tag, or raw text plus parser message; `None` when no
// language was supplied.
language: Option<LanguageRecord>,
document: Document,
}
impl Record {
    /// Builds a record from raw parameters, parsing each textual field.
    ///
    /// Construction never fails. A URL or language tag that does not parse is
    /// kept as raw text together with the parser's message, and a client
    /// datetime that does not parse is replaced by the current local time.
    /// When `params.timestamp_flora` is `None`, `Local::now().timestamp()` is
    /// used instead.
    #[instrument]
    pub fn new(params: RecordParams) -> Self {
        // `UrlRecord::new` takes `&str`, so the URL is borrowed, not moved.
        let url = UrlRecord::new(&params.url);
        let client_datetime = ClientDateTimeRecord::new(params.client_datetime.as_str());
        let language = params.language.map(LanguageRecord::new);
        let timestamp_flora = params
            .timestamp_flora
            .unwrap_or_else(|| Local::now().timestamp());
        Self {
            id: params.id,
            title: params.title,
            url,
            client_datetime,
            timestamp_flora,
            language,
            document: params.document,
        }
    }

    /// Returns the record's identifier, if it has one.
    #[must_use]
    pub fn id(&self) -> Option<i64> {
        self.id
    }

    /// Returns the URL text: the parsed URL's string form, or the original
    /// text when it did not parse.
    #[must_use]
    pub fn url(&self) -> &str {
        self.url.as_str()
    }

    /// Returns the record's title.
    #[must_use]
    pub fn title(&self) -> &str {
        &self.title
    }

    /// Returns the client datetime, which is either parsed or interpolated.
    #[must_use]
    pub fn client_datetime(&self) -> &ClientDateTimeRecord {
        &self.client_datetime
    }

    /// Returns the stored `timestamp_flora` value.
    #[must_use]
    pub fn timestamp_archived(&self) -> i64 {
        self.timestamp_flora
    }

    /// Returns the host of the record's URL as a string, or `None` when the
    /// URL has no host.
    ///
    /// # Errors
    ///
    /// Returns [`RecordDataError::UrlParse`], carrying the raw text and the
    /// stored parser message, when the URL did not parse at construction.
    pub fn host(&self) -> DataResult<Option<String>> {
        match &self.url {
            UrlRecord::Parsed(url) => Ok(url.host().map(|h| h.to_string())),
            UrlRecord::Raw(raw, error) => Err(RecordDataError::UrlParse {
                raw: raw.clone(),
                msg: error.clone(),
            }),
        }
    }

    /// Returns the language record, or `None` when no language was supplied.
    #[must_use]
    pub fn language(&self) -> Option<&LanguageRecord> {
        self.language.as_ref()
    }

    /// Borrows the record's document.
    #[must_use]
    pub fn document(&self) -> &Document {
        &self.document
    }

    /// Consumes the record and returns its document.
    #[must_use]
    pub fn into_document(self) -> Document {
        self.document
    }
}
impl PartialEq for Record {
fn eq(&self, other: &Self) -> bool {
if let (Some(this_id), Some(other_id)) = (self.id, other.id)
&& this_id != other_id
{
return false;
}
self.url == other.url
&& self.client_datetime == other.client_datetime
&& self.timestamp_flora == other.timestamp_flora
}
}
/// Fallible conversion of a borrowed value into a `Record`.
pub trait ToRecord {
/// The error returned when the conversion fails.
type Error: std::error::Error;
/// Builds a `Record` from `self` without consuming it.
fn to_record(&self) -> std::result::Result<Record, Self::Error>;
}
/// Lets a reference to any `ToRecord` value be used as a `ToRecord` itself,
/// by delegating to the referent.
impl<T> ToRecord for &T
where
    T: ToRecord,
{
    type Error = T::Error;

    fn to_record(&self) -> std::result::Result<Record, Self::Error> {
        // `self` is `&&T`; peel one reference and call `T`'s implementation
        // explicitly so the call cannot resolve back to this impl.
        T::to_record(*self)
    }
}
/// The descriptive fields of a record without its document.
///
/// Compared with `Record`, `id` is mandatory and `url` is plain text rather
/// than a parsed value.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct RecordMeta {
/// Identifier of the record.
pub id: i64,
/// URL as text.
pub url: String,
/// Title of the record.
pub title: String,
/// Client datetime, parsed or interpolated.
pub client_datetime: ClientDateTimeRecord,
/// Timestamp value, same field name as on `Record`.
pub timestamp_flora: i64,
/// Language record, if any.
pub language: Option<LanguageRecord>,
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::{assert_eq, assert_ne};
use proptest::prelude::*;
use tracing_test::traced_test;
use crate::document::tests::load_test_web_doc;
// Builds parameters for an `https://example.org/` record with a fixed client
// datetime and timestamp, no id, language "en", and the shared test document.
fn test_record_params() -> anyhow::Result<RecordParams> {
let document = load_test_web_doc()?;
let url = "https://example.org/".to_string();
let title = "Example Domain";
let client_datetime = "2024-10-11T13:49:46-05:00";
// The literal is kept without digit separators, hence the lint exemption.
#[allow(clippy::unreadable_literal)]
let timestamp_flora = Some(1728695243i64);
let language = Some("en".to_string());
Ok(RecordParams {
id: None,
url,
title: title.to_string(),
client_datetime: client_datetime.to_string(),
timestamp_flora,
language,
document,
})
}
// Snapshot of the `Debug` output of a record built from the fixed parameters.
#[test]
fn can_create_record() -> anyhow::Result<()> {
let record_params = test_record_params()?;
let record = Record::new(record_params);
insta::assert_debug_snapshot!(record);
Ok(())
}
// An id on only one side is ignored by `PartialEq for Record`.
#[test]
fn record_with_some_id_equals_record_with_none_id() -> anyhow::Result<()> {
let record = Record::new(test_record_params()?);
let mut also_record = Record::new(test_record_params()?);
also_record.id = Some(37);
assert_eq!(record, also_record);
Ok(())
}
// Two different ids make otherwise identical records unequal.
#[test]
fn records_with_different_ids_compare_unequal() -> anyhow::Result<()> {
let mut record = Record::new(test_record_params()?);
record.id = Some(37);
let mut also_record = Record::new(test_record_params()?);
also_record.id = Some(42);
assert_ne!(record, also_record);
Ok(())
}
// Property: any string of 2 to 6 lowercase ASCII letters yields a record whose
// primary language can be read.
proptest! {
#[test]
fn parse_language(s in "[a-z]{2,6}") {
let language = LanguageRecord::new(s.clone());
prop_assert!(language.primary().is_ok());
}
}
// Snapshot of a datetime that carries a UTC offset and parses.
#[test]
fn can_parse_rfc3339_datetime() {
let dt = "2025-08-08T15:28:02-05:00";
let parsed = datetime::ClientDateTimeRecord::new(dt);
insta::assert_debug_snapshot!(parsed);
}
// The input has no UTC offset; the test expects `new` to trace a warning.
#[traced_test]
#[test]
fn malformed_datetime_traces_warning() {
let dt = "2025-08-11T21:49:15.031404172";
let _ = datetime::ClientDateTimeRecord::new(dt);
logs_assert(|lines: &[&str]| {
let line = lines
.first()
.ok_or_else(|| "No tracing lines".to_string())?;
// Snapshot only the part from "WARN" onwards; the text before it is bound
// to `_timestamp` and discarded.
let warning_start = line
.find("WARN")
.ok_or_else(|| "trace contains no warning".to_string())?;
let (_timestamp, warning) = line.split_at(warning_start);
insta::assert_snapshot!(warning);
Ok(())
});
}
// `into_document` hands back the document that went into the record.
#[test]
fn can_take_owned_document_from_record() -> anyhow::Result<()> {
let record_params = test_record_params()?;
let record = Record::new(record_params);
insta::assert_debug_snapshot!(record.into_document());
Ok(())
}
}