1use chrono::{DateTime, FixedOffset, Local, Utc};
3use oxilangtag::LanguageTag;
4use std::fmt;
5use thiserror::Error;
6use tracing::instrument;
7use url::Url;
8
9use crate::Document;
10use datetime::ClientDateTimeRecord;
11
/// Opaque, public error type for record operations.
///
/// This is a thin wrapper around the private `RecordErrorRepr` enum. Because
/// of `#[error(transparent)]`, its `Display` output and error source are
/// forwarded straight to the wrapped value, so the concrete representation
/// can change without affecting the public API.
#[derive(Debug, Error)]
#[error(transparent)]
pub struct RecordError(#[from] RecordErrorRepr);
16
/// Private representation behind [`RecordError`].
///
/// Kept non-public so variants can be added or reshaped freely.
#[derive(Debug, Error)]
enum RecordErrorRepr {
    /// A record field could not be interpreted; see [`RecordDataError`].
    #[error("Error getting the record data: {0}")]
    Data(#[from] RecordDataError),
}
24
25#[derive(Debug, Error)]
27pub enum RecordDataError {
28 #[error("Couldn't parse the URL {raw}: {msg}")]
30 UrlParse {
31 raw: String,
33
34 msg: String,
36 },
37
38 #[error("Couldn't parse the language")]
40 LanguageParse {
41 raw: String,
43
44 msg: String,
46 },
47}
48
49impl RecordDataError {
50 #[must_use]
52 pub fn raw_data(&self) -> &str {
53 match self {
54 Self::UrlParse { raw, .. } | Self::LanguageParse { raw, .. } => raw.as_str(),
55 }
56 }
57}
58
/// Shorthand for results whose error type is [`RecordDataError`].
type DataResult<T> = std::result::Result<T, RecordDataError>;
62
/// A language tag attached to a record, kept even when it fails to parse.
///
/// Built by [`LanguageRecord::new`], which never fails: invalid input is
/// retained in the [`LanguageRecord::Raw`] variant instead of being rejected.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum LanguageRecord {
    /// The input parsed successfully as a language tag.
    Parsed(LanguageTag<String>),

    /// The input could not be parsed as a language tag.
    Raw {
        /// The input text exactly as it was supplied.
        value: String,

        /// The parse error, rendered to a string.
        error_msg: String,
    },
}
79
80impl LanguageRecord {
81 pub fn new(language_tag: String) -> Self {
84 language_tag.parse::<LanguageTag<_>>().map_or_else(
85 |error| Self::Raw {
86 value: language_tag,
87 error_msg: error.to_string(),
88 },
89 Self::Parsed,
90 )
91 }
92
93 pub fn primary(&self) -> DataResult<String> {
103 match self {
104 LanguageRecord::Parsed(language_tag) => Ok(language_tag.primary_language().to_string()),
105 LanguageRecord::Raw { value, error_msg } => Err(RecordDataError::LanguageParse {
106 raw: value.to_owned(),
107 msg: error_msg.to_owned(),
108 }),
109 }
110 }
111}
112
113impl fmt::Display for LanguageRecord {
114 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
115 match self.primary() {
116 Ok(tag) => write!(f, "{tag}"),
117 Err(RecordDataError::LanguageParse { raw, .. }) => write!(f, "{raw}"),
118 Err(_) => unreachable!("there are no other possible error states"),
119 }
120 }
121}
122
/// A record's URL, kept even when it fails to parse.
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
enum UrlRecord {
    /// The input parsed successfully as a URL.
    Parsed(Url),
    /// The input could not be parsed: `(original text, parse error message)`.
    Raw(String, String),
}
132
133impl UrlRecord {
134 fn new(url: &str) -> Self {
137 url.parse::<Url>()
138 .map_or_else(|e| Self::Raw(url.to_string(), e.to_string()), Self::Parsed)
139 }
140
141 fn as_str(&self) -> &str {
145 match self {
146 Self::Parsed(url) => url.as_str(),
147 Self::Raw(raw, _) => raw.as_str(),
148 }
149 }
150}
151
152pub mod datetime {
154 use std::fmt::{Display, Formatter};
155
156 use chrono::{DateTime, Datelike, FixedOffset, Local};
157 use serde_with::serde_as;
158 use tracing::instrument;
159
160 #[serde_as]
162 #[derive(Debug, PartialEq, Clone)]
163 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
164 pub enum ClientDateTimeRecord {
165 Parsed(DateTime<FixedOffset>),
167
168 Interpolated {
171 value: DateTime<FixedOffset>,
173 raw: String,
175 },
176 }
177
178 impl ClientDateTimeRecord {
179 #[instrument]
182 pub fn new(local_datetime_client: &str) -> Self {
183 DateTime::parse_from_rfc3339(local_datetime_client)
184 .inspect_err(|e| tracing::warn!("couldn't parse {local_datetime_client}: {e}"))
185 .map_or_else(
186 |_| Self::Interpolated {
187 value: Local::now().into(),
188 raw: local_datetime_client.to_string(),
189 },
190 Self::Parsed,
191 )
192 }
193
194 #[must_use]
198 pub fn year(&self) -> i32 {
199 match self {
200 Self::Parsed(dt) => dt.year(),
201 Self::Interpolated { value, .. } => value.year(),
202 }
203 }
204 }
205
206 impl Display for ClientDateTimeRecord {
207 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
208 let dt = match self {
209 Self::Parsed(dt) => dt,
210 Self::Interpolated { value, .. } => value,
211 };
212 write!(f, "{}", dt.to_rfc3339())
213 }
214 }
215}
216
/// Loosely typed inputs for building a [`Record`] with [`Record::new`].
///
/// String fields are stored unvalidated here; parsing happens in
/// [`Record::new`].
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct RecordParams {
    /// Identifier of the record, if it has one.
    pub id: Option<i64>,

    /// The record's URL as text.
    pub url: String,

    /// The record's title.
    pub title: String,

    /// The client-reported datetime as text; [`Record::new`] attempts to
    /// parse it as RFC 3339.
    pub client_datetime: String,

    /// Unix timestamp for the record; when `None`, [`Record::new`]
    /// substitutes the current time.
    pub timestamp_flora: Option<i64>,

    /// The document carried by the record.
    pub document: Document,

    /// The language tag as text, if known.
    pub language: Option<String>,

    /// Unix timestamp of the last update, if any.
    pub updated_at: Option<i64>,
}
245
246impl From<Record> for RecordParams {
247 fn from(rec: Record) -> Self {
248 let Record {
249 id,
250 url,
251 title,
252 client_datetime,
253 timestamp_flora,
254 language,
255 document,
256 updated_at,
257 } = rec;
258 let url = url.as_str().to_string();
259 let language = language.map(|lang| lang.to_string());
260 let client_datetime = client_datetime.to_string();
262
263 Self {
264 id,
265 url,
266 title,
267 client_datetime,
268 timestamp_flora: Some(timestamp_flora),
269 document,
270 language,
271 updated_at,
272 }
273 }
274}
275
/// A stored record: a document plus its URL, title, timestamps and language.
///
/// Fields are private; build one with [`Record::new`] and read it through
/// the accessor methods.
#[must_use]
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Record {
    /// Identifier of the record, if it has one.
    id: Option<i64>,

    /// The record's URL, parsed when possible and kept raw otherwise.
    url: UrlRecord,

    /// The record's title.
    title: String,

    /// The client-reported datetime, parsed or interpolated.
    client_datetime: ClientDateTimeRecord,

    /// Unix timestamp for the record (exposed via `timestamp_archived`).
    timestamp_flora: i64,

    /// The language tag, parsed when possible and kept raw otherwise.
    language: Option<LanguageRecord>,

    /// The document carried by the record.
    document: Document,

    /// Unix timestamp of the last update, if any.
    updated_at: Option<i64>,
}
306
307impl Record {
308 #[instrument]
310 pub fn new(params: RecordParams) -> Self {
311 let RecordParams {
312 id,
313 url,
314 title,
315 client_datetime,
316 timestamp_flora,
317 language,
318 document,
319 updated_at,
320 } = params;
321 let url = UrlRecord::new(&url);
322 let client_datetime = ClientDateTimeRecord::new(client_datetime.as_str());
323 let language = language.map(LanguageRecord::new);
324 let timestamp_flora = timestamp_flora.unwrap_or_else(|| Local::now().timestamp());
325
326 Self {
327 id,
328 url,
329 title,
330 client_datetime,
331 timestamp_flora,
332 language,
333 document,
334 updated_at,
335 }
336 }
337
338 #[must_use]
340 pub fn id(&self) -> Option<i64> {
341 self.id
342 }
343
344 #[must_use]
346 pub fn url(&self) -> &str {
347 self.url.as_str()
348 }
349
350 #[must_use]
353 pub fn title(&self) -> &str {
354 &self.title
355 }
356
357 #[must_use]
360 pub fn client_datetime(&self) -> &ClientDateTimeRecord {
361 &self.client_datetime
362 }
363
364 #[must_use]
366 pub fn timestamp_archived(&self) -> i64 {
367 self.timestamp_flora
368 }
369
370 #[must_use]
372 pub fn updated_at(&self) -> Option<i64> {
373 self.updated_at
374 }
375
376 pub fn with_updated_at_now(mut self) -> Self {
383 self.updated_at = Some(Utc::now().timestamp());
384 self
385 }
386
387 pub fn host(&self) -> DataResult<Option<String>> {
392 match &self.url {
393 UrlRecord::Parsed(url) => Ok(url.host().map(|h| h.to_string())),
394 UrlRecord::Raw(raw, error) => Err(RecordDataError::UrlParse {
395 raw: raw.clone(),
396 msg: error.clone(),
397 }),
398 }
399 }
400
401 #[must_use]
403 pub fn language(&self) -> Option<&LanguageRecord> {
404 self.language.as_ref()
405 }
406
407 #[must_use]
409 pub fn document(&self) -> &Document {
410 &self.document
411 }
412
413 #[must_use]
416 pub fn into_document(self) -> Document {
417 self.document
418 }
419}
420
421impl PartialEq for Record {
422 fn eq(&self, other: &Self) -> bool {
426 if let (Some(this_id), Some(other_id)) = (self.id, other.id)
427 && this_id != other_id
428 {
429 return false;
430 }
431
432 self.url == other.url
435 && self.client_datetime == other.client_datetime
436 && self.timestamp_flora == other.timestamp_flora
437 }
438}
439
/// Conversion of a borrowed value into a [`Record`].
pub trait ToRecord {
    /// The error returned when the conversion fails.
    type Error: std::error::Error;

    /// Builds a [`Record`] from `self` without consuming it.
    ///
    /// # Errors
    ///
    /// Returns `Self::Error` when the value cannot be converted.
    fn to_record(&self) -> std::result::Result<Record, Self::Error>;
}
451
452impl<T> ToRecord for &T
453where
454 T: ToRecord,
455{
456 type Error = <T as ToRecord>::Error;
457
458 fn to_record(&self) -> std::result::Result<Record, Self::Error> {
459 (*self).to_record()
460 }
461}
462
/// Descriptive fields of a record, with no `document` field.
///
/// NOTE(review): presumably used where records are listed without loading
/// their documents — confirm against callers.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct RecordMeta {
    /// Identifier of the record (always present here).
    pub id: i64,

    /// The record's URL as text.
    pub url: String,

    /// The record's title.
    pub title: String,

    /// The client-reported datetime, parsed or interpolated.
    pub client_datetime: ClientDateTimeRecord,

    /// Unix timestamp for the record.
    pub timestamp_flora: i64,

    /// The language tag, parsed or raw, if one is known.
    pub language: Option<LanguageRecord>,

    /// Unix timestamp of the last update, if any.
    pub updated_at: Option<i64>,
}
490
/// A record in flat form, using only plain field types.
///
/// NOTE(review): presumably mirrors the storage-layer row for a record —
/// confirm against the code that reads and writes it.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct RawRecord {
    /// Identifier of the record.
    pub id: i64,

    /// The record's URL as text.
    pub url: String,

    /// The client datetime as an already-typed value.
    pub client_datetime: DateTime<FixedOffset>,

    /// NOTE(review): presumably the original client string when the datetime
    /// had to be interpolated — confirm.
    pub client_datetime_raw: Option<String>,

    /// Unix timestamp for the record.
    pub timestamp_flora: i64,

    /// The record's title.
    pub title: String,

    /// The language tag as text, if known.
    pub language: Option<String>,

    /// The document as bytes; the encoding is not visible from this file.
    pub document: Vec<u8>,

    /// Unix timestamp of the last update, if any.
    pub updated_at: Option<i64>,
}
525
/// Unit tests for record construction, equality, and language and datetime
/// parsing.
#[cfg(test)]
mod tests {
    use super::*;

    use pretty_assertions::{assert_eq, assert_ne};
    use proptest::prelude::*;
    use tracing_test::traced_test;

    use crate::document::tests::load_test_web_doc;

    /// Builds a fixed set of parameters (no id, parseable URL, datetime and
    /// language) that the tests below share.
    fn test_record_params() -> anyhow::Result<RecordParams> {
        let document = load_test_web_doc()?;
        let url = "https://example.org/".to_string();
        let title = "Example Domain";
        let client_datetime = "2024-10-11T13:49:46-05:00";

        // A Unix timestamp reads fine without digit separators.
        #[allow(clippy::unreadable_literal)]
        let timestamp_flora = Some(1728695243i64);
        let language = Some("en".to_string());

        Ok(RecordParams {
            id: None,
            url,
            title: title.to_string(),
            client_datetime: client_datetime.to_string(),
            timestamp_flora,
            language,
            document,
            updated_at: None,
        })
    }

    /// A record built from the shared parameters matches its stored snapshot.
    #[test]
    fn can_create_record() -> anyhow::Result<()> {
        let record_params = test_record_params()?;
        let record = Record::new(record_params);
        insta::assert_debug_snapshot!(record);
        Ok(())
    }

    /// A missing id on one side does not break equality.
    #[test]
    fn record_with_some_id_equals_record_with_none_id() -> anyhow::Result<()> {
        let record = Record::new(test_record_params()?);
        let mut also_record = Record::new(test_record_params()?);
        also_record.id = Some(37);
        assert_eq!(record, also_record);
        Ok(())
    }

    /// Two present but different ids make otherwise identical records unequal.
    #[test]
    fn records_with_different_ids_compare_unequal() -> anyhow::Result<()> {
        let mut record = Record::new(test_record_params()?);
        record.id = Some(37);
        let mut also_record = Record::new(test_record_params()?);
        also_record.id = Some(42);
        assert_ne!(record, also_record);
        Ok(())
    }

    proptest! {
        /// Any string of 2 to 6 lowercase ASCII letters yields a primary language.
        #[test]
        fn parse_language(s in "[a-z]{2,6}") {
            let language = LanguageRecord::new(s.clone());
            // `primary()` only succeeds for the `Parsed` variant.
            prop_assert!(language.primary().is_ok());
        }
    }

    /// A well-formed RFC 3339 string matches its stored snapshot.
    #[test]
    fn can_parse_rfc3339_datetime() {
        let dt = "2025-08-08T15:28:02-05:00";
        let parsed = datetime::ClientDateTimeRecord::new(dt);
        insta::assert_debug_snapshot!(parsed);
    }

    /// A datetime without a UTC offset is not valid RFC 3339 and must log a
    /// warning.
    #[traced_test]
    #[test]
    fn malformed_datetime_traces_warning() {
        let dt = "2025-08-11T21:49:15.031404172";
        let _ = datetime::ClientDateTimeRecord::new(dt);

        // `logs_assert` is brought into scope by `#[traced_test]`.
        logs_assert(|lines: &[&str]| {
            let line = lines
                .first()
                .ok_or_else(|| "No tracing lines".to_string())?;
            let warning_start = line
                .find("WARN")
                .ok_or_else(|| "trace contains no warning".to_string())?;

            // Drop everything before "WARN" so the varying timestamp prefix
            // stays out of the snapshot.
            let (_timestamp, warning) = line.split_at(warning_start);
            insta::assert_snapshot!(warning);
            Ok(())
        });
    }

    /// `into_document` hands back the document the record was built with.
    #[test]
    fn can_take_owned_document_from_record() -> anyhow::Result<()> {
        let record_params = test_record_params()?;
        let record = Record::new(record_params);
        insta::assert_debug_snapshot!(record.into_document());
        Ok(())
    }
}