// Source: microformats_types/lib.rs

1//! Core types for Microformats2 parsing.
2//!
3//! This crate provides the fundamental data structures for representing
4//! Microformats2 parsed documents, items, properties, and values.
5//!
6//! # Features
7//!
8//! - `metaformats` - Enable metaformats backcompat support for Open Graph and Twitter Cards
9//! - `debug_flow` - Enable source tracking for debugging parsed values
10//! - `per_element_lang` - Enable per-element language tracking for TextValue and UrlValue
11
12pub mod class;
13#[cfg(feature = "debug_flow")]
14mod debug_types;
15pub mod document;
16pub mod error;
17pub mod item;
18pub mod property_value;
19/// Link relation types.
20pub mod relation;
21pub mod temporal;
22pub mod traits;
23
24pub use class::{Class, KnownClass};
25pub use document::Document;
26pub use error::Error;
27pub use item::{Item, Items, ValueKind};
28pub use property_value::{
29    Fragment, Image, NodeList, Properties, PropertyValue, PropertyWithMetadata,
30};
31pub use relation::{Relation, Relations};
32pub use traits::{FindItemById, FindItemByProperty, FindItemByUrl, LanguageFilter};
33
34#[cfg(feature = "debug_flow")]
35pub use debug_types::*;
36
37/// Alias for [`Item`], representing a parsed microformat.
38pub type Microformat = Item;
39
40pub use url::Url;
41
42#[cfg(test)]
43mod test;
44
/// A text value with optional language information.
///
/// When the `per_element_lang` feature is enabled, this struct can carry
/// a language tag for the text. Without the feature, it serializes
/// transparently as a plain string.
///
/// The derived comparisons look at `value` first and, when the feature is
/// enabled, at `lang` second (field declaration order).
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct TextValue {
    /// The text content.
    value: String,
    /// Language tag attached to the text, if any.
    #[cfg(feature = "per_element_lang")]
    lang: Option<String>,
}
56
57impl TextValue {
58    /// Creates a new TextValue from a string.
59    pub fn new(value: String) -> Self {
60        Self {
61            value,
62            #[cfg(feature = "per_element_lang")]
63            lang: None,
64        }
65    }
66
67    /// Creates a new TextValue with a language tag.
68    #[cfg(feature = "per_element_lang")]
69    pub fn with_lang(value: String, lang: impl Into<String>) -> Self {
70        Self {
71            value,
72            lang: Some(lang.into()),
73        }
74    }
75
76    /// Returns the language tag if present.
77    #[cfg(feature = "per_element_lang")]
78    pub fn lang(&self) -> Option<&str> {
79        self.lang.as_deref()
80    }
81}
82
83impl std::ops::Deref for TextValue {
84    type Target = String;
85    fn deref(&self) -> &Self::Target {
86        &self.value
87    }
88}
89
90impl From<String> for TextValue {
91    fn from(value: String) -> Self {
92        Self::new(value)
93    }
94}
95
96impl From<&str> for TextValue {
97    fn from(value: &str) -> Self {
98        Self::new(value.to_string())
99    }
100}
101
102impl std::fmt::Display for TextValue {
103    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104        self.value.fmt(f)
105    }
106}
107
108// Serialize TextValue transparently as a string when per_element_lang is NOT enabled
109#[cfg(not(feature = "per_element_lang"))]
110impl serde::Serialize for TextValue {
111    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
112    where
113        S: serde::Serializer,
114    {
115        serializer.serialize_str(&self.value)
116    }
117}
118
// With `per_element_lang`: a plain string when there is no language tag,
// an object with `value` and `lang` fields when there is one.
#[cfg(feature = "per_element_lang")]
impl serde::Serialize for TextValue {
    /// Emits either a bare string (no tag) or a two-field struct (tagged).
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeStruct;

        match self.lang.as_deref() {
            // No language: keep the plain-string form for backward compatibility.
            None => serializer.serialize_str(&self.value),
            // Has language: serialize as an object.
            Some(lang) => {
                let mut state = serializer.serialize_struct("TextValue", 2)?;
                state.serialize_field("value", &self.value)?;
                state.serialize_field("lang", lang)?;
                state.end()
            }
        }
    }
}
139
140// Deserialize TextValue from a string when per_element_lang is NOT enabled
141#[cfg(not(feature = "per_element_lang"))]
142impl<'de> serde::Deserialize<'de> for TextValue {
143    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
144    where
145        D: serde::Deserializer<'de>,
146    {
147        let s = String::deserialize(deserializer)?;
148        Ok(TextValue::new(s))
149    }
150}
151
// With `per_element_lang` a `TextValue` is read from either a plain string
// or an object with a required `value` and an optional `lang` field.
#[cfg(feature = "per_element_lang")]
impl<'de> serde::Deserialize<'de> for TextValue {
    /// Accepts `"text"` or `{"value": "text", "lang": "en"}`.
    ///
    /// # Errors
    ///
    /// Fails on a duplicate or unknown object key, on a missing `value`
    /// key, or when the input is neither a string nor a map.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{self, MapAccess, Visitor};
        use std::fmt;

        /// Keys accepted in the object form.
        const FIELDS: &[&str] = &["value", "lang"];

        struct TextValueVisitor;

        impl<'de> Visitor<'de> for TextValueVisitor {
            type Value = TextValue;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter
                    .write_str("a string or an object with \"value\" and optional \"lang\" fields")
            }

            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                Ok(TextValue::new(value.to_owned()))
            }

            fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                Ok(TextValue::new(value))
            }

            fn visit_map<M>(self, mut map: M) -> Result<Self::Value, M::Error>
            where
                M: MapAccess<'de>,
            {
                let mut value: Option<String> = None;
                let mut lang: Option<String> = None;

                // Keys are read as owned `String`s. Asking for `&str` only works
                // when the input can lend the key (e.g. `serde_json::from_str`
                // with no escapes) and fails with "expected a borrowed string"
                // for `serde_json::from_value`, `from_reader`, and similar.
                while let Some(key) = map.next_key::<String>()? {
                    match key.as_str() {
                        "value" => {
                            if value.is_some() {
                                return Err(de::Error::duplicate_field("value"));
                            }
                            value = Some(map.next_value()?);
                        }
                        "lang" => {
                            if lang.is_some() {
                                return Err(de::Error::duplicate_field("lang"));
                            }
                            lang = Some(map.next_value()?);
                        }
                        other => {
                            return Err(de::Error::unknown_field(other, FIELDS));
                        }
                    }
                }

                let value = value.ok_or_else(|| de::Error::missing_field("value"))?;
                Ok(TextValue { value, lang })
            }
        }

        // Two input shapes are accepted, so let the format report what it has.
        deserializer.deserialize_any(TextValueVisitor)
    }
}
221
222/// A URL value with optional language information.
223///
224/// When the `per_element_lang` feature is enabled, this struct can carry
225/// a language tag for the URL. Without the feature, it serializes
226/// transparently as a plain URL string.
227#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
228pub struct UrlValue {
229    value: url::Url,
230    #[cfg(feature = "per_element_lang")]
231    lang: Option<String>,
232}
233
234impl UrlValue {
235    /// Creates a new UrlValue from a URL.
236    pub fn new(value: url::Url) -> Self {
237        Self {
238            value,
239            #[cfg(feature = "per_element_lang")]
240            lang: None,
241        }
242    }
243
244    /// Creates a new UrlValue with a language tag.
245    #[cfg(feature = "per_element_lang")]
246    pub fn with_lang(value: url::Url, lang: impl Into<String>) -> Self {
247        Self {
248            value,
249            lang: Some(lang.into()),
250        }
251    }
252
253    /// Returns the language tag if present.
254    #[cfg(feature = "per_element_lang")]
255    pub fn lang(&self) -> Option<&str> {
256        self.lang.as_deref()
257    }
258}
259
260impl std::ops::Deref for UrlValue {
261    type Target = url::Url;
262    fn deref(&self) -> &Self::Target {
263        &self.value
264    }
265}
266
267impl From<url::Url> for UrlValue {
268    fn from(value: url::Url) -> Self {
269        Self::new(value)
270    }
271}
272
273impl std::fmt::Display for UrlValue {
274    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
275        self.value.fmt(f)
276    }
277}
278
279// Serialize UrlValue transparently as a URL string when per_element_lang is NOT enabled
280#[cfg(not(feature = "per_element_lang"))]
281impl serde::Serialize for UrlValue {
282    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
283    where
284        S: serde::Serializer,
285    {
286        serializer.serialize_str(self.value.as_str())
287    }
288}
289
// With `per_element_lang`: a plain URL string when there is no language tag,
// an object with `value` and `lang` fields when there is one.
#[cfg(feature = "per_element_lang")]
impl serde::Serialize for UrlValue {
    /// Emits either a bare string (no tag) or a two-field struct (tagged).
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeStruct;

        let text = self.value.as_str();
        match self.lang.as_deref() {
            // No language: keep the plain-string form for backward compatibility.
            None => serializer.serialize_str(text),
            // Has language: serialize as an object.
            Some(lang) => {
                let mut state = serializer.serialize_struct("UrlValue", 2)?;
                state.serialize_field("value", text)?;
                state.serialize_field("lang", lang)?;
                state.end()
            }
        }
    }
}
310
311// Deserialize UrlValue from a URL string when per_element_lang is NOT enabled
312#[cfg(not(feature = "per_element_lang"))]
313impl<'de> serde::Deserialize<'de> for UrlValue {
314    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
315    where
316        D: serde::Deserializer<'de>,
317    {
318        let s = String::deserialize(deserializer)?;
319        let url = url::Url::parse(&s).map_err(serde::de::Error::custom)?;
320        Ok(UrlValue::new(url))
321    }
322}
323
// With `per_element_lang` a `UrlValue` is read from either a plain URL string
// or an object with a required `value` and an optional `lang` field.
#[cfg(feature = "per_element_lang")]
impl<'de> serde::Deserialize<'de> for UrlValue {
    /// Accepts `"https://…"` or `{"value": "https://…", "lang": "en"}`.
    ///
    /// # Errors
    ///
    /// Fails on a duplicate or unknown object key, on a missing `value`
    /// key, when the URL string does not parse, or when the input is
    /// neither a string nor a map.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{self, MapAccess, Visitor};
        use std::fmt;

        /// Keys accepted in the object form.
        const FIELDS: &[&str] = &["value", "lang"];

        struct UrlValueVisitor;

        impl<'de> Visitor<'de> for UrlValueVisitor {
            type Value = UrlValue;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str(
                    "a URL string or an object with \"value\" and optional \"lang\" fields",
                )
            }

            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                url::Url::parse(value)
                    .map(UrlValue::new)
                    .map_err(de::Error::custom)
            }

            fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                // Parsing only needs a borrow, so share the `&str` path.
                self.visit_str(&value)
            }

            fn visit_map<M>(self, mut map: M) -> Result<Self::Value, M::Error>
            where
                M: MapAccess<'de>,
            {
                let mut value: Option<String> = None;
                let mut lang: Option<String> = None;

                // Keys are read as owned `String`s. Asking for `&str` only works
                // when the input can lend the key (e.g. `serde_json::from_str`
                // with no escapes) and fails with "expected a borrowed string"
                // for `serde_json::from_value`, `from_reader`, and similar.
                while let Some(key) = map.next_key::<String>()? {
                    match key.as_str() {
                        "value" => {
                            if value.is_some() {
                                return Err(de::Error::duplicate_field("value"));
                            }
                            value = Some(map.next_value()?);
                        }
                        "lang" => {
                            if lang.is_some() {
                                return Err(de::Error::duplicate_field("lang"));
                            }
                            lang = Some(map.next_value()?);
                        }
                        other => {
                            return Err(de::Error::unknown_field(other, FIELDS));
                        }
                    }
                }

                let value_str = value.ok_or_else(|| de::Error::missing_field("value"))?;
                let url = url::Url::parse(&value_str).map_err(de::Error::custom)?;
                Ok(UrlValue { value: url, lang })
            }
        }

        // Two input shapes are accepted, so let the format report what it has.
        deserializer.deserialize_any(UrlValueVisitor)
    }
}
397
// Every test here needs `per_element_lang`, so the whole module is gated on
// it. Gating only the individual tests left an empty module with an unused
// `use super::*` when the feature was off.
#[cfg(all(test, feature = "per_element_lang"))]
mod test_per_element_lang {
    use super::*;

    #[test]
    fn text_value_without_lang_serializes_as_string() {
        let tv = TextValue::new("hello".to_string());
        // Without lang, it should serialize as a plain string for backward compatibility
        assert_eq!(serde_json::to_string(&tv).unwrap(), r#""hello""#);
    }

    #[test]
    fn text_value_with_lang_serializes_as_object() {
        let tv = TextValue::with_lang("hello".to_string(), "en");
        let json = serde_json::to_string(&tv).unwrap();
        assert!(json.contains(r#""value":"hello""#));
        assert!(json.contains(r#""lang":"en""#));
    }

    #[test]
    fn text_value_deserializes_string() {
        let tv: TextValue = serde_json::from_str("\"hello\"").unwrap();
        assert_eq!(&*tv, "hello");
        assert_eq!(tv.lang(), None);
    }

    #[test]
    fn text_value_deserializes_object() {
        let tv: TextValue = serde_json::from_str("{\"value\":\"hello\",\"lang\":\"en\"}").unwrap();
        assert_eq!(&*tv, "hello");
        assert_eq!(tv.lang(), Some("en"));
    }

    #[test]
    fn url_value_without_lang_serializes_as_string() {
        let uv = UrlValue::new("https://example.com".parse().unwrap());
        // Without lang, it should serialize as a plain string for backward compatibility
        assert_eq!(
            serde_json::to_string(&uv).unwrap(),
            r#""https://example.com/""#
        );
    }

    #[test]
    fn url_value_with_lang_serializes_as_object() {
        let uv = UrlValue::with_lang("https://example.com".parse().unwrap(), "en");
        let json = serde_json::to_string(&uv).unwrap();
        assert!(json.contains(r#""value":"https://example.com/""#));
        assert!(json.contains(r#""lang":"en""#));
    }

    #[test]
    fn url_value_deserializes_string() {
        let uv: UrlValue = serde_json::from_str("\"https://example.com/\"").unwrap();
        assert_eq!(uv.as_str(), "https://example.com/");
        assert_eq!(uv.lang(), None);
    }

    #[test]
    fn url_value_deserializes_object() {
        let uv: UrlValue =
            serde_json::from_str("{\"value\":\"https://example.com/\",\"lang\":\"en\"}").unwrap();
        assert_eq!(uv.as_str(), "https://example.com/");
        assert_eq!(uv.lang(), Some("en"));
    }

    #[test]
    fn url_value_rejects_invalid_url() {
        // A string that is not an absolute URL must surface as a serde error.
        assert!(serde_json::from_str::<UrlValue>("\"not a url\"").is_err());
    }

    #[test]
    fn text_value_deref_works() {
        let tv = TextValue::new("hello".to_string());
        assert_eq!(tv.len(), 5); // Deref to String
        assert_eq!(&tv[..], "hello");
    }

    #[test]
    fn url_value_deref_works() {
        let uv = UrlValue::new("https://example.com/path".parse().unwrap());
        assert_eq!(uv.path(), "/path"); // Deref to Url
    }
}