// microformats_types/item.rs

1//! Microformat item types.
2
3use crate::{Class, Fragment, LanguageFilter, Properties, PropertyValue};
4use serde::de::{self, Visitor};
5use serde::ser::{SerializeMap, SerializeSeq};
6use std::collections::HashSet;
7use std::iter::FromIterator;
8use std::ops::Deref;
9use std::ops::DerefMut;
10
/// The kind of value for an item's value property.
///
/// The enum is marked `#[serde(untagged)]`, so no variant tag is written when
/// serializing; when deserializing, serde tries the variants in declaration
/// order, meaning `Url` is attempted before `Plain`.
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
#[serde(untagged, rename_all = "kebab-case")]
pub enum ValueKind {
    /// A URL value.
    Url(url::Url),
    /// A plain text value.
    Plain(String),
}
20
21impl Default for ValueKind {
22    fn default() -> Self {
23        Self::Plain(String::default())
24    }
25}
26
/// A parsed microformat item.
///
/// Field names are serialized in kebab-case. `properties` and `children` are
/// (de)serialized through the `referenced_properties` and
/// `referenced_children` helper modules instead of their default serde
/// implementations.
#[derive(serde::Serialize, serde::Deserialize, Default, PartialEq, Eq, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct Item {
    /// The microformat types (e.g., h-entry, h-card).
    pub r#type: Vec<Class>,

    /// The properties of this item.
    // Falls back to the default (empty) properties when the key is absent.
    #[serde(default, with = "referenced_properties")]
    pub properties: Properties,

    /// Child items nested within this item.
    // Omitted from the serialized output when the collection is empty, and
    // defaulted to an empty collection when the key is absent on input.
    #[serde(
        default,
        with = "referenced_children",
        skip_serializing_if = "referenced_children::is_empty"
    )]
    pub children: Items,

    /// The HTML id attribute of the element.
    // Omitted from the serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,

    /// The language of this item.
    // Omitted from the serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lang: Option<String>,

    /// The value property for value-class-pattern items.
    // Omitted from the serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub value: Option<ValueKind>,
}
58
59impl std::fmt::Debug for Item {
60    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61        f.debug_struct("Item")
62            .field("type", &self.r#type)
63            .field("id", &self.id)
64            .field("value", &self.value)
65            .field("lang", &self.lang)
66            .finish()
67    }
68}
69
70impl Item {
71    /// Creates a new item with the given types.
72    pub fn new(types: Vec<Class>) -> Self {
73        Item {
74            r#type: types,
75            ..Default::default()
76        }
77    }
78
79    /// Returns true if this item has no children and no types.
80    pub fn is_empty(&self) -> bool {
81        self.children.is_empty() && self.r#type.is_empty()
82    }
83
84    /// Removes all values for a property.
85    pub fn remove_whole_property(&mut self, property_name: &str) {
86        self.properties.remove(property_name);
87    }
88
89    /// Returns the content property values, if any.
90    pub fn content(&self) -> Option<Vec<PropertyValue>> {
91        self.properties.get("content").cloned()
92    }
93
94    /// Sets the content property to a fragment value.
95    pub fn set_content(&mut self, fragment: Fragment) {
96        self.properties.insert(
97            "content".to_string(),
98            vec![PropertyValue::Fragment(fragment)],
99        );
100    }
101
102    /// Appends a value to a property.
103    pub fn append_property(&mut self, property_name: &str, property_value: PropertyValue) {
104        let mut new_values = if let Some(values) = self.properties.get(property_name) {
105            values.to_vec()
106        } else {
107            Vec::default()
108        };
109
110        new_values.push(property_value);
111        self.properties.insert(property_name.to_owned(), new_values);
112    }
113
114    /// Returns true if this item contains nested microformats.
115    pub fn has_nested_microformats(&self) -> bool {
116        let has_nested_value_microformats = self
117            .properties
118            .values()
119            .flatten()
120            .any(|v| matches!(v, PropertyValue::Item(_)));
121
122        has_nested_value_microformats || !self.children.is_empty()
123    }
124
125    /// Returns nested items from property values.
126    pub fn nested_children(&self) -> Vec<Item> {
127        self.properties
128            .values()
129            .flatten()
130            .filter_map(|value| {
131                if let PropertyValue::Item(item) = value {
132                    Some(item)
133                } else {
134                    None
135                }
136            })
137            .cloned()
138            .collect::<Vec<_>>()
139    }
140
141    /// Returns the values for a property.
142    pub fn get_property(&self, arg: &str) -> Option<Vec<PropertyValue>> {
143        self.properties.get(arg).cloned()
144    }
145}
146
147impl TryFrom<serde_json::Map<String, serde_json::Value>> for Item {
148    type Error = crate::Error;
149
150    fn try_from(obj: serde_json::Map<String, serde_json::Value>) -> Result<Self, Self::Error> {
151        if !obj.contains_key("type") {
152            return Err(Self::Error::JsonObjectMissingProperty("type".to_string()));
153        }
154        if !obj.contains_key("properties") {
155            return Err(Self::Error::JsonObjectMissingProperty(
156                "properties".to_string(),
157            ));
158        }
159
160        serde_json::from_value(serde_json::Value::Object(obj)).map_err(Self::Error::JSON)
161    }
162}
163
164impl TryFrom<serde_json::Value> for Item {
165    type Error = crate::Error;
166
167    fn try_from(v: serde_json::Value) -> Result<Self, Self::Error> {
168        if let serde_json::Value::Object(o) = v {
169            Self::try_from(o)
170        } else {
171            Err(Self::Error::NotAnObject)
172        }
173    }
174}
175
176impl TryInto<serde_json::Value> for Item {
177    type Error = crate::Error;
178
179    fn try_into(self) -> Result<serde_json::Value, Self::Error> {
180        serde_json::to_value(self).map_err(crate::Error::JSON)
181    }
182}
183
184impl IntoIterator for Item {
185    type Item = Item;
186    type IntoIter = std::vec::IntoIter<Self::Item>;
187
188    fn into_iter(self) -> Self::IntoIter {
189        let mut items = self
190            .children
191            .iter()
192            .flat_map(|i| i.clone().into_iter())
193            .collect::<Vec<Self::Item>>();
194        items.push(self);
195        items.into_iter()
196    }
197}
198
/// A collection of items.
///
/// Newtype wrapper around `Vec<Item>`.
#[derive(Default, Debug, PartialEq, Eq, Clone)]
pub struct Items(Vec<Item>);
202
203impl From<Vec<Item>> for Items {
204    fn from(value: Vec<Item>) -> Self {
205        Self(value)
206    }
207}
208
209impl Items {
210    /// Creates a new child item with the given types and adds it to the collection.
211    pub fn create_child_item(&mut self, types: &[Class]) -> Item {
212        let item = Item::new(types.to_vec());
213        self.0.push(item.to_owned());
214        item
215    }
216
217    /// Finds an item by ID.
218    pub fn get_by_id(&self, id: &str) -> Option<Item> {
219        self.iter()
220            .flat_map(|item| item.clone().into_iter())
221            .find(|item| item.id == Some(id.to_string()))
222            .clone()
223    }
224
225    /// Finds an item by URL value.
226    pub fn get_by_url(&self, url: &url::Url) -> Option<Item> {
227        self.iter()
228            .flat_map(|item| item.clone().into_iter())
229            .find(|item| item.value == Some(ValueKind::Url(url.to_owned())))
230            .clone()
231    }
232
233    /// Creates a new Items collection with pre-allocated capacity.
234    pub fn with_capacity(size_hint: usize) -> Items {
235        Items(Vec::with_capacity(size_hint))
236    }
237}
238
/// Gives mutable access to the wrapped `Vec<Item>`, so `Vec` methods such as
/// `push` can be called directly on an `Items` value.
impl DerefMut for Items {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
244
/// Gives shared access to the wrapped `Vec<Item>`, so read-only `Vec` and
/// slice methods (`iter`, `len`, `is_empty`, indexing) work on `Items`.
impl Deref for Items {
    type Target = Vec<Item>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
252
253mod referenced_properties {
254    use super::*;
255    use crate::{NodeList, Properties};
256
257    type Value = Properties;
258
259    struct PropertyVisitor;
260
261    #[derive(serde::Deserialize, Debug)]
262    #[serde(untagged)]
263    enum PotentialPropertyValue {
264        List(NodeList),
265        Value(PropertyValue),
266    }
267
268    impl<'de> Visitor<'de> for PropertyVisitor {
269        type Value = Value;
270
271        fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
272            formatter.write_str("a map of properties with values that could be null, a string, a list of either strings, maps or both")
273        }
274
275        fn visit_map<A>(self, mut map_visitor: A) -> Result<Self::Value, A::Error>
276        where
277            A: de::MapAccess<'de>,
278        {
279            let mut property_map = Properties::default();
280
281            while let Some(key) = map_visitor.next_key()? {
282                let concrete_value: NodeList =
283                    match map_visitor.next_value::<PotentialPropertyValue>() {
284                        Ok(PotentialPropertyValue::List(values)) => values,
285                        Ok(PotentialPropertyValue::Value(node)) => vec![node],
286                        Err(_) => vec![],
287                    };
288
289                if let Some(values) = property_map.get_mut(&key) {
290                    values.extend(concrete_value);
291                } else {
292                    property_map.insert(key, concrete_value);
293                }
294            }
295
296            Ok(property_map)
297        }
298    }
299
300    pub fn serialize<S>(properties: &Value, serializer: S) -> Result<S::Ok, S::Error>
301    where
302        S: serde::ser::Serializer,
303    {
304        let mut properties_seq = serializer.serialize_map(Some(properties.len()))?;
305
306        for (key, value) in properties.iter() {
307            properties_seq.serialize_entry(key, value)?;
308        }
309
310        properties_seq.end()
311    }
312
313    pub fn deserialize<'de, D>(deserializer: D) -> Result<Value, D::Error>
314    where
315        D: serde::Deserializer<'de>,
316    {
317        deserializer.deserialize_map(PropertyVisitor)
318    }
319}
320
321mod referenced_children {
322    use super::*;
323
324    type Value = Items;
325
326    struct ChildrenVisitor;
327
328    impl<'de> Visitor<'de> for ChildrenVisitor {
329        type Value = Value;
330        fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
331            formatter.write_str("expecting a list of children nodes, an empty list or null")
332        }
333
334        fn visit_seq<ChildrenSequenceAccessor>(
335            self,
336            mut seq: ChildrenSequenceAccessor,
337        ) -> Result<Self::Value, ChildrenSequenceAccessor::Error>
338        where
339            ChildrenSequenceAccessor: de::SeqAccess<'de>,
340        {
341            let size_hint = seq.size_hint().unwrap_or(0);
342            let mut children: Items = Items::with_capacity(size_hint);
343
344            while let Some(item) = seq.next_element()? {
345                children.push(item);
346            }
347
348            Ok(children)
349        }
350    }
351
352    #[allow(clippy::ptr_arg)]
353    pub fn serialize<S>(children: &Value, serializer: S) -> Result<S::Ok, S::Error>
354    where
355        S: serde::ser::Serializer,
356    {
357        let mut seq = serializer.serialize_seq(Some(children.deref().len()))?;
358        let safe_items = children
359            .iter()
360            .filter(|item| !item.is_empty())
361            .cloned()
362            .collect::<Vec<_>>();
363        for concrete_item in safe_items {
364            seq.serialize_element(&concrete_item)?;
365        }
366        seq.end()
367    }
368
369    pub fn deserialize<'de, D>(deserializer: D) -> Result<Value, D::Error>
370    where
371        D: serde::Deserializer<'de>,
372    {
373        deserializer.deserialize_seq(ChildrenVisitor)
374    }
375
376    pub fn is_empty(items: &Items) -> bool {
377        items.is_empty()
378    }
379}
380
381impl LanguageFilter for Item {
382    fn matches_languages(&self, languages: &HashSet<&str>) -> bool {
383        // Check if this item's language matches
384        if let Some(ref lang) = self.lang {
385            if languages.contains(lang.as_str()) {
386                return true;
387            }
388        }
389        // Check children recursively
390        self.children
391            .iter()
392            .any(|child| child.matches_languages(languages))
393    }
394
395    fn filter_by_languages_set(&self, languages: &HashSet<&str>) -> Option<Self> {
396        // Check if this item or its children match
397        if !self.matches_languages(languages) {
398            return None;
399        }
400
401        // Filter children recursively
402        let filtered_children: Vec<Item> = self
403            .children
404            .iter()
405            .filter_map(|child| child.filter_by_languages_set(languages))
406            .collect();
407
408        Some(Self {
409            r#type: self.r#type.clone(),
410            properties: self.properties.clone(),
411            children: filtered_children.into(),
412            id: self.id.clone(),
413            lang: self.lang.clone(),
414            value: self.value.clone(),
415        })
416    }
417}
418
419impl LanguageFilter for Items {
420    fn matches_languages(&self, languages: &HashSet<&str>) -> bool {
421        self.iter().any(|item| item.matches_languages(languages))
422    }
423
424    fn filter_by_languages_set(&self, languages: &HashSet<&str>) -> Option<Self> {
425        let filtered: Vec<Item> = self
426            .iter()
427            .filter_map(|item| item.filter_by_languages_set(languages))
428            .collect();
429        if filtered.is_empty() {
430            None
431        } else {
432            Some(Items::from(filtered))
433        }
434    }
435}
436
437impl FromIterator<Item> for Items {
438    fn from_iter<I: IntoIterator<Item = Item>>(iter: I) -> Self {
439        Items(iter.into_iter().collect())
440    }
441}
442
/// Tests for the `LanguageFilter` implementations on `Item`, `Items` and
/// `Document`, plus the `FromIterator` implementation for `Items`.
#[cfg(test)]
mod language_filter_tests {
    use super::*;
    use crate::{Class, Document, KnownClass, LanguageFilter, Properties};

    /// Helper macro to check language matching
    ///
    /// Builds a `HashSet` from the listed languages and calls
    /// `matches_languages` on the given item with it.
    #[macro_export]
    macro_rules! lang_matches {
        ($item:expr, $($lang:expr),* $(,)?) => {{
            let mut set = std::collections::HashSet::new();
            $(
                set.insert($lang);
            )*
            $item.matches_languages(&set)
        }};
    }

    /// Helper function to create an Item with specified language and children
    ///
    /// The item is an `h-entry` with no properties, id or value.
    fn make_item(lang: Option<&str>, children: Vec<Item>) -> Item {
        Item {
            r#type: vec![Class::Known(KnownClass::Entry)],
            properties: Properties::default(),
            children: Items::from(children),
            id: None,
            lang: lang.map(|s| s.to_string()),
            value: None,
        }
    }

    /// An item whose own language is in the set matches.
    #[test]
    fn item_with_matching_lang_matches() {
        let item = make_item(Some("en"), vec![]);
        let languages: HashSet<&str> = vec!["en", "fr"].into_iter().collect();

        assert!(item.matches_languages(&languages));
    }

    /// An item whose language is outside the set, with no children, does not match.
    #[test]
    fn item_with_non_matching_lang_does_not_match() {
        let item = make_item(Some("de"), vec![]);
        let languages: HashSet<&str> = vec!["en", "fr"].into_iter().collect();

        assert!(!item.matches_languages(&languages));
    }

    /// An item without a language matches only through a matching child.
    #[test]
    fn item_with_none_lang_matches_only_via_children() {
        // Item with no lang only matches if children match
        let child_with_en = make_item(Some("en"), vec![]);
        let item = make_item(None, vec![child_with_en]);

        let languages: HashSet<&str> = vec!["en"].into_iter().collect();
        assert!(item.matches_languages(&languages));

        // Item with no lang and no matching children doesn't match
        let child_with_de = make_item(Some("de"), vec![]);
        let item_no_match = make_item(None, vec![child_with_de]);

        assert!(!item_no_match.matches_languages(&languages));
    }

    /// Filtering a collection keeps only the items in the requested languages.
    #[test]
    fn items_filtering_works() {
        let item_en = make_item(Some("en"), vec![]);
        let item_fr = make_item(Some("fr"), vec![]);
        let item_de = make_item(Some("de"), vec![]);

        let items = Items::from(vec![item_en, item_fr, item_de]);
        let languages: HashSet<&str> = vec!["en", "fr"].into_iter().collect();

        let filtered = items.filter_by_languages_set(&languages);
        assert!(filtered.is_some());

        let filtered_items = filtered.unwrap();
        assert_eq!(filtered_items.len(), 2);
    }

    /// Filtering a document keeps only its items in the requested languages.
    #[test]
    fn document_filtering_works() {
        let item_en = make_item(Some("en"), vec![]);
        let item_fr = make_item(Some("fr"), vec![]);
        let item_de = make_item(Some("de"), vec![]);

        let mut document = Document::default();
        document.items = vec![item_en, item_fr, item_de];

        let languages: HashSet<&str> = vec!["en"].into_iter().collect();

        let filtered = document.filter_by_languages_set(&languages);
        assert!(filtered.is_some());

        let filtered_doc = filtered.unwrap();
        assert_eq!(filtered_doc.items.len(), 1);
    }

    /// `Items` can be built with `collect()` from an iterator of items.
    #[test]
    fn from_iterator_collect_pattern() {
        let items: Vec<Item> = vec![make_item(Some("en"), vec![]), make_item(Some("fr"), vec![])];

        // Test FromIterator for Items
        let collected: Items = items.into_iter().collect();
        assert_eq!(collected.len(), 2);
    }

    /// The `lang_matches!` helper agrees with direct `matches_languages` calls.
    #[test]
    fn lang_matches_macro_works() {
        let item_en = make_item(Some("en"), vec![]);
        let item_fr = make_item(Some("fr"), vec![]);
        let item_none = make_item(None, vec![]);

        // Test single language match
        assert!(lang_matches!(item_en, "en"));
        assert!(!lang_matches!(item_fr, "en"));

        // Test multiple languages
        assert!(lang_matches!(item_en, "en", "fr", "de"));
        assert!(lang_matches!(item_fr, "en", "fr", "de"));

        // Item with None lang doesn't match any language filter directly
        assert!(!lang_matches!(item_none, "en"));
    }

    /// Filtering recurses: matching ancestors are kept, non-matching
    /// descendants are dropped.
    #[test]
    fn recursive_child_filtering() {
        // Create nested structure: parent -> child -> grandchild
        let grandchild = make_item(Some("de"), vec![]);
        let child = make_item(Some("en"), vec![grandchild]);
        let parent = make_item(None, vec![child]);

        // Filter for "en" - should keep parent (because child matches), child, but remove grandchild
        let languages: HashSet<&str> = vec!["en"].into_iter().collect();
        let filtered = parent.filter_by_languages_set(&languages);

        assert!(filtered.is_some());
        let filtered_parent = filtered.unwrap();
        assert_eq!(filtered_parent.children.len(), 1);

        let filtered_child = &filtered_parent.children[0];
        // Grandchild with "de" should be filtered out
        assert!(filtered_child.children.is_empty());
    }

    /// With an empty language set nothing matches and filtering yields `None`.
    #[test]
    fn empty_language_list_filters_everything() {
        let item_with_lang = make_item(Some("en"), vec![]);
        let item_without_lang = make_item(None, vec![]);

        let empty_languages: HashSet<&str> = HashSet::new();

        // No item should match an empty language set
        assert!(!item_with_lang.matches_languages(&empty_languages));
        assert!(!item_without_lang.matches_languages(&empty_languages));

        // Filtering should return None
        assert!(
            item_with_lang
                .filter_by_languages_set(&empty_languages)
                .is_none()
        );
        assert!(
            item_without_lang
                .filter_by_languages_set(&empty_languages)
                .is_none()
        );
    }

    /// An item with neither a language nor children never matches.
    #[test]
    fn item_without_lang_and_children_does_not_match() {
        let item = make_item(None, vec![]);
        let languages: HashSet<&str> = vec!["en"].into_iter().collect();

        assert!(!item.matches_languages(&languages));
    }

    /// Filtering keeps the `id` and `properties` of a matching item.
    #[test]
    fn filter_preserves_item_properties() {
        let mut item = make_item(Some("en"), vec![]);
        item.id = Some("test-id".to_string());
        item.properties.insert(
            "name".to_string(),
            vec![crate::PropertyValue::Plain(crate::TextValue::new(
                "Test".to_string(),
            ))],
        );

        let languages: HashSet<&str> = vec!["en"].into_iter().collect();
        let filtered = item.filter_by_languages_set(&languages);

        assert!(filtered.is_some());
        let filtered_item = filtered.unwrap();
        assert_eq!(filtered_item.id, Some("test-id".to_string()));
        assert!(filtered_item.properties.contains_key("name"));
    }
}