1use std::borrow::Cow;
28
29use ego_tree::NodeRef;
30use indexmap::IndexMap;
31use scraper::node::Node;
32use scraper::Html;
33
34use crate::error::{ExtractionError, ExtractionWarning, WarningCode};
35use crate::types::{SchemaNode, SchemaValue, SourceFormat};
36
37use super::{classify_text_value, strip_schema_prefix, ExtractionOutput, Extractor};
38
/// Maximum recursion depth for the DOM walkers in this module.
///
/// `walk_dom`, `collect_rdfa_properties` and `visit_for_rdfa_props` each return without
/// doing anything once their `depth` argument exceeds this value.
const MAX_DEPTH: usize = 20;
41
/// Stateless extractor that turns RDFa Lite markup (`vocab`, `prefix`, `typeof`, `property`,
/// `resource`) in an HTML document into [`SchemaNode`] values.
///
/// Use it through the [`Extractor`] trait for raw HTML, or through
/// `extract_from_document` when an [`Html`] document has already been parsed.
pub struct RdfaLiteExtractor;
59
60impl Extractor for RdfaLiteExtractor {
61 fn extract(&self, html: &str) -> Result<ExtractionOutput, ExtractionError> {
62 let document = Html::parse_document(html);
63 self.extract_from_document(&document)
64 }
65}
66
67impl RdfaLiteExtractor {
68 pub fn extract_from_document(
75 &self,
76 document: &Html,
77 ) -> Result<ExtractionOutput, ExtractionError> {
78 let mut warnings = Vec::new();
79 let mut nodes = Vec::new();
80
81 let context = RdfaContext {
82 vocab: None,
83 prefixes: IndexMap::new(),
84 };
85
86 for child in document.tree.root().children() {
88 walk_dom(child, &context, &mut nodes, &mut warnings, 0);
89 }
90
91 Ok(ExtractionOutput { nodes, warnings })
92 }
93}
94
/// RDFa state inherited from ancestor elements while walking the DOM.
#[derive(Debug, Clone)]
struct RdfaContext {
    /// Vocabulary IRI taken from the nearest `vocab` attribute, stored so that it ends in
    /// `/` or `#`. `None` at the document root and after an empty `vocab=""`.
    vocab: Option<String>,
    /// Prefix name (without its trailing colon) mapped to the namespace IRI, accumulated
    /// from `prefix` attributes.
    prefixes: IndexMap<String, String>,
}
103
104impl RdfaContext {
105 fn updated(&self, el: &scraper::node::Element) -> Option<Self> {
108 let has_vocab = el.attr("vocab").is_some();
109 let has_prefix = el.attr("prefix").is_some();
110
111 if !has_vocab && !has_prefix {
112 return None;
113 }
114
115 let mut ctx = self.clone();
116
117 if let Some(vocab) = el.attr("vocab") {
118 ctx.vocab = if vocab.is_empty() {
119 None
120 } else {
121 Some(ensure_trailing_slash(vocab))
122 };
123 }
124
125 if let Some(prefix_attr) = el.attr("prefix") {
126 parse_prefix_attr(prefix_attr, &mut ctx.prefixes);
127 }
128
129 Some(ctx)
130 }
131
132 fn resolve_term(&self, term: &str) -> String {
134 let stripped = strip_schema_prefix(term);
136 if matches!(stripped, Cow::Owned(_)) {
137 return stripped.into_owned();
138 }
139
140 if let Some(colon_pos) = term.find(':') {
142 let prefix = &term[..colon_pos];
143 let local = &term[colon_pos + 1..];
144 if let Some(ns_uri) = self.prefixes.get(prefix) {
145 let full = format!("{ns_uri}{local}");
146 return strip_schema_prefix(&full).into_owned();
147 }
148 }
149
150 term.to_string()
151 }
152}
153
154fn walk_dom(
156 node: NodeRef<'_, Node>,
157 parent_ctx: &RdfaContext,
158 nodes: &mut Vec<SchemaNode>,
159 warnings: &mut Vec<ExtractionWarning>,
160 depth: usize,
161) {
162 if depth > MAX_DEPTH {
163 return;
164 }
165
166 let Some(el) = node.value().as_element() else {
167 for child in node.children() {
169 walk_dom(child, parent_ctx, nodes, warnings, depth);
170 }
171 return;
172 };
173
174 let updated_ctx = parent_ctx.updated(el);
175 let ctx = updated_ctx.as_ref().unwrap_or(parent_ctx);
176
177 if let Some(typeof_attr) = el.attr("typeof") {
179 let types: Vec<String> = typeof_attr
180 .split_whitespace()
181 .map(|t| ctx.resolve_term(t))
182 .collect();
183
184 if types.is_empty() {
185 warnings.push(ExtractionWarning {
186 message: "RDFa typeof attribute is empty".into(),
187 source_location: None,
188 code: WarningCode::EmptyType,
189 });
190 }
191
192 let mut properties: IndexMap<String, Vec<SchemaValue>> = IndexMap::new();
193
194 if let Some(resource) = el.attr("resource") {
196 properties
197 .entry("@id".into())
198 .or_default()
199 .push(classify_text_value(resource));
200 }
201
202 collect_rdfa_properties(node, ctx, &mut properties, warnings, depth + 1);
204
205 let schema_node = SchemaNode {
206 types,
207 properties,
208 source_format: SourceFormat::RdfaLite,
209 source_location: None,
210 };
211
212 nodes.push(schema_node);
213 return; }
215
216 for child in node.children() {
218 walk_dom(child, ctx, nodes, warnings, depth + 1);
219 }
220}
221
222fn collect_rdfa_properties(
224 node: NodeRef<'_, Node>,
225 ctx: &RdfaContext,
226 properties: &mut IndexMap<String, Vec<SchemaValue>>,
227 warnings: &mut Vec<ExtractionWarning>,
228 depth: usize,
229) {
230 if depth > MAX_DEPTH {
231 return;
232 }
233
234 for child in node.children() {
235 visit_for_rdfa_props(child, ctx, properties, warnings, depth);
236 }
237}
238
239fn visit_for_rdfa_props(
241 node: NodeRef<'_, Node>,
242 parent_ctx: &RdfaContext,
243 properties: &mut IndexMap<String, Vec<SchemaValue>>,
244 warnings: &mut Vec<ExtractionWarning>,
245 depth: usize,
246) {
247 if depth > MAX_DEPTH {
248 return;
249 }
250
251 let Some(el) = node.value().as_element() else {
252 return;
253 };
254
255 let updated_ctx = parent_ctx.updated(el);
256 let ctx = updated_ctx.as_ref().unwrap_or(parent_ctx);
257
258 if let Some(prop_attr) = el.attr("property") {
260 let prop_names: Vec<String> = prop_attr
261 .split_whitespace()
262 .map(|p| ctx.resolve_term(p))
263 .collect();
264
265 if prop_names.is_empty() {
266 return;
267 }
268
269 if let Some(typeof_attr) = el.attr("typeof") {
271 let types: Vec<String> = typeof_attr
272 .split_whitespace()
273 .map(|t| ctx.resolve_term(t))
274 .collect();
275
276 let mut nested_props: IndexMap<String, Vec<SchemaValue>> = IndexMap::new();
277
278 if let Some(resource) = el.attr("resource") {
279 nested_props
280 .entry("@id".into())
281 .or_default()
282 .push(classify_text_value(resource));
283 }
284
285 collect_rdfa_properties(node, ctx, &mut nested_props, warnings, depth + 1);
286
287 let nested_node = SchemaNode {
288 types,
289 properties: nested_props,
290 source_format: SourceFormat::RdfaLite,
291 source_location: None,
292 };
293
294 let value = SchemaValue::Node(Box::new(nested_node));
295 for name in &prop_names {
296 properties
297 .entry(name.clone())
298 .or_default()
299 .push(value.clone());
300 }
301 return; }
303
304 let value = extract_rdfa_value(node, el);
306
307 for name in &prop_names {
308 properties
309 .entry(name.clone())
310 .or_default()
311 .push(value.clone());
312 }
313 return; }
315
316 if el.attr("typeof").is_some() {
318 return;
324 }
325
326 for child in node.children() {
328 visit_for_rdfa_props(child, ctx, properties, warnings, depth + 1);
329 }
330}
331
332fn extract_rdfa_value(node: NodeRef<'_, Node>, el: &scraper::node::Element) -> SchemaValue {
334 let tag = el.name();
335
336 if let Some(content) = el.attr("content") {
338 return classify_text_value(content);
339 }
340
341 if let Some(resource) = el.attr("resource") {
343 return classify_text_value(resource);
344 }
345
346 if let Some(href) = el.attr("href") {
348 match tag {
349 "a" | "link" | "area" => return SchemaValue::Url(href.to_string()),
350 _ => return classify_text_value(href),
351 }
352 }
353
354 if let Some(src) = el.attr("src") {
356 match tag {
357 "img" | "audio" | "video" | "source" | "embed" => {
358 return SchemaValue::Url(src.to_string())
359 }
360 _ => return classify_text_value(src),
361 }
362 }
363
364 if tag == "time" {
366 if let Some(datetime) = el.attr("datetime") {
367 return SchemaValue::DateTime(datetime.to_string());
368 }
369 }
370
371 if tag == "data" {
373 if let Some(val) = el.attr("value") {
374 return classify_text_value(val);
375 }
376 }
377
378 let text = collect_text_content(node);
380 let trimmed = text.trim().to_string();
381 classify_text_value(&trimmed)
382}
383
384fn collect_text_content(node: NodeRef<'_, Node>) -> String {
386 let mut text = String::new();
387 for descendant in node.descendants() {
388 if let Some(t) = descendant.value().as_text() {
389 text.push_str(t);
390 }
391 }
392 text
393}
394
395fn parse_prefix_attr(attr: &str, prefixes: &mut IndexMap<String, String>) {
399 let tokens: Vec<&str> = attr.split_whitespace().collect();
400 let mut i = 0;
401 while i + 1 < tokens.len() {
402 let prefix = tokens[i];
403 let uri = tokens[i + 1];
404 if let Some(stripped) = prefix.strip_suffix(':') {
405 prefixes.insert(stripped.to_string(), uri.to_string());
406 i += 2;
407 } else {
408 i += 1;
409 }
410 }
411}
412
/// Returns `uri` as an owned string that is guaranteed to end in `/` or `#`.
///
/// A `/` is appended unless the input already ends in one of those two characters; an
/// empty input therefore becomes `"/"`.
fn ensure_trailing_slash(uri: &str) -> String {
    let mut normalized = uri.to_owned();
    if !uri.ends_with(['/', '#']) {
        normalized.push('/');
    }
    normalized
}
421
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Runs the extractor on `html`, failing the test if extraction returns an error.
    fn extract(html: &str) -> ExtractionOutput {
        RdfaLiteExtractor.extract(html).expect("extraction failed")
    }

    /// Expected property list holding a single text value.
    fn text(value: &str) -> Vec<SchemaValue> {
        vec![SchemaValue::Text(value.to_owned())]
    }

    /// Expected property list holding a single URL value.
    fn url(value: &str) -> Vec<SchemaValue> {
        vec![SchemaValue::Url(value.to_owned())]
    }

    /// A single typed element with two text properties.
    #[test]
    fn basic_product() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product">
 <span property="name">Widget</span>
 <span property="description">A great widget</span>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes.len(), 1);
        let product = &out.nodes[0];
        assert_eq!(product.types, vec!["Product"]);
        assert_eq!(product.source_format, SourceFormat::RdfaLite);
        assert_eq!(product.properties["name"], text("Widget"));
        assert_eq!(product.properties["description"], text("A great widget"));
    }

    /// `property` + `typeof` on the same element yields a nested node.
    #[test]
    fn nested_typed_property() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product">
 <span property="name">Widget</span>
 <div property="offers" typeof="Offer">
 <span property="priceCurrency">USD</span>
 <meta property="price" content="29.99">
 </div>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes.len(), 1);
        let offers = &out.nodes[0].properties["offers"];
        assert_eq!(offers.len(), 1);
        let SchemaValue::Node(offer) = &offers[0] else {
            panic!("Expected nested Node for offers");
        };
        assert_eq!(offer.types, vec!["Offer"]);
        assert_eq!(offer.properties["priceCurrency"], text("USD"));
        assert_eq!(offer.properties["price"], text("29.99"));
    }

    /// The `content` attribute supplies the property value.
    #[test]
    fn content_attribute() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product">
 <meta property="name" content="Widget">
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes[0].properties["name"], text("Widget"));
    }

    /// `href` on an anchor is reported as a URL rather than the link text.
    #[test]
    fn href_as_url() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product">
 <span property="name">Widget</span>
 <a property="url" href="https://example.com/widget">Link</a>
</div>
</body></html>"#,
        );

        assert_eq!(
            out.nodes[0].properties["url"],
            url("https://example.com/widget")
        );
    }

    /// `src` on an image is reported as a URL.
    #[test]
    fn img_src_as_url() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product">
 <span property="name">Widget</span>
 <img property="image" src="https://example.com/img.jpg">
</div>
</body></html>"#,
        );

        assert_eq!(
            out.nodes[0].properties["image"],
            url("https://example.com/img.jpg")
        );
    }

    /// `datetime` on a `time` element wins over the element text.
    #[test]
    fn time_datetime() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Event">
 <span property="name">Concert</span>
 <time property="startDate" datetime="2024-06-15T19:00:00">June 15</time>
</div>
</body></html>"#,
        );

        assert_eq!(
            out.nodes[0].properties["startDate"],
            vec![SchemaValue::DateTime("2024-06-15T19:00:00".into())]
        );
    }

    /// `resource` on the typed element is exposed under `@id`.
    #[test]
    fn resource_as_id() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product" resource="https://example.com/product/1">
 <span property="name">Widget</span>
</div>
</body></html>"#,
        );

        assert_eq!(
            out.nodes[0].properties["@id"],
            url("https://example.com/product/1")
        );
    }

    /// `vocab` declared on an ancestor applies to typed descendants.
    #[test]
    fn vocab_inheritance() {
        let out = extract(
            r#"<html vocab="https://schema.org/"><body>
<div typeof="Product">
 <span property="name">Widget</span>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes.len(), 1);
        assert_eq!(out.nodes[0].types, vec!["Product"]);
    }

    /// Prefixed terms are resolved through the declared prefix mapping.
    #[test]
    fn prefix_resolution() {
        let out = extract(
            r#"<html prefix="schema: https://schema.org/"><body>
<div vocab="https://schema.org/" typeof="schema:Product">
 <span property="schema:name">Widget</span>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes.len(), 1);
        assert_eq!(out.nodes[0].types, vec!["Product"]);
        assert_eq!(out.nodes[0].properties["name"], text("Widget"));
    }

    /// Whitespace-separated `typeof` tokens are all kept, in order.
    #[test]
    fn multiple_types() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product IndividualProduct">
 <span property="name">Widget</span>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes[0].types, vec!["Product", "IndividualProduct"]);
    }

    /// Sibling typed elements become separate top-level nodes in document order.
    #[test]
    fn multiple_top_level_items() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product">
 <span property="name">Widget A</span>
</div>
<div vocab="https://schema.org/" typeof="Article">
 <span property="name">Article B</span>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes.len(), 2);
        assert_eq!(out.nodes[0].types, vec!["Product"]);
        assert_eq!(out.nodes[1].types, vec!["Article"]);
    }

    /// A page without RDFa produces neither nodes nor warnings.
    #[test]
    fn no_rdfa() {
        let out = extract("<html><body><p>No RDFa here</p></body></html>");

        assert!(out.nodes.is_empty());
        assert!(out.warnings.is_empty());
    }

    /// Nested typed properties are followed through more than one level.
    #[test]
    fn deep_nesting() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product">
 <span property="name">Widget</span>
 <div property="offers" typeof="Offer">
 <meta property="price" content="29.99">
 <div property="seller" typeof="Organization">
 <span property="name">Acme</span>
 </div>
 </div>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes.len(), 1);
        let SchemaValue::Node(offer) = &out.nodes[0].properties["offers"][0] else {
            panic!("Expected Offer node");
        };
        assert_eq!(offer.types, vec!["Offer"]);
        let SchemaValue::Node(seller) = &offer.properties["seller"][0] else {
            panic!("Expected Organization node");
        };
        assert_eq!(seller.types, vec!["Organization"]);
        assert_eq!(seller.properties["name"], text("Acme"));
    }

    /// Properties inside a wrapper without RDFa attributes still belong to the item.
    #[test]
    fn property_in_wrapper_div() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product">
 <div class="wrapper">
 <span property="name">Widget</span>
 </div>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes[0].properties["name"], text("Widget"));
    }

    /// The `http://` form of the schema.org vocabulary works as well.
    #[test]
    fn http_vocab() {
        let out = extract(
            r#"<html><body>
<div vocab="http://schema.org/" typeof="Product">
 <span property="name">Widget</span>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes[0].types, vec!["Product"]);
    }

    /// Several declarations in one `prefix` attribute are all registered.
    #[test]
    fn parse_prefix_attr_works() {
        let mut prefixes = IndexMap::new();
        parse_prefix_attr(
            "schema: https://schema.org/ og: https://ogp.me/ns#",
            &mut prefixes,
        );

        assert_eq!(prefixes["schema"], "https://schema.org/");
        assert_eq!(prefixes["og"], "https://ogp.me/ns#");
    }

    /// An inner `vocab=""` must not prevent the outer item from being extracted.
    #[test]
    fn empty_vocab_resets_vocabulary() {
        let out = extract(
            r#"<html vocab="https://schema.org/"><body>
<div typeof="Product">
 <span property="name">Outer</span>
 <div vocab="">
 <div typeof="CustomThing">
 <span property="label">Inner</span>
 </div>
 </div>
</div>
</body></html>"#,
        );

        assert!(out
            .nodes
            .iter()
            .any(|node| node.types.iter().any(|ty| ty == "Product")));
    }

    /// Nesting deeper than `MAX_DEPTH` neither fails nor loses the outermost item.
    #[test]
    fn depth_exceeding_max_truncates_silently() {
        let target = MAX_DEPTH + 2;

        let mut html = String::from(r#"<html><body><div vocab="https://schema.org/">"#);
        for i in 0..target {
            html.push_str(&format!(
                r#"<div property="child" typeof="Thing"><span property="name">L{i}</span>"#
            ));
        }
        html.push_str(&"</div>".repeat(target));
        html.push_str("</div></body></html>");

        // Strip the `property` attribute from the outermost typed element only.
        let html = html.replacen(r#"property="child" "#, "", 1);

        let out = extract(&html);
        assert!(!out.nodes.is_empty());
    }

    /// A top-level `typeof=""` is reported through an `EmptyType` warning.
    #[test]
    fn empty_typeof_warns() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="">
 <span property="name">Something</span>
</div>
</body></html>"#,
        );

        assert!(
            out.warnings
                .iter()
                .any(|warning| warning.code == WarningCode::EmptyType),
            "empty typeof should produce EmptyType warning"
        );
    }

    /// `value` on a `data` element wins over the element text.
    #[test]
    fn data_element_with_value() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product">
 <span property="name">Widget</span>
 <data property="sku" value="12345">Product SKU</data>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes[0].properties["sku"], text("12345"));
    }

    /// An element without text still records an (empty) text value.
    #[test]
    fn property_with_empty_text() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product">
 <span property="name"></span>
</div>
</body></html>"#,
        );

        assert_eq!(
            out.nodes[0].properties["name"],
            vec![SchemaValue::Text(String::new())]
        );
    }

    /// `typeof` is honoured even when no vocabulary is declared anywhere.
    #[test]
    fn typeof_without_vocab() {
        let out = extract(
            r#"<html><body>
<div typeof="Product">
 <span property="name">Widget</span>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes.len(), 1);
        assert_eq!(out.nodes[0].types, vec!["Product"]);
    }

    /// A URL-looking `content` value is classified as a URL.
    #[test]
    fn content_attribute_with_url_value() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product">
 <meta property="url" content="https://example.com/product">
</div>
</body></html>"#,
        );

        assert_eq!(
            out.nodes[0].properties["url"],
            url("https://example.com/product")
        );
    }

    /// `resource` on a nested typed property becomes the nested node's `@id`.
    #[test]
    fn resource_on_nested_property() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product">
 <span property="name">Widget</span>
 <div property="offers" typeof="Offer" resource="https://example.com/offer/1">
 <span property="priceCurrency">USD</span>
 </div>
</div>
</body></html>"#,
        );

        let offers = &out.nodes[0].properties["offers"];
        let SchemaValue::Node(offer) = &offers[0] else {
            panic!("Expected nested Offer node");
        };
        assert_eq!(
            offer.properties["@id"],
            url("https://example.com/offer/1")
        );
    }

    /// Prefix declarations at different levels do not interfere with extraction.
    #[test]
    fn nested_prefix_declarations() {
        let out = extract(
            r#"<html prefix="schema: https://schema.org/"><body>
<div prefix="og: https://ogp.me/ns#" vocab="https://schema.org/" typeof="Product">
 <span property="name">Widget</span>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes.len(), 1);
        assert_eq!(out.nodes[0].types, vec!["Product"]);
    }

    /// A typed element without `property` inside another item must not break the outer item.
    #[test]
    fn independent_typeof_nested_in_typed_node() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="WebPage">
 <span property="name">My Page</span>
 <div typeof="Organization">
 <span property="name">Acme Corp</span>
 </div>
</div>
</body></html>"#,
        );

        assert!(out
            .nodes
            .iter()
            .any(|node| node.types.iter().any(|ty| ty == "WebPage")));
    }

    /// Without `datetime`, a `time` element falls back to its text.
    #[test]
    fn time_element_without_datetime() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Event">
 <span property="name">Concert</span>
 <time property="startDate">June 15, 2024</time>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes[0].properties["startDate"], text("June 15, 2024"));
    }

    /// Non-ASCII text survives extraction unchanged.
    #[test]
    fn unicode_preserved_in_values() {
        let out = extract(
            r#"<html><body>
<div vocab="https://schema.org/" typeof="Product">
 <span property="name">Gerät für Ökologie</span>
</div>
</body></html>"#,
        );

        assert_eq!(out.nodes.len(), 1);
        assert_eq!(out.nodes[0].properties["name"], text("Gerät für Ökologie"));
    }
}