#[cfg(test)]
mod tests {
use cooklang_import::extractors::{Extractor, HtmlClassExtractor, ParsingContext};
use scraper::Html;
#[test]
fn test_wprm_recipe_extraction() {
let html = r#"
<html>
<body>
<h1 class="wprm-recipe-name">Chocolate Chip Cookies</h1>
<div class="wprm-recipe-summary">Delicious homemade chocolate chip cookies</div>
<div class="wprm-recipe-ingredients-container">
<ul>
<li>2 cups all-purpose flour</li>
<li>1 cup butter, softened</li>
<li>1 cup sugar</li>
<li>2 eggs</li>
<li>1 tsp vanilla extract</li>
<li>2 cups chocolate chips</li>
</ul>
</div>
<div class="wprm-recipe-instructions-container">
<ul>
<li>Preheat oven to 350°F</li>
<li>Mix butter and sugar until fluffy</li>
<li>Add eggs and vanilla</li>
<li>Gradually add flour</li>
<li>Fold in chocolate chips</li>
<li>Bake for 10-12 minutes</li>
</ul>
</div>
<span class="wprm-recipe-prep-time">15 minutes</span>
<span class="wprm-recipe-cook-time">12 minutes</span>
<span class="wprm-recipe-servings">24 cookies</span>
</body>
</html>
"#;
let context = ParsingContext {
url: "https://example.com/recipe".to_string(),
document: Html::parse_document(html),
texts: None,
};
let extractor = HtmlClassExtractor;
let result = extractor.parse(&context);
assert!(result.is_ok());
let recipe = result.unwrap();
assert_eq!(recipe.name, "Chocolate Chip Cookies");
assert_eq!(
recipe.description,
Some("Delicious homemade chocolate chip cookies".to_string())
);
assert!(recipe.content.contains("2 cups all-purpose flour"));
assert!(recipe.content.contains("1 cup butter, softened"));
assert!(recipe.content.contains("Preheat oven to 350°F"));
assert!(recipe.content.contains("Bake for 10-12 minutes"));
assert_eq!(
recipe.metadata.get("prep_time"),
Some(&"15 minutes".to_string())
);
assert_eq!(
recipe.metadata.get("cook_time"),
Some(&"12 minutes".to_string())
);
assert_eq!(
recipe.metadata.get("servings"),
Some(&"24 cookies".to_string())
);
}
#[test]
fn test_tasty_recipes_extraction() {
let html = r#"
<html>
<body>
<h2 class="tasty-recipes-title">Banana Bread</h2>
<div class="tasty-recipes-description">Moist and delicious banana bread</div>
<div class="tasty-recipes-ingredients">
<li>3 ripe bananas</li>
<li>2 cups flour</li>
<li>1 cup sugar</li>
<li>1/2 cup butter</li>
<li>2 eggs</li>
</div>
<div class="tasty-recipes-instructions">
<li>Mash bananas</li>
<li>Mix wet ingredients</li>
<li>Add dry ingredients</li>
<li>Pour into loaf pan</li>
<li>Bake at 350°F for 60 minutes</li>
</div>
<span class="tasty-recipes-yield">1 loaf</span>
</body>
</html>
"#;
let context = ParsingContext {
url: "https://example.com/banana-bread".to_string(),
document: Html::parse_document(html),
texts: None,
};
let extractor = HtmlClassExtractor;
let result = extractor.parse(&context);
assert!(result.is_ok());
let recipe = result.unwrap();
assert_eq!(recipe.name, "Banana Bread");
assert_eq!(
recipe.description,
Some("Moist and delicious banana bread".to_string())
);
assert!(recipe.content.contains("3 ripe bananas"));
assert!(recipe.content.contains("Mash bananas"));
assert_eq!(recipe.metadata.get("servings"), Some(&"1 loaf".to_string()));
}
#[test]
fn test_generic_recipe_classes() {
let html = r#"
<html>
<body>
<h1 class="recipe-title">Pasta Carbonara</h1>
<p class="recipe-description">Classic Italian pasta dish</p>
<div class="recipe-ingredients">
<ul>
<li>400g spaghetti</li>
<li>200g pancetta</li>
<li>4 eggs</li>
<li>100g Parmesan cheese</li>
</ul>
</div>
<div class="recipe-instructions">
<p>Cook pasta according to package</p>
<p>Fry pancetta until crispy</p>
<p>Mix eggs and cheese</p>
<p>Combine everything off heat</p>
</div>
<div class="recipe-prep-time">10 minutes</div>
<div class="recipe-cook-time">20 minutes</div>
</body>
</html>
"#;
let context = ParsingContext {
url: "https://example.com/carbonara".to_string(),
document: Html::parse_document(html),
texts: None,
};
let extractor = HtmlClassExtractor;
let result = extractor.parse(&context);
assert!(result.is_ok());
let recipe = result.unwrap();
assert_eq!(recipe.name, "Pasta Carbonara");
assert_eq!(
recipe.description,
Some("Classic Italian pasta dish".to_string())
);
assert!(recipe.content.contains("400g spaghetti"));
assert!(recipe.content.contains("Cook pasta according to package"));
}
#[test]
fn test_fallback_to_fuzzy_matching() {
let html = r#"
<html>
<body>
<h1>Simple Salad</h1>
<div class="my-custom-ingredients-list">
<li>Lettuce</li>
<li>Tomatoes</li>
<li>Cucumber</li>
</div>
<div class="custom-directions-block">
<p>Wash vegetables</p>
<p>Chop everything</p>
<p>Mix and serve</p>
</div>
</body>
</html>
"#;
let context = ParsingContext {
url: "https://example.com/salad".to_string(),
document: Html::parse_document(html),
texts: None,
};
let extractor = HtmlClassExtractor;
let result = extractor.parse(&context);
match result {
Ok(recipe) => {
assert_eq!(recipe.name, "Simple Salad");
assert!(recipe.content.contains("Lettuce"));
assert!(recipe.content.contains("Wash vegetables"));
}
Err(e) => {
println!("Expected error: {e}");
assert!(e.to_string().contains("Could not extract recipe content"));
}
}
}
#[test]
fn test_extraction_failure_no_content() {
let html = r#"
<html>
<body>
<h1>Not a Recipe</h1>
<p>This is just a regular webpage</p>
</body>
</html>
"#;
let context = ParsingContext {
url: "https://example.com/not-recipe".to_string(),
document: Html::parse_document(html),
texts: None,
};
let extractor = HtmlClassExtractor;
let result = extractor.parse(&context);
assert!(result.is_err());
}
}