pub fn extract_article(
html: &str,
url: &str,
config: &Config,
agent: Option<&dyn RLAgent>,
) -> Result<ExtractedArticle>

Extract a complete article (title/date metadata + body) from one page.
Body selection prefers, in order: the trained RL agent if supplied, then
the hybrid/heuristic node selector, then the plain baseline. Title and date
always come from the baseline metadata extractor. This is the single shared
entry point used by the CLI and the Python bindings.
```rust
use content_extractor_rl::{Config, extract_article};
let config = Config::default();
let html = std::fs::read_to_string("page.html").unwrap();
// No model -> hybrid heuristic selection (no training required):
let article = extract_article(&html, "https://example.com/post", &config, None).unwrap();
println!("{}", article.content);
```