kreuzcrawl 0.2.0

High-performance web crawling engine
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
//! Content processing: word count.

use tl::VDom;

/// Count the whitespace-separated words in the text of the document's `<body>`.
///
/// The first node matching the `body` selector is resolved through the DOM's
/// parser, and its inner text is split on Unicode whitespace; the number of
/// resulting non-empty tokens is returned.
///
/// Returns `0` when the selector yields no match or the matched handle cannot
/// be resolved to a node.
///
/// NOTE(review): the count covers whatever `inner_text` reports for the body.
/// Whether that excludes non-rendered content such as `<script>`/`<style>`
/// bodies depends on `tl` — confirm before relying on this as a strictly
/// "visible text" metric.
pub(crate) fn compute_word_count(dom: &VDom<'_>) -> usize {
    let parser = dom.parser();
    dom.query_selector("body")
        .and_then(|mut matches| matches.next())
        .and_then(|handle| handle.get(parser))
        // Count directly on the text returned by `inner_text`; copying it into
        // an owned `String` first would only add an allocation of the full body.
        .map_or(0, |body| body.inner_text(parser).split_whitespace().count())
}