Skip to main content

readable_rs/
node_ext.rs

1use crate::parser::NodeRef;
2use std::collections::HashMap;
3
/// Per-node readability bookkeeping held by the score store.
///
/// The `Default` value means "nothing recorded": no score and not
/// classified as a data table.
#[derive(Debug, Default, Clone, Copy)]
struct NodeMeta {
    /// Score assigned to the node, or `None` when it has not been scored.
    readability_score: Option<f64>,
    /// `true` once the node has been flagged as a data table.
    is_readability_data_table: bool,
}
9
10/// An external store that maps DOM nodes to readability metadata without
11/// mutating the nodes themselves.
12///
13/// [`NodeRef`] values are reference-counted and cannot hold arbitrary
14/// side-channel data, so the scorer keeps a `HashMap` keyed by the
15/// underlying pointer address.  Each call to [`crate::extract`] produces
16/// its own `NodeScoreStore`; scores are **not** shared across extractions.
17///
18/// # Examples
19///
20/// ```rust
21/// use readable_rs::parser::parse_html;
22/// use readable_rs::NodeScoreStore;
23///
24/// let doc = parse_html("<div><p>hello</p></div>");
25/// let store = NodeScoreStore::default();
26/// // store is empty — scores are populated internally during extraction.
27/// ```
28#[derive(Default, Debug, Clone)]
29pub struct NodeScoreStore {
30    map: HashMap<usize, NodeMeta>,
31}
32
33/// Derive a stable key for a [`NodeRef`] by taking the address of the
34/// inner `Node` value.  Two `NodeRef`s that point to the same tree node
35/// will produce the same key.
36fn node_key(node: &NodeRef) -> usize {
37    let ptr: *const _ = &**node;
38    ptr as usize
39}
40
41/// Extension trait that attaches readability scoring and table-classification
42/// metadata to a [`NodeRef`] via an external [`NodeScoreStore`].
43///
44/// Implemented for [`NodeRef`].  All methods require an explicit store
45/// argument so that the caller controls lifetime and isolation.
46pub trait NodeScoreExt {
47    /// Return the readability score previously assigned to this node, or
48    /// `None` if it has not been scored yet.
49    fn readability_score(&self, store: &NodeScoreStore) -> Option<f64>;
50
51    /// Add `offset` to this node's current score (treating an absent score
52    /// as `0.0`).
53    fn offset_readability_score(&self, store: &mut NodeScoreStore, offset: f64);
54
55    /// Overwrite this node's readability score.  Pass `None` to clear it.
56    fn set_readability_score(&self, store: &mut NodeScoreStore, value: Option<f64>);
57
58    /// Return whether this node has been classified as a *data* table
59    /// (as opposed to a layout table).  Defaults to `false`.
60    fn is_readability_data_table(&self, store: &NodeScoreStore) -> bool;
61
62    /// Mark (or unmark) this node as a data table.  Data tables are
63    /// protected from removal during the "clean conditionally" pass.
64    fn set_readability_data_table(&self, store: &mut NodeScoreStore, is_readability_data_table: bool);
65}
66
67impl NodeScoreExt for NodeRef {
68    fn readability_score(&self, store: &NodeScoreStore) -> Option<f64> {
69        let key = node_key(self);
70        store.map.get(&key).and_then(|m| m.readability_score)
71    }
72
73    fn offset_readability_score(&self, store: &mut NodeScoreStore, offset: f64) {
74        let key = node_key(self);
75        let entry = store.map.entry(key).or_default();
76        entry.readability_score = Some(entry.readability_score.unwrap_or(0.0) + offset);
77    }
78
79    fn set_readability_score(&self, store: &mut NodeScoreStore, value: Option<f64>) {
80        let key = node_key(self);
81        let entry = store.map.entry(key).or_default();
82        entry.readability_score = value;
83    }
84
85    fn is_readability_data_table(&self, store: &NodeScoreStore) -> bool {
86        let key = node_key(self);
87        store
88            .map
89            .get(&key)
90            .map(|m| m.is_readability_data_table)
91            .unwrap_or(false)
92    }
93
94    fn set_readability_data_table(&self, store: &mut NodeScoreStore, is_readability_data_table: bool) {
95        let key = node_key(self);
96        let entry = store.map.entry(key).or_default();
97        entry.is_readability_data_table = is_readability_data_table;
98    }
99}
100
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_html;

    /// Scores written through one store must not be visible through another,
    /// even for the very same node.
    #[test]
    fn store_isolated_per_instance() {
        let document = parse_html("<div><p>Hello</p></div>");
        let paragraph = document.select_first("p").unwrap().as_node().clone();

        let mut first = NodeScoreStore::default();
        let mut second = NodeScoreStore::default();

        // Writing to the first store leaves the second one empty.
        paragraph.set_readability_score(&mut first, Some(10.0));
        assert_eq!(paragraph.readability_score(&first), Some(10.0));
        assert_eq!(paragraph.readability_score(&second), None);

        // Writing to the second store does not disturb the first.
        paragraph.set_readability_score(&mut second, Some(5.0));
        assert_eq!(paragraph.readability_score(&first), Some(10.0));
        assert_eq!(paragraph.readability_score(&second), Some(5.0));
    }
}
121}