// spider/utils/interner.rs
use crate::CaseInsensitiveString;
use hashbrown::HashSet;
use std::hash::Hash;
use std::marker::PhantomData;
use string_interner::symbol::SymbolUsize;
use string_interner::StringInterner;

// Backend selection for the string interner, driven by Cargo features.
//
// Cargo features are additive, so any subset of the three backend features can
// end up enabled at once. The predicates below are exhaustive and mutually
// exclusive: exactly one `Backend` alias is defined for every combination.
// Precedence when several are enabled: bucket > string > buffer.

/// Bucket backend: used when its feature is enabled, or when neither the
/// string nor the buffer backend feature is enabled (the default).
#[cfg(any(
    feature = "string_interner_bucket_backend",
    all(
        not(feature = "string_interner_string_backend"),
        not(feature = "string_interner_buffer_backend"),
    ),
))]
type Backend = string_interner::backend::BucketBackend<SymbolUsize>;

/// String backend: used when its feature is enabled and the bucket backend
/// feature is not.
#[cfg(all(
    feature = "string_interner_string_backend",
    not(feature = "string_interner_bucket_backend"),
))]
type Backend = string_interner::backend::StringBackend<SymbolUsize>;

/// Buffer backend: used only when it is the sole backend feature enabled.
#[cfg(all(
    feature = "string_interner_buffer_backend",
    not(feature = "string_interner_bucket_backend"),
    not(feature = "string_interner_string_backend"),
))]
type Backend = string_interner::backend::BufferBackend<SymbolUsize>;

/// Store of visited links, kept as interned symbols rather than owned keys.
///
/// `K` is the link type accepted by the API; it is only read through
/// `AsRef<str>`, and no `K` value is stored in the struct itself.
#[derive(Debug, Clone)]
pub struct ListBucket<K: Eq + Hash + AsRef<str> = CaseInsensitiveString> {
    /// Symbols of the links that have been visited.
    pub(crate) links_visited: HashSet<SymbolUsize>,
    /// Interner mapping link text to symbols and back.
    pub(crate) interner: StringInterner<Backend>,
    /// Zero-sized marker tying the otherwise unused parameter `K` to the type.
    _marker: PhantomData<K>,
}

impl<K> Default for ListBucket<K>
where
    K: Eq + Hash + AsRef<str>,
{
    fn default() -> Self {
        Self {
            links_visited: HashSet::new(),
            interner: StringInterner::new(),
            _marker: PhantomData,
        }
    }
}

impl<K> ListBucket<K>
where
    K: Eq + Hash + AsRef<str>,
{
    /// Create an empty bucket (same as [`Default::default`]).
    pub fn new() -> Self {
        Self::default()
    }

    /// Mark `link` as visited.
    ///
    /// The link text is interned if it is not already known, and the
    /// resulting symbol is added to the visited set.
    #[inline(always)]
    pub fn insert(&mut self, link: K) {
        let symbol = self.interner.get_or_intern(link.as_ref());
        self.links_visited.insert(symbol);
    }

    /// Returns `true` if `link` has been marked as visited.
    ///
    /// This is a read-only lookup: text that was never interned cannot be in
    /// the visited set, so nothing is interned here.
    #[inline(always)]
    pub fn contains(&self, link: &K) -> bool {
        match self.interner.get(link.as_ref()) {
            Some(symbol) => self.links_visited.contains(&symbol),
            None => false,
        }
    }

    /// Number of visited links in the bucket.
    pub fn len(&self) -> usize {
        self.links_visited.len()
    }

    /// Returns `true` if no links have been marked as visited.
    pub fn is_empty(&self) -> bool {
        self.links_visited.is_empty()
    }

    /// Remove and yield every visited symbol.
    ///
    /// Only the visited set is drained; the interner is left untouched.
    pub fn drain(&mut self) -> hashbrown::hash_set::Drain<'_, SymbolUsize> {
        self.links_visited.drain()
    }

    /// Forget every visited link.
    ///
    /// Only the visited set is cleared; the interner is left untouched.
    pub fn clear(&mut self) {
        self.links_visited.clear()
    }

    /// Collect the visited links into a set of owned `K` values.
    ///
    /// Each visited symbol is resolved back to its text through the interner
    /// and converted with `K::from(String)`. Symbols the interner cannot
    /// resolve are skipped.
    pub fn get_links(&self) -> HashSet<K>
    where
        K: Hash + Clone + From<String>,
    {
        self.links_visited
            .iter()
            .filter_map(|symbol| self.interner.resolve(*symbol))
            .map(|s| K::from(s.to_owned()))
            .collect()
    }

    /// Move every link from `msg` that has not been visited into `links`.
    ///
    /// Membership is checked with [`Self::contains`], so candidate links are
    /// not interned merely by being inspected. `&mut self` is retained only
    /// to keep the signature compatible with existing callers.
    #[inline(always)]
    pub fn extend_links(&mut self, links: &mut HashSet<K>, msg: HashSet<K>)
    where
        K: Clone,
    {
        links.extend(msg.into_iter().filter(|link| !self.contains(link)));
    }

    /// Add `s` to `links` unless it has already been visited.
    ///
    /// `&mut self` is retained only to keep the signature compatible with
    /// existing callers; the bucket itself is not modified.
    #[inline(always)]
    pub fn extend_with_new_links(&mut self, links: &mut HashSet<K>, s: K)
    where
        K: Clone,
    {
        if !self.contains(&s) {
            links.insert(s);
        }
    }
}