Skip to main content

domi/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2//! domi provides abstractions and utilities for
3//! [domain-list-community](https://github.com/v2fly/domain-list-community)
4//! data source.
5//!
6//!
7//! <div class="warning">
8//! Warning:
9//! This crate is not kept in sync with the official implementation; DO NOT use it in production.
10//! </div>
11//!
12//! ## Example
13//! ```rust,no_run
14//! use std::{fs, path::Path};
15//!
16//! use domi::Entries;
17//!
18//! const BASE: &str = "alphabet";
19//!
20//! fn main() {
21//!     let data_root = Path::new("data");
22//!     let content = fs::read_to_string(data_root.join(BASE)).unwrap();
23//!     let mut entries = Entries::parse(BASE, content.lines());
24//!     while let Some(i) = entries.next_include() {
25//!         let include = fs::read_to_string(data_root.join(i.value.as_ref())).unwrap();
26//!         entries.parse_extend(i.value.as_ref(), BASE, include.lines());
27//!     }
28//!     // expect: domain_keyword: Some(["fitbit", "google"])
29//!     // changing the `Some(&[])` to something else can alter the behavior,
30//!     // see crate::Entries
31//!     println!("{:?}", entries)
32//! }
33//! ```
34
35#[cfg(feature = "prost")]
36pub mod geosite;
37#[cfg(feature = "serde")]
38pub mod srs;
39
40use std::{
41    cell::{Cell, RefCell},
42    cmp::Ordering,
43    collections::{BTreeSet, HashSet},
44    fmt::Display,
45    hash::Hash,
46    marker::PhantomData,
47    mem,
48    ops::Deref,
49    panic::{AssertUnwindSafe, catch_unwind},
50    rc::Rc,
51    str::Lines,
52};
53
54use cfg_if::cfg_if;
55
56cfg_if! {
57    if #[cfg(feature = "ahash")] {
58        use ::ahash::RandomState as Hasher;
59    } else if #[cfg(feature = "rustc-hash")] {
60        use ::rustc_hash::FxBuildHasher as Hasher;
61    } else {
62        use ::std::collections::hash_map::RandomState as Hasher;
63    }
64}
65
/// Inline array length used for the crate's `SmallVec`-backed aliases.
///
/// Only compiled when the `smallvec` feature is enabled.
#[cfg(feature = "smallvec")]
const SMALL_VEC_STACK_SIZE: usize = 4;
68
69struct Interner<T>
70where
71    T: Eq + Hash + ?Sized,
72{
73    set: Option<HashSet<Rc<T>, Hasher>>,
74}
75
76impl<T> Interner<T>
77where
78    T: Eq + Hash + ?Sized,
79{
80    fn new() -> Self {
81        Self { set: None }
82    }
83
84    fn initialize(&mut self) {
85        self.set = Some(HashSet::default())
86    }
87
88    fn intern(&mut self, s: Rc<T>) -> Rc<T> {
89        let set = self
90            .set
91            .as_mut()
92            .expect("intern pool not initialized; missing PoolGuard");
93        if let Some(v) = set.get(&s) {
94            v.clone()
95        } else {
96            set.insert(s.clone());
97            s
98        }
99    }
100
101    fn intern_ref(&self, value: &T) -> Option<Rc<T>> {
102        let set = self.set.as_ref()?;
103        set.get(value).cloned()
104    }
105
106    fn clear(&mut self) {
107        self.set = None;
108    }
109}
110
/// Address-only identity of an interned value.
///
/// Keeping just the address discards any DST metadata of the pointer,
/// which saves some stack memory.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(transparent)]
struct InternId(usize);

impl InternId {
    /// Records the address of the value `value` points to.
    ///
    /// # Safety
    ///
    /// MUST be Intern-ed by Interner
    #[inline(always)]
    unsafe fn from_interned<T: ?Sized>(value: Rc<T>) -> Self {
        let pointee: *const T = Rc::as_ptr(&value);
        Self(pointee.addr())
    }
}
126
127macro_rules! define_pool {
128    ($name:ident, $ty:ty) => {
129        ::paste::paste! {
130            thread_local! {
131                static [< $name:snake:upper _POOL >]: RefCell<Interner<$ty>> = RefCell::new(Interner::new());
132            }
133            struct [< $name Pool >];
134            impl [< $name Pool >] {
135                fn initialize() {
136                    [< $name:snake:upper _POOL >].with(|p| p.borrow_mut().initialize())
137                }
138                fn [< $name:snake >](value: Rc<$ty>) -> Rc<$ty> {
139                    [< $name:snake:upper _POOL >].with(|p| p.borrow_mut().intern(value))
140                }
141                fn [< $name:snake _ref >](value: &$ty) -> Option<Rc<$ty>> {
142                    [< $name:snake:upper _POOL >].with(|p| p.borrow().intern_ref(value))
143                }
144                fn clear() {
145                    [< $name:snake:upper _POOL >].with(|p| p.borrow_mut().clear())
146                }
147            }
148        }
149    };
150}
151
152define_pool!(Base, str);
153define_pool!(Attr, str);
154define_pool!(DomainValue, str);
155define_pool!(AttrSlice, [Rc<str>]);
156
/// Builds an `Rc` from `$s`, going through the `$name` pool only when
/// `$use_pool` is true; otherwise a plain `Rc::from($s)` is produced.
///
/// `$s` is expanded more than once, so pass a side-effect-free expression.
macro_rules! maybe_intern {
    ($use_pool:expr, $s:expr, $name:ident) => {
        if $use_pool {
            intern!($s, $name)
        } else {
            Rc::from($s)
        }
    };
}
166
/// Returns the `Rc` pooled in `<$name>Pool` for `$s`.
///
/// The pool is first probed by reference; only on a miss is a new `Rc` built
/// from `$s` and inserted. `$s` is expanded twice, so pass a side-effect-free
/// expression.
macro_rules! intern {
    ($s:expr, $name:ident) => {
        ::paste::paste! {
            match crate::[<$name Pool>]::[<$name:snake _ref>]($s.as_ref()) {
                Some(pooled) => pooled,
                None => crate::[<$name Pool>]::[<$name:snake>](Rc::from($s)),
            }
        }
    };
}
175
thread_local! {
    /// Per-thread count of live [`PoolGuard`]s.
    ///
    /// `isize::MAX` is used as a poison marker after clearing the pools
    /// panicked inside `Drop`.
    static POOL_USED_COUNT: Cell<isize> = const { Cell::new(0) };
}

/// Zero-sized marker that makes the containing type neither `Send` nor `Sync`.
type NotSyncNorSend = PhantomData<Rc<()>>;
181
182#[derive(Debug)]
183struct PoolGuard {
184    _marker: NotSyncNorSend,
185}
186
187impl PoolGuard {
188    fn acquire() -> Self {
189        let n = POOL_USED_COUNT.get();
190        if n <= 0 {
191            POOL_USED_COUNT.set(1);
192            #[cfg(debug_assertions)]
193            if n < 0 {
194                dbg!("POOL_USED_COUNT underflow", n);
195            }
196            Self::clear_pools();
197            AttrPool::initialize();
198            BasePool::initialize();
199            DomainValuePool::initialize();
200            AttrSlicePool::initialize();
201        } else if n == isize::MAX {
202            panic!("Pool is poisoned due to previous panic in Drop");
203        } else {
204            POOL_USED_COUNT.set(n + 1);
205        }
206        Self {
207            _marker: NotSyncNorSend::default(),
208        }
209    }
210
211    fn clear_pools() {
212        AttrPool::clear();
213        BasePool::clear();
214        DomainValuePool::clear();
215        AttrSlicePool::clear();
216    }
217}
218
219impl Default for PoolGuard {
220    fn default() -> Self {
221        Self::acquire()
222    }
223}
224
225impl Drop for PoolGuard {
226    fn drop(&mut self) {
227        let n = POOL_USED_COUNT.get() - 1;
228        if n <= 0 {
229            POOL_USED_COUNT.set(0);
230            if catch_unwind(AssertUnwindSafe(Self::clear_pools)).is_err() {
231                POOL_USED_COUNT.set(isize::MAX);
232            };
233            #[cfg(debug_assertions)]
234            if n < 0 {
235                dbg!("POOL_USED_COUNT underflow", n);
236            }
237        } else {
238            POOL_USED_COUNT.set(n);
239        }
240    }
241}
242
/// Matching behavior of a single domain rule.
///
/// Each variant corresponds to a prefix in the source file
/// (`domain:`, `full:`, `keyword:`, `regexp:`).
///
/// A rule without any prefix is treated as [`DomainKind::Suffix`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum DomainKind {
    /// This variant's matching prefix is `domain:`.
    Suffix,
    /// This variant's matching prefix is `full:`.
    Full,
    /// This variant's matching prefix is `keyword:`.
    Keyword,
    /// This variant's matching prefix is `regexp:`.
    Regex,
}

/// Formats the kind as its source-file prefix, without the trailing `:`.
impl Display for DomainKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let prefix = match self {
            Self::Suffix => "domain",
            Self::Full => "full",
            Self::Keyword => "keyword",
            Self::Regex => "regexp",
        };
        f.write_str(prefix)
    }
}
268
269#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
270pub enum Kind {
271    Domain(DomainKind),
272    Include,
273}
274
275impl Display for Kind {
276    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
277        match *self {
278            Self::Domain(d) => d.fmt(f),
279            Self::Include => f.write_str("include"),
280        }
281    }
282}
283
284/// Single parsed entry
285#[derive(Debug, Clone, PartialEq, Eq)]
286pub struct Entry {
287    pub kind: Kind,
288    pub base: Rc<str>,
289    pub value: Rc<str>,
290    pub attrs: Rc<[Rc<str>]>,
291}
292
293impl Ord for Entry {
294    fn cmp(&self, other: &Self) -> Ordering {
295        match (&self.kind, &other.kind) {
296            (Kind::Include, Kind::Include) => self
297                .value
298                .cmp(&other.value)
299                .then_with(|| self.base.cmp(&other.base))
300                .then_with(|| self.attrs.cmp(&other.attrs)),
301            (Kind::Include, _) => Ordering::Less,
302            (_, Kind::Include) => Ordering::Greater,
303
304            (Kind::Domain(a), Kind::Domain(b)) => a
305                .cmp(b)
306                .then_with(|| self.value.cmp(&other.value))
307                .then_with(|| self.base.cmp(&other.base))
308                .then_with(|| self.attrs.cmp(&other.attrs)),
309        }
310    }
311}
312
313impl PartialOrd for Entry {
314    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
315        Some(self.cmp(other))
316    }
317}
318
/// A borrowed string that is known to span exactly one line.
///
/// # Invariants
///
/// The contained string must not contain `\n` or `\r`.
#[repr(transparent)]
#[derive(Debug, Clone, Copy)]
pub struct OneLine<'a> {
    inner: &'a str,
}

impl<'a> OneLine<'a> {
    /// Wraps `s` when it contains no line break.
    ///
    /// Returns [`None`] if `s` contains `\n` or `\r`.
    pub fn new(s: &'a str) -> Option<Self> {
        let is_single_line = !s.contains(['\n', '\r']);
        is_single_line.then_some(Self { inner: s })
    }

    /// Wraps `s` without checking it for line breaks.
    ///
    /// # Safety
    ///
    /// `s` must not contain `\n` or `\r`.
    pub unsafe fn new_unchecked(s: &'a str) -> Self {
        Self { inner: s }
    }
}

/// Gives access to every `str` method on the wrapped line.
impl<'a> Deref for OneLine<'a> {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        self.inner
    }
}

impl<'a> AsRef<str> for OneLine<'a> {
    fn as_ref(&self) -> &str {
        self.inner
    }
}

/// Writes the wrapped line verbatim.
impl<'a> Display for OneLine<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.inner)
    }
}
369
#[cfg(test)]
mod one_line {
    use crate::OneLine;

    /// A string without line breaks is accepted and derefs to the input.
    #[test]
    fn test_accepts_normal_str() {
        let input = "hello world";
        assert!(matches!(OneLine::new(input), Some(line) if &*line == input));
    }

    /// A line feed anywhere in the input is rejected.
    #[test]
    fn test_rejects_lf() {
        assert!(OneLine::new("hello\nworld").is_none());
    }

    /// A carriage return anywhere in the input is rejected.
    #[test]
    fn test_rejects_cr() {
        assert!(OneLine::new("hello\rworld").is_none());
    }
}
395
396cfg_if! {
397    if #[cfg(feature = "smallvec")] {
398        type AttrSlice = ::smallvec::SmallVec<[Rc<str>; SMALL_VEC_STACK_SIZE]>;
399    } else {
400        type AttrSlice = Box<[Rc<str>]>;
401    }
402}
403
404impl Entry {
405    pub fn parse_line(base: &str, line: OneLine) -> Option<Self> {
406        Self::parse_line_inner::<false>(base, line)
407    }
408
409    #[inline(always)]
410    fn parse_line_inner<const USE_POOL: bool>(base: &str, line: OneLine) -> Option<Self> {
411        let line = line.trim();
412        if line.is_empty() || line.starts_with('#') {
413            return None;
414        }
415
416        let base = maybe_intern!(USE_POOL, base, Base);
417
418        let line = line.split_once('#').map(|(l, _)| l).unwrap_or(line).trim();
419
420        let (kind_str, value) = line.split_once(':').unwrap_or(("domain", line));
421        let kind = match kind_str {
422            "domain" => Kind::Domain(DomainKind::Suffix),
423            "full" => Kind::Domain(DomainKind::Full),
424            "regexp" => Kind::Domain(DomainKind::Regex),
425            "keyword" => Kind::Domain(DomainKind::Keyword),
426            "include" => Kind::Include,
427            _ => unimplemented!("unknown domain kind prefix: {kind_str}"),
428        };
429
430        let mut parts = value.split_whitespace();
431
432        let value = parts
433            .next()
434            .map(|s| maybe_intern!(USE_POOL, s, DomainValue))?;
435
436        let attrs: AttrSlice = parts
437            .filter_map(|s| {
438                s.strip_prefix('@')
439                    .map(|s| maybe_intern!(USE_POOL, s, Attr))
440            })
441            .collect();
442        let attrs = maybe_intern!(USE_POOL, attrs.as_ref(), AttrSlice);
443
444        Some(Self {
445            kind,
446            base,
447            value,
448            attrs,
449        })
450    }
451}
452
/// Every combination of base, attribute list and kind must round-trip through
/// the pooled parser into the expected interned [`Entry`].
#[test]
fn test_parse_line_combinations() {
    const BASES: [&str; 2] = ["google", "alphabet"];
    const ATTR_COMBOS: [&[&str]; 3] = [&[], &["attr1"], &["attr1", "attr2"]];
    const KINDS: [Kind; 5] = [
        Kind::Domain(DomainKind::Suffix),
        Kind::Domain(DomainKind::Full),
        Kind::Domain(DomainKind::Keyword),
        Kind::Domain(DomainKind::Regex),
        Kind::Include,
    ];

    let _pg = PoolGuard::acquire();

    for base in BASES {
        for attr_names in ATTR_COMBOS {
            for kind in KINDS {
                let suffix: String = attr_names.iter().map(|name| format!(" @{name}")).collect();
                let text = format!("{kind}:example.com{suffix}");
                let line = OneLine::new(&text).unwrap();

                let entry = Entry::parse_line_inner::<true>(base, line);

                let attrs: AttrSlice = attr_names.iter().map(|s| intern!(*s, Attr)).collect();
                let attrs = intern!(attrs.as_ref(), AttrSlice);
                let expected_domain = Some(Entry {
                    kind,
                    base: intern!(base, Base),
                    value: intern!("example.com", DomainValue),
                    attrs,
                });

                assert_eq!(entry, expected_domain, "line: {}", line);
            }
        }
    }
}
494
495/// Parsed entries from source
496///
497/// This type owns all parsed domains and include directives
498/// for a given base.
499///
500/// While an [`Entries`] value is alive, internal string intern pools are kept alive.
501/// They are automatically cleared when the last [`Entries`] is dropped on the thread.
502#[derive(Debug, Default)]
503pub struct Entries {
504    entries: BTreeSet<Entry>,
505    parsed_id: BTreeSet<Rc<str>>,
506    _pg: PoolGuard,
507}
508
509impl Entries {
510    pub fn parse(base: &str, content: Lines) -> Self {
511        let mut ret = Self::default();
512        ret.parse_extend(base, base, content);
513        ret
514    }
515
516    pub fn parse_extend(&mut self, current: &str, base: &str, content: Lines) {
517        let id = intern!(current, Base);
518        if self.parsed_id.contains(&id) {
519            return;
520        };
521        content
522            .filter_map(|line| {
523                // Safety:
524                // `line` comes from `Lines`, which guarantees no `\n` or `\r`.
525                Entry::parse_line_inner::<true>(base, unsafe { OneLine::new_unchecked(line) })
526            })
527            .for_each(|entry| {
528                self.entries.insert(entry);
529            });
530        self.parsed_id.insert(id);
531    }
532
533    /// Returns a deduplicated set of bases.
534    ///
535    /// Bases are ordered by their [`Ord`] implementation.
536    pub fn bases(&self) -> impl Iterator<Item = Rc<str>> + use<> {
537        let btree: BTreeSet<_> = self.entries.iter().map(|d| d.base.clone()).collect();
538        btree.into_iter()
539    }
540
541    /// Removes a [`Entry`] from the list.
542    pub fn pop(&mut self, entry: &Entry) -> bool {
543        self.entries.remove(entry)
544    }
545
546    /// Take and returns the inner [`Vec<Entry>`][Entry]
547    pub fn take(&mut self) -> Vec<Entry> {
548        mem::take(&mut self.entries).into_iter().collect()
549    }
550
551    /// Returns a snapshot iterator of current includes.
552    ///
553    /// Note:
554    /// This iterator is **not live**. Newly added includes (e.g. via
555    /// `parse_extend`) will **not** appear in the iterator returned by
556    /// this call.
557    ///
558    /// To process includes incrementally, call [`Entries::drain_includes`] repeatedly.
559    /// # Example:
560    /// ```rust,no_run
561    /// # use std::fs;
562    /// # use domi::Entries;
563    /// # const BASE: &str = "";
564    /// # let mut entries = Entries::parse(BASE, "".lines());
565    /// while let Some(i) = entries.drain_includes().next() {
566    ///     let include = fs::read_to_string(i.base.as_ref()).unwrap();
567    ///     entries.parse_extend(i.base.as_ref(), BASE, include.lines());
568    /// }
569    /// ```
570    pub fn drain_includes(&mut self) -> impl Iterator<Item = Entry> + use<> {
571        let entries = mem::take(&mut self.entries);
572        let (includes, others) = entries
573            .into_iter()
574            .partition(|e| matches!(e.kind, Kind::Include));
575        self.entries = others;
576        includes.into_iter()
577    }
578
579    /// Returns and consume one include
580    pub fn next_include(&mut self) -> Option<Entry> {
581        let entry = self
582            .entries
583            .range(..)
584            .find(|e| matches!(e.kind, Kind::Include))
585            .cloned()?;
586        self.entries.remove(&entry).then_some(entry)
587    }
588
589    /// Flatten domains by `base` with optional attribute filters,
590    /// then **[`sort`][slice::sort]** and **[`dedup`][Vec::dedup]** the selected domains.
591    ///
592    /// # Selection rules:
593    /// - `attr_filters == None`:
594    ///   Selects **all** domains with a matching `base`.
595    ///
596    /// - `attr_filters == Some(&[])`:
597    ///   Selects **only** domains with a matching `base` and **no** attributes.
598    ///
599    /// - `attr_filters == Some(filters)`:
600    ///   Selects domains with a matching `base` that satisfy **all** filters:
601    ///   - [`AttrFilter::Has`]: **At least one** of `candidate.attrs` matches the filter value.
602    ///   - [`AttrFilter::Lacks`]: **No** `candidate.attrs` matches the filter value.
603    ///     This effectively **overrides** any [`AttrFilter::Has`] matches for the same attribute.
604    ///
605    /// ### Performance
606    /// Unlike [`flatten_drain`][Self::flatten_drain], this method retains the original domains
607    /// by cloning each selected [`Domain`], which incurs some additional allocation overhead.
608    ///
609    ///
610    /// Returns [`None`] if no domains are selected (i.e., the result is empty),
611    /// or if `base` was never seen during parsing.
612    pub fn flatten(
613        &mut self,
614        base: &str,
615        attr_filters: Option<&[AttrFilter]>,
616    ) -> Option<FlatDomains> {
617        self.flatten_inner::<false>(base, attr_filters)
618    }
619
620    /// Similar to [`flatten`][Self::flatten], but **drains** selected domains from `self.domains`.
621    ///
622    /// Only non-selected domains are retained in the original collection. This is generally more
623    /// efficient than [`flatten`][Self::flatten] as it moves out domains instead of cloning them.
624    pub fn flatten_drain(
625        &mut self,
626        base: &str,
627        attr_filters: Option<&[AttrFilter]>,
628    ) -> Option<FlatDomains> {
629        self.flatten_inner::<true>(base, attr_filters)
630    }
631
632    #[inline(always)]
633    fn flatten_inner<const DRAIN: bool>(
634        &mut self,
635        base: &str,
636        attr_filters: Option<&[AttrFilter]>,
637    ) -> Option<FlatDomains> {
638        if self.entries.is_empty() {
639            return None;
640        }
641        let mut flattened = Vec::with_capacity(self.entries.len());
642        let base = BasePool::base_ref(base)?;
643        // Convert `AttrFilter` into an internal `Rc` version for fast lookup.
644        // After intern lookup, comparisons in flatten/filter use `Rc::ptr_eq`
645        // instead of string-by-string comparison, which significantly improves
646        // performance on hot paths with many domains.
647        //
648        // Note: If a caller provides an attribute not already in the AttrPool,
649        // it will be interned on-the-fly, incurring a one-time interning cost
650        // for that specific filter.
651        let attr_filters: Option<AttrFilterSlice> = attr_filters.map(|afs| {
652            afs.iter()
653                .map(|f| match f {
654                    AttrFilter::Has(s) => {
655                        PackedAttr::new(unsafe { InternId::from_interned(intern!(*s, Attr)) }, true)
656                    }
657                    AttrFilter::Lacks(s) => PackedAttr::new(
658                        unsafe { InternId::from_interned(intern!(*s, Attr)) },
659                        false,
660                    ),
661                })
662                .collect()
663        });
664
665        if DRAIN {
666            flatten::drain_matches(
667                &mut self.entries,
668                base,
669                attr_filters.as_deref(),
670                &mut flattened,
671            )
672        } else {
673            flatten::retain_all(&self.entries, base, attr_filters.as_deref(), &mut flattened)
674        }
675
676        if flattened.is_empty() {
677            return None;
678        };
679
680        Some(FlatDomains { inner: flattened })
681    }
682}
683
684mod flatten {
685    use std::{collections::BTreeSet, rc::Rc};
686
687    use crate::{Entry, Kind, PackedAttr};
688
689    #[inline]
690    fn should_select(
691        candidate: &Entry,
692        base: &Rc<str>,
693        attr_filters: Option<&[PackedAttr]>,
694    ) -> bool {
695        if matches!(candidate.kind, Kind::Include) || !Rc::ptr_eq(base, &candidate.base) {
696            return false;
697        }
698
699        match &attr_filters {
700            None => true,
701            Some([]) => candidate.attrs.is_empty(),
702            Some(attr_filters) => attr_filters.iter().all(|packed_attr| {
703                if packed_attr.tag() {
704                    candidate
705                        .attrs
706                        .iter()
707                        .any(|attr| attr.as_ptr() as usize == packed_attr.addr())
708                } else {
709                    candidate
710                        .attrs
711                        .iter()
712                        .all(|attr| attr.as_ptr() as usize != packed_attr.addr())
713                }
714            }),
715        }
716    }
717
718    pub(crate) fn retain_all(
719        domains: &BTreeSet<Entry>,
720        base: Rc<str>,
721        attr_filters: Option<&[PackedAttr]>,
722        flattened: &mut Vec<Entry>,
723    ) {
724        domains.iter().for_each(|candidate| {
725            if should_select(candidate, &base, attr_filters) {
726                flattened.push(candidate.clone());
727            }
728        });
729    }
730
731    pub(crate) fn drain_matches(
732        domains: &mut BTreeSet<Entry>,
733        base: Rc<str>,
734        attr_filters: Option<&[PackedAttr]>,
735        flattened: &mut Vec<Entry>,
736    ) {
737        flattened.extend(domains.extract_if(.., |candidate| {
738            should_select(candidate, &base, attr_filters)
739        }));
740    }
741}
742
/// Filtering behavior. Used by [`Entries::flatten`]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AttrFilter<'a> {
    /// Selects entries that carry the named attribute.
    Has(&'a str),
    /// Selects entries that do not carry the named attribute.
    Lacks(&'a str),
}
748
749cfg_if! {
750    if #[cfg(feature = "smallvec")] {
751        type AttrFilterSlice = ::smallvec::SmallVec<[PackedAttr; SMALL_VEC_STACK_SIZE]>;
752    } else {
753        type AttrFilterSlice = Box<[PackedAttr]>;
754    }
755}
756
757#[repr(transparent)]
758struct PackedAttr {
759    inner: usize,
760}
761
762impl PackedAttr {
763    const TAG_MASK: usize = 0x1;
764    const ADDR_MASK: usize = !Self::TAG_MASK;
765
766    #[inline(always)]
767    pub const fn new(attr_id: InternId, tag: bool) -> Self {
768        let InternId(ptr_addr) = attr_id;
769        assert!(ptr_addr.is_multiple_of(2));
770        Self {
771            inner: ptr_addr | tag as usize,
772        }
773    }
774
775    #[inline(always)]
776    pub const fn addr(&self) -> usize {
777        self.inner & Self::ADDR_MASK
778    }
779
780    #[inline(always)]
781    pub const fn tag(&self) -> bool {
782        (self.inner & Self::TAG_MASK) != 0
783    }
784}
785
/// Address and tag must survive packing for both tag values.
#[test]
fn test_packed_attr_logic() {
    const ADDR: usize = 0x12345670;
    let attr_id = InternId(ADDR);

    for tag in [true, false] {
        let packed = PackedAttr::new(attr_id, tag);
        assert_eq!(packed.tag(), tag);
        assert_eq!(packed.addr(), ADDR);
        assert_eq!(packed.inner, ADDR | usize::from(tag));
    }
}
801
/// Packing must also work for the address of a real heap allocation.
#[test]
fn test_with_real_pointer() {
    let boxed = Box::new(42);
    let ptr_addr = &*boxed as *const i32 as usize;

    // The tag bit must be free in a real `i32` allocation.
    assert_eq!(ptr_addr & 0x1, 0);

    let attr_id = InternId(ptr_addr);
    let tagged = PackedAttr::new(attr_id, true);
    let untagged = PackedAttr::new(attr_id, false);

    assert!(tagged.tag());
    assert!(!untagged.tag());
    assert_eq!(tagged.addr(), ptr_addr);
    assert_eq!(untagged.addr(), ptr_addr);
}
819
/// `PackedAttr` must be constructible and readable in const context.
#[test]
fn test_const_capability() {
    const PACKED: PackedAttr = PackedAttr::new(InternId(0x1000), true);

    const { assert!(PACKED.tag()) };
    assert_eq!(PACKED.addr(), 0x1000);
}
828
/// Base name shared by the unit tests of this crate.
#[cfg(test)]
const BASE: &str = "base";

/// Popping an entry that is present must report success.
#[test]
fn test_pop_domain() {
    let mut entries = Entries::parse(BASE, "example.com".lines());
    let first = entries.entries.first().cloned().unwrap();
    assert!(entries.pop(&first));
}
838
839/// Domain entries flattened by [`Entries::flatten`]
840#[derive(Clone)]
841pub struct FlatDomains {
842    inner: Vec<Entry>,
843}
844
845impl FlatDomains {
846    /// Consumes [`self`] and returns the underlying [`Vec<Domain>`][Domain].
847    pub fn into_vec(self) -> Vec<Entry> {
848        self.inner
849    }
850
851    /// inner [`Vec<Domain>`][Domain] will be [`Vec::split_off`]
852    /// at the next kind index to reduce allocations.
853    ///
854    /// At most one call per [`DomainKind`] variant (maximum 4 calls).
855    pub fn take_next(&mut self) -> Option<Box<[Entry]>> {
856        let kind = self.inner.last()?.kind;
857        let idx = self.inner.partition_point(|d| d.kind != kind);
858        let v = self.inner.split_off(idx).into_boxed_slice();
859        (!v.is_empty()).then_some(v)
860    }
861}
862
/// Draining without filters must move every domain out and keep all kinds.
#[test]
fn test_flatten_domains() {
    let content = "\
            domain:example.com
            full:full.example.com
            keyword:keyword
        ";

    let mut entries = Entries::parse(BASE, content.lines());
    let flat = entries.flatten_drain(BASE, None).unwrap();
    assert!(entries.entries.is_empty());

    let flat_domains = flat.into_vec();
    assert_eq!(flat_domains.len(), 3);
    for kind in [DomainKind::Suffix, DomainKind::Full, DomainKind::Keyword] {
        assert!(flat_domains.iter().any(|d| d.kind == Kind::Domain(kind)));
    }
}
895
/// One entry per domain kind must come back as four separate batches.
#[test]
fn test_flatten_domains_take_next() {
    let content = "\
            domain:domain
            full:full
            keyword:keyword
            regexp:regexp
        ";

    let mut entries = Entries::parse(BASE, content.lines());
    let mut flat = entries.flatten_drain(BASE, None).unwrap();
    assert!(entries.entries.is_empty());

    let batches = std::iter::from_fn(|| flat.take_next()).count();
    assert_eq!(batches, 4);
}
917
/// Two lines that differ only by a trailing comment must collapse into a
/// single entry.
#[test]
fn test_dedup() {
    let content = "\
            keyword:keyword
            keyword:keyword # dedup
        ";

    let mut entries = Entries::parse(BASE, content.lines());

    let flat = entries.flatten_drain(BASE, None).unwrap().into_vec();

    assert!(entries.entries.is_empty());

    // The point of the test: the duplicate must be gone.
    assert_eq!(flat.len(), 1);
    assert_eq!(flat[0].kind, Kind::Domain(DomainKind::Keyword));
}
933
/// Test helper checking that output does not depend on input line order.
#[cfg(test)]
mod sort_predictable {
    use crate::{BASE, Entries, FlatDomains};

    const VARIANT_LEN: usize = 6;
    /// The same three rules in every possible line order.
    const CONTENTS: [&str; VARIANT_LEN] = [
        "full:full\nkeyword:keyword\nregexp:regexp",
        "full:full\nregexp:regexp\nkeyword:keyword",
        "keyword:keyword\nfull:full\nregexp:regexp",
        "keyword:keyword\nregexp:regexp\nfull:full",
        "regexp:regexp\nfull:full\nkeyword:keyword",
        "regexp:regexp\nkeyword:keyword\nfull:full",
    ];

    /// Builds a `T` from each permutation via `build` and asserts that the
    /// keys extracted by `cmp` are equal for all neighbouring results.
    pub(crate) fn test<T, K, F, C>(mut build: F, mut cmp: C)
    where
        K: Ord,
        F: FnMut(FlatDomains) -> T,
        C: FnMut(&T) -> K,
    {
        let list: [T; VARIANT_LEN] = CONTENTS.map(|content| {
            // Keep this a separate statement: the temporary `Entries` is
            // dropped here, before `build` runs.
            let domains = Entries::parse(BASE, content.lines())
                .flatten_drain(BASE, None)
                .unwrap();

            build(domains)
        });

        let all_equal = list
            .windows(2)
            .all(|pair| cmp(&pair[0]) == cmp(&pair[1]));
        assert!(all_equal);
    }
}
966}