Skip to main content

imessage_database/util/
data_detected.rs

/*!
 Navigation helpers for Apple's `DDScannerResult` archives.

 These payloads are [`NSKeyedArchiver`](crate::util::plist) archives produced by
 the private `DataDetectorsCore` framework and stored inline in a message's
 attributed body (e.g. under `__kIMDataDetectedAttributeName`,
 `__kIMMoneyAttributeName`, or `__kIMAddressAttributeName`). Each archive
 describes a tree of *scanner results*; every node has a type
 ([`kind`](ScannerResult::kind)), an optional value ([`value`](ScannerResult::value)),
 the substring it matched ([`matched`](ScannerResult::matched)), and zero or more
 nested results ([`children`](ScannerResult::children)).

 [`ScannerResult`] is a lazy, borrowing cursor over one node of that tree. The
 semantic detector types parse themselves from a node via [`FromScannerResult`].
*/
16
17use std::io::Cursor;
18
19use crabstep::deserializer::iter::Property;
20use plist::{Dictionary, Value};
21
/// Deepest tree level whose children are still expanded during traversal.
///
/// An `NSKeyedArchiver` object graph shares nodes through `UID` references, so
/// a malformed archive can point a node back at one of its own ancestors.
/// Capping the depth guarantees that walking such a payload terminates.
const MAX_DEPTH: usize = 8;
27
28/// Borrowing, lazily-resolved cursor over one `DDScannerResult` tree node.
29///
30/// Fields are stored as `UID` indices into the archive's `$objects` table and
31/// resolved on access, so constructing or walking a `ScannerResult` allocates
32/// nothing beyond the child-index list produced by [`children`](Self::children).
33#[derive(Clone, Copy)]
34pub struct ScannerResult<'a> {
35    /// The archive's `$objects` table; every field is a `UID` index into this.
36    objects: &'a [Value],
37    /// The index of this node within `objects`.
38    index: usize,
39    /// How deep this node sits in the tree, used to bound recursion.
40    depth: usize,
41}
42
43impl<'a> ScannerResult<'a> {
44    /// Resolve the root scanner result from a parsed detector archive.
45    ///
46    /// The root index is stored under `$top.dd-result` (falling back to
47    /// `$top.root`) and points into the archive's `$objects` table.
48    #[must_use]
49    pub fn root(plist: &'a Value) -> Option<Self> {
50        let body = plist.as_dictionary()?;
51        let objects = body.get("$objects")?.as_array()?;
52        let top = body.get("$top")?.as_dictionary()?;
53        let index = top
54            .get("dd-result")
55            .or_else(|| top.get("root"))
56            .and_then(uid_index)?;
57        Some(Self {
58            objects,
59            index,
60            depth: 0,
61        })
62    }
63
64    /// The result type from the `T` field (e.g. `"Money"`, `"Unit"`, `"TrackingNumber"`).
65    #[must_use]
66    pub fn kind(&self) -> Option<&'a str> {
67        self.field_string("T")
68    }
69
70    /// The result value from the `V` field, if present.
71    #[must_use]
72    pub fn value(&self) -> Option<&'a str> {
73        self.field_string("V")
74    }
75
76    /// The substring of the message text this result matched from the `MS` field.
77    #[must_use]
78    pub fn matched(&self) -> Option<&'a str> {
79        self.field_string("MS")
80    }
81
82    /// Child results from the `SR` array, depth-bounded so cyclic archives
83    /// terminate.
84    pub fn children(&self) -> impl Iterator<Item = ScannerResult<'a>> + '_ {
85        self.child_indices()
86            .unwrap_or_default()
87            .into_iter()
88            .map(|index| ScannerResult {
89                objects: self.objects,
90                index,
91                depth: self.depth + 1,
92            })
93    }
94
95    /// The dictionary backing this node.
96    fn dict(&self) -> Option<&'a Dictionary> {
97        self.objects.get(self.index)?.as_dictionary()
98    }
99
100    /// Resolve a `UID`-referenced string field by key.
101    fn field_string(&self, key: &str) -> Option<&'a str> {
102        let reference = self.dict()?.get(key)?;
103        self.objects.get(uid_index(reference)?)?.as_string()
104    }
105
106    /// Resolve the `SR` array to the object indices of its child results, or
107    /// `None` once the depth bound is reached.
108    fn child_indices(&self) -> Option<Vec<usize>> {
109        if self.depth >= MAX_DEPTH {
110            return None;
111        }
112        let sub_results = self.dict()?.get("SR")?;
113        let array = self
114            .objects
115            .get(uid_index(sub_results)?)?
116            .as_dictionary()?
117            .get("NS.objects")?
118            .as_array()?;
119        Some(array.iter().filter_map(uid_index).collect())
120    }
121}
122
123/// Type that can recognize itself from a [`ScannerResult`] node.
124///
125/// Returning `None` means "this node is not of the implementing type," which is
126/// an expected outcome rather than an error.
127pub trait FromScannerResult: Sized {
128    /// Byte markers used to reject impossible payloads before plist parsing.
129    ///
130    /// When non-empty, [`from_attribute`](Self::from_attribute) parses the
131    /// payload only if it contains at least one of these byte sequences. This
132    /// skips deserializing results from the shared `__kIMDataDetectedAttributeName`
133    /// attribute that cannot be `Self`, since that attribute carries every
134    /// data-detector type. Types parsed from a dedicated attribute leave this
135    /// empty (the default).
136    const MARKERS: &[&[u8]] = &[];
137
138    /// Parse `Self` from a scanner-result node, or return `None` on mismatch.
139    fn from_scanner_result(result: &ScannerResult<'_>) -> Option<Self>;
140
141    /// Parse `Self` from a typedstream attribute carrying a `DDScannerResult`
142    /// archive (`NSData` or `NSMutableData`).
143    ///
144    /// Returns `None` when the value is not data, fails the
145    /// [`MARKERS`](Self::MARKERS) pre-filter, is not a valid archive, or does
146    /// not represent a `Self`.
147    fn from_attribute<'p>(value: &Property<'p, 'p>) -> Option<Self> {
148        let data = value.as_data()?;
149        if !Self::MARKERS.is_empty()
150            && !Self::MARKERS
151                .iter()
152                .any(|marker| data.windows(marker.len()).any(|window| window == *marker))
153        {
154            return None;
155        }
156        let plist = Value::from_reader(Cursor::new(data)).ok()?;
157        Self::from_scanner_result(&ScannerResult::root(&plist)?)
158    }
159}
160
161/// Interpret a plist `UID` as an index into the `$objects` table.
162fn uid_index(value: &Value) -> Option<usize> {
163    usize::try_from(value.as_uid()?.get()).ok()
164}