// sources_core/document/builder.rs

1use async_trait::async_trait;
2use schema_core::{GenericValue, IndexMapping, IndexName, TableName};
3
4use crate::{Result, RowKey, SnapshotTable};
5
6/// Addresses one document in a target index: which index, and the root row's
7/// key within it. The same source row can map to documents in several indexes,
8/// so the [`index`](Self::index) is part of the identity.
9#[derive(Debug, Clone, PartialEq, Eq, Hash)]
10pub struct DocumentId {
11    pub index: IndexName,
12    /// The root table's primary key — the natural identifier of the document.
13    pub key: RowKey,
14}
15
16/// The result of assembling a document: a body to upsert into the index, or a
17/// tombstone when the root row is gone (or soft-deleted).
18#[derive(Debug, Clone)]
19pub enum Document {
20    /// Upsert the assembled body under [`id`](Self::Upsert::id).
21    Upsert { id: DocumentId, body: GenericValue },
22    /// Remove the document from the index.
23    Delete { id: DocumentId },
24}
25
26impl Document {
27    /// The id this outcome addresses, whichever variant it is.
28    pub fn id(&self) -> &DocumentId {
29        match self {
30            Document::Upsert { id, .. } | Document::Delete { id } => id,
31        }
32    }
33}
34
35/// What an index needs in order to be seeded: its name and the source table to
36/// snapshot for it. A document is identified by its root row, so snapshotting
37/// the **root table** alone seeds the whole index — `build` pulls in every join
38/// and aggregate server-side when each root row is assembled.
39#[derive(Debug, Clone, PartialEq, Eq)]
40pub struct IndexScope {
41    pub index: IndexName,
42    /// The index's root table — the one whose rows map one-to-one to documents.
43    pub root: SnapshotTable,
44}
45
46/// Turns changed rows into target documents — the read half of a source.
47///
48/// Used in two steps so the engine can deduplicate between them:
49///
50/// 1. [`resolve`](Self::resolve) maps a changed row — given only its `table`
51///    and `key` — to the ids of every document it affects. A change on a
52///    document's own root table resolves to that one id; a change on a
53///    *related* table (one folded in by a join or aggregate) is a reverse
54///    lookup whose result size is not known until queried.
55/// 2. [`build`](Self::build) assembles one document by id — the root row plus
56///    its joins and aggregates — or reports it deleted.
57///
58/// The engine resolves every change in a batch, deduplicates the ids (the same
59/// document is often touched by several changes in one transaction), and builds
60/// each unique id once.
61///
62/// Note that `resolve` takes the table and key as plain values rather than a
63/// capture event: document construction is independent of how the change was
64/// captured.
65#[async_trait]
66pub trait DocumentBuilder: std::fmt::Debug + Send + Sync {
67    /// The documents the changed row affects. Empty if it touches nothing any
68    /// index cares about.
69    async fn resolve(&self, table: &TableName, key: &RowKey) -> Result<Vec<DocumentId>>;
70
71    /// Assemble one document, or report it deleted if its root row is absent.
72    async fn build(&self, id: &DocumentId) -> Result<Document>;
73
74    /// Assemble many documents at once. Returns one [`Document`] per requested
75    /// id — an `Upsert`, or a `Delete` tombstone when the root row is absent —
76    /// in any order; callers match results back by [`Document::id`].
77    ///
78    /// The default builds each id independently, so it matches [`build`] one
79    /// for one. Sources that can assemble a set in fewer round-trips (e.g. one
80    /// `WHERE pk IN (…)` query per index) should override it; the engine builds
81    /// a whole batch's deduplicated ids through this in a single call.
82    ///
83    /// [`build`]: Self::build
84    async fn build_many(&self, ids: &[DocumentId]) -> Result<Vec<Document>> {
85        let mut out = Vec::with_capacity(ids.len());
86        for id in ids {
87            out.push(self.build(id).await?);
88        }
89        Ok(out)
90    }
91
92    /// The enabled indexes this builder serves, each with the root table to
93    /// snapshot when seeding it. The engine uses this to scope an initial
94    /// backfill per index. The default is empty — a builder with no backfillable
95    /// indexes, which the engine simply never seeds.
96    fn backfill_scopes(&self) -> Vec<IndexScope> {
97        Vec::new()
98    }
99
100    /// The resolved mapping of every index this builder serves: each field
101    /// typed from the schema's explicit `mapping` where one is given, and from
102    /// the source's own column types otherwise. Sinks that own their index use
103    /// this to create it up front. The default is empty — a builder that leaves
104    /// index creation to whatever the sink does on first write.
105    async fn index_mappings(&self) -> Result<Vec<IndexMapping>> {
106        Ok(Vec::new())
107    }
108}