1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
//! A trie (prefix tree) for efficient column path matching.
//!
//! Used to quickly determine if a column path matches or is a descendant of any
//! user-specified column. This provides O(path_length) lookup instead of
//! O(num_specified_columns * path_length).
use std::collections::HashMap;
use delta_kernel_derive::internal_api;
use crate::expressions::ColumnName;
/// A trie (prefix tree) for efficient column path matching.
///
/// The lifetime `'col` ties this trie to the column names it was built from,
/// allowing it to borrow string slices instead of cloning.
///
/// The `Default` implementation creates an empty trie node with no children and
/// `is_terminal = false`. This is used both for creating a new root trie and for
/// creating intermediate nodes during insertion (via `or_default()`).
#[derive(Debug, Default)]
#[internal_api]
pub(crate) struct ColumnTrie<'col> {
children: HashMap<&'col str, ColumnTrie<'col>>,
/// True if this node represents the end of a specified column path.
/// Intermediate nodes have `is_terminal = false`; only the final node of
/// an inserted column path has `is_terminal = true`.
is_terminal: bool,
}
impl<'col> ColumnTrie<'col> {
/// Creates an empty trie.
#[internal_api]
pub(crate) fn new() -> Self {
Self::default()
}
/// Builds a trie from a list of column names.
///
/// For example, `from_columns(&[column_name!("a.b"), column_name!("a.c")])` creates:
/// ```text
/// root (is_terminal=false)
/// └── "a" (is_terminal=false)
/// ├── "b" (is_terminal=true)
/// └── "c" (is_terminal=true)
/// ```
#[internal_api]
pub(crate) fn from_columns(columns: &'col [ColumnName]) -> Self {
let mut trie = Self::new();
for column in columns {
trie.insert(column);
}
trie
}
/// Inserts a column path into the trie.
///
/// Walks down the trie for each path component, creating nodes as needed via `or_default()`
/// (which initializes `is_terminal = false`). After the loop, only the final node is marked
/// as terminal.
///
/// For example, inserting `a.b.c` creates:
/// ```text
/// root (is_terminal=false)
/// └── "a" (is_terminal=false)
/// └── "b" (is_terminal=false)
/// └── "c" (is_terminal=true)
/// ```
#[internal_api]
pub(crate) fn insert(&mut self, column: &'col ColumnName) {
let mut node = self;
for part in column.iter() {
node = node.children.entry(part.as_str()).or_default();
}
node.is_terminal = true;
}
/// Returns true if `path` equals or is a descendant of any inserted column.
///
/// For example, if the trie contains `["a", "b"]`:
/// - `["a", "b"]` → true (exact match)
/// - `["a", "b", "c"]` → true (descendant)
/// - `["a"]` → false (ancestor, not descendant)
/// - `["a", "x"]` → false (divergent path)
#[internal_api]
pub(crate) fn contains_prefix_of(&self, path: &[String]) -> bool {
let mut node = self;
for part in path {
if node.is_terminal {
// We've matched a complete specified column, and path continues.
// So path is a descendant of this specified column.
return true;
}
match node.children.get(part.as_str()) {
Some(child) => node = child,
None => return false, // Path diverges from all specified columns
}
}
// We've consumed the entire path. Match only if we're at a terminal.
node.is_terminal
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_column_trie() {
// Build trie with specified column ["a", "b"]
let columns = [ColumnName::new(["a", "b"])];
let trie = ColumnTrie::from_columns(&columns);
// Exact match: path = ["a", "b"] → include
assert!(trie.contains_prefix_of(&["a".to_string(), "b".to_string()]));
// Descendant of specified: path = ["a", "b", "c"] → include
assert!(trie.contains_prefix_of(&["a".to_string(), "b".to_string(), "c".to_string()]));
// Ancestor of specified: path = ["a"] → NOT include
assert!(!trie.contains_prefix_of(&["a".to_string()]));
// Unrelated paths → NOT include
assert!(!trie.contains_prefix_of(&["a".to_string(), "c".to_string()]));
assert!(!trie.contains_prefix_of(&["x".to_string(), "y".to_string()]));
// Non-existent nested path: trie has ["a", "b", "c", "d"], path = ["a", "b"]
// User asked for a.b.c.d but a.b is a leaf → NOT include
let deep_columns = [ColumnName::new(["a", "b", "c", "d"])];
let deep_trie = ColumnTrie::from_columns(&deep_columns);
assert!(!deep_trie.contains_prefix_of(&["a".to_string(), "b".to_string()]));
// Multiple specified columns
let multi_columns = [
ColumnName::new(["a", "b"]),
ColumnName::new(["x", "y", "z"]),
];
let multi_trie = ColumnTrie::from_columns(&multi_columns);
assert!(multi_trie.contains_prefix_of(&["a".to_string(), "b".to_string()]));
assert!(multi_trie.contains_prefix_of(&[
"a".to_string(),
"b".to_string(),
"c".to_string()
]));
assert!(multi_trie.contains_prefix_of(&[
"x".to_string(),
"y".to_string(),
"z".to_string()
]));
assert!(!multi_trie.contains_prefix_of(&["x".to_string(), "y".to_string()])); // ancestor
assert!(!multi_trie.contains_prefix_of(&["a".to_string(), "c".to_string()]));
// divergent
}
}